Commit 74bf4312 authored by David S. Miller, committed by David S. Miller

[SPARC64]: Move away from virtual page tables, part 1.



We now use the TSB hardware assist features of the UltraSPARC
MMUs.
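
(For readers new to the mechanism: the TSB is a direct-mapped, software-managed
cache of translations in memory.  Each entry is 16 bytes, an 8-byte tag plus an
8-byte PTE, and the MMU hardware computes the entry address for a faulting VA,
so the miss handler only has to compare the tag and reload the TLB.  A minimal
C model of that lookup follows; only the 16-byte tag+data entry layout mirrors
what the new miss handlers use, the harness and helper names are illustrative,
not kernel code.)

/* Illustrative model of a TSB lookup; not kernel code. */
#include <stdint.h>

#define TSB_ENTRIES	512	/* 8KB TSB / 16 bytes per entry */
#define PAGE_SHIFT	13	/* 8KB base pages */

struct tsb_entry {
	uint64_t tag;		/* derived from context and VA */
	uint64_t data;		/* the PTE to load into the TLB */
};

static struct tsb_entry tsb[TSB_ENTRIES];

/* The hardware assist hashes the faulting VA to an entry address;
 * for a direct-mapped 8KB TSB this is just the low VPN bits. */
static struct tsb_entry *tsb_ptr(uint64_t vaddr)
{
	return &tsb[(vaddr >> PAGE_SHIFT) & (TSB_ENTRIES - 1)];
}

/* Miss-handler logic: on a tag match, reload the TLB from the cached
 * PTE and retry; otherwise fall through to the page table walk. */
static int tsb_lookup(uint64_t tag_target, uint64_t vaddr, uint64_t *pte)
{
	struct tsb_entry *ent = tsb_ptr(vaddr);

	if (ent->tag != tag_target)
		return 0;	/* TSB miss: walk the page tables */
	*pte = ent->data;	/* TSB hit: stxa into the TLB */
	return 1;
}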

SMP is currently knowingly broken; we need to find another place
to store the per-cpu base pointers.  We hid them away in the TSB
base register, and that obviously will not work any more :-)

Another known broken case is non-8KB base page size.

Also noticed that flush_tlb_all() is not referenced anywhere; only
the internal __flush_tlb_all() (local cpu only) is used by the
sparc64 port, so we can get rid of flush_tlb_all().

The kernel gets its own 8KB TSB (swapper_tsb) and each address space
gets its own private 8KB TSB.  Later we can add code to dynamically
increase the size of the per-process TSB as the RSS grows.  An 8KB
TSB is good enough for up to about a 4MB RSS, after which the TSB
starts to incur many capacity and conflict misses.
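
(The 4MB figure falls out of the TSB geometry; a quick back-of-envelope
check, assuming 16-byte entries, 8-byte tag plus 8-byte PTE, each
covering one 8KB page:

#include <stdio.h>

int main(void)
{
	unsigned long tsb_bytes   = 8 * 1024;	/* one 8KB TSB   */
	unsigned long entry_bytes = 16;		/* tag + data    */
	unsigned long entries     = tsb_bytes / entry_bytes;	/* 512 */
	unsigned long page_bytes  = 8 * 1024;	/* 8KB base page */

	/* A fully utilized, conflict-free TSB covers at most: */
	printf("%lu entries -> %lu MB of RSS\n",
	       entries, (entries * page_bytes) >> 20);	/* 512 -> 4MB */
	return 0;
}

Beyond that working set, distinct pages start competing for the same
entries, hence the capacity and conflict misses noted above.)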

We even accumulate OBP translations into the kernel TSB.

Another area for refinement is large page size support.  We could use
a secondary address space TSB to handle those.

Signed-off-by: David S. Miller <davem@davemloft.net>
parent 30d4d1ff
arch/sparc64/Makefile
+1 −1
@@ -38,5 +38,5 @@ else
  CMODEL_CFLAG := -m64 -mcmodel=medlow
endif

-head.o: head.S ttable.S itlb_base.S dtlb_base.S dtlb_backend.S dtlb_prot.S \
+head.o: head.S ttable.S itlb_miss.S dtlb_miss.S ktlb.S tsb.S \
	etrap.S rtrap.S winfixup.S entry.S
arch/sparc64/kernel/binfmt_aout32.c
+4 −9
@@ -31,6 +31,7 @@
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>
+#include <asm/mmu_context.h>

static int load_aout32_binary(struct linux_binprm *, struct pt_regs * regs);
static int load_aout32_library(struct file*);
@@ -329,15 +330,9 @@ beyond_if:

	current->mm->start_stack =
		(unsigned long) create_aout32_tables((char __user *)bprm->p, bprm);
-	if (!(orig_thr_flags & _TIF_32BIT)) {
-		unsigned long pgd_cache = get_pgd_cache(current->mm->pgd);
-
-		__asm__ __volatile__("stxa\t%0, [%1] %2\n\t"
-				     "membar #Sync"
-				     : /* no outputs */
-				     : "r" (pgd_cache),
-				       "r" (TSB_REG), "i" (ASI_DMMU));
-	}
+	tsb_context_switch(__pa(current->mm->pgd),
+	                   current->mm->context.sparc64_tsb);

	start_thread32(regs, ex.a_entry, current->mm->start_stack);
	if (current->ptrace & PT_PTRACED)
		send_sig(SIGTRAP, current, 0);
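
(The replacement hands the MMU both the physical address of the PGD and the
per-mm TSB in one call, instead of stashing a PGD cache in TSB_REG, which the
TSB code now owns.  A declaration sketch inferred purely from this call site;
the real routine lives in the new tsb.S and its exact prototype is not shown
in this hunk:

/* Hypothetical prototype, inferred from the call site above. */
extern void tsb_context_switch(unsigned long pgd_pa,
			       unsigned long *tsb);

This is presumably also why the hunk adds #include <asm/mmu_context.h>: the
declaration and the sparc64_tsb context field come from the MMU context
headers.)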
arch/sparc64/kernel/dtlb_backend.S

deleted 100644 → 0
+0 −170
/* $Id: dtlb_backend.S,v 1.16 2001/10/09 04:02:11 davem Exp $
 * dtlb_backend.S: Back end to DTLB miss replacement strategy.
 *                 This is included directly into the trap table.
 *
 * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
 * Copyright (C) 1997,1998 Jakub Jelinek   (jj@ultra.linux.cz)
 */

#include <asm/pgtable.h>
#include <asm/mmu.h>

#define VALID_SZ_BITS	(_PAGE_VALID | _PAGE_SZBITS)

#define VPTE_BITS		(_PAGE_CP | _PAGE_CV | _PAGE_P )
#define VPTE_SHIFT		(PAGE_SHIFT - 3)

/* Ways we can get here:
 *
 * 1) Nucleus loads and stores to/from PA-->VA direct mappings at tl>1.
 * 2) Nucleus loads and stores to/from user/kernel window save areas.
 * 3) VPTE misses from dtlb_base and itlb_base.
 *
 * We need to extract out the PMD and PGDIR indexes from the
 * linear virtual page table access address.  The PTE index
 * is at the bottom, but we are not concerned with it.  Bits
 * 0 to 2 are clear since each PTE is 8 bytes in size.  Each
 * PMD and PGDIR entry are 4 bytes in size.   Thus, this
 * address looks something like:
 *
 * |---------------------------------------------------------------|
 * |  ...   |    PGDIR index    |    PMD index    | PTE index  |   |
 * |---------------------------------------------------------------|
 *   63   F   E               D   C             B   A         3 2 0  <- bit nr
 *
 *  The variable bits above are defined as:
 *  A --> 3 + (PAGE_SHIFT - log2(8)) - 1
 *    --> 3 + (PAGE_SHIFT - 3) - 1
 *        (ie. this is "bit 3" + PAGE_SHIFT - log2(PTE entry size) - 1)
 *  B --> A + 1
 *  C --> B + (PAGE_SHIFT - log2(4)) - 1
 *    --> B + (PAGE_SHIFT - 2) - 1
 *        (ie. this is "bit B" + PAGE_SHIFT - log2(PMD entry size) - 1)
 *  D --> C + 1
 *  E --> D + (PAGE_SHIFT - log2(4)) - 1
 *    --> D + (PAGE_SHIFT - 2) - 1
 *        (ie. this is "bit D" + PAGE_SHIFT - log2(PGDIR entry size) - 1)
 *  F --> E + 1
 *
 * (Note how "B" always evaluates to PAGE_SHIFT, all the other constants
 *  cancel out.)
 *
 * For 8K PAGE_SIZE (thus, PAGE_SHIFT of 13) the bit numbers are:
 * A --> 12
 * B --> 13
 * C --> 23
 * D --> 24
 * E --> 34
 * F --> 35
 *
 * For 64K PAGE_SIZE (thus, PAGE_SHIFT of 16) the bit numbers are:
 * A --> 15
 * B --> 16
 * C --> 29
 * D --> 30
 * E --> 43
 * F --> 44
 *
 * Because bits both above and below each PGDIR and PMD index need to
 * be masked out, and the index can be as long as 14 bits (when using a
 * 64K PAGE_SIZE, and thus a PAGE_SHIFT of 16), we need 3 instructions
 * to extract each index out.
 *
 * Shifts do not pair very well on UltraSPARC-I, II, IIi, and IIe, so
 * we try to avoid using them for the entire operation.  We could set up
 * a mask anywhere from bit 31 down to bit 10 using the sethi instruction.
 *
 * We need a mask covering bits B --> C and one covering D --> E.
 * For 8K PAGE_SIZE these masks are 0x00ffe000 and 0x7ff000000.
 * For 64K PAGE_SIZE these masks are 0x3fff0000 and 0xfffc0000000.
 * The second in each set cannot be loaded with a single sethi
 * instruction, because the upper bits are past bit 32.  We would
 * need to use a sethi + a shift.
 *
 * For the time being, we use 2 shifts and a simple "and" mask.
 * We shift left to clear the bits above the index, we shift right
 * to clear the bits below the index (sans the log2(4 or 8) bits)
 * and use a mask to clear the log2(4 or 8) bits.  We therefore need
 * to define 4 shift counts, all of which are relative to PAGE_SHIFT.
 *
 * Although unsupportable for other reasons, this does mean that
 * 512K and 4MB page sizes would be generally supported by the
 * kernel.  (ELF binaries would break with > 64K PAGE_SIZE since
 * the sections are only aligned that strongly).
 *
 * The operations performed for extraction are thus:
 *
 *      ((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3
 *
 */

#define A (3 + (PAGE_SHIFT - 3) - 1)
#define B (A + 1)
#define C (B + (PAGE_SHIFT - 2) - 1)
#define D (C + 1)
#define E (D + (PAGE_SHIFT - 2) - 1)
#define F (E + 1)

#define PMD_SHIFT_LEFT		(64 - D)
#define PMD_SHIFT_RIGHT		(64 - (D - B) - 2)
#define PGDIR_SHIFT_LEFT 	(64 - F)
#define PGDIR_SHIFT_RIGHT	(64 - (F - D) - 2)
#define LOW_MASK_BITS		0x3

/* TLB1 ** ICACHE line 1: tl1 DTLB and quick VPTE miss	*/
	ldxa		[%g1 + %g1] ASI_DMMU, %g4	! Get TAG_ACCESS
	add		%g3, %g3, %g5			! Compute VPTE base
	cmp		%g4, %g5			! VPTE miss?
	bgeu,pt		%xcc, 1f			! Continue here
	 andcc		%g4, TAG_CONTEXT_BITS, %g5	! tl0 miss Nucleus test
	ba,a,pt		%xcc, from_tl1_trap		! Fall to tl0 miss
1:	sllx		%g6, VPTE_SHIFT, %g4		! Position TAG_ACCESS
	or		%g4, %g5, %g4			! Prepare TAG_ACCESS

/* TLB1 ** ICACHE line 2: Quick VPTE miss	  	*/
	mov		TSB_REG, %g1			! Grab TSB reg
	ldxa		[%g1] ASI_DMMU, %g5		! Doing PGD caching?
	sllx		%g6, PMD_SHIFT_LEFT, %g1	! Position PMD offset
	be,pn		%xcc, sparc64_vpte_nucleus	! Is it from Nucleus?
	 srlx		%g1, PMD_SHIFT_RIGHT, %g1	! Mask PMD offset bits
	brnz,pt		%g5, sparc64_vpte_continue	! Yep, go like smoke
	 andn		%g1, LOW_MASK_BITS, %g1		! Final PMD mask
	sllx		%g6, PGDIR_SHIFT_LEFT, %g5	! Position PGD offset

/* TLB1 ** ICACHE line 3: Quick VPTE miss	  	*/
	srlx		%g5, PGDIR_SHIFT_RIGHT, %g5	! Mask PGD offset bits
	andn		%g5, LOW_MASK_BITS, %g5		! Final PGD mask
	lduwa		[%g7 + %g5] ASI_PHYS_USE_EC, %g5! Load PGD
	brz,pn		%g5, vpte_noent			! Valid?
sparc64_kpte_continue:
	 sllx		%g5, 11, %g5			! Shift into place
sparc64_vpte_continue:
	lduwa		[%g5 + %g1] ASI_PHYS_USE_EC, %g5! Load PMD
	sllx		%g5, 11, %g5			! Shift into place
	brz,pn		%g5, vpte_noent			! Valid?

/* TLB1 ** ICACHE line 4: Quick VPTE miss	  	*/
	 mov		(VALID_SZ_BITS >> 61), %g1	! upper vpte into %g1
	sllx		%g1, 61, %g1			! finish calc
	or		%g5, VPTE_BITS, %g5		! Prepare VPTE data
	or		%g5, %g1, %g5			! ...
	mov		TLB_SFSR, %g1			! Restore %g1 value
	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Load VPTE into TLB
	stxa		%g4, [%g1 + %g1] ASI_DMMU	! Restore previous TAG_ACCESS
	retry						! Load PTE once again

#undef VALID_SZ_BITS
#undef VPTE_SHIFT
#undef VPTE_BITS
#undef A
#undef B
#undef C
#undef D
#undef E
#undef F
#undef PMD_SHIFT_LEFT
#undef PMD_SHIFT_RIGHT
#undef PGDIR_SHIFT_LEFT
#undef PGDIR_SHIFT_RIGHT
#undef LOW_MASK_BITS
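
(For reference, the extraction the deleted handler performed, i.e.
((X << FOO_SHIFT_LEFT) >> FOO_SHIFT_RIGHT) & ~0x3, can be written out in C.
A sketch using the same A-F constants for PAGE_SHIFT == 13, with illustrative
function names:

#include <stdint.h>

#define PAGE_SHIFT	13
#define A	(3 + (PAGE_SHIFT - 3) - 1)	/* 12 */
#define B	(A + 1)				/* 13 */
#define C	(B + (PAGE_SHIFT - 2) - 1)	/* 23 */
#define D	(C + 1)				/* 24 */
#define E	(D + (PAGE_SHIFT - 2) - 1)	/* 34 */
#define F	(E + 1)				/* 35 */

/* PMD entry offset: bits B..C of the VPTE miss address, scaled by
 * the 4-byte PMD entry size (hence the low two bits are cleared). */
static uint64_t pmd_entry_offset(uint64_t vpte_addr)
{
	return ((vpte_addr << (64 - D)) >> (64 - (D - B) - 2)) & ~0x3UL;
}

/* PGDIR entry offset: bits D..E, likewise scaled by 4. */
static uint64_t pgd_entry_offset(uint64_t vpte_addr)
{
	return ((vpte_addr << (64 - F)) >> (64 - (F - D) - 2)) & ~0x3UL;
}

The left shift discards everything above the index, the logical right shift
drops everything below it except two stray bits, and the final mask clears
those, leaving index * 4 as the byte offset into the table.)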

arch/sparc64/kernel/dtlb_base.S

deleted 100644 → 0
+0 −109
/* $Id: dtlb_base.S,v 1.17 2001/10/11 22:33:52 davem Exp $
 * dtlb_base.S:	Front end to DTLB miss replacement strategy.
 *              This is included directly into the trap table.
 *
 * Copyright (C) 1996,1998 David S. Miller (davem@redhat.com)
 * Copyright (C) 1997,1998 Jakub Jelinek   (jj@ultra.linux.cz)
 */

#include <asm/pgtable.h>
#include <asm/mmu.h>

/* %g1	TLB_SFSR	(%g1 + %g1 == TLB_TAG_ACCESS)
 * %g2	(KERN_HIGHBITS | KERN_LOWBITS)
 * %g3  VPTE base	(0xfffffffe00000000)	Spitfire/Blackbird (44-bit VA space)
 *			(0xffe0000000000000)	Cheetah		   (64-bit VA space)
 * %g7	__pa(current->mm->pgd)
 *
 * The VPTE base value is completely magic, but note that
 * few places in the kernel other than these TLB miss
 * handlers know anything about the VPTE mechanism or
 * how it works (see VPTE_SIZE, TASK_SIZE and PTRS_PER_PGD).
 * Consider the 44-bit VADDR Ultra-I/II case as an example:
 *
 * VA[0 :  (1<<43)] produce VPTE index [%g3                        :   0]
 * VA[0 : -(1<<43)] produce VPTE index [%g3-(1<<(43-PAGE_SHIFT+3)) : %g3]
 *
 * For Cheetah's 64-bit VADDR space this is:
 *
 * VA[0 :  (1<<63)] produce VPTE index [%g3                        :   0]
 * VA[0 : -(1<<63)] produce VPTE index [%g3-(1<<(63-PAGE_SHIFT+3)) : %g3]
 *
 * If you're paying attention you'll notice that this means half of
 * the VPTE table is above %g3 and half is below, low VA addresses
 * map progressively upwards from %g3, and high VA addresses map
 * progressively upwards towards %g3.  This trick was needed to make
 * the same 8 instruction handler work both for Spitfire/Blackbird's
 * peculiar VA space hole configuration and the full 64-bit VA space
 * one of Cheetah at the same time.
 */

/* Ways we can get here:
 *
 * 1) Nucleus loads and stores to/from PA-->VA direct mappings.
 * 2) Nucleus loads and stores to/from vmalloc() areas.
 * 3) User loads and stores.
 * 4) User space accesses by nucleus at tl0
 */

#if PAGE_SHIFT == 13
/*
 * To compute vpte offset, we need to do ((addr >> 13) << 3),
 * which can be optimized to (addr >> 10) if bits 10/11/12 can
 * be guaranteed to be 0 ... mmu_context.h does guarantee this
 * by only using 10 bits in the hwcontext value.
 */
#define CREATE_VPTE_OFFSET1(r1, r2) nop
#define CREATE_VPTE_OFFSET2(r1, r2) \
				srax	r1, 10, r2
#else
#define CREATE_VPTE_OFFSET1(r1, r2) \
				srax	r1, PAGE_SHIFT, r2
#define CREATE_VPTE_OFFSET2(r1, r2) \
				sllx	r2, 3, r2
#endif

/* DTLB ** ICACHE line 1: Quick user TLB misses		*/
	mov		TLB_SFSR, %g1
	ldxa		[%g1 + %g1] ASI_DMMU, %g4	! Get TAG_ACCESS
	andcc		%g4, TAG_CONTEXT_BITS, %g0	! From Nucleus?
from_tl1_trap:
	rdpr		%tl, %g5			! For TL==3 test
	CREATE_VPTE_OFFSET1(%g4, %g6)			! Create VPTE offset
	be,pn		%xcc, kvmap			! Yep, special processing
	 CREATE_VPTE_OFFSET2(%g4, %g6)			! Create VPTE offset
	cmp		%g5, 4				! Last trap level?

/* DTLB ** ICACHE line 2: User finish + quick kernel TLB misses	*/
	be,pn		%xcc, longpath			! Yep, cannot risk VPTE miss
	 nop						! delay slot
	ldxa		[%g3 + %g6] ASI_S, %g5		! Load VPTE
1:	brgez,pn	%g5, longpath			! Invalid, branch out
	 nop						! Delay-slot
9:	stxa		%g5, [%g0] ASI_DTLB_DATA_IN	! Reload TLB
	retry						! Trap return
	nop

/* DTLB ** ICACHE line 3: winfixups+real_faults		*/
longpath:
	rdpr		%pstate, %g5			! Move into alternate globals
	wrpr		%g5, PSTATE_AG|PSTATE_MG, %pstate
	rdpr		%tl, %g4			! See where we came from.
	cmp		%g4, 1				! Is etrap/rtrap window fault?
	mov		TLB_TAG_ACCESS, %g4		! Prepare for fault processing
	ldxa		[%g4] ASI_DMMU, %g5		! Load faulting VA page
	be,pt		%xcc, sparc64_realfault_common	! Jump to normal fault handling
	 mov		FAULT_CODE_DTLB, %g4		! It was read from DTLB

/* DTLB ** ICACHE line 4: Unused...	*/
	ba,a,pt		%xcc, winfix_trampoline		! Call window fixup code
	nop
	nop
	nop
	nop
	nop
	nop
	nop

#undef CREATE_VPTE_OFFSET1
#undef CREATE_VPTE_OFFSET2
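
(The CREATE_VPTE_OFFSET folding above, turning ((addr >> 13) << 3) into a
single (addr >> 10), is easy to verify in C.  A sketch with illustrative
names, relying on the guarantee quoted in the comment that bits 10-12 of the
TAG_ACCESS value are zero because only 10 hwcontext bits are used:

#include <assert.h>
#include <stdint.h>

static int64_t vpte_offset_two_ops(int64_t tag)
{
	return (tag >> 13) << 3;	/* page index, then * 8-byte PTE */
}

static int64_t vpte_offset_one_op(int64_t tag)
{
	return tag >> 10;	/* identical result when bits 10-12 == 0 */
}

int main(void)
{
	/* VA page number in the high bits, 10-bit context in the low. */
	int64_t tag = ((int64_t)0x123456789 << 13) | 0x3ff;

	assert(vpte_offset_two_ops(tag) == vpte_offset_one_op(tag));
	return 0;
}

The arithmetic shifts in C match the srax used by the handler, so the
equivalence holds for the sign-extended tag values as well.)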
arch/sparc64/kernel/dtlb_miss.S
+39 −0
/* DTLB ** ICACHE line 1: Context 0 check and TSB load	*/
	ldxa	[%g0] ASI_DMMU_TSB_8KB_PTR, %g1	! Get TSB 8K pointer
	ldxa	[%g0] ASI_DMMU, %g6		! Get TAG TARGET
	srlx	%g6, 48, %g5			! Get context
	brz,pn	%g5, kvmap_dtlb			! Context 0 processing
	 nop					! Delay slot (fill me)
	ldda	[%g1] ASI_NUCLEUS_QUAD_LDD, %g4	! Load TSB entry
	nop					! Push branch to next I$ line
	cmp	%g4, %g6			! Compare TAG

/* DTLB ** ICACHE line 2: TSB compare and TLB load	*/
	bne,pn	%xcc, tsb_miss_dtlb		! Miss
	 mov	FAULT_CODE_DTLB, %g3
	stxa	%g5, [%g0] ASI_DTLB_DATA_IN	! Load TLB
	retry					! Trap done
	nop
	nop
	nop
	nop

/* DTLB ** ICACHE line 3:				*/
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop

/* DTLB ** ICACHE line 4: 				*/
	nop
	nop
	nop
	nop
	nop
	nop
	nop
	nop
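
(In C terms, the fast path of this new handler amounts to the following
sketch, where the helper names stand in for the ASI accesses and branch
targets above and are purely illustrative:

/* Shape of the new DTLB miss fast path; not kernel code. */
#define FAULT_CODE_DTLB	1	/* placeholder value */

struct tsb_entry { unsigned long tag, data; };

/* Stand-ins for the hardware/ASI operations and slow-path branches. */
extern struct tsb_entry *mmu_tsb_8kb_ptr(void);	/* ASI_DMMU_TSB_8KB_PTR */
extern unsigned long mmu_tag_target(void);	/* ASI_DMMU TAG TARGET  */
extern void mmu_load_dtlb(unsigned long pte);	/* ASI_DTLB_DATA_IN     */
extern void kvmap_dtlb(void);
extern void tsb_miss_dtlb(int fault_code);

void dtlb_miss(void)
{
	struct tsb_entry *ent = mmu_tsb_8kb_ptr();
	unsigned long tag_target = mmu_tag_target();

	if ((tag_target >> 48) == 0) {	/* context 0: kernel address */
		kvmap_dtlb();
		return;
	}
	if (ent->tag != tag_target) {	/* TSB miss */
		tsb_miss_dtlb(FAULT_CODE_DTLB);
		return;
	}
	mmu_load_dtlb(ent->data);	/* TSB hit: reload TLB, retry */
}

Compare this with the deleted dtlb_base.S/dtlb_backend.S pair: the three-level
VPTE walk in the trap handler is gone, replaced by one load, one compare, and
one TLB write in the common case.)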