Commit 619b6e18 authored by Maciej W. Rozycki, committed by Ralf Baechle

[MIPS] R4000/R4400 daddiu erratum workaround



 This complements the generic R4000/R4400 errata workaround code and adds
bits for the daddiu problem.  In most places it just modifies handwritten
assembly code so that the assembler is allowed to use a temporary register,
as daddiu may now be treated as a macro that expands to a sequence of li
and daddu.  The temporary is the AT register or, where AT is unavailable or
used explicitly for another purpose, an explicitly named register selected
with the .set at=<reg> feature recently added to gas.  This feature is only
used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the workaround
remains disabled, the required version of binutils stays unchanged.
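
 For illustration (a sketch, not code from this commit; the registers and
the immediate are arbitrary), with the workaround in force the assembler
may expand

	daddiu	t0, t1, 0x40

into

	li	AT, 0x40
	daddu	t0, t1, AT

with AT, or the register named with .set at=<reg>, serving as the
temporary.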

 Similarly, daddiu instructions put in branch delay slots in noreorder
fragments are now taken out of them and the assembler is allowed to
reorder them itself where possible (which it does, making the whole idea
of manually scheduling them into delay slots questionable).
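
 Schematically (the label and registers here are illustrative), a
noreorder fragment like

	bnez	t8, 1b
	 ADD	dst, dst, NBYTES	# may expand to daddiu on 64-bit

becomes the pattern seen throughout the diffs below:

	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bnez	t8, 1b
	.set	noreorder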

 Also, in the very few places where such a simple conversion was not
possible, a longer handcoded sequence is implemented.
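
 One such sequence, from the user-copy error paths below: the loop
decrement has to stay in the branch delay slot, so the immediate is
loaded into a register first and a single-instruction register-form SUB
is used instead of one that would expand to daddiu:

	li	v1, 1
	bnez	src, 1b
	 SUB	src, src, v1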

 Other than that there are changes to code responsible for building the
TLB fault and page clear/copy handlers to avoid daddiu as appropriate.
These are only effective if the erratum is verified to be present at run
time.
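
 As a sketch of the effect (these handlers are generated at boot time and
are not part of this excerpt; the registers and the constant are
illustrative), a handler built for an unaffected CPU may contain

	daddiu	a0, a0, 0x40

while on an affected CPU an equivalent erratum-safe sequence is emitted
instead, along the lines of

	ori	at, zero, 0x40
	daddu	a0, a0, at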

 Finally there is a trivial update to __delay(), because it uses daddiu in 
a branch delay slot.
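
 The construct in question is essentially (a sketch, not the literal
source):

1:	bnez	a0, 1b
	 dsubu	a0, 1			# a macro that may expand to daddiu

and the update is of the same nature as the DADDI_WAR changes below,
taking the decrement out of the delay slot.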

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent 20d60d99
+7 −1
@@ -6,7 +6,7 @@
  * Copyright (C) 1994 - 2000, 2001, 2003 Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
  * Copyright (C) 2001 MIPS Technologies, Inc.
- * Copyright (C) 2002 Maciej W. Rozycki
+ * Copyright (C) 2002, 2007  Maciej W. Rozycki
  */
 #include <linux/init.h>
 
@@ -471,7 +471,13 @@ NESTED(nmi_handler, PT_SIZE, sp)
 	jr	k0
 	 rfe
 #else
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	LONG_ADDIU	k0, 4		/* stall on $k0 */
+#else
+	.set	at=v1
+	LONG_ADDIU	k0, 4
+	.set	noat
+#endif
 	MTC0	k0, CP0_EPC
 	/* I hope three instructions between MTC0 and ERET are enough... */
 	ori	k1, _THREAD_MASK
+51 −10
@@ -7,6 +7,7 @@
  *
  * Copyright (C) 1998, 1999 Ralf Baechle
  * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2007  Maciej W. Rozycki
  */
 #include <linux/errno.h>
 #include <asm/asm.h>
@@ -52,9 +53,12 @@
 #define UNIT(unit)  ((unit)*NBYTES)
 
 #define ADDC(sum,reg)						\
+	.set	push;						\
+	.set	noat;						\
 	ADD	sum, reg;					\
 	sltu	v1, sum, reg;					\
-	ADD	sum, v1
+	ADD	sum, v1;					\
+	.set	pop
 
 #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3)	\
 	LOAD	_t0, (offset + UNIT(0))(src);			\
@@ -178,8 +182,10 @@ move_128bytes:
 	CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
 	CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 	LONG_SUBU	t8, t8, 0x01
-	bnez	t8, move_128bytes
+	.set	reorder				/* DADDI_WAR */
 	PTR_ADDU	src, src, 0x80
+	bnez	t8, move_128bytes
+	.set	noreorder
 
 1:
 	beqz	t2, 1f
@@ -208,8 +214,10 @@ end_words:
 	lw	t0, (src)
 	LONG_SUBU	t8, t8, 0x1
 	ADDC(sum, t0)
-	bnez	t8, end_words
+	.set	reorder				/* DADDI_WAR */
 	PTR_ADDU	src, src, 0x4
+	bnez	t8, end_words
+	.set	noreorder
 
 /* unknown src alignment and < 8 bytes to go  */
 small_csumcpy:
@@ -246,6 +254,8 @@ small_csumcpy:
 1:	ADDC(sum, t1)
 
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -266,6 +276,7 @@ small_csumcpy:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set	reorder
 	/* Add the passed partial csum.  */
@@ -373,7 +384,11 @@ small_csumcpy:
 
 #define ADDRMASK (NBYTES-1)
 
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 LEAF(__csum_partial_copy_user)
 	PTR_ADDU	AT, src, len	/* See (1) above. */
@@ -441,8 +456,10 @@ EXC( STORE t6, UNIT(6)(dst), s_exc)
 	ADDC(sum, t6)
 EXC(	STORE	t7, UNIT(7)(dst),	s_exc)
 	ADDC(sum, t7)
-	bgez	len, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 8*NBYTES
+	bgez	len, 1b
+	.set	noreorder
 	ADD	len, 8*NBYTES		# revert len (see above)
 
 	/*
@@ -471,8 +488,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
-	beqz	len, done
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
+	beqz	len, done
+	.set	noreorder
 less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -485,8 +504,10 @@ EXC( LOAD t0, 0(src), l_exc)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc)
 	ADDC(sum, t0)
-	bne	rem, len, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
+	bne	rem, len, 1b
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -572,8 +593,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
 	ADDC(sum, t2)
 EXC(	STORE	t3, UNIT(3)(dst),	s_exc)
 	ADDC(sum, t3)
-	bne	len, rem, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
+	bne	len, rem, 1b
+	.set	noreorder
 
 cleanup_src_unaligned:
 	beqz	len, done
@@ -587,8 +610,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
 	SUB	len, len, NBYTES
 EXC(	STORE	t0, 0(dst),		s_exc)
 	ADDC(sum, t0)
-	bne	len, rem, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
+	bne	len, rem, 1b
+	.set	noreorder
 
 copy_bytes_checklen:
 	beqz	len, done
@@ -631,6 +656,8 @@ copy_bytes_done:
 	ADDC(sum, t2)
 done:
 	/* fold checksum */
+	.set	push
+	.set	noat
 #ifdef USE_DOUBLE
 	dsll32	v1, sum, 0
 	daddu	sum, v1
@@ -651,6 +678,7 @@ done:
 	srl	sum, sum, 8
 	or	sum, v1
 	andi	sum, 0xffff
+	.set	pop
 1:
 	.set reorder
 	ADDC(sum, psum)
@@ -678,8 +706,10 @@ EXC( lbu t1, 0(src), l_exc)
 	SLLV	t1, t1, t2
 	addu	t2, SHIFT_INC
 	ADDC(sum, t1)
-	bne	src, t0, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 1
+	bne	src, t0, 1b
+	.set	noreorder
 l_exc:
 	LOAD	t0, TI_TASK($28)
 	 nop
@@ -697,12 +727,22 @@ l_exc:
 	 * Clear len bytes starting at dst.  Can't call __bzero because it
 	 * might modify len.  An inefficient loop for these rare times...
 	 */
-	beqz	len, done
+	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
+	beqz	len, done
+	.set	noreorder
 1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
+	.set	push
+	.set	noat
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	bnez	src, 1b
 	 SUB	src, src, 1
+#else
+	li	v1, 1
+	bnez	src, 1b
+	 SUB	src, src, v1
+#endif
 	li	v1, -EFAULT
 	b	done
 	 sw	v1, (errptr)
@@ -712,4 +752,5 @@ s_exc:
 	li	v1, -EFAULT
 	jr	ra
 	 sw	v1, (errptr)
+	.set	pop
 	END(__csum_partial_copy_user)
+20 −5
@@ -9,6 +9,7 @@
  * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
  * Copyright (C) 2002 Broadcom, Inc.
  *   memcpy/copy_user author: Mark Vandevoorde
+ * Copyright (C) 2007  Maciej W. Rozycki
  *
  * Mnemonic names for arguments to memcpy/__copy_user
  */
@@ -175,7 +176,11 @@
 
 	.text
 	.set	noreorder
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 /*
  * A combined memcpy/__copy_user
@@ -268,8 +273,10 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
 	STORE	t1, UNIT(1)(dst)
 	STORE	t2, UNIT(2)(dst)
 	STORE	t3, UNIT(3)(dst)
-	beqz	len, done
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
+	beqz	len, done
+	.set	noreorder
less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -281,8 +288,10 @@ EXC( LOAD t0, 0(src), l_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 	STORE	t0, 0(dst)
-	bne	rem, len, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
+	bne	rem, len, 1b
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -361,8 +370,10 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy)
 	STORE	t2, UNIT(2)(dst)
 	STORE	t3, UNIT(3)(dst)
 	PREF(	1, 9*32(dst) )     	# 1 is PREF_STORE (not streamed)
-	bne	len, rem, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
+	bne	len, rem, 1b
+	.set	noreorder
 
cleanup_src_unaligned:
 	beqz	len, done
@@ -375,8 +386,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
 	STORE	t0, 0(dst)
-	bne	len, rem, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
+	bne	len, rem, 1b
+	.set	noreorder
 
copy_bytes_checklen:
 	beqz	len, done
@@ -424,8 +437,10 @@ l_exc_copy:
EXC(	lb	t1, 0(src),	l_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
-	bne	src, t0, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 1
+	bne	src, t0, 1b
+	.set	noreorder
l_exc:
 	LOAD	t0, TI_TASK($28)
 	 nop
+47 −13
@@ -9,6 +9,7 @@
  * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
  * Copyright (C) 2002 Broadcom, Inc.
  *   memcpy/copy_user author: Mark Vandevoorde
+ * Copyright (C) 2007  Maciej W. Rozycki
  *
  * Mnemonic names for arguments to memcpy/__copy_user
  */
@@ -175,7 +176,11 @@
 
 	.text
 	.set	noreorder
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	.set	noat
+#else
+	.set	at=v1
+#endif
 
 /*
  * A combined memcpy/__copy_user
@@ -271,8 +276,10 @@ EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
EXC(	STORE	t1, UNIT(1)(dst),	s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
-	beqz	len, done
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
+	beqz	len, done
+	.set	noreorder
less_than_4units:
 	/*
 	 * rem = len % NBYTES
@@ -284,8 +291,10 @@ EXC( LOAD t0, 0(src), l_exc)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
-	bne	rem, len, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
+	bne	rem, len, 1b
+	.set	noreorder
 
 	/*
 	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
@@ -364,8 +373,10 @@ EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
EXC(	STORE	t2, UNIT(2)(dst),	s_exc_p2u)
EXC(	STORE	t3, UNIT(3)(dst),	s_exc_p1u)
 	PREF(	1, 9*32(dst) )     	# 1 is PREF_STORE (not streamed)
-	bne	len, rem, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 4*NBYTES
+	bne	len, rem, 1b
+	.set	noreorder
 
cleanup_src_unaligned:
 	beqz	len, done
@@ -378,8 +389,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
 	ADD	src, src, NBYTES
 	SUB	len, len, NBYTES
EXC(	STORE	t0, 0(dst),		s_exc_p1u)
-	bne	len, rem, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, NBYTES
+	bne	len, rem, 1b
+	.set	noreorder
 
copy_bytes_checklen:
 	beqz	len, done
@@ -427,8 +440,10 @@ l_exc_copy:
EXC(	lb	t1, 0(src),	l_exc)
 	ADD	src, src, 1
 	sb	t1, 0(dst)	# can't fault -- we're copy_from_user
-	bne	src, t0, 1b
+	.set	reorder				/* DADDI_WAR */
 	ADD	dst, dst, 1
+	bne	src, t0, 1b
+	.set	noreorder
l_exc:
 	LOAD	t0, TI_TASK($28)
 	 nop
@@ -446,20 +461,33 @@ l_exc:
 	 * Clear len bytes starting at dst.  Can't call __bzero because it
 	 * might modify len.  An inefficient loop for these rare times...
 	 */
-	beqz	len, done
+	.set	reorder				/* DADDI_WAR */
 	SUB	src, len, 1
+	beqz	len, done
+	.set	noreorder
1:	sb	zero, 0(dst)
 	ADD	dst, dst, 1
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	bnez	src, 1b
 	 SUB	src, src, 1
+#else
+	.set	push
+	.set	noat
+	li	v1, 1
+	bnez	src, 1b
+	 SUB	src, src, v1
+	.set	pop
+#endif
 	jr	ra
 	 nop
 
 
 #define SEXC(n)							\
+	.set	reorder;			/* DADDI_WAR */	\
s_exc_p ## n ## u:						\
+	ADD	len, len, n*NBYTES;				\
 	jr	ra;						\
-	 ADD	len, len, n*NBYTES
+	.set	noreorder
 
SEXC(8)
SEXC(7)
@@ -471,8 +499,10 @@ SEXC(2)
SEXC(1)
 
s_exc_p1:
-	jr	ra
+	.set	reorder				/* DADDI_WAR */
 	ADD	len, len, 1
+	jr	ra
+	.set	noreorder
s_exc:
 	jr	ra
 	 nop
@@ -502,8 +532,10 @@ r_end_bytes:
 	SUB	a2, a2, 0x1
 	sb	t0, -1(a0)
 	SUB	a1, a1, 0x1
-	bnez	a2, r_end_bytes
+	.set	reorder				/* DADDI_WAR */
 	SUB	a0, a0, 0x1
+	bnez	a2, r_end_bytes
+	.set	noreorder
 
r_out:
 	jr	ra
@@ -514,8 +546,10 @@ r_end_bytes_up:
 	SUB	a2, a2, 0x1
 	sb	t0, (a0)
 	ADD	a1, a1, 0x1
-	bnez	a2, r_end_bytes_up
+	.set	reorder				/* DADDI_WAR */
 	ADD	a0, a0, 0x1
+	bnez	a2, r_end_bytes_up
+	.set	noreorder
 
 	jr	ra
 	 move	a2, zero
+10 −1
@@ -5,6 +5,7 @@
  *
  * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
  * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007  Maciej W. Rozycki
  */
 #include <asm/asm.h>
 #include <asm/asm-offsets.h>
@@ -74,8 +75,16 @@ FEXPORT(__bzero)
 	bnez		t0, small_memset
 	 andi		t0, a0, LONGMASK	/* aligned? */
 
+#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
 	beqz		t0, 1f
 	 PTR_SUBU	t0, LONGSIZE		/* alignment in bytes */
+#else
+	.set		noat
+	li		AT, LONGSIZE
+	beqz		t0, 1f
+	 PTR_SUBU	t0, AT			/* alignment in bytes */
+	.set		at
+#endif
 
 #ifdef __MIPSEB__
 	EX(LONG_S_L, a1, (a0), first_fixup)	/* make word/dword aligned */
@@ -106,7 +115,7 @@ memset_partial:
 	.set		noat
 	LONG_SRL		AT, t0, 1
 	PTR_SUBU	t1, AT
-	.set		noat
+	.set		at
 #endif
 	jr		t1
 	 PTR_ADDU	a0, t0			/* dest ptr */