Commit b445e26c authored by David S. Miller
Browse files

[SPARC64]: Avoid membar instructions in delay slots.



In particular, avoid membar instructions in the delay
slot of a jmpl instruction.

UltraSPARC-I, II, IIi, and IIe have a bug, documented in
the UltraSPARC-IIi User's Manual, Appendix K, Erratum 51.

The long and short of it is that if the IMU misses
on a branch or jmpl, and there is a store-buffer-synchronizing
membar in the delay slot, the chip can stop fetching instructions.

If interrupts are enabled or some other trap is enabled, the
chip will unwedge itself, but performance will suffer.

We already had a workaround for this bug in a few spots, but
it's better to have the entire tree sanitized for this rule.

Signed-off-by: David S. Miller <davem@davemloft.net>
parent 020f46a3
Loading
Loading
Loading
Loading
+4 −2
Original line number Original line Diff line number Diff line
@@ -271,8 +271,9 @@ cplus_fptrap_insn_1:
	fmuld		%f0, %f2, %f26
	fmuld		%f0, %f2, %f26
	faddd		%f0, %f2, %f28
	faddd		%f0, %f2, %f28
	fmuld		%f0, %f2, %f30
	fmuld		%f0, %f2, %f30
	b,pt		%xcc, fpdis_exit
	membar		#Sync
	membar		#Sync
	b,pt		%xcc, fpdis_exit
	 nop
2:	andcc		%g5, FPRS_DU, %g0
2:	andcc		%g5, FPRS_DU, %g0
	bne,pt		%icc, 3f
	bne,pt		%icc, 3f
	 fzero		%f32
	 fzero		%f32
@@ -301,8 +302,9 @@ cplus_fptrap_insn_2:
	fmuld		%f32, %f34, %f58
	fmuld		%f32, %f34, %f58
	faddd		%f32, %f34, %f60
	faddd		%f32, %f34, %f60
	fmuld		%f32, %f34, %f62
	fmuld		%f32, %f34, %f62
	ba,pt		%xcc, fpdis_exit
	membar		#Sync
	membar		#Sync
	ba,pt		%xcc, fpdis_exit
	 nop
3:	mov		SECONDARY_CONTEXT, %g3
3:	mov		SECONDARY_CONTEXT, %g3
	add		%g6, TI_FPREGS, %g1
	add		%g6, TI_FPREGS, %g1
	ldxa		[%g3] ASI_DMMU, %g5
	ldxa		[%g3] ASI_DMMU, %g5
+8 −4
Original line number Original line Diff line number Diff line
@@ -32,8 +32,9 @@ static __inline__ int __sem_update_count(struct semaphore *sem, int incr)
"	add	%1, %4, %1\n"
"	add	%1, %4, %1\n"
"	cas	[%3], %0, %1\n"
"	cas	[%3], %0, %1\n"
"	cmp	%0, %1\n"
"	cmp	%0, %1\n"
"	bne,pn	%%icc, 1b\n"
"	membar	#StoreLoad | #StoreStore\n"
"	membar	#StoreLoad | #StoreStore\n"
"	bne,pn	%%icc, 1b\n"
"	 nop\n"
	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
	: "=&r" (old_count), "=&r" (tmp), "=m" (sem->count)
	: "r" (&sem->count), "r" (incr), "m" (sem->count)
	: "r" (&sem->count), "r" (incr), "m" (sem->count)
	: "cc");
	: "cc");
@@ -71,8 +72,9 @@ void up(struct semaphore *sem)
"	cmp	%%g1, %%g7\n"
"	cmp	%%g1, %%g7\n"
"	bne,pn	%%icc, 1b\n"
"	bne,pn	%%icc, 1b\n"
"	 addcc	%%g7, 1, %%g0\n"
"	 addcc	%%g7, 1, %%g0\n"
"	ble,pn	%%icc, 3f\n"
"	membar	#StoreLoad | #StoreStore\n"
"	membar	#StoreLoad | #StoreStore\n"
"	ble,pn	%%icc, 3f\n"
"	 nop\n"
"2:\n"
"2:\n"
"	.subsection 2\n"
"	.subsection 2\n"
"3:	mov	%0, %%g1\n"
"3:	mov	%0, %%g1\n"
@@ -128,8 +130,9 @@ void __sched down(struct semaphore *sem)
"	cmp	%%g1, %%g7\n"
"	cmp	%%g1, %%g7\n"
"	bne,pn	%%icc, 1b\n"
"	bne,pn	%%icc, 1b\n"
"	 cmp	%%g7, 1\n"
"	 cmp	%%g7, 1\n"
"	bl,pn	%%icc, 3f\n"
"	membar	#StoreLoad | #StoreStore\n"
"	membar	#StoreLoad | #StoreStore\n"
"	bl,pn	%%icc, 3f\n"
"	 nop\n"
"2:\n"
"2:\n"
"	.subsection 2\n"
"	.subsection 2\n"
"3:	mov	%0, %%g1\n"
"3:	mov	%0, %%g1\n"
@@ -233,8 +236,9 @@ int __sched down_interruptible(struct semaphore *sem)
"	cmp	%%g1, %%g7\n"
"	cmp	%%g1, %%g7\n"
"	bne,pn	%%icc, 1b\n"
"	bne,pn	%%icc, 1b\n"
"	 cmp	%%g7, 1\n"
"	 cmp	%%g7, 1\n"
"	bl,pn	%%icc, 3f\n"
"	membar	#StoreLoad | #StoreStore\n"
"	membar	#StoreLoad | #StoreStore\n"
"	bl,pn	%%icc, 3f\n"
"	 nop\n"
"2:\n"
"2:\n"
"	.subsection 2\n"
"	.subsection 2\n"
"3:	mov	%2, %%g1\n"
"3:	mov	%2, %%g1\n"
+2 −1
Original line number Original line Diff line number Diff line
@@ -98,8 +98,9 @@ startup_continue:


	sethi		%hi(prom_entry_lock), %g2
	sethi		%hi(prom_entry_lock), %g2
1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
1:	ldstub		[%g2 + %lo(prom_entry_lock)], %g1
	brnz,pn		%g1, 1b
	membar		#StoreLoad | #StoreStore
	membar		#StoreLoad | #StoreStore
	brnz,pn		%g1, 1b
	 nop


	sethi		%hi(p1275buf), %g2
	sethi		%hi(p1275buf), %g2
	or		%g2, %lo(p1275buf), %g2
	or		%g2, %lo(p1275buf), %g2
+53 −50
Original line number Original line Diff line number Diff line
@@ -87,14 +87,17 @@
#define LOOP_CHUNK3(src, dest, len, branch_dest)		\
#define LOOP_CHUNK3(src, dest, len, branch_dest)		\
	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)
	MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest)


#define DO_SYNC			membar	#Sync;
#define STORE_SYNC(dest, fsrc)				\
#define STORE_SYNC(dest, fsrc)				\
	EX_ST(STORE_BLK(%fsrc, %dest));			\
	EX_ST(STORE_BLK(%fsrc, %dest));			\
	add			%dest, 0x40, %dest;
	add			%dest, 0x40, %dest;	\
	DO_SYNC


#define STORE_JUMP(dest, fsrc, target)			\
#define STORE_JUMP(dest, fsrc, target)			\
	EX_ST(STORE_BLK(%fsrc, %dest));			\
	EX_ST(STORE_BLK(%fsrc, %dest));			\
	add			%dest, 0x40, %dest;	\
	add			%dest, 0x40, %dest;	\
	ba,pt			%xcc, target;
	ba,pt			%xcc, target;		\
	 nop;


#define FINISH_VISCHUNK(dest, f0, f1, left)	\
#define FINISH_VISCHUNK(dest, f0, f1, left)	\
	subcc			%left, 8, %left;\
	subcc			%left, 8, %left;\
@@ -239,17 +242,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	ba,pt		%xcc, 1b+4
	 faligndata	%f0, %f2, %f48
	 faligndata	%f0, %f2, %f48
1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
1:	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_JUMP(o0, f48, 40f) membar #Sync
	STORE_JUMP(o0, f48, 40f)
2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
2:	FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_JUMP(o0, f48, 48f) membar #Sync
	STORE_JUMP(o0, f48, 48f)
3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
3:	FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
	STORE_JUMP(o0, f48, 56f) membar #Sync
	STORE_JUMP(o0, f48, 56f)


1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
1:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -260,17 +263,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	ba,pt		%xcc, 1b+4
	 faligndata	%f2, %f4, %f48
	 faligndata	%f2, %f4, %f48
1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
1:	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_JUMP(o0, f48, 41f) membar #Sync
	STORE_JUMP(o0, f48, 41f)
2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
2:	FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_JUMP(o0, f48, 49f) membar #Sync
	STORE_JUMP(o0, f48, 49f)
3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
3:	FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
	STORE_JUMP(o0, f48, 57f) membar #Sync
	STORE_JUMP(o0, f48, 57f)


1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
1:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -281,17 +284,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	ba,pt		%xcc, 1b+4
	 faligndata	%f4, %f6, %f48
	 faligndata	%f4, %f6, %f48
1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
1:	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_JUMP(o0, f48, 42f) membar #Sync
	STORE_JUMP(o0, f48, 42f)
2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
2:	FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_JUMP(o0, f48, 50f) membar #Sync
	STORE_JUMP(o0, f48, 50f)
3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
3:	FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
	STORE_JUMP(o0, f48, 58f) membar #Sync
	STORE_JUMP(o0, f48, 58f)


1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
1:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -302,17 +305,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	ba,pt		%xcc, 1b+4
	 faligndata	%f6, %f8, %f48
	 faligndata	%f6, %f8, %f48
1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
1:	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_JUMP(o0, f48, 43f) membar #Sync
	STORE_JUMP(o0, f48, 43f)
2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
2:	FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_JUMP(o0, f48, 51f) membar #Sync
	STORE_JUMP(o0, f48, 51f)
3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
3:	FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
	STORE_JUMP(o0, f48, 59f) membar #Sync
	STORE_JUMP(o0, f48, 59f)


1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
1:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -323,17 +326,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	ba,pt		%xcc, 1b+4
	 faligndata	%f8, %f10, %f48
	 faligndata	%f8, %f10, %f48
1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
1:	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_JUMP(o0, f48, 44f) membar #Sync
	STORE_JUMP(o0, f48, 44f)
2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
2:	FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_JUMP(o0, f48, 52f) membar #Sync
	STORE_JUMP(o0, f48, 52f)
3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
3:	FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
	STORE_JUMP(o0, f48, 60f) membar #Sync
	STORE_JUMP(o0, f48, 60f)


1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
1:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -344,17 +347,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	ba,pt		%xcc, 1b+4
	 faligndata	%f10, %f12, %f48
	 faligndata	%f10, %f12, %f48
1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
1:	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_JUMP(o0, f48, 45f) membar #Sync
	STORE_JUMP(o0, f48, 45f)
2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
2:	FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_JUMP(o0, f48, 53f) membar #Sync
	STORE_JUMP(o0, f48, 53f)
3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
3:	FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
	STORE_JUMP(o0, f48, 61f) membar #Sync
	STORE_JUMP(o0, f48, 61f)


1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
1:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -365,17 +368,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	ba,pt		%xcc, 1b+4
	 faligndata	%f12, %f14, %f48
	 faligndata	%f12, %f14, %f48
1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
1:	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_JUMP(o0, f48, 46f) membar #Sync
	STORE_JUMP(o0, f48, 46f)
2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
2:	FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_JUMP(o0, f48, 54f) membar #Sync
	STORE_JUMP(o0, f48, 54f)
3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
3:	FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
	STORE_JUMP(o0, f48, 62f) membar #Sync
	STORE_JUMP(o0, f48, 62f)


1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
1:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
	LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f)
@@ -386,17 +389,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
	ba,pt		%xcc, 1b+4
	ba,pt		%xcc, 1b+4
	 faligndata	%f14, %f16, %f48
	 faligndata	%f14, %f16, %f48
1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
1:	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_JUMP(o0, f48, 47f) membar #Sync
	STORE_JUMP(o0, f48, 47f)
2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
2:	FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_JUMP(o0, f48, 55f) membar #Sync
	STORE_JUMP(o0, f48, 55f)
3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
3:	FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
	STORE_SYNC(o0, f48) membar #Sync
	STORE_SYNC(o0, f48)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
	STORE_JUMP(o0, f48, 63f) membar #Sync
	STORE_JUMP(o0, f48, 63f)


40:	FINISH_VISCHUNK(o0, f0,  f2,  g3)
40:	FINISH_VISCHUNK(o0, f0,  f2,  g3)
41:	FINISH_VISCHUNK(o0, f2,  f4,  g3)
41:	FINISH_VISCHUNK(o0, f2,  f4,  g3)
+13 −2
Original line number Original line Diff line number Diff line
@@ -72,7 +72,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3


	stda		%f48, [%g3 + %g1] ASI_BLK_P
	stda		%f48, [%g3 + %g1] ASI_BLK_P
5:	membar		#Sync
5:	membar		#Sync
	jmpl		%g7 + %g0, %g0
	ba,pt		%xcc, 80f
	 nop

	.align		32
80:	jmpl		%g7 + %g0, %g0
	 nop
	 nop


6:	ldub		[%g3 + TI_FPSAVED], %o5
6:	ldub		[%g3 + TI_FPSAVED], %o5
@@ -87,8 +91,11 @@ vis1: ldub [%g6 + TI_FPSAVED], %g3
	stda		%f32, [%g2 + %g1] ASI_BLK_P
	stda		%f32, [%g2 + %g1] ASI_BLK_P
	stda		%f48, [%g3 + %g1] ASI_BLK_P
	stda		%f48, [%g3 + %g1] ASI_BLK_P
	membar		#Sync
	membar		#Sync
	jmpl		%g7 + %g0, %g0
	ba,pt		%xcc, 80f
	 nop


	.align		32
80:	jmpl		%g7 + %g0, %g0
	 nop
	 nop


	.align		32
	.align		32
@@ -126,6 +133,10 @@ VISenterhalf:
	stda		%f0, [%g2 + %g1] ASI_BLK_P
	stda		%f0, [%g2 + %g1] ASI_BLK_P
	stda		%f16, [%g3 + %g1] ASI_BLK_P
	stda		%f16, [%g3 + %g1] ASI_BLK_P
	membar		#Sync
	membar		#Sync
	ba,pt		%xcc, 4f
	 nop

	.align		32
4:	and		%o5, FPRS_DU, %o5
4:	and		%o5, FPRS_DU, %o5
	jmpl		%g7 + %g0, %g0
	jmpl		%g7 + %g0, %g0
	 wr		%o5, FPRS_FEF, %fprs
	 wr		%o5, FPRS_FEF, %fprs
Loading