Unverified commit 68dec269, authored by Maciej W. Rozycki, committed by Paul Burton
Browse files

MIPS: memset: Limit excessive `noreorder' assembly mode use



Rewrite to use the `reorder' assembly mode and remove manually scheduled
delay slots except where GAS cannot schedule a delay-slot instruction
due to a data dependency or a section switch (as is the case with the EX
macro).  No change in machine code produced.

Signed-off-by: Maciej W. Rozycki <macro@linux-mips.org>
[paul.burton@mips.com:
  Fix conflict with commit 932afdee ("MIPS: Add Kconfig variable for
  CPUs with unaligned load/store instructions")]
Signed-off-by: Paul Burton <paul.burton@mips.com>
Patchwork: https://patchwork.linux-mips.org/patch/20834/
Cc: Ralf Baechle <ralf@linux-mips.org>
parent 2f7619ae
Loading
Loading
Loading
Loading
+24 −24
Original line number Diff line number Diff line
@@ -78,7 +78,6 @@
#endif
	.endm

	.set	noreorder
	.align	5

	/*
@@ -94,13 +93,16 @@
	.endif

	sltiu		t0, a2, STORSIZE	/* very small region? */
	.set		noreorder
	bnez		t0, .Lsmall_memset\@
	 andi		t0, a0, STORMASK	/* aligned? */
	.set		reorder

#ifdef CONFIG_CPU_MICROMIPS
	move		t8, a1			/* used by 'swp' instruction */
	move		t9, a1
#endif
	.set		noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
	beqz		t0, 1f
	 PTR_SUBU	t0, STORSIZE		/* alignment in bytes */
@@ -111,6 +113,7 @@
	 PTR_SUBU	t0, AT			/* alignment in bytes */
	.set		at
#endif
	.set		reorder

#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
	R10KCBARRIER(0(ra))
@@ -125,8 +128,10 @@
#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define STORE_BYTE(N)				\
	EX(sb, a1, N(a0), .Lbyte_fixup\@);	\
	.set		noreorder;		\
	beqz		t0, 0f;			\
	PTR_ADDU	t0, 1;
	 PTR_ADDU	t0, 1;			\
	.set		reorder;

	PTR_ADDU	a2, t0			/* correct size */
	PTR_ADDU	t0, 1
@@ -148,16 +153,14 @@
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
1:	ori		t1, a2, 0x3f		/* # of full blocks */
	xori		t1, 0x3f
	beqz		t1, .Lmemset_partial\@	/* no block to fill */
	andi		t0, a2, 0x40-STORSIZE
	beqz		t1, .Lmemset_partial\@	/* no block to fill */

	PTR_ADDU	t1, a0			/* end address */
	.set		reorder
1:	PTR_ADDIU	a0, 64
	R10KCBARRIER(0(ra))
	f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
	bne		t1, a0, 1b
	.set		noreorder

.Lmemset_partial\@:
	R10KCBARRIER(0(ra))
@@ -173,20 +176,18 @@
	PTR_SUBU	t1, AT
	.set		at
#endif
	jr		t1
	PTR_ADDU	a0, t0			/* dest ptr */
	jr		t1

	.set		push
	.set		noreorder
	.set		nomacro
	/* ... but first do longs ... */
	f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
2:	.set		pop
	andi		a2, STORMASK		/* At most one long to go */
2:	andi		a2, STORMASK		/* At most one long to go */

	.set		noreorder
	beqz		a2, 1f
#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
	 PTR_ADDU	a0, a2			/* What's left */
	.set		reorder
	R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
	EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
@@ -195,6 +196,7 @@
#endif
#else
	 PTR_SUBU	t0, $0, a2
	.set		reorder
	move		a2, zero		/* No remaining longs */
	PTR_ADDIU	t0, 1
	STORE_BYTE(0)
@@ -210,20 +212,22 @@
#endif
0:
#endif
1:	jr		ra
	 move		a2, zero
1:	move		a2, zero
	jr		ra

.Lsmall_memset\@:
	beqz		a2, 2f
	PTR_ADDU	t1, a0, a2
	beqz		a2, 2f

1:	PTR_ADDIU	a0, 1			/* fill bytewise */
	R10KCBARRIER(0(ra))
	.set		noreorder
	bne		t1, a0, 1b
	 EX(sb, a1, -1(a0), .Lsmall_fixup\@)
	.set		reorder

2:	jr		ra			/* done */
	 move		a2, zero
2:	move		a2, zero
	jr		ra			/* done */
	.if __memset == 1
	END(memset)
	.set __memset, 0
@@ -237,14 +241,13 @@
	 *      a2     =             a2                -              t0                   + 1
	 */
	PTR_SUBU	a2, t0
	jr		ra
	PTR_ADDIU	a2, 1
	jr		ra
#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */

.Lfirst_fixup\@:
	/* unset_bytes already in a2 */
	jr	ra
	 nop

.Lfwd_fixup\@:
	/*
@@ -255,8 +258,8 @@
	andi		a2, 0x3f
	LONG_L		t0, THREAD_BUADDR(t0)
	LONG_ADDU	a2, t1
	jr		ra
	LONG_SUBU	a2, t0
	jr		ra

.Lpartial_fixup\@:
	/*
@@ -267,24 +270,21 @@
	andi		a2, STORMASK
	LONG_L		t0, THREAD_BUADDR(t0)
	LONG_ADDU	a2, a0
	jr		ra
	LONG_SUBU	a2, t0
	jr		ra

.Llast_fixup\@:
	/* unset_bytes already in a2 */
	jr		ra
	 nop

.Lsmall_fixup\@:
	/*
	 * unset_bytes = end_addr - current_addr + 1
	 *      a2     =    t1    -      a0      + 1
	 */
	.set		reorder
	PTR_SUBU	a2, t1, a0
	PTR_ADDIU	a2, 1
	jr		ra
	.set		noreorder

	.endm

@@ -298,8 +298,8 @@

LEAF(memset)
EXPORT_SYMBOL(memset)
	beqz		a1, 1f
	move		v0, a0			/* result */
	beqz		a1, 1f

	andi		a1, 0xff		/* spread fillword */
	LONG_SLL		t1, a1, 8