Commit b05ae4ee, authored by Kyle Moffett, committed by Benjamin Herrenschmidt
Browse files

powerpc: Remove duplicate cacheable_memcpy/memzero functions



These functions are only used from one place each.  If the cacheable_*
versions really are more efficient, then those changes should be
migrated into the common code instead.

NOTE: The old routines are just flat buggy on kernels that support
      hardware with different cacheline sizes.

Signed-off-by: Kyle Moffett <Kyle.D.Moffett@boeing.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
parent 9eccca08
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -76,9 +76,6 @@ extern void _set_L3CR(unsigned long);
#define _set_L3CR(val)	do { } while(0)
#endif

/*
 * Zero nb bytes starting at p.  The assembly implementation clears
 * whole cache lines with dcbz, so p must point to cacheable memory.
 */
extern void cacheable_memzero(void *p, unsigned int nb);
/*
 * Copy variant (destination, source, byte count) whose assembly
 * implementation uses dcbz on whole destination cache lines, so the
 * destination must be cacheable.
 */
extern void *cacheable_memcpy(void *, const void *, unsigned int);

#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_CACHE_H */
+0 −127
Original line number Diff line number Diff line
@@ -69,54 +69,6 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * Use dcbz on the complete cache lines in the destination
 * to set them to zero.  This requires that the destination
 * area is cacheable.  -- paulus
 *
 * On entry r3 holds the destination pointer and r4 the byte count.
 * r3 is only read, so it still holds the destination on return.
 *
 * Outline:
 *   - fewer than 4 bytes: clear them one byte at a time (label 7);
 *   - otherwise store one word at the (possibly unaligned) start,
 *     round the pointer down to a word boundary, store words up to
 *     the next cache-line boundary, dcbz every complete cache line,
 *     then finish with word stores and finally byte stores.
 *
 * r6 is kept 4 bytes below the next word to store so that the
 * update-form "stwu r4,4(r6)" both stores and advances.
 */
_GLOBAL(cacheable_memzero)
	mr	r5,r4		/* r5 = byte count */
	li	r4,0		/* r4 = the zero value that gets stored */
	addi	r6,r3,-4	/* bias r6 by -4 for the stwu/stbu addressing below */
	cmplwi	0,r5,4
	blt	7f		/* fewer than 4 bytes: byte loop only */
	stwu	r4,4(r6)	/* zero the first word; r6 = start address */
	beqlr			/* cr0 is still the count-vs-4 compare: exactly 4 bytes, done */
	andi.	r0,r6,3		/* r0 = misalignment of the start address */
	add	r5,r0,r5	/* r5 = bytes from the aligned-down r6 to the end */
	subf	r6,r0,r6	/* round r6 down to a word boundary */
	clrlwi	r7,r6,32-LG_CACHELINE_BYTES	/* r7 = offset of r6 within its cache line */
	add	r8,r7,r5	/* r8 = offset of the end, measured from that line's start */
	srwi	r9,r8,LG_CACHELINE_BYTES
	addic.	r9,r9,-1	/* total number of complete cachelines */
	ble	2f		/* none: go straight to the word loop */
	xori	r0,r7,CACHELINE_MASK & ~3	/* bytes from r6 to the last word of this line */
	srwi.	r0,r0,2		/* ... as a word count */
	beq	3f		/* r6+4 is already on a cache-line boundary */
	mtctr	r0
4:	stwu	r4,4(r6)	/* word stores up to the cache-line boundary */
	bdnz	4b
3:	mtctr	r9
	li	r7,4		/* dcbz index: the line to clear starts at r6+4 */
10:	dcbz	r7,r6		/* zero one whole cache line */
	addi	r6,r6,CACHELINE_BYTES
	bdnz	10b
	clrlwi	r5,r8,32-LG_CACHELINE_BYTES	/* bytes left after the last complete line */
	addi	r5,r5,4		/* re-add the 4-byte bias so r5 is measured from r6 again */
2:	srwi	r0,r5,2
	mtctr	r0
	bdz	6f		/* counts one fewer word: the word at r6 is already clear */
1:	stwu	r4,4(r6)	/* trailing whole words */
	bdnz	1b
6:	andi.	r5,r5,3		/* r5 = trailing bytes (0..3) */
7:	cmpwi	0,r5,0
	beqlr			/* nothing left to clear */
	mtctr	r5
	addi	r6,r6,3		/* so that stbu 1(r6) addresses the next byte to clear */
8:	stbu	r4,1(r6)	/* trailing bytes */
	bdnz	8b
	blr

_GLOBAL(memset)
	rlwimi	r4,r4,8,16,23
	rlwimi	r4,r4,16,0,15
@@ -142,85 +94,6 @@ _GLOBAL(memset)
	bdnz	8b
	blr

/*
 * This version uses dcbz on the complete cache lines in the
 * destination area to reduce memory traffic.  This requires that
 * the destination area is cacheable.
 * We only use this version if the source and dest don't overlap.
 * -- paulus.
 *
 * On entry r3 = destination, r4 = source, r5 = byte count.
 * r3 is only read on the path below, so it still holds the
 * destination when the routine returns via blr.
 *
 * Outline:
 *   - if the two regions overlap, tail-branch to memcpy;
 *   - copy bytes, then words, until the destination reaches a
 *     cache-line boundary;
 *   - for each complete destination cache line: dcbz it, then fill it
 *     with COPY_16_BYTES;
 *   - copy the remaining whole words, then the remaining bytes.
 *
 * r4 and r6 are kept 4 bytes below the next source/destination
 * address so the update-form lwzu/stwu with displacement 4 can be used.
 */
_GLOBAL(cacheable_memcpy)
	add	r7,r3,r5		/* test if the src & dst overlap */
	add	r8,r4,r5		/* r7 = dst end, r8 = src end */
	cmplw	0,r4,r7			/* cr0.lt: src < dst end */
	cmplw	1,r3,r8			/* cr1.lt: dst < src end */
	crand	0,0,4			/* cr0.lt &= cr1.lt */
	blt	memcpy			/* if regions overlap */

	addi	r4,r4,-4		/* bias src by -4 */
	addi	r6,r3,-4		/* r6 = biased dst cursor; r3 left untouched */
	neg	r0,r3
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	beq	58f			/* dst already cache-line aligned */

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	andi.	r8,r0,3			/* get it word-aligned first */
	subf	r5,r0,r5		/* r5 = bytes left once dst is line-aligned */
	mtctr	r8
	beq+	61f			/* no odd bytes needed (hinted as the likely case) */
70:	lbz	r9,4(r4)		/* do some bytes */
	stb	r9,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	70b
61:	srwi.	r0,r0,2			/* whole words up to the cache-line boundary */
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES	/* r5 = bytes left after those lines */
	li	r11,4			/* dcbz index: the dst line starts at r6+4 */
	mtctr	r0
	beq	63f			/* no complete cache line to copy */
53:
	dcbz	r11,r6			/* zero the dst cache line before filling it */
	/*
	 * COPY_16_BYTES is defined outside this view; presumably it copies
	 * 16 bytes and advances r4/r6 by 16 -- TODO confirm.  The nesting
	 * below issues L1_CACHE_BYTES / 16 copies for 16-, 32-, 64- and
	 * 128-byte lines, i.e. one cache line per loop iteration.
	 */
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 32
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 64
	COPY_16_BYTES
	COPY_16_BYTES
#if L1_CACHE_BYTES >= 128
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
	COPY_16_BYTES
#endif
#endif
#endif
	bdnz	53b

63:	srwi.	r0,r5,2			/* remaining whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,3			/* remaining bytes (0..3) */
	mtctr	r0
	beq+	65f
40:	lbz	r0,4(r4)
	stb	r0,4(r6)
	addi	r4,r4,1
	addi	r6,r6,1
	bdnz	40b
65:	blr

_GLOBAL(memmove)
	cmplw	0,r3,r4
	bgt	backwards_memcpy
+0 −4
Original line number Diff line number Diff line
@@ -8,10 +8,6 @@ EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(memchr);
/* The cacheable_* helpers are exported only when CONFIG_PPC32 is defined. */
#ifdef CONFIG_PPC32
EXPORT_SYMBOL(cacheable_memcpy);
EXPORT_SYMBOL(cacheable_memzero);
#endif

EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strncpy);
+1 −1
Original line number Diff line number Diff line
@@ -224,7 +224,7 @@ void __init MMU_init_hw(void)
	 */
	if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322);
	Hash = __va(memblock_alloc(Hash_size, Hash_size));
	cacheable_memzero(Hash, Hash_size);
	memset(Hash, 0, Hash_size);
	_SDR1 = __pa(Hash) | SDR1_LOW_BITS;

	Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
+2 −10
Original line number Diff line number Diff line
@@ -79,13 +79,6 @@ MODULE_AUTHOR
    ("Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>");
MODULE_LICENSE("GPL");

/*
 * PPC64 doesn't (yet) have a cacheable_memcpy, so when CONFIG_PPC64 is
 * defined map it onto plain memcpy, forwarding the same three arguments
 * in the same order.
 */
#ifdef CONFIG_PPC64
#define cacheable_memcpy(d,s,n) memcpy((d),(s),(n))
#endif

/* minimum number of free TX descriptors required to wake up TX process */
#define EMAC_TX_WAKEUP_THRESH		(NUM_TX_BUFF / 4)

@@ -1673,7 +1666,7 @@ static inline int emac_rx_sg_append(struct emac_instance *dev, int slot)
			dev_kfree_skb(dev->rx_sg_skb);
			dev->rx_sg_skb = NULL;
		} else {
			cacheable_memcpy(skb_tail_pointer(dev->rx_sg_skb),
			memcpy(skb_tail_pointer(dev->rx_sg_skb),
					 dev->rx_skb[slot]->data, len);
			skb_put(dev->rx_sg_skb, len);
			emac_recycle_rx_skb(dev, slot, len);
@@ -1730,8 +1723,7 @@ static int emac_poll_rx(void *param, int budget)
				goto oom;

			skb_reserve(copy_skb, EMAC_RX_SKB_HEADROOM + 2);
			cacheable_memcpy(copy_skb->data - 2, skb->data - 2,
					 len + 2);
			memcpy(copy_skb->data - 2, skb->data - 2, len + 2);
			emac_recycle_rx_skb(dev, slot, len);
			skb = copy_skb;
		} else if (unlikely(emac_alloc_rx_skb(dev, slot, GFP_ATOMIC)))