Commit 0b17c967 authored by Paul Burton's avatar Paul Burton Committed by Ralf Baechle
Browse files

MIPS: Use queued spinlocks (qspinlock)



This patch switches MIPS to make use of generically implemented queued
spinlocks, rather than the ticket spinlocks used previously. This allows
us to drop a whole load of inline assembly, share more generic code, and
is also a performance win.

Results from running the AIM7 short workload on a MIPS Creator Ci40 (ie.
2 core 2 thread interAptiv CPU clocked at 546MHz) with v4.12-rc4
pistachio_defconfig, with ftrace disabled due to a current bug, and both
with & without use of queued rwlocks & spinlocks:

  Forks | v4.12-rc4 | +qlocks  | Change
 -------|-----------|----------|--------
     10 | 52630.32  | 53316.31 | +1.01%
     20 | 51777.80  | 52623.15 | +1.02%
     30 | 51645.92  | 52517.26 | +1.02%
     40 | 51634.88  | 52419.89 | +1.02%
     50 | 51506.75  | 52307.81 | +1.02%
     60 | 51500.74  | 52322.72 | +1.02%
     70 | 51434.81  | 52288.60 | +1.02%
     80 | 51423.22  | 52434.85 | +1.02%
     90 | 51428.65  | 52410.10 | +1.02%

The kernels used for these tests also had my "MIPS: Hardcode cpu_has_*
where known at compile time due to ISA" patch applied, which allows the
kernel_uses_llsc checks in cmpxchg() & xchg() to be optimised away at
compile time.

Signed-off-by: default avatarPaul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16358/


Signed-off-by: default avatarRalf Baechle <ralf@linux-mips.org>
parent 25da4e9d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ config MIPS
	select ARCH_USE_BUILTIN_BSWAP
	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
	select ARCH_USE_QUEUED_RWLOCKS
	select ARCH_USE_QUEUED_SPINLOCKS
	select ARCH_WANT_IPC_PARSE_VERSION
	select BUILDTIME_EXTABLE_SORT
	select CLONE_BACKWARDS
+1 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += qrwlock.h
generic-y += qspinlock.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
+1 −209
Original line number Diff line number Diff line
@@ -9,217 +9,9 @@
#ifndef _ASM_SPINLOCK_H
#define _ASM_SPINLOCK_H

#include <linux/compiler.h>

#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/qrwlock.h>
#include <asm/compiler.h>
#include <asm/war.h>

/*
 * Your basic SMP spinlocks, allowing only a single CPU anywhere
 *
 * Simple spin lock operations.	 There are two variants, one clears IRQ's
 * on the local processor, one does not.
 *
 * These are fair FIFO ticket locks
 *
 * (the type definitions are in asm/spinlock_types.h)
 */


/*
 * Ticket locks are conceptually two parts, one indicating the current head of
 * the queue, and the other indicating the current tail. The lock is acquired
 * by atomically noting the tail and incrementing it by one (thus adding
 * ourself to the queue and noting our position), then waiting until the head
 * becomes equal to the the initial value of the tail.
 */

static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	u32 counters = ACCESS_ONCE(lock->lock);

	return ((counters >> 16) ^ counters) & 0xffff;
}

static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	return lock.h.serving_now == lock.h.ticket;
}

#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)

static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
	u16 owner = READ_ONCE(lock->h.serving_now);
	smp_rmb();
	for (;;) {
		arch_spinlock_t tmp = READ_ONCE(*lock);

		if (tmp.h.serving_now == tmp.h.ticket ||
		    tmp.h.serving_now != owner)
			break;

		cpu_relax();
	}
	smp_acquire__after_ctrl_dep();
}

static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
	u32 counters = ACCESS_ONCE(lock->lock);

	return (((counters >> 16) - counters) & 0xffff) > 1;
}
#define arch_spin_is_contended	arch_spin_is_contended

static inline void arch_spin_lock(arch_spinlock_t *lock)
{
	int my_ticket;
	int tmp;
	int inc = 0x10000;

	if (R10000_LLSC_WAR) {
		__asm__ __volatile__ (
		"	.set push		# arch_spin_lock	\n"
		"	.set noreorder					\n"
		"							\n"
		"1:	ll	%[ticket], %[ticket_ptr]		\n"
		"	addu	%[my_ticket], %[ticket], %[inc]		\n"
		"	sc	%[my_ticket], %[ticket_ptr]		\n"
		"	beqzl	%[my_ticket], 1b			\n"
		"	 nop						\n"
		"	srl	%[my_ticket], %[ticket], 16		\n"
		"	andi	%[ticket], %[ticket], 0xffff		\n"
		"	bne	%[ticket], %[my_ticket], 4f		\n"
		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
		"2:							\n"
		"	.subsection 2					\n"
		"4:	andi	%[ticket], %[ticket], 0xffff		\n"
		"	sll	%[ticket], 5				\n"
		"							\n"
		"6:	bnez	%[ticket], 6b				\n"
		"	 subu	%[ticket], 1				\n"
		"							\n"
		"	lhu	%[ticket], %[serving_now_ptr]		\n"
		"	beq	%[ticket], %[my_ticket], 2b		\n"
		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
		"	b	4b					\n"
		"	 subu	%[ticket], %[ticket], 1			\n"
		"	.previous					\n"
		"	.set pop					\n"
		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
		  [serving_now_ptr] "+m" (lock->h.serving_now),
		  [ticket] "=&r" (tmp),
		  [my_ticket] "=&r" (my_ticket)
		: [inc] "r" (inc));
	} else {
		__asm__ __volatile__ (
		"	.set push		# arch_spin_lock	\n"
		"	.set noreorder					\n"
		"							\n"
		"1:	ll	%[ticket], %[ticket_ptr]		\n"
		"	addu	%[my_ticket], %[ticket], %[inc]		\n"
		"	sc	%[my_ticket], %[ticket_ptr]		\n"
		"	beqz	%[my_ticket], 1b			\n"
		"	 srl	%[my_ticket], %[ticket], 16		\n"
		"	andi	%[ticket], %[ticket], 0xffff		\n"
		"	bne	%[ticket], %[my_ticket], 4f		\n"
		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
		"2:	.insn						\n"
		"	.subsection 2					\n"
		"4:	andi	%[ticket], %[ticket], 0xffff		\n"
		"	sll	%[ticket], 5				\n"
		"							\n"
		"6:	bnez	%[ticket], 6b				\n"
		"	 subu	%[ticket], 1				\n"
		"							\n"
		"	lhu	%[ticket], %[serving_now_ptr]		\n"
		"	beq	%[ticket], %[my_ticket], 2b		\n"
		"	 subu	%[ticket], %[my_ticket], %[ticket]	\n"
		"	b	4b					\n"
		"	 subu	%[ticket], %[ticket], 1			\n"
		"	.previous					\n"
		"	.set pop					\n"
		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
		  [serving_now_ptr] "+m" (lock->h.serving_now),
		  [ticket] "=&r" (tmp),
		  [my_ticket] "=&r" (my_ticket)
		: [inc] "r" (inc));
	}

	smp_llsc_mb();
}

static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
	unsigned int serving_now = lock->h.serving_now + 1;
	wmb();
	lock->h.serving_now = (u16)serving_now;
	nudge_writes();
}

static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
{
	int tmp, tmp2, tmp3;
	int inc = 0x10000;

	if (R10000_LLSC_WAR) {
		__asm__ __volatile__ (
		"	.set push		# arch_spin_trylock	\n"
		"	.set noreorder					\n"
		"							\n"
		"1:	ll	%[ticket], %[ticket_ptr]		\n"
		"	srl	%[my_ticket], %[ticket], 16		\n"
		"	andi	%[now_serving], %[ticket], 0xffff	\n"
		"	bne	%[my_ticket], %[now_serving], 3f	\n"
		"	 addu	%[ticket], %[ticket], %[inc]		\n"
		"	sc	%[ticket], %[ticket_ptr]		\n"
		"	beqzl	%[ticket], 1b				\n"
		"	 li	%[ticket], 1				\n"
		"2:							\n"
		"	.subsection 2					\n"
		"3:	b	2b					\n"
		"	 li	%[ticket], 0				\n"
		"	.previous					\n"
		"	.set pop					\n"
		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
		  [ticket] "=&r" (tmp),
		  [my_ticket] "=&r" (tmp2),
		  [now_serving] "=&r" (tmp3)
		: [inc] "r" (inc));
	} else {
		__asm__ __volatile__ (
		"	.set push		# arch_spin_trylock	\n"
		"	.set noreorder					\n"
		"							\n"
		"1:	ll	%[ticket], %[ticket_ptr]		\n"
		"	srl	%[my_ticket], %[ticket], 16		\n"
		"	andi	%[now_serving], %[ticket], 0xffff	\n"
		"	bne	%[my_ticket], %[now_serving], 3f	\n"
		"	 addu	%[ticket], %[ticket], %[inc]		\n"
		"	sc	%[ticket], %[ticket_ptr]		\n"
		"	beqz	%[ticket], 1b				\n"
		"	 li	%[ticket], 1				\n"
		"2:	.insn						\n"
		"	.subsection 2					\n"
		"3:	b	2b					\n"
		"	 li	%[ticket], 0				\n"
		"	.previous					\n"
		"	.set pop					\n"
		: [ticket_ptr] "+" GCC_OFF_SMALL_ASM() (lock->lock),
		  [ticket] "=&r" (tmp),
		  [my_ticket] "=&r" (tmp2),
		  [now_serving] "=&r" (tmp3)
		: [inc] "r" (inc));
	}

	smp_llsc_mb();

	return tmp;
}
#include <asm/qspinlock.h>

#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+1 −23
Original line number Diff line number Diff line
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H

#include <linux/types.h>

#include <asm/byteorder.h>

typedef union {
	/*
	 * bits	 0..15 : serving_now
	 * bits 16..31 : ticket
	 */
	u32 lock;
	struct {
#ifdef __BIG_ENDIAN
		u16 ticket;
		u16 serving_now;
#else
		u16 serving_now;
		u16 ticket;
#endif
	} h;
} arch_spinlock_t;

#define __ARCH_SPIN_LOCK_UNLOCKED	{ .lock = 0 }

#include <asm-generic/qspinlock_types.h>
#include <asm-generic/qrwlock_types.h>

#endif