Commit 25da4e9d authored by Paul Burton, committed by Ralf Baechle
Browse files

MIPS: Use queued read/write locks (qrwlock)



This patch switches MIPS to make use of generically implemented queued
read/write locks, rather than the custom implementation used previously.
This allows us to drop a whole load of inline assembly, share more
generic code, and is also a performance win.

Results from running the AIM7 short workload on a MIPS Creator Ci40 (i.e.
2 core 2 thread interAptiv CPU clocked at 546MHz) with v4.12-rc4
pistachio_defconfig, with ftrace disabled due to a current bug, and both
with & without use of queued rwlocks & spinlocks:

  Forks | v4.12-rc4 | +qlocks  | Change
 -------|-----------|----------|--------
     10 | 52630.32  | 53316.31 | +1.01%
     20 | 51777.80  | 52623.15 | +1.02%
     30 | 51645.92  | 52517.26 | +1.02%
     40 | 51634.88  | 52419.89 | +1.02%
     50 | 51506.75  | 52307.81 | +1.02%
     60 | 51500.74  | 52322.72 | +1.02%
     70 | 51434.81  | 52288.60 | +1.02%
     80 | 51423.22  | 52434.85 | +1.02%
     90 | 51428.65  | 52410.10 | +1.02%

The kernels used for these tests also had my "MIPS: Hardcode cpu_has_*
where known at compile time due to ISA" patch applied, which allows the
kernel_uses_llsc checks in cmpxchg() & xchg() to be optimised away at
compile time.

Signed-off-by: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/16357/


Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
parent 4843cf8d
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -11,6 +11,7 @@ config MIPS
	select ARCH_SUPPORTS_UPROBES
	select ARCH_USE_BUILTIN_BSWAP
	select ARCH_USE_CMPXCHG_LOCKREF if 64BIT
	select ARCH_USE_QUEUED_RWLOCKS
	select ARCH_WANT_IPC_PARSE_VERSION
	select BUILDTIME_EXTABLE_SORT
	select CLONE_BACKWARDS
+1 −0
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ generic-y += mm-arch-hooks.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += qrwlock.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
+1 −215
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@

#include <asm/barrier.h>
#include <asm/processor.h>
#include <asm/qrwlock.h>
#include <asm/compiler.h>
#include <asm/war.h>

@@ -220,221 +221,6 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
	return tmp;
}

/*
 * Read-write spinlocks, allowing multiple readers but only one writer.
 *
 * NOTE! it is quite common to have readers in interrupts but no interrupt
 * writers. For those circumstances we can "mix" irq-safe locks - any writer
 * needs to get an irq-safe write-lock, but readers can get non-irqsafe
 * read-locks.
 */

/*
 * arch_read_can_lock - would read_trylock() succeed?
 * @rw: the rwlock in question.
 *
 * A writer marks the lock by storing 0x80000000 into the lock word, and the
 * read-side assembly tests for that with a signed "branch if less than zero".
 * The lock word itself is declared unsigned, so it must be viewed as a signed
 * int here: comparing the unsigned value against zero would always be true
 * and would report the lock as readable even while a writer holds it.
 */
#define arch_read_can_lock(rw)	((int)(rw)->lock >= 0)

/*
 * arch_write_can_lock - report whether write_trylock() would succeed.
 * @rw: the rwlock in question.
 *
 * Evaluates to 1 only when the lock word is exactly zero, i.e. there are
 * neither readers counted in it nor a writer flag set; otherwise 0.
 */
#define arch_write_can_lock(rw)	((rw)->lock == 0)

/*
 * arch_read_lock - acquire @rw for reading, spinning until it is available.
 * @rw: the rwlock to acquire.
 *
 * Runs an ll/sc sequence on rw->lock: while the loaded value is negative
 * (bltz) the load is repeated; otherwise the value is incremented by one and
 * written back with sc.  A failed sc leaves tmp as zero and the sequence is
 * restarted.  smp_llsc_mb() is issued once the increment has succeeded.
 */
static inline void arch_read_lock(arch_rwlock_t *rw)
{
	unsigned int tmp;

	if (R10000_LLSC_WAR) {
		/*
		 * Retry on sc failure is done inside the asm with the
		 * branch-likely form (beqzl); presumably an R10000 erratum
		 * workaround, going by the macro name.
		 */
		__asm__ __volatile__(
		"	.set	noreorder	# arch_read_lock	\n"
		"1:	ll	%1, %2					\n"
		"	bltz	%1, 1b					\n"
		"	 addu	%1, 1					\n"
		"	sc	%1, %0					\n"
		"	beqzl	%1, 1b					\n"
		"	 nop						\n"
		"	.set	reorder					\n"
		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
		: GCC_OFF_SMALL_ASM() (rw->lock)
		: "memory");
	} else {
		/* Retry on sc failure is done by the C loop around the asm. */
		do {
			__asm__ __volatile__(
			"1:	ll	%1, %2	# arch_read_lock	\n"
			"	bltz	%1, 1b				\n"
			"	 addu	%1, 1				\n"
			"2:	sc	%1, %0				\n"
			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
			: GCC_OFF_SMALL_ASM() (rw->lock)
			: "memory");
		} while (unlikely(!tmp));
	}

	/* Barrier after the successful ll/sc acquisition. */
	smp_llsc_mb();
}

/*
 * arch_read_unlock - release a read hold on @rw.
 * @rw: the rwlock to release.
 *
 * Issues smp_mb__before_llsc() and then decrements rw->lock by one with an
 * ll/addiu/sc sequence, repeating the sequence whenever sc fails (tmp left
 * as zero).
 */
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
	unsigned int tmp;

	/* Barrier ahead of the ll/sc that drops the reader count. */
	smp_mb__before_llsc();

	if (R10000_LLSC_WAR) {
		/* Retry loop lives in the asm, using branch-likely beqzl. */
		__asm__ __volatile__(
		"1:	ll	%1, %2		# arch_read_unlock	\n"
		"	addiu	%1, -1					\n"
		"	sc	%1, %0					\n"
		"	beqzl	%1, 1b					\n"
		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
		: GCC_OFF_SMALL_ASM() (rw->lock)
		: "memory");
	} else {
		/* Retry loop lives in C: repeat while sc reported failure. */
		do {
			__asm__ __volatile__(
			"1:	ll	%1, %2	# arch_read_unlock	\n"
			"	addiu	%1, -1				\n"
			"	sc	%1, %0				\n"
			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
			: GCC_OFF_SMALL_ASM() (rw->lock)
			: "memory");
		} while (unlikely(!tmp));
	}
}

/*
 * arch_write_lock - acquire @rw for writing, spinning until it is available.
 * @rw: the rwlock to acquire.
 *
 * Runs an ll/sc sequence on rw->lock: while the loaded value is non-zero
 * (bnez) the load is repeated; once it reads zero, 0x80000000 (built with
 * "lui %1, 0x8000") is written back with sc.  A failed sc leaves tmp as zero
 * and the sequence is restarted.  smp_llsc_mb() is issued after success.
 */
static inline void arch_write_lock(arch_rwlock_t *rw)
{
	unsigned int tmp;

	if (R10000_LLSC_WAR) {
		/* Retry loop lives in the asm, using branch-likely beqzl. */
		__asm__ __volatile__(
		"	.set	noreorder	# arch_write_lock	\n"
		"1:	ll	%1, %2					\n"
		"	bnez	%1, 1b					\n"
		"	 lui	%1, 0x8000				\n"
		"	sc	%1, %0					\n"
		"	beqzl	%1, 1b					\n"
		"	 nop						\n"
		"	.set	reorder					\n"
		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
		: GCC_OFF_SMALL_ASM() (rw->lock)
		: "memory");
	} else {
		/* Retry loop lives in C: repeat while sc reported failure. */
		do {
			__asm__ __volatile__(
			"1:	ll	%1, %2	# arch_write_lock	\n"
			"	bnez	%1, 1b				\n"
			"	 lui	%1, 0x8000			\n"
			"2:	sc	%1, %0				\n"
			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp)
			: GCC_OFF_SMALL_ASM() (rw->lock)
			: "memory");
		} while (unlikely(!tmp));
	}

	/* Barrier after the successful ll/sc acquisition. */
	smp_llsc_mb();
}

/*
 * arch_write_unlock - release a write hold on @rw.
 * @rw: the rwlock to release.
 *
 * Issues smp_mb__before_llsc() and then clears the whole lock word with a
 * plain store of register $0 (no ll/sc sequence is used here).
 */
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
	smp_mb__before_llsc();

	/* "sw $0" writes zero to rw->lock. */
	__asm__ __volatile__(
	"				# arch_write_unlock	\n"
	"	sw	$0, %0					\n"
	: "=m" (rw->lock)
	: "m" (rw->lock)
	: "memory");
}

/*
 * arch_read_trylock - try once to acquire @rw for reading, without waiting
 * for a writer to go away.
 * @rw: the rwlock to acquire.
 *
 * ret is preset to 0.  If the value loaded from rw->lock is negative (bltz)
 * the asm jumps straight to its end label and 0 is returned.  Otherwise the
 * value is incremented by one and stored with sc; only an sc failure causes a
 * retry.  After a successful store __WEAK_LLSC_MB is emitted and ret is set
 * to 1.
 *
 * Return: 1 if the read lock was taken, 0 if the lock word was negative.
 */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
	unsigned int tmp;
	int ret;

	if (R10000_LLSC_WAR) {
		/* sc-failure retry uses the branch-likely form (beqzl). */
		__asm__ __volatile__(
		"	.set	noreorder	# arch_read_trylock	\n"
		"	li	%2, 0					\n"
		"1:	ll	%1, %3					\n"
		"	bltz	%1, 2f					\n"
		"	 addu	%1, 1					\n"
		"	sc	%1, %0					\n"
		"	.set	reorder					\n"
		"	beqzl	%1, 1b					\n"
		"	 nop						\n"
		__WEAK_LLSC_MB
		"	li	%2, 1					\n"
		"2:							\n"
		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
		: GCC_OFF_SMALL_ASM() (rw->lock)
		: "memory");
	} else {
		/* sc-failure retry uses the ordinary beqz branch. */
		__asm__ __volatile__(
		"	.set	noreorder	# arch_read_trylock	\n"
		"	li	%2, 0					\n"
		"1:	ll	%1, %3					\n"
		"	bltz	%1, 2f					\n"
		"	 addu	%1, 1					\n"
		"	sc	%1, %0					\n"
		"	beqz	%1, 1b					\n"
		"	 nop						\n"
		"	.set	reorder					\n"
		__WEAK_LLSC_MB
		"	li	%2, 1					\n"
		"2:	.insn						\n"
		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
		: GCC_OFF_SMALL_ASM() (rw->lock)
		: "memory");
	}

	return ret;
}

/*
 * arch_write_trylock - try once to acquire @rw for writing, without waiting
 * for current holders to go away.
 * @rw: the rwlock to acquire.
 *
 * ret is set to 0 and, if the value loaded from rw->lock is non-zero (bnez),
 * the asm jumps to its end label so that 0 is returned.  Otherwise
 * 0x80000000 (built with "lui %1, 0x8000") is stored with sc; only an sc
 * failure causes a retry.  ret is set to 1 on the success path.
 *
 * Return: 1 if the write lock was taken, 0 if the lock word was non-zero.
 */
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
	unsigned int tmp;
	int ret;

	if (R10000_LLSC_WAR) {
		/*
		 * Retry loop and __WEAK_LLSC_MB are both inside the asm; the
		 * sc-failure branch is the branch-likely form (beqzl).
		 */
		__asm__ __volatile__(
		"	.set	noreorder	# arch_write_trylock	\n"
		"	li	%2, 0					\n"
		"1:	ll	%1, %3					\n"
		"	bnez	%1, 2f					\n"
		"	 lui	%1, 0x8000				\n"
		"	sc	%1, %0					\n"
		"	beqzl	%1, 1b					\n"
		"	 nop						\n"
		__WEAK_LLSC_MB
		"	li	%2, 1					\n"
		"	.set	reorder					\n"
		"2:							\n"
		: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp), "=&r" (ret)
		: GCC_OFF_SMALL_ASM() (rw->lock)
		: "memory");
	} else {
		/*
		 * Retry is the C loop on tmp: a failed sc leaves tmp zero and
		 * the asm is run again.  When the bnez is taken because the
		 * lock is held, tmp is expected to still hold the non-zero
		 * value that was loaded, which ends the loop with ret == 0.
		 */
		do {
			__asm__ __volatile__(
			"	ll	%1, %3	# arch_write_trylock	\n"
			"	li	%2, 0				\n"
			"	bnez	%1, 2f				\n"
			"	lui	%1, 0x8000			\n"
			"	sc	%1, %0				\n"
			"	li	%2, 1				\n"
			"2:	.insn					\n"
			: "=" GCC_OFF_SMALL_ASM() (rw->lock), "=&r" (tmp),
			  "=&r" (ret)
			: GCC_OFF_SMALL_ASM() (rw->lock)
			: "memory");
		} while (unlikely(!tmp));

		/* Issued after the loop whether or not the lock was taken. */
		smp_llsc_mb();
	}

	return ret;
}

/*
 * The _flags variants take a second argument but do not use it: each one
 * expands to the plain lock operation on its first argument.
 */
#define arch_read_lock_flags(rw, flags)		arch_read_lock(rw)
#define arch_write_lock_flags(rw, flags)	arch_write_lock(rw)

+1 −9
Original line number Diff line number Diff line
#ifndef _ASM_SPINLOCK_TYPES_H
#define _ASM_SPINLOCK_TYPES_H

#ifndef __LINUX_SPINLOCK_TYPES_H
# error "please don't include this file directly"
#endif

#include <linux/types.h>

#include <asm/byteorder.h>
@@ -28,10 +24,6 @@ typedef union {

/* Static initialiser that sets the .lock member to zero (the unlocked state, going by the macro name). */
#define __ARCH_SPIN_LOCK_UNLOCKED	{ .lock = 0 }

/*
 * Read-write lock state held in a single volatile 32-bit word.
 */
typedef struct {
	unsigned int volatile lock;
} arch_rwlock_t;

/* Static initialiser for an rwlock whose lock word is zero. */
#define __ARCH_RW_LOCK_UNLOCKED		{ .lock = 0 }
#include <asm-generic/qrwlock_types.h>

#endif