Pull branch 'for-rmk' of git://git.linaro.org/people/ardbiesheuvel/linux-arm into devel-stable (b4f656ee) · Commits · 戴 / test

arch/arm/Kconfig

+7 −0

Original line number	Diff line number	Diff line
		@@ -2176,6 +2176,13 @@ config NEON
		Say Y to include support code for NEON, the ARMv7 Advanced SIMD
		Extension.

		config KERNEL_MODE_NEON
		bool "Support for NEON in kernel mode"
		default n
		depends on NEON
		help
		Say Y to include support for NEON in kernel mode.

		endmenu

		menu "Userspace binary formats"

arch/arm/include/asm/neon.h

0 → 100644

+36 −0

Original line number	Diff line number	Diff line
		/*
		* linux/arch/arm/include/asm/neon.h
		*
		* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
		*
		* This program is free software; you can redistribute it and/or modify
		* it under the terms of the GNU General Public License version 2 as
		* published by the Free Software Foundation.
		*/

		#include <asm/hwcap.h>

		#define cpu_has_neon() (!!(elf_hwcap & HWCAP_NEON))

		#ifdef __ARM_NEON__

		/*
		* If you are affected by the BUILD_BUG below, it probably means that you are
		* using NEON code /and/ calling the kernel_neon_begin() function from the same
		* compilation unit. To prevent issues that may arise from GCC reordering or
		* generating(1) NEON instructions outside of these begin/end functions, the
		* only supported way of using NEON code in the kernel is by isolating it in a
		* separate compilation unit, and calling it from another unit from inside a
		* kernel_neon_begin/kernel_neon_end pair.
		*
		* (1) Current GCC (4.7) might generate NEON instructions at O3 level if
		* -mpfu=neon is set.
		*/

		#define kernel_neon_begin() \
		BUILD_BUG_ON_MSG(1, "kernel_neon_begin() called from NEON code")

		#else
		void kernel_neon_begin(void);
		#endif
		void kernel_neon_end(void);

arch/arm/include/asm/xor.h

+73 −0

Original line number	Diff line number	Diff line
		@@ -7,7 +7,10 @@
		* it under the terms of the GNU General Public License version 2 as
		* published by the Free Software Foundation.
		*/
		#include <linux/hardirq.h>
		#include <asm-generic/xor.h>
		#include <asm/hwcap.h>
		#include <asm/neon.h>

		#define __XOR(a1, a2) a1 ^= a2

		@@ -138,4 +141,74 @@ static struct xor_block_template xor_block_arm4regs = {
		xor_speed(&xor_block_arm4regs); \
		xor_speed(&xor_block_8regs); \
		xor_speed(&xor_block_32regs); \
		NEON_TEMPLATES; \
		} while (0)

		#ifdef CONFIG_KERNEL_MODE_NEON

		extern struct xor_block_template const xor_block_neon_inner;

		static void
		xor_neon_2(unsigned long bytes, unsigned long p1, unsigned long p2)
		{
		if (in_interrupt()) {
		xor_arm4regs_2(bytes, p1, p2);
		} else {
		kernel_neon_begin();
		xor_block_neon_inner.do_2(bytes, p1, p2);
		kernel_neon_end();
		}
		}

		static void
		xor_neon_3(unsigned long bytes, unsigned long p1, unsigned long p2,
		unsigned long *p3)
		{
		if (in_interrupt()) {
		xor_arm4regs_3(bytes, p1, p2, p3);
		} else {
		kernel_neon_begin();
		xor_block_neon_inner.do_3(bytes, p1, p2, p3);
		kernel_neon_end();
		}
		}

		static void
		xor_neon_4(unsigned long bytes, unsigned long p1, unsigned long p2,
		unsigned long p3, unsigned long p4)
		{
		if (in_interrupt()) {
		xor_arm4regs_4(bytes, p1, p2, p3, p4);
		} else {
		kernel_neon_begin();
		xor_block_neon_inner.do_4(bytes, p1, p2, p3, p4);
		kernel_neon_end();
		}
		}

		static void
		xor_neon_5(unsigned long bytes, unsigned long p1, unsigned long p2,
		unsigned long p3, unsigned long p4, unsigned long *p5)
		{
		if (in_interrupt()) {
		xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
		} else {
		kernel_neon_begin();
		xor_block_neon_inner.do_5(bytes, p1, p2, p3, p4, p5);
		kernel_neon_end();
		}
		}

		static struct xor_block_template xor_block_neon = {
		.name = "neon",
		.do_2 = xor_neon_2,
		.do_3 = xor_neon_3,
		.do_4 = xor_neon_4,
		.do_5 = xor_neon_5
		};

		#define NEON_TEMPLATES \
		do { if (cpu_has_neon()) xor_speed(&xor_block_neon); } while (0)
		#else
		#define NEON_TEMPLATES
		#endif

arch/arm/lib/Makefile

+6 −0

Original line number	Diff line number	Diff line
		@@ -45,3 +45,9 @@ lib-$(CONFIG_ARCH_SHARK) += io-shark.o

		$(obj)/csumpartialcopy.o: $(obj)/csumpartialcopygeneric.S
		$(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S

		ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
		NEON_FLAGS := -mfloat-abi=softfp -mfpu=neon
		CFLAGS_xor-neon.o += $(NEON_FLAGS)
		lib-$(CONFIG_XOR_BLOCKS) += xor-neon.o
		endif

arch/arm/lib/xor-neon.c

0 → 100644

+42 −0

Original line number	Diff line number	Diff line
		/*
		* linux/arch/arm/lib/xor-neon.c
		*
		* Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
		*
		* This program is free software; you can redistribute it and/or modify
		* it under the terms of the GNU General Public License version 2 as
		* published by the Free Software Foundation.
		*/

		#include <linux/raid/xor.h>

		#ifndef __ARM_NEON__
		#error You should compile this file with '-mfloat-abi=softfp -mfpu=neon'
		#endif

		/*
		* Pull in the reference implementations while instructing GCC (through
		* -ftree-vectorize) to attempt to exploit implicit parallelism and emit
		* NEON instructions.
		*/
		#if __GNUC__ > 4 \|\| (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
		#pragma GCC optimize "tree-vectorize"
		#else
		/*
		* While older versions of GCC do not generate incorrect code, they fail to
		* recognize the parallel nature of these functions, and emit plain ARM code,
		* which is known to be slower than the optimized ARM code in asm-arm/xor.h.
		*/
		#warning This code requires at least version 4.6 of GCC
		#endif

		#pragma GCC diagnostic ignored "-Wunused-variable"
		#include <asm-generic/xor.h>

		struct xor_block_template const xor_block_neon_inner = {
		.name = "__inner_neon__",
		.do_2 = xor_8regs_2,
		.do_3 = xor_8regs_3,
		.do_4 = xor_8regs_4,
		.do_5 = xor_8regs_5,
		};

Admin message