Commit 019cd469 authored by Ard Biesheuvel's avatar Ard Biesheuvel Committed by Herbert Xu
Browse files

crypto: arm64/aes-ce-cipher - move assembler code to .S file



Most crypto drivers involving kernel mode NEON take care to put the code
that actually touches the NEON register file in a separate compilation
unit, to prevent the compiler from reordering code that preserves or
restores the NEON context with code that may corrupt it. This is
necessary because we currently have no way to express the restrictions
imposed upon use of the NEON in kernel mode in a way that the compiler
understands.

However, in the case of aes-ce-cipher, it did not seem unreasonable to
deviate from this rule, given how it does not seem possible for the
compiler to reorder cross object function calls with asm blocks whose
in- and output constraints reflect that it reads from and writes to
memory.

Now that LTO is being proposed for the arm64 kernel, it is time to
revisit this. The link time optimization may replace the function
calls to kernel_neon_begin() and kernel_neon_end() with instantiations
of the IR that make up its implementation, allowing further reordering
with the asm block.

So let's clean this up, and move the asm() blocks into a separate .S
file.

Signed-off-by: default avatarArd Biesheuvel <ard.biesheuvel@linaro.org>
Reviewed-By: default avatarNick Desaulniers <ndesaulniers@google.com>
Signed-off-by: default avatarHerbert Xu <herbert@gondor.apana.org.au>
parent 1964e333
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -24,7 +24,7 @@ obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o

obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
CFLAGS_aes-ce-cipher.o += -march=armv8-a+crypto
aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o

obj-$(CONFIG_CRYPTO_AES_ARM64_CE_CCM) += aes-ce-ccm.o
aes-ce-ccm-y := aes-ce-ccm-glue.o aes-ce-ccm-core.o
+87 −0
Original line number Diff line number Diff line
/*
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.arch		armv8-a+crypto

ENTRY(__aes_ce_encrypt)
	sub		w3, w3, #2
	ld1		{v0.16b}, [x2]
	ld1		{v1.4s}, [x0], #16
	cmp		w3, #10
	bmi		0f
	bne		3f
	mov		v3.16b, v1.16b
	b		2f
0:	mov		v2.16b, v1.16b
	ld1		{v3.4s}, [x0], #16
1:	aese		v0.16b, v2.16b
	aesmc		v0.16b, v0.16b
2:	ld1		{v1.4s}, [x0], #16
	aese		v0.16b, v3.16b
	aesmc		v0.16b, v0.16b
3:	ld1		{v2.4s}, [x0], #16
	subs		w3, w3, #3
	aese		v0.16b, v1.16b
	aesmc		v0.16b, v0.16b
	ld1		{v3.4s}, [x0], #16
	bpl		1b
	aese		v0.16b, v2.16b
	eor		v0.16b, v0.16b, v3.16b
	st1		{v0.16b}, [x1]
	ret
ENDPROC(__aes_ce_encrypt)

ENTRY(__aes_ce_decrypt)
	sub		w3, w3, #2
	ld1		{v0.16b}, [x2]
	ld1		{v1.4s}, [x0], #16
	cmp		w3, #10
	bmi		0f
	bne		3f
	mov		v3.16b, v1.16b
	b		2f
0:	mov		v2.16b, v1.16b
	ld1		{v3.4s}, [x0], #16
1:	aesd		v0.16b, v2.16b
	aesimc		v0.16b, v0.16b
2:	ld1		{v1.4s}, [x0], #16
	aesd		v0.16b, v3.16b
	aesimc		v0.16b, v0.16b
3:	ld1		{v2.4s}, [x0], #16
	subs		w3, w3, #3
	aesd		v0.16b, v1.16b
	aesimc		v0.16b, v0.16b
	ld1		{v3.4s}, [x0], #16
	bpl		1b
	aesd		v0.16b, v2.16b
	eor		v0.16b, v0.16b, v3.16b
	st1		{v0.16b}, [x1]
	ret
ENDPROC(__aes_ce_decrypt)

/*
 * __aes_ce_sub() - use the aese instruction to perform the AES sbox
 *                  substitution on each byte in 'input'
 */
ENTRY(__aes_ce_sub)
	dup		v1.4s, w0
	movi		v0.16b, #0
	aese		v0.16b, v1.16b
	umov		w0, v0.s[0]
	ret
ENDPROC(__aes_ce_sub)

ENTRY(__aes_ce_invert)
	ld1		{v0.4s}, [x1]
	aesimc		v1.16b, v0.16b
	st1		{v1.4s}, [x0]
	ret
ENDPROC(__aes_ce_invert)
+12 −103
Original line number Diff line number Diff line
@@ -29,6 +29,13 @@ struct aes_block {
	u8 b[AES_BLOCK_SIZE];
};

asmlinkage void __aes_ce_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
asmlinkage void __aes_ce_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);

asmlinkage u32 __aes_ce_sub(u32 l);
asmlinkage void __aes_ce_invert(struct aes_block *out,
				const struct aes_block *in);

static int num_rounds(struct crypto_aes_ctx *ctx)
{
	/*
@@ -44,10 +51,6 @@ static int num_rounds(struct crypto_aes_ctx *ctx)
static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	struct aes_block *out = (struct aes_block *)dst;
	struct aes_block const *in = (struct aes_block *)src;
	void *dummy0;
	int dummy1;

	if (!may_use_simd()) {
		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
@@ -55,49 +58,13 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
	}

	kernel_neon_begin();

	__asm__("	ld1	{v0.16b}, %[in]			;"
		"	ld1	{v1.4s}, [%[key]], #16		;"
		"	cmp	%w[rounds], #10			;"
		"	bmi	0f				;"
		"	bne	3f				;"
		"	mov	v3.16b, v1.16b			;"
		"	b	2f				;"
		"0:	mov	v2.16b, v1.16b			;"
		"	ld1	{v3.4s}, [%[key]], #16		;"
		"1:	aese	v0.16b, v2.16b			;"
		"	aesmc	v0.16b, v0.16b			;"
		"2:	ld1	{v1.4s}, [%[key]], #16		;"
		"	aese	v0.16b, v3.16b			;"
		"	aesmc	v0.16b, v0.16b			;"
		"3:	ld1	{v2.4s}, [%[key]], #16		;"
		"	subs	%w[rounds], %w[rounds], #3	;"
		"	aese	v0.16b, v1.16b			;"
		"	aesmc	v0.16b, v0.16b			;"
		"	ld1	{v3.4s}, [%[key]], #16		;"
		"	bpl	1b				;"
		"	aese	v0.16b, v2.16b			;"
		"	eor	v0.16b, v0.16b, v3.16b		;"
		"	st1	{v0.16b}, %[out]		;"

	:	[out]		"=Q"(*out),
		[key]		"=r"(dummy0),
		[rounds]	"=r"(dummy1)
	:	[in]		"Q"(*in),
				"1"(ctx->key_enc),
				"2"(num_rounds(ctx) - 2)
	:	"cc");

	__aes_ce_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
	kernel_neon_end();
}

static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	struct aes_block *out = (struct aes_block *)dst;
	struct aes_block const *in = (struct aes_block *)src;
	void *dummy0;
	int dummy1;

	if (!may_use_simd()) {
		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
@@ -105,62 +72,10 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
	}

	kernel_neon_begin();

	__asm__("	ld1	{v0.16b}, %[in]			;"
		"	ld1	{v1.4s}, [%[key]], #16		;"
		"	cmp	%w[rounds], #10			;"
		"	bmi	0f				;"
		"	bne	3f				;"
		"	mov	v3.16b, v1.16b			;"
		"	b	2f				;"
		"0:	mov	v2.16b, v1.16b			;"
		"	ld1	{v3.4s}, [%[key]], #16		;"
		"1:	aesd	v0.16b, v2.16b			;"
		"	aesimc	v0.16b, v0.16b			;"
		"2:	ld1	{v1.4s}, [%[key]], #16		;"
		"	aesd	v0.16b, v3.16b			;"
		"	aesimc	v0.16b, v0.16b			;"
		"3:	ld1	{v2.4s}, [%[key]], #16		;"
		"	subs	%w[rounds], %w[rounds], #3	;"
		"	aesd	v0.16b, v1.16b			;"
		"	aesimc	v0.16b, v0.16b			;"
		"	ld1	{v3.4s}, [%[key]], #16		;"
		"	bpl	1b				;"
		"	aesd	v0.16b, v2.16b			;"
		"	eor	v0.16b, v0.16b, v3.16b		;"
		"	st1	{v0.16b}, %[out]		;"

	:	[out]		"=Q"(*out),
		[key]		"=r"(dummy0),
		[rounds]	"=r"(dummy1)
	:	[in]		"Q"(*in),
				"1"(ctx->key_dec),
				"2"(num_rounds(ctx) - 2)
	:	"cc");

	__aes_ce_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
	kernel_neon_end();
}

/*
 * aes_sub() - use the aese instruction to perform the AES sbox substitution
 *             on each byte in 'input'
 */
static u32 aes_sub(u32 input)
{
	u32 ret;

	__asm__("dup	v1.4s, %w[in]		;"
		"movi	v0.16b, #0		;"
		"aese	v0.16b, v1.16b		;"
		"umov	%w[out], v0.4s[0]	;"

	:	[out]	"=r"(ret)
	:	[in]	"r"(input)
	:		"v0","v1");

	return ret;
}

int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		     unsigned int key_len)
{
@@ -189,7 +104,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		u32 *rki = ctx->key_enc + (i * kwords);
		u32 *rko = rki + kwords;

		rko[0] = ror32(aes_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
		rko[0] = ror32(__aes_ce_sub(rki[kwords - 1]), 8) ^ rcon[i] ^ rki[0];
		rko[1] = rko[0] ^ rki[1];
		rko[2] = rko[1] ^ rki[2];
		rko[3] = rko[2] ^ rki[3];
@@ -202,7 +117,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		} else if (key_len == AES_KEYSIZE_256) {
			if (i >= 6)
				break;
			rko[4] = aes_sub(rko[3]) ^ rki[4];
			rko[4] = __aes_ce_sub(rko[3]) ^ rki[4];
			rko[5] = rko[4] ^ rki[5];
			rko[6] = rko[5] ^ rki[6];
			rko[7] = rko[6] ^ rki[7];
@@ -221,13 +136,7 @@ int ce_aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,

	key_dec[0] = key_enc[j];
	for (i = 1, j--; j > 0; i++, j--)
		__asm__("ld1	{v0.4s}, %[in]		;"
			"aesimc	v1.16b, v0.16b		;"
			"st1	{v1.4s}, %[out]	;"

		:	[out]	"=Q"(key_dec[i])
		:	[in]	"Q"(key_enc[j])
		:		"v0","v1");
		__aes_ce_invert(key_dec + i, key_enc + j);
	key_dec[i] = key_enc[0];

	kernel_neon_end();