Commit e044d505, authored by Dave Watson, committed by Herbert Xu
Browse files

crypto: aesni - Introduce partial block macro



Before this diff, multiple calls to GCM_ENC_DEC succeed
only if the data length of every call is a multiple of 16 bytes.

Handle partial blocks at the start of GCM_ENC_DEC, and update
aadhash as appropriate.

The data offset %r11 is also updated after the partial block.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent ec8c02d9
Loading
Loading
Loading
Loading
+150 −6
Original line number Diff line number Diff line
@@ -301,6 +301,12 @@ VARIABLE_OFFSET = 16*8
        vmovdqu  HashKey(arg2), %xmm13      # xmm13 = HashKey
        add arg5, InLen(arg2)

        # initialize the data pointer offset as zero
        xor     %r11d, %r11d

        PARTIAL_BLOCK \GHASH_MUL, arg3, arg4, arg5, %r11, %xmm8, \ENC_DEC
        sub %r11, arg5

        mov     arg5, %r13                  # save the number of bytes of plaintext/ciphertext
        and     $-16, %r13                  # r13 = r13 - (r13 mod 16)

@@ -737,6 +743,150 @@ _read_next_byte_lt8_\@:
_done_read_partial_block_\@:
.endm

# PARTIAL_BLOCK: Handles encryption/decryption and hashing of the partial
# block that is left pending between update calls.
#
# Arguments:
#   GHASH_MUL      - name of the GHASH multiply macro, invoked once the
#                    pending block has been completed
#   CYPH_PLAIN_OUT - output buffer pointer
#   PLAIN_CYPH_IN  - input buffer pointer
#   PLAIN_CYPH_LEN - register holding the number of input bytes
#   DATA_OFFSET    - register holding the offset into the buffers; advanced
#                    by the number of bytes written out. The 16-byte load
#                    below reads from offset 0 and ignores this register, so
#                    it is expected to be zero on entry.
#   AAD_HASH       - xmm register holding the running hash; updated here and
#                    stored to AadHash in the context
#   ENC_DEC        - compared against DEC; any other value selects the
#                    encrypt path
#
# Does nothing when PBlockLen in the context is zero (no pending block) or
# when PLAIN_CYPH_LEN is zero (nothing to consume).
# Outputs encrypted bytes, and updates hash and partial info in gcm_data_context
# Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
# NOTE(review): READ_PARTIAL_BLOCK is defined elsewhere; confirm that its own
# clobbers are covered by the list above.
.macro PARTIAL_BLOCK GHASH_MUL CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
        AAD_HASH ENC_DEC
        mov	PBlockLen(arg2), %r13
        test	%r13, %r13
        jz	_partial_block_done_\@	# Leave Macro if no partial blocks
        # Nothing to consume: leave the pending block untouched. Without this
        # guard the byte store loop at the end would start with a count of
        # zero and run past the end of the output buffer.
        test	\PLAIN_CYPH_LEN, \PLAIN_CYPH_LEN
        jz	_partial_block_done_\@
        # Read in input data without over reading
        cmp	$16, \PLAIN_CYPH_LEN
        jb	_fewer_than_16_bytes_\@	# unsigned compare: this is a byte count
        vmovdqu	(\PLAIN_CYPH_IN), %xmm1	# At least 16 bytes, just fill xmm
        jmp	_data_read_\@

_fewer_than_16_bytes_\@:
        lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
        mov	\PLAIN_CYPH_LEN, %r12
        READ_PARTIAL_BLOCK %r10 %r12 %xmm1

        # reload: r13 is not assumed to survive READ_PARTIAL_BLOCK
        mov	PBlockLen(arg2), %r13

_data_read_\@:				# Finished reading in data

        vmovdqu	PBlockEncKey(arg2), %xmm9
        vmovdqu	HashKey(arg2), %xmm13

        lea	SHIFT_MASK(%rip), %r12

        # adjust the shuffle mask pointer to be able to shift r13 bytes
        # (r13 = PBlockLen, the size of the block that is already pending)
        add	%r13, %r12
        vmovdqu	(%r12), %xmm2		# get the appropriate shuffle mask
        vpshufb	%xmm2, %xmm9, %xmm9		# shift right r13 bytes

.if  \ENC_DEC ==  DEC
        vmovdqa	%xmm1, %xmm3			# keep the ciphertext for hashing
        # VEX form, consistent with the rest of this macro
        vpxor	%xmm1, %xmm9, %xmm9		# Cyphertext XOR E(K, Yn)

        mov	\PLAIN_CYPH_LEN, %r10
        add	%r13, %r10
        # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
        # (negative when the input does not complete the block)
        sub	$16, %r10
        # Determine whether the partial block is not being filled and
        # shift mask accordingly
        jge	_no_extra_mask_1_\@
        sub	%r10, %r12
_no_extra_mask_1_\@:

        # the displacement turns the SHIFT_MASK based pointer in r12 into
        # the same offset from ALL_F
        vmovdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
        # get the appropriate mask to mask out bottom r13 bytes of xmm9
        vpand	%xmm1, %xmm9, %xmm9		# mask out bottom r13 bytes of xmm9

        vpand	%xmm1, %xmm3, %xmm3
        vmovdqa	SHUF_MASK(%rip), %xmm10
        vpshufb	%xmm10, %xmm3, %xmm3
        vpshufb	%xmm2, %xmm3, %xmm3
        vpxor	%xmm3, \AAD_HASH, \AAD_HASH

        test	%r10, %r10
        js	_partial_incomplete_1_\@	# block still not full: no multiply yet

        # GHASH computation for the last <16 Byte block
        \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
        xor	%eax,%eax

        mov	%rax, PBlockLen(arg2)		# block completed, nothing pending
        jmp	_dec_done_\@
_partial_incomplete_1_\@:
        add	\PLAIN_CYPH_LEN, PBlockLen(arg2)
_dec_done_\@:
        vmovdqu	\AAD_HASH, AadHash(arg2)
.else
        vpxor	%xmm1, %xmm9, %xmm9			# Plaintext XOR E(K, Yn)

        mov	\PLAIN_CYPH_LEN, %r10
        add	%r13, %r10
        # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
        # (negative when the input does not complete the block)
        sub	$16, %r10
        # Determine whether the partial block is not being filled and
        # shift mask accordingly
        jge	_no_extra_mask_2_\@
        sub	%r10, %r12
_no_extra_mask_2_\@:

        # the displacement turns the SHIFT_MASK based pointer in r12 into
        # the same offset from ALL_F
        vmovdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
        # get the appropriate mask to mask out bottom r13 bytes of xmm9
        vpand	%xmm1, %xmm9, %xmm9

        vmovdqa	SHUF_MASK(%rip), %xmm1
        vpshufb	%xmm1, %xmm9, %xmm9
        vpshufb	%xmm2, %xmm9, %xmm9
        vpxor	%xmm9, \AAD_HASH, \AAD_HASH

        test	%r10, %r10
        js	_partial_incomplete_2_\@	# block still not full: no multiply yet

        # GHASH computation for the last <16 Byte block
        \GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
        xor	%eax,%eax

        mov	%rax, PBlockLen(arg2)		# block completed, nothing pending
        jmp	_encode_done_\@
_partial_incomplete_2_\@:
        add	\PLAIN_CYPH_LEN, PBlockLen(arg2)
_encode_done_\@:
        vmovdqu	\AAD_HASH, AadHash(arg2)

        vmovdqa	SHUF_MASK(%rip), %xmm10
        # shuffle xmm9 back to output as ciphertext
        vpshufb	%xmm10, %xmm9, %xmm9
        vpshufb	%xmm2, %xmm9, %xmm9
.endif
        # output encrypted Bytes
        test	%r10, %r10
        js	_partial_fill_\@
        mov	%r13, %r12
        mov	$16, %r13
        # Set r13 to be the number of bytes to write out
        sub	%r12, %r13
        jmp	_count_set_\@
_partial_fill_\@:
        mov	\PLAIN_CYPH_LEN, %r13		# block not completed: write every input byte
_count_set_\@:
        vmovdqa	%xmm9, %xmm0
        vmovq	%xmm0, %rax
        cmp	$8, %r13
        jle	_less_than_8_bytes_left_\@

        # more than 8 bytes: store the low quadword, then continue bytewise
        # with the high quadword
        mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
        add	$8, \DATA_OFFSET
        vpsrldq	$8, %xmm0, %xmm0
        vmovq	%xmm0, %rax
        sub	$8, %r13
_less_than_8_bytes_left_\@:
        # r13 is at least 1 here; store one byte per iteration from al
        movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)
        add	$1, \DATA_OFFSET
        shr	$8, %rax
        sub	$1, %r13
        jne	_less_than_8_bytes_left_\@
_partial_block_done_\@:
.endm # PARTIAL_BLOCK

#ifdef CONFIG_AS_AVX
###############################################################################
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
@@ -856,9 +1006,6 @@ _done_read_partial_block_\@:
	setreg
        vmovdqu AadHash(arg2), reg_i

	# initialize the data pointer offset as zero
	xor     %r11d, %r11d

	# start AES for num_initial_blocks blocks
	vmovdqu CurCount(arg2), \CTR

@@ -1798,9 +1945,6 @@ ENDPROC(aesni_gcm_dec_avx_gen2)
	setreg
	vmovdqu AadHash(arg2), reg_i

	# initialize the data pointer offset as zero
	xor     %r11d, %r11d

	# start AES for num_initial_blocks blocks
	vmovdqu CurCount(arg2), \CTR