Commit ec8c02d9 authored by Dave Watson, committed by Herbert Xu
Browse files

crypto: aesni - Introduce READ_PARTIAL_BLOCK macro



Introduce READ_PARTIAL_BLOCK macro, and use it in the two existing
partial block cases: AAD and the end of ENC_DEC.   In particular,
the ENC_DEC case should be faster, since we read by 8/4 bytes if
possible.

This macro will also be used to read partial blocks between
enc_update and dec_update calls.

Signed-off-by: Dave Watson <davejwatson@fb.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
parent 517a448e
Loading
Loading
Loading
Loading
+59 −43
Original line number Diff line number Diff line
@@ -415,15 +415,13 @@ _zero_cipher_left\@:
        vmovdqu %xmm14, AadHash(arg2)
        vmovdqu %xmm9, CurCount(arg2)

        cmp     $16, arg5
        jl      _only_less_than_16\@

        # check for 0 length
        mov     arg5, %r13
        and     $15, %r13                            # r13 = (arg5 mod 16)

        je      _multiple_of_16_bytes\@

        # handle the last <16 Byte block seperately
        # handle the last <16 Byte block separately

        mov %r13, PBlockLen(arg2)

@@ -434,49 +432,39 @@ _zero_cipher_left\@:
        ENCRYPT_SINGLE_BLOCK    \REP, %xmm9                # E(K, Yn)
        vmovdqu %xmm9, PBlockEncKey(arg2)

        sub     $16, %r11
        add     %r13, %r11
        vmovdqu (arg4, %r11), %xmm1                  # receive the last <16 Byte block
        cmp $16, arg5
        jge _large_enough_update\@

        lea (arg4,%r11,1), %r10
        mov %r13, %r12

        READ_PARTIAL_BLOCK %r10 %r12 %xmm1

        lea     SHIFT_MASK+16(%rip), %r12
        sub     %r13, %r12                           # adjust the shuffle mask pointer to be
						     # able to shift 16-r13 bytes (r13 is the
	# number of bytes in plaintext mod 16)
        vmovdqu (%r12), %xmm2                        # get the appropriate shuffle mask
        vpshufb %xmm2, %xmm1, %xmm1                  # shift right 16-r13 bytes
        jmp     _final_ghash_mul\@

_only_less_than_16\@:
        # check for 0 length
        mov     arg5, %r13
        and     $15, %r13                            # r13 = (arg5 mod 16)

        je      _multiple_of_16_bytes\@

        # handle the last <16 Byte block separately
        jmp _final_ghash_mul\@

_large_enough_update\@:
        sub $16, %r11
        add %r13, %r11

        vpaddd  ONE(%rip), %xmm9, %xmm9              # INCR CNT to get Yn
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9
        ENCRYPT_SINGLE_BLOCK    \REP, %xmm9                # E(K, Yn)
        # receive the last <16 Byte block
        vmovdqu	(arg4, %r11, 1), %xmm1

        vmovdqu %xmm9, PBlockEncKey(arg2)
        sub	%r13, %r11
        add	$16, %r11

        lea	SHIFT_MASK+16(%rip), %r12
        sub     %r13, %r12                           # adjust the shuffle mask pointer to be
						     # able to shift 16-r13 bytes (r13 is the
						     # number of bytes in plaintext mod 16)

_get_last_16_byte_loop\@:
        movb    (arg4, %r11),  %al
        movb    %al,  TMP1 (%rsp , %r11)
        add     $1, %r11
        cmp     %r13,  %r11
        jne     _get_last_16_byte_loop\@

        vmovdqu  TMP1(%rsp), %xmm1

        sub     $16, %r11
        # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
        # (r13 is the number of bytes in plaintext mod 16)
        sub	%r13, %r12
        # get the appropriate shuffle mask
        vmovdqu	(%r12), %xmm2
        # shift right 16-r13 bytes
        vpshufb  %xmm2, %xmm1, %xmm1

_final_ghash_mul\@:
        .if  \ENC_DEC ==  DEC
@@ -490,8 +478,6 @@ _final_ghash_mul\@:
        vpxor   %xmm2, %xmm14, %xmm14

        vmovdqu %xmm14, AadHash(arg2)
        sub     %r13, %r11
        add     $16, %r11
        .else
        vpxor   %xmm1, %xmm9, %xmm9                  # Plaintext XOR E(K, Yn)
        vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1        # get the appropriate mask to
@@ -501,8 +487,6 @@ _final_ghash_mul\@:
        vpxor   %xmm9, %xmm14, %xmm14

        vmovdqu %xmm14, AadHash(arg2)
        sub     %r13, %r11
        add     $16, %r11
        vpshufb SHUF_MASK(%rip), %xmm9, %xmm9        # shuffle xmm9 back to output as ciphertext
        .endif

@@ -721,6 +705,38 @@ _get_AAD_done\@:
        \PRECOMPUTE  %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5
.endm


# READ_PARTIAL_BLOCK: read DLEN bytes starting at DPTR and store them in
# the low DLEN bytes of XMMDst (remaining bytes of XMMDst are zeroed).
# Preconditions: 0 < DLEN < 16; DLEN must be a general-purpose register
# (it is used as an index register and counted down to zero).
# Clobbers: %rax, DLEN (left at 0), and EFLAGS (cmp/sub/dec/shl).
# Bytes are assembled in memory (little-endian) order, so XMMDst ends up
# holding the same value a 16-byte vmovdqu would have produced for the
# bytes that exist; the caller shifts/masks as needed afterwards.
.macro READ_PARTIAL_BLOCK DPTR DLEN XMMDst
        vpxor \XMMDst, \XMMDst, \XMMDst        # start from all-zero destination

        cmp $8, \DLEN
        jl _read_lt8_\@                        # fewer than 8 bytes: byte loop only
        mov (\DPTR), %rax                      # >= 8 bytes: bulk-read first qword
        vpinsrq $0, %rax, \XMMDst, \XMMDst     # place it in the low qword of XMMDst
        sub $8, \DLEN                          # DLEN = number of tail bytes (0-7)
        jz _done_read_partial_block_\@         # exactly 8 bytes: nothing left
        xor %eax, %eax                         # accumulator for the tail bytes
_read_next_byte_\@:
        # Walk from the highest remaining byte (DPTR[8+DLEN-1]) down to
        # DPTR[8], shifting previous bytes up so the lowest-address byte
        # lands in %al last - i.e. little-endian order in %rax.
        shl $8, %rax
        mov 7(\DPTR, \DLEN, 1), %al
        dec \DLEN
        jnz _read_next_byte_\@
        vpinsrq $1, %rax, \XMMDst, \XMMDst     # tail goes in the high qword
        jmp _done_read_partial_block_\@
_read_lt8_\@:
        xor %eax, %eax                         # accumulator for 1-7 bytes
_read_next_byte_lt8_\@:
        # Same descending byte walk, from DPTR[DLEN-1] down to DPTR[0],
        # building the value in little-endian order in %rax.
        shl $8, %rax
        mov -1(\DPTR, \DLEN, 1), %al
        dec \DLEN
        jnz _read_next_byte_lt8_\@
        vpinsrq $0, %rax, \XMMDst, \XMMDst     # <8 bytes all fit in the low qword
_done_read_partial_block_\@:
.endm

#ifdef CONFIG_AS_AVX
###############################################################################
# GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)