/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
.text
#define IV_OFFSET 256
/*
* Warning: the length values used in this module are "unsigned int"
* in C, which is 32-bit. When they're passed in registers, use only
* the low 32 bits, because the top half is unspecified.
*
* This is called from C code, so the contents of those bits can
* depend on the C compiler's optimization decisions. This means that
* mistakes might not be obvious in testing if those bits happen to be
* zero in your build.
*
* Exception: 32-bit lea instructions use a 64-bit address because the
* address size doesn't affect the result, and that form is more
* compactly encoded and preferred by compilers over a 32-bit address.
*/
/*
 * intel_aes_encrypt_init_128 - expand a 128-bit AES key for encryption.
 *
 * in  %rdi : the key (16 bytes are read)
 * in  %rsi : buffer for expanded key; 11 round keys (176 bytes) are written
 * out %eax : 0
 *
 * Clobbers %rsi, %xmm1-%xmm3 and flags.  %eax is zeroed before the first
 * call because key_expansion128 uses it to clear a scratch register.
 * The immediates 0x01..0x36 are the ten round constants, one per call.
 */
	.type intel_aes_encrypt_init_128,@function
	.globl intel_aes_encrypt_init_128
	.align 16
intel_aes_encrypt_init_128:
	movups (%rdi), %xmm1
	movups %xmm1, (%rsi)		/* round key 0 is the key itself */
	leaq 16(%rsi), %rsi
	xorl %eax, %eax
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01	/* aeskeygenassist $0x01, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02	/* aeskeygenassist $0x02, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04	/* aeskeygenassist $0x04, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08	/* aeskeygenassist $0x08, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10	/* aeskeygenassist $0x10, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20	/* aeskeygenassist $0x20, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40	/* aeskeygenassist $0x40, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80	/* aeskeygenassist $0x80, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b	/* aeskeygenassist $0x1b, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36	/* aeskeygenassist $0x36, %xmm1, %xmm2 */
	call key_expansion128
	ret
	.size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
/*
 * intel_aes_decrypt_init_128 - expand a 128-bit AES key for decryption.
 *
 * in  %rdi : the key (16 bytes are read)
 * in  %rsi : buffer for expanded key; 11 round keys (176 bytes) are written
 * out %eax : 0
 *
 * Same expansion as the encrypt variant, except that round keys 1..9 are
 * replaced in the buffer by their aesimc (InvMixColumns) transform so they
 * can be fed to aesdec.  Round keys 0 and 10 are stored untransformed.
 * %xmm1 always keeps the untransformed key, which the next
 * aeskeygenassist needs.
 *
 * Clobbers %rsi, %xmm1-%xmm3 and flags.
 */
	.type intel_aes_decrypt_init_128,@function
	.globl intel_aes_decrypt_init_128
	.align 16
intel_aes_decrypt_init_128:
	movups (%rdi), %xmm1
	movups %xmm1, (%rsi)		/* round key 0 is the key itself */
	leaq 16(%rsi), %rsi
	xorl %eax, %eax			/* key_expansion128 expects %eax == 0 */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01	/* aeskeygenassist $0x01, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)		/* overwrite the key just stored */
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02	/* aeskeygenassist $0x02, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04	/* aeskeygenassist $0x04, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08	/* aeskeygenassist $0x08, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10	/* aeskeygenassist $0x10, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20	/* aeskeygenassist $0x20, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40	/* aeskeygenassist $0x40, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80	/* aeskeygenassist $0x80, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b	/* aeskeygenassist $0x1b, %xmm1, %xmm2 */
	call key_expansion128
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36	/* aeskeygenassist $0x36, %xmm1, %xmm2 */
	call key_expansion128		/* last round key: no aesimc */
	ret
	.size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
/*
 * key_expansion128 - file-local helper: derive and store the next AES-128
 * round key.  Uses a private register convention, not the C ABI.
 *
 * in  %xmm1 : previous round key
 * in  %xmm2 : aeskeygenassist result for %xmm1 (only dword 3 is used)
 * in  %eax  : must be 0; it is copied into %xmm3 to clear it
 * in  %rsi  : where to store the new round key
 * out %xmm1 : new round key (also stored at the incoming %rsi)
 * out %rsi  : advanced by 16
 *
 * Clobbers %xmm2, %xmm3 and flags.
 */
	.type key_expansion128,@function
	.align 16
key_expansion128:
	movd %eax, %xmm3		/* xmm3 = 0 */
	pshufd $0xff, %xmm2, %xmm2	/* broadcast dword 3 of the assist result */
	/* The two shufps/pxor pairs fold the running XOR of the previous
	   key's words into %xmm1: word i ends up as k0 ^ ... ^ ki. */
	shufps $0x10, %xmm1, %xmm3
	pxor %xmm3, %xmm1
	shufps $0x8c, %xmm1, %xmm3
	pxor %xmm2, %xmm1
	pxor %xmm3, %xmm1
	movdqu %xmm1, (%rsi)
	addq $16, %rsi
	ret
	.size key_expansion128, .-key_expansion128
/*
 * intel_aes_encrypt_ecb_128 - AES-128 ECB encryption.
 *
 * in %rdi : cx - context; the 11 round keys are read from offsets 0..160
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer
 * on stack: blocksize - AES blocksize (always 16, unused)
 * out %eax : 0
 *
 * %eax is the byte offset into input and output.  While at least 128
 * bytes remain, eight blocks are processed per iteration; the rest is
 * handled one block at a time.  The single-block loop ends only when the
 * offset equals inputLen exactly, so inputLen must be a multiple of 16.
 *
 * Clobbers %r11, %xmm1-%xmm12 and flags.
 */
	.type intel_aes_encrypt_ecb_128,@function
	.globl intel_aes_encrypt_ecb_128
	.align 16
intel_aes_encrypt_ecb_128:
	movdqu (%rdi), %xmm2		/* first round key */
	movdqu 160(%rdi), %xmm12	/* last round key */
	xorl %eax, %eax
	cmpl $128, %r9d			/* 8 blocks * 16 bytes */
	jb 1f
	leal -128(%r9), %r11d		/* last offset with 8 full blocks left */
2:	movdqu (%r8, %rax), %xmm3
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm2, %xmm3
	pxor %xmm2, %xmm4
	pxor %xmm2, %xmm5
	pxor %xmm2, %xmm6
	pxor %xmm2, %xmm7
	pxor %xmm2, %xmm8
	pxor %xmm2, %xmm9
	pxor %xmm2, %xmm10
	/* complete loop unrolling: round keys are loaded two at a time
	   into %xmm1 / %xmm11 and applied to all eight blocks */
	movdqu 16(%rdi), %xmm1
	movdqu 32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */
	movdqu 48(%rdi), %xmm1
	movdqu 64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */
	movdqu 80(%rdi), %xmm1
	movdqu 96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */
	movdqu 112(%rdi), %xmm1
	movdqu 128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */
	movdqu 144(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xdc	/* aesenclast %xmm12, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe4	/* aesenclast %xmm12, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xec	/* aesenclast %xmm12, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf4	/* aesenclast %xmm12, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xfc	/* aesenclast %xmm12, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc4	/* aesenclast %xmm12, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd4	/* aesenclast %xmm12, %xmm10 */
	movdqu %xmm3, (%rsi, %rax)
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	addl $128, %eax			/* 8 blocks * 16 bytes */
	cmpl %r11d, %eax
	jbe 2b
1:	cmpl %eax, %r9d
	je 5f
	/* single-block tail: keep round keys 1..9 resident in %xmm3-%xmm11 */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11
4:	movdqu (%r8, %rax), %xmm1
	pxor %xmm2, %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)
	addl $16, %eax
	cmpl %eax, %r9d
	jne 4b
5:	xorl %eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
/*
 * intel_aes_decrypt_ecb_128 - AES-128 ECB decryption.
 *
 * in %rdi : cx - context; the 11 round keys are read from offsets 0..160
 *           and applied from offset 160 down to 0 with aesdec/aesdeclast
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer
 * on stack: blocksize - AES blocksize (always 16, unused)
 * out %eax : 0
 *
 * %eax is the byte offset into input and output.  While at least 128
 * bytes remain, eight blocks are processed per iteration; the rest is
 * handled one block at a time.  The single-block loop ends only when the
 * offset equals inputLen exactly, so inputLen must be a multiple of 16.
 *
 * Clobbers %r11, %xmm1-%xmm12 and flags.
 */
	.type intel_aes_decrypt_ecb_128,@function
	.globl intel_aes_decrypt_ecb_128
	.align 16
intel_aes_decrypt_ecb_128:
	movdqu (%rdi), %xmm2		/* key at offset 0: used by aesdeclast */
	movdqu 160(%rdi), %xmm12	/* key at offset 160: initial whitening */
	xorl %eax, %eax
	cmpl $128, %r9d			/* 8 blocks * 16 bytes */
	jb 1f
	leal -128(%r9), %r11d		/* last offset with 8 full blocks left */
2:	movdqu (%r8, %rax), %xmm3
	movdqu 16(%r8, %rax), %xmm4
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm12, %xmm3
	pxor %xmm12, %xmm4
	pxor %xmm12, %xmm5
	pxor %xmm12, %xmm6
	pxor %xmm12, %xmm7
	pxor %xmm12, %xmm8
	pxor %xmm12, %xmm9
	pxor %xmm12, %xmm10
	/* complete loop unrolling: round keys are loaded two at a time
	   into %xmm1 / %xmm11 and applied to all eight blocks */
	movdqu 144(%rdi), %xmm1
	movdqu 128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu 112(%rdi), %xmm1
	movdqu 96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu 80(%rdi), %xmm1
	movdqu 64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu 48(%rdi), %xmm1
	movdqu 32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu 16(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */
	movdqu %xmm3, (%rsi, %rax)
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	addl $128, %eax			/* 8 blocks * 16 bytes */
	cmpl %r11d, %eax
	jbe 2b
1:	cmpl %eax, %r9d
	je 5f
	/* single-block tail: keep the keys at offsets 16..144 resident in
	   %xmm3-%xmm11 and apply them from %xmm11 down to %xmm3 */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11
4:	movdqu (%r8, %rax), %xmm1
	pxor %xmm12, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca		/* aesdeclast %xmm2, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)
	addl $16, %eax
	cmpl %eax, %r9d
	jne 4b
5:	xorl %eax, %eax			/* return 0 */
	ret
	.size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
/*
 * intel_aes_encrypt_cbc_128 - AES-128 CBC encryption.
 *
 * in %rdi : cx - context; round keys are read from offsets 0..160 and
 *           the IV from offset 256 (IV_OFFSET)
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller; the register is reused here
 *           as the IV pointer)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer
 * on stack: blocksize - AES blocksize (always 16, unused)
 * out %eax : 0
 *
 * Each block is XORed with the previous ciphertext block (the IV for the
 * first one) before encryption.  The last ciphertext block is written
 * back to the context as the new IV.  With inputLen == 0 nothing is read
 * or written.  The loop ends only when the offset equals inputLen
 * exactly, so inputLen must be a multiple of 16.
 *
 * Clobbers %rdx, %xmm0-%xmm12 and flags.
 */
	.type intel_aes_encrypt_cbc_128,@function
	.globl intel_aes_encrypt_cbc_128
	.align 16
intel_aes_encrypt_cbc_128:
	testl %r9d, %r9d
	je 2f
	leaq 256(%rdi), %rdx		/* IV_OFFSET(%rdi) */
	movdqu (%rdx), %xmm0		/* chaining value, starts as the IV */
	movdqu (%rdi), %xmm2
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11
	movdqu 160(%rdi), %xmm12
	xorl %eax, %eax			/* byte offset into input/output */
1:	movdqu (%r8, %rax), %xmm1
	pxor %xmm0, %xmm1		/* CBC chaining */
	pxor %xmm2, %xmm1		/* first round key */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc	/* aesenclast %xmm12, %xmm1 */
	movdqu %xmm1, (%rsi, %rax)
	movdqa %xmm1, %xmm0		/* ciphertext chains into next block */
	addl $16, %eax
	cmpl %eax, %r9d
	jne 1b
	movdqu %xmm0, (%rdx)		/* save last ciphertext as the new IV */
2:	xorl %eax, %eax			/* return 0 */
	ret
	.size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
/*
 * intel_aes_decrypt_cbc_128 - AES-128 CBC decryption.
 *
 * in %rdi : cx - context; round keys are read from offsets 0..160 and
 *           the IV from offset 256 (IV_OFFSET)
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller; the register is reused here
 *           as the IV pointer)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer
 * on stack: blocksize - AES blocksize (always 16, unused)
 * out %eax : 0
 *
 * %xmm0 always holds the ciphertext block preceding the one being
 * decrypted (the IV at the start).  While at least 128 bytes remain,
 * eight blocks are decrypted per iteration and the chaining ciphertext is
 * re-read from the input before any output of that iteration is stored.
 * The rest is handled one block at a time.  The final value of %xmm0 is
 * written back to the context as the new IV.  The single-block loop ends
 * only when the offset equals inputLen exactly, so inputLen must be a
 * multiple of 16.
 *
 * Clobbers %rdx, %r11, %xmm0-%xmm13 and flags.
 */
	.type intel_aes_decrypt_cbc_128,@function
	.globl intel_aes_decrypt_cbc_128
	.align 16
intel_aes_decrypt_cbc_128:
	leaq 256(%rdi), %rdx		/* IV_OFFSET(%rdi) */
	movdqu (%rdx), %xmm0		/* iv */
	movdqu (%rdi), %xmm2		/* first key block */
	movdqu 160(%rdi), %xmm12	/* last key block */
	xorl %eax, %eax			/* byte offset into input/output */
	cmpl $128, %r9d			/* 8 blocks * 16 bytes */
	jb 1f
	leal -128(%r9), %r11d		/* last offset with 8 full blocks left */
2:	movdqu (%r8, %rax), %xmm3	/* 1st data block */
	movdqu 16(%r8, %rax), %xmm4	/* 2nd data block */
	movdqu 32(%r8, %rax), %xmm5
	movdqu 48(%r8, %rax), %xmm6
	movdqu 64(%r8, %rax), %xmm7
	movdqu 80(%r8, %rax), %xmm8
	movdqu 96(%r8, %rax), %xmm9
	movdqu 112(%r8, %rax), %xmm10
	pxor %xmm12, %xmm3
	pxor %xmm12, %xmm4
	pxor %xmm12, %xmm5
	pxor %xmm12, %xmm6
	pxor %xmm12, %xmm7
	pxor %xmm12, %xmm8
	pxor %xmm12, %xmm9
	pxor %xmm12, %xmm10
	/* complete loop unrolling: round keys are loaded two at a time
	   into %xmm1 / %xmm11 and applied to all eight blocks */
	movdqu 144(%rdi), %xmm1
	movdqu 128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu 112(%rdi), %xmm1
	movdqu 96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu 80(%rdi), %xmm1
	movdqu 64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu 48(%rdi), %xmm1
	movdqu 32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu 16(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */
	/* CBC chaining: XOR each decrypted block with the ciphertext block
	   before it, re-reading that ciphertext from the input.  %xmm0
	   ends up holding the last ciphertext block of this group. */
	pxor %xmm0, %xmm3
	movdqu (%r8, %rax), %xmm0
	pxor %xmm0, %xmm4
	movdqu 16(%r8, %rax), %xmm0
	pxor %xmm0, %xmm5
	movdqu 32(%r8, %rax), %xmm0
	pxor %xmm0, %xmm6
	movdqu 48(%r8, %rax), %xmm0
	pxor %xmm0, %xmm7
	movdqu 64(%r8, %rax), %xmm0
	pxor %xmm0, %xmm8
	movdqu 80(%r8, %rax), %xmm0
	pxor %xmm0, %xmm9
	movdqu 96(%r8, %rax), %xmm0
	pxor %xmm0, %xmm10
	movdqu 112(%r8, %rax), %xmm0
	movdqu %xmm3, (%rsi, %rax)
	movdqu %xmm4, 16(%rsi, %rax)
	movdqu %xmm5, 32(%rsi, %rax)
	movdqu %xmm6, 48(%rsi, %rax)
	movdqu %xmm7, 64(%rsi, %rax)
	movdqu %xmm8, 80(%rsi, %rax)
	movdqu %xmm9, 96(%rsi, %rax)
	movdqu %xmm10, 112(%rsi, %rax)
	addl $128, %eax
	cmpl %r11d, %eax
	jbe 2b
1:	cmpl %eax, %r9d
	je 5f
	/* single-block tail: keep the keys at offsets 16..144 resident in
	   %xmm3-%xmm11 and apply them from %xmm11 down to %xmm3 */
	movdqu 16(%rdi), %xmm3
	movdqu 32(%rdi), %xmm4
	movdqu 48(%rdi), %xmm5
	movdqu 64(%rdi), %xmm6
	movdqu 80(%rdi), %xmm7
	movdqu 96(%rdi), %xmm8
	movdqu 112(%rdi), %xmm9
	movdqu 128(%rdi), %xmm10
	movdqu 144(%rdi), %xmm11
4:	movdqu (%r8, %rax), %xmm1
	movdqa %xmm1, %xmm13		/* keep ciphertext for chaining */
	pxor %xmm12, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca		/* aesdeclast %xmm2, %xmm1 */
	pxor %xmm0, %xmm1		/* XOR with previous ciphertext / IV */
	movdqu %xmm1, (%rsi, %rax)
	movdqa %xmm13, %xmm0
	addl $16, %eax
	cmpl %eax, %r9d
	jne 4b
5:	movdqu %xmm0, (%rdx)		/* save last ciphertext as the new IV */
	xorl %eax, %eax			/* return 0 */
	ret
	.size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
/*
 * intel_aes_encrypt_init_192 - expand a 192-bit AES key for encryption.
 *
 * in  %rdi : the key (24 bytes are read)
 * in  %rsi : buffer for expanded key
 * out %eax : 0 (zeroed inside key_expansion192)
 *
 * The key is copied to the buffer, then each of the eight
 * key_expansion192 calls appends another 24 bytes, so 216 bytes are
 * written in total.
 *
 * Register use: %xmm1 = first four words of the current 6-word group,
 * %xmm3 (low 64 bits) = its last two words.
 * Clobbers %rsi, %xmm1-%xmm5 and flags.
 */
	.type intel_aes_encrypt_init_192,@function
	.globl intel_aes_encrypt_init_192
	.align 16
intel_aes_encrypt_init_192:
	movdqu (%rdi), %xmm1
	movq 16(%rdi), %xmm3
	movdqu %xmm1, (%rsi)
	movq %xmm3, 16(%rsi)
	leaq 24(%rsi), %rsi
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80	/* aeskeygenassist $0x80, %xmm3, %xmm2 */
	call key_expansion192
	ret
	.size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
/*
 * intel_aes_decrypt_init_192 - expand a 192-bit AES key for decryption.
 *
 * in  %rdi : the key (24 bytes are read)
 * in  %rsi : buffer for expanded key (216 bytes are written)
 * out %eax : 0 (zeroed inside key_expansion192)
 *
 * Same expansion as the encrypt variant, with the 16-byte round keys at
 * buffer offsets 16..176 (round keys 1..11) replaced by their aesimc
 * (InvMixColumns) transform.  Because each key_expansion192 call
 * produces 24 bytes, 16-byte round keys complete at an alternating rate:
 *   - after an odd-numbered call two round keys are complete, at
 *     -32(%rsi) and -16(%rsi); they are reloaded, transformed and
 *     stored back;
 *   - after an even-numbered call one round key is complete, at
 *     -24(%rsi); it is still in %xmm1 and is transformed from there.
 * The expansion state in %xmm1/%xmm3 is never transformed.  Nothing is
 * transformed after the eighth call, so the round key at offset 192
 * stays as generated, as does round key 0.
 *
 * Clobbers %rsi, %xmm1-%xmm5 and flags.
 */
	.type intel_aes_decrypt_init_192,@function
	.globl intel_aes_decrypt_init_192
	.align 16
intel_aes_decrypt_init_192:
	movdqu (%rdi), %xmm1
	movq 16(%rdi), %xmm3
	movdqu %xmm1, (%rsi)
	movq %xmm3, 16(%rsi)
	leaq 24(%rsi), %rsi
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call key_expansion192
	movups -32(%rsi), %xmm2
	movups -16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2		/* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4		/* aesimc %xmm4, %xmm4 */
	movups %xmm2, -32(%rsi)
	movups %xmm4, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -24(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call key_expansion192
	movups -32(%rsi), %xmm2
	movups -16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2		/* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4		/* aesimc %xmm4, %xmm4 */
	movups %xmm2, -32(%rsi)
	movups %xmm4, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -24(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call key_expansion192
	movups -32(%rsi), %xmm2
	movups -16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2		/* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4		/* aesimc %xmm4, %xmm4 */
	movups %xmm2, -32(%rsi)
	movups %xmm4, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call key_expansion192
	.byte 0x66,0x0f,0x38,0xdb,0xd1		/* aesimc %xmm1, %xmm2 */
	movups %xmm2, -24(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	call key_expansion192
	movups -32(%rsi), %xmm2
	movups -16(%rsi), %xmm4
	.byte 0x66,0x0f,0x38,0xdb,0xd2		/* aesimc %xmm2, %xmm2 */
	.byte 0x66,0x0f,0x38,0xdb,0xe4		/* aesimc %xmm4, %xmm4 */
	movups %xmm2, -32(%rsi)
	movups %xmm4, -16(%rsi)
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80	/* aeskeygenassist $0x80, %xmm3, %xmm2 */
	call key_expansion192		/* last round key: no aesimc */
	ret
	.size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
/*
 * key_expansion192 - file-local helper: derive and store the next six
 * words (24 bytes) of the AES-192 key schedule.  Uses a private register
 * convention, not the C ABI.
 *
 * in  %xmm1 : first four words of the previous 6-word group
 * in  %xmm3 : low 64 bits = last two words of the previous group
 * in  %xmm2 : aeskeygenassist result for %xmm3 (only dword 1 is used)
 * in  %rsi  : where to store the new words
 * out %xmm1 : first four new words (stored at the incoming %rsi)
 * out %xmm3 : low 64 bits = last two new words (stored at incoming %rsi + 16)
 * out %rsi  : advanced by 24
 * out %eax  : 0
 *
 * Clobbers %xmm2, %xmm4, %xmm5 and flags.
 */
	.type key_expansion192,@function
	.align 16
key_expansion192:
	pshufd $0x55, %xmm2, %xmm2	/* broadcast dword 1 of the assist result */
	xor %eax, %eax
	movd %eax, %xmm4		/* xmm4 = 0 */
	/* The two shufps/pxor pairs fold the running XOR of the previous
	   words into %xmm1: word i ends up as k0 ^ ... ^ ki. */
	shufps $0x10, %xmm1, %xmm4
	pxor %xmm4, %xmm1
	shufps $0x8c, %xmm1, %xmm4
	pxor %xmm2, %xmm1
	pxor %xmm4, %xmm1
	movdqu %xmm1, (%rsi)
	addq $16, %rsi
	/* Remaining two words: chain from the last word just produced. */
	pshufd $0xff, %xmm1, %xmm4	/* broadcast dword 3 of the new %xmm1 */
	movd %eax, %xmm5		/* xmm5 = 0 */
	shufps $0x00, %xmm3, %xmm5
	shufps $0x08, %xmm3, %xmm5	/* xmm5 = { 0, w0, w0, w0 } of %xmm3 */
	pxor %xmm4, %xmm3
	pxor %xmm5, %xmm3
	movq %xmm3, (%rsi)		/* only the low 64 bits are stored */
	addq $8, %rsi
	ret
	.size key_expansion192, .-key_expansion192
/*
 * intel_aes_encrypt_ecb_192: ECB encryption with a 192-bit key schedule
 * (11 aesenc rounds + aesenclast per block).
 *
 * in %rdi : cx - context; thirteen 16-byte round keys are read from
 *           offsets 0, 16, ..., 192
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller; not touched here)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller; not touched here)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer (low 32 bits only).
 *           The tail loop stops only when the byte offset equals inputLen
 *           exactly, so a multiple of 16 is expected.
 * on stack: blocksize - AES blocksize (always 16, unused)
 * out %eax : always 0
 *
 * Inputs of at least 128 bytes first go through an 8-block unrolled loop;
 * the remainder is handled one block at a time.
 */
	.type intel_aes_encrypt_ecb_192,@function
	.globl intel_aes_encrypt_ecb_192
	.align 16
intel_aes_encrypt_ecb_192:
	movdqu	(%rdi), %xmm2		/* first round key (initial XOR) */
	movdqu	192(%rdi), %xmm14	/* last round key (aesenclast) */
	xorl	%eax, %eax		/* %eax = byte offset into input/output */
	/* 128 = 8 blocks * 16 bytes, spelled as a literal (was $8*16) */
	cmpl	$128, %r9d
	jb	1f
	/* %r11d = inputLen - 128: last offset where 8 blocks still fit */
	leal	-128(%r9), %r11d
2:	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm2, %xmm3
	pxor	%xmm2, %xmm4
	pxor	%xmm2, %xmm5
	pxor	%xmm2, %xmm6
	pxor	%xmm2, %xmm7
	pxor	%xmm2, %xmm8
	pxor	%xmm2, %xmm9
	pxor	%xmm2, %xmm10
	/* complete loop unrolling: round keys are streamed through %xmm1/%xmm11 */
	movdqu	16(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */
	movdqu	48(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */
	movdqu	80(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */
	movdqu	112(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */
	movdqu	144(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xdb	/* aesenc %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xe3	/* aesenc %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xeb	/* aesenc %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xf3	/* aesenc %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xfb	/* aesenc %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xc3	/* aesenc %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdc,0xd3	/* aesenc %xmm11, %xmm10 */
	movdqu	176(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xd9		/* aesenc %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdc,0xe1		/* aesenc %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdc,0xe9		/* aesenc %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdc,0xf1		/* aesenc %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdc,0xf9		/* aesenc %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc1	/* aesenc %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdc,0xd1	/* aesenc %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xde	/* aesenclast %xmm14, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xe6	/* aesenclast %xmm14, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xee	/* aesenclast %xmm14, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xf6	/* aesenclast %xmm14, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xfe	/* aesenclast %xmm14, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xc6	/* aesenclast %xmm14, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xce	/* aesenclast %xmm14, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xdd,0xd6	/* aesenclast %xmm14, %xmm10 */
	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	/* advance by 8 blocks (128 bytes, was $8*16); loop while offset <= inputLen - 128 */
	addl	$128, %eax
	cmpl	%r11d, %eax
	jbe	2b
	/* tail: nothing left when offset == inputLen */
1:	cmpl	%eax, %r9d
	je	5f
	/* keep the eleven middle round keys resident for the one-block loop */
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12
	movdqu	176(%rdi), %xmm13
4:	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm2, %xmm1
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xce	/* aesenclast %xmm14, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	4b
5:	xor	%eax, %eax		/* return 0 */
	ret
	.size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
/*
 * intel_aes_decrypt_ecb_192: ECB decryption with a 192-bit key schedule
 * (11 aesdec rounds + aesdeclast per block). The round keys are applied
 * in reverse order, from offset 192 down to offset 0.
 *
 * in %rdi : cx - context; thirteen 16-byte round keys are read from
 *           offsets 0, 16, ..., 192
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller; not touched here)
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller; not touched here)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer (low 32 bits only).
 *           The tail loop stops only when the byte offset equals inputLen
 *           exactly, so a multiple of 16 is expected.
 * on stack: blocksize - AES blocksize (always 16, unused)
 * out %eax : always 0
 *
 * Inputs of at least 128 bytes first go through an 8-block unrolled loop;
 * the remainder is handled one block at a time.
 */
	.type intel_aes_decrypt_ecb_192,@function
	.globl intel_aes_decrypt_ecb_192
	.align 16
intel_aes_decrypt_ecb_192:
	movdqu	(%rdi), %xmm2		/* key at offset 0: used by aesdeclast */
	movdqu	192(%rdi), %xmm14	/* key at offset 192: initial XOR */
	xorl	%eax, %eax		/* %eax = byte offset into input/output */
	/* 128 = 8 blocks * 16 bytes, spelled as a literal (was $8*16) */
	cmpl	$128, %r9d
	jb	1f
	/* %r11d = inputLen - 128: last offset where 8 blocks still fit */
	leal	-128(%r9), %r11d
2:	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm14, %xmm3
	pxor	%xmm14, %xmm4
	pxor	%xmm14, %xmm5
	pxor	%xmm14, %xmm6
	pxor	%xmm14, %xmm7
	pxor	%xmm14, %xmm8
	pxor	%xmm14, %xmm9
	pxor	%xmm14, %xmm10
	/* complete loop unrolling: round keys are streamed through %xmm1/%xmm11 */
	movdqu	176(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	144(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	112(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	80(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	48(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	16(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */
	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	/* advance by 8 blocks (128 bytes, was $8*16); loop while offset <= inputLen - 128 */
	addl	$128, %eax
	cmpl	%r11d, %eax
	jbe	2b
	/* tail: nothing left when offset == inputLen */
1:	cmpl	%eax, %r9d
	je	5f
	/* keep the eleven middle round keys resident for the one-block loop */
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12
	movdqu	176(%rdi), %xmm13
4:	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm14, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca		/* aesdeclast %xmm2, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	4b
5:	xor	%eax, %eax		/* return 0 */
	ret
	.size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
/*
 * intel_aes_encrypt_cbc_192: CBC encryption with a 192-bit key schedule
 * (11 aesenc rounds + aesenclast per block), one block at a time.
 *
 * in %rdi : cx - context; thirteen 16-byte round keys are read from
 *           offsets 0, 16, ..., 192 and the 16-byte chaining value (IV)
 *           is read from and written back to offset 256 (IV_OFFSET)
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller); the incoming value is not
 *           used, the register is reused as the IV pointer
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller; not touched here)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer (low 32 bits only).
 *           Zero returns immediately without touching the IV. The loop
 *           stops only when the byte offset equals inputLen exactly, so
 *           a multiple of 16 is expected.
 * on stack: blocksize - AES blocksize (always 16, unused)
 * out %eax : always 0
 */
	.type intel_aes_encrypt_cbc_192,@function
	.globl intel_aes_encrypt_cbc_192
	.align 16
intel_aes_encrypt_cbc_192:
	testl	%r9d, %r9d
	je	2f
	/* 256 is IV_OFFSET, spelled as a literal */
	leaq	256(%rdi), %rdx
	movdqu	(%rdx), %xmm0		/* %xmm0 = current chaining value */
	movdqu	(%rdi), %xmm2
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12
	movdqu	176(%rdi), %xmm13
	movdqu	192(%rdi), %xmm14
	xorl	%eax, %eax		/* %eax = byte offset into input/output */
1:	movdqu	(%r8, %rax), %xmm1
	pxor	%xmm0, %xmm1		/* chain: XOR with previous output block / IV */
	pxor	%xmm2, %xmm1		/* initial XOR with the first round key */
	.byte 0x66,0x0f,0x38,0xdc,0xcb		/* aesenc %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcc		/* aesenc %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcd		/* aesenc %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xce		/* aesenc %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdc,0xcf		/* aesenc %xmm7, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc8	/* aesenc %xmm8, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xc9	/* aesenc %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xca	/* aesenc %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb	/* aesenc %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcc	/* aesenc %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd	/* aesenc %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xdd,0xce	/* aesenclast %xmm14, %xmm1 */
	movdqu	%xmm1, (%rsi, %rax)
	movdqa	%xmm1, %xmm0		/* output block becomes the next chaining value */
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	1b
	movdqu	%xmm0, (%rdx)		/* save the last output block as the new IV */
2:	xor	%eax, %eax		/* return 0 */
	ret
	.size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
/*
 * intel_aes_decrypt_cbc_192: CBC decryption with a 192-bit key schedule
 * (11 aesdec rounds + aesdeclast per block). The round keys are applied
 * in reverse order, from offset 192 down to offset 0.
 *
 * in %rdi : cx - context; thirteen 16-byte round keys are read from
 *           offsets 0, 16, ..., 192 and the 16-byte chaining value (IV)
 *           is read from and written back to offset 256 (IV_OFFSET)
 * in %rsi : output - pointer to output buffer
 * in %rdx : outputLen - pointer to variable for length of output
 *           (already filled in by caller); the incoming value is not
 *           used, the register is reused as the IV pointer
 * in %ecx : maxOutputLen - length of output buffer
 *           (already checked by caller; not touched here)
 * in %r8  : input - pointer to input buffer
 * in %r9d : inputLen - length of input buffer (low 32 bits only).
 *           The tail loop stops only when the byte offset equals inputLen
 *           exactly, so a multiple of 16 is expected.
 * on stack: blocksize - AES blocksize (always 16, unused)
 * out %eax : always 0
 *
 * Inputs of at least 128 bytes first go through an 8-block unrolled loop;
 * the remainder is handled one block at a time. %xmm0 always carries the
 * chaining value: the IV at first, afterwards the most recent input block.
 */
	.type intel_aes_decrypt_cbc_192,@function
	.globl intel_aes_decrypt_cbc_192
	.align 16
intel_aes_decrypt_cbc_192:
	/* 256 is IV_OFFSET, spelled as a literal */
	leaq	256(%rdi), %rdx
	movdqu	(%rdx), %xmm0		/* %xmm0 = current chaining value */
	movdqu	(%rdi), %xmm2		/* key at offset 0: used by aesdeclast */
	movdqu	192(%rdi), %xmm14	/* key at offset 192: initial XOR */
	xorl	%eax, %eax		/* %eax = byte offset into input/output */
	cmpl	$128, %r9d		/* 128 = 8 blocks * 16 bytes */
	jb	1f
	leal	-128(%r9), %r11d	/* last offset where 8 blocks still fit */
2:	movdqu	(%r8, %rax), %xmm3
	movdqu	16(%r8, %rax), %xmm4
	movdqu	32(%r8, %rax), %xmm5
	movdqu	48(%r8, %rax), %xmm6
	movdqu	64(%r8, %rax), %xmm7
	movdqu	80(%r8, %rax), %xmm8
	movdqu	96(%r8, %rax), %xmm9
	movdqu	112(%r8, %rax), %xmm10
	pxor	%xmm14, %xmm3
	pxor	%xmm14, %xmm4
	pxor	%xmm14, %xmm5
	pxor	%xmm14, %xmm6
	pxor	%xmm14, %xmm7
	pxor	%xmm14, %xmm8
	pxor	%xmm14, %xmm9
	pxor	%xmm14, %xmm10
	/* complete loop unrolling: round keys are streamed through %xmm1/%xmm11 */
	movdqu	176(%rdi), %xmm1
	movdqu	160(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	144(%rdi), %xmm1
	movdqu	128(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	112(%rdi), %xmm1
	movdqu	96(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	80(%rdi), %xmm1
	movdqu	64(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	48(%rdi), %xmm1
	movdqu	32(%rdi), %xmm11
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xdb	/* aesdec %xmm11, %xmm3 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xe3	/* aesdec %xmm11, %xmm4 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xeb	/* aesdec %xmm11, %xmm5 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xf3	/* aesdec %xmm11, %xmm6 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xfb	/* aesdec %xmm11, %xmm7 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xc3	/* aesdec %xmm11, %xmm8 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm9 */
	.byte 0x66,0x45,0x0f,0x38,0xde,0xd3	/* aesdec %xmm11, %xmm10 */
	movdqu	16(%rdi), %xmm1
	.byte 0x66,0x0f,0x38,0xde,0xd9		/* aesdec %xmm1, %xmm3 */
	.byte 0x66,0x0f,0x38,0xde,0xe1		/* aesdec %xmm1, %xmm4 */
	.byte 0x66,0x0f,0x38,0xde,0xe9		/* aesdec %xmm1, %xmm5 */
	.byte 0x66,0x0f,0x38,0xde,0xf1		/* aesdec %xmm1, %xmm6 */
	.byte 0x66,0x0f,0x38,0xde,0xf9		/* aesdec %xmm1, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc1	/* aesdec %xmm1, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xc9	/* aesdec %xmm1, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xde,0xd1	/* aesdec %xmm1, %xmm10 */
	.byte 0x66,0x0f,0x38,0xdf,0xda		/* aesdeclast %xmm2, %xmm3 */
	.byte 0x66,0x0f,0x38,0xdf,0xe2		/* aesdeclast %xmm2, %xmm4 */
	.byte 0x66,0x0f,0x38,0xdf,0xea		/* aesdeclast %xmm2, %xmm5 */
	.byte 0x66,0x0f,0x38,0xdf,0xf2		/* aesdeclast %xmm2, %xmm6 */
	.byte 0x66,0x0f,0x38,0xdf,0xfa		/* aesdeclast %xmm2, %xmm7 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xc2	/* aesdeclast %xmm2, %xmm8 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xca	/* aesdeclast %xmm2, %xmm9 */
	.byte 0x66,0x44,0x0f,0x38,0xdf,0xd2	/* aesdeclast %xmm2, %xmm10 */
	/*
	 * Undo the chaining: block i is XORed with input block i-1 (the
	 * carried-in %xmm0 for the first one). The input blocks are re-read
	 * from memory because their registers were decrypted in place; all
	 * of these reloads happen before any store of this batch. %xmm0
	 * ends up holding the last input block of the batch.
	 */
	pxor	%xmm0, %xmm3
	movdqu	(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm4
	movdqu	16(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm5
	movdqu	32(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm6
	movdqu	48(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm7
	movdqu	64(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm8
	movdqu	80(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm9
	movdqu	96(%r8, %rax), %xmm0
	pxor	%xmm0, %xmm10
	movdqu	112(%r8, %rax), %xmm0
	movdqu	%xmm3, (%rsi, %rax)
	movdqu	%xmm4, 16(%rsi, %rax)
	movdqu	%xmm5, 32(%rsi, %rax)
	movdqu	%xmm6, 48(%rsi, %rax)
	movdqu	%xmm7, 64(%rsi, %rax)
	movdqu	%xmm8, 80(%rsi, %rax)
	movdqu	%xmm9, 96(%rsi, %rax)
	movdqu	%xmm10, 112(%rsi, %rax)
	/* advance by 8 blocks; loop while offset <= inputLen - 128 */
	addl	$128, %eax
	cmpl	%r11d, %eax
	jbe	2b
	/* tail: nothing left when offset == inputLen */
1:	cmpl	%eax, %r9d
	je	5f
	/* keep the eleven middle round keys resident for the one-block loop */
	movdqu	16(%rdi), %xmm3
	movdqu	32(%rdi), %xmm4
	movdqu	48(%rdi), %xmm5
	movdqu	64(%rdi), %xmm6
	movdqu	80(%rdi), %xmm7
	movdqu	96(%rdi), %xmm8
	movdqu	112(%rdi), %xmm9
	movdqu	128(%rdi), %xmm10
	movdqu	144(%rdi), %xmm11
	movdqu	160(%rdi), %xmm12
	movdqu	176(%rdi), %xmm13
4:	movdqu	(%r8, %rax), %xmm1
	movdqa	%xmm1, %xmm15		/* keep the input block for the next chaining step */
	pxor	%xmm14, %xmm1
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcd	/* aesdec %xmm13, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcc	/* aesdec %xmm12, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xcb	/* aesdec %xmm11, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xca	/* aesdec %xmm10, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc9	/* aesdec %xmm9, %xmm1 */
	.byte 0x66,0x41,0x0f,0x38,0xde,0xc8	/* aesdec %xmm8, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcf		/* aesdec %xmm7, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xce		/* aesdec %xmm6, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcd		/* aesdec %xmm5, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcc		/* aesdec %xmm4, %xmm1 */
	.byte 0x66,0x0f,0x38,0xde,0xcb		/* aesdec %xmm3, %xmm1 */
	.byte 0x66,0x0f,0x38,0xdf,0xca		/* aesdeclast %xmm2, %xmm1 */
	pxor	%xmm0, %xmm1		/* undo chaining with the previous input block / IV */
	movdqu	%xmm1, (%rsi, %rax)
	movdqa	%xmm15, %xmm0
	addl	$16, %eax
	cmpl	%eax, %r9d
	jne	4b
5:	movdqu	%xmm0, (%rdx)		/* save the chaining value as the new IV */
	xor	%eax, %eax		/* return 0 */
	ret
	.size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
/*
 * intel_aes_encrypt_init_256: expand a 256-bit key for encryption.
 *
 * in %rdi : the key (32 bytes are read)
 * in %rsi : buffer for expanded key
 *
 * The 32 key bytes are copied to the start of the buffer, then
 * key_expansion256 (defined elsewhere in this file) is called six times
 * and one final 16-byte round key is derived inline and stored at the
 * resulting %rsi.
 * NOTE(review): the helper is assumed to append to the buffer and advance
 * %rsi on each call; confirm against its definition.
 *
 * %rsi is modified. %eax is zeroed before the helper calls; whether the
 * helper preserves it is not visible here. %xmm1, %xmm2, %xmm3 and %xmm6
 * are overwritten, plus whatever the helper clobbers.
 */
	.type intel_aes_encrypt_init_256,@function
	.globl intel_aes_encrypt_init_256
	.align 16
intel_aes_encrypt_init_256:
	movdqu	(%rdi), %xmm1		/* low half of the key */
	movdqu	16(%rdi), %xmm3		/* high half of the key */
	movdqu	%xmm1, (%rsi)
	movdqu	%xmm3, 16(%rsi)
	leaq	32(%rsi), %rsi		/* cursor now points past the raw key */
	xor	%eax, %eax
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01	/* aeskeygenassist $0x01, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02	/* aeskeygenassist $0x02, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04	/* aeskeygenassist $0x04, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08	/* aeskeygenassist $0x08, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10	/* aeskeygenassist $0x10, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20	/* aeskeygenassist $0x20, %xmm3, %xmm2 */
	call	key_expansion256
	.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40	/* aeskeygenassist $0x40, %xmm3, %xmm2 */
	/*
	 * Final step, done inline and storing a single 16-byte key. With
	 * %xmm1 = (k0,k1,k2,k3) and t = dword 3 of the assist result, this
	 * leaves (k0^t, k0^k1^t, k0^k1^k2^t, k0^k1^k2^k3^t) in %xmm1.
	 */
	pxor	%xmm6, %xmm6		/* %xmm6 = 0 */
	pshufd	$0xff, %xmm2, %xmm2	/* broadcast dword 3 of %xmm2 */
	shufps	$0x10, %xmm1, %xmm6	/* %xmm6 = (0, 0, k1, k0) */
	pxor	%xmm6, %xmm1
	shufps	$0x8c, %xmm1, %xmm6	/* %xmm6 = (0, k0, k0, k1^k2) */
	pxor	%xmm2, %xmm1
	pxor	%xmm6, %xmm1
	movdqu	%xmm1, (%rsi)
	ret
	.size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
/* in %rdi : the key
in %rsi : buffer for expanded key
*/
.type intel_aes_decrypt_init_256,@function
.globl intel_aes_decrypt_init_256
.
align 16
intel_aes_decrypt_init_256:
movdqu (%rdi), %xmm1
movdqu 16(%rdi), %xmm3
movdqu %xmm1, (%rsi)
.byte 0x66,0x0f,0x38,0xdb,0xe3
/* aesimc %xmm3, %xmm4 */
movdqu %xmm4, 16(%rsi)
leaq 32(%rsi), %rsi
xor %eax, %eax
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01
/* aeskeygenassist $0x01, %xmm3, %xmm2 */
call key_expansion256
.byte 0x66,0x0f,0x38,0xdb,0xe1
/* aesimc %xmm1, %xmm4 */
.byte 0x66,0x0f,0x38,0xdb,0xeb
/* aesimc %xmm3, %xmm5 */
movdqu %xmm4, -32(%rsi)
movdqu %xmm5, -16(%rsi)
.byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02
/* aeskeygenassist $0x02, %xmm3, %xmm2 */
--> --------------------
--> maximum size reached
--> --------------------