	/*
	 * Keep 64 bits of the IV in a register.  For CTR mode this lets us
	 * easily increment the IV.  For XCTR mode this lets us efficiently XOR
	 * the 64-bit counter with the IV.
	 */
	.if \xctr
		/* XCTR: take the low 64 IV bits; counter = byte offset / 16 */
		umov		IV_PART, vctr.d[0]
		lsr		CTR_W, BYTE_CTR_W, #4
	.else
		/*
		 * CTR: the big-endian block counter is in the high 64 bits of
		 * the IV; byte-swap it so it can be incremented with ordinary
		 * integer arithmetic.
		 */
		umov		IV_PART, vctr.d[1]
		rev		IV_PART, IV_PART
	.endif
	/*
	 * Set up the counter values in v0-v{MAX_STRIDE-1}.
	 *
	 * If we are encrypting less than MAX_STRIDE blocks, the tail block
	 * handling code expects the last keystream block to be in
	 * v{MAX_STRIDE-1}.  For example: if encrypting two blocks with
	 * MAX_STRIDE=5, then v3 and v4 should have the next two counter blocks.
	 */
	.if \xctr
		add		CTR, CTR, BLOCKS
	.else
		/* "adds" sets C on 64-bit wraparound; the carry path below
		 * (bcs / subsection 1) relies on that flag. */
		adds		IV_PART, IV_PART, BLOCKS
	.endif
	/* Start every keystream block from the current IV/counter vector */
	mov		v0.16b, vctr.16b
	mov		v1.16b, vctr.16b
	mov		v2.16b, vctr.16b
	mov		v3.16b, vctr.16b
ST5(	mov		v4.16b, vctr.16b		)
.if \xctr sub x6, CTR, #MAX_STRIDE - 1 sub x7, CTR, #MAX_STRIDE - 2 sub x8, CTR, #MAX_STRIDE - 3 sub x9, CTR, #MAX_STRIDE - 4
ST5( sub x10, CTR, #MAX_STRIDE - 5 )
eor x6, x6, IV_PART
eor x7, x7, IV_PART
eor x8, x8, IV_PART
eor x9, x9, IV_PART
ST5( eor x10, x10, IV_PART )
mov v0.d[0], x6
mov v1.d[0], x7
mov v2.d[0], x8
mov v3.d[0], x9
ST5( mov v4.d[0], x10 )
.else
bcs 0f
.subsection 1 /* * This subsection handles carries. * * Conditional branching here is allowed with respect to time * invariance since the branches are dependent on the IV instead * of the plaintext or key. This code is rarely executed in * practice anyway.
*/
/* * Apply carry to counter blocks if needed. * * Since the carry flag was set, we know 0 <= IV_PART < * MAX_STRIDE. Using the value of IV_PART we can determine how * many counter blocks need to be updated.
*/
cbz IV_PART, 2f
adr x16, 1f sub x16, x16, IV_PART, lsl #3
br x16
bti c
mov v0.d[0], vctr.d[0]
bti c
mov v1.d[0], vctr.d[0]
bti c
mov v2.d[0], vctr.d[0]
bti c
mov v3.d[0], vctr.d[0]
ST5( bti c )
ST5( mov v4.d[0], vctr.d[0] )
1: b 2f
.previous
	/*
	 * If there are at least MAX_STRIDE blocks left, XOR the data with
	 * keystream and store.  Otherwise jump to tail handling.
	 */
	/* bit 31 set => BYTES_W went negative => partial stride remains */
	tbnz		BYTES_W, #31, .Lctrtail\xctr
	ld1		{v5.16b-v7.16b}, [IN], #48
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	/* Loads of the remaining input are interleaved with the XORs so the
	 * load latency overlaps useful work. */
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [IN], #16		)
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [IN], #32	)
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [OUT], #64
ST5(	st1		{v4.16b}, [OUT], #16	)
	/* Exactly zero bytes left: done; otherwise loop for another stride */
	cbz		BYTES_W, .Lctrout\xctr
	b		.LctrloopNx\xctr
	/* Common exit: in CTR mode, write back the updated IV for the caller */
.Lctrout\xctr:
	.if !\xctr
		st1		{vctr.16b}, [IV] /* return next CTR value */
	.endif
	frame_pop
	ret
.Lctrtail\xctr:
	/*
	 * Handle up to MAX_STRIDE * 16 - 1 bytes of plaintext
	 *
	 * This code expects the last keystream block to be in v{MAX_STRIDE-1}.
	 * For example: if encrypting two blocks with MAX_STRIDE=5, then v3 and
	 * v4 should have the next two counter blocks.
	 *
	 * This allows us to store the ciphertext by writing to overlapping
	 * regions of memory.  Any invalid ciphertext blocks get overwritten by
	 * correctly computed blocks.  This approach greatly simplifies the
	 * logic for storing the ciphertext.
	 */
	mov		x16, #16
	/* w7 = BYTES_W mod 16; x13 = size of the final store
	 * (the partial remainder if nonzero, else a full 16 bytes) */
	ands		w7, BYTES_W, #0xf
	csel		x13, x7, x16, ne
.Lctrtail1x\xctr:
	/*
	 * Handle <= 16 bytes of plaintext
	 *
	 * This code always reads and writes 16 bytes.  To avoid out of bounds
	 * accesses, XCTR and CTR modes must use a temporary buffer when
	 * encrypting/decrypting less than 16 bytes.
	 *
	 * This code is unusual in that it loads the input and stores the output
	 * relative to the end of the buffers rather than relative to the start.
	 * This causes unusual behaviour when encrypting/decrypting less than 16
	 * bytes; the end of the data is expected to be at the end of the
	 * temporary buffer rather than the start of the data being at the start
	 * of the temporary buffer.
	 */
	/* (Fix: the comment terminator and this "sub" were jammed onto one
	 * source line, which mangles the statement; one statement per line.) */
	sub		x8, x7, #16
	/* x7 = offset of the final 16-byte window from the current pointers */
	csel		x7, x7, x8, eq
	add		IN, IN, x7
	add		OUT, OUT, x7
	ld1		{v5.16b}, [IN]
	ld1		{v6.16b}, [OUT]
ST5(	mov		v3.16b, v4.16b			)
	encrypt_block	v3, ROUNDS_W, KEY, x8, w7
	/*
	 * NOTE(review): x9 is expected to point at a permute/mask table entry
	 * here -- it is not initialized anywhere in this visible span, so the
	 * setup (likely an adr_l of a permute table plus the byte offset) must
	 * happen earlier in the macro; confirm against the full file.
	 */
	ld1		{v10.16b-v11.16b}, [x9]
	/* Rotate the keystream into position, then merge with the existing
	 * OUT bytes under the v11 mask so only valid bytes are replaced. */
	tbl		v3.16b, {v3.16b}, v10.16b
	sshr		v11.16b, v11.16b, #7
	eor		v5.16b, v5.16b, v3.16b
	bif		v5.16b, v6.16b, v11.16b
	st1		{v5.16b}, [OUT]
	b		.Lctrout\xctr
	/*
	 * Release the register aliases (defined at the top of this macro,
	 * outside this span) so the names can be reused by other macros.
	 */
	// Arguments
	.unreq OUT
	.unreq IN
	.unreq KEY
	.unreq ROUNDS_W
	.unreq BYTES_W
	.unreq IV
	.unreq BYTE_CTR_W	// XCTR only
	// Intermediate values
	.unreq CTR_W		// XCTR only
	.unreq CTR		// XCTR only
	.unreq IV_PART
	.unreq BLOCKS
	.unreq BLOCKS_W
	.endm
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int bytes, u8 ctr[])
	 *
	 * The input and output buffers must always be at least 16 bytes even if
	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
	 * accesses will occur.  The data to be encrypted/decrypted is expected
	 * to be at the end of this 16-byte temporary buffer rather than the
	 * start.
	 */
	/*
	 * aes_xctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		    int bytes, u8 const iv[], int byte_ctr)
	 *
	 * The input and output buffers must always be at least 16 bytes even if
	 * encrypting/decrypting less than 16 bytes.  Otherwise out of bounds
	 * accesses will occur.  The data to be encrypted/decrypted is expected
	 * to be at the end of this 16-byte temporary buffer rather than the
	 * start.
	 */
	/*
	 * NOTE(review): the following text is website boilerplate that was
	 * accidentally appended during extraction and is not part of the
	 * source; it is preserved here (translated from German) as a comment
	 * so the file still assembles:
	 *
	 * "The information on this website has been carefully compiled to the
	 * best of our knowledge.  However, no guarantee is given as to the
	 * completeness, correctness, or quality of the information provided.
	 * Note: the colored syntax highlighting and the measurement are still
	 * experimental."
	 */