krb5 commit: Adjust AESNI sources for krb5 tree
Greg Hudson
ghudson at MIT.EDU
Fri May 24 14:26:13 EDT 2013
https://github.com/krb5/krb5/commit/0231309631acb59cc8b22227ca461005f38cc668
commit 0231309631acb59cc8b22227ca461005f38cc668
Author: Greg Hudson <ghudson at mit.edu>
Date: Sat May 4 19:09:38 2013 -0400
Adjust AESNI sources for krb5 tree
Remove functions we don't need. Add macros to redefine functions with
an appropriate namespace prefix.
src/lib/crypto/builtin/aes/iaesx64.s | 1277 +---------------------------
src/lib/crypto/builtin/aes/iaesx86.s | 1594 +++-------------------------------
2 files changed, 150 insertions(+), 2721 deletions(-)
diff --git a/src/lib/crypto/builtin/aes/iaesx64.s b/src/lib/crypto/builtin/aes/iaesx64.s
index 1012e36..1c091c1 100644
--- a/src/lib/crypto/builtin/aes/iaesx64.s
+++ b/src/lib/crypto/builtin/aes/iaesx64.s
@@ -27,6 +27,15 @@
; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
; ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+%define iEncExpandKey128 k5_iEncExpandKey128
+%define iEncExpandKey256 k5_iEncExpandKey256
+%define iDecExpandKey128 k5_iDecExpandKey128
+%define iDecExpandKey256 k5_iDecExpandKey256
+%define iEnc128_CBC k5_iEnc128_CBC
+%define iEnc256_CBC k5_iEnc256_CBC
+%define iDec128_CBC k5_iDec128_CBC
+%define iDec256_CBC k5_iDec256_CBC
+
%macro linux_setup 0
%ifdef __linux__
mov rcx, rdi
@@ -338,66 +347,6 @@ iEncExpandKey128:
align 16
-global iEncExpandKey192
-iEncExpandKey192:
-
- linux_setup
- sub rsp,64+8
- movdqa [rsp],xmm6
- movdqa [rsp+16],xmm7
-
-
- movq xmm7, [rcx+16] ; loading the AES key
- movq [rdx+16], xmm7 ; Storing key in memory where all key expansion
- pshufd xmm4, xmm7, 01001111b
- movdqu xmm1, [rcx] ; loading the AES key
- movdqu [rdx], xmm1 ; Storing key in memory where all key expansion
-
- pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion.
- pxor xmm6, xmm6 ; Set xmm3 to be all zeros. Required for the key_expansion.
-
- aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
- key_expansion_1_192 24
- key_expansion_2_192 40
-
- aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
- key_expansion_1_192 48
- key_expansion_2_192 64
-
- aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
- key_expansion_1_192 72
- key_expansion_2_192 88
-
- aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
- key_expansion_1_192 96
- key_expansion_2_192 112
-
- aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
- key_expansion_1_192 120
- key_expansion_2_192 136
-
- aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
- key_expansion_1_192 144
- key_expansion_2_192 160
-
- aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
- key_expansion_1_192 168
- key_expansion_2_192 184
-
- aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
- key_expansion_1_192 192
-
-
- movdqa xmm6,[rsp]
- movdqa xmm7,[rsp+16]
- add rsp,64+8
-
- ret
-
-
-
-
-align 16
global iDecExpandKey128
iDecExpandKey128:
@@ -425,37 +374,6 @@ iDecExpandKey128:
ret
-align 16
-global iDecExpandKey192
-iDecExpandKey192:
-
- linux_setup
- push rcx
- push rdx
- sub rsp,16+8
-
- call iEncExpandKey192
-
- add rsp,16+8
- pop rdx
- pop rcx
-
-
- inversekey [rdx + 1*16]
- inversekey [rdx + 2*16]
- inversekey [rdx + 3*16]
- inversekey [rdx + 4*16]
- inversekey [rdx + 5*16]
- inversekey [rdx + 6*16]
- inversekey [rdx + 7*16]
- inversekey [rdx + 8*16]
- inversekey [rdx + 9*16]
- inversekey [rdx + 10*16]
- inversekey [rdx + 11*16]
-
- ret
-
-
align 16
global iDecExpandKey256
@@ -539,103 +457,6 @@ iEncExpandKey256:
-
-
-
-align 16
-global iDec128
-iDec128:
-
- linux_setup
- sub rsp,16*16+8
-
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
- test eax,eax
- jz end_dec128
-
- cmp eax,4
- jl lp128decsingle
-
- test rcx,0xf
- jz lp128decfour
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- mov rcx,rsp
-
-
-
-align 16
-lp128decfour:
-
- test eax,eax
- jz end_dec128
-
- cmp eax,4
- jl lp128decsingle
-
- load_and_xor4 rdx, [rcx+10*16]
- add rdx,16*4
- aesdec4 [rcx+9*16]
- aesdec4 [rcx+8*16]
- aesdec4 [rcx+7*16]
- aesdec4 [rcx+6*16]
- aesdec4 [rcx+5*16]
- aesdec4 [rcx+4*16]
- aesdec4 [rcx+3*16]
- aesdec4 [rcx+2*16]
- aesdec4 [rcx+1*16]
- aesdeclast4 [rcx+0*16]
-
- sub eax,4
- store4 r8+rdx-(16*4)
- jmp lp128decfour
-
-
- align 16
-lp128decsingle:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+10*16]
- pxor xmm0, xmm4
- aesdec1_u [rcx+9*16]
- aesdec1_u [rcx+8*16]
- aesdec1_u [rcx+7*16]
- aesdec1_u [rcx+6*16]
- aesdec1_u [rcx+5*16]
- aesdec1_u [rcx+4*16]
- aesdec1_u [rcx+3*16]
- aesdec1_u [rcx+2*16]
- aesdec1_u [rcx+1*16]
- aesdeclast1_u [rcx+0*16]
-
- add rdx, 16
- movdqu [r8 + rdx - 16], xmm0
- dec eax
- jnz lp128decsingle
-
-end_dec128:
-
- add rsp,16*16+8
- ret
-
-
align 16
global iDec128_CBC
iDec128_CBC:
@@ -748,124 +569,6 @@ end_dec128_CBC:
ret
-align 16
-global iDec192_CBC
-iDec192_CBC:
-
- linux_setup
- sub rsp,16*16+8
-
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm5,[rax]
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
-
- sub r8,rdx
-
- test eax,eax
- jz end_dec192_CBC
-
- cmp eax,4
- jl lp192decsingle_CBC
-
- test rcx,0xf
- jz lp192decfour_CBC
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-
-align 16
-lp192decfour_CBC:
-
- test eax,eax
- jz end_dec192_CBC
-
- cmp eax,4
- jl lp192decsingle_CBC
-
- load_and_xor4 rdx, [rcx+12*16]
- add rdx,16*4
- aesdec4 [rcx+11*16]
- aesdec4 [rcx+10*16]
- aesdec4 [rcx+9*16]
- aesdec4 [rcx+8*16]
- aesdec4 [rcx+7*16]
- aesdec4 [rcx+6*16]
- aesdec4 [rcx+5*16]
- aesdec4 [rcx+4*16]
- aesdec4 [rcx+3*16]
- aesdec4 [rcx+2*16]
- aesdec4 [rcx+1*16]
- aesdeclast4 [rcx+0*16]
-
- pxor xmm0,xmm5
- movdqu xmm4,[rdx - 16*4 + 0*16]
- pxor xmm1,xmm4
- movdqu xmm4,[rdx - 16*4 + 1*16]
- pxor xmm2,xmm4
- movdqu xmm4,[rdx - 16*4 + 2*16]
- pxor xmm3,xmm4
- movdqu xmm5,[rdx - 16*4 + 3*16]
-
- sub eax,4
- store4 r8+rdx-(16*4)
- jmp lp192decfour_CBC
-
-
- align 16
-lp192decsingle_CBC:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+12*16]
- movdqa xmm1,xmm0
- pxor xmm0, xmm4
- aesdec1_u [rcx+11*16]
- aesdec1_u [rcx+10*16]
- aesdec1_u [rcx+9*16]
- aesdec1_u [rcx+8*16]
- aesdec1_u [rcx+7*16]
- aesdec1_u [rcx+6*16]
- aesdec1_u [rcx+5*16]
- aesdec1_u [rcx+4*16]
- aesdec1_u [rcx+3*16]
- aesdec1_u [rcx+2*16]
- aesdec1_u [rcx+1*16]
- aesdeclast1_u [rcx+0*16]
-
- pxor xmm0,xmm5
- movdqa xmm5,xmm1
- add rdx, 16
- movdqu [r8 + rdx - 16], xmm0
- dec eax
- jnz lp192decsingle_CBC
-
-end_dec192_CBC:
-
- mov r9,[r9+24]
- movdqu [r9],xmm5
- add rsp,16*16+8
- ret
-
-
-
align 16
global iDec256_CBC
@@ -990,15 +693,17 @@ end_dec256_CBC:
-
-
align 16
-global iDec192
-iDec192:
+global iEnc128_CBC
+iEnc128_CBC:
linux_setup
sub rsp,16*16+8
+ mov r9,rcx
+ mov rax,[rcx+24]
+ movdqu xmm1,[rax]
+
mov eax,[rcx+32] ; numblocks
mov rdx,[rcx]
mov r8,[rcx+8]
@@ -1006,677 +711,9 @@ iDec192:
sub r8,rdx
- test eax,eax
- jz end_dec192
-
- cmp eax,4
- jl lp192decsingle
test rcx,0xf
- jz lp192decfour
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-align 16
-lp192decfour:
-
- test eax,eax
- jz end_dec192
-
- cmp eax,4
- jl lp192decsingle
-
- load_and_xor4 rdx, [rcx+12*16]
- add rdx,16*4
- aesdec4 [rcx+11*16]
- aesdec4 [rcx+10*16]
- aesdec4 [rcx+9*16]
- aesdec4 [rcx+8*16]
- aesdec4 [rcx+7*16]
- aesdec4 [rcx+6*16]
- aesdec4 [rcx+5*16]
- aesdec4 [rcx+4*16]
- aesdec4 [rcx+3*16]
- aesdec4 [rcx+2*16]
- aesdec4 [rcx+1*16]
- aesdeclast4 [rcx+0*16]
-
- sub eax,4
- store4 r8+rdx-(16*4)
- jmp lp192decfour
-
-
- align 16
-lp192decsingle:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+12*16]
- pxor xmm0, xmm4
- aesdec1_u [rcx+11*16]
- aesdec1_u [rcx+10*16]
- aesdec1_u [rcx+9*16]
- aesdec1_u [rcx+8*16]
- aesdec1_u [rcx+7*16]
- aesdec1_u [rcx+6*16]
- aesdec1_u [rcx+5*16]
- aesdec1_u [rcx+4*16]
- aesdec1_u [rcx+3*16]
- aesdec1_u [rcx+2*16]
- aesdec1_u [rcx+1*16]
- aesdeclast1_u [rcx+0*16]
-
- add rdx, 16
- movdqu [r8 + rdx - 16], xmm0
- dec eax
- jnz lp192decsingle
-
-end_dec192:
-
- add rsp,16*16+8
- ret
-
-
-
-
-align 16
-global iDec256
-iDec256:
-
- linux_setup
- sub rsp,16*16+8
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_dec256
-
- cmp eax,4
- jl lp256dec
-
- test rcx,0xf
- jz lp256dec4
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- copy_round_keys rsp,rcx,13
- copy_round_keys rsp,rcx,14
- mov rcx,rsp
-
-
- align 16
-lp256dec4:
- test eax,eax
- jz end_dec256
-
- cmp eax,4
- jl lp256dec
-
- load_and_xor4 rdx,[rcx+14*16]
- add rdx, 4*16
- aesdec4 [rcx+13*16]
- aesdec4 [rcx+12*16]
- aesdec4 [rcx+11*16]
- aesdec4 [rcx+10*16]
- aesdec4 [rcx+9*16]
- aesdec4 [rcx+8*16]
- aesdec4 [rcx+7*16]
- aesdec4 [rcx+6*16]
- aesdec4 [rcx+5*16]
- aesdec4 [rcx+4*16]
- aesdec4 [rcx+3*16]
- aesdec4 [rcx+2*16]
- aesdec4 [rcx+1*16]
- aesdeclast4 [rcx+0*16]
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lp256dec4
-
- align 16
-lp256dec:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+14*16]
- add rdx, 16
- pxor xmm0, xmm4 ; Round 0 (only xor)
- aesdec1_u [rcx+13*16]
- aesdec1_u [rcx+12*16]
- aesdec1_u [rcx+11*16]
- aesdec1_u [rcx+10*16]
- aesdec1_u [rcx+9*16]
- aesdec1_u [rcx+8*16]
- aesdec1_u [rcx+7*16]
- aesdec1_u [rcx+6*16]
- aesdec1_u [rcx+5*16]
- aesdec1_u [rcx+4*16]
- aesdec1_u [rcx+3*16]
- aesdec1_u [rcx+2*16]
- aesdec1_u [rcx+1*16]
- aesdeclast1_u [rcx+0*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp256dec
-
-end_dec256:
-
- add rsp,16*16+8
- ret
-
-
-
-
-
-
-align 16
-global iEnc128
-iEnc128:
-
- linux_setup
- sub rsp,16*16+8
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_enc128
-
- cmp eax,4
- jl lp128encsingle
-
- test rcx,0xf
- jz lpenc128four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- mov rcx,rsp
-
-
- align 16
-
-lpenc128four:
-
- test eax,eax
- jz end_enc128
-
- cmp eax,4
- jl lp128encsingle
-
- load_and_xor4 rdx,[rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenclast4 [rcx+10*16]
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpenc128four
-
- align 16
-lp128encsingle:
-
- movdqu xmm0, [rdx]
- movdqu xmm4,[rcx+0*16]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenclast1_u [rcx+10*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp128encsingle
-
-end_enc128:
-
- add rsp,16*16+8
- ret
-
-
-align 16
-global iEnc128_CTR
-iEnc128_CTR:
-
- linux_setup
-
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm5,[rax]
-
-
- sub rsp,16*16+8+16
-
- movdqa [rsp+16*16], xmm6
- movdqa xmm6, [byte_swap_16 wrt rip]
- pshufb xmm5, xmm6 ; byte swap counter
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
- test eax,eax
- jz end_encctr128
-
- cmp eax,4
- jl lp128encctrsingle
-
- test rcx,0xf
- jz lpencctr128four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- mov rcx,rsp
-
-
- align 16
-
-lpencctr128four:
-
- test eax,eax
- jz end_encctr128
-
- cmp eax,4
- jl lp128encctrsingle
-
- load_and_inc4 [rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenclast4 [rcx+10*16]
- xor_with_input4 rdx-(4*16)
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpencctr128four
-
- align 16
-lp128encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- paddd xmm5,[counter_add_one wrt rip]
- add rdx, 16
- movdqu xmm4,[rcx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenclast1_u [rcx+10*16]
- movdqu xmm4, [rdx-16]
- pxor xmm0,xmm4
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp128encctrsingle
-
-end_encctr128:
-
- mov r9,[r9+24]
-
- pshufb xmm5, xmm6 ; byte swap counter
- movdqu [r9],xmm5
- movdqa xmm6, [rsp+16*16]
- add rsp,16*16+8+16
- ret
-
-
-
-align 16
-global iEnc192_CTR
-iEnc192_CTR:
-
- linux_setup
-
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm5,[rax]
-
-
- sub rsp,16*16+8+16
-
- movdqa [rsp+16*16], xmm6
- movdqa xmm6, [byte_swap_16 wrt rip]
- pshufb xmm5, xmm6 ; byte swap counter
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_encctr192
-
- cmp eax,4
- jl lp192encctrsingle
-
- test rcx,0xf
- jz lpencctr192four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-
- align 16
-
-lpencctr192four:
-
- test eax,eax
- jz end_encctr192
-
- cmp eax,4
- jl lp192encctrsingle
-
- load_and_inc4 [rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenc4 [rcx+10*16]
- aesenc4 [rcx+11*16]
- aesenclast4 [rcx+12*16]
- xor_with_input4 rdx-(4*16)
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpencctr192four
-
- align 16
-lp192encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- movdqu xmm4,[rcx+0*16]
- paddd xmm5,[counter_add_one wrt rip]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenc1_u [rcx+10*16]
- aesenc1_u [rcx+11*16]
- aesenclast1_u [rcx+12*16]
- movdqu xmm4, [rdx-16]
- pxor xmm0,xmm4
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp192encctrsingle
-
-end_encctr192:
-
- mov r9,[r9+24]
- pshufb xmm5, xmm6 ; byte swap counter
- movdqu [r9],xmm5
- movdqa xmm6, [rsp+16*16]
- add rsp,16*16+8+16
- ret
-
-
-align 16
-global iEnc256_CTR
-iEnc256_CTR:
-
- linux_setup
-
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm5,[rax]
-
-
- sub rsp,16*16+8+16
-
- movdqa [rsp+16*16], xmm6
- movdqa xmm6, [byte_swap_16 wrt rip]
- pshufb xmm5, xmm6 ; byte swap counter
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_encctr256
-
- cmp eax,4
- jl lp256encctrsingle
-
- test rcx,0xf
- jz lpencctr256four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- copy_round_keys rsp,rcx,13
- copy_round_keys rsp,rcx,14
- mov rcx,rsp
-
-
- align 16
-
-lpencctr256four:
-
- test eax,eax
- jz end_encctr256
-
- cmp eax,4
- jl lp256encctrsingle
-
- load_and_inc4 [rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenc4 [rcx+10*16]
- aesenc4 [rcx+11*16]
- aesenc4 [rcx+12*16]
- aesenc4 [rcx+13*16]
- aesenclast4 [rcx+14*16]
- xor_with_input4 rdx-(4*16)
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpencctr256four
-
- align 16
-lp256encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- movdqu xmm4,[rcx+0*16]
- paddd xmm5,[counter_add_one wrt rip]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenc1_u [rcx+10*16]
- aesenc1_u [rcx+11*16]
- aesenc1_u [rcx+12*16]
- aesenc1_u [rcx+13*16]
- aesenclast1_u [rcx+14*16]
- movdqu xmm4, [rdx-16]
- pxor xmm0,xmm4
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp256encctrsingle
-
-end_encctr256:
-
- mov r9,[r9+24]
- pshufb xmm5, xmm6 ; byte swap counter
- movdqu [r9],xmm5
- movdqa xmm6, [rsp+16*16]
- add rsp,16*16+8+16
- ret
-
-
-
-
-
-
-
-align 16
-global iEnc128_CBC
-iEnc128_CBC:
-
- linux_setup
- sub rsp,16*16+8
-
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm1,[rax]
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test rcx,0xf
- jz lp128encsingle_CBC
+ jz lp128encsingle_CBC
copy_round_keys rsp,rcx,0
copy_round_keys rsp,rcx,1
@@ -1724,77 +761,6 @@ lp128encsingle_CBC:
ret
-align 16
-global iEnc192_CBC
-iEnc192_CBC:
-
- linux_setup
- sub rsp,16*16+8
- mov r9,rcx
- mov rax,[rcx+24]
- movdqu xmm1,[rax]
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
- test rcx,0xf
- jz lp192encsingle_CBC
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-
-
- align 16
-
-lp192encsingle_CBC:
-
- movdqu xmm0, [rdx]
- movdqu xmm4, [rcx+0*16]
- add rdx, 16
- pxor xmm0, xmm1
- pxor xmm0, xmm4
- aesenc1 [rcx+1*16]
- aesenc1 [rcx+2*16]
- aesenc1 [rcx+3*16]
- aesenc1 [rcx+4*16]
- aesenc1 [rcx+5*16]
- aesenc1 [rcx+6*16]
- aesenc1 [rcx+7*16]
- aesenc1 [rcx+8*16]
- aesenc1 [rcx+9*16]
- aesenc1 [rcx+10*16]
- aesenc1 [rcx+11*16]
- aesenclast1 [rcx+12*16]
- movdqa xmm1,xmm0
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp192encsingle_CBC
-
- mov r9,[r9+24]
- movdqu [r9],xmm1
-
- add rsp,16*16+8
- ret
-
align 16
global iEnc256_CBC
@@ -1868,214 +834,3 @@ lp256encsingle_CBC:
movdqu [r9],xmm1
add rsp,16*16+8
ret
-
-
-
-
-align 16
-global iEnc192
-iEnc192:
-
- linux_setup
- sub rsp,16*16+8
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
- test eax,eax
- jz end_enc192
-
- cmp eax,4
- jl lp192encsingle
-
- test rcx,0xf
- jz lpenc192four
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- mov rcx,rsp
-
-
- align 16
-
-lpenc192four:
-
- test eax,eax
- jz end_enc192
-
- cmp eax,4
- jl lp192encsingle
-
- load_and_xor4 rdx,[rcx+0*16]
- add rdx,4*16
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenc4 [rcx+10*16]
- aesenc4 [rcx+11*16]
- aesenclast4 [rcx+12*16]
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lpenc192four
-
- align 16
-lp192encsingle:
-
- movdqu xmm0, [rdx]
- movdqu xmm4, [rcx+0*16]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenc1_u [rcx+10*16]
- aesenc1_u [rcx+11*16]
- aesenclast1_u [rcx+12*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp192encsingle
-
-end_enc192:
-
- add rsp,16*16+8
- ret
-
-
-
-
-
-
-align 16
-global iEnc256
-iEnc256:
-
- linux_setup
- sub rsp,16*16+8
-
- mov eax,[rcx+32] ; numblocks
- mov rdx,[rcx]
- mov r8,[rcx+8]
- mov rcx,[rcx+16]
-
- sub r8,rdx
-
-
- test eax,eax
- jz end_enc256
-
- cmp eax,4
- jl lp256enc
-
- test rcx,0xf
- jz lp256enc4
-
- copy_round_keys rsp,rcx,0
- copy_round_keys rsp,rcx,1
- copy_round_keys rsp,rcx,2
- copy_round_keys rsp,rcx,3
- copy_round_keys rsp,rcx,4
- copy_round_keys rsp,rcx,5
- copy_round_keys rsp,rcx,6
- copy_round_keys rsp,rcx,7
- copy_round_keys rsp,rcx,8
- copy_round_keys rsp,rcx,9
- copy_round_keys rsp,rcx,10
- copy_round_keys rsp,rcx,11
- copy_round_keys rsp,rcx,12
- copy_round_keys rsp,rcx,13
- copy_round_keys rsp,rcx,14
- mov rcx,rsp
-
-
- align 16
-
-lp256enc4:
- test eax,eax
- jz end_enc256
-
- cmp eax,4
- jl lp256enc
-
-
- load_and_xor4 rdx,[rcx+0*16]
- add rdx, 16*4
- aesenc4 [rcx+1*16]
- aesenc4 [rcx+2*16]
- aesenc4 [rcx+3*16]
- aesenc4 [rcx+4*16]
- aesenc4 [rcx+5*16]
- aesenc4 [rcx+6*16]
- aesenc4 [rcx+7*16]
- aesenc4 [rcx+8*16]
- aesenc4 [rcx+9*16]
- aesenc4 [rcx+10*16]
- aesenc4 [rcx+11*16]
- aesenc4 [rcx+12*16]
- aesenc4 [rcx+13*16]
- aesenclast4 [rcx+14*16]
-
- store4 r8+rdx-16*4
- sub eax,4
- jmp lp256enc4
-
- align 16
-lp256enc:
-
- movdqu xmm0, [rdx]
- movdqu xmm4, [rcx+0*16]
- add rdx, 16
- pxor xmm0, xmm4
- aesenc1_u [rcx+1*16]
- aesenc1_u [rcx+2*16]
- aesenc1_u [rcx+3*16]
- aesenc1_u [rcx+4*16]
- aesenc1_u [rcx+5*16]
- aesenc1_u [rcx+6*16]
- aesenc1_u [rcx+7*16]
- aesenc1_u [rcx+8*16]
- aesenc1_u [rcx+9*16]
- aesenc1_u [rcx+10*16]
- aesenc1_u [rcx+11*16]
- aesenc1_u [rcx+12*16]
- aesenc1_u [rcx+13*16]
- aesenclast1_u [rcx+14*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [r8+rdx-16], xmm0
- dec eax
- jnz lp256enc
-
-end_enc256:
-
- add rsp,16*16+8
- ret
diff --git a/src/lib/crypto/builtin/aes/iaesx86.s b/src/lib/crypto/builtin/aes/iaesx86.s
index c65921b..b667acd 100644
--- a/src/lib/crypto/builtin/aes/iaesx86.s
+++ b/src/lib/crypto/builtin/aes/iaesx86.s
@@ -27,6 +27,14 @@
; OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
; ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+%define _iEncExpandKey128 k5_iEncExpandKey128
+%define _iEncExpandKey256 k5_iEncExpandKey256
+%define _iDecExpandKey128 k5_iDecExpandKey128
+%define _iDecExpandKey256 k5_iDecExpandKey256
+%define _iEnc128_CBC k5_iEnc128_CBC
+%define _iEnc256_CBC k5_iEnc256_CBC
+%define _iDec128_CBC k5_iDec128_CBC
+%define _iDec256_CBC k5_iDec256_CBC
%macro inversekey 1
movdqu xmm1,%1
@@ -343,59 +351,6 @@ _iEncExpandKey128:
ret
-align 16
-global _iEncExpandKey192
-_iEncExpandKey192:
-
- mov ecx,[esp-4+8] ;input
- mov edx,[esp-4+12] ;ctx
-
- movq xmm7, [ecx+16] ; loading the AES key
- movq [edx+16], xmm7 ; Storing key in memory where all key expansion
- pshufd xmm4, xmm7, 01001111b
- movdqu xmm1, [ecx] ; loading the AES key
- movdqu [edx], xmm1 ; Storing key in memory where all key expansion
-
- pxor xmm3, xmm3 ; Set xmm3 to be all zeros. Required for the key_expansion.
- pxor xmm6, xmm6 ; Set xmm3 to be all zeros. Required for the key_expansion.
-
- aeskeygenassist xmm2, xmm4, 0x1 ; Complete round key 1 and generate round key 2
- key_expansion_1_192 24
- key_expansion_2_192 40
-
- aeskeygenassist xmm2, xmm4, 0x2 ; Generate round key 3 and part of round key 4
- key_expansion_1_192 48
- key_expansion_2_192 64
-
- aeskeygenassist xmm2, xmm4, 0x4 ; Complete round key 4 and generate round key 5
- key_expansion_1_192 72
- key_expansion_2_192 88
-
- aeskeygenassist xmm2, xmm4, 0x8 ; Generate round key 6 and part of round key 7
- key_expansion_1_192 96
- key_expansion_2_192 112
-
- aeskeygenassist xmm2, xmm4, 0x10 ; Complete round key 7 and generate round key 8
- key_expansion_1_192 120
- key_expansion_2_192 136
-
- aeskeygenassist xmm2, xmm4, 0x20 ; Generate round key 9 and part of round key 10
- key_expansion_1_192 144
- key_expansion_2_192 160
-
- aeskeygenassist xmm2, xmm4, 0x40 ; Complete round key 10 and generate round key 11
- key_expansion_1_192 168
- key_expansion_2_192 184
-
- aeskeygenassist xmm2, xmm4, 0x80 ; Generate round key 12
- key_expansion_1_192 192
-
- ret
-
-
-
-
-
align 16
global _iDecExpandKey128
@@ -422,35 +377,6 @@ _iDecExpandKey128:
-
-align 16
-global _iDecExpandKey192
-_iDecExpandKey192:
- push DWORD [esp+8]
- push DWORD [esp+8]
-
- call _iEncExpandKey192
- add esp,8
-
- mov edx,[esp-4+12] ;ctx
-
- inversekey [edx + 1*16]
- inversekey [edx + 2*16]
- inversekey [edx + 3*16]
- inversekey [edx + 4*16]
- inversekey [edx + 5*16]
- inversekey [edx + 6*16]
- inversekey [edx + 7*16]
- inversekey [edx + 8*16]
- inversekey [edx + 9*16]
- inversekey [edx + 10*16]
- inversekey [edx + 11*16]
-
- ret
-
-
-
-
align 16
global _iDecExpandKey256
_iDecExpandKey256:
@@ -530,22 +456,21 @@ _iEncExpandKey256:
-
-
-
align 16
-global _iDec128
-_iDec128:
+global _iDec128_CBC
+_iDec128_CBC:
mov ecx,[esp-4+8]
push esi
push edi
push ebp
mov ebp,esp
-
sub esp,16*16
and esp,0xfffffff0
+ mov eax,[ecx+12]
+ movdqu xmm5,[eax] ;iv
+
mov eax,[ecx+16] ; numblocks
mov esi,[ecx]
mov edi,[ecx+4]
@@ -554,13 +479,13 @@ _iDec128:
sub edi,esi
test eax,eax
- jz end_dec128
+ jz end_dec128_CBC
cmp eax,4
- jl lp128decsingle
+ jl lp128decsingle_CBC
test ecx,0xf
- jz lp128decfour
+ jz lp128decfour_CBC
copy_round_keys esp,ecx,0
copy_round_keys esp,ecx,1
@@ -577,13 +502,13 @@ _iDec128:
align 16
-lp128decfour:
+lp128decfour_CBC:
test eax,eax
- jz end_dec128
+ jz end_dec128_CBC
cmp eax,4
- jl lp128decsingle
+ jl lp128decsingle_CBC
load_and_xor4 esi, [ecx+10*16]
add esi,16*4
@@ -598,15 +523,25 @@ lp128decfour:
aesdec4 [ecx+1*16]
aesdeclast4 [ecx+0*16]
+ pxor xmm0,xmm5
+ movdqu xmm4,[esi- 16*4 + 0*16]
+ pxor xmm1,xmm4
+ movdqu xmm4,[esi- 16*4 + 1*16]
+ pxor xmm2,xmm4
+ movdqu xmm4,[esi- 16*4 + 2*16]
+ pxor xmm3,xmm4
+ movdqu xmm5,[esi- 16*4 + 3*16]
+
sub eax,4
store4 esi+edi-(16*4)
- jmp lp128decfour
+ jmp lp128decfour_CBC
align 16
-lp128decsingle:
+lp128decsingle_CBC:
movdqu xmm0, [esi]
+ movdqa xmm1,xmm0
movdqu xmm4,[ecx+10*16]
pxor xmm0, xmm4
aesdec1_u [ecx+9*16]
@@ -620,31 +555,39 @@ lp128decsingle:
aesdec1_u [ecx+1*16]
aesdeclast1_u [ecx+0*16]
+ pxor xmm0,xmm5
+ movdqa xmm5,xmm1
+
add esi, 16
movdqu [edi+esi - 16], xmm0
dec eax
- jnz lp128decsingle
+ jnz lp128decsingle_CBC
-end_dec128:
+end_dec128_CBC:
mov esp,ebp
pop ebp
pop edi
pop esi
+ mov ecx,[esp-4+8] ; first arg
+ mov ecx,[ecx+12]
+ movdqu [ecx],xmm5 ; store last iv for chaining
+
ret
align 16
-global _iDec128_CBC
-_iDec128_CBC:
+global _iDec256_CBC
+_iDec256_CBC:
mov ecx,[esp-4+8]
push esi
push edi
push ebp
mov ebp,esp
+
sub esp,16*16
and esp,0xfffffff0
@@ -659,13 +602,13 @@ _iDec128_CBC:
sub edi,esi
test eax,eax
- jz end_dec128_CBC
+ jz end_dec256_CBC
cmp eax,4
- jl lp128decsingle_CBC
+ jl lp256decsingle_CBC
test ecx,0xf
- jz lp128decfour_CBC
+ jz lp256decfour_CBC
copy_round_keys esp,ecx,0
copy_round_keys esp,ecx,1
@@ -678,20 +621,27 @@ _iDec128_CBC:
copy_round_keys esp,ecx,8
copy_round_keys esp,ecx,9
copy_round_keys esp,ecx,10
+ copy_round_keys esp,ecx,11
+ copy_round_keys esp,ecx,12
+ copy_round_keys esp,ecx,13
+ copy_round_keys esp,ecx,14
mov ecx,esp
-
align 16
-lp128decfour_CBC:
+lp256decfour_CBC:
test eax,eax
- jz end_dec128_CBC
+ jz end_dec256_CBC
cmp eax,4
- jl lp128decsingle_CBC
+ jl lp256decsingle_CBC
- load_and_xor4 esi, [ecx+10*16]
+ load_and_xor4 esi, [ecx+14*16]
add esi,16*4
+ aesdec4 [ecx+13*16]
+ aesdec4 [ecx+12*16]
+ aesdec4 [ecx+11*16]
+ aesdec4 [ecx+10*16]
aesdec4 [ecx+9*16]
aesdec4 [ecx+8*16]
aesdec4 [ecx+7*16]
@@ -714,16 +664,20 @@ lp128decfour_CBC:
sub eax,4
store4 esi+edi-(16*4)
- jmp lp128decfour_CBC
+ jmp lp256decfour_CBC
align 16
-lp128decsingle_CBC:
+lp256decsingle_CBC:
movdqu xmm0, [esi]
movdqa xmm1,xmm0
- movdqu xmm4,[ecx+10*16]
+ movdqu xmm4, [ecx+14*16]
pxor xmm0, xmm4
+ aesdec1_u [ecx+13*16]
+ aesdec1_u [ecx+12*16]
+ aesdec1_u [ecx+11*16]
+ aesdec1_u [ecx+10*16]
aesdec1_u [ecx+9*16]
aesdec1_u [ecx+8*16]
aesdec1_u [ecx+7*16]
@@ -733,7 +687,7 @@ lp128decsingle_CBC:
aesdec1_u [ecx+3*16]
aesdec1_u [ecx+2*16]
aesdec1_u [ecx+1*16]
- aesdeclast1_u [ecx+0*16]
+ aesdeclast1_u [ecx+0*16]
pxor xmm0,xmm5
movdqa xmm5,xmm1
@@ -741,16 +695,17 @@ lp128decsingle_CBC:
add esi, 16
movdqu [edi+esi - 16], xmm0
dec eax
- jnz lp128decsingle_CBC
+ jnz lp256decsingle_CBC
+
+end_dec256_CBC:
-end_dec128_CBC:
mov esp,ebp
pop ebp
pop edi
pop esi
- mov ecx,[esp-4+8] ; first arg
+ mov ecx,[esp-4+8] ; first arg
mov ecx,[ecx+12]
movdqu [ecx],xmm5 ; store last iv for chaining
@@ -758,12 +713,9 @@ end_dec128_CBC:
-
-
-
align 16
-global _iDec192
-_iDec192:
+global _iEnc128_CBC
+_iEnc128_CBC:
mov ecx,[esp-4+8]
push esi
@@ -774,21 +726,17 @@ _iDec192:
sub esp,16*16
and esp,0xfffffff0
+ mov eax,[ecx+12]
+ movdqu xmm1,[eax] ;iv
+
mov eax,[ecx+16] ; numblocks
mov esi,[ecx]
mov edi,[ecx+4]
mov ecx,[ecx+8]
-
sub edi,esi
- test eax,eax
- jz end_dec192
-
- cmp eax,4
- jl lp192decsingle
-
test ecx,0xf
- jz lp192decfour
+ jz lp128encsingle_CBC
copy_round_keys esp,ecx,0
copy_round_keys esp,ecx,1
@@ -801,79 +749,50 @@ _iDec192:
copy_round_keys esp,ecx,8
copy_round_keys esp,ecx,9
copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
mov ecx,esp
-
-align 16
-lp192decfour:
-
- test eax,eax
- jz end_dec192
-
- cmp eax,4
- jl lp192decsingle
-
- load_and_xor4 esi, [ecx+12*16]
- add esi,16*4
- aesdec4 [ecx+11*16]
- aesdec4 [ecx+10*16]
- aesdec4 [ecx+9*16]
- aesdec4 [ecx+8*16]
- aesdec4 [ecx+7*16]
- aesdec4 [ecx+6*16]
- aesdec4 [ecx+5*16]
- aesdec4 [ecx+4*16]
- aesdec4 [ecx+3*16]
- aesdec4 [ecx+2*16]
- aesdec4 [ecx+1*16]
- aesdeclast4 [ecx+0*16]
-
- sub eax,4
- store4 esi+edi-(16*4)
- jmp lp192decfour
-
-
align 16
-lp192decsingle:
- movdqu xmm0, [esi]
- movdqu xmm4,[ecx+12*16]
- pxor xmm0, xmm4
- aesdec1_u [ecx+11*16]
- aesdec1_u [ecx+10*16]
- aesdec1_u [ecx+9*16]
- aesdec1_u [ecx+8*16]
- aesdec1_u [ecx+7*16]
- aesdec1_u [ecx+6*16]
- aesdec1_u [ecx+5*16]
- aesdec1_u [ecx+4*16]
- aesdec1_u [ecx+3*16]
- aesdec1_u [ecx+2*16]
- aesdec1_u [ecx+1*16]
- aesdeclast1_u [ecx+0*16]
+lp128encsingle_CBC:
+ movdqu xmm0, [esi]
add esi, 16
- movdqu [edi+esi - 16], xmm0
+ pxor xmm0, xmm1
+ movdqu xmm4,[ecx+0*16]
+ pxor xmm0, xmm4
+ aesenc1 [ecx+1*16]
+ aesenc1 [ecx+2*16]
+ aesenc1 [ecx+3*16]
+ aesenc1 [ecx+4*16]
+ aesenc1 [ecx+5*16]
+ aesenc1 [ecx+6*16]
+ aesenc1 [ecx+7*16]
+ aesenc1 [ecx+8*16]
+ aesenc1 [ecx+9*16]
+ aesenclast1 [ecx+10*16]
+ ; Store output encrypted data into CIPHERTEXT array
+ movdqu [esi+edi-16], xmm0
+ movdqa xmm1,xmm0
dec eax
- jnz lp192decsingle
-
-end_dec192:
+ jnz lp128encsingle_CBC
mov esp,ebp
pop ebp
pop edi
pop esi
+ mov ecx,[esp-4+8] ; first arg
+ mov ecx,[ecx+12]
+ movdqu [ecx],xmm1 ; store last iv for chaining
ret
+
align 16
-global _iDec192_CBC
-_iDec192_CBC:
- mov ecx,[esp-4+8]
+global _iEnc256_CBC
+_iEnc256_CBC:
+ mov ecx,[esp-4+8] ; first arg
push esi
push edi
@@ -883,24 +802,17 @@ _iDec192_CBC:
sub esp,16*16
and esp,0xfffffff0
- mov eax,[ecx+12]
- movdqu xmm5,[eax] ;iv
+ mov eax,[ecx+12]
+ movdqu xmm1,[eax] ;iv
mov eax,[ecx+16] ; numblocks
mov esi,[ecx]
mov edi,[ecx+4]
mov ecx,[ecx+8]
-
sub edi,esi
- test eax,eax
- jz end_dec192_CBC
-
- cmp eax,4
- jl lp192decsingle_CBC
-
test ecx,0xf
- jz lp192decfour_CBC
+ jz lp256encsingle_CBC
copy_round_keys esp,ecx,0
copy_round_keys esp,ecx,1
@@ -915,1047 +827,39 @@ _iDec192_CBC:
copy_round_keys esp,ecx,10
copy_round_keys esp,ecx,11
copy_round_keys esp,ecx,12
+ copy_round_keys esp,ecx,13
+ copy_round_keys esp,ecx,14
mov ecx,esp
-align 16
-lp192decfour_CBC:
+ align 16
- test eax,eax
- jz end_dec192_CBC
+lp256encsingle_CBC:
- cmp eax,4
- jl lp192decsingle_CBC
-
- load_and_xor4 esi, [ecx+12*16]
- add esi,16*4
- aesdec4 [ecx+11*16]
- aesdec4 [ecx+10*16]
- aesdec4 [ecx+9*16]
- aesdec4 [ecx+8*16]
- aesdec4 [ecx+7*16]
- aesdec4 [ecx+6*16]
- aesdec4 [ecx+5*16]
- aesdec4 [ecx+4*16]
- aesdec4 [ecx+3*16]
- aesdec4 [ecx+2*16]
- aesdec4 [ecx+1*16]
- aesdeclast4 [ecx+0*16]
-
- pxor xmm0,xmm5
- movdqu xmm4,[esi- 16*4 + 0*16]
- pxor xmm1,xmm4
- movdqu xmm4,[esi- 16*4 + 1*16]
- pxor xmm2,xmm4
- movdqu xmm4,[esi- 16*4 + 2*16]
- pxor xmm3,xmm4
- movdqu xmm5,[esi- 16*4 + 3*16]
-
- sub eax,4
- store4 esi+edi-(16*4)
- jmp lp192decfour_CBC
-
-
- align 16
-lp192decsingle_CBC:
-
- movdqu xmm0, [esi]
- movdqu xmm4,[ecx+12*16]
- movdqa xmm1,xmm0
- pxor xmm0, xmm4
- aesdec1_u [ecx+11*16]
- aesdec1_u [ecx+10*16]
- aesdec1_u [ecx+9*16]
- aesdec1_u [ecx+8*16]
- aesdec1_u [ecx+7*16]
- aesdec1_u [ecx+6*16]
- aesdec1_u [ecx+5*16]
- aesdec1_u [ecx+4*16]
- aesdec1_u [ecx+3*16]
- aesdec1_u [ecx+2*16]
- aesdec1_u [ecx+1*16]
- aesdeclast1_u [ecx+0*16]
-
- pxor xmm0,xmm5
- movdqa xmm5,xmm1
-
- add esi, 16
- movdqu [edi+esi - 16], xmm0
- dec eax
- jnz lp192decsingle_CBC
-
-end_dec192_CBC:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- mov ecx,[esp-4+8]
- mov ecx,[ecx+12]
- movdqu [ecx],xmm5 ; store last iv for chaining
-
- ret
-
-
-
-
-
-align 16
-global _iDec256
-_iDec256:
- mov ecx, [esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
-
- test eax,eax
- jz end_dec256
-
- cmp eax,4
- jl lp256dec
-
- test ecx,0xf
- jz lp256dec4
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- copy_round_keys esp,ecx,13
- copy_round_keys esp,ecx,14
- mov ecx,esp
-
- align 16
-lp256dec4:
- test eax,eax
- jz end_dec256
-
- cmp eax,4
- jl lp256dec
-
- load_and_xor4 esi,[ecx+14*16]
- add esi, 4*16
- aesdec4 [ecx+13*16]
- aesdec4 [ecx+12*16]
- aesdec4 [ecx+11*16]
- aesdec4 [ecx+10*16]
- aesdec4 [ecx+9*16]
- aesdec4 [ecx+8*16]
- aesdec4 [ecx+7*16]
- aesdec4 [ecx+6*16]
- aesdec4 [ecx+5*16]
- aesdec4 [ecx+4*16]
- aesdec4 [ecx+3*16]
- aesdec4 [ecx+2*16]
- aesdec4 [ecx+1*16]
- aesdeclast4 [ecx+0*16]
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lp256dec4
-
- align 16
-lp256dec:
-
- movdqu xmm0, [esi]
- movdqu xmm4,[ecx+14*16]
- add esi, 16
- pxor xmm0, xmm4 ; Round 0 (only xor)
- aesdec1_u [ecx+13*16]
- aesdec1_u [ecx+12*16]
- aesdec1_u [ecx+11*16]
- aesdec1_u [ecx+10*16]
- aesdec1_u [ecx+9*16]
- aesdec1_u [ecx+8*16]
- aesdec1_u [ecx+7*16]
- aesdec1_u [ecx+6*16]
- aesdec1_u [ecx+5*16]
- aesdec1_u [ecx+4*16]
- aesdec1_u [ecx+3*16]
- aesdec1_u [ecx+2*16]
- aesdec1_u [ecx+1*16]
- aesdeclast1_u [ecx+0*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp256dec
-
-end_dec256:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret
-
-
-
-
-align 16
-global _iDec256_CBC
-_iDec256_CBC:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm5,[eax] ;iv
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_dec256_CBC
-
- cmp eax,4
- jl lp256decsingle_CBC
-
- test ecx,0xf
- jz lp256decfour_CBC
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- copy_round_keys esp,ecx,13
- copy_round_keys esp,ecx,14
- mov ecx,esp
-
-align 16
-lp256decfour_CBC:
-
- test eax,eax
- jz end_dec256_CBC
-
- cmp eax,4
- jl lp256decsingle_CBC
-
- load_and_xor4 esi, [ecx+14*16]
- add esi,16*4
- aesdec4 [ecx+13*16]
- aesdec4 [ecx+12*16]
- aesdec4 [ecx+11*16]
- aesdec4 [ecx+10*16]
- aesdec4 [ecx+9*16]
- aesdec4 [ecx+8*16]
- aesdec4 [ecx+7*16]
- aesdec4 [ecx+6*16]
- aesdec4 [ecx+5*16]
- aesdec4 [ecx+4*16]
- aesdec4 [ecx+3*16]
- aesdec4 [ecx+2*16]
- aesdec4 [ecx+1*16]
- aesdeclast4 [ecx+0*16]
-
- pxor xmm0,xmm5
- movdqu xmm4,[esi- 16*4 + 0*16]
- pxor xmm1,xmm4
- movdqu xmm4,[esi- 16*4 + 1*16]
- pxor xmm2,xmm4
- movdqu xmm4,[esi- 16*4 + 2*16]
- pxor xmm3,xmm4
- movdqu xmm5,[esi- 16*4 + 3*16]
-
- sub eax,4
- store4 esi+edi-(16*4)
- jmp lp256decfour_CBC
-
-
- align 16
-lp256decsingle_CBC:
-
- movdqu xmm0, [esi]
- movdqa xmm1,xmm0
- movdqu xmm4, [ecx+14*16]
- pxor xmm0, xmm4
- aesdec1_u [ecx+13*16]
- aesdec1_u [ecx+12*16]
- aesdec1_u [ecx+11*16]
- aesdec1_u [ecx+10*16]
- aesdec1_u [ecx+9*16]
- aesdec1_u [ecx+8*16]
- aesdec1_u [ecx+7*16]
- aesdec1_u [ecx+6*16]
- aesdec1_u [ecx+5*16]
- aesdec1_u [ecx+4*16]
- aesdec1_u [ecx+3*16]
- aesdec1_u [ecx+2*16]
- aesdec1_u [ecx+1*16]
- aesdeclast1_u [ecx+0*16]
-
- pxor xmm0,xmm5
- movdqa xmm5,xmm1
-
- add esi, 16
- movdqu [edi+esi - 16], xmm0
- dec eax
- jnz lp256decsingle_CBC
-
-end_dec256_CBC:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm5 ; store last iv for chaining
-
- ret
-
-
-
-
-
-
-
-
-
-align 16
-global _iEnc128
-_iEnc128:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_enc128
-
- cmp eax,4
- jl lp128encsingle
-
- test ecx,0xf
- jz lpenc128four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- mov ecx,esp
-
-
- align 16
-
-lpenc128four:
-
- test eax,eax
- jz end_enc128
-
- cmp eax,4
- jl lp128encsingle
-
- load_and_xor4 esi,[ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenclast4 [ecx+10*16]
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpenc128four
-
- align 16
-lp128encsingle:
-
- movdqu xmm0, [esi]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenclast1_u [ecx+10*16]
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp128encsingle
-
-end_enc128:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret
-
-
-align 16
-global _iEnc128_CTR
-_iEnc128_CTR:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm5,[eax] ;initial counter
- movdqa xmm6, [byte_swap_16]
- pshufb xmm5, xmm6 ; byte swap counter
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_encctr128
-
- cmp eax,4
- jl lp128encctrsingle
-
- test ecx,0xf
- jz lpencctr128four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- mov ecx,esp
-
-
- align 16
-
-lpencctr128four:
-
- test eax,eax
- jz end_encctr128
-
- cmp eax,4
- jl lp128encctrsingle
-
- load_and_inc4 [ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenclast4 [ecx+10*16]
- xor_with_input4 esi-(4*16)
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpencctr128four
-
- align 16
-lp128encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- paddd xmm5,[counter_add_one]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenclast1_u [ecx+10*16]
- movdqu xmm4, [esi-16]
- pxor xmm0,xmm4
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp128encctrsingle
-
-end_encctr128:
- pshufb xmm5, xmm6 ; byte swap counter
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm5 ; store last counter for chaining
-
- ret
-
-
-align 16
-global _iEnc192_CTR
-_iEnc192_CTR:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm5,[eax] ;initial counter
- movdqa xmm6, [byte_swap_16]
- pshufb xmm5, xmm6 ; byte swap counter
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_encctr192
-
- cmp eax,4
- jl lp192encctrsingle
-
- test ecx,0xf
- jz lpencctr192four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- mov ecx,esp
-
-
- align 16
-
-lpencctr192four:
-
- test eax,eax
- jz end_encctr192
-
- cmp eax,4
- jl lp192encctrsingle
-
- load_and_inc4 [ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenc4 [ecx+10*16]
- aesenc4 [ecx+11*16]
- aesenclast4 [ecx+12*16]
- xor_with_input4 esi-(4*16)
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpencctr192four
-
- align 16
-lp192encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- paddd xmm5,[counter_add_one]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenc1_u [ecx+10*16]
- aesenc1_u [ecx+11*16]
- aesenclast1_u [ecx+12*16]
- movdqu xmm4, [esi-16]
- pxor xmm0,xmm4
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp192encctrsingle
-
-end_encctr192:
-
- pshufb xmm5, xmm6 ; byte swap counter
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm5 ; store last counter for chaining
-
- ret
-
-
-align 16
-global _iEnc256_CTR
-_iEnc256_CTR:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm5,[eax] ;initial counter
- movdqa xmm6, [byte_swap_16]
- pshufb xmm5, xmm6 ; byte swap counter
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_encctr256
-
- cmp eax,4
- jl lp256encctrsingle
-
- test ecx,0xf
- jz lpencctr256four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- copy_round_keys esp,ecx,13
- copy_round_keys esp,ecx,14
- mov ecx,esp
-
-
- align 16
-
-lpencctr256four:
-
- test eax,eax
- jz end_encctr256
-
- cmp eax,4
- jl lp256encctrsingle
-
- load_and_inc4 [ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenc4 [ecx+10*16]
- aesenc4 [ecx+11*16]
- aesenc4 [ecx+12*16]
- aesenc4 [ecx+13*16]
- aesenclast4 [ecx+14*16]
- xor_with_input4 esi-(4*16)
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpencctr256four
-
- align 16
-
-lp256encctrsingle:
-
- movdqa xmm0,xmm5
- pshufb xmm0, xmm6 ; byte swap counter back
- paddd xmm5,[counter_add_one]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenc1_u [ecx+10*16]
- aesenc1_u [ecx+11*16]
- aesenc1_u [ecx+12*16]
- aesenc1_u [ecx+13*16]
- aesenclast1_u [ecx+14*16]
- movdqu xmm4, [esi-16]
- pxor xmm0,xmm4
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp256encctrsingle
-
-end_encctr256:
-
- pshufb xmm5, xmm6 ; byte swap counter
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm5 ; store last counter for chaining
-
- ret
-
-
-
-
-
-
-align 16
-global _iEnc128_CBC
-_iEnc128_CBC:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm1,[eax] ;iv
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
- sub edi,esi
-
- test ecx,0xf
- jz lp128encsingle_CBC
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- mov ecx,esp
-
- align 16
-
-lp128encsingle_CBC:
-
- movdqu xmm0, [esi]
- add esi, 16
- pxor xmm0, xmm1
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1 [ecx+1*16]
- aesenc1 [ecx+2*16]
- aesenc1 [ecx+3*16]
- aesenc1 [ecx+4*16]
- aesenc1 [ecx+5*16]
- aesenc1 [ecx+6*16]
- aesenc1 [ecx+7*16]
- aesenc1 [ecx+8*16]
- aesenc1 [ecx+9*16]
- aesenclast1 [ecx+10*16]
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- movdqa xmm1,xmm0
- dec eax
- jnz lp128encsingle_CBC
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm1 ; store last iv for chaining
-
- ret
-
-
-align 16
-global _iEnc192_CBC
-_iEnc192_CBC:
- mov ecx,[esp-4+8] ; first arg
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm1,[eax] ;iv
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
- sub edi,esi
-
- test ecx,0xf
- jz lp192encsingle_CBC
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- mov ecx,esp
-
- align 16
-
-lp192encsingle_CBC:
-
- movdqu xmm0, [esi]
- add esi, 16
- pxor xmm0, xmm1
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1 [ecx+1*16]
- aesenc1 [ecx+2*16]
- aesenc1 [ecx+3*16]
- aesenc1 [ecx+4*16]
- aesenc1 [ecx+5*16]
- aesenc1 [ecx+6*16]
- aesenc1 [ecx+7*16]
- aesenc1 [ecx+8*16]
- aesenc1 [ecx+9*16]
- aesenc1 [ecx+10*16]
- aesenc1 [ecx+11*16]
- aesenclast1 [ecx+12*16]
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- movdqa xmm1,xmm0
- dec eax
- jnz lp192encsingle_CBC
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
- mov ecx,[esp-4+8] ; first arg
- mov ecx,[ecx+12]
- movdqu [ecx],xmm1 ; store last iv for chaining
-
- ret
-
-align 16
-global _iEnc256_CBC
-_iEnc256_CBC:
- mov ecx,[esp-4+8] ; first arg
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+12]
- movdqu xmm1,[eax] ;iv
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
- sub edi,esi
-
- test ecx,0xf
- jz lp256encsingle_CBC
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- copy_round_keys esp,ecx,13
- copy_round_keys esp,ecx,14
- mov ecx,esp
-
- align 16
-
-lp256encsingle_CBC:
-
-;abab
- movdqu xmm0, [esi]
- add esi, 16
- pxor xmm0, xmm1
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1 [ecx+1*16]
- aesenc1 [ecx+2*16]
- aesenc1 [ecx+3*16]
- aesenc1 [ecx+4*16]
- aesenc1 [ecx+5*16]
- aesenc1 [ecx+6*16]
- aesenc1 [ecx+7*16]
- aesenc1 [ecx+8*16]
- aesenc1 [ecx+9*16]
- aesenc1 [ecx+10*16]
- aesenc1 [ecx+11*16]
- aesenc1 [ecx+12*16]
- aesenc1 [ecx+13*16]
- aesenclast1 [ecx+14*16]
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- movdqa xmm1,xmm0
- dec eax
- jnz lp256encsingle_CBC
+;abab
+ movdqu xmm0, [esi]
+ add esi, 16
+ pxor xmm0, xmm1
+ movdqu xmm4,[ecx+0*16]
+ pxor xmm0, xmm4
+ aesenc1 [ecx+1*16]
+ aesenc1 [ecx+2*16]
+ aesenc1 [ecx+3*16]
+ aesenc1 [ecx+4*16]
+ aesenc1 [ecx+5*16]
+ aesenc1 [ecx+6*16]
+ aesenc1 [ecx+7*16]
+ aesenc1 [ecx+8*16]
+ aesenc1 [ecx+9*16]
+ aesenc1 [ecx+10*16]
+ aesenc1 [ecx+11*16]
+ aesenc1 [ecx+12*16]
+ aesenc1 [ecx+13*16]
+ aesenclast1 [ecx+14*16]
+ ; Store output encrypted data into CIPHERTEXT array
+ movdqu [esi+edi-16], xmm0
+ movdqa xmm1,xmm0
+ dec eax
+ jnz lp256encsingle_CBC
mov esp,ebp
@@ -1967,233 +871,3 @@ lp256encsingle_CBC:
movdqu [ecx],xmm1 ; store last iv for chaining
ret
-
-
-
-
-
-align 16
-global _iEnc192
-_iEnc192:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_enc192
-
- cmp eax,4
- jl lp192encsingle
-
- test ecx,0xf
- jz lpenc192four
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- mov ecx,esp
-
- align 16
-
-lpenc192four:
-
- test eax,eax
- jz end_enc192
-
- cmp eax,4
- jl lp192encsingle
-
- load_and_xor4 esi,[ecx+0*16]
- add esi,4*16
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenc4 [ecx+10*16]
- aesenc4 [ecx+11*16]
- aesenclast4 [ecx+12*16]
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lpenc192four
-
- align 16
-lp192encsingle:
-
- movdqu xmm0, [esi]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenc1_u [ecx+10*16]
- aesenc1_u [ecx+11*16]
- aesenclast1_u [ecx+12*16]
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp192encsingle
-
-end_enc192:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret
-
-
-
-
-align 16
-global _iEnc256
-_iEnc256:
- mov ecx,[esp-4+8]
-
- push esi
- push edi
- push ebp
- mov ebp,esp
-
- sub esp,16*16
- and esp,0xfffffff0
-
- mov eax,[ecx+16] ; numblocks
- mov esi,[ecx]
- mov edi,[ecx+4]
- mov ecx,[ecx+8]
-
- sub edi,esi
-
- test eax,eax
- jz end_enc256
-
- cmp eax,4
- jl lp256enc
-
- test ecx,0xf
- jz lp256enc4
-
- copy_round_keys esp,ecx,0
- copy_round_keys esp,ecx,1
- copy_round_keys esp,ecx,2
- copy_round_keys esp,ecx,3
- copy_round_keys esp,ecx,4
- copy_round_keys esp,ecx,5
- copy_round_keys esp,ecx,6
- copy_round_keys esp,ecx,7
- copy_round_keys esp,ecx,8
- copy_round_keys esp,ecx,9
- copy_round_keys esp,ecx,10
- copy_round_keys esp,ecx,11
- copy_round_keys esp,ecx,12
- copy_round_keys esp,ecx,13
- copy_round_keys esp,ecx,14
- mov ecx,esp
-
-
-
- align 16
-
-lp256enc4:
- test eax,eax
- jz end_enc256
-
- cmp eax,4
- jl lp256enc
-
-
- load_and_xor4 esi,[ecx+0*16]
- add esi, 16*4
- aesenc4 [ecx+1*16]
- aesenc4 [ecx+2*16]
- aesenc4 [ecx+3*16]
- aesenc4 [ecx+4*16]
- aesenc4 [ecx+5*16]
- aesenc4 [ecx+6*16]
- aesenc4 [ecx+7*16]
- aesenc4 [ecx+8*16]
- aesenc4 [ecx+9*16]
- aesenc4 [ecx+10*16]
- aesenc4 [ecx+11*16]
- aesenc4 [ecx+12*16]
- aesenc4 [ecx+13*16]
- aesenclast4 [ecx+14*16]
-
- store4 esi+edi-16*4
- sub eax,4
- jmp lp256enc4
-
- align 16
-lp256enc:
-
- movdqu xmm0, [esi]
- add esi, 16
- movdqu xmm4,[ecx+0*16]
- pxor xmm0, xmm4
- aesenc1_u [ecx+1*16]
- aesenc1_u [ecx+2*16]
- aesenc1_u [ecx+3*16]
- aesenc1_u [ecx+4*16]
- aesenc1_u [ecx+5*16]
- aesenc1_u [ecx+6*16]
- aesenc1_u [ecx+7*16]
- aesenc1_u [ecx+8*16]
- aesenc1_u [ecx+9*16]
- aesenc1_u [ecx+10*16]
- aesenc1_u [ecx+11*16]
- aesenc1_u [ecx+12*16]
- aesenc1_u [ecx+13*16]
- aesenclast1_u [ecx+14*16]
-
- ; Store output encrypted data into CIPHERTEXT array
- movdqu [esi+edi-16], xmm0
- dec eax
- jnz lp256enc
-
-end_enc256:
-
-
- mov esp,ebp
- pop ebp
- pop edi
- pop esi
-
- ret
More information about the cvs-krb5
mailing list