summaryrefslogtreecommitdiff
path: root/src/lib/crypto/builtin/aes/aescrypt.asm
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/crypto/builtin/aes/aescrypt.asm')
-rw-r--r--src/lib/crypto/builtin/aes/aescrypt.asm402
1 files changed, 402 insertions, 0 deletions
diff --git a/src/lib/crypto/builtin/aes/aescrypt.asm b/src/lib/crypto/builtin/aes/aescrypt.asm
new file mode 100644
index 0000000000000..35a6818b6ec3d
--- /dev/null
+++ b/src/lib/crypto/builtin/aes/aescrypt.asm
@@ -0,0 +1,402 @@
+
+; -------------------------------------------------------------------------
+; Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK.
+; All rights reserved.
+;
+; LICENSE TERMS
+;
+; The free distribution and use of this software in both source and binary
+; form is allowed (with or without changes) provided that:
+;
+; 1. distributions of this source code include the above copyright
+; notice, this list of conditions and the following disclaimer;
+;
+; 2. distributions in binary form include the above copyright
+; notice, this list of conditions and the following disclaimer
+; in the documentation and/or other associated materials;
+;
+; 3. the copyright holder's name is not used to endorse products
+; built using this software without specific written permission.
+;
+; DISCLAIMER
+;
+; This software is provided 'as is' with no explcit or implied warranties
+; in respect of any properties, including, but not limited to, correctness
+; and fitness for purpose.
+; -------------------------------------------------------------------------
+; Issue Date: 15/01/2002
+
+; An AES (Rijndael) implementation for the Pentium MMX family using the NASM
+; assembler <http://www.web-sites.co.uk/nasm/>. This version only implements
+; the standard AES block length (128 bits, 16 bytes) with the same interface
+; as that used in my C/C++ implementation. This code does not preserve the
+; eax, ecx or edx registers or the artihmetic status flags. However, the ebx,
+; esi, edi, and ebp registers are preserved across calls. Only encryption
+; and decryption are implemented here, the key schedule code being that from
+; compiling aes.c with USE_ASM defined. This code uses VC++ register saving
+; conentions; if it is used with another compiler, its conventions for using
+; and saving registers will need to be checked.
+
+ section .text use32
+
+; aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
+; aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
+
+ global _aes_enc_blk
+ global _aes_dec_blk
+
+ extern _ft_tab
+ extern _fl_tab
+ extern _it_tab
+ extern _il_tab
+
+;%define USE_MMX ; include this to use MMX registers for temporary storage
+;%define USE_EMMS ; include this if you make use of floating point operations
+
+%ifdef USE_MMX
+%ifdef USE_EMMS
+%define EMMS_ON
+%endif
+%endif
+
+tlen: equ 1024 ; length of each of 4 'xor' arrays (256 32-bit words)
+
+; offsets to parameters with one register pushed onto stack
+
+in_blk: equ 8 ; input byte array address parameter
+out_blk:equ 12 ; output byte array address parameter
+ctx: equ 16 ; AES context structure
+
+; offsets in context structure
+
+ksch: equ 0 ; encryption key schedule base address
+nrnd: equ 256 ; number of rounds
+nblk: equ 260 ; number of rounds
+
+; register mapping for encrypt and decrypt subroutines
+
+%define r0 eax
+%define r1 ebx
+%define r2 ecx
+%define r3 edx
+%define r4 esi
+%define r5 edi
+%define r6 ebp
+
+%define eaxl al
+%define eaxh ah
+%define ebxl bl
+%define ebxh bh
+%define ecxl cl
+%define ecxh ch
+%define edxl dl
+%define edxh dh
+
+; This macro takes a 32-bit word representing a column and uses
+; each of its four bytes to index into four tables of 256 32-bit
+; words to obtain values that are then xored into the appropriate
+; output registers r0, r1, r4 or r5.
+
+; Parameters:
+; %1 out_state[0]
+; %2 out_state[1]
+; %3 out_state[2]
+; %4 out_state[3]
+; %5 table base address
+; %6 input register for the round (destroyed)
+; %7 scratch register for the round
+
+%macro do_col 7
+
+ movzx %7,%6l
+ xor %1,[4*%7+%5]
+ movzx %7,%6h
+ shr %6,16
+ xor %2,[4*%7+%5+tlen]
+ movzx %7,%6l
+ movzx %6,%6h
+ xor %3,[4*%7+%5+2*tlen]
+ xor %4,[4*%6+%5+3*tlen]
+
+%endmacro
+
+; initialise output registers from the key schedule
+
+%macro do_fcol 8
+
+ mov %1,[%8]
+ movzx %7,%6l
+ mov %2,[%8+12]
+ xor %1,[4*%7+%5]
+ mov %4,[%8+ 4]
+ movzx %7,%6h
+ shr %6,16
+ xor %2,[4*%7+%5+tlen]
+ movzx %7,%6l
+ movzx %6,%6h
+ xor %4,[4*%6+%5+3*tlen]
+ mov %6,%3
+ mov %3,[%8+ 8]
+ xor %3,[4*%7+%5+2*tlen]
+
+%endmacro
+
+; initialise output registers from the key schedule
+
+%macro do_icol 8
+
+ mov %1,[%8]
+ movzx %7,%6l
+ mov %2,[%8+ 4]
+ xor %1,[4*%7+%5]
+ mov %4,[%8+12]
+ movzx %7,%6h
+ shr %6,16
+ xor %2,[4*%7+%5+tlen]
+ movzx %7,%6l
+ movzx %6,%6h
+ xor %4,[4*%6+%5+3*tlen]
+ mov %6,%3
+ mov %3,[%8+ 8]
+ xor %3,[4*%7+%5+2*tlen]
+
+%endmacro
+
+; These macros implement either MMX or stack based local variables
+
+%ifdef USE_MMX
+
+%macro save 2
+ movd mm%1,%2
+%endmacro
+
+%macro restore 2
+ movd %1,mm%2
+%endmacro
+
+%else
+
+%macro save 2
+ mov [esp+4*%1],%2
+%endmacro
+
+%macro restore 2
+ mov %1,[esp+4*%2]
+%endmacro
+
+%endif
+
+; This macro performs a forward encryption cycle. It is entered with
+; the first previous round column values in r0, r1, r4 and r5 and
+; exits with the final values in the same registers, using the MMX
+; registers mm0-mm1 for temporary storage
+
+%macro fwd_rnd 1-2 _ft_tab
+
+; mov current column values into the MMX registers
+
+ mov r2,r0
+ save 0,r1
+ save 1,r5
+
+; compute new column values
+
+ do_fcol r0,r5,r4,r1, %2, r2,r3, %1
+ do_col r4,r1,r0,r5, %2, r2,r3
+ restore r2,0
+ do_col r1,r0,r5,r4, %2, r2,r3
+ restore r2,1
+ do_col r5,r4,r1,r0, %2, r2,r3
+
+%endmacro
+
+; This macro performs an inverse encryption cycle. It is entered with
+; the first previous round column values in r0, r1, r4 and r5 and
+; exits with the final values in the same registers, using the MMX
+; registers mm0-mm1 for temporary storage
+
+%macro inv_rnd 1-2 _it_tab
+
+; mov current column values into the MMX registers
+
+ mov r2,r0
+ save 0,r1
+ save 1,r5
+
+; compute new column values
+
+ do_icol r0,r1,r4,r5, %2, r2,r3, %1
+ do_col r4,r5,r0,r1, %2, r2,r3
+ restore r2,0
+ do_col r1,r4,r5,r0, %2, r2,r3
+ restore r2,1
+ do_col r5,r0,r1,r4, %2, r2,r3
+
+%endmacro
+
+; AES (Rijndael) Encryption Subroutine
+
+_aes_enc_blk:
+ push ebp
+ mov ebp,[esp+ctx] ; pointer to context
+ xor eax,eax
+ test [ebp+nblk],byte 1
+ je .0
+ cmp eax,[ebp+nrnd] ; encryption/decryption flags
+ jne short .1
+.0: pop ebp
+ ret
+
+; CAUTION: the order and the values used in these assigns
+; rely on the register mappings
+
+.1: push ebx
+ mov r2,[esp+in_blk+4]
+ push esi
+ mov r3,[ebp+nrnd] ; number of rounds
+ push edi
+ lea r6,[ebp+ksch] ; key pointer
+
+; input four columns and xor in first round key
+
+ mov r0,[r2]
+ mov r1,[r2+4]
+ mov r4,[r2+8]
+ mov r5,[r2+12]
+ xor r0,[r6]
+ xor r1,[r6+4]
+ xor r4,[r6+8]
+ xor r5,[r6+12]
+
+%ifndef USE_MMX
+ sub esp,8 ; space for register saves on stack
+%endif
+ add r6,16 ; increment to next round key
+ sub r3,10
+ je .4 ; 10 rounds for 128-bit key
+ add r6,32
+ sub r3,2
+ je .3 ; 12 rounds for 128-bit key
+ add r6,32
+
+.2: fwd_rnd r6-64 ; 14 rounds for 128-bit key
+ fwd_rnd r6-48
+.3: fwd_rnd r6-32 ; 12 rounds for 128-bit key
+ fwd_rnd r6-16
+.4: fwd_rnd r6 ; 10 rounds for 128-bit key
+ fwd_rnd r6+ 16
+ fwd_rnd r6+ 32
+ fwd_rnd r6+ 48
+ fwd_rnd r6+ 64
+ fwd_rnd r6+ 80
+ fwd_rnd r6+ 96
+ fwd_rnd r6+112
+ fwd_rnd r6+128
+ fwd_rnd r6+144,_fl_tab ; last round uses a different table
+
+; move final values to the output array. CAUTION: the
+; order of these assigns rely on the register mappings
+
+%ifndef USE_MMX
+ add esp,8
+%endif
+ mov r6,[esp+out_blk+12]
+ mov [r6+12],r5
+ pop edi
+ mov [r6+8],r4
+ pop esi
+ mov [r6+4],r1
+ pop ebx
+ mov [r6],r0
+ pop ebp
+ mov eax,1
+%ifdef EMMS_ON
+ emms
+%endif
+ ret
+
+; AES (Rijndael) Decryption Subroutine
+
+_aes_dec_blk:
+ push ebp
+ mov ebp,[esp+ctx] ; pointer to context
+ xor eax,eax
+ test [ebp+nblk],byte 2
+ je .0
+ cmp eax,[ebp+nrnd] ; encryption/decryption flags
+ jne short .1
+.0: pop ebp
+ ret
+
+; CAUTION: the order and the values used in these assigns
+; rely on the register mappings
+
+.1: push ebx
+ mov r2,[esp+in_blk+4]
+ push esi
+ mov r3,[ebp+nrnd] ; number of rounds
+ push edi
+ lea r6,[ebp+ksch] ; key pointer
+ mov r0,r3
+ shl r0,4
+ add r6,r0
+
+; input four columns and xor in first round key
+
+ mov r0,[r2]
+ mov r1,[r2+4]
+ mov r4,[r2+8]
+ mov r5,[r2+12]
+ xor r0,[r6]
+ xor r1,[r6+4]
+ xor r4,[r6+8]
+ xor r5,[r6+12]
+
+%ifndef USE_MMX
+ sub esp,8 ; space for register saves on stack
+%endif
+ sub r6,16 ; increment to next round key
+ sub r3,10
+ je .4 ; 10 rounds for 128-bit key
+ sub r6,32
+ sub r3,2
+ je .3 ; 12 rounds for 128-bit key
+ sub r6,32
+
+.2: inv_rnd r6+64 ; 14 rounds for 128-bit key
+ inv_rnd r6+48
+.3: inv_rnd r6+32 ; 12 rounds for 128-bit key
+ inv_rnd r6+16
+.4: inv_rnd r6 ; 10 rounds for 128-bit key
+ inv_rnd r6- 16
+ inv_rnd r6- 32
+ inv_rnd r6- 48
+ inv_rnd r6- 64
+ inv_rnd r6- 80
+ inv_rnd r6- 96
+ inv_rnd r6-112
+ inv_rnd r6-128
+ inv_rnd r6-144,_il_tab ; last round uses a different table
+
+; move final values to the output array. CAUTION: the
+; order of these assigns rely on the register mappings
+
+%ifndef USE_MMX
+ add esp,8
+%endif
+ mov r6,[esp+out_blk+12]
+ mov [r6+12],r5
+ pop edi
+ mov [r6+8],r4
+ pop esi
+ mov [r6+4],r1
+ pop ebx
+ mov [r6],r0
+ pop ebp
+ mov eax,1
+%ifdef EMMS_ON
+ emms
+%endif
+ ret
+
+ end