diff options
Diffstat (limited to 'src/lib/crypto/builtin/aes/aescrypt.asm')
| -rw-r--r-- | src/lib/crypto/builtin/aes/aescrypt.asm | 402 |
1 files changed, 402 insertions, 0 deletions
diff --git a/src/lib/crypto/builtin/aes/aescrypt.asm b/src/lib/crypto/builtin/aes/aescrypt.asm new file mode 100644 index 0000000000000..35a6818b6ec3d --- /dev/null +++ b/src/lib/crypto/builtin/aes/aescrypt.asm @@ -0,0 +1,402 @@ + +; ------------------------------------------------------------------------- +; Copyright (c) 2001, Dr Brian Gladman <brg@gladman.uk.net>, Worcester, UK. +; All rights reserved. +; +; LICENSE TERMS +; +; The free distribution and use of this software in both source and binary +; form is allowed (with or without changes) provided that: +; +; 1. distributions of this source code include the above copyright +; notice, this list of conditions and the following disclaimer; +; +; 2. distributions in binary form include the above copyright +; notice, this list of conditions and the following disclaimer +; in the documentation and/or other associated materials; +; +; 3. the copyright holder's name is not used to endorse products +; built using this software without specific written permission. +; +; DISCLAIMER +; +; This software is provided 'as is' with no explcit or implied warranties +; in respect of any properties, including, but not limited to, correctness +; and fitness for purpose. +; ------------------------------------------------------------------------- +; Issue Date: 15/01/2002 + +; An AES (Rijndael) implementation for the Pentium MMX family using the NASM +; assembler <http://www.web-sites.co.uk/nasm/>. This version only implements +; the standard AES block length (128 bits, 16 bytes) with the same interface +; as that used in my C/C++ implementation. This code does not preserve the +; eax, ecx or edx registers or the arithmetic status flags. However, the ebx, +; esi, edi, and ebp registers are preserved across calls. Only encryption +; and decryption are implemented here, the key schedule code being that from +; compiling aes.c with USE_ASM defined. 
; This code uses VC++ register saving conventions; if it is used with
; another compiler, its conventions for using and saving registers will
; need to be checked.

    section .text use32

; C prototypes of the two entry points exported below.  All three
; arguments are read from the stack (see the parameter offsets further
; down); the result is returned in eax.
;
; aes_rval aes_enc_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);
; aes_rval aes_dec_blk(const unsigned char in_blk[], unsigned char out_blk[], const aes_ctx cx[1]);

    global  _aes_enc_blk
    global  _aes_dec_blk

; Lookup tables defined outside this file.  _ft_tab and _it_tab are the
; default tables of the forward and inverse round macros; _fl_tab and
; _il_tab are passed explicitly for the last round of each direction.
; Each table is addressed as four sub-tables placed tlen bytes apart.

    extern  _ft_tab
    extern  _fl_tab
    extern  _it_tab
    extern  _il_tab

; Build switches (both disabled by default).  EMMS_ON, which makes the
; routines execute emms before returning, is only defined when both
; USE_MMX and USE_EMMS are defined.

;%define USE_MMX    ; include this to use MMX registers for temporary storage
;%define USE_EMMS   ; include this if you make use of floating point operations

%ifdef USE_MMX
%ifdef USE_EMMS
%define EMMS_ON
%endif
%endif

tlen:   equ 1024    ; length in bytes of each of 4 'xor' arrays (256 32-bit words)

; offsets to parameters with one register pushed onto stack, i.e. esp
; points at the saved register with the return address above it; code
; that has pushed further registers adds 4 per extra push

in_blk: equ 8       ; input byte array address parameter
out_blk:equ 12      ; output byte array address parameter
ctx:    equ 16      ; AES context structure

; offsets in context structure

ksch:   equ 0       ; key schedule base address (used by both routines)
nrnd:   equ 256     ; number of rounds
nblk:   equ 260     ; flags: value 1 is tested by the encryption routine
                    ; and value 2 by the decryption routine

; register mapping for encrypt and decrypt subroutines

%define r0  eax
%define r1  ebx
%define r2  ecx
%define r3  edx
%define r4  esi
%define r5  edi
%define r6  ebp

; The column macros form byte register names by appending 'l' or 'h' to
; a register argument; these defines map the resulting names onto the
; low and second bytes of eax, ebx, ecx and edx.  Only these four
; registers have such aliases.

%define eaxl    al
%define eaxh    ah
%define ebxl    bl
%define ebxh    bh
%define ecxl    cl
%define ecxh    ch
%define edxl    dl
%define edxh    dh

; This macro takes a 32-bit word representing a column and uses
; each of its four bytes to index into four tables of 256 32-bit
; words to obtain values that are then xored into the appropriate
; output registers r0, r1, r4 or r5.
;
; Parameters (do_col):
;   %1  out_state[0]
;   %2  out_state[1]
;   %3  out_state[2]
;   %4  out_state[3]
;   %5  table base address
;   %6  input register for the round (destroyed)
;   %7  scratch register for the round
;
; Byte 0 of the input selects from the table at %5, byte 1 from %5+tlen,
; byte 2 from %5+2*tlen and byte 3 from %5+3*tlen.  %6 and %7 must be
; registers that have 'l' and 'h' byte aliases (eax, ebx, ecx or edx).

%macro do_col 7

    movzx   %7,%6l                  ; byte 0 of the input column
    xor     %1,[4*%7+%5]
    movzx   %7,%6h                  ; byte 1
    shr     %6,16                   ; bring bytes 2 and 3 into the low half
    xor     %2,[4*%7+%5+tlen]
    movzx   %7,%6l                  ; byte 2
    movzx   %6,%6h                  ; byte 3 (input register now holds only this)
    xor     %3,[4*%7+%5+2*tlen]
    xor     %4,[4*%6+%5+3*tlen]

%endmacro

; do_fcol: first column of a forward round.  It behaves like do_col but
; initialises the output registers from the key schedule instead of
; accumulating into them.
;
; Parameters:
;   %1  output register, loaded from [%8]    before the table xor
;   %2  output register, loaded from [%8+12] before the table xor
;   %3  output register, loaded from [%8+8]  before the table xor
;   %4  output register, loaded from [%8+4]  before the table xor
;   %5  table base address
;   %6  input register (destroyed); on exit it holds the value that %3
;       had on entry
;   %7  scratch register
;   %8  address expression of the round key

%macro do_fcol 8

    mov     %1,[%8]
    movzx   %7,%6l
    mov     %2,[%8+12]
    xor     %1,[4*%7+%5]
    mov     %4,[%8+ 4]
    movzx   %7,%6h
    shr     %6,16
    xor     %2,[4*%7+%5+tlen]
    movzx   %7,%6l
    movzx   %6,%6h
    xor     %4,[4*%6+%5+3*tlen]
    mov     %6,%3                   ; keep the old third output before it is reloaded
    mov     %3,[%8+ 8]
    xor     %3,[4*%7+%5+2*tlen]

%endmacro

; do_icol: first column of an inverse round.  Identical to do_fcol except
; for the round key words given to the second and fourth outputs.
;
; Parameters:
;   %1  output register, loaded from [%8]    before the table xor
;   %2  output register, loaded from [%8+4]  before the table xor
;   %3  output register, loaded from [%8+8]  before the table xor
;   %4  output register, loaded from [%8+12] before the table xor
;   %5  table base address
;   %6  input register (destroyed); on exit it holds the value that %3
;       had on entry
;   %7  scratch register
;   %8  address expression of the round key

%macro do_icol 8

    mov     %1,[%8]
    movzx   %7,%6l
    mov     %2,[%8+ 4]
    xor     %1,[4*%7+%5]
    mov     %4,[%8+12]
    movzx   %7,%6h
    shr     %6,16
    xor     %2,[4*%7+%5+tlen]
    movzx   %7,%6l
    movzx   %6,%6h
    xor     %4,[4*%6+%5+3*tlen]
    mov     %6,%3                   ; keep the old third output before it is reloaded
    mov     %3,[%8+ 8]
    xor     %3,[4*%7+%5+2*tlen]

%endmacro

; These macros implement either MMX or stack based local variables
;
;   save    slot, reg       store reg in temporary number 'slot'
;   restore reg, slot       reload reg from temporary number 'slot'
;
; With USE_MMX the temporary is the MMX register mm<slot>; otherwise it
; is the dword at [esp+4*slot], so the code using these macros must have
; reserved that stack space and must keep esp unchanged between a save
; and the matching restore.

%ifdef USE_MMX

%macro save 2
    movd    mm%1,%2
%endmacro

%macro restore 2
    movd    %1,mm%2
%endmacro

%else

%macro save 2
    mov     [esp+4*%1],%2
%endmacro

%macro restore 2
    mov     %1,[esp+4*%2]
%endmacro

%endif

; This macro performs a forward encryption cycle.
; fwd_rnd is entered with the previous round's column values in r0, r1,
; r4 and r5 and exits with the new values in the same registers.  Two
; temporaries are kept in the MMX registers mm0-mm1 when USE_MMX is
; defined, otherwise in the two stack slots at [esp] and [esp+4].
;
; Parameters:
;   %1  address expression of the round key for this round
;   %2  table base address (optional, defaults to _ft_tab)
;
; r2 and r3 serve as input and scratch registers and are destroyed.

%macro fwd_rnd 1-2 _ft_tab

; save the current column values that the first column step overwrites
; (MMX registers or stack slots, depending on USE_MMX)

    mov     r2,r0
    save    0,r1
    save    1,r5

; compute new column values: the first step loads the outputs from the
; round key and leaves the old r4 in r2 for the second step

    do_fcol r0,r5,r4,r1, %2, r2,r3, %1
    do_col  r4,r1,r0,r5, %2, r2,r3
    restore r2,0
    do_col  r1,r0,r5,r4, %2, r2,r3
    restore r2,1
    do_col  r5,r4,r1,r0, %2, r2,r3

%endmacro

; This macro performs an inverse (decryption) cycle.  It is entered with
; the previous round's column values in r0, r1, r4 and r5 and exits with
; the new values in the same registers.  Two temporaries are kept in the
; MMX registers mm0-mm1 when USE_MMX is defined, otherwise in the two
; stack slots at [esp] and [esp+4].
;
; Parameters:
;   %1  address expression of the round key for this round
;   %2  table base address (optional, defaults to _it_tab)
;
; r2 and r3 serve as input and scratch registers and are destroyed.

%macro inv_rnd 1-2 _it_tab

; save the current column values that the first column step overwrites
; (MMX registers or stack slots, depending on USE_MMX)

    mov     r2,r0
    save    0,r1
    save    1,r5

; compute new column values: the first step loads the outputs from the
; round key and leaves the old r4 in r2 for the second step

    do_icol r0,r1,r4,r5, %2, r2,r3, %1
    do_col  r4,r5,r0,r1, %2, r2,r3
    restore r2,0
    do_col  r1,r4,r5,r0, %2, r2,r3
    restore r2,1
    do_col  r5,r0,r1,r4, %2, r2,r3

%endmacro

; AES (Rijndael) Encryption Subroutine
;
; In:   [esp+4]  in_blk   address of the 16 input bytes
;       [esp+8]  out_blk  address of the 16 output bytes
;       [esp+12] cx       address of the AES context
; Out:  eax = 1 when the block was processed, eax = 0 when the context
;       flag with value 1 is clear or the round count is zero (nothing
;       is written to out_blk in that case)
; Uses: ecx, edx and the flags are destroyed; ebx, esi, edi and ebp are
;       saved and restored

_aes_enc_blk:
    push    ebp
    mov     ebp,[esp+ctx]           ; pointer to context
    xor     eax,eax                 ; eax = 0: failure result and compare value
    test    [ebp+nblk],byte 1       ; encryption/decryption flags
    je      .0
    cmp     eax,[ebp+nrnd]          ; the round count must be non-zero
    jne     short .1
.0: pop     ebp
    ret

; CAUTION: the order and the values used in these assigns
; rely on the register mappings

.1: push    ebx
    mov     r2,[esp+in_blk+4]       ; +4 for the second pushed register
    push    esi
    mov     r3,[ebp+nrnd]           ; number of rounds
    push    edi
    lea     r6,[ebp+ksch]           ; key pointer

; input four columns and xor in first round key

    mov     r0,[r2]
    mov     r1,[r2+4]
    mov     r4,[r2+8]
    mov     r5,[r2+12]
    xor     r0,[r6]
    xor     r1,[r6+4]
    xor     r4,[r6+8]
    xor     r5,[r6+12]

%ifndef USE_MMX
    sub     esp,8                   ; space for register saves on stack
%endif

; Select the entry point into the unrolled rounds.  r6 is advanced so
; that the last ten rounds always use the offsets r6 to r6+144.
; NOTE(review): a round count other than 10 or 12 falls through to the
; 14 round path without further validation.

    add     r6,16                   ; increment to next round key
    sub     r3,10
    je      .4                      ; 10 rounds for 128-bit key
    add     r6,32
    sub     r3,2
    je      .3                      ; 12 rounds for 192-bit key
    add     r6,32

.2: fwd_rnd r6-64                   ; 14 rounds for 256-bit key
    fwd_rnd r6-48
.3: fwd_rnd r6-32                   ; 12 rounds for 192-bit key
    fwd_rnd r6-16
.4: fwd_rnd r6                      ; 10 rounds for 128-bit key
    fwd_rnd r6+ 16
    fwd_rnd r6+ 32
    fwd_rnd r6+ 48
    fwd_rnd r6+ 64
    fwd_rnd r6+ 80
    fwd_rnd r6+ 96
    fwd_rnd r6+112
    fwd_rnd r6+128
    fwd_rnd r6+144,_fl_tab          ; last round uses a different table

; move final values to the output array.  CAUTION: the
; order of these assigns rely on the register mappings
; (each result register is stored before the pop that restores the
; caller's value of that same register)

%ifndef USE_MMX
    add     esp,8                   ; release the two save slots
%endif
    mov     r6,[esp+out_blk+12]     ; +12 for the three extra pushed registers
    mov     [r6+12],r5
    pop     edi
    mov     [r6+8],r4
    pop     esi
    mov     [r6+4],r1
    pop     ebx
    mov     [r6],r0
    pop     ebp
    mov     eax,1                   ; success result
%ifdef EMMS_ON
    emms
%endif
    ret

; AES (Rijndael) Decryption Subroutine
;
; In:   [esp+4]  in_blk   address of the 16 input bytes
;       [esp+8]  out_blk  address of the 16 output bytes
;       [esp+12] cx       address of the AES context
; Out:  eax = 1 when the block was processed, eax = 0 when the context
;       flag with value 2 is clear or the round count is zero (nothing
;       is written to out_blk in that case)
; Uses: ecx, edx and the flags are destroyed; ebx, esi, edi and ebp are
;       saved and restored

_aes_dec_blk:
    push    ebp
    mov     ebp,[esp+ctx]           ; pointer to context
    xor     eax,eax                 ; eax = 0: failure result and compare value
    test    [ebp+nblk],byte 2       ; encryption/decryption flags
    je      .0
    cmp     eax,[ebp+nrnd]          ; the round count must be non-zero
    jne     short .1
.0: pop     ebp
    ret

; CAUTION: the order and the values used in these assigns
; rely on the register mappings

.1: push    ebx
    mov     r2,[esp+in_blk+4]       ; +4 for the second pushed register
    push    esi
    mov     r3,[ebp+nrnd]           ; number of rounds
    push    edi
    lea     r6,[ebp+ksch]           ; key pointer
    mov     r0,r3
    shl     r0,4                    ; 16 bytes of round key per round
    add     r6,r0                   ; start at the last round key

; input four columns and xor in first round key

    mov     r0,[r2]
    mov     r1,[r2+4]
    mov     r4,[r2+8]
    mov     r5,[r2+12]
    xor     r0,[r6]
    xor     r1,[r6+4]
    xor     r4,[r6+8]
    xor     r5,[r6+12]

%ifndef USE_MMX
    sub     esp,8                   ; space for register saves on stack
%endif

; Select the entry point into the unrolled rounds.  Decryption walks the
; key schedule downwards; r6 is moved so that the last ten rounds always
; use the offsets r6 down to r6-144.
; NOTE(review): a round count other than 10 or 12 falls through to the
; 14 round path without further validation.

    sub     r6,16                   ; step back to the next round key
    sub     r3,10
    je      .4                      ; 10 rounds for 128-bit key
    sub     r6,32
    sub     r3,2
    je      .3                      ; 12 rounds for 192-bit key
    sub     r6,32

.2: inv_rnd r6+64                   ; 14 rounds for 256-bit key
    inv_rnd r6+48
.3: inv_rnd r6+32                   ; 12 rounds for 192-bit key
    inv_rnd r6+16
.4: inv_rnd r6                      ; 10 rounds for 128-bit key
    inv_rnd r6- 16
    inv_rnd r6- 32
    inv_rnd r6- 48
    inv_rnd r6- 64
    inv_rnd r6- 80
    inv_rnd r6- 96
    inv_rnd r6-112
    inv_rnd r6-128
    inv_rnd r6-144,_il_tab          ; last round uses a different table

; move final values to the output array.  CAUTION: the
; order of these assigns rely on the register mappings
; (each result register is stored before the pop that restores the
; caller's value of that same register)

%ifndef USE_MMX
    add     esp,8                   ; release the two save slots
%endif
    mov     r6,[esp+out_blk+12]     ; +12 for the three extra pushed registers
    mov     [r6+12],r5
    pop     edi
    mov     [r6+8],r4
    pop     esi
    mov     [r6+4],r1
    pop     ebx
    mov     [r6],r0
    pop     ebp
    mov     eax,1                   ; success result
%ifdef EMMS_ON
    emms
%endif
    ret

    end
