diff options
Diffstat (limited to 'crypto/aes/asm/aes-x86_64.pl')
-rwxr-xr-x | crypto/aes/asm/aes-x86_64.pl | 181 |
1 files changed, 135 insertions, 46 deletions
diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl index 47f416375d1e..4d1dc9c70199 100755 --- a/crypto/aes/asm/aes-x86_64.pl +++ b/crypto/aes/asm/aes-x86_64.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== -# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -37,7 +44,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or die "can't locate x86_64-xlate.pl"; -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; $verticalspin=1; # unlike 32-bit version $verticalspin performs @@ -592,15 +599,23 @@ $code.=<<___; .hidden asm_AES_encrypt asm_AES_encrypt: AES_encrypt: +.cfi_startproc + mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 # allocate frame "above" key schedule - mov %rsp,%r10 lea -63(%rdx),%rcx # %rdx is key argument and \$-64,%rsp sub %rsp,%rcx @@ -610,7 +625,8 @@ AES_encrypt: sub \$32,%rsp mov %rsi,16(%rsp) # save out - mov %r10,24(%rsp) # save real stack pointer + mov %rax,24(%rsp) # save original stack pointer +.cfi_cfa_expression %rsp+24,deref,+8 .Lenc_prologue: mov %rdx,$key @@ -637,20 +653,29 @@ AES_encrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer +.cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + mov -48(%rsi),%r15 +.cfi_restore %r15 + mov -40(%rsi),%r14 +.cfi_restore %r14 + mov -32(%rsi),%r13 +.cfi_restore %r13 + mov -24(%rsi),%r12 +.cfi_restore %r12 + mov -16(%rsi),%rbp +.cfi_restore %rbp + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lenc_epilogue: ret +.cfi_endproc .size AES_encrypt,.-AES_encrypt ___ @@ -1190,15 +1215,23 @@ $code.=<<___; .hidden asm_AES_decrypt asm_AES_decrypt: AES_decrypt: +.cfi_startproc + mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 # allocate frame "above" key schedule - mov %rsp,%r10 lea -63(%rdx),%rcx # %rdx is key argument and \$-64,%rsp sub %rsp,%rcx @@ -1208,7 +1241,8 @@ AES_decrypt: sub \$32,%rsp mov %rsi,16(%rsp) # save out - mov %r10,24(%rsp) # save real stack pointer + mov %rax,24(%rsp) # save original stack pointer +.cfi_cfa_expression %rsp+24,deref,+8 .Ldec_prologue: mov %rdx,$key @@ -1237,20 +1271,29 @@ AES_decrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer +.cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) - mov (%rsi),%r15 - mov 8(%rsi),%r14 - mov 16(%rsi),%r13 - mov 24(%rsi),%r12 - mov 32(%rsi),%rbp - mov 40(%rsi),%rbx - lea 48(%rsi),%rsp + mov -48(%rsi),%r15 +.cfi_restore %r15 + mov -40(%rsi),%r14 +.cfi_restore %r14 + mov -32(%rsi),%r13 +.cfi_restore %r13 + mov -24(%rsi),%r12 +.cfi_restore %r12 + mov -16(%rsi),%rbp +.cfi_restore %rbp + mov -8(%rsi),%rbx +.cfi_restore %rbx + lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Ldec_epilogue: ret +.cfi_endproc .size AES_decrypt,.-AES_decrypt ___ #------------------------------------------------------------------# @@ -1282,30 +1325,42 @@ $code.=<<___; ___ } -# int private_AES_set_encrypt_key(const unsigned char *userKey, const int bits, +# int AES_set_encrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) $code.=<<___; -.globl private_AES_set_encrypt_key -.type private_AES_set_encrypt_key,\@function,3 +.globl AES_set_encrypt_key +.type AES_set_encrypt_key,\@function,3 .align 16 -private_AES_set_encrypt_key: +AES_set_encrypt_key: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp - push %r12 # redundant, but allows to share +.cfi_push %rbp + push %r12 # redundant, but allows to share +.cfi_push %r12 push %r13 # exception handler... +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 sub \$8,%rsp +.cfi_adjust_cfa_offset 8 .Lenc_key_prologue: call _x86_64_AES_set_encrypt_key mov 40(%rsp),%rbp +.cfi_restore %rbp mov 48(%rsp),%rbx +.cfi_restore %rbx add \$56,%rsp +.cfi_adjust_cfa_offset -56 .Lenc_key_epilogue: ret -.size private_AES_set_encrypt_key,.-private_AES_set_encrypt_key +.cfi_endproc +.size AES_set_encrypt_key,.-AES_set_encrypt_key .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent .align 16 @@ -1417,7 +1472,7 @@ $code.=<<___; xor %rax,%rax jmp .Lexit -.L14rounds: +.L14rounds: mov 0(%rsi),%rax # copy first 8 dwords mov 8(%rsi),%rbx mov 16(%rsi),%rcx @@ -1548,20 +1603,28 @@ $code.=<<___; ___ } -# int private_AES_set_decrypt_key(const unsigned char *userKey, const int bits, +# int AES_set_decrypt_key(const unsigned char *userKey, const int bits, # AES_KEY *key) $code.=<<___; -.globl private_AES_set_decrypt_key -.type private_AES_set_decrypt_key,\@function,3 +.globl AES_set_decrypt_key +.type AES_set_decrypt_key,\@function,3 .align 16 -private_AES_set_decrypt_key: +AES_set_decrypt_key: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 push %rdx # save key schedule +.cfi_adjust_cfa_offset 8 .Ldec_key_prologue: call _x86_64_AES_set_encrypt_key @@ -1615,15 +1678,23 @@ $code.=<<___; xor %rax,%rax .Labort: mov 8(%rsp),%r15 +.cfi_restore %r15 mov 16(%rsp),%r14 +.cfi_restore %r14 mov 24(%rsp),%r13 +.cfi_restore %r13 mov 32(%rsp),%r12 +.cfi_restore %r12 mov 40(%rsp),%rbp +.cfi_restore %rbp mov 48(%rsp),%rbx +.cfi_restore %rbx add \$56,%rsp +.cfi_adjust_cfa_offset -56 .Ldec_key_epilogue: ret -.size private_AES_set_decrypt_key,.-private_AES_set_decrypt_key +.cfi_endproc +.size AES_set_decrypt_key,.-AES_set_decrypt_key ___ # void AES_cbc_encrypt (const void char *inp, unsigned char *out, @@ -1653,25 +1724,32 @@ $code.=<<___; .hidden asm_AES_cbc_encrypt asm_AES_cbc_encrypt: AES_cbc_encrypt: +.cfi_startproc cmp \$0,%rdx # check length je .Lcbc_epilogue pushfq +.cfi_push 49 # %rflags push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lcbc_prologue: cld mov %r9d,%r9d # clear upper half of enc lea .LAES_Te(%rip),$sbox + lea .LAES_Td(%rip),%r10 cmp \$0,%r9 - jne .Lcbc_picked_te - lea .LAES_Td(%rip),$sbox -.Lcbc_picked_te: + cmoveq %r10,$sbox mov OPENSSL_ia32cap_P(%rip),%r10d cmp \$$speed_limit,%rdx @@ -1707,8 +1785,10 @@ AES_cbc_encrypt: .Lcbc_te_ok: xchg %rsp,$key +.cfi_def_cfa_register $key #add \$8,%rsp # reserve for return address! mov $key,$_rsp # save %rsp +.cfi_cfa_expression $_rsp,deref,+64 .Lcbc_fast_body: mov %rdi,$_inp # save copy of inp mov %rsi,$_out # save copy of out @@ -1938,7 +2018,7 @@ AES_cbc_encrypt: lea ($key,%rax),%rax mov %rax,$keyend - # pick Te4 copy which can't "overlap" with stack frame or key scdedule + # pick Te4 copy which can't "overlap" with stack frame or key schedule lea 2048($sbox),$sbox lea 768-8(%rsp),%rax sub $sbox,%rax @@ -2090,17 +2170,27 @@ AES_cbc_encrypt: .align 16 .Lcbc_exit: mov $_rsp,%rsi +.cfi_def_cfa %rsi,64 mov (%rsi),%r15 +.cfi_restore %r15 mov 8(%rsi),%r14 +.cfi_restore %r14 mov 16(%rsi),%r13 +.cfi_restore %r13 mov 24(%rsi),%r12 +.cfi_restore %r12 mov 32(%rsi),%rbp +.cfi_restore %rbp mov 40(%rsi),%rbx +.cfi_restore %rbx lea 48(%rsi),%rsp +.cfi_def_cfa %rsp,16 .Lcbc_popfq: popfq +.cfi_pop 49 # %rflags .Lcbc_epilogue: ret +.cfi_endproc .size AES_cbc_encrypt,.-AES_cbc_encrypt ___ } @@ -2573,7 +2663,6 @@ block_se_handler: jae .Lin_block_prologue mov 24(%rax),%rax # pull saved real stack pointer - lea 48(%rax),%rax # adjust... mov -8(%rax),%rbx mov -16(%rax),%rbp @@ -2770,13 +2859,13 @@ cbc_se_handler: .rva .LSEH_end_AES_decrypt .rva .LSEH_info_AES_decrypt - .rva .LSEH_begin_private_AES_set_encrypt_key - .rva .LSEH_end_private_AES_set_encrypt_key - .rva .LSEH_info_private_AES_set_encrypt_key + .rva .LSEH_begin_AES_set_encrypt_key + .rva .LSEH_end_AES_set_encrypt_key + .rva .LSEH_info_AES_set_encrypt_key - .rva .LSEH_begin_private_AES_set_decrypt_key - .rva .LSEH_end_private_AES_set_decrypt_key - .rva .LSEH_info_private_AES_set_decrypt_key + .rva .LSEH_begin_AES_set_decrypt_key + .rva .LSEH_end_AES_set_decrypt_key + .rva .LSEH_info_AES_set_decrypt_key .rva .LSEH_begin_AES_cbc_encrypt .rva .LSEH_end_AES_cbc_encrypt @@ -2792,11 +2881,11 @@ cbc_se_handler: .byte 9,0,0,0 .rva block_se_handler .rva .Ldec_prologue,.Ldec_epilogue # HandlerData[] -.LSEH_info_private_AES_set_encrypt_key: +.LSEH_info_AES_set_encrypt_key: .byte 9,0,0,0 .rva key_se_handler .rva .Lenc_key_prologue,.Lenc_key_epilogue # HandlerData[] -.LSEH_info_private_AES_set_decrypt_key: +.LSEH_info_AES_set_decrypt_key: .byte 9,0,0,0 .rva key_se_handler .rva .Ldec_key_prologue,.Ldec_key_epilogue # HandlerData[] |