diff options
Diffstat (limited to 'sys/crypto/openssl/powerpc64')
-rw-r--r-- | sys/crypto/openssl/powerpc64/aes-gcm-ppc.S | 1340 | ||||
-rw-r--r-- | sys/crypto/openssl/powerpc64/chachap10-ppc.S | 1228 | ||||
-rw-r--r-- | sys/crypto/openssl/powerpc64/ecp_nistp384-ppc64.S | 1526 | ||||
-rw-r--r-- | sys/crypto/openssl/powerpc64/keccak1600-ppc64.S | 5 | ||||
-rw-r--r-- | sys/crypto/openssl/powerpc64/ppccpuid.S | 14 |
5 files changed, 4111 insertions, 2 deletions
diff --git a/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S b/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S new file mode 100644 index 000000000000..2ff143c42ab7 --- /dev/null +++ b/sys/crypto/openssl/powerpc64/aes-gcm-ppc.S @@ -0,0 +1,1340 @@ +/* Do not modify. This file is auto-generated from aes-gcm-ppc.pl. */ +.machine "any" +.abiversion 2 +.text + + + + + +.macro .Loop_aes_middle4x + xxlor 19+32, 1, 1 + xxlor 20+32, 2, 2 + xxlor 21+32, 3, 3 + xxlor 22+32, 4, 4 + + .long 0x11EF9D08 + .long 0x12109D08 + .long 0x12319D08 + .long 0x12529D08 + + .long 0x11EFA508 + .long 0x1210A508 + .long 0x1231A508 + .long 0x1252A508 + + .long 0x11EFAD08 + .long 0x1210AD08 + .long 0x1231AD08 + .long 0x1252AD08 + + .long 0x11EFB508 + .long 0x1210B508 + .long 0x1231B508 + .long 0x1252B508 + + xxlor 19+32, 5, 5 + xxlor 20+32, 6, 6 + xxlor 21+32, 7, 7 + xxlor 22+32, 8, 8 + + .long 0x11EF9D08 + .long 0x12109D08 + .long 0x12319D08 + .long 0x12529D08 + + .long 0x11EFA508 + .long 0x1210A508 + .long 0x1231A508 + .long 0x1252A508 + + .long 0x11EFAD08 + .long 0x1210AD08 + .long 0x1231AD08 + .long 0x1252AD08 + + .long 0x11EFB508 + .long 0x1210B508 + .long 0x1231B508 + .long 0x1252B508 + + xxlor 23+32, 9, 9 + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 +.endm + + + + + +.macro .Loop_aes_middle8x + xxlor 23+32, 1, 1 + xxlor 24+32, 2, 2 + xxlor 25+32, 3, 3 + xxlor 26+32, 4, 4 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + + xxlor 23+32, 5, 5 + xxlor 24+32, 6, 6 + xxlor 25+32, 7, 7 + xxlor 26+32, 8, 8 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + .long 0x11EFCD08 + .long 0x1210CD08 + .long 0x1231CD08 + .long 0x1252CD08 + .long 0x1273CD08 + .long 0x1294CD08 + .long 0x12B5CD08 + .long 0x12D6CD08 + + .long 0x11EFD508 + .long 0x1210D508 + .long 0x1231D508 + .long 0x1252D508 + .long 0x1273D508 + .long 0x1294D508 + .long 0x12B5D508 + .long 0x12D6D508 + + xxlor 23+32, 9, 9 + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 +.endm + + + + +ppc_aes_gcm_ghash: + vxor 15, 15, 0 + + xxlxor 29, 29, 29 + + .long 0x12EC7CC8 + .long 0x130984C8 + .long 0x13268CC8 + .long 0x134394C8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D7CC8 + .long 0x132A84C8 + .long 0x13478CC8 + .long 0x136494C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E7CC8 + .long 0x132B84C8 + .long 0x13488CC8 + .long 0x136594C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 23, 23, 27 + + xxlor 32, 23+32, 23+32 + + blr + + + + + +.macro ppc_aes_gcm_ghash2_4x + + vxor 15, 15, 0 + + xxlxor 29, 29, 29 + + .long 0x12EC7CC8 + .long 0x130984C8 + .long 0x13268CC8 + .long 0x134394C8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D7CC8 + .long 0x132A84C8 + .long 0x13478CC8 + .long 0x136494C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + + vxor 24, 24, 27 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E7CC8 + .long 0x132B84C8 + .long 0x13488CC8 + .long 0x136594C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 27, 23, 27 + + + .long 0x1309A4C8 + .long 0x1326ACC8 + .long 0x1343B4C8 + vxor 19, 19, 27 + .long 0x12EC9CC8 + + vxor 23, 23, 24 + vxor 23, 23, 25 + vxor 23, 23, 26 + + .long 0x130D9CC8 + .long 0x132AA4C8 + .long 0x1347ACC8 + .long 0x1364B4C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + + + .long 0x139714C8 + + xxlor 29+32, 29, 29 + + vxor 24, 24, 27 + vsldoi 26, 24, 29, 8 + vsldoi 29, 29, 24, 8 + vxor 23, 23, 26 + + vsldoi 23, 23, 23, 8 + vxor 23, 23, 28 + + .long 0x130E9CC8 + .long 0x132BA4C8 + .long 0x1348ACC8 + .long 0x1365B4C8 + + vxor 24, 24, 25 + vxor 24, 24, 26 + vxor 24, 24, 27 + + vxor 24, 24, 29 + + + vsldoi 27, 23, 23, 8 + .long 0x12F714C8 + vxor 27, 27, 24 + vxor 23, 23, 27 + + xxlor 32, 23+32, 23+32 + +.endm + + + + +.macro ppc_update_hash_1x + vxor 28, 28, 0 + + vxor 19, 19, 19 + + .long 0x12C3E4C8 + .long 0x12E4E4C8 + .long 0x1305E4C8 + + .long 0x137614C8 + + vsldoi 25, 23, 19, 8 + vsldoi 26, 19, 23, 8 + vxor 22, 22, 25 + vxor 24, 24, 26 + + vsldoi 22, 22, 22, 8 + vxor 22, 22, 27 + + vsldoi 20, 22, 22, 8 + .long 0x12D614C8 + vxor 20, 20, 24 + vxor 22, 22, 20 + + vor 0,22,22 + +.endm + + + + + + + + + + + + + +.global ppc_aes_gcm_encrypt +.align 5 +ppc_aes_gcm_encrypt: +_ppc_aes_gcm_encrypt: + + stdu 1,-512(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + li 9, 256 + stvx 20, 9, 1 + addi 9, 9, 16 + stvx 21, 9, 1 + addi 9, 9, 16 + stvx 22, 9, 1 + addi 9, 9, 16 + stvx 23, 9, 1 + addi 9, 9, 16 + stvx 24, 9, 1 + addi 9, 9, 16 + stvx 25, 9, 1 + addi 9, 9, 16 + stvx 26, 9, 1 + addi 9, 9, 16 + stvx 27, 9, 1 + addi 9, 9, 16 + stvx 28, 9, 1 + addi 9, 9, 16 + stvx 29, 9, 1 + addi 9, 9, 16 + stvx 30, 9, 1 + addi 9, 9, 16 + stvx 31, 9, 1 + std 0, 528(1) + + + lxvb16x 32, 0, 8 + + + li 10, 32 + lxvd2x 2+32, 10, 8 + li 10, 48 + lxvd2x 3+32, 10, 8 + li 10, 64 + lxvd2x 4+32, 10, 8 + li 10, 80 + lxvd2x 5+32, 10, 8 + + li 10, 96 + lxvd2x 6+32, 10, 8 + li 10, 112 + lxvd2x 7+32, 10, 8 + li 10, 128 + lxvd2x 8+32, 10, 8 + + li 10, 144 + lxvd2x 9+32, 10, 8 + li 10, 160 + lxvd2x 10+32, 10, 8 + li 10, 176 + lxvd2x 11+32, 10, 8 + + li 10, 192 + lxvd2x 12+32, 10, 8 + li 10, 208 + lxvd2x 13+32, 10, 8 + li 10, 224 + lxvd2x 14+32, 10, 8 + + + lxvb16x 30+32, 0, 7 + + mr 12, 5 + li 11, 0 + + + vxor 31, 31, 31 + vspltisb 22,1 + vsldoi 31, 31, 22,1 + + + lxv 0, 0(6) + lxv 1, 0x10(6) + lxv 2, 0x20(6) + lxv 3, 0x30(6) + lxv 4, 0x40(6) + lxv 5, 0x50(6) + lxv 6, 0x60(6) + lxv 7, 0x70(6) + lxv 8, 0x80(6) + lxv 9, 0x90(6) + lxv 10, 0xa0(6) + + + lwz 9,240(6) + + + + xxlor 32+29, 0, 0 + vxor 15, 30, 29 + + cmpdi 9, 10 + beq .Loop_aes_gcm_8x + + + lxv 11, 0xb0(6) + lxv 12, 0xc0(6) + + cmpdi 9, 12 + beq .Loop_aes_gcm_8x + + + lxv 13, 0xd0(6) + lxv 14, 0xe0(6) + cmpdi 9, 14 + beq .Loop_aes_gcm_8x + + b aes_gcm_out + +.align 5 +.Loop_aes_gcm_8x: + mr 14, 3 + mr 9, 4 + + + li 10, 128 + divdu 10, 5, 10 + cmpdi 10, 0 + beq .Loop_last_block + + .long 0x13DEF8C0 + vxor 16, 30, 29 + .long 0x13DEF8C0 + vxor 17, 30, 29 + .long 0x13DEF8C0 + vxor 18, 30, 29 + .long 0x13DEF8C0 + vxor 19, 30, 29 + .long 0x13DEF8C0 + vxor 20, 30, 29 + .long 0x13DEF8C0 + vxor 21, 30, 29 + .long 0x13DEF8C0 + vxor 22, 30, 29 + + mtctr 10 + + li 15, 16 + li 16, 32 + li 17, 48 + li 18, 64 + li 19, 80 + li 20, 96 + li 21, 112 + + lwz 10, 240(6) + +.Loop_8x_block: + + lxvb16x 15, 0, 14 + lxvb16x 16, 15, 14 + lxvb16x 17, 16, 14 + lxvb16x 18, 17, 14 + lxvb16x 19, 18, 14 + lxvb16x 20, 19, 14 + lxvb16x 21, 20, 14 + lxvb16x 22, 21, 14 + addi 14, 14, 128 + +.Loop_aes_middle8x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_next_ghash + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_next_ghash + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_next_ghash + b aes_gcm_out + +Do_next_ghash: + + + + .long 0x11EFBD09 + .long 0x1210BD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + xxlxor 48, 48, 16 + stxvb16x 48, 15, 9 + + .long 0x1231BD09 + .long 0x1252BD09 + + xxlxor 49, 49, 17 + stxvb16x 49, 16, 9 + xxlxor 50, 50, 18 + stxvb16x 50, 17, 9 + + .long 0x1273BD09 + .long 0x1294BD09 + + xxlxor 51, 51, 19 + stxvb16x 51, 18, 9 + xxlxor 52, 52, 20 + stxvb16x 52, 19, 9 + + .long 0x12B5BD09 + .long 0x12D6BD09 + + xxlxor 53, 53, 21 + stxvb16x 53, 20, 9 + xxlxor 54, 54, 22 + stxvb16x 54, 21, 9 + + addi 9, 9, 128 + + + ppc_aes_gcm_ghash2_4x + + xxlor 27+32, 0, 0 + .long 0x13DEF8C0 + vor 29,30,30 + vxor 15, 30, 27 + .long 0x13DEF8C0 + vxor 16, 30, 27 + .long 0x13DEF8C0 + vxor 17, 30, 27 + .long 0x13DEF8C0 + vxor 18, 30, 27 + .long 0x13DEF8C0 + vxor 19, 30, 27 + .long 0x13DEF8C0 + vxor 20, 30, 27 + .long 0x13DEF8C0 + vxor 21, 30, 27 + .long 0x13DEF8C0 + vxor 22, 30, 27 + + addi 12, 12, -128 + addi 11, 11, 128 + + bdnz .Loop_8x_block + + vor 30,29,29 + +.Loop_last_block: + cmpdi 12, 0 + beq aes_gcm_out + + + li 10, 16 + divdu 10, 12, 10 + + mtctr 10 + + lwz 10, 240(6) + + cmpdi 12, 16 + blt Final_block + +.macro .Loop_aes_middle_1x + xxlor 19+32, 1, 1 + xxlor 20+32, 2, 2 + xxlor 21+32, 3, 3 + xxlor 22+32, 4, 4 + + .long 0x11EF9D08 + .long 0x11EFA508 + .long 0x11EFAD08 + .long 0x11EFB508 + + xxlor 19+32, 5, 5 + xxlor 20+32, 6, 6 + xxlor 21+32, 7, 7 + xxlor 22+32, 8, 8 + + .long 0x11EF9D08 + .long 0x11EFA508 + .long 0x11EFAD08 + .long 0x11EFB508 + + xxlor 19+32, 9, 9 + .long 0x11EF9D08 +.endm + +Next_rem_block: + lxvb16x 15, 0, 14 + +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_next_1x + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_next_1x + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_next_1x + +Do_next_1x: + .long 0x11EFBD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + addi 14, 14, 16 + addi 9, 9, 16 + + vor 28,15,15 + ppc_update_hash_1x + + addi 12, 12, -16 + addi 11, 11, 16 + xxlor 19+32, 0, 0 + .long 0x13DEF8C0 + vxor 15, 30, 19 + + bdnz Next_rem_block + + cmpdi 12, 0 + beq aes_gcm_out + +Final_block: +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_final_1x + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_final_1x + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_final_1x + +Do_final_1x: + .long 0x11EFBD09 + + lxvb16x 15, 0, 14 + xxlxor 47, 47, 15 + + + li 15, 16 + sub 15, 15, 12 + + vspltisb 16,-1 + vspltisb 17,0 + li 10, 192 + stvx 16, 10, 1 + addi 10, 10, 16 + stvx 17, 10, 1 + + addi 10, 1, 192 + lxvb16x 16, 15, 10 + xxland 47, 47, 16 + + vor 28,15,15 + ppc_update_hash_1x + + + bl Write_partial_block + + b aes_gcm_out + + + + + + + +Write_partial_block: + li 10, 192 + stxvb16x 15+32, 10, 1 + + + addi 10, 9, -1 + addi 16, 1, 191 + + mtctr 12 + li 15, 0 + +Write_last_byte: + lbzu 14, 1(16) + stbu 14, 1(10) + bdnz Write_last_byte + blr + +aes_gcm_out: + + stxvb16x 32, 0, 8 + add 3, 11, 12 + + li 9, 256 + lvx 20, 9, 1 + addi 9, 9, 16 + lvx 21, 9, 1 + addi 9, 9, 16 + lvx 22, 9, 1 + addi 9, 9, 16 + lvx 23, 9, 1 + addi 9, 9, 16 + lvx 24, 9, 1 + addi 9, 9, 16 + lvx 25, 9, 1 + addi 9, 9, 16 + lvx 26, 9, 1 + addi 9, 9, 16 + lvx 27, 9, 1 + addi 9, 9, 16 + lvx 28, 9, 1 + addi 9, 9, 16 + lvx 29, 9, 1 + addi 9, 9, 16 + lvx 30, 9, 1 + addi 9, 9, 16 + lvx 31, 9, 1 + + ld 0, 528(1) + ld 14,112(1) + ld 15,120(1) + ld 16,128(1) + ld 17,136(1) + ld 18,144(1) + ld 19,152(1) + ld 20,160(1) + ld 21,168(1) + + mtlr 0 + addi 1, 1, 512 + blr + + + + +.global ppc_aes_gcm_decrypt +.align 5 +ppc_aes_gcm_decrypt: +_ppc_aes_gcm_decrypt: + + stdu 1,-512(1) + mflr 0 + + std 14,112(1) + std 15,120(1) + std 16,128(1) + std 17,136(1) + std 18,144(1) + std 19,152(1) + std 20,160(1) + std 21,168(1) + li 9, 256 + stvx 20, 9, 1 + addi 9, 9, 16 + stvx 21, 9, 1 + addi 9, 9, 16 + stvx 22, 9, 1 + addi 9, 9, 16 + stvx 23, 9, 1 + addi 9, 9, 16 + stvx 24, 9, 1 + addi 9, 9, 16 + stvx 25, 9, 1 + addi 9, 9, 16 + stvx 26, 9, 1 + addi 9, 9, 16 + stvx 27, 9, 1 + addi 9, 9, 16 + stvx 28, 9, 1 + addi 9, 9, 16 + stvx 29, 9, 1 + addi 9, 9, 16 + stvx 30, 9, 1 + addi 9, 9, 16 + stvx 31, 9, 1 + std 0, 528(1) + + + lxvb16x 32, 0, 8 + + + li 10, 32 + lxvd2x 2+32, 10, 8 + li 10, 48 + lxvd2x 3+32, 10, 8 + li 10, 64 + lxvd2x 4+32, 10, 8 + li 10, 80 + lxvd2x 5+32, 10, 8 + + li 10, 96 + lxvd2x 6+32, 10, 8 + li 10, 112 + lxvd2x 7+32, 10, 8 + li 10, 128 + lxvd2x 8+32, 10, 8 + + li 10, 144 + lxvd2x 9+32, 10, 8 + li 10, 160 + lxvd2x 10+32, 10, 8 + li 10, 176 + lxvd2x 11+32, 10, 8 + + li 10, 192 + lxvd2x 12+32, 10, 8 + li 10, 208 + lxvd2x 13+32, 10, 8 + li 10, 224 + lxvd2x 14+32, 10, 8 + + + lxvb16x 30+32, 0, 7 + + mr 12, 5 + li 11, 0 + + + vxor 31, 31, 31 + vspltisb 22,1 + vsldoi 31, 31, 22,1 + + + lxv 0, 0(6) + lxv 1, 0x10(6) + lxv 2, 0x20(6) + lxv 3, 0x30(6) + lxv 4, 0x40(6) + lxv 5, 0x50(6) + lxv 6, 0x60(6) + lxv 7, 0x70(6) + lxv 8, 0x80(6) + lxv 9, 0x90(6) + lxv 10, 0xa0(6) + + + lwz 9,240(6) + + + + xxlor 32+29, 0, 0 + vxor 15, 30, 29 + + cmpdi 9, 10 + beq .Loop_aes_gcm_8x_dec + + + lxv 11, 0xb0(6) + lxv 12, 0xc0(6) + + cmpdi 9, 12 + beq .Loop_aes_gcm_8x_dec + + + lxv 13, 0xd0(6) + lxv 14, 0xe0(6) + cmpdi 9, 14 + beq .Loop_aes_gcm_8x_dec + + b aes_gcm_out + +.align 5 +.Loop_aes_gcm_8x_dec: + mr 14, 3 + mr 9, 4 + + + li 10, 128 + divdu 10, 5, 10 + cmpdi 10, 0 + beq .Loop_last_block_dec + + .long 0x13DEF8C0 + vxor 16, 30, 29 + .long 0x13DEF8C0 + vxor 17, 30, 29 + .long 0x13DEF8C0 + vxor 18, 30, 29 + .long 0x13DEF8C0 + vxor 19, 30, 29 + .long 0x13DEF8C0 + vxor 20, 30, 29 + .long 0x13DEF8C0 + vxor 21, 30, 29 + .long 0x13DEF8C0 + vxor 22, 30, 29 + + mtctr 10 + + li 15, 16 + li 16, 32 + li 17, 48 + li 18, 64 + li 19, 80 + li 20, 96 + li 21, 112 + + lwz 10, 240(6) + +.Loop_8x_block_dec: + + lxvb16x 15, 0, 14 + lxvb16x 16, 15, 14 + lxvb16x 17, 16, 14 + lxvb16x 18, 17, 14 + lxvb16x 19, 18, 14 + lxvb16x 20, 19, 14 + lxvb16x 21, 20, 14 + lxvb16x 22, 21, 14 + addi 14, 14, 128 + +.Loop_aes_middle8x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_last_aes_dec + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_last_aes_dec + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x1210BD08 + .long 0x1231BD08 + .long 0x1252BD08 + .long 0x1273BD08 + .long 0x1294BD08 + .long 0x12B5BD08 + .long 0x12D6BD08 + + .long 0x11EFC508 + .long 0x1210C508 + .long 0x1231C508 + .long 0x1252C508 + .long 0x1273C508 + .long 0x1294C508 + .long 0x12B5C508 + .long 0x12D6C508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_last_aes_dec + b aes_gcm_out + +Do_last_aes_dec: + + + + .long 0x11EFBD09 + .long 0x1210BD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + xxlxor 48, 48, 16 + stxvb16x 48, 15, 9 + + .long 0x1231BD09 + .long 0x1252BD09 + + xxlxor 49, 49, 17 + stxvb16x 49, 16, 9 + xxlxor 50, 50, 18 + stxvb16x 50, 17, 9 + + .long 0x1273BD09 + .long 0x1294BD09 + + xxlxor 51, 51, 19 + stxvb16x 51, 18, 9 + xxlxor 52, 52, 20 + stxvb16x 52, 19, 9 + + .long 0x12B5BD09 + .long 0x12D6BD09 + + xxlxor 53, 53, 21 + stxvb16x 53, 20, 9 + xxlxor 54, 54, 22 + stxvb16x 54, 21, 9 + + addi 9, 9, 128 + + xxlor 15+32, 15, 15 + xxlor 16+32, 16, 16 + xxlor 17+32, 17, 17 + xxlor 18+32, 18, 18 + xxlor 19+32, 19, 19 + xxlor 20+32, 20, 20 + xxlor 21+32, 21, 21 + xxlor 22+32, 22, 22 + + + ppc_aes_gcm_ghash2_4x + + xxlor 27+32, 0, 0 + .long 0x13DEF8C0 + vor 29,30,30 + vxor 15, 30, 27 + .long 0x13DEF8C0 + vxor 16, 30, 27 + .long 0x13DEF8C0 + vxor 17, 30, 27 + .long 0x13DEF8C0 + vxor 18, 30, 27 + .long 0x13DEF8C0 + vxor 19, 30, 27 + .long 0x13DEF8C0 + vxor 20, 30, 27 + .long 0x13DEF8C0 + vxor 21, 30, 27 + .long 0x13DEF8C0 + vxor 22, 30, 27 + addi 12, 12, -128 + addi 11, 11, 128 + + bdnz .Loop_8x_block_dec + + vor 30,29,29 + +.Loop_last_block_dec: + cmpdi 12, 0 + beq aes_gcm_out + + + li 10, 16 + divdu 10, 12, 10 + + mtctr 10 + + lwz 10,240(6) + + cmpdi 12, 16 + blt Final_block_dec + +Next_rem_block_dec: + lxvb16x 15, 0, 14 + +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_next_1x_dec + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_next_1x_dec + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_next_1x_dec + +Do_next_1x_dec: + .long 0x11EFBD09 + + xxlxor 47, 47, 15 + stxvb16x 47, 0, 9 + addi 14, 14, 16 + addi 9, 9, 16 + + xxlor 28+32, 15, 15 + ppc_update_hash_1x + + addi 12, 12, -16 + addi 11, 11, 16 + xxlor 19+32, 0, 0 + .long 0x13DEF8C0 + vxor 15, 30, 19 + + bdnz Next_rem_block_dec + + cmpdi 12, 0 + beq aes_gcm_out + +Final_block_dec: +.Loop_aes_middle_1x + + xxlor 23+32, 10, 10 + + cmpdi 10, 10 + beq Do_final_1x_dec + + + xxlor 24+32, 11, 11 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 12, 12 + + cmpdi 10, 12 + beq Do_final_1x_dec + + + xxlor 24+32, 13, 13 + + .long 0x11EFBD08 + .long 0x11EFC508 + + xxlor 23+32, 14, 14 + + cmpdi 10, 14 + beq Do_final_1x_dec + +Do_final_1x_dec: + .long 0x11EFBD09 + + lxvb16x 15, 0, 14 + xxlxor 47, 47, 15 + + + li 15, 16 + sub 15, 15, 12 + + vspltisb 16,-1 + vspltisb 17,0 + li 10, 192 + stvx 16, 10, 1 + addi 10, 10, 16 + stvx 17, 10, 1 + + addi 10, 1, 192 + lxvb16x 16, 15, 10 + xxland 47, 47, 16 + + xxlor 28+32, 15, 15 + ppc_update_hash_1x + + + bl Write_partial_block + + b aes_gcm_out diff --git a/sys/crypto/openssl/powerpc64/chachap10-ppc.S b/sys/crypto/openssl/powerpc64/chachap10-ppc.S new file mode 100644 index 000000000000..239c0e1297e1 --- /dev/null +++ b/sys/crypto/openssl/powerpc64/chachap10-ppc.S @@ -0,0 +1,1228 @@ +/* Do not modify. This file is auto-generated from chachap10-ppc.pl. */ + +.globl ChaCha20_ctr32_vsx_p10 +.type ChaCha20_ctr32_vsx_p10,@function +.align 5 +ChaCha20_ctr32_vsx_p10: +.localentry ChaCha20_ctr32_vsx_p10,0 + + cmpldi 5,255 + ble .Not_greater_than_8x + b ChaCha20_ctr32_vsx_8x +.Not_greater_than_8x: + stdu 1,-224(1) + mflr 0 + li 10,127 + li 11,143 + li 12,-1 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + stw 12,220(1) + li 12,-4096+63 + std 0, 240(1) + or 12,12,12 + + bl .Lconsts + .long 0x7E006619 + addi 12,12,0x70 + li 8,16 + li 9,32 + li 10,48 + li 11,64 + + .long 0x7E203619 + .long 0x7E483619 + .long 0x7E603E19 + + vxor 27,27,27 + .long 0x7F8B6619 + vspltw 26,19,0 + vsldoi 19,19,27,4 + vsldoi 19,27,19,12 + vadduwm 26,26,28 + + lvsl 31,0,8 + vspltisb 27,3 + vxor 31,31,27 + + li 0,10 + mtctr 0 + b .Loop_outer_vsx + +.align 5 +.Loop_outer_vsx: + lvx 0,0,12 + lvx 1,8,12 + lvx 2,9,12 + lvx 3,10,12 + + vspltw 4,17,0 + vspltw 5,17,1 + vspltw 6,17,2 + vspltw 7,17,3 + + vspltw 8,18,0 + vspltw 9,18,1 + vspltw 10,18,2 + vspltw 11,18,3 + + vor 12,26,26 + vspltw 13,19,1 + vspltw 14,19,2 + vspltw 15,19,3 + + vspltisw 27,-16 + vspltisw 28,12 + vspltisw 29,8 + vspltisw 30,7 + +.Loop_vsx_4x: + vadduwm 0,0,4 + vadduwm 1,1,5 + vadduwm 2,2,6 + vadduwm 3,3,7 + vxor 12,12,0 + vxor 13,13,1 + vxor 14,14,2 + vxor 15,15,3 + vrlw 12,12,27 + vrlw 13,13,27 + vrlw 14,14,27 + vrlw 15,15,27 + vadduwm 8,8,12 + vadduwm 9,9,13 + vadduwm 10,10,14 + vadduwm 11,11,15 + vxor 4,4,8 + vxor 5,5,9 + vxor 6,6,10 + vxor 7,7,11 + vrlw 4,4,28 + vrlw 5,5,28 + vrlw 6,6,28 + vrlw 7,7,28 + vadduwm 0,0,4 + vadduwm 1,1,5 + vadduwm 2,2,6 + vadduwm 3,3,7 + vxor 12,12,0 + vxor 13,13,1 + vxor 14,14,2 + vxor 15,15,3 + vrlw 12,12,29 + vrlw 13,13,29 + vrlw 14,14,29 + vrlw 15,15,29 + vadduwm 8,8,12 + vadduwm 9,9,13 + vadduwm 10,10,14 + vadduwm 11,11,15 + vxor 4,4,8 + vxor 5,5,9 + vxor 6,6,10 + vxor 7,7,11 + vrlw 4,4,30 + vrlw 5,5,30 + vrlw 6,6,30 + vrlw 7,7,30 + vadduwm 0,0,5 + vadduwm 1,1,6 + vadduwm 2,2,7 + vadduwm 3,3,4 + vxor 15,15,0 + vxor 12,12,1 + vxor 13,13,2 + vxor 14,14,3 + vrlw 15,15,27 + vrlw 12,12,27 + vrlw 13,13,27 + vrlw 14,14,27 + vadduwm 10,10,15 + vadduwm 11,11,12 + vadduwm 8,8,13 + vadduwm 9,9,14 + vxor 5,5,10 + vxor 6,6,11 + vxor 7,7,8 + vxor 4,4,9 + vrlw 5,5,28 + vrlw 6,6,28 + vrlw 7,7,28 + vrlw 4,4,28 + vadduwm 0,0,5 + vadduwm 1,1,6 + vadduwm 2,2,7 + vadduwm 3,3,4 + vxor 15,15,0 + vxor 12,12,1 + vxor 13,13,2 + vxor 14,14,3 + vrlw 15,15,29 + vrlw 12,12,29 + vrlw 13,13,29 + vrlw 14,14,29 + vadduwm 10,10,15 + vadduwm 11,11,12 + vadduwm 8,8,13 + vadduwm 9,9,14 + vxor 5,5,10 + vxor 6,6,11 + vxor 7,7,8 + vxor 4,4,9 + vrlw 5,5,30 + vrlw 6,6,30 + vrlw 7,7,30 + vrlw 4,4,30 + + bdnz .Loop_vsx_4x + + vadduwm 12,12,26 + + .long 0x13600F8C + .long 0x13821F8C + .long 0x10000E8C + .long 0x10421E8C + .long 0x13A42F8C + .long 0x13C63F8C + .long 0xF0201057 + .long 0xF0601357 + .long 0xF01BE057 + .long 0xF05BE357 + + .long 0x10842E8C + .long 0x10C63E8C + .long 0x13684F8C + .long 0x138A5F8C + .long 0xF0A43057 + .long 0xF0E43357 + .long 0xF09DF057 + .long 0xF0DDF357 + + .long 0x11084E8C + .long 0x114A5E8C + .long 0x13AC6F8C + .long 0x13CE7F8C + .long 0xF1285057 + .long 0xF1685357 + .long 0xF11BE057 + .long 0xF15BE357 + + .long 0x118C6E8C + .long 0x11CE7E8C + vspltisw 27,4 + vadduwm 26,26,27 + .long 0xF1AC7057 + .long 0xF1EC7357 + .long 0xF19DF057 + .long 0xF1DDF357 + + vadduwm 0,0,16 + vadduwm 4,4,17 + vadduwm 8,8,18 + vadduwm 12,12,19 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmpldi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx + + vadduwm 0,1,16 + vadduwm 4,5,17 + vadduwm 8,9,18 + vadduwm 12,13,19 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmpldi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx + + vadduwm 0,2,16 + vadduwm 4,6,17 + vadduwm 8,10,18 + vadduwm 12,14,19 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmpldi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx + + vadduwm 0,3,16 + vadduwm 4,7,17 + vadduwm 8,11,18 + vadduwm 12,15,19 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmpldi 5,0x40 + blt .Ltail_vsx + + .long 0x7F602619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 27,27,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7F601F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + mtctr 0 + bne .Loop_outer_vsx + +.Ldone_vsx: + lwz 12,220(1) + li 10,127 + li 11,143 + ld 0, 240(1) + or 12,12,12 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + mtlr 0 + addi 1,1,224 + blr + +.align 4 +.Ltail_vsx: + addi 11,1,48 + mtctr 5 + .long 0x7C005F19 + .long 0x7C885F19 + .long 0x7D095F19 + .long 0x7D8A5F19 + subi 12,11,1 + subi 4,4,1 + subi 3,3,1 + +.Loop_tail_vsx: + lbzu 6,1(12) + lbzu 7,1(4) + xor 6,6,7 + stbu 6,1(3) + bdnz .Loop_tail_vsx + + .long 0x7E005F19 + .long 0x7E085F19 + .long 0x7E095F19 + .long 0x7E0A5F19 + + b .Ldone_vsx +.long 0 +.byte 0,12,0x04,1,0x80,0,5,0 +.long 0 +.size ChaCha20_ctr32_vsx_p10,.-ChaCha20_ctr32_vsx_p10 + +.globl ChaCha20_ctr32_vsx_8x +.type ChaCha20_ctr32_vsx_8x,@function +.align 5 +ChaCha20_ctr32_vsx_8x: +.localentry ChaCha20_ctr32_vsx_8x,0 + + stdu 1,-256(1) + mflr 0 + li 10,127 + li 11,143 + li 12,-1 + stvx 24,10,1 + addi 10,10,32 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + stw 12,252(1) + li 12,-4096+63 + std 0, 272(1) + or 12,12,12 + + bl .Lconsts + + .long 0x7F606619 + addi 12,12,0x70 + li 8,16 + li 9,32 + li 10,48 + li 11,64 + + vspltisw 16,-16 + vspltisw 20,12 + vspltisw 24,8 + vspltisw 28,7 + + lvx 0,0,12 + lvx 1,8,12 + lvx 2,9,12 + lvx 3,10,12 + + .long 0xF1308496 + .long 0xF154A496 + .long 0xF178C496 + .long 0xF19CE496 + .long 0xF2C00496 + .long 0xF2E10C96 + .long 0xF3021496 + .long 0xF3231C96 + + .long 0x7F003619 + .long 0x7F283619 + .long 0x7F403E19 + vspltisw 30,4 + + + vxor 29,29,29 + .long 0x7F8B6619 + vspltw 2,26,0 + vsldoi 26,26,29,4 + vsldoi 26,29,26,12 + vadduwm 28,2,28 + vadduwm 30,28,30 + vspltw 0,25,2 + + lvsl 31,0,8 + vspltisb 23,3 + vxor 31,31,23 + .long 0xF35FFC96 + + .long 0xF01BDC96 + .long 0xF038C496 + .long 0xF059CC96 + .long 0xF07AD496 + .long 0xF09CE496 + .long 0xF0BEF496 + .long 0xF1000496 + + li 0,10 + mtctr 0 + b .Loop_outer_vsx_8x + +.align 5 +.Loop_outer_vsx_8x: + .long 0xF016B491 + .long 0xF037BC91 + .long 0xF058C491 + .long 0xF079CC91 + .long 0xF216B491 + .long 0xF237BC91 + .long 0xF258C491 + .long 0xF279CC91 + + vspltw 4,24,0 + vspltw 5,24,1 + vspltw 6,24,2 + vspltw 7,24,3 + vspltw 20,24,0 + vspltw 21,24,1 + vspltw 22,24,2 + vspltw 23,24,3 + + vspltw 8,25,0 + vspltw 9,25,1 + vspltw 10,25,2 + vspltw 11,25,3 + vspltw 24,25,0 + vspltw 27,25,3 + vspltw 25,25,1 + + .long 0xF1842491 + vspltw 13,26,1 + vspltw 14,26,2 + vspltw 15,26,3 + .long 0xF3852C91 + vspltw 29,26,1 + vspltw 30,26,2 + vspltw 31,26,3 + .long 0xF3484491 + +.Loop_vsx_8x: + .long 0xF1FBDC96 + .long 0xF3694C91 + vadduwm 0,0,4 + vadduwm 1,1,5 + vadduwm 2,2,6 + vadduwm 3,3,7 + vadduwm 16,16,20 + vadduwm 17,17,21 + vadduwm 18,18,22 + vadduwm 19,19,23 + vxor 12,12,0 + vxor 13,13,1 + vxor 14,14,2 + vxor 15,15,3 + vxor 28,28,16 + vxor 29,29,17 + vxor 30,30,18 + vxor 31,31,19 + vrlw 12,12,27 + vrlw 13,13,27 + vrlw 14,14,27 + vrlw 15,15,27 + vrlw 28,28,27 + vrlw 29,29,27 + vrlw 30,30,27 + vrlw 31,31,27 + .long 0xF1B39C96 + .long 0xF36F7C91 + .long 0xF26A5491 + vadduwm 8,8,12 + vadduwm 9,9,13 + vadduwm 10,10,14 + vadduwm 11,11,15 + vadduwm 24,24,28 + vadduwm 25,25,29 + vadduwm 26,26,30 + vadduwm 27,27,31 + vxor 4,4,8 + vxor 5,5,9 + vxor 6,6,10 + vxor 7,7,11 + vxor 20,20,24 + vxor 21,21,25 + vxor 22,22,26 + vxor 23,23,27 + vrlw 4,4,19 + vrlw 5,5,19 + vrlw 6,6,19 + vrlw 7,7,19 + vrlw 20,20,19 + vrlw 21,21,19 + vrlw 22,22,19 + vrlw 23,23,19 + .long 0xF26D6C91 + .long 0xF1FBDC96 + .long 0xF36B5C91 + vadduwm 0,0,4 + vadduwm 1,1,5 + vadduwm 2,2,6 + vadduwm 3,3,7 + vadduwm 16,16,20 + vadduwm 17,17,21 + vadduwm 18,18,22 + vadduwm 19,19,23 + vxor 12,12,0 + vxor 13,13,1 + vxor 14,14,2 + vxor 15,15,3 + vxor 28,28,16 + vxor 29,29,17 + vxor 30,30,18 + vxor 31,31,19 + vrlw 12,12,27 + vrlw 13,13,27 + vrlw 14,14,27 + vrlw 15,15,27 + vrlw 28,28,27 + vrlw 29,29,27 + vrlw 30,30,27 + vrlw 31,31,27 + .long 0xF36F7C91 + .long 0xF1B39C96 + .long 0xF26C6491 + vadduwm 8,8,12 + vadduwm 9,9,13 + vadduwm 10,10,14 + vadduwm 11,11,15 + vadduwm 24,24,28 + vadduwm 25,25,29 + vadduwm 26,26,30 + vadduwm 27,27,31 + vxor 4,4,8 + vxor 5,5,9 + vxor 6,6,10 + vxor 7,7,11 + vxor 20,20,24 + vxor 21,21,25 + vxor 22,22,26 + vxor 23,23,27 + vrlw 4,4,19 + vrlw 5,5,19 + vrlw 6,6,19 + vrlw 7,7,19 + vrlw 20,20,19 + vrlw 21,21,19 + vrlw 22,22,19 + vrlw 23,23,19 + .long 0xF26D6C91 + .long 0xF1F9CC96 + .long 0xF3294C91 + vadduwm 0,0,5 + vadduwm 1,1,6 + vadduwm 2,2,7 + vadduwm 3,3,4 + vadduwm 16,16,21 + vadduwm 17,17,22 + vadduwm 18,18,23 + vadduwm 19,19,20 + vxor 15,15,0 + vxor 12,12,1 + vxor 13,13,2 + vxor 14,14,3 + vxor 31,31,16 + vxor 28,28,17 + vxor 29,29,18 + vxor 30,30,19 + vrlw 15,15,25 + vrlw 12,12,25 + vrlw 13,13,25 + vrlw 14,14,25 + vrlw 31,31,25 + vrlw 28,28,25 + vrlw 29,29,25 + vrlw 30,30,25 + .long 0xF1B39C96 + .long 0xF32F7C91 + .long 0xF26A5491 + vadduwm 10,10,15 + vadduwm 11,11,12 + vadduwm 8,8,13 + vadduwm 9,9,14 + vadduwm 26,26,31 + vadduwm 27,27,28 + vadduwm 24,24,29 + vadduwm 25,25,30 + vxor 5,5,10 + vxor 6,6,11 + vxor 7,7,8 + vxor 4,4,9 + vxor 21,21,26 + vxor 22,22,27 + vxor 23,23,24 + vxor 20,20,25 + vrlw 5,5,19 + vrlw 6,6,19 + vrlw 7,7,19 + vrlw 4,4,19 + vrlw 21,21,19 + vrlw 22,22,19 + vrlw 23,23,19 + vrlw 20,20,19 + .long 0xF26D6C91 + .long 0xF1F9CC96 + .long 0xF32B5C91 + vadduwm 0,0,5 + vadduwm 1,1,6 + vadduwm 2,2,7 + vadduwm 3,3,4 + vadduwm 16,16,21 + vadduwm 17,17,22 + vadduwm 18,18,23 + vadduwm 19,19,20 + vxor 15,15,0 + vxor 12,12,1 + vxor 13,13,2 + vxor 14,14,3 + vxor 31,31,16 + vxor 28,28,17 + vxor 29,29,18 + vxor 30,30,19 + vrlw 15,15,25 + vrlw 12,12,25 + vrlw 13,13,25 + vrlw 14,14,25 + vrlw 31,31,25 + vrlw 28,28,25 + vrlw 29,29,25 + vrlw 30,30,25 + .long 0xF32F7C91 + .long 0xF1B39C96 + .long 0xF26C6491 + vadduwm 10,10,15 + vadduwm 11,11,12 + vadduwm 8,8,13 + vadduwm 9,9,14 + vadduwm 26,26,31 + vadduwm 27,27,28 + vadduwm 24,24,29 + vadduwm 25,25,30 + vxor 5,5,10 + vxor 6,6,11 + vxor 7,7,8 + vxor 4,4,9 + vxor 21,21,26 + vxor 22,22,27 + vxor 23,23,24 + vxor 20,20,25 + vrlw 5,5,19 + vrlw 6,6,19 + vrlw 7,7,19 + vrlw 4,4,19 + vrlw 21,21,19 + vrlw 22,22,19 + vrlw 23,23,19 + vrlw 20,20,19 + .long 0xF26D6C91 + + bdnz .Loop_vsx_8x + .long 0xF1BCE496 + .long 0xF1DDEC96 + .long 0xF1FEF496 + .long 0xF21FFC96 + + .long 0xF258C496 + .long 0xF279CC96 + .long 0xF29AD496 + .long 0xF2BBDC96 + + .long 0xF0D6B496 + .long 0xF0F7BC96 + .long 0xF3FAD491 + + .long 0xF3600491 + .long 0xF3010C91 + .long 0xF3221491 + .long 0xF3431C91 + .long 0xF2C42491 + + + .long 0x12E00F8C + .long 0x13821F8C + .long 0x10000E8C + .long 0x10421E8C + + .long 0x13A42F8C + .long 0x13C63F8C + .long 0x10842E8C + .long 0x10C63E8C + + vadduwm 12,12,22 + + .long 0xF0201057 + .long 0xF0601357 + .long 0xF017E057 + .long 0xF057E357 + .long 0xF0A43057 + .long 0xF0E43357 + .long 0xF09DF057 + .long 0xF0DDF357 + + .long 0x12E84F8C + .long 0x138A5F8C + .long 0x11084E8C + .long 0x114A5E8C + .long 0x13AC6F8C + .long 0x13CE7F8C + .long 0x118C6E8C + .long 0x11CE7E8C + + .long 0xF1285057 + .long 0xF1685357 + .long 0xF117E057 + .long 0xF157E357 + .long 0xF1AC7057 + .long 0xF1EC7357 + .long 0xF19DF057 + .long 0xF1DDF357 + + vspltisw 23,8 + vadduwm 22,22,23 + .long 0xF096B496 + + vadduwm 0,0,27 + vadduwm 4,4,24 + vadduwm 8,8,25 + vadduwm 12,12,26 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmpldi 5,0x40 + blt .Ltail_vsx_8x + + .long 0x7EE02619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 23,23,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7EE01F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx_8x + + vadduwm 0,1,27 + vadduwm 4,5,24 + vadduwm 8,9,25 + vadduwm 12,13,26 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmpldi 5,0x40 + blt .Ltail_vsx_8x + + .long 0x7EE02619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 23,23,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7EE01F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx_8x + + vadduwm 0,2,27 + vadduwm 4,6,24 + vadduwm 8,10,25 + vadduwm 12,14,26 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmpldi 5,0x40 + blt .Ltail_vsx_8x + + .long 0x7EE02619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 23,23,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7EE01F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx_8x + + vadduwm 0,3,27 + vadduwm 4,7,24 + vadduwm 8,11,25 + vadduwm 12,15,26 + + vperm 0,0,0,31 + vperm 4,4,4,31 + vperm 8,8,8,31 + vperm 12,12,12,31 + + cmpldi 5,0x40 + blt .Ltail_vsx_8x + + .long 0x7EE02619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 23,23,0 + vxor 28,28,4 + vxor 29,29,8 + vxor 30,30,12 + + .long 0x7EE01F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx_8x + + + + + .long 0xF0A52C91 + + .long 0xF1129491 + .long 0xF1339C91 + .long 0xF154A491 + .long 0xF175AC91 + + .long 0xF18D6C91 + .long 0xF1AE7491 + .long 0xF1CF7C91 + .long 0xF1F08491 + vadduwm 12,12,5 + + .long 0xF2C63491 + .long 0xF2E73C91 + + + .long 0x10908F8C + .long 0x13929F8C + .long 0x12108E8C + .long 0x12529E8C + .long 0x13B4AF8C + .long 0x13D6BF8C + .long 0x1294AE8C + .long 0x12D6BE8C + + .long 0xF2309057 + .long 0xF2709357 + .long 0xF204E057 + .long 0xF244E357 + .long 0xF2B4B057 + .long 0xF2F4B357 + .long 0xF29DF057 + .long 0xF2DDF357 + + .long 0x10884F8C + .long 0x138A5F8C + .long 0x11084E8C + .long 0x114A5E8C + .long 0x13AC6F8C + .long 0x13CE7F8C + .long 0x118C6E8C + .long 0x11CE7E8C + + .long 0xF1285057 + .long 0xF1685357 + .long 0xF104E057 + .long 0xF144E357 + .long 0xF1AC7057 + .long 0xF1EC7357 + .long 0xF19DF057 + .long 0xF1DDF357 + + vspltisw 4,8 + vadduwm 5,5,4 + .long 0xF0A52C96 + + vadduwm 0,16,27 + vadduwm 1,20,24 + vadduwm 2,8,25 + vadduwm 3,12,26 + + vperm 0,0,0,31 + vperm 1,1,1,31 + vperm 2,2,2,31 + vperm 3,3,3,31 + + cmpldi 5,0x40 + blt .Ltail_vsx_8x_1 + + .long 0x7C802619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 4,4,0 + vxor 28,28,1 + vxor 29,29,2 + vxor 30,30,3 + + .long 0x7C801F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx_8x + + vadduwm 0,17,27 + vadduwm 1,21,24 + vadduwm 2,9,25 + vadduwm 3,13,26 + + vperm 0,0,0,31 + vperm 1,1,1,31 + vperm 2,2,2,31 + vperm 3,3,3,31 + + cmpldi 5,0x40 + blt .Ltail_vsx_8x_1 + + .long 0x7C802619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 4,4,0 + vxor 28,28,1 + vxor 29,29,2 + vxor 30,30,3 + + .long 0x7C801F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx_8x + + vadduwm 0,18,27 + vadduwm 1,22,24 + vadduwm 2,10,25 + vadduwm 3,14,26 + + vperm 0,0,0,31 + vperm 1,1,1,31 + vperm 2,2,2,31 + vperm 3,3,3,31 + + cmpldi 5,0x40 + blt .Ltail_vsx_8x_1 + + .long 0x7C802619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 4,4,0 + vxor 28,28,1 + vxor 29,29,2 + vxor 30,30,3 + + .long 0x7C801F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx_8x + + vadduwm 0,19,27 + vadduwm 1,23,24 + vadduwm 2,11,25 + vadduwm 3,15,26 + + vperm 0,0,0,31 + vperm 1,1,1,31 + vperm 2,2,2,31 + vperm 3,3,3,31 + + cmpldi 5,0x40 + blt .Ltail_vsx_8x_1 + + .long 0x7C802619 + .long 0x7F882619 + .long 0x7FA92619 + .long 0x7FCA2619 + + vxor 4,4,0 + vxor 28,28,1 + vxor 29,29,2 + vxor 30,30,3 + + .long 0x7C801F19 + .long 0x7F881F19 + addi 4,4,0x40 + .long 0x7FA91F19 + subi 5,5,0x40 + .long 0x7FCA1F19 + addi 3,3,0x40 + beq .Ldone_vsx_8x + + mtctr 0 + bne .Loop_outer_vsx_8x + +.Ldone_vsx_8x: + lwz 12,252(1) + li 10,127 + li 11,143 + ld 0, 272(1) + or 12,12,12 + lvx 24,10,1 + addi 10,10,32 + lvx 25,11,1 + addi 11,11,32 + lvx 26,10,1 + addi 10,10,32 + lvx 27,11,1 + addi 11,11,32 + lvx 28,10,1 + addi 10,10,32 + lvx 29,11,1 + addi 11,11,32 + lvx 30,10,1 + lvx 31,11,1 + mtlr 0 + addi 1,1,256 + blr + +.align 4 +.Ltail_vsx_8x: + addi 11,1,48 + mtctr 5 + .long 0x7C005F19 + .long 0x7C885F19 + .long 0x7D095F19 + .long 0x7D8A5F19 + subi 12,11,1 + subi 4,4,1 + subi 3,3,1 + bl .Loop_tail_vsx_8x +.Ltail_vsx_8x_1: + addi 11,1,48 + mtctr 5 + .long 0x7C005F19 + .long 0x7C285F19 + .long 0x7C495F19 + .long 0x7C6A5F19 + subi 12,11,1 + subi 4,4,1 + subi 3,3,1 + bl .Loop_tail_vsx_8x + +.Loop_tail_vsx_8x: + lbzu 6,1(12) + lbzu 7,1(4) + xor 6,6,7 + stbu 6,1(3) + bdnz .Loop_tail_vsx_8x + + .long 0x7F605F19 + .long 0x7F685F19 + .long 0x7F695F19 + .long 0x7F6A5F19 + + b .Ldone_vsx_8x +.long 0 +.byte 0,12,0x04,1,0x80,0,5,0 +.long 0 +.size ChaCha20_ctr32_vsx_8x,.-ChaCha20_ctr32_vsx_8x +.align 5 +.Lconsts: + mflr 0 + bcl 20,31,$+4 + mflr 12 + addi 12,12,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.Lsigma: +.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 +.long 1,0,0,0 +.long 2,0,0,0 +.long 3,0,0,0 +.long 4,0,0,0 +.long 0x02030001,0x06070405,0x0a0b0809,0x0e0f0c0d +.long 0x01020300,0x05060704,0x090a0b08,0x0d0e0f0c +.long 0x61707865,0x61707865,0x61707865,0x61707865 +.long 0x3320646e,0x3320646e,0x3320646e,0x3320646e +.long 0x79622d32,0x79622d32,0x79622d32,0x79622d32 +.long 0x6b206574,0x6b206574,0x6b206574,0x6b206574 +.long 0,1,2,3 +.long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c +.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,80,111,119,101,114,80,67,47,65,108,116,105,86,101,99,44,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +.align 2 +.align 2 diff --git a/sys/crypto/openssl/powerpc64/ecp_nistp384-ppc64.S b/sys/crypto/openssl/powerpc64/ecp_nistp384-ppc64.S new file mode 100644 index 000000000000..3719683097ef --- /dev/null +++ b/sys/crypto/openssl/powerpc64/ecp_nistp384-ppc64.S @@ -0,0 +1,1526 @@ +/* Do not modify. This file is auto-generated from ecp_nistp384-ppc64.pl. */ +.machine "any" +.abiversion 2 +.text + +.globl p384_felem_mul +.type p384_felem_mul,@function +.type p384_felem_mul,@function +.align 4 +p384_felem_mul: +.localentry p384_felem_mul,0 + + + stdu 1, -176(1) + mflr 0 + std 14, 56(1) + std 15, 64(1) + std 16, 72(1) + std 17, 80(1) + std 18, 88(1) + std 19, 96(1) + std 20, 104(1) + std 21, 112(1) + std 22, 120(1) + + bl _p384_felem_mul_core + + mtlr 0 + ld 14, 56(1) + ld 15, 64(1) + ld 16, 72(1) + ld 17, 80(1) + ld 18, 88(1) + ld 19, 96(1) + ld 20, 104(1) + ld 21, 112(1) + ld 22, 120(1) + addi 1, 1, 176 + blr +.size p384_felem_mul,.-p384_felem_mul + +.globl p384_felem_square +.type p384_felem_square,@function +.type p384_felem_square,@function +.align 4 +p384_felem_square: +.localentry p384_felem_square,0 + + + stdu 1, -176(1) + mflr 0 + std 14, 56(1) + std 15, 64(1) + std 16, 72(1) + std 17, 80(1) + + bl _p384_felem_square_core + + mtlr 0 + ld 14, 56(1) + ld 15, 64(1) + ld 16, 72(1) + ld 17, 80(1) + addi 1, 1, 176 + blr +.size p384_felem_square,.-p384_felem_square + + + + + +.type _p384_felem_mul_core,@function +.align 4 +_p384_felem_mul_core: +.localentry _p384_felem_mul_core,0 + + + ld 6,0(4) + ld 14,0(5) + ld 7,8(4) + ld 15,8(5) + ld 8,16(4) + ld 16,16(5) + ld 9,24(4) + ld 17,24(5) + ld 10,32(4) + ld 18,32(5) + ld 11,40(4) + ld 19,40(5) + ld 12,48(4) + ld 20,48(5) + + + mulld 21, 14, 6 + mulhdu 22, 14, 6 + std 21, 0(3) + std 22, 8(3) + + vxor 0, 0, 0 + + + mtvsrdd 32+13, 14, 6 + mtvsrdd 32+14, 7, 15 + .long 0x102D7023 + + + mtvsrdd 32+15, 15, 6 + mtvsrdd 32+16, 7, 16 + mtvsrdd 32+17, 0, 8 + mtvsrdd 32+18, 0, 14 + .long 0x126F8023 + .long 0x105194E3 + + + mtvsrdd 32+13, 16, 6 + mtvsrdd 32+14, 7, 17 + mtvsrdd 32+15, 14, 8 + mtvsrdd 32+16, 9, 15 + .long 0x126D7023 + .long 0x106F84E3 + + + mtvsrdd 32+13, 17, 6 + mtvsrdd 32+14, 7, 18 + mtvsrdd 32+15, 15, 8 + mtvsrdd 32+16, 9, 16 + mtvsrdd 32+17, 0, 10 + mtvsrdd 32+18, 0, 14 + .long 0x126D7023 + .long 0x108F84E3 + .long 0x10919123 + + + mtvsrdd 32+13, 18, 6 + mtvsrdd 32+14, 7, 19 + mtvsrdd 32+15, 16, 8 + mtvsrdd 32+16, 9, 17 + mtvsrdd 32+17, 14, 10 + mtvsrdd 32+18, 11, 15 + .long 0x126D7023 + .long 0x10AF84E3 + .long 0x10B19163 + + stxv 32+1, 16(3) + stxv 32+2, 32(3) + stxv 32+3, 48(3) + stxv 32+4, 64(3) + stxv 32+5, 80(3) + + + mtvsrdd 32+13, 19, 6 + mtvsrdd 32+14, 7, 20 + mtvsrdd 32+15, 17, 8 + mtvsrdd 32+16, 9, 18 + mtvsrdd 32+17, 15, 10 + mtvsrdd 32+18, 11, 16 + .long 0x126D7023 + .long 0x10CF84E3 + mtvsrdd 32+13, 0, 12 + mtvsrdd 32+14, 0, 14 + .long 0x127191A3 + .long 0x10CD74E3 + + + mtvsrdd 32+13, 19, 7 + mtvsrdd 32+14, 8, 20 + mtvsrdd 32+15, 17, 9 + mtvsrdd 32+16, 10, 18 + mtvsrdd 32+17, 15, 11 + mtvsrdd 32+18, 12, 16 + .long 0x126D7023 + .long 0x10EF84E3 + .long 0x10F191E3 + + + mtvsrdd 32+13, 19, 8 + mtvsrdd 32+14, 9, 20 + mtvsrdd 32+15, 17, 10 + mtvsrdd 32+16, 11, 18 + mtvsrdd 32+17, 0, 12 + mtvsrdd 32+18, 0, 16 + .long 0x126D7023 + .long 0x110F84E3 + .long 0x11119223 + + + mtvsrdd 32+13, 19, 9 + mtvsrdd 32+14, 10, 20 + mtvsrdd 32+15, 17, 11 + mtvsrdd 32+16, 12, 18 + .long 0x126D7023 + .long 0x112F84E3 + + + mtvsrdd 32+13, 19, 10 + mtvsrdd 32+14, 11, 20 + mtvsrdd 32+15, 0, 12 + mtvsrdd 32+16, 0, 18 + .long 0x126D7023 + .long 0x114F84E3 + + + mtvsrdd 32+17, 19, 11 + mtvsrdd 32+18, 12, 20 + .long 0x11719023 + + stxv 32+6, 96(3) + stxv 32+7, 112(3) + stxv 32+8, 128(3) + stxv 32+9, 144(3) + stxv 32+10, 160(3) + stxv 32+11, 176(3) + + + mulld 21, 20, 12 + mulhdu 22, 20, 12 + + std 21, 192(3) + std 22, 200(3) + + blr +.size _p384_felem_mul_core,.-_p384_felem_mul_core + + + + + +.type _p384_felem_square_core,@function +.align 4 +_p384_felem_square_core: +.localentry _p384_felem_square_core,0 + + + ld 6, 0(4) + ld 7, 8(4) + ld 8, 16(4) + ld 9, 24(4) + ld 10, 32(4) + ld 11, 40(4) + ld 12, 48(4) + + vxor 0, 0, 0 + + + mulld 14, 6, 6 + mulhdu 15, 6, 6 + std 14, 0(3) + std 15, 8(3) + + + add 14, 6, 6 + mtvsrdd 32+13, 0, 14 + mtvsrdd 32+14, 0, 7 + .long 0x102D7023 + + + mtvsrdd 32+15, 7, 14 + mtvsrdd 32+16, 7, 8 + .long 0x104F8023 + + + add 15, 7, 7 + mtvsrdd 32+13, 8, 14 + mtvsrdd 32+14, 15, 9 + .long 0x106D7023 + + + mtvsrdd 32+13, 9, 14 + mtvsrdd 32+14, 15, 10 + mtvsrdd 32+15, 0, 8 + .long 0x108D7023 + .long 0x108F7923 + + + mtvsrdd 32+13, 10, 14 + mtvsrdd 32+14, 15, 11 + add 16, 8, 8 + mtvsrdd 32+15, 0, 16 + mtvsrdd 32+16, 0, 9 + .long 0x10AD7023 + .long 0x10AF8163 + + stxv 32+1, 16(3) + stxv 32+2, 32(3) + stxv 32+3, 48(3) + stxv 32+4, 64(3) + + + mtvsrdd 32+13, 11, 14 + mtvsrdd 32+14, 15, 12 + mtvsrdd 32+15, 9, 16 + mtvsrdd 32+16, 9, 10 + stxv 32+5, 80(3) + .long 0x126D7023 + .long 0x10CF84E3 + + + add 17, 9, 9 + mtvsrdd 32+13, 11, 15 + mtvsrdd 32+14, 16, 12 + mtvsrdd 32+15, 0, 17 + mtvsrdd 32+16, 0, 10 + .long 0x126D7023 + .long 0x10EF84E3 + + + mtvsrdd 32+13, 11, 16 + mtvsrdd 32+14, 17, 12 + mtvsrdd 32+15, 0, 10 + .long 0x126D7023 + .long 0x110F7CE3 + + + add 14, 10, 10 + mtvsrdd 32+13, 11, 17 + mtvsrdd 32+14, 14, 12 + .long 0x112D7023 + + + mtvsrdd 32+13, 11, 14 + mtvsrdd 32+14, 11, 12 + .long 0x114D7023 + + stxv 32+6, 96(3) + stxv 32+7, 112(3) + + + + + + + + mulld 6, 12, 11 + mulhdu 7, 12, 11 + addc 8, 6, 6 + adde 9, 7, 7 + + stxv 32+8, 128(3) + stxv 32+9, 144(3) + stxv 32+10, 160(3) + + + + mulld 14, 12, 12 + mulhdu 15, 12, 12 + + std 8, 176(3) + std 9, 184(3) + std 14, 192(3) + std 15, 200(3) + + blr +.size _p384_felem_square_core,.-_p384_felem_square_core + + + + +.macro F128_X_8 _off1 _off2 + ld 9,\_off1(3) + ld 8,\_off2(3) + srdi 10,9,61 + rldimi 10,8,3,0 + sldi 9,9,3 + std 9,\_off1(3) + std 10,\_off2(3) +.endm + +.globl p384_felem128_mul_by_8 +.type p384_felem128_mul_by_8,@function +.type p384_felem128_mul_by_8,@function +.align 4 +p384_felem128_mul_by_8: +.localentry p384_felem128_mul_by_8,0 + + + F128_X_8 0, 8 + + F128_X_8 16, 24 + + F128_X_8 32, 40 + + F128_X_8 48, 56 + + F128_X_8 64, 72 + + F128_X_8 80, 88 + + F128_X_8 96, 104 + + F128_X_8 112, 120 + + F128_X_8 128, 136 + + F128_X_8 144, 152 + + F128_X_8 160, 168 + + F128_X_8 176, 184 + + F128_X_8 192, 200 + + blr +.size p384_felem128_mul_by_8,.-p384_felem128_mul_by_8 + + + + +.macro F128_X_2 _off1 _off2 + ld 9,\_off1(3) + ld 8,\_off2(3) + srdi 10,9,63 + rldimi 10,8,1,0 + sldi 9,9,1 + std 9,\_off1(3) + std 10,\_off2(3) +.endm + +.globl p384_felem128_mul_by_2 +.type p384_felem128_mul_by_2,@function +.type p384_felem128_mul_by_2,@function +.align 4 +p384_felem128_mul_by_2: +.localentry p384_felem128_mul_by_2,0 + + + F128_X_2 0, 8 + + F128_X_2 16, 24 + + F128_X_2 32, 40 + + F128_X_2 48, 56 + + F128_X_2 64, 72 + + F128_X_2 80, 88 + + F128_X_2 96, 104 + + F128_X_2 112, 120 + + F128_X_2 128, 136 + + F128_X_2 144, 152 + + F128_X_2 160, 168 + + F128_X_2 176, 184 + + F128_X_2 192, 200 + + blr +.size p384_felem128_mul_by_2,.-p384_felem128_mul_by_2 + +.globl p384_felem_diff128 +.type p384_felem_diff128,@function +.type p384_felem_diff128,@function +.align 4 +p384_felem_diff128: +.localentry p384_felem_diff128,0 + + + addis 5, 2, .LConst_two127@toc@ha + addi 5, 5, .LConst_two127@toc@l + + ld 10, 0(3) + ld 8, 8(3) + li 9, 0 + addc 10, 10, 9 + li 7, -1 + rldicr 7, 7, 0, 0 + adde 8, 8, 7 + ld 11, 0(4) + ld 12, 8(4) + subfc 11, 11, 10 + subfe 12, 12, 8 + std 11, 0(3) + std 12, 8(3) + + + ld 8, 16(3) + ld 7, 24(3) + ld 10, 24(5) + addc 8, 8, 9 + adde 7, 7, 10 + ld 11, 16(4) + ld 12, 24(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 16(3) + std 12, 24(3) + + ld 8, 32(3) + ld 7, 40(3) + addc 8, 8, 9 + adde 7, 7, 10 + ld 11, 32(4) + ld 12, 40(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 32(3) + std 12, 40(3) + + ld 8, 48(3) + ld 7, 56(3) + addc 8, 8, 9 + adde 7, 7, 10 + ld 11, 48(4) + ld 12, 56(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 48(3) + std 12, 56(3) + + ld 8, 64(3) + ld 7, 72(3) + addc 8, 8, 9 + adde 7, 7, 10 + ld 11, 64(4) + ld 12, 72(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 64(3) + std 12, 72(3) + + ld 8, 80(3) + ld 7, 88(3) + addc 8, 8, 9 + adde 7, 7, 10 + ld 11, 80(4) + ld 12, 88(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 80(3) + std 12, 88(3) + + ld 8, 96(3) + ld 7, 104(3) + ld 6, 40(5) + addc 8, 8, 9 + adde 7, 7, 6 + ld 11, 96(4) + ld 12, 104(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 96(3) + std 12, 104(3) + + ld 8, 112(3) + ld 7, 120(3) + ld 6, 56(5) + addc 8, 8, 9 + adde 7, 7, 6 + ld 11, 112(4) + ld 12, 120(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 112(3) + std 12, 120(3) + + ld 8, 128(3) + ld 7, 136(3) + ld 6, 72(5) + addc 8, 8, 9 + adde 7, 7, 6 + ld 11, 128(4) + ld 12, 136(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 128(3) + std 12, 136(3) + + ld 8, 144(3) + ld 7, 152(3) + addc 8, 8, 9 + adde 7, 7, 10 + ld 11, 144(4) + ld 12, 152(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 144(3) + std 12, 152(3) + + ld 8, 160(3) + ld 7, 168(3) + addc 8, 8, 9 + adde 7, 7, 10 + ld 11, 160(4) + ld 12, 168(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 160(3) + std 12, 168(3) + + ld 8, 176(3) + ld 7, 184(3) + addc 8, 8, 9 + adde 7, 7, 10 + ld 11, 176(4) + ld 12, 184(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 176(3) + std 12, 184(3) + + ld 8, 192(3) + ld 7, 200(3) + addc 8, 8, 9 + adde 7, 7, 10 + ld 11, 192(4) + ld 12, 200(4) + subfc 11, 11, 8 + subfe 12, 12, 7 + std 11, 192(3) + std 12, 200(3) + + blr +.size p384_felem_diff128,.-p384_felem_diff128 + +.data +.align 4 +.LConst_two127: + +.long 0x00000000, 0x00000000, 0x00000000, 0x80000000 + +.long 0x00000000, 0x00000000, 0xffffff80, 0x7fffffff + +.long 0x00000000, 0x00000000, 0xffff7f80, 0x80007fff + +.long 0x00000000, 0x00000000, 0xffffff80, 0x7f7fffff + +.long 0x00000000, 0x00000000, 0x7fffff80, 0x7fffffff + +.abiversion 2 +.text + +.globl p384_felem_diff_128_64 +.type p384_felem_diff_128_64,@function +.type p384_felem_diff_128_64,@function +.align 4 +p384_felem_diff_128_64: +.localentry p384_felem_diff_128_64,0 + + addis 5, 2, .LConst_128_two64@toc@ha + addi 5, 5, .LConst_128_two64@toc@l + + ld 9, 0(3) + ld 10, 8(3) + ld 8, 48(5) + li 7, 0 + addc 9, 9, 8 + li 6, 1 + adde 10, 10, 6 + ld 11, 0(4) + subfc 8, 11, 9 + subfe 12, 7, 10 + std 8, 0(3) + std 12, 8(3) + + ld 9, 16(3) + ld 10, 24(3) + ld 8, 0(5) + addc 9, 9, 8 + addze 10, 10 + ld 11, 8(4) + subfc 11, 11, 9 + subfe 12, 7, 10 + std 11, 16(3) + std 12, 24(3) + + ld 9, 32(3) + ld 10, 40(3) + ld 8, 16(5) + addc 9, 9, 8 + addze 10, 10 + ld 11, 16(4) + subfc 11, 11, 9 + subfe 12, 7, 10 + std 11, 32(3) + std 12, 40(3) + + ld 10, 48(3) + ld 8, 56(3) + + li 9, -256 + addc 10, 10, 9 + addze 8, 8 + ld 11, 24(4) + subfc 11, 11, 10 + subfe 12, 7, 8 + std 11, 48(3) + std 12, 56(3) + + ld 10, 64(3) + ld 8, 72(3) + addc 10, 10, 9 + addze 8, 8 + ld 11, 32(4) + subfc 11, 11, 10 + subfe 12, 7, 8 + std 11, 64(3) + std 12, 72(3) + + ld 10, 80(3) + ld 8, 88(3) + addc 10, 10, 9 + addze 8, 8 + ld 11, 40(4) + subfc 11, 11, 10 + subfe 12, 7, 8 + std 11, 80(3) + std 12, 88(3) + + ld 10, 96(3) + ld 8, 104(3) + addc 10, 10, 9 + addze 9, 8 + ld 11, 48(4) + subfc 11, 11, 10 + subfe 12, 7, 9 + std 11, 96(3) + std 12, 104(3) + + blr +.size p384_felem_diff_128_64,.-p384_felem_diff_128_64 + +.data +.align 4 +.LConst_128_two64: + +.long 0xffffff00, 0xfeffffff, 0x00000000, 0x00000000 + +.long 0xffffff00, 0xfffffffe, 0x00000000, 0x00000000 + +.long 0xffffff00, 0xffffffff, 0x00000000, 0x00000000 + +.long 0xffff0000, 0x0000ffff, 0x00000001, 0x00000000 + +.LConst_two60: + +.long 0xfffffff0, 0x0fefffff, 0x0, 0x0 + +.long 0xfffff000, 0x10000fff, 0x0, 0x0 + +.long 0xeffffff0, 0x0fffffff, 0x0, 0x0 + +.long 0xfffffff0, 0x0fffffff, 0x0, 0x0 + +.abiversion 2 +.text + + + +.globl p384_felem_diff64 +.type p384_felem_diff64,@function +.type p384_felem_diff64,@function +.align 4 +p384_felem_diff64: +.localentry p384_felem_diff64,0 + + addis 5, 2, .LConst_two60@toc@ha + addi 5, 5, .LConst_two60@toc@l + + ld 9, 0(3) + ld 8, 16(5) + li 7, 0 + add 9, 9, 8 + ld 11, 0(4) + subf 8, 11, 9 + std 8, 0(3) + + ld 9, 8(3) + ld 8, 0(5) + add 9, 9, 8 + ld 11, 8(4) + subf 11, 11, 9 + std 11, 8(3) + + ld 9, 16(3) + ld 8, 32(5) + add 9, 9, 8 + ld 11, 16(4) + subf 11, 11, 9 + std 11, 16(3) + + ld 10, 24(3) + ld 9, 48(5) + add 10, 10, 9 + ld 12, 24(4) + subf 12, 12, 10 + std 12, 24(3) + + ld 10, 32(3) + add 10, 10, 9 + ld 11, 32(4) + subf 11, 11, 10 + std 11, 32(3) + + ld 10, 40(3) + add 10, 10, 9 + ld 12, 40(4) + subf 12, 12, 10 + std 12, 40(3) + + ld 10, 48(3) + add 10, 10, 9 + ld 11, 48(4) + subf 11, 11, 10 + std 11, 48(3) + + blr +.size p384_felem_diff64,.-p384_felem_diff64 + +.abiversion 2 +.text + + + +.macro SHR o_h o_l in_h in_l nbits + srdi \o_l, \in_l, \nbits + rldimi \o_l, \in_h, 64-\nbits, 0 + srdi \o_h, \in_h, \nbits +.endm + + + + +.global p384_felem_reduce +.type p384_felem_reduce,@function +.align 4 +p384_felem_reduce: +.localentry p384_felem_reduce,0 + + + stdu 1, -208(1) + mflr 0 + std 14, 56(1) + std 15, 64(1) + std 16, 72(1) + std 17, 80(1) + std 18, 88(1) + std 19, 96(1) + std 20, 104(1) + std 21, 112(1) + std 22, 120(1) + std 23, 128(1) + std 24, 136(1) + std 25, 144(1) + std 26, 152(1) + std 27, 160(1) + std 28, 168(1) + std 29, 176(1) + std 30, 184(1) + std 31, 192(1) + + bl _p384_felem_reduce_core + + mtlr 0 + ld 14, 56(1) + ld 15, 64(1) + ld 16, 72(1) + ld 17, 80(1) + ld 18, 88(1) + ld 19, 96(1) + ld 20, 104(1) + ld 21, 112(1) + ld 22, 120(1) + ld 23, 128(1) + ld 24, 136(1) + ld 25, 144(1) + ld 26, 152(1) + ld 27, 160(1) + ld 28, 168(1) + ld 29, 176(1) + ld 30, 184(1) + ld 31, 192(1) + addi 1, 1, 208 + blr +.size p384_felem_reduce,.-p384_felem_reduce + + + + + +.type _p384_felem_reduce_core,@function +.align 4 +_p384_felem_reduce_core: +.localentry _p384_felem_reduce_core,0 + + addis 12, 2, .LConst@toc@ha + addi 12, 12, .LConst@toc@l + + + ld 11, 8(12) + + + ld 26, 96(4) + ld 27, 96+8(4) + add 27, 27, 11 + + + ld 24, 80(4) + ld 25, 80+8(4) + add 25, 25, 11 + + + ld 22, 64(4) + ld 23, 64+8(4) + add 23, 23, 11 + + + ld 20, 48(4) + ld 21, 48+8(4) + add 21, 21, 11 + + ld 11, 48+8(12) + + + ld 18, 32(4) + ld 19, 32+8(4) + add 19, 19, 11 + + ld 11, 16+8(12) + + + ld 16, 16(4) + ld 17, 16+8(4) + add 17, 17, 11 + + ld 11, 32+8(12) + + + ld 14, 0(4) + ld 15, 0+8(4) + add 15, 15, 11 + + + li 7, -1 + + + + + ld 5, 192(4) + ld 6, 192+8(4) + SHR 9, 10, 6, 5, 32 + ld 30, 128(4) + ld 31, 136(4) + addc 30, 30, 10 + adde 31, 31, 9 + + + srdi 11, 7, 32 + and 11, 11, 5 + sldi 11, 11, 24 + ld 28, 112(4) + ld 29, 120(4) + addc 28, 28, 11 + addze 29, 29 + + + SHR 9, 10, 6, 5, 8 + addc 28, 28, 10 + adde 29, 29, 9 + + + andi. 11, 5, 0xff + sldi 11, 11, 48 + addc 26, 26, 11 + addze 27, 27 + + + SHR 9, 10, 6, 5, 16 + subfc 26, 10, 26 + subfe 27, 9, 27 + + + srdi 11, 7, 48 + and 11, 11, 5 + sldi 11, 11, 40 + li 9, 0 + subfc 24, 11, 24 + subfe 25, 9, 25 + + + SHR 9, 10, 6, 5, 48 + addc 26, 26, 10 + adde 27, 27, 9 + + + srdi 11, 7, 16 + and 11, 11, 5 + sldi 11, 11, 8 + addc 24, 24, 11 + addze 25, 25 + + + + + ld 5, 176(4) + ld 6, 176+8(4) + SHR 9, 10, 6, 5, 32 + addc 28, 28, 10 + adde 29, 29, 9 + + + srdi 11, 7, 32 + and 11, 11, 5 + sldi 11, 11, 24 + addc 26, 26, 11 + addze 27, 27 + + + SHR 9, 10, 6, 5, 8 + addc 26, 26, 10 + adde 27, 27, 9 + + + andi. 11, 5, 0xff + sldi 11, 11, 48 + addc 24, 24, 11 + addze 25, 25 + + + SHR 9, 10, 6, 5, 16 + subfc 24, 10, 24 + subfe 25, 9, 25 + + + srdi 11, 7, 48 + and 11, 11, 5 + sldi 11, 11, 40 + li 9, 0 + subfc 22, 11, 22 + subfe 23, 9, 23 + + + SHR 9, 10, 6, 5, 48 + addc 24, 24, 10 + adde 25, 25, 9 + + + srdi 11, 7, 16 + and 11, 11, 5 + sldi 11, 11, 8 + addc 22, 22, 11 + addze 23, 23 + + + + + ld 5, 160(4) + ld 6, 160+8(4) + SHR 9, 10, 6, 5, 32 + addc 26, 26, 10 + adde 27, 27, 9 + + + srdi 11, 7, 32 + and 11, 11, 5 + sldi 11, 11, 24 + addc 24, 24, 11 + addze 25, 25 + + + SHR 9, 10, 6, 5, 8 + addc 24, 24, 10 + adde 25, 25, 9 + + + andi. 11, 5, 0xff + sldi 11, 11, 48 + addc 22, 22, 11 + addze 23, 23 + + + SHR 9, 10, 6, 5, 16 + subfc 22, 10, 22 + subfe 23, 9, 23 + + + srdi 11, 7, 48 + and 11, 11, 5 + sldi 11, 11, 40 + li 9, 0 + subfc 20, 11, 20 + subfe 21, 9, 21 + + + SHR 9, 10, 6, 5, 48 + addc 22, 22, 10 + adde 23, 23, 9 + + + srdi 11, 7, 16 + and 11, 11, 5 + sldi 11, 11, 8 + addc 20, 20, 11 + addze 21, 21 + + + + + ld 5, 144(4) + ld 6, 144+8(4) + SHR 9, 10, 6, 5, 32 + addc 24, 24, 10 + adde 25, 25, 9 + + + srdi 11, 7, 32 + and 11, 11, 5 + sldi 11, 11, 24 + addc 22, 22, 11 + addze 23, 23 + + + SHR 9, 10, 6, 5, 8 + addc 22, 22, 10 + adde 23, 23, 9 + + + andi. 11, 5, 0xff + sldi 11, 11, 48 + addc 20, 20, 11 + addze 21, 21 + + + SHR 9, 10, 6, 5, 16 + subfc 20, 10, 20 + subfe 21, 9, 21 + + + srdi 11, 7, 48 + and 11, 11, 5 + sldi 11, 11, 40 + li 9, 0 + subfc 18, 11, 18 + subfe 19, 9, 19 + + + SHR 9, 10, 6, 5, 48 + addc 20, 20, 10 + adde 21, 21, 9 + + + srdi 11, 7, 16 + and 11, 11, 5 + sldi 11, 11, 8 + addc 18, 18, 11 + addze 19, 19 + + + + + mr 5, 30 + mr 6, 31 + SHR 9, 10, 6, 5, 32 + addc 22, 22, 10 + adde 23, 23, 9 + + + srdi 11, 7, 32 + and 11, 11, 5 + sldi 11, 11, 24 + addc 20, 20, 11 + addze 21, 21 + + + SHR 9, 10, 6, 5, 8 + addc 20, 20, 10 + adde 21, 21, 9 + + + andi. 11, 5, 0xff + sldi 11, 11, 48 + addc 18, 18, 11 + addze 19, 19 + + + SHR 9, 10, 6, 5, 16 + subfc 18, 10, 18 + subfe 19, 9, 19 + + + srdi 11, 7, 48 + and 11, 11, 5 + sldi 11, 11, 40 + li 9, 0 + subfc 16, 11, 16 + subfe 17, 9, 17 + + + SHR 9, 10, 6, 5, 48 + addc 18, 18, 10 + adde 19, 19, 9 + + + srdi 11, 7, 16 + and 11, 11, 5 + sldi 11, 11, 8 + addc 16, 16, 11 + addze 17, 17 + + + + + mr 5, 28 + mr 6, 29 + SHR 9, 10, 6, 5, 32 + addc 20, 20, 10 + adde 21, 21, 9 + + + srdi 11, 7, 32 + and 11, 11, 5 + sldi 11, 11, 24 + addc 18, 18, 11 + addze 19, 19 + + + SHR 9, 10, 6, 5, 8 + addc 18, 18, 10 + adde 19, 19, 9 + + + andi. 11, 5, 0xff + sldi 11, 11, 48 + addc 16, 16, 11 + addze 17, 17 + + + SHR 9, 10, 6, 5, 16 + subfc 16, 10, 16 + subfe 17, 9, 17 + + + srdi 11, 7, 48 + and 11, 11, 5 + sldi 11, 11, 40 + li 9, 0 + subfc 14, 11, 14 + subfe 15, 9, 15 + + + SHR 9, 10, 6, 5, 48 + addc 16, 16, 10 + adde 17, 17, 9 + + + srdi 11, 7, 16 + and 11, 11, 5 + sldi 11, 11, 8 + addc 14, 14, 11 + addze 15, 15 + + + + + + + SHR 9, 10, 23, 22, 56 + addc 24, 24, 10 + adde 25, 25, 9 + srdi 11, 7, 8 + and 22, 22, 11 + li 23, 0 + + + + SHR 9, 10, 25, 24, 56 + addc 26, 26, 10 + adde 27, 27, 9 + and 24, 24, 11 + li 25, 0 + + + + + SHR 31, 30, 27, 26, 48 + srdi 11, 7, 16 + and 26, 26, 11 + li 27, 0 + + + + SHR 9, 10, 31, 30, 40 + addc 20, 20, 10 + adde 21, 21, 9 + + + srdi 11, 7, 24 + and 10, 30, 11 + sldi 10, 10, 16 + addc 18, 18, 10 + addze 19, 19 + + + SHR 9, 10, 31, 30, 16 + addc 18, 18, 10 + adde 19, 19, 9 + + + srdi 11, 7, 48 + and 10, 30, 11 + sldi 10, 10, 40 + addc 16, 16, 10 + addze 17, 17 + + + SHR 9, 10, 31, 30, 24 + subfc 16, 10, 16 + subfe 17, 9, 17 + + + srdi 11, 7, 40 + and 10, 30, 11 + sldi 10, 10, 32 + li 9, 0 + subfc 14, 10, 14 + subfe 15, 9, 15 + + + addc 14, 14, 30 + adde 15, 15, 31 + + + + + SHR 9, 10, 15, 14, 56 + addc 16, 16, 10 + adde 17, 17, 9 + + + srdi 11, 7, 8 + and 14, 14, 11 + li 15, 0 + + + SHR 9, 10, 17, 16, 56 + addc 18, 18, 10 + adde 19, 19, 9 + + + and 16, 16, 11 + li 17, 0 + + + SHR 9, 10, 19, 18, 56 + addc 20, 20, 10 + adde 21, 21, 9 + + + and 18, 18, 11 + li 19, 0 + + + SHR 9, 10, 21, 20, 56 + addc 22, 22, 10 + adde 23, 23, 9 + + + and 20, 20, 11 + li 21, 0 + + + SHR 9, 10, 23, 22, 56 + addc 24, 24, 10 + adde 25, 25, 9 + + + and 22, 22, 11 + + + SHR 9, 10, 25, 24, 56 + addc 26, 26, 10 + adde 27, 27, 9 + + + and 24, 24, 11 + + std 14, 0(3) + std 16, 8(3) + std 18, 16(3) + std 20, 24(3) + std 22, 32(3) + std 24, 40(3) + std 26, 48(3) + blr +.size _p384_felem_reduce_core,.-_p384_felem_reduce_core + +.data +.align 4 +.LConst: + +.long 0x0, 0x0, 0xfffffff0, 0xfffffff + +.long 0x0, 0x0, 0xfffffff0, 0xfefffff + +.long 0x0, 0x0, 0xfffff000, 0x10000fff + +.long 0x0, 0x0, 0xeffffff0, 0xfffffff + +.abiversion 2 +.text + + + + +.global p384_felem_square_reduce +.type p384_felem_square_reduce,@function +.align 4 +p384_felem_square_reduce: +.localentry p384_felem_square_reduce,0 + + stdu 1, -512(1) + mflr 0 + std 14, 56(1) + std 15, 64(1) + std 16, 72(1) + std 17, 80(1) + std 18, 88(1) + std 19, 96(1) + std 20, 104(1) + std 21, 112(1) + std 22, 120(1) + std 23, 128(1) + std 24, 136(1) + std 25, 144(1) + std 26, 152(1) + std 27, 160(1) + std 28, 168(1) + std 29, 176(1) + std 30, 184(1) + std 31, 192(1) + + std 3, 496(1) + addi 3, 1, 208 + bl _p384_felem_square_core + + mr 4, 3 + ld 3, 496(1) + bl _p384_felem_reduce_core + + ld 14, 56(1) + ld 15, 64(1) + ld 16, 72(1) + ld 17, 80(1) + ld 18, 88(1) + ld 19, 96(1) + ld 20, 104(1) + ld 21, 112(1) + ld 22, 120(1) + ld 23, 128(1) + ld 24, 136(1) + ld 25, 144(1) + ld 26, 152(1) + ld 27, 160(1) + ld 28, 168(1) + ld 29, 176(1) + ld 30, 184(1) + ld 31, 192(1) + addi 1, 1, 512 + mtlr 0 + blr +.size p384_felem_square_reduce,.-p384_felem_square_reduce + + + + +.global p384_felem_mul_reduce +.type p384_felem_mul_reduce,@function +.align 5 +p384_felem_mul_reduce: +.localentry p384_felem_mul_reduce,0 + + stdu 1, -512(1) + mflr 0 + std 14, 56(1) + std 15, 64(1) + std 16, 72(1) + std 17, 80(1) + std 18, 88(1) + std 19, 96(1) + std 20, 104(1) + std 21, 112(1) + std 22, 120(1) + std 23, 128(1) + std 24, 136(1) + std 25, 144(1) + std 26, 152(1) + std 27, 160(1) + std 28, 168(1) + std 29, 176(1) + std 30, 184(1) + std 31, 192(1) + + std 3, 496(1) + addi 3, 1, 208 + bl _p384_felem_mul_core + + mr 4, 3 + ld 3, 496(1) + bl _p384_felem_reduce_core + + ld 14, 56(1) + ld 15, 64(1) + ld 16, 72(1) + ld 17, 80(1) + ld 18, 88(1) + ld 19, 96(1) + ld 20, 104(1) + ld 21, 112(1) + ld 22, 120(1) + ld 23, 128(1) + ld 24, 136(1) + ld 25, 144(1) + ld 26, 152(1) + ld 27, 160(1) + ld 28, 168(1) + ld 29, 176(1) + ld 30, 184(1) + ld 31, 192(1) + addi 1, 1, 512 + mtlr 0 + blr +.size p384_felem_mul_reduce,.-p384_felem_mul_reduce diff --git a/sys/crypto/openssl/powerpc64/keccak1600-ppc64.S b/sys/crypto/openssl/powerpc64/keccak1600-ppc64.S index 251f59855f5d..236f9147f85c 100644 --- a/sys/crypto/openssl/powerpc64/keccak1600-ppc64.S +++ b/sys/crypto/openssl/powerpc64/keccak1600-ppc64.S @@ -298,7 +298,6 @@ KeccakF1600: .byte 0,12,4,1,0x80,18,1,0 .long 0 .size KeccakF1600,.-KeccakF1600 - .type dword_le_load,@function .align 5 dword_le_load: @@ -324,7 +323,6 @@ dword_le_load: .byte 0,12,0x14,0,0,0,1,0 .long 0 .size dword_le_load,.-dword_le_load - .globl SHA3_absorb .type SHA3_absorb,@function .type SHA3_absorb,@function @@ -571,6 +569,8 @@ SHA3_squeeze: subi 29,4,1 mr 30,5 mr 31,6 + cmplwi 7,0 + bne .Lnext_block b .Loop_squeeze .align 4 @@ -601,6 +601,7 @@ SHA3_squeeze: subic. 6,6,8 bgt .Loop_squeeze +.Lnext_block: mr 3,28 bl KeccakF1600 subi 3,28,8 diff --git a/sys/crypto/openssl/powerpc64/ppccpuid.S b/sys/crypto/openssl/powerpc64/ppccpuid.S index d352a41008b3..7bc3166145cb 100644 --- a/sys/crypto/openssl/powerpc64/ppccpuid.S +++ b/sys/crypto/openssl/powerpc64/ppccpuid.S @@ -65,6 +65,20 @@ OPENSSL_madd300_probe: .long 0 .byte 0,12,0x14,0,0,0,0,0 +.globl OPENSSL_brd31_probe +.type OPENSSL_brd31_probe,@function +.align 4 +OPENSSL_brd31_probe: +.localentry OPENSSL_brd31_probe,0 + + xor 0,0,0 + .long 0x7C030176 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size OPENSSL_brd31_probe,.-OPENSSL_brd31_probe + + .globl OPENSSL_wipe_cpu .type OPENSSL_wipe_cpu,@function .align 4 |