diff options
author | Jan Beich <jbeich@FreeBSD.org> | 2017-11-14 21:56:56 +0000 |
---|---|---|
committer | Jan Beich <jbeich@FreeBSD.org> | 2017-11-14 21:56:56 +0000 |
commit | 02c0d6d1f3851f457ff023430b5698db30230e72 (patch) | |
tree | d4a9cd923ad54f952b76ac70c45b6e4ffa37ddee /security/nss/files | |
parent | b638c463455894bbd94a2222048929deb96ee43f (diff) | |
download | ports-02c0d6d1f3851f457ff023430b5698db30230e72.tar.gz ports-02c0d6d1f3851f457ff023430b5698db30230e72.zip |
Notes
Diffstat (limited to 'security/nss/files')
-rw-r--r-- | security/nss/files/patch-bug1400603 | 942 |
1 files changed, 0 insertions, 942 deletions
diff --git a/security/nss/files/patch-bug1400603 b/security/nss/files/patch-bug1400603 deleted file mode 100644 index 321f50dfb9a9..000000000000 --- a/security/nss/files/patch-bug1400603 +++ /dev/null @@ -1,942 +0,0 @@ -commit e84403331d99 -Author: Daiki Ueno <dueno@redhat.com> -Date: Fri Sep 22 11:27:34 2017 +0200 - - Bug 1400603 - freebl: Reorganize AES-GCM source code based on hw/sw implementation, r=franziskus - - Reviewers: franziskus - - Reviewed By: franziskus - - Bug #: 1400603 - - Differential Revision: https://phabricator.services.mozilla.com/D65 ---- - lib/freebl/Makefile | 4 +- - lib/freebl/aes-x86.c | 157 +++++++++++++++++++++++++++++++++++++++++ - lib/freebl/freebl.gyp | 65 ++++++++++------- - lib/freebl/gcm-x86.c | 127 ++++++++++++++++++++++++++++++++++ - lib/freebl/gcm.c | 162 +++++++++++++------------------------------ - lib/freebl/gcm.h | 14 ++++ - lib/freebl/rijndael.c | 188 ++++++++------------------------------------------ - lib/freebl/rijndael.h | 18 ++++- - 8 files changed, 436 insertions(+), 299 deletions(-) - -diff --git lib/freebl/Makefile lib/freebl/Makefile -index d50e18696b..bc1ea86a5e 100644 ---- lib/freebl/Makefile -+++ lib/freebl/Makefile -@@ -110,7 +110,9 @@ endif - # NSS_X86_OR_X64 means the target is either x86 or x64 - ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH))) - DEFINES += -DNSS_X86_OR_X64 -- CFLAGS += -mpclmul -maes -+ EXTRA_SRCS += gcm-x86.c aes-x86.c -+$(OBJDIR)/gcm-x86.o: CFLAGS += -mpclmul -maes -+$(OBJDIR)/aes-x86.o: CFLAGS += -mpclmul -maes - ifneq (,$(USE_64)$(USE_X32)) - DEFINES += -DNSS_X64 - else -diff --git lib/freebl/aes-x86.c lib/freebl/aes-x86.c -new file mode 100644 -index 0000000000..830b4782fe ---- /dev/null -+++ lib/freebl/aes-x86.c -@@ -0,0 +1,157 @@ -+/* This Source Code Form is subject to the terms of the Mozilla Public -+ * License, v. 2.0. If a copy of the MPL was not distributed with this -+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -+ -+#ifdef FREEBL_NO_DEPEND -+#include "stubs.h" -+#endif -+#include "rijndael.h" -+#include "secerr.h" -+ -+#include <wmmintrin.h> /* aes-ni */ -+ -+#define EXPAND_KEY128(k, rcon, res) \ -+ tmp_key = _mm_aeskeygenassist_si128(k, rcon); \ -+ tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \ -+ tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \ -+ tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ -+ tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ -+ res = _mm_xor_si128(tmp, tmp_key) -+ -+static void -+native_key_expansion128(AESContext *cx, const unsigned char *key) -+{ -+ __m128i *keySchedule = cx->keySchedule; -+ pre_align __m128i tmp_key post_align; -+ pre_align __m128i tmp post_align; -+ keySchedule[0] = _mm_loadu_si128((__m128i *)key); -+ EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); -+ EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); -+ EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); -+ EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); -+ EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); -+ EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); -+ EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); -+ EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); -+ EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); -+ EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); -+} -+ -+#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \ -+ tmp2 = _mm_slli_si128(k0, 4); \ -+ tmp1 = _mm_xor_si128(k0, tmp2); \ -+ tmp2 = _mm_slli_si128(tmp2, 4); \ -+ tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ -+ tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \ -+ res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55)) -+ -+#define EXPAND_KEY192_PART2(res, k1, k2) \ -+ tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \ -+ res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF)) -+ -+#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \ -+ EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \ -+ EXPAND_KEY192_PART2(carry, res1, tmp3); \ -+ res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \ -+ _mm_castsi128_pd(tmp3), 0)); \ -+ res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \ -+ _mm_castsi128_pd(carry), 1)); \ -+ EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2) -+ -+static void -+native_key_expansion192(AESContext *cx, const unsigned char *key) -+{ -+ __m128i *keySchedule = cx->keySchedule; -+ pre_align __m128i tmp1 post_align; -+ pre_align __m128i tmp2 post_align; -+ pre_align __m128i tmp3 post_align; -+ pre_align __m128i carry post_align; -+ keySchedule[0] = _mm_loadu_si128((__m128i *)key); -+ keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); -+ EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], -+ keySchedule[3], carry, 0x1, 0x2); -+ EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); -+ EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], -+ keySchedule[6], carry, 0x4, 0x8); -+ EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); -+ EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], -+ keySchedule[9], carry, 0x10, 0x20); -+ EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); -+ EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], -+ keySchedule[12], carry, 0x40, 0x80); -+} -+ -+#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \ -+ tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \ -+ tmp2 = _mm_slli_si128(k1x, 4); \ -+ tmp1 = _mm_xor_si128(k1x, tmp2); \ -+ tmp2 = _mm_slli_si128(tmp2, 4); \ -+ tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ -+ res = _mm_xor_si128(tmp1, tmp_key); -+ -+#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \ -+ EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \ -+ EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA) -+ -+static void -+native_key_expansion256(AESContext *cx, const unsigned char *key) -+{ -+ __m128i *keySchedule = cx->keySchedule; -+ pre_align __m128i tmp_key post_align; -+ pre_align __m128i tmp1 post_align; -+ pre_align __m128i tmp2 post_align; -+ keySchedule[0] = _mm_loadu_si128((__m128i *)key); -+ keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); -+ EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], -+ keySchedule[1], 0x01); -+ EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], -+ keySchedule[3], 0x02); -+ EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], -+ keySchedule[5], 0x04); -+ EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], -+ keySchedule[7], 0x08); -+ EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], -+ keySchedule[9], 0x10); -+ EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], -+ keySchedule[11], 0x20); -+ EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], -+ keySchedule[13], 0xFF); -+} -+ -+/* -+ * AES key expansion using aes-ni instructions. -+ */ -+void -+rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, -+ unsigned int Nk) -+{ -+ switch (Nk) { -+ case 4: -+ native_key_expansion128(cx, key); -+ return; -+ case 6: -+ native_key_expansion192(cx, key); -+ return; -+ case 8: -+ native_key_expansion256(cx, key); -+ return; -+ default: -+ /* This shouldn't happen (checked by the caller). */ -+ return; -+ } -+} -+ -+void -+rijndael_native_encryptBlock(AESContext *cx, -+ unsigned char *output, -+ const unsigned char *input) -+{ -+ int i; -+ pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); -+ m = _mm_xor_si128(m, cx->keySchedule[0]); -+ for (i = 1; i < cx->Nr; ++i) { -+ m = _mm_aesenc_si128(m, cx->keySchedule[i]); -+ } -+ m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]); -+ _mm_storeu_si128((__m128i *)output, m); -+} -diff --git lib/freebl/freebl.gyp lib/freebl/freebl.gyp -index 1e93475004..5f59eef29c 100644 ---- lib/freebl/freebl.gyp -+++ lib/freebl/freebl.gyp -@@ -22,6 +22,37 @@ - '-mssse3' - ] - }, -+ { -+ 'target_name': 'gcm-aes-x86_c_lib', -+ 'type': 'static_library', -+ 'sources': [ -+ 'gcm-x86.c', 'aes-x86.c' -+ ], -+ 'dependencies': [ -+ '<(DEPTH)/exports.gyp:nss_exports' -+ ], -+ # Enable isa option for pclmul and aes-ni; supported since gcc 4.4. -+ # This is only supported by x84/x64. It's not needed for Windows, -+ # unless clang-cl is used. -+ 'cflags_mozilla': [ -+ '-mpclmul', '-maes' -+ ], -+ 'conditions': [ -+ [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', { -+ 'cflags': [ -+ '-mpclmul', '-maes' -+ ], -+ }], -+ # macOS build doesn't use cflags. -+ [ 'OS=="mac"', { -+ 'xcode_settings': { -+ 'OTHER_CFLAGS': [ -+ '-mpclmul', '-maes' -+ ], -+ }, -+ }] -+ ] -+ }, - { - 'target_name': 'freebl', - 'type': 'static_library', -@@ -45,6 +76,11 @@ - '<(DEPTH)/exports.gyp:nss_exports', - ], - 'conditions': [ -+ [ 'target_arch=="ia32" or target_arch=="x64"', { -+ 'dependencies': [ -+ 'gcm-aes-x86_c_lib' -+ ], -+ }], - [ 'OS=="linux"', { - 'defines!': [ - 'FREEBL_NO_DEPEND', -@@ -76,6 +112,11 @@ - '<(DEPTH)/exports.gyp:nss_exports', - ], - 'conditions': [ -+ [ 'target_arch=="ia32" or target_arch=="x64"', { -+ 'dependencies': [ -+ 'gcm-aes-x86_c_lib' -+ ] -+ }], - [ 'OS!="linux" and OS!="android"', { - 'conditions': [ - [ 'moz_fold_libs==0', { -@@ -154,27 +195,11 @@ - 'MP_API_COMPATIBLE' - ], - 'conditions': [ -- [ 'target_arch=="ia32" or target_arch=="x64"', { -- 'cflags_mozilla': [ -- '-mpclmul', -- '-maes', -- ], -- 'conditions': [ -- [ 'OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', { -- 'cflags': [ -- '-mpclmul', -- '-maes', -- ], -- }], -- ], -- }], - [ 'OS=="mac"', { - 'xcode_settings': { - # I'm not sure since when this is supported. - # But I hope that doesn't matter. We also assume this is x86/x64. - 'OTHER_CFLAGS': [ -- '-mpclmul', -- '-maes', - '-std=gnu99', - ], - }, -@@ -268,14 +293,6 @@ - 'MP_USE_UINT_DIGIT', - ], - }], -- [ 'target_arch=="ia32" or target_arch=="x64"', { -- 'cflags': [ -- # enable isa option for pclmul am aes-ni; supported since gcc 4.4 -- # This is only support by x84/x64. It's not needed for Windows. -- '-mpclmul', -- '-maes', -- ], -- }], - [ 'target_arch=="arm"', { - 'defines': [ - 'MP_ASSEMBLY_MULTIPLY', -diff --git lib/freebl/gcm-x86.c lib/freebl/gcm-x86.c -new file mode 100644 -index 0000000000..e34d633943 ---- /dev/null -+++ lib/freebl/gcm-x86.c -@@ -0,0 +1,127 @@ -+/* This Source Code Form is subject to the terms of the Mozilla Public -+ * License, v. 2.0. If a copy of the MPL was not distributed with this -+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -+ -+#ifdef FREEBL_NO_DEPEND -+#include "stubs.h" -+#endif -+#include "gcm.h" -+#include "secerr.h" -+ -+#include <wmmintrin.h> /* clmul */ -+ -+#define WRITE64(x, bytes) \ -+ (bytes)[0] = (x) >> 56; \ -+ (bytes)[1] = (x) >> 48; \ -+ (bytes)[2] = (x) >> 40; \ -+ (bytes)[3] = (x) >> 32; \ -+ (bytes)[4] = (x) >> 24; \ -+ (bytes)[5] = (x) >> 16; \ -+ (bytes)[6] = (x) >> 8; \ -+ (bytes)[7] = (x); -+ -+SECStatus -+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) -+{ -+ uint64_t tmp_out[2]; -+ _mm_storeu_si128((__m128i *)tmp_out, ghash->x); -+ /* maxout must be larger than 16 byte (checked by the caller). */ -+ WRITE64(tmp_out[0], outbuf + 8); -+ WRITE64(tmp_out[1], outbuf); -+ return SECSuccess; -+} -+ -+SECStatus -+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, -+ unsigned int count) -+{ -+ size_t i; -+ pre_align __m128i z_high post_align; -+ pre_align __m128i z_low post_align; -+ pre_align __m128i C post_align; -+ pre_align __m128i D post_align; -+ pre_align __m128i E post_align; -+ pre_align __m128i F post_align; -+ pre_align __m128i bin post_align; -+ pre_align __m128i Ci post_align; -+ pre_align __m128i tmp post_align; -+ -+ for (i = 0; i < count; i++, buf += 16) { -+ bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1], -+ ((uint16_t)buf[2] << 8) | buf[3], -+ ((uint16_t)buf[4] << 8) | buf[5], -+ ((uint16_t)buf[6] << 8) | buf[7], -+ ((uint16_t)buf[8] << 8) | buf[9], -+ ((uint16_t)buf[10] << 8) | buf[11], -+ ((uint16_t)buf[12] << 8) | buf[13], -+ ((uint16_t)buf[14] << 8) | buf[15]); -+ Ci = _mm_xor_si128(bin, ghash->x); -+ -+ /* Do binary mult ghash->X = Ci * ghash->H. */ -+ C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00); -+ D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11); -+ E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01); -+ F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10); -+ tmp = _mm_xor_si128(E, F); -+ z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8)); -+ z_high = _mm_unpackhi_epi64(z_high, D); -+ z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C); -+ z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low); -+ -+ /* Shift one to the left (multiply by x) as gcm spec is stupid. */ -+ C = _mm_slli_si128(z_low, 8); -+ E = _mm_srli_epi64(C, 63); -+ D = _mm_slli_si128(z_high, 8); -+ F = _mm_srli_epi64(D, 63); -+ /* Carry over */ -+ C = _mm_srli_si128(z_low, 8); -+ D = _mm_srli_epi64(C, 63); -+ z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E); -+ z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D); -+ -+ /* Reduce */ -+ C = _mm_slli_si128(z_low, 8); -+ /* D = z_low << 127 */ -+ D = _mm_slli_epi64(C, 63); -+ /* E = z_low << 126 */ -+ E = _mm_slli_epi64(C, 62); -+ /* F = z_low << 121 */ -+ F = _mm_slli_epi64(C, 57); -+ /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */ -+ z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F); -+ C = _mm_srli_si128(z_low, 8); -+ /* D = z_low >> 1 */ -+ D = _mm_slli_epi64(C, 63); -+ D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D); -+ /* E = z_low >> 2 */ -+ E = _mm_slli_epi64(C, 62); -+ E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E); -+ /* F = z_low >> 7 */ -+ F = _mm_slli_epi64(C, 57); -+ F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F); -+ /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */ -+ ghash->x = _mm_xor_si128(_mm_xor_si128( -+ _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E), -+ F); -+ } -+ return SECSuccess; -+} -+ -+SECStatus -+gcm_HashInit_hw(gcmHashContext *ghash) -+{ -+ ghash->ghash_mul = gcm_HashMult_hw; -+ ghash->x = _mm_setzero_si128(); -+ /* MSVC requires __m64 to load epi64. */ -+ ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high, -+ ghash->h_low >> 32, (uint32_t)ghash->h_low); -+ ghash->hw = PR_TRUE; -+ return SECSuccess; -+} -+ -+SECStatus -+gcm_HashZeroX_hw(gcmHashContext *ghash) -+{ -+ ghash->x = _mm_setzero_si128(); -+ return SECSuccess; -+} -diff --git lib/freebl/gcm.c lib/freebl/gcm.c -index 780b7a6322..f1e16da78e 100644 ---- lib/freebl/gcm.c -+++ lib/freebl/gcm.c -@@ -17,18 +17,50 @@ - - #include <limits.h> - --#ifdef NSS_X86_OR_X64 --#include <wmmintrin.h> /* clmul */ --#endif -- - /* Forward declarations */ -+SECStatus gcm_HashInit_hw(gcmHashContext *ghash); -+SECStatus gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf); - SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count); -+SECStatus gcm_HashZeroX_hw(gcmHashContext *ghash); - SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count); - SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count); - -+/* Stub definitions for the above *_hw functions, which shouldn't be -+ * used unless NSS_X86_OR_X64 is defined */ -+#ifndef NSS_X86_OR_X64 -+SECStatus -+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) -+{ -+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -+ return SECFailure; -+} -+ -+SECStatus -+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, -+ unsigned int count) -+{ -+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -+ return SECFailure; -+} -+ -+SECStatus -+gcm_HashInit_hw(gcmHashContext *ghash) -+{ -+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -+ return SECFailure; -+} -+ -+SECStatus -+gcm_HashZeroX_hw(gcmHashContext *ghash) -+{ -+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -+ return SECFailure; -+} -+#endif /* NSS_X86_OR_X64 */ -+ - uint64_t - get64(const unsigned char *bytes) - { -@@ -46,6 +78,8 @@ get64(const unsigned char *bytes) - SECStatus - gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw) - { -+ SECStatus rv = SECSuccess; -+ - ghash->cLen = 0; - ghash->bufLen = 0; - PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf)); -@@ -53,17 +87,7 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw) - ghash->h_low = get64(H + 8); - ghash->h_high = get64(H); - if (clmul_support() && !sw) { --#ifdef NSS_X86_OR_X64 -- ghash->ghash_mul = gcm_HashMult_hw; -- ghash->x = _mm_setzero_si128(); -- /* MSVC requires __m64 to load epi64. */ -- ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high, -- ghash->h_low >> 32, (uint32_t)ghash->h_low); -- ghash->hw = PR_TRUE; --#else -- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -- return SECFailure; --#endif /* NSS_X86_OR_X64 */ -+ rv = gcm_HashInit_hw(ghash); - } else { - /* We fall back to the software implementation if we can't use / don't - * want to use pclmul. */ -@@ -75,7 +99,7 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw) - ghash->x_high = ghash->x_low = 0; - ghash->hw = PR_FALSE; - } -- return SECSuccess; -+ return rv; - } - - #ifdef HAVE_INT128_SUPPORT -@@ -283,102 +307,17 @@ gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, - } - #endif /* HAVE_INT128_SUPPORT */ - --SECStatus --gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, -- unsigned int count) --{ --#ifdef NSS_X86_OR_X64 -- size_t i; -- pre_align __m128i z_high post_align; -- pre_align __m128i z_low post_align; -- pre_align __m128i C post_align; -- pre_align __m128i D post_align; -- pre_align __m128i E post_align; -- pre_align __m128i F post_align; -- pre_align __m128i bin post_align; -- pre_align __m128i Ci post_align; -- pre_align __m128i tmp post_align; -- -- for (i = 0; i < count; i++, buf += 16) { -- bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1], -- ((uint16_t)buf[2] << 8) | buf[3], -- ((uint16_t)buf[4] << 8) | buf[5], -- ((uint16_t)buf[6] << 8) | buf[7], -- ((uint16_t)buf[8] << 8) | buf[9], -- ((uint16_t)buf[10] << 8) | buf[11], -- ((uint16_t)buf[12] << 8) | buf[13], -- ((uint16_t)buf[14] << 8) | buf[15]); -- Ci = _mm_xor_si128(bin, ghash->x); -- -- /* Do binary mult ghash->X = Ci * ghash->H. */ -- C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00); -- D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11); -- E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01); -- F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10); -- tmp = _mm_xor_si128(E, F); -- z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8)); -- z_high = _mm_unpackhi_epi64(z_high, D); -- z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C); -- z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low); -- -- /* Shift one to the left (multiply by x) as gcm spec is stupid. */ -- C = _mm_slli_si128(z_low, 8); -- E = _mm_srli_epi64(C, 63); -- D = _mm_slli_si128(z_high, 8); -- F = _mm_srli_epi64(D, 63); -- /* Carry over */ -- C = _mm_srli_si128(z_low, 8); -- D = _mm_srli_epi64(C, 63); -- z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E); -- z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D); -- -- /* Reduce */ -- C = _mm_slli_si128(z_low, 8); -- /* D = z_low << 127 */ -- D = _mm_slli_epi64(C, 63); -- /* E = z_low << 126 */ -- E = _mm_slli_epi64(C, 62); -- /* F = z_low << 121 */ -- F = _mm_slli_epi64(C, 57); -- /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */ -- z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F); -- C = _mm_srli_si128(z_low, 8); -- /* D = z_low >> 1 */ -- D = _mm_slli_epi64(C, 63); -- D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D); -- /* E = z_low >> 2 */ -- E = _mm_slli_epi64(C, 62); -- E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E); -- /* F = z_low >> 7 */ -- F = _mm_slli_epi64(C, 57); -- F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F); -- /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */ -- ghash->x = _mm_xor_si128(_mm_xor_si128( -- _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E), -- F); -- } -- return SECSuccess; --#else -- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -- return SECFailure; --#endif /* NSS_X86_OR_X64 */ --} -- - static SECStatus - gcm_zeroX(gcmHashContext *ghash) - { -+ SECStatus rv = SECSuccess; -+ - if (ghash->hw) { --#ifdef NSS_X86_OR_X64 -- ghash->x = _mm_setzero_si128(); -- return SECSuccess; --#else -- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -- return SECFailure; --#endif /* NSS_X86_OR_X64 */ -+ rv = gcm_HashZeroX_hw(ghash); - } - - ghash->x_high = ghash->x_low = 0; -- return SECSuccess; -+ return rv; - } - - /* -@@ -503,15 +442,10 @@ gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, - } - - if (ghash->hw) { --#ifdef NSS_X86_OR_X64 -- uint64_t tmp_out[2]; -- _mm_storeu_si128((__m128i *)tmp_out, ghash->x); -- WRITE64(tmp_out[0], T + 8); -- WRITE64(tmp_out[1], T); --#else -- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -- return SECFailure; --#endif /* NSS_X86_OR_X64 */ -+ rv = gcm_HashWrite_hw(ghash, T); -+ if (rv != SECSuccess) { -+ goto cleanup; -+ } - } else { - WRITE64(ghash->x_low, T + 8); - WRITE64(ghash->x_high, T); -diff --git lib/freebl/gcm.h lib/freebl/gcm.h -index 0c707a0811..42ef0f7179 100644 ---- lib/freebl/gcm.h -+++ lib/freebl/gcm.h -@@ -9,7 +9,21 @@ - #include <stdint.h> - - #ifdef NSS_X86_OR_X64 -+/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */ -+#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \ -+ (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) -+#pragma GCC push_options -+#pragma GCC target("sse2") -+#undef NSS_DISABLE_SSE2 -+#define NSS_DISABLE_SSE2 1 -+#endif /* GCC <= 4.8 */ -+ - #include <emmintrin.h> /* __m128i */ -+ -+#ifdef NSS_DISABLE_SSE2 -+#undef NSS_DISABLE_SSE2 -+#pragma GCC pop_options -+#endif /* NSS_DISABLE_SSE2 */ - #endif - - SEC_BEGIN_PROTOS -diff --git lib/freebl/rijndael.c lib/freebl/rijndael.c -index a09f13098e..5de27de9ce 100644 ---- lib/freebl/rijndael.c -+++ lib/freebl/rijndael.c -@@ -27,6 +27,34 @@ - #include "intel-gcm.h" - #endif /* INTEL_GCM */ - -+/* Forward declarations */ -+void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, -+ unsigned int Nk); -+void rijndael_native_encryptBlock(AESContext *cx, -+ unsigned char *output, -+ const unsigned char *input); -+ -+/* Stub definitions for the above rijndael_native_* functions, which -+ * shouldn't be used unless NSS_X86_OR_X64 is defined */ -+#ifndef NSS_X86_OR_X64 -+void -+rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, -+ unsigned int Nk) -+{ -+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -+ PORT_Assert(0); -+} -+ -+void -+rijndael_native_encryptBlock(AESContext *cx, -+ unsigned char *output, -+ const unsigned char *input) -+{ -+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); -+ PORT_Assert(0); -+} -+#endif /* NSS_X86_OR_X64 */ -+ - /* - * There are currently three ways to build this code, varying in performance - * and code size. -@@ -309,162 +337,6 @@ rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int N - } - } - --#if defined(NSS_X86_OR_X64) --#define EXPAND_KEY128(k, rcon, res) \ -- tmp_key = _mm_aeskeygenassist_si128(k, rcon); \ -- tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \ -- tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \ -- tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ -- tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ -- res = _mm_xor_si128(tmp, tmp_key) -- --static void --native_key_expansion128(AESContext *cx, const unsigned char *key) --{ -- __m128i *keySchedule = cx->keySchedule; -- pre_align __m128i tmp_key post_align; -- pre_align __m128i tmp post_align; -- keySchedule[0] = _mm_loadu_si128((__m128i *)key); -- EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); -- EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); -- EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); -- EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); -- EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); -- EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); -- EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); -- EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); -- EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); -- EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); --} -- --#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \ -- tmp2 = _mm_slli_si128(k0, 4); \ -- tmp1 = _mm_xor_si128(k0, tmp2); \ -- tmp2 = _mm_slli_si128(tmp2, 4); \ -- tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ -- tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \ -- res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55)) -- --#define EXPAND_KEY192_PART2(res, k1, k2) \ -- tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \ -- res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF)) -- --#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \ -- EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \ -- EXPAND_KEY192_PART2(carry, res1, tmp3); \ -- res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \ -- _mm_castsi128_pd(tmp3), 0)); \ -- res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \ -- _mm_castsi128_pd(carry), 1)); \ -- EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2) -- --static void --native_key_expansion192(AESContext *cx, const unsigned char *key) --{ -- __m128i *keySchedule = cx->keySchedule; -- pre_align __m128i tmp1 post_align; -- pre_align __m128i tmp2 post_align; -- pre_align __m128i tmp3 post_align; -- pre_align __m128i carry post_align; -- keySchedule[0] = _mm_loadu_si128((__m128i *)key); -- keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); -- EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], -- keySchedule[3], carry, 0x1, 0x2); -- EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); -- EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], -- keySchedule[6], carry, 0x4, 0x8); -- EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); -- EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], -- keySchedule[9], carry, 0x10, 0x20); -- EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); -- EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], -- keySchedule[12], carry, 0x40, 0x80); --} -- --#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \ -- tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \ -- tmp2 = _mm_slli_si128(k1x, 4); \ -- tmp1 = _mm_xor_si128(k1x, tmp2); \ -- tmp2 = _mm_slli_si128(tmp2, 4); \ -- tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ -- res = _mm_xor_si128(tmp1, tmp_key); -- --#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \ -- EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \ -- EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA) -- --static void --native_key_expansion256(AESContext *cx, const unsigned char *key) --{ -- __m128i *keySchedule = cx->keySchedule; -- pre_align __m128i tmp_key post_align; -- pre_align __m128i tmp1 post_align; -- pre_align __m128i tmp2 post_align; -- keySchedule[0] = _mm_loadu_si128((__m128i *)key); -- keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); -- EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], -- keySchedule[1], 0x01); -- EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], -- keySchedule[3], 0x02); -- EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], -- keySchedule[5], 0x04); -- EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], -- keySchedule[7], 0x08); -- EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], -- keySchedule[9], 0x10); -- EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], -- keySchedule[11], 0x20); -- EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], -- keySchedule[13], 0xFF); --} -- --#endif /* NSS_X86_OR_X64 */ -- --/* -- * AES key expansion using aes-ni instructions. -- */ --static void --native_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) --{ --#ifdef NSS_X86_OR_X64 -- switch (Nk) { -- case 4: -- native_key_expansion128(cx, key); -- return; -- case 6: -- native_key_expansion192(cx, key); -- return; -- case 8: -- native_key_expansion256(cx, key); -- return; -- default: -- /* This shouldn't happen. */ -- PORT_Assert(0); -- } --#else -- PORT_Assert(0); --#endif /* NSS_X86_OR_X64 */ --} -- --static void --native_encryptBlock(AESContext *cx, -- unsigned char *output, -- const unsigned char *input) --{ --#ifdef NSS_X86_OR_X64 -- int i; -- pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); -- m = _mm_xor_si128(m, cx->keySchedule[0]); -- for (i = 1; i < cx->Nr; ++i) { -- m = _mm_aesenc_si128(m, cx->keySchedule[i]); -- } -- m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]); -- _mm_storeu_si128((__m128i *)output, m); --#else -- PORT_Assert(0); --#endif /* NSS_X86_OR_X64 */ --} -- - /* rijndael_key_expansion - * - * Generate the expanded key from the key input by the user. -@@ -830,7 +702,7 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output, - - if (aesni_support()) { - /* Use hardware acceleration for normal AES parameters. */ -- encryptor = &native_encryptBlock; -+ encryptor = &rijndael_native_encryptBlock; - } else { - encryptor = &rijndael_encryptBlock128; - } -@@ -1026,7 +898,7 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, - cx->mode == NSS_AES_CTR)) { - PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32); - /* Prepare hardware key for normal AES parameters. */ -- native_key_expansion(cx, key, Nk); -+ rijndael_native_key_expansion(cx, key, Nk); - } else { - rijndael_key_expansion(cx, key, Nk); - } -diff --git lib/freebl/rijndael.h lib/freebl/rijndael.h -index 1f4a8a9f73..1b63a323da 100644 ---- lib/freebl/rijndael.h -+++ lib/freebl/rijndael.h -@@ -8,8 +8,22 @@ - #include "blapii.h" - #include <stdint.h> - --#ifdef NSS_X86_OR_X64 --#include <wmmintrin.h> /* aes-ni */ -+#if defined(NSS_X86_OR_X64) -+/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */ -+#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \ -+ (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) -+#pragma GCC push_options -+#pragma GCC target("sse2") -+#undef NSS_DISABLE_SSE2 -+#define NSS_DISABLE_SSE2 1 -+#endif /* GCC <= 4.8 */ -+ -+#include <emmintrin.h> /* __m128i */ -+ -+#ifdef NSS_DISABLE_SSE2 -+#undef NSS_DISABLE_SSE2 -+#pragma GCC pop_options -+#endif /* NSS_DISABLE_SSE2 */ - #endif - - typedef void AESBlockFunc(AESContext *cx, |