aboutsummaryrefslogtreecommitdiff
path: root/security/nss/files
diff options
context:
space:
mode:
authorJan Beich <jbeich@FreeBSD.org>2017-11-14 21:56:56 +0000
committerJan Beich <jbeich@FreeBSD.org>2017-11-14 21:56:56 +0000
commit02c0d6d1f3851f457ff023430b5698db30230e72 (patch)
treed4a9cd923ad54f952b76ac70c45b6e4ffa37ddee /security/nss/files
parentb638c463455894bbd94a2222048929deb96ee43f (diff)
downloadports-02c0d6d1f3851f457ff023430b5698db30230e72.tar.gz
ports-02c0d6d1f3851f457ff023430b5698db30230e72.zip
Notes
Diffstat (limited to 'security/nss/files')
-rw-r--r--security/nss/files/patch-bug1400603942
1 files changed, 0 insertions, 942 deletions
diff --git a/security/nss/files/patch-bug1400603 b/security/nss/files/patch-bug1400603
deleted file mode 100644
index 321f50dfb9a9..000000000000
--- a/security/nss/files/patch-bug1400603
+++ /dev/null
@@ -1,942 +0,0 @@
-commit e84403331d99
-Author: Daiki Ueno <dueno@redhat.com>
-Date: Fri Sep 22 11:27:34 2017 +0200
-
- Bug 1400603 - freebl: Reorganize AES-GCM source code based on hw/sw implementation, r=franziskus
-
- Reviewers: franziskus
-
- Reviewed By: franziskus
-
- Bug #: 1400603
-
- Differential Revision: https://phabricator.services.mozilla.com/D65
----
- lib/freebl/Makefile | 4 +-
- lib/freebl/aes-x86.c | 157 +++++++++++++++++++++++++++++++++++++++++
- lib/freebl/freebl.gyp | 65 ++++++++++-------
- lib/freebl/gcm-x86.c | 127 ++++++++++++++++++++++++++++++++++
- lib/freebl/gcm.c | 162 +++++++++++++------------------------------
- lib/freebl/gcm.h | 14 ++++
- lib/freebl/rijndael.c | 188 ++++++++------------------------------------------
- lib/freebl/rijndael.h | 18 ++++-
- 8 files changed, 436 insertions(+), 299 deletions(-)
-
-diff --git lib/freebl/Makefile lib/freebl/Makefile
-index d50e18696b..bc1ea86a5e 100644
---- lib/freebl/Makefile
-+++ lib/freebl/Makefile
-@@ -110,7 +110,9 @@ endif
- # NSS_X86_OR_X64 means the target is either x86 or x64
- ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH)))
- DEFINES += -DNSS_X86_OR_X64
-- CFLAGS += -mpclmul -maes
-+ EXTRA_SRCS += gcm-x86.c aes-x86.c
-+$(OBJDIR)/gcm-x86.o: CFLAGS += -mpclmul -maes
-+$(OBJDIR)/aes-x86.o: CFLAGS += -mpclmul -maes
- ifneq (,$(USE_64)$(USE_X32))
- DEFINES += -DNSS_X64
- else
-diff --git lib/freebl/aes-x86.c lib/freebl/aes-x86.c
-new file mode 100644
-index 0000000000..830b4782fe
---- /dev/null
-+++ lib/freebl/aes-x86.c
-@@ -0,0 +1,157 @@
-+/* This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifdef FREEBL_NO_DEPEND
-+#include "stubs.h"
-+#endif
-+#include "rijndael.h"
-+#include "secerr.h"
-+
-+#include <wmmintrin.h> /* aes-ni */
-+
-+#define EXPAND_KEY128(k, rcon, res) \
-+ tmp_key = _mm_aeskeygenassist_si128(k, rcon); \
-+ tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \
-+ tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \
-+ tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
-+ tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
-+ res = _mm_xor_si128(tmp, tmp_key)
-+
-+static void
-+native_key_expansion128(AESContext *cx, const unsigned char *key)
-+{
-+ __m128i *keySchedule = cx->keySchedule;
-+ pre_align __m128i tmp_key post_align;
-+ pre_align __m128i tmp post_align;
-+ keySchedule[0] = _mm_loadu_si128((__m128i *)key);
-+ EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]);
-+ EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]);
-+ EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]);
-+ EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]);
-+ EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]);
-+ EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]);
-+ EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]);
-+ EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]);
-+ EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]);
-+ EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]);
-+}
-+
-+#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \
-+ tmp2 = _mm_slli_si128(k0, 4); \
-+ tmp1 = _mm_xor_si128(k0, tmp2); \
-+ tmp2 = _mm_slli_si128(tmp2, 4); \
-+ tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
-+ tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \
-+ res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55))
-+
-+#define EXPAND_KEY192_PART2(res, k1, k2) \
-+ tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \
-+ res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF))
-+
-+#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \
-+ EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \
-+ EXPAND_KEY192_PART2(carry, res1, tmp3); \
-+ res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \
-+ _mm_castsi128_pd(tmp3), 0)); \
-+ res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \
-+ _mm_castsi128_pd(carry), 1)); \
-+ EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2)
-+
-+static void
-+native_key_expansion192(AESContext *cx, const unsigned char *key)
-+{
-+ __m128i *keySchedule = cx->keySchedule;
-+ pre_align __m128i tmp1 post_align;
-+ pre_align __m128i tmp2 post_align;
-+ pre_align __m128i tmp3 post_align;
-+ pre_align __m128i carry post_align;
-+ keySchedule[0] = _mm_loadu_si128((__m128i *)key);
-+ keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
-+ EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2],
-+ keySchedule[3], carry, 0x1, 0x2);
-+ EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]);
-+ EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5],
-+ keySchedule[6], carry, 0x4, 0x8);
-+ EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]);
-+ EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8],
-+ keySchedule[9], carry, 0x10, 0x20);
-+ EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]);
-+ EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11],
-+ keySchedule[12], carry, 0x40, 0x80);
-+}
-+
-+#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \
-+ tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \
-+ tmp2 = _mm_slli_si128(k1x, 4); \
-+ tmp1 = _mm_xor_si128(k1x, tmp2); \
-+ tmp2 = _mm_slli_si128(tmp2, 4); \
-+ tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
-+ res = _mm_xor_si128(tmp1, tmp_key);
-+
-+#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \
-+ EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \
-+ EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA)
-+
-+static void
-+native_key_expansion256(AESContext *cx, const unsigned char *key)
-+{
-+ __m128i *keySchedule = cx->keySchedule;
-+ pre_align __m128i tmp_key post_align;
-+ pre_align __m128i tmp1 post_align;
-+ pre_align __m128i tmp2 post_align;
-+ keySchedule[0] = _mm_loadu_si128((__m128i *)key);
-+ keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
-+ EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0],
-+ keySchedule[1], 0x01);
-+ EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2],
-+ keySchedule[3], 0x02);
-+ EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4],
-+ keySchedule[5], 0x04);
-+ EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6],
-+ keySchedule[7], 0x08);
-+ EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8],
-+ keySchedule[9], 0x10);
-+ EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10],
-+ keySchedule[11], 0x20);
-+ EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12],
-+ keySchedule[13], 0xFF);
-+}
-+
-+/*
-+ * AES key expansion using aes-ni instructions.
-+ */
-+void
-+rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
-+ unsigned int Nk)
-+{
-+ switch (Nk) {
-+ case 4:
-+ native_key_expansion128(cx, key);
-+ return;
-+ case 6:
-+ native_key_expansion192(cx, key);
-+ return;
-+ case 8:
-+ native_key_expansion256(cx, key);
-+ return;
-+ default:
-+ /* This shouldn't happen (checked by the caller). */
-+ return;
-+ }
-+}
-+
-+void
-+rijndael_native_encryptBlock(AESContext *cx,
-+ unsigned char *output,
-+ const unsigned char *input)
-+{
-+ int i;
-+ pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input);
-+ m = _mm_xor_si128(m, cx->keySchedule[0]);
-+ for (i = 1; i < cx->Nr; ++i) {
-+ m = _mm_aesenc_si128(m, cx->keySchedule[i]);
-+ }
-+ m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]);
-+ _mm_storeu_si128((__m128i *)output, m);
-+}
-diff --git lib/freebl/freebl.gyp lib/freebl/freebl.gyp
-index 1e93475004..5f59eef29c 100644
---- lib/freebl/freebl.gyp
-+++ lib/freebl/freebl.gyp
-@@ -22,6 +22,37 @@
- '-mssse3'
- ]
- },
-+ {
-+ 'target_name': 'gcm-aes-x86_c_lib',
-+ 'type': 'static_library',
-+ 'sources': [
-+ 'gcm-x86.c', 'aes-x86.c'
-+ ],
-+ 'dependencies': [
-+ '<(DEPTH)/exports.gyp:nss_exports'
-+ ],
-+ # Enable isa option for pclmul and aes-ni; supported since gcc 4.4.
-+ # This is only supported by x84/x64. It's not needed for Windows,
-+ # unless clang-cl is used.
-+ 'cflags_mozilla': [
-+ '-mpclmul', '-maes'
-+ ],
-+ 'conditions': [
-+ [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', {
-+ 'cflags': [
-+ '-mpclmul', '-maes'
-+ ],
-+ }],
-+ # macOS build doesn't use cflags.
-+ [ 'OS=="mac"', {
-+ 'xcode_settings': {
-+ 'OTHER_CFLAGS': [
-+ '-mpclmul', '-maes'
-+ ],
-+ },
-+ }]
-+ ]
-+ },
- {
- 'target_name': 'freebl',
- 'type': 'static_library',
-@@ -45,6 +76,11 @@
- '<(DEPTH)/exports.gyp:nss_exports',
- ],
- 'conditions': [
-+ [ 'target_arch=="ia32" or target_arch=="x64"', {
-+ 'dependencies': [
-+ 'gcm-aes-x86_c_lib'
-+ ],
-+ }],
- [ 'OS=="linux"', {
- 'defines!': [
- 'FREEBL_NO_DEPEND',
-@@ -76,6 +112,11 @@
- '<(DEPTH)/exports.gyp:nss_exports',
- ],
- 'conditions': [
-+ [ 'target_arch=="ia32" or target_arch=="x64"', {
-+ 'dependencies': [
-+ 'gcm-aes-x86_c_lib'
-+ ]
-+ }],
- [ 'OS!="linux" and OS!="android"', {
- 'conditions': [
- [ 'moz_fold_libs==0', {
-@@ -154,27 +195,11 @@
- 'MP_API_COMPATIBLE'
- ],
- 'conditions': [
-- [ 'target_arch=="ia32" or target_arch=="x64"', {
-- 'cflags_mozilla': [
-- '-mpclmul',
-- '-maes',
-- ],
-- 'conditions': [
-- [ 'OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', {
-- 'cflags': [
-- '-mpclmul',
-- '-maes',
-- ],
-- }],
-- ],
-- }],
- [ 'OS=="mac"', {
- 'xcode_settings': {
- # I'm not sure since when this is supported.
- # But I hope that doesn't matter. We also assume this is x86/x64.
- 'OTHER_CFLAGS': [
-- '-mpclmul',
-- '-maes',
- '-std=gnu99',
- ],
- },
-@@ -268,14 +293,6 @@
- 'MP_USE_UINT_DIGIT',
- ],
- }],
-- [ 'target_arch=="ia32" or target_arch=="x64"', {
-- 'cflags': [
-- # enable isa option for pclmul am aes-ni; supported since gcc 4.4
-- # This is only support by x84/x64. It's not needed for Windows.
-- '-mpclmul',
-- '-maes',
-- ],
-- }],
- [ 'target_arch=="arm"', {
- 'defines': [
- 'MP_ASSEMBLY_MULTIPLY',
-diff --git lib/freebl/gcm-x86.c lib/freebl/gcm-x86.c
-new file mode 100644
-index 0000000000..e34d633943
---- /dev/null
-+++ lib/freebl/gcm-x86.c
-@@ -0,0 +1,127 @@
-+/* This Source Code Form is subject to the terms of the Mozilla Public
-+ * License, v. 2.0. If a copy of the MPL was not distributed with this
-+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-+
-+#ifdef FREEBL_NO_DEPEND
-+#include "stubs.h"
-+#endif
-+#include "gcm.h"
-+#include "secerr.h"
-+
-+#include <wmmintrin.h> /* clmul */
-+
-+#define WRITE64(x, bytes) \
-+ (bytes)[0] = (x) >> 56; \
-+ (bytes)[1] = (x) >> 48; \
-+ (bytes)[2] = (x) >> 40; \
-+ (bytes)[3] = (x) >> 32; \
-+ (bytes)[4] = (x) >> 24; \
-+ (bytes)[5] = (x) >> 16; \
-+ (bytes)[6] = (x) >> 8; \
-+ (bytes)[7] = (x);
-+
-+SECStatus
-+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
-+{
-+ uint64_t tmp_out[2];
-+ _mm_storeu_si128((__m128i *)tmp_out, ghash->x);
-+ /* maxout must be larger than 16 byte (checked by the caller). */
-+ WRITE64(tmp_out[0], outbuf + 8);
-+ WRITE64(tmp_out[1], outbuf);
-+ return SECSuccess;
-+}
-+
-+SECStatus
-+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
-+ unsigned int count)
-+{
-+ size_t i;
-+ pre_align __m128i z_high post_align;
-+ pre_align __m128i z_low post_align;
-+ pre_align __m128i C post_align;
-+ pre_align __m128i D post_align;
-+ pre_align __m128i E post_align;
-+ pre_align __m128i F post_align;
-+ pre_align __m128i bin post_align;
-+ pre_align __m128i Ci post_align;
-+ pre_align __m128i tmp post_align;
-+
-+ for (i = 0; i < count; i++, buf += 16) {
-+ bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
-+ ((uint16_t)buf[2] << 8) | buf[3],
-+ ((uint16_t)buf[4] << 8) | buf[5],
-+ ((uint16_t)buf[6] << 8) | buf[7],
-+ ((uint16_t)buf[8] << 8) | buf[9],
-+ ((uint16_t)buf[10] << 8) | buf[11],
-+ ((uint16_t)buf[12] << 8) | buf[13],
-+ ((uint16_t)buf[14] << 8) | buf[15]);
-+ Ci = _mm_xor_si128(bin, ghash->x);
-+
-+ /* Do binary mult ghash->X = Ci * ghash->H. */
-+ C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00);
-+ D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11);
-+ E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01);
-+ F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10);
-+ tmp = _mm_xor_si128(E, F);
-+ z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8));
-+ z_high = _mm_unpackhi_epi64(z_high, D);
-+ z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C);
-+ z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low);
-+
-+ /* Shift one to the left (multiply by x) as gcm spec is stupid. */
-+ C = _mm_slli_si128(z_low, 8);
-+ E = _mm_srli_epi64(C, 63);
-+ D = _mm_slli_si128(z_high, 8);
-+ F = _mm_srli_epi64(D, 63);
-+ /* Carry over */
-+ C = _mm_srli_si128(z_low, 8);
-+ D = _mm_srli_epi64(C, 63);
-+ z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E);
-+ z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D);
-+
-+ /* Reduce */
-+ C = _mm_slli_si128(z_low, 8);
-+ /* D = z_low << 127 */
-+ D = _mm_slli_epi64(C, 63);
-+ /* E = z_low << 126 */
-+ E = _mm_slli_epi64(C, 62);
-+ /* F = z_low << 121 */
-+ F = _mm_slli_epi64(C, 57);
-+ /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */
-+ z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F);
-+ C = _mm_srli_si128(z_low, 8);
-+ /* D = z_low >> 1 */
-+ D = _mm_slli_epi64(C, 63);
-+ D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D);
-+ /* E = z_low >> 2 */
-+ E = _mm_slli_epi64(C, 62);
-+ E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E);
-+ /* F = z_low >> 7 */
-+ F = _mm_slli_epi64(C, 57);
-+ F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F);
-+ /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */
-+ ghash->x = _mm_xor_si128(_mm_xor_si128(
-+ _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E),
-+ F);
-+ }
-+ return SECSuccess;
-+}
-+
-+SECStatus
-+gcm_HashInit_hw(gcmHashContext *ghash)
-+{
-+ ghash->ghash_mul = gcm_HashMult_hw;
-+ ghash->x = _mm_setzero_si128();
-+ /* MSVC requires __m64 to load epi64. */
-+ ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high,
-+ ghash->h_low >> 32, (uint32_t)ghash->h_low);
-+ ghash->hw = PR_TRUE;
-+ return SECSuccess;
-+}
-+
-+SECStatus
-+gcm_HashZeroX_hw(gcmHashContext *ghash)
-+{
-+ ghash->x = _mm_setzero_si128();
-+ return SECSuccess;
-+}
-diff --git lib/freebl/gcm.c lib/freebl/gcm.c
-index 780b7a6322..f1e16da78e 100644
---- lib/freebl/gcm.c
-+++ lib/freebl/gcm.c
-@@ -17,18 +17,50 @@
-
- #include <limits.h>
-
--#ifdef NSS_X86_OR_X64
--#include <wmmintrin.h> /* clmul */
--#endif
--
- /* Forward declarations */
-+SECStatus gcm_HashInit_hw(gcmHashContext *ghash);
-+SECStatus gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf);
- SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
- unsigned int count);
-+SECStatus gcm_HashZeroX_hw(gcmHashContext *ghash);
- SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf,
- unsigned int count);
- SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf,
- unsigned int count);
-
-+/* Stub definitions for the above *_hw functions, which shouldn't be
-+ * used unless NSS_X86_OR_X64 is defined */
-+#ifndef NSS_X86_OR_X64
-+SECStatus
-+gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf)
-+{
-+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-+ return SECFailure;
-+}
-+
-+SECStatus
-+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
-+ unsigned int count)
-+{
-+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-+ return SECFailure;
-+}
-+
-+SECStatus
-+gcm_HashInit_hw(gcmHashContext *ghash)
-+{
-+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-+ return SECFailure;
-+}
-+
-+SECStatus
-+gcm_HashZeroX_hw(gcmHashContext *ghash)
-+{
-+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-+ return SECFailure;
-+}
-+#endif /* NSS_X86_OR_X64 */
-+
- uint64_t
- get64(const unsigned char *bytes)
- {
-@@ -46,6 +78,8 @@ get64(const unsigned char *bytes)
- SECStatus
- gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw)
- {
-+ SECStatus rv = SECSuccess;
-+
- ghash->cLen = 0;
- ghash->bufLen = 0;
- PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
-@@ -53,17 +87,7 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw)
- ghash->h_low = get64(H + 8);
- ghash->h_high = get64(H);
- if (clmul_support() && !sw) {
--#ifdef NSS_X86_OR_X64
-- ghash->ghash_mul = gcm_HashMult_hw;
-- ghash->x = _mm_setzero_si128();
-- /* MSVC requires __m64 to load epi64. */
-- ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high,
-- ghash->h_low >> 32, (uint32_t)ghash->h_low);
-- ghash->hw = PR_TRUE;
--#else
-- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-- return SECFailure;
--#endif /* NSS_X86_OR_X64 */
-+ rv = gcm_HashInit_hw(ghash);
- } else {
- /* We fall back to the software implementation if we can't use / don't
- * want to use pclmul. */
-@@ -75,7 +99,7 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw)
- ghash->x_high = ghash->x_low = 0;
- ghash->hw = PR_FALSE;
- }
-- return SECSuccess;
-+ return rv;
- }
-
- #ifdef HAVE_INT128_SUPPORT
-@@ -283,102 +307,17 @@ gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf,
- }
- #endif /* HAVE_INT128_SUPPORT */
-
--SECStatus
--gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
-- unsigned int count)
--{
--#ifdef NSS_X86_OR_X64
-- size_t i;
-- pre_align __m128i z_high post_align;
-- pre_align __m128i z_low post_align;
-- pre_align __m128i C post_align;
-- pre_align __m128i D post_align;
-- pre_align __m128i E post_align;
-- pre_align __m128i F post_align;
-- pre_align __m128i bin post_align;
-- pre_align __m128i Ci post_align;
-- pre_align __m128i tmp post_align;
--
-- for (i = 0; i < count; i++, buf += 16) {
-- bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
-- ((uint16_t)buf[2] << 8) | buf[3],
-- ((uint16_t)buf[4] << 8) | buf[5],
-- ((uint16_t)buf[6] << 8) | buf[7],
-- ((uint16_t)buf[8] << 8) | buf[9],
-- ((uint16_t)buf[10] << 8) | buf[11],
-- ((uint16_t)buf[12] << 8) | buf[13],
-- ((uint16_t)buf[14] << 8) | buf[15]);
-- Ci = _mm_xor_si128(bin, ghash->x);
--
-- /* Do binary mult ghash->X = Ci * ghash->H. */
-- C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00);
-- D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11);
-- E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01);
-- F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10);
-- tmp = _mm_xor_si128(E, F);
-- z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8));
-- z_high = _mm_unpackhi_epi64(z_high, D);
-- z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C);
-- z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low);
--
-- /* Shift one to the left (multiply by x) as gcm spec is stupid. */
-- C = _mm_slli_si128(z_low, 8);
-- E = _mm_srli_epi64(C, 63);
-- D = _mm_slli_si128(z_high, 8);
-- F = _mm_srli_epi64(D, 63);
-- /* Carry over */
-- C = _mm_srli_si128(z_low, 8);
-- D = _mm_srli_epi64(C, 63);
-- z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E);
-- z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D);
--
-- /* Reduce */
-- C = _mm_slli_si128(z_low, 8);
-- /* D = z_low << 127 */
-- D = _mm_slli_epi64(C, 63);
-- /* E = z_low << 126 */
-- E = _mm_slli_epi64(C, 62);
-- /* F = z_low << 121 */
-- F = _mm_slli_epi64(C, 57);
-- /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */
-- z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F);
-- C = _mm_srli_si128(z_low, 8);
-- /* D = z_low >> 1 */
-- D = _mm_slli_epi64(C, 63);
-- D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D);
-- /* E = z_low >> 2 */
-- E = _mm_slli_epi64(C, 62);
-- E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E);
-- /* F = z_low >> 7 */
-- F = _mm_slli_epi64(C, 57);
-- F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F);
-- /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */
-- ghash->x = _mm_xor_si128(_mm_xor_si128(
-- _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E),
-- F);
-- }
-- return SECSuccess;
--#else
-- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-- return SECFailure;
--#endif /* NSS_X86_OR_X64 */
--}
--
- static SECStatus
- gcm_zeroX(gcmHashContext *ghash)
- {
-+ SECStatus rv = SECSuccess;
-+
- if (ghash->hw) {
--#ifdef NSS_X86_OR_X64
-- ghash->x = _mm_setzero_si128();
-- return SECSuccess;
--#else
-- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-- return SECFailure;
--#endif /* NSS_X86_OR_X64 */
-+ rv = gcm_HashZeroX_hw(ghash);
- }
-
- ghash->x_high = ghash->x_low = 0;
-- return SECSuccess;
-+ return rv;
- }
-
- /*
-@@ -503,15 +442,10 @@ gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf,
- }
-
- if (ghash->hw) {
--#ifdef NSS_X86_OR_X64
-- uint64_t tmp_out[2];
-- _mm_storeu_si128((__m128i *)tmp_out, ghash->x);
-- WRITE64(tmp_out[0], T + 8);
-- WRITE64(tmp_out[1], T);
--#else
-- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-- return SECFailure;
--#endif /* NSS_X86_OR_X64 */
-+ rv = gcm_HashWrite_hw(ghash, T);
-+ if (rv != SECSuccess) {
-+ goto cleanup;
-+ }
- } else {
- WRITE64(ghash->x_low, T + 8);
- WRITE64(ghash->x_high, T);
-diff --git lib/freebl/gcm.h lib/freebl/gcm.h
-index 0c707a0811..42ef0f7179 100644
---- lib/freebl/gcm.h
-+++ lib/freebl/gcm.h
-@@ -9,7 +9,21 @@
- #include <stdint.h>
-
- #ifdef NSS_X86_OR_X64
-+/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */
-+#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \
-+ (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
-+#pragma GCC push_options
-+#pragma GCC target("sse2")
-+#undef NSS_DISABLE_SSE2
-+#define NSS_DISABLE_SSE2 1
-+#endif /* GCC <= 4.8 */
-+
- #include <emmintrin.h> /* __m128i */
-+
-+#ifdef NSS_DISABLE_SSE2
-+#undef NSS_DISABLE_SSE2
-+#pragma GCC pop_options
-+#endif /* NSS_DISABLE_SSE2 */
- #endif
-
- SEC_BEGIN_PROTOS
-diff --git lib/freebl/rijndael.c lib/freebl/rijndael.c
-index a09f13098e..5de27de9ce 100644
---- lib/freebl/rijndael.c
-+++ lib/freebl/rijndael.c
-@@ -27,6 +27,34 @@
- #include "intel-gcm.h"
- #endif /* INTEL_GCM */
-
-+/* Forward declarations */
-+void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
-+ unsigned int Nk);
-+void rijndael_native_encryptBlock(AESContext *cx,
-+ unsigned char *output,
-+ const unsigned char *input);
-+
-+/* Stub definitions for the above rijndael_native_* functions, which
-+ * shouldn't be used unless NSS_X86_OR_X64 is defined */
-+#ifndef NSS_X86_OR_X64
-+void
-+rijndael_native_key_expansion(AESContext *cx, const unsigned char *key,
-+ unsigned int Nk)
-+{
-+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-+ PORT_Assert(0);
-+}
-+
-+void
-+rijndael_native_encryptBlock(AESContext *cx,
-+ unsigned char *output,
-+ const unsigned char *input)
-+{
-+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
-+ PORT_Assert(0);
-+}
-+#endif /* NSS_X86_OR_X64 */
-+
- /*
- * There are currently three ways to build this code, varying in performance
- * and code size.
-@@ -309,162 +337,6 @@ rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int N
- }
- }
-
--#if defined(NSS_X86_OR_X64)
--#define EXPAND_KEY128(k, rcon, res) \
-- tmp_key = _mm_aeskeygenassist_si128(k, rcon); \
-- tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \
-- tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \
-- tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
-- tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \
-- res = _mm_xor_si128(tmp, tmp_key)
--
--static void
--native_key_expansion128(AESContext *cx, const unsigned char *key)
--{
-- __m128i *keySchedule = cx->keySchedule;
-- pre_align __m128i tmp_key post_align;
-- pre_align __m128i tmp post_align;
-- keySchedule[0] = _mm_loadu_si128((__m128i *)key);
-- EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]);
-- EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]);
-- EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]);
-- EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]);
-- EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]);
-- EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]);
-- EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]);
-- EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]);
-- EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]);
-- EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]);
--}
--
--#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \
-- tmp2 = _mm_slli_si128(k0, 4); \
-- tmp1 = _mm_xor_si128(k0, tmp2); \
-- tmp2 = _mm_slli_si128(tmp2, 4); \
-- tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
-- tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \
-- res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55))
--
--#define EXPAND_KEY192_PART2(res, k1, k2) \
-- tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \
-- res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF))
--
--#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \
-- EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \
-- EXPAND_KEY192_PART2(carry, res1, tmp3); \
-- res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \
-- _mm_castsi128_pd(tmp3), 0)); \
-- res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \
-- _mm_castsi128_pd(carry), 1)); \
-- EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2)
--
--static void
--native_key_expansion192(AESContext *cx, const unsigned char *key)
--{
-- __m128i *keySchedule = cx->keySchedule;
-- pre_align __m128i tmp1 post_align;
-- pre_align __m128i tmp2 post_align;
-- pre_align __m128i tmp3 post_align;
-- pre_align __m128i carry post_align;
-- keySchedule[0] = _mm_loadu_si128((__m128i *)key);
-- keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
-- EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2],
-- keySchedule[3], carry, 0x1, 0x2);
-- EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]);
-- EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5],
-- keySchedule[6], carry, 0x4, 0x8);
-- EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]);
-- EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8],
-- keySchedule[9], carry, 0x10, 0x20);
-- EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]);
-- EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11],
-- keySchedule[12], carry, 0x40, 0x80);
--}
--
--#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \
-- tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \
-- tmp2 = _mm_slli_si128(k1x, 4); \
-- tmp1 = _mm_xor_si128(k1x, tmp2); \
-- tmp2 = _mm_slli_si128(tmp2, 4); \
-- tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \
-- res = _mm_xor_si128(tmp1, tmp_key);
--
--#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \
-- EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \
-- EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA)
--
--static void
--native_key_expansion256(AESContext *cx, const unsigned char *key)
--{
-- __m128i *keySchedule = cx->keySchedule;
-- pre_align __m128i tmp_key post_align;
-- pre_align __m128i tmp1 post_align;
-- pre_align __m128i tmp2 post_align;
-- keySchedule[0] = _mm_loadu_si128((__m128i *)key);
-- keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16));
-- EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0],
-- keySchedule[1], 0x01);
-- EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2],
-- keySchedule[3], 0x02);
-- EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4],
-- keySchedule[5], 0x04);
-- EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6],
-- keySchedule[7], 0x08);
-- EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8],
-- keySchedule[9], 0x10);
-- EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10],
-- keySchedule[11], 0x20);
-- EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12],
-- keySchedule[13], 0xFF);
--}
--
--#endif /* NSS_X86_OR_X64 */
--
--/*
-- * AES key expansion using aes-ni instructions.
-- */
--static void
--native_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
--{
--#ifdef NSS_X86_OR_X64
-- switch (Nk) {
-- case 4:
-- native_key_expansion128(cx, key);
-- return;
-- case 6:
-- native_key_expansion192(cx, key);
-- return;
-- case 8:
-- native_key_expansion256(cx, key);
-- return;
-- default:
-- /* This shouldn't happen. */
-- PORT_Assert(0);
-- }
--#else
-- PORT_Assert(0);
--#endif /* NSS_X86_OR_X64 */
--}
--
--static void
--native_encryptBlock(AESContext *cx,
-- unsigned char *output,
-- const unsigned char *input)
--{
--#ifdef NSS_X86_OR_X64
-- int i;
-- pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input);
-- m = _mm_xor_si128(m, cx->keySchedule[0]);
-- for (i = 1; i < cx->Nr; ++i) {
-- m = _mm_aesenc_si128(m, cx->keySchedule[i]);
-- }
-- m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]);
-- _mm_storeu_si128((__m128i *)output, m);
--#else
-- PORT_Assert(0);
--#endif /* NSS_X86_OR_X64 */
--}
--
- /* rijndael_key_expansion
- *
- * Generate the expanded key from the key input by the user.
-@@ -830,7 +702,7 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output,
-
- if (aesni_support()) {
- /* Use hardware acceleration for normal AES parameters. */
-- encryptor = &native_encryptBlock;
-+ encryptor = &rijndael_native_encryptBlock;
- } else {
- encryptor = &rijndael_encryptBlock128;
- }
-@@ -1026,7 +898,7 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
- cx->mode == NSS_AES_CTR)) {
- PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32);
- /* Prepare hardware key for normal AES parameters. */
-- native_key_expansion(cx, key, Nk);
-+ rijndael_native_key_expansion(cx, key, Nk);
- } else {
- rijndael_key_expansion(cx, key, Nk);
- }
-diff --git lib/freebl/rijndael.h lib/freebl/rijndael.h
-index 1f4a8a9f73..1b63a323da 100644
---- lib/freebl/rijndael.h
-+++ lib/freebl/rijndael.h
-@@ -8,8 +8,22 @@
- #include "blapii.h"
- #include <stdint.h>
-
--#ifdef NSS_X86_OR_X64
--#include <wmmintrin.h> /* aes-ni */
-+#if defined(NSS_X86_OR_X64)
-+/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */
-+#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \
-+ (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8))
-+#pragma GCC push_options
-+#pragma GCC target("sse2")
-+#undef NSS_DISABLE_SSE2
-+#define NSS_DISABLE_SSE2 1
-+#endif /* GCC <= 4.8 */
-+
-+#include <emmintrin.h> /* __m128i */
-+
-+#ifdef NSS_DISABLE_SSE2
-+#undef NSS_DISABLE_SSE2
-+#pragma GCC pop_options
-+#endif /* NSS_DISABLE_SSE2 */
- #endif
-
- typedef void AESBlockFunc(AESContext *cx,