summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Makefile.am5
-rw-r--r--src/mum.h417
-rw-r--r--src/ucl_hash.c87
-rw-r--r--src/ucl_internal.h1
-rw-r--r--src/ucl_msgpack.c24
-rw-r--r--src/ucl_parser.c35
-rw-r--r--src/ucl_util.c17
-rw-r--r--src/xxhash.c941
-rw-r--r--src/xxhash.h165
-rw-r--r--uthash/uthash.h72
10 files changed, 533 insertions, 1231 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index c2f3a4b423475..80ce5b185b833 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -12,8 +12,7 @@ libucl_la_SOURCES= ucl_emitter.c \
ucl_schema.c \
ucl_util.c \
ucl_msgpack.c \
- ucl_sexp.c \
- xxhash.c
+ ucl_sexp.c
libucl_la_CFLAGS= $(libucl_common_cflags) \
@CURL_CFLAGS@
libucl_la_LDFLAGS = -version-info @SO_VERSION@
@@ -25,7 +24,7 @@ libucl_la_LIBADD= @LIBFETCH_LIBS@ \
include_HEADERS= $(top_srcdir)/include/ucl.h \
$(top_srcdir)/include/ucl++.h
noinst_HEADERS= ucl_internal.h \
- xxhash.h \
+ mum.h \
ucl_hash.h \
ucl_chartable.h \
tree.h
diff --git a/src/mum.h b/src/mum.h
new file mode 100644
index 0000000000000..ae6eec16c1821
--- /dev/null
+++ b/src/mum.h
@@ -0,0 +1,417 @@
+/* Copyright (c) 2016 Vladimir Makarov <vmakarov@gcc.gnu.org>
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use, copy,
+ modify, merge, publish, distribute, sublicense, and/or sell copies
+ of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+*/
+
+/* This file implements MUM (MUltiply and Mix) hashing. We randomize
+ input data by 64x64-bit multiplication and mixing hi- and low-parts
+ of the multiplication result by using an addition and then mix it
+ into the current state. We use prime numbers randomly generated
+ with the equal probability of their bit values for the
+ multiplication. When all primes are used once, the state is
+ randomized and the same prime numbers are used again for data
+ randomization.
+
+   MUM hashing passes all SMHasher tests.  A pseudo-random number
+   generator based on MUM also passes the NIST Statistical Test Suite
+   for Random and Pseudorandom Number Generators for Cryptographic
+   Applications (version 2.2.1) with 1000 bitstreams each containing
+   1M bits.  MUM hashing is also faster than Spooky64 and City64 on
+   small strings (at least up to 512 bits) on Haswell and Power7.  The
+   MUM bulk speed (speed on very long data) is higher than that of
+   Spooky and City on Power7.  On Haswell the bulk speed is higher
+   than Spooky's and close to City's. */
+
+#ifndef __MUM_HASH__
+#define __MUM_HASH__
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#ifdef _MSC_VER
+typedef unsigned __int16 uint16_t;
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* Macro saying to use 128-bit integers implemented by GCC for some
+   targets.  It is only set here when the includer has not already
+   defined it, so it can be forced to 0 or 1 from outside. */
+#ifndef _MUM_USE_INT128
+/* In GCC uint128_t is defined if HOST_BITS_PER_WIDE_INT >= 64.
+   HOST_WIDE_INT is long if HOST_BITS_PER_LONG > HOST_BITS_PER_INT,
+   otherwise int.  The test below uses UINT_MAX != ULONG_MAX as the
+   check for "long is wider than int". */
+#if defined(__GNUC__) && UINT_MAX != ULONG_MAX
+#define _MUM_USE_INT128 1
+#else
+#define _MUM_USE_INT128 0
+#endif
+#endif
+
+/* _MUM_FRESH_GCC is defined for GCC 4.9 and any later major version.
+   It gates the Haswell-targeted hash variant and the run-time CPU
+   check in mum_hash. */
+#if defined(__GNUC__) && ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 9) || (__GNUC__ > 4))
+#define _MUM_FRESH_GCC
+#endif
+
+/* Wrappers around GCC function attributes.  They expand to nothing
+   for non-GNU compilers and for compilers that define __llvm__. */
+#if defined(__GNUC__) && !defined(__llvm__)
+#define _MUM_ATTRIBUTE_UNUSED __attribute__((unused))
+#define _MUM_OPTIMIZE(opts) __attribute__((__optimize__ (opts)))
+#define _MUM_TARGET(opts) __attribute__((__target__ (opts)))
+#else
+#define _MUM_ATTRIBUTE_UNUSED
+#define _MUM_OPTIMIZE(opts)
+#define _MUM_TARGET(opts)
+#endif
+
+
+/* Here are different primes randomly generated with the equal
+   probability of their bit values.  They are used to randomize input
+   values.
+
+   None of these objects is const: mum_hash_randomize overwrites every
+   one of them.  Because they are static objects defined in a header,
+   each translation unit that includes this file has its own copy.
+   Note that _mum_tail_prime and _mum_finish_prime2 start out with the
+   same value. */
+static uint64_t _mum_hash_step_prime = 0x2e0bb864e9ea7df5ULL;
+static uint64_t _mum_key_step_prime = 0xcdb32970830fcaa1ULL;
+static uint64_t _mum_block_start_prime = 0xc42b5e2e6480b23bULL;
+static uint64_t _mum_unroll_prime = 0x7b51ec3d22f7096fULL;
+static uint64_t _mum_tail_prime = 0xaf47d47c99b1461bULL;
+static uint64_t _mum_finish_prime1 = 0xa9a7ae7ceff79f3fULL;
+static uint64_t _mum_finish_prime2 = 0xaf47d47c99b1461bULL;
+
+/* Per-word multipliers, indexed by the position of a 64-bit word
+   inside one unrolled chunk.  There are 16 entries, which is why the
+   unroll factor power may not exceed 4. */
+static uint64_t _mum_primes [] = {
+ 0X9ebdcae10d981691, 0X32b9b9b97a27ac7d, 0X29b5584d83d35bbd, 0X4b04e0e61401255f,
+ 0X25e8f7b1f1c9d027, 0X80d4c8c000f3e881, 0Xbd1255431904b9dd, 0X8a3bd4485eee6d81,
+ 0X3bc721b2aad05197, 0X71b1a19b907d6e33, 0X525e6c1084a8534b, 0X9e4c2cd340c1299f,
+ 0Xde3add92e94caa37, 0X7e14eadb1f65311d, 0X3f5aa40f89812853, 0X33b15a3b587d15c9,
+};
+
+/* Multiply 64-bit V and P and return the sum of the high and low
+   64-bit halves of the 128-bit product.
+
+   Three implementations are selected at compile time:
+   - AArch64 with 128-bit integers enabled: a plain multiply for the
+     low half plus UMULH for the high half;
+   - other targets with 128-bit integers enabled: a __uint128_t
+     multiplication;
+   - otherwise: four 32x32->64-bit multiplications.  In this last
+     variant the carries out of the low half are only propagated when
+     MUM_TARGET_INDEPENDENT_HASH is defined, so without that macro the
+     result may differ from the 128-bit variants. */
+static inline uint64_t
+_mum (uint64_t v, uint64_t p) {
+  uint64_t hi, lo;
+#if _MUM_USE_INT128
+#if defined(__aarch64__)
+  /* AARCH64 needs 2 insns to calculate 128-bit result of the
+     multiplication.  If we use a generic code we actually call a
+     function doing 128x128->128 bit multiplication.  The function is
+     very slow. */
+  lo = v * p;
+  /* __asm__ rather than asm: the plain keyword is not available in
+     strict ISO modes (-std=c99 / -std=c11). */
+  __asm__ ("umulh %0, %1, %2" : "=r" (hi) : "r" (v), "r" (p));
+#else
+  __uint128_t r = (__uint128_t) v * (__uint128_t) p;
+  hi = (uint64_t) (r >> 64);
+  lo = (uint64_t) r;
+#endif
+#else
+  /* Implementation of 64x64->128-bit multiplication by four 32x32->64
+     bit multiplication. */
+  uint64_t hv = v >> 32, hp = p >> 32;
+  uint64_t lv = (uint32_t) v, lp = (uint32_t) p;
+  uint64_t rh = hv * hp;
+  uint64_t rm_0 = hv * lp;
+  uint64_t rm_1 = hp * lv;
+  uint64_t rl = lv * lp;
+  uint64_t t, carry = 0;
+
+  /* We could ignore a carry bit here if we did not care about the
+     same hash for 32-bit and 64-bit targets. */
+  t = rl + (rm_0 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+  carry = t < rl;
+#endif
+  lo = t + (rm_1 << 32);
+#ifdef MUM_TARGET_INDEPENDENT_HASH
+  carry += lo < t;
+#endif
+  hi = rh + (rm_0 >> 32) + (rm_1 >> 32) + carry;
+#endif
+  /* We could use XOR here too but, for some reasons, on Haswell and
+     Power7 using an addition improves hashing performance by 10% for
+     small strings. */
+  return hi + lo;
+}
+
+/* Byte-swap helpers used by _mum_le and _mum_le32.  Every branch
+   defines the same two names, _mum_bswap32 and _mum_bswap64, which
+   are the names those functions call.
+
+   MSVC: the CRT intrinsics are _byteswap_ulong (32-bit) and
+   _byteswap_uint64 (64-bit), declared in <stdlib.h>.
+   Fallback: <byteswap.h> provides bswap_32 and bswap_64. */
+#if defined(_MSC_VER)
+#define _mum_bswap32(x) _byteswap_ulong (x)
+#define _mum_bswap64(x) _byteswap_uint64 (x)
+#elif defined(__APPLE__)
+#include <libkern/OSByteOrder.h>
+#define _mum_bswap32(x) OSSwapInt32 (x)
+#define _mum_bswap64(x) OSSwapInt64 (x)
+#elif defined(__GNUC__)
+#define _mum_bswap32(x) __builtin_bswap32 (x)
+#define _mum_bswap64(x) __builtin_bswap64 (x)
+#else
+#include <byteswap.h>
+#define _mum_bswap32(x) bswap_32 (x)
+#define _mum_bswap64(x) bswap_64 (x)
+#endif
+
+/* Older spellings, kept as aliases for any code that used them. */
+#define _mum_bswap_32(x) _mum_bswap32 (x)
+#define _mum_bswap_64(x) _mum_bswap64 (x)
+
+/* Return V as it should be fed to the multiplier.  The value is
+   returned unchanged on little-endian targets and whenever
+   MUM_TARGET_INDEPENDENT_HASH is not defined; it is byte-swapped only
+   on big-endian targets that asked for target-independent hashes. */
+static inline uint64_t
+_mum_le (uint64_t v) {
+#if !defined(MUM_TARGET_INDEPENDENT_HASH) || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  const uint64_t le = v;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  const uint64_t le = _mum_bswap64 (v);
+#else
+#error "Unknown endianess"
+#endif
+  return le;
+}
+
+/* 32-bit counterpart of _mum_le: V is returned unchanged unless the
+   target is big-endian and MUM_TARGET_INDEPENDENT_HASH is defined, in
+   which case its bytes are swapped. */
+static inline uint32_t
+_mum_le32 (uint32_t v) {
+#if !defined(MUM_TARGET_INDEPENDENT_HASH) || __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+  const uint32_t le = v;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+  const uint32_t le = _mum_bswap32 (v);
+#else
+#error "Unknown endianess"
+#endif
+  return le;
+}
+
+/* Macro defining how many times the innermost loop in
+   _mum_hash_aligned will be unrolled by the compiler (although the
+   compiler may make its own decision).  Use only a constant here to
+   help the compiler unroll the main loop.
+
+   The macro value is the base-2 logarithm of the number of 64-bit
+   words processed per chunk.  It affects the resulting hash for
+   strings longer than 128 bits.  The unroll factor greatly affects
+   the hashing speed.  We prefer the speed. */
+#ifndef _MUM_UNROLL_FACTOR_POWER
+#if defined(__PPC64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 3
+#elif defined(__aarch64__) && !defined(MUM_TARGET_INDEPENDENT_HASH)
+#define _MUM_UNROLL_FACTOR_POWER 4
+#else
+#define _MUM_UNROLL_FACTOR_POWER 2
+#endif
+#endif
+
+/* Each word of a chunk is multiplied by its own entry of
+   _mum_primes, so the chunk may not hold more words than that table
+   has entries (16, i.e. power 4). */
+#if _MUM_UNROLL_FACTOR_POWER < 1
+#error "too small unroll factor"
+#elif _MUM_UNROLL_FACTOR_POWER > 4
+#error "We have not enough primes for such unroll factor"
+#endif
+
+#define _MUM_UNROLL_FACTOR (1 << _MUM_UNROLL_FACTOR_POWER)
+
+/* Mix LEN bytes starting at KEY into the state START and return the
+   new state (without the final randomization).
+
+   The data is consumed in three stages:
+   1. chunks of _MUM_UNROLL_FACTOR 64-bit words, while more than one
+      full chunk remains; the state is re-randomized after each chunk
+      because the same primes are reused for the next one;
+   2. the remaining whole 64-bit words (at most _MUM_UNROLL_FACTOR);
+   3. a tail of 1..7 bytes packed into one 64-bit value.  A tail of 4
+      or more bytes starts with a 32-bit load; the other bytes are
+      placed at bit position 8 * index.
+
+   KEY is read through uint64_t and uint32_t pointers, so the caller
+   must pass data for which such loads are valid on the target. */
+static inline uint64_t _MUM_OPTIMIZE("unroll-loops")
+_mum_hash_aligned (uint64_t start, const void *key, size_t len) {
+  const size_t chunk_bytes = _MUM_UNROLL_FACTOR * sizeof (uint64_t);
+  const unsigned char *cursor = (const unsigned char *) key;
+  uint64_t state = _mum (start, _mum_block_start_prime);
+  uint64_t tail;
+  size_t words, k;
+  int j;
+
+  for (; len > chunk_bytes; len -= chunk_bytes, cursor += chunk_bytes) {
+    /* This loop could be vectorized when we have vector insns for
+       64x64->128-bit multiplication.  AVX2 currently only have a
+       vector insn for 4 32x32->64-bit multiplication. */
+    for (j = 0; j < _MUM_UNROLL_FACTOR; j++)
+      state ^= _mum (_mum_le (((const uint64_t *) cursor)[j]), _mum_primes[j]);
+    /* We will use the same prime numbers on the next iterations --
+       randomize the state. */
+    state = _mum (state, _mum_unroll_prime);
+  }
+
+  words = len / sizeof (uint64_t);
+  for (k = 0; k < words; k++)
+    state ^= _mum (_mum_le (((const uint64_t *) cursor)[k]), _mum_primes[k]);
+  cursor += words * sizeof (uint64_t);
+  len -= words * sizeof (uint64_t);
+
+  /* Fewer than 8 bytes are left; an empty tail is not mixed in. */
+  if (len == 0)
+    return state;
+
+  if (len >= sizeof (uint32_t)) {
+    tail = _mum_le32 (*(const uint32_t *) cursor);
+    k = sizeof (uint32_t);
+  } else {
+    tail = 0;
+    k = 0;
+  }
+  for (; k < len; k++)
+    tail |= (uint64_t) cursor[k] << (8 * k);
+  return state ^ _mum (tail, _mum_tail_prime);
+}
+
+/* Final randomization of H: two multiply-and-mix rounds, each XORed
+   back into the running value, using the two finish primes in
+   order. */
+static inline uint64_t
+_mum_final (uint64_t h) {
+  const uint64_t round1 = h ^ _mum (h, _mum_finish_prime1);
+
+  return round1 ^ _mum (round1, _mum_finish_prime2);
+}
+
+#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
+
+/* Variant of the hash compiled for Haswell so that the compiler may
+   use the MULX instruction (a BMI2 instruction) instead of the
+   generic x86-64 MULQ.  Although on modern Intel processors MULQ
+   takes 3 cycles vs. 4 for MULX, MULX permits more freedom in insn
+   scheduling as it uses fewer fixed registers.
+
+   The state starts at SEED + LEN and the data is hashed in place,
+   with no alignment handling. */
+static inline uint64_t _MUM_TARGET("arch=haswell")
+_mum_hash_avx2 (const void * key, size_t len, uint64_t seed) {
+  const uint64_t state = _mum_hash_aligned (seed + len, key, len);
+
+  return _mum_final (state);
+}
+#endif
+
+/* _MUM_UNALIGNED_ACCESS is 1 on the targets listed below, where the
+   hash reads the key in place whatever its address, and 0 elsewhere.
+   The includer may predefine it to override the list. */
+#ifndef _MUM_UNALIGNED_ACCESS
+#if defined(__x86_64__) || defined(__i386__) || defined(__PPC64__) \
+ || defined(__s390__) || defined(__m32c__) || defined(cris) \
+ || defined(__CR16__) || defined(__vax__) || defined(__m68k__) \
+ || defined(__aarch64__)
+#define _MUM_UNALIGNED_ACCESS 1
+#else
+#define _MUM_UNALIGNED_ACCESS 0
+#endif
+#endif
+
+/* When we need an aligned access to data being hashed we move part of
+   the unaligned data to an aligned block of given size and then
+   process it, repeating processing the data by the block.  The block
+   is a local array in _mum_hash_default, so _MUM_BLOCK_LEN is also
+   the amount of stack that function uses for it. */
+#ifndef _MUM_BLOCK_LEN
+#define _MUM_BLOCK_LEN 1024
+#endif
+
+#if _MUM_BLOCK_LEN < 8
+#error "too small block length"
+#endif
+
+/* Portable hash of LEN bytes at KEY with SEED.  The state starts at
+   SEED + LEN.  When the target tolerates unaligned loads, or KEY is
+   8-byte aligned, the data is hashed in place.  Otherwise it is
+   copied, at most _MUM_BLOCK_LEN bytes at a time, into an aligned
+   local buffer and each copy is mixed into the state in turn.
+
+   NOTE(review): each buffered block goes through _mum_hash_aligned
+   separately, so for inputs longer than _MUM_BLOCK_LEN the buffered
+   path looks like it can produce a different value than the in-place
+   path for the same bytes -- confirm whether that matters to
+   callers on strict-alignment targets. */
+static inline uint64_t
+#if defined(__x86_64__)
+_MUM_TARGET("inline-all-stringops")
+#endif
+_mum_hash_default (const void *key, size_t len, uint64_t seed) {
+  const unsigned char *src = (const unsigned char *) key;
+  uint64_t aligned_block[_MUM_BLOCK_LEN / sizeof (uint64_t)];
+  uint64_t state = seed + len;
+  size_t remaining, chunk;
+
+  if (_MUM_UNALIGNED_ACCESS || ((size_t) src & 0x7) == 0)
+    return _mum_final (_mum_hash_aligned (state, key, len));
+
+  for (remaining = len; remaining != 0; remaining -= chunk, src += chunk) {
+    chunk = remaining < _MUM_BLOCK_LEN ? remaining : _MUM_BLOCK_LEN;
+    memmove (aligned_block, src, chunk);
+    state = _mum_hash_aligned (state, aligned_block, chunk);
+  }
+  return _mum_final (state);
+}
+
+/* Build a 64-bit multiplier from eight successive rand() calls, one
+   byte (rand() % 256) per call, the first call supplying the most
+   significant byte. */
+static inline uint64_t
+_mum_next_factor (void) {
+  uint64_t factor = 0;
+  int bytes_left = 8;
+
+  while (bytes_left-- > 0) {
+    const uint64_t byte = (uint64_t) (rand () % 256);
+
+    factor = (factor << 8) | byte;
+  }
+  return factor;
+}
+
+/* ++++++++++++++++++++++++++ Interface functions: +++++++++++++++++++ */
+
+/* Set random multipliers depending on SEED.
+
+   SEED is handed to srand(), and every multiplier used by the hash is
+   then replaced with a value drawn from rand(): first the seven
+   scalar multipliers, in the order listed below, then each entry of
+   _mum_primes.  The replacement values are arbitrary 64-bit numbers,
+   not necessarily primes.  Calling this changes the state of the C
+   library's rand() generator. */
+static inline void
+mum_hash_randomize (uint64_t seed) {
+  uint64_t *const scalar_factors[] = {
+    &_mum_hash_step_prime, &_mum_key_step_prime,
+    &_mum_finish_prime1, &_mum_finish_prime2,
+    &_mum_block_start_prime, &_mum_unroll_prime,
+    &_mum_tail_prime,
+  };
+  const size_t n_scalars = sizeof (scalar_factors) / sizeof (scalar_factors[0]);
+  const size_t n_primes = sizeof (_mum_primes) / sizeof (_mum_primes[0]);
+  size_t k;
+
+  srand (seed);
+  for (k = 0; k < n_scalars; k++)
+    *scalar_factors[k] = _mum_next_factor ();
+  for (k = 0; k < n_primes; k++)
+    _mum_primes[k] = _mum_next_factor ();
+}
+
+/* Start hashing data with SEED.  Return the state.  The initial
+   state is SEED itself; feed it to mum_hash_step and finish with
+   mum_hash_finish. */
+static inline uint64_t
+mum_hash_init (uint64_t seed) {
+ return seed;
+}
+
+/* Process the 64-bit datum KEY with the state H and return the
+   updated state: the state and the datum are each multiplied and
+   mixed with their own prime and the two results are XORed. */
+static inline uint64_t
+mum_hash_step (uint64_t h, uint64_t key)
+{
+  const uint64_t state_mix = _mum (h, _mum_hash_step_prime);
+  const uint64_t key_mix = _mum (key, _mum_key_step_prime);
+
+  return state_mix ^ key_mix;
+}
+
+/* Return the result of hashing using the current state H, i.e. H
+   after the final randomization done by _mum_final. */
+static inline uint64_t
+mum_hash_finish (uint64_t h) {
+ return _mum_final (h);
+}
+
+/* Fast hashing of a single 64-bit KEY with SEED: one init, one step
+   and one finish.
+
+   The value is returned as size_t, so it is narrowed on targets
+   whose size_t is narrower than 64 bits.  Without 128-bit integer
+   support, _mum only propagates carries when
+   MUM_TARGET_INDEPENDENT_HASH is defined, so define that macro if the
+   same value is needed on every target. */
+static inline size_t
+mum_hash64 (uint64_t key, uint64_t seed) {
+  uint64_t state = mum_hash_init (seed);
+
+  state = mum_hash_step (state, key);
+  return mum_hash_finish (state);
+}
+
+/* Hash data KEY of length LEN and SEED.  The hash depends on the
+   target endianess and the unroll factor.
+
+   On x86-64 with a recent enough GCC the CPU is probed once, on the
+   first call, and the answer is cached in a function-local static
+   (0 = not probed yet, 1 = AVX2 available, -1 = not available).
+   When AVX2 is reported the Haswell-targeted variant is used;
+   otherwise, and on every other target, the portable one. */
+static inline uint64_t
+mum_hash (const void *key, size_t len, uint64_t seed) {
+#if defined(__x86_64__) && defined(_MUM_FRESH_GCC)
+  static int avx2_support = 0;
+
+  if (avx2_support == 0) {
+    __builtin_cpu_init ();
+    avx2_support = __builtin_cpu_supports ("avx2") ? 1 : -1;
+  }
+  if (avx2_support > 0)
+    return _mum_hash_avx2 (key, len, seed);
+#endif
+  return _mum_hash_default (key, len, seed);
+}
+
+#endif
diff --git a/src/ucl_hash.c b/src/ucl_hash.c
index c54fba7c90a5d..bdc7fb486fc4c 100644
--- a/src/ucl_hash.c
+++ b/src/ucl_hash.c
@@ -25,6 +25,7 @@
#include "ucl_hash.h"
#include "khash.h"
#include "kvec.h"
+#include "mum.h"
#include <time.h>
#include <limits.h>
@@ -99,20 +100,11 @@ static const unsigned char lc_map[256] = {
#define UCL64_BIT_HASH 1
#endif
-#ifdef UCL64_BIT_HASH
+/* Hash an object's key bytes with mum_hash, seeded by ucl_hash_seed ().
+ * mum_hash returns 64 bits; the value is narrowed to the uint32_t
+ * return type of this function. */
static inline uint32_t
ucl_hash_func (const ucl_object_t *o)
{
- return XXH64 (o->key, o->keylen, ucl_hash_seed ());
+ return mum_hash (o->key, o->keylen, ucl_hash_seed ());
}
-#else
-static inline uint32_t
-ucl_hash_func (const ucl_object_t *o)
-{
- return XXH32 (o->key, o->keylen, ucl_hash_seed ());
-}
-#endif
-
static inline int
ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2)
{
@@ -126,91 +118,60 @@ ucl_hash_equal (const ucl_object_t *k1, const ucl_object_t *k2)
KHASH_INIT (ucl_hash_node, const ucl_object_t *, struct ucl_hash_elt, 1,
ucl_hash_func, ucl_hash_equal)
-#ifdef UCL64_BIT_HASH
+/* Case-insensitive key hash.  The key is consumed 8 bytes at a time;
+ * every byte is mapped through lc_map before the 8-byte group is fed
+ * to mum_hash_step.  The 1..7 leftover bytes are folded the same way
+ * into one zero-padded group.  Each of the eight bytes must be mapped
+ * into its own slot (c5..c8 included), otherwise keys that differ only
+ * in case would hash differently. */
static inline uint32_t
ucl_hash_caseless_func (const ucl_object_t *o)
{
unsigned len = o->keylen;
- unsigned leftover = o->keylen % 4;
+ unsigned leftover = o->keylen % 8;
unsigned fp, i;
const uint8_t* s = (const uint8_t*)o->key;
union {
struct {
- unsigned char c1, c2, c3, c4;
+ unsigned char c1, c2, c3, c4, c5, c6, c7, c8;
} c;
- uint32_t pp;
+ uint64_t pp;
} u;
- XXH64_state_t st;
+ uint64_t r;
fp = len - leftover;
- XXH64_reset (&st, ucl_hash_seed ());
+ r = ucl_hash_seed ();
- for (i = 0; i != fp; i += 4) {
+ for (i = 0; i != fp; i += 8) {
u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
+ u.c.c5 = s[i + 4], u.c.c6 = s[i + 5], u.c.c7 = s[i + 6], u.c.c8 = s[i + 7];
u.c.c1 = lc_map[u.c.c1];
u.c.c2 = lc_map[u.c.c2];
u.c.c3 = lc_map[u.c.c3];
u.c.c4 = lc_map[u.c.c4];
- XXH64_update (&st, &u.pp, sizeof (u));
+ u.c.c5 = lc_map[u.c.c5];
+ u.c.c6 = lc_map[u.c.c6];
+ u.c.c7 = lc_map[u.c.c7];
+ u.c.c8 = lc_map[u.c.c8];
+ r = mum_hash_step (r, u.pp);
}
u.pp = 0;
switch (leftover) {
+ case 7:
+ u.c.c7 = lc_map[(unsigned char)s[i++]];
+ case 6:
+ u.c.c6 = lc_map[(unsigned char)s[i++]];
+ case 5:
+ u.c.c5 = lc_map[(unsigned char)s[i++]];
+ case 4:
+ u.c.c4 = lc_map[(unsigned char)s[i++]];
case 3:
u.c.c3 = lc_map[(unsigned char)s[i++]];
case 2:
u.c.c2 = lc_map[(unsigned char)s[i++]];
case 1:
u.c.c1 = lc_map[(unsigned char)s[i]];
- XXH64_update (&st, &u.pp, leftover);
+ r = mum_hash_step (r, u.pp);
break;
}
- return XXH64_digest (&st);
+ return mum_hash_finish (r);
}
-#else
-static inline uint32_t
-ucl_hash_caseless_func (const ucl_object_t *o)
-{
- unsigned len = o->keylen;
- unsigned leftover = o->keylen % 4;
- unsigned fp, i;
- const uint8_t* s = (const uint8_t*)o->key;
- union {
- struct {
- unsigned char c1, c2, c3, c4;
- } c;
- uint32_t pp;
- } u;
- XXH32_state_t st;
-
- fp = len - leftover;
- XXH32_reset (&st, ucl_hash_seed ());
-
- for (i = 0; i != fp; i += 4) {
- u.c.c1 = s[i], u.c.c2 = s[i + 1], u.c.c3 = s[i + 2], u.c.c4 = s[i + 3];
- u.c.c1 = lc_map[u.c.c1];
- u.c.c2 = lc_map[u.c.c2];
- u.c.c3 = lc_map[u.c.c3];
- u.c.c4 = lc_map[u.c.c4];
- XXH32_update (&st, &u.pp, sizeof (u));
- }
-
- u.pp = 0;
- switch (leftover) {
- case 3:
- u.c.c3 = lc_map[(unsigned char)s[i++]];
- case 2:
- u.c.c2 = lc_map[(unsigned char)s[i++]];
- case 1:
- u.c.c1 = lc_map[(unsigned char)s[i]];
- XXH32_update (&st, &u.pp, leftover);
- break;
- }
-
- return XXH32_digest (&st);
-}
-#endif
static inline int
ucl_hash_caseless_equal (const ucl_object_t *k1, const ucl_object_t *k2)
diff --git a/src/ucl_internal.h b/src/ucl_internal.h
index db8a12c408b1f..37871eb666df7 100644
--- a/src/ucl_internal.h
+++ b/src/ucl_internal.h
@@ -93,7 +93,6 @@
#include "uthash.h"
#include "ucl.h"
#include "ucl_hash.h"
-#include "xxhash.h"
#ifdef HAVE_OPENSSL
#include <openssl/evp.h>
diff --git a/src/ucl_msgpack.c b/src/ucl_msgpack.c
index 96f4809f892ff..bd7c3a1ce7850 100644
--- a/src/ucl_msgpack.c
+++ b/src/ucl_msgpack.c
@@ -1423,6 +1423,10 @@ ucl_msgpack_parse_int (struct ucl_parser *parser,
int16_t iv16;
int32_t iv32;
int64_t iv64;
+ uint16_t uiv16;
+ uint32_t uiv32;
+ uint64_t uiv64;
+
if (len > remain) {
return -1;
@@ -1455,7 +1459,9 @@ ucl_msgpack_parse_int (struct ucl_parser *parser,
len = 2;
break;
case msgpack_uint16:
- obj->value.iv = FROM_BE16 (*(uint16_t *)pos);
+ memcpy (&uiv16, pos, sizeof (uiv16));
+ uiv16 = FROM_BE16 (uiv16);
+ obj->value.iv = uiv16;
len = 2;
break;
case msgpack_int32:
@@ -1465,7 +1471,9 @@ ucl_msgpack_parse_int (struct ucl_parser *parser,
len = 4;
break;
case msgpack_uint32:
- obj->value.iv = FROM_BE32 (*(uint32_t *)pos);
+ memcpy(&uiv32, pos, sizeof(uiv32));
+ uiv32 = FROM_BE32(uiv32);
+ obj->value.iv = uiv32;
len = 4;
break;
case msgpack_int64:
@@ -1475,7 +1483,9 @@ ucl_msgpack_parse_int (struct ucl_parser *parser,
len = 8;
break;
case msgpack_uint64:
- obj->value.iv = FROM_BE64 (*(uint64_t *)pos);
+ memcpy(&uiv64, pos, sizeof(uiv64));
+ uiv64 = FROM_BE64(uiv64);
+ obj->value.iv = uiv64;
len = 8;
break;
default:
@@ -1498,6 +1508,7 @@ ucl_msgpack_parse_float (struct ucl_parser *parser,
uint32_t i;
float f;
} d;
+ uint64_t uiv64;
if (len > remain) {
return -1;
@@ -1507,13 +1518,16 @@ ucl_msgpack_parse_float (struct ucl_parser *parser,
switch (fmt) {
case msgpack_float32:
- d.i = FROM_BE32 (*(uint32_t *)pos);
+ memcpy(&d.i, pos, sizeof(d.i));
+ d.i = FROM_BE32(d.i);
/* XXX: can be slow */
obj->value.dv = d.f;
len = 4;
break;
case msgpack_float64:
- obj->value.iv = FROM_BE64 (*(uint64_t *)pos);
+ memcpy(&uiv64, pos, sizeof(uiv64));
+ uiv64 = FROM_BE64(uiv64);
+ obj->value.iv = uiv64;
len = 8;
break;
default:
diff --git a/src/ucl_parser.c b/src/ucl_parser.c
index 0aaa4dd988db6..fc7cea07febab 100644
--- a/src/ucl_parser.c
+++ b/src/ucl_parser.c
@@ -2597,12 +2597,7 @@ ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
return false;
}
- if (len == 0) {
- parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
- return true;
- }
-
- if (data == NULL) {
+ if (data == NULL && len != 0) {
ucl_create_err (&parser->err, "invalid chunk added");
return false;
}
@@ -2613,6 +2608,7 @@ ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
ucl_create_err (&parser->err, "cannot allocate chunk structure");
return false;
}
+
chunk->begin = data;
chunk->remain = len;
chunk->pos = chunk->begin;
@@ -2631,12 +2627,27 @@ ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
return false;
}
- switch (parse_type) {
- default:
- case UCL_PARSE_UCL:
- return ucl_state_machine (parser);
- case UCL_PARSE_MSGPACK:
- return ucl_parse_msgpack (parser);
+ if (len > 0) {
+ /* Need to parse something */
+ switch (parse_type) {
+ default:
+ case UCL_PARSE_UCL:
+ return ucl_state_machine (parser);
+ case UCL_PARSE_MSGPACK:
+ return ucl_parse_msgpack (parser);
+ }
+ }
+ else {
+ /* Just add empty chunk and go forward */
+ if (parser->top_obj == NULL) {
+ /*
+ * In case of empty object, create one to indicate that we've
+ * read something
+ */
+ parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
+ }
+
+ return true;
}
}
diff --git a/src/ucl_util.c b/src/ucl_util.c
index 261bf95f5b7db..900658bb6bb91 100644
--- a/src/ucl_util.c
+++ b/src/ucl_util.c
@@ -975,6 +975,7 @@ ucl_include_file_single (const unsigned char *data, size_t len,
if (params->soft_fail) {
return false;
}
+
return (!params->must_exist || false);
}
@@ -1172,11 +1173,14 @@ ucl_include_file_single (const unsigned char *data, size_t len,
res = ucl_parser_add_chunk_full (parser, buf, buflen, params->priority,
params->strat, params->parse_type);
- if (!res && !params->must_exist) {
- /* Free error */
- utstring_free (parser->err);
- parser->err = NULL;
- parser->state = UCL_STATE_AFTER_VALUE;
+
+ if (!res) {
+ if (!params->must_exist) {
+ /* Free error */
+ utstring_free (parser->err);
+ parser->err = NULL;
+ res = true;
+ }
}
/* Stop nesting the include, take 1 level off the stack */
@@ -1849,6 +1853,9 @@ ucl_parser_add_fd_priority (struct ucl_parser *parser, int fd,
fd, strerror (errno));
return false;
}
+ if (st.st_size == 0) {
+ return true;
+ }
if ((buf = ucl_mmap (NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
ucl_create_err (&parser->err, "cannot mmap fd %d: %s",
fd, strerror (errno));
diff --git a/src/xxhash.c b/src/xxhash.c
deleted file mode 100644
index 3473eb46a3b0a..0000000000000
--- a/src/xxhash.c
+++ /dev/null
@@ -1,941 +0,0 @@
-/*
-xxHash - Fast Hash algorithm
-Copyright (C) 2012-2014, Yann Collet.
-BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-* Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-You can contact the author at :
-- xxHash source repository : http://code.google.com/p/xxhash/
-- public discussion board : https://groups.google.com/forum/#!forum/lz4c
-*/
-
-
-//**************************************
-// Tuning parameters
-//**************************************
-// Unaligned memory access is automatically enabled for "common" CPU, such as x86.
-// For others CPU, the compiler will be more cautious, and insert extra code to ensure aligned access is respected.
-// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance.
-// You can also enable this parameter if you know your input data will always be aligned (boundaries of 4, for U32).
-#if defined(__ARM_FEATURE_UNALIGNED) || defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
-# define XXH_USE_UNALIGNED_ACCESS 1
-#endif
-
-// XXH_ACCEPT_NULL_INPUT_POINTER :
-// If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
-// When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
-// This option has a very small performance cost (only measurable on small inputs).
-// By default, this option is disabled. To enable it, uncomment below define :
-// #define XXH_ACCEPT_NULL_INPUT_POINTER 1
-
-// XXH_FORCE_NATIVE_FORMAT :
-// By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
-// Results are therefore identical for little-endian and big-endian CPU.
-// This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
-// Should endian-independance be of no importance for your application, you may set the #define below to 1.
-// It will improve speed for Big-endian CPU.
-// This option has no impact on Little_Endian CPU.
-#define XXH_FORCE_NATIVE_FORMAT 0
-
-//**************************************
-// Compiler Specific Options
-//**************************************
-// Disable some Visual warning messages
-#ifdef _MSC_VER // Visual Studio
-# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant
-#endif
-
-#ifdef _MSC_VER // Visual Studio
-# define FORCE_INLINE static __forceinline
-#else
-# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
-# else
-# define FORCE_INLINE static inline
-# endif
-#endif
-
-//**************************************
-// Includes & Memory related functions
-//**************************************
-#include "xxhash.h"
-// Modify the local functions below should you wish to use some other memory routines
-// for malloc(), free()
-#include <stdlib.h>
-static void* XXH_malloc(size_t s) { return malloc(s); }
-static void XXH_free (void* p) { free(p); }
-// for memcpy()
-#include <string.h>
-static void* XXH_memcpy(void* dest, const void* src, size_t size)
-{
- return memcpy(dest,src,size);
-}
-
-
-//**************************************
-// Basic Types
-//**************************************
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
-# include <stdint.h>
-typedef uint8_t BYTE;
-typedef uint16_t U16;
-typedef uint32_t U32;
-typedef int32_t S32;
-typedef uint64_t U64;
-#else
-typedef unsigned char BYTE;
-typedef unsigned short U16;
-typedef unsigned int U32;
-typedef signed int S32;
-typedef uint64_t U64;
-#endif
-
-#if defined(__GNUC__) && !defined(XXH_USE_UNALIGNED_ACCESS)
-# define _PACKED __attribute__ ((packed))
-#else
-# define _PACKED
-#endif
-
-#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-# ifdef __IBMC__
-# pragma pack(1)
-# else
-# pragma pack(push, 1)
-# endif
-#endif
-
-typedef struct _U32_S
-{
- U32 v;
-} _PACKED U32_S;
-typedef struct _U64_S
-{
- U64 v;
-} _PACKED U64_S;
-
-#if !defined(XXH_USE_UNALIGNED_ACCESS) && !defined(__GNUC__)
-# pragma pack(pop)
-#endif
-
-#define A32(x) (((U32_S *)(x))->v)
-#define A64(x) (((U64_S *)(x))->v)
-
-
-//***************************************
-// Compiler-specific Functions and Macros
-//***************************************
-#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-
-// Note : although _rotl exists for minGW (GCC under windows), performance seems poor
-#if defined(_MSC_VER)
-# define XXH_rotl32(x,r) _rotl(x,r)
-# define XXH_rotl64(x,r) _rotl64(x,r)
-#else
-# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
-# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
-#endif
-
-#if defined(_MSC_VER) // Visual Studio
-# define XXH_swap32 _byteswap_ulong
-# define XXH_swap64 _byteswap_uint64
-#elif GCC_VERSION >= 403 || defined(__clang__)
-# define XXH_swap32 __builtin_bswap32
-# define XXH_swap64 __builtin_bswap64
-#else
-static inline U32 XXH_swap32 (U32 x)
-{
- return ((x << 24) & 0xff000000 ) |
- ((x << 8) & 0x00ff0000 ) |
- ((x >> 8) & 0x0000ff00 ) |
- ((x >> 24) & 0x000000ff );
-}
-static inline U64 XXH_swap64 (U64 x)
-{
- return ((x << 56) & 0xff00000000000000ULL) |
- ((x << 40) & 0x00ff000000000000ULL) |
- ((x << 24) & 0x0000ff0000000000ULL) |
- ((x << 8) & 0x000000ff00000000ULL) |
- ((x >> 8) & 0x00000000ff000000ULL) |
- ((x >> 24) & 0x0000000000ff0000ULL) |
- ((x >> 40) & 0x000000000000ff00ULL) |
- ((x >> 56) & 0x00000000000000ffULL);
-}
-#endif
-
-
-//**************************************
-// Constants
-//**************************************
-#define PRIME32_1 2654435761U
-#define PRIME32_2 2246822519U
-#define PRIME32_3 3266489917U
-#define PRIME32_4 668265263U
-#define PRIME32_5 374761393U
-
-#define PRIME64_1 11400714785074694791ULL
-#define PRIME64_2 14029467366897019727ULL
-#define PRIME64_3 1609587929392839161ULL
-#define PRIME64_4 9650029242287828579ULL
-#define PRIME64_5 2870177450012600261ULL
-
-//**************************************
-// Architecture Macros
-//**************************************
-typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
-#ifndef XXH_CPU_LITTLE_ENDIAN // It is possible to define XXH_CPU_LITTLE_ENDIAN externally, for example using a compiler switch
-static const int one = 1;
-# define XXH_CPU_LITTLE_ENDIAN (*(char*)(&one))
-#endif
-
-
-//**************************************
-// Macros
-//**************************************
-#define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(!!(c)) }; } // use only *after* variable declarations
-
-
-//****************************
-// Memory reads
-//****************************
-typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
-
-FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
-{
- if (align==XXH_unaligned)
- return endian==XXH_littleEndian ? A32(ptr) : XXH_swap32(A32(ptr));
- else
- return endian==XXH_littleEndian ? *(U32*)ptr : XXH_swap32(*(U32*)ptr);
-}
-
-FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
-{
- return XXH_readLE32_align(ptr, endian, XXH_unaligned);
-}
-
-FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
-{
- if (align==XXH_unaligned)
- return endian==XXH_littleEndian ? A64(ptr) : XXH_swap64(A64(ptr));
- else
- return endian==XXH_littleEndian ? *(U64*)ptr : XXH_swap64(*(U64*)ptr);
-}
-
-FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
-{
- return XXH_readLE64_align(ptr, endian, XXH_unaligned);
-}
-
-
-//****************************
-// Simple Hash Functions
-//****************************
-FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
-{
- const BYTE* p = (const BYTE*)input;
- const BYTE* bEnd = p + len;
- U32 h32;
-#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
-
-#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
- if (p==NULL)
- {
- len=0;
- bEnd=p=(const BYTE*)(size_t)16;
- }
-#endif
-
- if (len>=16)
- {
- const BYTE* const limit = bEnd - 16;
- U32 v1 = seed + PRIME32_1 + PRIME32_2;
- U32 v2 = seed + PRIME32_2;
- U32 v3 = seed + 0;
- U32 v4 = seed - PRIME32_1;
-
- do
- {
- v1 += XXH_get32bits(p) * PRIME32_2;
- v1 = XXH_rotl32(v1, 13);
- v1 *= PRIME32_1;
- p+=4;
- v2 += XXH_get32bits(p) * PRIME32_2;
- v2 = XXH_rotl32(v2, 13);
- v2 *= PRIME32_1;
- p+=4;
- v3 += XXH_get32bits(p) * PRIME32_2;
- v3 = XXH_rotl32(v3, 13);
- v3 *= PRIME32_1;
- p+=4;
- v4 += XXH_get32bits(p) * PRIME32_2;
- v4 = XXH_rotl32(v4, 13);
- v4 *= PRIME32_1;
- p+=4;
- }
- while (p<=limit);
-
- h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
- }
- else
- {
- h32 = seed + PRIME32_5;
- }
-
- h32 += (U32) len;
-
- while (p+4<=bEnd)
- {
- h32 += XXH_get32bits(p) * PRIME32_3;
- h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
- p+=4;
- }
-
- while (p<bEnd)
- {
- h32 += (*p) * PRIME32_5;
- h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
- p++;
- }
-
- h32 ^= h32 >> 15;
- h32 *= PRIME32_2;
- h32 ^= h32 >> 13;
- h32 *= PRIME32_3;
- h32 ^= h32 >> 16;
-
- return h32;
-}
-
-
-unsigned int XXH32 (const void* input, size_t len, unsigned seed)
-{
-#if 0
- // Simple version, good for code maintenance, but unfortunately slow for small inputs
- XXH32_state_t state;
- XXH32_reset(&state, seed);
- XXH32_update(&state, input, len);
- return XXH32_digest(&state);
-#else
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-
-# if !defined(XXH_USE_UNALIGNED_ACCESS)
- if ((((size_t)input) & 3) == 0) // Input is aligned, let's leverage the speed advantage
- {
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
- else
- return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
- }
-# endif
-
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
- else
- return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
-#endif
-}
-
-FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
-{
- const BYTE* p = (const BYTE*)input;
- const BYTE* bEnd = p + len;
- U64 h64;
-#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
-
-#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
- if (p==NULL)
- {
- len=0;
- bEnd=p=(const BYTE*)(size_t)32;
- }
-#endif
-
- if (len>=32)
- {
- const BYTE* const limit = bEnd - 32;
- U64 v1 = seed + PRIME64_1 + PRIME64_2;
- U64 v2 = seed + PRIME64_2;
- U64 v3 = seed + 0;
- U64 v4 = seed - PRIME64_1;
-
- do
- {
- v1 += XXH_get64bits(p) * PRIME64_2;
- p+=8;
- v1 = XXH_rotl64(v1, 31);
- v1 *= PRIME64_1;
- v2 += XXH_get64bits(p) * PRIME64_2;
- p+=8;
- v2 = XXH_rotl64(v2, 31);
- v2 *= PRIME64_1;
- v3 += XXH_get64bits(p) * PRIME64_2;
- p+=8;
- v3 = XXH_rotl64(v3, 31);
- v3 *= PRIME64_1;
- v4 += XXH_get64bits(p) * PRIME64_2;
- p+=8;
- v4 = XXH_rotl64(v4, 31);
- v4 *= PRIME64_1;
- }
- while (p<=limit);
-
- h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
-
- v1 *= PRIME64_2;
- v1 = XXH_rotl64(v1, 31);
- v1 *= PRIME64_1;
- h64 ^= v1;
- h64 = h64 * PRIME64_1 + PRIME64_4;
-
- v2 *= PRIME64_2;
- v2 = XXH_rotl64(v2, 31);
- v2 *= PRIME64_1;
- h64 ^= v2;
- h64 = h64 * PRIME64_1 + PRIME64_4;
-
- v3 *= PRIME64_2;
- v3 = XXH_rotl64(v3, 31);
- v3 *= PRIME64_1;
- h64 ^= v3;
- h64 = h64 * PRIME64_1 + PRIME64_4;
-
- v4 *= PRIME64_2;
- v4 = XXH_rotl64(v4, 31);
- v4 *= PRIME64_1;
- h64 ^= v4;
- h64 = h64 * PRIME64_1 + PRIME64_4;
- }
- else
- {
- h64 = seed + PRIME64_5;
- }
-
- h64 += (U64) len;
-
- while (p+8<=bEnd)
- {
- U64 k1 = XXH_get64bits(p);
- k1 *= PRIME64_2;
- k1 = XXH_rotl64(k1,31);
- k1 *= PRIME64_1;
- h64 ^= k1;
- h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
- p+=8;
- }
-
- if (p+4<=bEnd)
- {
- h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
- h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
- p+=4;
- }
-
- while (p<bEnd)
- {
- h64 ^= (*p) * PRIME64_5;
- h64 = XXH_rotl64(h64, 11) * PRIME64_1;
- p++;
- }
-
- h64 ^= h64 >> 33;
- h64 *= PRIME64_2;
- h64 ^= h64 >> 29;
- h64 *= PRIME64_3;
- h64 ^= h64 >> 32;
-
- return h64;
-}
-
-
-uint64_t XXH64 (const void* input, size_t len, uint64_t seed)
-{
-#if 0
- // Simple version, good for code maintenance, but unfortunately slow for small inputs
- XXH64_state_t state;
- XXH64_reset(&state, seed);
- XXH64_update(&state, input, len);
- return XXH64_digest(&state);
-#else
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-
-# if !defined(XXH_USE_UNALIGNED_ACCESS)
- if ((((size_t)input) & 7)==0) // Input is aligned, let's leverage the speed advantage
- {
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
- else
- return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
- }
-# endif
-
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
- else
- return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
-#endif
-}
-
-/****************************************************
- * Advanced Hash Functions
-****************************************************/
-
-/*** Allocation ***/
-typedef struct
-{
- U64 total_len;
- U32 seed;
- U32 v1;
- U32 v2;
- U32 v3;
- U32 v4;
- U32 mem32[4]; /* defined as U32 for alignment */
- U32 memsize;
-} XXH_istate32_t;
-
-typedef struct
-{
- U64 total_len;
- U64 seed;
- U64 v1;
- U64 v2;
- U64 v3;
- U64 v4;
- U64 mem64[4]; /* defined as U64 for alignment */
- U32 memsize;
-} XXH_istate64_t;
-
-
-XXH32_state_t* XXH32_createState(void)
-{
- XXH_STATIC_ASSERT(sizeof(XXH32_state_t) >= sizeof(XXH_istate32_t)); // A compilation error here means XXH32_state_t is not large enough
- return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
-}
-
-void* XXH32_init (unsigned seed)
-{
- XXH32_state_t *st = XXH32_createState();
- XXH32_reset(st, seed);
-
- return st;
-}
-
-XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
-{
- XXH_free(statePtr);
- return XXH_OK;
-};
-
-XXH64_state_t* XXH64_createState(void)
-{
- XXH_STATIC_ASSERT(sizeof(XXH64_state_t) >= sizeof(XXH_istate64_t)); // A compilation error here means XXH64_state_t is not large enough
- return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
-}
-XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
-{
- XXH_free(statePtr);
- return XXH_OK;
-};
-
-
-/*** Hash feed ***/
-
-XXH_errorcode XXH32_reset(XXH32_state_t* state_in, U32 seed)
-{
- XXH_istate32_t* state = (XXH_istate32_t*) state_in;
- state->seed = seed;
- state->v1 = seed + PRIME32_1 + PRIME32_2;
- state->v2 = seed + PRIME32_2;
- state->v3 = seed + 0;
- state->v4 = seed - PRIME32_1;
- state->total_len = 0;
- state->memsize = 0;
- return XXH_OK;
-}
-
-XXH_errorcode XXH64_reset(XXH64_state_t* state_in, uint64_t seed)
-{
- XXH_istate64_t* state = (XXH_istate64_t*) state_in;
- state->seed = seed;
- state->v1 = seed + PRIME64_1 + PRIME64_2;
- state->v2 = seed + PRIME64_2;
- state->v3 = seed + 0;
- state->v4 = seed - PRIME64_1;
- state->total_len = 0;
- state->memsize = 0;
- return XXH_OK;
-}
-
-
-FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
-{
- XXH_istate32_t* state = (XXH_istate32_t *) state_in;
- const BYTE* p = (const BYTE*)input;
- const BYTE* const bEnd = p + len;
-
-#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
- if (input==NULL) return XXH_ERROR;
-#endif
-
- state->total_len += len;
-
- if (state->memsize + len < 16) // fill in tmp buffer
- {
- XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
- state->memsize += (U32)len;
- return XXH_OK;
- }
-
- if (state->memsize) // some data left from previous update
- {
- XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
- {
- const U32* p32 = state->mem32;
- state->v1 += XXH_readLE32(p32, endian) * PRIME32_2;
- state->v1 = XXH_rotl32(state->v1, 13);
- state->v1 *= PRIME32_1;
- p32++;
- state->v2 += XXH_readLE32(p32, endian) * PRIME32_2;
- state->v2 = XXH_rotl32(state->v2, 13);
- state->v2 *= PRIME32_1;
- p32++;
- state->v3 += XXH_readLE32(p32, endian) * PRIME32_2;
- state->v3 = XXH_rotl32(state->v3, 13);
- state->v3 *= PRIME32_1;
- p32++;
- state->v4 += XXH_readLE32(p32, endian) * PRIME32_2;
- state->v4 = XXH_rotl32(state->v4, 13);
- state->v4 *= PRIME32_1;
- p32++;
- }
- p += 16-state->memsize;
- state->memsize = 0;
- }
-
- if (p <= bEnd-16)
- {
- const BYTE* const limit = bEnd - 16;
- U32 v1 = state->v1;
- U32 v2 = state->v2;
- U32 v3 = state->v3;
- U32 v4 = state->v4;
-
- do
- {
- v1 += XXH_readLE32(p, endian) * PRIME32_2;
- v1 = XXH_rotl32(v1, 13);
- v1 *= PRIME32_1;
- p+=4;
- v2 += XXH_readLE32(p, endian) * PRIME32_2;
- v2 = XXH_rotl32(v2, 13);
- v2 *= PRIME32_1;
- p+=4;
- v3 += XXH_readLE32(p, endian) * PRIME32_2;
- v3 = XXH_rotl32(v3, 13);
- v3 *= PRIME32_1;
- p+=4;
- v4 += XXH_readLE32(p, endian) * PRIME32_2;
- v4 = XXH_rotl32(v4, 13);
- v4 *= PRIME32_1;
- p+=4;
- }
- while (p<=limit);
-
- state->v1 = v1;
- state->v2 = v2;
- state->v3 = v3;
- state->v4 = v4;
- }
-
- if (p < bEnd)
- {
- XXH_memcpy(state->mem32, p, bEnd-p);
- state->memsize = (int)(bEnd-p);
- }
-
- return XXH_OK;
-}
-
-XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
-{
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
- else
- return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
-}
-
-
-
-FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state_in, XXH_endianess endian)
-{
- XXH_istate32_t* state = (XXH_istate32_t*) state_in;
- const BYTE * p = (const BYTE*)state->mem32;
- BYTE* bEnd = (BYTE*)(state->mem32) + state->memsize;
- U32 h32;
-
- if (state->total_len >= 16)
- {
- h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
- }
- else
- {
- h32 = state->seed + PRIME32_5;
- }
-
- h32 += (U32) state->total_len;
-
- while (p+4<=bEnd)
- {
- h32 += XXH_readLE32(p, endian) * PRIME32_3;
- h32 = XXH_rotl32(h32, 17) * PRIME32_4;
- p+=4;
- }
-
- while (p<bEnd)
- {
- h32 += (*p) * PRIME32_5;
- h32 = XXH_rotl32(h32, 11) * PRIME32_1;
- p++;
- }
-
- h32 ^= h32 >> 15;
- h32 *= PRIME32_2;
- h32 ^= h32 >> 13;
- h32 *= PRIME32_3;
- h32 ^= h32 >> 16;
-#if 0
- XXH32_freeState((XXH32_state_t *)state_in);
-#endif
- return h32;
-}
-
-
-U32 XXH32_digest (const XXH32_state_t* state_in)
-{
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH32_digest_endian(state_in, XXH_littleEndian);
- else
- return XXH32_digest_endian(state_in, XXH_bigEndian);
-}
-
-
-FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state_in, const void* input, size_t len, XXH_endianess endian)
-{
- XXH_istate64_t * state = (XXH_istate64_t *) state_in;
- const BYTE* p = (const BYTE*)input;
- const BYTE* const bEnd = p + len;
-
-#ifdef XXH_ACCEPT_NULL_INPUT_POINTER
- if (input==NULL) return XXH_ERROR;
-#endif
-
- state->total_len += len;
-
- if (state->memsize + len < 32) // fill in tmp buffer
- {
- XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
- state->memsize += (U32)len;
- return XXH_OK;
- }
-
- if (state->memsize) // some data left from previous update
- {
- XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
- {
- const U64* p64 = state->mem64;
- state->v1 += XXH_readLE64(p64, endian) * PRIME64_2;
- state->v1 = XXH_rotl64(state->v1, 31);
- state->v1 *= PRIME64_1;
- p64++;
- state->v2 += XXH_readLE64(p64, endian) * PRIME64_2;
- state->v2 = XXH_rotl64(state->v2, 31);
- state->v2 *= PRIME64_1;
- p64++;
- state->v3 += XXH_readLE64(p64, endian) * PRIME64_2;
- state->v3 = XXH_rotl64(state->v3, 31);
- state->v3 *= PRIME64_1;
- p64++;
- state->v4 += XXH_readLE64(p64, endian) * PRIME64_2;
- state->v4 = XXH_rotl64(state->v4, 31);
- state->v4 *= PRIME64_1;
- p64++;
- }
- p += 32-state->memsize;
- state->memsize = 0;
- }
-
- if (p+32 <= bEnd)
- {
- const BYTE* const limit = bEnd - 32;
- U64 v1 = state->v1;
- U64 v2 = state->v2;
- U64 v3 = state->v3;
- U64 v4 = state->v4;
-
- do
- {
- v1 += XXH_readLE64(p, endian) * PRIME64_2;
- v1 = XXH_rotl64(v1, 31);
- v1 *= PRIME64_1;
- p+=8;
- v2 += XXH_readLE64(p, endian) * PRIME64_2;
- v2 = XXH_rotl64(v2, 31);
- v2 *= PRIME64_1;
- p+=8;
- v3 += XXH_readLE64(p, endian) * PRIME64_2;
- v3 = XXH_rotl64(v3, 31);
- v3 *= PRIME64_1;
- p+=8;
- v4 += XXH_readLE64(p, endian) * PRIME64_2;
- v4 = XXH_rotl64(v4, 31);
- v4 *= PRIME64_1;
- p+=8;
- }
- while (p<=limit);
-
- state->v1 = v1;
- state->v2 = v2;
- state->v3 = v3;
- state->v4 = v4;
- }
-
- if (p < bEnd)
- {
- XXH_memcpy(state->mem64, p, bEnd-p);
- state->memsize = (int)(bEnd-p);
- }
-
- return XXH_OK;
-}
-
-XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
-{
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
- else
- return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
-}
-
-
-
-FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state_in, XXH_endianess endian)
-{
- XXH_istate64_t * state = (XXH_istate64_t *) state_in;
- const BYTE * p = (const BYTE*)state->mem64;
- BYTE* bEnd = (BYTE*)state->mem64 + state->memsize;
- U64 h64;
-
- if (state->total_len >= 32)
- {
- U64 v1 = state->v1;
- U64 v2 = state->v2;
- U64 v3 = state->v3;
- U64 v4 = state->v4;
-
- h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
-
- v1 *= PRIME64_2;
- v1 = XXH_rotl64(v1, 31);
- v1 *= PRIME64_1;
- h64 ^= v1;
- h64 = h64*PRIME64_1 + PRIME64_4;
-
- v2 *= PRIME64_2;
- v2 = XXH_rotl64(v2, 31);
- v2 *= PRIME64_1;
- h64 ^= v2;
- h64 = h64*PRIME64_1 + PRIME64_4;
-
- v3 *= PRIME64_2;
- v3 = XXH_rotl64(v3, 31);
- v3 *= PRIME64_1;
- h64 ^= v3;
- h64 = h64*PRIME64_1 + PRIME64_4;
-
- v4 *= PRIME64_2;
- v4 = XXH_rotl64(v4, 31);
- v4 *= PRIME64_1;
- h64 ^= v4;
- h64 = h64*PRIME64_1 + PRIME64_4;
- }
- else
- {
- h64 = state->seed + PRIME64_5;
- }
-
- h64 += (U64) state->total_len;
-
- while (p+8<=bEnd)
- {
- U64 k1 = XXH_readLE64(p, endian);
- k1 *= PRIME64_2;
- k1 = XXH_rotl64(k1,31);
- k1 *= PRIME64_1;
- h64 ^= k1;
- h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
- p+=8;
- }
-
- if (p+4<=bEnd)
- {
- h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
- h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
- p+=4;
- }
-
- while (p<bEnd)
- {
- h64 ^= (*p) * PRIME64_5;
- h64 = XXH_rotl64(h64, 11) * PRIME64_1;
- p++;
- }
-
- h64 ^= h64 >> 33;
- h64 *= PRIME64_2;
- h64 ^= h64 >> 29;
- h64 *= PRIME64_3;
- h64 ^= h64 >> 32;
-#if 0
- XXH64_freeState((XXH64_state_t *)state_in);
-#endif
- return h64;
-}
-
-
-uint64_t XXH64_digest (const XXH64_state_t* state_in)
-{
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH64_digest_endian(state_in, XXH_littleEndian);
- else
- return XXH64_digest_endian(state_in, XXH_bigEndian);
-}
-
-
diff --git a/src/xxhash.h b/src/xxhash.h
deleted file mode 100644
index 49b0239071e38..0000000000000
--- a/src/xxhash.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- xxHash - Extremely Fast Hash algorithm
- Header File
- Copyright (C) 2012-2014, Yann Collet.
- BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are
- met:
-
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following disclaimer
- in the documentation and/or other materials provided with the
- distribution.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- You can contact the author at :
- - xxHash source repository : http://code.google.com/p/xxhash/
-*/
-
-/* Notice extracted from xxHash homepage :
-
-xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
-It also successfully passes all tests from the SMHasher suite.
-
-Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
-
-Name Speed Q.Score Author
-xxHash 5.4 GB/s 10
-CrapWow 3.2 GB/s 2 Andrew
-MumurHash 3a 2.7 GB/s 10 Austin Appleby
-SpookyHash 2.0 GB/s 10 Bob Jenkins
-SBox 1.4 GB/s 9 Bret Mulvey
-Lookup3 1.2 GB/s 9 Bob Jenkins
-SuperFastHash 1.2 GB/s 1 Paul Hsieh
-CityHash64 1.05 GB/s 10 Pike & Alakuijala
-FNV 0.55 GB/s 5 Fowler, Noll, Vo
-CRC32 0.43 GB/s 9
-MD5-32 0.33 GB/s 10 Ronald L. Rivest
-SHA1-32 0.28 GB/s 10
-
-Q.Score is a measure of quality of the hash function.
-It depends on successfully passing SMHasher test set.
-10 is a perfect score.
-*/
-#ifndef LIBUCL_XXHASH_H
-#define LIBUCL_XXHASH_H
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-
-/*****************************
- Includes
-*****************************/
-#include <stddef.h> /* size_t */
-#include <stdint.h>
-
-
-/*****************************
- Type
-*****************************/
-typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
-
-
-
-/*****************************
- Simple Hash Functions
-*****************************/
-
-unsigned int XXH32 (const void* input, size_t length, unsigned seed);
-uint64_t XXH64 (const void* input, size_t length, uint64_t seed);
-
-/*
-XXH32() :
- Calculate the 32-bits hash of sequence "length" bytes stored at memory address "input".
- The memory between input & input+length must be valid (allocated and read-accessible).
- "seed" can be used to alter the result predictably.
- This function successfully passes all SMHasher tests.
- Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
-XXH64() :
- Calculate the 64-bits hash of sequence of length "len" stored at memory address "input".
-*/
-
-
-
-/*****************************
- Advanced Hash Functions
-*****************************/
-typedef struct { int64_t ll[ 6]; } XXH32_state_t;
-typedef struct { int64_t ll[11]; } XXH64_state_t;
-
-/*
-These structures allow static allocation of XXH states.
-States must then be initialized using XXHnn_reset() before first use.
-
-If you prefer dynamic allocation, please refer to functions below.
-*/
-
-/*
- * !!!
- * Rspamd specific: we use the legacy method to free state when digest is obtained
- * !!!
- */
-void * XXH32_init (unsigned seed);
-XXH32_state_t* XXH32_createState(void);
-XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
-
-XXH64_state_t* XXH64_createState(void);
-XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
-
-/*
-These functions create and release memory for XXH state.
-States must then be initialized using XXHnn_reset() before first use.
-*/
-
-
-XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned seed);
-XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
-unsigned int XXH32_digest (const XXH32_state_t* statePtr);
-
-XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, uint64_t seed);
-XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
-uint64_t XXH64_digest (const XXH64_state_t* statePtr);
-
-/*
-These functions calculate the xxHash of an input provided in multiple smaller packets,
-as opposed to an input provided as a single block.
-
-XXH state space must first be allocated, using either static or dynamic method provided above.
-
-Start a new hash by initializing state with a seed, using XXHnn_reset().
-
-Then, feed the hash state by calling XXHnn_update() as many times as necessary.
-Obviously, input must be valid, meaning allocated and read accessible.
-The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
-
-Finally, you can produce a hash anytime, by using XXHnn_digest().
-This function returns the final nn-bits hash.
-You can nonetheless continue feeding the hash state with more input,
-and therefore get some new hashes, by calling again XXHnn_digest().
-
-When you are done, don't forget to free XXH state space, using typically XXHnn_freeState().
-*/
-
-
-#if defined (__cplusplus)
-}
-#endif
-
-#endif
diff --git a/uthash/uthash.h b/uthash/uthash.h
index 36b1cf46a4b4c..9ed8bea4db4c7 100644
--- a/uthash/uthash.h
+++ b/uthash/uthash.h
@@ -22,12 +22,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef UTHASH_H
-#define UTHASH_H
+#define UTHASH_H
#include <string.h> /* memcmp,strlen */
#include <stddef.h> /* ptrdiff_t */
#include <stdlib.h> /* exit() */
-#include "xxhash.h"
+#include "mum.h"
/* These macros use decltype or the earlier __typeof GNU extension.
As decltype is only available in newer compilers (VS2010 or gcc 4.3+
@@ -50,7 +50,7 @@ do {
char **_da_dst = (char**)(&(dst)); \
*_da_dst = (char*)(src); \
} while(0)
-#else
+#else
#define DECLTYPE_ASSIGN(dst,src) \
do { \
(dst) = DECLTYPE(dst)(src); \
@@ -115,12 +115,12 @@ do {
if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \
memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \
(tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \
-} while (0)
+} while (0)
#define HASH_BLOOM_FREE(tbl) \
do { \
uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \
-} while (0)
+} while (0)
#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8] |= (1U << ((idx)%8)))
#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8] & (1U << ((idx)%8)))
@@ -132,9 +132,9 @@ do {
HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1)))
#else
-#define HASH_BLOOM_MAKE(tbl)
-#define HASH_BLOOM_FREE(tbl)
-#define HASH_BLOOM_ADD(tbl,hashv)
+#define HASH_BLOOM_MAKE(tbl)
+#define HASH_BLOOM_FREE(tbl)
+#define HASH_BLOOM_ADD(tbl,hashv)
#define HASH_BLOOM_TEST(tbl,hashv) (1)
#define HASH_BLOOM_BYTELEN 0
#endif
@@ -170,7 +170,7 @@ do {
}; \
HASH_ADD(hh,head,fieldname,keylen_in,add); \
} while(0)
-
+
#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \
do { \
unsigned _ha_bkt; \
@@ -328,10 +328,10 @@ do {
} \
} while (0)
#else
-#define HASH_FSCK(hh,head)
+#define HASH_FSCK(hh,head)
#endif
-/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
+/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to
* the descriptor to which this macro is defined for tuning the hash function.
* The app can #include <unistd.h> to get the prototype for write(2). */
#ifdef HASH_EMIT_KEYS
@@ -341,12 +341,12 @@ do {
write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \
write(HASH_EMIT_KEYS, keyptr, fieldlen); \
} while (0)
-#else
-#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
+#else
+#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen)
#endif
/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */
-#ifdef HASH_FUNCTION
+#ifdef HASH_FUNCTION
#define HASH_FCN HASH_FUNCTION
#else
#define HASH_FCN HASH_XX
@@ -356,14 +356,14 @@ do {
#define HASH_XX(key,keylen,num_bkts,hashv,bkt) \
do { \
- hashv = XXH32 (key, keylen, XX_HASH_PRIME); \
+ hashv = mum_hash (key, keylen, XX_HASH_PRIME); \
bkt = (hashv) & (num_bkts-1); \
} while (0)
/* key comparison function; return 0 if keys equal */
-#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
+#define HASH_KEYCMP(a,b,len) memcmp(a,b,len)
/* iterate over items in a known bucket to find desired item */
#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,out) \
@@ -404,36 +404,36 @@ do {
} \
if (hh_del->hh_next) { \
hh_del->hh_next->hh_prev = hh_del->hh_prev; \
- }
+ }
/* Bucket expansion has the effect of doubling the number of buckets
* and redistributing the items into the new buckets. Ideally the
* items will distribute more or less evenly into the new buckets
* (the extent to which this is true is a measure of the quality of
- * the hash function as it applies to the key domain).
- *
+ * the hash function as it applies to the key domain).
+ *
* With the items distributed into more buckets, the chain length
* (item count) in each bucket is reduced. Thus by expanding buckets
- * the hash keeps a bound on the chain length. This bounded chain
+ * the hash keeps a bound on the chain length. This bounded chain
* length is the essence of how a hash provides constant time lookup.
- *
+ *
* The calculation of tbl->ideal_chain_maxlen below deserves some
* explanation. First, keep in mind that we're calculating the ideal
* maximum chain length based on the *new* (doubled) bucket count.
* In fractions this is just n/b (n=number of items,b=new num buckets).
- * Since the ideal chain length is an integer, we want to calculate
+ * Since the ideal chain length is an integer, we want to calculate
* ceil(n/b). We don't depend on floating point arithmetic in this
* hash, so to calculate ceil(n/b) with integers we could write
- *
+ *
* ceil(n/b) = (n/b) + ((n%b)?1:0)
- *
+ *
* and in fact a previous version of this hash did just that.
* But now we have improved things a bit by recognizing that b is
* always a power of two. We keep its base 2 log handy (call it lb),
* so now we can write this with a bit shift and logical AND:
- *
+ *
* ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0)
- *
+ *
*/
#define HASH_EXPAND_BUCKETS(tbl) \
do { \
@@ -485,7 +485,7 @@ do {
/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */
-/* Note that HASH_SORT assumes the hash handle name to be hh.
+/* Note that HASH_SORT assumes the hash handle name to be hh.
* HASH_SRT was added to allow the hash handle name to be passed in. */
#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn)
#define HASH_SRT(hh,head,cmpfcn) \
@@ -575,10 +575,10 @@ do {
} \
} while (0)
-/* This function selects items from one hash into another hash.
- * The end result is that the selected items have dual presence
- * in both hashes. There is no copy of the items made; rather
- * they are added into the new hash through a secondary hash
+/* This function selects items from one hash into another hash.
+ * The end result is that the selected items have dual presence
+ * in both hashes. There is no copy of the items made; rather
+ * they are added into the new hash through a secondary hash
* hash handle that must be present in the structure. */
#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \
do { \
@@ -638,7 +638,7 @@ do {
#ifdef NO_DECLTYPE
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head), (*(char**)(&(tmp)))=(char*)((head)?(head)->hh.next:NULL); \
- el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
+ el; (el)=(tmp),(*(char**)(&(tmp)))=(char*)((tmp)?(tmp)->hh.next:NULL))
#else
#define HASH_ITER(hh,head,el,tmp) \
for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL); \
@@ -646,7 +646,7 @@ for((el)=(head),(tmp)=DECLTYPE(el)((head)?(head)->hh.next:NULL);
#endif
/* obtain a count of items in the hash */
-#define HASH_COUNT(head) HASH_CNT(hh,head)
+#define HASH_COUNT(head) HASH_CNT(hh,head)
#define HASH_CNT(hh,head) ((head)?((head)->hh.tbl->num_items):0)
typedef struct UT_hash_bucket {
@@ -655,7 +655,7 @@ typedef struct UT_hash_bucket {
/* expand_mult is normally set to 0. In this situation, the max chain length
* threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If
- * the bucket's chain exceeds this length, bucket expansion is triggered).
+ * the bucket's chain exceeds this length, bucket expansion is triggered).
* However, setting expand_mult to a non-zero value delays bucket expansion
* (that would be triggered by additions to this particular bucket)
* until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH.
@@ -663,7 +663,7 @@ typedef struct UT_hash_bucket {
* multiplier is to reduce bucket expansions, since they are expensive, in
* situations where we know that a particular bucket tends to be overused.
* It is better to let its chain length grow to a longer yet-still-bounded
- * value, than to do an O(n) bucket expansion too often.
+ * value, than to do an O(n) bucket expansion too often.
*/
unsigned expand_mult;
@@ -689,7 +689,7 @@ typedef struct UT_hash_table {
* hash distribution; reaching them in a chain traversal takes >ideal steps */
unsigned nonideal_items;
- /* ineffective expands occur when a bucket doubling was performed, but
+ /* ineffective expands occur when a bucket doubling was performed, but
* afterward, more than half the items in the hash had nonideal chain
* positions. If this happens on two consecutive expansions we inhibit any
* further expansion, as it's not helping; this happens when the hash