author | Alberto Bertogli
<albertito@blitiri.com.ar> 2010-04-15 21:13:23 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2010-04-15 22:12:36 UTC |
parent | 8b4ab9201633c17412bd96cabb5c25c311489628 |
nmdb/cache.c | +1 | -28 |
nmdb/hash.h | +164 | -0 |
diff --git a/nmdb/cache.c b/nmdb/cache.c index e8fe449..5d1e2c8 100644 --- a/nmdb/cache.c +++ b/nmdb/cache.c @@ -10,6 +10,7 @@ #include <stdlib.h> /* for malloc() */ #include <string.h> /* for memcpy()/memcmp() */ #include <stdio.h> /* snprintf() */ +#include "hash.h" /* hash() */ #include "cache.h" @@ -101,34 +102,6 @@ static void free_entry(struct cache_entry *e) e->vsize = -1; } -/* - * The hash function used is the "One at a time" function, which seems simple, - * fast and popular. Others for future consideration if speed becomes an issue - * include: - * * FNV Hash (http://www.isthe.com/chongo/tech/comp/fnv/) - * * SuperFastHash (http://www.azillionmonkeys.com/qed/hash.html) - * * Judy dynamic arrays (http://judy.sf.net) - * - * A good comparison can be found at - * http://eternallyconfuzzled.com/tuts/hashing.html#existing - */ - -static uint32_t hash(const unsigned char *key, const size_t ksize) -{ - uint32_t h = 0; - size_t i; - - for (i = 0; i < ksize; i++) { - h += key[i]; - h += (h << 10); - h ^= (h >> 6); - } - h += (h << 3); - h ^= (h >> 11); - h += (h << 15); - return h; -} - /* Looks up the given key in the chain. Returns NULL if not found, or a * pointer to the cache entry if it is. The chain can be empty. */ diff --git a/nmdb/hash.h b/nmdb/hash.h new file mode 100644 index 0000000..706c5a6 --- /dev/null +++ b/nmdb/hash.h @@ -0,0 +1,164 @@ + +#ifndef _HASH_H +#define _HASH_H + +/* + * Hash function used in the cache. + * + * This is kept here instead of a .c file so the compiler can inline if it + * decides it's worth it. + * + * The hash function used is Austin Appleby's MurmurHash2. It used to be + * Jenkins's one-at-a-time, but this one is much faster. However, we keep + * the others that were tested for future comparison purposes. + */ +#define hash(k, s) murmurhash2(k, s) + +/* MurmurHash2, by Austin Appleby. The one we use. + * It has been modify to fit into the coding style, to work on uint32_t + * instead of ints, and the seed was fixed to a random number because it's not + * an issue for us. The author placed it in the public domain, so it's safe. + * http://sites.google.com/site/murmurhash/ */ +static uint32_t murmurhash2(const unsigned char *key, size_t len) +{ + const uint32_t m = 0x5bd1e995; + const int r = 24; + const uint32_t seed = 0x34a4b627; + + // Initialize the hash to a 'random' value + uint32_t h = seed ^ len; + + // Mix 4 bytes at a time into the hash + while (len >= 4) { + uint32_t k = *(uint32_t *) key; + + k *= m; + k ^= k >> r; + k *= m; + + h *= m; + h ^= k; + + key += 4; + len -= 4; + } + + // Handle the last few bytes of the input array + switch (len) { + case 3: h ^= key[2] << 16; + case 2: h ^= key[1] << 8; + case 1: h ^= key[0]; + h *= m; + } + + // Do a few final mixes of the hash to ensure the last few + // bytes are well-incorporated. + h ^= h >> 13; + h *= m; + h ^= h >> 15; + + return h; +} + + +/* Unused functions, left for comparison purposes */ +#if 0 + +/* Paul Hsieh's SuperFastHash, which is really fast, but a tad slower than + * MurmurHash2. + * http://www.azillionmonkeys.com/qed/hash.html */ +static uint32_t superfast(const unsigned char *data, size_t len) +{ + uint32_t h = len, tmp; + int rem; + + /* this is the same as (*((const uint16_t *) (d))) on little endians, + * but we keep the generic version since gcc compiles decent code for + * both and there's no noticeable difference in performance */ + #define get16bits(d) ( \ + ( ( (uint32_t) ( ((const uint8_t *)(d))[1] ) ) << 8 ) \ + + (uint32_t) ( ((const uint8_t *)(d))[0] ) ) + + if (len <= 0 || data == NULL) + return 0; + + rem = len & 3; + len >>= 2; + + /* Main loop */ + for (; len > 0; len--) { + h += get16bits(data); + tmp = (get16bits(data+2) << 11) ^ h; + h = (h << 16) ^ tmp; + data += 2*sizeof (uint16_t); + h += h >> 11; + } + + /* Handle end cases */ + switch (rem) { + case 3: h += get16bits(data); + h ^= h << 16; + h ^= data[sizeof(uint16_t)] << 18; + h += h >> 11; + break; + case 2: h += get16bits(data); + h ^= h << 11; + h += h >> 17; + break; + case 1: h += *data; + h ^= h << 10; + h += h >> 1; + } + + /* Force "avalanching" of final 127 bits */ + h ^= h << 3; + h += h >> 5; + h ^= h << 4; + h += h >> 17; + h ^= h << 25; + h += h >> 6; + + return h; + + #undef get16bits +} + +/* Jenkins' one-at-a-time hash, the one we used to use. + * http://en.wikipedia.org/wiki/Jenkins_hash_function */ +static uint32_t oneatatime(const unsigned char *key, const size_t ksize) +{ + uint32_t h = 0; + size_t i; + + for (i = 0; i < ksize; i++) { + h += key[i]; + h += (h << 10); + h ^= (h >> 6); + } + h += (h << 3); + h ^= (h >> 11); + h += (h << 15); + return h; +} + +/* FNV 32 bit hash; slower than the rest. + * http://www.isthe.com/chongo/tech/comp/fnv/ */ +static uint32_t fnv_hash(const unsigned char *key, const size_t ksize) +{ + const unsigned char *end = key + ksize; + uint32_t hval; + + hval = 0x811c9dc5; + while (key < end) { + hval += (hval<<1) + (hval<<4) + (hval<<7) + + (hval<<8) + (hval<<24); + hval ^= (uint32_t) *key++; + } + + return hval; +} + +#endif + +#endif +