git » nmdb » commit cf06a74

diff --git a/nmdb/cache.c b/nmdb/cache.c
index e8fe449..5d1e2c8 100644
--- a/nmdb/cache.c
+++ b/nmdb/cache.c
@@ -10,6 +10,7 @@
 #include <stdlib.h>		/* for malloc() */
 #include <string.h>		/* for memcpy()/memcmp() */
 #include <stdio.h>		/* snprintf() */
+#include "hash.h"		/* hash() */
 #include "cache.h"
 
 
@@ -101,34 +102,6 @@ static void free_entry(struct cache_entry *e)
 	e->vsize = -1;
 }
 
-/*
- * The hash function used is the "One at a time" function, which seems simple,
- * fast and popular. Others for future consideration if speed becomes an issue
- * include:
- *  * FNV Hash (http://www.isthe.com/chongo/tech/comp/fnv/)
- *  * SuperFastHash (http://www.azillionmonkeys.com/qed/hash.html)
- *  * Judy dynamic arrays (http://judy.sf.net)
- *
- * A good comparison can be found at
- * http://eternallyconfuzzled.com/tuts/hashing.html#existing
- */
-
-static uint32_t hash(const unsigned char *key, const size_t ksize)
-{
-	uint32_t h = 0;
-	size_t i;
-
-	for (i = 0; i < ksize; i++) {
-		h += key[i];
-		h += (h << 10);
-		h ^= (h >> 6);
-	}
-	h += (h << 3);
-	h ^= (h >> 11);
-	h += (h << 15);
-	return h;
-}
-
 
 /* Looks up the given key in the chain. Returns NULL if not found, or a
  * pointer to the cache entry if it is. The chain can be empty. */
diff --git a/nmdb/hash.h b/nmdb/hash.h
new file mode 100644
index 0000000..706c5a6
--- /dev/null
+++ b/nmdb/hash.h
@@ -0,0 +1,164 @@
+
+#ifndef _HASH_H
+#define _HASH_H
+
+/*
+ * Hash function used in the cache.
+ *
+ * This is kept here instead of a .c file so the compiler can inline if it
+ * decides it's worth it.
+ *
+ * The hash function used is Austin Appleby's MurmurHash2. It used to be
+ * Jenkins's one-at-a-time, but this one is much faster. However, we keep
+ * the others that were tested for future comparison purposes.
+ */
+#define hash(k, s) murmurhash2(k, s)
+
+/* MurmurHash2, by Austin Appleby. The one we use.
+ * It has been modify to fit into the coding style, to work on uint32_t
+ * instead of ints, and the seed was fixed to a random number because it's not
+ * an issue for us. The author placed it in the public domain, so it's safe.
+ * http://sites.google.com/site/murmurhash/ */
+static uint32_t murmurhash2(const unsigned char *key, size_t len)
+{
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+	const uint32_t seed = 0x34a4b627;
+
+	// Initialize the hash to a 'random' value
+	uint32_t h = seed ^ len;
+
+	// Mix 4 bytes at a time into the hash
+	while (len >= 4) {
+		uint32_t k = *(uint32_t *) key;
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h *= m;
+		h ^= k;
+
+		key += 4;
+		len -= 4;
+	}
+
+	// Handle the last few bytes of the input array
+	switch (len) {
+		case 3: h ^= key[2] << 16;
+		case 2: h ^= key[1] << 8;
+		case 1: h ^= key[0];
+			h *= m;
+	}
+
+	// Do a few final mixes of the hash to ensure the last few
+	// bytes are well-incorporated.
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
+
+
+/* Unused functions, left for comparison purposes */
+#if 0
+
+/* Paul Hsieh's SuperFastHash, which is really fast, but a tad slower than
+ * MurmurHash2.
+ * http://www.azillionmonkeys.com/qed/hash.html */
+static uint32_t superfast(const unsigned char *data, size_t len)
+{
+	uint32_t h = len, tmp;
+	int rem;
+
+	/* this is the same as (*((const uint16_t *) (d))) on little endians,
+	 * but we keep the generic version since gcc compiles decent code for
+	 * both and there's no noticeable difference in performance */
+	#define get16bits(d) ( \
+		( ( (uint32_t) ( ((const uint8_t *)(d))[1] ) ) << 8 ) \
+		+ (uint32_t) ( ((const uint8_t *)(d))[0] ) )
+
+	if (len <= 0 || data == NULL)
+		return 0;
+
+	rem = len & 3;
+	len >>= 2;
+
+	/* Main loop */
+	for (; len > 0; len--) {
+		h += get16bits(data);
+		tmp = (get16bits(data+2) << 11) ^ h;
+		h = (h << 16) ^ tmp;
+		data += 2*sizeof (uint16_t);
+		h += h >> 11;
+	}
+
+	/* Handle end cases */
+	switch (rem) {
+		case 3: h += get16bits(data);
+			h ^= h << 16;
+			h ^= data[sizeof(uint16_t)] << 18;
+			h += h >> 11;
+			break;
+		case 2: h += get16bits(data);
+			h ^= h << 11;
+			h += h >> 17;
+			break;
+		case 1: h += *data;
+			h ^= h << 10;
+			h += h >> 1;
+	}
+
+	/* Force "avalanching" of final 127 bits */
+	h ^= h << 3;
+	h += h >> 5;
+	h ^= h << 4;
+	h += h >> 17;
+	h ^= h << 25;
+	h += h >> 6;
+
+	return h;
+
+	#undef get16bits
+}
+
+/* Jenkins' one-at-a-time hash, the one we used to use.
+ * http://en.wikipedia.org/wiki/Jenkins_hash_function */
+static uint32_t oneatatime(const unsigned char *key, const size_t ksize)
+{
+	uint32_t h = 0;
+	size_t i;
+
+	for (i = 0; i < ksize; i++) {
+		h += key[i];
+		h += (h << 10);
+		h ^= (h >> 6);
+	}
+	h += (h << 3);
+	h ^= (h >> 11);
+	h += (h << 15);
+	return h;
+}
+
+/* FNV 32 bit hash; slower than the rest.
+ * http://www.isthe.com/chongo/tech/comp/fnv/ */
+static uint32_t fnv_hash(const unsigned char *key, const size_t ksize)
+{
+	const unsigned char *end = key + ksize;
+	uint32_t hval;
+
+	hval = 0x811c9dc5;
+	while (key < end) {
+		hval += (hval<<1) + (hval<<4) + (hval<<7) +
+			(hval<<8) + (hval<<24);
+		hval ^= (uint32_t) *key++;
+	}
+
+	return hval;
+}
+
+#endif
+
+#endif
+
author	Alberto Bertogli <albertito@blitiri.com.ar> 2010-04-15 21:13:23 UTC
committer	Alberto Bertogli <albertito@blitiri.com.ar> 2010-04-15 22:12:36 UTC
parent	8b4ab9201633c17412bd96cabb5c25c311489628
nmdb/cache.c	+1	-28
nmdb/hash.h	+164	-0
git » nmdb » commit cf06a74

nmdb: Change the cache hash function to Austin Appleby's MurmurHash2