git » libfiu » commit f4e937f

libfiu: Use a hash table for the final entries

author Alberto Bertogli
2013-10-23 01:44:17 UTC
committer Alberto Bertogli
2013-10-29 22:43:24 UTC
parent f672b4dc9bb9d348ab014d660bd892df78e8a148

libfiu: Use a hash table for the final entries

This patch makes wtable use a hash table for the final (non-wildcard) entries.

This passes the tests, and massively improves performance when the failure
points are not wildcarded.

Signed-off-by: Alberto Bertogli <albertito@blitiri.com.ar>

libfiu/Makefile +1 -1
libfiu/hash.c +266 -0
libfiu/hash.h +20 -0
libfiu/wtable.c +118 -70
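
Note on the resulting layout: keys that do not end in '*' ("final" entries)
now live in the new hash table and are found with a constant-time average
lookup, while wildcarded keys stay in a linear array that is only scanned
when the exact lookup misses. Below is a minimal usage sketch of that
behaviour, based on the wtable function signatures in this diff; the keys
are illustrative, and wtable.h itself is not part of this commit, so its
exact contents are assumed.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "wtable.h"

int main(void)
{
	/* free() as the value destructor: the table owns its values. */
	struct wtable *t = wtable_create(free);
	if (t == NULL)
		return 1;

	/* Final entry: goes into the internal hash table. */
	wtable_set(t, "posix/io/oc/open", strdup("fail-open"));

	/* Wildcard entry: stays in the linear array, matched by prefix. */
	wtable_set(t, "posix/io/*", strdup("fail-io"));

	/* An exact match wins; only a miss falls back to the wildcard scan. */
	printf("%s\n", (char *) wtable_get(t, "posix/io/oc/open"));	/* fail-open */
	printf("%s\n", (char *) wtable_get(t, "posix/io/rw/read"));	/* fail-io */

	wtable_free(t);
	return 0;
}
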

diff --git a/libfiu/Makefile b/libfiu/Makefile
index 3a543ba..4e0d01a 100644
--- a/libfiu/Makefile
+++ b/libfiu/Makefile
@@ -26,7 +26,7 @@ DESTDIR=$(PREFIX)
 INSTALL=install
 
 
-OBJS = fiu.o fiu-rc.o backtrace.o wtable.o
+OBJS = fiu.o fiu-rc.o backtrace.o wtable.o hash.o
 
 
 ifneq ($(V), 1)
diff --git a/libfiu/hash.c b/libfiu/hash.c
new file mode 100644
index 0000000..ee6b5c1
--- /dev/null
+++ b/libfiu/hash.c
@@ -0,0 +1,266 @@
+
+/*
+ * Generic, simple hash table.
+ *
+ * Takes \0-terminated strings as keys, and void * as values.
+ * It is tuned for a small number of elements (< 1000).
+ */
+
+#include <sys/types.h>		/* for size_t */
+#include <stdint.h>		/* for [u]int*_t */
+#include <stdbool.h>		/* for bool */
+#include <stdlib.h>		/* for malloc() */
+#include <string.h>		/* for memcpy()/memcmp() */
+#include <stdio.h>		/* snprintf() */
+#include "hash.h"
+
+/* MurmurHash2, by Austin Appleby. The one we use.
+ * It has been modified to fit the coding style, to work on uint32_t
+ * instead of int, and the seed was fixed to a random number because it's not
+ * an issue for us. The author placed it in the public domain, so it's ok to
+ * use it here.
+ * http://sites.google.com/site/murmurhash/ */
+static uint32_t murmurhash2(const char *key, size_t len)
+{
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+	const uint32_t seed = 0x34a4b627;
+
+	// Initialize the hash to a 'random' value
+	uint32_t h = seed ^ len;
+
+	// Mix 4 bytes at a time into the hash
+	while (len >= 4) {
+		uint32_t k = *(uint32_t *) key;
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h *= m;
+		h ^= k;
+
+		key += 4;
+		len -= 4;
+	}
+
+	// Handle the last few bytes of the input array
+	switch (len) {
+		case 3: h ^= key[2] << 16;
+		case 2: h ^= key[1] << 8;
+		case 1: h ^= key[0];
+			h *= m;
+	}
+
+	// Do a few final mixes of the hash to ensure the last few
+	// bytes are well-incorporated.
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
+
+enum used_as {
+	NEVER = 0,
+	IN_USE = 1,
+	REMOVED = 2,
+};
+
+struct entry {
+	char *key;
+	void *value;
+	enum used_as in_use;
+};
+
+struct hash {
+	struct entry *entries;
+	size_t table_size;
+	size_t nentries;
+	void (*destructor)(void *);
+};
+
+
+/* Minimum table size. */
+#define MIN_SIZE 10
+
+struct hash *hash_create(void (*destructor)(void *))
+{
+	struct hash *h = malloc(sizeof(struct hash));
+	if (h == NULL)
+		return NULL;
+
+	h->entries = malloc(sizeof(struct entry) * MIN_SIZE);
+	if (h->entries == NULL) {
+		free(h);
+		return NULL;
+	}
+
+	memset(h->entries, 0, sizeof(struct entry) * MIN_SIZE);
+
+	h->table_size = MIN_SIZE;
+	h->nentries = 0;
+	h->destructor = destructor;
+
+	return h;
+}
+
+void hash_free(struct hash *h)
+{
+	int i;
+	struct entry *entry;
+
+	for (i = 0; i < h->table_size; i++) {
+		entry = h->entries + i;
+		if (entry->in_use == IN_USE) {
+			h->destructor(entry->value);
+			free(entry->key);
+		}
+	}
+
+	free(h->entries);
+	free(h);
+}
+
+void *hash_get(struct hash *h, const char *key)
+{
+	size_t pos;
+	struct entry *entry;
+
+	pos = murmurhash2(key, strlen(key)) % h->table_size;
+
+	for (;;) {
+		entry = h->entries + pos;
+		if (entry->in_use == NEVER) {
+			/* We got to an entry that was never used, no match. */
+			return NULL;
+		} else if (entry->in_use == IN_USE &&
+				strcmp(key, entry->key) == 0) {
+			/* The key matches. */
+			return entry->value;
+		} else {
+			/* We use linear probing for now */
+			pos = (pos + 1) % h->table_size;
+		}
+	}
+
+	return NULL;
+}
+
+/* Internal version of hash_set.
+ * It uses the key as-is (it won't copy it), and it won't resize the array
+ * either. */
+static bool _hash_set(struct hash *h, char *key, void *value)
+{
+	size_t pos;
+	struct entry *entry;
+
+	pos = murmurhash2(key, strlen(key)) % h->table_size;
+
+	for (;;) {
+		entry = h->entries + pos;
+		if (entry->in_use != IN_USE) {
+			entry->in_use = IN_USE;
+			entry->key = key;
+			entry->value = value;
+			h->nentries++;
+			return true;
+		} else if (strcmp(key, entry->key) == 0) {
+			/* The key matches, override the value. */
+			h->destructor(entry->value);
+			entry->value = value;
+			return true;
+		} else {
+			/* The key doesn't match, linear probing. */
+			pos = (pos + 1) % h->table_size;
+		}
+	}
+
+	return false;
+}
+
+static bool resize_table(struct hash *h, size_t new_size)
+{
+	size_t i;
+	struct entry *old_entries, *e;
+	size_t old_size;
+
+	if (new_size < MIN_SIZE) {
+		/* Do not resize below minimum size */
+		return true;
+	}
+
+	old_entries = h->entries;
+	old_size = h->table_size;
+
+	h->entries = malloc(sizeof(struct entry) * new_size);
+	if (h->entries == NULL)
+		return false;
+
+	memset(h->entries, 0, sizeof(struct entry) * new_size);
+	h->table_size = new_size;
+	h->nentries = 0;
+
+	/* Insert the old entries into the new table. We use the internal
+	 * version _hash_set() to avoid copying the keys again. */
+	for (i = 0; i < old_size; i++) {
+		e = old_entries + i;
+		if (e->in_use == IN_USE)
+			_hash_set(h, e->key, e->value);
+	}
+
+	free(old_entries);
+
+	return true;
+}
+
+bool hash_set(struct hash *h, const char *key, void *value)
+{
+	if ((float) h->nentries / h->table_size > 0.7) {
+		/* If we're over 70% full, grow the table by 30% */
+		if (!resize_table(h, h->table_size * 1.3))
+			return false;
+	}
+
+	return _hash_set(h, strdup(key), value);
+}
+
+
+bool hash_del(struct hash *h, const char *key)
+{
+	size_t pos;
+	struct entry *entry;
+
+	pos = murmurhash2(key, strlen(key)) % h->table_size;
+
+	for (;;) {
+		entry = h->entries + pos;
+		if (entry->in_use == NEVER) {
+			/* We got to a never-used entry, not found. */
+			return false;
+		} else if (entry->in_use == IN_USE &&
+				strcmp(key, entry->key) == 0) {
+			/* The key matches, remove it. */
+			free(entry->key);
+			h->destructor(entry->value);
+			entry->key = NULL;
+			entry->value = NULL;
+			entry->in_use = REMOVED;
+			break;
+		} else {
+			/* The key doesn't match, linear probing. */
+			pos = (pos + 1) % h->table_size;
+		}
+	}
+
+	if (h->table_size > MIN_SIZE &&
+			(float) h->nentries / h->table_size < 0.5) {
+		/* If we're over 50% free, shrink. */
+		if (!resize_table(h, h->table_size * 0.8))
+			return false;
+	}
+
+	return true;
+
+}
+
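
A design note on hash_del() above: deleted slots are marked REMOVED (a
tombstone) instead of being reset to NEVER. With open addressing and linear
probing, lookups stop at the first NEVER slot, so fully clearing a slot would
cut the probe chain for any key that was pushed past it by an earlier
collision; _hash_set() can still reuse the slot, since it accepts anything
that is not IN_USE. The following toy program (hypothetical 4-slot table with
a hard-coded collision, independent of hash.c) illustrates why the tombstone
is needed:

#include <stdio.h>
#include <string.h>

enum state { NEVER, IN_USE, REMOVED };

struct slot {
	enum state st;
	const char *key;
};

/* Linear-probing lookup: stops only at NEVER, walks over REMOVED slots. */
static int toy_find(const struct slot *t, int size, int home, const char *key)
{
	int i, pos;

	for (i = 0, pos = home; i < size; i++, pos = (pos + 1) % size) {
		if (t[pos].st == NEVER)
			return -1;	/* end of the probe chain */
		if (t[pos].st == IN_USE && strcmp(t[pos].key, key) == 0)
			return pos;
	}
	return -1;
}

int main(void)
{
	/* Pretend "a" and "b" both hash to slot 1; "b" was probed into slot 2. */
	struct slot t[4] = {
		{ NEVER, NULL }, { IN_USE, "a" }, { IN_USE, "b" }, { NEVER, NULL },
	};

	/* Delete "a" the way hash_del() does: mark its slot REMOVED. */
	t[1].st = REMOVED;
	t[1].key = NULL;

	/* "b" is still reachable because the probe walks over the tombstone;
	 * had slot 1 been reset to NEVER, this lookup would stop there. */
	printf("\"b\" found at slot %d\n", toy_find(t, 4, 1, "b"));

	return 0;
}
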
diff --git a/libfiu/hash.h b/libfiu/hash.h
new file mode 100644
index 0000000..194105c
--- /dev/null
+++ b/libfiu/hash.h
@@ -0,0 +1,20 @@
+
+#ifndef _HASH_H
+#define _HASH_H
+
+/* Generic hash table. See hash.c for more information. */
+
+#include <sys/types.h>		/* for size_t */
+#include <stdint.h>		/* for int64_t */
+
+typedef struct hash hash_t;
+
+hash_t *hash_create(void (*destructor)(void *));
+void hash_free(hash_t *h);
+
+void *hash_get(hash_t *h, const char *key);
+bool hash_set(hash_t *h, const char *key, void *value);
+bool hash_del(hash_t *h, const char *key);
+
+#endif
+
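
For reference, a minimal sketch of the hash_t API declared above. The
destructor passed to hash_create() runs on a value when its entry is
overwritten or deleted, and on every remaining value when the table is freed,
so values should be heap-allocated and owned by the table. The key used here
is illustrative, and the explicit <stdbool.h> include is an assumption for
the example, since hash.h declares functions returning bool but does not
include that header itself.

#include <stdbool.h>	/* hash.h uses bool but does not include this itself */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hash.h"

int main(void)
{
	hash_t *h = hash_create(free);
	if (h == NULL)
		return 1;

	/* The table copies the key but takes ownership of the value. */
	hash_set(h, "libc/mm/malloc", strdup("enabled"));

	char *v = hash_get(h, "libc/mm/malloc");
	printf("%s\n", v ? v : "(not found)");	/* enabled */

	/* hash_del() runs the destructor (free) on the stored value. */
	if (!hash_del(h, "libc/mm/malloc"))
		return 1;

	hash_free(h);
	return 0;
}
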
diff --git a/libfiu/wtable.c b/libfiu/wtable.c
index 11dc764..2cce744 100644
--- a/libfiu/wtable.c
+++ b/libfiu/wtable.c
@@ -22,10 +22,13 @@
 #include <stdlib.h>		/* for malloc() */
 #include <string.h>		/* for memcpy()/memcmp() */
 #include <stdio.h>		/* snprintf() */
+
+#include "hash.h"
 #include "wtable.h"
 
 
-struct entry {
+/* Entry of the wildcard array. */
+struct wentry {
 	char *key;
 	size_t key_len;
 
@@ -34,9 +37,14 @@ struct entry {
 };
 
 struct wtable {
-	struct entry *entries;
-	size_t table_size;
-	size_t nentries;
+	/* Final (non-wildcard) entries are kept in this hash. */
+	hash_t *finals;
+
+	/* Wildcarded entries are kept in this dynamic array. */
+	struct wentry *wildcards;
+	size_t ws_size;
+	size_t ws_used_count;
+
 	void (*destructor)(void *);
 };
 
@@ -44,41 +52,55 @@ struct wtable {
 /* Minimum table size. */
 #define MIN_SIZE 10
 
+
 struct wtable *wtable_create(void (*destructor)(void *))
 {
 	struct wtable *t = malloc(sizeof(struct wtable));
 	if (t == NULL)
 		return NULL;
 
-	t->entries = malloc(sizeof(struct entry) * MIN_SIZE);
-	if (t->entries == NULL) {
-		free(t);
-		return NULL;
-	}
+	t->wildcards = NULL;
+
+	t->finals = hash_create(destructor);
+	if (t->finals == NULL)
+		goto error;
+
+	t->wildcards = malloc(sizeof(struct wentry) * MIN_SIZE);
+	if (t->wildcards == NULL)
+		goto error;
 
-	memset(t->entries, 0, sizeof(struct entry) * MIN_SIZE);
+	memset(t->wildcards, 0, sizeof(struct wentry) * MIN_SIZE);
 
-	t->table_size = MIN_SIZE;
-	t->nentries = 0;
+	t->ws_size = MIN_SIZE;
+	t->ws_used_count = 0;
 	t->destructor = destructor;
 
 	return t;
+
+error:
+	if (t->finals)
+		hash_free(t->finals);
+	free(t->wildcards);
+	free(t);
+	return NULL;
 }
 
 void wtable_free(struct wtable *t)
 {
 	int i;
-	struct entry *entry;
+	struct wentry *entry;
+
+	hash_free(t->finals);
 
-	for (i = 0; i < t->table_size; i++) {
-		entry = t->entries + i;
+	for (i = 0; i < t->ws_size; i++) {
+		entry = t->wildcards + i;
 		if (entry->in_use) {
 			t->destructor(entry->value);
 			free(entry->key);
 		}
 	}
 
-	free(t->entries);
+	free(t->wildcards);
 	free(t);
 }
 
@@ -96,6 +118,14 @@ static unsigned int strlast(const char *s1, const char *s2)
 	return i;
 }
 
+
+/* Returns true if s is a wildcarded string, false otherwise. */
+static bool is_wildcard(const char *s, size_t len)
+{
+	/* Note we only support wildcards at the end of the string for now. */
+	return s[len - 1] == '*';
+}
+
 /* Checks if ws matches s.
  *
  * ws is a "wildcard string", which can end in a '*', in which case we compare
@@ -109,7 +139,7 @@ static bool ws_matches_s(
 	if (ws == NULL || s == NULL)
 		return false;
 
-	if (exact || ws[ws_len - 1] != '*') {
+	if (exact || !is_wildcard(ws, ws_len)) {
 		/* Exact match */
 		if (ws_len != s_len)
 			return false;
@@ -128,18 +158,18 @@ static bool ws_matches_s(
  *
  * Returns a pointer to the entry, or NULL if not found.
  */
-static struct entry *find_entry(struct wtable *t, const char *key,
-		bool exact, struct entry **first_free)
+static struct wentry *wildcards_find_entry(struct wtable *t, const char *key,
+		bool exact, struct wentry **first_free)
 {
 	size_t key_len;
 	size_t pos;
-	struct entry *entry;
+	struct wentry *entry;
 	bool found_free = false;
 
 	key_len = strlen(key);
 
-	for (pos = 0; pos < t->table_size; pos++) {
-		entry = t->entries + pos;
+	for (pos = 0; pos < t->ws_size; pos++) {
+		entry = t->wildcards + pos;
 		if (!entry->in_use) {
 			if (!found_free && first_free) {
 				*first_free = entry;
@@ -156,27 +186,36 @@ static struct entry *find_entry(struct wtable *t, const char *key,
 
 	/* No match. */
 	return NULL;
-
 }
 
 void *wtable_get(struct wtable *t, const char *key)
 {
-	struct entry *entry = find_entry(t, key, false, NULL);
+	void *value;
+	struct wentry *entry;
 
+	/* Do an exact lookup first. */
+	value = hash_get(t->finals, key);
+	if (value)
+		return value;
+
+	/* And then a wildcards one. */
+	entry = wildcards_find_entry(t, key, false, NULL);
 	if (entry)
 		return entry->value;
+
 	return NULL;
 }
 
-/* Internal version of wtable_set().
+/* Set on our wildcards table.
+ * For internal use only.
  * It uses the key as-is (it won't copy it), and it won't resize the array
  * either. */
-static bool _wtable_set(struct wtable *t, char *key, void *value)
+static bool wildcards_set(struct wtable *t, char *key, void *value)
 {
-	struct entry *entry, *first_free;
+	struct wentry *entry, *first_free;
 
 	first_free = NULL;
-	entry = find_entry(t, key, true, &first_free);
+	entry = wildcards_find_entry(t, key, true, &first_free);
 
 	if (entry) {
 		/* Found a match, override the value. */
@@ -191,7 +230,7 @@ static bool _wtable_set(struct wtable *t, char *key, void *value)
 		first_free->key_len = strlen(key);
 		first_free->value = value;
 		first_free->in_use = true;
-		t->nentries++;
+		t->ws_used_count++;
 		return true;
 	}
 
@@ -203,7 +242,7 @@ static bool _wtable_set(struct wtable *t, char *key, void *value)
 static bool resize_table(struct wtable *t, size_t new_size)
 {
 	size_t i;
-	struct entry *old_entries, *e;
+	struct wentry *old_wildcards, *e;
 	size_t old_size;
 
 	if (new_size < MIN_SIZE) {
@@ -211,71 +250,80 @@ static bool resize_table(struct wtable *t, size_t new_size)
 		return true;
 	}
 
-	old_entries = t->entries;
-	old_size = t->table_size;
+	old_wildcards = t->wildcards;
+	old_size = t->ws_size;
 
-	t->entries = malloc(sizeof(struct entry) * new_size);
-	if (t->entries == NULL)
+	t->wildcards = malloc(sizeof(struct wentry) * new_size);
+	if (t->wildcards == NULL)
 		return false;
 
-	memset(t->entries, 0, sizeof(struct entry) * new_size);
-	t->table_size = new_size;
-	t->nentries = 0;
+	memset(t->wildcards, 0, sizeof(struct wentry) * new_size);
+	t->ws_size = new_size;
+	t->ws_used_count = 0;
 
-	/* Insert the old entries into the new table. We use the internal
-	 * version _wtable_set() to avoid copying the keys again. */
+	/* Insert the old entries into the new table. */
 	for (i = 0; i < old_size; i++) {
-		e = old_entries + i;
+		e = old_wildcards + i;
 		if (e->in_use) {
-			_wtable_set(t, e->key, e->value);
+			wildcards_set(t, e->key, e->value);
 		}
 	}
 
+	free(old_wildcards);
+
 	return true;
 }
 
 bool wtable_set(struct wtable *t, const char *key, void *value)
 {
-	if ((t->table_size - t->nentries) <= 1) {
-		/* If we only have one entry left, grow by 30%, plus one to
-		 * make sure we always increase even if the percentage isn't
-		 * enough. */
-		if (!resize_table(t, t->table_size * 1.3 + 1))
-			return false;
-	}
+	if (is_wildcard(key, strlen(key))) {
+		if ((t->ws_size - t->ws_used_count) <= 1) {
+			/* If we only have one entry left, grow by 30%, plus
+			 * one to make sure we always increase even if the
+			 * percentage isn't enough. */
+			if (!resize_table(t, t->ws_size * 1.3 + 1))
+				return false;
+		}
 
-	return _wtable_set(t, strdup(key), value);
+		return wildcards_set(t, strdup(key), value);
+	} else {
+		return hash_set(t->finals, key, value);
+	}
 }
 
 
 bool wtable_del(struct wtable *t, const char *key)
 {
-	struct entry *entry;
+	struct wentry *entry;
 
-	entry = find_entry(t, key, true, NULL);
+	if (is_wildcard(key, strlen(key))) {
+		entry = wildcards_find_entry(t, key, true, NULL);
 
-	if (!entry) {
-		/* Key not found. */
-		return false;
-	}
+		if (!entry) {
+			/* Key not found. */
+			return false;
+		}
 
-	/* Mark the entry as free. */
-	free(entry->key);
-	entry->key = NULL;
-	entry->key_len = 0;
-	t->destructor(entry->value);
-	entry->value = NULL;
-	entry->in_use = false;
+		/* Mark the entry as free. */
+		free(entry->key);
+		entry->key = NULL;
+		entry->key_len = 0;
+		t->destructor(entry->value);
+		entry->value = NULL;
+		entry->in_use = false;
 
-	t->nentries--;
+		t->ws_used_count--;
 
-	/* Shrink if the table is less than 60% occupied. */
-	if (t->table_size > MIN_SIZE &&
-			(float) t->nentries / t->table_size < 0.6) {
-		if (!resize_table(t, t->nentries + 3))
-			return false;
-	}
+		/* Shrink if the table is less than 60% occupied. */
+		if (t->ws_size > MIN_SIZE &&
+				(float) t->ws_used_count / t->ws_size < 0.6) {
+			if (!resize_table(t, t->ws_used_count + 3))
+				return false;
+		}
 
-	return true;
+		return true;
+	} else {
+		return hash_del(t->finals, key);
+	}
 }
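
One behavioural detail worth noting about wtable_del() above: deletion is
exact in both stores, so removing a final key never touches a wildcard entry
that happens to match it, and a wildcard entry can only be removed by naming
it exactly (this matches the old behaviour, which also looked up keys with
exact=true when deleting). A short sketch, with illustrative keys and the
same assumptions about wtable.h as above:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "wtable.h"

int main(void)
{
	struct wtable *t = wtable_create(free);
	if (t == NULL)
		return 1;

	wtable_set(t, "posix/io/*", strdup("from-wildcard"));
	wtable_set(t, "posix/io/oc/open", strdup("from-final"));

	/* Removes only the final entry from the hash; the wildcard entry
	 * in the array is left untouched. */
	wtable_del(t, "posix/io/oc/open");

	/* The lookup now falls through to the wildcard scan. */
	char *v = wtable_get(t, "posix/io/oc/open");
	printf("%s\n", v ? v : "(not found)");	/* from-wildcard */

	/* Removing the wildcard entry requires naming it exactly. */
	wtable_del(t, "posix/io/*");

	wtable_free(t);
	return 0;
}
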