author | Alberto Bertogli
<albertito@blitiri.com.ar> 2013-10-23 01:44:17 UTC |
committer | Alberto Bertogli
<albertito@blitiri.com.ar> 2013-10-29 22:43:24 UTC |
parent | f672b4dc9bb9d348ab014d660bd892df78e8a148 |
libfiu/Makefile | +1 | -1 |
libfiu/hash.c | +266 | -0 |
libfiu/hash.h | +20 | -0 |
libfiu/wtable.c | +118 | -70 |
diff --git a/libfiu/Makefile b/libfiu/Makefile
index 3a543ba..4e0d01a 100644
--- a/libfiu/Makefile
+++ b/libfiu/Makefile
@@ -26,7 +26,7 @@ DESTDIR=$(PREFIX)
 
 INSTALL=install
 
-OBJS = fiu.o fiu-rc.o backtrace.o wtable.o
+OBJS = fiu.o fiu-rc.o backtrace.o wtable.o hash.o
 
 
 ifneq ($(V), 1)
diff --git a/libfiu/hash.c b/libfiu/hash.c
new file mode 100644
index 0000000..ee6b5c1
--- /dev/null
+++ b/libfiu/hash.c
@@ -0,0 +1,266 @@
+
+/*
+ * Generic, simple hash table.
+ *
+ * Takes \0-terminated strings as keys, and void * as values.
+ * It is tuned for a small number of elements (< 1000).
+ */
+
+#include <sys/types.h>		/* for size_t */
+#include <stdint.h>		/* for [u]int*_t */
+#include <stdbool.h>		/* for bool */
+#include <stdlib.h>		/* for malloc() */
+#include <string.h>		/* for memcpy()/memcmp() */
+#include <stdio.h>		/* snprintf() */
+#include "hash.h"
+
+
+/* MurmurHash2, by Austin Appleby. The one we use.
+ * It has been modified to fit into the coding style, to work on uint32_t
+ * instead of ints, and the seed was fixed to a random number because it's not
+ * an issue for us. The author placed it in the public domain, so it's ok to
+ * use it here.
+ * http://sites.google.com/site/murmurhash/ */
+static uint32_t murmurhash2(const char *key, size_t len)
+{
+	const uint32_t m = 0x5bd1e995;
+	const int r = 24;
+	const uint32_t seed = 0x34a4b627;
+
+	// Initialize the hash to a 'random' value
+	uint32_t h = seed ^ len;
+
+	// Mix 4 bytes at a time into the hash
+	while (len >= 4) {
+		uint32_t k = *(uint32_t *) key;
+
+		k *= m;
+		k ^= k >> r;
+		k *= m;
+
+		h *= m;
+		h ^= k;
+
+		key += 4;
+		len -= 4;
+	}
+
+	// Handle the last few bytes of the input array
+	switch (len) {
+	case 3: h ^= key[2] << 16;
+	case 2: h ^= key[1] << 8;
+	case 1: h ^= key[0];
+		h *= m;
+	}
+
+	// Do a few final mixes of the hash to ensure the last few
+	// bytes are well-incorporated.
+	h ^= h >> 13;
+	h *= m;
+	h ^= h >> 15;
+
+	return h;
+}
+
+
+enum used_as {
+	NEVER = 0,
+	IN_USE = 1,
+	REMOVED = 2,
+};
+
+struct entry {
+	char *key;
+	void *value;
+	enum used_as in_use;
+};
+
+struct hash {
+	struct entry *entries;
+	size_t table_size;
+	size_t nentries;
+
+	void (*destructor)(void *);
+};
+
+
+/* Minimum table size. */
+#define MIN_SIZE 10
+
+struct hash *hash_create(void (*destructor)(void *))
+{
+	struct hash *h = malloc(sizeof(struct hash));
+	if (h == NULL)
+		return NULL;
+
+	h->entries = malloc(sizeof(struct entry) * MIN_SIZE);
+	if (h->entries == NULL) {
+		free(h);
+		return NULL;
+	}
+
+	memset(h->entries, 0, sizeof(struct entry) * MIN_SIZE);
+
+	h->table_size = MIN_SIZE;
+	h->nentries = 0;
+	h->destructor = destructor;
+
+	return h;
+}
+
+void hash_free(struct hash *h)
+{
+	int i;
+	struct entry *entry;
+
+	for (i = 0; i < h->table_size; i++) {
+		entry = h->entries + i;
+		if (entry->in_use == IN_USE) {
+			h->destructor(entry->value);
+			free(entry->key);
+		}
+	}
+
+	free(h->entries);
+	free(h);
+}
+
+void *hash_get(struct hash *h, const char *key)
+{
+	size_t pos;
+	struct entry *entry;
+
+	pos = murmurhash2(key, strlen(key)) % h->table_size;
+
+	for (;;) {
+		entry = h->entries + pos;
+		if (entry->in_use == NEVER) {
+			/* We got to an entry never used, no match. */
+			return NULL;
+		} else if (entry->in_use == IN_USE &&
+				strcmp(key, entry->key) == 0) {
+			/* The key matches. */
+			return entry->value;
+		} else {
+			/* We use linear probing for now */
+			pos = (pos + 1) % h->table_size;
+		}
+	}
+
+	return NULL;
+}
+
+/* Internal version of hash_set.
+ * It uses the key as-is (it won't copy it), and it won't resize the array
+ * either. */
+static bool _hash_set(struct hash *h, char *key, void *value)
+{
+	size_t pos;
+	struct entry *entry;
+
+	pos = murmurhash2(key, strlen(key)) % h->table_size;
+
+	for (;;) {
+		entry = h->entries + pos;
+		if (entry->in_use != IN_USE) {
+			entry->in_use = IN_USE;
+			entry->key = key;
+			entry->value = value;
+			h->nentries++;
+			return true;
+		} else if (strcmp(key, entry->key) == 0) {
+			/* The key matches, override the value. */
+			h->destructor(entry->value);
+			entry->value = value;
+			return true;
+		} else {
+			/* The key doesn't match, linear probing. */
+			pos = (pos + 1) % h->table_size;
+		}
+	}
+
+	return false;
+}
+
+static bool resize_table(struct hash *h, size_t new_size)
+{
+	size_t i;
+	struct entry *old_entries, *e;
+	size_t old_size;
+
+	if (new_size < MIN_SIZE) {
+		/* Do not resize below minimum size */
+		return true;
+	}
+
+	old_entries = h->entries;
+	old_size = h->table_size;
+
+	h->entries = malloc(sizeof(struct entry) * new_size);
+	if (h->entries == NULL)
+		return false;
+
+	memset(h->entries, 0, sizeof(struct entry) * new_size);
+	h->table_size = new_size;
+	h->nentries = 0;
+
+	/* Insert the old entries into the new table. We use the internal
+	 * version _hash_set() to avoid copying the keys again. */
+	for (i = 0; i < old_size; i++) {
+		e = old_entries + i;
+		if (e->in_use == IN_USE)
+			_hash_set(h, e->key, e->value);
+	}
+
+	free(old_entries);
+
+	return true;
+}
+
+bool hash_set(struct hash *h, const char *key, void *value)
+{
+	if ((float) h->nentries / h->table_size > 0.7) {
+		/* If we're over 70% full, grow the table by 30% */
+		if (!resize_table(h, h->table_size * 1.3))
+			return false;
+	}
+
+	return _hash_set(h, strdup(key), value);
+}
+
+
+bool hash_del(struct hash *h, const char *key)
+{
+	size_t pos;
+	struct entry *entry;
+
+	pos = murmurhash2(key, strlen(key)) % h->table_size;
+
+	for (;;) {
+		entry = h->entries + pos;
+		if (entry->in_use == NEVER) {
+			/* We got to a never used key, not found. */
+			return false;
+		} else if (entry->in_use == IN_USE &&
+				strcmp(key, entry->key) == 0) {
+			/* The key matches, remove it. */
+			free(entry->key);
+			h->destructor(entry->value);
+			entry->key = NULL; entry->value = NULL;
+			entry->in_use = REMOVED;
+			h->nentries--;
+			break;
+		} else {
+			/* The key doesn't match, linear probing. */
+			pos = (pos + 1) % h->table_size;
+		}
+	}
+
+	if (h->table_size > MIN_SIZE &&
+			(float) h->nentries / h->table_size < 0.5) {
+		/* If we're over 50% free, shrink. */
+		if (!resize_table(h, h->table_size * 0.8))
+			return false;
+	}
+
+	return true;
+
+}
+
diff --git a/libfiu/hash.h b/libfiu/hash.h
new file mode 100644
index 0000000..194105c
--- /dev/null
+++ b/libfiu/hash.h
@@ -0,0 +1,20 @@
+
+#ifndef _HASH_H
+#define _HASH_H
+
+/* Generic hash table. See hash.c for more information. */
+
+#include <sys/types.h>		/* for size_t */
+#include <stdbool.h>		/* for bool */
+
+typedef struct hash hash_t;
+
+hash_t *hash_create(void (*destructor)(void *));
+void hash_free(hash_t *h);
+
+void *hash_get(hash_t *h, const char *key);
+bool hash_set(hash_t *h, const char *key, void *value);
+bool hash_del(hash_t *h, const char *key);
+
+#endif
+
diff --git a/libfiu/wtable.c b/libfiu/wtable.c
index 11dc764..2cce744 100644
--- a/libfiu/wtable.c
+++ b/libfiu/wtable.c
@@ -22,10 +22,13 @@
 #include <stdlib.h>		/* for malloc() */
 #include <string.h>		/* for memcpy()/memcmp() */
 #include <stdio.h>		/* snprintf() */
+
+#include "hash.h"
 
 #include "wtable.h"
 
 
-struct entry {
+/* Entry of the wildcard array. */
+struct wentry {
 	char *key;
 	size_t key_len;
@@ -34,9 +37,14 @@
 };
 
 struct wtable {
-	struct entry *entries;
-	size_t table_size;
-	size_t nentries;
+	/* Final (non-wildcard) entries are kept in this hash. */
+	hash_t *finals;
+
+	/* Wildcarded entries are kept in this dynamic array. */
+	struct wentry *wildcards;
+	size_t ws_size;
+	size_t ws_used_count;
+
 	void (*destructor)(void *);
 };
 
@@ -44,41 +52,55 @@
 /* Minimum table size. */
 #define MIN_SIZE 10
 
+
 struct wtable *wtable_create(void (*destructor)(void *))
 {
 	struct wtable *t = malloc(sizeof(struct wtable));
 	if (t == NULL)
 		return NULL;
 
-	t->entries = malloc(sizeof(struct entry) * MIN_SIZE);
-	if (t->entries == NULL) {
-		free(t);
-		return NULL;
-	}
+	t->wildcards = NULL;
+
+	t->finals = hash_create(destructor);
+	if (t->finals == NULL)
+		goto error;
+
+	t->wildcards = malloc(sizeof(struct wentry) * MIN_SIZE);
+	if (t->wildcards == NULL)
+		goto error;
 
-	memset(t->entries, 0, sizeof(struct entry) * MIN_SIZE);
+	memset(t->wildcards, 0, sizeof(struct wentry) * MIN_SIZE);
 
-	t->table_size = MIN_SIZE;
-	t->nentries = 0;
+	t->ws_size = MIN_SIZE;
+	t->ws_used_count = 0;
 	t->destructor = destructor;
 
 	return t;
+
+error:
+	if (t->finals)
+		hash_free(t->finals);
+	free(t->wildcards);
+	free(t);
+	return NULL;
 }
 
 void wtable_free(struct wtable *t)
 {
 	int i;
-	struct entry *entry;
+	struct wentry *entry;
+
+	hash_free(t->finals);
 
-	for (i = 0; i < t->table_size; i++) {
-		entry = t->entries + i;
+	for (i = 0; i < t->ws_size; i++) {
+		entry = t->wildcards + i;
 		if (entry->in_use) {
 			t->destructor(entry->value);
 			free(entry->key);
 		}
 	}
 
-	free(t->entries);
+	free(t->wildcards);
 	free(t);
 }
@@ -96,6 +118,14 @@ static unsigned int strlast(const char *s1, const char *s2)
 	return i;
 }
 
+
+/* True if s is a wildcarded string, False otherwise. */
+static bool is_wildcard(const char *s, size_t len)
+{
+	/* Note we only support wildcards at the end of the string for now. */
+	return s[len - 1] == '*';
+}
+
 /* Checks if ws matches s.
  *
  * ws is a "wildcard string", which can end in a '*', in which case we compare
@@ -109,7 +139,7 @@ static bool ws_matches_s(
 	if (ws == NULL || s == NULL)
 		return false;
 
-	if (exact || ws[ws_len - 1] != '*') {
+	if (exact || !is_wildcard(ws, ws_len)) {
 		/* Exact match */
 		if (ws_len != s_len)
 			return false;
@@ -128,18 +158,18 @@
  *
  * Returns a pointer to the entry, or NULL if not found. */
-static struct entry *find_entry(struct wtable *t, const char *key,
-		bool exact, struct entry **first_free)
+static struct wentry *wildcards_find_entry(struct wtable *t, const char *key,
+		bool exact, struct wentry **first_free)
 {
 	size_t key_len;
 	size_t pos;
-	struct entry *entry;
+	struct wentry *entry;
 	bool found_free = false;
 
 	key_len = strlen(key);
 
-	for (pos = 0; pos < t->table_size; pos++) {
-		entry = t->entries + pos;
+	for (pos = 0; pos < t->ws_size; pos++) {
+		entry = t->wildcards + pos;
 		if (!entry->in_use) {
 			if (!found_free && first_free) {
 				*first_free = entry;
@@ -156,27 +186,36 @@
 	/* No match. */
 	return NULL;
-
 }
 
 void *wtable_get(struct wtable *t, const char *key)
 {
-	struct entry *entry = find_entry(t, key, false, NULL);
+	void *value;
+	struct wentry *entry;
 
+	/* Do an exact lookup first. */
+	value = hash_get(t->finals, key);
+	if (value)
+		return value;
+
+	/* And then a wildcards one. */
+	entry = wildcards_find_entry(t, key, false, NULL);
 	if (entry)
 		return entry->value;
+
 	return NULL;
 }
 
-/* Internal version of wtable_set().
+/* Set on our wildcards table.
+ * For internal use only.
  * It uses the key as-is (it won't copy it), and it won't resize the array
  * either. */
-static bool _wtable_set(struct wtable *t, char *key, void *value)
+static bool wildcards_set(struct wtable *t, char *key, void *value)
 {
-	struct entry *entry, *first_free;
+	struct wentry *entry, *first_free;
 
 	first_free = NULL;
-	entry = find_entry(t, key, true, &first_free);
+	entry = wildcards_find_entry(t, key, true, &first_free);
 	if (entry) {
 		/* Found a match, override the value. */
@@ -191,7 +230,7 @@ static bool _wtable_set(struct wtable *t, char *key, void *value)
 		first_free->key_len = strlen(key);
 		first_free->value = value;
 		first_free->in_use = true;
-		t->nentries++;
+		t->ws_used_count++;
 		return true;
 	}
 
@@ -203,7 +242,7 @@ static bool _wtable_set(struct wtable *t, char *key, void *value)
 static bool resize_table(struct wtable *t, size_t new_size)
 {
 	size_t i;
-	struct entry *old_entries, *e;
+	struct wentry *old_wildcards, *e;
 	size_t old_size;
 
 	if (new_size < MIN_SIZE) {
@@ -211,71 +250,80 @@ static bool resize_table(struct wtable *t, size_t new_size)
 		return true;
 	}
 
-	old_entries = t->entries;
-	old_size = t->table_size;
+	old_wildcards = t->wildcards;
+	old_size = t->ws_size;
 
-	t->entries = malloc(sizeof(struct entry) * new_size);
-	if (t->entries == NULL)
+	t->wildcards = malloc(sizeof(struct wentry) * new_size);
+	if (t->wildcards == NULL)
 		return false;
 
-	memset(t->entries, 0, sizeof(struct entry) * new_size);
-	t->table_size = new_size;
-	t->nentries = 0;
+	memset(t->wildcards, 0, sizeof(struct wentry) * new_size);
+	t->ws_size = new_size;
+	t->ws_used_count = 0;
 
-	/* Insert the old entries into the new table. We use the internal
-	 * version _wtable_set() to avoid copying the keys again. */
+	/* Insert the old entries into the new table. */
 	for (i = 0; i < old_size; i++) {
-		e = old_entries + i;
+		e = old_wildcards + i;
 		if (e->in_use) {
-			_wtable_set(t, e->key, e->value);
+			wildcards_set(t, e->key, e->value);
 		}
 	}
 
+	free(old_wildcards);
+
 	return true;
 }
 
 bool wtable_set(struct wtable *t, const char *key, void *value)
 {
-	if ((t->table_size - t->nentries) <= 1) {
-		/* If we only have one entry left, grow by 30%, plus one to
-		 * make sure we always increase even if the percentage isn't
-		 * enough. */
-		if (!resize_table(t, t->table_size * 1.3 + 1))
-			return false;
-	}
+	if (is_wildcard(key, strlen(key))) {
+		if ((t->ws_size - t->ws_used_count) <= 1) {
+			/* If we only have one entry left, grow by 30%, plus
+			 * one to make sure we always increase even if the
+			 * percentage isn't enough. */
+			if (!resize_table(t, t->ws_size * 1.3 + 1))
+				return false;
+		}
 
-	return _wtable_set(t, strdup(key), value);
+		return wildcards_set(t, strdup(key), value);
+	} else {
+		return hash_set(t->finals, key, value);
+	}
 }
 
 bool wtable_del(struct wtable *t, const char *key)
 {
-	struct entry *entry;
+	struct wentry *entry;
 
-	entry = find_entry(t, key, true, NULL);
+	if (is_wildcard(key, strlen(key))) {
+		entry = wildcards_find_entry(t, key, true, NULL);
 
-	if (!entry) {
-		/* Key not found. */
-		return false;
-	}
+		if (!entry) {
+			/* Key not found. */
+			return false;
+		}
 
-	/* Mark the entry as free. */
-	free(entry->key);
-	entry->key = NULL;
-	entry->key_len = 0;
-	t->destructor(entry->value);
-	entry->value = NULL;
-	entry->in_use = false;
+		/* Mark the entry as free. */
+		free(entry->key);
+		entry->key = NULL;
+		entry->key_len = 0;
+		t->destructor(entry->value);
+		entry->value = NULL;
+		entry->in_use = false;
 
-	t->nentries--;
+		t->ws_used_count--;
 
-	/* Shrink if the table is less than 60% occupied. */
-	if (t->table_size > MIN_SIZE &&
-			(float) t->nentries / t->table_size < 0.6) {
-		if (!resize_table(t, t->nentries + 3))
-			return false;
-	}
+		/* Shrink if the table is less than 60% occupied. */
+		if (t->ws_size > MIN_SIZE &&
+				(float) t->ws_used_count / t->ws_size < 0.6) {
+			if (!resize_table(t, t->ws_used_count + 3))
+				return false;
+		}
 
-	return true;
+		return true;
+	} else {
+		return hash_del(t->finals, key);
+	}
 }