diff --git a/README.md b/README.md index 4fe7d14..1640b66 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ - # C Reusables Readme Home: https://github.com/mity/c-reusables @@ -39,6 +38,8 @@ The following rules apply: * `data/buffer.[hc]`: Simple growing buffer. It offers also a stack-like interface (push, pop operations) and array-like interface. + * `data/htable.[hc]`: Simple growing intrusive hash table. + * `data/list.h`: Intrusive double-linked and single-linked lists. * `data/rbtree.[hc]`: Intrusive red-black tree. @@ -59,7 +60,7 @@ The following rules apply: * `hash/crc32.[hc]`: 32-bit cyclic redundancy check function. - * `hash/fnv1a.[hc]`: 32-bit and 64-bit Fowler–Noll–Vo (variant 1a) hash + * `hash/fnv1a.[hc]`: 32-bit and 64-bit Fowler-Noll-Vo (variant 1a) hash functions. ### Directory `mem` diff --git a/data/htable.c b/data/htable.c new file mode 100644 index 0000000..e51d160 --- /dev/null +++ b/data/htable.c @@ -0,0 +1,251 @@ +/* + * C Reusables + * + * + * Copyright (c) 2023 Martin Mitas + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "htable.h"
+
+#include <stdlib.h>
+
+
+/* The table is a sequence of "planes". Plane number `i` has (59 << i) slots,
+ * i.e. every plane is twice as big as the previous one, and all the planes
+ * together provide 59 * (2^n_planes - 1) slots.
+ *
+ * The arithmetic is done in size_t so that it agrees with HTABLE::n and does
+ * not depend on the width of int.
+ */
+#define HTABLE_PLANE_SIZE(plane_index)  ((size_t) 59 << (plane_index))
+#define HTABLE_SLOT_COUNT(htable)       (HTABLE_PLANE_SIZE(0) * (((size_t) 1 << (htable)->n_planes) - 1))
+#define HTABLE_TOO_FULL(htable)         ((htable)->n >= HTABLE_SLOT_COUNT(htable))
+#define HTABLE_TOO_EMPTY(htable)        ((htable)->n < (HTABLE_SLOT_COUNT(htable) / 4))
+
+
+/* Grow by appending one more plane, twice the size of the previous one.
+ *
+ * Returns 0 if the caller can go on and insert into the last plane, or -1 if
+ * there is no plane at all to insert into.
+ */
+static int
+htable_grow(HTABLE* htable)
+{
+    void** new_planes;
+    void* new_plane;
+
+    new_planes = (void**) realloc(htable->planes,
+                    ((size_t) htable->n_planes + 1) * sizeof(*new_planes));
+    if(new_planes != NULL) {
+        htable->planes = new_planes;
+
+        /* calloc() gives us a plane with all the slots empty. */
+        new_plane = calloc(HTABLE_PLANE_SIZE(htable->n_planes), sizeof(HTABLE_NODE*));
+        if(new_plane != NULL) {
+            htable->planes[htable->n_planes] = new_plane;
+            htable->n_planes++;
+            return 0;
+        }
+    }
+
+    if(htable->n_planes > 0) {
+        /* It may be suboptimal, but we can still add the new stuff into
+         * the current planes. This holds no matter which of the two
+         * allocations above has failed. */
+        return 0;
+    }
+
+    /* There is not a single plane. Do not keep an empty plane array around,
+     * so that the table stays in the same state as after htable_init(). */
+    free(htable->planes);
+    htable->planes = NULL;
+    return -1;
+}
+
+/* Release every plane and the plane array itself. The nodes are not touched.
+ */
+static void
+htable_free_all_planes(HTABLE* htable)
+{
+    int i;
+
+    for(i = 0; i < htable->n_planes; i++)
+        free(htable->planes[i]);
+    free(htable->planes);
+
+    htable->planes = NULL;
+    htable->n_planes = 0;
+}
+
+/* Shrink by one plane, or release everything if the table has become empty.
+ */
+static void
+htable_shrink(HTABLE* htable)
+{
+    HTABLE_NODE** src;
+    size_t src_size;
+    HTABLE_NODE** dst;
+    size_t dst_size;
+    size_t i;
+
+    if(htable->n == 0) {
+        htable_free_all_planes(htable);
+        return;
+    }
+
+    if(htable->n_planes <= 1)
+        return;
+
+    /* We shrink by merging the largest plane into the previous one. */
+    src = (HTABLE_NODE**) htable->planes[htable->n_planes-1];
+    src_size = HTABLE_PLANE_SIZE(htable->n_planes-1);
+    dst = (HTABLE_NODE**) htable->planes[htable->n_planes-2];
+    dst_size = HTABLE_PLANE_SIZE(htable->n_planes-2);
+
+    for(i = 0; i < src_size; i++) {
+        /* dst_size divides src_size, hence (hash % src_size) % dst_size is
+         * the same slot as (hash % dst_size), which is where a lookup in the
+         * smaller plane is going to search. */
+        HTABLE_NODE** dst_slot = &dst[i % dst_size];
+
+        if(src[i] == NULL)
+            continue;
+
+        if(*dst_slot != NULL) {
+            /* Join the chain already living in the target slot to our tail. */
+            HTABLE_NODE* tail = src[i];
+            while(tail->next != NULL)
+                tail = tail->next;
+            tail->next = *dst_slot;
+        }
+
+        /* Move our chain to the previous plane. */
+        *dst_slot = src[i];
+    }
+
+    free(src);
+    htable->planes[htable->n_planes-1] = NULL;
+    htable->n_planes--;
+}
+
+/* Find the node equal to the key.
+ *
+ * On success, returns the node and, if p_ref is not NULL, stores into *p_ref
+ * the address of the pointer which refers to the node (either a plane slot or
+ * the `next` member of its predecessor), so that the caller can unlink it.
+ * Returns NULL if there is no such node; *p_ref is then left untouched.
+ */
+static HTABLE_NODE*
+htable_lookup_internal(HTABLE* htable, uint32_t hash, const HTABLE_NODE* key,
+                       HTABLE_NODE*** p_ref, HTABLE_CMP_FUNC cmp_func)
+{
+    int i;
+
+    /* It's better to look into the biggest planes first, as much more stuff
+     * is stored there, and also because it's more recently inserted stuff
+     * which is arguably more likely to be accessed soon. */
+    for(i = htable->n_planes-1; i >= 0; i--) {
+        HTABLE_NODE** plane = (HTABLE_NODE**) htable->planes[i];
+        HTABLE_NODE** ref = &plane[hash % HTABLE_PLANE_SIZE(i)];
+
+        while(*ref != NULL) {
+            HTABLE_NODE* node = *ref;
+
+            if(cmp_func(key, node) == 0) {
+                if(p_ref != NULL)
+                    *p_ref = ref;
+                return node;
+            }
+
+            ref = &node->next;
+        }
+    }
+
+    return NULL;
+}
+
+void
+htable_fini(HTABLE* htable, void (*dtor_func)(HTABLE_NODE*))
+{
+    if(dtor_func != NULL  &&  htable->n > 0) {
+        int i;
+
+        for(i = 0; i < htable->n_planes; i++) {
+            HTABLE_NODE** plane = (HTABLE_NODE**) htable->planes[i];
+            size_t plane_size = HTABLE_PLANE_SIZE(i);
+            size_t index;
+
+            for(index = 0; index < plane_size; index++) {
+                while(plane[index] != NULL) {
+                    HTABLE_NODE* node = plane[index];
+
+                    /* Unlink the node before handing it over; the destructor
+                     * is free to release it. */
+                    plane[index] = node->next;
+                    dtor_func(node);
+                }
+            }
+        }
+    }
+    htable->n = 0;
+
+    htable_free_all_planes(htable);
+}
+
+/* Common implementation of htable_insert() and htable_insert_unsafe().
+ *
+ * If skip_lookup is non-zero, the check for an already present node of the
+ * same key is omitted (and cmp_func is never called).
+ */
+static int
+htable_insert_internal(HTABLE* htable, HTABLE_NODE* node,
+                       HTABLE_CMP_FUNC cmp_func, HTABLE_HASH_FUNC hash_func,
+                       int skip_lookup)
+{
+    uint32_t hash;
+    HTABLE_NODE** plane;
+    HTABLE_NODE** slot;
+
+    hash = hash_func(node);
+    if(!skip_lookup) {
+        if(htable_lookup_internal(htable, hash, node, NULL, cmp_func) != NULL)
+            return -1;
+    }
+
+    /* When we are too populated, grow by adding a new plane. */
+    if(HTABLE_TOO_FULL(htable)) {
+        if(htable_grow(htable) != 0)
+            return -1;
+    }
+
+    /* New nodes always go to the head of a chain in the largest plane. */
+    plane = (HTABLE_NODE**) htable->planes[htable->n_planes - 1];
+    slot = &plane[hash % HTABLE_PLANE_SIZE(htable->n_planes - 1)];
+
+    node->next = *slot;
+    *slot = node;
+
+    htable->n++;
+    return 0;
+}
+
+int
+htable_insert(HTABLE* htable, HTABLE_NODE* node,
+              HTABLE_CMP_FUNC cmp_func, HTABLE_HASH_FUNC hash_func)
+{
+    return htable_insert_internal(htable, node, cmp_func, hash_func, 0);
+}
+
+int
+htable_insert_unsafe(HTABLE* htable, HTABLE_NODE* node,
+                     HTABLE_CMP_FUNC cmp_func, HTABLE_HASH_FUNC hash_func)
+{
+    return htable_insert_internal(htable, node, cmp_func, hash_func, 1);
+}
+
+HTABLE_NODE*
+htable_remove(HTABLE* htable, const HTABLE_NODE* key,
+              HTABLE_CMP_FUNC cmp_func, HTABLE_HASH_FUNC hash_func)
+{
+    uint32_t hash;
+    HTABLE_NODE* node;
+    HTABLE_NODE** p_ref;
+
+    hash = hash_func(key);
+    node = htable_lookup_internal(htable, hash, key, &p_ref, cmp_func);
+    if(node == NULL)
+        return NULL;
+
+    /* Unlink the node from its chain. */
+    *p_ref = node->next;
+    htable->n--;
+
+    if(HTABLE_TOO_EMPTY(htable))
+        htable_shrink(htable);
+
+    return node;
+}
+
+HTABLE_NODE*
+htable_lookup(HTABLE* htable, const HTABLE_NODE* key,
+              HTABLE_CMP_FUNC cmp_func, HTABLE_HASH_FUNC hash_func)
+{
+    return htable_lookup_internal(htable, hash_func(key), key, NULL, cmp_func);
+}
diff --git a/data/htable.h b/data/htable.h
new file mode 100644
index 0000000..f26b2a7
--- /dev/null
+++ b/data/htable.h
@@ -0,0 +1,171 @@
+/*
+ * C Reusables
+ *
+ *
+ * Copyright (c) 2023 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef CRE_HTABLE_H
+#define CRE_HTABLE_H
+
+#include <stddef.h>     /* size_t, NULL, offsetof */
+#include <stdint.h>     /* uint32_t */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#if defined __cplusplus
+    #define HTABLE_INLINE__     inline
+#elif defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L
+    #define HTABLE_INLINE__     static inline
+#elif defined __GNUC__
+    #define HTABLE_INLINE__     static __inline__
+#elif defined _MSC_VER
+    #define HTABLE_INLINE__     static __inline
+#else
+    #define HTABLE_INLINE__     static
+#endif
+
+
+#if defined offsetof
+    #define HTABLE_OFFSETOF__(type, member)     offsetof(type, member)
+#elif defined __GNUC__ && __GNUC__ >= 4
+    #define HTABLE_OFFSETOF__(type, member)     __builtin_offsetof(type, member)
+#else
+    #define HTABLE_OFFSETOF__(type, member)     ((size_t) &((type*)0)->member)
+#endif
+
+
+/* The hash table node.
+ *
+ * The hash table is intrusive: embed this structure as a member of your own
+ * data structure and use HTABLE_DATA() to get from the node back to it.
+ */
+typedef struct HTABLE_NODE {
+    struct HTABLE_NODE* next;   /* Next node in the same slot. Managed by the table. */
+} HTABLE_NODE;
+
+
+/* The hash table itself.
+ *
+ * Treat the members as private. Initialize the structure with htable_init()
+ * or HTABLE_INITIALIZER before the first use.
+ */
+typedef struct HTABLE {
+    void** planes;      /* Array of the slot arrays ("planes"). */
+    int n_planes;       /* Count of the planes. */
+    size_t n;           /* Count of the nodes in the table. */
+} HTABLE;
+
+
+/* Comparator function type.
+ *
+ * The comparator function determines whether the nodes have the identical key.
+ * By 'key' we understand any internal data which uniquely identifies the node
+ * within the hash table and which never changes during its presence in the
+ * hash table.
+ *
+ * Note we assume the same comparator function is used throughout the life
+ * time of the hash table.
+ *
+ * WARNING: When different comparator functions are used during the life time
+ * of the hash table, the behavior is undefined.
+ *
+ * It has to return:
+ *   - zero if the two nodes are equal;
+ *   - non-zero otherwise.
+ */
+typedef int (*HTABLE_CMP_FUNC)(const HTABLE_NODE*, const HTABLE_NODE*);
+
+/* Hash function.
+ *
+ * The function implementation must compute the hash only from some part of the
+ * node data, which serves as the unique key of the node.
+ *
+ * The function should be implemented so that for (very large number of) nodes,
+ * the returned values are about as uniformly distributed as possible in the
+ * whole range between 0 and `UINT32_MAX`.
+ *
+ * WARNING: When different hash functions are used during the life time
+ * of the hash table, the behavior is undefined.
+ */
+typedef uint32_t (*HTABLE_HASH_FUNC)(const HTABLE_NODE*);
+
+
+/* Macro for getting pointer to the structure holding the hash table node data.
+ *
+ * (If you use the HTABLE_NODE as the first member of your structure, you
+ * can use a simple casting instead.)
+ */
+#define HTABLE_DATA(node_ptr, type, member)                             \
+            ((type*)((char*)(node_ptr) - HTABLE_OFFSETOF__(type, member)))
+
+
+/* Static initializer, equivalent to calling htable_init().
+ */
+#define HTABLE_INITIALIZER      { NULL, 0, 0 }
+
+/* Initialize an empty hash table. No memory is allocated here.
+ */
+HTABLE_INLINE__ void htable_init(HTABLE* htable)
+    { htable->planes = NULL; htable->n_planes = 0; htable->n = 0; }
+
+/* Cleaner of the hash table. Calls the provided destructor for every node
+ * and releases all internal buffers.
+ *
+ * The destructor may be NULL; the nodes are then just forgotten and the
+ * caller stays responsible for them. The table is left empty, in the same
+ * state as after htable_init().
+ */
+void htable_fini(HTABLE* htable, void (*dtor_func)(HTABLE_NODE*));
+
+/* Returns non-zero if the hash table is empty.
+ */
+HTABLE_INLINE__ int htable_is_empty(HTABLE* htable)
+    { return (htable->n == 0); }
+
+/* Insert a new node into the hash table.
+ *
+ * The table does not copy the node; it links it in (overwriting the member
+ * `next`), so the node has to stay valid as long as it is in the table.
+ *
+ * Returns 0 on success or -1 on failure. The function fails if a node with
+ * the same key is already present, or if an internal memory allocation needed
+ * for the insertion fails. On failure, the node is not taken into the table
+ * and it remains owned by the caller.
+ */
+int htable_insert(HTABLE* htable, HTABLE_NODE* node,
+            HTABLE_CMP_FUNC cmp_func, HTABLE_HASH_FUNC hash_func);
+
+/* Faster version of htable_insert() which does not check whether a node of
+ * the same key is already present (the comparator function is not called at
+ * all). We generally assume the application uses this function only if it
+ * _knows_ there is no such node present yet.
+ *
+ * WARNING: If multiple nodes of the same key are inserted into the hash table,
+ * it's then undefined which of those nodes is matched whenever a lookup for
+ * the given key is performed.
+ *
+ * Returns 0 on success or -1 on failure.
+ */
+int htable_insert_unsafe(HTABLE* htable, HTABLE_NODE* node,
+            HTABLE_CMP_FUNC cmp_func, HTABLE_HASH_FUNC hash_func);
+
+/* Remove a node from the hash table equal to the provided key.
+ *
+ * The key is any node for which the hash and comparator functions work; it
+ * does not have to be present in the table itself.
+ *
+ * Returns pointer to the removed node (so caller may e.g. free any resources
+ * associated with it), or NULL if no such node is present in the table.
+ */
+HTABLE_NODE* htable_remove(HTABLE* htable, const HTABLE_NODE* key,
+            HTABLE_CMP_FUNC cmp_func, HTABLE_HASH_FUNC hash_func);
+
+/* Looks for a node in the hash table equal to the provided key.
+ *
+ * Returns pointer to the found node, or NULL if no such node is present in
+ * the table. The node stays in the table.
+ */
+HTABLE_NODE* htable_lookup(HTABLE* htable, const HTABLE_NODE* key,
+            HTABLE_CMP_FUNC cmp_func, HTABLE_HASH_FUNC hash_func);
+
+
+#ifdef __cplusplus
+}  /* extern "C" { */
+#endif
+
+#endif  /* #ifndef CRE_HTABLE_H */
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 94fef52..0c24afa 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -17,6 +17,9 @@ add_definitions(-DCRE_TEST)
 add_executable(test-buffer acutest.h test-buffer.c ../data/buffer.h ../data/buffer.c)
 target_include_directories(test-buffer PRIVATE ../data)
 
+add_executable(test-htable acutest.h test-htable.c ../data/htable.h ../data/htable.c)
+target_include_directories(test-htable PRIVATE ../data)
+
 add_executable(test-list acutest.h test-list.c ../data/list.h)
 target_include_directories(test-list PRIVATE ../data)
 
diff --git a/tests/test-htable.c b/tests/test-htable.c
new file mode 100644
index 0000000..8b2362a
--- /dev/null
+++ b/tests/test-htable.c
@@ -0,0 +1,194 @@
+/*
+ * C Reusables
+ *
+ *
+ * Copyright (c) 2023 Martin Mitas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "acutest.h"
+#include "htable.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* How many nodes the bulk tests put into the table. Decimal representation
+ * of any number below this count has to fit into KEY_BUFFER_SIZE. */
+enum {
+    N_NODES = 100000,
+    KEY_BUFFER_SIZE = 8
+};
+
+/* The test payload: the hash table node embedded in a structure carrying a
+ * heap-allocated string key. */
+typedef struct VAL {
+    HTABLE_NODE the_node;
+    char* key;
+    int payload;
+} VAL;
+
+
+/* Allocate a new VAL with its own copy of the key. Release with dtor_func().
+ */
+static HTABLE_NODE*
+make_val(const char* key, int payload)
+{
+    size_t key_size = strlen(key) + 1;
+    VAL* v;
+
+    v = (VAL*) malloc(sizeof(VAL));
+    TEST_ASSERT(v != NULL);
+
+    /* strdup() is not available in strict ISO C before C23, so copy the key
+     * by hand. */
+    v->key = (char*) malloc(key_size);
+    TEST_ASSERT(v->key != NULL);
+    memcpy(v->key, key, key_size);
+
+    v->the_node.next = NULL;
+    v->payload = payload;
+
+    return &v->the_node;
+}
+
+static uint32_t
+hash_func(const HTABLE_NODE* node)
+{
+    /* FNV-1a */
+    static const uint32_t FNV1A_BASE_32 = 2166136261U;
+    static const uint32_t FNV1A_PRIME_32 = 16777619U;
+    const VAL* val = (const VAL*) HTABLE_DATA(node, VAL, the_node);
+    const uint8_t* ptr = (const uint8_t*) val->key;
+    uint32_t fnv1a = FNV1A_BASE_32;
+
+    while(*ptr) {
+        fnv1a ^= *ptr;
+        fnv1a *= FNV1A_PRIME_32;
+        ptr++;
+    }
+
+    return fnv1a;
+}
+
+static int
+cmp_func(const HTABLE_NODE* node1, const HTABLE_NODE* node2)
+{
+    const VAL* data1 = (const VAL*) HTABLE_DATA(node1, VAL, the_node);
+    const VAL* data2 = (const VAL*) HTABLE_DATA(node2, VAL, the_node);
+
+    return strcmp(data1->key, data2->key);
+}
+
+static void
+dtor_func(HTABLE_NODE* node)
+{
+    VAL* val;
+
+    TEST_ASSERT(node != NULL);
+
+    val = (VAL*) HTABLE_DATA(node, VAL, the_node);
+    free(val->key);
+    free(val);
+}
+
+/* Insert nodes with the keys "0" ... "<N_NODES-1>"; payload of each node is
+ * the number its key is made of.
+ */
+static void
+fill_htable(HTABLE* htable)
+{
+    char key[KEY_BUFFER_SIZE];
+    int i;
+
+    for(i = 0; i < N_NODES; i++) {
+        snprintf(key, sizeof(key), "%d", i);
+        TEST_CHECK(htable_insert(htable, make_val(key, i), cmp_func, hash_func) == 0);
+    }
+}
+
+
+/*****************************
+ ***  The test routines    ***
+ *****************************/
+
+static void
+test_empty(void)
+{
+    HTABLE htable = HTABLE_INITIALIZER;
+
+    TEST_CHECK(htable_is_empty(&htable));
+    TEST_CHECK(htable_insert(&htable, make_val("key", 42), cmp_func, hash_func) == 0);
+    TEST_CHECK(!htable_is_empty(&htable));
+    htable_fini(&htable, dtor_func);
+    TEST_CHECK(htable_is_empty(&htable));
+}
+
+static void
+test_insert(void)
+{
+    HTABLE htable = HTABLE_INITIALIZER;
+    HTABLE_NODE* duplicate;
+    int rejected;
+
+    TEST_CHECK(htable_is_empty(&htable));
+    TEST_CHECK(htable_insert(&htable, make_val("key", 42), cmp_func, hash_func) == 0);
+    TEST_CHECK(!htable_is_empty(&htable));
+
+    /* Check we cannot insert value with the same key. */
+    duplicate = make_val("key", 42);
+    rejected = (htable_insert(&htable, duplicate, cmp_func, hash_func) != 0);
+    TEST_CHECK(rejected);
+    if(rejected) {
+        /* The table has not taken the node, so it is still ours to free. */
+        dtor_func(duplicate);
+    }
+
+    htable_fini(&htable, dtor_func);
+    TEST_CHECK(htable_is_empty(&htable));
+}
+
+static void
+test_lookup(void)
+{
+    HTABLE htable = HTABLE_INITIALIZER;
+    VAL val_key;
+    char key[KEY_BUFFER_SIZE];
+    int i;
+
+    fill_htable(&htable);
+
+    /* A dummy node living on the stack serves as the lookup key. */
+    val_key.the_node.next = NULL;
+    val_key.key = key;
+    val_key.payload = 0;
+
+    for(i = 0; i < N_NODES; i++) {
+        HTABLE_NODE* node;
+
+        snprintf(key, sizeof(key), "%d", i);
+
+        node = htable_lookup(&htable, &val_key.the_node, cmp_func, hash_func);
+        TEST_CHECK(node != NULL);
+        TEST_MSG("Broken element: %d", i);
+        if(node != NULL)
+            TEST_CHECK(HTABLE_DATA(node, VAL, the_node)->payload == i);
+    }
+
+    snprintf(key, sizeof(key), "n/a");
+    TEST_CHECK(htable_lookup(&htable, &val_key.the_node, cmp_func, hash_func) == NULL);
+
+    htable_fini(&htable, dtor_func);
+}
+
+static void
+test_remove(void)
+{
+    HTABLE htable = HTABLE_INITIALIZER;
+    VAL val_key;
+    char key[KEY_BUFFER_SIZE];
+    int i;
+
+    fill_htable(&htable);
+
+    /* A dummy node living on the stack serves as the removal key. */
+    val_key.the_node.next = NULL;
+    val_key.key = key;
+    val_key.payload = 0;
+
+    for(i = 0; i < N_NODES; i++) {
+        HTABLE_NODE* node;
+
+        snprintf(key, sizeof(key), "%d", i);
+
+        node = htable_remove(&htable, &val_key.the_node, cmp_func, hash_func);
+        TEST_CHECK(node != NULL);
+        TEST_MSG("Broken element: %d", i);
+        if(node != NULL)
+            dtor_func(node);
+    }
+
+    /* Everything inserted has been removed again. */
+    TEST_CHECK(htable_is_empty(&htable));
+
+    htable_fini(&htable, dtor_func);
+}
+
+
+/*************************
+ ***  List of tests    ***
+ *************************/
+
+TEST_LIST = {
+    { "empty",      test_empty },
+    { "insert",     test_insert },
+    { "lookup",     test_lookup },
+    { "remove",     test_remove },
+    { NULL, NULL }
+};