From 7597a6e8c28d1262e0cd8312f019491744d55f27 Mon Sep 17 00:00:00 2001 From: Blair Steven Date: Thu, 28 Nov 2024 09:36:01 +1300 Subject: [PATCH] Store all strings in a cache Apteryxd can end up storing a lot of duplicated strings. This commit adds a cache to remove this duplication - it is called the same way strdup / free would be and will take care of allocating, reference counting and releasing strings. This cache has no locking, so needs to be called under an external lock. At the moment this used solely in code inside the db_lock in database.c --- Makefile | 4 +- database.c | 18 ++++---- hashtree.c | 5 +- string-cache.c | 121 +++++++++++++++++++++++++++++++++++++++++++++++++ string-cache.h | 34 ++++++++++++++ 5 files changed, 168 insertions(+), 14 deletions(-) create mode 100644 string-cache.c create mode 100644 string-cache.h diff --git a/Makefile b/Makefile index 1bcf327..6ac3afa 100644 --- a/Makefile +++ b/Makefile @@ -69,11 +69,11 @@ $(BUILDDIR)/%.o: %.c | $(BUILDDIR) @echo "Compiling "$<"" $(Q)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -c $< -o $@ -$(BUILDDIR)/apteryxd: apteryxd.c hashtree.c database.c $(BUILDDIR)/rpc_transport.o $(BUILDDIR)/rpc_socket.o $(BUILDDIR)/config.o $(BUILDDIR)/callbacks.o $(BUILDDIR)/libapteryx.so +$(BUILDDIR)/apteryxd: apteryxd.c hashtree.c database.c string-cache.c $(BUILDDIR)/rpc_transport.o $(BUILDDIR)/rpc_socket.o $(BUILDDIR)/config.o $(BUILDDIR)/callbacks.o $(BUILDDIR)/libapteryx.so @echo "Building $@" $(Q)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -o $@ $^ $(EXTRA_LDFLAGS) -$(BUILDDIR)/apteryx: apteryxc.c hashtree.c database.c callbacks.c $(BUILDDIR)/libapteryx.so $(EXTRA_CSRC) +$(BUILDDIR)/apteryx: apteryxc.c hashtree.c database.c string-cache.c callbacks.c $(BUILDDIR)/libapteryx.so $(EXTRA_CSRC) @echo "Building $@" $(Q)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(apteryx_CFLAGS) -o $@ $^ -lapteryx $(EXTRA_LDFLAGS) $(apteryx_LDFLAGS) diff --git a/database.c b/database.c index 3920a8f..e09d610 100644 --- a/database.c +++ b/database.c @@ -32,6 +32,7 @@ #endif #include "hashtree.h" +#include "string-cache.h" struct database_node { @@ -189,10 +190,7 @@ _db_update (struct database_node *parent_node, GNode *new_node, uint64_t ts) { const char *value = APTERYX_VALUE(new_node); - if (db_node->value) - { - g_free(db_node->value); - } + string_cache_release ((char *)db_node->value); /* We interpret an empty string as removal, so anything else * is a value to set. NULL values can be in the tree when @@ -200,7 +198,7 @@ _db_update (struct database_node *parent_node, GNode *new_node, uint64_t ts) */ if (value && value[0]) { - db_node->value = (unsigned char*)g_strdup(value); + db_node->value = (unsigned char *) string_cache_get (value); db_node->length = strlen(value) + 1; } else @@ -324,12 +322,11 @@ db_add_no_lock (const char *path, const unsigned char *value, size_t length, uin new_value = (struct database_node *) hashtree_node_add (root, sizeof (*new_value), path); } - g_free (new_value->value); + string_cache_release ((char *)new_value->value); new_value->value = NULL; if (length > 0) { - new_value->value = g_malloc (length); - memcpy (new_value->value, value, length); + new_value->value = (unsigned char *)string_cache_get ((char *)value); } new_value->length = length; @@ -375,7 +372,7 @@ db_delete_no_lock (const char *path, uint64_t ts) if (((struct database_node *) node)->value != NULL) { - g_free (((struct database_node *) node)->value); + string_cache_release ((char *)((struct database_node *) node)->value); ((struct database_node *) node)->value = NULL; ((struct database_node *) node)->length = 0; } @@ -624,6 +621,7 @@ void db_init () { pthread_rwlock_wrlock (&db_lock); + string_cache_init (); if (!root) { root = hashtree_init (sizeof (struct database_node)); @@ -643,7 +641,7 @@ db_purge (struct database_node *node) if (node->value) { - g_free (node->value); + string_cache_release ((char *)node->value); } node->value = NULL; node->length = 0; diff --git a/hashtree.c b/hashtree.c index 39b25fe..629dcde 100644 --- a/hashtree.c +++ b/hashtree.c @@ -1,6 +1,7 @@ #include #include "hashtree.h" +#include "string-cache.h" #include #include #include @@ -66,7 +67,7 @@ hashtree_node_add (struct hashtree_node *root, size_t size, const char *path) { next_node = g_malloc0 (size); next_node->parent = parent; - next_node->key = g_strdup (key); + next_node->key = (char *) string_cache_get (key); if (parent->children == NULL) { parent->children = g_hash_table_new (g_str_hash, g_str_equal); @@ -107,7 +108,7 @@ hashtree_node_delete (struct hashtree_node *root, struct hashtree_node *node) g_hash_table_destroy (node->children); } - g_free (node->key); + string_cache_release (node->key); g_free (node); } diff --git a/string-cache.c b/string-cache.c new file mode 100644 index 0000000..55bbea3 --- /dev/null +++ b/string-cache.c @@ -0,0 +1,121 @@ +/** + * @file string-cache.c + * Provide a cache of strings to reduce memory allocations + * + * Copyright 2024, Allied Telesis Labs New Zealand, Ltd + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this library. If not, see + */ + +#include "string-cache.h" +#include "hashtree.h" +#include +#include + +/* Structure with a flexible array member for the string. + * When we allocate this it will be done with space for the string. */ +typedef struct +{ + uint64_t refcount; + char str[0]; +} StringCacheEntry; + +static GHashTable *string_cache = NULL; + +/* Function to create a new cache entry. */ +static StringCacheEntry * +string_cache_entry_new (const char *str) +{ + /* Include space for null terminator. */ + size_t len = strlen (str) + 1; + StringCacheEntry *entry = g_malloc (sizeof (StringCacheEntry) + len); + entry->refcount = 1; + /* Copy the string into the allocated memory (including null terminator). */ + memcpy (entry->str, str, len); + return entry; +} + +/* Function to free a cache entry. */ +static void +string_cache_entry_free (StringCacheEntry *entry) +{ + assert (entry->refcount == 0); + if (entry) + { + g_free (entry); + } +} + +/* Retrieve a string from the cache or add it if it doesn't exist */ +const char * +string_cache_get (const char *str) +{ + if (!str) + { + return NULL; + } + + StringCacheEntry *entry = g_hash_table_lookup (string_cache, str); + if (entry) + { + entry->refcount++; + return entry->str; + } + else + { + entry = string_cache_entry_new (str); + /* Use entry->str as the key to ensure hash/equal functions operate on the string */ + g_hash_table_insert (string_cache, entry->str, entry); + return entry->str; + } +} + +/* Decrease the reference count of a string and remove it if refcount reaches 0 */ +void +string_cache_release (const char *str) +{ + if (!str) + { + return; + } + + StringCacheEntry *entry = g_hash_table_lookup (string_cache, str); + if (entry) + { + entry->refcount--; + if (entry->refcount == 0) + { + g_hash_table_remove (string_cache, str); + } + } +} + +void +string_cache_init () +{ + if (!string_cache) + { + string_cache = + g_hash_table_new_full (g_str_hash, g_str_equal, NULL, + (GDestroyNotify) string_cache_entry_free); + } +} + +/* This function should only be called when *all* strings have been released. */ +void +string_cache_deinit () +{ + g_hash_table_destroy (string_cache); + string_cache = NULL; +} diff --git a/string-cache.h b/string-cache.h new file mode 100644 index 0000000..37e564d --- /dev/null +++ b/string-cache.h @@ -0,0 +1,34 @@ +#ifndef _STRING_CACHE_H_ +#define _STRING_CACHE_H_ + +/** + * @file string-cache.h + * Provide a cache of strings to reduce memory allocations + * + * Copyright 2024, Allied Telesis Labs New Zealand, Ltd + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this library. If not, see + */ + + +#include + +/* Prepare string cache */ +void string_cache_init(); + +/* Retrieve a string from the cache or add it if it doesn't exist */ +const char *string_cache_get (const char *str) ; +void string_cache_release (const char *str); + +#endif \ No newline at end of file