Skip to content

Commit

Permalink
Store all strings in a cache
Browse files Browse the repository at this point in the history
Apteryxd can end up storing a lot of duplicated strings. This commit
adds a cache to remove this duplication - it is called the same way
strdup / free would be and will take care of allocating, reference
counting and releasing strings.

This cache has no locking, so needs to be called under an external lock.
At the moment this used solely in code inside the db_lock in database.c
  • Loading branch information
blairsteven committed Nov 28, 2024
1 parent 0808047 commit 7c470f6
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 14 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ $(BUILDDIR)/%.o: %.c | $(BUILDDIR)
@echo "Compiling "$<""
$(Q)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -c $< -o $@

$(BUILDDIR)/apteryxd: apteryxd.c hashtree.c database.c $(BUILDDIR)/rpc_transport.o $(BUILDDIR)/rpc_socket.o $(BUILDDIR)/config.o $(BUILDDIR)/callbacks.o $(BUILDDIR)/libapteryx.so
$(BUILDDIR)/apteryxd: apteryxd.c hashtree.c database.c string-cache.c $(BUILDDIR)/rpc_transport.o $(BUILDDIR)/rpc_socket.o $(BUILDDIR)/config.o $(BUILDDIR)/callbacks.o $(BUILDDIR)/libapteryx.so
@echo "Building $@"
$(Q)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -o $@ $^ $(EXTRA_LDFLAGS)

$(BUILDDIR)/apteryx: apteryxc.c hashtree.c database.c callbacks.c $(BUILDDIR)/libapteryx.so $(EXTRA_CSRC)
$(BUILDDIR)/apteryx: apteryxc.c hashtree.c database.c string-cache.c callbacks.c $(BUILDDIR)/libapteryx.so $(EXTRA_CSRC)
@echo "Building $@"
$(Q)$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(apteryx_CFLAGS) -o $@ $^ -lapteryx $(EXTRA_LDFLAGS) $(apteryx_LDFLAGS)

Expand Down
18 changes: 8 additions & 10 deletions database.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#endif

#include "hashtree.h"
#include "string-cache.h"

struct database_node
{
Expand Down Expand Up @@ -189,18 +190,15 @@ _db_update (struct database_node *parent_node, GNode *new_node, uint64_t ts)
{
const char *value = APTERYX_VALUE(new_node);

if (db_node->value)
{
g_free(db_node->value);
}
string_cache_release ((char *)db_node->value);

/* We interpret an empty string as removal, so anything else
* is a value to set. NULL values can be in the tree when
* doing a query.
*/
if (value && value[0])
{
db_node->value = (unsigned char*)g_strdup(value);
db_node->value = (unsigned char *) string_cache_get (value);
db_node->length = strlen(value) + 1;
}
else
Expand Down Expand Up @@ -324,12 +322,11 @@ db_add_no_lock (const char *path, const unsigned char *value, size_t length, uin
new_value =
(struct database_node *) hashtree_node_add (root, sizeof (*new_value), path);
}
g_free (new_value->value);
string_cache_release ((char *)new_value->value);
new_value->value = NULL;
if (length > 0)
{
new_value->value = g_malloc (length);
memcpy (new_value->value, value, length);
new_value->value = (unsigned char *)string_cache_get ((char *)value);
}
new_value->length = length;

Expand Down Expand Up @@ -375,7 +372,7 @@ db_delete_no_lock (const char *path, uint64_t ts)

if (((struct database_node *) node)->value != NULL)
{
g_free (((struct database_node *) node)->value);
string_cache_release ((char *)((struct database_node *) node)->value);
((struct database_node *) node)->value = NULL;
((struct database_node *) node)->length = 0;
}
Expand Down Expand Up @@ -624,6 +621,7 @@ void
db_init ()
{
pthread_rwlock_wrlock (&db_lock);
string_cache_init ();
if (!root)
{
root = hashtree_init (sizeof (struct database_node));
Expand All @@ -643,7 +641,7 @@ db_purge (struct database_node *node)

if (node->value)
{
g_free (node->value);
string_cache_release ((char *)node->value);
}
node->value = NULL;
node->length = 0;
Expand Down
5 changes: 3 additions & 2 deletions hashtree.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

#include <assert.h>
#include "hashtree.h"
#include "string-cache.h"
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
Expand Down Expand Up @@ -66,7 +67,7 @@ hashtree_node_add (struct hashtree_node *root, size_t size, const char *path)
{
next_node = g_malloc0 (size);
next_node->parent = parent;
next_node->key = g_strdup (key);
next_node->key = (char *) string_cache_get (key);
if (parent->children == NULL)
{
parent->children = g_hash_table_new (g_str_hash, g_str_equal);
Expand Down Expand Up @@ -107,7 +108,7 @@ hashtree_node_delete (struct hashtree_node *root, struct hashtree_node *node)
g_hash_table_destroy (node->children);
}

g_free (node->key);
string_cache_release (node->key);
g_free (node);
}

Expand Down
121 changes: 121 additions & 0 deletions string-cache.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/**
* @file string-cache.c
* Provide a cache of strings to reduce memory allocations
*
* Copyright 2024, Allied Telesis Labs New Zealand, Ltd
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this library. If not, see <http://www.gnu.org/licenses/>
*/

#include "string-cache.h"
#include "hashtree.h"
#include <stdlib.h>
#include <assert.h>

/* Structure with a flexible array member for the string.
* When we allocate this it will be done with space for the string. */
typedef struct
{
uint64_t refcount;
char str[0];
} StringCacheEntry;

static GHashTable *string_cache = NULL;

/* Function to create a new cache entry. */
static StringCacheEntry *
string_cache_entry_new (const char *str)
{
/* Include space for null terminator. */
size_t len = strlen (str) + 1;
StringCacheEntry *entry = g_malloc (sizeof (StringCacheEntry) + len);
entry->refcount = 1;
/* Copy the string into the allocated memory (including null terminator). */
memcpy (entry->str, str, len);
return entry;
}

/* Function to free a cache entry. */
static void
string_cache_entry_free (StringCacheEntry *entry)
{
assert (entry->refcount == 0);
if (entry)
{
g_free (entry);
}
}

/* Retrieve a string from the cache or add it if it doesn't exist */
const char *
string_cache_get (const char *str)
{
if (!str)
{
return NULL;
}

StringCacheEntry *entry = g_hash_table_lookup (string_cache, str);
if (entry)
{
entry->refcount++;
return entry->str;
}
else
{
entry = string_cache_entry_new (str);
/* Use entry->str as the key to ensure hash/equal functions operate on the string */
g_hash_table_insert (string_cache, entry->str, entry);
return entry->str;
}
}

/* Decrease the reference count of a string and remove it if refcount reaches 0 */
void
string_cache_release (const char *str)
{
if (!str)
{
return;
}

StringCacheEntry *entry = g_hash_table_lookup (string_cache, str);
if (entry)
{
entry->refcount--;
if (entry->refcount == 0)
{
g_hash_table_remove (string_cache, str);
}
}
}

void
string_cache_init ()
{
if (!string_cache)
{
string_cache =
g_hash_table_new_full (g_str_hash, g_str_equal, NULL,
(GDestroyNotify) string_cache_entry_free);
}
}

/* This function should only be called when *all* strings have been released. */
void
string_cache_deinit ()
{
g_hash_table_destroy (string_cache);
string_cache = NULL;
}
34 changes: 34 additions & 0 deletions string-cache.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#ifndef _STRING_CACHE_H_
#define _STRING_CACHE_H_

/**
* @file string-cache.h
* Provide a cache of strings to reduce memory allocations
*
* Copyright 2024, Allied Telesis Labs New Zealand, Ltd
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this library. If not, see <http://www.gnu.org/licenses/>
*/


#include <glib.h>

/* Prepare string cache */
void string_cache_init();

/* Retrieve a string from the cache or add it if it doesn't exist */
const char *string_cache_get (const char *str) ;
void string_cache_release (const char *str);

#endif

0 comments on commit 7c470f6

Please sign in to comment.