From a077265f459643f96dddf1ad0c1cb498fc4128cf Mon Sep 17 00:00:00 2001 From: peterrrock2 <27579114+peterrrock2@users.noreply.github.com> Date: Tue, 27 Aug 2024 11:14:45 -0600 Subject: [PATCH] Change gerrydb cache to have default max size 20gb --- gerrydb/cache.py | 68 ++++++++++++++++++++++++++++++++++++++--------- gerrydb/client.py | 6 ++++- 2 files changed, 60 insertions(+), 14 deletions(-) diff --git a/gerrydb/cache.py b/gerrydb/cache.py index 01547eb..ae33824 100644 --- a/gerrydb/cache.py +++ b/gerrydb/cache.py @@ -1,7 +1,9 @@ """Internal cache operations for GerryDB.""" + import gzip import pickle import sqlite3 +import os from datetime import datetime from os import PathLike from pathlib import Path @@ -28,7 +30,10 @@ class GerryCache: data_dir: Path def __init__( - self, database: Union[str, PathLike, sqlite3.Connection], data_dir: Path + self, + database: Union[str, PathLike, sqlite3.Connection], + data_dir: Path, + max_size_gb: float = 20, ): """Loads or initializes a cache.""" if isinstance(database, sqlite3.Connection): @@ -47,6 +52,7 @@ def __init__( self._assert_clean() self.data_dir = data_dir + self.max_size_gb = max_size_gb def upsert_view_gpkg( self, namespace: str, path: str, render_id: str, content: bytes @@ -58,7 +64,9 @@ def upsert_view_gpkg( """ gpkg_path = self.data_dir / f"{render_id}.gpkg" with open(gpkg_path, "wb") as gpkg_fp: - gpkg_fp.write(content) + bytes_written = gpkg_fp.write(content) + + kb_written = bytes_written // 1024 + 1 # always round up to nearest kb with self._conn: # Register the new render. @@ -78,12 +86,44 @@ def upsert_view_gpkg( self._conn.execute( ( - "INSERT INTO view (namespace, path, render_id, cached_at) " - "VALUES (?, ?, ?, ?)" + "INSERT INTO view (namespace, path, render_id, cached_at, file_size_kb) " + "VALUES (?, ?, ?, ?, ?)" ), - (namespace, path, render_id, datetime.now().isoformat()), + (namespace, path, render_id, datetime.now().isoformat(), kb_written), ) + db_cursor = self._conn.cursor() + + db_cursor.execute("SELECT SUM(file_size_kb) FROM view") + total_db_size = db_cursor.fetchone()[0] + + print(total_db_size) + print(f"max_size: {self.max_size_gb * 1024 * 1024}") + + while total_db_size > self.max_size_gb * 1024 * 1024: + db_cursor.execute("SELECT * FROM view ORDER BY cached_at ASC LIMIT 1") + oldest = db_cursor.fetchone() + oldest_namespace, oldest_path, oldest_render_id = ( + oldest[0], + oldest[1], + oldest[2], + ) + print(f"Found oldest render: {oldest_namespace}, {oldest_path}") + print(oldest) + total_db_size -= oldest[4] + db_cursor.execute( + "DELETE FROM view WHERE namespace = ? AND path = ?", + (oldest_namespace, oldest_path), + ) + + print(f"The new db size is", total_db_size) + print(f"Now deleting the render file: {oldest_render_id}.gpkg") + + try: + os.remove(self.data_dir / f"{oldest_render_id}.gpkg") + except FileNotFoundError: + print(f"Could not find the render file: {oldest_render_id}.gpkg") + return gpkg_path def get_view_gpkg(self, namespace: str, path: str) -> Optional[Path]: @@ -146,19 +186,21 @@ def _init_db(self) -> None: ) self._conn.execute( """CREATE TABLE view( - namespace TEXT NOT NULL, - path TEXT NOT NULL, - render_id TEXT NOT NULL, - cached_at TEXT NOT NULL, + namespace TEXT NOT NULL, + path TEXT NOT NULL, + render_id TEXT NOT NULL, + cached_at TIMESTAMP NOT NULL, + file_size_kb BIGINTEGER NOT NULL, UNIQUE(namespace, path) )""" ) self._conn.execute( """CREATE TABLE graph( - render_id TEXT NOT NULL REFERENCES view(render_id), - plans INTEGER NOT NULL, - geometry INTEGER NOT NULL, - cached_at TEXT NOT NULL, + render_id TEXT NOT NULL REFERENCES view(render_id), + plans INTEGER NOT NULL, + geometry INTEGER NOT NULL, + cached_at TIMESTAMP NOT NULL, + file_size_kb BIGINTEGER NOT NULL, UNIQUE(render_id, plans, geometry) )""" ) diff --git a/gerrydb/client.py b/gerrydb/client.py index 71a202e..1675aa1 100644 --- a/gerrydb/client.py +++ b/gerrydb/client.py @@ -67,6 +67,7 @@ def __init__( namespace: Optional[str] = None, offline: bool = False, timeout: int = 180, + cache_max_size_gb: float = 20, ): """Creates a GerryDB session. @@ -105,7 +106,9 @@ def __init__( if host is not None and key is not None: self._temp_dir = TemporaryDirectory() - self.cache = GerryCache(":memory:", Path(self._temp_dir.name)) + self.cache = GerryCache( + ":memory:", Path(self._temp_dir.name), max_size_gb=cache_max_size_gb + ) else: GERRYDB_ROOT = Path(os.getenv("GERRYDB_ROOT", DEFAULT_GERRYDB_ROOT)) try: @@ -151,6 +154,7 @@ def __init__( self.cache = GerryCache( database=GERRYDB_ROOT / "caches" / f"{profile}.db", data_dir=profile_cache_dir, + max_size_gb=cache_max_size_gb, ) host = config["host"]