From 55608d5c578a1803ff95a9493addf3e4e1554619 Mon Sep 17 00:00:00 2001 From: peterrrock2 <27579114+peterrrock2@users.noreply.github.com> Date: Thu, 19 Sep 2024 22:04:37 -0600 Subject: [PATCH] Make some path characters illegal and update TODO --- .gitignore | 4 ++++ TODO.md | 10 ++++++++++ gerrydb/exceptions.py | 3 +++ gerrydb/repos/base.py | 22 +++++++++++++++++++++- gerrydb/repos/column.py | 11 ++++++++--- gerrydb/schemas.py | 5 ++++- 6 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 TODO.md diff --git a/.gitignore b/.gitignore index dbce267..32964df 100644 --- a/.gitignore +++ b/.gitignore @@ -160,3 +160,7 @@ cython_debug/ #.idea/ .DS_Store + + +# Ignore all of the backup files that might be floating around +*.tar \ No newline at end of file diff --git a/TODO.md b/TODO.md new file mode 100644 index 0000000..89f57b3 --- /dev/null +++ b/TODO.md @@ -0,0 +1,10 @@ +# For Docs + +[ ] Show how to make a column with the write context + [ ] Show how this all works with the load_dataframe method + + +# Ideas +[ ] Maybe cache all of the columns in the database on the local machine so + validation can be done locally? Periodic checks for columns can be done + to ensure that the local cache is up to date. \ No newline at end of file diff --git a/gerrydb/exceptions.py b/gerrydb/exceptions.py index 2f7cbbd..b973331 100644 --- a/gerrydb/exceptions.py +++ b/gerrydb/exceptions.py @@ -39,3 +39,6 @@ class CacheInitError(CacheError): class ViewLoadError(GerryDBError): """Raised when a view cannot be loaded (e.g. from a GeoPackage).""" + +class GerryPathError(GerryDBError): + """Raised when an invalid path is provided. 
Generally, this means invalid characters are present""" \ No newline at end of file diff --git a/gerrydb/repos/base.py b/gerrydb/repos/base.py index 7e0828a..0e4b285 100644 --- a/gerrydb/repos/base.py +++ b/gerrydb/repos/base.py @@ -7,7 +7,13 @@ import httpx import pydantic -from gerrydb.exceptions import OnlineError, RequestError, ResultError, WriteContextError +from gerrydb.exceptions import ( + OnlineError, + RequestError, + ResultError, + WriteContextError, + GerryPathError, +) from gerrydb.schemas import BaseModel if TYPE_CHECKING: @@ -104,6 +110,14 @@ def write_context_wrapper(*args, **kwargs): return write_context_wrapper +INVALID_PATH_SUBSTRINGS = set( + { + "..", + " ", + } +) + + def normalize_path(path: str, case_sensitive_uid: bool = False) -> str: """Normalizes a path (removes leading, trailing, and duplicate slashes, and lowercases the path if `case_sensitive` is `False`). @@ -111,6 +125,12 @@ def normalize_path(path: str, case_sensitive_uid: bool = False) -> str: Some paths, such as paths containing GEOIDs, are case-sensitive in the last segment. In these cases, `case_sensitive` should be set to `True`. """ + for item in INVALID_PATH_SUBSTRINGS: + if item in path: + raise GerryPathError( + f"Invalid path: '{path}'. Please remove the following substring: '{item}'" + ) + if case_sensitive_uid: path_list = path.strip().split("/") return "/".join( diff --git a/gerrydb/repos/column.py b/gerrydb/repos/column.py index 363a90c..48e40c7 100644 --- a/gerrydb/repos/column.py +++ b/gerrydb/repos/column.py @@ -11,6 +11,7 @@ namespaced, online, write_context, + normalize_path, ) from gerrydb.schemas import ( Column, @@ -68,6 +69,7 @@ def create( Returns: Metadata for the new column. """ + path = normalize_path(path) response = self.ctx.client.post( f"{self.base_url}/{namespace}", json=ColumnCreate( @@ -106,8 +108,9 @@ def update( Returns: The updated column. 
""" + clean_path = normalize_path(f"{self.base_url}/{namespace}/{path}") response = self.ctx.client.patch( - f"{self.base_url}/{namespace}/{path}", + clean_path, json=ColumnPatch(aliases=aliases).dict(), ) response.raise_for_status() @@ -164,9 +167,10 @@ def set_values( raise ValueError("Either `path` or `col` must be provided.") path = col.path if col is not None else path + clean_path = normalize_path(f"{self.base_url}/{namespace}/{path}") response = self.ctx.client.put( - f"{self.base_url}/{namespace}/{path}", + clean_path, json=[ ColumnValue( path=( @@ -219,6 +223,7 @@ async def async_set_values( raise ValueError("Either `path` or `col` must be provided.") path = col.path if col is not None else path + clean_path = normalize_path(f"{self.base_url}/{namespace}/{path}") ephemeral_client = client is None if ephemeral_client: @@ -238,7 +243,7 @@ async def async_set_values( for geo, value in values.items() ] response = await client.put( - f"{self.base_url}/{namespace}/{path}", + clean_path, json=json, ) diff --git a/gerrydb/schemas.py b/gerrydb/schemas.py index e463264..8467f5e 100644 --- a/gerrydb/schemas.py +++ b/gerrydb/schemas.py @@ -2,6 +2,7 @@ This file should be kept in sync with the server-side version. """ + from datetime import datetime from enum import Enum from typing import Any, Optional, Union @@ -16,7 +17,9 @@ UserEmail = constr(max_length=254) # constr is a constrained string, so this is some path that needs to satisfy this regex -GerryPath = constr(regex=r"[a-z0-9][a-z0-9-_/]*") # must start with lowercase or digit, then followed by any lowercase, digit, hyphen, underscore, slash +GerryPath = constr( + regex=r"[a-z0-9][a-z0-9-_/]*" +) # must start with lowercase or digit, then followed by any lowercase, digit, hyphen, underscore, slash NamespacedGerryPath = constr(regex=r"[a-z0-9/][a-z0-9-_/]*") NATIVE_PROJ = pyproj.CRS("EPSG:4269")