Skip to content

Commit

Permalink
Make some path characters illegal and update TODO
Browse files Browse the repository at this point in the history
  • Loading branch information
peterrrock2 committed Sep 20, 2024
1 parent e2c74bd commit 55608d5
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 5 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,7 @@ cython_debug/
#.idea/

.DS_Store


# Ignore all of the backup files that might be floating around
*.tar
10 changes: 10 additions & 0 deletions TODO.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# For Docs

[ ] Show how to make a column with the write context
[ ] Show how this all works with the load_dataframe method


# Ideas
[ ] Maybe cache all of the columns in the database on the local machine so
validation can be done locally? Periodic checks for columns can be done
to ensure that the local cache is up to date.
3 changes: 3 additions & 0 deletions gerrydb/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,6 @@ class CacheInitError(CacheError):

class ViewLoadError(GerryDBError):
    """Raised when a view cannot be loaded (e.g. from a GeoPackage)."""

class GerryPathError(GerryDBError):
    """Raised when an invalid path is provided.

    Generally, this means invalid characters are present in the path.
    """
22 changes: 21 additions & 1 deletion gerrydb/repos/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@
import httpx
import pydantic

from gerrydb.exceptions import OnlineError, RequestError, ResultError, WriteContextError
from gerrydb.exceptions import (
OnlineError,
RequestError,
ResultError,
WriteContextError,
GerryPathError,
)
from gerrydb.schemas import BaseModel

if TYPE_CHECKING:
Expand Down Expand Up @@ -104,13 +110,27 @@ def write_context_wrapper(*args, **kwargs):
return write_context_wrapper


# Substrings that are rejected wherever they occur in a path; `normalize_path`
# raises `GerryPathError` when any of them is present.
INVALID_PATH_SUBSTRINGS = {"..", " "}


def normalize_path(path: str, case_sensitive_uid: bool = False) -> str:
"""Normalizes a path (removes leading, trailing, and duplicate slashes, and
lowercases the path if `case_sensitive` is `False`).
Some paths, such as paths containing GEOIDs, are case-sensitive in the last
segment. In these cases, `case_sensitive` should be set to `True`.
"""
for item in INVALID_PATH_SUBSTRINGS:
if item in path:
raise GerryPathError(
f"Invalid path: '{path}'. Please remove the following substring: '{item}'"
)

if case_sensitive_uid:
path_list = path.strip().split("/")
return "/".join(
Expand Down
11 changes: 8 additions & 3 deletions gerrydb/repos/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
namespaced,
online,
write_context,
normalize_path,
)
from gerrydb.schemas import (
Column,
Expand Down Expand Up @@ -68,6 +69,7 @@ def create(
Returns:
Metadata for the new column.
"""
path = normalize_path(path)
response = self.ctx.client.post(
f"{self.base_url}/{namespace}",
json=ColumnCreate(
Expand Down Expand Up @@ -106,8 +108,9 @@ def update(
Returns:
The updated column.
"""
clean_path = normalize_path(f"{self.base_url}/{namespace}/{path}")
response = self.ctx.client.patch(
f"{self.base_url}/{namespace}/{path}",
clean_path,
json=ColumnPatch(aliases=aliases).dict(),
)
response.raise_for_status()
Expand Down Expand Up @@ -164,9 +167,10 @@ def set_values(
raise ValueError("Either `path` or `col` must be provided.")

path = col.path if col is not None else path
clean_path = normalize_path(f"{self.base_url}/{namespace}/{path}")

response = self.ctx.client.put(
f"{self.base_url}/{namespace}/{path}",
clean_path,
json=[
ColumnValue(
path=(
Expand Down Expand Up @@ -219,6 +223,7 @@ async def async_set_values(
raise ValueError("Either `path` or `col` must be provided.")

path = col.path if col is not None else path
clean_path = normalize_path(f"{self.base_url}/{namespace}/{path}")

ephemeral_client = client is None
if ephemeral_client:
Expand All @@ -238,7 +243,7 @@ async def async_set_values(
for geo, value in values.items()
]
response = await client.put(
f"{self.base_url}/{namespace}/{path}",
clean_path,
json=json,
)

Expand Down
5 changes: 4 additions & 1 deletion gerrydb/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
This file should be kept in sync with the server-side version.
"""

from datetime import datetime
from enum import Enum
from typing import Any, Optional, Union
Expand All @@ -16,7 +17,9 @@
UserEmail = constr(max_length=254)

# constr is a constrained string, so this is some path that needs to satisfy this regex
GerryPath = constr(regex=r"[a-z0-9][a-z0-9-_/]*") # must start with lowercase or digit, then followed by any lowercase, digit, hyphen, underscore, slash
GerryPath = constr(
regex=r"[a-z0-9][a-z0-9-_/]*"
) # must start with lowercase or digit, then followed by any lowercase, digit, hyphen, underscore, slash
NamespacedGerryPath = constr(regex=r"[a-z0-9/][a-z0-9-_/]*")

NATIVE_PROJ = pyproj.CRS("EPSG:4269")
Expand Down

0 comments on commit 55608d5

Please sign in to comment.