Skip to content

Commit

Permalink
Updated dict handling for metadata (#147)
Browse files Browse the repository at this point in the history
* Updated dict handling for metadata

* Missing parens

* moved deepcopy to util function
  • Loading branch information
dogversioning authored Feb 7, 2025
1 parent b561123 commit fa94b7d
Show file tree
Hide file tree
Showing 3 changed files with 291 additions and 61 deletions.
34 changes: 25 additions & 9 deletions src/shared/functions.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Functions used across different lambdas"""

import copy
import io
import json
import logging
Expand All @@ -9,6 +10,9 @@

from . import enums

logger = logging.getLogger()
logger.setLevel("INFO")

TRANSACTION_METADATA_TEMPLATE = {
enums.TransactionKeys.TRANSACTION_FORMAT_VERSION.value: "2",
enums.TransactionKeys.LAST_UPLOAD.value: None,
Expand Down Expand Up @@ -108,29 +112,35 @@ def update_metadata(
if extra_items is None:
extra_items = {}
check_meta_type(meta_type)
logger.info(f"### Updating metadata {meta_type}")
logger.info(f"{study} {data_package} {version}")
logger.info(f"Key: {target} Value: {value}")
logger.info(f"Pre-update size: {len(metadata.keys())}")

match meta_type:
case enums.JsonFilename.TRANSACTIONS.value:
site_metadata = metadata.setdefault(site, {})
study_metadata = site_metadata.setdefault(study, {})
data_package_metadata = study_metadata.setdefault(data_package, {})
data_version_metadata = data_package_metadata.setdefault(
version, TRANSACTION_METADATA_TEMPLATE
data_version_metadata = _update_or_clone_template(
data_package_metadata, version, TRANSACTION_METADATA_TEMPLATE
)

dt = dt or datetime.now(UTC)
data_version_metadata[target] = dt.isoformat()
case enums.JsonFilename.STUDY_PERIODS.value:
site_metadata = metadata.setdefault(site, {})
study_period_metadata = site_metadata.setdefault(study, {})
data_version_metadata = study_period_metadata.setdefault(
version, STUDY_PERIOD_METADATA_TEMPLATE
data_version_metadata = _update_or_clone_template(
study_period_metadata, version, STUDY_PERIOD_METADATA_TEMPLATE
)
dt = dt or datetime.now(UTC)
data_version_metadata[target] = dt.isoformat()
case enums.JsonFilename.COLUMN_TYPES.value:
study_metadata = metadata.setdefault(study, {})
data_package_metadata = study_metadata.setdefault(data_package, {})
data_version_metadata = data_package_metadata.setdefault(
version, COLUMN_TYPES_METADATA_TEMPLATE
data_version_metadata = _update_or_clone_template(
data_package_metadata, version, COLUMN_TYPES_METADATA_TEMPLATE
)
if target == enums.ColumnTypesKeys.COLUMNS.value:
data_version_metadata[target] = value
Expand All @@ -142,9 +152,15 @@ def update_metadata(
case _:
raise ValueError(f"{meta_type} does not have a handler for updates.")
data_version_metadata.update(extra_items)
logger.info(f"Post-update size: {len(metadata.keys())}")
logger.info(f"### Updated metadata {meta_type}")
return metadata


def _update_or_clone_template(meta_dict: dict, version, template: str):
return meta_dict.setdefault(version, copy.deepcopy(template))


def write_metadata(
*,
s3_client,
Expand Down Expand Up @@ -174,11 +190,11 @@ def move_s3_file(s3_client, s3_bucket_name: str, old_key: str, new_key: str) ->
source = {"Bucket": s3_bucket_name, "Key": old_key}
copy_response = s3_client.copy_object(CopySource=source, Bucket=s3_bucket_name, Key=new_key)
if copy_response["ResponseMetadata"]["HTTPStatusCode"] != 200:
logging.error("error copying file %s to %s", old_key, new_key)
logger.error("error copying file %s to %s", old_key, new_key)
raise S3UploadError
delete_response = s3_client.delete_object(Bucket=s3_bucket_name, Key=old_key)
if delete_response["ResponseMetadata"]["HTTPStatusCode"] != 204:
logging.error("error deleting file %s", old_key)
logger.error("error deleting file %s", old_key)
raise S3UploadError


Expand Down Expand Up @@ -243,5 +259,5 @@ def get_latest_data_package_version(bucket, prefix):
if int(highest_ver) < int(ver_str):
highest_ver = ver_str
if "Contents" not in s3_res or highest_ver is None:
logging.error("No data package versions found for %s", prefix)
logger.error("No data package versions found for %s", prefix)
return highest_ver
16 changes: 14 additions & 2 deletions tests/mock_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def get_mock_column_types_metadata():
EXISTING_STUDY: {
EXISTING_DATA_P: {
EXISTING_VERSION: {
"column_types_format_version": "1",
"column_types_format_version": 2,
"columns": {
"cnt": "integer",
"gender": "string",
Expand All @@ -126,13 +126,19 @@ def get_mock_column_types_metadata():
"site": "string",
},
"last_data_update": "2023-02-24T15:08:07.771080+00:00",
"s3_path": (
f"aggregates/{EXISTING_STUDY}/{EXISTING_STUDY}_{EXISTING_DATA_P}/"
f"{EXISTING_STUDY}_{EXISTING_DATA_P}__{EXISTING_VERSION}/"
f"{EXISTING_STUDY}_{EXISTING_DATA_P}__aggregate.csv"
),
"total": 1000,
}
}
},
OTHER_STUDY: {
EXISTING_DATA_P: {
EXISTING_VERSION: {
"column_types_format_version": "1",
"column_types_format_version": 2,
"columns": {
"cnt": "integer",
"gender": "string",
Expand All @@ -141,6 +147,12 @@ def get_mock_column_types_metadata():
"site": "string",
},
"last_data_update": "2023-02-24T15:08:07.771080+00:00",
"s3_path": (
f"aggregates/{OTHER_STUDY}/{OTHER_STUDY}_{EXISTING_DATA_P}/"
f"{OTHER_STUDY}_{EXISTING_DATA_P}__{EXISTING_VERSION}/"
f"{OTHER_STUDY}_{EXISTING_DATA_P}__aggregate.csv"
),
"total": 2000,
}
}
},
Expand Down
Loading

0 comments on commit fa94b7d

Please sign in to comment.