
Commit

Some coverage updates
dogversioning committed Dec 4, 2024
1 parent 2172d7b commit 773cf96
Showing 9 changed files with 125 additions and 28 deletions.
33 changes: 21 additions & 12 deletions src/shared/functions.py
@@ -140,7 +140,7 @@ def update_metadata(
             # Should only be hit if you add a new JSON dict and forget to add it
             # to this function
             case _:
-                raise OSError(f"{meta_type} does not have a handler for updates.")
+                raise ValueError(f"{meta_type} does not have a handler for updates.")
     data_version_metadata.update(extra_items)
     return metadata

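Swapping OSError for ValueError better matches the failure: an unrecognized meta_type is a bad argument, not an operating-system fault. A minimal sketch of the pattern (the handler names here are illustrative, not the module's actual cases):

    def dispatch_update(meta_type: str) -> str:
        match meta_type:
            case "transactions" | "column_types":
                return "handled"
            case _:
                # A bad argument value, not an I/O failure, so ValueError fits
                raise ValueError(f"{meta_type} does not have a handler for updates.")
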
@@ -182,14 +182,22 @@ def move_s3_file(s3_client, s3_bucket_name: str, old_key: str, new_key: str) ->
         raise S3UploadError
 
 
-def get_s3_keys(s3_client, s3_bucket_name: str, prefix: str, token: str | None = None) -> list:
+def get_s3_keys(
+    s3_client,
+    s3_bucket_name: str,
+    prefix: str,
+    token: str | None = None,
+    max_keys: int | None = None,
+) -> list[str]:
     """Gets the list of all keys in S3 starting with the prefix"""
+    if max_keys is None:
+        max_keys = 1000
     if token:
         res = s3_client.list_objects_v2(
-            Bucket=s3_bucket_name, Prefix=prefix, ContinuationToken=token
+            Bucket=s3_bucket_name, Prefix=prefix, ContinuationToken=token, MaxKeys=max_keys
         )
     else:
-        res = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=prefix)
+        res = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=prefix, MaxKeys=max_keys)
     if "Contents" not in res:
         return []
     contents = [record["Key"] for record in res["Contents"]]
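Note that max_keys caps the page size of each list_objects_v2 call, not the total result: when a listing is truncated, the function is expected to follow the NextContinuationToken (that continuation logic sits below the visible hunk and is assumed here). A rough usage sketch, assuming a boto3 client and an illustrative bucket name:

    import boto3

    s3_client = boto3.client("s3")
    # Pages of at most two keys, but continuation should still surface
    # every key under the prefix, so both calls return the same list.
    paged = get_s3_keys(s3_client, "example-bucket", "aggregates/", max_keys=2)
    full = get_s3_keys(s3_client, "example-bucket", "aggregates/")
    assert paged == full

The new test_get_s3_keys further down relies on exactly this contract when it asserts the full ITEM_COUNT even with max_keys=2.
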
@@ -225,14 +233,15 @@ def get_latest_data_package_version(bucket, prefix):
     prefix = prefix + "/"
     s3_res = s3_client.list_objects_v2(Bucket=bucket, Prefix=prefix)
     highest_ver = None
-    for item in s3_res["Contents"]:
-        ver_str = item["Key"].replace(prefix, "").split("/")[0]
-        if ver_str.isdigit():
-            if highest_ver is None:
-                highest_ver = ver_str
-            else:
-                if int(highest_ver) < int(ver_str):
-                    highest_ver = ver_str
-    if highest_ver is None:
+    if "Contents" in s3_res:
+        for item in s3_res["Contents"]:
+            ver_str = item["Key"].replace(prefix, "").split("/")[1].split("__")[2]
+            if ver_str.isdigit():
+                if highest_ver is None:
+                    highest_ver = ver_str
+                else:
+                    if int(highest_ver) < int(ver_str):
+                        highest_ver = ver_str
+    if "Contents" not in s3_res or highest_ver is None:
         logging.error("No data package versions found for %s", prefix)
     return highest_ver
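The reworked loop also guards against a missing "Contents" key and now pulls the version out of the data package directory name rather than the first path segment. A worked example of the new expression, using illustrative names:

    prefix = "aggregates/example_study/"  # after the function appends "/"
    key = (
        "aggregates/example_study/example_study__dp/"
        "example_study__dp__002/example_study__dp__aggregate.parquet"
    )
    ver_str = key.replace(prefix, "").split("/")[1].split("__")[2]
    assert ver_str == "002"  # version segment of "example_study__dp__002"
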
25 changes: 11 additions & 14 deletions src/site_upload/cache_api/cache_api.py
@@ -37,20 +37,17 @@ def cache_api_data(s3_client, s3_bucket_name: str, db: str, target: str) -> None
             "study": dp.split("__")[0],
             "name": dp.split("__")[1],
         }
-        try:
-            versions = column_types[dp_detail["study"]][dp_detail["name"]]
-            for version in versions:
-                dp_dict = {
-                    **dp_detail,
-                    **versions[version],
-                    "version": version,
-                    "id": f"{dp_detail['study']}__{dp_detail['name']}__{version}",
-                }
-                if "__flat" in dp:
-                    dp_dict["type"] = "flat"
-                dp_details.append(dp_dict)
-        except KeyError as e:
-            raise e
+        versions = column_types[dp_detail["study"]][dp_detail["name"]]
+        for version in versions:
+            dp_dict = {
+                **dp_detail,
+                **versions[version],
+                "version": version,
+                "id": f"{dp_detail['study']}__{dp_detail['name']}__{version}",
+            }
+            if "__flat" in dp:
+                dp_dict["type"] = "flat"
+            dp_details.append(dp_dict)
     s3_client.put_object(
         Bucket=s3_bucket_name,
         Key=f"{enums.BucketPath.CACHE.value}/{enums.JsonFilename.DATA_PACKAGES.value}.json",
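The deleted try/except was a no-op: catching KeyError only to re-raise it unchanged behaves effectively the same as having no handler at all. A small sketch of the equivalence (function names are illustrative):

    def lookup_wrapped(column_types: dict, study: str):
        try:
            return column_types[study]
        except KeyError as e:
            raise e  # re-raising unchanged adds nothing

    def lookup_plain(column_types: dict, study: str):
        return column_types[study]  # the KeyError propagates on its own

Both raise an identical KeyError for a missing study, so dropping the wrapper simplifies the code without changing behavior.
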
16 changes: 16 additions & 0 deletions tests/conftest.py
@@ -40,6 +40,8 @@ def _init_mock_data(s3_client, bucket, study, data_package, version):
     The following items are added:
     - Aggregates, with a site of plainsboro, in parquet and csv, for the
       study provided
+    - Flat tables, with a site of plainsboro, in parquet and csv, for the
+      study provided
     - a data_package cache for api testing
     - credentials for the 3 unit test hospitals (princeton, elsewhere, hope)
@@ -58,6 +60,20 @@ def _init_mock_data(s3_client, bucket, study, data_package, version):
         f"{enums.BucketPath.CSVAGGREGATE.value}/{study}/"
         f"{study}__{data_package}/{version}/{study}__{data_package}__aggregate.csv",
     )
+    s3_client.upload_file(
+        "./tests/test_data/flat_synthea_q_date_recent.parquet",
+        bucket,
+        f"{enums.BucketPath.FLAT.value}/{study}/{mock_utils.EXISTING_SITE}/"
+        f"{study}__{data_package}__{version}/"
+        f"{study}__{data_package}__flat.parquet",
+    )
+    s3_client.upload_file(
+        "./tests/test_data/flat_synthea_q_date_recent.csv",
+        bucket,
+        f"{enums.BucketPath.CSVFLAT.value}/{study}/{mock_utils.EXISTING_SITE}/"
+        f"{study}__{data_package}__{version}/"
+        f"{study}__{data_package}__flat.csv",
+    )
     s3_client.upload_file(
         "./tests/test_data/data_packages_cache.json",
         bucket,
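Assuming the f-string segments join with the separators shown, the new flat fixtures land under keys shaped like this (study, package, version, and the literal enum values are illustrative, since BucketPath.FLAT and BucketPath.CSVFLAT are not expanded in this diff):

    flat/example_study/princeton_plainsboro_teaching_hospital/example_study__dp__001/example_study__dp__flat.parquet
    csv_flat/example_study/princeton_plainsboro_teaching_hospital/example_study__dp__001/example_study__dp__flat.csv
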
2 changes: 1 addition & 1 deletion tests/mock_utils.py
@@ -6,7 +6,7 @@
 TEST_PROCESS_COUNTS_ARN = "arn:aws:sns:us-east-1:123456789012:test-counts"
 TEST_PROCESS_STUDY_META_ARN = "arn:aws:sns:us-east-1:123456789012:test-meta"
 TEST_CACHE_API_ARN = "arn:aws:sns:us-east-1:123456789012:test-cache"
-ITEM_COUNT = 9
+ITEM_COUNT = 13
 DATA_PACKAGE_COUNT = 3
 
 EXISTING_SITE = "princeton_plainsboro_teaching_hospital"
54 changes: 53 additions & 1 deletion tests/shared/test_functions.py
@@ -1,10 +1,21 @@
-"""Unit tests for shared functions"""
+"""Unit tests for shared functions.
+As of this writing, since a lot of this was historically covered by other tests,
+this file does not contain a 1-1 set of tests to the source module,
+instead focusing only on edge case scenarios (though in those cases, tests
+should be comprehensive). 1-1 coverage is a desirable long term goal.
+"""
 
 from contextlib import nullcontext as does_not_raise
+from unittest import mock
 
+import boto3
 import pandas
 import pytest
 
-from src.shared import functions, pandas_functions
+from src.shared import enums, functions, pandas_functions
+from tests import mock_utils
 

@pytest.mark.parametrize(
@@ -60,3 +71,44 @@ def test_column_datatypes():
         "bool": "boolean",
         "string": "string",
     }
+
+
+def test_update_metadata_error(mock_bucket):
+    with pytest.raises(ValueError):
+        enums.JsonFilename.FOO = "foo"
+        functions.update_metadata(
+            metadata={}, study="", data_package="", version="", target="", meta_type="foo"
+        )
+
+
+def test_get_s3_keys(mock_bucket):
+    s3_client = boto3.client("s3")
+    res = functions.get_s3_keys(s3_client, mock_utils.TEST_BUCKET, "")
+    assert len(res) == mock_utils.ITEM_COUNT
+    res = functions.get_s3_keys(s3_client, mock_utils.TEST_BUCKET, "", max_keys=2)
+    assert len(res) == mock_utils.ITEM_COUNT
+    res = functions.get_s3_keys(s3_client, mock_utils.TEST_BUCKET, "cache")
+    assert res == ["cache/data_packages.json"]
+
+
+def test_latest_data_package_version(mock_bucket):
+    version = functions.get_latest_data_package_version(
+        mock_utils.TEST_BUCKET, f"{enums.BucketPath.AGGREGATE.value}/{mock_utils.EXISTING_STUDY}"
+    )
+    assert version == mock_utils.EXISTING_VERSION
+    s3_client = boto3.client("s3")
+    s3_client.upload_file(
+        "./tests/test_data/count_synthea_patient_agg.parquet",
+        mock_utils.TEST_BUCKET,
+        f"{enums.BucketPath.AGGREGATE.value}/{mock_utils.EXISTING_STUDY}/"
+        f"{mock_utils.EXISTING_STUDY}__{mock_utils.EXISTING_DATA_P}/"
+        f"{mock_utils.EXISTING_STUDY}__{mock_utils.EXISTING_DATA_P}__{mock_utils.NEW_VERSION}/"
+        f"{mock_utils.EXISTING_STUDY}__{mock_utils.EXISTING_DATA_P}__aggregate.parquet",
+    )
+    version = functions.get_latest_data_package_version(
+        mock_utils.TEST_BUCKET, f"{enums.BucketPath.AGGREGATE.value}/{mock_utils.EXISTING_STUDY}"
+    )
+    assert version == mock_utils.NEW_VERSION
+    version = functions.get_latest_data_package_version(
+        mock_utils.TEST_BUCKET, f"{enums.BucketPath.AGGREGATE.value}/not_a_study"
+    )
+    assert version is None
2 changes: 2 additions & 0 deletions tests/site_upload/test_powerset_merge.py
@@ -237,6 +237,8 @@ def test_powerset_merge_single_upload(
             or item["Key"].startswith(enums.BucketPath.ERROR.value)
             or item["Key"].startswith(enums.BucketPath.ADMIN.value)
             or item["Key"].startswith(enums.BucketPath.CACHE.value)
+            or item["Key"].startswith(enums.BucketPath.FLAT.value)
+            or item["Key"].startswith(enums.BucketPath.CSVFLAT.value)
             or item["Key"].endswith("study_periods.json")
         )
     if archives:
2 changes: 2 additions & 0 deletions tests/site_upload/test_process_upload.py
@@ -167,6 +167,8 @@ def test_process_upload(
             or item["Key"].startswith(enums.BucketPath.ERROR.value)
             or item["Key"].startswith(enums.BucketPath.ADMIN.value)
             or item["Key"].startswith(enums.BucketPath.CACHE.value)
+            or item["Key"].startswith(enums.BucketPath.FLAT.value)
+            or item["Key"].startswith(enums.BucketPath.CSVFLAT.value)
             or item["Key"].endswith("study_periods.json")
             or item["Key"].endswith("column_types.json")
         )
19 changes: 19 additions & 0 deletions tests/test_data/flat_synthea_q_date_recent.csv
@@ -0,0 +1,19 @@
+resource,subgroup,numerator,denominator,percentage
+Procedure,,2,5,40.00
+Procedure,cumulus__all,2,5,40.00
+Observation,,0,0,0.00
+Observation,cumulus__all,0,0,0.00
+MedicationRequest,,0,0,0.00
+MedicationRequest,cumulus__all,0,0,0.00
+Immunization,,0,0,0.00
+Immunization,cumulus__all,0,0,0.00
+Encounter,,1,4,25.00
+Encounter,cumulus__all,1,4,25.00
+DocumentReference,,0,0,0.00
+DocumentReference,cumulus__all,0,0,0.00
+DiagnosticReport,,0,0,0.00
+DiagnosticReport,cumulus__all,0,0,0.00
+Condition,,2,4,50.00
+Condition,cumulus__all,2,4,50.00
+AllergyIntolerance,,0,0,0.00
+AllergyIntolerance,cumulus__all,0,0,0.00
Binary file added: tests/test_data/flat_synthea_q_date_recent.parquet (not shown).
