Skip to content

Commit

Permalink
upload: be more cautious about grabbing files for upload
Browse files Browse the repository at this point in the history
Instead of looking for study name in the file, try harder to make sure
we are looking at a study *prefix* in the filename.
  • Loading branch information
mikix committed Oct 4, 2024
1 parent 4502da8 commit d3fb06a
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 28 deletions.
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,14 @@ output.sql
MRCONSO.RRF
*.zip
coverage.xml
*.parquet
valueset_data/

# test artifacts (TODO: have the tests work in a tmpdir instead)
/cumulus_library/builders/valueset/lookup_drug_from_ingredient_rules.parquet
/tests/test_data/study_static_file/bsvs/ICD10CM_2023AA.parquet
/tests/test_data/study_static_file/bsvs/ICD10PCS_2023AA.parquet
/tests/test_data/valueset/rules_file.parquet

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
7 changes: 5 additions & 2 deletions cumulus_library/actions/uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def upload_data(
progress: Progress,
file_upload_progress: TaskID,
file_path: Path,
version: int,
version: str,
args: dict,
):
"""Fetches presigned url and uploads file to aggregator"""
Expand Down Expand Up @@ -71,7 +71,10 @@ def upload_files(args: dict):
if args["target"]:
filtered_paths = []
for path in file_paths:
if any(study in str(path) for study in args["target"]):
if any(
path.parent.name == study and path.name.startswith(f"{study}__")
for study in args["target"]
):
filtered_paths.append(path)
file_paths = filtered_paths

Expand Down
93 changes: 70 additions & 23 deletions tests/test_actions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import builtins
import contextlib
import datetime
import pathlib
import shutil
Expand All @@ -11,6 +12,7 @@
import requests
import responses
from freezegun import freeze_time
from responses import matchers

from cumulus_library import base_utils, enums, errors, log_utils, study_manifest
from cumulus_library.actions import (
Expand Down Expand Up @@ -410,6 +412,52 @@ def test_import_study(tmp_path, mock_db_config):
)


def do_upload(
*,
login_error: bool = False,
user: str = "user",
id_token: str = "id",
preview: bool = True,
raises: contextlib.AbstractContextManager = does_not_raise(),
status: int = 204,
version: str = "12345.0",
data_path: pathlib.Path | None = pathlib.Path.cwd() / "tests/test_data/upload/",
):
with raises:
if login_error:
responses.add(responses.POST, "https://upload.url.test/upload/", status=401)
else:
responses.add(
responses.POST,
"https://upload.url.test/upload/",
match=[
matchers.json_params_matcher(
{
"study": "upload",
"data_package": "upload__count_synthea_patient",
"data_package_version": version,
"filename": f"{user}_upload__count_synthea_patient.parquet",
}
)
],
json={"url": "https://presigned.url.test", "fields": {"a": "b"}},
)
args = {
"data_path": data_path,
"id": id_token,
"preview": preview,
"target": ["upload"],
"url": "https://upload.url.test/upload/",
"user": user,
}
responses.add(responses.POST, "https://presigned.url.test", status=status)
uploader.upload_files(args)
if preview:
responses.assert_call_count("https://upload.url.test/upload/", 1)
elif raises == does_not_raise():
responses.assert_call_count("https://upload.url.test/upload/", 2)


@pytest.mark.parametrize(
"user,id_token,status,login_error,preview,raises",
[
Expand Down Expand Up @@ -451,29 +499,28 @@ def test_import_study(tmp_path, mock_db_config):
)
@responses.activate
def test_upload_data(user, id_token, status, preview, login_error, raises):
with raises:
if login_error:
responses.add(responses.POST, "https://upload.url.test/upload/", status=401)
else:
responses.add(
responses.POST,
"https://upload.url.test/upload/",
json={"url": "https://presigned.url.test", "fields": {"a": "b"}},
)
args = {
"data_path": pathlib.Path.cwd() / "tests/test_data/upload/",
"id": id_token,
"preview": preview,
"target": "core",
"url": "https://upload.url.test/upload/",
"user": user,
}
responses.add(responses.POST, "https://presigned.url.test", status=status)
uploader.upload_files(args)
if preview:
responses.assert_call_count("https://upload.url.test/upload/", 1)
elif raises == does_not_raise():
responses.assert_call_count("https://upload.url.test/upload/", 2)
do_upload(
user=user,
id_token=id_token,
status=status,
preview=preview,
login_error=login_error,
raises=raises,
)


def test_upload_data_no_path():
with pytest.raises(SystemExit):
do_upload(data_path=None)


@responses.activate
def test_upload_data_no_version(tmp_path):
src = pathlib.Path.cwd() / "tests/test_data/upload/upload__count_synthea_patient.parquet"
dest = pathlib.Path(tmp_path) / "upload"
dest.mkdir()
shutil.copy(src, dest)
do_upload(data_path=dest, version="0")


@pytest.mark.parametrize(
Expand Down
8 changes: 6 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,8 +601,12 @@ def test_cli_upload_studies(mock_glob, args, status, login_error, raises):
def test_cli_upload_filter(mock_upload_data, mock_glob, args, calls):
mock_glob.side_effect = [
[
Path(str(Path(__file__).parent) + "/test_data/count_synthea_patient.parquet"),
Path(str(Path(__file__).parent) + "/other_data/count_synthea_patient.parquet"),
Path(
str(Path(__file__).parent) + "/test_data/test_data__count_synthea_patient.parquet"
),
Path(
str(Path(__file__).parent) + "/other_data/other_data__count_synthea_patient.parquet"
),
],
]
cli.main(cli_args=args)
Expand Down
2 changes: 2 additions & 0 deletions tests/test_data/upload/upload__meta_version.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"data_package_version"
12345
Binary file added tests/test_data/upload/upload__meta_version.parquet
Binary file not shown.

0 comments on commit d3fb06a

Please sign in to comment.