upload: be more cautious about grabbing files for upload #305

Merged (1 commit, Oct 7, 2024)
13 changes: 12 additions & 1 deletion .gitignore
@@ -10,9 +10,20 @@ output.sql
MRCONSO.RRF
*.zip
coverage.xml
-*.parquet
@mikix (Contributor, Author), Oct 4, 2024:
There are parquet files checked into the repo in the test_data folder, so I modified this rule.

Contributor:

I would prefer to keep this - currently, export defaults to the project root if no arg is given, which means that exported data is easy to accidentally commit - that's the original origin of this rule. I've been explicitly committing files when needed.

I agree the specific parquet files highlighted below should be fixed to run in tmpdirs.

@mikix (Contributor, Author):

If that's the thinking, shouldn't we also ignore *.csv?

@mikix (Contributor, Author):

We could have nested ignores - a global ignore and then turn that off for the tests/ dir, probably. For both csv and parquet.
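The nested-ignore idea can be sketched like this (an illustrative .gitignore fragment, not necessarily the exact rules the PR settled on):

```gitignore
# Ignore export artifacts everywhere by default...
*.csv
*.parquet
# ...then re-include them under tests/, where they are checked-in fixtures.
!/tests/**/*.csv
!/tests/**/*.parquet
```

The re-inclusion with `!` works here because only file patterns are ignored; git cannot re-include a file whose parent directory is itself excluded.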

@mikix (Contributor, Author):

Eh? Updated to be a little more complicated, but it seems to work

Contributor:

very clever!

+valueset_data/

+# parquet and csv files show up during an export, which devs might do a fair bit in odd places.
+# So for safety, ignore them both here, except in the tests dir where we use them for test data.
+*.csv
+*.parquet
+!/tests/**/*.csv
+!/tests/**/*.parquet
+# test artifacts (TODO: have the tests work in a tmpdir instead)
+/cumulus_library/builders/valueset/lookup_drug_from_ingredient_rules.parquet
+/tests/test_data/study_static_file/bsvs/ICD10CM_2023AA.parquet
+/tests/test_data/study_static_file/bsvs/ICD10PCS_2023AA.parquet
+/tests/test_data/valueset/rules_file.parquet

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
7 changes: 5 additions & 2 deletions cumulus_library/actions/uploader.py
@@ -14,7 +14,7 @@ def upload_data(
progress: Progress,
file_upload_progress: TaskID,
file_path: Path,
-version: int,
+version: str,
@mikix (Contributor, Author):
This is always passed a string; I think this type hint was just a typo.

args: dict,
):
"""Fetches presigned url and uploads file to aggregator"""
@@ -71,7 +71,10 @@ def upload_files(args: dict):
if args["target"]:
filtered_paths = []
for path in file_paths:
-if any(study in str(path) for study in args["target"]):
+if any(
+    path.parent.name == study and path.name.startswith(f"{study}__")
+    for study in args["target"]
+):
Comment on lines -74 to +77
@mikix (Contributor, Author), Oct 4, 2024:

This is the actual code change here.

Before, this was a simple substring check, which for my use case (upload site/ -t core) was hitting files like site/data_metrics/data_metrics__count_c_us_core_v4_observation.parquet, because it has core in its filename even though it's not a core study file.

So... I've changed this to check both that the leaf filename starts with study__ and that the file sits inside a directory named for the study (a requirement from part of the uploading code, which takes that directory name as the study name). This is pretty strict, but these are also rules that these files ought to follow?
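The difference between the old substring check and the new strict check can be sketched as follows (the data_metrics path is the one from the comment above; the core__count_encounter filename is invented for illustration):

```python
from pathlib import Path


def old_match(path: Path, targets: list[str]) -> bool:
    # Old behavior: plain substring check over the whole path string.
    return any(study in str(path) for study in targets)


def new_match(path: Path, targets: list[str]) -> bool:
    # New behavior: the file must sit in a directory named for the study
    # AND its filename must start with "<study>__".
    return any(
        path.parent.name == study and path.name.startswith(f"{study}__")
        for study in targets
    )


path = Path("site/data_metrics/data_metrics__count_c_us_core_v4_observation.parquet")
print(old_match(path, ["core"]))  # True: "core" appears inside "us_core"
print(new_match(path, ["core"]))  # False: parent dir is "data_metrics", not "core"
print(new_match(Path("site/core/core__count_encounter.parquet"), ["core"]))  # True
```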

Contributor:

Yep, makes sense - it also keeps some hand-moved data from leaking into the aggregator and creating new studies accidentally.

filtered_paths.append(path)
file_paths = filtered_paths

93 changes: 70 additions & 23 deletions tests/test_actions.py
@mikix (Contributor, Author):
Test modifications to allow testing more scenarios easily (in order to satisfy coverage tax)

@@ -1,4 +1,5 @@
import builtins
import contextlib
import datetime
import pathlib
import shutil
@@ -11,6 +12,7 @@
import requests
import responses
from freezegun import freeze_time
from responses import matchers

from cumulus_library import base_utils, enums, errors, log_utils, study_manifest
from cumulus_library.actions import (
@@ -410,6 +412,52 @@ def test_import_study(tmp_path, mock_db_config):
)


def do_upload(
*,
login_error: bool = False,
user: str = "user",
id_token: str = "id",
preview: bool = True,
raises: contextlib.AbstractContextManager = does_not_raise(),
status: int = 204,
version: str = "12345.0",
@mikix (Contributor, Author): This .0 is due to #306

data_path: pathlib.Path | None = pathlib.Path.cwd() / "tests/test_data/upload/",
):
with raises:
if login_error:
responses.add(responses.POST, "https://upload.url.test/upload/", status=401)
else:
responses.add(
responses.POST,
"https://upload.url.test/upload/",
match=[
matchers.json_params_matcher(
{
"study": "upload",
"data_package": "upload__count_synthea_patient",
"data_package_version": version,
"filename": f"{user}_upload__count_synthea_patient.parquet",
}
)
],
json={"url": "https://presigned.url.test", "fields": {"a": "b"}},
)
args = {
"data_path": data_path,
"id": id_token,
"preview": preview,
"target": ["upload"],
"url": "https://upload.url.test/upload/",
"user": user,
}
responses.add(responses.POST, "https://presigned.url.test", status=status)
uploader.upload_files(args)
if preview:
responses.assert_call_count("https://upload.url.test/upload/", 1)
elif raises == does_not_raise():
responses.assert_call_count("https://upload.url.test/upload/", 2)


@pytest.mark.parametrize(
"user,id_token,status,login_error,preview,raises",
[
@@ -451,29 +499,28 @@ def test_import_study(tmp_path, mock_db_config):
)
@responses.activate
def test_upload_data(user, id_token, status, preview, login_error, raises):
-    with raises:
-        if login_error:
-            responses.add(responses.POST, "https://upload.url.test/upload/", status=401)
-        else:
-            responses.add(
-                responses.POST,
-                "https://upload.url.test/upload/",
-                json={"url": "https://presigned.url.test", "fields": {"a": "b"}},
-            )
-        args = {
-            "data_path": pathlib.Path.cwd() / "tests/test_data/upload/",
-            "id": id_token,
-            "preview": preview,
-            "target": "core",
-            "url": "https://upload.url.test/upload/",
-            "user": user,
-        }
-        responses.add(responses.POST, "https://presigned.url.test", status=status)
-        uploader.upload_files(args)
-        if preview:
-            responses.assert_call_count("https://upload.url.test/upload/", 1)
-        elif raises == does_not_raise():
-            responses.assert_call_count("https://upload.url.test/upload/", 2)
+    do_upload(
+        user=user,
+        id_token=id_token,
+        status=status,
+        preview=preview,
+        login_error=login_error,
+        raises=raises,
+    )


def test_upload_data_no_path():
with pytest.raises(SystemExit):
do_upload(data_path=None)


@responses.activate
def test_upload_data_no_version(tmp_path):
src = pathlib.Path.cwd() / "tests/test_data/upload/upload__count_synthea_patient.parquet"
dest = pathlib.Path(tmp_path) / "upload"
dest.mkdir()
shutil.copy(src, dest)
do_upload(data_path=dest, version="0")


@pytest.mark.parametrize(
8 changes: 6 additions & 2 deletions tests/test_cli.py
@@ -601,8 +601,12 @@ def test_cli_upload_studies(mock_glob, args, status, login_error, raises):
def test_cli_upload_filter(mock_upload_data, mock_glob, args, calls):
mock_glob.side_effect = [
[
-Path(str(Path(__file__).parent) + "/test_data/count_synthea_patient.parquet"),
-Path(str(Path(__file__).parent) + "/other_data/count_synthea_patient.parquet"),
+Path(
+    str(Path(__file__).parent) + "/test_data/test_data__count_synthea_patient.parquet"
+),
+Path(
+    str(Path(__file__).parent) + "/other_data/other_data__count_synthea_patient.parquet"
+),
],
]
cli.main(cli_args=args)
2 changes: 2 additions & 0 deletions tests/test_data/upload/upload__meta_version.csv
@@ -0,0 +1,2 @@
"data_package_version"
12345
Binary file added tests/test_data/upload/upload__meta_version.parquet