Do not create URL for imported sequencing data objects

microbiomedata · Oct 25, 2024 · 5a58e57
1 parent 0e1e3e5
commit 5a58e57
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 7 deletions.
diff --git a/nmdc_automation/import_automation/activity_mapper.py b/nmdc_automation/import_automation/activity_mapper.py
@@ -107,11 +107,11 @@ def map_sequencing_data(self) -> Tuple[nmdc.Database, Dict]:
                     filemeta = os.stat(export_file)
                     md5 = get_md5(export_file)
                     data_object_id = self.runtime.minter(self.data_object_type)
+                    # Imported nucleotide sequencing data object does not have a URL
                     do_record = {
                         "id": data_object_id,
                         "type": self.data_object_type,
                         "name": file_destination_name,
-                        "url": f"{self.url}/{self.nucelotide_sequencing_id}/{file_destination_name}",
                         "file_size_bytes": filemeta.st_size,
                         "md5_checksum": md5,
                         "data_object_type": data_object_dict["data_object_type"],

diff --git a/tests/test_imports.py b/tests/test_imports.py
@@ -54,8 +54,8 @@ def test_gold_mapper_map_sequencing_data(gold_mapper):
         "filter": {"id": exp_nucleotide_sequencing_id},
         "update": {"has_output": [exp_dobj_id]}
     }
-    # TODO verify that these are the correct values to expect based on the import logic for raw reads files
-    exp_url = 'https://data.microbiomedata.org/data/nmdc:omprc-11-importT/52834.4.466476.GATCGAGT-GATCGAGT.fastq.gz'
+    # Sequencing data does not get a URL
+    exp_url = None
     exp_name = '52834.4.466476.GATCGAGT-GATCGAGT.fastq.gz'
     exp_description = 'Metagenome Raw Reads for nmdc:omprc-11-importT'
 
@@ -110,10 +110,13 @@ def test_gold_mapper_map_data_unique(gold_mapper):
     for dobj in data_objects:
         assert str(dobj.data_object_type) in exp_data_object_types
         assert isinstance(dobj, DataObject)
-        assert dobj.url
-        assert exp_nucleotide_sequencing_id in dobj.url
-        assert exp_nucleotide_sequencing_id in dobj.description
-
+        # sequencing data object should not have a URL
+        if str(dobj.data_object_type) == "Metagenome Raw Reads":
+            assert not dobj.url
+        else:
+            assert dobj.url
+            assert exp_nucleotide_sequencing_id in dobj.url
+            assert exp_nucleotide_sequencing_id in dobj.description
 
 
 def test_gold_mapper_map_data_multiple(gold_mapper):