Skip to content

Commit

Permalink
Do not create URL for imported sequencing data objects
Browse files (browse the repository at this point in the history)
  • Loading branch information
mbthornton-lbl committed Oct 25, 2024
1 parent 0e1e3e5 commit 5a58e57
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
2 changes: 1 addition & 1 deletion nmdc_automation/import_automation/activity_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,11 @@ def map_sequencing_data(self) -> Tuple[nmdc.Database, Dict]:
filemeta = os.stat(export_file)
md5 = get_md5(export_file)
data_object_id = self.runtime.minter(self.data_object_type)
+ # Imported nucleotide sequencing data object does not have a URL
do_record = {
"id": data_object_id,
"type": self.data_object_type,
"name": file_destination_name,
- "url": f"{self.url}/{self.nucelotide_sequencing_id}/{file_destination_name}",
"file_size_bytes": filemeta.st_size,
"md5_checksum": md5,
"data_object_type": data_object_dict["data_object_type"],
Expand Down
15 changes: 9 additions & 6 deletions tests/test_imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def test_gold_mapper_map_sequencing_data(gold_mapper):
"filter": {"id": exp_nucleotide_sequencing_id},
"update": {"has_output": [exp_dobj_id]}
}
- # TODO verify that these are the correct values to expect based on the import logic for raw reads files
- exp_url = 'https://data.microbiomedata.org/data/nmdc:omprc-11-importT/52834.4.466476.GATCGAGT-GATCGAGT.fastq.gz'
+ # Sequencing data does not get a URL
+ exp_url = None
exp_name = '52834.4.466476.GATCGAGT-GATCGAGT.fastq.gz'
exp_description = 'Metagenome Raw Reads for nmdc:omprc-11-importT'

Expand Down Expand Up @@ -110,10 +110,13 @@ def test_gold_mapper_map_data_unique(gold_mapper):
for dobj in data_objects:
assert str(dobj.data_object_type) in exp_data_object_types
assert isinstance(dobj, DataObject)
- assert dobj.url
- assert exp_nucleotide_sequencing_id in dobj.url
- assert exp_nucleotide_sequencing_id in dobj.description
-
+ # sequencing data object should not have a URL
+ if str(dobj.data_object_type) == "Metagenome Raw Reads":
+ assert not dobj.url
+ else:
+ assert dobj.url
+ assert exp_nucleotide_sequencing_id in dobj.url
+ assert exp_nucleotide_sequencing_id in dobj.description


def test_gold_mapper_map_data_multiple(gold_mapper):
Expand Down

0 comments on commit 5a58e57

Please sign in to comment.