@@ -775,13 +775,8 @@ def _get_one_outer_file(self, catalog: CatalogName) -> JSON:
         return one(hits)

     def _source_spec(self, catalog: CatalogName, entity: JSON) -> SourceSpec:
-        if config.is_hca_enabled(catalog):
-            field = 'sourceSpec'
-        elif config.is_anvil_enabled(catalog):
-            field = 'source_spec'
-        else:
-            assert False, catalog
-        return TDRSourceSpec.parse(one(entity['sources'])[field])
+        source = self._source_from_response(catalog, one(entity['sources']))
+        return source.spec

     def _file_size_facet(self, catalog: CatalogName) -> str:
         if config.is_hca_enabled(catalog):
@@ -1125,27 +1120,30 @@ def _test_repository_files(self, catalog: CatalogName):
         outer_file, inner_file = self._get_one_inner_file(catalog)
         file_url = inner_file['url']
         if file_url:
-            file_url = furl(file_url)
-            # FIXME: Use _check_endpoint() instead
-            #        https://github.com/DataBiosphere/azul/issues/7373
-            self.assertEqual(file_url.path.segments[0], 'repository')
-            file_url.path.segments.insert(0, 'fetch')
-            response = self._get_url_unchecked(GET, file_url)
-            self.assertEqual(200, response.status)
-            response = json.loads(response.data)
-            while response['Status'] != 302:
-                self.assertEqual(301, response['Status'])
-                self.assertNotIn('Retry-After', response)
-                response = self._get_url_json(GET, furl(response['Location']))
-            self.assertNotIn('Retry-After', response)
-            response = self._get_url(GET, furl(response['Location']), stream=True)
             source = self._source_spec(catalog, outer_file)
-            self._validate_file_response(response, source, inner_file)
+            self._test_file_download(source, inner_file)
         else:
             # Phantom files lack DRS URIs and cannot be downloaded
             self.assertIsNone(file_url, inner_file)
             self.assertEqual('lungmap', config.catalogs[catalog].atlas, inner_file)

+    def _test_file_download(self, source: SourceSpec, file: JSON):
+        file_url = furl(file['url'])
+        # FIXME: Use _check_endpoint() instead
+        #        https://github.com/DataBiosphere/azul/issues/7373
+        self.assertEqual(file_url.path.segments[0], 'repository')
+        file_url.path.segments.insert(0, 'fetch')
+        response = self._get_url_unchecked(GET, file_url)
+        self.assertEqual(200, response.status)
+        response = json.loads(response.data)
+        while response['Status'] != 302:
+            self.assertEqual(301, response['Status'])
+            self.assertNotIn('Retry-After', response)
+            response = self._get_url_json(GET, furl(response['Location']))
+        self.assertNotIn('Retry-After', response)
+        response = self._get_url(GET, furl(response['Location']), stream=True)
+        self._validate_file_response(response, source, file)
+
     def _file_ext(self, file: JSON) -> str:
         # We believe that the file extension is a more reliable indicator than
         # the `format` metadata field. Note that this method preserves multipart
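Note (for illustration, not part of this diff): the code that moved into _test_file_download exercises Azul's /fetch endpoints, which wrap redirects in a JSON envelope that clients poll. As the test itself shows, the body carries Status 301 plus a Location to retry while the download is being prepared, and Status 302 once Location points at the actual file. The sketch below shows that same polling pattern with a plain requests client; the function name and the requests/time usage are assumptions made for the example, not code from this change.

import time

import requests


def follow_fetch_redirects(fetch_url: str) -> requests.Response:
    # The initial request returns HTTP 200 with a JSON envelope rather than
    # a raw HTTP redirect
    body = requests.get(fetch_url).json()
    while body['Status'] != 302:
        # 301 means "not ready yet, poll Location again"; the test above also
        # asserts that no Retry-After delay is requested in its scenario
        assert body['Status'] == 301, body
        time.sleep(body.get('Retry-After', 0))
        body = requests.get(body['Location']).json()
    # Status 302 carries the final (typically signed) URL of the file bytes
    return requests.get(body['Location'], stream=True)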
@@ -1289,6 +1287,13 @@ def _prepare_notifications(self,
         notifications.extend(duplicate_bundles)
         return notifications, bundle_fqids

+    def _source_from_response(self, catalog: CatalogName, source_json: JSON) -> SourceRef:
+        special_fields = self.metadata_plugin(catalog).special_fields
+        source = dict(id=source_json[special_fields.source_id],
+                      spec=source_json[special_fields.source_spec],
+                      prefix=source_json[special_fields.source_prefix])
+        return self.repository_plugin(catalog).source_ref_cls.from_json(source)
+
     def _get_indexed_bundles(self,
                              catalog: CatalogName,
                              filters: JSON | None = None
@@ -1298,10 +1303,7 @@ def _get_indexed_bundles(self,
         special_fields = self.metadata_plugin(catalog).special_fields
         for hit in hits:
             source, bundle = one(hit['sources']), one(hit['bundles'])
-            source = dict(id=source[special_fields.source_id],
-                          spec=source[special_fields.source_spec],
-                          prefix=source[special_fields.source_prefix])
-            source = self.repository_plugin(catalog).source_ref_cls.from_json(source)
+            source = self._source_from_response(catalog, source)
             bundle_fqid = SourcedBundleFQID(uuid=bundle[special_fields.bundle_uuid],
                                             version=bundle[special_fields.bundle_version],
                                             source=source)
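Note (for illustration, not part of this diff): _source_from_response gives _source_spec and _get_indexed_bundles a single code path for turning a hit's 'sources' entry into a source reference. The per-catalog field names now come from the metadata plugin's special_fields, which is what makes the removed HCA/AnVIL branch in _source_spec ('sourceSpec' vs. 'source_spec') unnecessary, and .spec on the returned reference is the parsed spec that _source_spec now hands back. A minimal sketch of that mapping, with hypothetical field names and a stand-in for source_ref_cls:

from types import SimpleNamespace

# Hypothetical field names; the real ones come from the catalog's metadata plugin
special_fields = SimpleNamespace(source_id='sourceId',
                                 source_spec='sourceSpec',
                                 source_prefix='sourcePrefix')


def source_from_response(source_json: dict, source_ref_cls):
    source = dict(id=source_json[special_fields.source_id],
                  spec=source_json[special_fields.source_spec],
                  prefix=source_json[special_fields.source_prefix])
    # from_json is expected to parse source['spec'] into a SourceSpec, so a
    # caller like _source_spec can simply return the reference's .spec
    return source_ref_cls.from_json(source)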