From bcbe6840f20ba6519d2759f01cf8f2d51f2e39c6 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Wed, 29 May 2024 16:31:06 -0400 Subject: [PATCH 1/6] WIP: nrao archive query - TAP only so far, based on ALMA (additional commit message was just debug notes and should be ignored) add back tapsql stuff remove alma stuff from nrao add nrao obscore thing Adapt region queries to work with NRAO TAP service add tapsql.py for nrao, updates to data columns supported by NRAO TAP fix imports implemented data retrieval code flush add VLA handling fix the wait-until-done step flex fixes --- astroquery/nrao/__init__.py | 44 ++++ astroquery/nrao/core.py | 455 ++++++++++++++++++++++++++++++++++++ astroquery/nrao/tapsql.py | 265 +++++++++++++++++++++ 3 files changed, 764 insertions(+) create mode 100644 astroquery/nrao/__init__.py create mode 100644 astroquery/nrao/core.py create mode 100644 astroquery/nrao/tapsql.py diff --git a/astroquery/nrao/__init__.py b/astroquery/nrao/__init__.py new file mode 100644 index 0000000000..fd6eb14fa7 --- /dev/null +++ b/astroquery/nrao/__init__.py @@ -0,0 +1,44 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +""" +NRAO Archive service. +""" +from astropy import config as _config + + +# list the URLs here separately so they can be used in tests. +_url_list = ['https://data.nrao.edu' + ] + +tap_urls = ['https://data-query.nrao.edu/'] + +auth_urls = ['data.nrao.edu'] + + +class Conf(_config.ConfigNamespace): + """ + Configuration parameters for `astroquery.nrao`. + """ + + timeout = _config.ConfigItem(60, "Timeout in seconds.") + + archive_url = _config.ConfigItem( + _url_list, + 'The NRAO Archive mirror to use.') + + auth_url = _config.ConfigItem( + auth_urls, + 'NRAO Central Authentication Service URLs' + ) + + username = _config.ConfigItem( + "", + 'Optional default username for NRAO archive.') + + +conf = Conf() + +from .core import Nrao, NraoClass, NRAO_BANDS + +__all__ = ['Nrao', 'NraoClass', + 'Conf', 'conf', + ] diff --git a/astroquery/nrao/core.py b/astroquery/nrao/core.py new file mode 100644 index 0000000000..77dcf5c987 --- /dev/null +++ b/astroquery/nrao/core.py @@ -0,0 +1,455 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst + +import os.path +import keyring +import numpy as np +import re +import tarfile +import string +import requests +import warnings +import json +import time + +from pkg_resources import resource_filename +from bs4 import BeautifulSoup +import pyvo +from urllib.parse import urljoin + +from astropy.table import Table, Column, vstack +from astroquery import log +from astropy.utils.console import ProgressBar +from astropy import units as u +from astropy.time import Time + +try: + from pyvo.dal.sia2 import SIA2_PARAMETERS_DESC, SIA2Service +except ImportError: + # Can be removed once min version of pyvo is 1.5 + from pyvo.dal.sia2 import SIA_PARAMETERS_DESC as SIA2_PARAMETERS_DESC + from pyvo.dal.sia2 import SIAService as SIA2Service + +from ..exceptions import LoginError +from ..utils import commons +from ..utils.process_asyncs import async_to_sync +from ..query import BaseQuery, QueryWithLogin, BaseVOQuery +from . import conf, auth_urls, tap_urls +from astroquery.exceptions import CorruptDataWarning +from ..alma.tapsql import (_gen_str_sql, _gen_numeric_sql, + _gen_band_list_sql, _gen_datetime_sql, _gen_pol_sql, _gen_pub_sql, + _gen_science_sql, _gen_spec_res_sql, ALMA_DATE_FORMAT) +from .tapsql import (_gen_pos_sql) + +__all__ = {'NraoClass',} + +__doctest_skip__ = ['NraoClass.*'] + +NRAO_BANDS = { + 'L': (1*u.GHz, 2*u.GHz), + 'S': (2*u.GHz, 4*u.GHz), + 'C': (4*u.GHz, 8*u.GHz), + 'X': (8*u.GHz, 12*u.GHz), + 'U': (12*u.GHz, 18*u.GHz), + 'K': (18*u.GHz, 26*u.GHz), + 'A': (26*u.GHz, 39*u.GHz), + 'Q': (39*u.GHz, 50*u.GHz), + 'W': (80*u.GHz, 115*u.GHz) +} + +TAP_SERVICE_PATH = 'tap' + +NRAO_FORM_KEYS = { + 'Position': { + 'Source name (astropy Resolver)': ['source_name_resolver', + 'SkyCoord.from_name', _gen_pos_sql], + 'Source name (NRAO)': ['source_name', 'target_name', _gen_str_sql], + 'RA Dec (Sexagesimal)': ['ra_dec', 's_ra, s_dec', _gen_pos_sql], + 'Galactic (Degrees)': ['galactic', 'gal_longitude, gal_latitude', + _gen_pos_sql], + 'Angular resolution (arcsec)': ['spatial_resolution', + 'spatial_resolution', _gen_numeric_sql], + 'Field of view (arcsec)': ['fov', 's_fov', _gen_numeric_sql], + 'Configuration': ['configuration', 'configuration', _gen_numeric_sql], + 'Maximum UV Distance (meters)': ['max_uv_dist', 'max_uv_dist', _gen_numeric_sql] + + + }, + 'Project': { + 'Project code': ['project_code', 'project_code', _gen_str_sql], + 'Telescope': ['instrument', 'instrument_name', _gen_str_sql], + 'Number of Antennas': ['n_ants', 'num_antennas', _gen_str_sql], + + }, + 'Time': { + 'Observation start': ['start_date', 't_min', _gen_datetime_sql], + 'Observation end': ['end_date', 't_max', _gen_datetime_sql], + 'Integration time (s)': ['integration_time', 't_exptime', + _gen_numeric_sql] + }, + 'Polarization': { + 'Polarisation type (Single, Dual, Full)': ['polarisation_type', + 'pol_states', _gen_pol_sql] + }, + 'Energy': { + 'Frequency (GHz)': ['frequency', 'center_frequencies', _gen_numeric_sql], + 'Bandwidth (Hz)': ['bandwidth', 'aggregate_bandwidth', _gen_numeric_sql], + 'Spectral resolution (KHz)': ['spectral_resolution', + 'em_resolution', _gen_spec_res_sql], + 'Band': ['band_list', 'band_list', _gen_band_list_sql] + }, + +} + +_OBSCORE_TO_NRAORESULT = { + 's_ra': 'RA', + 's_dec': 'Dec', +} + + +def _gen_sql(payload): + sql = 'select * from tap_schema.obscore' + where = '' + unused_payload = payload.copy() + if payload: + for constraint in payload: + for attrib_category in NRAO_FORM_KEYS.values(): + for attrib in attrib_category.values(): + if constraint in attrib: + # use the value and the second entry in attrib which + # is the new name of the column + val = payload[constraint] + if constraint == 'em_resolution': + # em_resolution does not require any transformation + attrib_where = _gen_numeric_sql(constraint, val) + else: + attrib_where = attrib[2](attrib[1], val) + if attrib_where: + if where: + where += ' AND ' + else: + where = ' WHERE ' + where += attrib_where + + # Delete this key to see what's left over afterward + # + # Use pop to avoid the slight possibility of trying to remove + # an already removed key + unused_payload.pop(constraint) + + if unused_payload: + # Left over (unused) constraints passed. Let the user know. + remaining = [f'{p} -> {unused_payload[p]}' for p in unused_payload] + raise TypeError(f'Unsupported arguments were passed:\n{remaining}') + + return sql + where + + +class NraoAuth(BaseVOQuery, BaseQuery): + pass + +class NraoClass(BaseQuery): + TIMEOUT = conf.timeout + archive_url = conf.archive_url + USERNAME = conf.username + + def __init__(self): + # sia service does not need disambiguation but tap does + super().__init__() + self._sia = None + self._tap = None + self._datalink = None + self._sia_url = None + self._tap_url = None + self._datalink_url = None + self._auth = NraoAuth() + + @property + def auth(self): + return self._auth + + @property + def datalink(self): + if not self._datalink: + self._datalink = pyvo.dal.adhoc.DatalinkService(self.datalink_url) + return self._datalink + + @property + def datalink_url(self): + if not self._datalink_url: + try: + self._datalink_url = urljoin(self._get_dataarchive_url(), DATALINK_SERVICE_PATH) + except requests.exceptions.HTTPError as err: + log.debug( + f"ERROR getting the NRAO Archive URL: {str(err)}") + raise err + return self._datalink_url + + @property + def sia(self): + if not self._sia: + self._sia = SIA2Service(baseurl=self.sia_url) + return self._sia + + @property + def sia_url(self): + if not self._sia_url: + try: + self._sia_url = urljoin(self._get_dataarchive_url(), SIA_SERVICE_PATH) + except requests.exceptions.HTTPError as err: + log.debug( + f"ERROR getting the NRAO Archive URL: {str(err)}") + raise err + return self._sia_url + + @property + def tap(self): + if not self._tap: + self._tap = pyvo.dal.tap.TAPService(baseurl=self.tap_url, session=self._session) + return self._tap + + @property + def tap_url(self): + if not self._tap_url: + try: + self._tap_url = urljoin(self._get_dataarchive_url(), TAP_SERVICE_PATH) + except requests.exceptions.HTTPError as err: + log.debug( + f"ERROR getting the NRAO Archive URL: {str(err)}") + raise err + return self._tap_url + + def query_tap(self, query, maxrec=None): + """ + Send query to the NRAO TAP. Results in pyvo.dal.TapResult format. + result.table in Astropy table format + + Parameters + ---------- + maxrec : int + maximum number of records to return + + """ + log.debug('TAP query: {}'.format(query)) + return self.tap.search(query, language='ADQL', maxrec=maxrec) + + def _get_dataarchive_url(self): + return tap_urls[0] + + def query_object_async(self, object_name, *, payload=None, **kwargs): + """ + Query the archive for a source name. + + Parameters + ---------- + object_name : str + The object name. Will be resolved by astropy.coord.SkyCoord + payload : dict + Dictionary of additional keywords. See `help`. + """ + if payload is not None: + payload['source_name_resolver'] = object_name + else: + payload = {'source_name_resolver': object_name} + return self.query_async(payload=payload, **kwargs) + + def query_region_async(self, coordinate, radius, *, + get_query_payload=False, + payload=None, **kwargs): + """ + Query the NRAO archive with a source name and radius + + Parameters + ---------- + coordinates : str / `astropy.coordinates` + the identifier or coordinates around which to query. + radius : str / `~astropy.units.Quantity`, optional + the radius of the region + payload : dict + Dictionary of additional keywords. See `help`. + """ + rad = radius + if not isinstance(radius, u.Quantity): + rad = radius*u.deg + obj_coord = commons.parse_coordinates(coordinate).icrs + ra_dec = '{}, {}'.format(obj_coord.to_string(), rad.to(u.deg).value) + if payload is None: + payload = {} + if 'ra_dec' in payload: + payload['ra_dec'] += ' | {}'.format(ra_dec) + else: + payload['ra_dec'] = ra_dec + + if get_query_payload: + return payload + + return self.query_async(payload=payload, **kwargs) + + def query_async(self, payload, *, get_query_payload=False, + maxrec=None, **kwargs): + """ + Perform a generic query with user-specified payload + + Parameters + ---------- + payload : dictionary + Please consult the `help` method + legacy_columns : bool + True to return the columns from the obsolete NRAO advanced query, + otherwise return the current columns based on ObsCore model. + get_query_payload : bool + Flag to indicate whether to simply return the payload. + maxrec : integer + Cap on the amount of records returned. Default is no limit. + + Returns + ------- + + Table with results. Columns are those in the NRAO ObsCore model + (see ``help_tap``) unless ``legacy_columns`` argument is set to True. + """ + + if payload is None: + payload = {} + for arg in kwargs: + value = kwargs[arg] + if arg in payload: + payload[arg] = '{} {}'.format(payload[arg], value) + else: + payload[arg] = value + print(payload) + query = _gen_sql(payload) + print(query) + if get_query_payload: + # Return the TAP query payload that goes out to the server rather + # than the unprocessed payload dict from the python side + return query + + result = self.query_tap(query, maxrec=maxrec) + + if result is not None: + result = result.to_table() + else: + # Should not happen + raise RuntimeError('BUG: Unexpected result None') + + return result + + + def _get_data(self, solr_id, email=None, workflow='runBasicMsWorkflow', + apply_flags=True + ): + """ + Defining this as a private function for now because it's using an + unverified API + + Parameters + ---------- + workflow : 'runBasicMsWorkflow', "runDownloadWorkflow" + """ + url = f'{self.archive_url}/portal/#/subscanViewer/{solr_id}' + + #self._session.headers['User-Agent'] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36" + + resp = self._request('GET', url, cache=False) + resp.raise_for_status() + + eb_deets = self._request('GET', + f'{self.archive_url}/archive-service/restapi_get_full_exec_block_details', + params={'solr_id': solr_id}, + cache=False + ) + eb_deets.raise_for_status() + assert len(self._session.cookies) > 0 + + resp1b = self._request('GET', + f'{self.archive_url}/archive-service/restapi_spw_details_view', + params={'exec_block_id': solr_id.split(":")[-1]}, + cache=False + ) + resp1b.raise_for_status() + + # returned data is doubly json-encoded + jd = json.loads(eb_deets.json()) + locator = jd['curr_eb']['sci_prod_locator'] + project_code = jd['curr_eb']['project_code'] + + instrument = ('VLBA' if 'vlba' in solr_id.lower() else + 'VLA' if 'vla' in solr_id.lower() else + 'EVLA' if 'nrao' in solr_id.lower() else + 'GBT' if 'gbt' in solr_id.lower() else None) + if instrument is None: + raise ValueError("Invalid instrument") + + if instrument == 'VLBA': + downloadDataFormat = "VLBARaw" + elif instrument in ('VLA', 'EVLA'): + # there are other options! + downloadDataFormat = 'MS' + + post_data = { + "emailNotification": email, + "requestDescription": f"{instrument} Download Request", + "archive": "VLA", + "p_telescope": instrument, + "p_project": project_code, + "productLocator": locator, + "requestCommand": "startVlaPPIWorkflow", + "p_workflowEventName": workflow, + "p_downloadDataFormat": downloadDataFormat, + "p_intentsFileName": "intents_hifv.xml", + "p_proceduresFileName": "procedure_hifv.xml" + } + + if instrument in ('VLA', 'EVLA'): + post_data['p_applyTelescopeFlags'] = apply_flags + casareq = self._request('GET', + f'{self.archive_url}/archive-service/restapi_get_casa_version_list', + cache=False + ) + casareq.raise_for_status() + casavdata = json.loads(casareq.json()) + for casav in casavdata['casa_version_list']: + if 'recommended' in casav['version']: + post_data['p_casaHome'] = casav['path'] + + presp = self._request('POST', + f'{self.archive_url}/rh/submission', + data=post_data, + cache=False + ) + presp.raise_for_status() + + # DEBUG print(f"presp.url: {presp.url}") + # DEBUG print(f"cookies: {self._session.cookies}") + resp2 = self._request('GET', presp.url, cache=False) + resp2.raise_for_status() + + for row in resp2.text.split(): + if 'window.location.href=' in row: + subrespurl = row.split("'")[1] + + # DEBUG print(f"subrespurl: {subrespurl}") + # DEBUG print(f"cookies: {self._session.cookies}") + nextresp = self._request('GET', subrespurl, cache=False) + wait_url = nextresp.url + nextresp.raise_for_status() + + if f'{self.archive_url}/rh/requests/' not in wait_url: + raise ValueError(f"Got wrong URL from post request: {wait_url}") + + # to get the right format of response, you need to specify this: + # accept = {"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7"} + + while True: + time.sleep(1) + print(".", end='', flush=True) + resp = self._request('GET', wait_url + "/state", cache=False) + resp.raise_for_status() + if resp.text == 'COMPLETE': + break + + return wait_url + + + +Nrao = NraoClass() diff --git a/astroquery/nrao/tapsql.py b/astroquery/nrao/tapsql.py new file mode 100644 index 0000000000..5a0d4bfe98 --- /dev/null +++ b/astroquery/nrao/tapsql.py @@ -0,0 +1,265 @@ +""" +Utilities for generating ADQL for ALMA TAP service +""" +from datetime import datetime + +from astropy import units as u +import astropy.coordinates as coord +from astropy.time import Time + +ALMA_DATE_FORMAT = '%d-%m-%Y' + + +def _gen_pos_sql(field, value): + result = '' + if field == 'SkyCoord.from_name': + # resolve the source first + if value: + obj_coord = coord.SkyCoord.from_name(value) + frame = 'icrs' + ras = [str(obj_coord.icrs.ra.to(u.deg).value)] + decs = [str(obj_coord.icrs.dec.to(u.deg).value)] + radius = 10 * u.arcmin + else: + raise ValueError('Object name missing') + else: + if field == 's_ra, s_dec': + frame = 'icrs' + else: + frame = 'galactic' + radius = 10*u.arcmin + if ',' in value: + center_coord, rad = value.split(',') + try: + radius = float(rad.strip())*u.degree + except ValueError: + raise ValueError('Cannot parse radius in ' + value) + else: + center_coord = value.strip() + try: + ra, dec = center_coord.split(' ') + except ValueError: + raise ValueError('Cannot find ra/dec in ' + value) + ras = _val_parse(ra, val_type=str) + decs = _val_parse(dec, val_type=str) + + for ra in ras: + for dec in decs: + if result: + result += ' OR ' + if isinstance(ra, str) and isinstance(dec, str): + # circle + center = coord.SkyCoord(ra, dec, + unit=(u.deg, u.deg), + frame=frame) + + result += \ + "CONTAINS(POINT('ICRS',s_ra,s_dec),CIRCLE('ICRS',{},{},{}))=1".\ + format(center.icrs.ra.to(u.deg).value, + center.icrs.dec.to(u.deg).value, + radius.to(u.deg).value) + else: + raise ValueError('Cannot interpret ra({}), dec({}'. + format(ra, dec)) + if ' OR ' in result: + # use brackets for multiple ORs + return '(' + result + ')' + else: + return result + + +def _gen_numeric_sql(field, value): + result = '' + for interval in _val_parse(value, float): + if result: + result += ' OR ' + if isinstance(interval, tuple): + int_min, int_max = interval + if int_min is None: + if int_max is None: + # no constraints on bandwith + pass + else: + result += '{}<={}'.format(field, int_max) + elif int_max is None: + result += '{}>={}'.format(field, int_min) + else: + result += '({1}<={0} AND {0}<={2})'.format(field, int_min, + int_max) + else: + result += '{}={}'.format(field, interval) + if ' OR ' in result: + # use brakets for multiple ORs + return '(' + result + ')' + else: + return result + + +def _gen_str_sql(field, value): + result = '' + for interval in _val_parse(value, str): + if result: + result += ' OR ' + if '*' in interval: + # use LIKE + # escape wildcards if they exists in the value + interval = interval.replace('%', r'\%') # noqa + interval = interval.replace('_', r'\_') # noqa + # ADQL wild cards are % and _ + interval = interval.replace('*', '%') + interval = interval.replace('?', '_') + result += "{} LIKE '{}'".format(field, interval) + else: + result += "{}='{}'".format(field, interval) + if ' OR ' in result: + # use brackets for multiple ORs + return '(' + result + ')' + else: + return result + + +def _gen_datetime_sql(field, value): + result = '' + for interval in _val_parse(value, str): + if result: + result += ' OR ' + if isinstance(interval, tuple): + min_datetime, max_datetime = interval + if max_datetime is None: + result += "{}>={}".format( + field, Time(datetime.strptime(min_datetime, ALMA_DATE_FORMAT)).mjd) + elif min_datetime is None: + result += "{}<={}".format( + field, Time(datetime.strptime(max_datetime, ALMA_DATE_FORMAT)).mjd) + else: + result += "({1}<={0} AND {0}<={2})".format( + field, Time(datetime.strptime(min_datetime, ALMA_DATE_FORMAT)).mjd, + Time(datetime.strptime(max_datetime, ALMA_DATE_FORMAT)).mjd) + else: + # TODO is it just a value (midnight) or the entire day? + result += "{}={}".format( + field, Time(datetime.strptime(interval, ALMA_DATE_FORMAT)).mjd) + if ' OR ' in result: + # use brackets for multiple ORs + return '(' + result + ')' + else: + return result + + +def _gen_spec_res_sql(field, value): + # This needs special treatment because spectral_resolution in AQ is in + # KHz while corresponding em_resolution is in m + result = '' + for interval in _val_parse(value): + if result: + result += ' OR ' + if isinstance(interval, tuple): + min_val, max_val = interval + if max_val is None: + result += "{}<={}".format( + field, + min_val*u.kHz.to(u.m, equivalencies=u.spectral())) + elif min_val is None: + result += "{}>={}".format( + field, + max_val*u.kHz.to(u.m, equivalencies=u.spectral())) + else: + result += "({1}<={0} AND {0}<={2})".format( + field, + max_val*u.kHz.to(u.m, equivalencies=u.spectral()), + min_val*u.kHz.to(u.m, equivalencies=u.spectral())) + else: + result += "{}={}".format( + field, interval*u.kHz.to(u.m, equivalencies=u.spectral())) + if ' OR ' in result: + # use brackets for multiple ORs + return '(' + result + ')' + else: + return result + + +def _gen_pub_sql(field, value): + if value is True: + return "{}='Public'".format(field) + elif value is False: + return "{}='Proprietary'".format(field) + else: + return None + + +def _gen_science_sql(field, value): + if value is True: + return "{}='T'".format(field) + elif value is False: + return "{}='F'".format(field) + else: + return None + + +def _gen_band_list_sql(field, value): + # band list value is expected to be space separated list of bands + if isinstance(value, list): + val = value + else: + val = value.split(' ') + return _gen_str_sql(field, '|'.join( + ['*{}*'.format(_) for _ in val])) + + +def _gen_pol_sql(field, value): + # band list value is expected to be space separated list of bands + val = '' + states_map = {'Stokes I': '*I*', + 'Single': '/LL/', + 'Dual': '/LL/RR/', + 'Full': '/LL/LR/RL/RR/'} + for state in states_map: + if state in value: + if val: + val += '|' + val += states_map[state] + return _gen_str_sql(field, val) + + +def _val_parse(value, val_type=float): + # parses an ALMA query field and returns a list of values (of type + # val_type) or tuples representing parsed values or intervals. Open + # intervals have None at one of the ends + def _one_val_parse(value, val_type=float): + # parses the value and returns corresponding interval for + # sia to work with. E.g <2 => (None, 2) + if value.startswith('<'): + return (None, val_type(value[1:])) + elif value.startswith('>'): + return (val_type(value[1:]), None) + else: + return val_type(value) + result = [] + if isinstance(value, str): + try: + if value.startswith('!'): + start, end = _val_parse(value[2:-1].strip(), val_type=val_type)[0] + result.append((None, start)) + result.append((end, None)) + elif value.startswith('('): + result += _val_parse(value[1:-1], val_type=val_type) + elif '|' in value: + for vv in value.split('|'): + result += _val_parse(vv.strip(), val_type=val_type) + elif '..' in value: + start, end = value.split('..') + if not start or not end: + raise ValueError('start or end interval missing in {}'. + format(value)) + result.append((_one_val_parse(start.strip(), val_type=val_type), + _one_val_parse(end.strip(), val_type=val_type))) + else: + result.append(_one_val_parse(value, val_type=val_type)) + except Exception as e: + raise ValueError( + 'Error parsing {}. Details: {}'.format(value, str(e))) + elif isinstance(value, list): + result = value + else: + result.append(value) + return result From c722b3000c9ab1d170a92c6112b9879d7620f3d1 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 2 Jun 2024 08:18:21 -0400 Subject: [PATCH 2/6] cleanup --- astroquery/nrao/core.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/astroquery/nrao/core.py b/astroquery/nrao/core.py index 77dcf5c987..b1729e9fb9 100644 --- a/astroquery/nrao/core.py +++ b/astroquery/nrao/core.py @@ -31,7 +31,6 @@ from ..exceptions import LoginError from ..utils import commons -from ..utils.process_asyncs import async_to_sync from ..query import BaseQuery, QueryWithLogin, BaseVOQuery from . import conf, auth_urls, tap_urls from astroquery.exceptions import CorruptDataWarning @@ -144,8 +143,12 @@ def _gen_sql(payload): return sql + where -class NraoAuth(BaseVOQuery, BaseQuery): - pass +# class NraoAuth(BaseVOQuery, BaseQuery): +# """ +# TODO: this needs to be implemented +# """ +# pass + class NraoClass(BaseQuery): TIMEOUT = conf.timeout @@ -161,7 +164,7 @@ def __init__(self): self._sia_url = None self._tap_url = None self._datalink_url = None - self._auth = NraoAuth() + # TODO self._auth = NraoAuth() @property def auth(self): @@ -235,7 +238,7 @@ def query_tap(self, query, maxrec=None): def _get_dataarchive_url(self): return tap_urls[0] - def query_object_async(self, object_name, *, payload=None, **kwargs): + def query_object(self, object_name, *, payload=None, **kwargs): """ Query the archive for a source name. @@ -250,9 +253,9 @@ def query_object_async(self, object_name, *, payload=None, **kwargs): payload['source_name_resolver'] = object_name else: payload = {'source_name_resolver': object_name} - return self.query_async(payload=payload, **kwargs) + return self.query(payload=payload, **kwargs) - def query_region_async(self, coordinate, radius, *, + def query_region(self, coordinate, radius, *, get_query_payload=False, payload=None, **kwargs): """ @@ -282,9 +285,9 @@ def query_region_async(self, coordinate, radius, *, if get_query_payload: return payload - return self.query_async(payload=payload, **kwargs) + return self.query(payload=payload, **kwargs) - def query_async(self, payload, *, get_query_payload=False, + def query(self, payload, *, get_query_payload=False, maxrec=None, **kwargs): """ Perform a generic query with user-specified payload @@ -300,12 +303,12 @@ def query_async(self, payload, *, get_query_payload=False, Flag to indicate whether to simply return the payload. maxrec : integer Cap on the amount of records returned. Default is no limit. + [ we don't know for sure that this is implemented for NRAO ] Returns ------- - Table with results. Columns are those in the NRAO ObsCore model - (see ``help_tap``) unless ``legacy_columns`` argument is set to True. + Table with results. """ if payload is None: @@ -316,9 +319,7 @@ def query_async(self, payload, *, get_query_payload=False, payload[arg] = '{} {}'.format(payload[arg], value) else: payload[arg] = value - print(payload) query = _gen_sql(payload) - print(query) if get_query_payload: # Return the TAP query payload that goes out to the server rather # than the unprocessed payload dict from the python side @@ -339,8 +340,10 @@ def _get_data(self, solr_id, email=None, workflow='runBasicMsWorkflow', apply_flags=True ): """ - Defining this as a private function for now because it's using an - unverified API + This private function can, under a very limited set of circumstances, + be used to retrieve the data download page from the NRAO data handler. + Because the data handler is run through a fairly complex, multi-step, + private API, we are not yet ready to make this service public. Parameters ---------- @@ -419,8 +422,6 @@ def _get_data(self, solr_id, email=None, workflow='runBasicMsWorkflow', ) presp.raise_for_status() - # DEBUG print(f"presp.url: {presp.url}") - # DEBUG print(f"cookies: {self._session.cookies}") resp2 = self._request('GET', presp.url, cache=False) resp2.raise_for_status() @@ -428,8 +429,6 @@ def _get_data(self, solr_id, email=None, workflow='runBasicMsWorkflow', if 'window.location.href=' in row: subrespurl = row.split("'")[1] - # DEBUG print(f"subrespurl: {subrespurl}") - # DEBUG print(f"cookies: {self._session.cookies}") nextresp = self._request('GET', subrespurl, cache=False) wait_url = nextresp.url nextresp.raise_for_status() From 7fb42fb0e0b6195a750daf44f0d51a3012e4cc81 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 2 Jun 2024 08:27:15 -0400 Subject: [PATCH 3/6] start on nrao tests --- astroquery/nrao/tests/__init__.py | 0 astroquery/nrao/tests/data/nrao-empty.txt | 1 + 2 files changed, 1 insertion(+) create mode 100644 astroquery/nrao/tests/__init__.py create mode 100644 astroquery/nrao/tests/data/nrao-empty.txt diff --git a/astroquery/nrao/tests/__init__.py b/astroquery/nrao/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/astroquery/nrao/tests/data/nrao-empty.txt b/astroquery/nrao/tests/data/nrao-empty.txt new file mode 100644 index 0000000000..eaa6ed04c1 --- /dev/null +++ b/astroquery/nrao/tests/data/nrao-empty.txt @@ -0,0 +1 @@ +dataproduct_type calib_level obs_collection obs_id s_ra s_dec s_fov obs_publisher_did access_url access_format target_name s_region s_resolution t_min t_max t_exptime t_resolution freq_min freq_max em_min em_max em_res_power em_xel o_ucd facility_name instrument_name pol_states configuration access_estsize num_antennas max_uv_dist spw_names center_frequencies bandwidths nums_channels spectral_resolutions aggregate_bandwidth From 07a61b3b38ebf03db2dc3f4152ee830485b2456c Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 2 Jun 2024 08:27:34 -0400 Subject: [PATCH 4/6] copied alma tests, but they are not all valid yet --- astroquery/nrao/tests/test_nrao.py | 546 ++++++++++++++++++++ astroquery/nrao/tests/test_nrao_remote.py | 593 ++++++++++++++++++++++ 2 files changed, 1139 insertions(+) create mode 100644 astroquery/nrao/tests/test_nrao.py create mode 100644 astroquery/nrao/tests/test_nrao_remote.py diff --git a/astroquery/nrao/tests/test_nrao.py b/astroquery/nrao/tests/test_nrao.py new file mode 100644 index 0000000000..4d9f8e08b6 --- /dev/null +++ b/astroquery/nrao/tests/test_nrao.py @@ -0,0 +1,546 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +from io import StringIO +import os + +import pytest +from unittest.mock import patch, Mock + +from astropy import units as u +from astropy import coordinates as coord +from astropy.table import Table +from astropy.coordinates import SkyCoord +from astropy.time import Time + +from astroquery.nrao import Nrao +from astroquery.nrao.core import _gen_sql, _OBSCORE_TO_nraoRESULT +from astroquery.nrao.tapsql import _val_parse + + +DATA_DIR = os.path.join(os.path.dirname(__file__), 'data') + + +def data_path(filename): + return os.path.join(DATA_DIR, filename) + + +def assert_called_with(mock, band=None, calib_level=None, collection=None, + data_rights=None, data_type=None, exptime=None, + facility=None, + field_of_view=None, instrument=None, maxrec=None, + pol=None, pos=None, publisher_did=None, res_format=None, + spatial_resolution=None, spectral_resolving_power=None, + target_name=None, time=None, timeres=None): + mock.assert_called_once_with( + band=band, calib_level=calib_level, + collection=collection, data_rights=data_rights, data_type=data_type, + exptime=exptime, facility=facility, + field_of_view=field_of_view, instrument=instrument, + maxrec=maxrec, pol=pol, pos=pos, publisher_did=publisher_did, + res_format=res_format, spatial_resolution=spatial_resolution, + spectral_resolving_power=spectral_resolving_power, + target_name=target_name, time=time, timeres=timeres) + +def test_gen_pos_sql(): + # test circle + # radius defaults to 1.0arcmin + common_select = 'select * from ivoa.obscore WHERE ' + assert _gen_sql({'ra_dec': '1 2'}) == common_select + "(INTERSECTS(" \ + "CIRCLE('ICRS',1.0,2.0,0.16666666666666666), s_region) = 1)" + assert _gen_sql({'ra_dec': '1 2, 3'}) == common_select + \ + "(INTERSECTS(CIRCLE('ICRS',1.0,2.0,3.0), s_region) = 1)" + assert _gen_sql({'ra_dec': '12:13:14.0 -00:01:02.1, 3'}) == \ + common_select + \ + "(INTERSECTS(CIRCLE('ICRS',12.220555555555556,-0.01725,3.0), " \ + "s_region) = 1)" + # multiple circles + assert _gen_sql({'ra_dec': '1 20|40, 3'}) == common_select + \ + "((INTERSECTS(CIRCLE('ICRS',1.0,20.0,3.0), s_region) = 1) OR " \ + "(INTERSECTS(CIRCLE('ICRS',1.0,40.0,3.0), s_region) = 1))" + assert _gen_sql({'ra_dec': '1|10 20|40, 1'}) == common_select + \ + "((INTERSECTS(CIRCLE('ICRS',1.0,20.0,1.0), s_region) = 1) OR " \ + "(INTERSECTS(CIRCLE('ICRS',1.0,40.0,1.0), s_region) = 1) OR " \ + "(INTERSECTS(CIRCLE('ICRS',10.0,20.0,1.0), s_region) = 1) OR " \ + "(INTERSECTS(CIRCLE('ICRS',10.0,40.0,1.0), s_region) = 1))" + + # test range + assert _gen_sql({'ra_dec': '0.0..20.0 >20'}) == common_select + \ + "(INTERSECTS(RANGE_S2D(0.0,20.0,20.0,90.0), s_region) = 1)" + assert _gen_sql({'ra_dec': '12:13:14..12:13:20 <4:20:20'}) == \ + common_select +\ + "(INTERSECTS(RANGE_S2D(12.220555555555556,12.222222222222223," \ + "-90.0,4.338888888888889), s_region) = 1)" + assert _gen_sql({'ra_dec': '!(10..20) >60'}) == common_select + \ + "((INTERSECTS(RANGE_S2D(0.0,10.0,60.0,90.0), s_region) = 1) OR " \ + "(INTERSECTS(RANGE_S2D(20.0,0.0,60.0,90.0), s_region) = 1))" + assert _gen_sql({'ra_dec': '0..20|40..60 <-50|>50'}) == common_select + \ + "((INTERSECTS(RANGE_S2D(0.0,20.0,-90.0,-50.0), s_region) = 1) OR " \ + "(INTERSECTS(RANGE_S2D(0.0,20.0,50.0,90.0), s_region) = 1) OR " \ + "(INTERSECTS(RANGE_S2D(40.0,60.0,-90.0,-50.0), s_region) = 1) OR " \ + "(INTERSECTS(RANGE_S2D(40.0,60.0,50.0,90.0), s_region) = 1))" + + # galactic frame + center = coord.SkyCoord(1, 2, unit=u.deg, frame='galactic') + assert _gen_sql({'galactic': '1 2, 3'}) == common_select + "(INTERSECTS(" \ + "CIRCLE('ICRS',{},{},3.0), s_region) = 1)".format( + center.icrs.ra.to(u.deg).value, center.icrs.dec.to(u.deg).value) + min_point = coord.SkyCoord('12:13:14.0', '-00:01:02.1', unit=u.deg, + frame='galactic') + max_point = coord.SkyCoord('12:14:14.0', '-00:00:02.1', unit=(u.deg, u.deg), + frame='galactic') + assert _gen_sql( + {'galactic': '12:13:14.0..12:14:14.0 -00:01:02.1..-00:00:02.1'}) == \ + common_select +\ + "(INTERSECTS(RANGE_S2D({},{},{},{}), s_region) = 1)".format( + min_point.icrs.ra.to(u.deg).value, + max_point.icrs.ra.to(u.deg).value, + min_point.icrs.dec.to(u.deg).value, + max_point.icrs.dec.to(u.deg).value) + + # combination of frames + center = coord.SkyCoord(1, 2, unit=u.deg, frame='galactic') + assert _gen_sql({'ra_dec': '1 2, 3', 'galactic': '1 2, 3'}) == \ + "select * from ivoa.obscore WHERE " \ + "(INTERSECTS(CIRCLE('ICRS',1.0,2.0,3.0), s_region) = 1) AND " \ + "(INTERSECTS(CIRCLE('ICRS',{},{},3.0), s_region) = 1)".format( + center.icrs.ra.to(u.deg).value, center.icrs.dec.to(u.deg).value) + + +def test_gen_numeric_sql(): + common_select = 'select * from ivoa.obscore WHERE ' + assert _gen_sql({'bandwidth': '23'}) == common_select + 'bandwidth=23.0' + assert _gen_sql({'bandwidth': '22 .. 23'}) == common_select +\ + '(22.0<=bandwidth AND bandwidth<=23.0)' + assert _gen_sql( + {'bandwidth': '<100'}) == common_select + 'bandwidth<=100.0' + assert _gen_sql( + {'bandwidth': '>100'}) == common_select + 'bandwidth>=100.0' + assert _gen_sql({'bandwidth': '!(20 .. 30)'}) == common_select + \ + '(bandwidth<=20.0 OR bandwidth>=30.0)' + assert _gen_sql({'bandwidth': '<10 | >20'}) == common_select + \ + '(bandwidth<=10.0 OR bandwidth>=20.0)' + assert _gen_sql({'bandwidth': 100, 'frequency': '>3'}) == common_select +\ + "bandwidth=100 AND frequency>=3.0" + + +def test_gen_str_sql(): + common_select = 'select * from ivoa.obscore WHERE ' + assert _gen_sql({'pub_title': '*Cosmic*'}) == common_select + \ + "pub_title LIKE '%Cosmic%'" + assert _gen_sql({'pub_title': 'Galaxy'}) == common_select + \ + "pub_title='Galaxy'" + assert _gen_sql({'pub_abstract': '*50% of the mass*'}) == common_select + \ + r"pub_abstract LIKE '%50\% of the mass%'" + assert _gen_sql({'project_code': '2012.* | 2013.?3*'}) == common_select + \ + "(proposal_id LIKE '2012.%' OR proposal_id LIKE '2013._3%')" + # test with brackets like the form example + assert _gen_sql({'project_code': '(2012.* | 2013.?3*)'}) == common_select + \ + "(proposal_id LIKE '2012.%' OR proposal_id LIKE '2013._3%')" + + +def test_gen_array_sql(): + # test string array input (regression in #2094) + # string arrays should be OR'd together + common_select = "select * from ivoa.obscore WHERE " + test_keywords = ["High-mass star formation", "Disks around high-mass stars"] + assert (_gen_sql({"spatial_resolution": "<0.1", "science_keyword": test_keywords}) + == common_select + ("spatial_resolution<=0.1 AND (science_keyword='High-mass star formation' " + "OR science_keyword='Disks around high-mass stars')")) + + +def test_gen_datetime_sql(): + common_select = 'select * from ivoa.obscore WHERE ' + assert _gen_sql({'start_date': '01-01-2020'}) == common_select + \ + "t_min=58849.0" + assert _gen_sql({'start_date': '>01-01-2020'}) == common_select + \ + "t_min>=58849.0" + assert _gen_sql({'start_date': '<01-01-2020'}) == common_select + \ + "t_min<=58849.0" + assert _gen_sql({'start_date': '(01-01-2020 .. 01-02-2020)'}) == \ + common_select + "(58849.0<=t_min AND t_min<=58880.0)" + + +def test_gen_spec_res_sql(): + common_select = 'select * from ivoa.obscore WHERE ' + assert _gen_sql({'spectral_resolution': 70}) == common_select + "em_resolution=20985472.06" + assert _gen_sql({'spectral_resolution': '<70'}) == common_select + "em_resolution>=20985472.06" + assert _gen_sql({'spectral_resolution': '>70'}) == common_select + "em_resolution<=20985472.06" + assert _gen_sql({'spectral_resolution': '(70 .. 80)'}) == common_select + \ + "(23983396.64<=em_resolution AND em_resolution<=20985472.06)" + assert _gen_sql({'spectral_resolution': '(70|80)'}) == common_select + \ + "(em_resolution=20985472.06 OR em_resolution=23983396.64)" + + +def test_gen_public_sql(): + common_select = 'select * from ivoa.obscore' + assert _gen_sql({'public_data': None}) == common_select + assert _gen_sql({'public_data': True}) == common_select +\ + " WHERE data_rights='Public'" + assert _gen_sql({'public_data': False}) == common_select + \ + " WHERE data_rights='Proprietary'" + + +def test_gen_science_sql(): + common_select = 'select * from ivoa.obscore' + assert _gen_sql({'science_observation': None}) == common_select + assert _gen_sql({'science_observation': True}) == common_select +\ + " WHERE science_observation='T'" + assert _gen_sql({'science_observation': False}) == common_select +\ + " WHERE science_observation='F'" + + +def test_pol_sql(): + common_select = 'select * from ivoa.obscore' + assert _gen_sql({'polarisation_type': 'Stokes I'}) == common_select +\ + " WHERE pol_states LIKE '%I%'" + assert _gen_sql({'polarisation_type': 'Single'}) == common_select + \ + " WHERE pol_states='/XX/'" + assert _gen_sql({'polarisation_type': 'Dual'}) == common_select + \ + " WHERE pol_states='/XX/YY/'" + assert _gen_sql({'polarisation_type': 'Full'}) == common_select + \ + " WHERE pol_states='/XX/XY/YX/YY/'" + assert _gen_sql({'polarisation_type': ['Single', 'Dual']}) == \ + common_select + " WHERE (pol_states='/XX/' OR pol_states='/XX/YY/')" + assert _gen_sql({'polarisation_type': 'Single, Dual'}) == \ + common_select + " WHERE (pol_states='/XX/' OR pol_states='/XX/YY/')" + + +def test_unused_args(): + nrao = Nrao() + nrao._get_dataarchive_url = Mock() + # with patch('astroquery.nrao.tapsql.coord.SkyCoord.from_name') as name_mock, pytest.raises(TypeError) as typeError: + with patch('astroquery.nrao.tapsql.coord.SkyCoord.from_name') as name_mock: + with pytest.raises(TypeError) as typeError: + name_mock.return_value = SkyCoord(1, 2, unit='deg') + nrao.query_object('M13', public=False, bogus=True, nope=False, band_list=[3]) + + assert "['bogus -> True', 'nope -> False']" in str(typeError.value) + + +def test_query(): + # Tests the query and return values + tap_mock = Mock() + empty_result = Table.read(os.path.join(DATA_DIR, 'nrao-empty.txt'), + format='ascii') + mock_result = Mock() + mock_result.to_table.return_value = empty_result + tap_mock.search.return_value = mock_result + nrao = Nrao() + nrao._get_dataarchive_url = Mock() + nrao._tap = tap_mock + result = nrao.query_region(SkyCoord(1*u.deg, 2*u.deg, frame='icrs'), + radius=1*u.deg) + assert len(result) == 0 + assert 'proposal_id' in result.columns + tap_mock.search.assert_called_once_with( + "select * from tap_schema.obscore WHERE CONTAINS(POINT('ICRS',s_ra,s_dec),CIRCLE('ICRS',1.0,2.0,1.0))=1", + language='ADQL', maxrec=None) + + # one row result + tap_mock = Mock() + onerow_result = Table.read(os.path.join(DATA_DIR, 'nrao-onerow.txt'), + format='ascii') + mock_result = Mock() + mock_result.to_table.return_value = onerow_result + tap_mock.search.return_value = mock_result + nrao = Nrao() + nrao._tap = tap_mock + with patch('astroquery.nrao.tapsql.coord.SkyCoord.from_name') as name_mock: + name_mock.return_value = SkyCoord(1, 2, unit='deg') + result = nrao.query_object('M83', public=False, + band_list=[3]) + assert len(result) == 1 + + tap_mock.search.assert_called_once_with( + "select * from ivoa.obscore WHERE " + "(INTERSECTS(CIRCLE('ICRS',1.0,2.0,0.16666666666666666), s_region) = 1) " + "AND band_list LIKE '%3%' AND science_observation='T' AND " + "data_rights='Proprietary'", + language='ADQL', maxrec=None) + + # repeat for legacy columns + mock_result = Mock() + tap_mock = Mock() + mock_result.to_table.return_value = onerow_result + tap_mock.search.return_value = mock_result + nrao = Nrao() + nrao._tap = tap_mock + with patch('astroquery.nrao.tapsql.coord.SkyCoord.from_name') as name_mock: + name_mock.return_value = SkyCoord(1, 2, unit='deg') + result_legacy = nrao.query_object('M83', public=False, + legacy_columns=True, + band_list=[3]) + assert len(result) == 1 + + assert 'Project code' in result_legacy.columns + tap_mock.search.assert_called_once_with( + "select * from ivoa.obscore WHERE " + "(INTERSECTS(CIRCLE('ICRS',1.0,2.0,0.16666666666666666), s_region) = 1) " + "AND band_list LIKE '%3%' AND science_observation='T' AND " + "data_rights='Proprietary'", + language='ADQL', maxrec=None) + row_legacy = result_legacy[0] + row = result[0] + for item in _OBSCORE_TO_nraoRESULT.items(): + if item[0] == 't_min': + assert Time(row[item[0]], format='mjd').strftime('%d-%m-%Y') ==\ + row_legacy[item[1]] + else: + assert row[item[0]] == row_legacy[item[1]] + + # query with different arguments + tap_mock = Mock() + empty_result = Table.read(os.path.join(DATA_DIR, 'nrao-empty.txt'), + format='ascii') + mock_result = Mock() + mock_result.to_table.return_value = empty_result + tap_mock.search.return_value = mock_result + nrao = Nrao() + nrao._get_dataarchive_url = Mock() + nrao._tap = tap_mock + result = nrao.query_region('1 2', radius=1*u.deg, + payload={'frequency': '22'}, public=None, + band_list='1 3', science=False, + start_date='01-01-2010', + polarisation_type='Dual', + fov=0.0123130, + integration_time=25) + assert len(result) == 0 + tap_mock.search.assert_called_with( + "select * from ivoa.obscore WHERE frequency=22.0 AND " + "(INTERSECTS(CIRCLE('ICRS',1.0,2.0,1.0), s_region) = 1) AND " + "(band_list LIKE '%1%' OR band_list LIKE '%3%') AND " + "t_min=55197.0 AND pol_states='/XX/YY/' AND s_fov=0.012313 AND " + "t_exptime=25 AND science_observation='F'", + language='ADQL', maxrec=None + ) + + +def test_sia(): + sia_mock = Mock() + empty_result = Table.read(os.path.join(DATA_DIR, 'nrao-empty.txt'), + format='ascii') + sia_mock.search.return_value = Mock(table=empty_result) + nrao = Nrao() + nrao._get_dataarchive_url = Mock() + nrao._sia = sia_mock + result = nrao.query_sia(pos='CIRCLE 1 2 1', calib_level=[0, 1], + data_rights='Public', + band=(300, 400), + time=545454, maxrec=10, pol=['XX', 'YY'], + instrument='JAO', collection='nrao', + field_of_view=0.0123130, data_type='cube', + target_name='J0423-013', + publisher_did='ADS/JAO.nrao#2013.1.00546.S', + exptime=25) + assert len(result.table) == 0 + assert_called_with(sia_mock.search, calib_level=[0, 1], + band=(300, 400), data_type='cube', + pos='CIRCLE 1 2 1', + time=545454, maxrec=10, pol=['XX', 'YY'], + instrument='JAO', collection='nrao', + data_rights='Public', + field_of_view=0.0123130, + target_name='J0423-013', + publisher_did='ADS/JAO.nrao#2013.1.00546.S', exptime=25) + + +def test_tap(): + tap_mock = Mock() + empty_result = Table.read(os.path.join(DATA_DIR, 'nrao-empty.txt'), + format='ascii') + tap_mock.search.return_value = Mock(table=empty_result) + nrao = Nrao() + nrao._get_dataarchive_url = Mock() + nrao._tap = tap_mock + result = nrao.query_tap('select * from ivoa.ObsCore') + assert len(result.table) == 0 + + tap_mock.search.assert_called_once_with('select * from ivoa.ObsCore', + language='ADQL', maxrec=None) + + +@pytest.mark.parametrize('data_archive_url', + [ + ('https://nraoscience.nrao.edu'), + ('https://nraoscience.eso.org'), + ('https://nraoscience.nao.ac.jp') + ]) +def test_tap_url(data_archive_url): + _test_tap_url(data_archive_url) + + +def _test_tap_url(data_archive_url): + nrao = Nrao() + nrao._get_dataarchive_url = Mock(return_value=data_archive_url) + nrao._get_dataarchive_url.reset_mock() + assert nrao.tap_url == f"{data_archive_url}/tap" + + +@pytest.mark.parametrize('data_archive_url', + [ + ('https://nraoscience.nrao.edu'), + ('https://nraoscience.eso.org'), + ('https://nraoscience.nao.ac.jp') + ]) +def test_sia_url(data_archive_url): + _test_sia_url(data_archive_url) + + +def _test_sia_url(data_archive_url): + nrao = Nrao() + nrao._get_dataarchive_url = Mock(return_value=data_archive_url) + nrao._get_dataarchive_url.reset_mock() + assert nrao.sia_url == f"{data_archive_url}/sia2" + + +@pytest.mark.parametrize('data_archive_url', + [ + ('https://nraoscience.nrao.edu'), + ('https://nraoscience.eso.org'), + ('https://nraoscience.nao.ac.jp') + ]) +def test_datalink_url(data_archive_url): + _test_datalink_url(data_archive_url) + + +def _test_datalink_url(data_archive_url): + nrao = Nrao() + nrao._get_dataarchive_url = Mock(return_value=data_archive_url) + nrao._get_dataarchive_url.reset_mock() + assert nrao.datalink_url == f"{data_archive_url}/datalink/sync" + + +def test_get_data_info(): + class MockDataLinkService: + def run_sync(self, uid): + return _mocked_datalink_sync(uid) + + nrao = Nrao() + nrao._get_dataarchive_url = Mock() + nrao._datalink = MockDataLinkService() + result = nrao.get_data_info(uids='uid://A001/X12a3/Xe9') + assert len(result) == 9 + + +# This method will be used by the mock in test_get_data_info_expand_tarfiles to replace requests.get +def _mocked_datalink_sync(*args, **kwargs): + class MockResponse: + adhoc_service_1_param1 = type('', (object, ), {'ID': 'standardID', + 'value': 'ivo://ivoa.net/std/DataLink#links-1.0'})() + adhoc_service_1_param2 = type( + '', (object, ), { + 'ID': 'accessURL', + 'value': 'https://nraoscience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar'} + )() + adhoc_service_1 = type( + '', (object, ), { + 'ID': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar', 'params': [ + adhoc_service_1_param1, adhoc_service_1_param2]})() + + adhoc_service_2_param1 = type('', (object, ), {'ID': 'standardID', + 'value': 'ivo://ivoa.net/std/DataLink#links-1.0'})() + adhoc_service_2_param2 = type( + '', (object, ), { + 'ID': 'accessURL', + 'value': 'https://nraoscience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar'} + )() + adhoc_service_2 = type( + '', (object, ), { + 'ID': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar', 'params': [ + adhoc_service_1_param1, adhoc_service_1_param2]})() + + adhoc_services = { + 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar': adhoc_service_1, + 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar': adhoc_service_2 + } + + def __init__(self, table): + self.table = table + + def to_table(self): + return self.table + + @property + def status(self): + return ['OK'] + + def iter_adhocservices(self): + return [self.adhoc_service_1, self.adhoc_service_2] + + def get_adhocservice_by_id(self, adhoc_service_id): + return self.adhoc_services[adhoc_service_id] + + print(f"\n\nFOUND ARGS {args}\n\n") + + if args[0] == 'uid://A001/X12a3/Xe9': + return MockResponse(Table.read(data_path('nrao-datalink.xml'), format='votable')) + elif args[0] == '2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar': + return MockResponse(Table.read(data_path('nrao-datalink-recurse-this.xml'), format='votable')) + elif args[0] == '2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar': + return MockResponse(Table.read(data_path('nrao-datalink-recurse-aux.xml'), format='votable')) + + pytest.fail('Should not get here.') + + +# @patch('pyvo.dal.adhoc.DatalinkService', side_effect=_mocked_datalink_sync) +def test_get_data_info_expand_tarfiles(): + class MockDataLinkService: + def run_sync(self, uid): + return _mocked_datalink_sync(uid) + + nrao = Nrao() + nrao._datalink = MockDataLinkService() + result = nrao.get_data_info(uids='uid://A001/X12a3/Xe9', expand_tarfiles=True) + + # Entire expanded structure is 19 links long. + assert len(result) == 19 + + +def test_galactic_query(): + """ + regression test for 1867 + """ + tap_mock = Mock() + empty_result = Table.read(os.path.join(DATA_DIR, 'nrao-empty.txt'), + format='ascii') + mock_result = Mock() + mock_result.to_table.return_value = empty_result + tap_mock.search.return_value = mock_result + nrao = Nrao() + nrao._get_dataarchive_url = Mock() + nrao._tap = tap_mock + result = nrao.query_region(SkyCoord(0*u.deg, 0*u.deg, frame='galactic'), + radius=1*u.deg, get_query_payload=True) + + assert "'ICRS',266.405,-28.9362,1.0" in result + + +def test_download_files(): + def _requests_mock(method, url, **kwargs): + response = Mock() + response.headers = { + 'Content-Disposition': 'attachment; ' + 'filename={}'.format(url.split('/')[-1])} + return response + + def _download_file_mock(url, file_name, **kwargs): + return file_name + nrao = Nrao() + nrao._request = Mock(side_effect=_requests_mock) + nrao._download_file = Mock(side_effect=_download_file_mock) + downloaded_files = nrao.download_files(['https://location/file1']) + assert len(downloaded_files) == 1 + assert downloaded_files[0].endswith('file1') + + nrao._request.reset_mock() + nrao._download_file.reset_mock() + downloaded_files = nrao.download_files(['https://location/file1', + 'https://location/file2']) + assert len(downloaded_files) == 2 + + # error cases + nrao._request = Mock() + # no Content-Disposition results in no downloaded file + nrao._request.return_value = Mock(headers={}) + result = nrao.download_files(['https://location/file1']) + assert not result diff --git a/astroquery/nrao/tests/test_nrao_remote.py b/astroquery/nrao/tests/test_nrao_remote.py new file mode 100644 index 0000000000..3db8969899 --- /dev/null +++ b/astroquery/nrao/tests/test_nrao_remote.py @@ -0,0 +1,593 @@ +# Licensed under a 3-clause BSD style license - see LICENSE.rst +from datetime import datetime, timezone +import os +from pathlib import Path +from urllib.parse import urlparse +import re +from unittest.mock import Mock, MagicMock, patch + +from astropy import coordinates +from astropy import units as u +import numpy as np +import pytest + +from pyvo.dal.exceptions import DALOverflowWarning + +from astroquery.exceptions import CorruptDataWarning +from .. import Alma + +# ALMA tests involving staging take too long, leading to travis timeouts +# TODO: make this a configuration item +SKIP_SLOW = True + +all_colnames = {'Project code', 'Source name', 'RA', 'Dec', 'Band', + 'Frequency resolution', 'Integration', 'Release date', + 'Frequency support', 'Velocity resolution', 'Pol products', + 'Observation date', 'PI name', 'PWV', 'Member ous id', + 'Asdm uid', 'Project title', 'Project type', 'Scan intent', + 'Spatial resolution', 'Largest angular scale', + 'QA2 Status', 'Group ous id', 'Pub'} + +download_hostname = 'almascience.eso.org' + + +@pytest.fixture +def alma(request): + """ + Returns an alma client class. `--alma-site` pytest option can be used + to have the client run against a specific site + :param request: pytest request fixture + :return: alma client to use in tests + """ + alma = Alma() + alma_site = request.config.getoption('--alma-site', + 'almascience.eso.org') + alma.archive_url = 'https://{}'.format(alma_site) + return alma + + +@pytest.mark.remote_data +class TestAlma: + def test_public(self, alma): + with pytest.warns(expected_warning=DALOverflowWarning, + match="Partial result set. Potential causes MAXREC, async storage space, etc."): + results = alma.query(payload=None, public=True, maxrec=100) + assert len(results) == 100 + for row in results: + assert row['data_rights'] == 'Public' + with pytest.warns(expected_warning=DALOverflowWarning, + match="Partial result set. Potential causes MAXREC, async storage space, etc."): + results = alma.query(payload=None, public=False, maxrec=100) + assert len(results) == 100 + for row in results: + assert row['data_rights'] == 'Proprietary' + + def test_SgrAstar(self, tmp_path, alma): + alma.cache_location = tmp_path + + result_s = alma.query_object('Sgr A*', legacy_columns=True) + + assert '2013.1.00857.S' in result_s['Project code'] + # "The Brick", g0.253, is in this one + # assert b'2011.0.00217.S' in result_c['Project code'] # missing cycle 1 data + + def test_freq(self, alma): + payload = {'frequency': '85..86'} + result = alma.query(payload) + assert len(result) > 0 + for row in result: + # returned em_min and em_max are in m + assert row['frequency'] >= 85 + assert row['frequency'] <= 100 + assert '3' in row['band_list'] + + def test_bands(self, alma): + payload = {'band_list': ['5', '7']} + # Added maxrec here as downloading and reading the results take too long. + with pytest.warns(expected_warning=DALOverflowWarning, + match="Partial result set. Potential causes MAXREC, async storage space, etc."): + result = alma.query(payload, maxrec=1000) + assert len(result) > 0 + for row in result: + assert ('5' in row['band_list']) or ('7' in row['band_list']) + + def test_equivalent_columns(self, alma): + # this test is to ensure that queries using original column names + # return the same results as the ones that use ObsCore names + # original + result_orig = alma.query(payload={'project_code': '2011.0.00131.S'}, + legacy_columns=True) + result_obscore = alma.query(payload={'proposal_id': '2011.0.00131.S'}, + legacy_columns=True) + assert len(result_orig) == len(result_obscore) + for row in result_orig: + assert row['Project code'] == '2011.0.00131.S' + for row in result_obscore: + assert row['Project code'] == '2011.0.00131.S' + + def test_alma_source_name(self, alma): + payload = {'source_name_alma': 'GRB021004'} + result = alma.query(payload) + assert len(result) > 0 + for row in result: + assert 'GRB021004' == row['target_name'] + + def test_ra_dec(self, alma): + payload = {'ra_dec': '181.0192d -0.01928d'} + result = alma.query(payload) + assert len(result) > 0 + + @pytest.mark.skipif("SKIP_SLOW") + def test_m83(self, tmp_path, alma): + # Runs for over 9 minutes + alma.cache_location = tmp_path + + m83_data = alma.query_object('M83', science=True, legacy_columns=True) + uids = np.unique(m83_data['Member ous id']) + link_list = alma.get_data_info(uids) + + # On Feb 8, 2016 there were 83 hits. This number should never go down. + # Except it has. On May 18, 2016, there were 47. + assert len(link_list) >= 47 + + def test_data_proprietary(self, alma): + # public + assert not alma.is_proprietary('uid://A001/X12a3/Xe9') + IVOA_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%f" + now = datetime.now(timezone.utc).strftime(IVOA_DATE_FORMAT)[:-3] + query = "select top 1 member_ous_uid from ivoa.obscore where " \ + "obs_release_date > '{}'".format(now) + result = alma.query_tap(query) + assert len(result.to_table()) == 1 + # proprietary + assert alma.is_proprietary(result.to_table()[0][0]) + # non existent + with pytest.raises(AttributeError): + alma.is_proprietary('uid://NON/EXI/STING') + + def test_retrieve_data(self, tmp_path, alma): + """ + Regression test for issue 2490 (the retrieval step will simply fail if + given a blank line, so all we're doing is testing that it runs) + """ + alma.cache_location = tmp_path + + # small solar TP-only data set (<1 GB) + uid = 'uid://A001/X87c/X572' + + alma.retrieve_data_from_uid([uid]) + + def test_data_info(self, tmp_path, alma): + alma.cache_location = tmp_path + + uid = 'uid://A001/X12a3/Xe9' + data_info = alma.get_data_info(uid, expand_tarfiles=True) + for file in data_info: + # TODO found files that do not match info. + # assert u.isclose(file['content_length']*u.B, + # alma._HEADER_data_size([file['access_url']])[1]),\ + # 'File {} size: datalink and head do not match'.\ + # format(file['access_url']) + pass + + # compare with tarball version + data_info_tar = alma.get_data_info(uid, expand_tarfiles=False) + + # The expanded table should be much longer than the non-expanded table. + assert len(data_info) > len(data_info_tar) + # size is the same - not working because service inconsistencies + # assert sum(data_info['content_length']) == \ + # sum(data_info_tar['content_length']) + # check smallest file downloads correctly + file = 'member.uid___A001_X12a3_Xe9.README.txt' + for url in data_info['access_url']: + if file in url: + file_url = url + break + assert file_url + alma.download_files([file_url], savedir=tmp_path) + assert Path(tmp_path, file).stat().st_size + + # mock downloading an entire program + download_files_mock = Mock() + alma.download_files = download_files_mock + alma.retrieve_data_from_uid([uid]) + trimmed_access_url_list = [e for e in data_info_tar['access_url'].data if len(e) > 0] + trimmed_access_urls = (trimmed_access_url_list,) + mock_calls = download_files_mock.mock_calls[0][1] + print(f"\n\nComparing {mock_calls} to {trimmed_access_urls}\n\n") + # comparison = download_files_mock.mock_calls[0][1] == data_info_tar['access_url'] + assert mock_calls == trimmed_access_urls + # assert comparison.all() + + def test_download_data(self, tmp_path, alma): + # test only fits files from a program + alma.cache_location = tmp_path + + uid = 'uid://A001/X12a3/Xe9' + data_info = alma.get_data_info(uid, expand_tarfiles=True) + fitsre = re.compile(r'.*\.fits$') + # skip the actual downloading of the file + download_mock = MagicMock() + # following line require to make alma picklable + download_mock.__reduce__ = lambda self: (MagicMock, ()) + alma._download_file = download_mock + urls = [x['access_url'] for x in data_info + if fitsre.match(x['access_url'])] + results = alma.download_files(urls, savedir=tmp_path) + alma._download_file.call_count == len(results) + assert len(results) == len(urls) + + def test_download_and_extract(self, tmp_path, alma): + # TODO: slowish, runs for ~90s + + alma.cache_location = tmp_path + alma._cycle0_tarfile_content_table = {'ID': ''} + + uid = 'uid://A001/X12a3/Xe9' + data_info = alma.get_data_info(uid, expand_tarfiles=False) + aux_tar_file = [x for x in data_info['access_url'] if 'auxiliary' in x] + assert 1 == len(aux_tar_file) + download_mock = MagicMock() + # following line is required to make alma picklable + download_mock.__reduce__ = lambda self: (MagicMock, ()) + alma._download_file = download_mock + + # there are no FITS files in the auxiliary file + assert not alma.download_and_extract_files(aux_tar_file) + + # download python scripts now + downloaded = alma.download_and_extract_files(aux_tar_file, + regex=r'.*\.py') + assert len(downloaded) > 1 + assert download_mock.call_count == len(downloaded) + + # ASDM files cannot be expanded. + asdm_url = [x for x in data_info['access_url'] if 'asdm' in x][0] + tarfile_handle_mock = Mock() + mock_content_file1 = Mock(path='/tmp/') + # mocking attribute name is trickier and it requires the name to + # be set separately. + mock_content_file1.name = 'foo.py' + mock_content_file2 = Mock(path='/tmp/') + mock_content_file2.name = 'blah.txt' + tarfile_handle_mock.getmembers.return_value = \ + [mock_content_file1, mock_content_file2] + tarfile_pkg_mock = Mock() + tarfile_pkg_mock.open.return_value = tarfile_handle_mock + with patch('astroquery.alma.core.tarfile', tarfile_pkg_mock): + with patch('astroquery.alma.core.os.remove') as delete_mock: + downloaded_asdm = alma.download_and_extract_files( + [asdm_url], include_asdm=True, regex=r'.*\.py') + delete_mock.assert_called_once_with( + 'cache_path/' + asdm_url.split('/')[-1]) + assert Path(*downloaded_asdm) == Path(tmp_path, 'foo.py') + + def test_doc_example(self, tmp_path, alma): + alma.cache_location = tmp_path + m83_data = alma.query_object('M83', legacy_columns=True) + # the order can apparently sometimes change + # These column names change too often to keep testing. + # assert set(m83_data.colnames) == set(all_colnames) + galactic_center = coordinates.SkyCoord(0 * u.deg, 0 * u.deg, + frame='galactic') + gc_data = alma.query_region(galactic_center, 1 * u.deg) + # assert len(gc_data) >= 425 # Feb 8, 2016 + assert len(gc_data) >= 50 # Nov 16, 2016 + content_length_column_name = 'content_length' + + uids = np.unique(m83_data['Member ous id']) + + assert 'uid://A001/X11f/X30' in uids + X30 = (m83_data['Member ous id'] == 'uid://A001/X11f/X30') + X31 = (m83_data['Member ous id'] == 'uid://A002/X3216af/X31') + + assert X30.sum() == 4 # Jul 13, 2020 + assert X31.sum() == 4 # Jul 13, 2020 + mous1 = alma.get_data_info('uid://A001/X11f/X30') + totalsize_mous1 = mous1[content_length_column_name].sum() * u.Unit(mous1[content_length_column_name].unit) + assert (totalsize_mous1.to(u.B) > 1.9*u.GB) + + mous = alma.get_data_info('uid://A002/X3216af/X31') + totalsize_mous = mous[content_length_column_name].sum() * u.Unit(mous[content_length_column_name].unit) + # More recent ALMA request responses do not include any information + # about file size, so we have to allow for the possibility that all + # file sizes are replaced with -1 + assert (totalsize_mous.to(u.GB).value > 52) + + def test_query(self, tmp_path, alma): + alma.cache_location = tmp_path + + result = alma.query(payload={'start_date': '<11-11-2011'}, + public=False, legacy_columns=True, science=True) + # Nov 16, 2016: 159 + # Apr 25, 2017: 150 + # Jul 2, 2017: 160 + # May 9, 2018: 162 + # March 18, 2019: 171 (seriously, how do they keep changing history?) + # with SIA2 numbers are different (cardinality?) assert len(result) == 171 + test_date = datetime.strptime('11-11-2011', '%d-%m-%Y') + for row in result['Observation date']: + assert test_date > datetime.strptime(row, '%d-%m-%Y'), \ + 'Unexpected value: {}'.format(row) + + # Not in the help - no need to support it. + # result = alma.query(payload={'member_ous_id': 'uid://A001/X11a2/X11'}, + # science=True) + # assert len(result) == 1 + + def test_misc(self, alma): + # miscellaneous set of common tests + # + # alma.query_region(coordinate=orionkl_coords, radius=4 * u.arcmin, + # public=False, science=False) + + result = alma.query_object('M83', public=True, science=True) + assert len(result) > 0 + result = alma.query(payload={'pi_name': '*Bally*'}, public=False, + maxrec=10) + assert result + # Add overwrite=True in case the test previously died unexpectedly + # and left the temp file. + result.write('/tmp/alma-onerow.txt', format='ascii', overwrite=True) + for row in result: + assert 'Bally' in row['obs_creator_name'] + result = alma.query(payload=dict(project_code='2016.1.00165.S'), + public=False) + assert result + for row in result: + assert '2016.1.00165.S' == row['proposal_id'] + result = alma.query(payload=dict(project_code='2017.1.01355.L', + source_name_alma='G008.67'),) + assert result + for row in result: + assert '2017.1.01355.L' == row['proposal_id'] + assert 'Public' == row['data_rights'] + assert 'G008.67' in row['target_name'] + + result = alma.query_region( + coordinates.SkyCoord('5:35:14.461 -5:21:54.41', frame='fk5', + unit=(u.hour, u.deg)), radius=0.034 * u.deg) + assert result + + result = alma.query_region( + coordinates.SkyCoord('5:35:14.461 -5:21:54.41', frame='fk5', + unit=(u.hour, u.deg)), radius=0.034 * u.deg) + + result = alma.query(payload=dict(project_code='2012.*', + public_data=True)) + assert result + for row in result: + assert '2012.' in row['proposal_id'] + assert 'Public' == row['data_rights'] + + result = alma.query(payload={'frequency': '96 .. 96.5'}) + assert result + for row in result: + # TODO not sure how to test this + pass + + result = alma.query_object('M83', band_list=[3, 6, 8]) + assert result + for row in result: + assert row['band_list'] in ['3', '6', '8'] + + result = alma.query(payload={'pi_name': '*Ginsburg*', + 'band_list': '6'}) + assert result + for row in result: + assert '6' == row['band_list'] + assert 'ginsburg' in row['obs_creator_name'].lower() + + def test_user(self, alma): + # miscellaneous set of tests from current users + rslt = alma.query({'band_list': [6], 'project_code': '2012.1.*'}, + legacy_columns=True) + for row in rslt: + print(row['Project code']) + print(row['Member ous id']) + + # As of April 2017, these data are *MISSING FROM THE ARCHIVE*. + # This has been reported, as it is definitely a bug. + @pytest.mark.xfail + @pytest.mark.bigdata + def test_cycle1(self, tmp_path, alma): + # About 500 MB + alma.cache_location = tmp_path + target = 'NGC4945' + project_code = '2012.1.00912.S' + payload = {'project_code': project_code, + 'source_name_alma': target, } + result = alma.query(payload=payload) + assert len(result) == 1 + + # Need new Alma() instances each time + a1 = alma() + uid_url_table_mous = a1.get_data_info(result['Member ous id']) + a2 = alma() + uid_url_table_asdm = a2.get_data_info(result['Asdm uid']) + # I believe the fixes as part of #495 have resulted in removal of a + # redundancy in the table creation, so a 1-row table is OK here. + # A 2-row table may not be OK any more, but that's what it used to + # be... + assert len(uid_url_table_asdm) == 1 + assert len(uid_url_table_mous) >= 2 # now is len=3 (Nov 17, 2016) + + # URL should look like: + # https://almascience.eso.org/dataPortal/requests/anonymous/944120962/ALMA/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar + # https://almascience.eso.org/rh/requests/anonymous/944222597/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar + + small = uid_url_table_mous['size'] < 1 + + urls_to_download = uid_url_table_mous[small]['URL'] + + uri = urlparse(urls_to_download[0]) + assert uri.path == ('/dataPortal/requests/anonymous/{0}/ALMA/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar' # noqa + .format(a1._staging_log['staging_page_id'])) + + # THIS IS FAIL + # '2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar' + left = uid_url_table_mous['URL'][0].split("/")[-1] + assert left == '2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar' + right = uid_url_table_mous['uid'][0] + assert right == 'uid://A002/X5a9a13/X528' + assert left[15:-15] == right.replace(":", "_").replace("/", "_") + data = alma.download_and_extract_files(urls_to_download) + + assert len(data) == 6 + + @pytest.mark.skipif("SKIP_SLOW") + @pytest.mark.xfail(reason="Not working anymore") + def test_cycle0(self, tmp_path, alma): + # About 20 MB + alma.cache_location = tmp_path + + target = 'NGC4945' + project_code = '2011.0.00121.S' + + payload = {'project_code': project_code, + 'source_name_alma': target, } + result = alma.query(payload=payload, legacy_columns=True) + assert len(result) == 1 + + alma1 = alma() + alma2 = alma() + uid_url_table_mous = alma1.get_data_info(result['Member ous id']) + uid_url_table_asdm = alma2.get_data_info(result['Asdm uid']) + assert len(uid_url_table_asdm) == 1 + assert len(uid_url_table_mous) == 32 + + assert uid_url_table_mous[0]['URL'].split("/")[-1] == '2011.0.00121.S_2012-08-16_001_of_002.tar' + assert uid_url_table_mous[0]['uid'] == 'uid://A002/X327408/X246' + + small = uid_url_table_mous['size'] < 1 + + urls_to_download = uid_url_table_mous[small]['URL'] + # Check that all URLs show up in the Cycle 0 table + for url in urls_to_download: + tarfile_name = os.path.split(url)[-1] + assert tarfile_name in alma._cycle0_tarfile_content['ID'] + + data = alma.download_and_extract_files(urls_to_download) + + # There are 10 small files, but only 8 unique + assert len(data) == 8 + + def test_keywords(self, tmp_path, alma): + + alma.help_tap() + result = alma.query_tap( + "select * from ivoa.obscore where s_resolution <0.1 and " + "science_keyword in ('High-mass star formation', 'Disks around " + "high-mass stars')") + + assert len(result) >= 72 + # TODO why is it failing + # assert 'Orion_Source_I' in result['target_name'] + + +@pytest.mark.remote_data +def test_project_metadata(alma): + metadata = alma.get_project_metadata('2013.1.00269.S') + assert metadata == ['Sgr B2, a high-mass molecular cloud in our Galaxy\'s ' + 'Central Molecular Zone, is the most extreme site of ' + 'ongoing star formation in the Local Group in terms ' + 'of its gas content, temperature, and velocity ' + 'dispersion. If any cloud in our galaxy is analogous ' + 'to the typical cloud at the universal peak of star ' + 'formation at z~2, this is it. We propose a 6\'x6\' ' + 'mosaic in the 3mm window targeting gas thermometer ' + 'lines, specifically CH3CN and its isotopologues. We ' + 'will measure the velocity dispersion and temperature ' + 'of the molecular gas on all scales (0.02 - 12 pc, ' + '0.5" - 5\') within the cloud, which will yield ' + 'resolved measurements of the Mach number and the ' + 'sonic scale of the gas. We will assess the relative ' + 'importance of stellar feedback and turbulence on the ' + 'star-forming gas, determining how extensive the ' + 'feedback effects are within an ultradense ' + 'environment. The observations will provide ' + 'constraints on the inputs to star formation theories ' + 'and will determine their applicability in extremely ' + 'dense, turbulent, and hot regions. Sgr B2 will be ' + 'used as a testing ground for star formation theories ' + 'in an environment analogous to high-z starburst ' + 'clouds in which they must be applied.'] + + +@pytest.mark.remote_data +def test_data_info_stacking(alma): + alma.get_data_info(['uid://A001/X13d5/X1d', 'uid://A002/X3216af/X31', + 'uid://A001/X12a3/X240']) + + +@pytest.mark.remote_data +@pytest.mark.skipif("SKIP_SLOW", reason="Huge data file download") +def test_big_download_regression(alma): + """ + Regression test for #2020/#2021 - this download fails if logging tries to + load the whole data file into memory. + """ + result = alma.query({'project_code': '2013.1.01365.S'}) + uids = np.unique(result['member_ous_uid']) + files = alma.get_data_info(uids) + + # we may need to change the cache dir for this to work on testing machines? + # savedir='/big/data/path/' + # Alma.cache_dir=savedir + + # this is a big one that fails + alma.download_files([files['access_url'][3]]) + + +@pytest.mark.remote_data +def test_download_html_file(alma, tmp_path): + alma.cache_location = tmp_path + result = alma.download_files( + ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)]) + assert result + + +@pytest.mark.remote_data +def test_verify_html_file(alma, caplog, tmp_path): + alma.cache_location = tmp_path + + # download the file + result = alma.download_files( + ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)]) + assert 'member.uid___A001_X1284_X1353.qa2_report.html' in result[0] + + result = alma.download_files( + ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)], + verify_only=True) + assert 'member.uid___A001_X1284_X1353.qa2_report.html' in result[0] + local_filepath = Path(result[0]) + expected_file_length = local_filepath.stat().st_size + assert f"Found cached file {local_filepath} with expected size {expected_file_length}." in caplog.text + + # manipulate the file + with open(local_filepath, 'ab') as fh: + fh.write(b"Extra Text") + + caplog.clear() + new_file_length = expected_file_length + 10 + with pytest.warns(expected_warning=CorruptDataWarning, + match=(f"Found cached file {local_filepath} with size {new_file_length} > expected size " + f"{expected_file_length}. The download is likely corrupted.")): + result = alma.download_files( + ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)], + verify_only=True) + assert 'member.uid___A001_X1284_X1353.qa2_report.html' in result[0] + + # manipulate the file: make it small + with open(local_filepath, 'wb') as fh: + fh.write(b"Empty Text") + + caplog.clear() + result = alma.download_files( + ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)], + verify_only=True) + assert 'member.uid___A001_X1284_X1353.qa2_report.html' in result[0] + existing_file_length = 10 + assert (f"Found cached file {local_filepath} with size {existing_file_length} < expected size " + f"{expected_file_length}. The download should be continued.") in caplog.text From 0f8799d8ae879e2440e1a564b3925cd38ddafe2d Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 2 Jun 2024 09:14:59 -0400 Subject: [PATCH 5/6] pare down translated ALMA tests --- astroquery/nrao/tests/test_nrao.py | 261 +---------------------------- 1 file changed, 6 insertions(+), 255 deletions(-) diff --git a/astroquery/nrao/tests/test_nrao.py b/astroquery/nrao/tests/test_nrao.py index 4d9f8e08b6..9d036b5471 100644 --- a/astroquery/nrao/tests/test_nrao.py +++ b/astroquery/nrao/tests/test_nrao.py @@ -228,7 +228,7 @@ def test_query(): nrao._get_dataarchive_url = Mock() nrao._tap = tap_mock result = nrao.query_region(SkyCoord(1*u.deg, 2*u.deg, frame='icrs'), - radius=1*u.deg) + radius=0.001*u.deg) assert len(result) == 0 assert 'proposal_id' in result.columns tap_mock.search.assert_called_once_with( @@ -246,102 +246,14 @@ def test_query(): nrao._tap = tap_mock with patch('astroquery.nrao.tapsql.coord.SkyCoord.from_name') as name_mock: name_mock.return_value = SkyCoord(1, 2, unit='deg') - result = nrao.query_object('M83', public=False, - band_list=[3]) + # mock data generated by running this query w/maxrec=5 + result = nrao.query_object('M83') assert len(result) == 1 tap_mock.search.assert_called_once_with( - "select * from ivoa.obscore WHERE " - "(INTERSECTS(CIRCLE('ICRS',1.0,2.0,0.16666666666666666), s_region) = 1) " - "AND band_list LIKE '%3%' AND science_observation='T' AND " - "data_rights='Proprietary'", + "select * from tap_schema.obscore WHERE CONTAINS(POINT('ICRS',s_ra,s_dec),CIRCLE('ICRS',204.25383,-29.865761111,0.16666666666666666))=1", language='ADQL', maxrec=None) - # repeat for legacy columns - mock_result = Mock() - tap_mock = Mock() - mock_result.to_table.return_value = onerow_result - tap_mock.search.return_value = mock_result - nrao = Nrao() - nrao._tap = tap_mock - with patch('astroquery.nrao.tapsql.coord.SkyCoord.from_name') as name_mock: - name_mock.return_value = SkyCoord(1, 2, unit='deg') - result_legacy = nrao.query_object('M83', public=False, - legacy_columns=True, - band_list=[3]) - assert len(result) == 1 - - assert 'Project code' in result_legacy.columns - tap_mock.search.assert_called_once_with( - "select * from ivoa.obscore WHERE " - "(INTERSECTS(CIRCLE('ICRS',1.0,2.0,0.16666666666666666), s_region) = 1) " - "AND band_list LIKE '%3%' AND science_observation='T' AND " - "data_rights='Proprietary'", - language='ADQL', maxrec=None) - row_legacy = result_legacy[0] - row = result[0] - for item in _OBSCORE_TO_nraoRESULT.items(): - if item[0] == 't_min': - assert Time(row[item[0]], format='mjd').strftime('%d-%m-%Y') ==\ - row_legacy[item[1]] - else: - assert row[item[0]] == row_legacy[item[1]] - - # query with different arguments - tap_mock = Mock() - empty_result = Table.read(os.path.join(DATA_DIR, 'nrao-empty.txt'), - format='ascii') - mock_result = Mock() - mock_result.to_table.return_value = empty_result - tap_mock.search.return_value = mock_result - nrao = Nrao() - nrao._get_dataarchive_url = Mock() - nrao._tap = tap_mock - result = nrao.query_region('1 2', radius=1*u.deg, - payload={'frequency': '22'}, public=None, - band_list='1 3', science=False, - start_date='01-01-2010', - polarisation_type='Dual', - fov=0.0123130, - integration_time=25) - assert len(result) == 0 - tap_mock.search.assert_called_with( - "select * from ivoa.obscore WHERE frequency=22.0 AND " - "(INTERSECTS(CIRCLE('ICRS',1.0,2.0,1.0), s_region) = 1) AND " - "(band_list LIKE '%1%' OR band_list LIKE '%3%') AND " - "t_min=55197.0 AND pol_states='/XX/YY/' AND s_fov=0.012313 AND " - "t_exptime=25 AND science_observation='F'", - language='ADQL', maxrec=None - ) - - -def test_sia(): - sia_mock = Mock() - empty_result = Table.read(os.path.join(DATA_DIR, 'nrao-empty.txt'), - format='ascii') - sia_mock.search.return_value = Mock(table=empty_result) - nrao = Nrao() - nrao._get_dataarchive_url = Mock() - nrao._sia = sia_mock - result = nrao.query_sia(pos='CIRCLE 1 2 1', calib_level=[0, 1], - data_rights='Public', - band=(300, 400), - time=545454, maxrec=10, pol=['XX', 'YY'], - instrument='JAO', collection='nrao', - field_of_view=0.0123130, data_type='cube', - target_name='J0423-013', - publisher_did='ADS/JAO.nrao#2013.1.00546.S', - exptime=25) - assert len(result.table) == 0 - assert_called_with(sia_mock.search, calib_level=[0, 1], - band=(300, 400), data_type='cube', - pos='CIRCLE 1 2 1', - time=545454, maxrec=10, pol=['XX', 'YY'], - instrument='JAO', collection='nrao', - data_rights='Public', - field_of_view=0.0123130, - target_name='J0423-013', - publisher_did='ADS/JAO.nrao#2013.1.00546.S', exptime=25) def test_tap(): @@ -352,23 +264,13 @@ def test_tap(): nrao = Nrao() nrao._get_dataarchive_url = Mock() nrao._tap = tap_mock - result = nrao.query_tap('select * from ivoa.ObsCore') + result = nrao.query_tap('select * from tap_scheme.ObsCore') assert len(result.table) == 0 - tap_mock.search.assert_called_once_with('select * from ivoa.ObsCore', + tap_mock.search.assert_called_once_with('select * from tap_scheme.ObsCore', language='ADQL', maxrec=None) -@pytest.mark.parametrize('data_archive_url', - [ - ('https://nraoscience.nrao.edu'), - ('https://nraoscience.eso.org'), - ('https://nraoscience.nao.ac.jp') - ]) -def test_tap_url(data_archive_url): - _test_tap_url(data_archive_url) - - def _test_tap_url(data_archive_url): nrao = Nrao() nrao._get_dataarchive_url = Mock(return_value=data_archive_url) @@ -376,126 +278,6 @@ def _test_tap_url(data_archive_url): assert nrao.tap_url == f"{data_archive_url}/tap" -@pytest.mark.parametrize('data_archive_url', - [ - ('https://nraoscience.nrao.edu'), - ('https://nraoscience.eso.org'), - ('https://nraoscience.nao.ac.jp') - ]) -def test_sia_url(data_archive_url): - _test_sia_url(data_archive_url) - - -def _test_sia_url(data_archive_url): - nrao = Nrao() - nrao._get_dataarchive_url = Mock(return_value=data_archive_url) - nrao._get_dataarchive_url.reset_mock() - assert nrao.sia_url == f"{data_archive_url}/sia2" - - -@pytest.mark.parametrize('data_archive_url', - [ - ('https://nraoscience.nrao.edu'), - ('https://nraoscience.eso.org'), - ('https://nraoscience.nao.ac.jp') - ]) -def test_datalink_url(data_archive_url): - _test_datalink_url(data_archive_url) - - -def _test_datalink_url(data_archive_url): - nrao = Nrao() - nrao._get_dataarchive_url = Mock(return_value=data_archive_url) - nrao._get_dataarchive_url.reset_mock() - assert nrao.datalink_url == f"{data_archive_url}/datalink/sync" - - -def test_get_data_info(): - class MockDataLinkService: - def run_sync(self, uid): - return _mocked_datalink_sync(uid) - - nrao = Nrao() - nrao._get_dataarchive_url = Mock() - nrao._datalink = MockDataLinkService() - result = nrao.get_data_info(uids='uid://A001/X12a3/Xe9') - assert len(result) == 9 - - -# This method will be used by the mock in test_get_data_info_expand_tarfiles to replace requests.get -def _mocked_datalink_sync(*args, **kwargs): - class MockResponse: - adhoc_service_1_param1 = type('', (object, ), {'ID': 'standardID', - 'value': 'ivo://ivoa.net/std/DataLink#links-1.0'})() - adhoc_service_1_param2 = type( - '', (object, ), { - 'ID': 'accessURL', - 'value': 'https://nraoscience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar'} - )() - adhoc_service_1 = type( - '', (object, ), { - 'ID': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar', 'params': [ - adhoc_service_1_param1, adhoc_service_1_param2]})() - - adhoc_service_2_param1 = type('', (object, ), {'ID': 'standardID', - 'value': 'ivo://ivoa.net/std/DataLink#links-1.0'})() - adhoc_service_2_param2 = type( - '', (object, ), { - 'ID': 'accessURL', - 'value': 'https://nraoscience.org/datalink/sync?ID=2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar'} - )() - adhoc_service_2 = type( - '', (object, ), { - 'ID': 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar', 'params': [ - adhoc_service_1_param1, adhoc_service_1_param2]})() - - adhoc_services = { - 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar': adhoc_service_1, - 'DataLink.2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar': adhoc_service_2 - } - - def __init__(self, table): - self.table = table - - def to_table(self): - return self.table - - @property - def status(self): - return ['OK'] - - def iter_adhocservices(self): - return [self.adhoc_service_1, self.adhoc_service_2] - - def get_adhocservice_by_id(self, adhoc_service_id): - return self.adhoc_services[adhoc_service_id] - - print(f"\n\nFOUND ARGS {args}\n\n") - - if args[0] == 'uid://A001/X12a3/Xe9': - return MockResponse(Table.read(data_path('nrao-datalink.xml'), format='votable')) - elif args[0] == '2017.1.01185.S_uid___A001_X12a3_Xe9_001_of_001.tar': - return MockResponse(Table.read(data_path('nrao-datalink-recurse-this.xml'), format='votable')) - elif args[0] == '2017.1.01185.S_uid___A001_X12a3_Xe9_auxiliary.tar': - return MockResponse(Table.read(data_path('nrao-datalink-recurse-aux.xml'), format='votable')) - - pytest.fail('Should not get here.') - - -# @patch('pyvo.dal.adhoc.DatalinkService', side_effect=_mocked_datalink_sync) -def test_get_data_info_expand_tarfiles(): - class MockDataLinkService: - def run_sync(self, uid): - return _mocked_datalink_sync(uid) - - nrao = Nrao() - nrao._datalink = MockDataLinkService() - result = nrao.get_data_info(uids='uid://A001/X12a3/Xe9', expand_tarfiles=True) - - # Entire expanded structure is 19 links long. - assert len(result) == 19 - - def test_galactic_query(): """ regression test for 1867 @@ -513,34 +295,3 @@ def test_galactic_query(): radius=1*u.deg, get_query_payload=True) assert "'ICRS',266.405,-28.9362,1.0" in result - - -def test_download_files(): - def _requests_mock(method, url, **kwargs): - response = Mock() - response.headers = { - 'Content-Disposition': 'attachment; ' - 'filename={}'.format(url.split('/')[-1])} - return response - - def _download_file_mock(url, file_name, **kwargs): - return file_name - nrao = Nrao() - nrao._request = Mock(side_effect=_requests_mock) - nrao._download_file = Mock(side_effect=_download_file_mock) - downloaded_files = nrao.download_files(['https://location/file1']) - assert len(downloaded_files) == 1 - assert downloaded_files[0].endswith('file1') - - nrao._request.reset_mock() - nrao._download_file.reset_mock() - downloaded_files = nrao.download_files(['https://location/file1', - 'https://location/file2']) - assert len(downloaded_files) == 2 - - # error cases - nrao._request = Mock() - # no Content-Disposition results in no downloaded file - nrao._request.return_value = Mock(headers={}) - result = nrao.download_files(['https://location/file1']) - assert not result From 0ecb26d25b1dc7f20b47c00b959fc373d0c3bda7 Mon Sep 17 00:00:00 2001 From: "Adam Ginsburg (keflavich)" Date: Sun, 2 Jun 2024 11:08:50 -0400 Subject: [PATCH 6/6] add basic remote tests --- astroquery/nrao/tests/test_nrao_remote.py | 579 +--------------------- 1 file changed, 10 insertions(+), 569 deletions(-) diff --git a/astroquery/nrao/tests/test_nrao_remote.py b/astroquery/nrao/tests/test_nrao_remote.py index 3db8969899..2bb03286bf 100644 --- a/astroquery/nrao/tests/test_nrao_remote.py +++ b/astroquery/nrao/tests/test_nrao_remote.py @@ -14,580 +14,21 @@ from pyvo.dal.exceptions import DALOverflowWarning from astroquery.exceptions import CorruptDataWarning -from .. import Alma - -# ALMA tests involving staging take too long, leading to travis timeouts -# TODO: make this a configuration item -SKIP_SLOW = True - -all_colnames = {'Project code', 'Source name', 'RA', 'Dec', 'Band', - 'Frequency resolution', 'Integration', 'Release date', - 'Frequency support', 'Velocity resolution', 'Pol products', - 'Observation date', 'PI name', 'PWV', 'Member ous id', - 'Asdm uid', 'Project title', 'Project type', 'Scan intent', - 'Spatial resolution', 'Largest angular scale', - 'QA2 Status', 'Group ous id', 'Pub'} - -download_hostname = 'almascience.eso.org' - - -@pytest.fixture -def alma(request): - """ - Returns an alma client class. `--alma-site` pytest option can be used - to have the client run against a specific site - :param request: pytest request fixture - :return: alma client to use in tests - """ - alma = Alma() - alma_site = request.config.getoption('--alma-site', - 'almascience.eso.org') - alma.archive_url = 'https://{}'.format(alma_site) - return alma +from .. import Nrao @pytest.mark.remote_data -class TestAlma: - def test_public(self, alma): - with pytest.warns(expected_warning=DALOverflowWarning, - match="Partial result set. Potential causes MAXREC, async storage space, etc."): - results = alma.query(payload=None, public=True, maxrec=100) - assert len(results) == 100 - for row in results: - assert row['data_rights'] == 'Public' - with pytest.warns(expected_warning=DALOverflowWarning, - match="Partial result set. Potential causes MAXREC, async storage space, etc."): - results = alma.query(payload=None, public=False, maxrec=100) - assert len(results) == 100 - for row in results: - assert row['data_rights'] == 'Proprietary' - - def test_SgrAstar(self, tmp_path, alma): - alma.cache_location = tmp_path - - result_s = alma.query_object('Sgr A*', legacy_columns=True) - - assert '2013.1.00857.S' in result_s['Project code'] - # "The Brick", g0.253, is in this one - # assert b'2011.0.00217.S' in result_c['Project code'] # missing cycle 1 data - - def test_freq(self, alma): - payload = {'frequency': '85..86'} - result = alma.query(payload) - assert len(result) > 0 - for row in result: - # returned em_min and em_max are in m - assert row['frequency'] >= 85 - assert row['frequency'] <= 100 - assert '3' in row['band_list'] - - def test_bands(self, alma): - payload = {'band_list': ['5', '7']} - # Added maxrec here as downloading and reading the results take too long. - with pytest.warns(expected_warning=DALOverflowWarning, - match="Partial result set. Potential causes MAXREC, async storage space, etc."): - result = alma.query(payload, maxrec=1000) - assert len(result) > 0 - for row in result: - assert ('5' in row['band_list']) or ('7' in row['band_list']) +class TestNrao: + def test_SgrAstar(self, tmp_path, nrao): + nrao.cache_location = tmp_path + result_s = nrao.query_object('Sgr A*', maxrec=5) - def test_equivalent_columns(self, alma): - # this test is to ensure that queries using original column names - # return the same results as the ones that use ObsCore names - # original - result_orig = alma.query(payload={'project_code': '2011.0.00131.S'}, - legacy_columns=True) - result_obscore = alma.query(payload={'proposal_id': '2011.0.00131.S'}, - legacy_columns=True) - assert len(result_orig) == len(result_obscore) - for row in result_orig: - assert row['Project code'] == '2011.0.00131.S' - for row in result_obscore: - assert row['Project code'] == '2011.0.00131.S' - - def test_alma_source_name(self, alma): - payload = {'source_name_alma': 'GRB021004'} - result = alma.query(payload) - assert len(result) > 0 - for row in result: - assert 'GRB021004' == row['target_name'] - - def test_ra_dec(self, alma): + def test_ra_dec(self, nrao): payload = {'ra_dec': '181.0192d -0.01928d'} - result = alma.query(payload) + result = nrao.query(payload) assert len(result) > 0 - @pytest.mark.skipif("SKIP_SLOW") - def test_m83(self, tmp_path, alma): - # Runs for over 9 minutes - alma.cache_location = tmp_path - - m83_data = alma.query_object('M83', science=True, legacy_columns=True) - uids = np.unique(m83_data['Member ous id']) - link_list = alma.get_data_info(uids) - - # On Feb 8, 2016 there were 83 hits. This number should never go down. - # Except it has. On May 18, 2016, there were 47. - assert len(link_list) >= 47 - - def test_data_proprietary(self, alma): - # public - assert not alma.is_proprietary('uid://A001/X12a3/Xe9') - IVOA_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%f" - now = datetime.now(timezone.utc).strftime(IVOA_DATE_FORMAT)[:-3] - query = "select top 1 member_ous_uid from ivoa.obscore where " \ - "obs_release_date > '{}'".format(now) - result = alma.query_tap(query) - assert len(result.to_table()) == 1 - # proprietary - assert alma.is_proprietary(result.to_table()[0][0]) - # non existent - with pytest.raises(AttributeError): - alma.is_proprietary('uid://NON/EXI/STING') - - def test_retrieve_data(self, tmp_path, alma): - """ - Regression test for issue 2490 (the retrieval step will simply fail if - given a blank line, so all we're doing is testing that it runs) - """ - alma.cache_location = tmp_path - - # small solar TP-only data set (<1 GB) - uid = 'uid://A001/X87c/X572' - - alma.retrieve_data_from_uid([uid]) - - def test_data_info(self, tmp_path, alma): - alma.cache_location = tmp_path - - uid = 'uid://A001/X12a3/Xe9' - data_info = alma.get_data_info(uid, expand_tarfiles=True) - for file in data_info: - # TODO found files that do not match info. - # assert u.isclose(file['content_length']*u.B, - # alma._HEADER_data_size([file['access_url']])[1]),\ - # 'File {} size: datalink and head do not match'.\ - # format(file['access_url']) - pass - - # compare with tarball version - data_info_tar = alma.get_data_info(uid, expand_tarfiles=False) - - # The expanded table should be much longer than the non-expanded table. - assert len(data_info) > len(data_info_tar) - # size is the same - not working because service inconsistencies - # assert sum(data_info['content_length']) == \ - # sum(data_info_tar['content_length']) - # check smallest file downloads correctly - file = 'member.uid___A001_X12a3_Xe9.README.txt' - for url in data_info['access_url']: - if file in url: - file_url = url - break - assert file_url - alma.download_files([file_url], savedir=tmp_path) - assert Path(tmp_path, file).stat().st_size - - # mock downloading an entire program - download_files_mock = Mock() - alma.download_files = download_files_mock - alma.retrieve_data_from_uid([uid]) - trimmed_access_url_list = [e for e in data_info_tar['access_url'].data if len(e) > 0] - trimmed_access_urls = (trimmed_access_url_list,) - mock_calls = download_files_mock.mock_calls[0][1] - print(f"\n\nComparing {mock_calls} to {trimmed_access_urls}\n\n") - # comparison = download_files_mock.mock_calls[0][1] == data_info_tar['access_url'] - assert mock_calls == trimmed_access_urls - # assert comparison.all() - - def test_download_data(self, tmp_path, alma): - # test only fits files from a program - alma.cache_location = tmp_path - - uid = 'uid://A001/X12a3/Xe9' - data_info = alma.get_data_info(uid, expand_tarfiles=True) - fitsre = re.compile(r'.*\.fits$') - # skip the actual downloading of the file - download_mock = MagicMock() - # following line require to make alma picklable - download_mock.__reduce__ = lambda self: (MagicMock, ()) - alma._download_file = download_mock - urls = [x['access_url'] for x in data_info - if fitsre.match(x['access_url'])] - results = alma.download_files(urls, savedir=tmp_path) - alma._download_file.call_count == len(results) - assert len(results) == len(urls) - - def test_download_and_extract(self, tmp_path, alma): - # TODO: slowish, runs for ~90s - - alma.cache_location = tmp_path - alma._cycle0_tarfile_content_table = {'ID': ''} - - uid = 'uid://A001/X12a3/Xe9' - data_info = alma.get_data_info(uid, expand_tarfiles=False) - aux_tar_file = [x for x in data_info['access_url'] if 'auxiliary' in x] - assert 1 == len(aux_tar_file) - download_mock = MagicMock() - # following line is required to make alma picklable - download_mock.__reduce__ = lambda self: (MagicMock, ()) - alma._download_file = download_mock - - # there are no FITS files in the auxiliary file - assert not alma.download_and_extract_files(aux_tar_file) - - # download python scripts now - downloaded = alma.download_and_extract_files(aux_tar_file, - regex=r'.*\.py') - assert len(downloaded) > 1 - assert download_mock.call_count == len(downloaded) - - # ASDM files cannot be expanded. - asdm_url = [x for x in data_info['access_url'] if 'asdm' in x][0] - tarfile_handle_mock = Mock() - mock_content_file1 = Mock(path='/tmp/') - # mocking attribute name is trickier and it requires the name to - # be set separately. - mock_content_file1.name = 'foo.py' - mock_content_file2 = Mock(path='/tmp/') - mock_content_file2.name = 'blah.txt' - tarfile_handle_mock.getmembers.return_value = \ - [mock_content_file1, mock_content_file2] - tarfile_pkg_mock = Mock() - tarfile_pkg_mock.open.return_value = tarfile_handle_mock - with patch('astroquery.alma.core.tarfile', tarfile_pkg_mock): - with patch('astroquery.alma.core.os.remove') as delete_mock: - downloaded_asdm = alma.download_and_extract_files( - [asdm_url], include_asdm=True, regex=r'.*\.py') - delete_mock.assert_called_once_with( - 'cache_path/' + asdm_url.split('/')[-1]) - assert Path(*downloaded_asdm) == Path(tmp_path, 'foo.py') - - def test_doc_example(self, tmp_path, alma): - alma.cache_location = tmp_path - m83_data = alma.query_object('M83', legacy_columns=True) - # the order can apparently sometimes change - # These column names change too often to keep testing. - # assert set(m83_data.colnames) == set(all_colnames) - galactic_center = coordinates.SkyCoord(0 * u.deg, 0 * u.deg, - frame='galactic') - gc_data = alma.query_region(galactic_center, 1 * u.deg) - # assert len(gc_data) >= 425 # Feb 8, 2016 - assert len(gc_data) >= 50 # Nov 16, 2016 - content_length_column_name = 'content_length' - - uids = np.unique(m83_data['Member ous id']) - - assert 'uid://A001/X11f/X30' in uids - X30 = (m83_data['Member ous id'] == 'uid://A001/X11f/X30') - X31 = (m83_data['Member ous id'] == 'uid://A002/X3216af/X31') - - assert X30.sum() == 4 # Jul 13, 2020 - assert X31.sum() == 4 # Jul 13, 2020 - mous1 = alma.get_data_info('uid://A001/X11f/X30') - totalsize_mous1 = mous1[content_length_column_name].sum() * u.Unit(mous1[content_length_column_name].unit) - assert (totalsize_mous1.to(u.B) > 1.9*u.GB) - - mous = alma.get_data_info('uid://A002/X3216af/X31') - totalsize_mous = mous[content_length_column_name].sum() * u.Unit(mous[content_length_column_name].unit) - # More recent ALMA request responses do not include any information - # about file size, so we have to allow for the possibility that all - # file sizes are replaced with -1 - assert (totalsize_mous.to(u.GB).value > 52) - - def test_query(self, tmp_path, alma): - alma.cache_location = tmp_path - - result = alma.query(payload={'start_date': '<11-11-2011'}, - public=False, legacy_columns=True, science=True) - # Nov 16, 2016: 159 - # Apr 25, 2017: 150 - # Jul 2, 2017: 160 - # May 9, 2018: 162 - # March 18, 2019: 171 (seriously, how do they keep changing history?) - # with SIA2 numbers are different (cardinality?) assert len(result) == 171 - test_date = datetime.strptime('11-11-2011', '%d-%m-%Y') - for row in result['Observation date']: - assert test_date > datetime.strptime(row, '%d-%m-%Y'), \ - 'Unexpected value: {}'.format(row) - - # Not in the help - no need to support it. - # result = alma.query(payload={'member_ous_id': 'uid://A001/X11a2/X11'}, - # science=True) - # assert len(result) == 1 - - def test_misc(self, alma): - # miscellaneous set of common tests - # - # alma.query_region(coordinate=orionkl_coords, radius=4 * u.arcmin, - # public=False, science=False) - - result = alma.query_object('M83', public=True, science=True) - assert len(result) > 0 - result = alma.query(payload={'pi_name': '*Bally*'}, public=False, - maxrec=10) - assert result - # Add overwrite=True in case the test previously died unexpectedly - # and left the temp file. - result.write('/tmp/alma-onerow.txt', format='ascii', overwrite=True) - for row in result: - assert 'Bally' in row['obs_creator_name'] - result = alma.query(payload=dict(project_code='2016.1.00165.S'), - public=False) - assert result - for row in result: - assert '2016.1.00165.S' == row['proposal_id'] - result = alma.query(payload=dict(project_code='2017.1.01355.L', - source_name_alma='G008.67'),) - assert result - for row in result: - assert '2017.1.01355.L' == row['proposal_id'] - assert 'Public' == row['data_rights'] - assert 'G008.67' in row['target_name'] - - result = alma.query_region( - coordinates.SkyCoord('5:35:14.461 -5:21:54.41', frame='fk5', - unit=(u.hour, u.deg)), radius=0.034 * u.deg) - assert result - - result = alma.query_region( - coordinates.SkyCoord('5:35:14.461 -5:21:54.41', frame='fk5', - unit=(u.hour, u.deg)), radius=0.034 * u.deg) - - result = alma.query(payload=dict(project_code='2012.*', - public_data=True)) - assert result - for row in result: - assert '2012.' in row['proposal_id'] - assert 'Public' == row['data_rights'] - - result = alma.query(payload={'frequency': '96 .. 96.5'}) - assert result - for row in result: - # TODO not sure how to test this - pass - - result = alma.query_object('M83', band_list=[3, 6, 8]) - assert result - for row in result: - assert row['band_list'] in ['3', '6', '8'] - - result = alma.query(payload={'pi_name': '*Ginsburg*', - 'band_list': '6'}) - assert result - for row in result: - assert '6' == row['band_list'] - assert 'ginsburg' in row['obs_creator_name'].lower() - - def test_user(self, alma): - # miscellaneous set of tests from current users - rslt = alma.query({'band_list': [6], 'project_code': '2012.1.*'}, - legacy_columns=True) - for row in rslt: - print(row['Project code']) - print(row['Member ous id']) - - # As of April 2017, these data are *MISSING FROM THE ARCHIVE*. - # This has been reported, as it is definitely a bug. - @pytest.mark.xfail - @pytest.mark.bigdata - def test_cycle1(self, tmp_path, alma): - # About 500 MB - alma.cache_location = tmp_path - target = 'NGC4945' - project_code = '2012.1.00912.S' - payload = {'project_code': project_code, - 'source_name_alma': target, } - result = alma.query(payload=payload) - assert len(result) == 1 - - # Need new Alma() instances each time - a1 = alma() - uid_url_table_mous = a1.get_data_info(result['Member ous id']) - a2 = alma() - uid_url_table_asdm = a2.get_data_info(result['Asdm uid']) - # I believe the fixes as part of #495 have resulted in removal of a - # redundancy in the table creation, so a 1-row table is OK here. - # A 2-row table may not be OK any more, but that's what it used to - # be... - assert len(uid_url_table_asdm) == 1 - assert len(uid_url_table_mous) >= 2 # now is len=3 (Nov 17, 2016) - - # URL should look like: - # https://almascience.eso.org/dataPortal/requests/anonymous/944120962/ALMA/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar - # https://almascience.eso.org/rh/requests/anonymous/944222597/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar - - small = uid_url_table_mous['size'] < 1 - - urls_to_download = uid_url_table_mous[small]['URL'] - - uri = urlparse(urls_to_download[0]) - assert uri.path == ('/dataPortal/requests/anonymous/{0}/ALMA/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar/2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar' # noqa - .format(a1._staging_log['staging_page_id'])) - - # THIS IS FAIL - # '2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar' - left = uid_url_table_mous['URL'][0].split("/")[-1] - assert left == '2012.1.00912.S_uid___A002_X5a9a13_X528_001_of_001.tar' - right = uid_url_table_mous['uid'][0] - assert right == 'uid://A002/X5a9a13/X528' - assert left[15:-15] == right.replace(":", "_").replace("/", "_") - data = alma.download_and_extract_files(urls_to_download) - - assert len(data) == 6 - - @pytest.mark.skipif("SKIP_SLOW") - @pytest.mark.xfail(reason="Not working anymore") - def test_cycle0(self, tmp_path, alma): - # About 20 MB - alma.cache_location = tmp_path - - target = 'NGC4945' - project_code = '2011.0.00121.S' - - payload = {'project_code': project_code, - 'source_name_alma': target, } - result = alma.query(payload=payload, legacy_columns=True) - assert len(result) == 1 - - alma1 = alma() - alma2 = alma() - uid_url_table_mous = alma1.get_data_info(result['Member ous id']) - uid_url_table_asdm = alma2.get_data_info(result['Asdm uid']) - assert len(uid_url_table_asdm) == 1 - assert len(uid_url_table_mous) == 32 - - assert uid_url_table_mous[0]['URL'].split("/")[-1] == '2011.0.00121.S_2012-08-16_001_of_002.tar' - assert uid_url_table_mous[0]['uid'] == 'uid://A002/X327408/X246' - - small = uid_url_table_mous['size'] < 1 - - urls_to_download = uid_url_table_mous[small]['URL'] - # Check that all URLs show up in the Cycle 0 table - for url in urls_to_download: - tarfile_name = os.path.split(url)[-1] - assert tarfile_name in alma._cycle0_tarfile_content['ID'] - - data = alma.download_and_extract_files(urls_to_download) - - # There are 10 small files, but only 8 unique - assert len(data) == 8 - - def test_keywords(self, tmp_path, alma): - - alma.help_tap() - result = alma.query_tap( - "select * from ivoa.obscore where s_resolution <0.1 and " - "science_keyword in ('High-mass star formation', 'Disks around " - "high-mass stars')") - - assert len(result) >= 72 - # TODO why is it failing - # assert 'Orion_Source_I' in result['target_name'] - - -@pytest.mark.remote_data -def test_project_metadata(alma): - metadata = alma.get_project_metadata('2013.1.00269.S') - assert metadata == ['Sgr B2, a high-mass molecular cloud in our Galaxy\'s ' - 'Central Molecular Zone, is the most extreme site of ' - 'ongoing star formation in the Local Group in terms ' - 'of its gas content, temperature, and velocity ' - 'dispersion. If any cloud in our galaxy is analogous ' - 'to the typical cloud at the universal peak of star ' - 'formation at z~2, this is it. We propose a 6\'x6\' ' - 'mosaic in the 3mm window targeting gas thermometer ' - 'lines, specifically CH3CN and its isotopologues. We ' - 'will measure the velocity dispersion and temperature ' - 'of the molecular gas on all scales (0.02 - 12 pc, ' - '0.5" - 5\') within the cloud, which will yield ' - 'resolved measurements of the Mach number and the ' - 'sonic scale of the gas. We will assess the relative ' - 'importance of stellar feedback and turbulence on the ' - 'star-forming gas, determining how extensive the ' - 'feedback effects are within an ultradense ' - 'environment. The observations will provide ' - 'constraints on the inputs to star formation theories ' - 'and will determine their applicability in extremely ' - 'dense, turbulent, and hot regions. Sgr B2 will be ' - 'used as a testing ground for star formation theories ' - 'in an environment analogous to high-z starburst ' - 'clouds in which they must be applied.'] - - -@pytest.mark.remote_data -def test_data_info_stacking(alma): - alma.get_data_info(['uid://A001/X13d5/X1d', 'uid://A002/X3216af/X31', - 'uid://A001/X12a3/X240']) - - -@pytest.mark.remote_data -@pytest.mark.skipif("SKIP_SLOW", reason="Huge data file download") -def test_big_download_regression(alma): - """ - Regression test for #2020/#2021 - this download fails if logging tries to - load the whole data file into memory. - """ - result = alma.query({'project_code': '2013.1.01365.S'}) - uids = np.unique(result['member_ous_uid']) - files = alma.get_data_info(uids) - - # we may need to change the cache dir for this to work on testing machines? - # savedir='/big/data/path/' - # Alma.cache_dir=savedir - - # this is a big one that fails - alma.download_files([files['access_url'][3]]) - - -@pytest.mark.remote_data -def test_download_html_file(alma, tmp_path): - alma.cache_location = tmp_path - result = alma.download_files( - ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)]) - assert result - - -@pytest.mark.remote_data -def test_verify_html_file(alma, caplog, tmp_path): - alma.cache_location = tmp_path - - # download the file - result = alma.download_files( - ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)]) - assert 'member.uid___A001_X1284_X1353.qa2_report.html' in result[0] - - result = alma.download_files( - ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)], - verify_only=True) - assert 'member.uid___A001_X1284_X1353.qa2_report.html' in result[0] - local_filepath = Path(result[0]) - expected_file_length = local_filepath.stat().st_size - assert f"Found cached file {local_filepath} with expected size {expected_file_length}." in caplog.text - - # manipulate the file - with open(local_filepath, 'ab') as fh: - fh.write(b"Extra Text") - - caplog.clear() - new_file_length = expected_file_length + 10 - with pytest.warns(expected_warning=CorruptDataWarning, - match=(f"Found cached file {local_filepath} with size {new_file_length} > expected size " - f"{expected_file_length}. The download is likely corrupted.")): - result = alma.download_files( - ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)], - verify_only=True) - assert 'member.uid___A001_X1284_X1353.qa2_report.html' in result[0] - - # manipulate the file: make it small - with open(local_filepath, 'wb') as fh: - fh.write(b"Empty Text") + def test_query(self, tmp_path, nrao): + nrao.cache_location = tmp_path - caplog.clear() - result = alma.download_files( - ['https://{}/dataPortal/member.uid___A001_X1284_X1353.qa2_report.html'.format(download_hostname)], - verify_only=True) - assert 'member.uid___A001_X1284_X1353.qa2_report.html' in result[0] - existing_file_length = 10 - assert (f"Found cached file {local_filepath} with size {existing_file_length} < expected size " - f"{expected_file_length}. The download should be continued.") in caplog.text + result = nrao.query_object('M83', maxrec=5)