-
-
Notifications
You must be signed in to change notification settings - Fork 419
Gaia: change the signature of the method load_data #3014
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
bd8f332
a304b13
54bd20e
040b1dc
80275c2
53233b8
095483a
181e858
71ad0ae
eddf96a
72617e6
94b8099
536f39c
45b0461
0f8b0cf
6aa4f6b
6845bc8
12efad1
ea2c414
154d5e7
510fe02
db87123
443f422
220b885
75b7124
1b8aaed
f96d621
775e8e0
e24b67c
58f8692
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -13,13 +13,12 @@ | |||||
Created on 30 jun. 2016 | ||||||
Modified on 18 Ene. 2022 by mhsarmiento | ||||||
""" | ||||||
import datetime | ||||||
import json | ||||||
import os | ||||||
import shutil | ||||||
import zipfile | ||||||
from collections.abc import Iterable | ||||||
from datetime import datetime, timezone | ||||||
from pathlib import Path | ||||||
|
||||||
from astropy import units | ||||||
from astropy import units as u | ||||||
|
@@ -28,6 +27,7 @@ | |||||
from astropy.io import votable | ||||||
from astropy.table import Table | ||||||
from astropy.units import Quantity | ||||||
from astropy.utils.decorators import deprecated_renamed_argument | ||||||
from requests import HTTPError | ||||||
|
||||||
from astroquery import log | ||||||
|
@@ -168,9 +168,11 @@ | |||||
except HTTPError: | ||||||
log.error("Error logging out data server") | ||||||
|
||||||
@deprecated_renamed_argument("output_file", None, since="0.4.8") | ||||||
def load_data(self, ids, *, data_release=None, data_structure='INDIVIDUAL', retrieval_type="ALL", | ||||||
linking_parameter='SOURCE_ID', valid_data=False, band=None, avoid_datatype_check=False, | ||||||
format="votable", output_file=None, overwrite_output_file=False, verbose=False): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removing a parameter from the public API should be done through a deprecation period. But I also wonder why this is necessary: why remove the ability for users to define a filename/location that better suits them? So, my suggestion would be to change the default None to There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is still a problem; we should not break people's code without notifying them first. |
||||||
format="votable", dump_to_file=False, overwrite_output_file=False, verbose=False, | ||||||
output_file=None): | ||||||
"""Loads the specified table | ||||||
TAP+ only | ||||||
|
||||||
|
@@ -218,44 +220,53 @@ | |||||
By default, this value will be set to False. If it is set to 'true' | ||||||
the Datalink items tags will not be checked. | ||||||
format : str, optional, default 'votable' | ||||||
loading format. Other available formats are 'csv', 'ecsv','votable_plain' and 'fits' | ||||||
output_file : string or pathlib.PosixPath, optional, default None | ||||||
file where the results are saved. | ||||||
If it is not provided, the http response contents are returned. | ||||||
loading format. Other available formats are 'csv', 'ecsv','votable_plain', 'json' and 'fits' | ||||||
dump_to_file: boolean, optional, default False. | ||||||
If it is true, a compressed directory named "datalink_output_<time_stamp>.zip" with all the DataLink | ||||||
files is made in the current working directory. The <time_stamp> format follows the ISO 8601 standard: | ||||||
"yyyymmddThhmmss". | ||||||
overwrite_output_file : boolean, optional, default False | ||||||
To overwrite the output_file if it already exists. | ||||||
To overwrite the output file ("datalink_output.zip") if it already exists. | ||||||
verbose : bool, optional, default 'False' | ||||||
flag to display information about the process | ||||||
|
||||||
Returns | ||||||
------- | ||||||
A dictionary where the keys are the file names and its value is a list of astropy.table.table.Table objects | ||||||
""" | ||||||
now = datetime.now(timezone.utc) | ||||||
now_formatted = now.strftime("%Y%m%d_%H%M%S") | ||||||
temp_dirname = "temp_" + now_formatted | ||||||
downloadname_formated = "download_" + now_formatted | ||||||
|
||||||
output_file_specified = False | ||||||
if output_file is None: | ||||||
|
||||||
now = datetime.datetime.now(datetime.timezone.utc) | ||||||
if not dump_to_file: | ||||||
now_formatted = now.strftime("%Y%m%d_%H%M%S") | ||||||
temp_dirname = "temp_" + now_formatted | ||||||
downloadname_formated = "download_" + now_formatted | ||||||
output_file = os.path.join(os.getcwd(), temp_dirname, downloadname_formated) | ||||||
|
||||||
else: | ||||||
output_file = 'datalink_output_' + now.strftime("%Y%m%dT%H%M%S") + '.zip' | ||||||
output_file_specified = True | ||||||
|
||||||
if isinstance(output_file, str): | ||||||
if not output_file.lower().endswith('.zip'): | ||||||
output_file = output_file + '.zip' | ||||||
elif isinstance(output_file, Path): | ||||||
if not output_file.suffix.endswith('.zip'): | ||||||
output_file.with_suffix('.zip') | ||||||
|
||||||
output_file = os.path.abspath(output_file) | ||||||
log.info(f"DataLink products will be stored in the {output_file} file") | ||||||
|
||||||
if not overwrite_output_file and os.path.exists(output_file): | ||||||
raise ValueError(f"{output_file} file already exists. Please use overwrite_output_file='True' to " | ||||||
f"overwrite output file.") | ||||||
|
||||||
path = os.path.dirname(output_file) | ||||||
|
||||||
log.debug(f"Directory where the data will be saved: {path}") | ||||||
|
||||||
if path != '': | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This may be cleaner as
Suggested change
|
||||||
if not os.path.isdir(path): | ||||||
try: | ||||||
os.mkdir(path) | ||||||
except FileExistsError: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. there is the |
||||||
log.warn("Path %s already exist" % path) | ||||||
except OSError: | ||||||
log.error("Creation of the directory %s failed" % path) | ||||||
|
||||||
if avoid_datatype_check is False: | ||||||
# we need to check params | ||||||
rt = str(retrieval_type).upper() | ||||||
|
@@ -298,14 +309,7 @@ | |||||
if linking_parameter != 'SOURCE_ID': | ||||||
params_dict['LINKING_PARAMETER'] = linking_parameter | ||||||
|
||||||
if path != '': | ||||||
try: | ||||||
os.mkdir(path) | ||||||
except FileExistsError: | ||||||
log.error("Path %s already exist" % path) | ||||||
except OSError: | ||||||
log.error("Creation of the directory %s failed" % path) | ||||||
|
||||||
files = dict() | ||||||
try: | ||||||
self.__gaiadata.load_data(params_dict=params_dict, output_file=output_file, verbose=verbose) | ||||||
files = Gaia.__get_data_files(output_file=output_file, path=path) | ||||||
|
@@ -314,6 +318,9 @@ | |||||
finally: | ||||||
if not output_file_specified: | ||||||
shutil.rmtree(path) | ||||||
else: | ||||||
for file in files.keys(): | ||||||
os.remove(os.path.join(os.getcwd(), path, file)) | ||||||
|
||||||
if verbose: | ||||||
if output_file_specified: | ||||||
|
@@ -329,18 +336,21 @@ | |||||
@staticmethod | ||||||
def __get_data_files(output_file, path): | ||||||
files = {} | ||||||
if zipfile.is_zipfile(output_file): | ||||||
with zipfile.ZipFile(output_file, 'r') as zip_ref: | ||||||
zip_ref.extractall(os.path.dirname(output_file)) | ||||||
extracted_files = [] | ||||||
|
||||||
with zipfile.ZipFile(output_file, "r") as zip_ref: | ||||||
extracted_files.extend(zip_ref.namelist()) | ||||||
zip_ref.extractall(os.path.dirname(output_file)) | ||||||
|
||||||
# r=root, d=directories, f = files | ||||||
for r, d, f in os.walk(path): | ||||||
for file in f: | ||||||
if file.lower().endswith(('.fits', '.xml', '.csv', '.ecsv')): | ||||||
if file in extracted_files: | ||||||
files[file] = os.path.join(r, file) | ||||||
|
||||||
for key, value in files.items(): | ||||||
if '.fits' in key: | ||||||
|
||||||
if key.endswith('.fits'): | ||||||
tables = [] | ||||||
with fits.open(value) as hduList: | ||||||
num_hdus = len(hduList) | ||||||
|
@@ -349,19 +359,20 @@ | |||||
Gaia.correct_table_units(table) | ||||||
tables.append(table) | ||||||
files[key] = tables | ||||||
elif '.xml' in key: | ||||||
|
||||||
elif key.endswith('.xml'): | ||||||
tables = [] | ||||||
for table in votable.parse(value).iter_tables(): | ||||||
tables.append(table) | ||||||
files[key] = tables | ||||||
|
||||||
elif '.csv' in key: | ||||||
elif key.endswith('.csv'): | ||||||
tables = [] | ||||||
table = Table.read(value, format='ascii.csv', fast_reader=False) | ||||||
tables.append(table) | ||||||
files[key] = tables | ||||||
|
||||||
elif '.json' in key: | ||||||
elif key.endswith('.json'): | ||||||
tables = [] | ||||||
with open(value) as f: | ||||||
data = json.load(f) | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is at the wrong location, but I'm cleaning up the changelog for release time anyway, so it doesn't matter here.