Skip to content

Commit

Permalink
Issue/85/read keys (#86)
Browse files Browse the repository at this point in the history
* Added stuff to only read some keys

* added quick test to test check_deps
  • Loading branch information
eacharles authored Nov 8, 2023
1 parent 70659a6 commit f5fcae2
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 12 deletions.
56 changes: 44 additions & 12 deletions src/tables_io/ioUtils.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ def writeApTablesToFits(tables, filepath, **kwargs):
hdu_list.writeto(filepath, **kwargs)


def readFitsToApTables(filepath):
def readFitsToApTables(filepath, keys=None):
"""
Reads `astropy.table.Table` objects from a FITS file.
Expand All @@ -485,6 +485,9 @@ def readFitsToApTables(filepath):
filepath: `str`
Path to input file
keys : `list` or `None`
Which tables to read
Returns
-------
tables : `OrderedDict` of `astropy.table.Table`
Expand All @@ -493,6 +496,9 @@ def readFitsToApTables(filepath):
fin = fits.open(filepath)
tables = OrderedDict()
for hdu in fin[1:]:
if keys is not None:
if hdu.name.lower() not in keys:
continue
tables[hdu.name.lower()] = apTable.Table.read(filepath, hdu=hdu.name)
return tables

Expand Down Expand Up @@ -523,7 +529,7 @@ def writeRecarraysToFits(recarrays, filepath, **kwargs):
hdu_list.writeto(filepath, **kwargs)


def readFitsToRecarrays(filepath):
def readFitsToRecarrays(filepath, keys=None):
"""
Reads `np.recarray` objects from a FITS file.
Expand All @@ -532,6 +538,9 @@ def readFitsToRecarrays(filepath):
filepath: `str`
Path to input file
keys : `list` or `None`
Which tables to read
Returns
-------
tables : `OrderedDict` of `np.recarray`
Expand All @@ -540,6 +549,8 @@ def readFitsToRecarrays(filepath):
fin = fits.open(filepath)
tables = OrderedDict()
for hdu in fin[1:]:
if keys is not None and hdu.name.lower() not in keys:
continue
tables[hdu.name.lower()] = hdu.data
return tables

Expand All @@ -565,7 +576,7 @@ def writeApTablesToHdf5(tables, filepath, **kwargs):
v.write(filepath, path=k, append=True, format="hdf5", **kwargs)


def readHdf5ToApTables(filepath):
def readHdf5ToApTables(filepath, keys=None):
"""
Reads `astropy.table.Table` objects from an hdf5 file.
Expand All @@ -574,6 +585,9 @@ def readHdf5ToApTables(filepath):
filepath: `str`
Path to input file
keys : `list` or `None`
Which tables to read
Returns
-------
tables : `OrderedDict` of `astropy.table.Table`
Expand All @@ -582,6 +596,8 @@ def readHdf5ToApTables(filepath):
fin = h5py.File(filepath)
tables = OrderedDict()
for k in fin.keys():
if keys is not None and k not in keys:
continue
tables[k] = apTable.Table.read(filepath, path=k, format="hdf5")
return tables

Expand Down Expand Up @@ -689,7 +705,7 @@ def writeDictsToHdf5(odicts, filepath):
writeDictToHdf5(val, filepath, key)


def readHdf5ToDicts(filepath):
def readHdf5ToDicts(filepath, keys=None):
"""
Reads `numpy.array` objects from an hdf5 file.
Expand All @@ -698,13 +714,21 @@ def readHdf5ToDicts(filepath):
filepath: `str`
Path to input file
keys : `list` or `None`
Which tables to read
Returns
-------
dicts : `OrderedDict`, (`str`, `OrderedDict`, (`str`, `numpy.array`) )
The data
"""
fin = h5py.File(filepath)
return OrderedDict([(key, readHdf5GroupToDict(val)) for key, val in fin.items()])
l_out = []
for key, val in fin.items():
if keys is not None and key not in keys:
continue
l_out.append((key, readHdf5GroupToDict(val)))
return OrderedDict(l_out)


### II C. Reading and Writing `pandas.DataFrame` to/from `hdf5`
Expand All @@ -729,14 +753,17 @@ def readHdf5ToDataFrame(filepath, key=None):
return pd.read_hdf(filepath, key)


def readH5ToDataFrames(filepath):
def readH5ToDataFrames(filepath, keys=None):
"""Open an h5 file and and return a dictionary of `pandas.DataFrame`
Parameters
----------
filepath: `str`
Path to input file
keys : `list` or `None`
Which tables to read
Returns
-------
tab : `OrderedDict` (`str` : `pandas.DataFrame`)
Expand All @@ -748,7 +775,12 @@ def readH5ToDataFrames(filepath):
They have a different structure than 'hdf5' files written with `h5py` or `astropy.table`
"""
fin = h5py.File(filepath)
return OrderedDict([(key, readHdf5ToDataFrame(filepath, key=key)) for key in fin.keys()])
l_out = []
for key in fin.keys():
if keys is not None and key not in keys:
continue
l_out.append((key, readHdf5ToDataFrame(filepath, key=key)))
return OrderedDict(l_out)


def writeDataFramesToH5(dataFrames, filepath):
Expand Down Expand Up @@ -985,15 +1017,15 @@ def readNative(filepath, fmt=None, keys=None, allow_missing_keys=False, **kwargs
"""
fType = fileType(filepath, fmt)
if fType == ASTROPY_FITS:
return readFitsToApTables(filepath)
return readFitsToApTables(filepath, keys=keys)
if fType == ASTROPY_HDF5:
return readHdf5ToApTables(filepath)
return readHdf5ToApTables(filepath, keys=keys)
if fType == NUMPY_HDF5:
return readHdf5ToDicts(filepath)
return readHdf5ToDicts(filepath, keys=keys)
if fType == NUMPY_FITS:
return readFitsToRecarrays(filepath)
return readFitsToRecarrays(filepath, keys=keys)
if fType == PANDAS_HDF5:
return readH5ToDataFrames(filepath)
return readH5ToDataFrames(filepath, keys=keys)
if fType == PANDAS_PARQUET:
basepath = os.path.splitext(filepath)[0]
return readPqToDataFrames(basepath, keys, allow_missing_keys, **kwargs)
Expand Down
5 changes: 5 additions & 0 deletions tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def testFitsLoopback(self):
"""Test writing / reading to FITS"""
self._do_loopback(types.AP_TABLE, "test_out", "fits")
self._do_loopback_single(types.AP_TABLE, "test_out_single", "fits")
self._do_loopback_with_keys(types.AP_TABLE, "test_out_lookback", "fits", ["md"])
self._do_iterator("test_out_single.fits", types.AP_TABLE, True)
self._do_open("test_out_single.fits")
self._do_open("test_out.fits")
Expand All @@ -144,6 +145,7 @@ def testRecarrayLoopback(self):
"""Test writing / reading to FITS"""
self._do_loopback(types.NUMPY_RECARRAY, "test_out", "fit")
self._do_loopback_single(types.NUMPY_RECARRAY, "test_out_single", "fit")
self._do_loopback_with_keys(types.NUMPY_RECARRAY, "test_out_lookback", "fit", ["md"])
self._do_iterator("test_out_single.fit", types.NUMPY_RECARRAY, True)
self._do_open("test_out_single.fit")
self._do_open("test_out.fit")
Expand All @@ -152,6 +154,7 @@ def testHf5Loopback(self):
"""Test writing / reading astropy tables to HDF5"""
self._do_loopback(types.AP_TABLE, "test_out", "hf5")
self._do_loopback_single(types.AP_TABLE, "test_out_single", "hf5")
self._do_loopback_with_keys(types.AP_TABLE, "test_out_lookback", "hf5", ["md"])
self._do_iterator("test_out_single.hf5", types.AP_TABLE, True, chunk_size=50)
self._do_open("test_out_single.hf5")
self._do_open("test_out.hf5")
Expand All @@ -160,6 +163,7 @@ def testHdf5Loopback(self):
"""Test writing / reading numpy arrays to HDF5"""
self._do_loopback(types.NUMPY_DICT, "test_out", "hdf5")
self._do_loopback_single(types.NUMPY_DICT, "test_out_single", "hdf5")
self._do_loopback_with_keys(types.NUMPY_DICT, "test_out_lookback", "hdf5", ["md"])
self._do_iterator("test_out_single.hdf5", types.NUMPY_DICT, False, chunk_size=50)
self._do_open("test_out_single.hdf5")
self._do_open("test_out.hdf5")
Expand All @@ -172,6 +176,7 @@ def testH5Loopback(self):
"""Test writing / reading pandas dataframes to HDF5"""
self._do_loopback(types.PD_DATAFRAME, "test_out", "h5")
self._do_loopback_single(types.PD_DATAFRAME, "test_out_single", "h5")
self._do_loopback_with_keys(types.PD_DATAFRAME, "test_out_lookback", "h5", ["md"])
self._do_iterator("test_out_single.h5", types.PD_DATAFRAME, True, chunk_size=50)
self._do_open("test_out_single.h5")
self._do_open("test_out.h5")
Expand Down
5 changes: 5 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
from tables_io.testUtils import check_deps


def test_check_deps():
    """Check that `check_deps` is falsy for a lazy import of a module named 'this_does_not_exist'."""
    assert not check_deps([lazyImport('this_does_not_exist')])


def test_array_length():
"""Test the pandas reading"""
assert arrayUtils.arrayLength(4) == 0
Expand Down

0 comments on commit f5fcae2

Please sign in to comment.