Skip to content

Commit

Permalink
Add climate surface file for testing (#10)
Browse files Browse the repository at this point in the history
Created a climate surface file in GEMPAK to allow for testing of the
climate surface file decoder. This also made clear that surface file
types were not being correctly determined in every case. The detection
was modified and should no longer produce erroneous results. Removed
text data as a possible element for merged sounding data per GEMPAK
documentation.

Adds additional testing coverage.
  • Loading branch information
nawendt authored Mar 14, 2024
1 parent c0187cc commit fca70ea
Show file tree
Hide file tree
Showing 12 changed files with 124 additions and 46 deletions.
5 changes: 1 addition & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,4 @@ gempakIO uses three decoding classes (`GempakGrid`, `GempakSounding`, and `Gempa
* GEMPAK grids can be packed using GRIB2 compression. These files cannot be decoded yet, but plans are in place to add that functionality.
* GEMPAK grids packed with the NMC method cannot be read. I have not found a file to test in the wild so this may not get added.
* GEMPAK had conversion methods for floating point number representations (e.g., IBM, IEEE, etc.). This package assumes IEEE. As it is relatively unlikely that there is much data not using IEEE floats, there is no plan to add conversions from other formats unless the need arises.
* GEMPAK sounding and surface files can have their parameter data packed/compressed, but this is not currently implemented. GEMPAK grids do have basic GRIB packing by default, but GRIB2 packing is not implemented at this time.

### Things Implemented With Limited Testing
* Climate surface file type (see [GEMPAK Surface Library](https://github.com/Unidata/gempak/blob/master/gempak/txt/gemlib/sflib.txt) documentation). This is another situation where I have no files to test.
* GEMPAK sounding and surface files can have their parameter data packed/compressed, but this is not currently implemented. GEMPAK grids do have basic GRIB packing by default, but GRIB2 packing is not implemented at this time.
2 changes: 1 addition & 1 deletion src/gempakio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
from gempakio.decode.gempak import GempakGrid, GempakSounding, GempakSurface
from gempakio.encode.gempak import GridFile, SoundingFile, SurfaceFile

__version__ = '1.0.1'
__version__ = '1.0.2'
27 changes: 15 additions & 12 deletions src/gempakio/decode/gempak.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def __init__(self, file):
# Navigation Block
navb_size = self._buffer.read_int(4, self.endian, False)
if navb_size != NAVB_SIZE:
raise ValueError('Navigation block size does not match GEMPAK specification')
raise ValueError('Navigation block size does not match GEMPAK specification.')
else:
self.navigation_block = (
self._buffer.read_struct(NamedStruct(self.grid_nav_fmt,
Expand All @@ -238,7 +238,7 @@ def __init__(self, file):
anlb_size = self._buffer.read_int(4, self.endian, False)
anlb_start = self._buffer.set_mark()
if anlb_size != ANLB_SIZE:
raise ValueError('Analysis block size does not match GEMPAK specification')
raise ValueError('Analysis block size does not match GEMPAK specification.')
else:
anlb_type = self._buffer.read_struct(struct.Struct(self.prefmt + 'f'))[0]
self._buffer.jump_to(anlb_start)
Expand Down Expand Up @@ -318,9 +318,9 @@ def __init__(self, file):

def _swap_bytes(self, binary):
"""Swap between little and big endian."""
self.swaped_bytes = (struct.pack('@i', 1) != binary)
self.swapped_bytes = (struct.pack('@i', 1) != binary)

if self.swaped_bytes:
if self.swapped_bytes:
if sys.byteorder == 'little':
self.prefmt = '>'
self.endian = 'big'
Expand Down Expand Up @@ -1100,15 +1100,12 @@ def _unpack_merged(self, sndno):
fmt_code = {
DataTypes.real: 'f',
DataTypes.realpack: 'i',
DataTypes.character: 's',
}.get(part.data_type)

if fmt_code is None:
raise NotImplementedError(
f'No methods for data type {part.data_type}'
)
if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
Expand Down Expand Up @@ -2120,12 +2117,19 @@ def sfinfo(self):
return sorted(self._sfinfo)

def _get_surface_type(self):
"""Determine type of surface file."""
if len(self.row_headers) == 1:
"""Determine type of surface file.
Notes
-----
See GEMPAK SFLIB documentation for type definitions.
"""
if (len(self.row_headers) == 1
and 'DATE' in self.column_keys
and 'STID' in self.column_keys):
self.surface_type = 'ship'
elif 'DATE' in self.row_keys:
elif 'DATE' in self.row_keys and 'STID' in self.column_keys:
self.surface_type = 'standard'
elif 'DATE' in self.column_keys:
elif 'DATE' in self.column_keys and 'STID' in self.row_keys:
self.surface_type = 'climate'
else:
raise TypeError('Unknown surface data type')
Expand Down Expand Up @@ -2626,7 +2630,6 @@ def sfjson(self, station_id=None, station_number=None, date_time=None, state=Non
country = [c.upper() for c in country]

# Figure out which columns to extract from the file
# matched = self._sfinfo.copy()
matched = sorted(self._sfinfo)

# Do this now or the matched filter iterator will be consumed
Expand Down
Binary file added tests/data/big_endian.grd
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/data/climate.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
STN,YYMMDD/HHMM,PMSL,ALTI,TMPC,DWPC,SKNT,DRCT,GUST,WNUM,CHC1,CHC2,CHC3,VSBY,P03D,P03I,MSUN,SNOW,WEQS,P24I,TDXC,TDNC,P03C,CTYL,CTYM,CTYH,P06I,T6XC,T6NC,CEIL,P01I,SNEW
LWC,210401/0000,1031.3,30.44,9.4,-9.4,12.0,340.0,-9999.0,-9999.0,1004.0,-9999.0,-9999.0,10.0,3005.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,0.5,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0
Binary file added tests/data/climate.sfc
Binary file not shown.
Binary file added tests/data/little_endian.grd
Binary file not shown.
Binary file added tests/data/merged_nopack.snd
Binary file not shown.
Binary file added tests/data/multilevel_multidate.grd
Binary file not shown.
28 changes: 28 additions & 0 deletions tests/test_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@
from gempakio import GempakGrid


@pytest.mark.parametrize('order', ['little', 'big'])
def test_byte_swap(order):
    """Test byte swapping.

    Parameters
    ----------
    order : str
        Byte-order label used to select the ``{order}_endian.grd`` file
        in the test data directory.

    Notes
    -----
    The first grid returned by ``gdxarray()`` is expected to be all ones
    with shape (113, 151) for both files.
    """
    grid_file = Path(__file__).parent / 'data' / f'{order}_endian.grd'

    # Drop singleton dimensions so the result compares against a 2D reference.
    grid = GempakGrid(grid_file).gdxarray()[0].squeeze()

    reference = np.ones((113, 151), dtype='int32')

    np.testing.assert_equal(grid, reference)


@pytest.mark.parametrize('grid_name', ['none', 'diff', 'dec', 'grib'])
def test_grid_loading(grid_name):
"""Test reading grids with various packing."""
Expand All @@ -27,6 +39,22 @@ def test_grid_loading(grid_name):
np.testing.assert_allclose(gio, gempak, rtol=1e-6, atol=0)


def test_multi_level_multi_time_access():
    """Exercise grid access when two levels and two date/times are given."""
    data_dir = Path(__file__).parent / 'data'
    grid = GempakGrid(data_dir / 'multilevel_multidate.grd')

    # No assertion on the result: the call only has to complete without error.
    selection = {
        'parameter': 'STPC',
        'date_time': '202403040000',
        'coordinate': 'HGHT',
        'level': 0,
        'date_time2': '202403050000',
        'level2': 1,
    }
    grid.gdxarray(**selection)


@pytest.mark.parametrize('keyword,date_time', [
('FIRST', '201204141200'), ('LAST', '201204150000')
])
Expand Down
14 changes: 14 additions & 0 deletions tests/test_soundings.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,20 @@ def test_merged():
np.testing.assert_allclose(gdtar, ddtar, rtol=1e-10, atol=1e-2)


@pytest.mark.parametrize('access_type', ['STID', 'STNM'])
def test_sounding_access(access_type):
    """Check sounding retrieval when several filters are combined."""
    data_dir = Path(__file__).parent / 'data'
    sounding = GempakSounding(data_dir / 'merged_nopack.snd')

    # Only the station selector varies by access type; the country, state,
    # and date/time filters are shared.
    station_filters = {
        'STID': {'station_id': 'OUN'},
        'STNM': {'station_number': 72357},
    }
    station = station_filters.get(access_type)
    if station is not None:
        sounding.snxarray(**station, country='US', state='OK',
                          date_time='202101200000')


@pytest.mark.parametrize('text_type', ['txta', 'txtb', 'txtc', 'txpb'])
def test_sounding_text(text_type):
"""Test for proper decoding of coded message text."""
Expand Down
92 changes: 63 additions & 29 deletions tests/test_surface.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@
from gempakio import GempakSurface


def test_standard_surface():
"""Test to read a standard surface file."""
skip = ['text', 'spcl']

g = Path(__file__).parent / 'data' / 'lwc_std_sfc.sfc'
d = Path(__file__).parent / 'data' / 'lwc_std_sfc.csv'
def test_climate_surface():
"""Test to read a climate surface file."""
g = Path(__file__).parent / 'data' / 'climate.sfc'
d = Path(__file__).parent / 'data' / 'climate.csv'

gsf = GempakSurface(g)
gstns = gsf.sfjson()
Expand All @@ -35,15 +33,29 @@ def test_standard_surface():
gemsfc = gempak.loc[idx_key, :]

for param, val in stn['values'].items():
if param not in skip:
assert val == pytest.approx(gemsfc[param.upper()])
assert val == pytest.approx(gemsfc[param.upper()])


def test_multiple_special_observations():
    """Check special-report text when one time holds several special reports."""
    data_dir = Path(__file__).parent / 'data'
    surface = GempakSurface(data_dir / 'msn_std_sfc.sfc')

    # Report text that is too long will end up truncated in surface files
    matches = surface.nearest_time('202109071605', station_id='MSN',
                                   include_special=True)
    closest = matches[0]
    text = closest['values']['spcl']
    date_time = closest['properties']['date_time']

    reference = pd.read_csv(data_dir / 'msn_std_sfc.csv')
    gem_text = reference.loc[:, 'SPCL_TRUNC'][0]

    assert date_time == datetime(2021, 9, 7, 16, 4)
    assert text == gem_text


def test_ship_surface():
"""Test to read a ship surface file."""
def dtparse(string):
return datetime.strptime(string, '%y%m%d/%H%M')

skip = ['text', 'spcl']

g = Path(__file__).parent / 'data' / 'ship_sfc.sfc'
Expand Down Expand Up @@ -73,6 +85,46 @@ def dtparse(string):
np.testing.assert_allclose(decoded_vals, actual_vals)


def test_standard_surface():
    """Compare decoded standard surface values against the GEMPAK CSV output."""
    # Text parameters are not compared numerically here.
    excluded = ['text', 'spcl']

    data_dir = Path(__file__).parent / 'data'
    stations = GempakSurface(data_dir / 'lwc_std_sfc.sfc').sfjson()

    reference = pd.read_csv(data_dir / 'lwc_std_sfc.csv',
                            index_col=['STN', 'YYMMDD/HHMM'],
                            parse_dates=['YYMMDD/HHMM'],
                            date_format={'YYMMDD/HHMM': '%y%m%d/%H%M'})
    if not reference.index.is_monotonic_increasing:
        reference.sort_index(inplace=True)

    for station in stations:
        props = station['properties']
        row = reference.loc[(props['station_id'], props['date_time']), :]

        for name, value in station['values'].items():
            if name in excluded:
                continue
            assert value == pytest.approx(row[name.upper()])


@pytest.mark.parametrize('access_type', ['STID', 'STNM'])
def test_surface_access(access_type):
    """Check surface retrieval when several filters are combined."""
    data_dir = Path(__file__).parent / 'data'
    surface = GempakSurface(data_dir / 'msn_std_sfc.sfc')

    # Only the station selector varies by access type; the country, state,
    # and date/time filters are shared.
    station_filters = {
        'STID': {'station_id': 'MSN'},
        'STNM': {'station_number': 726410},
    }
    station = station_filters.get(access_type)
    if station is not None:
        surface.sfjson(**station, country='US', state='WI',
                       date_time='202109070000')


@pytest.mark.parametrize('text_type,date_time,speci', [
('text', '202109070000', False), ('spcl', '202109071600', True)
])
Expand All @@ -92,24 +144,6 @@ def test_surface_text(text_type, date_time, speci):
assert text == gem_text


def test_multiple_special_observations():
"""Test text decoding of surface file with multiple special reports in single time."""
g = Path(__file__).parent / 'data' / 'msn_std_sfc.sfc'
d = Path(__file__).parent / 'data' / 'msn_std_sfc.csv'

gsf = GempakSurface(g)
# Report text that is too long will end up truncated in surface files
nearest = gsf.nearest_time('202109071605', station_id='MSN', include_special=True)
text = nearest[0]['values']['spcl']
date_time = nearest[0]['properties']['date_time']

gempak = pd.read_csv(d)
gem_text = gempak.loc[:, 'SPCL_TRUNC'][0]

assert date_time == datetime(2021, 9, 7, 16, 4)
assert text == gem_text


@pytest.mark.parametrize('keyword,date_time', [
('FIRST', '202109070000'), ('LAST', '202109071604')
])
Expand Down

0 comments on commit fca70ea

Please sign in to comment.