Add climate surface file for testing (#10)

Created a climate surface file in GEMPAK to allow testing of the climate surface file decoder. This also revealed that surface file types were not being determined correctly in every case; the detection was modified and should no longer produce erroneous results. Removed text data as a possible element for merged sounding data, per the GEMPAK documentation. Added additional test coverage.
nawendt · Mar 14, 2024 · fca70ea · fca70ea
1 parent c0187cc
commit fca70ea
Show file tree

Hide file tree

Showing 12 changed files with 124 additions and 46 deletions.
diff --git a/README.md b/README.md
@@ -9,7 +9,4 @@ gempakIO uses three decoding classes (`GempakGrid`, `GempakSounding`, and `Gempa
 *  GEMPAK grids can be packed using GRIB2 compression. These files cannot be decoded yet, but plans are in place to add that functionality.
 *  GEMPAK grids packed with the NMC method cannot be read. I have not found a file to test in the wild so this may not get added.
 *  GEMPAK had conversion methods for floating point number representations (e.g., IBM, IEEE, etc.). This package assumes IEEE. As it is relatively unlikely that there are much data not using IEEE floats, there is no plan to add conversions from other formats unless the need arises.
-*  GEMPAK sounding and surface files can have their parameter data packed/compressed, but this is not currently implemented. GEMPAK grids do have basic GRIB packing by default, but GRIB2 packing is not implemented at this time.
-
-### Things Implemented With Limited Testing
-*  Climate surface file type (see [GEMPAK Surface Library](https://github.com/Unidata/gempak/blob/master/gempak/txt/gemlib/sflib.txt) documentation). This is another situation where I have no files to test.
+*  GEMPAK sounding and surface files can have their parameter data packed/compressed, but this is not currently implemented. GEMPAK grids do have basic GRIB packing by default, but GRIB2 packing is not implemented at this time.
diff --git a/src/gempakio/__init__.py b/src/gempakio/__init__.py
@@ -6,4 +6,4 @@
 from gempakio.decode.gempak import GempakGrid, GempakSounding, GempakSurface
 from gempakio.encode.gempak import GridFile, SoundingFile, SurfaceFile
 
-__version__ = '1.0.1'
+__version__ = '1.0.2'
diff --git a/src/gempakio/decode/gempak.py b/src/gempakio/decode/gempak.py
@@ -224,7 +224,7 @@ def __init__(self, file):
             # Navigation Block
             navb_size = self._buffer.read_int(4, self.endian, False)
             if navb_size != NAVB_SIZE:
-                raise ValueError('Navigation block size does not match GEMPAK specification')
+                raise ValueError('Navigation block size does not match GEMPAK specification.')
             else:
                 self.navigation_block = (
                     self._buffer.read_struct(NamedStruct(self.grid_nav_fmt,
@@ -238,7 +238,7 @@ def __init__(self, file):
             anlb_size = self._buffer.read_int(4, self.endian, False)
             anlb_start = self._buffer.set_mark()
             if anlb_size != ANLB_SIZE:
-                raise ValueError('Analysis block size does not match GEMPAK specification')
+                raise ValueError('Analysis block size does not match GEMPAK specification.')
             else:
                 anlb_type = self._buffer.read_struct(struct.Struct(self.prefmt + 'f'))[0]
                 self._buffer.jump_to(anlb_start)
@@ -318,9 +318,9 @@ def __init__(self, file):
 
     def _swap_bytes(self, binary):
         """Swap between little and big endian."""
-        self.swaped_bytes = (struct.pack('@i', 1) != binary)
+        self.swapped_bytes = (struct.pack('@i', 1) != binary)
 
-        if self.swaped_bytes:
+        if self.swapped_bytes:
             if sys.byteorder == 'little':
                 self.prefmt = '>'
                 self.endian = 'big'
@@ -1100,15 +1100,12 @@ def _unpack_merged(self, sndno):
                 fmt_code = {
                     DataTypes.real: 'f',
                     DataTypes.realpack: 'i',
-                    DataTypes.character: 's',
                 }.get(part.data_type)
 
                 if fmt_code is None:
                     raise NotImplementedError(
                         f'No methods for data type {part.data_type}'
                     )
-                if fmt_code == 's':
-                    lendat *= BYTES_PER_WORD
 
                 packed_buffer = (
                     self._buffer.read_struct(
@@ -2120,12 +2117,19 @@ def sfinfo(self):
         return sorted(self._sfinfo)
 
     def _get_surface_type(self):
-        """Determine type of surface file."""
-        if len(self.row_headers) == 1:
+        """Determine type of surface file.
+
+        Notes
+        -----
+        See GEMPAK SFLIB documentation for type definitions.
+        """
+        if (len(self.row_headers) == 1
+            and 'DATE' in self.column_keys
+            and 'STID' in self.column_keys):
             self.surface_type = 'ship'
-        elif 'DATE' in self.row_keys:
+        elif 'DATE' in self.row_keys and 'STID' in self.column_keys:
             self.surface_type = 'standard'
-        elif 'DATE' in self.column_keys:
+        elif 'DATE' in self.column_keys and 'STID' in self.row_keys:
             self.surface_type = 'climate'
         else:
             raise TypeError('Unknown surface data type')
@@ -2626,7 +2630,6 @@ def sfjson(self, station_id=None, station_number=None, date_time=None, state=Non
             country = [c.upper() for c in country]
 
         # Figure out which columns to extract from the file
-        # matched = self._sfinfo.copy()
         matched = sorted(self._sfinfo)
 
         # Do this now or the matched filter iterator will be consumed

diff --git a/tests/data/big_endian.grd b/tests/data/big_endian.grd
diff --git a/tests/data/climate.csv b/tests/data/climate.csv
@@ -0,0 +1,2 @@
+STN,YYMMDD/HHMM,PMSL,ALTI,TMPC,DWPC,SKNT,DRCT,GUST,WNUM,CHC1,CHC2,CHC3,VSBY,P03D,P03I,MSUN,SNOW,WEQS,P24I,TDXC,TDNC,P03C,CTYL,CTYM,CTYH,P06I,T6XC,T6NC,CEIL,P01I,SNEW
+LWC,210401/0000,1031.3,30.44,9.4,-9.4,12.0,340.0,-9999.0,-9999.0,1004.0,-9999.0,-9999.0,10.0,3005.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,0.5,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0,-9999.0
diff --git a/tests/data/climate.sfc b/tests/data/climate.sfc
diff --git a/tests/data/little_endian.grd b/tests/data/little_endian.grd
diff --git a/tests/data/merged_nopack.snd b/tests/data/merged_nopack.snd
diff --git a/tests/data/multilevel_multidate.grd b/tests/data/multilevel_multidate.grd
diff --git a/tests/test_grids.py b/tests/test_grids.py
@@ -13,6 +13,18 @@
 from gempakio import GempakGrid
 
 
+@pytest.mark.parametrize('order', ['little', 'big'])
+def test_byte_swap(order):
+    """Test byte swapping."""
+    g = Path(__file__).parent / 'data' / f'{order}_endian.grd'
+
+    grid = GempakGrid(g).gdxarray()[0].squeeze()
+
+    reference = np.ones((113, 151), dtype='int32')
+
+    np.testing.assert_equal(grid, reference)
+
+
 @pytest.mark.parametrize('grid_name', ['none', 'diff', 'dec', 'grib'])
 def test_grid_loading(grid_name):
     """Test reading grids with various packing."""
@@ -27,6 +39,22 @@ def test_grid_loading(grid_name):
     np.testing.assert_allclose(gio, gempak, rtol=1e-6, atol=0)
 
 
+def test_multi_level_multi_time_access():
+    """Test accessing data with multiple levels and times."""
+    g = Path(__file__).parent / 'data' / 'multilevel_multidate.grd'
+
+    grid = GempakGrid(g)
+
+    grid.gdxarray(
+        parameter='STPC',
+        date_time='202403040000',
+        coordinate='HGHT',
+        level=0,
+        date_time2='202403050000',
+        level2=1
+    )
+
+
 @pytest.mark.parametrize('keyword,date_time', [
     ('FIRST', '201204141200'), ('LAST', '201204150000')
 ])

diff --git a/tests/test_soundings.py b/tests/test_soundings.py
@@ -75,6 +75,20 @@ def test_merged():
     np.testing.assert_allclose(gdtar, ddtar, rtol=1e-10, atol=1e-2)
 
 
+@pytest.mark.parametrize('access_type', ['STID', 'STNM'])
+def test_sounding_access(access_type):
+    """Test for proper sounding retrieval with multi-parameter filter."""
+    g = Path(__file__).parent / 'data' / 'merged_nopack.snd'
+    gso = GempakSounding(g)
+
+    if access_type == 'STID':
+        gso.snxarray(station_id='OUN', country='US', state='OK',
+                     date_time='202101200000')
+    elif access_type == 'STNM':
+        gso.snxarray(station_number=72357, country='US', state='OK',
+                     date_time='202101200000')
+
+
 @pytest.mark.parametrize('text_type', ['txta', 'txtb', 'txtc', 'txpb'])
 def test_sounding_text(text_type):
     """Test for proper decoding of coded message text."""

diff --git a/tests/test_surface.py b/tests/test_surface.py
@@ -13,12 +13,10 @@
 from gempakio import GempakSurface
 
 
-def test_standard_surface():
-    """Test to read a standard surface file."""
-    skip = ['text', 'spcl']
-
-    g = Path(__file__).parent / 'data' / 'lwc_std_sfc.sfc'
-    d = Path(__file__).parent / 'data' / 'lwc_std_sfc.csv'
+def test_climate_surface():
+    """Test to read a climate surface file."""
+    g = Path(__file__).parent / 'data' / 'climate.sfc'
+    d = Path(__file__).parent / 'data' / 'climate.csv'
 
     gsf = GempakSurface(g)
     gstns = gsf.sfjson()
@@ -35,15 +33,29 @@ def test_standard_surface():
         gemsfc = gempak.loc[idx_key, :]
 
         for param, val in stn['values'].items():
-            if param not in skip:
-                assert val == pytest.approx(gemsfc[param.upper()])
+            assert val == pytest.approx(gemsfc[param.upper()])
+
+
+def test_multiple_special_observations():
+    """Test text decoding of surface file with multiple special reports in single time."""
+    g = Path(__file__).parent / 'data' / 'msn_std_sfc.sfc'
+    d = Path(__file__).parent / 'data' / 'msn_std_sfc.csv'
+
+    gsf = GempakSurface(g)
+    #  Report text that is too long will end up truncated in surface files
+    nearest = gsf.nearest_time('202109071605', station_id='MSN', include_special=True)
+    text = nearest[0]['values']['spcl']
+    date_time = nearest[0]['properties']['date_time']
+
+    gempak = pd.read_csv(d)
+    gem_text = gempak.loc[:, 'SPCL_TRUNC'][0]
+
+    assert date_time == datetime(2021, 9, 7, 16, 4)
+    assert text == gem_text
 
 
 def test_ship_surface():
     """Test to read a ship surface file."""
-    def dtparse(string):
-        return datetime.strptime(string, '%y%m%d/%H%M')
-
     skip = ['text', 'spcl']
 
     g = Path(__file__).parent / 'data' / 'ship_sfc.sfc'
@@ -73,6 +85,46 @@ def dtparse(string):
                 np.testing.assert_allclose(decoded_vals, actual_vals)
 
 
+def test_standard_surface():
+    """Test to read a standard surface file."""
+    skip = ['text', 'spcl']
+
+    g = Path(__file__).parent / 'data' / 'lwc_std_sfc.sfc'
+    d = Path(__file__).parent / 'data' / 'lwc_std_sfc.csv'
+
+    gsf = GempakSurface(g)
+    gstns = gsf.sfjson()
+
+    gempak = pd.read_csv(d, index_col=['STN', 'YYMMDD/HHMM'],
+                         parse_dates=['YYMMDD/HHMM'],
+                         date_format={'YYMMDD/HHMM': '%y%m%d/%H%M'})
+    if not gempak.index.is_monotonic_increasing:
+        gempak.sort_index(inplace=True)
+
+    for stn in gstns:
+        idx_key = (stn['properties']['station_id'],
+                   stn['properties']['date_time'])
+        gemsfc = gempak.loc[idx_key, :]
+
+        for param, val in stn['values'].items():
+            if param not in skip:
+                assert val == pytest.approx(gemsfc[param.upper()])
+
+
+@pytest.mark.parametrize('access_type', ['STID', 'STNM'])
+def test_surface_access(access_type):
+    """Test for proper surface retrieval with multi-parameter filter."""
+    g = Path(__file__).parent / 'data' / 'msn_std_sfc.sfc'
+    gsf = GempakSurface(g)
+
+    if access_type == 'STID':
+        gsf.sfjson(station_id='MSN', country='US', state='WI',
+                   date_time='202109070000')
+    elif access_type == 'STNM':
+        gsf.sfjson(station_number=726410, country='US', state='WI',
+                   date_time='202109070000')
+
+
 @pytest.mark.parametrize('text_type,date_time,speci', [
     ('text', '202109070000', False), ('spcl', '202109071600', True)
 ])
@@ -92,24 +144,6 @@ def test_surface_text(text_type, date_time, speci):
     assert text == gem_text
 
 
-def test_multiple_special_observations():
-    """Test text decoding of surface file with multiple special reports in single time."""
-    g = Path(__file__).parent / 'data' / 'msn_std_sfc.sfc'
-    d = Path(__file__).parent / 'data' / 'msn_std_sfc.csv'
-
-    gsf = GempakSurface(g)
-    #  Report text that is too long will end up truncated in surface files
-    nearest = gsf.nearest_time('202109071605', station_id='MSN', include_special=True)
-    text = nearest[0]['values']['spcl']
-    date_time = nearest[0]['properties']['date_time']
-
-    gempak = pd.read_csv(d)
-    gem_text = gempak.loc[:, 'SPCL_TRUNC'][0]
-
-    assert date_time == datetime(2021, 9, 7, 16, 4)
-    assert text == gem_text
-
-
 @pytest.mark.parametrize('keyword,date_time', [
     ('FIRST', '202109070000'), ('LAST', '202109071604')
 ])