Skip to content

Commit

Permalink
Merge pull request #4 from nawendt/fix-text
Browse files Browse the repository at this point in the history
Fixed truncated text data on surface and soundings
  • Loading branch information
nawendt committed Jan 28, 2022
2 parents cf7088e + a89f442 commit 6f53e4b
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 60 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
BSD 3-Clause License

Copyright (c) 2021, Nathan Wendt
Copyright (c) 2022, Nathan Wendt
All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
128 changes: 75 additions & 53 deletions gempakio/decode.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021 Nathan Wendt.
# Copyright (c) 2022 Nathan Wendt.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Classes for decoding various GEMPAK file formats."""
Expand Down Expand Up @@ -1139,6 +1139,9 @@ def _unpack_merged(self, sndno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))
if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand Down Expand Up @@ -1202,6 +1205,9 @@ def _unpack_unmerged(self, sndno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))
if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand Down Expand Up @@ -1765,18 +1771,17 @@ def _merge_sounding(self, parts):
sped = parts['PPDD']['SPED'][i]
skip = False

if (hght == self.prod_desc.missing_float
if ((hght == self.prod_desc.missing_float
and drct == self.prod_desc.missing_float
and sped == self.prod_desc.missing_float):
and sped == self.prod_desc.missing_float)
or hght <= zold):
skip = True
elif abs(zold - hght) < 1:
skip = True
if (merged['DRCT'][ilev - 1] == self.prod_desc.missing_float
or merged['SPED'][ilev - 1] == self.prod_desc.missing_float):
merged['DRCT'][ilev - 1] = drct
merged['SPED'][ilev - 1] = sped
elif hght <= zold:
skip = True
elif hght >= znxt:
while more and hght > znxt:
zold = znxt
Expand Down Expand Up @@ -2033,9 +2038,36 @@ def __init__(self, file, *args, **kwargs):
if self.surface_type == 'standard':
for irow, row_head in enumerate(self.row_headers):
for icol, col_head in enumerate(self.column_headers):
for iprt in range(len(self.parts)):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts + iprt))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)

if data_ptr:
self._sfinfo.append(
Surface(
irow,
icol,
datetime.combine(row_head.DATE, row_head.TIME),
col_head.STID + col_head.STD2,
col_head.STNM,
col_head.SLAT,
col_head.SLON,
col_head.SELV,
col_head.STAT,
col_head.COUN,
)
)
elif self.surface_type == 'ship':
irow = 0
for icol, col_head in enumerate(self.column_headers):
for iprt in range(len(self.parts)):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts))
+ (icol * self.prod_desc.parts + iprt))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)
Expand All @@ -2045,7 +2077,7 @@ def __init__(self, file, *args, **kwargs):
Surface(
irow,
icol,
datetime.combine(row_head.DATE, row_head.TIME),
datetime.combine(col_head.DATE, col_head.TIME),
col_head.STID + col_head.STD2,
col_head.STNM,
col_head.SLAT,
Expand All @@ -2055,56 +2087,32 @@ def __init__(self, file, *args, **kwargs):
col_head.COUN,
)
)
elif self.surface_type == 'ship':
irow = 0
for icol, col_head in enumerate(self.column_headers):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)

if data_ptr:
self._sfinfo.append(
Surface(
irow,
icol,
datetime.combine(col_head.DATE, col_head.TIME),
col_head.STID + col_head.STD2,
col_head.STNM,
col_head.SLAT,
col_head.SLON,
col_head.SELV,
col_head.STAT,
col_head.COUN,
)
)
elif self.surface_type == 'climate':
for icol, col_head in enumerate(self.column_headers):
for irow, row_head in enumerate(self.row_headers):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)

if data_ptr:
self._sfinfo.append(
Surface(
irow,
icol,
datetime.combine(col_head.DATE, col_head.TIME),
row_head.STID + row_head.STD2,
row_head.STNM,
row_head.SLAT,
row_head.SLON,
row_head.SELV,
row_head.STAT,
row_head.COUN,
for iprt in range(len(self.parts)):
pointer = (self.prod_desc.data_block_ptr
+ (irow * self.prod_desc.columns * self.prod_desc.parts)
+ (icol * self.prod_desc.parts + iprt))

self._buffer.jump_to(self._start, _word_to_position(pointer))
data_ptr = self._buffer.read_int(4, self.endian, False)

if data_ptr:
self._sfinfo.append(
Surface(
irow,
icol,
datetime.combine(col_head.DATE, col_head.TIME),
row_head.STID + row_head.STD2,
row_head.STNM,
row_head.SLAT,
row_head.SLON,
row_head.SELV,
row_head.STAT,
row_head.COUN,
)
)
)
else:
raise TypeError('Unknown surface type {}'.format(self.surface_type))

Expand Down Expand Up @@ -2183,6 +2191,9 @@ def _unpack_climate(self, sfcno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))
if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand Down Expand Up @@ -2250,6 +2261,9 @@ def _unpack_ship(self, sfcno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))
if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand Down Expand Up @@ -2304,6 +2318,11 @@ def _unpack_standard(self, sfcno):
self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
self.data_header_length = self._buffer.read_int(4, self.endian, False)
data_header = self._buffer.set_mark()
# if part.header_length == 1:
# ihhmm = self._buffer.read_int(4, self.endian, False)
# if part.header_length == 2:
# nreps = self._buffer.read_int(4, self.endian, False)
# ihhmm = self._buffer.read_int(4, self.endian, False)
self._buffer.jump_to(data_header,
_word_to_position(part.header_length + 1))
lendat = self.data_header_length - part.header_length
Expand All @@ -2317,6 +2336,9 @@ def _unpack_standard(self, sfcno):
if fmt_code is None:
raise NotImplementedError('No methods for data type {}'
.format(part.data_type))
if fmt_code == 's':
lendat *= BYTES_PER_WORD

packed_buffer = (
self._buffer.read_struct(
struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
Expand Down
1 change: 1 addition & 0 deletions gempakio/tools.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Copyright (c) 2009,2016,2019 MetPy Developers.
# Copyright (c) 2021 Nathan Wendt.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Copyright (c) 2021 Nathan Wendt.
# Copyright (c) 2022 Nathan Wendt.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Setup script for installing gempakIO."""

from setuptools import find_packages, setup

NAME = 'gempakio'
VERSION = '0.6'
VERSION = '0.6.2'
DESCR = 'Read GEMPAK data with pure Python.'
URL = 'https://github.com/nawendt/gempakio'
REQUIRES = ['pyproj', 'xarray']
Expand Down
2 changes: 2 additions & 0 deletions tests/data/msn_std_sfc.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
TEXT,SPCL
"KMSN 062353Z 01003KT 10SM CLR 21/10 A2986 RMK AO2 SLP111 T02110100 10261 20211 58011 $","KMSN 071604Z 23009G18KT 2SM R36/3000VP6000FT -TSRA BKN027CB OVC080 22/18 A2966"
Binary file added tests/data/msn_std_sfc.sfc
Binary file not shown.
2 changes: 2 additions & 0 deletions tests/data/unmerged_with_text.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
TXTA,TXTB,TXTC,TXPB
"TTAA 57001 72357 99977 22458 15007 00142 ///// ///// 92818 18056 15518 85535 11834 17514 70148 06271 05504 50582 07599 07047 40751 20598 06036 30957 36187 04025 25081 46375 35531 20226 55172 34047 15406 62574 33036 10649 74363 29022 88105 73963 27521 77999 31313 51108 82302","TTBB 57008 72357 00977 22458 11976 22259 22847 11028 33825 09632 44821 09846 55817 12064 66812 12267 77786 10467 88777 11475 99752 10872 11565 05181 22517 05199 33415 19581 44341 30382 55320 31991 66220 53565 77190 55578 88165 61574 99145 61775 11105 73963 22101 74163 31313 51108 82302 41414 00900 51515 10164 00091 10194 16015 13005","TTCC 57002 72357 70859 73164 28512 50060 66371 28511 30379 51586 22516 20642 50986 29508 88999 77999 31313 51108 82302","PPBB 57008 72357 90012 15007 15007 15018 90345 15518 17016 17512 90678 16012 13509 10005 909// 04007 91012 03003 06505 08005 91345 10509 11010 10515 9168/ 09524 07547 92059 06546 06036 06032 93024 05028 03524 00530 9357/ 36030 34037 94257 34055 33049 33009 9489/ 25505 21512 9504/ 22017 29020"
Binary file added tests/data/unmerged_with_text.snd
Binary file not shown.
18 changes: 17 additions & 1 deletion tests/test_soundings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021 Nathan Wendt.
# Copyright (c) 2022 Nathan Wendt.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Tests for decoding GEMPAK sounding files."""
Expand Down Expand Up @@ -74,6 +74,22 @@ def test_merged():
np.testing.assert_allclose(gdtar, ddtar, rtol=1e-10, atol=1e-2)


@pytest.mark.parametrize('text_type', ['txta', 'txtb', 'txtc', 'txpb'])
def test_radat_text(text_type):
    """Compare a decoded RADAT text attribute against the reference CSV value.

    The sounding for station ``OUN`` is read from ``unmerged_with_text.snd`` and
    the requested text entry under its ``RADAT`` attribute must equal the first
    row of the matching upper-cased column in ``unmerged_with_text.csv``.
    """
    data_dir = Path(__file__).parent / 'data'
    sounding_file = data_dir / 'unmerged_with_text.snd'
    reference_file = data_dir / 'unmerged_with_text.csv'

    # Only the first sounding returned for the station is checked.
    sounding = GempakSounding(sounding_file).snxarray(station_id='OUN')[0]
    reference = pd.read_csv(reference_file)

    decoded = sounding.attrs['RADAT'][text_type]
    column = text_type.upper()
    expected = reference.loc[:, column][0]

    assert decoded == expected


@pytest.mark.parametrize('gem,gio,station', [
('top_sigw_hght_unmrg.csv', 'top_sigw_hght_unmrg.snd', 'TOP'),
('waml_sigw_pres_unmrg.csv', 'waml_sigw_pres_unmrg.snd', 'WAML')
Expand Down
24 changes: 21 additions & 3 deletions tests/test_surface.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021 Nathan Wendt.
# Copyright (c) 2022 Nathan Wendt.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
"""Tests for decoding GEMPAK surface files."""
Expand All @@ -18,7 +18,7 @@ def test_standard_surface():
def dtparse(string):
return datetime.strptime(string, '%y%m%d/%H%M')

skip = ['text']
skip = ['text', 'spcl']

g = Path(__file__).parent / 'data' / 'lwc_std_sfc.sfc'
d = Path(__file__).parent / 'data' / 'lwc_std_sfc.csv'
Expand Down Expand Up @@ -47,7 +47,7 @@ def test_ship_surface():
def dtparse(string):
return datetime.strptime(string, '%y%m%d/%H%M')

skip = ['text']
skip = ['text', 'spcl']

g = Path(__file__).parent / 'data' / 'ship_sfc.sfc'
d = Path(__file__).parent / 'data' / 'ship_sfc.csv'
Expand All @@ -74,3 +74,21 @@ def dtparse(string):
decoded_vals = [d['values'][param.lower()] for d in gstns]
actual_vals = ugem.loc[:, param].values
np.testing.assert_allclose(decoded_vals, actual_vals)


@pytest.mark.parametrize('text_type,date_time', [
    ('text', '202109070000'), ('spcl', '202109071600')
])
def test_surface_text(text_type, date_time):
    """Compare decoded surface text against the reference CSV value.

    For each parametrized text type, the observation nearest to ``date_time``
    for station ``MSN`` is read from ``msn_std_sfc.sfc`` and its text value must
    equal the first row of the matching upper-cased column in
    ``msn_std_sfc.csv``.
    """
    data_dir = Path(__file__).parent / 'data'
    surface_file = data_dir / 'msn_std_sfc.sfc'
    reference_file = data_dir / 'msn_std_sfc.csv'

    surface = GempakSurface(surface_file)
    # Only the first matching station record is checked.
    matches = surface.nearest_time(date_time, station_id='MSN')
    decoded = matches[0]['values'][text_type]

    reference = pd.read_csv(reference_file)
    column = text_type.upper()
    expected = reference.loc[:, column][0]

    assert decoded == expected

0 comments on commit 6f53e4b

Please sign in to comment.