
Commit f20c4fb

Added data issues repository path as an explicit parameter to AWS
1 parent 8db8359 commit f20c4fb


5 files changed: +91 -48 lines


src/pypromice/process/L1toL2.py

Lines changed: 6 additions & 3 deletions
@@ -3,6 +3,7 @@
 AWS Level 1 (L1) to Level 2 (L2) data processing
 """
 import logging
+from pathlib import Path

 import numpy as np
 import pandas as pd
@@ -23,6 +24,8 @@
 def toL2(
     L1: xr.Dataset,
     vars_df: pd.DataFrame,
+    data_flags_dir: Path,
+    data_adjustments_dir: Path,
     T_0=273.15,
     ews=1013.246,
     ei0=6.1071,
@@ -72,9 +75,9 @@ def toL2(
     ds = L1.copy(deep=True)  # Reassign dataset
     ds.attrs['level'] = 'L2'
     try:
-        ds = adjustTime(ds)  # Adjust time after a user-defined csv files
-        ds = flagNAN(ds)  # Flag NaNs after a user-defined csv files
-        ds = adjustData(ds)  # Adjust data after a user-defined csv files
+        ds = adjustTime(ds, adj_dir=data_adjustments_dir.as_posix())  # Adjust time after a user-defined csv files
+        ds = flagNAN(ds, flag_dir=data_flags_dir.as_posix())  # Flag NaNs after a user-defined csv files
+        ds = adjustData(ds, adj_dir=data_adjustments_dir.as_posix())  # Adjust data after a user-defined csv files
     except Exception:
         logger.exception('Flagging and fixing failed:')
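With the hard-coded fallback to `../PROMICE-AWS-data-issues` removed, callers of `toL2` must now supply the flag and adjustment directories explicitly. A minimal sketch of the new call site, assuming an L1 dataset `ds_l1` and a variables look-up table `vars_df` are already in hand, and using a hypothetical checkout path:

    from pathlib import Path
    from pypromice.process.L1toL2 import toL2

    # Hypothetical local checkout of the PROMICE-AWS-data-issues repository
    issues_root = Path("/data/PROMICE-AWS-data-issues")

    ds_l2 = toL2(
        ds_l1,                                              # L1 xr.Dataset (assumed to exist)
        vars_df=vars_df,                                    # variables look-up table (assumed to exist)
        data_flags_dir=issues_root / "flags",               # per-station flag .csv files
        data_adjustments_dir=issues_root / "adjustments",   # per-station adjustment .csv files
    )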

src/pypromice/process/aws.py

Lines changed: 36 additions & 20 deletions
@@ -4,7 +4,8 @@
 """
 import json
 import warnings
-warnings.simplefilter(action='ignore', category=FutureWarning)
+
+warnings.simplefilter(action="ignore", category=FutureWarning)

 import logging, os
 from pathlib import Path
@@ -33,6 +34,7 @@ def __init__(
         self,
         config_file,
         inpath,
+        data_issues_repository: Path | str,
         var_file=None,
         meta_file=None,
     ):
@@ -59,23 +61,23 @@
         self.config = self.loadConfig(config_file, inpath)
         self.vars = pypromice.resources.load_variables(var_file)
         self.meta = pypromice.resources.load_metadata(meta_file)
+        self.data_issues_repository = Path(data_issues_repository)

         config_hash = get_commit_hash_and_check_dirty(Path(config_file))
         config_source_string = f"{Path(config_file).name}:{config_hash}"
         inpath_hash = get_commit_hash_and_check_dirty(Path(inpath))
-        inpath_source_string = f"{Path(inpath).name}:{inpath_hash}"
-
+        data_issues_hash = get_commit_hash_and_check_dirty(self.data_issues_repository)
         source_dict = dict(
-            pypromice = metadata.version("pypromice"),
-            l0_config_file = config_source_string,
-            l0_data_root = inpath_source_string,
+            pypromice=metadata.version("pypromice"),
+            l0_config_file=config_source_string,
+            l0_data_root=inpath_hash,
+            data_issues=data_issues_hash,
         )
         self.meta["source"] = json.dumps(source_dict)

-
         # Load config file
         L0 = self.loadL0()
-        self.L0=[]
+        self.L0 = []
         for l in L0:
             n = write.getColNames(self.vars, l)
             self.L0.append(utilities.popCols(l, n))
@@ -98,7 +100,9 @@ def __init__(
     def process(self):
         """Perform L0 to L3 data processing"""
         try:
-            logger.info(f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...')
+            logger.info(
+                f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...'
+            )
             logger.info(
                 f'Commencing {self.L0.attrs["number_of_booms"]}-boom processing...'
             )
@@ -137,7 +141,13 @@ def getL1(self):
     def getL2(self):
         """Perform L1 to L2 data processing"""
         logger.info("Level 2 processing...")
-        self.L2 = toL2(self.L1A, vars_df=self.vars)
+
+        self.L2 = toL2(
+            self.L1A,
+            vars_df=self.vars,
+            data_flags_dir=self.data_issues_repository / "flags",
+            data_adjustments_dir=self.data_issues_repository / "adjustments",
+        )

     def getL3(self):
         """Perform L2 to L3 data processing, including resampling and metadata
@@ -186,7 +196,7 @@ def loadConfig(self, config_file, inpath):
         return conf

     def loadL0(self):
-        '''Load level 0 (L0) data from associated TOML-formatted
+        """Load level 0 (L0) data from associated TOML-formatted
        config file and L0 data file

        Try readL0file() using the config with msg_lat & msg_lon appended. The
@@ -201,7 +211,7 @@ def loadL0(self):
        -------
        ds_list : list
            List of L0 xr.Dataset objects
-        '''
+        """
        ds_list = []
        for k in self.config.keys():
            target = self.config[k]
@@ -211,14 +221,14 @@ def loadL0(self):
            except pd.errors.ParserError as e:
                # ParserError: Too many columns specified: expected 40 and found 38
                # logger.info(f'-----> No msg_lat or msg_lon for {k}')
-                for item in ['msg_lat', 'msg_lon']:
-                    target['columns'].remove(item) # Also removes from self.config
+                for item in ["msg_lat", "msg_lon"]:
+                    target["columns"].remove(item) # Also removes from self.config
                ds_list.append(self.readL0file(target))
-            logger.info(f'L0 data successfully loaded from {k}')
+            logger.info(f"L0 data successfully loaded from {k}")
        return ds_list

    def readL0file(self, conf):
-        '''Read L0 .txt file to Dataset object using config dictionary and
+        """Read L0 .txt file to Dataset object using config dictionary and
        populate with initial metadata

        Parameters
@@ -230,9 +240,15 @@ def readL0file(self, conf):
        -------
        ds : xr.Dataset
            L0 data
-        '''
-        file_version = conf.get('file_version', -1)
-        ds = load.getL0(conf['file'], conf['nodata'], conf['columns'],
-                        conf["skiprows"], file_version, time_offset=conf.get('time_offset'))
+        """
+        file_version = conf.get("file_version", -1)
+        ds = load.getL0(
+            conf["file"],
+            conf["nodata"],
+            conf["columns"],
+            conf["skiprows"],
+            file_version,
+            time_offset=conf.get("time_offset"),
+        )
        ds = utilities.populateMeta(ds, conf, ["columns", "skiprows", "modem"])
        return ds
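AWS now takes the data issues checkout as an explicit third positional parameter and records its commit hash in the `source` metadata alongside the config and L0 hashes. A minimal sketch of constructing the class under the new signature, with hypothetical paths:

    from pypromice.process.aws import AWS

    # Hypothetical paths for illustration
    aws = AWS(
        "config/TEST1.toml",                                # station TOML config file
        "l0_data/TEST1",                                    # L0 input directory
        data_issues_repository="PROMICE-AWS-data-issues",   # local checkout; str or Path
    )
    aws.process()   # L0 -> L3, reading flags/adjustments from the checkout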

src/pypromice/process/get_l2.py

Lines changed: 33 additions & 11 deletions
@@ -1,9 +1,14 @@
 #!/usr/bin/env python
-import logging, os, sys, unittest
+import logging
+import os
+import sys
 from argparse import ArgumentParser
+from pathlib import Path
+
 from pypromice.process.aws import AWS
 from pypromice.process.write import prepare_and_write

+
 def parse_arguments_l2():
     parser = ArgumentParser(description="AWS L2 processor")

@@ -17,24 +22,19 @@ def parse_arguments_l2():
                         required=False, help='File path to variables look-up table')
     parser.add_argument('-m', '--metadata', default=None, type=str,
                         required=False, help='File path to metadata')
+    parser.add_argument('--data_issues_path', default=None, help="Path to data issues repository")
     args = parser.parse_args()
     return args


-def get_l2(config_file, inpath, outpath, variables, metadata) -> AWS:
-    logging.basicConfig(
-        format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
-        level=logging.INFO,
-        stream=sys.stdout,
-    )
-
+def get_l2(config_file, inpath, outpath, variables, metadata, data_issues_path: Path) -> AWS:
     # Define input path
     station_name = config_file.split('/')[-1].split('.')[0]
     station_path = os.path.join(inpath, station_name)
     if os.path.exists(station_path):
-        aws = AWS(config_file, station_path, variables, metadata)
+        aws = AWS(config_file, station_path, data_issues_repository=data_issues_path, var_file=variables, meta_file=metadata)
     else:
-        aws = AWS(config_file, inpath, variables, metadata)
+        aws = AWS(config_file, inpath, data_issues_repository=data_issues_path, var_file=variables, meta_file=metadata)

     # Perform level 1 and 2 processing
     aws.getL1()
@@ -51,7 +51,29 @@ def get_l2(config_file, inpath, outpath, variables, metadata) -> AWS:

 def main():
     args = parse_arguments_l2()
-    _ = get_l2(args.config_file, args.inpath, args.outpath, args.variables, args.metadata)
+
+    logging.basicConfig(
+        format="%(asctime)s; %(levelname)s; %(name)s; %(message)s",
+        level=logging.INFO,
+        stream=sys.stdout,
+    )
+
+    data_issues_path = args.data_issues_path
+    if data_issues_path is None:
+        data_issues_path = Path("../PROMICE-AWS-data-issues")
+        if data_issues_path.exists():
+            logging.warning(f"data_issues_path is missing. Using default data issues path: {data_issues_path}")
+        else:
+            raise ValueError(f"data_issues_path is missing. Please provide a valid path to the data issues repository")
+
+    _ = get_l2(
+        args.config_file,
+        args.inpath,
+        args.outpath,
+        args.variables,
+        args.metadata,
+        data_issues_path=data_issues_path,
+    )


 if __name__ == "__main__":
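The logging setup moves from the library function into main(), and the data issues path is resolved there: an explicit --data_issues_path wins, otherwise a sibling ../PROMICE-AWS-data-issues checkout is used if it exists, otherwise the script raises. Calling the function directly now looks roughly like this sketch (paths hypothetical):

    from pathlib import Path
    from pypromice.process.get_l2 import get_l2

    # Hypothetical paths for illustration
    aws = get_l2(
        config_file="config/TEST1.toml",
        inpath="l0_data",
        outpath="out_l2",
        variables=None,                                   # fall back to packaged variables table
        metadata=None,                                    # fall back to packaged metadata
        data_issues_path=Path("PROMICE-AWS-data-issues"),
    )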

src/pypromice/qc/github_data_issues.py

Lines changed: 3 additions & 10 deletions
@@ -1,7 +1,5 @@
 import logging
 import os
-import urllib.request
-from urllib.error import HTTPError, URLError

 import numpy as np
 import pandas as pd
@@ -16,8 +14,7 @@
 logger = logging.getLogger(__name__)


-def flagNAN(ds_in,
-            flag_dir='../PROMICE-AWS-data-issues/flags'):
+def flagNAN(ds_in, flag_dir):
     '''Read flagged data from .csv file. For each variable, and downstream
     dependents, flag as invalid (or other) if set in the flag .csv

@@ -73,9 +70,7 @@ def flagNAN(ds_in,
     return ds


-def adjustTime(ds,
-               adj_dir='../PROMICE-AWS-data-issues/adjustments/',
-               var_list=[], skip_var=[]):
+def adjustTime(ds, adj_dir, var_list=[], skip_var=[]):
     '''Read adjustment data from .csv file. Only applies the "time_shift" adjustment

     Parameters
@@ -134,9 +129,7 @@ def adjustTime(ds,
     return ds_out


-def adjustData(ds,
-               adj_dir='../PROMICE-AWS-data-issues/adjustments/',
-               var_list=[], skip_var=[]):
+def adjustData(ds, adj_dir, var_list=[], skip_var=[]):
     '''Read adjustment data from .csv file. For each variable, and downstream
     dependents, adjust data accordingly if set in the adjustment .csv
142135

tests/e2e/test_process.py

Lines changed: 13 additions & 4 deletions
@@ -62,7 +62,13 @@ def test_add_all(self):

     def test_l0_to_l3(self):
         '''Test L0 to L3 processing'''
-        pAWS = AWS(TEST_CONFIG_PATH.as_posix(), TEST_DATA_ROOT_PATH.as_posix())
+        pAWS = AWS(
+            TEST_CONFIG_PATH.as_posix(),
+            TEST_DATA_ROOT_PATH.as_posix(),
+            data_issues_repository=TEST_DATA_ROOT_PATH / 'data_issues',
+            var_file=None,
+            meta_file=None
+        )
         pAWS.process()
         self.assertIsInstance(pAWS.L2, xr.Dataset)
         self.assertTrue(pAWS.L2.attrs['station_id']=='TEST1')
@@ -71,17 +77,17 @@ def get_l2_cli(self):
         '''Test get_l2 CLI'''
         exit_status = os.system('get_l2 -h')
         self.assertEqual(exit_status, 0)
-
+
     def test_join_l2_cli(self):
         '''Test join_l2 CLI'''
         exit_status = os.system('join_l2 -h')
         self.assertEqual(exit_status, 0)
-
+
     def test_l2_to_l3_cli(self):
         """Test get_l2tol3 CLI"""
         exit_status = os.system('get_l2tol3 -h')
         self.assertEqual(exit_status, 0)
-
+
     def test_join_l3_cli(self):
         """Test join_l3 CLI"""
         exit_status = os.system('join_l3 -h')
@@ -100,18 +106,21 @@ def test_full_e2e(self):
         output_path_raw = root / "station_l2_raw"
         config_file_tx = TEST_DATA_ROOT_PATH / "test_config1_tx.toml"
         config_file_raw = TEST_DATA_ROOT_PATH / "test_config1_raw.toml"
+        data_issues_path = TEST_DATA_ROOT_PATH / "data_issues"
         station_id = "TEST1"
         aws_tx_l2 = get_l2(
             config_file=config_file_tx.as_posix(),
             inpath=TEST_DATA_ROOT_PATH.as_posix(),
             outpath=output_path_tx,
+            data_issues_path=data_issues_path,
             variables=None,
             metadata=None,
         )
         aws_raw_l2 = get_l2(
             config_file=config_file_raw.as_posix(),
             inpath=TEST_DATA_ROOT_PATH.as_posix(),
             outpath=output_path_raw,
+            data_issues_path=data_issues_path,
             variables=None,
             metadata=None,
         )
