Skip to content

Commit a4af0e2

Browse files
Issue1299 - New after_gather entry point + better error handling in sundew_dirPattern (#1308)
* Add after_gather entry point + add it to renamer * Have UpdateFieldsAccepted return a bool and wrap sundew_dirPattern in try/except * Add documentation for new after_gather entry point * Correct unit tests.
1 parent 7c5f470 commit a4af0e2

File tree

9 files changed

+116
-51
lines changed

9 files changed

+116
-51
lines changed

docs/source/Explanation/SarraPluginDev.rst

+11
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,17 @@ for detailed information about call signatures and return values, etc...
569569
| | |
570570
| | |
571571
+---------------------+----------------------------------------------------+
572+
| | |
573+
| after_gather | Called after gather and before filter. |
574+
| (self,worklist) | |
575+
| | Not used often. after_accept should be used |
576+
| | for most use cases. |
577+
| | |
578+
| | after_gather should only really be used when: |
579+
| | - There needs to be a change to the worklist |
580+
| | of messages before attempting to filter. |
581+
| | |
582+
+---------------------+----------------------------------------------------+
572583
| | called after a transfer has been attempted. |
573584
| after_work | |
574585
| (self,worklist) | All messages are acknowledged by this point. |

docs/source/How2Guides/FlowCallbacks.rst

+3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ the `sarracenia.flowcb <../../sarracenia/flowcb/__init__.py>`_ class.
1717
Briefly, the algorithm has the following steps:
1818

1919
* **gather** -- passively collect notification messages to be processed.
20+
21+
* *after_gather* callback entry point
22+
2023
* **poll** -- actively collect notification messages to be processed.
2124
* **filter** -- apply accept/reject regular expression matches to the notification message list.
2225

docs/source/fr/CommentFaire/FlowCallbacks.rst

+3
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ la classe `sarracenia.flowcb <../../sarracenia/flowcb/__init__.py>`_.
1717
En bref, l’algorithme comporte les étapes suivantes :
1818

1919
* **gather** -- collecter passivement les messages de notification à traiter.
20+
21+
* *after_gather* point d’entrée de callback
22+
2023
* **poll** -- collecter activement les messages de notification à traiter.
2124
* **filter** -- appliquer des correspondances d’expression régulière accept/reject à la liste des messages de notification.
2225

docs/source/fr/Explication/SarraPluginDev.rst

+13
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,19 @@ pour des informations détaillées sur les signatures d’appel et les valeurs d
531531
| | binaires pour les fichiers volumineux.) |
532532
| | |
533533
+---------------------+----------------------------------------------------+
534+
| | |
535+
| after_gather | Appelé après gather et avant filter (filtre) |
536+
| (self,worklist) | |
537+
| | C'est une option peu utilisée. |
538+
| | after_accept devrait être utilisé pour la |
539+
| | plupart des cas |
540+
| | |
541+
| | after_gather devrait seulement être utilisé |
542+
| | lorsque: |
543+
| | - Un changement doit être fait à la worklist |
544+
| | de messages avant d'atteindre le filtre. |
545+
| | |
546+
+---------------------+----------------------------------------------------+
534547
| | appelé après qu’un transfert a été tenté. |
535548
| after_work | |
536549
| (self,worklist) | A ce point, tous les messages sont reconnus. |

sarracenia/flow/__init__.py

+37-12
Original file line numberDiff line numberDiff line change
@@ -806,7 +806,7 @@ def sundew_getDestInfos(self, msg, currentFileOption, filename):
806806
807807
"""
808808
def updateFieldsAccepted(self, msg, urlstr, pattern, maskDir,
809-
maskFileOption, mirror, path_strip_count, pstrip, flatten) -> None:
809+
maskFileOption, mirror, path_strip_count, pstrip, flatten) -> bool:
810810
"""
811811
Set new message fields according to values when the message is accepted.
812812
@@ -818,6 +818,8 @@ def updateFieldsAccepted(self, msg, urlstr, pattern, maskDir,
818818
* pstrip: pattern strip regexp to apply instead of a count.
819819
* flatten: a character to replace path separators with, to change a multi-directory
820820
deep file name into a single long file name
821+
822+
return True on success
821823
822824
"""
823825

@@ -961,14 +963,22 @@ def updateFieldsAccepted(self, msg, urlstr, pattern, maskDir,
961963

962964
tfname = filename
963965
# when sr_sender was not derived from sr_subscribe, it was always called
964-
new_dir = self.o.sundew_dirPattern(pattern, urlstr, tfname, new_dir)
965-
msg.updatePaths(self.o, new_dir, filename)
966+
try:
967+
new_dir = self.o.sundew_dirPattern(pattern, urlstr, tfname, new_dir)
968+
msg.updatePaths(self.o, new_dir, filename)
969+
except Exception as ex:
970+
logger.error( f"sundew_dirPattern crashed: {ex}." )
971+
logger.debug( "details:", exc_info=True )
972+
return False
966973

967974
if maskFileOption:
968975
msg['new_file'] = self.sundew_getDestInfos(msg, maskFileOption, filename)
969976
msg['new_relPath'] = '/'.join( msg['new_relPath'].split('/')[0:-1] + [ msg['new_file'] ] )
970977

971978

979+
return True
980+
981+
972982
def filter(self) -> None:
973983

974984
logger.debug(
@@ -1068,14 +1078,18 @@ def filter(self) -> None:
10681078
(str(mask), strip, urlToMatch))
10691079
break
10701080

1081+
10711082
m['_mask'] = mask
10721083
m['_deleteOnPost'].add('_mask')
10731084

1074-
self.updateFieldsAccepted(m, url, pattern, maskDir,
1085+
if self.updateFieldsAccepted(m, url, pattern, maskDir,
10751086
maskFileOption, mirror, strip,
1076-
pstrip, flatten)
1087+
pstrip, flatten):
1088+
filtered_worklist.append(m)
1089+
else:
1090+
self.reject(m, 404, "unable to update fields %s" % url)
1091+
10771092

1078-
filtered_worklist.append(m)
10791093
break
10801094

10811095
if not matched:
@@ -1084,23 +1098,32 @@ def filter(self) -> None:
10841098
m['renameUnlink'] = True
10851099
m['_deleteOnPost'] |= set(['renameUnlink'])
10861100
logger.debug("rename deletion 2 %s" % (m['fileOp']['rename']))
1087-
filtered_worklist.append(m)
1088-
self.updateFieldsAccepted(m, url, None,
1101+
1102+
if self.updateFieldsAccepted(m, url, None,
10891103
default_accept_directory,
10901104
self.o.filename, self.o.mirror,
10911105
self.o.strip, self.o.pstrip,
1092-
self.o.flatten)
1106+
self.o.flatten):
1107+
filtered_worklist.append(m)
1108+
else:
1109+
self.reject(m, 404, "unable to update fields %s" % url)
1110+
1111+
10931112
continue
10941113

10951114
if self.o.acceptUnmatched:
10961115
logger.debug("accept: unmatched pattern=%s" % (url))
10971116
# FIXME... missing dir mapping with mirror, strip, etc...
1098-
self.updateFieldsAccepted(m, url, None,
1117+
if self.updateFieldsAccepted(m, url, None,
10991118
default_accept_directory,
11001119
self.o.filename, self.o.mirror,
11011120
self.o.strip, self.o.pstrip,
1102-
self.o.flatten)
1103-
filtered_worklist.append(m)
1121+
self.o.flatten):
1122+
1123+
filtered_worklist.append(m)
1124+
else:
1125+
self.reject(m, 404, "unable to update fields %s" % url)
1126+
11041127
else:
11051128
self.reject(m, 404, "unmatched pattern %s" % url)
11061129

@@ -1151,6 +1174,8 @@ def gather(self) -> None:
11511174

11521175
return
11531176

1177+
self._runCallbacksWorklist('after_gather')
1178+
11541179
# gather is an extended version of poll.
11551180
if self.o.component != 'poll':
11561181
return

sarracenia/flowcb/__init__.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
entry_points = [
1414

15-
'ack', 'after_accept', 'after_post', 'after_work', 'destfn', 'do_poll',
15+
'ack', 'after_accept', 'after_gather', 'after_post', 'after_work', 'destfn', 'do_poll',
1616
'download', 'gather', 'metricsReport', 'on_cleanup', 'on_declare', 'on_features',
1717
'on_housekeeping', 'on_sanity', 'on_start', 'on_stop',
1818
'please_stop', 'poll', 'post', 'report', 'send',
@@ -105,6 +105,17 @@ def after_accept(self,worklist) -> None::
105105
and move messages to worklist.rejected to prevent further processing.
106106
do not delete any messages, only move between worklists.
107107
108+
def after_gather(self,worklist) -> None::
109+
110+
Task: operate on worklist.incoming to help decide which messages to process further.
111+
Move messages to worklist.rejected to prevent further processing.
112+
113+
Should only really be used for special use cases when message processing
114+
needs to be done before going through `filter` of the flow algorithm.
115+
116+
Otherwise, after_accept entry point should be used.
117+
118+
108119
def after_work(self,worklist) -> None::
109120
110121
Task: operate on worklist.ok (files which have arrived.)

sarracenia/flowcb/log.py

+8
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,14 @@ def after_accept(self, worklist):
177177
if set(['after_accept']) & self.o.logEvents:
178178
logger.info( f"accepted: (lag: {lag:.2f} ) {self._messageAcceptStr(msg)}" )
179179

180+
def after_gather(self, worklist):
181+
if set(['after_gather']) & self.o.logEvents:
182+
for msg in worklist.incoming:
183+
logger.info("gathered: %s" % self._messagePostStr(msg))
184+
for msg in worklist.rejected:
185+
logger.info("rejected: %s" % self._messagePostStr(msg))
186+
187+
180188
def after_post(self, worklist):
181189
if set(['after_post']) & self.o.logEvents:
182190
for msg in worklist.ok:

sarracenia/flowcb/rename/raw2bulletin.py

+7-13
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def __init__(self,options) :
8282
self.o.add_option('binaryInitialCharacters', 'list', [b'BUFR' , b'GRIB', b'\211PNG'])
8383

8484
# If file was converted, get rid of extensions it had
85-
def after_accept(self,worklist):
85+
def after_gather(self,worklist):
8686

8787
new_worklist = []
8888

@@ -152,15 +152,8 @@ def after_accept(self,worklist):
152152

153153
# Add current time as new timestamp to filename
154154
new_file = header + "_" + timehandler.strftime('%d%H%M') + "_" + BBB + "_" + stn_id + "_" + seq + "_PROBLEM"
155-
156-
# Write the file manually as the messages don't get posted downstream.
157-
# The message won't also get downloaded further downstream
158-
msg['new_file'] = new_file
159-
new_path = msg['new_dir'] + '/' + msg['new_file']
160-
161-
# with open(new_path, 'w') as f: f.write(data)
162-
163155
logger.error(f"New filename (for problem file): {new_file}")
156+
164157
elif stn_id == None:
165158
new_file = header + "_" + BBB + "_" + '' + "_" + seq + "_PROBLEM"
166159
logger.error(f"New filename (for problem file): {new_file}")
@@ -169,15 +162,16 @@ def after_accept(self,worklist):
169162
else:
170163
new_file = header + "_" + ddhhmm + "_" + BBB + "_" + stn_id + "_" + seq
171164

172-
msg['new_file'] = new_file
173-
174165
# No longer needed
175166
if 'isProblem' in msg:
176167
del(msg['isProblem'])
177168

178-
# msg.updatePaths(self.o, msg['new_dir'], msg['new_file'])
169+
# Update relPath with the new filename: this runs in after_gather (before filter), so new_dir and new_file are not set yet.
170+
parts = msg['relPath'].split('/')
171+
parts[-1] = new_file
172+
msg['relPath'] = '/'.join(parts)
179173

180-
logger.info(f"New filename: {msg['new_file']}")
174+
logger.info(f"New filename: {new_file}")
181175
new_worklist.append(msg)
182176

183177
except Exception as e:

tests/sarracenia/flowcb/gather/am__gather_test.py

+22-25
Original file line numberDiff line numberDiff line change
@@ -105,17 +105,16 @@ def test_am_binary_bulletin():
105105
bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test1)
106106

107107
bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_')
108-
message_test1['new_file'] = bulletinHeader + '__12345'
109-
message_test1['new_dir'] = BaseOptions.directory
108+
message_test1['relPath'] = BaseOptions.directory + bulletinHeader + '__12345'
110109
message_test1['content']['value'] = b64encode(message_test1['content']['value']).decode('ascii')
111110
message_test1["isProblem"] = False
112111

113112
worklist = make_worklist()
114113
worklist.incoming = [message_test1]
115114

116115
# Check renamer.
117-
renamer.after_accept(worklist)
118-
assert worklist.incoming[0]['new_file'] == 'ISAA41_CYWA_030000___00001'
116+
renamer.after_gather(worklist)
117+
assert worklist.incoming[0]['relPath'].split('/')[-1] == 'ISAA41_CYWA_030000___00001'
119118

120119

121120
# Test 2: Check a regular CACN bulletin
@@ -132,8 +131,7 @@ def test_cacn_regular():
132131
bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test2)
133132

134133
bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_')
135-
message_test2['new_file'] = bulletinHeader + '__12345'
136-
message_test2['new_dir'] = BaseOptions.directory
134+
message_test2['relPath'] = BaseOptions.directory + bulletinHeader + '__12345'
137135

138136
# Check correcting the bulletin contents of a CACN
139137
new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset)
@@ -147,8 +145,8 @@ def test_cacn_regular():
147145
worklist = make_worklist()
148146
worklist.incoming = [message_test2]
149147

150-
renamer.after_accept(worklist)
151-
assert worklist.incoming[0]['new_file'] == 'CACN00_CWAO_021600__WVO_00001'
148+
renamer.after_gather(worklist)
149+
assert worklist.incoming[0]['relPath'].split('/')[-1] == 'CACN00_CWAO_021600__WVO_00001'
152150

153151
# Test 3: Check an erroneous CACN bulletin (missing timestamp in bulletin contents)
154152
def test_cacn_erronous():
@@ -180,8 +178,8 @@ def test_cacn_erronous():
180178
worklist.incoming = [message_test3]
181179

182180

183-
renamer.after_accept(worklist)
184-
assert re.match('CACN00_CWAO_......__WPK_00001_PROBLEM' , worklist.incoming[0]['new_file'])
181+
renamer.after_gather(worklist)
182+
assert re.match('CACN00_CWAO_......__WPK_00001_PROBLEM' , worklist.incoming[0]['relPath'].split('/')[-1])
185183

186184
# Test 4: Bulletin with double line separator after header (my-header\n\n)
187185
def test_bulletin_double_linesep():
@@ -212,8 +210,8 @@ def test_bulletin_double_linesep():
212210
worklist = make_worklist()
213211
worklist.incoming = [message_test4]
214212

215-
renamer.after_accept(worklist)
216-
assert message_test4['new_file'] == 'SXCN35_CWVR_021100___00001'
213+
renamer.after_gather(worklist)
214+
assert message_test4['relPath'].split('/')[-1] == 'SXCN35_CWVR_021100___00001'
217215

218216
# Test 5: Bulletin with invalid year in timestamp (Fix: https://github.com/MetPX/sarracenia/pull/973)
219217
def test_bulletin_invalid_timestamp(caplog):
@@ -230,8 +228,8 @@ def test_bulletin_invalid_timestamp(caplog):
230228
bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test5)
231229

232230
bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_')
233-
message_test5['new_file'] = bulletinHeader + '__12345'
234-
message_test5['new_dir'] = BaseOptions.directory
231+
message_test5['relPath'].split('/')[-1] = bulletinHeader + '__12345'
232+
message_test5['relPath'].split('/')[-2:] = BaseOptions.directory
235233

236234
new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset)
237235
assert new_bulletin == b'CACN00 CWAO\nWVO\n100,1024,123,1600,0,100,13.5,5.6,79.4,0.722,11.81,11.74,1.855,6.54,16.76,1544,2.344,14.26,0,375.6,375.6,375.5,375.5,0,11.58,11.24,3.709,13.89,13.16,11.22,11,9.45,11.39,5.033,79.4,0.694,-6999,41.19,5.967,5.887,5.93,6.184,5.64,5.066,5.253,-6999,7.3,0.058,0,5.715,4.569,0,0,1.942,-6999,57.4,0,0.531,-6999,1419,1604,1787,-6999,-6999,-6999,-6999,-6999,1601,-6999,-6999,6,5.921,5.956,6.177,5.643,5.07,5.256,-6999,9.53,11.22,10.09,10.61,125.4,9.1\n'
@@ -242,7 +240,7 @@ def test_bulletin_invalid_timestamp(caplog):
242240
worklist = make_worklist()
243241
worklist.incoming = [message_test5]
244242

245-
renamer.after_accept(worklist)
243+
renamer.after_gather(worklist)
246244
# We want to make sure the proper errors are raised from the logs
247245
assert 'Unable to fetch header contents. Skipping message' in caplog.text and 'Unable to verify year from julian time.' in caplog.text
248246

@@ -299,8 +297,8 @@ def test_bulletin_wrong_station():
299297
worklist = make_worklist()
300298
worklist.incoming = [message_test7]
301299

302-
renamer.after_accept(worklist)
303-
assert message_test7['new_file'] == 'UECN99_CYCX_071200___00001_PROBLEM'
300+
renamer.after_gather(worklist)
301+
assert worklist.incoming[0]['relPath'].split('/')[-1] == 'UECN99_CYCX_071200___00001_PROBLEM'
304302

305303
# Test 8: SM Bulletin - Add station mapping + SM/SI bulletin accommodations
306304
def test_SM_bulletin():
@@ -330,8 +328,8 @@ def test_SM_bulletin():
330328
worklist = make_worklist()
331329
worklist.incoming = [message_test8]
332330

333-
renamer.after_accept(worklist)
334-
assert message_test8['new_file'] == 'SMCN06_CWAO_030000__71816_00001'
331+
renamer.after_gather(worklist)
332+
assert worklist.incoming[0]['relPath'].split('/')[-1] == 'SMCN06_CWAO_030000__71816_00001'
335333

336334
# Test 9: Bulletin with 5 fields in header (invalid)
337335
def test_bulletin_header_five_fileds():
@@ -347,8 +345,7 @@ def test_bulletin_header_five_fileds():
347345
bulletin, firstchars, lines, missing_ahl, station, charset = _get_bulletin_info(message_test9)
348346

349347
bulletinHeader = lines[0].decode('iso-8859-1').replace(' ', '_')
350-
message_test9['new_file'] = bulletinHeader + '__12345'
351-
message_test9['new_dir'] = BaseOptions.directory
348+
message_test9['relPath'] = BaseOptions.directory + bulletinHeader + '__12345'
352349

353350
# Check correcting the bulletin contents of the bulletin
354351
new_bulletin, isProblem = am_instance.correctContents(bulletin, firstchars, lines, missing_ahl, station, charset)
@@ -422,8 +419,8 @@ def test_random_bulletin_with_BBB():
422419
worklist = make_worklist()
423420
worklist.incoming = [message_test12]
424421

425-
renamer.after_accept(worklist)
426-
assert message_test12['new_file'] == 'FXCN06_CYTR_230939_AAA__00001'
422+
renamer.after_gather(worklist)
423+
assert worklist.incoming[0]['relPath'].split('/')[-1] == 'FXCN06_CYTR_230939_AAA__00001'
427424

428425
# Test 13: SM Bulletin with BBB - Add station mapping + SM/SI bulletin accommodations + conserve BBB header
429426
def test_SM_bulletin_with_BBB():
@@ -453,5 +450,5 @@ def test_SM_bulletin_with_BBB():
453450
worklist = make_worklist()
454451
worklist.incoming = [message_test13]
455452

456-
renamer.after_accept(worklist)
457-
assert message_test13['new_file'] == 'SMCN06_CWAO_030000_AAA_71816_00001'
453+
renamer.after_gather(worklist)
454+
assert worklist.incoming[0]['relPath'].split('/')[-1] == 'SMCN06_CWAO_030000_AAA_71816_00001'

0 commit comments

Comments
 (0)