From f7684126e174685f4e9b54d9273462fc100f3746 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Thu, 6 Jun 2024 23:44:10 -0400 Subject: [PATCH 01/21] modernizing old plugin This plugin was written before directories were in messages, and was assigning directories the .txt suffix... have it not try to put a type suffix on directories. Also, the way file renames are represented is different. updated that code as well. --- sarracenia/flowcb/accept/wmotypesuffix.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sarracenia/flowcb/accept/wmotypesuffix.py b/sarracenia/flowcb/accept/wmotypesuffix.py index 7a85ab4a8..3bcca1105 100755 --- a/sarracenia/flowcb/accept/wmotypesuffix.py +++ b/sarracenia/flowcb/accept/wmotypesuffix.py @@ -43,6 +43,10 @@ def __find_type(self, TT): def after_accept(self, worklist): for message in worklist.incoming: + + if 'fileOp' in message and 'directory' in message['fileOp']: + continue + type_suffix = self.__find_type(message['new_file'][0:2]) ## FIXME confused as to how this could ever be true since find_type never returns "UNKNOWN" #if type_suffix == 'UNKNOWN': @@ -53,6 +57,10 @@ def after_accept(self, worklist): continue message['new_file'] = message['new_file'] + type_suffix - if 'rename' in message: - message['rename'] = message['rename'] + type_suffix + + if 'fileOp' in message and 'rename' in message['fileOp']: + message['fileOp']['rename'] += type_suffix + # TODO else -> worklist.rejected.append(message) ?? should this be happening at any point? 
+ + From c236c583f0041c794551a394a32f5390f44b1998 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Thu, 6 Jun 2024 23:46:24 -0400 Subject: [PATCH 02/21] adding definitions of new report codes so they are not unknown --- sarracenia/__init__.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sarracenia/__init__.py b/sarracenia/__init__.py index 717f0b0be..9ace3aa6c 100755 --- a/sarracenia/__init__.py +++ b/sarracenia/__init__.py @@ -358,8 +358,8 @@ def durationToSeconds(str_value, default=None) -> float: known_report_codes = { - 201: - "Download successful. (variations: Downloaded, Inserted, Published, Copied, or Linked)", + 201: "Download successful. (variations: Downloaded, Inserted, Published, Copied, or Linked)", + 202: "Accepted. mkdir skipped as it already exists", 203: "Non-Authoritative Information: transformed during download.", 205: "Reset Content: truncated. File is shorter than originally expected (changed length during transfer) This only arises during multi-part transfers.", @@ -368,6 +368,7 @@ def durationToSeconds(str_value, default=None) -> float: "Not modified (Checksum validated, unchanged, so no download resulted.)", 307: "Insertion deferred (writing to temporary part file for the moment.)", 417: "Expectation Failed: invalid notification message (corrupt headers)", + 422: "Unprocessable Content: could not determine path to transfer to", 499: "Failure: Not Copied. SFTP/FTP/HTTP download problem", #FIXME : should not have 503 error code 3 times in a row # 503: "Service unavailable. 
delete (File removal not currently supported.)", From 82da104d667478be0d1f39affc14b6f5b538b5aa Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Thu, 6 Jun 2024 23:47:53 -0400 Subject: [PATCH 03/21] adding documentation for logFormat --- docs/source/Reference/sr3_options.7.rst | 9 +++++++++ docs/source/fr/Reference/sr3_options.7.rst | 10 ++++++++++ 2 files changed, 19 insertions(+) diff --git a/docs/source/Reference/sr3_options.7.rst b/docs/source/Reference/sr3_options.7.rst index 88df2c07c..e08f2c95a 100644 --- a/docs/source/Reference/sr3_options.7.rst +++ b/docs/source/Reference/sr3_options.7.rst @@ -1054,6 +1054,15 @@ other values: on_start, on_stop, post, gather, ... etc... It is comma separated, if the list starts with a plus sign (+) then the selected events are appended to current value. A minus signe (-) can be used to remove events from the set. +logFormat ( default: %(asctime)s [%(levelname)s] %(name)s %(funcName)s %(message)s ) +------------------------------------------------------------------------------------ + +The *logFormat* option is passed directly to python logging mechanisms and can be used +to control what is written to log files. The format is documented here: + +* https://docs.python.org/3/library/logging.html#logrecord-attributes + + logLevel ( default: info ) -------------------------- diff --git a/docs/source/fr/Reference/sr3_options.7.rst b/docs/source/fr/Reference/sr3_options.7.rst index 4ba782392..39cb427d8 100644 --- a/docs/source/fr/Reference/sr3_options.7.rst +++ b/docs/source/fr/Reference/sr3_options.7.rst @@ -1037,8 +1037,18 @@ messages de journal. Autres valeurs : on_start, on_stop, post, gather, ... etc.. On peut débuter la valeur avec un plus (+) pour signifier un ajout au valeurs actuels. la valeur moins (-) signifie la soustraction des valeurs de l´ensemble actuel. 
+logFormat ( default: %(asctime)s [%(levelname)s] %(name)s %(funcName)s %(message)s ) +------------------------------------------------------------------------------------ + +L'option *logFormat* est passée directement aux mécanismes de contrôle de journalisation +de python. Le format est documenté ici: + +* https://docs.python.org/fr/3/library/logging.html#logrecord-attributes + + logLevel ( défaut: info ) ------------------------- + Niveau de journalisation exprimé par la journalisation de python. Les valeurs possibles sont : critical, error, info, warning, debug. From 5e5214591a72c6f54a5cd677106264ee745842a8 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Fri, 7 Jun 2024 09:31:05 -0400 Subject: [PATCH 04/21] sr3 --full status crash --- sarracenia/sr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sarracenia/sr.py b/sarracenia/sr.py index 302fc0252..32f7dc7c3 100755 --- a/sarracenia/sr.py +++ b/sarracenia/sr.py @@ -2600,7 +2600,7 @@ def status(self): naturalSize(m["transferRxFiles"]).replace("B","F").replace("Fyte","File"), \ naturalSize(m["transferTxBytes"]), \ naturalSize(m["transferTxFiles"]).replace("B","F").replace("Fyte","File"), \ - time_base ) + m["time_base"] ) else: line += " %10s %10s %9s %5s %5s %10s %8s" % ( "-", "-", "-", "-", "-", "-", "-" ) if self.options.displayFull: From 88116ec3e933c07da69b009c54f54cbf9ead2fe1 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Fri, 7 Jun 2024 09:31:27 -0400 Subject: [PATCH 05/21] updating with recent changes --- debian/changelog | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/debian/changelog b/debian/changelog index 5f4c0bd81..8e69976a9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,17 +1,24 @@ metpx-sr3 (3.00.54) UNRELEASED; urgency=medium - * merge PR #1067 (closing #824 ) + * messages reviewed & consolidated: #1094, #1099 (de-cluttering.) + * add http metadata to scheduled flows. 
#1084 + * only run after_post when actually posted #1101 + * when plugins go bad, report better, recover better: #1085, #1091, + * performance improvements #1083, #1086 + * crashes/problems with statehost #1076, #1087, #1096 + * sr3 status flow state detail improved. * adds lag,rtry,slow,reje states to status display. * adds checks for running process to cleanup, fail if running. * adds cleanup to remove, don't remove if cleanup fails. * adds progressive logs to transfers (closing #966) - * several fixes for sender crashes resulting from changes in try/except scope. + * several fixes for sender crashes resulting from changes in try/except + scope. #1091, #1095, * add #1054 can now convert multiple configs, and overwrite (with --wololo) * fixed #1064 poll crash. * fixed #927 sanity not restarting crashed polls. - * bug fixes and unit tests for AM + * bug fixes and unit tests for AM ( #1036, #1074, #1078, #1079 ) * many unit tests added, improved coverage (though still poor.) - * many other plugin improvements. + * many other improvements and fixes in core and plugins. * search function restored on web-site documentation. -- SSC-5CD2310S60 Fri, 17 May 2024 12:29:22 -0400 From cbcd6ec2f7249f7d09ca28288c76229fd7295d7a Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Fri, 7 Jun 2024 13:46:45 -0400 Subject: [PATCH 06/21] update changelog --- debian/changelog | 3 +++ 1 file changed, 3 insertions(+) diff --git a/debian/changelog b/debian/changelog index 8e69976a9..c31e9fd2a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,6 +6,9 @@ metpx-sr3 (3.00.54) UNRELEASED; urgency=medium * when plugins go bad, report better, recover better: #1085, #1091, * performance improvements #1083, #1086 * crashes/problems with statehost #1076, #1087, #1096 + * fix #1104 message rate per cpu second and cpuS state. + * fix #1097 better parsing of low fractional rates. + * added logFormat option. * sr3 status flow state detail improved. 
* adds lag,rtry,slow,reje states to status display. * adds checks for running process to cleanup, fail if running. From 7144f24dfe56a703722eb7d536aa5f28aa572e80 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Fri, 7 Jun 2024 16:45:41 -0400 Subject: [PATCH 07/21] restore old fashioned rename header processing to wmotypesuffice, will make unit tests pass again --- sarracenia/flowcb/accept/wmotypesuffix.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sarracenia/flowcb/accept/wmotypesuffix.py b/sarracenia/flowcb/accept/wmotypesuffix.py index 3bcca1105..be8625f4a 100755 --- a/sarracenia/flowcb/accept/wmotypesuffix.py +++ b/sarracenia/flowcb/accept/wmotypesuffix.py @@ -58,6 +58,9 @@ def after_accept(self, worklist): message['new_file'] = message['new_file'] + type_suffix + if 'rename' in message: + message['rename'] += type_suffix + if 'fileOp' in message and 'rename' in message['fileOp']: message['fileOp']['rename'] += type_suffix From 975bc5fc9afa29123e986fc67a3888478b03aa65 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Sat, 8 Jun 2024 12:11:09 -0400 Subject: [PATCH 08/21] adjust set string parsing to deal with +- ... 
result of conversion --- sarracenia/config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sarracenia/config.py b/sarracenia/config.py index 6d3c636ee..c7bcc8a69 100755 --- a/sarracenia/config.py +++ b/sarracenia/config.py @@ -1042,11 +1042,10 @@ def _parse_set_string( self, v:str, old_value: set ) -> set: if v == 'None': sv=set([]) else: - if v[0] in [ '+', '-']: + op='r' + while v[0] in [ '+', '-']: op=v[0] v=v[1:] - else: - op='r' if ',' in v: sv=set(v.split(',')) @@ -1057,6 +1056,7 @@ def _parse_set_string( self, v:str, old_value: set ) -> set: sv= old_value | sv elif op == '-' : sv= old_value - sv + return sv def add_option(self, option, kind='list', default_value=None, all_values=None ): From 906aa04dcf82699969b4ba17589d9c74690f4372 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Sat, 8 Jun 2024 12:17:07 -0400 Subject: [PATCH 09/21] get convert not to prepend a + to fileEvents if a sign is already present --- sarracenia/sr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sarracenia/sr.py b/sarracenia/sr.py index 32f7dc7c3..c8248d9bf 100755 --- a/sarracenia/sr.py +++ b/sarracenia/sr.py @@ -2805,7 +2805,8 @@ def convert1(self,cfg): if 'none' in line[1].lower(): v=line[1] else: - line[1]= '+' + line[1] + if line[1][0] not in ['+','-']: + line[1]= '+' + line[1] v=line[1] if k == 'continue': From ece3c992009e9833d34de30b17b34b47184580df Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Tue, 11 Jun 2024 11:31:18 -0400 Subject: [PATCH 10/21] adding a note about the existence,importance, and use of unit tests for development --- docs/source/Contribution/Development.rst | 17 +++++++++++++++++ .../fr/Contribution/D\303\251veloppement.rst" | 9 +++++++++ 2 files changed, 26 insertions(+) diff --git a/docs/source/Contribution/Development.rst b/docs/source/Contribution/Development.rst index b648c6aa9..dbcd915a4 100644 --- a/docs/source/Contribution/Development.rst +++ b/docs/source/Contribution/Development.rst @@ -142,6 
+142,15 @@ Both v2 and v3 are supported on the stable branch of sr_insects. That branch sh used to support all development in both versions.... +Unit tests +~~~~~~~~~~ + +The tests/ sub-directory contains a woefully incomplete but growing set of unit tests +using the *pytest* framework. These tests are only exercised on Ubuntu 22.04 at the moment. +Consult tests/README.md for how to run them. Unit tests passing (or a very good explanation +of why they *temporarily* fail) should be another gate before merging to the main *development* branch. + + Local Installation ------------------ @@ -291,6 +300,14 @@ multipass launch -m 8G bionic ''' can run developer tests as per multipass as described above. +Need to edit: + +* requirements.txt (remove paramiko and watchdog) +* setup.py (remove install_requires for paramiko and watchdog) + +These packages have comedically complex dependencies. Install them separately +and they will be usable by sr3, but listing them as a requirement breaks things. + Python Wheel ~~~~~~~~~~~~ diff --git "a/docs/source/fr/Contribution/D\303\251veloppement.rst" "b/docs/source/fr/Contribution/D\303\251veloppement.rst" index 8293ffdb2..302aff6b7 100644 --- "a/docs/source/fr/Contribution/D\303\251veloppement.rst" +++ "b/docs/source/fr/Contribution/D\303\251veloppement.rst" @@ -127,6 +127,15 @@ Le référentiel sr_insects a sa propre base de données de problèmes, et le tr Les versions 2 et 3 sont prises en charge sur la branche principale de sr_insects. Cette branche devrait être utilisé pour prendre en charge tout le développement dans les deux versions... +Tests unitaires +~~~~~~~~~~~~~~~ + +Le sous-répertoire tests/ contient un ensemble malheureusement incomplet mais croissant de tests unitaires +qui utilisent le framework *pytest*. Ces tests requièrent Ubuntu 22.04 pour le moment. +Consultez tests/README.md pour savoir comment les exécuter. 
Réussite des tests unitaires (ou une très bonne explication +pourquoi ils échouent *temporairement*) devrait être une autre porte avant de fusionner avec la branche *developpement* principale. + + Installation locale ------------------- From dd7b75e2d5ac14bfca2504142064122ae8b24672 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Tue, 11 Jun 2024 15:04:32 -0400 Subject: [PATCH 11/21] create full missing list when reading manifest for first time --- sarracenia/flowcb/block_reassembly.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sarracenia/flowcb/block_reassembly.py b/sarracenia/flowcb/block_reassembly.py index eeaf9f783..eeff8a482 100755 --- a/sarracenia/flowcb/block_reassembly.py +++ b/sarracenia/flowcb/block_reassembly.py @@ -148,7 +148,7 @@ def after_work(self, worklist) -> None: old_blocks = rfm.get() if old_blocks and not 'waiting' in old_blocks: - old_blocks['waiting'] = {} + old_blocks['waiting'] = m['blocks']['manifest'].copy() # calculate old file size. if old_blocks and 'manifest' in old_blocks: From 6e050b7538e49eff62432d12fb652580f9148f64 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 09:28:24 -0400 Subject: [PATCH 12/21] separate block manifest management in block_reassembly vs not case --- sarracenia/blockmanifest.py | 9 ++++++--- sarracenia/config.py | 4 +++- sarracenia/flow/__init__.py | 6 +++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/sarracenia/blockmanifest.py b/sarracenia/blockmanifest.py index 5add27f46..3e418775a 100755 --- a/sarracenia/blockmanifest.py +++ b/sarracenia/blockmanifest.py @@ -74,13 +74,16 @@ def __init__(self,path): self.lock.lock() - self.x = None - self.new_x = None + self.x = {} + self.new_x = {} if os.path.exists(self.path): self.fd = open(self.path,"r+") s=self.fd.read() - self.x = json.loads(s) + try: + self.x = json.loads(s) + except Exception as ex: + pass for k in ['manifest', 'waiting' ]: if k in self.x: diff --git a/sarracenia/config.py 
b/sarracenia/config.py index c7bcc8a69..f49e09946 100755 --- a/sarracenia/config.py +++ b/sarracenia/config.py @@ -85,7 +85,7 @@ def __repr__(self) -> str: 'batch' : 100, 'baseDir': None, 'baseUrl_relPath': False, - 'block_reassemble': True, + 'block_reassemble': False, 'delete': False, 'documentRoot': None, 'download': False, @@ -1875,6 +1875,8 @@ def finalize(self, component=None, config=None): self.plugins_early.append( 'nodupe.name' ) delattr( self, 'nodupe_basis' ) + self.block_reassemble = 'block_reassembly' in self.plugins_early + if config[-5:] == '.conf': cfg = config[:-5] else: diff --git a/sarracenia/flow/__init__.py b/sarracenia/flow/__init__.py index f1deeb6c5..4a6b63d8d 100644 --- a/sarracenia/flow/__init__.py +++ b/sarracenia/flow/__init__.py @@ -2680,9 +2680,9 @@ def set_local_file_attributes(self, local_file, msg): logger.debug("%s" % local_file) # if the file is not partitioned, the the onfly_checksum is for the whole file. - # cache it here, along with the mtime. - - if ('blocks' in msg) and sarracenia.features['reassembly']['present']: + # cache it here, along with the mtime, unless block_reassembly plugin is active... 
+ + if ('blocks' in msg) and sarracenia.features['reassembly']['present'] and not self.o.block_reassemble: with sarracenia.blockmanifest.BlockManifest(local_file) as y: y.set( msg['blocks'] ) From c23cc6692f5d321b1b35ecbf812af1cbfa231b7c Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 09:32:30 -0400 Subject: [PATCH 13/21] alternate name for block_reassembly --- sarracenia/config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sarracenia/config.py b/sarracenia/config.py index f49e09946..5a9122770 100755 --- a/sarracenia/config.py +++ b/sarracenia/config.py @@ -1875,7 +1875,8 @@ def finalize(self, component=None, config=None): self.plugins_early.append( 'nodupe.name' ) delattr( self, 'nodupe_basis' ) - self.block_reassemble = 'block_reassembly' in self.plugins_early + self.block_reassemble = 'block_reassembly' in self.plugins_early or \ + 'sarracenia.flowcb.block_reassembly' in self.plugins_early if config[-5:] == '.conf': cfg = config[:-5] From a4bdc7d30e0184067fb25591a5592341bc30000b Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 11:20:25 -0400 Subject: [PATCH 14/21] replace block_reassemble option with flow state var --- sarracenia/config.py | 6 +----- sarracenia/flow/__init__.py | 5 ++++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/sarracenia/config.py b/sarracenia/config.py index 5a9122770..2be3f1bf1 100755 --- a/sarracenia/config.py +++ b/sarracenia/config.py @@ -85,7 +85,6 @@ def __repr__(self) -> str: 'batch' : 100, 'baseDir': None, 'baseUrl_relPath': False, - 'block_reassemble': False, 'delete': False, 'documentRoot': None, 'download': False, @@ -141,7 +140,7 @@ def __repr__(self) -> str: # all the boolean settings. 
-flag_options = [ 'acceptSizeWrong', 'acceptUnmatched', 'amqp_consumer', 'baseUrl_relPath', 'block_reassemble', 'debug', \ +flag_options = [ 'acceptSizeWrong', 'acceptUnmatched', 'amqp_consumer', 'baseUrl_relPath', 'debug', \ 'delete', 'discard', 'download', 'dry_run', 'durable', 'exchangeDeclare', 'exchangeSplit', 'logReject', 'realpathFilter', \ 'follow_symlinks', 'force_polling', 'inline', 'inlineOnly', 'inplace', 'logMetrics', 'logStdout', 'logReject', 'restore', \ 'messageDebugDump', 'mirror', 'timeCopy', 'notify_only', 'overwrite', 'post_on_start', \ @@ -1875,9 +1874,6 @@ def finalize(self, component=None, config=None): self.plugins_early.append( 'nodupe.name' ) delattr( self, 'nodupe_basis' ) - self.block_reassemble = 'block_reassembly' in self.plugins_early or \ - 'sarracenia.flowcb.block_reassembly' in self.plugins_early - if config[-5:] == '.conf': cfg = config[:-5] else: diff --git a/sarracenia/flow/__init__.py b/sarracenia/flow/__init__.py index 4a6b63d8d..70270e1d2 100644 --- a/sarracenia/flow/__init__.py +++ b/sarracenia/flow/__init__.py @@ -208,6 +208,9 @@ def __init__(self, cfg=None): self.plugins['load'].extend(self.o.destfn_scripts) + self.block_reassembly_active = 'block_reassembly' in self.plugins['load'] or \ + 'sarracenia.flowcb.block_reassembly' in self.plugins['load'] + # metrics - dictionary with names of plugins as the keys self.metrics_lastWrite=0 self.metricsFlowReset() @@ -2682,7 +2685,7 @@ def set_local_file_attributes(self, local_file, msg): # if the file is not partitioned, the the onfly_checksum is for the whole file. # cache it here, along with the mtime, unless block_reassembly plugin is active... 
- if ('blocks' in msg) and sarracenia.features['reassembly']['present'] and not self.o.block_reassemble: + if ('blocks' in msg) and sarracenia.features['reassembly']['present'] and not self.block_reassembly_active: with sarracenia.blockmanifest.BlockManifest(local_file) as y: y.set( msg['blocks'] ) From 4d3eaa135527929dc0b096fd120cbcf03600c6d6 Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 11:38:06 -0400 Subject: [PATCH 15/21] aesthetic change of working variable name --- sarracenia/flow/__init__.py | 4 ++-- sarracenia/flowcb/gather/file.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sarracenia/flow/__init__.py b/sarracenia/flow/__init__.py index 70270e1d2..6321168d9 100644 --- a/sarracenia/flow/__init__.py +++ b/sarracenia/flow/__init__.py @@ -2686,8 +2686,8 @@ def set_local_file_attributes(self, local_file, msg): # cache it here, along with the mtime, unless block_reassembly plugin is active... if ('blocks' in msg) and sarracenia.features['reassembly']['present'] and not self.block_reassembly_active: - with sarracenia.blockmanifest.BlockManifest(local_file) as y: - y.set( msg['blocks'] ) + with sarracenia.blockmanifest.BlockManifest(local_file) as bm: + bm.set( msg['blocks'] ) x = sarracenia.filemetadata.FileMetadata(local_file) # FIXME ... what to do when checksums don't match? 
diff --git a/sarracenia/flowcb/gather/file.py b/sarracenia/flowcb/gather/file.py index e5c2bce37..30e9e56c7 100755 --- a/sarracenia/flowcb/gather/file.py +++ b/sarracenia/flowcb/gather/file.py @@ -260,8 +260,8 @@ def post_file_in_parts(self, path, lstat): if features['reassembly']['present'] and \ (not hasattr(self.o, 'block_manifest_delete') or not self.o.block_manifest_delete): - with sarracenia.blockmanifest.BlockManifest( path ) as x: - x.set(msg['blocks']) + with sarracenia.blockmanifest.BlockManifest( path ) as bm: + bm.set(msg['blocks']) messages = [] for current_block in blocks: From b2e0bcaa7bf3d3e14b3db04445ab41712c81ffcf Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 14:13:26 -0400 Subject: [PATCH 16/21] demoting some debug prints an obsolete comment. --- sarracenia/config.py | 2 -- sarracenia/flowcb/block_reassembly.py | 9 +++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/sarracenia/config.py b/sarracenia/config.py index 2be3f1bf1..237d5edf9 100755 --- a/sarracenia/config.py +++ b/sarracenia/config.py @@ -1500,8 +1500,6 @@ def parse_line(self, component, cfg, cfname, lineno, l ): if k == 'continue': return - #FIXME: note for Clea, line conversion to v3 complete here. - line = list(map(lambda x: self._varsub(x), line)) if len(line) == 1: diff --git a/sarracenia/flowcb/block_reassembly.py b/sarracenia/flowcb/block_reassembly.py index eeff8a482..a4beb75a8 100755 --- a/sarracenia/flowcb/block_reassembly.py +++ b/sarracenia/flowcb/block_reassembly.py @@ -117,7 +117,7 @@ def after_work(self, worklist) -> None: blksz=humanfriendly.parse_size(blk_suffix[1],binary=True) if blkno != m['blocks']['number']: - logger.warning(" mismatch {m['relPath']} name says {blkno} but message says {m['block']['number']}" ) + logger.warning( f"mismatch {m['relPath']} name says {blkno} but message says {m['blocks']['number']}" ) blkno = m['blocks']['number'] #determine root file name. 
@@ -131,7 +131,7 @@ def after_work(self, worklist) -> None: flck = flufl.lock.Lock(lock_file) flck.lock() - logger.info( f"10 locked {flck} lock_file: {lock_file}" ) + #logger.debug( f"10 locked {flck} lock_file: {lock_file}" ) pf=open(part_file,'rb') @@ -165,8 +165,9 @@ def after_work(self, worklist) -> None: # update old_blocks to reflect receipt of this block. if old_blocks and 'manifest' in old_blocks: - logger.info( f" read old block manifest from attributes: {old_blocks['manifest']}" ) - logger.info( f" also show waiting: {old_blocks['waiting']}" ) + logger.info( f" read {len(old_blocks['manifest'])} blocks in manifest, waiting for {len(old_blocks['waiting'])} " ) + logger.debug( f" read old block manifest from attributes: {old_blocks['manifest']}" ) + logger.debug( f" also show waiting: {old_blocks['waiting']}" ) found=False sz=0 # add From ef14b70de0d8bbb415c504dad65ff729b85e982d Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 14:47:22 -0400 Subject: [PATCH 17/21] fix crash removing manifest files --- sarracenia/flowcb/block_reassembly.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sarracenia/flowcb/block_reassembly.py b/sarracenia/flowcb/block_reassembly.py index a4beb75a8..359f2b11f 100755 --- a/sarracenia/flowcb/block_reassembly.py +++ b/sarracenia/flowcb/block_reassembly.py @@ -232,7 +232,7 @@ def after_work(self, worklist) -> None: """ with sarracenia.blockmanifest.BlockManifest(root_file) as rfm: rfm.set(old_blocks) - m.setReport( 206, f"file block subset {m['blocks']['number']} received and reassembled ok. waiting for {(len(old_blocks['waiting']))} more blocks." ) + m.setReport( 206, f"file block subset {m['blocks']['number']} received and written ok. waiting for {(len(old_blocks['waiting']))} more blocks." ) worklist.rejected.append(m) else: # FIXME: for inflight. now rename the file to the real name. 
@@ -245,10 +245,12 @@ def after_work(self, worklist) -> None: logger.info( f"completed reassembly of {m['relPath']}" ) new_ok.append(m) if hasattr(self.o, 'block_manifest_delete') and self.o.block_manifest_delete: - manifest = msg['new_file'] + "§block_manifest§" + manifest = m['new_file'] + "§block_manifest§" if os.path.exists(manifest): + logger.info( f"deleting {manifest}") os.unlink(manifest) else: + logger.info( f"persisting {root_file} manifest.") del old_blocks['waiting'] with sarracenia.blockmanifest.BlockManifest(root_file) as rfm: rfm.set(old_blocks) From 7ea2b2994d18d9474408d48a27d3764f5522ceaf Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 14:49:25 -0400 Subject: [PATCH 18/21] adding 206 report code --- sarracenia/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sarracenia/__init__.py b/sarracenia/__init__.py index 9ace3aa6c..8bb542926 100755 --- a/sarracenia/__init__.py +++ b/sarracenia/__init__.py @@ -364,8 +364,8 @@ def durationToSeconds(str_value, default=None) -> float: 205: "Reset Content: truncated. 
File is shorter than originally expected (changed length during transfer) This only arises during multi-part transfers.", 205: "Reset Content: checksum recalculated on receipt.", - 304: - "Not modified (Checksum validated, unchanged, so no download resulted.)", + 206: "Partial Content: received and inserted.", + 304: "Not modified (Checksum validated, unchanged, so no download resulted.)", 307: "Insertion deferred (writing to temporary part file for the moment.)", 417: "Expectation Failed: invalid notification message (corrupt headers)", 422: "Unprocessable Content: could not determine path to transfer to", From 8e22567b05c66107003c7d5f3bd85f0876de8c8f Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 14:55:31 -0400 Subject: [PATCH 19/21] can only have one string per report code --- sarracenia/__init__.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sarracenia/__init__.py b/sarracenia/__init__.py index 8bb542926..91e53acac 100755 --- a/sarracenia/__init__.py +++ b/sarracenia/__init__.py @@ -356,13 +356,17 @@ def durationToSeconds(str_value, default=None) -> float: return duration +""" + report codes are cribbed from HTTP, when a new situation arises, just peruse a list, + and pick one that fits. Should also be easier for others to use: + + https://en.wikipedia.org/wiki/List_of_HTTP_status_codes +""" known_report_codes = { 201: "Download successful. (variations: Downloaded, Inserted, Published, Copied, or Linked)", 202: "Accepted. mkdir skipped as it already exists", 203: "Non-Authoritative Information: transformed during download.", - 205: - "Reset Content: truncated. 
File is shorter than originally expected (changed length during transfer) This only arises during multi-part transfers.", 205: "Reset Content: checksum recalculated on receipt.", 206: "Partial Content: received and inserted.", 304: "Not modified (Checksum validated, unchanged, so no download resulted.)", @@ -744,7 +748,7 @@ def setReport(msg, code, text=None): text = 'unknown disposition' if 'report' in msg: - logger.warning('overriding initial report: %d: %s' % + logger.debug('overriding initial report: %d: %s' % (msg['report']['code'], msg['report']['message'])) msg['report'] = {'code': code, 'timeCompleted': nowstr(), 'message': text} From d75ee14bd82094a6887def7a7955cb0e66fd25ca Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 15:24:25 -0400 Subject: [PATCH 20/21] set directory for removal of block manifest file --- sarracenia/flowcb/block_reassembly.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/sarracenia/flowcb/block_reassembly.py b/sarracenia/flowcb/block_reassembly.py index 359f2b11f..45f3e2d49 100755 --- a/sarracenia/flowcb/block_reassembly.py +++ b/sarracenia/flowcb/block_reassembly.py @@ -196,7 +196,11 @@ def after_work(self, worklist) -> None: byteCount = m['blocks']['manifest'][blkno]['size'] logger.info( f" blocks: adding block {blkno} by seeking to: {offset} to write {byteCount} bytes in {root_file}" ) - logger.info( f" still waiting for: {len(old_blocks['waiting'])} " ) + if len(old_blocks['waiting']) > 0 : + logger.info( f" still waiting for: {len(old_blocks['waiting'])} " ) + else: + logger.info( f" we have received every block now." ) + #- {old_blocks['waiting']} " ) # FIXME: can seek ever fail? how do we check? 
@@ -245,10 +249,13 @@ def after_work(self, worklist) -> None: logger.info( f"completed reassembly of {m['relPath']}" ) new_ok.append(m) if hasattr(self.o, 'block_manifest_delete') and self.o.block_manifest_delete: - manifest = m['new_file'] + "§block_manifest§" + manifest = m['new_dir'] + os.sep + m['new_file'] + "§block_manifest§" + logger.info( f"should delete manifest: {manifest}") if os.path.exists(manifest): logger.info( f"deleting {manifest}") os.unlink(manifest) + else: + logger.info( "it did not exist!" ) else: logger.info( f"persisting {root_file} manifest.") del old_blocks['waiting'] From 79d685e3f4ec2e7b6b09fca41628857ef31f176a Mon Sep 17 00:00:00 2001 From: Peter Silva Date: Wed, 12 Jun 2024 15:56:56 -0400 Subject: [PATCH 21/21] reducing logging to reasonable level --- sarracenia/blockmanifest.py | 3 --- sarracenia/flowcb/block_reassembly.py | 15 +++++---------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/sarracenia/blockmanifest.py b/sarracenia/blockmanifest.py index 3e418775a..b543aced8 100755 --- a/sarracenia/blockmanifest.py +++ b/sarracenia/blockmanifest.py @@ -130,12 +130,9 @@ def persist(self): return if self.new_x and (self.new_x != self.x): - logger.info( f"overwriting" ) self.fd.seek(0) self.fd.write(json.dumps(self.new_x,sort_keys=True,indent=4)) self.fd.truncate() - else: - logger.info( f"closing unchanged" ) self.fd.close() self.lock.unlock() diff --git a/sarracenia/flowcb/block_reassembly.py b/sarracenia/flowcb/block_reassembly.py index 45f3e2d49..6a38420ff 100755 --- a/sarracenia/flowcb/block_reassembly.py +++ b/sarracenia/flowcb/block_reassembly.py @@ -165,7 +165,7 @@ def after_work(self, worklist) -> None: # update old_blocks to reflect receipt of this block. 
if old_blocks and 'manifest' in old_blocks: - logger.info( f" read {len(old_blocks['manifest'])} blocks in manifest, waiting for {len(old_blocks['waiting'])} " ) + logger.debug( f" read {len(old_blocks['manifest'])} blocks in manifest, waiting for {len(old_blocks['waiting'])} " ) logger.debug( f" read old block manifest from attributes: {old_blocks['manifest']}" ) logger.debug( f" also show waiting: {old_blocks['waiting']}" ) found=False @@ -182,7 +182,6 @@ def after_work(self, worklist) -> None: if blkno in old_blocks['waiting']: del old_blocks['waiting'][blkno] - logger.info( f"deleted block {blkno} from waiting: {len(old_blocks['waiting'])} left. ") # calculate where to seek to... offset=0 @@ -196,10 +195,10 @@ def after_work(self, worklist) -> None: byteCount = m['blocks']['manifest'][blkno]['size'] logger.info( f" blocks: adding block {blkno} by seeking to: {offset} to write {byteCount} bytes in {root_file}" ) - if len(old_blocks['waiting']) > 0 : - logger.info( f" still waiting for: {len(old_blocks['waiting'])} " ) - else: - logger.info( f" we have received every block now." ) + #if len(old_blocks['waiting']) > 0 : + # logger.info( f" still waiting for: {len(old_blocks['waiting'])} " ) + #else: + # logger.info( f" we have received every block now." ) #- {old_blocks['waiting']} " ) @@ -250,14 +249,10 @@ def after_work(self, worklist) -> None: new_ok.append(m) if hasattr(self.o, 'block_manifest_delete') and self.o.block_manifest_delete: manifest = m['new_dir'] + os.sep + m['new_file'] + "§block_manifest§" - logger.info( f"should delete manifest: {manifest}") if os.path.exists(manifest): logger.info( f"deleting {manifest}") os.unlink(manifest) - else: - logger.info( "it did not exist!" ) else: - logger.info( f"persisting {root_file} manifest.") del old_blocks['waiting'] with sarracenia.blockmanifest.BlockManifest(root_file) as rfm: rfm.set(old_blocks)