diff --git a/docs/source/Contribution/v03.rst b/docs/source/Contribution/v03.rst index 9f5d4d324..3523f372c 100644 --- a/docs/source/Contribution/v03.rst +++ b/docs/source/Contribution/v03.rst @@ -137,12 +137,12 @@ sr3 code:: 16 ./flow/winnow.py 793 ./__init__.py 226 ./instance.py - 36 ./integrity/arbitrary.py - 93 ./integrity/__init__.py - 33 ./integrity/md5name.py - 24 ./integrity/md5.py - 17 ./integrity/random.py - 24 ./integrity/sha512.py + 36 ./identity/arbitrary.py + 93 ./identity/__init__.py + 33 ./identity/md5name.py + 24 ./identity/md5.py + 17 ./identity/random.py + 24 ./identity/sha512.py 17 ./moth/amq1.py 585 ./moth/amqp.py 313 ./moth/__init__.py @@ -274,7 +274,7 @@ the two versions, is clear: | | | | sr_message.py | | +--------------------------+---------------------------+ -| sr_checksum.py | integrity/ | +| sr_checksum.py | identity/ | | | __init__.py | | sum/* | * | +--------------------------+---------------------------+ @@ -395,7 +395,7 @@ Known Problems (Solved in sr3) sarra.tmpc.*, sr.py ) using normal imports. likely need to refactor how checksum plugin mechanism works then try again. - totally refactored now. Integrity class is normal, and separate from flowcb. + totally refactored now. Identity class is normal, and separate from flowcb. Concrete Plan (Done) @@ -588,7 +588,7 @@ Items from the TODO list that have been addressed. so Plugin becomes a class instantiated in sarra/__init__.py... puts plugins and built-in code on a more even level... for example how do plugin transfer protocols work? thinking... This is sort of done - now: plugin became flowcb. Integrity is removed from the hierarchy. + now: plugin became flowcb. Identity is removed from the hierarchy. Class extension is now a separate kind of plugin (via import) * change default topic_prefix to v03.post done 2021/02 @@ -874,7 +874,7 @@ Features * The extension API is now vanilla python with no magic settings. just standard classes, using standard import mechanism. debugging should be much simpler now as the interpreter will provide much better error messages on startup. - The v2 style plugins are now called *flow callbacks*, and there are a number of classes (integrity, moth, + The v2 style plugins are now called *flow callbacks*, and there are a number of classes (identity, moth, transfer, perhaps flow) that permit extension by straightforward sub-classing. This should make it much easier to add additional protocols for transport and messages, as well checksum algorithms for new data types. @@ -897,7 +897,7 @@ Features * FlowCB plugin entry_points are now based on groups of notification messages, rather than individual ones, allowing people to organize concurrent work. -* integrity (checksums) are now plugins. +* identity (checksums) are now plugins. * gather (inlet? sources of notification messages) are now plugins. diff --git a/docs/source/Explanation/CommandLineGuide.rst b/docs/source/Explanation/CommandLineGuide.rst index 1b46ec528..b54a94680 100644 --- a/docs/source/Explanation/CommandLineGuide.rst +++ b/docs/source/Explanation/CommandLineGuide.rst @@ -314,8 +314,8 @@ View all configuration settings (the result of all parsing... what the flow comp 'inlineEncoding': 'guess', 'inlineOnly': False, 'instances': 1, - 'integrity_arbitrary_value': None, - 'integrity_method': 'sha512', + 'identity_arbitrary_value': None, + 'identity_method': 'sha512', 'logEvents': {'after_work', 'after_accept', 'on_housekeeping'}, 'logFormat': '%(asctime)s [%(levelname)s] %(name)s %(funcName)s %(message)s', 'logLevel': 'info', @@ -931,8 +931,8 @@ Polling is doing the same job as a post, except for files on a remote server. In the case of a poll, the post will have its url built from the *pollUrl* option, with the product's path (*directory*/"matched file"). There is one post per file. The file's size is taken from the directory "ls"... but its -checksum cannot be determined, so the default integrity method is "cod", asking -clients to calculate the integrity Checksum On Download. +checksum cannot be determined, so the default identity method is "cod", asking +clients to calculate the identity Checksum On Download. By default, sr_poll sends its post notification message to the broker with default exchange (the prefix *xs_* followed by the broker username). The *post_broker* is mandatory. @@ -1563,7 +1563,7 @@ WINNOW the **winnow** component subscribes to file notification messages and reposts them, suppressing redundant ones. How to decide which ones are redundant varies by use case. In the most straight-forward case, -the messages have **Integrity** header stores a file's fingerprint as described in the `sr_post(7) <../Reference/sr_post.7.html>`_ man page, +the messages have **Identity** header stores a file's fingerprint as described in the `sr_post(7) <../Reference/sr_post.7.html>`_ man page, and header is used exclusively. There are many other use cases, though. discussed in the following section on `Duplicate Suppression `_ @@ -2351,11 +2351,11 @@ the directory will be checked for new files. Here is part of the Script callbac return [] -Integrity +Identity --------- One can use the *import* directive to add new checksum algorithms by sub-classing -sarracenia.integrity.Integrity. +sarracenia.identity.Identity. Transfer -------- diff --git a/docs/source/Explanation/Concepts.rst b/docs/source/Explanation/Concepts.rst index 6f1eafe89..0ba1a295b 100644 --- a/docs/source/Explanation/Concepts.rst +++ b/docs/source/Explanation/Concepts.rst @@ -63,7 +63,7 @@ In more detail: The main components of the python implementation of Sarracenia all implement the same algorithm described above. The algorithm has various points where custom processing can be inserted (using flowCallbacks), -or deriving classes from flow, integrity, or transfer classes. +or deriving classes from flow, identity, or transfer classes. The components just have different default settings: diff --git a/docs/source/Explanation/DuplicateSuppression.rst b/docs/source/Explanation/DuplicateSuppression.rst index daf230354..a74d4ebe1 100644 --- a/docs/source/Explanation/DuplicateSuppression.rst +++ b/docs/source/Explanation/DuplicateSuppression.rst @@ -23,8 +23,8 @@ Duplicate suppression must: Duplicates are dropped to avoid further processing. -A notification message key is preferably derived from the *Integrity* field of the notification -message. If the producer does not provide an integrity checksum, algorithms may fall +A notification message key is preferably derived from the *Identity* field of the notification +message. If the producer does not provide an identity checksum, algorithms may fall back on other metadata: *mtime*, *size*, *pubTime.* Since pubTime is a mandatory field, a key can always be derived, but it's effectiveness in a particular use case is not assured. (the sarracenia.flowcb.nodupe.NoDupe.deriveKey(self,msg) helper routine @@ -47,7 +47,7 @@ Standard (path and data oriented) **method**: when products have the same key and path, they are duplicates. Two routes can receive the same product, with the same relative path. In normal processing, -the products should be identical, and *Integrity* checksums for it should be the same, +the products should be identical, and *Identity* checksums for it should be the same, @@ -94,7 +94,7 @@ or:: Override the standard duplicate suppression key generation to use only the file name. When multiple sources produce a product, but the result is not binary identical, and no -appropriate Integrity method is available, then then one needs a different approach. +appropriate Identity method is available, then then one needs a different approach. Since the two sources are not, generally, synchronized, URP diff --git a/docs/source/Explanation/History/mesh_gts.rst b/docs/source/Explanation/History/mesh_gts.rst index 5f63e83c7..d27ec59a8 100644 --- a/docs/source/Explanation/History/mesh_gts.rst +++ b/docs/source/Explanation/History/mesh_gts.rst @@ -291,7 +291,7 @@ The requirements for a store and forward system: - TCP/IP connectivity, - real-time data transmission, - per destination queueing to allow asynchrony (clients that operate at different speeds or have transient issues), -- application level integrity guarantees. +- application level identity guarantees. In addition, the ability to tune subscriptions, according to the client's interest will further optimize traffic. diff --git a/docs/source/Explanation/History/messages_v03.rst b/docs/source/Explanation/History/messages_v03.rst index 04857af2d..48d5f3076 100644 --- a/docs/source/Explanation/History/messages_v03.rst +++ b/docs/source/Explanation/History/messages_v03.rst @@ -34,7 +34,7 @@ pairs. * v02 fixed fields are now "pubTime", "baseURL", and "relPath" keys in the JSON object that is the messge body. - * v02 *sum* header with hex encoded value, is replaced by v03 *integrity* header with base64 encoding. + * v02 *sum* header with hex encoded value, is replaced by v03 *identity* header with base64 encoding. * v03 *content* header allows file content embedding. diff --git a/docs/source/Explanation/SarraPluginDev.rst b/docs/source/Explanation/SarraPluginDev.rst index 6981b6b81..5c0d6eb1c 100644 --- a/docs/source/Explanation/SarraPluginDev.rst +++ b/docs/source/Explanation/SarraPluginDev.rst @@ -33,7 +33,7 @@ build new ones in a copy/paste manner, with many samples being available to read There are other ways to extend Sarracenia v3 by subclassing of: * Sarracenia.transfer.Transfer to add more data transfer protocols -* Sarracenia.integrity.Integrity to add more checksumming methods. +* Sarracenia.identity.Identity to add more checksumming methods. * Sarracenia.moth.Moth to add support for more messaging protocols. * Sarracenia.flow.Flow to create new flows. * Sarracenia.flowcb.FlowCB to add custom callback routines to flows. @@ -341,7 +341,7 @@ One can add additional functionality to Sarracenia by creating subclassing. * sarra.moth - Messages Organized into Topic Hierarchies. (existing ones: rabbitmq-amqp) -* sarra.integrity - checksum algorithms ( existing ones: md5, sha512, arbitrary, random ) +* sarra.identity - checksum algorithms ( existing ones: md5, sha512, arbitrary, random ) * sarra.transfer - additional transport protocols (https, ftp, sftp ) @@ -457,7 +457,7 @@ self is the notification message being processed. variables variables most used: for non data download file operations, such as creation of symbolic links, file renames and removals. content described in `sr_post(7) <../Reference/sr_post.7.html>`_ -*msg['integrity']* +*msg['identity']* The checksum structure, a python dictionary with 'method' and 'value' fields. *msg['subtopic'], msg['new_subtopic']* @@ -475,7 +475,7 @@ self is the notification message being processed. variables variables most used: For example, all of the *new_* fields are in the *_deleteOnPost* by default. *msg['onfly_checksum'], msg['data_checksum']* - the value of an *Integrity* checksum field calculated as data is downloaded. + the value of an *Identity* checksum field calculated as data is downloaded. In the case where data is modified while downloading, the *onfly_checksum* is to verify that the upstream data was correctly received, while the *data_checksum* is calculated for downstream consumers. @@ -932,7 +932,7 @@ Examples of things that would be fun to do with plugins: - add additional message protocols (sub-classing Moth) -- additional checksums, subclassing Integrity. For example, to get GOES DCP +- additional checksums, subclassing Identity. For example, to get GOES DCP data from sources such as USGS Sioux Falls, the reports have a trailer that shows some antenna statistics from the reception site. So if one receives GOES DCP from Wallops, for example, the trailer will be different diff --git a/docs/source/How2Guides/FlowCallbacks.rst b/docs/source/How2Guides/FlowCallbacks.rst index f6d50b077..8469f2412 100644 --- a/docs/source/How2Guides/FlowCallbacks.rst +++ b/docs/source/How2Guides/FlowCallbacks.rst @@ -424,7 +424,7 @@ It's a good idea to look at the sarracenia source code itself. For example: reception of notification messages from message queue protocol flows. * *sarracenia.flowcb.nodupe.NoDupe* This modules removes duplicates from message - flows based on Integrity checksums. + flows based on Identity checksums. * *sarracenia.flowcb.post.message.Message* is a class that implements posting notification messages to Message queue protocol flows diff --git a/docs/source/How2Guides/UPGRADING.rst b/docs/source/How2Guides/UPGRADING.rst index 57055b257..0f4efdf0a 100644 --- a/docs/source/How2Guides/UPGRADING.rst +++ b/docs/source/How2Guides/UPGRADING.rst @@ -39,6 +39,13 @@ Installation Instructions git --- +*CHANGE*: v03 postformat field renamed: "integrity" is now "identity" + + * current version will read messsages with *integrity* and map them to *identity*. + * current version will post with *identity*, so older versions will miss them. + * https://github.com/MetPX/sarracenia/issues/703 + + 3.0.40 ------ @@ -125,13 +132,13 @@ git *CHANGE*: The "Vendor" string is now "MetPX" instead of "science.gc.ca". This affects some file placement particularly on Windows. -*CHANGE*: v03 notification message encoding changed: *Integrity* checksum is now optional. +*CHANGE*: v03 notification message encoding changed: *Identity* checksum is now optional. (details: https://github.com/MetPX/sarracenia/issues/547 ) *md5sum* is no longer defined, replaced with *none* in sr3. *CHANGE*: v03 notification message encoding changed for symbolic links, and file renames and removals. There is now a 'fileOp' field for these dataless file operations. - The *Integrity* sum is now used exclusively for checksums. + The *Identity* sum is now used exclusively for checksums. 3.0.15 @@ -336,7 +343,7 @@ V2 to Sr3 queue_name queueName report_back report source_from_exchange sourceFromExchange - sum integrity + sum identity suppress_duplicates nodupe_ttl suppress_duplicates_basis nodupe_basis topic_prefix topicPrefix diff --git a/docs/source/How2Guides/v2ToSr3.rst b/docs/source/How2Guides/v2ToSr3.rst index fa2945104..f4d958cf6 100644 --- a/docs/source/How2Guides/v2ToSr3.rst +++ b/docs/source/How2Guides/v2ToSr3.rst @@ -331,12 +331,12 @@ In general, v3 plugins: msg.exchange msg['exchange'] the channel on which the message was received. msg.logger logger pythonic logging setup describe above. msg.parts msg['size'] just omit, use sarracenia.Message constructor. - msg.sumflg msg['integrity'] just omit, use sarracenia.Message constructor. + msg.sumflg msg['identity'] just omit, use sarracenia.Message constructor. msg.sumstr v2wrapper.sumstrFromMessage(msg) the literal string for a v2 checksum field. parent.msg worklist.incoming v2 is 1 message at a time, sr3 has lists or messages. ================ ================================== ========================================================== -* the pubTime, baseUrl, relPath, retrievePath, size, integrity, are all standard message fields +* the pubTime, baseUrl, relPath, retrievePath, size, identity, are all standard message fields better described in `sr_post(7) <../Reference/sr_post.7.html>`_ * if one needs to store per message state, then one can declare temporary fields in the message, diff --git a/docs/source/Reference/code.rst b/docs/source/Reference/code.rst index ccdc4bd98..af6eb22c9 100644 --- a/docs/source/Reference/code.rst +++ b/docs/source/Reference/code.rst @@ -159,46 +159,46 @@ sarracenia.instance :private-members: :special-members: -sarracenia.integrity +sarracenia.identity -------------------- -.. automodule:: sarracenia.integrity +.. automodule:: sarracenia.identity :show-inheritance: :members: :private-members: :special-members: -sarracenia.integrity.arbitrary +sarracenia.identity.arbitrary ------------------------------ -.. automodule:: sarracenia.integrity.arbitrary +.. automodule:: sarracenia.identity.arbitrary :show-inheritance: :members: :private-members: :special-members: -sarracenia.integrity.sha512 +sarracenia.identity.sha512 --------------------------- -.. automodule:: sarracenia.integrity.sha512 +.. automodule:: sarracenia.identity.sha512 :show-inheritance: :members: :private-members: :special-members: -sarracenia.integrity.md5 +sarracenia.identity.md5 ------------------------ -.. automodule:: sarracenia.integrity.md5 +.. automodule:: sarracenia.identity.md5 :show-inheritance: :members: :private-members: :special-members: -sarracenia.integrity.random +sarracenia.identity.random --------------------------- -.. automodule:: sarracenia.integrity.random +.. automodule:: sarracenia.identity.random :show-inheritance: :members: :private-members: diff --git a/docs/source/Reference/sr3_options.7.rst b/docs/source/Reference/sr3_options.7.rst index 79ce41179..51556aca9 100644 --- a/docs/source/Reference/sr3_options.7.rst +++ b/docs/source/Reference/sr3_options.7.rst @@ -980,13 +980,13 @@ In directory ~/.cache/sarra/log:: loss of notifications. A queue which is not accessed for a long (implementation dependent) period will be destroyed. -integrity +identity ------------------ All file notification messages include a checksum. It is placed in the amqp message header will have as an entry *sum* with default value 'd,md5_checksum_on_data'. The *sum* option tell the program how to calculate the checksum. -In v3, they are called Integrity methods:: +In v3, they are called Identity methods:: cod,x - Calculate On Download applying x sha512 - do SHA512 on file content (default) diff --git a/docs/source/Reference/sr3_post.1.rst b/docs/source/Reference/sr3_post.1.rst index b68d22871..33f76bced 100644 --- a/docs/source/Reference/sr3_post.1.rst +++ b/docs/source/Reference/sr3_post.1.rst @@ -290,11 +290,11 @@ nodupe_ttl on|off|999 used ( set to a value other than 0 ) as otherwise blocksize will vary as files grow, and much duplicate data transfer will result. -integrity [,] +identity [,] ---------------------------- All file notification messages include a checksum. The *sum* option specifies how to calculate the it. -It is a comma separated string. Valid Integrity methods are :: +It is a comma separated string. Valid Identity methods are :: cod,x - Calculate On Download applying x sha512 - do SHA512 on file content (default) diff --git a/docs/source/Reference/sr_post.7.rst b/docs/source/Reference/sr_post.7.rst index 05d2962dc..11db62583 100644 --- a/docs/source/Reference/sr_post.7.rst +++ b/docs/source/Reference/sr_post.7.rst @@ -67,7 +67,7 @@ The headers are an array of name:value pairs:: one of: - "integrity" - for changes in file contents, an integrity checksum. + "identity" - for changes in file contents, an identifier for de-duplication purposes. { "method" : "md5" | "sha512" | "cod" | "random" , "value" : "base64 encoded checksum value" @@ -85,7 +85,7 @@ The headers are an array of name:value pairs:: nothing... If neither of these is present, then duplication suppression will rely on supplied meta data, such as the modification time, the size, and the publication Time to prevent loops. - It is strongly recommended that all data services provide integrity + It is strongly recommended that all data services provide identity checksums. Failure to do so results in a data service than cannot be reliably replicated. @@ -114,7 +114,7 @@ The headers are an array of name:value pairs:: "rename" - name to write file locally. "retrievePath" - relative retrieval path can be catenated to to override relPath used for API cases. - "topic" - copy of topic from AMQP header (usually omitted) + "topic" - copy of topic from AMQP header (in the envelope in protocol messages) "source" - the originating entity of the notification message. "from_cluster" - the originating cluster of a notification message. "to_clusters" - a destination specification. @@ -169,8 +169,8 @@ Source Filtering (use of TOPIC_ exchanges) are forwarded to the client. When there are many users interested in only small subsets of data, the savings in traffic are large. -Fingerprint Winnowing (use of the integrity_ header) - Each product has an integrity fingerprint and size intended to identify it uniquely, +Fingerprint Winnowing (use of the identity_ header) + Each product has an identity fingerprint and size intended to identify it uniquely, referred to as a *fingerprint*. If two files have the same fingerprint, they are considered equivalent. In cases where multiple sources of equivalent data are available but downstream consumers would prefer to receive single notification messages @@ -388,15 +388,15 @@ Remaining fields only useful for partitioned files. will have 'rename' and 'link' or 'directory' elements in that case. -**integrity** +**identity** ~~~~~~~~~~~~~ -The integrity field gives a checksum useful for identifying the contents +The identity field gives a checksum useful for identifying the contents of a file:: - "integrity" : { "method" : , "value": } + "identity" : { "method" : , "value": } -The integrity field is a signature computed to allow receivers to determine +The identity field is a signature computed to allow receivers to determine if they have already downloaded the product from elsewhere. ** - string field indicating the checksum method used. @@ -556,7 +556,7 @@ EXAMPLE AMQP TOPIC: v03.NRDPS.GIF MQTT TOPIC: exchange/v03/NRDPS/GIF/ Body: { "pubTime": "201506011357.345", "baseUrl": "sftp://afsiext@cmcdataserver", "relPath": "/data/NRPDS/outputs/NRDPS_HiRes_000.gif", - "rename": "NRDPS/GIF/", "parts":"p,457,1,0,0", "integrity" : { "method":"md5", "value":"" }, "source": "ec_cmc" } + "rename": "NRDPS/GIF/", "parts":"p,457,1,0,0", "identity" : { "method":"md5", "value":"" }, "source": "ec_cmc" } - v03 - version of protocol - post - indicates the type of notification message diff --git a/docs/source/Tutorials/2_CLI_with_flowcb_demo.ipynb b/docs/source/Tutorials/2_CLI_with_flowcb_demo.ipynb index 2eaeac3be..1bfc0ac23 100644 --- a/docs/source/Tutorials/2_CLI_with_flowcb_demo.ipynb +++ b/docs/source/Tutorials/2_CLI_with_flowcb_demo.ipynb @@ -669,7 +669,7 @@ "\n", "* [sarracenia.flowcb.gather.nodupe](https://github.com/MetPX/Sarracenia/blob/v03_wip/sarracenia/flowcb/nodupe)\n", "This modules removes duplicates from message\n", - " flows based on Integrity checksums.\n", + " flows based on Identity checksums.\n", "\n", "* [sarracenia.flowcb.post.message](https://github.com/MetPX/Sarracenia/blob/v03_wip/sarracenia/flowcb/post/message.py)\n", "is a class that implements posting\n", diff --git a/docs/source/Tutorials/3_api_flow_demo.ipynb b/docs/source/Tutorials/3_api_flow_demo.ipynb index 6b8162160..baebdcc2c 100644 --- a/docs/source/Tutorials/3_api_flow_demo.ipynb +++ b/docs/source/Tutorials/3_api_flow_demo.ipynb @@ -161,21 +161,21 @@ "2023-05-27 12:03:20,181 [DEBUG] sarracenia.config add_option logEvents declared as type: value:{'after_work', 'after_accept', 'on_housekeeping'}\n", "2023-05-27 12:03:20,181 [DEBUG] sarracenia.config add_option logMessageDump declared as type: value:False\n", "2023-05-27 12:03:20,181 [INFO] sarracenia.flowcb.log __init__ subscribe initialized with: {'after_work', 'after_accept', 'on_housekeeping'}\n", - "2023-05-27 12:03:20,182 [DEBUG] sarracenia.config check_undeclared_options missing defaults: {'reconnect', 'MemoryMultiplier', 'blocksize', 'cluster', 'source', 'post_exchange', 'post_exchangeSplit', 'action', 'realpathFilter', 'follow_symlinks', 'inplace', 'sendTo', 'force_polling', 'notify_only', 'pollUrl', 'exchange_suffix', 'retry_driver', 'MemoryBaseLineFile', 'count', 'MemoryMax', 'header', 'post_exchangeSuffix', 'nodupe_basis', 'restore', 'logMessageDump', 'report_exchange', 'feeder', 'post_on_start', 'save', 'integrity', 'exchangeSplit'}\n", + "2023-05-27 12:03:20,182 [DEBUG] sarracenia.config check_undeclared_options missing defaults: {'reconnect', 'MemoryMultiplier', 'blocksize', 'cluster', 'source', 'post_exchange', 'post_exchangeSplit', 'action', 'realpathFilter', 'follow_symlinks', 'inplace', 'sendTo', 'force_polling', 'notify_only', 'pollUrl', 'exchange_suffix', 'retry_driver', 'MemoryBaseLineFile', 'count', 'MemoryMax', 'header', 'post_exchangeSuffix', 'nodupe_basis', 'restore', 'logMessageDump', 'report_exchange', 'feeder', 'post_on_start', 'save', 'identity', 'exchangeSplit'}\n", "2023-05-27 12:03:20,182 [INFO] sarracenia.flow run callbacks loaded: ['sarracenia.flowcb.gather.message.Message', 'sarracenia.flowcb.retry.Retry', 'sarracenia.flowcb.housekeeping.resources.Resources', 'log']\n", "2023-05-27 12:03:20,182 [INFO] sarracenia.flow run pid: 1682497 subscribe/flow_demo instance: 0\n", - "2023-05-27 12:03:20,199 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'local_offset', '_format', 'exchange', 'ack_id', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_553d269d71c2c2d52a9b4968e509fef5:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'source': 'WXO-DD', 'mtime': '20230527T153759.475', 'atime': '20230527T153759.475', 'pubTime': '20230527T153759.475', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/observations/swob-ml/20230527/CTNK/2023-05-27-1537-CTNK-AUTO-minute-swob.xml', 'subtopic': ['20230527', 'WXO-DD', 'observations', 'swob-ml', '20230527', 'CTNK'], 'integrity': {'method': 'md5', 'value': 'YkkEEWfkyty+UOIP7fEUag=='}, 'size': 9597, 'exchange': 'xpublic', 'ack_id': 1, 'local_offset': 0}\n", + "2023-05-27 12:03:20,199 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'local_offset', '_format', 'exchange', 'ack_id', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_553d269d71c2c2d52a9b4968e509fef5:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'source': 'WXO-DD', 'mtime': '20230527T153759.475', 'atime': '20230527T153759.475', 'pubTime': '20230527T153759.475', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/observations/swob-ml/20230527/CTNK/2023-05-27-1537-CTNK-AUTO-minute-swob.xml', 'subtopic': ['20230527', 'WXO-DD', 'observations', 'swob-ml', '20230527', 'CTNK'], 'identity': {'method': 'md5', 'value': 'YkkEEWfkyty+UOIP7fEUag=='}, 'size': 9597, 'exchange': 'xpublic', 'ack_id': 1, 'local_offset': 0}\n", "2023-05-27 12:03:20,199 [INFO] sarracenia.flowcb.log after_accept accepted: (lag: 1520.72 ) https://hpfx.collab.science.gc.ca /20230527/WXO-DD/observations/swob-ml/20230527/CTNK/2023-05-27-1537-CTNK-AUTO-minute-swob.xml \n", "2023-05-27 12:03:20,199 [INFO] sarracenia.flow run now active on vip None\n", "2023-05-27 12:03:20,200 [DEBUG] sarracenia.config add_option accelWgetCommand declared as type: value:/usr/bin/wget %s -o - -O %d\n", "2023-05-27 12:03:20,277 [INFO] sarracenia.flowcb.log after_work downloaded ok: /tmp/flow_demo/2023-05-27-1537-CTNK-AUTO-minute-swob.xml \n", - "2023-05-27 12:03:20,304 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'local_offset', '_format', 'exchange', 'ack_id', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_4e7603f7cb74d0193e5456a61042fca5:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'source': 'WXO-DD', 'mtime': '20230527T153759.474', 'atime': '20230527T153759.474', 'pubTime': '20230527T153759.474', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/observations/swob-ml/20230527/CWST/2023-05-27-1537-CWST-AUTO-minute-swob.xml', 'subtopic': ['20230527', 'WXO-DD', 'observations', 'swob-ml', '20230527', 'CWST'], 'integrity': {'method': 'md5', 'value': 'kaitX64I5bxPNIpELUQ7/A=='}, 'size': 9824, 'exchange': 'xpublic', 'ack_id': 2, 'local_offset': 0}\n", + "2023-05-27 12:03:20,304 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'local_offset', '_format', 'exchange', 'ack_id', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_4e7603f7cb74d0193e5456a61042fca5:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'source': 'WXO-DD', 'mtime': '20230527T153759.474', 'atime': '20230527T153759.474', 'pubTime': '20230527T153759.474', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/observations/swob-ml/20230527/CWST/2023-05-27-1537-CWST-AUTO-minute-swob.xml', 'subtopic': ['20230527', 'WXO-DD', 'observations', 'swob-ml', '20230527', 'CWST'], 'identity': {'method': 'md5', 'value': 'kaitX64I5bxPNIpELUQ7/A=='}, 'size': 9824, 'exchange': 'xpublic', 'ack_id': 2, 'local_offset': 0}\n", "2023-05-27 12:03:20,305 [INFO] sarracenia.flowcb.log after_accept accepted: (lag: 1520.83 ) https://hpfx.collab.science.gc.ca /20230527/WXO-DD/observations/swob-ml/20230527/CWST/2023-05-27-1537-CWST-AUTO-minute-swob.xml \n", "2023-05-27 12:03:20,390 [INFO] sarracenia.flowcb.log after_work downloaded ok: /tmp/flow_demo/2023-05-27-1537-CWST-AUTO-minute-swob.xml \n", - "2023-05-27 12:03:20,416 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'local_offset', '_format', 'exchange', 'ack_id', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_4ab40f078eb13197b272abb84cac6888:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'source': 'WXO-DD', 'mtime': '20230527T153759.474', 'atime': '20230527T153759.474', 'pubTime': '20230527T153759.474', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/observations/swob-ml/20230527/CWKH/2023-05-27-1537-CWKH-AUTO-minute-swob.xml', 'subtopic': ['20230527', 'WXO-DD', 'observations', 'swob-ml', '20230527', 'CWKH'], 'integrity': {'method': 'md5', 'value': 'r1E+zSg4n8PaB81Mnq6POw=='}, 'size': 6125, 'exchange': 'xpublic', 'ack_id': 3, 'local_offset': 0}\n", + "2023-05-27 12:03:20,416 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'local_offset', '_format', 'exchange', 'ack_id', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_4ab40f078eb13197b272abb84cac6888:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'source': 'WXO-DD', 'mtime': '20230527T153759.474', 'atime': '20230527T153759.474', 'pubTime': '20230527T153759.474', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/observations/swob-ml/20230527/CWKH/2023-05-27-1537-CWKH-AUTO-minute-swob.xml', 'subtopic': ['20230527', 'WXO-DD', 'observations', 'swob-ml', '20230527', 'CWKH'], 'identity': {'method': 'md5', 'value': 'r1E+zSg4n8PaB81Mnq6POw=='}, 'size': 6125, 'exchange': 'xpublic', 'ack_id': 3, 'local_offset': 0}\n", "2023-05-27 12:03:20,417 [INFO] sarracenia.flowcb.log after_accept accepted: (lag: 1520.94 ) https://hpfx.collab.science.gc.ca /20230527/WXO-DD/observations/swob-ml/20230527/CWKH/2023-05-27-1537-CWKH-AUTO-minute-swob.xml \n", "2023-05-27 12:03:20,507 [INFO] sarracenia.flowcb.log after_work downloaded ok: /tmp/flow_demo/2023-05-27-1537-CWKH-AUTO-minute-swob.xml \n", - "2023-05-27 12:03:20,530 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'local_offset', '_format', 'exchange', 'ack_id', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_575c5ec63a2fe354c27ff82f3cf9181f:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'source': 'WXO-DD', 'mtime': '20230527T153759.476', 'atime': '20230527T153759.476', 'pubTime': '20230527T153759.476', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/observations/swob-ml/20230527/CVSJ/2023-05-27-1537-CVSJ-AUTO-minute-swob.xml', 'subtopic': ['20230527', 'WXO-DD', 'observations', 'swob-ml', '20230527', 'CVSJ'], 'integrity': {'method': 'md5', 'value': 'H2iMpFuVhCmP39IeGNwx9g=='}, 'size': 9440, 'exchange': 'xpublic', 'ack_id': 4, 'local_offset': 0}\n" + "2023-05-27 12:03:20,530 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'local_offset', '_format', 'exchange', 'ack_id', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_575c5ec63a2fe354c27ff82f3cf9181f:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230527153757', 'source': 'WXO-DD', 'mtime': '20230527T153759.476', 'atime': '20230527T153759.476', 'pubTime': '20230527T153759.476', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/observations/swob-ml/20230527/CVSJ/2023-05-27-1537-CVSJ-AUTO-minute-swob.xml', 'subtopic': ['20230527', 'WXO-DD', 'observations', 'swob-ml', '20230527', 'CVSJ'], 'identity': {'method': 'md5', 'value': 'H2iMpFuVhCmP39IeGNwx9g=='}, 'size': 9440, 'exchange': 'xpublic', 'ack_id': 4, 'local_offset': 0}\n" ] }, { diff --git a/docs/source/Tutorials/4_api_moth_sub_demo.ipynb b/docs/source/Tutorials/4_api_moth_sub_demo.ipynb index 976de60c9..8820dd867 100644 --- a/docs/source/Tutorials/4_api_moth_sub_demo.ipynb +++ b/docs/source/Tutorials/4_api_moth_sub_demo.ipynb @@ -174,15 +174,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "message 0: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145518', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_34df392aeffc9c678011f3fd30193bb6:CMC:REGIONAL:GRIB2:BIN::20230527145518', 'source': 'WXO-DD', 'mtime': '20230527T145520.791', 'atime': '20230527T145520.791', 'pubTime': '20230527T145520.791', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_WIND_ISBL_30_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'integrity': {'method': 'md5', 'value': 'U1vVZnatrCeK3bLrXshb2g=='}, 'size': 554100, 'exchange': 'xpublic', 'ack_id': 1, 'local_offset': 0}\n", + "message 0: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145518', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_34df392aeffc9c678011f3fd30193bb6:CMC:REGIONAL:GRIB2:BIN::20230527145518', 'source': 'WXO-DD', 'mtime': '20230527T145520.791', 'atime': '20230527T145520.791', 'pubTime': '20230527T145520.791', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_WIND_ISBL_30_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'identity': {'method': 'md5', 'value': 'U1vVZnatrCeK3bLrXshb2g=='}, 'size': 554100, 'exchange': 'xpublic', 'ack_id': 1, 'local_offset': 0}\n", "first 50 bytes of corresponding file: b'GRIB\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x00\\x00\\x08tt\\x00\\x00\\x00\\x15\\x01\\x006\\x00\\x00\\x04\\x00\\x01\\x07\\xe7\\x05\\x1b\\x0c\\x00\\x00\\x00\\x02\\x00\\x00\\x00A\\x03\\x00\\x00\\x0b\\xc1\\x88\\x00\\x00\\x00'\n", - "message 1: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145519', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_752eb4e8803503704990563d84030e67:CMC:REGIONAL:GRIB2:BIN::20230527145519', 'source': 'WXO-DD', 'mtime': '20230527T145520.292', 'atime': '20230527T145520.292', 'pubTime': '20230527T145520.292', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_HGT_ISBL_250_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'integrity': {'method': 'md5', 'value': 'j6bh9dbE4QbJAXEOejw0Tw=='}, 'size': 377005, 'exchange': 'xpublic', 'ack_id': 2, 'local_offset': 0}\n", + "message 1: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145519', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_752eb4e8803503704990563d84030e67:CMC:REGIONAL:GRIB2:BIN::20230527145519', 'source': 'WXO-DD', 'mtime': '20230527T145520.292', 'atime': '20230527T145520.292', 'pubTime': '20230527T145520.292', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_HGT_ISBL_250_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'identity': {'method': 'md5', 'value': 'j6bh9dbE4QbJAXEOejw0Tw=='}, 'size': 377005, 'exchange': 'xpublic', 'ack_id': 2, 'local_offset': 0}\n", "first 50 bytes of corresponding file: b'GRIB\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x00\\x00\\x05\\xc0\\xad\\x00\\x00\\x00\\x15\\x01\\x006\\x00\\x00\\x04\\x00\\x01\\x07\\xe7\\x05\\x1b\\x0c\\x00\\x00\\x00\\x02\\x00\\x00\\x00A\\x03\\x00\\x00\\x0b\\xc1\\x88\\x00\\x00\\x00'\n", - "message 2: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145519', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_55f121bb28e822cffb6e61196cd924eb:CMC:REGIONAL:GRIB2:BIN::20230527145519', 'source': 'WXO-DD', 'mtime': '20230527T145521.260', 'atime': '20230527T145521.260', 'pubTime': '20230527T145521.260', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_RH_ISBL_700_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'integrity': {'method': 'md5', 'value': 'V7goy/doL6Gle68s1zoVEA=='}, 'size': 808438, 'exchange': 'xpublic', 'ack_id': 3, 'local_offset': 0}\n", + "message 2: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145519', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_55f121bb28e822cffb6e61196cd924eb:CMC:REGIONAL:GRIB2:BIN::20230527145519', 'source': 'WXO-DD', 'mtime': '20230527T145521.260', 'atime': '20230527T145521.260', 'pubTime': '20230527T145521.260', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_RH_ISBL_700_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'identity': {'method': 'md5', 'value': 'V7goy/doL6Gle68s1zoVEA=='}, 'size': 808438, 'exchange': 'xpublic', 'ack_id': 3, 'local_offset': 0}\n", "first 50 bytes of corresponding file: b'GRIB\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x00\\x00\\x0cU\\xf6\\x00\\x00\\x00\\x15\\x01\\x006\\x00\\x00\\x04\\x00\\x01\\x07\\xe7\\x05\\x1b\\x0c\\x00\\x00\\x00\\x02\\x00\\x00\\x00A\\x03\\x00\\x00\\x0b\\xc1\\x88\\x00\\x00\\x00'\n", - "message 3: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145518', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_dac300cf33756ba816e030f99fc9dc22:CMC:REGIONAL:GRIB2:BIN::20230527145518', 'source': 'WXO-DD', 'mtime': '20230527T145519.586', 'atime': '20230527T145519.586', 'pubTime': '20230527T145519.586', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_UGRD_ISBL_225_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'integrity': {'method': 'md5', 'value': 'MI8XzT1uam5OUf7QlDZ4FA=='}, 'size': 487411, 'exchange': 'xpublic', 'ack_id': 4, 'local_offset': 0}\n", + "message 3: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145518', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_dac300cf33756ba816e030f99fc9dc22:CMC:REGIONAL:GRIB2:BIN::20230527145518', 'source': 'WXO-DD', 'mtime': '20230527T145519.586', 'atime': '20230527T145519.586', 'pubTime': '20230527T145519.586', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_UGRD_ISBL_225_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'identity': {'method': 'md5', 'value': 'MI8XzT1uam5OUf7QlDZ4FA=='}, 'size': 487411, 'exchange': 'xpublic', 'ack_id': 4, 'local_offset': 0}\n", "first 50 bytes of corresponding file: b'GRIB\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x00\\x00\\x07o\\xf3\\x00\\x00\\x00\\x15\\x01\\x006\\x00\\x00\\x04\\x00\\x01\\x07\\xe7\\x05\\x1b\\x0c\\x00\\x00\\x00\\x02\\x00\\x00\\x00A\\x03\\x00\\x00\\x0b\\xc1\\x88\\x00\\x00\\x00'\n", - "message 4: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145519', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_c5e84748169e0a6dce8f3b884ffdf059:CMC:REGIONAL:GRIB2:BIN::20230527145519', 'source': 'WXO-DD', 'mtime': '20230527T145520.651', 'atime': '20230527T145520.651', 'pubTime': '20230527T145520.651', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_RH_ISBL_550_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'integrity': {'method': 'md5', 'value': 'zukdtksA5I0C5oq/ieiXbQ=='}, 'size': 774394, 'exchange': 'xpublic', 'ack_id': 5, 'local_offset': 0}\n" + "message 4: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'exchange', 'ack_id', 'local_offset', '_format'}, 'sundew_extension': 'CMC:REGIONAL:GRIB2:BIN::20230527145519', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_c5e84748169e0a6dce8f3b884ffdf059:CMC:REGIONAL:GRIB2:BIN::20230527145519', 'source': 'WXO-DD', 'mtime': '20230527T145520.651', 'atime': '20230527T145520.651', 'pubTime': '20230527T145520.651', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230527/WXO-DD/model_gem_regional/10km/grib2/12/037/CMC_reg_RH_ISBL_550_ps10km_2023052712_P037.grib2', 'subtopic': ['20230527', 'WXO-DD', 'model_gem_regional', '10km', 'grib2', '12', '037'], 'identity': {'method': 'md5', 'value': 'zukdtksA5I0C5oq/ieiXbQ=='}, 'size': 774394, 'exchange': 'xpublic', 'ack_id': 5, 'local_offset': 0}\n" ] }, { diff --git a/docs/source/Tutorials/5_api_moth_post_demo.ipynb b/docs/source/Tutorials/5_api_moth_post_demo.ipynb index cc6accaeb..bd5fc1dda 100644 --- a/docs/source/Tutorials/5_api_moth_post_demo.ipynb +++ b/docs/source/Tutorials/5_api_moth_post_demo.ipynb @@ -107,8 +107,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "2023-05-27 11:02:41,866 [DEBUG] sarracenia __computeIntegrity xattr sum too old\n", - "2023-05-27 11:02:41,868 [DEBUG] sarracenia.moth.amqp putNewMessage published body: {\"pubTime\": \"20230527T150241.865911961\", \"relPath\": \"sample.txt\", \"baseUrl\": \"http://host\", \"mode\": \"664\", \"size\": 335, \"mtime\": \"20230527T150237.927556038\", \"atime\": \"20230525T031938.0635721684\", \"integrity\": {\"method\": \"sha512\", \"value\": \"w5ZwUT1IMAjnQT6TLR9NSLzG5RKijhxq46FjMx5UWtsHM/FNOaYNRmGwonIPfnhE5xUORf3z5dRyI6zdL6ygNw==\"}} headers: {} to xpublic under: v03 \n", + "2023-05-27 11:02:41,866 [DEBUG] sarracenia __computeIdentity xattr sum too old\n", + "2023-05-27 11:02:41,868 [DEBUG] sarracenia.moth.amqp putNewMessage published body: {\"pubTime\": \"20230527T150241.865911961\", \"relPath\": \"sample.txt\", \"baseUrl\": \"http://host\", \"mode\": \"664\", \"size\": 335, \"mtime\": \"20230527T150237.927556038\", \"atime\": \"20230525T031938.0635721684\", \"identity\": {\"method\": \"sha512\", \"value\": \"w5ZwUT1IMAjnQT6TLR9NSLzG5RKijhxq46FjMx5UWtsHM/FNOaYNRmGwonIPfnhE5xUORf3z5dRyI6zdL6ygNw==\"}} headers: {} to xpublic under: v03 \n", "2023-05-27 11:02:41,868 [DEBUG] amqp collect Closed channel #1\n" ] }, @@ -116,7 +116,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'_format': 'v03', '_deleteOnPost': {'local_offset', 'new_baseUrl', 'new_dir', 'new_subtopic', 'new_file', 'subtopic', '_format', 'exchange', 'new_relPath', 'post_format'}, 'exchange': 'xpublic', 'local_offset': 0, 'pubTime': '20230527T150241.865911961', 'new_dir': '/tmp', 'new_file': 'sample.txt', 'post_format': 'v03', 'new_baseUrl': 'http://host', 'new_relPath': 'sample.txt', 'new_subtopic': [], 'relPath': 'sample.txt', 'subtopic': [], 'baseUrl': 'http://host', 'mode': '664', 'size': 335, 'mtime': '20230527T150237.927556038', 'atime': '20230525T031938.0635721684', 'integrity': {'method': 'sha512', 'value': 'w5ZwUT1IMAjnQT6TLR9NSLzG5RKijhxq46FjMx5UWtsHM/FNOaYNRmGwonIPfnhE5xUORf3z5dRyI6zdL6ygNw=='}}\n" + "{'_format': 'v03', '_deleteOnPost': {'local_offset', 'new_baseUrl', 'new_dir', 'new_subtopic', 'new_file', 'subtopic', '_format', 'exchange', 'new_relPath', 'post_format'}, 'exchange': 'xpublic', 'local_offset': 0, 'pubTime': '20230527T150241.865911961', 'new_dir': '/tmp', 'new_file': 'sample.txt', 'post_format': 'v03', 'new_baseUrl': 'http://host', 'new_relPath': 'sample.txt', 'new_subtopic': [], 'relPath': 'sample.txt', 'subtopic': [], 'baseUrl': 'http://host', 'mode': '664', 'size': 335, 'mtime': '20230527T150237.927556038', 'atime': '20230525T031938.0635721684', 'identity': {'method': 'sha512', 'value': 'w5ZwUT1IMAjnQT6TLR9NSLzG5RKijhxq46FjMx5UWtsHM/FNOaYNRmGwonIPfnhE5xUORf3z5dRyI6zdL6ygNw=='}}\n" ] } ], diff --git a/docs/source/api-documentation.rst b/docs/source/api-documentation.rst index 902cb34e7..6a95ff130 100644 --- a/docs/source/api-documentation.rst +++ b/docs/source/api-documentation.rst @@ -48,7 +48,7 @@ Sarracenia.Moth :special-members: :noindex: -.. automodule:: sarracenia.integrity +.. automodule:: sarracenia.identity :show-inheritance: :members: :private-members: diff --git a/docs/source/fr/CommentFaire/MiseANiveau.rst b/docs/source/fr/CommentFaire/MiseANiveau.rst index be78d6635..e5d2c0e15 100644 --- a/docs/source/fr/CommentFaire/MiseANiveau.rst +++ b/docs/source/fr/CommentFaire/MiseANiveau.rst @@ -38,6 +38,16 @@ Instructions d’installation git --- +*CHANGE*: v03 postformat field renamded: "integrity" is now "identity" + + * current version will read messsages with integrity and map them to identity. + * current version will post with "Identity", so older versions will miss them. + * https://github.com/MetPX/sarracenia/issues/703 + +3.0.40 +------ + + *CHANGEMENT*: l'interface de programmation (API) python a subit un changement de rupture pour la classe sarracenia.moth, il faut maintenant specifier l'options['broker'] au lieu @@ -99,12 +109,12 @@ dans le cas de conversion à partir de v2.) *CHANGEMENT*: La chaine de charactères "Vendor" est changé de "science.gc.ca" à "MetPX". Ce changement modifie le placement des fichiers sur la platteforme *Windows*. -*CHANGEMENT*: l´encodage des messages d´annonce v03 est changé: *Integrity* est rendu optionnel. +*CHANGEMENT*: l´encodage des messages d´annonce v03 est changé: *Identity* est rendu optionnel. *CHANGEMENT*: l'encodage des messages d'annonce v03 est changé: le champs *fileOp* est rajouté pour séparer les operations sur des fichiers qui ne comprennent pas des transmissions de données: créations de liens symboliques, renommage de fichier, suppression de fichiers. - Le champs *Integrity* est maintenant dédié au sommes de contrôle pour les données. + Le champs *Identity* est maintenant dédié au sommes de contrôle pour les données. @@ -259,7 +269,7 @@ V2 to Sr3 queue_name queueName report_back report source_from_exchange sourceFromExchange - sum integrity + sum identity suppress_duplicates nodupe_ttl suppress_duplicates_basis nodupe_basis topic_prefix topicPrefix diff --git a/docs/source/fr/CommentFaire/v2ASr3.rst b/docs/source/fr/CommentFaire/v2ASr3.rst index f5f2957bf..aa7a31bfb 100644 --- a/docs/source/fr/CommentFaire/v2ASr3.rst +++ b/docs/source/fr/CommentFaire/v2ASr3.rst @@ -298,11 +298,11 @@ En général, les plugins v3: msg.exchange msg['exchange'] le canal sur lequel le message à été reçu. msg.logger logger les journeaux fonctionnent ¨normalement" pour python msg.parts msg['size'] oublie ca, utilise une constructeur de sarracenia.Message - msg.sumflg msg['integrity'] oublie ca, utilise une constructeur de sarracenia.Message + msg.sumflg msg['identity'] oublie ca, utilise une constructeur de sarracenia.Message parent.msg worklist.incoming sr3 traite des groupe des messages, pas individuelement ================ =================== =========================================================== -* pubTime, baseUrl, relPath, retrievePath, size, integrity, sont tous des champs de message standard +* pubTime, baseUrl, relPath, retrievePath, size, identity, sont tous des champs de message standard mieux décrit dans `sr_post(7) <../Reference/sr_post.7.html>`_ * si l'on a besoin de stocker par état de message, alors on peut déclarer des champs temporaires dans le message, diff --git a/docs/source/fr/Contribution/v03.rst b/docs/source/fr/Contribution/v03.rst index 9764928cb..14d4fcaba 100644 --- a/docs/source/fr/Contribution/v03.rst +++ b/docs/source/fr/Contribution/v03.rst @@ -135,11 +135,11 @@ code sr3:: 16 ./flow/winnow.py 793 ./__init__.py 226 ./instance.py - 36 ./integrity/arbitrary.py - 93 ./integrity/__init__.py - 24 ./integrity/md5.py - 17 ./integrity/random.py - 24 ./integrity/sha512.py + 36 ./identity/arbitrary.py + 93 ./identity/__init__.py + 24 ./identity/md5.py + 17 ./identity/random.py + 24 ./identity/sha512.py 17 ./moth/amq1.py 585 ./moth/amqp.py 313 ./moth/__init__.py @@ -269,7 +269,7 @@ les deux versions, est clair: | | | | sr_message.py | | +--------------------------+---------------------------+ -| sr_checksum.py | integrity/ | +| sr_checksum.py | identity/ | | | __init__.py | | sum/* | * | +--------------------------+---------------------------+ @@ -863,7 +863,7 @@ Fonctionnalités * L’API d’extension est maintenant du python simple sans paramètres magiques. Juste des classes standard, en utilisant un mécanisme d’importation standard. Le débogage devrait être beaucoup plus simple maintenant car l’interpréteur fournira de bien meilleurs messages d’erreur au démarrage. Les plugins de style v2 sont maintenant appelés *flow callbacks*, - et il existe un certain nombre de classes (integrity, moth, transfert, peut-être flux) qui permettent l’extension + et il existe un certain nombre de classes (identity, moth, transfert, peut-être flux) qui permettent l’extension par une sous-classification simple. Cela devrait faire en sorte que ce soit beaucoup plus facile d’ajouter des protocoles supplémentaires pour le transport et les messages, ainsi que des algorithmes de somme de contrôle pour les nouveaux types de données. diff --git a/docs/source/fr/Explication/GuideLigneDeCommande.rst b/docs/source/fr/Explication/GuideLigneDeCommande.rst index d4857657c..6cff0fdb0 100644 --- a/docs/source/fr/Explication/GuideLigneDeCommande.rst +++ b/docs/source/fr/Explication/GuideLigneDeCommande.rst @@ -316,8 +316,8 @@ Afficher tous les paramètres de configuration (le résultat de toutes les analy 'inlineEncoding': 'guess', 'inlineOnly': False, 'instances': 1, - 'integrity_arbitrary_value': None, - 'integrity_method': 'sha512', + 'identity_arbitrary_value': None, + 'identity_method': 'sha512', 'logEvents': {'after_work', 'after_accept', 'on_housekeeping'}, 'logFormat': '%(asctime)s [%(levelname)s] %(name)s %(funcName)s %(message)s', 'logLevel': 'info', @@ -1543,7 +1543,7 @@ WINNOW et réenregistre les notifications, en supprimant les notifications redondantes. La méthode de décider quels messages d´annonce sont redondants varient selon le cas d´usage. -Normalement, les messages comprenned un champs *Integrity* qui avec une somme de contrôle +Normalement, les messages comprenned un champs *Identity* qui avec une somme de contrôle du ficher, tel que décrit dans `sr_post(7) <../Reference/sr_post.7.html>`_ Il y bien d´autres cas d´usage discutés dans `Supprimer les doublons `_ @@ -2333,11 +2333,11 @@ le répertoire sera vérifié pour les nouveaux fichiers. Voici une partie de l return [] -Integrity +Identity --------- On peut utiliser la directive *import* pour ajouter de nouveaux algorithmes de somme de contrôle en sous-classant -sarracenia.integrity.Integrity. +sarracenia.identity.Identity. Transfer diff --git a/docs/source/fr/Explication/Histoire/messages_v03.rst b/docs/source/fr/Explication/Histoire/messages_v03.rst index 5e2924330..dd088cafb 100644 --- a/docs/source/fr/Explication/Histoire/messages_v03.rst +++ b/docs/source/fr/Explication/Histoire/messages_v03.rst @@ -34,7 +34,7 @@ paires nom-valeur * Les champs fixes v02 sont maintenant des clés "pubTime", "baseURL" et "relPath" dans l’objet JSON qui est le corps du message. - * L’en-tête v02 *sum* avec valeur codée hexadécimale, est remplacé par l’en-tête v03 *integrity* avec codage base64. + * L’en-tête v02 *sum* avec valeur codée hexadécimale, est remplacé par l’en-tête v03 *identity* avec codage base64. * L’en-tête v03 *content* permet l’intégration du contenu du fichier. diff --git a/docs/source/fr/Explication/SarraPluginDev.rst b/docs/source/fr/Explication/SarraPluginDev.rst index ef4af74bc..eaabc4a6f 100644 --- a/docs/source/fr/Explication/SarraPluginDev.rst +++ b/docs/source/fr/Explication/SarraPluginDev.rst @@ -34,7 +34,7 @@ en construire de nouveaux de manière copier/coller, avec de nombreux exemples d Il existe d’autres façons d’étendre Sarracenia v3 en sous-classant : * Sarracenia.transfer.Transfer pour ajouter plus de protocoles de transfert de données -* Sarracenia.integrity.Integrity pour ajouter plus de méthodes de somme de contrôle. +* Sarracenia.identity.Identity pour ajouter plus de méthodes de somme de contrôle. * Sarracenia.moth.Moth pour ajouter la prise en charge de plus de protocoles de messagerie. * Sarracenia.flow.Flow pour créer de nouveaux flux. * Sarracenia.flowcb.FlowCB pour personnaliser les flux. @@ -310,7 +310,7 @@ On peut ajouter des fonctionnalités supplémentaires à Sarracenia en créant d * sarra.moth - Messages organisés en hiérarchies de thèmes. (existants : rabbitmq-amqp) -* sarra.integrity - algorithmes de somme de contrôle (existants: md5, sha512, arbitraires, aléatoires) +* sarra.identity - algorithmes de somme de contrôle (existants: md5, sha512, arbitraires, aléatoires) * sarra.transfer - protocoles de transport supplémentaires (https, ftp, sftp ) @@ -430,7 +430,7 @@ self est le message en cours de traitement. variables les plus utilisées : symboliques, les changements de nom et les suppressions de fichiers. Contenu décrit dans `sr_post(7) <../Reference/sr_post.7.html>`_ -*msg['integrity']* +*msg['identity']* La structure de somme de contrôle, un dictionnaire python avec les champs 'méthode' et 'valeur'. *msg['subtopic'], msg['new_subtopic']* diff --git a/docs/source/fr/Explication/SupprimerLesDoublons.rst b/docs/source/fr/Explication/SupprimerLesDoublons.rst index e9b81d8a5..62866a4d8 100644 --- a/docs/source/fr/Explication/SupprimerLesDoublons.rst +++ b/docs/source/fr/Explication/SupprimerLesDoublons.rst @@ -25,7 +25,7 @@ La supression de doublons:: Quand un message est à propos d´un message jugé un doublons, on cesse de le traiter. la clé d´un message d´annonce est préférablement simplement la somme de contrôle -du champs *Integrity*. Si la source de données ne fournit pas de champs *Integrity*, +du champs *Identity*. Si la source de données ne fournit pas de champs *Identity*, on se fie sur d´autres champes dans le message: *mtime*, *size*, *pubTime.* Le champs *pubTime* étant mandatoire assure qu´un clé peut toujour être généré pour chaque message, mais des fois peut être inefficace. diff --git a/docs/source/fr/Reference/sr3_options.7.rst b/docs/source/fr/Reference/sr3_options.7.rst index b4b0adae6..fb11b793a 100644 --- a/docs/source/fr/Reference/sr3_options.7.rst +++ b/docs/source/fr/Reference/sr3_options.7.rst @@ -966,13 +966,13 @@ Dans le répertoire ~/.cache/sarra/log: perte de notifications. Une fil d’attente qu'on n’accède pas pendant une longue période (dépendant de l’implémentation) sera détruite. -integrity +identity ------------------ Tous les postes de fichiers incluent une somme de contrôle. Elle est placée dans l’en-tête du message amqp et aura comme entrée *sum* avec la valeur de défaut 'd,md5_checksum_on_data'. L’option *sum* indique au programme comment calculer la somme de contrôle. -Dans la v3, elles sont appelées Integrity methods (méthodes d’intégrité) :: +Dans la v3, elles sont appelées Identity methods (méthodes d’intégrité) :: cod,x - Calculer On Download en appliquant x sha512 - faire SHA512 sur le contenu du fichier (défaut) diff --git a/docs/source/fr/Reference/sr3_post.1.rst b/docs/source/fr/Reference/sr3_post.1.rst index 42f669143..407855b9a 100644 --- a/docs/source/fr/Reference/sr3_post.1.rst +++ b/docs/source/fr/Reference/sr3_post.1.rst @@ -286,7 +286,7 @@ nodupe_ttl on|off|999 utilisé (défini sur une valeur autre que 0) car sinon la taille de bloc variera à mesure que les fichiers grandissent, et beaucoup de transfert de données en double en résultera. -integrity [,] +identity [,] ---------------------------- Toutes les publications de fichiers incluent une somme de contrôle. L'option *sum* spécifie comment la calculer. diff --git a/docs/source/fr/Reference/sr_post.7.rst b/docs/source/fr/Reference/sr_post.7.rst index 235595b29..3c45372d9 100644 --- a/docs/source/fr/Reference/sr_post.7.rst +++ b/docs/source/fr/Reference/sr_post.7.rst @@ -63,7 +63,7 @@ Les en-têtes sont un tableau de paires nom:valeur:: "relPath" - Le chemin relatif peut être concaténé à un de: - "integrity" - Version WMO du champ de sum v02, en cours de développement. + "identity" - Version WMO du champ de sum v02, en cours de développement. { "method" : "md5" | "sha512" | "cod" | "arbitrary" | "random" , "value" : "base64 valeur de somme de contrôle encodée" @@ -165,7 +165,7 @@ Filtrage des sources (utilisation des échanges `AMQP TOPIC`_) sont transmis au client. Lorsqu’il y a beaucoup d’utilisateurs intéressés par seulement un petit sous-ensembles de données, les économies de trafic sont importantes. -Fingerprint Winnowing (utilisation de l'en-tête integrity_) +Fingerprint Winnowing (utilisation de l'en-tête identity_) Chaque produit a une empreinte digitale d’intégrité et une taille destinée à l’identifier de manière unique, appelée *fingerprint*. Si deux fichiers ont la même empreinte digitale, ils sont considérés comme équivalents. Dans les cas où plusieurs sources de données équivalentes sont disponibles, mais les consommateurs en aval @@ -375,13 +375,13 @@ Les changements de noms de liens symboliques et répertoires sont representés p (répertoire) et "link" (lien) dans le champs "fileOp" qui contient également un *rename*. -**integrity** +**identity** ~~~~~~~~~~~~~ Le champ d’intégrité donne une somme de contrôle qui est utile pour identifier le contenu d’un fichier:: - "integrity" : { "method" : , "value": } + "identity" : { "method" : , "value": } Le champ d’intégrité est une signature calculée pour permettre aux récepteurs de déterminer s’ils ont déjà téléchargé le produit ailleurs. @@ -549,7 +549,7 @@ EXEMPLE AMQP TOPIC: v03.NRDPS.GIF MQTT TOPIC: exchange/v03/NRDPS/GIF/ Body: { "pubTime": "201506011357.345", "baseUrl": "sftp://afsiext@cmcdataserver", "relPath": "/data/NRPDS/outputs/NRDPS_HiRes_000.gif", - "rename": "NRDPS/GIF/", "parts":"p,457,1,0,0", "integrity" : { "method":"md5", "value":"" }, "source": "ec_cmc" } + "rename": "NRDPS/GIF/", "parts":"p,457,1,0,0", "identity" : { "method":"md5", "value":"" }, "source": "ec_cmc" } - v03 - version du protocole - la version et le type ensemble determine le format des thèmes qui suivent et du corps du message d'annonce. diff --git a/docs/source/fr/Tutoriel/1_CLI_introduction.ipynb b/docs/source/fr/Tutoriel/1_CLI_introduction.ipynb index b8de4ff43..c000d4665 100644 --- a/docs/source/fr/Tutoriel/1_CLI_introduction.ipynb +++ b/docs/source/fr/Tutoriel/1_CLI_introduction.ipynb @@ -206,7 +206,7 @@ "fixed_headers={}, flatten='/', hostdir='fractal', hostname='fractal',\n", "housekeeping=300, imports=[], inflight=None, inline=False,\n", "inline_encoding='guess', inline_max=4096, inline_only=False, instances=5,\n", - "integrity_arbitrary_value=None, integrity_method='sha512',\n", + "identity_arbitrary_value=None, identity_method='sha512',\n", "logEvents=\"...ekeeping', 'after_accept', 'after_work'}\",\n", "logFormat=\"...me)s] %(name)s %(funcName)s %(message)s'\", logLevel='info',\n", "logStdout=False, log_flowcb_needed=False, lr_backupCount=5, lr_interval=1,\n", diff --git a/docs/source/fr/Tutoriel/3_api_flow_demo.ipynb b/docs/source/fr/Tutoriel/3_api_flow_demo.ipynb index b58cd8e5c..d61fede25 100644 --- a/docs/source/fr/Tutoriel/3_api_flow_demo.ipynb +++ b/docs/source/fr/Tutoriel/3_api_flow_demo.ipynb @@ -158,21 +158,21 @@ "2023-05-28 16:52:20,104 [DEBUG] sarracenia.config add_option logEvents declared as type: value:{'after_work', 'after_accept', 'on_housekeeping'}\n", "2023-05-28 16:52:20,104 [DEBUG] sarracenia.config add_option logMessageDump declared as type: value:False\n", "2023-05-28 16:52:20,105 [INFO] sarracenia.flowcb.log __init__ subscribe initialized with: {'after_work', 'after_accept', 'on_housekeeping'}\n", - "2023-05-28 16:52:20,105 [DEBUG] sarracenia.config check_undeclared_options missing defaults: {'post_exchangeSuffix', 'exchangeSplit', 'integrity', 'pollUrl', 'post_exchangeSplit', 'MemoryMax', 'notify_only', 'cluster', 'blocksize', 'exchange_suffix', 'report_exchange', 'realpathFilter', 'action', 'logMessageDump', 'retry_driver', 'source', 'nodupe_basis', 'MemoryMultiplier', 'reconnect', 'force_polling', 'header', 'inplace', 'post_exchange', 'save', 'post_on_start', 'follow_symlinks', 'count', 'MemoryBaseLineFile', 'feeder', 'sendTo', 'restore'}\n", + "2023-05-28 16:52:20,105 [DEBUG] sarracenia.config check_undeclared_options missing defaults: {'post_exchangeSuffix', 'exchangeSplit', 'identity', 'pollUrl', 'post_exchangeSplit', 'MemoryMax', 'notify_only', 'cluster', 'blocksize', 'exchange_suffix', 'report_exchange', 'realpathFilter', 'action', 'logMessageDump', 'retry_driver', 'source', 'nodupe_basis', 'MemoryMultiplier', 'reconnect', 'force_polling', 'header', 'inplace', 'post_exchange', 'save', 'post_on_start', 'follow_symlinks', 'count', 'MemoryBaseLineFile', 'feeder', 'sendTo', 'restore'}\n", "2023-05-28 16:52:20,105 [INFO] sarracenia.flow run callbacks loaded: ['sarracenia.flowcb.gather.message.Message', 'sarracenia.flowcb.retry.Retry', 'sarracenia.flowcb.housekeeping.resources.Resources', 'log']\n", "2023-05-28 16:52:20,105 [INFO] sarracenia.flow run pid: 1921103 subscribe/flow_demo instance: 0\n", - "2023-05-28 16:52:20,128 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202430', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_ef8614a54e610cd50588f448a9632244:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202430', 'source': 'WXO-DD', 'mtime': '20230528T202432.81', 'atime': '20230528T202432.81', 'pubTime': '20230528T202432.81', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/20230528/CWRZ/2023-05-28-2023-CWRZ-AUTO-minute-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', '20230528', 'CWRZ'], 'integrity': {'method': 'md5', 'value': '30K1LtKs+91neD6625tbcg=='}, 'size': 7665, 'exchange': 'xpublic', 'ack_id': 1, 'local_offset': 0}\n", + "2023-05-28 16:52:20,128 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202430', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_ef8614a54e610cd50588f448a9632244:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202430', 'source': 'WXO-DD', 'mtime': '20230528T202432.81', 'atime': '20230528T202432.81', 'pubTime': '20230528T202432.81', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/20230528/CWRZ/2023-05-28-2023-CWRZ-AUTO-minute-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', '20230528', 'CWRZ'], 'identity': {'method': 'md5', 'value': '30K1LtKs+91neD6625tbcg=='}, 'size': 7665, 'exchange': 'xpublic', 'ack_id': 1, 'local_offset': 0}\n", "2023-05-28 16:52:20,129 [INFO] sarracenia.flowcb.log after_accept accepted: (lag: 1667.32 ) https://hpfx.collab.science.gc.ca /20230528/WXO-DD/observations/swob-ml/20230528/CWRZ/2023-05-28-2023-CWRZ-AUTO-minute-swob.xml \n", "2023-05-28 16:52:20,129 [INFO] sarracenia.flow run now active on vip None\n", "2023-05-28 16:52:20,130 [DEBUG] sarracenia.config add_option accelWgetCommand declared as type: value:/usr/bin/wget %s -o - -O %d\n", "2023-05-28 16:52:20,213 [INFO] sarracenia.flowcb.log after_work downloaded ok: /tmp/flow_demo/2023-05-28-2023-CWRZ-AUTO-minute-swob.xml \n", - "2023-05-28 16:52:20,233 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:CMC:SWOB_FORESTRY:XML::20230528202436', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_b82b2479d8c115cf0eb4c82bfcc59981:DMS:CMC:SWOB_FORESTRY:XML::20230528202436', 'source': 'WXO-DD', 'mtime': '20230528T202437.541', 'atime': '20230528T202437.541', 'pubTime': '20230528T202437.541', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/partners/on-firewx/20230528/ban/2023-05-28-2023-on-mnrf-affes-ban-ban-AUTO-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', 'partners', 'on-firewx', '20230528', 'ban'], 'integrity': {'method': 'md5', 'value': 'QGDX+gsirC8l8hnDfEHa1w=='}, 'size': 5203, 'exchange': 'xpublic', 'ack_id': 2, 'local_offset': 0}\n", + "2023-05-28 16:52:20,233 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:CMC:SWOB_FORESTRY:XML::20230528202436', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_b82b2479d8c115cf0eb4c82bfcc59981:DMS:CMC:SWOB_FORESTRY:XML::20230528202436', 'source': 'WXO-DD', 'mtime': '20230528T202437.541', 'atime': '20230528T202437.541', 'pubTime': '20230528T202437.541', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/partners/on-firewx/20230528/ban/2023-05-28-2023-on-mnrf-affes-ban-ban-AUTO-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', 'partners', 'on-firewx', '20230528', 'ban'], 'identity': {'method': 'md5', 'value': 'QGDX+gsirC8l8hnDfEHa1w=='}, 'size': 5203, 'exchange': 'xpublic', 'ack_id': 2, 'local_offset': 0}\n", "2023-05-28 16:52:20,233 [INFO] sarracenia.flowcb.log after_accept accepted: (lag: 1662.69 ) https://hpfx.collab.science.gc.ca /20230528/WXO-DD/observations/swob-ml/partners/on-firewx/20230528/ban/2023-05-28-2023-on-mnrf-affes-ban-ban-AUTO-swob.xml \n", "2023-05-28 16:52:20,311 [INFO] sarracenia.flowcb.log after_work downloaded ok: /tmp/flow_demo/2023-05-28-2023-on-mnrf-affes-ban-ban-AUTO-swob.xml \n", - "2023-05-28 16:52:20,336 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:CMC:SWOB_FORESTRY:XML::20230528202436', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_b96cfe8f3a477e2c4a39af99a97b6429:DMS:CMC:SWOB_FORESTRY:XML::20230528202436', 'source': 'WXO-DD', 'mtime': '20230528T202437.541', 'atime': '20230528T202437.541', 'pubTime': '20230528T202437.541', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/partners/on-firewx/20230528/ple/2023-05-28-2023-on-mnrf-affes-ple-ple-AUTO-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', 'partners', 'on-firewx', '20230528', 'ple'], 'integrity': {'method': 'md5', 'value': 'eL80Iw/3MaCqipWJOT7LeQ=='}, 'size': 5091, 'exchange': 'xpublic', 'ack_id': 3, 'local_offset': 0}\n", + "2023-05-28 16:52:20,336 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:CMC:SWOB_FORESTRY:XML::20230528202436', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_b96cfe8f3a477e2c4a39af99a97b6429:DMS:CMC:SWOB_FORESTRY:XML::20230528202436', 'source': 'WXO-DD', 'mtime': '20230528T202437.541', 'atime': '20230528T202437.541', 'pubTime': '20230528T202437.541', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/partners/on-firewx/20230528/ple/2023-05-28-2023-on-mnrf-affes-ple-ple-AUTO-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', 'partners', 'on-firewx', '20230528', 'ple'], 'identity': {'method': 'md5', 'value': 'eL80Iw/3MaCqipWJOT7LeQ=='}, 'size': 5091, 'exchange': 'xpublic', 'ack_id': 3, 'local_offset': 0}\n", "2023-05-28 16:52:20,336 [INFO] sarracenia.flowcb.log after_accept accepted: (lag: 1662.80 ) https://hpfx.collab.science.gc.ca /20230528/WXO-DD/observations/swob-ml/partners/on-firewx/20230528/ple/2023-05-28-2023-on-mnrf-affes-ple-ple-AUTO-swob.xml \n", "2023-05-28 16:52:20,433 [INFO] sarracenia.flowcb.log after_work downloaded ok: /tmp/flow_demo/2023-05-28-2023-on-mnrf-affes-ple-ple-AUTO-swob.xml \n", - "2023-05-28 16:52:20,456 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202442', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_0e2c0d07f3648f9956db0a2b1523e6d7:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202442', 'source': 'WXO-DD', 'mtime': '20230528T202443.46', 'atime': '20230528T202443.46', 'pubTime': '20230528T202443.46', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/20230528/CXHI/2023-05-28-2024-CXHI-AUTO-minute-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', '20230528', 'CXHI'], 'integrity': {'method': 'md5', 'value': 'bGYYmVHKuo3JSRDOjiC7NA=='}, 'size': 9353, 'exchange': 'xpublic', 'ack_id': 4, 'local_offset': 0}\n" + "2023-05-28 16:52:20,456 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202442', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_0e2c0d07f3648f9956db0a2b1523e6d7:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202442', 'source': 'WXO-DD', 'mtime': '20230528T202443.46', 'atime': '20230528T202443.46', 'pubTime': '20230528T202443.46', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/20230528/CXHI/2023-05-28-2024-CXHI-AUTO-minute-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', '20230528', 'CXHI'], 'identity': {'method': 'md5', 'value': 'bGYYmVHKuo3JSRDOjiC7NA=='}, 'size': 9353, 'exchange': 'xpublic', 'ack_id': 4, 'local_offset': 0}\n" ] }, { @@ -181,7 +181,7 @@ "text": [ "2023-05-28 16:52:20,457 [INFO] sarracenia.flowcb.log after_accept accepted: (lag: 1657.00 ) https://hpfx.collab.science.gc.ca /20230528/WXO-DD/observations/swob-ml/20230528/CXHI/2023-05-28-2024-CXHI-AUTO-minute-swob.xml \n", "2023-05-28 16:52:20,534 [INFO] sarracenia.flowcb.log after_work downloaded ok: /tmp/flow_demo/2023-05-28-2024-CXHI-AUTO-minute-swob.xml \n", - "2023-05-28 16:52:20,558 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202442', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_1198e5a492e9a42cd6aadbbe92bcb788:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202442', 'source': 'WXO-DD', 'mtime': '20230528T202443.47', 'atime': '20230528T202443.47', 'pubTime': '20230528T202443.47', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/20230528/CWBM/2023-05-28-2024-CWBM-AUTO-minute-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', '20230528', 'CWBM'], 'integrity': {'method': 'md5', 'value': 'sIqUJmWCsX5BVfplkZA75Q=='}, 'size': 9354, 'exchange': 'xpublic', 'ack_id': 5, 'local_offset': 0}\n", + "2023-05-28 16:52:20,558 [DEBUG] sarracenia.moth.amqp getNewMessage new msg: {'_format': 'v02', '_deleteOnPost': {'_format', 'exchange', 'ack_id', 'local_offset', 'subtopic'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202442', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_1198e5a492e9a42cd6aadbbe92bcb788:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528202442', 'source': 'WXO-DD', 'mtime': '20230528T202443.47', 'atime': '20230528T202443.47', 'pubTime': '20230528T202443.47', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/20230528/CWBM/2023-05-28-2024-CWBM-AUTO-minute-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', '20230528', 'CWBM'], 'identity': {'method': 'md5', 'value': 'sIqUJmWCsX5BVfplkZA75Q=='}, 'size': 9354, 'exchange': 'xpublic', 'ack_id': 5, 'local_offset': 0}\n", "2023-05-28 16:52:20,559 [INFO] sarracenia.flowcb.log after_accept accepted: (lag: 1657.09 ) https://hpfx.collab.science.gc.ca /20230528/WXO-DD/observations/swob-ml/20230528/CWBM/2023-05-28-2024-CWBM-AUTO-minute-swob.xml \n", "2023-05-28 16:52:20,652 [INFO] sarracenia.flowcb.log after_work downloaded ok: /tmp/flow_demo/2023-05-28-2024-CWBM-AUTO-minute-swob.xml \n", "2023-05-28 16:52:20,653 [INFO] sarracenia.flow please_stop ok, telling 4 callbacks about it.\n", diff --git a/docs/source/fr/Tutoriel/4_api_moth_sub_demo.ipynb b/docs/source/fr/Tutoriel/4_api_moth_sub_demo.ipynb index 9baa3f2b1..7df2d2adf 100644 --- a/docs/source/fr/Tutoriel/4_api_moth_sub_demo.ipynb +++ b/docs/source/fr/Tutoriel/4_api_moth_sub_demo.ipynb @@ -166,13 +166,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "message 0: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'CMC:HRDPS:GRIB2:BIN::20230528190111', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_5eebe93b78f7f20d6c58dff7079f17f8:CMC:HRDPS:GRIB2:BIN::20230528190111', 'source': 'WXO-DD', 'mtime': '20230528T190113.733', 'atime': '20230528T190113.733', 'pubTime': '20230528T190113.733', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/model_hrdps/north/grib2/12/006/CMC_hrdps_north_HGT_ISBL_1000_ps2.5km_2023052812_P006-00.grib2', 'subtopic': ['20230528', 'WXO-DD', 'model_hrdps', 'north', 'grib2', '12', '006'], 'integrity': {'method': 'md5', 'value': 'DcEZ6+fx637myOUf83VyDQ=='}, 'size': 236654, 'exchange': 'xpublic', 'ack_id': 1, 'local_offset': 0}\n", + "message 0: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'CMC:HRDPS:GRIB2:BIN::20230528190111', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_5eebe93b78f7f20d6c58dff7079f17f8:CMC:HRDPS:GRIB2:BIN::20230528190111', 'source': 'WXO-DD', 'mtime': '20230528T190113.733', 'atime': '20230528T190113.733', 'pubTime': '20230528T190113.733', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/model_hrdps/north/grib2/12/006/CMC_hrdps_north_HGT_ISBL_1000_ps2.5km_2023052812_P006-00.grib2', 'subtopic': ['20230528', 'WXO-DD', 'model_hrdps', 'north', 'grib2', '12', '006'], 'identity': {'method': 'md5', 'value': 'DcEZ6+fx637myOUf83VyDQ=='}, 'size': 236654, 'exchange': 'xpublic', 'ack_id': 1, 'local_offset': 0}\n", "le premier 50 octets du fichier annoncé: b'GRIB\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x00\\x00\\x03\\x9cn\\x00\\x00\\x00\\x15\\x01\\x006\\x00\\x00\\x04\\x00\\x01\\x07\\xe7\\x05\\x1c\\x0c\\x00\\x00\\x01\\x02\\x00\\x00\\x00A\\x03\\x00\\x00\\x12q1\\x00\\x00\\x00'\n", - "message 1: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'CMC:HRDPS:GRIB2:BIN::20230528190111', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_abed0a37b3c8b8511cc78b6f8c5c6a82:CMC:HRDPS:GRIB2:BIN::20230528190111', 'source': 'WXO-DD', 'mtime': '20230528T190114.13', 'atime': '20230528T190114.13', 'pubTime': '20230528T190114.13', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/model_hrdps/north/grib2/12/006/CMC_hrdps_north_SPFH_ISBL_0150_ps2.5km_2023052812_P006-00.grib2', 'subtopic': ['20230528', 'WXO-DD', 'model_hrdps', 'north', 'grib2', '12', '006'], 'integrity': {'method': 'md5', 'value': 'oMQDWV/QlF9aLLGOu+Tumw=='}, 'size': 330883, 'exchange': 'xpublic', 'ack_id': 2, 'local_offset': 0}\n", + "message 1: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'CMC:HRDPS:GRIB2:BIN::20230528190111', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_abed0a37b3c8b8511cc78b6f8c5c6a82:CMC:HRDPS:GRIB2:BIN::20230528190111', 'source': 'WXO-DD', 'mtime': '20230528T190114.13', 'atime': '20230528T190114.13', 'pubTime': '20230528T190114.13', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/model_hrdps/north/grib2/12/006/CMC_hrdps_north_SPFH_ISBL_0150_ps2.5km_2023052812_P006-00.grib2', 'subtopic': ['20230528', 'WXO-DD', 'model_hrdps', 'north', 'grib2', '12', '006'], 'identity': {'method': 'md5', 'value': 'oMQDWV/QlF9aLLGOu+Tumw=='}, 'size': 330883, 'exchange': 'xpublic', 'ack_id': 2, 'local_offset': 0}\n", "le premier 50 octets du fichier annoncé: b'GRIB\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x00\\x00\\x05\\x0c\\x83\\x00\\x00\\x00\\x15\\x01\\x006\\x00\\x00\\x04\\x00\\x01\\x07\\xe7\\x05\\x1c\\x0c\\x00\\x00\\x01\\x02\\x00\\x00\\x00A\\x03\\x00\\x00\\x12q1\\x00\\x00\\x00'\n", - "message 2: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'CMC:HRDPS:GRIB2:BIN::20230528190111', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_7af6caa9fd11fae11919525225783fad:CMC:HRDPS:GRIB2:BIN::20230528190111', 'source': 'WXO-DD', 'mtime': '20230528T190113.823', 'atime': '20230528T190113.823', 'pubTime': '20230528T190113.823', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/model_hrdps/north/grib2/12/006/CMC_hrdps_north_DEPR_ISBL_0850_ps2.5km_2023052812_P006-00.grib2', 'subtopic': ['20230528', 'WXO-DD', 'model_hrdps', 'north', 'grib2', '12', '006'], 'integrity': {'method': 'md5', 'value': 'zSw+zw6P1XlQayy+CjoLAg=='}, 'size': 194315, 'exchange': 'xpublic', 'ack_id': 3, 'local_offset': 0}\n", + "message 2: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'CMC:HRDPS:GRIB2:BIN::20230528190111', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_7af6caa9fd11fae11919525225783fad:CMC:HRDPS:GRIB2:BIN::20230528190111', 'source': 'WXO-DD', 'mtime': '20230528T190113.823', 'atime': '20230528T190113.823', 'pubTime': '20230528T190113.823', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/model_hrdps/north/grib2/12/006/CMC_hrdps_north_DEPR_ISBL_0850_ps2.5km_2023052812_P006-00.grib2', 'subtopic': ['20230528', 'WXO-DD', 'model_hrdps', 'north', 'grib2', '12', '006'], 'identity': {'method': 'md5', 'value': 'zSw+zw6P1XlQayy+CjoLAg=='}, 'size': 194315, 'exchange': 'xpublic', 'ack_id': 3, 'local_offset': 0}\n", "le premier 50 octets du fichier annoncé: b'GRIB\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x00\\x00\\x02\\xf7\\x0b\\x00\\x00\\x00\\x15\\x01\\x006\\x00\\x00\\x04\\x00\\x01\\x07\\xe7\\x05\\x1c\\x0c\\x00\\x00\\x01\\x02\\x00\\x00\\x00A\\x03\\x00\\x00\\x12q1\\x00\\x00\\x00'\n", - "message 3: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'CMC:HRDPS:GRIB2:BIN::20230528190112', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_e02209b0564eb4b19dd746af9eb5ee9c:CMC:HRDPS:GRIB2:BIN::20230528190112', 'source': 'WXO-DD', 'mtime': '20230528T190114.89', 'atime': '20230528T190114.89', 'pubTime': '20230528T190114.89', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/model_hrdps/north/grib2/12/006/CMC_hrdps_north_WDIR_TGL_40_ps2.5km_2023052812_P006-00.grib2', 'subtopic': ['20230528', 'WXO-DD', 'model_hrdps', 'north', 'grib2', '12', '006'], 'integrity': {'method': 'md5', 'value': 'GpPL5qQEOn0ALfuOzQrIHw=='}, 'size': 529466, 'exchange': 'xpublic', 'ack_id': 4, 'local_offset': 0}\n", + "message 3: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'CMC:HRDPS:GRIB2:BIN::20230528190112', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_e02209b0564eb4b19dd746af9eb5ee9c:CMC:HRDPS:GRIB2:BIN::20230528190112', 'source': 'WXO-DD', 'mtime': '20230528T190114.89', 'atime': '20230528T190114.89', 'pubTime': '20230528T190114.89', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/model_hrdps/north/grib2/12/006/CMC_hrdps_north_WDIR_TGL_40_ps2.5km_2023052812_P006-00.grib2', 'subtopic': ['20230528', 'WXO-DD', 'model_hrdps', 'north', 'grib2', '12', '006'], 'identity': {'method': 'md5', 'value': 'GpPL5qQEOn0ALfuOzQrIHw=='}, 'size': 529466, 'exchange': 'xpublic', 'ack_id': 4, 'local_offset': 0}\n", "le premier 50 octets du fichier annoncé: b'GRIB\\x00\\x00\\x00\\x02\\x00\\x00\\x00\\x00\\x00\\x08\\x14:\\x00\\x00\\x00\\x15\\x01\\x006\\x00\\x00\\x04\\x00\\x01\\x07\\xe7\\x05\\x1c\\x0c\\x00\\x00\\x01\\x02\\x00\\x00\\x00A\\x03\\x00\\x00\\x12q1\\x00\\x00\\x00'\n" ] }, @@ -187,7 +187,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "message 4: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528190109', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_0926936c6c7b2968e12487b5e10b3bc9:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528190109', 'source': 'WXO-DD', 'mtime': '20230528T190111.364', 'atime': '20230528T190111.364', 'pubTime': '20230528T190111.364', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/20230528/CVKU/2023-05-28-1900-CVKU-AUTO-minute-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', '20230528', 'CVKU'], 'integrity': {'method': 'md5', 'value': 'WEEsvB9/BKQC1Pv9hgO3LA=='}, 'size': 6426, 'exchange': 'xpublic', 'ack_id': 5, 'local_offset': 0}\n", + "message 4: {'_format': 'v02', '_deleteOnPost': {'subtopic', 'ack_id', '_format', 'local_offset', 'exchange'}, 'sundew_extension': 'DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528190109', 'from_cluster': 'DDSR.CMC', 'to_clusters': 'ALL', 'filename': 'msg_ddsr-WXO-DD_0926936c6c7b2968e12487b5e10b3bc9:DMS:WXO_RENAMED_SWOB2:MSC:XML::20230528190109', 'source': 'WXO-DD', 'mtime': '20230528T190111.364', 'atime': '20230528T190111.364', 'pubTime': '20230528T190111.364', 'baseUrl': 'https://hpfx.collab.science.gc.ca', 'relPath': '/20230528/WXO-DD/observations/swob-ml/20230528/CVKU/2023-05-28-1900-CVKU-AUTO-minute-swob.xml', 'subtopic': ['20230528', 'WXO-DD', 'observations', 'swob-ml', '20230528', 'CVKU'], 'identity': {'method': 'md5', 'value': 'WEEsvB9/BKQC1Pv9hgO3LA=='}, 'size': 6426, 'exchange': 'xpublic', 'ack_id': 5, 'local_offset': 0}\n", "le premier 50 octets du fichier annoncé: b'= msg['mtime']: logger.debug("mtime remembered by xattr") - fxainteg = xattr.get('integrity') - if fxainteg['method'] == o.integrity_method: - msg['integrity'] = fxainteg + fxainteg = xattr.get('identity') + if fxainteg['method'] == o.identity_method: + msg['identity'] = fxainteg return logger.debug("xattr different method than on disk") - calc_method = o.integrity_method + calc_method = o.identity else: logger.debug("xattr sum too old") - calc_method = o.integrity_method + calc_method = o.identity_method else: - calc_method = o.integrity_method + calc_method = o.identity_method if calc_method == None: return @@ -368,10 +368,10 @@ def __computeIntegrity(msg, path, o): elif calc_method == 'arbitrary': sumstr = { 'method': 'arbitrary', - 'value': o.integrity_arbitrary_value + 'value': o.identity_arbitrary_value } else: - sumalgo = sarracenia.integrity.Integrity.factory(calc_method) + sumalgo = sarracenia.identity.Identity.factory(calc_method) sumalgo.set_path(path) # compute checksum @@ -391,8 +391,8 @@ def __computeIntegrity(msg, path, o): checksum = sumalgo.value sumstr = {'method': calc_method, 'value': checksum} - msg['integrity'] = sumstr - xattr.set('integrity', sumstr) + msg['identity'] = sumstr + xattr.set('identity', sumstr) xattr.persist() def copyDict(msg, d): @@ -470,7 +470,7 @@ def fromFileData(path, o, lstat=None): m = sarracenia.Message.fromFileInfo(path, o, lstat) if lstat : if os_stat.S_ISREG(lstat.st_mode): - m.__computeIntegrity(path, o) + m.__computeIdentity(path, o) try: t = magic.from_file(path,mime=True) m['contentType'] = t @@ -565,24 +565,24 @@ def fromFileInfo(path, o, lstat=None): msg['source'] = o.source - if o.integrity_method: - if o.integrity_method.startswith('cod,'): - msg['integrity'] = { + if o.identity_method: + if o.identity_method.startswith('cod,'): + msg['identity'] = { 'method': 'cod', - 'value': o.integrity_method[4:] + 'value': o.identity_method[4:] } - elif o.integrity_method in ['random']: - algo = sarracenia.integrity.Integrity.factory(o.integrity_method) + elif o.identity_method in ['random']: + algo = sarracenia.identity.Indentiy.factory(o.identity_method) algo.set_path(post_relPath) - msg['integrity'] = { - 'method': o.integrity_method, + msg['identity'] = { + 'method': o.identity_method, 'value': algo.value } else: - if 'integrity' in msg: - del msg['integrity'] + if 'identity' in msg: + del msg['identity'] - # for md5name/aka None aka omit integrity... should just fall through. + # for md5name/aka None aka omit identity... should just fall through. if lstat is None: return msg diff --git a/sarracenia/config.py b/sarracenia/config.py index 7f0f465c2..eca333d59 100755 --- a/sarracenia/config.py +++ b/sarracenia/config.py @@ -57,10 +57,10 @@ def __call__(self, parser, namespace, values, option_string=None): from sarracenia.flow.sarra import default_options as sarradefopts -import sarracenia.integrity.arbitrary +import sarracenia.identity.arbitrary import sarracenia.moth -import sarracenia.integrity +import sarracenia.identity import sarracenia.instance default_options = { @@ -79,7 +79,7 @@ def __call__(self, parser, namespace, values, option_string=None): 'inflight': None, 'inline': False, 'inlineOnly': False, - 'integrity_method': 'sha512', + 'identity_method': 'sha512', 'logMetrics': False, 'logStdout': False, 'nodupe_driver': 'disk', @@ -143,7 +143,7 @@ def __call__(self, parser, namespace, values, option_string=None): str_options = [ 'action', 'admin', 'baseDir', 'broker', 'cluster', 'directory', 'exchange', - 'exchange_suffix', 'feeder', 'filename', 'flatten', 'flowMain', 'header', 'integrity', 'logLevel', + 'exchange_suffix', 'feeder', 'filename', 'flatten', 'flowMain', 'header', 'identity', 'logLevel', 'pollUrl', 'post_baseUrl', 'post_baseDir', 'post_broker', 'post_exchange', 'post_exchangeSuffix', 'post_format', 'post_topic', 'queueName', 'sendTo', 'rename', 'report_exchange', 'source', 'strip', 'timezone', 'nodupe_ttl', 'nodupe_driver', @@ -167,15 +167,15 @@ def __call__(self, parser, namespace, values, option_string=None): 'discard' : [ 'delete_destination', 'on' ], 'from_cluster' : [ 'continue' ], 'to_clusters' : [ 'continue' ], - 'integrity' : { - 'n' : [ 'integrity', 'none' ], - 's' : [ 'integrity', 'sha512' ], - 'd' : [ 'integrity', 'md5' ], - 'a' : [ 'integrity', 'arbitrary' ], - 'r' : [ 'integrity', 'random' ], - 'z,d' : [ 'integrity', 'cod,md5' ], - 'z,s' : [ 'integrity', 'cod,sha512' ], - 'z,n' : [ 'integrity', 'none' ] + 'identity' : { + 'n' : [ 'identity', 'none' ], + 's' : [ 'identity', 'sha512' ], + 'd' : [ 'identity', 'md5' ], + 'a' : [ 'identity', 'arbitrary' ], + 'r' : [ 'identity', 'random' ], + 'z,d' : [ 'identity', 'cod,md5' ], + 'z,s' : [ 'identity', 'cod,sha512' ], + 'z,n' : [ 'identity', 'none' ] }, 'ls_file_index' : [ 'continue' ], 'plugin': { @@ -688,7 +688,7 @@ class Config: 'simulate': 'dry_run', 'simulation': 'dry_run', 'source_from_exchange': 'sourceFromExchange', - 'sum' : 'integrity', + 'sum' : 'identity', 'suppress_duplicates' : 'nodupe_ttl', 'suppress_duplicates_basis' : 'nodupe_basis', 'tls_rigour' : 'tlsRigour', @@ -755,7 +755,7 @@ def __init__(self, parent=None) -> 'Config': self.inline = False self.inlineByteMax = 4096 self.inlineEncoding = 'guess' - self.integrity_arbitrary_value = None + self.identity_arbitrary_value = None self.logReject = False self.logRotateCount = 5 self.logRotateInterval = 1 @@ -1278,54 +1278,54 @@ def _parse_sum(self, value): #logger.error('FIXME! input value: %s' % value) if not value: - if not self.integrity_method: + if not self.identity_method: return - value = self.integrity_method + value = self.identity_method - if (value in sarracenia.integrity.known_methods) or ( + if (value in sarracenia.identity.known_methods) or ( value[0:4] == 'cod,'): - self.integrity_method = value + self.identity_method = value #logger.error('returning 1: %s' % value) return - #logger.error( f'1 value: {value} self.integrity_method={self.integrity_method}' ) + #logger.error( f'1 value: {value} self.identity_method={self.identity_method}' ) if (value[0:2] == 'z,'): value = value[2:] - self.integrity_method = 'cod,' + self.identity_method = 'cod,' elif (value[0:2] == 'a,'): - self.integrity_method = 'arbitrary' - self.integrity_arbitrary_value = value[2:] + self.identity_method = 'arbitrary' + self.identity_arbitrary_value = value[2:] else: - self.integrity_method = value - #logger.error( f'2 value: {value} self.integrity_method={self.integrity_method}' ) + self.identity_method = value + #logger.error( f'2 value: {value} self.identity_method={self.identity_method}' ) if value.lower() in [ 'n', 'none' ]: - self.integrity_method = None + self.identity_method = None #logger.error('returning 1.1: %s' % 'none') return - #logger.error( f'3 value: {value} self.integrity_method={self.integrity_method}' ) + #logger.error( f'3 value: {value} self.identity_method={self.identity_method}' ) - for sc in sarracenia.integrity.Integrity.__subclasses__(): + for sc in sarracenia.identity.Identity.__subclasses__(): #logger.error('against 1.8: %s' % sc.__name__.lower() ) if value == sc.__name__.lower(): #logger.error('returning 2: %s' % value ) - if self.integrity_method == 'cod,': - self.integrity_method += value + if self.identity_method == 'cod,': + self.identity_method += value else: - self.integrity_method = value + self.identity_method = value return if hasattr(sc, 'registered_as'): #logger.error('against 3: %s' % sc.registered_as() ) if (sc.registered_as() == value): - if self.integrity_method == 'cod,': - self.integrity_method += sc.__name__.lower() + if self.identity_method == 'cod,': + self.identity_method += sc.__name__.lower() else: - self.integrity_method = sc.__name__.lower() - #logger.error('returning 3: %s' % self.integrity_method) + self.identity_method = sc.__name__.lower() + #logger.error('returning 3: %s' % self.identity_method) return # FIXME this is an error return case, how to designate an invalid checksum? - self.integrity_method = 'invalid' + self.identity_method = 'invalid' #logger.error('returning 4: invalid' ) def parse_file(self, cfg, component=None): @@ -1471,7 +1471,7 @@ def parse_file(self, cfg, component=None): self.plugins_early.insert(0, v) elif k in ['set', 'setting', 's']: self._parse_setting(line[1], line[2:]) - elif k in ['integrity']: + elif k in ['identity', 'integrity']: self._parse_sum(v) elif k in Config.port_required: logger.error( f' {cfname}:{lineno} {k} {v} not supported in v3, consult porting guide. Option ignored.' ) @@ -2326,9 +2326,9 @@ def parse_args(self, isPost=False): type=int, help='number of processes to run per configuration') - parser.add_argument('--integrity_method', '--integrity', '-s', '--sum', + parser.add_argument('--identity_method', '--identity', '-s', '--sum', nargs='?', - default=self.integrity_method, + default=self.identity_method, help='choose a different checksumming method for the files posted') if hasattr(self, 'bindings'): parser.set_defaults(bindings=self.bindings) diff --git a/sarracenia/diskqueue.py b/sarracenia/diskqueue.py index cdfcdd7f3..82ad43722 100755 --- a/sarracenia/diskqueue.py +++ b/sarracenia/diskqueue.py @@ -271,8 +271,8 @@ def in_cache(self, message) -> bool: sumstr = jsonpickle.encode(message['noDupe']['key']) elif 'fileOp' in message: sumstr = jsonpickle.encode(message['fileOp']) - elif 'integrity' in message: - sumstr = jsonpickle.encode(message['integrity']) + elif 'identity' in message: + sumstr = jsonpickle.encode(message['identity']) elif 'pubTime' in message: sumstr = jsonpickle.encode(message['pubTime']) else: diff --git a/sarracenia/filemetadata.py b/sarracenia/filemetadata.py index a4dc93338..f1b9aa250 100755 --- a/sarracenia/filemetadata.py +++ b/sarracenia/filemetadata.py @@ -126,6 +126,11 @@ def __init__(self, path): except: self.x = {} + if 'integrity' in self.x: # id transition. + self.x['identity'] = self.x['integrity'] + del self.x['integrity'] + + def __del__(self): self.persist() diff --git a/sarracenia/flow/__init__.py b/sarracenia/flow/__init__.py index df685a6fa..41a78eb42 100644 --- a/sarracenia/flow/__init__.py +++ b/sarracenia/flow/__init__.py @@ -7,7 +7,7 @@ # v3 plugin architecture... import sarracenia.flowcb -import sarracenia.integrity +import sarracenia.identity import sarracenia.transfer import stat @@ -1069,21 +1069,21 @@ def write_inline_file(self, msg) -> bool: else: data = msg['content']['value'].encode('utf-8') - if self.o.integrity_method.startswith('cod,'): - algo_method = self.o.integrity_method[4:] - elif msg['integrity']['method'] == 'cod': - algo_method = msg['integrity']['value'] + if self.o.identity_method.startswith('cod,'): + algo_method = self.o.identity_method[4:] + elif msg['identity']['method'] == 'cod': + algo_method = msg['identity']['value'] else: - algo_method = msg['integrity']['method'] + algo_method = msg['identity']['method'] - onfly_algo = sarracenia.integrity.Integrity.factory(algo_method) - data_algo = sarracenia.integrity.Integrity.factory(algo_method) + onfly_algo = sarracenia.identity.Identity.factory(algo_method) + data_algo = sarracenia.identity.Identity.factory(algo_method) onfly_algo.set_path(path) data_algo.set_path(path) if algo_method == 'arbitrary': - onfly_algo.value = msg['integrity']['value'] - data_algo.value = msg['integrity']['value'] + onfly_algo.value = msg['identity']['value'] + data_algo.value = msg['identity']['value'] onfly_algo.update(data) @@ -1143,36 +1143,36 @@ def compute_local_checksum(self, msg) -> None: if sarracenia.filemetadata.supports_extended_attributes: try: x = sarracenia.filemetadata.FileMetadata(msg['new_path']) - s = x.get('integrity') + s = x.get('identity') if s: metadata_cached_mtime = x.get('mtime') if ((metadata_cached_mtime >= msg['mtime'])): # file has not been modified since checksum value was stored. - if (( 'integrity' in msg ) and ( 'method' in msg['integrity'] ) and \ - ( msg['integrity']['method'] == s['method'] )) or \ - ( s['method'] == self.o.integrity_method ) : + if (( 'identity' in msg ) and ( 'method' in msg['identity'] ) and \ + ( msg['identity']['method'] == s['method'] )) or \ + ( s['method'] == self.o.identity_method ) : # file # cache good. - msg['local_integrity'] = s - msg['_deleteOnPost'] |= set(['local_integrity']) + msg['local_identity'] = s + msg['_deleteOnPost'] |= set(['local_identity']) return except: pass - local_integrity = sarracenia.integrity.Integrity.factory( - msg['integrity']['method']) + local_identity = sarracenia.identity.Identity.factory( + msg['identity']['method']) - if msg['integrity']['method'] == 'arbitrary': - local_integrity.value = msg['integrity']['value'] + if msg['identity']['method'] == 'arbitrary': + local_identity.value = msg['identity']['value'] - local_integrity.update_file(msg['new_path']) - msg['local_integrity'] = { - 'method': msg['integrity']['method'], - 'value': local_integrity.value + local_identity.update_file(msg['new_path']) + msg['local_identity'] = { + 'method': msg['identity']['method'], + 'value': local_identity.value } - msg['_deleteOnPost'] |= set(['local_integrity']) + msg['_deleteOnPost'] |= set(['local_identity']) def file_should_be_downloaded(self, msg) -> bool: """ @@ -1235,7 +1235,7 @@ def file_should_be_downloaded(self, msg) -> bool: msg['new_path'], new_mtime - old_mtime, new_mtime, old_mtime)) - if 'integrity' in msg and msg['integrity']['method'] in ['random', 'cod']: + if 'identity' in msg and msg['identity']['method'] in ['random', 'cod']: logger.debug("content_match %s sum 0/z never matches" % (msg['new_path'])) return True @@ -1245,7 +1245,7 @@ def file_should_be_downloaded(self, msg) -> bool: "new file not big enough... considered different") return True - if not 'integrity' in msg: + if not 'identity' in msg: # FIXME... should there be a setting to assume them the same? use cases may vary. logger.debug( "no checksum available, assuming different" ) return True @@ -1259,9 +1259,9 @@ def file_should_be_downloaded(self, msg) -> bool: return True logger.debug("checksum in message: %s vs. local: %s" % - (msg['integrity'], msg['local_integrity'])) + (msg['identity'], msg['local_identity'])) - if msg['local_integrity'] == msg['integrity']: + if msg['local_identity'] == msg['identity']: self.reject(msg, 304, "same checksum %s " % (msg['new_path'])) return False else: @@ -1651,10 +1651,10 @@ def download(self, msg, options) -> bool: urlstr = msg['baseUrl'] + '/' + msg['relPath'] - istr =msg['integrity'] if ('integrity' in msg) else "None" + istr =msg['identity'] if ('identity' in msg) else "None" fostr = msg['fileOp'] if ('fileOp' in msg ) else "None" - logger.debug( 'integrity: %s, fileOp: %s' % ( istr, fostr ) ) + logger.debug( 'identity: %s, fileOp: %s' % ( istr, fostr ) ) new_inflight_path = '' new_dir = msg['new_dir'] @@ -1783,10 +1783,10 @@ def download(self, msg, options) -> bool: # FIXME locking for i parts in temporary file ... should stay lock # and file_reassemble... take into account the locking - if self.o.integrity_method.startswith('cod,'): - download_algo = self.o.integrity_method[4:] - elif 'integrity' in msg: - download_algo = msg['integrity']['method'] + if self.o.identity_method.startswith('cod,'): + download_algo = self.o.identity_method[4:] + elif 'identity' in msg: + download_algo = msg['identity']['method'] else: download_algo = None @@ -1795,7 +1795,7 @@ def download(self, msg, options) -> bool: if download_algo == 'arbitrary': self.proto[self.scheme].set_sumArbitrary( - msg['integrity']['value']) + msg['identity']['value']) if (type(options.inflight) == str) \ and (options.inflight[0] == '/' or options.inflight[-1] == '/') \ @@ -1875,8 +1875,8 @@ def download(self, msg, options) -> bool: msg['onfly_checksum'] = self.proto[self.scheme].get_sumstr() msg['data_checksum'] = self.proto[self.scheme].data_checksum - if self.o.integrity_method.startswith('cod,') and not accelerated: - msg['integrity'] = msg['onfly_checksum'] + if self.o.identity_method.startswith('cod,') and not accelerated: + msg['identity'] = msg['onfly_checksum'] msg['_deleteOnPost'] |= set(['onfly_checksum']) msg['_deleteOnPost'] |= set(['data_checksum']) @@ -2282,9 +2282,9 @@ def set_local_file_attributes(self, local_file, msg): # FIXME ... what to do when checksums don't match? if 'onfly_checksum' in msg: - x.set( 'integrity', msg['onfly_checksum'] ) - elif 'integrity' in msg: - x.set('integrity', msg['integrity'] ) + x.set( 'identity', msg['onfly_checksum'] ) + elif 'identity' in msg: + x.set('identity', msg['identity'] ) if self.o.timeCopy and 'mtime' in msg and msg['mtime']: x.set('mtime', msg['mtime']) diff --git a/sarracenia/flow/poll.py b/sarracenia/flow/poll.py index 6bdca0ab8..2e6b38949 100644 --- a/sarracenia/flow/poll.py +++ b/sarracenia/flow/poll.py @@ -22,7 +22,7 @@ 'follow_symlinks': False, 'force_polling': False, 'inflight': None, - 'integrity_method': 'cod,sha512', + 'identity_method': 'cod,sha512', 'part_ext': 'Part', 'partflg': '1', 'post_baseDir': None, diff --git a/sarracenia/flowcb/download/__init__.py b/sarracenia/flowcb/download/__init__.py index 1c32b0121..720837906 100644 --- a/sarracenia/flowcb/download/__init__.py +++ b/sarracenia/flowcb/download/__init__.py @@ -28,10 +28,10 @@ This replaces built-in download functionality, providing an override. for individual file transfers. ideally you set checksums as you download. - looking at self.o.integrity_method to establish download checksum algorithm. + looking at self.o.identity_method to establish download checksum algorithm. might have to allow for cod... say it is checksum_method:: - checksum = sarracenia.integrity.Integrity.factory(self.o.checksum_method) + checksum = sarracenia.identity.Identity.factory(self.o.checksum_method) while downloading: checksum.update(chunk) @@ -42,7 +42,7 @@ it is imperative, to avoid looping, to apply the actual checksum of the data to the message: - msg['integrity'] = { 'method': checksum_method, 'value': checksum.get_sumstr() } + msg['identity'] = { 'method': checksum_method, 'value': checksum.get_sumstr() } return Boolean success indicator. if False, download will be attempted again and/or appended to retry queue. diff --git a/sarracenia/flowcb/download/mail_ingest.py b/sarracenia/flowcb/download/mail_ingest.py index 8c1280cdd..e5124c2d5 100644 --- a/sarracenia/flowcb/download/mail_ingest.py +++ b/sarracenia/flowcb/download/mail_ingest.py @@ -31,7 +31,7 @@ import sarracenia from sarracenia.flowcb import FlowCB -import sarracenia.integrity +import sarracenia.identity logger = logging.getLogger(__name__) @@ -134,7 +134,7 @@ def download(self, msg) -> bool: if msgid == msg['new_file']: logger.info("download_email_ingest downloaded file: %s" % msg['new_dir']+'/'+msg['new_file']) - sumalgo = sarracenia.integrity.Integrity.factory(self.o.integrity_method) + sumalgo = sarracenia.identity.Identity.factory(self.o.identity_method) sumalgo.set_path(path) with open(msg['new_dir']+'/'+msg['new_file'], 'w') as f: sumalgo.update(email_message) @@ -142,7 +142,7 @@ def download(self, msg) -> bool: f.close() message['size'] = len(bytes(email_message,'utf8')) - message['integrity'] = { 'method': self.o.integrity_method, 'value': sumalgo.value } + message['identity'] = { 'method': self.o.identity_method, 'value': sumalgo.value } if self.o.delete : mailman.dele(index+1) found=True diff --git a/sarracenia/flowcb/gather/file.py b/sarracenia/flowcb/gather/file.py index 575d79778..02359900a 100755 --- a/sarracenia/flowcb/gather/file.py +++ b/sarracenia/flowcb/gather/file.py @@ -20,7 +20,7 @@ import sarracenia from sarracenia import * from sarracenia.flowcb import FlowCB -import sarracenia.integrity +import sarracenia.identity import stat from sys import platform as _platform @@ -272,7 +272,7 @@ def post_file_in_parts(self, path, lstat): else: sumflg = self.o.sumflg - sumalgo = sarracenia.integrity.Integrity.factory(sumflg) + sumalgo = sarracenia.identity.Identity.factory(sumflg) sumalgo.set_path(path) # compute block stuff @@ -316,7 +316,7 @@ def post_file_in_parts(self, path, lstat): # complete message - msg['integrity'] = sumstr + msg['identity'] = sumstr messages.extend(copy.deepcopy(msg)) return messages @@ -345,7 +345,7 @@ def post_file_part(self, path, lstat): # complete message msg['parts'] = partstr - msg['integrity'] = sumstr + msg['identity'] = sumstr return [msg] diff --git a/sarracenia/flowcb/poll/__init__.py b/sarracenia/flowcb/poll/__init__.py index 63543345d..447c11adf 100755 --- a/sarracenia/flowcb/poll/__init__.py +++ b/sarracenia/flowcb/poll/__init__.py @@ -107,7 +107,7 @@ class Poll(FlowCB): * chmod - used to identify the minimum permissions to accept for a file to be included in a polling result. - * integrity_method - parameter for how to build integrity checksum for messages. + * identity_method - parameter for how to build identity checksum for messages. as these are usually remote files, the default is typically "cod" (calculate on download) * rename - parameter used to to put in messages built to specify the rename field contents. @@ -485,8 +485,8 @@ def poll_file_post(self, desc, destDir, remote_file): if not self.o.follow_symlinks: try: ok['fileOp'] = { 'link': os.readlink(path) } - if 'Integrity' in msg: - del ok['Integrity'] + if 'Identity' in msg: + del ok['Identity'] except: logger.error("cannot read link %s message dropped" % path) logger.debug('Exception details: ', exc_info=True) @@ -523,14 +523,14 @@ def poll_file_post(self, desc, destDir, remote_file): logger.debug('Exception details: ', exc_info=True) return None - if self.o.integrity_method and (',' in self.o.integrity_method): - m, v = self.o.integrity_method.split(',') - msg['integrity'] = {'method': m, 'value': v} + if self.o.identity_method and (',' in self.o.identity_method): + m, v = self.o.identity_method.split(',') + msg['identity'] = {'method': m, 'value': v} # If there is a file operation, and it isn't a rename, then some fields are irrelevant/wrong. if 'fileOp' in msg and 'rename' not in msg['fileOp']: - if 'Integrity' in msg: - del msg['Integrity'] + if 'Identity' in msg: + del msg['Indentity'] if 'size' in msg: del msg['size'] diff --git a/sarracenia/flowcb/poll/noaa_hydrometric.py b/sarracenia/flowcb/poll/noaa_hydrometric.py index 324bced6a..9beddb0e7 100755 --- a/sarracenia/flowcb/poll/noaa_hydrometric.py +++ b/sarracenia/flowcb/poll/noaa_hydrometric.py @@ -53,9 +53,9 @@ def __init__(self, options): self.o.add_option( option='retrievePathPattern', kind='str', \ default_value='datagetter?range=1&station={0:}&product={1:}&units=metric&time_zone=gmt&application=web_services&format=csv' ) - if self.o.integrity_method.startswith('cod,'): - m, v = self.o.integrity_method.split(',') - self.integrity = {'method': m, 'value': v} + if self.o.identity_method.startswith('cod,'): + m, v = self.o.identity_method.split(',') + self.identity = {'method': m, 'value': v} def poll(self) -> list: @@ -97,7 +97,7 @@ def poll(self) -> list: fname = f'noaa_{mtime}_{site}_WT.csv' m = sarracenia.Message.fromFileInfo(fname, self.o) - m['integrity'] = self.integrity + m['identity'] = self.identity m['retrievePath'] = retrievePath m['new_file'] = fname @@ -112,7 +112,7 @@ def poll(self) -> list: fname = f'noaa_{mtime}_{site}_WL.csv' m = sarracenia.Message.fromFileInfo(fname, self.o) - m['integrity'] = self.integrity + m['identity'] = self.identity m['retrievePath'] = retrievePath m['new_file'] = fname diff --git a/sarracenia/flowcb/v2wrapper.py b/sarracenia/flowcb/v2wrapper.py index 8fc6c5d37..999847c80 100755 --- a/sarracenia/flowcb/v2wrapper.py +++ b/sarracenia/flowcb/v2wrapper.py @@ -36,22 +36,22 @@ def sumstrFromMessage( msg ) -> str: accepts a v3 message as argument msg. returns the corresponding sum string for a v2 'sum' header. """ - if 'integrity' in msg: - if msg['integrity']['method'] in sum_algo_v3tov2: - sa = sum_algo_v3tov2[msg["integrity"]["method"]] + if 'identity' in msg: + if msg['identity']['method'] in sum_algo_v3tov2: + sa = sum_algo_v3tov2[msg["identity"]["method"]] else: # FIXME ... 1st md5name case... default when unknown... - logger.error('integrity method unknown to v2: %s, replacing with md5name' % msg['integrity']['method'] ) + logger.error('identity method unknown to v2: %s, replacing with md5name' % msg['identity']['method'] ) sa = 'n' sv = md5(bytes(os.path.basename(msg['relPath']),'utf-8')).hexdigest() # transform sum value if sa in ['0', 'a']: - sv = msg["integrity"]["value"] + sv = msg["identity"]["value"] elif sa in ['z']: - sv = sum_algo_v3tov2[msg["integrity"]["value"]] + sv = sum_algo_v3tov2[msg["identity"]["value"]] else: sv = encode( - decode(msg["integrity"]["value"].encode('utf-8'), "base64"), + decode(msg["identity"]["value"].encode('utf-8'), "base64"), 'hex').decode('utf-8') sumstr = sa + ',' + sv else: diff --git a/sarracenia/flowcb/work/check.py b/sarracenia/flowcb/work/check.py index 06ce7a6b2..9ffdd088a 100755 --- a/sarracenia/flowcb/work/check.py +++ b/sarracenia/flowcb/work/check.py @@ -58,7 +58,7 @@ def after_work(self, worklist): os.unlink(local_file) continue - logger.info("integrity %s " % msg['integrity'] ) + logger.info("identity %s " % msg['identity'] ) logger.info("filesize %s " % msg['size']) lstat = os.stat(local_file) @@ -69,14 +69,14 @@ def after_work(self, worklist): self.size_mismatches+=1 self.o.post_baseUrl = msg['baseUrl'] - self.o.integrity_method = msg['integrity']['method'] + self.o.identity_method = msg['identity']['method'] downloaded_msg = sarracenia.Message.fromFileData( local_file, self.o, lstat ) - if downloaded_msg['integrity'] != msg['integrity']: + if downloaded_msg['identity'] != msg['identity']: logger.error( "checksum differ (corrupted ?) lf %s msg %s" % - (downloaded_msg['integrity'], msg['integrity'])) + (downloaded_msg['identity'], msg['identity'])) self.checksum_mismatches+=1 if self.content_check(local_file): diff --git a/sarracenia/integrity/__init__.py b/sarracenia/identity/__init__.py similarity index 89% rename from sarracenia/integrity/__init__.py rename to sarracenia/identity/__init__.py index 7c00e25f5..ae8a51dc4 100755 --- a/sarracenia/integrity/__init__.py +++ b/sarracenia/identity/__init__.py @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) -class Integrity: +class Identity: """ A class for algorithms to get a fingerprint for a file being announced. Appropriate fingerprinting algorithms vary according to file type. @@ -41,7 +41,7 @@ class Integrity: def registered_as(self): return a one letter string identifying the algorithm (mostly for v2.) - in v3, the registration comes from the integrity sub-class name in lower case. + in v3, the registration comes from the identity sub-class name in lower case. def set_path(self,path): start a checksum for the given path... initialize. @@ -52,7 +52,7 @@ def update(self,chunk): @staticmethod def factory(method='sha512'): - for sc in Integrity.__subclasses__(): + for sc in Identity.__subclasses__(): if method == sc.__name__.lower(): return sc() return None @@ -79,11 +79,11 @@ def value(self): return b64encode(self.filehash.digest()).decode('utf-8') -import sarracenia.integrity.arbitrary -import sarracenia.integrity.md5 -import sarracenia.integrity.random -import sarracenia.integrity.sha512 +import sarracenia.identity.arbitrary +import sarracenia.identity.md5 +import sarracenia.identity.random +import sarracenia.identity.sha512 known_methods = [] -for sc in Integrity.__subclasses__(): +for sc in Identity.__subclasses__(): known_methods.append(sc.__name__.lower()) diff --git a/sarracenia/integrity/arbitrary.py b/sarracenia/identity/arbitrary.py similarity index 91% rename from sarracenia/integrity/arbitrary.py rename to sarracenia/identity/arbitrary.py index f1e9c8644..686a0452d 100755 --- a/sarracenia/integrity/arbitrary.py +++ b/sarracenia/identity/arbitrary.py @@ -1,4 +1,4 @@ -from sarracenia.integrity import Integrity +from sarracenia.identity import Identity default_value = "None" @@ -7,7 +7,7 @@ def set_default_value(value): global default_value default_value = value -class Arbitrary(Integrity): +class Arbitrary(Identity): """ For applications where there is no known way of determining equivalence, allow them to supply an arbitrary tag, that can be used to compare products for duplicate suppression purposes. diff --git a/sarracenia/integrity/md5.py b/sarracenia/identity/md5.py similarity index 78% rename from sarracenia/integrity/md5.py rename to sarracenia/identity/md5.py index b574fd5c9..8c814a07c 100755 --- a/sarracenia/integrity/md5.py +++ b/sarracenia/identity/md5.py @@ -1,12 +1,12 @@ from hashlib import md5 -from sarracenia.integrity import Integrity +from sarracenia.identity import Identity -class Md5(Integrity): +class Md5(Identity): """ use the (obsolete) Message Digest 5 (MD5) algorithm, applied on the content - of a file, to generate an integrity signature. + of a file, to generate an identity signature. """ @staticmethod diff --git a/sarracenia/integrity/random.py b/sarracenia/identity/random.py similarity index 82% rename from sarracenia/integrity/random.py rename to sarracenia/identity/random.py index 56bca77d1..636b3288e 100755 --- a/sarracenia/integrity/random.py +++ b/sarracenia/identity/random.py @@ -1,9 +1,9 @@ import random -from sarracenia.integrity import Integrity +from sarracenia.identity import Identity -class Random(Integrity): +class Random(Identity): """ Trivial minimalist checksumming algorithm, returns random number for any file. """ diff --git a/sarracenia/integrity/sha512.py b/sarracenia/identity/sha512.py similarity index 87% rename from sarracenia/integrity/sha512.py rename to sarracenia/identity/sha512.py index 6da71e2c8..0dc2faae7 100755 --- a/sarracenia/integrity/sha512.py +++ b/sarracenia/identity/sha512.py @@ -1,13 +1,13 @@ from hashlib import sha512 -from sarracenia.integrity import Integrity +from sarracenia.identity import Identity # =================================== # checksum_s class # =================================== -class Sha512(Integrity): +class Sha512(Identity): """ The SHA512 algorithm to checksum the entire file, which is called 's'. """ diff --git a/sarracenia/moth/amqp.py b/sarracenia/moth/amqp.py index 214332baa..94325eac9 100755 --- a/sarracenia/moth/amqp.py +++ b/sarracenia/moth/amqp.py @@ -586,7 +586,7 @@ def putNewMessage(self, # FIXME: assert ( len(self.o['exchange']) == self.o['post_exchangeSplit'] ) # if that isn't true... then there is something wrong... should we check ? idx = sum( - bytearray(body['integrity']['value'], + bytearray(body['identity']['value'], 'ascii')) % len(self.o['exchange']) exchange = self.o['exchange'][idx] else: diff --git a/sarracenia/moth/mqtt.py b/sarracenia/moth/mqtt.py index 428231243..db0547600 100755 --- a/sarracenia/moth/mqtt.py +++ b/sarracenia/moth/mqtt.py @@ -655,7 +655,7 @@ def putNewMessage(self, # FIXME: assert ( len(self.o['exchange']) == self.o['post_exchangeSplit'] ) # if that isn't true... then there is something wrong... should we check ? idx = sum( - bytearray(body['integrity']['value'], + bytearray(body['identity']['value'], 'ascii')) % len(self.o['exchange']) exchange = self.o['exchange'][idx] else: diff --git a/sarracenia/postformat/v02.py b/sarracenia/postformat/v02.py index 393509a8e..49a7fbd0f 100644 --- a/sarracenia/postformat/v02.py +++ b/sarracenia/postformat/v02.py @@ -92,7 +92,7 @@ def importMine(body, headers, options) -> sarracenia.Message: elif sm in ['link']: msg['fileOp']['link'] = msg['link'] else: - msg["integrity"] = {"method": sm, "value": sv} + msg["identity"] = {"method": sm, "value": sv} elif sm == 'remove': msg['fileOp'] = { 'remove': '' } elif sm == 'mkdir': @@ -105,7 +105,7 @@ def importMine(body, headers, options) -> sarracenia.Message: elif sm == 'md5name': pass else: - msg["integrity"] = {"method": sm, "value": sv} + msg["identity"] = {"method": sm, "value": sv} del msg['sum'] except Exception as ex: @@ -143,7 +143,7 @@ def exportMine(body, options) -> (str, dict, str): # v2wrapp for h in [ 'pubTime', 'baseUrl', 'fileOp', 'relPath', 'size', - 'blocks', 'content', 'integrity' + 'blocks', 'content', 'identity' ]: if h in v2m.headers: del v2m.headers[h] diff --git a/sarracenia/postformat/v03.py b/sarracenia/postformat/v03.py index c5a6a8c41..0bea6ee09 100644 --- a/sarracenia/postformat/v03.py +++ b/sarracenia/postformat/v03.py @@ -49,6 +49,10 @@ def importMine(body, headers, options) -> sarracenia.Message: msg['retrievePath'] = msg['retPath'] del msg['retPath'] + if 'integrity' in msg: + msg['identity'] = msg['integrity'] + del msg['integrity'] + """ observed Sarracenia v2.20.08p1 and earlier have 'parts' header in v03 messages. bug, or implementation did not keep up. Applying Postel's Robustness principle: normalizing messages. diff --git a/sarracenia/redisqueue.py b/sarracenia/redisqueue.py index 1a7c058bc..761ed36ca 100755 --- a/sarracenia/redisqueue.py +++ b/sarracenia/redisqueue.py @@ -138,8 +138,8 @@ def _in_cache(self, message) -> bool: sumstr = jsonpickle.encode(message['noDupe']['key']) elif 'fileOp' in message: sumstr = jsonpickle.encode(message['fileOp']) - elif 'integrity' in message: - sumstr = jsonpickle.encode(message['integrity']) + elif 'identity' in message: + sumstr = jsonpickle.encode(message['identity']) elif 'pubTime' in message: sumstr = jsonpickle.encode(message['pubTime']) else: diff --git a/sarracenia/sr.py b/sarracenia/sr.py index c1ce2f739..57014d7f7 100755 --- a/sarracenia/sr.py +++ b/sarracenia/sr.py @@ -2351,7 +2351,7 @@ def convert(self): k = 'post_broker' elif (k == 'directory' ) and (component == 'poll'): k = 'path' - elif k == 'integrity': + elif k in [ 'identity', 'integrity' ]: if line[1][0] in sum_algo_v2tov3: method=sum_algo_v2tov3[line[1][0]] if method == 'cod': diff --git a/sarracenia/transfer/__init__.py b/sarracenia/transfer/__init__.py index 6cc85166d..9512654fa 100755 --- a/sarracenia/transfer/__init__.py +++ b/sarracenia/transfer/__init__.py @@ -354,8 +354,8 @@ def readlocal_write(self, local_file, local_offset=0, length=0, dst=None): def set_sumalgo(self, sumalgo): logger.debug("sr_proto set_sumalgo %s" % sumalgo) - self.sumalgo = sarracenia.integrity.Integrity.factory(sumalgo) - self.data_sumalgo = sarracenia.integrity.Integrity.factory(sumalgo) + self.sumalgo = sarracenia.identity.Identity.factory(sumalgo) + self.data_sumalgo = sarracenia.identity.Identity.factory(sumalgo) def set_sumArbitrary(self, value): self.sumalgo.value = value diff --git a/tests/sarracenia/flowcb/nodupe/disk_test.py b/tests/sarracenia/flowcb/nodupe/disk_test.py index 192025322..840531f69 100644 --- a/tests/sarracenia/flowcb/nodupe/disk_test.py +++ b/tests/sarracenia/flowcb/nodupe/disk_test.py @@ -70,6 +70,7 @@ def test_deriveKey(tmp_path): thismsg = make_message() thismsg['identity'] = {'method': "cod"} assert nodupe.deriveKey(thismsg) == thismsg["relPath"] + thismsg['identity'] = {'method': "method", 'value': "value\n"} assert nodupe.deriveKey(thismsg) == "method,value" @@ -600,4 +601,5 @@ def test_after_accept__InFlight(tmp_path, capsys): assert len(test_after_accept__InFlight.rejected) == 1 assert len(test_after_accept__InFlight.incoming) == 1 assert test_after_accept__InFlight.incoming[0]['mtime'] == message_old['mtime'] - assert test_after_accept__InFlight.rejected[0]['reject'].count(message_new['mtime'] + " too new (nodupe check), newest allowed") \ No newline at end of file + assert test_after_accept__InFlight.rejected[0]['reject'].count(message_new['mtime'] + " too new (nodupe check), newest allowed") +