From 62c711adf3a4d431550d2d38f73933697dadf2da Mon Sep 17 00:00:00 2001 From: William Welling Date: Mon, 16 Sep 2024 09:32:41 -0500 Subject: [PATCH] Instance duplication report workflow --- README.md | 46 +++++++++++++++++++ .../nodes/callNumberEmail.json | 29 ++++++++++++ .../nodes/callNumberMoveToNode.json | 12 +++++ .../nodes/callNumberQuery.json | 27 +++++++++++ .../nodes/connectToLdp.json | 13 ++++++ .../nodes/disconnectFromLDP.json | 10 ++++ duplicate-instance-report/nodes/end.json | 6 +++ .../nodes/isbnConnectTo.json | 7 +++ .../nodes/isbnEmail.json | 29 ++++++++++++ .../nodes/isbnMoveToNode.json | 12 +++++ .../nodes/isbnQuery.json | 27 +++++++++++ .../nodes/issnConnectTo.json | 7 +++ .../nodes/issnEmail.json | 29 ++++++++++++ .../nodes/issnMoveToNode.json | 12 +++++ .../nodes/issnQuery.json | 27 +++++++++++ duplicate-instance-report/nodes/join.json | 7 +++ .../nodes/lccnConnectTo.json | 7 +++ .../nodes/lccnEmail.json | 29 ++++++++++++ .../nodes/lccnMoveToNode.json | 12 +++++ .../nodes/lccnQuery.json | 27 +++++++++++ .../nodes/oclcConnectTo.json | 7 +++ .../nodes/oclcEmail.json | 29 ++++++++++++ duplicate-instance-report/nodes/oclcFork.json | 11 +++++ .../nodes/oclcQuery.json | 27 +++++++++++ .../nodes/reportEmail.json | 29 ++++++++++++ .../nodes/reportQuery.json | 27 +++++++++++ .../nodes/reportZip.json | 22 +++++++++ duplicate-instance-report/nodes/start.json | 8 ++++ duplicate-instance-report/setup.json | 1 + duplicate-instance-report/workflow.json | 27 +++++++++++ 30 files changed, 563 insertions(+) create mode 100644 duplicate-instance-report/nodes/callNumberEmail.json create mode 100644 duplicate-instance-report/nodes/callNumberMoveToNode.json create mode 100644 duplicate-instance-report/nodes/callNumberQuery.json create mode 100644 duplicate-instance-report/nodes/connectToLdp.json create mode 100644 duplicate-instance-report/nodes/disconnectFromLDP.json create mode 100644 duplicate-instance-report/nodes/end.json create mode 100644 
duplicate-instance-report/nodes/isbnConnectTo.json create mode 100644 duplicate-instance-report/nodes/isbnEmail.json create mode 100644 duplicate-instance-report/nodes/isbnMoveToNode.json create mode 100644 duplicate-instance-report/nodes/isbnQuery.json create mode 100644 duplicate-instance-report/nodes/issnConnectTo.json create mode 100644 duplicate-instance-report/nodes/issnEmail.json create mode 100644 duplicate-instance-report/nodes/issnMoveToNode.json create mode 100644 duplicate-instance-report/nodes/issnQuery.json create mode 100644 duplicate-instance-report/nodes/join.json create mode 100644 duplicate-instance-report/nodes/lccnConnectTo.json create mode 100644 duplicate-instance-report/nodes/lccnEmail.json create mode 100644 duplicate-instance-report/nodes/lccnMoveToNode.json create mode 100644 duplicate-instance-report/nodes/lccnQuery.json create mode 100644 duplicate-instance-report/nodes/oclcConnectTo.json create mode 100644 duplicate-instance-report/nodes/oclcEmail.json create mode 100644 duplicate-instance-report/nodes/oclcFork.json create mode 100644 duplicate-instance-report/nodes/oclcQuery.json create mode 100644 duplicate-instance-report/nodes/reportEmail.json create mode 100644 duplicate-instance-report/nodes/reportQuery.json create mode 100644 duplicate-instance-report/nodes/reportZip.json create mode 100644 duplicate-instance-report/nodes/start.json create mode 100644 duplicate-instance-report/setup.json create mode 100644 duplicate-instance-report/workflow.json diff --git a/README.md b/README.md index 53dca5c9..4892fa7d 100644 --- a/README.md +++ b/README.md @@ -796,3 +796,49 @@ Either wait for scheduled event to occur or manually execute via: ```shell fw run evans-pres-repr ``` + +## duplicate-instance-report + +### Instance Duplication Report Workflow (Scheduled) + +This workflow emails a CSV report for Call Number, ISBN, LCCN, ISSN, and OCLC matches as well as a full instance duplication CSV report compressed with ZIP format. 
+ +The full instance duplication CSV has the following columns. The title and author columns are wrapped in double quotes. + +``` +HRID, HRID2, OCLC, ISBN, ISSN, CALL_NUMBER, LCCN, TITLE, TITLE2, AUTHOR, AUTHOR2 +``` + +Requires the following path `/mnt/workflows/${tenantId}/duplicate-instance-report`. + + +These variables are required when building and running the workflow: + +| Variable Name | Allowed Values | Brief Description | +| ------------------------------ | -------------- | ----------------- | +| ldp-url | URL | LDP URL. | +| ldp-user | string | LDP login username. | +| ldp-password | string | LDP login password. | +| duplicate-instance-report-from | e-mail address | The e-mail address of the report sender. | +| duplicate-instance-report-to | e-mail address | The e-mail address of the report recipient. | + +The scheduled event is for **12:00 AM UTC**, on the first of the month, only in January, April, July, and October. + +```shell +fw config set ldp-url *** +fw config set ldp-user *** +fw config set ldp-password *** +fw config set duplicate-instance-report-from *** +fw config set duplicate-instance-report-to *** +``` + +To build and activate: +```shell +fw build duplicate-instance-report +fw activate duplicate-instance-report +``` + +Either wait for scheduled event to occur or manually execute via: +```shell +fw run duplicate-instance-report +``` diff --git a/duplicate-instance-report/nodes/callNumberEmail.json b/duplicate-instance-report/nodes/callNumberEmail.json new file mode 100644 index 00000000..aaf1c6d6 --- /dev/null +++ b/duplicate-instance-report/nodes/callNumberEmail.json @@ -0,0 +1,29 @@ +{ + "id": "b5dca523-4a24-4d02-a122-6ea8c9f34ac4", + "name": "Email Call Number Matches", + "description": "Email CSV with instances with matching call numbers", + "deserializeAs": "EmailTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + }, + { + "key": "callNumberCount", + "type": 
"PROCESS" + } + ], + "outputVariable": {}, + "mailFrom": "{{{duplicate-instance-report-from}}}", + "mailTo": "{{{duplicate-instance-report-to}}}", + "mailText": "The instances with matching Call Number report has completed, see the results attached.\n${callNumberCount} instance matches found.", + "mailMarkup": "<p>The instances with matching Call Number report has completed, see the results attached.</p><br/>${callNumberCount} instance matches found.", + "mailSubject": "Matching Call Number Instances Report - LDP {{{ldp-url}}}", + "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/call-number-${timestamp}.csv", + "includeAttachment": "${callNumberCount}", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/callNumberMoveToNode.json b/duplicate-instance-report/nodes/callNumberMoveToNode.json new file mode 100644 index 00000000..e4e7de3b --- /dev/null +++ b/duplicate-instance-report/nodes/callNumberMoveToNode.json @@ -0,0 +1,12 @@ +{ + "id": "42c50baa-7d73-48ad-bd9b-53ffd6cd6eda", + "name": "CALL NUMBER", + "description": "", + "deserializeAs": "MoveToNode", + "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83", + "nodes": [ + "{{{mod-workflow}}}/databaseQueryTask/7a20c05e-2a58-42f2-9769-42e5b7045343", + "{{{mod-workflow}}}/emailTask/b5dca523-4a24-4d02-a122-6ea8c9f34ac4", + "{{{mod-workflow}}}/connectTo/53c5ca0f-9116-4bcf-86ba-ad4b4770aaaf" + ] +} diff --git a/duplicate-instance-report/nodes/callNumberQuery.json b/duplicate-instance-report/nodes/callNumberQuery.json new file mode 100644 index 00000000..72fab78e --- /dev/null +++ b/duplicate-instance-report/nodes/callNumberQuery.json @@ -0,0 +1,27 @@ +{ + "id": "7a20c05e-2a58-42f2-9769-42e5b7045343", + "name": "Call Number Match Query", + "description": "Query to find instances with matching call number", + "deserializeAs": "DatabaseQueryTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + } + ], + "outputVariable": { + "key": "callNumberCount", + "type": "PROCESS", + "spin": false + }, + "designation": "ldp", + "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/call-number-${timestamp}.csv", + "resultType": "CSV", + "includeHeader": true, + "query": "WITH call_number AS (SELECT ie.instance_hrid, he.call_number_type_id, he.call_number_type_name, he.call_number, TRIM(CONCAT_WS(' 
', he.call_number_prefix, he.call_number, he.call_number_suffix)) AS full_call_number, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.holdings_ext he ON ie.instance_id = he.instance_id JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE he.call_number IS NOT NULL AND he.call_number !~ '^\\s*$' AND he.call_number_type_id IS NOT NULL AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.call_number AS call_number, r.call_number AS call_number2, l.full_call_number, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM call_number l JOIN call_number r ON l.full_call_number = r.full_call_number AND l.instance_hrid < r.instance_hrid", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/connectToLdp.json b/duplicate-instance-report/nodes/connectToLdp.json new file mode 100644 index 00000000..1c29382b --- /dev/null +++ b/duplicate-instance-report/nodes/connectToLdp.json @@ -0,0 +1,13 @@ +{ + "id": "11f065f0-f1ea-47ed-abc9-146099572e7b", + "name": "Connect LDP", + "description": "Connect to the LDP", + "deserializeAs": "DatabaseConnectionTask", + "inputVariables": [], + "outputVariable": {}, + "designation": "ldp", + "url": "{{{ldp-url}}}", + "username": "{{{ldp-user}}}", + "password": "{{{ldp-password}}}", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/disconnectFromLDP.json b/duplicate-instance-report/nodes/disconnectFromLDP.json new file mode 100644 index 00000000..3487cb0f --- /dev/null +++ b/duplicate-instance-report/nodes/disconnectFromLDP.json @@ -0,0 +1,10 @@ +{ + "id": "db806bf5-49b8-4f8a-bf96-fc0258d3c31e", + "name": "Disconnect LDP", + "description": "Disconnect from the LDP", + "deserializeAs": "DatabaseDisconnectTask", + 
"inputVariables": [], + "outputVariable": {}, + "designation": "ldp", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/end.json b/duplicate-instance-report/nodes/end.json new file mode 100644 index 00000000..72be1aa8 --- /dev/null +++ b/duplicate-instance-report/nodes/end.json @@ -0,0 +1,6 @@ +{ + "id": "3c9848b3-f5a4-4753-b916-2b73c88d9409", + "name": "End", + "description": "End of duplicate instance report workflow", + "deserializeAs": "EndEvent" +} diff --git a/duplicate-instance-report/nodes/isbnConnectTo.json b/duplicate-instance-report/nodes/isbnConnectTo.json new file mode 100644 index 00000000..35bcda68 --- /dev/null +++ b/duplicate-instance-report/nodes/isbnConnectTo.json @@ -0,0 +1,7 @@ +{ + "id": "c36f8e08-7e43-49b3-923f-ebb8629617c8", + "name": "ISBN Complete", + "description": "", + "deserializeAs": "ConnectTo", + "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985" +} diff --git a/duplicate-instance-report/nodes/isbnEmail.json b/duplicate-instance-report/nodes/isbnEmail.json new file mode 100644 index 00000000..055536eb --- /dev/null +++ b/duplicate-instance-report/nodes/isbnEmail.json @@ -0,0 +1,29 @@ +{ + "id": "66356870-9e8a-406b-ae3d-5fcffef0c556", + "name": "Email ISBN Matches", + "description": "Email CSV with instances with matching ISBN", + "deserializeAs": "EmailTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + }, + { + "key": "isbnCount", + "type": "PROCESS" + } + ], + "outputVariable": {}, + "mailFrom": "{{{duplicate-instance-report-from}}}", + "mailTo": "{{{duplicate-instance-report-to}}}", + "mailText": "The instances with matching ISBN report has completed, see the results attached.\n${isbnCount} instance matches found.", + "mailMarkup": "<p>The instances with matching ISBN report has completed, see the results attached.</p><br/>${isbnCount} instance matches found.", + "mailSubject": "Matching ISBN Instances Report - LDP {{{ldp-url}}}", + "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/isbn-${timestamp}.csv", + "includeAttachment": "${isbnCount}", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/isbnMoveToNode.json b/duplicate-instance-report/nodes/isbnMoveToNode.json new file mode 100644 index 00000000..c57e8c8b --- /dev/null +++ b/duplicate-instance-report/nodes/isbnMoveToNode.json @@ -0,0 +1,12 @@ +{ + "id": "408a6624-8be4-4bdf-8688-75c23a730187", + "name": "ISBN", + "description": "", + "deserializeAs": "MoveToNode", + "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83", + "nodes": [ + "{{{mod-workflow}}}/databaseQueryTask/84b7be20-ce1f-45f4-ad2e-7dff0c131e42", + "{{{mod-workflow}}}/emailTask/66356870-9e8a-406b-ae3d-5fcffef0c556", + "{{{mod-workflow}}}/connectTo/c36f8e08-7e43-49b3-923f-ebb8629617c8" + ] +} diff --git a/duplicate-instance-report/nodes/isbnQuery.json b/duplicate-instance-report/nodes/isbnQuery.json new file mode 100644 index 00000000..d680f787 --- /dev/null +++ b/duplicate-instance-report/nodes/isbnQuery.json @@ -0,0 +1,27 @@ +{ + "id": "84b7be20-ce1f-45f4-ad2e-7dff0c131e42", + "name": "ISBN Match Query", + "description": "Query to find instances with matching ISBN", + "deserializeAs": "DatabaseQueryTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + } + ], + "outputVariable": { + "key": "isbnCount", + "type": "PROCESS", + "spin": false + }, + "designation": "ldp", + "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/isbn-${timestamp}.csv", + "resultType": "CSV", + "includeHeader": true, + "query": "WITH isbn AS (SELECT ie.instance_hrid, ii.identifier, NULLIF(LEFT(RIGHT(REGEXP_REPLACE(ii.identifier, ' .*', ''), 10), 9), ':') AS isbn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN 
folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'ISBN' AND ii.identifier NOT SIMILAR TO '(:|$)%' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.identifier, l.isbn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM isbn l JOIN isbn r ON l.isbn = r.isbn AND l.instance_hrid < r.instance_hrid", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/issnConnectTo.json b/duplicate-instance-report/nodes/issnConnectTo.json new file mode 100644 index 00000000..f9bde9ec --- /dev/null +++ b/duplicate-instance-report/nodes/issnConnectTo.json @@ -0,0 +1,7 @@ +{ + "id": "01c05fa9-6897-4e1c-b0bc-c56b187173e2", + "name": "ISSN Complete", + "description": "", + "deserializeAs": "ConnectTo", + "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985" +} diff --git a/duplicate-instance-report/nodes/issnEmail.json b/duplicate-instance-report/nodes/issnEmail.json new file mode 100644 index 00000000..bcbb29b9 --- /dev/null +++ b/duplicate-instance-report/nodes/issnEmail.json @@ -0,0 +1,29 @@ +{ + "id": "f2cd0c7a-0dbe-4daa-b1ea-29455044a505", + "name": "Email ISSN Matches", + "description": "Email CSV with instances with matching ISSN", + "deserializeAs": "EmailTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + }, + { + "key": "issnCount", + "type": "PROCESS" + } + ], + "outputVariable": {}, + "mailFrom": "{{{duplicate-instance-report-from}}}", + "mailTo": "{{{duplicate-instance-report-to}}}", + "mailText": "The instances with matching ISSN report has completed, see the results attached.\n${issnCount} instance 
matches found.", + "mailMarkup": "<p>The instances with matching ISSN report has completed, see the results attached.</p><br/>${issnCount} instance matches found.", + "mailSubject": "Matching ISSN Instances Report - LDP {{{ldp-url}}}", + "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/issn-${timestamp}.csv", + "includeAttachment": "${issnCount}", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/issnMoveToNode.json b/duplicate-instance-report/nodes/issnMoveToNode.json new file mode 100644 index 00000000..55c60d8e --- /dev/null +++ b/duplicate-instance-report/nodes/issnMoveToNode.json @@ -0,0 +1,12 @@ +{ + "id": "3b2668b9-58aa-447e-b907-40cee663a3ea", + "name": "ISSN", + "description": "", + "deserializeAs": "MoveToNode", + "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83", + "nodes": [ + "{{{mod-workflow}}}/databaseQueryTask/403c8b97-2901-494a-bcdd-bfbbe23a1aa8", + "{{{mod-workflow}}}/emailTask/f2cd0c7a-0dbe-4daa-b1ea-29455044a505", + "{{{mod-workflow}}}/connectTo/01c05fa9-6897-4e1c-b0bc-c56b187173e2" + ] +} diff --git a/duplicate-instance-report/nodes/issnQuery.json b/duplicate-instance-report/nodes/issnQuery.json new file mode 100644 index 00000000..19c3849b --- /dev/null +++ b/duplicate-instance-report/nodes/issnQuery.json @@ -0,0 +1,27 @@ +{ + "id": "403c8b97-2901-494a-bcdd-bfbbe23a1aa8", + "name": "ISSN Match Query", + "description": "Query to find instances with matching ISSN", + "deserializeAs": "DatabaseQueryTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + } + ], + "outputVariable": { + "key": "issnCount", + "type": "PROCESS", + "spin": false + }, + "designation": "ldp", + "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/issn-${timestamp}.csv", + "resultType": "CSV", + "includeHeader": true, + "query": "WITH issn_with_title AS (SELECT ie.instance_hrid, sm.content AS issn, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE field = '022' AND ord = 1 AND sf = 
'a'), issn AS (SELECT issnwt.instance_hrid, issnwt.issn, issnwt.title, sm.content AS author FROM issn_with_title issnwt JOIN public.srs_marctab sm ON issnwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.issn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM issn l JOIN issn r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/join.json b/duplicate-instance-report/nodes/join.json new file mode 100644 index 00000000..e9fe5076 --- /dev/null +++ b/duplicate-instance-report/nodes/join.json @@ -0,0 +1,7 @@ +{ + "id": "af9d6b6c-6d59-4735-9a7c-2314a68e0985", + "name": "Join", + "description": "", + "deserializeAs": "ParallelGateway", + "nodes": [] +} diff --git a/duplicate-instance-report/nodes/lccnConnectTo.json b/duplicate-instance-report/nodes/lccnConnectTo.json new file mode 100644 index 00000000..8af4d1ce --- /dev/null +++ b/duplicate-instance-report/nodes/lccnConnectTo.json @@ -0,0 +1,7 @@ +{ + "id": "35bb9b1d-ffa5-4d20-b3c9-afd71ff3990e", + "name": "LCCN Complete", + "description": "", + "deserializeAs": "ConnectTo", + "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985" +} diff --git a/duplicate-instance-report/nodes/lccnEmail.json b/duplicate-instance-report/nodes/lccnEmail.json new file mode 100644 index 00000000..2fbc66f4 --- /dev/null +++ b/duplicate-instance-report/nodes/lccnEmail.json @@ -0,0 +1,29 @@ +{ + "id": "a32eae62-d2f3-4b7b-bbd4-275ba140555e", + "name": "Email LCCN Matches", + "description": "Email CSV with instances with matching LCCN", + "deserializeAs": "EmailTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": 
"PROCESS" + }, + { + "key": "lccnCount", + "type": "PROCESS" + } + ], + "outputVariable": {}, + "mailFrom": "{{{duplicate-instance-report-from}}}", + "mailTo": "{{{duplicate-instance-report-to}}}", + "mailText": "The instances with matching LCCN report has completed, see the results attached.\n${lccnCount} instance matches found.", + "mailMarkup": "<p>The instances with matching LCCN report has completed, see the results attached.</p><br/>${lccnCount} instance matches found.", + "mailSubject": "Matching LCCN Instances Report - LDP {{{ldp-url}}}", + "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/lccn-${timestamp}.csv", + "includeAttachment": "${lccnCount}", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/lccnMoveToNode.json b/duplicate-instance-report/nodes/lccnMoveToNode.json new file mode 100644 index 00000000..cae67f9a --- /dev/null +++ b/duplicate-instance-report/nodes/lccnMoveToNode.json @@ -0,0 +1,12 @@ +{ + "id": "b4e72925-19a4-47ed-8f31-e32cf8905123", + "name": "LCCN", + "description": "", + "deserializeAs": "MoveToNode", + "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83", + "nodes": [ + "{{{mod-workflow}}}/databaseQueryTask/9d943d76-ab21-4ca6-9eae-8df6e962c037", + "{{{mod-workflow}}}/emailTask/a32eae62-d2f3-4b7b-bbd4-275ba140555e", + "{{{mod-workflow}}}/connectTo/35bb9b1d-ffa5-4d20-b3c9-afd71ff3990e" + ] +} diff --git a/duplicate-instance-report/nodes/lccnQuery.json b/duplicate-instance-report/nodes/lccnQuery.json new file mode 100644 index 00000000..3aacdf68 --- /dev/null +++ b/duplicate-instance-report/nodes/lccnQuery.json @@ -0,0 +1,27 @@ +{ + "id": "9d943d76-ab21-4ca6-9eae-8df6e962c037", + "name": "LCCN Match Query", + "description": "Query to find instances with matching LCCN", + "deserializeAs": "DatabaseQueryTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + } + ], + "outputVariable": { + "key": "lccnCount", + "type": "PROCESS", + "spin": false + }, + "designation": "ldp", + "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/lccn-${timestamp}.csv", + "resultType": "CSV", + "includeHeader": true, + "query": "WITH lccn AS (SELECT ie.instance_hrid, ii.identifier AS lccn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN 
public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'LCCN' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.lccn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM lccn l JOIN lccn r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/oclcConnectTo.json b/duplicate-instance-report/nodes/oclcConnectTo.json new file mode 100644 index 00000000..17354509 --- /dev/null +++ b/duplicate-instance-report/nodes/oclcConnectTo.json @@ -0,0 +1,7 @@ +{ + "id": "53c5ca0f-9116-4bcf-86ba-ad4b4770aaaf", + "name": "OCLC Complete", + "description": "", + "deserializeAs": "ConnectTo", + "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985" +} diff --git a/duplicate-instance-report/nodes/oclcEmail.json b/duplicate-instance-report/nodes/oclcEmail.json new file mode 100644 index 00000000..57c4fbaf --- /dev/null +++ b/duplicate-instance-report/nodes/oclcEmail.json @@ -0,0 +1,29 @@ +{ + "id": "7bc30e09-6b69-40da-9a53-34e75fdcc488", + "name": "Email OCLC Matches", + "description": "Email CSV with instances with matching OCLC", + "deserializeAs": "EmailTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + }, + { + "key": "oclcCount", + "type": "PROCESS" + } + ], + "outputVariable": {}, + "mailFrom": "{{{duplicate-instance-report-from}}}", + "mailTo": "{{{duplicate-instance-report-to}}}", + "mailText": "The instances with matching OCLC report has completed, see the results attached.\n${oclcCount} instance matches found.", + "mailMarkup": "<p>The instances with matching OCLC report has completed, see the results attached.</p><br/>${oclcCount} instance matches found.", + "mailSubject": "Matching OCLC Instances Report - LDP {{{ldp-url}}}", + "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/oclc-${timestamp}.csv", + "includeAttachment": "${oclcCount}", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/oclcFork.json b/duplicate-instance-report/nodes/oclcFork.json new file mode 100644 index 00000000..1953ac29 --- /dev/null +++ b/duplicate-instance-report/nodes/oclcFork.json @@ -0,0 +1,11 @@ +{ + "id": "aea23b81-06b1-4795-8bab-ea562a854c83", + "name": "Fork", + "description": "", + "deserializeAs": "ParallelGateway", + "nodes": [ + "{{{mod-workflow}}}/databaseQueryTask/724e34bf-c539-411e-bb62-cd15da9ff515", + "{{{mod-workflow}}}/emailTask/7bc30e09-6b69-40da-9a53-34e75fdcc488", + "{{{mod-workflow}}}/parallelGateway/af9d6b6c-6d59-4735-9a7c-2314a68e0985" + ] +} diff --git a/duplicate-instance-report/nodes/oclcQuery.json b/duplicate-instance-report/nodes/oclcQuery.json new file mode 100644 index 00000000..5fc89e61 --- /dev/null +++ b/duplicate-instance-report/nodes/oclcQuery.json @@ -0,0 +1,27 @@ +{ + "id": "724e34bf-c539-411e-bb62-cd15da9ff515", + "name": "OCLC Match Query", + "description": "Query to find instances with matching OCLC", + "deserializeAs": "DatabaseQueryTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + } + ], + "outputVariable": { + "key": "oclcCount", + "type": "PROCESS", + "spin": false + }, + "designation": "ldp", + "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/oclc-${timestamp}.csv", + "resultType": "CSV", + "includeHeader": true, + "query": "WITH oclc_with_title AS (SELECT ie.instance_hrid, LTRIM(REGEXP_REPLACE(SUBSTRING(sm.content FROM 8), '[^0-9]', '', 'g'), '0') AS oclc, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE sm.field = '035' AND sm.ord = 1 AND sm.sf IN ('a', 
'z') AND sm.content LIKE '(OCoLC)%'), oclc AS (SELECT oclcwt.instance_hrid, oclcwt.oclc, oclcwt.title, sm.content AS author FROM oclc_with_title oclcwt JOIN public.srs_marctab sm ON oclcwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.oclc, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM oclc l JOIN oclc r ON l.oclc = r.oclc AND l.instance_hrid < r.instance_hrid", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/reportEmail.json b/duplicate-instance-report/nodes/reportEmail.json new file mode 100644 index 00000000..01188e98 --- /dev/null +++ b/duplicate-instance-report/nodes/reportEmail.json @@ -0,0 +1,29 @@ +{ + "id": "da165c76-b891-4d01-9fbb-f8da1b1a6d12", + "name": "Email Instance Duplications Report", + "description": "Email CSV with duplicate instance HRIDs and match criteria", + "deserializeAs": "EmailTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + }, + { + "key": "count", + "type": "PROCESS" + } + ], + "outputVariable": {}, + "mailFrom": "{{{duplicate-instance-report-from}}}", + "mailTo": "{{{duplicate-instance-report-to}}}", + "mailText": "The Instance Duplication Report has completed, see the results attached.\n${count} instance matches found.", + "mailMarkup": "<p>The Instance Duplication Report has completed, see the results attached.</p><br/>${count} instance matches found.", + "mailSubject": "Instance Duplication Report - LDP {{{ldp-url}}}", + "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.zip", + "includeAttachment": "${count}", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/reportQuery.json b/duplicate-instance-report/nodes/reportQuery.json new file mode 100644 index 00000000..a92c06d5 --- /dev/null +++ b/duplicate-instance-report/nodes/reportQuery.json @@ -0,0 +1,27 @@ +{ + "id": "f548dd78-4cf5-4eb9-9b28-e4738470d44b", + "name": "Find Instance Duplications", + "description": "Query to return matching instances by identifier from LDP", + "deserializeAs": "DatabaseQueryTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + } + ], + "outputVariable": { + "key": "count", + "type": "PROCESS", + "spin": false + }, + "designation": "ldp", + "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.csv", + "resultType": "CSV", + "includeHeader": true, + "query": "WITH oclc_with_title AS (SELECT ie.instance_hrid, LTRIM(REGEXP_REPLACE(SUBSTRING(sm.content FROM 8), '[^0-9]', '', 'g'), '0') AS oclc, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE sm.field = '035' AND sm.ord = 1 AND sm.sf IN ('a', 'z') AND sm.content LIKE '(OCoLC)%'), oclc AS (SELECT oclcwt.instance_hrid, oclcwt.oclc, oclcwt.title, sm.content AS author FROM oclc_with_title oclcwt JOIN public.srs_marctab sm ON oclcwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), oclc_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'oclc' AS match_type FROM oclc l JOIN oclc r ON l.oclc = r.oclc AND l.instance_hrid < r.instance_hrid), isbn AS 
(SELECT ie.instance_hrid, NULLIF(LEFT(RIGHT(REGEXP_REPLACE(ii.identifier, ' .*', ''), 10), 9), ':') AS isbn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'ISBN' AND ii.identifier NOT SIMILAR TO '(:|$)%' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), isbn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'isbn' AS match_type FROM isbn l JOIN isbn r ON l.isbn = r.isbn AND l.instance_hrid < r.instance_hrid), lccn AS (SELECT ie.instance_hrid, ii.identifier AS lccn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'LCCN' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), lccn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'lccn' AS match_type FROM lccn l JOIN lccn r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid), issn_with_title AS (SELECT ie.instance_hrid, sm.content AS issn, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE field = '022' AND ord = 1 AND sf = 'a'), issn AS (SELECT issnwt.instance_hrid, issnwt.issn, issnwt.title, sm.content AS author FROM issn_with_title issnwt JOIN public.srs_marctab sm ON issnwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), issn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'issn' AS match_type FROM issn l JOIN 
issn r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid), all_matches AS (SELECT hrid, hrid2, title, title2, author, author2, match_type FROM oclc_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM isbn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM lccn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM issn_matches) SELECT hrid AS HRID, hrid2 AS HRID2, MAX(CASE WHEN match_type = 'oclc' THEN 'T' END) AS OCLC, MAX(CASE WHEN match_type = 'isbn' THEN 'T' END) AS ISBN, MAX(CASE WHEN match_type = 'issn' THEN 'T' END) AS ISSN, MAX(CASE WHEN match_type = 'lccn' THEN 'T' END) AS LCCN, '\"' || REPLACE(title, '\"', '\"\"') || '\"' AS TITLE, '\"' || REPLACE(title2, '\"', '\"\"') || '\"' AS TITLE2, '\"' || REPLACE(author, '\"', '\"\"') || '\"' AS AUTHOR, '\"' || REPLACE(author2, '\"', '\"\"') || '\"' AS AUTHOR2 FROM all_matches GROUP BY hrid, hrid2, title, title2, author, author2", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/reportZip.json b/duplicate-instance-report/nodes/reportZip.json new file mode 100644 index 00000000..b4c51c69 --- /dev/null +++ b/duplicate-instance-report/nodes/reportZip.json @@ -0,0 +1,22 @@ +{ + "id": "e14e6cfc-b069-4f2f-8903-d94714fd2574", + "name": "Compress Instance Duplications Report", + "description": "Compress the instance duplication report into ZIP format", + "deserializeAs": "CompressFileTask", + "inputVariables": [ + { + "key": "timestamp", + "type": "PROCESS" + }, + { + "key": "tenantId", + "type": "PROCESS" + } + ], + "outputVariable": {}, + "source": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.csv", + "destination": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.zip", + "format": "ZIP", + "container": "NONE", + "asyncBefore": true +} diff --git a/duplicate-instance-report/nodes/start.json 
b/duplicate-instance-report/nodes/start.json new file mode 100644 index 00000000..884e9da4 --- /dev/null +++ b/duplicate-instance-report/nodes/start.json @@ -0,0 +1,8 @@ +{ + "id": "bf39c55f-1fd6-41a5-a98a-c28d6d05da9d", + "name": "Start", + "description": "Start of instance duplication report workflow", + "type": "SCHEDULED", + "deserializeAs": "StartEvent", + "expression": "0 0 0 1 1,4,7,10 ?" +} diff --git a/duplicate-instance-report/setup.json b/duplicate-instance-report/setup.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/duplicate-instance-report/setup.json @@ -0,0 +1 @@ +{} diff --git a/duplicate-instance-report/workflow.json b/duplicate-instance-report/workflow.json new file mode 100644 index 00000000..1ca45016 --- /dev/null +++ b/duplicate-instance-report/workflow.json @@ -0,0 +1,27 @@ +{ + "id": "93c90a8c-5b39-4499-a0bc-a24d75444a5c", + "name": "Duplicate Instance Report Workflow", + "description": "Identify and report instances with matching OCLC, ISBN, ISSN, LCCN, or Call Number", + "versionTag": "1.0", + "historyTimeToLive": 0, + "deploymentId": null, + "active": false, + "setup": { + "asyncBefore": false, + "asyncAfter": false + }, + "nodes": [ + "{{{mod-workflow}}}/startEvent/bf39c55f-1fd6-41a5-a98a-c28d6d05da9d", + "{{{mod-workflow}}}/databaseConnectionTask/11f065f0-f1ea-47ed-abc9-146099572e7b", + "{{{mod-workflow}}}/parallelGateway/aea23b81-06b1-4795-8bab-ea562a854c83", + "{{{mod-workflow}}}/moveToNode/408a6624-8be4-4bdf-8688-75c23a730187", + "{{{mod-workflow}}}/moveToNode/3b2668b9-58aa-447e-b907-40cee663a3ea", + "{{{mod-workflow}}}/moveToNode/b4e72925-19a4-47ed-8f31-e32cf8905123", + "{{{mod-workflow}}}/databaseQueryTask/f548dd78-4cf5-4eb9-9b28-e4738470d44b", + "{{{mod-workflow}}}/compressFileTask/e14e6cfc-b069-4f2f-8903-d94714fd2574", + "{{{mod-workflow}}}/emailTask/da165c76-b891-4d01-9fbb-f8da1b1a6d12", + "{{{mod-workflow}}}/databaseDisconnectTask/db806bf5-49b8-4f8a-bf96-fc0258d3c31e", + 
"{{{mod-workflow}}}/endEvent/3c9848b3-f5a4-4753-b916-2b73c88d9409" + ], + "initialContext": {} +}