From 62c711adf3a4d431550d2d38f73933697dadf2da Mon Sep 17 00:00:00 2001
From: William Welling <wwelling@tamu.edu>
Date: Mon, 16 Sep 2024 09:32:41 -0500
Subject: [PATCH] Instance duplication report workflow

---
 README.md                                     | 46 +++++++++++++++++++
 .../nodes/callNumberEmail.json                | 29 ++++++++++++
 .../nodes/callNumberMoveToNode.json           | 12 +++++
 .../nodes/callNumberQuery.json                | 27 +++++++++++
 .../nodes/connectToLdp.json                   | 13 ++++++
 .../nodes/disconnectFromLDP.json              | 10 ++++
 duplicate-instance-report/nodes/end.json      |  6 +++
 .../nodes/isbnConnectTo.json                  |  7 +++
 .../nodes/isbnEmail.json                      | 29 ++++++++++++
 .../nodes/isbnMoveToNode.json                 | 12 +++++
 .../nodes/isbnQuery.json                      | 27 +++++++++++
 .../nodes/issnConnectTo.json                  |  7 +++
 .../nodes/issnEmail.json                      | 29 ++++++++++++
 .../nodes/issnMoveToNode.json                 | 12 +++++
 .../nodes/issnQuery.json                      | 27 +++++++++++
 duplicate-instance-report/nodes/join.json     |  7 +++
 .../nodes/lccnConnectTo.json                  |  7 +++
 .../nodes/lccnEmail.json                      | 29 ++++++++++++
 .../nodes/lccnMoveToNode.json                 | 12 +++++
 .../nodes/lccnQuery.json                      | 27 +++++++++++
 .../nodes/oclcConnectTo.json                  |  7 +++
 .../nodes/oclcEmail.json                      | 29 ++++++++++++
 duplicate-instance-report/nodes/oclcFork.json | 11 +++++
 .../nodes/oclcQuery.json                      | 27 +++++++++++
 .../nodes/reportEmail.json                    | 29 ++++++++++++
 .../nodes/reportQuery.json                    | 27 +++++++++++
 .../nodes/reportZip.json                      | 22 +++++++++
 duplicate-instance-report/nodes/start.json    |  8 ++++
 duplicate-instance-report/setup.json          |  1 +
 duplicate-instance-report/workflow.json       | 27 +++++++++++
 30 files changed, 563 insertions(+)
 create mode 100644 duplicate-instance-report/nodes/callNumberEmail.json
 create mode 100644 duplicate-instance-report/nodes/callNumberMoveToNode.json
 create mode 100644 duplicate-instance-report/nodes/callNumberQuery.json
 create mode 100644 duplicate-instance-report/nodes/connectToLdp.json
 create mode 100644 duplicate-instance-report/nodes/disconnectFromLDP.json
 create mode 100644 duplicate-instance-report/nodes/end.json
 create mode 100644 duplicate-instance-report/nodes/isbnConnectTo.json
 create mode 100644 duplicate-instance-report/nodes/isbnEmail.json
 create mode 100644 duplicate-instance-report/nodes/isbnMoveToNode.json
 create mode 100644 duplicate-instance-report/nodes/isbnQuery.json
 create mode 100644 duplicate-instance-report/nodes/issnConnectTo.json
 create mode 100644 duplicate-instance-report/nodes/issnEmail.json
 create mode 100644 duplicate-instance-report/nodes/issnMoveToNode.json
 create mode 100644 duplicate-instance-report/nodes/issnQuery.json
 create mode 100644 duplicate-instance-report/nodes/join.json
 create mode 100644 duplicate-instance-report/nodes/lccnConnectTo.json
 create mode 100644 duplicate-instance-report/nodes/lccnEmail.json
 create mode 100644 duplicate-instance-report/nodes/lccnMoveToNode.json
 create mode 100644 duplicate-instance-report/nodes/lccnQuery.json
 create mode 100644 duplicate-instance-report/nodes/oclcConnectTo.json
 create mode 100644 duplicate-instance-report/nodes/oclcEmail.json
 create mode 100644 duplicate-instance-report/nodes/oclcFork.json
 create mode 100644 duplicate-instance-report/nodes/oclcQuery.json
 create mode 100644 duplicate-instance-report/nodes/reportEmail.json
 create mode 100644 duplicate-instance-report/nodes/reportQuery.json
 create mode 100644 duplicate-instance-report/nodes/reportZip.json
 create mode 100644 duplicate-instance-report/nodes/start.json
 create mode 100644 duplicate-instance-report/setup.json
 create mode 100644 duplicate-instance-report/workflow.json

diff --git a/README.md b/README.md
index 53dca5c9..4892fa7d 100644
--- a/README.md
+++ b/README.md
@@ -796,3 +796,49 @@ Either wait for scheduled event to occur or manually execute via:
 ```shell
 fw run evans-pres-repr
 ```
+
+## duplicate-instance-report
+
+### Instance Duplication Report Workflow (Scheduled)
+
+This workflow emails a CSV report for Call Number, ISBN, LCCN, ISSN, and OCLC matches as well as a full instance duplication CSV report compressed with ZIP format.
+
+The full instance duplication CSV has the following columns. The title and author columns are wrapped in double quotes.
+
+```
+HRID, HRID2, OCLC, ISBN, ISSN, LCCN, TITLE, TITLE2, AUTHOR, AUTHOR2
+```
+
+Requires the following path to exist: `/mnt/workflows/${tenantId}/duplicate-instance-report`.
+
+
+These variables are required when building and running the workflow:
+
+| Variable Name                  | Allowed Values | Brief Description |
+| ------------------------------ | -------------- | ----------------- |
+| ldp-url                        | URL            | LDP URL. |
+| ldp-user                       | string         | LDP login username. |
+| ldp-password                   | string         | LDP login password. |
+| duplicate-instance-report-from | e-mail address | The e-mail address of the report sender. |
+| duplicate-instance-report-to   | e-mail address | The e-mail address of the report recipient. |
+
+The scheduled event is for **12:00 AM UTC**, on the first of the month, only in January, April, July, and October.
+
+```shell
+fw config set ldp-url ***
+fw config set ldp-user ***
+fw config set ldp-password ***
+fw config set duplicate-instance-report-from ***
+fw config set duplicate-instance-report-to ***
+```
+
+To build and activate:
+```shell
+fw build duplicate-instance-report
+fw activate duplicate-instance-report
+```
+
+Either wait for scheduled event to occur or manually execute via:
+```shell
+fw run duplicate-instance-report
+```
diff --git a/duplicate-instance-report/nodes/callNumberEmail.json b/duplicate-instance-report/nodes/callNumberEmail.json
new file mode 100644
index 00000000..aaf1c6d6
--- /dev/null
+++ b/duplicate-instance-report/nodes/callNumberEmail.json
@@ -0,0 +1,29 @@
+{
+  "id": "b5dca523-4a24-4d02-a122-6ea8c9f34ac4",
+  "name": "Email Call Number Matches",
+  "description": "Email CSV with instances with matching call numbers",
+  "deserializeAs": "EmailTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    },
+    {
+      "key": "callNumberCount",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {},
+  "mailFrom": "{{{duplicate-instance-report-from}}}",
+  "mailTo": "{{{duplicate-instance-report-to}}}",
+  "mailText": "The instances with matching Call Number report has completed, see the results attached.\n${callNumberCount} instance matches found.",
+  "mailMarkup": "<p>The instances with matching Call Number report has completed, see the results attached.</p><br/>${callNumberCount} instance matches found.",
+  "mailSubject": "Matching Call Number Instances Report - LDP {{{ldp-url}}}",
+  "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/call-number-${timestamp}.csv",
+  "includeAttachment": "${callNumberCount}",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/callNumberMoveToNode.json b/duplicate-instance-report/nodes/callNumberMoveToNode.json
new file mode 100644
index 00000000..e4e7de3b
--- /dev/null
+++ b/duplicate-instance-report/nodes/callNumberMoveToNode.json
@@ -0,0 +1,12 @@
+{
+  "id": "42c50baa-7d73-48ad-bd9b-53ffd6cd6eda",
+  "name": "CALL NUMBER",
+  "description": "",
+  "deserializeAs": "MoveToNode",
+  "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
+  "nodes": [
+    "{{{mod-workflow}}}/databaseQueryTask/7a20c05e-2a58-42f2-9769-42e5b7045343",
+    "{{{mod-workflow}}}/emailTask/b5dca523-4a24-4d02-a122-6ea8c9f34ac4",
+    "{{{mod-workflow}}}/connectTo/53c5ca0f-9116-4bcf-86ba-ad4b4770aaaf"
+  ]
+}
diff --git a/duplicate-instance-report/nodes/callNumberQuery.json b/duplicate-instance-report/nodes/callNumberQuery.json
new file mode 100644
index 00000000..72fab78e
--- /dev/null
+++ b/duplicate-instance-report/nodes/callNumberQuery.json
@@ -0,0 +1,27 @@
+{
+  "id": "7a20c05e-2a58-42f2-9769-42e5b7045343",
+  "name": "Call Number Match Query",
+  "description": "Query to find instances with matching call number",
+  "deserializeAs": "DatabaseQueryTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {
+    "key": "callNumberCount",
+    "type": "PROCESS",
+    "spin": false
+  },
+  "designation": "ldp",
+  "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/call-number-${timestamp}.csv",
+  "resultType": "CSV",
+  "includeHeader": true,
+  "query": "WITH call_number AS (SELECT ie.instance_hrid, he.call_number_type_id, he.call_number_type_name, he.call_number, TRIM(CONCAT_WS(' ', he.call_number_prefix, he.call_number, he.call_number_suffix)) AS full_call_number, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.holdings_ext he ON ie.instance_id = he.instance_id JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE he.call_number IS NOT NULL AND he.call_number !~ '^\\s*$' AND he.call_number_type_id IS NOT NULL AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.call_number AS call_number, r.call_number AS call_number2, l.full_call_number, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM call_number l JOIN call_number r ON l.full_call_number = r.full_call_number AND l.instance_hrid < r.instance_hrid",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/connectToLdp.json b/duplicate-instance-report/nodes/connectToLdp.json
new file mode 100644
index 00000000..1c29382b
--- /dev/null
+++ b/duplicate-instance-report/nodes/connectToLdp.json
@@ -0,0 +1,13 @@
+{
+  "id": "11f065f0-f1ea-47ed-abc9-146099572e7b",
+  "name": "Connect LDP",
+  "description": "Connect to the LDP",
+  "deserializeAs": "DatabaseConnectionTask",
+  "inputVariables": [],
+  "outputVariable": {},
+  "designation": "ldp",
+  "url": "{{{ldp-url}}}",
+  "username": "{{{ldp-user}}}",
+  "password": "{{{ldp-password}}}",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/disconnectFromLDP.json b/duplicate-instance-report/nodes/disconnectFromLDP.json
new file mode 100644
index 00000000..3487cb0f
--- /dev/null
+++ b/duplicate-instance-report/nodes/disconnectFromLDP.json
@@ -0,0 +1,10 @@
+{
+  "id": "db806bf5-49b8-4f8a-bf96-fc0258d3c31e",
+  "name": "Disconnect LDP",
+  "description": "Disconnect from the LDP",
+  "deserializeAs": "DatabaseDisconnectTask",
+  "inputVariables": [],
+  "outputVariable": {},
+  "designation": "ldp",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/end.json b/duplicate-instance-report/nodes/end.json
new file mode 100644
index 00000000..72be1aa8
--- /dev/null
+++ b/duplicate-instance-report/nodes/end.json
@@ -0,0 +1,6 @@
+{
+  "id": "3c9848b3-f5a4-4753-b916-2b73c88d9409",
+  "name": "End",
+  "description": "End of duplicate instance report workflow",
+  "deserializeAs": "EndEvent"
+}
diff --git a/duplicate-instance-report/nodes/isbnConnectTo.json b/duplicate-instance-report/nodes/isbnConnectTo.json
new file mode 100644
index 00000000..35bcda68
--- /dev/null
+++ b/duplicate-instance-report/nodes/isbnConnectTo.json
@@ -0,0 +1,7 @@
+{
+  "id": "c36f8e08-7e43-49b3-923f-ebb8629617c8",
+  "name": "ISBN Complete",
+  "description": "",
+  "deserializeAs": "ConnectTo",
+  "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
+}
diff --git a/duplicate-instance-report/nodes/isbnEmail.json b/duplicate-instance-report/nodes/isbnEmail.json
new file mode 100644
index 00000000..055536eb
--- /dev/null
+++ b/duplicate-instance-report/nodes/isbnEmail.json
@@ -0,0 +1,29 @@
+{
+  "id": "66356870-9e8a-406b-ae3d-5fcffef0c556",
+  "name": "Email ISBN Matches",
+  "description": "Email CSV with instances with matching ISBN",
+  "deserializeAs": "EmailTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    },
+    {
+      "key": "isbnCount",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {},
+  "mailFrom": "{{{duplicate-instance-report-from}}}",
+  "mailTo": "{{{duplicate-instance-report-to}}}",
+  "mailText": "The instances with matching ISBN report has completed, see the results attached.\n${isbnCount} instance matches found.",
+  "mailMarkup": "<p>The instances with matching ISBN report has completed, see the results attached.</p><br/>${isbnCount} instance matches found.",
+  "mailSubject": "Matching ISBN Instances Report - LDP {{{ldp-url}}}",
+  "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/isbn-${timestamp}.csv",
+  "includeAttachment": "${isbnCount}",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/isbnMoveToNode.json b/duplicate-instance-report/nodes/isbnMoveToNode.json
new file mode 100644
index 00000000..c57e8c8b
--- /dev/null
+++ b/duplicate-instance-report/nodes/isbnMoveToNode.json
@@ -0,0 +1,12 @@
+{
+  "id": "408a6624-8be4-4bdf-8688-75c23a730187",
+  "name": "ISBN",
+  "description": "",
+  "deserializeAs": "MoveToNode",
+  "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
+  "nodes": [
+    "{{{mod-workflow}}}/databaseQueryTask/84b7be20-ce1f-45f4-ad2e-7dff0c131e42",
+    "{{{mod-workflow}}}/emailTask/66356870-9e8a-406b-ae3d-5fcffef0c556",
+    "{{{mod-workflow}}}/connectTo/c36f8e08-7e43-49b3-923f-ebb8629617c8"
+  ]
+}
diff --git a/duplicate-instance-report/nodes/isbnQuery.json b/duplicate-instance-report/nodes/isbnQuery.json
new file mode 100644
index 00000000..d680f787
--- /dev/null
+++ b/duplicate-instance-report/nodes/isbnQuery.json
@@ -0,0 +1,27 @@
+{
+  "id": "84b7be20-ce1f-45f4-ad2e-7dff0c131e42",
+  "name": "ISBN Match Query",
+  "description": "Query to find instances with matching ISBN",
+  "deserializeAs": "DatabaseQueryTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {
+    "key": "isbnCount",
+    "type": "PROCESS",
+    "spin": false
+  },
+  "designation": "ldp",
+  "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/isbn-${timestamp}.csv",
+  "resultType": "CSV",
+  "includeHeader": true,
+  "query": "WITH isbn AS (SELECT ie.instance_hrid, ii.identifier, NULLIF(LEFT(RIGHT(REGEXP_REPLACE(ii.identifier, ' .*', ''), 10), 9), ':') AS isbn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'ISBN' AND ii.identifier NOT SIMILAR TO '(:|$)%' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.identifier, l.isbn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM isbn l JOIN isbn r ON l.isbn = r.isbn AND l.instance_hrid < r.instance_hrid",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/issnConnectTo.json b/duplicate-instance-report/nodes/issnConnectTo.json
new file mode 100644
index 00000000..f9bde9ec
--- /dev/null
+++ b/duplicate-instance-report/nodes/issnConnectTo.json
@@ -0,0 +1,7 @@
+{
+  "id": "01c05fa9-6897-4e1c-b0bc-c56b187173e2",
+  "name": "ISSN Complete",
+  "description": "",
+  "deserializeAs": "ConnectTo",
+  "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
+}
diff --git a/duplicate-instance-report/nodes/issnEmail.json b/duplicate-instance-report/nodes/issnEmail.json
new file mode 100644
index 00000000..bcbb29b9
--- /dev/null
+++ b/duplicate-instance-report/nodes/issnEmail.json
@@ -0,0 +1,29 @@
+{
+  "id": "f2cd0c7a-0dbe-4daa-b1ea-29455044a505",
+  "name": "Email ISSN Matches",
+  "description": "Email CSV with instances with matching ISSN",
+  "deserializeAs": "EmailTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    },
+    {
+      "key": "issnCount",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {},
+  "mailFrom": "{{{duplicate-instance-report-from}}}",
+  "mailTo": "{{{duplicate-instance-report-to}}}",
+  "mailText": "The instances with matching ISSN report has completed, see the results attached.\n${issnCount} instance matches found.",
+  "mailMarkup": "<p>The instances with matching ISSN report has completed, see the results attached.</p><br/>${issnCount} instance matches found.",
+  "mailSubject": "Matching ISSN Instances Report - LDP {{{ldp-url}}}",
+  "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/issn-${timestamp}.csv",
+  "includeAttachment": "${issnCount}",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/issnMoveToNode.json b/duplicate-instance-report/nodes/issnMoveToNode.json
new file mode 100644
index 00000000..55c60d8e
--- /dev/null
+++ b/duplicate-instance-report/nodes/issnMoveToNode.json
@@ -0,0 +1,12 @@
+{
+  "id": "3b2668b9-58aa-447e-b907-40cee663a3ea",
+  "name": "ISSN",
+  "description": "",
+  "deserializeAs": "MoveToNode",
+  "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
+  "nodes": [
+    "{{{mod-workflow}}}/databaseQueryTask/403c8b97-2901-494a-bcdd-bfbbe23a1aa8",
+    "{{{mod-workflow}}}/emailTask/f2cd0c7a-0dbe-4daa-b1ea-29455044a505",
+    "{{{mod-workflow}}}/connectTo/01c05fa9-6897-4e1c-b0bc-c56b187173e2"
+  ]
+}
diff --git a/duplicate-instance-report/nodes/issnQuery.json b/duplicate-instance-report/nodes/issnQuery.json
new file mode 100644
index 00000000..19c3849b
--- /dev/null
+++ b/duplicate-instance-report/nodes/issnQuery.json
@@ -0,0 +1,27 @@
+{
+  "id": "403c8b97-2901-494a-bcdd-bfbbe23a1aa8",
+  "name": "ISSN Match Query",
+  "description": "Query to find instances with matching ISSN",
+  "deserializeAs": "DatabaseQueryTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {
+    "key": "issnCount",
+    "type": "PROCESS",
+    "spin": false
+  },
+  "designation": "ldp",
+  "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/issn-${timestamp}.csv",
+  "resultType": "CSV",
+  "includeHeader": true,
+  "query": "WITH issn_with_title AS (SELECT ie.instance_hrid, sm.content AS issn, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE field = '022' AND ord = 1 AND sf = 'a'), issn AS (SELECT issnwt.instance_hrid, issnwt.issn, issnwt.title, sm.content AS author FROM issn_with_title issnwt JOIN public.srs_marctab sm ON issnwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.issn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM issn l JOIN issn r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/join.json b/duplicate-instance-report/nodes/join.json
new file mode 100644
index 00000000..e9fe5076
--- /dev/null
+++ b/duplicate-instance-report/nodes/join.json
@@ -0,0 +1,7 @@
+{
+  "id": "af9d6b6c-6d59-4735-9a7c-2314a68e0985",
+  "name": "Join",
+  "description": "",
+  "deserializeAs": "ParallelGateway",
+  "nodes": []
+}
diff --git a/duplicate-instance-report/nodes/lccnConnectTo.json b/duplicate-instance-report/nodes/lccnConnectTo.json
new file mode 100644
index 00000000..8af4d1ce
--- /dev/null
+++ b/duplicate-instance-report/nodes/lccnConnectTo.json
@@ -0,0 +1,7 @@
+{
+  "id": "35bb9b1d-ffa5-4d20-b3c9-afd71ff3990e",
+  "name": "LCCN Complete",
+  "description": "",
+  "deserializeAs": "ConnectTo",
+  "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
+}
diff --git a/duplicate-instance-report/nodes/lccnEmail.json b/duplicate-instance-report/nodes/lccnEmail.json
new file mode 100644
index 00000000..2fbc66f4
--- /dev/null
+++ b/duplicate-instance-report/nodes/lccnEmail.json
@@ -0,0 +1,29 @@
+{
+  "id": "a32eae62-d2f3-4b7b-bbd4-275ba140555e",
+  "name": "Email LCCN Matches",
+  "description": "Email CSV with instances with matching LCCN",
+  "deserializeAs": "EmailTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    },
+    {
+      "key": "lccnCount",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {},
+  "mailFrom": "{{{duplicate-instance-report-from}}}",
+  "mailTo": "{{{duplicate-instance-report-to}}}",
+  "mailText": "The instances with matching LCCN report has completed, see the results attached.\n${lccnCount} instance matches found.",
+  "mailMarkup": "<p>The instances with matching LCCN report has completed, see the results attached.</p><br/>${lccnCount} instance matches found.",
+  "mailSubject": "Matching LCCN Instances Report - LDP {{{ldp-url}}}",
+  "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/lccn-${timestamp}.csv",
+  "includeAttachment": "${lccnCount}",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/lccnMoveToNode.json b/duplicate-instance-report/nodes/lccnMoveToNode.json
new file mode 100644
index 00000000..cae67f9a
--- /dev/null
+++ b/duplicate-instance-report/nodes/lccnMoveToNode.json
@@ -0,0 +1,12 @@
+{
+  "id": "b4e72925-19a4-47ed-8f31-e32cf8905123",
+  "name": "LCCN",
+  "description": "",
+  "deserializeAs": "MoveToNode",
+  "gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
+  "nodes": [
+    "{{{mod-workflow}}}/databaseQueryTask/9d943d76-ab21-4ca6-9eae-8df6e962c037",
+    "{{{mod-workflow}}}/emailTask/a32eae62-d2f3-4b7b-bbd4-275ba140555e",
+    "{{{mod-workflow}}}/connectTo/35bb9b1d-ffa5-4d20-b3c9-afd71ff3990e"
+  ]
+}
diff --git a/duplicate-instance-report/nodes/lccnQuery.json b/duplicate-instance-report/nodes/lccnQuery.json
new file mode 100644
index 00000000..3aacdf68
--- /dev/null
+++ b/duplicate-instance-report/nodes/lccnQuery.json
@@ -0,0 +1,27 @@
+{
+  "id": "9d943d76-ab21-4ca6-9eae-8df6e962c037",
+  "name": "LCCN Match Query",
+  "description": "Query to find instances with matching LCCN",
+  "deserializeAs": "DatabaseQueryTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {
+    "key": "lccnCount",
+    "type": "PROCESS",
+    "spin": false
+  },
+  "designation": "ldp",
+  "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/lccn-${timestamp}.csv",
+  "resultType": "CSV",
+  "includeHeader": true,
+  "query": "WITH lccn AS (SELECT ie.instance_hrid, ii.identifier AS lccn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'LCCN' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.lccn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM lccn l JOIN lccn r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/oclcConnectTo.json b/duplicate-instance-report/nodes/oclcConnectTo.json
new file mode 100644
index 00000000..17354509
--- /dev/null
+++ b/duplicate-instance-report/nodes/oclcConnectTo.json
@@ -0,0 +1,7 @@
+{
+  "id": "53c5ca0f-9116-4bcf-86ba-ad4b4770aaaf",
+  "name": "Call Number Complete",
+  "description": "",
+  "deserializeAs": "ConnectTo",
+  "nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
+}
diff --git a/duplicate-instance-report/nodes/oclcEmail.json b/duplicate-instance-report/nodes/oclcEmail.json
new file mode 100644
index 00000000..57c4fbaf
--- /dev/null
+++ b/duplicate-instance-report/nodes/oclcEmail.json
@@ -0,0 +1,29 @@
+{
+  "id": "7bc30e09-6b69-40da-9a53-34e75fdcc488",
+  "name": "Email OCLC Matches",
+  "description": "Email CSV with instances with matching OCLC",
+  "deserializeAs": "EmailTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    },
+    {
+      "key": "oclcCount",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {},
+  "mailFrom": "{{{duplicate-instance-report-from}}}",
+  "mailTo": "{{{duplicate-instance-report-to}}}",
+  "mailText": "The instances with matching OCLC report has completed, see the results attached.\n${oclcCount} instance matches found.",
+  "mailMarkup": "<p>The instances with matching OCLC report has completed, see the results attached.</p><br/>${oclcCount} instance matches found.",
+  "mailSubject": "Matching OCLC Instances Report - LDP {{{ldp-url}}}",
+  "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/oclc-${timestamp}.csv",
+  "includeAttachment": "${oclcCount}",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/oclcFork.json b/duplicate-instance-report/nodes/oclcFork.json
new file mode 100644
index 00000000..1953ac29
--- /dev/null
+++ b/duplicate-instance-report/nodes/oclcFork.json
@@ -0,0 +1,11 @@
+{
+  "id": "aea23b81-06b1-4795-8bab-ea562a854c83",
+  "name": "Fork",
+  "description": "",
+  "deserializeAs": "ParallelGateway",
+  "nodes": [
+    "{{{mod-workflow}}}/databaseQueryTask/724e34bf-c539-411e-bb62-cd15da9ff515",
+    "{{{mod-workflow}}}/emailTask/7bc30e09-6b69-40da-9a53-34e75fdcc488",
+    "{{{mod-workflow}}}/parallelGateway/af9d6b6c-6d59-4735-9a7c-2314a68e0985"
+  ]
+}
diff --git a/duplicate-instance-report/nodes/oclcQuery.json b/duplicate-instance-report/nodes/oclcQuery.json
new file mode 100644
index 00000000..5fc89e61
--- /dev/null
+++ b/duplicate-instance-report/nodes/oclcQuery.json
@@ -0,0 +1,27 @@
+{
+  "id": "724e34bf-c539-411e-bb62-cd15da9ff515",
+  "name": "OCLC Match Query",
+  "description": "Query to find instances with matching OCLC",
+  "deserializeAs": "DatabaseQueryTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {
+    "key": "oclcCount",
+    "type": "PROCESS",
+    "spin": false
+  },
+  "designation": "ldp",
+  "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/oclc-${timestamp}.csv",
+  "resultType": "CSV",
+  "includeHeader": true,
+  "query": "WITH oclc_with_title AS (SELECT ie.instance_hrid, LTRIM(REGEXP_REPLACE(SUBSTRING(sm.content FROM 8), '[^0-9]', '', 'g'), '0') AS oclc, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE sm.field = '035' AND sm.ord = 1 AND sm.sf IN ('a', 'z') AND sm.content LIKE '(OCoLC)%'), oclc AS (SELECT oclcwt.instance_hrid, oclcwt.oclc, oclcwt.title, sm.content AS author FROM oclc_with_title oclcwt JOIN public.srs_marctab sm ON oclcwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.oclc, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM oclc l JOIN oclc r ON l.oclc = r.oclc AND l.instance_hrid < r.instance_hrid",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/reportEmail.json b/duplicate-instance-report/nodes/reportEmail.json
new file mode 100644
index 00000000..01188e98
--- /dev/null
+++ b/duplicate-instance-report/nodes/reportEmail.json
@@ -0,0 +1,29 @@
+{
+  "id": "da165c76-b891-4d01-9fbb-f8da1b1a6d12",
+  "name": "Email Instance Duplications Report",
+  "description": "Email CSV with duplicate instance HRIDs and match criteria",
+  "deserializeAs": "EmailTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    },
+    {
+      "key": "count",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {},
+  "mailFrom": "{{{duplicate-instance-report-from}}}",
+  "mailTo": "{{{duplicate-instance-report-to}}}",
+  "mailText": "The Instance Duplication Report has completed, see the results attached.\n${count} instance matches found.",
+  "mailMarkup": "<p>The Instance Duplication Report has completed, see the results attached.</p><br/>${count} instance matches found.",
+  "mailSubject": "Instance Duplication Report - LDP {{{ldp-url}}}",
+  "attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.zip",
+  "includeAttachment": "${count}",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/reportQuery.json b/duplicate-instance-report/nodes/reportQuery.json
new file mode 100644
index 00000000..a92c06d5
--- /dev/null
+++ b/duplicate-instance-report/nodes/reportQuery.json
@@ -0,0 +1,27 @@
+{
+  "id": "f548dd78-4cf5-4eb9-9b28-e4738470d44b",
+  "name": "Find Instance Duplications",
+  "description": "Query to return matching instances by identifier from LDP",
+  "deserializeAs": "DatabaseQueryTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {
+    "key": "count",
+    "type": "PROCESS",
+    "spin": false
+  },
+  "designation": "ldp",
+  "outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.csv",
+  "resultType": "CSV",
+  "includeHeader": true,
+  "query": "WITH oclc_with_title AS (SELECT ie.instance_hrid, LTRIM(REGEXP_REPLACE(SUBSTRING(sm.content FROM 8), '[^0-9]', '', 'g'), '0') AS oclc, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE sm.field = '035' AND sm.ord = 1 AND sm.sf IN ('a', 'z') AND sm.content LIKE '(OCoLC)%'), oclc AS (SELECT oclcwt.instance_hrid, oclcwt.oclc, oclcwt.title, sm.content AS author FROM oclc_with_title oclcwt JOIN public.srs_marctab sm ON oclcwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), oclc_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'oclc' AS match_type FROM oclc l JOIN oclc r ON l.oclc = r.oclc AND l.instance_hrid < r.instance_hrid), isbn AS (SELECT ie.instance_hrid, NULLIF(LEFT(RIGHT(REGEXP_REPLACE(ii.identifier, ' .*', ''), 10), 9), ':') AS isbn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'ISBN' AND ii.identifier NOT SIMILAR TO '(:|$)%' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), isbn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'isbn' AS match_type FROM isbn l JOIN isbn r ON l.isbn = r.isbn AND l.instance_hrid < r.instance_hrid), lccn AS (SELECT ie.instance_hrid, ii.identifier AS lccn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'LCCN' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), lccn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'lccn' AS match_type FROM lccn l JOIN lccn r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid), issn_with_title AS (SELECT ie.instance_hrid, sm.content AS issn, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE field = '022' AND ord = 1 AND sf = 'a'), issn AS (SELECT issnwt.instance_hrid, issnwt.issn, issnwt.title, sm.content AS author FROM issn_with_title issnwt JOIN public.srs_marctab sm ON issnwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), issn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'issn' AS match_type FROM issn l JOIN issn r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid), all_matches AS (SELECT hrid, hrid2, title, title2, author, author2, match_type FROM oclc_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM isbn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM lccn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM issn_matches) SELECT hrid AS HRID, hrid2 AS HRID2, MAX(CASE WHEN match_type = 'oclc' THEN 'T' END) AS OCLC, MAX(CASE WHEN match_type = 'isbn' THEN 'T' END) AS ISBN, MAX(CASE WHEN match_type = 'issn' THEN 'T' END) AS ISSN, MAX(CASE WHEN match_type = 'lccn' THEN 'T' END) AS LCCN, '\"' || REPLACE(title, '\"', '\"\"') || '\"' AS TITLE, '\"' || REPLACE(title2, '\"', '\"\"') || '\"' AS TITLE2, '\"' || REPLACE(author, '\"', '\"\"') || '\"' AS AUTHOR, '\"' || REPLACE(author2, '\"', '\"\"') || '\"' AS AUTHOR2 FROM all_matches GROUP BY hrid, hrid2, title, title2, author, author2",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/reportZip.json b/duplicate-instance-report/nodes/reportZip.json
new file mode 100644
index 00000000..b4c51c69
--- /dev/null
+++ b/duplicate-instance-report/nodes/reportZip.json
@@ -0,0 +1,22 @@
+{
+  "id": "e14e6cfc-b069-4f2f-8903-d94714fd2574",
+  "name": "Compress Instance Duplications Report",
+  "description": "Compress the instance duplication report into a ZIP archive",
+  "deserializeAs": "CompressFileTask",
+  "inputVariables": [
+    {
+      "key": "timestamp",
+      "type": "PROCESS"
+    },
+    {
+      "key": "tenantId",
+      "type": "PROCESS"
+    }
+  ],
+  "outputVariable": {},
+  "source": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.csv",
+  "destination": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.zip",
+  "format": "ZIP",
+  "container": "NONE",
+  "asyncBefore": true
+}
diff --git a/duplicate-instance-report/nodes/start.json b/duplicate-instance-report/nodes/start.json
new file mode 100644
index 00000000..884e9da4
--- /dev/null
+++ b/duplicate-instance-report/nodes/start.json
@@ -0,0 +1,8 @@
+{
+  "id": "bf39c55f-1fd6-41a5-a98a-c28d6d05da9d",
+  "name": "Start",
+  "description": "Start of instance duplication report workflow",
+  "type": "SCHEDULED",
+  "deserializeAs": "StartEvent",
+  "expression": "0 0 0 1 1,4,7,10 ?"
+}
diff --git a/duplicate-instance-report/setup.json b/duplicate-instance-report/setup.json
new file mode 100644
index 00000000..0967ef42
--- /dev/null
+++ b/duplicate-instance-report/setup.json
@@ -0,0 +1 @@
+{}
diff --git a/duplicate-instance-report/workflow.json b/duplicate-instance-report/workflow.json
new file mode 100644
index 00000000..1ca45016
--- /dev/null
+++ b/duplicate-instance-report/workflow.json
@@ -0,0 +1,27 @@
+{
+  "id": "93c90a8c-5b39-4499-a0bc-a24d75444a5c",
+  "name": "Duplicate Instance Report Workflow",
+  "description": "Identify and report instances with matching OCLC, ISBN, ISSN, LCCN, or Call Number",
+  "versionTag": "1.0",
+  "historyTimeToLive": 0,
+  "deploymentId": null,
+  "active": false,
+  "setup": {
+    "asyncBefore": false,
+    "asyncAfter": false
+  },
+  "nodes": [
+    "{{{mod-workflow}}}/startEvent/bf39c55f-1fd6-41a5-a98a-c28d6d05da9d",
+    "{{{mod-workflow}}}/databaseConnectionTask/11f065f0-f1ea-47ed-abc9-146099572e7b",
+    "{{{mod-workflow}}}/parallelGateway/aea23b81-06b1-4795-8bab-ea562a854c83",
+    "{{{mod-workflow}}}/moveToNode/408a6624-8be4-4bdf-8688-75c23a730187",
+    "{{{mod-workflow}}}/moveToNode/3b2668b9-58aa-447e-b907-40cee663a3ea",
+    "{{{mod-workflow}}}/moveToNode/b4e72925-19a4-47ed-8f31-e32cf8905123",
+    "{{{mod-workflow}}}/databaseQueryTask/f548dd78-4cf5-4eb9-9b28-e4738470d44b",
+    "{{{mod-workflow}}}/compressFileTask/e14e6cfc-b069-4f2f-8903-d94714fd2574",
+    "{{{mod-workflow}}}/emailTask/da165c76-b891-4d01-9fbb-f8da1b1a6d12",
+    "{{{mod-workflow}}}/databaseDisconnectTask/db806bf5-49b8-4f8a-bf96-fc0258d3c31e",
+    "{{{mod-workflow}}}/endEvent/3c9848b3-f5a4-4753-b916-2b73c88d9409"
+  ],
+  "initialContext": {}
+}