Skip to content

Commit

Permalink
Instance duplication report workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
wwelling committed Sep 25, 2024
1 parent 63cc986 commit 62c711a
Show file tree
Hide file tree
Showing 30 changed files with 563 additions and 0 deletions.
46 changes: 46 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -796,3 +796,49 @@ Either wait for scheduled event to occur or manually execute via:
```shell
fw run evans-pres-repr
```

## duplicate-instance-report

### Instance Duplication Report Workflow (Scheduled)

This workflow emails CSV reports for Call Number, ISBN, LCCN, ISSN, and OCLC matches, as well as a full instance duplication CSV report compressed in ZIP format.

The full instance duplication CSV has the following columns. The title and author columns are wrapped in double quotes.

```
HRID, HRID2, OCLC, ISBN, ISSN, CALL_NUMBER, LCCN, TITLE, TITLE2, AUTHOR, AUTHOR2
```

Requires the following path: `/mnt/workflows/${tenantId}/duplicate-instance-report`.


These variables are required when building and running the workflow:

| Variable Name | Allowed Values | Brief Description |
| ------------------------------ | -------------- | ----------------- |
| ldp-url | URL | LDP URL. |
| ldp-user | string | LDP login username. |
| ldp-password | string | LDP login password. |
| duplicate-instance-report-from | e-mail address | The e-mail address of the report sender. |
| duplicate-instance-report-to | e-mail address | The e-mail address of the report recipient. |

The scheduled event occurs at **12:00 AM UTC** on the first day of the month, only in January, April, July, and October.

```shell
fw config set ldp-url ***
fw config set ldp-user ***
fw config set ldp-password ***
fw config set duplicate-instance-report-from ***
fw config set duplicate-instance-report-to ***
```

To build and activate:
```shell
fw build duplicate-instance-report
fw activate duplicate-instance-report
```

Either wait for scheduled event to occur or manually execute via:
```shell
fw run duplicate-instance-report
```
29 changes: 29 additions & 0 deletions duplicate-instance-report/nodes/callNumberEmail.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"id": "b5dca523-4a24-4d02-a122-6ea8c9f34ac4",
"name": "Email Call Number Matches",
"description": "Email CSV with instances with matching call numbers",
"deserializeAs": "EmailTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
},
{
"key": "callNumberCount",
"type": "PROCESS"
}
],
"outputVariable": {},
"mailFrom": "{{{duplicate-instance-report-from}}}",
"mailTo": "{{{duplicate-instance-report-to}}}",
"mailText": "The matching Call Number instances report has completed; see the results attached.\n${callNumberCount} instance matches found.",
"mailMarkup": "<p>The matching Call Number instances report has completed; see the results attached.</p><br/>${callNumberCount} instance matches found.",
"mailSubject": "Matching Call Number Instances Report - LDP {{{ldp-url}}}",
"attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/call-number-${timestamp}.csv",
"includeAttachment": "${callNumberCount}",
"asyncBefore": true
}
12 changes: 12 additions & 0 deletions duplicate-instance-report/nodes/callNumberMoveToNode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"id": "42c50baa-7d73-48ad-bd9b-53ffd6cd6eda",
"name": "CALL NUMBER",
"description": "",
"deserializeAs": "MoveToNode",
"gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
"nodes": [
"{{{mod-workflow}}}/databaseQueryTask/7a20c05e-2a58-42f2-9769-42e5b7045343",
"{{{mod-workflow}}}/emailTask/b5dca523-4a24-4d02-a122-6ea8c9f34ac4",
"{{{mod-workflow}}}/connectTo/53c5ca0f-9116-4bcf-86ba-ad4b4770aaaf"
]
}
27 changes: 27 additions & 0 deletions duplicate-instance-report/nodes/callNumberQuery.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"id": "7a20c05e-2a58-42f2-9769-42e5b7045343",
"name": "Call Number Match Query",
"description": "Query to find instances with matching call number",
"deserializeAs": "DatabaseQueryTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
}
],
"outputVariable": {
"key": "callNumberCount",
"type": "PROCESS",
"spin": false
},
"designation": "ldp",
"outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/call-number-${timestamp}.csv",
"resultType": "CSV",
"includeHeader": true,
"query": "WITH call_number AS (SELECT ie.instance_hrid, he.call_number_type_id, he.call_number_type_name, he.call_number, TRIM(CONCAT_WS(' ', he.call_number_prefix, he.call_number, he.call_number_suffix)) AS full_call_number, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.holdings_ext he ON ie.instance_id = he.instance_id JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE he.call_number IS NOT NULL AND he.call_number !~ '^\\s*$' AND he.call_number_type_id IS NOT NULL AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.call_number AS call_number, r.call_number AS call_number2, l.full_call_number, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM call_number l JOIN call_number r ON l.full_call_number = r.full_call_number AND l.instance_hrid < r.instance_hrid",
"asyncBefore": true
}
13 changes: 13 additions & 0 deletions duplicate-instance-report/nodes/connectToLdp.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"id": "11f065f0-f1ea-47ed-abc9-146099572e7b",
"name": "Connect LDP",
"description": "Connect to the LDP",
"deserializeAs": "DatabaseConnectionTask",
"inputVariables": [],
"outputVariable": {},
"designation": "ldp",
"url": "{{{ldp-url}}}",
"username": "{{{ldp-user}}}",
"password": "{{{ldp-password}}}",
"asyncBefore": true
}
10 changes: 10 additions & 0 deletions duplicate-instance-report/nodes/disconnectFromLDP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"id": "db806bf5-49b8-4f8a-bf96-fc0258d3c31e",
"name": "Disconnect LDP",
"description": "Disconnect from the LDP",
"deserializeAs": "DatabaseDisconnectTask",
"inputVariables": [],
"outputVariable": {},
"designation": "ldp",
"asyncBefore": true
}
6 changes: 6 additions & 0 deletions duplicate-instance-report/nodes/end.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"id": "3c9848b3-f5a4-4753-b916-2b73c88d9409",
"name": "End",
"description": "End of duplicate instance report workflow",
"deserializeAs": "EndEvent"
}
7 changes: 7 additions & 0 deletions duplicate-instance-report/nodes/isbnConnectTo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"id": "c36f8e08-7e43-49b3-923f-ebb8629617c8",
"name": "ISBN Complete",
"description": "",
"deserializeAs": "ConnectTo",
"nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
}
29 changes: 29 additions & 0 deletions duplicate-instance-report/nodes/isbnEmail.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"id": "66356870-9e8a-406b-ae3d-5fcffef0c556",
"name": "Email ISBN Matches",
"description": "Email CSV with instances with matching ISBN",
"deserializeAs": "EmailTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
},
{
"key": "isbnCount",
"type": "PROCESS"
}
],
"outputVariable": {},
"mailFrom": "{{{duplicate-instance-report-from}}}",
"mailTo": "{{{duplicate-instance-report-to}}}",
"mailText": "The matching ISBN instances report has completed; see the results attached.\n${isbnCount} instance matches found.",
"mailMarkup": "<p>The matching ISBN instances report has completed; see the results attached.</p><br/>${isbnCount} instance matches found.",
"mailSubject": "Matching ISBN Instances Report - LDP {{{ldp-url}}}",
"attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/isbn-${timestamp}.csv",
"includeAttachment": "${isbnCount}",
"asyncBefore": true
}
12 changes: 12 additions & 0 deletions duplicate-instance-report/nodes/isbnMoveToNode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"id": "408a6624-8be4-4bdf-8688-75c23a730187",
"name": "ISBN",
"description": "",
"deserializeAs": "MoveToNode",
"gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
"nodes": [
"{{{mod-workflow}}}/databaseQueryTask/84b7be20-ce1f-45f4-ad2e-7dff0c131e42",
"{{{mod-workflow}}}/emailTask/66356870-9e8a-406b-ae3d-5fcffef0c556",
"{{{mod-workflow}}}/connectTo/c36f8e08-7e43-49b3-923f-ebb8629617c8"
]
}
27 changes: 27 additions & 0 deletions duplicate-instance-report/nodes/isbnQuery.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"id": "84b7be20-ce1f-45f4-ad2e-7dff0c131e42",
"name": "ISBN Match Query",
"description": "Query to find instances with matching ISBN",
"deserializeAs": "DatabaseQueryTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
}
],
"outputVariable": {
"key": "isbnCount",
"type": "PROCESS",
"spin": false
},
"designation": "ldp",
"outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/isbn-${timestamp}.csv",
"resultType": "CSV",
"includeHeader": true,
"query": "WITH isbn AS (SELECT ie.instance_hrid, ii.identifier, NULLIF(LEFT(RIGHT(REGEXP_REPLACE(ii.identifier, ' .*', ''), 10), 9), ':') AS isbn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'ISBN' AND ii.identifier NOT SIMILAR TO '(:|$)%' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.identifier, l.isbn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM isbn l JOIN isbn r ON l.isbn = r.isbn AND l.instance_hrid < r.instance_hrid",
"asyncBefore": true
}
7 changes: 7 additions & 0 deletions duplicate-instance-report/nodes/issnConnectTo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"id": "01c05fa9-6897-4e1c-b0bc-c56b187173e2",
"name": "ISSN Complete",
"description": "",
"deserializeAs": "ConnectTo",
"nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
}
29 changes: 29 additions & 0 deletions duplicate-instance-report/nodes/issnEmail.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"id": "f2cd0c7a-0dbe-4daa-b1ea-29455044a505",
"name": "Email ISSN Matches",
"description": "Email CSV with instances with matching ISSN",
"deserializeAs": "EmailTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
},
{
"key": "issnCount",
"type": "PROCESS"
}
],
"outputVariable": {},
"mailFrom": "{{{duplicate-instance-report-from}}}",
"mailTo": "{{{duplicate-instance-report-to}}}",
"mailText": "The matching ISSN instances report has completed; see the results attached.\n${issnCount} instance matches found.",
"mailMarkup": "<p>The matching ISSN instances report has completed; see the results attached.</p><br/>${issnCount} instance matches found.",
"mailSubject": "Matching ISSN Instances Report - LDP {{{ldp-url}}}",
"attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/issn-${timestamp}.csv",
"includeAttachment": "${issnCount}",
"asyncBefore": true
}
12 changes: 12 additions & 0 deletions duplicate-instance-report/nodes/issnMoveToNode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"id": "3b2668b9-58aa-447e-b907-40cee663a3ea",
"name": "ISSN",
"description": "",
"deserializeAs": "MoveToNode",
"gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
"nodes": [
"{{{mod-workflow}}}/databaseQueryTask/403c8b97-2901-494a-bcdd-bfbbe23a1aa8",
"{{{mod-workflow}}}/emailTask/f2cd0c7a-0dbe-4daa-b1ea-29455044a505",
"{{{mod-workflow}}}/connectTo/01c05fa9-6897-4e1c-b0bc-c56b187173e2"
]
}
27 changes: 27 additions & 0 deletions duplicate-instance-report/nodes/issnQuery.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"id": "403c8b97-2901-494a-bcdd-bfbbe23a1aa8",
"name": "ISSN Match Query",
"description": "Query to find instances with matching ISSN",
"deserializeAs": "DatabaseQueryTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
}
],
"outputVariable": {
"key": "issnCount",
"type": "PROCESS",
"spin": false
},
"designation": "ldp",
"outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/issn-${timestamp}.csv",
"resultType": "CSV",
"includeHeader": true,
"query": "WITH issn_with_title AS (SELECT ie.instance_hrid, sm.content AS issn, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE field = '022' AND ord = 1 AND sf = 'a'), issn AS (SELECT issnwt.instance_hrid, issnwt.issn, issnwt.title, sm.content AS author FROM issn_with_title issnwt JOIN public.srs_marctab sm ON issnwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.issn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM issn l JOIN issn r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid",
"asyncBefore": true
}
7 changes: 7 additions & 0 deletions duplicate-instance-report/nodes/join.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"id": "af9d6b6c-6d59-4735-9a7c-2314a68e0985",
"name": "Join",
"description": "",
"deserializeAs": "ParallelGateway",
"nodes": []
}
7 changes: 7 additions & 0 deletions duplicate-instance-report/nodes/lccnConnectTo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"id": "35bb9b1d-ffa5-4d20-b3c9-afd71ff3990e",
"name": "LCCN Complete",
"description": "",
"deserializeAs": "ConnectTo",
"nodeId": "parallel_gateway_af9d6b6c_6d59_4735_9a7c_2314a68e0985"
}
29 changes: 29 additions & 0 deletions duplicate-instance-report/nodes/lccnEmail.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"id": "a32eae62-d2f3-4b7b-bbd4-275ba140555e",
"name": "Email LCCN Matches",
"description": "Email CSV with instances with matching LCCN",
"deserializeAs": "EmailTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
},
{
"key": "lccnCount",
"type": "PROCESS"
}
],
"outputVariable": {},
"mailFrom": "{{{duplicate-instance-report-from}}}",
"mailTo": "{{{duplicate-instance-report-to}}}",
"mailText": "The matching LCCN instances report has completed; see the results attached.\n${lccnCount} instance matches found.",
"mailMarkup": "<p>The matching LCCN instances report has completed; see the results attached.</p><br/>${lccnCount} instance matches found.",
"mailSubject": "Matching LCCN Instances Report - LDP {{{ldp-url}}}",
"attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/lccn-${timestamp}.csv",
"includeAttachment": "${lccnCount}",
"asyncBefore": true
}
12 changes: 12 additions & 0 deletions duplicate-instance-report/nodes/lccnMoveToNode.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"id": "b4e72925-19a4-47ed-8f31-e32cf8905123",
"name": "LCCN",
"description": "",
"deserializeAs": "MoveToNode",
"gatewayId": "parallel_gateway_aea23b81_06b1_4795_8bab_ea562a854c83",
"nodes": [
"{{{mod-workflow}}}/databaseQueryTask/9d943d76-ab21-4ca6-9eae-8df6e962c037",
"{{{mod-workflow}}}/emailTask/a32eae62-d2f3-4b7b-bbd4-275ba140555e",
"{{{mod-workflow}}}/connectTo/35bb9b1d-ffa5-4d20-b3c9-afd71ff3990e"
]
}
27 changes: 27 additions & 0 deletions duplicate-instance-report/nodes/lccnQuery.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"id": "9d943d76-ab21-4ca6-9eae-8df6e962c037",
"name": "LCCN Match Query",
"description": "Query to find instances with matching LCCN",
"deserializeAs": "DatabaseQueryTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
}
],
"outputVariable": {
"key": "lccnCount",
"type": "PROCESS",
"spin": false
},
"designation": "ldp",
"outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/lccn-${timestamp}.csv",
"resultType": "CSV",
"includeHeader": true,
"query": "WITH lccn AS (SELECT ie.instance_hrid, ii.identifier AS lccn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'LCCN' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.lccn, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM lccn l JOIN lccn r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid",
"asyncBefore": true
}
Loading

0 comments on commit 62c711a

Please sign in to comment.