Skip to content

Commit

Permalink
Duplicate instance report workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
wwelling committed Sep 21, 2024
1 parent 63cc986 commit cb32f9a
Show file tree
Hide file tree
Showing 10 changed files with 167 additions and 0 deletions.
42 changes: 42 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -796,3 +796,45 @@ Either wait for scheduled event to occur or manually execute via:
```shell
fw run evans-pres-repr
```

## duplicate-instance-report

### Duplicate Instrance Report Workflow (Scheduled)

This workflow emails a CSV after connects, queries, and disconnects from LDP.

It has a signle query to build the report.

Requires following path `/mnt/workflows/${tenantId}/duplicate-instance-report` to save `instance-report-report-${timestamp}.csv` to.


These variables are required when building and running the workflow:

| Variable Name | Allowed Values | Brief Description |
| ------------------------------ | -------------- | ----------------- |
| ldp-url | URL | LDP URL. |
| ldp-user | string | LDP login username. |
| ldp-password | string | LDP login password. |
| duplicate-instance-report-from | e-mail address | The e-mail address of the report sender. |
| duplicate-instance-report-to | e-mail address | The e-mail address of the report recipient. |

The scheduled event is for ...

```shell
fw config set ldp-url ***
fw config set ldp-user ***
fw config set ldp-password ***
fw config set duplicate-instance-report-from ***
fw config set duplicate-instance-report-to ***
```

To build and activate:
```shell
fw build duplicate-instance-report
fw activate duplicate-instance-report
```

Either wait for scheduled event to occur or manually execute via:
```shell
fw run duplicate-instance-report
```
13 changes: 13 additions & 0 deletions duplicate-instance-report/nodes/connectToLdp.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"id": "11f065f0-f1ea-47ed-abc9-146099572e7b",
"name": "Connect LDP",
"description": "Connect to the LDP.",
"deserializeAs": "DatabaseConnectionTask",
"inputVariables": [],
"outputVariable": {},
"designation": "ldp",
"url": "{{{ldp-url}}}",
"username": "{{{ldp-user}}}",
"password": "{{{ldp-password}}}",
"asyncBefore": true
}
10 changes: 10 additions & 0 deletions duplicate-instance-report/nodes/disconnectFromLDP.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"id": "db806bf5-49b8-4f8a-bf96-fc0258d3c31e",
"name": "Disconnect LDP",
"description": "Disconnect from the LDP.",
"deserializeAs": "DatabaseDisconnectTask",
"inputVariables": [],
"outputVariable": {},
"designation": "ldp",
"asyncBefore": true
}
29 changes: 29 additions & 0 deletions duplicate-instance-report/nodes/email.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
{
"id": "da165c76-b891-4d01-9fbb-f8da1b1a6d12",
"name": "Email Duplicate Instance Report",
"description": "Email CSV with duplicate instances hrids and match criterium.",
"deserializeAs": "EmailTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
},
{
"key": "count",
"type": "PROCESS"
}
],
"outputVariable": {},
"mailFrom": "{{{duplicate-instance-report-from}}}",
"mailTo": "{{{duplicate-instance-report-to}}}",
"mailText": "The Instance Duplication Report has completed, see the results attached.\n${count} instance matches found",
"mailMarkup": "<p>The Instance Duplication Report has completed, see the results attached.</p><br/>${count} instance matches found",
"mailSubject": "Instance Duplication Report - LDP {{{ldp-url}}}",
"attachmentPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-report-report-${timestamp}.csv",
"includeAttachment": "${count}",
"asyncBefore": true
}
6 changes: 6 additions & 0 deletions duplicate-instance-report/nodes/end.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"id": "3c9848b3-f5a4-4753-b916-2b73c88d9409",
"name": "End",
"description": "End of duplicate instance report workflow",
"deserializeAs": "EndEvent"
}
27 changes: 27 additions & 0 deletions duplicate-instance-report/nodes/query.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
{
"id": "f548dd78-4cf5-4eb9-9b28-e4738470d44b",
"name": "Find Duplicates",
"description": "Query to return matching instances by identifier from LDP",
"deserializeAs": "DatabaseQueryTask",
"inputVariables": [
{
"key": "timestamp",
"type": "PROCESS"
},
{
"key": "tenantId",
"type": "PROCESS"
}
],
"outputVariable": {
"key": "count",
"type": "PROCESS",
"spin": false
},
"designation": "ldp",
"outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-report-report-${timestamp}.csv",
"resultType": "CSV",
"includeHeader": true,
"query": "WITH call_number_from_holdings_ext AS (SELECT instance_hrid, TRIM(TRIM(CONCAT_WS(' ', call_number_prefix, call_number, call_number_suffix))) AS call_number FROM folio_reporting.instance_ext i JOIN folio_reporting.holdings_ext h ON i.instance_id = h.instance_id WHERE call_number IS NOT NULL AND call_number <> ''), call_number_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'call_number' AS match_type FROM call_number_from_holdings_ext l JOIN call_number_from_holdings_ext r ON l.call_number = r.call_number AND l.instance_hrid < r.instance_hrid), oclc_from_srs_marctab AS (SELECT instance_hrid, LTRIM(REGEXP_REPLACE(content, '[^0-9]', '', 'g'), '0') AS oclc FROM public.srs_marctab WHERE field = '035' AND ord = 1 AND sf IN ('a', 'z') AND content LIKE '(OCoLC)%'), oclc_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'oclc' AS match_type FROM oclc_from_srs_marctab l JOIN oclc_from_srs_marctab r ON l.oclc = r.oclc AND l.oclc <> '' AND l.instance_hrid < r.instance_hrid), isbn_from_instance_identifiers AS (SELECT instance_hrid, LEFT(RIGHT(REGEXP_REPLACE(identifier, ' .*', ''), 10), 9) AS isbn FROM folio_reporting.instance_identifiers WHERE identifier_type_name = 'ISBN' AND identifier IS NOT NULL AND identifier <> ''), isbn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'isbn' AS match_type FROM isbn_from_instance_identifiers l JOIN isbn_from_instance_identifiers r ON l.isbn = r.isbn AND l.isbn <> '' AND l.instance_hrid < r.instance_hrid), lccn_from_instance_identifiers AS (SELECT instance_hrid, identifier AS lccn FROM folio_reporting.instance_identifiers WHERE identifier_type_name = 'LCCN' AND identifier IS NOT NULL AND identifier <> ''), lccn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'lccn' AS match_type FROM lccn_from_instance_identifiers l JOIN lccn_from_instance_identifiers r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid), issn_from_srs_marctab AS (SELECT instance_hrid::text, content AS issn FROM public.srs_marctab WHERE field = '022' AND ord = 1 AND sf = 'a' AND content IS NOT NULL AND content <> ''), issn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, 'issn' AS match_type FROM issn_from_srs_marctab l JOIN issn_from_srs_marctab r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid), all_matches AS (SELECT hrid, hrid2, match_type FROM call_number_matches UNION ALL SELECT hrid, hrid2, match_type FROM oclc_matches UNION ALL SELECT hrid, hrid2, match_type FROM isbn_matches UNION ALL SELECT hrid, hrid2, match_type FROM lccn_matches UNION ALL SELECT hrid, hrid2, match_type FROM issn_matches) SELECT hrid AS HRID, hrid2 AS HRID1, MAX(CASE WHEN match_type = 'oclc' THEN 'T' END) AS OCLC, MAX(CASE WHEN match_type = 'isbn' THEN 'T' END) AS ISBN, MAX(CASE WHEN match_type = 'lccn' THEN 'T' END) AS LCCN, MAX(CASE WHEN match_type = 'call_number' THEN 'T' END) AS CALL_NUMBER, MAX(CASE WHEN match_type = 'issn' THEN 'T' END) AS ISSN FROM all_matches GROUP BY hrid, hrid2",
"asyncBefore": true
}
8 changes: 8 additions & 0 deletions duplicate-instance-report/nodes/start.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"id": "bf39c55f-1fd6-41a5-a98a-c28d6d05da9d",
"name": "Start",
"description": "Start of duplicate instance report workflow",
"type": "MESSAGE_CORRELATION",
"deserializeAs": "StartEvent",
"expression": "/events/workflow/duplicate-instance-workflow/start"
}
1 change: 1 addition & 0 deletions duplicate-instance-report/setup.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
9 changes: 9 additions & 0 deletions duplicate-instance-report/triggers/startTrigger.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"id": "4381df47-746c-466a-8beb-75ca91bad9f0",
"name": "Duplicate Instance Report Trigger",
"description": "Start to duplcate instance report workflow via POST request (temporary)",
"type": "MESSAGE_CORRELATE",
"method": "POST",
"deserializeAs": "EventTrigger",
"pathPattern": "/events/workflow/duplicate-instance-workflow/start"
}
22 changes: 22 additions & 0 deletions duplicate-instance-report/workflow.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"id": "93c90a8c-5b39-4499-a0bc-a24d75444a5c",
"name": "Duplicate Instance Report Workflow",
"description": "Identify and report duplicate instances by OCLC, ISBN, ISSN, LCCN, and Call Number",
"versionTag": "1.0",
"historyTimeToLive": 0,
"deploymentId": null,
"active": false,
"setup": {
"asyncBefore": false,
"asyncAfter": false
},
"nodes": [
"{{{mod-workflow}}}/startEvent/bf39c55f-1fd6-41a5-a98a-c28d6d05da9d",
"{{{mod-workflow}}}/databaseConnectionTask/11f065f0-f1ea-47ed-abc9-146099572e7b",
"{{{mod-workflow}}}/databaseQueryTask/f548dd78-4cf5-4eb9-9b28-e4738470d44b",
"{{{mod-workflow}}}/databaseDisconnectTask/db806bf5-49b8-4f8a-bf96-fc0258d3c31e",
"{{{mod-workflow}}}/emailTask/da165c76-b891-4d01-9fbb-f8da1b1a6d12",
"{{{mod-workflow}}}/endEvent/3c9848b3-f5a4-4753-b916-2b73c88d9409"
],
"initialContext": {}
}

0 comments on commit cb32f9a

Please sign in to comment.