Skip to content

Commit

Permalink
Exclude call number matches
Browse files Browse the repository at this point in the history
  • Loading branch information
wwelling committed Sep 25, 2024
1 parent 511bf63 commit 9d5fb40
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 3 deletions.
2 changes: 1 addition & 1 deletion duplicate-instance-report/nodes/callNumberQuery.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@
"outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/call-number-${timestamp}.csv",
"resultType": "CSV",
"includeHeader": true,
"query": "WITH call_number AS (SELECT ie.instance_hrid, he.call_number_type_id, he.call_number_type_name, he.call_number, TRIM(CONCAT_WS(' ', he.call_number_prefix, he.call_number, he.call_number_suffix)) AS full_call_number, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.holdings_ext he ON ie.instance_id = he.instance_id JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE he.call_number IS NOT NULL AND he.call_number <> '' AND he.call_number_type_id IS NOT NULL AND he.call_number_type_id <> '6caca63e-5651-4db6-9247-3205156e9699' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.call_number AS call_number, r.call_number AS call_number2, l.full_call_number, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM call_number l JOIN call_number r ON l.full_call_number = r.full_call_number AND l.instance_hrid < r.instance_hrid",
"query": "WITH call_number AS (SELECT ie.instance_hrid, he.call_number_type_id, he.call_number_type_name, he.call_number, TRIM(CONCAT_WS(' ', he.call_number_prefix, he.call_number, he.call_number_suffix)) AS full_call_number, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.holdings_ext he ON ie.instance_id = he.instance_id JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE he.call_number IS NOT NULL AND he.call_number !~ '^\\s*$' AND he.call_number_type_id IS NOT NULL AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a') SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.call_number AS call_number, r.call_number AS call_number2, l.full_call_number, '\"' || REPLACE(l.title, '\"', '\"\"') || '\"' AS title, '\"' || REPLACE(r.title, '\"', '\"\"') || '\"' AS title2, '\"' || REPLACE(l.author, '\"', '\"\"') || '\"' AS author, '\"' || REPLACE(r.author, '\"', '\"\"') || '\"' AS author2 FROM call_number l JOIN call_number r ON l.full_call_number = r.full_call_number AND l.instance_hrid < r.instance_hrid",
"asyncBefore": true
}
2 changes: 1 addition & 1 deletion duplicate-instance-report/nodes/reportQuery.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,6 @@
"outputPath": "/mnt/workflows/${tenantId}/duplicate-instance-report/instance-duplication-report-${timestamp}.csv",
"resultType": "CSV",
"includeHeader": true,
"query": "WITH filtered_holdings AS (SELECT instance_id, call_number, call_number_prefix, call_number_suffix FROM folio_reporting.holdings_ext WHERE he.call_number IS NOT NULL AND he.call_number <> '' AND he.call_number_type_id IS NOT NULL AND he.call_number_type_id <> '6caca63e-5651-4db6-9247-3205156e9699'), filtered_marctab AS (SELECT instance_hrid, content FROM public.srs_marctab WHERE field = '100' AND ord = 1 AND sf = 'a'), call_number AS (SELECT ie.instance_hrid, TRIM(CONCAT_WS(' ', he.call_number_prefix, he.call_number, he.call_number_suffix)) AS full_call_number, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN filtered_holdings he ON ie.instance_id = he.instance_id JOIN filtered_marctab sm ON ie.instance_hrid = sm.instance_hrid), call_number_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'call_number' AS match_type FROM call_number l JOIN call_number r ON l.full_call_number = r.full_call_number AND l.instance_hrid < r.instance_hrid), oclc_with_title AS (SELECT ie.instance_hrid, LTRIM(REGEXP_REPLACE(SUBSTRING(sm.content FROM 8), '[^0-9]', '', 'g'), '0') AS oclc, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE sm.field = '035' AND sm.ord = 1 AND sm.sf IN ('a', 'z') AND sm.content LIKE '(OCoLC)%'), oclc AS (SELECT oclcwt.instance_hrid, oclcwt.oclc, oclcwt.title, sm.content AS author FROM oclc_with_title oclcwt JOIN public.srs_marctab sm ON oclcwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), oclc_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'oclc' AS match_type FROM oclc l JOIN oclc r ON l.oclc = r.oclc AND l.instance_hrid < r.instance_hrid), isbn AS (SELECT ie.instance_hrid, NULLIF(LEFT(RIGHT(REGEXP_REPLACE(ii.identifier, ' 
.*', ''), 10), 9), ':') AS isbn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'ISBN' AND ii.identifier NOT SIMILAR TO '(:|$)%' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), isbn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'isbn' AS match_type FROM isbn l JOIN isbn r ON l.isbn = r.isbn AND l.instance_hrid < r.instance_hrid), lccn AS (SELECT ie.instance_hrid, ii.identifier AS lccn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'LCCN' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), lccn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'lccn' AS match_type FROM lccn l JOIN lccn r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid), issn_with_title AS (SELECT ie.instance_hrid, sm.content AS issn, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE field = '022' AND ord = 1 AND sf = 'a'), issn AS (SELECT issnwt.instance_hrid, issnwt.issn, issnwt.title, sm.content AS author FROM issn_with_title issnwt JOIN public.srs_marctab sm ON issnwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), issn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'issn' AS match_type FROM issn l JOIN issn r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid), 
all_matches AS (SELECT hrid, hrid2, title, title2, author, author2, match_type FROM call_number_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM oclc_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM isbn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM lccn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM issn_matches) SELECT hrid AS HRID, hrid2 AS HRID2, MAX(CASE WHEN match_type = 'oclc' THEN 'T' END) AS OCLC, MAX(CASE WHEN match_type = 'isbn' THEN 'T' END) AS ISBN, MAX(CASE WHEN match_type = 'issn' THEN 'T' END) AS ISSN, MAX(CASE WHEN match_type = 'call_number' THEN 'T' END) AS CALL_NUMBER, MAX(CASE WHEN match_type = 'lccn' THEN 'T' END) AS LCCN, '\"' || REPLACE(title, '\"', '\"\"') || '\"' AS TITLE, '\"' || REPLACE(title2, '\"', '\"\"') || '\"' AS TITLE2, '\"' || REPLACE(author, '\"', '\"\"') || '\"' AS AUTHOR, '\"' || REPLACE(author2, '\"', '\"\"') || '\"' AS AUTHOR2 FROM all_matches GROUP BY hrid, hrid2, title, title2, author, author2",
"query": "WITH oclc_with_title AS (SELECT ie.instance_hrid, LTRIM(REGEXP_REPLACE(SUBSTRING(sm.content FROM 8), '[^0-9]', '', 'g'), '0') AS oclc, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE sm.field = '035' AND sm.ord = 1 AND sm.sf IN ('a', 'z') AND sm.content LIKE '(OCoLC)%'), oclc AS (SELECT oclcwt.instance_hrid, oclcwt.oclc, oclcwt.title, sm.content AS author FROM oclc_with_title oclcwt JOIN public.srs_marctab sm ON oclcwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), oclc_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'oclc' AS match_type FROM oclc l JOIN oclc r ON l.oclc = r.oclc AND l.instance_hrid < r.instance_hrid), isbn AS (SELECT ie.instance_hrid, NULLIF(LEFT(RIGHT(REGEXP_REPLACE(ii.identifier, ' .*', ''), 10), 9), ':') AS isbn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'ISBN' AND ii.identifier NOT SIMILAR TO '(:|$)%' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), isbn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'isbn' AS match_type FROM isbn l JOIN isbn r ON l.isbn = r.isbn AND l.instance_hrid < r.instance_hrid), lccn AS (SELECT ie.instance_hrid, ii.identifier AS lccn, ie.title, sm.content AS author FROM folio_reporting.instance_ext ie JOIN folio_reporting.instance_identifiers ii ON ie.instance_hrid = ii.instance_hrid JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE ii.identifier_type_name = 'LCCN' AND sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), lccn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'lccn' AS match_type FROM lccn l JOIN lccn r ON l.lccn = r.lccn AND l.instance_hrid < r.instance_hrid), issn_with_title AS (SELECT ie.instance_hrid, sm.content AS issn, ie.title FROM folio_reporting.instance_ext ie JOIN public.srs_marctab sm ON ie.instance_hrid = sm.instance_hrid WHERE sm.field = '022' AND sm.ord = 1 AND sm.sf = 'a'), issn AS (SELECT issnwt.instance_hrid, issnwt.issn, issnwt.title, sm.content AS author FROM issn_with_title issnwt JOIN public.srs_marctab sm ON issnwt.instance_hrid = sm.instance_hrid WHERE sm.field = '100' AND sm.ord = 1 AND sm.sf = 'a'), issn_matches AS (SELECT l.instance_hrid AS hrid, r.instance_hrid AS hrid2, l.title AS title, r.title AS title2, l.author AS author, r.author AS author2, 'issn' AS match_type FROM issn l JOIN issn r ON l.issn = r.issn AND l.instance_hrid < r.instance_hrid), all_matches AS (SELECT hrid, hrid2, title, title2, author, author2, match_type FROM oclc_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM isbn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM lccn_matches UNION ALL SELECT hrid, hrid2, title, title2, author, author2, match_type FROM issn_matches) SELECT hrid AS HRID, hrid2 AS HRID2, MAX(CASE WHEN match_type = 'oclc' THEN 'T' END) AS OCLC, MAX(CASE WHEN match_type = 'isbn' THEN 'T' END) AS ISBN, MAX(CASE WHEN match_type = 'issn' THEN 'T' END) AS ISSN, MAX(CASE WHEN match_type = 'call_number' THEN 'T' END) AS CALL_NUMBER, MAX(CASE WHEN match_type = 'lccn' THEN 'T' END) AS LCCN, '\"' || REPLACE(title, '\"', '\"\"') || '\"' AS TITLE, '\"' || REPLACE(title2, '\"', '\"\"') || '\"' AS TITLE2, '\"' || REPLACE(author, '\"', '\"\"') || '\"' AS AUTHOR, '\"' || REPLACE(author2, '\"', '\"\"') || '\"' AS AUTHOR2 FROM all_matches GROUP BY hrid, hrid2, title, title2, author, author2",
"asyncBefore": true
}
1 change: 0 additions & 1 deletion duplicate-instance-report/workflow.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
"{{{mod-workflow}}}/moveToNode/408a6624-8be4-4bdf-8688-75c23a730187",
"{{{mod-workflow}}}/moveToNode/3b2668b9-58aa-447e-b907-40cee663a3ea",
"{{{mod-workflow}}}/moveToNode/b4e72925-19a4-47ed-8f31-e32cf8905123",
"{{{mod-workflow}}}/moveToNode/42c50baa-7d73-48ad-bd9b-53ffd6cd6eda",
"{{{mod-workflow}}}/databaseQueryTask/f548dd78-4cf5-4eb9-9b28-e4738470d44b",
"{{{mod-workflow}}}/compressFileTask/e14e6cfc-b069-4f2f-8903-d94714fd2574",
"{{{mod-workflow}}}/emailTask/da165c76-b891-4d01-9fbb-f8da1b1a6d12",
Expand Down

0 comments on commit 9d5fb40

Please sign in to comment.