From e29df3bed3bd25d06b05cf374704f79e32c8076e Mon Sep 17 00:00:00 2001
From: Fabian Steeg
Date: Tue, 12 Mar 2024 14:17:02 +0100
Subject: [PATCH] Add RPPD export comparison workflow, tweak transformation (RPB-59)

---
Reviewer notes (free text after the three-dash line; not part of the commit
message and skipped by patch tools when the diff is applied):

* conf/rppd-to-lobid.fix, first hunk: the guard in front of reject() changes
  from exists("doNotIndex") to all_equal("doNotIndex", "true").
* conf/rppd-to-lobid.fix, second hunk: the any_match guard around the
  dateModified handling is removed. "updatedAt" is cut with
  substring(0, 10) and moved to "dateModified" (Strapi timestamps, per the
  added comment); the YYYYMMDD -> YYYY-MM-DD replace_all (Allegro dates, per
  the added comment) and the copy to describedBy.dateModified now run
  without a condition.
* conf/test-export-compare-rppd.flux (new file): two flows read
  output/test-output-rppd.json. The first turns each record's rppdId into a
  Strapi query URL, fetches it with open-http, decodes
  data.[*].attributes, applies rppd-to-lobid.fix and writes
  output/test-rppd-output-from-strapi.json. The second applies
  rppd-to-lobid.fix to the file records directly and writes
  output/test-rppd-output-from-file.json, so the two outputs can be compared.
* NOTE(review): blank context lines and the two-space indentation inside the
  rppd-to-lobid.fix hunks were reconstructed from the hunk line counts;
  confirm against the target file before applying.

 conf/rppd-to-lobid.fix             | 12 ++++++-----
 conf/test-export-compare-rppd.flux | 32 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 5 deletions(-)
 create mode 100644 conf/test-export-compare-rppd.flux

diff --git a/conf/rppd-to-lobid.fix b/conf/rppd-to-lobid.fix
index e9f0f5a..5b0e2ce 100644
--- a/conf/rppd-to-lobid.fix
+++ b/conf/rppd-to-lobid.fix
@@ -60,7 +60,7 @@ do once("map")
 end
 
 # Kommentar Doku: #14 "x" eintragen, falls der Datensatz nicht im Webopac erscheinen soll, z.B. noch nicht aufgearbeitete ps-Sätze
-if exists("doNotIndex")
+if all_equal("doNotIndex", "true")
   reject()
 end
 
@@ -318,10 +318,12 @@ move_field("_temp", "gndSubjectCategory[]")
 
 #1ny (Datum der letzten inhaltlichen Änderung) -> describedBy.dateModified
 # Kommentar Doku: JJJJMMTT, z.B. 20120928 für 28.09.2012
-if any_match("dateModified", "(\\d{4})(\\d{2})(\\d{2})")
-  replace_all("dateModified", "(\\d{4})(\\d{2})(\\d{2})", "$1-$2-$3")
-  copy_field("dateModified", "describedBy.dateModified")
-end
+# Strapi: 2024-03-12T10:05:45.841Z -> 2024-03-12
+substring("updatedAt", "0", "10")
+move_field("updatedAt", "dateModified")
+# Allegro: "20240312" -> 2024-03-12
+replace_all("dateModified", "(\\d{4})(\\d{2})(\\d{2})", "$1-$2-$3")
+copy_field("dateModified", "describedBy.dateModified")
 
 # -------
 #1z1 (1. biogr. Anmerkung) -> biographicalOrHistoricalInformation
diff --git a/conf/test-export-compare-rppd.flux b/conf/test-export-compare-rppd.flux
new file mode 100644
index 0000000..44b4bde
--- /dev/null
+++ b/conf/test-export-compare-rppd.flux
@@ -0,0 +1,32 @@
+// Get test data for the specified type; for each record,
+// fetch the entry from Strapi, convert that to lobid, write.
+
+// sbt "runMain rpb.ETL conf/rppd-to-strapi.flux IN_FILE=RPB-Export_HBZ_Bio_Test.txt OUT_FILE=test-output-rppd.json"
+// sbt -mem 2048 "runMain rpb.ETL conf/test-export-compare-rppd.flux"
+FLUX_DIR + "output/test-output-rppd.json"
+| open-file
+| as-lines
+| decode-json
+| fix("
+prepend(rppdId, 'https://rpb-cms-test.lobid.org/api/persons?populate=*&filters[rppdId][$eq]=')
+retain(rppdId)
+")
+| literal-to-object
+| log-object("Strapi URL: ")
+| open-http
+| as-records
+| decode-json(recordPath="data.[*].attributes")
+| fix(FLUX_DIR + "rppd-to-lobid.fix")
+| encode-json
+| write(FLUX_DIR + "output/test-rppd-output-from-strapi.json")
+;
+
+// To compare, convert test data directly to lobid, write.
+FLUX_DIR + "output/test-output-rppd.json"
+| open-file
+| as-lines
+| decode-json
+| fix(FLUX_DIR + "rppd-to-lobid.fix")
+| encode-json
+| write(FLUX_DIR + "output/test-rppd-output-from-file.json")
+;