evolvedbinary · marmoure · Jan 22, 2026 · Jan 20, 2026 · Jan 22, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -0,0 +1,20 @@
+name: CI
+on: [push, pull_request]
+jobs:
+  build:
+    name: Build
+    strategy:
+      fail-fast: false
+      matrix:
+        jdk: [ 21 ]
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up JDK ${{ matrix.jdk }}
+        uses: actions/setup-java@v4
+        with:
+          java-version: ${{ matrix.jdk }}
+          distribution: liberica
+          cache: maven
+      - name: Build with Maven
+        run: mvn -V -B package
diff --git a/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs b/src/main/resources/schemas/ADM_362-technical-acquisition-with-minimal-transcription.csvs
@@ -1,68 +1,68 @@
-version 1.0
-@totalColumns 42
-/*-------------------------------------------------------------------------------
-|Schema:   ADM_363-technical-acquisition-with-minimal-transcription.csvs        |
-|Authors:  Nicki Welch                                                          |
-|          David Underdown                                                      |
-|Purpose:  To capture metadata about the digitisation of the ADM 363 series     |
-|          Primarily technical metadata, but with a minimal amount of           |
-|          transcription to verify that the records may be publicly released    |
-|          after receipt by The National Archives                               |
-|Revision: 1.0 first release                                                    |
-|          1.1 update as some official numbers only single digit                |
-|          1.2 allow M as official number prefix too                            |
-|          1.3 further additions to prefixes, L, S, SS, SSX                     |
-|          1.4 allow for asterisk and ? in official number                      |
-|          1.5 further prefixes MX, KX, JX, and longer volume number            |
-|          1.6 add explicit check that checksum is not that for a 0 byte file   |
-|          1.7 Fix errors eg use correct not(), rather than isNot()             |
-|          1.8 Allow brackets etc in comments, range checking for birth year    |
-|              ???? for birth year                                              |
-|          1.9 Add piece check in ordinal: unique($piece,$item,$ordinal)        |
-|              Remove  and in($resource_uri) from item:                         |
-|              resource_uri, change starts(...) to                              |
-|              regex("...")                                                     |
-|          2.0 Allow LX as a prefix too                                         |
-|-------------------------------------------------------------------------------*/
-batch_code: length(10) regex("^ADM362B([0-9]{3})$")
-department: (is("ADM") if($file_path/notEmpty,in($file_path) and in($resource_uri)))
-series: is("362") and if($file_path/notEmpty,in($file_path) and in($resource_uri))
-piece: range(1,69720) if($file_path/notEmpty,in($file_path) and in($resource_uri))
-item: ((positiveInteger unique($piece,$item,$ordinal)) or empty) if($file_path/notEmpty,in($file_path))
-ordinal: if($item/empty,empty,unique($piece,$item,$ordinal))
-file_uuid: if($ordinal/empty,empty,uuid4 unique)
-file_path: uri if($ordinal/empty,empty,unique fileExists regex("^file:\/\/\/ADM_362\/[0-9]{1,5}\/[1-9][0-9]{0,4}\/[1-9][0-9]{0,4}_[0-9]{1,4}\.jp2$"))
-file_checksum: if($ordinal/empty,empty,not("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") and checksum(file($file_path),"SHA-256"))
-resource_uri: if($ordinal/notEmpty,uri and regex("^http://datagov.nationalarchives.gov.uk/66/ADM/362/[1-9][0-9]*/[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$"))
-scan_operator: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z]{1,12}$"))
-scan_id: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z_]{1,12}$"))
-scan_location: if($ordinal/empty,empty,regex("[-\w\s,]+"))
-scan_native_format: if($ordinal/empty,empty,regex("[0-9\w\s,.:]+"))
-scan_timestamp: if($ordinal/empty,empty,xDateTime)
-image_resolution: if($ordinal/empty,empty,is("300"))
-image_width: if($ordinal/empty,empty,positiveInteger)
-image_height: if($ordinal/empty,empty,positiveInteger)
-image_tonal_resolution: if($ordinal/empty,empty,is("24-bit colour"))
-image_format: if($ordinal/empty,empty,is("x-fmt/392"))
-image_colour_space: if($ordinal/empty,empty,is("sRGB"))
-process_location: if($ordinal/empty,empty,regex("[-\w\s,]+"))
-jp2_creation_timestamp: if($ordinal/empty,empty,xDateTime)
-uuid_timestamp: if($ordinal/empty,empty,xDateTime)
-embed_timestamp: if($ordinal/empty,empty,xDateTime)
-image_split: if($ordinal/empty,empty,is("yes") or is("no"))
-image_split_other_uuid: if($ordinal/empty,empty,if($image_split/is("yes"),uuid4,is("")))
-image_split_operator: if($ordinal/empty,empty,if($image_split/is("yes"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is("")))
-image_split_timestamp: if($ordinal/empty,empty,if($image_split/is("yes"),xDateTime,is("")))
-image_crop: if($ordinal/empty,empty,is("auto") or is("manual") or is("none"))
-image_crop_operator: if($ordinal/empty,empty,if($image_split/is("manual"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is("")))
-image_crop_timestamp: if($ordinal/empty,empty,if($image_crop/is("none"),empty,xDateTime))
-image_deskew: if($ordinal/empty,empty,is("yes") or is("no"))
-image_deskew_operator: if($ordinal/empty,empty,if($image_deskew/is("yes"),regex("^[0-9a-zA-Z]{1,12}$"),is("")))
-image_deskew_timestamp: if($ordinal/empty,empty,if($image_deskew/is("yes"),xDateTime,is("")))
-QA-code: regex("^[0-9/,]{1,2}$") @optional
-comments: regex("[-\w\s,\.\(\)\/'":\?]+") @optional
-transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,19}"),is(""))
-transcribed_birth_date_day:  if(($ordinal/empty and $item/notEmpty),regex("^\*|([0\?][1-9\?])|([1-2\?][0-9\?])|([3\?][0-1\?])$"),is("")) 
-transcribed_birth_date_month: if(($ordinal/empty and $item/notEmpty),is("*") or is("?") or is("January") or is("February") or is("March") or is("April") or is("May") or is("June") or is("July") or is("August") or is("September") or is("October") or is("November") or is("December"), is(""))
-transcribed_birth_date_year: if(($ordinal/empty and $item/notEmpty),if(positiveInteger,range(1850,1914),regex("^1[7-9][0-9\?]{2}|\*|\?{4}$")),is(""))
+version 1.0
+@totalColumns 42
+/*-------------------------------------------------------------------------------
+|Schema:   ADM_363-technical-acquisition-with-minimal-transcription.csvs        |
+|Authors:  Nicki Welch                                                          |
+|          David Underdown                                                      |
+|Purpose:  To capture metadata about the digitisation of the ADM 363 series     |
+|          Primarily technical metadata, but with a minimal amount of           |
+|          transcription to verify that the records may be publicly released    |
+|          after receipt by The National Archives                               |
+|Revision: 1.0 first release                                                    |
+|          1.1 update as some official numbers only single digit                |
+|          1.2 allow M as official number prefix too                            |
+|          1.3 further additions to prefixes, L, S, SS, SSX                     |
+|          1.4 allow for asterisk and ? in official number                      |
+|          1.5 further prefixes MX, KX, JX, and longer volume number            |
+|          1.6 add explicit check that checksum is not that for a 0 byte file   |
+|          1.7 Fix errors eg use correct not(), rather than isNot()             |
+|          1.8 Allow brackets etc in comments, range checking for birth year    |
+|              ???? for birth year                                              |
+|          1.9 Add piece check in ordinal: unique($piece,$item,$ordinal)        |
+|              Remove  and in($resource_uri) from item:                         |
+|              resource_uri, change starts(...) to                              |
+|              regex("...")                                                     |
+|          2.0 Allow LX as a prefix too                                         |
+|-------------------------------------------------------------------------------*/
+batch_code: length(10) regex("^ADM362B([0-9]{3})$")
+department: (is("ADM") if($file_path/notEmpty,in($file_path) and in($resource_uri)))
+series: is("362") and if($file_path/notEmpty,in($file_path) and in($resource_uri))
+piece: range(1,69720) if($file_path/notEmpty,in($file_path) and in($resource_uri))
+item: ((positiveInteger unique($piece,$item,$ordinal)) or empty) if($file_path/notEmpty,in($file_path))
+ordinal: if($item/empty,empty,unique($piece,$item,$ordinal))
+file_uuid: if($ordinal/empty,empty,uuid4 unique)
+file_path: uri if($ordinal/empty,empty,unique fileExists regex("^file:\/\/\/ADM_362\/[0-9]{1,5}\/[1-9][0-9]{0,4}\/[1-9][0-9]{0,4}_[0-9]{1,4}\.jp2$"))
+file_checksum: if($ordinal/empty,empty,not("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") and checksum(file($file_path),"SHA-256"))
+resource_uri: if($ordinal/notEmpty,uri and regex("^http://datagov.nationalarchives.gov.uk/66/ADM/362/[1-9][0-9]*/[a-f0-9]{8}-[a-f0-9]{4}-4[a-f0-9]{3}-[89ab][a-f0-9]{3}-[a-f0-9]{12}$"))
+scan_operator: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z]{1,12}$"))
+scan_id: if($ordinal/empty,empty,length(1,12) regex("^[0-9a-zA-Z_]{1,12}$"))
+scan_location: if($ordinal/empty,empty,regex("[-\w\s,]+"))
+scan_native_format: if($ordinal/empty,empty,regex("[0-9\w\s,.:]+"))
+scan_timestamp: if($ordinal/empty,empty,xDateTime)
+image_resolution: if($ordinal/empty,empty,is("300"))
+image_width: if($ordinal/empty,empty,positiveInteger)
+image_height: if($ordinal/empty,empty,positiveInteger)
+image_tonal_resolution: if($ordinal/empty,empty,is("24-bit colour"))
+image_format: if($ordinal/empty,empty,is("x-fmt/392"))
+image_colour_space: if($ordinal/empty,empty,is("sRGB"))
+process_location: if($ordinal/empty,empty,regex("[-\w\s,]+"))
+jp2_creation_timestamp: if($ordinal/empty,empty,xDateTime)
+uuid_timestamp: if($ordinal/empty,empty,xDateTime)
+embed_timestamp: if($ordinal/empty,empty,xDateTime)
+image_split: if($ordinal/empty,empty,is("yes") or is("no"))
+image_split_other_uuid: if($ordinal/empty,empty,if($image_split/is("yes"),uuid4,is("")))
+image_split_operator: if($ordinal/empty,empty,if($image_split/is("yes"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is("")))
+image_split_timestamp: if($ordinal/empty,empty,if($image_split/is("yes"),xDateTime,is("")))
+image_crop: if($ordinal/empty,empty,is("auto") or is("manual") or is("none"))
+image_crop_operator: if($ordinal/empty,empty,if($image_split/is("manual"),length(1,12) and regex("^[0-9a-zA-Z]{1,12}$"),is("")))
+image_crop_timestamp: if($ordinal/empty,empty,if($image_crop/is("none"),empty,xDateTime))
+image_deskew: if($ordinal/empty,empty,is("yes") or is("no"))
+image_deskew_operator: if($ordinal/empty,empty,if($image_deskew/is("yes"),regex("^[0-9a-zA-Z]{1,12}$"),is("")))
+image_deskew_timestamp: if($ordinal/empty,empty,if($image_deskew/is("yes"),xDateTime,is("")))
+QA-code: regex("^[0-9/,]{1,2}$") @optional
+comments: regex("[-\w\s,\.\(\)\/'":\?]+") @optional
+transcribed_volume_number: if($item/empty,regex("[0-9A-Z\-\s]{1,19}"),is(""))
+transcribed_birth_date_day:  if(($ordinal/empty and $item/notEmpty),regex("^\*|([0\?][1-9\?])|([1-2\?][0-9\?])|([3\?][0-1\?])$"),is("")) 
+transcribed_birth_date_month: if(($ordinal/empty and $item/notEmpty),is("*") or is("?") or is("January") or is("February") or is("March") or is("April") or is("May") or is("June") or is("July") or is("August") or is("September") or is("October") or is("November") or is("December"), is(""))
+transcribed_birth_date_year: if(($ordinal/empty and $item/notEmpty),if(positiveInteger,range(1850,1914),regex("^1[7-9][0-9\?]{2}|\*|\?{4}$")),is(""))
 transcribed_official_number: if(($ordinal/empty and $item/notEmpty),regex("^(([CDP]\/)?([FJKLMS]|LX|MX|JX|KX|SS|SSX)[/?0-9]{1,6}|[/?1-9][/?0-9]{5}|\*)$"),is(""))
diff --git a/src/main/resources/schemas/concat.csvs b/src/main/resources/schemas/concat.csvs
@@ -1,5 +1,5 @@
-version 1.1
-@totalColumns 3
-c1:
-c2:
-c3: is(concat($c1,$c2))
+version 1.1
+@totalColumns 3
+c1:
+c2:
+c3: is(concat($c1,$c2))
diff --git a/src/main/resources/schemas/thunder-stone-sample-csvs.csvs b/src/main/resources/schemas/thunder-stone-sample-csvs.csvs
@@ -1,13 +1,13 @@
-database /tmp/testdb
-table  customer
-# indicate csv format with a delimiter of |
-csv |
-#       Name            Type            Tag
-field   CustID          varchar(10)     1
-field   Company         varchar(80)     2
-field   Address         varchar(80)     3
-field   City            varchar(20)     4
-field   State           varchar(10)     5
-field   Zip             varchar(10)     6
-field   Country         varchar(10)     7
+database /tmp/testdb
+table  customer
+# indicate csv format with a delimiter of |
+csv |
+#       Name            Type            Tag
+field   CustID          varchar(10)     1
+field   Company         varchar(80)     2
+field   Address         varchar(80)     3
+field   City            varchar(20)     4
+field   State           varchar(10)     5
+field   Zip             varchar(10)     6
+field   Country         varchar(10)     7
 field   Phone           varchar(20)     8
diff --git a/src/test/resources/mock-data/concat.csvs b/src/test/resources/mock-data/concat.csvs
@@ -1,5 +1,5 @@
-version 1.1
-@totalColumns 3
-c1:
-c2:
-c3: is(concat($c1,$c2))
+version 1.1
+@totalColumns 3
+c1:
+c2:
+c3: is(concat($c1,$c2))