From f57dd9fe500ca353c0d4a42a4b56a7b4328bf07b Mon Sep 17 00:00:00 2001 From: Steve Lawrence Date: Wed, 15 Apr 2020 13:40:25 -0400 Subject: [PATCH 1/5] Organize project into standard Daffodil test layout No functional changes here, just moving files around - Allows for packaging the schema into a jar - Adds tdml file to allow quick testing by running "sbt test" --- .gitignore | 1 + build.sbt | 16 ++++++++++ project/build.properties | 1 + .../praattextgrid/PraatTextGrid.dfdl.xsd | 2 +- .../praattextgrid}/built-in-formats.xsd | 0 .../examples}/ekskfk_miski_1.TextGrid | 0 .../examples/ekskfk_miski_1.TextGrid.xml | 2 +- src/test/resources/textgrid.tdml | 30 +++++++++++++++++++ src/test/scala/TestPraatTextGrid.scala | 20 +++++++++++++ 9 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 .gitignore create mode 100644 build.sbt create mode 100644 project/build.properties rename PraatTextGrid.dfdl.xsd => src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd (99%) rename {lib => src/main/resources/com/github/keeleleek/praattextgrid}/built-in-formats.xsd (100%) rename {examples => src/test/resources/examples}/ekskfk_miski_1.TextGrid (100%) rename examples/ekskfk_miski_1.tdml => src/test/resources/examples/ekskfk_miski_1.TextGrid.xml (97%) create mode 100644 src/test/resources/textgrid.tdml create mode 100644 src/test/scala/TestPraatTextGrid.scala diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +target diff --git a/build.sbt b/build.sbt new file mode 100644 index 0000000..a54d7ac --- /dev/null +++ b/build.sbt @@ -0,0 +1,16 @@ +name := "dfdl-praat-textgrid" + +organization := "com.github.keeleleek" + +version := "0.0.1-SNAPSHOT" + +scalaVersion := "2.12.11" + +libraryDependencies := Seq( + "org.apache.daffodil" %% "daffodil-tdml-processor" % "2.5.0" % "test", + "com.novocode" % "junit-interface" % "0.11" % "test", +) + +testOptions += 
Tests.Argument(TestFrameworks.JUnit, "-v") + +crossPaths := false diff --git a/project/build.properties b/project/build.properties new file mode 100644 index 0000000..06703e3 --- /dev/null +++ b/project/build.properties @@ -0,0 +1 @@ +sbt.version=1.3.9 diff --git a/PraatTextGrid.dfdl.xsd b/src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd similarity index 99% rename from PraatTextGrid.dfdl.xsd rename to src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd index f6f69c0..2ca56b9 100644 --- a/PraatTextGrid.dfdl.xsd +++ b/src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd @@ -7,7 +7,7 @@ elementFormDefault="qualified" attributeFormDefault="unqualified"> - + diff --git a/lib/built-in-formats.xsd b/src/main/resources/com/github/keeleleek/praattextgrid/built-in-formats.xsd similarity index 100% rename from lib/built-in-formats.xsd rename to src/main/resources/com/github/keeleleek/praattextgrid/built-in-formats.xsd diff --git a/examples/ekskfk_miski_1.TextGrid b/src/test/resources/examples/ekskfk_miski_1.TextGrid similarity index 100% rename from examples/ekskfk_miski_1.TextGrid rename to src/test/resources/examples/ekskfk_miski_1.TextGrid diff --git a/examples/ekskfk_miski_1.tdml b/src/test/resources/examples/ekskfk_miski_1.TextGrid.xml similarity index 97% rename from examples/ekskfk_miski_1.tdml rename to src/test/resources/examples/ekskfk_miski_1.TextGrid.xml index 82ca7fe..53723f7 100644 --- a/examples/ekskfk_miski_1.tdml +++ b/src/test/resources/examples/ekskfk_miski_1.TextGrid.xml @@ -707,4 +707,4 @@ - + diff --git a/src/test/resources/textgrid.tdml b/src/test/resources/textgrid.tdml new file mode 100644 index 0000000..d320a3e --- /dev/null +++ b/src/test/resources/textgrid.tdml @@ -0,0 +1,30 @@ + + + + + + + + encodingErrorPolicyError + + + + + + + examples/ekskfk_miski_1.TextGrid + + + examples/ekskfk_miski_1.TextGrid.xml + + + + diff --git a/src/test/scala/TestPraatTextGrid.scala 
b/src/test/scala/TestPraatTextGrid.scala
new file mode 100644
index 0000000..1e1db96
--- /dev/null
+++ b/src/test/scala/TestPraatTextGrid.scala
@@ -0,0 +1,20 @@
+package com.github.keeleleek.praattextgrid
+
+import org.apache.daffodil.tdml.Runner
+import org.junit.Test
+import org.junit.AfterClass
+
+// Companion object: holds the TDML runner shared by the tests in the class below.
+object TestPraatTextGrid {
+  lazy val runner = new Runner("/", "textgrid.tdml")
+
+  // Resets the shared runner once JUnit has finished with this test class.
+  @AfterClass def shutDown(): Unit = { runner.reset }
+}
+
+// JUnit entry points; each test runs the test case of the same name via the runner.
+class TestPraatTextGrid {
+  import TestPraatTextGrid._
+
+  @Test def test_ekskfk_miski_1(): Unit = { runner.runOneTest("ekskfk_miski_1") }
+}

From 6daffa5d4c9820f511c6ac7a7a7cded3fffdfd8a Mon Sep 17 00:00:00 2001
From: Steve Lawrence
Date: Wed, 15 Apr 2020 13:50:38 -0400
Subject: [PATCH 2/5] Minor updates to schema and expected infoset

- Remove attributeFormDefault, this property is ignored
- Increase the decimal precision of the textNumberPattern. This ensures
  that no precision is lost when we unparse numbers
- The text elements are nillable when their value is the empty string.
Updates the expected infoset so that empty text elements are nilled instead of just being empty --- .../praattextgrid/PraatTextGrid.dfdl.xsd | 3 +-- .../praattextgrid/built-in-formats.xsd | 2 +- .../examples/ekskfk_miski_1.TextGrid.xml | 20 +++++++++---------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd b/src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd index 2ca56b9..9c0d010 100644 --- a/src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd +++ b/src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd @@ -4,8 +4,7 @@ xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/" xmlns:ex="http://example.com" targetNamespace="http://example.com" - elementFormDefault="qualified" - attributeFormDefault="unqualified"> + elementFormDefault="qualified"> diff --git a/src/main/resources/com/github/keeleleek/praattextgrid/built-in-formats.xsd b/src/main/resources/com/github/keeleleek/praattextgrid/built-in-formats.xsd index a6168a2..430ebe9 100644 --- a/src/main/resources/com/github/keeleleek/praattextgrid/built-in-formats.xsd +++ b/src/main/resources/com/github/keeleleek/praattextgrid/built-in-formats.xsd @@ -30,7 +30,7 @@ utf16Width="fixed" textStandardDecimalSeparator="." 
textStandardGroupingSeparator="," textStandardExponentRep="E" textStandardZeroRep="0" textStandardInfinityRep="Inf" - textStandardNaNRep="NaN" textNumberPattern="#,##0.###;-#,##0.###" + textStandardNaNRep="NaN" textNumberPattern="#,##0.###############;-#,##0.###############" textNumberRounding="explicit" textNumberRoundingMode="roundUnnecessary" textNumberRoundingIncrement="0" decimalSigned="yes"/> diff --git a/src/test/resources/examples/ekskfk_miski_1.TextGrid.xml b/src/test/resources/examples/ekskfk_miski_1.TextGrid.xml index 53723f7..548513a 100644 --- a/src/test/resources/examples/ekskfk_miski_1.TextGrid.xml +++ b/src/test/resources/examples/ekskfk_miski_1.TextGrid.xml @@ -1,4 +1,4 @@ - + ooTextFile TextGrid 0.0 @@ -82,7 +82,7 @@ 2 0.061199346418562 0.40788261011471 - + 3 @@ -260,7 +260,7 @@ 2 0.061199346418562 0.40788261011471 - + 3 @@ -438,7 +438,7 @@ 2 0.061199346418562 0.40788261011471 - + 3 @@ -526,7 +526,7 @@ 2 0.061199346418562 0.40788261011471 - + 3 @@ -578,7 +578,7 @@ 2 0.061199346418562 0.40788261011471 - + 3 @@ -658,7 +658,7 @@ 1 0.0 2.0 - + @@ -674,7 +674,7 @@ 1 0.0 0.061199346418562 - + 2 @@ -686,7 +686,7 @@ 3 0.40788261011471 2.0 - + @@ -702,7 +702,7 @@ 1 0.0 2.0 - + From 7d2806909f3e3e756076badfdff45f9236ca2eb6 Mon Sep 17 00:00:00 2001 From: Steve Lawrence Date: Wed, 15 Apr 2020 14:32:41 -0400 Subject: [PATCH 3/5] Update README - Update example infoset for nillable empty text - Use Apache Daffodil website instead of old NCSA daffodil - Fix paths based on new layout --- README.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 62fe924..2e1a480 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,7 @@ The current logical structure of the XML schema of TextGrid files is pictured be 1 0 2 - + @@ -63,11 +63,13 @@ The example data originates from the [Phonetic Corpus of Estonian Spontaneous Sp ## Creating XML from Praat TextGrid files -The DFDL schema has been developed and tested using the 
open source tool [Daffodil](https://opensource.ncsa.illinois.edu/confluence/display/DFDL). +The DFDL schema has been developed and tested using the open source tool [Apache Daffodil](https://daffodil.apache.org). Parsing the example TextGrid file. ```shell -$ ../bin/daffodil parse --schema ./PraatTextGrid.dfdl.xsd ./examples/ekskfk_miski_1.TextGrid +$ daffodil parse -TsuppressSchemaDefinitionWarnings=encodingErrorPolicyError \ + --schema src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd \ + src/test/resources/examples/ekskfk_miski_1.TextGrid ``` ## Creating Praat TextGrid files from XML @@ -75,5 +77,7 @@ $ ../bin/daffodil parse --schema ./PraatTextGrid.dfdl.xsd ./examples/ekskfk_misk Un-parsing the parsed example XML infoset back to TextGrid text file. ```shell -$ ../bin/daffodil unparse --schema ./PraatTextGrid.dfdl.xsd ./examples/ekskfk_miski_1.tdml +$ daffodil unparse -TsuppressSchemaDefinitionWarnings=encodingErrorPolicyError \ + --schema src/main/resources/com/github/keeleleek/praattextgrid/PraatTextGrid.dfdl.xsd \ + src/test/resources/examples/ekskfk_miski_1.TextGrid.xml ``` From ce80d9948bf7e409df26fb795cafe9f9dcc2418e Mon Sep 17 00:00:00 2001 From: Steve Lawrence Date: Tue, 26 May 2020 12:30:34 -0400 Subject: [PATCH 4/5] Use ++= for libraryDependencies, depend on JUnit 4.12 --- build.sbt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index a54d7ac..70ce7b8 100644 --- a/build.sbt +++ b/build.sbt @@ -6,9 +6,10 @@ version := "0.0.1-SNAPSHOT" scalaVersion := "2.12.11" -libraryDependencies := Seq( +libraryDependencies ++= Seq( "org.apache.daffodil" %% "daffodil-tdml-processor" % "2.5.0" % "test", "com.novocode" % "junit-interface" % "0.11" % "test", + "junit" % "junit" % "4.12" % "test", ) testOptions += Tests.Argument(TestFrameworks.JUnit, "-v") From 6b1437ec365ad2574f08fb960a0126c6da1f8246 Mon Sep 17 00:00:00 2001 From: Steve Lawrence Date: Wed, 3 Jun 2020 11:54:32 -0400 Subject: [PATCH 5/5] 
Update to latest Daffodil version --- build.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.sbt b/build.sbt index 70ce7b8..1634d9a 100644 --- a/build.sbt +++ b/build.sbt @@ -7,7 +7,7 @@ version := "0.0.1-SNAPSHOT" scalaVersion := "2.12.11" libraryDependencies ++= Seq( - "org.apache.daffodil" %% "daffodil-tdml-processor" % "2.5.0" % "test", + "org.apache.daffodil" %% "daffodil-tdml-processor" % "2.6.0" % "test", "com.novocode" % "junit-interface" % "0.11" % "test", "junit" % "junit" % "4.12" % "test", )