From 2fca8ce004cc4bff85d3ca19cebf5c4306845fe8 Mon Sep 17 00:00:00 2001 From: Meti Bayissa <53035177+Meti-Adane@users.noreply.github.com> Date: Wed, 11 Dec 2024 15:00:57 +0100 Subject: [PATCH] [GSOC'24 Amharic chapter] Add Amharic configurations for duration parser (#764) * Added Ontology Datatypes for Amharic * Added Amharic configurations and tests for duration parser. FYI: running the tests currently MIGHT fail due to a bug in the duration parser that drops non-Latin characters, as mentioned in issue #760. A quick, though not very efficient, fix could be updating the regex here with `replaceAll(s"""[^\'\"$timeUnitsRegex]""", "")` --------- Co-authored-by: Ted Thibodeau Jr --- .../dataparser/DurationParserConfig.scala | 53 ++++++++++++++++++- .../ontology/OntologyDatatypes.scala | 14 ++--- .../dataparser/DurationParserTest.scala | 21 ++++++++ 3 files changed, 80 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/dbpedia/extraction/config/dataparser/DurationParserConfig.scala b/core/src/main/scala/org/dbpedia/extraction/config/dataparser/DurationParserConfig.scala index f91458058c..3b58cf3346 100644 --- a/core/src/main/scala/org/dbpedia/extraction/config/dataparser/DurationParserConfig.scala +++ b/core/src/main/scala/org/dbpedia/extraction/config/dataparser/DurationParserConfig.scala @@ -39,8 +39,59 @@ object DurationParserConfig "years" -> "year", "yr" -> "year" ), + "am" -> Map( + "second" -> "second", + "s" -> "second", + "sec" -> "second", + "seconds" -> "second", + "secs" -> "second", + "\"" -> "second", + "ሰከንድ" -> "second", + "ሴኮንድ" -> "second", + "ሴኮንዶች" -> "second", + "minute" -> "minute", + "m" -> "minute", + "min" -> "minute", + "minutes" -> "minute", + "min." -> "minute", + "mins" -> "minute", + "minu" -> "minute", + "'" -> "minute", + "ደቂቃ" -> "minute", + "ደቂቃዎች" -> "minute", + "hour" -> "hour", + "h" -> "hour", + "hours" -> "hour", + "hr" -> "hour", + "hr." -> "hour", + "hrs" -> "hour", + "hrs." 
-> "hour", + "ሰአት" -> "hour", + "ሰዓታት" -> "hour", + "ሰዓት" -> "hour", + "day" -> "day", + "d" -> "day", + "d." -> "day", + "days" -> "day", + "ቀን" -> "day", + "ቀናት" -> "day", + "ቀኖች" -> "day", + "ቀናቶች" -> "day", + "month" -> "month", + "months" -> "month", + "ወር" -> "month", + "ወራት" -> "month", + "ወሮች" -> "month", + "year" -> "year", + "y" -> "year", + "years" -> "year", + "yr" -> "year", + "አመት" -> "year", + "ዓመት" -> "year", + "ዓመታት" -> "year" + ), // For "ar" configuration, rendering right-to-left may seems like a bug, but it's not. - // Don't change this else if you know how it is done. + // Don't change this unless you know how it works. "ar" -> Map( "ثانية" -> "second", "ثا" -> "second", diff --git a/core/src/main/scala/org/dbpedia/extraction/ontology/OntologyDatatypes.scala b/core/src/main/scala/org/dbpedia/extraction/ontology/OntologyDatatypes.scala index 67e23d7b1f..4c4cca654b 100644 --- a/core/src/main/scala/org/dbpedia/extraction/ontology/OntologyDatatypes.scala +++ b/core/src/main/scala/org/dbpedia/extraction/ontology/OntologyDatatypes.scala @@ -458,9 +458,9 @@ object OntologyDatatypes types :::= builder.build builder.addDimension("Speed"); - builder.addUnit(new StandardUnitDatatype("kilometrePerHour", Set("km/h","kmh","kilometre per hour",/*el*/ "χιλιόμετρα ανά ώρα"))); + builder.addUnit(new StandardUnitDatatype("kilometrePerHour", Set("km/h","kmh","kilometre per hour",/*el*/ "χιλιόμετρα ανά ώρα", /*am*/ "ኪሎሜትር በሰዓት"))); builder.addUnit(new FactorUnitDatatype("metrePerSecond", Set("m/s","ms","metre per second"), 3.6)); - builder.addUnit(new FactorUnitDatatype("kilometrePerSecond", Set("km/s","kilometre per second"), 3600.0)); + builder.addUnit(new FactorUnitDatatype("kilometrePerSecond", Set("km/s","kilometre per second", /*am*/ "ኪሎሜትር በሰከንድ"), 3600.0)); builder.addUnit(new FactorUnitDatatype("milePerHour", Set("mph","mi/h","mile per hour"), 1.60934)); builder.addUnit(new FactorUnitDatatype("footPerSecond", Set("ft/s","foot per second"), 
0.0003048333333)); builder.addUnit(new FactorUnitDatatype("footPerMinute", Set("ft/min","foot per minute"), 0.01829)); @@ -469,21 +469,21 @@ object OntologyDatatypes builder.addDimension("Temperature"); builder.addUnit(new StandardUnitDatatype("kelvin", Set("K","kelvin",/*el*/ "Κ","κέλβιν"))); - builder.addUnit(new FactorUnitDatatype("degreeCelsius", Set("°C","degree celsius","C","Celsius",/*el*/ "βαθμοί"), 1.0, 273.15)); + builder.addUnit(new FactorUnitDatatype("degreeCelsius", Set("°C","degree celsius","C","Celsius",/*el*/ "βαθμοί", /*am*/ "ሴልሲየስ"), 1.0, 273.15)); builder.addUnit(new FactorUnitDatatype("degreeFahrenheit", Set("°F","F","Fahrenheit","degree fahrenheit"), 5.0 / 9.0, 459.67)); builder.addUnit(new FactorUnitDatatype("degreeRankine", Set("°R","R","degree rankine"), 5.0 / 9.0, 0)); types :::= builder.build builder.addDimension("Time"); - builder.addUnit(new StandardUnitDatatype("second", Set("s","sec","secs","second","seconds",/*el*/ "δ","δευτερόλεπτα"))); + builder.addUnit(new StandardUnitDatatype("second", Set("s","sec","secs","second","seconds",/*el*/ "δ","δευτερόλεπτα", /*am*/ "ሴኮንድ"))); //Add commonly used fractions of second builder.addUnit(new FactorUnitDatatype("millisecond", Set("ms","millisecond","milliseconds"), 0.001)); builder.addUnit(new FactorUnitDatatype("microsecond", Set("µs","microsecond","microseconds"), 1.0E-6)); builder.addUnit(new FactorUnitDatatype("nanosecond", Set("ns","nanosecond","nanoseconds"), 1.0E-9)); // end of fractions - builder.addUnit(new FactorUnitDatatype("minute", Set("m","min","min.","mins","minute","minutes",/*el*/ "λ","λεπτά"), 60.0)); - builder.addUnit(new FactorUnitDatatype("hour", Set("h","hr","hr.","hour","hours","std",/*el*/ "ω","ώρες","ώρα"), 3600.0)); - builder.addUnit(new FactorUnitDatatype("day", Set("d","days","day",/*el*/ "μ","μέρα","μέρες"), 86400.0)); + builder.addUnit(new FactorUnitDatatype("minute", Set("m","min","min.","mins","minute","minutes",/*el*/ "λ","λεπτά", /*am*/ "ደቂቃ", "ደቂቃዎች"), 60.0)); 
+ builder.addUnit(new FactorUnitDatatype("hour", Set("h","hr","hr.","hour","hours","std",/*el*/ "ω","ώρες","ώρα", /*am*/ "ሰዓት", "ሰዓታት"), 3600.0)); + builder.addUnit(new FactorUnitDatatype("day", Set("d","days","day",/*el*/ "μ","μέρα","μέρες", /*am*/ "ቀን", "ቀናት", "ቀኖች", "ቀናቶች"), 86400.0)); types :::= builder.build builder.addDimension("Torque"); diff --git a/core/src/test/scala/org/dbpedia/extraction/dataparser/DurationParserTest.scala b/core/src/test/scala/org/dbpedia/extraction/dataparser/DurationParserTest.scala index 5bff1a21be..c494a18e1d 100644 --- a/core/src/test/scala/org/dbpedia/extraction/dataparser/DurationParserTest.scala +++ b/core/src/test/scala/org/dbpedia/extraction/dataparser/DurationParserTest.scala @@ -138,6 +138,27 @@ class DurationParserTest extends FlatSpec with Matchers parse("en", "Time", "2002-present") should equal (None) } + + // Test for Amharic + "DurationParser" should "return 5 seconds for '5 ሴኮንድ'" in + { + parse("am", "Time", "5 ሴኮንድ").get should equal (5.0) + } + "DurationParser" should "return 10 minutes for 'context 10 ደቂቃዎች context'" in + { + parse("am", "Time", "context 10 ደቂቃዎች context").get should equal (600.0) + } + "DurationParser" should "return 14820 seconds for 'context 4 ሰዓት, 7 ደቂቃ context'" in + { + parse("am", "Time", "context 4 ሰዓት, 7 ደቂቃ context").get should equal (14820.0) + } + + "DurationParser" should "return 857752817 seconds for '27 ዓመታት, 2 ወራት, 5 ቀናት, 1 ሰዓት 40 ደቂቃ, 17 ሴኮንድ '" in + { + parse("am", "Time", "27 ዓመታት, 2 ወራት, 5 ቀናት, 1 ሰዓት 40 ደቂቃ, 17 ሴኮንድ ").get should be (approximatelyEqualTo(857752817.0)) + } + + /** * Matcher to test if 2 values are approximately equal. */