Skip to content

Commit

Permalink
refactor: remove 'RuleBased' from the names of all transliterators
Browse files Browse the repository at this point in the history
  • Loading branch information
medavox committed Aug 31, 2019
1 parent c561098 commit 9dc7100
Show file tree
Hide file tree
Showing 20 changed files with 42 additions and 43 deletions.
34 changes: 17 additions & 17 deletions src/main/kotlin/com/github/medavox/ipa_transcribers/Language.kt
Original file line number Diff line number Diff line change
Expand Up @@ -67,21 +67,21 @@ prediction: fairly regular, but known to have lots of variants
//tamil -- Unique brahmic abugida

/**
 * The languages this library can transcribe.
 *
 * Each entry pairs a human-readable display name ([neim]) with the [Transcriber]
 * object that handles text in that language.
 *
 * NOTE(review): `neim` is presumably spelled this way to avoid clashing with the
 * `name` property that every Kotlin enum entry already has -- confirm before renaming.
 *
 * @property neim the display name of the language, e.g. "Spanish/European"
 * @property transcriber the transcriber object used for this language
 */
enum class Language(val neim:String, val transcriber: Transcriber) {
// NOTE(review): this listing is a diff view. The first set of entries (those whose
// transcriber names end in/contain "RuleBased") are the lines removed by the rename;
// the second set below are their replacements. Only one set exists in the real file.
ARABIC("Arabic", ArabicRuleBased),
BENGALI("Bengali", BengaliRuleBased),
ENGLISH("English", EnglishRuleBased),
GERMAN("German", GermanRuleBased),
HINDI("Hindi", HindiRuleBased),
ITALIAN("Italian", ItalianRuleBased),
JAPANESE_LIMITED("Japanese/Katakana & Hiragana Only", JapaneseRuleBasedKatakanaHiraganaOnly),
KOREAN("Korean", KoreanRuleBased),
MARATHI("Marathi", MarathiRuleBased),
MALAY("Malay", MalayRuleBased),
PERSIAN("Persian", PersianRuleBased),
RUSSIAN("Russian",RussianRuleBased),
SPANISH_EUROPEAN("Spanish/European", SpanishPeninsularRuleBased),
SPANISH_AMERICAN("Spanish/PanAmerican", SpanishPanAmericanRuleBased),
SWAHILI("Swahili", SwahiliRuleBased),
TELUGU("Telugu", TeluguRuleBased),
TURKISH("Turkish", TurkishRuleBased)
// Post-rename entries: same languages and display names, transcriber objects
// now named without the "RuleBased" part.
ARABIC("Arabic", Arabic),
BENGALI("Bengali", Bengali),
ENGLISH("English", English),
GERMAN("German", German),
HINDI("Hindi", Hindi),
ITALIAN("Italian", Italian),
JAPANESE_LIMITED("Japanese/Katakana & Hiragana Only", JapaneseOnlyKatakanaHiragana),
KOREAN("Korean", Korean),
MARATHI("Marathi", Marathi),
MALAY("Malay", Malay),
PERSIAN("Persian", Persian),
RUSSIAN("Russian",Russian),
SPANISH_EUROPEAN("Spanish/European", SpanishPeninsular),
SPANISH_AMERICAN("Spanish/PanAmerican", SpanishPanAmerican),
SWAHILI("Swahili", Swahili),
TELUGU("Telugu", Telugu),
TURKISH("Turkish", Turkish)
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
* given its standard spelling.
*
* But we can at least transcribe the information that Arabic DOES provide.*/
object ArabicRuleBased : RuleBasedTranscriber() {
object Arabic : RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.IN_PROGRESS
val rules:List<Rule> = listOf<Rule>(
Rule("؀", ""),//NUMBER SIGN
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
*
* Even so, this transcriber does the best job possible,
* given the information that *has* been recorded in the orthography.*/
object PersianRuleBased:RuleBasedTranscriber() {
object Persian:RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.IN_PROGRESS

/**Although Persian letters have different representations depending on whether they are initial, medial, or final --
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
* Completion status: all purely sound-based characters have been added;
* however, there are a small number of 'functional' Hiragana & Katakana, which require more complex rules,
* that are yet to be done.*/
object JapaneseRuleBasedKatakanaHiraganaOnly:RuleBasedTranscriber() {
object JapaneseOnlyKatakanaHiragana:RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.INCOMPLETE
private val rules:List<Rule> = listOf(
//katakana
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import java.text.Normalizer
* https://linguistics.stackexchange.com/q/2805 describes the syllable-final assimilation rules
* [An independent reference wiki for the Korean language](http://www.koreanwikiproject.com/wiki/index.php?title=IPA)
* */
object KoreanRuleBased: RuleBasedTranscriber() {
object Korean: RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.COMPLETE
private val vowels = "ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ"
private val rules:List<Rule> = listOf(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
* So this may be as far as we can get for Russian.
*
* Also a static list of any exceptions.*/
object RussianRuleBased:RuleBasedTranscriber() {
object Russian:RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.IN_PROGRESS
//todo: implement vowel reduction in unstressed vowels
//TODO:voicing assimilation, and devoicing at end of word
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
* meaning that pronunciation rules for single letters and simple digraphs have been implemented,
* but more complex linguistic rules not directly represented in the orthography are as-yet unimplemented.
**/
object BengaliRuleBased : RuleBasedTranscriber() {
object Bengali : RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.SURFACE_LEVEL_COMPLETE

fun l(it:String):String{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
*
* The rules governing which schwas are dropped are fairly complex,
* and haven't been implemented yet.*/
object HindiRuleBased:RuleBasedTranscriber() {
object Hindi:RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.SURFACE_LEVEL_COMPLETE
private val consonants = "बभदधजझडढफ़गघग़हयकखलमनणञङपफक़रढ़सषशतथचछटठवख़ज़झ़"
private var lastWasConsonant:Boolean = false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ package com.github.medavox.ipa_transcribers.brahmic
import com.github.medavox.ipa_transcribers.CompletionStatus
import com.github.medavox.ipa_transcribers.RuleBasedTranscriber

object MarathiRuleBased:RuleBasedTranscriber() {
object Marathi:RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.INCOMPLETE

override fun transcribe(nativeText: String): String {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import com.github.medavox.ipa_transcribers.CompletionStatus
import com.github.medavox.ipa_transcribers.Rule
import com.github.medavox.ipa_transcribers.RuleBasedTranscriber

object TeluguRuleBased:RuleBasedTranscriber() {
object Telugu:RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.INCOMPLETE

private fun l(it:String):String{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
* So here goes.
* Based on [work by Mark Rosenfelder](http://zompist.com/spell.html)
* See also more formal work by Edward Carney*/
object EnglishRuleBased:RuleBasedTranscriber() {
object English:RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.IN_PROGRESS
//todo:english: download the Carnegie Mellon University (CMU) Pronouncing Dictionary, check it has the 1000 most common English words
//fallback system: try the CMU dictionary (which includes exceptions), then try composable morphemes, then Mark Rosenfelder's rules
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import com.github.medavox.ipa_transcribers.Rule
import com.github.medavox.ipa_transcribers.RuleBasedTranscriber

/**This transcriber follows pronunciation in Standard German, as spoken in Germany.*/
object GermanRuleBased: RuleBasedTranscriber() {
object German: RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.IN_PROGRESS

val vowels = "aeiouäöü"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
*
* According to Wikipedia, Northern Italy is more densely populated than Southern Italy;
* therefore this transcriber will favour Northern pronunciations.*/
object ItalianRuleBased: RuleBasedTranscriber() {
object Italian: RuleBasedTranscriber() {
val alphabet = "abcdefghilmnopqrstuvz"
val vowels = "aeiou"
val voicedConsonants = "bdglmnrvz"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import com.github.medavox.ipa_transcribers.CompletionStatus
import com.github.medavox.ipa_transcribers.Rule
import com.github.medavox.ipa_transcribers.RuleBasedTranscriber

object MalayRuleBased:RuleBasedTranscriber() {
object Malay:RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.IN_PROGRESS
val rules:List<Rule> = listOf(
Rule("c", "t͡ʃ"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
* * [Wikipedia:Spanish Orthography](https://en.wikipedia.org/wiki/Spanish_orthography)
* * [Wikipedia phonology of spanish](https://en.wikipedia.org/wiki/Spanish_language#Phonology)
* */
object SpanishPanAmericanRuleBased: RuleBasedTranscriber() {
object SpanishPanAmerican: RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.COMPLETE
//the 'transcripción' problem - does the voicedness of n bleed over onto s AND c?
//todo: account for voicing assimilation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ package com.github.medavox.ipa_transcribers.latin
import com.github.medavox.ipa_transcribers.CompletionStatus
import com.github.medavox.ipa_transcribers.Rule
import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
import com.github.medavox.ipa_transcribers.latin.SpanishPanAmericanRuleBased.voicedConsonants
import com.github.medavox.ipa_transcribers.latin.SpanishPanAmericanRuleBased.normaliseAccents
import com.github.medavox.ipa_transcribers.latin.SpanishPanAmericanRuleBased.removeStressAccents
import com.github.medavox.ipa_transcribers.latin.SpanishPanAmerican.voicedConsonants
import com.github.medavox.ipa_transcribers.latin.SpanishPanAmerican.normaliseAccents
import com.github.medavox.ipa_transcribers.latin.SpanishPanAmerican.removeStressAccents

object SpanishPeninsularRuleBased: RuleBasedTranscriber() {
object SpanishPeninsular: RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.COMPLETE
val rules:List<Rule> = SpanishPanAmericanRuleBased.rules.map{
val rules:List<Rule> = SpanishPanAmerican.rules.map{
when(it.unconsumedMatcher) {
Regex("c[ie]") -> it.copy(outputString = {"θ"})
Regex("z$voicedConsonants") -> it.copy(outputString = {"ð"})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import com.github.medavox.ipa_transcribers.CompletionStatus
import com.github.medavox.ipa_transcribers.Rule
import com.github.medavox.ipa_transcribers.RuleBasedTranscriber

object SwahiliRuleBased :RuleBasedTranscriber() {
object Swahili :RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.SURFACE_LEVEL_COMPLETE
val rules:List<Rule> = listOf(

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import com.github.medavox.ipa_transcribers.RuleBasedTranscriber
*
* Turkish spelling is extremely regular, because it systematically adopted the Latin alphabet in 1929.
* Thanks, Atatürk!*/
object TurkishRuleBased: RuleBasedTranscriber() {
object Turkish: RuleBasedTranscriber() {
override val completionStatus: CompletionStatus = CompletionStatus.COMPLETE
val frontVowels = "eiöü"
val backVowels = "aâıouû"
Expand Down
4 changes: 2 additions & 2 deletions src/test/kotlin/BengaliTest.kt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import com.github.medavox.ipa_transcribers.brahmic.BengaliRuleBased
import com.github.medavox.ipa_transcribers.brahmic.Bengali
import org.junit.Test

class BengaliTest : CoverageTest() {
Expand Down Expand Up @@ -32,7 +32,7 @@ class BengaliTest : CoverageTest() {
/**
 * Runs [codePointTest] against the Bengali transcriber for every element of
 * `unicodeBlock` that is not listed in `excluded`.
 *
 * NOTE(review): `unicodeBlock`, `excluded` and `codePointTest` are not defined in the
 * visible part of this file; presumably they come from this class or from
 * `CoverageTest` -- confirm there for what exactly is asserted per code point.
 */
fun characterCoverageTest() {
for(i in unicodeBlock) {
// skip code points that are deliberately left out of the coverage check
if(i !in excluded) {
// NOTE(review): diff view -- the first call is the pre-rename line, the second is
// its replacement; the real file contains only one of them.
codePointTest(i, BengaliRuleBased::transcribe)
codePointTest(i, Bengali::transcribe)
}
}
}
Expand Down
5 changes: 2 additions & 3 deletions src/test/kotlin/TeluguTest.kt
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import com.github.medavox.ipa_transcribers.brahmic.TeluguRuleBased
import com.ibm.icu.lang.UCharacter
import com.github.medavox.ipa_transcribers.brahmic.Telugu
import org.junit.Test

class TeluguTest :CoverageTest() {
Expand All @@ -24,7 +23,7 @@ class TeluguTest :CoverageTest() {
/**
 * Runs [codePointTest] against the Telugu transcriber for every element of
 * `unicodeBlock` that is not listed in `excluded`.
 *
 * NOTE(review): `unicodeBlock`, `excluded` and `codePointTest` are not defined in the
 * visible part of this file; presumably they come from this class or from
 * `CoverageTest` -- confirm there for what exactly is asserted per code point.
 */
fun characterCoverageTest() {
for(i in unicodeBlock) {
// skip code points that are deliberately left out of the coverage check
if (i !in excluded) {
// NOTE(review): diff view -- the first call is the pre-rename line, the second is
// its replacement; the real file contains only one of them.
codePointTest(i, TeluguRuleBased::transcribe)
codePointTest(i, Telugu::transcribe)
}
}
}
Expand Down

0 comments on commit 9dc7100

Please sign in to comment.