Skip to content

Commit

Permalink
Greek transliteration enhancements (#3)
Browse files Browse the repository at this point in the history
* Enhancements:
- greek_transliteration can handle some punctuation marks

* Transliteration function fixes, tests and readme
  • Loading branch information
sinnec committed Jan 4, 2023
1 parent 8033c94 commit d32c25e
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 10 deletions.
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,29 @@ False
# Both look the same but are different Unicode characters
>>> ANNA == ΑΝΝΑ
False
```

There's also the ability to convert a word typed on a Latin keyboard layout into its intended accented form in Greek:

```
>>> greek_transliteration("P;ita soybl;aki")
Πίτα σουβλάκι
>>> greek_transliteration("kaWiki")
καΐκι
>>> greek_transliteration("pro:yp;ouesh")
προϋπόθεση
>>> greek_transliteration("GA:IDAROS")
ΓΑΪΔΑΡΟΣ
```

Note: The function assumes that the user intended to write the word in Greek using the correct key sequence but simply didn't switch their keyboard layout to Greek. It doesn't convert from Greeklish!

```
# Wrong key sequence by user.
# They're supposed to press SHIFT + W and not just w for the ΅ character to appear.
>>> greek_transliteration("kawiki")
καςικι
```
32 changes: 26 additions & 6 deletions src/greeklt/greek_transliteration.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,30 @@
def greek_transliteration(string: str) -> str:
    """Convert text typed on a Latin layout into what a Greek layout would give.

    Every Latin letter is replaced by the Greek character that sits on the
    same key (``q``/``Q`` produce ``;``/``:``, ``w``/``W`` produce ``ς``).
    Three keys additionally act as dead keys when they are immediately
    followed by a letter they can combine with:

    * ``;`` + vowel        -> vowel with accent (``;a`` -> ``ά``, ``;A`` -> ``Ά``)
    * ``:`` + ``i``/``y``  -> vowel with diaeresis (``:i`` -> ``ϊ``, ``:I`` -> ``Ϊ``)
    * ``W`` + ``i``/``y``  -> vowel with diaeresis and accent (``Wi`` -> ``ΐ``)

    A dead key that is not followed by a combinable letter (including one at
    the very end of the input) is emitted like any other character instead of
    raising, and the character after it is processed normally. Characters
    that are neither Latin letters nor dead keys are copied unchanged.

    Args:
        string: Text typed with the Greek key sequence on a Latin layout.

    Returns:
        The transliterated Greek text.
    """
    eng = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    el = "αβψδεφγηιξκλμνοπ;ρστθωςχυζΑΒΨΔΕΦΓΗΙΞΚΛΜΝΟΠ:ΡΣΤΘΩςΧΥΖ"
    # Key-for-key mapping, built once so each lookup is O(1).
    plain = dict(zip(eng, el))

    diaresis = {"i": "ϊ", "I": "Ϊ", "y": "ϋ", "Y": "Ϋ"}
    accent = {
        "a": "ά", "e": "έ", "h": "ή", "i": "ί", "o": "ό", "y": "ύ", "v": "ώ",
        "A": "Ά", "E": "Έ", "H": "Ή", "I": "Ί", "O": "Ό", "Y": "Ύ", "V": "Ώ",
    }
    diaresis_accent = {"i": "ΐ", "y": "ΰ"}
    # Dead key -> table of the letters it can combine with.
    dead_keys = {";": accent, ":": diaresis, "W": diaresis_accent}

    pieces = []
    pos = 0
    length = len(string)
    while pos < length:
        char = string[pos]
        combos = dead_keys.get(char)
        # Look ahead explicitly; never index past the end or wrap around.
        following = string[pos + 1] if pos + 1 < length else ""
        if combos is not None and following in combos:
            # The dead key and the letter collapse into one character.
            pieces.append(combos[following])
            pos += 2
        else:
            # Ordinary key (or a dead key with nothing to combine with).
            pieces.append(plain.get(char, char))
            pos += 1
    return "".join(pieces)
8 changes: 4 additions & 4 deletions tests/test_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ def test_remove_accentuation():
def test_transilteration():
    """Check plain, accented and diaeresis key sequences of greek_transliteration."""
    # Plain letters map key-for-key onto the Greek layout ("w" is final sigma).
    assert greek_transliteration("fvtia") == "φωτια"
    assert (
        greek_transliteration("Gamow htane den htane na ginei")
        == "Γαμος ητανε δεν ητανε να γινει"
    )
    # "W" followed by "i" yields iota with diaeresis and accent.
    assert greek_transliteration("kaWiki") == "καΐκι"
    # ";" followed by a vowel yields the accented vowel.
    assert greek_transliteration("p;ita soybl;aki") == "πίτα σουβλάκι"
    # ":" followed by "y"/"I" yields the vowel with diaeresis.
    assert greek_transliteration("pro:yp;ouesh") == "προϋπόθεση"
    assert greek_transliteration("GA:IDAROS") == "ΓΑΪΔΑΡΟΣ"


def test_convert_final_s():
Expand Down

0 comments on commit d32c25e

Please sign in to comment.