Skip to content

Commit

Permalink
o ante e nimi ilo :(
Browse files Browse the repository at this point in the history
  • Loading branch information
gregdan3 committed May 2, 2024
1 parent bfcf315 commit afae096
Show file tree
Hide file tree
Showing 19 changed files with 47 additions and 44 deletions.
20 changes: 10 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# o toki pona
# sona toki

## What is __o toki pona__?
## What is **sona toki**?

This library, "Speak Toki Pona," helps you identify whether a message is in Toki Pona. No grammar checking, yet, which means this more checks whether a given message has enough Toki Pona words.
This library, "Language Knowledge," helps you identify whether a message is in Toki Pona. There is no grammar checking yet, so in practice it checks whether a given message contains enough Toki Pona words.

I wrote it with a variety of scraps and lessons learned from a prior project, [ilo pi toki pona taso, "toki-pona-only tool"](https://github.com/gregdan3/ilo-pi-toki-pona-taso). That tool will be rewritten to use this library shortly.

Expand All @@ -16,24 +16,24 @@ Install with your preferred Python package manager. Example:

```sh
pdm init # if your pyproject.toml doesn't exist yet
pdm add otokipona
pdm add sonatoki
```

Then get started with a script along these lines:

```py
from otokipona.Filters import (
from sonatoki.Filters import (
Numerics,
Syllabic,
NimiLinku,
Alphabetic,
ProperName,
Punctuations,
)
from otokipona.Scorers import Scaling
from otokipona.Cleaners import ConsecutiveDuplicates
from otokipona.Tokenizers import word_tokenize_tok
from otokipona.Preprocessors import URLs, DiscordEmotes
from sonatoki.Scorers import Scaling
from sonatoki.Cleaners import ConsecutiveDuplicates
from sonatoki.Tokenizers import word_tokenize_tok
from sonatoki.Preprocessors import URLs, DiscordEmotes

def main():
ilo = Ilo(
Expand Down Expand Up @@ -67,4 +67,4 @@ The intent is to show our methodology to the Unicode Consortium, particularly to

After our proposal has been examined and a result given by the committee, I will translate this file and library into Toki Pona, with a note left behind for those who do not understand it.

### Why aren't any of the specific
### Why aren't any of the specific
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[project]
name = "otokipona"
name = "sonatoki"
version = "0.1.0"
description = "ilo li moku e toki li pana e sona ni: ni li toki ala toki pona?"
authors = [
Expand Down Expand Up @@ -76,9 +76,9 @@ src = ["src"]

[tool.coverage.run]
branch = true
source = ["src/otokipona/"]
source = ["src/sonatoki/"]
omit = [
"src/otokipona/__main__.py"
"src/sonatoki/__main__.py"
]

[tool.coverage.report]
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion src/otokipona/Filters.py → src/sonatoki/Filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from typing_extensions import override

# LOCAL
from otokipona.constants import (
from sonatoki.constants import (
VOWELS,
CONSONANTS,
NIMI_PU_SET,
Expand Down
File renamed without changes.
8 changes: 4 additions & 4 deletions src/otokipona/Scorers.py → src/sonatoki/Scorers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from typing_extensions import override

# LOCAL
from otokipona.Filters import Filter
from sonatoki.Filters import Filter

Number = Union[int, float]
Weights = Dict[str, Number]
Expand Down Expand Up @@ -83,6 +83,7 @@ def score(cls, tokens: List[str], filters: List[Type[Filter]]) -> Number:
total_score += cls.score_token(token, filters, len_filters)
return total_score / max_score if max_score else 0


class SoftScaling(Scaling):
"""Shorter messages are subject to less harsh scoring
by mapping the token count to [0.5, 1.0] via the sigmoid function,
Expand All @@ -92,7 +93,7 @@ class SoftScaling(Scaling):

@staticmethod
def sigmoid(n: int) -> Number:
return (1 / (1 + math.exp(-(0.30 * (n-1)) )))
return 1 / (1 + math.exp(-(0.30 * (n - 1))))
# n-1 makes sigmoid(1) == 0.5
# 0.30 softens scaling against input
# return n / (1+abs(n)) # too weak in 0.7+
Expand All @@ -112,11 +113,10 @@ def score(cls, tokens: List[str], filters: List[Type[Filter]]) -> Number:
total_score += cls.score_token(token, filters, len_filters)

percentage = total_score / max_score if max_score else 0
percentage **= cls.sigmoid(len_tokens)
percentage **= cls.sigmoid(len_tokens)
return percentage



class Logarithmic(Scorer): ...


Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
11 changes: 5 additions & 6 deletions src/otokipona/ilo.py → src/sonatoki/ilo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
from typing import List, Type

# LOCAL
from otokipona.Filters import Filter
from otokipona.Scorers import Number, Scorer
from otokipona.Cleaners import Cleaner
from otokipona.Tokenizers import Tokenizer
from otokipona.Preprocessors import Preprocessor
from sonatoki.Filters import Filter
from sonatoki.Scorers import Number, Scorer
from sonatoki.Cleaners import Cleaner
from sonatoki.Tokenizers import Tokenizer
from sonatoki.Preprocessors import Preprocessor


class Ilo:
Expand Down Expand Up @@ -98,5 +98,4 @@ def is_toki_pona(self, message: str) -> bool:
print("Cleaned: %s" % cleaned)
print()


return score >= self.__passing_score
File renamed without changes.
6 changes: 4 additions & 2 deletions tests/test_cleaners.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from hypothesis import assume, given, example, reproduce_failure

# LOCAL
from otokipona.Cleaners import ConsecutiveDuplicates
from sonatoki.Cleaners import ConsecutiveDuplicates

# FILESYSTEM
from .test_utils import overlapping_pairs
Expand All @@ -16,7 +16,9 @@
@example("muuuuuu")
@example("nnn")
@example("")
@example("manna") # syllabically valid, but not phonotactically valid; errantly matches phonotactic filter after this cleaner
@example(
"manna"
) # syllabically valid, but not phonotactically valid; errantly matches phonotactic filter after this cleaner
def test_ConsecutiveDuplicates(s: str):
_ = assume("\n" not in s)
res = ConsecutiveDuplicates.clean(s)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from hypothesis import HealthCheck, given, assume, example, settings

# LOCAL
from otokipona.Filters import (
from sonatoki.Filters import (
NimiPu,
Numerics,
Syllabic,
Expand All @@ -17,8 +17,8 @@
Phonotactic,
Punctuations,
)
from otokipona.Cleaners import ConsecutiveDuplicates
from otokipona.constants import NIMI_PU, NIMI_LINKU
from sonatoki.Cleaners import ConsecutiveDuplicates
from sonatoki.constants import NIMI_PU, NIMI_LINKU

# FILESYSTEM
from .test_utils import ALPHABETIC_RE, PROPER_NAME_RE
Expand Down
18 changes: 10 additions & 8 deletions tests/test_ilo.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# LOCAL
from otokipona.ilo import Ilo
from otokipona.Filters import (
from sonatoki.ilo import Ilo
from sonatoki.Filters import (
Numerics,
Syllabic,
NimiLinku,
Alphabetic,
ProperName,
Punctuations,
)
from otokipona.Scorers import Scaling, SoftScaling
from otokipona.Cleaners import ConsecutiveDuplicates
from otokipona.Tokenizers import word_tokenize_tok
from otokipona.Preprocessors import (
from sonatoki.Scorers import Scaling, SoftScaling
from sonatoki.Cleaners import ConsecutiveDuplicates
from sonatoki.Tokenizers import word_tokenize_tok
from sonatoki.Preprocessors import (
URLs,
DiscordEmotes,
DiscordSpecial,
Expand Down Expand Up @@ -47,5 +47,7 @@ def test_constructor():
assert ilo.is_toki_pona("ni li tptpt")

assert not ilo.is_toki_pona("I'm Trying To Evade The Filter")
assert not ilo.is_toki_pona("""aaa i non-saw usa's most multiple element-set
it's as asinine as in `e`-less speak""")
assert not ilo.is_toki_pona(
"""aaa i non-saw usa's most multiple element-set
it's as asinine as in `e`-less speak"""
)
2 changes: 1 addition & 1 deletion tests/test_preprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from hypothesis import given, example

# LOCAL
from otokipona.Preprocessors import (
from sonatoki.Preprocessors import (
URLs,
Spoilers,
Backticks,
Expand Down
4 changes: 2 additions & 2 deletions tests/test_scorers.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from hypothesis import given

# LOCAL
from otokipona.Filters import (
from sonatoki.Filters import (
Filter,
NimiPu,
Numerics,
Expand All @@ -18,7 +18,7 @@
Phonotactic,
Punctuations,
)
from otokipona.Scorers import Scorer, Scaling, PassFail, SoftScaling
from sonatoki.Scorers import Scorer, Scaling, PassFail, SoftScaling

# FILESYSTEM
from .test_utils import token_strategy
Expand Down
4 changes: 2 additions & 2 deletions tests/test_tokenize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest

# LOCAL
from otokipona.Tokenizers import (
from sonatoki.Tokenizers import (
sent_tokenize_re,
word_tokenize_re,
sent_tokenize_tok,
Expand All @@ -18,7 +18,7 @@
import nltk

# LOCAL
from otokipona.Tokenizers import sent_tokenize_nltk, word_tokenize_nltk
from sonatoki.Tokenizers import sent_tokenize_nltk, word_tokenize_nltk

except ImportError as e:
nltk = e
Expand Down
4 changes: 2 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import hypothesis.strategies as st

# LOCAL
from otokipona.Filters import Syllabic, Phonotactic
from otokipona.constants import ALPHABET, NIMI_LINKU_ALE
from sonatoki.Filters import Syllabic, Phonotactic
from sonatoki.constants import ALPHABET, NIMI_LINKU_ALE

PROPER_NAME_RE = r"[A-Z][a-z]*"
ALPHABETIC_RE = rf"[{ALPHABET}{ALPHABET.upper()}]+"
Expand Down

0 comments on commit afae096

Please sign in to comment.