Skip to content

Commit

Permalink
sin: ilo o ken nasin e toki mute lon tenpo sama
Browse files Browse the repository at this point in the history
  • Loading branch information
gregdan3 committed Aug 16, 2024
1 parent 57870a6 commit 793f724
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 3 deletions.
61 changes: 59 additions & 2 deletions src/sonatoki/Scorers.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,64 @@ class SoftScaling(Soften, Scaling):
scoring."""


# class Logarithmic(Scorer): ...
class SentenceScorer(ABC):
    """Abstract interface for scorers that work on a list of sentence
    scorecards at once rather than on a single sentence's tokens."""

    @classmethod
    @abstractmethod
    def score(cls, scorecards: List[Scorecard]) -> List[Scorecard]:
        """Re-score the given sentences and return them.

        Each scorecard is one sentence together with all of its metadata.
        Concrete subclasses must override this method.
        """
        raise NotImplementedError()


class SentNoOp(SentenceScorer):
    """Sentence scorer that performs no re-scoring."""

    @classmethod
    @override
    def score(cls, scorecards: List[Scorecard]) -> List[Scorecard]:
        """Return the very same list that was passed in, untouched."""
        untouched = scorecards
        return untouched


__all__ = ["PassFail", "SoftPassFail", "Scaling", "SoftScaling"]
class SentAvg(SentenceScorer):
    """Sentence scorer that gives every sentence the arithmetic mean of all
    sentence scores."""

    @classmethod
    @override
    def score(cls, scorecards: List[Scorecard]) -> List[Scorecard]:
        """Overwrite each scorecard's "score" with the mean score, in place.

        An empty list is returned as-is. Otherwise the same list object is
        returned after its scorecards have been updated.
        """
        count = len(scorecards)
        if count == 0:
            return scorecards

        # The built-in sum() is kept on purpose so float results stay the same.
        mean = sum(entry["score"] for entry in scorecards) / count

        for entry in scorecards:
            entry["score"] = mean
        return scorecards


class SentWeightedAvg(SentenceScorer):
    """Sentence scorer that gives every sentence the length-weighted mean of
    all sentence scores.

    A sentence's weight is the length of its "cleaned" entry, so longer
    sentences contribute more to the shared score.
    """

    @classmethod
    @override
    def score(cls, scorecards: List[Scorecard]) -> List[Scorecard]:
        """Overwrite each scorecard's "score" with the weighted mean, in place.

        Returns the same list object. The scorecards are left unchanged when
        the list is empty or when every "cleaned" entry is empty, since there
        is then no weight to average over.
        """
        if not scorecards:
            return scorecards

        weighted_total = 0
        total_len = 0
        for card in scorecards:
            cardlen = len(card["cleaned"])
            cardscore = card["score"]

            weighted_total += cardlen * cardscore
            total_len += cardlen

        # Every sentence had an empty "cleaned" entry: dividing would raise
        # ZeroDivisionError, so keep the scores already assigned.
        if total_len == 0:
            return scorecards

        weighted_avg = weighted_total / total_len
        for card in scorecards:
            card["score"] = weighted_avg
        return scorecards


# Public names exported by this module.
__all__ = [
    "PassFail",
    "Scaling",
    "SoftPassFail",
    "SoftScaling",
    "Soften",
    # Sentence-level scorers; the base class and the no-op default are
    # exported too because other modules import them from here.
    "SentenceScorer",
    "SentNoOp",
    "SentAvg",
    "SentWeightedAvg",
]
9 changes: 8 additions & 1 deletion src/sonatoki/ilo.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# LOCAL
from sonatoki.types import Number, Scorecard
from sonatoki.Filters import Filter
from sonatoki.Scorers import Scorer
from sonatoki.Scorers import Scorer, SentNoOp, SentenceScorer
from sonatoki.Cleaners import Cleaner
from sonatoki.Tokenizers import Tokenizer, SentTokenizer, WordTokenizer
from sonatoki.Preprocessors import Preprocessor
Expand All @@ -18,6 +18,7 @@ class Ilo:
__ignoring_filters: List[Type[Filter]]
__scoring_filters: List[Type[Filter]]
__scorer: Type[Scorer]
__sentence_scorer: Type[SentenceScorer]
__passing_score: Number

def __init__(
Expand All @@ -28,6 +29,7 @@ def __init__(
scoring_filters: List[Type[Filter]],
scorer: Type[Scorer],
passing_score: Number,
sentence_scorer: Type[SentenceScorer] = SentNoOp,
word_tokenizer: Type[Tokenizer] = WordTokenizer,
sent_tokenizer: Type[Tokenizer] = SentTokenizer,
):
Expand All @@ -40,6 +42,7 @@ def __init__(
self.__ignoring_filters = [*ignoring_filters]
self.__scoring_filters = [*scoring_filters]
self.__scorer = scorer
self.__sentence_scorer = sentence_scorer
self.__passing_score = passing_score

def preprocess(self, msg: str) -> str:
Expand Down Expand Up @@ -91,6 +94,9 @@ def filter_tokens(self, tokens: List[str]) -> List[str]:
def score_tokens(self, tokens: List[str]) -> float:
    """Score `tokens` by delegating to the configured scorer, passing along
    the configured scoring filters."""
    scorer = self.__scorer
    filters = self.__scoring_filters
    return scorer.score(tokens, filters)

def score_sentences(self, scorecards: List[Scorecard]) -> List[Scorecard]:
    """Re-score `scorecards` by delegating to the configured sentence
    scorer and return whatever it returns."""
    sentence_scorer = self.__sentence_scorer
    return sentence_scorer.score(scorecards)

def _is_toki_pona(self, message: str) -> Scorecard:
"""Process a message into its tokens, then filters, cleans, and scores
them. Message must already be preprocessed, normally done in
Expand Down Expand Up @@ -130,6 +136,7 @@ def _are_toki_pona(self, message: str) -> List[Scorecard]:
for sentence in self.sent_tokenize(message):
result = self._is_toki_pona(sentence)
scorecards.append(result)
scorecards = self.score_sentences(scorecards)
return scorecards

def are_toki_pona(self, message: str) -> List[bool]:
Expand Down

0 comments on commit 793f724

Please sign in to comment.