From cda1eb2b82342b3478fa34e798f4188634a79236 Mon Sep 17 00:00:00 2001 From: Gregory Danielson Date: Thu, 4 Jul 2024 09:34:41 -0500 Subject: [PATCH] ilo alasa o wawa lon nimi ijo, lon nimi pi toki inli, lon poki :ijo: --- tests/test_ilo.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/tests/test_ilo.py b/tests/test_ilo.py index 9576e93..822bc1f 100644 --- a/tests/test_ilo.py +++ b/tests/test_ilo.py @@ -46,6 +46,7 @@ def corpus_ilo() -> Ilo: "https://example.com/", "mi wile e ni: li pona", "lipu https://example.com li kama pona", + "<:owe:843315277286473778><:owe:843315277286473778><:owe:843315277286473778><:owe:843315277286473778><:owe:843315277286473778>", "...", " ⟨·⟩, a", "·····", @@ -88,6 +89,7 @@ def corpus_ilo() -> Ilo: "toki Lojban li nasa e lawa mi", "ilo Firefox", "mi musi Space Station 13", + "jan Tepo en jan Salo en jan Lakuse en pipi Kewapi en soweli Eweke en mi li musi", ] SOME_INVALID = [ @@ -113,15 +115,20 @@ def corpus_ilo() -> Ilo: EXCESSIVE_SYLLABICS = [ - # NOTE: these are actually harder to spot bc of the EnglishIgnorables filter - # it simply stops counting all the short english phonomatches - # so you can use any number of them... + # NOTE: this is sometimes hard to distinguish from EXCESSIVE_ENGLISH "manama manama namana namana majani makala", "I manipulate a passe pile so a ton emulate, akin to intake", "a ton of insolate puke. make no amen, no joke.", "I elope so, to an elite untaken tune, some unwise tone", "insane asinine lemon awesome atone joke", "insane asinine lemon awesome atone", # i got more clever + "nope, no, joke", + "insane", + "woman", + "man", + "opposite", + "nine emo women see anime alone", + "i like mini potato", ] EXCESSIVE_ALPHABETICS = [ @@ -148,6 +155,7 @@ def corpus_ilo() -> Ilo: "Maybe I’m too nasa", # previous false positive; fixed by LongSyllabic and LongAlphabetic "I see :)", "I wanna see", # same down to here + "i'm online all the time", ] NON_MATCHES = [