Skip to content

Commit

Permalink
Another minor speed up by removing replace method out of class. Was m…
Browse files Browse the repository at this point in the history
…ade by mistake but looks like an improvement
  • Loading branch information
Kirill Belousov committed Sep 16, 2023
1 parent c84dd47 commit 3a81078
Showing 1 changed file with 18 additions and 16 deletions.
34 changes: 18 additions & 16 deletions addon/globalPlugins/textnormalizer/textnormalizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,20 @@
https://habr.com/ru/post/86303/
"""

from functools import lru_cache
import re

try: from logHandler import log
except ImportError: import logging as log


def normalizer_replace_text(old, new, string, case_insensitive=False):
    """Replace every occurrence of ``old`` in ``string`` with ``new``.

    Both ``old`` and ``new`` are treated as literal text.

    NOTE(review): the flag reads inverted relative to its name, and this is
    kept on purpose for backward compatibility with existing callers:

    * ``case_insensitive=True``  -> plain ``str.replace`` (exact-case match);
    * ``case_insensitive=False`` -> regex replacement with ``re.IGNORECASE``.

    :param old: literal substring to search for.
    :param new: literal replacement text.
    :param string: text to process.
    :param case_insensitive: selects the replacement strategy (see note above).
    :return: a new string with the replacements applied.
    """
    if case_insensitive:
        return string.replace(old, new)
    # A callable replacement makes re.sub insert `new` verbatim. Passing the
    # string directly would treat it as a template, so backslashes in `new`
    # (e.g. "\1" or "\n") would be expanded or raise re.error.
    return re.sub(re.escape(old), lambda _match: new, string, flags=re.IGNORECASE)


class TextNormalizer():
"""Translates the letters of the alphabet mixed in normal"""

Expand Down Expand Up @@ -96,13 +105,6 @@ def __init__(self):
]



def replace(self, old, new, string, case_insensitive=False):
    """Replace every occurrence of ``old`` in ``string`` with ``new``.

    Both ``old`` and ``new`` are treated as literal text. No instance state
    is read.

    NOTE(review): the flag reads inverted relative to its name, and this is
    kept on purpose for backward compatibility with existing callers:

    * ``case_insensitive=True``  -> plain ``str.replace`` (exact-case match);
    * ``case_insensitive=False`` -> regex replacement with ``re.IGNORECASE``.

    :param old: literal substring to search for.
    :param new: literal replacement text.
    :param string: text to process.
    :param case_insensitive: selects the replacement strategy (see note above).
    :return: a new string with the replacements applied.
    """
    if case_insensitive:
        return string.replace(old, new)
    # A callable replacement makes re.sub insert `new` verbatim. Passing the
    # string directly would treat it as a template, so backslashes in `new`
    # (e.g. "\1" or "\n") would be expanded or raise re.error.
    return re.sub(re.escape(old), lambda _match: new, string, flags=re.IGNORECASE)

def CheckWord(self, word, change_case = True):
"""Check the word
Expand All @@ -117,10 +119,10 @@ def CheckWord(self, word, change_case = True):
self.lang = "?"

# в VK часто стал использоваться символ "ë" как русская буква "е".
newword = self.replace("ë", "е", word, True)
newword = normalizer_replace_text("ë", "е", word, True)
# остальные символы из постов VK
for k, v in self.lettersstrng.items():
newword = self.replace(k, v, newword, True)
newword = normalizer_replace_text(k, v, newword, True)
# один символ не имеет смысла
if len(newword.strip()) == 1:
return newword
Expand Down Expand Up @@ -149,7 +151,7 @@ def CheckWord(self, word, change_case = True):
self.lang = "ru"

for i in range(0, len(self.Rus)):
newword = self.replace(self.Eng[i], self.Rus[i], newword, True)
newword = normalizer_replace_text(self.Eng[i], self.Rus[i], newword, True)

else:
self.IsEn100percent = False
Expand All @@ -160,7 +162,7 @@ def CheckWord(self, word, change_case = True):
self.lang = "en"

for i in range(0, len(self.Eng)):
newword = self.replace(self.Rus[i], self.Eng[i], newword, True)
newword = normalizer_replace_text(self.Rus[i], self.Eng[i], newword, True)

# Были ли замены?
self.Changes = newword != word
Expand Down Expand Up @@ -189,7 +191,7 @@ def CheckText(self, text, change_case = True):
for word in (words, words2, words4)[x]:
newWord = self.CheckWord(word, change_case)
if self.Changes:
newText = self.replace(word, newWord, newText, False)
newText = normalizer_replace_text(word, newWord, newText, False)
Rus = ["с", "у", "нет", "ее"]
Eng = ["c", "y", "heт", "ee"]
if text != newText:
Expand All @@ -199,16 +201,16 @@ def CheckText(self, text, change_case = True):
newText = newText.replace(" c ", " с ")
newText = newText.replace(" C ", " С ")
for i in range(0, len(Rus)):
newText = self.replace(Eng[i], Rus[i], newText, False)
newText = normalizer_replace_text(Eng[i], Rus[i], newText, False)
for i in range(0, len(self.patterns)):
if text != newText:
newText = re.sub(self.patterns[i], self.replaces[i], newText, flags=re.IGNORECASE)
newText = re.sub(r"([a-z])у([a-z])", r"\1y\2", newText)
newText = re.sub(r"([a-z])у", r"\1y", newText)
newText = re.sub(r"у([a-z])", r"y\1", newText)
newText = self.replace("сh", "ch", newText, True)
newText = self.replace("сe", "ce", newText, True)
newText = self.replace("Вo", "Bo", newText, True)
newText = normalizer_replace_text("сh", "ch", newText, True)
newText = normalizer_replace_text("сe", "ce", newText, True)
newText = normalizer_replace_text("Вo", "Bo", newText, True)
return newText

def main():
Expand Down

0 comments on commit 3a81078

Please sign in to comment.