From 294d7d13556375625234fa6fa367a28fbadea6d7 Mon Sep 17 00:00:00 2001 From: ma-ilsi Date: Sat, 9 Nov 2024 21:35:55 +0300 Subject: [PATCH] =?UTF-8?q?Variable=20`output=5Fstring`=20initialized=20ea?= =?UTF-8?q?rlier=20The=20change=20prevents=20the=20raising=20of=20`Unbound?= =?UTF-8?q?LocalError`=20when=20passing=20certain=20strings=20found=20in?= =?UTF-8?q?=20Arabic=20corpuses=20such=20as=20"=D9=80"=20(Unicode=20point?= =?UTF-8?q?=200640)=20to=20the=20chained=20function=20calls:=20remove=5Fpu?= =?UTF-8?q?nctuation(arStrip()).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- build/lib/sinatools/utils/parser.py | 4 ++-- sinatools/utils/parser.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/build/lib/sinatools/utils/parser.py b/build/lib/sinatools/utils/parser.py index d08fd6a..49f6fc4 100644 --- a/build/lib/sinatools/utils/parser.py +++ b/build/lib/sinatools/utils/parser.py @@ -93,6 +93,7 @@ def remove_punctuation(text): يَا أَيُّهَا الَّذِينَ آمَنُوا لِيَسْتَأْذِنْكُمُ """ + output_string = text try: if text: punctuation_marks = [r'[\u0021-\u002F]+', r'[U+060C]+', r'[\u003A-\u0040]+', @@ -100,7 +101,6 @@ def remove_punctuation(text): r'[\u061B]+', r'[\u061E]+', r'[\u061F]+', r'[\u0640]+', r'[\u0653]+', r'[\u065C]+', r'[\u066C]+', r'[\u066A]+', r'["}"]+', r'["{"]+'] - output_string = text for punctuation in punctuation_marks: output_string = re.sub(punctuation, '', output_string) except: @@ -138,4 +138,4 @@ def remove_latin(text): except: return text return text - \ No newline at end of file + diff --git a/sinatools/utils/parser.py b/sinatools/utils/parser.py index d08fd6a..49f6fc4 100644 --- a/sinatools/utils/parser.py +++ b/sinatools/utils/parser.py @@ -93,6 +93,7 @@ def remove_punctuation(text): يَا أَيُّهَا الَّذِينَ آمَنُوا لِيَسْتَأْذِنْكُمُ """ + output_string = text try: if text: punctuation_marks = [r'[\u0021-\u002F]+', r'[U+060C]+', r'[\u003A-\u0040]+', @@ -100,7 +101,6 @@ def 
remove_punctuation(text): r'[\u061B]+', r'[\u061E]+', r'[\u061F]+', r'[\u0640]+', r'[\u0653]+', r'[\u065C]+', r'[\u066C]+', r'[\u066A]+', r'["}"]+', r'["{"]+'] - output_string = text for punctuation in punctuation_marks: output_string = re.sub(punctuation, '', output_string) except: @@ -138,4 +138,4 @@ def remove_latin(text): except: return text return text - \ No newline at end of file +