From 59f7ffcc354652e0e1e36579c5c237056f7ae0b1 Mon Sep 17 00:00:00 2001
From: taishi-i <taishi.ikeda.0323@gmail.com>
Date: Thu, 8 Sep 2022 20:37:29 +0900
Subject: [PATCH] =?UTF-8?q?fix=20latin=20capital=20letter=20I=20with=20dot?=
 =?UTF-8?q?=20above=20(=C4=B0)=20AssertionError?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 nagisa/nagisa_utils.pyx | 2 ++
 test/nagisa_test.py     | 8 +++++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/nagisa/nagisa_utils.pyx b/nagisa/nagisa_utils.pyx
index 42c75eb..dd3d7aa 100644
--- a/nagisa/nagisa_utils.pyx
+++ b/nagisa/nagisa_utils.pyx
@@ -42,6 +42,7 @@ cpdef unicode normalize(unicode text):
 cpdef unicode preprocess(text):
     text = utf8rstrip(text)
     text = normalize(text)
+    text = text.replace('İ', 'I')  # Fold 'İ' (U+0130) to ASCII 'I'; leaving it in triggered an AssertionError
     text = text.replace(' ', '　')
     return text
 
@@ -50,6 +51,7 @@ cpdef unicode preprocess_without_rstrip(text):
     if type(text) != unicode:
         text = unicode(text, 'utf-8')
     text = normalize(text)
+    text = text.replace('İ', 'I')  # Fold 'İ' (U+0130) to ASCII 'I'; leaving it in triggered an AssertionError
     text = text.replace(' ', '　')
     return text
 
diff --git a/test/nagisa_test.py b/test/nagisa_test.py
index fbbaf6b..aa41983 100644
--- a/test/nagisa_test.py
+++ b/test/nagisa_test.py
@@ -18,7 +18,6 @@ def test_tagging(self):
         words = nagisa.tagging(text, lower=True)
         self.assertEqual(output, str(words))
 
-
         # test_3
         text   = 'ニューラルネットワークを使ってます。'
         output = 'ニューラル/名詞 ネットワーク/名詞 を/助詞 使っ/動詞 て/助動詞 ます/助動詞 。/補助記号'
@@ -116,6 +115,13 @@ def test_tagging(self):
         words = nagisa.tagging(text)
         self.assertEqual(output, str(words))
 
+        # test_26: 'İ' (U+0130) in the input is expected to come back as 'I' in the tagged output
+        text = "エラーを避けるため、İはIに変換される"
+        output = "エラー/名詞 を/助詞 避ける/動詞 ため/名詞 、/補助記号 I/名詞 は/助詞 I/名詞 に/助詞 変換/名詞 さ/動詞 れる/助動詞"
+        words = nagisa.tagging(text)
+        self.assertEqual(output, str(words))
+
+
     def test_utils(self):
         # test_20
         output = "oov"