From 8b3d7a64fa87f00d44b3a4802ee98fc7bf9c1099 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Fri, 17 Dec 2021 02:30:18 +0900
Subject: [PATCH 01/13] =?UTF-8?q?windows=E3=81=AE=E3=83=91=E3=82=B9?=
 =?UTF-8?q?=E3=82=B7=E3=82=B9=E3=83=86=E3=83=A0=E3=81=A7=E3=82=82=E8=AA=AD?=
 =?UTF-8?q?=E3=81=BF=E8=BE=BC=E3=82=81=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?=
 =?UTF-8?q?=E5=A4=89=E6=9B=B4=20(#1)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyopenjtalk/__init__.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py
index a266104..f41deb1 100644
--- a/pyopenjtalk/__init__.py
+++ b/pyopenjtalk/__init__.py
@@ -17,22 +17,26 @@
 except ImportError:
     raise ImportError("BUG: version.py doesn't exist. Please file a bug report.")
 
+import locale
+
 from .htsengine import HTSEngine
 from .openjtalk import OpenJTalk
 
+path_encoding = locale.getpreferredencoding()
+
 # Dictionary directory
 # defaults to the package directory where the dictionary will be automatically downloaded
 OPEN_JTALK_DICT_DIR = os.environ.get(
     "OPEN_JTALK_DICT_DIR",
     pkg_resources.resource_filename(__name__, "open_jtalk_dic_utf_8-1.11"),
-).encode("utf-8")
+)
 _dict_download_url = "https://github.com/r9y9/open_jtalk/releases/download/v1.11.1"
 _DICT_URL = f"{_dict_download_url}/open_jtalk_dic_utf_8-1.11.tar.gz"
 
 # Default mei_normal.voice for HMM-based TTS
 DEFAULT_HTS_VOICE = pkg_resources.resource_filename(
     __name__, "htsvoice/mei_normal.htsvoice"
-).encode("utf-8")
+)
 
 # Global instance of OpenJTalk
 _global_jtalk = None
@@ -67,7 +71,7 @@ def _extract_dic():
         f.extractall(path=pkg_resources.resource_filename(__name__, ""))
     OPEN_JTALK_DICT_DIR = pkg_resources.resource_filename(
         __name__, "open_jtalk_dic_utf_8-1.11"
-    ).encode("utf-8")
+    )
     os.remove(filename)
 
 
@@ -94,7 +98,7 @@ def g2p(*args, **kwargs):
     global _global_jtalk
     if _global_jtalk is None:
         _lazy_init()
-        _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
+        _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding))
     return _global_jtalk.g2p(*args, **kwargs)
 
 
@@ -129,7 +133,7 @@ def synthesize(labels, speed=1.0, half_tone=0.0):
 
     global _global_htsengine
     if _global_htsengine is None:
-        _global_htsengine = HTSEngine(DEFAULT_HTS_VOICE)
+        _global_htsengine = HTSEngine(DEFAULT_HTS_VOICE.encode(path_encoding))
     sr = _global_htsengine.get_sampling_frequency()
     _global_htsengine.set_speed(speed)
     _global_htsengine.add_half_tone(half_tone)
@@ -165,5 +169,5 @@ def run_frontend(text, verbose=0):
     global _global_jtalk
     if _global_jtalk is None:
         _lazy_init()
-        _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
+        _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding))
     return _global_jtalk.run_frontend(text, verbose)

From 8466b5a331e4736181648d5585be626aa7f9bde1 Mon Sep 17 00:00:00 2001
From: takana-v <44311840+takana-v@users.noreply.github.com>
Date: Sat, 1 Jan 2022 12:38:14 +0900
Subject: [PATCH 02/13] =?UTF-8?q?=E3=83=A6=E3=83=BC=E3=82=B6=E3=83=BC?=
 =?UTF-8?q?=E8=BE=9E=E6=9B=B8=E6=A9=9F=E8=83=BD=E3=81=AE=E8=BF=BD=E5=8A=A0?=
 =?UTF-8?q?=EF=BC=88=E5=86=8DPR=EF=BC=89=20(#3)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* user dict

* fix lint

* windowsのパスシステムでも読み込めるように変更 (#1)

* fix encoding

Co-authored-by: Yosshi999 <Yosshi999@users.noreply.github.com>
Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 README.md                       | 28 ++++++++++++++++
 pyopenjtalk/__init__.py         | 35 +++++++++++++++++++-
 pyopenjtalk/openjtalk.pyx       | 57 ++++++++++++++++++++++++++++++---
 pyopenjtalk/openjtalk/mecab.pxd | 11 +++++++
 4 files changed, 126 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 0d490fc..46b462a 100644
--- a/README.md
+++ b/README.md
@@ -105,6 +105,34 @@ In [3]: pyopenjtalk.g2p("こんにちは", kana=True)
 Out[3]: 'コンニチワ'
 ```
 
+### Create/Apply user dictionary
+
+1. Create a CSV file (e.g. `user.csv`) and write custom words like below:
+
+```csv
+ＧＮＵ,,,1,名詞,一般,*,*,*,*,ＧＮＵ,グヌー,グヌー,2/3,*
+```
+
+2. Call `create_user_dict` to compile the CSV file.
+
+```
+>>> import pyopenjtalk
+>>> pyopenjtalk.create_user_dict("user.csv", "user.dic")
+reading user.csv ... 1
+emitting double-array: 100% |###########################################| 
+
+done!
+```
+
+3. Call `set_user_dict` to apply the user dictionary.
+
+```
+>>> pyopenjtalk.g2p("GNU")
+'j i i e n u y u u'
+>>> pyopenjtalk.set_user_dict("user.dic")
+>>> pyopenjtalk.g2p("GNU")
+'g u n u u'
+```
 
 ## LICENSE
 
diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py
index f41deb1..03946ff 100644
--- a/pyopenjtalk/__init__.py
+++ b/pyopenjtalk/__init__.py
@@ -20,7 +20,9 @@
 import locale
 
 from .htsengine import HTSEngine
-from .openjtalk import OpenJTalk
+from .openjtalk import CreateUserDict, OpenJTalk
+
+path_encoding = locale.getpreferredencoding()
 
 path_encoding = locale.getpreferredencoding()
 
@@ -171,3 +173,34 @@ def run_frontend(text, verbose=0):
         _lazy_init()
         _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding))
     return _global_jtalk.run_frontend(text, verbose)
+
+
+def create_user_dict(path, out_path):
+    """Create user dictionary
+
+    Args:
+        path (str): path to user csv
+        out_path (str): path to output dictionary
+    """
+    global _global_jtalk
+    if _global_jtalk is None:
+        _lazy_init()
+    if not exists(path):
+        raise ValueError("no such file or directory: %s" % path)
+    CreateUserDict(OPEN_JTALK_DICT_DIR.encode(path_encoding), path.encode(path_encoding), out_path.encode(path_encoding))
+
+
+def set_user_dict(path):
+    """Apply user dictionary
+
+    Args:
+        path (str): path to user dictionary
+    """
+    global _global_jtalk
+    if _global_jtalk is None:
+        _lazy_init()
+    if not exists(path):
+        raise ValueError("no such file or directory: %s" % path)
+    _global_jtalk = OpenJTalk(
+        dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding), user_mecab=path.encode(path_encoding)
+    )
diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx
index 46e6004..c83704e 100644
--- a/pyopenjtalk/openjtalk.pyx
+++ b/pyopenjtalk/openjtalk.pyx
@@ -11,6 +11,7 @@ cimport cython
 
 from openjtalk.mecab cimport Mecab, Mecab_initialize, Mecab_load, Mecab_analysis
 from openjtalk.mecab cimport Mecab_get_feature, Mecab_get_size, Mecab_refresh, Mecab_clear
+from openjtalk.mecab cimport mecab_dict_index, createModel, Model, Tagger, Lattice
 from openjtalk.njd cimport NJD, NJD_initialize, NJD_refresh, NJD_print, NJD_clear
 from openjtalk cimport njd as _njd
 from openjtalk.jpcommon cimport JPCommon, JPCommon_initialize,JPCommon_make_label
@@ -20,6 +21,36 @@ from openjtalk cimport njd2jpcommon
 from openjtalk.text2mecab cimport text2mecab
 from openjtalk.mecab2njd cimport mecab2njd
 from openjtalk.njd2jpcommon cimport njd2jpcommon
+from libc.string cimport strlen
+
+cdef inline int Mecab_load_ex(Mecab *m, char* dicdir, char* userdic):
+    if userdic == NULL or strlen(userdic) == 0:
+        return Mecab_load(m, dicdir)
+
+    if m == NULL or dicdir == NULL or strlen(dicdir) == 0:
+        return 0
+
+    Mecab_clear(m)
+
+    cdef (char*)[5] argv = ["mecab", "-d", dicdir, "-u", userdic]
+    cdef Model *model = createModel(5, argv)
+
+    if model == NULL:
+        return 0
+    m.model = model
+
+    cdef Tagger *tagger = model.createTagger()
+    if tagger == NULL:
+        Mecab_clear(m)
+        return 0
+    m.tagger = tagger
+
+    cdef Lattice *lattice = model.createLattice()
+    if lattice == NULL:
+        Mecab_clear(m)
+        return 0
+    m.lattice = lattice
+    return 1
 
 cdef njd_node_get_string(_njd.NJDNode* node):
     return (<bytes>(_njd.NJDNode_get_string(node))).decode("utf-8")
@@ -96,12 +127,15 @@ cdef class OpenJTalk(object):
 
     Args:
         dn_mecab (bytes): Dictionaly path for MeCab.
+        user_mecab (bytes): Dictionary path for MeCab userdic.
+            This option is ignored when empty bytestring is given.
+            Default is empty.
     """
     cdef Mecab* mecab
     cdef NJD* njd
     cdef JPCommon* jpcommon
 
-    def __cinit__(self, bytes dn_mecab=b"/usr/local/dic"):
+    def __cinit__(self, bytes dn_mecab=b"/usr/local/dic", bytes user_mecab=b""):
         self.mecab = new Mecab()
         self.njd = new NJD()
         self.jpcommon = new JPCommon()
@@ -110,7 +144,7 @@ cdef class OpenJTalk(object):
         NJD_initialize(self.njd)
         JPCommon_initialize(self.jpcommon)
 
-        r = self._load(dn_mecab)
+        r = self._load(dn_mecab, user_mecab)
         if r != 1:
           self._clear()
           raise RuntimeError("Failed to initalize Mecab")
@@ -121,8 +155,8 @@ cdef class OpenJTalk(object):
       NJD_clear(self.njd)
       JPCommon_clear(self.jpcommon)
 
-    def _load(self, bytes dn_mecab):
-        return Mecab_load(self.mecab, dn_mecab)
+    def _load(self, bytes dn_mecab, bytes user_mecab):
+        return Mecab_load_ex(self.mecab, dn_mecab, user_mecab)
 
 
     def run_frontend(self, text, verbose=0):
@@ -196,3 +230,18 @@ cdef class OpenJTalk(object):
         del self.mecab
         del self.njd
         del self.jpcommon
+
+def CreateUserDict(bytes dn_mecab, bytes path, bytes out_path):
+    cdef (char*)[10] argv = [
+        "mecab-dict-index",
+        "-d",
+        dn_mecab,
+        "-u",
+        out_path,
+        "-f",
+        "utf-8",
+        "-t",
+        "utf-8",
+        path
+    ]
+    mecab_dict_index(10, argv)
\ No newline at end of file
diff --git a/pyopenjtalk/openjtalk/mecab.pxd b/pyopenjtalk/openjtalk/mecab.pxd
index bd367c7..1538e05 100644
--- a/pyopenjtalk/openjtalk/mecab.pxd
+++ b/pyopenjtalk/openjtalk/mecab.pxd
@@ -16,3 +16,14 @@ cdef extern from "mecab.h":
     char **Mecab_get_feature(Mecab *m)
     cdef int Mecab_refresh(Mecab *m)
     cdef int Mecab_clear(Mecab *m)
+    cdef int mecab_dict_index(int argc, char **argv)
+
+cdef extern from "mecab.h" namespace "MeCab":
+    cdef cppclass Tagger:
+        pass
+    cdef cppclass Lattice:
+        pass
+    cdef cppclass Model:
+        Tagger *createTagger()
+        Lattice *createLattice()
+    cdef Model *createModel(int argc, char **argv)

From de0aafc16ec762a159c1ddfcb92488053f49191c Mon Sep 17 00:00:00 2001
From: takana-v <44311840+takana-v@users.noreply.github.com>
Date: Tue, 25 Jan 2022 22:32:22 +0900
Subject: [PATCH 03/13] =?UTF-8?q?=E3=83=A6=E3=83=BC=E3=82=B6=E3=83=BC?=
 =?UTF-8?q?=E8=BE=9E=E6=9B=B8=E3=81=AE=E9=81=A9=E7=94=A8=E3=82=92=E3=82=84?=
 =?UTF-8?q?=E3=82=81=E3=82=8B=E9=96=A2=E6=95=B0=E3=82=92=E8=BF=BD=E5=8A=A0?=
 =?UTF-8?q?=20(#5)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add unset_user_dict

* fix format
---
 pyopenjtalk/__init__.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py
index 03946ff..8ce923b 100644
--- a/pyopenjtalk/__init__.py
+++ b/pyopenjtalk/__init__.py
@@ -204,3 +204,11 @@ def set_user_dict(path):
     _global_jtalk = OpenJTalk(
         dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding), user_mecab=path.encode(path_encoding)
     )
+
+
+def unset_user_dict():
+    """Stop applying user dictionary"""
+    global _global_jtalk
+    if _global_jtalk is None:
+        _lazy_init()
+    _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding))
\ No newline at end of file

From 9a99880c7c83707b833ff525195c592414df9678 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Tue, 1 Feb 2022 00:54:48 +0900
Subject: [PATCH 04/13] =?UTF-8?q?openjtalk=E3=82=921.11=E3=81=AB=E3=82=A2?=
 =?UTF-8?q?=E3=83=83=E3=83=97=E3=83=87=E3=83=BC=E3=83=88=20(#6)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitmodules    | 2 +-
 lib/open_jtalk | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index e70e7ee..bda0e0d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -3,4 +3,4 @@
 	url = https://github.com/r9y9/open_jtalk.git
 [submodule "lib/hts_engine_API"]
 	path = lib/hts_engine_API
-	url = https://github.com/r9y9/hts_engine_API.git
+	url = https://github.com/VOICEVOX/pyopenjtalk.git
diff --git a/lib/open_jtalk b/lib/open_jtalk
index 9572293..427cfd7 160000
--- a/lib/open_jtalk
+++ b/lib/open_jtalk
@@ -1 +1 @@
-Subproject commit 957229334996d2c9d9fcb73cdb3f4d9c15bcdd57
+Subproject commit 427cfd761b78efb6094bea3c5bb8c968f0d711ab

From f3dfdb6da0473cf06964b1d0a23481ff962655ba Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Tue, 1 Feb 2022 01:23:28 +0900
Subject: [PATCH 05/13] =?UTF-8?q?Revert=20"openjtalk=E3=82=921.11=E3=81=AB?=
 =?UTF-8?q?=E3=82=A2=E3=83=83=E3=83=97=E3=83=87=E3=83=BC=E3=83=88=20(#6)"?=
 =?UTF-8?q?=20(#7)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 9a99880c7c83707b833ff525195c592414df9678.
---
 .gitmodules    | 2 +-
 lib/open_jtalk | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index bda0e0d..e70e7ee 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -3,4 +3,4 @@
 	url = https://github.com/r9y9/open_jtalk.git
 [submodule "lib/hts_engine_API"]
 	path = lib/hts_engine_API
-	url = https://github.com/VOICEVOX/pyopenjtalk.git
+	url = https://github.com/r9y9/hts_engine_API.git
diff --git a/lib/open_jtalk b/lib/open_jtalk
index 427cfd7..9572293 160000
--- a/lib/open_jtalk
+++ b/lib/open_jtalk
@@ -1 +1 @@
-Subproject commit 427cfd761b78efb6094bea3c5bb8c968f0d711ab
+Subproject commit 957229334996d2c9d9fcb73cdb3f4d9c15bcdd57

From a85521a0a0f298f08d9e9b24987b3c77eb4aaff5 Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Tue, 1 Feb 2022 01:51:08 +0900
Subject: [PATCH 06/13] =?UTF-8?q?Submodule=E3=82=92=E6=9B=B4=E6=96=B0?=
 =?UTF-8?q?=E3=81=97=E3=81=A6openjtalk1.11=E3=81=AB=E3=82=A2=E3=83=83?=
 =?UTF-8?q?=E3=83=97=E3=83=87=E3=83=BC=E3=83=88=20(#8)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* delete

* update
---
 .gitmodules    | 6 +++---
 lib/open_jtalk | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index e70e7ee..79cf34b 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,6 @@
-[submodule "lib/open_jtalk"]
-	path = lib/open_jtalk
-	url = https://github.com/r9y9/open_jtalk.git
 [submodule "lib/hts_engine_API"]
 	path = lib/hts_engine_API
 	url = https://github.com/r9y9/hts_engine_API.git
+[submodule "lib/open_jtalk"]
+	path = lib/open_jtalk
+	url = https://github.com/VOICEVOX/open_jtalk.git
diff --git a/lib/open_jtalk b/lib/open_jtalk
index 9572293..427cfd7 160000
--- a/lib/open_jtalk
+++ b/lib/open_jtalk
@@ -1 +1 @@
-Subproject commit 957229334996d2c9d9fcb73cdb3f4d9c15bcdd57
+Subproject commit 427cfd761b78efb6094bea3c5bb8c968f0d711ab

From 07f242eed8e9c2b51e7ee908d46fc789406adefe Mon Sep 17 00:00:00 2001
From: Yosshi999 <Yosshi999@users.noreply.github.com>
Date: Sat, 16 Apr 2022 20:23:42 +0900
Subject: [PATCH 07/13] use safer text2mecab (#10)

---
 lib/open_jtalk                       | 2 +-
 pyopenjtalk/openjtalk.pyx            | 9 ++++++++-
 pyopenjtalk/openjtalk/text2mecab.pxd | 2 +-
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/lib/open_jtalk b/lib/open_jtalk
index 427cfd7..d74d20a 160000
--- a/lib/open_jtalk
+++ b/lib/open_jtalk
@@ -1 +1 @@
-Subproject commit 427cfd761b78efb6094bea3c5bb8c968f0d711ab
+Subproject commit d74d20ac25d212079acb40fdb7af69f11d38d8cf
diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx
index c83704e..816f655 100644
--- a/pyopenjtalk/openjtalk.pyx
+++ b/pyopenjtalk/openjtalk.pyx
@@ -2,6 +2,7 @@
 # cython: boundscheck=True, wraparound=True
 # cython: c_string_type=unicode, c_string_encoding=ascii
 
+import errno
 import numpy as np
 
 cimport numpy as np
@@ -165,7 +166,13 @@ cdef class OpenJTalk(object):
         if isinstance(text, str):
           text = text.encode("utf-8")
         cdef char buff[8192]
-        text2mecab(buff, text)
+        cdef int result = text2mecab(buff, 8192, text)
+        if result != 0:
+            if result == errno.ERANGE:
+                raise RuntimeError("Text is too long")
+            if result == errno.EINVAL:
+                raise RuntimeError("Invalid input for text2mecab")
+            raise RuntimeError("Unknown error: " + str(result))
         Mecab_analysis(self.mecab, buff)
         mecab2njd(self.njd, Mecab_get_feature(self.mecab), Mecab_get_size(self.mecab))
         _njd.njd_set_pronunciation(self.njd)
diff --git a/pyopenjtalk/openjtalk/text2mecab.pxd b/pyopenjtalk/openjtalk/text2mecab.pxd
index 6081757..3d44553 100644
--- a/pyopenjtalk/openjtalk/text2mecab.pxd
+++ b/pyopenjtalk/openjtalk/text2mecab.pxd
@@ -1,4 +1,4 @@
 # distutils: language = c++
 
 cdef extern from "text2mecab.h":
-    void text2mecab(char *output, const char *input)
+    int text2mecab(char *output, size_t sizeOfOutput, const char *input)

From 74fcb1f85af70919390c901b3fa2b3388c79af1d Mon Sep 17 00:00:00 2001
From: takana-v <44311840+takana-v@users.noreply.github.com>
Date: Wed, 22 Jun 2022 01:00:44 +0900
Subject: [PATCH 08/13] =?UTF-8?q?openjtalk=E3=82=92=E6=9B=B4=E6=96=B0=20(#?=
 =?UTF-8?q?12)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/open_jtalk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/open_jtalk b/lib/open_jtalk
index d74d20a..b56a8ed 160000
--- a/lib/open_jtalk
+++ b/lib/open_jtalk
@@ -1 +1 @@
-Subproject commit d74d20ac25d212079acb40fdb7af69f11d38d8cf
+Subproject commit b56a8ed2a63f021e72f237b15b8881ce1cbd621d

From 50b0296a9e1b666e5a09a41ec9e9284a2a9b608f Mon Sep 17 00:00:00 2001
From: Hiroshiba <hihokaruta@gmail.com>
Date: Thu, 23 Jun 2022 01:07:20 +0900
Subject: [PATCH 09/13] Merge r9y9 0.2.0 (#13)

* Workaround the installation issue #27

Not sure about the exact root cause, but the error messages suggested
that there are issues with setuptools/pip. I can confirm we can fix the
issue by changing the build-time setuptools requirement to <v60.0.

At least v59.8.0 should work.
https://github.com/pypa/setuptools/releases/tag/v59.8.0

* prepare for v0.1.6

* Start a new dev cycle

* Update hts_engine_API

* Update open_jtalk

https://github.com/r9y9/open_jtalk/releases/tag/v1.11.2

* Update version to v0.2.0

* [ci skip] update changelog

* Update pyproject.toml

Co-authored-by: Ryuichi Yamamoto <zryuichi@gmail.com>
---
 docs/changelog.rst | 11 ++++++++++-
 lib/hts_engine_API |  2 +-
 setup.py           |  2 +-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index fce3799..012d042 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,9 +1,16 @@
 Change log
 ==========
 
-v0.1.6 <2021-xx-xx>
+v0.2.0 <2022-02-06>
 -------------------
 
+* `#29`_: Update binary dependencies (hts_engine_API/open_jtalk)
+
+v0.1.6 <2022-01-29>
+-------------------
+
+* `#27`_: pyopenjtalk cannot be installed in google colab
+
 v0.1.5 <2021-09-18>
 -------------------
 
@@ -73,3 +80,5 @@ Initial release with OpenJTalk's text processsing functionality
 .. _#22: https://github.com/r9y9/pyopenjtalk/pull/22
 .. _#24: https://github.com/r9y9/pyopenjtalk/pull/24
 .. _#25: https://github.com/r9y9/pyopenjtalk/pull/25
+.. _#27: https://github.com/r9y9/pyopenjtalk/issues/27
+.. _#29: https://github.com/r9y9/pyopenjtalk/pull/29
diff --git a/lib/hts_engine_API b/lib/hts_engine_API
index b7e1c8b..214e26d 160000
--- a/lib/hts_engine_API
+++ b/lib/hts_engine_API
@@ -1 +1 @@
-Subproject commit b7e1c8b51787e19ea4376176afd9707c3c9d599a
+Subproject commit 214e26dfb7f728ff9db39c14a59db709abcc121d
diff --git a/setup.py b/setup.py
index 0f75369..6f4af65 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
 
 platform_is_windows = sys.platform == "win32"
 
-version = "0.1.6"
+version = "0.2.0"
 
 min_cython_ver = "0.21.0"
 try:

From f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae Mon Sep 17 00:00:00 2001
From: takana-v <44311840+takana-v@users.noreply.github.com>
Date: Sun, 11 Sep 2022 16:56:22 +0900
Subject: [PATCH 10/13] =?UTF-8?q?openjtalk=E3=81=AE=E3=83=91=E3=82=B9UTF?=
 =?UTF-8?q?=E5=8C=96=E3=81=AB=E8=BF=BD=E5=BE=93=E3=81=99=E3=82=8B=20(#14)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* open_jtalkを最新コミットに追従

* Revert "windowsのパスシステムでも読み込めるように変更 (#1)"

This reverts commit 8b3d7a64fa87f00d44b3a4802ee98fc7bf9c1099.

* 変更漏れ修正
---
 lib/open_jtalk          |  2 +-
 pyopenjtalk/__init__.py | 24 +++++++++---------------
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/lib/open_jtalk b/lib/open_jtalk
index b56a8ed..acd0cc6 160000
--- a/lib/open_jtalk
+++ b/lib/open_jtalk
@@ -1 +1 @@
-Subproject commit b56a8ed2a63f021e72f237b15b8881ce1cbd621d
+Subproject commit acd0cc63ab63d09ea519a39462a2cbc9bc8698d0
diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py
index 8ce923b..09a8e94 100644
--- a/pyopenjtalk/__init__.py
+++ b/pyopenjtalk/__init__.py
@@ -17,28 +17,22 @@
 except ImportError:
     raise ImportError("BUG: version.py doesn't exist. Please file a bug report.")
 
-import locale
-
 from .htsengine import HTSEngine
 from .openjtalk import CreateUserDict, OpenJTalk
 
-path_encoding = locale.getpreferredencoding()
-
-path_encoding = locale.getpreferredencoding()
-
 # Dictionary directory
 # defaults to the package directory where the dictionary will be automatically downloaded
 OPEN_JTALK_DICT_DIR = os.environ.get(
     "OPEN_JTALK_DICT_DIR",
     pkg_resources.resource_filename(__name__, "open_jtalk_dic_utf_8-1.11"),
-)
+).encode("utf-8")
 _dict_download_url = "https://github.com/r9y9/open_jtalk/releases/download/v1.11.1"
 _DICT_URL = f"{_dict_download_url}/open_jtalk_dic_utf_8-1.11.tar.gz"
 
 # Default mei_normal.voice for HMM-based TTS
 DEFAULT_HTS_VOICE = pkg_resources.resource_filename(
     __name__, "htsvoice/mei_normal.htsvoice"
-)
+).encode("utf-8")
 
 # Global instance of OpenJTalk
 _global_jtalk = None
@@ -73,7 +67,7 @@ def _extract_dic():
         f.extractall(path=pkg_resources.resource_filename(__name__, ""))
     OPEN_JTALK_DICT_DIR = pkg_resources.resource_filename(
         __name__, "open_jtalk_dic_utf_8-1.11"
-    )
+    ).encode("utf-8")
     os.remove(filename)
 
 
@@ -100,7 +94,7 @@ def g2p(*args, **kwargs):
     global _global_jtalk
     if _global_jtalk is None:
         _lazy_init()
-        _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding))
+        _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
     return _global_jtalk.g2p(*args, **kwargs)
 
 
@@ -135,7 +129,7 @@ def synthesize(labels, speed=1.0, half_tone=0.0):
 
     global _global_htsengine
     if _global_htsengine is None:
-        _global_htsengine = HTSEngine(DEFAULT_HTS_VOICE.encode(path_encoding))
+        _global_htsengine = HTSEngine(DEFAULT_HTS_VOICE)
     sr = _global_htsengine.get_sampling_frequency()
     _global_htsengine.set_speed(speed)
     _global_htsengine.add_half_tone(half_tone)
@@ -171,7 +165,7 @@ def run_frontend(text, verbose=0):
     global _global_jtalk
     if _global_jtalk is None:
         _lazy_init()
-        _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding))
+        _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
     return _global_jtalk.run_frontend(text, verbose)
 
 
@@ -187,7 +181,7 @@ def create_user_dict(path, out_path):
         _lazy_init()
     if not exists(path):
         raise ValueError("no such file or directory: %s" % path)
-    CreateUserDict(OPEN_JTALK_DICT_DIR.encode(path_encoding), path.encode(path_encoding), out_path.encode(path_encoding))
+    CreateUserDict(OPEN_JTALK_DICT_DIR, path.encode("utf-8"), out_path.encode("utf-8"))
 
 
 def set_user_dict(path):
@@ -202,7 +196,7 @@ def set_user_dict(path):
     if not exists(path):
         raise ValueError("no such file or directory: %s" % path)
     _global_jtalk = OpenJTalk(
-        dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding), user_mecab=path.encode(path_encoding)
+        dn_mecab=OPEN_JTALK_DICT_DIR, user_mecab=path.encode("utf-8")
     )
 
 
@@ -211,4 +205,4 @@ def unset_user_dict():
     global _global_jtalk
     if _global_jtalk is None:
         _lazy_init()
-    _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding))
\ No newline at end of file
+    _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
\ No newline at end of file

From 827a3fc5c7dda7bbe832c0c69da98e39cc8cb2c3 Mon Sep 17 00:00:00 2001
From: aoirint <aoirint@gmail.com>
Date: Thu, 2 Feb 2023 11:48:03 +0900
Subject: [PATCH 11/13] Merge r9y9 0.3.0 (#16)

* Workaround the installation issue #27

Not sure about the exact root cause, but the error messages suggested
that there are issues with setuptools/pip. I can confirm we can fix the
issue by changing the build-time setuptools requirement to <v60.0.

At least v59.8.0 should work.
https://github.com/pypa/setuptools/releases/tag/v59.8.0

* prepare for v0.1.6

* Start a new dev cycle

* Update hts_engine_API

* Update open_jtalk

https://github.com/r9y9/open_jtalk/releases/tag/v1.11.2

* Update version to v0.2.0

* [ci skip] update changelog

* Start new dev cycle

* Fixes for Python 3.10

* bump version for dev

* Add a workaround for Numpy's ABI issue

* add codes

* update tests

* add test for frontend

* add run_marine option to tts()

* update the README for the run_marine option

* update error message for importing marine

* fix for lint

* add marine's license

* update readme

* add new API in docs/pyopenjtalk.rst

* add change log

* fix typo

* Update pyopenjtalk/__init__.py

Co-authored-by: Ryuichi Yamamoto <zryuichi@gmail.com>

* Update pyopenjtalk/__init__.py

Co-authored-by: Ryuichi Yamamoto <zryuichi@gmail.com>

* Update pyopenjtalk/__init__.py

Co-authored-by: Ryuichi Yamamoto <zryuichi@gmail.com>

* add link

* prep for release

---------

Co-authored-by: Ryuichi Yamamoto <zryuichi@gmail.com>
Co-authored-by: park.byeongseon <park.byeongseon@linecorp.com>
Co-authored-by: bgsn.pk <disloretomail@gmail.com>
---
 .github/workflows/ci.yaml |  17 +++++-
 README.md                 |  21 ++++++-
 docs/changelog.rst        |  10 ++++
 docs/index.rst            |  18 ++++++
 docs/pyopenjtalk.rst      |   2 +
 pyopenjtalk/__init__.py   |  81 +++++++++++++++++++++++----
 pyopenjtalk/openjtalk.pyx | 114 +++++++++++++++++++++++++-------------
 pyopenjtalk/utils.py      |  21 +++++++
 pyproject.toml            |  10 ++--
 setup.py                  |   8 ++-
 tests/test_openjtalk.py   |  45 +++++++++++----
 11 files changed, 275 insertions(+), 72 deletions(-)
 create mode 100644 pyopenjtalk/utils.py

diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index f4b97e3..8b22dcd 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -15,8 +15,19 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest]
-        python-version: [3.7, 3.8, 3.9]
+        include:
+          - os: ubuntu-latest
+            python-version: 3.7
+          - os: ubuntu-latest
+            python-version: 3.8
+          - os: ubuntu-latest
+            python-version: 3.9
+          - os: ubuntu-latest
+            python-version: '3.10'
+          - os: macos-latest
+            python-version: 3.9
+          - os: windows-latest
+            python-version: 3.9
 
     steps:
     - uses: actions/checkout@v2
@@ -36,4 +47,4 @@ jobs:
     - name: Test with pytest
       run: |
         pip install pytest
-        pytest
\ No newline at end of file
+        pytest
diff --git a/README.md b/README.md
index 46b462a..b6b0f70 100644
--- a/README.md
+++ b/README.md
@@ -105,6 +105,24 @@ In [3]: pyopenjtalk.g2p("こんにちは", kana=True)
 Out[3]: 'コンニチワ'
 ```
 
+### About `run_marine` option
+
+After v0.3.0, the `run_marine` option has been available for estimating the Japanese accent with the DNN-based method (see [marine](https://github.com/6gsn/marine)). If you want to use the feature, please install pyopenjtalk as below;
+
+```shell
+pip install pyopenjtalk[marine]
+```
+
+And then, you can use the option as the following examples;
+
+```python
+In [1]: import pyopenjtalk
+
+In [2]: x, sr = pyopenjtalk.tts("おめでとうございます", run_marine=True) # for TTS
+
+In [3]: label = pyopenjtalk.extract_fullcontext("こんにちは", run_marine=True) # for text processing frontend only
+```
+
 ### Create/Apply user dictionary
 
 1. Create a CSV file (e.g. `user.csv`) and write custom words like below:
@@ -139,7 +157,8 @@ done!
 - pyopenjtalk: MIT license ([LICENSE.md](LICENSE.md))
 - Open JTalk: Modified BSD license ([COPYING](https://github.com/r9y9/open_jtalk/blob/1.10/src/COPYING))
 - htsvoice in this repository: Please check [pyopenjtalk/htsvoice/README.md](pyopenjtalk/htsvoice/README.md).
+- marine: Apache 2.0 license ([LICENSE](https://github.com/6gsn/marine/blob/main/LICENSE))
 
 ## Acknowledgements
 
-HTS Working Group for their dedicated efforts to develop and maintain Open JTalk.
\ No newline at end of file
+HTS Working Group for their dedicated efforts to develop and maintain Open JTalk.
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 012d042..cfb72f8 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -1,6 +1,14 @@
 Change log
 ==========
 
+v0.3.0 <2022-09-20>
+-------------------
+
+Newer numpy  (>v1.20.0) is required to avoid ABI compatibility issues. Please check the updated installation guide.
+
+* `#40`_: Introduce marine for Japanese accent estimation. Note that there could be a breakpoint regarding `run_frontend` because this PR changed the behavior of the API.
+* `#35`_: Fixes for Python 3.10.
+
 v0.2.0 <2022-02-06>
 -------------------
 
@@ -82,3 +90,5 @@ Initial release with OpenJTalk's text processsing functionality
 .. _#25: https://github.com/r9y9/pyopenjtalk/pull/25
 .. _#27: https://github.com/r9y9/pyopenjtalk/issues/27
 .. _#29: https://github.com/r9y9/pyopenjtalk/pull/29
+.. _#35: https://github.com/r9y9/pyopenjtalk/pull/35
+.. _#40: https://github.com/r9y9/pyopenjtalk/pull/40
diff --git a/docs/index.rst b/docs/index.rst
index 441c879..f9040d0 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -24,6 +24,24 @@ The latest release is availabe on pypi. You can install it by:
 
    pip install pyopenjtalk
 
+
+Workaround for ``ValueError: numpy.ndarray size changed, may indicate binary incompatibility``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This type of errors comes from the Numpys' ABI breaking changes. If you see ``ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject`` or similar, please make sure to install numpy first, and then install pyopenjtalk by:
+
+.. code::
+
+   pip install pyopenjtalk --no-build-isolation
+
+or:
+
+.. code::
+
+   pip install git+https://github.com/r9y9/pyopenjtalk --no-build-isolation
+
+The option ``--no-build-isolation`` tells pip not to create a build environment, so the pre-installed numpy is used to build the packge. Hense there should be no Numpy's ABI issues.
+
 .. toctree::
    :maxdepth: 1
    :caption: Notebooks
diff --git a/docs/pyopenjtalk.rst b/docs/pyopenjtalk.rst
index 370d2f5..5e03e7b 100644
--- a/docs/pyopenjtalk.rst
+++ b/docs/pyopenjtalk.rst
@@ -25,3 +25,5 @@ Misc
 ----
 
 .. autofunction:: run_frontend
+.. autofunction:: make_label
+.. autofunction:: estimate_accent
diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py
index 09a8e94..1716ff9 100644
--- a/pyopenjtalk/__init__.py
+++ b/pyopenjtalk/__init__.py
@@ -19,6 +19,7 @@
 
 from .htsengine import HTSEngine
 from .openjtalk import CreateUserDict, OpenJTalk
+from .utils import merge_njd_marine_features
 
 # Dictionary directory
 # defaults to the package directory where the dictionary will be automatically downloaded
@@ -39,6 +40,8 @@
 # Global instance of HTSEngine
 # mei_normal.voice is used as default
 _global_htsengine = None
+# Global instance of Marine
+_global_marine = None
 
 
 # https://github.com/tqdm/tqdm#hooks-and-callbacks
@@ -98,18 +101,53 @@ def g2p(*args, **kwargs):
     return _global_jtalk.g2p(*args, **kwargs)
 
 
-def extract_fullcontext(text):
+def estimate_accent(njd_features):
+    """Accent estimation using marine
+
+    This function requires marine (https://github.com/6gsn/marine)
+
+    Args:
+        njd_features (list): features generated by OpenJTalk.
+
+    Returns:
+        list: features for NJDNode with estimation results by marine.
+    """
+    global _global_marine
+    if _global_marine is None:
+        try:
+            from marine.predict import Predictor
+        except BaseException:
+            raise ImportError(
+                "Please install marine by `pip install pyopenjtalk[marine]`"
+            )
+        _global_marine = Predictor()
+    from marine.utils.openjtalk_util import convert_njd_feature_to_marine_feature
+
+    marine_feature = convert_njd_feature_to_marine_feature(njd_features)
+    marine_results = _global_marine.predict(
+        [marine_feature], require_open_jtalk_format=True
+    )
+    njd_features = merge_njd_marine_features(njd_features, marine_results)
+    return njd_features
+
+
+def extract_fullcontext(text, run_marine=False):
     """Extract full-context labels from text
 
     Args:
         text (str): Input text
+        run_marine (bool): Whether to estimate accent using marine.
+          Default is False. If you want to activate this option, you need to install marine
+          by `pip install pyopenjtalk[marine]`
 
     Returns:
         list: List of full-context labels
     """
-    # note: drop first return
-    _, labels = run_frontend(text)
-    return labels
+
+    njd_features = run_frontend(text)
+    if run_marine:
+        njd_features = estimate_accent(njd_features)
+    return make_label(njd_features)
 
 
 def synthesize(labels, speed=1.0, half_tone=0.0):
@@ -136,37 +174,56 @@ def synthesize(labels, speed=1.0, half_tone=0.0):
     return _global_htsengine.synthesize(labels), sr
 
 
-def tts(text, speed=1.0, half_tone=0.0):
+def tts(text, speed=1.0, half_tone=0.0, run_marine=False):
     """Text-to-speech
 
     Args:
         text (str): Input text
         speed (float): speech speed rate. Default is 1.0.
         half_tone (float): additional half-tone. Default is 0.
+        run_marine (bool): Whether to estimate accent using marine.
+          Default is False. If you want to activate this option, you need to install marine
+          by `pip install pyopenjtalk[marine]`
 
     Returns:
         np.ndarray: speech waveform (dtype: np.float64)
         int: sampling frequency (defualt: 48000)
     """
-    return synthesize(extract_fullcontext(text), speed, half_tone)
+    return synthesize(
+        extract_fullcontext(text, run_marine=run_marine), speed, half_tone
+    )
 
 
-def run_frontend(text, verbose=0):
+def run_frontend(text):
     """Run OpenJTalk's text processing frontend
 
     Args:
         text (str): Unicode Japanese text.
-        verbose (int): Verbosity. Default is 0.
 
     Returns:
-        tuple: Pair of 1) NJD_print and 2) JPCommon_make_label.
-        The latter is the full-context labels in HTS-style format.
+        list: features for NJDNode.
+    """
+    global _global_jtalk
+    if _global_jtalk is None:
+        _lazy_init()
+        _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
+    return _global_jtalk.run_frontend(text)
+
+
+def make_label(njd_features):
+    """Make full-context label using features
+
+    Args:
+        njd_features (list): features for NJDNode.
+
+    Returns:
+        list: full-context labels.
     """
     global _global_jtalk
     if _global_jtalk is None:
         _lazy_init()
         _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
-    return _global_jtalk.run_frontend(text, verbose)
+    return _global_jtalk.make_label(njd_features)
 
 
 def create_user_dict(path, out_path):
@@ -205,4 +262,4 @@ def unset_user_dict():
     global _global_jtalk
     if _global_jtalk is None:
         _lazy_init()
-    _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
\ No newline at end of file
+    _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR)
diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx
index 816f655..650660c 100644
--- a/pyopenjtalk/openjtalk.pyx
+++ b/pyopenjtalk/openjtalk.pyx
@@ -9,6 +9,7 @@ cimport numpy as np
 np.import_array()
 
 cimport cython
+from libc.stdlib cimport calloc
 
 from openjtalk.mecab cimport Mecab, Mecab_initialize, Mecab_load, Mecab_analysis
 from openjtalk.mecab cimport Mecab_get_feature, Mecab_get_size, Mecab_refresh, Mecab_clear
@@ -96,32 +97,57 @@ cdef njd_node_get_chain_flag(_njd.NJDNode* node):
       return _njd.NJDNode_get_chain_flag(node)
 
 
-cdef njd_node_print(_njd.NJDNode* node):
-  return "{},{},{},{},{},{},{},{},{},{},{}/{},{},{}".format(
-    njd_node_get_string(node),
-    njd_node_get_pos(node),
-    njd_node_get_pos_group1(node),
-    njd_node_get_pos_group2(node),
-    njd_node_get_pos_group3(node),
-    njd_node_get_ctype(node),
-    njd_node_get_cform(node),
-    njd_node_get_orig(node),
-    njd_node_get_read(node),
-    njd_node_get_pron(node),
-    njd_node_get_acc(node),
-    njd_node_get_mora_size(node),
-    njd_node_get_chain_rule(node),
-    njd_node_get_chain_flag(node)
-    )
-
-
-cdef njd_print(_njd.NJD* njd):
+cdef node2feature(_njd.NJDNode* node):
+  return {
+    "string": njd_node_get_string(node),
+    "pos": njd_node_get_pos(node),
+    "pos_group1": njd_node_get_pos_group1(node),
+    "pos_group2": njd_node_get_pos_group2(node),
+    "pos_group3": njd_node_get_pos_group3(node),
+    "ctype": njd_node_get_ctype(node),
+    "cform": njd_node_get_cform(node),
+    "orig": njd_node_get_orig(node),
+    "read": njd_node_get_read(node),
+    "pron": njd_node_get_pron(node),
+    "acc": njd_node_get_acc(node),
+    "mora_size": njd_node_get_mora_size(node),
+    "chain_rule": njd_node_get_chain_rule(node),
+    "chain_flag": njd_node_get_chain_flag(node),
+  }
+
+
+cdef njd2feature(_njd.NJD* njd):
     cdef _njd.NJDNode* node = njd.head
-    njd_results = []
+    features = []
     while node is not NULL:
-      njd_results.append(njd_node_print(node))
+      features.append(node2feature(node))
       node = node.next
-    return njd_results
+    return features
+
+
+cdef feature2njd(_njd.NJD* njd, features):
+    cdef _njd.NJDNode* node
+
+    for feature_node in features:
+        node = <_njd.NJDNode *> calloc(1, sizeof(_njd.NJDNode))
+        _njd.NJDNode_initialize(node)
+        # set values
+        _njd.NJDNode_set_string(node, feature_node["string"].encode("utf-8"))
+        _njd.NJDNode_set_pos(node, feature_node["pos"].encode("utf-8"))
+        _njd.NJDNode_set_pos_group1(node, feature_node["pos_group1"].encode("utf-8"))
+        _njd.NJDNode_set_pos_group2(node, feature_node["pos_group2"].encode("utf-8"))
+        _njd.NJDNode_set_pos_group3(node, feature_node["pos_group3"].encode("utf-8"))
+        _njd.NJDNode_set_ctype(node, feature_node["ctype"].encode("utf-8"))
+        _njd.NJDNode_set_cform(node, feature_node["cform"].encode("utf-8"))
+        _njd.NJDNode_set_orig(node, feature_node["orig"].encode("utf-8"))
+        _njd.NJDNode_set_read(node, feature_node["read"].encode("utf-8"))
+        _njd.NJDNode_set_pron(node, feature_node["pron"].encode("utf-8"))
+        _njd.NJDNode_set_acc(node, feature_node["acc"])
+        _njd.NJDNode_set_mora_size(node, feature_node["mora_size"])
+        _njd.NJDNode_set_chain_rule(node, feature_node["chain_rule"].encode("utf-8"))
+        _njd.NJDNode_set_chain_flag(node, feature_node["chain_flag"])
+        _njd.NJD_push_node(njd, node)
+
 
 cdef class OpenJTalk(object):
     """OpenJTalk
@@ -160,12 +186,13 @@ cdef class OpenJTalk(object):
         return Mecab_load_ex(self.mecab, dn_mecab, user_mecab)
 
 
-    def run_frontend(self, text, verbose=0):
+    def run_frontend(self, text):
         """Run OpenJTalk's text processing frontend
         """
-        if isinstance(text, str):
-          text = text.encode("utf-8")
         cdef char buff[8192]
+        if isinstance(text, str):
+            text = text.encode("utf-8")
+
         cdef int result = text2mecab(buff, 8192, text)
         if result != 0:
             if result == errno.ERANGE:
@@ -173,6 +200,7 @@ cdef class OpenJTalk(object):
             if result == errno.EINVAL:
                 raise RuntimeError("Invalid input for text2mecab")
             raise RuntimeError("Unknown error: " + str(result))
+
         Mecab_analysis(self.mecab, buff)
         mecab2njd(self.njd, Mecab_get_feature(self.mecab), Mecab_get_size(self.mecab))
         _njd.njd_set_pronunciation(self.njd)
@@ -181,7 +209,20 @@ cdef class OpenJTalk(object):
         _njd.njd_set_accent_type(self.njd)
         _njd.njd_set_unvoiced_vowel(self.njd)
         _njd.njd_set_long_vowel(self.njd)
+        features = njd2feature(self.njd)
+
+        # Note that this will release memory for njd feature
+        NJD_refresh(self.njd)
+        Mecab_refresh(self.mecab)
+
+        return features
+
+    def make_label(self, features):
+        """Make full-context label
+        """
+        feature2njd(self.njd, features)
         njd2jpcommon(self.jpcommon, self.njd)
+
         JPCommon_make_label(self.jpcommon)
 
         cdef int label_size = JPCommon_get_label_size(self.jpcommon)
@@ -194,23 +235,19 @@ cdef class OpenJTalk(object):
           # http://cython.readthedocs.io/en/latest/src/tutorial/strings.html
           labels.append(<unicode>label_feature[i])
 
-        njd_results = njd_print(self.njd)
-
-        if verbose > 0:
-          NJD_print(self.njd)
-
         # Note that this will release memory for label feature
         JPCommon_refresh(self.jpcommon)
         NJD_refresh(self.njd)
-        Mecab_refresh(self.mecab)
 
-        return njd_results, labels
+        return labels
 
     def g2p(self, text, kana=False, join=True):
         """Grapheme-to-phoeneme (G2P) conversion
         """
-        njd_results, labels = self.run_frontend(text)
+        njd_features = self.run_frontend(text)
+
         if not kana:
+            labels = self.make_label(njd_features)
             prons = list(map(lambda s: s.split("-")[1].split("+")[0], labels[1:-1]))
             if join:
                 prons = " ".join(prons)
@@ -218,12 +255,11 @@ cdef class OpenJTalk(object):
 
         # kana
         prons = []
-        for n in njd_results:
-            row = n.split(",")
-            if row[1] == "記号":
-                p = row[0]
+        for n in njd_features:
+            if n["pos"] == "記号":
+                p = n["string"]
             else:
-                p = row[9]
+                p = n["pron"]
             # remove special chars
             for c in "’":
                 p = p.replace(c,"")
diff --git a/pyopenjtalk/utils.py b/pyopenjtalk/utils.py
new file mode 100644
index 0000000..7aeb1bf
--- /dev/null
+++ b/pyopenjtalk/utils.py
@@ -0,0 +1,21 @@
+def merge_njd_marine_features(njd_features, marine_results):
+    features = []
+
+    marine_accs = marine_results["accent_status"]
+    marine_chain_flags = marine_results["accent_phrase_boundary"]
+
+    assert (
+        len(njd_features) == len(marine_accs) == len(marine_chain_flags)
+    ), "Invalid sequence sizes in njd_results, marine_results"
+
+    for node_index, njd_feature in enumerate(njd_features):
+        _feature = {}
+        for feature_key in njd_feature.keys():
+            if feature_key == "acc":
+                _feature["acc"] = int(marine_accs[node_index])
+            elif feature_key == "chain_flag":
+                _feature[feature_key] = int(marine_chain_flags[node_index])
+            else:
+                _feature[feature_key] = njd_feature[feature_key]
+        features.append(_feature)
+    return features
diff --git a/pyproject.toml b/pyproject.toml
index 8d1266b..212e445 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,13 +1,13 @@
 [build-system]
 requires = [
     "wheel",
-    "setuptools",
-    "cython>=0.21.0",
-    "numpy<v1.20.0",
+    "setuptools<v60.0",
+    "cython>=0.28.0",
+    "numpy>=1.20.0",
 ]
 
 [tool.pysen]
-version = "0.9"
+version = "0.10.2"
 
 [tool.pysen.lint]
 enable_black = true
@@ -18,4 +18,4 @@ mypy_preset = "strict"
 line_length = 88
 py_version = "py37"
 [[tool.pysen.lint.mypy_targets]]
-  paths = ["."]
\ No newline at end of file
+  paths = ["."]
diff --git a/setup.py b/setup.py
index 6f4af65..62ec246 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
 
 platform_is_windows = sys.platform == "win32"
 
-version = "0.2.0"
+version = "0.3.0"
 
 min_cython_ver = "0.21.0"
 try:
@@ -276,7 +276,7 @@ def run(self):
     ext_modules=ext_modules,
     cmdclass=cmdclass,
     install_requires=[
-        "numpy >= 1.8.0",
+        "numpy >= 1.20.0",
         "cython >= " + min_cython_ver,
         "six",
         "tqdm",
@@ -296,11 +296,14 @@ def run(self):
             "types-setuptools",
             "mypy<=0.910",
             "black>=19.19b0,<=20.8",
+            "click<8.1.0",
             "flake8>=3.7,<4",
             "flake8-bugbear",
             "isort>=4.3,<5.2.0",
+            "types-decorator",
         ],
         "test": ["pytest", "scipy"],
+        "marine": ["marine>=0.0.5"],
     },
     classifiers=[
         "Operating System :: POSIX",
@@ -313,6 +316,7 @@ def run(self):
         "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
         "License :: OSI Approved :: MIT License",
         "Topic :: Scientific/Engineering",
         "Topic :: Software Development",
diff --git a/tests/test_openjtalk.py b/tests/test_openjtalk.py
index 56b4a80..0f43363 100644
--- a/tests/test_openjtalk.py
+++ b/tests/test_openjtalk.py
@@ -1,10 +1,9 @@
 import pyopenjtalk
 
 
-def _print_results(njd_results, labels):
-    for n in njd_results:
-        row = n.split(",")
-        s, p = row[0], row[9]
+def _print_results(njd_features, labels):
+    for f in njd_features:
+        s, p = f["string"], f["pron"]
         print(s, p)
 
     for label in labels:
@@ -12,12 +11,37 @@ def _print_results(njd_results, labels):
 
 
 def test_hello():
-    njd_results, labels = pyopenjtalk.run_frontend("こんにちは")
-    _print_results(njd_results, labels)
+    njd_features = pyopenjtalk.run_frontend("こんにちは")
+    labels = pyopenjtalk.make_label(njd_features)
+    _print_results(njd_features, labels)
+
+
+def test_njd_features():
+    njd_features = pyopenjtalk.run_frontend("こんにちは")
+    expected_feature = [
+        {
+            "string": "こんにちは",
+            "pos": "感動詞",
+            "pos_group1": "*",
+            "pos_group2": "*",
+            "pos_group3": "*",
+            "ctype": "*",
+            "cform": "*",
+            "orig": "こんにちは",
+            "read": "コンニチハ",
+            "pron": "コンニチワ",
+            "acc": 0,
+            "mora_size": 5,
+            "chain_rule": "-1",
+            "chain_flag": -1,
+        }
+    ]
+    assert njd_features == expected_feature
 
 
 def test_fullcontext():
-    _, labels = pyopenjtalk.run_frontend("こんにちは")
+    features = pyopenjtalk.run_frontend("こんにちは")
+    labels = pyopenjtalk.make_label(features)
     labels2 = pyopenjtalk.extract_fullcontext("こんにちは")
     for a, b in zip(labels, labels2):
         assert a == b
@@ -30,10 +54,11 @@ def test_jtalk():
         "どんまい！",
         "パソコンのとりあえず知っておきたい使い方",
     ]:
-        njd_results, labels = pyopenjtalk.run_frontend(text)
-        _print_results(njd_results, labels)
+        njd_features = pyopenjtalk.run_frontend(text)
+        labels = pyopenjtalk.make_label(njd_features)
+        _print_results(njd_features, labels)
 
-        surface = "".join(map(lambda s: s.split(",")[0], njd_results))
+        surface = "".join(map(lambda f: f["string"], njd_features))
         assert surface == text
 
 

From acd4f02d2af3129382c151590238b9370465e360 Mon Sep 17 00:00:00 2001
From: sabonerune <102559104+sabonerune@users.noreply.github.com>
Date: Tue, 18 Jul 2023 19:56:24 +0900
Subject: [PATCH 12/13] =?UTF-8?q?BLD:=20=E3=83=93=E3=83=AB=E3=83=89?=
 =?UTF-8?q?=E6=99=82=E3=81=AB=E4=BD=BF=E7=94=A8=E3=81=99=E3=82=8Bcython?=
 =?UTF-8?q?=E3=83=90=E3=83=BC=E3=82=B8=E3=83=A7=E3=83=B3=E3=82=92=E5=88=B6?=
 =?UTF-8?q?=E9=99=90=E3=81=99=E3=82=8B=20(#18)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Hiroshiba <hihokaruta@gmail.com>
---
 pyproject.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pyproject.toml b/pyproject.toml
index 212e445..39c1428 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,7 +2,7 @@
 requires = [
     "wheel",
     "setuptools<v60.0",
-    "cython>=0.28.0",
+    "cython>=0.28.0, <3.0", # NOTE: https://github.com/r9y9/pyopenjtalk/issues/55
     "numpy>=1.20.0",
 ]
 

From ba5a316a694b66aa4db91f3b5b05bf49b5dafb00 Mon Sep 17 00:00:00 2001
From: My <84212641+My-MC@users.noreply.github.com>
Date: Fri, 8 Sep 2023 23:41:34 +0900
Subject: [PATCH 13/13] =?UTF-8?q?MSVC=2014.37=E3=81=AE=E4=BB=95=E6=A7=98?=
 =?UTF-8?q?=E5=A4=89=E6=9B=B4=E3=81=B8=E3=81=AE=E5=AF=BE=E5=BF=9C=20(#19)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/open_jtalk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/open_jtalk b/lib/open_jtalk
index acd0cc6..78e1829 160000
--- a/lib/open_jtalk
+++ b/lib/open_jtalk
@@ -1 +1 @@
-Subproject commit acd0cc63ab63d09ea519a39462a2cbc9bc8698d0
+Subproject commit 78e182970ff4eaa032b8e81f0c4ddba3fdd8e73a