From 8b3d7a64fa87f00d44b3a4802ee98fc7bf9c1099 Mon Sep 17 00:00:00 2001 From: Hiroshiba Date: Fri, 17 Dec 2021 02:30:18 +0900 Subject: [PATCH 01/13] =?UTF-8?q?windows=E3=81=AE=E3=83=91=E3=82=B9?= =?UTF-8?q?=E3=82=B7=E3=82=B9=E3=83=86=E3=83=A0=E3=81=A7=E3=82=82=E8=AA=AD?= =?UTF-8?q?=E3=81=BF=E8=BE=BC=E3=82=81=E3=82=8B=E3=82=88=E3=81=86=E3=81=AB?= =?UTF-8?q?=E5=A4=89=E6=9B=B4=20(#1)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyopenjtalk/__init__.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py index a266104..f41deb1 100644 --- a/pyopenjtalk/__init__.py +++ b/pyopenjtalk/__init__.py @@ -17,22 +17,26 @@ except ImportError: raise ImportError("BUG: version.py doesn't exist. Please file a bug report.") +import locale + from .htsengine import HTSEngine from .openjtalk import OpenJTalk +path_encoding = locale.getpreferredencoding() + # Dictionary directory # defaults to the package directory where the dictionary will be automatically downloaded OPEN_JTALK_DICT_DIR = os.environ.get( "OPEN_JTALK_DICT_DIR", pkg_resources.resource_filename(__name__, "open_jtalk_dic_utf_8-1.11"), -).encode("utf-8") +) _dict_download_url = "https://github.com/r9y9/open_jtalk/releases/download/v1.11.1" _DICT_URL = f"{_dict_download_url}/open_jtalk_dic_utf_8-1.11.tar.gz" # Default mei_normal.voice for HMM-based TTS DEFAULT_HTS_VOICE = pkg_resources.resource_filename( __name__, "htsvoice/mei_normal.htsvoice" -).encode("utf-8") +) # Global instance of OpenJTalk _global_jtalk = None @@ -67,7 +71,7 @@ def _extract_dic(): f.extractall(path=pkg_resources.resource_filename(__name__, "")) OPEN_JTALK_DICT_DIR = pkg_resources.resource_filename( __name__, "open_jtalk_dic_utf_8-1.11" - ).encode("utf-8") + ) os.remove(filename) @@ -94,7 +98,7 @@ def g2p(*args, **kwargs): global _global_jtalk if _global_jtalk is None: _lazy_init() - _global_jtalk = 
OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) + _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding)) return _global_jtalk.g2p(*args, **kwargs) @@ -129,7 +133,7 @@ def synthesize(labels, speed=1.0, half_tone=0.0): global _global_htsengine if _global_htsengine is None: - _global_htsengine = HTSEngine(DEFAULT_HTS_VOICE) + _global_htsengine = HTSEngine(DEFAULT_HTS_VOICE.encode(path_encoding)) sr = _global_htsengine.get_sampling_frequency() _global_htsengine.set_speed(speed) _global_htsengine.add_half_tone(half_tone) @@ -165,5 +169,5 @@ def run_frontend(text, verbose=0): global _global_jtalk if _global_jtalk is None: _lazy_init() - _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) + _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding)) return _global_jtalk.run_frontend(text, verbose) From 8466b5a331e4736181648d5585be626aa7f9bde1 Mon Sep 17 00:00:00 2001 From: takana-v <44311840+takana-v@users.noreply.github.com> Date: Sat, 1 Jan 2022 12:38:14 +0900 Subject: [PATCH 02/13] =?UTF-8?q?=E3=83=A6=E3=83=BC=E3=82=B6=E3=83=BC?= =?UTF-8?q?=E8=BE=9E=E6=9B=B8=E6=A9=9F=E8=83=BD=E3=81=AE=E8=BF=BD=E5=8A=A0?= =?UTF-8?q?=EF=BC=88=E5=86=8DPR=EF=BC=89=20(#3)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * user dict * fix lint * windowsのパスシステムでも読み込めるように変更 (#1) * fix encoding Co-authored-by: Yosshi999 Co-authored-by: Hiroshiba --- README.md | 28 ++++++++++++++++ pyopenjtalk/__init__.py | 35 +++++++++++++++++++- pyopenjtalk/openjtalk.pyx | 57 ++++++++++++++++++++++++++++++--- pyopenjtalk/openjtalk/mecab.pxd | 11 +++++++ 4 files changed, 126 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 0d490fc..46b462a 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,34 @@ In [3]: pyopenjtalk.g2p("こんにちは", kana=True) Out[3]: 'コンニチワ' ``` +### Create/Apply user dictionary + +1. Create a CSV file (e.g. 
`user.csv`) and write custom words like below: + +```csv +GNU,,,1,名詞,一般,*,*,*,*,GNU,グヌー,グヌー,2/3,* +``` + +2. Call `create_user_dict` to compile the CSV file. + +``` +>>> import pyopenjtalk +>>> pyopenjtalk.create_user_dict("user.csv", "user.dic") +reading user.csv ... 1 +emitting double-array: 100% |###########################################| + +done! +``` + +3. Call `set_user_dict` to apply the user dictionary. + +``` +>>> pyopenjtalk.g2p("GNU") +'j i i e n u y u u' +>>> pyopenjtalk.set_user_dict("user.dic") +>>> pyopenjtalk.g2p("GNU") +'g u n u u' +``` ## LICENSE diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py index f41deb1..03946ff 100644 --- a/pyopenjtalk/__init__.py +++ b/pyopenjtalk/__init__.py @@ -20,7 +20,9 @@ import locale from .htsengine import HTSEngine -from .openjtalk import OpenJTalk +from .openjtalk import CreateUserDict, OpenJTalk + +path_encoding = locale.getpreferredencoding() path_encoding = locale.getpreferredencoding() @@ -171,3 +173,34 @@ def run_frontend(text, verbose=0): _lazy_init() _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding)) return _global_jtalk.run_frontend(text, verbose) + + +def create_user_dict(path, out_path): + """Create user dictionary + + Args: + path (str): path to user csv + out_path (str): path to output dictionary + """ + global _global_jtalk + if _global_jtalk is None: + _lazy_init() + if not exists(path): + raise ValueError("no such file or directory: %s" % path) + CreateUserDict(OPEN_JTALK_DICT_DIR.encode(path_encoding), path.encode(path_encoding), out_path.encode(path_encoding)) + + +def set_user_dict(path): + """Apply user dictionary + + Args: + path (str): path to user dictionary + """ + global _global_jtalk + if _global_jtalk is None: + _lazy_init() + if not exists(path): + raise ValueError("no such file or directory: %s" % path) + _global_jtalk = OpenJTalk( + dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding), user_mecab=path.encode(path_encoding) + ) diff --git 
a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx index 46e6004..c83704e 100644 --- a/pyopenjtalk/openjtalk.pyx +++ b/pyopenjtalk/openjtalk.pyx @@ -11,6 +11,7 @@ cimport cython from openjtalk.mecab cimport Mecab, Mecab_initialize, Mecab_load, Mecab_analysis from openjtalk.mecab cimport Mecab_get_feature, Mecab_get_size, Mecab_refresh, Mecab_clear +from openjtalk.mecab cimport mecab_dict_index, createModel, Model, Tagger, Lattice from openjtalk.njd cimport NJD, NJD_initialize, NJD_refresh, NJD_print, NJD_clear from openjtalk cimport njd as _njd from openjtalk.jpcommon cimport JPCommon, JPCommon_initialize,JPCommon_make_label @@ -20,6 +21,36 @@ from openjtalk cimport njd2jpcommon from openjtalk.text2mecab cimport text2mecab from openjtalk.mecab2njd cimport mecab2njd from openjtalk.njd2jpcommon cimport njd2jpcommon +from libc.string cimport strlen + +cdef inline int Mecab_load_ex(Mecab *m, char* dicdir, char* userdic): + if userdic == NULL or strlen(userdic) == 0: + return Mecab_load(m, dicdir) + + if m == NULL or dicdir == NULL or strlen(dicdir) == 0: + return 0 + + Mecab_clear(m) + + cdef (char*)[5] argv = ["mecab", "-d", dicdir, "-u", userdic] + cdef Model *model = createModel(5, argv) + + if model == NULL: + return 0 + m.model = model + + cdef Tagger *tagger = model.createTagger() + if tagger == NULL: + Mecab_clear(m) + return 0 + m.tagger = tagger + + cdef Lattice *lattice = model.createLattice() + if lattice == NULL: + Mecab_clear(m) + return 0 + m.lattice = lattice + return 1 cdef njd_node_get_string(_njd.NJDNode* node): return ((_njd.NJDNode_get_string(node))).decode("utf-8") @@ -96,12 +127,15 @@ cdef class OpenJTalk(object): Args: dn_mecab (bytes): Dictionaly path for MeCab. + user_mecab (bytes): Dictionary path for MeCab userdic. + This option is ignored when empty bytestring is given. + Default is empty. 
""" cdef Mecab* mecab cdef NJD* njd cdef JPCommon* jpcommon - def __cinit__(self, bytes dn_mecab=b"/usr/local/dic"): + def __cinit__(self, bytes dn_mecab=b"/usr/local/dic", bytes user_mecab=b""): self.mecab = new Mecab() self.njd = new NJD() self.jpcommon = new JPCommon() @@ -110,7 +144,7 @@ cdef class OpenJTalk(object): NJD_initialize(self.njd) JPCommon_initialize(self.jpcommon) - r = self._load(dn_mecab) + r = self._load(dn_mecab, user_mecab) if r != 1: self._clear() raise RuntimeError("Failed to initalize Mecab") @@ -121,8 +155,8 @@ cdef class OpenJTalk(object): NJD_clear(self.njd) JPCommon_clear(self.jpcommon) - def _load(self, bytes dn_mecab): - return Mecab_load(self.mecab, dn_mecab) + def _load(self, bytes dn_mecab, bytes user_mecab): + return Mecab_load_ex(self.mecab, dn_mecab, user_mecab) def run_frontend(self, text, verbose=0): @@ -196,3 +230,18 @@ cdef class OpenJTalk(object): del self.mecab del self.njd del self.jpcommon + +def CreateUserDict(bytes dn_mecab, bytes path, bytes out_path): + cdef (char*)[10] argv = [ + "mecab-dict-index", + "-d", + dn_mecab, + "-u", + out_path, + "-f", + "utf-8", + "-t", + "utf-8", + path + ] + mecab_dict_index(10, argv) \ No newline at end of file diff --git a/pyopenjtalk/openjtalk/mecab.pxd b/pyopenjtalk/openjtalk/mecab.pxd index bd367c7..1538e05 100644 --- a/pyopenjtalk/openjtalk/mecab.pxd +++ b/pyopenjtalk/openjtalk/mecab.pxd @@ -16,3 +16,14 @@ cdef extern from "mecab.h": char **Mecab_get_feature(Mecab *m) cdef int Mecab_refresh(Mecab *m) cdef int Mecab_clear(Mecab *m) + cdef int mecab_dict_index(int argc, char **argv) + +cdef extern from "mecab.h" namespace "MeCab": + cdef cppclass Tagger: + pass + cdef cppclass Lattice: + pass + cdef cppclass Model: + Tagger *createTagger() + Lattice *createLattice() + cdef Model *createModel(int argc, char **argv) From de0aafc16ec762a159c1ddfcb92488053f49191c Mon Sep 17 00:00:00 2001 From: takana-v <44311840+takana-v@users.noreply.github.com> Date: Tue, 25 Jan 2022 22:32:22 +0900 
Subject: [PATCH 03/13] =?UTF-8?q?=E3=83=A6=E3=83=BC=E3=82=B6=E3=83=BC?= =?UTF-8?q?=E8=BE=9E=E6=9B=B8=E3=81=AE=E9=81=A9=E7=94=A8=E3=82=92=E3=82=84?= =?UTF-8?q?=E3=82=81=E3=82=8B=E9=96=A2=E6=95=B0=E3=82=92=E8=BF=BD=E5=8A=A0?= =?UTF-8?q?=20(#5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add unset_user_dict * fix format --- pyopenjtalk/__init__.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py index 03946ff..8ce923b 100644 --- a/pyopenjtalk/__init__.py +++ b/pyopenjtalk/__init__.py @@ -204,3 +204,11 @@ def set_user_dict(path): _global_jtalk = OpenJTalk( dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding), user_mecab=path.encode(path_encoding) ) + + +def unset_user_dict(): + """Stop applying user dictionary""" + global _global_jtalk + if _global_jtalk is None: + _lazy_init() + _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding)) \ No newline at end of file From 9a99880c7c83707b833ff525195c592414df9678 Mon Sep 17 00:00:00 2001 From: Hiroshiba Date: Tue, 1 Feb 2022 00:54:48 +0900 Subject: [PATCH 04/13] =?UTF-8?q?openjtalk=E3=82=921.11=E3=81=AB=E3=82=A2?= =?UTF-8?q?=E3=83=83=E3=83=97=E3=83=87=E3=83=BC=E3=83=88=20(#6)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitmodules | 2 +- lib/open_jtalk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index e70e7ee..bda0e0d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/r9y9/open_jtalk.git [submodule "lib/hts_engine_API"] path = lib/hts_engine_API - url = https://github.com/r9y9/hts_engine_API.git + url = https://github.com/VOICEVOX/pyopenjtalk.git diff --git a/lib/open_jtalk b/lib/open_jtalk index 9572293..427cfd7 160000 --- a/lib/open_jtalk +++ b/lib/open_jtalk @@ -1 +1 @@ -Subproject commit 957229334996d2c9d9fcb73cdb3f4d9c15bcdd57 +Subproject commit 
427cfd761b78efb6094bea3c5bb8c968f0d711ab From f3dfdb6da0473cf06964b1d0a23481ff962655ba Mon Sep 17 00:00:00 2001 From: Hiroshiba Date: Tue, 1 Feb 2022 01:23:28 +0900 Subject: [PATCH 05/13] =?UTF-8?q?Revert=20"openjtalk=E3=82=921.11=E3=81=AB?= =?UTF-8?q?=E3=82=A2=E3=83=83=E3=83=97=E3=83=87=E3=83=BC=E3=83=88=20(#6)"?= =?UTF-8?q?=20(#7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 9a99880c7c83707b833ff525195c592414df9678. --- .gitmodules | 2 +- lib/open_jtalk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index bda0e0d..e70e7ee 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/r9y9/open_jtalk.git [submodule "lib/hts_engine_API"] path = lib/hts_engine_API - url = https://github.com/VOICEVOX/pyopenjtalk.git + url = https://github.com/r9y9/hts_engine_API.git diff --git a/lib/open_jtalk b/lib/open_jtalk index 427cfd7..9572293 160000 --- a/lib/open_jtalk +++ b/lib/open_jtalk @@ -1 +1 @@ -Subproject commit 427cfd761b78efb6094bea3c5bb8c968f0d711ab +Subproject commit 957229334996d2c9d9fcb73cdb3f4d9c15bcdd57 From a85521a0a0f298f08d9e9b24987b3c77eb4aaff5 Mon Sep 17 00:00:00 2001 From: Hiroshiba Date: Tue, 1 Feb 2022 01:51:08 +0900 Subject: [PATCH 06/13] =?UTF-8?q?Submodule=E3=82=92=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E3=81=97=E3=81=A6openjtalk1.11=E3=81=AB=E3=82=A2=E3=83=83?= =?UTF-8?q?=E3=83=97=E3=83=87=E3=83=BC=E3=83=88=20(#8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * delete * upadte --- .gitmodules | 6 +++--- lib/open_jtalk | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.gitmodules b/.gitmodules index e70e7ee..79cf34b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,6 @@ -[submodule "lib/open_jtalk"] - path = lib/open_jtalk - url = https://github.com/r9y9/open_jtalk.git [submodule "lib/hts_engine_API"] path = lib/hts_engine_API url = 
https://github.com/r9y9/hts_engine_API.git +[submodule "lib/open_jtalk"] + path = lib/open_jtalk + url = https://github.com/VOICEVOX/open_jtalk.git diff --git a/lib/open_jtalk b/lib/open_jtalk index 9572293..427cfd7 160000 --- a/lib/open_jtalk +++ b/lib/open_jtalk @@ -1 +1 @@ -Subproject commit 957229334996d2c9d9fcb73cdb3f4d9c15bcdd57 +Subproject commit 427cfd761b78efb6094bea3c5bb8c968f0d711ab From 07f242eed8e9c2b51e7ee908d46fc789406adefe Mon Sep 17 00:00:00 2001 From: Yosshi999 Date: Sat, 16 Apr 2022 20:23:42 +0900 Subject: [PATCH 07/13] use safer text2mecab (#10) --- lib/open_jtalk | 2 +- pyopenjtalk/openjtalk.pyx | 9 ++++++++- pyopenjtalk/openjtalk/text2mecab.pxd | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/open_jtalk b/lib/open_jtalk index 427cfd7..d74d20a 160000 --- a/lib/open_jtalk +++ b/lib/open_jtalk @@ -1 +1 @@ -Subproject commit 427cfd761b78efb6094bea3c5bb8c968f0d711ab +Subproject commit d74d20ac25d212079acb40fdb7af69f11d38d8cf diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx index c83704e..816f655 100644 --- a/pyopenjtalk/openjtalk.pyx +++ b/pyopenjtalk/openjtalk.pyx @@ -2,6 +2,7 @@ # cython: boundscheck=True, wraparound=True # cython: c_string_type=unicode, c_string_encoding=ascii +import errno import numpy as np cimport numpy as np @@ -165,7 +166,13 @@ cdef class OpenJTalk(object): if isinstance(text, str): text = text.encode("utf-8") cdef char buff[8192] - text2mecab(buff, text) + cdef int result = text2mecab(buff, 8192, text) + if result != 0: + if result == errno.ERANGE: + raise RuntimeError("Text is too long") + if result == errno.EINVAL: + raise RuntimeError("Invalid input for text2mecab") + raise RuntimeError("Unknown error: " + str(result)) Mecab_analysis(self.mecab, buff) mecab2njd(self.njd, Mecab_get_feature(self.mecab), Mecab_get_size(self.mecab)) _njd.njd_set_pronunciation(self.njd) diff --git a/pyopenjtalk/openjtalk/text2mecab.pxd b/pyopenjtalk/openjtalk/text2mecab.pxd index 6081757..3d44553 
100644 --- a/pyopenjtalk/openjtalk/text2mecab.pxd +++ b/pyopenjtalk/openjtalk/text2mecab.pxd @@ -1,4 +1,4 @@ # distutils: language = c++ cdef extern from "text2mecab.h": - void text2mecab(char *output, const char *input) + int text2mecab(char *output, size_t sizeOfOutput, const char *input) From 74fcb1f85af70919390c901b3fa2b3388c79af1d Mon Sep 17 00:00:00 2001 From: takana-v <44311840+takana-v@users.noreply.github.com> Date: Wed, 22 Jun 2022 01:00:44 +0900 Subject: [PATCH 08/13] =?UTF-8?q?openjtalk=E3=82=92=E6=9B=B4=E6=96=B0=20(#?= =?UTF-8?q?12)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/open_jtalk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/open_jtalk b/lib/open_jtalk index d74d20a..b56a8ed 160000 --- a/lib/open_jtalk +++ b/lib/open_jtalk @@ -1 +1 @@ -Subproject commit d74d20ac25d212079acb40fdb7af69f11d38d8cf +Subproject commit b56a8ed2a63f021e72f237b15b8881ce1cbd621d From 50b0296a9e1b666e5a09a41ec9e9284a2a9b608f Mon Sep 17 00:00:00 2001 From: Hiroshiba Date: Thu, 23 Jun 2022 01:07:20 +0900 Subject: [PATCH 09/13] Merge r9y9 0.2.0 (#13) * Workaround the installation issue #27 Not sure about the exact root cause, but the error messages suggested that there's issues with setuptools/pip. 
I can confirm we can fix the issue by changing the build-time setuptools requirement to --- docs/changelog.rst | 11 ++++++++++- lib/hts_engine_API | 2 +- setup.py | 2 +- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index fce3799..012d042 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,9 +1,16 @@ Change log ========== -v0.1.6 <2021-xx-xx> +v0.2.0 <2022-02-06> ------------------- +* `#29`_: Update binary dependencies (hts_engine_API/open_jtalk) + +v0.1.6 <2022-01-29> +------------------- + +* `#27`_: pyopenjtalk cannot be installed in google colab + v0.1.5 <2021-09-18> ------------------- @@ -73,3 +80,5 @@ Initial release with OpenJTalk's text processsing functionality .. _#22: https://github.com/r9y9/pyopenjtalk/pull/22 .. _#24: https://github.com/r9y9/pyopenjtalk/pull/24 .. _#25: https://github.com/r9y9/pyopenjtalk/pull/25 +.. _#27: https://github.com/r9y9/pyopenjtalk/issues/27 +.. _#29: https://github.com/r9y9/pyopenjtalk/pull/29 diff --git a/lib/hts_engine_API b/lib/hts_engine_API index b7e1c8b..214e26d 160000 --- a/lib/hts_engine_API +++ b/lib/hts_engine_API @@ -1 +1 @@ -Subproject commit b7e1c8b51787e19ea4376176afd9707c3c9d599a +Subproject commit 214e26dfb7f728ff9db39c14a59db709abcc121d diff --git a/setup.py b/setup.py index 0f75369..6f4af65 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ platform_is_windows = sys.platform == "win32" -version = "0.1.6" +version = "0.2.0" min_cython_ver = "0.21.0" try: From f4ade29ef9a4f43d8605103cb5bacc29e0b2ccae Mon Sep 17 00:00:00 2001 From: takana-v <44311840+takana-v@users.noreply.github.com> Date: Sun, 11 Sep 2022 16:56:22 +0900 Subject: [PATCH 10/13] =?UTF-8?q?openjtalk=E3=81=AE=E3=83=91=E3=82=B9UTF?= =?UTF-8?q?=E5=8C=96=E3=81=AB=E8=BF=BD=E5=BE=93=E3=81=99=E3=82=8B=20(#14)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * open_jtalkを最新コミットに追従 * Revert "windowsのパスシステムでも読み込めるように変更 (#1)" This reverts 
commit 8b3d7a64fa87f00d44b3a4802ee98fc7bf9c1099. * 変更漏れ修正 --- lib/open_jtalk | 2 +- pyopenjtalk/__init__.py | 24 +++++++++--------------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/lib/open_jtalk b/lib/open_jtalk index b56a8ed..acd0cc6 160000 --- a/lib/open_jtalk +++ b/lib/open_jtalk @@ -1 +1 @@ -Subproject commit b56a8ed2a63f021e72f237b15b8881ce1cbd621d +Subproject commit acd0cc63ab63d09ea519a39462a2cbc9bc8698d0 diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py index 8ce923b..09a8e94 100644 --- a/pyopenjtalk/__init__.py +++ b/pyopenjtalk/__init__.py @@ -17,28 +17,22 @@ except ImportError: raise ImportError("BUG: version.py doesn't exist. Please file a bug report.") -import locale - from .htsengine import HTSEngine from .openjtalk import CreateUserDict, OpenJTalk -path_encoding = locale.getpreferredencoding() - -path_encoding = locale.getpreferredencoding() - # Dictionary directory # defaults to the package directory where the dictionary will be automatically downloaded OPEN_JTALK_DICT_DIR = os.environ.get( "OPEN_JTALK_DICT_DIR", pkg_resources.resource_filename(__name__, "open_jtalk_dic_utf_8-1.11"), -) +).encode("utf-8") _dict_download_url = "https://github.com/r9y9/open_jtalk/releases/download/v1.11.1" _DICT_URL = f"{_dict_download_url}/open_jtalk_dic_utf_8-1.11.tar.gz" # Default mei_normal.voice for HMM-based TTS DEFAULT_HTS_VOICE = pkg_resources.resource_filename( __name__, "htsvoice/mei_normal.htsvoice" -) +).encode("utf-8") # Global instance of OpenJTalk _global_jtalk = None @@ -73,7 +67,7 @@ def _extract_dic(): f.extractall(path=pkg_resources.resource_filename(__name__, "")) OPEN_JTALK_DICT_DIR = pkg_resources.resource_filename( __name__, "open_jtalk_dic_utf_8-1.11" - ) + ).encode("utf-8") os.remove(filename) @@ -100,7 +94,7 @@ def g2p(*args, **kwargs): global _global_jtalk if _global_jtalk is None: _lazy_init() - _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding)) + _global_jtalk = 
OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) return _global_jtalk.g2p(*args, **kwargs) @@ -135,7 +129,7 @@ def synthesize(labels, speed=1.0, half_tone=0.0): global _global_htsengine if _global_htsengine is None: - _global_htsengine = HTSEngine(DEFAULT_HTS_VOICE.encode(path_encoding)) + _global_htsengine = HTSEngine(DEFAULT_HTS_VOICE) sr = _global_htsengine.get_sampling_frequency() _global_htsengine.set_speed(speed) _global_htsengine.add_half_tone(half_tone) @@ -171,7 +165,7 @@ def run_frontend(text, verbose=0): global _global_jtalk if _global_jtalk is None: _lazy_init() - _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding)) + _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) return _global_jtalk.run_frontend(text, verbose) @@ -187,7 +181,7 @@ def create_user_dict(path, out_path): _lazy_init() if not exists(path): raise ValueError("no such file or directory: %s" % path) - CreateUserDict(OPEN_JTALK_DICT_DIR.encode(path_encoding), path.encode(path_encoding), out_path.encode(path_encoding)) + CreateUserDict(OPEN_JTALK_DICT_DIR, path.encode("utf-8"), out_path.encode("utf-8")) def set_user_dict(path): @@ -202,7 +196,7 @@ def set_user_dict(path): if not exists(path): raise ValueError("no such file or directory: %s" % path) _global_jtalk = OpenJTalk( - dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding), user_mecab=path.encode(path_encoding) + dn_mecab=OPEN_JTALK_DICT_DIR, user_mecab=path.encode("utf-8") ) @@ -211,4 +205,4 @@ def unset_user_dict(): global _global_jtalk if _global_jtalk is None: _lazy_init() - _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR.encode(path_encoding)) \ No newline at end of file + _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) \ No newline at end of file From 827a3fc5c7dda7bbe832c0c69da98e39cc8cb2c3 Mon Sep 17 00:00:00 2001 From: aoirint Date: Thu, 2 Feb 2023 11:48:03 +0900 Subject: [PATCH 11/13] Merge r9y9 0.3.0 (#16) * Workaround the installation issue #27 Not sure about the exact root cause, but the 
error messages suggested that there's issues with setuptools/pip. I can confirm we can fix the issue by changing the build-time setuptools requirement to * Update pyopenjtalk/__init__.py Co-authored-by: Ryuichi Yamamoto * Update pyopenjtalk/__init__.py Co-authored-by: Ryuichi Yamamoto * add link * prep for release --------- Co-authored-by: Ryuichi Yamamoto Co-authored-by: park.byeongseon Co-authored-by: bgsn.pk --- .github/workflows/ci.yaml | 17 +++++- README.md | 21 ++++++- docs/changelog.rst | 10 ++++ docs/index.rst | 18 ++++++ docs/pyopenjtalk.rst | 2 + pyopenjtalk/__init__.py | 81 +++++++++++++++++++++++---- pyopenjtalk/openjtalk.pyx | 114 +++++++++++++++++++++++++------------- pyopenjtalk/utils.py | 21 +++++++ pyproject.toml | 10 ++-- setup.py | 8 ++- tests/test_openjtalk.py | 45 +++++++++++---- 11 files changed, 275 insertions(+), 72 deletions(-) create mode 100644 pyopenjtalk/utils.py diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index f4b97e3..8b22dcd 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -15,8 +15,19 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, windows-latest] - python-version: [3.7, 3.8, 3.9] + include: + - os: ubuntu-latest + python-version: 3.7 + - os: ubuntu-latest + python-version: 3.8 + - os: ubuntu-latest + python-version: 3.9 + - os: ubuntu-latest + python-version: '3.10' + - os: macos-latest + python-version: 3.9 + - os: windows-latest + python-version: 3.9 steps: - uses: actions/checkout@v2 @@ -36,4 +47,4 @@ jobs: - name: Test with pytest run: | pip install pytest - pytest \ No newline at end of file + pytest diff --git a/README.md b/README.md index 46b462a..b6b0f70 100644 --- a/README.md +++ b/README.md @@ -105,6 +105,24 @@ In [3]: pyopenjtalk.g2p("こんにちは", kana=True) Out[3]: 'コンニチワ' ``` +### About `run_marine` option + +After v0.3.0, the `run_marine` option has been available for estimating the Japanese accent with the DNN-based method (see 
[marine](https://github.com/6gsn/marine)). If you want to use the feature, please install pyopenjtalk as below; + +```shell +pip install pyopenjtalk[marine] +``` + +And then, you can use the option as the following examples; + +```python +In [1]: import pyopenjtalk + +In [2]: x, sr = pyopenjtalk.tts("おめでとうございます", run_marine=True) # for TTS + +In [3]: label = pyopenjtalk.extract_fullcontext("こんにちは", run_marine=True) # for text processing frontend only +``` + ### Create/Apply user dictionary 1. Create a CSV file (e.g. `user.csv`) and write custom words like below: @@ -139,7 +157,8 @@ done! - pyopenjtalk: MIT license ([LICENSE.md](LICENSE.md)) - Open JTalk: Modified BSD license ([COPYING](https://github.com/r9y9/open_jtalk/blob/1.10/src/COPYING)) - htsvoice in this repository: Please check [pyopenjtalk/htsvoice/README.md](pyopenjtalk/htsvoice/README.md). +- marine: Apache 2.0 license ([LICENSE](https://github.com/6gsn/marine/blob/main/LICENSE)) ## Acknowledgements -HTS Working Group for their dedicated efforts to develop and maintain Open JTalk. \ No newline at end of file +HTS Working Group for their dedicated efforts to develop and maintain Open JTalk. diff --git a/docs/changelog.rst b/docs/changelog.rst index 012d042..cfb72f8 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -1,6 +1,14 @@ Change log ========== +v0.3.0 <2022-09-20> +------------------- + +Newer numpy (>v1.20.0) is required to avoid ABI compatibility issues. Please check the updated installation guide. + +* `#40`_: Introduce marine for Japanese accent estimation. Note that there could be a breakpoint regarding `run_frontend` because this PR changed the behavior of the API. +* `#35`_: Fixes for Python 3.10. + v0.2.0 <2022-02-06> ------------------- @@ -82,3 +90,5 @@ Initial release with OpenJTalk's text processsing functionality .. _#25: https://github.com/r9y9/pyopenjtalk/pull/25 .. _#27: https://github.com/r9y9/pyopenjtalk/issues/27 .. _#29: https://github.com/r9y9/pyopenjtalk/pull/29 +.. 
_#35: https://github.com/r9y9/pyopenjtalk/pull/35 +.. _#40: https://github.com/r9y9/pyopenjtalk/pull/40 diff --git a/docs/index.rst b/docs/index.rst index 441c879..f9040d0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -24,6 +24,24 @@ The latest release is availabe on pypi. You can install it by: pip install pyopenjtalk + +Workaround for ``ValueError: numpy.ndarray size changed, may indicate binary incompatibility`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This type of error comes from NumPy's ABI-breaking changes. If you see ``ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject`` or similar, please make sure to install numpy first, and then install pyopenjtalk by: + +.. code:: + + pip install pyopenjtalk --no-build-isolation + +or: + +.. code:: + + pip install git+https://github.com/r9y9/pyopenjtalk --no-build-isolation + +The option ``--no-build-isolation`` tells pip not to create a build environment, so the pre-installed numpy is used to build the package. Hence there should be no NumPy ABI issues. + .. toctree:: :maxdepth: 1 :caption: Notebooks diff --git a/docs/pyopenjtalk.rst b/docs/pyopenjtalk.rst index 370d2f5..5e03e7b 100644 --- a/docs/pyopenjtalk.rst +++ b/docs/pyopenjtalk.rst @@ -25,3 +25,5 @@ Misc ---- .. autofunction:: run_frontend +.. autofunction:: make_label +.. 
autofunction:: estimate_accent diff --git a/pyopenjtalk/__init__.py b/pyopenjtalk/__init__.py index 09a8e94..1716ff9 100644 --- a/pyopenjtalk/__init__.py +++ b/pyopenjtalk/__init__.py @@ -19,6 +19,7 @@ from .htsengine import HTSEngine from .openjtalk import CreateUserDict, OpenJTalk +from .utils import merge_njd_marine_features # Dictionary directory # defaults to the package directory where the dictionary will be automatically downloaded @@ -39,6 +40,8 @@ # Global instance of HTSEngine # mei_normal.voice is used as default _global_htsengine = None +# Global instance of Marine +_global_marine = None # https://github.com/tqdm/tqdm#hooks-and-callbacks @@ -98,18 +101,53 @@ def g2p(*args, **kwargs): return _global_jtalk.g2p(*args, **kwargs) -def extract_fullcontext(text): +def estimate_accent(njd_features): + """Accent estimation using marine + + This function requires marine (https://github.com/6gsn/marine) + + Args: + njd_result (list): features generated by OpenJTalk. + + Returns: + list: features for NJDNode with estimation results by marine. + """ + global _global_marine + if _global_marine is None: + try: + from marine.predict import Predictor + except BaseException: + raise ImportError( + "Please install marine by `pip install pyopenjtalk[marine]`" + ) + _global_marine = Predictor() + from marine.utils.openjtalk_util import convert_njd_feature_to_marine_feature + + marine_feature = convert_njd_feature_to_marine_feature(njd_features) + marine_results = _global_marine.predict( + [marine_feature], require_open_jtalk_format=True + ) + njd_features = merge_njd_marine_features(njd_features, marine_results) + return njd_features + + +def extract_fullcontext(text, run_marine=False): """Extract full-context labels from text Args: text (str): Input text + run_marine (bool): Whether to estimate accent using marine. + Default is False. 
If you want to activate this option, you need to install marine + by `pip install pyopenjtalk[marine]` Returns: list: List of full-context labels """ - # note: drop first return - _, labels = run_frontend(text) - return labels + + njd_features = run_frontend(text) + if run_marine: + njd_features = estimate_accent(njd_features) + return make_label(njd_features) def synthesize(labels, speed=1.0, half_tone=0.0): @@ -136,37 +174,56 @@ def synthesize(labels, speed=1.0, half_tone=0.0): return _global_htsengine.synthesize(labels), sr -def tts(text, speed=1.0, half_tone=0.0): +def tts(text, speed=1.0, half_tone=0.0, run_marine=False): """Text-to-speech Args: text (str): Input text speed (float): speech speed rate. Default is 1.0. half_tone (float): additional half-tone. Default is 0. + run_marine (bool): Whether to estimate accent using marine. + Default is False. If you want activate this option, you need to install marine + by `pip install pyopenjtalk[marine]` Returns: np.ndarray: speech waveform (dtype: np.float64) int: sampling frequency (defualt: 48000) """ - return synthesize(extract_fullcontext(text), speed, half_tone) + return synthesize( + extract_fullcontext(text, run_marine=run_marine), speed, half_tone + ) -def run_frontend(text, verbose=0): +def run_frontend(text): """Run OpenJTalk's text processing frontend Args: text (str): Unicode Japanese text. - verbose (int): Verbosity. Default is 0. Returns: - tuple: Pair of 1) NJD_print and 2) JPCommon_make_label. - The latter is the full-context labels in HTS-style format. + list: features for NJDNode. + """ + global _global_jtalk + if _global_jtalk is None: + _lazy_init() + _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) + return _global_jtalk.run_frontend(text) + + +def make_label(njd_features): + """Make full-context label using features + + Args: + njd_features (list): features for NJDNode. + + Returns: + list: full-context labels. 
""" global _global_jtalk if _global_jtalk is None: _lazy_init() _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) - return _global_jtalk.run_frontend(text, verbose) + return _global_jtalk.make_label(njd_features) def create_user_dict(path, out_path): @@ -205,4 +262,4 @@ def unset_user_dict(): global _global_jtalk if _global_jtalk is None: _lazy_init() - _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) \ No newline at end of file + _global_jtalk = OpenJTalk(dn_mecab=OPEN_JTALK_DICT_DIR) diff --git a/pyopenjtalk/openjtalk.pyx b/pyopenjtalk/openjtalk.pyx index 816f655..650660c 100644 --- a/pyopenjtalk/openjtalk.pyx +++ b/pyopenjtalk/openjtalk.pyx @@ -9,6 +9,7 @@ cimport numpy as np np.import_array() cimport cython +from libc.stdlib cimport calloc from openjtalk.mecab cimport Mecab, Mecab_initialize, Mecab_load, Mecab_analysis from openjtalk.mecab cimport Mecab_get_feature, Mecab_get_size, Mecab_refresh, Mecab_clear @@ -96,32 +97,57 @@ cdef njd_node_get_chain_flag(_njd.NJDNode* node): return _njd.NJDNode_get_chain_flag(node) -cdef njd_node_print(_njd.NJDNode* node): - return "{},{},{},{},{},{},{},{},{},{},{}/{},{},{}".format( - njd_node_get_string(node), - njd_node_get_pos(node), - njd_node_get_pos_group1(node), - njd_node_get_pos_group2(node), - njd_node_get_pos_group3(node), - njd_node_get_ctype(node), - njd_node_get_cform(node), - njd_node_get_orig(node), - njd_node_get_read(node), - njd_node_get_pron(node), - njd_node_get_acc(node), - njd_node_get_mora_size(node), - njd_node_get_chain_rule(node), - njd_node_get_chain_flag(node) - ) - - -cdef njd_print(_njd.NJD* njd): +cdef node2feature(_njd.NJDNode* node): + return { + "string": njd_node_get_string(node), + "pos": njd_node_get_pos(node), + "pos_group1": njd_node_get_pos_group1(node), + "pos_group2": njd_node_get_pos_group2(node), + "pos_group3": njd_node_get_pos_group3(node), + "ctype": njd_node_get_ctype(node), + "cform": njd_node_get_cform(node), + "orig": njd_node_get_orig(node), + "read": 
njd_node_get_read(node), + "pron": njd_node_get_pron(node), + "acc": njd_node_get_acc(node), + "mora_size": njd_node_get_mora_size(node), + "chain_rule": njd_node_get_chain_rule(node), + "chain_flag": njd_node_get_chain_flag(node), + } + + +cdef njd2feature(_njd.NJD* njd): cdef _njd.NJDNode* node = njd.head - njd_results = [] + features = [] while node is not NULL: - njd_results.append(njd_node_print(node)) + features.append(node2feature(node)) node = node.next - return njd_results + return features + + +cdef feature2njd(_njd.NJD* njd, features): + cdef _njd.NJDNode* node + + for feature_node in features: + node = <_njd.NJDNode *> calloc(1, sizeof(_njd.NJDNode)) + _njd.NJDNode_initialize(node) + # set values + _njd.NJDNode_set_string(node, feature_node["string"].encode("utf-8")) + _njd.NJDNode_set_pos(node, feature_node["pos"].encode("utf-8")) + _njd.NJDNode_set_pos_group1(node, feature_node["pos_group1"].encode("utf-8")) + _njd.NJDNode_set_pos_group2(node, feature_node["pos_group2"].encode("utf-8")) + _njd.NJDNode_set_pos_group3(node, feature_node["pos_group3"].encode("utf-8")) + _njd.NJDNode_set_ctype(node, feature_node["ctype"].encode("utf-8")) + _njd.NJDNode_set_cform(node, feature_node["cform"].encode("utf-8")) + _njd.NJDNode_set_orig(node, feature_node["orig"].encode("utf-8")) + _njd.NJDNode_set_read(node, feature_node["read"].encode("utf-8")) + _njd.NJDNode_set_pron(node, feature_node["pron"].encode("utf-8")) + _njd.NJDNode_set_acc(node, feature_node["acc"]) + _njd.NJDNode_set_mora_size(node, feature_node["mora_size"]) + _njd.NJDNode_set_chain_rule(node, feature_node["chain_rule"].encode("utf-8")) + _njd.NJDNode_set_chain_flag(node, feature_node["chain_flag"]) + _njd.NJD_push_node(njd, node) + cdef class OpenJTalk(object): """OpenJTalk @@ -160,12 +186,13 @@ cdef class OpenJTalk(object): return Mecab_load_ex(self.mecab, dn_mecab, user_mecab) - def run_frontend(self, text, verbose=0): + def run_frontend(self, text): """Run OpenJTalk's text processing frontend 
""" - if isinstance(text, str): - text = text.encode("utf-8") cdef char buff[8192] + if isinstance(text, str): + text = text.encode("utf-8") + cdef int result = text2mecab(buff, 8192, text) if result != 0: if result == errno.ERANGE: @@ -173,6 +200,7 @@ cdef class OpenJTalk(object): if result == errno.EINVAL: raise RuntimeError("Invalid input for text2mecab") raise RuntimeError("Unknown error: " + str(result)) + Mecab_analysis(self.mecab, buff) mecab2njd(self.njd, Mecab_get_feature(self.mecab), Mecab_get_size(self.mecab)) _njd.njd_set_pronunciation(self.njd) @@ -181,7 +209,20 @@ cdef class OpenJTalk(object): _njd.njd_set_accent_type(self.njd) _njd.njd_set_unvoiced_vowel(self.njd) _njd.njd_set_long_vowel(self.njd) + features = njd2feature(self.njd) + + # Note that this will release memory for njd feature + NJD_refresh(self.njd) + Mecab_refresh(self.mecab) + + return features + + def make_label(self, features): + """Make full-context label + """ + feature2njd(self.njd, features) njd2jpcommon(self.jpcommon, self.njd) + JPCommon_make_label(self.jpcommon) cdef int label_size = JPCommon_get_label_size(self.jpcommon) @@ -194,23 +235,19 @@ cdef class OpenJTalk(object): # http://cython.readthedocs.io/en/latest/src/tutorial/strings.html labels.append(label_feature[i]) - njd_results = njd_print(self.njd) - - if verbose > 0: - NJD_print(self.njd) - # Note that this will release memory for label feature JPCommon_refresh(self.jpcommon) NJD_refresh(self.njd) - Mecab_refresh(self.mecab) - return njd_results, labels + return labels def g2p(self, text, kana=False, join=True): """Grapheme-to-phoeneme (G2P) conversion """ - njd_results, labels = self.run_frontend(text) + njd_features = self.run_frontend(text) + if not kana: + labels = self.make_label(njd_features) prons = list(map(lambda s: s.split("-")[1].split("+")[0], labels[1:-1])) if join: prons = " ".join(prons) @@ -218,12 +255,11 @@ cdef class OpenJTalk(object): # kana prons = [] - for n in njd_results: - row = n.split(",") - if 
row[1] == "記号": - p = row[0] + for n in njd_features: + if n["pos"] == "記号": + p = n["string"] else: - p = row[9] + p = n["pron"] # remove special chars for c in "’": p = p.replace(c,"") diff --git a/pyopenjtalk/utils.py b/pyopenjtalk/utils.py new file mode 100644 index 0000000..7aeb1bf --- /dev/null +++ b/pyopenjtalk/utils.py @@ -0,0 +1,21 @@ +def merge_njd_marine_features(njd_features, marine_results): + features = [] + + marine_accs = marine_results["accent_status"] + marine_chain_flags = marine_results["accent_phrase_boundary"] + + assert ( + len(njd_features) == len(marine_accs) == len(marine_chain_flags) + ), "Invalid sequence sizes in njd_results, marine_results" + + for node_index, njd_feature in enumerate(njd_features): + _feature = {} + for feature_key in njd_feature.keys(): + if feature_key == "acc": + _feature["acc"] = int(marine_accs[node_index]) + elif feature_key == "chain_flag": + _feature[feature_key] = int(marine_chain_flags[node_index]) + else: + _feature[feature_key] = njd_feature[feature_key] + features.append(_feature) + return features diff --git a/pyproject.toml b/pyproject.toml index 8d1266b..212e445 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,13 @@ [build-system] requires = [ "wheel", - "setuptools", - "cython>=0.21.0", - "numpy=0.28.0", + "numpy>=1.20.0", ] [tool.pysen] -version = "0.9" +version = "0.10.2" [tool.pysen.lint] enable_black = true @@ -18,4 +18,4 @@ mypy_preset = "strict" line_length = 88 py_version = "py37" [[tool.pysen.lint.mypy_targets]] - paths = ["."] \ No newline at end of file + paths = ["."] diff --git a/setup.py b/setup.py index 6f4af65..62ec246 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ platform_is_windows = sys.platform == "win32" -version = "0.2.0" +version = "0.3.0" min_cython_ver = "0.21.0" try: @@ -276,7 +276,7 @@ def run(self): ext_modules=ext_modules, cmdclass=cmdclass, install_requires=[ - "numpy >= 1.8.0", + "numpy >= 1.20.0", "cython >= " + min_cython_ver, "six", "tqdm", @@ 
-296,11 +296,14 @@ def run(self): "types-setuptools", "mypy<=0.910", "black>=19.19b0,<=20.8", + "click<8.1.0", "flake8>=3.7,<4", "flake8-bugbear", "isort>=4.3,<5.2.0", + "types-decorator", ], "test": ["pytest", "scipy"], + "marine": ["marine>=0.0.5"], }, classifiers=[ "Operating System :: POSIX", @@ -313,6 +316,7 @@ def run(self): "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", "License :: OSI Approved :: MIT License", "Topic :: Scientific/Engineering", "Topic :: Software Development", diff --git a/tests/test_openjtalk.py b/tests/test_openjtalk.py index 56b4a80..0f43363 100644 --- a/tests/test_openjtalk.py +++ b/tests/test_openjtalk.py @@ -1,10 +1,9 @@ import pyopenjtalk -def _print_results(njd_results, labels): - for n in njd_results: - row = n.split(",") - s, p = row[0], row[9] +def _print_results(njd_features, labels): + for f in njd_features: + s, p = f["string"], f["pron"] print(s, p) for label in labels: @@ -12,12 +11,37 @@ def _print_results(njd_results, labels): def test_hello(): - njd_results, labels = pyopenjtalk.run_frontend("こんにちは") - _print_results(njd_results, labels) + njd_features = pyopenjtalk.run_frontend("こんにちは") + labels = pyopenjtalk.make_label(njd_features) + _print_results(njd_features, labels) + + +def test_njd_features(): + njd_features = pyopenjtalk.run_frontend("こんにちは") + expected_feature = [ + { + "string": "こんにちは", + "pos": "感動詞", + "pos_group1": "*", + "pos_group2": "*", + "pos_group3": "*", + "ctype": "*", + "cform": "*", + "orig": "こんにちは", + "read": "コンニチハ", + "pron": "コンニチワ", + "acc": 0, + "mora_size": 5, + "chain_rule": "-1", + "chain_flag": -1, + } + ] + assert njd_features == expected_feature def test_fullcontext(): - _, labels = pyopenjtalk.run_frontend("こんにちは") + features = pyopenjtalk.run_frontend("こんにちは") + labels = pyopenjtalk.make_label(features) labels2 = pyopenjtalk.extract_fullcontext("こんにちは") for a, b 
in zip(labels, labels2): assert a == b @@ -30,10 +54,11 @@ def test_jtalk(): "どんまい!", "パソコンのとりあえず知っておきたい使い方", ]: - njd_results, labels = pyopenjtalk.run_frontend(text) - _print_results(njd_results, labels) + njd_features = pyopenjtalk.run_frontend(text) + labels = pyopenjtalk.make_label(njd_features) + _print_results(njd_features, labels) - surface = "".join(map(lambda s: s.split(",")[0], njd_results)) + surface = "".join(map(lambda f: f["string"], njd_features)) assert surface == text From acd4f02d2af3129382c151590238b9370465e360 Mon Sep 17 00:00:00 2001 From: sabonerune <102559104+sabonerune@users.noreply.github.com> Date: Tue, 18 Jul 2023 19:56:24 +0900 Subject: [PATCH 12/13] =?UTF-8?q?BLD:=20=E3=83=93=E3=83=AB=E3=83=89?= =?UTF-8?q?=E6=99=82=E3=81=AB=E4=BD=BF=E7=94=A8=E3=81=99=E3=82=8Bcython?= =?UTF-8?q?=E3=83=90=E3=83=BC=E3=82=B8=E3=83=A7=E3=83=B3=E3=82=92=E5=88=B6?= =?UTF-8?q?=E9=99=90=E3=81=99=E3=82=8B=20(#18)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Hiroshiba --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 212e445..39c1428 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ requires = [ "wheel", "setuptools=0.28.0", + "cython>=0.28.0, <3.0", # NOTE: https://github.com/r9y9/pyopenjtalk/issues/55 "numpy>=1.20.0", ] From ba5a316a694b66aa4db91f3b5b05bf49b5dafb00 Mon Sep 17 00:00:00 2001 From: My <84212641+My-MC@users.noreply.github.com> Date: Fri, 8 Sep 2023 23:41:34 +0900 Subject: [PATCH 13/13] =?UTF-8?q?MSVC=2014.37=E3=81=AE=E4=BB=95=E6=A7=98?= =?UTF-8?q?=E5=A4=89=E6=9B=B4=E3=81=B8=E3=81=AE=E5=AF=BE=E5=BF=9C=20(#19)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/open_jtalk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/open_jtalk b/lib/open_jtalk index acd0cc6..78e1829 160000 --- a/lib/open_jtalk +++ b/lib/open_jtalk @@ -1 +1 @@ 
-Subproject commit acd0cc63ab63d09ea519a39462a2cbc9bc8698d0 +Subproject commit 78e182970ff4eaa032b8e81f0c4ddba3fdd8e73a