diff --git a/python/MANIFEST.in b/python/MANIFEST.in index 7d2aced..8677de9 100644 --- a/python/MANIFEST.in +++ b/python/MANIFEST.in @@ -3,3 +3,4 @@ include README.md include requirements.txt include PyKomoran/libs/** include PyKomoran/models_*/** +include PyKomoran/tests/test_data/** \ No newline at end of file diff --git a/python/PyKomoran/core.py b/python/PyKomoran/core.py index 8836f07..a90e41c 100644 --- a/python/PyKomoran/core.py +++ b/python/PyKomoran/core.py @@ -22,13 +22,13 @@ class Komoran: Komoran Wrapper class """ - def __init__(self, model_path="./models_full"): + def __init__(self, model_path="./models_full", max_heap=1024): self._base_path = os.path.dirname(os.path.realpath(__file__)) self._model_path = os.path.abspath(os.path.join(self._base_path, model_path)) assert os.path.exists(self._model_path) - jvm.init_jvm() + jvm.init_jvm(max_heap) self._komoran = jvm.get_jvm().kr.co.shineware.nlp.pykomoran.KomoranEntryPoint() self._komoran.init(self._model_path) diff --git a/python/PyKomoran/jvm.py b/python/PyKomoran/jvm.py index 42f65fa..32f62c9 100644 --- a/python/PyKomoran/jvm.py +++ b/python/PyKomoran/jvm.py @@ -10,7 +10,7 @@ jvm_gateway = None -def init_jvm(jar_path="./libs", max_heap=1024): +def init_jvm(max_heap=1024, jar_path="./libs"):  # default kept so zero-argument init_jvm() callers keep working base_path = os.path.dirname(os.path.realpath(__file__)) jar_path = os.path.abspath(os.path.join(base_path, jar_path)) diff --git a/python/PyKomoran/tests/_jvm_test.py b/python/PyKomoran/tests/ajvm_test.py similarity index 63% rename from python/PyKomoran/tests/_jvm_test.py rename to python/PyKomoran/tests/ajvm_test.py index 898a80a..19ca477 100644 --- a/python/PyKomoran/tests/_jvm_test.py +++ b/python/PyKomoran/tests/ajvm_test.py @@ -1,16 +1,17 @@ -""" This test filename has _ prefix because this test should run before running other tests """ +""" This test filename has 'a' prefix because this test should run before running other tests """ import nose + from PyKomoran.jvm import * +global jvm_gateway if jvm_gateway is 
not None: jvm_gateway.shutdown() test_jvm1 = None -test_jvm2 = None def test_to_before_init_Jvm(): """ - before jvm_init(), jvm_gateway should be None + JVM Test: before jvm_init(), jvm_gateway should be None :return: """ global jvm_gateway @@ -20,7 +21,7 @@ def test_to_before_init_Jvm(): def test_to_get_Jvm_before_init(): """ - before jvm_init(), get_jvm() should be None + JVM Test: before jvm_init(), get_jvm() should be None :return: """ global test_jvm1 @@ -32,25 +33,25 @@ def test_to_get_Jvm_before_init(): def test_to_init_Jvm(): """ - when call jvm_init() first time, jvm object should be returned + JVM Test: when call jvm_init() first time, jvm object should be returned :return: """ global test_jvm1 - test_jvm1 = init_jvm() + test_jvm1 = init_jvm(1024) assert test_jvm1 is not None def test_to_duplicate_init_Jvm(): """ - when call jvm_init() more than once, None should be returned + JVM Test: when call jvm_init() more than once, None should be returned :return: """ global test_jvm1 - test_jvm2 = init_jvm() - test_jvm3 = init_jvm() + test_jvm2 = init_jvm(1024) + test_jvm3 = init_jvm(1024) assert test_jvm1 is not None assert test_jvm2 is None @@ -59,7 +60,7 @@ def test_to_duplicate_init_Jvm(): def test_to_get_Jvm_after_init(): """ - when call get_jvm() after init_jvm(), jvm object should be returned + JVM Test: when call get_jvm() after init_jvm(), jvm object should be returned :return: """ test_jvm2 = get_jvm() @@ -69,7 +70,7 @@ def test_to_get_Jvm_after_init(): def test_to_duplicate_get_Jvm_after_init(): """ - when call get_jvm() after init_jvm() more than once, returned values should be same + JVM Test: when call get_jvm() after init_jvm() more than once, returned values should be same :return: """ global test_jvm1 diff --git a/python/PyKomoran/tests/core_test.py b/python/PyKomoran/tests/core_test.py index 34a8afc..36a77d0 100644 --- a/python/PyKomoran/tests/core_test.py +++ b/python/PyKomoran/tests/core_test.py @@ -1,4 +1,6 @@ +import os import nose + from 
PyKomoran.core import * from PyKomoran.type import * @@ -8,7 +10,7 @@ def test_to_init_Komoran(): """ - init Komoran with default model (models_full) + Core Test: init Komoran with default model (models_full) :return: """ global komoran @@ -19,19 +21,9 @@ def test_to_init_Komoran(): assert komoran._komoran.isInitialized() -def test_to_set_user_dic(): - # TODO: implement test_to_set_user_dic() test code - pass - - -def test_to_set_fw_dic(): - # TODO: implement test_to_set_fw_dic() test code - pass - - def test_to_analyze_get_nouns(): """ - analyze test string with get_nouns() and check result is as expected + Core Test: analyze with get_nouns() :return: """ global komoran @@ -46,7 +38,7 @@ def test_to_analyze_get_nouns(): def test_to_analyze_get_morphes_by_tags(): """ - analyze test string with get_morphes_by_tags() and check result is as expected + Core Test: analyze with get_morphes_by_tags() :return: """ global komoran @@ -61,7 +53,7 @@ def test_to_analyze_get_morphes_by_tags(): def test_to_analyze_get_morphes_by_invalid_tags(): """ - analyze test string with get_morphes_by_tags(tag_list=['INVALID','POS']) and invalid tag_list and check result is as expected + Core Test: analyze with get_morphes_by_tags(tag_list=['INVALID','POS']) & invalid tag_list :return: """ global komoran @@ -76,7 +68,7 @@ def test_to_analyze_get_morphes_by_invalid_tags(): def test_to_analyze_get_morphes_by_no_given_tags(): """ - analyze test string with get_morphes_by_tags(tag_list=[]) and check result is as expected + Core Test: analyze with get_morphes_by_tags(tag_list=[]) :return: """ global komoran @@ -91,7 +83,7 @@ def test_to_analyze_get_morphes_by_no_given_tags(): def test_to_analyze_get_plain_text(): """ - analyze test string with get_plain_text() and check result is as expected + Core Test: analyze with get_plain_text() :return: """ global komoran @@ -107,7 +99,7 @@ def test_to_analyze_get_plain_text(): def test_to_analyze_get_token_list_with_flatten(): """ - analyze test string 
with get_token_list(flatten=False,use_pos_name=False) and check result is as expected + Core Test: analyze with get_token_list(flatten=True,use_pos_name=False) :return: """ global komoran @@ -145,7 +137,7 @@ def test_to_analyze_get_token_list_with_flatten(): def test_to_analyze_get_token_list_with_flatten_and_use_pos_name(): """ - analyze test string with get_token_list(flatten=True,use_pos_name=True) and check result is as expected + Core Test: analyze with get_token_list(flatten=True,use_pos_name=True) :return: """ global komoran @@ -183,7 +175,7 @@ def test_to_analyze_get_token_list_with_flatten_and_use_pos_name(): def test_to_analyze_get_token_list_without_flatten(): """ - analyze test string with get_token_list(flatten=False,use_pos_name=False) and check result is as expected + Core Test: analyze with get_token_list(flatten=False,use_pos_name=False) :return: """ global komoran @@ -222,7 +214,7 @@ def test_to_analyze_get_token_list_without_flatten(): def test_to_analyze_get_token_list_without_flatten_and_use_pos_name(): """ - analyze test string with get_token_list(flatten=False,use_pos_name=True) and check result is as expected + Core Test: analyze with get_token_list(flatten=False,use_pos_name=True) :return: """ global komoran @@ -261,7 +253,7 @@ def test_to_analyze_get_token_list_without_flatten_and_use_pos_name(): def test_to_analyze_get_list(): """ - analyze test string with get_list() and check result is as expected + Core Test: analyze with get_list() :return: """ global komoran @@ -291,5 +283,110 @@ def test_to_analyze_get_list(): # @formatter:on +def test_to_set_user_dic(): + """ + Core Test: set_user_dic() makes the two-token phrase analyze as a single NNP token + :return: + """ + global komoran + + if komoran is None: + komoran = Komoran(model_path='./models_full') + + tokens = komoran.get_token_list("테스트 단어") + + # @formatter:off + assert isinstance(tokens, list) + assert len(tokens) == 2 + assert isinstance(tokens[0], Token) + assert tokens[0] == Token({ + 'morph': '테스트', + 'pos': 'NNP', + 
'beginIndex': 0, + 'endIndex': 3 + }) + assert tokens[1] == Token({ + 'morph': '단어', + 'pos': 'NNG', + 'beginIndex': 4, + 'endIndex': 6 + }) + # @formatter:on + + base_path = os.path.dirname(os.path.realpath(__file__)) + komoran.set_user_dic(os.path.join(base_path, "./test_data/dic.user")) + + tokens = komoran.get_token_list("테스트 단어") + + # @formatter:off + assert isinstance(tokens, list) + assert len(tokens) == 1 + assert isinstance(tokens[0], Token) + assert tokens[0] == Token({ + 'morph': '테스트 단어', + 'pos': 'NNP', + 'beginIndex': 0, + 'endIndex': 6 + }) + # @formatter:on + + pass + + +def test_to_set_fw_dic(): + # NOTE(review): test is implemented below; the per-token index TODOs still need checking against KOMORAN + """ + Core Test: set_fw_dic() makes the single-token input analyze as three NNG tokens + :return: + """ + global komoran + + if komoran is None: + komoran = Komoran(model_path='./models_full') + + tokens = komoran.get_token_list("테스트") + + # @formatter:off + assert isinstance(tokens, list) + assert len(tokens) == 1 + assert isinstance(tokens[0], Token) + assert tokens[0] == Token({ + 'morph': '테스트', + 'pos': 'NNP', + 'beginIndex': 0, + 'endIndex': 3 + }) + # @formatter:on + + base_path = os.path.dirname(os.path.realpath(__file__)) + komoran.set_fw_dic(os.path.join(base_path, "./test_data/fwd.user")) + + tokens = komoran.get_token_list("테스트") + + # @formatter:off + assert isinstance(tokens, list) + assert len(tokens) == 3 + assert isinstance(tokens[0], Token) + assert tokens[0] == Token({ + 'morph': '테', + 'pos': 'NNG', + 'beginIndex': 0, + 'endIndex': 3 # TODO: Check and fix KOMORAN + }) + assert tokens[1] == Token({ + 'morph': '스', + 'pos': 'NNG', + 'beginIndex': 0, # TODO: Check and fix KOMORAN + 'endIndex': 3 # TODO: Check and fix KOMORAN + }) + assert tokens[2] == Token({ + 'morph': '트', + 'pos': 'NNG', + 'beginIndex': 0, # TODO: Check and fix KOMORAN + 'endIndex': 3 # TODO: Check and fix KOMORAN + }) + # @formatter:on + + if __name__ == '__main__': nose.runmodule() diff --git a/python/PyKomoran/tests/test_data/dic.user 
b/python/PyKomoran/tests/test_data/dic.user new file mode 100644 index 0000000..06bd389 --- /dev/null +++ b/python/PyKomoran/tests/test_data/dic.user @@ -0,0 +1 @@ +테스트 단어 \ No newline at end of file diff --git a/python/PyKomoran/tests/test_data/fwd.user b/python/PyKomoran/tests/test_data/fwd.user new file mode 100644 index 0000000..257a0f0 --- /dev/null +++ b/python/PyKomoran/tests/test_data/fwd.user @@ -0,0 +1 @@ +테스트 테/NNG 스/NNG 트/NNG \ No newline at end of file diff --git a/python/PyKomoran/tests/type_test.py b/python/PyKomoran/tests/type_test.py index beaced1..4076a16 100644 --- a/python/PyKomoran/tests/type_test.py +++ b/python/PyKomoran/tests/type_test.py @@ -4,7 +4,7 @@ def test_to_init_Token(): """ - init Token using given Dict, and validate given values with Token methods + Type Test: init Token using given Dict, and validate given values with Token methods :return: """ # @formatter:off @@ -33,7 +33,7 @@ def test_to_init_Token(): def test_to_init_Token_using_Pos_name(): """ - init Token using given Dict with use_pos_name parameter, and validate given values with Token methods + Type Test: init Token using given Dict with use_pos_name parameter, and validate given values with Token methods :return: """ # @formatter:off @@ -62,7 +62,7 @@ def test_to_init_Token_using_Pos_name(): def test_to_init_Pair(): """ - init Pair using given Dict, and validate given values with Pair methods + Type Test: init Pair using given Dict, and validate given values with Pair methods :return: """ # @formatter:off @@ -85,7 +85,7 @@ def test_to_init_Pair(): def test_to_pos_table(): """ - check pos_table is initialized well + Type Test: check pos_table is initialized well :return: """ global pos_table