Merge pull request #54 from Kensuke-Mitsuzawa/enhancement/#53
cleaned up type hint
Kensuke-Mitsuzawa committed Jan 21, 2019
2 parents 76e2e5c + 3ef0cf0 commit 45af698
Showing 11 changed files with 49 additions and 49 deletions.
2 changes: 1 addition & 1 deletion JapaneseTokenizer/common/juman_utils.py
@@ -62,4 +62,4 @@ def feature_parser(uni_feature, word_surface)
else:
word_stem = word_surface

return tuple_pos, word_stem
return tuple_pos, word_stem
16 changes: 6 additions & 10 deletions JapaneseTokenizer/common/sever_handler.py
@@ -7,7 +7,6 @@
# logger
from JapaneseTokenizer import init_logger
import logging
logger = init_logger.init_logger(logging.getLogger(init_logger.LOGGER_NAME))
# typing
from typing import Union
# else
@@ -17,6 +16,7 @@
import shutil
import signal
import os
logger = init_logger.init_logger(logging.getLogger(init_logger.LOGGER_NAME))


class ProcessDownException(Exception):
@@ -29,8 +29,8 @@ def __init__(self,
option=None,
pattern='EOS',
timeout_second=10):
"""* Get communication with unix process using pexpect module."""
# type: (text_type,text_type,text_type,int)->None
"""* Get communication with unix process using pexpect module."""
self.command = command
self.timeout_second = timeout_second
self.pattern = pattern
@@ -42,10 +42,10 @@ def __del__(self):
self.process_analyzer.kill(sig=9)

def launch_process(self, command):
# type: (Union[bytes,text_type])->None
"""* What you can do
- It starts process and keep it.
"""
# type: (Union[bytes,text_type])->None
if not self.option is None:
command_plus_option = self.command + " " + self.option
else:
@@ -67,7 +67,6 @@ def launch_process(self, command):
self.process_id = self.process_analyzer.pid

def restart_process(self):
""""""
# type: ()->None
if not self.option is None:
command_plus_option = self.command + " " + self.option
@@ -79,10 +78,10 @@ def restart_process(self):
self.process_id = self.process_analyzer.pid

def stop_process(self):
# type: ()->bool
"""* What you can do
- You're able to stop the process which this instance has now.
"""
# type: ()->bool
if hasattr(self, "process_analyzer"):
self.process_analyzer.kill(sig=9)
else:
@@ -91,11 +90,11 @@ def stop_process(self):
return True

def __query(self, input_string):
# type: (text_type)->text_type
"""* What you can do
- It takes the result of Juman++
- This function monitors time which takes for getting the result.
"""
# type: (text_type)->text_type
signal.signal(signal.SIGALRM, self.__notify_handler)
signal.alarm(self.timeout_second)
self.process_analyzer.sendline(input_string)
@@ -118,8 +117,6 @@ def __notify_handler(self, signum, frame):
2. Run restart_process() method when the exception happens.""".format(**{"time": self.timeout_second}))

def query(self, input_string):
"""* What you can do
"""
# type: (text_type)->text_type
return self.__query(input_string=input_string)

@@ -135,6 +132,5 @@ def __init__(self,
super(JumanppHnadler, self).__init__(command=jumanpp_command, option=option, pattern=pattern, timeout_second=timeout_second)

def launch_jumanpp_process(self, command):
""""""
# type: (text_type)->None
return self.launch_process(command)
return self.launch_process(command)
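Note on the pattern above: every hunk in sever_handler.py moves the `# type:` comment so it sits immediately under the `def` line, above the docstring. PEP 484 requires that placement for function type comments; left below the docstring, checkers such as mypy never see the annotation. A minimal sketch of the convention (names here are illustrative, not taken from the diff):

```python
from typing import Optional


def launch(command, option=None, timeout_second=10):
    # type: (str, Optional[str], int) -> None
    """The type comment sits on the first line after the signature;
    the docstring follows. With the old ordering, type checkers
    ignore the comment entirely."""
    full_command = command if option is None else command + ' ' + option
    print(full_command)
```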
8 changes: 4 additions & 4 deletions JapaneseTokenizer/common/text_preprocess.py
@@ -33,14 +33,14 @@ def b(str): return str.encode("utf-8")


def denormalize_text(input_text):
# type: (text_type)->text_type
"""* What you can do
- It converts text into standard japanese writing way
* Note
- hankaku-katakana is to zenkaku-katakana
- zenkaku-eisu is to hankaku-eisu
"""
# type: (text_type)->text_type
if input_text in STRING_EXCEPTION:
return input_text
else:
@@ -54,13 +54,13 @@ def normalize_text(input_text,
is_kana=True,
is_ascii=True,
is_digit=True):
# type: (text_type,text_type,text_type,bool,bool,bool,bool)->text_type
"""* What you can do
- It converts input-text into normalized-text which is good for tokenizer input.
* Params
- new_line_replaced: a string which replaces from \n string.
"""
# type: (text_type,text_type,text_type,bool,bool,bool,bool)->text_type
if is_replace_eos:
without_new_line = input_text.replace('\n', new_line_replaced)
else:
@@ -75,9 +75,9 @@


def normalize_text_normal_ipadic(input_text, kana=True, ascii=True, digit=True):
# type: (text_type,bool,bool,bool)->text_type
"""
* All hankaku Katanaka is converted into Zenkaku Katakana
* All hankaku English alphabet and numberc string are converted into Zenkaku one
"""
# type: (text_type,bool,bool,bool)->text_type
return jaconv.h2z(input_text, kana=kana, ascii=ascii, digit=digit)
return jaconv.h2z(input_text, kana=kana, ascii=ascii, digit=digit)
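For context, `jaconv.h2z` converts half-width (hankaku) characters to full-width (zenkaku), and the keyword flags pick which character classes are touched. A small usage sketch of what `normalize_text_normal_ipadic` delegates to (assuming jaconv is installed; output described in comments rather than asserted):

```python
import jaconv

text = 'ガギグゲゴ abc 123'  # half-width katakana, ASCII letters, digits

# Convert all three classes, as normalize_text_normal_ipadic does by default:
print(jaconv.h2z(text, kana=True, ascii=True, digit=True))

# Convert katakana only, leaving the alphanumerics half-width:
print(jaconv.h2z(text, kana=True, ascii=False, digit=False))
```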
4 changes: 3 additions & 1 deletion JapaneseTokenizer/common/timeout_handler.py
@@ -1,9 +1,11 @@
#! -*- coding: utf-8 -*-
from functools import wraps


class TimeoutException(Exception):
pass


def handler_func(msg):
raise TimeoutException()

@@ -26,4 +28,4 @@ def __wrapper(*args, **kwargs):
signal.alarm(0)
return result
return wraps(function)(__wrapper)
return __decorator
return __decorator
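The hunk above shows only fragments of the decorator, which arms SIGALRM so that a hung tokenizer call raises `TimeoutException` instead of blocking forever. A self-contained sketch of that pattern (Unix-only; the decorator name and handler signature are assumptions, since the full file is not shown):

```python
import signal
from functools import wraps
from typing import Callable


class TimeoutException(Exception):
    pass


def handler_func(signum, frame):
    # standard signal-handler signature; the file's handler takes a msg argument instead
    raise TimeoutException()


def on_timeout(limit):
    # type: (int) -> Callable
    def __decorator(function):
        def __wrapper(*args, **kwargs):
            signal.signal(signal.SIGALRM, handler_func)
            signal.alarm(limit)   # deliver SIGALRM after `limit` seconds
            try:
                result = function(*args, **kwargs)
            finally:
                signal.alarm(0)   # disarm whether the call succeeded or timed out
            return result
        return wraps(function)(__wrapper)
    return __decorator
```

Wrapping a blocking call such as `process.expect(pattern)` with `@on_timeout(limit=10)` then turns a stuck backend process into a catchable exception.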
33 changes: 19 additions & 14 deletions JapaneseTokenizer/datamodels.py
@@ -4,7 +4,7 @@
# datemodels #
from MeCab import Node
# typing #
from typing import List, Union, Any, Tuple, Dict, Callable
from typing import List, Union, Any, Tuple, Dict, Callable, Optional
from future.utils import text_type, string_types
import sys
import six
@@ -23,12 +23,11 @@ def __is_sotpwords(token, stopwords):


def __is_valid_pos(pos_tuple, valid_pos):
# type: (Tuple[text_type,...],List[Tuple[text_type,...]])->bool
"""This function checks token's pos is with in POS set that user specified.
If token meets all conditions, Return True; else return False
"""
# type: (Tuple[text_type,...],List[Tuple[text_type,...]])->bool
def is_valid_pos(valid_pos_tuple):
""""""
# type: (Tuple[text_type,...])->bool
length_valid_pos_tuple = len(valid_pos_tuple)
if valid_pos_tuple == pos_tuple[:length_valid_pos_tuple]:
@@ -93,9 +92,16 @@ def filter_words(tokenized_obj, valid_pos, stopwords, check_field_name='stem'):


class TokenizedResult(object):
def __init__(self, node_obj, tuple_pos, word_stem, word_surface,
is_feature=True, is_surface=False, misc_info=None, analyzed_line=None):
# type: (Union[Node, None], Union[str, Tuple[text_type, ...], str, str, bool, bool, Union[None, Dict[str, Any]], str])->None
def __init__(self,
node_obj,
tuple_pos,
word_stem,
word_surface,
is_feature=True,
is_surface=False,
misc_info=None,
analyzed_line=None):
# type: (Optional[Node], Tuple[text_type, ...], str, str, bool, bool, Optional[Dict[str, Any]], str)->None
assert isinstance(node_obj, (Node, type(None)))
assert isinstance(tuple_pos, (string_types, tuple))
assert isinstance(word_stem, (string_types))
@@ -120,12 +126,12 @@ def __init__(self, node_obj, tuple_pos, word_stem, word_surface,

class TokenizedSenetence(object):
def __init__(self, sentence, tokenized_objects, string_encoding='utf-8'):
# type: (text_type, List[TokenizedResult], text_type)->None
"""* Parameters
- sentence: sentence
- tokenized_objects: list of TokenizedResult object
- string_encoding: Encoding type of string type. This option is used only under python2.x
"""
# type: (text_type, List[TokenizedResult])->None
assert isinstance(sentence, text_type)
assert isinstance(tokenized_objects, list)

@@ -137,9 +143,9 @@ def __init__(self, sentence, tokenized_objects, string_encoding='utf-8'):
def __extend_token_object(self, token_object,
is_denormalize=True,
func_denormalizer=denormalize_text):
# type: (TokenizedResult,bool,Callable[[str],str])->Tuple
"""This method creates dict object from token object.
"""
# type: (TokenizedResult,bool,Callable[[str],str])->Tuple[str,...]
assert isinstance(token_object, TokenizedResult)

if is_denormalize:
@@ -170,14 +176,14 @@ def __extend_token_object(self, token_object,
def convert_list_object(self,
is_denormalize=True,
func_denormalizer=denormalize_text):
# type: (bool,Callable[[str],str])->List[Union[str, Tuple[str,...]]]
"""* What you can do
- You extract string object from TokenizedResult object
* Args
- is_denormalize: boolen object. True; it makes denormalize string
- func_denormalizer: callable object. de-normalization function.
"""
# type: (bool,Callable[[str],str])->List[Union[str, Tuple[str,...]]]
sentence_in_list_obj = [
self.__extend_token_object(token_object,is_denormalize,func_denormalizer)
for token_object
@@ -187,14 +193,14 @@ def convert_list_object(self,
return sentence_in_list_obj

def __convert_string_type(self, p_c_tuple):
# type: (Tuple[text_type,...])->Tuple[text_type]
"""* What you can do
- it normalizes string types into str
"""
# type: (Tuple[text_type,...])->Tuple[text_type]
if not isinstance(p_c_tuple, tuple):
raise Exception('Pos condition expects tuple of string. However = {}'.format(p_c_tuple))

converted = [object] * len(p_c_tuple)
converted = [text_type] * len(p_c_tuple)
for i, pos_element in enumerate(p_c_tuple):
if six.PY2 and isinstance(pos_element, str):
"""str into unicode if python2.x"""
@@ -209,11 +215,11 @@ def __convert_string_type(self, p_c_tuple):
return tuple(converted)

def __check_pos_condition(self, pos_condistion):
# type: (List[Tuple[text_type, ...]])->List[Tuple[text_type, ...]]
"""* What you can do
- Check your pos condition
- It converts character type into unicode if python version is 2.x
"""
# type: (List[Tuple[text_type, ...]])->List[Tuple[text_type, ...]]
assert isinstance(pos_condistion, list)

return [self.__convert_string_type(p_c_tuple) for p_c_tuple in pos_condistion]
@@ -224,6 +230,7 @@ def filter(self,
is_normalize=True,
func_normalizer=normalize_text,
check_field_name='stem'):
# type: (List[Tuple[text_type,...]], List[text_type], bool, Callable[[text_type], text_type],text_type)->FilteredObject
"""* What you can do
- It filters out token which does NOT meet the conditions (stopwords & part-of-speech tag)
- Under python2.x, pos_condition & stopwords are converted into unicode type.
@@ -243,7 +250,6 @@ def filter(self,
>>> pos_condition = [('名詞', '一般'), ('形容詞', '自立'), ('助詞', '格助詞', '一般')]
>>> stopwords = ['これ', 'それ']
"""
# type: (List[Tuple[text_type,...]], List[text_type], bool, Callable[[text_type], text_type],text_type)->FilteredObject
assert isinstance(pos_condition, (type(None), list))
assert isinstance(stopwords, (type(None), list))

@@ -280,7 +286,6 @@ def filter(self,

class FilteredObject(TokenizedSenetence):
def __init__(self, sentence, tokenized_objects, pos_condition, stopwords):
""""""
# type: (str, List[TokenizedResult], List[str, ...], List[str])->None
super(FilteredObject, self).__init__(
sentence=sentence,
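The filtering in this file rests on `__is_valid_pos`, which keeps a token when any user-supplied POS tuple is a prefix of the token's full POS tuple; that is why a condition like `('名詞',)` matches `('名詞', '一般')` in the `filter()` docstring example. A standalone restatement of the prefix rule (a sketch, not the file's exact code):

```python
from typing import List, Tuple


def is_valid_pos(pos_tuple, valid_pos):
    # type: (Tuple[str, ...], List[Tuple[str, ...]]) -> bool
    """True if any condition tuple is a prefix of the token's POS tuple."""
    return any(cond == pos_tuple[:len(cond)] for cond in valid_pos)


assert is_valid_pos(('名詞', '一般'), [('名詞',)])      # noun, any sub-category
assert not is_valid_pos(('動詞', '自立'), [('名詞',)])  # verbs filtered out
```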
17 changes: 8 additions & 9 deletions JapaneseTokenizer/juman_wrapper/juman_wrapper.py
@@ -6,7 +6,7 @@
from JapaneseTokenizer import init_logger
from JapaneseTokenizer.common.sever_handler import JumanppHnadler
# else
from typing import List, Union, Any, Callable, Tuple
from typing import List, Union, Callable, Tuple
from six import text_type
from pyknp import MList
import logging
@@ -25,7 +25,8 @@
logger.warning(msg='pyknp is not ready to use. Install first if you would like to use pyknp wrapper.')

if six.PY3:
import socket, re
import socket
import re

class MonkeyPatchSocket(object):
"""* Class for overwriting pyknp.Socket because it is only for python2.x"""
@@ -39,15 +40,14 @@ def __init__(self, hostname, port, option=None):
self.sock.send(option)
data = b""
while b"OK" not in data:
#while isinstance(data, bytes) and b"OK" not in data:
# while isinstance(data, bytes) and b"OK" not in data:
data = self.sock.recv(1024)

def __del__(self):
if self.sock:
self.sock.close()

def query(self, sentence, pattern):
""""""
# type: (str,str)->str
assert(isinstance(sentence, six.text_type))
sentence_bytes = sentence.encode('utf-8').strip()
@@ -74,9 +74,9 @@ def __init__(self,
pattern='EOS',
is_use_pyknp=False,
**args):
# type: (text_type, text_type, int, int, text_type, Union[bytes, text_type], Union[bytes, text_type], bool, **str)->None
"""* Class to call Juman tokenizer
"""
# type: (text_type,text_type,int,int,text_type,Union[bytes,text_type],Union[bytes,text_type],bool)->None

self.timeout = timeout
self.pattern = pattern
@@ -101,8 +101,7 @@ def __init__(self,
else:
pass


if not server is None:
if server is not None:
# use server mode #
self.juman = pyknp.Juman(command=command, server=server, port=port,
timeout=self.timeout, rcfile=rcfile, option=option,
@@ -128,10 +127,10 @@ def __del__(self):
self.juman.stop_process()

def __monkey_patch_juman_lines(self, input_str):
# type: (text_type)->text_type
"""* What you can do
- It overwrites juman_line() method because this method causes TypeError in python3
"""
# type: (text_type,)->text_type
assert isinstance(self.juman, pyknp.Juman)
if not self.juman.socket and not self.juman.subprocess:
if self.juman.server is not None:
@@ -236,4 +235,4 @@ def filter(self, parsed_sentence, pos_condition=None, stopwords=None):
assert isinstance(pos_condition, (type(None), list))
assert isinstance(stopwords, (type(None), list))

return parsed_sentence.filter(pos_condition, stopwords)
return parsed_sentence.filter(pos_condition, stopwords)
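`MonkeyPatchSocket` replaces pyknp's python2-only socket client: it connects to a running JUMAN server, sends one UTF-8 sentence terminated by a newline, and reads until the EOS pattern appears. A simplified sketch of that request/response loop (the default port and framing details are assumptions based on the visible code):

```python
import socket


def query_juman_server(sentence, host='localhost', port=32000, pattern=b'EOS'):
    # type: (str, str, int, bytes) -> str
    """Send one sentence to a JUMAN-style server and read until EOS."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect((host, port))
        sock.sendall(sentence.encode('utf-8').strip() + b'\n')
        data = b''
        while pattern not in data:   # accumulate chunks until the EOS marker
            data += sock.recv(1024)
        return data.decode('utf-8')
    finally:
        sock.close()
```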
2 changes: 1 addition & 1 deletion JapaneseTokenizer/jumanpp_wrapper/jumanpp_wrapper.py
@@ -112,6 +112,7 @@ def __init__(self,
port=12000,
is_use_pyknp = False,
** args):
# type: (text_type,int,text_type,text_type,bool)
"""* What you can do
- You can select backend process of jumanpp.
- jumanpp-pexpect: It calls jumanpp on your local machine. It keeps jumanpp process running.
@@ -125,7 +126,6 @@ def __init__(self,
- server: hostname where jumanpp is running
- port: port number where jumanpp is running
"""
# type: (text_type,int,text_type,text_type,bool)
self.eos_pattern = pattern
self.is_use_pyknp = is_use_pyknp

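The docstring above describes the two backends this wrapper can run on. A hedged usage sketch, assuming the class is exposed as `JapaneseTokenizer.JumanppWrapper` and that `tokenize()` returns the `TokenizedSenetence` object from datamodels.py:

```python
from JapaneseTokenizer import JumanppWrapper  # assumed public import path

# jumanpp-pexpect backend: spawns jumanpp locally and keeps it running
tokenizer = JumanppWrapper()
print(tokenizer.tokenize('これは例文です。').convert_list_object())

# jumanpp-server backend: talk to a jumanpp server that is already running
remote_tokenizer = JumanppWrapper(server='localhost', port=12000)
```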