From 72bb56b0727269b7349578ca3ca222bbb84068d7 Mon Sep 17 00:00:00 2001 From: qqaatw Date: Fri, 6 Aug 2021 16:07:59 +0800 Subject: [PATCH 1/5] Add option to decide whether or not to pin memory. --- ckip_transformers/nlp/util.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ckip_transformers/nlp/util.py b/ckip_transformers/nlp/util.py index 0e7fbf1..b154435 100644 --- a/ckip_transformers/nlp/util.py +++ b/ckip_transformers/nlp/util.py @@ -98,6 +98,7 @@ def __call__( batch_size: int = 256, max_length: Optional[int] = None, show_progress: bool = True, + pin_memory: bool = True, ): """Call the driver. @@ -114,8 +115,11 @@ def __call__( max_length : ``int``, *optional* The maximum length of the sentence, must not longer then the maximum sequence length for this model (i.e. ``tokenizer.model_max_length``). - show_progress : ``int``, *optional*, defaults to True + show_progress : ``bool``, *optional*, defaults to True Show progress bar. + pin_memory : ``bool``, *optional*, defaults to True + Pin memory in order to accelerate the speed of data transfer to the GPU. This option is + incompatible with multiprocessing. """ model_max_length = self.tokenizer.model_max_length - 2 # Add [CLS] and [SEP] @@ -170,7 +174,7 @@ def __call__( batch_size=batch_size, shuffle=False, drop_last=False, - pin_memory=True, + pin_memory=pin_memory, ) if show_progress: dataloader = tqdm(dataloader, desc="Inference") From 7ba71aa739ab8727b0ab950c4a5652f35ff0ea29 Mon Sep 17 00:00:00 2001 From: Mu Yang Date: Sun, 8 Aug 2021 17:46:50 +0800 Subject: [PATCH 2/5] Update version number. --- ckip_transformers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckip_transformers/__init__.py b/ckip_transformers/__init__.py index 7c580ad..d740c4b 100644 --- a/ckip_transformers/__init__.py +++ b/ckip_transformers/__init__.py @@ -10,7 +10,7 @@ __copyright__ = "2020 CKIP Lab" __title__ = "CKIP Transformers" -__version__ = "0.2.6" +__version__ = "0.2.7" __description__ = "CKIP Transformers" __license__ = "GPL-3.0" From 6dbaeccadc6283d92c4854fa06a6d7d5388b6520 Mon Sep 17 00:00:00 2001 From: Mu Yang Date: Sun, 8 Aug 2021 17:47:06 +0800 Subject: [PATCH 3/5] Format setup.py --- setup.py | 55 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 24 deletions(-) diff --git a/setup.py b/setup.py index a604cce..5554c5c 100644 --- a/setup.py +++ b/setup.py @@ -1,56 +1,63 @@ #!/usr/bin/env python3 # -*- coding:utf-8 -*- -__author__ = 'Mu Yang ' -__copyright__ = '2020 CKIP Lab' -__license__ = 'GPL-3.0' +__author__ = "Mu Yang " +__copyright__ = "2020 CKIP Lab" +__license__ = "GPL-3.0" from setuptools import setup, find_namespace_packages import ckip_transformers as about ################################################################################ + def main(): - with open('README.rst', encoding='utf-8') as fin: + with open("README.rst", encoding="utf-8") as fin: readme = fin.read() setup( - name='ckip-transformers', + name="ckip-transformers", version=about.__version__, author=about.__author_name__, author_email=about.__author_email__, description=about.__description__, long_description=readme, - long_description_content_type='text/x-rst', + long_description_content_type="text/x-rst", url=about.__url__, download_url=about.__download_url__, - platforms=['linux_x86_64'], + platforms=["linux_x86_64"], license=about.__license__, classifiers=[ - 'Development Status :: 4 - Beta', - 'Environment :: Console', - 'Programming Language :: Python', - 'Programming 
Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3 :: Only', - 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', - 'Operating System :: POSIX :: Linux', - 'Natural Language :: Chinese (Traditional)', + "Development Status :: 4 - Beta", + "Environment :: Console", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3 :: Only", + "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", + "Operating System :: POSIX :: Linux", + "Natural Language :: Chinese (Traditional)", ], - python_requires='>=3.6', - packages=find_namespace_packages(include=['ckip_transformers', 'ckip_transformers.*',]), + python_requires=">=3.6", + packages=find_namespace_packages( + include=[ + "ckip_transformers", + "ckip_transformers.*", + ] + ), install_requires=[ - 'torch>=1.5.0', - 'tqdm>=4.27', - 'transformers>=3.5.0', + "torch>=1.5.0", + "tqdm>=4.27", + "transformers>=3.5.0", ], data_files=[], ) + ################################################################################ -if __name__ == '__main__': +if __name__ == "__main__": main() From 5a5a6d5db555c1b44a1f06582bb1299a6b954f12 Mon Sep 17 00:00:00 2001 From: Mu Yang Date: Sun, 8 Aug 2021 17:47:33 +0800 Subject: [PATCH 4/5] Fix typo. --- ckip_transformers/nlp/driver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ckip_transformers/nlp/driver.py b/ckip_transformers/nlp/driver.py index 868b0dc..d37697a 100644 --- a/ckip_transformers/nlp/driver.py +++ b/ckip_transformers/nlp/driver.py @@ -78,7 +78,7 @@ def __call__( Returns ------- - ``List[List[NerToken]]`` + ``List[List[str]]`` A list of list of words (``str``). """ @@ -174,7 +174,7 @@ def __call__( Returns ------- - ``List[List[NerToken]]`` + ``List[List[str]]`` A list of list of POS tags (``str``). """ From 0fb94517bb9b62e4a04551b6cf8cf3cfec1c5f6f Mon Sep 17 00:00:00 2001 From: Mu Yang Date: Sun, 8 Aug 2021 18:05:55 +0800 Subject: [PATCH 5/5] Update docs. --- .gitignore | 3 +++ DEVELOP.md | 1 + ckip_transformers/nlp/driver.py | 9 +++++++++ docs/.gitignore | 1 + 4 files changed, 14 insertions(+) create mode 100644 docs/.gitignore diff --git a/.gitignore b/.gitignore index b6e4761..db4d280 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +venv +activate + # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/DEVELOP.md b/DEVELOP.md index acef199..b054858 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -3,6 +3,7 @@ - make sure requirements.txt and test/requirements.txt matches setup.py. - >> make clean +- >> make format - >> make lint - >> make doc - >> make tox diff --git a/ckip_transformers/nlp/driver.py b/ckip_transformers/nlp/driver.py index d37697a..62ef5ff 100644 --- a/ckip_transformers/nlp/driver.py +++ b/ckip_transformers/nlp/driver.py @@ -75,6 +75,9 @@ def __call__( must not longer then the maximum sequence length for this model (i.e. ``tokenizer.model_max_length``). show_progress : ``int``, *optional*, defaults to True Show progress bar. + pin_memory : ``bool``, *optional*, defaults to True + Pin memory in order to accelerate the speed of data transfer to the GPU. This option is + incompatible with multiprocessing. 
Returns ------- @@ -171,6 +174,9 @@ def __call__( must not longer then the maximum sequence length for this model (i.e. ``tokenizer.model_max_length``). show_progress : ``int``, *optional*, defaults to True Show progress bar. + pin_memory : ``bool``, *optional*, defaults to True + Pin memory in order to accelerate the speed of data transfer to the GPU. This option is + incompatible with multiprocessing. Returns ------- @@ -257,6 +263,9 @@ def __call__( must not longer then the maximum sequence length for this model (i.e. ``tokenizer.model_max_length``). show_progress : ``int``, *optional*, defaults to True Show progress bar. + pin_memory : ``bool``, *optional*, defaults to True + Pin memory in order to accelerate the speed of data transfer to the GPU. This option is + incompatible with multiprocessing. Returns ------- diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..e646368 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1 @@ +_api \ No newline at end of file
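
Note (not part of the patch series): below is a minimal usage sketch of the ``pin_memory`` option introduced in PATCH 1/5. Only the ``__call__`` keyword arguments (``batch_size``, ``show_progress``, ``pin_memory``) and the ``List[List[str]]`` return type come from the patches; the driver construction and the example sentence are illustrative assumptions, not part of the commits.

    # Illustrative use of the new pin_memory argument. Assumes the default
    # CkipWordSegmenter construction; adjust model/device selection to your
    # environment.
    from ckip_transformers.nlp import CkipWordSegmenter

    ws_driver = CkipWordSegmenter()

    sentences = [
        "中文斷詞測試。",
    ]

    # pin_memory=True keeps the previous hard-coded behaviour (page-locked
    # buffers for faster host-to-GPU transfer); per the patched docstring,
    # pass pin_memory=False when the driver is used with multiprocessing.
    result = ws_driver(
        sentences,
        batch_size=256,
        show_progress=False,
        pin_memory=False,
    )

    for tokens in result:
        print(tokens)  # each entry is a List[str] of segmented words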