From 5d4698742954072f5586d38330087484e135aa5b Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Fri, 26 Apr 2024 14:17:11 +0700 Subject: [PATCH 1/5] Add save and load --- notebooks/test_gzip_classify.ipynb | 58 ++++++++++++++++++++++++++++-- pythainlp/classify/param_free.py | 22 ++++++++++-- 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/notebooks/test_gzip_classify.ipynb b/notebooks/test_gzip_classify.ipynb index 573e7c059..a8a0eb45e 100644 --- a/notebooks/test_gzip_classify.ipynb +++ b/notebooks/test_gzip_classify.ipynb @@ -60,11 +60,60 @@ "source": [ "model.predict(\"ฉันดีใจ\", k=1)" ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5a97f0d3", + "metadata": {}, + "outputs": [], + "source": [ + "model.save(\"d.model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6e183243", + "metadata": {}, + "outputs": [], + "source": [ + "model2 = pythainlp.classify.param_free.GzipModel(model_path=\"d.model\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b30af6f0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Positive'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model2.predict(x1=\"ฉันดีใจ\", k=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e72a33b", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3.8.13 ('base')", "language": "python", "name": "python3" }, @@ -78,7 +127,12 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.8.13" + }, + "vscode": { + "interpreter": { + "hash": "a1d6ff38954a1cdba4cf61ffa51e42f4658fc35985cd256cd89123cae8466a39" + } } }, "nbformat": 4, diff --git a/pythainlp/classify/param_free.py b/pythainlp/classify/param_free.py index 87247da74..119bddb82 100644 --- a/pythainlp/classify/param_free.py +++ b/pythainlp/classify/param_free.py @@ -5,6 +5,7 @@ import gzip from typing import List, Tuple import numpy as np +import json class GzipModel: @@ -16,9 +17,12 @@ class GzipModel: :param list training_data: list [(text_sample,label)] """ - def __init__(self, training_data: List[Tuple[str, str]]): - self.training_data = np.array(training_data) - self.Cx2_list = self.train() + def __init__(self, training_data: List[Tuple[str, str]]=None, model_path=None): + if model_path!=None: + self.load(model_path) + else: + self.training_data = np.array(training_data) + self.Cx2_list = self.train() def train(self): Cx2_list = [] @@ -72,3 +76,15 @@ def predict(self, x1: str, k: int = 1) -> str: predict_class = top_k_class[counts.argmax()] return predict_class + + def save(self, path: str): + with open(path, "w") as f: + json.dump({ + "training_data": self.training_data.tolist(), "Cx2_list":self.Cx2_list + }, f, ensure_ascii=False) + + def load(self, path: str): + with open(path, "r") as f: + data = json.load(f) + self.Cx2_list = data["Cx2_list"] + self.training_data = np.array(data["training_data"]) From 89cde1b6b158892dac133c9b5c05b055210053f2 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Tue, 7 May 2024 00:17:14 +0700 Subject: [PATCH 2/5] Update param_free.py --- pythainlp/classify/param_free.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pythainlp/classify/param_free.py b/pythainlp/classify/param_free.py index 119bddb82..43dea8d98 100644 --- a/pythainlp/classify/param_free.py +++ b/pythainlp/classify/param_free.py @@ -17,7 +17,7 @@ class GzipModel: :param list training_data: list [(text_sample,label)] """ - def __init__(self, training_data: List[Tuple[str, str]]=None, model_path=None): + def __init__(self, training_data: List[Tuple[str, str]]=None, model_path: str=None): if model_path!=None: self.load(model_path) else: @@ -80,7 +80,8 @@ def predict(self, x1: str, k: int = 1) -> str: def save(self, path: str): with open(path, "w") as f: json.dump({ - "training_data": self.training_data.tolist(), "Cx2_list":self.Cx2_list + "training_data": self.training_data.tolist(), + "Cx2_list":self.Cx2_list }, f, ensure_ascii=False) def load(self, path: str): From b09e8bdaa715e0e2f23d7e9d642e701ee90be8e0 Mon Sep 17 00:00:00 2001 From: Wannaphong Phatthiyaphaibun Date: Tue, 7 May 2024 00:17:23 +0700 Subject: [PATCH 3/5] Update .gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 727432b68..af16c6179 100644 --- a/.gitignore +++ b/.gitignore @@ -117,4 +117,5 @@ cython_debug/ notebooks/iso_11940-dev.ipynb # vscode devcontainer -.devcontainer/ \ No newline at end of file +.devcontainer/ +notebooks/d.model From 62b8c94332f0b2259e7521f5e74bbb735cc8797e Mon Sep 17 00:00:00 2001 From: Wannaphong Date: Fri, 19 Jul 2024 23:23:44 +0700 Subject: [PATCH 4/5] Update param_free.py --- pythainlp/classify/param_free.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pythainlp/classify/param_free.py b/pythainlp/classify/param_free.py index 43dea8d98..7076828c9 100644 --- a/pythainlp/classify/param_free.py +++ b/pythainlp/classify/param_free.py @@ -17,8 +17,8 @@ class GzipModel: :param list training_data: list [(text_sample,label)] """ - def __init__(self, training_data: List[Tuple[str, str]]=None, model_path: str=None): - if model_path!=None: + def __init__(self, training_data: List[Tuple[str, str]] = None, model_path: str = None): + if model_path is not None: self.load(model_path) else: self.training_data = np.array(training_data) @@ -81,11 +81,11 @@ def save(self, path: str): with open(path, "w") as f: json.dump({ "training_data": self.training_data.tolist(), - "Cx2_list":self.Cx2_list + "Cx2_list": self.Cx2_list }, f, ensure_ascii=False) def load(self, path: str): with open(path, "r") as f: data = json.load(f) - self.Cx2_list = data["Cx2_list"] + self.Cx2_list = data["Cx2_list"] self.training_data = np.array(data["training_data"]) From e7a1c82a81fa0b233feb27b84eb96486283b4434 Mon Sep 17 00:00:00 2001 From: Wannaphong Date: Fri, 19 Jul 2024 23:28:02 +0700 Subject: [PATCH 5/5] Update param_free.py --- pythainlp/classify/param_free.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pythainlp/classify/param_free.py b/pythainlp/classify/param_free.py index 7076828c9..e416bf72c 100644 --- a/pythainlp/classify/param_free.py +++ b/pythainlp/classify/param_free.py @@ -15,6 +15,7 @@ class GzipModel: (Jiang et al., Findings 2023) :param list training_data: list [(text_sample,label)] + :param str model_path: Path for loading model (if you saved the model) """ def __init__(self, training_data: List[Tuple[str, str]] = None, model_path: str = None): @@ -78,6 +79,9 @@ def predict(self, x1: str, k: int = 1) -> str: return predict_class def save(self, path: str): + """ + :param str path: path for save model + """ with open(path, "w") as f: json.dump({ "training_data": self.training_data.tolist(),