diff --git a/models/chiasmus_de.pkl b/models/chiasmus_de.pkl deleted file mode 100644 index c20774b..0000000 Binary files a/models/chiasmus_de.pkl and /dev/null differ diff --git a/models/metaphor_de.pkl b/models/metaphor_de.pkl deleted file mode 100644 index bf28ea2..0000000 Binary files a/models/metaphor_de.pkl and /dev/null differ diff --git a/models/metaphor_de.torch b/models/metaphor_de.torch deleted file mode 100644 index 9e835cb..0000000 Binary files a/models/metaphor_de.torch and /dev/null differ diff --git a/setup.cfg b/setup.cfg index 0f7718b..0bd3938 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = freestylo -version = 0.3.0 +version = 0.4.0 description = A tool for stylistic device detection." author = Felix Schneider author_email = felix.schneider@uni-jena.de @@ -36,6 +36,7 @@ install_requires = scipy cltk<1.0 wget + fasttext [options.packages.find] where = src diff --git a/src/freestylo/Configs.py b/src/freestylo/Configs.py index e91ccd8..0133ba9 100644 --- a/src/freestylo/Configs.py +++ b/src/freestylo/Configs.py @@ -2,13 +2,15 @@ import logging import json import wget +import zipfile model_list = [ "chiasmus_de.pkl", "metaphor_de.torch", + "fasttext_mgh.bin.zip", ] -github_model_base = "https://github.com/cvjena/freestylo/raw/refs/heads/main/models/" +model_base_url = "https://www.felixschneider.xyz/download/models/" def get_model_path(model_to_load : str) -> str: if os.path.exists(model_to_load): @@ -36,9 +38,12 @@ def get_model_path(model_to_load : str) -> str: for model in model_list: if not os.path.exists(os.path.join(model_path, model)): - logging.info(f"Downloading model {model} from {github_model_base}") - wget.download(github_model_base+model, model_path) + logging.info(f"Downloading model {model} from {model_base_url}") + wget.download(model_base_url+model, model_path) logging.info("done") + if model.endswith(".zip"): + with zipfile.ZipFile(os.path.join(model_path, model), 'r') as zip_ref: + zip_ref.extractall(model_path) model_to_load = os.path.join(model_path, model_to_load) diff --git a/src/freestylo/MGHPreprocessor.py b/src/freestylo/MGHPreprocessor.py index f905b1a..b385ce1 100644 --- a/src/freestylo/MGHPreprocessor.py +++ b/src/freestylo/MGHPreprocessor.py @@ -1,5 +1,7 @@ import cltk import numpy as np +import fasttext +from freestylo.Configs import get_model_path from cltk.corpus.middle_high_german.alphabet import normalize_middle_high_german from cltk.tag.pos import POSTag @@ -17,6 +19,7 @@ def __init__(self): Constructor for the MGHPreprocessor class. """ self.text = "" + self.model = fasttext.load_model(get_model_path("fasttext_mgh.bin")) pass # make class callable with () @@ -51,7 +54,7 @@ def __call__(self, text): dep = "" - vector = np.zeros(300) + vector = self.model.get_word_vector(word) tokens.append(MGHToken(word, pos, lemma, dep, vector, idx)) diff --git a/test/test_text_object.py b/test/test_text_object.py index fc8a79d..a98d032 100644 --- a/test/test_text_object.py +++ b/test/test_text_object.py @@ -36,6 +36,7 @@ def test_processing(): assert(text.has_tokens()) assert(text.tokens[1] == "erbiten") assert(text.pos[1] == "VERB") + assert(text.vectors[0].size == 300) print(text.tokens)