From 5c0d0e6738930d6be4999895143f2d30c29b7517 Mon Sep 17 00:00:00 2001 From: schneider Date: Sun, 27 Oct 2024 16:02:04 +0100 Subject: [PATCH] add html documentation --- html/stylotool/index.html | 72 ++ html/stylotool/setup.html | 55 ++ .../src/freestylo/AlliterationAnnotation.html | 285 ++++++++ .../src/freestylo/ChiasmusAnnotation.html | 677 ++++++++++++++++++ html/stylotool/src/freestylo/Configs.html | 69 ++ .../src/freestylo/EpiphoraAnnotation.html | 275 +++++++ .../src/freestylo/MGHPreprocessor.html | 269 +++++++ .../src/freestylo/MetaphorAnnotation.html | 308 ++++++++ .../src/freestylo/PolysyndetonAnnotation.html | 319 +++++++++ .../stylotool/src/freestylo/SimilarityNN.html | 169 +++++ html/stylotool/src/freestylo/TextObject.html | 279 ++++++++ .../src/freestylo/TextPreprocessor.html | 198 +++++ .../src/freestylo/freestylo_main.html | 107 +++ html/stylotool/src/freestylo/index.html | 117 +++ html/stylotool/src/index.html | 67 ++ html/stylotool/test/index.html | 92 +++ .../test/test_alliteration_annotation.html | 69 ++ .../test/test_chiasmus_annotation.html | 69 ++ .../test/test_epiphora_annotation.html | 69 ++ .../test/test_metaphor_annotations.html | 69 ++ .../test/test_polysyndeton_annotation.html | 69 ++ html/stylotool/test/test_text_object.html | 69 ++ 22 files changed, 3772 insertions(+) create mode 100644 html/stylotool/index.html create mode 100644 html/stylotool/setup.html create mode 100644 html/stylotool/src/freestylo/AlliterationAnnotation.html create mode 100644 html/stylotool/src/freestylo/ChiasmusAnnotation.html create mode 100644 html/stylotool/src/freestylo/Configs.html create mode 100644 html/stylotool/src/freestylo/EpiphoraAnnotation.html create mode 100644 html/stylotool/src/freestylo/MGHPreprocessor.html create mode 100644 html/stylotool/src/freestylo/MetaphorAnnotation.html create mode 100644 html/stylotool/src/freestylo/PolysyndetonAnnotation.html create mode 100644 html/stylotool/src/freestylo/SimilarityNN.html create mode 100644 
html/stylotool/src/freestylo/TextObject.html create mode 100644 html/stylotool/src/freestylo/TextPreprocessor.html create mode 100644 html/stylotool/src/freestylo/freestylo_main.html create mode 100644 html/stylotool/src/freestylo/index.html create mode 100644 html/stylotool/src/index.html create mode 100644 html/stylotool/test/index.html create mode 100644 html/stylotool/test/test_alliteration_annotation.html create mode 100644 html/stylotool/test/test_chiasmus_annotation.html create mode 100644 html/stylotool/test/test_epiphora_annotation.html create mode 100644 html/stylotool/test/test_metaphor_annotations.html create mode 100644 html/stylotool/test/test_polysyndeton_annotation.html create mode 100644 html/stylotool/test/test_text_object.html diff --git a/html/stylotool/index.html b/html/stylotool/index.html new file mode 100644 index 0000000..85ff16d --- /dev/null +++ b/html/stylotool/index.html @@ -0,0 +1,72 @@ + + + + + + +stylotool API documentation + + + + + + + + + + + +
+ + +
+ + + diff --git a/html/stylotool/setup.html b/html/stylotool/setup.html new file mode 100644 index 0000000..a464305 --- /dev/null +++ b/html/stylotool/setup.html @@ -0,0 +1,55 @@ + + + + + + +stylotool.setup API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.setup

+
+
+
+
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/AlliterationAnnotation.html b/html/stylotool/src/freestylo/AlliterationAnnotation.html new file mode 100644 index 0000000..1ec495c --- /dev/null +++ b/html/stylotool/src/freestylo/AlliterationAnnotation.html @@ -0,0 +1,285 @@ + + + + + + +stylotool.src.freestylo.AlliterationAnnotation API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.AlliterationAnnotation

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class AlliterationAnnotation +(text: freestylo.TextObject.TextObject, max_skip=2, min_length=3, skip_tokens=['.', ',', ':', ';', '!', '?', '…', '(', ')', '[', ']', '{', '}', '„', '“', '‚', '‘:', '‘', '’']) +
+
+

This class is used to find alliteration candidates in a text. +It uses the TextObject class to store the text and its annotations.

+

Parameters

+
+
text : TextObject
+
The text to be analyzed.
+
max_skip : int, optional
+
 
+
min_length : int, optional
+
 
+
skip_tokens : list, optional
+
A list of tokens that should be skipped when looking for alliterations.
+
+
+ +Expand source code + +
class AlliterationAnnotation:
    """Find alliteration candidates in a tokenized text.

    The tokens are read from ``text.tokens``. Every accepted candidate is an
    ``AlliterationCandidate`` that is appended to ``self.candidates``.
    """

    def __init__(self, text: "TextObject", max_skip=2, min_length=3, skip_tokens=[".", ",", ":", ";", "!", "?", "…", "(", ")", "[", "]", "{", "}", "„", "“", "‚", "‘:", "‘", "’"]):
        """
        Parameters
        ----------
        text : TextObject
            The text to be analyzed. Only its ``tokens`` attribute is read.
        max_skip : int, optional
            Maximum number of tokens (not counting ``skip_tokens``) that may
            lie between two consecutive members of one candidate.
        min_length : int, optional
            A candidate is kept only if it has more than ``min_length``
            members.
        skip_tokens : list, optional
            A list of tokens that should be skipped when looking for alliterations.
        """
        self.text = text
        self.candidates = []
        self.max_skip = max_skip
        self.min_length = min_length
        # Copy the list so that neither the shared default list nor the
        # caller's list is aliased by this instance.
        self.skip_tokens = list(skip_tokens)

    def _keep_if_long_enough(self, candidate):
        """Append ``candidate`` to the results if it has enough members."""
        # NOTE(review): the comparison is strict, so ``min_length=3`` keeps
        # candidates with four or more members - confirm this is intended.
        if len(candidate.ids) > self.min_length:
            self.candidates.append(candidate)

    def find_candidates(self):
        """
        This method finds alliteration candidates in the text.

        Tokens are grouped by their lower-cased first character. A group is
        closed as soon as more than ``max_skip`` tokens (ignoring
        ``skip_tokens``) separate the current position from its last member.
        Closed groups with more than ``min_length`` members are appended to
        ``self.candidates``.
        """
        skip_tokens = set(self.skip_tokens)
        # first character -> [candidate, skip tokens seen since its last member]
        open_candidates = {}

        for i, token in enumerate(self.text.tokens):
            if token in skip_tokens:
                # Skip tokens widen the allowed gap of every open candidate.
                for entry in open_candidates.values():
                    entry[1] += 1
                continue

            # Close every candidate whose gap has grown too large. This also
            # covers the candidate of the current token's own character, so a
            # distant repetition starts a new candidate instead of extending
            # the old one.
            stale = [
                key
                for key, (candidate, skipped) in open_candidates.items()
                if i - candidate.ids[-1] - 1 - skipped > self.max_skip
            ]
            for key in stale:
                self._keep_if_long_enough(open_candidates.pop(key)[0])

            # Empty tokens and tokens that do not start with a letter can
            # neither open nor extend a candidate.
            if not token or not token[0].isalpha():
                continue

            char = token[0].lower()
            entry = open_candidates.get(char)
            if entry is None:
                open_candidates[char] = [AlliterationCandidate([i], char), 0]
            else:
                entry[0].ids.append(i)
                entry[1] = 0  # the gap restarts after every new member

        # Whatever is still open at the end of the text is evaluated as well.
        for candidate, _ in open_candidates.values():
            self._keep_if_long_enough(candidate)

    def serialize(self) -> list:
        """
        This method serializes the alliteration candidates into a list of dictionaries.

        Returns
        -------
        list
            A list of dictionaries containing the ids, length and character of the alliteration candidates.
        """
        return [
            {"ids": c.ids, "length": c.length, "char": c.char}
            for c in self.candidates
        ]
+
+

Methods

+
+
+def find_candidates(self) +
+
+

This method finds alliteration candidates in the text.

+
+
+def serialize(self) ‑> list +
+
+

This method serializes the alliteration candidates into a list of dictionaries.

+

Returns

+
+
list
+
A list of dictionaries containing the ids, length and character of the alliteration candidates.
+
+
+
+
+
+class AlliterationCandidate +(ids, char) +
+
+

This class represents an alliteration candidate.

+

Parameters

+
+
ids : list
+
A list of token ids that form the alliteration candidate.
+
char : str
+
The character that the candidate starts with.
+
+
+ +Expand source code + +
class AlliterationCandidate():
    """
    A group of tokens that start with the same character.
    """

    def __init__(self, ids, char):
        """
        Parameters
        ----------
        ids : list
            The token ids that belong to this candidate.
        char : str
            The character all tokens of the candidate start with.
        """
        self.char = char
        self.ids = ids

    @property
    def length(self):
        """
        Number of tokens in the candidate.
        """
        return len(self.ids)

    @property
    def score(self):
        """
        Score of the candidate; it equals the number of its tokens.
        """
        member_count = len(self.ids)
        return member_count
+
+

Instance variables

+
+
prop length
+
+

This property returns the length of the alliteration candidate.

+
+ +Expand source code + +
@property
def length(self):
    """
    Number of token ids stored in the alliteration candidate.
    """
    token_ids = self.ids
    return len(token_ids)
+
+
+
prop score
+
+

This property returns the score of the alliteration candidate.

+
+ +Expand source code + +
@property
def score(self):
    """
    Score of the alliteration candidate, i.e. the number of its token ids.
    """
    token_ids = self.ids
    return len(token_ids)
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/ChiasmusAnnotation.html b/html/stylotool/src/freestylo/ChiasmusAnnotation.html new file mode 100644 index 0000000..efea84a --- /dev/null +++ b/html/stylotool/src/freestylo/ChiasmusAnnotation.html @@ -0,0 +1,677 @@ + + + + + + +stylotool.src.freestylo.ChiasmusAnnotation API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.ChiasmusAnnotation

+
+
+
+
+
+
+
+
+

Functions

+
+
+def cosine_similarity(vec1, vec2) +
+
+

This method calculates the cosine similarity between two vectors.

+

Parameters

+
+
vec1 : np.array
+
The first vector.
+
vec2 : np.array
+
The second vector.
+
+
+
+
+
+

Classes

+
+
+class ChiasmusAnnotation +(text: freestylo.TextObject.TextObject, window_size=30) +
+
+

This class is used to find chiasmus candidates in a text. +It uses the TextObject class to store the text and its annotations.

+

Parameters

+
+
text : TextObject
+
The text to be analyzed.
+
window_size : int, optional
+
The window size to search for chiasmus candidates
+
+
+ +Expand source code + +
class ChiasmusAnnotation:
    """
    This class is used to find chiasmus candidates in a text.
    It uses the TextObject class to store the text and its annotations.
    """

    def __init__(self, text: "TextObject", window_size=30):
        """
        Parameters
        ----------
        text : TextObject
            The text to be analyzed. The new annotation registers itself in
            ``text.annotations``.
        window_size : int, optional
            The window size to search for chiasmus candidates
        """
        self.text = text
        text.annotations.append(self)
        self.window_size = window_size
        self.candidates = []
        self.denylist = []
        self.allowlist = []
        self.neglist = []
        self.poslist = []
        self.conjlist = []
        self.type = "chiasmus"
        self.model = None

    def find_candidates(self):
        """
        This method finds chiasmus candidates in the text.

        For every position ``A`` all positions ``A_`` inside the window that
        carry the same pos tag form an outer pair. The inner pair is searched
        between them and, as implemented, always starts at ``A + 1``.
        Every combination is appended to ``self.candidates``.
        """
        for i in range(len(self.text.pos)):
            for A, A_ in self._find_matches(i, i + self.window_size):
                for B, B_ in self._find_matches(A + 1, A_):
                    self.candidates.append(ChiasmusCandidate(A, B, B_, A_))

    def load_classification_model(self, model_path):
        """
        This method loads a classification model to score the chiasmus candidates.

        Parameters
        ----------
        model_path : str
            The path to the model file.
        """
        import pickle
        # NOTE: unpickling can execute arbitrary code. Only load model files
        # that come from a trusted source.
        with open(get_model_path(model_path), "rb") as f:
            self.model = pickle.load(f)

    def serialize(self) -> list:
        """
        This method serializes the chiasmus candidates.

        Returns
        -------
        list
            A list of dictionaries with the keys ``ids``, ``A``, ``B``,
            ``B_``, ``A_`` and ``score``, one per candidate.
        """
        return [
            {
                "ids": c.ids,
                "A": c.A,
                "B": c.B,
                "B_": c.B_,
                "A_": c.A_,
                "score": c.score,
            }
            for c in self.candidates
        ]

    def _find_matches(self, start : int, end : int) -> list:
        """
        This method finds matches in the pos list of the text.

        Parameters
        ----------
        start : int
            The start index of the search.
        end : int
            The end index of the search (exclusive). It may lie beyond the
            end of the text; the search then stops at the last token.

        Returns
        -------
        list
            Tuples ``(start, i)`` for every ``i`` in ``(start, end)`` whose
            pos tag equals the one at ``start``. The list is empty if the pos
            tag at ``start`` is rejected by the allow/deny lists.
        """
        pos = self.text.pos
        if not self._check_pos(pos[start]):
            return []
        # Clamp the window to the text instead of swallowing IndexError.
        last = min(end, len(pos))
        return [(start, i) for i in range(start + 1, last) if pos[i] == pos[start]]

    def _check_pos(self, pos):
        """
        This method checks if a pos is in the allowlist or not in the denylist.

        Parameters
        ----------
        pos : str
            The pos to check.

        Returns
        -------
        bool
            False if a non-empty allowlist does not contain ``pos`` or if the
            denylist contains it, True otherwise.
        """
        if self.allowlist and pos not in self.allowlist:
            return False
        if self.denylist and pos in self.denylist:
            return False
        return True

    def has_candidates(self):
        """
        This method checks if the text has chiasmus candidates.
        """
        return len(self.candidates) > 0

    def score_candidates(self):
        """
        This method scores the chiasmus candidates.

        The loaded model's ``decision_function`` is applied to the feature
        vectors of all candidates and the results are stored in the
        candidates' ``score`` attribute.

        Returns
        -------
        bool
            False if no model has been loaded, True otherwise.
        """
        # Check for the model first so no features are computed for nothing.
        if self.model is None:
            print("Load Chiasmus Model before scoring the candidates")
            return False
        # np.stack raises on an empty sequence; nothing to score in that case.
        if not self.candidates:
            return True
        features = np.stack([self.get_features(c) for c in self.candidates])
        scores = self.model.decision_function(features)
        for score, candidate in zip(scores, self.candidates):
            candidate.score = score
        return True

    def get_features(self, candidate):
        """
        This method extracts features for a chiasmus candidate.

        Parameters
        ----------
        candidate : ChiasmusCandidate
            The candidate to extract features from.

        Returns
        -------
        np.array
            The Dubremetz, lexical and semantic features, concatenated in
            this order.
        """
        return np.concatenate((
            self.get_dubremetz_features(candidate),
            self.get_lexical_features(candidate),
            self.get_semantic_features(candidate),
        ))

    @staticmethod
    def _char_ngram_overlap(first, second, n):
        """Return the number of shared character n-grams of two strings,
        divided by the larger n-gram set size plus one."""
        grams_first = {first[k:k + n] for k in range(len(first) - n + 1)}
        grams_second = {second[k:k + n] for k in range(len(second) - n + 1)}
        shared = len(grams_first & grams_second)
        return shared / max(len(grams_first) + 1, len(grams_second) + 1)

    def get_dubremetz_features(self, candidate):
        """
        This method extracts Dubremetz features for a chiasmus candidate.

        The order and the values of the features are kept exactly as they
        were, because a previously trained model may depend on them.

        Parameters
        ----------
        candidate : ChiasmusCandidate
            The candidate to extract features from.

        Returns
        -------
        np.array
            An array of 21 Dubremetz features
        """
        tokens = self.text.tokens
        lemmas = self.text.lemmas
        dep = self.text.dep
        a, b, b_, a_ = candidate.ids

        hardp_list = ['.', '(', ')', "[", "]"]

        # Spans strictly between the supporting words.
        left_tokens = tokens[a + 1:b]
        right_tokens = tokens[b_ + 1:a_]
        center_tokens = tokens[b + 1:b_]
        left_lemmas = lemmas[a + 1:b]
        right_lemmas = lemmas[b_ + 1:a_]
        center = center_tokens + lemmas[b + 1:b_]

        features = []

        # Basic
        outer_punct = sum(
            (h in left_tokens) + (h in right_tokens) for h in hardp_list
        )
        features.append(outer_punct)
        # NOTE(review): this feature repeats the hard-punctuation count. A
        # soft-punctuation list (',' and ';') used to be defined next to it
        # but was never used. Confirm against the trained model before
        # changing the value.
        features.append(outer_punct)
        features.append(sum(h in center_tokens for h in hardp_list))

        # Repetitions of the supporting lemmas elsewhere in the text. The
        # supporting word itself and, if equal, its partner are not counted.
        same_a = lemmas[a] == lemmas[a_]
        same_b = lemmas[b] == lemmas[b_]
        rep_a1 = lemmas.count(lemmas[a]) - (2 if same_a else 1)
        rep_b1 = lemmas.count(lemmas[b]) - (2 if same_b else 1)
        rep_b2 = lemmas.count(lemmas[b_]) - (2 if same_b else 1)
        features.append(rep_a1)
        features.append(rep_b1)
        features.append(rep_b2)
        # NOTE(review): a repetition count for the lemma at ids[3] used to be
        # computed here, but the value that was appended is rep_b2. This is
        # kept unchanged - confirm against the trained model before fixing.
        features.append(rep_b2)

        # Size
        features.append(abs((b - a) - (a_ - b_)))
        features.append(a_ - b)

        # Similarity
        left_text = " ".join(left_tokens)
        right_text = " ".join(right_tokens)
        features.append(left_text == right_text)

        same_tok = sum(1 for lemma in left_lemmas if lemma in right_lemmas)
        features.append(same_tok)
        features.append(same_tok / (b - a))

        features.append(self._char_ngram_overlap(left_text, right_text, 2))
        features.append(self._char_ngram_overlap(left_text, right_text, 3))
        features.append(len(set(left_tokens) & set(right_tokens)))

        # Lexical clues: conjunctions / negations between the inner words.
        features.append(int(any(c in center for c in self.conjlist)))
        features.append(int(any(n in center for n in self.neglist)))

        # Dependency score
        features.append(int(dep[b] == dep[a_]))
        features.append(int(dep[a] == dep[b_]))
        features.append(int(dep[b] == dep[b_]))
        features.append(int(dep[a] == dep[a_]))

        return np.array(features)

    def get_lexical_features(self, candidate):
        """
        This method extracts lexical features for a chiasmus candidate.

        Parameters
        ----------
        candidate : ChiasmusCandidate
            The candidate to extract features from.

        Returns
        -------
        np.array
            One entry per pair of supporting words: 1 if the two lemmas are
            equal, 0 otherwise.
        """
        lemmas = [self.text.lemmas[i] for i in candidate.ids]
        count = len(lemmas)
        return np.array([
            1 if lemmas[i] == lemmas[j] else 0
            for i in range(count)
            for j in range(i + 1, count)
        ])

    def get_semantic_features(self, candidate):
        """
        This method extracts semantic features for a chiasmus candidate.

        Parameters
        ----------
        candidate : ChiasmusCandidate
            The candidate to extract features from.

        Returns
        -------
        np.array
            The cosine similarity of the word vectors for every pair of
            supporting words.
        """
        vectors = [self.text.vectors[i] for i in candidate.ids]
        count = len(vectors)
        return np.array([
            cosine_similarity(vectors[i], vectors[j])
            for i in range(count)
            for j in range(i + 1, count)
        ])
+
+

Methods

+
+
+def find_candidates(self) +
+
+

This method finds chiasmus candidates in the text. +It uses the window_size to search for candidates.

+
+
+def get_dubremetz_features(self, candidate) +
+
+

This method extracts Dubremetz features for a chiasmus candidate.

+

Returns

+
+
np.array
+
An array of Dubremetz features
+
+
+
+def get_features(self, candidate) +
+
+

This method extracts features for a chiasmus candidate.

+

Parameters

+
+
candidate : ChiasmusCandidate
+
The candidate to extract features from.
+
+

Returns

+
+
np.array
+
An array of features.
+
+
+
+def get_lexical_features(self, candidate) +
+
+

This method extracts lexical features for a chiasmus candidate.

+

Returns

+
+
np.array
+
An array of lexical features
+
+
+
+def get_semantic_features(self, candidate) +
+
+

This method extracts semantic features for a chiasmus candidate.

+

Returns

+
+
np.array
+
An array of semantic features
+
+
+
+def has_candidates(self) +
+
+

This method checks if the text has chiasmus candidates.

+
+
+def load_classification_model(self, model_path) +
+
+

This method loads a classification model to score the chiasmus candidates.

Parameters

+
+
+
model_path : str
+
The path to the model file.
+
+
+
+def score_candidates(self) +
+
+

This method scores the chiasmus candidates.

+
+
+def serialize(self) ‑> list +
+
+

This method serializes the chiasmus candidates.

+

Returns

+
+
list
+
A list of serialized candidates.
+
+
+
+
+
+class ChiasmusCandidate +(A, B, B_, A_) +
+
+

This class represents a chiasmus candidate.

+

Parameters

+
+
A : int
+
Index of the first supporting word
+
B : int
+
Index of the second supporting word
+
B_ : int
+
Index of the third supporting word, paired with B
+
A_ : int
+
Index of the fourth supporting word, paired with A
+
+
+ +Expand source code + +
class ChiasmusCandidate:
    """
    A chiasmus candidate made of four supporting word positions.
    """

    def __init__(self, A, B, B_, A_):
        """
        Parameters
        ----------
        A : int
            Index of the first supporting word
        B : int
            Index of the second supporting word
        B_ : int
            Index of the third supporting word, paired with B
        A_ : int
            Index of the fourth supporting word, paired with A
        """
        self.A, self.B, self.B_, self.A_ = A, B, B_, A_
        # The four indices in text order.
        self.ids = [A, B, B_, A_]
        # No score until the candidate has been scored.
        self.score = None

    def __str__(self):
        """
        Return the four indices in text order, separated by single spaces.
        """
        return "{} {} {} {}".format(self.A, self.B, self.B_, self.A_)
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/Configs.html b/html/stylotool/src/freestylo/Configs.html new file mode 100644 index 0000000..4233940 --- /dev/null +++ b/html/stylotool/src/freestylo/Configs.html @@ -0,0 +1,69 @@ + + + + + + +stylotool.src.freestylo.Configs API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.Configs

+
+
+
+
+
+
+
+
+

Functions

+
+
+def get_model_path(model_to_load: str) ‑> str +
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/EpiphoraAnnotation.html b/html/stylotool/src/freestylo/EpiphoraAnnotation.html new file mode 100644 index 0000000..059e447 --- /dev/null +++ b/html/stylotool/src/freestylo/EpiphoraAnnotation.html @@ -0,0 +1,275 @@ + + + + + + +stylotool.src.freestylo.EpiphoraAnnotation API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.EpiphoraAnnotation

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class EpiphoraAnnotation +(text: freestylo.TextObject.TextObject, min_length=2, conj=['and', 'or', 'but', 'nor'], punct_pos='PUNCT') +
+
+

This class is used to find epiphora candidates in a text. +It uses the TextObject class to store the text and its annotations.

+

Constructor for the EpiphoraAnnotation class.

+

Parameters

+
+
text : TextObject
+
The text to be analyzed.
+
min_length : int, optional
+
The minimum length of the epiphora candidates.
+
conj : list, optional
+
A list of conjunctions that should be considered when looking for epiphora.
+
punct_pos : str, optional
+
The part of speech tag for punctuation.
+
+
+ +Expand source code + +
class EpiphoraAnnotation:
    """
    This class is used to find epiphora candidates in a text.
    It uses the TextObject class to store the text and its annotations.
    """

    def __init__(self, text: "TextObject", min_length=2, conj = ["and", "or", "but", "nor"], punct_pos="PUNCT"):
        """
        Constructor for the EpiphoraAnnotation class.

        Parameters
        ----------
        text : TextObject
            The text to be analyzed. Its ``tokens`` and ``pos`` attributes
            are read.
        min_length : int, optional
            The minimum number of consecutive phrases that must end with the
            same word.
        conj : list, optional
            A list of conjunctions that should be considered when looking for epiphora.
        punct_pos : str, optional
            The part of speech tag for punctuation.
        """
        self.text = text
        self.candidates = []
        self.min_length = min_length
        # Copy the list so that neither the shared default list nor the
        # caller's list is aliased by this instance.
        self.conj = list(conj)
        self.punct_pos = punct_pos

    def split_in_phrases(self):
        """
        This method splits the text into phrases.

        A phrase ends at a conjunction or punctuation token, but only if
        more than two tokens precede that token within the phrase. Otherwise
        the phrase simply continues across the delimiter.

        Returns
        -------
        list
            A list of lists, each containing the start and end index of a
            phrase (end exclusive). The last entry covers the rest of the
            text and may be empty.
        """
        tokens = self.text.tokens
        phrases = []
        current_start = 0
        for i, token in enumerate(tokens):
            is_delimiter = token in self.conj or self.text.pos[i] == self.punct_pos
            if is_delimiter and i - current_start > 2:
                phrases.append([current_start, i])
                current_start = i + 1
        phrases.append([current_start, len(tokens)])
        return phrases

    def find_candidates(self):
        """
        This method finds epiphora candidates in the text.

        Consecutive phrases that end with the same word are collected in one
        candidate. Candidates that span at least ``min_length`` phrases are
        stored in ``self.candidates``.
        """
        tokens = self.text.tokens
        candidates = []
        current = None
        for phrase in self.split_in_phrases():
            start, end = phrase
            if end <= start:
                # An empty phrase (empty text, or the rest after a final
                # delimiter) has no last word.
                continue
            word = tokens[end - 1]
            if current is not None and word == current.word:
                current.ids.append(phrase)
                continue
            if current is not None and len(current.ids) >= self.min_length:
                candidates.append(current)
            current = EpiphoraCandidate([phrase], word)
        # The candidate that is still open at the end of the text counts too.
        if current is not None and len(current.ids) >= self.min_length:
            candidates.append(current)
        self.candidates = candidates

    def serialize(self) -> list:
        """
        This method serializes the epiphora candidates.

        Returns
        -------
        list
            A list of dictionaries, each containing the ids, length, and word of an epiphora candidate.
        """
        # The length is derived from the ids, so candidates do not need to
        # provide a ``length`` attribute.
        return [
            {"ids": c.ids, "length": len(c.ids), "word": c.word}
            for c in self.candidates
        ]
+
+

Methods

+
+
+def find_candidates(self) +
+
+

This method finds epiphora candidates in the text.

+
+
+def serialize(self) ‑> list +
+
+

This method serializes the epiphora candidates.

+

Returns

+
+
list
+
A list of dictionaries, each containing the ids, length, and word of an epiphora candidate.
+
+
+
+def split_in_phrases(self) +
+
+

This method splits the text into phrases.

+

Returns

+
+
list
+
A list of lists, each containing the start and end index of a phrase.
+
+
+
+
+
+class EpiphoraCandidate +(ids, word) +
+
+

This class represents an epiphora candidate.

+

Constructor for the EpiphoraCandidate class.

+

Parameters

+
+
ids : list
+
A list of token ids that form the candidate.
+
word : str
+
The word that the candidate ends with.
+
+
+ +Expand source code + +
class EpiphoraCandidate():
    """
    This class represents an epiphora candidate.
    """

    def __init__(self, ids, word):
        """
        Constructor for the EpiphoraCandidate class.

        Parameters
        ----------
        ids : list
            The members of the candidate. EpiphoraAnnotation stores one
            ``[start, end]`` phrase per member.
        word : str
            The word that the candidate ends with.
        """
        self.ids = ids
        self.word = word

    @property
    def length(self):
        """
        This property returns the number of members of the candidate.

        EpiphoraAnnotation.serialize reads this attribute.
        """
        return len(self.ids)

    @property
    def score(self):
        """
        This property returns the score of the candidate.
        """
        return len(self.ids)
+
+

Instance variables

+
+
prop score
+
+

This property returns the score of the candidate.

+
+ +Expand source code + +
@property
+def score(self):
+    """
+    This property returns the score of the candidate.
+    """
+    return len(self.ids)
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/MGHPreprocessor.html b/html/stylotool/src/freestylo/MGHPreprocessor.html new file mode 100644 index 0000000..0285af5 --- /dev/null +++ b/html/stylotool/src/freestylo/MGHPreprocessor.html @@ -0,0 +1,269 @@ + + + + + + +stylotool.src.freestylo.MGHPreprocessor API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.MGHPreprocessor

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class MGHPreprocessor +
+
+

This class preprocesses Middle High German text.

+

Constructor for the MGHPreprocessor class.

+
+ +Expand source code + +
class MGHPreprocessor:
+    """
+    This class preprocesses Middle High German text.
+    """
+    def __init__(self):
+        """
+        Constructor for the MGHPreprocessor class.
+        """
+        self.text = ""
+        self.model = fasttext.load_model(get_model_path("fasttext_mgh.bin"))
+        pass
+
+    # make class callable with ()
+    def __call__(self, text):
+        """
+        This method preprocesses Middle High German text.
+        
+        Parameters
+        ----------
+        text : str
+            The text to be preprocessed.
+
+        Returns
+        -------
+        list
+            A list of MGH tokens.
+        """
+        self.text = normalize_middle_high_german(text)
+
+        tokens = []
+
+        idx = 0
+        pos_tagger = POSTag('middle_high_german')
+        lemmatizer = BackoffMHGLemmatizer()
+        # custom tokenizer, because I need the character index of the word
+        while True:
+            word, next_idx = self.get_next_word(self.text, idx)
+
+            pos = pos_tagger.tag_tnt(word)[0][1]
+
+            lemma = min(lemmatizer.lemmatize([word])[0][1], key=len)
+
+            dep = ""
+
+            vector = self.model.get_word_vector(word)
+
+
+            tokens.append(MGHToken(word, pos, lemma, dep, vector, idx))
+
+            if next_idx is None:
+                break
+            idx = next_idx
+        return tokens
+
+
+
+    def get_next_word(self, text, idx):
+        """
+        This method finds the next word in a text.
+
+        Parameters
+        ----------
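+        text : str
+            The text to be searched.
+        idx : int
+            The character index at which the current word starts.
+
+        Returns
+        -------
+        str
+            The word that starts at idx.
+        int or None
+            The character index at which the following word starts, or None
+            if the end of the text has been reached.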
+        """
+        cursor = idx
+        is_end = False 
+        # find end of current word
+        while cursor < len(text):
+            try:
+                if text[cursor] in [" ", "\n", "\t"]:
+                    break
+            except: # end of text
+                is_end = True
+                break
+            cursor += 1
+
+        end_word = cursor
+
+        #find start of next word
+        while cursor < len(text):
+            try:
+                if text[cursor] not in [" ", "\n", "\t"]:
+                    break
+            except:
+                is_end = True
+                break
+            cursor += 1
+
+        next_word = cursor
+
+        if cursor == len(text):
+            next_word = None
+
+        word = text[idx:end_word]
+
+        return word, next_word
+
+

Methods

+
+
+def get_next_word(self, text, idx) +
+
+

This method finds the next word in a text.

+

Parameters

+
+
text : str
+
The text to be searched.
+
idx : int
+
The character index at which the current word starts.
+
+

Returns

+
+
str
+
The word that starts at idx.
+
int or None
+
The character index at which the following word starts, or None if the end of the text has been reached.
+
+
+
+
+
+class MGHToken +(text, pos, lemma, dep, vector, idx) +
+
+

This class represents a Middle High German token.

+

Constructor for the MGHToken class.

+

Parameters

+
+
text : str
+
The text of the token.
+
pos : str
+
The part of speech of the token.
+
lemma : str
+
The lemma of the token.
+
dep : str
+
The dependency of the token.
+
vector : np.array
+
The vector representation of the token.
+
idx : int
+
The character index at which the token starts in the text.
+
+
+ +Expand source code + +
class MGHToken:
+    """
+    This class represents a Middle High German token.
+    """
+    def __init__(self, text, pos, lemma, dep, vector, idx):
+        """
+        Constructor for the MGHToken class.
+
+        Parameters
+        ----------
+        text : str
+            The text of the token.
+        pos : str
+            The part of speech of the token.
+        lemma : str
+            The lemma of the token.
+        dep : str
+            The dependency of the token.
+        vector : np.array
+            The vector representation of the token.
+        idx : int
+            The character index at which the token starts in the text.
+        """
+        self.text = text
+        self.pos = pos
+        self.lemma = lemma
+        self.dep = dep
+        self.vector = vector
+        self.idx = idx
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/MetaphorAnnotation.html b/html/stylotool/src/freestylo/MetaphorAnnotation.html new file mode 100644 index 0000000..6c141ab --- /dev/null +++ b/html/stylotool/src/freestylo/MetaphorAnnotation.html @@ -0,0 +1,308 @@ + + + + + + +stylotool.src.freestylo.MetaphorAnnotation API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.MetaphorAnnotation

+
+
+
+
+
+
+
+
+

Functions

+
+
+def cosine_distance(a, b) +
+
+

This function calculates the cosine distance between two vectors.

+

Parameters

+
+
a : torch.Tensor
+
The first vector.
+
b : torch.Tensor
+
The second vector.
+
+

Returns

+
+
float
+
The cosine distance between the two vectors.
+
+
+
+
+
+

Classes

+
+
+class MetaphorAnnotation +(text) +
+
+

This class is used to find metaphor candidates in a text. +It uses the TextObject class to store the text and its annotations.

+

Constructor for the MetaphorAnnotation class.

+

Parameters

+
+
text : TextObject
+
The text to be analyzed.
+
+
+ +Expand source code + +
class MetaphorAnnotation:
+    """
+    This class is used to find metaphor candidates in a text.
+    It uses the TextObject class to store the text and its annotations.
+    """
+    def __init__(self, text):
+        """
+        Constructor for the MetaphorAnnotation class.
+
+        Parameters
+        ----------
+        text : TextObject
+            The text to be analyzed.
+        """
+        self.text = text
+        text.annotations.append(self)
+        self.candidates = []
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.type = "metaphor"
+        self.model = None
+
+    def find_candidates(self):
+        """
+        This method finds metaphor candidates in the text.
+        """
+        pos = self.text.pos
+        for i in range(len(pos)-1):
+            if pos[i] == "ADJ" and pos[i+1] == "NOUN":
+                self.candidates.append(MetaphorCandidate(i, i+1))
+
+    def serialize(self) -> list:
+        """
+        This method serializes the metaphor candidates.
+
+        Returns
+        -------
+        list
+            A list of dictionaries, each containing the ids of both tokens ("ids"), the id of the adjective ("adjective"), the id of the noun ("noun"), and the score ("score").
+        """
+        candidates = []
+        for c in self.candidates:
+            candidates.append({
+                "ids": c.ids,
+                "adjective": c.adj_id,
+                "noun": c.noun_id,
+                "score": c.score})
+        return candidates
+
+
+    def load_model(self, model_path):
+        """
+        This method loads a model for metaphor detection.
+
+        Parameters
+        ----------
+        model_path : str
+            The path to the model.
+        """
+        model_path = get_model_path(model_path)
+        self.model = SimilarityNN.SimilarityNN(300, 128, 1, 128, self.device)
+        self.model.load_state_dict(torch.load(model_path, weights_only=True, map_location=self.device))
+        self.model = self.model.to(self.device)
+        self.model.eval()
+
+    def get_vectors(self):
+        """
+        This method returns the vectors of the adjective and noun candidates.
+
+        Returns
+        -------
+        np.array
+            An array of adjective vectors.
+        np.array
+            An array of noun vectors.
+        """
+        adj_vectors = []
+        noun_vectors = []
+        for candidate in self.candidates:
+            adj_vectors.append(self.text.vectors[candidate.ids[0]])
+            noun_vectors.append(self.text.vectors[candidate.ids[1]])
+
+        adj_vectors = np.array(adj_vectors)
+        noun_vectors = np.array(noun_vectors)
+        return adj_vectors, noun_vectors
+
+    def score_candidates(self):
+        """
+        This method scores the metaphor candidates.
+        """
+        adj_vectors, noun_vectors = self.get_vectors()
+        adj_tensor = torch.tensor(adj_vectors, device=self.device).to(self.device)
+        noun_tensor = torch.tensor(noun_vectors, device=self.device).to(self.device)
+        assert(self.model is not None)
+        adj_metaphor_tensor = self.model(adj_tensor)
+        noun_metaphor_tensor = self.model(noun_tensor)
+        #scores = 1-(torch.nn.CosineSimilarity()(adj_metaphor_tensor, noun_metaphor_tensor)+1)/2
+        scores = cosine_distance(adj_metaphor_tensor, noun_metaphor_tensor)
+        for score, candidate in zip(scores, self.candidates):
+            candidate.score = score.item()
+
+

Methods

+
+
+def find_candidates(self) +
+
+

This method finds metaphor candidates in the text.

+
+
+def get_vectors(self) +
+
+

This method returns the vectors of the adjective and noun candidates.

+

Returns

+
+
np.array
+
An array of adjective vectors.
+
np.array
+
An array of noun vectors.
+
+
+
+def load_model(self, model_path) +
+
+

This method loads a model for metaphor detection.

+

Parameters

+
+
model_path : str
+
The path to the model.
+
+
+
+def score_candidates(self) +
+
+

This method scores the metaphor candidates.

+
+
+def serialize(self) ‑> list +
+
+

This method serializes the metaphor candidates.

+

Returns

+
+
list
+
A list of dictionaries, each containing the ids of both tokens ("ids"), the id of the adjective ("adjective"), the id of the noun ("noun"), and the score ("score").
+
+
+
+
+
+class MetaphorCandidate +(adj_id, noun_id) +
+
+

This class represents a metaphor candidate.

+

Constructor for the MetaphorCandidate class.

+

Parameters

+
+
adj_id : int
+
The id of the adjective.
+
noun_id : int
+
The id of the noun.
+
+
+ +Expand source code + +
class MetaphorCandidate():
+    """
+    This class represents a metaphor candidate.
+    """
+    def __init__(self, adj_id, noun_id):
+        """
+        Constructor for the MetaphorCandidate class.
+
+        Parameters
+        ----------
+        adj_id : int
+            The id of the adjective.
+        noun_id : int
+            The id of the noun.
+        """
+        self.ids = [adj_id, noun_id]
+        self.noun_id = noun_id
+        self.adj_id = adj_id
+        self.score = None
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/PolysyndetonAnnotation.html b/html/stylotool/src/freestylo/PolysyndetonAnnotation.html new file mode 100644 index 0000000..263000f --- /dev/null +++ b/html/stylotool/src/freestylo/PolysyndetonAnnotation.html @@ -0,0 +1,319 @@ + + + + + + +stylotool.src.freestylo.PolysyndetonAnnotation API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.PolysyndetonAnnotation

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class PolysyndetonAnnotation +(text: freestylo.TextObject.TextObject, min_length=2, conj=['and', 'or', 'but', 'nor'], sentence_end_tokens=['.', '?', '!', ':', ';', '...'], punct_pos='PUNCT') +
+
+

This class is used to find polysyndeton candidates in a text. +It uses the TextObject class to store the text and its annotations.

+

Constructor for the PolysyndetonAnnotation class.

+

Parameters

+
+
text : TextObject
+
The text to be analyzed.
+
min_length : int, optional
+
The minimum length of the polysyndeton candidates.
+
conj : list, optional
+
A list of conjunctions that should be considered when looking for polysyndeton.
+
sentence_end_tokens : list, optional
+
A list of tokens that indicate the end of a sentence.
+
punct_pos : str, optional
+
The part of speech tag for punctuation.
+
+
+ +Expand source code + +
class PolysyndetonAnnotation:
+    """
+    This class is used to find polysyndeton candidates in a text.
+    It uses the TextObject class to store the text and its annotations.
+    """
+    def __init__(self, text : TextObject, min_length=2, conj = ["and", "or", "but", "nor"], sentence_end_tokens=[".", "?", "!", ":", ";", "..."], punct_pos="PUNCT"):
+        """
+        Constructor for the PolysyndetonAnnotation class.
+
+        Parameters
+        ----------
+        text : TextObject
+            The text to be analyzed.
+        min_length : int, optional
+            The minimum length of the polysyndeton candidates.
+        conj : list, optional
+            A list of conjunctions that should be considered when looking for polysyndeton.
+        sentence_end_tokens : list, optional
+            A list of tokens that indicate the end of a sentence.
+        punct_pos : str, optional
+            The part of speech tag for punctuation.
+        """
+
+        self.text = text
+        self.candidates = []
+        self.min_length = min_length
+        self.conj = conj
+        self.sentence_end_tokens = sentence_end_tokens
+        self.punct_pos = punct_pos
+
+    def split_in_phrases(self):
+        """
+        This method splits the text into phrases.
+
+        Returns
+        -------
+        list
+            A list of sentences, each of which is a list of phrases given as [start, end] pairs of token indices.
+        """
+        
+        phrases_in_sentences = []
+        phrases = []
+        current_sentence_start = 0
+        current_phrase_start = 0
+        for i, token in enumerate(self.text.tokens):
+            if token in self.sentence_end_tokens:
+                phrases.append([current_phrase_start, i])
+                current_phrase_start = i+1
+                current_sentence_start = i+1
+                phrases_in_sentences.append(phrases)
+                phrases = []
+            elif token in self.conj and i-current_phrase_start > 1:
+                phrases.append([current_phrase_start, i])
+                current_phrase_start = i
+        return phrases_in_sentences
+
+    def check_add_candidate(self, candidates, candidate):
+        """
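+        This method adds the candidate to the list of candidates if it is long enough
+        (at least min_length phrases) and returns the list.
+
+        Parameters
+        ----------
+        candidates : list
+            A list of polysyndeton candidates.
+        candidate : PolysyndetonCandidate
+            The candidate to be checked.
+
+        Returns
+        -------
+        list
+            The list of candidates, including the new candidate if it was long enough.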
+        """
+        if len(candidate.ids) >= self.min_length:
+            candidates.append(candidate)
+        return candidates
+
+
+
+    def find_candidates(self):
+        """
+        This method finds polysyndeton candidates in the text.
+        """
+        candidates = []
+        sentences = self.split_in_phrases()
+        for sentence in sentences:
+            current_candidate = PolysyndetonCandidate([], "")
+            current_word = ""
+            for phrase in sentence:
+                word = self.text.tokens[phrase[0]]
+                if word != current_candidate.word:
+                    candidates = self.check_add_candidate(candidates, current_candidate)
+                    current_candidate = PolysyndetonCandidate([phrase], word)
+                else:
+                    current_candidate.ids.append(phrase)
+            candidates = self.check_add_candidate(candidates, current_candidate)
+
+        self.candidates = []
+        for candidate in candidates:
+            if candidate.word in self.conj:
+                self.candidates.append(candidate)
+
+
+    def serialize(self) -> list:
+        """
+        This method serializes the polysyndeton candidates.
+
+        Returns
+        -------
+        list
+            A list of dictionaries, each containing the ids, word, and score of a polysyndeton candidate.
+        """
+        candidates = []
+        for c in self.candidates:
+            candidates.append({
+                "ids": c.ids,
+                "score": c.score,
+                "word": c.word})
+        return candidates
+
+

Methods

+
+
+def check_add_candidate(self, candidates, candidate) +
+
+

This method adds the candidate to the list of candidates if it is long enough (at least min_length phrases) and returns the list.

+

Parameters

+
+
candidates : list
+
A list of polysyndeton candidates.
+
candidate : PolysyndetonCandidate
+
The candidate to be checked.
+
+
+
+def find_candidates(self) +
+
+

This method finds polysyndeton candidates in the text.

+
+
+def serialize(self) ‑> list +
+
+

This method serializes the polysyndeton candidates.

+

Returns

+
+
list
+
A list of dictionaries, each containing the ids, word, and score of a polysyndeton candidate.
+
+
+
+def split_in_phrases(self) +
+
+

This method splits the text into phrases.

+

Returns

+
+
list
+
A list of sentences, each of which is a list of phrases given as [start, end] pairs of token indices.
+
+
+
+
+
+class PolysyndetonCandidate +(ids, word) +
+
+

This class represents a polysyndeton candidate.

+

Constructor for the PolysyndetonCandidate class.

+

Parameters

+
+
ids : list
+
A list of phrases, each given as a [start, end] pair of token indices, that form the candidate.
+
word : str
+
The word that each phrase of the candidate starts with.
+
+
+ +Expand source code + +
class PolysyndetonCandidate():
+    """
+    This class represents a polysyndeton candidate.
+    """
+    def __init__(self, ids, word):
+        """
+        Constructor for the PolysyndetonCandidate class.
+
+        Parameters
+        ----------
+        ids : list
+            A list of phrases, each given as a [start, end] pair of token indices, that form the candidate.
+        word : str
+            The word that each phrase of the candidate starts with.
+        """
+        self.ids = ids
+        self.word = word
+
+    @property
+    def score(self):
+        """
+        This property returns the score of the polysyndeton candidate.
+        """
+        return len(self.ids)
+
+

Instance variables

+
+
prop score
+
+

This property returns the score of the polysyndeton candidate.

+
+ +Expand source code + +
@property
+def score(self):
+    """
+    This property returns the score of the polysyndeton candidate.
+    """
+    return len(self.ids)
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/SimilarityNN.html b/html/stylotool/src/freestylo/SimilarityNN.html new file mode 100644 index 0000000..b3b86d2 --- /dev/null +++ b/html/stylotool/src/freestylo/SimilarityNN.html @@ -0,0 +1,169 @@ + + + + + + +stylotool.src.freestylo.SimilarityNN API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.SimilarityNN

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class SimilarityNN +(input_dim, hidden_dim, num_hidden, output_dim, device) +
+
+

This class defines a neural network for metaphor detection.

+

Constructor for the SimilarityNN class.

+

Parameters

+
+
input_dim : int
+
The dimension of the input.
+
hidden_dim : int
+
The dimension of the hidden layers.
+
num_hidden : int
+
The number of hidden layers.
+
output_dim : int
+
The dimension of the output.
+
device : str
+
The device to run the model on.
+
+
+ +Expand source code + +
class SimilarityNN(nn.Module):
+    """
+    This class defines a neural network for metaphor detection.
+    """
+    def __init__(self, input_dim, hidden_dim, num_hidden, output_dim, device):
+        """
+        Constructor for the SimilarityNN class.
+
+        Parameters
+        ----------
+        input_dim : int
+            The dimension of the input.
+        hidden_dim : int
+            The dimension of the hidden layers.
+        num_hidden : int
+            The number of hidden layers.
+        output_dim : int
+            The dimension of the output.
+        device : str
+            The device to run the model on.
+        """
+        super(SimilarityNN, self).__init__()
+        self.hidden_dim = hidden_dim
+        self.num_hidden = num_hidden
+        self.output_dim = output_dim
+
+        self.input_layer = nn.Linear(input_dim, hidden_dim, device=device)
+        self.hidden_layers = nn.ModuleList()
+        for i in range(num_hidden):
+            self.hidden_layers.append(nn.Linear(hidden_dim, hidden_dim, device=device))
+        self.output_layer = nn.Linear(hidden_dim, self.output_dim, device=device)
+
+
+    def forward(self, data):
+        """
+        This method defines the forward pass of the neural network.
+
+        Parameters
+        ----------
+        data : tensor
+            The input data.
+
+        Returns
+        -------
+        tensor
+            The output of the neural network.
+        """
+        intermediate = [nn.ReLU()(self.input_layer(data))]
+        for i in range(self.num_hidden):
+            intermediate.append(nn.ReLU()(self.hidden_layers[i](intermediate[i])))
+        out = self.output_layer(intermediate[-1])
+        return out
+
+

Ancestors

+
    +
  • torch.nn.modules.module.Module
  • +
+

Methods

+
+
+def forward(self, data) ‑> Callable[..., Any] +
+
+

This method defines the forward pass of the neural network.

+

Parameters

+
+
data : tensor
+
The input data.
+
+

Returns

+
+
tensor
+
The output of the neural network.
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/TextObject.html b/html/stylotool/src/freestylo/TextObject.html new file mode 100644 index 0000000..1de5a21 --- /dev/null +++ b/html/stylotool/src/freestylo/TextObject.html @@ -0,0 +1,279 @@ + + + + + + +stylotool.src.freestylo.TextObject API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.TextObject

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class TextObject +(textfile=None, text=None, language='') +
+
+

This class is used to store a text and its annotations.

+

Constructor for the TextObject class.

+

Parameters

+
+
textfile : str, optional
+
The path to a text file.
+
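text : str, optional
+
The text itself. It is replaced by the content of textfile if a text file is given and can be read.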
+
language : str, optional
+
The language of the text.
+
+
+ +Expand source code + +
class TextObject:
+    """
+    This class is used to store a text and its annotations.
+    """
+    def __init__(self, textfile=None, text=None, language=''):
+        """
+        Constructor for the TextObject class.
+
+        Parameters
+        ----------
+        textfile : str, optional
+            The path to a text file.
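+        text : str, optional
+            The text itself. It is replaced by the content of textfile if a text file is given and can be read.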
+        language : str, optional
+            The language of the text.
+        """
+        self.textfile = textfile
+        self.language = language
+        self.tokens = []
+        self.pos = []
+        self.lemmas = []
+        self.dep = []
+        self.vectors = []
+        self.annotations = []
+        self.token_offsets = []
+        self.text = text
+
+        if textfile is not None:
+            try:
+                with open(textfile, 'r') as f:
+                    self.text = f.read()
+            except FileNotFoundError:
+                print("File not found, no textfile loaded")
+        elif text is not None:
+            self.text = text
+
+    def save_as(self, filename):
+        """
+        This method saves the TextObject as a pickle file.
+
+        Parameters
+        ----------
+        filename : str
+            The path of the pickle file to be written.
+        """
+        with open(filename, 'wb') as f:
+            pickle.dump(self, f)
+
+    def serialize(self, filename):
+        """
+        This method serializes the TextObject as a JSON file.
+
+        Parameters
+        ----------
+        filename : str
+            The path of the JSON file to be written.
+        """
+        with open(filename, 'w') as f:
+            annotations = {}
+            for anno in self.annotations:
+                annotations[anno.type] = anno.serialize()
+            save_dict = {
+                'text': self.text,
+                'tokens': self.tokens,
+                'pos': self.pos,
+                'lemmas': self.lemmas,
+                'dep': self.dep,
+                'token_offsets': self.token_offsets,
+                'annotations': annotations
+            }
+            with open(filename, 'w') as f:
+                json.dump(save_dict, f, indent=4)
+
+
+    def has_text(self):
+        """
+        This method checks if the TextObject has a text.
+        """
+        return len(self.text) > 0
+    
+    def has_tokens(self):
+        """
+        This method checks if the TextObject has tokens.
+        """ 
+        return len(self.tokens) > 0
+
+    def has_pos(self):
+        """
+        This method checks if the TextObject has part-of-speech tags.
+        """
+        return len(self.pos) > 0
+
+    def has_lemmas(self):
+        """
+        This method checks if the TextObject has lemmas.
+        """
+        return len(self.lemmas) > 0
+
+    def has_dep(self):
+        """
+        This method checks if the TextObject has dependency relations.
+        """
+        return len(self.dep) > 0
+
+    def has_vectors(self):
+        """
+        This method checks if the TextObject has vectors.
+        """
+        return len(self.vectors) > 0
+
+    def has_annotations(self):
+        """
+        This method checks if the TextObject has annotations.
+        """
+        return len(self.annotations) > 0
+
+

Methods

+
+
+def has_annotations(self) +
+
+

This method checks if the TextObject has annotations.

+
+
+def has_dep(self) +
+
+

This method checks if the TextObject has dependency relations.

+
+
+def has_lemmas(self) +
+
+

This method checks if the TextObject has lemmas.

+
+
+def has_pos(self) +
+
+

This method checks if the TextObject has part-of-speech tags.

+
+
+def has_text(self) +
+
+

This method checks if the TextObject has a text.

+
+
+def has_tokens(self) +
+
+

This method checks if the TextObject has tokens.

+
+
+def has_vectors(self) +
+
+

This method checks if the TextObject has vectors.

+
+
+def save_as(self, filename) +
+
+

This method saves the TextObject as a pickle file.

+

Parameters

+
+
filename : str
+
The path of the pickle file to be written.
+
+
+
+def serialize(self, filename) +
+
+

This method serializes the TextObject as a JSON file.

+

Parameters

+
+
filename : str
+
The path of the JSON file to be written.
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/TextPreprocessor.html b/html/stylotool/src/freestylo/TextPreprocessor.html new file mode 100644 index 0000000..98b01f2 --- /dev/null +++ b/html/stylotool/src/freestylo/TextPreprocessor.html @@ -0,0 +1,198 @@ + + + + + + +stylotool.src.freestylo.TextPreprocessor API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.TextPreprocessor

+
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class TextPreprocessor +(language='en') +
+
+

This class is used to preprocess text. +It uses the TextObject class to store the text and its annotations.

+

Constructor for the TextPreprocessor class.

+

Parameters

+
+
language : str, optional
+
The language of the text.
+
+
+ +Expand source code + +
class TextPreprocessor:
+    """
+    Preprocess text and store the results on a TextObject.
+
+    Depending on the requested language the pipeline is either a spaCy
+    model or the project's MGHPreprocessor. The per-token results are
+    written onto the TextObject that is handed to process_text.
+    """
+
+    def __init__(self, language='en'):
+        """
+        Constructor for the TextPreprocessor class.
+
+        Parameters
+        ----------
+        language : str, optional
+            The language of the text. Supported values are 'en', 'de'
+            and 'mgh'.
+
+        Raises
+        ------
+        ValueError
+            If the language is not one of the supported values.
+        """
+        if language == 'en':
+            self.nlp = self.load_spacy_nlp('en_core_web_lg')
+        elif language == 'de':
+            self.nlp = self.load_spacy_nlp('de_core_news_lg')
+        elif language == 'mgh':
+            # Imported only on this branch.
+            # NOTE(review): this is a top-level module name; confirm it
+            # resolves when freestylo is installed as a package.
+            from MGHPreprocessor import MGHPreprocessor
+            self.nlp = MGHPreprocessor()
+        else:
+            # Fail here instead of leaving self.nlp unset, which would only
+            # surface later as an AttributeError inside process_text.
+            raise ValueError(
+                f"Unsupported language {language!r}; "
+                "expected 'en', 'de' or 'mgh'"
+            )
+
+
+    def load_spacy_nlp(self, model_name):
+        """
+        Load a spaCy model, downloading it first if it is not installed.
+
+        Parameters
+        ----------
+        model_name : str
+            The name of the spaCy model.
+
+        Returns
+        -------
+        spacy.language.Language
+            The loaded spaCy pipeline.
+
+        Raises
+        ------
+        SystemExit
+            With exit code 1 if the model cannot be downloaded, or still
+            cannot be loaded after the download.
+        """
+        try:
+            return spacy.load(model_name)
+        except OSError:
+            # spacy.load raises OSError when the model package cannot be
+            # found; fall through and try to install it.
+            pass
+
+        try:
+            spacy.cli.download(model_name)
+        except (Exception, SystemExit) as err:
+            # NOTE(review): the download command may report failure by
+            # exiting the interpreter, hence SystemExit is caught as well;
+            # KeyboardInterrupt is deliberately left alone.
+            print(f"ERROR: Could not download model {model_name}")
+            raise SystemExit(1) from err
+
+        try:
+            return spacy.load(model_name)
+        except OSError as err:
+            # Retry exactly once: looping here would re-download forever if
+            # the model stays unloadable after a successful download.
+            print(f"ERROR: Could not load model {model_name} after download")
+            raise SystemExit(1) from err
+
+
+    def process_text(self, text : TextObject):
+        """
+        Run the loaded pipeline on a TextObject and store the results on it.
+
+        Each annotation layer is extracted independently, so a pipeline
+        whose tokens lack one attribute still fills the remaining layers.
+        A message is printed for every layer that could not be extracted.
+
+        Parameters
+        ----------
+        text : TextObject
+            The object whose ``text`` attribute is processed. Its tokens,
+            pos, lemmas, dep, vectors and token_offsets attributes are
+            overwritten where the pipeline provides them.
+        """
+        processed = self.nlp(text.text)
+
+        # (attribute on the TextObject, per-token extractor, failure message)
+        layers = (
+            ("tokens", lambda token: token.text, "No tokens available"),
+            ("pos", lambda token: token.pos_, "No POS available"),
+            ("lemmas", lambda token: token.lemma_, "No lemmas available"),
+            ("dep", lambda token: token.dep_, "No dependencies available"),
+            ("vectors", lambda token: token.vector, "No vectors available"),
+            ("token_offsets",
+             lambda token: (token.idx, token.idx + len(token.text)),
+             "No token offsets available"),
+        )
+        for attribute, extract, message in layers:
+            try:
+                setattr(text, attribute,
+                        [extract(token) for token in processed])
+            except Exception:
+                # Best effort: skip a layer the pipeline cannot provide, but
+                # no longer swallow KeyboardInterrupt or SystemExit.
+                print(message)
+

Methods

+
+
+def load_spacy_nlp(self, model_name) +
+
+

This method loads a spaCy model.

+

Parameters

+
+
model_name : str
+
The name of the spaCy model.
+
+

Returns

+
+
spacy.language.Language
+
The spaCy model.
+
+
+
+def process_text(self, text: freestylo.TextObject.TextObject) +
+
+

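This method processes a text: it runs the loaded pipeline on the TextObject's text and stores the resulting tokens, part-of-speech tags, lemmas, dependency relations, vectors and token offsets on the TextObject. A message is printed for each of these that is not available.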

+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/freestylo_main.html b/html/stylotool/src/freestylo/freestylo_main.html new file mode 100644 index 0000000..fdceab2 --- /dev/null +++ b/html/stylotool/src/freestylo/freestylo_main.html @@ -0,0 +1,107 @@ + + + + + + +stylotool.src.freestylo.freestylo_main API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo.freestylo_main

+
+
+
+
+
+
+
+
+

Functions

+
+
+def add_alliteration_annotation(text, config) +
+
+

This function adds alliteration annotations to the text.

+
+
+def add_chiasmus_annotation(text, config) +
+
+

This function adds chiasmus annotations to the text.

+
+
+def add_epiphora_annotation(text, config) +
+
+

This function adds epiphora annotations to the text.

+
+
+def add_metaphor_annotation(text, config) +
+
+

This function adds metaphor annotations to the text.

+
+
+def add_polysyndeton_annotation(text, config) +
+
+

This function adds polysyndeton annotations to the text.

+
+
+def main() +
+
+

This is the main function of the freestylo tool. +When you run the tool from the command line, this function is called. +It reads the input text, preprocesses it, and adds the specified annotations. +The results are then serialized to a file.

+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/freestylo/index.html b/html/stylotool/src/freestylo/index.html new file mode 100644 index 0000000..e7d3415 --- /dev/null +++ b/html/stylotool/src/freestylo/index.html @@ -0,0 +1,117 @@ + + + + + + +stylotool.src.freestylo API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.src.freestylo

+
+
+
+
+

Sub-modules

+
+
stylotool.src.freestylo.AlliterationAnnotation
+
+
+
+
stylotool.src.freestylo.ChiasmusAnnotation
+
+
+
+
stylotool.src.freestylo.Configs
+
+
+
+
stylotool.src.freestylo.EpiphoraAnnotation
+
+
+
+
stylotool.src.freestylo.MGHPreprocessor
+
+
+
+
stylotool.src.freestylo.MetaphorAnnotation
+
+
+
+
stylotool.src.freestylo.PolysyndetonAnnotation
+
+
+
+
stylotool.src.freestylo.SimilarityNN
+
+
+
+
stylotool.src.freestylo.TextObject
+
+
+
+
stylotool.src.freestylo.TextPreprocessor
+
+
+
+
stylotool.src.freestylo.freestylo_main
+
+
+
+
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/src/index.html b/html/stylotool/src/index.html new file mode 100644 index 0000000..5d62fcd --- /dev/null +++ b/html/stylotool/src/index.html @@ -0,0 +1,67 @@ + + + + + + +stylotool.src API documentation + + + + + + + + + + + +
+ + +
+ + + diff --git a/html/stylotool/test/index.html b/html/stylotool/test/index.html new file mode 100644 index 0000000..3f0ce09 --- /dev/null +++ b/html/stylotool/test/index.html @@ -0,0 +1,92 @@ + + + + + + +stylotool.test API documentation + + + + + + + + + + + +
+ + +
+ + + diff --git a/html/stylotool/test/test_alliteration_annotation.html b/html/stylotool/test/test_alliteration_annotation.html new file mode 100644 index 0000000..0f6c1c8 --- /dev/null +++ b/html/stylotool/test/test_alliteration_annotation.html @@ -0,0 +1,69 @@ + + + + + + +stylotool.test.test_alliteration_annotation API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.test.test_alliteration_annotation

+
+
+
+
+
+
+
+
+

Functions

+
+
+def test_alliteration_annotation() +
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/test/test_chiasmus_annotation.html b/html/stylotool/test/test_chiasmus_annotation.html new file mode 100644 index 0000000..37f4e3c --- /dev/null +++ b/html/stylotool/test/test_chiasmus_annotation.html @@ -0,0 +1,69 @@ + + + + + + +stylotool.test.test_chiasmus_annotation API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.test.test_chiasmus_annotation

+
+
+
+
+
+
+
+
+

Functions

+
+
+def test_chiasmus_annotation() +
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/test/test_epiphora_annotation.html b/html/stylotool/test/test_epiphora_annotation.html new file mode 100644 index 0000000..4f5e359 --- /dev/null +++ b/html/stylotool/test/test_epiphora_annotation.html @@ -0,0 +1,69 @@ + + + + + + +stylotool.test.test_epiphora_annotation API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.test.test_epiphora_annotation

+
+
+
+
+
+
+
+
+

Functions

+
+
+def test_epiphora_annotation() +
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/test/test_metaphor_annotations.html b/html/stylotool/test/test_metaphor_annotations.html new file mode 100644 index 0000000..d069e25 --- /dev/null +++ b/html/stylotool/test/test_metaphor_annotations.html @@ -0,0 +1,69 @@ + + + + + + +stylotool.test.test_metaphor_annotations API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.test.test_metaphor_annotations

+
+
+
+
+
+
+
+
+

Functions

+
+
+def test_metaphor_annotation() +
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/test/test_polysyndeton_annotation.html b/html/stylotool/test/test_polysyndeton_annotation.html new file mode 100644 index 0000000..2bd715a --- /dev/null +++ b/html/stylotool/test/test_polysyndeton_annotation.html @@ -0,0 +1,69 @@ + + + + + + +stylotool.test.test_polysyndeton_annotation API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.test.test_polysyndeton_annotation

+
+
+
+
+
+
+
+
+

Functions

+
+
+def test_polysyndeton_annotation() +
+
+
+
+
+
+
+
+
+ +
+ + + diff --git a/html/stylotool/test/test_text_object.html b/html/stylotool/test/test_text_object.html new file mode 100644 index 0000000..9bf7483 --- /dev/null +++ b/html/stylotool/test/test_text_object.html @@ -0,0 +1,69 @@ + + + + + + +stylotool.test.test_text_object API documentation + + + + + + + + + + + +
+
+
+

Module stylotool.test.test_text_object

+
+
+
+
+
+
+
+
+

Functions

+
+
+def test_processing() +
+
+
+
+
+
+
+
+
+ +
+ + +