From c8f09fa56e1384fc4035f9c4d5efce026e269ba0 Mon Sep 17 00:00:00 2001
From: Olga Bulat <obulat@gmail.com>
Date: Thu, 29 Dec 2022 14:46:53 +0300
Subject: [PATCH 1/3] Always return set[PhoneticAttribute] from
 calculate_phonetic_attributes

---
 zeyrek/attributes.py        | 20 ++++++++++++--------
 zeyrek/morphotactics.py     |  8 ++++----
 zeyrek/rulebasedanalyzer.py |  2 +-
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/zeyrek/attributes.py b/zeyrek/attributes.py
index 2afb590..8faf401 100644
--- a/zeyrek/attributes.py
+++ b/zeyrek/attributes.py
@@ -77,12 +77,12 @@ class PosInfo(NamedTuple):
 
 
 class RootAttribute(Enum):
-    """These represents attributes of roots."""
+    """These represent attributes of roots."""
 
     # Generally Present tense (Aorist) suffix has the form [Ir]; such as gel-ir, bul-ur, kapat-ır.
     # But for most verbs with single syllable and compound verbs it forms as [Ar].
     # Such as yap-ar, yet-er, hapsed-er. There are exceptions for this case, such as "var-ır".
-    # Below two represents the attributes for clearing the ambiguity. These attributes does not
+    # The two below represent the attributes for clearing the ambiguity. These attributes do not
     # modify the root form.
     Aorist_I = auto()
     Aorist_A = auto()
@@ -282,10 +282,14 @@ class PhoneticAttribute(Enum):
 
 
 @functools.lru_cache(maxsize=128, typed=False)
-def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[PhoneticAttribute]:
+def calculate_phonetic_attributes(
+    word: str,
+    predecessor_attrs: tuple[PhoneticAttribute] | None = None
+) -> set[PhoneticAttribute]:
+    p_attrs = set() if predecessor_attrs is None else set(predecessor_attrs)
     # the word should be in lower case
     if len(word) == 0:
-        return predecessor_attrs
+        return p_attrs
     result = set()
     last_letter = word[-1]
     if last_letter in tr.vowels_lower_set:
@@ -313,7 +317,7 @@ def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[Phon
     else:
         result.add(PhoneticAttribute.FirstLetterConsonant)
     if last_vowel is None:
-        result.update(predecessor_attrs)
+        result.update(p_attrs)
         result.update(no_vowel_attrs)
         result.discard(PhoneticAttribute.LastLetterVowel)
         result.discard(PhoneticAttribute.ExpectsConsonant)
@@ -321,8 +325,8 @@ def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[Phon
     return result
 
 
-def parse_attr_data(data: str) -> Set:
-    attrs: Set = set()
+def parse_attr_data(data: str) -> set[RootAttribute]:
+    attrs = set()
     tokens = [_.strip() for _ in data.split(",")]
     for s in tokens:
         if s not in RootAttribute_set:
@@ -332,7 +336,7 @@ def parse_attr_data(data: str) -> Set:
     return attrs
 
 
-def infer_morphemic_attributes(word: str, pos_data, attrs: Set = None) -> Set:
+def infer_morphemic_attributes(word: str, pos_data, attrs: Set | None = None) -> set[RootAttribute]:
     result = attrs if attrs is not None else set()
     last = word[-1]
     last_char_is_vowel = tr.is_vowel(last)
diff --git a/zeyrek/morphotactics.py b/zeyrek/morphotactics.py
index c95b8d9..a858c59 100644
--- a/zeyrek/morphotactics.py
+++ b/zeyrek/morphotactics.py
@@ -3578,13 +3578,13 @@ def __str__(self):
     def __repr__(self):
         return f"SearchPath({self.dict_item.id_}) (-{self.tail})({self.transitions})"
 
-    def copy(self, surface_node: SurfaceTransition, phonetic_attributes: Set = None):
+    def copy(self, surface_node: SurfaceTransition, pa: set[PhoneticAttribute] | None = None):
         phonetic_attributes = (
             calculate_phonetic_attributes(
-                surface_node.surface, self.phonetic_attributes
+                surface_node.surface, tuple(self.phonetic_attributes)
             )
-            if phonetic_attributes is None
-            else phonetic_attributes
+            if pa is None
+            else pa
         )
         is_terminal = surface_node.state.terminal
         hist = self.transitions[:]
diff --git a/zeyrek/rulebasedanalyzer.py b/zeyrek/rulebasedanalyzer.py
index 21a6559..d6f9f3f 100644
--- a/zeyrek/rulebasedanalyzer.py
+++ b/zeyrek/rulebasedanalyzer.py
@@ -112,7 +112,7 @@ def advance(self, path: SearchPath):
             # if tail is equal to surface, no need to calculate phonetic attributes.
             tail_equals_surface = path.tail == surface
             attributes = path.phonetic_attributes if tail_equals_surface \
-                else calculate_phonetic_attributes(surface, frozenset(path.phonetic_attributes))
+                else calculate_phonetic_attributes(surface, tuple(path.phonetic_attributes))
 
             # This is required for suffixes like `cik` and `ciğ`
             # an extra attribute is added if "cik" or "ciğ" is generated and matches the tail.

From b785d790bcfe4538f4333ffe499eb30d26e40bab Mon Sep 17 00:00:00 2001
From: Olga Bulat <obulat@gmail.com>
Date: Thu, 29 Dec 2022 14:54:06 +0300
Subject: [PATCH 2/3] Fix build: drop Python 3.8 from CI, quote union annotations

---
 .github/workflows/build.yml | 2 +-
 zeyrek/attributes.py        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index f730e95..d4e63dc 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -11,7 +11,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [3.8, 3.9, '3.10']
+        python-version: [3.9, '3.10']
 
     steps:
     - uses: actions/checkout@v2
diff --git a/zeyrek/attributes.py b/zeyrek/attributes.py
index 8faf401..61066b4 100644
--- a/zeyrek/attributes.py
+++ b/zeyrek/attributes.py
@@ -284,7 +284,7 @@ class PhoneticAttribute(Enum):
 @functools.lru_cache(maxsize=128, typed=False)
 def calculate_phonetic_attributes(
     word: str,
-    predecessor_attrs: tuple[PhoneticAttribute] | None = None
+    predecessor_attrs: "tuple[PhoneticAttribute] | None" = None
 ) -> set[PhoneticAttribute]:
     p_attrs = set() if predecessor_attrs is None else set(predecessor_attrs)
     # the word should be in lower case
@@ -336,7 +336,7 @@ def parse_attr_data(data: str) -> set[RootAttribute]:
     return attrs
 
 
-def infer_morphemic_attributes(word: str, pos_data, attrs: Set | None = None) -> set[RootAttribute]:
+def infer_morphemic_attributes(word: str, pos_data, attrs: "set[RootAttribute] | None" = None) -> set[RootAttribute]:
     result = attrs if attrs is not None else set()
     last = word[-1]
     last_char_is_vowel = tr.is_vowel(last)

From 02b3833f8ee0b24e6645be74957995ddf43406ba Mon Sep 17 00:00:00 2001
From: Olga Bulat <obulat@gmail.com>
Date: Thu, 29 Dec 2022 14:56:22 +0300
Subject: [PATCH 3/3] Fix build: quote union annotation in morphotactics.py

---
 zeyrek/morphotactics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/zeyrek/morphotactics.py b/zeyrek/morphotactics.py
index a858c59..0c5c87b 100644
--- a/zeyrek/morphotactics.py
+++ b/zeyrek/morphotactics.py
@@ -3578,7 +3578,7 @@ def __str__(self):
     def __repr__(self):
         return f"SearchPath({self.dict_item.id_}) (-{self.tail})({self.transitions})"
 
-    def copy(self, surface_node: SurfaceTransition, pa: set[PhoneticAttribute] | None = None):
+    def copy(self, surface_node: SurfaceTransition, pa: "set[PhoneticAttribute] | None" = None):
         phonetic_attributes = (
             calculate_phonetic_attributes(
                 surface_node.surface, tuple(self.phonetic_attributes)