diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f730e95..d4e63dc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, '3.10'] + python-version: [3.9, '3.10'] steps: - uses: actions/checkout@v2 diff --git a/zeyrek/attributes.py b/zeyrek/attributes.py index 2afb590..61066b4 100644 --- a/zeyrek/attributes.py +++ b/zeyrek/attributes.py @@ -77,12 +77,12 @@ class PosInfo(NamedTuple): class RootAttribute(Enum): - """These represents attributes of roots.""" + """This represents attributes of roots.""" # Generally Present tense (Aorist) suffix has the form [Ir]; such as gel-ir, bul-ur, kapat-ır. # But for most verbs with single syllable and compound verbs it forms as [Ar]. # Such as yap-ar, yet-er, hapsed-er. There are exceptions for this case, such as "var-ır". - # Below two represents the attributes for clearing the ambiguity. These attributes does not + # Below two represents the attributes for clearing the ambiguity. These attributes do not # modify the root form. Aorist_I = auto() Aorist_A = auto() @@ -282,10 +282,14 @@ class PhoneticAttribute(Enum): @functools.lru_cache(maxsize=128, typed=False) -def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[PhoneticAttribute]: +def calculate_phonetic_attributes( + word: str, + predecessor_attrs: "tuple[PhoneticAttribute] | None" = None +) -> set[PhoneticAttribute]: + p_attrs = set() if predecessor_attrs is None else set(predecessor_attrs) # the word should be in lower case if len(word) == 0: - return predecessor_attrs + return p_attrs result = set() last_letter = word[-1] if last_letter in tr.vowels_lower_set: @@ -313,7 +317,7 @@ def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[Phon else: result.add(PhoneticAttribute.FirstLetterConsonant) if last_vowel is None: - result.update(predecessor_attrs) + result.update(p_attrs) result.update(no_vowel_attrs) result.discard(PhoneticAttribute.LastLetterVowel) result.discard(PhoneticAttribute.ExpectsConsonant) @@ -321,8 +325,8 @@ def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[Phon return result -def parse_attr_data(data: str) -> Set: - attrs: Set = set() +def parse_attr_data(data: str) -> set[RootAttribute]: + attrs = set() tokens = [_.strip() for _ in data.split(",")] for s in tokens: if s not in RootAttribute_set: @@ -332,7 +336,7 @@ def parse_attr_data(data: str) -> Set: return attrs -def infer_morphemic_attributes(word: str, pos_data, attrs: Set = None) -> Set: +def infer_morphemic_attributes(word: str, pos_data, attrs: "set[RootAttribute] | None" = None) -> set[RootAttribute]: result = attrs if attrs is not None else set() last = word[-1] last_char_is_vowel = tr.is_vowel(last) diff --git a/zeyrek/morphotactics.py b/zeyrek/morphotactics.py index c95b8d9..0c5c87b 100644 --- a/zeyrek/morphotactics.py +++ b/zeyrek/morphotactics.py @@ -3578,13 +3578,13 @@ def __str__(self): def __repr__(self): return f"SearchPath({self.dict_item.id_}) (-{self.tail})({self.transitions})" - def copy(self, surface_node: SurfaceTransition, phonetic_attributes: Set = None): + def copy(self, surface_node: SurfaceTransition, pa: "set[PhoneticAttribute] | None" = None): phonetic_attributes = ( calculate_phonetic_attributes( - surface_node.surface, self.phonetic_attributes + surface_node.surface, tuple(self.phonetic_attributes) ) - if phonetic_attributes is None - else phonetic_attributes + if pa is None + else pa ) is_terminal = surface_node.state.terminal hist = self.transitions[:] diff --git a/zeyrek/rulebasedanalyzer.py b/zeyrek/rulebasedanalyzer.py index 21a6559..d6f9f3f 100644 --- a/zeyrek/rulebasedanalyzer.py +++ b/zeyrek/rulebasedanalyzer.py @@ -112,7 +112,7 @@ def advance(self, path: SearchPath): # if tail is equal to surface, no need to calculate phonetic attributes. tail_equals_surface = path.tail == surface attributes = path.phonetic_attributes if tail_equals_surface \ - else calculate_phonetic_attributes(surface, frozenset(path.phonetic_attributes)) + else calculate_phonetic_attributes(surface, tuple(path.phonetic_attributes)) # This is required for suffixes like `cik` and `ciğ` # an extra attribute is added if "cik" or "ciğ" is generated and matches the tail.