From c8f09fa56e1384fc4035f9c4d5efce026e269ba0 Mon Sep 17 00:00:00 2001 From: Olga Bulat Date: Thu, 29 Dec 2022 14:46:53 +0300 Subject: [PATCH 1/3] Always return set[PhoneticAttribute] from calculate_phonetic_attributes --- zeyrek/attributes.py | 20 ++++++++++++-------- zeyrek/morphotactics.py | 8 ++++---- zeyrek/rulebasedanalyzer.py | 2 +- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/zeyrek/attributes.py b/zeyrek/attributes.py index 2afb590..8faf401 100644 --- a/zeyrek/attributes.py +++ b/zeyrek/attributes.py @@ -77,12 +77,12 @@ class PosInfo(NamedTuple): class RootAttribute(Enum): - """These represents attributes of roots.""" + """This represents attributes of roots.""" # Generally Present tense (Aorist) suffix has the form [Ir]; such as gel-ir, bul-ur, kapat-ır. # But for most verbs with single syllable and compound verbs it forms as [Ar]. # Such as yap-ar, yet-er, hapsed-er. There are exceptions for this case, such as "var-ır". - # Below two represents the attributes for clearing the ambiguity. These attributes does not + # The two below represent the attributes for clearing the ambiguity. These attributes do not # modify the root form. 
Aorist_I = auto() Aorist_A = auto() @@ -282,10 +282,14 @@ class PhoneticAttribute(Enum): @functools.lru_cache(maxsize=128, typed=False) -def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[PhoneticAttribute]: +def calculate_phonetic_attributes( + word: str, + predecessor_attrs: tuple[PhoneticAttribute] | None = None +) -> set[PhoneticAttribute]: + p_attrs = set() if predecessor_attrs is None else set(predecessor_attrs) # the word should be in lower case if len(word) == 0: - return predecessor_attrs + return p_attrs result = set() last_letter = word[-1] if last_letter in tr.vowels_lower_set: @@ -313,7 +317,7 @@ def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[Phon else: result.add(PhoneticAttribute.FirstLetterConsonant) if last_vowel is None: - result.update(predecessor_attrs) + result.update(p_attrs) result.update(no_vowel_attrs) result.discard(PhoneticAttribute.LastLetterVowel) result.discard(PhoneticAttribute.ExpectsConsonant) @@ -321,8 +325,8 @@ def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[Phon return result -def parse_attr_data(data: str) -> Set: - attrs: Set = set() +def parse_attr_data(data: str) -> set[RootAttribute]: + attrs = set() tokens = [_.strip() for _ in data.split(",")] for s in tokens: if s not in RootAttribute_set: @@ -332,7 +336,7 @@ def parse_attr_data(data: str) -> Set: return attrs -def infer_morphemic_attributes(word: str, pos_data, attrs: Set = None) -> Set: +def infer_morphemic_attributes(word: str, pos_data, attrs: Set | None = None) -> set[RootAttribute]: result = attrs if attrs is not None else set() last = word[-1] last_char_is_vowel = tr.is_vowel(last) diff --git a/zeyrek/morphotactics.py b/zeyrek/morphotactics.py index c95b8d9..a858c59 100644 --- a/zeyrek/morphotactics.py +++ b/zeyrek/morphotactics.py @@ -3578,13 +3578,13 @@ def __str__(self): def __repr__(self): return f"SearchPath({self.dict_item.id_}) (-{self.tail})({self.transitions})" - def 
copy(self, surface_node: SurfaceTransition, phonetic_attributes: Set = None): + def copy(self, surface_node: SurfaceTransition, pa: set[PhoneticAttribute] | None = None): phonetic_attributes = ( calculate_phonetic_attributes( - surface_node.surface, self.phonetic_attributes + surface_node.surface, tuple(self.phonetic_attributes) ) - if phonetic_attributes is None - else phonetic_attributes + if pa is None + else pa ) is_terminal = surface_node.state.terminal hist = self.transitions[:] diff --git a/zeyrek/rulebasedanalyzer.py b/zeyrek/rulebasedanalyzer.py index 21a6559..d6f9f3f 100644 --- a/zeyrek/rulebasedanalyzer.py +++ b/zeyrek/rulebasedanalyzer.py @@ -112,7 +112,7 @@ def advance(self, path: SearchPath): # if tail is equal to surface, no need to calculate phonetic attributes. tail_equals_surface = path.tail == surface attributes = path.phonetic_attributes if tail_equals_surface \ - else calculate_phonetic_attributes(surface, frozenset(path.phonetic_attributes)) + else calculate_phonetic_attributes(surface, tuple(path.phonetic_attributes)) # This is required for suffixes like `cik` and `ciğ` # an extra attribute is added if "cik" or "ciğ" is generated and matches the tail. 
From b785d790bcfe4538f4333ffe499eb30d26e40bab Mon Sep 17 00:00:00 2001 From: Olga Bulat Date: Thu, 29 Dec 2022 14:54:06 +0300 Subject: [PATCH 2/3] Fix build --- .github/workflows/build.yml | 2 +- zeyrek/attributes.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f730e95..d4e63dc 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, '3.10'] + python-version: [3.9, '3.10'] steps: - uses: actions/checkout@v2 diff --git a/zeyrek/attributes.py b/zeyrek/attributes.py index 8faf401..61066b4 100644 --- a/zeyrek/attributes.py +++ b/zeyrek/attributes.py @@ -284,7 +284,7 @@ class PhoneticAttribute(Enum): @functools.lru_cache(maxsize=128, typed=False) def calculate_phonetic_attributes( word: str, - predecessor_attrs: tuple[PhoneticAttribute] | None = None + predecessor_attrs: "tuple[PhoneticAttribute] | None" = None ) -> set[PhoneticAttribute]: p_attrs = set() if predecessor_attrs is None else set(predecessor_attrs) # the word should be in lower case @@ -336,7 +336,7 @@ def parse_attr_data(data: str) -> set[RootAttribute]: return attrs -def infer_morphemic_attributes(word: str, pos_data, attrs: Set | None = None) -> set[RootAttribute]: +def infer_morphemic_attributes(word: str, pos_data, attrs: "set[RootAttribute] | None" = None) -> set[RootAttribute]: result = attrs if attrs is not None else set() last = word[-1] last_char_is_vowel = tr.is_vowel(last) From 02b3833f8ee0b24e6645be74957995ddf43406ba Mon Sep 17 00:00:00 2001 From: Olga Bulat Date: Thu, 29 Dec 2022 14:56:22 +0300 Subject: [PATCH 3/3] Fix build --- zeyrek/morphotactics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zeyrek/morphotactics.py b/zeyrek/morphotactics.py index a858c59..0c5c87b 100644 --- a/zeyrek/morphotactics.py +++ b/zeyrek/morphotactics.py @@ -3578,7 +3578,7 @@ def __str__(self): 
def __repr__(self): return f"SearchPath({self.dict_item.id_}) (-{self.tail})({self.transitions})" - def copy(self, surface_node: SurfaceTransition, pa: set[PhoneticAttribute] | None = None): + def copy(self, surface_node: SurfaceTransition, pa: "set[PhoneticAttribute] | None" = None): phonetic_attributes = ( calculate_phonetic_attributes( surface_node.surface, tuple(self.phonetic_attributes)