Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix frozen set issue #25

Merged
merged 3 commits into from
Dec 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
- python-version: [3.8, 3.9, '3.10']
+ python-version: [3.9, '3.10']

steps:
- uses: actions/checkout@v2
Expand Down
20 changes: 12 additions & 8 deletions zeyrek/attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,12 @@ class PosInfo(NamedTuple):


class RootAttribute(Enum):
- """These represents attributes of roots."""
+ """This represents attributes of roots."""

# Generally Present tense (Aorist) suffix has the form [Ir]; such as gel-ir, bul-ur, kapat-ır.
# But for most verbs with single syllable and compound verbs it forms as [Ar].
# Such as yap-ar, yet-er, hapsed-er. There are exceptions for this case, such as "var-ır".
- # Below two represents the attributes for clearing the ambiguity. These attributes does not
+ # Below two represents the attributes for clearing the ambiguity. These attributes do not
# modify the root form.
Aorist_I = auto()
Aorist_A = auto()
Expand Down Expand Up @@ -282,10 +282,14 @@ class PhoneticAttribute(Enum):


@functools.lru_cache(maxsize=128, typed=False)
- def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[PhoneticAttribute]:
+ def calculate_phonetic_attributes(
+ word: str,
+ predecessor_attrs: "tuple[PhoneticAttribute] | None" = None
+ ) -> set[PhoneticAttribute]:
+ p_attrs = set() if predecessor_attrs is None else set(predecessor_attrs)
# the word should be in lower case
if len(word) == 0:
- return predecessor_attrs
+ return p_attrs
result = set()
last_letter = word[-1]
if last_letter in tr.vowels_lower_set:
Expand Down Expand Up @@ -313,16 +317,16 @@ def calculate_phonetic_attributes(word: str, predecessor_attrs=None) -> Set[Phon
else:
result.add(PhoneticAttribute.FirstLetterConsonant)
if last_vowel is None:
- result.update(predecessor_attrs)
+ result.update(p_attrs)
result.update(no_vowel_attrs)
result.discard(PhoneticAttribute.LastLetterVowel)
result.discard(PhoneticAttribute.ExpectsConsonant)

return result


- def parse_attr_data(data: str) -> Set:
- attrs: Set = set()
+ def parse_attr_data(data: str) -> set[RootAttribute]:
+ attrs = set()
tokens = [_.strip() for _ in data.split(",")]
for s in tokens:
if s not in RootAttribute_set:
Expand All @@ -332,7 +336,7 @@ def parse_attr_data(data: str) -> Set:
return attrs


- def infer_morphemic_attributes(word: str, pos_data, attrs: Set = None) -> Set:
+ def infer_morphemic_attributes(word: str, pos_data, attrs: "set[RootAttribute] | None" = None) -> set[RootAttribute]:
result = attrs if attrs is not None else set()
last = word[-1]
last_char_is_vowel = tr.is_vowel(last)
Expand Down
8 changes: 4 additions & 4 deletions zeyrek/morphotactics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3578,13 +3578,13 @@ def __str__(self):
def __repr__(self):
return f"SearchPath({self.dict_item.id_}) (-{self.tail})({self.transitions})"

- def copy(self, surface_node: SurfaceTransition, phonetic_attributes: Set = None):
+ def copy(self, surface_node: SurfaceTransition, pa: "set[PhoneticAttribute] | None" = None):
phonetic_attributes = (
calculate_phonetic_attributes(
- surface_node.surface, self.phonetic_attributes
+ surface_node.surface, tuple(self.phonetic_attributes)
)
- if phonetic_attributes is None
- else phonetic_attributes
+ if pa is None
+ else pa
)
is_terminal = surface_node.state.terminal
hist = self.transitions[:]
Expand Down
2 changes: 1 addition & 1 deletion zeyrek/rulebasedanalyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def advance(self, path: SearchPath):
# if tail is equal to surface, no need to calculate phonetic attributes.
tail_equals_surface = path.tail == surface
attributes = path.phonetic_attributes if tail_equals_surface \
- else calculate_phonetic_attributes(surface, frozenset(path.phonetic_attributes))
+ else calculate_phonetic_attributes(surface, tuple(path.phonetic_attributes))

# This is required for suffixes like `cik` and `ciğ`
# an extra attribute is added if "cik" or "ciğ" is generated and matches the tail.
Expand Down