Skip to content

Commit e3fefc8

Browse files
isingoo authored and copybara-github committed
Add Expression.ANY constant that accepts, contains, and matches with everything other than an empty Or. This will be used as the default value for Alignment arguments.
PiperOrigin-RevId: 630344671
1 parent 64d73d0 commit e3fefc8

File tree

3 files changed

+75
-7
lines changed

3 files changed

+75
-7
lines changed

nisaba/scripts/natural_translit/utils/expression.py

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,19 @@ def _symbols_of(
112112
else:
113113
return [[other]]
114114

115+
def is_any(self) -> bool:
116+
if isinstance(self, sym.Symbol):
117+
return False
118+
if len(self) == 1:
119+
return self.item(0).is_any()
120+
return self is Expression.ANY
121+
122+
def is_eps(self) -> bool:
123+
return isinstance(self, sym.Symbol) and self.symbol.is_eps()
124+
125+
def is_nor(self) -> bool:
126+
return isinstance(self, sym.Symbol) and self.symbol.is_nor()
127+
115128
def accepts(
116129
self, other: 'Expression.OR_SYMBOL', equivalent: bool = False
117130
) -> bool:
@@ -124,6 +137,10 @@ def accepts(
124137
Returns:
125138
bool
126139
"""
140+
if self.is_any() or other.is_any():
141+
if equivalent:
142+
return self.is_any() and other.is_any()
143+
return True
127144
self_symbols, other_symbols = self.symbols(), self._symbols_of(other)
128145
self_len, other_len = len(self_symbols), len(other_symbols)
129146
if (
@@ -198,6 +215,8 @@ def contains_symbol_list(
198215
return True
199216
if search_for == [sym.Symbol.CTRL.nor]:
200217
return False
218+
if self.is_any():
219+
return True
201220
# Loop over symbol lists, eg: [[a, b, c, d], [e, f, g]]
202221
for symbol_list in self.symbols():
203222
while symbol_list:
@@ -251,15 +270,19 @@ def contains(
251270
a.contains(b, head=True): [a, b, c, d] starts with [a, b]
252271
a.contains(b, tail=True): False
253272
"""
273+
if self.is_any() or other.is_any():
274+
return not self.is_nor() and not other.is_nor()
254275
for sym_list in self._symbols_of(other):
255276
if self.contains_symbol_list(sym_list, head, tail):
256277
return True
257278
return False
258279

259280
def _symbol_contains(self, other: sym.Symbol) -> bool:
281+
if other.is_any():
282+
return True
260283
self_symbols = self.symbols()
261284
return [sym.Symbol.CTRL.eps] in self_symbols or (
262-
other != sym.Symbol.CTRL.nor and [other] in self_symbols
285+
not other.is_nor() and [other] in self_symbols
263286
)
264287

265288
def is_contained(
@@ -278,13 +301,13 @@ def matches(self, other: 'Expression.OR_SYMBOL') -> bool:
278301
return self.contains(other, head=True, tail=True)
279302

280303
# head_matches and tail_matches require at least one symbol match unless
281-
# both expressions are empty Cats. For example, if a rule requires a vowel as
282-
# following context but there is no following context, the rule shouldn't
283-
# apply.
304+
# both expressions are empty Cats or one of the expressions is Expression.ANY.
305+
# For example, if a rule requires a vowel as following context but there is no
306+
# following context, the rule shouldn't apply.
284307

285308
def head_matches(self, other: 'Expression.OR_SYMBOL') -> bool:
286309
if self and not other:
287-
return False
310+
return other.is_any()
288311
return self.contains(other, head=True)
289312

290313
def is_prefix(self, other: 'Expression.OR_SYMBOL') -> bool:
@@ -294,7 +317,7 @@ def is_prefix(self, other: 'Expression.OR_SYMBOL') -> bool:
294317

295318
def tail_matches(self, other: 'Expression.OR_SYMBOL') -> bool:
296319
if self and not other:
297-
return False
320+
return other.is_any()
298321
return self.contains(other, tail=True)
299322

300323
def is_suffix(self, other: 'Expression.OR_SYMBOL') -> bool:
@@ -316,6 +339,9 @@ def repeat(self, n: int = 2) -> 'Cat':
316339
return Cat(*([self] * n))
317340

318341

342+
Expression.ANY = Expression('any_expression')
343+
344+
319345
class Atomic(Expression, sym.Symbol):
320346
"""An instance of a single symbol."""
321347

@@ -392,7 +418,10 @@ def __str__(self):
392418

393419
def add(self, *items: Expression) -> 'Cat':
394420
for item in items:
395-
self._add_item(item)
421+
if item.is_any():
422+
self._items.append(item)
423+
else:
424+
self._add_item(item)
396425
return self
397426

398427
def symbols(self) -> list[list[sym.Symbol]]:
@@ -462,6 +491,13 @@ def add(self, *items: Expression) -> 'Or':
462491
self
463492
"""
464493
for item in items:
494+
# If Expression.ANY is already in this Or, don't add any more items.
495+
if Expression.ANY in self:
496+
break
497+
# If the item is any, the other items are irrelevant.
498+
if item.is_any():
499+
self._items = [item]
500+
break
465501
if self.accepts(item):
466502
if item.accepts(self) and item.state_count() < self.state_count():
467503
self._update(item)

nisaba/scripts/natural_translit/utils/expression_test.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ def test_atomic_read(self):
5151

5252
def test_control(self):
5353
self.assertTrue(exp.Atomic.CTRL.unk.is_control())
54+
self.assertTrue(exp.Atomic.CTRL.eps.is_eps())
55+
self.assertTrue(exp.Atomic.CTRL.nor.is_nor())
5456

5557
def test_symbol_inventory_lookup(self):
5658
self.assertEqual(_ATM.lookup(_ATM.a, 'atm_sym'), _SYM.a)
@@ -98,6 +100,7 @@ def test_cat_items(self):
98100
self.AssertStrEqual(cat, '(a b a)')
99101
self.assertIsNot(cat.item(0), cat.item(2))
100102
self.AssertEquivalent(cat.item(0), (cat.item(2)))
103+
self.assertTrue(exp.Cat(exp.Expression.ANY).is_any())
101104

102105
def test_cat_nested(self):
103106
cat1 = exp.Cat(_ATM.a, _ATM.b)
@@ -132,10 +135,13 @@ def test_or_items(self):
132135
or2 = exp.Or(_ATM.b, _ATM.c)
133136
or3 = exp.Or(or1, or2)
134137
or4 = exp.Or(_ATM.a, _ATM.b, _ATM.a)
138+
or5 = or1.copy().add(exp.Expression.ANY)
135139
self.AssertStrEqual(or2, '(b | c)')
136140
self.AssertStrEqual(or3, '(a | b | c)')
137141
self.AssertStrEqual(or4, '(a | b)')
138142
self.assertLen(or4, 2)
143+
self.assertNotIn(_ATM.a, or5)
144+
self.AssertAccepts(or5, _ATM.a)
139145

140146
def test_or_nested(self):
141147
cat1 = exp.Cat(_ATM.a)
@@ -214,6 +220,8 @@ def test_state_count(self):
214220
def test_equivalent(self):
215221
or0 = exp.Or()
216222
self.AssertEquivalent(exp.Atomic.CTRL.eps, sym.Symbol.CTRL.eps)
223+
self.AssertEquivalent(exp.Expression.ANY, exp.Expression.ANY)
224+
self.AssertNotEquivalent(exp.Expression.ANY, exp.Atomic.CTRL.eps)
217225
self.AssertEquivalent(exp.Cat(), exp.Atomic.CTRL.eps)
218226
self.AssertEquivalent(exp.Cat(), exp.Cat())
219227
self.AssertNotEquivalent(or0, exp.Atomic.CTRL.nor)
@@ -229,13 +237,21 @@ def test_equivalent(self):
229237
def test_contains_controls(self):
230238
eps = exp.Atomic.CTRL.eps
231239
nor = exp.Atomic.CTRL.nor
240+
any_exp = exp.Expression.ANY
232241
self.AssertContains(eps, eps)
233242
self.AssertContains(nor, eps)
243+
self.AssertNotContains(nor, any_exp)
234244
self.AssertNotContains(eps, nor)
235245
self.AssertNotContains(nor, nor)
246+
self.AssertNotContains(any_exp, nor)
236247

237248
def test_contains_expressions(self):
238249
cat_abc = _ATM.a + _ATM.b + _ATM.c
250+
any_exp = exp.Expression.ANY
251+
self.AssertContains(any_exp, _ATM.a)
252+
self.AssertContains(_ATM.a, any_exp)
253+
self.AssertContains(any_exp, cat_abc)
254+
self.AssertContains(cat_abc, any_exp)
239255
self.AssertContains(cat_abc, exp.Cat())
240256
self.AssertNotContains(cat_abc, exp.Or())
241257
self.AssertContains(cat_abc, exp.Or(exp.Cat()))
@@ -250,8 +266,15 @@ def test_contains_expressions(self):
250266
def test_matches(self):
251267
abc_or_cd = (_ATM.a + _ATM.b + _ATM.c) | (_ATM.c + _ATM.d)
252268
a_or_c_b_or_d = (_ATM.a | _ATM.c) + (_ATM.b | _ATM.d)
269+
any_exp = exp.Expression.ANY
270+
self.AssertMatches(any_exp, a_or_c_b_or_d)
271+
self.AssertMatches(a_or_c_b_or_d, any_exp)
253272
self.AssertMatches(abc_or_cd, a_or_c_b_or_d)
254273
self.AssertNotMatches(abc_or_cd, _ATM.a + _ATM.b + _ATM.d)
274+
self.assertTrue(any_exp.is_prefix(abc_or_cd))
275+
self.assertTrue(any_exp.is_suffix(abc_or_cd))
276+
self.assertTrue(abc_or_cd.is_prefix(any_exp))
277+
self.assertTrue(abc_or_cd.is_suffix(any_exp))
255278
self.assertTrue(exp.Cat().is_prefix(exp.Cat()))
256279
self.assertTrue(exp.Cat().is_suffix(exp.Cat()))
257280
self.assertFalse(exp.Cat().is_prefix(abc_or_cd))

nisaba/scripts/natural_translit/utils/symbol.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,15 @@ def __str__(self) -> str:
106106
def is_control(self) -> bool:
107107
return self in Symbol.CTRL
108108

109+
def is_any(self) -> bool:
110+
return False
111+
112+
def is_eps(self) -> bool:
113+
return self is Symbol.CTRL.eps
114+
115+
def is_nor(self) -> bool:
116+
return self is Symbol.CTRL.nor
117+
109118
def description(self, show_features: bool = False) -> str:
110119
"""A string that describes the symbol."""
111120
text = 'alias: %s index: %s' % (self.alias, self.index)

0 commit comments

Comments
 (0)