Skip to content

Commit 75a0633

Browse files
authored
Unicode alphabet, subalphabets, and easier tests (#21)
1 parent d127d1f commit 75a0633

File tree

3 files changed

+215
-0
lines changed

3 files changed

+215
-0
lines changed

code/cubical/Examples/Dyck.agda

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,3 +257,62 @@ opaque
257257
-- done = ⊗-unit-r
258258

259259
-- -- TODO: show this is a *strong* equivalence!
260+
261+
open import String.Unicode
262+
open import String.SubAlphabet
263+
open import Cubical.Data.Empty as Empty
264+
open import Cubical.Functions.Embedding
265+
266+
pick-char : Bool ⟨ Unicode ⟩
267+
pick-char true = '('
268+
pick-char false = ')'
269+
270+
pick-char-inj : {b b' : Bool} pick-char b ≡ pick-char b' b ≡ b'
271+
pick-char-inj {false} {false} _ = refl
272+
pick-char-inj {false} {true} x =
273+
Empty.rec (mkUnicodeCharPath-no ')' '(' refl x)
274+
pick-char-inj {true} {false} x =
275+
Empty.rec (mkUnicodeCharPath-no '(' ')' refl x)
276+
pick-char-inj {true} {true} _ = refl
277+
278+
isEmbedding-pick-char : isEmbedding pick-char
279+
isEmbedding-pick-char = injEmbedding (Unicode .snd) pick-char-inj
280+
281+
Bracket' : hSet ℓ-zero
282+
Bracket' =
283+
UnicodeSubAlphabet (Bool , isSetBool) (pick-char , isEmbedding-pick-char)
284+
isFinSetBool
285+
286+
287+
BracketIso : Iso Bracket ⟨ Bracket' ⟩
288+
BracketIso .Iso.fun [ = '(' , (true , refl)
289+
BracketIso .Iso.fun ] = ')' , (false , refl)
290+
BracketIso .Iso.inv (c , true , _) = [
291+
BracketIso .Iso.inv (c , false , _) = ]
292+
BracketIso .Iso.rightInv (c , true , fib) i =
293+
-- idek anymore
294+
fib i , (true , isSetUnicodeChar _ _ (λ j fib (i ∧ j)) (λ j fib (i ∧ j)) i)
295+
BracketIso .Iso.rightInv (c , false , fib) i =
296+
fib i , (false , isSetUnicodeChar _ _ (λ j fib (i ∧ j)) (λ j fib (i ∧ j)) i)
297+
BracketIso .Iso.leftInv [ = refl
298+
BracketIso .Iso.leftInv ] = refl
299+
300+
test : UnicodeString
301+
-- Also tested with a string that was about 21k characters long
302+
-- Took 30 seconds to parse,
303+
-- and correctly fails when the string is unbalanced
304+
test = "()((((())))()()()()(())()())()()"
305+
306+
test' : List ⟨ Bracket' ⟩
307+
test' = toString (Bool , isSetBool) (pick-char , isEmbedding-pick-char) isFinSetBool test
308+
309+
w : String
310+
w = Cubical.Data.List.map (BracketIso .Iso.inv) test'
311+
312+
test-parse : (LinΣ[ b ∈ Bool ] BalancedStkTr 0 b) w
313+
test-parse = (LinΠ-app 0 ∘g parseStkTr) w (⌈w⌉→string {w = w} w (internalize w))
314+
315+
opaque
316+
unfolding ⟜-app _⟜_ ⟜-intro ⊸-app _⊸_ ⊸-app parseStkTr KL*r-elim ⌈w⌉→string
317+
_ : test-parse .fst ≡ true
318+
_ = refl

code/cubical/String/SubAlphabet.agda

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
open import Cubical.Foundations.Prelude
2+
open import Cubical.Foundations.HLevels
3+
4+
module String.SubAlphabet where
5+
6+
open import Cubical.Foundations.Structure
7+
open import Cubical.Foundations.Isomorphism
8+
open import Cubical.Foundations.Equiv
9+
10+
open import Cubical.Functions.Embedding
11+
12+
open import Cubical.Relation.Nullary.Base
13+
open import Cubical.Relation.Nullary.Properties
14+
15+
open import Agda.Builtin.Char
16+
open import Agda.Builtin.String
17+
18+
open import Cubical.Data.Bool
19+
open import Cubical.Data.Maybe
20+
open import Cubical.Data.Empty as Empty
21+
open import Cubical.Data.Sigma
22+
open import Cubical.Data.FinSet
23+
open import Cubical.Data.FinSet.Constructors
24+
open import Cubical.Data.FinSet.DecidablePredicate
25+
26+
open import Cubical.HITs.PropositionalTruncation as PT
27+
28+
open import Helper
29+
30+
-- Pick out a subalphabet of another alphabet
31+
module _
32+
(Alphabet Alphabet' : hSet ℓ-zero)
33+
(embed : ⟨ Alphabet ⟩ ↪ ⟨ Alphabet' ⟩)
34+
where
35+
SubAlphabet' : Type ℓ-zero
36+
SubAlphabet' = Σ[ c ∈ ⟨ Alphabet' ⟩ ] fiber (embed .fst) c
37+
38+
SubAlphabetIso :
39+
Iso ⟨ Alphabet ⟩ (Σ ⟨ Alphabet' ⟩ (λ v fiber (λ z embed .fst z) v))
40+
SubAlphabetIso =
41+
iso
42+
(λ x (embed .fst x) , (x , refl))
43+
(λ x x .snd .fst)
44+
(λ b
45+
Σ≡Prop (λ x isEmbedding→hasPropFibers (embed .snd) x)
46+
(b .snd .snd))
47+
(λ _ refl)
48+
49+
open Iso
50+
isSetSubAlphabet : isSet SubAlphabet'
51+
isSetSubAlphabet =
52+
isSetRetract (SubAlphabetIso .inv) (SubAlphabetIso .fun)
53+
(SubAlphabetIso .rightInv) (Alphabet .snd)
54+
55+
SubAlphabet : hSet ℓ-zero
56+
SubAlphabet .fst = SubAlphabet'
57+
SubAlphabet .snd = isSetSubAlphabet
58+
59+
module _ (isFinSetAlphabet : isFinSet ⟨ Alphabet ⟩) where
60+
isFinSetSubAlphabet : isFinSet SubAlphabet'
61+
isFinSetSubAlphabet =
62+
EquivPresIsFinSet (isoToEquiv SubAlphabetIso) isFinSetAlphabet
63+
64+
module _ (DiscreteAlphabet' : Discrete ⟨ Alphabet' ⟩) where
65+
maybe-SubAlphabet : ⟨ Alphabet' ⟩ Maybe ⟨ SubAlphabet ⟩
66+
maybe-SubAlphabet c' =
67+
decRec
68+
(λ ∣in-image∣
69+
just
70+
(c' ,
71+
(PT.rec (isEmbedding→hasPropFibers (embed .snd) c')
72+
(λ in-image in-image) ∣in-image∣)))
73+
(λ ¬|in-image∣ nothing)
74+
(DecProp∃
75+
(_ , isFinSetAlphabet)
76+
(λ c (_ , Alphabet' .snd _ _) ,
77+
(DiscreteAlphabet' (embed .fst c) c'))
78+
.snd )

code/cubical/String/Unicode.agda

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
open import Cubical.Foundations.Prelude
2+
open import Cubical.Foundations.HLevels
3+
4+
module String.Unicode where
5+
6+
open import Cubical.Foundations.Structure
7+
open import Cubical.Foundations.Isomorphism
8+
open import Cubical.Foundations.Equiv
9+
10+
open import Cubical.Functions.Embedding
11+
12+
open import Cubical.Relation.Nullary.Base
13+
open import Cubical.Relation.Nullary.Properties
14+
15+
open import Agda.Builtin.Char
16+
open import Agda.Builtin.String
17+
18+
open import Cubical.Data.Bool
19+
open import Cubical.Data.Maybe as Maybe
20+
open import Cubical.Data.List
21+
open import Cubical.Data.Empty as Empty
22+
open import Cubical.Data.Sigma
23+
open import Cubical.Data.FinSet
24+
open import Cubical.Data.FinSet.Constructors
25+
26+
open import String.SubAlphabet
27+
28+
29+
UnicodeChar = Char
30+
UnicodeString = String
31+
32+
-- Char equality is computed externally by JavaScript
33+
-- Internalize a path oracle
34+
postulate
35+
mkUnicodeCharPath-yes : (c c' : UnicodeChar) primCharEquality c c' ≡ true c ≡ c'
36+
mkUnicodeCharPath-no : (c c' : UnicodeChar) primCharEquality c c' ≡ false ¬ (c ≡ c')
37+
38+
DiscreteUnicodeChar : Discrete UnicodeChar
39+
DiscreteUnicodeChar c c' =
40+
decRec
41+
(λ t yes (mkUnicodeCharPath-yes c c' t))
42+
(λ f no (mkUnicodeCharPath-no c c' (flip (primCharEquality c c') f)))
43+
(primCharEquality c c' ≟ true)
44+
where
45+
flip : (b : Bool) ¬ (b ≡ true) b ≡ false
46+
flip false _ = refl
47+
flip true ¬t=t = Empty.rec (¬t=t refl)
48+
49+
isSetUnicodeChar : isSet UnicodeChar
50+
isSetUnicodeChar = Discrete→isSet DiscreteUnicodeChar
51+
52+
-- The unicode alphabet
53+
Unicode : hSet ℓ-zero
54+
Unicode .fst = UnicodeChar
55+
Unicode .snd = isSetUnicodeChar
56+
57+
-- I thought this would be good to define alphabets, but
58+
-- that cannot be true because you cannot pattern match on the characters
59+
-- in an alphabet due to the paths in the fibers underlying the subalphabet
60+
-- definitions
61+
-- However, this should still be good to write test cases more concisely?
62+
-- Still likely no. Maybe if Eq was used instead of path?
63+
module _
64+
(Alphabet : hSet ℓ-zero)
65+
(embed : ⟨ Alphabet ⟩ ↪ ⟨ Unicode ⟩)
66+
(isFinSetAlphabet : isFinSet ⟨ Alphabet ⟩)
67+
where
68+
UnicodeSubAlphabet : hSet ℓ-zero
69+
UnicodeSubAlphabet = SubAlphabet Alphabet Unicode embed
70+
71+
-- This should probably be a stricter check that none of the
72+
-- characters were from outside of the subalphabet
73+
toString : String List ⟨ UnicodeSubAlphabet ⟩
74+
toString w =
75+
filterMap
76+
(maybe-SubAlphabet Alphabet Unicode embed
77+
isFinSetAlphabet DiscreteUnicodeChar)
78+
(primStringToList w)

0 commit comments

Comments
 (0)