-
Notifications
You must be signed in to change notification settings - Fork 0
/
SimplifyNFA.cs
121 lines (115 loc) · 4.45 KB
/
SimplifyNFA.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
namespace Lex
{
/*
* Class SimplifyNFA
* Extract character classes from the NFA and simplify.
*/
using System;
using System.Collections;
using BitSet;
class SimplifyNfa
{
static private int[] ccls; // character class mapping.
static private int original_charset_size; // original charset size
static private int mapped_charset_size; // reduced charset size
static internal void simplify(Spec spec)
{
computeClasses(spec); // initialize fields.
/*
* now rewrite the NFA using our character class mapping.
*/
for (int i = 0; i < spec.nfa_states.Count; i++)
{
Nfa nfa = (Nfa)spec.nfa_states[i];
if (nfa.GetEdge() == Nfa.EMPTY || nfa.GetEdge() == Nfa.EPSILON)
continue; // no change.
if (nfa.GetEdge() == Nfa.CCL)
{
CharSet nset = new CharSet();
nset.map(nfa.GetCharSet(), ccls); // map it.
nfa.SetCharSet(nset);
}
else
{ // single character
nfa.SetEdge(ccls[nfa.GetEdge()]); // map it.
}
}
/*
* now update spec with the mapping.
*/
spec.ccls_map = ccls;
spec.dtrans_ncols = mapped_charset_size;
}
/*
* Compute minimum set of character classes needed to disambiguate
* edges. We optimistically assume that every character belongs to
* a single character class, and then incrementally split classes
* as we see edges that require discrimination between characters in
* the class.
*/
static private void computeClasses(Spec spec)
{
original_charset_size = spec.dtrans_ncols;
ccls = new int[original_charset_size]; // initially all zero.
int nextcls = 1;
BitSet clsA = new BitSet();
BitSet clsB = new BitSet();
Hashtable h = new Hashtable();
Console.WriteLine("Working on character classes.");
for (int index = 0; index < spec.nfa_states.Count; index++)
{
Nfa nfa = (Nfa)spec.nfa_states[index];
if (nfa.GetEdge() == Nfa.EMPTY || nfa.GetEdge() == Nfa.EPSILON)
continue; // no discriminatory information.
clsA.ClearAll();
clsB.ClearAll();
for (int i = 0; i < ccls.Length; i++)
{
if (nfa.GetEdge() == i || // edge labeled with a character
nfa.GetEdge() == Nfa.CCL
&& nfa.GetCharSet().contains(i)) // set of characters
clsA.Set(ccls[i], true);
else
clsB.Set(ccls[i], true);
}
/*
* now figure out which character classes we need to split.
*/
clsA.And(clsB); // split the classes which show up on both sides of edge
if (clsA.GetLength() == 0)
{
Console.Write(".");
continue;
}
Console.Write(":");
/*
* and split them.
*/
h.Clear(); // h will map old to new class name
for (int i = 0; i < ccls.Length; i++)
{
if (clsA.Get(ccls[i])) // a split class
{
if (nfa.GetEdge() == i ||
nfa.GetEdge() == Nfa.CCL
&& nfa.GetCharSet().contains(i))
{ // on A side
int split = ccls[i];
if (!h.ContainsKey(split))
{
h.Add(split, nextcls++); // make new class
#if DEBUG
Console.WriteLine("Adding char " + (nextcls - 1) + " split=" + split + " i=" + i);
#endif
}
ccls[i] = (int)h[split];
}
}
}
}
Console.WriteLine();
Console.WriteLine("NFA has " + nextcls + " distinct character classes.");
mapped_charset_size = nextcls;
}
}
}