11package org .metaborg .sdf2table .parsetable ;
22
33import java .io .Serializable ;
4- import java .util .List ;
5- import java .util .Map ;
6- import java .util .Queue ;
7- import java .util .Set ;
4+ import java .util .*;
85
96import org .metaborg .parsetable .IParseTable ;
107import org .metaborg .parsetable .states .IState ;
11- import org .metaborg .sdf2table .grammar .IProduction ;
12- import org .metaborg .sdf2table .grammar .ISymbol ;
13- import org .metaborg .sdf2table .deepconflicts .Context ;
14- import org .metaborg .sdf2table .deepconflicts .ContextPosition ;
15- import org .metaborg .sdf2table .deepconflicts .ContextType ;
16- import org .metaborg .sdf2table .deepconflicts .ContextualProduction ;
17- import org .metaborg .sdf2table .deepconflicts .ContextualSymbol ;
18- import org .metaborg .sdf2table .deepconflicts .DeepConflictsAnalyzer ;
19- import org .metaborg .sdf2table .grammar .GeneralAttribute ;
20- import org .metaborg .sdf2table .grammar .NormGrammar ;
21- import org .metaborg .sdf2table .grammar .Priority ;
22- import org .metaborg .sdf2table .grammar .Production ;
23- import org .metaborg .sdf2table .grammar .Symbol ;
24-
25- import com .google .common .collect .BiMap ;
26- import com .google .common .collect .HashBiMap ;
27- import com .google .common .collect .HashMultimap ;
28- import com .google .common .collect .Lists ;
29- import com .google .common .collect .Maps ;
30- import com .google .common .collect .Queues ;
31- import com .google .common .collect .SetMultimap ;
32- import com .google .common .collect .Sets ;
8+ import org .metaborg .sdf2table .deepconflicts .*;
9+ import org .metaborg .sdf2table .grammar .*;
10+
11+ import com .google .common .collect .*;
3312
3413public class ParseTable implements IParseTable , Serializable {
3514
@@ -41,7 +20,7 @@ public class ParseTable implements IParseTable, Serializable {
4120
4221 public static final int FIRST_PRODUCTION_LABEL = 257 ;
4322 public static final int INITIAL_STATE_NUMBER = 0 ;
44- public static final int VERSION_NUMBER = 6 ;
23+ public static final int VERSION_NUMBER = 7 ;
4524
4625 private NormGrammar grammar ;
4726
@@ -100,6 +79,10 @@ public ParseTable(NormGrammar grammar, boolean dynamic, boolean dataDependent, b
10079 // create JSGLR parse table productions
10180 createJSGLRParseTableProductions (productionLabels );
10281
82+ // calculate FIRST-set and FOLLOW-set
83+ calculateFirst ();
84+ calculateFollow ();
85+
10386 // create states if the table should not be generated dynamically
10487 initialProduction = grammar .getInitialProduction ();
10588
@@ -140,6 +123,137 @@ private void calculateNullable() {
140123 } while (markedNullable );
141124 }
142125
126+ // Based on https://compilers.iecc.com/comparch/article/01-04-079
127+ private void calculateFirst () {
128+ SetMultimap <ISymbol , IProduction > symbolProductionsMapping = grammar .getSymbolProductionsMapping ();
129+ Set <Symbol > symbols = grammar .getSymbols ();
130+ SetMultimap <ISymbol , ISymbol > containsTheFirstOf = HashMultimap .create ();
131+
132+ for (Symbol s : symbols ) {
133+ // The FIRST set of a CharacterClass symbol is equal to the character class it represents.
134+ if (s instanceof CharacterClassSymbol ) {
135+ s .setFirst (((CharacterClassSymbol ) s ).getCC ());
136+ continue ;
137+ }
138+
139+ for (IProduction p : symbolProductionsMapping .get (s )) {
140+ // Direct contributions:
141+ // If p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable
142+ for (ISymbol rhs : p .rightHand ()) {
143+ // Then, a is in FIRST(A).
144+ if (rhs instanceof CharacterClassSymbol ) {
145+ s .setFirst (((CharacterClassSymbol ) rhs ).getCC ());
146+ break ;
147+ }
148+
149+ // Indirect contributions: calculate contains-the-FIRSTs-of
150+ // If p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable
151+ // Then, A contains-the-FIRSTs-of B
152+ containsTheFirstOf .put (s , rhs );
153+
154+ if (!rhs .isNullable ())
155+ break ;
156+ }
157+ }
158+ }
159+
160+ // Indirect contributions: Tarjan's algorithm for strongly connected components
161+ final int DONE = symbols .size ();
162+ final Map <ISymbol , Integer > low = new HashMap <>();
163+ final Stack <ISymbol > stack = new Stack <>();
164+ for (Symbol v : symbols ) {
165+ if (low .get (v ) == null /* CLEAN */ )
166+ traverseFirst (v , containsTheFirstOf , DONE , low , stack );
167+ }
168+ }
169+
170+ private void traverseFirst (ISymbol v , SetMultimap <ISymbol , ISymbol > containsTheFirstOf , int DONE ,
171+ Map <ISymbol , Integer > low , Stack <ISymbol > stack ) {
172+ stack .push (v );
173+ int top1 = stack .size () - 1 ;
174+ low .put (v , top1 );
175+ for (ISymbol w : containsTheFirstOf .get (v )) {
176+ if (low .get (w ) == null /* CLEAN */ ) {
177+ traverseFirst (w , containsTheFirstOf , DONE , low , stack );
178+ }
179+ // Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
180+ v .setFirst (v .getFirst ().union (w .getFirst ())); // union!
181+ if (low .get (w ) < low .get (v ))
182+ low .put (v , low .get (w ));
183+ }
184+ if (low .get (v ) == top1 ) // v is the root of this SCC
185+ while (stack .size () - 1 >= top1 ) {
186+ ISymbol w = stack .pop ();
187+ w .setFirst (v .getFirst ()); // distribute!
188+ low .put (w , DONE );
189+ }
190+ }
191+
192+ // Based on https://compilers.iecc.com/comparch/article/01-04-079
193+ // and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
194+ private void calculateFollow () {
195+ SetMultimap <ISymbol , IProduction > symbolProductionsMapping = grammar .getSymbolProductionsMapping ();
196+ Set <Symbol > symbols = grammar .getSymbols ();
197+ SetMultimap <ISymbol , ISymbol > containsTheFirstOf = HashMultimap .create ();
198+ SetMultimap <ISymbol , ISymbol > containsTheFollowOf = HashMultimap .create ();
199+
200+ for (Symbol s : symbols ) {
201+ for (IProduction p : symbolProductionsMapping .get (s )) {
202+ List <ISymbol > rightHand = p .rightHand ();
203+ for (int i = 0 , rightHandSize = rightHand .size (); i < rightHandSize ; i ++) {
204+ ISymbol symbolI = rightHand .get (i );
205+
206+ // If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An
207+ for (int j = i + 1 ; j < rightHandSize ; j ++) {
208+ // If Ak ... Am are all nullable, FOLLOW(Ai) contains FIRST(Aj)
209+ ISymbol symbolJ = rightHand .get (j );
210+ containsTheFirstOf .put (symbolI , symbolJ );
211+
212+ if (!symbolJ .isNullable ())
213+ break ;
214+ }
215+
216+ // If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A)
217+ containsTheFollowOf .put (symbolI , s );
218+ }
219+ }
220+ }
221+
222+ // Indirect contributions: Tarjan's algorithm for strongly connected components
223+ final int DONE = symbols .size ();
224+ final Map <ISymbol , Integer > low = new HashMap <>();
225+ final Stack <ISymbol > stack = new Stack <>();
226+ for (Symbol v : symbols ) {
227+ if (low .get (v ) == null /* CLEAN */ )
228+ traverseFollow (v , containsTheFirstOf , containsTheFollowOf , DONE , low , stack );
229+ }
230+ }
231+
232+ private void traverseFollow (ISymbol v , SetMultimap <ISymbol , ISymbol > containsTheFirstOf ,
233+ SetMultimap <ISymbol , ISymbol > containsTheFollowOf , int DONE , Map <ISymbol , Integer > low , Stack <ISymbol > stack ) {
234+ stack .push (v );
235+ int top1 = stack .size () - 1 ;
236+ low .put (v , top1 );
237+ for (ISymbol w : containsTheFirstOf .get (v )) {
238+ v .setFollow (v .getFollow ().union (w .getFirst ())); // union!
239+ }
240+ for (ISymbol w : containsTheFollowOf .get (v )) {
241+ if (low .get (w ) == null /* CLEAN */ ) {
242+ traverseFollow (w , containsTheFirstOf , containsTheFollowOf , DONE , low , stack );
243+ }
244+ // Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
245+ v .setFollow (v .getFollow ().union (w .getFollow ())); // union!
246+ if (low .get (w ) < low .get (v ))
247+ low .put (v , low .get (w ));
248+ }
249+ if (low .get (v ) == top1 ) // v is the root of this SCC
250+ while (stack .size () - 1 >= top1 ) {
251+ ISymbol w = stack .pop ();
252+ w .setFollow (v .getFollow ()); // distribute!
253+ low .put (w , DONE );
254+ }
255+ }
256+
143257 private void calculateRecursion () {
144258 // direct and indirect left recursion :
145259 // depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
@@ -413,28 +527,6 @@ private boolean mutuallyRecursive(Priority p) {
413527 || grammar .getRightRecursiveSymbolsMapping ().get (p .higher ().getLhs ()).contains (p .lower ().leftHand ());
414528 }
415529
416- /*
417- * TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
418- * getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
419- *
420- * tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
421- * getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
422- * first_components); } } }
423- *
424- *
425- * private void stronglyConnectedTarjan(TableSet v, Set<Set<TableSet>> components) { // Set the depth index for v to
426- * the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
427- *
428- * for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
429- * stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
430- * if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
431- * d.index); } }
432- *
433- * TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set<TableSet>
434- * component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
435- * while(t != v); components.add(component); } }
436- */
437-
438530 private void createLabels () {
439531 BiMap <IProduction , Integer > labels = HashBiMap .create ();
440532
0 commit comments