Skip to content

Commit 34f9364

Browse files
committed
Implement SLR parse table generation
1 parent 2bff831 commit 34f9364

File tree

5 files changed

+196
-174
lines changed

5 files changed

+196
-174
lines changed

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/ISymbol.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,23 @@
77
public interface ISymbol {
88

99
String name();
10+
1011
boolean isNullable();
12+
1113
void setNullable(boolean nullable);
14+
15+
ICharacterClass getFirst();
16+
17+
void setFirst(ICharacterClass first);
18+
19+
ICharacterClass getFollow();
20+
21+
void setFollow(ICharacterClass follow);
22+
1223
ICharacterClass followRestriction();
24+
1325
List<ICharacterClass[]> followRestrictionLookahead();
26+
1427
org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol();
1528

1629
}

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/Symbol.java

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
import java.util.Map;
66
import java.util.Set;
77

8+
import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
89
import org.metaborg.parsetable.characterclasses.ICharacterClass;
9-
10-
import org.metaborg.sdf2table.grammar.ISymbol;
1110
import org.metaborg.parsetable.symbols.SortCardinality;
1211
import org.metaborg.parsetable.symbols.SyntaxContext;
1312
import org.metaborg.sdf2table.deepconflicts.Context;
@@ -24,12 +23,15 @@ public abstract class Symbol implements Serializable, ISymbol {
2423
protected List<ICharacterClass[]> followRestrictionsLookahead;
2524

2625
private boolean nullable = false;
26+
private ICharacterClass first = CharacterClassFactory.EMPTY_CHARACTER_CLASS;
27+
private ICharacterClass follow = CharacterClassFactory.EMPTY_CHARACTER_CLASS;
2728

28-
/* (non-Javadoc)
29+
/*
30+
* (non-Javadoc)
31+
*
2932
* @see org.metaborg.sdf2table.grammar.ISymbol#name()
3033
*/
31-
@Override
32-
public abstract String name();
34+
@Override public abstract String name();
3335

3436
public boolean isNullable() {
3537
return nullable;
@@ -39,6 +41,22 @@ public void setNullable(boolean nullable) {
3941
this.nullable = nullable;
4042
}
4143

44+
@Override public ICharacterClass getFirst() {
45+
return first;
46+
}
47+
48+
@Override public void setFirst(ICharacterClass first) {
49+
this.first = first;
50+
}
51+
52+
@Override public ICharacterClass getFollow() {
53+
return follow;
54+
}
55+
56+
@Override public void setFollow(ICharacterClass follow) {
57+
this.follow = follow;
58+
}
59+
4260
@Override public String toString() {
4361
return name();
4462
}
@@ -119,5 +137,6 @@ public org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol() {
119137
return toParseTableSymbol(null, null);
120138
}
121139

122-
public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext, SortCardinality cardinality);
140+
public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext,
141+
SortCardinality cardinality);
123142
}

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java

Lines changed: 141 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,35 +1,14 @@
11
package org.metaborg.sdf2table.parsetable;
22

33
import java.io.Serializable;
4-
import java.util.List;
5-
import java.util.Map;
6-
import java.util.Queue;
7-
import java.util.Set;
4+
import java.util.*;
85

96
import org.metaborg.parsetable.IParseTable;
107
import org.metaborg.parsetable.states.IState;
11-
import org.metaborg.sdf2table.grammar.IProduction;
12-
import org.metaborg.sdf2table.grammar.ISymbol;
13-
import org.metaborg.sdf2table.deepconflicts.Context;
14-
import org.metaborg.sdf2table.deepconflicts.ContextPosition;
15-
import org.metaborg.sdf2table.deepconflicts.ContextType;
16-
import org.metaborg.sdf2table.deepconflicts.ContextualProduction;
17-
import org.metaborg.sdf2table.deepconflicts.ContextualSymbol;
18-
import org.metaborg.sdf2table.deepconflicts.DeepConflictsAnalyzer;
19-
import org.metaborg.sdf2table.grammar.GeneralAttribute;
20-
import org.metaborg.sdf2table.grammar.NormGrammar;
21-
import org.metaborg.sdf2table.grammar.Priority;
22-
import org.metaborg.sdf2table.grammar.Production;
23-
import org.metaborg.sdf2table.grammar.Symbol;
24-
25-
import com.google.common.collect.BiMap;
26-
import com.google.common.collect.HashBiMap;
27-
import com.google.common.collect.HashMultimap;
28-
import com.google.common.collect.Lists;
29-
import com.google.common.collect.Maps;
30-
import com.google.common.collect.Queues;
31-
import com.google.common.collect.SetMultimap;
32-
import com.google.common.collect.Sets;
8+
import org.metaborg.sdf2table.deepconflicts.*;
9+
import org.metaborg.sdf2table.grammar.*;
10+
11+
import com.google.common.collect.*;
3312

3413
public class ParseTable implements IParseTable, Serializable {
3514

@@ -41,7 +20,7 @@ public class ParseTable implements IParseTable, Serializable {
4120

4221
public static final int FIRST_PRODUCTION_LABEL = 257;
4322
public static final int INITIAL_STATE_NUMBER = 0;
44-
public static final int VERSION_NUMBER = 6;
23+
public static final int VERSION_NUMBER = 7;
4524

4625
private NormGrammar grammar;
4726

@@ -100,6 +79,10 @@ public ParseTable(NormGrammar grammar, boolean dynamic, boolean dataDependent, b
10079
// create JSGLR parse table productions
10180
createJSGLRParseTableProductions(productionLabels);
10281

82+
// calculate FIRST-set and FOLLOW-set
83+
calculateFirst();
84+
calculateFollow();
85+
10386
// create states if the table should not be generated dynamically
10487
initialProduction = grammar.getInitialProduction();
10588

@@ -140,6 +123,137 @@ private void calculateNullable() {
140123
} while(markedNullable);
141124
}
142125

126+
// Based on https://compilers.iecc.com/comparch/article/01-04-079
127+
private void calculateFirst() {
128+
SetMultimap<ISymbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
129+
Set<Symbol> symbols = grammar.getSymbols();
130+
SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();
131+
132+
for(Symbol s : symbols) {
133+
// The FIRST set of a CharacterClass symbol is equal to the character class it represents.
134+
if(s instanceof CharacterClassSymbol) {
135+
s.setFirst(((CharacterClassSymbol) s).getCC());
136+
continue;
137+
}
138+
139+
for(IProduction p : symbolProductionsMapping.get(s)) {
140+
// Direct contributions:
141+
// If p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable
142+
for(ISymbol rhs : p.rightHand()) {
143+
// Then, a is in FIRST(A).
144+
if(rhs instanceof CharacterClassSymbol) {
145+
s.setFirst(((CharacterClassSymbol) rhs).getCC());
146+
break;
147+
}
148+
149+
// Indirect contributions: calculate contains-the-FIRSTs-of
150+
// If p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable
151+
// Then, A contains-the-FIRSTs-of B
152+
containsTheFirstOf.put(s, rhs);
153+
154+
if(!rhs.isNullable())
155+
break;
156+
}
157+
}
158+
}
159+
160+
// Indirect contributions: Tarjan's algorithm for strongly connected components
161+
final int DONE = symbols.size();
162+
final Map<ISymbol, Integer> low = new HashMap<>();
163+
final Stack<ISymbol> stack = new Stack<>();
164+
for(Symbol v : symbols) {
165+
if(low.get(v) == null /* CLEAN */)
166+
traverseFirst(v, containsTheFirstOf, DONE, low, stack);
167+
}
168+
}
169+
170+
private void traverseFirst(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf, int DONE,
171+
Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
172+
stack.push(v);
173+
int top1 = stack.size() - 1;
174+
low.put(v, top1);
175+
for(ISymbol w : containsTheFirstOf.get(v)) {
176+
if(low.get(w) == null /* CLEAN */) {
177+
traverseFirst(w, containsTheFirstOf, DONE, low, stack);
178+
}
179+
// Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
180+
v.setFirst(v.getFirst().union(w.getFirst())); // union!
181+
if(low.get(w) < low.get(v))
182+
low.put(v, low.get(w));
183+
}
184+
if(low.get(v) == top1) // v is the root of this SCC
185+
while(stack.size() - 1 >= top1) {
186+
ISymbol w = stack.pop();
187+
w.setFirst(v.getFirst()); // distribute!
188+
low.put(w, DONE);
189+
}
190+
}
191+
192+
// Based on https://compilers.iecc.com/comparch/article/01-04-079
193+
// and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
194+
private void calculateFollow() {
195+
SetMultimap<ISymbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
196+
Set<Symbol> symbols = grammar.getSymbols();
197+
SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();
198+
SetMultimap<ISymbol, ISymbol> containsTheFollowOf = HashMultimap.create();
199+
200+
for(Symbol s : symbols) {
201+
for(IProduction p : symbolProductionsMapping.get(s)) {
202+
List<ISymbol> rightHand = p.rightHand();
203+
for(int i = 0, rightHandSize = rightHand.size(); i < rightHandSize; i++) {
204+
ISymbol symbolI = rightHand.get(i);
205+
206+
// If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An
207+
for(int j = i + 1; j < rightHandSize; j++) {
208+
// If Ak ... Am are all nullable, FOLLOW(Ai) contains FIRST(Aj)
209+
ISymbol symbolJ = rightHand.get(j);
210+
containsTheFirstOf.put(symbolI, symbolJ);
211+
212+
if(!symbolJ.isNullable())
213+
break;
214+
}
215+
216+
// If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A)
217+
containsTheFollowOf.put(symbolI, s);
218+
}
219+
}
220+
}
221+
222+
// Indirect contributions: Tarjan's algorithm for strongly connected components
223+
final int DONE = symbols.size();
224+
final Map<ISymbol, Integer> low = new HashMap<>();
225+
final Stack<ISymbol> stack = new Stack<>();
226+
for(Symbol v : symbols) {
227+
if(low.get(v) == null /* CLEAN */)
228+
traverseFollow(v, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
229+
}
230+
}
231+
232+
private void traverseFollow(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf,
233+
SetMultimap<ISymbol, ISymbol> containsTheFollowOf, int DONE, Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
234+
stack.push(v);
235+
int top1 = stack.size() - 1;
236+
low.put(v, top1);
237+
for(ISymbol w : containsTheFirstOf.get(v)) {
238+
v.setFollow(v.getFollow().union(w.getFirst())); // union!
239+
}
240+
for(ISymbol w : containsTheFollowOf.get(v)) {
241+
if(low.get(w) == null /* CLEAN */) {
242+
traverseFollow(w, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
243+
}
244+
// Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
245+
v.setFollow(v.getFollow().union(w.getFollow())); // union!
246+
if(low.get(w) < low.get(v))
247+
low.put(v, low.get(w));
248+
}
249+
if(low.get(v) == top1) // v is the root of this SCC
250+
while(stack.size() - 1 >= top1) {
251+
ISymbol w = stack.pop();
252+
w.setFollow(v.getFollow()); // distribute!
253+
low.put(w, DONE);
254+
}
255+
}
256+
143257
private void calculateRecursion() {
144258
// direct and indirect left recursion :
145259
// depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
@@ -413,28 +527,6 @@ private boolean mutuallyRecursive(Priority p) {
413527
|| grammar.getRightRecursiveSymbolsMapping().get(p.higher().getLhs()).contains(p.lower().leftHand());
414528
}
415529

416-
/*
417-
* TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
418-
* getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
419-
*
420-
* tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
421-
* getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
422-
* first_components); } } }
423-
*
424-
*
425-
* private void stronglyConnectedTarjan(TableSet v, Set<Set<TableSet>> components) { // Set the depth index for v to
426-
* the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
427-
*
428-
* for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
429-
* stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
430-
* if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
431-
* d.index); } }
432-
*
433-
* TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set<TableSet>
434-
* component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
435-
* while(t != v); components.add(component); } }
436-
*/
437-
438530
private void createLabels() {
439531
BiMap<IProduction, Integer> labels = HashBiMap.create();
440532

org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/State.java

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@
55
import java.util.Map;
66
import java.util.Set;
77

8-
import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
9-
import org.metaborg.parsetable.query.IActionQuery;
10-
import org.metaborg.parsetable.states.IState;
118
import org.metaborg.parsetable.actions.IAction;
129
import org.metaborg.parsetable.actions.IGoto;
1310
import org.metaborg.parsetable.actions.IReduce;
11+
import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
1412
import org.metaborg.parsetable.characterclasses.ICharacterClass;
15-
import org.metaborg.sdf2table.grammar.IProduction;
16-
import org.metaborg.sdf2table.grammar.ISymbol;
17-
import org.metaborg.sdf2table.grammar.CharacterClassSymbol;
18-
import org.metaborg.sdf2table.grammar.Symbol;
1913
import org.metaborg.parsetable.query.ActionsForCharacterDisjointSorted;
2014
import org.metaborg.parsetable.query.ActionsPerCharacterClass;
15+
import org.metaborg.parsetable.query.IActionQuery;
2116
import org.metaborg.parsetable.query.IActionsForCharacter;
22-
17+
import org.metaborg.parsetable.states.IState;
18+
import org.metaborg.sdf2table.deepconflicts.ContextualSymbol;
19+
import org.metaborg.sdf2table.grammar.CharacterClassSymbol;
20+
import org.metaborg.sdf2table.grammar.IProduction;
21+
import org.metaborg.sdf2table.grammar.ISymbol;
22+
import org.metaborg.sdf2table.grammar.Symbol;
2323

2424
import com.google.common.collect.LinkedHashMultimap;
2525
import com.google.common.collect.Maps;
@@ -137,19 +137,24 @@ public void doShift() {
137137

138138
public void doReduces() {
139139
// for each item p_i : A = A0 ... AN .
140-
// add a reduce action reduce([0-256] / follow(A), p_i)
140+
// add a reduce action reduce(FOLLOW(A) / follow-restriction(A), p_i) -- SLR(1) parsing
141141
for(LRItem item : items) {
142142

143143
if(item.getDotPosition() == item.getProd().rightHand().size()) {
144144
int prod_label = pt.productionLabels().get(item.getProd());
145145

146146
ISymbol leftHandSymbol = item.getProd().leftHand();
147147
ICharacterClass fr = leftHandSymbol.followRestriction();
148+
149+
ICharacterClass final_range = leftHandSymbol instanceof ContextualSymbol
150+
? ((ContextualSymbol) leftHandSymbol).getOrigSymbol().getFollow() : leftHandSymbol.getFollow();
151+
// Previous line used to be the following in LR(0). TODO add option to switch between LR(0) and SLR(1)
152+
// ICharacterClass final_range = CharacterClassFactory.FULL_RANGE;
153+
148154
if((fr == null || fr.isEmpty()) && leftHandSymbol.followRestrictionLookahead() == null) {
149-
addReduceAction(item.getProd(), prod_label, CharacterClassFactory.FULL_RANGE, null);
155+
addReduceAction(item.getProd(), prod_label, final_range, null);
150156
} else {
151-
ICharacterClass final_range = CharacterClassFactory.FULL_RANGE;
152-
// Not based on first and follow sets thus, only considering the follow restrictions
157+
// Considering the follow restrictions
153158
if(fr != null && !fr.isEmpty()) {
154159
final_range = final_range.difference(leftHandSymbol.followRestriction());
155160
}

0 commit comments

Comments
 (0)