From b3688a36f4af30b1d8e60863a3890e9d3d357a20 Mon Sep 17 00:00:00 2001 From: Maarten Sijm Date: Fri, 5 Jul 2019 11:57:26 +0200 Subject: [PATCH 1/3] Implement SLR parse table generation --- .../metaborg/sdf2table/grammar/ISymbol.java | 13 ++ .../metaborg/sdf2table/grammar/Symbol.java | 31 +++- .../sdf2table/parsetable/ParseTable.java | 157 +++++++++++++++--- .../metaborg/sdf2table/parsetable/State.java | 19 ++- .../sdf2table/parsetable/TableSet.java | 107 ------------ 5 files changed, 188 insertions(+), 139 deletions(-) delete mode 100644 org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/TableSet.java diff --git a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/ISymbol.java b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/ISymbol.java index 96189de58..12e81ba3e 100644 --- a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/ISymbol.java +++ b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/ISymbol.java @@ -7,10 +7,23 @@ public interface ISymbol { String name(); + boolean isNullable(); + void setNullable(boolean nullable); + + ICharacterClass getFirst(); + + void setFirst(ICharacterClass first); + + ICharacterClass getFollow(); + + void setFollow(ICharacterClass follow); + ICharacterClass followRestriction(); + List followRestrictionLookahead(); + org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(); void normalizeFollowRestrictionLookahead(); diff --git a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/Symbol.java b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/Symbol.java index 3e0654916..9681ee02d 100644 --- a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/Symbol.java +++ b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/grammar/Symbol.java @@ -5,9 +5,8 @@ import java.util.Map; import java.util.Set; +import org.metaborg.parsetable.characterclasses.CharacterClassFactory; import 
org.metaborg.parsetable.characterclasses.ICharacterClass; - -import org.metaborg.sdf2table.grammar.ISymbol; import org.metaborg.parsetable.symbols.SortCardinality; import org.metaborg.parsetable.symbols.SyntaxContext; import org.metaborg.sdf2table.deepconflicts.Context; @@ -24,12 +23,15 @@ public abstract class Symbol implements Serializable, ISymbol { protected List followRestrictionsLookahead; private boolean nullable = false; + private ICharacterClass first = CharacterClassFactory.EMPTY_CHARACTER_CLASS; + private ICharacterClass follow = CharacterClassFactory.EMPTY_CHARACTER_CLASS; - /* (non-Javadoc) + /* + * (non-Javadoc) + * * @see org.metaborg.sdf2table.grammar.ISymbol#name() */ - @Override - public abstract String name(); + @Override public abstract String name(); public boolean isNullable() { return nullable; @@ -39,6 +41,22 @@ public void setNullable(boolean nullable) { this.nullable = nullable; } + @Override public ICharacterClass getFirst() { + return first; + } + + @Override public void setFirst(ICharacterClass first) { + this.first = first; + } + + @Override public ICharacterClass getFollow() { + return follow; + } + + @Override public void setFollow(ICharacterClass follow) { + this.follow = follow; + } + @Override public String toString() { return name(); } @@ -149,5 +167,6 @@ public org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol() { return toParseTableSymbol(null, null); } - public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext, SortCardinality cardinality); + public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext, + SortCardinality cardinality); } diff --git a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java index e802f8e65..c9b0ec3f7 100644 --- 
a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java
+++ b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java
@@ -117,6 +117,10 @@ public ParseTable(NormGrammar grammar, ParseTableConfiguration config) {
         // create JSGLR parse table productions
         createJSGLRParseTableProductions(productionLabels);
 
+        // calculate FIRST-set and FOLLOW-set
+        calculateFirst();
+        calculateFollow();
+
         // create states if the table should not be generated dynamically
         initialProduction = grammar.getInitialProduction();
 
@@ -182,6 +186,137 @@ private void calculateNullable() {
         } while(markedNullable);
     }
 
+    // Based on https://compilers.iecc.com/comparch/article/01-04-079
+    private void calculateFirst() {
+        SetMultimap symbolProductionsMapping = grammar.getSymbolProductionsMapping();
+        Set symbols = grammar.getSymbols();
+        SetMultimap containsTheFirstOf = HashMultimap.create();
+
+        for(ISymbol s : symbols) {
+            // The FIRST set of a CharacterClass symbol is equal to the character class it represents.
+            if(s instanceof CharacterClassSymbol) {
+                s.setFirst(((CharacterClassSymbol) s).getCC());
+                continue;
+            }
+
+            for(IProduction p : symbolProductionsMapping.get(s)) {
+                // Direct contributions:
+                // If p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable
+                for(ISymbol rhs : p.rightHand()) {
+                    // Then, a is in FIRST(A). Union (do not overwrite): every production of A may contribute.
+                    if(rhs instanceof CharacterClassSymbol) {
+                        s.setFirst(s.getFirst().union(((CharacterClassSymbol) rhs).getCC()));
+                        break;
+                    }
+
+                    // Indirect contributions: calculate contains-the-FIRSTs-of
+                    // If p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable
+                    // Then, A contains-the-FIRSTs-of B
+                    containsTheFirstOf.put(s, rhs);
+
+                    if(!rhs.isNullable())
+                        break;
+                }
+            }
+        }
+
+        // Indirect contributions: Tarjan's algorithm for strongly connected components
+        final int DONE = symbols.size();
+        final Map low = new HashMap<>();
+        final Stack stack = new Stack<>();
+        for(ISymbol v : symbols) {
+            if(low.get(v) == null /* CLEAN */)
+                traverseFirst(v, containsTheFirstOf, DONE, low, stack);
+        }
+    }
+
+    private void traverseFirst(ISymbol v, SetMultimap containsTheFirstOf, int DONE,
+        Map low, Stack stack) {
+        stack.push(v);
+        int top1 = stack.size() - 1;
+        low.put(v, top1);
+        for(ISymbol w : containsTheFirstOf.get(v)) {
+            if(low.get(w) == null /* CLEAN */) {
+                traverseFirst(w, containsTheFirstOf, DONE, low, stack);
+            }
+            // Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
+            v.setFirst(v.getFirst().union(w.getFirst())); // union!
+            if(low.get(w) < low.get(v))
+                low.put(v, low.get(w));
+        }
+        if(low.get(v) == top1) // v is the root of this SCC
+            while(stack.size() - 1 >= top1) {
+                ISymbol w = stack.pop();
+                w.setFirst(v.getFirst()); // distribute!
+                low.put(w, DONE);
+            }
+    }
+
+    // Based on https://compilers.iecc.com/comparch/article/01-04-079
+    // and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
+    private void calculateFollow() {
+        SetMultimap symbolProductionsMapping = grammar.getSymbolProductionsMapping();
+        Set symbols = grammar.getSymbols();
+        SetMultimap containsTheFirstOf = HashMultimap.create();
+        SetMultimap containsTheFollowOf = HashMultimap.create();
+
+        for(ISymbol s : symbols) {
+            for(IProduction p : symbolProductionsMapping.get(s)) {
+                List rightHand = p.rightHand();
+                for(int i = 0, rightHandSize = rightHand.size(); i < rightHandSize; i++) {
+                    ISymbol symbolI = rightHand.get(i);
+
+                    // If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An
+                    for(int j = i + 1; j < rightHandSize; j++) {
+                        // If Ak ... Am are all nullable, FOLLOW(Ai) contains FIRST(Aj)
+                        ISymbol symbolJ = rightHand.get(j);
+                        containsTheFirstOf.put(symbolI, symbolJ);
+
+                        if(!symbolJ.isNullable())
+                            break;
+                    }
+
+                    // If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A)
+                    containsTheFollowOf.put(symbolI, s);
+                }
+            }
+        }
+
+        // Indirect contributions: Tarjan's algorithm for strongly connected components
+        final int DONE = symbols.size();
+        final Map low = new HashMap<>();
+        final Stack stack = new Stack<>();
+        for(ISymbol v : symbols) {
+            if(low.get(v) == null /* CLEAN */)
+                traverseFollow(v, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
+        }
+    }
+
+    private void traverseFollow(ISymbol v, SetMultimap containsTheFirstOf,
+        SetMultimap containsTheFollowOf, int DONE, Map low, Stack stack) {
+        stack.push(v);
+        int top1 = stack.size() - 1;
+        low.put(v, top1);
+        for(ISymbol w : containsTheFirstOf.get(v)) {
+            v.setFollow(v.getFollow().union(w.getFirst())); // union!
+        }
+        for(ISymbol w : containsTheFollowOf.get(v)) {
+            if(low.get(w) == null /* CLEAN */) {
+                traverseFollow(w, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
+            }
+            // Change compared to the article at compilers.iecc.com: this line is moved outside of the previous if-block
+            v.setFollow(v.getFollow().union(w.getFollow())); // union!
+            if(low.get(w) < low.get(v))
+                low.put(v, low.get(w));
+        }
+        if(low.get(v) == top1) // v is the root of this SCC
+            while(stack.size() - 1 >= top1) {
+                ISymbol w = stack.pop();
+                w.setFollow(v.getFollow()); // distribute!
+                low.put(w, DONE);
+            }
+    }
+
     private void calculateRecursion() {
         // direct and indirect left recursion :
         // depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
@@ -521,28 +656,6 @@ private boolean mutuallyRecursive(Priority p) {
             || grammar.getRightRecursiveSymbolsMapping().get(p.higher().getLhs()).contains(p.lower().leftHand());
     }
 
-    /*
-     * TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
-     * getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
-     *
-     * tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
-     * getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
-     * first_components); } } }
-     *
-     *
-     * private void stronglyConnectedTarjan(TableSet v, Set> components) { // Set the depth index for v to
-     * the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
-     *
-     * for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
-     * stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
-     * if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
-     * d.index); } }
-     *
-     * TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set
-     * component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
-     * while(t != v); components.add(component); } }
-     */
-
     private void extractExpressionGrammars(SCCNodes scc) {
 
         for(ISymbol s : grammar.getSymbols()) {
diff --git a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/State.java b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/State.java
index c1133be9a..945cde718 100644
--- 
a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/State.java +++ b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/State.java @@ -17,6 +17,12 @@ import org.metaborg.sdf2table.grammar.IProduction; import org.metaborg.sdf2table.grammar.ISymbol; import org.metaborg.sdf2table.grammar.Symbol; +import org.metaborg.parsetable.states.IState; +import org.metaborg.sdf2table.deepconflicts.ContextualSymbol; +import org.metaborg.sdf2table.grammar.CharacterClassSymbol; +import org.metaborg.sdf2table.grammar.IProduction; +import org.metaborg.sdf2table.grammar.ISymbol; +import org.metaborg.sdf2table.grammar.Symbol; import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Maps; @@ -116,7 +122,7 @@ public void doShift() { public void doReduces() { // for each item p_i : A = A0 ... AN . - // add a reduce action reduce([0-MAX_CHAR,eof] / follow(A), p_i) + // add a reduce action reduce(FOLLOW(A) / follow-restriction(A), p_i) -- SLR(1) parsing for(LRItem item : items) { if(item.getDotPosition() == item.getProd().arity()) { @@ -124,11 +130,16 @@ public void doReduces() { ISymbol leftHandSymbol = item.getProd().leftHand(); ICharacterClass fr = leftHandSymbol.followRestriction(); + + ICharacterClass final_range = leftHandSymbol instanceof ContextualSymbol + ? ((ContextualSymbol) leftHandSymbol).getOrigSymbol().getFollow() : leftHandSymbol.getFollow(); + // Previous line used to be the following in LR(0). 
TODO add option to switch between LR(0) and SLR(1) + // ICharacterClass final_range = CharacterClassFactory.FULL_RANGE; + if((fr == null || fr.isEmpty()) && leftHandSymbol.followRestrictionLookahead() == null) { - addReduceAction(item.getProd(), prod_label, CharacterClassFactory.FULL_RANGE, null); + addReduceAction(item.getProd(), prod_label, final_range, null); } else { - ICharacterClass final_range = CharacterClassFactory.FULL_RANGE; - // Not based on first and follow sets thus, only considering the follow restrictions + // Considering the follow restrictions if(fr != null && !fr.isEmpty()) { final_range = final_range.difference(leftHandSymbol.followRestriction()); } diff --git a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/TableSet.java b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/TableSet.java deleted file mode 100644 index 2a8a88dcc..000000000 --- a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/TableSet.java +++ /dev/null @@ -1,107 +0,0 @@ -package org.metaborg.sdf2table.parsetable; - -import java.io.Serializable; -import java.util.Set; - -import org.metaborg.parsetable.characterclasses.ICharacterClass; -import org.metaborg.sdf2table.grammar.IProduction; - -import com.google.common.collect.Sets; - -public class TableSet implements Serializable { - - private static final long serialVersionUID = 3001311608137161789L; - - IProduction p; - ICharacterClass value; - Set depends_on = Sets.newHashSet(); - Set dependents = Sets.newHashSet(); - - // Tarjan variables - int index = -1; - int low_link = -1; - boolean onStack = false; - - public TableSet(IProduction p) { - this.p = p; - } - - public void add(ICharacterClass cc) { - if(value == null) { - value = cc; - } else if(cc != null) { - value = cc.union(value); - } - } - - public void addDependency(TableSet s) { - if(depends_on.add(s)) { - s.addDependent(this); - } - } - - public void addDependent(TableSet s) { - this.dependents.add(s); - } 
- - @Override public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((p == null) ? 0 : p.hashCode()); - return result; - } - - @Override public boolean equals(Object obj) { - if(this == obj) - return true; - if(obj == null) - return false; - if(getClass() != obj.getClass()) - return false; - TableSet other = (TableSet) obj; - if(p == null) { - if(other.p != null) - return false; - } else if(!p.equals(other.p)) - return false; - return true; - } - - @Override public String toString() { - String buf = ""; - buf += "prod: " + p + "\n"; - - if(value != null) { - buf += "value = " + value + "\n"; - } else { - buf += "no value\n"; - } - - buf += "depends on: "; - if(depends_on.isEmpty()) { - buf += "---\n"; - } - int i = 0; - for(TableSet t : depends_on) { - if(i != 0) - buf += " "; - buf += t.p + "\n"; - i++; - } - - buf += "dependents: "; - i = 0; - if(dependents.isEmpty()) { - buf += "---\n"; - } - for(TableSet t : dependents) { - if(i != 0) - buf += " "; - buf += t.p + "\n"; - i++; - } - - return buf; - } - -} From 9dad2bcd1941ae5b1f9dcc01d22eb5750963acc3 Mon Sep 17 00:00:00 2001 From: Maarten Sijm Date: Mon, 8 Jul 2019 18:12:08 +0200 Subject: [PATCH 2/3] Correctly calculate contains-the-follow-of --- .../java/org/metaborg/sdf2table/parsetable/ParseTable.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java index c9b0ec3f7..cdf17be7d 100644 --- a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java +++ b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java @@ -263,7 +263,7 @@ private void calculateFollow() { for(ISymbol s : symbols) { for(IProduction p : symbolProductionsMapping.get(s)) { List rightHand = p.rightHand(); - for(int i = 0, rightHandSize = 
rightHand.size(); i < rightHandSize; i++) { + i: for(int i = 0, rightHandSize = rightHand.size(); i < rightHandSize; i++) { ISymbol symbolI = rightHand.get(i); // If p is of the shape A = A0 ... Ai Ak ... Am Aj ... An @@ -272,8 +272,9 @@ private void calculateFollow() { ISymbol symbolJ = rightHand.get(j); containsTheFirstOf.put(symbolI, symbolJ); + // If Ak ... An are NOT all nullable, continue with next Ai if(!symbolJ.isNullable()) - break; + continue i; } // If Ak ... An are all nullable, FOLLOW(Ai) contains FOLLOW(A) From 000e0ab8b161cb63f838d9b92231fe5d9da42cdc Mon Sep 17 00:00:00 2001 From: Maarten Sijm Date: Tue, 25 Feb 2020 20:44:21 +0100 Subject: [PATCH 3/3] Use new EOFSymbol in SLR parsing This EOFSymbol was introduced when changing the EOF value from 256 to -1 --- .../java/org/metaborg/sdf2table/parsetable/ParseTable.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java index cdf17be7d..e0d51e4ef 100644 --- a/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java +++ b/org.metaborg.sdf2table/src/main/java/org/metaborg/sdf2table/parsetable/ParseTable.java @@ -16,6 +16,8 @@ import com.google.common.collect.*; +import static org.metaborg.parsetable.characterclasses.CharacterClassFactory.EOF_SINGLETON; + public class ParseTable implements IParseTable, Serializable { // FIXME Currently generating an LR(0) table, compute first/follow sets to generate SLR(1) @@ -198,6 +200,11 @@ private void calculateFirst() { s.setFirst(((CharacterClassSymbol) s).getCC()); continue; } + // The FIRST set of an EOFSymbol is equal to the EOF singleton character class. + if(s instanceof EOFSymbol) { + s.setFirst(EOF_SINGLETON); + continue; + } for(IProduction p : symbolProductionsMapping.get(s)) { // Direct contributions: