Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,23 @@
public interface ISymbol {

String name();

boolean isNullable();

void setNullable(boolean nullable);

ICharacterClass getFirst();

void setFirst(ICharacterClass first);

ICharacterClass getFollow();

void setFollow(ICharacterClass follow);

ICharacterClass followRestriction();

List<ICharacterClass[]> followRestrictionLookahead();

org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol();
void normalizeFollowRestrictionLookahead();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,8 @@
import java.util.Map;
import java.util.Set;

import org.metaborg.parsetable.characterclasses.CharacterClassFactory;
import org.metaborg.parsetable.characterclasses.ICharacterClass;

import org.metaborg.sdf2table.grammar.ISymbol;
import org.metaborg.parsetable.symbols.SortCardinality;
import org.metaborg.parsetable.symbols.SyntaxContext;
import org.metaborg.sdf2table.deepconflicts.Context;
Expand All @@ -24,12 +23,15 @@ public abstract class Symbol implements Serializable, ISymbol {
protected List<ICharacterClass[]> followRestrictionsLookahead;

private boolean nullable = false;
private ICharacterClass first = CharacterClassFactory.EMPTY_CHARACTER_CLASS;
private ICharacterClass follow = CharacterClassFactory.EMPTY_CHARACTER_CLASS;

/* (non-Javadoc)
/*
* (non-Javadoc)
*
* @see org.metaborg.sdf2table.grammar.ISymbol#name()
*/
@Override
public abstract String name();
@Override public abstract String name();

public boolean isNullable() {
return nullable;
Expand All @@ -39,6 +41,22 @@ public void setNullable(boolean nullable) {
this.nullable = nullable;
}

/**
 * Returns the FIRST set currently stored for this symbol. The backing field is initialised to the empty
 * character class and keeps that value until {@link #setFirst(ICharacterClass)} is called.
 *
 * @return the stored FIRST character class
 */
@Override public ICharacterClass getFirst() {
    return this.first;
}

/**
 * Replaces the FIRST set stored for this symbol; the previous value is discarded, not merged.
 *
 * @param first
 *            the character class to store as this symbol's FIRST set
 */
@Override public void setFirst(ICharacterClass first) {
this.first = first;
}

/**
 * Returns the FOLLOW set currently stored for this symbol. The backing field is initialised to the empty
 * character class and keeps that value until {@link #setFollow(ICharacterClass)} is called.
 *
 * @return the stored FOLLOW character class
 */
@Override public ICharacterClass getFollow() {
    return this.follow;
}

/**
 * Replaces the FOLLOW set stored for this symbol; the previous value is discarded, not merged.
 *
 * @param follow
 *            the character class to store as this symbol's FOLLOW set
 */
@Override public void setFollow(ICharacterClass follow) {
this.follow = follow;
}

/**
 * Uses the symbol's {@link #name()} as its string representation.
 *
 * @return the value returned by {@code name()}
 */
@Override public String toString() {
return name();
}
Expand Down Expand Up @@ -149,5 +167,6 @@ public org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol() {
return toParseTableSymbol(null, null);
}

public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext, SortCardinality cardinality);
public abstract org.metaborg.parsetable.symbols.ISymbol toParseTableSymbol(SyntaxContext syntaxContext,
SortCardinality cardinality);
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

import com.google.common.collect.*;

import static org.metaborg.parsetable.characterclasses.CharacterClassFactory.EOF_SINGLETON;

public class ParseTable implements IParseTable, Serializable {

// FIXME Currently generating an LR(0) table, compute first/follow sets to generate SLR(1)
Expand Down Expand Up @@ -117,6 +119,10 @@ public ParseTable(NormGrammar grammar, ParseTableConfiguration config) {
// create JSGLR parse table productions
createJSGLRParseTableProductions(productionLabels);

// calculate FIRST-set and FOLLOW-set
calculateFirst();
calculateFollow();

// create states if the table should not be generated dynamically
initialProduction = grammar.getInitialProduction();

Expand Down Expand Up @@ -182,6 +188,143 @@ private void calculateNullable() {
} while(markedNullable);
}

// Based on https://compilers.iecc.com/comparch/article/01-04-079
/**
 * Computes the FIRST set of every symbol returned by {@code grammar.getSymbols()} and stores it on the symbol
 * through {@link ISymbol#setFirst(ICharacterClass)}.
 * <p>
 * The computation has two phases:
 * <ol>
 * <li>Direct contributions: a character class symbol (or EOF symbol) is its own FIRST set, and a character
 * class that appears in a right-hand side after only nullable symbols is added to the FIRST set of the
 * left-hand side. While scanning, the relation "A contains-the-FIRSTs-of B" is recorded for every
 * non-character-class symbol B reachable at the front of a production of A.</li>
 * <li>Indirect contributions: the recorded relation is traversed with {@code traverseFirst}, which merges the
 * FIRST sets along the relation and shares one set per strongly connected component.</li>
 * </ol>
 * Relies on {@link ISymbol#isNullable()} already being final for all symbols.
 */
private void calculateFirst() {
    SetMultimap<ISymbol, IProduction> symbolProductionsMapping = grammar.getSymbolProductionsMapping();
    Set<ISymbol> symbols = grammar.getSymbols();
    SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();

    for(ISymbol s : symbols) {
        // The FIRST set of a CharacterClass symbol is equal to the character class it represents.
        if(s instanceof CharacterClassSymbol) {
            s.setFirst(((CharacterClassSymbol) s).getCC());
            continue;
        }
        // The FIRST set of an EOFSymbol is equal to the EOF singleton character class.
        if(s instanceof EOFSymbol) {
            s.setFirst(EOF_SINGLETON);
            continue;
        }

        for(IProduction p : symbolProductionsMapping.get(s)) {
            // Walk the right-hand side for as long as everything seen so far is nullable.
            for(ISymbol rhs : p.rightHand()) {
                // Direct contribution:
                // p is of the shape A = A0 ... Ak a Am ... An where all symbols up to Ak are nullable,
                // so a is in FIRST(A).
                if(rhs instanceof CharacterClassSymbol) {
                    // Accumulate instead of overwrite: A may have several productions (or nullable
                    // prefixes) that each contribute a different character class.
                    s.setFirst(s.getFirst().union(((CharacterClassSymbol) rhs).getCC()));
                    break;
                }

                // Indirect contribution:
                // p is of the shape A = A0 ... Ak B Am ... An where all symbols up to Ak are nullable,
                // so A contains-the-FIRSTs-of B.
                containsTheFirstOf.put(s, rhs);

                // A non-nullable symbol hides everything that follows it.
                if(!rhs.isNullable()) {
                    break;
                }
            }
        }
    }

    // Indirect contributions: Tarjan's algorithm for strongly connected components.
    // DONE is the marker stored in `low` for symbols whose component has been completed.
    final int DONE = symbols.size();
    final Map<ISymbol, Integer> low = new HashMap<>();
    final Stack<ISymbol> stack = new Stack<>();
    for(ISymbol v : symbols) {
        // A symbol without an entry in `low` has not been visited yet (CLEAN).
        if(low.get(v) == null) {
            traverseFirst(v, containsTheFirstOf, DONE, low, stack);
        }
    }
}

/**
 * Visits {@code v} depth-first in the contains-the-FIRSTs-of relation, merging FIRST sets along the way, and
 * gives every member of a completed strongly connected component the FIRST set of the component's root.
 *
 * @param v
 *            the symbol to visit; must not have an entry in {@code low} yet
 * @param containsTheFirstOf
 *            relation where an entry (A, B) means FIRST(A) includes FIRST(B)
 * @param DONE
 *            marker written to {@code low} for symbols whose component is finished
 * @param low
 *            per-symbol low-link value; a missing entry means the symbol has not been visited
 * @param stack
 *            symbols of the components that are still open
 */
private void traverseFirst(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf, int DONE,
    Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
    stack.push(v);
    final int ownDepth = stack.size() - 1;
    low.put(v, ownDepth);

    for(ISymbol successor : containsTheFirstOf.get(v)) {
        final boolean unvisited = low.get(successor) == null;
        if(unvisited) {
            traverseFirst(successor, containsTheFirstOf, DONE, low, stack);
        }
        // Unlike the article at compilers.iecc.com, the union is taken for every successor, not only for
        // the ones that were visited just now.
        final ICharacterClass merged = v.getFirst().union(successor.getFirst());
        v.setFirst(merged);
        // Keep the smallest low-link seen so far.
        low.put(v, Math.min(low.get(v), low.get(successor)));
    }

    if(low.get(v) != ownDepth) {
        // v is not the root of its component; the root will distribute the final set later.
        return;
    }

    // v is the root of this SCC: pop the whole component and hand every member the root's FIRST set.
    while(stack.size() > ownDepth) {
        final ISymbol member = stack.pop();
        member.setFirst(v.getFirst());
        low.put(member, DONE);
    }
}

// Based on https://compilers.iecc.com/comparch/article/01-04-079
// and Modern Compiler Implementation in Java, Second Edition - Andrew Appel, 2004
/**
 * Computes the FOLLOW set of the grammar's symbols and stores it on each symbol through
 * {@link ISymbol#setFollow(ICharacterClass)}.
 * <p>
 * For every production A = A0 ... An and every position i, two relations are recorded:
 * <ul>
 * <li>FOLLOW(Ai) contains FIRST(Aj) for each later Aj that is separated from Ai only by nullable symbols;</li>
 * <li>FOLLOW(Ai) contains FOLLOW(A) when everything after Ai is nullable (or Ai is the last symbol).</li>
 * </ul>
 * Both relations are then resolved by {@code traverseFollow}. The constructor invokes this method after
 * {@code calculateFirst()}.
 */
private void calculateFollow() {
    final SetMultimap<ISymbol, IProduction> productionsBySymbol = grammar.getSymbolProductionsMapping();
    final Set<ISymbol> allSymbols = grammar.getSymbols();
    final SetMultimap<ISymbol, ISymbol> containsTheFirstOf = HashMultimap.create();
    final SetMultimap<ISymbol, ISymbol> containsTheFollowOf = HashMultimap.create();

    for(ISymbol leftHand : allSymbols) {
        for(IProduction production : productionsBySymbol.get(leftHand)) {
            final List<ISymbol> body = production.rightHand();
            final int length = body.size();

            for(int pos = 0; pos < length; pos++) {
                final ISymbol current = body.get(pos);

                // Scan the symbols after `current`; stop right after the first non-nullable one.
                boolean tailNullable = true;
                for(int next = pos + 1; tailNullable && next < length; next++) {
                    final ISymbol later = body.get(next);
                    // Everything between `current` and `later` is nullable: FOLLOW(current) contains
                    // FIRST(later).
                    containsTheFirstOf.put(current, later);
                    tailNullable = later.isNullable();
                }

                // The whole remainder is nullable (or empty): FOLLOW(current) contains FOLLOW(leftHand).
                if(tailNullable) {
                    containsTheFollowOf.put(current, leftHand);
                }
            }
        }
    }

    // Indirect contributions: Tarjan's algorithm for strongly connected components.
    final int DONE = allSymbols.size();
    final Map<ISymbol, Integer> low = new HashMap<>();
    final Stack<ISymbol> stack = new Stack<>();
    for(ISymbol start : allSymbols) {
        // No entry in `low` means the symbol has not been visited yet (CLEAN).
        if(low.get(start) == null) {
            traverseFollow(start, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
        }
    }
}

/**
 * Visits {@code v} depth-first in the contains-the-FOLLOWs-of relation. The FIRST sets named by
 * {@code containsTheFirstOf} are added to FOLLOW(v) first, then the FOLLOW sets of the successors are merged
 * in, and finally every member of a completed strongly connected component receives the FOLLOW set of the
 * component's root.
 *
 * @param v
 *            the symbol to visit; must not have an entry in {@code low} yet
 * @param containsTheFirstOf
 *            relation where an entry (A, B) means FOLLOW(A) includes FIRST(B)
 * @param containsTheFollowOf
 *            relation where an entry (A, B) means FOLLOW(A) includes FOLLOW(B)
 * @param DONE
 *            marker written to {@code low} for symbols whose component is finished
 * @param low
 *            per-symbol low-link value; a missing entry means the symbol has not been visited
 * @param stack
 *            symbols of the components that are still open
 */
private void traverseFollow(ISymbol v, SetMultimap<ISymbol, ISymbol> containsTheFirstOf,
    SetMultimap<ISymbol, ISymbol> containsTheFollowOf, int DONE, Map<ISymbol, Integer> low, Stack<ISymbol> stack) {
    stack.push(v);
    final int ownDepth = stack.size() - 1;
    low.put(v, ownDepth);

    // Direct contributions: FIRST sets of the symbols that can come right after v.
    for(ISymbol follower : containsTheFirstOf.get(v)) {
        final ICharacterClass withFirst = v.getFollow().union(follower.getFirst());
        v.setFollow(withFirst);
    }

    // Indirect contributions: FOLLOW sets of the symbols v inherits from.
    for(ISymbol successor : containsTheFollowOf.get(v)) {
        final boolean unvisited = low.get(successor) == null;
        if(unvisited) {
            traverseFollow(successor, containsTheFirstOf, containsTheFollowOf, DONE, low, stack);
        }
        // Unlike the article at compilers.iecc.com, the union is taken for every successor, not only for
        // the ones that were visited just now.
        final ICharacterClass withFollow = v.getFollow().union(successor.getFollow());
        v.setFollow(withFollow);
        // Keep the smallest low-link seen so far.
        low.put(v, Math.min(low.get(v), low.get(successor)));
    }

    if(low.get(v) != ownDepth) {
        // v is not the root of its component; the root will distribute the final set later.
        return;
    }

    // v is the root of this SCC: pop the whole component and hand every member the root's FOLLOW set.
    while(stack.size() > ownDepth) {
        final ISymbol member = stack.pop();
        member.setFollow(v.getFollow());
        low.put(member, DONE);
    }
}

private void calculateRecursion() {
// direct and indirect left recursion :
// depth first search, whenever finding a cycle, those symbols are left recursive with respect to each other
Expand Down Expand Up @@ -521,28 +664,6 @@ private boolean mutuallyRecursive(Priority p) {
|| grammar.getRightRecursiveSymbolsMapping().get(p.higher().getLhs()).contains(p.lower().leftHand());
}

/*
* TODO calculate first and follow sets private void calculateFirstFollow() { for(IProduction p :
* getGrammar().prods.values()) { p.calculateDependencies(getGrammar()); }
*
* tarjanStack = new Stack<>(); first_components = Sets.newHashSet(); for(IProduction p :
* getGrammar().prods.values()) { if(p.firstSet().index == -1) { stronglyConnectedTarjan(p.firstSet(),
* first_components); } } }
*
*
* private void stronglyConnectedTarjan(TableSet v, Set<Set<TableSet>> components) { // Set the depth index for v to
* the smallest unused index v.index = index; v.low_link = index; index++; tarjanStack.push(v); v.onStack = true;
*
* for(TableSet d : v.depends_on) { if(d.index == -1) { // Successor w has not yet been visited; recurse on it
* stronglyConnectedTarjan(d, components); v.add(d.value); d.low_link = Math.min(v.low_link, d.low_link); } else
* if(d.onStack) { // Successor w is in stack S and hence in the current SCC v.low_link = Math.min(v.low_link,
* d.index); } }
*
* TableSet t; // If v is a root node, pop the stack and generate an SCC if(v.low_link == v.index) { Set<TableSet>
* component = Sets.newHashSet(); do { t = tarjanStack.pop(); t.onStack = false; t.add(v.value); component.add(t); }
* while(t != v); components.add(component); } }
*/

private void extractExpressionGrammars(SCCNodes<ISymbol> scc) {

for(ISymbol s : grammar.getSymbols()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
import org.metaborg.sdf2table.grammar.IProduction;
import org.metaborg.sdf2table.grammar.ISymbol;
import org.metaborg.sdf2table.grammar.Symbol;
import org.metaborg.parsetable.states.IState;
import org.metaborg.sdf2table.deepconflicts.ContextualSymbol;
import org.metaborg.sdf2table.grammar.CharacterClassSymbol;
import org.metaborg.sdf2table.grammar.IProduction;
import org.metaborg.sdf2table.grammar.ISymbol;
import org.metaborg.sdf2table.grammar.Symbol;

import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Maps;
Expand Down Expand Up @@ -116,19 +122,24 @@ public void doShift() {

public void doReduces() {
// for each item p_i : A = A0 ... AN .
// add a reduce action reduce([0-MAX_CHAR,eof] / follow(A), p_i)
// add a reduce action reduce(FOLLOW(A) / follow-restriction(A), p_i) -- SLR(1) parsing
for(LRItem item : items) {

if(item.getDotPosition() == item.getProd().arity()) {
int prod_label = pt.productionLabels().get(item.getProd());

ISymbol leftHandSymbol = item.getProd().leftHand();
ICharacterClass fr = leftHandSymbol.followRestriction();

ICharacterClass final_range = leftHandSymbol instanceof ContextualSymbol
? ((ContextualSymbol) leftHandSymbol).getOrigSymbol().getFollow() : leftHandSymbol.getFollow();
// Previous line used to be the following in LR(0). TODO add option to switch between LR(0) and SLR(1)
// ICharacterClass final_range = CharacterClassFactory.FULL_RANGE;

if((fr == null || fr.isEmpty()) && leftHandSymbol.followRestrictionLookahead() == null) {
addReduceAction(item.getProd(), prod_label, CharacterClassFactory.FULL_RANGE, null);
addReduceAction(item.getProd(), prod_label, final_range, null);
} else {
ICharacterClass final_range = CharacterClassFactory.FULL_RANGE;
// Not based on first and follow sets thus, only considering the follow restrictions
// Considering the follow restrictions
if(fr != null && !fr.isEmpty()) {
final_range = final_range.difference(leftHandSymbol.followRestriction());
}
Expand Down
Loading