Skip to content

Commit

Permalink
replace epsilon and arrow right by constants
Browse files Browse the repository at this point in the history
  • Loading branch information
SamyaDaleh committed Jan 7, 2024
1 parent b082006 commit 17bab7b
Show file tree
Hide file tree
Showing 33 changed files with 148 additions and 81 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
import java.util.*;
import java.util.stream.Collectors;

import static com.github.samyadaleh.cltoolbox.common.Constants.ARROW_RIGHT;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

/**
* A deduction system that derives consequences from antecedence items and
* tries to generate a goal item. Based on the slides from Laura Kallmeyer about
Expand Down Expand Up @@ -194,11 +197,11 @@ public void printTraceLatex(String[][] trace) {
for (String[] line : trace) {
String id = line[0];
String item = line[1].replace("$", "\\$").replace("•", "\\textbullet{}")
.replace("ε", "$\\epsilon$").replace("->", "$\\rightarrow$");
.replace(EPSILON, "$\\epsilon$").replace(ARROW_RIGHT, "$\\rightarrow$");
String rules =
line[2].replace("ε", "$\\epsilon$").replace("->", "$\\rightarrow$");
line[2].replace(EPSILON, "$\\epsilon$").replace(ARROW_RIGHT, "$\\rightarrow$");
String backPointers = line[3].replace("{", "\\{").replace("}", "\\}");
String trees = line[4].replace("ε", "$\\epsilon$");
String trees = line[4].replace(EPSILON, "$\\epsilon$");
log.info(
id + " & " + item + " & " + rules + " & " + backPointers + " & " + trees + " \\\\");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
import java.util.ArrayList;
import java.util.List;

import static com.github.samyadaleh.cltoolbox.common.Constants.DEDUCTION_RULE_CFG_CYK_AXIOM;
import static com.github.samyadaleh.cltoolbox.common.Constants.DEDUCTION_RULE_CFG_CYK_AXIOM_EPSILON;
import static com.github.samyadaleh.cltoolbox.common.Constants.*;

public class CfgToCykRulesConverter {

Expand All @@ -30,7 +29,7 @@ public static ParsingSchema cfgToCykRules(Cfg cfg, String w)
throw new ParseException("Grammar has to be in Chomsky Normal Form.", 1);
}
String[] wSplit = w.split(" ");
int wLength = wSplit[0].equals("") || wSplit[0].equals("ε")
int wLength = wSplit[0].equals("") || wSplit[0].equals(EPSILON)
? 0 : wSplit.length;
ParsingSchema schema = new ParsingSchema();

Expand Down Expand Up @@ -84,7 +83,7 @@ private static void addCykScanRules(String[] wSplit, ParsingSchema schema,
for (int i = 0; i < wSplit.length; i++) {
if (wSplit[i].equals(rule.getRhs()[0])) {
StaticDeductionRule scan = new StaticDeductionRule();
String itemLength = wSplit[i].equals("") || wSplit[i].equals("ε")
String itemLength = wSplit[i].equals("") || wSplit[i].equals(EPSILON)
? "0" : "1";
ChartItemInterface consequence =
new DeductionChartItem(rule.getLhs(), String.valueOf(i),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import com.github.samyadaleh.cltoolbox.chartparsing.item.AbstractChartItem;
import com.github.samyadaleh.cltoolbox.chartparsing.item.ChartItemInterface;

import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

/**
* Chart Item for Left associative grammar that uses the usual itemForm for
* display, but stores its own data structures for further use.
Expand Down Expand Up @@ -31,7 +33,7 @@ public LagChartItem(String[][] categories, String[] rulePackage) {
categorieRepr.append(cat);
}
if (categorieRepr.charAt(categorieRepr.length()-1) == '(') {
categorieRepr.append("ε");
categorieRepr.append(EPSILON);
}
categorieRepr.append(")");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.util.List;

import static com.github.samyadaleh.cltoolbox.common.Constants.DEDUCTION_RULE_LCFRS_CYK_AXIOM;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

public class LcfrsToCykRulesConverter {

Expand All @@ -34,7 +35,7 @@ private static void addSrcgCykScanRules(String[] wsplit, ParsingSchema schema,
String[] arg = clause.getLhs().getArgumentByIndex(i + 1);
for (int j = 0; j < arg.length; j++) {
if (arg[j].length() == 0) {
treeString.append("ε");
treeString.append(EPSILON);
} else {
treeString.append(clause.getLhs().getSymAt(i + 1, j));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.github.samyadaleh.cltoolbox.common.tag.Tree;

import static com.github.samyadaleh.cltoolbox.common.Constants.DEDUCTION_RULE_TAG_ADJOIN;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

/**
* Adjoin an auxiliary tree into an appropriate node in any other tree.
Expand Down Expand Up @@ -64,7 +65,7 @@ protected void calculateConsequences(String[] itemForm1, String[] itemForm2) {
}
}
String node2Name =
(node2.length() > 1) ? node2.substring(0, node2.length() - 1) : "ε";
(node2.length() > 1) ? node2.substring(0, node2.length() - 1) : EPSILON;
this.name =
DEDUCTION_RULE_TAG_ADJOIN + " " + treeName2 + "[" + node2Name + ","
+ treeName1 + "]";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.github.samyadaleh.cltoolbox.common.tag.Tree;

import static com.github.samyadaleh.cltoolbox.common.Constants.DEDUCTION_RULE_TAG_EARLEY_ADJOIN;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

/**
* Combines an auxiliary tree with another tree to get a new item in which has
Expand Down Expand Up @@ -67,7 +68,7 @@ protected void calculateConsequences(String[] itemForm1, String[] itemForm2) {
}
}
consequence.setTrees(derivedTrees);
String node2name = node2.length() == 0 ? "ε" : node2;
String node2name = node2.length() == 0 ? EPSILON : node2;
this.name =
DEDUCTION_RULE_TAG_EARLEY_ADJOIN + " " + treeName2 + "[" + node2name
+ "," + treeName1 + "]";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import com.github.samyadaleh.cltoolbox.common.tag.Tree;

import static com.github.samyadaleh.cltoolbox.common.Constants.DEDUCTION_RULE_TAG_EARLEY_SUBSTITUTE;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

/**
* If a potential initial tree is complete, substitute it if possible.
Expand Down Expand Up @@ -63,7 +64,7 @@ public TagEarleySubstitute(String outTreeName, String outNode, Tag tag) {
}
// imagine a tree with 1 node where you would substitute into the root
// ...
String outNodeName = outNode.length() == 0 ? "ε" : outNode;
String outNodeName = outNode.length() == 0 ? EPSILON : outNode;
this.name =
"substitute " + outTreeName + "[" + outNodeName + "," + treeName
+ "]";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.util.List;

import static com.github.samyadaleh.cltoolbox.common.Constants.DEDUCTION_RULE_TAG_EARLEY_PREFIXVALID_PREDICTADJOINED;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

public class TagEarleyPrefixValidPredictAdjoined
extends AbstractDynamicDecutionRuleTwoAntecedences {
Expand Down Expand Up @@ -42,7 +43,7 @@ protected void calculateConsequences(String[] itemForm1, String[] itemForm2) {
new DeductionChartItem(treeName2, node2, "lb", iGamma2, m, "-", "-",
m, "0");
List<Tree> derivedTrees = generateDerivatedTrees(itemForm1);
String node2name = node2.length() == 0 ? "ε" : node2;
String node2name = node2.length() == 0 ? EPSILON : node2;
this.name =
DEDUCTION_RULE_TAG_EARLEY_PREFIXVALID_PREDICTADJOINED + " " + treeName2 + "["
+ node2name + "," + treeName1 + "]";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
import java.util.List;
import java.util.stream.Stream;

import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

/**
* Open collection of functions to work with arrays.
*/
Expand Down Expand Up @@ -85,7 +87,7 @@ public static String toString(String[] item) {
representation.append(",");
}
if (element.equals("")) {
representation.append("ε");
representation.append(EPSILON);
} else {
representation.append(element);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
* Class holding constant strings used all around the toolbox.
*/
public class Constants {
/* Symbols */
public static final String EPSILON = "ε";
public static final String ARROW_RIGHT = "->";

/* Static rules*/
public final static String DEDUCTION_RULE_CCG_DEDUCTION_AXIOM = "axiom";
public final static String DEDUCTION_RULE_CFG_CYK_AXIOM = "scan";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,14 @@
import java.util.ArrayList;
import java.util.List;

import static com.github.samyadaleh.cltoolbox.common.Constants.ARROW_RIGHT;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

public class TreeUtils {

public static Tree getTreeOfSrcgClause(Clause clause, List<Integer> vector) {
StringBuilder extractedRule = new StringBuilder();
extractedRule.append(clause.getLhs().getNonterminal()).append(" ->");
extractedRule.append(clause.getLhs().getNonterminal()).append(" ").append(ARROW_RIGHT);
int terminalsInLhs = 0;
for (String symbol : clause.getLhs().getSymbolsAsPlainArray()) {
if (!TreeUtils.symbolIsVariable(clause, symbol)) {
Expand Down Expand Up @@ -55,7 +58,7 @@ public static Tree getTreeOfSrcgClause(Clause clause) {
if (clause.getRhs().size() == 0) {
try {
return new Tree(new CfgProductionRule(
clause.getLhs().getNonterminal() + " -> " + ArrayUtils
clause.getLhs().getNonterminal() + " " + ARROW_RIGHT + " " + ArrayUtils
.getSubSequenceAsString(
clause.getLhs().getSymbolsAsPlainArray(), 0,
clause.getLhs().getSymbolsAsPlainArray().length)));
Expand All @@ -65,7 +68,7 @@ public static Tree getTreeOfSrcgClause(Clause clause) {
}
StringBuilder cfgRuleString =
new StringBuilder(clause.getLhs().getNonterminal());
cfgRuleString.append(" ->");
cfgRuleString.append(" ").append(ARROW_RIGHT);
for (Predicate rhsPred : clause.getRhs()) {
cfgRuleString.append(" ").append(rhsPred.getNonterminal());
}
Expand Down Expand Up @@ -222,7 +225,7 @@ private static List<Tree> collectSubTrees(Tree tree1, Vertex node1,
private static String collectSubtreeAsString(Tree tree2, Vertex node2) {
StringBuilder newTree = new StringBuilder("(");
if (node2.getLabel().equals("")) {
newTree.append("ε");
newTree.append(EPSILON);
} else {
newTree.append(node2.getLabel());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import java.text.ParseException;
import java.util.*;

import static com.github.samyadaleh.cltoolbox.common.Constants.ARROW_RIGHT;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;
import static com.github.samyadaleh.cltoolbox.common.cfg.util.EmptyProductions.isEmptyProductionRule;
import static com.github.samyadaleh.cltoolbox.common.cfg.util.EmptyProductions.nonterminalOccursInAnyRhs;

Expand Down Expand Up @@ -52,7 +54,7 @@ public Cfg(NondeterministicFiniteAutomaton nfa) {
for (String transitionState : entry.getValue()) {
try {
productionRules.add(new CfgProductionRule(
entry.getKey()[0] + " -> " + entry.getKey()[1] + " "
entry.getKey()[0] + " " + ARROW_RIGHT + " " + entry.getKey()[1] + " "
+ transitionState));
} catch (ParseException e) {
throw new RuntimeException(e);
Expand All @@ -61,7 +63,7 @@ public Cfg(NondeterministicFiniteAutomaton nfa) {
}
for (String finalState : nfa.getFinalStates()) {
try {
productionRules.add(new CfgProductionRule(finalState + " -> ε"));
productionRules.add(new CfgProductionRule(finalState + " " + ARROW_RIGHT + " " + EPSILON));
} catch (ParseException e) {
throw new RuntimeException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import java.text.ParseException;

import static com.github.samyadaleh.cltoolbox.common.Constants.ARROW_RIGHT;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

/**
* Representation of a CFG production rule where the lhs consists of one
* nonterminal and the rhs can be any length.
Expand All @@ -14,7 +17,7 @@ public class CfgProductionRule {
public CfgProductionRule(String[] rule) {
this.lhs = rule[0];
String[] ruleSplit = rule[1].split(" ");
if ((ruleSplit.length == 1 && ruleSplit[0].equals("ε"))) {
if ((ruleSplit.length == 1 && ruleSplit[0].equals(EPSILON))) {
this.rhs = new String[] {""};
} else {
this.rhs = ruleSplit;
Expand All @@ -27,7 +30,7 @@ public CfgProductionRule(String[] rule) {
*/
public CfgProductionRule(String lhs, String[] rhs) {
this.lhs = lhs;
if (rhs.length == 1 && rhs[0].equals("ε")) {
if (rhs.length == 1 && rhs[0].equals(EPSILON)) {
this.rhs = new String[] {""};
} else {
this.rhs = rhs;
Expand All @@ -38,12 +41,12 @@ public CfgProductionRule(String lhs, String[] rhs) {
* Creates a rule from a String representation like S -> A B
*/
public CfgProductionRule(String ruleString) throws ParseException {
if (!ruleString.contains("->")) {
throw new ParseException("Separator -> missing in rule " + ruleString, 0);
if (!ruleString.contains(ARROW_RIGHT)) {
throw new ParseException("Separator " + ARROW_RIGHT + " missing in rule " + ruleString, 0);
}
String[] ruleSplit = ruleString.split("->", 2);
String[] ruleSplit = ruleString.split(ARROW_RIGHT, 2);
this.lhs = ruleSplit[0].trim();
if (ruleSplit[1].trim().equals("") || ruleSplit[1].trim().equals("ε")) {
if (ruleSplit[1].trim().equals("") || ruleSplit[1].trim().equals(EPSILON)) {
this.rhs = new String[] {""};
} else {
this.rhs = ruleSplit[1].trim().split(" ");
Expand All @@ -60,9 +63,9 @@ public String[] getRhs() {

@Override public String toString() {
if (rhs.length == 0 || rhs[0].equals("")) {
return lhs + " -> ε";
return lhs + " " + ARROW_RIGHT + " " + EPSILON;
} else {
return lhs + " -> " + String.join(" ", rhs);
return lhs + " " + ARROW_RIGHT + " " + String.join(" ", rhs);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import java.text.ParseException;

import static com.github.samyadaleh.cltoolbox.common.Constants.ARROW_RIGHT;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

/**
* Representation of a context-free rule where the lhs is only allowed to
* contain one symbol and the rule has a probability.
Expand All @@ -28,8 +31,8 @@ public class PcfgProductionRule extends CfgProductionRule {
*/
PcfgProductionRule(String ruleString) throws ParseException {
super(ruleString.split(":", 2)[1]);
if (ruleString.indexOf("->") < ruleString.indexOf(':')) {
throw new ParseException(": has to be left of -> in rule " + ruleString,
if (ruleString.indexOf(ARROW_RIGHT) < ruleString.indexOf(':')) {
throw new ParseException(": has to be left of " + ARROW_RIGHT + " in rule " + ruleString,
0);
}
String[] ruleSplit = ruleString.split(":", 2);
Expand All @@ -43,9 +46,9 @@ public Double getP() {
@Override public String toString() {
double roundedP = Math.round(p * 100.0) / 100.0;
if (this.getRhs()[0].equals("")) {
return roundedP + " : " + this.getLhs() + " -> ε";
return roundedP + " : " + this.getLhs() + " " + ARROW_RIGHT + " " + EPSILON;
} else {
return roundedP + " : " + this.getLhs() + " -> "
return roundedP + " : " + this.getLhs() + " " + ARROW_RIGHT + " "
+ String.join(" ", this.getRhs());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import java.util.Arrays;
import java.util.List;

import static com.github.samyadaleh.cltoolbox.common.Constants.ARROW_RIGHT;
import static com.github.samyadaleh.cltoolbox.common.Constants.EPSILON;

/**
* Takes care that different nonterminals are used as left-corner and in other
* places, so no symbol appears in both positions.
Expand All @@ -32,7 +35,7 @@ public static Cfg doubleSymbols(Cfg cfgOld) throws ParseException {

for (int i = 0; i < newRules.size(); i++) {
CfgProductionRule rule = newRules.get(i);
if (rule.getRhs().length > 0 && !"ε".equals(rule.getRhs()[0])) {
if (rule.getRhs().length > 0 && !EPSILON.equals(rule.getRhs()[0])) {
handleLcOccuringInOtherRules(newRules, newNonterminals, rule);
}
}
Expand All @@ -52,7 +55,7 @@ private static void handleLcOccuringInOtherRules(
// if N occurs in any other production rule not as lc:
for (int j = 0; j < newRules.size(); j++) {
CfgProductionRule rule2 = newRules.get(j);
if (rule2.getRhs().length > 0 && !"ε".equals(rule2.getRhs()[0])) {
if (rule2.getRhs().length > 0 && !EPSILON.equals(rule2.getRhs()[0])) {
for (int k = 1; k < rule2.getRhs().length; k++) {
if (lc.equals(rule2.getRhs()[k])) {
if (newNt == null) {
Expand All @@ -73,7 +76,7 @@ private static void duplicateRules(List<CfgProductionRule> newRules,
CfgProductionRule rule3 = newRules.get(l);
if (lc.equals(rule3.getLhs())) {
newRules.add(new CfgProductionRule(
newNt + " -> " + rule3.toString().split("->")[1]));
newNt + " " + ARROW_RIGHT + " " + rule3.toString().split(ARROW_RIGHT)[1]));
}
}
}
Expand Down
Loading

0 comments on commit 17bab7b

Please sign in to comment.