Skip to content

Commit

Permalink
Merge branch 'master' into pydatagen
Browse files Browse the repository at this point in the history
  • Loading branch information
psybers committed Aug 15, 2021
2 parents e9a935c + dcc749e commit b0e3e5a
Show file tree
Hide file tree
Showing 12 changed files with 117 additions and 61 deletions.
2 changes: 2 additions & 0 deletions src/java/boa/compiler/BoaCompiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,9 @@ public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int

try {
if (!parserErrorListener.hasError) {
TypeCheckingVisitor.warn = true;
TypeCheckingVisitor.instance.start(p, new SymbolTable());
TypeCheckingVisitor.warn = false;

final TaskClassifyingVisitor simpleVisitor = new TaskClassifyingVisitor();
simpleVisitor.start(p);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,10 @@ private void updateVisitClause(final boolean isBefore, final SymbolTable env, fi
if (visitMap.containsKey(typeToFind)) {
vs = visitMap.get(typeToFind);

vs.getBody().getStatements().add(0, generatePushExpStatement(b, token, vs.getComponent().getIdentifier().getToken(), e));
if (isBefore)
vs.getBody().getStatements().add(0, generatePushExpStatement(b, token, vs.getComponent().getIdentifier().getToken(), e));
else
vs.getBody().getStatements().add(0, generatePopExpStatement(b, token, e));
} else {
// 2) Otherwise, add a 'before T' clause with a 's_t_#.push(node)' (or, when !isBefore, an 'after T' clause with the matching pop statement)
final Block blk;
Expand All @@ -236,7 +239,10 @@ private void updateVisitClause(final boolean isBefore, final SymbolTable env, fi
else
blk = new Block();

blk.getStatements().add(0, generatePushExpStatement(b, token, "_n", e));
if (isBefore)
blk.getStatements().add(0, generatePushExpStatement(b, token, "_n", e));
else
blk.getStatements().add(0, generatePopExpStatement(b, token, e));

vs = new VisitStatement(isBefore, new Component(ASTFactory.createIdentifier("_n", env), ASTFactory.createIdentifier(typeToFind, env)), blk);
TypeCheckingVisitor.instance.start(vs, e.env);
Expand Down
21 changes: 16 additions & 5 deletions src/java/boa/compiler/visitors/TypeCheckingVisitor.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
/*
* Copyright 2017, Anthony Urso, Hridesh Rajan, Robert Dyer,
* Copyright 2017-2021, Anthony Urso, Hridesh Rajan, Robert Dyer,
* Iowa State University of Science and Technology
* and Bowling Green State University
* Bowling Green State University
* and University of Nebraska Board of Regents
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -44,6 +45,7 @@ public class TypeCheckingVisitor extends AbstractVisitorNoReturn<SymbolTable> {
BoaType lastRetType;

public static final TypeCheckingVisitor instance = new TypeCheckingVisitor();
public static boolean warn = true;

/**
* This verifies visitors have at most 1 before/after for a type.
Expand Down Expand Up @@ -410,13 +412,18 @@ public void visit(final Factor n, final SymbolTable env) {
warn(node, "directly indexing maps can lead to runtime crashes - replace with lookup(" + n.getOperand() + ", " + new PrettyPrintVisitor().startAndReturn(((Index)node).getStart()) + ", <defaultValue>)");
type = ((BoaMap) type).getType();
} else {
if (n.getOperand().type instanceof BoaTable)
throw new TypeCheckException(n.getOperand(), "can not assign to output variable '" + n.getOperand() + "' - did you mean to use <<?");
throw new TypeCheckException(node, "type '" + type + "' does not allow index operations");
}
} else {
node.accept(this, env);
n.getOperand().env = env;

boolean oldwarn = warn;
warn = false;
final List<BoaType> formalParameters = this.check((Call) node, env);
warn = oldwarn;

try {
type = env.getFunction(((Identifier)n.getOperand()).getToken(), formalParameters).erase(formalParameters);
Expand Down Expand Up @@ -566,16 +573,19 @@ public void visit(final AssignmentStatement n, final SymbolTable env) {
n.env = env;

try {
n.env.setIsLhs(true);
n.env.setIsLhs(true);
n.getLhs().accept(this, env);
n.env.setIsLhs(false);
n.env.setIsLhs(false);
} catch (final TypeCheckException e) {
if (!e.getMessage().startsWith("expected a call to function"))
throw e;
}

n.getRhs().accept(this, env);

if (n.getLhs().type instanceof BoaTable)
throw new TypeCheckException(n.getLhs(), "can not assign to output variable '" + n.getLhs().getOperand() + "' - did you mean to use <<?");

if (!(n.getLhs().type instanceof BoaArray && n.getRhs().type instanceof BoaTuple))
if (!n.getLhs().type.assigns(n.getRhs().type))
throw new TypeCheckException(n.getRhs(), "incompatible types for assignment: required '" + n.getLhs().type + "', found '" + n.getRhs().type + "'");
Expand Down Expand Up @@ -1566,6 +1576,7 @@ protected BoaType checkPairs(final List<Pair> pl, final SymbolTable env) {
}

/**
 * Reports a non-fatal type-checking warning on standard error.
 *
 * The message is emitted only while the static {@code warn} flag is set, so
 * code that re-runs the checker (or tests) can silence warnings by clearing
 * the flag before the call and restoring it afterwards.
 *
 * @param node the AST node the warning refers to; its begin line and
 *             begin/end columns are included in the message
 * @param msg  the warning text to print
 */
protected void warn(final Node node, final String msg) {
    // warnings are suppressed entirely when the flag is off
    if (!warn)
        return;

    System.err.println("WARNING at line " + node.beginLine + ", columns " + node.beginColumn + "-" + node.endColumn + ": " + msg);
}
}
72 changes: 41 additions & 31 deletions src/java/boa/datagen/BoaGenerator.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/*
* Copyright 2015, Hridesh Rajan, Robert Dyer,
* and Iowa State University of Science and Technology
* Copyright 2015-2021, Hridesh Rajan, Robert Dyer,
* Iowa State University of Science and Technology
* and University of Nebraska Board of Regents
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -32,7 +33,7 @@
* The main entry point for Boa tools for generating datasets.
*
* @author hridesh
*
* @author rdyer
*/
public class BoaGenerator {
private static boolean jsonAvailable = true;
Expand All @@ -57,37 +58,41 @@ public static void main(final String[] args) throws IOException {
if (outDirectory.exists())
org.apache.commons.io.FileUtils.deleteQuietly(outDirectory);

/*
* 1. if user provides local json files
* 2. if user provides username and password
* in both the cases json files are going to be available
*/

if (jsonAvailable) {
try {
SeqRepoImporter.main(new String[0]);
} catch (InterruptedException e) {
e.printStackTrace();
}
SeqCombiner.main(new String[0]);
} else if (tokenAvailable) { // when user provides local repo and doesn't have json files
MetaDataMaster mdm = new MetaDataMaster();
mdm.downloadRepoNames(DefaultProperties.TOKEN, DefaultProperties.OUTPUT);

if (cl.hasOption("recover")) {
SeqCombiner.main(new String[0]);
} else { // when user provides local repo and does not have json files
File output = new File(DefaultProperties.OUTPUT);
if (!output.exists())
output.mkdirs();
LocalGitSequenceGenerator.localGitSequenceGenerate(DefaultProperties.GH_GIT_PATH, DefaultProperties.OUTPUT);
try {
MapFileGen.main(new String[0]);
} catch (Exception e) {
e.printStackTrace();
} else {
/*
* 1. if user provides local json files
* 2. if user provides username and password
* in both the cases json files are going to be available
*/

if (jsonAvailable) {
try {
SeqRepoImporter.main(new String[0]);
} catch (InterruptedException e) {
e.printStackTrace();
}
SeqCombiner.main(new String[0]);
} else if (tokenAvailable) { // when user provides local repo and doesn't have json files
MetaDataMaster mdm = new MetaDataMaster();
mdm.downloadRepoNames(DefaultProperties.TOKEN, DefaultProperties.OUTPUT);

SeqCombiner.main(new String[0]);
} else { // when user provides local repo and does not have json files
File output = new File(DefaultProperties.OUTPUT);
if (!output.exists())
output.mkdirs();
LocalGitSequenceGenerator.localGitSequenceGenerate(DefaultProperties.GH_GIT_PATH, DefaultProperties.OUTPUT);
try {
MapFileGen.main(new String[0]);
} catch (Exception e) {
e.printStackTrace();
}
}
}

clear();
clear();
}
}

private static final void printHelp(Options options, String message) {
Expand Down Expand Up @@ -118,6 +123,8 @@ private static void addOptions(Options options) {
options.addOption("targetUser", true, "username of target repository");
options.addOption("targetRepo", true, "name of the target repository");
options.addOption("cache", false, "enable if you want to use already cloned repositories");
options.addOption("skip", true, "skip every Nth project (useful for sampling)");
options.addOption("recover", false, "enable to recover partially built dataset - this will only combine generated data");
options.addOption("debug", false, "enable for debug mode");
options.addOption("debugparse", false, "enable for debug mode when parsing source files");
options.addOption("help", false, "shows this help");
Expand Down Expand Up @@ -193,6 +200,9 @@ private static void handleCmdOptions(CommandLine cl, Options options, final Stri
if (cl.hasOption("cache")) {
DefaultProperties.CACHE = true;
}
if (cl.hasOption("skip")) {
DefaultProperties.SKIPS = cl.getOptionValue("skip");
}
if (cl.hasOption("libs")) {
DefaultProperties.CLASSPATH_ROOT = cl.getOptionValue("libs");
}
Expand Down
11 changes: 6 additions & 5 deletions src/java/boa/datagen/DefaultProperties.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ public class DefaultProperties {
public static String TOTAL_MAX_PROJECTS = "" + Long.MAX_VALUE;
public static String MAX_COMMITS = "10000";
public static String MAX_SIZE_FOR_PROJECT_WITH_COMMITS = String.valueOf(1 << 26); // Integer.MAX_VALUE / 3

public static String SKIPS = "1";

public static boolean DEBUG = false;
public static boolean DEBUGPARSE = false;
public static boolean CACHE = false;
Expand Down Expand Up @@ -62,22 +63,22 @@ public class DefaultProperties {
public static final String SF_JSON_CACHE_PATH = "json_cache";
public static final String SF_SVN_PATH = "svn";
public static final String SF_TICKETS_PATH = "tickets";

// GitHub paths
public static String GH_JSON_PATH = "repos-metadata-Boa-upto1213";
public static String GH_GIT_PATH = "";
public static String GH_ISSUE_PATH = "";
public static final String GH_TICKETS_PATH = "tickets";
public static String TOKEN = null;

public static String CLASSPATH_ROOT = getClasspathRoot();
public static String OUTPUT = "output";

public static boolean STORE_ASCII_PRINTABLE_CONTENTS = true;
public static boolean STORE_COMMITS = true;

public static String localDataPath = null;

@SuppressWarnings("unused")
private static String getRoot() {
File dir = new File(System.getProperty("user.dir"));
Expand Down
40 changes: 24 additions & 16 deletions src/java/boa/datagen/SeqRepoImporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
public class SeqRepoImporter {
private final static boolean debug = Properties.getBoolean("debug", DefaultProperties.DEBUG);
private final static boolean cache = Properties.getBoolean("cache", DefaultProperties.CACHE);
private final static long skips = Long.parseLong(Properties.getProperty("skip", DefaultProperties.SKIPS)) + 1;

private final static File gitRootPath = new File(Properties.getProperty("gh.svn.path", DefaultProperties.GH_GIT_PATH));
final static String jsonPath = Properties.getProperty("gh.json.path", DefaultProperties.GH_JSON_PATH);
Expand Down Expand Up @@ -130,25 +131,30 @@ static void processJSON(final File file) {
try {
final JsonObject rp = repoArray.get(i).getAsJsonObject();
final RepoMetadata repo = new RepoMetadata(rp);
if (repo.id != null && repo.name != null && !processedProjectIds.contains(repo.id)) {
final Project project = repo.toBoaMetaDataProtobuf(); // current project instance only contains metadata

// System.out.println(jRepo.toString());
boolean assigned = false;
while (!getDone() && !assigned) {
for (int j = 0; !getDone() && j < POOL_SIZE; j++) {
if (workers[j].isReady() && !workers[j].isAssigned()) {
workers[j].setProject(project);
workers[j].setAssigned(true);
assigned = true;
break;
if (counter % skips == 0) {
if (repo.id != null && repo.name != null && !processedProjectIds.contains(repo.id)) {
final Project project = repo.toBoaMetaDataProtobuf(); // current project instance only contains metadata

// System.out.println(jRepo.toString());
boolean assigned = false;
while (!getDone() && !assigned) {
for (int j = 0; !getDone() && j < POOL_SIZE; j++) {
if (workers[j].isReady() && !workers[j].isAssigned()) {
workers[j].setProject(project);
workers[j].setAssigned(true);
assigned = true;
break;
}
}
// Thread.sleep(100);
}
// Thread.sleep(100);
if (assigned)
System.out.println("Assigned the " + (++counter) + "th project: " + repo.name + " with id: " + repo.id
+ " from the " + i + "th object of the json file: " + file.getPath());
}
if (assigned)
System.out.println("Assigned the " + (++counter) + "th project: " + repo.name + " with id: " + repo.id
+ " from the " + i + "th object of the json file: " + file.getPath());
} else {
System.out.println("Skipped the " + (++counter) + "th project: " + repo.name + " with id: " + repo.id
+ " from the " + i + "th object of the json file: " + file.getPath());
}
} catch (final Exception e) {
System.err.println("Error proccessing item " + i + " of page " + file.getPath());
Expand Down Expand Up @@ -368,6 +374,8 @@ private synchronized Project storeRepository(final Project project, final int i)

// clone repository
if (!gitDir.exists()) {
if (cache)
return null; // return null to skip non-cached project
final String[] args = { repo.getUrl(), gitDir.getAbsolutePath() };
try {
RepositoryCloner.clone(args);
Expand Down
1 change: 1 addition & 0 deletions src/java/boa/runtime/BoaRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ public Job job(final Path[] ins, final Path out) throws IOException {
static {
options.addOption("p", "profile", false, "if true, profiles the execution of 1 map task");
options.addOption("b", "block", false, "if true, wait for job to finish and show status");
options.addOption("t", "time", false, "if true, dump debug timings for each project");
options.addOption(OptionBuilder.withLongOpt("job")
.withDescription("sets the MySql ID to update with this job's status")
.hasArg()
Expand Down
1 change: 1 addition & 0 deletions src/test/boa/test/compiler/BaseTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ protected StartContext typecheck(final String input, final String error) throws
final StartContext ctx = parse(input);

try {
TypeCheckingVisitor.warn = false;
TypeCheckingVisitor.instance.start(ctx.ast, new SymbolTable());
if (error != null)
fail("expected error: " + error);
Expand Down
2 changes: 1 addition & 1 deletion src/test/boa/test/compiler/TestTraversalBad.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,6 @@ public void traversalWithStop() throws IOException {

/**
 * Runs codegen on the traverse-with-no-return-statement fixture and expects
 * the "missing return statement" error to be reported at line 142.
 */
@Test
public void traversalWithNoReturn() throws IOException {
    // only one expectation per fixture: the error is reported on line 142
    codegen(load(badDir + "traverse-with-no-return-statement.boa"), "Error on line 142: missing return statement");
}
}
5 changes: 5 additions & 0 deletions src/test/boa/test/compiler/TestTypecheckBad.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,4 +125,9 @@ public void aggregatorInEmit() throws IOException {
public void stopInAfter() throws IOException {
    // fixture uses a stop statement inside an 'after' visit; typechecking
    // is expected to fail with the message below
    final String source = load(badDir + "stop-in-after.boa");
    final String expectedError = "Stop statement not allowed inside 'after' visits";

    typecheck(source, expectedError);
}

/**
 * Typechecks the assignoutput.boa fixture and expects the checker to reject
 * the direct assignment to output variable 'o'.
 */
@Test
public void assignOutput() throws IOException {
    final String source = load(badDir + "assignoutput.boa");
    final String expectedError = "can not assign to output variable 'o' - did you mean to use <<?";

    typecheck(source, expectedError);
}
}
10 changes: 9 additions & 1 deletion templates/BoaJavaHadoop.stg
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ public class <name> extends boa.runtime.BoaRunner {
// pass any arguments to map/reduce classes via configuration
if (line.hasOption("excludelist"))
configuration.setStrings("boa.exclude.projects", line.getOptionValue("excludelist"));
if (line.hasOption("time"))
configuration.setBoolean("boa.debug.timings", true);

jb.submit();

Expand Down Expand Up @@ -135,10 +137,13 @@ public class <name> extends boa.runtime.BoaRunner {
@Override
protected void map(final org.apache.hadoop.io.Text key, final org.apache.hadoop.io.BytesWritable value, final org.apache.hadoop.mapreduce.Mapper\<org.apache.hadoop.io.Text, org.apache.hadoop.io.BytesWritable, boa.io.EmitKey, boa.io.EmitValue>.Context context) throws java.io.IOException {
if (excludeProject(key.toString())) {
LOG.error("EXCLUDED PROJECT: " + key.toString());
LOG.info("EXCLUDED PROJECT: " + key.toString());
return;
}

if (context.getConfiguration().getBoolean("boa.debug.timings", false))
LOG.info(key.toString());

try {
boa.functions.BoaMathIntrinsics.random = new java.util.Random(<seed> + key.hashCode());
boa.types.Toplevel.Project _input = boa.types.Toplevel.Project.parseFrom(com.google.protobuf.CodedInputStream.newInstance(value.getBytes(), 0, value.getLength()));
Expand All @@ -148,6 +153,9 @@ public class <name> extends boa.runtime.BoaRunner {
boa.io.BoaOutputCommitter.lastSeenEx = e;
throw new java.io.IOException("map failure for key '" + key.toString() + "'", e);
}

if (context.getConfiguration().getBoolean("boa.debug.timings", false))
LOG.info(key.toString());
}

/** {@inheritDoc} */
Expand Down
3 changes: 3 additions & 0 deletions test/typecheck/errors/assignoutput.boa
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
o: output sum[int][int] of int;

o[3][3] = 3;

0 comments on commit b0e3e5a

Please sign in to comment.