Skip to content

Commit

Permalink
GIRAPH-13: Port Giraph to YARN
Browse files Browse the repository at this point in the history
  • Loading branch information
ereisman committed Apr 3, 2013
1 parent 67f5f74 commit b2dff27
Show file tree
Hide file tree
Showing 20 changed files with 2,578 additions and 32 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
Giraph Change Log

Release 0.2.0 - unreleased
GIRAPH-13: Port Giraph to YARN (ereisman)

GIRAPH-600: Create an option to do output during computation (majakabiljo)

GIRAPH-599: Hive IO dependency issues with some Hadoop profiles (nitay via majakabiljo)
Expand Down
5 changes: 4 additions & 1 deletion checkstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,10 @@
<!-- Switch statements should be complete and with independent cases -->
<module name="FallThrough" />
<module name="MissingSwitchDefault" />
<module name="RedundantThrows"/>
<!-- For hadoop_yarn profile, some YARN exceptions aren't loading in checkstyle -->
<module name="RedundantThrows">
<property name="suppressLoadErrors" value="true" />
</module>
<module name="SimplifyBooleanExpression"/>
<module name="SimplifyBooleanReturn"/>
<!-- Only one statment per line allowed -->
Expand Down
159 changes: 158 additions & 1 deletion giraph-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,18 @@ under the License.
</activation>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>munge-maven-plugin</artifactId>
Expand All @@ -147,6 +159,18 @@ under the License.
<id>hadoop_1.0</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>munge-maven-plugin</artifactId>
Expand All @@ -159,6 +183,18 @@ under the License.
<id>hadoop_non_secure</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>munge-maven-plugin</artifactId>
Expand All @@ -183,6 +219,18 @@ under the License.
<id>hadoop_facebook</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>munge-maven-plugin</artifactId>
Expand Down Expand Up @@ -211,6 +259,18 @@ under the License.
<id>hadoop_0.23</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>munge-maven-plugin</artifactId>
Expand All @@ -223,24 +283,121 @@ under the License.
</build>
</profile>

<!-- Currently supports hadoop-2.0.3-alpha
(see hadoop_yarn profile in giraph-parent POM to change) -->
<profile>
<id>hadoop_yarn</id>
<build>
<plugins>
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>munge-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
</profile>

<!-- Unmunged profiles are below. -->

<profile>
<id>hadoop_2.0.0</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
</plugins>
</build>
</profile>

<profile>
<id>hadoop_2.0.1</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
</plugins>
</build>

</profile>

<profile>
<id>hadoop_2.0.2</id>
<id>hadoop_2.0.2</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
</plugins>
</build>
</profile>

<profile>
<id>hadoop_2.0.3</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
</plugins>
</build>
</profile>

<profile>
<id>hadoop_trunk</id>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<excludes>
<exclude>**/yarn/**</exclude>
</excludes>
<testExcludes>
<exclude>**/yarn/**</exclude>
</testExcludes>
</configuration>
</plugin>
</plugins>
</build>
</profile>
</profiles>

Expand Down
15 changes: 14 additions & 1 deletion giraph-core/src/main/java/org/apache/giraph/GiraphRunner.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import org.apache.giraph.utils.ConfigurationUtils;
import org.apache.giraph.conf.GiraphConfiguration;
import org.apache.giraph.job.GiraphJob;
/*if[PURE_YARN]
import org.apache.giraph.yarn.GiraphYarnClient;
end[PURE_YARN]*/
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
Expand Down Expand Up @@ -64,16 +67,26 @@ public void setConf(Configuration conf) {
* @return job run exit code
*/
public int run(String[] args) throws Exception {
if (null == getConf()) { // for YARN profile
conf = new Configuration();
}
GiraphConfiguration giraphConf = new GiraphConfiguration(getConf());
CommandLine cmd = ConfigurationUtils.parseArgs(giraphConf, args);
if (null == cmd) {
return 0; // user requested help/info printout, don't run a job.
}

// set up job for various platforms
final String vertexClassName = args[0];
GiraphJob job = new GiraphJob(giraphConf, "Giraph: " + vertexClassName);
final String jobName = "Giraph: " + vertexClassName;
/*if[PURE_YARN]
GiraphYarnClient job = new GiraphYarnClient(giraphConf, jobName);
else[PURE_YARN]*/
GiraphJob job = new GiraphJob(giraphConf, jobName);
prepareHadoopMRJob(job, cmd);
/*end[PURE_YARN]*/

// run the job, collect results
if (LOG.isDebugEnabled()) {
LOG.debug("Attempting to run Vertex: " + vertexClassName);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,16 @@ public static int getMaxTasks(Configuration conf) {
int maxWorkers = conf.getInt(GiraphConstants.MAX_WORKERS, 0);
boolean splitMasterWorker = GiraphConstants.SPLIT_MASTER_WORKER.get(conf);
int maxTasks = maxWorkers;
if (splitMasterWorker) {
// if this is a YARN job, separate ZK should already be running
boolean isYarnJob = GiraphConstants.IS_PURE_YARN_JOB.get(conf);
if (splitMasterWorker && !isYarnJob) {
int zkServers = GiraphConstants.ZOOKEEPER_SERVER_COUNT.get(conf);
maxTasks += zkServers;
}
if (LOG.isDebugEnabled()) {
LOG.debug("getMaxTasks: Max workers = " + maxWorkers +
", split master/worker = " + splitMasterWorker +
", is YARN-only job = " + isYarnJob +
", total max tasks = " + maxTasks);
}
return maxTasks;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,10 @@ public final void setZooKeeperConfiguration(String serverList) {
set(ZOOKEEPER_LIST, serverList);
}

/**
* Getter for SPLIT_MASTER_WORKER flag.
* @return boolean flag value.
*/
public final boolean getSplitMasterWorker() {
return SPLIT_MASTER_WORKER.get(this);
}
Expand Down Expand Up @@ -474,6 +478,50 @@ public int getTaskPartition() {
return getInt("mapred.task.partition", -1);
}

/**
* Is this a "pure YARN" Giraph job, or is a MapReduce layer (v1 or v2)
* actually managing our cluster nodes, i.e. each task is a Mapper.
* @return TRUE if this is a pure YARN job.
*/
public boolean isPureYarnJob() {
return IS_PURE_YARN_JOB.get(this);
}

/**
* Jars required in "Pure YARN" jobs (names only, no paths) should
* be listed here in full, including Giraph framework jar(s).
* @return the comma-separated list of jar names for export to cluster.
*/
public String getYarnLibJars() {
return GIRAPH_YARN_LIBJARS.get(this);
}

/**
* Populate jar list for Pure YARN jobs.
* @param jarList a comma-separated list of jar names
*/
public void setYarnLibJars(String jarList) {
GIRAPH_YARN_LIBJARS.set(this, jarList);
}

/**
* Get heap size (in MB) for each task in our Giraph job run,
* assuming this job will run on the "pure YARN" profile.
* @return the heap size for all tasks, in MB
*/
public int getYarnTaskHeapMb() {
return GIRAPH_YARN_TASK_HEAP_MB.get(this);
}

/**
* Set heap size for Giraph tasks in our job run, assuming
* the job will run on the "pure YARN" profile.
* @param heapMb the heap size for all tasks
*/
public void setYarnTaskHeapMb(int heapMb) {
GIRAPH_YARN_TASK_HEAP_MB.set(this, heapMb);
}

/**
* Get the ZooKeeper list.
*
Expand All @@ -496,10 +544,27 @@ public boolean useLogThreadLayout() {
return LOG_THREAD_LAYOUT.get(this);
}

/**
* is this job run a local test?
* @return the test status as recorded in the Configuration
*/
public boolean getLocalTestMode() {
return LOCAL_TEST_MODE.get(this);
}

/**
* Flag this job as a local test run.
* @param flag the test status for this job
*/
public void setLocalTestMode(boolean flag) {
LOCAL_TEST_MODE.set(this, flag);
}

/**
* The number of server tasks in our ZK quorum for
* this job run.
* @return the number of ZK servers in the quorum
*/
public int getZooKeeperServerCount() {
return ZOOKEEPER_SERVER_COUNT.get(this);
}
Expand Down
Loading

0 comments on commit b2dff27

Please sign in to comment.