Skip to content

Commit

Permalink
[VL] Add uniffle integration
Browse files Browse the repository at this point in the history
  • Loading branch information
summaryzb committed Apr 3, 2024
1 parent 68d7526 commit 4a393ee
Show file tree
Hide file tree
Showing 18 changed files with 755 additions and 4 deletions.
61 changes: 61 additions & 0 deletions .github/workflows/velox_docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ on:
- 'gluten-celeborn/common'
- 'gluten-celeborn/package'
- 'gluten-celeborn/velox'
- 'gluten-uniffle/**'
- 'gluten-core/**'
- 'gluten-data/**'
- 'gluten-delta/**'
Expand Down Expand Up @@ -275,6 +276,66 @@ jobs:
# -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
# -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g || true

run-tpc-test-centos8-uniffle:
needs: build-native-lib
strategy:
fail-fast: false
matrix:
spark: ["spark-3.2"]
runs-on: ubuntu-20.04
container: centos:8
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v2
with:
name: velox-native-lib-${{github.sha}}
path: ./cpp/build/releases
- name: Update mirror list
run: |
sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true
sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true
- name: Setup java and maven
run: |
yum update -y && yum install -y java-1.8.0-openjdk-devel wget git
wget https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
tar -xvf apache-maven-3.8.8-bin.tar.gz
mv apache-maven-3.8.8 /usr/lib/maven
- name: Build for Spark ${{ matrix.spark }}
run: |
cd $GITHUB_WORKSPACE/ && \
export MAVEN_HOME=/usr/lib/maven && \
export PATH=${PATH}:${MAVEN_HOME}/bin && \
mvn clean install -P${{ matrix.spark }} -Pbackends-velox -Prss-uniffle -DskipTests
- name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 with uniffle 0.8.0
run: |
export MAVEN_HOME=/usr/lib/maven && \
export PATH=${PATH}:${MAVEN_HOME}/bin && \
export export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk && \
cd /opt && \
git clone -b branch-0.8 https://github.com/apache/incubator-uniffle.git && \
cd incubator-uniffle && \
sed -i '250d' ./server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java && \
sed -i '228d' ./server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java && \
sed -i '226d' ./server/src/main/java/org/apache/uniffle/server/ShuffleTaskManager.java && \
mvn clean install -Phadoop2.8 -DskipTests
cd /opt && \
wget -nv https://archive.apache.org/dist/incubator/uniffle/0.8.0/apache-uniffle-0.8.0-incubating-bin.tar.gz && \
tar xzf apache-uniffle-0.8.0-incubating-bin.tar.gz -C /opt/ && mv /opt/rss-0.8.0-hadoop2.8 /opt/uniffle && \
wget -nv https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz && \
tar xzf hadoop-2.8.5.tar.gz -C /opt/
rm -f /opt/uniffle/jars/server/shuffle-server-0.8.0-SNAPSHOT.jar
cp /opt/incubator-uniffle/server/target/shuffle-server-0.8.1-SNAPSHOT.jar /opt/uniffle/jars/server/
rm -rf /opt/incubator-uniffle
cd /opt/uniffle && mkdir shuffle_data && \
echo -e "XMX_SIZE=16g\nHADOOP_HOME=/opt/hadoop-2.8.5" > ./bin/rss-env.sh && \
echo -e "rss.coordinator.shuffle.nodes.max 1\nrss.rpc.server.port 19999" > ./conf/coordinator.conf && \
echo -e "rss.server.app.expired.withoutHeartbeat 7200000\nrss.server.heartbeat.delay 3000\nrss.rpc.server.port 19997\nrss.jetty.http.port 19996\nrss.server.netty.port 19995\nrss.storage.basePath /opt/uniffle/shuffle_data\nrss.storage.type MEMORY_LOCALFILE\nrss.coordinator.quorum localhost:19999\nrss.server.flush.thread.alive 10\nrss.server.single.buffer.flush.threshold 64m" > ./conf/server.conf && \
bash ./bin/start-coordinator.sh && bash ./bin/start-shuffle-server.sh
cd $GITHUB_WORKSPACE/tools/gluten-it && mvn clean install -Pspark-3.2,rss-uniffle && \
GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
--local --preset=velox-with-uniffle --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1
run-spark-test-spark32:
runs-on: ubuntu-20.04
container: ghcr.io/facebookincubator/velox-dev:circleci-avx
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,8 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
val constructor =
clazz.getConstructor(classOf[SQLMetric], classOf[SQLMetric], classOf[SQLMetric])
constructor.newInstance(readBatchNumRows, numOutputRows, dataSize).asInstanceOf[Serializer]
} else if (GlutenConfig.getConf.isUseUniffleShuffleManager) {
throw new UnsupportedOperationException("temporarily uniffle not support ch ")
} else {
new CHColumnarBatchSerializer(readBatchNumRows, numOutputRows, dataSize)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -349,7 +349,8 @@ object BackendSettings extends BackendSettingsApi {

override def supportColumnarShuffleExec(): Boolean = {
GlutenConfig.getConf.isUseColumnarShuffleManager ||
GlutenConfig.getConf.isUseCelebornShuffleManager
GlutenConfig.getConf.isUseCelebornShuffleManager ||
GlutenConfig.getConf.isUseUniffleShuffleManager
}

override def enableJoinKeysRewrite(): Boolean = false
Expand Down
17 changes: 17 additions & 0 deletions cpp/core/jni/JniWrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,23 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_ShuffleWriterJniWrappe
std::move(partitionWriterOptions),
memoryManager->getArrowMemoryPool(),
std::move(celebornClient));
} else if (partitionWriterType == "uniffle") {
jclass unifflePartitionPusherClass =
createGlobalClassReferenceOrError(env, "Lorg/apache/spark/shuffle/writer/PartitionPusher;");
jmethodID unifflePushPartitionDataMethod =
getMethodIdOrError(env, unifflePartitionPusherClass, "pushPartitionData", "(I[BI)I");
JavaVM* vm;
if (env->GetJavaVM(&vm) != JNI_OK) {
throw gluten::GlutenException("Unable to get JavaVM instance");
}
// rename CelebornClient RssClient
std::shared_ptr<CelebornClient> uniffleClient =
std::make_shared<CelebornClient>(vm, partitionPusher, unifflePushPartitionDataMethod);
partitionWriter = std::make_unique<CelebornPartitionWriter>(
numPartitions,
std::move(partitionWriterOptions),
memoryManager->getArrowMemoryPool(),
std::move(uniffleClient));
} else {
throw gluten::GlutenException("Unrecognizable partition writer type: " + partitionWriterType);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/core/shuffle/Options.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ static constexpr double kDefaultBufferReallocThreshold = 0.25;
static constexpr double kDefaultMergeBufferThreshold = 0.25;
static constexpr bool kEnableBufferedWrite = true;

enum PartitionWriterType { kLocal, kCeleborn };
enum PartitionWriterType { kLocal, kCeleborn, kUniffle };

struct ShuffleReaderOptions {
arrow::Compression::type compressionType = arrow::Compression::type::LZ4_FRAME;
Expand Down
29 changes: 29 additions & 0 deletions gluten-uniffle/package/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>gluten-uniffle</artifactId>
<groupId>org.apache.gluten</groupId>
<version>1.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>gluten-uniffle-package</artifactId>
<packaging>jar</packaging>
<name>Gluten Uniffle Package</name>

<profiles>
<profile>
<id>backends-velox</id>
<dependencies>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-uniffle-velox</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>
89 changes: 89 additions & 0 deletions gluten-uniffle/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>gluten-parent</artifactId>
<groupId>org.apache.gluten</groupId>
<version>1.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>gluten-uniffle</artifactId>
<packaging>pom</packaging>
<name>Gluten Uniffle</name>

<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-core</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.uniffle</groupId>
<artifactId>rss-client-spark${spark.major.version}-shaded</artifactId>
<version>${uniffle.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.scalastyle</groupId>
<artifactId>scalastyle-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
</plugin>
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>backends-velox</id>
<properties>
</properties>
<modules>
<module>velox</module>
<module>package</module>
</modules>
</profile>
</profiles>
</project>
62 changes: 62 additions & 0 deletions gluten-uniffle/velox/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>gluten-uniffle</artifactId>
<groupId>org.apache.gluten</groupId>
<version>1.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>gluten-uniffle-velox</artifactId>
<packaging>jar</packaging>
<name>Gluten Uniffle Velox</name>

<dependencies>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>backends-velox</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.gluten</groupId>
<artifactId>gluten-data</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>

<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.scalastyle</groupId>
<artifactId>scalastyle-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
</plugin>
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
Loading

0 comments on commit 4a393ee

Please sign in to comment.