Skip to content

Commit

Permalink
add uniffle
Browse files Browse the repository at this point in the history
  • Loading branch information
summaryzb committed Jan 15, 2024
1 parent 6a5c64c commit 97b23b1
Show file tree
Hide file tree
Showing 13 changed files with 624 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ class CHSparkPlanExecApi extends SparkPlanExecApi {
val constructor =
clazz.getConstructor(classOf[SQLMetric], classOf[SQLMetric], classOf[SQLMetric])
constructor.newInstance(readBatchNumRows, numOutputRows, dataSize).asInstanceOf[Serializer]
} else if (GlutenConfig.getConf.isUseUniffleShuffleManager) {
throw new UnsupportedOperationException("temporarily uniffle not support ch ")
} else {
new CHColumnarBatchSerializer(readBatchNumRows, numOutputRows, dataSize)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,8 @@ object BackendSettings extends BackendSettingsApi {

override def supportColumnarShuffleExec(): Boolean = {
GlutenConfig.getConf.isUseColumnarShuffleManager ||
GlutenConfig.getConf.isUseCelebornShuffleManager
GlutenConfig.getConf.isUseCelebornShuffleManager ||
GlutenConfig.getConf.isUseUniffleShuffleManager
}

override def enableJoinKeysRewrite(): Boolean = false
Expand Down
23 changes: 23 additions & 0 deletions cpp/core/jni/JniWrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,29 @@ JNIEXPORT jlong JNICALL Java_io_glutenproject_vectorized_ShuffleWriterJniWrapper
std::move(partitionWriterOptions),
memoryManager->getArrowMemoryPool(),
std::move(celebornClient));
partitionWriterCreator = std::make_shared<CelebornPartitionWriterCreator>(std::move(celebornClient));
} else if (partitionWriterType == "uniffle") {
shuffleWriterOptions.partition_writer_type = PartitionWriterType::kUniffle;
jclass unifflePartitionPusherClass =
createGlobalClassReferenceOrError(env, "Lorg/apache/spark/shuffle/writer/PartitionPusher;");
jmethodID unifflePushPartitionDataMethod =
getMethodIdOrError(env, unifflePartitionPusherClass, "pushPartitionData", "(I[B)I");
if (pushBufferMaxSize > 0) {
shuffleWriterOptions.push_buffer_max_size = pushBufferMaxSize;
}
JavaVM* vm;
if (env->GetJavaVM(&vm) != JNI_OK) {
throw gluten::GlutenException("Unable to get JavaVM instance");
}
// rename CelebornClient RssClient
std::shared_ptr<CelebornClient> celebornClient =
std::make_shared<CelebornClient>(vm, partitionPusher, unifflePushPartitionDataMethod);
partitionWriter = std::make_unique<CelebornPartitionWriter>(
numPartitions,
std::move(partitionWriterOptions),
memoryManager->getArrowMemoryPool(),
std::move(celebornClient));
partitionWriterCreator = std::make_shared<CelebornPartitionWriterCreator>(std::move(celebornClient));
} else {
throw gluten::GlutenException("Unrecognizable partition writer type: " + partitionWriterType);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/core/shuffle/Options.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ static constexpr double kDefaultBufferReallocThreshold = 0.25;
static constexpr double kDefaultMergeBufferThreshold = 0.25;
static constexpr bool kEnableBufferedWrite = true;

enum PartitionWriterType { kLocal, kCeleborn };
enum PartitionWriterType { kLocal, kCeleborn, kUniffle };

struct ShuffleReaderOptions {
arrow::Compression::type compressionType = arrow::Compression::type::LZ4_FRAME;
Expand Down
29 changes: 29 additions & 0 deletions gluten-uniffle/package/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>gluten-uniffle</artifactId>
<groupId>io.glutenproject</groupId>
<version>1.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>gluten-uniffle-package</artifactId>
<packaging>jar</packaging>
<name>Gluten Uniffle Package</name>

<profiles>
<profile>
<id>backends-velox</id>
<dependencies>
<dependency>
<groupId>io.glutenproject</groupId>
<artifactId>gluten-uniffle-velox</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</profile>
</profiles>
</project>
89 changes: 89 additions & 0 deletions gluten-uniffle/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>gluten-parent</artifactId>
<groupId>io.glutenproject</groupId>
<version>1.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>gluten-uniffle</artifactId>
<packaging>pom</packaging>
<name>Gluten Uniffle</name>

<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<scope>provided</scope>
<exclusions>
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
<exclusion>
<groupId>com.sun.jersey</groupId>
<artifactId>jersey-json</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>io.glutenproject</groupId>
<artifactId>gluten-core</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.uniffle</groupId>
<artifactId>rss-client-spark${spark.major.version}-shaded</artifactId>
<version>${uniffle.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.scalastyle</groupId>
<artifactId>scalastyle-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
</plugin>
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
</plugin>
</plugins>
</build>
<profiles>
<profile>
<id>backends-velox</id>
<properties>
</properties>
<modules>
<module>velox</module>
<module>package</module>
</modules>
</profile>
</profiles>
</project>
62 changes: 62 additions & 0 deletions gluten-uniffle/velox/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>gluten-uniffle</artifactId>
<groupId>io.glutenproject</groupId>
<version>1.2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>

<artifactId>gluten-uniffle-velox</artifactId>
<packaging>jar</packaging>
<name>Gluten Uniffle Velox</name>

<dependencies>
<dependency>
<groupId>io.glutenproject</groupId>
<artifactId>backends-velox</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>io.glutenproject</groupId>
<artifactId>gluten-data</artifactId>
<version>${project.version}</version>
<scope>provided</scope>
</dependency>
</dependencies>

<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
<plugins>
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.scalastyle</groupId>
<artifactId>scalastyle-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-checkstyle-plugin</artifactId>
</plugin>
<plugin>
<groupId>com.diffplug.spotless</groupId>
<artifactId>spotless-maven-plugin</artifactId>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
</plugin>
</plugins>
</build>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.shuffle.gluten.uniffle;

import org.apache.spark.ShuffleDependency;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkEnv;
import org.apache.spark.TaskContext;
import org.apache.spark.executor.ShuffleWriteMetrics;
import org.apache.spark.shuffle.ColumnarShuffleDependency;
import org.apache.spark.shuffle.RssShuffleHandle;
import org.apache.spark.shuffle.RssShuffleManager;
import org.apache.spark.shuffle.RssSparkConfig;
import org.apache.spark.shuffle.ShuffleHandle;
import org.apache.spark.shuffle.ShuffleWriteMetricsReporter;
import org.apache.spark.shuffle.ShuffleWriter;
import org.apache.spark.shuffle.sort.ColumnarShuffleManager;
import org.apache.spark.shuffle.writer.VeloxUniffleColumnarShuffleWriter;
import org.apache.uniffle.common.exception.RssException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.lang.reflect.Constructor;

public class GlutenRssShuffleManager extends RssShuffleManager {
private static final Logger LOG = LoggerFactory.getLogger(GlutenRssShuffleManager.class);
private static final String GLUTEN_SHUFFLE_MANAGER_NAME =
"org.apache.spark.shuffle.sort.ColumnarShuffleManager";

private static final String VANILLA_UNIFFLE_SHUFFLE_MANAGER_NAME =
"org.apache.spark.shuffle.celeborn.SparkShuffleManager";
private volatile ColumnarShuffleManager _columnarShuffleManager;
private volatile RssShuffleManager _vanillaUniffleShuffleManager;

private ColumnarShuffleManager columnarShuffleManager() {
if (_columnarShuffleManager == null) {
synchronized (this) {
if (_columnarShuffleManager == null) {
_columnarShuffleManager =
initShuffleManager(GLUTEN_SHUFFLE_MANAGER_NAME, sparkConf, isDriver());
}
}
}
return _columnarShuffleManager;
}

private RssShuffleManager vanillaUniffleShuffleManager() {
if (_vanillaUniffleShuffleManager == null) {
synchronized (this) {
if (_vanillaUniffleShuffleManager == null) {
initShuffleManager(VANILLA_UNIFFLE_SHUFFLE_MANAGER_NAME, sparkConf, isDriver());
}
}
}
return _vanillaUniffleShuffleManager;
}

private boolean isDriver() {
return "driver".equals(SparkEnv.get().executorId());
}

private ColumnarShuffleManager initShuffleManager(String name, SparkConf conf, boolean isDriver) {
Constructor constructor;
ColumnarShuffleManager instance;
try {
Class klass = Class.forName(name);
try {
constructor = klass.getConstructor(conf.getClass(), Boolean.TYPE);
instance = (ColumnarShuffleManager) constructor.newInstance(conf, isDriver);
} catch (NoSuchMethodException var7) {
constructor = klass.getConstructor(conf.getClass());
instance = (ColumnarShuffleManager) constructor.newInstance(conf);
}
} catch (Exception e) {
throw new RuntimeException("initColumnManager fail");
}
return instance;
}

public GlutenRssShuffleManager(SparkConf conf, boolean isDriver) {
super(conf, isDriver);
// TODO conf set some config
}

@Override
public <K, V, C> ShuffleHandle registerShuffle(
int shuffleId, ShuffleDependency<K, V, C> dependency) {
return super.registerShuffle(shuffleId, dependency);
}

@Override
public <K, V> ShuffleWriter<K, V> getWriter(
ShuffleHandle handle, long mapId, TaskContext context, ShuffleWriteMetricsReporter metrics) {
if (!(handle instanceof RssShuffleHandle)) {
throw new RssException("Unexpected ShuffleHandle:" + handle.getClass().getName());
}
sparkConf.setIfMissing(
RssSparkConfig.SPARK_RSS_CONFIG_PREFIX + RssSparkConfig.RSS_ROW_BASED, "false");
RssShuffleHandle<K, V, V> rssHandle = (RssShuffleHandle<K, V, V>) handle;
if (rssHandle.getDependency() instanceof ColumnarShuffleDependency) {
setPusherAppId(rssHandle);
String taskId = "" + context.taskAttemptId() + "_" + context.attemptNumber();
ShuffleWriteMetrics writeMetrics;
if (metrics != null) {
writeMetrics = new WriteMetrics(metrics);
} else {
writeMetrics = context.taskMetrics().shuffleWriteMetrics();
}
return new VeloxUniffleColumnarShuffleWriter<>(
rssHandle.getAppId(),
rssHandle.getShuffleId(),
taskId,
context.taskAttemptId(),
writeMetrics,
this,
sparkConf,
shuffleWriteClient,
rssHandle,
this::markFailedTask,
context);
} else {
return super.getWriter(handle, mapId, context, metrics);
}
}
}
Loading

0 comments on commit 97b23b1

Please sign in to comment.