diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml
index 31796c15bdd5..d110d0a6d223 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -521,7 +521,7 @@ jobs:
fail-fast: false
matrix:
spark: ["spark-3.2"]
- celeborn: ["celeborn-0.4.0", "celeborn-0.3.2"]
+ celeborn: ["celeborn-0.4.1", "celeborn-0.3.2-incubating"]
runs-on: ubuntu-20.04
container: ubuntu:22.04
steps:
@@ -557,8 +557,8 @@ jobs:
fi
echo "EXTRA_PROFILE: ${EXTRA_PROFILE}"
cd /opt && mkdir -p celeborn && \
- wget https://archive.apache.org/dist/incubator/celeborn/${{ matrix.celeborn }}-incubating/apache-${{ matrix.celeborn }}-incubating-bin.tgz && \
- tar xzf apache-${{ matrix.celeborn }}-incubating-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \
+ wget https://archive.apache.org/dist/celeborn/${{ matrix.celeborn }}/apache-${{ matrix.celeborn }}-bin.tgz && \
+ tar xzf apache-${{ matrix.celeborn }}-bin.tgz -C /opt/celeborn --strip-components=1 && cd celeborn && \
mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
bash -c "echo -e 'CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g' > ./conf/celeborn-env.sh" && \
bash -c "echo -e 'celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64' > ./conf/celeborn-defaults.conf" && \
diff --git a/docs/get-started/ClickHouse.md b/docs/get-started/ClickHouse.md
index 4352a99e55f9..ab24de7a4fd6 100644
--- a/docs/get-started/ClickHouse.md
+++ b/docs/get-started/ClickHouse.md
@@ -679,13 +679,13 @@ spark.shuffle.manager=org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleMa
quickly start a celeborn cluster
```shell
-wget https://archive.apache.org/dist/incubator/celeborn/celeborn-0.3.0-incubating/apache-celeborn-0.3.0-incubating-bin.tgz && \
-tar -zxvf apache-celeborn-0.3.0-incubating-bin.tgz && \
-mv apache-celeborn-0.3.0-incubating-bin/conf/celeborn-defaults.conf.template apache-celeborn-0.3.0-incubating-bin/conf/celeborn-defaults.conf && \
-mv apache-celeborn-0.3.0-incubating-bin/conf/log4j2.xml.template apache-celeborn-0.3.0-incubating-bin/conf/log4j2.xml && \
+wget https://archive.apache.org/dist/celeborn/celeborn-0.3.2-incubating/apache-celeborn-0.3.2-incubating-bin.tgz && \
+tar -zxvf apache-celeborn-0.3.2-incubating-bin.tgz && \
+mv apache-celeborn-0.3.2-incubating-bin/conf/celeborn-defaults.conf.template apache-celeborn-0.3.2-incubating-bin/conf/celeborn-defaults.conf && \
+mv apache-celeborn-0.3.2-incubating-bin/conf/log4j2.xml.template apache-celeborn-0.3.2-incubating-bin/conf/log4j2.xml && \
mkdir /opt/hadoop && chmod 777 /opt/hadoop && \
-echo -e "celeborn.worker.flusher.threads 4\nceleborn.worker.storage.dirs /tmp\nceleborn.worker.monitor.disk.enabled false" > apache-celeborn-0.3.0-incubating-bin/conf/celeborn-defaults.conf && \
-bash apache-celeborn-0.3.0-incubating-bin/sbin/start-master.sh && bash apache-celeborn-0.3.0-incubating-bin/sbin/start-worker.sh
+echo -e "celeborn.worker.flusher.threads 4\nceleborn.worker.storage.dirs /tmp\nceleborn.worker.monitor.disk.enabled false" > apache-celeborn-0.3.2-incubating-bin/conf/celeborn-defaults.conf && \
+bash apache-celeborn-0.3.2-incubating-bin/sbin/start-master.sh && bash apache-celeborn-0.3.2-incubating-bin/sbin/start-worker.sh
```
### Columnar shuffle mode
diff --git a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java b/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
index f454cf00c656..d196691d1b14 100644
--- a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
+++ b/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
@@ -217,7 +217,13 @@ public boolean unregisterShuffle(int shuffleId) {
}
}
return CelebornUtils.unregisterShuffle(
- lifecycleManager, shuffleClient, shuffleIdTracker, shuffleId, appUniqueId, isDriver());
+ lifecycleManager,
+ shuffleClient,
+ shuffleIdTracker,
+ shuffleId,
+ appUniqueId,
+ throwsFetchFailure,
+ isDriver());
}
@Override
diff --git a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java b/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java
index 9dd4e1d1191e..6b4229ad3037 100644
--- a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java
+++ b/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java
@@ -49,11 +49,21 @@ public static boolean unregisterShuffle(
Object shuffleIdTracker,
int appShuffleId,
String appUniqueId,
+ boolean throwsFetchFailure,
boolean isDriver) {
try {
- // for Celeborn 0.4.0
try {
- if (lifecycleManager != null) {
+ try {
+ // for Celeborn 0.4.1: unregisterAppShuffle(int, boolean)
+ if (lifecycleManager != null) {
+ Method unregisterAppShuffle =
+ lifecycleManager
+ .getClass()
+ .getMethod("unregisterAppShuffle", int.class, boolean.class);
+ unregisterAppShuffle.invoke(lifecycleManager, appShuffleId, throwsFetchFailure);
+ }
+ } catch (NoSuchMethodException ex) {
+ // for Celeborn 0.4.0: fall back to unregisterAppShuffle(int)
Method unregisterAppShuffle =
lifecycleManager.getClass().getMethod("unregisterAppShuffle", int.class);
unregisterAppShuffle.invoke(lifecycleManager, appShuffleId);
diff --git a/pom.xml b/pom.xml
index 81ce0e5d462a..887839ce5fc0 100644
--- a/pom.xml
+++ b/pom.xml
@@ -53,7 +53,7 @@
delta-core
2.4.0
24
- 0.3.2-incubating
+ 0.4.1
0.8.0
15.0.0
15.0.0-gluten
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index 3f1760069792..71db637a8403 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -21,7 +21,7 @@
3.4.2
2.12
3
- 0.3.0-incubating
+ 0.3.2-incubating
0.8.0
1.2.0-SNAPSHOT
32.0.1-jre
@@ -167,7 +167,7 @@
celeborn-0.4
- 0.4.0-incubating
+ 0.4.1