Skip to content

Commit 4da04fc

Browse files
sunchao authored and
dongjoon-hyun committed
[SPARK-37600][BUILD] Upgrade to Hadoop 3.3.2
### What changes were proposed in this pull request?

This PR aims to upgrade to Hadoop 3.3.2. In addition, it also removes the LZ4 wrapper classes added in SPARK-36669, therefore fixing SPARK-36679.

### Why are the changes needed?

Hadoop 3.3.2 has many bug fixes, and we can also remove our internal hacked Hadoop codecs.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs.

Closes apache#34855 from sunchao/SPARK-37600.

Authored-by: Chao Sun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 35c0e5c commit 4da04fc

File tree

11 files changed

+98
-144
lines changed

11 files changed

+98
-144
lines changed

LICENSE-binary

+2
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ net.sf.py4j:py4j
456456
org.jpmml:pmml-model
457457
org.jpmml:pmml-schema
458458
org.threeten:threeten-extra
459+
org.jdom:jdom2
459460

460461
python/lib/py4j-*-src.zip
461462
python/pyspark/cloudpickle.py
@@ -504,6 +505,7 @@ Common Development and Distribution License (CDDL) 1.0
504505
javax.activation:activation http://www.oracle.com/technetwork/java/javase/tech/index-jsp-138795.html
505506
javax.xml.stream:stax-api https://jcp.org/en/jsr/detail?id=173
506507
javax.transaction:javax.transaction-api
508+
javax.xml.bind:jaxb-api
507509

508510

509511
Common Development and Distribution License (CDDL) 1.1

NOTICE-binary

+3
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,9 @@ This product includes code (JaspellTernarySearchTrie) from Java Spelling Checkin
917917
g Package (jaspell): http://jaspell.sourceforge.net/
918918
License: The BSD License (http://www.opensource.org/licenses/bsd-license.php)
919919

920+
This product includes software developed by the JDOM Project (http://www.jdom.org/)
921+
License: https://raw.githubusercontent.com/hunterhacker/jdom/master/LICENSE.txt
922+
920923
The snowball stemmers in
921924
analysis/common/src/java/net/sf/snowball
922925
were developed by Martin Porter and Richard Boulton.

core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Compressor.java

-37
This file was deleted.

core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4Factory.java

-49
This file was deleted.

core/src/main/java/org/apache/hadoop/shaded/net/jpountz/lz4/LZ4SafeDecompressor.java

-36
This file was deleted.

dev/deps/spark-deps-hadoop-3-hive-2.3

+22-19
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@ ST4/4.0.4//ST4-4.0.4.jar
66
activation/1.1.1//activation-1.1.1.jar
77
aircompressor/0.21//aircompressor-0.21.jar
88
algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar
9-
aliyun-java-sdk-core/3.4.0//aliyun-java-sdk-core-3.4.0.jar
10-
aliyun-java-sdk-ecs/4.2.0//aliyun-java-sdk-ecs-4.2.0.jar
11-
aliyun-java-sdk-ram/3.0.0//aliyun-java-sdk-ram-3.0.0.jar
12-
aliyun-java-sdk-sts/3.0.0//aliyun-java-sdk-sts-3.0.0.jar
13-
aliyun-sdk-oss/3.4.1//aliyun-sdk-oss-3.4.1.jar
9+
aliyun-java-sdk-core/4.5.10//aliyun-java-sdk-core-4.5.10.jar
10+
aliyun-java-sdk-kms/2.11.0//aliyun-java-sdk-kms-2.11.0.jar
11+
aliyun-java-sdk-ram/3.1.0//aliyun-java-sdk-ram-3.1.0.jar
12+
aliyun-sdk-oss/3.13.0//aliyun-sdk-oss-3.13.0.jar
1413
annotations/17.0.0//annotations-17.0.0.jar
1514
antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
1615
antlr4-runtime/4.8//antlr4-runtime-4.8.jar
@@ -26,7 +25,7 @@ automaton/1.11-8//automaton-1.11-8.jar
2625
avro-ipc/1.11.0//avro-ipc-1.11.0.jar
2726
avro-mapred/1.11.0//avro-mapred-1.11.0.jar
2827
avro/1.11.0//avro-1.11.0.jar
29-
aws-java-sdk-bundle/1.11.901//aws-java-sdk-bundle-1.11.901.jar
28+
aws-java-sdk-bundle/1.11.1026//aws-java-sdk-bundle-1.11.1026.jar
3029
azure-data-lake-store-sdk/2.3.9//azure-data-lake-store-sdk-2.3.9.jar
3130
azure-keyvault-core/1.0.0//azure-keyvault-core-1.0.0.jar
3231
azure-storage/7.0.1//azure-storage-7.0.1.jar
@@ -67,18 +66,18 @@ generex/1.0.2//generex-1.0.2.jar
6766
gmetric4j/1.0.10//gmetric4j-1.0.10.jar
6867
gson/2.2.4//gson-2.2.4.jar
6968
guava/14.0.1//guava-14.0.1.jar
70-
hadoop-aliyun/3.3.1//hadoop-aliyun-3.3.1.jar
71-
hadoop-annotations/3.3.1//hadoop-annotations-3.3.1.jar
72-
hadoop-aws/3.3.1//hadoop-aws-3.3.1.jar
73-
hadoop-azure-datalake/3.3.1//hadoop-azure-datalake-3.3.1.jar
74-
hadoop-azure/3.3.1//hadoop-azure-3.3.1.jar
75-
hadoop-client-api/3.3.1//hadoop-client-api-3.3.1.jar
76-
hadoop-client-runtime/3.3.1//hadoop-client-runtime-3.3.1.jar
77-
hadoop-cloud-storage/3.3.1//hadoop-cloud-storage-3.3.1.jar
78-
hadoop-cos/3.3.1//hadoop-cos-3.3.1.jar
79-
hadoop-openstack/3.3.1//hadoop-openstack-3.3.1.jar
69+
hadoop-aliyun/3.3.2//hadoop-aliyun-3.3.2.jar
70+
hadoop-annotations/3.3.2//hadoop-annotations-3.3.2.jar
71+
hadoop-aws/3.3.2//hadoop-aws-3.3.2.jar
72+
hadoop-azure-datalake/3.3.2//hadoop-azure-datalake-3.3.2.jar
73+
hadoop-azure/3.3.2//hadoop-azure-3.3.2.jar
74+
hadoop-client-api/3.3.2//hadoop-client-api-3.3.2.jar
75+
hadoop-client-runtime/3.3.2//hadoop-client-runtime-3.3.2.jar
76+
hadoop-cloud-storage/3.3.2//hadoop-cloud-storage-3.3.2.jar
77+
hadoop-cos/3.3.2//hadoop-cos-3.3.2.jar
78+
hadoop-openstack/3.3.2//hadoop-openstack-3.3.2.jar
8079
hadoop-shaded-guava/1.1.1//hadoop-shaded-guava-1.1.1.jar
81-
hadoop-yarn-server-web-proxy/3.3.1//hadoop-yarn-server-web-proxy-3.3.1.jar
80+
hadoop-yarn-server-web-proxy/3.3.2//hadoop-yarn-server-web-proxy-3.3.2.jar
8281
hive-beeline/2.3.9//hive-beeline-2.3.9.jar
8382
hive-cli/2.3.9//hive-cli-2.3.9.jar
8483
hive-common/2.3.9//hive-common-2.3.9.jar
@@ -97,9 +96,9 @@ hive-vector-code-gen/2.3.9//hive-vector-code-gen-2.3.9.jar
9796
hk2-api/2.6.1//hk2-api-2.6.1.jar
9897
hk2-locator/2.6.1//hk2-locator-2.6.1.jar
9998
hk2-utils/2.6.1//hk2-utils-2.6.1.jar
100-
htrace-core4/4.1.0-incubating//htrace-core4-4.1.0-incubating.jar
10199
httpclient/4.5.13//httpclient-4.5.13.jar
102100
httpcore/4.4.14//httpcore-4.4.14.jar
101+
ini4j/0.5.4//ini4j-0.5.4.jar
103102
istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar
104103
ivy/2.5.0//ivy-2.5.0.jar
105104
jackson-annotations/2.13.1//jackson-annotations-2.13.1.jar
@@ -121,10 +120,11 @@ janino/3.0.16//janino-3.0.16.jar
121120
javassist/3.25.0-GA//javassist-3.25.0-GA.jar
122121
javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar
123122
javolution/5.5.1//javolution-5.5.1.jar
123+
jaxb-api/2.2.11//jaxb-api-2.2.11.jar
124124
jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar
125125
jcl-over-slf4j/1.7.32//jcl-over-slf4j-1.7.32.jar
126126
jdo-api/3.0.1//jdo-api-3.0.1.jar
127-
jdom/1.1//jdom-1.1.jar
127+
jdom2/2.0.6//jdom2-2.0.6.jar
128128
jersey-client/2.34//jersey-client-2.34.jar
129129
jersey-common/2.34//jersey-common-2.34.jar
130130
jersey-container-servlet-core/2.34//jersey-container-servlet-core-2.34.jar
@@ -204,6 +204,9 @@ objenesis/3.2//objenesis-3.2.jar
204204
okhttp/3.12.12//okhttp-3.12.12.jar
205205
okio/1.14.0//okio-1.14.0.jar
206206
opencsv/2.3//opencsv-2.3.jar
207+
opentracing-api/0.33.0//opentracing-api-0.33.0.jar
208+
opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar
209+
opentracing-util/0.33.0//opentracing-util-0.33.0.jar
207210
orc-core/1.7.3//orc-core-1.7.3.jar
208211
orc-mapreduce/1.7.3//orc-mapreduce-1.7.3.jar
209212
orc-shims/1.7.3//orc-shims-1.7.3.jar

hadoop-cloud/pom.xml

+7
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,13 @@
267267
<groupId>com.google.guava</groupId>
268268
<artifactId>guava</artifactId>
269269
</exclusion>
270+
<exclusion>
271+
<!--
272+
This is a code coverage library introduced by aliyun-java-sdk-core, only for testing
273+
-->
274+
<groupId>org.jacoco</groupId>
275+
<artifactId>org.jacoco.agent</artifactId>
276+
</exclusion>
270277
</exclusions>
271278
</dependency>
272279
<!--

licenses/LICENSE-jdom.txt

+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/*--
2+
3+
Copyright (C) 2000-2012 Jason Hunter & Brett McLaughlin.
4+
All rights reserved.
5+
6+
Redistribution and use in source and binary forms, with or without
7+
modification, are permitted provided that the following conditions
8+
are met:
9+
10+
1. Redistributions of source code must retain the above copyright
11+
notice, this list of conditions, and the following disclaimer.
12+
13+
2. Redistributions in binary form must reproduce the above copyright
14+
notice, this list of conditions, and the disclaimer that follows
15+
these conditions in the documentation and/or other materials
16+
provided with the distribution.
17+
18+
3. The name "JDOM" must not be used to endorse or promote products
19+
derived from this software without prior written permission. For
20+
written permission, please contact <request_AT_jdom_DOT_org>.
21+
22+
4. Products derived from this software may not be called "JDOM", nor
23+
may "JDOM" appear in their name, without prior written permission
24+
from the JDOM Project Management <request_AT_jdom_DOT_org>.
25+
26+
In addition, we request (but do not require) that you include in the
27+
end-user documentation provided with the redistribution and/or in the
28+
software itself an acknowledgement equivalent to the following:
29+
"This product includes software developed by the
30+
JDOM Project (http://www.jdom.org/)."
31+
Alternatively, the acknowledgment may be graphical using the logos
32+
available at http://www.jdom.org/images/logos.
33+
34+
THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
35+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
36+
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
37+
DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT
38+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
41+
USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
42+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
43+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
44+
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
45+
SUCH DAMAGE.
46+
47+
This software consists of voluntary contributions made by many
48+
individuals on behalf of the JDOM Project and was originally
49+
created by Jason Hunter <jhunter_AT_jdom_DOT_org> and
50+
Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information
51+
on the JDOM Project, please see <http://www.jdom.org/>.
52+
53+
*/
54+

pom.xml

+3-1
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@
120120
<sbt.project.name>spark</sbt.project.name>
121121
<slf4j.version>1.7.32</slf4j.version>
122122
<log4j.version>2.17.1</log4j.version>
123-
<hadoop.version>3.3.1</hadoop.version>
123+
<!-- make sure to update IsolatedClientLoader whenever this version is changed -->
124+
<hadoop.version>3.3.2</hadoop.version>
124125
<protobuf.version>2.5.0</protobuf.version>
125126
<yarn.version>${hadoop.version}</yarn.version>
126127
<zookeeper.version>3.6.2</zookeeper.version>
@@ -3427,6 +3428,7 @@
34273428
<profile>
34283429
<id>hadoop-2</id>
34293430
<properties>
3431+
<!-- make sure to update IsolatedClientLoader whenever this version is changed -->
34303432
<hadoop.version>2.7.4</hadoop.version>
34313433
<curator.version>2.7.1</curator.version>
34323434
<commons-io.version>2.4</commons-io.version>

project/MimaExcludes.scala

+6-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,12 @@ object MimaExcludes {
4848
// [SPARK-37780][SQL] QueryExecutionListener support SQLConf as constructor parameter
4949
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.util.ExecutionListenerManager.this"),
5050
// [SPARK-37786][SQL] StreamingQueryListener support use SQLConf.get to get corresponding SessionState's SQLConf
51-
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.this")
51+
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.this"),
52+
53+
// [SPARK-37600][BUILD] Upgrade to Hadoop 3.3.2
54+
ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Compressor"),
55+
ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Factory"),
56+
ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4SafeDecompressor")
5257
)
5358

5459
// Exclude rules for 3.2.x from 3.1.1

sql/hive/src/main/scala/org/apache/spark/sql/hive/client/IsolatedClientLoader.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ private[hive] object IsolatedClientLoader extends Logging {
6969
// If the error message contains hadoop, it is probably because the hadoop
7070
// version cannot be resolved.
7171
val fallbackVersion = if (VersionUtils.isHadoop3) {
72-
"3.3.1"
72+
"3.3.2"
7373
} else {
7474
"2.7.4"
7575
}

0 commit comments

Comments
 (0)