Merge remote-tracking branch 'origin/branch-25.02' into 241204_metrics_on_gpu_contention
binmahone committed Dec 12, 2024
2 parents af093a6 + 38d66b0 commit f76f6fe
Showing 118 changed files with 980 additions and 527 deletions.
58 changes: 58 additions & 0 deletions .github/workflows/license-header-check.yml
@@ -0,0 +1,58 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# A workflow to check copyright/license header
name: license header check

on:
  pull_request:
    types: [opened, synchronize, reopened]

jobs:
  license-header-check:
    runs-on: ubuntu-latest
    if: "!contains(github.event.pull_request.title, '[bot]')"
    steps:
      - name: Get checkout depth
        run: |
          echo "PR_FETCH_DEPTH=$(( ${{ github.event.pull_request.commits }} + 10 ))" >> $GITHUB_ENV
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: ${{ env.PR_FETCH_DEPTH }}

      - name: license-header-check
        uses: NVIDIA/spark-rapids-common/license-header-check@main
        with:
          included_file_patterns: |
            *.yml,
            *.yaml,
            *.sh,
            *.xml,
            *.properties,
            *.scala,
            *.py,
            build/*,
            *.cpp,
            *Dockerfile*,
            *Jenkinsfile*,
            *.ini,
            *.java,
            *.fbs
          excluded_file_patterns: |
            *target/*,
            thirdparty/*,
            sql-plugin/src/main/java/com/nvidia/spark/rapids/format/*
6 changes: 4 additions & 2 deletions .github/workflows/mvn-verify-check.yml
@@ -53,7 +53,8 @@ jobs:
         id: generateCacheKey
         run: |
           set -x
-          cacheKey="${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}-${{ github.event.pull_request.base.ref }}-$(date +'%Y-%m-%d')"
+          depsSHA1=$(. .github/workflows/mvn-verify-check/get-deps-sha1.sh 2.12)
+          cacheKey="${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}-${{ github.event.pull_request.base.ref }}-${depsSHA1}"
           echo "dailyCacheKey=$cacheKey" | tee $GITHUB_ENV $GITHUB_OUTPUT
     - name: Cache local Maven repository
       id: cache
@@ -165,7 +166,8 @@ jobs:
         id: generateCacheKey
         run: |
           set -x
-          cacheKey="${{ runner.os }}-maven-scala213-${{ hashFiles('**/pom.xml') }}-${{ github.event.pull_request.base.ref }}-$(date +'%Y-%m-%d')"
+          depsSHA1=$(. .github/workflows/mvn-verify-check/get-deps-sha1.sh 2.13)
+          cacheKey="${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}-${{ github.event.pull_request.base.ref }}-${depsSHA1}"
           echo "scala213dailyCacheKey=$cacheKey" | tee $GITHUB_ENV $GITHUB_OUTPUT
     - name: Cache local Maven repository
       id: cache
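In effect, the Maven cache key now rotates only when the snapshot dependencies actually change, rather than once per day. A hedged sketch of the resulting key layout (every value below is invented for illustration; the real segments come from the workflow context and from get-deps-sha1.sh, shown next):

```bash
# Illustration only: all values are made up.
os="Linux"                    # ${{ runner.os }}
pom_hash="3f2a9c1d"           # ${{ hashFiles('**/pom.xml') }}
base_ref="branch-25.02"       # ${{ github.event.pull_request.base.ref }}
deps_sha1="9d41e0c2b7a86f5d"  # output of get-deps-sha1.sh
echo "${os}-maven-${pom_hash}-${base_ref}-${deps_sha1}"
# -> Linux-maven-3f2a9c1d-branch-25.02-9d41e0c2b7a86f5d
```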
36 changes: 36 additions & 0 deletions .github/workflows/mvn-verify-check/get-deps-sha1.sh
@@ -0,0 +1,36 @@
#!/bin/bash

# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

scala_ver=${1:-"2.12"}
base_URL="https://oss.sonatype.org/service/local/artifact/maven/resolve"
project_jni="spark-rapids-jni"
project_private="rapids-4-spark-private_${scala_ver}"

jni_ver=$(mvn help:evaluate -q -pl dist -Dexpression=spark-rapids-jni.version -DforceStdout)
private_ver=$(mvn help:evaluate -q -pl dist -Dexpression=spark-rapids-private.version -DforceStdout)

# Fall back to today's date when the artifact cannot be resolved, so the
# cache key still rotates daily.
jni_sha1=$(curl -s -H "Accept: application/json" \
  "${base_URL}?r=snapshots&g=com.nvidia&a=${project_jni}&v=${jni_ver}&c=&e=jar&wt=json" \
  | jq .data.sha1 || date +'%Y-%m-%d')
private_sha1=$(curl -s -H "Accept: application/json" \
  "${base_URL}?r=snapshots&g=com.nvidia&a=${project_private}&v=${private_ver}&c=&e=jar&wt=json" \
  | jq .data.sha1 || date +'%Y-%m-%d')

sha1md5=$(echo -n "${jni_sha1}_${private_sha1}" | md5sum | awk '{print $1}')

echo $sha1md5
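The `jq .data.sha1` filter implies the resolve endpoint answers with JSON carrying a `data.sha1` field; the sketch below shows a standalone query built from the same coordinates. The response shape and the version value are assumptions inferred from the script, not confirmed against the Sonatype API docs:

```bash
# Hypothetical standalone query; the version value is illustrative.
curl -s -H "Accept: application/json" \
  "https://oss.sonatype.org/service/local/artifact/maven/resolve?r=snapshots&g=com.nvidia&a=spark-rapids-jni&v=25.02.0-SNAPSHOT&c=&e=jar&wt=json" \
  | jq .data.sha1
# Expected: a quoted 40-character hex SHA-1 of the resolved snapshot jar.
```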
21 changes: 12 additions & 9 deletions .github/workflows/mvn-verify-check/populate-daily-cache.sh
@@ -14,27 +14,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-set -x
-max_retry=3; delay=30; i=1
+set -e
+set -o pipefail

 if [[ $SCALA_VER == '2.12' ]]; then
     pom='pom.xml'
 elif [[ $SCALA_VER == '2.13' ]]; then
     pom='scala2.13/pom.xml'
 fi

+max_retry=3; delay=30; i=1
 while true; do
+    buildvers=($(python build/get_buildvers.py no_snapshots $pom | tr -d ',')) &&
     {
-        python build/get_buildvers.py "no_snapshots.buildvers" $pom | tr -d ',' | \
-        xargs -n 1 -I {} bash -c \
-        "mvn $COMMON_MVN_FLAGS --file $pom -Dbuildver={} de.qaware.maven:go-offline-maven-plugin:resolve-dependencies"
+        for buildver in "${buildvers[@]}"; do
+            mvn $COMMON_MVN_FLAGS --file $pom -Dbuildver=$buildver de.qaware.maven:go-offline-maven-plugin:resolve-dependencies
+        done
     } && {
         # compile base versions to cache scala compiler and compiler bridge
-        mvn $COMMON_MVN_FLAGS --file $pom \
-            process-test-resources -pl sql-plugin-api -am
+        mvn $COMMON_MVN_FLAGS --file $pom process-test-resources -pl sql-plugin-api -am
     } && break || {
         if [[ $i -le $max_retry ]]; then
             echo "mvn command failed. Retry $i/$max_retry."; ((i++)); sleep $delay; ((delay=delay*2))
         else
             echo "mvn command failed. Exit 1"; exit 1
         fi
     }
 done
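For local testing, the script can be driven the same way the workflow drives it; a minimal sketch, assuming `SCALA_VER` and `COMMON_MVN_FLAGS` are its only inputs (the flag values here are illustrative, not the workflow's actual settings):

```bash
# Hypothetical local run; flag values are illustrative.
export SCALA_VER=2.13
export COMMON_MVN_FLAGS="-B -ntp -DskipTests"
bash .github/workflows/mvn-verify-check/populate-daily-cache.sh
```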
8 changes: 4 additions & 4 deletions CONTRIBUTING.md
@@ -127,15 +127,15 @@ mvn -pl dist -PnoSnapshots package -DskipTests
 Verify that shim-specific classes are hidden from a conventional classloader.

 ```bash
-$ javap -cp dist/target/rapids-4-spark_2.12-24.12.0-SNAPSHOT-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl
+$ javap -cp dist/target/rapids-4-spark_2.12-25.02.0-SNAPSHOT-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl
 Error: class not found: com.nvidia.spark.rapids.shims.SparkShimImpl
 ```

 However, its bytecode can be loaded if prefixed with `spark3XY` not contained in the package name

 ```bash
-$ javap -cp dist/target/rapids-4-spark_2.12-24.12.0-SNAPSHOT-cuda11.jar spark320.com.nvidia.spark.rapids.shims.SparkShimImpl | head -2
-Warning: File dist/target/rapids-4-spark_2.12-24.12.0-SNAPSHOT-cuda11.jar(/spark320/com/nvidia/spark/rapids/shims/SparkShimImpl.class) does not contain class spark320.com.nvidia.spark.rapids.shims.SparkShimImpl
+$ javap -cp dist/target/rapids-4-spark_2.12-25.02.0-SNAPSHOT-cuda11.jar spark320.com.nvidia.spark.rapids.shims.SparkShimImpl | head -2
+Warning: File dist/target/rapids-4-spark_2.12-25.02.0-SNAPSHOT-cuda11.jar(/spark320/com/nvidia/spark/rapids/shims/SparkShimImpl.class) does not contain class spark320.com.nvidia.spark.rapids.shims.SparkShimImpl
 Compiled from "SparkShims.scala"
 public final class com.nvidia.spark.rapids.shims.SparkShimImpl {
 ```
@@ -178,7 +178,7 @@ mvn package -pl dist -am -Dbuildver=340 -DallowConventionalDistJar=true
 Verify `com.nvidia.spark.rapids.shims.SparkShimImpl` is conventionally loadable:
 ```bash
-$ javap -cp dist/target/rapids-4-spark_2.12-24.12.0-SNAPSHOT-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl | head -2
+$ javap -cp dist/target/rapids-4-spark_2.12-25.02.0-SNAPSHOT-cuda11.jar com.nvidia.spark.rapids.shims.SparkShimImpl | head -2
 Compiled from "SparkShims.scala"
 public final class com.nvidia.spark.rapids.shims.SparkShimImpl {
 ```
2 changes: 1 addition & 1 deletion README.md
@@ -73,7 +73,7 @@ as a `provided` dependency.
 <dependency>
     <groupId>com.nvidia</groupId>
     <artifactId>rapids-4-spark_2.12</artifactId>
-    <version>24.12.0-SNAPSHOT</version>
+    <version>25.02.0-SNAPSHOT</version>
     <scope>provided</scope>
 </dependency>
```
4 changes: 2 additions & 2 deletions aggregator/pom.xml
@@ -22,13 +22,13 @@
 <parent>
     <groupId>com.nvidia</groupId>
     <artifactId>rapids-4-spark-jdk-profiles_2.12</artifactId>
-    <version>24.12.0-SNAPSHOT</version>
+    <version>25.02.0-SNAPSHOT</version>
     <relativePath>../jdk-profiles/pom.xml</relativePath>
 </parent>
 <artifactId>rapids-4-spark-aggregator_2.12</artifactId>
 <name>RAPIDS Accelerator for Apache Spark Aggregator</name>
 <description>Creates an aggregated shaded package of the RAPIDS plugin for Apache Spark</description>
-<version>24.12.0-SNAPSHOT</version>
+<version>25.02.0-SNAPSHOT</version>

 <properties>
     <rapids.module>aggregator</rapids.module>
4 changes: 2 additions & 2 deletions api_validation/pom.xml
@@ -22,11 +22,11 @@
 <parent>
     <groupId>com.nvidia</groupId>
     <artifactId>rapids-4-spark-shim-deps-parent_2.12</artifactId>
-    <version>24.12.0-SNAPSHOT</version>
+    <version>25.02.0-SNAPSHOT</version>
     <relativePath>../shim-deps/pom.xml</relativePath>
 </parent>
 <artifactId>rapids-4-spark-api-validation_2.12</artifactId>
-<version>24.12.0-SNAPSHOT</version>
+<version>25.02.0-SNAPSHOT</version>

 <properties>
     <rapids.module>api_validation</rapids.module>
2 changes: 1 addition & 1 deletion build/get_buildvers.py
@@ -34,7 +34,7 @@ def _get_buildvers(buildvers, pom_file, logger=None):
         else:
             no_snapshots.append(release)
     excluded_shims = pom.find(".//pom:dyn.shim.excluded.releases", ns)
-    if excluded_shims is not None:
+    if excluded_shims is not None and excluded_shims.text:
         for removed_shim in [x.strip() for x in excluded_shims.text.split(",")]:
             if removed_shim in snapshots:
                 snapshots.remove(removed_shim)
6 changes: 3 additions & 3 deletions datagen/README.md
@@ -24,12 +24,12 @@ Where `$SPARK_VERSION` is a compressed version number, like 330 for Spark 3.3.0.

 After this the jar should be at
 `target/datagen_2.12-$PLUGIN_VERSION-spark$SPARK_VERSION.jar`
-for example a Spark 3.3.0 jar for the 24.12.0 release would be
-`target/datagen_2.12-24.12.0-spark330.jar`
+for example a Spark 3.3.0 jar for the 25.02.0 release would be
+`target/datagen_2.12-25.02.0-spark330.jar`

 To get a spark shell with this you can run
 ```shell
-spark-shell --jars target/datagen_2.12-24.12.0-spark330.jar
+spark-shell --jars target/datagen_2.12-25.02.0-spark330.jar
 ```

 After that you should be good to go.
2 changes: 1 addition & 1 deletion datagen/ScaleTest.md
@@ -44,7 +44,7 @@ $SPARK_HOME/bin/spark-submit \
 --conf spark.sql.parquet.datetimeRebaseModeInWrite=CORRECTED \
 --class com.nvidia.rapids.tests.scaletest.ScaleTestDataGen \ # the main class
 --jars $SPARK_HOME/examples/jars/scopt_2.12-3.7.1.jar \ # one dependency jar just shipped with Spark under $SPARK_HOME
-./target/datagen_2.12-24.12.0-SNAPSHOT-spark332.jar \
+./target/datagen_2.12-25.02.0-SNAPSHOT-spark332.jar \
 1 \
 10 \
 parquet \
4 changes: 2 additions & 2 deletions datagen/pom.xml
@@ -21,13 +21,13 @@
 <parent>
     <groupId>com.nvidia</groupId>
     <artifactId>rapids-4-spark-shim-deps-parent_2.12</artifactId>
-    <version>24.12.0-SNAPSHOT</version>
+    <version>25.02.0-SNAPSHOT</version>
     <relativePath>../shim-deps/pom.xml</relativePath>
 </parent>
 <artifactId>datagen_2.12</artifactId>
 <name>Data Generator</name>
 <description>Tools for generating large amounts of data</description>
-<version>24.12.0-SNAPSHOT</version>
+<version>25.02.0-SNAPSHOT</version>
 <properties>
     <rapids.module>datagen</rapids.module>
     <target.classifier/>
@@ -24,6 +24,6 @@ import org.apache.spark.sql.catalyst.expressions.Expression
 import org.apache.spark.sql.internal.ExpressionUtils.{column, expression}

 object DataGenExprShims {
-  def columnToExpr(c: Column): Expression = c
-  def exprToColumn(e: Expression): Column = e
+  def columnToExpr(c: Column): Expression = expression(c)
+  def exprToColumn(e: Expression): Column = column(e)
 }
@@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.{CoalescedPartitionSpec, ShufflePartitionS
 import org.apache.spark.sql.execution.exchange.Exchange
 import org.apache.spark.sql.execution.metric.{SQLMetrics, SQLShuffleReadMetricsReporter, SQLShuffleWriteMetricsReporter}
 import org.apache.spark.sql.rapids.execution.{GpuShuffleExchangeExecBase, ShuffledBatchRDD}
+import org.apache.spark.sql.rapids.execution.GpuShuffleExchangeExecBase.createAdditionalExchangeMetrics
 import org.apache.spark.sql.vectorized.ColumnarBatch
 import org.apache.spark.util.ThreadUtils

@@ -71,22 +72,11 @@ case class GpuOptimizeWriteExchangeExec(
   private[sql] lazy val readMetrics =
     SQLShuffleReadMetricsReporter.createShuffleReadMetrics(sparkContext)

-  override lazy val additionalMetrics: Map[String, GpuMetric] = Map(
-    "dataSize" -> createSizeMetric(ESSENTIAL_LEVEL, "data size"),
-    "dataReadSize" -> createSizeMetric(MODERATE_LEVEL, "data read size"),
-    "rapidsShuffleSerializationTime" ->
-      createNanoTimingMetric(DEBUG_LEVEL, "rs. serialization time"),
-    "rapidsShuffleDeserializationTime" ->
-      createNanoTimingMetric(DEBUG_LEVEL, "rs. deserialization time"),
-    "rapidsShuffleWriteTime" ->
-      createNanoTimingMetric(ESSENTIAL_LEVEL, "rs. shuffle write time"),
-    "rapidsShuffleCombineTime" ->
-      createNanoTimingMetric(DEBUG_LEVEL, "rs. shuffle combine time"),
-    "rapidsShuffleWriteIoTime" ->
-      createNanoTimingMetric(DEBUG_LEVEL, "rs. shuffle write io time"),
-    "rapidsShuffleReadTime" ->
-      createNanoTimingMetric(ESSENTIAL_LEVEL, "rs. shuffle read time")
-  ) ++ GpuMetric.wrap(readMetrics) ++ GpuMetric.wrap(writeMetrics)
+  override lazy val additionalMetrics : Map[String, GpuMetric] = {
+    createAdditionalExchangeMetrics(this) ++
+      GpuMetric.wrap(readMetrics) ++
+      GpuMetric.wrap(writeMetrics)
+  }

   override lazy val allMetrics: Map[String, GpuMetric] = {
     Map(
@@ -98,7 +88,7 @@
   }

   private lazy val serializer: Serializer =
-    new GpuColumnarBatchSerializer(gpuLongMetric("dataSize"),
+    new GpuColumnarBatchSerializer(allMetrics,
       child.output.map(_.dataType).toArray,
       RapidsConf.SHUFFLE_KUDO_SERIALIZER_ENABLED.get(child.conf))
4 changes: 2 additions & 2 deletions delta-lake/delta-20x/pom.xml
@@ -22,14 +22,14 @@
 <parent>
     <groupId>com.nvidia</groupId>
     <artifactId>rapids-4-spark-jdk-profiles_2.12</artifactId>
-    <version>24.12.0-SNAPSHOT</version>
+    <version>25.02.0-SNAPSHOT</version>
     <relativePath>../../jdk-profiles/pom.xml</relativePath>
 </parent>

 <artifactId>rapids-4-spark-delta-20x_2.12</artifactId>
 <name>RAPIDS Accelerator for Apache Spark Delta Lake 2.0.x Support</name>
 <description>Delta Lake 2.0.x support for the RAPIDS Accelerator for Apache Spark</description>
-<version>24.12.0-SNAPSHOT</version>
+<version>25.02.0-SNAPSHOT</version>

 <properties>
     <rapids.module>../delta-lake/delta-20x</rapids.module>
4 changes: 2 additions & 2 deletions delta-lake/delta-21x/pom.xml
@@ -22,14 +22,14 @@
 <parent>
     <groupId>com.nvidia</groupId>
     <artifactId>rapids-4-spark-jdk-profiles_2.12</artifactId>
-    <version>24.12.0-SNAPSHOT</version>
+    <version>25.02.0-SNAPSHOT</version>
     <relativePath>../../jdk-profiles/pom.xml</relativePath>
 </parent>

 <artifactId>rapids-4-spark-delta-21x_2.12</artifactId>
 <name>RAPIDS Accelerator for Apache Spark Delta Lake 2.1.x Support</name>
 <description>Delta Lake 2.1.x support for the RAPIDS Accelerator for Apache Spark</description>
-<version>24.12.0-SNAPSHOT</version>
+<version>25.02.0-SNAPSHOT</version>

 <properties>
     <rapids.module>../delta-lake/delta-21x</rapids.module>
4 changes: 2 additions & 2 deletions delta-lake/delta-22x/pom.xml
@@ -22,14 +22,14 @@
 <parent>
     <groupId>com.nvidia</groupId>
     <artifactId>rapids-4-spark-jdk-profiles_2.12</artifactId>
-    <version>24.12.0-SNAPSHOT</version>
+    <version>25.02.0-SNAPSHOT</version>
     <relativePath>../../jdk-profiles/pom.xml</relativePath>
 </parent>

 <artifactId>rapids-4-spark-delta-22x_2.12</artifactId>
 <name>RAPIDS Accelerator for Apache Spark Delta Lake 2.2.x Support</name>
 <description>Delta Lake 2.2.x support for the RAPIDS Accelerator for Apache Spark</description>
-<version>24.12.0-SNAPSHOT</version>
+<version>25.02.0-SNAPSHOT</version>

 <properties>
     <rapids.module>../delta-lake/delta-22x</rapids.module>
4 changes: 2 additions & 2 deletions delta-lake/delta-23x/pom.xml
@@ -22,14 +22,14 @@
 <parent>
     <groupId>com.nvidia</groupId>
     <artifactId>rapids-4-spark-parent_2.12</artifactId>
-    <version>24.12.0-SNAPSHOT</version>
+    <version>25.02.0-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>
 </parent>

 <artifactId>rapids-4-spark-delta-23x_2.12</artifactId>
 <name>RAPIDS Accelerator for Apache Spark Delta Lake 2.3.x Support</name>
 <description>Delta Lake 2.3.x support for the RAPIDS Accelerator for Apache Spark</description>
-<version>24.12.0-SNAPSHOT</version>
+<version>25.02.0-SNAPSHOT</version>

 <properties>
     <rapids.module>../delta-lake/delta-23x</rapids.module>