Merge pull request #1759 from NvTimLiu/release-tmp
Merge branch 'branch-24.02' into main
NvTimLiu authored Feb 14, 2024
2 parents 67cea63 + 8f5e38e commit 9bc1847
Showing 62 changed files with 6,709 additions and 5,343 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/auto-merge.yml
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,12 +18,12 @@ name: auto-merge HEAD to BASE
on:
pull_request_target:
branches:
- branch-23.12
- branch-24.02
types: [closed]

env:
HEAD: branch-23.12
BASE: branch-24.02
HEAD: branch-24.02
BASE: branch-24.04

jobs:
auto-merge:
2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,4 +1,4 @@
[submodule "thirdparty/cudf"]
path = thirdparty/cudf
url = https://github.com/rapidsai/cudf.git
branch = branch-23.12
branch = branch-24.02
13 changes: 12 additions & 1 deletion CONTRIBUTING.md
@@ -28,6 +28,17 @@ There are two types of branches in this repository:
is held here. `main` will change with new releases, but otherwise it should not change with
every pull request merged, making it a more stable branch.

## Git Submodules

This repository uses Git submodules. After cloning this repository or moving to a new commit
in this repository you will need to ensure the submodules are initialized and updated to the
expected submodule commits. This can be done by executing the following command at the top of
the repository:

```commandline
git submodule update --init --recursive
```

## Building From Source

[Maven](https://maven.apache.org) is used for most aspects of the build. For example, the
@@ -149,7 +160,7 @@ $ ./build/build-in-docker install ...
```

Now cd to ~/repos/NVIDIA/spark-rapids and build with one of the options from
[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-23.12/CONTRIBUTING.md#building-from-source).
[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-24.02/CONTRIBUTING.md#building-from-source).

```bash
$ ./build/buildall
20 changes: 20 additions & 0 deletions NOTICE
@@ -0,0 +1,20 @@
RAPIDS Accelerator JNI For Apache Spark
Copyright (c) 2022-2024, NVIDIA CORPORATION

--------------------------------------------------------------------------------

This project includes code from ryu (https://github.com/ulfjack/ryu).

Copyright (2018) Ulf Adams and contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
4 changes: 2 additions & 2 deletions ci/Dockerfile
@@ -1,5 +1,5 @@
#
# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -61,7 +61,7 @@ RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v
rm -rf ccache-${CCACHE_VERSION}

## install a version of boost that is needed for arrow/parquet to work
RUN cd /usr/local && wget --quiet https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz && \
RUN cd /usr/local && wget --quiet https://archives.boost.io/release/1.79.0/source/boost_1_79_0.tar.gz && \
tar -xzf boost_1_79_0.tar.gz && \
rm boost_1_79_0.tar.gz && \
cd boost_1_79_0 && \
4 changes: 2 additions & 2 deletions ci/Dockerfile.multi
@@ -1,5 +1,5 @@
#
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -63,7 +63,7 @@ RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v
rm -rf ccache-${CCACHE_VERSION}

## install a version of boost that is needed for arrow/parquet to work
RUN cd /usr/local && wget --quiet https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz && \
RUN cd /usr/local && wget --quiet https://archives.boost.io/release/1.79.0/source/boost_1_79_0.tar.gz && \
tar -xzf boost_1_79_0.tar.gz && \
rm boost_1_79_0.tar.gz && \
cd boost_1_79_0 && \
4 changes: 2 additions & 2 deletions pom.xml
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2022-2023, NVIDIA CORPORATION.
Copyright (c) 2022-2024, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@

<groupId>com.nvidia</groupId>
<artifactId>spark-rapids-jni</artifactId>
<version>23.12.0</version>
<version>24.02.0</version>
<packaging>jar</packaging>
<name>RAPIDS Accelerator JNI for Apache Spark</name>
<description>
12 changes: 5 additions & 7 deletions src/main/cpp/CMakeLists.txt
@@ -32,7 +32,7 @@ rapids_cuda_init_architectures(SPARK_RAPIDS_JNI)

project(
SPARK_RAPIDS_JNI
VERSION 23.12.00
VERSION 24.02.00
LANGUAGES C CXX CUDA
)

@@ -94,11 +94,8 @@ include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags
# ##################################################################################################
# * dependencies ----------------------------------------------------------------------------------

# find libcu++
include(${rapids-cmake-dir}/cpm/libcudacxx.cmake)

# find thrust/cub
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_thrust.cmake)
# find CCCL
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_cccl.cmake)

# JNI
find_package(JNI REQUIRED)
@@ -164,6 +161,8 @@ add_library(
src/ZOrderJni.cpp
src/bloom_filter.cu
src/cast_decimal_to_string.cu
src/format_float.cu
src/cast_float_to_string.cu
src/cast_string.cu
src/cast_string_to_float.cu
src/datetime_rebase.cu
@@ -172,7 +171,6 @@
src/map_utils.cu
src/murmur_hash.cu
src/parse_uri.cu
src/row_conversion.cu
src/timezones.cu
src/utilities.cu
src/xxhash64.cu
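The dependency hunk above replaces the separate libcu++ and Thrust/CUB discovery steps with cudf's single `get_cccl.cmake` helper, reflecting the consolidation of Thrust, CUB, and libcu++ into the CCCL project. An illustrative snippet (not part of the commit) of code that only needs that one dependency, since Thrust and libcu++ headers now ship together; compile with nvcc:

```cpp
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <cuda/std/optional>   // libcu++, provided by the same CCCL dependency as Thrust

int main() {
  thrust::device_vector<int> v(4, 1);                                  // Thrust container
  cuda::std::optional<int> sum = thrust::reduce(v.begin(), v.end());   // libcu++ optional
  return sum.value_or(0) == 4 ? 0 : 1;
}
```

The source list also gains `format_float.cu` and `cast_float_to_string.cu`, which appear to back the new CastStrings bindings further down, and drops `row_conversion.cu` (see the benchmark change below).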
2 changes: 1 addition & 1 deletion src/main/cpp/benchmarks/common/generate_input.cu
@@ -520,7 +520,7 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
return cudf::make_strings_column(
num_rows,
std::move(offsets),
std::move(chars),
std::move(chars->release().data.release()[0]),
profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{},
null_count);
}
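The single changed line above adapts the benchmark to a newer cudf strings factory that takes the character data as a raw rmm::device_buffer rather than as a chars child column. A hedged, more explicit spelling of the same move, assuming cudf::column::release() returns a contents struct whose data member is a std::unique_ptr<rmm::device_buffer> (as in cudf 24.02):

```cpp
// Not part of the commit: unpack the released chars column and move its byte
// buffer to where make_strings_column now expects an rmm::device_buffer.
auto contents = chars->release();                         // take ownership of the column's buffers
rmm::device_buffer chars_bytes = std::move(*contents.data);
```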
16 changes: 8 additions & 8 deletions src/main/cpp/benchmarks/row_conversion.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -48,15 +48,15 @@ void fixed_width(nvbench::state& state)
bytes_per_row += cudf::size_of(t);
}

auto rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto rows = cudf::convert_to_rows_fixed_width_optimized(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto _rows = cudf::convert_to_rows_fixed_width_optimized(table->view());
} else {
for (auto const& r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows_fixed_width_optimized(l, schema);
auto out = cudf::convert_from_rows_fixed_width_optimized(l, schema);
}
}
});
@@ -117,16 +117,16 @@ static void variable_or_fixed_width(nvbench::state& state)
}
}

auto rows = spark_rapids_jni::convert_to_rows(table->view());
auto rows = cudf::convert_to_rows(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto new_rows = spark_rapids_jni::convert_to_rows(table->view());
auto new_rows = cudf::convert_to_rows(table->view());
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows(table->view());
auto _rows = cudf::convert_to_rows(table->view());
} else {
for (auto const& r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows(l, schema);
auto out = cudf::convert_from_rows(l, schema);
}
}
});
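These benchmark edits switch from the spark_rapids_jni row-conversion entry points to the cudf ones (convert_to_rows, convert_from_rows, and their fixed-width-optimized variants). That lines up with src/row_conversion.cu being dropped from the add_library list in the CMakeLists.txt change above: the row/column conversion kernels now come from libcudf, so this repository no longer builds its own copy.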
7 changes: 5 additions & 2 deletions src/main/cpp/faultinj/faultinj.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -31,6 +31,9 @@
#include <sys/inotify.h>
#include <sys/time.h>

// Format enums for logging
auto format_as(CUpti_CallbackDomain domain) { return fmt::underlying(domain); }

namespace {

#define CUPTI_CALL(call) \
@@ -392,7 +395,7 @@ void readFaultInjectorConfig(void)
std::srand(seed);

const spdlog::level::level_enum logLevelEnum = static_cast<spdlog::level::level_enum>(logLevel);
spdlog::info("changed log level to {}", logLevelEnum);
spdlog::info("changed log level to {}", logLevel);
spdlog::set_level(logLevelEnum);
traceConfig(globalControl.configRoot);

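The added format_as overload is the {fmt} customization point for formatting enums: recent fmt releases no longer format enums as integers implicitly, and that is presumably also why the log statement now passes the integer logLevel rather than the spdlog level enum. A minimal sketch of the mechanism with a stand-in enum (fmt 9 or later assumed; CUPTI types omitted):

```cpp
#include <fmt/format.h>

// Stand-in for CUpti_CallbackDomain; any enum works the same way.
enum class callback_domain { driver_api = 1, runtime_api = 2 };

// Found via argument-dependent lookup; tells fmt to print the enum as its
// underlying integer value.
auto format_as(callback_domain d) { return fmt::underlying(d); }

int main() { fmt::print("domain = {}\n", callback_domain::runtime_api); }  // prints "domain = 2"
```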
35 changes: 33 additions & 2 deletions src/main/cpp/src/CastStringJni.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -109,6 +109,37 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_toFloat(
CATCH_CAST_EXCEPTION(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloat(JNIEnv* env,
jclass,
jlong input_column)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);

try {
cudf::jni::auto_set_device(env);

auto const& cv = *reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::release_as_jlong(
spark_rapids_jni::float_to_string(cv, cudf::get_default_stream()));
}
CATCH_CAST_EXCEPTION(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloatWithFormat(
JNIEnv* env, jclass, jlong input_column, jint digits)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);

try {
cudf::jni::auto_set_device(env);

auto const& cv = *reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::release_as_jlong(
spark_rapids_jni::format_float(cv, digits, cudf::get_default_stream()));
}
CATCH_CAST_EXCEPTION(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal(JNIEnv* env,
jclass,
jlong input_column)
@@ -118,7 +149,7 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal
try {
cudf::jni::auto_set_device(env);

cudf::column_view cv{*reinterpret_cast<cudf::column_view const*>(input_column)};
auto const& cv = *reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::release_as_jlong(
spark_rapids_jni::decimal_to_non_ansi_string(cv, cudf::get_default_stream()));
}
11 changes: 8 additions & 3 deletions src/main/cpp/src/DecimalUtilsJni.cpp
@@ -19,8 +19,13 @@

extern "C" {

JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_DecimalUtils_multiply128(
JNIEnv* env, jclass, jlong j_view_a, jlong j_view_b, jint j_product_scale)
JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_DecimalUtils_multiply128(JNIEnv* env,
jclass,
jlong j_view_a,
jlong j_view_b,
jint j_product_scale,
bool cast_interim_result)
{
JNI_NULL_CHECK(env, j_view_a, "column is null", 0);
JNI_NULL_CHECK(env, j_view_b, "column is null", 0);
@@ -30,7 +35,7 @@ JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_DecimalUtils_multi
auto view_b = reinterpret_cast<cudf::column_view const*>(j_view_b);
auto scale = static_cast<int>(j_product_scale);
return cudf::jni::convert_table_for_return(
env, cudf::jni::multiply_decimal128(*view_a, *view_b, scale));
env, cudf::jni::multiply_decimal128(*view_a, *view_b, scale, cast_interim_result));
}
CATCH_STD(env, 0);
}
59 changes: 59 additions & 0 deletions src/main/cpp/src/ParseURIJni.cpp
@@ -33,4 +33,63 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseProtocol(
}
CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseHost(JNIEnv* env,
jclass,
jlong input_column)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);

try {
cudf::jni::auto_set_device(env);
auto const input = reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::ptr_as_jlong(spark_rapids_jni::parse_uri_to_host(*input).release());
}
CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseQuery(JNIEnv* env,
jclass,
jlong input_column)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);

try {
cudf::jni::auto_set_device(env);
auto const input = reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::ptr_as_jlong(spark_rapids_jni::parse_uri_to_query(*input).release());
}
CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseQueryWithLiteral(
JNIEnv* env, jclass, jlong input_column, jstring query)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);
JNI_NULL_CHECK(env, query, "query is null", 0);

try {
cudf::jni::auto_set_device(env);
auto const input = reinterpret_cast<cudf::column_view const*>(input_column);
cudf::jni::native_jstring native_query(env, query);
return cudf::jni::ptr_as_jlong(
spark_rapids_jni::parse_uri_to_query(*input, native_query.get()).release());
}
CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseQueryWithColumn(
JNIEnv* env, jclass, jlong input_column, jlong query_column)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);
JNI_NULL_CHECK(env, query_column, "query column is null", 0);

try {
cudf::jni::auto_set_device(env);
auto const input = reinterpret_cast<cudf::column_view const*>(input_column);
auto const query = reinterpret_cast<cudf::column_view const*>(query_column);
return cudf::jni::ptr_as_jlong(spark_rapids_jni::parse_uri_to_query(*input, *query).release());
}
CATCH_STD(env, 0);
}
}
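The new ParseURI bindings forward to parse_uri_to_host and to three shapes of parse_uri_to_query. A hedged summary, with signatures and behavior inferred from the call sites above rather than from a header:

```cpp
// uris and key_col are cudf::column_view; the variable names are illustrative only.
auto hosts      = spark_rapids_jni::parse_uri_to_host(uris);            // host component per URI
auto full_query = spark_rapids_jni::parse_uri_to_query(uris);           // whole query string per URI
auto one_key    = spark_rapids_jni::parse_uri_to_query(uris, "id");     // value for a single literal key
auto row_keys   = spark_rapids_jni::parse_uri_to_query(uris, key_col);  // key supplied per row by a column
```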