Merge pull request #1759 from NvTimLiu/release-tmp
Merge branch 'branch-24.02' into main
NvTimLiu authored Feb 14, 2024
2 parents 67cea63 + 8f5e38e commit 9bc1847
Showing 62 changed files with 6,709 additions and 5,343 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/auto-merge.yml
@@ -1,4 +1,4 @@
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -18,12 +18,12 @@ name: auto-merge HEAD to BASE
on:
pull_request_target:
branches:
- branch-23.12
- branch-24.02
types: [closed]

env:
HEAD: branch-23.12
BASE: branch-24.02
HEAD: branch-24.02
BASE: branch-24.04

jobs:
auto-merge:
2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,4 +1,4 @@
[submodule "thirdparty/cudf"]
path = thirdparty/cudf
url = https://github.com/rapidsai/cudf.git
branch = branch-23.12
branch = branch-24.02
13 changes: 12 additions & 1 deletion CONTRIBUTING.md
@@ -28,6 +28,17 @@ There are two types of branches in this repository:
is held here. `main` will change with new releases, but otherwise it should not change with
every pull request merged, making it a more stable branch.

## Git Submodules

This repository uses Git submodules. After cloning this repository or moving to a new commit
in this repository you will need to ensure the submodules are initialized and updated to the
expected submodule commits. This can be done by executing the following command at the top of
the repository:

```commandline
git submodule update --init --recursive
```

## Building From Source

[Maven](https://maven.apache.org) is used for most aspects of the build. For example, the
@@ -149,7 +160,7 @@ $ ./build/build-in-docker install ...
```

Now cd to ~/repos/NVIDIA/spark-rapids and build with one of the options from
[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-23.12/CONTRIBUTING.md#building-from-source).
[spark-rapids instructions](https://github.com/NVIDIA/spark-rapids/blob/branch-24.02/CONTRIBUTING.md#building-from-source).

```bash
$ ./build/buildall
20 changes: 20 additions & 0 deletions NOTICE
@@ -0,0 +1,20 @@
RAPIDS Accelerator JNI For Apache Spark
Copyright (c) 2022-2024, NVIDIA CORPORATION

--------------------------------------------------------------------------------

This project includes code from ryu (https://github.com/ulfjack/ryu).

Copyright (2018) Ulf Adams and contributors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
4 changes: 2 additions & 2 deletions ci/Dockerfile
@@ -1,5 +1,5 @@
#
# Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2022-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -61,7 +61,7 @@ RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v
rm -rf ccache-${CCACHE_VERSION}

## install a version of boost that is needed for arrow/parquet to work
RUN cd /usr/local && wget --quiet https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz && \
RUN cd /usr/local && wget --quiet https://archives.boost.io/release/1.79.0/source/boost_1_79_0.tar.gz && \
tar -xzf boost_1_79_0.tar.gz && \
rm boost_1_79_0.tar.gz && \
cd boost_1_79_0 && \
4 changes: 2 additions & 2 deletions ci/Dockerfile.multi
@@ -1,5 +1,5 @@
#
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -63,7 +63,7 @@ RUN cd /tmp && wget --quiet https://github.com/ccache/ccache/releases/download/v
rm -rf ccache-${CCACHE_VERSION}

## install a version of boost that is needed for arrow/parquet to work
RUN cd /usr/local && wget --quiet https://boostorg.jfrog.io/artifactory/main/release/1.79.0/source/boost_1_79_0.tar.gz && \
RUN cd /usr/local && wget --quiet https://archives.boost.io/release/1.79.0/source/boost_1_79_0.tar.gz && \
tar -xzf boost_1_79_0.tar.gz && \
rm boost_1_79_0.tar.gz && \
cd boost_1_79_0 && \
4 changes: 2 additions & 2 deletions pom.xml
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Copyright (c) 2022-2023, NVIDIA CORPORATION.
Copyright (c) 2022-2024, NVIDIA CORPORATION.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -21,7 +21,7 @@

<groupId>com.nvidia</groupId>
<artifactId>spark-rapids-jni</artifactId>
<version>23.12.0</version>
<version>24.02.0</version>
<packaging>jar</packaging>
<name>RAPIDS Accelerator JNI for Apache Spark</name>
<description>
12 changes: 5 additions & 7 deletions src/main/cpp/CMakeLists.txt
@@ -32,7 +32,7 @@ rapids_cuda_init_architectures(SPARK_RAPIDS_JNI)

project(
SPARK_RAPIDS_JNI
VERSION 23.12.00
VERSION 24.02.00
LANGUAGES C CXX CUDA
)

@@ -94,11 +94,8 @@ include(cmake/Modules/ConfigureCUDA.cmake) # set other CUDA compilation flags
# ##################################################################################################
# * dependencies ----------------------------------------------------------------------------------

# find libcu++
include(${rapids-cmake-dir}/cpm/libcudacxx.cmake)

# find thrust/cub
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_thrust.cmake)
# find CCCL
include(${CUDF_DIR}/cpp/cmake/thirdparty/get_cccl.cmake)

# JNI
find_package(JNI REQUIRED)
@@ -164,6 +161,8 @@ add_library(
src/ZOrderJni.cpp
src/bloom_filter.cu
src/cast_decimal_to_string.cu
src/format_float.cu
src/cast_float_to_string.cu
src/cast_string.cu
src/cast_string_to_float.cu
src/datetime_rebase.cu
@@ -172,7 +171,6 @@
src/map_utils.cu
src/murmur_hash.cu
src/parse_uri.cu
src/row_conversion.cu
src/timezones.cu
src/utilities.cu
src/xxhash64.cu
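The dependency hunk above replaces the separate libcu++ and Thrust/CUB discovery steps with cudf's single `get_cccl.cmake` helper, reflecting the consolidation of Thrust, CUB, and libcu++ into the CCCL project. An illustrative snippet (not part of the commit) of code that only needs that one dependency, since Thrust and libcu++ headers now ship together; compile with nvcc:

```cpp
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <cuda/std/optional>   // libcu++, provided by the same CCCL dependency as Thrust

int main() {
  thrust::device_vector<int> v(4, 1);                                  // Thrust container
  cuda::std::optional<int> sum = thrust::reduce(v.begin(), v.end());   // libcu++ optional
  return sum.value_or(0) == 4 ? 0 : 1;
}
```

The source list also gains `format_float.cu` and `cast_float_to_string.cu`, which appear to back the new CastStrings bindings further down, and drops `row_conversion.cu` (see the benchmark change below).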
2 changes: 1 addition & 1 deletion src/main/cpp/benchmarks/common/generate_input.cu
@@ -520,7 +520,7 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
return cudf::make_strings_column(
num_rows,
std::move(offsets),
std::move(chars),
std::move(chars->release().data.release()[0]),
profile.get_null_frequency().has_value() ? std::move(result_bitmask) : rmm::device_buffer{},
null_count);
}
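The single changed line above adapts the benchmark to a newer cudf strings factory that takes the character data as a raw rmm::device_buffer rather than as a chars child column. A hedged, more explicit spelling of the same move, assuming cudf::column::release() returns a contents struct whose data member is a std::unique_ptr<rmm::device_buffer> (as in cudf 24.02):

```cpp
// Not part of the commit: unpack the released chars column and move its byte
// buffer to where make_strings_column now expects an rmm::device_buffer.
auto contents = chars->release();                         // take ownership of the column's buffers
rmm::device_buffer chars_bytes = std::move(*contents.data);
```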
16 changes: 8 additions & 8 deletions src/main/cpp/benchmarks/row_conversion.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -48,15 +48,15 @@ void fixed_width(nvbench::state& state)
bytes_per_row += cudf::size_of(t);
}

auto rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto rows = cudf::convert_to_rows_fixed_width_optimized(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows_fixed_width_optimized(table->view());
auto _rows = cudf::convert_to_rows_fixed_width_optimized(table->view());
} else {
for (auto const& r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows_fixed_width_optimized(l, schema);
auto out = cudf::convert_from_rows_fixed_width_optimized(l, schema);
}
}
});
@@ -117,16 +117,16 @@ static void variable_or_fixed_width(nvbench::state& state)
}
}

auto rows = spark_rapids_jni::convert_to_rows(table->view());
auto rows = cudf::convert_to_rows(table->view());

state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
auto new_rows = spark_rapids_jni::convert_to_rows(table->view());
auto new_rows = cudf::convert_to_rows(table->view());
if (direction == "to row") {
auto _rows = spark_rapids_jni::convert_to_rows(table->view());
auto _rows = cudf::convert_to_rows(table->view());
} else {
for (auto const& r : rows) {
cudf::lists_column_view const l(r->view());
auto out = spark_rapids_jni::convert_from_rows(l, schema);
auto out = cudf::convert_from_rows(l, schema);
}
}
});
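These benchmark edits switch from the spark_rapids_jni row-conversion entry points to the cudf ones (convert_to_rows, convert_from_rows, and their fixed-width-optimized variants). That lines up with src/row_conversion.cu being dropped from the add_library list in the CMakeLists.txt change above: the row/column conversion kernels now come from libcudf, so this repository no longer builds its own copy.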
7 changes: 5 additions & 2 deletions src/main/cpp/faultinj/faultinj.cu
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -31,6 +31,9 @@
#include <sys/inotify.h>
#include <sys/time.h>

// Format enums for logging
auto format_as(CUpti_CallbackDomain domain) { return fmt::underlying(domain); }

namespace {

#define CUPTI_CALL(call) \
@@ -392,7 +395,7 @@ void readFaultInjectorConfig(void)
std::srand(seed);

const spdlog::level::level_enum logLevelEnum = static_cast<spdlog::level::level_enum>(logLevel);
spdlog::info("changed log level to {}", logLevelEnum);
spdlog::info("changed log level to {}", logLevel);
spdlog::set_level(logLevelEnum);
traceConfig(globalControl.configRoot);

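The added format_as overload is the {fmt} customization point for formatting enums: recent fmt releases no longer format enums as integers implicitly, and that is presumably also why the log statement now passes the integer logLevel rather than the spdlog level enum. A minimal sketch of the mechanism with a stand-in enum (fmt 9 or later assumed; CUPTI types omitted):

```cpp
#include <fmt/format.h>

// Stand-in for CUpti_CallbackDomain; any enum works the same way.
enum class callback_domain { driver_api = 1, runtime_api = 2 };

// Found via argument-dependent lookup; tells fmt to print the enum as its
// underlying integer value.
auto format_as(callback_domain d) { return fmt::underlying(d); }

int main() { fmt::print("domain = {}\n", callback_domain::runtime_api); }  // prints "domain = 2"
```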
35 changes: 33 additions & 2 deletions src/main/cpp/src/CastStringJni.cpp
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -109,6 +109,37 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_toFloat(
CATCH_CAST_EXCEPTION(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloat(JNIEnv* env,
jclass,
jlong input_column)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);

try {
cudf::jni::auto_set_device(env);

auto const& cv = *reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::release_as_jlong(
spark_rapids_jni::float_to_string(cv, cudf::get_default_stream()));
}
CATCH_CAST_EXCEPTION(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromFloatWithFormat(
JNIEnv* env, jclass, jlong input_column, jint digits)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);

try {
cudf::jni::auto_set_device(env);

auto const& cv = *reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::release_as_jlong(
spark_rapids_jni::format_float(cv, digits, cudf::get_default_stream()));
}
CATCH_CAST_EXCEPTION(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal(JNIEnv* env,
jclass,
jlong input_column)
@@ -118,7 +149,7 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_CastStrings_fromDecimal
try {
cudf::jni::auto_set_device(env);

cudf::column_view cv{*reinterpret_cast<cudf::column_view const*>(input_column)};
auto const& cv = *reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::release_as_jlong(
spark_rapids_jni::decimal_to_non_ansi_string(cv, cudf::get_default_stream()));
}
11 changes: 8 additions & 3 deletions src/main/cpp/src/DecimalUtilsJni.cpp
@@ -19,8 +19,13 @@

extern "C" {

JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_DecimalUtils_multiply128(
JNIEnv* env, jclass, jlong j_view_a, jlong j_view_b, jint j_product_scale)
JNIEXPORT jlongArray JNICALL
Java_com_nvidia_spark_rapids_jni_DecimalUtils_multiply128(JNIEnv* env,
jclass,
jlong j_view_a,
jlong j_view_b,
jint j_product_scale,
bool cast_interim_result)
{
JNI_NULL_CHECK(env, j_view_a, "column is null", 0);
JNI_NULL_CHECK(env, j_view_b, "column is null", 0);
@@ -30,7 +35,7 @@ JNIEXPORT jlongArray JNICALL Java_com_nvidia_spark_rapids_jni_DecimalUtils_multi
auto view_b = reinterpret_cast<cudf::column_view const*>(j_view_b);
auto scale = static_cast<int>(j_product_scale);
return cudf::jni::convert_table_for_return(
env, cudf::jni::multiply_decimal128(*view_a, *view_b, scale));
env, cudf::jni::multiply_decimal128(*view_a, *view_b, scale, cast_interim_result));
}
CATCH_STD(env, 0);
}
59 changes: 59 additions & 0 deletions src/main/cpp/src/ParseURIJni.cpp
@@ -33,4 +33,63 @@ JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseProtocol(
}
CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseHost(JNIEnv* env,
jclass,
jlong input_column)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);

try {
cudf::jni::auto_set_device(env);
auto const input = reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::ptr_as_jlong(spark_rapids_jni::parse_uri_to_host(*input).release());
}
CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseQuery(JNIEnv* env,
jclass,
jlong input_column)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);

try {
cudf::jni::auto_set_device(env);
auto const input = reinterpret_cast<cudf::column_view const*>(input_column);
return cudf::jni::ptr_as_jlong(spark_rapids_jni::parse_uri_to_query(*input).release());
}
CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseQueryWithLiteral(
JNIEnv* env, jclass, jlong input_column, jstring query)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);
JNI_NULL_CHECK(env, query, "query is null", 0);

try {
cudf::jni::auto_set_device(env);
auto const input = reinterpret_cast<cudf::column_view const*>(input_column);
cudf::jni::native_jstring native_query(env, query);
return cudf::jni::ptr_as_jlong(
spark_rapids_jni::parse_uri_to_query(*input, native_query.get()).release());
}
CATCH_STD(env, 0);
}

JNIEXPORT jlong JNICALL Java_com_nvidia_spark_rapids_jni_ParseURI_parseQueryWithColumn(
JNIEnv* env, jclass, jlong input_column, jlong query_column)
{
JNI_NULL_CHECK(env, input_column, "input column is null", 0);
JNI_NULL_CHECK(env, query_column, "query column is null", 0);

try {
cudf::jni::auto_set_device(env);
auto const input = reinterpret_cast<cudf::column_view const*>(input_column);
auto const query = reinterpret_cast<cudf::column_view const*>(query_column);
return cudf::jni::ptr_as_jlong(spark_rapids_jni::parse_uri_to_query(*input, *query).release());
}
CATCH_STD(env, 0);
}
}
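The new ParseURI bindings forward to parse_uri_to_host and to three shapes of parse_uri_to_query. A hedged summary, with signatures and behavior inferred from the call sites above rather than from a header:

```cpp
// uris and key_col are cudf::column_view; the variable names are illustrative only.
auto hosts      = spark_rapids_jni::parse_uri_to_host(uris);            // host component per URI
auto full_query = spark_rapids_jni::parse_uri_to_query(uris);           // whole query string per URI
auto one_key    = spark_rapids_jni::parse_uri_to_query(uris, "id");     // value for a single literal key
auto row_keys   = spark_rapids_jni::parse_uri_to_query(uris, key_col);  // key supplied per row by a column
```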