Merge branch 'branch-25.04' into test-more-configs

rapidsai · Mar 8, 2025 · 5a866c2 · 5a866c2
2 parents 799a4a1 + 0e99ec3
commit 5a866c2
Show file tree

Hide file tree

Showing 13 changed files with 117 additions and 18 deletions.
diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
@@ -20,6 +20,9 @@ sccache --zero-stats
 RAPIDS_PACKAGE_VERSION=$(rapids-generate-version)
 export RAPIDS_PACKAGE_VERSION
 
+RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"}
+mkdir -p "${RAPIDS_ARTIFACTS_DIR}"
+
 source rapids-rattler-channel-string
 
 # --no-build-id allows for caching with `sccache`

diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh
@@ -36,6 +36,7 @@ function sed_runner() {
 
 # Centralized version file update
 echo "${NEXT_FULL_TAG}" > VERSION
+echo "${NEXT_FULL_TAG}" > python/cudf/cudf/VERSION
 
 # Wheel testing script
 sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_dask_cudf.sh

diff --git a/conda/recipes/cudf/recipe.yaml b/conda/recipes/cudf/recipe.yaml
@@ -111,6 +111,8 @@ requirements:
           - cuda-cudart-dev
           - if: linux and x86_64
             then: libcufile-dev
+        else:
+          - cudatoolkit
     by_name:
       - cuda-version
 

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -1044,6 +1044,17 @@ if(CUDF_BUILD_TESTUTIL)
   )
   target_link_libraries(cudftestutil_impl INTERFACE cudf::cudftestutil)
 
+  # Base library for linking to cudf::cudftestutil. Note that targets should not directly link to
+  # cudf::cudftestutil_impl; they should instead first link to an OBJECT library that contains the
+  # compiled test utilities.
+  add_library(cudftestutil_objects OBJECT)
+  target_link_libraries(
+    cudftestutil_objects
+    PUBLIC cudf::cudftestutil GTest::gmock GTest::gmock_main GTest::gtest GTest::gtest_main
+    PRIVATE cudf::cudftestutil_impl
+  )
+  add_library(cudf::cudftestutil_objects ALIAS cudftestutil_objects)
+
   install(FILES tests/io/metadata_utilities.cpp DESTINATION src/cudftestutil/io)
   install(
     FILES tests/utilities/column_utilities.cu

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
@@ -99,7 +99,7 @@ function(ConfigureBench CMAKE_BENCH_NAME)
   )
   target_link_libraries(
     ${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main
-                                cudf::cudftestutil_impl $<TARGET_NAME_IF_EXISTS:conda_env>
+                                cudf::cudftestutil_objects $<TARGET_NAME_IF_EXISTS:conda_env>
   )
   add_custom_command(
     OUTPUT CUDF_BENCHMARKS
@@ -129,7 +129,7 @@ function(ConfigureNVBench CMAKE_BENCH_NAME)
   target_link_libraries(
     ${CMAKE_BENCH_NAME}
     PRIVATE cudf_benchmark_common ndsh_data_generator cudf_datagen nvbench::nvbench
-            $<TARGET_NAME_IF_EXISTS:conda_env> cudf::cudftestutil_impl
+            $<TARGET_NAME_IF_EXISTS:conda_env> cudf::cudftestutil_objects
   )
   install(
     TARGETS ${CMAKE_BENCH_NAME}
@@ -360,6 +360,7 @@ ConfigureNVBench(TRANSFORM_NVBENCH transform/polynomials.cpp transform/transform
 # * nvtext benchmark -------------------------------------------------------------------
 ConfigureNVBench(
   TEXT_NVBENCH
+  text/byte_pair_encoding.cpp
   text/edit_distance.cpp
   text/hash_ngrams.cpp
   text/jaccard.cpp

diff --git a/cpp/benchmarks/text/byte_pair_encoding.cpp b/cpp/benchmarks/text/byte_pair_encoding.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+
+#include <cudf_test/column_wrapper.hpp>
+
+#include <cudf/strings/strings_column_view.hpp>
+
+#include <nvtext/byte_pair_encoding.hpp>
+
+#include <rmm/device_buffer.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+static void bench_byte_pair_encoding(nvbench::state& state)
+{
+  auto const num_rows  = static_cast<cudf::size_type>(state.get_int64("num_rows"));
+  auto const min_width = static_cast<cudf::size_type>(state.get_int64("min_width"));
+  auto const max_width = static_cast<cudf::size_type>(state.get_int64("max_width"));
+
+  auto mpt         = cudf::test::strings_column_wrapper({
+    "e n",    // 14
+    "i t",    // 16
+    "i s",    // 17
+    "e s",    // 20
+    "en t",   // 44
+    "c e",    // 90
+    "es t",   // 141
+    "en ce",  // 340
+    "t h",    // 146
+    "h i",    // 5049
+    "th is",  // 5407
+    "t est",  // 9034
+    "s i",    // 13142
+    "s ent"   // 33832
+  });
+  auto merge_pairs = nvtext::load_merge_pairs(cudf::strings_column_view(mpt));
+
+  data_profile const strings_profile = data_profile_builder().distribution(
+    cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width);
+  auto const strings_table =
+    create_random_table({cudf::type_id::STRING}, row_count{num_rows}, strings_profile);
+  cudf::strings_column_view input(strings_table->view().column(0));
+
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
+
+  auto chars_size = input.chars_size(cudf::get_default_stream());
+  state.add_global_memory_reads<nvbench::int8_t>(chars_size);
+  // output are integers (one per row)
+  state.add_global_memory_writes<nvbench::int32_t>(num_rows);
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
+    auto result = nvtext::byte_pair_encoding(input, *merge_pairs);
+  });
+}
+
+NVBENCH_BENCH(bench_byte_pair_encoding)
+  .set_name("byte_pair_encoding")
+  .add_int64_axis("min_width", {0})
+  .add_int64_axis("max_width", {32, 64, 128, 256})
+  .add_int64_axis("num_rows", {32768, 262144});
diff --git a/cpp/src/text/bpe/load_merge_pairs.cu b/cpp/src/text/bpe/load_merge_pairs.cu
@@ -44,8 +44,10 @@ namespace {
 std::unique_ptr<detail::merge_pairs_map_type> initialize_merge_pairs_map(
   cudf::column_device_view const& input, rmm::cuda_stream_view stream)
 {
+  auto const elements  = input.size() / 2;
   auto merge_pairs_map = std::make_unique<merge_pairs_map_type>(
-    static_cast<size_t>(input.size()),
+    static_cast<size_t>(elements),
+    cudf::detail::CUCO_DESIRED_LOAD_FACTOR,
     cuco::empty_key{-1},
     cuco::empty_value{-1},
     bpe_equal{input},
@@ -60,7 +62,7 @@ std::unique_ptr<detail::merge_pairs_map_type> initialize_merge_pairs_map(
     cuda::proclaim_return_type<cuco::pair<cudf::size_type, cudf::size_type>>(
       [] __device__(cudf::size_type idx) { return cuco::make_pair(idx, idx); }));
 
-  merge_pairs_map->insert_async(iter, iter + (input.size() / 2), stream.value());
+  merge_pairs_map->insert_async(iter, iter + elements, stream.value());
 
   return merge_pairs_map;
 }
@@ -70,6 +72,7 @@ std::unique_ptr<detail::mp_table_map_type> initialize_mp_table_map(
 {
   auto mp_table_map = std::make_unique<mp_table_map_type>(
     static_cast<size_t>(input.size()),
+    cudf::detail::CUCO_DESIRED_LOAD_FACTOR,
     cuco::empty_key{-1},
     cuco::empty_value{-1},
     mp_equal{input},

diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
@@ -55,16 +55,8 @@ function(ConfigureTest CMAKE_TEST_NAME)
   )
 
   target_link_libraries(
-    ${CMAKE_TEST_NAME}
-    PRIVATE cudf::cudftestutil
-            cudf::cudftestutil_impl
-            GTest::gmock
-            GTest::gmock_main
-            GTest::gtest
-            GTest::gtest_main
-            nvtx3::nvtx3-cpp
-            $<TARGET_NAME_IF_EXISTS:conda_env>
-            "${_CUDF_TEST_EXTRA_LIBS}"
+    ${CMAKE_TEST_NAME} PRIVATE cudf::cudftestutil_objects nvtx3::nvtx3-cpp
+                               $<TARGET_NAME_IF_EXISTS:conda_env> "${_CUDF_TEST_EXTRA_LIBS}"
   )
   rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME})
   rapids_test_add(

diff --git a/docs/cudf/source/pylibcudf/api_docs/contiguous_split.rst b/docs/cudf/source/pylibcudf/api_docs/contiguous_split.rst
@@ -0,0 +1,6 @@
+================
+contiguous_split
+================
+
+.. automodule:: pylibcudf.contiguous_split
+   :members:
diff --git a/docs/cudf/source/pylibcudf/api_docs/index.rst b/docs/cudf/source/pylibcudf/api_docs/index.rst
@@ -13,6 +13,7 @@ This page provides API documentation for pylibcudf.
     column
     column_factories
     concatenate
+    contiguous_split
     copying
     datetime
     expressions

diff --git a/python/cudf/cudf/VERSION b/python/cudf/cudf/VERSION
@@ -0,0 +1 @@
+25.04.00
diff --git a/python/cudf/cudf/tests/test_array_ufunc.py b/python/cudf/cudf/tests/test_array_ufunc.py
@@ -91,7 +91,8 @@ def test_ufunc_index(request, ufunc):
     request.applymarker(
         pytest.mark.xfail(
             condition=fname in {"ceil", "floor", "trunc"}
-            and parse(np.__version__) >= parse("2.1"),
+            and parse(np.__version__) >= parse("2.1")
+            and parse(cp.__version__) < parse("14"),
             reason="https://github.com/cupy/cupy/issues/9018",
         )
     )
@@ -401,7 +402,8 @@ def test_ufunc_dataframe(request, ufunc, has_nulls, indexed):
         pytest.mark.xfail(
             condition=fname in {"ceil", "floor", "trunc"}
             and not has_nulls
-            and parse(np.__version__) >= parse("2.1"),
+            and parse(np.__version__) >= parse("2.1")
+            and parse(cp.__version__) < parse("14"),
             reason="https://github.com/cupy/cupy/issues/9018",
         )
     )

diff --git a/python/cudf/cudf/tests/test_unaops.py b/python/cudf/cudf/tests/test_unaops.py
@@ -5,6 +5,7 @@
 import re
 from decimal import Decimal
 
+import cupy as cp
 import numpy as np
 import pandas as pd
 import pytest
@@ -89,7 +90,8 @@ def test_scalar_unary_operations(slr, dtype, op, request):
         pytest.mark.xfail(
             condition=op in {np.ceil, np.floor}
             and not isinstance(slr, float)
-            and parse(np.__version__) >= parse("2.1"),
+            and parse(np.__version__) >= parse("2.1")
+            and parse(cp.__version__) < parse("14.0"),
             reason="https://github.com/cupy/cupy/issues/9018",
         )
     )