Skip to content

Commit

Permalink
Merge branch 'branch-25.04' into test-more-configs
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice authored Mar 8, 2025
2 parents 799a4a1 + 0e99ec3 commit 5a866c2
Show file tree
Hide file tree
Showing 13 changed files with 117 additions and 18 deletions.
3 changes: 3 additions & 0 deletions ci/build_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ sccache --zero-stats
RAPIDS_PACKAGE_VERSION=$(rapids-generate-version)
export RAPIDS_PACKAGE_VERSION

RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"}
mkdir -p "${RAPIDS_ARTIFACTS_DIR}"

source rapids-rattler-channel-string

# --no-build-id allows for caching with `sccache`
Expand Down
1 change: 1 addition & 0 deletions ci/release/update-version.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ function sed_runner() {

# Centralized version file update
echo "${NEXT_FULL_TAG}" > VERSION
echo "${NEXT_FULL_TAG}" > python/cudf/cudf/VERSION

# Wheel testing script
sed_runner "s/branch-.*/branch-${NEXT_SHORT_TAG}/g" ci/test_wheel_dask_cudf.sh
Expand Down
2 changes: 2 additions & 0 deletions conda/recipes/cudf/recipe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ requirements:
- cuda-cudart-dev
- if: linux and x86_64
then: libcufile-dev
else:
- cudatoolkit
by_name:
- cuda-version

Expand Down
11 changes: 11 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1044,6 +1044,17 @@ if(CUDF_BUILD_TESTUTIL)
)
target_link_libraries(cudftestutil_impl INTERFACE cudf::cudftestutil)

# Base OBJECT library that other targets link against in order to use cudf::cudftestutil.
# Targets should not link to cudf::cudftestutil_impl directly; they should instead link to an
# OBJECT library (such as this one) that contains the compiled test utilities.
# NOTE(review): no sources are listed in this add_library() call; presumably they arrive through
# the PRIVATE link to cudf::cudftestutil_impl below -- confirm against that target's definition.
add_library(cudftestutil_objects OBJECT)
# PUBLIC: cudf::cudftestutil and the GTest/GMock targets propagate to every consumer.
# PRIVATE: cudf::cudftestutil_impl is used only to build this library and is not propagated.
target_link_libraries(
cudftestutil_objects
PUBLIC cudf::cudftestutil GTest::gmock GTest::gmock_main GTest::gtest GTest::gtest_main
PRIVATE cudf::cudftestutil_impl
)
# Namespaced alias so consumers can refer to the target as cudf::cudftestutil_objects.
add_library(cudf::cudftestutil_objects ALIAS cudftestutil_objects)

install(FILES tests/io/metadata_utilities.cpp DESTINATION src/cudftestutil/io)
install(
FILES tests/utilities/column_utilities.cu
Expand Down
5 changes: 3 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ function(ConfigureBench CMAKE_BENCH_NAME)
)
target_link_libraries(
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen benchmark::benchmark_main
cudf::cudftestutil_impl $<TARGET_NAME_IF_EXISTS:conda_env>
cudf::cudftestutil_objects $<TARGET_NAME_IF_EXISTS:conda_env>
)
add_custom_command(
OUTPUT CUDF_BENCHMARKS
Expand Down Expand Up @@ -129,7 +129,7 @@ function(ConfigureNVBench CMAKE_BENCH_NAME)
target_link_libraries(
${CMAKE_BENCH_NAME}
PRIVATE cudf_benchmark_common ndsh_data_generator cudf_datagen nvbench::nvbench
$<TARGET_NAME_IF_EXISTS:conda_env> cudf::cudftestutil_impl
$<TARGET_NAME_IF_EXISTS:conda_env> cudf::cudftestutil_objects
)
install(
TARGETS ${CMAKE_BENCH_NAME}
Expand Down Expand Up @@ -360,6 +360,7 @@ ConfigureNVBench(TRANSFORM_NVBENCH transform/polynomials.cpp transform/transform
# * nvtext benchmark -------------------------------------------------------------------
ConfigureNVBench(
TEXT_NVBENCH
text/byte_pair_encoding.cpp
text/edit_distance.cpp
text/hash_ngrams.cpp
text/jaccard.cpp
Expand Down
75 changes: 75 additions & 0 deletions cpp/benchmarks/text/byte_pair_encoding.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>

#include <cudf_test/column_wrapper.hpp>

#include <cudf/strings/strings_column_view.hpp>

#include <nvtext/byte_pair_encoding.hpp>

#include <rmm/device_buffer.hpp>

#include <nvbench/nvbench.cuh>

/**
 * @brief NVBench driver that runs nvtext::byte_pair_encoding over a random strings column.
 *
 * Reads the `num_rows`, `min_width` and `max_width` axis values from the state, loads a small
 * fixed merge-pairs table, generates one random STRING column, and measures the encoder call.
 *
 * @param state NVBench state that supplies the axis values and records the measurements
 */
static void bench_byte_pair_encoding(nvbench::state& state)
{
  auto const row_total   = static_cast<cudf::size_type>(state.get_int64("num_rows"));
  auto const width_lower = static_cast<cudf::size_type>(state.get_int64("min_width"));
  auto const width_upper = static_cast<cudf::size_type>(state.get_int64("max_width"));

  // Fixed merge-pairs table, one space-separated pair per row.
  // NOTE(review): the trailing numbers are carried over unchanged from the original listing;
  // presumably they are ranks/ids from the source merges file -- confirm.
  auto merge_table = cudf::test::strings_column_wrapper({
    "e n",    // 14
    "i t",    // 16
    "i s",    // 17
    "e s",    // 20
    "en t",   // 44
    "c e",    // 90
    "es t",   // 141
    "en ce",  // 340
    "t h",    // 146
    "h i",    // 5049
    "th is",  // 5407
    "t est",  // 9034
    "s i",    // 13142
    "s ent"   // 33832
  });
  auto const merge_view = cudf::strings_column_view(merge_table);
  auto loaded_pairs     = nvtext::load_merge_pairs(merge_view);

  // Random input: a single STRING column whose widths are drawn from a NORMAL distribution
  // bounded by the min_width/max_width axis values.
  data_profile const input_profile = data_profile_builder().distribution(
    cudf::type_id::STRING, distribution_id::NORMAL, width_lower, width_upper);
  auto const random_table =
    create_random_table({cudf::type_id::STRING}, row_count{row_total}, input_profile);
  auto const input = cudf::strings_column_view(random_table->view().column(0));

  // Make NVBench time work on cudf's default stream.
  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));

  // Throughput accounting: every input character is counted as a one-byte read.
  auto const total_chars = input.chars_size(cudf::get_default_stream());
  state.add_global_memory_reads<nvbench::int8_t>(total_chars);
  // Writes are recorded as one 32-bit integer per row.
  // NOTE(review): confirm this matches what byte_pair_encoding actually produces.
  state.add_global_memory_writes<nvbench::int32_t>(row_total);

  // The launch object is not needed; the encoder result is dropped at the end of each run.
  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
    [[maybe_unused]] auto encoded = nvtext::byte_pair_encoding(input, *loaded_pairs);
  });
}

// Register bench_byte_pair_encoding with NVBench under the name "byte_pair_encoding" and
// declare the three int64 axes (min_width, max_width, num_rows) that the function reads back
// through state.get_int64(). min_width is fixed at 0; max_width and num_rows are swept.
NVBENCH_BENCH(bench_byte_pair_encoding)
.set_name("byte_pair_encoding")
.add_int64_axis("min_width", {0})
.add_int64_axis("max_width", {32, 64, 128, 256})
.add_int64_axis("num_rows", {32768, 262144});
7 changes: 5 additions & 2 deletions cpp/src/text/bpe/load_merge_pairs.cu
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,10 @@ namespace {
std::unique_ptr<detail::merge_pairs_map_type> initialize_merge_pairs_map(
cudf::column_device_view const& input, rmm::cuda_stream_view stream)
{
auto const elements = input.size() / 2;
auto merge_pairs_map = std::make_unique<merge_pairs_map_type>(
static_cast<size_t>(input.size()),
static_cast<size_t>(elements),
cudf::detail::CUCO_DESIRED_LOAD_FACTOR,
cuco::empty_key{-1},
cuco::empty_value{-1},
bpe_equal{input},
Expand All @@ -60,7 +62,7 @@ std::unique_ptr<detail::merge_pairs_map_type> initialize_merge_pairs_map(
cuda::proclaim_return_type<cuco::pair<cudf::size_type, cudf::size_type>>(
[] __device__(cudf::size_type idx) { return cuco::make_pair(idx, idx); }));

merge_pairs_map->insert_async(iter, iter + (input.size() / 2), stream.value());
merge_pairs_map->insert_async(iter, iter + elements, stream.value());

return merge_pairs_map;
}
Expand All @@ -70,6 +72,7 @@ std::unique_ptr<detail::mp_table_map_type> initialize_mp_table_map(
{
auto mp_table_map = std::make_unique<mp_table_map_type>(
static_cast<size_t>(input.size()),
cudf::detail::CUCO_DESIRED_LOAD_FACTOR,
cuco::empty_key{-1},
cuco::empty_value{-1},
mp_equal{input},
Expand Down
12 changes: 2 additions & 10 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,8 @@ function(ConfigureTest CMAKE_TEST_NAME)
)

target_link_libraries(
${CMAKE_TEST_NAME}
PRIVATE cudf::cudftestutil
cudf::cudftestutil_impl
GTest::gmock
GTest::gmock_main
GTest::gtest
GTest::gtest_main
nvtx3::nvtx3-cpp
$<TARGET_NAME_IF_EXISTS:conda_env>
"${_CUDF_TEST_EXTRA_LIBS}"
${CMAKE_TEST_NAME} PRIVATE cudf::cudftestutil_objects nvtx3::nvtx3-cpp
$<TARGET_NAME_IF_EXISTS:conda_env> "${_CUDF_TEST_EXTRA_LIBS}"
)
rapids_cuda_set_runtime(${CMAKE_TEST_NAME} USE_STATIC ${CUDA_STATIC_RUNTIME})
rapids_test_add(
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/pylibcudf/api_docs/contiguous_split.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
================
contiguous_split
================

.. automodule:: pylibcudf.contiguous_split
:members:
1 change: 1 addition & 0 deletions docs/cudf/source/pylibcudf/api_docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ This page provides API documentation for pylibcudf.
column
column_factories
concatenate
contiguous_split
copying
datetime
expressions
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/VERSION

This file was deleted.

1 change: 1 addition & 0 deletions python/cudf/cudf/VERSION
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
25.04.00
6 changes: 4 additions & 2 deletions python/cudf/cudf/tests/test_array_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ def test_ufunc_index(request, ufunc):
request.applymarker(
pytest.mark.xfail(
condition=fname in {"ceil", "floor", "trunc"}
and parse(np.__version__) >= parse("2.1"),
and parse(np.__version__) >= parse("2.1")
and parse(cp.__version__) < parse("14"),
reason="https://github.com/cupy/cupy/issues/9018",
)
)
Expand Down Expand Up @@ -401,7 +402,8 @@ def test_ufunc_dataframe(request, ufunc, has_nulls, indexed):
pytest.mark.xfail(
condition=fname in {"ceil", "floor", "trunc"}
and not has_nulls
and parse(np.__version__) >= parse("2.1"),
and parse(np.__version__) >= parse("2.1")
and parse(cp.__version__) < parse("14"),
reason="https://github.com/cupy/cupy/issues/9018",
)
)
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/tests/test_unaops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import re
from decimal import Decimal

import cupy as cp
import numpy as np
import pandas as pd
import pytest
Expand Down Expand Up @@ -89,7 +90,8 @@ def test_scalar_unary_operations(slr, dtype, op, request):
pytest.mark.xfail(
condition=op in {np.ceil, np.floor}
and not isinstance(slr, float)
and parse(np.__version__) >= parse("2.1"),
and parse(np.__version__) >= parse("2.1")
and parse(cp.__version__) < parse("14.0"),
reason="https://github.com/cupy/cupy/issues/9018",
)
)
Expand Down

0 comments on commit 5a866c2

Please sign in to comment.