Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simple NumPy 2 fixes that are clearly no behavior change #15876

Merged
merged 5 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/buffer/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def from_host_memory(cls, data: Any) -> Self:
"""Create an owner from a buffer or array-like object

Data must implement `__array_interface__`, the buffer protocol, and/or
be convertible to a buffer object using `numpy.array()`
be convertible to a buffer object using `numpy.asanyarray()`

The host memory is copied to a new device allocation.

Expand All @@ -209,7 +209,7 @@ def from_host_memory(cls, data: Any) -> Self:
"""

# Convert to numpy array, this will not copy data in most cases.
ary = numpy.array(data, copy=False, subok=True)
ary = numpy.asanyarray(data)
# Extract pointer and size
ptr, size = get_ptr_and_size(ary.__array_interface__)
# Copy to device memory
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/buffer/spillable_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def from_host_memory(cls, data: Any) -> Self:
"""Create a spillable buffer from host memory.

Data must implement `__array_interface__`, the buffer protocol, and/or
be convertible to a buffer object using `numpy.array()`
be convertible to a buffer object using `numpy.asanyarray()`

The new buffer is marked as spilled to host memory already.

Expand All @@ -165,7 +165,7 @@ def from_host_memory(cls, data: Any) -> Self:

# Convert to a memoryview using numpy array, this will not copy data
# in most cases.
data = memoryview(numpy.array(data, copy=False, subok=True))
data = memoryview(numpy.asanyarray(data))
if not data.c_contiguous:
raise ValueError("Buffer data must be C-contiguous")
data = data.cast("B") # Make sure itemsize==1
Expand Down
8 changes: 3 additions & 5 deletions python/cudf/cudf/tests/test_hash_vocab.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2022, NVIDIA CORPORATION.
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
import filecmp
import os
import warnings
Expand All @@ -21,9 +21,7 @@ def test_correct_bert_base_vocab_hash(datadir, tmpdir):

groundtruth_path = os.path.join(datadir, "vocab-hash.txt")
output_path = tmpdir.join("cudf-vocab-hash.txt")
with warnings.catch_warnings():
# See https://github.com/rapidsai/cudf/issues/12403
warnings.simplefilter(action="ignore", category=RuntimeWarning)
hash_vocab(vocab_path, output_path)
warnings.simplefilter(action="ignore", category=RuntimeWarning)
hash_vocab(vocab_path, output_path)

assert filecmp.cmp(output_path, groundtruth_path, shallow=False)
2 changes: 1 addition & 1 deletion python/cudf/cudf/tests/test_numerical.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_can_cast_safely_same_kind():
assert data.can_cast_safely(to_dtype)

data = cudf.Series(
[np.finfo("float32").max * 2, 1.0], dtype="float64"
[float(np.finfo("float32").max) * 2, 1.0], dtype="float64"
)._column
to_dtype = np.dtype("float32")
assert not data.can_cast_safely(to_dtype)
Expand Down
51 changes: 14 additions & 37 deletions python/cudf/cudf/tests/test_replace.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

import operator
import re
from decimal import Decimal

Expand Down Expand Up @@ -825,43 +826,23 @@ def test_series_fillna_invalid_dtype(data_dtype):

@pytest.mark.parametrize("data_dtype", NUMERIC_TYPES)
@pytest.mark.parametrize("fill_value", [100, 100.0, 128.5])
def test_series_where(data_dtype, fill_value):
@pytest.mark.parametrize("op", [operator.gt, operator.eq, operator.lt])
def test_series_where(data_dtype, fill_value, op):
psr = pd.Series(list(range(10)), dtype=data_dtype)
sr = cudf.from_pandas(psr)

if sr.dtype.type(fill_value) != fill_value:
with pytest.raises(TypeError):
sr.where(sr > 0, fill_value)
else:
# Cast back to original dtype as pandas automatically upcasts
expect = psr.where(psr > 0, fill_value)
got = sr.where(sr > 0, fill_value)
# pandas returns 'float16' dtype, which is not supported in cudf
assert_eq(
expect,
got,
check_dtype=expect.dtype.kind not in ("f"),
)
try:
scalar_fits = sr.dtype.type(fill_value) == fill_value
except OverflowError:
scalar_fits = False

if sr.dtype.type(fill_value) != fill_value:
if not scalar_fits:
with pytest.raises(TypeError):
sr.where(sr < 0, fill_value)
sr.where(op(sr, 0), fill_value)
else:
expect = psr.where(psr < 0, fill_value)
got = sr.where(sr < 0, fill_value)
# pandas returns 'float16' dtype, which is not supported in cudf
assert_eq(
expect,
got,
check_dtype=expect.dtype.kind not in ("f"),
)

if sr.dtype.type(fill_value) != fill_value:
with pytest.raises(TypeError):
sr.where(sr == 0, fill_value)
else:
expect = psr.where(psr == 0, fill_value)
got = sr.where(sr == 0, fill_value)
# Cast back to original dtype as pandas automatically upcasts
expect = psr.where(op(psr, 0), fill_value)
got = sr.where(op(sr, 0), fill_value)
# pandas returns 'float16' dtype, which is not supported in cudf
assert_eq(
expect,
Expand Down Expand Up @@ -985,12 +966,8 @@ def test_numeric_series_replace_dtype(series_dtype, replacement):
psr = pd.Series([0, 1, 2, 3, 4, 5], dtype=series_dtype)
sr = cudf.from_pandas(psr)

if sr.dtype.kind in "ui":
can_replace = np.array([replacement])[0].is_integer() and np.can_cast(
int(replacement), sr.dtype
)
else:
can_replace = np.can_cast(replacement, sr.dtype)
numpy_replacement = np.array(replacement).astype(sr.dtype)[()]
can_replace = numpy_replacement == replacement

# Both Scalar
if not can_replace:
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/tests/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,8 @@ def test_series_argsort(nelem, dtype, asc):
if asc:
expected = np.argsort(sr.to_numpy(), kind="mergesort")
else:
expected = np.argsort(sr.to_numpy() * -1, kind="mergesort")
# -1 multiply works around missing desc sort (may promote to float64)
expected = np.argsort(sr.to_numpy() * np.int8(-1), kind="mergesort")
wence- marked this conversation as resolved.
Show resolved Hide resolved
np.testing.assert_array_equal(expected, res.to_numpy())


Expand Down
25 changes: 13 additions & 12 deletions python/cudf/cudf/utils/hash_vocab_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

# Coefficient ranges for inner hash - These are important to set to be
# large so that we have randomness in the bottom bits when modding
A_SECOND_LEVEL_POW = np.uint8(48)
B_SECOND_LEVEL_POW = np.uint8(7)
A_SECOND_LEVEL_POW = np.uint64(48)
B_SECOND_LEVEL_POW = np.uint64(7)

A_LBOUND_SECOND_LEVEL_HASH = 2**16
A_HBOUND_SECOND_LEVEL_HASH = 2**A_SECOND_LEVEL_POW
Expand All @@ -23,11 +23,11 @@


# Shifts for bit packing
A_SECOND_LEVEL_SHIFT_AMT = np.uint8(64 - A_SECOND_LEVEL_POW)
B_SECOND_LEVEL_SHIFT_AMT = np.uint8(
A_SECOND_LEVEL_SHIFT_AMT = np.uint64(64 - A_SECOND_LEVEL_POW)
wence- marked this conversation as resolved.
Show resolved Hide resolved
B_SECOND_LEVEL_SHIFT_AMT = np.uint64(
64 - A_SECOND_LEVEL_POW - B_SECOND_LEVEL_POW
)
BITS_FOR_INNER_TABLE_SIZE = np.uint8(8)
BITS_FOR_INNER_TABLE_SIZE = np.uint64(8)

NOT_FOUND = -1

Expand Down Expand Up @@ -94,7 +94,8 @@ def _find_hash_for_internal(hash_bin):

while True:
a = np.random.randint(
A_LBOUND_SECOND_LEVEL_HASH, A_HBOUND_SECOND_LEVEL_HASH
A_LBOUND_SECOND_LEVEL_HASH,
A_HBOUND_SECOND_LEVEL_HASH,
)
b = np.random.randint(
B_LBOUND_SECOND_LEVEL_HASH, B_HBOUND_SECOND_LEVEL_HASH
Expand Down Expand Up @@ -130,13 +131,13 @@ def _perfect_hash(integers, max_constant):
bin_length = len(internal_table)
max_bin_length = max(bin_length, max_bin_length)
internal_table_coeffs[i] = (
coeff_a << A_SECOND_LEVEL_SHIFT_AMT
| coeff_b << B_SECOND_LEVEL_SHIFT_AMT
| bin_length
)
offset_into_flattened_table[i + 1] = (
offset_into_flattened_table[i] + bin_length
np.uint64(coeff_a) << A_SECOND_LEVEL_SHIFT_AMT
wence- marked this conversation as resolved.
Show resolved Hide resolved
| np.uint64(coeff_b) << B_SECOND_LEVEL_SHIFT_AMT
| np.uint64(bin_length)
)
offset_into_flattened_table[i + 1] = offset_into_flattened_table[
i
] + np.uint64(bin_length)
flattened_bins.extend(internal_table)

print(
Expand Down
Loading