[MNT] Small NumPy 2 related fixes (#5954)

seberg · web-flow · commit 4338268b7731 · 2024-07-28T16:34:33.000Z
This applies some smaller NumPy 2-related fixes. With the (in-progress) CuPy 13.2 fixups, the single-GPU test suite seems to be doing mostly fine. There is a single test remaining: ``` test_simpl_set.py::test_simplicial_set_embedding ``` which is failing with: ``` (Pdb) cp.asarray(cu_embedding) array([[23067.518, 23067.518], [17334.559, 17334.559], [22713.598, 22713.598], ..., [23238.438, 23238.438], [25416.912, 25416.912], [19748.943, 19748.943]], dtype=float32) ``` being completely different from the reference: ``` array([[5.330462 , 4.3419437], [4.1822557, 5.6225405], [5.200859 , 4.530094 ], ..., [4.852359 , 5.0026293], [5.361374 , 4.1475334], [4.0259256, 5.7187223]], dtype=float32) ``` I am not sure why that might be; I will prod it a bit more, but it may need someone who knows the methods to have a look. One wrinkle is that hdbscan is not yet released for NumPy 2, but I guess it is still required even though sklearn has a version? (Probably not a big issue, but my fixups in scikit-learn-contrib/hdbscan#644 run into some issue even though it doesn't seem NumPy 2-related.) xref: rapidsai/build-planning#38 Authors: - Sebastian Berg (https://github.com/seberg) - https://github.com/jakirkham - Dante Gama Dessavre (https://github.com/dantegd) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Dante Gama Dessavre (https://github.com/dantegd) URL: #5954
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -73,7 +73,8 @@ repos:
                 setup[.]cfg$
           exclude: |
             (?x)
-                cpp/src/tsne/cannylab/bh[.]cu$
+                cpp/src/tsne/cannylab/bh[.]cu$|
+                python/cuml/cuml/_thirdparty
         - id: verify-alpha-spec
     - repo: https://github.com/rapidsai/dependency-file-generator
       rev: v1.13.11
diff --git a/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py b/python/cuml/cuml/_thirdparty/sklearn/utils/sparsefuncs.py
@@ -214,7 +214,7 @@ def _sparse_min_or_max(X, axis, min_or_max):
         if np.isnan(m):
             if 'nan' in min_or_max:
                 m = 0
-        elif X.nnz != cpu_np.product(X.shape):
+        elif X.nnz != cpu_np.prod(X.shape):
             if 'min' in min_or_max:
                 m = m if m <= 0 else 0
             else:
diff --git a/python/cuml/cuml/internals/array.py b/python/cuml/cuml/internals/array.py
@@ -1163,12 +1163,16 @@ def from_input(
         if (
             not fail_on_order and order != arr.order and order != "K"
         ) or make_copy:
-            arr = cls(
-                arr.mem_type.xpy.array(
-                    arr.to_output("array"), order=order, copy=make_copy
-                ),
-                index=index,
-            )
+            if make_copy:
+                data = arr.mem_type.xpy.array(
+                    arr.to_output("array"), order=order
+                )
+            else:
+                data = arr.mem_type.xpy.asarray(
+                    arr.to_output("array"), order=order
+                )
+
+            arr = cls(data, index=index)
 
         n_rows = arr.shape[0]
 
diff --git a/python/cuml/cuml/tests/test_make_classification.py b/python/cuml/cuml/tests/test_make_classification.py
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -115,7 +115,7 @@ def test_make_classification_informative_features():
 
             # Cluster by sign, viewed as strings to allow uniquing
             signs = np.sign(cp.asnumpy(X))
-            signs = signs.view(dtype="|S{0}".format(signs.strides[0]))
+            signs = signs.view(dtype="|S{0}".format(signs.strides[0])).ravel()
             unique_signs, cluster_index = np.unique(signs, return_inverse=True)
 
             assert (
diff --git a/python/cuml/cuml/tests/test_metrics.py b/python/cuml/cuml/tests/test_metrics.py
@@ -1065,7 +1065,7 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major):
     cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
 
     # Change precision of one parameter
-    Y = np.asfarray(Y, dtype=np.float32)
+    Y = np.asarray(Y, dtype=np.float32)
     S = pairwise_distances(X, Y, metric=metric)
     S2 = ref_dense_pairwise_dist(X, Y, metric=metric)
     cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
@@ -1074,8 +1074,8 @@ def test_pairwise_distances(metric: str, matrix_size, is_col_major):
     compare_precision = 2
 
     # Change precision of both parameters to float
-    X = np.asfarray(X, dtype=np.float32)
-    Y = np.asfarray(Y, dtype=np.float32)
+    X = np.asarray(X, dtype=np.float32)
+    Y = np.asarray(Y, dtype=np.float32)
     S = pairwise_distances(X, Y, metric=metric)
     S2 = ref_dense_pairwise_dist(X, Y, metric=metric)
     cp.testing.assert_array_almost_equal(S, S2, decimal=compare_precision)
@@ -1132,8 +1132,8 @@ def test_pairwise_distances_sklearn_comparison(metric: str, matrix_size):
     # For fp32, compare at 4 decimals, (3 places less than the ~7 max)
     compare_precision = 4
 
-    X = np.asfarray(X, dtype=np.float32)
-    Y = np.asfarray(Y, dtype=np.float32)
+    X = np.asarray(X, dtype=np.float32)
+    Y = np.asarray(Y, dtype=np.float32)
 
     # Compare to sklearn, fp32
     S = pairwise_distances(X, Y, metric=metric)
@@ -1228,7 +1228,7 @@ def test_pairwise_distances_exceptions():
 
     X_int = rng.randint(10, size=(5, 4))
     X_double = rng.random_sample((5, 4))
-    X_float = np.asfarray(X_double, dtype=np.float32)
+    X_float = np.asarray(X_double, dtype=np.float32)
     X_bool = rng.choice([True, False], size=(5, 4))
 
     # Test int inputs (only float/double accepted at this time)
diff --git a/python/cuml/cuml/tests/test_umap.py b/python/cuml/cuml/tests/test_umap.py
@@ -420,9 +420,9 @@ def get_embedding(n_components, random_state):
         )
         return reducer.fit_transform(data, convert_dtype=True)
 
-    state = copy.copy(random_state)
+    state = copy.deepcopy(random_state)
     cuml_embedding1 = get_embedding(n_components, state)
-    state = copy.copy(random_state)
+    state = copy.deepcopy(random_state)
     cuml_embedding2 = get_embedding(n_components, state)
 
     assert not np.isnan(cuml_embedding1).any()
@@ -475,9 +475,9 @@ def get_embedding(n_components, random_state):
         reducer.fit(fit_data, convert_dtype=True)
         return reducer.transform(transform_data, convert_dtype=True)
 
-    state = copy.copy(random_state)
+    state = copy.deepcopy(random_state)
     cuml_embedding1 = get_embedding(n_components, state)
-    state = copy.copy(random_state)
+    state = copy.deepcopy(random_state)
     cuml_embedding2 = get_embedding(n_components, state)
 
     assert not np.isnan(cuml_embedding1).any()

Original file line number	Diff line number	Diff line change
`@@ -1,5 +1,5 @@`
`1`	`1`	`#`
`2`		`-# Copyright (c) 2020-2023, NVIDIA CORPORATION.`
	`2`	`+# Copyright (c) 2020-2024, NVIDIA CORPORATION.`
`3`	`3`	`#`
`4`	`4`	`# Licensed under the Apache License, Version 2.0 (the "License");`
`5`	`5`	`# you may not use this file except in compliance with the License.`
`@@ -115,7 +115,7 @@ def test_make_classification_informative_features():`
`115`	`115`
`116`	`116`	`# Cluster by sign, viewed as strings to allow uniquing`
`117`	`117`	`signs = np.sign(cp.asnumpy(X))`
`118`		`- signs = signs.view(dtype="\|S{0}".format(signs.strides[0]))`
	`118`	`+ signs = signs.view(dtype="\|S{0}".format(signs.strides[0])).ravel()`
`119`	`119`	`unique_signs, cluster_index = np.unique(signs, return_inverse=True)`
`120`	`120`
`121`	`121`	`assert (`