Skip to content

Commit b06f510

Browse files
authored
Use more pylibcudf.types instead of cudf._lib.types (#17619)
Contributes to #17317. The primary change is to use `pylibcudf.TypeId` instead of the ad-hoc one defined in `cudf._lib.types`. Additionally, this uses pylibcudf more consistently and inlines/removes some seldom-used/dead code. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: #17619
1 parent 3add496 commit b06f510

File tree

9 files changed

+87
-227
lines changed

9 files changed

+87
-227
lines changed

python/cudf/cudf/_lib/__init__.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,2 @@
11
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
2-
import numpy as np
3-
42
from . import strings_udf
5-
6-
MAX_COLUMN_SIZE = np.iinfo(np.int32).max
7-
MAX_COLUMN_SIZE_STR = "INT32_MAX"
8-
MAX_STRING_COLUMN_BYTES = np.iinfo(np.int32).max
9-
MAX_STRING_COLUMN_BYTES_STR = "INT32_MAX"

python/cudf/cudf/_lib/column.pyx

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ from rmm.pylibrmm.device_buffer cimport DeviceBuffer
3131

3232
from cudf._lib.types cimport (
3333
dtype_from_column_view,
34-
dtype_to_data_type,
3534
dtype_to_pylibcudf_type,
3635
)
3736

3837
from cudf._lib.types import dtype_from_pylibcudf_column
3938

39+
from pylibcudf cimport DataType as plc_DataType
4040
cimport pylibcudf.libcudf.copying as cpp_copying
4141
cimport pylibcudf.libcudf.types as libcudf_types
4242
cimport pylibcudf.libcudf.unary as libcudf_unary
@@ -361,7 +361,7 @@ cdef class Column:
361361
col = self
362362
data_dtype = col.dtype
363363

364-
cdef libcudf_types.data_type dtype = dtype_to_data_type(data_dtype)
364+
cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
365365
cdef libcudf_types.size_type offset = self.offset
366366
cdef vector[mutable_column_view] children
367367
cdef void* data
@@ -398,7 +398,7 @@ cdef class Column:
398398
self._data = None
399399

400400
return mutable_column_view(
401-
dtype,
401+
dtype.c_obj,
402402
self.size,
403403
data,
404404
mask,
@@ -424,7 +424,7 @@ cdef class Column:
424424
col = self
425425
data_dtype = col.dtype
426426

427-
cdef libcudf_types.data_type dtype = dtype_to_data_type(data_dtype)
427+
cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
428428
cdef libcudf_types.size_type offset = self.offset
429429
cdef vector[column_view] children
430430
cdef void* data
@@ -450,7 +450,7 @@ cdef class Column:
450450
cdef libcudf_types.size_type c_null_count = null_count
451451

452452
return column_view(
453-
dtype,
453+
dtype.c_obj,
454454
self.size,
455455
data,
456456
mask,

python/cudf/cudf/_lib/scalar.pyx

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,22 @@ from libcpp cimport bool
1010
from libcpp.memory cimport unique_ptr
1111
from libcpp.utility cimport move
1212

13-
import pylibcudf
13+
import pylibcudf as plc
1414

1515
import cudf
16-
from cudf._lib.types import LIBCUDF_TO_SUPPORTED_NUMPY_TYPES
1716
from cudf.core.dtypes import ListDtype, StructDtype
17+
from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES
18+
from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
1819
from cudf.core.missing import NA, NaT
1920

20-
cimport pylibcudf.libcudf.types as libcudf_types
2121
# We currently need this cimport because some of the implementations here
2222
# access the c_obj of the scalar, and because we need to be able to call
2323
# pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until
2424
# DeviceScalar is phased out entirely from cuDF Cython (at which point
2525
# cudf.Scalar will be directly backed by pylibcudf.Scalar).
26-
from pylibcudf cimport Scalar as plc_Scalar
26+
from pylibcudf cimport Scalar as plc_Scalar, type_id as plc_TypeID
2727
from pylibcudf.libcudf.scalar.scalar cimport list_scalar, scalar, struct_scalar
2828

29-
from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id
30-
3129

3230
def _replace_nested(obj, check, replacement):
3331
if isinstance(obj, list):
@@ -62,12 +60,12 @@ def gather_metadata(dtypes):
6260
"""
6361
out = []
6462
for name, dtype in dtypes.items():
65-
v = pylibcudf.interop.ColumnMetadata(name)
63+
v = plc.interop.ColumnMetadata(name)
6664
if isinstance(dtype, cudf.StructDtype):
6765
v.children_meta = gather_metadata(dtype.fields)
6866
elif isinstance(dtype, cudf.ListDtype):
6967
# Offsets column is unnamed and has no children
70-
v.children_meta.append(pylibcudf.interop.ColumnMetadata(""))
68+
v.children_meta.append(plc.interop.ColumnMetadata(""))
7169
v.children_meta.extend(
7270
gather_metadata({"": dtype.element_type})
7371
)
@@ -81,7 +79,7 @@ cdef class DeviceScalar:
8179
# that from_unique_ptr is implemented is probably dereferencing this in an
8280
# invalid state. See what the best way to fix that is.
8381
def __cinit__(self, *args, **kwargs):
84-
self.c_value = pylibcudf.Scalar.__new__(pylibcudf.Scalar)
82+
self.c_value = plc.Scalar.__new__(plc.Scalar)
8583

8684
def __init__(self, value, dtype):
8785
"""
@@ -127,20 +125,20 @@ cdef class DeviceScalar:
127125
pa_array = pa.array([pa.scalar(value, type=pa_type)])
128126

129127
pa_table = pa.Table.from_arrays([pa_array], names=[""])
130-
table = pylibcudf.interop.from_arrow(pa_table)
128+
table = plc.interop.from_arrow(pa_table)
131129

132130
column = table.columns()[0]
133131
if isinstance(dtype, cudf.core.dtypes.DecimalDtype):
134132
if isinstance(dtype, cudf.core.dtypes.Decimal32Dtype):
135-
column = pylibcudf.unary.cast(
136-
column, pylibcudf.DataType(pylibcudf.TypeId.DECIMAL32, -dtype.scale)
133+
column = plc.unary.cast(
134+
column, plc.DataType(plc.TypeId.DECIMAL32, -dtype.scale)
137135
)
138136
elif isinstance(dtype, cudf.core.dtypes.Decimal64Dtype):
139-
column = pylibcudf.unary.cast(
140-
column, pylibcudf.DataType(pylibcudf.TypeId.DECIMAL64, -dtype.scale)
137+
column = plc.unary.cast(
138+
column, plc.DataType(plc.TypeId.DECIMAL64, -dtype.scale)
141139
)
142140

143-
self.c_value = pylibcudf.copying.get_element(column, 0)
141+
self.c_value = plc.copying.get_element(column, 0)
144142
self._dtype = dtype
145143

146144
def _to_host_scalar(self):
@@ -150,7 +148,7 @@ cdef class DeviceScalar:
150148
null_type = NaT if is_datetime or is_timedelta else NA
151149

152150
metadata = gather_metadata({"": self.dtype})[0]
153-
ps = pylibcudf.interop.to_arrow(self.c_value, metadata)
151+
ps = plc.interop.to_arrow(self.c_value, metadata)
154152
if not ps.is_valid:
155153
return null_type
156154

@@ -225,43 +223,42 @@ cdef class DeviceScalar:
225223
return s
226224

227225
cdef void _set_dtype(self, dtype=None):
228-
cdef libcudf_types.data_type cdtype = self.get_raw_ptr()[0].type()
229-
226+
cdef plc_TypeID cdtype_id = self.c_value.type().id()
230227
if dtype is not None:
231228
self._dtype = dtype
232-
elif cdtype.id() in {
233-
libcudf_types.type_id.DECIMAL32,
234-
libcudf_types.type_id.DECIMAL64,
235-
libcudf_types.type_id.DECIMAL128,
229+
elif cdtype_id in {
230+
plc_TypeID.DECIMAL32,
231+
plc_TypeID.DECIMAL64,
232+
plc_TypeID.DECIMAL128,
236233
}:
237234
raise TypeError(
238235
"Must pass a dtype when constructing from a fixed-point scalar"
239236
)
240-
elif cdtype.id() == libcudf_types.type_id.STRUCT:
237+
elif cdtype_id == plc_TypeID.STRUCT:
241238
struct_table_view = (<struct_scalar*>self.get_raw_ptr())[0].view()
242239
self._dtype = StructDtype({
243240
str(i): dtype_from_column_view(struct_table_view.column(i))
244241
for i in range(struct_table_view.num_columns())
245242
})
246-
elif cdtype.id() == libcudf_types.type_id.LIST:
243+
elif cdtype_id == plc_TypeID.LIST:
247244
if (
248245
<list_scalar*>self.get_raw_ptr()
249-
)[0].view().type().id() == libcudf_types.type_id.LIST:
246+
)[0].view().type().id() == plc_TypeID.LIST:
250247
self._dtype = dtype_from_column_view(
251248
(<list_scalar*>self.get_raw_ptr())[0].view()
252249
)
253250
else:
254251
self._dtype = ListDtype(
255-
LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
252+
PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
256253
<underlying_type_t_type_id>(
257254
(<list_scalar*>self.get_raw_ptr())[0]
258255
.view().type().id()
259256
)
260257
]
261258
)
262259
else:
263-
self._dtype = LIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
264-
<underlying_type_t_type_id>(cdtype.id())
260+
self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[
261+
<underlying_type_t_type_id>(cdtype_id)
265262
]
266263

267264

python/cudf/cudf/_lib/types.pxd

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,11 @@
11
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
22

33
from libc.stdint cimport int32_t
4-
from libcpp cimport bool
54

6-
cimport pylibcudf.libcudf.types as libcudf_types
75
from pylibcudf.libcudf.column.column_view cimport column_view
8-
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
96

107
ctypedef int32_t underlying_type_t_type_id
118

129
cdef dtype_from_column_view(column_view cv)
1310

14-
cdef libcudf_types.data_type dtype_to_data_type(dtype) except *
1511
cpdef dtype_to_pylibcudf_type(dtype)
16-
cdef bool is_decimal_type_id(libcudf_types.type_id tid) except *

0 commit comments

Comments
 (0)