diff --git a/doc/source/history.rst b/doc/source/history.rst index 25336b0..be1ab18 100644 --- a/doc/source/history.rst +++ b/doc/source/history.rst @@ -6,8 +6,12 @@ Release history 0.8.0 (current development version) =================================== +* CHANGED: ``dest_offset`` renamed to ``dst_offset`` to match the ``pyopencl``/``pycuda`` API. + * FIXED: register ``numpy.bool`` in addition to ``bool`` - it is a separate type now (@perdigao1). +* FIXED: incompatibility with modern versions of ``pyopencl``. + 0.7.6 (20 Nov 2021) =================== diff --git a/reikna/cluda/api.py b/reikna/cluda/api.py index 6b2bd9f..1ed8dbe 100644 --- a/reikna/cluda/api.py +++ b/reikna/cluda/api.py @@ -461,13 +461,13 @@ def from_device(self, arr, dest=None, async_=False): """ raise NotImplementedError() - def copy_array(self, arr, dest=None, src_offset=0, dest_offset=0, size=None): + def copy_array(self, arr, dest=None, src_offset=0, dst_offset=0, size=None): """ Copies array on device. :param dest: the effect is the same as in :py:meth:`to_device`. :param src_offset: offset (in items of ``arr.dtype``) in the source array. - :param dest_offset: offset (in items of ``arr.dtype``) in the destination array. + :param dst_offset: offset (in items of ``arr.dtype``) in the destination array. :param size: how many elements of ``arr.dtype`` to copy. 
""" @@ -479,10 +479,10 @@ def copy_array(self, arr, dest=None, src_offset=0, dest_offset=0, size=None): itemsize = arr.dtype.itemsize nbytes = arr.nbytes if size is None else itemsize * size src_offset *= itemsize - dest_offset *= itemsize + dst_offset *= itemsize self._copy_array_buffer(arr_device, arr, - nbytes, src_offset=src_offset, dest_offset=dest_offset) + nbytes, src_offset=src_offset, dst_offset=dst_offset) self._synchronize() if dest is None: diff --git a/reikna/cluda/cuda.py b/reikna/cluda/cuda.py index b24ee66..86242ae 100644 --- a/reikna/cluda/cuda.py +++ b/reikna/cluda/cuda.py @@ -204,13 +204,13 @@ def from_device(self, arr, dest=None, async_=False): if dest is None: return arr_cpu - def _copy_array_buffer(self, dest, src, nbytes, src_offset=0, dest_offset=0): + def _copy_array_buffer(self, dest, src, nbytes, src_offset=0, dst_offset=0): self._memcpy_dtod( - dest.gpudata, src.gpudata, nbytes, src_offset=src_offset, dest_offset=dest_offset) + dest.gpudata, src.gpudata, nbytes, src_offset=src_offset, dst_offset=dst_offset) - def _memcpy_dtod(self, dest, src, nbytes, src_offset=0, dest_offset=0): + def _memcpy_dtod(self, dest, src, nbytes, src_offset=0, dst_offset=0): cuda.memcpy_dtod_async( - int(dest) + dest_offset, + int(dest) + dst_offset, int(src) + src_offset, nbytes, stream=self._queue) diff --git a/reikna/cluda/ocl.py b/reikna/cluda/ocl.py index 7a85444..eb652af 100644 --- a/reikna/cluda/ocl.py +++ b/reikna/cluda/ocl.py @@ -27,10 +27,16 @@ class Array(clarray.Array): """ def __init__( self, thr, shape, dtype, strides=None, offset=0, nbytes=None, - allocator=None, base_data=None): + allocator=None, data=None, events=None, + _fast=False, _context=None, _queue=None, _size=None): + + if thr is None: + thr = Thread(_queue) + clarray.Array.__init__( self, thr._queue, shape, dtype, strides=strides, allocator=allocator, - data=base_data, offset=offset) + data=data, offset=offset, events=events, + _fast=_fast, _context=thr._context, _queue=thr._queue, 
_size=_size) self.nbytes = nbytes self.thread = thr @@ -51,7 +57,7 @@ def __getitem__(self, index): # Let cl.Array calculate the new strides and offset return self.thread.array( shape=res.shape, dtype=res.dtype, strides=res.strides, - base_data=res.base_data, + base=res.base_data, offset=res.offset) def __setitem__(self, index, value): @@ -88,7 +94,7 @@ def _process_cqd(self, cqd): def array( self, shape, dtype, strides=None, offset=0, nbytes=None, - allocator=None, base=None, base_data=None): + allocator=None, base=None, data=None): if allocator is None: allocator = self.allocate @@ -98,14 +104,16 @@ def array( if nbytes is None: nbytes = int(min_buffer_size(shape, dtype.itemsize, strides=strides, offset=offset)) - if (offset != 0 or strides is not None) and base_data is None and base is None: - base_data = allocator(nbytes) + if (offset != 0 or strides is not None) and data is None and base is None: + data = allocator(nbytes) elif base is not None: - base_data = base.data + if isinstance(base, Array): + base = base.base_data + data = base return Array( self, shape, dtype, strides=strides, offset=offset, - allocator=allocator, base_data=base_data, nbytes=nbytes) + allocator=allocator, data=data, nbytes=nbytes) def allocate(self, size): return cl.Buffer(self._context, cl.mem_flags.READ_WRITE, size=size) @@ -118,10 +126,10 @@ def from_device(self, arr, dest=None, async_=False): if dest is None: return arr_cpu - def _copy_array_buffer(self, dest, src, nbytes, src_offset=0, dest_offset=0): + def _copy_array_buffer(self, dest, src, nbytes, src_offset=0, dst_offset=0): cl.enqueue_copy( self._queue, dest.data, src.data, - byte_count=nbytes, src_offset=src_offset, dest_offset=dest_offset) + byte_count=nbytes, src_offset=src_offset, dst_offset=dst_offset) def synchronize(self): self._queue.finish() @@ -222,6 +230,6 @@ def prepare(self, global_size, local_size=None, local_mem=0): self._global_size = wrap_in_tuple(global_size) def _prepared_call(self, *args): - # Passing 
base_data, assuming that the kernel knows how to handle the offset and the strides + # Passing the underlying buffer (base_data), assuming that the kernel knows how to handle the offset and the strides args = [x.base_data if isinstance(x, clarray.Array) else x for x in args] return self._kernel(self._thr._queue, self._global_size, self._local_size, *args) diff --git a/test/test_cluda/test_basics.py b/test/test_cluda/test_basics.py index 53e15bc..0a98731 100644 --- a/test/test_cluda/test_basics.py +++ b/test/test_cluda/test_basics.py @@ -376,7 +376,7 @@ def test_offsets_in_kernel(thr): """ global_size = 100 - dest_offset = 4 + dst_offset = 4 src_offset = 2 dtype = dtypes.normalize_type(numpy.int32) @@ -384,15 +384,15 @@ def test_offsets_in_kernel(thr): KERNEL void test(GLOBAL_MEM int *dest, GLOBAL_MEM int *src) { const SIZE_T i = get_global_id(0); - dest[i + ${dest_offset}] = src[i + ${src_offset}]; + dest[i + ${dst_offset}] = src[i + ${src_offset}]; } """, - render_kwds=dict(dest_offset=dest_offset, src_offset=src_offset)) + render_kwds=dict(dst_offset=dst_offset, src_offset=src_offset)) test = program.test - dest_dev_base = thr.array(global_size + dest_offset, dtype) + dest_dev_base = thr.array(global_size + dst_offset, dtype) dest_dev = thr.array( - global_size, dtype, offset=dest_offset * dtype.itemsize, base=dest_dev_base) + global_size, dtype, offset=dst_offset * dtype.itemsize, base=dest_dev_base) src_base = numpy.arange(global_size + src_offset).astype(dtype) src_dev_base = thr.to_device(src_base) diff --git a/test/test_core/test_transformation.py b/test/test_core/test_transformation.py index f024b82..6defb83 100644 --- a/test/test_core/test_transformation.py +++ b/test/test_core/test_transformation.py @@ -560,7 +560,7 @@ def test_array_offset(thr): # providing base_data a3_base = thr.array((arr_len + offset_len,), dtype) a3_data = a3_base.base_data - a3 = thr.array((arr_len,), dtype, offset=itemsize * offset_len, base_data=a3_data) + a3 = thr.array((arr_len,), dtype, offset=itemsize * 
offset_len, data=a3_data) fill = PureParallel( [