Fix incompatibility with pyopencl
fjarri committed Jan 10, 2023
1 parent 91aa7c3 · commit 0d4225f
Showing 6 changed files with 37 additions and 25 deletions.
doc/source/history.rst (4 additions, 0 deletions)

@@ -6,8 +6,12 @@ Release history
 0.8.0 (current development version)
 ===================================
 
+* CHANGED: `dest_offset` renamed to `dst_offset` to match `pyopencl`/`pycuda` API.
+
 * FIXED: register ``numpy.bool`` in addition to ``bool`` - it is a separate type now (@perdigao1).
 
+* FIXED: incompatibility with modern versions of `pyopencl`.
+
 
 0.7.6 (20 Nov 2021)
 ===================
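For downstream code the rename is mechanical: only the keyword changes. A minimal sketch of a call using the new spelling (the `thr`, `src_dev` and `dst_dev` names are hypothetical):

    # Copy 100 items, reading from item 2 of src_dev and writing to item 4
    # of dst_dev; `dst_offset` is the keyword formerly named `dest_offset`.
    thr.copy_array(src_dev, dest=dst_dev, src_offset=2, dst_offset=4, size=100)
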
reikna/cluda/api.py (4 additions, 4 deletions)

@@ -461,13 +461,13 @@ def from_device(self, arr, dest=None, async_=False):
"""
raise NotImplementedError()

def copy_array(self, arr, dest=None, src_offset=0, dest_offset=0, size=None):
def copy_array(self, arr, dest=None, src_offset=0, dst_offset=0, size=None):
"""
Copies array on device.
:param dest: the effect is the same as in :py:meth:`to_device`.
:param src_offset: offset (in items of ``arr.dtype``) in the source array.
:param dest_offset: offset (in items of ``arr.dtype``) in the destination array.
:param dst_offset: offset (in items of ``arr.dtype``) in the destination array.
:param size: how many elements of ``arr.dtype`` to copy.
"""

@@ -479,10 +479,10 @@ def copy_array(self, arr, dest=None, src_offset=0, dest_offset=0, size=None):
         itemsize = arr.dtype.itemsize
         nbytes = arr.nbytes if size is None else itemsize * size
         src_offset *= itemsize
-        dest_offset *= itemsize
+        dst_offset *= itemsize
 
         self._copy_array_buffer(arr_device, arr,
-            nbytes, src_offset=src_offset, dest_offset=dest_offset)
+            nbytes, src_offset=src_offset, dst_offset=dst_offset)
         self._synchronize()
 
         if dest is None:
reikna/cluda/cuda.py (4 additions, 4 deletions)

@@ -204,13 +204,13 @@ def from_device(self, arr, dest=None, async_=False):
         if dest is None:
             return arr_cpu
 
-    def _copy_array_buffer(self, dest, src, nbytes, src_offset=0, dest_offset=0):
+    def _copy_array_buffer(self, dest, src, nbytes, src_offset=0, dst_offset=0):
         self._memcpy_dtod(
-            dest.gpudata, src.gpudata, nbytes, src_offset=src_offset, dest_offset=dest_offset)
+            dest.gpudata, src.gpudata, nbytes, src_offset=src_offset, dst_offset=dst_offset)
 
-    def _memcpy_dtod(self, dest, src, nbytes, src_offset=0, dest_offset=0):
+    def _memcpy_dtod(self, dest, src, nbytes, src_offset=0, dst_offset=0):
         cuda.memcpy_dtod_async(
-            int(dest) + dest_offset,
+            int(dest) + dst_offset,
             int(src) + src_offset,
             nbytes, stream=self._queue)

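On the CUDA side the offsets end up as plain byte arithmetic on integer device pointers. The same pattern, sketched standalone with pycuda (array contents and sizes are illustrative):

    import numpy
    import pycuda.autoinit  # creates a default context
    import pycuda.driver as cuda
    import pycuda.gpuarray as gpuarray

    src = gpuarray.to_gpu(numpy.arange(8, dtype=numpy.int32))
    dst = gpuarray.zeros(8, dtype=numpy.int32)
    itemsize = src.dtype.itemsize

    # Copy 4 items from src[2:6] into dst[1:5]: byte offsets are added
    # directly to the integer device pointers, as _memcpy_dtod does above.
    cuda.memcpy_dtod(
        int(dst.gpudata) + 1 * itemsize,
        int(src.gpudata) + 2 * itemsize,
        4 * itemsize)
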
reikna/cluda/ocl.py (19 additions, 11 deletions)

@@ -27,10 +27,16 @@ class Array(clarray.Array):
"""
def __init__(
self, thr, shape, dtype, strides=None, offset=0, nbytes=None,
allocator=None, base_data=None):
allocator=None, data=None, events=None,
_fast=False, _context=None, _queue=None, _size=None):

if thr is None:
thr = Thread(_queue)

clarray.Array.__init__(
self, thr._queue, shape, dtype, strides=strides, allocator=allocator,
data=base_data, offset=offset)
data=data, offset=offset, events=events,
_fast=_fast, _context=thr._context, _queue=thr._queue, _size=_size)
self.nbytes = nbytes
self.thread = thr

@@ -51,7 +57,7 @@ def __getitem__(self, index):
         # Let cl.Array calculate the new strides and offset
         return self.thread.array(
             shape=res.shape, dtype=res.dtype, strides=res.strides,
-            base_data=res.base_data,
+            base=res.base_data,
             offset=res.offset)
 
     def __setitem__(self, index, value):
@@ -88,7 +94,7 @@ def _process_cqd(self, cqd):

     def array(
             self, shape, dtype, strides=None, offset=0, nbytes=None,
-            allocator=None, base=None, base_data=None):
+            allocator=None, base=None, data=None):
 
         if allocator is None:
             allocator = self.allocate
@@ -98,14 +104,16 @@ def array(
         if nbytes is None:
             nbytes = int(min_buffer_size(shape, dtype.itemsize, strides=strides, offset=offset))
 
-        if (offset != 0 or strides is not None) and base_data is None and base is None:
-            base_data = allocator(nbytes)
+        if (offset != 0 or strides is not None) and data is None and base is None:
+            data = allocator(nbytes)
         elif base is not None:
-            base_data = base.data
+            if isinstance(base, Array):
+                base = base.base_data
+            data = base
 
         return Array(
             self, shape, dtype, strides=strides, offset=offset,
-            allocator=allocator, base_data=base_data, nbytes=nbytes)
+            allocator=allocator, data=data, nbytes=nbytes)
 
     def allocate(self, size):
         return cl.Buffer(self._context, cl.mem_flags.READ_WRITE, size=size)
@@ -118,10 +126,10 @@ def from_device(self, arr, dest=None, async_=False):
         if dest is None:
             return arr_cpu
 
-    def _copy_array_buffer(self, dest, src, nbytes, src_offset=0, dest_offset=0):
+    def _copy_array_buffer(self, dest, src, nbytes, src_offset=0, dst_offset=0):
         cl.enqueue_copy(
             self._queue, dest.data, src.data,
-            byte_count=nbytes, src_offset=src_offset, dest_offset=dest_offset)
+            byte_count=nbytes, src_offset=src_offset, dst_offset=dst_offset)
 
     def synchronize(self):
         self._queue.finish()
@@ -222,6 +230,6 @@ def prepare(self, global_size, local_size=None, local_mem=0):
         self._global_size = wrap_in_tuple(global_size)
 
     def _prepared_call(self, *args):
-        # Passing base_data, assuming that the kernel knows how to handle the offset and the strides
+        # Passing data, assuming that the kernel knows how to handle the offset and the strides
         args = [x.base_data if isinstance(x, clarray.Array) else x for x in args]
         return self._kernel(self._thr._queue, self._global_size, self._local_size, *args)
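
The keyword this commit adopts is the one modern pyopencl expects for buffer-to-buffer copies. A minimal sketch of that call in isolation (context setup and sizes are illustrative):

    import numpy
    import pyopencl as cl

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    a = numpy.arange(8, dtype=numpy.int32)
    mf = cl.mem_flags
    src = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=a)
    dst = cl.Buffer(ctx, mf.READ_WRITE, size=a.nbytes)

    # Copy 4 items from src[2:6] into dst[1:5]; all offsets are in bytes,
    # and the destination offset is spelled `dst_offset`.
    cl.enqueue_copy(
        queue, dst, src, byte_count=4 * a.itemsize,
        src_offset=2 * a.itemsize, dst_offset=1 * a.itemsize)
    queue.finish()
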
test/test_cluda/test_basics.py (5 additions, 5 deletions)

@@ -376,23 +376,23 @@ def test_offsets_in_kernel(thr):
"""

global_size = 100
dest_offset = 4
dst_offset = 4
src_offset = 2
dtype = dtypes.normalize_type(numpy.int32)

program = thr.compile("""
KERNEL void test(GLOBAL_MEM int *dest, GLOBAL_MEM int *src)
{
const SIZE_T i = get_global_id(0);
dest[i + ${dest_offset}] = src[i + ${src_offset}];
dest[i + ${dst_offset}] = src[i + ${src_offset}];
}
""",
render_kwds=dict(dest_offset=dest_offset, src_offset=src_offset))
render_kwds=dict(dst_offset=dst_offset, src_offset=src_offset))
test = program.test

dest_dev_base = thr.array(global_size + dest_offset, dtype)
dest_dev_base = thr.array(global_size + dst_offset, dtype)
dest_dev = thr.array(
global_size, dtype, offset=dest_offset * dtype.itemsize, base=dest_dev_base)
global_size, dtype, offset=dst_offset * dtype.itemsize, base=dest_dev_base)

src_base = numpy.arange(global_size + src_offset).astype(dtype)
src_dev_base = thr.to_device(src_base)
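The test builds an offset view over a larger base allocation. The same pattern from user code, as a hedged sketch (names and sizes illustrative):

    dtype = numpy.dtype(numpy.int32)
    base = thr.array(104, dtype)  # 100 items plus 4 items of headroom
    # A 100-item view starting 4 items into `base`; the offset is in bytes.
    view = thr.array(100, dtype, offset=4 * dtype.itemsize, base=base)
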
test/test_core/test_transformation.py (1 addition, 1 deletion)

@@ -560,7 +560,7 @@ def test_array_offset(thr):
     # providing base_data
     a3_base = thr.array((arr_len + offset_len,), dtype)
     a3_data = a3_base.base_data
-    a3 = thr.array((arr_len,), dtype, offset=itemsize * offset_len, base_data=a3_data)
+    a3 = thr.array((arr_len,), dtype, offset=itemsize * offset_len, data=a3_data)
 
     fill = PureParallel(
         [
