From de78dcf6e3b32564def4e5ef3c8db869e2fb23e3 Mon Sep 17 00:00:00 2001 From: Jerome Kieffer Date: Tue, 19 Nov 2024 18:02:09 +0100 Subject: [PATCH 1/6] Explicitly Disable fp64 Work around a bug in Apple Silicon GPU driver --- src/pyFAI/opencl/__init__.py | 20 +++++++++++++++++++- src/pyFAI/opencl/azim_csr.py | 16 ++++++++++------ src/pyFAI/opencl/azim_hist.py | 20 ++++++++++++-------- src/pyFAI/opencl/azim_lut.py | 19 ++++++++++--------- src/pyFAI/opencl/peak_finder.py | 17 +++++++---------- src/pyFAI/opencl/preproc.py | 10 ++++++---- src/pyFAI/opencl/test/test_preproc.py | 5 +++-- src/pyFAI/resources/openCL/preprocess.cl | 4 ++-- 8 files changed, 69 insertions(+), 42 deletions(-) diff --git a/src/pyFAI/opencl/__init__.py b/src/pyFAI/opencl/__init__.py index 15c16ff37..49bdccf52 100644 --- a/src/pyFAI/opencl/__init__.py +++ b/src/pyFAI/opencl/__init__.py @@ -36,7 +36,7 @@ __contact__ = "Jerome.Kieffer@ESRF.eu" __license__ = "MIT" __copyright__ = "2012-2024 European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "15/04/2024" +__date__ = "19/11/2024" __status__ = "stable" import os @@ -79,6 +79,24 @@ def get_x87_volatile_option(ctx): else: return "" +def get_compiler_options(ctx, x87_volatile=False, apple_gpu=False): + """Provide a set of common compiler options to work around known bugs: + + :x87_volatile: set to true to declare all x87 operation as volatile, needed on PoCL x86 32bits + :apple_gpu: redefine the cl_khr_fp64 to zero when the device is Apple GPU + which wrongly declares fp64 compatibility. See #2339 + :return: compilation directive as string. 
+ """ + + if x87_volatile: + options = get_x87_volatile_option(ctx) + else: + options = "" + if apple_gpu: + fp64_support = 1 if "cl_khr_fp64" in ctx.devices[0].extensions else 0 + options += f" -D cl_khr_fp64={fp64_support}" + return options.strip() + def dtype_converter(dtype): "convert a numpy dtype as a int8" diff --git a/src/pyFAI/opencl/azim_csr.py b/src/pyFAI/opencl/azim_csr.py index 62c24b8de..5a8c61ff5 100644 --- a/src/pyFAI/opencl/azim_csr.py +++ b/src/pyFAI/opencl/azim_csr.py @@ -28,7 +28,7 @@ __authors__ = ["Jérôme Kieffer", "Giannis Ashiotis"] __license__ = "MIT" -__date__ = "06/09/2024" +__date__ = "19/11/2024" __copyright__ = "ESRF, Grenoble" __contact__ = "jerome.kieffer@esrf.fr" @@ -275,11 +275,15 @@ def compile_kernels(self, kernel_file=None): kernel_file = kernel_file or self.kernel_files[-1] kernels = self.kernel_files[:-1] + [kernel_file] - compile_options = f"-D NBINS={self.bins} -D NIMAGE={self.size}" - default_compiler_options = self.get_compiler_options(x87_volatile=True) - if default_compiler_options: - compile_options += " " + default_compiler_options - OpenclProcessing.compile_kernels(self, kernels, compile_options) + try: + compile_options = self.get_compiler_options(x87_volatile=True, apple_gpu=True) + except (AttributeError, TypeError): # Silx version too old + logger.warning("Please upgrade to silx v2.1+") + from . 
import get_compiler_options + compile_options = get_compiler_options(self.ctx, x87_volatile=True, apple_gpu=True) + + compile_options += f" -D NBINS={self.bins} -D NIMAGE={self.size}" + OpenclProcessing.compile_kernels(self, kernels, compile_options.strip()) for kernel_name in self.kernels.__dict__: if kernel_name.startswith("_"): continue diff --git a/src/pyFAI/opencl/azim_hist.py b/src/pyFAI/opencl/azim_hist.py index b8a1dcd91..cdaaf1079 100644 --- a/src/pyFAI/opencl/azim_hist.py +++ b/src/pyFAI/opencl/azim_hist.py @@ -32,7 +32,7 @@ """ __author__ = "Jérôme Kieffer" __license__ = "MIT" -__date__ = "25/04/2024" +__date__ = "19/11/2024" __copyright__ = "2012-2021, ESRF, Grenoble" __contact__ = "jerome.kieffer@esrf.fr" @@ -47,7 +47,7 @@ raise ImportError("pyopencl is not installed") from . import allocate_cl_buffers, release_cl_buffers, kernel_workgroup_size -from . import concatenate_cl_kernel, get_x87_volatile_option, processing, OpenclProcessing +from . import concatenate_cl_kernel, processing, OpenclProcessing from ..containers import Integrate1dtpl, Integrate2dtpl, ErrorModel from ..utils.decorators import deprecated EventDescription = processing.EventDescription @@ -257,13 +257,17 @@ def compile_kernels(self, kernel_file=None): # concatenate all needed source files into a single openCL module kernel_file = kernel_file or self.kernel_files[-1] kernels = self.kernel_files[:-1] + [kernel_file] - default_compiler_options = get_x87_volatile_option(self.ctx) - compile_options = "-D NBINS=%i -D NIMAGE=%i -D WORKGROUP_SIZE=%i" % \ - (self.bins, self.size, self.BLOCK_SIZE) - if default_compiler_options: - compile_options += " " + default_compiler_options try: - OpenclProcessing.compile_kernels(self, kernels, compile_options) + compile_options = self.get_compiler_options(x87_volatile=True, apple_gpu=True) + except (AttributeError, TypeError): # Silx version too old + logger.warning("Please upgrade to silx v2.1+") + from . 
import get_compiler_options + compile_options = get_compiler_options(self.ctx, x87_volatile=True, apple_gpu=True) + + + compile_options += f" -D NBINS={self.bins} -D NIMAGE={self.size} -D WORKGROUP_SIZE={self.BLOCK_SIZE}" + try: + OpenclProcessing.compile_kernels(self, kernels, compile_options.strip()) except Exception as error: # This error may be related to issue #1219. Provides an ugly work around. if "cl_khr_int64_base_atomics" in self.ctx.devices[0].extensions: diff --git a/src/pyFAI/opencl/azim_lut.py b/src/pyFAI/opencl/azim_lut.py index b590b0359..f93f92f07 100644 --- a/src/pyFAI/opencl/azim_lut.py +++ b/src/pyFAI/opencl/azim_lut.py @@ -27,7 +27,7 @@ __author__ = "Jérôme Kieffer" __license__ = "MIT" -__date__ = "24/04/2024" +__date__ = "19/11/2024" __copyright__ = "2012-2024, ESRF, Grenoble" __contact__ = "jerome.kieffer@esrf.fr" @@ -205,14 +205,15 @@ def compile_kernels(self, kernel_file=None): # concatenate all needed source files into a single openCL module kernel_file = kernel_file or self.kernel_files[-1] kernels = self.kernel_files[:-1] + [kernel_file] - - compile_options = "-D NBINS=%i -D NIMAGE=%i -D NLUT=%i -D ON_CPU=%i" % \ - (self.bins, self.size, self.lut_size, int(self.device.type == "CPU")) - - default_compiler_options = self.get_compiler_options(x87_volatile=True) - if default_compiler_options: - compile_options += " " + default_compiler_options - OpenclProcessing.compile_kernels(self, kernels, compile_options) + try: + compile_options = self.get_compiler_options(x87_volatile=True, apple_gpu=True) + except (AttributeError, TypeError): # Silx version too old + logger.warning("Please upgrade to silx v2.1+") + from . 
import get_compiler_options + compile_options = get_compiler_options(self.ctx, x87_volatile=True, apple_gpu=True) + + compile_options += f" -D NBINS={self.bins} -D NIMAGE={self.size} -D NLUT={self.lut_size} -D ON_CPU={int(self.device.type == 'CPU')}" + OpenclProcessing.compile_kernels(self, kernels, compile_options.strip()) def set_kernel_arguments(self): """Tie arguments of OpenCL kernel-functions to the actual kernels diff --git a/src/pyFAI/opencl/peak_finder.py b/src/pyFAI/opencl/peak_finder.py index fc5c87850..761646def 100644 --- a/src/pyFAI/opencl/peak_finder.py +++ b/src/pyFAI/opencl/peak_finder.py @@ -29,7 +29,7 @@ __authors__ = ["Jérôme Kieffer"] __license__ = "MIT" -__date__ = "08/04/2024" +__date__ = "19/11/2024" __copyright__ = "2014-2023, ESRF, Grenoble" __contact__ = "jerome.kieffer@esrf.fr" @@ -40,7 +40,7 @@ from ..containers import SparseFrame, ErrorModel from ..utils import EPS32 from .azim_csr import OCL_CSR_Integrator, BufferDescription, EventDescription, mf, calc_checksum, pyopencl, OpenclProcessing -from . import get_x87_volatile_option, kernel_workgroup_size, dtype_converter +from . import kernel_workgroup_size, dtype_converter logger = logging.getLogger(__name__) @@ -908,15 +908,12 @@ def compile_kernels(self, kernel_file=None): kernels = self.kernel_files[:-1] + [kernel_file] try: - default_compiler_options = self.get_compiler_options(x87_volatile=True) - except AttributeError: # Silx version too old - logger.warning("Please upgrade to silx v0.10+") - default_compiler_options = get_x87_volatile_option(self.ctx) + compile_options = self.get_compiler_options(x87_volatile=True, apple_gpu=True) + except (AttributeError, TypeError): # Silx version too old + logger.warning("Please upgrade to silx v2.1+") + from . 
import get_compiler_options + compile_options = get_compiler_options(self.ctx, x87_volatile=True, apple_gpu=True) - if default_compiler_options: - compile_options = default_compiler_options - else: - compile_options = "" OpenclProcessing.compile_kernels(self, kernels, compile_options) for kernel_name, kernel in self.kernels.get_kernels().items(): wg = kernel_workgroup_size(self.program, kernel) diff --git a/src/pyFAI/opencl/preproc.py b/src/pyFAI/opencl/preproc.py index f6bc09681..bca8091c6 100644 --- a/src/pyFAI/opencl/preproc.py +++ b/src/pyFAI/opencl/preproc.py @@ -3,7 +3,7 @@ # Project: Azimuthal integration # https://github.com/silx-kit/pyFAI # -# Copyright (C) 2015-2018 European Synchrotron Radiation Facility, Grenoble, France +# Copyright (C) 2015-2024 European Synchrotron Radiation Facility, Grenoble, France # # Principal author: Jérôme Kieffer (Jerome.Kieffer@ESRF.eu) # @@ -31,7 +31,7 @@ __author__ = "Jérôme Kieffer" __license__ = "MIT" -__date__ = "23/10/2024" +__date__ = "19/11/2024" __copyright__ = "2015-2017, ESRF, Grenoble" __contact__ = "jerome.kieffer@esrf.fr" @@ -328,7 +328,7 @@ def set_kernel_arguments(self): ("output", self.cl_mem["output"]))) - def compile_kernels(self, kernel_files=None, compile_options=None): + def compile_kernels(self, kernel_files=None): """Call the OpenCL compiler :param kernel_files: list of path to the kernel @@ -336,7 +336,9 @@ def compile_kernels(self, kernel_files=None, compile_options=None): """ # concatenate all needed source files into a single openCL module kernel_files = kernel_files or self.kernel_files - compile_options = "-D NIMAGE=%i" % (self.size) + # Explicit handling of fp64 since Apple silicon compiler wrongly claims fp64 support, see issue #2339 + fp64_support = 1 if "cl_khr_fp64" in self.ctx.devices[0].extensions else 0 + compile_options = f"-D NIMAGE={self.size} -D cl_khr_fp64={fp64_support}" OpenclProcessing.compile_kernels(self, kernel_files, compile_options) def send_buffer(self, data, dest, 
convert=True): diff --git a/src/pyFAI/opencl/test/test_preproc.py b/src/pyFAI/opencl/test/test_preproc.py index 0e4661f52..d4fbdb7e8 100644 --- a/src/pyFAI/opencl/test/test_preproc.py +++ b/src/pyFAI/opencl/test/test_preproc.py @@ -33,7 +33,7 @@ __contact__ = "jerome.kieffer@esrf.eu" __license__ = "MIT" __copyright__ = "European Synchrotron Radiation Facility, Grenoble, France" -__date__ = "04/10/2023" +__date__ = "19/11/2024" import logging import numpy @@ -57,7 +57,8 @@ def test_preproc(self): """ from ..preproc import preproc ary = numpy.arange(12).reshape(4,3) - for dtype in (numpy.uint8, numpy.int8, numpy.int16, numpy.uint16, numpy.uint32, numpy.int32, numpy.uint64, numpy.int64, numpy.float32): + for dtype in (numpy.uint8, numpy.int8, numpy.int16, numpy.uint16, numpy.uint32, numpy.int32):#, numpy.uint64, numpy.int64, numpy.float32): + import sys; sys.stderr.write(f"test {dtype}\n") self.assertEqual(abs(preproc(ary.astype(dtype),split_result=4)[..., 0]-ary).max(), 0, "Result OK for dtype {dtype}") diff --git a/src/pyFAI/resources/openCL/preprocess.cl b/src/pyFAI/resources/openCL/preprocess.cl index cf7400c12..a078be026 100644 --- a/src/pyFAI/resources/openCL/preprocess.cl +++ b/src/pyFAI/resources/openCL/preprocess.cl @@ -7,7 +7,7 @@ * Grenoble, France * * Principal authors: J. 
Kieffer (kieffer@esrf.fr) - * Last revision: 23/04/2024 + * Last revision: 19/11/2024 * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -203,7 +203,7 @@ static float _any2float(const global uchar* input, input[8*position+4],input[8*position+5], input[8*position+6],input[8*position+7]); value = convert_float(as_double(rval)); #else - if (get_global_id==0)printf("Doubleprecision arithmetics is not supported on this device !\n"); + if (get_global_id(0)==0)printf("Double precision arithmetics is not supported on this device !\n"); #endif } From d522f2bf86cf5fa035e24a83fb9084ae196551cc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:04:08 +0000 Subject: [PATCH 2/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/source/publications.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/publications.rst b/doc/source/publications.rst index e77f2464e..9931a202c 100644 --- a/doc/source/publications.rst +++ b/doc/source/publications.rst @@ -34,11 +34,11 @@ Publications about pyFAI * *Application of signal separation to diffraction image compression and serial crystallography*; Jérôme Kieffer, Julien Orlans, Nicolas Coquelle, Samuel Debionne, Shibom Basu, Alejandro Homs, Gianluca Santonia and Daniele De Sanctis; - `Accepted `_ in **J. Applied Crystallography** (2024); + `Accepted `_ in **J. Applied Crystallography** (2024); In depth explainaion of sigma-clipping background assessment and error models. The latest paper should be the cited in publications using pyFAI. There are already 1400 publications referring to pyFAI, some of them in the most -prestigious scientific journals (Nature, PNAS, ...) and -40 other `applications `_ +prestigious scientific journals (Nature, PNAS, ...) 
and +40 other `applications `_ using pyFAI as a library. From 3327bedc71c2e5adc71414460c3ddcf26a34086d Mon Sep 17 00:00:00 2001 From: Jerome Kieffer Date: Tue, 19 Nov 2024 18:04:48 +0100 Subject: [PATCH 3/6] Typo --- src/pyFAI/resources/openCL/preprocess.cl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyFAI/resources/openCL/preprocess.cl b/src/pyFAI/resources/openCL/preprocess.cl index a078be026..a4ca007f0 100644 --- a/src/pyFAI/resources/openCL/preprocess.cl +++ b/src/pyFAI/resources/openCL/preprocess.cl @@ -147,7 +147,7 @@ s32_to_float(global int *array_int, } /* Function reading at the given position. - * Dtype is 1/-1 for char/uchar .... 8/-4 for int64/uint64 and 32/64 for float/double. + * Dtype is 1/-1 for char/uchar .... 8/-8 for int64/uint64 and 32/64 for float/double. */ static float _any2float(const global uchar* input, size_t position, From 8b232b904408f0dd60e8d0bcb16474385c8d979f Mon Sep 17 00:00:00 2001 From: Jerome Kieffer Date: Tue, 19 Nov 2024 18:08:25 +0100 Subject: [PATCH 4/6] Print warning message --- src/pyFAI/resources/openCL/preprocess.cl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pyFAI/resources/openCL/preprocess.cl b/src/pyFAI/resources/openCL/preprocess.cl index a4ca007f0..6d805be49 100644 --- a/src/pyFAI/resources/openCL/preprocess.cl +++ b/src/pyFAI/resources/openCL/preprocess.cl @@ -199,9 +199,13 @@ static float _any2float(const global uchar* input, } else if (dtype == 64){ #ifdef cl_khr_fp64 + #if cl_khr_fp64 uchar8 rval = (uchar8) (input[8*position],input[8*position+1], input[8*position+2],input[8*position+3], input[8*position+4],input[8*position+5], input[8*position+6],input[8*position+7]); value = convert_float(as_double(rval)); + #else + if (get_global_id(0)==0)printf("Double precision arithmetics is not supported on this device !\n"); + #endif #else if (get_global_id(0)==0)printf("Double precision arithmetics is not supported on this device !\n"); #endif From 
870b4c12597c275a79a4fbb3648020349a317b34 Mon Sep 17 00:00:00 2001 From: Jerome Kieffer Date: Tue, 19 Nov 2024 18:12:14 +0100 Subject: [PATCH 5/6] restore full test --- src/pyFAI/opencl/test/test_preproc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyFAI/opencl/test/test_preproc.py b/src/pyFAI/opencl/test/test_preproc.py index d4fbdb7e8..b5dc5ec50 100644 --- a/src/pyFAI/opencl/test/test_preproc.py +++ b/src/pyFAI/opencl/test/test_preproc.py @@ -57,7 +57,7 @@ def test_preproc(self): """ from ..preproc import preproc ary = numpy.arange(12).reshape(4,3) - for dtype in (numpy.uint8, numpy.int8, numpy.int16, numpy.uint16, numpy.uint32, numpy.int32):#, numpy.uint64, numpy.int64, numpy.float32): + for dtype in (numpy.uint8, numpy.int8, numpy.int16, numpy.uint16, numpy.uint32, numpy.int32, numpy.uint64, numpy.int64, numpy.float32): import sys; sys.stderr.write(f"test {dtype}\n") self.assertEqual(abs(preproc(ary.astype(dtype),split_result=4)[..., 0]-ary).max(), 0, "Result OK for dtype {dtype}") From 0089ba9c0f2567c107be5eb36a23ded067d3747b Mon Sep 17 00:00:00 2001 From: Jerome Kieffer Date: Wed, 20 Nov 2024 16:53:59 +0100 Subject: [PATCH 6/6] Change version of silx --- src/pyFAI/opencl/azim_csr.py | 2 +- src/pyFAI/opencl/azim_hist.py | 2 +- src/pyFAI/opencl/azim_lut.py | 2 +- src/pyFAI/opencl/peak_finder.py | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pyFAI/opencl/azim_csr.py b/src/pyFAI/opencl/azim_csr.py index 5a8c61ff5..4c0db511e 100644 --- a/src/pyFAI/opencl/azim_csr.py +++ b/src/pyFAI/opencl/azim_csr.py @@ -278,7 +278,7 @@ def compile_kernels(self, kernel_file=None): try: compile_options = self.get_compiler_options(x87_volatile=True, apple_gpu=True) except (AttributeError, TypeError): # Silx version too old - logger.warning("Please upgrade to silx v2.1+") + logger.warning("Please upgrade to silx v2.2+") from . 
import get_compiler_options compile_options = get_compiler_options(self.ctx, x87_volatile=True, apple_gpu=True) diff --git a/src/pyFAI/opencl/azim_hist.py b/src/pyFAI/opencl/azim_hist.py index cdaaf1079..163aed8aa 100644 --- a/src/pyFAI/opencl/azim_hist.py +++ b/src/pyFAI/opencl/azim_hist.py @@ -260,7 +260,7 @@ def compile_kernels(self, kernel_file=None): try: compile_options = self.get_compiler_options(x87_volatile=True, apple_gpu=True) except (AttributeError, TypeError): # Silx version too old - logger.warning("Please upgrade to silx v2.1+") + logger.warning("Please upgrade to silx v2.2+") from . import get_compiler_options compile_options = get_compiler_options(self.ctx, x87_volatile=True, apple_gpu=True) diff --git a/src/pyFAI/opencl/azim_lut.py b/src/pyFAI/opencl/azim_lut.py index f93f92f07..4db6efe89 100644 --- a/src/pyFAI/opencl/azim_lut.py +++ b/src/pyFAI/opencl/azim_lut.py @@ -208,7 +208,7 @@ def compile_kernels(self, kernel_file=None): try: compile_options = self.get_compiler_options(x87_volatile=True, apple_gpu=True) except (AttributeError, TypeError): # Silx version too old - logger.warning("Please upgrade to silx v2.1+") + logger.warning("Please upgrade to silx v2.2+") from . 
import get_compiler_options compile_options = get_compiler_options(self.ctx, x87_volatile=True, apple_gpu=True) diff --git a/src/pyFAI/opencl/peak_finder.py b/src/pyFAI/opencl/peak_finder.py index 761646def..1c5e4adb2 100644 --- a/src/pyFAI/opencl/peak_finder.py +++ b/src/pyFAI/opencl/peak_finder.py @@ -29,7 +29,7 @@ __authors__ = ["Jérôme Kieffer"] __license__ = "MIT" -__date__ = "19/11/2024" +__date__ = "20/11/2024" __copyright__ = "2014-2023, ESRF, Grenoble" __contact__ = "jerome.kieffer@esrf.fr" @@ -910,7 +910,7 @@ def compile_kernels(self, kernel_file=None): try: compile_options = self.get_compiler_options(x87_volatile=True, apple_gpu=True) except (AttributeError, TypeError): # Silx version too old - logger.warning("Please upgrade to silx v2.1+") + logger.warning("Please upgrade to silx v2.2+") from . import get_compiler_options compile_options = get_compiler_options(self.ctx, x87_volatile=True, apple_gpu=True)