diff --git a/CMakeLists.txt b/CMakeLists.txt index f3eabeb9..bd26e779 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,61 +3,53 @@ if(${CMAKE_VERSION} VERSION_LESS 3.12) cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION}) endif() -project(search LANGUAGES CXX CUDA) +project(search LANGUAGES CXX) + +# Check if we can compile CUDA on this system. +include(CheckLanguage) +check_language(CUDA) + +set(CPU_ONLY OFF CACHE BOOL "Build without GPU support?") + +if(CMAKE_CUDA_COMPILER AND NOT CPU_ONLY) + set(HAVE_CUDA 1) + enable_language(CUDA) + add_definitions(-DHAVE_CUDA=1) +endif() include(CheckIPOSupported) check_ipo_supported(RESULT ipo_supported) -#find_package(Python3 COMPONENTS Interpreter Development REQUIRED) -find_package( PythonInterp ) -find_package( PythonLibs ) +find_package(Python3 COMPONENTS Interpreter Development REQUIRED) -find_library(CFITSIO_LIBRARY +find_library(CFITSIO_LIBRARY NAMES fitsio cfitsio libcfitsio HINTS lib/ ) add_subdirectory(lib/pybind11) -set(CMAKE_CXX_STANDARD 11) # set(PYBIND11_CPP_STANDARD -std=c++11) +set(CMAKE_CXX_STANDARD 11) include_directories( include/ ) -add_library(searchcu STATIC - src/kbmod/search/image_kernels.cu - src/kbmod/search/kernels.cu -) - -set_target_properties(searchcu PROPERTIES - POSITION_INDEPENDENT_CODE ON - CUDA_VISIBILITY_PRESET "hidden" - PREFIX "${PYTHON_MODULE_PREFIX}" - SUFFIX "${PYTHON_MODULE_EXTENSION}" -) - -if(ipo_supported) - set_property(TARGET searchcu PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) -endif() - +# Create the python module via pybind11. 
pybind11_add_module(search MODULE src/kbmod/search/bindings.cpp ) set_target_properties(search PROPERTIES CXX_VISIBILITY_PRESET "hidden" - INTERPROCEDURAL_OPTIMIZATION TRUE PREFIX "${PYTHON_MODULE_PREFIX}" SUFFIX "${PYTHON_MODULE_EXTENSION}" ) - if(ipo_supported) set_property(TARGET search PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) endif() - target_compile_options(search PRIVATE $<$: -O3 -fvisibility=hidden @@ -65,7 +57,32 @@ target_compile_options(search PRIVATE $<$: >) target_link_libraries(search PRIVATE - searchcu - ${CFITSIO_LIBRARY} - -lgomp + ${CFITSIO_LIBRARY} + -lgomp ) + + +# If we have CUDA, build the kernel libraries and link them in as well. +if(HAVE_CUDA) + message(STATUS "Building CUDA Libraries") + add_library(searchcu STATIC + src/kbmod/search/image_kernels.cu + src/kbmod/search/kernels.cu + ) + + set_target_properties(searchcu PROPERTIES + POSITION_INDEPENDENT_CODE ON + CUDA_VISIBILITY_PRESET "hidden" + CUDA_SEPARABLE_COMPILATION ON + CUDA_RESOLVE_DEVICE_SYMBOLS ON + PREFIX "${PYTHON_MODULE_PREFIX}" + SUFFIX "${PYTHON_MODULE_EXTENSION}" + ) + if(ipo_supported) + set_property(TARGET searchcu PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) + endif() + + target_link_libraries(search PRIVATE searchcu) +else() + message(STATUS "Skipping CUDA Libraries") +endif() diff --git a/setup.py b/setup.py index f14c4bdb..78caa1a4 100644 --- a/setup.py +++ b/setup.py @@ -128,6 +128,14 @@ def build_extension(self, ext: CMakeExtension) -> None: if archs: cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))] + # Check if we have GPU support. + try: + subprocess.check_output('nvidia-smi') + cmake_args += ["-DCPU_ONLY=OFF"] + except Exception: + cmake_args += ["-DCPU_ONLY=ON"] + print("WARNING: No GPU Found. Building with CPU only mode.") + # Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level # across all generators. 
if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ: diff --git a/src/kbmod/search/Filtering.cpp b/src/kbmod/search/Filtering.cpp index c8425d45..1d7cda18 100644 --- a/src/kbmod/search/Filtering.cpp +++ b/src/kbmod/search/Filtering.cpp @@ -11,10 +11,12 @@ namespace search { -/* The filter_kenerls.cu functions. */ -extern "C" void sigmaGFilteredIndicesCU(float* values, int num_values, float sGL0, float sGL1, - float sigmaGCoeff, float width, int* idxArray, int* minKeepIndex, - int* maxKeepIndex); +#ifdef HAVE_CUDA + /* The filter_kernels.cu functions. */ + extern "C" void sigmaGFilteredIndicesCU(float* values, int num_values, float sGL0, float sGL1, + float sigmaGCoeff, float width, int* idxArray, int* minKeepIndex, + int* maxKeepIndex); +#endif /* Return the list of indices from the values array such that those elements pass the sigmaG filtering defined by percentiles [sGL0, sGL1] with coefficient @@ -35,8 +37,13 @@ std::vector sigmaGFilteredIndices(const std::vector& values, float s int minKeepIndex = 0; int maxKeepIndex = num_values - 1; - sigmaGFilteredIndicesCU(values_arr, num_values, sGL0, sGL1, sigmaGCoeff, width, idxArray, &minKeepIndex, - &maxKeepIndex); + + #ifdef HAVE_CUDA + sigmaGFilteredIndicesCU(values_arr, num_values, sGL0, sGL1, sigmaGCoeff, width, idxArray, + &minKeepIndex, &maxKeepIndex); + #else + throw std::runtime_error("Non-GPU sigmaGFilteredIndicesCU is not implemented."); + #endif // Copy the result into a vector and return it. 
std::vector result; diff --git a/src/kbmod/search/KBMOSearch.cpp b/src/kbmod/search/KBMOSearch.cpp index 87688781..fc826f00 100644 --- a/src/kbmod/search/KBMOSearch.cpp +++ b/src/kbmod/search/KBMOSearch.cpp @@ -9,14 +9,16 @@ namespace search { -extern "C" void deviceSearchFilter(int imageCount, int width, int height, float* psiVect, float* phiVect, - perImageData img_data, searchParameters params, int trajCount, - trajectory* trajectoriesToSearch, int resultsCount, - trajectory* bestTrajects); +#ifdef HAVE_CUDA + extern "C" void deviceSearchFilter(int imageCount, int width, int height, float* psiVect, float* phiVect, + perImageData img_data, searchParameters params, int trajCount, + trajectory* trajectoriesToSearch, int resultsCount, + trajectory* bestTrajects); -void deviceGetCoadds(ImageStack& stack, perImageData image_data, int num_trajectories, - trajectory* trajectories, stampParameters params, - std::vector >& use_index_vect, float* results); + void deviceGetCoadds(ImageStack& stack, perImageData image_data, int num_trajectories, + trajectory* trajectories, stampParameters params, + std::vector >& use_index_vect, float* results); +#endif KBMOSearch::KBMOSearch(ImageStack& imstack) : stack(imstack) { maxResultCount = 100000; @@ -150,8 +152,12 @@ void KBMOSearch::search(int aSteps, int vSteps, float minAngle, float maxAngle, // Do the actual search on the GPU. 
startTimer("Searching"); - deviceSearchFilter(stack.imgCount(), stack.getWidth(), stack.getHeight(), psiVect.data(), phiVect.data(), - img_data, params, searchList.size(), searchList.data(), max_results, results.data()); + #ifdef HAVE_CUDA + deviceSearchFilter(stack.imgCount(), stack.getWidth(), stack.getHeight(), psiVect.data(), phiVect.data(), + img_data, params, searchList.size(), searchList.data(), max_results, results.data()); + #else + throw std::runtime_error("Non-GPU search is not implemented."); + #endif endTimer(); startTimer("Sorting results"); @@ -351,8 +357,12 @@ std::vector KBMOSearch::coaddedScienceStampsGPU(std::vector stamp_data(stamp_ppi * num_trajectories); // Do the co-adds. - deviceGetCoadds(stack, img_data, num_trajectories, t_array.data(), params, use_index_vect, - stamp_data.data()); + #ifdef HAVE_CUDA + deviceGetCoadds(stack, img_data, num_trajectories, t_array.data(), params, use_index_vect, + stamp_data.data()); + #else + throw std::runtime_error("Non-GPU co-adds is not implemented."); + #endif // Copy the stamps into RawImages std::vector results(num_trajectories); diff --git a/src/kbmod/search/RawImage.cpp b/src/kbmod/search/RawImage.cpp index de49cedc..29adcfae 100644 --- a/src/kbmod/search/RawImage.cpp +++ b/src/kbmod/search/RawImage.cpp @@ -9,18 +9,16 @@ namespace search { -// Performs convolution between an image represented as an array of floats -// and a PSF on a GPU device. -extern "C" void deviceConvolve(float* sourceImg, float* resultImg, int width, int height, float* psfKernel, - int psfSize, int psfDim, int psfRadius, float psfSum); +#ifdef HAVE_CUDA + // Performs convolution between an image represented as an array of floats + // and a PSF on a GPU device. + extern "C" void deviceConvolve(float* sourceImg, float* resultImg, int width, int height, float* psfKernel, + int psfSize, int psfDim, int psfRadius, float psfSum); -// Grow the mask by expanding masked pixels to their neighbors -// out for "steps" steps. 
-extern "C" void deviceGrowMask(int width, int height, float* source, float* dest, int steps); + extern "C" pixelPos findPeakImageVect(int width, int height, float* img, bool furthest_from_center); -extern "C" pixelPos findPeakImageVect(int width, int height, float* img, bool furthest_from_center); - -extern "C" imageMoments findCentralMomentsImageVect(int width, int height, float* img); + extern "C" imageMoments findCentralMomentsImageVect(int width, int height, float* img); +#endif RawImage::RawImage() : width(0), height(0) { pixels = std::vector(); } @@ -122,8 +120,12 @@ RawImage RawImage::createStamp(float x, float y, int radius, bool interpolate, b } void RawImage::convolve(PointSpreadFunc psf) { - deviceConvolve(pixels.data(), pixels.data(), getWidth(), getHeight(), psf.kernelData(), psf.getSize(), - psf.getDim(), psf.getRadius(), psf.getSum()); + #ifdef HAVE_CUDA + deviceConvolve(pixels.data(), pixels.data(), getWidth(), getHeight(), psf.kernelData(), + psf.getSize(), psf.getDim(), psf.getRadius(), psf.getSum()); + #else + throw std::runtime_error("Non-GPU convolution is not implemented."); + #endif } void RawImage::applyMask(int flags, const std::vector& exceptions, const RawImage& mask) { @@ -301,11 +303,19 @@ std::array RawImage::computeBounds() const { // The maximum value of the image and return the coordinates. 
pixelPos RawImage::findPeak(bool furthest_from_center) { - return findPeakImageVect(width, height, pixels.data(), furthest_from_center); + #ifdef HAVE_CUDA + return findPeakImageVect(width, height, pixels.data(), furthest_from_center); + #else + throw std::runtime_error("Non-GPU findPeak is not implemented."); + #endif } imageMoments RawImage::findCentralMoments() { - return findCentralMomentsImageVect(width, height, pixels.data()); + #ifdef HAVE_CUDA + return findCentralMomentsImageVect(width, height, pixels.data()); + #else + throw std::runtime_error("Non-GPU findCentralMoments is not implemented."); + #endif } RawImage createMedianImage(const std::vector& images) { @@ -324,7 +334,7 @@ RawImage createMedianImage(const std::vector& images) { for (int i = 0; i < num_images; ++i) { // Only used the unmasked pixels. float pixVal = images[i].getPixel(x, y); - if ((pixVal != NO_DATA) && (!isnan(pixVal))) { + if ((pixVal != NO_DATA) && (!std::isnan(pixVal))) { pixArray[num_unmasked] = pixVal; num_unmasked += 1; } @@ -367,7 +377,7 @@ RawImage createSummedImage(const std::vector& images) { float sum = 0.0; for (int i = 0; i < num_images; ++i) { float pixVal = images[i].getPixel(x, y); - if ((pixVal == NO_DATA) || (isnan(pixVal))) pixVal = 0.0; + if ((pixVal == NO_DATA) || (std::isnan(pixVal))) pixVal = 0.0; sum += pixVal; } result.setPixel(x, y, sum); @@ -392,7 +402,7 @@ RawImage createMeanImage(const std::vector& images) { float count = 0.0; for (int i = 0; i < num_images; ++i) { float pixVal = images[i].getPixel(x, y); - if ((pixVal != NO_DATA) && (!isnan(pixVal))) { + if ((pixVal != NO_DATA) && (!std::isnan(pixVal))) { count += 1.0; sum += pixVal; } diff --git a/src/kbmod/search/RawImage.h b/src/kbmod/search/RawImage.h index 0892643e..03c30a17 100644 --- a/src/kbmod/search/RawImage.h +++ b/src/kbmod/search/RawImage.h @@ -10,6 +10,7 @@ #ifndef RAWIMAGE_H_ #define RAWIMAGE_H_ +#include #include #include #include