Skip to content

Commit

Permalink
Merge pull request #335 from dirac-institute/cuda_opt
Browse files Browse the repository at this point in the history
Make GPU Code optional
  • Loading branch information
jeremykubica authored Sep 14, 2023
2 parents 94cb6d7 + 1f8e586 commit a8d76da
Show file tree
Hide file tree
Showing 6 changed files with 115 additions and 62 deletions.
73 changes: 45 additions & 28 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,69 +3,86 @@ if(${CMAKE_VERSION} VERSION_LESS 3.12)
cmake_policy(VERSION ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION})
endif()

project(search LANGUAGES CXX CUDA)
project(search LANGUAGES CXX)

# Probe for a CUDA compiler without failing the configure step when none is
# installed. check_language() is the optional counterpart to listing CUDA in
# project(LANGUAGES ...): it sets CMAKE_CUDA_COMPILER only if a usable
# compiler is found.
include(CheckLanguage)
check_language(CUDA)

# User-facing cache switch: configure with -DCPU_ONLY=ON to force a build
# without GPU support even when a CUDA compiler is available.
set(CPU_ONLY OFF CACHE BOOL "Build without GPU support?")

# Turn GPU support on only when a CUDA compiler was detected AND the user did
# not opt out via CPU_ONLY.
if(CMAKE_CUDA_COMPILER AND NOT CPU_ONLY)
# CMake-side flag; tested further down to decide whether the CUDA kernel
# library is built and linked.
set(HAVE_CUDA 1)
enable_language(CUDA)
# Directory-scoped preprocessor definition. Presumably this is the symbol the
# C++ sources test with `#ifdef HAVE_CUDA` around their GPU calls.
add_definitions(-DHAVE_CUDA=1)
endif()

include(CheckIPOSupported)
check_ipo_supported(RESULT ipo_supported)

#find_package(Python3 COMPONENTS Interpreter Development REQUIRED)
find_package( PythonInterp )
find_package( PythonLibs )
find_package(Python3 COMPONENTS Interpreter Development REQUIRED)

find_library(CFITSIO_LIBRARY
find_library(CFITSIO_LIBRARY
NAMES fitsio cfitsio libcfitsio
HINTS lib/
)

add_subdirectory(lib/pybind11)

set(CMAKE_CXX_STANDARD 11) # set(PYBIND11_CPP_STANDARD -std=c++11)
set(CMAKE_CXX_STANDARD 11)

include_directories(
include/
)

add_library(searchcu STATIC
src/kbmod/search/image_kernels.cu
src/kbmod/search/kernels.cu
)

set_target_properties(searchcu PROPERTIES
POSITION_INDEPENDENT_CODE ON
CUDA_VISIBILITY_PRESET "hidden"
PREFIX "${PYTHON_MODULE_PREFIX}"
SUFFIX "${PYTHON_MODULE_EXTENSION}"
)

if(ipo_supported)
set_property(TARGET searchcu PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()


# Create the python module via pybind11.
pybind11_add_module(search MODULE
src/kbmod/search/bindings.cpp
)

set_target_properties(search PROPERTIES
CXX_VISIBILITY_PRESET "hidden"
INTERPROCEDURAL_OPTIMIZATION TRUE
PREFIX "${PYTHON_MODULE_PREFIX}"
SUFFIX "${PYTHON_MODULE_EXTENSION}"
)

if(ipo_supported)
set_property(TARGET search PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()


target_compile_options(search PRIVATE $<$<COMPILE_LANGUAGE:CXX>:
-O3
-fvisibility=hidden
-fopenmp
>)

target_link_libraries(search PRIVATE
searchcu
${CFITSIO_LIBRARY}
-lgomp
${CFITSIO_LIBRARY}
-lgomp
)


# Optional GPU back end. When HAVE_CUDA is set, compile the CUDA kernels into
# the static library `searchcu` and link it into the `search` Python module;
# otherwise only report that the CUDA libraries are skipped.
if(HAVE_CUDA)
message(STATUS "Building CUDA Libraries")
add_library(searchcu STATIC
src/kbmod/search/image_kernels.cu
src/kbmod/search/kernels.cu
)

# POSITION_INDEPENDENT_CODE: presumably required because this static archive
#   is linked into the `search` module below.
# CUDA_VISIBILITY_PRESET "hidden": do not export CUDA symbols by default.
# CUDA_SEPARABLE_COMPILATION / CUDA_RESOLVE_DEVICE_SYMBOLS: compile device code
#   as relocatable objects and run the device-link step on this library.
# PREFIX / SUFFIX: reuse the Python module naming variables for the output
#   file name.
set_target_properties(searchcu PROPERTIES
POSITION_INDEPENDENT_CODE ON
CUDA_VISIBILITY_PRESET "hidden"
CUDA_SEPARABLE_COMPILATION ON
CUDA_RESOLVE_DEVICE_SYMBOLS ON
PREFIX "${PYTHON_MODULE_PREFIX}"
SUFFIX "${PYTHON_MODULE_EXTENSION}"
)
# Enable link-time optimization only when ipo_supported is true.
if(ipo_supported)
set_property(TARGET searchcu PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE)
endif()

# Make the GPU kernels available to the `search` target.
target_link_libraries(search PRIVATE searchcu)
else()
message(STATUS "Skipping CUDA Libraries")
endif()
8 changes: 8 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ def build_extension(self, ext: CMakeExtension) -> None:
if archs:
cmake_args += ["-DCMAKE_OSX_ARCHITECTURES={}".format(";".join(archs))]

# Check if we have GPU support.
try:
subprocess.check_output('nvidia-smi')
cmake_args += ["-DCPU_ONLY=OFF"]
except Exception:
cmake_args += ["-DCPU_ONLY=ON"]
print("WARNING: No GPU Found. Building with CPU only mode.")

# Set CMAKE_BUILD_PARALLEL_LEVEL to control the parallel build level
# across all generators.
if "CMAKE_BUILD_PARALLEL_LEVEL" not in os.environ:
Expand Down
19 changes: 13 additions & 6 deletions src/kbmod/search/Filtering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@

namespace search {

/* The filter_kernels.cu functions. */
extern "C" void sigmaGFilteredIndicesCU(float* values, int num_values, float sGL0, float sGL1,
float sigmaGCoeff, float width, int* idxArray, int* minKeepIndex,
int* maxKeepIndex);
#ifdef HAVE_CUDA
/* The filter_kernels.cu functions. */
extern "C" void sigmaGFilteredIndicesCU(float* values, int num_values, float sGL0, float sGL1,
float sigmaGCoeff, float width, int* idxArray, int* minKeepIndex,
int* maxKeepIndex);
#endif

/* Return the list of indices from the values array such that those elements
pass the sigmaG filtering defined by percentiles [sGL0, sGL1] with coefficient
Expand All @@ -35,8 +37,13 @@ std::vector<int> sigmaGFilteredIndices(const std::vector<float>& values, float s

int minKeepIndex = 0;
int maxKeepIndex = num_values - 1;
sigmaGFilteredIndicesCU(values_arr, num_values, sGL0, sGL1, sigmaGCoeff, width, idxArray, &minKeepIndex,
&maxKeepIndex);

#ifdef HAVE_CUDA
sigmaGFilteredIndicesCU(values_arr, num_values, sGL0, sGL1, sigmaGCoeff, width, idxArray,
&minKeepIndex, &maxKeepIndex);
#else
throw std::runtime_error("Non-GPU sigmaGFilteredIndicesCU is not implemented.");
#endif

// Copy the result into a vector and return it.
std::vector<int> result;
Expand Down
32 changes: 21 additions & 11 deletions src/kbmod/search/KBMOSearch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@

namespace search {

extern "C" void deviceSearchFilter(int imageCount, int width, int height, float* psiVect, float* phiVect,
perImageData img_data, searchParameters params, int trajCount,
trajectory* trajectoriesToSearch, int resultsCount,
trajectory* bestTrajects);
#ifdef HAVE_CUDA
extern "C" void deviceSearchFilter(int imageCount, int width, int height, float* psiVect, float* phiVect,
perImageData img_data, searchParameters params, int trajCount,
trajectory* trajectoriesToSearch, int resultsCount,
trajectory* bestTrajects);

void deviceGetCoadds(ImageStack& stack, perImageData image_data, int num_trajectories,
trajectory* trajectories, stampParameters params,
std::vector<std::vector<bool> >& use_index_vect, float* results);
void deviceGetCoadds(ImageStack& stack, perImageData image_data, int num_trajectories,
trajectory* trajectories, stampParameters params,
std::vector<std::vector<bool> >& use_index_vect, float* results);
#endif

KBMOSearch::KBMOSearch(ImageStack& imstack) : stack(imstack) {
maxResultCount = 100000;
Expand Down Expand Up @@ -150,8 +152,12 @@ void KBMOSearch::search(int aSteps, int vSteps, float minAngle, float maxAngle,

// Do the actual search on the GPU.
startTimer("Searching");
deviceSearchFilter(stack.imgCount(), stack.getWidth(), stack.getHeight(), psiVect.data(), phiVect.data(),
img_data, params, searchList.size(), searchList.data(), max_results, results.data());
#ifdef HAVE_CUDA
deviceSearchFilter(stack.imgCount(), stack.getWidth(), stack.getHeight(), psiVect.data(), phiVect.data(),
img_data, params, searchList.size(), searchList.data(), max_results, results.data());
#else
throw std::runtime_error("Non-GPU search is not implemented.");
#endif
endTimer();

startTimer("Sorting results");
Expand Down Expand Up @@ -351,8 +357,12 @@ std::vector<RawImage> KBMOSearch::coaddedScienceStampsGPU(std::vector<trajectory
std::vector<float> stamp_data(stamp_ppi * num_trajectories);

// Do the co-adds.
deviceGetCoadds(stack, img_data, num_trajectories, t_array.data(), params, use_index_vect,
stamp_data.data());
#ifdef HAVE_CUDA
deviceGetCoadds(stack, img_data, num_trajectories, t_array.data(), params, use_index_vect,
stamp_data.data());
#else
throw std::runtime_error("Non-GPU co-adds is not implemented.");
#endif

// Copy the stamps into RawImages
std::vector<RawImage> results(num_trajectories);
Expand Down
44 changes: 27 additions & 17 deletions src/kbmod/search/RawImage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,16 @@

namespace search {

// Performs convolution between an image represented as an array of floats
// and a PSF on a GPU device.
extern "C" void deviceConvolve(float* sourceImg, float* resultImg, int width, int height, float* psfKernel,
int psfSize, int psfDim, int psfRadius, float psfSum);
#ifdef HAVE_CUDA
// Performs convolution between an image represented as an array of floats
// and a PSF on a GPU device.
extern "C" void deviceConvolve(float* sourceImg, float* resultImg, int width, int height, float* psfKernel,
int psfSize, int psfDim, int psfRadius, float psfSum);

// Grow the mask by expanding masked pixels to their neighbors
// out for "steps" steps.
extern "C" void deviceGrowMask(int width, int height, float* source, float* dest, int steps);
extern "C" pixelPos findPeakImageVect(int width, int height, float* img, bool furthest_from_center);

extern "C" pixelPos findPeakImageVect(int width, int height, float* img, bool furthest_from_center);

extern "C" imageMoments findCentralMomentsImageVect(int width, int height, float* img);
extern "C" imageMoments findCentralMomentsImageVect(int width, int height, float* img);
#endif

RawImage::RawImage() : width(0), height(0) { pixels = std::vector<float>(); }

Expand Down Expand Up @@ -122,8 +120,12 @@ RawImage RawImage::createStamp(float x, float y, int radius, bool interpolate, b
}

void RawImage::convolve(PointSpreadFunc psf) {
deviceConvolve(pixels.data(), pixels.data(), getWidth(), getHeight(), psf.kernelData(), psf.getSize(),
psf.getDim(), psf.getRadius(), psf.getSum());
#ifdef HAVE_CUDA
deviceConvolve(pixels.data(), pixels.data(), getWidth(), getHeight(), psf.kernelData(),
psf.getSize(), psf.getDim(), psf.getRadius(), psf.getSum());
#else
throw std::runtime_error("Non-GPU convolution is not implemented.");
#endif
}

void RawImage::applyMask(int flags, const std::vector<int>& exceptions, const RawImage& mask) {
Expand Down Expand Up @@ -301,11 +303,19 @@ std::array<float, 2> RawImage::computeBounds() const {

// Find the maximum value of the image and return its coordinates.
pixelPos RawImage::findPeak(bool furthest_from_center) {
return findPeakImageVect(width, height, pixels.data(), furthest_from_center);
#ifdef HAVE_CUDA
return findPeakImageVect(width, height, pixels.data(), furthest_from_center);
#else
throw std::runtime_error("Non-GPU findPeak is not implemented.");
#endif
}

imageMoments RawImage::findCentralMoments() {
return findCentralMomentsImageVect(width, height, pixels.data());
#ifdef HAVE_CUDA
return findCentralMomentsImageVect(width, height, pixels.data());
#else
throw std::runtime_error("Non-GPU findCentralMoments is not implemented.");
#endif
}

RawImage createMedianImage(const std::vector<RawImage>& images) {
Expand All @@ -324,7 +334,7 @@ RawImage createMedianImage(const std::vector<RawImage>& images) {
for (int i = 0; i < num_images; ++i) {
// Only use the unmasked pixels.
float pixVal = images[i].getPixel(x, y);
if ((pixVal != NO_DATA) && (!isnan(pixVal))) {
if ((pixVal != NO_DATA) && (!std::isnan(pixVal))) {
pixArray[num_unmasked] = pixVal;
num_unmasked += 1;
}
Expand Down Expand Up @@ -367,7 +377,7 @@ RawImage createSummedImage(const std::vector<RawImage>& images) {
float sum = 0.0;
for (int i = 0; i < num_images; ++i) {
float pixVal = images[i].getPixel(x, y);
if ((pixVal == NO_DATA) || (isnan(pixVal))) pixVal = 0.0;
if ((pixVal == NO_DATA) || (std::isnan(pixVal))) pixVal = 0.0;
sum += pixVal;
}
result.setPixel(x, y, sum);
Expand All @@ -392,7 +402,7 @@ RawImage createMeanImage(const std::vector<RawImage>& images) {
float count = 0.0;
for (int i = 0; i < num_images; ++i) {
float pixVal = images[i].getPixel(x, y);
if ((pixVal != NO_DATA) && (!isnan(pixVal))) {
if ((pixVal != NO_DATA) && (!std::isnan(pixVal))) {
count += 1.0;
sum += pixVal;
}
Expand Down
1 change: 1 addition & 0 deletions src/kbmod/search/RawImage.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#ifndef RAWIMAGE_H_
#define RAWIMAGE_H_

#include <algorithm>
#include <array>
#include <vector>
#include <fitsio.h>
Expand Down

0 comments on commit a8d76da

Please sign in to comment.