diff --git a/src/kbmod/search/Filtering.cpp b/src/kbmod/search/Filtering.cpp index 1d7cda18..0cc893d5 100644 --- a/src/kbmod/search/Filtering.cpp +++ b/src/kbmod/search/Filtering.cpp @@ -12,64 +12,63 @@ namespace search { #ifdef HAVE_CUDA - /* The filter_kenerls.cu functions. */ - extern "C" void sigmaGFilteredIndicesCU(float* values, int num_values, float sGL0, float sGL1, - float sigmaGCoeff, float width, int* idxArray, int* minKeepIndex, - int* maxKeepIndex); +/* The filter_kenerls.cu functions. */ +extern "C" void SigmaGFilteredIndicesCU(float *values, int num_values, float sgl0, float sgl1, float sg_coeff, + float width, int *idx_array, int *min_keep_idx, int *max_keep_idx); #endif /* Return the list of indices from the values array such that those elements - pass the sigmaG filtering defined by percentiles [sGL0, sGL1] with coefficient - sigmaGCoeff and a multiplicative factor of width. */ -std::vector sigmaGFilteredIndices(const std::vector& values, float sGL0, float sGL1, - float sigmaGCoeff, float width) { + pass the sigmaG filtering defined by percentiles [sgl0, sgl1] with coefficient + sigma_g_coeff and a multiplicative factor of width. */ +std::vector sigmaGFilteredIndices(const std::vector &values, float sgl0, float sgl1, + float sigma_g_coeff, float width) { // Bounds check the percentile values. - assert(sGL0 > 0.0); - assert(sGL1 < 1.0); + assert(sgl0 > 0.0); + assert(sgl1 < 1.0); // Allocate space for the input and result. const int num_values = values.size(); float values_arr[num_values]; - int idxArray[num_values]; + int idx_array[num_values]; for (int i = 0; i < num_values; ++i) { values_arr[i] = values[i]; } - int minKeepIndex = 0; - int maxKeepIndex = num_values - 1; + int min_keep_idx = 0; + int max_keep_idx = num_values - 1; - #ifdef HAVE_CUDA - sigmaGFilteredIndicesCU(values_arr, num_values, sGL0, sGL1, sigmaGCoeff, width, idxArray, - &minKeepIndex, &maxKeepIndex); - #else - throw std::runtime_error("Non-GPU sigmaGFilteredIndicesCU is not implemented."); - #endif +#ifdef HAVE_CUDA + SigmaGFilteredIndicesCU(values_arr, num_values, sgl0, sgl1, sigma_g_coeff, width, idx_array, + &min_keep_idx, &max_keep_idx); +#else + throw std::runtime_error("Non-GPU SigmaGFilteredIndicesCU is not implemented."); +#endif // Copy the result into a vector and return it. std::vector result; - for (int i = minKeepIndex; i <= maxKeepIndex; ++i) { - result.push_back(idxArray[i]); + for (int i = min_keep_idx; i <= max_keep_idx; ++i) { + result.push_back(idx_array[i]); } return result; } /* Given a set of psi and phi values, return a likelihood value */ -double calculateLikelihoodFromPsiPhi(std::vector psiValues, std::vector phiValues) { - assert(psiValues.size() == phiValues.size()); - double psiSum = 0.0; - double phiSum = 0.0; +double calculateLikelihoodFromPsiPhi(std::vector psi_values, std::vector phi_values) { + assert(psi_values.size() == phi_values.size()); + double psi_sum = 0.0; + double phi_sum = 0.0; - for (int i = 0; i < psiValues.size(); i++) { - psiSum += psiValues[i]; - phiSum += phiValues[i]; + for (int i = 0; i < psi_values.size(); i++) { + psi_sum += psi_values[i]; + phi_sum += phi_values[i]; } - if (psiSum == 0.0 || phiSum <= 0.0) { + if (psi_sum == 0.0 || phi_sum <= 0.0) { return 0.0; } - return psiSum / sqrt(phiSum); + return psi_sum / sqrt(phi_sum); } - + } /* namespace search */ diff --git a/src/kbmod/search/Filtering.h b/src/kbmod/search/Filtering.h index f7364949..17efb4fe 100644 --- a/src/kbmod/search/Filtering.h +++ b/src/kbmod/search/Filtering.h @@ -15,9 +15,9 @@ namespace search { /* Return the list of indices from the values array such that those elements pass the sigmaG filtering defined by percentiles [sGL0, sGL1] with coefficient - sigmaGCoeff and a multiplicative factor of width. */ -std::vector sigmaGFilteredIndices(const std::vector& values, float sGL0, float sGL1, - float sigmaGCoeff, float width); + sigmag_coeff and a multiplicative factor of width. */ +std::vector sigmaGFilteredIndices(const std::vector& values, float sgl0, float sgl1, + float sigma_g_coeff, float width); } /* namespace search */ diff --git a/src/kbmod/search/ImageStack.cpp b/src/kbmod/search/ImageStack.cpp index e8f9d187..aaef5820 100644 --- a/src/kbmod/search/ImageStack.cpp +++ b/src/kbmod/search/ImageStack.cpp @@ -15,8 +15,8 @@ ImageStack::ImageStack(const std::vector& filenames, const std::vec loadImages(filenames, psfs); extractImageTimes(); setTimeOrigin(); - globalMask = RawImage(getWidth(), getHeight()); - globalMask.setAllPix(0.0); + global_mask = RawImage(getWidth(), getHeight()); + global_mask.setAllPix(0.0); } ImageStack::ImageStack(const std::vector& imgs) { @@ -24,13 +24,13 @@ ImageStack::ImageStack(const std::vector& imgs) { images = imgs; extractImageTimes(); setTimeOrigin(); - globalMask = RawImage(getWidth(), getHeight()); - globalMask.setAllPix(0.0); + global_mask = RawImage(getWidth(), getHeight()); + global_mask.setAllPix(0.0); } -void ImageStack::loadImages(const std::vector& fileNames, +void ImageStack::loadImages(const std::vector& filenames, const std::vector& psfs) { - const int num_files = fileNames.size(); + const int num_files = filenames.size(); if (num_files == 0) { std::cout << "No files provided" << "\n"; @@ -40,7 +40,7 @@ void ImageStack::loadImages(const std::vector& fileNames, // Load images from file for (int i = 0; i < num_files; ++i) { - images.push_back(LayeredImage(fileNames[i], psfs[i])); + images.push_back(LayeredImage(filenames[i], psfs[i])); if (verbose) std::cout << "." << std::flush; } if (verbose) std::cout << "\n"; @@ -48,16 +48,16 @@ void ImageStack::loadImages(const std::vector& fileNames, void ImageStack::extractImageTimes() { // Load image times - imageTimes = std::vector(); + image_times = std::vector(); for (auto& i : images) { - imageTimes.push_back(float(i.getObstime())); + image_times.push_back(float(i.getObstime())); } } void ImageStack::setTimeOrigin() { // Set beginning time to 0.0 - double initialTime = imageTimes[0]; - for (auto& t : imageTimes) t = t - initialTime; + double initial_time = image_times[0]; + for (auto& t : image_times) t = t - initial_time; } LayeredImage& ImageStack::getSingleImage(int index) { @@ -75,7 +75,7 @@ void ImageStack::setTimes(const std::vector& times) { throw std::runtime_error( "List of times provided" " does not match the number of images!"); - imageTimes = times; + image_times = times; setTimeOrigin(); } @@ -85,13 +85,13 @@ void ImageStack::convolvePSF() { for (auto& i : images) i.convolvePSF(); } -void ImageStack::saveGlobalMask(const std::string& path) { globalMask.saveToFile(path); } +void ImageStack::saveGlobalMask(const std::string& path) { global_mask.saveToFile(path); } void ImageStack::saveImages(const std::string& path) { for (auto& i : images) i.saveLayers(path); } -const RawImage& ImageStack::getGlobalMask() const { return globalMask; } +const RawImage& ImageStack::getGlobalMask() const { return global_mask; } void ImageStack::applyMaskFlags(int flags, const std::vector& exceptions) { for (auto& i : images) { @@ -102,7 +102,7 @@ void ImageStack::applyMaskFlags(int flags, const std::vector& exceptions) { void ImageStack::applyGlobalMask(int flags, int threshold) { createGlobalMask(flags, threshold); for (auto& i : images) { - i.applyGlobalMask(globalMask); + i.applyGlobalMask(global_mask); } } @@ -128,9 +128,9 @@ void ImageStack::createGlobalMask(int flags, int threshold) { } // Set all pixels below threshold to 0 and all above to 1 - float* globalM = globalMask.getDataRef(); + float* global_m = global_mask.getDataRef(); for (unsigned int p = 0; p < npixels; ++p) { - globalM[p] = counts[p] < threshold ? 0.0 : 1.0; + global_m[p] = counts[p] < threshold ? 0.0 : 1.0; } } diff --git a/src/kbmod/search/ImageStack.h b/src/kbmod/search/ImageStack.h index f6fb1324..c9e013c9 100644 --- a/src/kbmod/search/ImageStack.h +++ b/src/kbmod/search/ImageStack.h @@ -30,8 +30,8 @@ class ImageStack { unsigned getHeight() const { return images.size() > 0 ? images[0].getHeight() : 0; } unsigned getNPixels() const { return images.size() > 0 ? images[0].getNPixels() : 0; } std::vector& getImages() { return images; } - const std::vector& getTimes() const { return imageTimes; } - float* getTimesDataRef() { return imageTimes.data(); } + const std::vector& getTimes() const { return image_times; } + float* getTimesDataRef() { return image_times.data(); } LayeredImage& getSingleImage(int index); // Simple setters. @@ -55,13 +55,13 @@ class ImageStack { virtual ~ImageStack(){}; private: - void loadImages(const std::vector& fileNames, const std::vector& psfs); + void loadImages(const std::vector& filenames, const std::vector& psfs); void extractImageTimes(); void setTimeOrigin(); void createGlobalMask(int flags, int threshold); std::vector images; - RawImage globalMask; - std::vector imageTimes; + RawImage global_mask; + std::vector image_times; bool verbose; }; diff --git a/src/kbmod/search/KBMOSearch.cpp b/src/kbmod/search/KBMOSearch.cpp index 8def1b79..ae84246c 100644 --- a/src/kbmod/search/KBMOSearch.cpp +++ b/src/kbmod/search/KBMOSearch.cpp @@ -10,34 +10,33 @@ namespace search { #ifdef HAVE_CUDA - extern "C" void deviceSearchFilter(int imageCount, int width, int height, float* psiVect, float* phiVect, - perImageData img_data, searchParameters params, int trajCount, - trajectory* trajectoriesToSearch, int resultsCount, - trajectory* bestTrajects); - - void deviceGetCoadds(ImageStack& stack, perImageData image_data, int num_trajectories, - trajectory* trajectories, stampParameters params, - std::vector >& use_index_vect, float* results); +extern "C" void deviceSearchFilter(int num_images, int width, int height, float* psi_vect, float* phi_vect, + PerImageData img_data, SearchParameters params, int num_trajectories, + trajectory* trj_to_search, int num_results, trajectory* best_results); + +void deviceGetCoadds(ImageStack& stack, PerImageData image_data, int num_trajectories, + trajectory* trajectories, StampParameters params, + std::vector >& use_index_vect, float* results); #endif KBMOSearch::KBMOSearch(ImageStack& imstack) : stack(imstack) { - maxResultCount = 100000; - debugInfo = false; - psiPhiGenerated = false; + max_result_count = 100000; + debug_info = false; + psi_phi_generated = false; // Default the thresholds. - params.minObservations = 0; - params.minLH = 0.0; + params.min_observations = 0; + params.min_lh = 0.0; // Default filtering arguments. params.do_sigmag_filter = false; - params.sGL_L = 0.25; - params.sGL_H = 0.75; - params.sigmaGCoeff = -1.0; + params.sgl_L = 0.25; + params.sgl_H = 0.75; + params.sigmag_coeff = -1.0; // Default the encoding parameters. - params.psiNumBytes = -1; - params.phiNumBytes = -1; + params.psi_num_bytes = -1; + params.phi_num_bytes = -1; // Default pixel starting bounds. params.x_start_min = 0; @@ -46,52 +45,53 @@ KBMOSearch::KBMOSearch(ImageStack& imstack) : stack(imstack) { params.y_start_max = stack.getHeight(); // Set default values for the barycentric correction. - baryCorrs = std::vector(stack.imgCount()); - params.useCorr = false; - useCorr = false; + bary_corrs = std::vector(stack.imgCount()); + params.use_corr = false; + use_corr = false; params.debug = false; } void KBMOSearch::setDebug(bool d) { - debugInfo = d; + debug_info = d; params.debug = d; } -void KBMOSearch::enableCorr(std::vector pyBaryCorrCoeff) { - useCorr = true; - params.useCorr = true; +void KBMOSearch::enableCorr(std::vector bary_corr_coeff) { + use_corr = true; + params.use_corr = true; for (int i = 0; i < stack.imgCount(); i++) { int j = i * 6; - baryCorrs[i].dx = pyBaryCorrCoeff[j]; - baryCorrs[i].dxdx = pyBaryCorrCoeff[j + 1]; - baryCorrs[i].dxdy = pyBaryCorrCoeff[j + 2]; - baryCorrs[i].dy = pyBaryCorrCoeff[j + 3]; - baryCorrs[i].dydx = pyBaryCorrCoeff[j + 4]; - baryCorrs[i].dydy = pyBaryCorrCoeff[j + 5]; + bary_corrs[i].dx = bary_corr_coeff[j]; + bary_corrs[i].dxdx = bary_corr_coeff[j + 1]; + bary_corrs[i].dxdy = bary_corr_coeff[j + 2]; + bary_corrs[i].dy = bary_corr_coeff[j + 3]; + bary_corrs[i].dydx = bary_corr_coeff[j + 4]; + bary_corrs[i].dydy = bary_corr_coeff[j + 5]; } } -void KBMOSearch::enableGPUSigmaGFilter(std::vector pyPercentiles, float pySigmaGCoeff, float pyMinLH) { +void KBMOSearch::enableGPUSigmaGFilter(std::vector percentiles, float sigmag_coeff, + float min_lh) { params.do_sigmag_filter = true; - params.sGL_L = pyPercentiles[0]; - params.sGL_H = pyPercentiles[1]; - params.sigmaGCoeff = pySigmaGCoeff; - params.minLH = pyMinLH; + params.sgl_L = percentiles[0]; + params.sgl_H = percentiles[1]; + params.sigmag_coeff = sigmag_coeff; + params.min_lh = min_lh; } -void KBMOSearch::enableGPUEncoding(int pyPsiNumBytes, int pyPhiNumBytes) { +void KBMOSearch::enableGPUEncoding(int psi_num_bytes, int phi_num_bytes) { // Make sure the encoding is one of the supported options. // Otherwise use default float (aka no encoding). - if (pyPsiNumBytes == 1 || pyPsiNumBytes == 2) { - params.psiNumBytes = pyPsiNumBytes; + if (psi_num_bytes == 1 || psi_num_bytes == 2) { + params.psi_num_bytes = psi_num_bytes; } else { - params.psiNumBytes = -1; + params.psi_num_bytes = -1; } - if (pyPhiNumBytes == 1 || pyPhiNumBytes == 2) { - params.phiNumBytes = pyPhiNumBytes; + if (phi_num_bytes == 1 || phi_num_bytes == 2) { + params.phi_num_bytes = phi_num_bytes; } else { - params.phiNumBytes = -1; + params.phi_num_bytes = -1; } } @@ -105,59 +105,59 @@ void KBMOSearch::setStartBoundsY(int y_min, int y_max) { params.y_start_max = y_max; } -void KBMOSearch::search(int aSteps, int vSteps, float minAngle, float maxAngle, float minVelocity, - float maxVelocity, int minObservations) { +void KBMOSearch::search(int ang_steps, int vel_steps, float min_ang, float max_ang, float min_vel, + float max_vel, int min_observations) { preparePsiPhi(); - createSearchList(aSteps, vSteps, minAngle, maxAngle, minVelocity, maxVelocity); + createSearchList(ang_steps, vel_steps, min_ang, max_ang, min_vel, max_vel); startTimer("Creating psi/phi buffers"); - std::vector psiVect; - std::vector phiVect; - fillPsiAndPhiVects(psiImages, phiImages, &psiVect, &phiVect); + std::vector psi_vect; + std::vector phi_vect; + fillPsiAndphi_vects(psi_images, phi_images, &psi_vect, &phi_vect); endTimer(); // Create a data stucture for the per-image data. - perImageData img_data; - img_data.numImages = stack.imgCount(); - img_data.imageTimes = stack.getTimesDataRef(); - if (params.useCorr) img_data.baryCorrs = &baryCorrs[0]; + PerImageData img_data; + img_data.num_images = stack.imgCount(); + img_data.image_times = stack.getTimesDataRef(); + if (params.use_corr) img_data.bary_corrs = &bary_corrs[0]; // Compute the encoding parameters for psi and phi if needed. // Vectors need to be created outside the if so they stay in scope. - std::vector psiScaleVect; - std::vector phiScaleVect; - if (params.psiNumBytes > 0) { - psiScaleVect = computeImageScaling(psiImages, params.psiNumBytes); - img_data.psiParams = psiScaleVect.data(); + std::vector psi_scale_vect; + std::vector phi_scale_vect; + if (params.psi_num_bytes > 0) { + psi_scale_vect = computeImageScaling(psi_images, params.psi_num_bytes); + img_data.psi_params = psi_scale_vect.data(); } - if (params.phiNumBytes > 0) { - phiScaleVect = computeImageScaling(phiImages, params.phiNumBytes); - img_data.phiParams = phiScaleVect.data(); + if (params.phi_num_bytes > 0) { + phi_scale_vect = computeImageScaling(phi_images, params.phi_num_bytes); + img_data.phi_params = phi_scale_vect.data(); } // Allocate a vector for the results. int num_search_pixels = ((params.x_start_max - params.x_start_min) * (params.y_start_max - params.y_start_min)); int max_results = num_search_pixels * RESULTS_PER_PIXEL; - if (debugInfo) { + if (debug_info) { std::cout << "Searching X=[" << params.x_start_min << ", " << params.x_start_max << "]" << " Y=[" << params.y_start_min << ", " << params.y_start_max << "]\n"; std::cout << "Allocating space for " << max_results << " results.\n"; } results = std::vector(max_results); - if (debugInfo) std::cout << searchList.size() << " trajectories... \n" << std::flush; + if (debug_info) std::cout << search_list.size() << " trajectories... \n" << std::flush; // Set the minimum number of observations. - params.minObservations = minObservations; + params.min_observations = min_observations; // Do the actual search on the GPU. startTimer("Searching"); - #ifdef HAVE_CUDA - deviceSearchFilter(stack.imgCount(), stack.getWidth(), stack.getHeight(), psiVect.data(), phiVect.data(), - img_data, params, searchList.size(), searchList.data(), max_results, results.data()); - #else - throw std::runtime_error("Non-GPU search is not implemented."); - #endif +#ifdef HAVE_CUDA + deviceSearchFilter(stack.imgCount(), stack.getWidth(), stack.getHeight(), psi_vect.data(), phi_vect.data(), + img_data, params, search_list.size(), search_list.data(), max_results, results.data()); +#else + throw std::runtime_error("Non-GPU search is not implemented."); +#endif endTimer(); startTimer("Sorting results"); @@ -171,9 +171,9 @@ void KBMOSearch::savePsiPhi(const std::string& path) { } void KBMOSearch::preparePsiPhi() { - if (!psiPhiGenerated) { - psiImages.clear(); - phiImages.clear(); + if (!psi_phi_generated) { + psi_images.clear(); + phi_images.clear(); // Compute Phi and Psi from convolved images // while leaving masked pixels alone @@ -181,11 +181,11 @@ void KBMOSearch::preparePsiPhi() { const int num_images = stack.imgCount(); for (int i = 0; i < num_images; ++i) { LayeredImage& img = stack.getSingleImage(i); - psiImages.push_back(img.generatePsiImage()); - phiImages.push_back(img.generatePhiImage()); + psi_images.push_back(img.generatePsiImage()); + phi_images.push_back(img.generatePhiImage()); } - psiPhiGenerated = true; + psi_phi_generated = true; } } @@ -199,11 +199,11 @@ std::vector KBMOSearch::computeImageScaling(const std::vector bnds = vect[i].computeBounds(); - params.minVal = bnds[0]; - params.maxVal = bnds[1]; + params.min_val = bnds[0]; + params.max_val = bnds[1]; // Increase width to avoid divide by zero. - float width = (params.maxVal - params.minVal); + float width = (params.max_val - params.min_val); if (width < 1e-6) width = 1e-6; // Set the scale if we are encoding the values. @@ -223,62 +223,62 @@ void KBMOSearch::saveImages(const std::string& path) { std::string number = std::to_string(i); // Add leading zeros number = std::string(4 - number.length(), '0') + number; - psiImages[i].saveToFile(path + "/psi/PSI" + number + ".fits"); - phiImages[i].saveToFile(path + "/phi/PHI" + number + ".fits"); + psi_images[i].saveToFile(path + "/psi/PSI" + number + ".fits"); + phi_images[i].saveToFile(path + "/phi/PHI" + number + ".fits"); } } -void KBMOSearch::createSearchList(int angleSteps, int velocitySteps, float minAngle, float maxAngle, - float minVelocity, float maxVelocity) { - std::vector angles(angleSteps); - float aStepSize = (maxAngle - minAngle) / float(angleSteps); - for (int i = 0; i < angleSteps; ++i) { - angles[i] = minAngle + float(i) * aStepSize; +void KBMOSearch::createSearchList(int angle_steps, int velocity_steps, float min_ang, float max_ang, + float min_vel, float max_vel) { + std::vector angles(angle_steps); + float ang_stepsize = (max_ang - min_ang) / float(angle_steps); + for (int i = 0; i < angle_steps; ++i) { + angles[i] = min_ang + float(i) * ang_stepsize; } - std::vector velocities(velocitySteps); - float vStepSize = (maxVelocity - minVelocity) / float(velocitySteps); - for (int i = 0; i < velocitySteps; ++i) { - velocities[i] = minVelocity + float(i) * vStepSize; + std::vector velocities(velocity_steps); + float vel_stepsize = (max_vel - min_vel) / float(velocity_steps); + for (int i = 0; i < velocity_steps; ++i) { + velocities[i] = min_vel + float(i) * vel_stepsize; } - int trajCount = angleSteps * velocitySteps; - searchList = std::vector(trajCount); - for (int a = 0; a < angleSteps; ++a) { - for (int v = 0; v < velocitySteps; ++v) { - searchList[a * velocitySteps + v].xVel = cos(angles[a]) * velocities[v]; - searchList[a * velocitySteps + v].yVel = sin(angles[a]) * velocities[v]; + int trajCount = angle_steps * velocity_steps; + search_list = std::vector(trajCount); + for (int a = 0; a < angle_steps; ++a) { + for (int v = 0; v < velocity_steps; ++v) { + search_list[a * velocity_steps + v].x_vel = cos(angles[a]) * velocities[v]; + search_list[a * velocity_steps + v].y_vel = sin(angles[a]) * velocities[v]; } } } -void KBMOSearch::fillPsiAndPhiVects(const std::vector& psiImgs, - const std::vector& phiImgs, std::vector* psiVect, - std::vector* phiVect) { - assert(psiVect != NULL); - assert(phiVect != NULL); +void KBMOSearch::fillPsiAndphi_vects(const std::vector& psi_imgs, + const std::vector& phi_imgs, std::vector* psi_vect, + std::vector* phi_vect) { + assert(psi_vect != NULL); + assert(phi_vect != NULL); - int num_images = psiImgs.size(); + int num_images = psi_imgs.size(); assert(num_images > 0); - assert(phiImgs.size() == num_images); + assert(phi_imgs.size() == num_images); - int num_pixels = psiImgs[0].getNPixels(); + int num_pixels = psi_imgs[0].getNPixels(); for (int i = 0; i < num_images; ++i) { - assert(psiImgs[i].getNPixels() == num_pixels); - assert(phiImgs[i].getNPixels() == num_pixels); + assert(psi_imgs[i].getNPixels() == num_pixels); + assert(phi_imgs[i].getNPixels() == num_pixels); } - psiVect->clear(); - psiVect->reserve(num_images * num_pixels); - phiVect->clear(); - phiVect->reserve(num_images * num_pixels); + psi_vect->clear(); + psi_vect->reserve(num_images * num_pixels); + phi_vect->clear(); + phi_vect->reserve(num_images * num_pixels); for (int i = 0; i < num_images; ++i) { - const std::vector& psiRef = psiImgs[i].getPixels(); - const std::vector& phiRef = phiImgs[i].getPixels(); + const std::vector& psi_ref = psi_imgs[i].getPixels(); + const std::vector& phi_ref = phi_imgs[i].getPixels(); for (unsigned p = 0; p < num_pixels; ++p) { - psiVect->push_back(psiRef[p]); - phiVect->push_back(phiRef[p]); + psi_vect->push_back(psi_ref[p]); + phi_vect->push_back(phi_ref[p]); } } } @@ -294,7 +294,7 @@ std::vector KBMOSearch::scienceStamps(const trajectory& trj, int radiu int num_times = stack.imgCount(); for (int i = 0; i < num_times; ++i) { if (use_all_stamps || use_index[i]) { - pixelPos pos = getTrajPos(trj, i); + PixelPos pos = getTrajPos(trj, i); RawImage& img = stack.getSingleImage(i).getScience(); stamps.push_back(img.createStamp(pos.x, pos.y, radius, interpolate, keep_no_data)); } @@ -333,14 +333,14 @@ RawImage KBMOSearch::summedScienceStamp(const trajectory& trj, int radius, scienceStamps(trj, radius, false /*=interpolate*/, false /*=keep_no_data*/, use_index)); } -bool KBMOSearch::filterStamp(const RawImage& img, const stampParameters& params) { +bool KBMOSearch::filterStamp(const RawImage& img, const StampParameters& params) { // Allocate space for the coadd information and initialize to zero. const int stamp_width = 2 * params.radius + 1; const int stamp_ppi = stamp_width * stamp_width; const std::vector& pixels = img.getPixels(); // Filter on the peak's position. - pixelPos pos = img.findPeak(true); + PixelPos pos = img.findPeak(true); if ((abs(pos.x - params.radius) >= params.peak_offset_x) || (abs(pos.y - params.radius) >= params.peak_offset_y)) { return true; @@ -361,11 +361,9 @@ bool KBMOSearch::filterStamp(const RawImage& img, const stampParameters& params) } // Filter on the image moments. - imageMoments moments = img.findCentralMoments(); - if ((fabs(moments.m01) >= params.m01_limit) || - (fabs(moments.m10) >= params.m10_limit) || - (fabs(moments.m11) >= params.m11_limit) || - (moments.m02 >= params.m02_limit) || + ImageMoments moments = img.findCentralMoments(); + if ((fabs(moments.m01) >= params.m01_limit) || (fabs(moments.m10) >= params.m10_limit) || + (fabs(moments.m11) >= params.m11_limit) || (moments.m02 >= params.m02_limit) || (moments.m20 >= params.m20_limit)) { return true; } @@ -375,27 +373,27 @@ bool KBMOSearch::filterStamp(const RawImage& img, const stampParameters& params) std::vector KBMOSearch::coaddedScienceStamps(std::vector& t_array, std::vector >& use_index_vect, - const stampParameters& params, - bool use_gpu) { + const StampParameters& params, bool use_gpu) { if (use_gpu) { - #ifdef HAVE_CUDA - return coaddedScienceStampsGPU(t_array, use_index_vect, params); - #else - print("WARNING: GPU is not enabled. Performing co-adds on the CPU."); - #endif +#ifdef HAVE_CUDA + return coaddedScienceStampsGPU(t_array, use_index_vect, params); +#else + print("WARNING: GPU is not enabled. Performing co-adds on the CPU."); +#endif } return coaddedScienceStampsCPU(t_array, use_index_vect, params); } std::vector KBMOSearch::coaddedScienceStampsCPU(std::vector& t_array, std::vector >& use_index_vect, - const stampParameters& params) { + const StampParameters& params) { const int num_trajectories = t_array.size(); std::vector results(num_trajectories); std::vector empty_pixels(1, NO_DATA); for (int i = 0; i < num_trajectories; ++i) { - std::vector stamps = scienceStamps(t_array[i], params.radius, false, true, use_index_vect[i]); + std::vector stamps = + scienceStamps(t_array[i], params.radius, false, true, use_index_vect[i]); RawImage coadd(1, 1); switch (params.stamp_type) { @@ -425,7 +423,7 @@ std::vector KBMOSearch::coaddedScienceStampsCPU(std::vector KBMOSearch::coaddedScienceStampsGPU(std::vector& t_array, std::vector >& use_index_vect, - const stampParameters& params) { + const StampParameters& params) { // Right now only limited stamp sizes are allowed. if (2 * params.radius + 1 > MAX_STAMP_EDGE || params.radius <= 0) { throw std::runtime_error("Invalid Radius."); @@ -436,9 +434,9 @@ std::vector KBMOSearch::coaddedScienceStampsGPU(std::vector KBMOSearch::coaddedScienceStampsGPU(std::vector stamp_data(stamp_ppi * num_trajectories); - // Do the co-adds. - #ifdef HAVE_CUDA - deviceGetCoadds(stack, img_data, num_trajectories, t_array.data(), params, use_index_vect, - stamp_data.data()); - #else - throw std::runtime_error("Non-GPU co-adds is not implemented."); - #endif +// Do the co-adds. +#ifdef HAVE_CUDA + deviceGetCoadds(stack, img_data, num_trajectories, t_array.data(), params, use_index_vect, + stamp_data.data()); +#else + throw std::runtime_error("Non-GPU co-adds is not implemented."); +#endif // Copy the stamps into RawImages and do the filtering. std::vector results(num_trajectories); @@ -480,27 +478,28 @@ std::vector KBMOSearch::createStamps(trajectory t, int radius, const s if (radius < 0) throw std::runtime_error("stamp radius must be at least 0"); std::vector stamps; for (int i = 0; i < imgs.size(); ++i) { - pixelPos pos = getTrajPos(t, i); + PixelPos pos = getTrajPos(t, i); stamps.push_back(imgs[i]->createStamp(pos.x, pos.y, radius, interpolate, false)); } return stamps; } -pixelPos KBMOSearch::getTrajPos(const trajectory& t, int i) const { +PixelPos KBMOSearch::getTrajPos(const trajectory& t, int i) const { float time = stack.getTimes()[i]; - if (useCorr) { - return {t.x + time * t.xVel + baryCorrs[i].dx + t.x * baryCorrs[i].dxdx + t.y * baryCorrs[i].dxdy, - t.y + time * t.yVel + baryCorrs[i].dy + t.x * baryCorrs[i].dydx + t.y * baryCorrs[i].dydy}; + if (use_corr) { + return {t.x + time * t.x_vel + bary_corrs[i].dx + t.x * bary_corrs[i].dxdx + t.y * bary_corrs[i].dxdy, + t.y + time * t.y_vel + bary_corrs[i].dy + t.x * bary_corrs[i].dydx + + t.y * bary_corrs[i].dydy}; } else { - return {t.x + time * t.xVel, t.y + time * t.yVel}; + return {t.x + time * t.x_vel, t.y + time * t.y_vel}; } } -std::vector KBMOSearch::getMultTrajPos(trajectory& t) const { - std::vector results; +std::vector KBMOSearch::getMultTrajPos(trajectory& t) const { + std::vector results; int num_times = stack.imgCount(); for (int i = 0; i < num_times; ++i) { - pixelPos pos = getTrajPos(t, i); + PixelPos pos = getTrajPos(t, i); results.push_back(pos); } return results; @@ -517,25 +516,26 @@ std::vector KBMOSearch::createCurves(trajectory t, const std::vector lightcurve - The computed trajectory */ - int imgSize = imgs.size(); + int img_size = imgs.size(); std::vector lightcurve; - lightcurve.reserve(imgSize); + lightcurve.reserve(img_size); const std::vector& times = stack.getTimes(); - for (int i = 0; i < imgSize; ++i) { + for (int i = 0; i < img_size; ++i) { /* Do not use getPixelInterp(), because results from createCurves must * be able to recover the same likelihoods as the ones reported by the * gpu search.*/ - float pixVal; - if (useCorr) { - pixelPos pos = getTrajPos(t, i); - pixVal = imgs[i].getPixel(int(pos.x + 0.5), int(pos.y + 0.5)); + float pix_val; + if (use_corr) { + PixelPos pos = getTrajPos(t, i); + pix_val = imgs[i].getPixel(int(pos.x + 0.5), int(pos.y + 0.5)); } /* Does not use getTrajPos to be backwards compatible with Hits_Rerun */ else { - pixVal = imgs[i].getPixel(t.x + int(times[i] * t.xVel + 0.5), t.y + int(times[i] * t.yVel + 0.5)); + pix_val = imgs[i].getPixel(t.x + int(times[i] * t.x_vel + 0.5), + t.y + int(times[i] * t.y_vel + 0.5)); } - if (pixVal == NO_DATA) pixVal = 0.0; - lightcurve.push_back(pixVal); + if (pix_val == NO_DATA) pix_val = 0.0; + lightcurve.push_back(pix_val); } return lightcurve; } @@ -548,7 +548,7 @@ std::vector KBMOSearch::psiCurves(trajectory& t) { * std::vector - A vector of the lightcurve values */ preparePsiPhi(); - return createCurves(t, psiImages); + return createCurves(t, psi_images); } std::vector KBMOSearch::phiCurves(trajectory& t) { @@ -559,29 +559,29 @@ std::vector KBMOSearch::phiCurves(trajectory& t) { * std::vector - A vector of the lightcurve values */ preparePsiPhi(); - return createCurves(t, phiImages); + return createCurves(t, phi_images); } -std::vector& KBMOSearch::getPsiImages() { return psiImages; } +std::vector& KBMOSearch::getPsiImages() { return psi_images; } -std::vector& KBMOSearch::getPhiImages() { return phiImages; } +std::vector& KBMOSearch::getPhiImages() { return phi_images; } void KBMOSearch::sortResults() { __gnu_parallel::sort(results.begin(), results.end(), [](trajectory a, trajectory b) { return b.lh < a.lh; }); } -void KBMOSearch::filterResults(int minObservations) { +void KBMOSearch::filterResults(int min_observations) { results.erase(std::remove_if(results.begin(), results.end(), - std::bind([](trajectory t, int cutoff) { return t.obsCount < cutoff; }, - std::placeholders::_1, minObservations)), + std::bind([](trajectory t, int cutoff) { return t.obs_count < cutoff; }, + std::placeholders::_1, min_observations)), results.end()); } -void KBMOSearch::filterResultsLH(float minLH) { +void KBMOSearch::filterResultsLH(float min_lh) { results.erase(std::remove_if(results.begin(), results.end(), std::bind([](trajectory t, float cutoff) { return t.lh < cutoff; }, - std::placeholders::_1, minLH)), + std::placeholders::_1, min_lh)), results.end()); } @@ -597,17 +597,17 @@ std::vector KBMOSearch::getResults(int start, int count) { void KBMOSearch::setResults(const std::vector& new_results) { results = new_results; } void KBMOSearch::startTimer(const std::string& message) { - if (debugInfo) { + if (debug_info) { std::cout << message << "... " << std::flush; - tStart = std::chrono::system_clock::now(); + t_start = std::chrono::system_clock::now(); } } void KBMOSearch::endTimer() { - if (debugInfo) { - tEnd = std::chrono::system_clock::now(); - tDelta = tEnd - tStart; - std::cout << " Took " << tDelta.count() << " seconds.\n" << std::flush; + if (debug_info) { + t_end = std::chrono::system_clock::now(); + t_delta = t_end - t_start; + std::cout << " Took " << t_delta.count() << " seconds.\n" << std::flush; } } diff --git a/src/kbmod/search/KBMOSearch.h b/src/kbmod/search/KBMOSearch.h index 6668b732..d42fa83b 100644 --- a/src/kbmod/search/KBMOSearch.h +++ b/src/kbmod/search/KBMOSearch.h @@ -36,26 +36,26 @@ class KBMOSearch { void setDebug(bool d); // The primary search functions. - void enableGPUSigmaGFilter(std::vector pyPercentiles, float pySigmaGCoeff, float pyMinLH); - void enableCorr(std::vector pyBaryCorrCoeff); - void enableGPUEncoding(int psiNumBytes, int phiNumBytes); + void enableGPUSigmaGFilter(std::vector percentiles, float sigmag_coeff, float min_lh); + void enableCorr(std::vector bary_corr_coeff); + void enableGPUEncoding(int psi_num_bytes, int phi_num_bytes); void setStartBoundsX(int x_min, int x_max); void setStartBoundsY(int y_min, int y_max); - void search(int aSteps, int vSteps, float minAngle, float maxAngle, float minVelocity, float maxVelocity, - int minObservations); + void search(int a_steps, int v_steps, float min_angle, float max_angle, float min_velocity, + float max_velocity, int min_observations); // Gets the vector of result trajectories. std::vector getResults(int start, int end); // Get the predicted (pixel) positions for a given trajectory. - pixelPos getTrajPos(const trajectory& t, int i) const; - std::vector getMultTrajPos(trajectory& t) const; + PixelPos getTrajPos(const trajectory& t, int i) const; + std::vector getMultTrajPos(trajectory& t) const; // Filters the results based on various parameters. - void filterResults(int minObservations); - void filterResultsLH(float minLH); + void filterResults(int min_observations); + void filterResultsLH(float min_lh); // Functions for creating science stamps for filtering, visualization, etc. User can specify // the radius of the stamp, whether to interpolate among pixels, whether to keep NO_DATA values @@ -75,11 +75,10 @@ class KBMOSearch { // the code will return a 1x1 image with NO_DATA to represent each filtered image. std::vector coaddedScienceStamps(std::vector& t_array, std::vector >& use_index_vect, - const stampParameters& params, - bool use_cpu); + const StampParameters& params, bool use_cpu); // Function to do the actual stamp filtering. - bool filterStamp(const RawImage& img, const stampParameters& params); + bool filterStamp(const RawImage& img, const StampParameters& params); // Getters for the Psi and Phi data. std::vector& getPsiImages(); @@ -104,8 +103,8 @@ class KBMOSearch { std::vector createCurves(trajectory t, const std::vector& imgs); // Fill an interleaved vector for the GPU functions. - void fillPsiAndPhiVects(const std::vector& psiImgs, const std::vector& phiImgs, - std::vector* psiVect, std::vector* phiVect); + void fillPsiAndphi_vects(const std::vector& psi_imgs, const std::vector& phi_imgs, + std::vector* psi_vect, std::vector* phi_vect); // Set the parameter min/max/scale from the psi/phi/other images. std::vector computeImageScaling(const std::vector& vect, @@ -118,39 +117,39 @@ class KBMOSearch { bool interpolate); // Creates list of trajectories to search. - void createSearchList(int angleSteps, int veloctiySteps, float minAngle, float maxAngle, - float minVelocity, float maxVelocity); + void createSearchList(int angle_steps, int velocity_steps, float min_ang, float max_ang, + float min_vel, float max_vel); std::vector coaddedScienceStampsGPU(std::vector& t_array, std::vector >& use_index_vect, - const stampParameters& params); + const StampParameters& params); std::vector coaddedScienceStampsCPU(std::vector& t_array, std::vector >& use_index_vect, - const stampParameters& params); + const StampParameters& params); // Helper functions for timing operations of the search. void startTimer(const std::string& message); void endTimer(); - unsigned maxResultCount; - bool psiPhiGenerated; - bool debugInfo; + unsigned max_result_count; + bool psi_phi_generated; + bool debug_info; ImageStack stack; - std::vector searchList; - std::vector psiImages; - std::vector phiImages; + std::vector search_list; + std::vector psi_images; + std::vector phi_images; std::vector results; // Variables for the timer. - std::chrono::time_point tStart, tEnd; - std::chrono::duration tDelta; + std::chrono::time_point t_start, t_end; + std::chrono::duration t_delta; // Parameters for the GPU search. - searchParameters params; + SearchParameters params; // Parameters to do barycentric corrections. - bool useCorr; - std::vector baryCorrs; + bool use_corr; + std::vector bary_corrs; }; } /* namespace search */ diff --git a/src/kbmod/search/LayeredImage.cpp b/src/kbmod/search/LayeredImage.cpp index 337758d0..6f6f453a 100644 --- a/src/kbmod/search/LayeredImage.cpp +++ b/src/kbmod/search/LayeredImage.cpp @@ -9,12 +9,12 @@ namespace search { -LayeredImage::LayeredImage(std::string path, const PointSpreadFunc& psf) : psf(psf), psfSQ(psf) { - psfSQ.squarePSF(); +LayeredImage::LayeredImage(std::string path, const PointSpreadFunc& psf) : psf(psf), psf_sq(psf) { + psf_sq.squarePSF(); - int fBegin = path.find_last_of("/"); - int fEnd = path.find_last_of(".fits") - 4; - fileName = path.substr(fBegin, fEnd - fBegin); + int f_begin = path.find_last_of("/"); + int f_end = path.find_last_of(".fits") - 4; + filename = path.substr(f_begin, f_end - f_begin); science = RawImage(); science.loadFromFile(path, 1); @@ -35,7 +35,7 @@ LayeredImage::LayeredImage(std::string path, const PointSpreadFunc& psf) : psf(p LayeredImage::LayeredImage(const RawImage& sci, const RawImage& var, const RawImage& msk, const PointSpreadFunc& psf) - : psf(psf), psfSQ(psf) { + : psf(psf), psf_sq(psf) { // Get the dimensions of the science layer and check for consistency with // the other two layers. width = sci.getWidth(); @@ -46,7 +46,7 @@ LayeredImage::LayeredImage(const RawImage& sci, const RawImage& var, const RawIm throw std::runtime_error("Science and Mask layers are not the same size."); // Set the remaining variables. - psfSQ.squarePSF(); + psf_sq.squarePSF(); // Copy the image layers. science = sci; @@ -54,50 +54,50 @@ LayeredImage::LayeredImage(const RawImage& sci, const RawImage& var, const RawIm variance = var; } -LayeredImage::LayeredImage(std::string name, int w, int h, float noiseStDev, float pixelVariance, double time, +LayeredImage::LayeredImage(std::string name, int w, int h, float noise_stdev, float pixel_variance, double time, const PointSpreadFunc& psf) - : LayeredImage(name, w, h, noiseStDev, pixelVariance, time, psf, -1) {} + : LayeredImage(name, w, h, noise_stdev, pixel_variance, time, psf, -1) {} -LayeredImage::LayeredImage(std::string name, int w, int h, float noiseStDev, float pixelVariance, double time, +LayeredImage::LayeredImage(std::string name, int w, int h, float noise_stdev, float pixel_variance, double time, const PointSpreadFunc& psf, int seed) - : psf(psf), psfSQ(psf) { - fileName = name; + : psf(psf), psf_sq(psf) { + filename = name; width = w; height = h; - psfSQ.squarePSF(); + psf_sq.squarePSF(); - std::vector rawSci(width * height); + std::vector raw_sci(width * height); std::random_device r; std::default_random_engine generator(r()); if (seed >= 0) { generator.seed(seed); } - std::normal_distribution distrib(0.0, noiseStDev); - for (float& p : rawSci) p = distrib(generator); + std::normal_distribution distrib(0.0, noise_stdev); + for (float& p : raw_sci) p = distrib(generator); - science = RawImage(w, h, rawSci); + science = RawImage(w, h, raw_sci); science.setObstime(time); mask = RawImage(w, h, std::vector(w * h, 0.0)); - variance = RawImage(w, h, std::vector(w * h, pixelVariance)); + variance = RawImage(w, h, std::vector(w * h, pixel_variance)); } void LayeredImage::setPSF(const PointSpreadFunc& new_psf) { psf = new_psf; - psfSQ = new_psf; - psfSQ.squarePSF(); + psf_sq = new_psf; + psf_sq.squarePSF(); } void LayeredImage::addObject(float x, float y, float flux) { const std::vector& k = psf.getKernel(); int dim = psf.getDim(); - float initialX = x - static_cast(psf.getRadius()); - float initialY = y - static_cast(psf.getRadius()); + float initial_x = x - static_cast(psf.getRadius()); + float initial_y = y - static_cast(psf.getRadius()); int count = 0; for (int i = 0; i < dim; ++i) { for (int j = 0; j < dim; ++j) { - science.addPixelInterp(initialX + static_cast(i), initialY + static_cast(j), + science.addPixelInterp(initial_x + static_cast(i), initial_y + static_cast(j), flux * k[count]); count++; } @@ -111,7 +111,7 @@ void LayeredImage::growMask(int steps) { void LayeredImage::convolvePSF() { science.convolve(psf); - variance.convolve(psfSQ); + variance.convolve(psf_sq); } void LayeredImage::applyMaskFlags(int flags, const std::vector& exceptions) { @@ -120,32 +120,32 @@ void LayeredImage::applyMaskFlags(int flags, const std::vector& exceptions) } /* Mask all pixels that are not 0 in global mask */ -void LayeredImage::applyGlobalMask(const RawImage& globalM) { - science.applyMask(0xFFFFFF, {}, globalM); - variance.applyMask(0xFFFFFF, {}, globalM); +void LayeredImage::applyGlobalMask(const RawImage& global_mask) { + science.applyMask(0xFFFFFF, {}, global_mask); + variance.applyMask(0xFFFFFF, {}, global_mask); } void LayeredImage::applyMaskThreshold(float thresh) { - const int numPixels = getNPixels(); - float* sciPix = science.getDataRef(); - float* varPix = variance.getDataRef(); - for (int i = 0; i < numPixels; ++i) { - if (sciPix[i] > thresh) { - sciPix[i] = NO_DATA; - varPix[i] = NO_DATA; + const int num_pixels = getNPixels(); + float* sci_pixels = science.getDataRef(); + float* var_pix = variance.getDataRef(); + for (int i = 0; i < num_pixels; ++i) { + if (sci_pixels[i] > thresh) { + sci_pixels[i] = NO_DATA; + var_pix[i] = NO_DATA; } } } -void LayeredImage::subtractTemplate(const RawImage& subTemplate) { - assert(getHeight() == subTemplate.getHeight() && getWidth() == subTemplate.getWidth()); - const int numPixels = getNPixels(); +void LayeredImage::subtractTemplate(const RawImage& sub_template) { + assert(getHeight() == sub_template.getHeight() && getWidth() == sub_template.getWidth()); + const int num_pixels = getNPixels(); - float* sciPix = science.getDataRef(); - const std::vector& temNPixelsx = subTemplate.getPixels(); - for (unsigned i = 0; i < numPixels; ++i) { - if ((sciPix[i] != NO_DATA) && (temNPixelsx[i] != NO_DATA)) { - sciPix[i] -= temNPixelsx[i]; + float* sci_pixels = science.getDataRef(); + const std::vector& tem_pixels = sub_template.getPixels(); + for (unsigned i = 0; i < num_pixels; ++i) { + if ((sci_pixels[i] != NO_DATA) && (tem_pixels[i] != NO_DATA)) { + sci_pixels[i] -= tem_pixels[i]; } } } @@ -156,16 +156,16 @@ void LayeredImage::saveLayers(const std::string& path) { long naxes[2] = {0, 0}; double obstime = science.getObstime(); - fits_create_file(&fptr, (path + fileName + ".fits").c_str(), &status); + fits_create_file(&fptr, (path + filename + ".fits").c_str(), &status); // If we are unable to create the file, check if it already exists // and, if so, delete it and retry the create. if (status == 105) { status = 0; - fits_open_file(&fptr, (path + fileName + ".fits").c_str(), READWRITE, &status); + fits_open_file(&fptr, (path + filename + ".fits").c_str(), READWRITE, &status); if (status == 0) { fits_delete_file(fptr, &status); - fits_create_file(&fptr, (path + fileName + ".fits").c_str(), &status); + fits_create_file(&fptr, (path + filename + ".fits").c_str(), &status); } } @@ -174,9 +174,9 @@ void LayeredImage::saveLayers(const std::string& path) { fits_close_file(fptr, &status); fits_report_error(stderr, status); - science.appendLayerToFile(path + fileName + ".fits"); - mask.appendLayerToFile(path + fileName + ".fits"); - variance.appendLayerToFile(path + fileName + ".fits"); + science.appendLayerToFile(path + filename + ".fits"); + mask.appendLayerToFile(path + filename + ".fits"); + variance.appendLayerToFile(path + filename + ".fits"); } void LayeredImage::setScience(RawImage& im) { @@ -202,15 +202,15 @@ void LayeredImage::checkDims(RawImage& im) { RawImage LayeredImage::generatePsiImage() { RawImage result(width, height); float* result_arr = result.getDataRef(); - float* sciArray = getSDataRef(); - float* varArray = getVDataRef(); + float* sci_array = getSDataRef(); + float* var_array = getVDataRef(); // Set each of the result pixels. const int num_pixels = getNPixels(); for (int p = 0; p < num_pixels; ++p) { - float varPix = varArray[p]; - if (varPix != NO_DATA) { - result_arr[p] = sciArray[p] / varPix; + float var_pix = var_array[p]; + if (var_pix != NO_DATA) { + result_arr[p] = sci_array[p] / var_pix; } else { result_arr[p] = NO_DATA; } @@ -225,14 +225,14 @@ RawImage LayeredImage::generatePsiImage() { RawImage LayeredImage::generatePhiImage() { RawImage result(width, height); float* result_arr = result.getDataRef(); - float* varArray = getVDataRef(); + float* var_array = getVDataRef(); // Set each of the result pixels. const int num_pixels = getNPixels(); for (int p = 0; p < num_pixels; ++p) { - float varPix = varArray[p]; - if (varPix != NO_DATA) { - result_arr[p] = 1.0 / varPix; + float var_pix = var_array[p]; + if (var_pix != NO_DATA) { + result_arr[p] = 1.0 / var_pix; } else { result_arr[p] = NO_DATA; } diff --git a/src/kbmod/search/LayeredImage.h b/src/kbmod/search/LayeredImage.h index 936782d9..b258944e 100644 --- a/src/kbmod/search/LayeredImage.h +++ b/src/kbmod/search/LayeredImage.h @@ -28,18 +28,18 @@ class LayeredImage { explicit LayeredImage(std::string path, const PointSpreadFunc& psf); explicit LayeredImage(const RawImage& sci, const RawImage& var, const RawImage& msk, const PointSpreadFunc& psf); - explicit LayeredImage(std::string name, int w, int h, float noiseStDev, float pixelVariance, double time, + explicit LayeredImage(std::string name, int w, int h, float noise_stdev, float pixel_variance, double time, const PointSpreadFunc& psf); - explicit LayeredImage(std::string name, int w, int h, float noiseStDev, float pixelVariance, double time, + explicit LayeredImage(std::string name, int w, int h, float noise_stdev, float pixel_variance, double time, const PointSpreadFunc& psf, int seed); // Set an image specific point spread function. void setPSF(const PointSpreadFunc& psf); const PointSpreadFunc& getPSF() const { return psf; } - const PointSpreadFunc& getPSFSQ() const { return psfSQ; } + const PointSpreadFunc& getPSFSQ() const { return psf_sq; } // Basic getter functions for image data. - std::string getName() const { return fileName; } + std::string getName() const { return filename; } unsigned getWidth() const { return width; } unsigned getHeight() const { return height; } unsigned getNPixels() const { return width * height; } @@ -58,12 +58,12 @@ class LayeredImage { // Applies the mask functions to each of the science and variance layers. void applyMaskFlags(int flag, const std::vector& exceptions); - void applyGlobalMask(const RawImage& globalMask); + void applyGlobalMask(const RawImage& global_mask); void applyMaskThreshold(float thresh); void growMask(int steps); // Subtracts a template image from the science layer. - void subtractTemplate(const RawImage& subTemplate); + void subtractTemplate(const RawImage& sub_template); // Adds an (artificial) object to the image (science) data. void addObject(float x, float y, float flux); @@ -86,12 +86,12 @@ class LayeredImage { private: void checkDims(RawImage& im); - std::string fileName; + std::string filename; unsigned width; unsigned height; PointSpreadFunc psf; - PointSpreadFunc psfSQ; + PointSpreadFunc psf_sq; RawImage science; RawImage mask; RawImage variance; diff --git a/src/kbmod/search/PointSpreadFunc.cpp b/src/kbmod/search/PointSpreadFunc.cpp index 31671eda..3038460a 100644 --- a/src/kbmod/search/PointSpreadFunc.cpp +++ b/src/kbmod/search/PointSpreadFunc.cpp @@ -11,20 +11,20 @@ namespace search { PointSpreadFunc::PointSpreadFunc(float stdev) { width = stdev; - float simpleGauss[MAX_KERNEL_RADIUS]; - double psfCoverage = 0.0; - double normFactor = stdev * sqrt(2.0); + float simple_gauss[MAX_KERNEL_RADIUS]; + double psf_coverage = 0.0; + double norm_factor = stdev * sqrt(2.0); int i = 0; // Create 1D gaussian array - while (psfCoverage < 0.98 && i < MAX_KERNEL_RADIUS) { - float currentBin = - 0.5 * (std::erf((float(i) + 0.5) / normFactor) - std::erf((float(i) - 0.5) / normFactor)); - simpleGauss[i] = currentBin; + while (psf_coverage < 0.98 && i < MAX_KERNEL_RADIUS) { + float current_bin = + 0.5 * (std::erf((float(i) + 0.5) / norm_factor) - std::erf((float(i) - 0.5) / norm_factor)); + simple_gauss[i] = current_bin; if (i == 0) { - psfCoverage += currentBin; + psf_coverage += current_bin; } else { - psfCoverage += 2.0 * currentBin; + psf_coverage += 2.0 * current_bin; } i++; } @@ -36,7 +36,7 @@ PointSpreadFunc::PointSpreadFunc(float stdev) { kernel = std::vector(); for (int ii = 0; ii < dim; ++ii) { for (int jj = 0; jj < dim; ++jj) { - float current = simpleGauss[abs(radius - ii)] * simpleGauss[abs(radius - jj)]; + float current = simple_gauss[abs(radius - ii)] * simple_gauss[abs(radius - jj)]; kernel.push_back(current); } } diff --git a/src/kbmod/search/RawImage.cpp b/src/kbmod/search/RawImage.cpp index 63305464..637d69de 100644 --- a/src/kbmod/search/RawImage.cpp +++ b/src/kbmod/search/RawImage.cpp @@ -10,10 +10,10 @@ namespace search { #ifdef HAVE_CUDA - // Performs convolution between an image represented as an array of floats - // and a PSF on a GPU device. - extern "C" void deviceConvolve(float* sourceImg, float* resultImg, int width, int height, float* psfKernel, - int psfSize, int psfDim, int psfRadius, float psfSum); +// Performs convolution between an image represented as an array of floats +// and a PSF on a GPU device. +extern "C" void deviceConvolve(float* source_img, float* result_img, int width, int height, float* psf_kernel, + int psf_size, int psf_dim, int psf_radius, float psf_sum); #endif RawImage::RawImage() : width(0), height(0), obstime(-1.0) { pixels = std::vector(); } @@ -37,7 +37,10 @@ RawImage& RawImage::operator=(const RawImage& source) { // Move constructor RawImage::RawImage(RawImage&& source) - : width(source.width), height(source.height), obstime(source.obstime), pixels(std::move(source.pixels)) {} + : width(source.width), + height(source.height), + obstime(source.obstime), + pixels(std::move(source.pixels)) {} // Move assignment RawImage& RawImage::operator=(RawImage&& source) { @@ -52,7 +55,8 @@ RawImage& RawImage::operator=(RawImage&& source) { RawImage::RawImage(unsigned w, unsigned h) : height(h), width(w), obstime(-1.0), pixels(w * h) {} -RawImage::RawImage(unsigned w, unsigned h, const std::vector& pix) : width(w), height(h), obstime(-1.0), pixels(pix) { +RawImage::RawImage(unsigned w, unsigned h, const std::vector& pix) + : width(w), height(h), obstime(-1.0), pixels(pix) { assert(w * h == pix.size()); } @@ -75,41 +79,37 @@ void RawImage::setArray(pybind11::array_t& arr) { } #endif -bool RawImage::approxEqual(const RawImage& imgB, float atol) const { - if ((width != imgB.width) || (height != imgB.height)) - return false; +bool RawImage::approxEqual(const RawImage& img_b, float atol) const { + if ((width != img_b.width) || (height != img_b.height)) return false; for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { float p1 = getPixel(x, y); - float p2 = imgB.getPixel(x, y); + float p2 = img_b.getPixel(x, y); // NO_DATA values must match exactly. - if ((p1 == NO_DATA) && (p2 != NO_DATA)) - return false; - if ((p1 != NO_DATA) && (p2 == NO_DATA)) - return false; + if ((p1 == NO_DATA) && (p2 != NO_DATA)) return false; + if ((p1 != NO_DATA) && (p2 == NO_DATA)) return false; // Other values match within an absolute tolerance. - if (fabs(p1 - p2) > atol) - return false; + if (fabs(p1 - p2) > atol) return false; } } return true; } // Load the image data from a specific layer of a FITS file. -void RawImage::loadFromFile(const std::string& filePath, int layer_num) { +void RawImage::loadFromFile(const std::string& file_path, int layer_num) { // Open the file's header and read in the obstime and the dimensions. fitsfile* fptr; int status = 0; int mjdStatus = 0; - int fileNotFound; + int file_not_found; int nullval = 0; int anynull = 0; // Open the correct layer to extract the RawImage. - std::string layerPath = filePath + "[" + std::to_string(layer_num) + "]"; + std::string layerPath = file_path + "[" + std::to_string(layer_num) + "]"; if (fits_open_file(&fptr, layerPath.c_str(), READONLY, &status)) { fits_report_error(stderr, status); throw std::runtime_error("Could not open FITS file to read RawImage"); @@ -117,7 +117,7 @@ void RawImage::loadFromFile(const std::string& filePath, int layer_num) { // Read image dimensions. long dimensions[2]; - if (fits_read_keys_lng(fptr, "NAXIS", 1, 2, dimensions, &fileNotFound, &status)) + if (fits_read_keys_lng(fptr, "NAXIS", 1, 2, dimensions, &file_not_found, &status)) fits_report_error(stderr, status); width = dimensions[0]; height = dimensions[1]; @@ -126,7 +126,7 @@ void RawImage::loadFromFile(const std::string& filePath, int layer_num) { pixels = std::vector(width * height); if (fits_read_img(fptr, TFLOAT, 1, getNPixels(), &nullval, pixels.data(), &anynull, &status)) fits_report_error(stderr, status); - + // Read image observation time, ignore error if does not exist obstime = -1.0; if (fits_read_key(fptr, TDOUBLE, "MJD", &obstime, NULL, &mjdStatus)) obstime = -1.0; @@ -134,7 +134,7 @@ void RawImage::loadFromFile(const std::string& filePath, int layer_num) { // If we are reading from a sublayer and did not find a time, try the overall header. if (obstime < 0.0) { - if (fits_open_file(&fptr, filePath.c_str(), READONLY, &status)) + if (fits_open_file(&fptr, file_path.c_str(), READONLY, &status)) throw std::runtime_error("Could not open FITS file to read RawImage"); fits_read_key(fptr, TDOUBLE, "MJD", &obstime, NULL, &mjdStatus); if (fits_close_file(fptr, &status)) fits_report_error(stderr, status); @@ -215,14 +215,14 @@ RawImage RawImage::createStamp(float x, float y, int radius, bool interpolate, b RawImage stamp(dim, dim); for (int yoff = 0; yoff < dim; ++yoff) { for (int xoff = 0; xoff < dim; ++xoff) { - float pixVal; + float pix_val; if (interpolate) - pixVal = getPixelInterp(x + static_cast(xoff - radius), - y + static_cast(yoff - radius)); + pix_val = getPixelInterp(x + static_cast(xoff - radius), + y + static_cast(yoff - radius)); else - pixVal = getPixel(static_cast(x) + xoff - radius, static_cast(y) + yoff - radius); - if ((pixVal == NO_DATA) && !keep_no_data) pixVal = 0.0; - stamp.setPixel(xoff, yoff, pixVal); + pix_val = getPixel(static_cast(x) + xoff - radius, static_cast(y) + yoff - radius); + if ((pix_val == NO_DATA) && !keep_no_data) pix_val = 0.0; + stamp.setPixel(xoff, yoff, pix_val); } } @@ -232,8 +232,8 @@ RawImage RawImage::createStamp(float x, float y, int radius, bool interpolate, b void RawImage::convolve_cpu(const PointSpreadFunc& psf) { std::vector result(width * height, 0.0); - const int psfRad = psf.getRadius(); - const float psfTotal = psf.getSum(); + const int psf_rad = psf.getRadius(); + const float psf_total = psf.getSum(); for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { @@ -244,48 +244,48 @@ void RawImage::convolve_cpu(const PointSpreadFunc& psf) { } float sum = 0.0; - float psfPortion = 0.0; - for (int j = -psfRad; j <= psfRad; j++) { - for (int i = -psfRad; i <= psfRad; i++) { + float psf_portion = 0.0; + for (int j = -psf_rad; j <= psf_rad; j++) { + for (int i = -psf_rad; i <= psf_rad; i++) { if ((x + i >= 0) && (x + i < width) && (y + j >= 0) && (y + j < height)) { - float currentPixel = pixels[(y + j) * width + (x + i)]; - if (currentPixel != NO_DATA) { - float currentPSF = psf.getValue(i + psfRad, j + psfRad); - psfPortion += currentPSF; - sum += currentPixel * currentPSF; + float current_pixel = pixels[(y + j) * width + (x + i)]; + if (current_pixel != NO_DATA) { + float current_psf = psf.getValue(i + psf_rad, j + psf_rad); + psf_portion += current_psf; + sum += current_pixel * current_psf; } } } } - result[y * width + x] = (sum * psfTotal) / psfPortion; + result[y * width + x] = (sum * psf_total) / psf_portion; } } // Copy the data into the pixels vector. const int npixels = getNPixels(); - for(int i = 0; i < npixels; ++i) { + for (int i = 0; i < npixels; ++i) { pixels[i] = result[i]; } } void RawImage::convolve(PointSpreadFunc psf) { - #ifdef HAVE_CUDA - deviceConvolve(pixels.data(), pixels.data(), getWidth(), getHeight(), psf.kernelData(), - psf.getSize(), psf.getDim(), psf.getRadius(), psf.getSum()); - #else - convolve_cpu(psf); - #endif +#ifdef HAVE_CUDA + deviceConvolve(pixels.data(), pixels.data(), getWidth(), getHeight(), psf.kernelData(), psf.getSize(), + psf.getDim(), psf.getRadius(), psf.getSum()); +#else + convolve_cpu(psf); +#endif } void RawImage::applyMask(int flags, const std::vector& exceptions, const RawImage& mask) { - const std::vector& maskPix = mask.getPixels(); + const std::vector& mask_pix = mask.getPixels(); const int num_pixels = getNPixels(); assert(num_pixels == mask.getNPixels()); for (unsigned int p = 0; p < num_pixels; ++p) { - int pixFlags = static_cast(maskPix[p]); - bool isException = false; - for (auto& e : exceptions) isException = isException || e == pixFlags; - if (!isException && ((flags & pixFlags) != 0)) pixels[p] = NO_DATA; + int pix_flags = static_cast(mask_pix[p]); + bool is_exception = false; + for (auto& e : exceptions) is_exception = is_exception || e == pix_flags; + if (!is_exception && ((flags & pix_flags) != 0)) pixels[p] = NO_DATA; } } @@ -342,36 +342,35 @@ std::vector RawImage::bilinearInterp(float x, float y) const { // Top right float ax = x + 0.5; float ay = y + 0.5; - float aPx = floor(ax); - float aPy = floor(ay); - float aAmount = (ax - aPx) * (ay - aPy); + float a_px = floor(ax); + float a_py = floor(ay); + float a_amt = (ax - a_px) * (ay - a_py); // Bottom right float bx = x + 0.5; float by = y - 0.5; - float bPx = floor(bx); - float bPy = floor(by); - float bAmount = (bx - bPx) * (bPy + 1.0 - by); + float b_px = floor(bx); + float b_py = floor(by); + float b_amt = (bx - b_px) * (b_py + 1.0 - by); // Bottom left float cx = x - 0.5; float cy = y - 0.5; - float cPx = floor(cx); - float cPy = floor(cy); - float cAmount = (cPx + 1.0 - cx) * (cPy + 1.0 - cy); + float c_px = floor(cx); + float c_py = floor(cy); + float c_amt = (c_px + 1.0 - cx) * (c_py + 1.0 - cy); // Top left float dx = x - 0.5; float dy = y + 0.5; - float dPx = floor(dx); - float dPy = floor(dy); - float dAmount = (dPx + 1.0 - dx) * (dy - dPy); + float d_px = floor(dx); + float d_py = floor(dy); + float d_amt = (d_px + 1.0 - dx) * (dy - d_py); // make sure the right amount has been distributed - float diff = std::abs(aAmount + bAmount + cAmount + dAmount - 1.0); + float diff = std::abs(a_amt + b_amt + c_amt + d_amt - 1.0); if (diff > 0.01) std::cout << "warning: bilinearInterpSum == " << diff << "\n"; - // assert(std::abs(aAmount+bAmount+cAmount+dAmount-1.0)<0.001); - return {aPx, aPy, aAmount, bPx, bPy, bAmount, cPx, cPy, cAmount, dPx, dPy, dAmount}; + return {a_px, a_py, a_amt, b_px, b_py, b_amt, c_px, c_py, c_amt, d_px, d_py, d_amt}; } void RawImage::addPixelInterp(float x, float y, float value) { @@ -430,33 +429,33 @@ void RawImage::setAllPix(float value) { std::array RawImage::computeBounds() const { const int num_pixels = getNPixels(); - float minVal = FLT_MAX; - float maxVal = -FLT_MAX; + float min_val = FLT_MAX; + float max_val = -FLT_MAX; for (unsigned p = 0; p < num_pixels; ++p) { if (pixels[p] != NO_DATA) { - minVal = std::min(minVal, pixels[p]); - maxVal = std::max(maxVal, pixels[p]); + min_val = std::min(min_val, pixels[p]); + max_val = std::max(max_val, pixels[p]); } } // Assert that we have seen at least some valid data. - assert(maxVal != -FLT_MAX); - assert(minVal != FLT_MAX); + assert(max_val != -FLT_MAX); + assert(min_val != FLT_MAX); // Set and return the result array. std::array res; - res[0] = minVal; - res[1] = maxVal; + res[0] = min_val; + res[1] = max_val; return res; } // The maximum value of the image and return the coordinates. -pixelPos RawImage::findPeak(bool furthest_from_center) const { +PixelPos RawImage::findPeak(bool furthest_from_center) const { int c_x = width / 2; int c_y = height / 2; // Initialize the variables for tracking the peak's location. - pixelPos result = {0, 0}; + PixelPos result = {0, 0}; float max_val = pixels[0]; float dist2 = c_x * c_x + c_y * c_y; @@ -489,13 +488,13 @@ pixelPos RawImage::findPeak(bool furthest_from_center) const { // It computes the moments on the "normalized" image where the minimum // value has been shifted to zero and the sum of all elements is 1.0. // Elements with NO_DATA are treated as zero. -imageMoments RawImage::findCentralMoments() const { +ImageMoments RawImage::findCentralMoments() const { const int num_pixels = width * height; const int c_x = width / 2; const int c_y = height / 2; // Set all the moments to zero initially. - imageMoments res = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; + ImageMoments res = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; // Find the min (non-NO_DATA) value to subtract off. float min_val = FLT_MAX; @@ -537,30 +536,30 @@ RawImage createMedianImage(const std::vector& images) { for (auto& img : images) assert(img.getWidth() == width and img.getHeight() == height); RawImage result = RawImage(width, height); - std::vector pixArray(num_images); + std::vector pix_array(num_images); for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { int num_unmasked = 0; for (int i = 0; i < num_images; ++i) { // Only used the unmasked pixels. - float pixVal = images[i].getPixel(x, y); - if ((pixVal != NO_DATA) && (!std::isnan(pixVal))) { - pixArray[num_unmasked] = pixVal; + float pix_val = images[i].getPixel(x, y); + if ((pix_val != NO_DATA) && (!std::isnan(pix_val))) { + pix_array[num_unmasked] = pix_val; num_unmasked += 1; } } if (num_unmasked > 0) { - std::sort(pixArray.begin(), pixArray.begin() + num_unmasked); + std::sort(pix_array.begin(), pix_array.begin() + num_unmasked); // If we have an even number of elements, take the mean of the two // middle ones. int median_ind = num_unmasked / 2; if (num_unmasked % 2 == 0) { - float ave_middle = (pixArray[median_ind] + pixArray[median_ind - 1]) / 2.0; + float ave_middle = (pix_array[median_ind] + pix_array[median_ind - 1]) / 2.0; result.setPixel(x, y, ave_middle); } else { - result.setPixel(x, y, pixArray[median_ind]); + result.setPixel(x, y, pix_array[median_ind]); } } else { // We use a 0.0 value if there is no data to allow for visualization @@ -586,9 +585,9 @@ RawImage createSummedImage(const std::vector& images) { for (int x = 0; x < width; ++x) { float sum = 0.0; for (int i = 0; i < num_images; ++i) { - float pixVal = images[i].getPixel(x, y); - if ((pixVal == NO_DATA) || (std::isnan(pixVal))) pixVal = 0.0; - sum += pixVal; + float pix_val = images[i].getPixel(x, y); + if ((pix_val == NO_DATA) || (std::isnan(pix_val))) pix_val = 0.0; + sum += pix_val; } result.setPixel(x, y, sum); } @@ -611,10 +610,10 @@ RawImage createMeanImage(const std::vector& images) { float sum = 0.0; float count = 0.0; for (int i = 0; i < num_images; ++i) { - float pixVal = images[i].getPixel(x, y); - if ((pixVal != NO_DATA) && (!std::isnan(pixVal))) { + float pix_val = images[i].getPixel(x, y); + if ((pix_val != NO_DATA) && (!std::isnan(pix_val))) { count += 1.0; - sum += pixVal; + sum += pix_val; } } diff --git a/src/kbmod/search/RawImage.h b/src/kbmod/search/RawImage.h index 2cd4c745..46d576a2 100644 --- a/src/kbmod/search/RawImage.h +++ b/src/kbmod/search/RawImage.h @@ -96,7 +96,7 @@ class RawImage { // Load the image data from a specific layer of a FITS file. // Overwrites the current image data. - void loadFromFile(const std::string& filePath, int layer_num); + void loadFromFile(const std::string& file_path, int layer_num); // Save the RawImage to a file (single layer) or append the layer to an existing file. void saveToFile(const std::string& filename); @@ -114,13 +114,13 @@ class RawImage { // The maximum value of the image and return the coordinates. The parameter // furthest_from_center indicates whether to break ties using the peak further // or closer to the center of the image. - pixelPos findPeak(bool furthest_from_center) const; + PixelPos findPeak(bool furthest_from_center) const; // Find the basic image moments in order to test if stamps have a gaussian shape. // It computes the moments on the "normalized" image where the minimum // value has been shifted to zero and the sum of all elements is 1.0. // Elements with NO_DATA are treated as zero. - imageMoments findCentralMoments() const; + ImageMoments findCentralMoments() const; virtual ~RawImage(){}; diff --git a/src/kbmod/search/bindings.cpp b/src/kbmod/search/bindings.cpp index b6508ce3..4fe03ebe 100644 --- a/src/kbmod/search/bindings.cpp +++ b/src/kbmod/search/bindings.cpp @@ -17,8 +17,8 @@ using li = search::LayeredImage; using is = search::ImageStack; using ks = search::KBMOSearch; using tj = search::trajectory; -using bc = search::baryCorrection; -using pp = search::pixelPos; +using bc = search::BaryCorrection; +using pp = search::PixelPos; using std::to_string; @@ -201,7 +201,7 @@ PYBIND11_MODULE(search, m) { .def("summed_sci_stamp", &ks::summedScienceStamp) .def("coadded_stamps", (std::vector(ks::*)(std::vector &, std::vector> &, - const search::stampParameters &, bool)) & + const search::StampParameters &, bool)) & ks::coaddedScienceStamps) // For testing .def("filter_stamp", &ks::filterStamp) @@ -218,22 +218,23 @@ PYBIND11_MODULE(search, m) { A trajectory structure holding basic information about potential results. )pbdoc") .def(py::init<>()) - .def_readwrite("x_v", &tj::xVel) - .def_readwrite("y_v", &tj::yVel) + .def_readwrite("x_v", &tj::x_vel) + .def_readwrite("y_v", &tj::y_vel) .def_readwrite("lh", &tj::lh) .def_readwrite("flux", &tj::flux) .def_readwrite("x", &tj::x) .def_readwrite("y", &tj::y) - .def_readwrite("obs_count", &tj::obsCount) + .def_readwrite("obs_count", &tj::obs_count) .def("__repr__", [](const tj &t) { return "lh: " + to_string(t.lh) + " flux: " + to_string(t.flux) + - " x: " + to_string(t.x) + " y: " + to_string(t.y) + " x_v: " + to_string(t.xVel) + - " y_v: " + to_string(t.yVel) + " obs_count: " + to_string(t.obsCount); + " x: " + to_string(t.x) + " y: " + to_string(t.y) + + " x_v: " + to_string(t.x_vel) + " y_v: " + to_string(t.y_vel) + + " obs_count: " + to_string(t.obs_count); }) .def(py::pickle( [](const tj &p) { // __getstate__ - return py::make_tuple(p.xVel, p.yVel, p.lh, p.flux, p.x, p.y, p.obsCount); + return py::make_tuple(p.x_vel, p.y_vel, p.lh, p.flux, p.x, p.y, p.obs_count); }, [](py::tuple t) { // __setstate__ if (t.size() != 7) throw std::runtime_error("Invalid state!"); @@ -247,28 +248,28 @@ PYBIND11_MODULE(search, m) { .def_readwrite("x", &pp::x) .def_readwrite("y", &pp::y) .def("__repr__", [](const pp &p) { return "x: " + to_string(p.x) + " y: " + to_string(p.y); }); - py::class_(m, "image_moments") + py::class_(m, "image_moments") .def(py::init<>()) - .def_readwrite("m00", &search::imageMoments::m00) - .def_readwrite("m01", &search::imageMoments::m01) - .def_readwrite("m10", &search::imageMoments::m10) - .def_readwrite("m11", &search::imageMoments::m11) - .def_readwrite("m02", &search::imageMoments::m02) - .def_readwrite("m20", &search::imageMoments::m20); - py::class_(m, "stamp_parameters") + .def_readwrite("m00", &search::ImageMoments::m00) + .def_readwrite("m01", &search::ImageMoments::m01) + .def_readwrite("m10", &search::ImageMoments::m10) + .def_readwrite("m11", &search::ImageMoments::m11) + .def_readwrite("m02", &search::ImageMoments::m02) + .def_readwrite("m20", &search::ImageMoments::m20); + py::class_(m, "stamp_parameters") .def(py::init<>()) - .def_readwrite("radius", &search::stampParameters::radius) - .def_readwrite("stamp_type", &search::stampParameters::stamp_type) - .def_readwrite("do_filtering", &search::stampParameters::do_filtering) - .def_readwrite("center_thresh", &search::stampParameters::center_thresh) - .def_readwrite("peak_offset_x", &search::stampParameters::peak_offset_x) - .def_readwrite("peak_offset_y", &search::stampParameters::peak_offset_y) - .def_readwrite("m01", &search::stampParameters::m01_limit) - .def_readwrite("m10", &search::stampParameters::m10_limit) - .def_readwrite("m11", &search::stampParameters::m11_limit) - .def_readwrite("m02", &search::stampParameters::m02_limit) - .def_readwrite("m20", &search::stampParameters::m20_limit); - py::class_(m, "baryCorrection") + .def_readwrite("radius", &search::StampParameters::radius) + .def_readwrite("stamp_type", &search::StampParameters::stamp_type) + .def_readwrite("do_filtering", &search::StampParameters::do_filtering) + .def_readwrite("center_thresh", &search::StampParameters::center_thresh) + .def_readwrite("peak_offset_x", &search::StampParameters::peak_offset_x) + .def_readwrite("peak_offset_y", &search::StampParameters::peak_offset_y) + .def_readwrite("m01", &search::StampParameters::m01_limit) + .def_readwrite("m10", &search::StampParameters::m10_limit) + .def_readwrite("m11", &search::StampParameters::m11_limit) + .def_readwrite("m02", &search::StampParameters::m02_limit) + .def_readwrite("m20", &search::StampParameters::m20_limit); + py::class_(m, "BaryCorrection") .def(py::init<>()) .def_readwrite("dx", &bc::dx) .def_readwrite("dxdx", &bc::dxdx) diff --git a/src/kbmod/search/common.h b/src/kbmod/search/common.h index f094682f..529cba5a 100644 --- a/src/kbmod/search/common.h +++ b/src/kbmod/search/common.h @@ -11,9 +11,9 @@ namespace search { #ifdef HAVE_CUDA - constexpr bool HAVE_GPU = true; +constexpr bool HAVE_GPU = true; #else - constexpr bool HAVE_GPU = false; +constexpr bool HAVE_GPU = false; #endif constexpr unsigned int MAX_KERNEL_RADIUS = 15; @@ -32,8 +32,8 @@ enum StampType { STAMP_SUM = 0, STAMP_MEAN, STAMP_MEDIAN }; */ struct trajectory { // Trajectory velocities - float xVel; - float yVel; + float x_vel; + float y_vel; // Likelyhood float lh; // Est. Flux @@ -42,11 +42,11 @@ struct trajectory { short x; short y; // Number of images summed - short obsCount; + short obs_count; }; // The position (in pixels) of a trajectory. -struct pixelPos { +struct PixelPos { float x; float y; }; @@ -56,7 +56,7 @@ struct pixelPos { * pixel in the first image to a pixel in a consequent image. One struct needed * per image. Correction calculated in higher level code. */ -struct baryCorrection { +struct BaryCorrection { // linear coefficients of linear fit of pixel dependence float dx; float dxdx; @@ -68,23 +68,23 @@ struct baryCorrection { /* The parameters to use for the on device search. */ -struct searchParameters { +struct SearchParameters { // Basic filtering paramets. - int minObservations; - float minLH; + int min_observations; + float min_lh; // Parameters for sigmaG filtering on device. bool do_sigmag_filter; - float sGL_L; - float sGL_H; - float sigmaGCoeff; + float sgl_L; + float sgl_H; + float sigmag_coeff; // Do barycentric corrections. - bool useCorr; + bool use_corr; // Use a compressed image representation. - int psiNumBytes; // -1 (No encoding), 1 or 2 - int phiNumBytes; // -1 (No encoding), 1 or 2 + int psi_num_bytes; // -1 (No encoding), 1 or 2 + int phi_num_bytes; // -1 (No encoding), 1 or 2 // The bounds on which x and y pixels can be used // to start a search. @@ -98,23 +98,23 @@ struct searchParameters { }; struct scaleParameters { - float minVal; - float maxVal; + float min_val; + float max_val; float scale; }; // Search data on a per-image basis. -struct perImageData { - int numImages = 0; +struct PerImageData { + int num_images = 0; - float* imageTimes = nullptr; - baryCorrection* baryCorrs = nullptr; + float* image_times = nullptr; + BaryCorrection* bary_corrs = nullptr; - scaleParameters* psiParams = nullptr; - scaleParameters* phiParams = nullptr; + scaleParameters* psi_params = nullptr; + scaleParameters* phi_params = nullptr; }; -struct stampParameters { +struct StampParameters { int radius = 10; StampType stamp_type = STAMP_SUM; bool do_filtering = false; @@ -133,7 +133,7 @@ struct stampParameters { }; // Basic image moments use for analysis. -struct imageMoments { +struct ImageMoments { float m00; float m01; float m10; diff --git a/src/kbmod/search/image_kernels.cu b/src/kbmod/search/image_kernels.cu index 8331a6f5..c3ec078c 100644 --- a/src/kbmod/search/image_kernels.cu +++ b/src/kbmod/search/image_kernels.cu @@ -8,23 +8,19 @@ #ifndef IMAGE_KERNELS_CU_ #define IMAGE_KERNELS_CU_ -#define MAX_STAMP_IMAGES 200 - #include #include "common.h" #include "cuda_errors.h" #include #include -#include "ImageStack.h" - namespace search { /* * Device kernel that convolves the provided image with the psf */ -__global__ void convolvePSF(int width, int height, float *sourceImage, float *resultImage, float *psf, - int psfRad, int psfDim, float psfSum, float maskFlag) { +__global__ void convolvePSF(int width, int height, float *source_img, float *result_img, float *psf, + int psf_radius, int psf_dim, float psf_sum) { // Find bounds of convolution area const int x = blockIdx.x * CONV_THREAD_DIM + threadIdx.x; const int y = blockIdx.y * CONV_THREAD_DIM + threadIdx.y; @@ -32,269 +28,60 @@ __global__ void convolvePSF(int width, int height, float *sourceImage, float *re // Read kernel float sum = 0.0; - float psfPortion = 0.0; - float center = sourceImage[y * width + x]; + float psf_portion = 0.0; + float center = source_img[y * width + x]; if (center != NO_DATA) { - for (int j = -psfRad; j <= psfRad; j++) { + for (int j = -psf_radius; j <= psf_radius; j++) { // #pragma unroll - for (int i = -psfRad; i <= psfRad; i++) { + for (int i = -psf_radius; i <= psf_radius; i++) { if ((x + i >= 0) && (x + i < width) && (y + j >= 0) && (y + j < height)) { - float currentPixel = sourceImage[(y + j) * width + (x + i)]; - if (currentPixel != NO_DATA) { - float currentPSF = psf[(j + psfRad) * psfDim + (i + psfRad)]; - psfPortion += currentPSF; - sum += currentPixel * currentPSF; + float current_pix = source_img[(y + j) * width + (x + i)]; + if (current_pix != NO_DATA) { + float current_psf = psf[(j + psf_radius) * psf_dim + (i + psf_radius)]; + psf_portion += current_psf; + sum += current_pix * current_psf; } } } } - resultImage[y * width + x] = (sum * psfSum) / psfPortion; + result_img[y * width + x] = (sum * psf_sum) / psf_portion; } else { // Leave masked pixel alone (these could be replaced here with zero) - resultImage[y * width + x] = NO_DATA; // 0.0 + result_img[y * width + x] = NO_DATA; // 0.0 } } -extern "C" void deviceConvolve(float *sourceImg, float *resultImg, int width, int height, float *psfKernel, - int psfSize, int psfDim, int psfRadius, float psfSum) { +extern "C" void deviceConvolve(float *source_img, float *result_img, int width, int height, float *psf_kernel, + int psf_size, int psf_dim, int psf_radiusius, float psf_sum) { // Pointers to device memory - float *deviceKernel; - float *deviceSourceImg; - float *deviceResultImg; + float *device_kernel; + float *devicesource_img; + float *deviceresult_img; - long pixelsPerImage = width * height; + long n_pixels = width * height; dim3 blocks(width / CONV_THREAD_DIM + 1, height / CONV_THREAD_DIM + 1); dim3 threads(CONV_THREAD_DIM, CONV_THREAD_DIM); // Allocate Device memory - checkCudaErrors(cudaMalloc((void **)&deviceKernel, sizeof(float) * psfSize)); - checkCudaErrors(cudaMalloc((void **)&deviceSourceImg, sizeof(float) * pixelsPerImage)); - checkCudaErrors(cudaMalloc((void **)&deviceResultImg, sizeof(float) * pixelsPerImage)); + checkCudaErrors(cudaMalloc((void **)&device_kernel, sizeof(float) * psf_size)); + checkCudaErrors(cudaMalloc((void **)&devicesource_img, sizeof(float) * n_pixels)); + checkCudaErrors(cudaMalloc((void **)&deviceresult_img, sizeof(float) * n_pixels)); - checkCudaErrors(cudaMemcpy(deviceKernel, psfKernel, sizeof(float) * psfSize, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(device_kernel, psf_kernel, sizeof(float) * psf_size, cudaMemcpyHostToDevice)); checkCudaErrors( - cudaMemcpy(deviceSourceImg, sourceImg, sizeof(float) * pixelsPerImage, cudaMemcpyHostToDevice)); - - convolvePSF<<>>(width, height, deviceSourceImg, deviceResultImg, deviceKernel, psfRadius, - psfDim, psfSum, NO_DATA); - - checkCudaErrors( - cudaMemcpy(resultImg, deviceResultImg, sizeof(float) * pixelsPerImage, cudaMemcpyDeviceToHost)); - - checkCudaErrors(cudaFree(deviceKernel)); - checkCudaErrors(cudaFree(deviceSourceImg)); - checkCudaErrors(cudaFree(deviceResultImg)); -} - -__global__ void device_get_coadd_stamp(int num_images, int width, int height, float *image_vect, - perImageData image_data, int num_trajectories, - trajectory *trajectories, stampParameters params, int *use_index_vect, - float *results) { - // Get the trajectory that we are going to be using. - const int trj_index = blockIdx.x * blockDim.x + threadIdx.x; - if (trj_index < 0 || trj_index >= num_trajectories) return; - trajectory trj = trajectories[trj_index]; - - // Get the pixel coordinates within the stamp to use. - const int stamp_width = 2 * params.radius + 1; - const int stamp_x = threadIdx.y; - if (stamp_x < 0 || stamp_x >= stamp_width) return; - - const int stamp_y = threadIdx.z; - if (stamp_y < 0 || stamp_y >= stamp_width) return; - - // Compute the various offsets for the indices. - int use_index_offset = num_images * trj_index; - int trj_offset = trj_index * stamp_width * stamp_width; - int pixel_index = stamp_width * stamp_y + stamp_x; - - // Allocate space for the coadd information. - assert(num_images < MAX_STAMP_IMAGES); - float values[MAX_STAMP_IMAGES]; - - // Loop over each image and compute the stamp. - int num_values = 0; - for (int t = 0; t < num_images; ++t) { - // Skip entries marked 0 in the use_index_vect. - if (use_index_vect != nullptr && use_index_vect[use_index_offset + t] == 0) { - continue; - } - - // Predict the trajectory's position including the barycentric correction if needed. - float cTime = image_data.imageTimes[t]; - int currentX = int(trj.x + trj.xVel * cTime); - int currentY = int(trj.y + trj.yVel * cTime); - if (image_data.baryCorrs != nullptr) { - baryCorrection bc = image_data.baryCorrs[t]; - currentX = int(trj.x + trj.xVel * cTime + bc.dx + trj.x * bc.dxdx + trj.y * bc.dxdy); - currentY = int(trj.y + trj.yVel * cTime + bc.dy + trj.x * bc.dydx + trj.y * bc.dydy); - } - - // Get the stamp and add it to the list of values. - int img_x = currentX - params.radius + stamp_x; - int img_y = currentY - params.radius + stamp_y; - if ((img_x >= 0) && (img_x < width) && (img_y >= 0) && (img_y < height)) { - int pixel_index = width * height * t + img_y * width + img_x; - if (image_vect[pixel_index] != NO_DATA) { - values[num_values] = image_vect[pixel_index]; - ++num_values; - } - } - } - - // If there are no values, just return 0. - if (num_values == 0) { - results[trj_offset + pixel_index] = 0.0; - return; - } - - // Do the actual computation from the values. - float result = 0.0; - int median_ind = num_values / 2; // Outside switch to avoid compiler warnings. - switch (params.stamp_type) { - case STAMP_MEDIAN: - // Sort the values in ascending order. - for (int j = 0; j < num_values; j++) { - for (int k = j + 1; k < num_values; k++) { - if (values[j] > values[k]) { - float tmp = values[j]; - values[j] = values[k]; - values[k] = tmp; - } - } - } - - // Take the median value of the pixels with data. - if (num_values % 2 == 0) { - result = (values[median_ind] + values[median_ind - 1]) / 2.0; - } else { - result = values[median_ind]; - } - break; - case STAMP_SUM: - for (int t = 0; t < num_values; ++t) { - result += values[t]; - } - break; - case STAMP_MEAN: - for (int t = 0; t < num_values; ++t) { - result += values[t]; - } - result /= float(num_values); - break; - } - - // Save the result to the correct pixel location. - results[trj_offset + pixel_index] = result; -} - -void deviceGetCoadds(ImageStack &stack, perImageData image_data, int num_trajectories, - trajectory *trajectories, stampParameters params, - std::vector> &use_index_vect, float *results) { - // Allocate Device memory - trajectory *device_trjs; - int *device_use_index = nullptr; - float *device_times; - float *device_img; - float *device_res; - baryCorrection *deviceBaryCorrs = nullptr; - - // Compute the dimensions for the data. - const unsigned int num_images = stack.imgCount(); - const unsigned int width = stack.getWidth(); - const unsigned int height = stack.getHeight(); - const unsigned int num_image_pixels = num_images * width * height; - const unsigned int stamp_width = 2 * params.radius + 1; - const unsigned int stamp_ppi = (2 * params.radius + 1) * (2 * params.radius + 1); - const unsigned int num_stamp_pixels = num_trajectories * stamp_ppi; - - // Allocate and copy the trajectories. - checkCudaErrors(cudaMalloc((void **)&device_trjs, sizeof(trajectory) * num_trajectories)); - checkCudaErrors(cudaMemcpy(device_trjs, trajectories, sizeof(trajectory) * num_trajectories, - cudaMemcpyHostToDevice)); - - // Check if we need to create a vector of per-trajectory, per-image use. - // Convert the vector of booleans into an integer array so we do a cudaMemcpy. - if (use_index_vect.size() == num_trajectories) { - checkCudaErrors(cudaMalloc((void **)&device_use_index, sizeof(int) * num_images * num_trajectories)); - - int *start_ptr = device_use_index; - std::vector int_vect(num_images, 0); - for (unsigned i = 0; i < num_trajectories; ++i) { - assert(use_index_vect[i].size() == num_images); - for (unsigned t = 0; t < num_images; ++t) { - int_vect[t] = use_index_vect[i][t] ? 1 : 0; - } - - checkCudaErrors( - cudaMemcpy(start_ptr, int_vect.data(), sizeof(int) * num_images, cudaMemcpyHostToDevice)); - start_ptr += num_images; - } - } - - // Allocate and copy the times. - checkCudaErrors(cudaMalloc((void **)&device_times, sizeof(float) * num_images)); - checkCudaErrors(cudaMemcpy(device_times, image_data.imageTimes, sizeof(float) * num_images, - cudaMemcpyHostToDevice)); - - // Allocate and copy the images. - checkCudaErrors(cudaMalloc((void **)&device_img, sizeof(float) * num_image_pixels)); - float *next_ptr = device_img; - for (unsigned t = 0; t < num_images; ++t) { - const std::vector &data_ref = stack.getSingleImage(t).getScience().getPixels(); - - assert(data_ref.size() == width * height); - checkCudaErrors(cudaMemcpy(next_ptr, data_ref.data(), sizeof(float) * width * height, - cudaMemcpyHostToDevice)); - next_ptr += width * height; - } - - // Allocate space for the results. - checkCudaErrors(cudaMalloc((void **)&device_res, sizeof(float) * num_stamp_pixels)); - - // Allocate memory for and copy barycentric corrections (if needed). - if (image_data.baryCorrs != nullptr) { - checkCudaErrors(cudaMalloc((void **)&deviceBaryCorrs, sizeof(baryCorrection) * num_images)); - checkCudaErrors(cudaMemcpy(deviceBaryCorrs, image_data.baryCorrs, sizeof(baryCorrection) * num_images, - cudaMemcpyHostToDevice)); - } - - // Wrap the per-image data into a struct. This struct will be copied by value - // during the function call, so we don't need to allocate memory for the - // struct itself. We just set the pointers to the on device vectors. - perImageData device_image_data; - device_image_data.numImages = num_images; - device_image_data.imageTimes = device_times; - device_image_data.baryCorrs = deviceBaryCorrs; - device_image_data.psiParams = nullptr; - device_image_data.phiParams = nullptr; - - dim3 blocks(num_trajectories, 1, 1); - dim3 threads(1, stamp_width, stamp_width); - - // Create the stamps. - device_get_coadd_stamp<<>>(num_images, width, height, device_img, device_image_data, - num_trajectories, device_trjs, params, device_use_index, - device_res); - cudaDeviceSynchronize(); + cudaMemcpy(devicesource_img, source_img, sizeof(float) * n_pixels, cudaMemcpyHostToDevice)); - // Free up the unneeded memory (everything except for the on-device results). - if (deviceBaryCorrs != nullptr) checkCudaErrors(cudaFree(deviceBaryCorrs)); - checkCudaErrors(cudaFree(device_img)); - if (device_use_index != nullptr) checkCudaErrors(cudaFree(device_use_index)); - checkCudaErrors(cudaFree(device_times)); - checkCudaErrors(cudaFree(device_trjs)); - cudaDeviceSynchronize(); + convolvePSF<<>>(width, height, devicesource_img, deviceresult_img, device_kernel, + psf_radiusius, psf_dim, psf_sum); - // Read back results checkCudaErrors( - cudaMemcpy(results, device_res, sizeof(float) * num_stamp_pixels, cudaMemcpyDeviceToHost)); - cudaDeviceSynchronize(); + cudaMemcpy(result_img, deviceresult_img, sizeof(float) * n_pixels, cudaMemcpyDeviceToHost)); - // Free the rest of the on GPU memory. - checkCudaErrors(cudaFree(device_res)); + checkCudaErrors(cudaFree(device_kernel)); + checkCudaErrors(cudaFree(devicesource_img)); + checkCudaErrors(cudaFree(deviceresult_img)); } } /* namespace search */ diff --git a/src/kbmod/search/kernels.cu b/src/kbmod/search/kernels.cu index b304fb39..bfd5efa9 100644 --- a/src/kbmod/search/kernels.cu +++ b/src/kbmod/search/kernels.cu @@ -9,6 +9,7 @@ #define KERNELS_CU_ #define GPU_LC_FILTER 1 #define MAX_NUM_IMAGES 140 +#define MAX_STAMP_IMAGES 200 #include "common.h" #include @@ -17,63 +18,65 @@ #include #include +#include "ImageStack.h" + namespace search { -extern "C" __device__ __host__ void sigmaGFilteredIndicesCU(float *values, int num_values, float sGL0, - float sGL1, float sigmaGCoeff, float width, - int *idxArray, int *minKeepIndex, - int *maxKeepIndex) { +extern "C" __device__ __host__ void SigmaGFilteredIndicesCU(float *values, int num_values, float sgl0, + float sgl1, float sigmag_coeff, float width, + int *idx_array, int *min_keep_idx, + int *max_keep_idx) { // Clip the percentiles to [0.01, 99.99] to avoid invalid array accesses. - if (sGL0 < 0.0001) sGL0 = 0.0001; - if (sGL1 > 0.9999) sGL1 = 0.9999; + if (sgl0 < 0.0001) sgl0 = 0.0001; + if (sgl1 > 0.9999) sgl1 = 0.9999; // Initialize the index array. for (int j = 0; j < num_values; j++) { - idxArray[j] = j; + idx_array[j] = j; } - // Sort the the indexes (idxArray) of values in ascending order. - int tmpSortIdx; + // Sort the the indexes (idx_array) of values in ascending order. + int tmp_sort_idx; for (int j = 0; j < num_values; j++) { for (int k = j + 1; k < num_values; k++) { - if (values[idxArray[j]] > values[idxArray[k]]) { - tmpSortIdx = idxArray[j]; - idxArray[j] = idxArray[k]; - idxArray[k] = tmpSortIdx; + if (values[idx_array[j]] > values[idx_array[k]]) { + tmp_sort_idx = idx_array[j]; + idx_array[j] = idx_array[k]; + idx_array[k] = tmp_sort_idx; } } } // Compute the index of each of the percent values in values - // from the given bounds sGL0, 0.5 (median), and sGL1. - const int pct_L = int(ceil(num_values * sGL0) + 0.001) - 1; - const int pct_H = int(ceil(num_values * sGL1) + 0.001) - 1; + // from the given bounds sgl0, 0.5 (median), and sgl1. + const int pct_L = int(ceil(num_values * sgl0) + 0.001) - 1; + const int pct_H = int(ceil(num_values * sgl1) + 0.001) - 1; const int median_ind = int(ceil(num_values * 0.5) + 0.001) - 1; - // Compute the values that are +/- (width * sigmaG) from the median. - float sigmaG = sigmaGCoeff * (values[idxArray[pct_H]] - values[idxArray[pct_L]]); - float minValue = values[idxArray[median_ind]] - width * sigmaG; - float maxValue = values[idxArray[median_ind]] + width * sigmaG; + // Compute the values that are +/- (width * sigma_g) from the median. + float sigma_g = sigmag_coeff * (values[idx_array[pct_H]] - values[idx_array[pct_L]]); + float min_value = values[idx_array[median_ind]] - width * sigma_g; + float max_value = values[idx_array[median_ind]] + width * sigma_g; - // Find the index of the first value >= minValue. + // Find the index of the first value >= min_value. int start = 0; - while ((start < median_ind) && (values[idxArray[start]] < minValue)) { + while ((start < median_ind) && (values[idx_array[start]] < min_value)) { ++start; } - *minKeepIndex = start; + *min_keep_idx = start; - // Find the index of the last value <= maxValue. + // Find the index of the last value <= max_value. int end = median_ind + 1; - while ((end < num_values) && (values[idxArray[end]] <= maxValue)) { + while ((end < num_values) && (values[idx_array[end]] <= max_value)) { ++end; } - *maxKeepIndex = end - 1; + *max_keep_idx = end - 1; } -__device__ float readEncodedPixel(void *imageVect, int index, int numBytes, const scaleParameters ¶ms) { - float value = (numBytes == 1) ? (float)reinterpret_cast(imageVect)[index] - : (float)reinterpret_cast(imageVect)[index]; - float result = (value == 0.0) ? NO_DATA : (value - 1.0) * params.scale + params.minVal; +__device__ float ReadEncodedPixel(void *image_vect, int index, int n_bytes, const scaleParameters ¶ms) { + float value = (n_bytes == 1) ? (float)reinterpret_cast(image_vect)[index] + : (float)reinterpret_cast(image_vect)[index]; + float result = (value == 0.0) ? NO_DATA : (value - 1.0) * params.scale + params.min_val; return result; } @@ -81,10 +84,10 @@ __device__ float readEncodedPixel(void *imageVect, int index, int numBytes, cons * Searches through images (represented as a flat array of floats) looking for most likely * trajectories in the given list. Outputs a results image of best trajectories. Returns a * fixed number of results per pixel specified by RESULTS_PER_PIXEL - * filters results using a sigmaG-based filter and a central-moment filter. + * filters results using a sigma_g-based filter and a central-moment filter. */ -__global__ void searchFilterImages(int imageCount, int width, int height, void *psiVect, void *phiVect, - perImageData image_data, searchParameters params, int trajectoryCount, +__global__ void searchFilterImages(int num_images, int width, int height, void *psi_vect, void *phi_vect, + PerImageData image_data, SearchParameters params, int num_trajectories, trajectory *trajectories, trajectory *results) { // Get the x and y coordinates within the search space. const int x_i = blockIdx.x * THREAD_DIM_X + threadIdx.x; @@ -100,13 +103,13 @@ __global__ void searchFilterImages(int imageCount, int width, int height, void * // Get origin pixel for the trajectories in pixel space. const int x = x_i + params.x_start_min; const int y = y_i + params.y_start_min; - const unsigned int pixelsPerImage = width * height; + const unsigned int n_pixels = width * height; // Data structures used for filtering. - float lcArray[MAX_NUM_IMAGES]; - float psiArray[MAX_NUM_IMAGES]; - float phiArray[MAX_NUM_IMAGES]; - int idxArray[MAX_NUM_IMAGES]; + float lc_array[MAX_NUM_IMAGES]; + float psi_array[MAX_NUM_IMAGES]; + float phi_array[MAX_NUM_IMAGES]; + int idx_array[MAX_NUM_IMAGES]; // Create an initial set of best results with likelihood -1.0. // We also set (x, y) because they are used in the later python @@ -119,111 +122,111 @@ __global__ void searchFilterImages(int imageCount, int width, int height, void * } // For each trajectory we'd like to search - for (int t = 0; t < trajectoryCount; ++t) { + for (int t = 0; t < num_trajectories; ++t) { // Create a trajectory for this search. - trajectory currentT; - currentT.x = x; - currentT.y = y; - currentT.xVel = trajectories[t].xVel; - currentT.yVel = trajectories[t].yVel; - currentT.obsCount = 0; + trajectory curr_trj; + curr_trj.x = x; + curr_trj.y = y; + curr_trj.x_vel = trajectories[t].x_vel; + curr_trj.y_vel = trajectories[t].y_vel; + curr_trj.obs_count = 0; - float psiSum = 0.0; - float phiSum = 0.0; + float psi_sum = 0.0; + float phi_sum = 0.0; // Loop over each image and sample the appropriate pixel - for (int i = 0; i < imageCount; ++i) { - lcArray[i] = 0; - psiArray[i] = 0; - phiArray[i] = 0; - idxArray[i] = i; + for (int i = 0; i < num_images; ++i) { + lc_array[i] = 0; + psi_array[i] = 0; + phi_array[i] = 0; + idx_array[i] = i; } // Loop over each image and sample the appropriate pixel int num_seen = 0; - for (int i = 0; i < imageCount; ++i) { + for (int i = 0; i < num_images; ++i) { // Predict the trajectory's position. - float cTime = image_data.imageTimes[i]; - int currentX = x + int(currentT.xVel * cTime + 0.5); - int currentY = y + int(currentT.yVel * cTime + 0.5); + float curr_time = image_data.image_times[i]; + int current_x = x + int(curr_trj.x_vel * curr_time + 0.5); + int current_y = y + int(curr_trj.y_vel * curr_time + 0.5); // If using barycentric correction, apply it. // Must be before out of bounds check - if (params.useCorr && (image_data.baryCorrs != nullptr)) { - baryCorrection bc = image_data.baryCorrs[i]; - currentX = int(x + currentT.xVel * cTime + bc.dx + x * bc.dxdx + y * bc.dxdy + 0.5); - currentY = int(y + currentT.yVel * cTime + bc.dy + x * bc.dydx + y * bc.dydy + 0.5); + if (params.use_corr && (image_data.bary_corrs != nullptr)) { + BaryCorrection bc = image_data.bary_corrs[i]; + current_x = int(x + curr_trj.x_vel * curr_time + bc.dx + x * bc.dxdx + y * bc.dxdy + 0.5); + current_y = int(y + curr_trj.y_vel * curr_time + bc.dy + x * bc.dydx + y * bc.dydy + 0.5); } // Test if trajectory goes out of the image, in which case we do not // look up a pixel value for this time step (allowing trajectories to // overlap the image for only some of the times). - if (currentX >= width || currentY >= height || currentX < 0 || currentY < 0) { + if (current_x >= width || current_y >= height || current_x < 0 || current_y < 0) { continue; } // Get the Psi and Phi pixel values. - unsigned int pixel_index = (pixelsPerImage * i + currentY * width + currentX); - float cPsi = (params.psiNumBytes <= 0 || image_data.psiParams == nullptr) - ? reinterpret_cast(psiVect)[pixel_index] - : readEncodedPixel(psiVect, pixel_index, params.psiNumBytes, - image_data.psiParams[i]); - if (cPsi == NO_DATA) continue; - - float cPhi = (params.phiNumBytes <= 0 || image_data.phiParams == nullptr) - ? reinterpret_cast(phiVect)[pixel_index] - : readEncodedPixel(phiVect, pixel_index, params.phiNumBytes, - image_data.phiParams[i]); - if (cPhi == NO_DATA) continue; - - if (cPsi != NO_DATA && cPhi != NO_DATA) { - currentT.obsCount++; - psiSum += cPsi; - phiSum += cPhi; - psiArray[num_seen] = cPsi; - phiArray[num_seen] = cPhi; - if (cPhi != 0.0) lcArray[num_seen] = cPsi / cPhi; + unsigned int pixel_index = (n_pixels * i + current_y * width + current_x); + float curr_psi = (params.psi_num_bytes <= 0 || image_data.psi_params == nullptr) + ? reinterpret_cast(psi_vect)[pixel_index] + : ReadEncodedPixel(psi_vect, pixel_index, params.psi_num_bytes, + image_data.psi_params[i]); + if (curr_psi == NO_DATA) continue; + + float curr_phi = (params.phi_num_bytes <= 0 || image_data.phi_params == nullptr) + ? reinterpret_cast(phi_vect)[pixel_index] + : ReadEncodedPixel(phi_vect, pixel_index, params.phi_num_bytes, + image_data.phi_params[i]); + if (curr_phi == NO_DATA) continue; + + if (curr_psi != NO_DATA && curr_phi != NO_DATA) { + curr_trj.obs_count++; + psi_sum += curr_psi; + phi_sum += curr_phi; + psi_array[num_seen] = curr_psi; + phi_array[num_seen] = curr_phi; + if (curr_phi != 0.0) lc_array[num_seen] = curr_psi / curr_phi; num_seen += 1; } } - currentT.lh = psiSum / sqrt(phiSum); - currentT.flux = psiSum / phiSum; + curr_trj.lh = psi_sum / sqrt(phi_sum); + curr_trj.flux = psi_sum / phi_sum; // If we do not have enough observations or a good enough LH score, // do not bother with any of the following steps. - if ((currentT.obsCount < params.minObservations) || - (params.do_sigmag_filter && currentT.lh < params.minLH)) + if ((curr_trj.obs_count < params.min_observations) || + (params.do_sigmag_filter && curr_trj.lh < params.min_lh)) continue; - // If we are doing on GPU filtering, run the sigmaG filter + // If we are doing on GPU filtering, run the sigma_g filter // and recompute the likelihoods. if (params.do_sigmag_filter) { - int minKeepIndex = 0; - int maxKeepIndex = num_seen - 1; - sigmaGFilteredIndicesCU(lcArray, num_seen, params.sGL_L, params.sGL_H, params.sigmaGCoeff, 2.0, - idxArray, &minKeepIndex, &maxKeepIndex); + int min_keep_idx = 0; + int max_keep_idx = num_seen - 1; + SigmaGFilteredIndicesCU(lc_array, num_seen, params.sgl_L, params.sgl_H, params.sigmag_coeff, 2.0, + idx_array, &min_keep_idx, &max_keep_idx); // Compute the likelihood and flux of the track based on the filtered - // observations (ones in [minKeepIndex, maxKeepIndex]). - float newPsiSum = 0.0; - float newPhiSum = 0.0; - for (int i = minKeepIndex; i <= maxKeepIndex; i++) { - int idx = idxArray[i]; - newPsiSum += psiArray[idx]; - newPhiSum += phiArray[idx]; + // observations (ones in [min_keep_idx, max_keep_idx]). + float new_psi_sum = 0.0; + float new_phi_sum = 0.0; + for (int i = min_keep_idx; i <= max_keep_idx; i++) { + int idx = idx_array[i]; + new_psi_sum += psi_array[idx]; + new_phi_sum += phi_array[idx]; } - currentT.lh = newPsiSum / sqrt(newPhiSum); - currentT.flux = newPsiSum / newPhiSum; + curr_trj.lh = new_psi_sum / sqrt(new_phi_sum); + curr_trj.flux = new_psi_sum / new_phi_sum; } // Insert the new trajectory into the sorted list of results. // Only sort the values with valid likelihoods. trajectory temp; for (int r = 0; r < RESULTS_PER_PIXEL; ++r) { - if (currentT.lh > best[r].lh && currentT.lh > -1.0) { + if (curr_trj.lh > best[r].lh && curr_trj.lh > -1.0) { temp = best[r]; - best[r] = currentT; - currentT = temp; + best[r] = curr_trj; + curr_trj = temp; } } } @@ -239,147 +242,146 @@ __global__ void searchFilterImages(int imageCount, int width, int height, void * } template -void *encodeImage(float *imageVect, int numTimes, int numPixels, scaleParameters *params, bool debug) { - void *deviceVect = NULL; +void *encodeImage(float *image_vect, int num_times, int num_pixels, scaleParameters *params, bool debug) { + void *device_vect = NULL; - long unsigned int total_size = sizeof(T) * numTimes * numPixels; + long unsigned int total_size = sizeof(T) * num_times * num_pixels; if (debug) { printf("Encoding image into %lu bytes/pixel for a total of %lu bytes.\n", sizeof(T), total_size); } // Do the encoding locally first. T *encoded = (T *)malloc(total_size); - for (int t = 0; t < numTimes; ++t) { - float safe_max = params[t].maxVal - params[t].scale / 100.0; - for (int p = 0; p < numPixels; ++p) { - int index = t * numPixels + p; - float value = imageVect[index]; + for (int t = 0; t < num_times; ++t) { + float safe_max = params[t].max_val - params[t].scale / 100.0; + for (int p = 0; p < num_pixels; ++p) { + int index = t * num_pixels + p; + float value = image_vect[index]; if (value == NO_DATA) { encoded[index] = 0; } else { value = min(value, safe_max); - value = max(value, params[t].minVal); - value = (value - params[t].minVal) / params[t].scale + 1.0; + value = max(value, params[t].min_val); + value = (value - params[t].min_val) / params[t].scale + 1.0; encoded[index] = static_cast(value); } } } // Allocate the space on device and do a direct copy. - checkCudaErrors(cudaMalloc((void **)&deviceVect, total_size)); - checkCudaErrors(cudaMemcpy(deviceVect, encoded, total_size, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMalloc((void **)&device_vect, total_size)); + checkCudaErrors(cudaMemcpy(device_vect, encoded, total_size, cudaMemcpyHostToDevice)); // Free the local space. free(encoded); - return deviceVect; + return device_vect; } -void *encodeImageFloat(float *imageVect, unsigned int vectLength, bool debug) { - void *deviceVect = NULL; +void *encodeImageFloat(float *image_vect, unsigned int vectLength, bool debug) { + void *device_vect = NULL; long unsigned int total_size = sizeof(float) * vectLength; if (debug) { printf("Encoding image as float for a total of %lu bytes.\n", total_size); } - checkCudaErrors(cudaMalloc((void **)&deviceVect, total_size)); - checkCudaErrors(cudaMemcpy(deviceVect, imageVect, total_size, cudaMemcpyHostToDevice)); - return deviceVect; + checkCudaErrors(cudaMalloc((void **)&device_vect, total_size)); + checkCudaErrors(cudaMemcpy(device_vect, image_vect, total_size, cudaMemcpyHostToDevice)); + return device_vect; } -extern "C" void deviceSearchFilter(int imageCount, int width, int height, float *psiVect, float *phiVect, - perImageData img_data, searchParameters params, int trajCount, - trajectory *trajectoriesToSearch, int resultsCount, - trajectory *bestTrajects) { +extern "C" void deviceSearchFilter(int num_images, int width, int height, float *psi_vect, float *phi_vect, + PerImageData img_data, SearchParameters params, int num_trajectories, + trajectory *trj_to_search, int num_results, trajectory *best_results) { // Allocate Device memory - trajectory *deviceTests; - float *deviceImgTimes; - void *devicePsi; - void *devicePhi; - trajectory *deviceSearchResults; - baryCorrection *deviceBaryCorrs = nullptr; - scaleParameters *devicePsiParams = nullptr; - scaleParameters *devicePhiParams = nullptr; - - // Check the hard coded maximum number of images against the imageCount. - if (imageCount > MAX_NUM_IMAGES) { + trajectory *device_tests; + float *device_img_times; + void *device_psi; + void *device_phi; + trajectory *device_search_results; + BaryCorrection *device_bary_corrs = nullptr; + scaleParameters *device_psi_params = nullptr; + scaleParameters *device_phi_params = nullptr; + + // Check the hard coded maximum number of images against the num_images. + if (num_images > MAX_NUM_IMAGES) { throw std::runtime_error("Number of images exceeds GPU maximum."); } if (params.debug) { - printf("Allocating %lu bytes for testing grid.\n", sizeof(trajectory) * trajCount); + printf("Allocating %lu bytes for testing grid.\n", sizeof(trajectory) * num_trajectories); } - checkCudaErrors(cudaMalloc((void **)&deviceTests, sizeof(trajectory) * trajCount)); + checkCudaErrors(cudaMalloc((void **)&device_tests, sizeof(trajectory) * num_trajectories)); if (params.debug) { - printf("Allocating %lu bytes for time data.\n", sizeof(float) * imageCount); + printf("Allocating %lu bytes for time data.\n", sizeof(float) * num_images); } - checkCudaErrors(cudaMalloc((void **)&deviceImgTimes, sizeof(float) * imageCount)); + checkCudaErrors(cudaMalloc((void **)&device_img_times, sizeof(float) * num_images)); if (params.debug) { - printf("Allocating %lu bytes for testing grid.\n", sizeof(trajectory) * trajCount); + printf("Allocating %lu bytes for testing grid.\n", sizeof(trajectory) * num_trajectories); } - checkCudaErrors(cudaMalloc((void **)&deviceSearchResults, sizeof(trajectory) * resultsCount)); + checkCudaErrors(cudaMalloc((void **)&device_search_results, sizeof(trajectory) * num_results)); // Copy trajectories to search - checkCudaErrors(cudaMemcpy(deviceTests, trajectoriesToSearch, sizeof(trajectory) * trajCount, + checkCudaErrors(cudaMemcpy(device_tests, trj_to_search, sizeof(trajectory) * num_trajectories, cudaMemcpyHostToDevice)); // Copy image times - checkCudaErrors(cudaMemcpy(deviceImgTimes, img_data.imageTimes, sizeof(float) * imageCount, + checkCudaErrors(cudaMemcpy(device_img_times, img_data.image_times, sizeof(float) * num_images, cudaMemcpyHostToDevice)); // Copy (and encode) the images. Also copy over the scaling parameters if needed. - if ((params.psiNumBytes == 1 || params.psiNumBytes == 2) && (img_data.psiParams != nullptr)) { - checkCudaErrors(cudaMalloc((void **)&devicePsiParams, imageCount * sizeof(scaleParameters))); - checkCudaErrors(cudaMemcpy(devicePsiParams, img_data.psiParams, imageCount * sizeof(scaleParameters), - cudaMemcpyHostToDevice)); - if (params.psiNumBytes == 1) { - devicePsi = encodeImage(psiVect, imageCount, width * height, img_data.psiParams, - params.debug); - } else { - devicePsi = encodeImage(psiVect, imageCount, width * height, img_data.psiParams, + if ((params.psi_num_bytes == 1 || params.psi_num_bytes == 2) && (img_data.psi_params != nullptr)) { + checkCudaErrors(cudaMalloc((void **)&device_psi_params, num_images * sizeof(scaleParameters))); + checkCudaErrors(cudaMemcpy(device_psi_params, img_data.psi_params, + num_images * sizeof(scaleParameters), cudaMemcpyHostToDevice)); + if (params.psi_num_bytes == 1) { + device_psi = encodeImage(psi_vect, num_images, width * height, img_data.psi_params, params.debug); + } else { + device_psi = encodeImage(psi_vect, num_images, width * height, img_data.psi_params, + params.debug); } } else { - devicePsi = encodeImageFloat(psiVect, imageCount * width * height, params.debug); + device_psi = encodeImageFloat(psi_vect, num_images * width * height, params.debug); } - if ((params.phiNumBytes == 1 || params.phiNumBytes == 2) && (img_data.phiParams != nullptr)) { - checkCudaErrors(cudaMalloc((void **)&devicePhiParams, imageCount * sizeof(scaleParameters))); - checkCudaErrors(cudaMemcpy(devicePhiParams, img_data.phiParams, imageCount * sizeof(scaleParameters), - cudaMemcpyHostToDevice)); - if (params.phiNumBytes == 1) { - devicePhi = encodeImage(phiVect, imageCount, width * height, img_data.phiParams, - params.debug); - } else { - devicePhi = encodeImage(phiVect, imageCount, width * height, img_data.phiParams, + if ((params.phi_num_bytes == 1 || params.phi_num_bytes == 2) && (img_data.phi_params != nullptr)) { + checkCudaErrors(cudaMalloc((void **)&device_phi_params, num_images * sizeof(scaleParameters))); + checkCudaErrors(cudaMemcpy(device_phi_params, img_data.phi_params, + num_images * sizeof(scaleParameters), cudaMemcpyHostToDevice)); + if (params.phi_num_bytes == 1) { + device_phi = encodeImage(phi_vect, num_images, width * height, img_data.phi_params, params.debug); + } else { + device_phi = encodeImage(phi_vect, num_images, width * height, img_data.phi_params, + params.debug); } } else { - devicePhi = encodeImageFloat(phiVect, imageCount * width * height, params.debug); + device_phi = encodeImageFloat(phi_vect, num_images * width * height, params.debug); } // allocate memory for and copy barycentric corrections - if (params.useCorr) { + if (params.use_corr) { if (params.debug) { printf("Search is using barycentric corrections (%lu bytes).\n", - sizeof(baryCorrection) * imageCount); + sizeof(BaryCorrection) * num_images); } - checkCudaErrors(cudaMalloc((void **)&deviceBaryCorrs, sizeof(baryCorrection) * imageCount)); - checkCudaErrors(cudaMemcpy(deviceBaryCorrs, img_data.baryCorrs, sizeof(baryCorrection) * imageCount, - cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMalloc((void **)&device_bary_corrs, sizeof(BaryCorrection) * num_images)); + checkCudaErrors(cudaMemcpy(device_bary_corrs, img_data.bary_corrs, + sizeof(BaryCorrection) * num_images, cudaMemcpyHostToDevice)); } // Wrap the per-image data into a struct. This struct will be copied by value // during the function call, so we don't need to allocate memory for the // struct itself. We just set the pointers to the on device vectors. - perImageData device_image_data; - device_image_data.numImages = imageCount; - device_image_data.imageTimes = deviceImgTimes; - device_image_data.baryCorrs = deviceBaryCorrs; - device_image_data.psiParams = devicePsiParams; - device_image_data.phiParams = devicePhiParams; + PerImageData device_image_data; + device_image_data.num_images = num_images; + device_image_data.image_times = device_img_times; + device_image_data.bary_corrs = device_bary_corrs; + device_image_data.psi_params = device_psi_params; + device_image_data.phi_params = device_phi_params; // Compute the range of starting pixels to use when setting the blocks and threads. // We use the width and height of the search space (as opposed to the image width @@ -390,23 +392,231 @@ extern "C" void deviceSearchFilter(int imageCount, int width, int height, float dim3 threads(THREAD_DIM_X, THREAD_DIM_Y); // Launch Search - searchFilterImages<<>>(imageCount, width, height, devicePsi, devicePhi, - device_image_data, params, trajCount, deviceTests, - deviceSearchResults); + searchFilterImages<<>>(num_images, width, height, device_psi, device_phi, + device_image_data, params, num_trajectories, device_tests, + device_search_results); // Read back results - checkCudaErrors(cudaMemcpy(bestTrajects, deviceSearchResults, sizeof(trajectory) * resultsCount, + checkCudaErrors(cudaMemcpy(best_results, device_search_results, sizeof(trajectory) * num_results, cudaMemcpyDeviceToHost)); // Free the on GPU memory. - if (deviceBaryCorrs != nullptr) checkCudaErrors(cudaFree(deviceBaryCorrs)); - if (devicePhiParams != nullptr) checkCudaErrors(cudaFree(devicePhiParams)); - if (devicePsiParams != nullptr) checkCudaErrors(cudaFree(devicePsiParams)); - checkCudaErrors(cudaFree(devicePhi)); - checkCudaErrors(cudaFree(devicePsi)); - checkCudaErrors(cudaFree(deviceSearchResults)); - checkCudaErrors(cudaFree(deviceImgTimes)); - checkCudaErrors(cudaFree(deviceTests)); + if (device_bary_corrs != nullptr) checkCudaErrors(cudaFree(device_bary_corrs)); + if (device_phi_params != nullptr) checkCudaErrors(cudaFree(device_phi_params)); + if (device_psi_params != nullptr) checkCudaErrors(cudaFree(device_psi_params)); + checkCudaErrors(cudaFree(device_phi)); + checkCudaErrors(cudaFree(device_psi)); + checkCudaErrors(cudaFree(device_search_results)); + checkCudaErrors(cudaFree(device_img_times)); + checkCudaErrors(cudaFree(device_tests)); +} + +__global__ void deviceGetCoaddStamp(int num_images, int width, int height, float *image_vect, + PerImageData image_data, int num_trajectories, trajectory *trajectories, + StampParameters params, int *use_index_vect, float *results) { + // Get the trajectory that we are going to be using. + const int trj_index = blockIdx.x * blockDim.x + threadIdx.x; + if (trj_index < 0 || trj_index >= num_trajectories) return; + trajectory trj = trajectories[trj_index]; + + // Get the pixel coordinates within the stamp to use. + const int stamp_width = 2 * params.radius + 1; + const int stamp_x = threadIdx.y; + if (stamp_x < 0 || stamp_x >= stamp_width) return; + + const int stamp_y = threadIdx.z; + if (stamp_y < 0 || stamp_y >= stamp_width) return; + + // Compute the various offsets for the indices. + int use_index_offset = num_images * trj_index; + int trj_offset = trj_index * stamp_width * stamp_width; + int pixel_index = stamp_width * stamp_y + stamp_x; + + // Allocate space for the coadd information. + assert(num_images < MAX_STAMP_IMAGES); + float values[MAX_STAMP_IMAGES]; + + // Loop over each image and compute the stamp. + int num_values = 0; + for (int t = 0; t < num_images; ++t) { + // Skip entries marked 0 in the use_index_vect. + if (use_index_vect != nullptr && use_index_vect[use_index_offset + t] == 0) { + continue; + } + + // Predict the trajectory's position including the barycentric correction if needed. + float curr_time = image_data.image_times[t]; + int current_x = int(trj.x + trj.x_vel * curr_time); + int current_y = int(trj.y + trj.y_vel * curr_time); + if (image_data.bary_corrs != nullptr) { + BaryCorrection bc = image_data.bary_corrs[t]; + current_x = int(trj.x + trj.x_vel * curr_time + bc.dx + trj.x * bc.dxdx + trj.y * bc.dxdy); + current_y = int(trj.y + trj.y_vel * curr_time + bc.dy + trj.x * bc.dydx + trj.y * bc.dydy); + } + + // Get the stamp and add it to the list of values. + int img_x = current_x - params.radius + stamp_x; + int img_y = current_y - params.radius + stamp_y; + if ((img_x >= 0) && (img_x < width) && (img_y >= 0) && (img_y < height)) { + int pixel_index = width * height * t + img_y * width + img_x; + if (image_vect[pixel_index] != NO_DATA) { + values[num_values] = image_vect[pixel_index]; + ++num_values; + } + } + } + + // If there are no values, just return 0. + if (num_values == 0) { + results[trj_offset + pixel_index] = 0.0; + return; + } + + // Do the actual computation from the values. + float result = 0.0; + int median_ind = num_values / 2; // Outside switch to avoid compiler warnings. + switch (params.stamp_type) { + case STAMP_MEDIAN: + // Sort the values in ascending order. + for (int j = 0; j < num_values; j++) { + for (int k = j + 1; k < num_values; k++) { + if (values[j] > values[k]) { + float tmp = values[j]; + values[j] = values[k]; + values[k] = tmp; + } + } + } + + // Take the median value of the pixels with data. + if (num_values % 2 == 0) { + result = (values[median_ind] + values[median_ind - 1]) / 2.0; + } else { + result = values[median_ind]; + } + break; + case STAMP_SUM: + for (int t = 0; t < num_values; ++t) { + result += values[t]; + } + break; + case STAMP_MEAN: + for (int t = 0; t < num_values; ++t) { + result += values[t]; + } + result /= float(num_values); + break; + } + + // Save the result to the correct pixel location. + results[trj_offset + pixel_index] = result; +} + +void deviceGetCoadds(ImageStack &stack, PerImageData image_data, int num_trajectories, + trajectory *trajectories, StampParameters params, + std::vector> &use_index_vect, float *results) { + // Allocate Device memory + trajectory *device_trjs; + int *device_use_index = nullptr; + float *device_times; + float *device_img; + float *device_res; + BaryCorrection *device_bary_corrs = nullptr; + + // Compute the dimensions for the data. + const unsigned int num_images = stack.imgCount(); + const unsigned int width = stack.getWidth(); + const unsigned int height = stack.getHeight(); + const unsigned int num_image_pixels = num_images * width * height; + const unsigned int stamp_width = 2 * params.radius + 1; + const unsigned int stamp_ppi = (2 * params.radius + 1) * (2 * params.radius + 1); + const unsigned int num_stamp_pixels = num_trajectories * stamp_ppi; + + // Allocate and copy the trajectories. + checkCudaErrors(cudaMalloc((void **)&device_trjs, sizeof(trajectory) * num_trajectories)); + checkCudaErrors(cudaMemcpy(device_trjs, trajectories, sizeof(trajectory) * num_trajectories, + cudaMemcpyHostToDevice)); + + // Check if we need to create a vector of per-trajectory, per-image use. + // Convert the vector of booleans into an integer array so we do a cudaMemcpy. + if (use_index_vect.size() == num_trajectories) { + checkCudaErrors(cudaMalloc((void **)&device_use_index, sizeof(int) * num_images * num_trajectories)); + + int *start_ptr = device_use_index; + std::vector int_vect(num_images, 0); + for (unsigned i = 0; i < num_trajectories; ++i) { + assert(use_index_vect[i].size() == num_images); + for (unsigned t = 0; t < num_images; ++t) { + int_vect[t] = use_index_vect[i][t] ? 1 : 0; + } + + checkCudaErrors( + cudaMemcpy(start_ptr, int_vect.data(), sizeof(int) * num_images, cudaMemcpyHostToDevice)); + start_ptr += num_images; + } + } + + // Allocate and copy the times. + checkCudaErrors(cudaMalloc((void **)&device_times, sizeof(float) * num_images)); + checkCudaErrors(cudaMemcpy(device_times, image_data.image_times, sizeof(float) * num_images, + cudaMemcpyHostToDevice)); + + // Allocate and copy the images. + checkCudaErrors(cudaMalloc((void **)&device_img, sizeof(float) * num_image_pixels)); + float *next_ptr = device_img; + for (unsigned t = 0; t < num_images; ++t) { + const std::vector &data_ref = stack.getSingleImage(t).getScience().getPixels(); + + assert(data_ref.size() == width * height); + checkCudaErrors(cudaMemcpy(next_ptr, data_ref.data(), sizeof(float) * width * height, + cudaMemcpyHostToDevice)); + next_ptr += width * height; + } + + // Allocate space for the results. + checkCudaErrors(cudaMalloc((void **)&device_res, sizeof(float) * num_stamp_pixels)); + + // Allocate memory for and copy barycentric corrections (if needed). + if (image_data.bary_corrs != nullptr) { + checkCudaErrors(cudaMalloc((void **)&device_bary_corrs, sizeof(BaryCorrection) * num_images)); + checkCudaErrors(cudaMemcpy(device_bary_corrs, image_data.bary_corrs, + sizeof(BaryCorrection) * num_images, cudaMemcpyHostToDevice)); + } + + // Wrap the per-image data into a struct. This struct will be copied by value + // during the function call, so we don't need to allocate memory for the + // struct itself. We just set the pointers to the on device vectors. + PerImageData device_image_data; + device_image_data.num_images = num_images; + device_image_data.image_times = device_times; + device_image_data.bary_corrs = device_bary_corrs; + device_image_data.psi_params = nullptr; + device_image_data.phi_params = nullptr; + + dim3 blocks(num_trajectories, 1, 1); + dim3 threads(1, stamp_width, stamp_width); + + // Create the stamps. + deviceGetCoaddStamp<<>>(num_images, width, height, device_img, device_image_data, + num_trajectories, device_trjs, params, device_use_index, + device_res); + cudaDeviceSynchronize(); + + // Free up the unneeded memory (everything except for the on-device results). + if (device_bary_corrs != nullptr) checkCudaErrors(cudaFree(device_bary_corrs)); + checkCudaErrors(cudaFree(device_img)); + if (device_use_index != nullptr) checkCudaErrors(cudaFree(device_use_index)); + checkCudaErrors(cudaFree(device_times)); + checkCudaErrors(cudaFree(device_trjs)); + cudaDeviceSynchronize(); + + // Read back results + checkCudaErrors( + cudaMemcpy(results, device_res, sizeof(float) * num_stamp_pixels, cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + // Free the rest of the on GPU memory. + checkCudaErrors(cudaFree(device_res)); } } /* namespace search */