r-abishek · HazarathKumarM · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024 · Jul 27, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,14 @@
 # Changelog for RPP
 
 Full documentation for RPP is available at [https://rocm.docs.amd.com/projects/rpp/en/latest](https://rocm.docs.amd.com/projects/rpp/en/latest)
-
+
+## RPP 1.10.1 (unreleased)
+
+### Changes
+
+* RPP Tensor Sobel Filter support on HOST and HIP
+
+
 ## RPP 1.9.1 for ROCm 6.3.0
 
 ### Changes

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -29,7 +29,7 @@ endif()
 set(CMAKE_CXX_STANDARD 17)
 
 # RPP Version
-set(VERSION "1.9.1")
+set(VERSION "1.10.1")
 
 # Set Project Version and Language
 project(rpp VERSION ${VERSION} LANGUAGES CXX)

diff --git a/docs/data/doxygenOutputs/filter_augmentations_sobel_filter_kSize3_img150x150.png b/docs/data/doxygenOutputs/filter_augmentations_sobel_filter_kSize3_img150x150.png
diff --git a/include/rpp_version.h b/include/rpp_version.h
@@ -39,7 +39,7 @@ extern "C" {
 #endif
 // NOTE: IMPORTANT: Match the version with CMakelists.txt version
 #define RPP_VERSION_MAJOR 1
-#define RPP_VERSION_MINOR 9
+#define RPP_VERSION_MINOR 10
 #define RPP_VERSION_PATCH 1
 #ifdef __cplusplus
 }

diff --git a/include/rppt_tensor_filter_augmentations.h b/include/rppt_tensor_filter_augmentations.h
@@ -93,6 +93,50 @@ RppStatus rppt_box_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t
 RppStatus rppt_gaussian_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *stdDevTensor, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
 #endif // GPU_SUPPORT
 
+/*! \brief Sobel Filter augmentation on HOST backend for a NHWC/NCHW layout tensor
+ * \details The sobel filter augmentation runs for a batch of RGB(3 channel) / greyscale(1 channel) images with NHWC/NCHW tensor layout.<br>
+ * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
+ * - dstPtr depth ranges - Will be same depth as srcPtr.
+ * \image html img150x150.png Sample Input
+ * \image html filter_augmentations_sobel_filter_kSize3_img150x150.png Sample 3x3 Output
+ * \param [in] srcPtr source tensor in HOST memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HOST memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1)
+ * \param [in] sobelType sobel type for sobel filter (a single Rpp32u number with sobelType = 0 (X Gradient) / 1 (Y Gradient) / 2 (XY Gradient) that applies to all images in the batch)
+ * \param [in] kernelSize kernel size for sobel filter (a single Rpp32u odd number with kernelSize = 3/5/7 that applies to all images in the batch)
+ * \param [in] roiTensorPtrSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
+ * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+ * \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_sobel_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32u sobelType, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
+
+#ifdef GPU_SUPPORT
+/*! \brief Sobel Filter augmentation on HIP backend for a NHWC/NCHW layout tensor
+ * \details The sobel filter augmentation runs for a batch of RGB(3 channel) / greyscale(1 channel) images with NHWC/NCHW tensor layout.<br>
+ * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
+ * - dstPtr depth ranges - Will be same depth as srcPtr.
+ * \image html img150x150.png Sample Input
+ * \image html filter_augmentations_sobel_filter_kSize3_img150x150.png Sample 3x3 Output
+ * \param [in] srcPtr source tensor in HIP memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HIP memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1)
+ * \param [in] sobelType sobel type for sobel filter (a single Rpp32u number with sobelType = 0 (X Gradient) / 1 (Y Gradient) / 2 (XY Gradient) that applies to all images in the batch)
+ * \param [in] kernelSize kernel size for sobel filter (a single Rpp32u odd number with kernelSize = 3/5/7 that applies to all images in the batch)
+ * \param [in] roiTensorPtrSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
+ * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+ * \param [in] rppHandle RPP HIP handle created with <tt>\ref rppCreateWithStreamAndBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_sobel_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32u sobelType, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
+#endif // GPU_SUPPORT
+
 /*! @}
  */
 

diff --git a/src/include/cpu/rpp_cpu_common.hpp b/src/include/cpu/rpp_cpu_common.hpp
@@ -512,12 +512,12 @@ inline int power_function(int a, int b)
 
 inline void saturate_pixel(Rpp32f pixel, Rpp8u* dst)
 {
-    *dst = RPPPIXELCHECK(pixel);
+    *dst = RPPPIXELCHECK(std::nearbyintf(pixel));
 }
 
 inline void saturate_pixel(Rpp32f pixel, Rpp8s* dst)
 {
-    *dst = (Rpp8s)RPPPIXELCHECKI8(pixel - 128);
+    *dst = (Rpp8s)RPPPIXELCHECKI8(std::nearbyintf(pixel) - 128);
 }
 
 inline void saturate_pixel(Rpp32f pixel, Rpp32f* dst)