r-abishek · sampath1117 · Sep 5, 2024 · Sep 5, 2024 · Sep 6, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,7 +1,13 @@
 # Changelog for RPP
 
 Full documentation for RPP is available at [https://rocm.docs.amd.com/projects/rpp/en/latest](https://rocm.docs.amd.com/projects/rpp/en/latest)
-
+
+## RPP 1.15.3 (unreleased)
+
+### Changes
+
+* RPP Tensor Dilate support on HOST
+
 ## RPP 1.9.1 for ROCm 6.3.0
 
 ### Changes

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -29,7 +29,7 @@ endif()
 set(CMAKE_CXX_STANDARD 17)
 
 # RPP Version
-set(VERSION "1.9.1")
+set(VERSION "1.15.3")
 
 # Set Project Version and Language
 project(rpp VERSION ${VERSION} LANGUAGES CXX)

diff --git a/include/rpp_version.h b/include/rpp_version.h
@@ -39,8 +39,8 @@ extern "C" {
 #endif
 // NOTE: IMPORTANT: Match the version with CMakelists.txt version
 #define RPP_VERSION_MAJOR 1
-#define RPP_VERSION_MINOR 9
-#define RPP_VERSION_PATCH 1
+#define RPP_VERSION_MINOR 15
+#define RPP_VERSION_PATCH 3
 #ifdef __cplusplus
 }
 #endif

diff --git a/include/rppt_tensor_effects_augmentations.h b/include/rppt_tensor_effects_augmentations.h
@@ -518,7 +518,11 @@ RppStatus rppt_jitter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstP
  * \param [in] dstGenericDescPtr destination tensor descriptor
  * \param [in] meanTensor mean values for each input, which are used to compute the generalized Box-Mueller transforms in a gaussian distribution (1D tensor of size batchSize with meanTensor[i] >= 0 for each image in batch)
  * \param [in] stdDevTensor stdDev values for each image, which are used to compute the generalized Box-Mueller transforms in a gaussian distribution (1D tensor of size batchSize with stdDevTensor[i] >= 0 for each image in batch)
+ * \param [in] seed A user-defined seed value (single Rpp32u value)
+ * \param [in] roiGenericPtrSrc ROI data for each image in source tensor (tensor of batchSize RpptRoiGeneric values)
+ * \param [in] roiType ROI type used (RpptRoi3DType::XYZWHD or RpptRoi3DType::LTFRBB)
  * \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
  * \retval RPP_SUCCESS Successful completion.
  * \retval RPP_ERROR* Unsuccessful completion.
  */

diff --git a/include/rppt_tensor_morphological_operations.h b/include/rppt_tensor_morphological_operations.h
@@ -67,6 +67,29 @@ extern "C" {
 RppStatus rppt_erode_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
 #endif // GPU_SUPPORT
 
+/*! \brief Dilate augmentation on HOST backend for a NCHW/NHWC layout tensor
+ * \details The dilate augmentation runs for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>
+ * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127).
+ * - dstPtr depth ranges - Will be same depth as srcPtr.
+ * \image html img150x150.png Sample Input
+ * \image html morphological_operations_dilate_kSize3_img150x150.png Sample 3x3 Output
+ * \image html morphological_operations_dilate_kSize5_img150x150.png Sample 5x5 Output
+ * \image html morphological_operations_dilate_kSize7_img150x150.png Sample 7x7 Output
+ * \image html morphological_operations_dilate_kSize9_img150x150.png Sample 9x9 Output
+ * \param [in] srcPtr source tensor in HOST memory
+ * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3)
+ * \param [out] dstPtr destination tensor in HOST memory
+ * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr)
+ * \param [in] kernelSize kernel size for the dilate operation (a single odd Rpp32u value with kernelSize = 3/5/7/9 that applies to all images in the batch)
+ * \param [in] roiTensorPtrSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y))
+ * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB)
+ * \param [in] rppHandle RPP HOST handle created with <tt>\ref rppCreateWithBatchSize()</tt>
+ * \return A <tt> \ref RppStatus</tt> enumeration.
+ * \retval RPP_SUCCESS Successful completion.
+ * \retval RPP_ERROR* Unsuccessful completion.
+ */
+RppStatus rppt_dilate_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle);
+
 #ifdef GPU_SUPPORT
 /*! \brief Dilate augmentation on HIP backend for a NCHW/NHWC layout tensor
  * \details The dilate augmentation runs for a batch of RGB(3 channel) / greyscale(1 channel) images with an NHWC/NCHW tensor layout.<br>

diff --git a/src/include/cpu/rpp_cpu_common.hpp b/src/include/cpu/rpp_cpu_common.hpp
@@ -510,24 +510,24 @@ inline int power_function(int a, int b)
     return product;
 }
 
-inline void saturate_pixel(Rpp32f pixel, Rpp8u* dst)
+inline void saturate_pixel(Rpp32f &pixel, Rpp8u* dst)
 {
-    *dst = RPPPIXELCHECK(pixel);
+    *dst = static_cast<Rpp8u>(RPPPIXELCHECK(std::nearbyintf(pixel)));
 }
 
-inline void saturate_pixel(Rpp32f pixel, Rpp8s* dst)
+inline void saturate_pixel(Rpp32f &pixel, Rpp8s* dst)
 {
-    *dst = (Rpp8s)RPPPIXELCHECKI8(pixel - 128);
+    *dst = static_cast<Rpp8s>(RPPPIXELCHECKI8(std::nearbyintf(pixel) - 128));
 }
 
-inline void saturate_pixel(Rpp32f pixel, Rpp32f* dst)
+inline void saturate_pixel(Rpp32f &pixel, Rpp32f* dst)
 {
-    *dst = (Rpp32f)pixel;
+    *dst = RPPPIXELCHECKF32(pixel);
 }
 
-inline void saturate_pixel(Rpp32f pixel, Rpp16f* dst)
+inline void saturate_pixel(Rpp32f &pixel, Rpp16f* dst)
 {
-    *dst = (Rpp16f)pixel;
+    *dst = static_cast<Rpp16f>(RPPPIXELCHECKF32(pixel));
 }
 
 template <typename T>