diff --git a/CHANGELOG.md b/CHANGELOG.md index f1b94b987..3c488f0ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Documentation for MIVisionX is available at * Support for advanced GPUs * Support for PreEmphasis Filter augmentation in openVX extensions * Support for Spectrogram augmentation in openVX extensions +* Support for Downmix and ToDecibels augmentations in openVX extensions ### Optimizations diff --git a/amd_openvx_extensions/amd_rpp/CMakeLists.txt b/amd_openvx_extensions/amd_rpp/CMakeLists.txt index bb091a4f1..e5b74f138 100644 --- a/amd_openvx_extensions/amd_rpp/CMakeLists.txt +++ b/amd_openvx_extensions/amd_rpp/CMakeLists.txt @@ -135,6 +135,7 @@ list(APPEND SOURCES source/tensor/Copy.cpp source/tensor/Crop.cpp source/tensor/CropMirrorNormalize.cpp + source/tensor/Downmix.cpp source/tensor/Exposure.cpp source/tensor/FishEye.cpp source/tensor/Flip.cpp @@ -158,6 +159,7 @@ list(APPEND SOURCES source/tensor/SequenceRearrange.cpp source/tensor/Snow.cpp source/tensor/Spectrogram.cpp + source/tensor/ToDecibels.cpp source/tensor/Vignette.cpp source/tensor/WarpAffine.cpp source/tensor/SequenceRearrange.cpp diff --git a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h index 9c63cb450..29c337dc7 100644 --- a/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h +++ b/amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h @@ -157,6 +157,8 @@ vx_status Vignette_Register(vx_context); vx_status WarpAffine_Register(vx_context); vx_status SequenceRearrange_Register(vx_context); vx_status Spectrogram_Register(vx_context); +vx_status Downmix_Register(vx_context); +vx_status ToDecibels_Register(vx_context); // kernel names #define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD" @@ -283,5 +285,7 @@ vx_status Spectrogram_Register(vx_context); #define VX_KERNEL_RPP_SEQUENCEREARRANGE_NAME "org.rpp.SequenceRearrange" #define VX_KERNEL_RPP_PREEMPHASISFILTER_NAME 
"org.rpp.PreemphasisFilter"
 #define VX_KERNEL_RPP_SPECTROGRAM_NAME "org.rpp.Spectrogram"
+#define VX_KERNEL_RPP_DOWNMIX_NAME "org.rpp.Downmix"
+#define VX_KERNEL_RPP_TODECIBELS_NAME "org.rpp.ToDecibels"
 
 #endif //_AMDVX_EXT__PUBLISH_KERNELS_H_
diff --git a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h
index ee9ea06c9..c80383948 100644
--- a/amd_openvx_extensions/amd_rpp/include/internal_rpp.h
+++ b/amd_openvx_extensions/amd_rpp/include/internal_rpp.h
@@ -43,6 +43,7 @@ THE SOFTWARE.
 #include
 #include
 #include
+#include <map>
 
 using namespace std;
 
@@ -74,6 +75,18 @@ enum vxTensorLayout {
     VX_NTF = 6 // Time major, Used for Spectrogram/MelFilterBank
 };
 
+const std::map<vxTensorLayout, RpptLayout> tensorLayoutMapping = {  // vxTensorLayout -> RpptLayout stored in tensor descriptors
+    {vxTensorLayout::VX_NHWC, RpptLayout::NHWC},
+    {vxTensorLayout::VX_NCHW, RpptLayout::NCHW},
+    {vxTensorLayout::VX_NFHWC, RpptLayout::NHWC},  // mapped to the same RPP layout as VX_NHWC
+    {vxTensorLayout::VX_NFCHW, RpptLayout::NCHW},  // mapped to the same RPP layout as VX_NCHW
+#if RPP_AUDIO
+    {vxTensorLayout::VX_NHW, RpptLayout::NHW},  // audio layouts are only present when RPP_AUDIO is set
+    {vxTensorLayout::VX_NFT, RpptLayout::NFT},
+    {vxTensorLayout::VX_NTF, RpptLayout::NTF}
+#endif
+};
+
 //!
Brief The utility functions vx_node createNode(vx_graph graph, vx_enum kernelEnum, vx_reference params[], vx_uint32 num); vx_status createRPPHandle(vx_node node, vxRppHandle ** pHandle, Rpp32u batchSize, Rpp32u deviceType); @@ -81,7 +94,6 @@ vx_status releaseRPPHandle(vx_node node, vxRppHandle * handle, Rpp32u deviceType void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims); void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *tensorDims, vxTensorLayout layout = vxTensorLayout::VX_NHW); RpptDataType getRpptDataType(vx_enum dataType); -RpptLayout getRpptLayout(vxTensorLayout layout); class Kernellist { diff --git a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h index c34a45804..783d1c8b5 100644 --- a/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/kernels_rpp.h @@ -149,7 +149,9 @@ extern "C" VX_KERNEL_RPP_VIGNETTE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x72, VX_KERNEL_RPP_WARPAFFINE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x73, VX_KERNEL_RPP_PREEMPHASISFILTER = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x74, - VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75 + VX_KERNEL_RPP_SPECTROGRAM = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x75, + VX_KERNEL_RPP_DOWNMIX = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x76, + VX_KERNEL_RPP_TODECIBELS = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x77 }; #ifdef __cplusplus diff --git a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h index a28891d1a..c93ae8935 100644 --- a/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h +++ b/amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h @@ -1896,6 +1896,31 @@ extern "C" */ SHARED_PUBLIC vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_tensor pDstRoi, vx_array 
windowFunction, vx_scalar centerWindow, vx_scalar reflectPadding, vx_scalar spectrogramLayout, vx_scalar power, vx_scalar nfft, vx_scalar windowLength, vx_scalar windowStep);
 
+    /*! \brief [Graph] Applies downmixing to the input tensor.
+     * \ingroup group_amd_rpp
+     * \param [in] graph The handle to the graph.
+     * \param [in] pSrc The input tensor in \ref VX_TYPE_FLOAT32 format data.
+     * \param [out] pDst The output tensor in \ref VX_TYPE_FLOAT32 format data.
+     * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input.
+     * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+     */
+    SHARED_PUBLIC vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor pSrcRoi);
+
+    /*! \brief [Graph] Applies to_decibels augmentation to the input tensor.
+     * \ingroup group_amd_rpp
+     * \param [in] graph The handle to the graph.
+     * \param [in] pSrc The input tensor in \ref VX_TYPE_FLOAT32 format data.
+     * \param [in] pSrcRoi The input tensor of batch size in unsigned int containing the roi values for the input.
+     * \param [out] pDst The output tensor in \ref VX_TYPE_FLOAT32 format data.
+     * \param [in] cutOffDB The input scalar in \ref VX_TYPE_FLOAT32 format containing minimum or cut-off ratio in dB.
+     * \param [in] multiplier The input scalar in \ref VX_TYPE_FLOAT32 format containing factor by which the logarithm is multiplied.
+     * \param [in] referenceMagnitude The input scalar in \ref VX_TYPE_FLOAT32 format containing the reference magnitude; if not provided, the maximum value of the input is used as reference.
+     * \param [in] inputLayout The input layout in \ref VX_TYPE_INT32 denotes the layout of input tensor.
+     * \param [in] outputLayout The output layout in \ref VX_TYPE_INT32 denotes the layout of output tensor.
+     * \return A node reference \ref vx_node. Any possible errors preventing a successful creation should be checked using \ref vxGetStatus.
+     */
+    SHARED_PUBLIC vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude, vx_scalar inputLayout, vx_scalar outputLayout);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
index f01a35d48..cfd574fa2 100644
--- a/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp
@@ -162,6 +162,8 @@ vx_status get_kernels_to_publish()
     STATUS_ERROR_CHECK(ADD_KERNEL(Vignette_Register));
     STATUS_ERROR_CHECK(ADD_KERNEL(WarpAffine_Register));
     STATUS_ERROR_CHECK(ADD_KERNEL(Spectrogram_Register));
+    STATUS_ERROR_CHECK(ADD_KERNEL(Downmix_Register));
+    STATUS_ERROR_CHECK(ADD_KERNEL(ToDecibels_Register));
 
     return status;
 }
diff --git a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
index 050bf1515..99607e2c5 100644
--- a/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
+++ b/amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp
@@ -2584,6 +2584,43 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppSpectrogram(vx_graph graph, vx_tensor p
     return node;
 }
 
+//! \brief Creates a Downmix node; params[] follows the parameter indices registered in Downmix_Register.
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppDownmix(vx_graph graph, vx_tensor pSrc, vx_tensor pDst, vx_tensor pSrcRoi) {
+    vx_context context = vxGetContext((vx_reference)graph);
+    if (vxGetStatus((vx_reference)context) != VX_SUCCESS)
+        return NULL;
+    vx_uint32 devType = getGraphAffinity(graph);
+    vx_scalar deviceType = vxCreateScalar(context, VX_TYPE_UINT32, &devType);
+    vx_reference params[] = {
+        (vx_reference)pSrc,
+        (vx_reference)pDst,
+        (vx_reference)pSrcRoi,
+        (vx_reference)deviceType};
+    // The count is derived from the array so it cannot drift from the list above.
+    return createNode(graph, VX_KERNEL_RPP_DOWNMIX, params, (vx_uint32)(sizeof(params) / sizeof(params[0])));
+}
+
+//! \brief Creates a ToDecibels node; params[] follows the parameter indices registered in ToDecibels_Register.
+//! The graph affinity is appended as the last parameter (device type). Returns NULL if the context is invalid.
+VX_API_ENTRY vx_node VX_API_CALL vxExtRppToDecibels(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar cutOffDB, vx_scalar multiplier, vx_scalar referenceMagnitude, vx_scalar inputLayout, vx_scalar outputLayout) {
+    vx_context context = vxGetContext((vx_reference)graph);
+    if (vxGetStatus((vx_reference)context) != VX_SUCCESS)
+        return NULL;
+    vx_uint32 devType = getGraphAffinity(graph);
+    vx_scalar deviceType = vxCreateScalar(context, VX_TYPE_UINT32, &devType);
+    vx_reference params[] = {
+        (vx_reference)pSrc,
+        (vx_reference)pSrcRoi,
+        (vx_reference)pDst,
+        (vx_reference)cutOffDB,
+        (vx_reference)multiplier,
+        (vx_reference)referenceMagnitude,
+        (vx_reference)inputLayout,
+        (vx_reference)outputLayout,
+        (vx_reference)deviceType};
+    return createNode(graph, VX_KERNEL_RPP_TODECIBELS, params, (vx_uint32)(sizeof(params) / sizeof(params[0])));
+}
+
 RpptDataType getRpptDataType(vx_enum vxDataType) {
     switch(vxDataType) {
         case vx_type_e::VX_TYPE_FLOAT32:
@@ -2597,34 +2634,6 @@ RpptDataType getRpptDataType(vx_enum vxDataType) {
     }
 }
 
-RpptLayout getRpptLayout(vxTensorLayout layout) {
-    switch(layout) {
-        case vxTensorLayout::VX_NHWC:
-            return RpptLayout::NHWC;
-        case vxTensorLayout::VX_NCHW:
-            return RpptLayout::NCHW;
-        case vxTensorLayout::VX_NFHWC:
-            return RpptLayout::NHWC;
-        case vxTensorLayout::VX_NFCHW:
-            return RpptLayout::NCHW;
-#if RPP_AUDIO
-        case vxTensorLayout::VX_NHW:
-            return RpptLayout::NHW;
-        case vxTensorLayout::VX_NFT:
-            return RpptLayout::NFT;
-        case vxTensorLayout::VX_NTF:
-            return RpptLayout::NTF;
-#else
-        case vxTensorLayout::VX_NHW:
-        case vxTensorLayout::VX_NFT:
-        case vxTensorLayout::VX_NTF:
-            throw std::runtime_error("RPP_AUDIO flag disabled, Audio layouts are not supported");
-#endif
-        default:
-            throw std::runtime_error("Invalid layout");
-    }
-}
-
 void fillDescriptionPtrfromDims(RpptDescPtr &descPtr, vxTensorLayout layout, size_t *tensorDims) {
switch(layout) { case vxTensorLayout::VX_NHWC: { @@ -2691,7 +2700,11 @@ void fillAudioDescriptionPtrFromDims(RpptDescPtr &descPtr, size_t *maxTensorDims descPtr->strides.wStride = descPtr->c; descPtr->strides.cStride = 1; descPtr->numDims = 4; - descPtr->layout = getRpptLayout(layout); + if(tensorLayoutMapping.find(layout) != tensorLayoutMapping.end()) { + descPtr->layout = tensorLayoutMapping.at(layout); + } else { + throw std::runtime_error("Invalid layout"); + } } // utility functions diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp new file mode 100644 index 000000000..5ed6924c0 --- /dev/null +++ b/amd_openvx_extensions/amd_rpp/source/tensor/Downmix.cpp @@ -0,0 +1,213 @@ +/* +Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+*/ + +#include "internal_publishKernels.h" +#include "vx_ext_amd.h" + +struct DownmixLocalData { + vxRppHandle *handle; + Rpp32u deviceType; + RppPtr_t pSrc; + RppPtr_t pDst; + vx_int32 *pSrcRoi; + RpptDescPtr pSrcDesc; + RpptDescPtr pDstDesc; + size_t inputTensorDims[RPP_MAX_TENSOR_DIMS]; + size_t outputTensorDims[RPP_MAX_TENSOR_DIMS]; +}; + +static vx_status VX_CALLBACK refreshDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num, DownmixLocalData *data) { + vx_status status = VX_SUCCESS; + if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_OPENCL || ENABLE_HIP + return VX_ERROR_NOT_IMPLEMENTED; +#endif + } + void *roi_tensor_ptr_src; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src))); + RpptROI *src_roi = reinterpret_cast(roi_tensor_ptr_src); + for (int n = 0; n < data->inputTensorDims[0]; n++) { + data->pSrcRoi[n * 2] = src_roi[n].xywhROI.roiWidth; + data->pSrcRoi[n * 2 + 1] = src_roi[n].xywhROI.roiHeight; + } + return status; +} + +static vx_status VX_CALLBACK validateDownmix(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) { + vx_status status = VX_SUCCESS; + vx_enum scalar_type; + STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type))); + if (scalar_type != VX_TYPE_UINT32) + return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Paramter: #3 type=%d (must be size)\n", scalar_type); + + // Check for input parameters + size_t num_tensor_dims; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + if (num_tensor_dims < 3) return 
ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Downmix: tensor: #0 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims); + + // Check for output parameters + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Downmix: tensor: #1 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims); + + vx_uint8 tensor_fixed_point_position; + size_t tensor_dims[RPP_MAX_TENSOR_DIMS]; + vx_enum tensor_datatype; + + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype))); + STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[1], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position))); + return status; +} + +static vx_status VX_CALLBACK processDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) { + RppStatus rpp_status = RPP_SUCCESS; + vx_status return_status = VX_SUCCESS; + DownmixLocalData *data = NULL; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + if (data->deviceType == AGO_TARGET_AFFINITY_GPU) { +#if ENABLE_OPENCL || ENABLE_HIP + return VX_ERROR_NOT_IMPLEMENTED; +#endif + } + if (data->deviceType == 
AGO_TARGET_AFFINITY_CPU) { + refreshDownmix(node, parameters, num, data); +#if RPP_AUDIO + rpp_status = rppt_down_mixing_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, (Rpp32s *)data->pSrcRoi, false, data->handle->rppHandle); + return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE; +#else + return_status = VX_ERROR_NOT_SUPPORTED; +#endif + } + return return_status; +} + +static vx_status VX_CALLBACK initializeDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) { + DownmixLocalData *data = new DownmixLocalData; + if (data) { + memset(data, 0, sizeof(DownmixLocalData)); + + vx_enum input_tensor_datatype, output_tensor_datatype; + STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST)); + + // Querying for input tensor + data->pSrcDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype))); + data->pSrcDesc->dataType = getRpptDataType(input_tensor_datatype); + data->pSrcDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims); + + // Querying for output tensor + data->pDstDesc = new RpptDesc; + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims))); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims)); + STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype))); + 
data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype); + data->pDstDesc->offsetInBytes = 0; + fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims); + + data->pSrcRoi = new vx_int32[data->pSrcDesc->n * 2]; + refreshDownmix(node, parameters, num, data); + STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType)); + STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + return VX_SUCCESS; + } else { + return VX_FAILURE; + } +} + +static vx_status VX_CALLBACK uninitializeDownmix(vx_node node, const vx_reference *parameters, vx_uint32 num) { + DownmixLocalData *data; + STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data))); + if (data->pSrcRoi) delete[] data->pSrcRoi; + if (data->pSrcDesc) delete data->pSrcDesc; + if (data->pDstDesc) delete data->pDstDesc; + STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType)); + if (data) delete data; + return VX_SUCCESS; +} + +//! \brief The kernel target support callback. +// TODO::currently the node is setting the same affinity as context. 
This needs to change when we have hybrid modes in the same graph +static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node, + vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2 + vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU) +) { + vx_context context = vxGetContext((vx_reference)graph); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + supported_target_affinity = AGO_TARGET_AFFINITY_GPU; + else + supported_target_affinity = AGO_TARGET_AFFINITY_CPU; + + // hardcode the affinity to CPU for OpenCL backend to avoid VerifyGraph failure since there is no codegen callback for amd_rpp nodes + + return VX_SUCCESS; +} + +vx_status Downmix_Register(vx_context context) { + vx_status status = VX_SUCCESS; + // Add kernel to the context with callbacks + vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Downmix", + VX_KERNEL_RPP_DOWNMIX, + processDownmix, + 4, + validateDownmix, + initializeDownmix, + uninitializeDownmix); + ERROR_CHECK_OBJECT(kernel); + AgoTargetAffinityInfo affinity; + vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity)); +#if ENABLE_HIP + // enable OpenCL buffer access since the kernel_f callback uses OpenCL buffers instead of host accessible buffers + vx_bool enableBufferAccess = vx_true_e; + if (affinity.device_type == AGO_TARGET_AFFINITY_GPU) + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess))); +#else + vx_bool enableBufferAccess = vx_false_e; +#endif + amd_kernel_query_target_support_f query_target_support_f = query_target_support; + + if (kernel) { + STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, 
VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+    }
+    if (status != VX_SUCCESS) {
+    exit:
+        vxRemoveKernel(kernel);
+        return VX_FAILURE;
+    }
+
+    return status;
+}
diff --git a/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp
new file mode 100644
index 000000000..d09d08d43
--- /dev/null
+++ b/amd_openvx_extensions/amd_rpp/source/tensor/ToDecibels.cpp
@@ -0,0 +1,265 @@
+/*
+Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+#include "internal_publishKernels.h"
+
+#include <new>
+
+//! \brief Node-local state shared by the ToDecibels kernel callbacks.
+struct ToDecibelsLocalData {
+    vxRppHandle *handle;          // set by createRPPHandle during initialization
+    Rpp32u deviceType;            // copied from scalar parameter #8
+    RppPtr_t pSrc;                // host buffer of tensor parameter #0
+    RppPtr_t pDst;                // host buffer of tensor parameter #2
+    Rpp32f cutOffDB;              // scalar parameter #3
+    Rpp32f multiplier;            // scalar parameter #4
+    Rpp32f referenceMagnitude;    // scalar parameter #5
+    RpptDescPtr pSrcDesc;
+    RpptDescPtr pDstDesc;
+    RpptImagePatch *pSrcDims;     // per-ROI width/height copied from tensor parameter #1
+    vxTensorLayout inputLayout;   // scalar parameter #6
+    vxTensorLayout outputLayout;  // scalar parameter #7
+    size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
+    size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
+};
+
+//! \brief Re-queries the tensor host buffers and copies every ROI's width/height into data->pSrcDims.
+static vx_status VX_CALLBACK refreshToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num, ToDecibelsLocalData *data) {
+    vx_status status = VX_SUCCESS;
+    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL || ENABLE_HIP
+        return VX_ERROR_NOT_IMPLEMENTED;
+#endif
+    }
+    void *roi_tensor_ptr_src;
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr_src, sizeof(roi_tensor_ptr_src)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
+    RpptROI *src_roi = reinterpret_cast<RpptROI *>(roi_tensor_ptr_src);
+    for (size_t i = 0; i < data->inputTensorDims[0]; i++) {
+        data->pSrcDims[i].width = src_roi[i].xywhROI.roiWidth;
+        data->pSrcDims[i].height = src_roi[i].xywhROI.roiHeight;
+    }
+    return status;
+}
+
+//! \brief Checks that scalar \p parameter (kernel parameter \p index) has type \p expected_type; \p expected_name is used in the message.
+static vx_status validateScalarType(vx_reference parameter, vx_uint32 index, vx_enum expected_type, const char *expected_name) {
+    vx_enum scalar_type;
+    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameter, VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
+    if (scalar_type != expected_type)
+        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #%u type=%d (must be %s)\n", index, scalar_type, expected_name);
+    return VX_SUCCESS;
+}
+
+//! \brief Checks scalar types (#3-#8) and tensor ranks, then forwards the output tensor meta data.
+static vx_status VX_CALLBACK validateToDecibels(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
+    vx_status status = VX_SUCCESS;
+    STATUS_ERROR_CHECK(validateScalarType(parameters[3], 3, VX_TYPE_FLOAT32, "VX_TYPE_FLOAT32"));
+    STATUS_ERROR_CHECK(validateScalarType(parameters[4], 4, VX_TYPE_FLOAT32, "VX_TYPE_FLOAT32"));
+    STATUS_ERROR_CHECK(validateScalarType(parameters[5], 5, VX_TYPE_FLOAT32, "VX_TYPE_FLOAT32"));
+    STATUS_ERROR_CHECK(validateScalarType(parameters[6], 6, VX_TYPE_INT32, "VX_TYPE_INT32"));
+    STATUS_ERROR_CHECK(validateScalarType(parameters[7], 7, VX_TYPE_INT32, "VX_TYPE_INT32"));
+    STATUS_ERROR_CHECK(validateScalarType(parameters[8], 8, VX_TYPE_UINT32, "VX_TYPE_UINT32"));
+
+    // Check for input parameters
+    size_t num_tensor_dims;
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+    if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ToDecibels: tensor: #0 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);
+
+    // Check for output parameters
+    vx_uint8 tensor_fixed_point_position;
+    size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
+    vx_enum tensor_datatype;
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+    if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: ToDecibels: tensor: #2 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);
+
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
+    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
+    return status;
+}
+
+//! \brief Runs rppt_to_decibels_host on the CPU path; VX_ERROR_NOT_SUPPORTED when RPP_AUDIO is disabled.
+static vx_status VX_CALLBACK processToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+    RppStatus rpp_status = RPP_SUCCESS;
+    vx_status return_status = VX_SUCCESS;
+    ToDecibelsLocalData *data = NULL;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    // Do not hand stale buffers/ROIs to RPP when the refresh itself failed.
+    STATUS_ERROR_CHECK(refreshToDecibels(node, parameters, num, data));
+    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
+#if ENABLE_OPENCL || ENABLE_HIP
+        return_status = VX_ERROR_NOT_IMPLEMENTED;
+#endif
+    }
+    if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
+#if RPP_AUDIO
+        rpp_status = rppt_to_decibels_host(data->pSrc, data->pSrcDesc, data->pDst, data->pDstDesc, data->pSrcDims, data->cutOffDB, data->multiplier, data->referenceMagnitude, data->handle->rppHandle);
+        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
+#else
+        return_status = VX_ERROR_NOT_SUPPORTED;
+#endif
+    }
+    return return_status;
+}
+
+//! \brief Frees everything owned by \p data (ROI dims, descriptors, RPP handle) and \p data itself.
+static vx_status releaseToDecibelsLocalData(vx_node node, ToDecibelsLocalData *data) {
+    vx_status status = VX_SUCCESS;
+    if (!data) return status;
+    delete[] data->pSrcDims;
+    delete data->pSrcDesc;
+    delete data->pDstDesc;
+    // handle is still null when initialization failed before createRPPHandle succeeded.
+    if (data->handle) status = releaseRPPHandle(node, data->handle, data->deviceType);
+    delete data;
+    return status;
+}
+
+//! \brief Fills \p data from the node parameters: scalars, layouts, tensor descriptors, ROI storage, RPP handle.
+static vx_status populateToDecibelsLocalData(vx_node node, const vx_reference *parameters, vx_uint32 num, ToDecibelsLocalData *data) {
+    vx_enum input_tensor_datatype, output_tensor_datatype;
+    vx_int32 input_layout, output_layout;
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[3], &data->cutOffDB));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[4], &data->multiplier));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[5], &data->referenceMagnitude));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[6], &input_layout));
+    STATUS_ERROR_CHECK(vxReadScalarValue((vx_scalar)parameters[7], &output_layout));
+    STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[8], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
+    data->inputLayout = static_cast<vxTensorLayout>(input_layout);
+    data->outputLayout = static_cast<vxTensorLayout>(output_layout);
+
+    // Querying for input tensor
+    data->pSrcDesc = new RpptDesc;
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcDesc->numDims, sizeof(data->pSrcDesc->numDims)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcDesc->numDims));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_datatype, sizeof(input_tensor_datatype)));
+    data->pSrcDesc->dataType = getRpptDataType(input_tensor_datatype);
+    data->pSrcDesc->offsetInBytes = 0;
+    // The caller-supplied layouts were read but never applied; omitting the argument silently used VX_NHW.
+    fillAudioDescriptionPtrFromDims(data->pSrcDesc, data->inputTensorDims, data->inputLayout);
+
+    // Querying for output tensor
+    data->pDstDesc = new RpptDesc;
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstDesc->numDims, sizeof(data->pDstDesc->numDims)));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstDesc->numDims));
+    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_datatype, sizeof(output_tensor_datatype)));
+    data->pDstDesc->dataType = getRpptDataType(output_tensor_datatype);
+    data->pDstDesc->offsetInBytes = 0;
+    fillAudioDescriptionPtrFromDims(data->pDstDesc, data->outputTensorDims, data->outputLayout);
+
+    data->pSrcDims = new RpptImagePatch[data->pSrcDesc->n];
+    // Best effort, as before: the status is ignored here because processToDecibels refreshes (and checks) again.
+    refreshToDecibels(node, parameters, num, data);
+    STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->pSrcDesc->n, data->deviceType));
+    return VX_SUCCESS;
+}
+
+//! \brief Allocates the node-local data and attaches it to the node; frees it again on any failure.
+static vx_status VX_CALLBACK initializeToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+    ToDecibelsLocalData *data = new (std::nothrow) ToDecibelsLocalData;
+    if (!data) return VX_FAILURE;
+    memset(data, 0, sizeof(ToDecibelsLocalData));
+
+    vx_status status = populateToDecibelsLocalData(node, parameters, num, data);
+    if (status == VX_SUCCESS)
+        status = vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data));
+    // Without this, every early error return leaked data and the members allocated so far.
+    if (status != VX_SUCCESS) releaseToDecibelsLocalData(node, data);
+    return status;
+}
+
+//! \brief Releases the node-local data created by initializeToDecibels.
+static vx_status VX_CALLBACK uninitializeToDecibels(vx_node node, const vx_reference *parameters, vx_uint32 num) {
+    ToDecibelsLocalData *data = NULL;
+    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
+    // The helper checks for null before touching any member.
+    STATUS_ERROR_CHECK(releaseToDecibelsLocalData(node, data));
+    return VX_SUCCESS;
+}
+
+//! \brief The kernel target support callback.
+// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
+static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
+                                                  vx_bool use_opencl_1_2,              // [input]  false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
+                                                  vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
+) {
+    vx_context context = vxGetContext((vx_reference)graph);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
+    else
+        supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
+
+    return VX_SUCCESS;
+}
+
+//! \brief Registers the ToDecibels user kernel, its callbacks and its nine parameters with \p context.
+vx_status ToDecibels_Register(vx_context context) {
+    vx_status status = VX_SUCCESS;
+    // Add kernel to the context with callbacks
+    vx_kernel kernel = vxAddUserKernel(context, VX_KERNEL_RPP_TODECIBELS_NAME,
+                                       VX_KERNEL_RPP_TODECIBELS,
+                                       processToDecibels,
+                                       9,
+                                       validateToDecibels,
+                                       initializeToDecibels,
+                                       uninitializeToDecibels);
+    ERROR_CHECK_OBJECT(kernel);
+    AgoTargetAffinityInfo affinity;
+    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
+#if ENABLE_HIP
+    vx_bool enableBufferAccess = vx_true_e;
+    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
+#else
+    vx_bool enableBufferAccess = vx_false_e;
+#endif
+    amd_kernel_query_target_support_f query_target_support_f = query_target_support;
+
+    if (kernel) {
+        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 5, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 6, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 7, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 8, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
+        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
+    }
+    if (status != VX_SUCCESS) {
+    exit:
+        vxRemoveKernel(kernel);
+        return VX_FAILURE;
+    }
+
+    return status;
+}