From 22a39a4a3c098e6b4433ecd4df9c515aa0149dcc Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 26 Jul 2024 07:39:51 +0000 Subject: [PATCH 01/31] initial commit --- include/rppt_tensor_filter_augmentations.h | 22 ++++++ .../cpu/host_tensor_filter_augmentations.hpp | 30 ++++++++ src/modules/cpu/kernel/sobel_filter.hpp | 53 ++++++++++++++ .../rppt_tensor_filter_augmentations.cpp | 70 +++++++++++++++++++ 4 files changed, 175 insertions(+) create mode 100644 src/modules/cpu/host_tensor_filter_augmentations.hpp create mode 100644 src/modules/cpu/kernel/sobel_filter.hpp diff --git a/include/rppt_tensor_filter_augmentations.h b/include/rppt_tensor_filter_augmentations.h index 992631c49..e3d8d40d8 100644 --- a/include/rppt_tensor_filter_augmentations.h +++ b/include/rppt_tensor_filter_augmentations.h @@ -93,6 +93,28 @@ RppStatus rppt_box_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t RppStatus rppt_gaussian_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *stdDevTensor, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle); #endif // GPU_SUPPORT +/*! \brief Sobel Filter augmentation on HOST backend for a NCHW layout tensor + * \details The sobel filter augmentation runs for a batch of greyscale(1 channel) images with NCHW tensor layout.
+ * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). + * - dstPtr depth ranges - Will be same depth as srcPtr. + * \image html img150x150.png Sample Input + * \image html filter_augmentations_sobel_filter_kSize3_img150x150.png Sample 3x3 Output + * \image html filter_augmentations_sobel_filter_kSize5_img150x150.png Sample 5x5 Output + * \image html filter_augmentations_sobel_filter_kSize7_img150x150.png Sample 7x7 Output + * \param [in] srcPtr source tensor in HOST memory + * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) + * \param [out] dstPtr destination tensor in HOST memory + * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr) + * \param [in] kernelSize kernel size for sobel filter (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch) + * \param [in] roiTensorPtrSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) + * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) + * \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize() + * \return A \ref RppStatus enumeration. + * \retval RPP_SUCCESS Successful completion. + * \retval RPP_ERROR* Unsuccessful completion. + */ +RppStatus rppt_sobel_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32u sobelType, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle); + /*! 
@} */ diff --git a/src/modules/cpu/host_tensor_filter_augmentations.hpp b/src/modules/cpu/host_tensor_filter_augmentations.hpp new file mode 100644 index 000000000..c453e3b65 --- /dev/null +++ b/src/modules/cpu/host_tensor_filter_augmentations.hpp @@ -0,0 +1,30 @@ +/* +MIT License + +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + +#ifndef HOST_TENSOR_FILTER_AUGMENTATIONS_HPP +#define HOST_TENSOR_FILTER_AUGMENTATIONS_HPP + +#include "kernel/sobel_filter.hpp" + +#endif // HOST_TENSOR_FILTER_AUGMENTATIONS_HPP \ No newline at end of file diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp new file mode 100644 index 000000000..4f15027e0 --- /dev/null +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -0,0 +1,53 @@ +/* +MIT License + +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#include "rppdefs.h" +#include "rpp_cpu_simd.hpp" +#include "rpp_cpu_common.hpp" + +RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, + RpptDescPtr srcDescPtr, + Rpp8u *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32u sobelType, + Rpp32u kernelSize, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams layoutParams, + rpp::Handle& handle) +{ + RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; + Rpp32u numThreads = handle.GetNumThreads(); + + omp_set_dynamic(0); +#pragma omp parallel for num_threads(numThreads) + for(int batchCount = 0; batchCount < dstDescPtr->n; batchCount++) + { + RpptROI roi; + RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; + compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + } + + return RPP_SUCCESS; +} \ No newline at end of file diff --git a/src/modules/rppt_tensor_filter_augmentations.cpp b/src/modules/rppt_tensor_filter_augmentations.cpp index 5d06840dc..833c7bea1 100644 --- a/src/modules/rppt_tensor_filter_augmentations.cpp +++ b/src/modules/rppt_tensor_filter_augmentations.cpp @@ -25,12 +25,82 @@ SOFTWARE. 
#include "rppdefs.h" #include "rppi_validate.hpp" #include "rppt_tensor_filter_augmentations.h" +#include "cpu/host_tensor_filter_augmentations.hpp" #ifdef HIP_COMPILE #include #include "hip/hip_tensor_filter_augmentations.hpp" #endif // HIP_COMPILE +/******************** sobel_filter ********************/ + +RppStatus rppt_sobel_filter_host(RppPtr_t srcPtr, + RpptDescPtr srcDescPtr, + RppPtr_t dstPtr, + RpptDescPtr dstDescPtr, + Rpp32u sobelType, + Rpp32u kernelSize, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + rppHandle_t rppHandle) +{ + RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c); + + if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) + { + sobel_filter_u8_u8_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) + { + // sobel_filter_f16_f16_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + // srcDescPtr, + // reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + // dstDescPtr, + // sobelType, + // kernelSize, + // roiTensorPtrSrc, + // roiType, + // layoutParams, + // rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) + { + // sobel_filter_f32_f32_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + // srcDescPtr, + // reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + // dstDescPtr, + // sobelType, + // kernelSize, + // roiTensorPtrSrc, + // roiType, + // layoutParams, + // rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) + { + // 
sobel_filter_i8_i8_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + // srcDescPtr, + // static_cast(dstPtr) + dstDescPtr->offsetInBytes, + // dstDescPtr, + // sobelType, + // kernelSize, + // roiTensorPtrSrc, + // roiType, + // layoutParams, + // rpp::deref(rppHandle)); + } + return RPP_SUCCESS; +} + /********************************************************************************************************************/ /*********************************************** RPP_GPU_SUPPORT = ON ***********************************************/ /********************************************************************************************************************/ From 568a71a1e8430f7415185bf51c1ba37fc362416d Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 26 Jul 2024 13:15:07 +0000 Subject: [PATCH 02/31] added initial support for 3x3 kernel gradient x variant --- src/include/cpu/rpp_cpu_filter.hpp | 568 +++++++++++++++++++ src/modules/cpu/kernel/sobel_filter.hpp | 139 +++++ utilities/test_suite/HOST/Tensor_host.cpp | 15 + utilities/test_suite/rpp_test_suite_common.h | 1 + 4 files changed, 723 insertions(+) create mode 100644 src/include/cpu/rpp_cpu_filter.hpp diff --git a/src/include/cpu/rpp_cpu_filter.hpp b/src/include/cpu/rpp_cpu_filter.hpp new file mode 100644 index 000000000..58b296ec0 --- /dev/null +++ b/src/include/cpu/rpp_cpu_filter.hpp @@ -0,0 +1,568 @@ +/* +MIT License + +Copyright (c) 2019 - 2024 Advanced Micro Devices, Inc. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#ifndef AMD_RPP_RPP_CPU_FILTER_HPP +#define AMD_RPP_RPP_CPU_FILTER_HPP + +#include "stdio.h" +#include "rppdefs.h" +#include +using halfhpp = half_float::half; +typedef halfhpp Rpp16f; +#include "rpp_cpu_simd.hpp" + +#if _WIN32 +#include +#else +#include +#include +#include +#endif + +const __m128i xmm_pxMaskRotate0To1 = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1); +const __m128i xmm_pxMaskRotate0To3 = _mm_setr_epi8(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3); +const __m128i xmm_pxMaskRotate0To5 = _mm_setr_epi8(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5); +const __m128i xmm_pxMaskRotate0To7 = _mm_setr_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7); +const __m128i xmm_pxMaskRotate0To9 = _mm_setr_epi8(10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); +const __m128i xmm_pxMaskRotate0To11 = _mm_setr_epi8(12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11); +const __m128i xmm_pxMaskRotate0To13 = _mm_setr_epi8(14, 15, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13); + +const __m256i avx_pxMaskRotate0To1 = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0); +const __m256i avx_pxMaskRotate0To2 = _mm256_setr_epi32(2, 3, 4, 5, 6, 7, 0, 1); +const __m256i avx_pxMaskRotate0To3 = _mm256_setr_epi32(3, 4, 5, 6, 7, 0, 1, 2); +const __m256i avx_pxMaskRotate0To4 = _mm256_setr_epi32(4, 5, 6, 7, 0, 1, 2, 3); +const __m256i avx_pxMaskRotate0To5 = _mm256_setr_epi32(5, 6, 7, 0, 1, 2, 3, 4); +const __m256i avx_pxMaskRotate0To6 = _mm256_setr_epi32(6, 7, 0, 1, 2, 3, 4, 5); +const __m256i avx_pxMaskRotate0To7 = _mm256_setr_epi32(7, 0, 1, 2, 3, 4, 5, 6); + +template +inline void increment_row_ptrs(T **srcPtrTemp, Rpp32u kernelSize, Rpp32s increment) +{ + for (int i = 0; i < kernelSize; i++) + srcPtrTemp[i] += increment; +} + +// get the kernel loop limit based on index +inline void get_kernel_loop_limit(Rpp32s &index, Rpp32s &loopLimit, Rpp32u &padLength, Rpp32u &unpaddedLength) +{ + if ((index < padLength) || (index >= 
unpaddedLength)) + { + Rpp32u factor = (index < padLength) ? (index - padLength) : (unpaddedLength - 1 - index); + loopLimit += factor; + } +} + +// extract 4 SSE registers from 2 AVX registers +inline void extract_4sse_registers(__m256i *pxRowHalf, __m128i *px128) +{ + px128[0] = _mm256_castsi256_si128(pxRowHalf[0]); + px128[1] = _mm256_castsi256_si128(pxRowHalf[1]); + px128[2] = _mm256_extracti128_si256(pxRowHalf[0], 1); + px128[3] = _mm256_extracti128_si256(pxRowHalf[1], 1); +} + +// extract 3 SSE registers from 2 AVX registers +inline void extract_3sse_registers(__m256i *pxRowHalf, __m128i *px128) +{ + px128[0] = _mm256_castsi256_si128(pxRowHalf[0]); + px128[1] = _mm256_castsi256_si128(pxRowHalf[1]); + px128[2] = _mm256_extracti128_si256(pxRowHalf[0], 1); +} + +// -------------------- U8/I8 bitdepth compute functions for kernel size (3/5/7/9) -------------------- + +inline void blend_shuffle_add_3x3_pln_host(__m128i *px128) +{ + /* px128[0] - [X01|X02|X03|X04|X05|X06|X07|X08] + px128[1] - [X09|X10|X11|X12|X13|X14|X15|X16] */ + __m128i pxTemp[2]; + pxTemp[0] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 1), xmm_pxMaskRotate0To1); // blend with mask [0000 0001] and shuffle - [X02|X03|X04|X05|X06|X07|X08|X09] + pxTemp[1] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 3), xmm_pxMaskRotate0To3); // blend with mask [0000 0011] and shuffle - [X03|X04|X05|X06|X07|X08|X09|X10] + px128[0] = _mm_add_epi16(px128[0], pxTemp[0]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[1]); +} + +inline void blend_shuffle_add_3x3_pkd_host(__m128i *px128) +{ + /* px128[0] - [R01|G01|B01|R02|G02|B02|R03|G03] + px128[1] - [B03|R04|G04|B04|R05|G05|B05|R06] */ + __m128i pxTemp[2]; + pxTemp[0] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 7), xmm_pxMaskRotate0To5); // blend with mask [0000 0111] and shuffle - [R02|G02|B02|R03|G03|B03|R04|G04] + pxTemp[1] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 63), xmm_pxMaskRotate0To11); // blend with mask [0011 1111] 
and shuffle - [R03|G03|B03|R04|G04|B04|R05|G05] + px128[0] = _mm_add_epi16(px128[0], pxTemp[0]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[1]); +} + +inline void blend_shuffle_add_5x5_pln_host(__m128i *px128) +{ + /* px128[0] - [X01|X02|X03|X04|X05|X06|X07|X08] + px128[1] - [X09|X10|X11|X12|X13|X14|X15|X16] */ + __m128i pxTemp[4]; + pxTemp[0] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 1), xmm_pxMaskRotate0To1); // blend with mask [0000 0001] and shuffle - [X02|X03|X04|X05|X06|X07|X08|X09] + pxTemp[1] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 3), xmm_pxMaskRotate0To3); // blend with mask [0000 0011] and shuffle - [X03|X04|X05|X06|X07|X08|X09|X10] + pxTemp[2] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 7), xmm_pxMaskRotate0To5); // blend with mask [0000 0111] and shuffle - [X04|X05|X06|X07|X08|X09|X10|X11] + pxTemp[3] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 15), xmm_pxMaskRotate0To7); // blend with mask [0000 1111] and shuffle - [X05|X06|X07|X08|X09|X10|X11|X12] + px128[0] = _mm_add_epi16(px128[0], pxTemp[0]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[1]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[2]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[3]); +} + +inline void blend_shuffle_add_5x5_pkd_host(__m128i *px128) +{ + /* px128[0] - [R01|G01|B01|R02|G02|B02|R03|G03] + px128[1] - [B03|R04|G04|B04|R05|G05|B05|R06] + px128[2] - [G06|B06|R07|G07|B07|R08|G08|B08] */ + __m128i pxTemp[4]; + pxTemp[0] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 7), xmm_pxMaskRotate0To5); // blend with mask [0000 0111] and shuffle - [R02|G02|B02|R03|G03|B03|R04|G04] + pxTemp[1] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 63), xmm_pxMaskRotate0To11); // blend with mask [0011 1111] and shuffle - [R03|G03|B03|R04|G04|B04|R05|G05] + pxTemp[2] = _mm_shuffle_epi8(_mm_blend_epi16(px128[1], px128[2], 1), xmm_pxMaskRotate0To1); // blend with mask [0000 0001] and shuffle - [R04|G04|B04|R05|G05|B05|R06|G06] + pxTemp[3] = 
_mm_shuffle_epi8(_mm_blend_epi16(px128[1], px128[2], 15), xmm_pxMaskRotate0To7); // blend with mask [0000 1111] and shuffle - [R05|G05|B05|R06|G06|B06|R07|G07] + px128[0] = _mm_add_epi16(px128[0], pxTemp[0]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[1]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[2]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[3]); +} + +inline void blend_shuffle_add_7x7_pln_host(__m128i *px128) +{ + /* px128[0] - [X01|X02|X03|X04|X05|X06|X07|X08] + px128[1] - [X09|X10|X11|X12|X13|X14|X15|X16] */ + __m128i pxTemp[6]; + pxTemp[0] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 1), xmm_pxMaskRotate0To1); // blend with mask [0000 0001] and shuffle - [X02|X03|X04|X05|X06|X07|X08|X09] + pxTemp[1] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 3), xmm_pxMaskRotate0To3); // blend with mask [0000 0011] and shuffle - [X03|X04|X05|X06|X07|X08|X09|X10] + pxTemp[2] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 7), xmm_pxMaskRotate0To5); // blend with mask [0000 0111] and shuffle - [X04|X05|X06|X07|X08|X09|X10|X11] + pxTemp[3] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 15), xmm_pxMaskRotate0To7); // blend with mask [0000 1111] and shuffle - [X05|X06|X07|X08|X09|X10|X11|X12] + pxTemp[4] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 31), xmm_pxMaskRotate0To9); // blend with mask [0001 1111] and shuffle - [X06|X07|X08|X09|X10|X11|X12|X13] + pxTemp[5] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 63), xmm_pxMaskRotate0To11); // blend with mask [0011 1111] and shuffle - [X07|X08|X09|X10|X11|X12|X13|X14] + px128[0] = _mm_add_epi16(px128[0], pxTemp[0]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[1]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[2]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[3]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[4]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[5]); +} + +inline void blend_shuffle_add_7x7_pkd_host(__m128i *px128) +{ + /* px128[0] - [R01|G01|B01|R02|G02|B02|R03|G03] 
+ px128[1] - [B03|R04|G04|B04|R05|G05|B05|R06] + px128[2] - [G06|B06|R07|G07|B07|R08|G08|B08] + px128[3] - [R09|G09|B09|R10|G10|B10|R11|G11] */ + __m128i pxTemp[6]; + pxTemp[0] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 7), xmm_pxMaskRotate0To5); // blend with mask [0000 0111] and shuffle - [R02|G02|B02|R03|G03|B03|R04|G04] + pxTemp[1] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 63), xmm_pxMaskRotate0To11); // blend with mask [0011 1111] and shuffle - [R03|G03|B03|R04|G04|B04|R05|G05] + pxTemp[2] = _mm_shuffle_epi8(_mm_blend_epi16(px128[1], px128[2], 1), xmm_pxMaskRotate0To1); // blend with mask [0000 0001] and shuffle - [R04|G04|B04|R05|G05|B05|R06|G06] + pxTemp[3] = _mm_shuffle_epi8(_mm_blend_epi16(px128[1], px128[2], 15), xmm_pxMaskRotate0To7); // blend with mask [0000 1111] and shuffle - [R05|G05|B05|R06|G06|B06|R07|G07] + pxTemp[4] = _mm_shuffle_epi8(_mm_blend_epi16(px128[1], px128[2], 127), xmm_pxMaskRotate0To13); // blend with mask [0111 1111] and shuffle - [R06|G06|B06|R07|G07|B07|R08|G08] + pxTemp[5] = _mm_shuffle_epi8(_mm_blend_epi16(px128[2], px128[3], 3), xmm_pxMaskRotate0To3); // blend with mask [0000 0011] and shuffle - [R07|G07|B07|R08|G08|B08|R09|G09] + px128[0] = _mm_add_epi16(px128[0], pxTemp[0]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[1]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[2]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[3]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[4]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[5]); +} + +inline void blend_shuffle_add_9x9_pln_host(__m128i *px128) +{ + /* px128[0] - [X01|X02|X03|X04|X05|X06|X07|X08] + px128[1] - [X09|X10|X11|X12|X13|X14|X15|X16] */ + __m128i pxTemp[7]; + pxTemp[0] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 1), xmm_pxMaskRotate0To1); // blend with mask [0000 0001] and shuffle - [X02|X03|X04|X05|X06|X07|X08|X09] + pxTemp[1] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 3), xmm_pxMaskRotate0To3); // blend with mask [0000 0011] and 
shuffle - [X03|X04|X05|X06|X07|X08|X09|X10] + pxTemp[2] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 7), xmm_pxMaskRotate0To5); // blend with mask [0000 0111] and shuffle - [X04|X05|X06|X07|X08|X09|X10|X11] + pxTemp[3] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 15), xmm_pxMaskRotate0To7); // blend with mask [0000 1111] and shuffle - [X05|X06|X07|X08|X09|X10|X11|X12] + pxTemp[4] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 31), xmm_pxMaskRotate0To9); // blend with mask [0001 1111] and shuffle - [X06|X07|X08|X09|X10|X11|X12|X13] + pxTemp[5] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 63), xmm_pxMaskRotate0To11); // blend with mask [0011 1111] and shuffle - [X07|X08|X09|X10|X11|X12|X13|X14] + pxTemp[6] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 127), xmm_pxMaskRotate0To13); // blend with mask [0111 1111] and shuffle - [X08|X09|X10|X11|X12|X13|X14|X15] + px128[0] = _mm_add_epi16(px128[0], pxTemp[0]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[1]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[2]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[3]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[4]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[5]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[6]); + px128[0] = _mm_add_epi16(px128[0], px128[1]); +} +inline void blend_shuffle_add_9x9_pkd_host(__m128i *px128) +{ + /* px128[0] - [R01|G01|B01|R02|G02|B02|R03|G03] + px128[1] - [B03|R04|G04|B04|R05|G05|B05|R06] + px128[2] - [G06|B06|R07|G07|B07|R08|G08|B08] + px128[3] - [R09|G09|B09|R10|G10|B10|R11|G11] */ + __m128i pxTemp[7]; + pxTemp[0] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 7), xmm_pxMaskRotate0To5); // blend with mask [0000 0111] and shuffle - [R02|G02|B02|R03|G03|B03|R04|G04] + pxTemp[1] = _mm_shuffle_epi8(_mm_blend_epi16(px128[0], px128[1], 63), xmm_pxMaskRotate0To11); // blend with mask [0011 1111] and shuffle - [R03|G03|B03|R04|G04|B04|R05|G05] + pxTemp[2] = _mm_shuffle_epi8(_mm_blend_epi16(px128[1], 
px128[2], 1), xmm_pxMaskRotate0To1); // blend with mask [0000 0001] and shuffle - [R04|G04|B04|R05|G05|B05|R06|G06] + pxTemp[3] = _mm_shuffle_epi8(_mm_blend_epi16(px128[1], px128[2], 15), xmm_pxMaskRotate0To7); // blend with mask [0000 1111] and shuffle - [R05|G05|B05|R06|G06|B06|R07|G07] + pxTemp[4] = _mm_shuffle_epi8(_mm_blend_epi16(px128[1], px128[2], 127), xmm_pxMaskRotate0To13); // blend with mask [0111 1111] and shuffle - [R06|G06|B06|R07|G07|B07|R08|G08] + pxTemp[5] = _mm_shuffle_epi8(_mm_blend_epi16(px128[2], px128[3], 3), xmm_pxMaskRotate0To3); // blend with mask [0000 0011] and shuffle - [R07|G07|B07|R08|G08|B08|R09|G09] + pxTemp[6] = _mm_shuffle_epi8(_mm_blend_epi16(px128[2], px128[3], 31), xmm_pxMaskRotate0To9); // blend with mask [0001 1111] and shuffle - [R08|G08|B08|R09|G09|B09|R10|G10] + px128[0] = _mm_add_epi16(px128[0], pxTemp[0]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[1]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[2]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[3]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[4]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[5]); + px128[0] = _mm_add_epi16(px128[0], pxTemp[6]); + px128[0] = _mm_add_epi16(px128[0], px128[3]); +} + +// -------------------- F32/F16 bitdepth compute functions for kernel size (3/5/7/9) -------------------- + +inline void blend_permute_add_mul_3x3_pln(__m256 *pSrc, __m256 *pDst, __m256 pConvolutionFactor) +{ + /* pSrc[0] - [X01|X02|X03|X04|X05|X06|X07|X08] + pSrc[1] - [X09|X10|X11|X12|X13|X14|X15|X16] */ + pDst[0] = _mm256_add_ps(pSrc[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 1), avx_pxMaskRotate0To1)); // blend with mask [0000 0001] and permute - [X02|X03|X04|X05|X06|X07|X08|X09] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 3), avx_pxMaskRotate0To2)); // blend with mask [0000 0011] and permute - [X03|X04|X05|X06|X07|X08|X09|X10] + pDst[0] = _mm256_mul_ps(pDst[0], pConvolutionFactor); +} + +inline void 
blend_permute_add_mul_3x3_pkd(__m256 *pSrc, __m256 *pDst, __m256 pConvolutionFactor) +{ + /* pSrc[0] - [R01|G01|B01|R02|G02|B02|R03|G03] + pSrc[1] - [B03|R04|G04|B04|R05|G05|B05|R06] */ + pDst[0] = _mm256_add_ps(pSrc[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 7), avx_pxMaskRotate0To3)); // blend with mask [0000 0111] and permute - [R02|G02|B02|R03|G03|B03|R04|G04] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 63), avx_pxMaskRotate0To6)); // blend with mask [0011 1111] and permute - [R03|G03|B03|R04|G04|B04|R05|G05] + pDst[0] = _mm256_mul_ps(pDst[0], pConvolutionFactor); +} + +inline void blend_permute_add_mul_5x5_pln(__m256 *pSrc, __m256 *pDst, __m256 pConvolutionFactor) +{ + /* pSrc[0] - [X01|X02|X03|X04|X05|X06|X07|X08] + pSrc[1] - [X09|X10|X11|X12|X13|X14|X15|X16] */ + pDst[0] = _mm256_add_ps(pSrc[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 1), avx_pxMaskRotate0To1)); // blend with mask [0000 0001] and permute - [X02|X03|X04|X05|X06|X07|X08|X09] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 3), avx_pxMaskRotate0To2)); // blend with mask [0000 0011] and permute - [X03|X04|X05|X06|X07|X08|X09|X10] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 7), avx_pxMaskRotate0To3)); // blend with mask [0000 0111] and permute - [X04|X05|X06|X07|X08|X09|X10|X11] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 15), avx_pxMaskRotate0To4)); // blend with mask [0000 1111] and permute - [X05|X06|X07|X08|X09|X10|X11|X12] + pDst[0] = _mm256_mul_ps(pDst[0], pConvolutionFactor); +} + +inline void blend_permute_add_mul_5x5_pkd(__m256 *pSrc, __m256 *pDst, __m256 pConvolutionFactor) +{ + /* pSrc[0] - [R01|G01|B01|R02|G02|B02|R03|G03] + pSrc[1] - [B03|R04|G04|B04|R05|G05|B05|R06] + pSrc[2] - [G06|B06|R07|G07|B07|R08|G08|B08] */ + pDst[0] = _mm256_add_ps(pSrc[0], 
_mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 7), avx_pxMaskRotate0To3)); // blend with mask [0000 0111] and permute - [R02|G02|B02|R03|G03|B03|R04|G04] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 63), avx_pxMaskRotate0To6)); // blend with mask [0011 1111] and permute - [R03|G03|B03|R04|G04|B04|R05|G05] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[1], pSrc[2], 1), avx_pxMaskRotate0To1)); // blend with mask [0000 0001] and permute - [R04|G04|B04|R05|G05|B05|R06|G06] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[1], pSrc[2], 15), avx_pxMaskRotate0To4)); // blend with mask [0000 1111] and permute - [R05|G05|B05|R06|G06|B06|R07|G07] + pDst[0] = _mm256_mul_ps(pDst[0], pConvolutionFactor); +} + +inline void blend_permute_add_mul_7x7_pln(__m256 *pSrc, __m256 *pDst, __m256 pConvolutionFactor) +{ + /* pSrc[0] - [X01|X02|X03|X04|X05|X06|X07|X08] + pSrc[1] - [X09|X10|X11|X12|X13|X14|X15|X16] */ + pDst[0] = _mm256_add_ps(pSrc[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 1), avx_pxMaskRotate0To1)); // blend with mask [0000 0001] and permute - [X02|X03|X04|X05|X06|X07|X08|X09] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 3), avx_pxMaskRotate0To2)); // blend with mask [0000 0011] and permute - [X03|X04|X05|X06|X07|X08|X09|X10] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 7), avx_pxMaskRotate0To3)); // blend with mask [0000 0111] and permute - [X04|X05|X06|X07|X08|X09|X10|X11] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 15), avx_pxMaskRotate0To4)); // blend with mask [0000 1111] and permute - [X05|X06|X07|X08|X09|X10|X11|X12] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 31), avx_pxMaskRotate0To5)); // blend with mask [0001 1111] and 
permute - [X06|X07|X08|X09|X10|X11|X12|X13] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 63), avx_pxMaskRotate0To6)); // blend with mask [0011 1111] and permute - [X07|X08|X09|X10|X11|X12|X13|X14] + pDst[0] = _mm256_mul_ps(pDst[0], pConvolutionFactor); +} + +inline void blend_permute_add_mul_7x7_pkd(__m256 *pSrc, __m256 *pDst, __m256 pConvolutionFactor) +{ + /* pSrc[0] - [R01|G01|B01|R02|G02|B02|R03|G03] + pSrc[1] - [B03|R04|G04|B04|R05|G05|B05|R06] + pSrc[2] - [G06|B06|R07|G07|B07|R08|G08|B08] + pSrc[3] - [R09|G09|B09|R10|G10|B10|R11|G11] */ + pDst[0] = _mm256_add_ps(pSrc[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 7), avx_pxMaskRotate0To3)); // blend with mask [0000 0111] and permute - [R02|G02|B02|R03|G03|B03|R04|G04] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 63), avx_pxMaskRotate0To6)); // blend with mask [0011 1111] and permute - [R03|G03|B03|R04|G04|B04|R05|G05] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[1], pSrc[2], 1), avx_pxMaskRotate0To1)); // blend with mask [0000 0001] and permute - [R04|G04|B04|R05|G05|B05|R06|G06] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[1], pSrc[2], 15), avx_pxMaskRotate0To4)); // blend with mask [0000 1111] and permute - [R05|G05|B05|R06|G06|B06|R07|G07] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[1], pSrc[2], 127), avx_pxMaskRotate0To7)); // blend with mask [0111 1111] and permute - [R06|G06|B06|R07|G07|B07|R08|G08] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[2], pSrc[3], 3), avx_pxMaskRotate0To2)); // blend with mask [0000 0011] and permute - [R07|G07|B07|R08|G08|B08|R09|G09] + pDst[0] = _mm256_mul_ps(pDst[0], pConvolutionFactor); +} + +inline void blend_permute_add_mul_9x9_pln(__m256 *pSrc, __m256 *pDst, __m256 pConvolutionFactor) +{ + /* pSrc[0] - 
[X01|X02|X03|X04|X05|X06|X07|X08] + pSrc[1] - [X09|X10|X11|X12|X13|X14|X15|X16] */ + pDst[0] = _mm256_add_ps(pSrc[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 1), avx_pxMaskRotate0To1)); // blend with mask [0000 0001] and permute - [X02|X03|X04|X05|X06|X07|X08|X09] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 3), avx_pxMaskRotate0To2)); // blend with mask [0000 0011] and permute - [X03|X04|X05|X06|X07|X08|X09|X10] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 7), avx_pxMaskRotate0To3)); // blend with mask [0000 0111] and permute - [X04|X05|X06|X07|X08|X09|X10|X11] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 15), avx_pxMaskRotate0To4)); // blend with mask [0000 1111] and permute - [X05|X06|X07|X08|X09|X10|X11|X12] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 31), avx_pxMaskRotate0To5)); // blend with mask [0001 1111] and permute - [X06|X07|X08|X09|X10|X11|X12|X13] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 63), avx_pxMaskRotate0To6)); // blend with mask [0011 1111] and permute - [X07|X08|X09|X10|X11|X12|X13|X14] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 127), avx_pxMaskRotate0To7)); // blend with mask [0111 1111] and permute - [X08|X09|X10|X11|X12|X13|X14|X15] + pDst[0] = _mm256_add_ps(pDst[0], pSrc[1]); + pDst[0] = _mm256_mul_ps(pDst[0], pConvolutionFactor); +} + +inline void blend_permute_add_mul_9x9_pkd(__m256 *pSrc, __m256 *pDst, __m256 pConvolutionFactor) +{ + /* pSrc[0] - [R01|G01|B01|R02|G02|B02|R03|G03] + pSrc[1] - [B03|R04|G04|B04|R05|G05|B05|R06] + pSrc[2] - [G06|B06|R07|G07|B07|R08|G08|B08] + pSrc[3] - [R09|G09|B09|R10|G10|B10|R11|G11] */ + pDst[0] = _mm256_add_ps(pSrc[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 7), 
avx_pxMaskRotate0To3)); // blend with mask [0000 0111] and permute - [R02|G02|B02|R03|G03|B03|R04|G04] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[0], pSrc[1], 63), avx_pxMaskRotate0To6)); // blend with mask [0011 1111] and permute - [R03|G03|B03|R04|G04|B04|R05|G05] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[1], pSrc[2], 1), avx_pxMaskRotate0To1)); // blend with mask [0000 0001] and permute - [R04|G04|B04|R05|G05|B05|R06|G06] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[1], pSrc[2], 15), avx_pxMaskRotate0To4)); // blend with mask [0000 1111] and permute - [R05|G05|B05|R06|G06|B06|R07|G07] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[1], pSrc[2], 127), avx_pxMaskRotate0To7)); // blend with mask [0111 1111] and permute - [R06|G06|B06|R07|G07|B07|R08|G08] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[2], pSrc[3], 3), avx_pxMaskRotate0To2)); // blend with mask [0000 0011] and permute - [R07|G07|B07|R08|G08|B08|R09|G09] + pDst[0] = _mm256_add_ps(pDst[0], _mm256_permutevar8x32_ps(_mm256_blend_ps(pSrc[2], pSrc[3], 31), avx_pxMaskRotate0To5)); // blend with mask [0001 1111] and permute - [R08|G08|B08|R09|G09|B09|R10|G10] + pDst[0] = _mm256_add_ps(pDst[0], pSrc[3]); + pDst[0] = _mm256_mul_ps(pDst[0], pConvolutionFactor); +} + +// -------------------- Filter load functions for U8 bitdepth -------------------- + +// load function for 3x3 kernel size +inline void rpp_load_box_filter_char_3x3_host(__m256i *pxRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 2 rows for 3x3 kernel + pxRow[0] = _mm256_loadu_si256((__m256i *)srcPtrTemp[0]); + pxRow[1] = _mm256_loadu_si256((__m256i *)srcPtrTemp[1]); + + // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 + if (rowKernelLoopLimit == 3) + pxRow[2] = 
_mm256_loadu_si256((__m256i *)srcPtrTemp[2]); + else + pxRow[2] = avx_px0; +} + +// load function for 5x5 kernel size +inline void rpp_load_box_filter_char_5x5_host(__m256i *pxRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 3 rows for 5x5 kernel + pxRow[0] = _mm256_loadu_si256((__m256i *)srcPtrTemp[0]); + pxRow[1] = _mm256_loadu_si256((__m256i *)srcPtrTemp[1]); + pxRow[2] = _mm256_loadu_si256((__m256i *)srcPtrTemp[2]); + for (int k = 3; k < rowKernelLoopLimit; k++) + pxRow[k] = _mm256_loadu_si256((__m256i *)srcPtrTemp[k]); + for (int k = rowKernelLoopLimit; k < 5; k++) + pxRow[k] = avx_px0; +} + +// load function for 7x7 kernel size +inline void rpp_load_box_filter_char_7x7_host(__m256i *pxRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 4 rows for 7x7 kernel + pxRow[0] = _mm256_loadu_si256((__m256i *)srcPtrTemp[0]); + pxRow[1] = _mm256_loadu_si256((__m256i *)srcPtrTemp[1]); + pxRow[2] = _mm256_loadu_si256((__m256i *)srcPtrTemp[2]); + pxRow[3] = _mm256_loadu_si256((__m256i *)srcPtrTemp[3]); + for (int k = 4; k < rowKernelLoopLimit; k++) + pxRow[k] = _mm256_loadu_si256((__m256i *)srcPtrTemp[k]); + for (int k = rowKernelLoopLimit; k < 7; k++) + pxRow[k] = avx_px0; +} + +// load function for 9x9 kernel size +inline void rpp_load_box_filter_char_9x9_host(__m256i *pxRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 5 rows for 9x9 kernel + pxRow[0] = _mm256_loadu_si256((__m256i *)srcPtrTemp[0]); + pxRow[1] = _mm256_loadu_si256((__m256i *)srcPtrTemp[1]); + pxRow[2] = _mm256_loadu_si256((__m256i *)srcPtrTemp[2]); + pxRow[3] = _mm256_loadu_si256((__m256i *)srcPtrTemp[3]); + pxRow[4] = _mm256_loadu_si256((__m256i *)srcPtrTemp[4]); + for (int k = 5; k < rowKernelLoopLimit; k++) + pxRow[k] = _mm256_loadu_si256((__m256i *)srcPtrTemp[k]); + for (int k = rowKernelLoopLimit; k < 9; k++) + pxRow[k] = avx_px0; +} + 
+// -------------------- Filter load functions for I8 bitdepth -------------------- + +// load function for 3x3 kernel size +inline void rpp_load_box_filter_char_3x3_host(__m256i *pxRow, Rpp8s **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 2 rows for 3x3 kernel + pxRow[0] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[0])); + pxRow[1] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[1])); + + // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 + if (rowKernelLoopLimit == 3) + pxRow[2] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[2])); + else + pxRow[2] = avx_p0; +} + +// load function for 5x5 kernel size +inline void rpp_load_box_filter_char_5x5_host(__m256i *pxRow, Rpp8s **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 3 rows for 5x5 kernel + pxRow[0] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[0])); + pxRow[1] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[1])); + pxRow[2] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[2])); + for (int k = 3; k < rowKernelLoopLimit; k++) + pxRow[k] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[k])); + for (int k = rowKernelLoopLimit; k < 5; k++) + pxRow[k] = avx_p0; +} + +// load function for 7x7 kernel size +inline void rpp_load_box_filter_char_7x7_host(__m256i *pxRow, Rpp8s **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 4 rows for 7x7 kernel + pxRow[0] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[0])); + pxRow[1] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[1])); + pxRow[2] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[2])); + pxRow[3] = _mm256_add_epi8(avx_pxConvertI8, 
_mm256_loadu_si256((__m256i *)srcPtrTemp[3])); + for (int k = 4; k < rowKernelLoopLimit; k++) + pxRow[k] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[k])); + for (int k = rowKernelLoopLimit; k < 7; k++) + pxRow[k] = avx_p0; +} + +// load function for 9x9 kernel size +inline void rpp_load_box_filter_char_9x9_host(__m256i *pxRow, Rpp8s **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 5 rows for 9x9 kernel + pxRow[0] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[0])); + pxRow[1] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[1])); + pxRow[2] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[2])); + pxRow[3] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[3])); + pxRow[4] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[4])); + for (int k = 5; k < rowKernelLoopLimit; k++) + pxRow[k] = _mm256_add_epi8(avx_pxConvertI8, _mm256_loadu_si256((__m256i *)srcPtrTemp[k])); + for (int k = rowKernelLoopLimit; k < 9; k++) + pxRow[k] = avx_p0; +} + +// -------------------- Filter load functions for F32 bitdepth -------------------- + +// load function for 3x3 kernel size +inline void rpp_load_box_filter_float_3x3_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 2 rows for 3x3 kernel + pRow[0] = _mm256_loadu_ps(srcPtrTemp[0]); + pRow[1] = _mm256_loadu_ps(srcPtrTemp[1]); + + // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 + if (rowKernelLoopLimit == 3) + pRow[2] = _mm256_loadu_ps(srcPtrTemp[2]); + else + pRow[2] = avx_px0; +} + +// load function for 5x5 kernel size +inline void rpp_load_box_filter_float_5x5_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 3 rows for 5x5 kernel + pRow[0] = 
_mm256_loadu_ps(srcPtrTemp[0]); + pRow[1] = _mm256_loadu_ps(srcPtrTemp[1]); + pRow[2] = _mm256_loadu_ps(srcPtrTemp[2]); + for (int k = 3; k < rowKernelLoopLimit; k++) + pRow[k] = _mm256_loadu_ps(srcPtrTemp[k]); + for (int k = rowKernelLoopLimit; k < 5; k++) + pRow[k] = avx_p0; +} + +// load function for 7x7 kernel size +inline void rpp_load_box_filter_float_7x7_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 4 rows for 7x7 kernel + pRow[0] = _mm256_loadu_ps(srcPtrTemp[0]); + pRow[1] = _mm256_loadu_ps(srcPtrTemp[1]); + pRow[2] = _mm256_loadu_ps(srcPtrTemp[2]); + pRow[3] = _mm256_loadu_ps(srcPtrTemp[3]); + for (int k = 4; k < rowKernelLoopLimit; k++) + pRow[k] = _mm256_loadu_ps(srcPtrTemp[k]); + for (int k = rowKernelLoopLimit; k < 7; k++) + pRow[k] = avx_p0; +} + +// load function for 9x9 kernel size +inline void rpp_load_box_filter_float_9x9_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 5 rows for 9x9 kernel + pRow[0] = _mm256_loadu_ps(srcPtrTemp[0]); + pRow[1] = _mm256_loadu_ps(srcPtrTemp[1]); + pRow[2] = _mm256_loadu_ps(srcPtrTemp[2]); + pRow[3] = _mm256_loadu_ps(srcPtrTemp[3]); + pRow[4] = _mm256_loadu_ps(srcPtrTemp[4]); + for (int k = 5; k < rowKernelLoopLimit; k++) + pRow[k] = _mm256_loadu_ps(srcPtrTemp[k]); + for (int k = rowKernelLoopLimit; k < 9; k++) + pRow[k] = avx_p0; +} + +// -------------------- Filter load functions for F16 bitdepth -------------------- + +// load function for 3x3 kernel size +inline void rpp_load_box_filter_float_3x3_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 2 rows for 3x3 kernel + pRow[0] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[0])))); + pRow[1] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[1])))); + + // if rowKernelLoopLimit is 3 load values 
from 3rd row pointer else set it 0 + if (rowKernelLoopLimit == 3) + pRow[2] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[2])))); + else + pRow[2] = avx_px0; +} + +// load function for 5x5 kernel size +inline void rpp_load_box_filter_float_5x5_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 3 rows for 5x5 kernel + pRow[0] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[0])))); + pRow[1] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[1])))); + pRow[2] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[2])))); + for (int k = 3; k < rowKernelLoopLimit; k++) + pRow[k] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[k])))); + for (int k = rowKernelLoopLimit; k < 5; k++) + pRow[k] = avx_p0; +} + +// load function for 7x7 kernel size +inline void rpp_load_box_filter_float_7x7_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 4 rows for 7x7 kernel + pRow[0] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[0])))); + pRow[1] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[1])))); + pRow[2] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[2])))); + pRow[3] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[3])))); + for (int k = 4; k < rowKernelLoopLimit; k++) + pRow[k] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[k])))); + for (int k = rowKernelLoopLimit; k < 7; k++) + pRow[k] = avx_p0; +} + +// load function for 9x9 kernel size +inline void rpp_load_box_filter_float_9x9_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 5 rows for 9x9 kernel + pRow[0] = 
_mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[0])))); + pRow[1] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[1])))); + pRow[2] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[2])))); + pRow[3] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[3])))); + pRow[4] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[4])))); + for (int k = 5; k < rowKernelLoopLimit; k++) + pRow[k] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[k])))); + for (int k = rowKernelLoopLimit; k < 9; k++) + pRow[k] = avx_p0; +} + +#endif //RPP_CPU_FILTER_HPP \ No newline at end of file diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 4f15027e0..83762d40c 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -25,6 +25,35 @@ SOFTWARE. #include "rppdefs.h" #include "rpp_cpu_simd.hpp" #include "rpp_cpu_common.hpp" +#include "rpp_cpu_filter.hpp" + +inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32s columnIndex, + Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, + Rpp32f *filterTensor, Rpp32u channels = 1) +{ + Rpp32f accum = 0.0f; + Rpp32s columnKernelLoopLimit = kernelSize; + + // find the colKernelLoopLimit based on columnIndex + get_kernel_loop_limit(columnIndex, columnKernelLoopLimit, padLength, unpaddedWidth); + for (int i = 0; i < rowKernelLoopLimit; i++) + for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) + accum += static_cast(srcPtrTemp[i][k]) * filterTensor[i * kernelSize + j]; + + saturate_pixel(accum, dstPtrTemp); +} + +// process padLength number of columns in each row +// left border pixels in image which does not have required pixels in 3x3 kernel, process them separately +inline void process_left_border_columns_pln_pln(Rpp8u 
**srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, + Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterTensor) +{ + for (int k = 0; k < padLength; k++) + { + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterTensor); + dstPtrTemp++; + } +} RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptDescPtr srcDescPtr, @@ -47,6 +76,116 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROI roi; RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); + + Rpp8u *srcPtrImage, *dstPtrImage; + srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; + dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; + + Rpp32u padLength = kernelSize / 2; + Rpp32u bufferLength = roi.xywhROI.roiWidth * layoutParams.bufferMultiplier; + Rpp32u unpaddedHeight = roi.xywhROI.roiHeight - padLength; + Rpp32u unpaddedWidth = roi.xywhROI.roiWidth - padLength; + + Rpp8u *srcPtrChannel, *dstPtrChannel; + srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y * srcDescPtr->strides.hStride) + (roi.xywhROI.xy.x * layoutParams.bufferMultiplier); + dstPtrChannel = dstPtrImage; + if (kernelSize == 3) + { + Rpp32f filter[9] = {-1, 0, 1, -2, 0, 2, -1, 0, 1}; + + Rpp8u *srcPtrRow[3], *dstPtrRow; + for (int i = 0; i < 3; i++) + srcPtrRow[i] = srcPtrChannel + i * srcDescPtr->strides.hStride; + dstPtrRow = dstPtrChannel; +#if __AVX2__ + __m256 pFilter[9]; + pFilter[0] = _mm256_set1_ps(-1); + pFilter[1] = _mm256_set1_ps(0); + pFilter[2] = _mm256_set1_ps(1); + pFilter[3] = _mm256_set1_ps(-2); + pFilter[4] = _mm256_set1_ps(0); + pFilter[5] = _mm256_set1_ps(2); + pFilter[6] = _mm256_set1_ps(-1); + pFilter[7] = _mm256_set1_ps(0); + pFilter[8] = _mm256_set1_ps(1); +#endif + // box filter without fused output-layout toggle (NCHW -> NCHW) + if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == 
RpptLayout::NCHW) && (srcDescPtr->c == 1)) + { + /* exclude 2 * padLength number of columns from alignedLength calculation + since padLength number of columns from the beginning and end of each row will be computed using raw c code */ + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 8) * 8; + for (int c = 0; c < srcDescPtr->c; c++) + { + srcPtrRow[0] = srcPtrChannel; + srcPtrRow[1] = srcPtrRow[0] + srcDescPtr->strides.hStride; + srcPtrRow[2] = srcPtrRow[1] + srcDescPtr->strides.hStride; + dstPtrRow = dstPtrChannel; + for(int i = 0; i < roi.xywhROI.roiHeight; i++) + { + int vectorLoopCount = 0; + bool padLengthRows = (i < padLength) ? 1: 0; + Rpp8u *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; + Rpp8u *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + dstPtrTemp += padLength; +#if __AVX2__ + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 8) + { + __m256 pRow[6]; + // irrespective of row location, we need to load 2 rows for 3x3 kernel + rpp_load16_u8_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_u8_to_f32_avx(srcPtrTemp[1], &pRow[2]); + + // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 + if (rowKernelLoopLimit == 3) + rpp_load16_u8_to_f32_avx(srcPtrTemp[2], &pRow[4]); + else + { + pRow[4] = avx_p0; + pRow[5] = avx_p0; + } + + __m256 pDst[2]; + pDst[0] = avx_p0; + pDst[1] = avx_p0; + for (int k = 0; k < 3; k++) + { + __m256 pTemp[3]; + Rpp32s filterIndex = k * 3; + Rpp32s rowIndex = k * 2; + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], 
pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + } + rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 8); + dstPtrTemp += 8; + } +#endif + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; + } + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; + } + srcPtrChannel += srcDescPtr->strides.cStride; + dstPtrChannel += dstDescPtr->strides.cStride; + } + } + } } return RPP_SUCCESS; diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index bb1312a5e..e77b47b42 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -1083,6 +1083,21 @@ int main(int argc, char **argv) break; } + case 50: + { + testCaseName = "sobel_filter"; + Rpp32u kernelSize = 3; + Rpp32u sobelType = 0; + + startWallTime = omp_get_wtime(); + startCpuTime = clock(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) + rppt_sobel_filter_host(input, srcDescPtr, output, dstDescPtr, sobelType, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + } case 61: { testCaseName = "magnitude"; diff --git a/utilities/test_suite/rpp_test_suite_common.h 
b/utilities/test_suite/rpp_test_suite_common.h index eddf78702..57263dfbe 100644 --- a/utilities/test_suite/rpp_test_suite_common.h +++ b/utilities/test_suite/rpp_test_suite_common.h @@ -97,6 +97,7 @@ std::map augmentationMap = {45, "color_temperature"}, {46, "vignette"}, {49, "box_filter"}, + {50, "sobel_filter"}, {54, "gaussian_filter"}, {61, "magnitude"}, {63, "phase"}, From 50bd38b713a4a582b050d89e9393cba0920d6b2c Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 26 Jul 2024 16:23:19 +0000 Subject: [PATCH 03/31] added support for gradient y and gradient xy variants --- src/modules/cpu/kernel/sobel_filter.hpp | 148 +++++++++++++++++++----- 1 file changed, 120 insertions(+), 28 deletions(-) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 83762d40c..1060e0811 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -40,7 +40,7 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) accum += static_cast(srcPtrTemp[i][k]) * filterTensor[i * kernelSize + j]; - saturate_pixel(accum, dstPtrTemp); + saturate_pixel(std::nearbyintf(accum), dstPtrTemp); } // process padLength number of columns in each row @@ -50,11 +50,49 @@ inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPt { for (int k = 0; k < padLength; k++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterTensor); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterTensor, 1); dstPtrTemp++; } } +inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32s columnIndex, + Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, + Rpp32f *filterXTensor, Rpp32f *filterYTensor, Rpp32u channels = 1) +{ + 
Rpp32f accumX = 0.0f; + Rpp32f accumY = 0.0f; + Rpp32s columnKernelLoopLimit = kernelSize; + + // find the colKernelLoopLimit based on columnIndex + get_kernel_loop_limit(columnIndex, columnKernelLoopLimit, padLength, unpaddedWidth); + for (int i = 0; i < rowKernelLoopLimit; i++) + { + for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) + { + accumX += static_cast(srcPtrTemp[i][k]) * filterXTensor[i * kernelSize + j]; + accumY += static_cast(srcPtrTemp[i][k]) * filterYTensor[i * kernelSize + j]; + } + } + + Rpp32f accum = sqrt((accumX * accumX) + (accumY * accumY)); + saturate_pixel(std::nearbyintf(accum), dstPtrTemp); +} + +// process padLength number of columns in each row +// left border pixels in image which does not have required pixels in 3x3 kernel, process them separately +inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, + Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterXTensor, Rpp32f *filterYTensor) +{ + for (int k = 0; k < padLength; k++) + { + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterXTensor, filterYTensor, 1); + dstPtrTemp++; + } +} + +Rpp32f sobel3x3X[9] = {-1, 0, 1, -2, 0, 2, -1, 0, 1}; +Rpp32f sobel3x3Y[9] = {1, 2, 1, 0, 0, 0, -1, -2, -1}; + RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptDescPtr srcDescPtr, Rpp8u *dstPtr, @@ -85,30 +123,43 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp32u bufferLength = roi.xywhROI.roiWidth * layoutParams.bufferMultiplier; Rpp32u unpaddedHeight = roi.xywhROI.roiHeight - padLength; Rpp32u unpaddedWidth = roi.xywhROI.roiWidth - padLength; + bool combined = (sobelType == 2); + Rpp32f *filter, *filterX, *filterY; Rpp8u *srcPtrChannel, *dstPtrChannel; srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y * srcDescPtr->strides.hStride) + (roi.xywhROI.xy.x * layoutParams.bufferMultiplier); dstPtrChannel = 
dstPtrImage; if (kernelSize == 3) { - Rpp32f filter[9] = {-1, 0, 1, -2, 0, 2, -1, 0, 1}; + __m256 pFilter[9], pFilterX[9], pFilterY[9]; + if (sobelType == 0) + { + filter = sobel3x3X; + for (int i = 0; i < 9; i++) + pFilter[i] = _mm256_set1_ps(filter[i]); + } + else if (sobelType == 1) + { + filter = sobel3x3Y; + for (int i = 0; i < 9; i++) + pFilter[i] = _mm256_set1_ps(filter[i]); + } + else + { + filterX = sobel3x3X; + filterY = sobel3x3Y; + for (int i = 0; i < 9; i++) + { + pFilterX[i] = _mm256_set1_ps(filterX[i]); + pFilterY[i] = _mm256_set1_ps(filterY[i]); + } + } Rpp8u *srcPtrRow[3], *dstPtrRow; for (int i = 0; i < 3; i++) srcPtrRow[i] = srcPtrChannel + i * srcDescPtr->strides.hStride; dstPtrRow = dstPtrChannel; -#if __AVX2__ - __m256 pFilter[9]; - pFilter[0] = _mm256_set1_ps(-1); - pFilter[1] = _mm256_set1_ps(0); - pFilter[2] = _mm256_set1_ps(1); - pFilter[3] = _mm256_set1_ps(-2); - pFilter[4] = _mm256_set1_ps(0); - pFilter[5] = _mm256_set1_ps(2); - pFilter[6] = _mm256_set1_ps(-1); - pFilter[7] = _mm256_set1_ps(0); - pFilter[8] = _mm256_set1_ps(1); -#endif + // box filter without fused output-layout toggle (NCHW -> NCHW) if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && (srcDescPtr->c == 1)) { @@ -131,7 +182,11 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, // get the number of rows needs to be loaded for the corresponding row Rpp32s rowKernelLoopLimit = kernelSize; get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + if (!combined) + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + else + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + dstPtrTemp += padLength; #if __AVX2__ // process 
alignedLength number of columns in each row @@ -152,18 +207,52 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, } __m256 pDst[2]; - pDst[0] = avx_p0; - pDst[1] = avx_p0; - for (int k = 0; k < 3; k++) + if (!combined) { - __m256 pTemp[3]; - Rpp32s filterIndex = k * 3; - Rpp32s rowIndex = k * 2; - - pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); - pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); - pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + pDst[0] = avx_p0; + pDst[1] = avx_p0; + for (int k = 0; k < 3; k++) + { + __m256 pTemp[3]; + Rpp32s filterIndex = k * 3; + Rpp32s rowIndex = k * 2; + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + } + } + else + { + __m256 pDstX[2], pDstY[2]; + for (int k = 0; k < 2; k++) + { + pDstX[k] = avx_p0; + pDstY[k] = avx_p0; + pDst[k] = avx_p0; + } + for (int k = 0; k < 3; k++) + { + __m256 pTemp[3], pRowShift[2]; + Rpp32s filterIndex = k * 3; + Rpp32s rowIndex = k * 2; + + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1); + pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); + 
pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pDstX[0] = _mm256_add_ps(pDstX[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pDstY[0] = _mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + } + pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); + pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); + pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); } rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); increment_row_ptrs(srcPtrTemp, kernelSize, 8); @@ -173,7 +262,10 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, vectorLoopCount += padLength; for (; vectorLoopCount < bufferLength; vectorLoopCount++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + if (!combined) + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + else + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); increment_row_ptrs(srcPtrTemp, kernelSize, 1); dstPtrTemp++; } From 5794a05badab391f25fd9989085ad0d7ab1a2a03 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Sat, 27 Jul 2024 12:21:07 +0000 Subject: [PATCH 04/31] fixed the issue with border pixel processing in raw c code --- src/modules/cpu/kernel/sobel_filter.hpp | 30 +++++++++++++++---------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 1060e0811..cc7feaaf8 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ 
b/src/modules/cpu/kernel/sobel_filter.hpp @@ -29,16 +29,18 @@ SOFTWARE. inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32s columnIndex, Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, - Rpp32f *filterTensor, Rpp32u channels = 1) + Rpp32f *filterTensor, Rpp32s filterRowStartIndex, Rpp32u channels = 1) { Rpp32f accum = 0.0f; Rpp32s columnKernelLoopLimit = kernelSize; // find the colKernelLoopLimit based on columnIndex get_kernel_loop_limit(columnIndex, columnKernelLoopLimit, padLength, unpaddedWidth); + Rpp32s filterColStartIndex = (columnIndex < padLength) ? (kernelSize - columnKernelLoopLimit) : 0; + Rpp32f *filterShiftedTensor = filterTensor + filterRowStartIndex * kernelSize + filterColStartIndex; for (int i = 0; i < rowKernelLoopLimit; i++) for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) - accum += static_cast(srcPtrTemp[i][k]) * filterTensor[i * kernelSize + j]; + accum += static_cast(srcPtrTemp[i][k]) * filterShiftedTensor[i * kernelSize + j]; saturate_pixel(std::nearbyintf(accum), dstPtrTemp); } @@ -46,18 +48,18 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R // process padLength number of columns in each row // left border pixels in image which does not have required pixels in 3x3 kernel, process them separately inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, - Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterTensor) + Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterTensor, Rpp32s filterRowStartIndex) { for (int k = 0; k < padLength; k++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterTensor, 1); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterTensor, filterRowStartIndex, 1); 
dstPtrTemp++; } } inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32s columnIndex, Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, - Rpp32f *filterXTensor, Rpp32f *filterYTensor, Rpp32u channels = 1) + Rpp32f *filterXTensor, Rpp32f *filterYTensor, Rpp32s filterRowStartIndex, Rpp32u channels = 1) { Rpp32f accumX = 0.0f; Rpp32f accumY = 0.0f; @@ -65,6 +67,9 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R // find the colKernelLoopLimit based on columnIndex get_kernel_loop_limit(columnIndex, columnKernelLoopLimit, padLength, unpaddedWidth); + Rpp32s filterColStartIndex = (columnIndex < padLength) ? (kernelSize - columnKernelLoopLimit) : 0; + Rpp32f *filterXShiftedTensor = filterXTensor + filterRowStartIndex * kernelSize + filterColStartIndex; + Rpp32f *filterYShiftedTensor = filterYTensor + filterRowStartIndex * kernelSize + filterColStartIndex; for (int i = 0; i < rowKernelLoopLimit; i++) { for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) @@ -81,17 +86,17 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R // process padLength number of columns in each row // left border pixels in image which does not have required pixels in 3x3 kernel, process them separately inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, - Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterXTensor, Rpp32f *filterYTensor) + Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterXTensor, Rpp32f *filterYTensor, Rpp32s filterRowStartIndex) { for (int k = 0; k < padLength; k++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterXTensor, filterYTensor, 1); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, 
filterXTensor, filterYTensor, filterRowStartIndex, 1); dstPtrTemp++; } } Rpp32f sobel3x3X[9] = {-1, 0, 1, -2, 0, 2, -1, 0, 1}; -Rpp32f sobel3x3Y[9] = {1, 2, 1, 0, 0, 0, -1, -2, -1}; +Rpp32f sobel3x3Y[9] = {-1, -2, -1, 0, 0, 0, 1, 2, 1}; RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptDescPtr srcDescPtr, @@ -182,10 +187,11 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, // get the number of rows needs to be loaded for the corresponding row Rpp32s rowKernelLoopLimit = kernelSize; get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + Rpp32s filterRowStartIndex = padLengthRows ? (kernelSize - rowKernelLoopLimit) : 0; if (!combined) - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter, filterRowStartIndex); else - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY, filterRowStartIndex); dstPtrTemp += padLength; #if __AVX2__ @@ -263,9 +269,9 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, for (; vectorLoopCount < bufferLength; vectorLoopCount++) { if (!combined) - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter, filterRowStartIndex); else - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, 
unpaddedWidth, rowKernelLoopLimit, filterX, filterY, filterRowStartIndex); increment_row_ptrs(srcPtrTemp, kernelSize, 1); dstPtrTemp++; } From e71e319a215192b480d90422a3feb3bfd0bb9778 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 29 Jul 2024 08:21:31 +0000 Subject: [PATCH 05/31] reverted back to previous version --- src/modules/cpu/kernel/sobel_filter.hpp | 28 ++++++++++--------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index cc7feaaf8..f24e194f8 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -29,18 +29,16 @@ SOFTWARE. inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32s columnIndex, Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, - Rpp32f *filterTensor, Rpp32s filterRowStartIndex, Rpp32u channels = 1) + Rpp32f *filterTensor, Rpp32u channels = 1) { Rpp32f accum = 0.0f; Rpp32s columnKernelLoopLimit = kernelSize; // find the colKernelLoopLimit based on columnIndex get_kernel_loop_limit(columnIndex, columnKernelLoopLimit, padLength, unpaddedWidth); - Rpp32s filterColStartIndex = (columnIndex < padLength) ? 
(kernelSize - columnKernelLoopLimit) : 0; - Rpp32f *filterShiftedTensor = filterTensor + filterRowStartIndex * kernelSize + filterColStartIndex; for (int i = 0; i < rowKernelLoopLimit; i++) for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) - accum += static_cast(srcPtrTemp[i][k]) * filterShiftedTensor[i * kernelSize + j]; + accum += static_cast(srcPtrTemp[i][k]) * filterTensor[i * kernelSize + j]; saturate_pixel(std::nearbyintf(accum), dstPtrTemp); } @@ -48,18 +46,18 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R // process padLength number of columns in each row // left border pixels in image which does not have required pixels in 3x3 kernel, process them separately inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, - Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterTensor, Rpp32s filterRowStartIndex) + Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterTensor) { for (int k = 0; k < padLength; k++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterTensor, filterRowStartIndex, 1); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterTensor, 1); dstPtrTemp++; } } inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32s columnIndex, Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, - Rpp32f *filterXTensor, Rpp32f *filterYTensor, Rpp32s filterRowStartIndex, Rpp32u channels = 1) + Rpp32f *filterXTensor, Rpp32f *filterYTensor, Rpp32u channels = 1) { Rpp32f accumX = 0.0f; Rpp32f accumY = 0.0f; @@ -67,9 +65,6 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R // find the colKernelLoopLimit based on columnIndex get_kernel_loop_limit(columnIndex, columnKernelLoopLimit, padLength, 
unpaddedWidth); - Rpp32s filterColStartIndex = (columnIndex < padLength) ? (kernelSize - columnKernelLoopLimit) : 0; - Rpp32f *filterXShiftedTensor = filterXTensor + filterRowStartIndex * kernelSize + filterColStartIndex; - Rpp32f *filterYShiftedTensor = filterYTensor + filterRowStartIndex * kernelSize + filterColStartIndex; for (int i = 0; i < rowKernelLoopLimit; i++) { for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) @@ -86,11 +81,11 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R // process padLength number of columns in each row // left border pixels in image which does not have required pixels in 3x3 kernel, process them separately inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, - Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterXTensor, Rpp32f *filterYTensor, Rpp32s filterRowStartIndex) + Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterXTensor, Rpp32f *filterYTensor) { for (int k = 0; k < padLength; k++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterXTensor, filterYTensor, filterRowStartIndex, 1); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterXTensor, filterYTensor, 1); dstPtrTemp++; } } @@ -187,11 +182,10 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, // get the number of rows needs to be loaded for the corresponding row Rpp32s rowKernelLoopLimit = kernelSize; get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); - Rpp32s filterRowStartIndex = padLengthRows ? 
(kernelSize - rowKernelLoopLimit) : 0; if (!combined) - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter, filterRowStartIndex); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); else - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY, filterRowStartIndex); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); dstPtrTemp += padLength; #if __AVX2__ @@ -269,9 +263,9 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, for (; vectorLoopCount < bufferLength; vectorLoopCount++) { if (!combined) - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter, filterRowStartIndex); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); else - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY, filterRowStartIndex); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); increment_row_ptrs(srcPtrTemp, kernelSize, 1); dstPtrTemp++; } From cde5e22b8394aac59a7c4ca32a50a6451d30d754 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 29 Jul 2024 08:28:17 +0000 Subject: [PATCH 06/31] fixed the xy gradient output issues --- src/modules/cpu/kernel/sobel_filter.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index f24e194f8..6b107339a 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp 
@@ -73,6 +73,8 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R accumY += static_cast(srcPtrTemp[i][k]) * filterYTensor[i * kernelSize + j]; } } + accumX = RPPPIXELCHECK(accumX); + accumY = RPPPIXELCHECK(accumY); Rpp32f accum = sqrt((accumX * accumX) + (accumY * accumY)); saturate_pixel(std::nearbyintf(accum), dstPtrTemp); @@ -250,6 +252,8 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); pDstY[0] = _mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } + pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], avx_p0), avx_p255); + pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], avx_p0), avx_p255); pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); From 1e006e593dd348b45236f7a0f9cf11020e783dbb Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 29 Jul 2024 09:02:06 +0000 Subject: [PATCH 07/31] decoupled xy gradient from x, y gradient variants --- src/modules/cpu/kernel/sobel_filter.hpp | 212 +++++++++++++--------- utilities/test_suite/HOST/Tensor_host.cpp | 2 +- 2 files changed, 123 insertions(+), 91 deletions(-) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 6b107339a..74cf0d71e 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -95,6 +95,23 @@ inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPt Rpp32f sobel3x3X[9] = {-1, 0, 1, -2, 0, 2, -1, 0, 1}; Rpp32f sobel3x3Y[9] = {-1, -2, -1, 0, 0, 0, 1, 2, 1}; +// load function for 3x3 kernel size +inline void rpp_load_sobel_filter_char_3x3_host(__m256 *pRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 2 rows for 3x3 kernel + rpp_load16_u8_to_f32_avx(srcPtrTemp[0], &pRow[0]); + 
rpp_load16_u8_to_f32_avx(srcPtrTemp[1], &pRow[2]); + + // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 + if (rowKernelLoopLimit == 3) + rpp_load16_u8_to_f32_avx(srcPtrTemp[2], &pRow[4]); + else + { + pRow[4] = avx_p0; + pRow[5] = avx_p0; + } +} + RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptDescPtr srcDescPtr, Rpp8u *dstPtr, @@ -131,23 +148,17 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, Rpp8u *srcPtrChannel, *dstPtrChannel; srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y * srcDescPtr->strides.hStride) + (roi.xywhROI.xy.x * layoutParams.bufferMultiplier); dstPtrChannel = dstPtrImage; - if (kernelSize == 3) + if ((kernelSize == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && (srcDescPtr->c == 1)) { - __m256 pFilter[9], pFilterX[9], pFilterY[9]; - if (sobelType == 0) - { - filter = sobel3x3X; - for (int i = 0; i < 9; i++) - pFilter[i] = _mm256_set1_ps(filter[i]); - } - else if (sobelType == 1) - { - filter = sobel3x3Y; - for (int i = 0; i < 9; i++) - pFilter[i] = _mm256_set1_ps(filter[i]); - } - else + Rpp8u *srcPtrRow[3], *dstPtrRow; + for (int i = 0; i < 3; i++) + srcPtrRow[i] = srcPtrChannel + i * srcDescPtr->strides.hStride; + dstPtrRow = dstPtrChannel; + + if (combined) { +#if __AVX2__ + __m256 pFilterX[9], pFilterY[9]; filterX = sobel3x3X; filterY = sobel3x3Y; for (int i = 0; i < 9; i++) @@ -155,16 +166,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, pFilterX[i] = _mm256_set1_ps(filterX[i]); pFilterY[i] = _mm256_set1_ps(filterY[i]); } - } - - Rpp8u *srcPtrRow[3], *dstPtrRow; - for (int i = 0; i < 3; i++) - srcPtrRow[i] = srcPtrChannel + i * srcDescPtr->strides.hStride; - dstPtrRow = dstPtrChannel; - - // box filter without fused output-layout toggle (NCHW -> NCHW) - if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && (srcDescPtr->c == 1)) - { +#endif /* exclude 2 * padLength number of columns from 
alignedLength calculation since padLength number of columns from the beginning and end of each row will be computed using raw c code */ Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 8) * 8; @@ -184,79 +186,112 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, // get the number of rows needs to be loaded for the corresponding row Rpp32s rowKernelLoopLimit = kernelSize; get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); - if (!combined) - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); - else - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); - + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); dstPtrTemp += padLength; #if __AVX2__ // process alignedLength number of columns in each row for (; vectorLoopCount < alignedLength; vectorLoopCount += 8) { - __m256 pRow[6]; - // irrespective of row location, we need to load 2 rows for 3x3 kernel - rpp_load16_u8_to_f32_avx(srcPtrTemp[0], &pRow[0]); - rpp_load16_u8_to_f32_avx(srcPtrTemp[1], &pRow[2]); - - // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 - if (rowKernelLoopLimit == 3) - rpp_load16_u8_to_f32_avx(srcPtrTemp[2], &pRow[4]); - else + __m256 pRow[6], pDst[2], pDstX[2], pDstY[2]; + rpp_load_sobel_filter_char_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); + for (int k = 0; k < 2; k++) { - pRow[4] = avx_p0; - pRow[5] = avx_p0; + pDstX[k] = avx_p0; + pDstY[k] = avx_p0; + pDst[k] = avx_p0; } - - __m256 pDst[2]; - if (!combined) + for (int k = 0; k < 3; k++) { - pDst[0] = avx_p0; - pDst[1] = avx_p0; - for (int k = 0; k < 3; k++) - { - __m256 pTemp[3]; - Rpp32s filterIndex = k * 3; - Rpp32s rowIndex = k * 2; + __m256 pTemp[3], pRowShift[2]; + Rpp32s filterIndex = k * 3; + Rpp32s rowIndex = k * 2; - 
pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); - pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); - pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - } + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1); + pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pDstX[0] = _mm256_add_ps(pDstX[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pDstY[0] = _mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } - else - { - __m256 pDstX[2], pDstY[2]; - for (int k = 0; k < 2; k++) - { - pDstX[k] = avx_p0; - pDstY[k] = avx_p0; - pDst[k] = avx_p0; - } - for (int k = 0; k < 3; k++) - { - __m256 pTemp[3], pRowShift[2]; - Rpp32s filterIndex = k * 3; - Rpp32s rowIndex = k * 2; + pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], avx_p0), avx_p255); + pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], avx_p0), avx_p255); + pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); + pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); + pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); + + rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 8); + 
dstPtrTemp += 8; + } +#endif + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; + } + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; + } + srcPtrChannel += srcDescPtr->strides.cStride; + dstPtrChannel += dstDescPtr->strides.cStride; + } + } + else + { +#if __AVX2__ + __m256 pFilter[9]; + filter = (!sobelType) ? sobel3x3Y : sobel3x3X; + for (int i = 0; i < 9; i++) + pFilter[i] = _mm256_set1_ps(filter[i]); +#endif - pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1); - pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); - pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); - pDstX[0] = _mm256_add_ps(pDstX[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + /* exclude 2 * padLength number of columns from alignedLength calculation + since padLength number of columns from the beginning and end of each row will be computed using raw c code */ + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 8) * 8; + for (int c = 0; c < srcDescPtr->c; c++) + { + srcPtrRow[0] = srcPtrChannel; + srcPtrRow[1] = srcPtrRow[0] + srcDescPtr->strides.hStride; + srcPtrRow[2] = srcPtrRow[1] + srcDescPtr->strides.hStride; + dstPtrRow = dstPtrChannel; + for(int i = 0; i < roi.xywhROI.roiHeight; i++) + { + int vectorLoopCount = 0; + bool padLengthRows = (i < 
padLength) ? 1: 0; + Rpp8u *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; + Rpp8u *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + dstPtrTemp += padLength; +#if __AVX2__ + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 8) + { + __m256 pRow[6], pDst[2]; + rpp_load_sobel_filter_char_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); + pDst[0] = avx_p0; + pDst[1] = avx_p0; + for (int k = 0; k < 3; k++) + { + __m256 pTemp[3]; + Rpp32s filterIndex = k * 3; + Rpp32s rowIndex = k * 2; - pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterY[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); - pDstY[0] = _mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - } - pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], avx_p0), avx_p255); - pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], avx_p0), avx_p255); - pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); - pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); - pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); 
increment_row_ptrs(srcPtrTemp, kernelSize, 8); @@ -266,10 +301,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, vectorLoopCount += padLength; for (; vectorLoopCount < bufferLength; vectorLoopCount++) { - if (!combined) - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); - else - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); increment_row_ptrs(srcPtrTemp, kernelSize, 1); dstPtrTemp++; } diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index e77b47b42..f29ff1f49 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -1091,7 +1091,7 @@ int main(int argc, char **argv) startWallTime = omp_get_wtime(); startCpuTime = clock(); - if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 3 || inputBitDepth == 4 || inputBitDepth == 5) + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) rppt_sobel_filter_host(input, srcDescPtr, output, dstDescPtr, sobelType, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); else missingFuncFlag = 1; From 2f281c564591c1e88862f65cd4aed2146840eaf9 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 29 Jul 2024 10:16:45 +0000 Subject: [PATCH 08/31] modified raw c process function names for better clarity added changes to handle different bitdepths in few helpers --- src/include/cpu/rpp_cpu_common.hpp | 4 +-- src/modules/cpu/kernel/sobel_filter.hpp | 40 ++++++++++++++++--------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/include/cpu/rpp_cpu_common.hpp b/src/include/cpu/rpp_cpu_common.hpp index be8eaeeaa..fdcb621df 100644 --- 
a/src/include/cpu/rpp_cpu_common.hpp +++ b/src/include/cpu/rpp_cpu_common.hpp @@ -512,12 +512,12 @@ inline int power_function(int a, int b) inline void saturate_pixel(Rpp32f pixel, Rpp8u* dst) { - *dst = RPPPIXELCHECK(pixel); + *dst = RPPPIXELCHECK(std::nearbyintf(pixel)); } inline void saturate_pixel(Rpp32f pixel, Rpp8s* dst) { - *dst = (Rpp8s)RPPPIXELCHECKI8(pixel - 128); + *dst = (Rpp8s)RPPPIXELCHECKI8(std::nearbyintf(pixel) - 128); } inline void saturate_pixel(Rpp32f pixel, Rpp32f* dst) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 74cf0d71e..072ea1a46 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -27,9 +27,10 @@ SOFTWARE. #include "rpp_cpu_common.hpp" #include "rpp_cpu_filter.hpp" -inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32s columnIndex, - Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, - Rpp32f *filterTensor, Rpp32u channels = 1) +template +inline void sobel_filter_unidirection_generic_tensor(T **srcPtrTemp, T *dstPtrTemp, Rpp32s columnIndex, + Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, + Rpp32f *filterTensor, Rpp32u channels = 1) { Rpp32f accum = 0.0f; Rpp32s columnKernelLoopLimit = kernelSize; @@ -40,7 +41,7 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) accum += static_cast(srcPtrTemp[i][k]) * filterTensor[i * kernelSize + j]; - saturate_pixel(std::nearbyintf(accum), dstPtrTemp); + saturate_pixel(accum, dstPtrTemp); } // process padLength number of columns in each row @@ -50,14 +51,15 @@ inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPt { for (int k = 0; k < padLength; k++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, 
filterTensor, 1); + sobel_filter_unidirection_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterTensor, 1); dstPtrTemp++; } } -inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32s columnIndex, - Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, - Rpp32f *filterXTensor, Rpp32f *filterYTensor, Rpp32u channels = 1) +template +inline void sobel_filter_bidirection_generic_tensor(T **srcPtrTemp, T *dstPtrTemp, Rpp32s columnIndex, + Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, + Rpp32f *filterXTensor, Rpp32f *filterYTensor, Rpp32u channels = 1) { Rpp32f accumX = 0.0f; Rpp32f accumY = 0.0f; @@ -73,11 +75,21 @@ inline void sobel_filter_generic_tensor(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, R accumY += static_cast(srcPtrTemp[i][k]) * filterYTensor[i * kernelSize + j]; } } - accumX = RPPPIXELCHECK(accumX); - accumY = RPPPIXELCHECK(accumY); + + // saturate the values of accumX and accumY to the range of datatype + if constexpr (std::is_same::value || std::is_same::value) + { + accumX = RPPPIXELCHECK(accumX); + accumY = RPPPIXELCHECK(accumY); + } + else + { + accumX = RPPPIXELCHECKF32(accumX); + accumY = RPPPIXELCHECKF32(accumY); + } Rpp32f accum = sqrt((accumX * accumX) + (accumY * accumY)); - saturate_pixel(std::nearbyintf(accum), dstPtrTemp); + saturate_pixel(accum, dstPtrTemp); } // process padLength number of columns in each row @@ -87,7 +99,7 @@ inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPt { for (int k = 0; k < padLength; k++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterXTensor, filterYTensor, 1); + sobel_filter_bidirection_generic_tensor(srcPtrTemp, dstPtrTemp, k, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterXTensor, filterYTensor, 1); dstPtrTemp++; } } @@ -232,7 +244,7 @@ 
RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, vectorLoopCount += padLength; for (; vectorLoopCount < bufferLength; vectorLoopCount++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + sobel_filter_bidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); increment_row_ptrs(srcPtrTemp, kernelSize, 1); dstPtrTemp++; } @@ -301,7 +313,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, vectorLoopCount += padLength; for (; vectorLoopCount < bufferLength; vectorLoopCount++) { - sobel_filter_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + sobel_filter_unidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); increment_row_ptrs(srcPtrTemp, kernelSize, 1); dstPtrTemp++; } From 2a1e892bd78a447371b466fe6b71174dc7731d53 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 29 Jul 2024 12:21:10 +0000 Subject: [PATCH 09/31] modified to process pixels in vectorized manner per iteration --- src/modules/cpu/kernel/sobel_filter.hpp | 40 +++++++++++++++++++------ 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 072ea1a46..cca80cc65 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -181,7 +181,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, #endif /* exclude 2 * padLength number of columns from alignedLength calculation since padLength number of columns from the beginning and end of each row will be computed using raw c code */ - Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 8) * 8; + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; 
for (int c = 0; c < srcDescPtr->c; c++) { srcPtrRow[0] = srcPtrChannel; @@ -202,7 +202,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, dstPtrTemp += padLength; #if __AVX2__ // process alignedLength number of columns in each row - for (; vectorLoopCount < alignedLength; vectorLoopCount += 8) + for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) { __m256 pRow[6], pDst[2], pDstX[2], pDstY[2]; rpp_load_sobel_filter_char_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); @@ -229,6 +229,18 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); pDstY[0] = _mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1); + pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterX[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pDstX[1] = _mm256_add_ps(pDstX[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pDstY[1] = _mm256_add_ps(pDstY[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], avx_p0), avx_p255); pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], avx_p0), avx_p255); @@ -236,9 +248,15 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); + pDstX[1] = 
_mm256_min_ps(_mm256_max_ps(pDstX[1], avx_p0), avx_p255); + pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], avx_p0), avx_p255); + pDstX[1] = _mm256_mul_ps(pDstX[1], pDstX[1]); + pDstY[1] = _mm256_mul_ps(pDstY[1], pDstY[1]); + pDst[1] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[1], pDstY[1])); + rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); - increment_row_ptrs(srcPtrTemp, kernelSize, 8); - dstPtrTemp += 8; + increment_row_ptrs(srcPtrTemp, kernelSize, 14); + dstPtrTemp += 14; } #endif vectorLoopCount += padLength; @@ -264,10 +282,9 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, for (int i = 0; i < 9; i++) pFilter[i] = _mm256_set1_ps(filter[i]); #endif - /* exclude 2 * padLength number of columns from alignedLength calculation since padLength number of columns from the beginning and end of each row will be computed using raw c code */ - Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 8) * 8; + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; for (int c = 0; c < srcDescPtr->c; c++) { srcPtrRow[0] = srcPtrChannel; @@ -288,7 +305,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, dstPtrTemp += padLength; #if __AVX2__ // process alignedLength number of columns in each row - for (; vectorLoopCount < alignedLength; vectorLoopCount += 8) + for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) { __m256 pRow[6], pDst[2]; rpp_load_sobel_filter_char_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); @@ -304,10 +321,15 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], 
pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pDst[1] = _mm256_add_ps(pDst[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); - increment_row_ptrs(srcPtrTemp, kernelSize, 8); - dstPtrTemp += 8; + increment_row_ptrs(srcPtrTemp, kernelSize, 14); + dstPtrTemp += 14; } #endif vectorLoopCount += padLength; From 792afb886e2b916f504372964158a5ca02011042 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 29 Jul 2024 13:25:03 +0000 Subject: [PATCH 10/31] added support for other data types --- src/modules/cpu/kernel/sobel_filter.hpp | 184 ++++++++++++++---- .../rppt_tensor_filter_augmentations.cpp | 80 ++++---- 2 files changed, 189 insertions(+), 75 deletions(-) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index cca80cc65..c82733da7 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -37,16 +37,26 @@ inline void sobel_filter_unidirection_generic_tensor(T **srcPtrTemp, T *dstPtrTe // find the colKernelLoopLimit based on columnIndex get_kernel_loop_limit(columnIndex, columnKernelLoopLimit, padLength, unpaddedWidth); - for (int i = 0; i < rowKernelLoopLimit; i++) - for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) - accum += static_cast(srcPtrTemp[i][k]) * filterTensor[i * kernelSize + j]; + if constexpr (std::is_same::value) + { + for (int i = 0; i < rowKernelLoopLimit; i++) + for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) + accum += static_cast(srcPtrTemp[i][k] + 128) * filterTensor[i * kernelSize + j]; + } + else + { + for (int i = 0; i < rowKernelLoopLimit; i++) + for (int j = 0, k = 0 ; j < 
columnKernelLoopLimit; j++, k += channels) + accum += static_cast(srcPtrTemp[i][k]) * filterTensor[i * kernelSize + j]; + } saturate_pixel(accum, dstPtrTemp); } // process padLength number of columns in each row // left border pixels in image which does not have required pixels in 3x3 kernel, process them separately -inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, +template +inline void process_left_border_columns_pln_pln(T **srcPtrTemp, T *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterTensor) { for (int k = 0; k < padLength; k++) @@ -67,12 +77,26 @@ inline void sobel_filter_bidirection_generic_tensor(T **srcPtrTemp, T *dstPtrTem // find the colKernelLoopLimit based on columnIndex get_kernel_loop_limit(columnIndex, columnKernelLoopLimit, padLength, unpaddedWidth); - for (int i = 0; i < rowKernelLoopLimit; i++) + if constexpr (std::is_same::value) { - for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) + for (int i = 0; i < rowKernelLoopLimit; i++) { - accumX += static_cast(srcPtrTemp[i][k]) * filterXTensor[i * kernelSize + j]; - accumY += static_cast(srcPtrTemp[i][k]) * filterYTensor[i * kernelSize + j]; + for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) + { + accumX += static_cast(srcPtrTemp[i][k] + 128) * filterXTensor[i * kernelSize + j]; + accumY += static_cast(srcPtrTemp[i][k] + 128) * filterYTensor[i * kernelSize + j]; + } + } + } + else + { + for (int i = 0; i < rowKernelLoopLimit; i++) + { + for (int j = 0, k = 0 ; j < columnKernelLoopLimit; j++, k += channels) + { + accumX += static_cast(srcPtrTemp[i][k]) * filterXTensor[i * kernelSize + j]; + accumY += static_cast(srcPtrTemp[i][k]) * filterYTensor[i * kernelSize + j]; + } } } @@ -94,7 +118,8 @@ inline void sobel_filter_bidirection_generic_tensor(T **srcPtrTemp, T *dstPtrTem // process padLength number of columns in each row 
// left border pixels in image which does not have required pixels in 3x3 kernel, process them separately -inline void process_left_border_columns_pln_pln(Rpp8u **srcPtrTemp, Rpp8u *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, +template +inline void process_left_border_columns_pln_pln(T **srcPtrTemp, T *dstPtrTemp, Rpp32u kernelSize, Rpp32u padLength, Rpp32u unpaddedWidth, Rpp32s rowKernelLoopLimit, Rpp32f *filterXTensor, Rpp32f *filterYTensor) { for (int k = 0; k < padLength; k++) @@ -108,7 +133,7 @@ Rpp32f sobel3x3X[9] = {-1, 0, 1, -2, 0, 2, -1, 0, 1}; Rpp32f sobel3x3Y[9] = {-1, -2, -1, 0, 0, 0, 1, 2, 1}; // load function for 3x3 kernel size -inline void rpp_load_sobel_filter_char_3x3_host(__m256 *pRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) +inline void rpp_load_sobel_filter_3x3_host(__m256 *pRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) { // irrespective of row location, we need to load 2 rows for 3x3 kernel rpp_load16_u8_to_f32_avx(srcPtrTemp[0], &pRow[0]); @@ -124,16 +149,91 @@ inline void rpp_load_sobel_filter_char_3x3_host(__m256 *pRow, Rpp8u **srcPtrTemp } } -RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, - RpptDescPtr srcDescPtr, - Rpp8u *dstPtr, - RpptDescPtr dstDescPtr, - Rpp32u sobelType, - Rpp32u kernelSize, - RpptROIPtr roiTensorPtrSrc, - RpptRoiType roiType, - RppLayoutParams layoutParams, - rpp::Handle& handle) +inline void rpp_load_sobel_filter_3x3_host(__m256 *pRow, Rpp8s **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 2 rows for 3x3 kernel + rpp_load16_i8_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_i8_to_f32_avx(srcPtrTemp[1], &pRow[2]); + + // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 + if (rowKernelLoopLimit == 3) + rpp_load16_i8_to_f32_avx(srcPtrTemp[2], &pRow[4]); + else + { + pRow[4] = avx_p0; + pRow[5] = avx_p0; + } +} + +inline void rpp_load_sobel_filter_3x3_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + 
// irrespective of row location, we need to load 2 rows for 3x3 kernel + rpp_load16_f32_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_f32_to_f32_avx(srcPtrTemp[1], &pRow[2]); + + // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 + if (rowKernelLoopLimit == 3) + rpp_load16_f32_to_f32_avx(srcPtrTemp[2], &pRow[4]); + else + { + pRow[4] = avx_p0; + pRow[5] = avx_p0; + } +} + +// load function for 3x3 kernel size +inline void rpp_load_sobel_filter_3x3_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 2 rows for 3x3 kernel + pRow[0] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[0])))); + pRow[1] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[0] + 8)))); + pRow[2] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[1])))); + pRow[3] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[1] + 8)))); + + // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 + if (rowKernelLoopLimit == 3) + { + pRow[4] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[2])))); + pRow[5] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[2] + 8)))); + } + else + { + pRow[4] = avx_px0; + pRow[5] = avx_px0; + } +} + +inline void rpp_sobel_store16(Rpp8u *dstPtrTemp, __m256 *pDst) +{ + rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); +} + +inline void rpp_sobel_store16(Rpp8s *dstPtrTemp, __m256 *pDst) +{ + rpp_store16_f32_to_i8_avx(dstPtrTemp, pDst); +} + +inline void rpp_sobel_store16(Rpp32f *dstPtrTemp, __m256 *pDst) +{ + rpp_store16_f32_to_f32_avx(dstPtrTemp, pDst); +} + +inline void rpp_sobel_store16(Rpp16f *dstPtrTemp, __m256 *pDst) +{ + rpp_store16_f32_to_f16_avx(dstPtrTemp, pDst); +} + +template +RppStatus sobel_filter_host_tensor(T *srcPtr, + RpptDescPtr srcDescPtr, + T *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32u 
sobelType, + Rpp32u kernelSize, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + RppLayoutParams layoutParams, + rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; Rpp32u numThreads = handle.GetNumThreads(); @@ -146,7 +246,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, RpptROIPtr roiPtrInput = &roiTensorPtrSrc[batchCount]; compute_roi_validation_host(roiPtrInput, &roi, &roiDefault, roiType); - Rpp8u *srcPtrImage, *dstPtrImage; + T *srcPtrImage, *dstPtrImage; srcPtrImage = srcPtr + batchCount * srcDescPtr->strides.nStride; dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; @@ -157,12 +257,26 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, bool combined = (sobelType == 2); Rpp32f *filter, *filterX, *filterY; - Rpp8u *srcPtrChannel, *dstPtrChannel; +#if __AVX2__ + __m256 pMax, pMin; + if constexpr (std::is_same::value || std::is_same::value) + { + pMax = avx_p255; + pMin = avx_p0; + } + else + { + pMax = avx_p1; + pMin = avx_p0; + } +#endif + + T *srcPtrChannel, *dstPtrChannel; srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y * srcDescPtr->strides.hStride) + (roi.xywhROI.xy.x * layoutParams.bufferMultiplier); dstPtrChannel = dstPtrImage; if ((kernelSize == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && (srcDescPtr->c == 1)) { - Rpp8u *srcPtrRow[3], *dstPtrRow; + T *srcPtrRow[3], *dstPtrRow; for (int i = 0; i < 3; i++) srcPtrRow[i] = srcPtrChannel + i * srcDescPtr->strides.hStride; dstPtrRow = dstPtrChannel; @@ -192,8 +306,8 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, { int vectorLoopCount = 0; bool padLengthRows = (i < padLength) ? 
1: 0; - Rpp8u *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; - Rpp8u *dstPtrTemp = dstPtrRow; + T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; + T *dstPtrTemp = dstPtrRow; // get the number of rows needs to be loaded for the corresponding row Rpp32s rowKernelLoopLimit = kernelSize; @@ -205,7 +319,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) { __m256 pRow[6], pDst[2], pDstX[2], pDstY[2]; - rpp_load_sobel_filter_char_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); + rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); for (int k = 0; k < 2; k++) { pDstX[k] = avx_p0; @@ -242,19 +356,19 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); pDstY[1] = _mm256_add_ps(pDstY[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } - pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], avx_p0), avx_p255); - pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], avx_p0), avx_p255); + pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], pMin), pMax); + pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], pMin), pMax); pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); - pDstX[1] = _mm256_min_ps(_mm256_max_ps(pDstX[1], avx_p0), avx_p255); - pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], avx_p0), avx_p255); + pDstX[1] = _mm256_min_ps(_mm256_max_ps(pDstX[1], pMin), pMax); + pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], pMin), pMax); pDstX[1] = _mm256_mul_ps(pDstX[1], pDstX[1]); pDstY[1] = _mm256_mul_ps(pDstY[1], pDstY[1]); pDst[1] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[1], pDstY[1])); - rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); + rpp_sobel_store16(dstPtrTemp, pDst); increment_row_ptrs(srcPtrTemp, kernelSize, 14); dstPtrTemp += 14; } @@ -295,8 +409,8 @@ RppStatus 
sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, { int vectorLoopCount = 0; bool padLengthRows = (i < padLength) ? 1: 0; - Rpp8u *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; - Rpp8u *dstPtrTemp = dstPtrRow; + T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; + T *dstPtrTemp = dstPtrRow; // get the number of rows needs to be loaded for the corresponding row Rpp32s rowKernelLoopLimit = kernelSize; @@ -308,7 +422,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) { __m256 pRow[6], pDst[2]; - rpp_load_sobel_filter_char_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); + rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); pDst[0] = avx_p0; pDst[1] = avx_p0; for (int k = 0; k < 3; k++) @@ -327,7 +441,7 @@ RppStatus sobel_filter_u8_u8_host_tensor(Rpp8u *srcPtr, pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); pDst[1] = _mm256_add_ps(pDst[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } - rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); + rpp_sobel_store16(dstPtrTemp, pDst); increment_row_ptrs(srcPtrTemp, kernelSize, 14); dstPtrTemp += 14; } diff --git a/src/modules/rppt_tensor_filter_augmentations.cpp b/src/modules/rppt_tensor_filter_augmentations.cpp index 833c7bea1..a159044a2 100644 --- a/src/modules/rppt_tensor_filter_augmentations.cpp +++ b/src/modules/rppt_tensor_filter_augmentations.cpp @@ -48,55 +48,55 @@ RppStatus rppt_sobel_filter_host(RppPtr_t srcPtr, if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) { - sobel_filter_u8_u8_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, - srcDescPtr, - static_cast(dstPtr) + dstDescPtr->offsetInBytes, - dstDescPtr, - sobelType, - kernelSize, - roiTensorPtrSrc, - roiType, - layoutParams, - rpp::deref(rppHandle)); + 
sobel_filter_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { - // sobel_filter_f16_f16_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), - // srcDescPtr, - // reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), - // dstDescPtr, - // sobelType, - // kernelSize, - // roiTensorPtrSrc, - // roiType, - // layoutParams, - // rpp::deref(rppHandle)); + sobel_filter_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { - // sobel_filter_f32_f32_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), - // srcDescPtr, - // reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), - // dstDescPtr, - // sobelType, - // kernelSize, - // roiTensorPtrSrc, - // roiType, - // layoutParams, - // rpp::deref(rppHandle)); + sobel_filter_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { - // sobel_filter_i8_i8_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, - // srcDescPtr, - // static_cast(dstPtr) + dstDescPtr->offsetInBytes, - // dstDescPtr, - // sobelType, - // 
kernelSize, - // roiTensorPtrSrc, - // roiType, - // layoutParams, - // rpp::deref(rppHandle)); + sobel_filter_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + layoutParams, + rpp::deref(rppHandle)); } return RPP_SUCCESS; } From 02b9d4f12045c901ecf8982e594af1a119d8d986 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Tue, 30 Jul 2024 06:46:29 +0000 Subject: [PATCH 11/31] added support to convert RGB images to grey scale images before passing to sobel filter --- src/modules/cpu/kernel/sobel_filter.hpp | 261 ++++++++---------- .../rppt_tensor_filter_augmentations.cpp | 29 +- 2 files changed, 137 insertions(+), 153 deletions(-) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index c82733da7..e4128a5b2 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -232,7 +232,6 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, - RppLayoutParams layoutParams, rpp::Handle& handle) { RpptROI roiDefault = {0, 0, (Rpp32s)srcDescPtr->w, (Rpp32s)srcDescPtr->h}; @@ -251,7 +250,7 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, dstPtrImage = dstPtr + batchCount * dstDescPtr->strides.nStride; Rpp32u padLength = kernelSize / 2; - Rpp32u bufferLength = roi.xywhROI.roiWidth * layoutParams.bufferMultiplier; + Rpp32u bufferLength = roi.xywhROI.roiWidth; Rpp32u unpaddedHeight = roi.xywhROI.roiHeight - padLength; Rpp32u unpaddedWidth = roi.xywhROI.roiWidth - padLength; bool combined = (sobelType == 2); @@ -272,7 +271,7 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, #endif T *srcPtrChannel, *dstPtrChannel; - srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y * srcDescPtr->strides.hStride) + (roi.xywhROI.xy.x * layoutParams.bufferMultiplier); + srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y 
* srcDescPtr->strides.hStride) + roi.xywhROI.xy.x; dstPtrChannel = dstPtrImage; if ((kernelSize == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && (srcDescPtr->c == 1)) { @@ -296,96 +295,87 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, /* exclude 2 * padLength number of columns from alignedLength calculation since padLength number of columns from the beginning and end of each row will be computed using raw c code */ Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; - for (int c = 0; c < srcDescPtr->c; c++) + for(int i = 0; i < roi.xywhROI.roiHeight; i++) { - srcPtrRow[0] = srcPtrChannel; - srcPtrRow[1] = srcPtrRow[0] + srcDescPtr->strides.hStride; - srcPtrRow[2] = srcPtrRow[1] + srcDescPtr->strides.hStride; - dstPtrRow = dstPtrChannel; - for(int i = 0; i < roi.xywhROI.roiHeight; i++) - { - int vectorLoopCount = 0; - bool padLengthRows = (i < padLength) ? 1: 0; - T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; - T *dstPtrTemp = dstPtrRow; - - // get the number of rows needs to be loaded for the corresponding row - Rpp32s rowKernelLoopLimit = kernelSize; - get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); - dstPtrTemp += padLength; + int vectorLoopCount = 0; + bool padLengthRows = (i < padLength) ? 
1: 0; + T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; + T *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + dstPtrTemp += padLength; #if __AVX2__ - // process alignedLength number of columns in each row - for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) + { + __m256 pRow[6], pDst[2], pDstX[2], pDstY[2]; + rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); + for (int k = 0; k < 2; k++) { - __m256 pRow[6], pDst[2], pDstX[2], pDstY[2]; - rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); - for (int k = 0; k < 2; k++) - { - pDstX[k] = avx_p0; - pDstY[k] = avx_p0; - pDst[k] = avx_p0; - } - for (int k = 0; k < 3; k++) - { - __m256 pTemp[3], pRowShift[2]; - Rpp32s filterIndex = k * 3; - Rpp32s rowIndex = k * 2; - - pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1); - pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); - pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); - pDstX[0] = _mm256_add_ps(pDstX[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - - pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterY[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); - pDstY[0] = 
_mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - - pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1); - pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2); - pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterX[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); - pDstX[1] = _mm256_add_ps(pDstX[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - - pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterY[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); - pDstY[1] = _mm256_add_ps(pDstY[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - } - pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], pMin), pMax); - pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], pMin), pMax); - pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); - pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); - pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); - - pDstX[1] = _mm256_min_ps(_mm256_max_ps(pDstX[1], pMin), pMax); - pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], pMin), pMax); - pDstX[1] = _mm256_mul_ps(pDstX[1], pDstX[1]); - pDstY[1] = _mm256_mul_ps(pDstY[1], pDstY[1]); - pDst[1] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[1], pDstY[1])); - - rpp_sobel_store16(dstPtrTemp, pDst); - increment_row_ptrs(srcPtrTemp, kernelSize, 14); - dstPtrTemp += 14; + pDstX[k] = avx_p0; + pDstY[k] = avx_p0; + pDst[k] = avx_p0; } -#endif - vectorLoopCount += padLength; - for (; vectorLoopCount < bufferLength; vectorLoopCount++) + for (int k = 0; k < 3; k++) { - sobel_filter_bidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); - 
increment_row_ptrs(srcPtrTemp, kernelSize, 1); - dstPtrTemp++; + __m256 pTemp[3], pRowShift[2]; + Rpp32s filterIndex = k * 3; + Rpp32s rowIndex = k * 2; + + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1); + pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pDstX[0] = _mm256_add_ps(pDstX[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pDstY[0] = _mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1); + pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterX[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pDstX[1] = _mm256_add_ps(pDstX[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pDstY[1] = _mm256_add_ps(pDstY[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } - // for the first padLength rows, we need not increment the src row pointers to next rows - increment_row_ptrs(srcPtrRow, kernelSize, 
(!padLengthRows) ? srcDescPtr->strides.hStride : 0); - dstPtrRow += dstDescPtr->strides.hStride; + pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], pMin), pMax); + pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], pMin), pMax); + pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); + pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); + pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); + + pDstX[1] = _mm256_min_ps(_mm256_max_ps(pDstX[1], pMin), pMax); + pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], pMin), pMax); + pDstX[1] = _mm256_mul_ps(pDstX[1], pDstX[1]); + pDstY[1] = _mm256_mul_ps(pDstY[1], pDstY[1]); + pDst[1] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[1], pDstY[1])); + + rpp_sobel_store16(dstPtrTemp, pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 14); + dstPtrTemp += 14; + } +#endif + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + sobel_filter_bidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; } - srcPtrChannel += srcDescPtr->strides.cStride; - dstPtrChannel += dstDescPtr->strides.cStride; + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? 
srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; } } else @@ -399,66 +389,57 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, /* exclude 2 * padLength number of columns from alignedLength calculation since padLength number of columns from the beginning and end of each row will be computed using raw c code */ Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; - for (int c = 0; c < srcDescPtr->c; c++) + for(int i = 0; i < roi.xywhROI.roiHeight; i++) { - srcPtrRow[0] = srcPtrChannel; - srcPtrRow[1] = srcPtrRow[0] + srcDescPtr->strides.hStride; - srcPtrRow[2] = srcPtrRow[1] + srcDescPtr->strides.hStride; - dstPtrRow = dstPtrChannel; - for(int i = 0; i < roi.xywhROI.roiHeight; i++) - { - int vectorLoopCount = 0; - bool padLengthRows = (i < padLength) ? 1: 0; - T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; - T *dstPtrTemp = dstPtrRow; - - // get the number of rows needs to be loaded for the corresponding row - Rpp32s rowKernelLoopLimit = kernelSize; - get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); - dstPtrTemp += padLength; + int vectorLoopCount = 0; + bool padLengthRows = (i < padLength) ? 
1: 0; + T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; + T *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + dstPtrTemp += padLength; #if __AVX2__ - // process alignedLength number of columns in each row - for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) + { + __m256 pRow[6], pDst[2]; + rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); + pDst[0] = avx_p0; + pDst[1] = avx_p0; + for (int k = 0; k < 3; k++) { - __m256 pRow[6], pDst[2]; - rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); - pDst[0] = avx_p0; - pDst[1] = avx_p0; - for (int k = 0; k < 3; k++) - { - __m256 pTemp[3]; - Rpp32s filterIndex = k * 3; - Rpp32s rowIndex = k * 2; - - pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); - pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); - pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - - pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilter[filterIndex]); - pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); - pDst[1] = 
_mm256_add_ps(pDst[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - } - rpp_sobel_store16(dstPtrTemp, pDst); - increment_row_ptrs(srcPtrTemp, kernelSize, 14); - dstPtrTemp += 14; + __m256 pTemp[3]; + Rpp32s filterIndex = k * 3; + Rpp32s rowIndex = k * 2; + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pDst[1] = _mm256_add_ps(pDst[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } + rpp_sobel_store16(dstPtrTemp, pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 14); + dstPtrTemp += 14; + } #endif - vectorLoopCount += padLength; - for (; vectorLoopCount < bufferLength; vectorLoopCount++) - { - sobel_filter_unidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); - increment_row_ptrs(srcPtrTemp, kernelSize, 1); - dstPtrTemp++; - } - // for the first padLength rows, we need not increment the src row pointers to next rows - increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? 
srcDescPtr->strides.hStride : 0); - dstPtrRow += dstDescPtr->strides.hStride; + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + sobel_filter_unidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; } - srcPtrChannel += srcDescPtr->strides.cStride; - dstPtrChannel += dstDescPtr->strides.cStride; + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; } } } diff --git a/src/modules/rppt_tensor_filter_augmentations.cpp b/src/modules/rppt_tensor_filter_augmentations.cpp index a159044a2..2f1a4ec1b 100644 --- a/src/modules/rppt_tensor_filter_augmentations.cpp +++ b/src/modules/rppt_tensor_filter_augmentations.cpp @@ -44,58 +44,61 @@ RppStatus rppt_sobel_filter_host(RppPtr_t srcPtr, RpptRoiType roiType, rppHandle_t rppHandle) { - RppLayoutParams layoutParams = get_layout_params(srcDescPtr->layout, srcDescPtr->c); + // convert image to grey scale if input is RGB image + RppPtr_t tempPtr = srcPtr; + if (srcDescPtr->c == 3) + { + RpptSubpixelLayout srcSubpixelLayout = RpptSubpixelLayout::RGBtype; + tempPtr = rpp::deref(rppHandle).GetInitHandle()->mem.mcpu.scratchBufferHost; + rppt_color_to_greyscale_host(srcPtr, srcDescPtr, tempPtr, dstDescPtr, srcSubpixelLayout, rppHandle); + } if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) { - sobel_filter_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, - srcDescPtr, + sobel_filter_host_tensor(static_cast(tempPtr) + srcDescPtr->offsetInBytes, + dstDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, sobelType, kernelSize, roiTensorPtrSrc, roiType, - layoutParams, rpp::deref(rppHandle)); } else if 
((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { - sobel_filter_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), - srcDescPtr, + sobel_filter_host_tensor(reinterpret_cast(static_cast(tempPtr) + srcDescPtr->offsetInBytes), + dstDescPtr, reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, sobelType, kernelSize, roiTensorPtrSrc, roiType, - layoutParams, rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { - sobel_filter_host_tensor(reinterpret_cast(static_cast(srcPtr) + srcDescPtr->offsetInBytes), - srcDescPtr, + sobel_filter_host_tensor(reinterpret_cast(static_cast(tempPtr) + srcDescPtr->offsetInBytes), + dstDescPtr, reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), dstDescPtr, sobelType, kernelSize, roiTensorPtrSrc, roiType, - layoutParams, rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { - sobel_filter_host_tensor(static_cast(srcPtr) + srcDescPtr->offsetInBytes, - srcDescPtr, + sobel_filter_host_tensor(static_cast(tempPtr) + srcDescPtr->offsetInBytes, + dstDescPtr, static_cast(dstPtr) + dstDescPtr->offsetInBytes, dstDescPtr, sobelType, kernelSize, roiTensorPtrSrc, roiType, - layoutParams, rpp::deref(rppHandle)); } return RPP_SUCCESS; From 1b59d6c5dc1cdb4ef16cfc9f4d32fece33226bf3 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Tue, 30 Jul 2024 11:23:49 +0000 Subject: [PATCH 12/31] added support for 5x5 kernel size --- src/include/cpu/rpp_cpu_simd.hpp | 20 +- src/modules/cpu/kernel/sobel_filter.hpp | 570 +++++++++++++++++------- 2 files changed, 430 insertions(+), 160 deletions(-) diff --git a/src/include/cpu/rpp_cpu_simd.hpp b/src/include/cpu/rpp_cpu_simd.hpp index b9e79c146..8edb6093f 100644 --- a/src/include/cpu/rpp_cpu_simd.hpp +++ b/src/include/cpu/rpp_cpu_simd.hpp @@ -1045,13 +1045,13 @@ 
inline void rpp_glitch_load24_f32pkd3_to_f32pln3_avx(Rpp32f *srcPtr, __m256 *p, { __m128 p128[8]; Rpp32f *srcPtrTemp = srcPtr + srcLocs[0]; - p[0] = _mm256_setr_ps(*srcPtrTemp, *(srcPtrTemp + 3), *(srcPtrTemp + 6), *(srcPtrTemp + 9), + p[0] = _mm256_setr_ps(*srcPtrTemp, *(srcPtrTemp + 3), *(srcPtrTemp + 6), *(srcPtrTemp + 9), *(srcPtrTemp + 12), *(srcPtrTemp + 15), *(srcPtrTemp + 18), *(srcPtrTemp + 21)); srcPtrTemp = srcPtr + srcLocs[1]; - p[1] = _mm256_setr_ps(*(srcPtrTemp + 1), *(srcPtrTemp + 4), *(srcPtrTemp + 7), *(srcPtrTemp + 10), + p[1] = _mm256_setr_ps(*(srcPtrTemp + 1), *(srcPtrTemp + 4), *(srcPtrTemp + 7), *(srcPtrTemp + 10), *(srcPtrTemp + 13), *(srcPtrTemp + 16), *(srcPtrTemp + 19), *(srcPtrTemp + 22)); srcPtrTemp = srcPtr + srcLocs[2]; - p[2] = _mm256_setr_ps(*(srcPtrTemp + 2), *(srcPtrTemp + 5), *(srcPtrTemp + 8), *(srcPtrTemp + 11), + p[2] = _mm256_setr_ps(*(srcPtrTemp + 2), *(srcPtrTemp + 5), *(srcPtrTemp + 8), *(srcPtrTemp + 11), *(srcPtrTemp + 14), *(srcPtrTemp + 17), *(srcPtrTemp + 20), *(srcPtrTemp + 23)); } @@ -1099,7 +1099,7 @@ inline void rpp_glitch_load30_i8pkd3_to_i8pkd3_avx(Rpp8s *srcPtr, int * srcLocs, inline void rpp_glitch_load6_f32pkd3_to_f32pkd3_avx(Rpp32f *srcPtr, int * srcLocs, __m256 &p) { - p =_mm256_setr_ps(*(srcPtr + srcLocs[0]), *(srcPtr + srcLocs[1] + 1), *(srcPtr + srcLocs[2] + 2), *(srcPtr + srcLocs[0] + 3), + p =_mm256_setr_ps(*(srcPtr + srcLocs[0]), *(srcPtr + srcLocs[1] + 1), *(srcPtr + srcLocs[2] + 2), *(srcPtr + srcLocs[0] + 3), *(srcPtr + srcLocs[1] + 4), *(srcPtr + srcLocs[2] + 5), 0.0f, 0.0f); } @@ -1633,6 +1633,12 @@ inline void rpp_store16_f32_to_f32_avx(Rpp32f *dstPtr, __m256 *p) _mm256_storeu_ps(dstPtr + 8, p[1]); } +inline void rpp_load16_f16_to_f32_avx(Rpp16f *srcPtr, __m256 *p) +{ + p[0] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtr)))); + p[1] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtr + 8)))); +} + inline void rpp_store16_f32_to_f16_avx(Rpp16f *dstPtr, 
__m256 *p) { __m128i px128[2]; @@ -3862,8 +3868,8 @@ inline void rpp_resize_nn_load_u8pkd3(Rpp8u *srcRowPtrsForInterp, Rpp32s *loc, _ template inline void rpp_resize_nn_extract_pkd3_avx(T *srcRowPtrsForInterp, Rpp32s *loc, __m256i &p) { - p = _mm256_setr_epi8(*(srcRowPtrsForInterp + loc[0]), *(srcRowPtrsForInterp + loc[0] + 1), *(srcRowPtrsForInterp + loc[0] + 2), - *(srcRowPtrsForInterp + loc[1]), *(srcRowPtrsForInterp + loc[1] + 1), *(srcRowPtrsForInterp + loc[1] + 2), + p = _mm256_setr_epi8(*(srcRowPtrsForInterp + loc[0]), *(srcRowPtrsForInterp + loc[0] + 1), *(srcRowPtrsForInterp + loc[0] + 2), + *(srcRowPtrsForInterp + loc[1]), *(srcRowPtrsForInterp + loc[1] + 1), *(srcRowPtrsForInterp + loc[1] + 2), *(srcRowPtrsForInterp + loc[2]), *(srcRowPtrsForInterp + loc[2] + 1), *(srcRowPtrsForInterp + loc[2] + 2), *(srcRowPtrsForInterp + loc[3]), *(srcRowPtrsForInterp + loc[3] + 1), *(srcRowPtrsForInterp + loc[3] + 2), *(srcRowPtrsForInterp + loc[4]), *(srcRowPtrsForInterp + loc[4] + 1), *(srcRowPtrsForInterp + loc[4] + 2), @@ -3888,7 +3894,7 @@ inline void rpp_resize_nn_load_u8pln1(Rpp8u *srcRowPtrsForInterp, Rpp32s *loc, _ template inline void rpp_resize_nn_extract_pln1_avx(T *srcRowPtrsForInterp, Rpp32s *loc, __m256i &p) { - p = _mm256_setr_epi8(*(srcRowPtrsForInterp + loc[0]), *(srcRowPtrsForInterp + loc[1]), + p = _mm256_setr_epi8(*(srcRowPtrsForInterp + loc[0]), *(srcRowPtrsForInterp + loc[1]), *(srcRowPtrsForInterp + loc[2]), *(srcRowPtrsForInterp + loc[3]), *(srcRowPtrsForInterp + loc[4]), *(srcRowPtrsForInterp + loc[5]), *(srcRowPtrsForInterp + loc[6]), *(srcRowPtrsForInterp + loc[7]), diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index e4128a5b2..478078e0e 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -129,8 +129,22 @@ inline void process_left_border_columns_pln_pln(T **srcPtrTemp, T *dstPtrTemp, R } } -Rpp32f sobel3x3X[9] = {-1, 0, 1, -2, 0, 2, -1, 0, 1}; 
-Rpp32f sobel3x3Y[9] = {-1, -2, -1, 0, 0, 0, 1, 2, 1}; +Rpp32f sobel3x3X[9] = {-1, 0, 1, + -2, 0, 2, + -1, 0, 1}; +Rpp32f sobel3x3Y[9] = {-1, -2, -1, + 0, 0, 0, + 1, 2, 1}; +Rpp32f sobel5x5X[25] = {-5, -4, 0, 4, 5, + -8, -10, 0, 10, 8, + -10, -20, 0, 20, 10, + -8, -10, 0, 10, 8, + -5, -4, 0, 4, 5}; +Rpp32f sobel5x5Y[25] = {-5, -8, -10, -8, -5, + -4, -10, -20, -10, -4, + 0, 0, 0, 0, 0, + 4, 10, 20, 10, 4, + 5, 8, 10, 8, 5}; // load function for 3x3 kernel size inline void rpp_load_sobel_filter_3x3_host(__m256 *pRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) @@ -185,21 +199,82 @@ inline void rpp_load_sobel_filter_3x3_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rp inline void rpp_load_sobel_filter_3x3_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rpp32s rowKernelLoopLimit) { // irrespective of row location, we need to load 2 rows for 3x3 kernel - pRow[0] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[0])))); - pRow[1] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[0] + 8)))); - pRow[2] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[1])))); - pRow[3] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[1] + 8)))); + rpp_load16_f16_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_f16_to_f32_avx(srcPtrTemp[1], &pRow[2]); // if rowKernelLoopLimit is 3 load values from 3rd row pointer else set it 0 if (rowKernelLoopLimit == 3) + rpp_load16_f16_to_f32_avx(srcPtrTemp[2], &pRow[4]); + else { - pRow[4] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[2])))); - pRow[5] = _mm256_cvtph_ps(_mm_castps_si128(_mm_loadu_ps(reinterpret_cast(srcPtrTemp[2] + 8)))); + pRow[4] = avx_p0; + pRow[5] = avx_p0; } - else +} + +// load function for 5x5 kernel size +inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 3 rows for 5x5 kernel + 
rpp_load16_u8_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_u8_to_f32_avx(srcPtrTemp[1], &pRow[2]); + rpp_load16_u8_to_f32_avx(srcPtrTemp[2], &pRow[4]); + + for (int k = 3; k < rowKernelLoopLimit; k++) + rpp_load16_u8_to_f32_avx(srcPtrTemp[k], &pRow[k * 2]); + for (int k = rowKernelLoopLimit; k < 5; k++) + { + pRow[k * 2] = avx_p0; + pRow[k * 2 + 1] = avx_p0; + } +} + +inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp8s **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 3 rows for 5x5 kernel + rpp_load16_i8_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_i8_to_f32_avx(srcPtrTemp[1], &pRow[2]); + rpp_load16_i8_to_f32_avx(srcPtrTemp[2], &pRow[4]); + + for (int k = 3; k < rowKernelLoopLimit; k++) + rpp_load16_i8_to_f32_avx(srcPtrTemp[k], &pRow[k * 2]); + for (int k = rowKernelLoopLimit; k < 5; k++) { - pRow[4] = avx_px0; - pRow[5] = avx_px0; + pRow[k * 2] = avx_p0; + pRow[k * 2 + 1] = avx_p0; + } +} + +inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 3 rows for 5x5 kernel + rpp_load16_f32_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_f32_to_f32_avx(srcPtrTemp[1], &pRow[2]); + rpp_load16_f32_to_f32_avx(srcPtrTemp[2], &pRow[4]); + + for (int k = 3; k < rowKernelLoopLimit; k++) + rpp_load16_f32_to_f32_avx(srcPtrTemp[k], &pRow[k * 2]); + for (int k = rowKernelLoopLimit; k < 5; k++) + { + pRow[k * 2] = avx_p0; + pRow[k * 2 + 1] = avx_p0; + } +} + +// load function for 5x5 kernel size +inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 3 rows for 5x5 kernel + rpp_load16_f16_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_f16_to_f32_avx(srcPtrTemp[1], &pRow[2]); + rpp_load16_f16_to_f32_avx(srcPtrTemp[2], &pRow[4]); + + for (int k = 3; k < rowKernelLoopLimit; k++) + 
rpp_load16_f16_to_f32_avx(srcPtrTemp[k], &pRow[k * 2]); + for (int k = rowKernelLoopLimit; k < 5; k++) + { + pRow[k * 2] = avx_p0; + pRow[k * 2 + 1] = avx_p0; } } @@ -273,173 +348,362 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, T *srcPtrChannel, *dstPtrChannel; srcPtrChannel = srcPtrImage + (roi.xywhROI.xy.y * srcDescPtr->strides.hStride) + roi.xywhROI.xy.x; dstPtrChannel = dstPtrImage; - if ((kernelSize == 3) && (srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && (srcDescPtr->c == 1)) + if ((srcDescPtr->layout == RpptLayout::NCHW) && (dstDescPtr->layout == RpptLayout::NCHW) && (srcDescPtr->c == 1)) { - T *srcPtrRow[3], *dstPtrRow; - for (int i = 0; i < 3; i++) - srcPtrRow[i] = srcPtrChannel + i * srcDescPtr->strides.hStride; - dstPtrRow = dstPtrChannel; - - if (combined) + if (kernelSize == 3) { -#if __AVX2__ - __m256 pFilterX[9], pFilterY[9]; - filterX = sobel3x3X; - filterY = sobel3x3Y; - for (int i = 0; i < 9; i++) - { - pFilterX[i] = _mm256_set1_ps(filterX[i]); - pFilterY[i] = _mm256_set1_ps(filterY[i]); - } -#endif - /* exclude 2 * padLength number of columns from alignedLength calculation - since padLength number of columns from the beginning and end of each row will be computed using raw c code */ - Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; - for(int i = 0; i < roi.xywhROI.roiHeight; i++) + T *srcPtrRow[3], *dstPtrRow; + for (int i = 0; i < 3; i++) + srcPtrRow[i] = srcPtrChannel + i * srcDescPtr->strides.hStride; + dstPtrRow = dstPtrChannel; + + if (combined) { - int vectorLoopCount = 0; - bool padLengthRows = (i < padLength) ? 
1: 0; - T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; - T *dstPtrTemp = dstPtrRow; - - // get the number of rows needs to be loaded for the corresponding row - Rpp32s rowKernelLoopLimit = kernelSize; - get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); - dstPtrTemp += padLength; #if __AVX2__ - // process alignedLength number of columns in each row - for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) + __m256 pFilterX[9], pFilterY[9]; + filterX = sobel3x3X; + filterY = sobel3x3Y; + for (int i = 0; i < 9; i++) + { + pFilterX[i] = _mm256_set1_ps(filterX[i]); + pFilterY[i] = _mm256_set1_ps(filterY[i]); + } +#endif + /* exclude 2 * padLength number of columns from alignedLength calculation + since padLength number of columns from the beginning and end of each row will be computed using raw c code */ + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; + for(int i = 0; i < roi.xywhROI.roiHeight; i++) { - __m256 pRow[6], pDst[2], pDstX[2], pDstY[2]; - rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); - for (int k = 0; k < 2; k++) + int vectorLoopCount = 0; + bool padLengthRows = (i < padLength) ? 
1: 0; + T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; + T *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + dstPtrTemp += padLength; +#if __AVX2__ + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) { - pDstX[k] = avx_p0; - pDstY[k] = avx_p0; - pDst[k] = avx_p0; + __m256 pRow[6], pDst[2], pDstX[2], pDstY[2]; + rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); + for (int k = 0; k < 2; k++) + { + pDstX[k] = avx_p0; + pDstY[k] = avx_p0; + pDst[k] = avx_p0; + } + for (int k = 0; k < 3; k++) + { + __m256 pTemp[3], pRowShift[2]; + Rpp32s filterIndex = k * 3; + Rpp32s rowIndex = k * 2; + + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1); + pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pDstX[0] = _mm256_add_ps(pDstX[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pDstY[0] = _mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1); + pRowShift[1] = 
_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterX[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pDstX[1] = _mm256_add_ps(pDstX[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pDstY[1] = _mm256_add_ps(pDstY[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + } + pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], pMin), pMax); + pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], pMin), pMax); + pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); + pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); + pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); + + pDstX[1] = _mm256_min_ps(_mm256_max_ps(pDstX[1], pMin), pMax); + pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], pMin), pMax); + pDstX[1] = _mm256_mul_ps(pDstX[1], pDstX[1]); + pDstY[1] = _mm256_mul_ps(pDstY[1], pDstY[1]); + pDst[1] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[1], pDstY[1])); + + rpp_sobel_store16(dstPtrTemp, pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 14); + dstPtrTemp += 14; } - for (int k = 0; k < 3; k++) +#endif + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; vectorLoopCount++) { - __m256 pTemp[3], pRowShift[2]; - Rpp32s filterIndex = k * 3; - Rpp32s rowIndex = k * 2; - - pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1); - pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); - pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], 
pFilterX[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); - pDstX[0] = _mm256_add_ps(pDstX[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - - pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterY[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); - pDstY[0] = _mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - - pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1); - pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2); - pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterX[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); - pDstX[1] = _mm256_add_ps(pDstX[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - - pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterY[filterIndex]); - pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); - pDstY[1] = _mm256_add_ps(pDstY[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + sobel_filter_bidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; } - pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], pMin), pMax); - pDstY[0] = _mm256_min_ps(_mm256_max_ps(pDstY[0], pMin), pMax); - pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); - pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); - pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); - - pDstX[1] = _mm256_min_ps(_mm256_max_ps(pDstX[1], pMin), pMax); - pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], pMin), pMax); - 
pDstX[1] = _mm256_mul_ps(pDstX[1], pDstX[1]); - pDstY[1] = _mm256_mul_ps(pDstY[1], pDstY[1]); - pDst[1] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[1], pDstY[1])); - - rpp_sobel_store16(dstPtrTemp, pDst); - increment_row_ptrs(srcPtrTemp, kernelSize, 14); - dstPtrTemp += 14; + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; } + } + else + { +#if __AVX2__ + __m256 pFilter[9]; + filter = (!sobelType) ? sobel3x3X : sobel3x3Y; + for (int i = 0; i < 9; i++) + pFilter[i] = _mm256_set1_ps(filter[i]); #endif - vectorLoopCount += padLength; - for (; vectorLoopCount < bufferLength; vectorLoopCount++) + /* exclude 2 * padLength number of columns from alignedLength calculation + since padLength number of columns from the beginning and end of each row will be computed using raw c code */ + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; + for(int i = 0; i < roi.xywhROI.roiHeight; i++) { - sobel_filter_bidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); - increment_row_ptrs(srcPtrTemp, kernelSize, 1); - dstPtrTemp++; + int vectorLoopCount = 0; + bool padLengthRows = (i < padLength) ? 
1: 0; + T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; + T *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + dstPtrTemp += padLength; +#if __AVX2__ + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) + { + __m256 pRow[6], pDst[2]; + rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); + pDst[0] = avx_p0; + pDst[1] = avx_p0; + for (int k = 0; k < 3; k++) + { + __m256 pTemp[3]; + Rpp32s filterIndex = k * 3; + Rpp32s rowIndex = k * 2; + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pDst[1] = _mm256_add_ps(pDst[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + } + rpp_sobel_store16(dstPtrTemp, pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 14); + dstPtrTemp += 14; + } +#endif + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; 
vectorLoopCount++) + { + sobel_filter_unidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; + } + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; } - // for the first padLength rows, we need not increment the src row pointers to next rows - increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? srcDescPtr->strides.hStride : 0); - dstPtrRow += dstDescPtr->strides.hStride; } } - else + else if (kernelSize == 5) { -#if __AVX2__ - __m256 pFilter[9]; - filter = (!sobelType) ? sobel3x3Y : sobel3x3X; - for (int i = 0; i < 9; i++) - pFilter[i] = _mm256_set1_ps(filter[i]); -#endif - /* exclude 2 * padLength number of columns from alignedLength calculation - since padLength number of columns from the beginning and end of each row will be computed using raw c code */ - Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; - for(int i = 0; i < roi.xywhROI.roiHeight; i++) + T *srcPtrRow[5], *dstPtrRow; + for (int i = 0; i < 5; i++) + srcPtrRow[i] = srcPtrChannel + i * srcDescPtr->strides.hStride; + dstPtrRow = dstPtrChannel; + + if (combined) { - int vectorLoopCount = 0; - bool padLengthRows = (i < padLength) ? 
1: 0; - T *srcPtrTemp[3] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2]}; - T *dstPtrTemp = dstPtrRow; - - // get the number of rows needs to be loaded for the corresponding row - Rpp32s rowKernelLoopLimit = kernelSize; - get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); - process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); - dstPtrTemp += padLength; #if __AVX2__ - // process alignedLength number of columns in each row - for (; vectorLoopCount < alignedLength; vectorLoopCount += 14) + __m256 pFilterX[25], pFilterY[25]; + filterX = sobel5x5X; + filterY = sobel5x5Y; + for (int i = 0; i < 25; i++) + { + pFilterX[i] = _mm256_set1_ps(filterX[i]); + pFilterY[i] = _mm256_set1_ps(filterY[i]); + } +#endif + /* exclude 2 * padLength number of columns from alignedLength calculation + since padLength number of columns from the beginning and end of each row will be computed using raw c code */ + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; + for(int i = 0; i < roi.xywhROI.roiHeight; i++) { - __m256 pRow[6], pDst[2]; - rpp_load_sobel_filter_3x3_host(pRow, srcPtrTemp, rowKernelLoopLimit); - pDst[0] = avx_p0; - pDst[1] = avx_p0; - for (int k = 0; k < 3; k++) + int vectorLoopCount = 0; + bool padLengthRows = (i < padLength) ? 
1: 0; + T *srcPtrTemp[5] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2], srcPtrRow[3], srcPtrRow[4]}; + T *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + dstPtrTemp += padLength; +#if __AVX2__ + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 12) + { + __m256 pRow[10], pDst[2], pDstX[2], pDstY[2]; + rpp_load_sobel_filter_5x5_host(pRow, srcPtrTemp, rowKernelLoopLimit); + for (int k = 0; k < 2; k++) + { + pDstX[k] = avx_p0; + pDstY[k] = avx_p0; + pDst[k] = avx_p0; + } + for (int k = 0; k < 5; k++) + { + __m256 pTemp[5], pRowShift[4]; + Rpp32s filterIndex = k * 5; + Rpp32s rowIndex = k * 2; + + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1); + pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); + pRowShift[2] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 7), avx_pxMaskRotate0To3); + pRowShift[3] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 15), avx_pxMaskRotate0To4); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pTemp[3] = _mm256_mul_ps(pRowShift[2], pFilterX[filterIndex + 3]); + pTemp[4] = _mm256_mul_ps(pRowShift[3], pFilterX[filterIndex + 4]); + pDstX[0] = _mm256_add_ps(pDstX[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(pTemp[3], pTemp[4]))); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], 
pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pTemp[3] = _mm256_mul_ps(pRowShift[2], pFilterY[filterIndex + 3]); + pTemp[4] = _mm256_mul_ps(pRowShift[3], pFilterY[filterIndex + 4]); + pDstY[0] = _mm256_add_ps(pDstY[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(pTemp[3], pTemp[4]))); + + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1); + pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2); + pRowShift[2] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 7), avx_pxMaskRotate0To3); + pRowShift[3] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 15), avx_pxMaskRotate0To4); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterX[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pTemp[3] = _mm256_mul_ps(pRowShift[2], pFilterX[filterIndex + 3]); + pTemp[4] = _mm256_mul_ps(pRowShift[3], pFilterX[filterIndex + 4]); + pDstX[1] = _mm256_add_ps(pDstX[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(pTemp[3], pTemp[4]))); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pTemp[3] = _mm256_mul_ps(pRowShift[2], pFilterY[filterIndex + 3]); + pTemp[4] = _mm256_mul_ps(pRowShift[3], pFilterY[filterIndex + 4]); + pDstY[1] = _mm256_add_ps(pDstY[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(pTemp[3], pTemp[4]))); + } + pDstX[0] = _mm256_min_ps(_mm256_max_ps(pDstX[0], pMin), pMax); + pDstY[0] = 
_mm256_min_ps(_mm256_max_ps(pDstY[0], pMin), pMax); + pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); + pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); + pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); + + pDstX[1] = _mm256_min_ps(_mm256_max_ps(pDstX[1], pMin), pMax); + pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], pMin), pMax); + pDstX[1] = _mm256_mul_ps(pDstX[1], pDstX[1]); + pDstY[1] = _mm256_mul_ps(pDstY[1], pDstY[1]); + pDst[1] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[1], pDstY[1])); + + rpp_sobel_store16(dstPtrTemp, pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 12); + dstPtrTemp += 12; + } +#endif + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; vectorLoopCount++) { - __m256 pTemp[3]; - Rpp32s filterIndex = k * 3; - Rpp32s rowIndex = k * 2; - - pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); - pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); - pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); - - pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilter[filterIndex]); - pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); - pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); - pDst[1] = _mm256_add_ps(pDst[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); + sobel_filter_bidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; } - 
rpp_sobel_store16(dstPtrTemp, pDst); - increment_row_ptrs(srcPtrTemp, kernelSize, 14); - dstPtrTemp += 14; + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; } + } + else + { +#if __AVX2__ + __m256 pFilter[25]; + filter = (!sobelType) ? sobel5x5X : sobel5x5Y; + for (int i = 0; i < 25; i++) + pFilter[i] = _mm256_set1_ps(filter[i]); #endif - vectorLoopCount += padLength; - for (; vectorLoopCount < bufferLength; vectorLoopCount++) + /* exclude 2 * padLength number of columns from alignedLength calculation + since padLength number of columns from the beginning and end of each row will be computed using raw c code */ + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; + for(int i = 0; i < roi.xywhROI.roiHeight; i++) { - sobel_filter_unidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); - increment_row_ptrs(srcPtrTemp, kernelSize, 1); - dstPtrTemp++; + int vectorLoopCount = 0; + bool padLengthRows = (i < padLength) ? 
1: 0; + T *srcPtrTemp[5] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2], srcPtrRow[3], srcPtrRow[4]}; + T *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + dstPtrTemp += padLength; +#if __AVX2__ + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 12) + { + __m256 pRow[10], pDst[2]; + rpp_load_sobel_filter_5x5_host(pRow, srcPtrTemp, rowKernelLoopLimit); + pDst[0] = avx_p0; + pDst[1] = avx_p0; + for (int k = 0; k < 5; k++) + { + __m256 pTemp[5]; + Rpp32s filterIndex = k * 5; + Rpp32s rowIndex = k * 2; + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pTemp[3] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 7), avx_pxMaskRotate0To3), pFilter[filterIndex + 3]); + pTemp[4] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 15), avx_pxMaskRotate0To4), pFilter[filterIndex + 4]); + pDst[0] = _mm256_add_ps(pDst[0], _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(pTemp[3], pTemp[4]))); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex + 1], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = 
_mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pTemp[3] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 7), avx_pxMaskRotate0To3), pFilter[filterIndex + 3]); + pTemp[4] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 15), avx_pxMaskRotate0To4), pFilter[filterIndex + 4]); + pDst[1] = _mm256_add_ps(pDst[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(pTemp[3], pTemp[4]))); + } + rpp_sobel_store16(dstPtrTemp, pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 12); + dstPtrTemp += 12; + } +#endif + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + sobel_filter_unidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; + } + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; } - // for the first padLength rows, we need not increment the src row pointers to next rows - increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? 
srcDescPtr->strides.hStride : 0); - dstPtrRow += dstDescPtr->strides.hStride; } } } From 49bb2911da0b67a52895ab3d3d4bb2b79543137e Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Tue, 30 Jul 2024 13:46:19 +0000 Subject: [PATCH 13/31] added support for 7x7 kernel size --- src/include/cpu/rpp_cpu_simd.hpp | 17 ++ src/modules/cpu/kernel/sobel_filter.hpp | 285 ++++++++++++++++++++++-- 2 files changed, 288 insertions(+), 14 deletions(-) diff --git a/src/include/cpu/rpp_cpu_simd.hpp b/src/include/cpu/rpp_cpu_simd.hpp index 8edb6093f..17aea7c26 100644 --- a/src/include/cpu/rpp_cpu_simd.hpp +++ b/src/include/cpu/rpp_cpu_simd.hpp @@ -1421,6 +1421,23 @@ inline void rpp_load8_u8_to_f32_avx(Rpp8u *srcPtr, __m256 *p) p[0] = _mm256_cvtepi32_ps(_mm256_setr_m128i(_mm_shuffle_epi8(px, xmm_pxMask00To03), _mm_shuffle_epi8(px, xmm_pxMask04To07))); /* Contains pixels 01-08 */ } +inline void rpp_store8_f32_to_u8_avx(Rpp8u *dstPtrTemp, __m256 *pDst) +{ + __m256i px1 = _mm256_cvtps_epi32(pDst[0]); // Pack int32 values to uint16 + __m128i px2 = _mm_packus_epi32(_mm256_castsi256_si128(px1), _mm256_extracti128_si256(px1, 1)); // Pack uint16 values to uint8 + __m128i px3 = _mm_packus_epi16(px2, _mm_setzero_si128()); + _mm_storeu_si64((__m128i*)dstPtrTemp, px3); // Store the result to dst +} + +inline void rpp_store8_f32_to_i8_avx(Rpp8s *dstPtrTemp, __m256 *pDst) +{ + __m256i px1 = _mm256_cvtps_epi32(pDst[0]); + __m128i px2 = _mm_packus_epi32(_mm256_castsi256_si128(px1), _mm256_extracti128_si256(px1, 1)); + __m128i px3 = _mm_packus_epi16(px2, _mm_setzero_si128()); + px3 = _mm_sub_epi8(px3, xmm_pxConvertI8); // convert back to i8 for px0 store // + _mm_storeu_si64((__m128i*)dstPtrTemp, px3); // Store the result to dst +} + inline void rpp_load16_u8_to_f32_mirror_avx(Rpp8u *srcPtr, __m256 *p) { __m128i px; diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 478078e0e..1142c60b4 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ 
b/src/modules/cpu/kernel/sobel_filter.hpp @@ -135,16 +135,30 @@ Rpp32f sobel3x3X[9] = {-1, 0, 1, Rpp32f sobel3x3Y[9] = {-1, -2, -1, 0, 0, 0, 1, 2, 1}; -Rpp32f sobel5x5X[25] = {-5, -4, 0, 4, 5, - -8, -10, 0, 10, 8, - -10, -20, 0, 20, 10, - -8, -10, 0, 10, 8, - -5, -4, 0, 4, 5}; -Rpp32f sobel5x5Y[25] = {-5, -8, -10 -8, -5, - -4, -10, -20, -10, -4, - 0, 0, 0, 0, 0, - 4, 10, 20, 10, 4, - 5, 8, 10, 8, 5}; +Rpp32f sobel5x5X[25] = {-1, -2, 0, 2, 1, + -4, -8, 0, 8, 4, + -6, -12, 0, 12, 6, + -4, -8, 0, 8, 4, + -1, -2, 0, 2, 1}; +Rpp32f sobel5x5Y[25] = {-1, -4, -6, -4, -1, + -2, -8, -12, -8, -2, + 0, 0, 0, 0, 0, + 2, 8, 12, 8, 2, + 1, 4, 6, 4, 1}; +Rpp32f sobel7x7X[49] = {-1, -4, -5, 0, 5, 4, 1, + -6, -24, -30, 0, 30, 24, 6, + -15, -60, -75, 0, 75, 60, 15, + -20, -80, -100, 0, 100, 80, 20, + -15, -60, -75, 0, 75, 60, 15, + -6, -24, -30, 0, 30, 24, 6, + -1, -4, -5, 0, 5, 4, 1}; +Rpp32f sobel7x7Y[49] = {-1, -6, -15, -20, -15, -6, -1, + -4, -24, -60, -80, -60, -24, -4, + -5, -30, -75, -100, -75, -30, -5, + 0, 0, 0, 0, 0, 0, 0, + 5, 30, 75, 100, 75, 30, 5, + 4, 24, 60, 80, 60, 24, 4, + 1, 6, 15, 20, 15, 6, 1}; // load function for 3x3 kernel size inline void rpp_load_sobel_filter_3x3_host(__m256 *pRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) @@ -231,7 +245,7 @@ inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp8u **srcPtrTemp, Rpp inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp8s **srcPtrTemp, Rpp32s rowKernelLoopLimit) { - // irrespective of row location, we need to load 2 rows for 3x3 kernel + // irrespective of row location, we need to load 3 rows for 5x5 kernel rpp_load16_i8_to_f32_avx(srcPtrTemp[0], &pRow[0]); rpp_load16_i8_to_f32_avx(srcPtrTemp[1], &pRow[2]); rpp_load16_i8_to_f32_avx(srcPtrTemp[2], &pRow[4]); @@ -247,7 +261,7 @@ inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp8s **srcPtrTemp, Rpp inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rpp32s rowKernelLoopLimit) { - // irrespective of row 
location, we need to load 2 rows for 3x3 kernel + // irrespective of row location, we need to load 3 rows for 5x5 kernel rpp_load16_f32_to_f32_avx(srcPtrTemp[0], &pRow[0]); rpp_load16_f32_to_f32_avx(srcPtrTemp[1], &pRow[2]); rpp_load16_f32_to_f32_avx(srcPtrTemp[2], &pRow[4]); @@ -261,10 +275,9 @@ inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rp } } -// load function for 3x3 kernel size inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rpp32s rowKernelLoopLimit) { - // irrespective of row location, we need to load 2 rows for 3x3 kernel + // irrespective of row location, we need to load 3 rows for 5x5 kernel rpp_load16_f16_to_f32_avx(srcPtrTemp[0], &pRow[0]); rpp_load16_f16_to_f32_avx(srcPtrTemp[1], &pRow[2]); rpp_load16_f16_to_f32_avx(srcPtrTemp[2], &pRow[4]); @@ -278,6 +291,71 @@ inline void rpp_load_sobel_filter_5x5_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rp } } +// load function for 7x7 kernel size +inline void rpp_load_sobel_filter_7x7_host(__m256 *pRow, Rpp8u **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 4 rows for 7x7 kernel + rpp_load16_u8_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_u8_to_f32_avx(srcPtrTemp[1], &pRow[2]); + rpp_load16_u8_to_f32_avx(srcPtrTemp[2], &pRow[4]); + rpp_load16_u8_to_f32_avx(srcPtrTemp[3], &pRow[6]); + for (int k = 4; k < rowKernelLoopLimit; k++) + rpp_load16_u8_to_f32_avx(srcPtrTemp[k], &pRow[k * 2]); + for (int k = rowKernelLoopLimit; k < 7; k++) + { + pRow[k * 2] = avx_p0; + pRow[k * 2 + 1] = avx_p0; + } +} + +inline void rpp_load_sobel_filter_7x7_host(__m256 *pRow, Rpp8s **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 4 rows for 7x7 kernel + rpp_load16_i8_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_i8_to_f32_avx(srcPtrTemp[1], &pRow[2]); + rpp_load16_i8_to_f32_avx(srcPtrTemp[2], &pRow[4]); + rpp_load16_i8_to_f32_avx(srcPtrTemp[3], &pRow[6]); + for (int k = 4; k < 
rowKernelLoopLimit; k++) + rpp_load16_i8_to_f32_avx(srcPtrTemp[k], &pRow[k * 2]); + for (int k = rowKernelLoopLimit; k < 7; k++) + { + pRow[k * 2] = avx_p0; + pRow[k * 2 + 1] = avx_p0; + } +} + +inline void rpp_load_sobel_filter_7x7_host(__m256 *pRow, Rpp32f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 4 rows for 7x7 kernel + rpp_load16_f32_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_f32_to_f32_avx(srcPtrTemp[1], &pRow[2]); + rpp_load16_f32_to_f32_avx(srcPtrTemp[2], &pRow[4]); + rpp_load16_f32_to_f32_avx(srcPtrTemp[3], &pRow[6]); + for (int k = 4; k < rowKernelLoopLimit; k++) + rpp_load16_f32_to_f32_avx(srcPtrTemp[k], &pRow[k * 2]); + for (int k = rowKernelLoopLimit; k < 7; k++) + { + pRow[k * 2] = avx_p0; + pRow[k * 2 + 1] = avx_p0; + } +} + +inline void rpp_load_sobel_filter_7x7_host(__m256 *pRow, Rpp16f **srcPtrTemp, Rpp32s rowKernelLoopLimit) +{ + // irrespective of row location, we need to load 4 rows for 7x7 kernel + rpp_load16_f16_to_f32_avx(srcPtrTemp[0], &pRow[0]); + rpp_load16_f16_to_f32_avx(srcPtrTemp[1], &pRow[2]); + rpp_load16_f16_to_f32_avx(srcPtrTemp[2], &pRow[4]); + rpp_load16_f16_to_f32_avx(srcPtrTemp[3], &pRow[6]); + for (int k = 4; k < rowKernelLoopLimit; k++) + rpp_load16_f16_to_f32_avx(srcPtrTemp[k], &pRow[k * 2]); + for (int k = rowKernelLoopLimit; k < 7; k++) + { + pRow[k * 2] = avx_p0; + pRow[k * 2 + 1] = avx_p0; + } +} + inline void rpp_sobel_store16(Rpp8u *dstPtrTemp, __m256 *pDst) { rpp_store16_f32_to_u8_avx(dstPtrTemp, pDst); @@ -298,6 +376,26 @@ inline void rpp_sobel_store16(Rpp16f *dstPtrTemp, __m256 *pDst) rpp_store16_f32_to_f16_avx(dstPtrTemp, pDst); } +inline void rpp_sobel_store8(Rpp8u *dstPtrTemp, __m256 *pDst) +{ + rpp_store8_f32_to_u8_avx(dstPtrTemp, pDst); +} + +inline void rpp_sobel_store8(Rpp8s *dstPtrTemp, __m256 *pDst) +{ + rpp_store8_f32_to_i8_avx(dstPtrTemp, pDst); +} + +inline void rpp_sobel_store8(Rpp32f *dstPtrTemp, __m256 *pDst) +{ + 
rpp_store8_f32_to_f32_avx(dstPtrTemp, pDst); +} + +inline void rpp_sobel_store8(Rpp16f *dstPtrTemp, __m256 *pDst) +{ + rpp_store8_f32_to_f16_avx(dstPtrTemp, pDst); +} + template RppStatus sobel_filter_host_tensor(T *srcPtr, RpptDescPtr srcDescPtr, @@ -692,6 +790,165 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, increment_row_ptrs(srcPtrTemp, kernelSize, 12); dstPtrTemp += 12; } +#endif + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + sobel_filter_unidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; + } + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; + } + } + } + else if (kernelSize == 7) + { + T *srcPtrRow[7], *dstPtrRow; + for (int i = 0; i < 7; i++) + srcPtrRow[i] = srcPtrChannel + i * srcDescPtr->strides.hStride; + dstPtrRow = dstPtrChannel; + + if (combined) + { +#if __AVX2__ + __m256 pFilterX[49], pFilterY[49]; + filterX = sobel7x7X; + filterY = sobel7x7Y; + for (int i = 0; i < 49; i++) + { + pFilterX[i] = _mm256_set1_ps(filterX[i]); + pFilterY[i] = _mm256_set1_ps(filterY[i]); + } +#endif + /* exclude 2 * padLength number of columns from alignedLength calculation + since padLength number of columns from the beginning and end of each row will be computed using raw c code */ + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; + for(int i = 0; i < roi.xywhROI.roiHeight; i++) + { + int vectorLoopCount = 0; + bool padLengthRows = (i < padLength) ? 
1: 0; + T *srcPtrTemp[7] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2], srcPtrRow[3], srcPtrRow[4], srcPtrRow[5], srcPtrRow[6]}; + T *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + dstPtrTemp += padLength; +#if __AVX2__ + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 8) + { + __m256 pRow[14], pDst, pDstX, pDstY; + rpp_load_sobel_filter_7x7_host(pRow, srcPtrTemp, rowKernelLoopLimit); + pDstX = avx_p0; + pDstY = avx_p0; + pDst = avx_p0; + for (int k = 0; k < 7; k++) + { + __m256 pTemp[7], pRowShift[6]; + Rpp32s filterIndex = k * 7; + Rpp32s rowIndex = k * 2; + + pRowShift[0] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1); + pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); + pRowShift[2] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 7), avx_pxMaskRotate0To3); + pRowShift[3] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 15), avx_pxMaskRotate0To4); + pRowShift[4] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 31), avx_pxMaskRotate0To5); // blend mask 31 (5 lanes) pairs with the rotate-by-5 mask + pRowShift[5] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 63), avx_pxMaskRotate0To6); + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterX[filterIndex + 2]); + pTemp[3] = _mm256_mul_ps(pRowShift[2], pFilterX[filterIndex + 3]); + pTemp[4] = _mm256_mul_ps(pRowShift[3], 
pFilterX[filterIndex + 4]); + pTemp[5] = _mm256_mul_ps(pRowShift[4], pFilterX[filterIndex + 5]); + pTemp[6] = _mm256_mul_ps(pRowShift[5], pFilterX[filterIndex + 6]); + pDstX = _mm256_add_ps(pDstX, _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(_mm256_add_ps(pTemp[3], pTemp[4]), _mm256_add_ps(pTemp[5], pTemp[6])))); + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterY[filterIndex]); + pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterY[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(pRowShift[1], pFilterY[filterIndex + 2]); + pTemp[3] = _mm256_mul_ps(pRowShift[2], pFilterY[filterIndex + 3]); + pTemp[4] = _mm256_mul_ps(pRowShift[3], pFilterY[filterIndex + 4]); + pTemp[5] = _mm256_mul_ps(pRowShift[4], pFilterY[filterIndex + 5]); + pTemp[6] = _mm256_mul_ps(pRowShift[5], pFilterY[filterIndex + 6]); + pDstY = _mm256_add_ps(pDstY, _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(_mm256_add_ps(pTemp[3], pTemp[4]), _mm256_add_ps(pTemp[5], pTemp[6])))); + } + pDstX = _mm256_min_ps(_mm256_max_ps(pDstX, pMin), pMax); + pDstY = _mm256_min_ps(_mm256_max_ps(pDstY, pMin), pMax); + pDstX = _mm256_mul_ps(pDstX, pDstX); + pDstY = _mm256_mul_ps(pDstY, pDstY); + pDst = _mm256_sqrt_ps(_mm256_add_ps(pDstX, pDstY)); + + rpp_sobel_store8(dstPtrTemp, &pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 8); + dstPtrTemp += 8; + } +#endif + vectorLoopCount += padLength; + for (; vectorLoopCount < bufferLength; vectorLoopCount++) + { + sobel_filter_bidirection_generic_tensor(srcPtrTemp, dstPtrTemp, vectorLoopCount, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filterX, filterY); + increment_row_ptrs(srcPtrTemp, kernelSize, 1); + dstPtrTemp++; + } + // for the first padLength rows, we need not increment the src row pointers to next rows + increment_row_ptrs(srcPtrRow, kernelSize, (!padLengthRows) ? 
srcDescPtr->strides.hStride : 0); + dstPtrRow += dstDescPtr->strides.hStride; + } + } + else + { +#if __AVX2__ + __m256 pFilter[49]; + filter = (!sobelType) ? sobel7x7X : sobel7x7Y; + for (int i = 0; i < 49; i++) + pFilter[i] = _mm256_set1_ps(filter[i]); +#endif + /* exclude 2 * padLength number of columns from alignedLength calculation + since padLength number of columns from the beginning and end of each row will be computed using raw c code */ + Rpp32u alignedLength = ((bufferLength - (2 * padLength)) / 16) * 16; + for(int i = 0; i < roi.xywhROI.roiHeight; i++) + { + int vectorLoopCount = 0; + bool padLengthRows = (i < padLength) ? 1: 0; + T *srcPtrTemp[7] = {srcPtrRow[0], srcPtrRow[1], srcPtrRow[2], srcPtrRow[3], srcPtrRow[4], srcPtrRow[5], srcPtrRow[6]}; + T *dstPtrTemp = dstPtrRow; + + // get the number of rows needs to be loaded for the corresponding row + Rpp32s rowKernelLoopLimit = kernelSize; + get_kernel_loop_limit(i, rowKernelLoopLimit, padLength, unpaddedHeight); + process_left_border_columns_pln_pln(srcPtrTemp, dstPtrTemp, kernelSize, padLength, unpaddedWidth, rowKernelLoopLimit, filter); + dstPtrTemp += padLength; +#if __AVX2__ + // process alignedLength number of columns in each row + for (; vectorLoopCount < alignedLength; vectorLoopCount += 8) + { + __m256 pRow[14], pDst; + rpp_load_sobel_filter_7x7_host(pRow, srcPtrTemp, rowKernelLoopLimit); + pDst = avx_p0; + for (int k = 0; k < 7; k++) + { + __m256 pTemp[7]; + Rpp32s filterIndex = k * 7; + Rpp32s rowIndex = k * 2; + + pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilter[filterIndex]); + pTemp[1] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 1), avx_pxMaskRotate0To1), pFilter[filterIndex + 1]); + pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); + pTemp[3] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 
7), avx_pxMaskRotate0To3), pFilter[filterIndex + 3]); + pTemp[4] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 15), avx_pxMaskRotate0To4), pFilter[filterIndex + 4]); + pTemp[5] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 31), avx_pxMaskRotate0To5), pFilter[filterIndex + 5]); + pTemp[6] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 63), avx_pxMaskRotate0To6), pFilter[filterIndex + 6]); + pDst = _mm256_add_ps(pDst, _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(_mm256_add_ps(pTemp[3], pTemp[4]), _mm256_add_ps(pTemp[5], pTemp[6])))); + } + rpp_sobel_store8(dstPtrTemp, &pDst); + increment_row_ptrs(srcPtrTemp, kernelSize, 8); + dstPtrTemp += 8; + } #endif vectorLoopCount += padLength; for (; vectorLoopCount < bufferLength; vectorLoopCount++) From ac1ed742eba5261633c7ed408dcae3d3bbb25c66 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Tue, 30 Jul 2024 13:48:30 +0000 Subject: [PATCH 14/31] added sobel filter case number in runTests.py --- utilities/test_suite/HOST/runTests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 7386b364b..b42dcc48c 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -261,7 +261,7 @@ def rpp_test_suite_parser_and_validator(): subprocess.call(["make", "-j16"], cwd=".") # nosec # List of cases supported -supportedCaseList = ['0', '1', '2', '4', '6', '8', '13', '20', '21', '23', '26', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92'] +supportedCaseList = ['0', '1', '2', '4', '6', '8', '13', '20', '21', '23', '26', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', 
'39', '45', '46', '50', '54', '61', '63', '65', '68', '70', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92'] print("\n\n\n\n\n") print("##########################################################################################") From eebd6250a5bb0b83caa672a57476adef2b816bac Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Wed, 31 Jul 2024 07:30:05 +0000 Subject: [PATCH 15/31] made changes in test suite to test all gradient types for all layout variants for kernel size 3 --- utilities/test_suite/HOST/Tensor_host.cpp | 24 ++++++--- utilities/test_suite/HOST/runTests.py | 7 +++ utilities/test_suite/rpp_test_suite_common.h | 55 +++++++++++++++++++- 3 files changed, 78 insertions(+), 8 deletions(-) diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index f29ff1f49..b950ab301 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -64,14 +64,15 @@ int main(int argc, char **argv) int decoderType = atoi(argv[13]); int batchSize = atoi(argv[14]); - bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23 || testCase == 24 || testCase == 79); + bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23 || testCase == 24 || testCase == 50 || testCase == 79); bool dualInputCase = (testCase == 2 || testCase == 30 || testCase == 33 || testCase == 61 || testCase == 63 || testCase == 65 || testCase == 68); bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 84); bool nonQACase = (testCase == 24); bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 79); bool reductionTypeCase = (testCase == 87 || testCase == 88 || testCase == 89 || testCase == 90 || testCase == 91); bool noiseTypeCase = (testCase == 8); - bool pln1OutTypeCase = (testCase == 86); + bool pln1OutTypeCase = (testCase == 86 || testCase == 50); + bool kernelSizeAndGradientCase = (testCase 
== 50); unsigned int verbosity = atoi(argv[11]); unsigned int additionalParam = additionalParamCase ? atoi(argv[7]) : 1; @@ -198,6 +199,8 @@ int main(int argc, char **argv) RpptInterpolationType interpolationType = RpptInterpolationType::BILINEAR; std::string interpolationTypeName = ""; std::string noiseTypeName = ""; + std::string kernelSizeAndGradientName = ""; + Rpp32u kernelSize, GradientType; if (interpolationTypeCase) { @@ -211,6 +214,15 @@ int main(int argc, char **argv) func += "_noiseType"; func += noiseTypeName.c_str(); } + else if (kernelSizeAndGradientCase) + { + kernelSizeAndGradientName = get_kernel_size_and_gradient_type(additionalParam, kernelSize, GradientType); + func += kernelSizeAndGradientName; + std::cout << "kernelSizeAndGradientName: " << kernelSizeAndGradientName << std::endl; + std::cout << "func: " << func << std::endl; + std::cout << "kernelSize: " << kernelSize << std::endl; + std::cout << "GradientType: " << GradientType << std::endl; + } if(!qaFlag) { @@ -1086,13 +1098,13 @@ int main(int argc, char **argv) case 50: { testCaseName = "sobel_filter"; - Rpp32u kernelSize = 3; - Rpp32u sobelType = 0; + // GradientType = 0; + // kernelSize = 3; startWallTime = omp_get_wtime(); startCpuTime = clock(); if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) - rppt_sobel_filter_host(input, srcDescPtr, output, dstDescPtr, sobelType, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); + rppt_sobel_filter_host(input, srcDescPtr, output, dstDescPtr, GradientType, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); else missingFuncFlag = 1; @@ -1594,7 +1606,7 @@ int main(int argc, char **argv) 3.source and destination layout are the same 4.augmentation case does not generate random output*/ if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase) && !(nonQACase)) - compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, 
interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); + compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, kernelSizeAndGradientName, testCase, dst, scriptPath); // Calculate exact dstROI in XYWH format for OpenCV dump if (roiTypeSrc == RpptRoiType::LTRB) diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index b42dcc48c..35a990267 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -85,6 +85,13 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + elif case == "50": + kernelSizeAndGradientRange = 3 + for kernelSizeAndGradient in range(kernelSizeAndGradientRange): + print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSizeAndGradient) + " 0") + result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSizeAndGradient), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec + stdout_data, stderr_data = result.communicate() + print(stdout_data.decode()) else: print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + 
str(testType) + " " + str(layout) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec diff --git a/utilities/test_suite/rpp_test_suite_common.h b/utilities/test_suite/rpp_test_suite_common.h index 57263dfbe..dc61459ea 100644 --- a/utilities/test_suite/rpp_test_suite_common.h +++ b/utilities/test_suite/rpp_test_suite_common.h @@ -242,6 +242,34 @@ inline std::string get_noise_type(unsigned int val) } } +// returns the gradient type applied to an image +inline std::string get_gradient_type(unsigned int val) +{ + switch(val) + { + case 0: return "X"; + case 1: return "Y"; + case 2: return "XY"; + default:return "X"; + } +} + +// decodes val into kernelSize (val / 3 selects 3/5/7/9, else 3) and GradientType (val % 3), and returns the matching name suffix +inline std::string get_kernel_size_and_gradient_type(unsigned int val, Rpp32u &kernelSize, Rpp32u &GradientType) +{ + unsigned int x = val / 3; + GradientType = val % 3; + switch(x) + { + case 0: kernelSize = 3; break; + case 1: kernelSize = 5; break; + case 2: kernelSize = 7; break; + case 3: kernelSize = 9; break; + default: kernelSize = 3; break; + } + return ("_kernelSize" + std::to_string(kernelSize) + "_Gradient" + get_gradient_type(GradientType)); +} + // returns number of input channels according to layout type inline int set_input_channels(int layoutType) { @@ -1095,7 +1123,7 @@ void compare_outputs_pln3(Rpp8u* output, Rpp8u* refOutput, RpptDescPtr dstDescPt } template -inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, RpptDescPtr dstDescPtr, RpptImagePatch *dstImgSizes, int noOfImages, string interpolationTypeName, string noiseTypeName, int testCase, string dst, string scriptPath) +inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, RpptDescPtr dstDescPtr, RpptImagePatch 
*dstImgSizes, int noOfImages, string interpolationTypeName, string noiseTypeName, string kernelSizeAndGradientName, int testCase, string dst, string scriptPath, int additionalParam = 0) { string func = funcName; string refFile = ""; @@ -1111,7 +1139,7 @@ inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, R refOutputHeight = GOLDEN_OUTPUT_MAX_HEIGHT; } int refOutputSize = refOutputHeight * refOutputWidth * dstDescPtr->c; - Rpp64u binOutputSize = refOutputHeight * refOutputWidth * dstDescPtr->n * 4; + Rpp64u binOutputSize = refOutputHeight * refOutputWidth * dstDescPtr->n * 6; int pln1RefStride = dstDescPtr->strides.nStride * dstDescPtr->n * 3; string dataType[4] = {"_u8_", "_f16_", "_f32_", "_i8_"}; @@ -1142,6 +1170,21 @@ inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, R func += "Tensor_PLN3"; pln1RefStride = 0; } + else if(testCase == 50) + { + if(srcDescPtr->layout == RpptLayout::NHWC) + { + pln1RefStride = 0; + func += "Tensor_PKD3"; + } + else if (srcDescPtr->c == 3 && srcDescPtr->layout == RpptLayout::NCHW) + { + pln1RefStride = 0; + func += "Tensor_PLN3"; + } + else if (srcDescPtr->c == 1 && srcDescPtr->layout == RpptLayout::NCHW) + func += "Tensor_PLN1"; + } else func += "Tensor_PLN1"; } @@ -1156,6 +1199,14 @@ inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, R func += "_noiseType" + noiseTypeName; binFile += "_noiseType" + noiseTypeName; } + else if(testCase == 50) + { + func += kernelSizeAndGradientName; + Rpp32u kernelSize, GradientType; + get_kernel_size_and_gradient_type(additionalParam, kernelSize, GradientType); + binFile += "_kernelSize" + std::to_string(kernelSize); + pln1RefStride += GradientType * dstDescPtr->strides.nStride * dstDescPtr->n; + } refFile = scriptPath + "/../REFERENCE_OUTPUT/" + funcName + "/"+ binFile + ".bin"; int fileMatch = 0; From 256622d477398a30e6d6727d387f42d6f8b45631 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Wed, 31 Jul 2024 
07:46:29 +0000 Subject: [PATCH 16/31] added golden output for 3x3 kernel size --- utilities/test_suite/HOST/Tensor_host.cpp | 8 +------- .../sobel_filter_u8_Tensor_kernelSize3.bin | Bin 0 -> 410400 bytes utilities/test_suite/rpp_test_suite_common.h | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) create mode 100644 utilities/test_suite/REFERENCE_OUTPUT/sobel_filter/sobel_filter_u8_Tensor_kernelSize3.bin diff --git a/utilities/test_suite/HOST/Tensor_host.cpp b/utilities/test_suite/HOST/Tensor_host.cpp index b950ab301..14ff3a38d 100644 --- a/utilities/test_suite/HOST/Tensor_host.cpp +++ b/utilities/test_suite/HOST/Tensor_host.cpp @@ -218,10 +218,6 @@ int main(int argc, char **argv) { kernelSizeAndGradientName = get_kernel_size_and_gradient_type(additionalParam, kernelSize, GradientType); func += kernelSizeAndGradientName; - std::cout << "kernelSizeAndGradientName: " << kernelSizeAndGradientName << std::endl; - std::cout << "func: " << func << std::endl; - std::cout << "kernelSize: " << kernelSize << std::endl; - std::cout << "GradientType: " << GradientType << std::endl; } if(!qaFlag) @@ -1098,8 +1094,6 @@ int main(int argc, char **argv) case 50: { testCaseName = "sobel_filter"; - // GradientType = 0; - // kernelSize = 3; startWallTime = omp_get_wtime(); startCpuTime = clock(); @@ -1606,7 +1600,7 @@ int main(int argc, char **argv) 3.source and destination layout are the same 4.augmentation case does not generate random output*/ if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase) && !(nonQACase)) - compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, kernelSizeAndGradientName, testCase, dst, scriptPath); + compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, kernelSizeAndGradientName, testCase, dst, scriptPath, additionalParam); // Calculate exact 
dstROI in XYWH format for OpenCV dump if (roiTypeSrc == RpptRoiType::LTRB) diff --git a/utilities/test_suite/REFERENCE_OUTPUT/sobel_filter/sobel_filter_u8_Tensor_kernelSize3.bin b/utilities/test_suite/REFERENCE_OUTPUT/sobel_filter/sobel_filter_u8_Tensor_kernelSize3.bin new file mode 100644 index 0000000000000000000000000000000000000000..7f8d116522b5fd2f0fbb7f6a2f42ad74d7e6d57f GIT binary patch literal 410400 zcmeFa2Yg)Bl?QyL_okWAsB6@Fmu)qxS+?aO%f0uGu}v|?#(*itbPT5VgiaEAuOW~? z0wJUjQb{3%WU~pIw#n|2WZ(Bc=e{>H5+)ldYwh>_zBxbXy<5*c_q2Q8dw1@opqHFc zF995U0Z4=c)x#aNlj6S4X>Bt($^`-vn23wVN#4Q;w-oTJ_RL1JUE61LO{5xO+wv-5}wv&fRjpP)clYfYv#T&B}y z_S3Q((Q<&M5H(LOh&J^4D67~tLBymqlQ-B`U65N^3|f3dSKLXLe4Xgp94?X9SWwbj z<+o(m5%EbYkG0lj?_Ety8qtKzEFdS6+gRRya9WIEoYjKtrw zZp&bON#oIH|FoN~Ze&d=Z;Kb{WfFz}^MIyJ3@8iB?cK6${k<=L>xm=Hwszr(FTbk6 z2C0sOP{YXh@ry+j>Vlz}ztXg=3*wQWiM^DSEY*`TD=Q41+4N%d$B&YcnEgsh<{5Wt z+LJ^csCK5XDOguuP+HZbN|)#wUr;)-_VJ7PbT@yqroHM!Gf?q1q7YGce>9vKnpP)G zS2W7aoHJn-=?{ShqAYMLTiPb7Avgs`)e;A8OU^J-CDS%x15q%X&jaESP1C8u&F`Vg z2Aa{-?4f1XO=%4#b0#k2cE(vY@2Ap*cx^sqUUztqT25X%WYXQSPemBn#;WfWah zO0_>fX9bxSA2=&Eab7F6>t(qx$t#j{4HJl>0xG}u|7@ z=3Tz7_1|_NmMKY&xL#@kZgAyGHSMs`(ppTsUYI8_r>4F8Z7Tck)8N>2MpYTbCgAP_ zS??t@X#V2!PMFuOOz`jrtacx+>TPb<>8P9ldWq^rVx1Ou#w!rkzk=m|3ybp*PsV8o!jgNOX1e-f?VC#SIesynh4@6mI!&9dTmMn*2DB_9J*;WhA$Gf?4l;`!J7}m;4@NcrJV5bQ`Bb=g1`z z=h*Rvi~{x%jsTt<3zzmuYmO-={Q4k5S8C_un)`+Gr%19)46`IS0T+9Q1>DHWCrf`x zRa_bjl=?12;M3X(%*Wv28)Z0fmh+r!!uSR0xXE3UMN@a`Pu>MiGA#fJSM0_GY`pXf z!+duaVM1mp;^0I8i0K`!!@H6VIjXt)p2ASHxC)VSNr?aSk!A^GfG&qhQzeh@)jsXo z197}RmEjsq-KwoNRnFoz2l+xRzkjTO%osG?4m@~I7{ADS!r3F0=IVFWP8PS$R_LRF z4}J{-G0%2dYr@tEd(TAFQ;hauj|n)mmWS?ZFXDvuL-+jW4JZ&t*k5AHQKrc`#~IAW zc>T@e4yMjLd6;ze^B>yD5iNv$;FZ*LL)yogDvXb*E$9y)E`0N_Xa^e$IdAGu%gH(1 z+X#xX%rdD+t#Mq#^6Z$RWJjP(+0Qq(_QyEq#tW!?TU!)kl`+RYuEeAuPcgFToctA{ z-9cJ)(n%$(cE$H}FPIacTX3jXBph?W*f7>qbu8gnE9Gye_ziISpzp+&iEi3YgcnZ}1hEWq8cpTN zqYqtrrzBXYAeJ}n`om<0$AEU-`##Y+d|H?o;Gx+ACm=J@Yo*QG4#^E#cWRivf=TQS 
zFfylh>4QWzHD*fw@xR~;8UWfQ&l=PpA@$zgdIix@P!omLZP-Nlu*Vh0pPFymdreCEKx9}fPb3Ikbx)_tXwmx#7y22vaEXk zORYrPp52=GL3MR+tbT^<4K{BdcBTN7%3cDzIZJSOA(vCgd?v{q*To($$FH)hW>p$= zW%G};t4`lHy8-(@R8#o#+Ar{fc7$tP-N#xOc!rn zfi4<2B{sH;yQCc3(Ns@QY!Q-&C{^!1WidFCQDL5=X`e>I+TXpDz}vo%Om)Jq#pm)b z_sQ488;=08Ou^IkGLz&d;S^BhN!C~|j@&rdQ9Q}mTEbeua;#W?aCIEwY|q@M$$mAo z>q5cI%qVHWJ~46T#iU_jRPk)RR|OW|nP+i1uW*VtM;tGhy#Z`O5fLsi4hO3nm2Epb zKgUcpM{aiiMZ01;napbIK-q@N7e%oMw#}eWE(@I+hfDZIlyCK)cbEfKDgIBW7b5}K z6(WS3*&@f2_RlITsVU@5u0A?<28CIVp#BQb5trNMPDrU~D6TRh=IBL763MOr)Be8# zR);H#_ad&@cFLmqJ71;mg07XafesQiF+Qjc#ACNP{5lJX>Sn1e53mE(;|8-^sVOE~-r7CBg!VMe)#&^xT}R^+AJd+tpVlPn_T#(MGAi<2-}GfIm?6ataiYX+~> zG_9SQdx_3y=S?60MIc9x^xe7ch|p18Oy1t0H*GG}fK*Cbz}5|={Rk37$IqXr^J!ZamstQS+U z?CWKcrK>A{EGv>YHC7mizdN zOtXUQsw7X1w6WX^L_JItEQZH%)jk8ddZqpgVuDB64s~#{Xevvf_iDdz#DYKTBez%% zBH53;kl0ksoHs=`biMMJZf1)mQ&a+jOUP#--+-otg&yq4r?D^X)RKPN;RRt42s5jq z5Y4c`v}?0)(c6qY{F3f7pn#EP9r2h5(hKLrLsl}o6Sfn(Uwc=%Dq5@k@Chz;Zz7ZI zezgsdqv`6;Am`;(xPYCGm0S)Gdf-b;uL6$@?nZ2E_pII-l~9XCY9fWY1)}s?BZ8mN z?$dYRny;Ir*~$C@IL{WAYF|PaB)5x#UkblK+K${81VvG84B)!L_YdB0$%@ zS%hnWF`eV2@j&*_wv8Rwrt|srMr~gQbeQStKVw;pRzBmW_QOk(-Onj&$g_~Y;c@IP zjVeFUzPA*&S+oZu7Q-RY`?^vU8}~IbxM@p2Hs!m2nSsKS2XBa7L<8Edpv+-yHmLk# z!VlRecdS|mw{q~ItF%ABESFB5QR=a*%%c1z?B4_&3twhAR>m1!c>I3V-@9h>bnG&B z>taU4uzP=BkWSKk^t^TsqT1(({(RVM(mg@*sV9Bzo7I@2Ziv1CrLs(}S`ela?p-3o z(pwuoqUisj(qY3{8}G8zeE0B`$}jeTh5_SI10WvtWMpSREx%&=%#P&krck{`BfZOGkfZ$%?DSqS&s)|J@ZzfZpH0; zLkoVmpMAhbf1I!Ei#`X5tXCGWu(k-^02^Lw+7eImz986 zpA^fPT#M{eRxM}GfYRCnPz4FnP-0i{xlg?RnKhTI)UjSOPr7tX11e>{^60ipHp#}E z?L2$Xp1B{_3*aWT&(oxB2-_bz7gw5erX_#^0hZIGl`EHV7CQ@SJrx0V&YJcDr#Y4$ z`GIz{=QIwF9h$GYZzm(Gd-MEFpOQ9cBc8Ua7yJnvv-~&qCew}*-GCA`*Pkuu^Bgkf z1q$3T3uVv29$`e!YFdzBk6lc~-$e5HTYmoh1rINY&yh-FzEoWH(18}zI(JvwDW}G> zAyF3L+n3T@%}-fxA^Q7IV;jmUxXt;nLV*}x8xJha1LZr}dC|XV+9bZOxE7b8H~)g` z-LHTDUv()x<~7#&#WZRrXaTEgD&|_fJ}-#G)L5# z%TzdV+5FX$ab5UwJ*PaXY0a|Z8oLNr^9C1oZ+&)Ih|1b+AbwmhW$Thw3~IO19pDq- 
z#&6t~Ny{VD+=)oZAUL+{lsukXopCzUDhY?)o(+2cRb%A@kN=ZrLu?E1W3J55lsB|5 zPA6aQz_?E$$*ij`#BQcj==V(5dD^2Zz>RVrO@kJ-QKL4EPClKbOQ&=*bqZz=(538L zm-3l1258z7DrgU9S8V-;_82+mwt9R<_89L@PXrI2rTZ^>>uCoj5byS}3xhh%g_{MX z*duv*g`n)p#QfMyW+w8qDYW~G!_f10HzW4A28T!V@7jBO1Nd``TCu}<15FO$FG({V7wpJ4)pP%gFvfZ6Lb1?w5LYhp#g&c-0#w5b0k1T4Ow{WR)rCizQ*2R72N8r=m-p$ z-^TglOP|vIf*6E99&owbv0F6_o*rPufB4?VTEi@|&KBOyHyq>)Ag_zD;`M)OdLx&@ zj_ zx}jMr4RXQUGt?v2xc<5dH9B|gUX^1jgKYC?E^AlGZE&D?N;jaUfA2T6SHRjyM#|Xk z?vg)gKfgBDrD|1tSv!G>5jMMM)(%u!d3&SZ(pz-XX_QQ`pyMf|Tf6N!0 z4Rh&=My{3%`DxlYZ~;^+gW5nmQx`8Da zXdyE{W9@l)wDMP9++4FzrH}QxNz*pYlu9Rjc2b%43Yfc-&pPOn+oQ@CS(`vGiRu_tSq4Me1=F6nm z*e{h!_G%iCXg}%NU^h>q2rEp$OpX?XD{OKOo`-SslncMIi#pe{vDMx+H=3;;)#!TB zNG=)xX&KR2RzY!oyd+<|-P!ACMbgzOeE;axasoI77j)G;>Tc+mDgCB~LNc~U4=k#x zcn7WA``nIJ@-ClNg40>>uE%>Xj>@p$R- z;&u2kXfyOVf6mFRf{;2EbXMr{419DxJ&)YYU*d7#?6;77u?GL~5z!B|ooKwLhAZInRPMNQ^1r{f>ic~3Xl1Uj~XxgWq2H#(zLuloFcU!IDeY>R%o=QImK(A(*@km zaO10(o(qY7jQ9I^R{9Yxp84TJ$g1aHu|qfHncES?>~TC`!a;^Yf%V8+{h`)UH7bYC zxL{p%DdQW$IHefU)`{p#tWGm@CwiIo2gKfHQZM{4&gTnEMo?hcNw~Evz(sXbrH^+DN6yPb}6zaN|Gf|XpBxfhl%N#lDeVkS#iE@c) zzfVTS6`HJR^SGktP3=#m)L-pWIPT{n8^tRNDL;?5CK?s;|Lsn zngZzNBjK@L%nsKtG;I;qC6H-jD}TqpP%VI?@Sf&C|6E+4!i$?Bum`kU`Z}jZ`aar> z+q~bta&H{sy`mz(semEvEjp7qsvk=wp|M^}WVL(G`j4Oe=n?ken-+J5sI*Q;Vg(;D zi%!54I1BHlA3rb*+VQvX&qK(Xeel&c=Ya61FPHfdM%3m%Xotc6e-2z;n=M*0_`LQl zs0vli-Z6RZqq^3+a~glEttr(l%{H@6rV^R!FJk>i>6Q1*Uxjh0^MNM@PQu%a$Mi9E z!c(#gFJ-bE%_O2G5 zwPH5b+Bf6Le4OHR*2ztnb2QzF)D6XW$2dqUHgHj}y^QbU08?IRUJx%SN>3Jtu?Tk9 zeRR7Zve@(TydGcPV6Q~OC;WA)1WKTyz`|XlO3YET}81s=D$GRDbm@TuN35qAHBhHcX~0<{;XzmR|M^^~5tzJDKz2 zP4K~k|8^_U&-UZB%4rdO1`@x66MHI(4IxfiT?ySzwF3yPNUdZp(zADzeWRe}m z$M4_FHb@#17_&8}bM0JOdppuLXsfxY>Vgc=`TeW$R07xhwI57k*Mg~Tx0$9i7P5cH z2~Z`x+PI)>W_L|d$b@EY)&_|l`Z;5%?-}S#VuNdU+5_ctdNAAh8j?G{ix4JUbzGC>wf&XfRLHxzo^tTAbKW#?nrQwezK#RR2d7q-uugQH{|LPKLwDd|taL_}Da*jc z9D^8h3j3FO>0G!}cnTD(z#KM%TvJ!`5D*(aPtyfvr%Amiy>4(ddtN#fG8h*W^>CAwi}${PLBWwBLP)=$D`fQ17ICl+QeGGa_N` 
z(m26Y9L=hYL3L+={oNX)>&UxXTWFwjkSk)Q%xq_QbweKyNs~7A;TrG58+*XiRhWhy zh0*<%W5~5p9<%vOjO5Ku{nL!(jP(+71&YyxXGRAG|D#Z6GMRS%^+O$v86>-6*?w1G z`4IV9Id$RXqW14z3UPU|yFGKipB?YJ@+PWfgRF`cO3m)eps{8cYQlabc?=hm*&f6Y z>~CugkaH?rxP|P6i`5!Apn)j76>zXsXD!A|mGKrIUCOsRZ^X!GkF{r>dK*ri!g;~B zz>{GG1M?K!ytn|fCKzV3M8|V6nYfm(q#9aQ+L{~Abo!|1JJ;)|te8GmL?}9R_7w5< z{(yT>Jir`ieGBEcS*v@@^qe-;`PrkW(xjh!Xg+bPAZS$n8;*}wHduJQpa&Ov12!3)rP-g zejO~%MNNC!8Bg=N;o9u1(z>XQG!!e(pu*RA5=piIoG8^T`h|93-i9+%>9SC8a!>aR zqmo{CJU6?`0bL<;MsshOpG;Bm4$WT#=N_3}=J7>yTA2cd@<5i))qgeUDs@YOZR+LP z`yRUOB}Eq_!ANguTs5n>BvZVtq>T6C@C8_sc277v=80Ct17vEsc_&hM_7x~y)T5~I z^_Q4}J?m@0-9gn<^UlkDcO@RYUsQDQpavZ*=o;?KjK-mU{~TBxo)CA{ZX%1*GqpDi zVOBhF22va_09)_2UbrJb^+^R(uz$WOpTDh3(z+8*d2*OczuBYcQj5vz+MDaAln1OB zl>RxeSgg!*Gx%V(+NU%{he{}Z=QbqSV0-?hEs9-*%Qs-OIb28`Y_6CxQpw`E=bsUx z;-@<8^gY)51#IBubJ`0Vt3?SB`{#fssV4k8thV~f_<|C60P*lx4>x_Q@(Zr%Gf<#z zf!8y268uV#hon%?=jOtr?i(7SyP2+&OK}Oz-Zq)C$M5Y^d%{bVEaCLU9`o*okQlc1@D)qF3!A6ltx|bEbFt%_7^RyIe|aii03f;#BQ5!wA6P^FVKvJ@ ze2k!kBb7|z#Cch&$y9v8keOVHC}i3ufB6I{6MR+%hj31$rner$a@+1W(ieSn9}Mtf z4HnCE?XKY$?5*z%P++L_3aK~dONP{7J0!KRORXD^%pT15*)j640$IpBKYV4Kqr>2w;t>J&-F*v@r{< zSy|?ABg@xvpp`Q7Yt`Js2}(u}}RI3iWcf8=T9)dVukQ&0_pQBEc?K zahcbzKQovuJo1`ztA^XSg{-;lE~kI;w6HV>t2-TZ{FSHRHtkWoi>=|zvIFoH z%BnazX0Tes@>A=D+os0s4r6Vzu#J|z)x$+*t;qqCTG-`QOO-BdnD3FssROXh##4+a}<8Uek;3wnzC|wSFo^rg4ZI*RqRn zhc{8>ziYG4 zf>DTJ(&BbsHhFI+%ejcHi&A9{wd4|39nFNa{{%o)3;>t1Hl071v{cT}@If9h zeD{35ov;_rC<-cO^i@|7J%yKtNLZH1r?&QmW2Ykyj?Q?)C^wF~DTOGh?i{%I6ij1M zLDRTdDt+jF=Ip{%VKxVVKX3A^NRF58U5Vc+U~b#Fb_XXB|@)(36 zsstn7AK1NdV}^wPPyZ2L_7E3$BAu;pO#A3^=Q!bKTk*oxMJ?yv3>_OMunb2K zhJ^^HtLH%b;L5OPsf;D3HtFbV<`qVwb<-g36^lKuXc)HfJq!QMH^&@uMd$ua@j_ToN=HO6D?KuhQ;;+;ZC&0G1Pf9MjA%4b8i7P7u%I+0 z-G~UwIfMUYk5(f`3R^76Xo5ns{zHRQbj-{IezRr7S_(bgsGr0r2uwy)7I-Zs&+U`)hP14(px$mkju zLXxD)MQ4>>OfnTORL@hop5PbK#DSsCNC_qir}Q(0DlVU>U^J*>)r%$aS! 
zUe!p94|+Y6PlPy-T7%z(62(z10zE$Bw9y z$TKB2Fc$_2ZmX=vR0)G%GowJvs3R&ONk)os=Xs4Hmucck%pXos%A)44 z1|qZBpexBQT^~`ypevcD_{Egq9~arn`py*;53PNsK(Z$s0?9?lP zF4c(?Bs!Bg#r%mUZ|I@|MwVf6NYNdpnkJ<5hx?%WDOO9S#H>qS4LQ0_>Q%TGDI`fy zFTr7}?c*I+#SJv7%dje;j`Kw5O21R&50>gNw5*(f?g|u<%EDMR1V0xNWD-+J$mKUC?8qw0d)VlLn2RF&A_-(#baD zVkipJrD#+*jC@ucy{2TSLMoS~A#Tqoj8+tSMVjg984>I<~tZ|;OAbal5M2xD7kg1Y4pQD zBbbE>PSGu1Ohfmc$$kndj)M3U;TRsuz=(T_WKe#@#75DBmQfXR*{y8MbxI22&7~N=8M*jV$A?*BGHooW@;C3{99hqzb{p89F@) z7BOyiw^CWOGwBpKO_z%VIhK%xk&=2JO&#Z{POv}>cMTQgSVAAfM?j?*HRwu^uO4)e zXyHkKSyybv$uNqLB%6^pnhryn21AjrC-KclMGMe$!(SpKLQwX?qhMkP6Vjn&@+fQ> zMVD?mLL*;NY0(=UotH>v{T6CQ=p^{XHZ6$wg5tP}ZGl9#L4^R7n6L0mT*_)wj=*Rv z*)Jm{c~8=q3>htQDZ?}9_GeT~={n07b#WC2BFdN)HZ`NS>K@a|zadWn-G`*494p8@ z96_7TCo}@k&E2R7xfo?7N0}Wrq(>Dl@Cl^x?3ycr$qJdmGSW?=f`L5cjx?^Tk83)C zO09K?jI0F}GANZcl?iZAq6UG8nCvz~p{<59Qw@*A_%Sn(LOSh<8IlmA%vu^s>4F5| z07lWJx21HMJt*>}Dy$QK97F<3`;%Y$>COa?l`^&hu98 zC8}Gj@Lnm!6&}MFjpkCZb6O%@s6c7MP3go)nZ$)_Go`4Pw5=4aMDyG+Pzv(EJ`DEQ z!$hU1dQuq|FqE+-T(yzTccDRY5wXjUTX}w>9+guIayZdry@Jt<&Q~RDw8iD5Q4Zr* zjwX*AwV9`i5FIc3I1VPer)S{C1rtt0rwds1xp1lt0r#=VgTAa`7!4AiWOPUn@dvq}`K%20HIl~~*V3W4FKpfXL zRB8krIvL5d(R49eftx8}tl*kaR`t_(v0dQB7J- zUQ!Vmxy%-wKt&PLhHitdBq){|bSCqRid>Q>z1>nO-4I&+0KhpwdkwO=v zk^d1q(7B1i3B-CcKqqcT_pqETilJ~ON^>$eJ2Earnc=Z`_=M}VI_<1xu5egUkWxG` zcTLDkN2PZ=Nf)}OtANm9$c>9LANhYYj7(FqL-%Yp=)1}0a5&vAZr`2}ij+}0q$gq^(Jf V2CC;7Z(q9ZDsHSshM00%p*KKc3u}g5iX9>og=@MyQ54 z6+bghMhwKdJbCC>Js3?F3LYa#s&hL2^r;rK%Yhv^$n0=B9A0d+3e;})`KfTMUV`+b z6{^B`*wq7&qssiap0f(4K(cQtT`7SX!?f83cV)w|rWTvWW5-SaCOKg*rS2yMM+?zn z(SNJW>Y-ToasNJwUr8brU9e(Scb-!4%+zj&6SKLTyhDM-!D1UF%45=&>?~)St-R-{ z0(eo8*D?5I6wwPNqN*qy754@IAj<(|gG050AQ&ectDQ`VCK46@r+^tYZIt2pr9Ra( zlK!W=_+56O8~|O|KFet}LCZiOgbK3A>a`v@Q2%l|JTOXxXPKFI{nW}@3oBjxekfHWWS%PxZ z|0Eib&c#gd23&(Xh38kO6<Si+pAgFb>9QRxsjt*i!$ z#Ei08nKV13DCVgOGm#9UyQ8|Na@3Ux(6iYO>WMNyj--nV3E5l(?W>aNDpM(@3;*0J zm&44?%jSX4vbmtKm|^JC2WbIAr=T*1m{eRUETf_!K-Mh9DVY_FXj+OVQ$${hOkAZg zegFqijKl3?j1#iqQ~)!eD5E3&VgU-y@hfe!3Lw;=L1AW-u<% 
zV6q7bspkw`*G5|0h`}LI%_3W;N;lz3p$+nqn)wAm;U6?X*<|LWn2IsZ@DqfohT;Gi zlE5gW+2lQmp2jJ99fiZN1lcM=MY+RI@v64yFgr3#?)q4ej7D%BaoL zM-GGCNxFEhWhFjKC>3-@ChHoR$7WZOET%`SfEza19e8sIy@zAPfGu<-{IKe+VvF)# zsoG2GkeB2X)gSk-X)6~)WTJPB}yzX8w*b+VApo19itHLP<%#UAZC0_ z7|dWa=ufN$;t|4Lkg&uJfSFgg7)8GYT8D4qDIeC;*Hl zQPJjPgH1$LK;Da}F?v*vLA4GlKX;Lfe+MWrRa9J5JwafWpm_Y^!Thz*v(FF0A2BC`cT#4Oc~0b6v7 zC8NZ{ZqBHlg%<-OSz*v1T`*K~bP=g;aRa3@sJl5HPey>=6^jG&Es$cD$89MAx?E0q zdW5Mmr_@*qc_!$(>gsmh6s>#IR*FaxiK}w9kNXl7uzOhre0h-(yi7!lnUKY97r)M% zm#UXEnA~6*jTye%4g&=a;6gPRV4`oCJy|#&648Tz02eGy1kgtsebS@z-`u#S;c01*{kk*s2tdE_TOu~ zG&vlo!ik0)fXL za1GoFWCVg48Gg)%`N0EMj7^|Lwrn*`z8!Rdf!Kd9cnS#ncF(AxI@tpH{IQ13;Utq{ zbUHA!F z2DgYI!{C#`W6H`2@g~Fq21n&9j?#YzN4fBeLEH!f^7j=NL?b82nig(Sf9}W93x^P*%$C#<2lY`?h`DL$e34 zx!hK(pS=m(IuF|xo7>?GMPm`KKZ6PBx*69%O-mNSWlT;spQ~thkwt&G8C>&6P`0yL zHlO4INpQ!qvnorNU(FT9t7EILa%wH_JA4K<;y!d<=pxydGFeALP$qvUtE#Ojv6(Dp zgf5#c5YEUAtD*V2@1taV%Yl<-6UoN7)uk{&Xnh8RBqB1m-wVPXJYTd;qW(2 z_q&->!MnPrhR70iQEs6nw^??$U-wp=e*YUPK5rlv7&*?Nd^kHNK7aS>IztE!epg~8 zcPNyp_y52-RMBLsT5#EYME%@qYx!Di%PtmSmj>XBHvC5WNc+AdeT@2o9QTur~!PDOCfU3@ZL0xTI*PRCB>&6h4kQm{N%%(uTa4ErAe)z^FwMS?j? 
z6qqWy+N#-4xx1Yu*q&3Mtc+%B z+8l&W|F0AEG~6<>3udiyQQq>c+}=Q6f#}$>*)^ljU2yDvZ6;UKc@b`?Zb7f3A#%2g zMC_n0i&bwq2>WR@7j4D={@XGgxitn6NVw`!?ArXd7X%j<%E)rto>#lV;-ub$*%VKy zZ8A~AviZ$iXvs`rWkGvGU?jhwDS5Xqk0gmTm6o(XL}d`wG!eEtKUCf)sx|N}jgM{5 zlp}G@*uxr6nfBgII~8?I(bIA+5UU@czHk$l$C*~zb4!ZI8D|r3818nEbJgMwMBL%p zVwTC=L0ro5pMV(a_<=dm;l>2Rva-u-)zDV!;BsbfcE>WQ;5A)~ZEK^uC`Db`+V=lU zdrdkpX+26^#~P)iP1HA}{Yq+{b9Bea)23gHox>Z?+7O2l)b=1u!(m$@ z1H*b&0Y;I`hE`1Q9i=MC-Pt{xJtc#yVc)-K+CCN0ui5)FZ7YWz>+5jIrR9f={3vCa zIP3!l0|l#U<5T8dza2hEm)Vo6J%i8?Gx@-97V>~cCaMJqb85X6bZ0S&YEbE)*$rc< zP-(%fZVHRfeaJxRB zm#(VWLl&A;M{3%=ICX?f)#y7bs3JbI z;^H;qv~@C9#?Xnpd@#w!i!)lu&-|K{Pgxj4K@eC0rK*h!d)&*WAd{$QK3gCfhX+rA z-W&IleXb+`yFcaxSyt$q7oEeSZ)ukxr*iXMqD2%xEJRSDXmn)Wb$cp&%vm%9 z)MSad$p@M&F*8NM9Sd(7@3A_Ro`FqKFDHXN#r-Ij3l8Bd0BY{WR^dZ&XcH2Gb*y~Y zqn|8QI3kELJGPRb~le<~M&^MB=Txo>`W)wUt10K8vl)hvDHyaaV!8TUNhXq7U!z}#}cjKvaP zl<8W_fxWlk632fv#Ao$6Z2`y)?CZxFa~`x`=&(i>ZAJcqMqGq0YDe#!1%t($KxSy0 zqs9b7s9JoEq|m7!NgrtITSvC^F6>Z=*gd=xArE97E}L`D7nYGy9?GgDSDur+{ansA z!$M2pP;&0S0rXcOr2%yysO%gttBC66P^Xsy+_aBYZ325^_vPh36+qIvv@pfMtJRpStQ@Edwu-i(rFSRc-S zJcM8`I1CDMGpThis|;1vke%m#U)jvD4K6lHI_IOA9K*>-L7D?|+)?h%RaVB6*=@f( z3l=P(y^pQ(g-usIdE>WCN-jE>Njx^&l5m zy^=y(aQD-ZBa#!!!U+mc#8taFS_3lxNB934e1m4W+e6aVp-i$XBgzIh@A&Bo&Sm(; zgH)wGz@aO(TP1bqiSjO-OpxdG*6+F)3W+02_`)!IBVXP2>_VLWcsqw_$2&L&lRf_F z+o<<4I4znpibeX#gH8k6~`kq_rg-J8;?_y6k`gC@jQrxP<=11_rC{k zuiyvY$HB=Dak%?G?i9+~wO>DR6(0-F& zK+b|0=rfsiz*7MVkNEn-1F7!`CC`TSWT35%zYpR5`Kt4*w^)nryBT@~(D)o%cMSh6nao=TS2dc# zODvQ(*%8diV=ogxA)IO8<@C}Vu*>R@!oH3S>dodL**oh>II%68iW?;bmy9*@B$!$L z={E|qzRTLg6H1=1|D(T@ATf}AH$7M6mpArx%kn(h+50*VKUFbZofaPFJS>8+>H zEl+kPWRY2KvNXXvvrEX!w;x)CQ0LW54&W{ATwDc_bsDNL)Xjm6YB4eZGQCt73v$@m z3Rbx6o(ia{uOON~rBFyT^=(-^yw;94S$3AJhz5-skbTJ`2*QxbnHN5I#I)s8a8JHS z<@%xQni`vaAZaNTHRgY|4TYw5>J<FvsMGtYUG&A`6GX%MQg2Guf-+GL!R` z+CyCh?)Hvwyft3zs$Ge$+KW+eQSl@QO)ObQs&2eq*mdL^PLKtmktx$xyrAyrJ_O_a zJDxtnk8@x3@--B%qJwNf$~+O6i^-Yl54A182UHvrOWqmvFkQWlDX*&1hi8ZE+S*9{ 
z?(+DlU7ns9vD}=J3Rs7G(88!h!m4~2WGLVRv3zI!iY2TR*P0*lRK9yF;@xLnawY~a z`+zchSac$z6_^9DY|I69Y8dAdCQq!)8_hS>4D?hL&dRS`Ttekd zv9SI?RmKuh{?RXD&jt`bhmawwgoYt&sEc0m*>i|M2bs&vGN)RK}`|> zvFBD8){Inq(BO||mP7K?XXocL26xP9iJtl192~;-#np&g3+v+@l9LNMvf<4@5!mQb zTIz|p*f|Nr@^TuB$&RXPccIVOTTg9;CSy2=)J3g4ZoB8L%*|k<)Ooa=?M>v{s_C?mL@pJP)TL8c?^?Nda-7dWt6+DvIi+m{CI*^+gN#rd3J8V z>n7*d{|dh|_j9lB@fj~vQLbw9{>5M6KCc)z`1Iiw<&KO(41G=KgTt}* zTpU40xy5W$qrunP*CeN@O9sFF#8MK3EpCjf=Jj8DW)f$(y)NSeF}RTx!caz;uWQH| ze(pQh&E_H~jsLup=T}Xuk!UD`+UAI2^z(V+6;op+1JmNd^gtEAT*#>MvNqROvtPxr ztp?aUGv-aH^^f7}fl>vl{ieK&fNJPNeuI*OrCJeri`!UC0^pvv+pnm*g2&z}I0!#u-BAcyd-_14!f zsOAjN5srzTD64`_`!fAr^vYyCQ#&0c=AU^p7nDJ4@rtvSv&y&iND}wG0r;lJAm^x~ zTz9&Bl3Xf;x1YLlbyb;>gfBm6l>y5m#0>EbPbCe!@Yu=4oWtto$&~M0iY|1}6>tXv zXw8;!iMAbG8rsSw1-9V0Xc#@sTrbh~xfrj?dE^jb3Y)SK6hoB~l-05M>@7Rmxc>^= zypT(Hg_h**^m_TV2Vm%}$9lRrz_j286Y?q31knjsEMA|N-{YGp_gW?_@#dYJ^JdTP zV3i&|0bblrZL7#Jz;w!+Q&FABY?+U78lF_=tl7G6eN%~wLH~R>4Ox$1orW~%um^+q z_{pgoesX7p^bJJXvFJryhi)`nz!UPiJ=_(0{w>-vuAxJYzkawNyTOf@Wn_ofw1jZL ztK?x|uw`)-`CQCNjf4HCUbwV33dPXDVZ|p`F9FMt5dPgh33jwfWS;h?q%U58?YG7bdmTT2`n#11_K6;erH^*rQIA<04Tq z_m{{}mjhox8}VO0avgl#l<6B*F4%$Ln9B>8TxJuN^BYle`uI&JgT>>M1>PL3dqG=8 zW$a~`M|dZ1aVbrdAElw^utT!@94?aez6=X0%@6&+@+5rO z2VIbkTutgFu+xLj>PV0bTg%t97Mi?(qryupH}R+s!C17Q=n{+d2JHCr1zFV4%{^Q} z0cI2BQGb4s5tn=3jO9ba7hFE4y(VdV;DZHAGK(&9Ifqa3HxT)4m-9Zjk|9ey)ME0oVWFpKC zcx8tdZV;av71o5!8DkAMkvVtKz@il`4ZF{-V?7bzww-4FLaJ+qGFSW{L%AdV#Q{`K zG=`fF?bqJL`KVW^+C005&bhG&fD@8GHnsV=0hVbb;ln|S&t|sc1HYbuTgLg?Plyh`Y@}dOFKD%SeKH*A+gNd?oxfL*&F1iUv04Xj zO%lB}s#12v@cL~V<}_8bI&-_Zw1CB9GGQr|1K*ac($_>rWklk!jJhj| z%4N2t=R3SsGjxK#5b*MiBzSV=z-g?%<{uXEsz%Nex_Y0`?j*YRhnyg(c>IG4wL5t} z&0=OmYOST6KQysTm>@+gJxN_57prQSxopMI;%PmTt1?Z-R9ULxr#XbRiR^yhw2-90 z>h?m(Rg+*77K9_5kU;x;JYBh0y-Y&|m@llx>~J~N8V)Rwgga1v=(pUY(kW&YM}t6Fk%D7$gm zP(pqlnNzTIm{WQWxE*qczreZ;v1hx}Y-hUg7>JT62a8;(Y3SW6f929xf(lT}vaQf; z;jsMlA;Da+99~sU7GKEY^aXY687lC_iV9k~Dj9=-6OEy3ymk{kQLF9@Az1_b;)0R- 
zoE9uDc7tK5B6Q&^sum}DN5(j}SDMF~7m#dqdDPrYEiqSZ$rJ-YEc-QCJps2fl*#<# z6i4Co4RH<`U=KL4!o-TTZJ%mX~r(&iOmm9gj+}<3Ss81)#}a6 z6g7o_c&QouJImOh9Up716taFVWKjq$AP-X;Tx_zk7NryOG!6r?^cWUuGCTc2rz;fj zNJ`?}kTYZ#f8>KlaLAK`u})GUz}rJtz^_{rmRjS7Wx&exCiKr;{bQ{YZ^qTp>JE=m z75UKymRhOy*jRLga#O3USvL4RjNcB=uZD!wGXF0M>w||SBfGqP;f9_2)>eHIaqbHF z+WAS@$1U9&D6FXLUO1zM#p~U8S}u)P+GC02XS=iVctytl0hnu+ocI9NF&VIJBn{4r zveJ<$6BX;Z|2GCre18b9raT$S28D!B%Hz;P(S&wvSQMmb-+0g$y>EZzrk4=Db>Y;M z6{-_R3T4Q1h_radZ?#{1`|iPdywnxg&s09MFm-;d=S;-Sr^{(Ii1>7#?r+8G#)jWx z%CtA+IesE&G1|x~Dry=WmUp7QA8XpK0%PH(Uch9_9bC}bdf?%M6U(&i7hiDnr9WyP ze&zFE%lG%TC1q+*?v)oiMr@+)ox{sczTpc$6KQEz=O%_aw9m`*0lWXEX|T!(hMKm{ z>BB3wpK!URJrnKJ=!v1vk z<+nWg*j2k`q@@3!2SbNJ+4~P)dGzS%ga3ELl>2kk+$>2#?%g2lnWgi#ov}8VI02z> zi5icY9`SNRq#ehgBUV+2d0N4}KRhLaqD6_8>KhV41mt<4 z^1!0hwBsADuFucO33;|?zilzMHXWm_xTML5P=UGVM1dNUyFdS0N}JKX-W@yFopjxH zAwqeviZ(o z)0Z0U`<85;dn_F^^8Z)w3mrX5X1-*oigWtYlJE>LcdEZj9aHTO7% z?Q;*^yiK2gS@WLMQoe(z!LVuRiv5Yfr(#O(aik>$bxwZa_M@Gpx^z|~-sy7@eJbxS zzw)mM!cwXp_n5^;8IYoZ(kdk%0g4S4}f%-B{ zK731b>J%eY?!mK9sY1XUiIq&yb8X`+c2o?Wlg^iKy(%{P+T4~KpS5xIiggpDX_LCA z=(f0h)C^|Qm9L(-dY(aF+v>S!L?U^ap_0DsH$Hmx3Bzk2`|y|F|Jp?*OvT&UPcdt)s~kP0o(KxP ziWBC5D|h*M`xZ8K9eVA@AO72`+lgjs+Dj88v`_A6VLZEi{dCC>Jn-(_M<0ZGy~Z_;u~Wn~pxb(rBQ+w|}5a9yKH-LtLqv>r5C`cfNk_kHDK>{>ul~ zHt?_2WeRT^m?ndcL1^NhU0s%!h#c3z>mcf#1^9dfeoQ`nc~$E!UP5s!aCqH;a9(T6 zvGV@+(^_Y*sY_xtEm|{UccSIoSZRQ!zN|eQvBfijwKbJx9(PEeLMH1>Na*y3&e-#t zKOD%!YT_I2_yD}bSiXd1YH3)#pekUiYE{qW6GBX)5np%b!n4=FbF8@Yhkwx?nb&#J zAGC!kH{P{$W%t~j0~7cNnINl_r>&LD9XyQnNwUpRkFa(7Iji$2XW3?a4FS~?4*c88 z(y8dla1Vm3ZpM$iunyTsP2bZTNOK^~fiwrw97uB@&4Dxr z(i}*0AkBd^2hto!b0E!uGzZcgNOK^~fiwrw97uB@&4Dxr(i}*0AkBd^2hto!b0E!u zGzZcgNOK^~fiwrw97uB@&4Dxr(i}*0AkBd^2hto!b0E!uGzZcgNOK^~fiwrw97uB@ z&4Dxr(i}*0AkBgQ7jpooZ1A=mYQqkMJnKLB^TpMbD~~*RgKWowgXwdsijp1hPj$P$gsb<+7{bM=IFy1=BY>sv<>M1W+{sT?B<71_cm4MTu z-1-K%wrF8)&>!N{jK&ORM=)d5z679&4{lH=vDg;tTd0|?`qD9wtKC=<8FD~@$rZ#t z^#3H-^nK(Qd}$i0#a|FhCBR zHsK_#ZF=JEmtQ(amOaJ)bBo3i3IB7+@s+SF`oi7UeeH%uuDuz@&ufX@ 
zz&?%-hjx!l#D?*{h-_RxGW{Qn*&I&hJ&p|+h1ce^t<|*e-F0+sPB`D6(Y52s2X+b0 zIjiPPZSGjH_uPZ~PCNDF-CGxSO|FePQ_iu0z6JAY+ouioPp+#d&gb3v#}1xgI2wy( zc-=0Xbz!r+b=$Ny9s2ZL*rB#XZA;->z_?p%WJ@KX}`{cieo_&9~lo!R~F_ zS9N5202hQrj!7NF@2|cm=3^z_) zkHtKE9t%(`Q=N!~D8F%(+UspKnLGhMyq{0ZS z&gh0GkE}g&Pm`UAcHo2!-JPjYseKqx@9#yN9*@iEay#t~TwOR$iQ5j|sBIv6T5YyM zS3ZcO>30vx$*{-3Wu>g=w7;&MGau~5St|}42?358q*=!2HE_V6H&W72TU^{aZ_WDE zLoK-;yCdVKKjAQ3MA{1-JntP` z=5mo>U9m5-AU89BQ$3uKqPn)WrpBhaxH?@sjdjiZK}#H0_oZ^BPAPylK~IO9#2c?V#_q$3DC0 z0^GHStIQ7BB+Qe6^IXOau24ZmO-)r*Sy4mZKwtm#?%GVTIvpYJQI>zvC18obL&qjRsgLl07lO-5s zzouzl3gXx?{b(36<#en`Y^N+OjO&8U1c4+^Wo%3R;aDEOHR09e@O(Y^5V&;KH4m6J%rPR82)q) zW8e1Umzy1{vEk^eMq4d?Iyp0*|LS&0Y&-G7%O8Ez$REo+qip_pw_UTl7nH?jZL3YK z`YS%~A)~0U4|APaXNb-J6jZ$rK89q%7dvU+6?jh4eyZ1M8Nx$~rZr{1ge$vabN_um zdiy&(jExxrrHjtJ<(5l!PWH!}>e~CdR-RXv!13k}pvHf=^u~oC>ZOO*fc z1n$FeX6=szMHt)h)V?|(t#xzkyyo0xTN0F%>^4Gn9Lm^q;9GjM?wqt^ybmM3dUwtQ zS?>e@4#Ljhl$BQX{eNB{1Z}t#1WXLhrR#?UEcJ9Se(Y@Scsh zlM^nTMdrDOwmxxBCCd{hylK`@?4Y*OE3)w;JX@B-SH;t(r$(+jLT5c&V&}u1o~3^= zl63hg1#DApe(G?%e)3A)Cp4DIJP922N%-%*)NJ_E7cdE90Ypk(acrr=J3QXraN605 z-xdJ1zxv-l;&waEJKYPOxS3Be>V#7Jz4Tx-2BY$zE?oES-`+SlBd==9Ixml0N9-s` zk0?1}ccB7eul;&(I1DH{C9M~WKPTXrP^bfu{r_;aZI%8o zFF_r8xxgB_VrE^RN_E_IZc!an77vB0&m^nCKWh8*JU%e~O{Kz3Iu882HSTq^y`#dDGE8+MA8Oa*k5km{ZkSo5eM_5*53bEuJg`tc3$$ z2Qh0A^roY|(ih9qp9f>((Qhp&%97`c|GuOxMHw5X=bQfeFE{X5;(MJ*t7UicT_v{*_MDlxS zCnSg?fRz$+{g6yKNz*6GxxBn3Nn2GcrK-NMA_wIoRP&OVxYTmnMh9=MA{1=cH#9|Q z7r2Qlrj)GWs>00d()wKF%#*YKA8#;|AM!A5=aSh~CVl3F3x>t&^Eb)1Ml8mVUKWiB zGIG2d4|;z_QCSM!&3&Td8T#>5HZSYKFTXlEx-gnoQ&s54>O|eVX))G8MbS7)xTmS& z1#R}!-VKm&XAm2fjN;sspu+e40OQoGOQhdz`kr(miK#1RjLMd^jO(=S1`3pQPGUqfB{*|<~ zdHTlVzxi_i{GKu^6F$iakXR?_rC-5dtAF&<*`kQ1djd5b!-s0~_o0H+toHWZL#OXv z>0!x5{#1F87LQD5X8ditp91i=u-u=1z9L1IR~a7bxqV9G6oJi!%F zmYl3LI+^AI?r5_ck+BtE%5nC=Jyh2B)%&_RUo%!Sp}neQSZv&-7M@bu!lUaEQ@p;O zbK;>Z@G|aBInUyGByBAflZqL8N!pA7t`TOFoPM^q1R1q*wm8^bJ9ycQ;X{fbTqzV! 
zu1O*xkPt#c^YG>>i^jwiRtP#o29;Y)hFn?}y>?$R)TdJYYa2uqIpZ~MI4qtY8*iRr zlShEVawE(dn10jTwCW8{-9L{sixO6%oct8@b>+;>@1^f-sTy- zUDJA7v+@d*Qs;12^;PX_Rf6n#_T?|=DdFs-dn4iRPl+@J4#A!lNl{PAm1ra&u+C60!w_@!m8B4}g@JCO%Fiiu@Ib2C& z;nYM1h(-UW2en%3N-M0W@0d7k@*ti$X1(w?UE*j;S@!UEZk}G1=2sRZMC~=xCynN$ zYe?WXrD1dTMapA8@6Pn%%H|=1$ADzoe8-{>?~B)7JiOQ&6aP$Cv$P|rea6V@)@vo{ zq5#vS7jqN^+LV!Yd+etD`jk7l&>6NJiec27E zCZ8-QJCXw>(UM7AS|L7j;)MKgNnyZmWmX#9>ZYr%YlE&8V}e)PkBbTh*y&-$>tElN zj%Z7M-@#-pL5q3gxr%ZP%E0#)TOQX^NlT7==iVmfopnKh+2e5fWO1!!;o1x!{J#(k zIl=5Po#U5taT&@WrIHX#=tXq*x>SnlJy2>W@8*#rYtoZ2YOc;x`MEUplGUZvCASe) zoiSf3@mkO2N#DCzF6Lc5C68Mg8oSXRPRlMUE3Pf^200(+9@^Yo4CRd35D!hw!J*A% zv*wdJN6=XfW^+=Fh28TLihO6Q^KV)x@D7y)va5&w1n9k z;CLkF=6MM6w$4lXmy zvp3Ewfh=>ILM~<+I!SJO-j~w4q@0#~AS!OBuyX>=N)qvu;K(dh5M!rgtvfl{X-TPX zF7jFeE?XwXWOUFO%2`G(UtI3>@vRQ-5c4*2fnqBaZ#Gy%8?qYF=gIjV*93xTbPQ7pk~(U#5sB_ zfn?8V%&!T`Q5h45fOg?Qoh3BfEjnvnX{?PRZgwerR>KWiL+4+m!=Yd#iOXAc(fH+y zhvmdd^y5LJw>Vu^eTXVeLhy^R`{EesWdGIpIqKH*zK`)*>`&wOVHjoF)b{RxgKDaass*k7e2O zCpKVos!C=oEQ)7wq&K$}q-0i%o;Pn~QSs^hTIkUKjT2Dy1#i zK#aCx_>fGQsui2Go>;1Ob_m|))q*tct-VPC~OuYDEVp~WjdFjN-1kyza_z4%hHb25?~YIRcc_@1e=<}X~lY{}fIlc!9b(30i% zgabO2o;a8fhO%OreRLVs<$`QbQq`knR8~*zgf+YM@d=_$V@+$CpLg#+!EmPZ_KD+W zuDN)}#x1+{Ua@=E75gv0?D9PqZQOFvmQ8yO%m$yRupiYR#%M}@QJSm`jaehQGUV!9 z?Wy9aY|d0JTvY2>Q>LFOJ-O*wWPX@j+E-Dmu)TOZKiKm>}VN;`au@v z;ys9eZT+u8$8x&5IhE-D5k0r9&QPf{ruSC+%^HI*Z|bcNE{F{Ve`TR)IcCJ#V66Jp zI*YGWPWkUa=I~gZUa`QSV&Mz#yS&s8D4o#TO3ZA^EKEx)&$J49X;V{LFgd~qCQ-^m z%B<1NHQ1Ru?5d08nE%^NO~Vht*)zRBG5zV!MBarrT(_{mi#bwmKAHJ84AG>~`FxM6 ztttanMSFEcx*IZ!#)iq2mmSG!7^-FS7M_!pucz(WCac3?!~SjTV0YT}YEVsEG-I41 zzn!F#$NIJFPn^8|y8RnhT(D{N?C#p|DLVmVwqv8;dZ3J#+V&lMA^O?yF!S6e zNkaOMxN$y-(XK5g6;H{bI1=dT@KS%WlRLpfj8hhDPr zzinh!_P!_WkK10zQ$`CGX$vku*LWD-DL?e@W^=i`ey`Kv_IlhNugB$dm^Jtpn115^ zkho%27^OZ&&#tR(1EQ|=OE&C;=PzkQ>zaLu*5 zkKb@EaG9)D2nb?&mDY$^5&pSoG@7hwoL;sevnO;+QdYS#D@!Vejq4gx=nzw;NG7*3 zSn3V$U!ALV<+m1v#DSLz)_uaq9xx3fH;Frrx!R zO$6HCLw61rieOuqfLqBxi4+J2bfBtSPH9P$mg8xokXDBc?!;nL4dF%fAfYU3q8`d6 
zMb)wX#bf1TKNT$U9%*BE?voE4$94?`4;);&$Z=x(k{;l542k5v_yEW%0$9O@X#4aU zV|DT^%+yDPp0P&d2N_YZ7M0U?`_xG%mV$)Tt~lvbZXcPk^GdJRCqjH`3#Y#;wQ%TZ zTUneeeVPSgICc8gAh1PATH4OScYeuKM@E95SAs?oanCWLjdO)uXRP0S+7L+TLn?wf zp+cU6$MK;iqa{`e?e#L@lygBSr6Q4^L#6O1{mFv(;mmNv>We?1Am!x*IIWAP07<1N zj~-)(lE7F(PQ55WZ zeo?<j^=Yia^+lgp8F0Pkv?0eapb#Me9rVbf1t8#i9DGX> z-5|FofaE-;&`jj#L=ni4dGOTgd6jb4MOt4xP_Y=l$H_r;#*mLTG2V#pIb5-LlF=4p zAGyc70vZZY)K`+liKozuU1!eByL=}7{7ZjaFUP|hNYpRDM5Q(6zXj;aRjPzKX%r^uVjR8eH#t|n2#qmw` z^7h33xIVlj=j(G5tpGj+JULZV!g~faP3a`{l`$*#kA@|-9OF!7lRloig^tEzUOz*vLe{h;dF05YiA7eQh%86oEuZBCj5?Xo#@F1*3@N#Q+x=P02_V z3H}yk<5(2LQK@vOieMw=q^5bbG87yeRJRONycr%zN|p*6IW6hzYmrqnFZY83rK0~& z#_7nRP(V>SMY|v=2x`;rNUpW;M|6=On@z}$qy#<}pu`|H7*Ik8P0=C3uN)z;!rKs7 z2jqZ@tUBybRb zOsGKs%^QtfFuqi3r~&2QqVbWY5R7^t2d*Op|H)!RX^eMVIDNd)B-pdO2HDX0fcNyaSa6snnbmD??p6A2{K zVi@Di%blbasmeI)G2kePgDS`-Kj4T|vbYYZEmglH*;OSBl=EJg5~zR-bYSVDLvI z#&dGSkz)Z!24WDv#pxgzPsXOG7c|vEN<*9xBz-EOcNty0LWNUsL^^EpSQ>SRXTue> z!4xN%gr0B?6~GZbsnmOXvN={>}Gn4@fVk)a<>FZ$^PJQaQp~B6d)3!6lW709${0;JjN+U z12+Z$itAuUC@3OG`UZd`@Pq%5$mXNmB(HlJ$RKm}61+pS#Q$l*UrHxq{USuhz~IPJK#{9S1W}Gs6sAA{ADjx5)E|G-Rx)g}M{=J`C0>_T2V1}!zke}Z#lT%fB2qIgpVHes?4lzrX^ z=fdU`S9+06G^H8^PiRl>AxAFewK7}@OO=#I^gE6s`3W${HOgXDhE}1JLVfuzYT^vQ zp;xLU6%6c9pAf~9011h4ZdeMr8mCpfR$0ArAgHV;AJcB~z@lUYa|z>@BUNA#^~h@) z)G=Z%S)>c*L&bbu0hwvdKn54(F;A0qqNpd{Pdq8TN~#%{8y zhK6PNrSH40A!`PnpYW;CnlO2QFc$h&miL4Z0*RBDa21D=P-4X78!>p~cBBk(Jda$G z_k+jsk;7lbAV%{PPcgh6Sr^bV@eJJ!;xv=KaDhE(-i306YEg;8vK(F+EKmXcQ5xiE zSLA|%DQaI75ivMyfIldB<>}NA^6}9i4-Aum{v1bZag-wCOCByG)`f`W0l$i)@VPk7}Li|aiI zX;Hf(#<*BVl!7wi3n5>WHUzffSE@5UxMi#jJkBvSj!Onq6B!(;1 z3JJk=dwf)i^1_}@>?&`M+cB{=5yc@FgMp&sF>+D5#X82D(!uSQ(;JLLBf&sYN=hVv zzmfAlAtTjn)?@S&VMUjT)+RgB2}cAXClZGQT_B^B8y^%QHxx&N97(8(;Y!IUvd4iZ zF=xVALjGVV5{*W~Zbu+9E9|lJ<1Xwj@tx0V=Wq}p0F^k7s#E5IB)^sFg{&6cj`5q^ zCSijIp82#?)GkYHBBrn&k4ge)#ch05eaL8Ca|!sT}1z&R3LhKss6^oKEbvII9izX% z8=;G^jUU!+R2t)Hf3*kmLaq)O-;^D2n7DnQuquGzBOS2tPI3k(93%i1d+ZC4u%MJU 
z0iCR-WqJx&#n2aaH|WjJyin2lt=WnX+mI7iJaqkzxkK`UE>O^7!~9LDQ)U7Fsw`B* zfdNXn*inR6ycC{fq6xAom^~9}p>H7v$g<`X;0^}~2H@+K4}S9Q^C$K!7zBC}HEqac z245MBU^hCFf_MVG6Lci9@*v>q+mAl5CA=Uny~WDP4;-18s$h+lR?(41Fef?>xMuG@ z{_qPgz3|A@>t_rt4%=bKT5V=Af~hCWp@%Xn+j`dQz3HLnp1Swy_2a8j-6kMMZ=ne}M=Mos z1Igtp@49Fg)ywsAbR-}My@6gKI5-UARgRFmBjgvag6|uE2(#O4HWO(bBk4z@)g38l zow#!U?GJtXo}(M5HD!9hV#M|&eEp)rjPh!f7!O1z0l4=tVG@44FI+ zq;i~m#B4GR=qs7Y;S2fgq#1!tL|{D!hn)LE$(dzCCNJK0@YvP67Iox>;09Gk^CS{p zG}7_NsQ`Lnp2<5dz@P+TM+IRCP|Bdoa4FdZYYC94TA7DUR;L>hl%s=$9|W1%6;4S_ z%go6usv10X<%VsSZk#o=H0p%DM4M(k%{K}4W+0i)jFIQ1N;`xV1OP7Zh+qav#e zihxUWlM>Jo8|fObd|c^r`GsKzXRbL##bvdFM)k~HvVQ08HIv)xio#Z|$w|hdJmv+0 z)#ZvsV{|3J;$SJS1QwwZW#tOQK;|!mRq7z6f>pp+RytGz9!0sxKr;9XMi{UN*SaYM zmCd7PFI~O!$iW@UCbm^%g?(;}9$F`(ihwPl0q8~HVS4Hlh&c#~I8VT1PeIzbeIQX^J73=W?^DZgd%s!I;t`ry3>mX2#F$xIFcG@HZe^7won zkC#6??9if4r=81^J}^=3a;uOFW{m$*Cw4Qvktn!!r_8Bp8VixSqm!o=^OKD;_7>T@{VHnYQKfURgT(tHJ#a>cq~rjbGY1~8r5gka}xyeL5=e|-8hyFT|f#71ML7@ZFKs4 zfvn~!TOR!AS8qRX#oR$9nL)cP5O(k}%?G7m2KlO>D~?1W8KI0TJ@6V$SOxLGQ8b>Z zPxz4uLxTz6fgYsu_{V(`Mc{h`90UX4s07NmLMaiS+ZRf2UV6g|uRU_~!ci6ZDGnQ4 zI}<^@ejiS?5%u+rn|*#b5lu@W`Mzw#s{iA4#;2yUjV<5T(GBL ze~Cbgyqr2vkPvaicN%M2^V|c^e)Q2x#~0Rwy`iv=PT4`FGHVg(64;B-prj~NB~~G$ z6UZPoBt(+YLb!x!An+i>584*pG+~O**X0z*=nlLAWQ=lg1ejLqZQJ(5my-15cQ;qK zoc;jDuo%%66Zo5Rz8C^DK8fpvNRgZ1fb!$JNWTCfF1Hvr0-q?4;KoQ-@K1G7e(W+K ztO@It!-p|pBAu?HAp%|C%_t}yvizo>NdI`_(8M&I-sN)%Q%EKoM+3kh3Bd=K5KaUb z*}r_iTKGx`sT|HrykX#s>fsm{at9#7uE!ObEF>o&CG?4hJ#+YICT6ra0?A>Q*%Bxi zvij-2NuNHjqQY(WxMRgRJjzAE8{q(fq#QTi$R1Z9CILb30G`v5C&0=9Dn%jc2ge!b zd#zEBO+du2m^v7pkXbHt3p6u1^QRns^{d}LzO2-bD8yrYbBgk62nL6n4ta_Nlq8hM z@|QMp@u4M{iqOPuJTh(wt29oH@RJ5w2MHtq*>Fa&dW*&u(=AJrFw%tKCV%-tM#u z%_2q_jSC_rXBrMFc-`i7yEzLXn(#|6cCuti#`8>omAJ$~{s@4Sn6$u~fH00XXkb1M zB|Ai8%)j8tPd+?(LA4)EdVEfk!OF#kLPqek1#pt4Qv?BrmJkKuNs^CGZU``vr=0A{ zK;RhUF-|AuwXpx_v{e}(nvyC&#qNYVa-*!(Yt5O{ZhP2@0B4=o8oBL)|L z&^p~lc$L?fa(k|P{>}R@npPh+nZO$m2OhuIWrHCJ76Cnl%p~1Ma+t;h7Z`%=pfLP? 
zNGzm)rL0bHAO${VIL~oB()xryEjulgk{-ZmZa%CA2rbDEKJN<`(4{rGoY+{WNgjXI ztM5L3Xwl$;h|8phh2k)qVT^~oHg4B)(s3dYU2Qbn#1E8n`9h&!FyMn~Qh1!yqX3P1 zLcqZofHUU&Q~=P93*szR=51fGb4^!mP4m#oZ&oa`CVC2u!RJejrlll7uA6AG7ZbIybUXl!dV^YR&+C}n(^X%PS5VzrlkTtt z%i2avn>n(vx~w2045)5Do7E|ssAWWr&qjVeQ2EMYvL+i?#CM{Vosk1J_ zWycxrdBeIVj_+(~s42}&i-f}=z(T7JX0y>kLY)>qOs;UmZN~Y8z5oFh^oFxbO0qKx za?*3sea=8iz~Q44LpAvq-P99JuP^sUD#-4lpf@|+P%>D7r48k02(-fi!R$-QY3`b` zV&m#nizc)c*fpkb!-OfL2DP@gH&+#8q$Ed@f&dJP%I37f(GmLU!2HE#!q*@YXfAI! z8gdx4-h%8%Y9ySQ;t7U4fg~3xY)zETlMSZ$e94tH2`B#>Y!-YIK7*LW8lw&CY4|`x z#<#&8&8cYW7&ClCPkRtTXVA0>%{7gURR!rpN-#c&Qj)JWoG3|C9!NW<-31Ls8VDG) zxPm@@=B_K~ie#om!#)Rhz$dhj1w4l%UlWfm)|_tsKre(FM|V{dgE=7XVA`U%qj2zO z!k!$0!K747;Uhj!P=m9(xmn`?IPPF!=eWj9C^45sX8^!h$z*Cf@TnA~JDnm6=23CD zg2}j$V%^SCQ)Ti*g#vH7ew0z~Vg$2AC98K@fsv0tF(VXdX78Ua-yJEg&)UiLO1(11HMDf!CuoA{k=!csapkCOO1DIm5{OVjQnQT8nhV;M zxY#|Q(6Wgia-CJvSW#G5TwYT*Y|+{!P4MkHXCfpXv_xu#f#uFywtU<63#M1bio*$2 z!ZRLeEfc%Jm?_EcFK!WXB06I8W%FC)lWAkw(|Xqp5-*M0zw_nmt78fLAvXs%s>@hR zC)={ZM$Dg5%FDANdBZy4A#ol~ie>7urV>JZ{?7dqPSwpyzo;;RqMPG`&#bXk&mC^h z8`a1=D6;u>?1~SF)Z5IN6s{P3xS)cO(m zJi2sz2EWHIw!*5uymbng40ejh?Q7(&ap}d|yy)=VeS8Dx;NpOS%re20?x(+a<#>0} zsW_%QbO`o;3b{4;X_0`yjTrGNLkywTsC?YG&cIX=pC>)$_O-LGtA{hUGIxy(<(ZTJ zO{^(WQKDn6@nhxTWfamz9!X4-Dud{1owj7fxT+*$sH7-PUsRl(#4K3}a^&B%byi6s z!i_bQQ^*~?q?mN077r)f#L#ZRx#PsOIu92{>5H~59t1C2j=Z)O0XsD#l4v~DmsO)Vl?#&_0HD#=FbAEe~&BpPjk1H!n>u;@+A5w`%{&DWM*>&r{`rVr`L zu?3S|?v{0{2RDr_(g#aZoxx-jPb1e0p$T&v&@p*6lxkK-3xw;B%C*a84R#sIcfIn& zj&1M#4I5278f#8?z|*bOv)$?xgf^~&Yx zywync=waEig0a<9=hcl{dh=UfklP^yo`c~lvDs*kdNG{}7p2F@fy6$BA*oVhRZGF7 ztH&Fbf4D!@5}PWejF>s9D3x<CpyIff8n=*U7VeH&&Cn|5!lVXhJ-LEJM= zoH+dP=g++(K%(oXz!XgFNO~lvwy7-1s0pGOA9o{f5>u z&+6V(g{zTT(Ony~r8f_$FKHZ^iubA+%V$+Zy=E&w*Ccf;Zg+9Fb}DX14WAJQEM<5@ z@e|TVE8&zugj)`;%0UGzDR0#Jg~PhKs4fxbKdW2O3O<1*bVMvP`(@_R zdGoT(eIzWOeNq)XSc2&z*6+Py7F1hObzwNv-czJwp4Pf(!zjKfbkh&z2kBe(~0=qr2BCW{b{Rv2<2BjX!waZa5)HKRM75DV@J| z%N6UpvyIN6!7|~*iKX?a6AyPJm@Gu=;xWtS7Nu8oY`XvP%erUnd+_aFe*VRufBX2w 
zd+)nu?f8hJsJXV|zz6RgUUSvAE+3n0#wQ+u-)VP0aQx&0`;XqZVcGgxRCB!~9X)n^1c>L{8Kl{U1Z@&5S=k{#A=FxY5{WnQ^dhL>) z0*fUHT{q+|xb?1;`EA?xADaV+wAD*@uUP?+@#Ow#DVaT^5d|XkUb1`la@gG*RobST z9+~I%U2tOqr5SJf=g%IyY((=CLG>Sg0M{HZPuloHeEOj9`}-gN?caX>(#cz{J96Kf zFW!CSfnDQLpn?s)l#t$6Uehpk&dg=E+`QF>;zF}Ib!(}iA%GUjeN`EFy zR%!=Ma<91hvNqgJ#Wh6*DptMc`h#2Nj39rBW^P#o*FUzeLf5(?jWR$aJrMz`h&oxwAn;i9eXSh#IYEjpbs-o>2bkGw!INzxSr#c84N z$er6-nwM``I1xfe4(61Ain;1B_EI_ul}NhVnnsr=zW4XpXeq8XGxyN0Ah)$qmpjX3pz=0sLY$$xqN1@GQo+;KIgA*Q1>(i&^{j$5rYmCEE4yc` zy5gxnNuRv(^nUWok`TP>uA?^8rzjjmjo){}i3jgq$k>|Oj$d-+3;+D%eJd9o+})l~ zwx5r>{H|$(%I00Zd(V-3U;TvR=d1f}c<%XQtK#!0rAot^`yqQ`Qdx*vE zEM5b@eDAg?$PrG@%srKLjDgq;BwmN|WEr>n>Q`uFo)SK}duiFJ6^SyM)z4=urTg6B z&mVf?B&1U)jxCP{ZHL;;OLR9TQ^Lfx?=m*Emz%sc+U-w zKXu1EZrHtQQ4i+_rHw>tVfpx$6a`%t| z%sLb1v$0wu)4CLHwcD_F%f>91N!yW}$o6>c9-3S{XN5e@!jK0=~V7>Fo3wbGAv@|YM9uKn}`Ns-8a_5Wq<^=qS>BbCFo!ytLUO2>Tti^i9nL3Fa zrv+2(5X7|{dGgLn_uTlvD=!|uboa#=O=yl_C7;>nAl zG;wp?ByABn@LvUsL2wjj^m!B=KDMW$a_np}NzOKOE^n(CyW+A3UU>5Eb=5Y#)ml1t z)uQp(Yc#g5MQG%BO>2K4JrDy^BbGpPG%KrmT3Schum=x&yXlD=Ly%I8t>PlQrnf6D zTPfNLA8A^Ry{Nf)#EbiAJ3gXl@?GQ+DAqt#NWgOuX7$;B1(;t zd*sUr`LdDVZXG^$^5D|C&IwB|+gwS-S@T8r$~sY!KHh!RQ+IC|J7nsHb@Rqlr56ot z;Kmfs7rV@bz}Tz2!Bv(9n=|OuM4x=L(?0sLliuLi3mee^u@>KiIl>H?h}O-bM5ycv zSjyWU_&}20d-2KRm+n6_DJ2YC8!{)&EwweSUNw2lppF~jFA6d7{k?tg6_HtS=3(-)u?AJdirevd5XST$QS^0rhkw)Jvv~d8e zeC_M+fAPc1o2xIrW9#IaK(wN^t$X&$m6JQR{aX5HQ?fNbYLbPc8dd+xQ2rW$-yo-( zLevT%5l4TR9b~q8FaG?ySv(ql%+!fB_=F{IBV+^HekadBml5-O`T8C``zP*x?3t4% z_YAEbF>&(948-(hLCSN6a1mgV-r`e0q((DnO&>s+$jv3z`MrDsmvrO*KFx=R32V4c zo&y9$1`+T;1i$NEB_b4sCE*%1*lNfgG=KH7-M8+XUW>F0A~?$#S)10953z1OcR3^ueD`WgMTf1=Xr8rYg(!YdrHNdoo_s1dOz5Vv-Q|d-)vIo47iA56ex{vzT|K6<rF=92jJ@_Chg2C?Ke4(`kl8hu(5t43^Ze|7aq>zuE5jgHt z@kK@vi)*R$BxAGIEZ(|nYEf=QOM5C=P&UIGXuV;|z(h%BIYaqW6^sPrnzuArSS8 zo>&?j$U5v=mGWHzy2j(62DMICg0JS#)6j?&>4+!bbvCUEDhQBZR}FTkCM@|-y(-#r zLl0*c4hfi~*JD8GlKf@|_IQ{T0irS3y0A zVJe~%WSNLKo2%L2Wwe=%7t73Svnc8H^QW} zqaOWF$;$j3%YYLC6#qiP`Fwp_!XMZYB`fwu^bwk;TCVnsekU;X^8@LL72$XybKebi 
z8k-o8*j0q2gjQ$L`r4>{ytD!h9jQPO2|IvrWJR1wkrXEb<|wSsSJ&}BCzzhwaroL@ z%lN?Ob&Ja>n>`F^h2p~u{%D3jYQ#5_sn!!%-N%%o0v-+i=}R$m5(!=LA`;|mrw~~w zO!uJ=5>U^spMLEJk7mgUp#V5sP5;qIzz3x{(~>!3;S)M`%?j?|jI7@-{hhJpk5VQ9 z3BRg!95?@(*A>7rgegYF5!6%PBA_m|T1W0YFipfU;vE<*Mv_RpaycF=I7x!9j0UJ* zzW-WDa^Ecdn6U*z z0_yf5`BU$DU}-!>V^VAF76X0N;&VVHMITHH@kZo7kl;TyN)pzc-;rPy9Q}x5C>q?( zwc)9vi>IT4KFpzsSCn-*!E}Eo(=T}b-Muoc{1$`N?(uo?#Rr3=uZCh~e^}@i?38sm zEvQ)_idte#zmbkI_RwGX3tfb7W9XbdY!FN44+-Ttx2PMr=zO)9>8C#Q>ZOU9bry#W zn~`)FH;K`}flXLg`0p&=md+<#$Re5@!5!0)2?hrixAC_@Y{OKd=&p7KlY(Lnf^+G#_&*;0>6eIyagx-+*sBl{5Cn43 z7%z!PI!2G4&KL|P8ekHv5XRR2MEapJ{V-!Qo!_FSv(bR2Q%AJQXy^N0MU|MB#yD9p zm~FKrg&BK;gVlHVo(={5dE#^kHS|A2gmtioE0jWzwrY#h3~S^}J<>SBA@OnYe^;oOj(liqxaN(;D-ULOe2N7E zCz6k63GC(Hc0LebnNBfDPs^@U`~RmqHX|E-oR1ZgOfWV2kl$5bSe0sPXs_+yJwgU7 ze19{+guh@}Q@k2?D7&G1_R@7T^5V?exNvA=dtqwx$kBrVj3o!#{4ksR=7J?N744Za zOLKCY#|wRlULEz7wAxsZe+M?^WMzI60Lg7ch7HP(CS|6Be_HSOT!D%1_okLcv#Rp# z8aK%lTbQ}{1ofMOt!U)Pj_kYwmNu79rN8&wVBZ~2RLp$_kX<*UIX^`f+xd<}HF&;( zqbXp^%z49#GfL@0&O0AYDuxM~U7MQ5U~@{+x5q(%^8i(ShDE~j10OgZ|EEZ5oi}OL z(*WX!b21k*CAx4IJ`W zkW$zwkzeJ{%}LWG4I5iox$(ZO9BI+w^9D)ZiWgpX$-!rSFMaVcIf$3m)Pl8{pDzG7 zi;AO}d1cH>hE|45%IG&W92>?=T6xv8^5>UBNh-20v0#Wr)#f&z$M0${TK-Q-a^?s^ zs>2Ga_3hn%|E-|DsiSMk!bP-vL_AI6pn$H1ya{A)OxO_8zKpb=Rj}M~lbV9xm?Cm| zlATFm(@l~jujgYd;*TG>C2VL}xrI$SQd8+{17Lr=^@?jxK5$^!(EmPe&RC<_m`b$4 zC%Zn$n63JcTsn+ZteiMXrFC5j$UeDm|G|T6hxN&JK3}p@QTy`=aAn)W8kWflf3a-} zQ;pxaIlv-Gw*s(lPaM5q!;JF@N}sy3lCZt$pn!`e7-JaAtZqzY*@stN!r12PE}KmY z1;NXn*_orVhIWcEhi+xX_74A@V1&oCan{ z($AXtyjQ?i}sJLy8> z6fkuLgcT2C#-Eg=i+27m;-oh}BewfuYHN8Nb}uL8f|zoy^L2ml>m#J6=gcZBJGk}oJys@tkjzGC z^xko+hx4@af6*`csq}}`0&eeVBM(bb7xQygyY2Bna_`_8RyJ|U`k|5(@@xb!IrU^qHJmu_!1QTgqx5A{n;f%Iy1|sc zeCnhnto;TWiGu=lb2F0brd~2^H{0A-f%d86e7ww*q#^R?KPcUarHRM}N%BEaA3|2 z6n*|fY3JY#e{ITubg~>1DB^A6YeFrQV$CDhu1org7^{3(T59KFm>PK~TH`I|(&1XZU zp`y{EXQki1hkYo&ly2wKr0HzLyouctCN9{qbM=UC2CmH7j?O`KS*#TLWv#S~8ckjf zBauRTyYy>G`qs){-Ck8!nh}Yn=aiIJ=6o}7B^TF}<@+_p!AG#4`_oBm(7FXXuH5|- 
zOtb&Y*aO%Gdr8}O|9X(`@(0f`(>F96@vovxuQ%d%XAfF<^+R`V>YDMu7oYw3ox8rI zPwPM#2weHXMKv5pTm#9Irt^+8tI@52VHiv^B`x2%b#iIr#V`K!i{E|sTa0x`()&a- z=cQLS5g5j7GB>^{e9VybG3}mbyC~kAhUEcG z8X1z`L@+Sp;`gL4t~vCwJOexNNL%Z$r8PiKHg7-mqle1zU}dzo%pl^9b#cc2!9dOhYHTPMW6bnoI2q5nA!P2TJhwBOuJ+4CR+|gXsLj^$LLc^LYF#2b!^Z z?c}`5Wu8Ot&Zy>ADfkY1I|l#HZvbF5GuSP~Ns%G3FC*s{w#wntM`2x6UYJ*PPanFx z;^G;^M2B}J1u`9V2me?4EJv4`}oV<{wUbWm;Id>Ub@*HE=tjE@a^AC{RLcq<8N@EdV1aEt8&Yy97Z%WGK zXdu#l^wMjVUwJo8w{QO>-#pX%4XuBr62IU8PrZv_=gDEbgu3p$n6atHW<#v4Dqt_e zh+PbWB3wA}9xREG&iuxL8cT853CL+FSO9wK;E7c9SEJTT(U zvrgm3JxE_juPT3%*3^w-sv?fB@!RnqY8;=@k|)XrHO|#*Ee1HNcO?}zwl>u@ki*vg z2OP$3NJ_>3x6`YX6NR-=C(g9WnYDFVDm>3Nwc?bYODDs5)$==f^+co$oZ8_`CHz)( zaYUsLmwOe}*fwp(0k{lqk#xS4B&XfQ{~X?5$kYa>oBIp$5|(%{M4UJuK5N0HW8i?b z;5JG6yR>%*Qyq9gBooKjd^m!|i*tdkF5&y7w2PBiqfXY^cUG1hNGN_+lRtIFp{p7q-t`kkeL+DE-lK*E6C3zuj~d> z*{D=%S*!D^eY^I#;rfRM)2Tj%MfAB`wFXm=PBJW5a6xw|KQ#(R$GMg>>IW<={lEXzuAf_EWYE>Zc2W7-5 z5&gTXoXIkCStC)qi97L5pR;ZXP6R*l`_D_+tZvr(OWQ)_cHdQi{U+NYFBe*%ak?Sx2{{gYUT3z;~L@Lx3w!Q>a((>+io{FBZYPC z!`iBga?+DsG;I4FEKZLv5b&3#!f}sQXYggEyO=A)v^6)~_hW!H(pBEd*oOMCICBmm zO~+by@49%&*!eq;-+t4vYmQxa^(D&}ESy?jGn)9Hso>`#nF*CB1~^{!hc5S{wD%Xx6kWxI-M?ma$3xl`PqWR^BFBxi`i_- z2*K@)-k|qnX6X!mljU{})}72l$F6#%50%q=;D6BIHd@T1Mni(}b3So~hrt?19vro3 z4S@iLMACy#`ytsxaCo&2rK}QnR(FdX9%+om^q^L&g`*ZnRwf*I`3PNGA?6u-`1m^M ztxMjkLuGoWnyTb}up1lDwBnr3=<9uC-@Hv1mg(UE#u$0Uk;&-EfW$GCkhkX6M{hv-lajRm9(GU~a_caUgV)M^&*WwRPH|p1u$C-I z|Cre|266CT6Tbh+ATUaYPT?4=8rSJwD|j{k;i85=BX34YS$2Bm=;^bjjjE1X^bQ;! 
znkx=7)Br#jG7S>YBkMT>IV1DJ6bj~KWasD4z5U|+CTW3()u}udrm~oW_dS#5^yfFU z4Qi;Zsceim&x!9&0Q3(0q>(r?#xS)g-R6%b`2%;qkYe+p~%_y}|IB#RdEd489puO`w)*pXt_OTC{^DERmk^-$FoGA4}2;du=zp-cPVXH8j}+J8^t-0#(CKG+*Xix1cg_Dqx)e^9NVujKFD4f}6fW|; zx(`&X&Kv7ti*BEg;{fPys?x2EZ)YN7fE2m4W!d%bJdB5P2-dm@a|Xfd;`P&*=LONC zxZ>18_()_qIG6aB$C$@k)EybJsC_a|jXx!AEDly@?80AV zd*OAJjFOt|Prvnpn<=utL2FDIF>NlU5joed$+=YeR)&{YWe?7BBlfrp!eN$>u;ZFD zD)-A^Z!513CoTBtbLI1CvRTS>Q24&rbRb?3lmAkwqA*&tt1A&8BXR-gP39|og3g9aDn>g1laE!%oPq- z7qY;LTkrS{B7RPz1f};s9-f`-JBXCVIu(?3Xv%nzNx6>glz)g+MxcMd@{ z|C)6W0GGbT8lMuypOvn7;_HPhe*tWUg$~Uc+aesiJ>0@ZEL5t`AqCU>+J(4|du<(L z?$als=J||z_%3d>XYf@K%KJseiVuW_VpV`&kS{zBG}7`iHx-&OOf^qnEh z);d@x>wR8#e=h!_{1lAw8umNDRfCRS0&Ha4SUb3=wzgF2T=DdcWc{-XnnS!^dqWZ% zKA7$KKFB&z;!^Y593Z0oGdakk_k?ohe-FL`jUy{9HeRUz7o)`sZJc=@DuwqmNg7N# zaHy3*X0Dc`2XRL75NZoN?T1aThIiV+b?-{=96b2a%q{C$KI}!xkEk4lk9bCykV$Ek zAa9AI+IiNV?d77%&FpeGbgB%h`t3a{X19;WNx7i|Uwr`81d^^=@%?+Iwe!5CnQGki z=1@U;*mcpa&0D08KIHI7*U(&3y6rx>R;%BW+WkZ6$;dfRx7{h>fX_7&m41g- z3E{q@#an&s;*wc?uk9NYax_R!e_d z$y{@8|Ez6ghjcZxq}pm!Ig22WxAQ8`?bR{{Uy|f+kDd#LI75Qp9=@_v;f( zJ!LIb8w%J;pajUL`IM3Tfu)rM9V7k%2Tv7qKfehnQ&*++!kjpvWj1mfYfqi{2)P(# zApW@olgS*$VY+(_RG8MRJhwks=>f@xZe3cJhF6Y8R9+M$!@ljdDHVKWi?w&AX3pvH z1hF>vG?b3%hLuEkRNxBG5R5j=+=Ewp_FU3P{SGRdbtMraHHBN^&fK1p5AWfqQ%&B~ zD;6D$YS>+__AINxKkCN&>%^#RTs}3d%K~j9Y~&6!n{gZh^euubz_oWl+qyqVZ>G$< z`6|Jl{e;ZqDgjvkn3_$39#+Lm(z8cNLaR-zXEDXt=UoZC#4@+d(lcXD)#eoiBvhH^ zihVPy=pp_2?PY}P?vs;2Ur0ixwc1P$XQ25u>C=|Mj`qrPBcx@cB;7SAi{vyytwgR2 zXi-AV3a8Rj`5G__iH!U8%$?$#I+-g+=K6H+)VW}B#@_o`J&ts~=zZyz8|al!%XB8S zW99+r!OD$o=oydh=fy(kn%Po7#cY*TZ%finJej~)022J9oNm2G`Ye*1!ZMrNshO0P z7U`1l>%*HQsc6KNAk;S4`fVK{8}k60oc}8K^weE?^0C=0HqrgqJT()-(HEv)R>HHzsW@boLtJLD?u z)Q6aZoCEuoA1Y=&zxqyZ!8i?-?{|O~mN^}~)S_?w5dO7PETyV_LKQ@OLDXy$4%*-wwzHkC#Gp<}RxaaDA zgTxE8aIda-bi;;{UWu~w?j`dYO!~|TTUK%YVQkWRe^clI>3h>Gc2#r)=^_U+q+c>p zXk;xcE{kiyFOH**bTtd*+58zrW%7bAZ3!5;`#^epo;I^(%vaLZ=II;%Bz?JmeovXxUmSwcq)>B#lV!asp8I(<*f4>9 z&rafsjq05JgcY3N=KAZ 
zv3Ypk-whciq(4RVFL=184wQlaiBdyLt{;oZc?;LgpEr}B!Gr{kxp5mD4P-EttMPI? zJ0N6P$?;|83Q6+5M|fhx9{&WBrr?SwOHS4roh*P@gl^;(E5LZ@U#P=pA1YyiYRqPB z`L+U9GoihzYZ>(w;eJU1N3lIC^KflwBlCLUgVF`8qN?mnPhd)?a;x#qR!R%8s@HwVP@l@i z4NG2GKQ1aqCsZNA<9w+@tu3l?YdpgyPbo38+z7LVvGS2tz2T`peis4>A}t3a(RJdb zX*c1cgCRA43$_ZNNq;`aTw>?~v2na|xLv+#IKt3!87CEUYy>;Gf#J)opIu65z+ zoO2G-)6>(FbIv(1FbqQ&U|`5O=O92J5E&tY1PEc2k&vt`lylDMT2{1VTaMSZY|Gca zzP9Y^^MAYQbkAVlx7H8;6aRYa&4xLBPMr$7cB)Me)7cwWW(q7)j3$afEz}4ARBaq7cCnk zRc>uPMgiyk@Sa}yS@=Ka&pk*oYTGMscaLskj~#@0CGd~<4{3L`xWBk7PC~bbGxLh- z+Pa>SzDJ^wJ!(ni=m+6v0u-j~lHW_y<1PG&X=dGWpv>e{dS~q=`A6)jXU7h(qJc5@ zJlwvLxz?Ubp@=>9<9Y3~zqcldHYuoeX{r<+Ii+Db&!rn@s#S-pn=8x`;(u`2sjM0@N z;Xq#LW6rV~HEa2DW6*v5+dX!5X>nI4;`LAuCSJPK1H4%gQ@nFCPkg}_TS;wYMSW*O zRqo+8uRhcG=1-)*%}bxcrkpsme8ct`xn`R~t5VvP3P-g5+V>f&oj0y{j`Zv2_ts^0 z_LPRzUB5f6Wj{feWA*xu5yEA*XO?#_nL>z|iY;kqM=uJ(%}u5G*%?kxXpgjKE_+K- zY&kh%V)cekzJ7^B*A0$YtjwuYr*zF4&1T#=vA{B0ng%J#Yb?#kFR%X;ZR7!%=661; z1qZDGgQYoM)>@JpoA;ds6)}A_tlC9E#)zi;m=i5+?kTB@vHO08T&{Wg$fkprwZc|8 z$$Z1%(Q9p)-7pDvykqd#Yd^*v8k;`N1tLYdjskeg02QGewFu;%yOf{y+CPyTp2*>@8SjVU!G;w+fjI>}MA^EH;! 
zS4n3k=e4AdbgAqk7f=(BzWWtPYGCeJtMkmRs#cF8IJ0ZoD4-j>qWK~Rz|vZ4b2A99 zmibj6+FPgnnFrR~wvs5q0_m^?29QgsWNDtlC5?tzx3I{*jZv0fxpT>plEM;8*DQ3( z9>wuCeEy9oES^AC1C|W+*0#aaIee|$KvQNH2%*~2tZW*hX`TS=ZjrWujor89Q!ZZ7 zwAu7QpFb@>O=Z1>oK;GLT$oD0R-A%u*JYIQ)ew^f{((xu(%A%{{CC0)G}Qt z#q87S4S~FTpKrm&5$SAtmyLtL1!QoX|yiYi_^^F3`sB+`eK-X+cgDb!E=bzIeS|4K*&u<-)-P zxx^|ofF(T_QWh@{xx7Y)x!~gySjd6kjN-hUyt0t3aK-h#Fu;R8dH2?y zj8u=-?>E7n57cKC104s7)Nd*bv)Nf&rLTM1%NTo1k~VA2fv{KWCBy6+J!fL!1)!6R zsxkReXp|SABPnAhi^-N*QIn4qPLR0tNgXyzv}5VKuF|SF7Rs4(iV?@2pXCg1FBl6v z9D&(UIEJ~GNz%gRHl~9mI~MSVaRiitDb!{2_pj>`emas}2&l9syThy{M?gV_U4>N} z*4lAJXj}}eWNl4w>cp&yIb-t!{&)e*8MO<{6UlV4)Hkv`ShP8t4>cB0uO&PiC!0%B zf4)DbA``2wG{K2G_O7Zax+r?7bY`Q$Y&F4`QEH7gAIzZAS=yXoWeN{7_l%v8v2I0O zM~2Db36^Kd70$HzEuop$LXOH8OsWH0H9dDpzcjHMo)aVGwpe3pUTQ|!w0&DA)RgtY z8Mtr&y&S%P6HA%2Sfb|hI!rpf$rbRKJULNUrmO7Kva(SfSs$0?WRAmkv zo>|@1i+-y~?QdTHE@SyBos%RA?8UOT?NrI{uHE*z#VA6uJl)mv<4 zgTZ9xYXLF3pwgQfnq38YoLc1$_+A5~+7*C!9+Jr;X{gJKxib!dli>Xz{;K-Zr zIQjAfX8ZkG)+9+PmgY-IFD&UC80Z?&(Gt73^>UC?>48?vU11rRT20~uGtZQ2ZB>b< zC!IxaU5Fq}9y4dmEjJQYEO)~Ov_-p9SuK_-sqOxp$_Wd`?B48Pu}AB)X`f03ZET#k zpf*3NyP>Eu&&6#JPk2EW!t1x&Yr}Sv9?a5Wuv7*6tW#@B%8DnX>$S5-P<8sSEI zz-{*GX^2oT&TML^fX`?Tl{Am?H{CrNW1$;+de|9B*>7DHX92Zs)WTL@fV&tMgi#{0 zBDJIe>%Md0vZ{2zO{G>cgUUN&abKS?aO&+SeZB2`}`*nNvJzYEwX^&}l&ttxBU;C}N9y zfS&AXs=e--*B0}f>%Ul>mRg-1N{uzoUslP~)hnV1pYY9D=GO1Szzf!0Ft##}r7y@S z|EQhWddi=;mM6T>3pFihW_@lergaodqX)eV2BmvcSBgc8b_lMH+Te~%xGEpfl%DR7 zjGDr!1MOw`bI;tdDb9-blI!z0W_6XUY_MLR zO0(Q|GUN~}aWmEJkOb+_;?;~{NW<8@Fok;N7Y?3iW>$L0p;Cq#`=$=eTCixz@}+YJ z`lk#`Y-(Co8}w-uqzjg&k;uBF(4y}8023jGcx9jc8 z1IO0Fi&f}#8lBov4yu&q6{MMc4?9>ioRZbun;tG2g)H8Bu`Rmu1)O$xux{Oei|v-s zo~b7&*;7x^A;^zv1HD)DgFJE#&)$f)-{cpyK&#in+{Bg z8iP5t{WGgge!sn-BQGc2sM=i4A}Iy6jnn#TJbEb2S*dyHZVW%OVeNxbo_Y#!HP)PZ9v^wBhmV zH^2G*_nx`)`u$rEKk~#4hqlij|K;O6hIY!EFjn3v4L;TkTYiqO-E_H*-Z$|5FDXJ? 
zlA`$W{bE1syA^{`V=wY(SsnamlgXI;;)e3p*|K|IkWQ7STeWF@r*6CLgRkDYVb$6k zbU5lt1Cjl6Fuby`#-~s3V3~iBK9LS-T+ir8ZT~3$V<-li6Dw{@w1y1wfx3cQZd-`^K@1whSEIM@j@S<_!x(jt< zrm0x^)HXw!b?ON8&c=KtohfUB=H5tn9eGXq;=}i*m4CgAna@sO<)>)z;{_vy1U9#u zu(8`LuvK2S)8%qHJ!T_jyYnhr>L(q&b9U$9`FWYO)jJl>8$V)f|HzWkF{z z^osCNjNWQ6-yumLmu13L%SOzEnFMT`6)!I;?;77bvMn`kWQFN0+})CPPFkUf-t^hY zY^A4VMNwelmp_)4v5x8E!AzP_ds%w&VD7&(C;u<;Or}eiR)bP~M7kYqOsHmDh{fR< z0;VpMvopJNVHInpq{M8~z!l zz%T`dDKJcdVG0aWV3-2K6d0z!Fa?GwFie4A3Jgcy-rob=-hAA*if&Viopb*RN6UZu5iRonVt#~4Q$S*#QNx*q&6XA(pii8a!vFZqa zLqU~%{$CbmvhsyF2HIXxj}^aWlL;4^ix)vyvqSqW{*Qp@lMs}_@k(inCVxju$ElGivg0fO(sW`;t{$)RjZ+F zfgs12er3QLai_uh9=g*JUWln=DuK5a`vwmMsxoj%q)U{f)9`pAoE8eHH9&)SPuUP% zkUvKznHD)w4F|E|)Fi9{gQ1j~7Ud=wfbv1u1f9=|hd5ZSK@o=lXpAKs0I_^YOId@` zGH|*it4MStgcH;diT*i9k(nIIC0a%Nq}3vcY?Hq-p@_E}1QJ9QUZ^NpgY{fWEdNS! z2b7>Uh^$a)v|25eL?$s#X8pgsAv(&as5tp$lJo3_ruvrv{*oxDrB`Yl$vvk9vJ^QY zT+T9ChMXf&c~B}6f&U}k38D-kELW>R`tcaDJdmm-3X?enqH7H%qgKtinc%ssI>ZFI z1gIC06_orR@sN=m`h$cjh*7fAa)BZWM`#d93M9f{ZJbu12)qQJl3GKE@CT0M&g@qQC5XFOB zlo6_>Q4V1aqR4p*-Y0OBF+7h(iFcIkzlejHXu*TIPLkYX<6E-mI=MWKB{a^`vV2Q}&#~R4Q7_Cm>#815}O-r6C(H6J2B~GJ+P8K0-|qB^g-| zG^q!i-pE>%hxiFQl0f=@`W2<1Ca~*6-$T)X3)ZA^Uw~?gCQuM?uzavlj|NDo2HE81 zmg*)8h?*Al@s?3!EK^_nB;z1i7D6YqpAzu)T|A0BxtLP!+&`hj zzze*Db>pE#KNK$~;WU8VgLo1j2?Oz$M1|s0Zv28@5sN$?E+7U2FA9b6-MT4T$`0Bj z>x)2z(xEI^Z4zf;G&%N&#j8iT5L(>Hp}0I9mwR3jgbNW%xM`4lu<<;;i)wNL$e0mcLOrOyyorJB;KYP3 zoY!cJdI;YV;y}_nL?L;u2BYzR@L!QL35rlmk_(wXoPWZjLf$f!g)DP@RKUn6@U^Ji zB*<@wE2Jrc4HW#nc=6ecr7SiFRRNaQ1a2?;*-l z6u^e4l+-H35b{ipFXy$0hp2RWQM$KiX-DIJU>VrOczR~ zOx*!$mI*|pq(4C+T8JT~;&_u$BcO-D+Hgw28p9lt;TpoUL}igLm?LuM z=}AYRkV3xcN|{1<6A37iYy?S`;EzIhU{^sjat*bdY!m}@O6p~5+r2yym#ORg#)i*m6Q}Et_g$v9g(+z6NO_$aR&*9r+=yqr;!|!lNCKp`i>kWlYEiLkp|&Pc>-H5 zPsBXx4FXTXWN0S*Stdz>+&m4oNQX~8h&iD`ltTi@qf!kOq`?FnFHuDq8Ol_2w&iq0uOV`gq&o?rtPbd( z1COEdMU4uW%d;oi5b4Og@p=hj5k9hXa#X}c<~v6g3CKO*W(_zBr=2r_R|c&l@`sM_ 
z{1TK#o{5SYEHrYB%Q+A_0un^W6X;{HnDDg-ArFv?B3!C*GYtdl`!7QpdHwo6>85Q|qKVN4)~D3oXgD?uI+omYZqU_i!|6X%~Vc!z8VM(^PCa%uN2g?Lj0(tU#xRXX#{uV!Qg?K>}k)n~lLEW@^vy2z{ePmY&Sx|h4=R~OR zKW=&w){K-sDIBtmL&l(lEF*;GpfHGvB|OFR@FEn6(qtcGyr6@!X9#SltxDo0k0$cW zWmkt%{u6LSC`{N(+$7NYXN)*rq;ay%5+qDAnbQFXkVnX0Y0r7Nat?|oQGDLq9;i8U1#@0xOM7tsj3sFW1YeJd886)a)CX+uQ z**lUkUjlvEV3ie5evK7K9?{54>hV2jhxNK`}+EF+{F-O*n!)CAR|nG1;-Da0(Im zs2HJ1VI(skln%${(QYPSCSfAu1+qd3C>?Mjq2Wc!!pe~o*?|i(dC6*UaY)#O1fh7g zJc$Sa2K+|W6p=L*ggCqax)@Qu#lrhjMlK;TUNQ!t6b8lwTD-Q%5#yx%R)`?@kIgeE z81Xm{s0xX=;ssrWOrmJWjc8D)y^?(hQEMt+NGXT~^)Z;>RslcWp&rBwM_dVuM$HH1 zM-gLOu$VPH>9Z}v?$LZXvp;!G$QAU^z(6u_yMK$!d}qWoa{1!jW=)KY_cqGus!l&mku z4FwTWz#rA(Qei+Fg#R{gBqiXoP@@Gu2VG3ENF-LMzZj?)qLmy$>Uwhi5RZtIqCCoX zkQvB?UQxURB1p{Z!jmN~^pa!+6sx|!96sh1J#AA9M6Ti+MIOA7hMN%kWd6Ugq#zC0ysi4 zJqI-<@s{HAyopfcTHvfZlhtmuT5UK_(&%wneL=t5K_`FGNUGL^?E#EJ{y7gd)MHVK zqLX>j4ai5KglkeN&xFh2w)7u?RhZ$e!+HF?5 z%WZ?rHQVsHv>BF+j1C$Hh%Y=_nKD9Zcnd@1fH3mX67&(A z6G~^GsY$q!I-S|f9e*RX{~(DcKPNpe>UBEpR+|mf6le93F*I5%+#`ax2?mn8iri#L zLe9c~43YZ;YW!C)o%?L!HbFDaNuo9kJGE=5xk>|i!G}=r6ZWXb0lV7K*O2M68MuES zL=*(&vY+Tu$$~}w8zQ1ytc=jV*!Lk0W_R%cASBsPdHXCh?Cugo;gHqs3!5FsRD zKnF=g=m(+&@j(X&!VM17e?3IqY6QJ*`@t62Kzh=p|9t-vB8%at6%G8@XPBQaZE6q}Qp`dWD7#hmDuFPF;WXEf2r&%zY;>>#K-5fjb!v zs}a+BYJQrgiC7LMTR}_CT2U^_;a)_7978bV5eN@Lh9gF+7I2>&874!ItqG{kkZq!d z)ee3dw7^wQ?IU&&3FkFWT6Or2habQ9#O7%YaTkP$8aT*5=ta#Gi2#{5Fpoq9H5Ky* z$s&2_gx3%pg^2$muC5@6b6^aJ2K(CSTGTOXBC?Cn2h0HtbJvKS_22$cB9sNtL zIC|>j-o+zw0$!Ji3MPhgO{W@DWzFzvjiTZSJ*29V?^ykxxO_gu8-9h88J zy@fWI0CpB`L5UnN8yJ$pZtIRMV}fX;g*#MAX(3P9UbkJYg%7%DLkf0YgZb`JzDW$faOfgw>t{QR% z!b(6AsJ^5U5OWfF9Sts7tdJ-rslgK|yxESUbyM@o8^_FEwrzXs??mMz+@wi-Weh;$Yyj;6gwitsb z(H*EEkb$0%9|W&a25^fnP$E?#l9A)muc^F!D>2ULeS zgC1xM?#!x=DO+xP;d>vxaAILsZAlEL_G04`mkYbI_AcF#sd6xJ={_x-h^1#dv&lCpSXfa?Q*9Z)t3#bm-9BlmpKB6Mfeyu@AJs_sF z`eG%W^AA7&i(kEc(}wAzDzhRUo6~7A+AXM>%>)a?X~Hcl5VZ1DS%U~#sUF7Bq9U{KwKhw0J~0_L-rG- z06m1L0)McLa)eLlaDy!nUxNXiWimyOdj@Y>HtUqV#Susidt9DCtZ~^*FTeHZiM2gt 
zxv4gb$Lk`7y1gFkYC|~^MnkwL^OnnjIjTt6r`135xuzo3wqJ%NkK zv4e_|jaOqcLLjSlw_foSPAdG}E0>o!>|P&q7*+?YKxM50J;6QFPm+3Jn-Y|yci=p4 zCXFD7!zuKJasVHl$N_r@dEKBtq>}Ka_#}Qb9*sd7Mm-E5f-w$we&{7B$d$v+b?)@M z!j2WUd?Ee)oogngX|xUx@lezqxe77>4PuM1p9z~qv>~;GWfG$he2XzIo>VZQ;Gib} zWkfi_uNPUG#QZkKN@y~O@S;&O`UPSdY<=oW2_msCuVdY_-$=iBXl0qx>T)KNb9@q) z1l_0*fXxaPlJkakIEgV3f;&pPi~%py4)_^`EONYfEd|X;?%~X+8J9HN3#`cN)!Wfu za#-PgfDrcFDaYRY`u9&RFZLn`;ZCAE^5oeN4mKxk@{|OmBt?_$FU&S}J|hX>iVOh* z8L7=N;ed14TLeJxISXly9PSU127FY~hMky2BvKIoFEk}eHB{D!kv{0FGgiOsfuH~M z?6h2u6?PV6B<=J#2t5!$WCtuclZA*w;>DI!aOE()$?Y+);CusSP$0;alMq868rqp4 zy}%0;QV|~sn*_N8W^x0}{d3qyTd-`=cYpBX2bYwFJXW&<+zZpny^XtH1@1P2E(!+azy6`*bv zc~Ue!s?z7Ke)^}MKe)Qmi%PpZc7x8$%?66$Yy>CJd*-N6AAwNh*h2cPu$^T2c;|*A z23)wTQzC9WhKMNowd8*&n9(ZmqAn>h62!xLWWGX;b5*SYLDAte#;4u>$p<&jt4e_Y zK|X9wyO~S?sTEa~{n9SiGJDSg+y`Qek-EN;&WJJ3@zpfkvcyy{rod)M)0%`uNVl%5v!hVp8fY70|6u#nUmlFosU+*DEh zv2h*bpR6p{D=aBuKnvgxf`)#eik6%3a-vm%`@+k{ZgtnxkjZFFaogMhAGWrJw*e=E zEI-kT z4d{cpAUIxMC_T5RI43JBJsJu4gB}bl*|lb`2m3!WY}^_cID20vZn34`s}0_0BrP=r zyN-o0I@-5c1{H6r_%moo<1yBp5&ctpYq4#AWlMFs&Ezlb?4CArbbVz>UV6aEB^d2F zb=83oHaJJSPgV||7@9M$KY%k3VzF3OVR>yyT0A>7HR^JNQ~hv^z1W98J1;Qf`U4pn zaxLnA3e2vuS#0Qr_B7k)YtkK7hcA@V zHDOZU$fmlg;_S3=Fc<(XaL_SS3PZ~qM5u-{l;Sj!jR>Tq#bS|AB&(<>E1s7X%S!j! 
zeW^aXpY|J9wZC)M)JUwhEF3O#tOjD=Z_;Uvb|)GcjKI=@@qy37Ef%=v-%1P>n2X=X=!e2YplpiPfZDj{6Gv$wK%L84@6yUuqrkqd}U~e+aF0w z3%Q*(cV1>V8V<%&T{vja7jlw7YcG2XSAVL<7b~v{m;DHs?V+85bruu)tO+yAv4Q!} zX>c)H6e@B)sZ+0uWam{hj2Jtrdup3MzouiCkL) zYz8k1wbGj2G^V#w!E9O^%pF`?jW095qGJqps%^xv0Szq+&3=CnP+6H4jcD=RoQ;zBDobw+#X_*w_o;oBM;9EOz6 zfk_MZ?%lkqmE>XOyHWOs-+5v?+I{FhWfbOQ6clDef=(OsKeKh#rh_jLBc6TdH{U$l z<8VV6174>U229?_dGi%4t3;DC?y9pmHH8W-&zw={L9w)E!Rsn0uS_xfXMZJqBmL*q z*(%w<(E&s(s%KMnQ3}RlKkDq7Szfhh+Nen-nk=!yxp_h}3=MNy=UhF8jHtz(mK_TQ z!#;&-nIZ|E;vi;YAZ`}({roF?0E6a`1BDcry!y|Gh`jLV_Cz3ohKf< zcSAqzN6!Hi&%O5N9+kta^b_^5qjG(9E(A~IkEP_-WB`3zbwkWPu7T|ZVt06XeIkhG zm>g={xW+|Es#do#(<&Uz$40Nvr(=6gfKQ!~!A40^cf(}1;jc?Gs_M(~^9xI>YDO*F zeB}xQ%igt{#JjyMKTpT9=U=sZ&(77;%Xw09@#$8)I)ZLN3xLs6lv|diWWK1^*U>^p z%xFE4?rnE0XbSWLu^;DBBuanT%?mbm=5ZwIx4-nex7QsxfK!0~`)QsR>~ogWZ!|t7 zJ*h9O0zdnT>d}8nEyW2~9ew>8*0_5uPDxUhES?ChVY&NH&2|g+Q}zVt8-{P}cPG!d zQ7hu8Wr3oswT*Wa+vS+!wZSk>t zDn%C6)tCK5dexkYr;1rX`1l&!rKL`I_P1}`Frjmt^eZ0iila+bwRP-k!U@2a&xt|^ z@|$qadR>}u7I-wVi^m$gWH*dV3;ViL*_Kt)yti5sl z8&Srw@yL8%5!&xU*JfNcOzbP}1h zs|75~_A!l1$CUmlJFnZZqy@)q+lpJ8Jn7g0oR^CzXt1=tfV=4ye%Lpi*i7IdLr6XV zO+0C|9Y$tZ|m4;ogFX5YNIU=%g?Mn9;yNw_qv=hLcA`M&;4)E+id?6wH<4 zQb$%zE3i0Vbf6aomeP0Y0ph4WV8nq zf^31Q8K;ggQ%NQ|ANKSx&dSP2L2!Ah&goU*CL`{)$9=)t3Z*uJXV3U30-2$uf!jh> zm=p24%Ud~70?dB(_Q==+=}1*xQ5emd05=-t0(6eB$5t?MB4rJLE2KT2VG=!x<@Kt4K1`9Ib&jt#@8dJ0$M+7O&@eH#%+i43qjd2Upcf?%M>wW;onqj<3yfz`+54oGPaB!R!Gil@IPQ#>G^0 z>aR(ybE;^_UjzGQrt_R}A8SRl5ZFxTwwE=j)!9p?mY{B^LjiS90_?$mHe((GuD@!Sjdt-$au^lH`EF|1! 
zVvCkppfz!-2#eSPxoN0EMpku!CGOWMOg5ZupPHK*OY`IGFjaaHxF3b8`%>ehm$ip{ z%Rjn#Om?c%=?a zqx)wuNL&$1LpoZ->?*0ndBxJ&CVj@tbxT9+Z+%e21YiE|sE3EF%G z`9^x?jEvntM}NThzwzesS?vzpsxQ9y@WkFv{=Ppu-!U)}Wbdpm&YgkN+gv5*-q^5g z;VeRg8bD>p$y5{hBBnmCVFm@RzZDFL;wBeX&0x`M-5{dFR(08$8O^oL4GmS{SZvaY zip=`y#Y|C_=BMM2Y9QiirDMzTpuk2y*rCgVDXD1a-)`kcId764+55&-F}C2{L-RZ5 z%fis2N3-cg~-`tqk|6 zqv{GJ>B~)zCy@G&U#-m|Ga1V1*|Vx^!jy`PXh_9!SL~h`%k;8_mCe`e#F>iC4PjHh zpM|`}5#L#mX>_9-%(PNBW3SJHp=PrUq?GK6Z$?02W8Ie>zj{`0fQ2^JhBG>+7HF8O zd0eFKR_Xb5l`3W_EN0SA=^T5C&H!o`O-Y<>E{37Puo({=7$XU{IHkKXal2x<}>eir|a zK4uzN!3xJNUse!n9BOU;e!y@E@g*4;?$T_rM+J9=+??X(M)Axp(K| zpZx{z{zR9*ow)w?bMJll`ioD0@QYvm>Ff9Z?Z+>ixcvIZKEj-Y^z1u-os(xWg`hRM zAAWu2Y3W?+_QR*v{nxD>>dIyNuB7arK0Gb8XZqMa>Q09i?%KD11ft{>vA?@gu)%#Bz z**`TFFf)fIH7C_mT2(i0?vmxV-MV=HJ1^gP_Q;Koyzybw=FsVVB^y5`B&4xa1iSXG zPF$y4D@k{wX)zSe>&#Q+HypU}JGmN}Pc0hWDe$uZ0 z^j90Rvw35!GMe0VQ+FOeuw~u0D`t-$(_ESdE3Agl8|-v2Dyo`hvHCeDRHjr#uyVxO z?Q?3O%jp~EFnix|9Bxl=u$6K~%K7Wb8DVvQ{A0VOw(Z@DwXTq9oE-9U+OHhT|#NR62hjSh!cdm8*px-}|&Zl;d7$uqH(P>91w zo`OBKIH2;020h%`UN(FBj5P5ZK{t4mPmUPc2Q$Mc z72WI0K_7$Glx5cBj-B0Jv-Z%w1IO=s^QW9=U*CVz3om|WO-MMSWKd)XKKxh66q#Lp z*tPADV*X8~sG>W2*6hQF7WFo8SB&StFYiCF6fuLbc=n#Z+ztrZK z$f{OQ183XdP(dgdGsm^=1y`N?x4~SJcfUHf3`bx8GiXU7AETpYRA3rYcrCGU1xGjH zHrl%6u}xOyayMd-hK17hz0bb(%)|FR`0_iiKK9^q?}^AsR7h3tylLOw#ZzH2Ifpq~I+s3y z9vz6~cuJarXCb)i6o)DM^V1DcW!s@Uwp2pC+R-c%I2a@)J%hzBKYjZnFaBgmA$0rZ z-gVd9abn9AXP>-b=c$i>^6B6I={-HZ^Zd;{c{wFMw=T|e%peX4`)G|!>*d}P0<1P2 z77sG(+tM*SSp?}>(9iT&ecC#qv-jbr*MOaho^}?2C>NBrw>K4*w&rGznmKjc*lEjm zTzkdTx^N(94;Iw6G+KbS3uXXJYzi5Iu>zMOw&aQblBBQx{fj5=dg#{8qw=%$?pWMc z-a4|xiF}iQ6k|@VOFUu@2eWi;7qh%7-G)HD0mx&{Non-y)t)V1cFzbOeQDZ+UF#N& zE=t4T2wc$+1Uirjr;YMJcZeGhlg~iLiGwd3ERGqfp%gQ=9e?_+z4yHQ#2c^Puy5ba zUDxbh(9>DOjD3d=te*&Rm1#h!Rz3B~H83I^=r}zHl+meWp%10Iaj!J0Fdb&n!TELv z!QA6g`FwWy*{vC;4veT`1U)3SjCW)Sq-e_* zCVN0bORCA_Vh>62HQs5DJvgyy-ni)?5(kB)`#_Hu^;N5^g^i8fGZ(I!VhVTVXZf5J zORv~|=Hp+!b^YF}vpbG{b$=Rj4a{$D8PT`t>LXiM^tIHL8a1ibAbjKmxIUGn$KkMX 
zFly*{M7s6BH83v5Fg!hA5?Dt=keIF3DMFaSa_iswyLYL2^xbc)we)a1ffQMR!lj%0 z%AL*KUFHC}liHN6~uw(q)^J~=$DW5%A%i~A~K%V?tDv zfO&{|9q(1*8J{$$FquSbFnGXI^=_PzMe&TZdIxA>Don3d`5gwkF7ACp(#&GLPkwZK zRJaT(x&5IZNYZCNef!qq$FG^38Z}T*fbwAi8dS(!>{)}^F}Wvj>BQCiGNtr>?7vaJ6|h^ryQjT4_*1y*JUO+=;M zBT3~vGDfnUVhodq+Imq`boc zj<2fhp0sKs8KdehM=Pk~%n6Z7Wgw1#U~A$IzgH2TKXA(-kqkSZ4N+jF5=zzDy?iV* z`y=U_E{X!uIBC>_0Fx;YKAYc(C9X_8S-So1``F}dcRq$!fhfepG54M)%QD*+tlPH# z%--qM@PE>4YVopdQgy5=-BLKFk<%$ks1le8Wx)=gBb=nY!g!;IfEy>U0hC$`^*hv< z_DBB=)&DQ)B{)4mORaYK46qNpeT1y3R2DBL?t!Cvi?rdrSKMQ_oI*Ui&cM2_K6iZA zKwW#jx4M}(8CI5E+B|;ip3B>6YSKJ9e)>CQO1KV!@kNf*u0%*EV8IvA^rXTPD-s}U z$idmorc2T-1@s0wp|*JWG@VLG>&{GA;$kz}up|h_#O;?B{rY`(&jLgg6rJDF#ShIL zF^dwjnpw-&?ccj=Z4<_c;P7!W1$1$DY|tKz{{tcQ#~h9a8t}B32jBj1T47B;mpfws zhYs?)RX8;nu8yFQ+Gr7s<}<7YtUAE-5sl+3S?HLw=ob&kBkzc=A2VuHEwQS1F>}wl z@v%GhH1%%jBXmgVa!BdiqqskYJ9JbD39IZ8yAeWzVndHeU#q!y zfh@3nnmpU86mx|{Zv$3#VeP#_Z_?WCmujUKV88Ui>Z+cDkG%Kzrm^elN=DA<$z}W) zZbo<+@Nei;a*=TOq=T~9GkugVau5w5Ocd5&6D`yhUy4a-#4Y9!zK~vHa9|LMIvVg{ zkcQ9vig|JseY>Z3Sg}S<%-@B7BNaGzus#9Y@uhWEu^CCot#Y$9tr4h>^@C5=Pugv;0{D%%QB8a6hn-o~l z%SkJ)Iv9KmiDOkcW}C5oMXzX*q!|*18yn(*2vfGEqssdbSkWshc(5Ad$h;~ ztK%pbYbx>mkWHb*B6r*Pk@_s5RpxX#{IqPAQ3add%PQkEwtqR{u~iPWpzRnQdD9x7f94y)|9lt5ZtXmKbzpgt1s0B{OLYQ2HB z6Tw1cjm}K#U$E*-KA zf6fW$X!#7I`R`z*oXx83kH!GAWEN8qs+0C8a`(nG>rSYm~WC;AIku z_x+71b#GVR-1{C~nG9wseBxlW>nwINQ*+UfUupg686nCZUoPN71AHzb)R5!VBT0W? z?8h7}DhZz=z@@{E20|X5F@1+&rwRzO421; zFTAAb9k>$cPO7a=`c^=YYEmzGjerc3pPvA=mc8u9{_4NruZlUlFf5cAYHOqWe^amAbmGyahUXA zrGYvnl61Spm$vz*(r4&?a42Ct^I>fwm1aSsqSTpTcTgC; z<=&pbg4Q%AftJzClL;3BpL%m;gc1-D3BX`2O(~;JSfQ;pp&Rb-npE_iAq+pxs{LKk z8HKggx|~KeKD6SNPsT@RjzbbJ9?E+ZFhEMQvSz)_3)Un0hhXD$iVe^jl_cWS*y)MQBv1(;+oV?8!y&T! 
zs0|uB9j(VEX-Gy>#opi;3A8W}#N7%NeJ74CzZpyH2xu|(An=uX<`STER;e*wAOHgCK@4h&M_!Wmr;dq|b-YQ8Qz`0?8iDgmT~ybeG#(n_m&N)U{QQ z;4csXE%zk?U}sJxKbP6$u0*o>^@}>|+w!B0qsO%4wVFDA7;U|DV2Vdko<1!jS)@Il zkNdV2p_-?g}TPT#B2F@;tBFg@sZalvXzRB<^esrlYhn;wfsjF#nJ~ z?WI78ml5u2$&G~KG2jBxzxr0T|2@WzaW64k97!+Dv8pfz36&V)I9?Lu3Pz6}k(ran z(&pYcj(u=Fo7l!YN1~$WLQ6&}6pA3%HfR$>$hUoMnmNC#FunNb6aTq_T~kbwlvy23 zlS@r10@MSNc4W7RBK1yyc^H#;jqGuakK3cpQUhURS*TvB%4DoKx^LkrQS(NJ1GQW%Nn z@Bt6T(q)N{#D}K+Ht%Voo&a;luTTR35wGU6H;t_xm)Fr&z~5$xF(qa2l4&G9pQ4ie zpjKi~7jyn-m2xJMzcF!BB9!B&xnvEI-IYQO;vv}w;H9I#lwHI+OG-_csH`f-koTnr zqdrQ!!N}HmW#^|b@A2JTtZdb!9&`u)|A34rO2ruN`DMph{?D$M!W4a*FZW$Kt2Gn{ z-ru@<5U*a$q0`dN%wwx|F?RXs{j<*>cN&`ZTfEm`7Q`0$zVwA$Mj_mtW`3`E>`x`^ zmfI&k3S-J`EkcvFUyl9l{g5l{f?Lk#Zcxlrtgc>-u`s}s(htH1XO{ntMg+V(NEfB> zO9L?FlO$<(qOcioyq+GzpVX4{`S)4IKi0vp$%H~*=aPXQXnmO+xg6WKWAT_8p@RE= zOJeuif#%X08Jp}&h81QL{wzs+B%7D{nnZJ|JqDT;4zJAtUWXM|pWZIxlkCua`}mc< zg6~XBiOGvhNqPn%20uC%Q!qM)@G!efx=D6s%fEem5+z{7@+MadI>!OCV?N*oz&Bd6 z$jv8^evrbt=_ZRVmjt@bNnfPqCHy_&sF!?ZcKeeoWnQ~HIv_~_*JcXkjVaxH>#$am z7Dp0l7L#h_0x=^1J!Lg~9*jk#w7Rq3Ix6QCuvz+DUBY&4zI`NFdO+JXgjb8NW^Q^4 zJ?~?e5~0AG zoV!GOd7rd|EhBT!I@Z&>jmj7j*wEL&m1RHefvyV@tmolprI%UV>q*di*;ZLx14Gg)YKFERg$_G2+>4RT&Rg z`{=g#&>&uDc1-;#?8Hbm7V~bvlFuk(`9K~|bSW;Ha7_&v)m+vob;<=!YR6#KSByPl z#FtY4pn*Fp^-1JURpdzD?vL8$VmU%;)?>1eptSm}^;nO6BV(T#S+Hx;&ug`U5%iU)JY(9dr#!A0w&5l(*!Q?;h&M0tU>xMdG+Zk84k9qqeM z^5#IFNj}*nqZhbiWX3!{Ygi`tyd(JhX&g|vdIc^ZDQPBhfI}5x0eEdOmlAm z#!6-fMR8{zMN`yA4S6OHi2nEiF=kEEsX7sMDG<}P9ornm_$hC$QQ&E5nj#y`H$K&) z3oHA>Lq$x9M$@Q6$NQ4eiAI$66CR8(6tAoy0D7dt+aR ziv@Crq_{M^Q(%|^!xR{% zz%T`dDKJcdVG0aWV3-2K6d0z!Fa?GwFie4A3Jgcy-rob=-hAA*ifnf>^Q(%|^!xR{%z%T`dDKJcdVG0aWV3-2K6d0z!Fa?GwFie4A z3JgC7}GPc9ZhaWa*L2K9&x1S>ON4!nWgNOq?-{i=`PPhQiULlrzU}noKC?8@ z#v=f~!oH3lp~yc8XE?FrqCppmf*i4m#MTn(2=Os%*0yO;u(QUyQ9ib6`5s;b5hJ|} z`@}?v78SjmP_a>Rts*MRmdh!gnNy1I6Hbp80ZN?dfrETFl_rin!5&3gjn0~=3M&{XwmiUIJdvxe<)8HvI&N{~2;E*3 zD2+l)na+$e4*W!~%-QQFw_?+@T~ej1evHUfFa4HoOxQ&qKUx!xhL@paw2YUUr5!zC 
zP6T`XAne71!k$8?0L~s(>cHK{at+`j;t;ozmx8EV-g|~xI5>YSc*IdYrghK6%#~jQ7Bm>jC4&x|+ z8dloO*rpoV1Oku-bht4uFEk~2NH0oys4f5<3*7w>-^Yc&KQEGqaUoYt7T--orS?YC zj0|?d{hq{NPX}Prae(&2`!a^)M!TVOkM`j@NEZ)(=8*yqJVkmVNTo}vCVzLK7e$Qk z>!nt?;u#9&HJEU;7_{$p=Bh?kZs~JQ0XbK)ZargQ!DurS88oe;nZ#r0+_T)I@i(@? z+eFk0`c0$iGnK-Y5yx37OP9*-^U4z>m7iPKVr!{lXD!|49oo za?Qa*;DM);o2n_pRkETdQeVg~&Ia;wk$~9JG2@nYNpf*~up^&DwzN(fwA(s5jLZ#Y z`m<6|3MovDG!hrNNZ$u4j<>_U$KurVnann;O9Y5 zZNXvOu;Xv1;TlP}a};zj5a?Mrrxu&uKQ$|n+ENP{CcIiVP4=;=IJ&TA_r$^K2Q!Mo zDrPM@hux)->L%=|7;BlNvhXCe<`QODRDMk37b|Kzz)O;=)`PZ&$6& z)8}s>+;k zF1GmWMBXgD5A^FlfN^8KATFlT=KMiIBj5tSTr+V_3yQgM8iaVV3Mr*OFT&=XN9Ya( zXAwu^sCil)<(l>C-;N%6x{+?#Oj4DSRgYz`>%X^_htOs1ucEjr$x@fh+cuTd`eMM> zG-d#4vu<4P825wAG|1wcKaLgp#qI`2W(f@#pPvS6fDNxDX`-pLGRPbH`LM1mbuNp4 zBOSmyL7E#RREu_zWT}g;JF|f7+r>cr%yAviwdk3fy0@I+;;`*ncr_rBfK9+F6}xa* z*+OnyU;7F+$rr55$LrMt{bU&sTe=6Lz;9Vka#!)u5C8P8=?6vVgC0NtYVAGh6py|Q^nM&lj#*13s$}cH?TN( zc$&g=lMa0*9c@}i_ZJSON6s!IOJDHzm2+?6Wzd0{Il1HL{4>=*us4}>6lYT+M&607 zJo~VYw|TaR(Whd8ZtM|8N6SfeIy$|6A0Gajb3SGM&z{_=lUZ4P_3@=2&PU2OnLFH%|%!^JQeb7>=E;px-HH zaOrvbZ@8X$YoznZ&(BTc4QhU>P-C!|W+Xd6 z#&r^%DU$$6dQ04Dy@9L+?@JFc{pfO&HSziafAuCeI-l`Pd*6Lb2M6Nw7BXQFryiJD zV2VAGJF<9|^E7K}%ah?1m~_u=S9+>E8j7Y-J4-%&|ANnQ!So;@+T<$xev z&qxsl=D`LzngaAV`}!=Q_zf!tf|?BQHq1uvz>D-7su}y7(~Bp|ZQby`^dd-$;EMrc z`hQA4I~FpCw3)9}D zUtc{i7n;l1C;tz7?*U&|apjNC>%HndJw4T@-n*>cCEK!PSuV0I_g-+N+t^@Zn_`Ty zG1b(iL$dS|0tq36g@lBXMzTo=gfvn&EJ-%I+5GnYzvs-o_dUrbK?1z{|9sy0NcT=X zbLN~gXU@HM-po6gX$B6w^fX1{U|D^m@HB>DF^25-E2@sY1#5@p7RmF>k9OdT?b%ju)8hJ{jcwpzkwdGXJ3WI0eL5>as zY7$-Ub%(RXl3%~LuAEJ-%$vw{t)i@%&K1u1?36;~^As%Xgv3P7`DOBD(KcxJ5=GhH zmV+bfJFW6$v+xC@G)KdSr%PL$E;v7cy3LDlCd0K``j?!b=)|E*QMPccz%$C59ZMo2 zuUKUA=xkob`h$6U7oI5|SH8K-w3kus}> zJXXk2m@yRmO)-Q^YrNsQY&*uM(UsQ#=RcYjuA+1Iv-wpC>8EAE~Zj>5(h-0+ON zqYe}+7!HRrW4T57S#0etS}7K*<3TI+k?%$vcm5ts&<(Ridv*JC)>=`WBeX5jee+9; z-$X0-J-fLpvZFF}0$0DH zywAm_DoTy5tAw0oz>L)^B{evI@m$2GXX7c~;BUWzGN-F_@!3h@MWXA%pW(D3%99XM 
z{-gybd2dvd*;Li}4dpKdqPNT=$*G>F&bW_A8jhb^ib8F@Y~0o;I|MrsUsGNYfen}r zegmTty@T*bn5TeFRS~qbiQPMtZ*v9Alb2gWtR$y|qfKo*X9~mjBwb(sav9Ql$blB( z;+H_laRl1mRrb$<xghP>ZfBrF)u{DTdjKskGU&U}7L_*%IpB*8UQk3)Xi2E%VQbYje?KlF5o~8gF zhifLg<(G;wAL|l?VNC;l$AD}Ya1_ z7gfdS_92(EupsRL1x>&#+RDy*jHVQV^lHzneRKC-Bx9w|Ea!l=y`H>Qcw-Gc)1J!0 zDNYxjS(}2_mUkg_bspX^4v598sVLZ9NcTyAA+sRUkCzk$XR^VVk8#*}d=n!wSu-=; zE>Gq_H%HSp|E+@q1=se|Jx~M^dfwfM0eX(`o6FPd$R!(c<5Ez!B@DU+hUueJbQ0jJ zt}95*t}HAqJw{X6OLyavPmj`Yr!Z74aHdQ~KiV*sU-eD-#4}qtld@yAu)zcWcB>FS z-G$dG=Z5Mj6}}j14oZuWD`J0@i4(qNcD9veMe~B;jLYcV+BbM~FPK7yvPXc-XrZSg zEVMC1JMsQ`#qCI2tt_Kv%Cdu?^9R@9L(I?1H!R8j@STj}RQLO6XMa^+cmtH2;m> zRN(m>?|+NH%eL8l;4~h$*V1(oP&Cj{mQOm-e&A79^M~)mQ)I!kG((z(%LM1kguZ9? zE_REHVNzi!P_P)2XizvhmeCXt8Q!m`0<(%K-6*|sVA)J6dNyP*Dah?@Eh{9tde%3A zY2Ua}h%*-*frJ2%C1y@!nv5dXU3G4!SOu~P@*5B42|I?{z$(N__m*u@R#Q5ehCGap z@NjEiJ4JgT*hxV~Yh7^(xG%crgMiC_4~4yE@a_u6<>G5PxaG8FitwI|VhF91Q8Jc( z=V^ZTF#9Y_k3WX7dY23bz%4?MV`S0n0z|AhO^FgG0il|LSOs-zYCk5t|M(Y#j`V!y zp#)4ZIQ;fGWGl!XroO{&k3?X!8=~Ob{G- z;k1efWHtlr?^Fm~Yvw)j%_PD&EzA`*;`mxyL0N4tO-WPM^xzun!5e!($5K(R9LJ#h z1I3VQ5t$e#Vh?8WI=lL5hI1x*2{?RtXu>rO4#c3ugMH9|x6@x#dzxEPF9Kc--B zbAwOVJ7B_1!kRO}XSxv<%sB2c7}&~77r|4dl)U2$>2}u%%#7AZOX}IT;nXRd7kmpW znJCF8o}!zFvf(vhVAgSTG8cn^YI*X@phpD_87ZlDkI4Pr4Js-qbD%6Jyo)buW^3>D zIS1GR3?g1HvYO3hT}HTX9rn*2N0nOjz3faQVms^WB) z$I<`r8i~W_z=Dc`V5oKT3OTZxi!36$-ppeU5BZ*X7_FneXO*M&jZ08vhG1-1gt3JX zq|}v2Jd?R{8%B48Mvp(IFcJtC&fJ4h$7aIox288Ybj=*8k*Mip`e=JmO*uJkm5RPmRJOL9GTDpG<0DC0T`AFsD_k1$2}AwOTaglk0y-*J_DCu` zz4-=z*Q#=GcR)5(xAn^RkKv=`bCNEa)S!de?Za)U;TY8KITxV2%J~JF-Rgt~rJ*%$jm$U%BP1r-p^$cjrjDZ357BjI0WQ9o8yiJ*^431FC$v-vu9k3^Qb#&GS(q9# z$f<#O%)AFeBG}MHS1k3;scWhNS9*zyt>3~dU3=+QJMaPk(VduSg{SVp<-$8Gy$r-B z2vRs)$-qXOmZcgDd1nk6g+mgBOiSZe&)_otXV{#lq=(A8t1vA$Z;v5;{$KCM06e6? 
zW)NN5%gMoQc(Y&lh8nKodK11#$CSpQkPExyy7AD=fh>>JCKbpejMJ{HKnY`b@VnfC zF(qU8+$IETE~jbwGk>OG+w&EaA-b+Ji0yd6_z=DUELi&l5+(s?8t=78g?Vg&8}tN( z&txUdGjb>Zuvv3P%P8QFto=TJ6h*1gtML#dyf=Za9%VnxS?RSQVY(NZ@IOLq6XK4U z!a03;7CAqY0H({vGbHqxU163Kd<8mYj=}8dnxTdUz5|kNR4ISs2*f77yn>c%+C_Ud zeH0hn{|b%&gNY7<|GruDE~}UICVqugP{9b|RFXI)uY(;@3I`g%x?)nr$ddsP%giLj z%dDP>>o;D&N;Ld$Ig;Cn=%pmb^BQPP)}P>1^TRh%CIO&^7{#CkutwGD%U~v2h=t~~ zG-FB)$=9^IL8N9?$l2HqO790DE9VQb`+k1JKKsuo)J@q=a4v}T0FwoaN&AI_&Mrq@ zq1&rIGZ;-Y^BOZshnuN|v{@|^{+4O<~c z6-6fuW>cc@xOz%+N5pE=);2TS#G==`C~wB{bTFx&Ol}#MZC8fr9-3J(?=bFzM}-eM z4~V2VLU`=p5yc4;y-0U8v6i#4KIRO!)?&G>4mZ%6UM%%EPS?^^9U|06Az{0YOoTIK zttdHyz{(*}vYZ~qv(iK>Tqdyw#`)ubbPU_Tn^fjw08~W-a4Bu=fr0o)r3?ihbqw)Uh%&PN;^o%VID+$8RX1fsaQz5mczaAOid zQK(r_@bCk~*?CJ-XgC18nT<0->27h~5;}$#tDshFrsT#)h1his-V--cY?5HIBoGGS znc8;kUPo3EK^3J(QNR?$PrQZ6MoeoI?mDonC?5v4-C;iO;H9^~SOE4Zchha%8>!-E zP`F*hC^|VX+Y%HUbcN@hxBE+^&H%TAZ{c@8&v&u->kIJqVIhd?hAn>r(-ORml;aOC z{qDOIEcif);TCyrwHUbzs{-bHoTFt;)-hUd^OX$0pj<)Ch)}m`g4Rp^iR>%Z&>Vy* zDjzf72V~xbt714K@4qRy@J8iDO1bkjA-0}_A15~JKQE)?2?1ytYbtye%Pd9dB_!!n z=LlOih307{a`rNl$Rm{&OX|eCm>B^bmx&bd-9isl?o1tTq1d`zx z!mtqGeEA$`9axg$TF7(B*|jSA^4U3|aAhCFy?Dg+6$R5azGvaT?xrG9{QO_!uOxr% z6X%0vyWaT4lWMhz-%O~2R^s3`81GgTG85Hn`$X{!&e^+4EfA^8$hV8Cd|~YE$$diE zPO~P!Q@6w?=DzzA`eZ5^Axv7~_uOBfJW|Hllep+KX-eiWny*&wh1@b)=4OZjsrXAa zs;J#5HY{@wX1OB8Ss4ee9WIgS-sYL0=g|6}+>W?=hbU8E=%4!hs+G{hBqTYIoffdEFs81SuHC7OkrpkEU(XUp{C66Ybmi%FNDP;1ru+Wt1yF&)E$|j z<70ls7dld5qF$q)`ND-|YMkR#<|L4`CnU>u2n!FylbUC0xX?I)Sx72fY9CQdq62;k za6~f~C$*eF1d5J{X%Wd3?xM;TRW&p+sUDc7)z%x8bR|$Maj4{RM8l-jr8bGi#JEPs z_`Av!RzBId)=NnyRG50Nw~v72h14jWUMQVhm&SsZk#W{mZ<;acvBW?eof^`*Mum{X zseDmcr558%$qU)@gs$uSLK+(|)EO&5y>LQ56R4u{dIhaP6{}iI&nGeK5jB$;!)=1K z@wRl7D8}6cN}Y(DE2k z;&{|-y2(Vkst9!^%LLBBETmO|&El}CZ!*~wU#^|V3Q3nb4m1ROS6dyEcIt>Mi9Cb8 zfjKjfahrKPwxrEuU?Rca1Y)rZvWhlQyChDHUwYRx0<yVdl+hW61*TyAEmqTy@waS*! 
zt~kF`eMAk7u6Um07g2(LT!4pEL!H`~R3?OS#{$fNq>E(82&rXB7g3|KQ>_HLWG51k zs7#_1C{8WgMaZDz~XCHoyiRY*x zHa64A@hM* zB>)CT9N1_X|ku#LA6bZSza}qu&c7$Y8kDpL`hev-&$QQ zpGJ#ja*2&4WNYavN@|^JYW>jAFlMHL zQq&R8LqoNm@qRKYih}qA;b<00!-#u=WKe#LVCiHM6gG{cOC35) zBV7_{Q5zkfrzewsGc{v$68xeu&4}oN;<)nI0*N#RB?43;zQQt5DYHcy0s?pj62MJvlWa@bKrYEEf+3;A5Ck+EGq|&Z?NKA|z*4$7+7dQw5Fpe&@ zEvd_7L6Of@F*@-_K_rm0KMcmuskW;21T2>|B}Ud6piv7YB2?djA&KQLEh(8UiGTy=Pk;?V}9)?6ZXO4HMs3Y6B&lu8Vj(Jxe+C`G;a*hPAGR3s_ZOI9`rJHp`J^6XaI`q5anZj=7x4nLF-wRu1S}02(iR7ZquQEEjiEy& zBc3*%E@CTi6Gem-T+_MrAtZzj zH$9n{g$Pj=b9;#v7>fkD2o3*_;epCc z6xI=|%>W&n9o528HYvXOGTx2 zJ5Cq6r>X$cp~;Pk6Cde+Jd90~vP1VQ7U;XdVzb$u4r<>T4AT$Ob)ahc+ai4~SMjT~ zk7*4WEBTMq1b#8Xu{7vyXaTN4eFvj#HnOJ{yUXQp+R5bG!*cTotuXGo&qMhVBq`%` zNKM2NE$ zJZb8eJs3|H3Z5c~t5Z7uR96eyWy6jf!f3PGY;J6{3e--r`H66_3u2%NK{hCfQ35=5rS7{ zi``*$YHCG0ha_N(;~pD|PmFRH6DNtd1iBI`Xz!U5lC+?-IjVn2`U&RSVNx-3!1CLj z@Xxu^^cEnYQLXy;yFPKEGK_?0U1C14nRsd*82bj3{_>;{`Norv##6^)T)8wKn`i5vQ z+byj87KeqLFmTIavbqw~nY3BrqdP+aVvbX3@|!6_rh*j|Vf~9-oJ&MUoMZaKwu zc}a`K?|3exvOI|CQw5wH<~C+S2nv&-0VWI+5;aQ%@#8LIXX~wxGL3P1gYhOfB%d=> zT^l>%#!L=+H4|^0%H4!1g*Na@YT_3Jg@4clWn<4vG8J>2W+wz(4atE-Mlop?eovyd zLMdt;i9;I;l&`X1Evnz*q9!#*D^M;}v)U~-q6fwctRfmU?VveIt4;1Bg~9H)0Lj-f z3pz_E5mZLT>uQ;&W>=gnqK9WMYS>`4;msxV9)=YYHq)isVcA>B7HPc_wO7bPUg9pQ zMgZ<0p6S=*Nm+@!ix<_Xg{cBdq*yR)Oyo>pP+09&%tE+B@d<&3nD7x{7zX1(ePY!R zkI4q3iL+Wa=uW&pHECSa1cf56p45O`R+9;v2^-7~@XL$`T)1{DJkX*F1m*BF!Q~l& zcA=;Dztav;r9XubF8qxh1{xxHH(6mnRhDNjm-f?O-=04IH@CRryGs5+W>^ktHb-TOdSusZLDTtXm`*DIPL& zTJ_)IbvnpxaHV}%Wnr^HZ;bE@X_y=A*!z>j==S*o0WXOIox`7lB_o~Ikf(JD z{2}Uf`_DkxmFhO3Us?-G;ISd>@(1=#2InA$JRHp9i_!`a3Jvb1FnHvzM#(^ z40_=Y(}M@Dn43TgZ`o>Syd89bf!Ke~e-;S)Zr8Y}I^F{Md{V>4lsJ4o^TH+$gI7nVga>=e|rS2H_yxs(%NRUG^bb2D6AML*tXg zV@OL6&?dwTMb)kOS~&3zU56Pp<3t2uwSiff5=~}@!xfA~y%vw(K|?3T6-d#fvbna? 
zj_m;cawN?6gD{+Y>^cS0AO>$*h;<+?>_|~*whOdob>i57j-HJtTWGQX7KhVp_L4P$ zS*K~+WO3T;fp8?`_6CWN_M36_mDi^sTu9_Zn<~%?hcKxxH~q{12+FpW@#d3MAP&w* zG_53`_*GY|y*f5~N;>$|&kL{-_rd*v%XweQcpVNwnY@9t(iYkoRRTOhhsEMc31+0o zsrmXJAS1SZ_l}uD@W!}h1sHD9bm0f(a;FC*}Sz=y-wnk|L)F? zfG~v}A|uC?QAd3gD_@U^fB&7x+utAYjh*LEJ|&tSn{(Ex3QZMl^sdA#oPj{9+W+1A zL~*U9bncb+3(-sX8j4n6TXr51e3T>^qt(At{;K@>suD_PpwSw$i^Z47Xx=;1239r{ z_JDDIQ6#m|J-?d|eekZiYIaJZCSyz6{=Qsy-ho53yC)xtiha%_%mlkv+b-FaAKi5q zvdcHRqyek1oyQwP(BWDk4rd1)I*tPUdxo7bEH~l1EJUHl%2pz=d!ryY?4h+>B|gz@oUl zxs2>I-d+SLoxwFLBrXSG_sK&DvXK~AB&_G0ts`@tiKF?%tnK!EY|qJ1SSV&HdNI>l99q*RyHqu1epus5(ote`H9 zRBsf7{iKYFHsF8PZK<}5a*YTiTzwe3HvhjbGA=3J@gmjQy>_dljwpJX&jlj&161X#rSdq_N_locuCT{wh*uAH+Jt@SNGl@Fl!`o( z$*cifitw*PgmirOV0gGjXINUas6tL{6*ekobVpkkaRs;GI&51T-X;>%rMY?6PnDOs z15;L^)b*rMQrbfF3@N|nn){A#KC^G?A?zGpbK&Y3l%S#uVKJOy$%Jg23y7{UkkO9l zV2)U`FIP>+Tc=2IB%9mSB_ zYJRk#?rIKNrxXrMoBqTcf9H9x>_Q4K$8h^%t8FO;Ss`qS>9=32ZxgKsO4NzS&;nY2SYfunlouM}mcjHuV>PDR;3VcV)oTr$MyK`lQl(hL;# zfP=p5r4_NJSvPD#XH=OzwahgD4KWH27*0a&cd?_It?OFDt)M%NP?Uj6@AOU#mSU+k z#93=7-UIk6#_ehK6>7d1pmR*E@u6Qob)_b_CQ+;tev1wlF8 zSMdpC4g@iTA#Q-NV+I}vokSLVIt%GnF)q{_m(!R_!ih$NpFY5voG|h(vxsyckaVLA%$1P z+9MAEhzkEz7sD`90nnONJikz#AZYJ%gRgSN0zwT4;+0jFi8j}#qv*|OR|KJo_{@rn z+lbTFg|Q@pPGn|*Ngi69(IC9UuPIp~4Ra_60xO_Y6)|Ryb5RpAg~**lBM^V=T z(JxG;I3h@-#sqnMIMdL#5afZgHrE9fvIa0~U-+*%bQhhkNO5M>5xw@D{WtWejUtK@ z74jIyAEAIuxXLD3JEk=oj?h(~X^^sjXz(s26^N#C zfxgT%=5X&CM2kQOy6p1$ogxhFMzq*nVke_p=CGxf_n^$hcT+tmmPm1cCTpbBAObXq zDh%Ws*+npW&e>E1T>8pAd|&s-(v3q<0a(Al(iy7fya06_!F!*sF>@JQFx=9?j1i8{ zPj#%IzV>5^5Z$SRs8eEtz(SqLDvj_4hK^UQJ zwsHdoLg~mpP7!DSmHQwzynb|j_qW-7#+w)dX}E(CEu-lrg%F zIz-xuhW4#8<@?Ib9EjZX{Fwu*hh?Ie@q9i)feVYHMQO7he`c=KK%8;ZD!ipmVIv}# zMVJai)?>SHtv2zzAA+UmqU=a|r&l};2E9;(nAnCf4{G;+mUIzNogH2ViWjEQz-3r! 
z5G_Am-@q~xCPkiU0S-N|e*InyTn6|CFqdUd9vOsao_Nx)%?RyfIK&?Oh8`Eyp`;O` z4`)CgM6eqi1_c?ZqTxJJ8BtO$tmONBZ5_o{J7`dfeSe)!F`SI#7lRm%n~R(oa+HxX zyYamXF@hQBFy}UA)74L%xQvp>#ZtZ}+FQh;&mXM=vK0qpQOIT#Bw;vNfa=CeWGY(S zO9hrK5rOr%d&SYCoD)dH2?|ierQ0c5j$r_f?)okG2F-G|1h}t5slqA;Q54*~^Cycb zm*AHViBjc33LR5!<XfoD53&QIyjV!gx~u@~#^cl!$(ZasJP#rvP}Pm= z1MkAxOZedraB%X&6z=@XT}*k4^4lk`riA`9{|Ff+nx$a@08I;xnAlg+?03Hcz5^6R z6~n!|)8|e@pM_yFn?sR zF;m#+#Fzx6K?NKsn2BwG6s)J>E2ygvpgdPgXTP;U#?^Sy33Z<7|?e>3z7pwKzC&ItbNQ;D|( zjxscbmsluoup*eANmj;(LO9dFP3Z;cV3*m(g*~l7(H*6ru(nm^Q(|*e$JcYxzE9fYjQPSA9^Nzs0^EfbvH52CC4qAg@0cvmk|> z#z;t*vqKE8>EI4PfKcaaqKA?ucGxxMA3|%A>7&1#sJa~49 zR#b$lwim_DZg+J}i)5td7h`ld2TX*DB`k}lfD8$AAeQICUz3ET;+nGpu9CNIMZ9y* z6?-s&S^MR{M?N$qzfi46w;WeNzDXrATnrzGWMj;(l+!qsFt{Rx?r@f&yuYh7XGT`Z zNWLhljijg#RD~oVX&=2TR*C{3a*S%njsHFq5w~Q$NXMLY}%5bEUMmihXAlvMS$KxoktITwynL>F5o=8!- zO+Za_fY>vNwb6`Je9)keW~PJu)MusVGXiJCZVK;tcMylLy|@fXcs;M|)oSu)uWM~>R4Y>oeOxda@Bu+Y zPQ168Qs4lS+9OMNa>$!|TmjByPd~N*qM8dPF&I39hlixx=zTnN3rI5u@>NzB_msw( zmt5!-wug^Fkl9@&WahGa+;*6JlC01DLa*hUqqIr-NqnFMbFYbus_xHt^>Aq5WxezRMlQvhC zkzK{Htr}Qd(`Gl7P$Gi~!l8rCNel)Dp0|Z8YhoqbMcsm?jP zFkr=$OD~N0Y(~+nqc7jpC@c9rZo(!3lJKPkt@L?&k#aB67VUlDY07ewHS4kemqYTiI5@9`&qY66hN%gwXD}#BDaz0AT*>f`$u@iX%%ht$oL%HO#pUFe|0 z=k)o|nk9IJvI$)p+CU{4*7pp|!=M+_S8=pu7UruWnmJgQ%%&)UJW#oWNN-(t(fZBJ z)PD(1TFAw`LQ8zNyWRBK12FXZ<6Z3(AX@N)3I3F6fasVjCb!2;@9~Wy<2oWtfAcQv zvuDn1C6yjN16JHBnwJV&Kheo=PFZzAGua=bG(4%!T)uJVs@i-RgZ_DN8nPO}It^~n zX7&5=@sm#Nsb-+}?8_E*hoKlM*v$Cks(t*mIGHNph!|~N@CItV@t4<_ z;q(9pc!C8?SS~>^yZ{0aKOI4z6>IMixA-1W;>wcV@5eK@Z)H?uwWA6j!!3v|$2bov zqq1x3zO#lpJ2kaOSr4d|Uu~%w6Mu5W2Q%nDvCt;6uK($|cuohPpv8f&i?r?-LxOrM zy)b*g3sY<2@7|feCHpENzb6N8UQO)=Q9yneCPFAeF|M-fvXC)%%Ipo7ZJ9qP?Hnft zV3_2$Uy5pTi_z(}z=H8-S7Yq6${f)}h=@sUm%#T&DlFtuYhIyl_c=UXn*$O+VwXHo zj*5hZu{WQmx^zqzVh#R_#;${}Y?``y$=uDDj;TC@!C^FDIlmSab`QPjBrv%=yuh2H zm0whribAq7#3Q_uH#wvx@{iKcbBse__1GL9w-2M9#6C|a3@yFIcmFepLZRs0JaYfmTRyb7&ENp%B>s;c!^7#s%NOk>n61-&UrJF76D8&1b>y 
zPHx5XWBdAXAo&q7U-sB(o%+x(Zhs$B}hR_U+Ql1Vt7iByiJehJtUP>0t zoqE<4t-;Of;)&Ak(@*cXx3b5E6k0|Dj262Mt4j2-;#>QNpTnd|vZy=A!RmjP>;ual zwd_QM(FUt*bHfbcbECwnk2!6wp(avi&F`PTxW0P(MU|u{4Ai#WNMA@*&5*+tKgf{g zh<^vLEUMA2(L1*FI6-&0RDGpXu1uLL)41Rb^gw=!N z)!7QE=?0g_=7$XjDISZ_iVys125!Lv$~!__`hu2%MZKWa;`Z=#pl)MDnO6ER+xH8+y+?iNCoQfZ#W3)N!at#L-NWvUQLt%~5fTX1x zR;S1BbK-|#vI%FbW%<^PBYA$&INUp@TqXhwvT5eWq3Q`!zFuQzC zdmxqgM=7?PsjFiY(!lDoV}*$sYui8pBSwzYErTbO#}61oE3UCRkd}OMMd?FlZ)b1d zZ6?d$;vN{BM32GZa7f!E2d>tdU9uGR&4eLo_oezIzKX6!?P^sR=qa7b>jxkyP&SZ%IT0kCL8(b{BvKFOvKaIjb%ss{kH5lz) zzugh=xj03CH)Ic3*&cad5p4YApskaX2=MmM;q$5^3QMi=!!lqcdUgGCmc6aC;mx=_ zTHWT7s=`0oz)~yO9t{>1A>ZUGYmyCq5977M^2;e9vCRLWusV5|g3+Rud8@bXTv7UA z#5s%kYv+e$e`@JwUrupJ=e%jSl<9`8+XF zY#d92y|}PoY|811bv^JqjV8K3j8{{xpd5otLLlLB=ycJ9b}U#Fq$uBf$P>PQSIOGv z5x#X^N5Tr#(@2U4@^c8cc)=f(Uw-$VfhxSzW!Objetuq}f34=6j+;l7(=rfoN2Y3T z+3H4y-zCbFuk&;K>7YexBP}<#c3_y_iF)2vlx^AC!cDb+!ICjBx1nM8BYRIT(>5Kt zzXqz`sTU>l7IokS;^_ltM!I9x^Eatpp*9v1 z+V2Hnf4=9)Ess5Z^|omV>Hq7&z@?z4wXyveMH7uJy_v>WvMxr)Vq2X*?qo!xueWAm`-HFTI@5X1J$& z^VXF+uHV2!NGn#{jL2y;W87-$3(7OOam#gqqMUcx|3J}|+v0+am_$a&>EaeXvg0F| z<1Xdvjgb<~JEev9%3tV0Fq}Y(y?^G6)`a+{7dJeIj?)6C(+}2w{?6+cS2@ZyFI}`ni0;p=R4FmFEWPA^g{}ln43hp|s3>94JSU#q zeclaM-}sF7zBQiysfG;t>w*I11+^vrf@A9w8qH{(yY`A}iztPE@Q{%8X$8%Z91ZoA z3V-EZn&NlCv-}P6re;Po3UVjL+8K8vnEyt97;?{xQvrN?+f<{6!5s{RLYb+7{GLrG9=qm@;T4bn{=FZ(c6mNg@g3zS@LDTN#=F#~gF?6D zgwf~77~Q{fUQPSKm*4*VzrD0ci0O**{Am)}hj%n57HwHImGgZMzID&>hoD|BQIr>K zCDxPfYhuc$6}FGOqWu1*ZWqq~sJUY8=$Dfr2 zGB?(~a7pK^t^KF*5mG0sfZWy+;tn3hdN|o)t3uekY2UIek-lghzJ`G6(+>38%Yu&C z_kR71c+=-x(;KZBnNf%oay&{-({l$AkYT4Qz&3qVhrkcYzkS|0>ulN6IL*}3re$;d zG+qp0dc7p@%XmX>6^D+#cd&Oe2Vp@P#_k7v8VMWI6mh3)#cfyRCpnPhK#~JV4kS5{i-t7xdNsjaJ{pYVSoKs$T$LBk-;=y6YZp7(qB;ljSfDtdVw&8gcWx?DD^ z%@xR~Xzy-oX=_i|-{RC&PL*op-O*3VA%@Y`2}WbMeSTL__UI24<<65zohkvRM>*9E za4q4S48J!(rx{Hcj5dF8+`a^$i4Ja%C$U&Y)GgEuSO3c?kjvdz^b9_rz~Jy>ANr38 z7Ih!_v1n*Je+!bb&bpR1>^xV?y+%=RsEW6+AU(aPx}vzGysQBG8e%(hDh%KQrwuqs 
z>(kw(!j1iqzdCxEqP((9wtGTRt}|iFc(*qg_Pad6pvR?d3?%c-v>Cq1g2RM9RWL68 z!t?iFKR07l``RbJ^TP9ch3TBUf83&WM8ZEVIk^(1`Cq>0`qyr(q1x+k{Jb*z!|~wI z?y;HJFy0g5jqAszf5e!@W+&d`*nn|(Eq2QaMfumekIza^$?^uy+4}l+Er5hQg^Se9Dahhox9?^asvhsQEqF(xVzNbmp6c zP+LoLUu#Keb6D3V_i*cePdPFij$*of)EZYF_;}*e=r;h`2GGsI4c)tv`+%; zF1O3+#EBnvKBWgI!SE@PIP5}E*2`%p5i8E~vf->A9Ly32oX``=3OaG>dL-ha^H_i? zo@@3c&QkI8e-Ltc-9!|GXpXC{A?vDN@7t>=Jn!sO-jFz~ezul;q{0}i_VDVbj;`2q zPOX)Qw&8>g)tt#viG3JT@9#zIE|&ST=p97tPe=n?>)Jp6mQjq}|ZL{cqwji;DOw^E|29 z8L2*;>R}J%RyH@+*3?$Um%a4t+?7X4jW*sS z%oW6WE|UgFAiKD{ytK41x4Nglr*~>+MJkUv6(Q|WmUa0RUe(_DOX7~_gjb5|?Mujx!~27QzjP1|=P zX0G!o6+3Y~EVH+CKKHH7SvHf^WLA%l!*NIBe6){7W&FG|F7JwEh1`zRivH<{E_moi zzxl;;!@ZxoTW;RA;F#Q5H+9A%fBM;@PyX@<%0&M5*B!**&9AkB$w=UPWQ|1;Xb?JBk{rsL1bXHHw@)HC{q-=+d@(dClFhEh^6}HmmPWRF)e>0_h8|i{kL7ay&IH8W^AlTtomy@?;)6*(*s{; z#sxg)e*&t020jMZ;fw6pc@>_Ml%J@z>WA=hZ2&dt2_dfMyRrSA`Sa*>Ip zD9V-NDtH#(_1txF{B$by7-+9ib#gFn^2QYS_>ray{^R#>33Y&wvUc3ORnr|#_qLt1 zx8mm|LH*xRUM|+_elmf5*zfcz=)_(8h|waQ@;@q1WS@lISaHFRlsAsAD4lZA&JgGB zL_NUgPAYoR4$vo+`0xau56AEY=2gK6PNDA11sbq+)G8H^2w9y@mLFQ0r;d8@a0 z?Uw%95*Vw@EP2)*m!K048J-0(k8C-2XBkc|Tz==T`@~%3RyFHm-rO!6{#W(|MY)O* zm3LtJS#(i59{kbd*RuKKM&!66WJnpp3BnotPvwr5`nBhEb573Me+HR&)C-;bC?M-j z?CRO8{AoSt(ICVIYub*LEm{iSEqrutMGK}Z22*yHCqmlj%vDq5Et0|#&#tMB!RxP> z**zxrBSzrc&HSUHY!BwBh9KwROQnf6l&@XKJ}DnciRm-LZ3_<_Tv^Hk)D#2zP z0PG{$Dlu>R`WnJ|nTZ5!t=E5cOJ$+@y!iKPdSaRkPcFCojekGDsgMAUPZlCqoj$~* zl+BfM=&dU%YRd_wr=jt>KVPmR=fJp&JP%#Odbw^Pg?^UPM zw3t?6B6nlPnb!>DVw?k-uYos(M?q{gUoSOFH(8voUHr{Ef1#YHEX&DSa`}*!$oGc1 z&`nZXB8)S(uWNKz3m2b1Q%9HDp(%@&WXr3~)(sgQ zyc-8=YJN>y3F6K%$?*ac2-U2~IrHD%zhQPoc4^a;id4LvXx+4ux~xHVoF#nHRLz!t zZ*0T9E*yod6^Y+n`Bf!3X;Ky9sgnq8QGUDQ8_EM2!q~8KxLBQK0w;AhQm(z{6!x$j z{=K&A|}Goi>-N`~%-m}ewb_^4qV2%P)4a?7k=DZgT$ z#iDoFE6-1@Dc$+b4}SEwKRk2C*H5tRtxuS-E>e_VErKiZ)j{~K@SOjA=Fqg^;h6(H zjm*u&Lzq7*L25aRGd!?s1h%jX9*)NqLesi&2=$^+LECj;$k%@Q!J(p)6{hjTdX(;a zU)kHaeD`g?{kLNqmejh1Jd>On&^xKN_R()PPunx^t(N6Y3l^i>@dsMN-Z$4lvZCp} 
zL(`TYIx|INRfKW|WZ9ZI32ltrd+0e3f43-k`*%BXRC|@5;-1{6G)C-pwcYp3T(u3F zF$QD^yX1iaQ&w;0@DD#o1zX7HaA02|z)cL;c+6AoFLNqPto6uOkBjU%{~3!w%kB7ee=V581lEjM;I_itM< zb801wD?!DgrVKIy2@suDiddjFo8+rP?sOS21dZg#xRGjhCr!GEr z_hqb{q3$&shL^4DE-Wois5sFef1Qn2iq;jg%KFo(Wn*aFYoDPBFAiltxd)N%#*TA$ zVJc#{=vf$>^VT1~YgIjsYA;?_;rw;^Ag30Y(9LXfMY7Wqr$4UFVhlw|VgQvfrn0}| zMEA*}bakn|S3G5+A-rq%XB3oIH?^=5kE?Dl=aB8tq34#vwXUG%?P$r>bLu&mQYQ;X zgny=%C(>hy3ZN(dqkz$94&+wUw@zQUY_x|9>)s2;EC~ZcNJ#8W z%NNaN(X}P$o7S*%zI3m(_(#9{^Qsy;r}oc*2ARuzXT0@D9DCKws!+mo*tm6iM&F8A zjor5>%7KWmC&P+Y zKe#s!$?l5Sm`ryyS}YymDJsyDkMAv>Ql6zUwp{<>!yO{D>a22Siq9Wb&9zyiH|K-l zp8(j3(~9QI7}?I|GQE%{l>~607ukn)#wcms)it(SZZ!NDJ`o<_Vg^qj3R|kpP=Rq% z7?+vMWzmgG;?+`BA==U-sj<9to3N#lX1A$=Tqwa~eXD|iyHMZCWBSV3)leRq8*jrF+XnhQItFgdrVWji|4 zrghe?+MrGj7Prmm%xDC87Ga6#&kK{G=M;7JUft?)AU#qFS%mX)qBUz4rK;sxc5d2Q zExc_wc!V4>hN-yrHsh;mYM#TcdM0FTCkLm}dQS?`Lw3oQmFqQFZNgqq9&l7IneMat z*k(9a?_OUGTNZRg1Hv(Fk=pk9e^Yj56nB+@QE@*-Oo})+S;TSRE2z=l4S6Zs=+De@ ziEv9-Wylo?cnUBkvwKca&oyiN=DJXrmpTNaBGk?X#e1rWcU+4nM_rzAri+T;0Wb%T z(N@A+SW{kBn&)?xELhQ+$K$}mrZlgcnW|&VS-+XozT;q*Fpv<8!rge1lcJaYhXIGj ztxg`ND9Yy6m5~(*#b#c3WNT9{h}B^U9_+et!j4(<`}%1ZjJ|VZjA?O%f~pzTD}JPa zD-b3VWH|LBsHr@kvBYL>JAYMsVNTeZ7N6ANN>h{Wf6inP)xMOI<4gnG}~H9DnwQv5RE7&(Ub>UcY`;W!3C~c+N);fnR}0G>D~4Ry$m+PylOn zwc4&CFvdJ(&R~%$Rn@ZAMFkJd@hGgS2D6b zU z@q5j}C0$XI!O9Eo4Ho-2=9^Pfw1r%)7o3rs(_E6`&#a!khW%EpHK}|YgWBc8>5)F+ z&TF+2leqgMA+OsFWw6+d!ssBkG!AA*D7!+P4Nd7zOVyI4{nhzx=iYV!G@W6_8U6&7 z%(~%;9h)w!PfvXGjANT<$dX-Dkei#IpOYTV$joF1)=;Ead zx(ZWM(jyjwl{AB(Iw>}c(Z<;-D;t;2fH%AM*#)vqdsBCAs^&=kBQj3P z?^`%;<#`A9@4o2rtBxGH{K&B@uekE?f!!A!xMatryRb z3Q88^6NQwT<|)Mm-Ah)jSUA0Bum^LXXgPO9OP0X9Q&%i#_vJJ$y<%?-x0!db=0~d} zwD!0}fO`=C+WLu7CyIyXvy|xf5likpH{W2)U%sv})oHSYOPAjB#72EEq&8H@mUSoA z17|hWXmN$R)tpa`GGB_@ACfN^3?luUN3N`~MQRqT>n3G(6jbEq))ly=yv(tjE<7Az zaSEAIhMZNiJDadFci_eYYRbpE8ENOzHNg2(Q9t_cp z*=4*&)iWg@UPWJHeO?eYi^hf{P+F8(*fz~9)^D1Wmv5!z+77qR=fV1ItYG(htwu=A zT)ASNX28P**;q`RJAJ{r<#|MLNtHC@!dgGIs(SvG%_~P1E$=H?bar!HS1gdX?)Gb! 
z_=b!lcN0Tja%mRU=7i=cq9=m1M@CBJTlWPv=_r% zXF?YPD5LeYT*=~cMyBsNf7ktw-G1w_-8;_ObMET-&FML?WeK3FotXW`}g-P z_vp&0XRj+SYHe;`x^CX|IioXbqec-VBNZykNv&)Qht~W;`RcK`9yb{}!{UFIVlA|Y zd4G7Vz2dxE4&C-olcCGucEdoB(i_ZnJQd-Wi*~!iEf=wPrmkKveUbK-tDvE}VPM|y z)C!+`WXddHFGIxs>4zsujDfQ5%4n1wj;YH#A?{m_;dJZ)NnT$1JO6Lb zoOAD$?ff6|dSPzkdWf?1~{vVXMm(s z6i1J~LqT9HAt#dAB|8cSnVJEL&~j{)2f2adR615_2o)yxJHZ->XUL7j{yddtV7np} zmtRyb83m8aKP6JyQWOtzQAQ|Rr#_2S_;@kER|c-voYmzx1(ZfeAqU-ZZvg~+kArVX zq8sG$1W3+fN}GxJyio)(WF9;-d!D6SbrCj@4wNjx@5$z%JQKu6ndooC_Z(NkpJcSf zI6&_4s-O*pC>kiql8vXd7pDP701SdBxmWU%lsR&0&QwZzL2BUAW3xE_7r1GNq_NQq zaEjw`m4q^P@kSMVCMOh?MqCOSKx@i}6A*!DTT!%3Bbn$4Kbda`7xe*UcuW#b2F2-3 z`SS9_{Oc-$qz#gGClDpnGtK}5^e*ZVbSyU~JIWi zK>{dY;shY7QH_90t9S?=)f@FhOJ18)uI2TYK@&SD{Yw7J#(65)waQaA`8 zQ*scXd7+65`j<+LVLG}+UX2BR9tj*pRo|70 zP{m0mp(dO|1vsK7)tX1-Q0NJ`xEe|nNp^lhp(r}3ctlPVRTj?)29W72s+hz@ct{Q< zUCN_?%!B0S;PZ1nz9spK&%fo#D`BDLpy@uBv;rLAk-0MoNNF<;770LOLQMoZD=~E? zuKV!(cy>G^UKsUOe8yxq(^G=KxEz(MB}hv82gkwbBY-F%QoM*K$ZP1|p!9;JcrtpVEaWK3MiDJavQZSB90f>wATa}% z(*IHmMM0FJ-_KB;cJ>K}q!?xu}qd z*OCGg_#j&NaHGggqY0m|2-yIBOcctAq+E~~0hEPc*#WW0n|sJbpt95-$-z4TOyNq( zOKyh*G32;hX-euMg6}~fgTb6oQBv|F7FI~Wi{qD6YzlD=X0F5W0WZ<4uscKil31Ao zL1@trF?mHO9}$jEWvw;=0F>wRG|~%s@&sBcivJ7Fk!gXd5=8N&bZnyJ0F(osh~`4) z6jyqXRWv0Vg-$3>t|2Eb=Cv}el%-0FBkG+bk$eXXa*ncCmC-6KrL?~MD{|rtz|kw& z(hv;lP@Uk#gMfs@I9Du%TuruBGFN$cwb3guGzFDI&?BJz>fG^k?4 zT(U?P%!iEmx&k88nt==#=`l}}$3&4&vYxn8YR73Mmm~KA9R4P+qsq8sdM18kRSgBp z@=HH(T|*ujczmL#Mr*?40m4{}x3at^f)FI$#6+tkN+O97lW+Lomg|u+;$$2-C9enf zwYY?ZI48#TYqZ26M$*#x+ zg;G?$NFsc2*Z{v#=*q*XAjIRnLGBnP1AQDvYjG4J(@X9yTtJIDiNcX1)=4=IQ668z zqjCg(x{>KXo=Hb2L<(g+g6lAfYz1%%SK@GbaBY+8FG!fgMJBF5exfUvSbW@rkQTWs zVvLJ*cqt?!z7X<7DML^zd8Is)om-}wl7py7ssPgJxg3;n$=L{8yfSjt73y>Pq|!)$ zN7OUuAY%~xOAiVJs$U8~_$RX9rIR?N*AND>YenM_MUW*XrR5G8B=VC5l{~_wVgnJ; zMdW@a%1OYIheJ|oxu%iKI8kHnC36sErPEuzL9Z43g(eWVI35m$R|1!0)nu^b!KtSJ zmd8t|sLDV=$>dQ>4C9nkq?2KZAKv7=0IDS-(O3j$bBEJH9vi(*f}R7FNOV`q6%vAv z?a5v#(hGe$wW_>4uE!+GgcnCHIspp_C)L{EYzF=xVA 
zLjGVVnx38>b~^$&xnYl;A9rDQiT8X~JI6sl0F^k7vQy@QB)^sHg{&6UPVk#tCQ*Ys z9{H?L-VG)8%&Iz}=!l5(-5 z2(M%++{uk5noXhXxk!ug7IJ_zYu*Cf;2>cDy>5U1XYagx>d=x7v?sNu4YADND}xd2 zMn_T*PoQ?99jRHl6SxM}V*s|42gIePL|XZV6BAPvtkKdcI^qcCMCE~N-oYCmf92Iz zp16MV?5@(V9g3{gW)?k|($heJd~qvUh!e?bBmCzi6^A1tQ3d%rFdl6-3*F*iHna*Q z6yrRY`j^FXzk2kwPHU2IC6h;26ZC>>*c2$Sp0w zRzQ1Vp2;gNU{C?y zW_bVFEj#yZo!eEO?!H$3iUWn4m?Rw5mkmFaEWSC z04idmu?8%k9CW$-Lbro4*Zh*wiu#V8z6C2b??1R-k^p&GC#%0&i(!Cz3qKt-6=%_ypF9XD^~ z`u)d`?Oru?SXFM==SJ_Lbu!8b)Dj~AJt#a(cLN+T2hoqWzKAJ@3m}Axd*hM6!yzuG z7`!2D7`?qtGzy{Ah}8~*!{?90+ooT9=~Z_;`ry%(6Whvi(t?0yb2wcdpU>m*@_UCJ zqo~tq=dxq~Cdyqd6>`B$q(k4A9a@VvC=t1Vjkpf*PzxF;Y2s~ixLppL-5ZVsTvnLq z1u(wj1oH<(GI{grM$FiD`%6E1=cQ|xbT?LH!D=rKKJj>PI!hpc(@#J`q8&ja5ex~U zluWH+G@9+u17uI$igIHL5S%tVBB3I|NQ}V8DV#?REfBAmkhueAVVNMaO|%b5*nZL5 z16dVai;lebo8Nx#=8dyP*XF1D98R~*?64W2D_V>+UqPl^vf^sD@McH=v{OB}xC35J z55bW%;uy?!s8#}x#ff+hmm94{`I+@x1%Z4};Cs@1V4hbg1*bn}M15{wD64JtEw8-s#I@^ss^S?A8%#S>gL?fw zoN6QT8_tKL{h$FsZrdaEOqc48+(W^HutqXfYVIQ5cgG^=CqNYn>FPZCz=@Nj6RY>UsG02+; zNlFXR5~=}b0L~A*6NpqMO!4`;oD7WrfNp?{UM`LR(`dcJc0TikBz^JiOR8K>e*k@0 z^k|C-{LMLEbOGw0#Pvd?D8ML;AKyje3kc$Ji+&^MNnOVYd)U4N{gwFGWkOUF+A9Z! 
zK4GFUT}534V}UojsC2~YTYo0~^UbTKX6p1VpG&Aha_JBmEb(u;FA^ zE_T75oRtFu<=~d-^c)=h2o3|*>hA=qXbjVY}AtjMSmcO)- zi+3%-R757O)`gtWJAE#K2Li|iQm5i1sobg4ok)V*N3ScxMQFy|E zUYwESnNrGG&3xg5lq4~Af!gB;S1rBw{SQwsuZ;NYRu{+vvE#7Xoz(5^PCM^Ze3&Ds zW;-<{XBrL_0XC=G%~=TEL|=NalO{_to<{;!;t~hhBLGnn(gIHcVUlhbf%!a?^bn0P ze$lfZe{lMuT0e^P_?#w#m5U9f8Nt&Q;G~gG5d=a6P@95!=y`l{L%>L$@@7{$0;eGN zaXK-th5kpUt;&FCN~(ZL{1WZRjIvg*HRsH{^S!rkSy-0_2C`Z#4mvB0WFm+|Rh2u1 z+}Y4BL=4VoR3gA6p3y7J4v!CVn&1ja$Xg$>!;j+{F#y?}Zl_Ux(UKxG61ez{*6B9F zs=UTj(0BFA-+y?|%*L?E1m1u+@c6wh8x%>f2-;J~Od9)04g+su90!A8!0`Jau@C~5 zvO2+ml6OWyv$HWRsg5D`LG%wMoE6~`9NGCm)7KRVq={qZSr-mzw`7} z%Z3+4T_!y=6o=UiWjy4yalMu|9d9IRR~vOV@sE~s`9h&!FyO<`q%b)Rj{+Ligup== zfHCHH3;>+bXf!Q7;zjSKQR-Z+iLG^39bP+QDbM6@T6N8-dycPc&+>UAS(#yfxS*k} zu{6VnbLwa`B+dbuxTqpDoVDbhnrKyu!-?h%04fwJ!sW)O;wF7!pz}Jsp#YAyhPDAE zgJyGQIdK*%^LDP;zoECjuC;4)XKhJQX>D6mMR9RSmWPj;GKoZ21E0tej|&xfOyJ<5 z)rHctqW)me9|*Xe9;elt2E7X7&>7B_6^#`XBBm3SMWg;(e#3FEm7QIq#!s6zuC=VJ zG9Jswi&u@BHnuwE<#Z4-jmQr^0vmdqblf=64DDfX`aRTozEG^7w6rikKRcEl3Wj~) zH!GG|Q!Rxi@cCltnHdqtbrVhY5~4N{jvJ6sZ&0i4g(Ii;^)?n27S*=bWjQRtieY1B z&KcWWTTzr9atlcyWU3V+$p-tXqy=L5z*y-9iLgwVk)D=aT-{ianNyIF9`iV&89^|* zA7|MY8hHBz_ZtY9YYOM4}tGM0;KfT%M#vp?gSlUp24o5p2;LN^AerxZHHCxwT zyll#_BD=;EZkjTqr=z{Iv$duuJ0mR`2?7`fDx1>^Lr18q1M?S~32%c)pt-!^^pL}# z^%mttW6^Lf{?>FW$a=yc4S(puNtTvL=qO$o*)QA+aFh7%=e$^&WVw7W2Z(Fgoc%ZhT- zp?q-Z<@N?MTep1cpOW;a*N#lchCLJ-sxdRsYG{Nzw+oEyZA`~`lsZd#-N=rAf9fq3 z21@FaCoh}QI{T+C*n42p!q!9$qT+O4TOhZzFt?;MmloyG|Fha=Z9MqMUnJ@6=imJ7 zUoVYwc|yTZ(C;=UXBg*%=2Lo|r=+?zVORyccea2_Nnbc&=IE|c9ckl9Cr(H!(7T9*3KNS)WqeLv*zzPC@*6voovgB zn=yaNsI1J57Iu%)vBE_-DVC`#TFQv@@%=}poH;ft^`gXZ%GjJ>U1rwUY8Q;Q7xpys z3W{jH-3O8#B6jDGpE|ZOz@~qCQiS5V^(v}7FHA@t9s<=dvtf9V%^xDpu{Dl~bMNxW z+59(ou@%=0-QJ*Kvr__AZKvNIrE@i=nRMRo%j&p8b zQ?#lKrrak_kh_;r^bolxF-=T{=xU$2V$H;wh%r=Fl5Ah(nHOP}+!Q%-?qa^WZ-{7P z4doYysC#9YbUn*Q6KxV?x8U4y;#!@D3!|)MJC=9onbA==tWECSh%MxlK0x-q{=K6i z-9_V>?td|et#_2|uvt?(DxG+2`Psv}eUs&Ht`iQ&a zh{Els5fgC19Eb&Sv(zfl7;3jXVp!cK4;NNeE?w0@uP~N?FJWw`xSbGcl6^sCaVlLw 
zZeR6T1sU}*l7yih?HtCRIuul zW=S94gWj53T~mx>J9#$hhB}og7!8`CG{8I; zoLMSvC zg3t=%Tw1@&5pbJ9enTY3BP<$CzH?=q(?5FbcfY){4&X!WvLG>M$j0qbhM1H>aI2DA zfP6J}&S92wM>blu+VoMAE7JLCK+a2+j$QtGyWyIt zx$(lBvHPBUU{M*Tj30I=fly~={Ox%PC}(~bsFXH8-d z@P~57&TWYVI!@d&t{}tZ_9mdG701F{z$+Q5BRQp6RKLurRcKJ>tdC$fuX@zM^=;!9 z)R)cdpT2ox4Uem4o}#EDg)HQ&N^n2*1^w{l?S4CItW6KvFmJFzD~HzWE6nm{jp)s{ z1=C#awoU7Yw~Q;%2g_s5U>cHVlIexelm$(wn7kTFIjhr)gz1m!jjQGkcNr=Vy!OoQ zo$vkgvVtNCW>sCyiyBs8G+64wZNjoup*+@rvMvxw!vh?3 z3oq_(YiMg}DT!o7ees&S<`UGXDl;gzq&ilRl~)9K=7caYmV_O41I=IJeZ2=>yF81R z8m%4IohJ`4wz}HFhKVb0d+T#DI|RpLFuWx;8|~>{OsB#nS&8OA-~h#t!k8lhE_>Vvkd))9?m&0}MDu9>}hZcVz^Yz1^pWaRQr7dLCC;(FBR*-2m-qnk>f zkv>`rqYT2`e$B=C$bdx(dp0la?(MC~jd?W8J%8VntUNz!Skn$Z!AR(cT4?sm%;k#~ z<(UUaSU&rtEV#1-v&L+`;_zGyZIRmIaHzAdM8`bs4e3qWZ@g-4t%li3apXLlN&3Ua zQCe2GXIBF~maM<#=^YCXZNK^L@7%Ft+@y_)+M=^ot(;p){STJ6n@&m6&yJ3amM`A8 z{qW{Vc}8c@V3~62)XK)#)N4kjs4UdhrQ=sED9NfCx$WVnFPk*?%17V+&(A*p%kQ7w zd&NUHY@8f*l(g25Jo^57*KD}%-pePXnemE8(0AtjkKB0rkt5gMvSrogdSr7G>}22g z-t|}Q7^tu+gAVhSR8`M?^2aZpJ~l3MEvM-NbLSmA^?O0yKYjk(mDij&aOloQo;cN? 
zF>?3rr8^$~>0c4{$N$FV$5-8O=c8}E^WA5je)~6{{MVP?|Ne(B9lGR(C*S$>?~?TV z#ua@<7E1(GHxw+nSa=$0l5Gv4~IUwrejF|8{^tN-MEnCAHIw5>nEs}FL2did$z|NX;PPv3s> z_(R|S&iz*(Juoo?L$JY@5z_lA>zXFapR?-r+m>DS<}0^fd*r6aZ=M-*xD3WX#in;D z^(R7QrE(aXarpYnhT+##T31q}Vzq~EI<{l}7_ygW=9)!t^Ea=o(zUP2q{t9M;(Eio zk17^2(FWe8zP-l}Ze71)=bWB#?fF7Q;StYq(bkS!x^sR#DxE#q#hjCmze1QK=`f)< z%M~BLXJ=dM>g`LXLg>iOoDonlS0nmf#>isDlYVV2<0@012YYX{l-8P=yQ_D^Dl$qh zNzW`!_M1675)6cVy*U`mofR^u!Y$k2XQ-{JYHo#8@C+N(O^nC_$>j9-m7Fy(RwSZV zPnv!4;phHa`uMfykC0uK6zAP^lFCq@G2p;!@|8EAdi4IKjBU8{#!Ih$J4)RtP@15CEvFP%HhmJq^`p29;Up{p6OD~_eI603}vNUXX7_uiJl@)Ku-75~v z>22Yh#dF}7AKW_fH)$;qqVc>|eP0;pc8YdhPKYJGRW~U$bk+_UrDt z=FrVgKljiB&%gTIV-KEw`L&myJpIC3f-Wh5{`Sn>Hy^xsSs&*IrHs_pLjSQG{%x-k zr2MwT)Zvk{QX?uMEa#GwcRv2|FH;Jk+t+rlzv|9wx9vRj^oeaJ-u>`Duu^oU`_~)K z9vo4GS*J3qOJs?g=*(V)TkSUN-Lf&uWzsGLq_RC8Cv~Nj&R-+UBI7E?VjrDvyWRj{kEFZn^PAymJEm#B^ge4V{xNUB7gM*;tSD zjB`~IG0t+P`VsJJJO1oF`wrdm$ZOxZao@qcdoJI*xTmWO)@)ZFSU*XIE~o5&$xOmxQ>@}Lu&?ZLYWKJ7@B|VaWsS!&cI+~T$JTGmcXxM{?z1?)jl_5we#xCZH zOWnfWq--T`FKnb~HTII$)-kh}tet_8Ig%f6*R0sN>*hB<{_d3vhv$trcFQ6+^Gxln zY#upz%c0$ute)7>6gTUHt#itQoacv<^n}7a)EZ)?)b7XqsGmzqC=Tr-qb9^>VuIH> z53UfYdgL1U@NJp*+DL8lHU8yvp4QLa@DkqFlcSanYN(Z*1Z1W>Ek;_-u!2DzrTF&&I=En ze&opqE*Zc0=&qjfe3AZ)YspU$b(ievjtI4vw-vKc8>&`)`lC_`HfnWdOG25IZ)g>1 z+-*V!4S0#W{!DLlW&RJ_ST*dN!@;1C)u(TwFCj?38D0tMExjksJgEmdUNaTxuKFS-(phfxE*{h`BAJWo; zXz{ncEsJ4#*Ei;hn>G~(o7#*0j6SU(S5R3rar^!yZ4C{WN?B8`iY`d%gpR&wO4>lC2%r8y00Xto;Tl)1-@510 zb!`L4lp|M##~=48f%>daPHfGLd9bHcJlVy@BfTGN#@TwpZ@%B6v@>u~i;<%+zWI_r z$(P?elKPaq_s+iQ$&(A(dKXXRO_>Zx-qrk^6Y9-O*~?NK!KMJEi|H+9D$w9}V=~K; zE|Pv!#i>QY;omxZsLjLv{*;Ik8@>6aqx0K>tn8BAljHI66YEEfOg8^n*<8snNl$XN zK`2;CFUwAftFcq(!|dFnybTT|QSo2r9fW_WGPy9wMi%{Xm+;Jo4Ng zdoPG!Nny(4pht?|L0w(<2~fX1F-%EkH8Tyi3S%>Df?+f!yNg%U;P$~t9zIColw)mw zlPXxjC?%y4G^6tf1~@ci;#p@(WSQ_g$IWssqa^9g4GLqUm27$)KCqVMr<}+YkfoCW zdm~IrIqGr$lB~?nsSKPDp!f<2=kpCL2_LYfN>=QR7$7vy)LiX*{vasz;nA$rjBq@W 
zdEf>+jZO4N>?%S~N~v>beXZw8o?1aeM=Fp<$_^kLS&?K?1SOjRa}+k4cJ0;L+eqU-F@oNa%_O5gReJt)kXnZ_9luh6k;R&WPrWc@DbAB?Sj zk|GHt{HoS*+WdQ7SAb;;Rg99usr%m|P0&2TF^PBofbD_QwiNMDUf- zfZ><#zgB|Ww@LrO*pg?IXZ}m4D+r9d^Z|rYA}S58Jma@;)h`ItWrg;sPyS$iBEVuZ zkV2uiYoKW+d~svXE#zxydf3Xu@n!;-sBhtu(oYz>^zGE>Xfbq)q>pGyJfGD?lX0;6 zxBYhn>aG#-{`($TnGDgG)LOg6KyS7998d|-2Qx#w5cvZVd}E^oVcq%1610MAKO!IU z2DkIk@J!Lg!;!%N=FlWF%DtR0Jv@r(m%RMW6>?koEe5OIx6qLC%6uR1$0Xopdc@kNu55&_&ob#+WmJ4HCipCM7-R7IiZhov)WNeg9*x?@NuW zvp8(njHE-qN%aN|Y(m4rx3hd(I-hhQiYRsrH>`X53raJ9haf`v!ItMDTdL%sgg+rl zosKZJ^PkeY6Uj$$1_u_m@!24@VJcqqt9Aw>K`{rxxpZ25kB2Y)Qt2>JlG+%19YO+v zKu((ACE-cO=+WIdoxwyMOoA4|*v6kpKT)P1W^AVOThw$m8pw2Jk5(D&eBZ0c67$jo zCkqC%t(T-QV{dX;1H13pDCpyfvk_|OJ43il(1)v(M3A;>i_;8kTbpVpfh<10x(<3=q;Gg!pKT|N#?}M@O^xT@b zUE?O1Vhb}DpP+tS*h+5Y{wm`!{CJm zM^nJIIg7eWv&-p4&W}HsRtgm~uRfN^pmR#nQxhS;xx-L>jzz)?gAW{!?A$e z9w2_w;B4TM-j-HarVIDt^&s5qeCY}R&U`n0VK5=rV18yrcW;e3y=UZa0pMV9I$p0q zN@1r&yvARUpQ(#KL0Kmh?h3hgSDBTF90}8 zO4D-+E0~oOt!%j|)4#6n*ff6H+Us7FU%y-xQIUR$1w$;Vwhr@o{I1TD)&G(tXTE4i zby#7wfwlX;e=2Hh8QD8y=`vbABAzC3P()Wl;S|z0rfdmmzl*S6RI!4I(^`UG+d$;> zq&XvD)2)&uujivL;=_-EGS*d5X<^fj*Ht@*0ob4KIQ)&%j~rdq^?y&BbLMC^#;9%Z z%5Fd~W~=_=`?^`x+NnJ%t!p2Uef-doW5+gj4~TXlA97>J{pA!~dG@e|<*?#k?VQ0> zlebaZ9$$zK~D`9SXQg<3r+f0hD%h=ePJ5zEOXE+$ z)=A2g=YqjWWE?L|3x{2Pbh+{B>I(AiV{G_Em>|JDExjK-I;Z-J<=8^xkFfkEW{B~C z3xDzG=+)?QNjj#KHJkZny?W`&IcttD)=JU`KUU@b&w3-vr?((jk1tfBUEp6D3ym|t z)Y%YL+>M`nT9Wqc{~!2CKfv}J#y;J01we1`1>y`Begx z;ut%ky5i(60%9CKdPd`gMkMmLI6?~mVx6TNT@hmtn}nyk4}R~-1{P=E!~I7qH}*l_ zxf#kVvrSnzlk?!$fo&pG!Mi^vixO{a37r>wDVh6Oz=+{G8S9d^(V}aCjZ7m^x!~mn4NeTLC7&QS{Uc zjw_soWq{3P?V=!b0EK_5Bq^OuWeu&&o_9aIAOsJ&vNh< zzNGr^bNjyv(FXb?si>IV?qMykiOKWFTi`J47$ecj@(2UlZH@1&*%5~^|5C3T5H zYhRWA_;ZNcPo=v_}hG~}{!j4vCdRa9udowfF6oe)n9YMu2$3q|7aqI=q>W{1ORKv7~UhE}*b0(GtG?}DG zejU!hjLYAXKEL6r@5(cdMW7lw*-2@ps&@gEFGOh7A08>g=SLuz!5NBIa)s!^<9ZFC{xTU~puJARzEX$OVY1FOG*0M zKl5nMfBDP_MK0a==C5m=T%UryQjae zb^OYHOd-EM$7v8?IUy8T0FJ-1kr}(5sjMCruo_AmufJ5g=8ly(N_N_MybVdM$Aw0i 
zDHmXJvk^Hh;17RFg9PMx9PYnpcuq6h>Ncixe6CopzE z`f|g%qlWaY^H#Co>In@^NBh}?d5o>Virqvx=7s+;M3~CQS>NNUQ)Obkxv+8Cm7nY# zwy`vADT`gV+E=|`6fKZ)`JTVNG>7ZW=5s3C@6xbL$}3e-*`nv#`N?+wDa11 zH>|$;ewuFI^>N%h$NM#{f5nJjaDb=Y#jx|_8a#x0@7c>(|A~1JYZn(m!^d(D6pC>1 z)CaI6Mq}pJ=G0hNAcBvdR@A5fPtG10Z8wgRgG|>&xN=4@tkhE}Iu&4YtqCFPXqkV_A4Zk%eqCRntjI za1M=kE`x=yhJ7L3{fY}htFNIZ#)dJ7TqaH|B$X2 z!Bj_I5y8YUHk{&T@Zwxxt4r8^DevV?tWl?F?Ys9mVUL&WGVwFJM5@7FWeZq1cvWPC z^u~}2N3X}%Y#xvJ&JWkc(edxi6j=e}H5z8pS?zA0%L;4J zHjA)GSu%=_Jb*1G3roNJY8MW`;?66~HOo(f|52Oe<5Ub*r^6hi7R{TtVH(bZec^4X z*3;bIyx_67UwUDO^!YlRW?<2A+UhOB#5omLF_z<^BM>u>U29bk_=7Ux6pz9Es+?&u zb6KP5b`v+^oi%^c44epl{7;{iv$>Pl!v6%V#_^NORBpSIs$nuE&hB?Kk53bl%D~c3 zo}^o?*=+MVx$&mKff2EFFjJug^MuH-vPxqs$g}FaIPrny&)z~thi-ptZP>G+(miR% zk^5J%clW}2SbuM8b=!;$yLN0^fAQMYizhb2zHfVPSmbAAkvs1)IHSc4o!!G~OY*bQ zT-0p`?JQ1@FA(sT$6&Zet26j=vs}y-V%oY}ANmi#8tbZTXKYL31e`esm#Jgz2M_FB zF=6rk8}GXH#0@7-UVrK8B}@Ao>&6lPGZpNdh3Fcnnx3@MmUQ8017XFv0ahrW=|2v4gTShdH~ZhKA0@f+@2{4sUI2YHqBp zEU&1lElnSbBf|7HC+ug09)2d^bo;z6r_<^3r)4HgnSWJCJf6{FwV2JO>=4Y(=nZ;L zPOi@2H(BoDu;n(I0ujE zoodRG+re&bLeYwGI-_sl6IU+Uwyr`C3oyp$qt7MHw}uK;1I{!0N|-2;5w?3^&nP26 zR*g(XPc|fusf?^Ow?BC^!k?9-BM-1+(ug~{Jr15Lw>?vk4V>b zP-L1UkVhWp3}lSV3soqXpPd(vFSu)OyhU2#VGSyeg{ds&;6pEDI{opcVI58Nb=A#L z=XvqH%9K{ux9jAI9b51H)gOQQ^wkR+eJuDLSTdKS%4{uL2Se3Vv%7c`@;d76q98-X z%>ca{|I%2T8Dr=#$+G#=BmThsuVfg#nI-l073CGBCBXW>kmj7hra;rOo4)nlJ5QYc z5(Rv8YGu9OoKAX`6IlD(XC0dV-m(?h>Q5bJy#_a^@`_gMT68l?LHnjs{WXT&} zUXg#>w=)eIjWrO7Mgks188~O*|399N^rmG;PF%5f+nyi3`GdC}KR6}NY4B4UO42+e z+bP{bi6!Y9m(MMi)m6o$5)5(k$JEEUM6CtXFAVI&{L^!dPAsZ$$a$s`e){ny!HhxMrR|$0?}*~ z6?d;B{kDP5O(#daESy^w)#i;nfU_V!*hroqPGt|$FYFGrKKH}r`(H@YYqX{LLWt?j ziBX)&`>IZ-^>!~9;c?p$%kyo8A&qzCwpLE~ikEMMuvP8l)t8_V;;32oi_xhD+)!M=d z9=7b%lspHZ|3H;xZGJlk5d$=kTiaIM^y9~Ia~{sxFlBxRtS;U(lX+edC5kIfErg9k zmXC9Z|MLX%cuOWlM=a}{&I9?;bC=E;i}EVg!pzzYKic5W$ee(K)frp&+dMC<^Oq_Y%>}EKbI}>7xWsKSa(|}9V9}=)XIG8IOt}SMPHFwD0pY;5k^Tf!Ph z1CXA&`+GlyX86Zt6cqbq50l<#{%TpV?P$ncX*S$=E}~flFf5 
z7c={fxWOURZ+`okYGzmuE8_os>iVp~qe6{MX_v;=%K9`IkSq73?)e`u#;$ToMbBu#o_VTy(~|@2emF3yJ*NLKdDl zJ1*xRpx^RshqiSGEZdgQ!xxgo=iM{zz(ddX{=tt)g&F6Gb&*C2AG}j7zF`vtz`i$F z^K&Bgi_+m|{dcNnb2+vD`+tGL2Uff7Rc*#Z$0abK)es6%rm!l78f8)4i;+2@l!b-)C(4 z(K}wa#>KX56q={71lmFAU5AQa3ToupSSPrszP?-t~WQoW{T|3+rKQy(B$~ zGm=M8S>S0ubb2+c(-v=eUwZf0u~+A8-_-WOLWI0a>B#+sM}*oKj@4qCUgoHEer3z{ za#H2T>~g?hc8s!q`_P(sos)4=Zs_Qj?_+2}lU}^$hY!r`B2=Yk1B>07i)2=g6n-r5tt zJbmYH9(xR)`3oNVVMT!rcAC;Ucw?$F+q$wkJsukfdi}TGpkvHmVc@B1!DqK3q<=%k0_eOCshBSU9oL#Q{RuKLdcKyYOeS*} zhv{Bnpi*hg%5&GF)gCli*N&A9nRw)61ZR~5NwFVx?~E#n#aQR4SkC-DPY`Q!&tuRr z-Fy)>JOTn4lq9Wr-XT0Xi&wSdY0yq(v#zDah-L8p>@4Uz{rDkHI@R=T3rSajJW@jj zxjJ*L24BzZk2Q#1*|NGntjk5)M%mcih?IR}J!2mbu5>gz)Uo0gc*PON@N?@z5eOr?Dd?UW=k>?*7} ze@ugNOA~Qccg)WfJqkQ7{i>Q);-+7u78Xs^Q2IeTSYeqzlBZgB?@wS~OT}V!T~liy;xo3?Va;R39JVS_ zU)_hPbrV*`BdvNYHWzKZvg?vLG)7~p0`VS642{_p)paE~d6g5^Ph)KM)hmYgUC(0; z`qIX&x+0kiI+XNUI?I~0dTFyspF3mkM$SKsP222m2|XhHV5Y^cN*_aGkwaz3IyhF$ z#I;PTwz!u3>PF;|h2^h8n?Jj}sj{-z6CGb{2<5IDG$fuN%9p;&=Vx38EpB3QdSU;x zVoVuSjY}6nbK%Z++{4l@e)QpODr5cVsZ1L+MPucw7FXsk-q}XVjTT+OSr zGTms)E-lE6qdNpcp7WKf#=~!uJp3JT$R4Jyo-?;u9@MhXLc%R=!5nwq#`~L#rmU=D zxmm2Zq9lVWVQd(tDKjxA;|6pCpx zppu&B>D4tpAHjhMc>~F!d85r9m3h>Wojs+SPJMjU>WeRq*D_Xo`&eIyCKjxO<{elM z$U|>&RLtuL`TgE#W_CeAK?VhHpTlNIqv&q1POVn2M8 zH@Y3H0AakXfla1B5h9F_%Ed$t+TfNxAeu4#eEe{e`yGVCb_zcoGkZsaX-kb z!G=|c_w0Ni>{+;YcDk0)sM4}?PiLlT$2-va9I$?UUDvFGYj-uL6??x$)$f;9mXANo z*w3Y_@N~n^C*OJSIYQ40KK^V?h9!&M1+Z+7b<{Ha_Tqo=&T6>>5mYa?@C%${X@XoHScHd z&n_QRPRSPH`N*UxxD(n3`3=f>6zz8eRtODk1%97SRkU*Z@>49vf za<+g<%mE?G%5Gd`u977G&xuZ~`{|D{X$r1MXK86#qZ3Q(+)YAku>y>n!MO&!_MsGJ z42^kgSlm{`>ZWwo^sb`1!aXEO;3#%%eIc$*!$@ga`lz&qRg`XSgayXe*fkG9+4<{z z&>e3<*z!drZ7uVp|0H8Pmp~9|_$Uj{K{-o{^dp{N3fL%Oj1^=QXos(wJ$m|RuEj7F zhRg_r!0Ru@5v_wkfo7qrUSsUjt18lU%^yj-$3k)2FExy0FFb=Ooyx5yMQ*LM6svlZ zF75KEY(hWhg?#mk$}uXN$9|eGb*Qx^b#9HPd-{wrGb_zfTSHha%&gu18dN;Y7OPo4 zdZO#ptF!Nd-eHKv$9{Ym@(gl$L)LdU|8Utazq9Me?fWa3cC|FB{d%c)=EC;e8o>`= 
z9a2h6jnM$fItlv{m}-8t$r!3E<65aR9F(Ld)!v0~J~O%|VV-r5rcuNITd+9o)#vHf#=fJZKb=$0(%s>y!p!}9MB>3-Rk6z2+F=vNPv0bs)5ab9KYA$GdTc4rMN`^a z8?f1;-R=A_xJjBF>6|^bb`%uIJptyfYS6Ndk2Zz9H+-+hsjV#U?n1mCD=s;eD}2z? zSQ%5jc?(ZG=!>JGv9_kUtEDb}jB2~TIH_m9+=6A54I@ShmD!nBJ!bg~0%9q*W&VB8 zh(v;Dds}63L9W{u*)Q#1z}}El+pZrurGDdkUw&&}9du;OGi-L|)~GUi=8a{u@19a( znmaza*?Yvy>$hP7v zH%8j{38|a02Y-cJu6p+9mcyHeVFY)R_{IdsXmsR_fk?RPO_R^r_%80)$jqv$^s+fO z0wW|yy(F!Qhs%o7{noaEFpD*f>%Z>gF!YQy2p+sssq?y9;@QL#&3hh(G=K0`%pw=P z_oq8azFUi97|@}Ip@G8DVKZE1dtYURlWWOla#2V2D38WDYB3D~+55gNNiEDf_o5=J zr*4=}6`s>Qb2Q+FceeBCH{uM^We#hM=Mt`tMQFD88z+N#hc3Q-4N(NIf*=d%1&>C} zGOeZSTTOFsW$6bu#aMRj-sMLtN-J#Lb72?a37EIx@86ip<_qPwV9QW%V<$wND=wN`?oFe_n`YXE|^j0Eqwcj zw~dA99Cs0mkY%Oof35OkECAoRf_Y3EoVmCI53Y!f)VcF<-twGSF;!~BaI}Z?3)~&< zxaR@y`WeMg6fh_TveUw`f|}Z@j&-X;5S$Ie3$D7Pwc;fyvjM@=E@BnZU*GL&{cP?M zQaMVEEG=X(2QR%C9mea>1>{)_Wi=>g%JKz^p)qJjVb;?sEkxqR=pE9;9Y-5sZ-i-r zg(08XS5O#kUa_zXLlUc*-_fvX=|$ruUlU&64DV-E(lKR@GGFo~b&T3^U zwcd##*j``lV8#~6AV*Gl?qp0R;p|py15kS_@%p^-*oqQwcfe)KDLOVay~E0GyB-?1 zKG5L5_(oW8L2vxVovW8umK4TNSLTi!%xQLN!JO5&4BlD5m)L~{XUmR*%W|qC9>3XT zEqV7^Y~+AxHf2xZ@XhcmZKU_y|M1mnDVz1*H^K$dTqQ-jg{7o%TFJ}gynFX zUq!$)E0)v}e^&Dzgqy)Rlg+BZD^EC+CnJbPEh^0qTWqG`56*`d`jW#QNedU|_FVSx zk-AJb;??KrSbCLK-PejPTxqsVgx)vZ@w9a1a2UE}(|>nZu#|213d+@2TYO&ns>S8A zVSrVsRcdR1WVvFJB&RSTGIL_jHcXU_Dc;Ik_=Ea~G#y3mSlJ-t!fF!#n z6pX?Il!~deRf{fL-;JI$c&E`@oGz=L6akfq1|00#us2R9!{B0K73&(pvt|~LSUsUQ z6wE2{VPK*XeCc^^mhoD?51TgU@ukKRnzcmd!Lqp|O)m}>*5qN=l`cGG_kpzyW#>gN zjlpU*S?w0+GHSiq5r7y}yDQss?M&rk)}HZ`@^|bS+mmZFdDH5Zisw0kw#b~T!AI4L zr_#!zuIC=<6AQcX1+h}@$Z8!{l#yFC^WgSL4OP8R1_tjyFIOPs##Sahwx|XCE{nlv z@q_{vUt!FX=czievS$34ILK)#nbRTmb=iiGsLSg=GN-<~cM{rN8*Jb3^qbOA#@>F8 z71AWvV@oTot?pW|b=}0~j&^zpFnHIuYJ6k1-Dq=IO(u(#?*+u_g2rfRY4?;wLKTmY z3hk{O-!pvGm6J(xz?g95tYG@~>8kX~CalS6%;8bD+;#mclbGZ8>sXs4X;@|;ExWX$ ztG~Z{j{sH_?TuxYevX43d) zFPj_>V5M1Tb)M)Q>v(+k)h9_VVXJpWWgU82-mqNoPvZ|as9D9n*EiZ$fWO$LMqF%_ z_pk8Td!6eJ^Mr%Fm?=T|)V{WmMrF{0B6^L^s8VGu>j6Aj)ztdE4X-ZaIXC}oS!PCk 
zK_nxqebLHVrmbHcga4$zpJHC)K`gw`avfV!#IhF`R{vx;bM#by^J<=OuooIyFw6$L zcD&Y6F`W_gGMUug(cNh_y@8|{)0({LlP)htG_`NcPt3Z~zW(7=#S8AZYfBC*KR~L_ zQ+TVZW*sjh4sfNt{pgzOE~#4b0Nw#iS@0<3KX`|tv94tSbEoI!WM*b(XM{tMXw)Aq zs2>%N3)AHWeS9UCj?Tb>ccI4SPM9~`JN@Xmyd^F68#3rE_uce*Aza~QnmfS>(v{0D zV(ft_%sggE8qLfH?|iYm{OpKJqmHyr?(3hsc?fHf{IYc(Z59sjc*8FPWSNGn+529A`Qv8Ci3B?ZQz;*8caWr#ODEVPlx_=k1wY zbE*apiKpnCI&sd1y}P$=KXAq2g9i>Dx%{%r5AE5yeb4r7hpt%%EmmbT=nPs{HK3jOo*-j_)4U+SE2~!KQ=P-+X9B%p5LkoIa=C5)3*^Midoh zn>AajS$bMYW9!W64L&0}%_*sU#XhVpZap<-dbHGY@t-mCzUQ)0YG!^9>IC)!oUhYq z)CrA}1QeVpD_+r9AJ1v;n>%~z$l+r;!s|=l0o{7Tp46-deJ^xF8XA$*7OC!9cTp|L5OQjO z`?d7c$)!c!^>^3`;tu<&g>J2bMaIP)#+u0u?xt+pE$y9`S7o{9`dB>7s-mtHh0*a% zU0T;?OG(QgwA0geZIjjEuwmdf8ZAz{Uai-$;Hq0zK@H^2my?bWT9OVdoR!6T?-*ZD z8eq#dV2+}%8c|v{>EfO1X3v?``Si4lmshsMW)v*E`|7??zRaDOtZ(}Csxhw*8j?t$ zQmZlWSlw=DIz|Y3rpNYc`<(kPD-6Z+eXiX4hVsH}zX1wzpBn2rIS<}lkfk?F?DzPX z&RxIl=~Lh$s=3WT(oWdyh-)pEbNJ){598qKzux^j#(=~vS~khq$0c7q}}ku zXa9wl(4)g_7w};wwD2wen4e$G(jVKn=Zf>jrNyTwiUT@*bmE4sum9jj&)t2)W!sND z{>>Y&+_h-pM^Et>`Wdf7SovqP1Xw#{`J;UArpI&a!TukAL=kc%DTbfGZ!Tk#Z^L5L z`13R^Yk>Z2v6vHI+)&;IM?vpP(uwM9yFPRBiQ8{~`?EK0T)VCi6ON|JQ2J%_vAnXR zAz;iN!Senpy(b;fTZ(%lS^vO@&8A)_eH%ZSFMMiYE7#%U#QNSbl;xnE&E@j?y-tVQ z>v4O$9v41q(l~Uwv68e1dMm5nx_|QSAN}OM-Ak`Leq`x{31dnP<7R4Dc3-C{)801{ zv$JuZNq1CrqUYX3xQ@Om{p_8$W>$Z>l37npV$~;T^W&gFA%Vl|B{WWl4YJDbb$dK+ zx6f+chL?M_9nDjZ-94}C$fBaW#`@h$7ET;Fe)_12%5fQy^pZ}MH$Hn>>1)#5`M;K4 z{l)~Fwd#gEmXDWTR}+lSob0IdX7jEuq+i~6^ZDR1S*;Kd#Plky5wjwE6l1iTtanKg z$Yq;!`O1-VASU5<%+0ATtL~oIJE}7y2k(xuS#(TC=A+VTUF_zcUSFX0b*wH6P5J0U zX(bymYa$v``h>Apq^A$Z&l*m?BF;oQMYNjK+N08)7-J$06Cx}Jnju`@5;;4o#}L&p zcbX-TpBCA3b#H<2x7S0vs6Dvo!<+DIUBluMw2{Z3nmeaXPB-*3M1dg+3{hZ+0z(uS zqQDRZhA1#ZfguVEQDBGyLlhXIzz_w7C@@5UAqospV2A=k6d0nw5Cw)PFhqeN3Jg(T zhywp_P(UTN-z#8M`1D0~{9AG5A>~({h#_y?m%|i)<(Is1KqPh@;Xl3cD4&GGKH`C~QlD{(iO9i}8V9fZH8_8g z&N{_50CEPM&Y&j`0x6>6oKX!vwcx@0Vi21^RC3ak0G(EELNUlegY%4aID6Bq$A)mt z|JT@efY()=Y0vGw>h^opuBs)gS+ZrzmgFuM+=Rtq`@YegiXk@n@zIW|NQU!%{f;xyZ`h21HL+E&Xjk)Hs8#dGlLy2 
zhvc^kmW*a)s7`Kmi}ag*n*(dfZBh+Da+^$n%5;PasBRaF1wcW}zh=M_edWUX9$tBf z3n}f&p>eyhZ}8B-CIgF=&#KAel5{$r3x%96z##8g4C#XM1xzU~N@5EK(Fiq(8euS& zxw$CUVgTv~VJ$kJmJVsKT!RUR1ke~uI1u#mB{z!(rOhC8DK(LFBw&jgI@6a+)PtDm-qyUZ4Q^)?Z%SG65>+P|IZ7n z6qX?5H%TtV4bAmGBIF?{=;kA*hjK5pK#{s}h&PKooks+V?QFkHfd3u0MU)|g6}1|q zpQJG5fvRRzW^xKdcl!cjWOP7`N{u!v)Kn8dgCH46 z!eDKjTSEjc&8L#q5Qy9e$e{(G3R4ax)@q@y$t+syLP@T){zwX89V^!qhutWx&x1L@ z3j5Q_5&I=HpjNYS!e77@7L;ItTr3F9a@mJ42W64(mNu7)Hi4f;qte4cCJc!pErd6s zlazaGd`pY&G3yhMs0;Wlp6T?6(ADX7vKe2JHleW4#i|M5V09C*+4{f|jbX}+} z2wF&egqk8rDKR<;gpwWzy%C}7qu`mcr>pzle|0U8N7(hDDWK@Uf;Fk)3s7Dm^O}Hz z^@ELGbU=0|$Yyr8Y?~<{PA=+`o>3QUQeXcmrJ+%?Y9<(MiZm@uK-%c|ye@5!z$bQ( zlGcq_0H=izI_Ovsg{~16`-0Y1ln6U+AYNzzp__q?0|*z2A~v*II%+L+TKkzn`geV+ z%M|Lfbn#D^8MwepST~6!{ZPG`McM+p2k|5yi9r7=L1B89CO`PAQ_L-MMGge)CH?ean?pt&?A*JkEIZnI5|u!`9$ueA&A!^X1Qr_A;8Ad{4Q#Q z#>t)&6~Xxng+toTaL?``t|Y1(8jxJfi>Nf5CQ_XEh=TQ^nL+>)8F6X#V0)#Df$b1t zLKjZ!v~@eQZwYar^bS!-Db?rq|1bV4N~WM_#iU%A{1N_Xj|yd*RJLT2`UN9n!`Gr; zvfAJZHAVA5Czn2w=s_#Lm=Rr>$q;SvECXTIVE!Qpp^=C@pt50Tm*zS5z2LTBFe>w?TbvpQJn~1)PTjmI{=*na0GcJwHesOc!cqc1ncQ zA}ryR$h}M319au!HJxDH&)7XavkhjG2~9icQjBgN8zooz7HM!HzR6&&(v>EL<`{m_ z0@HWqaDZ+XgH8lKTIPo(>fT3i)2QimL@F6XkU=R}W4Zt&3vuaSH5TQ9Il6Q{0jnro zlq)USESAD+Wnd!H2vU~dk3x80S3xwohHfDnQ-H2A5TZAvxg>2XaSu*AoL*K&B^T-f zrgk@kUchRK396Jt9BJrlDH1iKrNmwWM=+*#via;E`PU2}%peo+0F@3>ge4=800{i6 zBZmBf8|HITScf59H?pD66Y5(}k$fmKM4!nvi;wiHU=)GRtjz&-lXkik>o1EdiIQl- z;CM&c4)&wvCB#GXfp(|2MrdRL6tZHZ$?wQfGv$jSM-9R+%LH1pMSuoXK^;Ugv=F&B zS!|J8@?eW}_~e7g2}Q_(ZU?r=2^ADejgt*fr35krHIkGR7s?kcr}RTifO;)ck19$Q z>NGsMBu1G+4DwQg83dPZq6vnD>cO^|kMtTM7bUt=2!+HlJ_nAW@+ISMbMQ*qkaRR} z(k@Lb;t_fZ5bMX}nSezGx(8y`fKltZFhLrFUJ~%3BT}A4X_RR-)EA0`{a)OaZ3_Y0cItnG$A?Fjun-N z!L)?r3xv$MG(y&PRS51cO|eN^ba^G>4WUqpMv}xLuE#HvWg3+RPrpZ;H2%s{{DCVZ z6)_=`0U+){t)PjEejn{B!GhvLJSSEq|48SOHKWp(gu{$Q!SHKEK6N_=g+bq2p5ke^ z2$7VgeK2uB2TjiqXxLYkaGA4-ERN)pY|MIk5Fm`w_7WEstS=)HT+}$zW@!?Zm@ISv z0D6S-$c^3w>LTRv$T)3~SGtWFn$|}xK&%s2Pyw^|q;)2h7^!{Dq55mZ>w<>Ntuc}A 
zSA<8b*p2>A7|O8-sVhvTKcVGbgSLQap*f9cy1lw}>@}b@X8ornv&d_@YD_7%!RsD6 zuYqpBoFNbb!lF5xz7TcB9A9bqLc6sx3LZAXL}8M|3nPFS06@RSl)Bd#B3DD@5zj?) z1aE0z%Mux*%W){eWE^D%g!16nJnm)!W|BB2E|df%U_M}?&`6c0uo8y}3tTYCORFK` zVA%zWP*SX9(J??E7nc4=(X0q*qyql6QY07_wEOv94aqD0J-%B9vi z@E@CJFdFHE2du|Rs~_-}D0Jh%F}YxWW%>~21N_la3SvQaeF3@>^k1k9B3}fKBd&z} z>^Dd)>Gw=pv-v_nq_8B6wWF(pT!>}TBqa@K;fDmUbO`LB_Jt)#&?@co;|Ag=cCf@p zDT!qWs8UH5^+5pobf)z{OLaQr!lnim9hem)XI*+c%wm|<5{*)^NP&_qmP=zen~JW{ zk4;}-hS3i(foU-{fl(O>D3xb{(PM+5*a(7pE)D{U(HteNQg0w9?2%*%*$f1D^;;7HkQtKIMi==Ej0l^V23GpGLxBT|1A^eiHtfXJPkep<`YBDB z7&sV^@taxZAkzp}ynDCDZ#^U%L~g%?OP-rd6Sh$|pg;tq^Fjxa2mdg>fQUiRLg=$; zocO;Nq8|2xUibXu(+rU1U~(9=lyQKr z7xjoYW07MBJV`(tg3H2*h|Sh0k9YhMv;}>tg?*1oE zJ$!uc%HG;C2oXCt7!=Tp-4sQD<_*lFsGy~m*qc?NMjeJ=)Iw$=^LV5mg%oLHPu2E8 zEHG*$E`KDIgYr?{LNxEN_t@k-l`XIBn6h-!WjEY*%b|?}!?LNe04pZLMbpU&HZ};a z)=wj?WtULcRQhfvEMcIaG#x_|m-$>o3qSN%MeIY|U;^kuVnIm`m<>!xVbj-GuCl7S zwqaPy$jK|VUv$|Od)G{DuZVJ1MSLRm^~3}s$D(AX+=|>vWnx}S)>SJhGMKOi@tAg1 zn_;3AT+~G>8%{GeJ6)Pa%01LBmMm{*Y3&$2Vb=2X=k2-j@P$h!jcLgRr6-4lB{4i6 z=r6ajA#WK!zS(Ox@4WKZ4g0q&p4eHJOU9u+ns8bL*#nd)Vv1I28ID|-uPrtASxqSb zgVaf7KqLey3~fR~NwQFwjDVKo*jq3q0DqMeCLFnWBFR*~sekE?gE!v$_#@YEp5NbA zTb=>5P$U{FOD4<85^@c);k?{v*t8g>dgTJt&|sh^^n>6vngQIB1xi#Uk|clu5OI2Z ze8c4N`yz4d2^dM_^66NR?}xB=4$ZmK2R(5467}6vmR)?$%m4KL%g4`|*3(hMslC|v zq^u0Pv!v75{RG%TXc(c<1dHv^Rg*(ruRjbwfCdG@!QXv$U;o+%8tKjgzi zt{(`X3uq4d9BlmpKB6Jeez(sf@1)(~bg^UVhU;GX^>4p_=dM*VMm1CcYnI z@G!7nXB^w@0V8UnqbidJc5gJ9OxN`-yY#6~fA`+WtJhCxtIb40>0E?v5H^hwORtxR zQ6z*g@>3!r1P6g!qCJ3Jr{L3mf)t>K7FFN}+gKue!dL=qL2u;qVX#b71i4r6rcJZX z;zO})Wg%IX%oh8%-u2q|o;rTPtj?y&NGO>qBSRCZBzCo7i9|GnizaVH?o1gLqY?;& z;>-z44#`5V>j#xUG@zE*F^DOWz)lb%%r;yv_*3w|xWyx_tEeyd5)4M;I20PLiLa_} z{q-;X@|WMeKTa4`iPRFpQ}iOmQ(&7GKd*>gCl@DJa)q%#&xrJ=*HI$#AF z8`khN_o$zgdTpC5O7a;vPkDs7f?CJ5-mnDlF^&?jhfpd3`a>#-o9QWjoR7vNje;>G zF3fSj^TQ~~Mpq6y*ClG2Tc>RO#>d9r-@b8iwaXn#l83tOC{>dIFvu2dKP{U@+E6WF zne;4#Y%wO{$%+Xf#ybQSM2zt3b&&!6ej9Tod>KSsoHgTL0CQpM({yx_rX;-&0@$NI78#!3((VX%jxwbfVu-}K$x|sHCl$@K 
zzcAa_`HT`EiVOh*8QJFqaKJh2Eg~TJ!a}|yhx@iRX-+&AX z1i6Y67{<`hPK)#!7bxT)ABn~ymqs=?cBPN*jpRDd{fD3Y;;~I5^2u;82J*xbWf8b@ zRC|m~DK;b_i>F>_1mL5FjAjn4~9u3k2P_5Wh{u8OE2vKh-gCc101XzYFlu6M+z=m@;218{_nteBrjRo7Q%D~oCD=9Vmr z_;oKyT_E8=H0pw2gADp&Q7o);lrOybt@oe3apUCXLJR|iD6;wCjAs)tp%8CjoiK$g z(pDXafGY(5Dx1w@(n)kpWG>_jV8~+b*~XTD&LAiDa;Kq6uri$&y z#&wW?T3N7HTT-Ne7Qi0_4Ksj>o}0Mh=vCmpNZr`2?#3}R8GYr6NFtlY*4FSg;AGJ9 zlU9PuvT<7GRASP$D=(Zgy1Q>+=A=5_ zoefbvue<(X%U8`aTVlZ}f6qd(`E z0DuCaB>H_?Ie22|&Qh5y&Oj&@iw&(^Jss7xjg^&EWwAnK299wG`|vk5XII~Pw9ZA> zVjNCEH?E3=A{d5b(wKt9kh?@M;i%l&&~F7oUb{+mOrAQXqj$-9=Pa2%rY07SrSrq4 zEm*v8djHt&_QvW$E|&!sIOv!vg`t%W5vmc-m&g6I5!v$UVzDA$(a_e`P}|&4Y^X^^ z)0OFHhWm{>CcpjQ@`_?lXQ9v;J0HM)*za-sqj7XHn1SVj^1;9bFLGz{)St{Z^vzqg zZST$<8y8J%4yRjN$1YknYr^mE05WOI?MWWU6JFq+s=5?)d$ zk^l*D9l=mLn#|^7!DM-wn*dkEGOggrgfAq)K?mQz ziMxe&2Ju4M^!ejz(TT+ZcyWw@VjMCD+e%w7?l#mGY3|Hm7uh-pk1gAceM_?ynd)KL zNO^U_8C~@SkZo>?+6QG`pkYJ#(JRj1z2n?wY}wpU2LCKoH*(6#O`EoD+194iML$&P z!2Z~w)tFl-D>QYEXlt&kfb*fPSE^7qao_h)@E31hH?Jm$;XAwgSR$TJV3TizV;|zD zYQx@b*qu=}m~5(@Ffga5!j}rVLKWQu6Y};k*o4w}`0|m0M*nc-rpF&0F~27!I($!G zZ_HOdb;aUy4jtONeIi?M<~vpDPk%@j_fJk^{8ZOEtgfZCt|AwYK>zC}uDR&yS5fMF z-+ueE|9*LPECFTArs823Fmog4y_czm4%e`GSDnDADXg@sesyaS)pE^3s;s4JRCzG7 z_A}#e#(!MX=r9eOBEze^YS}^NP^$RzsngEv>OObnjKv+U2ED_1aKU&O8Wj((yJj|x zXed$LSj^=L=~%k`qEm0&fAy?dD3DYO{3@;Cd(d9@8)MWSE-RM@$ z=LD2YOLun6A1zPd)Z~;s)HJcD(%!jo*Z0x91;C3TT{v5V6yVjh-;k7N%-Go2dg7J6 zvpl{=9AhJKIeW25{l?faV`tR`_neJ07vdwv_KfOis%h$;z5kVa=hqC}!d<4L^@ZTT zzRkU7A2@#M(cMeAAH4)jYF1i^D;isi${)B%2EbZ;>_uU8!c zu=`Weo(klREy?%o-ciOCQ4`AIBm5xonTQNi}HwlJwc(V1wRUm`0_i%_g{FGp}X9@Q5 z9oN%jy9tt<+>)5I@ur7H=@Led-t#Nt^(iV>@tFUAf@f|N7<~3#QI9ek;i? 
zyJ6GzNmCB@<0OJh)>T0WhWF!I%_(yg8o;AP(Oy*ErCN7cHJm~On>XT4 zH>4{TBTaPn78XM z%w)SpTrW@DBkQJ39e8|hwC}<<_aFuWT7^H)_jp!7$zqiqk_z2S4b6nFuDd;>VhfjG z+w6ztz%-Di6(PGywRHidw_W>V{@!J~wr-n0GVjf{wYKfXNv*rhV(RlMP*`&HU2XLtu zNzh@r)7eD-H*TJUYo6H5kfb4`55N~sE_X5*V}t#1jEr6Jj%r+okPzb8+7F%7W&m$A zG8HvoDhP&?M|445RfU7dIuAKiS$UbWV*ARLPz*)~b@^2J!n==>quzY1Ie~)K$O&XV zL_vii;)zto+dJK*?0%Mpp4|oNV5@tVliC#U+pE(_o5ESK(WKX-9R5sIx}j)KqyJQO zL3{j6N*aWDZn7*0nk5tEfR@7hQ03@X?klExe3zh&m?*8SYT4T>?Ll%|AetDlb89Px za6plOU9ji-S#Jomkfw%|2fM3hK)n!(1=1i+9Z`XfdJI0IH3giNRR_DSx+*>Klmi!a zwTWnLI@dGO?ykUna()#em2X0?HWaPHDl&<#i9(dDie9s?V(!rzkiFc&!zOr05w_S{Ml?v!4;?) zha-aQ!P>f@voRjY$NjY-<(*Pn)&k~))WQ=Nc2g1?&;97Hzq+a$p%!u+3rKSTtr2d6bevDyC-^NPFbbO_Dq@ep7Frq zx~5^Z>;81=;SFtJRX(CNRcswu+<$yRh{7EtTihxHt*K>0R7E7)RE;LoHH>Zv)n?qb zK*Se`RyI`@t1~z|%u&+@?pHz8)0MSnY@VD?Z~5S^*^QO)cp{5-+7))sFCX4tpLMIY zxXNQj)R|UI=hU{=piRkTCp&Q_R&>>G4CC|h(+}_LpS%C9`72i}-Lq?CghtS=u6<-| zLKGjY!Q?{b7xKCjAYv@iy=TYj@jc^vd%Fw8;^M6%>&LBXSGLaT439q=0}-ztF}JfB z3hd8-9iC>Gl99bj_l4ys=ZlOd550L+QJwwHwHv0cTRRQMWHF2y*F9=j&+g54|KM}D z3JemRX>|E20^zDD7*6F{aQ4Px&@1=A&1iB1NZJNLgUDoaxUTk2UDv$vmW8nketKP1 z%(ikJL`|vrD`%cLtEDQY`fJss)%^t=vxE7F5RJrApUT{_WbK7PnPdLB@%E+ND%O_1 z^~LSe*3OaIVMhPYH*fmYXRrKOL*nnRDU1HL@x{t9V|qKeV&8%Zom&@Fsl}O* ztGBKhS&{GuSJ>2;pI^LdUW~^rIt*hKu*_K9HtK;58!qj{b>)n)t%mW-i%waf{_VFH zG|)`uht0Zj`?LkiM%Gp39ja;T6^n}XDb>4e{EY{2rsDYCLST4CQ4SSq zZCA!8Jcr~@yQVv@fA)=M=;(G%*m;xjm$Qx^x${%w!^3kH?Am-eVi8=y-mBJ(pqJxH zWyb!}Fh0J1pwNEZuCF}**n)cRs#TuE!4oGokE^=>?E!WPv5czyw}mROVykMMyJd4r zas0r=-}=}%Yt6NfzxOYn{rg}3`0Rme9{uK?h2;Y+eWM4i|LKo!y6|J;zIo;7vM^Tv z^}{$Y?2D6kJa+Oe>=~_QJ#mj=+K;w!<)%N)6)=yOtRc5{PNCb6nRB z!+7bj8)jGI%x#2+*Q|ZwwLfYCe_{OS+M8}WboBnmpL+1yD+l&pe(1odkN$#Z|IVNP zI)3YYkH7o=8?QY7-mic2@1OtRAAj-k@k?%f`UAX^FuwivU)MDU0(oeS=Vzba`@He^ z#C_M@w(~#lp5h#}`S9f|{`u=xR?b>AcOi$<`Ew2)KC%^O2}QaW|4i`UgM35g5N(xmT(|F@JyV*+2g6U*3J{p4*Q<`p)Z*9>4y`@?tiq zV#&&3mB|s^W9O~kwB??=Hy(NWwFgdI|Me%|e7`CZ^LW!8yMIIw##~mRzWU%){4V>d zVSE!^i?8*psm-?Gy+^ZtJo|5V@l;cs_0 
zHcDsh@COoOmmfHG^x~bDUbc4r?C~R-VTGL#dS6uG{REm?9SUcJ6OKToEjMc5f_>|H zpvyJ8*Qw~jW3LG&xAoAz#Vz!RB={@G`P8J3 zHb6I?yz4B9GI)-yKi50Evol57GWc-n3;HHI>#fSnkM68>DSvI}2uL|vI_#p0 zk;K#8-#PB$2lA?H;MFU7nJH};v+##%to-P7BorrC!@uOE-=0n=XGMaZNvMM{F+V+p zXk~lWk=7l0VdCV@wX0U|xcY@Z8=t=W-Rt|%Q!}L*Do(!6KF{unMbG&DQuWT$XjUMb zf^IK4cISy_A2}Ch{r)=+9S4yg-M;aTH_q1i-ZA#!LJD$DLKcRWnc&fII%4P_f(o-# zy!Lj1K0bG#A?RwFyLR%J3$8tU^w=YBeIiWz{LwpKe&uUB^4b}tK`}$_iN8XoXm-7Y zvP-YEZMbs;nwV%@v-Y}c&z;jNu9#*!e}C|;^N=!EtZlsVFZV$LKpdMVu>$P!W|OYU zIVh?}Qv+w);7~y*m%9aWh=e69h z8<$lRH$8n(Sfzv~IOqNczxnfTUc9gGjAIWST0CXmRi8@1zxe96UbyG_vVN?+tf8`<+K0{`PlYc;b=AUVHoXryqOqU7ffDg{u0%ore!?Tn>{d92Quf zGOq=X2V%vZGnV0A3$C*~7HItG?Y&j@N!Q+g@hHgGWOU06F$GDfsZ+I=+SJ)rWuo(sSOv{nA~tnum3y6P{Wrv5p+l_Ay+sa*lXU zhzLhKSUjl0-#2c;T}YE&Svs} zeBRuZTlRnTvgKn7*<3W&(leni1l(mX17Ko*-j^%3l-Y`#p7~G1`0QUke&)fGcki7s zywRH|)~35APVb1L+)_Y}*|+SpZt)HWZ|N}h54~>OgGjFr;PKAM?oWH2$%}tEb9Lc{ zS641LxbxgI+Nv=*0#`H-fzH;$X=51}4v7H?qn@Ww07o;9^i`4?V$^uk3DSCaEvrKym7;XkULl$w|?jO3n#DIK5xu~T@;wX;NACo<5|#42TZV&HZDxf ztDY~6+Zc=GpKiYF0S!Zyfk2r$Y1HmWt$g~iMcrr3TLmIXu!UwGos~j+osMv8U*F6# z&)L2#P?$EnAsrui-evpl{n2l}ck7{R8mHXw*`w8}Y{iE069yJubj|f&*}8DT*b#nL zWjF^P*$QPJ8pbI&Y!dt~9*@Y&BwPdkMoh!=1}}jy_~7;hSIm@pbd2 ztiE#Z#)Tt`U9+c^=7d<44rd|h%`&RQJw9o0;AIlo;7fw1&MR<67Ss8;dIzxZD$MJ2 zWMaOkr#AJb;aa2SJo~}18HG-$;Hx$=*!D6k68* zV~MLTFUCn;t%54S7+*wHK4chOk{C1DaXkmmwHwNdcfOblsUB^-x$i-B)-8`eK`oxS zX=67!nA;wxLjSV(>Stbj`sA_gqh>DNzMIBq^t9MSP$xbtPnD`e8Xa;Itt&KxzNTBjK57|5|Ad&Sq}!hOo8x4GI1<%Rn8^GecyaUExGi8 zr|~FT1u==Of9ToH`pIYSy!6PuhgOY-|5G!j2M;GL9$lPP6Kb8^Cv>VJRE9vSDcFU_ ziOD#mjW?>uCU629g54eBxWkFp{urO3`TuFW3a1CKoX)be5B5R&N64DP5lZ33Juo^i zHg>)Hoy6RWZ$rAM$ERjq^Z2oYE5=S9o*F$~x{R=D95H_WSFXHd(wH&TNspZV&O(WG zF!)~)PzSA;d^UtHpn2!O5-SQ&I3E*st5t^ajTSzEPB=p;c}?fAbKO}0OI#xU2$lrl zn7AXxxxf2CV%FJ6s42Q(!nFBh#K0Q-U>vw+%g!T*4qniYc_KJ`LZ+;zHc=e32lM|# zP+tra9O%HSi%ER@BecSrevvzW7KaYX)pnek3|B|f$mtJhM#~#kA66aU^%3Xs?J9qh zaqh2AnltZ6?wvhjMh{svXQN82`TEoMU)ewB;)R4mr5i@2OUw{|3|APa>i+t@cDVyv 
z+bi%I&r~s|Cn#6`bSCL>NJFkRjPb%m7za02iEy6~1cA(k2Vz*hARlU%f6Z9(w^Q;| zRn6FQ*A1ID)MqDNvVZlmrnxJ6rw_z%z^gpXfxH<~0q3pja9o&YoU$xLBpdex9ryrT zKX%8&6Lv%^qJ9Vss?9%XeC`zQ0!3i^H1ln%UB4?Ny?t2Og|+uKZ@?XS)aWr@hW+y9 zMt9G;`pI`sT{L&+*pBJzW;H1}hFcMr5C08)mE5KsKI3Xr?3K4l7CGn+5GD$1ut^JN zC|w?~`*DeP2p=2YRd8Ssi#Gc3VUSDS{Ob4Q4t~4mjfJsBPQSk^{N89co>ywkg_|zD zYJ2yHQT>x9+f*INJskfWI9XCj9W0T-E+-Zm#$ONUdq{O2@WeBi2|K&t z;6LRJ$WkD@k=6~u_aHU}ItOEcu#mzPS`2bYK--Q4R1I)}YC<<1kKgjgOhVl;-HHpw zoDM7_!5T$27Gyvl=L^TU=u~+UnW)dp#aVdZbj>r20X1n0bCP&oyuX!?selrze;AFE zz9DGAh#UWhqHc8ZSgdq`JdvL+ur3)cuO5uS)mDtN8Q%X2G)V_%G{g^~un?L&(;*lt zLk9|4*==5U3b5!B?{c1F7-2Q_Cf?bhKwf->gxB3-9{T)#nK(&@^bP zchE{AO(=|!sT3$b;lmn}@s~%7rC1Dsussn@i_|OY%oe8$0^5P*joeScN*y^uLD}+* z+=D3GUJt9MpeG1$3UzL;kK2i0A+pO8rZ@`j9U@hqExP7fD0qQyhr#k|Sa32KCyPd0Nqk9Z zbK}iPIEXdMq7-;o3hjsgi z_|O2Ki)b|zT(b=0PfGnlV6i5AiU5}e3ye*nm%dG>cW2=1;}cKx1z{{3^T{(E1=2p0YROr&#C=}@#!KYkr04S;fcf}aEh^p zzC>6&1CC5S8@E%Rv2KVf3bE>27NFDBfi2i;7-v|p;6ETZ;h5s11W2}&WJA7Ex2hl@ zY#_gz3&jFnu+qmtiDBFqN>}gw#P|rq4+;KwIFXKG(H55JBZs+qNE1pqBWbKnWNjgI zDt1p0b_bR5sd#(75PH*L7A${IvS}9rpL&ZjLJ3Gj1~AyoSISUGtk8A_Fbt2S0uFv> z2*Z!FYX4AjMlmpPm&N@~d}t+>&%|TOmLpND*7F5^d5yT)s(aWlkVP}m z78JlXKk;A>I1I7{LkX9}%7_96$c|n%=#8YndL(}zY+S|E2yVZfBF@gvOEy!WtU%l3 zbVdt#+I_SJot=m4u_+CuWRBvSf=I){M3A@@4t^(&A%74{>=?KhdoajKJ$j!uyEh#5 z`eJ(dmnzK-d1G$$Phb!j?vc@!eWOELUYibl$M9OxgvJhQYhphglpBZD7x>$}@ l$LnToXt9+TtrfgvjfS784hB7aLEhNWm0?M-pPvt@{|imKlc)dy literal 0 HcmV?d00001 diff --git a/utilities/test_suite/rpp_test_suite_common.h b/utilities/test_suite/rpp_test_suite_common.h index dc61459ea..a242357dd 100644 --- a/utilities/test_suite/rpp_test_suite_common.h +++ b/utilities/test_suite/rpp_test_suite_common.h @@ -1205,7 +1205,7 @@ inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, R Rpp32u kernelSize, GradientType; get_kernel_size_and_gradient_type(additionalParam, kernelSize, GradientType); binFile += "_kernelSize" + std::to_string(kernelSize); - pln1RefStride += GradientType * 
dstDescPtr->strides.nStride * dstDescPtr->n; + pln1RefStride += (GradientType * dstDescPtr->strides.nStride * dstDescPtr->n); } refFile = scriptPath + "/../REFERENCE_OUTPUT/" + funcName + "/"+ binFile + ".bin"; int fileMatch = 0; From 5668ebdb6e6f2dc1f86839efcfbc7cc60ae896ba Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Wed, 31 Jul 2024 07:50:45 +0000 Subject: [PATCH 17/31] fixed pointer assignment w.r.t ifdef for AVX2 flag inside kernel --- src/modules/cpu/kernel/sobel_filter.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 1142c60b4..7ea3e53cf 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -457,10 +457,10 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, if (combined) { -#if __AVX2__ - __m256 pFilterX[9], pFilterY[9]; filterX = sobel3x3X; filterY = sobel3x3Y; +#if __AVX2__ + __m256 pFilterX[9], pFilterY[9]; for (int i = 0; i < 9; i++) { pFilterX[i] = _mm256_set1_ps(filterX[i]); @@ -555,9 +555,9 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, } else { + filter = (!sobelType) ? sobel3x3X : sobel3x3Y; #if __AVX2__ __m256 pFilter[9]; - filter = (!sobelType) ? sobel3x3X : sobel3x3Y; for (int i = 0; i < 9; i++) pFilter[i] = _mm256_set1_ps(filter[i]); #endif @@ -627,10 +627,10 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, if (combined) { -#if __AVX2__ - __m256 pFilterX[25], pFilterY[25]; filterX = sobel5x5X; filterY = sobel5x5Y; +#if __AVX2__ + __m256 pFilterX[25], pFilterY[25]; for (int i = 0; i < 25; i++) { pFilterX[i] = _mm256_set1_ps(filterX[i]); @@ -737,9 +737,9 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, } else { + filter = (!sobelType) ? sobel5x5X : sobel5x5Y; #if __AVX2__ __m256 pFilter[25]; - filter = (!sobelType) ? 
sobel5x5X : sobel5x5Y; for (int i = 0; i < 25; i++) pFilter[i] = _mm256_set1_ps(filter[i]); #endif @@ -813,10 +813,10 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, if (combined) { -#if __AVX2__ - __m256 pFilterX[49], pFilterY[49]; filterX = sobel7x7X; filterY = sobel7x7Y; +#if __AVX2__ + __m256 pFilterX[49], pFilterY[49]; for (int i = 0; i < 49; i++) { pFilterX[i] = _mm256_set1_ps(filterX[i]); @@ -902,9 +902,9 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, } else { + filter = (!sobelType) ? sobel7x7X : sobel7x7Y; #if __AVX2__ __m256 pFilter[49]; - filter = (!sobelType) ? sobel7x7X : sobel7x7Y; for (int i = 0; i < 49; i++) pFilter[i] = _mm256_set1_ps(filter[i]); #endif From 0d2d6d6c378f190af6ec7af9ce02fcec3c4ef391 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Wed, 31 Jul 2024 09:38:25 +0000 Subject: [PATCH 18/31] added golden output for kernelsize 5 --- .../sobel_filter_u8_Tensor_kernelSize5.bin | Bin 0 -> 410400 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 utilities/test_suite/REFERENCE_OUTPUT/sobel_filter/sobel_filter_u8_Tensor_kernelSize5.bin diff --git a/utilities/test_suite/REFERENCE_OUTPUT/sobel_filter/sobel_filter_u8_Tensor_kernelSize5.bin b/utilities/test_suite/REFERENCE_OUTPUT/sobel_filter/sobel_filter_u8_Tensor_kernelSize5.bin new file mode 100644 index 0000000000000000000000000000000000000000..c121fd054b50a065c391467a019cf19ea60c81df GIT binary patch literal 410400 zcmeFa2Y4OD^#?kt_v-4Z_uhM#Em%^f1>cmyyh3DH^p_Q}}Zan*0K zxI+d|Vm`#5a(G!JG6k^bUL1&pGW;sJV#va=96h6)%PG|eXb&S45%U#m=|!`F1r}Q> zLj&lz9F+(*=@Z9DxlM)fmy3ob9NUW{B7mh}p!&e(D`;#`OK;L4#{c0H7&*Hil#}{+ zeX$Sa5EJaUZtj`JzP@2nFSqt3+lz6i&{tJpqI-@IdxFS|j?~xT5XLoyJ?pR(TtJnM z%Quq$u)vTUKDyM3Li@X96@beruAqfD3`rDyok&>$B1Pfk-Sgg5P#)?-cgh@E_INCl9HAq`or;<`ZlV~k-NQYpJ?$Re`c zTj{V8GhBuHi3-LU;}C_X{A*`cSls>NiI;ChKSTl9p_pvZZ0JLMk^;MiNCars5LM(+ zyR-l+4rV6lz=kLw?S@@5Opx)dVP-PX!80<4wr1BRF3T0Dm?TwLIxga+S^|e0u;L=X z_VzrGJRKL2|MDgA&aedzHMF0&kgF$(9C6Jmqd*=$)X~`X?;_`nnwr|r`s2{R9?l{U 
z*?NZ>dJUdmVkcHy6pM()T{o%+2(jAtCx<#3+rGRkS>=LyIg_#Dq#&*cVR|T-D{Z|) z4awu+Ar60|w107jjzr;vaJRY%G69D=8r%Na;{bmr$LL^y9Bl>kQ(#1=8_H@)2^+#v z@DV(}g}-{`6!&f1aG($Ym7IjyE3pYXl!Y9Rf_rL96kzzHW7Rku3}JBH;%8Jf~g zaS5@`rQH##GPRK@1{31yZSm&}O-$rvw~oT{NP%4JgCXnFjBegXvZ<4=F&xTv)# zE)FAHLGmgg9_gV#f4qXY(o{6BA_xVEO=h?JM~mxo?%fk~o^)VT;cw`Iay(zzt-R`9 zq~mI)fH`5LX-fMJV4&xk%iyNr*2f#-WYI@6De9e+K*+@MplE0e=Sk8SrPop84~iV$)fvbH4Q}8~dws+^T7zY6feo2e?HKEz7PCIXe zrjxXr;W1~Dq9H%bsW;fb>*iCZDy5#s{_^}g{p3q=wa3jXJL%PK^UpmcO}kod#S*<8 z;NLVEZ;(00fP<+E{~(`Xy~1C?>L0k@0&o;z{h7JB(>NnZ1;-x=@D)wZ zJbqt+5gAOKV9fTAgPW<}rW2$}%rA^nA^D9EbC3R$yfaW#mjxSVB&gsB8dt4$3^qfL z5gcIswKla5HvfL~SI^SrsR~|!DqfU6%@Hod6;l2u*XaYo8m%QgC+lEf^Nx9Xlhvbh zKY9K(GR}pp0$A;^jvUYn-ZC5{5-}*L8CZ_6py}=S046suEh1F=(zt(Nu=}W#i02PHc?5+hM+5hB{u7e*kn(;v{ZqQm9lYQ zk@!e&y!0>o6e0eU9qfjTKS3lFm-D?jQ|N6rRp*NgurNhX2hc(!ue22;M6r5lwm}D2 z!|Vjnb%ph=7vNkRP8Fozu8V6joGyy??V>Q+9Tng8g^4>G@gRIGJ-?%{ALcULB zH%df1Amlhf>nxwR6s7{k@fs`rKXhEOo?Y1o!X5UTXFQq>k!h>9_wBbijj0MMPI zI9{Vvv6rIxT7nnO^21xOG=$ra59_(c5kuNomS+v{f6%zdp1^%P1uOA3GQ`tLVau=C z53_~fAr=DaYGf5)Tx$dsS)f9c!4##-Ce+fHL7A$vNDe*|Q+bdzK4?rDsa&cf2Z1{z zMgjL{g8ViZgQx{!2qn)1}-iK4UA;rdo(yKW9S#e`^@TdYz~> z^iDk7iKtAlI3KhsEAso+SyN^+VGxI9b5}Ode1QUO9Dhv*GQ^dJ38P?BF+I!ya=jLE z%BZF%&5JLzxgr@dt1CfUElj}X1p!z@`+6lZ3O4Wt zSCsl=TA4AxF5ZJ6;{D1WF^&L=im2i|A~3|Yg)xZ^M4{rgC<6nOw$ULBvq>pd6yg?V z3Rvu6J1*LZv*Wta!i=G8R_H@E?C-BxznJAFsgo3&~IiGcyE?=&@BWJyV5`pd@V+d!0?Rc)wSENKSlK>IB^fZKS%$nxi~ zk|i`Sz_Q3jFcN+%F^iB5gu-uqATA06oeu z{?r2u#nEgeiJsP+re(>25y#e!5a^OzK70G+TS=V|de!qF3U>{eryXO6f1LJq(*ne@ z3T=%hgzYNq#Q;IY@hKCJsfGBA@(UK1f3h-z+RQ*JWyiAk2W{{Ryc1u8ka$<|ad)rd z0uHXzFP;>F(yh!9XiPFV@Pmnq#w(Pw*#beoR<;OK8(k_AP(k3hP_G7J$uTe!)TeEd zVJpgkA7KTB@ifrOk)bSL& z4u_O9*Qk)9vZYK6?x+moi5wLn_G$b@M_kvkBQk%uxQbyZPUbmXD{&a23LkP+kLAR# z$ci&gQXW)c4{S*Vwp&}VJu@Lo?%`x8A@K)Q)7pC`(`nxq1_^4h1S}H8vdtJl4u0IuV)w zUk$Mo(V40PLxU-P4f>WQ-gKZerAy23(nx zDAR14osglOoDhR*2~G!b8>H-@r!jnQx504H&?;e6(*_6CgDCHlL0zEEcC@j3|A@!_ zwPNO1Pmo<&6!I5;C>=!jNZ`I^R$k5` 
zDtqb_1{9GJrf}qimN&Ko3bv?y$`Y2tfv8vFUJm!#>QRl3RI|V(8I~qd6&*nXENo#L zl-RI2IL!rt8MTY$m`x5AF@#$-y4|f-Jat0?(qCy?gC8}E1x`bxc)*2i(h>!~of5I^ zjz|vuWq><>&vG`yKvF(V9CcPo>EV;qEdXIz`YEd74CF}n67mZ?8kRCvTW3eMjk7Pt zx0;BCf>fxO0kZ^C3D&rnA?`;XxRue2#xUE=03rO0Ps-1E$05xTmm^_izgWKT6U)CX zLpLZR3>`b6>^M=b;fDZ>B5{K+`+g@S_Rrpy@>1=ylAIZXRlHO2M+nI;nEImPDUokT z)D%{sO*0f08Y9GEj1B70ddef%m;4geX`u%b1-##?ZIugwpY2^vJHu+DvQ!Bv&wx=b zp%X$O6`UPc3LE_O2qWonI6Mct@$<;~KW@5+DmE`fML2$KAbG=DQE2auAU>#knT>I+ z!dxX(YRouT0|V(MT7L%SJKi?b>dPlC1c%BsiXw0PK+27pJDVOg>ug!4LOjT;N(&CU zYJQGB!$*yzi?-ytpcmmp{z-?_0{|M|itAZ|v|2VpP#ttYMsdSqM+3a*&pdFzVEeVo zXnU`?L@;_j3hNCs?umlY#*U+U`3N41jrl>>MO&9+`*PAWdaj8tyG?Gn0To$e9ETO! zG~(q#XyBJ5q6q-)xJ0Kdr4ZuxT?4>-C7#N(sRNKvjNi}@f zaWN5xK}6!KlCb33JFw{>YMLkPyn?O1o2cxTO&9#JN+nM;V)Md}NTG!38It4pmZ(fr zD-}WyqEyW1d+k!UD4nLPl5sNlTjhprGl|*1)V`duEKiNE<9bEQ98?w8N=ubwtlagG zW#Zbf7rsAW+aby`$8skk#;ikYe1xwLkYW+KYM69TA+d630jf|e0m^ZY#76VwbIf_k z^1wcIGNc%;9?LLSA0ThZh_$@q&L$4xkU}d!o|Je|y>BnY$%Suw2$GWT#BZjkks}Wu zDNOvrnhqb}4=7#NM064DpbP&fJtPBmqF_|wL!+h;MzF4gxKC48iL_R=a8ATD2AAja>;L)Y7Roap<)B!;0k%O%Oz4iZ!nGvH?cYaQqDEt z3>8-+g=wXVA^u?+{krc5DCi$tjy$q@%B#-r)k^suJL86-iHgNyc{M4-c9k>VxKx1V4LKBtEK9#%bt);pF1P z-m>K5wSUUT9CYoL;;N&Y9hER4Xc0tW#8>$`RGfJ?NL1x)d}PICZfwpu!|;!Krad7-x^;!iB4)w8GD|p8BG<7IfMk02~_VPW2hW7F|kS z*dx_fXxGYQHYoATjV7iRqc5?Of<>U# z7d$DeMVAo=nK%Y@T0dae3&}vI^oTb04U&q1c~$^Vg7_x>%$AtZG;vVcgrqKakZ~Eq zR$atv9GIN|!B{V4~p_j8Dca-9(V?srqF}k0)gm9aJ}fQ?%GDX9i;na^s9! 
zb;=|algddnDbdXm92x83UpNGNDLu$bHkvZP9SeS9a&#>HDkFgo?$~JM0|p|qa^u56 zqcaUlO9DhCz8q16@()!N0h+mh0z6^V&4ep5AtQ?u$ex7iG9sRSD2r|o8*3VwP*<#G zhiV|cmVST~O8OaP>CB9!>|5s(mJk;nmzD%hAazn#LcGpFWNA8nHJj?lhzO^?7ot2S zqdiYYEnCUTT9jDI5)lZgK{5W%6roZ{f< z0JDtB5EU#}%5G>7MuVwTE< zTelRkDMWX$z9xwfVA4piHI#+M(NGkTlt#a24xuhCFrvB0P(T7&FW4K4_79FqFn!e< z8y-z@dy}b0x$I#ppj0K$n*b{j6=jhDu4ukyV2R8+ipTAWFz0EL5mbmZ+9K(25)_t{ zMgK0v#T6Cd<}L`K0Q9d|FYJaHJ8o}OP7a+)44p|d9aIU-Rl}~E8a}3?ARKKOsV_Ax zLcNzu7@a5LFgVxfzn<7QbwvlcqoY~clbH!~Tp;hFBn5^%J7e*rQT{ z$O4%Z5af!8(8)|JP(44uMS*(LF&PGy))mINTpVvmN@^MnP@%EuS+V*$4~)&T;Jl$rASu_wQ zVeHfGN$T;L@gX84DgnZgc^P!7QRVp&G5G~NMxpz-6Vs81=-kwZU^Dl|7eh=JeSU<+ zQ;J>DnHAKaz2l05?+C=^NlC?2L#47<#U&C7Xt2&Mjik=TEaV8|Rxo7)zQc2PNfTd= z`MI=$oCK2fh!n(}gXX~is8)}Wd0N}oJJ0`to{UPK0_?7?Mp$%*81xxWT4n~HOJIJg zjy*m<8}`zXFGu129g&ld*++!aYA%rH&f+q)uyc7sLvY=PmleB&JA}_BAUr-_8*u3+ zK@-6jk6}U4`8aWuXtT0}UBVG(6k-d`$jJx`jDV$t2;Jq93Npa->Is@;O^HcQ&!p@I zcr(*IZW>YZ3SkD6*en`I!cq#6N<9$=$7g`aCDgQ5a8$gWn*yT>%ej5jDuc!J)e#U+ zS4K=yVpw=OLX8a9OPrXjlrZX=VLTC<%ak+|Qd6>cK8Y>Oi;0fLxn~px@oAfAV)oRs7oEcZo988=2iRu1SPCB@WR!lEhtXqFT;ClahnbVa!us6{|b%?RDG z4O7+V425|Vm+-i}NjUY;80tGz^my;dMaLFiwN?lq(WuyT$}Ci(BGO9fa!Re>MHHo2 z1YiYCOmh7c#s~k603nYp@5fqn43-U}JCp<|>82oUD#>q^~O5nAlH=4&>(l#YHBQ;)M^(p0r zI(3FgwL4q|>jf_|D^5*r6pR|b5eL*G^Eo_~)MNw{h*=p`a2i@5ECm)K)cvJ9CJqaa zKwK(hJ0dQgmm$eXIs_u>;z4E=7?oE>%>hr`=pu}-%fPUB&P*~qMCIz6R}ygXx&=w3 zE|;w|X`qitqp3VBEe$zvl@tVsu%rMHkeEs%RX~U*ju#%hbPEV_$MZrmA*FA}4)td0 zP8Xb7Swvw$osZ#a(srwpNrE>IMhF^0nI9CUuIcasG;)%@CltXEaj_o6;Ym(FtAlZ5 z9_T_ynC5-59+rUYCFm0l2un#r69r%)NG-tI8X_R%SGm3 zDUPBQk(Jd5535}u0kKIbX)*8y48UAnnYiczNUzPu(OH5c?JI#+w1L((1WuOw zGA=nD6oNx^_`UPxPK{N4jO=>zDsb_|B!tsa69ZF01!f9wGEIm9Q7HxaG{T|;Csh>2 zQipS8mm|955xN606RnE{kOon?qv-^+cXR>X^8tlsDzzjA*Isgg$QP9>JJao` z&B#3Twp_Fqp`pq7NMuk#3KA9^m0DVn6vDkTAf}=`1}Z_3WsT@1@gqw~+A8iay~1Nk zV5DAGF=pNBC~3VSvDP!e5fQEkK6_VsT3Cq5dQ5#Y)(z2FiMS>(FNMVMA}~5G$`u%v zkX}@g=3%=5kp(5V8a%Pd#kB}LtEtS2iGUC+yB&m3FVA+0V(&`u8R`|Bwf+lDO!Wlw 
zjb>C5?v;#{u-fM8D1j{>5U^gvZ6$dKiHwa5i%Uz+$cgi?-H`a)B8)6ad4+{Ez?64X z8v|U2XW5%+Aea$=d3t4pT+{tu5N;sIIl|LDBsPV+orq0!UY$WDqpM=|>N-Bt6C4p0 zL(>noKcX=)Mn-$w$a7GDC#$F^&+Ex8F3h4csOqj`pXQLDZ`C!f`-sAH#|sFc7NmPE z<_3n+#qA0WpyeM4y3%~6+~C~0iZp#qBQ9(|rP8R4J%Px~%wD-Y?%@k>$`GN2{2q$hq+v6-Oz*-HCw6vm4usqxTHjdSfd}yhi2AP7OVX! z5rKIe>sHEFM6|~fhugfMgsg;^tjutCY>YQ!WJ5t#ikAlG+`>ZE)(1BYla8Xs1x?V& zi%Fo-H8>@Q_G`@eoSo~zZ8Wdy0{LpSj07iD)|RP%6uBa-4{V$^a>nTSk? zV9#m$$(K7v&&9P(h$evc9g<7aX_&B1j_)+0D*7yQ&4zLywg+6;TJd220_&;V#H514 z)F^L4lt`}65gBWbM3ZOaxUin4)s8Nqt`KfVx=i{jVHLVTQVP+tg5t1gnpm8TUK(XB z;=*X0iKwV@u8zo5V3^*@2*pMZwxYtZ@WWj6a(qBkRdIOIl;ycXl$FrD zWwy?t?0bRtjV!D}uR%j%T>)XVh-IDMp_qjOb)G*^30t}UmMuIOvKh*STZia z2<(w%7w6}s#U{pwL{(M9BsWbc_N1352742eER$G1^wWN189T*X!g8oHQ3-dsu-pvw z#-yb4R)D1#TULOSrplZ@!3@+`MJq)QK50eDxy;B z8plqVT?_{q={g1E&DtF7)sAK?v2V#EBf~MKpiL4}(`n4)btakc7L?)ANh;ANU_GjF z#YLHcjYbj7S;+-Ci8(p(xpg(gdFd&MG5PIv(J5sk*57em4(!Af zqR*N;AJcFT6XgkUM@2@Y&@ES_^@JiMAqPg{c}OQobB2NFR#x!1vLj>Tvoqt;(v!=Z zMwO~32Xxe zg&M{q1)rh@dfpMT=-#xVqO`c^$Y|szA;ZLlF)C2dJZQ;jz%d5|yMx0rN(z!ap%E!{ z^HxkMiHIM&{o*SwId4mIiika1KGj>dsz_7%nDv>D=(ZIXZ&FoFR$3-4kFmYzlOK%t zq1NtKFDK|+nBK#)OH1?O+~FCGi`UJpj&jv(x#W`buQ_LKZGdQe?zfJii?lr@Jlgvh z(gUe4O*MB!VP$4&P7)&0D{7~ZwYpYB`@Aqu1-nBca?45!)8bM}M@^hRxjH$1*y62Q zj=%njbzMm!{p6E-4Sx3JW_z%^ltqim!W}aTx_Cx9FC%-+JH*6YD^;dg{em9B1O$e} z=9O3EC#RJSudW|Iwmg4CXXm)i)fb+>VNQj}UNXf#3!~p}Z4FM$jVx@a=iY;-G^vGo z(Rg&%>;93&css>1XEj8K+6+i7t{7Gvn_N4*u(D}vLv?FcL)EDHyN;MMeN^o_W;SGr**2^@c?&vWpN6+Qbd;T{We+9L$Nky<;# zJg+p#9pX-}s;I0;imRy1EUN3C*IGGl-n90X(N(prV_RyoNA`?t8t2eCATXkE{NgDz zebbO#n4g@L8O_7-0rrWol$6==H#cw<`m|Y4eOXTX4L#mE4!-G5^E>VoK_(s z>qmD_wFaI8ib@1!m5u4?z$SHoDITeYaY1s*B zDUq=mm7|*{cGNU2T)%d9Q)bNY`K#w*4E2;xo}zcU{#R7k*|1ANeRIn&+|UHa=H_M= z=SN@{fM#Nk)w2DVrCWzY>x|HVsJOlW#_KAyXKqLcD!8e1l<-+uU_*3#;(RoizR*@mf? 
zwwWp%tBL+CF_kyi6+drc9i4Hi8@m8lFXPDxZA+SU8OgY_iqts53ZZzuVV3~fX$?xN z&JTA*#iyhsm$kRGE#AI;`GmH%@iPuv*Paj%RzUk>zN50O-wR5x7mo=Q=#k--rSwqU zlbIDmw|3SKC|J#v;(Q%RLZ(?fF_V9kkyV`@o{*EBljJU((KDiL`Gy6v4_nqbe({1# z-1>}~RMpG-|3>*Db4PpV^uvmW6?nPFWBVgCz~j?y!4V{Cu1VUE*IR1T8PaJFAgd-d zDz`Kjy90SE7gZIuFPOXPq)p9rvzL`eWEYK`KYzF*#rio)|2-DAL_{_YFl{~PRCJtrP#lcwnk5TsQlheQGb{5#%GNC^O|R*mxo*$O zvb>&k9fjo;BbM%3!jBkW9Yrm@e3mPUVI7A7icV5p$D5y%kVQXlov!Zw?iBP#(~yvMmGk8lkj8^$#_-{} z;lbg_`FZ(Gb7qgLt!!+rXkIa`tZwGy;+WL3x}HUo+w&~X9AdogD2~C~63{>Z-zp{} zRS6ZG?%<4Kv)|>WUxAp-4M!gdD6Hm)_}zn^%DTdwjKtJ}Y;Sz)ylLGHB`uu=6^j-% zwa(})jq}E3)eNs3T^Z)0j);PlqVVSGxT007Do%e#a^vFd7!jIPVwPIiw{YR6QYqL& z9@hW$Ep$4(k&{wao042yQB@u%LPpJ+IcrRA%Z!?eSqoc7c28-{aJiGRlOp0$lT&rd zGTmXh)x-1^9^$xp>n|*NNhR*Gv0H*p6`$=G3ETY#?xx8{x%gr6MX z37yOFiwfaN=B@Q!8t%+&Ly6C?uE!okWi|zW-3JBeipQ1%m`07L?biVh-3INeEiH}@wUlnX~Rb3c>==oY6>!k&EGaFIeg59 ztuyOu#?75jQaGxblzoS(Y%yY9j%|Y>Z%SHHn(vPxdPxM@W3hT{8Jh*OZv6|%Uc!~@ z&strYRFLNliAt-e%S)_Yuw{N;)X3GFrxv8v&7Cv6Xjoa2H`vv?)h6t+&RrYyrsbxk z^F#7p9-Js1dU6TQq;-EhHF=bC+e2~bShel4ZRPIxcoCFSRX?I2zOZB7yq4OYO`E%t z+}T~zM^;y7CnY7sQAaR)BBa)`toLd^I5RgcCMQpAyOKzMC6cu;K+ywl5sfh>B*baf z)~P+}itVL=*k$pSH`JDAMkW>4O5&}{n-67LYBz-IGLa*Rj1F1%S73Dv}C$i3^f1qfySFLo;W z{x)cbt=_e?G-1rrsWs*GBk+jeOhf91tg-d2O3lpDy*f_e-MxagDR@~ErHcO*)%eTpG44Q^6zw(cF!aH z9bM4x(lMPc?IU)MgLM0wbnLgZ%xSJJNH1@$&M2rXYgjltQPgcX`O?SU|NB3`dTz5d zO~i9{8`~)amosjtxzd>O&GWpm!x`Kx|F~7l{8p# z?V6p_d?&#yyv6@Ib2CS0Wn`Uo(#Xkc)=iqcVKN5aadWyxj~v-FcGk}G>Amsa-2IzB zE`V&E+=t`R%X=(6V`@q{k;pWak2@>@3n*QGAwF6`L%Zjp0!?iu;Zq4gLKE8IQ?UdwbXRKtuW z{w^ee^Z_yE-@pu?L(iTE)psji&}47xq9e8~+Ctw$_^yO$fYetvt@Dy{8F8#Bla2F1 z8`0Hpep%*ej1l|wN4`FC9zO3V^*f^R-DIPBrtg|!rJ>6*V{-SEduHcn9fkLib}Eu3 z;)u(DwJK$J^|C3(4ApDL6V=Dcl~d$BE2QpSN=N1FYp3EQV@;adC(7&_bh$)WV`2dd ziRsptbjc7~2}(NqTxE{m3aX6JWP~x04zIb1M3B|k;y3b~UAHNc3J7l?=C`-jY9cEB z=D~eWLw6xgPFn!hYKt(Wt^fQSO$WXjU}4?a@)RRVGODa25?v}n^7k_cqC(sg_^5Cp z^e5Of&rropY=>TC{EcEU2WXQeq5zU}k`#XOxoN+XW0*dZzH=nR&xpaI6?DY_Y@{y~ 
znZhhBplHwhmr%Mkon(UFKV4Hcq9$QNefyv$IzA7qRQ`JsPMB20Xfs`^4-tTJGmRkn zWave?3T%XUu!c!bd1D^S;0pX)WpOco%@xZbif0~IB&$UP;pn zU;m~+GDJmTbX?~k5dA;=saC@W^CN<3B)5wY7qUPOkqEL4-inn4j6U~<8pdn201;n_ zq}S8_M~I_%91~vB){$Jyu)#mbA5}r{4IL17nk84$KQ;!=HpH< z7b|_dV_FrEk5DY&S_a)%E{7!*pxLb1N>`4Nz{hDBD}PQ?*qS!rDh@_+AoUTzX;Pt#RCQK0LS5dA67#_COLK@)0xJ?|Jp z1?eupEUP9eQdBs2zx)#p8Vd$Ug?q&7a)}U&)uQKmISnl+KNeyIR&i_O1M*)G!zW0u z{ic;H%s-diK@KJrMW=HB~!wh*VtSHQ)0ReuK}UpLfh6>t$rxN=Pp zm7U~T#Veu~9V_L7AeMDuf8(JrQE=>^@Qo{$i=ffw3R*zV`}dVh5x3EM%2!ZqC~Sj< z?o%j#Cu{<#jc%!NMYTG?Tebs&bF#sPHmwx!J|EP*Pms{haPNkX zDi`#Kw7>6F;e(8k`b|_=9a`?KFnEAb9|T|#b-42lAwIsFV56Z@aVE2;uU^0I@DCOI z1u>3=>C^V4o+&>gh4Yk+6ZlpA=iG;TFp^0|ug_l~#3t&%O2Po=kP}5SwTeB?W{`$A z>Z4t7a1y~rgYP9}C6UCeJMUZF`XWS7z+Wyfx83vpjmoBRD#RmSHgE@;Dt6K^3CHex z)m$Z0jIODbE}LT#iwm5z{@ytePn$4Z#))#-LI`nC{g#jK1G)-h&8A&DF8*i~5bm(I z5gYza}y#8R@D0hn!@+LEFxg;R&aWD->v zq&0vlRiZPEE^`!IgUzUv7|qR0#Rdu>{+7!aVrn9NMUaI-!*Z9FJUldj%9=`$x;;xN zwNxq3q8$z?FMGa?IiySu6@o>wdM&<9H1^&wVn&uk*6~L<3mS;W7NiUX%pS$(OfsCZ zT}ns^Zox1$jGeIBH}Wj4E*7fskH%7?sz8mQGIkmIDG`Wr5tGS`q$N)59W!^$+^ty3 zl&v;XStMEXLKBz%3OE^q8n&ZEnv}P&4AGVXTE?D?Q4j!X%vtlI%5a`?#B~~JG%GbS zY?QPi+LJ(kCXU#)UG*yZLL|>)x6AkFlQk3Yp}ON=k#Dn*mZeoHaB!KbttspsO+}>$ z_%Q7qG*DT>_)_41w5}E*Oqn{1pqG@d7q~hKlg#_{^;R&vv@;fS#!+EMAB&8O_dt4* zg|7YQ8SVJ|+Zp0Q6_>K0;$i^?=MhK_Pzb1Npn5e=G%O{bQ6Chdxl|E!&4X1Qlc0Bn z0)&|I$HU0Zzk%<48o`OVJT8#^+h$RNio*#uMl=A^4=D2xFidr~z}V~Geu+Ur#E?Fh zTK+}@RKRw|MeuTCTv@svZgG z^3~!aFU39UlfzDAGqecBMlHjqurLmB22gdL<;Z+LoZQp5n5aCKHeQQkcTC5%S5bgI zg!KTBRi=9(u5S@sqY|il93q(da!J#-K;T~eon0l{=U4{?L##qFxo}nhVJ0#>@d9xy z3;@vgHO`aHVj!23$bU3-1Zljpdl^-}1&iq@xroF*(Xu|3d#s?PvKriobE;CumBE0( z2ya3;N`862g^=tRh`ge^djY=ZUQJ^W^gKe{y`}EiaT{6r{2R7qAO59Go=oEXmyR61 zn<6Hb+$gx>_`Y7ng$6%N>dpd(^TkIJUD;Xg_*08UQ!B2!z5^M+%ym77-;F?$y?>K$ z(d0zs*Q&zzM|3l@rtX)vQE&C|wD^ljmS~lH35kd+?L9ZiF?uE zsT?JrT#lKJ%jcwldraDlL1#Dm52G*0-=15lD0k5s+EM;0b;d1P<(jn>cWY&lJ|$T( z(%zPL(l=Xp-lZ=g!@i}`33?}wJcg8dDT%5=T2#amI?#u6h$H1MByNP70pf<;+~X;Y 
z``(w*r9eJnkjpKuUvd*!QC4@&MFdhjVC>w*m4Yusb?OuDe*Q)}PhF;c zEZ^OrLI8#AD8@dG(f-{S(X1J7%TG034inhU@Gyo0ms8Y8rV!HLZ!T)4s*>pi1Fo!^ z_8AlMM8MZ9qd1qo{qvPh(Wm9V&h2rTh#+V)NoyKVqDAZ^#_M!IqQDs5F4ZD%tEI8p zsFEVeHwG6XpKr;+hPBZxUc;>Ai<;RGNok><$!qP!3|2@N%P)UeNZJT$4{U&GQnnp0Zv)7Kkn;SYEltpCDyUWmH6cllm4e>uAW|3XK5qGF z?1bs~jbzeM*Bpju@as*YAi|;>UwHA?JKVyHdmZ1i8Hk+{;YFk=wjsE7}8VT`Yb1W4VDByz{~x zr(z-u_cdxxNDfC_yT!{lh@LA=gruq+5{SaOIGm|aJ}#UYZQg8EJP{KU%G|4tIc2#B ziQw-SOI|(_IcC`CsVnDU{4)=Jq5+-s@vGNfa2N)Z1mD1L!z|@^DZhl%og(2u(wgbW z58+&BP08g%L%|+Ffe6i|cH!b0foaj5nO47k&5SwN@X7y-<;|dyIA!Gc#WP0GnbJSx zqtJA;`OTN_xn<9+2A#3@n1RE8pnDev(sNXnR772dt7{s)RB{{psjTa2PT2__J4z3*)Y%^emaeMKh*t+sLh$ zO8CYp=TUp(mfcDiQxQMvD|XuqgyzCB#Y+waJ8uNM#&HH6t)l%xY|HY#pv( zKHl}(TB;|{A3uJY#7^nVB@X9L@D*Tr`|e9IF*r?D>kbZ1@wzat-g%UXRvK!{hZOrfjT zrEco5yb;Y9tAZ45&oy+SAtf0RA*JiKb>!z#Ld*?LyY_;vo*tLjuxj649`eEylGJ5B zlE#ej7hZbBx%*!D>=?j_5qn76E-ukwBldV8)i$DoRX8d$EP_W|Cg$A#@?rw=I~UBJ zHGjNWrk>6xWp|3;lstF*)LrME#JvzNKnr2*am!If9)A9rTaJ6|v+LtUbPZjh`zcnQ z99@t^ClAjp#u9<%a4Kl7)Y`NH^`e1{>07qU9$RTvh_lGXCqnL3qWHhGY_BNTe$ge{ zi4c`;&Z}t;WxeO@GdAzI?xlx&L|9wC*63A5ID&Hi!&r4;arL;$tP0(Dq*XVf`RaSQ ziPQ5-y0@ITvN}Q=3*gPmBsK03Ewwlw-LT=ppI*t`D>K`~-*gS-r}@}3PdoD`4?TNq zkSO&X36#N?oI!n|Xnc%rEGunq@62(K#FSh2E!@smaL%Urty4E_o2V`^3`2aoN`1k| z<5yp$0ix&HyZ2NPDI0TRAoJSp#|$YN=54;{)*FBJ{I#?Q=~befF@5J?b}X5d7}(mK zn%Um$po-4#tPS5u+m?=R>6uZc*_CX`8J1`($sbu!wf3?-?Ie?s=@ucG+`&3t8U{ke z$kth)L7aQ*<(8Pi4D zxT^BgYuXyecNK=kVXjlJMkI!3!j^5z7Ot60aL3ZR=(1^(c@QUD6!i^D)O5y*nzbkG zUd1JeJ*KIn3Gb%ZXgiJ}9J-M>7EF!%sJUG?R zxWCT2<)%}XQvXaZs$6~?dj4?rHYXBF>>F0(u=;XW(3stq@1D$;VPayq^)kS;-yKg{ zGbUu`u6**Vm#)6}q$0vm9C4kgC^)WNfXig`j5_>45wG5{$ilsPJ$k^Lo#RF=#Dd8+ zadF4OqcI!BnUfS5zG3l@GJFC4A1bhI@6D%-V@u^&YIElz?7HLVICV}<%H*Ga_{g=- zJV)(HMaMN=VRX!EoVVx5mYl+>9Jh$hw&{STz52miUOKtgEH6*1S+IV5onCM$F?2c< ztIOx}$CK}rj2<;*$l zjOBSGIATVrkw-m;Z~cm@@^jnvKJ)M`@5>iD6jy%gJHpnTxh5lV!jvrh)3F&QA?U(i zQs>oXotiOz+_<^Rr?!uSuKiExJj-K%GUJvSR|?lJ$p}bq-*)~sG(F0=$sconjIkhu0 
zwu(-j7ByVxi5@p14k#m-=>PMh=TR{Q%5&ZApr6aKY3mpQ^&fa+_>Fq>C_UGPmy~TbgeD%8d zWQx6Z-fTZpTinM%&ycp^CsH1pqV_01edKUEn9(+9K|_@*%OUO2Ih``CM>QkD*h?&8@@+yP!27q;`py|;u97SL)X2~Mh_))oQ}e}2{BQ9>R9HO z0GMZxLtEpAN>r|AFYZejad>3he%45+gL?=;k$Ej!I*_GAb?GPw1{cmhGap)jCmt4i( z{(vZ)ZZUgOAyWH;PR5~wrM%i0H?9rtsJ|oLHRCX8`3CF|`GG;((TM=>*q?O@NSFPI z{IGO#rYv^*8cw7CD6syIiGD(?i?7(B!q{0K<0b}GgY6*n5k4dY<5|i?V?W6eL_eSr zJG8ZOd?NjR1Le*DRrfje+SWO=F_HDR@GmA`yl`2cK#0f0-|+uD5jvzLTX}n22PSSb z5uVHdJ-qqxiiw3Iup%=T>W>na35jYGfTkJLSx48?M)eRBXI$3EzhA%f`_TO|e^7=U zmk_%d*^l{v!;U@vS0u0>{try{4TUj!s>sEDq%0!OXBHg{_t4hM_#vY4p)X5Dtw1;w zj3=txY-ESBRzFTOA*lKj3gB6brk^SeWmJN4N7>;Z*fT-+v5FNH?#_oaBIV432ZK5U z%R6wM4M4y@m5P-UEY8pY1(cmQ6&dayxezwATfY6BuY3kn6&QDSjZN%N_D7(p$W|)P zg!~9G_+9!l;Lm_R1O5#7GvLpFKLh>@_%q@_%q@_%q@_%qg!(zsMY6ubLwiGZ@BBXpO>!J)PkFM?28Yk|A0~SHD$t5(ST2cD^V9uUA1vW zbyZtyHV{WiD+dz4@T92@+7A!%SKu76De(LK6y|=v%t1&@Ga-%SkIWHfTT5Ab+;MZG zZQSF0iGNf?X4}q(@OHIt9d81)We)^OOTs5UsD!G@9A7p;q2AM}-tDWxaxiT0{xzem zPb}awo3Gm(`AoI3Igt9=KmQ9ata|*^jSCyoJ)X2-wfxzbK-=!UWUY`tji>C|EGKl^ z@-nc?dq{nhc?*>ixJl^^?0EBEN%>#5uj?8;tgf|t(zJKGMjLDs28!O@?GxG6? 
z2jjG_-q`1e_vTugGy8W&TFNi(Uf(@o!KM?|+w9iskF${jhfcrazRS-#g%!Xh_o{3mOUxx;4NX1(Wczjpd#*aj@g zsCDhD^c()h^{06NG~1j79objS@O+jmBZ&G(2pg`h|Fq>W`Xbmb%W_o9Hbbr9?4>+4c?`qJ^AYzHo$KoH|I#g-w7WA-b~Y zjqS^twapEhZ@U-_G}SV#fUA@dr){3LjF`g*y^GF5cO9WWrL>VHE&9L!Yc1cq1Ak%c z7@sB{QyV_l7a1u|;sZ=mf<9sgi2OpSP9t&Sj%s>K>|2(N@6@+xodOQnh=IhQVgbyt zAR}_XwPjxLTLEbucZ*gg7QIpZFTmAz%Prx5kjNVrsudoz2n5jTNEOWdQ&z-v!cV}Yh9y*+g) z0x?yY-CJTHIGfOKf7S2cHggMqlCc%A@yd zgT?#d5`AOToLT*_21O!!K8nBNy#OClFT~R5rP)jDAW#abw{Z~u<}~?R`3_L^K{ZW> z$Xx*Y>T3Q$RAf_?OXzFl=dmeG@ipt56%@y4`pO*?);xa>zJTYvu@XXEp)k82e}>$r zKB!21m9KpB3%!DamzNsP8sP{>`^8%^eM|YTrChfGFYvSj3svwB*OK|=W0q_4?7Ra< z+np5MI;3N)_q0wt#)>l1l4O!)r#{3IIS4?TGZ*v|>Z8;b6Zu7_*_tUO>W3;-efUJ5`VPGizl0T=H2|oY{8KSH2BbBLPZOSV%+(}kOB#({_4W&ykfMaYU%r>R zLf0B?i`(0fl=BQveGr_#9B4>Z4Ydf3w3bybMEmcQX#1;ro3j-DH_#`n|0Rz8#M>zS z$&ew7o|;#BwG30}1>acY0RTF#dRD3OG2Wk*rA)knwNBsbBmTPm^#}-guyLXdJlmh4 z@1v=>l$3($tO5NGMfDwax+<>gEqA#`cz!SMudjrK-~0~~17fMT{-~-MBJ!=1jQVLm zHIS;6h%ZfJ*vI%E+AD zxZ0*-&EndSAgLB!H%OUnh^Agec7AQJgQ!hBkMeCuDO>uN#KdXJuX^{liE<0^CTmTl za;JVHu;7ht|f*~wZ+3qNWtGcdh%Y{X8MH@ zJG=KO8OHb3mTeZaH(=B=3yVttNwhlb`7nJDhtQzF9m<;c8#_%HSLCqrKE-jN!yP@% z@F~PIiL&KhwI&GcU<|UT=Ve-%y~JCVaD#{kL$*IPf>>-Dutf+`Q$hoQgCD}KB6ZUP z=F$yOXB0yD*4HU8NJa9Fm*N1WtuHRKeRk-SHL{A5Y-9#nv$n62eyCuGkYI31m!Syx zu1X^j$N(P!FD67j0_YTmr1{_2 z-ADFPA0SGml!`L?2OIg$*A$)*g8(Ud^rDM=h=C?;#Yjl`=hu6mmQgvtde_sIAP2@+ zw2D!CvjkM@MXL`^DJn#Krp9M2&G&7!gN882k&UBN^>6sV-J7#8Bj?Qro>HS6SW=GT za3T8!NutJat*1A?2Dz^p3$$iM5;X^kLir!12}*eWZW zWCiz@2jM`$v`@7=e3jlf zhOPx)r42QL3DAh>dYI-?WyOW%^-G~dS^3)Lxi{`1Xuu`1;1P218Y}kW`K@CTwap7; zDj*VmhA0G3DbQ@9oW<}IP`d=}PfK|tJ^md`iO*Vc_I0=V?lrPSI{a+slSlP73bG;@ zDJ@`7`Q$xHf(WnODtp};^fosTd>#9M26i-Kd=wwA<3usd1VV^{&sNpTzq&vvxz{fJ z&gbq;kq34B7TI`)?34CiX%o{~Ucjvgcr=Rf&E{#$RN<&JUMl4$tLt!sz-7t5HE^(% z4oI2Vbc(8LUi+F{pHqD_-%Jqvl-Piyx+5Z-O9P4L-E{f*%oc2^{~pM!x5AZ)gFTwg zqnd^r@whGO{+e9B%=S5m7hW^6%NWN&O%8cK*?aU)E`3{?`-6iGsh2)E8xPu9@`dF; 
zHJ*1WAx81*m|0@SMMmwBQPje36d!@lNp85^Ysf!*J@p?8Hog4FqK+LWZhgq*z5g^(@Tl=^q#A{@_2wJ)KcA5{Enke{Dbi)+nu;*+E6T3yh9hE$s1x>OxdMxIE zfm`mod*c3;!iz@EIe{Jt>7t%0DtWmB^UT#kM5sL~x=_F%!JDFOi>FKZfsGwF^fb1M z>_tf*J@sYVS4>>*)jB2cTn`zUa;B5Ul26O;^tK73PX2+yw?Ol`QTvrX4{Vi$XGv!I zqxyjZA^UJU_t0N;HpcyxNMaTTg2Vf?sX0zz%o*U}=qLXmKfZiUl}SC1SPmO^lIbr? z925)OzV}>WQf6WZKplaJOP8xJrBxCaOJe}Q`pa&8`NNN%-aEH0FEzRT)OX%k&`X&t z-|tIHfA)^EKovsW&#HV&qrF#&QWo|z6K5eQ>vdnaLLX!8&0_7kr%I>nql23 z)YF)W5kWeBEk`D`J6(OQYj!3#Q}q{L3FZv&pe?-c?Codo-a3iLq#(=0l{e5OAHQUe za^x87kCd{a6}q0_bF_tPSxY4+*csO|lPr4!0E~stA3SThb1$cli43!#hK7Slt5_%}f7dIVUg*QKQx(l{gxa6{}jzQvVu~8g}x8y=s~T$lrO07w_H-Pi_Cw? z7duHQKf_o-2L@O{LR+X5bYW`~LDi46_us>yC5P=cO-I?!ZvMWO*wC zB&jGzsOpD1T3c)!4Y^s(&~?wqPo(_p9*Yc@MN;}Kf9MQa3OSZD+J3l@9F}m6qa24T z%=Bg41EE}Bu$*g?79mcsL9PEW&Sm4*hmC}w!{QGEPQ_By+}P?sxI!;m?-Lj=yU6GH zGDN#p{M)GdVS>?ClAQ+QM1b=1b}6m(ehwLoz})3%4Rp6+^6{Pr{zw*{aF7TeXh|nDRpuK-!DLcQ0_fTAG16$Q#Mr z%G!pO8Aspw+H3dk+p_Yo)t$pvUA>PNsXm_mY&WR5SYDi|0`n2^K~(rmI^CRwKA8bj zQDsv$Y??DNJ1*KA;2L$*tv)U^;$tpNrBe4Yu)?$y@1=1*%5A%jhwU z!wd71Ec^^+)g_mA<<-;%k^xH>n zK4K(#55`Iq-6{;5!+m5KE417%?s2rY<<5vn%D9-rsKp!r@DOQ!^Ull9Jo-5Oi@@_%q%e-EgRP^ znK~*zcX;=ld9&v(m_50B;^c|l6MIMwf~(fAUNmq1oXMRNK(H38#irOHE$Zc zcg|R{e98Rj?PYjVZ+TT-dW%}`R+m(cRLr)XC2N+? 
zCL6FYxob>SaejVP>y*VCHY}P&Mon3^di9zm zlUtfQCPQXy`}lTsPMs4wTSwQyY{@W+htDND8ya;Z>uPH1MvcO8MDqyz*G*f;__5<> zuUWser)_Naj2Vat!ue&5Qy0vi);6rHyt;05%ee0D_A&J%n#Z+`A#HPpEzM&_jo}lb z^KR^#wP?YlkyTYS!)nSZMz%CHwjv5`Zgf`Fq7!yO4dD=|;Y09JO=n+|HiG%Vv%nfwpKIT{paW>O%C0*^@iRbxxl@XDTc=HPlwu zA_9(s{OcyP@`cDWnOYdVxohIMy0T&8XD^;Rp&h*q-HZ9$?g(=6$~8-Rte)A@(l&X) zlDRWx&EOE1&KM~~5zGTUaj zTqo4Eh7T_-;66)Hw|2}}y0lA(hW4)Zt|{I1#TBEwX3XepD9tY_%7}~0EU&E{F@D}* z8#k<)iwmfxa>S$sb0$xmHnnXe^)5PjvSRmJTwNfd*~X5WIAg)`RV$V*nA|kVG&0Ap zJLcHVi5*Sompa+4=)83CfZW+Ss=B0l?2Lu@&xRPkkvi+7Ns~H}#oGGznJYJLJ#x$H*=<#M7!F#vQ=87kmw9>p7}cf8$JpkP z<=I&!G#YT>Q^+kH(MG{H)AidjfrhS)>sGB=hpVm~*KBjc@X-_IE?YTOh?$3P+q8Y> z)+LiVsMk!LyYaNMkDS#pY1Xvyjp|xM7N*WyzHnU4h^ebLZaMm-leRDCp27Wv##tV> zSIwh-=!gIfR8&%I#-G;q@oi&p-8GFF(=li9TpB~icTbo&YtRhYnKA4*%$$a?X_AIH4Kp=qk~VFi6evU5-aE5mOMVa7amI&~t@wk$roEyp!wO`v56?FzswUM?0ZU^UG!E5tHwN?yg_;Wc24$o{>(BbDYP zg;Zn9#I?7oEZ3@0OW~h*eg;d1DZx@yQ(a=$$t5swU~I&O`wL73`Bs%uMb|M{|163y zFVA4HIK(&5-=jyr;27wwf&II|vPx$CFz759Cb`_?;Mx_;Ac+YMh6%uw?8wVVN-k?0 zlBLtxGLltdby|LIiou$dBJuZ*GNxPgTCf^fdtsDCh6eOv7*B=9tXCOxDk^e}@lss7 zLM5hjlQd47U7DAcMtTpd`>?KxBfSU481!OMf>tI`>dY3cToe%^wq$0Ll{_p?Yu1Wo zpcfL;wsC-YB+__ePI-B0<1xZPc!ra8jFiE^ns7cSy&orx4x&);mJyl26e`qMN zAix{`h7gjFl#*YZn_XDsNKVbnOtq$F6_iz$WF#3=voozaLuPq3go+%KAw4%;D-sBV z%A_<$c20Id1>kFkj2Kdj#YQ>IrDAykuIMm-#cOTArm70t01T1rD6A|=(G@Snqib@~&00fRR$h5+c^)=I zZc%Yo5=>Sh33kkahgH-te(K2FL_?Y*&6aGor00|l9y@*E^0!9SRoB$jl;q_X=I2+9 z99c!GV_(7RsVdF3nJp$QcsGW~zP%%Cr6u-6ZK7T+lP6_Z1ltE+DuKcXaydW!UFuuTpI)zZIvDwY)L`NocArgoOD2{@^A@mFi4fFQs-Tk$fUU=zsk1!dGLYRkwyulw0 z!;VH*F_?eCdsu4Mi4}EM9McMJ*m&!o;ME>=cbeqSlIo{Bh!R4pXT_ z2=kOk6f0CHz&VVGQ6!o53Xw2gB?^btJ6-o$$63;nHLKwd0t132@gfPC^IX24UvPYC zUSVE_MH3H`n^*)McVL7dMi?CqlQZ!lB-0abooMGSgBD6Lt#96#p1juQzoX^lMU!~%d|F!JtZ|IF&_OD@R1Cv zm_YA9sTC)_njyn04SwC;^bPXrA0P-13Xi6KT^RVe$wpPIK%S%*MTuaki6g3oi!??< zLcCIEG-zNt&9j3~7Z#;3Xhh(1S)m5BBUrIFST_iIlShAlp&{L2f>&m}85|LtV71wD zD$6qx!u$6Ri#O-j)t6>j4Q6YiTo8(WaJY9LSR8YT%Zu|0iXD=^-8>_}4~5 
zAU!>O!__9ORN&_=NKgv`q7ymG;O4+=vnJ`3GKo;APRq@(B*w=GH3mg=w9;bLslo9u zt0H~jsxy++a($Wu=c)WGShoWd+C(d~U%K6%YEs0+DK#pUR;vK#Nv_w$M+XNIKN;Ht zRwjiEc0j7QC};|k>fE=0s`6=p#J)`@oJt5CeX6m);&-dU}Thc=YrVS_;Z? zOnSVcIB~RB=QjehSvlr-kw`2OD|Cr;s}&k4QtQ=Hd4dYJZ4rtLvL2LY!0t|N%qekb zBr>&HL12$35nAfby|IbEH=ce z|G>aljS>B5|2RnmdRVrciot_xO0!d}CXGU4%`Qk0`g#RKO0|h|#_2MY^=?{GyU_vd)xeC?aT!>Czlf!7~8HZDy)DQ5Mkkx#zkJggtJYE+rW% z+cF&%jXGZ7=Q+SHnm9w$qXeHIEe%vjPR%MB^45km(}vUyu7PuaKp4Z|=Ori-46xq9 zuqAtYakM}V6PpRQ5K4V=c1bDq&17UI=iE4$H%e_0JzwkeT9-Z{y1YtU56j^|BQHI( zz%KXg{>GaFLnR3sJzS3&{6}1pREoIBaM*}Qm0GbWJg zLxvA)sHwOR>;K7hpPXddMNMW`mLBEiK6A4D3*O!y)NKQ_%rWOwy zRhykVc>3ZM^WaSl#=)Z|3@>$95@hOR;xbkiWjV58f01fTG$xrWCJl9rV62DZz7PFo zmx`R_a*Ind_3=tWW>sA&9-C;4j*|M}gDXplOUjDvI!S2%{&036iHlJrr|6(fp)U_A zLl+1GoC>96;89Ik3}dJrQk|8gl3i z_TZd}vw(X$rLoj3x(VYOA2^`pj*n>1s_gkjZ%=_Z)$)ANg}M^B$IVMvjqV#e}C z(?`~UqdBxJ!v^zxLNWN9!y2b9cx(LN8q}+@IKK#e*9`1W=!aoL;?7B!rU&m|D5>xCDb44;sPSGuTTi%L)r&gy8_gkycp*bqQY^@@5GMeq zcszR&3Y8i>r_<`KRyfxaiRCI|Ms5x`XHoIck->vk!>OPW_Jt(C(Sc0kD6g(6aaeS5 z{(fX;7YIZuHFaowMW!t90dmsxa=Fmg19ocGWJN@zM4MVXWckHVq0m z60D@HLDaHk=v=-1yr0(QQrFCKqEx!tA?@AK+Q-TZ`x z>_P`@7tA_~-Cj6#_klT%xFA7dPFYoP0_p`C)DEgIFD--1*PC?`@RaSvr3HC#6j6%1 zoZMWyPGQQ;OIC|RJpdX#WU-%$QNqFr=oUVdT)V+}xs)(uR?Bd3j}p$wqrg5!I`%Y33XljFm77 zO>i71RHv7eWhf$H{+B9KD(Z^U5|s*Lc5y*YZb@Z+63CN?gfU@3Ap)r;5jriNQ_~T` zx1%{dFN=)KY`Zo(N)An%C=2S-?M?I{a1`U>p@-!O4sx>L8gi|sbbCTXm{1Fim}^Tf zEadf?F>7MekoqBT^~p9{GIXvCwQrYBufORpOw7oEhJvX-1eP3a;l_ zokk6<5e`SMZ~|}1fH~e~k_QhUJ_l~Ma>~i}8jd9bJVSNF$CPW#HZx3sXe3-7i02B< zuS{#TSyRcr=?xDv*J z7Pf-WkHaHjpQk{*WKjb>V26b+B06JmPGw3MBMah5u0=9aA-A($pf@ zrx{aHGxG{dVR?n)3wv5tQAuv9Su5pyK^#cXC|TwhzrYxUQXDHzFp;Gkn^BBYWOWT} zw_(jJhB@12u~^WXOUp{r$>kc81-3|FUw&f4W(H3{t441^uF#lLQ>^y#(Tm@^@cH#C zAJF&RoR~Xt(oj8|GifdP<$0N~w&myAla1goz9vWuV z_=H5WDM17)lT@y!=PEF!VH#BG==!9?@l4Lr6tEcw8@UCtc){T!+;cH!WT#uQ#_q;H z_`1T44a4;C8U%4^ebexQtg=C4rj9ByYxE{ddSPjnt#I7Tk-2&qOzm=EwA7evwON!= z;OOWTA-&+HgWQywo~(g8Kiv3&c^*t8mKovj2{)suu;}24akP}be}Z0ZNN5CH3@)zn 
zxM*2w&8RVr!^f@o(4`ZBzFbfUesuAyh2w?|89i(Hs<{oR3CaY0qRwEoCc$Z)-4HK| z2#!%H`McZC{|bLTo~Ch-zyE)GavH-7TfA!Gj4daQY*_#Y zMzsw!MVVH;!Db43qo+V`&rMSc!6Xux5~IW*JYH`$=%IhbLqK;Qj*vLFk!vZ?0gvm+ zf)fV&9KMv|$I$qf->zMKB(}10HN55K!Kx$IK6`%#!<>g# z1;LvxKHa%!?%TT#?Oii%cx`#rpsIA8AuYFJ$l!uRIdpV&Y-m7$KxN4+tF0&|r*?WR z&L4D=q^mWFGxQN&egdJuA8t;7+mT>K=M5JsawKd3Zy;WwR4L+hj_Uecb(oir)ZvJJ zirjte*rgZzD;#bp*ar=^@&8!Ab!_LR_1pIEUpKYBth{CjIC3*5R@tp-;9jO@XC#Uv zgT4C>2vyoE8tTCt!Y)qlJQ8;mIz1n#mrFJUWD5gcPzJqSiO0kei4b-iDh;^_XjVk{ zc>2jQ(qrD>UIB1FyakB)Z$C{=o|@N-y}EbP(z){%&S-F0)4@-!9yMh|t|RU&EaunE&946Pt;Hop%Lg_8Ra!K5F8qiIRW z%ucsi4O*=sv$`tFoMy?o8POUBi^D^}AmeB|)DqI7FI+=;HP z$V)N822dA|_lTsbq;%M`z{*rmSXu_Rjk7ZxWy1%T)=XMBe^gd{s9#u&Ur&GQu!Wn} zO*A#r3sk#xcE_sa3#Uz-Jic-C+%=n4&mKKuNM%t$rco3cAvf4E^NQ;lM~xaie){|c zGpEmAxncX7$qnv$@i6EI_o;`2CtZ?PR5u(Z^~$oM{F3^naifM0tt~03C@aj*Ni!#z zQp+Z--nOdAVNS~}ue1gC@CZ|;SJyYU7nwaSzPott*f9&&t(r5op|ZYd%G{Y_Mok=7 zpPy4Te$I^1LxxY7F>CgMHM{n1TfJb)7!cinwxu38uT<^usf$*uT(M;Cl*zMZG}YBL z&0eu~_0k11n;IJDzO#Jxl+pE-m5GHuYAPx6`R(- zHKJz7#7Pag4s!3RynImO@P_KLl7?wZSE34Zi#K@e)JbEY`C!*Lw9Ia`mkpmhcV<(4 z!_c84XDpsKzM;sGQ(2j1#`FD@oc!X3NlVr)YsyT@o`$b5JH2T3!dXp&D{S#XxIZ+9 zv|h{Tz1pnYl7>OK=G6SM`o>XXXD(khb=2f}GlotuM7F>QnN{YT+S_raFEfS5=&D%`7U&A`eYel$TbGUU}&B-euz(r*64?W)onC!`TJ9L%&@`WWYY`bc+cRdpR!=ylrdu_fk8(#))wSf z4IMUk=m{j`rH-U51qXH(@!6)nKpLpgjsKI-nMPi((%Jd?_aTG-t5`)maJHb zVa1|J6*fasGHk9&ir{&diF22%czgBQ)o-s{xqQ*W%imhMb?36i z!gR;rg=<$Xm@{SUl!dFdldpoDdUr-HoEPeB_0wjKoAi5c4*6i;!S`=+T7G%@+=b%@ zPF%e5#ph=>u2{Qe+xA`W?b^PLgsmIbyuEzhgpm{8TD)lC;^j+bj2={4SU>&kja#7L z?RR(V-L+xKl#xS+ju<^@+LUQ;&6~Yo^}7p4=3k1WW6 z^RJxJk#*KMRqgC4!)L>rh3*i(a^ZfterC`1okuQx`NfqFPw(Hk=h*Q*TepH<+joK3 zIWwnDU2*J_i)T(A+_7=Z%0*KKmsgFPwP4nS(PO5q*tBXwb=}le>({K9H(|txv2)+u zx(*Ed*2JcUyu`TBU^vrH%PSc?W6k~#u6=Q8EW?y8U%PUcS(a3q7uVzYPWqOwkaVi_ z^@U@Xe)#s>?j2iJ%v-YYz}d?mAKSil|LNm9Hf{mEcI-X4cL$`q&wTO4#d9AV-??$q zmJQ3s7bcmqODk*ZYKKgoKefJQ+RpRm4{uvFf8P9g>p%G9#G&`Ees*@tys;z3P9I-p zk_u(Ai2k06npGcOSpcu~YML;rx)APU6q%%<{e4p1-}mHH3W3wRj$gZR?dr|z=ib|} 
ze&?Yh`!=uLyzkJSt(%Fxb{{%>^~SARH?Ex8x9{-LBZm$h-nZec5rrw{)QV}#7fhZw zeaeWz6IN{7vUdHBeaFw8*!AAYQ%8=UKKtR>ovT*w{@}CAC-nj6f`BN9y zG0cDd`^fX&hN@9xn#N3?I%USn-yZ|)&Fi$Cf4p+`%*p*bw(Zz=^zeIJ_u6sb{0;Ea zfBtye+v_%O-gWrY*^@h0%v-qRt+#gGVm~{wYyIrTrWuQu%$mGl$N8JL&hJ{Ybm8)~ z?<`-v@!gfHcO2OJ-tkYap4q;j-VoTcUrdr6_O&$hmm4hEgJ-QjcyRAU_`>R7|2MOm z8H>;S2EY%uKKtO{o&(3;KY#Yr@xzCX9EETgy4k+d7eBgiY}>jG8#nDfdgADT-Mfxl z`s~Bgmv8@e^U|r^>(*{LaO(XNyVh+zcJ9RP^{ZCCvuW3^9edAy_UQ*lwy#>X{m_xa z&{EtxudRC!*Gc_8K=YrPcw_T829r;i*uxPA4eGhcrD?dRu? z9659GBM2YtSv&=2)H#b`ly2Mp-jP!$4jnlD;pf+{o&mEhe&_9_>ra0E{m;L!-=5vQ zdG+$uTlb&&`0AA_S3k!V_~FXA^Bz=%REPFEe(v&>3n%vN z*m>~a^7?|B5hEIBY(4++>BFZ#zPNYUl&LHCoxODB!{bMeA3gQS7dO87;iua_fA`Ig zzx?<$cF^5xr@^}I6Dy~`Z$EtZ?dKmHKXP#2-hBs;967lE z;Nin3KZdE}(npuBd~x&Ety?$0{QS%7myd2*_0FcP%f^pvtSNwd1{*gloHl3OzHN(U z%wK=#fYpxda zNY}8OB?sPzw@+T)wPdrqP5}Rmo$RYqdv@b3E^^Xp0-neFJp((B5tu^l~S-5olp8b3FA3JsW@Rs%4wy$5i?cmYF z`}Uu?cx3C|3m>1@yllxkoA(?#u)cxU=kZ7WbKk?y^6&U%{_~oTI=?8ZY}|sSld@$( z^V}Ayfn$61;)kI2PZ!UgKec=PhMk8_ojkDb)a6eBwV3d_eV+kOc9;>Vxf{Pz2=ubn@C?dxwoz4+0UPd+&E-hmT)SFhT(clY+~yAK>W zdc;jH)bD{uA9LF6{%5*-^z7-c&MHVz#KuSr1(UBeQ|;TIX!gcOmv4M^?Zm#**FHJC zdDD9bc5T^p_~hwhJKx=W`1H9?;T62U{c!!#kxi>sEMK$lGV1x&)z7}U&HnoB%^#4# zk6&KC^#1NOYjz$v{N4_n0&hVlyLI)@Xn1$)tDX2a`@xX>pW7f{n0p@UJTNFUJTgvg zR14w;QB_>=SNpcS|NSrD-2D2hFTaLS_2uVR-amNw;*B3~e|2F$Ot(As9)|gC)27|~ z51u@K@$z?A@%r)Ydp@}S!|k8H{rutwpAm!pbotQcO?ywBIdbUzPoX0q)z2FkMo?H< z81WF(@liC{W8JzveJ=vdLWdW7^!4=e4ic7PC097HbnS&-e!O_fPOEKKq%yedFU(hfkiraN)xHCr+L| zdF<%%3(&GO{Cau&{KbcySwQ~q*`;gW{_w^5vsZt`&nY1p9*B`}dS;PP;{WQy58=+Y zMR+RGGI-vk!lLGr+GmHL_dCP;I}U$zVb{{d8xCIl>R0y1tA{phJaF>Vg=^P7`Q)RM zdv+eUNLr4NUp>BO&(X8zPM>Y=m)`y!X5SmDT(UFqt;11!j~qL(eyBdUSEnakMLg77 z5#=?o+oN3H62-!W@75 z*0rl&+`Mt+^x<=)usNfz=kp?+DZGCZ!jb7Chh(cG-Xzn$vy1{bX77z?&Zv3r%B2s_ z9^bcX?}60yBoIcJ#u8{%GvXGe)#z}x4*jh{)GUGGFea2<2*z_bjN{Su6H7 zpA1`7;$EW%_~z0%M`F*|Y+;W(56?b!dl&ivk2^4Jj=Hg7RN0XEZ?D*L;_L_K53OCi ze9yN)z;79v)AG5m8HRp&fGD=prJZHj1$fW@Tt`HY=Uua|tC;B*7Yy@U6wilGr}2P$ 
zdVblck+awB+_HAp4fcoAAE0OR-51}uMkb}5?N;UIkqo0}*^fXVhjkNeS~l12|C)2j zbe5EV%AQv-_eYwP0>fl@c|I2|z>8^-hR>7qm$Q5K?_0lk@jILMpTEv6T~P4TrAwF3 z9Y1+^o^Z~_t!nY_*oA`64?~A&9d_-55&7w+AsVlqt(-rXV5~Q;JN(e!1xcO!ex8%|~uRV9@ZFp-c7SPwVw;BPk@Nm^AUoPXs&7)9P zo(i%~1s+F;BF1s}XDPWeu5&`a1H9&o;}SxwJAqRvM3Q3nQij;T2>(ZF6-t2EiF6w^ zP8?9jVErEddzNx`iqEYkap$?h z^!}{N7K3U)#%std)+$g}^`zQKmRWEGBs*is}Ui~bA zhlOaEaOFA=y3z+HEr)*p$01DHa*#93BcmrTJN(}9>r=W%H`&R8|1`%ZiQ8Emc#a|0 z{tf(F55ml^XGCNAzhBw0Aq}Mn(m)#tV>^$+wD9U*>E!M;5Qc&B<$C1x^X|PT0sxUV zxc8$$5j_)@pV>b;X)F{RA(LnbKsKy^v}6FYY!l{PVH?ZtrR72w5}};p=J`L%H4qWy z3^Sd*Rtm~xKm{$!CM54$UOnTj%;y*eSEcdDCIw0HvkYQVhh<^IOm;W@5dI@(P>T(`PMF~X zp@fA7R8m!Ah8UG#T~VJ+2-{h9q)_|X3n@i@9@r2&HZ4x;%sl!(D9S^V&@CO$nBAI; zl`uSlxbM+<$mE!%~jW-UyvwF`bpC6cD z(rk3&)DlD{kHC-MWj);%rCdanIX47uelDGcT5@tuPWO1d$n?rL;0XE;vd6$WKe3fI zSOEEG?9MH>3&0VK8-t8L$4bl|Dr2Ho-Da#r0znN0ke0FdH9{SQ-3Yisl2G)MBi_Ni z6FVhq8^rwZz~1K`jva+uB&kNDulebZ6F3&iZBXRHyhsRkqp=~FF*mj!Km-jDK|3QJ z>y|Q1g_{M&(XDC$;jpd)skrRtqc0(_18V(5Fv#ABJgT`YB$#yuxQRmHIm4za`7*e{ z&CRn|=B)XOvz9w>jITwJXe|=9a4Ext5atlcWEGWZ)#0lGAHo zuz|38t15&i!we?{a1h4enETW4Rb(${y;qjqiQ^wtK5n+*(~K>vB|Pt5>0yTHB_l+x zDAS$b?}v|_T|KioR}j6n6^1t-uUCI31!A0;e*`G!g*qh-YbghsuZ3}G9r`s}jvA~B zqtMb?+xMVFN5A`DH!&{oYD-bhyo&}Qx_{&yJ|KD(kY#UE+thKV0B++1>r82pN8Dcz zaPZMOIIFi{ZM^_q⁡}ya=kfwb>!$a>!ZnZh;K4JTmDmbgFO04-1pT!8<4*v@Lsx>-l?>?wquA6)0ugHgD$|r?TD?lY~RX8-DZ^2y69sf{sJwS zfCP{8M5LruL`!$I!ruV_r=outUa(HB{vPWg74B6Zeb7Zuuu7h*3;(AO%g$y7@b#Lf zg_xbwf`huH)BBK*o_d6Yuv6DJyrTM$yHd2`2PEMbx8U^$*=fyHX`Ta@uUWVkiro-! 
zL=4~j%nehJTBN{ZVE-OnUhTkibden6-5F|ri zx_RhmYBb!3rx2VO1@`3dmmXjqdi~9=-AVsWBlNtDXP)^}*@RcisJT!+=Sj%;oO}9I z(OeUE7lNPz5^^&7Bh(Sr-un=EC=%lk^37QQw*57|V1DdLgyJ^nj$tNMy+s*Fh0rB+ zc-kY(D3{j3CFLf%UUzT}Na8=hjZB_}lbmvH7Hdy5>%M%ImMK@KCE7c>D|~P_e%zLK zV25_VDTleYTY5z>C4QI~i@?u6hY3*85`_BWw>C}AxdV&50DUU=bBJ#P-Tip>c*npy zB6?@(g9d7fCkDI$xS+~;D~EgWtwM-ic=_Q+9;4*{d*&hL;TM~AN0(wq0y#GJ9rj3a z5w9;|XrL_a7;_YK<{=bf{r^b%ngfKw)xZ6p+;^WY09@BTelid}jR4pNQb%h5XC-bbUZVL#= z0aNsiQ;Hj~pard4)=A~UrOYb};By^9`XK1pxMOQm40av5v>NzwXL?VoGqgu~-9t)= zVootmz*)03@bDdo8jtK9MpOavH&x2sSnJ7#&)NC0*zuVUNR=Z9HV^kbOS<4oPdU*K zy+|GKaV^?XE~sFa=^eKa)rEJKRc`a9^ll;eXwNR{BF6G&I(vh^VXow zki1;7U`A;?=aAESgL$^k2)aMx5$f|Q*yYj3J3Rhc50|l!q=R<=SC6mohv^-a5{@3< zP?OCMd8W5{*f2S=wkR<6~ZNs{%Js)|0G< zecYU;R3E;m52u#1{-bD=d%6VrzltP}(uebKVtCqpR>!j63BRjUhcKl)@I|tC@Kql! z+rLdjGU4XjVu#Q*U6g>6o3C zs0i~5@}~DHxY}u7+^!+(FO%|@1n?v?%Ay6pIgDQ+evN1@Vb)nY!T@=1VK2pQj2*tR_dHsfUYCmbo{r| zE3VGw(uP9Qe^03=N{jRB%S?&#p6K+tg7iFlS%ujLq(i2S8(dw)`CW)=t=0qodlH)O z>Au_`Kq6;&(X($ez3y{1Z!?|PLT3%wTus}zyD1^K>&wA`Zdd@pmAw{(sPMrncCQ|u z8r#b?CAL*Ra~3hUPl4Gu$eO8s6Epc2Z52@HMw-0m~l?%FYd?r^~(5 z>&DL-VGeK`Q+H5{A)d`W&v$v{G3-7a9%g7iK z%Alq5Yc;_=Tph+$_}aYhu8sA={IlKu%e)*C=htkAGIe*fmaBeisH3|f`C`x&81cEk zBZb={e+QfYU-P(sGW!$slBg>uTBzyjduW8P7N!4ub{WfF=*l#?Hvv8h>^3GqFJC$a z|9Nd(5~{&4qG#8+nWq&9Tk!1U*vH@y=HCpc2Hv4)W};@gdCp?3s1YIh*Hqib=^*Xf zOc}R~X`&jmxwku0bNye_ikiVqiPj+ADuwXHM4JA4aZ6Rk;%#@g+Zg#om9YCl{ymkL zN$f0!oL@VYL*Y0K>YW$>h(<^MExp_&Q+Sa?PK5v1qKK@tRJ(scxfa6NWeE(^Kzw+v zr1=p1ztZa%nt~;}@n#zH7W$P3{^_)tbj})+Z6K9+*7qg4%IO`XVgIy3PW~(_JavnR zccK7T^|1_ao7~qTPDOXBe-i$L#X*Kb-KBK9hoA&RnOIIGlDJd-lki}o5Q=Oe1yS_2 z&}w(HwV?XPvf%o1>!P~|+~}cV<1s>wNy=VprM-KlzY$Kc7w#DJ3fw?|5WR>$dAP_J zovL673dCchzftg?u!4IB_0NZ*Z1&474vrWy{sz@?8% zv7TGuS)S9B;@^s?^jVxZr!LI_DE0W!d5rXLsK)&rX* z0l@#bN<=_5rx=xn8BQ*OPhs>1M&~g4cQoRqphQk2Sq;G^6?_+e56k9Ygn_T|@1$-b z0lWo535M!HK<+`FQ0~k#5gYzRYf*JjxKk*=7kRZ<<~EkiiNqV; zE)pm0KLMa52*A3QOF1cr0oj?9VKEON{I3K~Ajl%lpGzivmY6R-=~B!^@=rp%V3Ywc 
zC(5M-h5oPXbqoYQ!|TNlzn02z`G3+%F5XmFEx(Dq=XaDD1T`LjU$U#_KMP#|MQ$FU zZhSMHKrG20#CS6*u@HX0f4q$pe`DyxjEn?!yh_Xh9vVadxP6VO4M_U3Xs~}VborFm zg$Q+O23SO3A>PY-gl&FQPl*4qh*t?yD#VFl*glAHmNdeNp@&!Re@vBTjGbBlFq@Q6 z41!tCJOP;51#m*lPbC!fkHs#%82)Yrq9!>75(0+FyeF;lCk}R&{ewVrQ6v}6vKv7n z%f3#s7%?$UzxH3Ums#w7(c7seh)QrOg-z$mEOMv+x8O3De0K;R#pgrRzom+ssR(n>e_e+%pY_n$cpICtVSpJy4^2@HjWDDK}<#!XHaP>T5<)$GeSmyHnOEa^u5 z4};|X3YAgEkiP^sR|s>${{y{hT-P0Tfxx_84LpEa{pRbaJ9<0NzLxyTq_~ z+6(Y~%wK=d0BZbwfqf2nfnMQ+C#do>%Nm%)*$l%y+Vc0wYUk8$fp!bDTcF(n?G|Xa zK)VInEzoX(b_=vypxpxP7HGFXy9L@U&~AZt3$$CH-2&|vXtzMS1==mpZh>|Sv|FIv z0__%Pw?MlE+AYv-fp!bDTcF(n?G|XaK)VInEzoX(b_=vypxpxP7HGFXy9L@U&~AZt z3$$CH-2&|vXtzMS1==mpZh>|SxLLrTWw&&Mw*~T_9AMr}hf?xaWKX8_JMxY#if~ri zO-eBf04DH-(HKk_YcgQ)EiE)8_Tr(or|@2fpF0)_`BJB7l?S?nZC?CS*E3^J%yQKr5t7o+w*vTM1A%*90vNtCPcm(clr{C4OSeZb&#>&bT-8i=+J7IT3T-2d zF=7_6buQM=Z-O`SU(!OvvhxrP6!?;sCmuXczMu1U2adMiqa1KI$&<(>(lC&#JCR)g zkW(WH&@zMps=PS8@neqQ4N?M`X8`Eee++ze0ltCE{a-(JHSKk@_Wx7tmAr#p3LH4G zd_s9*ZYF@;WwtDB&gePHP85&S_^$jHdj*U@8vc_zo7rg`!QCanLpV;7ZC@Pz=uY-R zX*9rxu#=0TcC(aejGlL?k-6S)&9LxB2qd{vpr6s`i1R87^P4m5yQ?W|?zNv-_E%t%82D0$4)}H5Z%V)2 zf3K4(0L)Q{?ss0VdyzZ*CQ$T}Jn8xrE?yXw!xKX~_J6|K zI?N9pmLB{(kcEMpp}MONGnDMU>c$wst${oOFA!joU_^ry*CUT{%*#9(NtZCy@P(wb zDTlUT_PDF;*XcUGW?Rw=E)G0|os0zFgDJA3F=4EuuL)z$Q6d^633LWNEaw51U5e{d z+k=aN6Cx`KXDCDh)2S_*V@~)r?gz~TI(wy!~tV=kA;w$f41ebYeEq>YJ$kw^a zX}cp8%jVt7FpU7+CSh8HQZldItOyu0mTi zLi&xAsl27kTEJfbB;|dZ9XAVLO4}+vObQ4kvgFE}L5OctXek6LFLpD-AzI#%W${(0 z&4SuBu~J=H5I7Y?qqaFT6Gbu%^WkcSS=p+=R&BFVSfTkc zM@&@>U!HBoyPJ+{&uHc#6kJ`5zNwP}xnSl+SfGI}1~YEOEPnCkE&{T2vLW>m(=0&o zcg(AR;@FdSH4_S|1FepQgn|%~VvL^TDz!Dix}09Gle&l?0p*a+e*F>LA_0!$joTn! znx&K+Ip;aN&a#zAO#@e_%?Z}#^uoFvHx}BL%Ta&ST`!&vJ{{+(OouBLIe8FX1alV= zR(9$|IAOLsX%g<3f*Oj@fq+BoN6ZFVi`hz%5U=F4K@D_R%7?p9iBySa*(T;X&jnM| zax9iPk%SPoUzqkwbE1KiBj-_skYV2A^DNtmL$xsp*439L`?%Nuv@4usTdVah@P5I4 zYuKzBSBc2Sxu1mPZB)leKJx?i7oG`wEU?krnQ;M>m^NlAqT$TOjt+=hptdDD7BFd! 
zsf3nAIt8^6c%#?cdX7D*88yyL7^niap5~B27J%?#Q^uEagIw8cS1FPEKA!a=jwEnEN1&n!Sc8(17_)5+>3n@!k>&_| zg9u^fvn-qoQt3-bad#}nqZV>0cz6N8gAjKl^%y4f&Ay4y{v_a~aOgHATy?%K$pV81 z8KxU$xQ-TKK_2Z-g#TDX5QwChb5r1j&OIpxj_bz%*d~QIVy?Ou$$F3+tR_9dqu~4< zce>3C9uzRl3|=Va@V9^<+z&`#E#d;RpJUOcgjOa^!*>aW83*ubu9BQ3?f+rK5WrPBAR3 z?xC0vs6C2NQgW{c(TI z64EtzRwTu@{la%|&7mBlW`*QF5McIDLpaO6ZfWw3Seq7eeA<{eSySL+QXM{Sfc+an zvtnir%YwRqgQ#U;b4piTgxVaqb)Wqj?#>a$BMA!xcYT7QC%B1&n~B6sfFgRi6*UL< zQazB4RA{qW40o1_6)EeHPdQ=OAi{f*n@e=*^*kwZ)^m|eB@?jQebhtp8e= zMff2t&p4X9m&RQvUydnp&YYy@u?oq_36V2$pg6U=Xj;5rhh zXkPlb* zK5RKDgEq&618$L#9o5TDaFBrk_yor=iIYS`&eV}dt2h*`3z?BT&3LNUL+Pg5?A`ej z+-2jPFJS75=w*JzPKSc~;P+SsFB9p0V+=Gq6moe~3GPvcsaP=ZMV!K5Lz{Hh=AumT zoC=GQ*@H1=%>CZ~4yDmRi@}eEfG5!Gc8Z7C2dO(R{C@l^AzL5|8P$i} zT~sV-Tfn^pdlPOAxsPiq5yjl}@?FvL0{< zANb$4DdR>>CWIRdGZwdJz#NL4vILN6WKW#rZhN#SRj?{JhN1yaLb2YtJ`gR-T=ocI zRPQFI&EV)0EDP)3MV6h)>4m&mM)tiv`}h`vvZVGf=4psv$m1t80|yyAzF8W`JPe#b zkpPBCV_=N5J@kVrp@X-?(a9YhJ$O=sm-32axl5Qbr<% z0R4Sk}6U}`F<~E8(h7Wi1O_=Lk0^tD(6u>Tk zr7MeJOW-R>v%$7wsBvf&j2GhbF_Zx&SFloB!(Ns>il>nb6MO(a!{^pv5DAYDJ~$p_ z(eQD9z}3=wU9<7#LIAU4y9%P;n$*qLjBykA(vFMTV!O95$eMx8Zf~~Gy^-onAo`J<8yi@ity8nUy;r;hV%%+9A_TQfLl$lJc4Cj1)@h!Ir9`~lS0j8 ze7Sydd1}^M*Vr?2WiJPB=J{Uc0eGtNeSn`Jm6q{Xn%>~D*fY^^K=TEM;*z`9i#@V` z*~o^8t5?FI1Jf@!u6zb%xd7!nl{Dffnxz6bY3w8bI@~{s&k~&=14KXbI8676{vY&W zZ)|9^1nBaSMbC$rp0tJd>t_zpE<^+n(M0eL$Wt#CvqUcoLY-JQllCT++8iMt8diic z{|m*lQ#kAJNu)TgU6@Q9S#%x(0BE=>cr%g<1cwoG6r;L6IBFo=n?O&l+N#+L0$QJC zB}@l+o{D+or4CH5I*Mf8pi?|^4Dy6C;+=Z2j3><#Bt-v1m56e%`au4EGIU7AyNZxI zTOS@D3}=|Hyy_M7-&{nDZtB8w!#SV8h<63Y=K3Y0z{;H;;Q5%;^iR_~slXx=KV#cKsg{Vev5WN8Y4^Y6s;i;mq_^y_1PO$$S zL=xs}2tj9@kq~;Aq@R%@XGFCn9B30dwI?NnLz^S#r0f*a4A`uG{@GXm+X-+)?o0qu zs2)fL!e!A03UKGTbVe+uY-DP;b z^UFAExd-qRS_IA<>|4$84D&Voy@zG{Qy&+2uMuMsl0mo|HD2o8qsuGZ1xeO~Xutk_JtMScO>F;%8wo!=1AzJe z7w9vc34)pjCBaY8!iXG2tK@F9Vwl(a^!Eu-l&yIG{dJ>rlhab7J#Di`yE~~=&NWO> z0?AH#1(Id4EzClSxX7(&*j?F+T+JIX%FOHp-%kH~{E>Se^ebNCF2N1v^@7`gESs7G 
zOL~S|DFL@o1aHb9k|i?Q9^KQSpJd?UZU+egEv3=?55W527XXGEpm;|M0V;PjCE)Zn zLkbA!cod*(85Xu7L|C(GCQPVLiW7wR2LyU{edAf;7*IjYW8limhxDVzK`uw|V| zfDRP}B(HZH^x;Y8Tt%n%+OZ?Yj-0xB>++#3i}HLeQ#pBDt8h7*tcT<&_`QDf9XC9A zwAZOP7CZw#m%hBXznpGD9zT9`eBajX7XZ5b^`|E{y)zS$e-fDdW&}Z?Ltp%RCI-Bz zk4AQZrB}skE{ym`^ zE*>Z=4&wLWQ9}$=F92Q#q~f7VtaH?oVoHG3i2f{I8sePQII8xf-2&|vXtzMS1==mp zZh>|Sv|FIv0__%Pw?MlE+AYv-fp!bDTcF(n?G|XaK)VInEzoX(b_=vypxpxP7HGFX zy9NILZh`ym$N#(B!>M^s$JbuvAG_-C*d3qVy8n?!?&mM+zWex!@HqD)c&8lFCtrDn zXTfxQ?P<7s{;-RRdEm)spXM1q+zB2^#qhwhuRYyD*nJP-AIY8J>E4}rroW>X!#u?6 zb^l|$1gOvJ3wpr=YxuuM_}$Y59wEN>v5tHVcoonqouB7xx%7Im_iJuXmcT0=VnjlP zA=zq4&CJdzC@HX~WF#BY3M;GYnr6&sswyon&$a5L5yF(Rsne*fjgs&vMM~AUsiR6#<74G2snkFzfk92<#4+_H z`8ilI%V8nHs>;i&OLDSv3af`TjcTe(ga=z5d*a33J$iI~qj!i%5+@MEiACXIVx85V zQ&L@-pJq+Xu<5mWojI>KFGVYt>ryf^la*1SAt9lWaT2-4ppi=<$aPk$Mi}DdD^JVM z%}O`R{NCv9E7a=L8ofpmD-;WbVhPd#VTvQes@EroqvHrBPN+-E%FeXu6P0m-Xl0Tm zNf#gSCcLS)mq(9YPjqDT)Q26=h-D@_R!Swf;t8K0n6iy}iq!(!FR1$BeUQZBh+ZesZ%3XbtMPFeD2V zh-G3yaA2TUuU_6kAtAxReqR2;gn=l+tCvSeL`1Mx&ra|FUqG}l$g4*m-(Zj!5)v-A zrdc#n0h&W7jtTJS-sKICP^mF9(;x<|LV~<|z24a)R79>FB3T?_5#PHPC=?Rt*Y%l~ zJ9YL9kecmz8A;-h;24=CE;1}a5JBj|Vx_U60luC+-ss^O4Ei9yP7q%2=_By#*UvA) zZ(zTE-of7Rubc2FL4-h^nqiWLhs7!qq~RgKzP;XfGf-~O$-@V{*}LyR|B%So*oXix z@8HPr0H`;RULmLx9OM^3t~S`;%PTNOWzb9fJv@M*Z*O0LG$J%w7#$e_c6_6EV2nhH z3P#07hJrG0^z;gaJ|h8C2vqeCiHHjG=;aj=4Ya*Nl9%v&7LB5_oU?m!`!+{)V>lk6Mk6)mF z@9usvVqr9L5J(dYNd{GHSVRO8hsVm~;y95S`-4Fx6snCDlOiHSAQ6JVNMS74OHo{& zuZdP!%*sgbUcG&S{09zwD3i9dSw?AkQO37 zr9z1=QRGMZ*6Tez2lndj8-smT6crj8r81k~k3&8YF%dCRnSb}*{xJ$gj8D%u-VE?~ z=Bd~FdJgawXw$MXZ5mOyZ@)g?A{g63xh&i-5Jm{!W;g~(zwPB644pep3|%Nvs7kch ztYC03G`U>ph@k%fPuek|@5jK14Cag-3Y|AHTqFvI&KTz3w@1HNO`?clq^ViynT2`E z0X@CKV`Ibpe8c0yyFc>+Od7reNoN&_MA6})exAM&%H*v4!rT;fMBmP@_wo)wKAc#X zC&GfgJv_Vvu;2QI31Xlu*w^FDPCamxl0b&u>%Dx#g*a2-_ze;2Vd~1V+iZ4+O%(}a zHrU70Uuv@_%NR!KD9A3V$WIbQVXu*CvW7Jks-k431VJF}J75}_){+=G&_|Y%m0Mb0 zUu2PlgC3;6$hEdioVPPmbh0=>1kp1r%(qWh9Q)mRdi3)p!xcJ+AUpuZT~JU+tk$HJ 
z;|v}fC()RPxI#1&KYD%_ek@du$Y{JPC`?xwTJQ&7lV1Tc8KxA|j)fJ5! zE;XlW1aW#?@$w6yAwFI#RwSloW|$QklQ}J~V({=9Fh&fF??|ONNu@NTX5!jgRhDZ@ zGE2P}#?N5MFeO-uYN|`@I=Lh!uum6{&{$D)w5cH9s#2=xIwsdzEs8KN&tS1Q#5d62 zqsM@d7?>dg`*-sQ6Okz+3_44ONgi)@z*XmD+O%+alGrGLM4x0zcEH@9T;4P+Tc@+- zXD7-v_JZ6LgEcEf5*!e1Ot&V&yQ^{O%*0U^Ckg1qFrEsHS+6qYR8-_Z6T{H+3zeAC zEh?EYr!+4ujhYwMRjphc={+#ks27V8v@%J8!D7+MMG+xNAR#NcK&Cfq#R}+e(AhvK z?0OEXMkbNQC*@U?msT}SXe>xd%B(ClM8&BULZK!-KR?r~(IzG+jahcRTq-9nfH)@F z8~$??l8}^=U!0p=Sma1f&CE=-re_tDRhBrC(=xI%tvW+yc{PNJ9FrkEH(e`<5eQYL zG)H!RUQq?$Yln;-S&PL+xfpt_Ji(G?N;1NtueAZ2s;ZIG@Snqib@~&00fRR$h5+ zMILs*+|ru7Bw4s`NP-=+&^AR43a6RnOMhe;CR*~c>|QfkSxDzT%glhf3Kz+jl{NVmpD$t|p` z%1^b~Gt*O2trm+xZOE(`G^nPyu(TvMyQs2m$e{Yh@r|WfWPp-ufe1Q)LZeqJHF{OF zzrWO)nd>m=lakG9l~$(^N)4%YvpO+5-x4pCfmrCNf*7ef3jXxlGbl97+oN~)*Is(z zrPupK%3&13(i`L(815ei9Syt;oE2gI3G*E=FfdXO5u-Ba)HD>^G%`h!HA$mXfJ>N| zl#!ikh?lCh;-G+V_hBlP2xG-C9>qchbSbqgPGvId6>?>~N*o>*qtq#+(s)ar9oFvH zD4Zva5`iGR&r1yB6)TSO>DJxT4`$H_l`X{rGpct#k3NCI2#Lu7hNONU_-HRb^L)?X zm{>_tR#A0bS*FcovLwjGvC(3QNM=aZ3j_O-`M%qLFoA#X-str~2#W@-L~*gfe&H}~ zW4t{hM6gx~q#{Xhp8=6-nDOFtIeAHO0X_lIDr0h5vPK*o*b82n6=z9Hjt>iys9=Pp zd z)Tl(EFdn^P$;78jOtB{$&FUC1rp*DHkNgw`w02aS%3x5%1bPQT*T6l)kl~fafNpR4 z26+t#ii!vdkB$rrhV>;(l9ZZkRL2SANqW#Y0o)s+Sh!eYG$bf=uo!5xDS3HzQ$ieQ zVo4H%Lt_OSV6Tet?2XQM@PIxZ{r!bTyTcR;Z#D{!h)uBCY&n(X83{2yUg7cP{JQ$m zTt~9SnkW~9Mu{bH;og0`Whptu<;D4h#STf|ZUY6;U}Cjat5rsWbK4UZb+t(=75I5a zC8(nWF^OifH30`uoL~mm!UQw6P^h!#W>}IGF+#0H9UZL%{u*#R%<3p#xatg>Mv<82 zNQDhgeip3T0SXwl_RNfQyFJyUjFV|JDwS3XeN?8_>*Aw>1IfZ0DiBIwWm3o#33?SY zl^FIobQb_lN?2rUoKPf@ib1a!MWO~)2|4bVpuu8=0_b2;DJ-lZp>my0t21kaQ6jM* zG*lo_D&t|C!saGiWXi87%g@d&E-y^gDK!T8_kF)m;44yyeESTHO3cVgPlUA|{3fq{ zzMdZ8Y8W5kp%Kwyl~yH17da#{N}^Oq#nFP;nBd;8ck1G+Ci9!aBojfmwP!j^CM$@h zyOA)VCLty`7}OI4cn$Q46^2HOgi&##=)eKQkJPJ$;8ugL4`b6-~Lqa0LBSmU` zf=r&E!fjimCY!7W{k((; zLeH+fd?UoLQwsO<^a&B_%qAo7j1BSbKQK^evY{XCA18@G56hNQF?eWQX?BX$q)}+B z*##*g7`u^DZK7HNZA8~DSZbo8!MV;&PtDB54RcO$G1)ic`YDMG@fzS05-U$KDr0fu zG$2F@t9($5L?V|`teJKLERXt}VdIA6WtW#Z 
z#NdA!!E;J7+slTH9XGrr72E*uhRhb&XIrI#{rmfYGY*q|Mv6|Mh1tcZ7K(If+4)5! z{~vqr9oNP2{Er{K_ue}qiem4*R}e)-nt=3P6c9u~6jT&@@7R0qy?5-fM`MezCNbT2 z?l&{Lcdr*}zMp8`@%_h-edz7(?Ci|*X4~w(j=OwXXY$j6I^i7BzDIC;@(}-)joJka zNy&_h4DHj>)7q{#21E?#EhKml<2tB z?6Ng`w$C3wX<{Lq1GEYxKW|{afnl)T!mte(7y{E}*RBCDu|)tWATT64Eq6HjW-)_c zk^+YdJrXq5(7fR>9lc$gT|DZv4bCjU^>9={e*Vyel%a_M&AhzY_3rLBFeEZIJ~|To z$CUVp!2bPubnOc5a3`jK0VOWJ^eeFK8fMNUl|5)#-KabQ#75{7}bDxNjO2g3#*eXq3ijNGEq2@^{T zM`Xq)NjLk_IZ z94sW$jijYz=^Gf9GHOg79GeUd3Qx{0o={R+S~xr}Cn?0QUHt~I4E5>Vy8q&##?RlXPEpzP*>k2&Te7NRR$1Bf854^}768=gvllL& zGof&JN@VbW(8R3l!l?@u&M6z7SzNJY-Jw1% zg-(gRf$pzl{P@W;r;W*mHkuAMT{Lmlv`N$F%`C|uo}CmC9N2$wa(>yIMN1b<8j%ju zYVxp=VZ|ptAl-d%f;B4g(|7(g*i!-?=Ba zdIPE31q=*}O$H0rnYf%&t~fZ##lNg3ef`giFYoj4dQH()^T?tyHJi#}>lpP=M? zaH2C)B6_rF4*NX6p5WmO4E95Zrgp2q_^iDAob>qcL4*97H)zviV03i9ZUNDmLkq^u zm^HB|H>N$}f=xre0Wol*Iud%-NP-mu%T-!p^k8sxJ9qI98if0ZlvL{j4Nq}&akM3m#kYiIjL_)zvPjHBeMb!*T~$&u)*oM`LGEBJQ?wk zVZjkF$Y8?D8=4ApSx9o`kid}CVTr*#>sz_?&VY>#EaJTfCFVktS!7YIu4#?-`ykJRPh*i8C@Lu}$jigqj|}$f4k~ko56y(5h~b%8!?H3n5`*E`ATxSE z?{*D)#U77fiVShH#J=*+AUIWZAQx!DZY=<-DsGfRsK zU=&7&4d~r-KtgU_Qol|(Ed<7n7@ZRz8rUy7J!dG)=miBCFhBYA34o5*tA~G3=#X?e zr)DDr+H!CV9Dqe8WTwK_43>+;L0!Se4u^JZUB}A@&vF82@qi=i@VM0E_@wmg{QRtp zn1s~vrCoatN*`UA5touZOv1Hr@vQRF;?fC)Y0)t;(a^b)1~l_tD7*()qPYv`ErvN8E#@Ghpzyx}88 zmz0hjIlO3MX?99x?(p2S@BliGZQ3ai4$PrX_vstz*R5}G(14)8o*lXbM#qH@ib_dK zh>eNp+r9z$9RB`+(V6+Vi4lOhJ+YmeRm za8?r!cbWs^a%fJ`=z^jNqf?5voLpa4GJV#xvJ^P~ice2UO3x`6kscYD4qN7&0vHE_ z{QL%l^zYuGQ_sHOI|Hshy_?i+(XBVQh`oFH1qAr@4vb7nij5i^)~{o$_OSPbbCtN1 z%wcKa1Hd@FJGW?xoeDzl&MjN@3`AlTrcTpIY$D5+81 znz!oKFAyA;pvXwj9nhyow_bffe#WRm*lxp`nVXps8xs|i2(20!pBf(wwu*=fhnwq$ zV}U{{oRUREMTUk)z)qxJa9na~a$HKm)D4#&K7aL$y%#~evTWY`aUpQdG$<-7KQlEe zGd(KY9=WgSAJ`@T|O?9(7 zLi)A!@v7UlUu0ZDY*;||o&f`K%0+9W#)tIo6FvmaZ~6p=#-(S%8CX!SuKt7LqF~-a zpB4f*)7=6FCnkmW?cFOVZ`Q(DlO`=a#ox`tP{H-Z%3mTtgV!vGx zA09_^Z?Dc5gz;;(%vyNh#+3srCxe$UrYJjQNJv;rM0VJ7_>Sy2nwGcm$^iem&{9p0;%Ddjy4}FM%|@${t? 
z(@VyV9yxn`#hexMvNQ7X6V9DC)f!p(^x2Jt^zyS8--+V4M^{&DK6d`p_W2Wv@<)yx znGhTvpE07eWN2u>z=((e@MBZU&i$iOM-+|7$txJ1kq{AvM=`L6;Rm*?W2YsBbZp$B zOV7?NnzVpzW3Pdc=)B=V)xUd3_}u}#fqpQ{hQQHM$>ebbFgdupIS7g7`!3q;)6oqv>RNO=wY~$$^pIH^mN)3!1MxM5CCI8SGH{j2S0v2djy0; z42cR3ipfYG5;>q_$H<~lL;PcBPEQ%=BRCKCR~2;X1*K;WkZ_4lPVHM)v3%9SG08&` z;J4RNljoLYL=41@` zIkU7?>Eqw4U0pX<@5XJqV}s(UOeT058R^k*8k{$-d`x;=O7V>Rfqk3XH5kylX;Aje z4I5`f$+)n;T|RK+)b6=s!H+J&|0Pprmcj4H1$kMC;eM@reSBL4F?aOD$&;tdSh!|=#gf&V_Z;3nXN)o~DGm7Gcj^gai{aokd-Q~H zV~Ps$^7HbF%coD9HhFwuUcu;*IXT1fiic)Q+!qibYc=&zfGGl|FLjiuLOkPMW!J@sgE0j-NZaW6gqT zQzlOsgSKTJxC9DTlGPhGZCqP1ch2I4<)cTJFWIm6Av0sjn)PccrVbxkx_HOdg}EW!`-QfNDTVh15tu5MJri?AEkCkrdPY#M;1P2c z&zZ4k^WN>t=geEPW6$=Avf{$Rl9{VEZ`-Ey!flD;qm?+44zQ$*^HBEge+^ z{bQViZQksu(0pSjOd6M$I3zJ|!kmgl<;7#hjhnP^&GH#zhUa7# zj7*CP9oR1maJMlb^fH-zCBwC_@e!fgcC)n8M$M| zW<d8eB@|qwW5i07R;VHsgz)iDk@t54Y*;& zoQcK56GKB{GxLh@A&k+ub0Eh&H$5diduS?baFa)lFPpl2^Wj@xU)Z^R>!I7=Nh~NF zJ!SgnQL$Z`G=Se>KKAMh1bSq~nDY4(GjgU?EL}KreChmM$IsopzH9!Jd8^j1m|0RZ zZXETgkp)@Nq40wQjy@QTrBi3jTeNub!WpBJL&MUFCz9nRzy@$cMsy@F7339+C@3s0 zEuS@K*4+7v)@|Q+?AWQx*RNKd-kR09dst4{)Z(o0zQIwjudR6g5pxmg%E2RBXHA{~ zEi-vi+1vy7o;*0Qvb=EYq}g+4Oqo2jY&08k`H7KmNHwl}GKQ&>@*tZqEDwIRNiK%H z^4zIYXU_)#6N`op8#!)#N!j#?qf2J4+`fC$>aBZsZQH(i$&8h!Z{NOtqO4=x?wPYU z9Y4K$(bUVH*VgxW83CUn>MUoyK&>v zDU+s87&B$r=3TostU6eEcF(#cWfRsOIkx}Mo(0q9t=)U^2|~EGb72OY7Y4@^&tEis z_Q{W+LelNi=We{_WiPJZx_kZ7^?Q$>fA`hiO}h>rI(+QpvBQTb9NfEo^M(~OC(T^8 zX7#Ey8`dtIQc{ppykPU*15mJe=aEw<_O6{fscg!msq>dET(V;M@}p<>ubV%0{G8oe zCg+YESG?@->64e~g=fE4u1tsXuk^f0qlfh9U$kWIgeCB1p^t>`6|{FRZ=5@E?BatL z&mZ5varWrR%a>0cJP5cBA3L^Z?b1c_=54(6@ZMKf&mGyjebefBr6Wd_FJ8HL=9H=P zH}2aqbJXa0I}YyLv}$hov{@DVZdY#KvSZu&t^4N0^o_|M0zWB_oVRQDt~0lvKEDR< z;2geT&!+K_eFhKD>`~LkIpqCUNXn3Yym#rrFK=%hKXQ1>iVX+O-hA}UrNal$-ne{Z z?*YJdV!{AsbJHo$R z{ibo`naOCB64#Gie){U^lQ*w!o!Eci*!jxS`*$2TS9$W_ezMnz^EaQodh_Phlk2BX zU$|I#{``g0dzO_Ai%m=!F@M9#Idc}xEi0M1`Q-84yN{f{a`*bl$|p~5-MIbW+k5BF zojCL0*Y9tf+Ou+6Y0iM|t@~unTQgb^G;|u-Gzl9yWm@^vIrHW&-1OIDKz#F(wezn} 
z?%lk4=E$KVXRlVC;N9!!**mY`MLfTMb7=Ff{rityxOVgE(T&SjtzEVD_#5$A<%vB@ zCYCSUv~}5n)kp9B@WY+shmP#se_+?{BM0}LI(zQg%`4X*J-Kmo!=(7Y*7dtZCc?f} z5bDD-b`65UqtZ$i?>=|_98A48OIp-QRWg+#`6ncQdGqYu04DA9lv^tEV=WMg|T}P7Fj`j@mk|z6~q|b?5H6^Oj7@ z&rFYOgmj}1mx4ckdVKeK<@s|*w(tAu#oM>f?^ITPb?-ie+b7q|oxgbT;-zbLKzBQQ zqVn36^Jg!A{oTtacP?yNzGmy@b-S;AhZ~Hyx6U5hwPD-AE8o5S_2tvo)Bx|kee~+t z?JHkB{o%(KU+r2vWBQoE@XBFh13|NPt7bL`#kqYS|JGi1*48GN>v2Y5C^_}|+ef!9 zojrB=;mbF#pWQik@+{fw{Nc@OH>_T|7^gb4-{o769^buk`pD697d97z<4`tn;laC) zZ(qCd&Al_5=g;4C=J7ATy}o(*&f{mVfBN+o1pM=lzlp#9@aEOyYbSQkhjqJY*PiWN zj2btkcg(?Swh)k@=TNhu|ELo(n5v0CfBEU{i@TRA&z(MX`dnq@xijZ3T)6tpix-a{ z+<)-+`Rg}t-n@SC-HVrxF7Dg1b^oFDGvJ4Uq1oeB9^AKf*{WTq53OFbdf%l7-`qI8 zebeq^=gywHeEY>a@uwet|IZJ%PoKPa>+8Fhx8=j5lBO1(4r;#5;R#B3qLRN3N#C=5 z`r67H@b<|^XSeLHic0()JK6WwP98sf>E@lgUxQiBT)ceeNAdR`fBNIkKYsciaQ*Ph z&(9w{e)8F z+dHSv-FtXt|N6CC_n$m}cJ~+rgP}n)Lp41se7!OS4VR_^hK@+fo4#`0oQys_BP-s6 z29E8MdtbkWJ>R{Xcdi}Zz31rp>sQa7zW(Uj``0gByjXeR>ix%$p8p4H{qf@Ah1Fw+ z6)ih@`uvSY-#+^0+t)w;@$>VCum^kl?AzbPw~udKy?OV--o3|89Y1{dICP6j3749s zcAe%Xf~KL7ngmSU(yMX(+ARm94vp>Cy_;Xy&^h!XHyPerzDa&{|Iw@OpI$kA{prIC z`}dzZckIBi3sS~Z{rLKa?_YqLUtTcYhfC-&_- ze&*cOI}h2^_vZ59Q+L1r&wu{-?T_DIz956XdwliM#mbv^uUxqCEp!AF{yTi_rt`4j z!#ZmBDDXf)Mm1_!NL|d%)z01`bnR;THf-Fabx(K;r*gQmZpYnszuvp{LKNS< ze*Ey+_phJ5`sJ4&-n@Kx^Q&u@uHC+InHGif*Kgf_LT_UH;ql$Cu06o_bcuhwx_;!i z>|*`$=;70QSFhfG^Y-=2H_#OK1YzoTZ^fT~cz$fykg$ORgZ!I0a*L@ux|*4|xLLAC zo?!|ZGBP20jQnmbj_=K_3unKE^qU7a?>_wc(%G+`KE8Hr=a!wvzk2pN=>F!~g{yb& z-o1O{%B`Qo@2_6I2k)F?;m@b1b{=^oq)>~Oyp zPI~zKv$=(qrjd=CBP1!9cZw=mK6_Yp)k*Eyd3t3m7jWZ9<^8+I)~(rd?%wzR5r2Jh ze$U>sSFhcD`t;$mcjDtyXO$MYCs!{&gx3G@k>Z_}DCCcye|{&v+QM}LAp_3x2lP5s zdFjgTaUpGLJDUplc(=Y2w4@5Y?Hz(ozb=g%)iCs2$x(bf`t!l*Q`a9pfBEdmgLCWW zFW-0mtEbdyU`v1E+IO#i_!-}4_s5&3PqFL&@my|8C22pbU<%s;?N3NTR7Ay<3nrDO z4d__QM9|RzI=weuaB(1R9J8wUw$8r{j~{$}^YZCqr_LVUv}XIsi(}y#{7}xjWu!@1W4w z?%Z?eRAuF{O-oiB{rbCi;xFIax&QF)@lC6iP8qijX0N{rI0Jpnln{%Ii=Qk0yk{)@ zV(lrx5R5%E4vg!2YYtXlTzF)ax>4Fv8F7nw=nXzgb6(B|U 
z3SAzlj5`u1r=|6*DNks~D(IV2B-BYv^uP~T{EhHx&*Z$)6}yjLzjyo2y-T~+ZoPt) zlt;^lLVkcRLoq6m6?~tw6qYcb+T_pbhXMAV< zmV1x8|?g50)Ocs(SD!AK4=Z=BIjAi`{L2VpV=@4 z(k2kz2tPX@L@W@+t?+#+LBJ1t0fDc~4`k8Ly0qw7wWGr`Z23$syfA#9$MWYtu(gF~ zAcDWHP{j8j^YHDG{Bg^+9K8JSy=MFE=*Hb&y?nrICx7yoS*I?Dg3y$39V9{b;3@+X z!I)J;ORf#5kZLH;##jr#>cOqPLiz2d-(BF!d0ieIy8>(a?{~2O|EDHtK_UIAR8q=V zmyb>DUyjK;ERR=-gT)QQkTzYCBHqL-3!FAk$}4vkiS}19JbQ^jQOxp0sfeUZefFAH zc=fUrBp!?t=*91hB9&8|s7zxD-c?!ps&H4$$X&&Q=t5sn?8d~QAWEq@9Q^+7`!fq0 zjWv+rhBj8kR8m_K0-1B-2daRO&libU?{p+F3ynOJR> zgDd0xvsf647zBEQHVNE8SD>th^E*gWDZM(H3R(d{T_P)NfNWF_xA#f_p~x}`E?%r) z5JeocpSv5b2iWM(x6XDr8(wvcw6Tm?SBr6Bb&PkYC4*Za%0wqq&OWB{uIfaj? zFg_%+VydLL5I^4+GODexkl6)xj-wb!;grhqB}YvV5=n#vMgU(V8@RAy-6e$QTTJg~ zT>J_jDMh0EnL%CwNo(5P z7z`|gz~14DsmUm0WGD#39EP^q_m~s;dI_Dek$`YPU=9@xQsS#cSG*5+0}UwG;WN!v#BIvU#i@s-l&lvRA}&}E8IqP_G}TbxqO6yp&>aqnHcGXDm`Mew zhb7?JLX3i$83Be@vhU`=qKH+1f<^@92blG*eHJnXUA?e2mD)r!6@M}kzGgXOrJ#5C zEcU=wg3!~4Q5NDnA_l?Y3tmy+!Xxm-X85x7yO9^fG{Nqa2&eHq1?c+7g#Q^f5V2|^ zWLG8L-vJ-!68ykq)ACT)%*QW*E^LA{@zO%2HG6}Uhq5D&;KEAb6M!su3+)G4-4qAI zjWVZaxflS?%fk!HFWh_da&D^lo%jdUs#Ec{F&r$DIJ{U3{zNBS*vzy9K|HPP82rKz z2IPYvCyjfhxPP-tTAO zLm|fJ=j~zYl^RFqVn8 zidQY01xx)dU|DVm^<$BD9wb;DQCxwELLhW_Buv@i$WBqw0{KZHAg0mq_3+KQ)sSD3 zKj_S%Siz7Sd$-BCK%y}9>9hWn06Fl9^=wF62t7qHXB|`VyCBRJ;frMY2W~(S(9T2w zqlceh_hUJs_!Y>6Voz^YqskSke2bqxpn!T}@B%MrzXe}J*o-MG*~jBa;tChr*zD%v zTao(Rns_(C@C~TTTmk#ueMnP;gVBXF&rIGy6V1Fj{j)f8)Lj**{=VYpqg&=VcYuUM} zhp^}wa=`3sEGB)wui;ySJkVraxcc3M3FK2%X9 zP`6>Ks?Z@L2qTm%Fr95xhXENGu}a(Zzl-)l@<1mf{3hsN1R)$Rqo%U^mEj-}f}CGE zegPK+2Mh&yv`#S5#D}fpNU8TPmz21m?2uG`fF~pkk1sGn91xI()!^Sd5hvyVW*w>S zU&M`Oi|@cmR)8ikDD3?JA|a5+Cf#HjG7-v=5<;WO#nz1Sk zas#VU;8VeCv6(1{BDgrF+#*%^{(HGgH@7W%UmAXwx@wS{4}syAX6wu4j3Q_MjRdN= z4}wWIDj5I17C6To2ww`M4=%GZtV8COE8)jEaW7K&er555x1cv=*MuCM;RRrE0S;`$ z6IflS_+N2;nYCV9dagJGm1@eQuJEKn^-Vmy~fZbSmUg)9MoDtsp% z>%D_~`6+$42GztONU^+IxZCXJ@A!RF!9-Y1g>p5VNI5r) zu_mr@ED&vZQ_T3?0rv&ItpUlsVm@3m3elrPd%<@G!3_f!^ZigP@Rc4)*)34wpu`8a 
z2+2WobC6&xQX#k|QniNA1t6^}ik*lA6~My~XHjNSV*ws`0B1xvEnfE;Ht&=|z(-Qx zq%aZE)eV_gX1K(UUc4NzOwLdu`)i(ul^dJ`a_W?|rZaJk4g=dZVI*h%s|XgykAx=3 z&gpd@^STlc@HJS`U&;KZbODyj4}*4lYA*>E+jD>_X@aNcU#J?aB@~wP_#Z|yLFm{I z@i0oWf`t*_&N7qW!6_X%(!QQ`PM9C!MRC?l#BB^;Q-yDeHGZ!}C47Rd&$8uRVCV9G zho1!4i^zfZOF1v-H%VVmL{gAe2ZoTLmSC_@Jn|9ARBH@2?i`^BX7r)MW5^LtL>aN9 z@Cqw76U8M7ACVxFj&p7veJ`RLuN>+QD;q}KNVMHA}jWB1uGEJ}aS) zmA0n2N7E*)MRBEd16D*=W)B}F|BI6;y62>v`)GO!SFIGaMaBk71mhN6Y+$#Xv3-Gs zKMA3-_4~^E*lP;fuC+Y9j30>S3gcN9p?ea=MPEtN#z*!3B@y?d@XcW*^_Wz29;v|A z4*Ivdrr0@l(yl|}WD$v_W2I=<}B`-eMrLSwcSM=+*Ce;3!UgE+`uTxVZA zmr?{?svy9YLtDoZMvjGb!@T8_eCdOmyvIgfe{;AwvFc(c40t9!ND;UvM(aJ6F@Rj| zd(ogPL}(zuo>`Z8u?B{cA6QIRUQ3CBN$W1XJq0bd!4(&e#0x5nhgeh>|9mf-G3%*e zd`kRns-La3LOBv4S6}|Pw^SrFhSar;?^wQJM^W5>yjju+6=E?90d^xSuq<4)qT&2Q z5Of;%LzDEIbLi9@TIG=q3j|w-?cy?UW+0eZY72UH`m@D1M`rjc&M9Rr%xK_ZMf1v? zBnsqL{P|L>ATjVsM;`MCbwRkTp!N=7YqW3c+79Z1W#OrPv(dj`e4Nd5{&(fj@t}9r zsbOy6>Dw^u($#B+`ol>yW8?UVn<$XtG>JicfTfK1gc#+E#3B=fAxqP_uhuRRZDkvI z{K~v=JZoUQ5@+o(74hbfQ8&X@v{TWjv9j{wv0QCI{~i}AaSAPo;7Sl^cCR7{zUT-+ zVO_2Vcw_-;qfVd9cc83U^8vlUXsQGDi7yM9gG@Rr5ub5hJJ&X;S+qv!*(1psSZCa? z+)xUwt;cbp-0UBvjB@-`vK&@5!HXPZHkl&C?w{G(luxBKqgrD@!1jTKw#K#f*4&gWBvQ5;RCQmX$%mVh>gbR2YNm(}jLOB3=b; z&gE0%n_;^^sMSBP3$&%STj?9|SXwx|!OfjXzRt=8OaEq_!}~SCJGHT+MZyKDrr*B2 zWzCrW&Fs`Ai&e!nnOTV;f*oa*Kwc(k>7$?@7cXbmgzY{C@6O@5!B&tkiK#enW?m;3 z@EEiitc}C~mw?ocp;Xk@!Y7Y$! 
zEGX>a1eS7aXkzSA(_FBGvZfV_7wtQ7X0dgXmY(3SD)n1fyYS#fh(-y_UYMd_$b?tn zhZE)n6;mhY(MAZld?N3Q=nv|SI{NtC&wI&7(qyYK``r_nR0an$jGdn00=qhW_nLZw z8uUrP4`2UWHD%ZJv%4ZR;7k#BVMt>E(rItxUlt^4m1b5Dh5Gw$?<)$bUyHS%gV1I1 z-UX#4Fj>}L_v4+dOUi?2sIgO+`)~}zf@YJ>tvZZZ7>+q(Hy1qD1fx1TIt#g*W^dZR z=J?JQuu*fa@*PWncwant*N=Fmi^5Q73Mup-IVv?dwigU=oPQvUS-QL;O3Cw4MHm_yY&@|%Y~<>lkiI&Hxm>bQCqR@lqnh_I7u^X6S=&RaNjO6AKuz}cw5 z7%Gr2T0$2&UBys;D3OIu@MqpxZFLQB#igJXar+*bz`aM-#Q9Ug+&O(B!4kw8oK4-% zQ`@{>$-;8-B#aEyG^n6clzq#V_Q})BGDaSHx!hbZuIE*?YjbfFDGa6D*6_PcL)Uuk z`u23@ywE*tVcU0@R5vNvdtzl?R~r0K9?4;Ga|!C^9{Peu_Ppg|oS@Xm$r>jv_)Vbo z7z>!l{3cCE&s}zDzn`GfLAo(4R|*Eopi&eCzMeG$lEXSSloUfrqb6<7il@-^v3K{& zTe5k^AU0P*4_}Bl#MH!yYEba zn&AC0b17pIUqb~R4AOIB!oq{X+v2d6g8ymoGiU^Rj4d8MdQwS$vH=vUk{s*|5^wFM z^}SW>qPFfWsf#3MSgF+QarN%73$INFrO#Tsa{GmfItF+IQzaNx0i{$_R-Dk@t4CUD zZ~sn8R6M+Azk~>3i#E-hRG1QsKSE)s^8qZOLrwQ)K1~ZZPUwgX7LMA2hNGbYB)eQ{ z=TxZ!ayGU0j3SqN;!dsWN1tkPu?(J!0#|+zxe{ z_b|_z6!a8V! zCiUzzxFTWMNKEw^SkI>S&L;;dPM-~G-2|2k1ulsZo@t%EWXiCPy@w6$s3#aZE9j64 zxGz4xe>mS*(2k3$ZP%r6T2}9#-bx&7S1gZ2cbeLQa|3Jz^^T>Bi&`P8Lsvf6!fR?B zdw4hADDveMdpZhMp`Zd$4!VrheK%6Pw7q!2GZ8AMqPK;4&|r! 
zuX!j|b`fCe06{OR&ajQj;v6h{_&aH68{t6aK?`E5u1lo0cIe!xQ&3c&j-8;cn%vIx zU>i(O(!AA%Dtjgkc2KeJICT0@x+9@eZ{?}8HMnWe%^VF(8|587F>lTNtCOjg;^GzC z5;Nf$CPJOQK9DkXk$W?7b`gi z{)EdE&NpNo*KeBwTf@bQkI;WHSa1x_g1;|d8$)S%P_dGTSpxu$Ds}oVhfBG|I7EDe ziUY_K#i1kOFCeIRVSXamNPR#o{0aru*G4vVG9^W?ayBKc2MnEz(( z+j%IRP;fPKVjw}DwOT09IrU3NqKIoW;!@VU<(oc0+4limndje4Kg0(~KG0u7g-D$d zZxz7*E*K^0@U+ZFaB!W5t;*5`$d}j*NAr9}tw3!4H%W2-5!y zm*6GPPtHoc2(VNI201w0`qH5(GYnVTQ43>!Tc{6)A>&@fSm+P%oQ9HcUeX z_d5ng8RHj%$Mkb$GAwFcb1Y-KaZG(7Knf_5N?Z-AP){J7rAWdAFZx0eYUC>Agp7j< z={#Pk?8^Wn)uakjl1`qcLLw@aNj(2pcIq#1K~Jw_N4N}b|5^xS|G$k!A)g;w0YPCeG#vjg!EQ)3_9^Q9 z+o>C%`4TCSaUmAL0t^JvZp_H-JNMs4qmVDow`R7G?X_P*LVz$sxbRpoT2b%cPKhHy z1N>eiH0S!I<4BGZVNthQ1w8Uk3;#AM_`{w~pyLmbo}pmc@aauR!F4N(MBGsNifFJe z7(DQF`!4)v$HfBd+az3w0t!St%hty~*7}kG6*mX<*p?6=2mdoLRfp$Ux1Tr!oA5Y0_!={|wkGw3pC>;ej+I zFoOSI4iwCmQW(t=yccm?Z4eM)`m(qRB~TcFwk z)fT9>K(z&`El_QNY710bpxOe}7O1vBwFRmzP;G%~3shU6+5*)UsJ1}01*$DjZGmbF zR9m3h0@W6%wm`K7sx44$focm>TcFwk)fT9>K(z&`El_QNY710bpxOe}7O1vBwFRmz zP;G%~3shU6+5*)UsJ1}01*$DjZGrz^SOESG`hWqvEl~R8fUq+G?i>HiS8ON@{|R5* zj(_mWuFN9Gkw$+7%%g77ciD&Fw{OPEb_@LnFVg4$e|2n-1LB;xIB^n#f#33mzemQv z2(r4HB7RjJv#bn1Y#mCatMGFVYh=byUpe{7iPj)VHy19_7s2SQ7?2m?MIsb^fa|s> zeq99@ydq^VDc-#TYAD_hIiThz!;ve%o8k8g{Xrb3w4^VA5FlqIT))W-@SP7)Pq?9M zFW~M;)xSFk$>p+a_!QaahjoWL!nd@=w_HMsI3H4kYm3siir^*daJzBy~3>6T?m>@XbA^q(Kv zl=fkj{SH09aQ`FrVx=5ldURKOwOjDi68y#mhA0`jaW*7>U{rjAOrDhdoY?a#@if)* zJPK03qJ9errNE^!!!0>_C<_RI>0Kpd$jQ>?!ytK$(;Y`m@sL4QHZ_|U6Hy6_RAeOAhHOo~Y51;pH^;v*erPO)Pr?8ZH zNiL~D;m480a4Ekx#~sX9lJlu2OwUvb=(-C~kxupP|1f2uC=Jcpgdk2@p>B2Z&_(07-#> zlFL}0u5A$0y*KjLx`(iwZvNLUOC<0}A9(3MD8jEN0py zT%sp0Z=>=H`?9gDmGQG;MOrTYy9ua@J6en&FmZ5Y$x>EeAd@LyxC=N35R(#GX;BE^ zVz@}G`ZdvPO$Ip2hnuWo*Q5*Pi&P29$HG{~cT?Pk?VMA2<947Qncm5QO!%C4^6U?4hv8M zry7`LSGo^)_a{>hRf~v1l0F#_uYi?FKD#xN1s4ZNEzSKV3J9zkeXSPaALP<&bWzK$(mm!^#AQgm3DM(d5HSR0Ixzb~jA`2gZ zFVICz!g;9xb2pJ#58sIgt{1Ze6cZw%F%l$c4Pn4sAp2P%GAOzseJ6vYg0N$F=O{`^ 
zC*^j+)fSeA{!p})Gb!_U$hdHOOaV_6J~K?K-G&Px0ZM5|O16L^rZf)A;U$$)K~0nELA)uI4}m@r-)#O`M=}w7;}#VY1`~=z5nojcf#H%06}SZaia0N4Jl;p-^5(;x#D)X*jpxqidG}S0)qy1F_<;l`$cBjLX!-;A-8Ls7PSNaK-Z$p4&$b zBk@l~s)9-8{#yyTDhYvej-N!>s$x}HBqmrXdW$bKNv?F1$CSuYfbn-$nalg{;EF>8 zW`vmq1(x>k8Q8UZCqH-<-tzcVm)- zGASVl@Iw)=+Dj5a!l@X36vAI?3OO$_NbGR510T7k0=Rs0WNio)isE%F`1H_^M1iNr zMLs%=zLuSe5Gnn%g9}17D-MSwl)@FZ+f0`f3&P9ctO!csl$J^<$@o6`N_9aAi$%8d z!VxC?$fuC~fRzjLC0vk)Qf0mHOCeA?V^c#=RAouZf6_&kLm$kb-}soqLre^n4vPhJ{FG3_#cxz9hnSccCrolxpi*s=0-xW$$xq0D z)aCAAzDZ-F8tlPI|y7z6=cq7GE0GRiTMxPdCL3-KLbB9zE~KPl^_grt#B zk`N?A0zR1zS8GZu*rtUkx+I4DJUBL>WAAT#~SDw*C(eTp zeNw|Ago*=^A351qWw~x~0g;)6e}-!&h-4Zh#c;%+9MobMex_zoupq!rnEh0P`dJ~b zfILLNJVVDzJ|!Xv;9oIZkYhrXL^Oi5Q8rV}a0x;fF&ZFmCVXOw0^5)P>3clZpr4wB zkbi~?odtMyN-!mLs6`3adK)-SJtOkNHeMjHh~i;lm-0}&9~O&oJnytbNzf-HaW8>~ z@vyv^GMnL58P{o8^%kIK4FOES$|M#fLXgOU&`;O}eSm!u$f1?aCt*=$hR66s@s%J< z6XCZn(m=SBfVI0cq|YPdf&ew;s}W?H5Eba~O<9s^DaO&E7iFbYR$1TQPEWZaifiGA zPf?u5b=KhsNQAE-z%saZE71N!$}Tbl4BoFC7e&O6hh`9n3C;xKDfd~Ib|nX3-;d3N zjvk2ccz|lSEKfoB8YXslBn2olfauN3hy&6P<}ede6}USLKxHvqKks612KCN@B=bS2 z<>ytiFygX2RUILdr=Z;7WyHYj6^x}2l+^Q_w=zws`0pp#2zrg;|3KA}z}TJy76@CJ zFmWZEs;6?nunO?s+?1+d85ban;ryQ-wKXD6X!G}Q3F^@EA&{a5l6s`TY@@*88`Wfl%=X!! 
zpkj@zC@n$g6)*&)iL-mRT>%jaq&!>_xOBiv0kf)akcATO7!zhemzMZBed8hnCjukEcZcn2`4C>*8IK;yWGM8!wALSM7-jlLDHA5~7HY%L%I?j}%=-r)wu9JVrKA#Bq&8EnW@le16cysu~V(F#@3?d}ve( zn8P%W1YrlK5W$_1MNtDdh)aN<@L{n>es4*}XT&X(>uBP^Z!zj6D3Ed6!_#^S8ZPrt zL|3?QIFKhITqwy3F!x!Z71dHvuG@?&Lz=TJ9ul#FP!3%X5y9|<0m8sd6$fKiQpR9ZH3jz8Eg<->d{jV#3 zuNHnOAZ4hY7Aj&hC2vv?vYE}Q;$ko*iCpj}L^f^?IBO{fJe=F{?PX;WHwWU{76TV) zPW^$wW#2x83i#NobO(@Svy8H!zn!-BvV5NxLEQL7JfuWN5@sQ4gqt18K)$1bPBc+0 zIfb4g`oGMf#`I?e*qpu>$Xx}6>lJj!Nw|QBxyP(fLFc&Omt}a!Rqt%M2L8QIu5<$m zQJkpX!#rjc zQ9o8n4|&sAj4{I0 zFO);dX9SlP@S~zqI7E7s2r?c8uDEnu(oyv$;^uf3^O(^D;q=NXVm>WXr&qly6(pfZ z+{a3Q^b4kJNiS@`877S3nQlb|Nbx!M4=NME7y9hr(aJ-)E;t4lh$K6lJdXxrt%SCM zfy=pgg`K<}`=U2zRF?e-X-!pa+bUC7*gVui8ZlnTRWp&!KryFMV=Uxs_$79%AQKS0 zBKrww6s)*+FjQpWAu8rY0|`&xWRq|fBFP|lDU^w#uh30m2QrpOS!L;eJB^Rfs48 zTcFwk)fT9>K(z&`El_QNY710bpxOe}7O1vBwFRmzP;G&K)dFhj__U++telF0i<9)& zm9Ei8pWaf}*OwnfQB#vD!rxZuvzJY%n>t!bA_M~$3;6Y1Pr)T?1%0~&8x{;&-Doc0)7d%&^4HWpV;sK8ks~QiX(mI9ZZk4x3Z^#nD&Do^KF)bW%UbPd(y2L+g2RG59VBrTd;p^6$jP@tx-uZea6 zT=2w^uDYP|AubT0=jdi@!fm7PX2-0dZdcb;M{YDmGj&q7ps9zenWUE8GIE*Nk`X17 z@McjrG}Wai9MtvEJYW}RXi61rQz*BzHa5klGA!_-ZEDPPfB-`$S93j};hpN(u}8rL z!Mrx0(4+@dp^SosD?}=~I@rwwEkg?jCo&*u5?GfJ%<(~4=mw^yQX^wl%aj*u@`QPP zd;m!k>pRr4r$SS-zI;DA7G`>8c2-cLik(!;!kpW{(AyrKnzdE3h@gjkPlIW+u-Dfz zbHt}&)L5_Mcc9BqO7fu`gSH9X3+iR%MqY?Z*AZ;w(14ziGO>ieH4xM_)yc%_#@5Cf zIu=&It!DuR3VCKV!E#pa%D7bV`7?DLvX`x+uBtBC-v}R%p>Bea^gN2H()dH;Vi^SL zUl41W7^w*`q+tx&*LT7ab8A#(V5CL3G%c-lRn+wjfo5u>r=|kk4N6tj1a(Viu!Frj zR*{3cx;}r#Py@!7hOw!(pkn06aOo?z8`MxG`Mjz+vyveklUf~ux2w5_p?nKa#K>uYEk8>tJL4tOiPp-`y03qBJK?P3T<({*vt6ErQI zm@Tw#IiUqWuy1ps2zq*ly z)K~RQ^^6g>zBTqJEyc8@;>`@GZvj#ad}l?Ya;H{Y8ofzYV1yxrz?P_DgoA`L> zL0cN~p~hLT_fpWo`ve?Po?_^~x~67qM5}2V8xyy#qa%o;0Q_2LBo$3fRp|RB`t<5? 
zJ{X{X8F_od0Mpiki>@n;7pf*b_eh<`%+XW>79+!|eaBFmd{uPxbkqO`*T^E6KXmnt zjftplYH7*(fU&(aLjxBLJ?O|ef0rXKI zs98Fj)2Pxm&}Z{5a(gzcLsjsiFpZX-BVzQdEzJNjkES{H8XoB8r3}Q>>A4a04Rv+l z<{@acn)7u7pt#ibp~`U4QOq{)J?`E_i3CwXf$}#H2x46!cb>$RAki#(!vLw19AKngHa zj4Ui+22k~`=Wc6ZK@AOq*vd)|?g3m3OHJMe8V)ipR44^C1H69=XD3`sgesi~@;V?- z4Vn|=@s986#@o!&2`6GJnEQBpVs&#za}7bm$O4!R-5l^%jcU6an7Dh;90lD++tMBk zVeD#apf0Vw=3cyjV|Tc52*#3U334$HIE?_Cx`T^@j;aAj zvM}IX&dS393ibH{jl5DYvA1_b>#FEG+UsiLjOykIVt``mgcFI5jVG9rFU0ER4p2xz zQ)(@BBXhnssTz6KruJc82C?zcp<<3B!^Fx`N6#Lh8tQQu$Hd7-2fL;YErilA#hMnj zHcqsjn0mXJ7#qXAJ9w(mZQ6LCTWIHjeh;e$uitfY#Y)OS&xMa`;HuF`Ufh&fpb1@_ zQN6a2mZ39>Gr&#_LDk5F=6eI?ph{~Wl$cxDV&{gNaj~&>0e4@psYQ*U;^YE!jhbHA zTXc-k{Lm~mY)!;855_*V*+eaZiq|J)zY_uTh}zfzJr0!_qwjo zu?2nRE2to=k*Pge7OEH-*m>h{vh^V+3Qa7ifCr6*(k4p7_=73kxV9&!so6L3;Z2Hl zq@be%7z}|2?o8KMAC;+@T41L{4%l~KLm6KZB|=#V?oJfUl4 z1(P4_(8!pUP8Q@l>KoIXi?yYo?qF-qN4-sLPi{J?5MymU238-+44ur_;)Y<@{0%su zx}iJ4V@9QrpA~91m3(Dk$5)%$(d1Yby=`i9G0nWm(e5 ztrj*1Xre|JbZA_v>X?&dtU-g3E8n~#gBJNMU_|V4&a5U*^aggglDjK?Gh=Yw6)pISb;WO zm?3ZqrE)bLHm3DFppmWko={LXFf-K!9Ju3o2a7QYprds4jYJ(dA($Ry& zZ4pfPkkhw!CXGrL6B8XH>6x3|swwQSYbuBGJ zGec@L;-FiBwxCHs(8M8#oQB?P`sGqowE?iY16(j@n7M-GK$01v6%6b60zB|`A){hy zWn*Uo+CYPbm8-q24Gt46902z9-QlVn)D0CQ0XU!UhbcZe;82W2Hg8Q^mxmwh2^GGpyALdWm^6Z{(JPwhs63h@xu9{VK}ITOG#D zJrdcUQ8zHqGN8NHvbWRGkgRUfxDEIX#!i+nCSYCCFe4|>*vv>vRmZ}t$yG7* zs0pJ%*VNjpJ^*)WT}#G<2@P<&l?a;pwAwL>;#7j}kb7{l|EFnbtE)~MO(QGVD@jsx z>bLPV65z-O2;eWmwh}dH7@8XDnAur7xR~jZTn%&A8Zfh1xp{ix1XH_~-9Kd1va7P>s=GGwvEySJaKZ7DEza6-CSd63sHdg{$aKJu(sguo@TlqO>0(1& zr&g=_C`$@Drd2&BW&o*_#X4GA(7KuyHf*(G{mTkA{h$&Jr6JFL#INaCuZ|ZxpArmU z9S6S^?PXxBt7`_^JT(g^3lk?tJ#AAHeTQaEJ)CUxadLL`^rYJSqK(s}Qeg7}CvfAL zSm5lcZsUUIHPZa-?5YdfX!7b*X|&2h)UE2&uf_h5OV!Xw1CDu|an{z8v;wN>z{pc^fGv+Y zo&e}D7eXOK#lWMU8|wlZM#kD&rgmm{>-tt6HC#xLrKhK*o|%&acyOw=K5e=-_JSow zMU!S6E{30zDo&BAz|>)^y7qMHXzEteS?b?9hNiePL+j$;sxn#x4T~E5tN}Nba12c| zh^e!09Tz*qYv}5!Cz!Z-d07Z5wOciBUbhDJPx9F~ENv%rWnnlgPZd%b#B5cDHZU<# 
z71S-oiJ&sa5@mbI*C*v~|oZO>u;B#Sj_0T2hCqGF9+mocYyhyaBr zV@!mhg#l@{`wM$%yYRKRerrIZg69s_-u5_6$Tvqj4I`fq++68ESrv{4wBTq(7tSxh zpK`Ud^6<1Z(zh@YtQ)%s4hh4c$sJs@z@J8MhX+k94QfXmCLaqrKDc%ucxoBX6b^PLPL2_o4BBt`X1qHH_-|m{_;&;iYR|+frTM(n=Ny?!!mg4<*A%F$EnL z>`WN+w6wt8)YLbzv8ST|xuR(;4=}lnH@BCrG*s|7gIWRm81#HC3rANwGe;+L=ejlB zU9HWGG|d|L7}+*z)!u)A7btMB=O#dH^5Nin?I_J9iY>XJp&raB&?c6)_BdyfpNS&$ zJ!-+wv8u^$K>n#l%gjic*l-qsHOtz=#nQ#a+_h0XFE@J|OB45wjf`z-HA&jA%mw6_ zc(|iD7EGn_COLeZuEFn`Iv>$s4`ZaOp>1SnV1rvOL-`j94GR|#Vop;!a!Plo3f)Qu z4tSnMqv*{y-dIujVAn?KYqUgRDGS&L7$lbp zT3izjc+JdBbhXWJb8qD?&9GnyK#%L-dqL^^T18bw%i7u2(pcZZ)}y^&doL5a_7SNC z6LNca>g$JnBYl}bA;80vr{O1sDuGS(bRs0v_3diZuro6@GzR-vI7ql)j#3qH9hCWa zLTL?9*H+hYsOe#?t7%}g;)?sS9iC)}-0KiXc9bR;97#Ht?+DXzOAHh`2|s%p`l)jHtLTSf|vrH4I#9 zd3)NK*?2ea8Qj~~+Ppz{R>r913lqCp3HD>hzBl7=-IR}^&a?@#@5r#7=F5YZ~sVabetlKlN&;V9R}EX`84n{wXWaLvrg;w zO?}&SYg)H?@QBQTeVg0)_Umq?{Ogb`(nuj^CC$I8s7j$@5R-3PU+(`isZ z$F?o%)^FFoZGGove$85UQmXT}k_Mig!~Jpj#wpv=-P*~~n5N^u*(Y>tT zi@U9F(y23-PLHV;mV?&fP}S-*E+fRA9IwOaahfkSE)Eej`S zcSj>5BOQHoq~(@^^j~2Fx{jW{sf&-Lrk%5eosFTXL!B0FdUmPTIy5O^Kx;>nhQYBx zFo){a?(NS{y8d6N;ABG)JQ}xY+WZY!)j$ZBta0-Acv98>+0kQJ)8;Y7R<-%qY zdY)Ya4_$rmxxk-rY0?oUibkz__zfBy*0qU$q<=$nnH>B&DC8+gsG8JNp2Xa{bn~?} z)>46=axEaY;m_19n#K*0T}b zv8^_o0)W2^zntK)q~w<&kG4|{HjhvRP56C75mfM`Rn5-VT~Eu%+{VVbR>uwt28F@K%O6|VP~+%GXST0wnnbru5dcw79CdCvtvk5 zOmSM9Mgt;i8#vc!798AA$;AA3l#lmVkR=#6!^OmjEtnQ{Ynd5gUbW1w53p-qD6xQ2?-$~?yi?yiH+}&MB0tpF8 zaCdhp?(PuWp-`-}SX=7y{xiGpy}L`ouT;OE&;N7#fw!|Wv$OMNZFb*f7oaY^Xw&I1 zbWo3h_>-u%9ow|wezgNVksHKXcjFyQn)zdOQnMd^Of&b_)93`BxCf3NIm9+XJZhMp zrEOq_x7rYKnB^c11M{%rNIjuYW(iupM$_k zmuvoQKY{khS0r$^(InlgRbMh#_Se%J-2Q|1y@w7NYT@r^uRGe@Qp+;ZYm~l^`-rZ6 zM(K+~-0g-4B@-39a}7md@D><2pcPpv_6Ae+&?4skp#KPdzpE|&3WVR>P-r8qhK(T_ zk+TQejn*GFsDIBs>S`UjTL*f%7>%@YQr8Lzv9OjnX>{w*ZJ>_cXw%UjwZVo60X#*f z!(dLYF0@uDrKKkp(}rxvFdq&a$-7#xZ_x%8l{ABV$OHf1zzSc7Y~*~Rulq^w5n9@s z-I&%UzCONYgRLYwTE4;7#x5S_{oAzdrPixsw?4i5a3&kj_M^dL#&SKpwZh`9CAIL7 
zN?2vXZV5E1?rI7nLE9emdy#UkwpzBR7j~-mh8pShc(0d^{;&>AYl~p1)O?Uth>NMK z+t}eYf%bjDlO2rWy7e2ui}+B}9JW+Qc%L>KZFpHz%nwd;rLpjKQbCJnt^4}}?tXKvk{8<*ejG+4d&0KL&2U~8!R z;2|SL5uwf+Bdvqu{9Ht)8n#la9_@Oo;g=A3A3}JH327&@Ru+OEuLm+O@BcIwKK{}yM0$(6WBj&)muwj*G3ZNG)UbpGF9s0=3;K+ zYCfzNHHkTH2~}Tc}oUl%-gu=P;Kb zhZ|-?h$bBG$;V>)+4AFoAhs>{3&|$T zqZ22^X!KGa(xG+dzFPW2dXAB%2My_L9Fyj$-bX*cUvK!>QN22R(57jvxfTiU&I#<$ zcW~c+H9jm;v;s13L8@q3<3NkzJW-+r4!!Q|r;XTHQK+qOJzx zT&wQ#s`~(TUYmyQJ=%QO89tBC)azLy1`;8s~_Ww{#zJi1^#85Fm@{=6CMC=zG2Es^4 zCq^FKG`5w3DnWY+DgJF6v}0q6!ZdoAg?Z{|8fcO+sk?1jMRi5GzmvU#v*7^zW(m$2 z2x-${C|`sX==D1!AmaQ2o&vvl0FcfBg7Y8{5wM@^1JCK~H{67wM1*(ZW>EbAKfB@m zt;4-^H4OFo;I!0FoHeepdSXdIj+gEzREX&ed!X;@e8>o{(r=&;3j~ZXZZW(!M#-8y zPf`U!^Vo0kE#)nPCzDUskg)dA*BGI0F0masOj~m({<)vky}z@mv9X1X zZ{ajNH~z=XXYWXXSD&2>&#?JoAs;WwLlaRb8pAV3fk6N?5@*1r6*#oF+pErDEs?na z6QB{oEh#q0CbE9XYPc^YiRS7&(#S5fsCePe!2bD(JdztTVs>rHf(Z&?c@U6BLLE)Z zpYJUtc#cVj5G?8thag@cM(8UDLFpl`+)eMTILVp~v3y~r-_U`% za2}}>6)&(T=*5G&)MRk3BNzkmaicAw5E3ezImt_e7>I`H*Rc~6d5{lJ4~BqUg<+xR z@p@6s%7`vV!vGg@a|~QW(W?lS9Apd#NDwr{E|e}jm2y#5;2l4u?`77EF!|s;M+~!# z2t+Yj*kd4NjyDwX0n$`E9>JXY9dlGsUd%@V;vP=KCnQs7MBb;VXGM?-*8|fmzgNHu z6G5i@bV)BF0NiRELAV#igK{Zo4(uOrFv&;-qY%OaJ@6&9qKw?;3hodR0_C!3F7o8g zcm}#KfT57{TUi*jsb0OX_Z~p*!m>Gm&5TX#0 z7rque3lMtd79GZQIR->t?S#kE{suSKl5vdbfVsIBnnhqfh1_rtm_TnpLP;Gp1<(bOt5SY!o)do8wrjAir6Yi9Lst0;N14 zDa|t+hDhTCy&;!13A`K!wm|TbL|Qbx0LKwL&RQQP4W9*v*;|mF|7+2C7_s`E#9l69 zpy?wM{-@G-4OYbij-li0)B+w0s7<^cbfUtH0`>4CB8W)(0P{m2E#f8{3WFqddX}?Z zP=o9M`dbWGp7`25eir$%mA=Cp}-M^y!E^q z!%SeW0+nHIUIrxTH&|<0P-G`e)1nc=P>8BfeVNW7#VorUkfUH`=(hJGM!onE+;K(t zI^t-v03GPSUl$S#++zL%7zTBNKxE)8s1wkBWiZh&wz#zF!7z<`%bryXHUj%iW9C3n zjzkS9SFBJxuZAtw408@nD0+4)b zasY2G|5{f}|b!DGg_625`N@bL>5&#;u!x{{EdPA!p zDrJ~dY`~O*2O6*tisNIddp9`&b9CtZMG+KGfiRzgtR>meh+=#IWr zwdEn+NlFj|50Cw?jYtFXWD0SZ1Y+B*bgtrjA)0}f(Nr(eSK+0(`_55jwW~MVjnQPS zFo6_^&XMOsfEF=~X=+j4tS?L;K^p8j1Q^F$%E{t&k2GMOEffogQ1}l6R2WL^PZ)R- zVmQ434;P6d6&~(Nga9^wIv{0)YQa8xh5*fn&|@{macDd6p#koff;kTmAv{m#BEcz# 
zyVF@#lV$l42UtELFN%2?xTu0t28FOJX*Pfc5S|kV4j*W13~)gfa!_= zWkwUOfNOsJ2h#LM5D|#|3V5c-i-8mbqBxi;?*HHAIpWyc)hq=z)?J8gY77$ zFUzhWLJ=1RP&g@@7fR)FDTw3+pi_ki#GD{#002{s4@*j;7@CRW?hZI;&q!gia%ii< z8zRX(wx0bQt`1~f;i9_ItL#0(#OH>!C8TM2NB{@}g7>*tK(K7o1{|pPyf6=g??t&d zUXmX(Gm|jeMFMhH$>+i(^APVu140Mf8S7e;`%!)|Se*GBP{#@H(jRK<;9mC{W(L)Z zN}zfX1_A~mhy_4EK;H=C7&6fiF7_1Od`E{Sir|&T*>wm(LB0hPU>J`(aVY0kNY|eP z<(L36E};B%zF47562iO?4uAr7!G9WfB3&(z)Xkq?!=S))HpYr7fx|5X;G<6p=s^igz@erVKn0&%W(s&mWhJ$?_I3nzhaeAS*1p$(K!5;u9nr~i! z`xyx4gwE(60v4G124ha;01R8Xdv+;rv!^~};E~#3OcF?;0KrVe)0jr22q^#{i4!9R zGU>Km5P*RZo>wK8 zeZ5miAg>v)T;-a(I^5?z2FD`E*N(w~2L4L0GH)G9dHExeC4c-xMUE$2Ej}Cvk<}E^ zaN`svf0iZdda4&xcpS?$7AP=?`GSR}tQa;tvf`3`w3aP*1Pj2baye1o1r+Js;T!fY zPEHt((g^M!;i{SBUc`+iU5M$pfIUpIGrKtoM+cTlRCYFFi0mDp z!DGl}k4L~vN5V_Y0c%XS1P~iLpMM{;1@_z&4T{->KD0vknjc^U-00YNt<79!rk_V| zO6Yr!-H3O#ka-txLI(Ll7AL?vk&%Z-^_h#TG+-_aOej9^;vA&F6~HVkge(Ds1v|gS zqdB%eWZ5Gp)S>{;fP{JMS6)FBkI!#GK1ZQ1%%$4H&_=2rcswWr zvO`$KSqfnwz+Qp78z&+013?tStYV+%;Al(3hr`K8X#`qDj>fxHaLi~<45!yP?V6Ys6!UWAuVV{oQeT?JGZQ^!x>)17Hr zagU)y5TPxY8>J;MJz&q4VH4p>ATdWIIcPavry+r_@20@W?!X;Rf4~feYgT`8f`qWQ z%!y!#=xN465`aFCYxK|$nE?S&Ivop0A%r~R^m-J*g=N3rKV-+0lH%~y`4^rKbWA-U zd^kuD4I#>+LmNOpVwK`J&DWea?n}u7P#{c`BVfq^e?|g5r$=vfn*iBphIB;w>aLs5 zuf+G{g4Z+lK{8bVFerx-CUYs=LpEb9Il?{mIfrM*GO(S|j-=p0!UQ^!(FEoocP?^7 z8ui8l2HFhNv6HwCVOo7hcqnD=y&X}CtS8x@rik0{8Udr6h`GEViMWo1NVtg)Fcc8< z>{#jqW(qk3xtI~ck8)8%p*_J*1jgFp63@stBE%A6%7p|7+t{UYX9hf^S?ue>)5nmT z{%E0;4WA?*bR14s7!&<@mKDFhV3%^7U&wO^xfEYdAP*U-38w}03`~^v$C`RZ1;Y5pfB5MFP{p%rOQ*-dfYL)*F<3COtOH)? z%Yy&eV>jmVQLqoDNkE^HgmF9u_6XFO4+mqtkkG~u9FevI`Wht0O8jBNXXp#U&krg+ zJ&YY>5<`4b{DXW5I31MmT zYeA|5a2gK7DWz*fQFNFgGYHCQOLGW5v{#C)f4mj$gtgZ}OEJALw{+i*?YkFFg+c7S zF_-}fBz#B`;K^N`3u(j$^rsZ=NyXz+XmvmtB=a95u=y{a2#pB}>}XtC7iIquX*x8Y zz%wjF;+2&`YOxRg?J>lLcAoUvcA%GupZuV3{$47{#Ud-ggUI29%Iubkt2%ta1 zOj|X1=e09C!w{aMs25gX>TBW40@7&^K6&Gv^z!gtNQU)bwNY&ehMKlu0uGiIB~i{! 
z<{b#$lf2OV3qrPz*5MWeE2dg1yxsmA7furhRLU?ZB~4ZK9%G0KU&2xGQ)c zz`!uq1WQ3vkq3Rxu|o_kVXS(e;wqQmi?$xwzhi5Sge=FQgV7-H5su`x<4S?1-mhCv zqv*-?WXNmZqZjSw#yDm;%$U1i%H+lu1pxQ#Sc$pip$+3GPe5%70aoWqVVKE)k2=cT zzCk-L$&fzON$TetptUAK?IR%Slr*h2z2xAF z<=vSsI@m+s5(JeLnjo^e>kS?OE&|Ts7@)fP==N2o2MzT1PEYr<8O?i$z9{2MhOMWp z@ONoy9T@eTnX_{c(YYW0UKW*Tb#YxyTHdmY`^3ygwnMobO`?tpN-6%q*qC7>#@LS@ zsKr$t=F&H`T)dT3alfG>UDC%zjp@h>f81d50mD-3>gSqiNLbKfoGbCdam^O3m%5CSDq7%|2bXL^q z7X_kYsBh2rtX=vHu(MRaGETn;8@_H?VGdSe$taFInn+Sqi?JO!)L2V9eqN;=^7I(c zmT5hJG%&}DV*#OMY+bNz@6N_U5wt6Ff*?{j1EW#(<$tQK8mqgVecfy_>5w!n;p9eXp?MN*BVT-v6Uk9fy;l%k!lVpt|c0vYN8* z$jz%Yia)?q)M+LsoJK?Y>DZb(I1l@%8_ad|XhbBmb4||*4~}(5*fC7M%P22*GKgcE z8QxqiM#t%6Mkl_aJen*9I}Ga8r9(S9PB_0w!%Q|BeY|F#d479Af(zdm6t!YakckhU z__>cBJ*s0r?e-t^;c=wB`mR|uAq?ARzu}`JN}%QI(bJresb_PwI*m2ZZ1cWZ`TTNs z(hYm|Y%d%J*!OZN=90Lo4URhg=HlX66~i$NK|!xtih*am6uOD2L?3>j2uJS-Xr^sU zBD4ViLVJ^7aG2VNWRAi4FdKE_Uy|vstQHwk^rZ0jQ198+ubyB}Bx!=HO&S-bmX+Ys z(_iNk_Z?66FFkbz>lFj1mp4t}bBK9hWwzCzVcLV*GF{ZCjzdIFaaVJVavU=Z+LRC|ht?tXg+9Rz3Jwx^OCv(G+w_FD=feRMwop)u{;;oG zMv-;rHiP`DYa)gqqoG8D=ZDyzEd7RS4;^eFhis!GrLxbJM_B#Z*#13TJqE%b z9rME^Ft&mJy_^=aEUoMQ4)*o|5uSGTkXQak>139N0g4~D=(sX0F|>cHes)>YvY_fA z*t;tZofc$Z?mM94$NB|_j?Z0l|4IYll0Qbom|;Se-jx~ctp(7>cnG6Hu*=)9uNo#I zpy(QD8GAYn_*ffX9i18y@QWU)M0h}mBJvn@XZZ7*0oL(F5?TSKCe-yJJj_U9JUCqI z19kt&^HajBW>-^!>iO3!VEF~sLX)5jv5xhQJe6e|ngwh=SFJqMT zYOyem1-`;SnAjrAzk@Wt`kIvaHtB>YJl@9jKft>PN&)acDF5w6Dk%1LrhkD_*whXq zn-)Sfd)kz*Dce7lj!6hYQKVF-)$(s@vStN{|I<4Bl4XlOp?j3i_b17fF_nV?^k3OZ zC=mDP6bmIuB@Cl+fMIHI&ipR}(PNQNBCi&S1uXng-j8IBpn&;zL8`~^d?89|L4wC~ ztx%r)H=z~2xNok0?$KwFz!GAYSC906$`$9pOXjUE~zifv64-hQ7o-YOUV%Z{0vVtpyit!Bp zo7fM&T?y@7t^z8?{TZ7a`QJhCT{4=KCj1DUD;s7{44AUp&Efv#h-DY^CGbV_X@&R~ zq%E$ltbcAh`FH5_HU&~Va{CM-f3%o8w13@#sqyA6y;#Iw@gdr(r>HV_Z4y|%esbJl+l6U7^gFCNn%zxdv~NuRSU9uvkY z9lEZg3)G&)7k2mi2eoJ}3d`jJC1PSBfFnGkQzT=wZLQTPo`3e$1gLv(g(r{rS7LGt z9Iu!V|8Y^n2LCb7-{A1#RWT-)%wb9{?US&Ql7KGqbUVdE{5y9XU|YBsPFMR`;UrKw 
z@4G;Kd%+bS7(S#C|GdO21#zVlJ=>S6r9hB_J7Z0SD;D6I&6{!xduj|X^UuUfpZ){~ zRvoHJ2{!N7u3g`;y5!oJ_vCW7qO@V;YCI}ePEv%{MT`QnrWmuE@TX8whE+7*pL?$U z8Z-a2KEc^^tiH92o0n(^2w21y*<2V;X%b(n2^?WocXCr;_jbB@hrb2t_fK!0Cda=C zha)x@_IAwVV;9byIJ}`cTH@|xW3JV`Q~#lG#RFex^L#dEkajy>PVu|UbF=KL&516q z($umPsG1AwjS)O-5ILG2dT$u@iy`rB89^!OH`Z{ zD230d%=foamO=h-1KuuPGQY3KpR38Cnr0E4cv83{Jmtl)41YR!fpAW%V}YP$CxMfo1UZs{ z0?Xe3IB|T324mnFdlsTED2mnbfIILS04pGeRby*!8fI-XM-(m-=pDFC8Qv*v3E4^D zxgkM7^ih*{#HDDTNa-Q&uLqW|2pnOgr>_AV@Y_-%9)B+%5lGR}Nz`i}zoPXL@=1|*A) zlJJ(K6yQpS+g6!auf+b4J*1dJuI6b)^t-}TFUn0}d{4R1mgLZT*je^G1E)7DGn^X9 zw7bG?>4_5FGNCJHc$XB_i(7Yzm=AAP@VTl#Y)y8gvNW=N@RkjcQX*lYa)!T}QqTrKH9EM;g?-FB*E|ZCej+f89qo7|AG@Kn zlYHuL#3%>D0kRCi&2CgoBA0liNjqYEsYe!0^Raa7I8x5#a7Hu-3xHhOyF#3Bl(a%F zhCflBr;@m#C{$6ZKm$*R!ap7uUys{NCwR6xUO^u5uCXASWRP#b&~Fw36#?Y1P>`!c zR7xl?{UtDuO2e0h{5|Z4D@geYQHmd~a7$PzU}oM>R1iLY$mW>_@W7vrX|Y^3Qc8D0 z#^f;dIrA8X{Y+4oQ=2l$NY4GhN2Vx9{pH(t zSoxLPEc=|4tU;s;Mue205CtfP#}l%^L$M(7x29Wf98)mY6x?F&@vEs8CBpeVRq%bd+Khs*n}>@AWN z<+cP{gwn#A3rFtHmFy^RDwjpjz?{kQQ>sEiEz4D4cnjEelu#Psz!cB=4ZsTC`9Xn` z03R7~oFoN#P_+)B?E4B3P+d=V$Z4u}BqEL^O35s;wQGFZBk zeU+#iX#p$gk32Xj?;Lef-;YJyAQ@UH`E~&;xkxxcoK|E$g=qvZs8cGzOd>G8k~1@t zP**ma>jn1z8|XeTv~Jm2KP4t25g+pt4`nD(AOUEK;<-2mKFJ>6LOBp|V=dccY0y;M zpGhZ>G9X7H%eGd^ct~Ha7_SI2P`HjpmyS%QncBvO{UX2nPf5_9PW%kkIEC^t=Xc!2 zkb;!+gNP0|ifJQLHK8B`fxvU&^1~SYzFdybUpe6KM!a|;C!70(HR|?vSnf2jANc=SIX`~4i8`T4aJkJ8c@;oIkIJ5mnS3qXyX)r|T9zHF*H%bGKawfC34 zyX){W6=JwNou)cyPdc|XJyYScrzy1eO+fsSWd%n<$hn!*YYxR^un`2%5E%XhP$2Vu zJGah9%%I1-5imX@=jC-5FgR%?5oX?4Da{t*0`ngNYqoC|y`AZ!zI{gd;no@#=vPI` z&*s4`NsAe#Bi*CIjsgT^T&3{)EAZeRzg_;@DJNkQRqm8D<73HkALct6KG#*`fbV)> z5sz9$9N|03{!&DdB&kS$S9lHNJY(`!?uF!vSZGc33R~|3>{NkwGW_mr@7J6)?7t>> zU!n*+9(IJZJxvKY{sfSJ{5tz=zP~na`mRm#9wDNY%*1o#)K5VoTBa)k5O+*PfP}?! 
zU-KZJ;m^d0^VVK^^2N#e0R16-dK*;Tza?#w8-+L7`RjLj!z92mj29`HHY(bM2SS_g zbd{jxk0uh|DM@ob-!;21Ho0u}{tHVz`wJ@t0~|96gYqaTFA<@aT#^*WreXAzG6$jj z$p1*Wa?#O)Td`Bh zDK|mhp8znu(afenN4OLxE_{33yI%5&B?YHO3vW*YR{YKq$}99!hUuel@_a1@H4^+6o?6(kNKi`QINLhaep%Yg_qHCywC5GUIU?4$-W=` zEjbiG+%di&K`9Cd$%g>t|0UlGB}3-_Bg`IIgyLV5&W?PvoUtSerx?H#LL@+apRFfp z*l}0*5+{LDiQ9$XG+}*C$?GuUB>Xc`Z$*LK z7~Io`a8Cn$y&X^ljKHMH?SyzUq0z^dU3XA{V+trt|Jl%Q#7Ol4BOF2_{hds7$LQGw zh6lLlw(mg1&LhGG{K~S&hQ!fw5CafbibB~X;xhhmGkAco3C-QIc$I3M2=se0JIo01-ty`B4 zt=gF6uH{84#!7kmQd4rR3`iewtYj~Z*t44@pvZRqU)8HsyDr_jby1kA&F1CwR8)z; zR4;yM_=cq;fC7nAoF%(=g?wGNVfndC>BasqVHcTFHe+&DfUAR*shPRnu%W#K`At+q zJfJAqd-r`@sc~v$1G##-Ahw;-3KaV$L7$UpjT*5PswT<(n_A>|I)Qg2;Qz zV;~dl2PI@Gku7MM6nBkfLxr{QU$fVrcm-q~_KKa-EDI>p45vbO7aT}(<6gC>_jmIB?Vb& zX;}p&MJYb!x?{%ZnmBq(BNI|mF{EYX=Vzrv2l^+JSC~cJ;veW2AoX*15xI+8L}KIv#^}VDkiZ~+cPCfC zh+Q2;L2)Um${yxFcA@;f^ zUZG*Y8{+F?gFN^$Znnn8W(Jy?z-VP@YG7b$i4@GQsbykkXKA3J27jq%<|Hy7Gjg;s zCEAKZV20bBw;JmA#9Lotc55rM;~g=H}nf%F@il zjJy!Ox4E-#h}6wkTU%$W&L}NoD+_aL(1H_Z>MX{WAPS;t+uSn!8iwO0$ zvvP0->w_2?3!A}<8Y6qegE54zeI0FNv?WtF66vXsE=|23xA>sG%B0PP~F{F783WZpP40uxaV(X{eJn zi&|SdO2WdN8OF%Y+0NO+#bAV%sk21lWTY{4`0)PSx((3O)irboj7v$54uB4%GuqHi z>hCV{^0YO^)`c&RQsnIxdKVy}+HCAa5@|$qWSG?5!i2A6qCeL#ZJb1o7SJv^V_QSx z#m)n0Cu@^2BgfcCg5fVOJYrMgle5!(#%dVYy13Yy7~49V4eilOUEj<|-y9pOo12>x zSW(x&&L=7*Gdn%T&sKW~3!;hO|^j|aWo)-yux4&Lt9kd z68o=}D-K;L3DMCB(Ch4=&srMknYspqM|m=gPf}KDPGM%KnhB6!I$T3~sxcSu5l)YVFVxZ02rqm7_ud8Kf?-L#>@d$`c#<{noAU!g` z&jbDv&m<%wG0ZrzDA;?P%9S8b`i>;|cSXO4Fua7UC$6)@8kk}aLS-QJh8JnAG zkJPbrhSqAXI~=A}GV0qvV@VA2@(N4h>J`i&cDA&H0U#_YDI+m7s$f!8N>EUAVwA7D zUtDH-Oh{x(jM&uBQ5GL54Fs!^xfgnwy|tMp!{|u@!lk~lw4$OknYRbdUDo2T_%ILG zz|{PVxH!^!VBUv$)!p7u-#J9;?&crpDfS5pj|lW~v$JxKNKPd)xs7XJc%Zu{=!ML* zZj!(}Vh?XwT47;+arM+mS)rjx+3CUdHezp)C?GyFGdbMXH^|>7BtABX%>PhP?ha;# z@Mj2C{-H6Mx#_9dIZ08m$;q*iu_;*v#d(RLve?w*$e@tq!cquDX<;Gp>G6SX4h|xp z(72@3wA8F3z>lk{tr~}9nHP+u?q2>lqr>>+9T*K_N=mQ}zz|VM*~NKrLH?l$>FLSw zp&m}ojt<_@@nPY_OOXU6hVs&~^0K^)%Uk7tbTdrAS-KS+2mt@99 
zCnUwi#74@31N=ggipGyG$;r;oOHa)yE-Npum^8gMFNJ!F^lh+CC4S(e`AL19Oif%u zlhYGJrJ=GwKi_~LiO4-5Iw9OII4K#L5DCNsba#Y5L(s6WwlUP!8ZoF}pMHb2Z9JhD z!Z>7M2>z%IHZ(em!T4iiJXYV_-oegU5|&n0nG@mb=`D?bNzEHv!r;)v)R+LV*e}rC z!W0Kip__V$MKDgexw(iW5^xTkoh6~+Qi+?$+tS$|e8$$a}0z=Z-)xuz8|DK;{KreI;jL#`8 zFG$7#%iqf##JRh9hD1q4rr@Z+c(1N)<$xd^=WU$aJ=~og?95GU-8|f^bw-;K7sFO0 zwj4dy-qX|B4*PMatC^9hgO3cx7nt>oG~tKPt`Tuj-ZnO3U;mJZ=!E3N*l<7i1A(!Y zVsAGw8S~`6pNXY+Y({oQVnl#93~ugj;BlMVIXH`)Y+-OFeg{l)U@8x9bSz20Nr8^- z&Mw}e;lu%lfzQR&#o5}(%G1Zq-p0|(*Ug$vU+4?_1V}{I&>sz4a9s5C364pK3PG>i zGcY^XzpAs1o9vSTAV2ysTt>I{x9Mf_O zb2GAXlf+|&>)C-H>g*F380do@h=!iAtzTH6hl7csgTJ4HnNu)l8C)EMM@NPR`FM&& zBEPuw#E4*TXHh_i#L3AgA~MJi9FK5cdt-R(#3(;6X+J;p>GfCZIYy+Cc*&-k_B=)OKKyEI{Olj*Yd0*oTON#f_~fG_$B6GdU%vFgrHD z(=Wso{xZlK67{QwXxsW>|F8zpz5zZS#9tudJkT3(G95KoP2Jd! zjBiO{o^IfTCM3m#hDFMJuuDU*arN_cvb3bmgubD%894mVPuv_##u7gg-B>cudIrJR zpCq-@(=~z>gM%~p(dt^->KcYtX4)DCqKK@5v@j{Yk*m9t!H}V5fhlR>-fnL0Ztjwx zV7k<@ws-TB`gwTy`{J_APLfFGgTh2u-FeB<@{$6?o_>CAq!R63X!vZ7$k*QfRB4YzHdyl|iKd~Ei8K8gQ+7q1X^!V81bh2QU51(FQlxtw+;uRF) zl_2#IpsX+1c2bkJZ+Y6yXlQl97+kr5;$b3a^L2;>>IeCR- zo+(U}*wGpbic5kFdS=jXlVih!Jl zqXeHIE)GTDE)JysGjEWv~xm4t*H>Jb#IQ2+X(8ZOPi+-O0fV2DUItX@|C#S+rmP`!zWNGi@~WK@)TsHXQZd37mO<}ub#GW#p*Tlk{HH0F0HU2D;fN2 z=yIe$eoBK0G!?w>NGW;;;LH&3g-&Di3EW9>;B=x*z)?@n8&_2`v9hcvBQ7yFKQ%Ty z$Uh)BDiJ&wGV6(9(s7f7M~6bY@Bk+gj6mNnEj=kJDkd^EcVff1)bt7S*KAx4CpE|> zG|Z^UPm1vO^ot@cV{uMOQYx%3Vr9Xy(6ETG0O}Y)U$@0|ANtL56*<>VGmXsAzNn2>Q*r7594UebgdtOy|xubQ|6;G>YW8VsJ;KR|idQ^H1%>VeN! 
zR9c*!08W#yw=}ghKT-lk!8>MGPMJBmx^jF?-Ne#NSX#h75bP;|PZyhzT|BOO^5pT^ zu)qMHTNWOdmXTF4Z{3C!vnN%SK~pHmh4$s23kFJv�a~Vk+Qg4aQ{+L3diMVWTeFgLlXrfLdzLfJ8fEh&E!R^ zmd>mNaN@X<;);gp(`GGLFk@nAc6=BN_VJlHrIY6`m{FCJRJ35j>iKo$;AmDCBu2w{ z@1F}k=fp{KS1y}Ap$zpZ&dtn0-!&226Z&CTk+`yV_X>!Ji}Zo@vs++nJglpr{KV+s zFfbT4cWA-j)doXLq)s8eU)i{c)23GDfaf_CUV7ZbsZ;8v&7D5JFh2|Q^6?HzD5##X zaLK$$g-KD-F>x8il{NMCRoUUb@Hb{eubTRqGth<@iZYYaI6sZHSK<%2phx8`OGyhA z!A=7Vt=xW`e@HlPG=jGml8{$ikev-Z3_BS1wBj7-H<3QDo+lfNX`DI1;Yu$DMe|~% z!LjkOz{s@Rf(#$sVS1ivwTss*t}9INg>jYCV{#^Sdr>dgQp`$>AWi@d@woRS68Qw+ zK3$+RG7|Rn+}yohLS%1Re6sR(nn5+{|E zm1KIMUa)8h2~El=EY3>;ebVE?LxQl)kO2?2awXEFj7T48N_Ik!NVn%exAeT^V6n5a zdtgEyR#|2uaa<+fXTT&3OX{T5^jyHkM}cLX&2)54?O^4L`r`H%&=aElY)235H{3)N zlA4_a%Z2cuh=hdfxyQ~dNpiJt2u>>~$@NFQK!b7PD+==q;Ps{9L1OTf6LRyjGGHqr zA6Gf)=?OuSu=I>5KX+?AGru?(G{7S?w(^e29bZ$H5&?692TXHh2af0k?H)`?_8S~@ zhG(?HSz=ygRasGGU3EcvdQM(`WnFnjMnQIzEFmw4>Q!F9a0ztA zV(5ioupKAzi_a@al-R@g@8J_uRGu3b>?4t-=4Pd(=M`s$LV030k+Y43m4in>Ff>}+ zr=~rGugCECj1%P7b%DxXj_z9fI#q^g|cjJ*83R9HQu-)>~@0~_Wr zm&4TN;_Mw1;0Lu~3tO+S10RtHV|;X&m*rUEbKr6-t&l9QVOzpX&pL?sm|g+l(cv%v zqLFZVAf78Yzn+1S(UGxa>d4N@$tx%-Elh~6*mQVZ?f5CvC)Xy!{#RUDVq#iuaZy@$ zcv?YjW^OU`gFsjcLOZs#hjpF=_40Jo*M=1qx`^nE!9G=3Tx=w))FgK1;P%0U9~ljc z{Z!Jb+#O7ftjKZ(Hyg}hC6R(dZAk&vIJ$5SpjT9Ud{Rbk@%ZY>ag{Zbs>hcX)m?zU zqr5Psq-xgO99XBxVq%jsvh!hjh3$)kxRji{^w{t~56&0Fjs%sG67Fnb?kw?fcX9U* zBU3q6qdN|frDd?(hB-4A#_Z^bhzRuN;!@&*yu1R!B4CLG_T>jQtY+{80{zgN@R9_C z#l}P?6i!}q{O*@89^aBS-iBdQQ!9BmCz9uWBVEmN14LRWf(y)m5?EIAI>}d<@ z(xsj-w0nu1JY-Rk(Gfn5;OIytR+{k9!7D5_J}Lmd{BZpX#(6N2yQd5`pKvi63zH7+ z7(03J-%rr1wX(K@m%+)^%hky>ev})SKs>zEsY*|_v z>+j<)4Gs#4j0}a{x`YsKH#hZyn~$D5xouv}xWbb0CGkNaap^@>6S9K6 zprQM@SeuzS_(mibj4R3|yLQq*96#tFNoQ;FOqALgm^g?WOyT1MxE=oC=)B=XMYe=P zz#H(E`1ne^gOW-s(*0}=s)TW2|<3cjOFfGL0&P-2pxTb}FD$eKNz$Au8!*&H(s_`n3c-+Jp z^JY{R5jUG(2e?_2?+})x9fG475tE!69~~JI7#NaVT9OhT93u17ZVM`X+_U%CfSlR)803xLh&+5l<6+GLGgDS}egS+oPEAZI zsF{#oHfz<2h7@mW6B}m}4b#YptM=}k8K$HcsP_8i;Vm0h&6_!U`lQKAx9#1!cyeu3 
zaZXmU%+1=)DeI>WM`(ug@=a47R=gua7%qscwBm6akS+~Z5yBX(h6mJkFE6iOym9;1bt@OvS58{`>4wE~CRY>}riF(k<>aNp z+*dkj_PqMs_|yqY)-T9)(Som>(N&pE?S-oCo|Id<{P50c=>cv*MY9*pp1yGNo^8u# z&t18B)6%Kc6=h`;rZ3;HY18_}^<~*fX=U}Z=FQ~S24t;KN*Fe-o?BB{zi{d7N#m=4 zfAh9opDwDctY5TZ?nG#Q$a7K|*9gLCx%?3+pQ?tE=l4tXV$2GAAjmxHu&o_xEGcGIJ|ut=+!9 zJ~=dX9v)$KZS~?+i|Qv7MSF|j`;c&b1gS;-aeQ1#dS2!D^zhisf{IBEQx|SnKeu7_ z@JHFFm&m|mHS>mCT$|7K08!*2_iA{B`1(~A6rU};Q7!8rI4MtwQIjw{KHk4(8dhv!`~io)S7U}b%2Y3-~z(Jh*q=^cvFoH?Cd2c=7VJ8#iItxO!GmbVz6vEUxl$;P);wm#*EodF%GAn>TIR zuzJ;ojY}ri)K*p2KSU#g} z#e|V(=FOS6Z297qTX(FgO^-_- zH?b@!IJ9u-{$mIBkrVuWZd{oL`(J7Kb>)$+zT+0psaXss3$+lwmg9bXas9-hBaIK9 zfA#pYYp0K#xOnNrfdin|p`%djl7(~UZoK&L-t{YI5AWHwY4w~5g(Y>1RxX+`dCI(v zd$-IeEuXt(*S3wzXVlhCUAp7IPB8GYne~+!!LHVpu%{oFkvCz%w$rztesvYj;LP8! zebdBn&(Qn~*O8y7Nt+%aDX8?#-HQ)?cysI6;r$zzuibOz=A+Lq9y)OP+NHyL_JdxB zPo6z_7~*5szxwLlt=pH5?Ag12_xkDCp<$`{#pBAyRn1;8x1wy`kvn(JAKbEH`HJPc zZa=(y?#7d6H}@}}T03?A^nx%Ck*BAfuAZc9%V&>Q!m(cUGa5>>;Y&tNn1{8lQLN&5 zPl8eiTswN{>C2~2UcI<=eD|&+=NeD#+rIDAxf2KW5qljwck{{1*RNkbzIy7^`3sHb z&YeHCds%IEOn7Y3ybUX7&zwJ}cEXH}2lsE^b@zjC$l(zTnP-8`~o>#^I< z9$h)Hb8&Tw%di1sJu~O7DQB40A9eV|Fr=hmO8u1CbLTAB^!7f$zIs9H`NzjMuU|QR z_~7AF7tSB&n%ChocV2>@{`2PtH}BlH@96ofH?JJoxO~;xWy_AdW}h`4-L-g9{em@X z7tLOI_|B`>caCmbw`#-oPd9Acvt!fN!)Hz&zx43Q^+PKwLd-REoI?{}T}y+mS4c$a zghji~o;`UFZdjdQIx^~aC2MgBME>yl+3m9@&Ro24=jPQ*=g&1>fN&m~*{N&y?%%z5 zaOdtldyieXeBsQoqm2)qeRl29FTcNfaP`>E?fcJMy>a>I&I1>3T|TyJ%cf8F9zA;a zga-TCb1{8>U3s)b0 z_43;ve){F-@4o%<*B`&Z26}A!JeaqQoJ2No=D2cb-+!c@N9pkz*t_M^y$9A$ubWhs1>X$z z>|Ql*$<9*;S1(wx>)e$yhxcq+yMFz~9s7@7{^Hx$&%gfp*VngBow#u8v%43!7Q%6# zANLs6|9xe(m=42i($=220jEzsI=XfruMZdhjE(H;t0#_~x_I->-Os=*r!QQ(^DX=H zx8MEp`!C;p4SId^!}ni3di>yt^QTT(iw}>-XFD;?MYHS`RYv%gjkB zn6`4=tW-}?_|hh-fn)pR-e;ipPxo%#xq58Z?jz@}UO98>>Z332U%PnWLgV=>_a8s{ z3f6>h5Y_XC=U0!sX3H{X77@BZV5w;PY2xqNc#mV+mc z9XfREOyh+{1-($e4?A@h?Dj#=5!xCWrhX||F%lPNaY)wer%I~5`H3c9-hcG+>!+7b zU3>cQ{Jy=%&m7%<^!$};7mw`NcmCR~FW?yE-+y@VpmFb(jT^R|dW3p@{p8uVzp%f( 
zdG!Md`0@Fp2RDvw+jgY!{PDv$1ipqw_WH@W$#8b-K(*Gq)tC>uDRcscY1Mg%zJ;}| zy{n(h&%t#(RYm^%`qcg#-~amUt8c!3{tfi1=U+a)arXSZmp}gU_1)7j+#Wu89>%x5 zdykzyd*#l(N8e$_7ncs5xc%aXUw(e`<-Oa_h(UjPbZ+0?lUJ`dp1bh{Gz8@O8E(6A z$j;BUYs+-(gvxauKD@_!f|fkcuJ6b(dIp9TqI}FG50}?%zx(Tt_pUw%NiQBheD?a~ zvzI^o@XhNN4{u(-dhzP*8<%iW*njTYt@}@&y?FD>uirenbN%XPU;p~sZ@>Te@bJ3b zPdHh>J-d7F^E;O>fA;Fls~4}KD(*4NlrP_~-+%M@n(W9BfBygv;{i$*<5goAwaAkA zs7i$s(umC0x6YsW44~KdZ{B@)@8X&3Paj`Bx?{_ZW7nVk%>MH7^Q-5t+_`)A?v2Y= zu3fo!;nH2GSsH$ObZEtzb3zi3KRkQz^vw@n-MRVXH@r>>*>FRQglh|PWMb2S?c3tY zw@K)3A2DJ1tn8fAJTr-(orBgdgd2y?-@kiw-J0EJ?|uCn`{R>yyZ4;Aa`o=hrw2>X*uCr(_rdF$FuWxw>7?_u0=BtZGkDa~x;Kes@V2uCe z_0uO`y?Xih+WA|As4VE46?~CG4Cik`Xq;bHmFj0ViVXV#jRM$b-?LK|q?~&E;Io^T zP8~gY=Fp}!+m2tjc$1j)hx2<596P*o!>Z*gHXOS9>J@aAw}oGqH7OI59P_JvlhdUx z1}&J4%k#F`faZU8bjOa}2TwLO9@(^b#gWgx{Pvfx@7=h2`|Pf@%jQn3SkAZj7Bs^V z=(85|;#WxD?;a0dtcSJ4L2w<@S`gX{_1l$I6UR;7bn4{cn=onLIksux>_r<-Di4Ou zGI7V~0baRuZcTY&YN}{t%iVKyLV`NIz@r7B%BYvS8w#pcY~Hy4^3B_K&TU__;lvwY z;Fk=_ynN~#hM_kP5XFwjwToDP7tZ-#nq)Wf6ZxnsCo?*(mN3pa@^bii9uFwSch)!5 zE#7%#|MsIV*&nXmM$hKEufCP{Ov<}hA zwJSfp@QVF?h9I1IpT+xRz`=nfBnEC~en3;?X)5ZkkRqVjeNr4Adj072}*feN_09%GGuXgTdZw~hj$x+vtLvC{CONkJ}4?{ zz_P{2v7DYkPieMIr8G|m-a5A#PEAF^Gn%O>4@^89swFzKP)Z%fb3szjtP4UEF^$9b zpzOl4T!7vJ3^K-k3Bg5?K~M^jgsezqZXikqe~;8G2+eVM%aI*VU{(CHMpec|FQSp@l71fYBT}I|BRCp%Iz!;e94e&{~7*T4`Ax!DbARG@+VtwU6zd~1$vV(Rr4gw({KKh zPOjYoaTqA~)}gGYyY?P82SRjt-!_)^y#tn<-ajI=9x58hBx((q3L79V8Ne)Ci)9UL zC7*o8T9Jl`SW>2L`G1wlPzIaJFq7F^*9uG9-j7Bs^6sXP7)K4~(K))ifeuZ41(H@zWz;BeES@{Xr-wUXC&f zxgum(6aXHgP<+Pl$r8Q}9&mH>EY=AvI|!|`k)wPoNElJ1G)cTR!@rLoJ-ceEu2fLH<^>AxKHjc@0tZrr!heF~@=66w z!yD^?+t~5EyoSsg;5yxqP4r>j*j^LpzG(j5r`?RnNy^8B$XyE6-B&CA(pHEOhIaWS z_y{6RC$UC0jHPv1Nwrg`V@y7wggJDTwGKAw8|f9fW(8Yj9+ z`O;v*#+S77PcvpJxTg?=Y#xvlI}owfu=c)#NOK2Ft;siM0onS`?1K5RH&Ke)pqIi_ zYI>tO&U@oDl1n(*XtIV0ZHNqc#z495ZI|?oiy;cPWR=TG|ss;jmbvN z-Qk0~KBG3TgdN%eK@anGkC;3w%G{DTi{S0P#013_}(u zv92rGhKLN_UZhZR zgE=cH6W>aNEiJ2 zQ-R#PJ)z)j=es1;CvdzJ5p2@h;%)qEkE#Q=-G{%C$_gS*_#%4R@)2aFWN1X-fB 
zvO~X~WGDYYo0jjs_13$sKBBW6aC|&yh$Xo~-ma%YvsWR4P1=Jc;o+}K!s)Dl?WD7Y zc7g1hthrONeYk*}wkzkwe(u=wa|0XWZ^18ZK5X$}r(POoAxjG*bFLlV;BWJn8YLPr zx~wRXAM#9Jq-{`QchYjLApEUQ+cNKe+x-jLjbI7VnYw`OUD8b8 z{~JVOleVV@ZrnXO$s49~4xFZKm~4jlz-bLTX4Vx2;OxMNNn^w)yf>sh{64P-nb>(6 zFsab3f zTKPV_`466H3^F$+!}LAPHvMaJb8Q#C+m)LKwY74%mQmWb4{N`+@p0*()dU`*tRqDO z^{5%OQO0~#Kh7+neH(PjTiq}wbXFFiEHb-mZIFPYb{+5ZGxo7gE@4cyCC?p!mjC_&UoYg)kiww8&>HuQA4Z%oPP zX;n&dtugfyvltTDyw$E-hql;#TC{qx6KUxszarGJOREyL8hP_4#M$=hZlIahNqynD zKduxLtoa~EX--5@!S+z8Wx~{i`i&XK4g()!4WeXEi!7eT44nr5s zDVAFG(sUS2WvAJ{-l{jm@)tcmV7{?-Gu0WQOvy{VrD^{n)>_+1D{anxJbvg!PD4qw1_ahG1K;OHhA{eF-EJP z80!z#yxo&2y8p(kqHfSK;zbl6l>*!yOY=9EHa4XmAG>Qk#z-Zm*uG=^=1gM7vC|lG zek~Y>%5fN!3KT$yMMvJ0UD}!{K40ujlxzPn0oIU}mRk1)jB9My;sAyzBQZQzqdNru zU)gmOUBQywxQ@lVhyLV||2o&v&Us_J3bc~Q`ki1+J$-^S{J%CxkPp9SI{ApG7f67t z_`wTk8@1OW&O~jl{}R8(;vhq%+FE+NMM#ds7+20Dl4*1Omv}ib2vs)Hf+YG_XqC3x z8gcz+MF@Slbx~UdZuHPF`p21Efn{&KV7+#ue-V#Vr*9wH0UjU#IM3%#9xgF*!4#}P zg?McAFDm{YqTtys)?m|6`GKFPMXi6HE{}R*KMeZ7>N@E4Z{$Kg) zC@B6Mm7`sejjhrx|Ce6U$fi1r`AzIwx;6-m7(1>~P|JBFa$nh^iN2bOB#1Ru9VdnDKP=dhYYs^)^(w{|#y+P=9o41816)XcP z60j1V<+WmUkLn5Yf0pnjVNR6@6o&1G6rrXXP7DnU`~GL9=m-|90AxC;p%g?Dgfc;x z>1l96%vnz<|6QuFi{YPEAZeVSkuWe!{9Doae{i@^_a7pWjBCEmhDW6m@qCv zT>FOnrS#Cg=xx*zBn1dY;jPmY=4)j>p5~|cGRweDU>Gb!Zg0vMEj``980Hgd(LFcLLN4Pxsm1*tqvZJt)ltHb zKMB?~2umV*cfq#**cdJGMH<}pBEfmP%6J5=dgJ~AjlyGd>JDlK(B;Iis|<^$y@0>N z{PQ0gK#PA5*e_8Q*kwm#f+ep{KrnM*Sr^Qi z@CcybTUsat_u;Xo=kQ*Kr>!$V+_zj?wgwQQxhDgEkBmSGg6y*_3*T?SBCTUKv1~A@ zoi`pR3k1gC{sP)$#1bU6$by7_dm%d!zS00?7G5MmG9lq*pJ&<2I(ES;QbI}PI($9K z<6J+-zcdywi{YU*UT&{6GrFZ_d`g-HaO4QP(a(U?JNQKAnz5t{G$^Qv2f; zKz}bIc<-x#v-D$=oaHSz-T`eijhc*oj8YszhHwqXvU33wETRalLmaUDOOvaA z;264+8Yp}Y$bf+(;j0Vq4P@@`^<7udUPoL1pHjn!?d&3uz^UakDicdn0qyCfN(f9E zF+Sgb(UGRJ0i0Y#gZ4qvina&;wq^f5#pBC%6| z^qI}+JAF}+rZ;J%HXjH$`Chw^m{RkIO^{qo%*Z4!wzTBfdx41V)?A@US=RVjvo_+r zyv7S31}h8)AcM%#x~U}?(JdA{0WT16l3_x@6W1eyQOq|y8_5?kh46)>=n025WAT`#?$60Ve9NZf 
z7c~-i3_BSCkb^mjqbU&_L|+reoTE%MMHc7`JS^c6mR*GFQ`12sK)}dK!Z`|wAhbKf ztonl!?wm&vfuA7Vb&6_x)$f#`8OTkuO-CFp;Ec`RK!Q@YVdWVVJ_I{HPrI-k>5T!G zsr~Ft?(+wPy@^R6e;F`4%X5_y2OJo-0z=g(WsCH<1Y4;*Xj`=|K(R1N1D@=~F{(Z{(^!@(KZ z*jA+#lX3%1Ae{fy7G1u3*p^7J!B^fl0*!lUD}LGH*w$+5>3v5omQ8t^VX6UpM%ZY} zrlw@=*A_2W_7IkD?TW@;0Y+j4ZBS2(3`4GN9RqO+*Py8uA^!#{)Vs0HVxV6HB9;A& z9W@Pb%KJikm{br>6v@@=U`THgG*$wQ7ihWR5Up>`viK@g9q`votkjl941$5^)Fy+D zDpFvW%c~e>*$Wj~YBxQNG-e-a2)qg0rxhDFZL(3=KzErVt}28t&+5qT5yah3XXYSO z++6TJG!!^c3KrVK0u6jpENE4;VaU*QK#_tN(i|x|0M)-T-vWt~PTti_IH(P@IaZQW z3c^ULF?qf9krzqCbk;Qs^^nefzZGtgfQ8h$7Ff*3&1yh@ZL5Q$jWeqlZ!S|C#& zY4K6R8HTJOl&uqK+vLP5s5Cobu$Cs4-HBa?TF23eRF?f7@~^bFhFrB)#_8X|lki00 zwP5Ob_Bk(vJ!(;HBos6V6-@796q0wbY(j#j9x>|M(*$L_$4I$bY5H15L8GpHW|ndq zhzO@qT#07*cJO$t$nl_d5i;Qftc}166b;_{I7E)LSWj4AE4jr9^rNkFNs)oCOCi8V zlK4KuvYtJuIuh@ZYL?vteU9)!iWmqPw`Ve>T>U9p*>uaSH~~~Wx8|W3yixFCQ^thK zgi_8F%1a1{o^}Eh5t5N94zQJ*kpamZV6mwXuG^TFh>!G+Z0ujZqF|L4|dIH^F^kWdDYk|)ej+n~INQ%tHz%~~c0a)z14wZaSy z;NBjNjRX)_nEe8+2?%XYZVi8l3}b`3@M{561xb{TIJ7lALkG%u7`#AIFgLLpdH3}i zV{w9ALEc$Un-H=f=AR>^HDTxEa651jDx+eqGs9NWN)=AI*4*vptAPo&Zd~abZa<5J zk(m5n?9w(1J1P&b%|>9^J{$$3#d~fsY^I9 zK=}9)VfYQwsU9B8K2jppFn6?Q*%|j5P>dn;ErBGvNBYD6#a~1WNys*&4^aq5bsD?4 z1@C8If~Pe_gcoa-T+^Nmp8oGB z(Jc@@xm12wO4CYaK^(Q@3CWC~$837N-WO^j2}X6;|*x_Bz&)k|)wN05sZ+ zrSE;la#Mjy!&baprkQ~8WKz2s9u$8`3?cULOqi3#43j4(L<}bg0twmCp_rxu5rB>8 zlmx}p-keY>3JZWWO4;tifzll-xoU>lrs3;JDrdLQ(?qW|9K}5kX z8oQ9V1_vcpd`D`50y%z?kzp*6LJ6FhTW7J{bf9vWjVKT#jKGw`LD(k*70PkUG$3*` zhK4 zG}q{8BIfYDMkF-F0z`d82sh9ehJ1GvzC#beG{f{j7E+JVAtHqECuUgc!!1XMzbB3JhhGs{qv%>ijshXri~ zLXqRbYzs0p+S8@#Hlreh#+<}|nl;DZlm}?Up``g^o=(jaN)-7(%(7101`2kWNW@m; zi8fy#2OLCJlam`%OQV$Gx)#gMVtd%TDdPoZAeuM^vSeAX4Ffs24 zHxN}$g=dN`oN6r_Yk1OVYFG0Z#WVKx;t9?<_~Z+OY7%;xzmlI)V2Io{gV%|Ce=#An zI1EZD)ClgU4im9rNPC>ZU_%@F+U}x4mk_%yGarLEYAU)ZeFUb=;(_OXMQs#l^&V2d z6KHlDU&9(Bcb9ojMt88@3`8jSU7n7#*8$v1z+pUN0J|PE5@aBSB;BC!nMN;=f1 z0l@Ln0zP{H@a^(wRQ7~hENcLV@D?8+>9xQNr9NPodN_gCvWw{Q28qj{_8_lW#}Z%G 
z500TIFz`)TY<;dDB#Yx0w<1QDzLrH?qY=khb|J%DV%dqDT_~Gn{GSG={%|TGxUld5 zmMPdXzI1TRiYQSK(|25G}S;ylA2`~XK7)$u1gJpVSv8j+F_hFc?@tGVtZJLa}-H`xsIp*$r&eRM^Y1 zM_@w%V08dL!>83@kjS#x+8?+20S*CV&Gb6V7Zn=_1%(_E}y>a$%&1 zf|?2#U>%^R+3{Pmq4n5XWa0p_{Y;qH;S;f@L^vc2E{Mzw?w9p7Zg-hGx zB*}ov5!lv=JjPC+_>9M!Y!ozzowV@S&D(oxr9-;353Zj7!#Nq_MC3_FFOA^^!}ufn za!vtH#;_VdV;0MN3&l1k30af8j$O3%y>;VrqY`G_Vo%TX`o?M#FY-E%z_O0c!=I7B zvcywq{(viD&qT)o$6b!ZWu#RT6Rv5){>3$AV^=L}2h9gqx#dow^tq5RJyXa2MT=B6 z*MuDhNQ-85e394$1z>v0(=gxX-b9lKJ1^nE`sy(Apj4#wZqD?kcZk1!JW@tRz6E#Z zwnN#17eIN#u-K8+aw*+?kd{qJ1hMQ=TjmoRJUjgozN{Wh>D+x`;;W$ih{y#5a$3EV zVGdCYiKGUQBY-g_{qV*Eyd6MKu9{*L@_yw=+q0~gX@Lj5t-fx-^eLf4rYlRPc;+aS ziDaY$LaSM3Ff9`zB%h#3SPNc9o@Qj|kcO|7pmes>&c~`P!*ux8(DH+1B#fxVIETPF zC=vwe8sMHIqY3Z}YfNq+8_d;|hdiF44XK=_rm%5-a`)#ei|XdAsLiO`u*j3hJ;I6c zAyX|DG$wJ1A%lm7B!x-{x}i9L=W;^+-c7Ydi{I=eDv9vbgsBV8NQkv0`5jUvB-C0W z0a1d`N>YREnhwIGEU3{zHu<;v;@b~80u4lyWeI;`%d^1nFasV;7*lw1EXD2(tEfV3q(&@Yv%bd+0T6!X{Mg|4P9) zI0Od3m=J%;N?_v0G@3*j6|@jCiekzzDf~9?HLPpbj*Wy|-n6@b9tn3(WuvXI+L2xu(CcnK=edy$fS1yg z{Og{*x^?L37#benY&x+2VEaI&)OBFXY9f!is>5^WGhGOQx(7AEe_v`BSMlnsDb{Fs zJNFxCZ0(z~^vs#HBa$PcBb^7UrjLNxzY*NRGPWZ0aUCFA?ApjJq>4+ls)oOozleoh zT_o{|0VW+k`LNYn@0(^W)YhPd^LD{wK$eY4f+amxtCpafDS>yTB`FdeO;6rxF+e=z zL#=}Z^Tyg}`8N>V@+-g@1{`V!F5%QOt0)6!w-Rz7K*ys9UCXet31Ad&7co`Nc^mEj*7qs!O|if?O^jO>U?AW_w%4(|&5 zfxtr+cCz&EVW#u7kP}|5+s2YRN!pXv|4PyW7Z23s2I(1=|1k z0X8G{vv_H={I8g#7ozuo-UE6M=slqKfZhXo59mFh_ki95dJpJ5p!a~@19}hWJ)rl1 z-UE6M=slqKfZhXo59mFh_ki95dJpJ5@WwpwPP2DF*IS&vw_11lmjBsRi?;B4!55)f zt5(f;>O1f74dLh9t?)@Xx0BW{OW@^tF76`VkcjZGsQARBw5&9BWNd^yIz7Lzq;|@b+Jfxd+~n{e zPkT{hPC-suQc_w@L0*EyVqpLN15KTLWZ|)i1d`ISv(pkHg96kg!%Ffq;^ksHC*R0| zQ4{O3qI_Jv{X9KGC?^OgsvbMCG%Ga;8z#hs5oHCrxrJFtiAm{&!)xnmOM>BtEp0z) z-?vwj_yNWzSA`db00b^ub zY`9Do;Nk2>C~l&l=!C?0l`L4|=IAU54GRtOvHuR<)Z53PSD%kswE3*_KnqJl12c&t zKLdn|J*4oz9FC>(LB0Si`&Kkki#@f=bcjxZr zgAFaMZAE^vfIwA@++m=pw^9MTA^vW*$Y$f{;pO3IWnp31r;m}PwY8O%siBz_5#Vlb z*vG)y-rmZvcSraEpSiQh(y&)Q6D!bYZEfcr9vvq2bVPTEJY38TdUoq-VB;x|j}P$x 
ztE?@J`gHDMVB=1%9TG1$q$0gt(BRf zp@oZIh)ir|U;qsL`f;RANgbGdr6BUAhba-#Ph3#mK!JZLG;0)nA?LZ#^Segtr1}{;-4hMSRUb~2_j7=@f`t~$+@enzq1V_(+kkAl6S6h2~ zWVUnl^7e3Z_s9Ma;wKXM%fl4D_STMK5h!#Jxq`oZGjmg=&VFG^iGxv}zQ&ejLxwmZ zzd|NW5p1CRmws{sLVb8Sk){hm(uQ(R7fZqrY4# zvUi2v=IKc4#U5a$ty_R1A}Z9=k#sq1YHRNq5-Roe_xBX}$-|VsCh)s{FNxUE#@z)k z!`jB)-p16#Rb*>pXKOmp*ugJ4H6~2c>R_ES> zhxF-b;(~qF-O0wr$xo?(KO8c)cd>Wz^fK$&*UZJ&*TuMZ*YC^?KL50H|G|Td90Q{h z;#E?2JCgzZjND;ti@d$;Of6u9@b?VIAnCV#jI5w@yLmttauE3ihpEEB;oxX;xzG{8 z{y~Fj$ArG`0wdCjbG8k1UI#mOcRT2ewr2f%4RDnP!(U!_MkU0=r>95;^)|F~b+t1! zv2(NQ`T18cX_yQloz>ml-Pz8@bg+rNBqAX-Jvq|fzJHg_eT=M84yP9830o^80|O&- z?6)SijxJDVWn%DM$6h!}36P=JxsQpR2xkf$zt$odOkD|Tl}a6_@^gSOYh^sx%u}U~ z@M0KATv}pAUTUbj6ZRS}X~OW@bU!CAMS!CP?K|KaufVWihatvZkqOD!rKK5RVmq*d z^cU|yRXony@sUAZZjScEPFq`(e%*2G_vmdfz=RA}=pc@E<}mIoEv;Px6@lJ3gS)zk zWl_m+p)y%iQk>ie`>cnrGC4Cf0{$}1$tTD!GF~o`lXW5~qhjosp^2*G%rJLULdU;< zm>Q-LH<1VQC>RQII#1&Ks)$f0ko80zp}@%#`?$ZGofV8z3v&}Aa|dT9YAZU|&QlpB zb##;AikF%W9oJptD)#sH_Vkh<4IwElGd(U+0)L)p?HeRhMTJEq=i&O77#}0Yg(WD^ z3wthHWdFWKc5Y&y5Vbm3WId!u$1Xkl8Qa+5*awGs;KD^07%xdsNT8>)n_qB{zf2V! 
zp@3Pz+T6w(<}J7lK0Y2EzQIxPu}WX5LK&TsS6*2J&TxV8?I2Nx`bk2f;&JUQ$Vpa( zDm@Ju#xx`>RuK@EQB;_v4)PYeSoG^=VB_lU?5s#j4fm7y(RIu_Fg(oH)^Mf|Vvg(@QAVD68|tsR~i z6r@T`4EC0)(~=`Y!V@CJR_4y~nDAhDcQr1Z@i@xd#O8e%X0WeRDf5#j<>e*8Ery|I zY9m&}g!y^Nld@Byqp5pgT@Cd1a4;I;Dwlb<2LyVF146>W0=?bsttG%hR&qx#nKIDB z7y27?HZTgiUR=1;OYG?rnv$2BT~IxyIxRFbK0huBlw#m>wwV6)Xe0>^o+QOsQCD(@R)?Oocye~i0IhF`0${R_}oH(yd*_P zOma-1*u_!gr-+VAOijti1A1{qeN8b|%e_6I*LnwpMJqz(u;>S>KukeFRxIfAk*nj= z^RwdR0io*T)b#jJPai*LM;}#;LYbJE8WZFjl$f895EmC22!qdG8L5^#GmJ+}YC-LY zqNEsQU`TXAN^Wsp3U-F{5)vJ+Rz-wG#;0YM*H50e zWZuYg)tE zK*_a00v*6tD)W~}Wq!_PW}e~k$#Duf>1_U!caS6_D?;NT6T5SCs`6AL>tTj*#IW#Fs`^N+2`pdl6xj`l8o z%A}&QOqJBjH#9s{D)EJoFgP?eF)GBz(?8I|(%epan0ksut{yNRJw(3HrTo3z{1i%= zueZd<&%@5vMH1xe>FE=eqK38G)d}ZGx!BRst{;3g!qC;j&A3O;!KN^a+WV;@!(c`= z8eq`RLS!#i#DODe+y^n*H=nobZRO%B4o%1?EXj#iDHLG=-X5;b9%6T|kO-N`qCc7M zdknI5H0#?JqdovzXRylM&DF}(4#usE(O`RbSgRa8-Njb@208e{jOP}VloIM@Zfx%C zCy$7Zka{><^nq7qxrIeX_}JQt{a}Perl!Y5DgEIO1O~zF^(1p14IEH~rKy!qR7!ek zakx4cs$SROZXgdwgeNX*kHz$==e=*}>Kd))!lGXjFvU-_6lG zR0b9YKzKt8v-6P3Ljrt*U@?#eMy8~w6#;HwNm!@{1T^8`1MF2UgZpCeZ8fN$!9X*S zTpg#dVVLi%>|F!GRjQ=?+}HpYV?#S1Wok)jc5+-qSa`6vqm7e?*v-zUpOIH&Qf6*u zYI|G>aNi8BPZyL zByL_(sh?k9AoNi$e_4=^vy}x|cx@a-Vpy4cy?g^?esHTiV2?w00T86vI=H%t+{KmhD39AGloz|J4Whn>C=Jni?l_G#Y4ZZf5K50&#S=z6RZT8(Eth z^fnZQr{^RqWzaXAAi}g7(8D}1AxY`u?(X64A&~_`bZ%{JZ|C6dFAMPU4)DWmn}alw ztOvQV5*Igbc~Vw_R17Y6r<04Lm(<6}$_fHOC*wZ7``g`Vt6TZ@8}3OUGhwKf_!#6qM{VH|DdCbq{2OO=#YKCC1=F)~~s^_7MvrbW8L*mdv> z4E7hpJ)&zDEHzHf5L_q6M8zlLhB+xSlkA&u{S>=e8xAtIcJ&UGOI&f|G|1W$R(VSo zu@~&|Z0w!9z3nUp4KT6A72np$D+sp4FuRGwe&O-z5Lg~%NyA4~q$K9%#CbscC5Ol< zRH@DxUO%cbD+>|-N?d;?*2k^76>gQ62tGqQ5YI+L3y zb-+2q)-5PHF5J_6h>dr6d`gru*vq{8m+c2RLICL=9HoNBs`&Vr@Bkl2GZPatXTMFn$c6Pp4^ z-V#|vVpcZBW~xA#q#)qJhyFy~BR)&RAxrs?BDRGgJ z;Za#tqbrk=%cregH9L!8h)xNQC`Gv^-cIst8ly!3fv^qtwn7 zha7mH23SZY7m}Emnv)$b^O1za7nbF~u}P>@9+y>GSzb|5l%10qD-+uc90bddmxrrw zL}ZXe7MYYkydpm_*oR!7Pe=>~)&;ebXO1n($SfN_t~P;TLW(O26GA24ve+C9OavfN 
zH3cLhSgI2g7{Uf?-$ri8s6&GUhXvJeiIR1tDNA6ZgKb~voW7>=ZujT}36bai=U z&G5q1NEzskj|x@9B*sU@WfzuIjUHK^5gVhD`3K3v;xe#Ot5E;Rehx=@}Lhqoc6gcf7<1j8< z9-Rr{Ut(fzadt{dsyZ0DC^!*%PI1+!apOnUPoFn)Y*p2$(bdHT`C#g(aZ{#^uPn-r zR|fe9t5Y+IMoykGzA8JVbmodhQ)^2g(5xGlp@R87AhWolYIya;xwFTV7olDGnW-5V zyT)RF@`58Jypga{x_Ed?VJJwPY@NMiQ85tJK>vzW1&62PLD$W}@tK#b42DjLy@6a` z`S9U&W9rK?;2w>Fi!QDnTVFG3;+XQ>?2K4Nki;)EF1Kp@)ESd&@{(X$jY}^mtE{c9 z%8mB3j8lMy|_rV2~_E)mkbH#BeQ6wgXifkQ45b6bxZs#2k5*Y`U zB*%q@sI&5O($m2t=mN=TT7FhmPKHVX`*|N(L~H`=&Ei6E#QQ2z%j)WfXGJT+ViUrG zB9b$6Qe}3A4)V0>=?kXU=EeEJyh`q4d@5ccg@W9ywAiqa03S&ZPVsQ|1dF9aisy7N z#u?n)dJ{M z1;i{BELVx@h)@W19i2R-fw+%|Pk>(LUeSONtAF9i_dqBqTK0%f`|f7Is|CZJj;ctql!KoV@*ma3!^KghfqWzGzWh ztdG4ouArzO)f??9$Wn)dCS~QqCJ5|Fj#h>QDPWMngqM?+0CSlvE+t$dOGsA-i3Wbr z#UmLuHn50$1gf*(mgT0!;!Yc;Wjv~}hAnj*gatXNu`v+65LAmsjNH?O;u7PPo{sMRp>cV`hZpCS)eI{tPRqz&u&k~m zB{eTorHIYSpmvqiPMtZXqBtK$VMK_(hsZxBD<{_10jC8?WL`;Tbg;xXA}KQsX7v30 zWSF1CUf$60+}%8-!Qn~doSKdhxR;?SH~>?|q$I%B43-OZpfkkSa=4F{{d)Grvm9@- zc)*dhJSrhBIyNaIH#ap|6_YT$!dVoUR8o{26`zsL+cjm{*xHKHiprwI2$d=VI#;Z} zNwL`rU!S^*Y|UCamD1jXWl>Md2OlrRC}dKDijlX!{X zY}(n)!yC?OqTxz2VO*wV7MJ7~SC+(=uGq7-zoeIFi(cD555|e;aZ9z^-6dYWLJs<@4k;lX)WPn2|h7T3>Km4sxVJU>GT8)5VToX_}5gOy6Dhnp9e7o0@mXc*IZDG`1_QL$mZ zus-2?w~3SA|KOAkdNdTKDZ~VC}1niir&I zb`^OC;FOErNQjnsc*(=z{KiWX9F>#;XJAryXV1{6FqpS6rUigt+QmCm9V_?oaF^zc zoietjX2#w>s69e@Z*Dro(V5fc!QCA*Ysre4Wl;f=Kv{55NO*WCoYtvBeBAA=T>K<% zPH>Vh4G(g&a}@iE?Tz}tA%Rb@Iz=t>^_O~+`2^j8bA?DEgZZ8d+i-P)tu`5p)Gjy` z@Cpb~M&o%_>X=1K7tNnO<@+;tAO4JQ5&Ppfdu_S;(2|02i)W6XGchAMCpYHcp^x7$Xt;If z%oOtSvwJ_X?Cl-%XD;7)Xz%JtmBqOQLknVp7ubo($lUq~) zf!x%w1?uo<2rpw2V}m^$tc>~(vhhnOE-Qs-2)cNn3{JZtWQj<}l~e$Xa+8wCrU0fF zh=RZ{`nb~C1`dA2A~$cDB0MZes!EOvSNhxAD~k)mJtN1AiVx_^bPV-;oB3dXG%3ZO zw~M{GcjKa&v*%4Iiwlo|XRn2I6RMIG0l1OO&P@vOus0vvr$;}Rs9a)20UUG2rQ=qu zsGJ6qykW(|$4nYCERP1WWUJ!k>271trE_mXD_6Xscq)?ukw$V-1e^xv467|mii$5C zog3g|+J2C~hp{wc%#x*}!vwpqza87Weeb#nLm`eX#{cCb$5g>{eD}dk6Eos-N~9VB@XHFPDZAxuPN$vEdYgR3qJH5W5cJ|7pbEb_R 
zR+N_>9hp{CRhg1JV!`4CGe>5pRZLs6a!QuW)i>BmRRQk{BF5+>>#H*hXK!CSDp~3t zlsA6b_|a3BZ&*Eh{KN%o)~}vfRa#V3K4#wXRV$aws4L2dODd`zH)$-tH^9AA5p=rI zvzIKLKe4h50;uuTLo1-TeD%7O)2fG!nK5T_O+jXALX;vVziuXYV8Ym%vZ|>ocAUL& zY3JJcL*Y3>Xj~z@$WF&3TDZAl_Vh_rY0*((%EbJvl-#PxOE<2YRW)?vtl2fGaj;>p zs3L9$rv6aoV(T<=F)#dNlUcT+z<-^}EUb*!=L=uyW zN=A$-DU5V78U)W_UiRt>GpEI^=TIN;sq%R$GTKvL*+LG3hz**e0$N_R14R zjvO}$1XLHNr56kvUS2h-x}}bN{iOQ($+K5%+PZb)(s5N|<}6vZ zY}vxu)2Gi~xD>EuFa#&$sX{^{;xclxGIC0)$IV>0bor_^tClZYwq*XorAudws2NpR zHe%NDwQHBm+tP4g{i5krl?%7;+_ZK5`qUf8K&6 z3#W`I&(AEKynMrEs965}_Px6|ESykNHKJzZq#0AD&zU`Y$AL|YCXF0Ee%*??tb$>s zv$pNqbBw(3>}kW?Bsl*{%Bd*{ck?TrKB00tyjkca@RNr2{QjAPyLTSBdhhOyD`yVu z*mLaIo-JFzu5CMau3tD~>cojlk6ycU`ozKQ8&)ryKd~aOuy)$qX=6r=oV0Y~iZO*H z6W46{e%ZVUwe@3XZam+xdc~Smi&t(OukumlhQlM}f{AO_t=)hA*4>lv4$kZ)>z55z zdWB}ExblcqsS4Jgg=NFe04}(yqv}n?m0+)SiZ$H_x3pfA#jILkD;7zxw-6XZEh2 zTVIjs?`r9lI&nb>!@T>!kak8P1taQfM~sj{)}4eR`jNzj^8GiT&HRZa;9M zVK>*kb{x3y0A9rN^oOm>*KXRh^YF>DCw45IJ#XQ>g}WZHcN%uDpI%)%W!cJEljrZa z^!V|GU0b(r*tB`=y6syw?mckuB*-Q6- zdvJE&wjKK$8V(;jv3F^OQW6@cmY`i7nt#x_|DeyAW*uF#CQh%ElNaZk*h`ZW659#?B(!PHhd1$vfuYHCt#9 z)AE~MgFOp(Ypl`1{`&3LU+!Hz)^Kp&-hBrf8V>G1c=+&%AMV||arMg88+RW*di3bw zy&v!0zkX!nij|wTE*=dJ3eqx$&E2wb;jDRU_idd&b^gYqSARIOZ}qZuI}aW>cC~ZrOk2*oB|jr$7Jt?5}6P{seYC{_V-#>o;!Rd2s*g zflV7$PfS;+tCnq8xoGK{jeGX*IdJOSg)>Kv9@@ES`(&n9(U%+mJ!n>V}{nPRib4JZwG(Oo&q@3BvGze^OUb_4X?D;O8 zy>N2Zy7fB_ojP$~->K`jubeu1U#paudw^h1(=QxN`l$Pq&WmJ9X>Y;Z2+N9^AQk=iw8l zj_&w=)8SL+Zo_W%uix%pZP>VC>8gWwenLP0a`Vp5VDA&SCJH}YKCpfN(c?!C@5L$b z(eKZHe{}QE2zV>}w;kW@$QT87LX(<((weudRlD|IwC~z&h^394gPXtH-_dO-FN)89 z+Q0eCljlD_eEicrQ1jcpySL69JbdZF@6Ud^xF4q5?RyU&IlO!0#$Edlp15$0PJNG# zZQFbCr$7IE_Q$iQ_wNyhKEH9|=#hrA7mpu4a~nDW3V#Y;yKzjkEKfH16&QA~TJoxRm z$B*t`JA3-%(Ua%T93zXup;PCs+$3*ee0<~L>62ISJzeax2dB305<;xsu3x)#>BNaE zkA8W0{}J4ZOAIse$6wedkMHhG4-W|lkb0VQ;5^o>L+4LE>eS_P`jcmvLc$ATBFcn! 
zYjO5HI(PWMWxyX@J$v!m<)a5q-@0*f=l3hV-*x)VQ_%gx$-^fuT)cSk%<*%-vOk?T zb_w1&N8ztqd%xd)oiAbj`<)+teDd4f3umFn9HjC65x)M^)C{@U?Aw<3`{z$Td;h(* z?YeXTl)$GBVdb;OrDyEtnMr=<5dU^=ES}lkaOL98MGMv+y!6wb?C&=Zt>19q#L0`d zZe6?coV~I4fYu^+^Te@haQA<{u6d^=N_h6<$#eF>3Qi{wlHn}h-+gbx(c|le$*lTx z{FuR?cenZ$?&Mqews(NOzD|ZidbH_27X`c${<^wv@2P8d@87w3_2A-3vo{_(eT#G& z*wXJl`QyXKPw;(q&mP^ng+kf{F^T7u| zZ%NoXeM2%Fvv1?uI$sGlu3kQSY~Rkk2evI+uzJstqi3m0&u-b)aCYD7#Y>hf+jjBc zLt#X|DChYsqKP5n0#I#^hnq$u%Ulh~bW6D2?oaKaO&I1?s{s{qXXp3duiv`2p<(B; z>2r2m{_#2c+YcA6T)Vhy*}NGehAo2G>s5d=&}FKGun2Z>=Za6(4}~Yz-FY*Z54*i5 z;E1$NdX;2t`kyD}4KE(CY~Qhy(6^pkX;?F3@|;yixYNZr7V&Ve;(fLWI*2`JHX8f<~b*x|8MgPi|cCVY(H~u=f0oW z-%f#CfJfN*b%1;6AfY|2p&zt{b&=zl!sRCzMzMIN+N_hXWD?W992k{5`ZjyPk7?$h zkT`~nhnMGbaLEL`&KLOn`Pseu_pMv7VAU?J?{gKmZ{NFq?Fk*GKw1XEhu~)i7{z3k zT?yZ(VwhAA1_W+pzAA{m<76h%?db3uwp=O~UKqZy+w8lK>DoeQAcA{cA*=QIr`ywW zhs|2C<=C}Gx9yJ|OV^#gf0gU0aCoOOOhN!AFxDfjMC!mjT#d zVK&BA_*DvOc%XS`zHT~%Y?EioEB5F{GAF1WFQr6i+6Z~osy+F&D z@*FfD9D=k_ycG5<7AbJF0KTlLnljwpONSg_{$X91M+zFrKd#;+I%>_qZqiE5HYcUs%AX-j6lW)qyU%qfB~V%QUgg(S`kPxNOS;neMp;$ zc{(;cyo!b2&JnwklW+pq@(q%;fn(=LSegy|P$U&voLJ_Izmnh-KB7YTkW8ydBTYj1 zxk!Lgzhb6PzrfD1kdib(sUV*Z)c8+w@y0L^i(~*7HZ+({?75BjCZdQ>7-kF-h0hFf z4Un|3Hzdx$_`^q#>>CIP5a2lsh zAPh?g*3B=`AoAgKa>hnjwurUQ~(!bUvS!>YA99%4JfCWcHHMo zKIZD#k=uAo&s2TgunaF|VG+V5rIG+x9&*GDG&Pi+Gc^QxgZPkm(#e4vXFgUjl3 zP}i6n_koVz)xr|PkcH~~aZC6zkhWikgw`_607dA{*am)a7N-OcdA)TReqjg!`2Yk2dFkAqesew6 z{7Tq5)gtdU#PeaYKYO9)J&lq{HX7Shm~dvqJ)w zz@!#hk2M#8m3+l74)VT8Vc8GR5Y)i|7qIN5c0sfCQ1P7$d{1=a2TaBiSlylw&enmD zO~C*<@cC3ifv=eV#xUXB^$|Z44fy{41MIDAFbAYEEVX0pWEVJ7{ z`$*)S2ncP%vU3nA0)pLjX2hBf^b{36kQ*xlF+;K!!#C^ZLwR~`;Qp+Bh^CRO{+_w-}uioE*nC7eKT7~R6 zoLz95q1#%ZYshE-!whrtVLQPmag(tlVI^d^TdQZT0XB_^t}o5Y8uzmg6#mq-)nJod z5dRU|GJ7a_tYOI}yw@;6DJW$amMD~SIb=Z8d`?D}%t+bw^v+I5*oswmkejwa>_$h% zscQ?^033nS*b+@!_A%vYo5#`WdA;laLfO?E#E=mO#rgpZe5gVPNVlP?bm&lkZ!K$i zU=-b|rUMxnv5EEKr|j2ET)?+T_=EWXGXOazg(K5EKbbc)H{6JSL8#+vCkn$))-C1 
zps>aOA^}Lm#$JM{Yg}W7mz965?E~Zvi**&ej8m6(OsRrjGwRZy6rf9iPX#Z;+eCn6 zA;kHZ^N23rzgIeXcGc9z((t>~x**&>Gz`BqJ5Z>n6ruQEAgJOB2qwBw!SKdf;2bjm zz7$A4xJ>J?4RN>72tUq=TO17kbzygJrsasWm6;!8=Yz)?IIv-NV{>NaZmbk`IAhg-WSP)-; z@CClD0q6!c7t-XSFCayK!56Y241*A}F_bdgN)NU45(u(|=YvZGG>}{zAe4o#gwTX< zHW<19;4N6zfsmj9L>TN?Qm8Rx;0F&7jKD|XPxVZnxK|_KB^U%LR0N-PK_*(r=K1M| z(zz1W@f9@BxXhJd<%ZyZ1a;b4lc=~M7-6qs3OMFhWw1ED1dNcKqZ{1d+VX}#tU;?@ zCG-DEXJEN}5t`eQ`^B3@_Z*;#pWw;wUq~}pO9;&7^8aHt280fM(H=@M$)BQtaF)6R z4^GLEBiYx}&I$8FG|P@1gLZ$&LO6GuHEeW8dHa~J`p%l|1Ur}i4Zkw57ZHH#mqM9o zWt23zm`FidKNv!7-e+1*VYk0TGHF$YH+MW^goRen;Sn;~-C=Dh$~?e^CM-KW<|PsY z(k_mh%YI@pjMoN(^s02ijl{40`gS#R>eq&8e~DeX3dt~%G{CiU28uJ8d3ZrzO#~8G z(6GN6!#$n`aDl-c2?uY(B-c48Kb1zfgg7*6NUv_+wqRN_lm5TPjx;LD>-un528NME zKm=x399BUQ1_lsEkQpJOY@z{NP$mS$g{7#6;0A(VKoKJfMK(ujrLEc;s|}D!T!P{W zF43OW)Pv1wT9cS+lao`Mo_xLcz3-d(CYsaUA8)z$zW46C?|%2)_r8IT`yyv0cKnfP z#d?K?bUapxM7IWjOntQUKaxslw@Em|VIRnk-j;!FunQ6w4E8VHLwlm=fn%k!r%P?! z0^>AV_@=pMG$YZJ=_5iEfAMIVJ@OUTURiGAhZbc?$QUg`IL}L+0jK543*GKnv=d7E z)o^RJk1de~%nAx|e$3sf-M~zQ&I!H(_y@slymH(hMdEoBzBw!!9*bt<5eQ!1VBacR zqH4dqSzd1z>%=D$by3J9mk}a4`TgXmPLIk=;#lZQF8{xB{i}c%aw$LNGQ5{E15Y(0 z;K(7h_kbtI-BVpNa4?d7aFe&#q6g1L6DX?#8ez^8?(r7FM`CopM;e<2-|r8yf_oGZ zP2kMzKzXqRLgPPKPE)>>2!ln>)QlivH*>A=_6Hk?72}~SQ~JJrw4E`_B$$tJFAisV zd71}DB-ElzANLkWBo<2dD?ev-wT|P?Aa9lMgpy*c0>Ej60jol4R}A+b@WJQUTvSPJ z=jE|Zy2~RQ5{Q@L0(TOE8GvaXQX=zq?BJe#&>m^Nr!Jf%Gv}*+f616jLIwm=NTR6BT@{mb?*l3m87~+1 zyZvjSBF_KDD+yad2q^{=n;BU$a#ZWeyV6jD776N8-+g+m#?eO3PvSVw?GimNySM3denfutkN{E2l%j>I0jHIe`b2KAYd()*^DGn!w-k+@}p+46;zPazM<}>MsSu^Zl*)^igBC zFq3c%#K2awXnsmVPa)QvXZqv4CMeZU=|`&1b(|X+ymoOO9Mt?QzGDd@PIABhb0*~# z60@RE6#}f-y{Bw*$x?X0(f%Pvd9bOFh(K+5 zcPBMmnfr9KGPokt2P7n<9qsBl+&1yF2IP#1ZlDBHVnwj!564bzMkl~;GKZB!5th~=E3HQ>XUayGKqbPFfpWyKrR+)iVnjZQ5L43rp6(l|GUp^uL$v!?&o?FGzNf`#{BFYRXU>=mK{HN#HfMa&X5bC`^SWGX*C`=+PZG_`cff6^J2;qsIa$^2UFLT{Z+m? 
z(%vn~Kk4wul|SBZPH>U%`-jD|2ZL?nyT`1cXX18O`Px|J!ej{fOe%IrZ+p(YsLlY6 zl+LkR7uxDKX_-3$Oz_LvBC%<%WNv9wb16LxI{Hs{c64BV;?d=lQeMDRVTDL89f8mTap1(~IZt>JwJS5@~cvU0ri)dQ_+gwHnwi%yJ2g@SE0-{SIBda2f z_qFhEY84T?HX?2w{AGN)@y_|vUvV7{@d+lmkj8`Z)L$;{tJZ9sYsDhHKHre7(?eT{ zX`Sg0+rU8yero}39JFbz!pcXt^~hE_Bcjg`yG|{HX<%kbxob?#==H9_$&U}wR^}w6 zBoQlxl`drVvIr=-`kSnoa_OqijO-7=qH9!KM0|Fd(sd3_-Ih`zIfv?^bETMK+8+sg nO)VkH Date: Wed, 31 Jul 2024 14:01:36 +0000 Subject: [PATCH 19/31] fixed the output issues with 7x7 kernel and added QA support --- src/modules/cpu/kernel/sobel_filter.hpp | 12 ++++++++++- utilities/test_suite/HOST/runTests.py | 3 +-- .../sobel_filter_u8_Tensor_kernelSize7.bin | Bin 0 -> 410400 bytes utilities/test_suite/rpp_test_suite_common.h | 20 +++++++++++++----- 4 files changed, 27 insertions(+), 8 deletions(-) create mode 100644 utilities/test_suite/REFERENCE_OUTPUT/sobel_filter/sobel_filter_u8_Tensor_kernelSize7.bin diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 7ea3e53cf..9b4e076ce 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -529,12 +529,14 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); + pDst[0] = _mm256_min_ps(_mm256_max_ps(pDst[0], pMin), pMax); pDstX[1] = _mm256_min_ps(_mm256_max_ps(pDstX[1], pMin), pMax); pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], pMin), pMax); pDstX[1] = _mm256_mul_ps(pDstX[1], pDstX[1]); pDstY[1] = _mm256_mul_ps(pDstY[1], pDstY[1]); pDst[1] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[1], pDstY[1])); + pDst[1] = _mm256_min_ps(_mm256_max_ps(pDst[1], pMin), pMax); rpp_sobel_store16(dstPtrTemp, pDst); increment_row_ptrs(srcPtrTemp, kernelSize, 14); @@ -600,6 +602,8 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, pTemp[2] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 
1], avx_p0, 3), avx_pxMaskRotate0To2), pFilter[filterIndex + 2]); pDst[1] = _mm256_add_ps(pDst[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], pTemp[1]), pTemp[2])); } + pDst[0] = _mm256_min_ps(_mm256_max_ps(pDst[0], pMin), pMax); + pDst[1] = _mm256_min_ps(_mm256_max_ps(pDst[1], pMin), pMax); rpp_sobel_store16(dstPtrTemp, pDst); increment_row_ptrs(srcPtrTemp, kernelSize, 14); dstPtrTemp += 14; @@ -711,12 +715,14 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, pDstX[0] = _mm256_mul_ps(pDstX[0], pDstX[0]); pDstY[0] = _mm256_mul_ps(pDstY[0], pDstY[0]); pDst[0] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[0], pDstY[0])); + pDst[0] = _mm256_min_ps(_mm256_max_ps(pDst[0], pMin), pMax); pDstX[1] = _mm256_min_ps(_mm256_max_ps(pDstX[1], pMin), pMax); pDstY[1] = _mm256_min_ps(_mm256_max_ps(pDstY[1], pMin), pMax); pDstX[1] = _mm256_mul_ps(pDstX[1], pDstX[1]); pDstY[1] = _mm256_mul_ps(pDstY[1], pDstY[1]); pDst[1] = _mm256_sqrt_ps(_mm256_add_ps(pDstX[1], pDstY[1])); + pDst[1] = _mm256_min_ps(_mm256_max_ps(pDst[1], pMin), pMax); rpp_sobel_store16(dstPtrTemp, pDst); increment_row_ptrs(srcPtrTemp, kernelSize, 12); @@ -786,6 +792,8 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, pTemp[4] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex + 1], avx_p0, 15), avx_pxMaskRotate0To4), pFilter[filterIndex + 4]); pDst[1] = _mm256_add_ps(pDst[1], _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(pTemp[3], pTemp[4]))); } + pDst[0] = _mm256_min_ps(_mm256_max_ps(pDst[0], pMin), pMax); + pDst[1] = _mm256_min_ps(_mm256_max_ps(pDst[1], pMin), pMax); rpp_sobel_store16(dstPtrTemp, pDst); increment_row_ptrs(srcPtrTemp, kernelSize, 12); dstPtrTemp += 12; @@ -857,7 +865,7 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, pRowShift[1] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 3), avx_pxMaskRotate0To2); pRowShift[2] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 7), avx_pxMaskRotate0To3); 
pRowShift[3] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 15), avx_pxMaskRotate0To4); - pRowShift[4] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 31), avx_pxMaskRotate0To4); + pRowShift[4] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 31), avx_pxMaskRotate0To5); pRowShift[5] = _mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 63), avx_pxMaskRotate0To6); pTemp[0] = _mm256_mul_ps(pRow[rowIndex], pFilterX[filterIndex]); pTemp[1] = _mm256_mul_ps(pRowShift[0], pFilterX[filterIndex + 1]); @@ -882,6 +890,7 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, pDstX = _mm256_mul_ps(pDstX, pDstX); pDstY = _mm256_mul_ps(pDstY, pDstY); pDst = _mm256_sqrt_ps(_mm256_add_ps(pDstX, pDstY)); + pDst = _mm256_min_ps(_mm256_max_ps(pDst, pMin), pMax); rpp_sobel_store8(dstPtrTemp, &pDst); increment_row_ptrs(srcPtrTemp, kernelSize, 8); @@ -945,6 +954,7 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, pTemp[6] = _mm256_mul_ps(_mm256_permutevar8x32_ps(_mm256_blend_ps(pRow[rowIndex], pRow[rowIndex + 1], 63), avx_pxMaskRotate0To6), pFilter[filterIndex + 6]); pDst = _mm256_add_ps(pDst, _mm256_add_ps(_mm256_add_ps(pTemp[0], _mm256_add_ps(pTemp[1], pTemp[2])), _mm256_add_ps(_mm256_add_ps(pTemp[3], pTemp[4]), _mm256_add_ps(pTemp[5], pTemp[6])))); } + pDst = _mm256_min_ps(_mm256_max_ps(pDst, pMin), pMax); rpp_sobel_store8(dstPtrTemp, &pDst); increment_row_ptrs(srcPtrTemp, kernelSize, 8); dstPtrTemp += 8; diff --git a/utilities/test_suite/HOST/runTests.py b/utilities/test_suite/HOST/runTests.py index 35a990267..a0938fdc3 100644 --- a/utilities/test_suite/HOST/runTests.py +++ b/utilities/test_suite/HOST/runTests.py @@ -86,8 +86,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) elif case == "50": - kernelSizeAndGradientRange = 3 - for kernelSizeAndGradient in 
range(kernelSizeAndGradientRange): + for kernelSizeAndGradient in range(9): print("./Tensor_host " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSizeAndGradient) + " 0") result = subprocess.Popen([buildFolderPath + "/build/Tensor_host", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSizeAndGradient), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() diff --git a/utilities/test_suite/REFERENCE_OUTPUT/sobel_filter/sobel_filter_u8_Tensor_kernelSize7.bin b/utilities/test_suite/REFERENCE_OUTPUT/sobel_filter/sobel_filter_u8_Tensor_kernelSize7.bin new file mode 100644 index 0000000000000000000000000000000000000000..9405ce5de4b44d78daee2a1f02b4f3bc211ff9c4 GIT binary patch literal 410400 zcmeEv4cwk%-Tygk%T~4+*<{1KjW!v1YhH#%Mz*pPGKDf_M1`kN>t!AuH_` zu`o$JOkT2z$|{pcs8mSw=!NR||K5+^ah~UO-`9EVzOP&V&vAY@j^E4o_x*j(-+5jy z_r2@B@1iJ*pa@3^BJ&6^oRn(FU83L*>I;a=%ljZ`3g8%T8_cdf3zZQ2f7 zB20DFjjSEAKEy;_Fe>DoA(1}p(Fql+;L7xgs}r_9siE~?*R=NiI7SpR$nTjRo0ovuOsQ0|@4T4BT6xMZ7 z*ihPQZ*s|rx+=3&aDVRHUORYiK!r*3I$bnMnWyHvwd+izpC1=qVhUrhTY9vQ=t^94 z5`hCCEY)QKBrIbhPo;NfX93a+oM?$O3ZUH`J=%r3v*Vg`H0E4N3;TJGz44YBDcv|a zE+)b1XCWo}q_y?NsNVY&eEI4solk$eHZLBLw!`^QO2RkUC>7DqJ2l51B0$@jC@?hE z&HZDlM|YMOhvnkk)D`DsgFz)YBihyJswQm48cIfd3=W(wnFIt?nFdRM;@;;`TZ|%5+DO zHBQ>4I-N`~6JS3(m2TW$RIo061oX3%Ht10g!c-SOG}|cbX9(%>xJoJoJoJoJoJoJoJoJ zoJoJoJoJoJoJoJoJoJoJoJoJoJ zoJoJoJoJoJoJ*Cud-{CICzEG_Zpj)!ZIcgTPdB~cVziK0(g zN<`qan`2F5i@8GBk1M%_24yz|hG8KLjz2`r@Wv8q3siz6xs?WGM_sO%qUbaD-%i)s z8@iKeYT`NL0HlsB47j7j30O!%kk?qwC5ARqi!?D@18e0pp;@KlsALR+MnpTaO3*B# zytLaQ9A4f7nAEu0T%j!^LL<#~t7lr7pE$TNT{@f?r_0C~uF>vRsexv@bzoL%uO{!p z7kf{)TMJVJBFtgYEQ&k8k;;xXEQPFYPShSCygx^UMLX{EUm_)Cg4skQ$l-7wYm*=>7m@}N=FKZ-y5f%f(Rg2OA&-|1s@qr4Yj`` 
z(R`$n67=Z+ANLZI1_Tnqc?>`t=8}?fafz!Z*Z7TtBCt>+jj9}O&Zz_fK*n%oPc7TZ z^`=YusXiSDix+*^NO;H~AD7iQdeQ|s?uUjPKjs2jF^ERo*g~ikpwq#@ZIm7fxFxT0 z`rMn?eO3jLdh5;S@EB8Bw5dUT?6jFoG?5xz zicRi7aA~I?ip8#`r4)~db}T8TjTylNO{|aws|~y0yhsUZGb-l_1Z%kIF15J9!F;eP zVJVyn(~=0{`c!pOMT|#I(xFXAl0+@R238PE(yk;HAi@FB?Bwt*bV%GC*q{OR7Ujgx zZ8g%m>B2A_1mZ-G*c=?I|C?lRqu}voaYo}>VbRE;srGgv5a={!6GgsNY@=c^!ey{iT1Yq= z3)+)WK}E_)#T~(q=yH;zSUQsElq23i%?u=Oz$h{4YPwXPzClCS)=>bu(d)!g2HfcwPAp(D8^*xW z^-hdRj5SLdqg>$;G1Am3_2B-o38teie)C5$5|LyXNc|b%bT(B5c%~DkCviuTum)36 zoKWP4Gs!lT7+l0F;xG&cm2|r7f#vk|4Kwk&Na&Cr0n$Wl337kRVnU;;ODHMt1S~N*A)q0X_C$(}bXVFnib^Eeufv4YXvJXy51}PFa#>=xCA> zD$8_f74HT7TP@?9at$=!wKeWgjSE}kNS0^?5%ZFan{_v+|MJbUZ5ff(bdfkLW{rR# zhmd0Frqwhy4R6U@jB@fd#JK=AjAho{>EVu08UJiBLT4)+;mk++@mZ$ z*~rBx#!!^dVBxMwfs0(ag0w`M1{Ra@8Bzx^BgGUjwYBv@J$S@~or>zBG=VxevBzB%+KT zcg;DA8Sd;-G9cSlb6zZ?sw>jQ-lP=W_H{)XDsHrBpC~|jn-0z}sphC-eF*>|fpD1~ zS9Y>4N;?QGQ)`NO*^?gVB(v7u6z>CFBhn7+Q4=)ja4pf-B5YKxE2ofQzm_Y!fRKu~ALGiR6UsEL=wdJrZlORC1G3r8nNfS0Ko_~GF)Sev`J z?@!{+;-rtG2rk7OSPbcO-Lq22X=U__0$SJ*obFe$8&0z>HWhMvJiy#Famoh~9wylybk;P{!=j;2?xJZ)}N&)suBIvg* zq6!==WSWsSdj1^NlgA`?XBcJ8q%4Fq0K2j_-#7e9TFs;?OL7X4BxD_om53e`Ksnyf z?dTW-Rg-*WY{(YnR9BP}VFKV6Nq;^F@yx4d|DVY*15aH@IR>s6+6!DAQ{T)>iEUqG za>(Gg=~6u+%i{g7%LHa4PasbqPasbqPasbqPasbqPasbqPasbqPasbqPasbqPasbq zPasbqPasbqPasbqPasbqPasdAKPPbVRU;k!st}G6cl@5?hl)A+v}x-&&@lW-r=l2q z%>66ptS|8)W2YQ21ONSg^!(e7DMlPQZOVBkgLmz%TNa}~ecv?)O8xkyA-ph$i8nZ5 z1%Ezl#Czdip%-o>OtSqwjB^3_iQm6St5ItfL*-{?j|uY-GiBjI4zRANOYfgQ5zGT=D=w?+Jfj{wCOe|BKhfd7&;r<#jc~N)?Khyk4 zxHr$3F><5%;aR#41b+j3J{QkKk7*BqHU7q%5OG&{{2r)#Wl_BD)0E$rQ3tWgtuMNS z4T_FEa~bo<{Y4-7o>q*yExeT)tp%us|NIa5u+=-~9Ww#$!y$+LAv|*tT**U=;@!VF zZ}atMUhzdZPrz+LXB2{{`r6J_{a+@rxm+B6~0ZnFtj4P1$15g@Pf%WV)(5n%MUv08{gf#C~gib z58Hh6DR`P=w;uX|rUOEO(;lL0UZMXZ` zH^L;o9>UTCN6~prKmYW_|M&3QUkRO&Bp&-p;Gi29O`r7L5FXt7Lm^y7`7P-NiqOgX z?KE}wv8Y*h7I1}q!+^6;x%wPJPlnp#2arE+_Veul8^mU z_~dqn-S*-S4=kqM^5V*4nEaUUKN!NP#j9@!{{qZAm=jlc_8@2Osxyk>1vt}VE)C&B zG4HI&r<}X=GC5BR!&BGuOUKV++Evp%`v>aB~h 
zr<^hCBzWe2_%3{kJpPzT!x1q4gV6J~V$kuIL-Yc7Enj&K*N=uL`e9JE1hpm4amDWT z(a4#n%${<{Lj2WGc8oe^#j2&ZJoeZ^jC=`-m*FeMgy+Jg3|V9C5u>MHe982&2QFTI z^<9vjcRoDuhA(~O!W|H>`3dkE@QvZcBz*Aud+Zflj_ZL-&L2_C0_w$^#UNUr#v4VC z!&lrfS#8*1USFCsd(m&dg)!(GmeDKj#o;AswfWfx~LeJSDY3C+{^f?fCc_oZ`pZt`%r9`E3SyofP;!5 zV|U+s;)HizdKM%He{SW-igR%Db zgU-9_oBaGS9{Ryp2akOH;rqdh^okHh@{e18a{&$i5Xj#EHT%OJj-NDT%4>>Z=UQFG zw3YV)a3tXBAC7B67LK9U-g-D&S3i2v2(+dxTlF#6=G-=Shq*tzjh|(URUzCoe(3mF z^X3jMCV~U={0wRc;To!*wGf`fCKScbATn?E!i9JZ&T{uXfq0qrg|*@N21_q@Q8l+b zlC3cze1)wL?xH85X(7A_k>7;-cA36#p5RZ0_e_V6pYtSg!NUO`;qr5ND-Z#%?w=$g z==RIwPhD{*UhR$Qbq%}rc$nAcP#Bh$j<>+`+a5a>#pDqFK`9XU6Mn*i{i`>aEbss2!2ZvJwm{p`or+4Vmbs*KXA^%(~xqzy1sCaUU=eq z#qb%=i!K;ma=@$Qy!RBv4v>B;Cbomu1vqUuAOcSgA*`GJ$#`jJoW4(~0J<9|LtzuqTYbJ$0~_`g4&LC*=O>sYuh_nx;i-EUMvPsSU(1gS^B z9}74BgAarc7u({q`*skyGyDxs6rb>p+vfP$bU%!JDeBU;>pdQ>qbJ~EZ-CNs?H)6? zc28~ye0R79*6)XK$4>&>`xpM;>-@<4A>6_n!7XsgyaOP=@lN_03ZE#)154pqDuidY z!Kp4nAJ1hBzNo|du?1scW{!WnCs8~e5v3uwhe^^FO z{wG#-;bR5yMM1ytgw4{r>5U=m1F>Nrz3TBl!4uZ>{|j$o$fQNT{Tf*CP6XEJOOJ-j zG8CSWiURI?emcD1cVK}MnA2Y|@$(RF+yZ}X3D|=`J>B=a&?#Wr4tSvzU3eYEv@95t z<{VlS??077AAa`9E8rP*?7hF)SRj+n{W#q4gx`QO2%IG#x%kvGu%ySUAa^>4z^c)8_>wPB`Xg|MUj$Eu zc%5d=8(b7yzmejjX228uUEwNp))_Kr@Wds5KdUHq{6h%eq*uOuifaNqZ!hCEvtJ1R zJbUvU7e4^+_KV>Vd;uO&#gjFCuYmWJ$XOI{7bp4Gor-UGr+oE3%09M~be%R7J$Sw@ z4*Md%j9(ZIvUkIShKxJ;#0hj(O+yYdOC4I3$QGD!$@B&-#^zDBQRB`H!co&Vk{KfH@{}t$(^{*8( zHyFR)^oj5^vBuVWVYn#1iZ3;CMuSEjes6eb{+MFNJ44tRzo3Ly@e@~UT5NFBPtKin z;PH!>F1qc>rBFQ`^o7~Q(Bpn`El}K9)%C%Zw|?(q#9s{ha|qvMlh!4-;*<5rFNUMh z{rr4*@AAGG<0dSHCkIkWprZKPpZA8psB{x~YwYx`5H1)zX#BBH($m7zAv{{lTz=M; zBSwswaru2Ogy;Tq_Gxp56dTZ2x_Bebf>#qgD`~{!j1>@Oq5M@u_>K6-S2mvIp;(!ZyW(GZsJh96pu*mEKn` z3qRc*-l4)n5#B;(i`7lO6Vg9*Vg z`5b)GEZ%z97+8l4n+Q)DOUB~suN)r>56`$RY|7pbpxp>~dU|rjrNbcd6Z(<5Q$zR? 
z=%QzgV_&n*utCF4LRsG!R}|-mut8PVkX?7?dzVPxe#L&$inb|-;B61bR`{ea^$V9x zA65(=J7u3YPQs4k@Le7}UBXiVydp1q2rW4N3H@fj;xF)x8XVV}m%-P6^j77uU5mG) zmT>8TQ^pP%It$IChOB!uZ11n?+V~SR1Y(52Ecz0^1t$Nrg>QprCOY6*X~fYhR=`(S z@aFkHIX3gZm&`c@pX-ZHgz)VTzKx>=&g8*e9}M_Co-uFO@sp&hxIX;poFniHAb2No z^}^{RUNIBR{YPGm8@webU&l?KzH_<|!ZzyYbC2NnFBrSw&O;)e)@&55?&zodMZ5mBg};G z(?@xtP3u|$uJB#hHSapHh78n_yoN|fIaELg)=AaGyCZM;ER9} zGZtM;x`rS1bhrZVv9<3CSL3(ClM_t8h^}-?Rmbk#N<=lIgA3yubyFLdGsbcVy(_rkyF~J{Hc{bhib!?1O=C^P?7%a9ltF*k1DY z|J)y*Uh#{qrQw->1o%$D?vxI{Tgf}Fc%CI)2c9}>>*4&qgqU_?zGS^8tb()0`}FS^ zKk_zs!~@T5+iW{`K0Zn&Pum*fdfw!x-bA|3CDWR(z$Gk6nF-&I~*P;@F>4&^4@dcX=LW< z2aFyz*qz$${{rBFRJzyVTQB<>c<@X#h~>a7vkMZ@apzHwvVkXx&-`|L?Y6_#24}N% zE#eJ%;ygDz2iDq1ajB#ay#>CKnJ{zC+Qo=#?mT*<@zbWkAoRc+jO~r5&xbeBR5kcX ztjBM_!P}ki(#Oc#;rFx=+#b(Z8p4IRz3+9nwme46`_sKU6h|S)zPo-9r}*)u=N>%z zl{>?e#EAI|r*AR$;fLW%MLot}N8dO?choWPJa!wt*o?oOB&-_xYIrXmmcJXnZ4mHR zU%<;}2tWVy@o&{~*tZMetLWYcUxSqodIbCjM>nH)#GeO_^L`&zKy2sfn_>p8`tUuc zj(a`a!}uJf$4)2XQ{p}-ldGZK+!%p8N)FAGA zzhffawUa(`*Iid!^YG7>egCOd-?Znj=faP^0X29hf-d;h#i#z2-1ldmxGQqt^HSXb zZ^t-Huf1@up9S9&3D1llJns^P&%N)$t;XVW(;ga;eZM`&kTv^Zct`r!^7{bttH)rG z<8PO%wZ;kXejEPx^^@b%v=6Ioy5>B+V5fI4xpUd+UqH5PisLU(2)x6ch2OymyJ}prh#V0Oqh;S1foFzSOQYa_gzPZ~lggE+>mF zp}&q7oUVP-z54LH`mraTT(RgI{Ak*>xa39tN~jn)kLGn}xZegPRQH9I__ZWFqYhbT zR(Q${`E@w_1DAWVF@P(4c~gtvBiX?_s$*b z--$qS)2X{}jIT~>4;!`nVMl!ri~o8(zL!mphtN$Mm2S3jGWz=-kM>bQc;x|O5C3iW zjJ%SjQI6S z*kU44hw!P5H$EF)zu-aX4tRI{8RtfG@Pb9N4?OBDsjb=Ggt3TkU_l+cAt=_L&#Whn z+7@4Twn-Vr-U44;#A6dR$G;fP-vQs`_jo9}$&Sxi_?5=c$@l^Z_N2XGeW5h_!Smkf zNq11^czBVXxapUvvh}jtRK>Ty6C1$S7`Y!l7~x&(hu|eR`Fb14wn`XZp&q^r&f4K- zD(rF5g0^>J1N86-Co=Jix8Jhh?ORo!OKKVM{mhE<@%u5o9JSRw1o4d>-IzPuQ}(0h z0Y1K!9JCfc@@h$}s25rA&cE5geA^To!WYhTJihF!rwlGe@1rz>hU2%NTOG$N?R8c~ z@c&9cZ6=;MBLyU-tFOPEh=iNoOobzVOfl?zDhbJz*{Xrt$f7sw{;CpRX(Kk#GHf9k#n?-zTo$)Pr z?fxoIJbc*c^%r!#FW(3yZH)`QIrLvf51b#uzu^1eDF+>e8qb_MZriKEPvX=4Sa_@M zzOa4O`ThmzwSW3@Q5=nR->84lzWP`21PA}R-R2XRJxQ(yltpndtP8f;0mdEvvh1xp 
zUML^)Ao1%jpnulT6Zvt#^S2}Xkw5nVm939Y8oX`e?*H8zurrYJ#Dk_Cu>|%r4_S1X zub*)-z0Z;mT;los3)W zor@OCJ@-QW+Q=0{xA%g(Cx)~$yswC+Sr7bCO)Wq10}y$fy*J&X(H8sR7{B+0E2j^^ z_tPgv)EO`ShTdhz2wK4WE{SZ3ed+7Fm!_auI`F$FPXTmgypr@V;p$yC5tbgeH^b}e zxns8e>~eY-;^()D;t=HL0s7BXG z;R~2tBwx1*Fm8k2^M~i;TOvdEDMrnH+n%5K0lxdMKTN_sgWwDE`a|9T#IK9DSOvhf zON0=*wEQij1aelK@hLYhffuP8!`A@tirO!YP04-ytwHmh0WTtaziOK;mP3}Vtw!K> zp#t~zAg0vX%-->yK@wNsTj24hAF6r3TYG$~)8e@6#Q!Kiq5SaF(KeH6e5gk3YE)Gl1!&gH8=B|uEe-C^&XZ2qkSBLS>_1Np4&Gw@2sG_aMN6!Zf zZl?X563hV~uSyIrc5A`g)AHj%)4FRV-1t2l*dZRfSAToIj}l#%sY$n&IdCIp_8WBBak4YZ5D#SVKS>%Dq=LOMh(WbR4y!(q5`%` zB3)%eEuqAz5n}W2l1BQX?+p?PjdemJFr1GZj~1j*OI2oDXA6>MG?iK-OqOIwDuh zQ7)=ONUU-(9X8&W7?l1|28Lo>Ah>PmF&KcusuP#rblQDJAO>vAcc0a9lvY5&u2>@$ zLm?VRKjB)+2wI~qaeY6oQXUx;0+@gkT1QOiU|4sC$jV3yu?u+BKmjQR8(0V{t7Zv~ zjuEjO^a;Z`GC9Brg!>}&i{h8Dj02HG`|rV2kT&{CZw|l;5arPV^kXGj>`R!_xcY-= zq)OxLegi&?MiKnZmUQEcdX&`=I?|DLN|GMVC=Pd(Pcbdz0$G!Onqdkk=775Bu`N|T zO<2eZgj4L0t%bDvtTeIE$!Nhx!8m~MD=o*xi`&zsVwsQEQZphZQeLzXp5~+K(8(re z3>m~n098X!BI2S4|19OaqypcFas_jhtEr9`s#^*GnJ@WV#W5u&T@Q)pYB@pyHL)x?SR=HSauZnL z+eKW<)M#UlV!HD^fnftHN@+23yH*L|rwmVZo|FQ60-g+5sV$R&gM|W5)JQDmuoGdq zH8rNJFbYyUO=E@H&llf!9GXB<3X4K=Ap*OUnl1_%9nnJ0adHSh>P-ab3JY&s%hW=S ziC23H@P&pf%yWbqO^{-OSjI4z)oM@b;L653tjB^#$iw$Bz<~ok=2Ca|kRcXLf-etL zia1?JLf?=?L#xb$0NI*g4?7~g!nh7YhPO0C73(V0L1T zvjRde9CB^Mp=$h$6d4qZgkXK1G7Lrx*3*X!rGbKDJ%O-*Qn-;rg&2B5a<~zO45XC; zjf1PE?$lplq5yJ)gatmEM5a(#IUyxuZ4}CFIqPEB5{|S~v7BK6SfeP}sgjEMthh{z zNLCVJJY5)*r{{o_+Ck(fGsd};5$uT?2{D#{299XOQbbg^n2?hIR$+8RhR}F6QKv>Z z@|j4C78PL`8?bY}oT4aVIGtEpL0N{t3g1s;G-91FwXvgACe~ju2oXNSgdaB2E}G+v zfx+cE!9h8P2uDSlM`o^O%WN*Ctgu)_q+$UbRs)`5C1EF~K**zMT;Og(4gn}3mfsK& z0fa9tn3a$fT20I`3$rmi7A6+drgYlh*qJoEh64gFZ;{i9ai%sgT}I$4xf5i_&=Lf+ zI0y>Ma>W`e8(d~VWl>$+$3YDV27x62BX?Jfg3491dzLy|N@B`5v?&BMOF4CT$B;%S z{bO34Lh+MLD_ZP^rTjl>wHO-^i;a&}t&R_2>L#^1P5vk!!wB#jh0AW`8aEmWD7{6+ zaJr<`^-3UhTFI)Rj0^_9AT^9Z%A^>xY2YGht4Mo~HajNNij^Euztl|OUYE#vjg};P zSra6ULvrA2iAybHTQgclK7C6YEyPg76=H~7N>jG7#Ew$^Ed3y@vKXZnY67{HJk41N 
z=|Q|Ib~c9aTkWOxSW5*&5|__F(EWf=Jktn4ayg`5DMv0L&_aV2u8{~6qk|Yb7?Z|! zJ>g5U*Dn3@EEf=UxmJQTIYd539nearMJ*wRness;l@$C4iP8x`A?P{|%W^nLQVy^f zbZH9nLPVa9nHAyskcnUZzgfmTYHdLY2tALNENu|?Nr|}v%A&7u&*U1o5{LL7s^W#t z7EQ@-=WGfw>~Fk5M1g41fty2;NlCTh&IO8(ake%eVjnsW{9}*~Nu|C*xQ^o8XKs)X zZV)d4SZw7i7;7~08IR(v;nVoJ5jp)ZlgVsa&ysIdXvn{@-FQu@uL_Yc{X7=fNo{l( zRmz8(_LmQ~Js-jm2xgc(Q${lRFcl);-;(7J%130^u##{XJWNEY2$N=rb3rhFILmze z5l7(%6#aacgX@Ro06u>WSHl(SaAg`7HV9JF@+UuHvqThpmhT6`Bfqi2ou!#+Cn7j6 z3@u0D0}tOOfk}c!L?xwRAj&3?h{Eu>N&@z`9P}3W!89#Xi1$NSw;V?9R2RAsqyG%8 z20?kz5bj&!Co|%wCAT69I|KNJrmAqKDO8XBb-}Z$?>hjJsv?d9TYiKyv!5hUtzn3%L z#1xyHBj`l^$PjukulG~bpG^pxVI9R=F2-8{Lgt(g#ySL|rv>~uEA_0)XOqa{t7k9+ z)zSy^DUZL!AX1ktqkvyPlH|fO%i(8NCu?laEdlwv5b!fhQuBMx8G}OMXKk0sKUzX) zQaDBu={bel?`o;bDA7<_+9~5AjKsjt*fhMlRzD6Rd+=Bg|AhUNWWZl4qbCKR1#;a?vJL zSxy&XriVMm%zc#5OllQrV?ek}Dd#f8BnySnj(MN4GzBB(OfLDs(X05>A;S@a2n%c$ zLL-LBlH4zo4_WC?>RjQ9-=~FUSgJY=_k@74_HQ}MU?&R_VRCB zlI9Y4s9r$UL0^#H6@+Qd6@PYIZ-$t>@1cJKSRS6d`JzY_tE84w>?EOB17RGB9p})G zyfApC4!S7bsW0;&2M_!#ywwowL`h&&en%Gr7|23P{tyHX>n!ANsQc zYr!E3Uf3~!OL`QS5$fUVpN7u}_~Sr|QzY`d-+g$PquAM(Vf3$@q8|eR)1}$re^FtO zOL`NMFzIj{tiPD(GK@=HdVQw z_WLvbsMdmclBdEKwZ; zVAY;UzXg-l?D$?T?Zf?VyT(;j*q);OucO?dtgD>&uRhgW|3yl=>Ke5HsViIkm^E%e zWqC4Mfch)k(*rIM)?WfT;q}5gsqeT38~0comGubg<0Ns41bhvZL^g-HY*pJcl}TE+EWgk_(~B7d6lcr zBTCd$mm%Mez;he7ob?ChIexSD`W_R@);!I%?B>7Od0NnjF!svwfLyx>`S( zVM7Dq+%>9eRs}uoVAdb^T$g zAl;TM`1>3DyDIAW`YLL=>=Hr*$-fK#&#O^DS*+?S1+_d;mNudYVGmwVTe1a2zurw< zQDd#1B&cVR1pCO5KkgLe#8F`f7vGLLzE1yuNh#aMSJd^zvlo{XBq`@Cq--4|Cfc_g zF&4e%j>B1jd>k2{SuIG1;P3I!m=O?Dzf~pi0tv#EQ*$(2c96 zUU8TjAMh`V*B(|0S+g4$RZ=Ibat)dIr;qvuRfbebU+HpvsD0YSEAnUHs??l|*g-v& zr(}tzB$XYS60ir}N==JE(y&dZ1(n?sJzWJ?3*DTTn160lNeW9SqLYHeGqLPPbWc_- zQ}4k#%1TuP`4mU%2PIMZOK-lWZ9BG%mMf;q7&GyzVhnITJ;FQErF2OT$BCw3%y2boR`Q58T+Kws3cP26$ecLL(4q!FazG?L<0%X^WkmDul)P`A`U z!_~b6Tn)Fv9o1;6!Y+wI{?M}GV5c*w;5!L8QoV_MNUxNqR-8Wr%xj&;c5W-1DZqer z@w|6xI0mD{7%QF@D&i3~mu}C*IzAK+9+?!?V2o%w$)pYwFe|QUwmr|P4c-HhfDM`$ 
zu|IqplaY`|BZ0N)NGn$xKHj>tr!I+rrO{h0T_S~4ww48_Pf3Ew(gGcIwE*-IW_AWy zCp3Dh6JR|?lRZ=PL|@KDMzP8O^?OVki=tO|H$pQaV|g?LiRr|W@+8)=V_MkYC&|cG zxhM#uG~pvK#>%Hs!nEbG7+aOxYB6JuH_9cPgFk8=3tCiqmR7d1Lq77!V=unGxL!u2 zmDoJmDYE{IjG~p{=nzt7ap{^fPqN-=)vevQPMbt7S*RELa9M? z)s`K74Re+~T;Y(c+NI+5T-TBrE2paA$Y-r%NFOe$g?3#WtOFBSxJ>w*Q=-$6xNuTR z=eD`Za;NSJIx&fn3ynNw$1rqvngq1B3MO!&%*FqBxtRdVnjQf~6 zd6L6?lf$)QR&9bLl#Fa13pVB{sU@{zBVgoc*K)m}L_Mrl+lIg$EX>9ou94~5!U^Uo zhicEXHVg+5vn?cs5^*yMV#r{c-vuIM&m}|inr^n&R%?*<65snF$RR}uKwa7|f}jRU z1gqgHBkg4#3{p%{;HYUk7qMF==*)i`mLcKxyr(Rv2$Qn0LJZJ>*l3A|+v*=c8%fmW zUTJF@5oftRHIhc+ zI9+6_QgkAoQGLGZ#f**2l3wUn+Eze}fPb@We*C=0k`t_z7!h676|oToRu!0}TR2G8 zB8B1Di>)4@05*Jt^}-*4C5zgXV$LO6mPG4Pmt)cU{h$(ZoX+&H-fixARZAe}aYcx1 zjD98OR*@%D=Mb$7N0Z^rRwHUrCi%LomO_q9#Aqzp>xz4eB?LzAs%m1@8Uxj)99!FF zmFV=e)>>M)S`smI(bg&^AhgX?c5|uW#=Lg$hS) z^)Uu%i>Ba+>DBTKiyit-OgP_62>;p~Fo?hh*v4XFXhBo0NFzk6%Yl@;U=VB+N}=Ro ze0&mbDIYkPD1kR}RG958s|`U$3z&fPL**?{?S#{I;H5<_kRkBv*6xC0G1 zM211fDwrhi(sC1|HA%h=M|$+Otwbv8L}(>PhiuvP6cUhv+uo_V_XYW4BgRV)bjVc0 zxqiw5Z*$`oy{^E<7^F2c_W(CFFfS3_`fjz>VmL8AmFm&fuf7n^;0fpJsY^G+-5zbRY=f-X zV0hM!?TL3^9$Asacc2!rQ<7@!bfTp-TR9WiL)d_wG9>!R>BP$Rt-a5`=39|2gnxL) z+bN}78v#JhL%~i0X=!`j?;PRPJ5ddMg1F{L>ExziT#~{Nsp2TR38F2Bnf7iseDQ+e)46$yG=))~^JyMhRN3t|c2736DCzA*0k1!i5%u0Wnwo>#L8G(+e6E6=Oh z1e&4q{gvm{Yy!>D`TolDYBqsp=zM?Wc{Q6rGjzVc^1PZ&pcy*fUwK~5CeRF>@2@00|Vq1~_^0d0v%~^ccwV>Tx`kWS&=L zBs~W5ym}l@C0P{Jzp`8b4M2e2_Y}p%0chQ0OVNef_6cx+Z#<_9%y*fz{{qjP0Wi4x z=Tdn^@#p|ZNm>@gcK?0Z#UCx}Iwt`Qfa3%WdCylzn>zqfl9na}jPdU~#M}`Hs{2k+ zOq;jaD@@Lz*H1GWjc;~e5qI>Dx|`IyY>+|7?It0cNRG&MwU6U@g2neoB zP<`#R=<3>BRts@}elhiWRGM42KUPv3Nv=;XC3LZtV6bOFuYqBceF$v|>+Yx&=R$3p zT1$KS#4a2uB85QCqtdRfFio`Kx!STBtIL_hXPAUyP5H&^M4#%-P8y_6Qktg9o*!W> zE2DI<5z*4Bl=^_2uoU}DkG3|e0conYy{5DhGLA~7gMg8F19-e!zW%js8WbS3%wZ8V zQagmNCTO_O;Y}@bQv>XSc+BgMQvpVJ%wP8KUT{>`u^**I~qJ~jwXV(df zRY-A52x~gFD9jKp+t(&^Q_8#8FyEyV0{dR+Uq8`|RcWBI$3~Tu3rN!m<>qCdOWNaf z^v(oEI#yt8&ktILqvn!9F|@!)GMJF7>f>dY&$YIZ%#I0wGbb7l}#m z>A(!{*rnFG 
zTk*A7I-wpB^<`SpYOk&GK`Z1Sxu<#n+5#$vTlvOWZ?r0N@vm7W2ig5+^&g`!ak49n zCY7MewY{vdPAw=<)AmThA@u+946X(-Bis!praC-~$Q!`rUAA3=7@VPv;kO#{+0DN7we=qaX2@D>qZ(XlC$jbvB|i_V2yQkVMSr!M)Z@ZjdcB~wAA zFuG6hUy@}+@lQc2Fof5rS-1P9C0mGEiqd=nFpgJ+@C|6lpmSt*XSl%^>*T~36pqt! z>!e16l`Tb-L~WwDX~V!eUr>=>({blpKl)}tZLG6)Nk7AiVJM#<9tyxWa8E=-VA1lV z_M_poq^pp%F1PRz$ia1=R=VT(lYaW3xQ*sgT?jLC1dY(k*d!2c*ce zdAVG@$(?9*fYe71k`!b!v8hgyXm7LQSs}zvDoL8QHW%$A&9sWbWk<0uK|*cC6va#7 zXBb>xBU`AU`?G5FwktR636$a%F;C$SLP~8zL1ZERnd;Za2wD%O%xSXLQ)bwc{NtGL z5pds+{*joFl>PI(0sqd~q_QXmLv$0v6hjY=cC95OxFvat?-fH4Fpg4+W+xU8J7e*F z{K+W!+qOypK(A%k9-64`%87d^ODPvK2qg?|AB#2UM)TlvW)CR*#6`Pn=Dc62)$=Y5 zXiJe8dOQh55LqTmJoBaK28@#p8n^twTN19G9OE+rMN7uc65|mh*#`tIZdn$FpHaf% z)qjNV*S2#Q8yabWq~7bJ6r+G-twwzdNEyaGgV@h~uO{e9j4mzKaQcBTic+!Wt;zxx zDT-~^04yYSsgdy9Q#C;hMIy8kP)sFZE=g+tu7J9dz264eT_J<=+BzZ&SxJrMN^f_6 zNeTKykwL6bBB0ztYWz8biDa*%k=p%+JKt5BRRXh?mn2~x)vgqyF8~pDlR>x&c`Z~i z4h>;WB^U!h;dR9XSY!9%1|yOvI2`7tn~{T`@wS80JR0U`c^^?a1-|GtL%C0(~nVL0wSEM9ORKyWI$)b4y zGO6reM|JhMvxKhd>TygYb|6Gy&mJd*NfE|d7giQj)>Wsp3u0W0+CgEyl5j;`Fe~lh ziqS=z;WJ$jQflG5qm5}=ONmlhzq_^VPa$!ol)C*cNgHTGeb7;y-&@60w6v_>cho~| zOP!*W#44fs&;57P<(?}1(?M;-GfZaV&xB1T4)_gT2qN(mk)8;w~^PRx!NZA1CqA{o{bkv5O z9f=XJ-xupno!E1xq*)}yNo2o^;zrb)dzDR*1rtN78+xn54g2fa2(g zR@dewjcr7tOa#gHa?zElO1(JJ{EIy9 zp|vDSa|np3{5B|(S)+(U{i~;*y`>nJsITnrs1wk_>~95i z{O+xds7kMAQOq4%SJ&UM79+xN_nx}yPLiGmYSj&>t{X^ejV_2o^rX5|=|+s7HSD?5 z1Y&P=x}l-2hw2gmqKn_*45Y3)tvQw_kSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQ zkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQ zkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQkSCBQ@c-2W=(pwLPxUUA)54!S9xgxg zD;s3MilIix> zLs=|w2A886}0 z)ny%+72DmdX1crHn%E9vWK-@i4ePEFJK2$g>WoZ{bWbp83jS6OcP@v)?^KddL3s@+ z&|)-l*J1=B;1VR(wr`avsxGUIgjGbZkyzZm)hj7Zs(xHWF+N<*68(z9L5szE3vGvOvRis)&dtM_Zq3hJ{x5n~1Baf{VG zOsNG~tZm? 
ztczDkBe@9ENKGLkDEuwNcXZUUYD;G47CpqO93W*u2rD~M#6?xk7%AODlqiZHM?h-} z$n~o0b#E$C+O_WCGw^q}yj%UfT$(M4$505lO%*X-N|ms*I~}Pa!MQv}h|&zH=tNgN zy+o;zv!iKNsw+8DORa{k`U?W*8_B^oHK`#OOrr!zn!!VCC@$@wUS!uuYgK>{DXK_n z%xx%%;IXf=;d#b$jd^RMp%_rO?uv+%jir?+Z06Otk0kTNo|M$);3Nt)-fTxoqoGo2 zk}ULOL^uY(%6Z%{gdU~D(R>m&KwAcv;wsc^vIik{flBktieV0Zm=Sn#(hyS;6i!Jr z*^nSMk9^uz8kBTVly&*pureu8OcON`>qvZu0{?2bsT7;4pnww=WC2lzqtp>nWuwKD zM4R3Tm>cy(sgy+FKE$1^jiK*-Co6$e9ifd0?bCFR46QBZggh9nQJA50 z0&1#9@xBkmd_Zi9!}}@RDP)F$HG=^FO)&ik?dULSYO8#D&bTuzcdXGxP{0VTD4wR0 z$ZLrv`eHd%sf&H^uzs45U`!K^;6#8+pXy>(Y0vC*qB1Yi+Bd^j?KV@cD)n2bLf#6Y%>+cIZf-G3XKJvX3a`xY*Nl#O;#<8OxlrHI2`ANL4&fz5rzod5M&Th;{{4% zuP2en+Qm47p!bp>zr{j`D~E%43MOd0dR;7wNw)_kYD$c;CuL2F7eJ#G zI@&zJ>ghr{?uk5d5vOo3Sxr*JYMy(m$1)r1(E_I7ZfAo-dSIhCf~=23ykE)^r{&4( zcww5@AW@Pyh%5z4qD_)OI~E0nR}xuOSB&^>iLS({qE<&0*7C_IuQjU`W?3$(pI0OW zjspJ8vib3I=73do*$%N2%n^gpcsU`UP8p;L&EwwmKu)Sz)QUZcBa{Qu2ptW9C16G= zSfGtIBEpIhX$SVSkF8tm1kuJY9SKo?RxTRmO(YRMyE2!K;8`g#(Y=|ok^ zyO<}kLmI^?Jx)!i16aQ{fEf@~!H6n9j;XTIqGv_>QFNs)7?ezb=qQaA8skk41LKMb zLUD+yI6^Fq7a^!_%7huRQwX&$if>aEmgiZQfmoiBbg&y!YA6#$(Mi+s;*n}x#1I_f zm7}x0+)#aDBgd?=I93bkiZ06Ztsd1CcWw{9890iVM~i(DsF}`WX?03p>lxIEiKb*> zBP%NIcU?7NWA~jU3!=U!!gzY0=uj7wJ#%)swkm;o)r2+9JL^Py5KR&++{^TB`r+X+ zutq>NL=nCz!@?wBx0@N1sPK0hu)az#pW0iv zH)CQ7P?pu&C*FN|6oE`h5AT&y2`+Iu(UTDL;zX4j5Ymh6sk9Sad!K*Jw>;-Ryi>AO z7wnvAEiB1&qU(3A_tiTQ1AOA8{)oCDkJ?J|G*3CCfPid7++@MJ^w>W+SC^A65aDhy zrTS!!I!@KsM2})4c|$8zPg0_A;fi{-UE6yPYV5RYF{LaEBV?)=LT!ub1^9m~1W;z6 zDODVki|M+eNjb!cft!@|!M2E4kx=N7GdAFCT%{ufB3=q{lXTwl1o8y(1o8y(1o8y( z1o8y(1o8y(1o8y(1o8y(1o8y(1o8y(1o8y(1o8y(1o8y(1o8y(1o8y_T_$kyRU@6$ zRUsTD?)W{&4;6FtY17tmpkervPDL^JnEO}GSzqEq#!fk4#w2h?&%f=MV#JZtrkr;& zc-P*#Wik5G_g!0rA2dCf?wL75w?I5$}b6a9+5PFv<4!FwO;j8^T|t)u=U# zp<8q6m@p5GDGL{JfOSn>djI^1V2+qRe>!9*9=9qSRuseLF9PSP@YpJBn&=uaec>%H zf`n!8QByd!_@@5z_eKz0vdvBtDJ;t~$v=$)AYcPK~8apKF+7yE4uY1VR5a$C?FTX=RiSV)$mUT6|BzazW zUd<-Z44v< z<#{!mKr?i{zw*4AO`sV%-(PuN%_h(co$s$auVxcyhR*lbzxTYRiu{+|b>yEMhJO%k zHs5|nzg%^_T(4u+)zK~avb*o|bGhn+E 
zrQeIyrRF`fAzdea01%IFpTGwA0`q6)c1KBIzRRTj7kK6jfWh5A_fytY6#KlAls-BD zQj(TMvE6@PcJW7XHat3B(_h}6RCLEtG0<(ud%ika-2uIZc;+l*3-~N;SbO~Y4l!Ny zOPdieKu1wbo4463wpb4Bw*gUyUQeG=MS1TSAVu8PKk9Cp_a_X>%;YzK9{}!~yD!n} z((YeiW#V2gIyKv!y+zY=AbxRJa^4G|f`1oY-3j!gM5hbPq|v;=@Qa}DfX9PiO6dgK z=MMKI+Pc2qDYhMDvVL(Mo3HJ26;;AsbcJw*$yy`)*laMWTzwhoRhK?Ii(=hqV%5sN zlu_Zr4iYpY1zm;^JNCVZ%@ou3o{FJvACLShHiSQ#CNSOrhys7K0FQ`af3?=4Smx_R zm&$V~={niVG9?gnP%;Tu4z}rIEkI2SaY2N`NaEXZf-#EZMpacrQ4?l-FDsE_bMCGk zEkNZ?S^O!FvMdMUo(KrnNVqt$7a?gnC8sG-C`F|s;~H6`gtD$Wo)T?KHz7xq_N0nd z%nHIi0!z9WMCqnW`xsqME10%bhHB?9tYr!m0m}{jR zK{w{mVt^AA^9E$Ymr{gFWi3b0*Io#rd~Bn&685LSSO#dCbduw^I4Rt+$@y?KtY=AM z3n(MRDW<;e&>iA%eJWX}OTn-sSPwBdzzJlLPH4(ObzJ2_6aYAdlr!B+Xhu?7&O|i& z(L;-U33D1(cO-a?bMPBQ@ivMqNkHrB5Cx&6u4$FNw%PP^I1-7qzLe%hy;p#uLRR^thrc95=wE52aVXwhLhykaWs~N z*rV!Dtt=)MivU2r5U5J&i{*R08h(V5(n>}<7rX&%=5t&alLFqwf%sAYV^fGk7R6IU zoQDL#1ey_Eu2mxO7;~xA7X|!%5j==T7h_Wc6KWf}B%f#`Nm$LxIixA@D2~ix(jL7d zNpS4U{RwYoG@zJ&#W=H2f+Rly0X`I*=%T-mm7ZNx1Qk~5MLLE*Mpl`S1n}EMT+V@5 z!#D8eoQl<1;lq(?vvMQsRy2^woB z#PTHP7;56!)`jt>udyJ;z}gM}Dh(YEIjF|mUda-T>8hlu)Y6WK1k}Mz9xJrkDmI=$ z!pqJ~z{iqk2^4Das3~++NmjsGbB*5M8i?Rl#1Rp#d(m_hEmb9`Sl@#sCl|@1)k9UQ zREv}9L2oOjZp29z+yKyPwa&U@t<|n}ugN6MCyn0gxM>fCdih{)g;I#+F+k{o|8$5Y zdNHI5Cf~A`Fkk~|A$tvVg~b;=#RU|?Zb*X;nc~y;6DcKn2+f6cE2W9zuR_Hr5h7dl z#xgJ%*DHb5`K+%avE@dkK5>h>7QSiUB?Gs>^hVY%CsU~i@xpqwAAv~KjQBVl)Ddv>A39u+0O-f3r)-L{5p6X0w&WWnx zGr;2iX(Y-*>-xJkNfktn*4^tfL<~!MsRXM@+IKzSOS9MhYRbX@jFGh?{`#i`K_2Fk zXb+BHaVsBERSHA&{4zU35YN9NY7YKuos_!`Va|fkQzj^TPKo-BQAi+5i?qiTEGv$`W@&wGt&Kh}A~D zx3ifl<{-86St0Nzl{Bn-(wFqqNHTJ)iWSAy#G(OpXvYN-YAePf@N4x>3gPwed+#_k zM3L}XDz#bzL=c&vuI*NlXwnW0GmM5qcn~r!EOZjt72NqD94FBS<838pBMIEXWo4xp zjE942TtmYN_5RO?@Mn3J{3fQXR&nte4I(hF_fx`PWNcOpEe$}cZFdD0LxbX<|DeUl zw*+{HQp(B|_*sj<-?l9lUp<2p6g|&!#d~_vWiT+5m-fJK4jfsiW3A46?*9SVZe0+y*f}6I1c(s$YzQ$R4Lq z7zv&TyA@l0;4NT<@OE0Y;<&I10i{E;y!)hNv%4V$Y*1S3fwuyS=EL0 zkvbU_GSP9;9q`Y(?$G!oHE@r4i5BnoLFyr2DMuUtPbJQ4m=m7PFTBM}MTEE*KRym?4q=Xx4y}Rd7N2WZ!_KZ=p0E(xtqofZ+$I 
z@$T-Z%a4n?Zx(fR{Xr)=Jkv-(q^TDpRopJwUsB74@F? zy+IJEh{C!q3L8p$?M*H@QCDS_3hvLH+iM5!4X7|_UZ;yjDf85Pw|1S0^z-AwOH5%5 zc1w@;5nYLkP9kssgr&MnfP`gC?}ZfffFr}Mgg?Dqer_?cXnKJj>eozX<&J9Xx&s!aple4D_5GH@=0vVLOJaGVtlygV zKjHBKxgU5su!eI2k?=Jx@cvY14 zC7%Uhg}MW#OJuW5R+Q>e;R5XM4TZQKQ5OfvCo{#llKwHv?NCVUgr4}i$7H`+UIjJc z{-OdlIv@jkzY1xK9K|4zM2Yiq(S^cH7dS5)U5T)lldeP`1(YlKGMJX!MksD$P>sD z$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD z$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD$P>sD z=-LEskRR_2i=`$0+%f;v1kK>j0fQYSQ373wqEA^$MBucWV@+d=xkA{FE4hURWj6(e zVId5TKSa&&#u91^RDvYAl?G);U9Om-=rj1=PS@J__g^|a@SJe~QqL9!+)?5NEF>Yw zYb@syLmR0@ni#HuwQ`!!tkQ8*G6q2-qMcbKXckdk+HDaIFYf_NYTRtD(3TOQk!HKq zGp)=|9Nd^L9ZrnXWn>K3Xm_jBK(pOCFe|lJlXu~Zy{Fr)g((6N=CEiM#U0>CWk(xw zQ6Z6%qn7LmEyrFAf&PjErZ6H9N-F#YjcTCTj=!y+Du}ea_^!g6G=~`aEm9KhY*Sk5 zmoD73L`x7KQv!cUF|TSpwt=m*sU-H0y-ZIT&#wd(UFAw&)Vr;;tR$|~MwWyHAyF=Q zlBZ(DB5HLGttV1lh>$a$o@~KLPX@6=8P(zw8f&m4m%zawW(`S3(|}^AfWW0t%u_;p zOSQTUcj=+r>Pkln2l+cpB!cNh0FTdg#JIXgMpHxW??^Nsrz9ol(*Zv2B_<6B1pW{d z0}zL~q@-M2;_AsYe&e7BEYwJ&Du5_h`PY1%{MISa29x}+s zWi^hTbU}{$p&`eQxqwy-q7gT?5NZYJbZ~GRrAGp8$*Y_`_a=6qRY9cQdhZ2rq96{oXyvwQrn)o=JF=21cJwe>+q0dZQZk2m#S`UzNfX2gK$~c&KXrYEnqen8 z)FUOhtc79naAR9TeWc5-0X0H$Jfx@DrRE=U7$Pu@Xrz=B>0qJWCO|x8msxNxURm^1 z07?`~5O0TYRN@9`%TN{ui-!*J!a|7!yi?keh_Oi$j1~wAO{zB{+OSdv@vsg}ZY-V# zshO4~xqDg+|Ch#=)vkg*lw25KCz)5@5rP zN|8p4NGL=cmumtJgn4B{6j)Gbs8~vcsM!{IqLo|#C5N4q(BjmqFwPgn`B=aj&{DgZ zDjgG6F-z>|$vI7{nWA1gI|U}CV^OLGc9m7uMQSCFpm_|%2$kR>rksdmXv3wm!}m3t zS~8!+xI2izD8_7O8$(-jg=M1KfrW(1gmxHABoZ|Z><9iM%?NWfV4}}4sy5~9b+VRv zNJndeY1ANbCkLA)S%FyG>%?Hg!}?+q{EN-pTUgL%T@(oT_t_~H6>`K@#4&;(=yrBs z`TCKGb?D~{C=LKQdC`bbY!1K%1pcH zVs#1Anu>#{G^I_#q)Q4mCj{#{RP-fWZ8@Hp=My_e0zr`!)kW$p!Uk5LBE(ZQuVgk` zkO4Vxy^c0n5}M_hrUS!&D_7Oo@LaRF6k-I3KkQTBb*>5$Z%=dGE zTrgpe=s2Hpl~y<2k;qzjR<%(99_oveM4~Q*5o&c7YF8ASXj|E%j#^#V(JIhX2^v(t zASE)z;H7S9EcQFk?VVR<(8A*-Z|Pz;e? 
z5szvxh>Ju8vsxRI?qHnO{dTgL+h1!!iUQowM^qo{+Kot&*BzuL>XOC`AfUj@y6ErE z0*vbWV%gLQQ$CbIw(RI*gx7DY{X7H^Mk@v>p(LxhA4$a6(azNzi)n-c=!hCKNOPa+ zqSKvBFFlk(e!Auu-}uHi#u{tw zwfEWVvrmRIk7tG%kS4T_Eb+-=78NF4i}?D~oBd#5aKq##x^^OB9YNe&0#(gQD`i7Q9CP3Tocrz|D$r^|dI;RzT*@3rYmG`Wt57ci-p*OhTd6TUBQd~JG? zgS4JfTdXSDd0*5t42V{lp3O@XIkq;f^%QyS4P4r!0K!lQAguWc<=L&(&4rSL8iVzPa>%xTLMY}i<-d!aJf}Iu{ns3%ZQpq@ZIfwm`b+Kv4k zX=RL4#T~lO(B5JWJZt<$j?||YIX$O6x?=ifg7+Fc=HQ9=@Am`e+&wMyJ8t}#xu=15 z%U#35z|TH>^C6NybTRzW>2wy}{FLSV`LKQ;g@1*fzojV2@@gF70QmIyD^hB}`k}Y{ z&g?Y$ecv!<{(J_wuW^eXoih^5eiP=v_nwB4C#{S}htOxv0&s4Ot5#ystgn6(=HCI| zLmHP1g-8h7e_#LAd`kcq?YP@Wf@Sx7_7MjlL@dIa_I2E<@dz~UD@=SL&JUsgqF(~_ zf%xju_rUt6U%x%g3;V(XOCYer;9jY(Z2-Jvlf#b(+_pb9>1(|mn0DMMtP$Rw&+9!) z9-Tb`eot8DC4Fst({BNLDE{ZpF_7Z>>M>yQg1amG8V2YWHvQm9T#fh3sjm+myA`_U zKL?Cw(Ao`mVPD#ZOIOi)@bcy3$moIl&CYmu{s1%vOj|MySNs+tSmr$nJ21ZtOOKy? z`E8-!2N%pg2(GVQqZh>2pMl7QA@n?VV*f4Y#24s15dB@@2N91%xA9MaHT1UIk+Cd3 z^8n<%IfS==mf{C6>rn1;_~lo!LFnM~mavY_E5Jv-$Ag5LzDJ2Yuc21 z;u$e6J#^}Kf3#l+--|1b-fp`wc$kCVIRqY$4~KW@qW}|e8+Y~PY@E1q#W@$nc;m1U z_s0hbdhfw>$~X-Bfm`XV1HM~*pNWk3=(!K1PoJKfpSmW_3~zn`tH{rIV6m``|e}MBr*2F#BJ;<54 z^4t*Cz>!Y7D#lNwxU)u|e(~aK^_mj);7CB?usJ=)T`f;dvcjz(WT0k##=km=LasEKK(f5J0 zMc7+*j|*e%qmh$NpEBm~`Exjf_XDOaU%B{>RjcNs`xQuDg3lNuUW`|9e7z0(4V-Yr zl@kUZvT*55%OE`a5_sVCUHsH#yC7k^Q{Xw^yM4nbeDM2g>`k1G`$HC8(l1N~>4n=S z5UtPRg`(Rr%kLenHmoqOuS}n^;172q26f{Sdd9sXzK+kAaDUhWuE?E^ns(F-IJt27 zp1AuMcr?5n%uDdXpY*@b*C#g#ecuwop}sFl#@(CFiV?15d{)2&ziql?E?qu^?Qz|8 z=?>T<^cp;NzmX%3y6Sug_Wa_C75C0w4{0aF_;p-+Y%&(_OEXq1J7Pb)dY?IVmz@xQ z8ti>xodxS7lQ!wo7qb4lvabPfO#*SFC>I>qwjFS+nW6s~pW2;0c+G(i&c4IPAG*(W zU~D+_(7DUL&-ag^P!Go1v;RAeIS`(tm&e#YTn1SD{yZA~;b49Ta_HN3W{es$=B**@ zUfEX|zv3Yv_J@S(hvWJXg{}83cOAplO;4ZN53TV_R(=YW>37fEW#%Jy^IbNqjPdrN zy@yVoJ+pTh2@cHjbJ$mmH&gcH`EU~(5yCG3nLTCxd^`u|yX&4vyc>V@hH$^Z)N388 z=CI?~8WiK#*otu(-Gs)+_!=O;j}PxLVg794p9Zg)ju|?AHEO}b0UzP=dwI)|0nhFi zk`Pq;&5Sdb--l;=%j&-R+%f~^^+jw9Qw!Vs;QqGvt|5$$@m~~z1|Q=We4GH??J;03uT4cJ3>(?A*FZS@4dKxm 
zzk%o(7mkJ37TNZVkA?7Nh~Iql8DRYV<#XwtfPI|+=jEZfyVLbXDReX5{B;OD1^!^T z@E`wW{AAb(@9sMTa$o!>94OxKPTFzC6uKS;zn=QivYS2=zeP9TupdzAUc2|C;qK;! z#1F>@VE>aC?|m%7wSU=z-{wc=BX9|C375d>vk!*&R=eqQD14$EH%x`QRE*E>h*MpF zJ|4>;d{T$kW1F8gdmLd;mHWC97!(WmTROjEv#=isuQ#`by7TV2##!p@^g4o}X!@p-rXZuO?cZ0x|VF7((ckk=|yo7H4v&wzp zV+Hvkj0LM4$H+Ff#kfDLea^Y@nZLmeYr@~-dk}|@GB5bUH(`NSBCwyZ_;@%iz2Sxw z0$lfeJG}IdC<{Z4@gErYRgAZ7k6&8?b`R*DuKPXc5HM^TJkd%scpfYI8a4gM5Kcam z5g(W+I{2aAZzTewFTMaS_#yNQ4dJBsz`Qm)3U3W~JI8Z>+O;43B$#gt@M{)7 z0sla7&K>cw7=I5=4{#Pi%i%N6!!F<60q^uN0j}aK{`NER1@D;aAExN4#njhXz0rgFbvXLV{4{=9Jk(wd z_vkg`_*L=#{$a?y@cbMCT)iiRPds;U*zzCo7YBqeXy#q;1ixkIbJ$s5y?oNhgRlBe zyaS|GWp%MY2%lQR-@gP;E6~?)25EohZFm(8x%Rc8=>8V^n*5*TlQtiE;DnKIn^+myWJV%r9FELJz+K77XA|B(_zxm^N028H)!Iu53h+Y{`Zu# zruPb)(@(m1Ax?&86Fn*s#AVR$o;gZi%DnBfw>ew00U%@lVGBhFp;;){4I{|CLUUJ`#k7G9ykLovn~TW>qG zNB9VuaS|{V;oT#@eo*Pa1vCsa!w1eB9EKliPXrszg$cnn`Xan(hW8&m2==}DjD(xU zqQUt5E8C~ylM}xcw_)!=DAx~ePpg+-)d!HrV*GIkXU6yy=!@$E=H3)&-jp}J0&mpdxYoZ0e*UAEDy#MkM_@1Uszb&M?$vuTnp1miay%?2 zm;2i4(=-H9guyKM3cmy<|E&2RfIAaya98Sg{PN}S6BfL9{%^)6eSOjN)A3#(J{{u^ zKK=nl1J2~}Js*$w9?zh6?Rp{g6>g1xyXe?&V=wWso90jG_ohi`9@PH|T;L@^@jPzx zm-{A&AS?rpzxY^w{eswc?mJvUBgYKsJ8<$fSHZIk{IGEc+*sk3cshK+&Czs%ABw9_ z-Bzdpe|m|N&U$D*0MqX}@3U*-3V4Fp_qn+Grx=Ox=TAK{bMT-G(7a#@?(v~m>4VEIpFAG#60$9YaIvqI@%1UGukbcKai}^B zcd!kc`M}Z{Q*K!HMR-Vso@35}v4_zg8oleB$8LiAP?#29!^=-MIH~X)gZGyZ?ud6D ziSq;68fxP4Szq`~eD=xLpO`1ZL-f7CEcCVAil^zVKT?6+yVP*&(U)F1@5t@oti{hG zI&s0duwNC=-DaVF>uw^#gOYjJ%3UlHCZ7)#;!#}&Nd z9*?uo*CA(49^RKM?C1Ue3+Nt zL%*BvJATWdry$1|qJco{Ay zz8&`sw|=w#_Rub2Dry|C=f`o17c9Q`uz_#h9c~i+=FFe4{mdtygdY`k8~QDJ;|SdW z)8IaKH$K@6y@y&@*?VJnEgqMC7~eLC_|+Hi^cmx?K0D+6nhxvEFT(e-N-4mpp|J))5E`RQET-MjHoeN?G zcQC#{$F1KN=U?&D$F7`nJBh*F-5w|azDgL0U&{~An^rx$dijFy@}p_baOG?KlTheC zo91<2eAEUdQV+)!__HM3QG0FFf8d9H<@+ewdG!$P`AZRD|IHz+v!G_&*nKUdTCZ$ye}aZ`gWmvE43LQ7iu{HY9!_HJwy`_AiEW|5g?K z_B-J5u!w^_`VQY`a2T=pp_zmID-j58Gj8lw`0TV{p8;c!p88`<{>QEOS~lOFKsPT` zxY&xpIKzx^wz0e!vx2jYVfUbTJ# 
zo`SQVw^3||Oz}Pl-FL(+7JluXJLVm+L-}4(sfe#AAp9FC}|yz>mC25ld>LEqLYM+Qxc2hArR+XWE`wa{cK& z!@&Jjrbl0V`?=9JZE>5k3gMS1+(@AT&duyeKX;&g<{{(WKWE9euQ^5ey+)loFZ;nm z1GtWzu~EREq~hwo!W$^n8_+P|(;C=^+hxls@DhCjz9Pi!fWCc3eLu!kM{90>c*xPK zO9kAz?ffr*hiU&iX&=W!A~s87!w;k~vS z=?xR44O#$qv&7G>bMJ)b?(0F~$)nG%KB1d_^)^Up*SO%#q5m;?(EJ4c1>eU{KXfYg zc;2`nJKY!`OSk*M@KW9VVEdL!{1ejK{`S=nj>o+3RzGRq^jo;W!M|>|^9E)&$xT5r zgezb_Z^vC=-0_tq@89(@`IrYazu7$cXAM1&pG7==+u~p4uTMd0!|g(&x7)b?fA<6K zM3kI$==ft7!Ft}|3$F3`6R)7xSpvbyK6Ny_a+K{Uq&93w11BoIJ?nKqIw+n)R1)|p zJWawya{2$PoJL@;E$9VS|B+s8{B4WB`G;5NfwMlmUd12ZIK z==D>)34ZLq;TkxboZ5dJUlp5AzT?}O*em?z@rWOsPB1pYx96&s2+PvJDZctG#zQ!O zps&T>;tykkz&Qq1rVW}okBrS{J(-F^@Udjiu<)cI=6fl z|MffW3?}|i#__FYKl%yyS!s*X-F`=*JPu)c%v83LfxqH^<^ewZ(DTFiajvk-Egqu}m?P%1w;nP&a+uWD-gtd9Zm414c+ zK+y*4F^E63=7tHq@b&bp#5(u2-_xt?grEh=9};93)}^2CULS*IVZ(P(o&)L%IR{|U z!-TW<+?v&1hHe_<&-4Ds_@Asp_Wkt};7Ac`}H`nZyR9xC*NF23Z)tF(tE zfcr-#@X@h}IOx&A)qO&&V-V+S%Y68kxO!C4>N-3Vfg(C2aM;ZtFz*#+b4 zeJP9Hz>)llzD|u-V{n1|+$zMh^!&)5UzE2*dhZ_wO!>e*pL-Br{a0_+!UH|v2lMJp zUI3(@i??41#0?9A09{pl%P6CqpJsgeZHwSZ>bCezAiSyaM`P>cVSa1Sd}qLu2w$&S zX8WZOrE{wYTrQO0ULK^7THD$y-gBwN<@geK=-EeV+^<$1U+Og2md*OF;v*WEUIlD; ze+;*S3vjjjuJ{rJ?s$C%_?N4N_6o#vSq(m1)BFb_eDS!K%Y!uMkts^o7V)wVXM35Z zA9^hGRbrvXjTw6>-sAd?xfS?us1r_r_OqmjC4;RUCh>$660;yDzSj%!f&!+g|v+=e|z?(QhuY4K4mhL zpvPn=89Sv{dcVdW7E2rC0)`5u5CQZqyJ}M**sX?RR-0Gkeg-~TJDfg@B~lbJrhvs| zz4)^tgg*BJ3X`g$hUHzr=Y-&$v$QE~_b?>xqmDFWebK)cq@Y@xG#JQmBtQw6dY74K;l+4OU}0V0_CvI8r0kWz{-izR~6XyW?6jG-Cev!Bi*NOla~O@ zOef@$8ReuZhG3A2~_L{RFdHV{gWY0{4*ag1!K!7BJ4JyQy zy+(TV!X{s;%>e9yqB@#@e#%6PeStZQ zd%uy4WGT+>cj3cmgy1_{QjH_(QB)yxq#~^pq<%P}JY2PX!ZeW+WS`a32unaR1L~s3 zwq*G*aUm)c4zWYFCerGoQpBLs(1MSGF@W?NEXK(T+t-&i%X+*o6eD30<((G7!+csD zI@s)pA%grwplkpI5f?r9XDR1p8Tdk!GgzybO?kYdyv0C}`I66BjHxj7^@MmXmk|Qg z#Kd$Ah+YN%Gxj?rQAyaH#0K$6MFa3&ql$X=yu$tjXcFS^J_b0j!N**xjvgYUqCxQG zfl7$$3q|M+Ni;OeOaRE%0DD*w<(0;H2pQkPfGXEzsEGciQz&VGEi0$ z6bDyLU8%m(qyvZ%6L#>~Br-+H(upY%E2C5{%Tb46N!apK<#L1@z#L`CN|}_*N5yHH 
zL~Ugd^XbG`dwL8gsTHIhRYsgsnZcfPBM@_mXyn9JE+Nu|!$h1K;4Vy##E_bfCiST( zBcDscXi*ZTu>mW`iy=ah!{N-*42m)Z_V9ipvytl>QyVKvW@h~ZK|pvD6Mor9t7wcP zMh2Jjga_#i5s#8Ij>??Pmf4(2QBg5OTEzs~EC)R0O27_Gi4aHAw7}h(93oIaEWaQl z5r|*dVN@VXw44-U6;@+Sdsk3N&D+59Snn*P5vk!(}?hUrORsRikl1pDsQM5 zt}iKds|rY-W^&h%Mh27DNDgyQGE2s24qPr}m1(chrlf#cDU%WP3&oV~`;xZaswvr8 z>OYa|B^wbdX~kb8>9Y zQ@%1~!@@t$aso*ga|Nu3A@fD5fL1;&DhV-+6gNsKh2U36R89y=LDy+ml;JE$G2kNT z!UXeN7-ZDz85B9f5EG zyg=Y)OGm+6labGOg!jkK;^#)>@Z%&FvuV9RzGa~y|HgLP&4s$!h`{vwSYT(l$zfI@ z9&bNLKG^nBjK?CGX>zAbV)9`sWWc{A%O;Xf$*Qo5Fbp0Bl2oEeG2}TA%tt1hkH6w5 z`oN;!?{akgupH3mtmkq#V+C+46(^E zqK?#$450_}I+;*^G%;?Ad4xCInQn;)k&8Z_@{ow0Ch+I1)UzsI$dH9MO=Jbir4Qy) z9KXdNNf#}XfG;4)V)6N<@Vl#{729VRF#i}MeuhbM{=_j;P%8Yc?GpJ%OMphjX%fhf zDc-ZJVP7VRhRV`P5r;4nfnTs`e042<2q1g&Sdjm${g!0JFO|`Q0@6uwB@zmo>5<`s z5-CH7JfV3KR@d5cg$YY{iAW!5vjqN)bzg;l`@#r51i$hnc#g`uxan{?ymf_$LDbfS zc<-Q@PZJvgu%V7mD~Z4a}n2A5)zgaa0$Q3 zK4hi8sdF7LH2FkLQotSFHU$h$-nXvZmlC96BFOg>CSeMxIIqh7!#m~QxFpOtJX9|w z>(Hy^yMi#xvGB5M>%D-<>mK?yfTeNu_w<8N5K38+%T?IPAPz_g9Exoh(XYHPd6Ld5 zgroFn9@@bTKMQX;M4Kf6MCCiWn9x>VNrp06CeFIo{#lm@R5ZNl>`@Y@C8Y`)PWEk4 zU-EHUq^>Rm5v!0x2t&3dPX5*a5GX{%MAEMU(lDs?%M{_4{%pfsa7co8>=?lXJ&V(b z_2jL8fzJr|?GVBVNjfLH4-Yenm3;acG z)>RF3MAtXgk^BzlVBx08sk5fEP7ZR_=Av!3-aNg;e>9g2J z(eLWcisQ1j$Vk<}O-@%^cUu=y$$T|Z2ZSWID=jUqlW@M7^j-%xu@3s;W0zKJ5LLbf zyVTI3xW4jw`HaBJ!(HXI*%zO^t}5SHmm8BfI%R4#TIO{f!fjz6S$`#<>pG}Zwrjj^(_^oyG2ex@Ay^e1=2;1MI#|_L<08mz9PP}A>#Nc0QEtVEomsuH7L~Jb?xhDo z_RuhPR~q?gk)cpQ8xYMad0p8}Bkfe(Sx7FpjqUrpN6Cp>-Oq(!FmGq1nLRY6R0OpPoSPa zJ%M@x^#tk()Dx&DP*0$qKs|wa0`&yy3Dgs)Cs0qIoIu{ns3%ZQpq@ZI zfqDY<1nLRY6R0OpPoSPaJ%M@x^#tk()Dx&DP*0$qKs|wa0`&yy3Dgs)Cs0qIoIu{n=*9`~ub6l6xA8mv`|0ues$+b2A397bP?qTyP zQ?>{6+tPd`p))|=SF+ny)}v^f**Zi1xKnC}EYorL&IG<)|9}a4E^1eRrLL9Xl|fou zlFUNRnV$+e6wMT9zvUz_>8a1<8ySeTx@F4jgfQCBvYdq)8TP&Z zMktH4jaaf7kGpYIFDMp@LgRXT?({PU-MfwDs0N@w-u>QMQ- zitorDfy+{JCUSc;OV7nJPen>A6eVI4-BL~iAS>8Tr-77R6V1+o%Y|yr%gmSC*-1{z zB$AVp#It7EPwZw^rBd5qm1HR^k$i~J>Ol!gf9cKVwA)T4ljV}>G^R*8tB3*5hnx+O 
zdGwceoK$P36HBH^BuFg^B}c8|nkksmQ6pEcn#R}+ggTfaOO`Dy5Z2LlEH6_=ha)Q9 zAWS5Qk&S;G$z_VoNPG9J=T%&F6+sAX7h+Qh8f2ehWR_VzqnWLxfTLzwEfFBJQzWyL zCM=;PzQ$WwvjXBszJ~Cd+qk46Z+oYX#vq(t%Tr`oiIYn~b zT6wSe`l?SW1k^87P`I=fcx&TUXcwW9ybyx?p=HOXBPsDa0yt5;G?CK{SQFGz;nOxpylN=;dBok3?239!v`0Riyn2jN= zm5yBds>l&?MFgN!KgP%58@gEsQ>yvRQ7PqeBfhuRU~5(-Zvg(PvuK#nK(iW_zDy%m zFXeDCNus4hVE|(dLhXXNMA9O;^g_5=gOeH*)?kBhwPI5=DWU~`LA4QEh?U`Qno4E3 z1E2|-kwl6u5+#K*OVTr}!;baBM)59DnjO-p81V~bMWcXrz-1xZYe7@Oh&kRUrbvun zvwcEKwyHytYd|~ED6U1>6q6gV0f|{kN}*$-8`L6;+}{l1!BmBmjcNuwMDs< z6VKHVhU!i?5h>)aI>%aTQ)b5tchQC> zwxi7(P=SKLTDYo+>jgy}1PMa}H*nZ+dro4YD9|zg1DJ+_vgRtyzpwLgtdw`8#bRRYi;I z7#6~(t+IiMXR%H-vO?mxzR1*0(Mfz}b^2aUW-4Ts)Iz^hwnSnE{F`O#+pn6Y9A%}% zNbGW7iJMqpm5EunfrDZVLI|f?Y&D?*+4zaphJGRzEGk#PoC{&_xF_5%b}@zze6R!& zol$3cTH7`y#v2ow83YH0Vpu(=EU(R76Db3z4;VN%tkvkrGx(eJ(;CzGh7A zDFN2CZ^SEj!8v;9QVn_cBwH+7AZj)kUbVya$osENs;J^CP>ED23pI8+(o18tVkD`@ zxH&sTO7gSANtx}p_PgZfJCj_D|MHHvb4fWjLV%d3f}H_*YWuuDI>DQEqa64IamAL) z$wkAwEQAns#VETOqg@Uw`~5+b=MlCM28&hMP!eMIu{ns3%ZQpq@ZIfj80wvJXML zVVp)E-B{NR)_t{h2I_g$^J;AZt+DIttDaYD6KIWHUtjgSTAM&??E3nu=hfN-T4UGO zS3R%RCeRwYzP{>twKjp)*!A^Q&#Scww8pNluXyxV zXpLQ8U;n4)6?T&L-x%ld!|)HE=R-&M^*!8}uF~eZTK1xUk^T0>H(FmY{vm{G-BjKn z^Ku=rS@%^MNmHPnSJUy7g7v&gBWViM^J+StQm~#^X(Ua7dR|S(QwoMq{wvER(mDX> zbx#N**Fou~C8aN1-kA~C@rCF50`o)G+SkA{XB`+^{fl{fAw0bfTFFX=u=Cfixx)WV z7;5pNjI<6MH>h48xqhIz>!6jaq%{J@&=U_gw_nEUekX+Sv$uVdwR7aH zKs!8_TQpO4F_QUA6DZ{rpS))IP+N9ZAa}CVJXG}j2vb^-rGrh3hE^%n19HHU>?2KW zEmlF&P+Mz7Y6&upQltZbiFpHbyjwi~m1SB;Kx$FL5H)f=@-QcZiy{?1^Ic{IN1?=3 zgz6>7tWp=;HNwUb;zTa=7&l}Z&R5*A953{@)8kM@?LhNeEj zz$o=${QyGb|9}kGJyEI515`4swy7EeAuLIeWC6(>GdR;+HW>V>UNNGMv7&^8im>WU z%H$o(^2kuSTOBt;`4BJ*;hU8vdP)^nSc*1WA7Vbn+sf2}4Ydk^q-UKM7|q<&7qieW z^NXx9qtlVfK8jVOuP|e`(sfg$c7>1S3%Uv5-jWZ*GQE(!)~*(9Qb{RRivetJbHmy+ zR7I1s<1A(u$mA9|WL1j}QDPo$D^1y0sk~9N3>Og9=G^Np6ndL!CWRTo5 zJppYY72!s{X;vGJ(j5Lx%VdzNNI^(-0T~0g9 zOvO8#>k+Vf!T;~hRGEMfS-bXTK=*EXBA8|8jwUr+gw5Q!c!XiDZght|2Sc&O^ 
z1ga0t#b`70XJ&cfGawlu^I)Z?%CKhfXE5Lqa8E}6I1EV0{(0VruW~fY4525WTN@|z zJ}lXlmWbez#R=axL5VPgLds?*CHFaZ;eq_gDEZsADgi{_OP{?pP+nCN*Fuy+4l@b` zjBX!`HR_hL;d5rcRQjw1V>L4Ow`w(Cr2s8C6QO4^D3Qprw!|}ElD>mD+0f&#kG(I` zYUY@p2`F14c9e)Gl0<(gY;lWiA^f}whK>IU@7K0xGc`2w1POiAql6J4QOi-^07*mK zD~SE#k1IfzVsd%1!ubcn2&H6=Tb2Qags{_kz=gmb6)n8@Tm^_w5<)8@rBDWQN|yVo z5~@=6`v64ugb23Rt}UsMmE~Bh@N)Mz6rfKO8O0I>fzAz590FigQQ8){A4JsK&#yGtMCIBRSUnv0A;C;D3zYGP3VJ^BYIsfA+ z9B$s0=O6z)?}?rIst9%rqQ2VSR>z9eS=AGeYsOuwfB@BSHmZ}m64eH@hWgp#g0FR@ zbjzxJKruBd)^2GfE22#t+pH~`7cjHTu64AprYpD5t$j5e6E(Xon!=h*Cxt<2jF)cM zb3?_xsygk4W?YHNL1DZUa7o=@mde8!qmwq#N4lYrLJ40TZ4A@C5GbT|xm#KObZV{; zQn$-n(gIpgA9RHCYpWDWmZo+2iv3X8LZ#G7W))an=l);o%iSvcZKLZ3R8Y3FRKQ(d z_%KCUkQq`Gn;PoYzVL{-v<*Ytd|@8$yKl?;%KI@7+5x-!n!4Lp#$r!_6EqcGn!cpi1Oj{2f8XjkZ{4J$i>5wXj6R-G!b=S&H6lYmo`UEUNIqS{=m zXi8f!F|@j)AFIKotJ{(>X?K>>Ee2IheSxKY#IOwq1?ZM`tVrq$t<<1i?$VURlws*z zWbl(X{iJk@J%Swc4%&AYKEc9Sl??&-~ngQ4fsFu7N(gTAZa^|uJ zdqAv_f??z?HTas+64C;?g>I6o-0oac^~P3r_LFKg?nfhu+kIBWeO23`HH4XiyRVy6 zDQhBky6@JN+DTU9N8&o|>pRq8J}5bX_y9dM9YFV5eAlq&8fy@{K&N}VN!7G3X+Zko zS2*jkuhd~})f1>EP*0$qKs|wa0`&yy3Dgs)Cs0qIoIu{ns3%ZQpq@ZI zfqDY<1nLRY6R0OpPoSPaJ%M@x^#tk()Dx&DP*0$qKs|wa0`&yy3Dgs)Cs0qIoIu{ns3%ZQpq@ZIfqDY<1nLRY6Zrph0`zV9_*=aTeijcN@=Q#Pe9bs{2>wdcA}P83R-2+Ysp@fsFf?A-C|^3DaQnMS%tpG5%YzP_lo7(u zpXO7`Xr)4GD?2E!(%i08RS;6i%(ST+Wi(r7>h;>ZgX(BAVa&idE-Aaolp2Vo-1b`y zqzJ}-U_-I^CzE7=LzXR6^PTasWwv5Cfs!f3Opb?E*;gso0LyO8u>1;x`{KQXQCu2R zq#_X+1mB7L4sDgHT9VbdL=&v60jLJHRq1w!~)A~d!@T+h0m_g0&f za*b>F2>jhG@0P!pOQRvI!bZq#wIk+BstOiX*GJxw@LU`bk~BjyI?`9Qyr5Lb*~v63 z?<+e}3#~<8)h7gwH;IF7-Qn`tNt2;eDw6H!W<)$D zz@Fo{AcP);#L;+$8=+kWr{Z3u*<=qOc0o$xtcoy$pJWCeoD{^6fTAgZRu&XU#gm?v zl>&vn2<5)~Y`8N?QBEt}B(4+j6+-DP1|U)vt(b+LIdW{uz&LFLbY!Oj`8o@w3?g0d zG7zJ#FCr5$FZFU2gD{EUQgjlkQqkS2m(m5{*l_D&7`TU>gh344btU`6@3vEC^Su^Qfn$CX(Z9g zuDUI%p`DOi;_=+_2B{k>OAMD_fffu)=Nxg-0h1SsU_fUeQP&``dEk`!_*~^u9x1f8 z!#vz1%h?KYhyy8Hu*g*r)cOx2bTF-T#B52IRF$Vc(G89%{!=2j@NgKQx?&m`4GlYu 
zEZ&yG`OH!+615XaF%99GifS5nA*DQ(=@MvdAsI?32;zv6ln>f4ESy9vW(h`W&A@!h zFrj3ql3no}0+O#^exN7ohfXKAAEOVoZ;d-v9;?f ztJO1AQ=ALYehu5%oCw0!<=~W(3jx~7hRMiRkyMdX7*6xbsGP*fOc8n#LyLr3-f^NR zttK6ESV(YK$dBj2BA6NpN#x98rIzn9;3nAodAJ|^bLrzqPSRCLj@ z$x$iHp9-*y7TPgtu|h?4=B}SJt*F3~N8C>osaCE1hDqoF1CaN|TE-ItP=mZJ;N=$7 z^V$BEgvknA=3BI+iV(YiX-i$982^c&AO_&~YeE!ahEg%7)r}Qygfz55c$NX(XN7bKk8Q`>s0FN(jM7~&~Bt65QkbjOiX(WAb;)q z;={08rw*#C;h_I#!j|S5wLj6(SU!Jj{(**W8?EAzw8*wiXMj zPio|tRcx+tUli%wezdQ&+OW*PQKUFp>;sr09c!zgn^(y_qq=62DN$^R8&rCi_my`^ zUYZL1g9l*H&5xxqp5CWB*cYTd|M_BWxdlD?Ld539aqbiC9uxr!*P?to{rF^&Sf~yz z*KL4@a0^GcFQi+;7%0lO)8D7W<0`U$iNYD=9BNp*wrldYIv^@%6OU6k$y6PHk{Pu=EU?(HcRbDU;ABh^POJvU*0L$+83-G zY1~*A=}2FHbhU5VjTGP$FV$P>3*y*Yp*@XL3?%@Ng@jw#VP3u+lpU*!Nfki2S_)}@ zGDjV!_SZ^3%1z=eT6y=>N@`rRQomZR-D{I>?6502g(x>h%)DcO+TBbW#J_VRz%m0( z$zn`SrmKo3#gHckE|S&>%K}nGL6JwzRDh#tFBJhud`ZMb!u3*5pq@ZIfqDY<1nLRY z6R0OpPoSPaJ%M@x^#tk()Dx&DP*0$qKs|wa0`&yy3Dgs)Cs0qIoD{m*WfV+PsCra8#w3gX`$b7pp?^Y3^K zT37-fHHB^a@9RH*ZwcU{9d{c^uUp&`f!5gd^;OTSwF$Jw zuCK3pUad`_HFkY{)$?j?0#LquYZGXVU0+}Iyjq(;YwY^^`oBG|ank-9?mY5u z4#Pi)wl?1`XTMSVdZV7lx~~phl5e={zCMrjdF;>xT1%?0`+8ojO`tXQ8~6J9VQUA{ zO>FuM*dC$qy;$8;yh$1AYu3kr@yt6j+B!bL{F%A2*b7Pf>m%0>RCk@8LmUG)vORp3Hf}id z#KWyG`qE}3tfMW2@w2ym)AmcD{EpDnk+;&PRI$AeuA`2)tbf$qKKpM7%FIq-!olFa zm(A|n`@Xd9)!do5*Giun?Vhzk(Tf1TA}*S{2D;$y!mB!fE^N{D1!h)gc2D>s=zHPu zAe=%t<952kHA%Lr?hnIG1FWrI&t>y%o!&)Ru#LWAyw2KMFaFGIFv?n;iL}|5K0HI% zB$=4CqBGklX<-Ec&B#gDAjP%=E@w06biSv}P_+w^zBU`;;_F~ z`w*7+eCbQub0MiZ+P7t$0O+V>60Zoh)5lz(S~H{t2*W7i%W;4?%H%@ju1KN+W_}xc z6360PT`L+u>8`W%TOP%>9EocpA>J%-nAJujc{mrRDG`*j@{w_l>{AQHzN&NzvJKr# z99deEG8!o>3D-m{^u;6!x4KLEDN`^2$Qqa;&MGFd?+E$vftHUI?9vsO)b*$#LqN<0 zvmjxvr80tUilM~>*HDTZkqw_p2&c-vh@h{vkV5fPMso%BhrnDWXqfsW$7yj;v}KXw z;cVESFNFIpJMkv!2sJOcHn%2k^x}#-e~E z73C8m&J)650gZ?*=c0r2f2 zPG=zHu+K*aKN@NpREqJ2apX!d$-IZV&}PDC5U0jyWjp_iK&TSJqZC3mZ(1tGO-k)3 z2Udu=Fi(KGBbY6ssh;-28iT?JHsDV(u#xXgfZ<}boLuyrB2^9HN}ymNbdjCp6)9>| z`Iwrui!vipwJhcYDsU|8VEpZCOh_15d*NTDq3sDrRm`oWD9Kn~r7)#h*a`7~Dk9b% 
zcW8{VbUK1emz_z#PbtX~D&*o;k?69rsE{@1X1%}_NaRMsiIJ>_&~y?FyUJK8zlkLW zm&B9RB&$)V!9g|Yw~uA=@1LFVMrECzF{paY(o**h(@+0kg9o=eQ}hVK%2$Lh+BI#u$kL@ z&-rdoboMPXmugFSh~;HQyV`*a*NZbrN_>;`eNoIJoop;^)>VpWRqe~J91V2Z>Lyb% zwMYsmA}h5Ay@jxybgB&(h`=Z!jr~-v(y~N~{+srOsaE*^s4w(m}aFF zSjl0_B@#gXE6+O^&H3Wr51c}%6}MPiD`t3%FJwiO6yrz2^g=ZuhVXQjQeZW9@mqPy zGm$xGU4_qqivOpP)D~K|F55cqAZawNUL7G}*yX26xEiE&&r`lKWy9-p6(%4?)~@*V zPZ@(ejAdvON4U5YH_0l%fL>BmXA1K9E25_3zt%~*Taf15WLBUgnGeaQQ`qg!0I~NL zWjKBh)*6S4iqeQZhhgTv;}iRSJY}Xb9E?Cx|07^3d)Bv5SrPPqW<7 zGlrc5|LMdaiaP+2_G30E8PCFBlC_T_**G*#>gwPs<)&I$3ksqPB}kn6Djfx|On}wN zeLvvvo~R!Fq=Fd#(O#bz?q|wvH!BstkId|nk>w`GcnLz4!`{aJOEDfB!X?7eJEa|h zyyt`5^zCRue#)fbfRnA3zF78k#a4=yAso=jOrSS#2PVyMn`Vf>kHrUf&W{M&0`^MM zH}Fv2u#`ckXND-*;)sSxI{QSp7;IxEQ!`SLx$(zW4HMWB%t)EkB1odHs-)EtvPluD zBz2S;!hY6a#sLEOG46hTjQmL@P0>~9)E)Ix(UOT{R>}y&Nksv5XvH0sW=}a(1|j%b zy;Ea+2Yl}xhk7v&_AgMilmIRwBOJn}Wn?{Rg#t(67$1j-rJAT9djc>g#*+kpJY80D zG?oA&*AzR2peZ+LDulj7JNZj7zASgi@1w9KYn60I0|e%EG6jrA#Jkh? zgYYkX+%WPj0N#-lvUDYW!4UY{wqfB-6FETWT#JQ|H0#S~V5lywkzdLww$CtT?f`(~ z{E1_xpwv&pmln=u6rfS@E_(MH)8_uxl4HDQS)rolI}WNO7%Y-?2$w<5FT_;7y6T4^ zfb4xb!6@)-+$#+G*!#eW@d(;$#z}D{5(=9}dG$ew$Wc-xk?@~RXp;CR%S-84rk z3-gn7HY!x2?bLhWpLN})_$W1SO>sfP$sVK{^0OjRSG(Zx-(x3}Lx_Q-3|8XgZw+XO z$cYr!Shj$npQ{|gbB>Eta4iEp!HlIr&*C(qJbCM1;PtTI4k4Tn6M?6>e#T)`YblKA zKR7fFF)%em9y Date: Mon, 5 Aug 2024 10:35:58 +0000 Subject: [PATCH 20/31] modified the docs as per the latest changes in kernel --- ...entations_sobel_filter_kSize3_img150x150.png | Bin 0 -> 10661 bytes include/rppt_tensor_filter_augmentations.h | 9 ++++----- 2 files changed, 4 insertions(+), 5 deletions(-) create mode 100644 docs/data/doxygenOutputs/filter_augmentations_sobel_filter_kSize3_img150x150.png diff --git a/docs/data/doxygenOutputs/filter_augmentations_sobel_filter_kSize3_img150x150.png b/docs/data/doxygenOutputs/filter_augmentations_sobel_filter_kSize3_img150x150.png new file mode 100644 index 
0000000000000000000000000000000000000000..c367b723316d56e258f3e7b6a3ed46b425bbed85 GIT binary patch literal 10661 zcmX9@2UHVX(+<6Z^xm6DlP*#sAWg&|7NiMCuaVvpiu5L+pddt~Dk31gNazR%Nbf>I z69Gv;!a_p$^M3!`v-j+ty*smKc4nTL=Uy&cu7j@JF)}p*k&%HwWTXRhxePJ@k(2#5 z{+r4F86qM9d)YShwY3OLFY3OLEsc9K#>FEC(q$@@SddB~5F1tYtw4gjt z9vK-c=&~OqOcGA@Kba&A|3ApcDJZF^X-MxfkQOvuA?c)`AZel^sUfY7COrpHvQn{$ zD(O+P-*=)B^XE{G%`Tx8*RSv4w4B69s5}dZqoe2I=Hca&l#;$Gb4^uEUE_x4O@rHp zM#d(lcOE=^^w{c&wT-ijtDC!rr&nN5a7buactre*gv6xel+>KuSFiK(-@GmOP+C@A zQCaoz)7OT^rskH`w)Wn>{(-@v@53Wg(=)Sk^9zehDD=kW*7nZt-9Ol)<9{c(Q{e3U zatTEHpV`15!j+3XM_Tzc9q@AwxPoW**XBjJgZR^ng#nr372ytw*hSwS;;Wq;$|R!R zjeq}n9^d|sZR4Hn@k~s3nOS(Z z!V9XCY*U)!$}+Zfs_4&=?5U=XqG|S;KEX=40}NDceOF(73G@AT36ioVTfBJ*TDv`E zx;4bZB9lw=aX@U`IsMs`GDKfOz_Qft`#^cBfb&Pc8Py5iWzP1Okz(xUsgJWyeOAOk zLBdKUUn=h>ro%*zO|m)@?62>98b(sWR58wJ3@UQ&_|MBJW!xbf7cS4g-z1A4)BLjW z3iZK)=idZ*;kE%g$fr!a*+qoi+|!KlTWK)9%g@trDYnrz_MpHpAOqi82RYS~J}FmT zVc))<{R%!R$LN;$>dw%nH0To4?a5{Y>EM0zbn^;d{sF7l+j~EU5}uYlbPKa%QQCmq z{YahT9-Iy@6%Xw5-GgMf3abW4P2Eg}01qxfLA%845GkGf5pj+e?df#(WC!W6xWeYg z+SCQwA)=5C6Zt;%Rb)K&p1(39Zz{{*e3C1OWE!+>Sr9wmH8*VYz#Uy8p$`XCq7#ok zEa|VR)NLbhp7(f`!x6Z7Ki#%+56R`2PdvB>V|qN%o2o2BReKOw^%d1`@E1tnC)<;a z-XAY=Cj-r;9`iWYXW7M(5w6`%8HxE?5g9d<8qssD<*jN=_RsC&>@fw01OHyFmi$&e zsrn9+__wCRd$C!49Ocu`%4$Uf<8o8OSru6;CjSPQE3J9;VTjP14>9*oJoymrwg+430wo;(0dpSZhn_2{`( z^NmUaC3@OhMoPJRrczVNKd*peKPFImJ*C_{k>E=&-;_^reZ$=0A_~6~vQu0=eoojemycb^&4DjVOwX1*TA7kn ztah2H&v*G%tGhep9&8gvt#fbEApN;1n}IK+w<%bTO&6zLHIQPUEncw z3AtwtF0x;#75vPFV61crdI9ghZ)YyF$*7ke z%Dd|zNH72tv5Kg~k5c(qY1DRU*~;L+oj)%-olFjX(r=pQQXDUn73-QZDShr9|NipJ zO>=eIBQ>UhYB5JqiQv`ZtBrAOxG>h;?D$T177Y_O24)#>Gd!C>8XLFBTFHybJUVm3PwL(!gunqwZdU;%+Pv z0;ay`N)m_oUf9&jY_E7MJ;bh>`ay2o&WMGMbcq8EctwIcAYZ%c0cItzRPE+B=RCU~ z?k;f;Y4L~li~1U5#*8zTU;HrqYihxsX-6tZJkd=#s6U>(TA@%E98?H_ud#Wle{Fq9 zJc+~ztMW(AY4P0RzIYjE;;1ChASw<7&c$;s^4jZ^>SVo<$sQ7knovn$G4I|u{%Au- z;p%EAUFY*aB6H)p=@*U`$0P|DdG3C-tXp^yq~f0Wy=-Hz_sz5-f^Sr}TEE6UdL1WM 
z9vbcwpfs0k__s!dzi?Dy<%Y>SpZkOatC_~?>Tv6_ZxUSV%j+Krh88yy{$=pA$w)Ze z0se{w(L)TB!gP9oaH$4CO^vTna&P-s! z1|N!o5V+kp90ttEr%^)(XPC5JBr8ETJ9!%``Rm=wy=;S@XMtHK8M~c)`%th87!ZdX zjB*p4E>V*0)764Ug;Oi8sB>cw`$2b_TP5m;$>Ib;#kid3|8@x!h4~C~B zss6Gg`AwX2r>n}A%{1Ds83?XWW=8Y-Mjfj)Kj~DzO8YnGp^Fjo<7eZMfu@_W-7E-m z#6`q&{4=JSin!>z@!%SAh7a(t@`m{vY67_DcNSWWofZspzx$`eRtmrxkyx z$+s^Jb~$#u=F^*X=H);&6%JpC3;a>;1y!Z<$L&aD-s#uGl^;1uyr~qZ9d~=-E}ZeW zZCcKnzQdRcHT0@=>|vz$GEY=%t(mAFxg-4&CbXv}aB%6>xmBEVwAY`$ySbJ#qCXER zI@NJi`kh}@@7(q%pAQt`7MF0VgQLi13r4*6yhpvO9D}WiW9pY6`9duN)>G4^g$eu0 zsjr7YI<|Km{d|SAkIGI<{GjqKj3dAM9l7oXnhI5FE5Gf{=)O34#tduvm$jdE38FH; zs0BO!UNglg4sgyzNr!XW?r=Dh-g-Qs4lYPa$O&y}G2yAA!bFdedxft$x z2@<}iVQ;r+AbFb5M3?2y^CK@6GHV!4bHIeBAXovymB#2x5Hrxxr>l;6z5HybUx;Ov z4XuTQykr~xS1XZ^a?eDWNOoa#{>eXIXtDNHcZtmLRJ#Nv6yLy1KKKZDJv!zNJs0fG zXZTBVXJ_lK>1LX5S+Ku`>P~Cq?UAQoom;oU%eW@Oe^hU0U+l}@J>am6E)qBCy5II) z&QO~{l#BgQrt=!@mjw3aJQ@_YWPCSxu3BD>JnR$qy`R+8s5bGlqsjUwtGZ>tC8^G!d>pm?)%?{!&gGw$1ZW+1W4rZ-ANVRkwhRcwbPTR>Hof}U$zNd* zcwK)J63x{9iKTO(JlKN9GSHr@60Z()m}5B+FM@8t5!@D;`6mA-S3=oYlJKOvfJ1m?o|)loS#}k zn`26QG<0uX)DYQLJ?G86CfHYjUe4BV_01kAG)FLbkXs;F0%WqI&5r@2+rzr%2ZX5z zVDzXxza?NCj8*?24(6CnLAo#XE&|CKiS#u_?cQ+*XbQNqadj#T5%``VDCpT zgm}Shvi6V(m{_I7a|3Z-qHOj~2+*%@WA{1I{iclN|Styf9R z&mb`-5I}k&KFR%Vm@ zn_c@33K(tU(zN?f?0X6FMJ$YNBn%}S9umZ{6n!vi;Jz^75m)h*`UVG%u=BnjDe``5 zTFHOq*JBU*OnHkX3L{meO!OPH9b{pX2wH948p0E7UMcmQ_fS0JZgHfU_~%$>E$fhVruP?bNk8Ti1PtKiVZ9DbhY4!cYs<3T!mLD^u+!dl zWL%f{uDjW+$eS?eL)M~u427Bpmfo(ZPtdQmKflf4X6TDrfyBW*Aqy~vB?ENrz8T(N z_wXZut@sku&u82R5d{2JDbKk|pdlM~=0!6l?S}jr%{XUF^i8DMnKHjjY-tozn2Y!F zA<}etyo9K*(h2`ojd}-{A=4)T!3>KEFA`iX&J!&vq?mb%-U@5nJr7NTX#x{OxRv;S5<&9+MVg_z_&)-zpI}FqM70_*F9lx`mGK5tI!zFe^-qgL2qOaG~t{p-y*P_7( z2Bp$5s*15Nil4r0ye_HcEWLF@k&M@V3`H-3Z#~>XfMquw!yk~pDl6`n=Cn87b zTb76jnbEyjvasdYCd}G~RYDhUSgAQy_wBl;n)#h&v*M{$HU8FZlc)t7o|tRz)u)!b z9ZgOS?6}&XwLRq*Ux_Er66K#gd1#~0Dztu8xjlhi`TgJxibS4)Nca6wynfgQ?%`CF z;U`ZU`4t<*C z5t}tSdC1R@F5A}5X373>vz+>!ZJMeR&iXRqEWGg;Hga=qKhz1!ZP5Wlqt|%)&^l1# 
zj^G^b8I+ztphOP&WIfL@`Adl-^mvGj5ESh|^UKpsJ*A{zZTMOiWtD00pXX&e62Uuh zI>VUot*=2e9 z(Sd?*vkApT{?)e?4;>rAY^>RSws$6HS_K|lWB!%cs$(>1CHRi)mers^hKn5Ia`zQ+nqZhqPbV;rtnO4fA7-Q!Ub7%V5&= zn;8>>Mf^ne$x=?4EOAsxLnqWk#NAP9<90vvXQ7tWV!S=fH z*9?NqNISJ2C91&v8^R>M@xgFGo zef$<$vic`EM+Fipnqyx0TsifKP~8-oV;(I?0#rp3O5XBv_<9NI8`HW2=wMa)pNf(i zR~5+DpPkUp(n(HDn1S+>Uf!{?o4Iywe;!C2K^eF>)2Kz#Sr9O7Vs9E|{t@O^0wx!I z*ozRhTMcu#JFD0z3t=1P><*Xv@OsJASiu>Tt(a8$3?X6f=1g0P42QA&8lUw>rL zP&Ut}B76zW3E`T0I6AZ2KibUqPtC=BDe{N348wL}2uv7D#RMYbbR#6OSQ-n->a^bA z^_2^AqV}@@NrrUCStVA$bcmvAd)D=)Uv^Jh~{C!3kJPhB2fk3Z4WFl12NC&GOI3P=gZ zfuX`n;yn&5mmuf0r9%`lvr`ANfs*6nPCqFkC))Z-_x)V9S@nCfvZ7zt@@8D1)$Mox zzi#vi1Q6NjPL2}wtn_lg*j|DbPR+2<&F?04V-Vac0{roH0{db#7BEMua_}W`t1y3c<`Y`=*LA9F3H-2Rt;YL1z6R|$gzm{zI#B&b8`0s+A z;lhiFO)nxYbmhw14|$6fRwk1g*M)Vxivc~nF1&|DO_ho947;RyY*Ys;SE`g7lacw;!sD^AI<-u8N)*01g{gzJ08aXk-hUb6&l`q zhUC?M&QA6d80u`?HVM6P_OQQAsu4zV0THBo7JJws1X#l+;Z^`cOeN}XM#T0i|AUU1 zu^+f0|Anv0i7f$b@~!D?WL}FGr7$w$5WMb`z`ooIrUkpMrSicC71O}%O|@)&T*&o2 z+y1WUVl7*E&eey*4Wr}zKG)sfIKE^GS;Ihg2<*;8dU1>vT5%Avq+)TAeZoR%kKErw z>nL^;6@ezNpc)0Hklp=)xpl)(w!5NNjDwz=Hzz zkF5<`+i*x~ER@Ykd8#oj&)rWHY&R_(m``kM-KS0E_dqF9BxUE1xkhKdR+7$!_gE09 z@UkSB`7sT5g~q}gPsS2om#eiD`J?k#J(o6CkqC+6=nht2(Ug}G4~$B8C?dE9qI=Ma zi-knGa8k*WgxN>dyS5|*AyO|vLTb<}%Ro|p^O8fK>>=x#jg7I7P2auvaRVr$%9W$< zl5bcAa{R2-`^0?1N{I}GXz=3Eh8bp_z>Xx+8Di$mJEyh_g!#M*irz)2WfZf#VO2P0 zR6h$TJoVL%(^gRVW*9YwcYyZ{2xA`WL=KLk9mq-LyMZoKl+x5+#}>NvS{l!~u-MNP zSFRS0b=WH$!(4ua<@@n~m{n{X$dEpVfJQAYyaSWpk9^@!7dkLzHpoZONaUH;HpdE^ za^;<(m0>>>-giaD^cpgB8Xmo*y$h0^=zOq2D(s}Q1|W;m)_^r$k`RD7?dDfZvstl1 zvaO**Q~Wv&s>KGUW%U^IhEv4kRVyR6-Uzq|8P^l20Tq&wJT6i$LC+MI*m3spM24*O zHO?NqUf6@Xb)MpYdlhslm`OLhyy;zM^)A@EgIp3o&c$Gc4`0p?;0uq$%Gc}PeGIr|T>JiXt{ zm)LN!mXBwA)Y7R+>_NnqFzUdC@s4A!z4K0CD%;sXpY zarRQ%uFNA@d|vnOwBCENtJ$&KB50NspOGalkx*FTdNH-@`yRXkHKnZFN3}G*3m*0dQy3 z!48{6plLE|4p^OA;s=N-Gis_y zejwTwwwr&SsU!@`neG*wXPJ|1EJO0Q7XW(&@uvf+X--ldgWz6a|K_y_({{1$g~Y*u zTl)1jKlf&_-f`VtL%QM&6R$%zdI&B 
zbEFh5;mN-(_hT$?+*I?>p~QhgS0!wjNH2(~-4Nd2|D_VEdyQ1&^blF=CmZwX<)Q*q zjf=Xb8K|vprrNxAkfhUK;7>5Mlk-hN)h>kg64@^@gM@{{jQjYY1GU2D=;Y5x6z7cE zqWr%iwp|`F!5`joLV0gMg;^_I%(1jIBqc@A8=edc3aR$GwR47zdmbX6rC!Uz-Y(k9O$aV`1r3j?%|U zr)x`cWxXF}ILLEp>1=6Lwi~H{$a9q0CR_BhiT{2AUav!9?n2_5BM^yQY$U6bcX8|j z(_*b%v%zaA_qh`mdF1pE_l_-n$XQ;U(L6sD((oqXp&WNQ1NOEkSpR=xaVy1_pi z&ls~;2&UMk(-gs*c{v6F`ghhr9O1XusX*Jc1OY6GYk=U^<|cX}3xzn?d{c(PcRgs; zE}U7L%%^U-vpk2lwlZ;Hz$bkK7H-W>yLP=td|KO+QkHlSRBDF%GKT|MHDmx z-kA5X5{Ns}3}*7%omN^NOCv)P z4VYKXSoUMl6AOx{{C+`cQ(ZbNq<@0T+X}O5qgpjlvR{ljqfE*8QU3{Tq%ZXA?%(lf zm0;+S87^`Xrf!T4?1fVVd#5c1YYin?&J*;f%_?p6xMt0uqRxrI%i9WP&VHb7O1IWE zCgPls4B(0xS(;uSiq{ne?A7e`_sp{%=<+po&VMe=kT0r;JbUKZ4R5?cf8emvJWCa5 zH#7)cn%{sARWHj5^G4~Nf;h|%Y7<^< ztB$=jjHxgnWxQX+`Fe-I$xmdkUOx-gGWqKpoRuKl@HFe9A#9A%{d?NMNP6?M`a&B2 zC28B;r|{3rU= zf}zq=<#i1Rjw2?SoMGMqYs1;*1ulhpX8 zaX5d}SM$R(p;mWQtlvy<7(q99Z+l{R@=xANc4*1(?qRVrx9~|9w@tzB>stJ=BkSuk z?fo|8Kd_@{^Lu7#Dfx8Xl(CybJ|`5r!<89AW|5)3hIw;jv1&D)@nb&lFvKOuswsrT z*mi?6g_{)z1zB)D7x^&m)jgs(mh-&NsdPbv>J?*S!$|+N+i7i^dHSrJrV{jcmaBgY$_yvthuqRo4Ww~;?%KB>F?2BH4E)Q8XrT6 zY@zvMvj=l(Sv=nPFfr`Y8nxBc(YMfAKY_;d2xmmMa~%&4`TH4-CEJ?Jbq3U`@iVW6 z&(1zZ795T(G!myZ=FI+_V+*3=z^ROQcKj&IgxWMg9w^0X-~xg2GJ-FlxDk2C3=|s7 za2=b>1@+!;&Q;*KqSrGKEb@^uPH6yE2K%|zluuxzA=1;}Spfsw`-=wEOAxmQQ5u75 zss!Boi}?UkPdBwyF`Sv%Vz%!_X916*wvNW)U$Y5S3skxobe!Z7zQ zS{IKiz$Wz}7ME|$`Lyg-W3vY5&S53dpL~{~>lVmFCGVb+5oi8= zbfq-t#j%6timhj!xw=3X$c~2hL3ZprfnsKp?t=r1u}^EO$k(hs4cHdR!k_Fjb2(x9 z{QT6j+SZ?q-Ao^F78ISbGehW=*>!6&BOj$meJ#1k-E(i7@Qq*M0b|XlM-Cg@1D7Dqs0bjDsEU=$d|NrR zRo!%EX+M9+vZq5kwrQv*genRxhN-ul?Id~+FoTjuFf7*lNa;&@bc?OdAH7^ zq#7stQSBE-%Q2v+veT0RSZf%b$V>-VVZ2c@U7(9XZ4Gph5Ys&dvkHtxFPLH%OdF-) zDbi%^RE+jjK72ga#2qf-p!RI05o4)S(iP$b)S|NpoVK`b>~_{CL(K6L%xUkKtaUEz zBTJtk_r6WVdxBg|_?LUf-Zrr>+dOsW;krl9!{}+oBLX~|adl==?{dwxT9Zfa%}}Ig zzf=0h!)fQ0tDiGe`Kg~y^4-;6#ubr|j5}I$d0%0tX%OQz9uY1#%YjsDYf+Z`CFSMl zZ+~#5Xj;*4Iir0xQ6c{B&wfl+n`gfGOChd13o4k?%)|6<=Fuv5=ZQG`-ylAEeEgsj 
zaSD|pch%2)v)wVVD86n*P~$$(TxVHa?9^8c#v;;uRKwLA8o~z`>{g8F^4+%9T3{i6 zd53H+e6S>J*uTI;9O7axVqjMQm402Ji?>LZZ%j?_%$o`spVGjb1Q=Ez)PKR){iByZ z0?79`2$nH232nD6ORsV*R%;+sTzm$5Bhul`h=GpUOj4y>+Xz%P*EdgT%c2l03k>r7 zz3n=!vhHQOA;}@ui?*W60%kd8+L;rw!{TSb)ny^McShS%$V?9oAG^F<{QW^`qG0Ep zGv|FCW49>~gX)`M&_-6o3!#8MuuV&N*gUuT)WPysjz_!`6tPx83?D{=lEfoiHkJ#t YZ_h!u*7`h3Tpm%;_Rt6h$X+h~9{> + * \details The sobel filter augmentation runs for a batch of RGB(3 channel) / greyscale(1 channel) images with NCHW tensor layout.
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). * - dstPtr depth ranges - Will be same depth as srcPtr. * \image html img150x150.png Sample Input * \image html filter_augmentations_sobel_filter_kSize3_img150x150.png Sample 3x3 Output - * \image html filter_augmentations_sobel_filter_kSize5_img150x150.png Sample 5x5 Output - * \image html filter_augmentations_sobel_filter_kSize7_img150x150.png Sample 7x7 Output * \param [in] srcPtr source tensor in HOST memory * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1/3) * \param [out] dstPtr destination tensor in HOST memory - * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = same as that of srcDescPtr) - * \param [in] kernelSize kernel size for sobel filter (a single Rpp32u odd number with kernelSize = 3/5/7/9 that applies to all images in the batch) + * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1) + * \param [in] sobelType sobel type for sobel filter (a single Rpp32u number with sobelType = 0 (X Gradient) / 1 (Y Gradient) / 2 (XY Gradient) that applies to all images in the batch) + * \param [in] kernelSize kernel size for sobel filter (a single Rpp32u odd number with kernelSize = 3/5/7 that applies to all images in the batch) * \param [in] roiTensorSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) * \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize() From a679b561dca8530a54b9568cdcb10ec421a2ac49 Mon Sep 17 00:00:00 2001 From: 
sampath1117 Date: Mon, 5 Aug 2024 10:54:21 +0000 Subject: [PATCH 21/31] fixed variable names in helper functions added in test suite --- utilities/test_suite/HIP/Tensor_hip.cpp | 3 +- utilities/test_suite/rpp_test_suite_common.h | 40 ++++++++++++++------ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp index ec1b47d9b..bec475ac6 100644 --- a/utilities/test_suite/HIP/Tensor_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_hip.cpp @@ -188,6 +188,7 @@ int main(int argc, char **argv) RpptInterpolationType interpolationType = RpptInterpolationType::BILINEAR; std::string interpolationTypeName = ""; std::string noiseTypeName = ""; + string kernelSizeAndGradientName = ""; if (kernelSizeCase) { char additionalParam_char[2]; @@ -1568,7 +1569,7 @@ int main(int argc, char **argv) 3.source and destination layout are the same 4.augmentation case does not generate random output*/ if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase) && !(nonQACase)) - compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, testCase, dst, scriptPath); + compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, kernelSizeAndGradientName, testCase, dst, scriptPath); // Calculate exact dstROI in XYWH format for OpenCV dump if (roiTypeSrc == RpptRoiType::LTRB) diff --git a/utilities/test_suite/rpp_test_suite_common.h b/utilities/test_suite/rpp_test_suite_common.h index 59d0ff749..a1caf40e2 100644 --- a/utilities/test_suite/rpp_test_suite_common.h +++ b/utilities/test_suite/rpp_test_suite_common.h @@ -255,10 +255,10 @@ inline std::string get_gradient_type(unsigned int val) } // returns the interpolation type used for image resizing or scaling operations. 
-inline std::string get_kernel_size_and_gradient_type(unsigned int val, Rpp32u &kernelSize, Rpp32u &GradientType) +inline std::string get_kernel_size_and_gradient_type(unsigned int val, Rpp32u &kernelSize, Rpp32u &gradientType) { unsigned int x = val / 3; - GradientType = val % 3; + gradientType = val % 3; switch(x) { case 0: @@ -270,14 +270,11 @@ inline std::string get_kernel_size_and_gradient_type(unsigned int val, Rpp32u &k case 2: kernelSize = 7; break; - case 3: - kernelSize = 9; - break; default: kernelSize = 3; break; } - return ("_kernelSize" + std::to_string(kernelSize) + "_Gradient" + get_gradient_type(GradientType)); + return ("_kernelSize" + std::to_string(kernelSize) + "_gradient" + get_gradient_type(gradientType)); } // returns number of input channels according to layout type @@ -1014,6 +1011,27 @@ inline void read_bin_file(string refFile, T *binaryContent) fclose(fp); } +// returns the size of binary file passed +inline long get_bin_file_size(string refFile) +{ + FILE *fp; + fp = fopen(refFile.c_str(), "rb"); + if(!fp) + { + std::cout << "\n unable to open file : "<::const_iterator imagesNamesStart, RpptImagePatch *dstImgSizes, int maxImageDump) { @@ -1149,7 +1167,6 @@ inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, R refOutputHeight = GOLDEN_OUTPUT_MAX_HEIGHT; } int refOutputSize = refOutputHeight * refOutputWidth * dstDescPtr->c; - Rpp64u binOutputSize = refOutputHeight * refOutputWidth * dstDescPtr->n * 6; int pln1RefStride = dstDescPtr->strides.nStride * dstDescPtr->n * 3; string dataType[4] = {"_u8_", "_f16_", "_f32_", "_i8_"}; @@ -1212,15 +1229,16 @@ inline void compare_output(T* output, string funcName, RpptDescPtr srcDescPtr, R else if(testCase == 50) { func += kernelSizeAndGradientName; - Rpp32u kernelSize, GradientType; - get_kernel_size_and_gradient_type(additionalParam, kernelSize, GradientType); + Rpp32u kernelSize, gradientType; + get_kernel_size_and_gradient_type(additionalParam, kernelSize, 
gradientType); binFile += "_kernelSize" + std::to_string(kernelSize); - pln1RefStride += (GradientType * dstDescPtr->strides.nStride * dstDescPtr->n); + pln1RefStride += (gradientType * dstDescPtr->strides.nStride * dstDescPtr->n); } refFile = scriptPath + "/../REFERENCE_OUTPUT/" + funcName + "/"+ binFile + ".bin"; int fileMatch = 0; - Rpp8u *binaryContent = (Rpp8u *)malloc(binOutputSize * sizeof(Rpp8u)); + Rpp64u binOutputSize = get_bin_file_size(refFile); + Rpp8u *binaryContent = static_cast(malloc(binOutputSize * sizeof(Rpp8u))); read_bin_file(refFile, binaryContent); if(dstDescPtr->layout == RpptLayout::NHWC) From 0cac8e68fd7113c2fd74a88f5261b5851aca90e4 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 5 Aug 2024 11:02:01 +0000 Subject: [PATCH 22/31] added validation checks for sobelType and kernelSize --- src/modules/cpu/host_tensor_filter_augmentations.hpp | 2 +- src/modules/rppt_tensor_filter_augmentations.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/modules/cpu/host_tensor_filter_augmentations.hpp b/src/modules/cpu/host_tensor_filter_augmentations.hpp index c453e3b65..3b161c32a 100644 --- a/src/modules/cpu/host_tensor_filter_augmentations.hpp +++ b/src/modules/cpu/host_tensor_filter_augmentations.hpp @@ -27,4 +27,4 @@ SOFTWARE. 
#include "kernel/sobel_filter.hpp" -#endif // HOST_TENSOR_FILTER_AUGMENTATIONS_HPP \ No newline at end of file +#endif // HOST_TENSOR_FILTER_AUGMENTATIONS_HPP diff --git a/src/modules/rppt_tensor_filter_augmentations.cpp b/src/modules/rppt_tensor_filter_augmentations.cpp index 2f1a4ec1b..b821d21a5 100644 --- a/src/modules/rppt_tensor_filter_augmentations.cpp +++ b/src/modules/rppt_tensor_filter_augmentations.cpp @@ -44,6 +44,11 @@ RppStatus rppt_sobel_filter_host(RppPtr_t srcPtr, RpptRoiType roiType, rppHandle_t rppHandle) { + if ((kernelSize != 3) && (kernelSize != 5) && (kernelSize != 7)) + return RPP_ERROR_INVALID_ARGUMENTS; + if ((sobelType != 0) && (sobelType != 1) && (sobelType != 2)) + return RPP_ERROR_INVALID_ARGUMENTS; + // convert image to grey scale if input is RGB image RppPtr_t tempPtr = srcPtr; if (srcDescPtr->c == 3) From 4543311da6de3739d970118ff30b099e09ec16a7 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 5 Aug 2024 11:11:21 +0000 Subject: [PATCH 23/31] reverted unwanted changes added in rpp_cpu_simd.hpp --- src/include/cpu/rpp_cpu_simd.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/include/cpu/rpp_cpu_simd.hpp b/src/include/cpu/rpp_cpu_simd.hpp index 17aea7c26..c14699a50 100644 --- a/src/include/cpu/rpp_cpu_simd.hpp +++ b/src/include/cpu/rpp_cpu_simd.hpp @@ -1045,13 +1045,13 @@ inline void rpp_glitch_load24_f32pkd3_to_f32pln3_avx(Rpp32f *srcPtr, __m256 *p, { __m128 p128[8]; Rpp32f *srcPtrTemp = srcPtr + srcLocs[0]; - p[0] = _mm256_setr_ps(*srcPtrTemp, *(srcPtrTemp + 3), *(srcPtrTemp + 6), *(srcPtrTemp + 9), + p[0] = _mm256_setr_ps(*srcPtrTemp, *(srcPtrTemp + 3), *(srcPtrTemp + 6), *(srcPtrTemp + 9), *(srcPtrTemp + 12), *(srcPtrTemp + 15), *(srcPtrTemp + 18), *(srcPtrTemp + 21)); srcPtrTemp = srcPtr + srcLocs[1]; - p[1] = _mm256_setr_ps(*(srcPtrTemp + 1), *(srcPtrTemp + 4), *(srcPtrTemp + 7), *(srcPtrTemp + 10), + p[1] = _mm256_setr_ps(*(srcPtrTemp + 1), *(srcPtrTemp + 4), *(srcPtrTemp + 7), 
*(srcPtrTemp + 10), *(srcPtrTemp + 13), *(srcPtrTemp + 16), *(srcPtrTemp + 19), *(srcPtrTemp + 22)); srcPtrTemp = srcPtr + srcLocs[2]; - p[2] = _mm256_setr_ps(*(srcPtrTemp + 2), *(srcPtrTemp + 5), *(srcPtrTemp + 8), *(srcPtrTemp + 11), + p[2] = _mm256_setr_ps(*(srcPtrTemp + 2), *(srcPtrTemp + 5), *(srcPtrTemp + 8), *(srcPtrTemp + 11), *(srcPtrTemp + 14), *(srcPtrTemp + 17), *(srcPtrTemp + 20), *(srcPtrTemp + 23)); } @@ -1099,7 +1099,7 @@ inline void rpp_glitch_load30_i8pkd3_to_i8pkd3_avx(Rpp8s *srcPtr, int * srcLocs, inline void rpp_glitch_load6_f32pkd3_to_f32pkd3_avx(Rpp32f *srcPtr, int * srcLocs, __m256 &p) { - p =_mm256_setr_ps(*(srcPtr + srcLocs[0]), *(srcPtr + srcLocs[1] + 1), *(srcPtr + srcLocs[2] + 2), *(srcPtr + srcLocs[0] + 3), + p =_mm256_setr_ps(*(srcPtr + srcLocs[0]), *(srcPtr + srcLocs[1] + 1), *(srcPtr + srcLocs[2] + 2), *(srcPtr + srcLocs[0] + 3), *(srcPtr + srcLocs[1] + 4), *(srcPtr + srcLocs[2] + 5), 0.0f, 0.0f); } @@ -3885,8 +3885,8 @@ inline void rpp_resize_nn_load_u8pkd3(Rpp8u *srcRowPtrsForInterp, Rpp32s *loc, _ template inline void rpp_resize_nn_extract_pkd3_avx(T *srcRowPtrsForInterp, Rpp32s *loc, __m256i &p) { - p = _mm256_setr_epi8(*(srcRowPtrsForInterp + loc[0]), *(srcRowPtrsForInterp + loc[0] + 1), *(srcRowPtrsForInterp + loc[0] + 2), - *(srcRowPtrsForInterp + loc[1]), *(srcRowPtrsForInterp + loc[1] + 1), *(srcRowPtrsForInterp + loc[1] + 2), + p = _mm256_setr_epi8(*(srcRowPtrsForInterp + loc[0]), *(srcRowPtrsForInterp + loc[0] + 1), *(srcRowPtrsForInterp + loc[0] + 2), + *(srcRowPtrsForInterp + loc[1]), *(srcRowPtrsForInterp + loc[1] + 1), *(srcRowPtrsForInterp + loc[1] + 2), *(srcRowPtrsForInterp + loc[2]), *(srcRowPtrsForInterp + loc[2] + 1), *(srcRowPtrsForInterp + loc[2] + 2), *(srcRowPtrsForInterp + loc[3]), *(srcRowPtrsForInterp + loc[3] + 1), *(srcRowPtrsForInterp + loc[3] + 2), *(srcRowPtrsForInterp + loc[4]), *(srcRowPtrsForInterp + loc[4] + 1), *(srcRowPtrsForInterp + loc[4] + 2), @@ -3911,7 +3911,7 @@ inline void 
rpp_resize_nn_load_u8pln1(Rpp8u *srcRowPtrsForInterp, Rpp32s *loc, _ template inline void rpp_resize_nn_extract_pln1_avx(T *srcRowPtrsForInterp, Rpp32s *loc, __m256i &p) { - p = _mm256_setr_epi8(*(srcRowPtrsForInterp + loc[0]), *(srcRowPtrsForInterp + loc[1]), + p = _mm256_setr_epi8(*(srcRowPtrsForInterp + loc[0]), *(srcRowPtrsForInterp + loc[1]), *(srcRowPtrsForInterp + loc[2]), *(srcRowPtrsForInterp + loc[3]), *(srcRowPtrsForInterp + loc[4]), *(srcRowPtrsForInterp + loc[5]), *(srcRowPtrsForInterp + loc[6]), *(srcRowPtrsForInterp + loc[7]), From 0320eae59dad3d508e266880ea5ca3272fb1c3a1 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Mon, 5 Aug 2024 12:37:08 +0000 Subject: [PATCH 24/31] added blank line at EOF for sobel_filter.hpp --- src/modules/cpu/kernel/sobel_filter.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/modules/cpu/kernel/sobel_filter.hpp b/src/modules/cpu/kernel/sobel_filter.hpp index 9b4e076ce..d5243649c 100644 --- a/src/modules/cpu/kernel/sobel_filter.hpp +++ b/src/modules/cpu/kernel/sobel_filter.hpp @@ -977,4 +977,4 @@ RppStatus sobel_filter_host_tensor(T *srcPtr, } return RPP_SUCCESS; -} \ No newline at end of file +} From b0101cd3ab2c4b3824405e09357b928e1f7f3ccf Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 30 Aug 2024 10:12:19 +0000 Subject: [PATCH 25/31] added the required version changes --- CHANGELOG.md | 9 ++++++++- CMakeLists.txt | 2 +- include/rpp_version.h | 4 ++-- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8fadf597..3649793a3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,7 +1,14 @@ # Changelog for RPP Full documentation for RPP is available at [https://rocm.docs.amd.com/projects/rpp/en/latest](https://rocm.docs.amd.com/projects/rpp/en/latest) - + +## RPP 1.10.0 (unreleased) + +### Changes + +* RPP Tensor Sobel Filter support on HOST + + ## RPP 1.9.1 for ROCm 6.3.0 ### Changes diff --git a/CMakeLists.txt b/CMakeLists.txt index 
7ef88e169..9726b2365 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,7 @@ endif() set(CMAKE_CXX_STANDARD 17) # RPP Version -set(VERSION "1.9.1") +set(VERSION "1.10.0") # Set Project Version and Language project(rpp VERSION ${VERSION} LANGUAGES CXX) diff --git a/include/rpp_version.h b/include/rpp_version.h index 79e0b248d..94ba02c27 100644 --- a/include/rpp_version.h +++ b/include/rpp_version.h @@ -39,8 +39,8 @@ extern "C" { #endif // NOTE: IMPORTANT: Match the version with CMakelists.txt version #define RPP_VERSION_MAJOR 1 -#define RPP_VERSION_MINOR 9 -#define RPP_VERSION_PATCH 1 +#define RPP_VERSION_MINOR 10 +#define RPP_VERSION_PATCH 0 #ifdef __cplusplus } #endif From 7749560ccedcd1a7c3e166d4d60266e4b116cf59 Mon Sep 17 00:00:00 2001 From: sampath1117 Date: Fri, 30 Aug 2024 10:19:20 +0000 Subject: [PATCH 26/31] added validation for dst channels 3 fixed documentation --- include/rppt_tensor_filter_augmentations.h | 6 +++--- src/modules/rppt_tensor_filter_augmentations.cpp | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/rppt_tensor_filter_augmentations.h b/include/rppt_tensor_filter_augmentations.h index 1ecfbe678..f821df844 100644 --- a/include/rppt_tensor_filter_augmentations.h +++ b/include/rppt_tensor_filter_augmentations.h @@ -93,8 +93,8 @@ RppStatus rppt_box_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t RppStatus rppt_gaussian_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32f *stdDevTensor, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle); #endif // GPU_SUPPORT -/*! \brief Sobel Filter augmentation on HOST backend for a NCHW layout tensor - * \details The sobel filter augmentation runs for a batch of RGB(3 channel) / greyscale(1 channel) images with NCHW tensor layout.
+/*! \brief Sobel Filter augmentation on HOST backend for a NHWC/NCHW layout tensor + * \details The sobel filter augmentation runs for a batch of RGB(3 channel) / greyscale(1 channel) images with NHWC/NCHW tensor layout.
* - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). * - dstPtr depth ranges - Will be same depth as srcPtr. * \image html img150x150.png Sample Input @@ -105,7 +105,7 @@ RppStatus rppt_gaussian_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppP * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1) * \param [in] sobelType sobel type for sobel filter (a single Rpp32u number with sobelType = 0 (X Gradient) / 1 (Y Gradient) / 2 (XY Gradient) that applies to all images in the batch) * \param [in] kernelSize kernel size for sobel filter (a single Rpp32u odd number with kernelSize = 3/5/7 that applies to all images in the batch) - * \param [in] roiTensorSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) + * \param [in] roiTensorPtrSrc ROI data in HOST memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) * \param [in] rppHandle RPP HOST handle created with \ref rppCreateWithBatchSize() * \return A \ref RppStatus enumeration. 
diff --git a/src/modules/rppt_tensor_filter_augmentations.cpp b/src/modules/rppt_tensor_filter_augmentations.cpp index b821d21a5..671b60f4c 100644 --- a/src/modules/rppt_tensor_filter_augmentations.cpp +++ b/src/modules/rppt_tensor_filter_augmentations.cpp @@ -48,6 +48,8 @@ RppStatus rppt_sobel_filter_host(RppPtr_t srcPtr, return RPP_ERROR_INVALID_ARGUMENTS; if ((sobelType != 0) && (sobelType != 1) && (sobelType != 2)) return RPP_ERROR_INVALID_ARGUMENTS; + if (dstDescPtr->c == 3) + return RPP_ERROR_INVALID_DST_CHANNELS; // convert image to grey scale if input is RGB image RppPtr_t tempPtr = srcPtr; From ef9bb75773ed93c83778fa9765fdb6e4861250ce Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Fri, 20 Sep 2024 03:53:30 +0000 Subject: [PATCH 27/31] Add 3x3 intial HIP implementation --- include/rppt_tensor_filter_augmentations.h | 23 ++ .../hip/hip_tensor_filter_augmentations.hpp | 1 + src/modules/hip/kernel/sobel_filter.hpp | 309 ++++++++++++++++++ .../rppt_tensor_filter_augmentations.cpp | 84 +++++ utilities/test_suite/HIP/Tensor_hip.cpp | 29 +- utilities/test_suite/HIP/runTests.py | 12 +- utilities/test_suite/rpp_test_suite_common.h | 42 +-- 7 files changed, 473 insertions(+), 27 deletions(-) create mode 100644 src/modules/hip/kernel/sobel_filter.hpp diff --git a/include/rppt_tensor_filter_augmentations.h b/include/rppt_tensor_filter_augmentations.h index f821df844..f69ccce01 100644 --- a/include/rppt_tensor_filter_augmentations.h +++ b/include/rppt_tensor_filter_augmentations.h @@ -114,6 +114,29 @@ RppStatus rppt_gaussian_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppP */ RppStatus rppt_sobel_filter_host(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32u sobelType, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle); +#ifdef GPU_SUPPORT +/*! 
\brief Sobel Filter augmentation on HIP backend for a NHWC/NCHW layout tensor + * \details The sobel filter augmentation runs for a batch of RGB(3 channel) / greyscale(1 channel) images with NHWC/NCHW tensor layout.
+ * - srcPtr depth ranges - Rpp8u (0 to 255), Rpp16f (0 to 1), Rpp32f (0 to 1), Rpp8s (-128 to 127). + * - dstPtr depth ranges - Will be same depth as srcPtr. + * \image html img150x150.png Sample Input + * \image html filter_augmentations_sobel_filter_kSize3_img150x150.png Sample 3x3 Output + * \param [in] srcPtr source tensor in HIP memory + * \param [in] srcDescPtr source tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW, c = 1/3) + * \param [out] dstPtr destination tensor in HIP memory + * \param [in] dstDescPtr destination tensor descriptor (Restrictions - numDims = 4, offsetInBytes >= 0, dataType = U8/F16/F32/I8, layout = NCHW/NHWC, c = 1) + * \param [in] sobelType sobel type for sobel filter (a single Rpp32u number with sobelType = 0 (X Gradient) / 1 (Y Gradient) / 2 (XY Gradient) that applies to all images in the batch) + * \param [in] kernelSize kernel size for sobel filter (a single Rpp32u odd number with kernelSize = 3/5/7 that applies to all images in the batch) + * \param [in] roiTensorPtrSrc ROI data in HIP memory, for each image in source tensor (2D tensor of size batchSize * 4, in either format - XYWH(xy.x, xy.y, roiWidth, roiHeight) or LTRB(lt.x, lt.y, rb.x, rb.y)) + * \param [in] roiType ROI type used (RpptRoiType::XYWH or RpptRoiType::LTRB) + * \param [in] rppHandle RPP HIP handle created with \ref rppCreateWithStreamAndBatchSize() + * \return A \ref RppStatus enumeration. + * \retval RPP_SUCCESS Successful completion. + * \retval RPP_ERROR* Unsuccessful completion. + */ +RppStatus rppt_sobel_filter_gpu(RppPtr_t srcPtr, RpptDescPtr srcDescPtr, RppPtr_t dstPtr, RpptDescPtr dstDescPtr, Rpp32u sobelType, Rpp32u kernelSize, RpptROIPtr roiTensorPtrSrc, RpptRoiType roiType, rppHandle_t rppHandle); +#endif // GPU_SUPPORT + /*! 
@} */ diff --git a/src/modules/hip/hip_tensor_filter_augmentations.hpp b/src/modules/hip/hip_tensor_filter_augmentations.hpp index 367d29722..de14d7e28 100644 --- a/src/modules/hip/hip_tensor_filter_augmentations.hpp +++ b/src/modules/hip/hip_tensor_filter_augmentations.hpp @@ -27,5 +27,6 @@ SOFTWARE. #include "kernel/box_filter.hpp" #include "kernel/gaussian_filter.hpp" +#include "kernel/sobel_filter.hpp" #endif // HIP_TENSOR_FILTER_AUGMENTATIONS_HPP diff --git a/src/modules/hip/kernel/sobel_filter.hpp b/src/modules/hip/kernel/sobel_filter.hpp new file mode 100644 index 000000000..bbd396dbc --- /dev/null +++ b/src/modules/hip/kernel/sobel_filter.hpp @@ -0,0 +1,309 @@ +#include +#include "rpp_hip_common.hpp" + +__device__ __constant__ float sobel3x3XHip[9] = {-1, 0, 1, + -2, 0, 2, + -1, 0, 1}; +__device__ __constant__ float sobel3x3YHip[9] = {-1, -2, -1, + 0, 0, 0, + 1, 2, 1}; + +// -------------------- sobel_filter device helpers -------------------- + +__device__ __forceinline__ void sobel_filter_bidirection_hip_compute(d_float8 *src1_f8, d_float8 *src2_f8, d_float8 *dst_f8) +{ + rpp_hip_math_multiply8(src1_f8, src1_f8, src1_f8); + rpp_hip_math_multiply8(src2_f8, src2_f8, src2_f8); + rpp_hip_math_add8(src1_f8, src2_f8, dst_f8); + rpp_hip_math_sqrt8(dst_f8, dst_f8); +} + +__device__ __forceinline__ void sobel_filter_3x3_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8, float *filter) +{ + float src_f1; + uint3 src_ui3; + src_ui3 = *(reinterpret_cast(srcPtr)); + src_f1 = rpp_hip_unpack0(src_ui3.x); + dst_f8->f1[0] = fmaf(src_f1, filter[0], dst_f8->f1[0]); + src_f1 = rpp_hip_unpack1(src_ui3.x); + dst_f8->f1[0] = fmaf(src_f1, filter[1], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[0], dst_f8->f1[1]); + src_f1 = rpp_hip_unpack2(src_ui3.x); + dst_f8->f1[0] = fmaf(src_f1, filter[2], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[1], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[0], dst_f8->f1[2]); + src_f1 = rpp_hip_unpack3(src_ui3.x); + 
dst_f8->f1[1] = fmaf(src_f1, filter[2], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[1], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[0], dst_f8->f1[3]); + src_f1 = rpp_hip_unpack0(src_ui3.y); + dst_f8->f1[2] = fmaf(src_f1, filter[2], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[1], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[0], dst_f8->f1[4]); + src_f1 = rpp_hip_unpack1(src_ui3.y); + dst_f8->f1[3] = fmaf(src_f1, filter[2], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[1], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[0], dst_f8->f1[5]); + src_f1 = rpp_hip_unpack2(src_ui3.y); + dst_f8->f1[4] = fmaf(src_f1, filter[2], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[1], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[0], dst_f8->f1[6]); + src_f1 = rpp_hip_unpack3(src_ui3.y); + dst_f8->f1[5] = fmaf(src_f1, filter[2], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[1], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[0], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack0(src_ui3.z); + dst_f8->f1[6] = fmaf(src_f1, filter[2], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[1], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack1(src_ui3.z); + dst_f8->f1[7] = fmaf(src_f1, filter[2], dst_f8->f1[7]); +} + +__device__ __forceinline__ void sobel_filter_3x3_bidirectional_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8x, d_float8 *dst_f8y, float *filterX, float *filterY) +{ + float src_f1; + uint3 src_ui3; + src_ui3 = *(reinterpret_cast(srcPtr)); + src_f1 = rpp_hip_unpack0(src_ui3.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[0], dst_f8x->f1[0]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[0], dst_f8y->f1[0]); + + src_f1 = rpp_hip_unpack1(src_ui3.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[1], dst_f8x->f1[0]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[1], dst_f8y->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[0], dst_f8x->f1[1]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[0], dst_f8y->f1[1]); + + src_f1 = 
rpp_hip_unpack2(src_ui3.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[2], dst_f8x->f1[0]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[2], dst_f8y->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[1], dst_f8x->f1[1]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[1], dst_f8y->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[0], dst_f8x->f1[2]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[0], dst_f8y->f1[2]); + + src_f1 = rpp_hip_unpack3(src_ui3.x); + dst_f8x->f1[1] = fmaf(src_f1, filterX[2], dst_f8x->f1[1]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[2], dst_f8y->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[1], dst_f8x->f1[2]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[1], dst_f8y->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[0], dst_f8x->f1[3]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[0], dst_f8y->f1[3]); + + src_f1 = rpp_hip_unpack0(src_ui3.y); + dst_f8x->f1[2] = fmaf(src_f1, filterX[2], dst_f8x->f1[2]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[2], dst_f8y->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[1], dst_f8x->f1[3]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[1], dst_f8y->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[0], dst_f8x->f1[4]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[0], dst_f8y->f1[4]); + + src_f1 = rpp_hip_unpack1(src_ui3.y); + dst_f8x->f1[3] = fmaf(src_f1, filterX[2], dst_f8x->f1[3]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[2], dst_f8y->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[1], dst_f8x->f1[4]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[1], dst_f8y->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[0], dst_f8x->f1[5]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[0], dst_f8y->f1[5]); + + src_f1 = rpp_hip_unpack2(src_ui3.y); + dst_f8x->f1[4] = fmaf(src_f1, filterX[2], dst_f8x->f1[4]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[2], dst_f8y->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[1], dst_f8x->f1[5]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[1], dst_f8y->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[0], dst_f8x->f1[6]); + dst_f8y->f1[6] = fmaf(src_f1, 
filterY[0], dst_f8y->f1[6]); + + src_f1 = rpp_hip_unpack3(src_ui3.y); + dst_f8x->f1[5] = fmaf(src_f1, filterX[2], dst_f8x->f1[5]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[2], dst_f8y->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[1], dst_f8x->f1[6]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[1], dst_f8y->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[0], dst_f8x->f1[7]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[0], dst_f8y->f1[7]); + + src_f1 = rpp_hip_unpack0(src_ui3.z); + dst_f8x->f1[6] = fmaf(src_f1, filterX[2], dst_f8x->f1[6]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[2], dst_f8y->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[1], dst_f8x->f1[7]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[1], dst_f8y->f1[7]); + + src_f1 = rpp_hip_unpack1(src_ui3.z); + dst_f8x->f1[7] = fmaf(src_f1, filterX[2], dst_f8x->f1[7]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[2], dst_f8y->f1[7]); +} + + +template +__global__ void sobel_filter_3x3_pln_bidirection_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc) +{ + int hipThreadIdx_x8 = hipThreadIdx_x << 3; + int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; + int id_y_o = hipBlockIdx_y * tileSize.y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + int id_x_i = id_x_o - padLength; + int id_y_i = id_y_o - padLength; + + d_float8 sum_f8x, sum_f8y, sum_f8; + __shared__ uchar src_smem[SMEM_LENGTH_Y_1C][SMEM_LENGTH_X]; + + int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); + int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; + float *filterRowX1 = &sobel3x3XHip[0]; + float *filterRowX2 = &filterRowX1[3]; + float *filterRowX3 = &filterRowX1[6]; + float *filterRowY1 = &sobel3x3YHip[0]; + float *filterRowY2 = &filterRowY1[3]; + float *filterRowY3 = 
&filterRowY1[6]; + sum_f8x.f4[0] = static_cast(0); + sum_f8x.f4[1] = static_cast(0); + sum_f8y.f4[0] = static_cast(0); + sum_f8y.f4[1] = static_cast(0); + if ((id_x_i >= -(int)padLength) && (id_x_i < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_i >= 0) && (id_y_i < roiTensorPtrSrc[id_z].xywhROI.roiHeight)) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + else if(id_y_i < 0) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx + srcStridesNCH.z, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + if(id_x_i < 0) + src_smem[hipThreadIdx_y][0] = src_smem[hipThreadIdx_y][1]; + __syncthreads(); + if ((id_x_o < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_o < roiTensorPtrSrc[id_z].xywhROI.roiHeight) && + (hipThreadIdx_x < tileSize.x) && + (hipThreadIdx_y < tileSize.y)) + { + sobel_filter_3x3_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX1, filterRowY1); + sobel_filter_3x3_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX2, filterRowY2); + sobel_filter_3x3_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX3, filterRowY3); + rpp_hip_pixel_check_0to255(&sum_f8x); + rpp_hip_pixel_check_0to255(&sum_f8y); + rpp_hip_adjust_range(dstPtr, &sum_f8x); + rpp_hip_adjust_range(dstPtr, &sum_f8y); + sobel_filter_bidirection_hip_compute(&sum_f8x, &sum_f8y, &sum_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &sum_f8); + } +} + +template +__global__ void sobel_filter_3x3_pln_unidirection_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc, + int sobelType) +{ + int hipThreadIdx_x8 = hipThreadIdx_x << 3; + int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; + int id_y_o = hipBlockIdx_y * tileSize.y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * 
hipBlockDim_z + hipThreadIdx_z; + + int id_x_i = id_x_o - padLength; + int id_y_i = id_y_o - padLength; + + d_float8 sum_f8; + __shared__ uchar src_smem[SMEM_LENGTH_Y_1C][SMEM_LENGTH_X]; + + int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); + int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; + float *filter_row1 = ((!sobelType) ? sobel3x3XHip : sobel3x3YHip); + float *filter_row2 = &filter_row1[3]; + float *filter_row3 = &filter_row1[6]; + sum_f8.f4[0] = static_cast(0); + sum_f8.f4[1] = static_cast(0); + if ((id_x_i >= -(int)padLength) && (id_x_i < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_i >= 0) && (id_y_i < roiTensorPtrSrc[id_z].xywhROI.roiHeight)) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + else if(id_y_i < 0) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx + srcStridesNCH.z, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + if(id_x_i < 0) + src_smem[hipThreadIdx_y][0] = src_smem[hipThreadIdx_y][1]; + __syncthreads(); + if ((id_x_o < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_o < roiTensorPtrSrc[id_z].xywhROI.roiHeight) && + (hipThreadIdx_x < tileSize.x) && + (hipThreadIdx_y < tileSize.y)) + { + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8, filter_row1); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8, filter_row2); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8, filter_row3); + rpp_hip_pixel_check_0to255(&sum_f8); + rpp_hip_adjust_range(dstPtr, &sum_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &sum_f8); + } +} + +template +RppStatus hip_exec_sobel_filter_tensor(T *srcPtr, + RpptDescPtr srcDescPtr, + T *dstPtr, + RpptDescPtr dstDescPtr, + Rpp32u sobelType, + Rpp32u kernelSize, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + rpp::Handle& 
handle) +{ + if (roiType == RpptRoiType::LTRB) + hip_exec_roi_converison_ltrb_to_xywh(roiTensorPtrSrc, handle); + + int globalThreads_x = (dstDescPtr->strides.hStride + 7) >> 3; + int globalThreads_y = dstDescPtr->h; + int globalThreads_z = dstDescPtr->n; + + uint padLength = kernelSize / 2; + uint padLengthTwice = padLength * 2; + uint2 tileSize; + tileSize.x = (SMEM_LENGTH_X - padLengthTwice) / 8; + tileSize.y = 16 - padLengthTwice; + bool combined = (sobelType == 2); + + if (kernelSize == 3) + { + if(combined) + { + hipLaunchKernelGGL(sobel_filter_3x3_pln_bidirection_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc); + } + else + { + hipLaunchKernelGGL(sobel_filter_3x3_pln_unidirection_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc, + sobelType); + } + } + + return RPP_SUCCESS; +} \ No newline at end of file diff --git a/src/modules/rppt_tensor_filter_augmentations.cpp b/src/modules/rppt_tensor_filter_augmentations.cpp index 671b60f4c..2c6bdc0ad 100644 --- a/src/modules/rppt_tensor_filter_augmentations.cpp +++ 
b/src/modules/rppt_tensor_filter_augmentations.cpp @@ -257,4 +257,88 @@ RppStatus rppt_gaussian_filter_gpu(RppPtr_t srcPtr, #endif // backend } +/******************** sobel_filter ********************/ + +RppStatus rppt_sobel_filter_gpu(RppPtr_t srcPtr, + RpptDescPtr srcDescPtr, + RppPtr_t dstPtr, + RpptDescPtr dstDescPtr, + Rpp32u sobelType, + Rpp32u kernelSize, + RpptROIPtr roiTensorPtrSrc, + RpptRoiType roiType, + rppHandle_t rppHandle) +{ +#ifdef HIP_COMPILE + if ((kernelSize != 3) && (kernelSize != 5) && (kernelSize != 7)) + return RPP_ERROR_INVALID_ARGUMENTS; + if ((sobelType != 0) && (sobelType != 1) && (sobelType != 2)) + return RPP_ERROR_INVALID_ARGUMENTS; + if (dstDescPtr->c == 3) + return RPP_ERROR_INVALID_DST_CHANNELS; + + // convert RGB input to grey scale in scratch memory; single channel input is filtered directly from srcPtr + RppPtr_t tempPtr = srcPtr; + if (srcDescPtr->c == 3) + { + RpptSubpixelLayout srcSubpixelLayout = RpptSubpixelLayout::RGBtype; + tempPtr = rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; + rppt_color_to_greyscale_gpu(srcPtr, srcDescPtr, tempPtr, dstDescPtr, srcSubpixelLayout, rppHandle); + } + + if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) + { + hip_exec_sobel_filter_tensor(static_cast(tempPtr) + srcDescPtr->offsetInBytes, + dstDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) + { + hip_exec_sobel_filter_tensor(reinterpret_cast(static_cast(tempPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) + { + 
hip_exec_sobel_filter_tensor(reinterpret_cast(static_cast(tempPtr) + srcDescPtr->offsetInBytes), + srcDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) + { + hip_exec_sobel_filter_tensor(static_cast(tempPtr) + srcDescPtr->offsetInBytes, + srcDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); + } + + return RPP_SUCCESS; +#elif defined(OCL_COMPILE) + return RPP_ERROR_NOT_IMPLEMENTED; +#endif // backend +} + #endif // GPU_SUPPORT diff --git a/utilities/test_suite/HIP/Tensor_hip.cpp b/utilities/test_suite/HIP/Tensor_hip.cpp index b4cdd3671..27010c6fc 100644 --- a/utilities/test_suite/HIP/Tensor_hip.cpp +++ b/utilities/test_suite/HIP/Tensor_hip.cpp @@ -63,7 +63,7 @@ int main(int argc, char **argv) int decoderType = atoi(argv[13]); int batchSize = atoi(argv[14]); - bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23|| testCase == 24 || testCase == 40 || testCase == 41 || testCase == 49 || testCase == 54 || testCase == 79); + bool additionalParamCase = (testCase == 8 || testCase == 21 || testCase == 23|| testCase == 24 || testCase == 40 || testCase == 41 || testCase == 49 || testCase == 50 || testCase == 54 || testCase == 79); bool kernelSizeCase = (testCase == 40 || testCase == 41 || testCase == 49 || testCase == 54); bool dualInputCase = (testCase == 2 || testCase == 30 || testCase == 33 || testCase == 61 || testCase == 63 || testCase == 65 || testCase == 68); bool randomOutputCase = (testCase == 6 || testCase == 8 || testCase == 84 || testCase == 49 || testCase == 54); @@ -71,7 +71,8 @@ int main(int argc, char **argv) bool interpolationTypeCase = (testCase == 21 || testCase == 23 || testCase == 24 || testCase == 
79); bool reductionTypeCase = (testCase == 87 || testCase == 88 || testCase == 89 || testCase == 90 || testCase == 91); bool noiseTypeCase = (testCase == 8); - bool pln1OutTypeCase = (testCase == 86); + bool pln1OutTypeCase = (testCase == 86 || testCase == 50); + bool kernelSizeAndGradientCase = (testCase == 50); unsigned int verbosity = atoi(argv[11]); unsigned int additionalParam = additionalParamCase ? atoi(argv[7]) : 1; @@ -188,7 +189,8 @@ int main(int argc, char **argv) RpptInterpolationType interpolationType = RpptInterpolationType::BILINEAR; std::string interpolationTypeName = ""; std::string noiseTypeName = ""; - string kernelSizeAndGradientName = ""; + std::string kernelSizeAndGradientName = ""; + Rpp32u kernelSize, GradientType; if (kernelSizeCase) { char additionalParam_char[2]; @@ -208,6 +210,11 @@ int main(int argc, char **argv) func += "_noiseType"; func += noiseTypeName.c_str(); } + else if (kernelSizeAndGradientCase) + { + kernelSizeAndGradientName = get_kernel_size_and_gradient_type(additionalParam, kernelSize, GradientType); + func += kernelSizeAndGradientName; + } if(!qaFlag) { @@ -267,7 +274,7 @@ int main(int argc, char **argv) Rpp32u outputChannels = inputChannels; if(pln1OutTypeCase) outputChannels = 1; - Rpp32u srcOffsetInBytes = (kernelSizeCase) ? (12 * (additionalParam / 2)) : 0; + Rpp32u srcOffsetInBytes = (kernelSizeCase || kernelSizeAndGradientCase) ? 
(12 * 1) : 0; Rpp32u dstOffsetInBytes = 0; int imagesMixed = 0; // Flag used to check if all images in dataset is of same dimensions @@ -1132,6 +1139,18 @@ int main(int argc, char **argv) break; } + case 50: + { + testCaseName = "sobel_filter"; + + startWallTime = omp_get_wtime(); + if (inputBitDepth == 0 || inputBitDepth == 1 || inputBitDepth == 2 || inputBitDepth == 5) + rppt_sobel_filter_gpu(d_input, srcDescPtr, d_output, dstDescPtr, GradientType, kernelSize, roiTensorPtrSrc, roiTypeSrc, handle); + else + missingFuncFlag = 1; + + break; + } case 54: { testCaseName = "gaussian_filter"; @@ -1590,7 +1609,7 @@ int main(int argc, char **argv) 3.source and destination layout are the same 4.augmentation case does not generate random output*/ if(qaFlag && inputBitDepth == 0 && ((srcDescPtr->layout == dstDescPtr->layout) || pln1OutTypeCase) && !(randomOutputCase) && !(nonQACase)) - compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, kernelSizeAndGradientName, testCase, dst, scriptPath); + compare_output(outputu8, testCaseName, srcDescPtr, dstDescPtr, dstImgSizes, batchSize, interpolationTypeName, noiseTypeName, kernelSizeAndGradientName, testCase, dst, scriptPath, additionalParam); // Calculate exact dstROI in XYWH format for OpenCV dump if (roiTypeSrc == RpptRoiType::LTRB) diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index 8857e6ac5..a95498bf7 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -86,6 +86,12 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(interpolationType), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec 
stdout_data, stderr_data = result.communicate() print(stdout_data.decode()) + elif case == "50": + for kernelSizeAndGradient in range(9): + print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSizeAndGradient) + " 0") + result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSizeAndGradient), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec + stdout_data, stderr_data = result.communicate() + print(stdout_data.decode()) else: print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " 0 " + str(numRuns) + " " + str(testType) + " " + str(layout)) result = subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), "0", str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec @@ -122,6 +128,10 @@ def run_performance_test(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPa for interpolationType in range(6): run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, interpolationType, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) print("") + elif case == "50": + for kernelSizeAndGradient in range(9): + run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, outputFormatToggle, case, kernelSizeAndGradient, numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) + print("") else: run_performance_test_cmd(loggingFolder, logFileLayout, srcPath1, srcPath2, dstPath, bitDepth, 
outputFormatToggle, case, "0", numRuns, testType, layout, qaMode, decoderType, batchSize, roiList) print("------------------------------------------------------------------------------------------\n") @@ -272,7 +282,7 @@ def rpp_test_suite_parser_and_validator(): subprocess.call(["make", "-j16"], cwd=".") # nosec # List of cases supported -supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '13', '20', '21', '23', '26', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '54', '61', '63', '65', '68', '70', '79', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92'] +supportedCaseList = ['0', '1', '2', '4', '5', '6', '8', '13', '20', '21', '23', '26', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '45', '46', '50', '54', '61', '63', '65', '68', '70', '79', '80', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92'] # Create folders based on testType and profilingOption if testType == 1 and profilingOption == "YES": diff --git a/utilities/test_suite/rpp_test_suite_common.h b/utilities/test_suite/rpp_test_suite_common.h index b9e950b28..9f7d32111 100644 --- a/utilities/test_suite/rpp_test_suite_common.h +++ b/utilities/test_suite/rpp_test_suite_common.h @@ -288,6 +288,27 @@ inline int set_input_channels(int layoutType) return 1; } +// returns the size of binary file passed +inline long get_bin_file_size(string refFile) +{ + FILE *fp; + fp = fopen(refFile.c_str(), "rb"); + if(!fp) + { + std::cout << "\n unable to open file : "<::const_iterator imagesNamesStart, RpptImagePatch *dstImgSizes, int maxImageDump) { From 9a63e7883e3aef5b7d0407ca52172dad3bd48cbd Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Fri, 20 Sep 2024 03:56:52 +0000 Subject: [PATCH 28/31] Adds 5x5 HIP implementation for sobel filter --- src/modules/hip/kernel/sobel_filter.hpp | 328 ++++++++++++++++++++++++ 1 file changed, 328 insertions(+) diff --git a/src/modules/hip/kernel/sobel_filter.hpp 
b/src/modules/hip/kernel/sobel_filter.hpp index bbd396dbc..0ea05415a 100644 --- a/src/modules/hip/kernel/sobel_filter.hpp +++ b/src/modules/hip/kernel/sobel_filter.hpp @@ -7,6 +7,16 @@ __device__ __constant__ float sobel3x3XHip[9] = {-1, 0, 1, __device__ __constant__ float sobel3x3YHip[9] = {-1, -2, -1, 0, 0, 0, 1, 2, 1}; +__device__ __constant__ float sobel5x5XHip[25] = {-1, -2, 0, 2, 1, + -4, -8, 0, 8, 4, + -6, -12, 0, 12, 6, + -4, -8, 0, 8, 4, + -1, -2, 0, 2, 1}; +__device__ __constant__ float sobel5x5YHip[25] = {-1, -4, -6, -4, -1, + -2, -8, -12, -8, -2, + 0, 0, 0, 0, 0, + 2, 8, 12, 8, 2, + 1, 4, 6, 4, 1}; // -------------------- sobel_filter device helpers -------------------- @@ -133,6 +143,163 @@ __device__ __forceinline__ void sobel_filter_3x3_bidirectional_row_hip_compute(u dst_f8y->f1[7] = fmaf(src_f1, filterY[2], dst_f8y->f1[7]); } +__device__ void sobel_filter_5x5_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8, float *filter) +{ + float src_f1; + uint3 src_ui3; + src_ui3 = *(reinterpret_cast(srcPtr)); + src_f1 = rpp_hip_unpack0(src_ui3.x); + dst_f8->f1[0] = fmaf(src_f1, filter[0], dst_f8->f1[0]); + src_f1 = rpp_hip_unpack1(src_ui3.x); + dst_f8->f1[0] = fmaf(src_f1, filter[1], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[0], dst_f8->f1[1]); + src_f1 = rpp_hip_unpack2(src_ui3.x); + dst_f8->f1[0] = fmaf(src_f1, filter[2], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[1], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[0], dst_f8->f1[2]); + src_f1 = rpp_hip_unpack3(src_ui3.x); + dst_f8->f1[0] = fmaf(src_f1, filter[3], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[2], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[1], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[0], dst_f8->f1[3]); + src_f1 = rpp_hip_unpack0(src_ui3.y); + dst_f8->f1[0] = fmaf(src_f1, filter[4], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[3], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[2], dst_f8->f1[2]); + dst_f8->f1[3] = 
fmaf(src_f1, filter[1], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[0], dst_f8->f1[4]); + src_f1 = rpp_hip_unpack1(src_ui3.y); + dst_f8->f1[1] = fmaf(src_f1, filter[4], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[3], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[2], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[1], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[0], dst_f8->f1[5]); + src_f1 = rpp_hip_unpack2(src_ui3.y); + dst_f8->f1[2] = fmaf(src_f1, filter[4], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[3], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[2], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[1], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[0], dst_f8->f1[6]); + src_f1 = rpp_hip_unpack3(src_ui3.y); + dst_f8->f1[3] = fmaf(src_f1, filter[4], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[3], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[2], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[1], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[0], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack0(src_ui3.z); + dst_f8->f1[4] = fmaf(src_f1, filter[4], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[3], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[2], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[1], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack1(src_ui3.z); + dst_f8->f1[5] = fmaf(src_f1, filter[4], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[3], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[2], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack2(src_ui3.z); + dst_f8->f1[6] = fmaf(src_f1, filter[4], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[3], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack3(src_ui3.z); + dst_f8->f1[7] = fmaf(src_f1, filter[4], dst_f8->f1[7]); +} + +__device__ void sobel_filter_5x5_bidirectional_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8x, d_float8 *dst_f8y, float *filterX, float *filterY) +{ + float src_f1; + 
uint3 src_ui3; + src_ui3 = *(reinterpret_cast(srcPtr)); + src_f1 = rpp_hip_unpack0(src_ui3.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[0], dst_f8x->f1[0]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[0], dst_f8y->f1[0]); + src_f1 = rpp_hip_unpack1(src_ui3.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[1], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[0], dst_f8x->f1[1]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[1], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[0], dst_f8y->f1[1]); + src_f1 = rpp_hip_unpack2(src_ui3.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[2], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[1], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[0], dst_f8x->f1[2]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[2], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[1], dst_f8y->f1[1]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[0], dst_f8y->f1[2]); + src_f1 = rpp_hip_unpack3(src_ui3.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[3], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[2], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[1], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[0], dst_f8x->f1[3]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[3], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[2], dst_f8y->f1[1]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[1], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[0], dst_f8y->f1[3]); + src_f1 = rpp_hip_unpack0(src_ui3.y); + dst_f8x->f1[0] = fmaf(src_f1, filterX[4], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[3], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[2], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[1], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[0], dst_f8x->f1[4]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[4], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[3], dst_f8y->f1[1]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[2], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, 
filterY[1], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[0], dst_f8y->f1[4]); + src_f1 = rpp_hip_unpack1(src_ui3.y); + dst_f8x->f1[1] = fmaf(src_f1, filterX[4], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[3], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[2], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[1], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[0], dst_f8x->f1[5]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[4], dst_f8y->f1[1]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[3], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[2], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[1], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[0], dst_f8y->f1[5]); + src_f1 = rpp_hip_unpack2(src_ui3.y); + dst_f8x->f1[2] = fmaf(src_f1, filterX[4], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[3], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[2], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[1], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[0], dst_f8x->f1[6]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[4], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[3], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[2], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[1], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[0], dst_f8y->f1[6]); + src_f1 = rpp_hip_unpack3(src_ui3.y); + dst_f8x->f1[3] = fmaf(src_f1, filterX[4], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[3], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[2], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[1], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[0], dst_f8x->f1[7]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[4], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[3], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[2], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[1], dst_f8y->f1[6]); + dst_f8y->f1[7] = 
fmaf(src_f1, filterY[0], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack0(src_ui3.z); + dst_f8x->f1[4] = fmaf(src_f1, filterX[4], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[3], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[2], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[1], dst_f8x->f1[7]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[4], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[3], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[2], dst_f8y->f1[6]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[1], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack1(src_ui3.z); + dst_f8x->f1[5] = fmaf(src_f1, filterX[4], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[3], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[2], dst_f8x->f1[7]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[4], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[3], dst_f8y->f1[6]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[2], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack2(src_ui3.z); + dst_f8x->f1[6] = fmaf(src_f1, filterX[4], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[3], dst_f8x->f1[7]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[4], dst_f8y->f1[6]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[3], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack3(src_ui3.z); + dst_f8x->f1[7] = fmaf(src_f1, filterX[4], dst_f8x->f1[7]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[4], dst_f8y->f1[7]); +} template __global__ void sobel_filter_3x3_pln_bidirection_tensor(T *srcPtr, @@ -192,6 +359,74 @@ __global__ void sobel_filter_3x3_pln_bidirection_tensor(T *srcPtr, } } +template +__global__ void sobel_filter_5x5_pln_bidirection_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc) +{ + int hipThreadIdx_x8 = hipThreadIdx_x << 3; + int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; + int id_y_o = hipBlockIdx_y * tileSize.y + hipThreadIdx_y; + int id_z = 
hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + int id_x_i = id_x_o - padLength; + int id_y_i = id_y_o - padLength; + + d_float8 sum_f8x, sum_f8y, sum_f8; + __shared__ uchar src_smem[SMEM_LENGTH_Y_1C][SMEM_LENGTH_X]; + + int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); + int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; + float *filterRowX1 = &sobel5x5XHip[0]; + float *filterRowX2 = &filterRowX1[5]; + float *filterRowX3 = &filterRowX1[10]; + float *filterRowX4 = &filterRowX1[15]; + float *filterRowX5 = &filterRowX1[20]; + float *filterRowY1 = &sobel5x5YHip[0]; + float *filterRowY2 = &filterRowY1[5]; + float *filterRowY3 = &filterRowY1[10]; + float *filterRowY4 = &filterRowY1[15]; + float *filterRowY5 = &filterRowY1[20]; + sum_f8x.f4[0] = static_cast(0); + sum_f8x.f4[1] = static_cast(0); + sum_f8y.f4[0] = static_cast(0); + sum_f8y.f4[1] = static_cast(0); + if ((id_x_i >= -(int)padLength) && (id_x_i < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_i >= 0) && (id_y_i < roiTensorPtrSrc[id_z].xywhROI.roiHeight)) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + else if(id_y_i < 0) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx + 2 * srcStridesNCH.z, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + if(id_x_i < 0) + { + src_smem[hipThreadIdx_y][0] = src_smem[hipThreadIdx_y][2]; + src_smem[hipThreadIdx_y][1] = src_smem[hipThreadIdx_y][2]; + } + + __syncthreads(); + if ((id_x_o < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_o < roiTensorPtrSrc[id_z].xywhROI.roiHeight) && + (hipThreadIdx_x < tileSize.x) && + (hipThreadIdx_y < tileSize.y)) + { + sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX1, filterRowY1); + sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8x, &sum_f8y, 
filterRowX2, filterRowY2); + sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX3, filterRowY3); + sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX4, filterRowY4); + sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX5, filterRowY5); + rpp_hip_pixel_check_0to255(&sum_f8x); + rpp_hip_pixel_check_0to255(&sum_f8y); + rpp_hip_adjust_range(dstPtr, &sum_f8x); + rpp_hip_adjust_range(dstPtr, &sum_f8y); + sobel_filter_bidirection_hip_compute(&sum_f8x, &sum_f8y, &sum_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &sum_f8); + } +} + template __global__ void sobel_filter_3x3_pln_unidirection_tensor(T *srcPtr, uint3 srcStridesNCH, @@ -243,6 +478,63 @@ __global__ void sobel_filter_3x3_pln_unidirection_tensor(T *srcPtr, } } +template +__global__ void sobel_filter_5x5_pln_unidirection_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc, + int sobelType) +{ + int hipThreadIdx_x8 = hipThreadIdx_x << 3; + int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; + int id_y_o = hipBlockIdx_y * tileSize.y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + int id_x_i = id_x_o - padLength; + int id_y_i = id_y_o - padLength; + d_float8 sum_f8; + __shared__ uchar src_smem[SMEM_LENGTH_Y_1C][SMEM_LENGTH_X]; + + int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); + int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; + float *filter_row1 = (!sobelType) ? 
sobel5x5XHip : sobel5x5YHip; + float *filter_row2 = &filter_row1[5]; + float *filter_row3 = &filter_row1[10]; + float *filter_row4 = &filter_row1[15]; + float *filter_row5 = &filter_row1[20]; + sum_f8.f4[0] = static_cast(0); + sum_f8.f4[1] = static_cast(0); + if ((id_x_i >= -(int)padLength) && (id_x_i < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_i >= 0) && (id_y_i < roiTensorPtrSrc[id_z].xywhROI.roiHeight)) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + else if(id_y_i < 0) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx + 2 * srcStridesNCH.z, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + if(id_x_i < 0) + { + src_smem[hipThreadIdx_y][0] = src_smem[hipThreadIdx_y][2]; + src_smem[hipThreadIdx_y][1] = src_smem[hipThreadIdx_y][2]; + } + __syncthreads(); + if ((id_x_o < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_o < roiTensorPtrSrc[id_z].xywhROI.roiHeight) && + (hipThreadIdx_x < tileSize.x) && + (hipThreadIdx_y < tileSize.y)) + { + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8, filter_row1); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8, filter_row2); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8, filter_row3); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8, filter_row4); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8, filter_row5); + rpp_hip_pixel_check_0to255(&sum_f8); + rpp_hip_adjust_range(dstPtr, &sum_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &sum_f8); + } +} + template RppStatus hip_exec_sobel_filter_tensor(T *srcPtr, RpptDescPtr srcDescPtr, @@ -304,6 +596,42 @@ RppStatus hip_exec_sobel_filter_tensor(T *srcPtr, sobelType); } } + else if (kernelSize == 5) + { + if(combined) + { + hipLaunchKernelGGL(sobel_filter_5x5_pln_bidirection_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), 
ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc); + } + else + { + hipLaunchKernelGGL(sobel_filter_5x5_pln_unidirection_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc, + sobelType); + } + } return RPP_SUCCESS; } \ No newline at end of file From d98da4cc51d51c304f77b0575a14949548df48f7 Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Fri, 20 Sep 2024 03:57:48 +0000 Subject: [PATCH 29/31] Adds 7x7 HIP implementation for sobel filter --- src/modules/hip/kernel/sobel_filter.hpp | 394 ++++++++++++++++++++++++ 1 file changed, 394 insertions(+) diff --git a/src/modules/hip/kernel/sobel_filter.hpp b/src/modules/hip/kernel/sobel_filter.hpp index 0ea05415a..6e4d9cd27 100644 --- a/src/modules/hip/kernel/sobel_filter.hpp +++ b/src/modules/hip/kernel/sobel_filter.hpp @@ -17,6 +17,20 @@ __device__ __constant__ float sobel5x5YHip[25] = {-1, -4, -6, -4, -1, 0, 0, 0, 0, 0, 2, 8, 12, 8, 2, 1, 4, 6, 4, 1}; +__device__ __constant__ float sobel7x7XHip[49] = {-1, -4, -5, 0, 5, 4, 1, + -6, -24, -30, 0, 30, 24, 6, + -15, -60, -75, 0, 75, 60, 15, + -20, -80, -100, 0, 100, 80, 20, + -15, -60, -75, 0, 75, 60, 15, + -6, -24, -30, 0, 
30, 24, 6, + -1, -4, -5, 0, 5, 4, 1}; +__device__ __constant__ float sobel7x7YHip[49] = {-1, -6, -15, -20, -15, -6, -1, + -4, -24, -60, -80, -60, -24, -4, + -5, -30, -75, -100, -75, -30, -5, + 0, 0, 0, 0, 0, 0, 0, + 5, 30, 75, 100, 75, 30, 5, + 4, 24, 60, 80, 60, 24, 4, + 1, 6, 15, 20, 15, 6, 1}; // -------------------- sobel_filter device helpers -------------------- @@ -301,6 +315,214 @@ __device__ void sobel_filter_5x5_bidirectional_row_hip_compute(uchar *srcPtr, d_ dst_f8y->f1[7] = fmaf(src_f1, filterY[4], dst_f8y->f1[7]); } +__device__ void sobel_filter_7x7_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8, float *filter) +{ + float src_f1; + uint4 src_ui4 = *(reinterpret_cast(srcPtr)); + src_f1 = rpp_hip_unpack0(src_ui4.x); + dst_f8->f1[0] = fmaf(src_f1, filter[0], dst_f8->f1[0]); + src_f1 = rpp_hip_unpack1(src_ui4.x); + dst_f8->f1[0] = fmaf(src_f1, filter[1], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[0], dst_f8->f1[1]); + src_f1 = rpp_hip_unpack2(src_ui4.x); + dst_f8->f1[0] = fmaf(src_f1, filter[2], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[1], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[0], dst_f8->f1[2]); + src_f1 = rpp_hip_unpack3(src_ui4.x); + dst_f8->f1[0] = fmaf(src_f1, filter[3], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[2], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[1], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[0], dst_f8->f1[3]); + src_f1 = rpp_hip_unpack0(src_ui4.y); + dst_f8->f1[0] = fmaf(src_f1, filter[4], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[3], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[2], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[1], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[0], dst_f8->f1[4]); + src_f1 = rpp_hip_unpack1(src_ui4.y); + dst_f8->f1[0] = fmaf(src_f1, filter[5], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[4], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[3], dst_f8->f1[2]); + dst_f8->f1[3] = 
fmaf(src_f1, filter[2], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[1], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[0], dst_f8->f1[5]); + src_f1 = rpp_hip_unpack2(src_ui4.y); + dst_f8->f1[0] = fmaf(src_f1, filter[6], dst_f8->f1[0]); + dst_f8->f1[1] = fmaf(src_f1, filter[5], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[4], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[3], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[2], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[1], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[0], dst_f8->f1[6]); + src_f1 = rpp_hip_unpack3(src_ui4.y); + dst_f8->f1[1] = fmaf(src_f1, filter[6], dst_f8->f1[1]); + dst_f8->f1[2] = fmaf(src_f1, filter[5], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[4], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[3], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[2], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[1], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[0], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack0(src_ui4.z); + dst_f8->f1[2] = fmaf(src_f1, filter[6], dst_f8->f1[2]); + dst_f8->f1[3] = fmaf(src_f1, filter[5], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[4], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[3], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[2], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[1], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack1(src_ui4.z); + dst_f8->f1[3] = fmaf(src_f1, filter[6], dst_f8->f1[3]); + dst_f8->f1[4] = fmaf(src_f1, filter[5], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[4], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[3], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[2], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack2(src_ui4.z); + dst_f8->f1[4] = fmaf(src_f1, filter[6], dst_f8->f1[4]); + dst_f8->f1[5] = fmaf(src_f1, filter[5], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[4], dst_f8->f1[6]); + dst_f8->f1[7] = 
fmaf(src_f1, filter[3], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack3(src_ui4.z); + dst_f8->f1[5] = fmaf(src_f1, filter[6], dst_f8->f1[5]); + dst_f8->f1[6] = fmaf(src_f1, filter[5], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[4], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack0(src_ui4.w); + dst_f8->f1[6] = fmaf(src_f1, filter[6], dst_f8->f1[6]); + dst_f8->f1[7] = fmaf(src_f1, filter[5], dst_f8->f1[7]); + src_f1 = rpp_hip_unpack1(src_ui4.w); + dst_f8->f1[7] = fmaf(src_f1, filter[6], dst_f8->f1[7]); +} + +/* Accumulates one source row into eight adjacent outputs for both gradients: dst_f8x->f1[i] += src[i + k] * filterX[k] and dst_f8y->f1[i] += src[i + k] * filterY[k], for i = 0..7 and k = 0..6. Reads 16 bytes from srcPtr as a uint4 and uses the first 14. */ +__device__ void sobel_filter_7x7_bidirectional_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8x, d_float8 *dst_f8y, float *filterX, float *filterY) +{ + float src_f1; + uint4 src_ui4 = *(reinterpret_cast<uint4 *>(srcPtr)); + src_f1 = rpp_hip_unpack0(src_ui4.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[0], dst_f8x->f1[0]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[0], dst_f8y->f1[0]); + src_f1 = rpp_hip_unpack1(src_ui4.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[1], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[0], dst_f8x->f1[1]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[1], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[0], dst_f8y->f1[1]); + src_f1 = rpp_hip_unpack2(src_ui4.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[2], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[1], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[0], dst_f8x->f1[2]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[2], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[1], dst_f8y->f1[1]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[0], dst_f8y->f1[2]); + src_f1 = rpp_hip_unpack3(src_ui4.x); + dst_f8x->f1[0] = fmaf(src_f1, filterX[3], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[2], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[1], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[0], dst_f8x->f1[3]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[3], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[2], dst_f8y->f1[1]); + dst_f8y->f1[2] = 
fmaf(src_f1, filterY[1], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[0], dst_f8y->f1[3]); + src_f1 = rpp_hip_unpack0(src_ui4.y); + dst_f8x->f1[0] = fmaf(src_f1, filterX[4], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[3], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[2], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[1], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[0], dst_f8x->f1[4]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[4], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[3], dst_f8y->f1[1]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[2], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[1], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[0], dst_f8y->f1[4]); + src_f1 = rpp_hip_unpack1(src_ui4.y); + dst_f8x->f1[0] = fmaf(src_f1, filterX[5], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[4], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[3], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[2], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[1], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[0], dst_f8x->f1[5]); + dst_f8y->f1[0] = fmaf(src_f1, filterY[5], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[4], dst_f8y->f1[1]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[3], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[2], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[1], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[0], dst_f8y->f1[5]); + src_f1 = rpp_hip_unpack2(src_ui4.y); + dst_f8x->f1[0] = fmaf(src_f1, filterX[6], dst_f8x->f1[0]); + dst_f8x->f1[1] = fmaf(src_f1, filterX[5], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[4], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[3], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[2], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[1], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[0], dst_f8x->f1[6]); + 
dst_f8y->f1[0] = fmaf(src_f1, filterY[6], dst_f8y->f1[0]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[5], dst_f8y->f1[1]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[4], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[3], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[2], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[1], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[0], dst_f8y->f1[6]); + src_f1 = rpp_hip_unpack3(src_ui4.y); + dst_f8x->f1[1] = fmaf(src_f1, filterX[6], dst_f8x->f1[1]); + dst_f8x->f1[2] = fmaf(src_f1, filterX[5], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[4], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[3], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[2], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[1], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[0], dst_f8x->f1[7]); + dst_f8y->f1[1] = fmaf(src_f1, filterY[6], dst_f8y->f1[1]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[5], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[4], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[3], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[2], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[1], dst_f8y->f1[6]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[0], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack0(src_ui4.z); + dst_f8x->f1[2] = fmaf(src_f1, filterX[6], dst_f8x->f1[2]); + dst_f8x->f1[3] = fmaf(src_f1, filterX[5], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[4], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[3], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[2], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[1], dst_f8x->f1[7]); + dst_f8y->f1[2] = fmaf(src_f1, filterY[6], dst_f8y->f1[2]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[5], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[4], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[3], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, 
filterY[2], dst_f8y->f1[6]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[1], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack1(src_ui4.z); + dst_f8x->f1[3] = fmaf(src_f1, filterX[6], dst_f8x->f1[3]); + dst_f8x->f1[4] = fmaf(src_f1, filterX[5], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[4], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[3], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[2], dst_f8x->f1[7]); + dst_f8y->f1[3] = fmaf(src_f1, filterY[6], dst_f8y->f1[3]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[5], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[4], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[3], dst_f8y->f1[6]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[2], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack2(src_ui4.z); + dst_f8x->f1[4] = fmaf(src_f1, filterX[6], dst_f8x->f1[4]); + dst_f8x->f1[5] = fmaf(src_f1, filterX[5], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[4], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[3], dst_f8x->f1[7]); + dst_f8y->f1[4] = fmaf(src_f1, filterY[6], dst_f8y->f1[4]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[5], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[4], dst_f8y->f1[6]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[3], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack3(src_ui4.z); + dst_f8x->f1[5] = fmaf(src_f1, filterX[6], dst_f8x->f1[5]); + dst_f8x->f1[6] = fmaf(src_f1, filterX[5], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[4], dst_f8x->f1[7]); + dst_f8y->f1[5] = fmaf(src_f1, filterY[6], dst_f8y->f1[5]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[5], dst_f8y->f1[6]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[4], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack0(src_ui4.w); + dst_f8x->f1[6] = fmaf(src_f1, filterX[6], dst_f8x->f1[6]); + dst_f8x->f1[7] = fmaf(src_f1, filterX[5], dst_f8x->f1[7]); + dst_f8y->f1[6] = fmaf(src_f1, filterY[6], dst_f8y->f1[6]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[5], dst_f8y->f1[7]); + src_f1 = rpp_hip_unpack1(src_ui4.w); + 
dst_f8x->f1[7] = fmaf(src_f1, filterX[6], dst_f8x->f1[7]); + dst_f8y->f1[7] = fmaf(src_f1, filterY[6], dst_f8y->f1[7]); +} + template __global__ void sobel_filter_3x3_pln_bidirection_tensor(T *srcPtr, uint3 srcStridesNCH, @@ -427,6 +649,80 @@ __global__ void sobel_filter_5x5_pln_bidirection_tensor(T *srcPtr, } } +template +__global__ void sobel_filter_7x7_pln_bidirection_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc) +{ + int hipThreadIdx_x8 = hipThreadIdx_x << 3; + int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; + int id_y_o = hipBlockIdx_y * tileSize.y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + int id_x_i = id_x_o - padLength; + int id_y_i = id_y_o - padLength; + + d_float8 sum_f8x, sum_f8y, sum_f8; + __shared__ uchar src_smem[SMEM_LENGTH_Y_1C][SMEM_LENGTH_X]; + + int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); + int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; + float *filterRowX1 = &sobel7x7XHip[0]; + float *filterRowX2 = &filterRowX1[7]; + float *filterRowX3 = &filterRowX1[14]; + float *filterRowX4 = &filterRowX1[21]; + float *filterRowX5 = &filterRowX1[28]; + float *filterRowX6 = &filterRowX1[35]; + float *filterRowX7 = &filterRowX1[42]; + float *filterRowY1 = &sobel7x7YHip[0]; + float *filterRowY2 = &filterRowY1[7]; + float *filterRowY3 = &filterRowY1[14]; + float *filterRowY4 = &filterRowY1[21]; + float *filterRowY5 = &filterRowY1[28]; + float *filterRowY6 = &filterRowY1[35]; + float *filterRowY7 = &filterRowY1[42]; + sum_f8x.f4[0] = static_cast(0); + sum_f8x.f4[1] = static_cast(0); + sum_f8y.f4[0] = static_cast(0); + sum_f8y.f4[1] = static_cast(0); + if ((id_x_i >= -(int)padLength) && (id_x_i < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_i >= 0) && 
(id_y_i < roiTensorPtrSrc[id_z].xywhROI.roiHeight)) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + else if(id_y_i < 0) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx + 2 * srcStridesNCH.z, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + if(id_x_i < 0) + { + src_smem[hipThreadIdx_y][0] = src_smem[hipThreadIdx_y][3]; + src_smem[hipThreadIdx_y][1] = src_smem[hipThreadIdx_y][3]; + src_smem[hipThreadIdx_y][2] = src_smem[hipThreadIdx_y][3]; + } + __syncthreads(); + if ((id_x_o < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_o < roiTensorPtrSrc[id_z].xywhROI.roiHeight) && + (hipThreadIdx_x < tileSize.x) && + (hipThreadIdx_y < tileSize.y)) + { + sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX1, filterRowY1); + sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX2, filterRowY2); + sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX3, filterRowY3); + sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX4, filterRowY4); + sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX5, filterRowY5); + sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 5][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX6, filterRowY6); + sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 6][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX7, filterRowY7); + rpp_hip_pixel_check_0to255(&sum_f8x); + rpp_hip_pixel_check_0to255(&sum_f8y); + rpp_hip_adjust_range(dstPtr, &sum_f8x); + rpp_hip_adjust_range(dstPtr, &sum_f8y); + sobel_filter_bidirection_hip_compute(&sum_f8x, &sum_f8y, &sum_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &sum_f8); + } +} + 
template __global__ void sobel_filter_3x3_pln_unidirection_tensor(T *srcPtr, uint3 srcStridesNCH, @@ -535,6 +831,68 @@ __global__ void sobel_filter_5x5_pln_unidirection_tensor(T *srcPtr, } } +template +__global__ void sobel_filter_7x7_pln_unidirection_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc, + int sobelType) +{ + int hipThreadIdx_x8 = hipThreadIdx_x << 3; + int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; + int id_y_o = hipBlockIdx_y * tileSize.y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + int id_x_i = id_x_o - padLength; + int id_y_i = id_y_o - padLength; + d_float8 sum_f8; + __shared__ uchar src_smem[SMEM_LENGTH_Y_1C][SMEM_LENGTH_X]; + + int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); + int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; + float *filter_row1 = (!sobelType) ? 
sobel7x7XHip : sobel7x7YHip; + float *filter_row2 = &filter_row1[7]; + float *filter_row3 = &filter_row1[14]; + float *filter_row4 = &filter_row1[21]; + float *filter_row5 = &filter_row1[28]; + float *filter_row6 = &filter_row1[35]; + float *filter_row7 = &filter_row1[42]; + sum_f8.f4[0] = static_cast(0); + sum_f8.f4[1] = static_cast(0); + if ((id_x_i >= -(int)padLength) && (id_x_i < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_i >= 0) && (id_y_i < roiTensorPtrSrc[id_z].xywhROI.roiHeight)) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + else if(id_y_i < 0) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx + 2 * srcStridesNCH.z, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + if(id_x_i < 0) + { + src_smem[hipThreadIdx_y][0] = src_smem[hipThreadIdx_y][3]; + src_smem[hipThreadIdx_y][1] = src_smem[hipThreadIdx_y][3]; + src_smem[hipThreadIdx_y][2] = src_smem[hipThreadIdx_y][3]; + } + __syncthreads(); + if ((id_x_o < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_o < roiTensorPtrSrc[id_z].xywhROI.roiHeight) && + (hipThreadIdx_x < tileSize.x) && + (hipThreadIdx_y < tileSize.y)) + { + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8, filter_row1); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8, filter_row2); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8, filter_row3); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8, filter_row4); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8, filter_row5); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 5][hipThreadIdx_x8], &sum_f8, filter_row6); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 6][hipThreadIdx_x8], &sum_f8, filter_row7); + rpp_hip_pixel_check_0to255(&sum_f8); + rpp_hip_adjust_range(dstPtr, &sum_f8); + rpp_hip_pack_float8_and_store8(dstPtr + 
dstIdx, &sum_f8); + } +} + template RppStatus hip_exec_sobel_filter_tensor(T *srcPtr, RpptDescPtr srcDescPtr, @@ -632,6 +990,42 @@ RppStatus hip_exec_sobel_filter_tensor(T *srcPtr, sobelType); } } + else if (kernelSize == 7) + { + if(combined) + { + hipLaunchKernelGGL(sobel_filter_7x7_pln_bidirection_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc); + } + else + { + hipLaunchKernelGGL(sobel_filter_7x7_pln_unidirection_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc, + sobelType); + } + } return RPP_SUCCESS; } \ No newline at end of file From 036317c81949f266782e251c7f284c9c30a6d7de Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Tue, 24 Sep 2024 17:04:39 +0000 Subject: [PATCH 30/31] Add version changes , update maps in common.py and code cleanup --- CHANGELOG.md | 4 +- CMakeLists.txt | 2 +- include/rpp_version.h | 2 +- src/modules/hip/kernel/sobel_filter.hpp | 756 ++++++++---------- .../rppt_tensor_filter_augmentations.cpp | 19 +- utilities/test_suite/common.py | 3 +- 6 files changed, 366 insertions(+), 420 deletions(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index 3649793a3..44a35c9ea 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,11 @@ Full documentation for RPP is available at [https://rocm.docs.amd.com/projects/rpp/en/latest](https://rocm.docs.amd.com/projects/rpp/en/latest) -## RPP 1.10.0 (unreleased) +## RPP 1.10.1 (unreleased) ### Changes -* RPP Tensor Sobel Filter support on HOST +* RPP Tensor Sobel Filter support on HIP ## RPP 1.9.1 for ROCm 6.3.0 diff --git a/CMakeLists.txt b/CMakeLists.txt index 9726b2365..ea064f4c4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,7 @@ endif() set(CMAKE_CXX_STANDARD 17) # RPP Version -set(VERSION "1.10.0") +set(VERSION "1.10.1") # Set Project Version and Language project(rpp VERSION ${VERSION} LANGUAGES CXX) diff --git a/include/rpp_version.h b/include/rpp_version.h index 94ba02c27..490868183 100644 --- a/include/rpp_version.h +++ b/include/rpp_version.h @@ -40,7 +40,7 @@ extern "C" { // NOTE: IMPORTANT: Match the version with CMakelists.txt version #define RPP_VERSION_MAJOR 1 #define RPP_VERSION_MINOR 10 -#define RPP_VERSION_PATCH 0 +#define RPP_VERSION_PATCH 1 #ifdef __cplusplus } #endif diff --git a/src/modules/hip/kernel/sobel_filter.hpp b/src/modules/hip/kernel/sobel_filter.hpp index 6e4d9cd27..303aecc8b 100644 --- a/src/modules/hip/kernel/sobel_filter.hpp +++ b/src/modules/hip/kernel/sobel_filter.hpp @@ -2,35 +2,35 @@ #include "rpp_hip_common.hpp" __device__ __constant__ float sobel3x3XHip[9] = {-1, 0, 1, - -2, 0, 2, - -1, 0, 1}; + -2, 0, 2, + -1, 0, 1}; __device__ __constant__ float sobel3x3YHip[9] = {-1, -2, -1, 0, 0, 0, 1, 2, 1}; __device__ __constant__ float sobel5x5XHip[25] = {-1, -2, 0, 2, 1, - -4, -8, 0, 8, 4, - -6, -12, 0, 12, 6, - -4, -8, 0, 8, 4, - -1, -2, 0, 2, 1}; + -4, -8, 0, 8, 4, + -6, -12, 0, 12, 6, + -4, -8, 0, 8, 4, + -1, -2, 0, 2, 1}; __device__ __constant__ float sobel5x5YHip[25] = {-1, -4, -6, -4, -1, -2, -8, -12, -8, -2, - 0, 0, 0, 0, 0, - 2, 8, 12, 8, 2, - 1, 4, 6, 4, 1}; + 0, 0, 0, 0, 0, + 2, 8, 12, 8, 2, + 
1, 4, 6, 4, 1}; __device__ __constant__ float sobel7x7XHip[49] = {-1, -4, -5, 0, 5, 4, 1, - -6, -24, -30, 0, 30, 24, 6, - -15, -60, -75, 0, 75, 60, 15, - -20, -80, -100, 0, 100, 80, 20, - -15, -60, -75, 0, 75, 60, 15, - -6, -24, -30, 0, 30, 24, 6, - -1, -4, -5, 0, 5, 4, 1}; + -6, -24, -30, 0, 30, 24, 6, + -15, -60, -75, 0, 75, 60, 15, + -20, -80, -100, 0, 100, 80, 20, + -15, -60, -75, 0, 75, 60, 15, + -6, -24, -30, 0, 30, 24, 6, + -1, -4, -5, 0, 5, 4, 1}; __device__ __constant__ float sobel7x7YHip[49] = {-1, -6, -15, -20, -15, -6, -1, - -4, -24, -60, -80, -60, -24, -4, - -5, -30, -75, -100, -75, -30, -5, - 0, 0, 0, 0, 0, 0, 0, - 5, 30, 75, 100, 75, 30, 5, - 4, 24, 60, 80, 60, 24, 4, - 1, 6, 15, 20, 15, 6, 1}; + -4, -24, -60, -80, -60, -24, -4, + -5, -30, -75, -100, -75, -30, -5, + 0, 0, 0, 0, 0, 0, 0, + 5, 30, 75, 100, 75, 30, 5, + 4, 24, 60, 80, 60, 24, 4, + 1, 6, 15, 20, 15, 6, 1}; // -------------------- sobel_filter device helpers -------------------- @@ -83,80 +83,6 @@ __device__ __forceinline__ void sobel_filter_3x3_row_hip_compute(uchar *srcPtr, dst_f8->f1[7] = fmaf(src_f1, filter[2], dst_f8->f1[7]); } -__device__ __forceinline__ void sobel_filter_3x3_bidirectional_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8x, d_float8 *dst_f8y, float *filterX, float *filterY) -{ - float src_f1; - uint3 src_ui3; - src_ui3 = *(reinterpret_cast(srcPtr)); - src_f1 = rpp_hip_unpack0(src_ui3.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[0], dst_f8x->f1[0]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[0], dst_f8y->f1[0]); - - src_f1 = rpp_hip_unpack1(src_ui3.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[1], dst_f8x->f1[0]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[1], dst_f8y->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[0], dst_f8x->f1[1]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[0], dst_f8y->f1[1]); - - src_f1 = rpp_hip_unpack2(src_ui3.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[2], dst_f8x->f1[0]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[2], dst_f8y->f1[0]); - dst_f8x->f1[1] = 
fmaf(src_f1, filterX[1], dst_f8x->f1[1]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[1], dst_f8y->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[0], dst_f8x->f1[2]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[0], dst_f8y->f1[2]); - - src_f1 = rpp_hip_unpack3(src_ui3.x); - dst_f8x->f1[1] = fmaf(src_f1, filterX[2], dst_f8x->f1[1]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[2], dst_f8y->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[1], dst_f8x->f1[2]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[1], dst_f8y->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[0], dst_f8x->f1[3]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[0], dst_f8y->f1[3]); - - src_f1 = rpp_hip_unpack0(src_ui3.y); - dst_f8x->f1[2] = fmaf(src_f1, filterX[2], dst_f8x->f1[2]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[2], dst_f8y->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[1], dst_f8x->f1[3]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[1], dst_f8y->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[0], dst_f8x->f1[4]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[0], dst_f8y->f1[4]); - - src_f1 = rpp_hip_unpack1(src_ui3.y); - dst_f8x->f1[3] = fmaf(src_f1, filterX[2], dst_f8x->f1[3]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[2], dst_f8y->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[1], dst_f8x->f1[4]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[1], dst_f8y->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[0], dst_f8x->f1[5]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[0], dst_f8y->f1[5]); - - src_f1 = rpp_hip_unpack2(src_ui3.y); - dst_f8x->f1[4] = fmaf(src_f1, filterX[2], dst_f8x->f1[4]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[2], dst_f8y->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[1], dst_f8x->f1[5]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[1], dst_f8y->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[0], dst_f8x->f1[6]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[0], dst_f8y->f1[6]); - - src_f1 = rpp_hip_unpack3(src_ui3.y); - dst_f8x->f1[5] = fmaf(src_f1, filterX[2], dst_f8x->f1[5]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[2], 
dst_f8y->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[1], dst_f8x->f1[6]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[1], dst_f8y->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[0], dst_f8x->f1[7]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[0], dst_f8y->f1[7]); - - src_f1 = rpp_hip_unpack0(src_ui3.z); - dst_f8x->f1[6] = fmaf(src_f1, filterX[2], dst_f8x->f1[6]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[2], dst_f8y->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[1], dst_f8x->f1[7]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[1], dst_f8y->f1[7]); - - src_f1 = rpp_hip_unpack1(src_ui3.z); - dst_f8x->f1[7] = fmaf(src_f1, filterX[2], dst_f8x->f1[7]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[2], dst_f8y->f1[7]); -} - __device__ void sobel_filter_5x5_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8, float *filter) { float src_f1; @@ -216,105 +142,6 @@ __device__ void sobel_filter_5x5_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8 dst_f8->f1[7] = fmaf(src_f1, filter[4], dst_f8->f1[7]); } -__device__ void sobel_filter_5x5_bidirectional_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8x, d_float8 *dst_f8y, float *filterX, float *filterY) -{ - float src_f1; - uint3 src_ui3; - src_ui3 = *(reinterpret_cast(srcPtr)); - src_f1 = rpp_hip_unpack0(src_ui3.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[0], dst_f8x->f1[0]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[0], dst_f8y->f1[0]); - src_f1 = rpp_hip_unpack1(src_ui3.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[1], dst_f8x->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[0], dst_f8x->f1[1]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[1], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterX[0], dst_f8y->f1[1]); - src_f1 = rpp_hip_unpack2(src_ui3.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[2], dst_f8x->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[1], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[0], dst_f8x->f1[2]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[2], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[1], 
dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[0], dst_f8y->f1[2]); - src_f1 = rpp_hip_unpack3(src_ui3.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[3], dst_f8x->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[2], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[1], dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[0], dst_f8x->f1[3]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[3], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[2], dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[1], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[0], dst_f8y->f1[3]); - src_f1 = rpp_hip_unpack0(src_ui3.y); - dst_f8x->f1[0] = fmaf(src_f1, filterX[4], dst_f8x->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[3], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[2], dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[1], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[0], dst_f8x->f1[4]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[4], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[3], dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[2], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[1], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[0], dst_f8y->f1[4]); - src_f1 = rpp_hip_unpack1(src_ui3.y); - dst_f8x->f1[1] = fmaf(src_f1, filterX[4], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[3], dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[2], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[1], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[0], dst_f8x->f1[5]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[4], dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[3], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[2], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[1], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[0], dst_f8y->f1[5]); - src_f1 = rpp_hip_unpack2(src_ui3.y); - dst_f8x->f1[2] = fmaf(src_f1, filterX[4], 
dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[3], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[2], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[1], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[0], dst_f8x->f1[6]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[4], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[3], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[2], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[1], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[0], dst_f8y->f1[6]); - src_f1 = rpp_hip_unpack3(src_ui3.y); - dst_f8x->f1[3] = fmaf(src_f1, filterX[4], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[3], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[2], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[1], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[0], dst_f8x->f1[7]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[4], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[3], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[2], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[1], dst_f8y->f1[6]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[0], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack0(src_ui3.z); - dst_f8x->f1[4] = fmaf(src_f1, filterX[4], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[3], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[2], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[1], dst_f8x->f1[7]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[4], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[3], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[2], dst_f8y->f1[6]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[1], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack1(src_ui3.z); - dst_f8x->f1[5] = fmaf(src_f1, filterX[4], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[3], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[2], dst_f8x->f1[7]); - dst_f8y->f1[5] = fmaf(src_f1, 
filterY[4], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[3], dst_f8y->f1[6]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[2], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack2(src_ui3.z); - dst_f8x->f1[6] = fmaf(src_f1, filterX[4], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[3], dst_f8x->f1[7]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[4], dst_f8y->f1[6]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[3], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack3(src_ui3.z); - dst_f8x->f1[7] = fmaf(src_f1, filterX[4], dst_f8x->f1[7]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[4], dst_f8y->f1[7]); -} - __device__ void sobel_filter_7x7_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8, float *filter) { float src_f1; @@ -391,138 +218,6 @@ __device__ void sobel_filter_7x7_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8 dst_f8->f1[7] = fmaf(src_f1, filter[6], dst_f8->f1[7]); } -__device__ void sobel_filter_7x7_bidirectional_row_hip_compute(uchar *srcPtr, d_float8 *dst_f8x, d_float8 *dst_f8y, float *filterX, float *filterY) -{ - float src_f1; - uint4 src_ui4 = *(reinterpret_cast(srcPtr)); - src_f1 = rpp_hip_unpack0(src_ui4.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[0], dst_f8x->f1[0]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[0], dst_f8y->f1[0]); - src_f1 = rpp_hip_unpack1(src_ui4.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[1], dst_f8x->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[0], dst_f8x->f1[1]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[1], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterX[0], dst_f8y->f1[1]); - src_f1 = rpp_hip_unpack2(src_ui4.x); - dst_f8x->f1[0] = fmaf(src_f1, filterX[2], dst_f8x->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[1], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[0], dst_f8x->f1[2]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[2], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[1], dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[0], dst_f8y->f1[2]); - src_f1 = rpp_hip_unpack3(src_ui4.x); - dst_f8x->f1[0] = 
fmaf(src_f1, filterX[3], dst_f8x->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[2], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[1], dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[0], dst_f8x->f1[3]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[3], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[2], dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[1], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[0], dst_f8y->f1[3]); - src_f1 = rpp_hip_unpack0(src_ui4.y); - dst_f8x->f1[0] = fmaf(src_f1, filterX[4], dst_f8x->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[3], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[2], dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[1], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[0], dst_f8x->f1[4]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[4], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[3], dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[2], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[1], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[0], dst_f8y->f1[4]); - src_f1 = rpp_hip_unpack1(src_ui4.y); - dst_f8x->f1[0] = fmaf(src_f1, filterX[5], dst_f8x->f1[0]); - dst_f8x->f1[1] = fmaf(src_f1, filterX[4], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[3], dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[2], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[1], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[0], dst_f8x->f1[5]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[5], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[4], dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[3], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[2], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[1], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[0], dst_f8y->f1[5]); - src_f1 = rpp_hip_unpack2(src_ui4.y); - dst_f8x->f1[0] = fmaf(src_f1, filterX[6], dst_f8x->f1[0]); - 
dst_f8x->f1[1] = fmaf(src_f1, filterX[5], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[4], dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[3], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[2], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[1], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[0], dst_f8x->f1[6]); - dst_f8y->f1[0] = fmaf(src_f1, filterY[6], dst_f8y->f1[0]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[5], dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[4], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[3], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[2], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[1], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[0], dst_f8y->f1[6]); - src_f1 = rpp_hip_unpack3(src_ui4.y); - dst_f8x->f1[1] = fmaf(src_f1, filterX[6], dst_f8x->f1[1]); - dst_f8x->f1[2] = fmaf(src_f1, filterX[5], dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[4], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[3], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[2], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[1], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[0], dst_f8x->f1[7]); - dst_f8y->f1[1] = fmaf(src_f1, filterY[6], dst_f8y->f1[1]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[5], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[4], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[3], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[2], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[1], dst_f8y->f1[6]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[0], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack0(src_ui4.z); - dst_f8x->f1[2] = fmaf(src_f1, filterX[6], dst_f8x->f1[2]); - dst_f8x->f1[3] = fmaf(src_f1, filterX[5], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[4], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[3], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, 
filterX[2], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[1], dst_f8x->f1[7]); - dst_f8y->f1[2] = fmaf(src_f1, filterY[6], dst_f8y->f1[2]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[5], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[4], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[3], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[2], dst_f8y->f1[6]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[1], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack1(src_ui4.z); - dst_f8x->f1[3] = fmaf(src_f1, filterX[6], dst_f8x->f1[3]); - dst_f8x->f1[4] = fmaf(src_f1, filterX[5], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[4], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[3], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[2], dst_f8x->f1[7]); - dst_f8y->f1[3] = fmaf(src_f1, filterY[6], dst_f8y->f1[3]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[5], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[4], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[3], dst_f8y->f1[6]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[2], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack2(src_ui4.z); - dst_f8x->f1[4] = fmaf(src_f1, filterX[6], dst_f8x->f1[4]); - dst_f8x->f1[5] = fmaf(src_f1, filterX[5], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[4], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[3], dst_f8x->f1[7]); - dst_f8y->f1[4] = fmaf(src_f1, filterY[6], dst_f8y->f1[4]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[5], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[4], dst_f8y->f1[6]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[3], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack3(src_ui4.z); - dst_f8x->f1[5] = fmaf(src_f1, filterX[6], dst_f8x->f1[5]); - dst_f8x->f1[6] = fmaf(src_f1, filterX[5], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[4], dst_f8x->f1[7]); - dst_f8y->f1[5] = fmaf(src_f1, filterY[6], dst_f8y->f1[5]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[5], dst_f8y->f1[6]); - dst_f8y->f1[7] = 
fmaf(src_f1, filterY[4], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack0(src_ui4.w); - dst_f8x->f1[6] = fmaf(src_f1, filterX[6], dst_f8x->f1[6]); - dst_f8x->f1[7] = fmaf(src_f1, filterX[5], dst_f8x->f1[7]); - dst_f8y->f1[6] = fmaf(src_f1, filterY[6], dst_f8y->f1[6]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[5], dst_f8y->f1[7]); - src_f1 = rpp_hip_unpack1(src_ui4.w); - dst_f8x->f1[7] = fmaf(src_f1, filterX[6], dst_f8x->f1[7]); - dst_f8y->f1[7] = fmaf(src_f1, filterY[6], dst_f8y->f1[7]); -} - template __global__ void sobel_filter_3x3_pln_bidirection_tensor(T *srcPtr, uint3 srcStridesNCH, @@ -569,9 +264,12 @@ __global__ void sobel_filter_3x3_pln_bidirection_tensor(T *srcPtr, (hipThreadIdx_x < tileSize.x) && (hipThreadIdx_y < tileSize.y)) { - sobel_filter_3x3_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX1, filterRowY1); - sobel_filter_3x3_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX2, filterRowY2); - sobel_filter_3x3_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX3, filterRowY3); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8x, filterRowX1); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8x, filterRowX2); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8x, filterRowX3); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8y, filterRowY1); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8y, filterRowY2); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8y, filterRowY3); rpp_hip_pixel_check_0to255(&sum_f8x); rpp_hip_pixel_check_0to255(&sum_f8y); rpp_hip_adjust_range(dstPtr, &sum_f8x); @@ -635,11 +333,16 @@ __global__ void 
sobel_filter_5x5_pln_bidirection_tensor(T *srcPtr, (hipThreadIdx_x < tileSize.x) && (hipThreadIdx_y < tileSize.y)) { - sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX1, filterRowY1); - sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX2, filterRowY2); - sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX3, filterRowY3); - sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX4, filterRowY4); - sobel_filter_5x5_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX5, filterRowY5); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8x, filterRowX1); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8x, filterRowX2); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8x, filterRowX3); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8x, filterRowX4); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8x, filterRowX5); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8y, filterRowY1); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8y, filterRowY2); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8y, filterRowY3); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8y, filterRowY4); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8y, filterRowY5); rpp_hip_pixel_check_0to255(&sum_f8x); rpp_hip_pixel_check_0to255(&sum_f8y); 
rpp_hip_adjust_range(dstPtr, &sum_f8x); @@ -707,13 +410,20 @@ __global__ void sobel_filter_7x7_pln_bidirection_tensor(T *srcPtr, (hipThreadIdx_x < tileSize.x) && (hipThreadIdx_y < tileSize.y)) { - sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX1, filterRowY1); - sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX2, filterRowY2); - sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX3, filterRowY3); - sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX4, filterRowY4); - sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX5, filterRowY5); - sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 5][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX6, filterRowY6); - sobel_filter_7x7_bidirectional_row_hip_compute(&src_smem[hipThreadIdx_y + 6][hipThreadIdx_x8], &sum_f8x, &sum_f8y, filterRowX7, filterRowY7); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8x, filterRowX1); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8x, filterRowX2); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8x, filterRowX3); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8x, filterRowX4); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8x, filterRowX5); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 5][hipThreadIdx_x8], &sum_f8x, filterRowX6); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 6][hipThreadIdx_x8], &sum_f8x, filterRowX7); + 
sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8y, filterRowY1); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8y, filterRowY2); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8y, filterRowY3); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8y, filterRowY4); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8y, filterRowY5); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 5][hipThreadIdx_x8], &sum_f8y, filterRowY6); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 6][hipThreadIdx_x8], &sum_f8y, filterRowY7); rpp_hip_pixel_check_0to255(&sum_f8x); rpp_hip_pixel_check_0to255(&sum_f8y); rpp_hip_adjust_range(dstPtr, &sum_f8x); @@ -724,15 +434,181 @@ __global__ void sobel_filter_7x7_pln_bidirection_tensor(T *srcPtr, } template -__global__ void sobel_filter_3x3_pln_unidirection_tensor(T *srcPtr, - uint3 srcStridesNCH, - T *dstPtr, - uint3 dstStridesNCH, - int channelsDst, - uint padLength, - uint2 tileSize, - RpptROIPtr roiTensorPtrSrc, - int sobelType) +__global__ void sobel_filter_3x3_pln_x_gradient_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc) +{ + int hipThreadIdx_x8 = hipThreadIdx_x << 3; + int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; + int id_y_o = hipBlockIdx_y * tileSize.y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + int id_x_i = id_x_o - padLength; + int id_y_i = id_y_o - padLength; + + d_float8 sum_f8; + __shared__ uchar src_smem[SMEM_LENGTH_Y_1C][SMEM_LENGTH_X]; + + int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); + int dstIdx = (id_z * dstStridesNCH.x) + 
(id_y_o * dstStridesNCH.z) + id_x_o; + float *filter_row1 = &sobel3x3XHip[0]; + float *filter_row2 = &filter_row1[3]; + float *filter_row3 = &filter_row1[6]; + sum_f8.f4[0] = static_cast(0); + sum_f8.f4[1] = static_cast(0); + if ((id_x_i >= -(int)padLength) && (id_x_i < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_i >= 0) && (id_y_i < roiTensorPtrSrc[id_z].xywhROI.roiHeight)) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + else if(id_y_i < 0) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx + srcStridesNCH.z, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + if(id_x_i < 0) + src_smem[hipThreadIdx_y][0] = src_smem[hipThreadIdx_y][1]; + __syncthreads(); + if ((id_x_o < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_o < roiTensorPtrSrc[id_z].xywhROI.roiHeight) && + (hipThreadIdx_x < tileSize.x) && + (hipThreadIdx_y < tileSize.y)) + { + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8, filter_row1); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8, filter_row2); + sobel_filter_3x3_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8, filter_row3); + rpp_hip_pixel_check_0to255(&sum_f8); + rpp_hip_adjust_range(dstPtr, &sum_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &sum_f8); + } +} + +template +__global__ void sobel_filter_5x5_pln_x_gradient_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc) +{ + int hipThreadIdx_x8 = hipThreadIdx_x << 3; + int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; + int id_y_o = hipBlockIdx_y * tileSize.y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + int id_x_i = id_x_o - padLength; + int id_y_i = id_y_o - padLength; + d_float8 sum_f8; + __shared__ uchar src_smem[SMEM_LENGTH_Y_1C][SMEM_LENGTH_X]; + + int srcIdx = (id_z * 
srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); + int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; + float *filter_row1 = &sobel5x5XHip[0]; + float *filter_row2 = &filter_row1[5]; + float *filter_row3 = &filter_row1[10]; + float *filter_row4 = &filter_row1[15]; + float *filter_row5 = &filter_row1[20]; + sum_f8.f4[0] = static_cast(0); + sum_f8.f4[1] = static_cast(0); + if ((id_x_i >= -(int)padLength) && (id_x_i < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_i >= 0) && (id_y_i < roiTensorPtrSrc[id_z].xywhROI.roiHeight)) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + else if(id_y_i < 0) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx + 2 * srcStridesNCH.z, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + if(id_x_i < 0) + { + src_smem[hipThreadIdx_y][0] = src_smem[hipThreadIdx_y][2]; + src_smem[hipThreadIdx_y][1] = src_smem[hipThreadIdx_y][2]; + } + __syncthreads(); + if ((id_x_o < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_o < roiTensorPtrSrc[id_z].xywhROI.roiHeight) && + (hipThreadIdx_x < tileSize.x) && + (hipThreadIdx_y < tileSize.y)) + { + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8, filter_row1); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8, filter_row2); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8, filter_row3); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8, filter_row4); + sobel_filter_5x5_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8, filter_row5); + rpp_hip_pixel_check_0to255(&sum_f8); + rpp_hip_adjust_range(dstPtr, &sum_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &sum_f8); + } +} + +template +__global__ void sobel_filter_7x7_pln_x_gradient_tensor(T *srcPtr, + uint3 srcStridesNCH, + T 
*dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc) +{ + int hipThreadIdx_x8 = hipThreadIdx_x << 3; + int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; + int id_y_o = hipBlockIdx_y * tileSize.y + hipThreadIdx_y; + int id_z = hipBlockIdx_z * hipBlockDim_z + hipThreadIdx_z; + + int id_x_i = id_x_o - padLength; + int id_y_i = id_y_o - padLength; + d_float8 sum_f8; + __shared__ uchar src_smem[SMEM_LENGTH_Y_1C][SMEM_LENGTH_X]; + + int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); + int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; + float *filter_row1 = &sobel7x7XHip[0]; + float *filter_row2 = &filter_row1[7]; + float *filter_row3 = &filter_row1[14]; + float *filter_row4 = &filter_row1[21]; + float *filter_row5 = &filter_row1[28]; + float *filter_row6 = &filter_row1[35]; + float *filter_row7 = &filter_row1[42]; + sum_f8.f4[0] = static_cast(0); + sum_f8.f4[1] = static_cast(0); + if ((id_x_i >= -(int)padLength) && (id_x_i < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_i >= 0) && (id_y_i < roiTensorPtrSrc[id_z].xywhROI.roiHeight)) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + else if(id_y_i < 0) + rpp_hip_load8_to_uchar8(srcPtr + srcIdx + 2 * srcStridesNCH.z, &src_smem[hipThreadIdx_y][hipThreadIdx_x8]); + if(id_x_i < 0) + { + src_smem[hipThreadIdx_y][0] = src_smem[hipThreadIdx_y][3]; + src_smem[hipThreadIdx_y][1] = src_smem[hipThreadIdx_y][3]; + src_smem[hipThreadIdx_y][2] = src_smem[hipThreadIdx_y][3]; + } + __syncthreads(); + if ((id_x_o < roiTensorPtrSrc[id_z].xywhROI.roiWidth) && + (id_y_o < roiTensorPtrSrc[id_z].xywhROI.roiHeight) && + (hipThreadIdx_x < tileSize.x) && + (hipThreadIdx_y < tileSize.y)) + { + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y ][hipThreadIdx_x8], &sum_f8, filter_row1); + 
sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 1][hipThreadIdx_x8], &sum_f8, filter_row2); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 2][hipThreadIdx_x8], &sum_f8, filter_row3); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 3][hipThreadIdx_x8], &sum_f8, filter_row4); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 4][hipThreadIdx_x8], &sum_f8, filter_row5); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 5][hipThreadIdx_x8], &sum_f8, filter_row6); + sobel_filter_7x7_row_hip_compute(&src_smem[hipThreadIdx_y + 6][hipThreadIdx_x8], &sum_f8, filter_row7); + rpp_hip_pixel_check_0to255(&sum_f8); + rpp_hip_adjust_range(dstPtr, &sum_f8); + rpp_hip_pack_float8_and_store8(dstPtr + dstIdx, &sum_f8); + } +} + +template +__global__ void sobel_filter_3x3_pln_y_gradient_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc) { int hipThreadIdx_x8 = hipThreadIdx_x << 3; int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; @@ -747,7 +623,7 @@ __global__ void sobel_filter_3x3_pln_unidirection_tensor(T *srcPtr, int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; - float *filter_row1 = ((!sobelType) ? 
sobel3x3XHip : sobel3x3YHip); + float *filter_row1 = &sobel3x3YHip[0]; float *filter_row2 = &filter_row1[3]; float *filter_row3 = &filter_row1[6]; sum_f8.f4[0] = static_cast(0); @@ -775,15 +651,14 @@ __global__ void sobel_filter_3x3_pln_unidirection_tensor(T *srcPtr, } template -__global__ void sobel_filter_5x5_pln_unidirection_tensor(T *srcPtr, - uint3 srcStridesNCH, - T *dstPtr, - uint3 dstStridesNCH, - int channelsDst, - uint padLength, - uint2 tileSize, - RpptROIPtr roiTensorPtrSrc, - int sobelType) +__global__ void sobel_filter_5x5_pln_y_gradient_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc) { int hipThreadIdx_x8 = hipThreadIdx_x << 3; int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; @@ -797,7 +672,7 @@ __global__ void sobel_filter_5x5_pln_unidirection_tensor(T *srcPtr, int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; - float *filter_row1 = (!sobelType) ? 
sobel5x5XHip : sobel5x5YHip; + float *filter_row1 = &sobel5x5YHip[0]; float *filter_row2 = &filter_row1[5]; float *filter_row3 = &filter_row1[10]; float *filter_row4 = &filter_row1[15]; @@ -832,15 +707,14 @@ __global__ void sobel_filter_5x5_pln_unidirection_tensor(T *srcPtr, } template -__global__ void sobel_filter_7x7_pln_unidirection_tensor(T *srcPtr, - uint3 srcStridesNCH, - T *dstPtr, - uint3 dstStridesNCH, - int channelsDst, - uint padLength, - uint2 tileSize, - RpptROIPtr roiTensorPtrSrc, - int sobelType) +__global__ void sobel_filter_7x7_pln_y_gradient_tensor(T *srcPtr, + uint3 srcStridesNCH, + T *dstPtr, + uint3 dstStridesNCH, + int channelsDst, + uint padLength, + uint2 tileSize, + RpptROIPtr roiTensorPtrSrc) { int hipThreadIdx_x8 = hipThreadIdx_x << 3; int id_x_o = (hipBlockIdx_x * tileSize.x * 8) + hipThreadIdx_x8; @@ -854,7 +728,7 @@ __global__ void sobel_filter_7x7_pln_unidirection_tensor(T *srcPtr, int srcIdx = (id_z * srcStridesNCH.x) + ((id_y_i + roiTensorPtrSrc[id_z].xywhROI.xy.y) * srcStridesNCH.z) + (id_x_i + roiTensorPtrSrc[id_z].xywhROI.xy.x); int dstIdx = (id_z * dstStridesNCH.x) + (id_y_o * dstStridesNCH.z) + id_x_o; - float *filter_row1 = (!sobelType) ? 
sobel7x7XHip : sobel7x7YHip; + float *filter_row1 = &sobel7x7YHip[0]; float *filter_row2 = &filter_row1[7]; float *filter_row3 = &filter_row1[14]; float *filter_row4 = &filter_row1[21]; @@ -938,20 +812,38 @@ RppStatus hip_exec_sobel_filter_tensor(T *srcPtr, } else { - hipLaunchKernelGGL(sobel_filter_3x3_pln_unidirection_tensor, - dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), - dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), - 0, - handle.GetStream(), - srcPtr, - make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), - dstPtr, - make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), - dstDescPtr->c, - padLength, - tileSize, - roiTensorPtrSrc, - sobelType); + if(!sobelType) + { + hipLaunchKernelGGL(sobel_filter_3x3_pln_x_gradient_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc); + } + else + { + hipLaunchKernelGGL(sobel_filter_3x3_pln_y_gradient_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, 
+ roiTensorPtrSrc); + } } } else if (kernelSize == 5) @@ -974,20 +866,38 @@ RppStatus hip_exec_sobel_filter_tensor(T *srcPtr, } else { - hipLaunchKernelGGL(sobel_filter_5x5_pln_unidirection_tensor, - dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), - dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), - 0, - handle.GetStream(), - srcPtr, - make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), - dstPtr, - make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), - dstDescPtr->c, - padLength, - tileSize, - roiTensorPtrSrc, - sobelType); + if(!sobelType) + { + hipLaunchKernelGGL(sobel_filter_5x5_pln_x_gradient_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc); + } + else + { + hipLaunchKernelGGL(sobel_filter_5x5_pln_y_gradient_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc); + } } } else if (kernelSize == 7) @@ -1010,22 +920,40 @@ RppStatus hip_exec_sobel_filter_tensor(T *srcPtr, } 
else { - hipLaunchKernelGGL(sobel_filter_7x7_pln_unidirection_tensor, - dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), - dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), - 0, - handle.GetStream(), - srcPtr, - make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), - dstPtr, - make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), - dstDescPtr->c, - padLength, - tileSize, - roiTensorPtrSrc, - sobelType); + if(!sobelType) + { + hipLaunchKernelGGL(sobel_filter_7x7_pln_x_gradient_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc); + } + else + { + hipLaunchKernelGGL(sobel_filter_7x7_pln_y_gradient_tensor, + dim3(ceil((float)globalThreads_x/tileSize.x), ceil((float)globalThreads_y/tileSize.y), ceil((float)globalThreads_z/LOCAL_THREADS_Z)), + dim3(LOCAL_THREADS_X, LOCAL_THREADS_Y, LOCAL_THREADS_Z), + 0, + handle.GetStream(), + srcPtr, + make_uint3(srcDescPtr->strides.nStride, srcDescPtr->strides.cStride, srcDescPtr->strides.hStride), + dstPtr, + make_uint3(dstDescPtr->strides.nStride, dstDescPtr->strides.cStride, dstDescPtr->strides.hStride), + dstDescPtr->c, + padLength, + tileSize, + roiTensorPtrSrc); + } } } return RPP_SUCCESS; -} \ No newline at end of file +} diff --git a/src/modules/rppt_tensor_filter_augmentations.cpp b/src/modules/rppt_tensor_filter_augmentations.cpp index 2c6bdc0ad..68752a69b 100644 --- 
a/src/modules/rppt_tensor_filter_augmentations.cpp +++ b/src/modules/rppt_tensor_filter_augmentations.cpp @@ -32,6 +32,20 @@ SOFTWARE. #include "hip/hip_tensor_filter_augmentations.hpp" #endif // HIP_COMPILE +inline size_t get_size_of_data_type(RpptDataType dataType) /* Returns the size in bytes of one element of the given RpptDataType: sizeof(Rpp8u) for U8, sizeof(Rpp8s) for I8, sizeof(Rpp16f) for F16, sizeof(Rpp32f) for F32. */ +{ + if(dataType == RpptDataType::U8) + return sizeof(Rpp8u); + else if(dataType == RpptDataType::I8) + return sizeof(Rpp8s); + else if(dataType == RpptDataType::F16) + return sizeof(Rpp16f); + else if(dataType == RpptDataType::F32) + return sizeof(Rpp32f); + else + return 0; /* No size is known for any other tag; 0 is returned rather than an error. */ +} + /******************** sobel_filter ********************/ RppStatus rppt_sobel_filter_host(RppPtr_t srcPtr, @@ -282,9 +296,11 @@ RppStatus rppt_sobel_filter_gpu(RppPtr_t srcPtr, if (srcDescPtr->c == 3) { RpptSubpixelLayout srcSubpixelLayout = RpptSubpixelLayout::RGBtype; - tempPtr = rpp::deref(rppHandle).GetInitHandle()->mem.mgpu.scratchBufferHip.floatmem; rppt_color_to_greyscale_gpu(srcPtr, srcDescPtr, tempPtr, dstDescPtr, srcSubpixelLayout, rppHandle); } + else + CHECK_RETURN_STATUS(hipMemcpy(tempPtr, srcPtr, dstDescPtr->strides.nStride * dstDescPtr->n * get_size_of_data_type(srcDescPtr->dataType), hipMemcpyDeviceToDevice)); + hipStreamSynchronize(rpp::deref(rppHandle).GetStream()); if ((srcDescPtr->dataType == RpptDataType::U8) && (dstDescPtr->dataType == RpptDataType::U8)) { @@ -335,6 +351,7 @@ RppStatus rppt_sobel_filter_gpu(RppPtr_t srcPtr, rpp::deref(rppHandle)); } + CHECK_RETURN_STATUS(hipFree(tempPtr)); return RPP_SUCCESS; #elif defined(OCL_COMPILE) return RPP_ERROR_NOT_IMPLEMENTED; diff --git a/utilities/test_suite/common.py b/utilities/test_suite/common.py index e24ee73f6..24bf67fa9 100644 --- a/utilities/test_suite/common.py +++ b/utilities/test_suite/common.py @@ -63,6 +63,7 @@ 45: ["color_temperature", "HOST", "HIP"], 46: ["vignette", "HOST", "HIP"], 49: ["box_filter", "HIP"], + 50: ["sobel_filter", "HOST", "HIP"], 54: ["gaussian_filter", "HIP"], 61: ["magnitude", "HOST", "HIP"], 63: ["phase", "HOST", "HIP"], @@ 
-116,7 +117,7 @@ "color_augmentations" : [0, 1, 2, 3, 4, 13, 31, 34, 36, 45, 81], "effects_augmentations" : [5, 6, 8, 29, 30, 32, 35, 46, 82, 83, 84], "geometric_augmentations" : [20, 21, 23, 24, 26, 33, 37, 38, 39, 63, 79, 80, 92], - "filter_augmentations" : [49, 54], + "filter_augmentations" : [49, 50, 54], "arithmetic_operations" : [61], "logical_operations" : [65, 68], "data_exchange_operations" : [70, 85, 86], From 3cba12f7e5c677c735018bc9b70ae52f0935a22a Mon Sep 17 00:00:00 2001 From: HazarathKumarM Date: Tue, 24 Sep 2024 19:17:32 +0000 Subject: [PATCH 31/31] Minor code cleanup --- .../rppt_tensor_filter_augmentations.cpp | 51 ++++++++++--------- utilities/test_suite/HIP/runTests.py | 2 +- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/modules/rppt_tensor_filter_augmentations.cpp b/src/modules/rppt_tensor_filter_augmentations.cpp index 68752a69b..2718aae08 100644 --- a/src/modules/rppt_tensor_filter_augmentations.cpp +++ b/src/modules/rppt_tensor_filter_augmentations.cpp @@ -292,7 +292,8 @@ RppStatus rppt_sobel_filter_gpu(RppPtr_t srcPtr, return RPP_ERROR_INVALID_DST_CHANNELS; // convert image to grey scale if input is RGB image - RppPtr_t tempPtr; + void *tempPtr; + CHECK_RETURN_STATUS(hipMalloc(&tempPtr, dstDescPtr->strides.nStride * dstDescPtr->n * get_size_of_data_type(srcDescPtr->dataType))); if (srcDescPtr->c == 3) { RpptSubpixelLayout srcSubpixelLayout = RpptSubpixelLayout::RGBtype; @@ -317,38 +318,38 @@ RppStatus rppt_sobel_filter_gpu(RppPtr_t srcPtr, else if ((srcDescPtr->dataType == RpptDataType::F16) && (dstDescPtr->dataType == RpptDataType::F16)) { hip_exec_sobel_filter_tensor(reinterpret_cast(static_cast(tempPtr) + srcDescPtr->offsetInBytes), - srcDescPtr, - reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), - dstDescPtr, - sobelType, - kernelSize, - roiTensorPtrSrc, - roiType, - rpp::deref(rppHandle)); + dstDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + 
sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::F32) && (dstDescPtr->dataType == RpptDataType::F32)) { hip_exec_sobel_filter_tensor(reinterpret_cast(static_cast(tempPtr) + srcDescPtr->offsetInBytes), - srcDescPtr, - reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), - dstDescPtr, - sobelType, - kernelSize, - roiTensorPtrSrc, - roiType, - rpp::deref(rppHandle)); + dstDescPtr, + reinterpret_cast(static_cast(dstPtr) + dstDescPtr->offsetInBytes), + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); } else if ((srcDescPtr->dataType == RpptDataType::I8) && (dstDescPtr->dataType == RpptDataType::I8)) { hip_exec_sobel_filter_tensor(static_cast(tempPtr) + srcDescPtr->offsetInBytes, - srcDescPtr, - static_cast(dstPtr) + dstDescPtr->offsetInBytes, - dstDescPtr, - sobelType, - kernelSize, - roiTensorPtrSrc, - roiType, - rpp::deref(rppHandle)); + dstDescPtr, + static_cast(dstPtr) + dstDescPtr->offsetInBytes, + dstDescPtr, + sobelType, + kernelSize, + roiTensorPtrSrc, + roiType, + rpp::deref(rppHandle)); } CHECK_RETURN_STATUS(hipFree(tempPtr)); diff --git a/utilities/test_suite/HIP/runTests.py b/utilities/test_suite/HIP/runTests.py index a95498bf7..af16ad428 100644 --- a/utilities/test_suite/HIP/runTests.py +++ b/utilities/test_suite/HIP/runTests.py @@ -88,7 +88,7 @@ def run_unit_test(srcPath1, srcPath2, dstPathTemp, case, numRuns, testType, layo print(stdout_data.decode()) elif case == "50": for kernelSizeAndGradient in range(9): - print("./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSizeAndGradient) + " 0") + print("\n./Tensor_hip " + srcPath1 + " " + srcPath2 + " " + dstPathTemp + " " + str(bitDepth) + " " + str(outputFormatToggle) + " " + str(case) + " " + str(kernelSizeAndGradient) + " 0") result = 
subprocess.Popen([buildFolderPath + "/build/Tensor_hip", srcPath1, srcPath2, dstPathTemp, str(bitDepth), str(outputFormatToggle), str(case), str(kernelSizeAndGradient), str(numRuns), str(testType), str(layout), "0", str(qaMode), str(decoderType), str(batchSize)] + roiList + [scriptPath], stdout=subprocess.PIPE) # nosec stdout_data, stderr_data = result.communicate() print(stdout_data.decode())