From 1cb3a1dfabcf2e0037d4a50e0f8febc3b6673ca7 Mon Sep 17 00:00:00 2001 From: dmitrykos Date: Thu, 7 Sep 2023 20:40:57 +0300 Subject: [PATCH] Improve float to int truncation precision by forcing floating-point rounding mode to rounding towards zero or by using CPU SIMD which truncates by rounding towards zero. --- src/common/pa_converters.c | 261 ++++++++++++++++++++++++++++++++----- 1 file changed, 225 insertions(+), 36 deletions(-) diff --git a/src/common/pa_converters.c b/src/common/pa_converters.c index dbf052356..48b846766 100644 --- a/src/common/pa_converters.c +++ b/src/common/pa_converters.c @@ -320,6 +320,29 @@ PaUtilConverterTable paConverters = { #define PA_CLIP_( val, min, max )\ { val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); } +/* detect whether SSE SIMD is enabled during compilation */ +#ifndef PA_HAVE_SSE +#if defined(__SSE2__) || /* SSE2 enabled by GCC */\ + (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || /* SSE2 enabled by MSVC for 32-bit code */\ + (defined(_M_AMD64) || defined(_M_X64)) /* SSE2 is always present on x64 CPU platform (implies SSE) */ + #define PA_HAVE_SSE 2 + #include <emmintrin.h> +#elif defined(__SSE__) || /* SSE enabled by GCC */\ + (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) /* SSE enabled by MSVC for 32-bit code */ + #define PA_HAVE_SSE 1 + #include <xmmintrin.h> +#else + #define PA_HAVE_SSE 0 +#endif +#endif + +/* use fesetround() only when there is no SIMD for truncation of float or double to + integer with rounding towards zero, exclude cases when CPU has SIMD covering both cases */ +#if !((PA_HAVE_SSE >= 2) || /* SSE2 SIMD implies SSE */\ + ((__ARM_ARCH >= 8) || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__))) /* ARMv8 CPU platform */ + #include <fenv.h> + #define PA_USE_FESETROUND +#endif static const float const_1_div_128_ = 1.0f / 128.0f; /* 8 bit multiplier */ @@ -329,6 +352,102 @@ static const double const_1_div_2147483648_ = 1.0 / 2147483648.0; /* 32 bit mult /* 
-------------------------------------------------------------------------- */ +static inline int setTruncRoundingModePriv() +{ +#ifdef PA_USE_FESETROUND + int prev = fegetround(); + + if (fesetround(FE_TOWARDZERO) != 0) + prev = -1; + + return prev; +#else + return 0; +#endif +} + +/* -------------------------------------------------------------------------- */ + +static inline void resetRoundingModePriv(int prev) +{ +#ifdef PA_USE_FESETROUND + if (prev != -1) + fesetround(prev); +#endif +} + +/* -------------------------------------------------------------------------- */ + +static inline int setTruncRoundingModeFloat() +{ +#if (PA_HAVE_SSE < 1) + return setTruncRoundingModePriv(); +#else + return 0; +#endif +} + +/* -------------------------------------------------------------------------- */ + +static inline void resetRoundingModeFloat(int prev) +{ +#if (PA_HAVE_SSE < 1) + resetRoundingModePriv(prev); +#endif +} + +/* -------------------------------------------------------------------------- */ + +static inline int setTruncRoundingModeDouble() +{ +#if (PA_HAVE_SSE < 2) + return setTruncRoundingModePriv(); +#else + return 0; +#endif +} + +/* -------------------------------------------------------------------------- */ + +static inline void resetRoundingModeDouble(int prev) +{ +#if (PA_HAVE_SSE < 2) + resetRoundingModePriv(prev); +#endif +} + +/* -------------------------------------------------------------------------- */ + +static inline PaInt32 truncFloatToInt32(float v) +{ +#if PA_HAVE_SSE + return _mm_cvttss_si32(_mm_load_ss(&v)); +#elif (__ARM_ARCH >= 8) || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) + int ret; + __asm__ ("fcvtzs %x0, %s1" : "=r"(ret) : "w"(v)); + return ret; +#else + return (PaInt32)v; +#endif +} + +/* -------------------------------------------------------------------------- */ + +static inline PaInt32 truncDoubleToInt32(double v) +{ +#if PA_HAVE_SSE && defined (_M_X64) + return (PaInt32)_mm_cvttsd_si32(_mm_load_sd(&v)); +#elif 
(__ARM_ARCH >= 8) || defined(__ARM_ARCH_8__) || defined(__ARM_ARCH_8A__) + int64 ret; + __asm__ ("fcvtzs %x0, %d1" : "=r"(ret) : "w"(v)); + return (PaInt32)ret; +#else + return (PaInt32)v; +#endif +} + +/* -------------------------------------------------------------------------- */ + static void Float32_To_Int32( void *destinationBuffer, signed int destinationStride, void *sourceBuffer, signed int sourceStride, @@ -336,17 +455,21 @@ static void Float32_To_Int32( { float *src = (float*)sourceBuffer; PaInt32 *dest = (PaInt32*)destinationBuffer; + int prevMode = setTruncRoundingModeDouble(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) { /* REVIEW */ double scaled = *src * 0x7FFFFFFF; - *dest = (PaInt32) scaled; + *dest = truncDoubleToInt32(scaled); src += sourceStride; dest += destinationStride; } + + resetRoundingModeDouble(prevMode); } /* -------------------------------------------------------------------------- */ @@ -358,6 +481,7 @@ static void Float32_To_Int32_Dither( { float *src = (float*)sourceBuffer; PaInt32 *dest = (PaInt32*)destinationBuffer; + int prevMode = setTruncRoundingModeDouble(); while( count-- ) { @@ -365,11 +489,13 @@ static void Float32_To_Int32_Dither( double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); /* use smaller scaler to prevent overflow when we add the dither */ double dithered = ((double)*src * (2147483646.0)) + dither; - *dest = (PaInt32) dithered; + *dest = truncDoubleToInt32(dithered); src += sourceStride; dest += destinationStride; } + + resetRoundingModeDouble(prevMode); } /* -------------------------------------------------------------------------- */ @@ -381,18 +507,22 @@ static void Float32_To_Int32_Clip( { float *src = (float*)sourceBuffer; PaInt32 *dest = (PaInt32*)destinationBuffer; - (void) ditherGenerator; /* unused parameter */ + int prevMode = setTruncRoundingModeDouble(); + + (void)ditherGenerator; /* unused parameter */ while( count-- ) { /* REVIEW */ double scaled = *src * 
0x7FFFFFFF; PA_CLIP_( scaled, -2147483648., 2147483647. ); - *dest = (PaInt32) scaled; + *dest = truncDoubleToInt32(scaled); src += sourceStride; dest += destinationStride; } + + resetRoundingModeDouble(prevMode); } /* -------------------------------------------------------------------------- */ @@ -403,20 +533,23 @@ static void Float32_To_Int32_DitherClip( unsigned int count, struct PaUtilTriangularDitherGenerator *ditherGenerator ) { float *src = (float*)sourceBuffer; - PaInt32 *dest = (PaInt32*)destinationBuffer; + PaInt32 *dest = (PaInt32*)destinationBuffer; + int prevMode = setTruncRoundingModeDouble(); while( count-- ) { /* REVIEW */ - double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); + double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); /* use smaller scaler to prevent overflow when we add the dither */ double dithered = ((double)*src * (2147483646.0)) + dither; PA_CLIP_( dithered, -2147483648., 2147483647. ); - *dest = (PaInt32) dithered; + *dest = truncDoubleToInt32(dithered); src += sourceStride; dest += destinationStride; } + + resetRoundingModeDouble(prevMode); } /* -------------------------------------------------------------------------- */ @@ -428,15 +561,16 @@ static void Float32_To_Int24( { float *src = (float*)sourceBuffer; unsigned char *dest = (unsigned char*)destinationBuffer; + int prevMode = setTruncRoundingModeDouble(); PaInt32 temp; - (void) ditherGenerator; /* unused parameter */ + (void)ditherGenerator; /* unused parameter */ while( count-- ) { /* convert to 32 bit and drop the low 8 bits */ double scaled = (double)(*src) * 2147483647.0; - temp = (PaInt32) scaled; + temp = truncDoubleToInt32(scaled); #if defined(PA_LITTLE_ENDIAN) dest[0] = (unsigned char)(temp >> 8); @@ -451,6 +585,8 @@ static void Float32_To_Int24( src += sourceStride; dest += destinationStride * 3; } + + resetRoundingModeDouble(prevMode); } /* -------------------------------------------------------------------------- */ @@ 
-462,17 +598,18 @@ static void Float32_To_Int24_Dither( { float *src = (float*)sourceBuffer; unsigned char *dest = (unsigned char*)destinationBuffer; + int prevMode = setTruncRoundingModeDouble(); PaInt32 temp; while( count-- ) { /* convert to 32 bit and drop the low 8 bits */ - double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); + double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); /* use smaller scaler to prevent overflow when we add the dither */ double dithered = ((double)*src * (2147483646.0)) + dither; - temp = (PaInt32) dithered; + temp = truncDoubleToInt32(dithered); #if defined(PA_LITTLE_ENDIAN) dest[0] = (unsigned char)(temp >> 8); @@ -487,6 +624,8 @@ static void Float32_To_Int24_Dither( src += sourceStride; dest += destinationStride * 3; } + + resetRoundingModeDouble(prevMode); } /* -------------------------------------------------------------------------- */ @@ -498,16 +637,17 @@ static void Float32_To_Int24_Clip( { float *src = (float*)sourceBuffer; unsigned char *dest = (unsigned char*)destinationBuffer; + int prevMode = setTruncRoundingModeDouble(); PaInt32 temp; - (void) ditherGenerator; /* unused parameter */ + (void)ditherGenerator; /* unused parameter */ while( count-- ) { /* convert to 32 bit and drop the low 8 bits */ double scaled = *src * 0x7FFFFFFF; PA_CLIP_( scaled, -2147483648., 2147483647. 
); - temp = (PaInt32) scaled; + temp = truncDoubleToInt32(scaled); #if defined(PA_LITTLE_ENDIAN) dest[0] = (unsigned char)(temp >> 8); @@ -522,6 +662,8 @@ static void Float32_To_Int24_Clip( src += sourceStride; dest += destinationStride * 3; } + + resetRoundingModeDouble(prevMode); } /* -------------------------------------------------------------------------- */ @@ -533,6 +675,7 @@ static void Float32_To_Int24_DitherClip( { float *src = (float*)sourceBuffer; unsigned char *dest = (unsigned char*)destinationBuffer; + int prevMode = setTruncRoundingModeDouble(); PaInt32 temp; while( count-- ) @@ -544,7 +687,7 @@ static void Float32_To_Int24_DitherClip( double dithered = ((double)*src * (2147483646.0)) + dither; PA_CLIP_( dithered, -2147483648., 2147483647. ); - temp = (PaInt32) dithered; + temp = truncDoubleToInt32(dithered); #if defined(PA_LITTLE_ENDIAN) dest[0] = (unsigned char)(temp >> 8); @@ -559,6 +702,8 @@ static void Float32_To_Int24_DitherClip( src += sourceStride; dest += destinationStride * 3; } + + resetRoundingModeDouble(prevMode); } /* -------------------------------------------------------------------------- */ @@ -570,16 +715,20 @@ static void Float32_To_Int16( { float *src = (float*)sourceBuffer; PaInt16 *dest = (PaInt16*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) { - short samp = (short) (*src * (32767.0f)); - *dest = samp; + float scaled = *src * (32767.0f); + *dest = (PaInt16)truncFloatToInt32(scaled); src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -591,6 +740,7 @@ static void Float32_To_Int16_Dither( { float *src = (float*)sourceBuffer; PaInt16 *dest = (PaInt16*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); while( count-- ) { @@ -599,11 +749,13 @@ static void Float32_To_Int16_Dither( /* use smaller scaler to prevent 
overflow when we add the dither */ float dithered = (*src * (32766.0f)) + dither; - *dest = (PaInt16) dithered; + *dest = (PaInt16)truncFloatToInt32(dithered); src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -615,18 +767,21 @@ static void Float32_To_Int16_Clip( { float *src = (float*)sourceBuffer; PaInt16 *dest = (PaInt16*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) { - long samp = (PaInt32) (*src * (32767.0f)); - - PA_CLIP_( samp, -0x8000, 0x7FFF ); - *dest = (PaInt16) samp; + float scaled = *src * (32767.0f); + PA_CLIP_( scaled, -0x8000, 0x7FFF ); + *dest = (PaInt16)truncFloatToInt32(scaled); src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -638,21 +793,25 @@ static void Float32_To_Int16_DitherClip( { float *src = (float*)sourceBuffer; PaInt16 *dest = (PaInt16*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) { - float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); + float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); /* use smaller scaler to prevent overflow when we add the dither */ float dithered = (*src * (32766.0f)) + dither; - PaInt32 samp = (PaInt32) dithered; + PaInt32 samp = truncFloatToInt32(dithered); PA_CLIP_( samp, -0x8000, 0x7FFF ); *dest = (PaInt16) samp; src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -664,16 +823,20 @@ static void Float32_To_Int8( { float *src = (float*)sourceBuffer; signed char *dest = (signed char*)destinationBuffer; + int prevMode = 
setTruncRoundingModeFloat(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) { - signed char samp = (signed char) (*src * (127.0f)); - *dest = samp; + float scaled = *src * (127.0f); + *dest = (signed char)truncFloatToInt32(scaled); src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -685,18 +848,20 @@ static void Float32_To_Int8_Dither( { float *src = (float*)sourceBuffer; signed char *dest = (signed char*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); while( count-- ) { float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); /* use smaller scaler to prevent overflow when we add the dither */ float dithered = (*src * (126.0f)) + dither; - PaInt32 samp = (PaInt32) dithered; - *dest = (signed char) samp; + *dest = (signed char)truncFloatToInt32(dithered); src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -708,17 +873,22 @@ static void Float32_To_Int8_Clip( { float *src = (float*)sourceBuffer; signed char *dest = (signed char*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) { - PaInt32 samp = (PaInt32)(*src * (127.0f)); + float scaled = *src * (127.0f); + PaInt32 samp = truncFloatToInt32(scaled); PA_CLIP_( samp, -0x80, 0x7F ); *dest = (signed char) samp; src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -730,6 +900,8 @@ static void Float32_To_Int8_DitherClip( { float *src = (float*)sourceBuffer; signed char *dest = (signed char*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) @@ -737,13 
+909,15 @@ static void Float32_To_Int8_DitherClip( float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); /* use smaller scaler to prevent overflow when we add the dither */ float dithered = (*src * (126.0f)) + dither; - PaInt32 samp = (PaInt32) dithered; + PaInt32 samp = truncFloatToInt32(dithered); PA_CLIP_( samp, -0x80, 0x7F ); *dest = (signed char) samp; src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -755,16 +929,20 @@ static void Float32_To_UInt8( { float *src = (float*)sourceBuffer; unsigned char *dest = (unsigned char*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) { - unsigned char samp = (unsigned char)(128 + ((unsigned char) (*src * (127.0f)))); + unsigned char samp = (unsigned char)(128 + ((unsigned char)truncFloatToInt32(*src * (127.0f)))); *dest = samp; src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -776,18 +954,21 @@ static void Float32_To_UInt8_Dither( { float *src = (float*)sourceBuffer; unsigned char *dest = (unsigned char*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); while( count-- ) { - float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); + float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); /* use smaller scaler to prevent overflow when we add the dither */ float dithered = (*src * (126.0f)) + dither; - PaInt32 samp = (PaInt32) dithered; + PaInt32 samp = truncFloatToInt32(dithered); *dest = (unsigned char) (128 + samp); src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -799,17 +980,21 @@ static void 
Float32_To_UInt8_Clip( { float *src = (float*)sourceBuffer; unsigned char *dest = (unsigned char*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) { - PaInt32 samp = 128 + (PaInt32)(*src * (127.0f)); - PA_CLIP_( samp, 0x0000, 0x00FF ); + PaInt32 samp = 128 + truncFloatToInt32(*src * (127.0f)); + PA_CLIP_( samp, 0x00, 0xFF ); *dest = (unsigned char) samp; src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */ @@ -821,6 +1006,8 @@ static void Float32_To_UInt8_DitherClip( { float *src = (float*)sourceBuffer; unsigned char *dest = (unsigned char*)destinationBuffer; + int prevMode = setTruncRoundingModeFloat(); + (void)ditherGenerator; /* unused parameter */ while( count-- ) @@ -828,13 +1015,15 @@ static void Float32_To_UInt8_DitherClip( float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator ); /* use smaller scaler to prevent overflow when we add the dither */ float dithered = (*src * (126.0f)) + dither; - PaInt32 samp = 128 + (PaInt32) dithered; - PA_CLIP_( samp, 0x0000, 0x00FF ); + PaInt32 samp = 128 + truncFloatToInt32(dithered); + PA_CLIP_( samp, 0x00, 0xFF ); *dest = (unsigned char) samp; src += sourceStride; dest += destinationStride; } + + resetRoundingModeFloat(prevMode); } /* -------------------------------------------------------------------------- */