From 57a17aa287bdadceca2bb2b5761c9b82b9829cf0 Mon Sep 17 00:00:00 2001
From: Yunho Huh <yunho@google.com>
Date: Wed, 11 Dec 2024 23:17:43 +0000
Subject: [PATCH] Implement fixed-point normalized atan and atan2p functions.

Change-Id: I12463cdafb44e6bf9a66502a464187e7217e839a
Signed-off-by: Jean-Marc Valin <jeanmarcv@google.com>
---
 celt/mathops.h                 | 66 ++++++++++++++++++++++++++-
 celt/tests/test_unit_mathops.c | 81 ++++++++++++++++++++++++++++++++++
 celt/vq.c                      |  6 +--
 3 files changed, 148 insertions(+), 5 deletions(-)

diff --git a/celt/mathops.h b/celt/mathops.h
index cc1569edd..7cff0e515 100644
--- a/celt/mathops.h
+++ b/celt/mathops.h
@@ -128,7 +128,7 @@ static OPUS_INLINE opus_val32 celt_maxabs32(const opus_val32 *x, int len)
 #endif
 #endif
 
-#if !defined(FIXED_POINT) || defined(ENABLE_QEXT)
+#ifndef FIXED_POINT
 /* Calculates the arctangent of x using a Remez approximation of order 15,
  * incorporating only odd-powered terms. */
 static OPUS_INLINE float celt_atan_norm(float x)
@@ -176,7 +176,9 @@ static OPUS_INLINE float celt_atan2p_norm(float y, float x)
       return 1.f - celt_atan_norm(x / y);
    }
 }
+#endif
 
+#if !defined(FIXED_POINT) || defined(ENABLE_QEXT)
 /* Computes estimated cosine values for (PI/2 * x) using only terms with even
  * exponents. */
 static OPUS_INLINE float celt_cos_norm2(float x)
@@ -521,6 +523,68 @@ opus_val32 celt_rcp(opus_val32 x);
 opus_val32 frac_div32_q29(opus_val32 a, opus_val32 b);
 opus_val32 frac_div32(opus_val32 a, opus_val32 b);
 
+/* Computes atan(x) multiplied by 2/PI. The input value (x) should be within the
+ * range of -1 to 1 and represented in Q30 format. The function will return the
+ * result in Q30 format. */
+static OPUS_INLINE opus_val32 celt_atan_norm(opus_val32 x)
+{
+   /* Approximation constants. */
+   static const opus_int32 ATAN_2_OVER_PI = 1367130551;   /* Q31 */
+   static const opus_int32 ATAN_COEFF_A03 = -715791936;   /* Q31 */
+   static const opus_int32 ATAN_COEFF_A05 = 857391616;    /* Q32 */
+   static const opus_int32 ATAN_COEFF_A07 = -1200579328;  /* Q33 */
+   static const opus_int32 ATAN_COEFF_A09 = 1682636672;   /* Q34 */
+   static const opus_int32 ATAN_COEFF_A11 = -1985085440;  /* Q35 */
+   static const opus_int32 ATAN_COEFF_A13 = 1583306112;   /* Q36 */
+   static const opus_int32 ATAN_COEFF_A15 = -598602432;   /* Q37 */
+   opus_int32 x_sq_q30;
+   opus_int32 x_q31;
+   opus_int32 tmp;
+   /* The expected x is in the range of [-1.0f, 1.0f] */
+   celt_sig_assert((x <= 1073741824) && (x >= -1073741824));
+
+   /* If x = 1.0f, returns 0.5f */
+   if (x == 1073741824)
+   {
+      return 536870912; /* 0.5f (Q30) */
+   }
+   /* If x = 1.0f, returns 0.5f */
+   if (x == -1073741824)
+   {
+      return -536870912; /* -0.5f (Q30) */
+   }
+   x_q31 = SHL32(x, 1);
+   x_sq_q30 = MULT32_32_Q31(x_q31, x);
+   /* Split evaluation in steps to avoid exploding macro expansion. */
+   tmp = MULT32_32_Q31(x_sq_q30, ATAN_COEFF_A15);
+   tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A13, tmp));
+   tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A11, tmp));
+   tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A09, tmp));
+   tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A07, tmp));
+   tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A05, tmp));
+   tmp = MULT32_32_Q31(x_sq_q30, ADD32(ATAN_COEFF_A03, tmp));
+   tmp = ADD32(x, MULT32_32_Q31(x_q31, tmp));
+   return MULT32_32_Q31(ATAN_2_OVER_PI, tmp);
+}
+
+/* Calculates the arctangent of y/x, multiplies the result by 2/pi, and returns
+ * the value in Q30 format. Both input values (x and y) must be within the range
+ * of 0 to 1 and represented in Q30 format. Inputs must be zero or greater, and
+ * at least one input must be non-zero. */
+static OPUS_INLINE opus_val32 celt_atan2p_norm(opus_val32 y, opus_val32 x)
+{
+   celt_sig_assert(x>=0 && y>=0);
+   if (y==0 && x==0) {
+      return 0;
+   } else if (y < x) {
+      return celt_atan_norm(SHR32(frac_div32(y, x), 1));
+   } else {
+      celt_sig_assert(y > 0);
+      return 1073741824 /* 1.0f Q30 */ -
+             celt_atan_norm(SHR32(frac_div32(x, y), 1));
+   }
+}
+
 #define M1 32767
 #define M2 -21
 #define M3 -11943
diff --git a/celt/tests/test_unit_mathops.c b/celt/tests/test_unit_mathops.c
index 515eced9a..063818025 100644
--- a/celt/tests/test_unit_mathops.c
+++ b/celt/tests/test_unit_mathops.c
@@ -439,6 +439,86 @@ void testrsqrt(void)
    }
    fprintf (stdout, "celt_rsqrt_norm32 max_error: %.7e\n", max_error);
 }
+
+void testatan_norm(void)
+{
+#if defined(ENABLE_QEXT)
+   float error = -1;
+   float max_error = -2;
+   float error_threshold = 5.97e-08;
+   float fx = 0;
+   opus_int32 x = 0;
+   int q_input = 30;
+   int q_output = 30;
+   #define ATAN2_2_OVER_PI 0.636619772367581f
+   for (fx = -1.0f; fx <= 1.0f; fx += 0.007f)
+   {
+      x = DOUBLE_TO_FIX_INT(fx, q_input);
+      error = fabs(atan(FIX_INT_TO_DOUBLE(x, q_input)) * ATAN2_2_OVER_PI -
+                   FIX_INT_TO_DOUBLE(celt_atan_norm(x), q_output));
+      if (error > max_error)
+      {
+         max_error = error;
+      }
+      if (error > error_threshold)
+      {
+         fprintf(stderr,
+                 "celt_atan_norm failed: error: [%.5e > %.5e] (x = %f)\n",
+                 error, error_threshold, FIX_INT_TO_DOUBLE(x, DB_SHIFT));
+         ret = 1;
+      }
+   }
+   fprintf(stdout, "celt_atan_norm max_error: %.7e\n", max_error);
+#endif /* defined(ENABLE_QEXT) */
+}
+
+void testatan2p_norm(void)
+{
+#if defined(ENABLE_QEXT)
+   float error = -1;
+   float max_error = -2;
+   float error_threshold = 1.2e-07;
+   float fx = 0;
+   float fy = 0;
+   opus_int32 x = 0;
+   opus_int32 y = 0;
+   int q_input = 30;
+   int q_output = 30;
+   #define ATAN2_2_OVER_PI 0.636619772367581f
+   for (fx = 0.0f; fx <= 1.0f; fx += 0.007f)
+   {
+      x = DOUBLE_TO_FIX_INT(fx, q_input);
+      for (fy = 0.0f; fy <= 1.0f; fy += 0.007f)
+      {
+         y = DOUBLE_TO_FIX_INT(fy, q_input);
+         if (x == 0 && x == 0)
+            continue;
+
+         error = fabs(atan2(FIX_INT_TO_DOUBLE(y, q_input),
+                            FIX_INT_TO_DOUBLE(x, q_input)) * ATAN2_2_OVER_PI -
+                      FIX_INT_TO_DOUBLE(celt_atan2p_norm(y, x), q_output));
+         if (error > max_error)
+         {
+            max_error = error;
+         }
+         if (error > error_threshold)
+         {
+            fprintf(stderr,
+                  "celt_atan2p_norm failed: error: [%.5e > %.5e] (x = %f)\n",
+                  error, error_threshold, FIX_INT_TO_DOUBLE(x, DB_SHIFT));
+            ret = 1;
+         }
+      }
+   }
+   fprintf(stdout, "celt_atan2p_norm max_error: %.7e\n", max_error);
+#endif /* defined(ENABLE_QEXT) */
+}
+
+void testatan(void)
+{
+   testatan_norm();
+   testatan2p_norm();
+}
 #endif
 
 int main(void)
@@ -455,6 +535,7 @@ int main(void)
    testlog2_db();
    testexp2_db();
    testrsqrt();
+   testatan();
 #else
    test_cos();
    test_atan2();
diff --git a/celt/vq.c b/celt/vq.c
index 9bdde62b4..b261e523d 100644
--- a/celt/vq.c
+++ b/celt/vq.c
@@ -693,10 +693,8 @@ opus_int32 stereo_itheta(const celt_norm *X, const celt_norm *Y, int stereo, int
    }
    mid = celt_sqrt(Emid);
    side = celt_sqrt(Eside);
-   /* FIXME: Add a fixed-point version for ENABLE_QEXT*/
-#if defined(FIXED_POINT) && !defined(ENABLE_QEXT)
-   /* 0.63662 = 2/pi */
-   itheta = MULT16_16(QCONST16(0.63662f,15),celt_atan2p(side, mid))<<1;
+#if defined(FIXED_POINT)
+   itheta = celt_atan2p_norm(side, mid);
 #else
    itheta = (int)floor(.5f+65536.f*16384*celt_atan2p_norm(side,mid));
 #endif