Update tolerance of provider tests to fix flaky tests (microsoft#19792)
### Description

Check that float/double/float16/bfloat16 tensors are close in the same way as
[numpy.isclose](https://numpy.org/doc/stable/reference/generated/numpy.isclose.html):
```
absolute(a - b) <= (atol + rtol * absolute(b))
```

The default tolerance thresholds:
- double: atol=1e-6 and rtol=1e-5
- float: atol=1e-5 and rtol=1e-4
- float16: atol=0.0025 and rtol=0.001
- bfloat16: atol=0.02 and rtol=0.01
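
As a quick illustration, here is a minimal sketch of how such a combined check behaves; `IsClose` is a hypothetical helper (not the ORT test framework API) that mirrors the new `get_tolerance<T>` in `checkers.cc`, where the allowed difference is `atol + rtol * |expected|`:

```cpp
// Minimal sketch, assuming a hypothetical IsClose helper that mirrors
// get_tolerance<T> in checkers.cc; not the actual test framework API.
#include <cmath>
#include <iostream>

bool IsClose(double expected, double actual, double atol, double rtol) {
  // The allowed difference grows with |expected|, but never drops below atol.
  return std::fabs(expected - actual) <= atol + rtol * std::fabs(expected);
}

int main() {
  // Default float thresholds: atol=1e-5, rtol=1e-4.
  std::cout << IsClose(1.0, 1.00005, 1e-5, 1e-4) << "\n";  // 1: diff 5e-5 <= 1e-5 + 1e-4
  std::cout << IsClose(1.0, 1.001,   1e-5, 1e-4) << "\n";  // 0: diff 1e-3 exceeds the tolerance
  return 0;
}
```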

### Motivation and Context

The current pipeline fails frequently because microsoft#19608 uses only a relative
tolerance:

```
[ RUN      ] MatMulIntegerToFloat.NoZeroPoint_NoBias_test_U8S8
1: C:\a\_work\1\s\onnxruntime\test\providers\checkers.cc(272): error:
The difference between cur_expected[i] and cur_actual[i] is
1.3113021850585938e-06, which exceeds *(params.relative_error) *
std::abs(cur_expected[i]), where
1: cur_expected[i] evaluates to -1.3113021850585938e-06,
1: cur_actual[i] evaluates to 0, and
1: *(params.relative_error) * std::abs(cur_expected[i]) evaluates to
2.6226043559063328e-08.
```

It is not reasonable to use only a relative tolerance for a value this close to 0:
here the allowed difference shrinks to about 2.6e-08, which the actual value of 0
cannot satisfy. Combining the relative tolerance with a positive absolute tolerance
avoids this kind of failure.
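
To make the fix concrete, the sketch below (again using the hypothetical `IsClose` helper, not the actual framework) replays the logged value, first with a relative-only tolerance and then with the combined tolerance; the `0.02` and `1e-4` values correspond to the `SetOutputRelErr`/`SetOutputAbsErr` calls in the updated test.

```cpp
// Replaying the logged failure with the hypothetical IsClose helper.
#include <cmath>
#include <iostream>

bool IsClose(double expected, double actual, double atol, double rtol) {
  return std::fabs(expected - actual) <= atol + rtol * std::fabs(expected);
}

int main() {
  const double expected = -1.3113021850585938e-06;  // value from the failing test log
  const double actual = 0.0;
  // Relative tolerance only (old behavior): allowed difference ~2.62e-08 -> fails.
  std::cout << IsClose(expected, actual, /*atol=*/0.0,  /*rtol=*/0.02) << "\n";  // 0
  // Absolute + relative tolerance (this change): allowed difference ~1.0e-04 -> passes.
  std::cout << IsClose(expected, actual, /*atol=*/1e-4, /*rtol=*/0.02) << "\n";  // 1
  return 0;
}
```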
tianleiwu authored Mar 7, 2024
1 parent 5c5d6e9 commit bff4f8b
Showing 2 changed files with 83 additions and 77 deletions.
@@ -127,6 +127,7 @@ void TestMatMulIntegerToFloat(bool is_matrix_b_constant,

if (std::is_same_v<OType, float>) {
test.AddOutput<float>("Y", {M, N}, Y_data);
test.SetOutputAbsErr("Y", 0.0001f);
test.SetOutputRelErr("Y", 0.02f);
} else {
test.AddOutput<MLFloat16>("Y", {M, N}, ToFloat16(Y_data));
159 changes: 82 additions & 77 deletions onnxruntime/test/providers/checkers.cc
@@ -14,6 +14,54 @@
namespace onnxruntime {
namespace test {
namespace {

template <typename T>
struct DefaultTolerance;

template <>
struct DefaultTolerance<double> {
static constexpr float absolute = 1e-6f;
static constexpr float relative = 1e-5f;
};

template <>
struct DefaultTolerance<float> {
static constexpr float absolute = 1e-5f;
static constexpr float relative = 1e-4f;
};

template <>
struct DefaultTolerance<MLFloat16> {
// The thresholds are estimated with PyTorch script like the following:
// x = torch.rand(1000, 1000)
// absolute = ((x + 1e-6).to(torch.float16) - x).abs().max() * 10
// x[abs(x) < absolute] = absolute
// relative = ((x - x.to(torch.float16)) / x).abs().max() * 2
static constexpr float absolute = 0.0025f;
static constexpr float relative = 0.001f;
};

template <>
struct DefaultTolerance<BFloat16> {
static constexpr float absolute = 0.02f;
static constexpr float relative = 0.01f;
};

template <typename T>
T get_tolerance(float absolute, float relative, T expected_value) {
static_assert(std::is_floating_point<T>::value, "T must be a floating point type");

// The formula is similar to numpy.isclose: https://numpy.org/doc/stable/reference/generated/numpy.isclose.html
return static_cast<T>(absolute) + static_cast<T>(relative) * std::abs(expected_value);
}

template <typename T, typename D> // D is the original data type
T get_tolerance(const ValidateOutputParams& params, T expected_value) {
float absolute = (params.absolute_error.has_value() ? *(params.absolute_error) : DefaultTolerance<D>::absolute);
float relative = (params.relative_error.has_value() ? *(params.relative_error) : DefaultTolerance<D>::relative);
return get_tolerance<T>(absolute, relative, expected_value);
}

template <typename T>
Tensor copy_sort(const Tensor& src, const AllocatorPtr& allocator) {
Tensor result(src.DataType(), src.Shape(), allocator);
@@ -67,7 +115,7 @@ struct TensorCheck {
cur_actual = actual.Data<T>();
}

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
}
}
@@ -111,7 +159,7 @@ struct TensorCheck<uint8_t> {
double threshold = has_abs_err ? *(params.absolute_error)
: 0.0;

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (has_rel_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i],
*(params.relative_error) * cur_expected[i]) // expected[i] is unsigned, can't be negative
@@ -121,7 +169,7 @@ struct TensorCheck<uint8_t> {
}
}
} else {
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
}
}
@@ -157,11 +205,11 @@ struct TensorCheck<int8_t> {
if (has_abs_err) {
double threshold = *(params.absolute_error);

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i;
}
} else {
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "i:" << i;
}
}
@@ -176,8 +224,7 @@ struct TensorCheck<double> {
const std::string& /*provider_type*/) const {
auto size = actual.Shape().Size();

bool has_abs_err = params.absolute_error.has_value();
bool has_rel_err = params.relative_error.has_value();
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

// deal with rare cases in which order of output data from a kernel MAY be
// undefined
@@ -198,52 +245,41 @@ struct TensorCheck<double> {
threshold = 0.005;
#endif

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
// NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified.
// If the isinf check is first the isnan check and branch gets omitted
if (std::isnan(cur_expected[i])) {
EXPECT_TRUE(std::isnan(cur_actual[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(cur_expected[i])) { // Test infinity for equality
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. i:" << i;
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i;
} else {
if (has_abs_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.absolute_error)) << "i:" << i;
}
if (has_rel_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.relative_error) * std::abs(cur_expected[i]))
<< "i:" << i;
}
}
double tolerance = has_tolerance ? get_tolerance<double, double>(params, cur_expected[i]) : threshold;
EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i;
}
}
}
};

template <typename TypeToCheck>
template <typename T>
void InternalNumericalCheck(const Tensor& expected,
const Tensor& actual,
const ValidateOutputParams& params,
const std::string& /*provider_type*/) {
const bool has_abs_err = params.absolute_error.has_value();
const bool has_rel_err = params.relative_error.has_value();
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

// deal with rare cases in which order of output data from a kernel MAY be
// undefined
Tensor expected_sorted, actual_sorted;
const TypeToCheck* cur_expected;
const TypeToCheck* cur_actual;
const T* cur_expected;
const T* cur_actual;
auto size = actual.Shape().Size();
if (params.sort_output) {
sort_expected_and_actual_buffers<TypeToCheck>(expected, expected_sorted, actual, actual_sorted);
cur_expected = expected_sorted.Data<TypeToCheck>();
cur_actual = actual_sorted.Data<TypeToCheck>();
sort_expected_and_actual_buffers<T>(expected, expected_sorted, actual, actual_sorted);
cur_expected = expected_sorted.Data<T>();
cur_actual = actual_sorted.Data<T>();
} else {
cur_expected = expected.Data<TypeToCheck>();
cur_actual = actual.Data<TypeToCheck>();
cur_expected = expected.Data<T>();
cur_actual = actual.Data<T>();
}

#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
@@ -252,27 +288,16 @@ void InternalNumericalCheck(const Tensor& expected,
constexpr float threshold = 0.0001f;
#endif

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
// NOTE: Check isnan first to work around MSVC linker bug when /LTCG:incremental is specified.
// If the isinf check is first the isnan check and branch gets omitted
if (std::isnan(cur_expected[i])) {
EXPECT_TRUE(std::isnan(cur_actual[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(cur_expected[i])) { // Test infinity for equality
EXPECT_EQ(cur_expected[i], cur_actual[i]) << "Expected infinity. i:" << i;
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(cur_expected[i], cur_actual[i], threshold) << "i:" << i;
} else {
if (has_abs_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.absolute_error))
<< "i:" << i;
}
if (has_rel_err) {
EXPECT_NEAR(cur_expected[i], cur_actual[i], *(params.relative_error) * std::abs(cur_expected[i]))
<< "i:" << i;
}
}
T tolerance = has_tolerance ? get_tolerance<T, T>(params, cur_expected[i]) : threshold;
EXPECT_NEAR(cur_expected[i], cur_actual[i], tolerance) << "i:" << i;
}
}
}
@@ -308,34 +333,22 @@ struct TensorCheck<MLFloat16> {
sort_expected_and_actual_buffers<float>(f_expected, f_actual);
}

const bool has_abs_err = params.absolute_error.has_value();
const bool has_rel_err = params.relative_error.has_value();
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

float threshold = 0.001f;
#if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM)
threshold = 0.005f;
#elif defined(USE_DML)
threshold = 0.02f;
#endif
for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (std::isnan(f_expected[i])) {
EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(f_expected[i])) { // Test infinity for equality
EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. i:" << i;
} else {
if (!has_abs_err && !has_rel_err) {
// the default for existing tests
EXPECT_NEAR(f_expected[i], f_actual[i], threshold) << "i:" << i;
} else {
if (has_abs_err) {
EXPECT_NEAR(f_expected[i], f_actual[i], *(params.absolute_error))
<< "i:" << i;
}
if (has_rel_err) {
EXPECT_NEAR(f_expected[i], f_actual[i], *(params.relative_error) * std::abs(static_cast<float>(cur_expected[i])))
<< "i:" << i;
}
}
float tolerance = has_tolerance ? get_tolerance<float, MLFloat16>(params, f_expected[i]) : threshold;
EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i;
}
}
}
@@ -362,32 +375,24 @@ struct TensorCheck<BFloat16> {
sort_expected_and_actual_buffers<float>(f_expected, f_actual);
}

/// XXX: May need to adjust threshold as BFloat is coarse
const bool has_tolerance = params.absolute_error.has_value() || params.relative_error.has_value();

float abs_threshold = 0.0001f;
float threshold = 0.001f;
float rel_threshold = 0.001f;
#if defined(USE_TENSORRT) || defined(ENABLE_TRAINING_CORE) || defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) || defined(USE_DNNL)
threshold = 0.05f; // expect at least 95% close
rel_threshold = 0.05f; // expect at least 95% close
#endif

for (int i = 0; i < size; ++i) {
for (int64_t i = 0; i < size; ++i) {
if (std::isnan(f_expected[i])) {
EXPECT_TRUE(std::isnan(f_expected[i])) << "Expected NaN. i:" << i;
} else if (std::isinf(f_expected[i])) { // Test infinity for equality
EXPECT_EQ(f_expected[i], f_actual[i]) << "Expected infinity. i:" << i;
} else {
// the default for existing tests
const float max_value = fmax(fabs(f_expected[i]), fabs(f_actual[i]));
if (max_value != 0) { // max_value = 0 means output and expected are 0s.
const float abs_error = fabs(f_expected[i] - f_actual[i]);
if (abs_error <= abs_threshold) {
// if the absolute error is small enough, then no need to calculate realative error
EXPECT_NEAR(0, abs_error, abs_threshold);
} else {
// default for existing tests.
const float rel_error = abs_error / max_value;
EXPECT_NEAR(0, rel_error, threshold);
}
}
float tolerance = has_tolerance
? get_tolerance<float, BFloat16>(params, f_expected[i])
: get_tolerance<float>(abs_threshold, rel_threshold, f_expected[i]);
EXPECT_NEAR(f_expected[i], f_actual[i], tolerance) << "i:" << i;
}
}
}
