Skip to content

Commit

Permalink
[feat] Add math eval to CI (sgl-project#2652)
Browse files Browse the repository at this point in the history
  • Loading branch information
XiaotongJiang committed Jan 3, 2025
1 parent af78c6c commit 15fdc6e
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 0 deletions.
11 changes: 11 additions & 0 deletions test/srt/test_eval_accuracy_large.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ def test_mgsm_en(self):
metrics = run_eval(args)
self.assertGreater(metrics["score"], 0.835)

def test_math(self):
    """Run the MATH benchmark over the full 5000-example set and check accuracy.

    Uses the server configured on this test case (``self.base_url`` /
    ``self.model``) and asserts the score stays within sampling variance
    of the expected baseline.
    """
    # Assemble the eval configuration, then wrap it in a SimpleNamespace
    # because run_eval expects attribute-style access (args.base_url, ...).
    eval_config = dict(
        base_url=self.base_url,
        model=self.model,
        eval_name="math",
        num_examples=5000,
        num_threads=1024,
    )
    metrics = run_eval(SimpleNamespace(**eval_config))
    # Baseline 0.519, minus 1% to account for sampling variance.
    self.assertGreaterEqual(metrics["score"], 0.519 - 0.01)

# Allow running this test module directly (e.g. `python test_eval_accuracy_large.py`).
if __name__ == "__main__":
    unittest.main()
12 changes: 12 additions & 0 deletions test/srt/test_eval_accuracy_mini.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,18 @@ def test_mmlu(self):
metrics = run_eval(args)
self.assertGreaterEqual(metrics["score"], 0.65)

def test_math(self):
    """Smoke-test the MATH benchmark on a small 64-example subset.

    Mirrors the full-size MATH eval but with few examples and a low
    temperature, so it runs quickly in CI while still catching large
    accuracy regressions.
    """
    # run_eval consumes an args-like object, so build the settings as a
    # dict and convert once.
    params = {
        "base_url": self.base_url,
        "model": self.model,
        "eval_name": "math",
        "num_examples": 64,
        "num_threads": 32,
        "temperature": 0.1,
    }
    metrics = run_eval(SimpleNamespace(**params))
    # Baseline 0.519, minus 3% headroom for sampling variance on the
    # small 64-example sample.
    self.assertGreaterEqual(metrics["score"], 0.519 - 0.03)

# Allow running this test module directly (e.g. `python test_eval_accuracy_mini.py`).
if __name__ == "__main__":
    unittest.main()

0 comments on commit 15fdc6e

Please sign in to comment.