@@ -63,7 +63,7 @@ def report_eval(evaluator, result: EvalResultWithSummary, verbose, jsonl):
6363 except Exception :
6464 tests_pass = 0
6565 scores [category ] += tests_pass
66- if tests_pass >= 0.999 :
66+ if tests_pass >= 1 :
6767 passed_counts [category ] += 1
6868 total_passed += 1
6969 total_num_tests += 1
@@ -142,14 +142,14 @@ def _write_local_results(result: EvalResultWithSummary):
142142 except :
143143 tests_pass_score = 0.0
144144
145- passed = tests_pass_score >= 0.999
145+ passed = tests_pass_score >= 1
146146
147147 # Determine failure reason from scores
148148 failure_reason = None
149149 if not passed :
150150 if r .scores :
151151 for score_name , score_value in r .scores .items ():
152- if isinstance (score_value , (int , float )) and score_value < 0.999 :
152+ if isinstance (score_value , (int , float )) and score_value < 1 :
153153 if score_name == "Valid filesystem output" :
154154 failure_reason = "filesystem fail"
155155 break
@@ -255,7 +255,7 @@ def file_report_eval(evaluator, result: EvalResultWithSummary, verbose, jsonl):
255255 if r .scores and "Tests pass" in r .scores and isinstance (r .scores ["Tests pass" ], (int , float )):
256256 score_val = float (r .scores ["Tests pass" ]) # already normalized ratio per our scorer
257257 tests_pass_scores [category ] = tests_pass_scores .get (category , 0.0 ) + score_val
258- if score_val >= 0.999 :
258+ if score_val >= 1 :
259259 passed_counts [category ] = passed_counts .get (category , 0 ) + 1
260260 total_passed += 1
261261 total_num_tests += 1
0 commit comments