Skip to content

Commit 49e5b8e

Browse files
sjarmak and claude committed
fix: remediate verifier quality issues to eliminate all extension_only tasks
Fixes /tmp path collisions in 16 verifiers (onboard-search + 2 others) by switching to mktemp in /logs/verifier/. Improves abc_audit T.9 heuristic to recognize content validation inside heredocs (json.loads, re.search, python3 <<).

Results: 52 extension_only → 0. Manifest now hits all suite allocation targets including previously shortfalled understand (8/8) and document (4/4).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 4ec304c commit 49e5b8e

File tree

20 files changed

+1253
-1031
lines changed

20 files changed

+1253
-1031
lines changed

benchmarks/csb_org_onboarding/ccx-onboard-search-201/tests/test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ if [ ! -f "$SOLUTION_FILE" ]; then
3030
exit 0
3131
fi
3232

33-
VERIFY_SCRIPT=$(mktemp /tmp/verify_XXXXXX.py)
33+
VERIFY_SCRIPT=$(mktemp /logs/verifier/verify_XXXXXX.py)
3434
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3535
import json, sys, re
3636
sys.path.insert(0, "/tests")

benchmarks/csb_org_onboarding/ccx-onboard-search-202/tests/test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ if [ ! -f "$SOLUTION_FILE" ]; then
3030
exit 0
3131
fi
3232

33-
VERIFY_SCRIPT=$(mktemp /tmp/verify_XXXXXX.py)
33+
VERIFY_SCRIPT=$(mktemp /logs/verifier/verify_XXXXXX.py)
3434
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3535
import json, sys, re
3636
sys.path.insert(0, "/tests")

benchmarks/csb_org_onboarding/ccx-onboard-search-203/tests/test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ if [ ! -f "$SOLUTION_FILE" ]; then
3030
exit 0
3131
fi
3232

33-
VERIFY_SCRIPT=$(mktemp /tmp/verify_XXXXXX.py)
33+
VERIFY_SCRIPT=$(mktemp /logs/verifier/verify_XXXXXX.py)
3434
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3535
import json, sys, re
3636
sys.path.insert(0, "/tests")

benchmarks/csb_org_onboarding/ccx-onboard-search-204/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
3030
exit 0
3131
fi
3232

33-
cat > /tmp/verify.py << 'PYEOF'
33+
VERIFY_SCRIPT="$(mktemp /logs/verifier/verify_XXXXXX.py)"
34+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3435
import json, sys, re
3536
sys.path.insert(0, "/tests")
3637
from verifiers import SemanticRetrievalQAVerifier
@@ -68,5 +69,6 @@ except Exception as e:
6869
f.write("0.0")
6970
PYEOF
7071

71-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
72+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
7273
exit 0
74+
rm -f "$VERIFY_SCRIPT"

benchmarks/csb_org_onboarding/ccx-onboard-search-205/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
3030
exit 0
3131
fi
3232

33-
cat > /tmp/verify.py << 'PYEOF'
33+
VERIFY_SCRIPT="$(mktemp /logs/verifier/verify_XXXXXX.py)"
34+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3435
import json, sys, re
3536
sys.path.insert(0, "/tests")
3637
from verifiers import SemanticRetrievalQAVerifier
@@ -68,5 +69,6 @@ except Exception as e:
6869
f.write("0.0")
6970
PYEOF
7071

71-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
72+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
7273
exit 0
74+
rm -f "$VERIFY_SCRIPT"

benchmarks/csb_org_onboarding/ccx-onboard-search-206/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
3030
exit 0
3131
fi
3232

33-
cat > /tmp/verify.py << 'PYEOF'
33+
VERIFY_SCRIPT="$(mktemp /logs/verifier/verify_XXXXXX.py)"
34+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3435
import json, sys, re
3536
sys.path.insert(0, "/tests")
3637
from verifiers import SemanticRetrievalQAVerifier
@@ -68,5 +69,6 @@ except Exception as e:
6869
f.write("0.0")
6970
PYEOF
7071

71-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
72+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
7273
exit 0
74+
rm -f "$VERIFY_SCRIPT"

benchmarks/csb_org_onboarding/ccx-onboard-search-207/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
2929
exit 0
3030
fi
3131

32-
cat > /tmp/verify.py << 'PYEOF'
32+
VERIFY_SCRIPT="$(mktemp /logs/verifier/verify_XXXXXX.py)"
33+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3334
import json, sys, re
3435
sys.path.insert(0, "/tests")
3536
from verifiers import SemanticRetrievalQAVerifier
@@ -67,5 +68,6 @@ except Exception as e:
6768
f.write("0.0")
6869
PYEOF
6970

70-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
71+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
7172
exit 0
73+
rm -f "$VERIFY_SCRIPT"

benchmarks/csb_org_onboarding/ccx-onboard-search-208/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
2929
exit 0
3030
fi
3131

32-
cat > /tmp/verify.py << 'PYEOF'
32+
VERIFY_SCRIPT="$(mktemp /logs/verifier/verify_XXXXXX.py)"
33+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3334
import json, sys, re
3435
sys.path.insert(0, "/tests")
3536
from verifiers import SemanticRetrievalQAVerifier
@@ -67,5 +68,6 @@ except Exception as e:
6768
f.write("0.0")
6869
PYEOF
6970

70-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
71+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
7172
exit 0
73+
rm -f "$VERIFY_SCRIPT"

benchmarks/csb_org_onboarding/ccx-onboard-search-209/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
3030
exit 0
3131
fi
3232

33-
cat > /tmp/verify.py << 'PYEOF'
33+
VERIFY_SCRIPT="$(mktemp /logs/verifier/verify_XXXXXX.py)"
34+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3435
import json, sys, re
3536
sys.path.insert(0, "/tests")
3637
from verifiers import SemanticRetrievalQAVerifier
@@ -68,5 +69,6 @@ except Exception as e:
6869
f.write("0.0")
6970
PYEOF
7071

71-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
72+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
7273
exit 0
74+
rm -f "$VERIFY_SCRIPT"

benchmarks/csb_org_onboarding/ccx-onboard-search-210/tests/test.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ if [ ! -f "$SOLUTION_FILE" ]; then
3030
exit 0
3131
fi
3232

33-
cat > /tmp/verify.py << 'PYEOF'
33+
VERIFY_SCRIPT="$(mktemp /logs/verifier/verify_XXXXXX.py)"
34+
cat > "$VERIFY_SCRIPT" << 'PYEOF'
3435
import json, sys, re
3536
sys.path.insert(0, "/tests")
3637
from verifiers import SemanticRetrievalQAVerifier
@@ -68,5 +69,6 @@ except Exception as e:
6869
f.write("0.0")
6970
PYEOF
7071

71-
python3 /tmp/verify.py 2>&1 | tee /logs/verifier/verify-debug.log
72+
python3 "$VERIFY_SCRIPT" 2>&1 | tee /logs/verifier/verify-debug.log
7273
exit 0
74+
rm -f "$VERIFY_SCRIPT"

0 commit comments

Comments
 (0)