From 76c10442c32a593f88aa61abb39f074a7f38ded7 Mon Sep 17 00:00:00 2001
From: Stephen Benjamin
Date: Mon, 15 Jun 2026 08:34:30 -0400
Subject: [PATCH] Use a different minimal case

case-001 is the "hard" case Opus always fails at, case-003 is a better
smoke test to make sure things are working.
---
 .../openshift-eng/ai-helpers/openshift-eng-ai-helpers-main.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ci-operator/config/openshift-eng/ai-helpers/openshift-eng-ai-helpers-main.yaml b/ci-operator/config/openshift-eng/ai-helpers/openshift-eng-ai-helpers-main.yaml
index 7ebd5c59b6956..c8f5bbd4804d2 100644
--- a/ci-operator/config/openshift-eng/ai-helpers/openshift-eng-ai-helpers-main.yaml
+++ b/ci-operator/config/openshift-eng/ai-helpers/openshift-eng-ai-helpers-main.yaml
@@ -36,7 +36,7 @@ tests:
   optional: true
   steps:
     env:
-      EVAL_CASES: case-001
+      EVAL_CASES: case-003
       EVAL_CONFIG: plugins/ci/evals/eval-payload-analysis.yaml
       EVAL_MAX_TURNS: "250"
       EVAL_MODEL: claude-opus-4-6