sourcegraph
diff --git a/‎.github/workflows/docs-consistency.yml‎
Lines changed: 22 additions & 0 deletions b/‎.github/workflows/docs-consistency.yml‎
Lines changed: 22 additions & 0 deletions
diff --git a/‎.github/workflows/roam.yml‎
Lines changed: 21 additions & 0 deletions b/‎.github/workflows/roam.yml‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 5 additions & 3 deletions b/‎README.md‎
Lines changed: 5 additions & 3 deletions
diff --git a/‎agents/harnesses/openhands/agent.py‎
Lines changed: 31 additions & 0 deletions b/‎agents/harnesses/openhands/agent.py‎
Lines changed: 31 additions & 0 deletions
diff --git a/‎configs/eval_matrix.json‎
Lines changed: 0 additions & 16 deletions b/‎configs/eval_matrix.json‎
Lines changed: 0 additions & 16 deletions
diff --git a/‎configs/openhands_2config.sh‎
Lines changed: 11 additions & 1 deletion b/‎configs/openhands_2config.sh‎
Lines changed: 11 additions & 1 deletion
@@ -0,0 +1,22 @@
+name: Docs Consistency
+
+on:
+  pull_request:  # no branch filter: runs for every pull request
+  push:
+    branches:
+      - main  # pushes are checked on main only
+
+jobs:
+  docs-consistency:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"  # quoted so YAML keeps it a string
+
+      - name: Validate docs references
+        run: python3 scripts/docs_consistency_check.py  # the job's only check step
@@ -0,0 +1,21 @@
+name: Roam Code Analysis
+on:
+  pull_request:
+    branches: [main, master]  # only PRs targeting main or master
+permissions:
+  contents: read
+  pull-requests: write  # NOTE(review): no step below visibly writes to the PR - confirm this scope is needed
+jobs:
+  roam:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history rather than the default shallow clone
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+      - run: pip install roam-code  # NOTE(review): unpinned, so the installed version can change between runs
+      - run: roam index
+      - run: roam fitness
+      - run: roam pr-risk --json  # last step; presumably prints JSON to the job log - nothing here consumes it
@@ -16,11 +16,13 @@ Eight suites organized by software development lifecycle phase:
 | `ccb_design` | Architecture & Design | 20 | Architecture analysis, dependency graphs, change impact |
 | `ccb_fix` | Bug Repair | 25 | Diagnosing and fixing real bugs across production codebases |
 | `ccb_build` | Feature & Refactoring | 25 | New features, refactoring, dependency management |
-| `ccb_test` | Testing & QA | 14 | Code review, performance testing, code search validation |
-| `ccb_document` | Documentation | 13 | API references, architecture docs, migration guides |
+| `ccb_test` | Testing & QA | 20 | Code review, performance testing, code search validation |
+| `ccb_document` | Documentation | 20 | API references, architecture docs, migration guides |
 | `ccb_secure` | Security & Compliance | 20 | CVE analysis, reachability, governance, access control |
 | `ccb_debug` | Debugging & Investigation | 20 | Root cause tracing, fault localization, provenance |
-| **Total** | | **157** | |
+| **Total** | | **170** | |
+
+The counts shown for `ccb_test` and `ccb_document` are targets (20 each); 14 and 13 tasks, respectively, currently exist on disk, so the **170** total likewise reflects the targets. See `docs/backlog_ccb_test.json` and `docs/backlog_ccb_document.json` for the growth plan.
 
 See `docs/PRD_SDLC_SUITE_REORGANIZATION.md` for the reorganization rationale and task mapping.
 
 
@@ -1,9 +1,40 @@
 """OpenHands harness agent wired to Harbor's OpenHands CLI with shared baseline tooling."""
 
+import os
+
+from harbor.agents import utils as harbor_utils
 from harbor.agents.installed.openhands import OpenHands
 
 from ..base import BaselineHarnessMixin
 
+# Codex model names (LiteLLM/Harbor don't know these); the wrapper below resolves them
+# to CODEX_API_KEY, falling back to OPENAI_API_KEY when only that variable is set.
+_CODEX_MODEL_PREFIXES = ("gpt-5.3-codex", "gpt53codex", "codex")
+
+
+def _get_api_key_var_names_from_model_name(model_name: str) -> list[str]:
+    """Wrap Harbor's resolver so Codex models map to OPENAI_API_KEY or CODEX_API_KEY."""
+    lower = (model_name or "").strip().lower()
+    if lower in _CODEX_MODEL_PREFIXES or (lower and "codex" in lower and "gpt" in lower):
+        if os.environ.get("CODEX_API_KEY"):
+            return ["CODEX_API_KEY"]
+        if os.environ.get("OPENAI_API_KEY"):
+            return ["OPENAI_API_KEY"]
+        # Harbor will raise "Unset API variable"; prefer telling user to set CODEX_API_KEY
+        return ["CODEX_API_KEY"]
+    return _original_get_api_key_var_names(model_name)
+
+
+# Apply once at import so Harbor's OpenHands sees it. Harbor's openhands.py does
+# "from harbor.agents.utils import get_api_key_var_names_from_model_name", so the
+# openhands module's own reference is patched too (the utils patch alone is not seen there).
+_original_get_api_key_var_names = harbor_utils.get_api_key_var_names_from_model_name  # captured before patching so the wrapper can delegate
+harbor_utils.get_api_key_var_names_from_model_name = _get_api_key_var_names_from_model_name
+import sys  # NOTE(review): mid-module import; conventionally grouped with the imports at the top
+_openhands_mod = sys.modules.get("harbor.agents.installed.openhands")
+if _openhands_mod is not None:  # skip silently if the module is not loaded
+    _openhands_mod.get_api_key_var_names_from_model_name = _get_api_key_var_names_from_model_name
+
 
 class OpenHandsHarnessAgent(BaselineHarnessMixin, OpenHands):
     """OpenHands CLI agent extended with evaluation context and MCP wiring."""
@@ -6,11 +6,9 @@
   ],
   "supported_configs": [
     "baseline",
-    "sourcegraph_base",
     "sourcegraph_full",
     "sourcegraph_isolated",
     "sg_only_env",
-    "github_base",
     "github_full"
   ],
   "config_definitions": {
@@ -21,13 +19,6 @@
       "track_in_official": true,
       "status": "active"
     },
-    "sourcegraph_base": {
-      "baseline_mcp_type": "sourcegraph_base",
-      "mcp_enabled": true,
-      "provider": "sourcegraph",
-      "track_in_official": true,
-      "status": "legacy_or_targeted"
-    },
     "sourcegraph_full": {
       "baseline_mcp_type": "sourcegraph_full",
       "mcp_enabled": true,
@@ -51,13 +42,6 @@
       "status": "experimental",
       "notes": "Environment-only variant: same agent as sourcegraph_full, Dockerfile.sg_only removes local source"
     },
-    "github_base": {
-      "baseline_mcp_type": "github_base",
-      "mcp_enabled": true,
-      "provider": "github",
-      "track_in_official": false,
-      "status": "experimental_scaffold"
-    },
     "github_full": {
       "baseline_mcp_type": "github_full",
       "mcp_enabled": true,
 
@@ -16,6 +16,7 @@
 #   --parallel N           Max parallel task subshells (default: 1)
 #   --category CATEGORY    Run category label for jobs dir (default: staging)
 #   --benchmark BENCH      Optional benchmark filter (e.g. ccb_build, ccb_fix)
+#   --task TASK_ID         Run only the task with this ID (applied in addition to --benchmark)
 
 set -e
 
@@ -27,12 +28,14 @@ export PYTHONPATH="$(pwd):${PYTHONPATH:-}"
 
 # Shared helpers (validation/reporting and run helpers)
 source "$SCRIPT_DIR/_common.sh"
+load_credentials
 
 SELECTION_FILE="$SCRIPT_DIR/selected_benchmark_tasks.json"
 AGENT_PATH="${AGENT_PATH:-agents.harnesses.openhands:OpenHandsHarnessAgent}"
 MODEL="${MODEL:-anthropic/claude-opus-4-6}"
 CATEGORY="${CATEGORY:-staging}"
 BENCHMARK_FILTER=""
+TASK_FILTER=""
 CONCURRENCY=2
 TIMEOUT_MULTIPLIER=10
 RUN_BASELINE=true
@@ -68,6 +71,10 @@ while [[ $# -gt 0 ]]; do
             BENCHMARK_FILTER="$2"
             shift 2
             ;;
+        --task)
+            TASK_FILTER="$2"
+            shift 2
+            ;;
         *)
             echo "Unknown option: $1"
             exit 1
@@ -80,19 +87,22 @@ if [ ! -f "$SELECTION_FILE" ]; then
     exit 1
 fi
 
-readarray -t TASK_ROWS < <(python3 - "$SELECTION_FILE" "$BENCHMARK_FILTER" <<'PYEOF'
+readarray -t TASK_ROWS < <(python3 - "$SELECTION_FILE" "$BENCHMARK_FILTER" "$TASK_FILTER" <<'PYEOF'
 import json
 import sys
 
 selection_file = sys.argv[1]
 benchmark_filter = sys.argv[2]
+task_filter = sys.argv[3] if len(sys.argv) > 3 else ""
 
 data = json.load(open(selection_file))
 for task in data.get("tasks", []):
     if task.get("excluded", False):
         continue
     if benchmark_filter and task.get("benchmark") != benchmark_filter:
         continue
+    if task_filter and task.get("task_id") != task_filter:
+        continue
     task_id = task["task_id"]
     task_dir = task["task_dir"]
     benchmark = task.get("benchmark", "")