sourcegraph
diff --git a/‎CLAUDE.md‎
Lines changed: 1 addition & 1 deletion b/‎CLAUDE.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/environment/Dockerfile‎
Lines changed: 32 additions & 0 deletions b/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/environment/Dockerfile‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/instruction.md‎
Lines changed: 65 additions & 0 deletions b/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/instruction.md‎
Lines changed: 65 additions & 0 deletions
diff --git a/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/task.toml‎
Lines changed: 26 additions & 0 deletions b/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/task.toml‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/tests/ground_truth.json‎
Lines changed: 25 additions & 0 deletions b/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/tests/ground_truth.json‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/tests/test.sh‎
Lines changed: 189 additions & 0 deletions b/‎benchmarks/ccb_crossrepo/crossrepo-chain-001/tests/test.sh‎
Lines changed: 189 additions & 0 deletions
diff --git a/‎benchmarks/ccb_crossrepo/crossrepo-chain-002/environment/Dockerfile‎
Lines changed: 32 additions & 0 deletions b/‎benchmarks/ccb_crossrepo/crossrepo-chain-002/environment/Dockerfile‎
Lines changed: 32 additions & 0 deletions
@@ -49,7 +49,7 @@ docs/
 | TAC | 6 | Mixed | Tool-augmented coding | Active |
 | DIBench | 8 | Mixed | Dependency installation | Active |
 | SWE-Perf | 3 | Python | Performance optimization | Active |
-| CodeReview | 3 | TS, C#, Mixed | AI code review: find & fix injected PR defects | Active — harder variant planned |
+| CodeReview | 3 | C#, JS, TS | AI code review: find & fix injected PR defects | Active — harder variant planned |
 | LinuxFLBench | 5 | C | Linux kernel fault localization | Active — verifier needs hardening |
 | Investigation | 4 | Mixed | Codebase investigation tasks | Active — harder variant planned |
 | Enterprise | 6 | Mixed | Enterprise codebase tasks | Active |
 
@@ -0,0 +1,32 @@
+FROM golang:1.23-alpine AS base
+
+# git: clones below; bash + python3: the verifier runs `bash /tests/test.sh`, which pipes its scorer into python3
+RUN apk add --no-cache git bash python3
+
+# Clone each repo and check out its pinned commit so line numbers stay stable
+WORKDIR /workspace
+
+# Clone kubernetes/kubernetes
+RUN git clone https://github.com/kubernetes/kubernetes.git kubernetes && \
+    cd kubernetes && \
+    git checkout 31bf3ed48b91b67e5003d8df1b3bd0b918d1fb94
+
+# Clone kubernetes/api (synced from k/k staging)
+RUN git clone https://github.com/kubernetes/api.git api && \
+    cd api && \
+    git checkout f32ed1d60cf0787a512bebd6c06a4b84ae0b7cc7
+
+# Clone kubernetes/apimachinery
+RUN git clone https://github.com/kubernetes/apimachinery.git apimachinery && \
+    cd apimachinery && \
+    git checkout b2e9f88ff6d4c50c13061a53b1239c7707354eda
+
+# Fail the build early if any of the three clone directories is missing
+RUN ls -la /workspace/ && \
+    test -d /workspace/kubernetes && \
+    test -d /workspace/api && \
+    test -d /workspace/apimachinery
+
+FROM base AS final
+WORKDIR /workspace
+CMD ["/bin/bash"]
@@ -0,0 +1,65 @@
+Trace the definition of `TypeMeta` through a chain of Kubernetes package dependencies.
+
+## Background
+
+In the Kubernetes ecosystem, core API types are defined across multiple repositories forming a dependency chain. The `TypeMeta` struct is a fundamental type embedded in all Kubernetes resource objects (Pod, Deployment, Service, etc.).
+
+## Repositories
+
+Three repositories are available under `/workspace/`:
+
+- `/workspace/kubernetes/` — kubernetes/kubernetes (main Kubernetes repo with controller implementations)
+- `/workspace/api/` — kubernetes/api (API type definitions for core resources)
+- `/workspace/apimachinery/` — kubernetes/apimachinery (shared machinery and meta types)
+
+## Task
+
+Trace the `TypeMeta` type from its **usage site** in the main Kubernetes repo through any **intermediate re-exports** to its **original definition**. Document each step in the chain.
+
+Start from this usage site:
+- **File**: `/workspace/kubernetes/staging/src/k8s.io/api/core/v1/types.go`
+- **Usage**: The `Pod` struct embeds `metav1.TypeMeta`
+
+For each link in the chain, record:
+- `step`: sequence number (1 for usage, 2 for re-export, 3 for definition)
+- `repo`: which repository (e.g., `kubernetes/kubernetes`, `kubernetes/api`, `kubernetes/apimachinery`)
+- `file`: path relative to the repository root
+- `line`: line number where the symbol appears (approximate is acceptable)
+- `context`: what happens at this step (e.g., "Pod embeds TypeMeta", "api/core/v1 re-exports metav1", "TypeMeta defined here")
+
+## Output
+
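+Write your results to `/workspace/chain.json` as a top-level JSON array with one object per step. The example below illustrates the format only; its line numbers are placeholders, not the expected answer: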
+
+```json
+[
+  {
+    "step": 1,
+    "repo": "kubernetes/kubernetes",
+    "file": "staging/src/k8s.io/api/core/v1/types.go",
+    "line": 4500,
+    "context": "Pod struct embeds metav1.TypeMeta"
+  },
+  {
+    "step": 2,
+    "repo": "kubernetes/api",
+    "file": "core/v1/types.go",
+    "line": 4500,
+    "context": "api/core/v1 imports metav1 from apimachinery"
+  },
+  {
+    "step": 3,
+    "repo": "kubernetes/apimachinery",
+    "file": "pkg/apis/meta/v1/types.go",
+    "line": 50,
+    "context": "TypeMeta struct definition with APIVersion and Kind fields"
+  }
+]
+```
+
+## Notes
+
+- The kubernetes/kubernetes repository contains a staging directory (`staging/src/k8s.io/`) with code that is synced to separate repositories (kubernetes/api, kubernetes/apimachinery). For this task, treat them as separate codebases.
+- Use `go_to_definition` or cross-file search to trace imports and type references.
+- You may encounter intermediate re-exports—document all steps.
+- Line numbers do not need to be exact: a reported line within +/- 50 lines of the symbol's actual location is accepted.
@@ -0,0 +1,26 @@
+version = "1.0"
+
+[metadata]
+task_id = "crossrepo-chain-001"
+category = "symbol_resolution"
+language = "go"
+difficulty = "very_hard"
+tags = ["ccb-crossrepo", "dependency-chain", "kubernetes", "type-resolution", "cross-repo-navigation"]
+
+[verifier]
+timeout_sec = 300.0
+command = "bash /tests/test.sh"
+
+[agent]
+timeout_sec = 1200.0
+
+[verification]
+reward_type = "partial_credit"
+description = "Partial credit for each correct link in the dependency chain (usage → re-export → definition)"
+
+[environment]
+docker_image = "harbor-ccb_crossrepo:crossrepo-chain-001"
+build_timeout_sec = 600.0
+cpus = 2
+memory_mb = 8192
+storage_mb = 20480
@@ -0,0 +1,25 @@
+{
+  "steps": [
+    {
+      "step": 1,
+      "repo": "kubernetes/kubernetes",
+      "file": "staging/src/k8s.io/api/core/v1/types.go",
+      "line": 5465,
+      "context": "Pod struct embeds metav1.TypeMeta"
+    },
+    {
+      "step": 2,
+      "repo": "kubernetes/api",
+      "file": "core/v1/types.go",
+      "line": 21,
+      "context": "core/v1 imports metav1 from k8s.io/apimachinery/pkg/apis/meta/v1"
+    },
+    {
+      "step": 3,
+      "repo": "kubernetes/apimachinery",
+      "file": "pkg/apis/meta/v1/types.go",
+      "line": 42,
+      "context": "TypeMeta struct definition with Kind and APIVersion fields"
+    }
+  ]
+}
@@ -0,0 +1,189 @@
+#!/bin/bash
+# Reward: Partial credit (0.0-1.0) — each correct link in the chain scores independently
+#
+# DEPENDENCY CHAIN SCORER
+# -----------------------
+# Scores agent output by comparing each step in the dependency chain against
+# ground truth. Each step is worth an equal fraction of the total score.
+# Matching is fuzzy on line numbers (+/- tolerance) and exact on repo/file.
+
+set -e
+
+LINE_TOLERANCE=50  # +/- 50 lines for line number matching
+REWARD_FILE="/logs/verifier/reward.txt"
+GROUND_TRUTH="/tests/ground_truth.json"
+OUTPUT_PATH="/workspace/chain.json"
+
+mkdir -p /logs/verifier
+
+# zero_reward MSG...: print each message on its own line, record a 0.0
+# reward, and end the run with a success exit status.
+zero_reward() {
+    printf '%s\n' "$@"
+    echo "0.0" > "$REWARD_FILE"
+    exit 0
+}
+
+# ── Check prerequisites ───────────────────────────────────────────────────
+[ -f "$GROUND_TRUTH" ] || zero_reward "ERROR: ground_truth.json not found at $GROUND_TRUTH"
+
+[ -f "$OUTPUT_PATH" ] || zero_reward \
+    "No agent output found at $OUTPUT_PATH" \
+    "Agent did not produce the required chain.json file."
+
+printf '%s\n' \
+    "Scoring dependency chain..." \
+    "Output: $OUTPUT_PATH" \
+    "Ground truth: $GROUND_TRUTH" \
+    ""
+
+# ── Delegate scoring to Python ────────────────────────────────────────────
+OUTPUT_PATH="$OUTPUT_PATH" GROUND_TRUTH="$GROUND_TRUTH" \
+REWARD_FILE="$REWARD_FILE" LINE_TOLERANCE="$LINE_TOLERANCE" \
+python3 << 'PYEOF'
+import json, os, re, sys
+
+# Scorer configuration, taken from the environment variables that the bash
+# wrapper sets on the python3 command line. The three paths are required
+# (a missing variable raises KeyError); the tolerance falls back to 50.
+OUTPUT_PATH = os.environ["OUTPUT_PATH"]  # agent-written chain JSON
+GT_PATH = os.environ["GROUND_TRUTH"]  # expected chain JSON
+REWARD_PATH = os.environ["REWARD_FILE"]  # file that receives the final score
+LINE_TOLERANCE = int(os.environ.get("LINE_TOLERANCE", "50"))  # allowed +/- line drift
+
+def write_reward(score):
+    """Record the score, formatted to two decimals, in the reward file and echo it to stdout."""
+    formatted = f"{score:.2f}"
+    with open(REWARD_PATH, "w") as reward_file:
+        print(formatted, file=reward_file)
+    print(f"\nTests completed - Score: {formatted}")
+
+def strip_code_fences(text):
+    """Return the body of the first ``` or ```json fenced block, or the whole text when no fence is present.
+
+    The result is whitespace-stripped in both cases.
+    """
+    fenced = re.search(r'```(?:json)?\s*\n(.*?)```', text, re.DOTALL)
+    if fenced is None:
+        return text.strip()
+    return fenced.group(1).strip()
+
+def normalize_path(path):
+    """Reduce a file path to repo-relative form so two paths can be compared.
+
+    Strips surrounding whitespace, then a leading "./", then a leading
+    "/workspace/<dir>/" prefix (the first directory under /workspace is
+    removed along with it).
+
+    Non-string input (for example JSON null or a number in the agent's
+    "file" field) returns "" so that a malformed step compares as a
+    mismatch instead of raising and aborting the scorer.
+    """
+    if not isinstance(path, str):
+        return ""
+    path = path.strip()
+    path = re.sub(r'^\./', '', path)
+    path = re.sub(r'^/workspace/[^/]+/', '', path)
+    return path
+
+def lines_match(line1, line2, tolerance):
+    """Return True when two line numbers are within `tolerance` of each other.
+
+    A missing line number (None on either side) is not penalized and
+    counts as a match. A value that is present but cannot be converted
+    to an integer (e.g. "42-50", a list) counts as a mismatch rather
+    than raising, so one malformed step cannot abort the whole scoring run.
+    """
+    if line1 is None or line2 is None:
+        return True  # Don't penalize if line number not provided
+    try:
+        return abs(int(line1) - int(line2)) <= tolerance
+    except (TypeError, ValueError, OverflowError):
+        # Provided but unparseable: it cannot be confirmed within tolerance.
+        return False
+
+# ── Load ground truth ────────────────────────────────────────────────────
+with open(GT_PATH) as gt_file:
+    gt = json.load(gt_file)
+
+expected_steps = gt.get("steps", [])
+if expected_steps:
+    num_expected = len(expected_steps)
+else:
+    # Nothing to score against: record a zero reward and stop.
+    print("ERROR: ground_truth.json must have a 'steps' array")
+    write_reward(0.0)
+    sys.exit(0)
+
+# ── Load agent output ────────────────────────────────────────────────────
+# Anything that is not a parseable JSON array is scored as an empty chain.
+reported_steps = []
+try:
+    with open(OUTPUT_PATH) as out_file:
+        parsed = json.loads(strip_code_fences(out_file.read()))
+except (json.JSONDecodeError, ValueError) as e:
+    print(f"Malformed JSON in agent output: {e}")
+else:
+    if isinstance(parsed, list):
+        reported_steps = parsed
+    else:
+        print("Agent output is not a JSON array — scoring as empty.")
+
+num_reported = len(reported_steps)
+
+if not num_reported:
+    print("Agent output is empty — no chain steps to score.")
+    print(f"Expected {num_expected} steps.")
+    write_reward(0.0)
+    sys.exit(0)
+
+# ── Score each step ──────────────────────────────────────────────────────
+print(f"=== Dependency Chain Scoring ===")
+print(f"  Expected steps: {num_expected}")
+print(f"  Reported steps: {num_reported}")
+print(f"  Line tolerance: +/- {LINE_TOLERANCE}")
+print()
+
+def _as_text(value):
+    """Return value if it is a string, else "" (JSON null, numbers, lists, ...)."""
+    return value if isinstance(value, str) else ""
+
+correct_steps = 0
+step_details = []
+
+for i, expected in enumerate(expected_steps, start=1):
+    # Find matching reported step by step number or position.
+    # Array entries that are not JSON objects are ignored: calling .get on
+    # them would raise and abort scoring before any reward is written.
+    wanted_step = expected.get("step", i)
+    reported = None
+    for r in reported_steps:
+        if isinstance(r, dict) and r.get("step") == wanted_step:
+            reported = r
+            break
+
+    if not reported and i <= num_reported:
+        # Fallback: match by position if step field missing
+        candidate = reported_steps[i-1]
+        if isinstance(candidate, dict):
+            reported = candidate
+
+    if not reported:
+        step_details.append({
+            "step": i,
+            "status": "MISSING",
+            "expected": expected
+        })
+        continue
+
+    # Check each field. repo/file go through _as_text so that a null or
+    # non-string value compares as a mismatch instead of raising on .strip().
+    repo_match = _as_text(expected.get("repo")).strip() == _as_text(reported.get("repo")).strip()
+    file_match = normalize_path(_as_text(expected.get("file"))) == normalize_path(_as_text(reported.get("file")))
+    line_match = lines_match(expected.get("line"), reported.get("line"), LINE_TOLERANCE)
+
+    all_match = repo_match and file_match and line_match
+
+    if all_match:
+        correct_steps += 1
+        status = "CORRECT"
+    else:
+        # PARTIAL is informational only; credit is given for CORRECT steps alone.
+        status = "PARTIAL" if (repo_match or file_match) else "WRONG"
+
+    step_details.append({
+        "step": i,
+        "status": status,
+        "repo_match": repo_match,
+        "file_match": file_match,
+        "line_match": line_match,
+        "expected": expected,
+        "reported": reported
+    })
+
+# ── Compute score ────────────────────────────────────────────────────────
+# Each step is worth equal credit
+score = correct_steps / num_expected if num_expected > 0 else 0.0
+
+# ── Print detailed results ───────────────────────────────────────────────
+# "~" is reserved for PARTIAL (repo or file matched); WRONG and MISSING
+# steps both print the failure mark.
+STATUS_SYMBOLS = {"CORRECT": "✓", "PARTIAL": "~"}
+
+print("=== Step-by-Step Results ===")
+for detail in step_details:
+    status = detail["status"]
+    symbol = STATUS_SYMBOLS.get(status, "✗")
+    print(f"\nStep {detail['step']}: [{symbol}] {status}")
+
+    exp = detail["expected"]
+    print(f"  Expected: {exp.get('repo')} / {exp.get('file')} : {exp.get('line')}")
+    print(f"            {exp.get('context', 'N/A')}")
+
+    if "reported" in detail:
+        # Only scored (non-MISSING) entries carry "reported" and the
+        # per-field match flags, so no further status check is needed here.
+        rep = detail["reported"]
+        print(f"  Reported: {rep.get('repo')} / {rep.get('file')} : {rep.get('line')}")
+        print(f"            {rep.get('context', 'N/A')}")
+        print(f"  Match: repo={detail['repo_match']}, file={detail['file_match']}, line={detail['line_match']}")
+    else:
+        print(f"  Reported: (missing)")
+
+print(f"\n=== Summary ===")
+print(f"  Correct steps: {correct_steps}/{num_expected}")
+print(f"  Score: {score:.2f}")
+
+write_reward(score)
+PYEOF
@@ -0,0 +1,32 @@
+FROM golang:1.23-alpine AS base
+
+# Install git (used by the clones below) and bash (the image's CMD). NOTE(review): python3 is not installed — confirm this task's verifier script does not need it
+RUN apk add --no-cache git bash
+
+# Clone each repo under /workspace and check out its pinned commit
+WORKDIR /workspace
+
+# Clone istio/istio
+RUN git clone https://github.com/istio/istio.git istio && \
+    cd istio && \
+    git checkout 4c1f845d839e9086ee85ad9337f2647492322eb4
+
+# Clone envoyproxy/go-control-plane
+RUN git clone https://github.com/envoyproxy/go-control-plane.git go-control-plane && \
+    cd go-control-plane && \
+    git checkout 71637ad69bbc5f51fbb2562e612a4365292804a5
+
+# Clone envoyproxy/data-plane-api (protobuf definitions)
+RUN git clone https://github.com/envoyproxy/data-plane-api.git data-plane-api && \
+    cd data-plane-api && \
+    git checkout 84e84367f2560cdb47b9bb78fd3e615feb80c3e4
+
+# Fail the build early if any of the three clone directories is missing
+RUN ls -la /workspace/ && \
+    test -d /workspace/istio && \
+    test -d /workspace/go-control-plane && \
+    test -d /workspace/data-plane-api
+
+# Final stage adds nothing on top of base; it only fixes the working directory and default shell
+FROM base AS final
+WORKDIR /workspace
+CMD ["/bin/bash"]