Skip to content

Commit 95de19a

Browse files
sjarmak and claude
committed
fix: clone repo into /repo_full/ for sg_only verifier hallucination check
Instead of skipping the hallucination path check in sg_only mode (which would miss genuine fabrications), clone the K8s repo into /repo_full/ in the Dockerfile.sg_only. The sgonly_verifier_wrapper.sh restores it to /workspace before the verifier runs, so path existence checks work correctly. Reverts the sg_only skip approach from previous commit — the hallucination check now runs unconditionally. The agent still sees an empty /workspace and must use MCP exclusively.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 13d298e commit 95de19a

File tree

10 files changed

+40
-20
lines changed

10 files changed

+40
-20
lines changed

benchmarks/ccb_document/k8s-apiserver-doc-gen-001/environment/Dockerfile.sg_only

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1212
curl \
1313
&& rm -rf /var/lib/apt/lists/*
1414

15+
# Clone repo into /repo_full/ for verifier hallucination check only
16+
# Agent does NOT have access — workspace is empty, agent uses MCP
17+
RUN mkdir -p /repo_full && cd /repo_full && \
18+
git clone --filter=blob:none --no-checkout https://github.com/kubernetes/kubernetes.git . && \
19+
git checkout 8c9c67c000104450cfc5a5f48053a9a84b73cf93
20+
1521
WORKDIR /workspace
1622

1723
# Empty git repo so agent can commit work
@@ -21,6 +27,6 @@ RUN git init && \
2127

2228
RUN mkdir -p /logs/agent /logs/verifier
2329

24-
RUN touch /tmp/.sg_only_mode
30+
RUN touch /tmp/.sg_only_mode && echo '/workspace' > /tmp/.sg_only_workdir
2531

2632
ENTRYPOINT []

benchmarks/ccb_document/k8s-apiserver-doc-gen-001/tests/test.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,11 @@ base = (
9696
)
9797
9898
# Hallucination penalty: invalid path mentions only.
99-
# In sg_only mode, /workspace has no repo — skip path existence check
100-
sg_only = Path('/tmp/.sg_only_mode').exists()
10199
penalty = 0.0
102100
path_candidates = set(re.findall(r"(?:staging/src|pkg|cmd|api)/[A-Za-z0-9_./-]+\.go", text))
103101
invalid = 0
104102
for p in path_candidates:
105-
if not sg_only and not Path('/workspace', p).exists():
103+
if not Path('/workspace', p).exists():
106104
invalid += 1
107105
if path_candidates:
108106
invalid_ratio = invalid / len(path_candidates)

benchmarks/ccb_document/k8s-applyconfig-doc-gen-001/environment/Dockerfile.sg_only

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1212
curl \
1313
&& rm -rf /var/lib/apt/lists/*
1414

15+
# Clone repo into /repo_full/ for verifier hallucination check only
16+
# Agent does NOT have access — workspace is empty, agent uses MCP
17+
RUN mkdir -p /repo_full && cd /repo_full && \
18+
git clone --filter=blob:none --no-checkout https://github.com/kubernetes/kubernetes.git . && \
19+
git checkout 8c9c67c000104450cfc5a5f48053a9a84b73cf93
20+
1521
WORKDIR /workspace
1622

1723
# Empty git repo so agent can commit work
@@ -21,6 +27,6 @@ RUN git init && \
2127

2228
RUN mkdir -p /logs/agent /logs/verifier
2329

24-
RUN touch /tmp/.sg_only_mode
30+
RUN touch /tmp/.sg_only_mode && echo '/workspace' > /tmp/.sg_only_workdir
2531

2632
ENTRYPOINT []

benchmarks/ccb_document/k8s-applyconfig-doc-gen-001/tests/test.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,11 @@ base = (
9696
)
9797
9898
# Hallucination penalty: invalid path mentions only.
99-
# In sg_only mode, /workspace has no repo — skip path existence check
100-
sg_only = Path('/tmp/.sg_only_mode').exists()
10199
penalty = 0.0
102100
path_candidates = set(re.findall(r"(?:staging/src|pkg|cmd|api)/[A-Za-z0-9_./-]+\.go", text))
103101
invalid = 0
104102
for p in path_candidates:
105-
if not sg_only and not Path('/workspace', p).exists():
103+
if not Path('/workspace', p).exists():
106104
invalid += 1
107105
if path_candidates:
108106
invalid_ratio = invalid / len(path_candidates)

benchmarks/ccb_document/k8s-clientgo-doc-gen-001/environment/Dockerfile.sg_only

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1212
curl \
1313
&& rm -rf /var/lib/apt/lists/*
1414

15+
# Clone repo into /repo_full/ for verifier hallucination check only
16+
# Agent does NOT have access — workspace is empty, agent uses MCP
17+
RUN mkdir -p /repo_full && cd /repo_full && \
18+
git clone --filter=blob:none --no-checkout https://github.com/kubernetes/kubernetes.git . && \
19+
git checkout 8c9c67c000104450cfc5a5f48053a9a84b73cf93
20+
1521
WORKDIR /workspace
1622

1723
# Empty git repo so agent can commit work
@@ -21,6 +27,6 @@ RUN git init && \
2127

2228
RUN mkdir -p /logs/agent /logs/verifier
2329

24-
RUN touch /tmp/.sg_only_mode
30+
RUN touch /tmp/.sg_only_mode && echo '/workspace' > /tmp/.sg_only_workdir
2531

2632
ENTRYPOINT []

benchmarks/ccb_document/k8s-clientgo-doc-gen-001/tests/test.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,11 @@ base = (
9696
)
9797
9898
# Hallucination penalty: invalid path mentions only.
99-
# In sg_only mode, /workspace has no repo — skip path existence check
100-
sg_only = Path('/tmp/.sg_only_mode').exists()
10199
penalty = 0.0
102100
path_candidates = set(re.findall(r"(?:staging/src|pkg|cmd|api)/[A-Za-z0-9_./-]+\.go", text))
103101
invalid = 0
104102
for p in path_candidates:
105-
if not sg_only and not Path('/workspace', p).exists():
103+
if not Path('/workspace', p).exists():
106104
invalid += 1
107105
if path_candidates:
108106
invalid_ratio = invalid / len(path_candidates)

benchmarks/ccb_document/k8s-controller-mgr-doc-gen-001/environment/Dockerfile.sg_only

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1212
curl \
1313
&& rm -rf /var/lib/apt/lists/*
1414

15+
# Clone repo into /repo_full/ for verifier hallucination check only
16+
# Agent does NOT have access — workspace is empty, agent uses MCP
17+
RUN mkdir -p /repo_full && cd /repo_full && \
18+
git clone --filter=blob:none --no-checkout https://github.com/kubernetes/kubernetes.git . && \
19+
git checkout 8c9c67c000104450cfc5a5f48053a9a84b73cf93
20+
1521
WORKDIR /workspace
1622

1723
# Empty git repo so agent can commit work
@@ -21,6 +27,6 @@ RUN git init && \
2127

2228
RUN mkdir -p /logs/agent /logs/verifier
2329

24-
RUN touch /tmp/.sg_only_mode
30+
RUN touch /tmp/.sg_only_mode && echo '/workspace' > /tmp/.sg_only_workdir
2531

2632
ENTRYPOINT []

benchmarks/ccb_document/k8s-controller-mgr-doc-gen-001/tests/test.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,11 @@ base = (
9696
)
9797
9898
# Hallucination penalty: invalid path mentions only.
99-
# In sg_only mode, /workspace has no repo — skip path existence check
100-
sg_only = Path('/tmp/.sg_only_mode').exists()
10199
penalty = 0.0
102100
path_candidates = set(re.findall(r"(?:staging/src|pkg|cmd|api)/[A-Za-z0-9_./-]+\.go", text))
103101
invalid = 0
104102
for p in path_candidates:
105-
if not sg_only and not Path('/workspace', p).exists():
103+
if not Path('/workspace', p).exists():
106104
invalid += 1
107105
if path_candidates:
108106
invalid_ratio = invalid / len(path_candidates)

benchmarks/ccb_document/k8s-fairqueuing-doc-gen-001/environment/Dockerfile.sg_only

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,12 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1212
curl \
1313
&& rm -rf /var/lib/apt/lists/*
1414

15+
# Clone repo into /repo_full/ for verifier hallucination check only
16+
# Agent does NOT have access — workspace is empty, agent uses MCP
17+
RUN mkdir -p /repo_full && cd /repo_full && \
18+
git clone --filter=blob:none --no-checkout https://github.com/kubernetes/kubernetes.git . && \
19+
git checkout 8c9c67c000104450cfc5a5f48053a9a84b73cf93
20+
1521
WORKDIR /workspace
1622

1723
# Empty git repo so agent can commit work
@@ -21,6 +27,6 @@ RUN git init && \
2127

2228
RUN mkdir -p /logs/agent /logs/verifier
2329

24-
RUN touch /tmp/.sg_only_mode
30+
RUN touch /tmp/.sg_only_mode && echo '/workspace' > /tmp/.sg_only_workdir
2531

2632
ENTRYPOINT []

benchmarks/ccb_document/k8s-fairqueuing-doc-gen-001/tests/test.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,13 +96,11 @@ base = (
9696
)
9797
9898
# Hallucination penalty: invalid path mentions only.
99-
# In sg_only mode, /workspace has no repo — skip path existence check
100-
sg_only = Path('/tmp/.sg_only_mode').exists()
10199
penalty = 0.0
102100
path_candidates = set(re.findall(r"(?:staging/src|pkg|cmd|api)/[A-Za-z0-9_./-]+\.go", text))
103101
invalid = 0
104102
for p in path_candidates:
105-
if not sg_only and not Path('/workspace', p).exists():
103+
if not Path('/workspace', p).exists():
106104
invalid += 1
107105
if path_candidates:
108106
invalid_ratio = invalid / len(path_candidates)

0 commit comments

Comments
 (0)