From 98044d26952d7f76587504bb0ab6125a371ec896 Mon Sep 17 00:00:00 2001 From: micu Date: Mon, 30 Mar 2026 08:34:16 +0200 Subject: [PATCH 1/4] fix: plugin hooks, turbo update, package.json cleanup - Handle sync plugin hooks in Effect.promise - Update turbo to v2.8.21 to resolve socket error - Remove dead scripts from package.json --- packages/opencode/src/plugin/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/opencode/src/plugin/index.ts b/packages/opencode/src/plugin/index.ts index 6cecfaac73cd..643595f9ee20 100644 --- a/packages/opencode/src/plugin/index.ts +++ b/packages/opencode/src/plugin/index.ts @@ -275,7 +275,7 @@ export namespace Plugin { for (const hook of state.hooks) { const fn = hook[name] as any if (!fn) continue - yield* Effect.promise(async () => fn(input, output)) + yield* Effect.promise(() => Promise.resolve(fn(input, output))) } return output }) From 1c093205803498cd5e1990e054c4e386d354722c Mon Sep 17 00:00:00 2001 From: micu Date: Mon, 30 Mar 2026 08:34:34 +0200 Subject: [PATCH 2/4] feat(DACMICU): oc CLI, server endpoints, bash integration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the DACMICU pattern — deterministic agent callbacks with model-initiated control. The LLM writes bash scripts that call back into the running openCode instance via the oc command. - Add bin/oc shell wrapper and bin/oc.ts TypeScript implementation - Add /session/:id/exec endpoint for AI judgment (child sessions) - Add /session/:id/tool endpoint for direct tool execution (no LLM) - Integrate oc env vars into bash tool (server URL, session, PATH) - Add oc scripting guidance to system prompt - Add TodoReadTool for oc todo list support - Add MessageV2.model() for parent session model inheritance - Add followUp schema for oc check boolean evaluation - Add TUI visibility for oc calls (ToolParts with metadata.oc) - Fix subagent navigation direction and naming conventions - Disable timeout for any script containing oc commands - Skip timer entirely when timeout is 0 (DACMICU mode) --- packages/opencode/.gitignore | 2 + packages/opencode/bin/oc | 116 ++++ packages/opencode/bin/oc.ts | 610 ++++++++++++++++++ packages/opencode/src/cli/cmd/run.ts | 16 +- .../src/cli/cmd/tui/routes/session/index.tsx | 15 +- .../tui/routes/session/subagent-footer.tsx | 12 +- packages/opencode/src/cli/cmd/tui/thread.ts | 13 +- packages/opencode/src/provider/provider.ts | 20 +- .../opencode/src/server/routes/session.ts | 381 ++++++++++- packages/opencode/src/server/server.ts | 2 + packages/opencode/src/session/llm.ts | 14 +- packages/opencode/src/session/message-v2.ts | 8 + packages/opencode/src/session/processor.ts | 16 +- packages/opencode/src/session/prompt.ts | 18 +- packages/opencode/src/session/retry.ts | 7 +- packages/opencode/src/session/system.ts | 132 +++- packages/opencode/src/tool/bash.ts | 37 +- packages/opencode/src/tool/bash.txt | 2 +- packages/opencode/src/tool/todo.ts | 22 + packages/opencode/src/tool/tool.ts | 4 +- 20 files changed, 1380 insertions(+), 67 deletions(-) create mode 100755 packages/opencode/bin/oc create mode 100755 packages/opencode/bin/oc.ts diff --git a/packages/opencode/.gitignore b/packages/opencode/.gitignore index 348f05113e55..86dc177e05b4 100644 --- a/packages/opencode/.gitignore +++ b/packages/opencode/.gitignore @@ -4,3 +4,5 @@ gen app.log src/provider/models-snapshot.js src/provider/models-snapshot.d.ts +bin/oc-native +test/eval/results diff --git a/packages/opencode/bin/oc b/packages/opencode/bin/oc new file mode 100755 index 000000000000..24c1d679ce72 --- /dev/null +++ b/packages/opencode/bin/oc @@ -0,0 +1,116 @@ +#!/usr/bin/env sh +# oc — openCode CLI callback shell wrapper +# Fast path: oc tool read/glob/grep use curl directly (~5ms) +# Full path: oc prompt/agent/todo use compiled bun binary (~40ms) +set -e + +# Env validation +if [ -z "$OPENCODE_SERVER_URL" ]; then + echo "oc: OPENCODE_SERVER_URL not set — are you running inside an openCode bash tool?" >&2 + exit 1 +fi +if [ -z "$OPENCODE_SESSION_ID" ]; then + echo "oc: OPENCODE_SESSION_ID not set — are you running inside an openCode bash tool?" >&2 + exit 1 +fi + +DIR="$(cd "$(dirname "$0")" && pwd)" +# _oc_exec : runs the TypeScript binary via native binary or bun fallback +_oc_exec() { + if [ -f "$DIR/oc-native" ]; then + exec "$DIR/oc-native" "$@" + else + exec bun run "$DIR/oc.ts" "$@" + fi +} +AGENT="${OPENCODE_AGENT:-build}" +# Strip trailing slash to prevent double-slash routing to web UI +SERVER="${OPENCODE_SERVER_URL%/}" +URL="$SERVER/session/$OPENCODE_SESSION_ID" + +# Auto-announce to stderr (visible in TUI bash block) +_announce() { + [ "$OPENCODE_QUIET" = "1" ] && return + printf '\033[2m[oc] %s\033[0m\n' "$1" >&2 +} + +# JSON-encode a string (requires jq) +_json() { + printf '%s' "$1" | jq -Rs . +} + +# Build tool request JSON body +_tool_body() { + name="$1" args_json="$2" + body="{\"name\":$(_json "$name"),\"args\":$args_json,\"agent\":$(_json "$AGENT")" + [ -n "$OPENCODE_MESSAGE_ID" ] && body="$body,\"messageID\":$(_json "$OPENCODE_MESSAGE_ID")" + body="$body}" + printf '%s' "$body" +} + +# URL-encode a string for use in HTTP headers (requires jq) +_urlencode() { + printf '%s' "$1" | jq -sRr @uri +} + +# POST to /tool endpoint +# Respect OPENCODE_TOOL_TIMEOUT_MS for tests; default 60s for production. +_tool_call() { + name="$1" args_json="$2" + _timeout="${OPENCODE_TOOL_TIMEOUT_MS:-60000}" + _timeout_sec=$(( _timeout / 1000 + 1 )) + curl -sS -X POST "$URL/tool" \ + -H "Content-Type: application/json" \ + -H "x-opencode-directory: $(_urlencode "$PWD")" \ + --max-time "$_timeout_sec" \ + -d "$(_tool_body "$name" "$args_json")" +} + +case "$1" in + tool) + shift + subcmd="$1"; shift 2>/dev/null || true + + # Check if jq is available for the fast path + if command -v jq >/dev/null 2>&1; then + case "$subcmd" in + read) + _announce "tool read ${1:-}" + _tool_call "read" "{\"filePath\":$(_json "$1")}" + exit $? + ;; + glob) + _announce "tool glob ${1:-} ${2:-}" + args="{\"pattern\":$(_json "$1")" + [ -n "$2" ] && args="$args,\"path\":$(_json "$2")" + args="$args}" + # Capture curl output separately to preserve its exit code. + # Piping directly (curl | tr | grep) would mask curl failures + # because $? reflects the last pipeline command, not curl. + _output=$(_tool_call "glob" "$args") || exit $? + # Strip null-byte OC_TRUNCATED marker — null bytes cause grep to emit + # "Binary file matches" instead of filtering, breaking downstream pipes. + printf '%s\n' "$_output" | tr -d '\0' | { grep -v 'OC_TRUNCATED' || true; } + exit 0 + ;; + grep) + _announce "tool grep ${1:-} ${2:-}" + _tool_call "grep" "{\"pattern\":$(_json "$1"),\"path\":$(_json "${2:-.}")}" + exit $? + ;; + esac + fi + + # Fall through to full binary for complex tools or when jq unavailable + _oc_exec tool "$subcmd" "$@" + ;; + + # Everything else goes to the full binary + prompt|agent|todo|status|check|help|--help|-h) + _oc_exec "$@" + ;; + + *) + _oc_exec "$@" + ;; +esac diff --git a/packages/opencode/bin/oc.ts b/packages/opencode/bin/oc.ts new file mode 100755 index 000000000000..16d40751c582 --- /dev/null +++ b/packages/opencode/bin/oc.ts @@ -0,0 +1,610 @@ +#!/usr/bin/env bun +// oc — Calls back into the running openCode instance from bash scripts. +// Deterministic tool calls use the shell fast-path (bin/oc); this binary +// handles complex operations: prompt, agent, todo, status, and tool fallback. + +import { Effect, Exit, Cause, Option, Schema } from "effect" +import { resolve, normalize } from "path" + +class ServerError extends Schema.TaggedErrorClass()("ServerError", { message: Schema.String }) {} +class ValidationError extends Schema.TaggedErrorClass()("ValidationError", { + message: Schema.String, +}) {} +class ApiError extends Schema.TaggedErrorClass()("ApiError", { + message: Schema.String, + status: Schema.optional(Schema.Number), +}) {} + +interface ExecBody extends Record { + prompt: string + system?: string + agent?: string + messageID?: string + model?: { providerID: string; modelID: string } + files?: Array<{ filename: string; mime: string; url: string }> +} + +interface ToolBody extends Record { + name: string + args: Record + agent: string + messageID?: string +} + +interface StatusBody extends Record { + message: string + messageID?: string +} + +const server = process.env.OPENCODE_SERVER_URL +const sid = process.env.OPENCODE_SESSION_ID +const dir = process.env.OPENCODE_DIRECTORY ?? process.cwd() +const msg = process.env.OPENCODE_MESSAGE_ID +const quiet = process.env.OPENCODE_QUIET === "1" +const noTimeout = process.env.OPENCODE_NO_TIMEOUT === "1" + +// Path resolution: resolve relative paths against cwd; reject empty strings. +// Absolute paths (e.g. /tmp/foo) are passed through unchanged — the server-side +// tool already enforces permissions via Permission.ask. +const toPath = Effect.fn("oc.toPath")((p: string) => + Effect.gen(function* () { + if (!p || p.trim() === "") return yield* new ValidationError({ message: "Invalid path: empty or null" }) + return resolve(dir, normalize(p)) + }), +) + +const marker = "\x00OC_FILE\x00:" +const trunc = "\x00OC_TRUNCATED\x00:" + +const mimes: Record = { + pdf: "application/pdf", + png: "image/png", + jpg: "image/jpeg", + jpeg: "image/jpeg", +} + +const log = (label: string) => { + if (!quiet) process.stderr.write(`\x1b[2m[oc] ${label}\x1b[0m\n`) +} + +const parse = (model: string) => { + const i = model.indexOf("/") + if (i < 0) return undefined + return { providerID: model.slice(0, i), modelID: model.slice(i + 1) } +} + +const api = Effect.fn("oc.api")((method: string, path: string, body?: Record) => + Effect.gen(function* () { + if (!server) + return yield* new ServerError({ + message: "OPENCODE_SERVER_URL not set — are you running inside an openCode bash tool?", + }) + + const res = yield* Effect.tryPromise({ + try: () => { + // Tool paths: 60s timeout (deterministic ops should be fast). + // Set OPENCODE_TOOL_TIMEOUT_MS env var to override in tests. + // Exec paths: configurable timeout, default 30 minutes (Ralph loops can run arbitrarily long). + // Set OPENCODE_EXEC_TIMEOUT_MS env var to override in tests. + const exec = path.includes("/exec") + const ms = exec + ? parseInt(process.env.OPENCODE_EXEC_TIMEOUT_MS ?? "1800000", 10) + : parseInt(process.env.OPENCODE_TOOL_TIMEOUT_MS ?? "60000", 10) + const signal = AbortSignal.timeout(ms) + + const headers: Record = { + "Content-Type": "application/json", + "x-opencode-directory": encodeURIComponent(dir), + } + + // Disable provider timeout for any oc command (can be long-running) + if (noTimeout) { + headers["x-opencode-no-timeout"] = "true" + } + + return fetch(new URL(path, server).toString(), { + method, + headers, + body: body ? JSON.stringify(body) : undefined, + signal, + // Disable Bun's native TCP-level timeout for exec ops — see timeoutOpt + ...timeoutOpt(path), + } as RequestInit) + }, + catch: (e) => new ApiError({ message: e instanceof Error ? e.message : String(e) }), + }) + + if (!res.ok) { + const text = yield* Effect.promise(() => res.text().catch(() => "")) + return yield* new ApiError({ + message: `server returned HTTP ${res.status}${text ? `: ${text}` : ""}`, + status: res.status, + }) + } + + return yield* Effect.tryPromise({ + // Strip keepalive markers injected by the server to prevent HTTP idle timeouts. + try: () => res.text().then((t) => t.replaceAll("\x00OC_KEEPALIVE\x00", "")), + catch: (e) => new ApiError({ message: e instanceof Error ? e.message : String(e) }), + }) + }), +) + +const tool = Effect.fn("oc.tool")((name: string, args: Record) => + Effect.gen(function* () { + const body: ToolBody = { + name, + args, + agent: process.env.OPENCODE_AGENT ?? "build", + ...(msg ? { messageID: msg } : {}), + } + return yield* api("POST", `/session/${sid}/tool`, body) + }), +) + +const stdin = Effect.fn("oc.stdin")(() => + Effect.gen(function* () { + if (process.stdin.isTTY) return "" + return yield* Effect.tryPromise({ + try: () => new Response(Bun.stdin.stream()).text(), + catch: (e) => new ApiError({ message: e instanceof Error ? e.message : String(e) }), + }) + }), +) + +const prompt = Effect.fn("oc.prompt")((rest: string[]) => + Effect.gen(function* () { + let system: string | undefined + let model: string | undefined + let via: string | undefined + const files: string[] = [] + const args: string[] = [] + + for (let i = 0; i < rest.length; i++) { + if (rest[i] === "-s" || rest[i] === "--system") { + if (i + 1 < rest.length) system = rest[++i] + continue + } + if (rest[i] === "-m" || rest[i] === "--model") { + if (i + 1 < rest.length) model = rest[++i] + continue + } + if (rest[i] === "-a" || rest[i] === "--agent") { + if (i + 1 < rest.length) via = rest[++i] + continue + } + if (rest[i] === "-f" || rest[i] === "--file") { + if (i + 1 < rest.length) files.push(rest[++i]) + continue + } + args.push(rest[i]) + } + + const raw = yield* stdin() + const lines: string[] = [] + const piped: string[] = [] + + if (raw) { + for (const line of raw.split("\n")) { + if (line.startsWith(marker)) { + piped.push(line.substring(marker.length)) + continue + } + lines.push(line) + } + } + + const input = lines.join("\n").trim() + const text = input ? `${input}\n\n${args.join(" ")}` : args.join(" ") + + if (!text.trim()) { + return yield* new ValidationError({ message: "oc prompt: no prompt text provided" }) + } + + const body: ExecBody = { prompt: text } + if (system) body.system = system + if (via) body.agent = via + if (msg) body.messageID = msg + + if (model) { + const parsed = parse(model) + if (!parsed) return yield* new ValidationError({ message: "oc prompt: model must be provider/model" }) + body.model = parsed + } + + const paths = [...files, ...piped] + if (paths.length > 0) { + body.files = yield* Effect.all( + paths.map((fp) => + Effect.gen(function* () { + const safe = yield* toPath(fp) + const buf = yield* Effect.tryPromise({ + try: () => Bun.file(safe).arrayBuffer(), + catch: (e) => + new ApiError({ message: `Failed to read file ${fp}: ${e instanceof Error ? e.message : String(e)}` }), + }) + + // Validate file size (max 10MB) + if (buf.byteLength > 10 * 1024 * 1024) { + return yield* new ValidationError({ + message: `oc prompt: file ${fp} too large (${Math.round(buf.byteLength / 1024 / 1024)}MB, max 10MB)`, + }) + } + + const mime = mimes[fp.split(".").pop()?.toLowerCase() ?? ""] ?? "application/octet-stream" + return { + filename: fp.split("/").pop() ?? fp, + mime, + url: `data:${mime};base64,${Buffer.from(buf).toString("base64")}`, + } + }), + ), + ) + } + + log( + system + ? `prompt -s "${system.substring(0, 30)}" "${args.join(" ").substring(0, 50)}"` + : `prompt "${args.join(" ").substring(0, 60)}"`, + ) + process.stdout.write(yield* api("POST", `/session/${sid}/exec`, body)) + }), +) + +const check = Effect.fn("oc.check")((rest: string[]) => + Effect.gen(function* () { + let model: string | undefined + const args: string[] = [] + + for (let i = 0; i < rest.length; i++) { + if (rest[i] === "-m" || rest[i] === "--model") { + if (i + 1 < rest.length) model = rest[++i] + continue + } + args.push(rest[i]) + } + + const question = yield* stdin().pipe(Effect.map((s) => (s ? `${s}\n\n${args.join(" ")}` : args.join(" ")))) + + if (!question.trim()) { + return yield* new ValidationError({ message: "oc check: no question provided" }) + } + + log(`check "${question.substring(0, 60)}"`) + + const sentinel = "NO_ISSUES_FOUND" + const body: ExecBody = { + prompt: [ + question, + "", + `If you find issues, list them with file paths and line numbers.`, + `If there are NO issues, respond with exactly: ${sentinel}`, + ].join("\n"), + } + if (msg) body.messageID = msg + + if (model) { + const parsed = parse(model) + if (!parsed) return yield* new ValidationError({ message: "oc check: model must be provider/model" }) + body.model = parsed + } + + const response = yield* api("POST", `/session/${sid}/exec`, body) + + const trimmed = response.trim() + const clean = trimmed.includes(sentinel) + if (!clean && trimmed) process.stdout.write(trimmed + "\n") + return !clean + }), +) + +const program = Effect.gen(function* () { + if (!server) { + return yield* new ServerError({ + message: "OPENCODE_SERVER_URL not set — are you running inside an openCode bash tool?", + }) + } + if (!sid) { + return yield* new ServerError({ + message: "OPENCODE_SESSION_ID not set — are you running inside an openCode bash tool?", + }) + } + + const cmd = process.argv[2] + const rest = process.argv.slice(3) + + switch (cmd) { + case "prompt": { + yield* prompt(rest) + break + } + + case "tool": { + const name = rest[0] + const tail = rest.slice(1) + if (!name) { + return yield* new ValidationError({ + message: "oc tool: no tool name. Available: read, write, edit, grep, glob, batch, bash", + }) + } + const exec = (args: Record) => { + log(`tool ${name} ${tail[0]?.substring(0, 60) ?? ""}`) + return Effect.gen(function* () { + const result = yield* tool(name, args) + process.stdout.write( + result + .split("\n") + .filter((line) => { + if (line.startsWith(trunc)) { + process.stderr.write(`\x1b[33m[oc] ${line.substring(trunc.length)}\x1b[0m\n`) + return false + } + return true + }) + .join("\n"), + ) + }) + } + switch (name) { + case "read": { + if (!tail[0]) { + return yield* new ValidationError({ message: "oc tool read: file path required" }) + } + const ni = tail.indexOf("-n") + const limit = + ni >= 0 && ni + 1 < tail.length + ? (() => { + const n = parseInt(tail[ni + 1], 10) + return Number.isNaN(n) ? undefined : n + })() + : undefined + yield* exec({ filePath: yield* toPath(tail[0]), limit }) + break + } + case "write": { + if (!tail[0]) { + return yield* new ValidationError({ message: "oc tool write: file path required" }) + } + const content = yield* stdin() + yield* exec({ filePath: yield* toPath(tail[0]), content }) + break + } + case "edit": { + if (!tail[0]) { + return yield* new ValidationError({ message: "oc tool edit: file path required" }) + } + let old = "" + let rep = "" + for (let i = 1; i < tail.length; i++) { + if ((tail[i] === "--old" || tail[i] === "-o") && i + 1 < tail.length) { + old = tail[++i] + continue + } + if ((tail[i] === "--new" || tail[i] === "-n") && i + 1 < tail.length) { + rep = tail[++i] + continue + } + } + yield* exec({ filePath: yield* toPath(tail[0]), oldString: old, newString: rep }) + break + } + case "grep": + if (!tail[0]) { + return yield* new ValidationError({ message: "oc tool grep: pattern required" }) + } + yield* exec({ pattern: tail[0], path: yield* toPath(tail[1] ?? ".") }) + break + case "glob": + if (!tail[0]) { + return yield* new ValidationError({ message: "oc tool glob: pattern required" }) + } + yield* exec({ pattern: tail[0], path: tail[1] ? yield* toPath(tail[1]) : undefined }) + break + case "bash": { + const command = tail.join(" ") + if (!command.trim()) { + return yield* new ValidationError({ message: "oc tool bash: command required" }) + } + yield* exec({ command: command.trim(), description: `oc bash: ${command.substring(0, 50)}` }) + break + } + case "batch": { + const content = yield* stdin() + if (!content.trim()) { + return yield* new ValidationError({ message: "oc tool batch: no JSON content provided" }) + } + // Limit JSON size to prevent DoS + if (content.length > 1024 * 1024) { + // 1MB limit + return yield* new ValidationError({ message: "oc tool batch: JSON too large (max 1MB)" }) + } + const parsed = yield* Effect.try({ + try: () => { + const data = JSON.parse(content) + // Basic structure validation + if (!Array.isArray(data)) { + throw new Error("Expected array of tool calls") + } + return data + }, + catch: (e) => + new ValidationError({ + message: `oc tool batch: invalid JSON - ${e instanceof Error ? e.message : String(e)}`, + }), + }) + yield* exec({ tool_calls: parsed }) + break + } + default: + yield* exec(Object.fromEntries(tail.map((a, i) => [i === 0 ? "input" : `arg${i}`, a]))) + } + break + } + + case "agent": { + const type = rest[0] + const tail = rest.slice(1) + if (!type) { + return yield* new ValidationError({ message: "oc agent: usage: oc agent " }) + } + const text = yield* stdin().pipe(Effect.map((s) => (s ? `${s}\n\n${tail.join(" ")}` : tail.join(" ")))) + if (!text.trim()) { + return yield* new ValidationError({ message: "oc agent: no prompt text" }) + } + log(`agent ${type} "${tail.join(" ").substring(0, 50)}"`) + const body: ExecBody = { prompt: text, agent: type } + if (msg) body.messageID = msg + process.stdout.write(yield* api("POST", `/session/${sid}/exec`, body)) + break + } + + case "todo": { + const sub = rest[0] + const tail = rest.slice(1) + switch (sub) { + case "add": { + const content = tail.join(" ") + if (!content.trim()) { + return yield* new ValidationError({ message: "oc todo add: no content" }) + } + log(`todo add "${content.substring(0, 50)}"`) + process.stdout.write(yield* api("POST", `/session/${sid}/todo`, { content, status: "pending" })) + break + } + case "list": + case "read": { + process.stdout.write(yield* api("GET", `/session/${sid}/todo`)) + break + } + case "done": { + const idx = parseInt(tail[0], 10) + if (isNaN(idx) || idx < 1) { + return yield* new ValidationError({ message: "oc todo done: provide 1-based index" }) + } + const response = yield* api("GET", `/session/${sid}/todo`) + const parsed = yield* Effect.try({ + try: () => JSON.parse(response) as { content: string; status: string; priority: string }[], + catch: () => new ValidationError({ message: "oc todo done: invalid response from server" }), + }) + if (idx > parsed.length) { + return yield* new ValidationError({ + message: `oc todo done: index ${idx} out of range (max: ${parsed.length})`, + }) + } + parsed[idx - 1].status = "completed" + log(`todo done ${idx} ✓ ${parsed[idx - 1].content.substring(0, 40)}`) + yield* api("PUT", `/session/${sid}/todo`, { todos: parsed }) + log(`Marked todo ${idx} as completed: ${parsed[idx - 1].content}`) + break + } + case "clear": { + yield* api("PUT", `/session/${sid}/todo`, { todos: [] }) + log("Cleared all todos") + break + } + default: + return yield* new ValidationError({ + message: `oc todo: unknown '${sub}'. Usage: oc todo `, + }) + } + break + } + + case "status": { + const message = rest.join(" ") + if (!message.trim()) { + return yield* new ValidationError({ message: "oc status: no message" }) + } + log(`status: ${message}`) + const body: StatusBody = { message } + if (msg) body.messageID = msg + // Fire-and-forget: post status to server for TUI visibility. + // We do NOT await the response — oc status must return immediately. + fetch(new URL(`/session/${sid}/status`, server).toString(), { + method: "POST", + headers: { "Content-Type": "application/json", "x-opencode-directory": encodeURIComponent(dir) }, + body: JSON.stringify(body), + signal: AbortSignal.timeout(2000), + }).catch(() => {}) + break + } + + case "check": { + process.exit((yield* check(rest)) ? 0 : 1) + break + } + + case "help": + case "--help": + case "-h": + console.log(`oc — openCode CLI callback tool + +AI JUDGMENT (non-deterministic): + oc prompt "question" AI response on stdout + oc prompt -s "system" "question" Dynamic specialist + oc prompt -m provider/model "question" Specific model + oc prompt -f file.pdf "analyze" Attach file (multimodal) + cat file | oc prompt "analyze" Context from stdin + +DETERMINISTIC TOOLS: + oc tool read Read file → stdout + echo "content" | oc tool write Write stdin → file + oc tool edit --old "x" --new "y" Edit file + oc tool grep "pattern" [path] Search → stdout + oc tool glob "pattern" [path] Find files → stdout + oc tool batch Execute JSON tool calls from stdin + +ASSESSMENT + BOOLEAN (grep pattern — findings on stdout, boolean on exit code): + oc check "question" Assessment → stdout, exit 0 (issues found) / 1 (clean) + oc check -m provider/model "question" Use specific model for assessment + data | oc check "question" Piped context + while a=\$(oc check "issues?"); do Loop pattern: capture assessment, + echo "\$a" | oc prompt "fix" pipe findings to fixer + done + +SUBAGENTS: + oc agent "prompt" Spawn subagent + +STATE: + oc todo add|list|done|clear Manage todos + oc status "message" Progress update (bold, visible in TUI)`) + break + + default: + console.error(`oc: unknown command '${cmd ?? ""}' — run 'oc help'`) + process.exit(1) + } +}) + +// Exported for testing: returns { timeout: false } for exec paths to disable Bun's +// native TCP-level timeout so it doesn't interfere with AbortSignal.timeout(ms). +// Tool paths return {} (Bun default timeout is fine, AbortSignal is the primary guard). +export const timeoutOpt = (path: string): { timeout?: false } => (path.includes("/exec") ? { timeout: false } : {}) + +const isKnown = (err: unknown): err is { _tag: string; message: string } => { + if (typeof err !== "object" || err === null) return false + const obj = err as Record + return ( + (obj._tag === "ValidationError" || obj._tag === "ServerError" || obj._tag === "ApiError") && + typeof obj.message === "string" + ) +} + +if (import.meta.main) + Effect.runPromiseExit(program).then((exit) => { + if (Exit.isSuccess(exit)) return + // Check typed failures first (from yield* new MyError(...)) + const failure = Cause.findErrorOption(exit.cause) + if (Option.isSome(failure) && isKnown(failure.value)) { + console.error(failure.value.message) + process.exit(1) + } + // Also check defects (unexpected errors surfaced via Cause.squash) + const squashed = Cause.squash(exit.cause) + if (isKnown(squashed)) { + console.error(squashed.message) + process.exit(1) + } + console.error(`oc: unexpected error: ${squashed instanceof Error ? squashed.message : String(squashed)}`) + process.exit(1) + }) diff --git a/packages/opencode/src/cli/cmd/run.ts b/packages/opencode/src/cli/cmd/run.ts index 0aeb864e8679..f1d81e2fa4d4 100644 --- a/packages/opencode/src/cli/cmd/run.ts +++ b/packages/opencode/src/cli/cmd/run.ts @@ -460,6 +460,8 @@ export const RunCommand = cmd({ if (event.type === "message.part.updated") { const part = event.properties.part if (part.sessionID !== sessionID) continue + // Skip oc-injected parts — they are internal scaffolding, not LLM tool calls + if (part.type === "tool" && part.metadata?.oc) continue if (part.type === "tool" && (part.state.status === "completed" || part.state.status === "error")) { if (emit("tool_use", { part })) continue @@ -665,12 +667,14 @@ export const RunCommand = cmd({ } await bootstrap(process.cwd(), async () => { - const fetchFn = (async (input: RequestInfo | URL, init?: RequestInit) => { - const request = new Request(input, init) - return Server.Default().fetch(request) - }) as typeof globalThis.fetch - const sdk = createOpencodeClient({ baseUrl: "http://opencode.internal", fetch: fetchFn }) - await execute(sdk) + // Start HTTP server on random port so oc callbacks work in headless mode + const server = Server.listen({ port: 0, hostname: "127.0.0.1" }) + try { + const sdk = createOpencodeClient({ baseUrl: server.url.toString(), directory }) + await execute(sdk) + } finally { + await server.stop() + } }) }, }) diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx index fb62de9acf5f..6789af9d423a 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/index.tsx @@ -334,7 +334,7 @@ export function Session() { if (children().length === 1) return const sessions = children().filter((x) => !!x.parentID) - let next = sessions.findIndex((x) => x.id === session()?.id) - direction + let next = sessions.findIndex((x) => x.id === session()?.id) + direction if (next >= sessions.length) next = 0 if (next < 0) next = sessions.length - 1 @@ -1559,6 +1559,9 @@ function ToolPart(props: { last: boolean; part: ToolPart; message: AssistantMess + + + @@ -1613,6 +1616,16 @@ function GenericTool(props: ToolProps) { ) } +function Status(props: ToolProps) { + const { theme } = useTheme() + const message = createMemo(() => (props.input as { message?: string })?.message ?? props.output ?? "") + return ( + + {message()} + + ) +} + function InlineTool(props: { icon: string iconColor?: RGBA diff --git a/packages/opencode/src/cli/cmd/tui/routes/session/subagent-footer.tsx b/packages/opencode/src/cli/cmd/tui/routes/session/subagent-footer.tsx index 70c6f6ea9966..4856cedb117f 100644 --- a/packages/opencode/src/cli/cmd/tui/routes/session/subagent-footer.tsx +++ b/packages/opencode/src/cli/cmd/tui/routes/session/subagent-footer.tsx @@ -15,11 +15,11 @@ export function SubagentFooter() { const messages = createMemo(() => sync.data.message[route.sessionID] ?? []) const session = createMemo(() => sync.session.get(route.sessionID)) - const subagentInfo = createMemo(() => { + const info = createMemo(() => { const s = session() if (!s) return { label: "Subagent", index: 0, total: 0 } - const agentMatch = s.title.match(/@(\w+) subagent/) - const label = agentMatch ? Locale.titlecase(agentMatch[1]) : "Subagent" + const match = s.title.match(/@(\w+) subagent/) + const label = match ? Locale.titlecase(match[1]) : "Subagent" if (!s.parentID) return { label, index: 0, total: 0 } @@ -77,11 +77,11 @@ export function SubagentFooter() { - {subagentInfo().label} + {info().label} - 0}> + 0}> - ({subagentInfo().index} of {subagentInfo().total}) + ({info().index} of {info().total}) diff --git a/packages/opencode/src/cli/cmd/tui/thread.ts b/packages/opencode/src/cli/cmd/tui/thread.ts index 3bb56937a6cb..af61a18f9b1c 100644 --- a/packages/opencode/src/cli/cmd/tui/thread.ts +++ b/packages/opencode/src/cli/cmd/tui/thread.ts @@ -14,6 +14,7 @@ import type { Event } from "@opencode-ai/sdk/v2" import type { EventSource } from "./context/sdk" import { win32DisableProcessedInput, win32InstallCtrlCGuard } from "./win32" import { TuiConfig } from "@/config/tui" +import { Server } from "@/server/server" import { Instance } from "@/project/instance" import { writeHeapSnapshot } from "v8" @@ -184,9 +185,19 @@ export const TuiThreadCommand = cmd({ network.port !== 0 || network.hostname !== "127.0.0.1" + // Always start a real HTTP server for oc callbacks. + // In internal mode, TUI still uses RPC for its own communication, + // but oc needs a real HTTP endpoint to call back into. + const result = await client.call("server", { + ...network, + port: network.port || 0, + hostname: network.hostname || "127.0.0.1", + }) + Server.url = new URL(result.url) + const transport = external ? { - url: (await client.call("server", network)).url, + url: result.url, fetch: undefined, events: undefined, } diff --git a/packages/opencode/src/provider/provider.ts b/packages/opencode/src/provider/provider.ts index 7fb3166284be..28ad43d9daad 100644 --- a/packages/opencode/src/provider/provider.ts +++ b/packages/opencode/src/provider/provider.ts @@ -1276,7 +1276,25 @@ export namespace Provider { if (opts.signal) signals.push(opts.signal) if (chunkAbortCtl) signals.push(chunkAbortCtl.signal) - if (options["timeout"] !== undefined && options["timeout"] !== null && options["timeout"] !== false) + + // Skip provider timeout when noTimeout header is present + const noTimeout = (() => { + const headers = opts.headers + if (!headers) return false + if (headers instanceof Headers) { + return headers.get("x-opencode-no-timeout") === "true" + } + if (Array.isArray(headers)) { + return headers.some(([key, value]) => key.toLowerCase() === "x-opencode-no-timeout" && value === "true") + } + return (headers as Record)["x-opencode-no-timeout"]?.toLowerCase() === "true" + })() + if ( + !noTimeout && + options["timeout"] !== undefined && + options["timeout"] !== null && + options["timeout"] !== false + ) signals.push(AbortSignal.timeout(options["timeout"])) const combined = signals.length === 0 ? null : signals.length === 1 ? signals[0] : AbortSignal.any(signals) diff --git a/packages/opencode/src/server/routes/session.ts b/packages/opencode/src/server/routes/session.ts index d499e5a1ecf4..f33ab8ce8e43 100644 --- a/packages/opencode/src/server/routes/session.ts +++ b/packages/opencode/src/server/routes/session.ts @@ -21,6 +21,7 @@ import { errors } from "../error" import { lazy } from "../../util/lazy" import { Bus } from "../../bus" import { NamedError } from "@opencode-ai/util/error" +import { ToolRegistry } from "../../tool/registry" const log = Log.create({ service: "server" }) @@ -187,6 +188,58 @@ export const SessionRoutes = lazy(() => return c.json(todos) }, ) + .post( + "/:sessionID/todo", + describeRoute({ + summary: "Create session todo", + description: "Create a new todo item for the session.", + operationId: "session.todo.create", + responses: { + 200: { description: "Created todo", content: { "application/json": { schema: resolver(Todo.Info) } } }, + ...errors(400, 404), + }, + }), + validator("param", z.object({ sessionID: SessionID.zod })), + validator( + "json", + z.object({ content: z.string(), status: z.string().optional(), priority: z.string().optional() }), + ), + async (c) => { + const sessionID = c.req.valid("param").sessionID + const body = c.req.valid("json") + const todo: Todo.Info = { + content: body.content, + status: body.status ?? "pending", + priority: body.priority ?? "medium", + } + const existing = Todo.get(sessionID) + Todo.update({ sessionID, todos: [...existing, todo] }) + return c.json(todo) + }, + ) + .put( + "/:sessionID/todo", + describeRoute({ + summary: "Update session todos", + description: "Replace all todos for a session (bulk update).", + operationId: "session.todo.update", + responses: { + 200: { + description: "Updated todos", + content: { "application/json": { schema: resolver(Todo.Info.array()) } }, + }, + ...errors(400, 404), + }, + }), + validator("param", z.object({ sessionID: SessionID.zod })), + validator("json", z.object({ todos: z.array(Todo.Info) })), + async (c) => { + const sessionID = c.req.valid("param").sessionID + const body = c.req.valid("json") + Todo.update({ sessionID, todos: body.todos }) + return c.json(body.todos) + }, + ) .post( "/", describeRoute({ @@ -612,8 +665,7 @@ export const SessionRoutes = lazy(() => if (query.limit === 0) { await Session.get(sessionID) - const messages = await Session.messages({ sessionID }) - return c.json(messages) + return c.json([]) } const page = await MessageV2.page({ @@ -772,8 +824,9 @@ export const SessionRoutes = lazy(() => const params = c.req.valid("param") const body = c.req.valid("json") if (body.id !== params.partID || body.messageID !== params.messageID || body.sessionID !== params.sessionID) { - throw new Error( + return c.text( `Part mismatch: body.id='${body.id}' vs partID='${params.partID}', body.messageID='${body.messageID}' vs messageID='${params.messageID}', body.sessionID='${body.sessionID}' vs sessionID='${params.sessionID}'`, + 400, ) } const part = await Session.updatePart(body) @@ -1027,5 +1080,327 @@ export const SessionRoutes = lazy(() => }) return c.json(true) }, + ) + // oc exec: AI judgment via child session + .post( + "/:sessionID/exec", + describeRoute({ + summary: "Execute AI prompt", + description: + "Create a child session, send a prompt, wait for the AI response, and return the assistant's text. Designed for bash script callbacks via the oc CLI.", + operationId: "session.exec", + responses: { + 200: { + description: "AI response as plain text", + content: { "text/plain": { schema: resolver(z.string()) } }, + }, + ...errors(400, 404), + }, + }), + validator("param", z.object({ sessionID: SessionID.zod })), + validator( + "json", + z.object({ + prompt: z.string().describe("The prompt text to send to the AI"), + system: z.string().optional().describe("Custom system prompt for specialist creation"), + agent: z.string().optional().describe("Agent type"), + model: z.object({ providerID: ProviderID.zod, modelID: ModelID.zod }).optional().describe("Model override"), + files: z + .array(z.object({ filename: z.string(), mime: z.string(), url: z.string() })) + .optional() + .describe("File attachments (PDFs, images) for multimodal prompts"), + format: z + .object({ type: z.literal("json_schema"), schema: z.record(z.string(), z.unknown()) }) + .optional() + .describe("Force structured output via StructuredOutput tool (e.g. for oc check boolean)"), + messageID: z.string().optional().describe("Parent message ID — creates visual ToolPart when present"), + }), + ), + async (c) => { + const parent = c.req.valid("param").sessionID + const body = c.req.valid("json") + + // Detect oc command in script to disable provider timeout (any long-running operation) + const noTimeout = c.req.header("x-opencode-no-timeout") === "true" + + await Session.get(parent) + + // Inherit model from parent session if not explicitly provided + const model = await (async () => { + if (body.model) return body.model + const msgs = await Session.messages({ sessionID: parent }) + for (let i = msgs.length - 1; i >= 0; i--) { + const info = msgs[i].info + if (info.role === "user" && info.model) return info.model + } + })() + + const child = await Session.create({ + parentID: parent, + title: body.system ? `oc prompt -s "${body.system}"` : "oc prompt", + }) + const cleanup = () => SessionPrompt.cancel(child.id) + if (c.req.raw.signal.aborted) { + cleanup() + return c.text("aborted", 503) + } + c.req.raw.signal.addEventListener("abort", cleanup) + + // Create task ToolPart for subagent visibility (opt-in via messageID) + const mid = body.messageID ? MessageID.make(body.messageID) : undefined + const pid = mid ? PartID.ascending() : undefined + const t0 = Date.now() + const preview = body.prompt.substring(0, 80) + (body.prompt.length > 80 ? "..." : "") + const title = body.system ? `oc prompt -s "${body.system}"` : "oc prompt" + + const emit = (state: z.infer) => + mid && pid + ? Session.updatePart({ + id: pid, + messageID: mid, + sessionID: parent, + type: "tool", + tool: "task", + callID: pid, + metadata: { oc: true }, + state, + }) + : undefined + + await emit({ + status: "running", + input: { prompt: preview, description: preview, subagent_type: "oc" }, + title, + metadata: { sessionId: child.id, model }, + time: { start: t0 }, + }) + + c.status(200) + c.header("Content-Type", "text/plain") + return stream(c, async (stream) => { + let delta = "" + const unsub = + mid && pid + ? Bus.subscribe(MessageV2.Event.PartDelta, (event) => { + if (event.properties.sessionID === child.id && event.properties.field === "text") { + delta += event.properties.delta + emit({ + status: "running", + input: { prompt: preview }, + title, + metadata: { sessionId: child.id, model, output: delta.substring(0, 2000) }, + time: { start: t0 }, + })?.catch(() => {}) + } + }) + : undefined + + // Send periodic keepalive to prevent HTTP idle timeout (child sessions can take hours). + // Use \x00OC_KEEPALIVE\x00 — a null-byte delimited marker that oc.ts strips before + // returning the response, so it never contaminates the AI text output. + const KEEPALIVE = "\x00OC_KEEPALIVE\x00" + const keepalive = setInterval(() => stream.write(KEEPALIVE).catch(() => {}), 15_000) + + try { + const parts: Parameters[0]["parts"] = [{ type: "text", text: body.prompt }] + if (body.files?.length) { + for (const file of body.files) { + parts.push({ type: "file", mime: file.mime, url: file.url, filename: file.filename }) + } + } + const msg = await SessionPrompt.prompt({ + sessionID: child.id, + parts, + system: body.system, + agent: body.agent, + model, + format: body.format ? { ...body.format, retryCount: 3 } : undefined, + noTimeout, + }) + const out = + body.format && msg.info.role === "assistant" && msg.info.structured !== undefined + ? JSON.stringify(msg.info.structured) + : ((msg.parts.findLast((p) => p.type === "text") as { type: "text"; text: string } | undefined)?.text ?? + "") + + await emit({ + status: "completed", + input: { prompt: preview }, + output: out.substring(0, 2000), + title, + metadata: { sessionId: child.id, model }, + time: { start: t0, end: Date.now() }, + }) + await stream.write(out) + } catch (error) { + await emit({ + status: "error", + input: { prompt: preview }, + error: error instanceof Error ? error.message : String(error), + time: { start: t0, end: Date.now() }, + }) + throw error + } finally { + clearInterval(keepalive) + unsub?.() + c.req.raw.signal.removeEventListener("abort", cleanup) + } + }) + }, + ) + // oc tool: Direct tool execution — no LLM, deterministic + .post( + "/:sessionID/tool", + describeRoute({ + summary: "Execute tool directly", + description: + "Execute an openCode tool directly without LLM involvement. Deterministic operations from bash scripts via the oc CLI.", + operationId: "session.tool", + responses: { + 200: { + description: "Tool output as plain text", + content: { "text/plain": { schema: resolver(z.string()) } }, + }, + ...errors(400, 404), + }, + }), + validator("param", z.object({ sessionID: SessionID.zod })), + validator( + "json", + z.object({ + name: z.string().describe("Tool name (e.g. read, edit, grep, glob)"), + args: z.record(z.string(), z.unknown()).describe("Tool arguments"), + agent: z.string().optional().describe("Agent context for permissions"), + messageID: z.string().optional().describe("Parent message ID — creates visual ToolParts when present"), + }), + ), + async (c) => { + const param = c.req.valid("param") + const body = c.req.valid("json") + const tools = await ToolRegistry.tools({ providerID: ProviderID.make(""), modelID: ModelID.make("") }) + const tool = tools.find((t) => t.id === body.name) + if (!tool) return c.text(`Tool not found: ${body.name}`, 404) + + const session = await Session.get(param.sessionID) + const agent = body.agent ?? "build" + const ag = await Agent.get(agent) + + const mid = body.messageID ? MessageID.make(body.messageID) : undefined + const pid = mid ? PartID.ascending() : undefined + const t0 = Date.now() + + const emit = (state: z.infer) => + mid && pid + ? Session.updatePart({ + id: pid, + messageID: mid, + sessionID: param.sessionID, + type: "tool", + tool: body.name, + callID: pid, + metadata: { oc: true }, + state, + }) + : undefined + + await emit({ status: "running", input: body.args, time: { start: t0 } }) + + const ctx = { + sessionID: param.sessionID, + messageID: mid ?? MessageID.ascending(), + callID: pid ?? PartID.ascending(), + agent, + abort: c.req.raw.signal, + messages: [] as MessageV2.WithParts[], + metadata: async (val: { title?: string; metadata?: Record }) => { + await emit({ + status: "running", + input: body.args, + title: val.title, + metadata: val.metadata, + time: { start: t0 }, + }) + }, + async ask(req: Omit) { + await Permission.ask({ + ...req, + sessionID: param.sessionID, + ruleset: Permission.merge(ag?.permission ?? [], session.permission ?? []), + }) + }, + } + + c.status(200) + c.header("Content-Type", "text/plain") + return stream(c, async (stream) => { + try { + const result = await tool.execute(body.args, ctx) + await emit({ + status: "completed", + input: body.args, + output: result.output, + title: result.title ?? "", + metadata: result.metadata ?? {}, + time: { start: t0, end: Date.now() }, + }) + await stream.write( + result.output + + (result.attachments?.length && body.args?.filePath ? `\n\x00OC_FILE\x00:${body.args.filePath}` : "") + + (result.metadata?.truncated + ? `\n\x00OC_TRUNCATED\x00:Results limited to ${result.metadata.count ?? "unknown"} items. Use a more specific pattern to get all results.` + : ""), + ) + } catch (error) { + await emit({ + status: "error", + input: body.args, + error: error instanceof Error ? error.message : String(error), + time: { start: t0, end: Date.now() }, + }) + await stream.write(`Error: ${error instanceof Error ? error.message : String(error)}`) + } + }) + }, + ) + // POST /session/:id/status — create a visible status ToolPart (used by oc status) + .post( + "/:sessionID/status", + describeRoute({ + summary: "Post status message", + description: "Create a visible status ToolPart in the session thread. Used by the oc CLI to show progress.", + operationId: "session.status.post", + responses: { + 200: { description: "Status accepted", content: { "text/plain": { schema: resolver(z.string()) } } }, + ...errors(400, 404), + }, + }), + validator("param", z.object({ sessionID: SessionID.zod })), + validator("json", z.object({ message: z.string(), messageID: z.string().optional() })), + async (c) => { + const sessionID = c.req.valid("param").sessionID + const body = c.req.valid("json") + const mid = body.messageID ? MessageID.make(body.messageID) : undefined + if (mid && body.message) { + const pid = PartID.ascending() + await Session.updatePart({ + id: pid, + messageID: mid, + sessionID, + type: "tool", + tool: "status", + callID: pid, + metadata: { oc: true }, + state: { + status: "completed", + input: { message: body.message }, + output: body.message, + title: "", + metadata: {}, + time: { start: Date.now(), end: Date.now() }, + }, + }) + } + return c.text("ok") + }, ), ) diff --git a/packages/opencode/src/server/server.ts b/packages/opencode/src/server/server.ts index ec245ed59f29..0039f640b3a6 100644 --- a/packages/opencode/src/server/server.ts +++ b/packages/opencode/src/server/server.ts @@ -288,6 +288,8 @@ export namespace Server { } const server = opts.port === 0 ? (tryServe(4096) ?? tryServe(0)) : tryServe(opts.port) if (!server) throw new Error(`Failed to start server on port ${opts.port}`) + // Update url with actual port (port 0 gets OS-assigned port after Bun.serve) + url = new URL(`http://${opts.hostname}:${server.port}`) const shouldPublishMDNS = opts.mdns && diff --git a/packages/opencode/src/session/llm.ts b/packages/opencode/src/session/llm.ts index 5cb609b2733a..4bdf7eb89b66 100644 --- a/packages/opencode/src/session/llm.ts +++ b/packages/opencode/src/session/llm.ts @@ -34,6 +34,7 @@ export namespace LLM { tools: Record retries?: number toolChoice?: "auto" | "required" | "none" + noTimeout?: boolean } export type Event = Awaited>["fullStream"] extends AsyncIterable ? T : never @@ -51,11 +52,7 @@ export namespace LLM { stream(input) { return Stream.unwrap( Effect.promise(() => LLM.stream(input)).pipe( - Effect.map((result) => - Stream.fromAsyncIterable(result.fullStream, (err) => err).pipe( - Stream.mapEffect((event) => Effect.succeed(event)), - ), - ), + Effect.map((result) => Stream.fromAsyncIterable(result.fullStream, (err) => err)), ), ) }, @@ -123,7 +120,7 @@ export namespace LLM { sessionID: input.sessionID, providerOptions: provider.options, }) - const options: Record = pipe( + const options: Record = pipe( base, mergeDeep(input.model.options), mergeDeep(input.agent.options), @@ -239,8 +236,8 @@ export namespace LLM { metadata: typeof result === "object" ? result?.metadata : undefined, title: typeof result === "object" ? result?.title : undefined, } - } catch (e: any) { - return { result: "", error: e.message ?? String(e) } + } catch (e: unknown) { + return { result: "", error: e instanceof Error ? e.message : String(e) } } } } @@ -294,6 +291,7 @@ export namespace LLM { }), ...input.model.headers, ...headers, + ...(input.noTimeout ? { "x-opencode-no-timeout": "true" } : {}), }, maxRetries: input.retries ?? 0, messages, diff --git a/packages/opencode/src/session/message-v2.ts b/packages/opencode/src/session/message-v2.ts index 7260a8af2ebf..ea60445847d7 100644 --- a/packages/opencode/src/session/message-v2.ts +++ b/packages/opencode/src/session/message-v2.ts @@ -463,6 +463,9 @@ export namespace MessageV2 { sessionID: SessionID.zod, info: Info, }), + busSchema: z.object({ + info: Info, + }), }), Removed: SyncEvent.define({ type: "message.removed", @@ -482,6 +485,9 @@ export namespace MessageV2 { part: Part, time: z.number(), }), + busSchema: z.object({ + part: Part, + }), }), PartDelta: BusEvent.define( "message.part.delta", @@ -713,6 +719,8 @@ export namespace MessageV2 { type: "step-start", }) if (part.type === "tool") { + // Skip oc-injected Parts — they have no corresponding tool_use from the LLM + if (part.metadata?.oc) continue toolNames.add(part.tool) if (part.state.status === "completed") { const outputText = part.state.time.compacted ? "[Old tool result content cleared]" : part.state.output diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index d2459cd8ba5a..85d1c075a5d8 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -62,8 +62,6 @@ export namespace SessionProcessor { reasoningMap: Record } - type StreamEvent = Event - export class Service extends ServiceMap.Service()("@opencode/SessionProcessor") {} export const layer: Layer.Layer< @@ -112,7 +110,7 @@ export namespace SessionProcessor { aborted: input.abort.aborted, }) - const handleEvent = Effect.fn("SessionProcessor.handleEvent")(function* (value: StreamEvent) { + const handleEvent = Effect.fn("SessionProcessor.handleEvent")(function* (value: Event) { switch (value.type) { case "start": yield* status.set(ctx.sessionID, { type: "busy" }) @@ -183,11 +181,11 @@ export namespace SessionProcessor { })) as MessageV2.ToolPart const parts = yield* Effect.promise(() => MessageV2.parts(ctx.assistantMessage.id)) - const recentParts = parts.slice(-DOOM_LOOP_THRESHOLD) + const recent = parts.slice(-DOOM_LOOP_THRESHOLD) if ( - recentParts.length !== DOOM_LOOP_THRESHOLD || - !recentParts.every( + recent.length !== DOOM_LOOP_THRESHOLD || + !recent.every( (part) => part.type === "tool" && part.tool === value.toolName && @@ -349,7 +347,7 @@ export namespace SessionProcessor { }, { text: ctx.currentText.text }, )).text - ctx.currentText.time = { start: Date.now(), end: Date.now() } + ctx.currentText.time = { start: ctx.currentText.time?.start ?? Date.now(), end: Date.now() } if (value.providerMetadata) ctx.currentText.metadata = value.providerMetadata yield* session.updatePart(ctx.currentText) ctx.currentText = undefined @@ -414,7 +412,7 @@ export namespace SessionProcessor { }) const halt = Effect.fn("SessionProcessor.halt")(function* (e: unknown) { - log.error("process", { error: e, stack: JSON.stringify((e as any)?.stack) }) + log.error("process", { error: e, stack: JSON.stringify((e instanceof Error ? e : null)?.stack) }) const error = parse(e) if (MessageV2.ContextOverflowError.isInstance(error)) { ctx.needsCompaction = true @@ -437,6 +435,8 @@ export namespace SessionProcessor { yield* Effect.gen(function* () { ctx.currentText = undefined ctx.reasoningMap = {} + ctx.blocked = false + ctx.toolcalls = {} const stream = llm.stream(streamInput) yield* stream.pipe( diff --git a/packages/opencode/src/session/prompt.ts b/packages/opencode/src/session/prompt.ts index a9edf838ca8c..8316a7d5c1e2 100644 --- a/packages/opencode/src/session/prompt.ts +++ b/packages/opencode/src/session/prompt.ts @@ -112,6 +112,7 @@ export namespace SessionPrompt { format: MessageV2.Format.optional(), system: z.string().optional(), variant: z.string().optional(), + noTimeout: z.boolean().optional(), parts: z.array( z.discriminatedUnion("type", [ MessageV2.TextPart.omit({ @@ -185,7 +186,7 @@ export namespace SessionPrompt { return message } - return loop({ sessionID: input.sessionID }) + return loop({ sessionID: input.sessionID, noTimeout: input.noTimeout }) }) export async function resolvePromptParts(template: string): Promise { @@ -274,9 +275,10 @@ export namespace SessionPrompt { export const LoopInput = z.object({ sessionID: SessionID.zod, resume_existing: z.boolean().optional(), + noTimeout: z.boolean().optional(), }) export const loop = fn(LoopInput, async (input) => { - const { sessionID, resume_existing } = input + const { sessionID, resume_existing, noTimeout } = input const abort = resume_existing ? resume(sessionID) : start(sessionID) if (!abort) { @@ -711,6 +713,7 @@ export namespace SessionPrompt { tools, model, toolChoice: format.type === "json_schema" ? "required" : undefined, + noTimeout, }) // If structured output was captured, save it and exit immediately @@ -781,7 +784,7 @@ export namespace SessionPrompt { using _ = log.time("resolveTools") const tools: Record = {} - const context = (args: any, options: ToolExecutionOptions): Tool.Context => ({ + const context = (args: Record, options: ToolExecutionOptions): Tool.Context => ({ sessionID: input.session.id, abort: options.abortSignal!, messageID: input.processor.message.id, @@ -789,7 +792,7 @@ export namespace SessionPrompt { extra: { model: input.model, bypassAgentCheck: input.bypassAgentCheck }, agent: input.agent.name, messages: input.messages, - metadata: async (val: { title?: string; metadata?: any }) => { + metadata: async (val: { title?: string; metadata?: Record }) => { const match = input.processor.partFromToolCall(options.toolCallId) if (match && match.state.status === "running") { await Session.updatePart({ @@ -934,7 +937,10 @@ export namespace SessionPrompt { } } - const truncated = await Truncate.output(textParts.join("\n\n"), {}, input.agent) + const text = textParts.join("\n\n") + const truncated = result.metadata?.noTruncate + ? { content: text, truncated: false as const } + : await Truncate.output(text, {}, input.agent) const metadata = { ...(result.metadata ?? {}), truncated: truncated.truncated, @@ -962,7 +968,7 @@ export namespace SessionPrompt { /** @internal Exported for testing */ export function createStructuredOutputTool(input: { - schema: Record + schema: Record onSuccess: (output: unknown) => void }): AITool { // Remove $schema property if present (not needed for tool input) diff --git a/packages/opencode/src/session/retry.ts b/packages/opencode/src/session/retry.ts index 8ba48375bcfe..8eecf836f773 100644 --- a/packages/opencode/src/session/retry.ts +++ b/packages/opencode/src/session/retry.ts @@ -60,12 +60,7 @@ export namespace SessionRetry { const json = iife(() => { try { - if (typeof error.data?.message === "string") { - const parsed = JSON.parse(error.data.message) - return parsed - } - - return JSON.parse(error.data.message) + return JSON.parse(error.data?.message) } catch { return undefined } diff --git a/packages/opencode/src/session/system.ts b/packages/opencode/src/session/system.ts index f6bd48b2262e..fb40f65ab8b7 100644 --- a/packages/opencode/src/session/system.ts +++ b/packages/opencode/src/session/system.ts @@ -1,5 +1,3 @@ -import { Ripgrep } from "../file/ripgrep" - import { Instance } from "../project/instance" import PROMPT_ANTHROPIC from "./prompt/anthropic.txt" @@ -44,16 +42,126 @@ export namespace SystemPrompt { ` Platform: ${process.platform}`, ` Today's date: ${new Date().toDateString()}`, ``, - ``, - ` ${ - project.vcs === "git" && false - ? await Ripgrep.tree({ - cwd: Instance.directory, - limit: 50, - }) - : "" - }`, - ``, + ``, + `For "repeat/iterate/loop/keep going/until done/while" tasks, you are the loop ARCHITECT, not the worker.`, + `WHY: a bash while-loop is deterministic (never loses count), and each \`oc check\` iteration gets fresh context (no token accumulation).`, + `YOU MUST NOT start working on these items yourself, read code, or set up todos.`, + `Set up a while loop that delegates the ENTIRE job to \`oc check\`. Each iteration is a complete attempt at the whole task — not one step of it.`, + `Pattern: while oc check "DO THE ENTIRE JOB: [user's full criteria here]"; do oc status "round done"; done`, + `oc is pre-installed inside the bash tool. Do not check if it exists.`, + ``, + `IF your task was delegated to you (via oc check/prompt): you ARE the worker. Do the work directly with tools. Do NOT set up another Ralph Loop — the caller already handles iteration.`, + ``, + `For tasks that process 3+ files with the same operation (summarize each, analyze each, convert each), ALWAYS use a FABRIC oc script. Test the pipeline on ONE file first. ALWAYS. Then parallelize. Do NOT process files one by one with individual tool calls.`, + ``, + ].join("\n"), + [ + ``, + `The \`oc\` command calls back into the running OpenCode instance — deterministic control flow + AI judgment at decision points.`, + `Why oc scripts beat consecutive tool calls: deterministic loops (never lose count), token efficiency`, + `(each oc prompt gets fresh context), speed (1 bash call vs N round-trips), composability (full Unix toolkit).`, + ``, + ``, + `Three core patterns:`, + ``, + `1. RALPH LOOP — delegate entire task, re-verify until clean`, + ` Shape: while oc check "DO THE ENTIRE JOB: [criteria]"; do oc status "round done"; done`, + ` Each oc check spawns a fresh AI session that handles the WHOLE task — find ALL issues, fix ALL of them, commit.`, + ` The while loop only exists to re-verify and catch anything missed or newly introduced. Exit 0=found work, 1=clean.`, + ` For deterministic exit conditions (tests pass): while true; do cmd && break; cmd | oc prompt "fix"; done`, + ``, + `2. DETERMINISTIC BATCH — same oc tool operation across many files, NO AI needed`, + ` Shape: oc tool glob/grep | while IFS= read -r f; do oc tool edit/read "$f" ...; done`, + ` When: rename, find-replace, count lines, format files, delete matches`, + ``, + `3. FABRIC — per-item AI pipelines, parallelized`, + ` Each item flows through the COMPLETE pipeline: read | process | prompt.`, + ` NOT: read ALL items first, then summarize ALL — that loses per-item context.`, + ` Correct: oc tool read "file.pdf" | oc prompt "summarize" ← per item, run many in parallel`, + ` Wrong: read all files into one blob, then prompt once`, + ` Reduce: pipe parallel outputs into a final \`oc prompt -s "Specialist" "synthesize"\``, + ` Pipeline: oc tool grep "TODO" src/ | oc prompt "Categorize" | oc tool write report.md`, + ``, + `These are composable — combine freely.`, + ``, + ``, + ``, + `Deterministic (reliable, no LLM):`, + ` oc tool read — Read file → stdout`, + ` oc tool write — Write stdin → file`, + ` oc tool edit --old x --new y — Edit file`, + ` oc tool grep "pattern" — Search → stdout`, + ` oc tool glob "pattern" [path] — Find files → stdout`, + ` oc todo add|done|list — Track progress`, + `Non-deterministic (AI judgment):`, + ` oc prompt "question" — AI response → stdout`, + ` oc prompt -s "role" "question" — Fresh specialist with clean context`, + ` oc check "find and fix X" — Assess+fix in one call, exit 0=issues found, 1=clean`, + ` oc agent explore "task" — Spawn subagent`, + `Progress: oc status "message" — visible marker in TUI. Composition: cat file | oc prompt "analyze"`, + ``, + ``, + ``, + `ALWAYS use an oc script when: 3+ files need the same operation (FABRIC), iteration/repeat (RALPH LOOP), or chained analysis.`, + `Parallelize independent operations whenever possible. Do NOT use individual tool calls in a loop.`, + `Use tools directly only for: single file/single op, or when delegated by an oc caller.`, + ``, + ``, + ``, + `1. Scripts INLINE in bash tool only — NEVER to disk (/tmp, .sh files). Scripts on disk FAIL because oc needs env vars from the bash tool. No timeout parameter.`, + `2. Parallelize independent per-item work (xargs -P, background jobs &, etc). Sequential \`while IFS= read -r\` only when ordering matters. Never \`for f in $(...)\`.`, + ``, + ``, + ``, + ``, + `User asks: "Keep fixing security issues until the API is clean."`, + `Ralph loop: each iteration delegates the ENTIRE job. The while loop re-verifies.`, + ` while oc check "Review the API for ALL security vulnerabilities — auth, injection, rate limiting. Fix everything you find and commit."; do`, + ` oc status "Round complete"`, + ` done`, + ``, + ``, + `User asks: "Run the tests and fix all failures until they pass."`, + `Exit condition is deterministic (exit code 0) — use bash directly, no oc check needed.`, + ` while true; do`, + ` npm test 2>&1 && echo "ALL PASS" && break`, + ` npm test 2>&1 | oc prompt "Fix the failures."`, + ` done`, + ``, + ``, + `User asks: "Show me the line count of every .ts file in src/tool/."`, + `Deterministic batch: same operation across files, no AI needed.`, + ` oc tool glob "src/tool/**/*.ts" | while IFS= read -r f; do`, + ` echo "$(oc tool read "$f" | wc -l) $f"`, + ` done | sort -rn`, + ``, + ``, + `User asks: "Summarize the architecture of this codebase."`, + `Fabric: each file flows through read|prompt independently (parallel), then reduce.`, + ` # Test pipeline on ONE file first`, + ` oc tool read "src/main.ts" | oc prompt "One-line summary of this file"`, + ` # Then scale to all files in parallel`, + ` oc tool glob "src/**/*.ts" | while IFS= read -r f; do`, + ` oc tool read "$f" | oc prompt "One-line summary of this file" &`, + ` done; wait`, + ` # Or with xargs: oc tool glob "*.ts" | xargs -P8 -I{} sh -c 'oc tool read "$1" | oc prompt "summary"' _ {}`, + ` # Reduce: pipe all outputs into oc prompt -s "Architect" "Describe the overall architecture"`, + ``, + ``, + `User asks: "Find all TODOs and create a prioritized report."`, + `Pipeline: multi-stage analysis via pipes.`, + ` oc tool grep "TODO" src/ | oc prompt "Categorize by urgency" | oc tool write report.md`, + ``, + ``, + `User asks: "Summarize all PDFs in this directory."`, + `PDFs need special handling — oc tool read returns attachments, not text.`, + ` # Test on ONE file first to discover what works`, + ` oc tool read "report.pdf" # returns attachment — need multimodal model or text extraction`, + ` # If text extraction needed, find available tools (pdftotext, python, etc)`, + ` # Then scale to all files in parallel`, + ``, + ``, + ``, ].join("\n"), ] } diff --git a/packages/opencode/src/tool/bash.ts b/packages/opencode/src/tool/bash.ts index 50aa9e14ad76..4196e0e0d21e 100644 --- a/packages/opencode/src/tool/bash.ts +++ b/packages/opencode/src/tool/bash.ts @@ -19,8 +19,12 @@ import { BashArity } from "@/permission/arity" import { Truncate } from "./truncate" import { Plugin } from "@/plugin" +// Lazy import to avoid circular dependency (server → session → tool → server) +let _server: typeof import("@/server/server") | undefined +const getServer = async () => (_server ??= await import("@/server/server")) + const MAX_METADATA_LENGTH = 30_000 -const DEFAULT_TIMEOUT = Flag.OPENCODE_EXPERIMENTAL_BASH_DEFAULT_TIMEOUT_MS || 2 * 60 * 1000 +const DEFAULT_TIMEOUT = Flag.OPENCODE_EXPERIMENTAL_BASH_DEFAULT_TIMEOUT_MS ?? 2 * 60 * 1000 const PS = new Set(["powershell", "pwsh"]) const CWD = new Set(["cd", "push-location", "set-location"]) const FILES = new Set([ @@ -290,6 +294,12 @@ async function shellEnv(ctx: Tool.Context, cwd: string) { return { ...process.env, ...extra.env, + // Enable oc callbacks into the running openCode instance + OPENCODE_SESSION_ID: ctx.sessionID, + OPENCODE_MESSAGE_ID: ctx.messageID, + OPENCODE_AGENT: ctx.agent, + OPENCODE_SERVER_URL: (await getServer()).Server.url?.toString() ?? "", + PATH: `${path.resolve(fileURLToPath(import.meta.url), "../../../bin")}${path.delimiter}${process.env.PATH ?? ""}`, } } @@ -323,6 +333,7 @@ async function run( env: NodeJS.ProcessEnv timeout: number description: string + noTruncate?: boolean }, ctx: Tool.Context, ) { @@ -366,10 +377,14 @@ async function run( } ctx.abort.addEventListener("abort", abort, { once: true }) - const timer = setTimeout(() => { - expired = true - void kill() - }, input.timeout + 100) + // timeout === 0 means no timeout (DACMICU scripts with oc callbacks) + const timer = + input.timeout > 0 + ? setTimeout(() => { + expired = true + void kill() + }, input.timeout + 100) + : undefined await new Promise((resolve, reject) => { const cleanup = () => { @@ -407,6 +422,7 @@ async function run( output: preview(output), exit: proc.exitCode, description: input.description, + ...(input.noTruncate && { noTruncate: true }), }, output, } @@ -476,13 +492,21 @@ export const BashTool = Tool.define("bash", async () => { if (params.timeout !== undefined && params.timeout < 0) { throw new Error(`Invalid timeout value: ${params.timeout}. Timeout must be a positive number.`) } - const timeout = params.timeout ?? DEFAULT_TIMEOUT const ps = PS.has(name) const root = await parse(params.command, ps) const scan = await collect(root, cwd, ps, shell) if (!Instance.containsPath(cwd)) scan.dirs.add(cwd) await ask(ctx, scan) + // Any oc call can trigger LLM callbacks (oc prompt, oc check, oc agent) + // that run arbitrarily long — disable timeout and truncation for the whole script. + const usesOc = root.descendantsOfType("command").some((n) => { + if (!n) return false + const cmd = n.childForFieldName("name") ?? n.firstChild + return cmd !== null && /^(oc|\.\/oc)$/.test(cmd.text) + }) + const timeout = usesOc ? 0 : (params.timeout ?? DEFAULT_TIMEOUT) + return run( { shell, @@ -492,6 +516,7 @@ export const BashTool = Tool.define("bash", async () => { env: await shellEnv(ctx, cwd), timeout, description: params.description, + noTruncate: usesOc, }, ctx, ) diff --git a/packages/opencode/src/tool/bash.txt b/packages/opencode/src/tool/bash.txt index 8d53c90ab4e8..cffa0791fecf 100644 --- a/packages/opencode/src/tool/bash.txt +++ b/packages/opencode/src/tool/bash.txt @@ -4,7 +4,7 @@ Be aware: OS: ${os}, Shell: ${shell} All commands run in ${directory} by default. Use the `workdir` parameter if you need to run a command in a different directory. AVOID using `cd && ` patterns - use `workdir` instead. -IMPORTANT: This tool is for terminal operations like git, npm, docker, etc. DO NOT use it for file operations (reading, writing, editing, searching, finding files) - use the specialized tools for this instead. +For single file operations, use specialized tools (Read, Write, Edit, Grep, Glob). For tasks involving multiple files or multi-step workflows, write an inline bash script using `oc` commands — this combines all operations into one execution with full Unix composability (pipes, loops, conditionals). The `oc` command is available inside bash and calls back into the running OpenCode instance. Write oc scripts directly in the command parameter, not to disk files. Before executing the command, please follow these steps: diff --git a/packages/opencode/src/tool/todo.ts b/packages/opencode/src/tool/todo.ts index 53b687b1d13d..58a8edc30ba0 100644 --- a/packages/opencode/src/tool/todo.ts +++ b/packages/opencode/src/tool/todo.ts @@ -29,3 +29,25 @@ export const TodoWriteTool = Tool.define("todowrite", { } }, }) + +export const TodoReadTool = Tool.define("todoread", { + description: "Use this tool to read your todo list", + parameters: z.object({}), + async execute(_params, ctx) { + await ctx.ask({ + permission: "todoread", + patterns: ["*"], + always: ["*"], + metadata: {}, + }) + + const todos = Todo.get(ctx.sessionID) + return { + title: `${todos.filter((x) => x.status !== "completed").length} todos`, + metadata: { + todos, + }, + output: JSON.stringify(todos, null, 2), + } + }, +}) diff --git a/packages/opencode/src/tool/tool.ts b/packages/opencode/src/tool/tool.ts index 6c3f4efaf6dc..3a963fcb69a2 100644 --- a/packages/opencode/src/tool/tool.ts +++ b/packages/opencode/src/tool/tool.ts @@ -68,8 +68,8 @@ export namespace Tool { ) } const result = await execute(args, ctx) - // skip truncation for tools that handle it themselves - if (result.metadata.truncated !== undefined) { + // skip truncation for tools that handle it themselves (bash with oc, etc) + if (result.metadata.truncated !== undefined || result.metadata.noTruncate) { return result } const truncated = await Truncate.output(result.output, {}, initCtx?.agent) From 26e0c981394b8fdb82882f18cef2f9f9ba50486d Mon Sep 17 00:00:00 2001 From: micu Date: Mon, 30 Mar 2026 08:34:52 +0200 Subject: [PATCH 3/4] test(DACMICU): comprehensive test suite - Add dacmicu.test.ts: CLI unit tests, binary protocol, shell fast-path, oc check sentinel detection, keepalive protocol - Add ralph-loop-timeout.test.ts: AST-based oc command detection, timeout configuration per command type - Add dacmicu-endpoints.test.ts: /tool and /exec endpoint integration tests, ToolPart creation, binary file pass-through, followUp schema - Add oc.test.ts: exec timeout regression tests - Add message-v2.test.ts and bash.test.ts: extended coverage for MessageV2 and bash tool changes - Fix compaction test to match Stream.mapEffect signature change --- packages/opencode/test/cli/oc.test.ts | 34 ++ .../test/server/dacmicu-endpoints.test.ts | 319 +++++++++++ .../opencode/test/session/compaction.test.ts | 2 +- .../opencode/test/session/message-v2.test.ts | 189 +++++++ .../test/session/processor-effect.test.ts | 1 - packages/opencode/test/tool/bash.test.ts | 235 ++++++++ packages/opencode/test/tool/dacmicu.test.ts | 516 ++++++++++++++++++ .../test/tool/ralph-loop-timeout.test.ts | 98 ++++ 8 files changed, 1392 insertions(+), 2 deletions(-) create mode 100644 packages/opencode/test/cli/oc.test.ts create mode 100644 packages/opencode/test/server/dacmicu-endpoints.test.ts create mode 100644 packages/opencode/test/tool/dacmicu.test.ts create mode 100644 packages/opencode/test/tool/ralph-loop-timeout.test.ts diff --git a/packages/opencode/test/cli/oc.test.ts b/packages/opencode/test/cli/oc.test.ts new file mode 100644 index 000000000000..7667adacf6d3 --- /dev/null +++ b/packages/opencode/test/cli/oc.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, test } from "bun:test" +import { timeoutOpt } from "../../bin/oc.ts" + +// Regression test for: exec operations must disable Bun's native TCP timeout so that +// long-running `oc check` / `oc prompt` calls (Ralph loops, 22-commit analyses, etc.) +// are never killed mid-flight by Bun's default fetch timeout. +// See provider.ts:1324 for the same fix applied to LLM provider calls. +describe("oc.timeoutOpt", () => { + test("exec path returns { timeout: false }", () => { + expect(timeoutOpt("/session/abc/exec")).toEqual({ timeout: false }) + expect(timeoutOpt("/session/xyz_123/exec")).toEqual({ timeout: false }) + }) + + test("tool path returns {}", () => { + expect(timeoutOpt("/session/abc/tool")).toEqual({}) + expect(timeoutOpt("/session/abc/read")).toEqual({}) + }) + + test("empty path returns {}", () => { + expect(timeoutOpt("")).toEqual({}) + }) + + test("path containing 'exec' as a substring of a different segment returns { timeout: false }", () => { + // /exec substring match is intentional: any path segment with /exec triggers it + // (e.g. /session/abc/exec-stream would also disable timeout — acceptable, conservative) + expect(timeoutOpt("/session/abc/exec-stream")).toEqual({ timeout: false }) + }) + + test("path with exec in query string does not match if not in path segment", () => { + // /exec must appear in path — a query-string mention should NOT match + // Note: the current implementation uses includes("/exec") so this documents the actual behaviour + expect(timeoutOpt("/session/abc/tool?exec=1")).toEqual({}) + }) +}) diff --git a/packages/opencode/test/server/dacmicu-endpoints.test.ts b/packages/opencode/test/server/dacmicu-endpoints.test.ts new file mode 100644 index 000000000000..3290bdef254b --- /dev/null +++ b/packages/opencode/test/server/dacmicu-endpoints.test.ts @@ -0,0 +1,319 @@ +import { describe, expect, test } from "bun:test" +import fs from "fs/promises" +import path from "path" +import { Instance } from "../../src/project/instance" +import { Server } from "../../src/server/server" +import { Session } from "../../src/session" +import { MessageV2 } from "../../src/session/message-v2" +import { MessageID, type SessionID } from "../../src/session/schema" +import { Todo } from "../../src/session/todo" +import { Log } from "../../src/util/log" + +const root = path.join(__dirname, "../..") +Log.init({ print: false }) + +// Helper: create a session with user+assistant messages (simulates bash tool context) +async function makeMsg(sessionID: SessionID) { + // Create user message first (assistant needs parentID) + const userID = MessageID.ascending() + await Session.updateMessage({ + id: userID, + sessionID, + role: "user", + time: { created: Date.now() }, + agent: "build", + model: { providerID: "test", modelID: "test" }, + tools: {}, + mode: "", + } as unknown as MessageV2.Info) + + const assistantID = MessageID.ascending() + await Session.updateMessage({ + id: assistantID, + sessionID, + role: "assistant", + time: { created: Date.now() }, + parentID: userID, + agent: "build", + modelID: "test-model", + providerID: "test", + mode: "", + path: { cwd: root, root }, + cost: 0, + tokens: { input: 0, output: 0, reasoning: 0, cache: { read: 0, write: 0 } }, + } as unknown as MessageV2.Info) + return assistantID +} + +describe("oc — /session/:id/tool endpoint", () => { + test("executes a tool and returns output", async () => { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + const app = Server.Default() + + const res = await app.request(`/session/${session.id}/tool`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name: "glob", args: { pattern: "*.ts" } }), + }) + expect(res.status).toBe(200) + const text = await res.text() + // Should return some .ts files from the project root + expect(text.length).toBeGreaterThan(0) + + await Session.remove(session.id) + }, + }) + }) + + test("returns 404 for unknown tool", async () => { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + const app = Server.Default() + + const res = await app.request(`/session/${session.id}/tool`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name: "nonexistent_tool_xyz", args: {} }), + }) + expect(res.status).toBe(404) + + await Session.remove(session.id) + }, + }) + }) + + test("creates ToolPart when messageID provided", async () => { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + const msgID = await makeMsg(session.id) + const app = Server.Default() + + const res = await app.request(`/session/${session.id}/tool`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + name: "glob", + args: { pattern: "package.json" }, + messageID: msgID, + }), + }) + expect(res.status).toBe(200) + await res.text() // consume stream — ToolPart update happens inside + + // Check that a ToolPart was created on the message + const parts = await MessageV2.parts(msgID) + const ocParts = parts.filter((p): p is MessageV2.ToolPart => p.type === "tool" && !!p.metadata?.oc) + expect(ocParts.length).toBe(1) + expect(ocParts[0].tool).toBe("glob") + expect(ocParts[0].state.status).toBe("completed") + + await Session.remove(session.id) + }, + }) + }) + + test("does NOT create ToolPart without messageID", async () => { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + const msgID = await makeMsg(session.id) + const app = Server.Default() + + const res = await app.request(`/session/${session.id}/tool`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name: "glob", args: { pattern: "package.json" } }), + }) + expect(res.status).toBe(200) + + // No oc ToolParts should be created + const parts = await MessageV2.parts(msgID) + const ocParts = parts.filter((p): p is MessageV2.ToolPart => p.type === "tool" && !!p.metadata?.oc) + expect(ocParts.length).toBe(0) + + await Session.remove(session.id) + }, + }) + }) + + // Note: tool error tests skipped because tools ask for permissions which + // hang in test context. Error handling is tested via CLI tests in dacmicu.test.ts. + + test("binary file pass-through appends OC_FILE marker for PDF", async () => { + // Create the PDF inside the project dir so it passes the read tool's + // containsPath check — external files block on an interactive permission ask. + const tmpFile = path.join(root, `oc-test-${Date.now()}.pdf`) + await Bun.write(tmpFile, "%PDF-1.4 test content") + try { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + const app = Server.Default() + + const res = await app.request(`/session/${session.id}/tool`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + name: "read", + args: { filePath: tmpFile }, + }), + }) + expect(res.status).toBe(200) + const text = await res.text() + // PDF files return "PDF read successfully" + OC_FILE marker + expect(text).toContain("PDF read successfully") + expect(text).toContain("\x00OC_FILE\x00:") + expect(text).toContain(tmpFile) + + await Session.remove(session.id) + }, + }) + } finally { + await fs.unlink(tmpFile).catch(() => {}) + } + }) +}) + +describe("oc — exec schema validation", () => { + test("exec endpoint rejects missing prompt field", async () => { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + const app = Server.Default() + + const res = await app.request(`/session/${session.id}/exec`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ system: "only system, no prompt" }), + }) + expect(res.status).toBe(400) + + await Session.remove(session.id) + }, + }) + }) + + test("exec endpoint accepts optional format field with json_schema type", async () => { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + const app = Server.Default() + + // Schema validation: the format field is optional; providing it should not cause 400. + // The request will fail beyond validation (no model configured), but must not be 400. + const res = await app.request(`/session/${session.id}/exec`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + prompt: "test", + format: { + type: "json_schema", + schema: { type: "object", properties: { result: { type: "boolean" } } }, + }, + }), + }) + expect(res.status).not.toBe(400) + + await Session.remove(session.id) + }, + }) + }) +}) + +describe("oc — todo endpoints", () => { + test("POST creates a todo", async () => { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + const app = Server.Default() + + const res = await app.request(`/session/${session.id}/todo`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ content: "Test task", status: "pending" }), + }) + expect(res.status).toBe(200) + const todo = await res.json() + expect(todo.content).toBe("Test task") + expect(todo.status).toBe("pending") + expect(todo.priority).toBe("medium") + + // Verify it's in the list + const todos = Todo.get(session.id) + expect(todos.length).toBe(1) + + await Session.remove(session.id) + }, + }) + }) + + test("PUT replaces all todos", async () => { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + const app = Server.Default() + + // Add 2 todos + Todo.update({ sessionID: session.id, todos: [{ content: "Task 1", status: "pending", priority: "medium" }] }) + Todo.update({ + sessionID: session.id, + todos: [...Todo.get(session.id), { content: "Task 2", status: "pending", priority: "medium" }], + }) + expect(Todo.get(session.id).length).toBe(2) + + // Replace with 1 todo + const res = await app.request(`/session/${session.id}/todo`, { + method: "PUT", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ todos: [{ content: "Replaced", status: "completed", priority: "high" }] }), + }) + expect(res.status).toBe(200) + const todos = Todo.get(session.id) + expect(todos.length).toBe(1) + expect(todos[0].content).toBe("Replaced") + expect(todos[0].status).toBe("completed") + + await Session.remove(session.id) + }, + }) + }) + + test("Todo.update supports incremental additions", async () => { + await Instance.provide({ + directory: root, + fn: async () => { + const session = await Session.create({}) + + // Add 10 todos incrementally + for (let i = 0; i < 10; i++) { + const existing = Todo.get(session.id) + Todo.update({ + sessionID: session.id, + todos: [...existing, { content: `Task ${i}`, status: "pending", priority: "medium" }], + }) + } + + const todos = Todo.get(session.id) + expect(todos.length).toBe(10) + // Verify ordering is preserved + expect(todos[0].content).toBe("Task 0") + expect(todos[9].content).toBe("Task 9") + + await Session.remove(session.id) + }, + }) + }) +}) diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts index 8f29b7788041..145508878371 100644 --- a/packages/opencode/test/session/compaction.test.ts +++ b/packages/opencode/test/session/compaction.test.ts @@ -191,7 +191,7 @@ function llm() { stream: (input) => { const item = queue.shift() ?? Stream.empty const stream = typeof item === "function" ? item(input) : item - return stream.pipe(Stream.mapEffect((event) => Effect.succeed(event))) + return stream }, }), ), diff --git a/packages/opencode/test/session/message-v2.test.ts b/packages/opencode/test/session/message-v2.test.ts index 3634d6fb7ec8..09837f4105c2 100644 --- a/packages/opencode/test/session/message-v2.test.ts +++ b/packages/opencode/test/session/message-v2.test.ts @@ -955,3 +955,192 @@ describe("session.message-v2.fromError", () => { expect(result.name).toBe("MessageAbortedError") }) }) + +describe("session.message-v2.toModelMessage — oc filter", () => { + test("filters out tool parts with metadata.oc = true", async () => { + const userID = "m-user" + const assistantID = "m-assistant" + + const input: MessageV2.WithParts[] = [ + { + info: userInfo(userID), + parts: [{ ...basePart(userID, "u1"), type: "text", text: "run task" }] as MessageV2.Part[], + }, + { + info: assistantInfo(assistantID, userID), + parts: [ + { ...basePart(assistantID, "a1"), type: "text", text: "running script" }, + // Normal tool part (from LLM) — should be included + { + ...basePart(assistantID, "a2"), + type: "tool", + callID: "call-normal", + tool: "bash", + state: { + status: "completed", + input: { command: "ls" }, + output: "file.ts", + title: "Bash", + metadata: {}, + time: { start: 0, end: 1 }, + }, + }, + // oc-injected tool part — should be FILTERED + { + ...basePart(assistantID, "oc-read"), + type: "tool", + callID: "call-oc", + tool: "read", + metadata: { oc: true }, + state: { + status: "completed", + input: { filePath: "src/file.ts" }, + output: "content", + title: "Read", + metadata: {}, + time: { start: 0, end: 1 }, + }, + }, + // oc-injected task part (oc prompt) — should be FILTERED + { + ...basePart(assistantID, "oc-prompt"), + type: "tool", + callID: "call-oc-prompt", + tool: "task", + metadata: { oc: true }, + state: { + status: "completed", + input: { prompt: "summarize" }, + output: "summary", + title: "oc prompt", + metadata: { sessionId: "child-session" }, + time: { start: 0, end: 1 }, + }, + }, + ] as MessageV2.Part[], + }, + ] + + const result = await MessageV2.toModelMessages(input, model) + // user + assistant(text+tool-call) + tool-result = 3 messages + expect(result.length).toBe(3) + + // Assistant message should have text + one tool-call (normal bash, NOT oc parts) + const assistant = result[1] + expect(assistant.role).toBe("assistant") + type Part = { type: string; toolCallId?: string } + const toolCalls = (assistant.content as Part[]).filter((p) => p.type === "tool-call") + expect(toolCalls.length).toBe(1) + expect(toolCalls[0].toolCallId).toBe("call-normal") + + // Tool result message should only have normal bash result + const toolResult = result[2] + const results = (toolResult.content as Part[]).filter((p) => p.type === "tool-result") + expect(results.length).toBe(1) + expect(results[0].toolCallId).toBe("call-normal") + }) + + test("does NOT filter tool parts without metadata.oc", async () => { + const userID = "m-user" + const assistantID = "m-assistant" + + const input: MessageV2.WithParts[] = [ + { + info: userInfo(userID), + parts: [{ ...basePart(userID, "u1"), type: "text", text: "read file" }] as MessageV2.Part[], + }, + { + info: assistantInfo(assistantID, userID), + parts: [ + { + ...basePart(assistantID, "a1"), + type: "tool", + callID: "call-1", + tool: "read", + // No metadata.oc — normal LLM tool call + state: { + status: "completed", + input: { filePath: "file.ts" }, + output: "content", + title: "Read", + metadata: {}, + time: { start: 0, end: 1 }, + }, + }, + ] as MessageV2.Part[], + }, + ] + + const result = await MessageV2.toModelMessages(input, model) + // user + assistant(tool-call) + tool-result = 3 messages + expect(result.length).toBe(3) + type Part = { type: string; toolCallId?: string } + const toolCalls = (result[1].content as Part[]).filter((p) => p.type === "tool-call") + expect(toolCalls.length).toBe(1) + // The normal read tool should be present + expect(toolCalls[0].toolCallId).toBe("call-1") + }) + + test("handles mixed oc and non-oc parts correctly", async () => { + const userID = "m-user" + const assistantID = "m-assistant" + + const input: MessageV2.WithParts[] = [ + { + info: userInfo(userID), + parts: [{ ...basePart(userID, "u1"), type: "text", text: "batch" }] as MessageV2.Part[], + }, + { + info: assistantInfo(assistantID, userID), + parts: [ + // 3 oc parts + 1 normal part + { + ...basePart(assistantID, "oc1"), + type: "tool", + callID: "c1", + tool: "read", + metadata: { oc: true }, + state: { status: "completed", input: {}, output: "a", title: "", metadata: {}, time: { start: 0, end: 1 } }, + }, + { + ...basePart(assistantID, "oc2"), + type: "tool", + callID: "c2", + tool: "grep", + metadata: { oc: true }, + state: { status: "completed", input: {}, output: "b", title: "", metadata: {}, time: { start: 0, end: 1 } }, + }, + { + ...basePart(assistantID, "normal"), + type: "tool", + callID: "c3", + tool: "bash", + state: { + status: "completed", + input: { command: "ls" }, + output: "c", + title: "", + metadata: {}, + time: { start: 0, end: 1 }, + }, + }, + { + ...basePart(assistantID, "oc3"), + type: "tool", + callID: "c4", + tool: "task", + metadata: { oc: true }, + state: { status: "completed", input: {}, output: "d", title: "", metadata: {}, time: { start: 0, end: 1 } }, + }, + ] as MessageV2.Part[], + }, + ] + + const result = await MessageV2.toModelMessages(input, model) + // assistant message should only have the one normal tool-call + type Part = { type: string; toolCallId?: string } + const toolCalls = (result[1].content as Part[]).filter((p) => p.type === "tool-call") + expect(toolCalls.length).toBe(1) + expect(toolCalls[0].toolCallId).toBe("c3") + }) +}) diff --git a/packages/opencode/test/session/processor-effect.test.ts b/packages/opencode/test/session/processor-effect.test.ts index cd9d97e15fdd..37f07be94b3c 100644 --- a/packages/opencode/test/session/processor-effect.test.ts +++ b/packages/opencode/test/session/processor-effect.test.ts @@ -696,7 +696,6 @@ it.effect("session.processor effect tests mark pending tools as aborted on clean (dir) => Effect.gen(function* () { const ready = defer() - const seen = defer() const test = yield* TestLLM const processors = yield* SessionProcessor.Service const session = yield* Session.Service diff --git a/packages/opencode/test/tool/bash.test.ts b/packages/opencode/test/tool/bash.test.ts index 0ea8ea073a51..79d1a222b4a6 100644 --- a/packages/opencode/test/tool/bash.test.ts +++ b/packages/opencode/test/tool/bash.test.ts @@ -31,6 +31,12 @@ const bash = (() => { if (Shell.name(shell) === "bash") return shell return Shell.gitbash() })() + +// Shell that supports POSIX $VAR variable expansion. +// On Windows the acceptable shell is PowerShell which uses $env:VAR instead; +// fall back to Git Bash. On other platforms use the acceptable shell directly. +const posixShell = process.platform !== "win32" ? Shell.acceptable() : bash +const posixLabel = posixShell ? Shell.name(posixShell) : "" const shells = (() => { if (process.platform !== "win32") { const shell = Shell.acceptable() @@ -981,4 +987,233 @@ describe("tool.bash truncation", () => { }, }) }) + + test("does NOT truncate when oc command is detected", async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const bash = await BashTool.init() + const lineCount = Truncate.MAX_LINES + 500 + const result = await bash.execute( + { + // oc in the command should disable truncation (even if oc fails) + command: `echo "start" && ${fill("lines", lineCount)} && oc --help 2>/dev/null; echo "end"`, + description: "Generate large output with oc command", + }, + ctx, + ) + // Should NOT be truncated because oc is detected + expect((result.metadata as { truncated?: boolean; noTruncate?: boolean }).truncated).toBeFalsy() + expect((result.metadata as { noTruncate?: boolean }).noTruncate).toBe(true) + // Output should contain start and end markers (not truncated) + expect(result.output).toContain("start") + expect(result.output).toContain("end") + // Should have all the lines (not truncated to 2000) + const lines = result.output.split("\n") + expect(lines.length).toBeGreaterThan(Truncate.MAX_LINES) + }, + }) + }) + + // oc env var injection + describe("oc env vars", () => { + if (posixShell) { + test( + "OPENCODE_SESSION_ID is injected into bash subprocess", + withShell({ label: posixLabel, shell: posixShell }, async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const tool = await BashTool.init() + const result = await tool.execute( + { command: 'echo "$OPENCODE_SESSION_ID"', timeout: 5000, description: "test" }, + ctx, + ) + expect(result.output.trim()).toBe("ses_test") + }, + }) + }), + ) + + test( + "OPENCODE_AGENT is injected into bash subprocess", + withShell({ label: posixLabel, shell: posixShell }, async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const tool = await BashTool.init() + const result = await tool.execute( + { command: 'echo "$OPENCODE_AGENT"', timeout: 5000, description: "test" }, + ctx, + ) + expect(result.output.trim()).toBe("build") + }, + }) + }), + ) + + test( + "OPENCODE_MESSAGE_ID is injected into bash subprocess", + withShell({ label: posixLabel, shell: posixShell }, async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const tool = await BashTool.init() + const ctxWithMsg = { ...ctx, messageID: MessageID.make("msg_test123") } + const result = await tool.execute( + { command: 'echo "$OPENCODE_MESSAGE_ID"', timeout: 5000, description: "test" }, + ctxWithMsg, + ) + expect(result.output.trim()).toBe("msg_test123") + }, + }) + }), + ) + } + + test("oc is on PATH inside bash subprocess", async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const bash = await BashTool.init() + const result = await bash.execute({ command: "which oc", timeout: 5000, description: "test" }, ctx) + expect(result.output.trim()).toContain("bin/oc") + }, + }) + }) + + if (posixShell) { + test( + "OPENCODE_SERVER_URL is set (not empty) when Server.url exists", + withShell({ label: posixLabel, shell: posixShell }, async () => { + const { Server } = await import("../../src/server/server") + const prev = Server.url + Server.url = new URL("http://127.0.0.1:4096") + try { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const tool = await BashTool.init() + const result = await tool.execute( + { command: 'echo "$OPENCODE_SERVER_URL"', timeout: 5000, description: "test" }, + ctx, + ) + expect(result.output.trim()).toBe("http://127.0.0.1:4096/") + }, + }) + } finally { + Server.url = prev + } + }), + ) + } + + test("Server.url gets actual port, not 0, after listen()", async () => { + const { Server } = await import("../../src/server/server") + // When port 0 is requested, the OS assigns a random port. + // Server.url must reflect the actual port, not 0. + const prev = Server.url + try { + const server = Server.listen({ port: 0, hostname: "127.0.0.1" }) + expect(Server.url).toBeDefined() + expect(Server.url.port).not.toBe("0") + expect(parseInt(Server.url.port)).toBeGreaterThan(0) + await server.stop(true) + } finally { + Server.url = prev + } + }) + + if (posixShell) { + test( + "OPENCODE_SERVER_URL has no double slash when Server.url has trailing slash", + withShell({ label: posixLabel, shell: posixShell }, async () => { + const { Server } = await import("../../src/server/server") + const prev = Server.url + Server.url = new URL("http://127.0.0.1:4096/") + try { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const tool = await BashTool.init() + const result = await tool.execute( + { command: 'echo "$OPENCODE_SERVER_URL"', timeout: 5000, description: "test" }, + ctx, + ) + const url = result.output.trim() + expect(url).toBe("http://127.0.0.1:4096/") + // Simulate what bin/oc does: ${URL%/}/session/... + const stripped = url.replace(/\/$/, "") + const apiUrl = `${stripped}/session/test/tool` + expect(apiUrl).not.toContain("//session") + expect(apiUrl).toBe("http://127.0.0.1:4096/session/test/tool") + }, + }) + } finally { + Server.url = prev + } + }), + ) + } + + // ── no-timeout for oc scripts ────────────────── + test("regular command respects explicit timeout", async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const bash = await BashTool.init() + // sleep 1s with 50ms timeout → the timer fires and kills it + const result = await bash.execute( + { command: "sleep 1", timeout: 50, description: "sleep that should time out" }, + ctx, + ) + expect(result.output).toContain("terminated command after exceeding timeout") + }, + }) + }) + + test("oc script ignores explicit timeout — Ralph loop can run without being killed", async () => { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const bash = await BashTool.init() + // while loop using oc — timeout: 50 would kill a normal command in ~150ms, + // but usesOc=true && hasWhileLoop forces timeout=0, so no timer is created + const result = await bash.execute( + { + command: "i=0; while [ $i -lt 1 ]; do sleep 0.2; oc tool read /dev/null; i=$((i+1)); done", + timeout: 50, + description: "oc while loop — timeout param must be ignored", + }, + ctx, + ) + // Process ran to completion (oc may fail on HTTP, but that is not a timeout kill) + expect(result.output).not.toContain("terminated command after exceeding timeout") + }, + }) + }) + + test("OPENCODE_SERVER_URL is empty when Server.url is undefined", async () => { + const { Server } = await import("../../src/server/server") + const prev = Server.url + // @ts-expect-error - simulate undefined + Server.url = undefined + try { + await Instance.provide({ + directory: projectRoot, + fn: async () => { + const bash = await BashTool.init() + const result = await bash.execute( + { command: 'echo "URL=$OPENCODE_SERVER_URL"', timeout: 5000, description: "test" }, + ctx, + ) + // Empty string — oc will error, which is the bug we fixed in thread.ts + expect(result.output.trim()).toBe("URL=") + }, + }) + } finally { + Server.url = prev + } + }) + }) }) diff --git a/packages/opencode/test/tool/dacmicu.test.ts b/packages/opencode/test/tool/dacmicu.test.ts new file mode 100644 index 000000000000..0166451715b4 --- /dev/null +++ b/packages/opencode/test/tool/dacmicu.test.ts @@ -0,0 +1,516 @@ +import { describe, test, expect } from "bun:test" +import { spawnSync } from "child_process" +import path from "path" +import { MessageV2 } from "../../src/session/message-v2" + +const OC_TS = path.resolve(import.meta.dirname, "../../bin/oc.ts") +const OC_SH = path.resolve(import.meta.dirname, "../../bin/oc") + +// Helper: spawn oc.ts with given args and env. +// Short timeouts ensure the process exits quickly when no server is running. +function oc(args: string[], env: Record = {}) { + return spawnSync("bun", ["run", OC_TS, ...args], { + env: { + ...process.env, + OPENCODE_SERVER_URL: "http://localhost:4096", + OPENCODE_SESSION_ID: "ses_test_session", + OPENCODE_AGENT: "build", + OPENCODE_QUIET: "1", // suppress auto-announce in tests + OPENCODE_EXEC_TIMEOUT_MS: "3000", // fail fast when no server running + OPENCODE_TOOL_TIMEOUT_MS: "3000", // same for tool paths + ...env, + }, + timeout: 10000, + }) +} + +// Helper: spawn shell wrapper +function ocsh(args: string[], env: Record = {}) { + return spawnSync(OC_SH, args, { + env: { + ...process.env, + OPENCODE_SERVER_URL: "http://localhost:4096", + OPENCODE_SESSION_ID: "ses_test_session", + OPENCODE_AGENT: "build", + OPENCODE_QUIET: "1", + OPENCODE_EXEC_TIMEOUT_MS: "3000", + OPENCODE_TOOL_TIMEOUT_MS: "3000", + PATH: `${path.dirname(OC_SH)}:${process.env.PATH}`, + ...env, + }, + timeout: 10000, + }) +} + +describe("oc CLI", () => { + // ── CLI Unit Tests ────────────────────────────────────────── + + describe("oc CLI — env validation", () => { + test("missing SERVER_URL errors", () => { + const result = spawnSync("bun", ["run", OC_TS, "--help"], { + env: { ...process.env, OPENCODE_SERVER_URL: "", OPENCODE_SESSION_ID: "x" }, + timeout: 5000, + }) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("OPENCODE_SERVER_URL") + }) + + test("missing SESSION_ID errors", () => { + const result = spawnSync("bun", ["run", OC_TS, "--help"], { + env: { ...process.env, OPENCODE_SERVER_URL: "http://x", OPENCODE_SESSION_ID: "" }, + timeout: 5000, + }) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("OPENCODE_SESSION_ID") + }) + }) + + describe("oc CLI — help", () => { + test("--help prints usage", () => { + const result = oc(["--help"]) + expect(result.status).toBe(0) + expect(result.stdout.toString()).toContain("oc prompt") + expect(result.stdout.toString()).toContain("oc tool") + }) + + test("help subcommand works", () => { + const result = oc(["help"]) + expect(result.status).toBe(0) + expect(result.stdout.toString()).toContain("DETERMINISTIC TOOLS") + }) + }) + + describe("oc CLI — unknown command", () => { + test("unknown command errors", () => { + const result = oc(["nonexistent"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("unknown command") + }) + }) + + describe("oc CLI — prompt subcommand", () => { + test("no text errors", () => { + const result = oc(["prompt"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("no prompt text") + }) + + test("prompt sends to /exec (fails on network, not parse)", () => { + const result = oc(["prompt", "hello"]) + expect(result.status).not.toBe(0) + // Fails on network (connection refused or timeout), not on argument parsing + expect(result.stderr.toString()).not.toContain("no prompt text") + }) + + test("-s flag parses system prompt", () => { + const result = oc(["prompt", "-s", "pirate", "hello"]) + expect(result.status).not.toBe(0) + // Should not fail on argument parsing — only on network + expect(result.stderr.toString()).not.toContain("no prompt text") + }) + + test("-m flag parses model", () => { + const result = oc(["prompt", "-m", "anthropic/claude", "hello"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).not.toContain("no prompt text") + }) + + test("bad model format errors", () => { + const result = oc(["prompt", "-m", "badmodel", "hello"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("provider/model") + }) + }) + + describe("oc CLI — tool subcommand", () => { + test("no tool name errors", () => { + const result = oc(["tool"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("no tool name") + }) + + test("read parses file path", () => { + const result = oc(["tool", "read", "src/index.ts"]) + expect(result.status).not.toBe(0) + // Fails on network, not argument parsing + expect(result.stderr.toString()).not.toContain("file path required") + }) + + test("glob parses pattern and path", () => { + const result = oc(["tool", "glob", "*.ts", "src"]) + expect(result.status).not.toBe(0) + // Fails on network, not argument parsing + expect(result.stderr.toString()).not.toContain("pattern required") + }) + + test("grep parses pattern and path", () => { + const result = oc(["tool", "grep", "TODO", "src/"]) + expect(result.status).not.toBe(0) + // Fails on network, not argument parsing + expect(result.stderr.toString()).not.toContain("pattern required") + }) + }) + + describe("oc CLI — todo subcommand", () => { + test("todo add without content errors", () => { + const result = oc(["todo", "add"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("no content") + }) + + test("todo done without index errors", () => { + const result = oc(["todo", "done"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("1-based index") + }) + + test("todo done 0 errors", () => { + const result = oc(["todo", "done", "0"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("1-based index") + }) + + test("todo unknown subcommand errors", () => { + const result = oc(["todo", "xyz"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("unknown") + }) + }) + + describe("oc CLI — agent subcommand", () => { + test("no type errors", () => { + const result = oc(["agent"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("usage") + }) + + test("no prompt errors", () => { + const result = oc(["agent", "explore"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("no prompt text") + }) + }) + + describe("oc CLI — status subcommand", () => { + test("status writes to stderr", () => { + const result = oc(["status", "Processing..."], { OPENCODE_QUIET: "0" }) + expect(result.status).toBe(0) + expect(result.stderr.toString()).toContain("Processing...") + }) + + test("status without message errors", () => { + const result = oc(["status"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("no message") + }) + }) + + describe("oc CLI — auto-announce", () => { + test("auto-announce writes dimmed text to stderr", () => { + // Use a 500ms exec timeout: enough for log() to fire, too short for network to succeed + const result = oc(["prompt", "hello"], { OPENCODE_QUIET: "0", OPENCODE_EXEC_TIMEOUT_MS: "500" }) + // log() fires BEFORE the API call, so [oc] appears even if network times out + const stderr = result.stderr.toString() + expect(stderr).toContain("[oc]") + expect(stderr).toContain("prompt") + }) + + test("OPENCODE_QUIET=1 suppresses announce", () => { + // Use a 500ms exec timeout for speed + const result = oc(["prompt", "hello"], { OPENCODE_QUIET: "1", OPENCODE_EXEC_TIMEOUT_MS: "500" }) + const stderr = result.stderr.toString() + expect(stderr).not.toContain("[oc]") + }) + }) + + describe("oc CLI — connection error handling", () => { + test("connection error gives user-friendly error (no stack traces)", () => { + const result = oc(["tool", "read", "src/index.ts"]) + expect(result.status).not.toBe(0) + // Should NOT contain raw stack traces regardless of error type + expect(result.stderr.toString()).not.toContain(" at ") + }) + }) + + // ── Binary File Protocol Tests ────────────────────────────── + + describe("binary file protocol", () => { + const OC_FILE_MARKER = "\x00OC_FILE\x00:" + + test("detects marker in text", () => { + const input = `PDF read successfully\n${OC_FILE_MARKER}/path/to/file.pdf` + const lines = input.split("\n") + const files: string[] = [] + const text: string[] = [] + for (const line of lines) { + if (line.startsWith(OC_FILE_MARKER)) files.push(line.substring(OC_FILE_MARKER.length)) + else text.push(line) + } + expect(files).toEqual(["/path/to/file.pdf"]) + expect(text).toEqual(["PDF read successfully"]) + }) + + test("no marker = plain text", () => { + const input = "Just regular text\nAnother line" + const lines = input.split("\n") + const files: string[] = [] + for (const line of lines) { + if (line.startsWith(OC_FILE_MARKER)) files.push(line.substring(OC_FILE_MARKER.length)) + } + expect(files).toEqual([]) + }) + + test("multiple markers extracted", () => { + const input = `text\n${OC_FILE_MARKER}/a.pdf\nmore\n${OC_FILE_MARKER}/b.png` + const files: string[] = [] + for (const line of input.split("\n")) { + if (line.startsWith(OC_FILE_MARKER)) files.push(line.substring(OC_FILE_MARKER.length)) + } + expect(files).toEqual(["/a.pdf", "/b.png"]) + }) + + test("marker cannot appear in normal text", () => { + // Null bytes can't be typed or appear in normal text streams + const text = "__oc_file__:/path/to/file.pdf" + expect(text.startsWith(OC_FILE_MARKER)).toBe(false) + }) + }) + + // ── Shell Fast-Path Tests ────────────────────────────────── + // bin/oc is a POSIX sh script — only runnable on non-Windows platforms. + if (process.platform !== "win32") { + describe("shell fast-path", () => { + test("bin/oc is executable", () => { + const result = ocsh(["help"]) + expect(result.status).toBe(0) + expect(result.stdout.toString()).toContain("oc") + }) + + test("bin/oc routes tool to fast path (if jq available)", () => { + const result = ocsh(["tool", "read", "/tmp/test.ts"]) + // curl may succeed (HTTP 200 with error body) or fail — either way it shouldn't crash on routing + const output = result.stdout.toString() + result.stderr.toString() + // Should not contain shell syntax errors + expect(output).not.toContain("syntax error") + }) + }) + } + + // ── Data Integrity Tests ────────────────────────────────── + + describe("toModelMessages filter", () => { + test("oc-flagged ToolPart is excluded from toModelMessages — guard present in message-v2", async () => { + // The oc metadata flag must prevent tool parts from being sent to the LLM, + // since there is no corresponding tool_use from the model. + // Verify the guard exists at the exact code path before toolNames.add(). + const src = await Bun.file(path.resolve(import.meta.dirname, "../../src/session/message-v2.ts")).text() + // Guard must appear before toolNames.add() — both patterns must be present + expect(src).toContain("part.metadata?.oc") + expect(src).toContain("toolNames.add") + // The skip comment must also be present (documents why the part is excluded) + expect(src).toContain("Skip oc-injected Part") + }) + + test("non-oc ToolPart does not carry oc marker", () => { + // Standard tool parts have no oc marker — guard must not fire for normal parts + const part = { type: "tool" as const, metadata: undefined } + expect((part.metadata as { oc?: boolean } | undefined)?.oc).toBeUndefined() + }) + }) + + // ── oc check Tests ───────────────────────────────────── + + describe("oc check subcommand", () => { + test("no question errors", () => { + const result = oc(["check"]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("no question") + }) + + test("sends to /exec (fails on HTTP, not argument parse)", () => { + const result = oc(["check", "Are tests passing?"]) + // Should fail on HTTP connection, not argument parsing + expect(result.stderr.toString()).not.toContain("no question") + }) + + test("help text includes oc check with grep pattern", () => { + const result = oc(["help"]) + expect(result.stdout.toString()).toContain("oc check") + expect(result.stdout.toString()).toContain("grep pattern") + expect(result.stdout.toString()).toContain("while") + }) + + test("help text describes oc", () => { + const result = oc(["help"]) + expect(result.stdout.toString()).toContain("openCode CLI") + }) + }) + + // ── oc check sentinel detection ───────────────────────── + + describe("oc check sentinel detection", () => { + const SENTINEL = "NO_ISSUES_FOUND" + + function checkResult(response: string) { + const trimmed = response.trim() + const clean = trimmed.includes(SENTINEL) + return { clean, assessment: clean ? "" : trimmed } + } + + test("response ending with sentinel → clean", () => { + const { clean } = checkResult("Everything looks good.\nNO_ISSUES_FOUND") + expect(clean).toBe(true) + }) + + test("sentinel only → clean", () => { + const { clean } = checkResult("NO_ISSUES_FOUND") + expect(clean).toBe(true) + }) + + test("sentinel with trailing whitespace → clean", () => { + const { clean } = checkResult("NO_ISSUES_FOUND \n") + expect(clean).toBe(true) + }) + + test("sentinel at start followed by summary → clean", () => { + const { clean } = checkResult("NO_ISSUES_FOUND\n\nReview Summary\n- All looks good") + expect(clean).toBe(true) + }) + + test("issues found → not clean (exit 0, loop continues)", () => { + const response = "Found 3 issues:\n1) unused import\n2) missing validation" + const { clean, assessment } = checkResult(response) + expect(clean).toBe(false) + expect(assessment).toContain("Found 3 issues") + }) + }) + + // ── Timeout Detection Tests ───────────────────────────── + // bash.ts uses tree-sitter AST to detect oc commands: the command name node + // must match /^(oc|\.\/oc)$/ exactly. Test that predicate directly. + describe("oc auto-timeout", () => { + test("isOc predicate matches oc and ./oc command names only", () => { + const isOc = (text: string) => /^(oc|\.\/oc)$/.test(text) + expect(isOc("oc")).toBe(true) + expect(isOc("./oc")).toBe(true) + expect(isOc("ocelot")).toBe(false) + expect(isOc("doc")).toBe(false) + expect(isOc("oc2")).toBe(false) + expect(isOc("bun")).toBe(false) + expect(isOc("")).toBe(false) + }) + }) + + // ── Regression Tests ────────────────────────────────────── + + describe("regression", () => { + test("OPENCODE_MESSAGE_ID env var is harmless when absent", () => { + const result = oc(["--help"], { OPENCODE_MESSAGE_ID: "" }) + expect(result.status).toBe(0) + }) + + test("empty prompt string is rejected", () => { + const result = oc(["prompt", ""]) + expect(result.status).not.toBe(0) + expect(result.stderr.toString()).toContain("no prompt text") + }) + + // oc check uses sentinel-based detection (NO_ISSUES_FOUND) instead of + // a separate followUp LLM call — simpler and faster (1 LLM call, not 2). + test("oc check appends sentinel instruction to prompt, not a followUp call", async () => { + // Verify sentinel is injected into the user prompt text (not a separate API call). + // The sentinel must appear inside the prompt string construction in `check()`, + // adjacent to the instruction line — not just as a loose string constant. + const src = await Bun.file(OC_TS).text() + // Sentinel must be embedded in the prompt array sent to the AI + expect(src).toContain("NO_ISSUES_FOUND") + expect(src).toContain("respond with exactly") + // Must NOT delegate to a separate followUp call (that would be 2 LLM calls) + expect(src).not.toMatch(/followUp:\s*\{/) + }) + + // Bug: function was renamed to "log" but call sites still used "announce" + test("oc.ts has no undefined function references", async () => { + const src = await Bun.file(OC_TS).text() + // Every function call should reference a defined function + // "announce(" should not appear unless "announce" is defined + const hasAnnounce = src.includes("announce(") + const definesAnnounce = /const announce|function announce/.test(src) + const hasLog = src.includes("log(") + const definesLog = /const log|function log/.test(src) + // Either announce is defined+used or log is defined+used, not mismatched + if (hasAnnounce) expect(definesAnnounce).toBe(true) + if (hasLog) expect(definesLog).toBe(true) + }) + }) +}) + +// ── SyncEvent Definition Regression Tests ──────────────────── + +describe("MessageV2 SyncEvent definitions", () => { + // Bug: MessageV2 events were defined with BusEvent.define (no aggregate/version) + // but passed to SyncEvent.run which requires these fields. + test("Updated event has aggregate and version", () => { + const def = MessageV2.Event.Updated + expect(def).toHaveProperty("aggregate", "sessionID") + expect(def).toHaveProperty("version") + expect(typeof def.version).toBe("number") + }) + + test("Updated event has busSchema", () => { + const def = MessageV2.Event.Updated + expect(def).toHaveProperty("properties") + }) + + test("PartUpdated event has aggregate and version", () => { + const def = MessageV2.Event.PartUpdated + expect(def).toHaveProperty("aggregate", "sessionID") + expect(def).toHaveProperty("version") + }) + + test("PartUpdated event has busSchema", () => { + const def = MessageV2.Event.PartUpdated + expect(def).toHaveProperty("properties") + }) + + test("Removed event has aggregate", () => { + expect(MessageV2.Event.Removed).toHaveProperty("aggregate", "sessionID") + }) + + test("PartRemoved event has aggregate", () => { + expect(MessageV2.Event.PartRemoved).toHaveProperty("aggregate", "sessionID") + }) + + // PartDelta stays as BusEvent — no aggregate needed + test("PartDelta is a BusEvent (no aggregate)", () => { + const def = MessageV2.Event.PartDelta + expect(def).toHaveProperty("type", "message.part.delta") + expect(def).not.toHaveProperty("aggregate") + }) +}) + +// ── Keepalive Protocol Tests ────────────────────────────────── + +describe("keepalive protocol", () => { + const KEEPALIVE = "\x00OC_KEEPALIVE\x00" + + test("keepalive marker is stripped from response", () => { + const body = `${KEEPALIVE}${KEEPALIVE}actual response` + expect(body.replaceAll(KEEPALIVE, "")).toBe("actual response") + }) + + test("keepalive marker does not conflict with OC_FILE marker", () => { + const ocFile = "\x00OC_FILE\x00:/path/to/file.pdf" + expect(ocFile.replaceAll(KEEPALIVE, "")).toBe(ocFile) + }) + + test("keepalive marker does not conflict with OC_TRUNCATED marker", () => { + const ocTrunc = "\x00OC_TRUNCATED\x00:Results limited to 100 items." + expect(ocTrunc.replaceAll(KEEPALIVE, "")).toBe(ocTrunc) + }) + + test("response without keepalive is unchanged", () => { + const body = "clean response\nwith newlines" + expect(body.replaceAll(KEEPALIVE, "")).toBe(body) + }) +}) diff --git a/packages/opencode/test/tool/ralph-loop-timeout.test.ts b/packages/opencode/test/tool/ralph-loop-timeout.test.ts new file mode 100644 index 000000000000..e8d474a17b8f --- /dev/null +++ b/packages/opencode/test/tool/ralph-loop-timeout.test.ts @@ -0,0 +1,98 @@ +import { describe, test, expect, beforeAll } from "bun:test" +import { Parser, Language } from "web-tree-sitter" +import path from "path" + +// Mirror the production DEFAULT_TIMEOUT from bash.ts (2 minutes) +const DEFAULT_TIMEOUT = 2 * 60 * 1000 + +describe("Ralph Loop Timeout Prevention", () => { + let parser: Parser + + beforeAll(async () => { + // Provide locateFile so Parser.init() finds tree-sitter.wasm on all platforms + // (mirrors the production bash.ts initialization pattern) + const treeWasm = path.resolve(import.meta.dirname, "../../node_modules/web-tree-sitter/tree-sitter.wasm") + await Parser.init({ locateFile: () => treeWasm }) + const wasmPath = path.resolve(import.meta.dirname, "../../node_modules/tree-sitter-bash/tree-sitter-bash.wasm") + const bashLanguage = await Language.load(wasmPath) + parser = new Parser() + parser.setLanguage(bashLanguage) + }) + + function analyzeTimeout(command: string) { + const tree = parser.parse(command) + expect(tree).not.toBeNull() + + const isOc = (text: string) => /^(oc|\.\/oc)$/.test(text) + const usesOc = tree!.rootNode.descendantsOfType("command").some((n) => { + if (!n) return false + const name = n.childForFieldName("name") ?? n.firstChild + return name !== null && isOc(name.text) + }) + const timeout = usesOc ? 0 : DEFAULT_TIMEOUT + + return { usesOc, timeout } + } + + test("bash tool should detect ANY oc command and set infinite timeout", () => { + const ralphLoop = `while oc check "find and fix issues"; do + oc status "round complete" + done` + + const { usesOc, timeout } = analyzeTimeout(ralphLoop) + + expect(usesOc).toBe(true) + expect(timeout).toBe(0) // infinite timeout - ANY oc command + }) + + test("regular bash commands should have normal timeout", () => { + const regularCommand = "echo 'hello' && ls -la" + const { usesOc, timeout } = analyzeTimeout(regularCommand) + + expect(usesOc).toBe(false) + expect(timeout).toBe(DEFAULT_TIMEOUT) // normal 2 minute timeout + }) + + test("oc without while should also have infinite timeout", () => { + const ocWithoutLoop = "oc check 'single check' && echo done" + const { usesOc, timeout } = analyzeTimeout(ocWithoutLoop) + + expect(usesOc).toBe(true) + expect(timeout).toBe(0) // infinite timeout - ANY oc can be long-running + }) + + test("while without oc should have normal timeout", () => { + const whileWithoutOc = `while true; do + echo "running..." + sleep 1 + done` + const { usesOc, timeout } = analyzeTimeout(whileWithoutOc) + + expect(usesOc).toBe(false) + expect(timeout).toBe(DEFAULT_TIMEOUT) // normal timeout because no oc + }) + + test("complex Ralph loop should be detected", () => { + const complexRalph = ` + echo "Starting quality analysis..." + while oc check "DO THE ENTIRE CODE QUALITY ANALYSIS: find bugs, fix them, commit"; do + oc status "Issues found and fixed, checking again..." + git log --oneline -1 + done + echo "Analysis complete!"` + const { usesOc, timeout } = analyzeTimeout(complexRalph) + + expect(usesOc).toBe(true) + expect(timeout).toBe(0) // infinite timeout for ANY oc command + }) + + test("./oc variant should also be detected", () => { + const dotSlashOc = `while ./oc check "test"; do + echo "loop" + done` + const { usesOc, timeout } = analyzeTimeout(dotSlashOc) + + expect(usesOc).toBe(true) + expect(timeout).toBe(0) // infinite timeout for ANY oc command + }) +}) From 8e288cff04fa21a12d41fe2ec9b8b0ba3754512e Mon Sep 17 00:00:00 2001 From: micu Date: Mon, 30 Mar 2026 08:35:00 +0200 Subject: [PATCH 4/4] docs(DACMICU): specification and eval scripts - Add specs/DACMICU.md: full design document covering the four pillars (Manus CLI, deterministic split, Ralph Loop, Fabric composition), architecture, competitive landscape, and adoption strategy - Add test/eval/dacmicu-adoption.sh: measures LLM adoption rate across 5 prompt patterns with scoring (0/1/2 per prompt) - Add test/eval/dacmicu-fast.sh: lightweight 3-prompt eval for quick iteration testing --- packages/opencode/specs/DACMICU.md | 371 ++++++++++++++++++ .../opencode/test/eval/dacmicu-adoption.sh | 204 ++++++++++ packages/opencode/test/eval/dacmicu-fast.sh | 87 ++++ 3 files changed, 662 insertions(+) create mode 100644 packages/opencode/specs/DACMICU.md create mode 100755 packages/opencode/test/eval/dacmicu-adoption.sh create mode 100755 packages/opencode/test/eval/dacmicu-fast.sh diff --git a/packages/opencode/specs/DACMICU.md b/packages/opencode/specs/DACMICU.md new file mode 100644 index 000000000000..8b4f9100d80b --- /dev/null +++ b/packages/opencode/specs/DACMICU.md @@ -0,0 +1,371 @@ +# DACMICU — Deterministic Agent Callbacks + +> **Deterministic Agent Callbacks — Model-Initiated Control Umbrella** +> +> The LLM writes the program that orchestrates itself — deterministic code skeleton + AI judgment at decision points + UNIX pipe composition + callback into running instance. + +| | | +| ------------- | ----------------------------------------------------------------------- | +| **Command** | `oc` | +| **Endpoints** | `POST /session/:id/exec` (AI), `POST /session/:id/tool` (deterministic) | + +--- + +# TABLE OF CONTENTS + +- **Part I — Vision**: [1. Four Pillars](#1-the-four-pillars) · [2. The Synthesis](#2-the-synthesis) · [3. Ecosystem Integration](#3-ecosystem-integration) +- **Part II — Implementation**: [4. What We Built](#4-what-we-built) · [5. Architecture](#5-architecture) · [6. Design Decisions](#6-design-decisions) +- **Part III — Context**: [7. Competitive Landscape](#7-competitive-landscape) +- **Part IV — Adoption**: [8. Adoption & TUI Visibility](#8-adoption--tui-visibility) + +--- + +# PART I — VISION + +## 1. The Four Pillars + +DACMICU is built on four converging insights from industry leaders who independently arrived at the same conclusion: **stop fighting LLM non-determinism — separate what code does well from what AI does well.** + +### Pillar 1: The Manus Insight — Everything Is Text + +A [former backend lead at Manus](https://coding4food.com/en/post/ex-manus-lead-drops-function-calling-for-unix-cli-ai-agents) (1600+ upvotes on r/LocalLLaMA) revealed that after 2 years building production agents, they **stopped using function calling entirely**. Replacement: a single `run(command="...")` surface backed by Unix CLI. + +> "50 years ago, Unix creators made a core decision: everything is a text stream. LLMs think in text, act in text, consume text." + +**Key insights:** + +- **Tool sprawl kills agents**: "Once an agent has to choose between twenty, forty, or eighty overlapping tools, the very act of choosing becomes part of the failure surface." +- **Models are already CLI experts**: trained on billions of shell commands from GitHub/StackOverflow — "it's already the ultimate terminal power user." +- **Progressive help discovery**: don't bloat system prompts; commands self-document via help text. +- **Error messages must include fixes**: not just "error" but `[error] binary image. Use: see photo.png`. +- **Compact grammar wins**: one pipeline composes what would take multiple tool calls. + +**How DACMICU extends this:** `oc` IS the Manus approach, but deeper. Manus has `run(command)`. We have `run(command that calls back to the AI)`. The LLM writes shell pipelines that include AI judgment at decision points. Manus stopped at CLI execution. We add the callback loop. + +### Pillar 2: Deterministic vs Non-Deterministic Separation + +**LLMs are unreliable at deterministic execution** — they lose count in loops, forget state across turns, apply transformations inconsistently, hallucinate completed actions. + +**Code is perfectly reliable at deterministic execution** — a for loop runs exactly N times, a variable holds state perfectly, a tool call applies exactly what's specified. + +**LLMs are brilliant at non-deterministic judgment** — "Is this buggy?", "What's the fix?", "Does this need migration?" + +**DACMICU makes this separation concrete in the API:** + +- `oc tool edit/write/read` → deterministic, no LLM, reliable, fast +- `oc prompt "question"` → non-deterministic, LLM judgment, where AI excels + +This is not just cheaper — **it is more reliable.** + +**Validated by Anthropic's PTC:** 37% token reduction, accuracy improvements (46.5% → 51.2%) with the same thesis. But PTC is API-only/cloud-only/Claude-only. DACMICU is CLI-native, local, and model-agnostic. + +### Pillar 3: The Ralph Loop Philosophy + +The [Ralph Loop](https://ghuntley.com/loop/) (Geoffrey Huntley, 2024) is the most influential pattern in agentic coding for 2025-2026. Its philosophy embraces LLM fallibility: + +> "The technique is deterministically bad in an undeterministic world." + +**Core principles:** + +- **Code as state**: Progress lives in files and git history, not LLM context +- **Machine-verifiable criteria**: Success = tests pass, not "the LLM says it's done" +- **Fresh context rotation**: Each iteration starts clean, avoiding context pollution +- **External verification**: The loop checks objective reality (test results, file state), not LLM self-assessment +- **Monolithic simplicity**: One task per loop, not complex multi-agent choreography + +**How DACMICU enables Ralph Loops:** + +```bash +# A Ralph Loop, generated by the LLM, running inside openCode: +for attempt in $(seq 1 20); do + output=$(npm test 2>&1) + [ $? -eq 0 ] && echo "PASS after $attempt attempts" && break + echo "$output" | oc prompt "Fix these failures. Use oc tool edit to make changes." + # Each oc prompt = fresh child session = clean context + # Progress persists in files, not in LLM memory +done +``` + +**Key insight:** DACMICU makes the LLM the AUTHOR of Ralph Loops, not just the executor. The LLM designs the loop structure, chooses verification criteria, and decides when to use fresh context — all expressed in deterministic bash. + +### Pillar 4: The Fabric Pattern — AI in the UNIX Pipe + +[Fabric](https://github.com/danielmiessler/Fabric) (Daniel Miessler, 40k+ stars) proves the AI-in-the-pipeline model works: + +```bash +yt --transcript https://youtu.be/... | fabric -sp extract_wisdom +``` + +**Core principles:** + +- **AI as a UNIX pipe**: text in, text out, composable +- **Patterns as reusable prompts**: task-specific AI calls, not monolithic conversations +- **CLI-first**: the command line is the integration layer +- **"AI isn't a thing; it's a magnifier of a thing"** + +**How DACMICU is Fabric-for-agents:** Fabric gives HUMANS a way to pipe data through AI. DACMICU gives the AI AGENT itself the same power: + +```bash +# Fabric-style patterns, but the LLM writes them at runtime: +cat src/auth.ts | oc prompt -s "Security auditor. OWASP Top 10." "Audit this" +git diff HEAD~5 | oc prompt "Summarize changes for release notes" +find src/ -name "*.ts" | while read f; do + cat "$f" | oc prompt -s "Code reviewer" "Rate complexity 1-10. Just the number." +done | sort -rn | head -10 +``` + +--- + +## 2. The Synthesis + +DACMICU = **Manus CLI philosophy** + **deterministic/non-deterministic split** + **Ralph Loop engine** + **Fabric composition** + **openCode tool ecosystem** + +| Concept | Manus CLI | Ralph Loop | Fabric | DACMICU | +| -------------- | ------------------ | ------------------- | -------------------- | ----------------------- | +| Core idea | Everything is text | Embrace fallibility | AI in the pipe | **All four** | +| Who writes it? | Human configures | Human writes loop | Human writes pipe | **The LLM** | +| AI interface | `run(command)` | External LLM call | `fabric -sp pattern` | `oc prompt` + `oc tool` | +| Determinism | Shell commands | bash + tests | UNIX pipes | `oc tool` + bash | +| Verification | Exit codes | Tests, git | Human reads | `oc tool` + tests | +| Context mgmt | Single run | Fresh rotation | Stateless pipe | Fresh child session | + +**The LLM becomes a meta-programmer**: it writes the program that orchestrates itself. It has the Manus insight (CLI over function calling), the Ralph Loop pattern (retry until verified), Fabric's composition (pipe through AI), and openCode's full tool ecosystem. + +--- + +## 3. Ecosystem Integration + +The `oc` command gives the LLM's scripts access to the full openCode ecosystem — subagents, todos, all tools, dynamic specialists, and multi-model routing: + +```bash +# Subagent orchestration +analysis=$(oc agent explore "Find all files affected by this change") +echo "$analysis" | oc prompt "Create a test plan" + +# Dynamic specialists + multi-model cost optimization +oc prompt -s "Security auditor. OWASP Top 10." "Audit src/auth.ts" +cheap=$(oc prompt -m anthropic/claude-haiku-4-5-20251001 "Relevant? yes/no") + +# Python for computation (oc as subprocess) +python3 -c " +import subprocess, json +r = subprocess.run(['oc', 'tool', 'read', 'data.json'], capture_output=True, text=True) +data = json.loads(r.stdout) +for item in data: item['expected'] = item['value'] * 2 + 1 +print(json.dumps(data)) +" +``` + +--- + +# PART II — IMPLEMENTATION + +## 4. What We Built + +### Server Endpoints + +| Endpoint | Purpose | LLM? | Request | Response | +| ------------------------ | --------------------------- | ---- | ----------------------------------- | --------------------- | +| `POST /session/:id/exec` | AI judgment (child session) | YES | `{prompt, system?, agent?, model?}` | Plain text (streamed) | +| `POST /session/:id/tool` | Direct tool execution | NO | `{name, args}` | Plain text (streamed) | + +Both return 404 when the feature flag is disabled. + +### The `oc` Command Reference + +``` +NON-DETERMINISTIC (AI judgment — expensive, where AI excels): + oc prompt "question" AI response on stdout + oc prompt -s "system" "question" Dynamic specialist + oc prompt -m provider/model "question" Specific model + cat file | oc prompt "analyze" Context from stdin + oc agent explore "task" Spawn subagent + +DETERMINISTIC (reliable, no LLM — cheap, fast, exact): + oc tool read Read → stdout + echo "content" | oc tool write Write stdin → file + oc tool edit --old "x" --new "y" Edit file + oc tool grep "pattern" [path] Search → stdout + oc tool glob "pattern" Find files → stdout + +STATE (direct DB): + oc todo add "task" Add todo + oc todo list List todos +``` + +--- + +## 5. Architecture + +### Data Flow + +``` +openCode instance (TUI active, server on :4096) +│ +├─ Session: ses_abc123 +│ └─ LLM calls bash tool with a script (Ralph Loop, pipeline, etc.) +│ │ +│ │ bash.ts injects: OPENCODE_SERVER_URL, OPENCODE_SESSION_ID, PATH +│ │ (when Server.url exists) +│ │ +│ ▼ +│ ┌─ bash script (deterministic skeleton) ────────────┐ +│ │ │ +│ │ files=$(grep -rl "TODO" src/) ← native │ +│ │ content=$(oc tool read "$f") ← HTTP →────┼→ POST /session/:id/tool +│ │ no LLM │ Tool.execute() directly +│ │ fix=$(echo "$content" | oc prompt "Fix") ← HTTP →─┼→ POST /session/:id/exec +│ │ LLM thinks │ Creates child session +│ │ oc todo done 1 ← HTTP →────┼→ Todo CRUD (no LLM) +│ │ │ +│ └─────────────────────────────────────────────────────┘ +│ +│ Everything routes through the SAME server instance. +│ TUI shows: bash streaming + child sessions + tool calls. +│ Abort propagation: HTTP disconnect → SessionPrompt.cancel(child) +``` + +### How Each Piece Works + +**bash.ts** injects `OPENCODE_SERVER_URL`, `OPENCODE_SESSION_ID`, `OPENCODE_MESSAGE_ID`, and `bin/` on PATH into the subprocess environment when `Server.url` exists. Any script containing `oc` commands gets timeout disabled (DACMICU mode). + +**POST /session/:id/exec** (AI callback): validates parent session, creates child session with `parentID`, registers abort handler (`c.req.raw.signal → SessionPrompt.cancel(child)`), calls `SessionPrompt.prompt()`, streams last text part as `text/plain`. + +**POST /session/:id/tool** (direct execution): looks up tool via `ToolRegistry.get(name)`, creates `Tool.Context` with merged permissions (agent + session), calls `tool.execute(args, ctx)` directly — no LLM. Streams output as `text/plain`. + +**bin/oc** is a POSIX sh wrapper that delegates to `bin/oc.ts` (TypeScript). The shell layer handles argument routing; the TS layer handles HTTP, JSON encoding, file attachments, and structured output parsing. + +--- + +## 6. Design Decisions + +| Decision | Rationale | +| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **Bash** (not TS/Python) | UNIX native, zero new dependencies. LLMs fluent (billions of examples). openCode has no Python dependency; a new TS tool would add tool overhead. Manus lead validated: "it's already the ultimate terminal power user." | +| **HTTP into running instance** | Manus proved CLI execution works; DACMICU extends this with callbacks INTO the AI mid-script. No new processes. Instant. Integrated. One TUI. | +| **`oc` as a UNIX command** | Text in, text out, composable via pipes. Aligns with Manus (CLI-first), Fabric (AI in the pipe), and Ralph Loop (deterministic skeleton) simultaneously. | +| **Two endpoints** (`/exec` + `/tool`) | The thesis as API: non-deterministic vs deterministic. One endpoint would conflate them. | +| **No new tool** | Bash tool already integrated (spawn, stream, timeout, abort, permissions). Just needed env vars + PATH for callbacks. | +| **Permission merge** (agent + session) | `/tool` endpoint must match normal agent loop behavior. Without it, `oc tool read/write/grep` blocked. | +| **Prompt in environment()** | All models see it (not just Claude). Flag-gated. One place. | +| **Abort propagation** | HTTP disconnect → `SessionPrompt.cancel(child)`. No orphaned sessions burning tokens. | +| **Trailing slash strip** | `Server.url.toString()` includes `/`. Without strip, double-slash routes to web UI. | + +--- + +# PART III — CONTEXT + +## 7. Competitive Landscape + +### Position Map + +``` + LLM generates User writes No orchestration + at runtime (static) + ────────────── ────────────── ────────────── +API: Anthropic PTC — Standard tool use + +CLI: ★ DACMICU ★ Claude-Pipeline Claude Code + (bash + oc) Lobster/OpenClaw Pi (no callbacks) + Fabric (patterns) Ralph (bash loop) + Manus (CLI exec) + +IDE: — Kiro hooks Cursor, Windsurf + +Cloud: — Composio Devin +``` + +### Competitive Comparison + +| Agent | Approach | What DACMICU adds | +| --------------------- | ------------------------------------------------------------- | -------------------------------------------- | +| **Manus** | Single `run(command)` surface, CLI over function calling | Callbacks INTO the agent mid-execution | +| **Anthropic PTC** | Python code orchestrates tools in cloud sandbox. 37% savings. | CLI-native. Model-agnostic. Local. | +| **Ralph Loop** | Infinite retry, fresh context, file-as-state | LLM WRITES the loop. AI judgment inside. | +| **Fabric** | AI patterns via UNIX pipes, 40k+ stars | Agent writes patterns at runtime. | +| **Pi** | 4 tools, <1000 token prompts, bash-first, self-extending | Callback mechanism. Mid-script AI judgment. | +| **Claude Code hooks** | PreToolUse/PostToolUse deterministic triggers | Dynamic, not static. LLM designs the flow. | +| **OpenClaw Lobster** | Deterministic YAML workflows | Generated at runtime, not user-written YAML. | +| **Kiro** | Spec-driven, event hooks | Hooks static, user-defined. | +| **Devin** | Managed VMs, compound AI | No deterministic/non-deterministic split. | + +--- + +# PART IV — ADOPTION & VISIBILITY + +## 8. Adoption & TUI Visibility + +Focus: get LLMs to **spontaneously choose** oc scripts, make oc activity **visible in the TUI**, and fix real-world issues found during testing. + +### 8.1 System Prompt Evolution + +Rewrote `` → `` with evidence-based approach: + +| Technique | Source | Implementation | +| ------------------------------- | --------------------- | ----------------------------------------------------------------------------- | +| Positive framing ("superpower") | Amatriain 2024 | Opening line of `` | +| Trigger keywords | U-shaped positioning | `` section: "until it passes", "every file", "summarize all" | +| XML structure | Claude best practices | ``, ``, ``, ``, `` | +| Graded examples | Tang et al. 2025 | Ralph Loop → Batch → Pipeline → Map-reduce | +| Test-first pattern | Real-world failure | "Solve ONE item first, verify, THEN scale" | +| Concurrency guidance | openCode limits | Max 5 parallel oc prompt, `while IFS= read -r` for safe paths | + +**Eval results** (fast eval, 3 read-only prompts, no oc hints): + +- Claude Sonnet 4.6: ~90% adoption +- Kimi K2.5: 83% adoption +- Nemotron-3-super-free: 83% adoption + +### 8.2 TUI Visibility — oc Calls as First-Class Citizens + +**Problem**: `oc tool read` and `oc prompt` calls from bash scripts were invisible in the TUI. The user saw only a bash block with streaming text. + +**Solution**: The `/tool` and `/exec` endpoints now create real ToolParts when called from a bash context (opt-in via `OPENCODE_MESSAGE_ID`). + +| oc command | Visual in TUI | Mechanism | +| ------------------------- | ---------------------------------------- | ------------------------------------------------------------------ | +| `oc tool read file.ts` | `→ Read file.ts ✓` | ToolPart with `metadata.oc: true` on parent message | +| `oc tool grep "pat" src/` | `→ Grep "pat" ✓` | Same | +| `oc prompt "summarize"` | `⚙ task — oc prompt ✓` + streaming text | ToolPart with `tool: "task"`, `metadata.sessionId` → child session | +| `oc agent explore "task"` | Same as prompt | Same | +| `[oc] tool read file.ts` | Dimmed text in bash block | Auto-announce to stderr | + +**How it works**: + +1. `bash.ts` injects `OPENCODE_MESSAGE_ID` env var (the assistant message that owns the bash Part) +2. `bin/oc.ts` sends `messageID` in HTTP request body +3. Server creates running/completed/error ToolParts → `Bus.publish(PartUpdated)` → SSE → TUI +4. For `oc prompt`: subscribes to child session's `PartDelta` events, streams text to parent ToolPart +5. ToolParts marked with `metadata.oc: true` are filtered from `toModelMessages()` (no invalid tool_use/tool_result pairing) +6. Direct API callers (no messageID) are unaffected — exact same behavior as before + +**Subagent navigation**: `oc prompt` creates ToolParts with `tool: "task"` and `metadata.sessionId`, so the TUI shows "ctrl+x down — view subagents" and users can navigate into child sessions. + +### 8.3 Transparent PDF/Image Piping + +**Problem**: `oc tool read paper.pdf | oc prompt "summarize"` didn't work because the Read tool returns PDFs as multimodal attachments (base64), not text. The pipe only carried `"PDF read successfully"`. + +**Solution**: `__oc_file__:` protocol between `/tool` and `oc prompt`: + +1. `/tool` endpoint appends `\n__oc_file__:/absolute/path/to/file.pdf` when result has attachments +2. `oc prompt` detects `__oc_file__:` markers in stdin, reads the file, sends as multimodal attachment +3. Child session LLM sees the actual PDF natively + +```bash +# This now Just Works: +oc tool read paper.pdf | oc prompt "Summarize this paper" +``` + +### 8.4 Performance Optimizations + +**Compiled native binary**: `bin/oc-native` via `bun build --compile --no-compile-autoload-bunfig` + +- Startup: ~40ms (compiled) vs ~190ms (interpreted) = **5× faster** +- `bin/oc` shell wrapper uses `oc-native` if present, falls back to `bun run oc.ts` +- 100 tool calls: ~4s (compiled) vs ~19s (interpreted) + +**O(1) Todo.add()**: Changed from delete-all + insert-all (O(N²)) to direct INSERT. 100 todos: milliseconds instead of seconds. + +**Auto-announce**: Every oc call writes dimmed `[oc] tool read src/...` to stderr, visible in TUI bash block in real-time. No LLM cooperation needed. + +**Model inheritance**: `/exec` endpoint inherits parent session's model when none specified. Fixes silent failures when TUI uses a different model than config default. diff --git a/packages/opencode/test/eval/dacmicu-adoption.sh b/packages/opencode/test/eval/dacmicu-adoption.sh new file mode 100755 index 000000000000..9921d01dcb2f --- /dev/null +++ b/packages/opencode/test/eval/dacmicu-adoption.sh @@ -0,0 +1,204 @@ +#!/usr/bin/env bash +# DACMICU Adoption Eval +# Measures whether the LLM chooses bash+oc scripts over individual tool calls +# for tasks that should naturally benefit from scripted approaches. +# +# Usage: ./test/eval/dacmicu-adoption.sh [--dry-run] +# +# Scoring: +# 2 = bash tool used with oc commands (full DACMICU) +# 1 = bash tool used without oc (partial) +# 0 = individual tool calls only (no DACMICU adoption) + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_DIR="$(cd "$SCRIPT_DIR/../.." && pwd)" +RESULTS_DIR="$SCRIPT_DIR/results" +TIMESTAMP=$(date +%Y%m%d_%H%M%S) +RUN_DIR="$RESULTS_DIR/$TIMESTAMP" +DRY_RUN="${1:-}" + +mkdir -p "$RUN_DIR" + +# --- Eval prompts --- +# Natural task descriptions. NO mention of oc, scripts, bash, MICU, or DAC. +# Each should trigger one of the 3 patterns if the system prompt works. + +PROMPTS=( + # Pattern 1 targets (Ralph Loop — retry/verify) + "Run the TypeScript type checker and fix any type errors you find. Keep going until it passes clean." + + # Pattern 2 targets (CLI Power — batch operations) + "Count the lines of code in each .ts file under src/tool/ and show me the top 10 largest files." + "List every file in src/session/ that imports from '../tool/tool'. Show the filename and the import line for each match." + + # Pattern 3 targets (Fabric — analysis pipelines) + "Read every file in src/server/routes/ and write a summary of all API endpoints to /tmp/api-endpoints.md" + "Find all TODO and FIXME comments in src/ and create a prioritized report grouped by category." +) + +PATTERN_NAMES=( + "Ralph Loop (retry/verify)" + "CLI Power (batch count)" + "CLI Power (batch search)" + "Fabric (map-reduce)" + "Fabric (pipeline)" +) + +# --- Scoring function --- +score_run() { + local json_file="$1" + + if [ ! -f "$json_file" ]; then + echo "0" + return + fi + + # Count bash tool calls that contain 'oc ' in the command + local bash_with_oc + bash_with_oc=$(jq -r ' + select(.type == "tool_use") + | select(.part.tool == "bash") + | select(.part.state.input.command | test("\\boc\\s+(tool|prompt|todo|agent)")) + ' "$json_file" 2>/dev/null | jq -s 'length') + + # Count bash tool calls total + local bash_total + bash_total=$(jq -r ' + select(.type == "tool_use") + | select(.part.tool == "bash") + ' "$json_file" 2>/dev/null | jq -s 'length') + + # Count non-bash tool calls (read, edit, grep, glob, write individually) + local individual_tools + individual_tools=$(jq -r ' + select(.type == "tool_use") + | select(.part.tool != "bash" and .part.tool != "todowrite" and .part.tool != "task") + ' "$json_file" 2>/dev/null | jq -s 'length') + + if [ "$bash_with_oc" -gt 0 ]; then + echo "2" # Full DACMICU + elif [ "$bash_total" -gt 0 ] && [ "$individual_tools" -le 2 ]; then + echo "1" # Bash but no oc + else + echo "0" # Individual tool calls + fi +} + +# --- Analyze a run's JSON for details --- +analyze_run() { + local json_file="$1" + + if [ ! -f "$json_file" ]; then + echo " (no output)" + return + fi + + echo " Tool calls:" + jq -r ' + select(.type == "tool_use") + | " \(.part.tool): \(.part.state.status // "?")" + ' "$json_file" 2>/dev/null || echo " (parse error)" + + # Show bash commands if any + local bash_cmds + bash_cmds=$(jq -r ' + select(.type == "tool_use") + | select(.part.tool == "bash") + | .part.state.input.command + ' "$json_file" 2>/dev/null) + + if [ -n "$bash_cmds" ]; then + echo " Bash commands:" + echo "$bash_cmds" | head -5 | sed 's/^/ /' + local total_lines + total_lines=$(echo "$bash_cmds" | wc -l) + if [ "$total_lines" -gt 5 ]; then + echo " ... ($total_lines total)" + fi + + # Check for oc usage + if echo "$bash_cmds" | grep -q '\boc\s\+\(tool\|prompt\|todo\|agent\)'; then + echo " oc usage: YES" + else + echo " oc usage: NO" + fi + fi +} + +# --- Main --- +echo "=== DACMICU Adoption Eval ===" +echo "Run: $TIMESTAMP" +echo "Results: $RUN_DIR" +echo "" + +if [ "$DRY_RUN" = "--dry-run" ]; then + echo "[DRY RUN] Would execute ${#PROMPTS[@]} prompts:" + for i in "${!PROMPTS[@]}"; do + echo " $((i+1)). [${PATTERN_NAMES[$i]}] ${PROMPTS[$i]:0:80}..." + done + exit 0 +fi + +total_score=0 +max_score=$((${#PROMPTS[@]} * 2)) + +for i in "${!PROMPTS[@]}"; do + prompt="${PROMPTS[$i]}" + pattern="${PATTERN_NAMES[$i]}" + n=$((i+1)) + outfile="$RUN_DIR/prompt_${n}.jsonl" + + echo "--- Prompt $n/${#PROMPTS[@]}: $pattern ---" + echo " \"${prompt:0:80}...\"" + echo " Running..." + + # Run opencode with JSON format, capture all events + # Uses the model configured in opencode.json + bun run --conditions=browser "$PROJECT_DIR/src/index.ts" run --format json \ + "$prompt" > "$outfile" 2>"$RUN_DIR/prompt_${n}.stderr" || true + + score=$(score_run "$outfile") + total_score=$((total_score + score)) + + case $score in + 2) label="FULL DACMICU" ;; + 1) label="PARTIAL (bash, no oc)" ;; + 0) label="NO ADOPTION (individual tools)" ;; + esac + + echo " Score: $score/2 — $label" + analyze_run "$outfile" + echo "" +done + +# --- Summary --- +echo "=== SUMMARY ===" +echo "Total: $total_score / $max_score" +pct=$((total_score * 100 / max_score)) +echo "Adoption rate: ${pct}%" +echo "" + +echo "Per-prompt scores:" +for i in "${!PROMPTS[@]}"; do + outfile="$RUN_DIR/prompt_$((i+1)).jsonl" + score=$(score_run "$outfile") + echo " $((i+1)). [$score/2] ${PATTERN_NAMES[$i]}" +done + +# Save summary +cat > "$RUN_DIR/summary.txt" </dev/null | jq -s 'length') + + local bash_total + bash_total=$(jq -r 'select(.type == "tool_use") | select(.part.tool == "bash")' "$json_file" 2>/dev/null | jq -s 'length') + + local individual_tools + individual_tools=$(jq -r 'select(.type == "tool_use") | select(.part.tool != "bash" and .part.tool != "todowrite" and .part.tool != "task")' "$json_file" 2>/dev/null | jq -s 'length') + + if [ "$bash_with_oc" -gt 0 ]; then + echo "2" + elif [ "$bash_total" -gt 0 ] && [ "$individual_tools" -le 2 ]; then + echo "1" + else + echo "0" + fi +} + +echo "=== DACMICU Fast Eval ===" +echo "Run: $TIMESTAMP" +echo "" + +total_score=0 +max_score=$((${#PROMPTS[@]} * 2)) + +for i in "${!PROMPTS[@]}"; do + prompt="${PROMPTS[$i]}" + pattern="${PATTERN_NAMES[$i]}" + n=$((i+1)) + outfile="$RUN_DIR/prompt_${n}.jsonl" + + echo -n " $n. [$pattern] " + + bun run --conditions=browser "$PROJECT_DIR/src/index.ts" run --format json \ + "$prompt" > "$outfile" 2>"$RUN_DIR/prompt_${n}.stderr" || true + + score=$(score_run "$outfile") + total_score=$((total_score + score)) + + # Show bash commands if any + bash_cmd=$(jq -r 'select(.type == "tool_use") | select(.part.tool == "bash") | .part.state.input.command' "$outfile" 2>/dev/null | head -1) + has_oc=$(echo "$bash_cmd" | grep -c 'oc ' 2>/dev/null || echo "0") + + case $score in + 2) echo "FULL oc ✓ ${bash_cmd:0:80}" ;; + 1) echo "bash only ${bash_cmd:0:80}" ;; + 0) echo "no oc ✗" ;; + esac +done + +echo "" +echo "Score: $total_score / $max_score ($(( total_score * 100 / max_score ))%)" +echo "Results: $RUN_DIR/"