diff --git a/README.md b/README.md
index cb46a24..4261867 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 Automatically reduces token usage in OpenCode by removing obsolete tool outputs from conversation history.
 
-![DCP in action](dcp-demo3.png)
+![DCP in action](dcp-demo5.png)
 
 ## Installation
 
@@ -19,8 +19,6 @@ Add to your OpenCode config:
 
 Using `@latest` ensures you always get the newest version automatically when OpenCode starts.
 
-> **Note:** If you use OAuth plugins (e.g., for Google or other services), place this plugin last in your `plugin` array to avoid interfering with their authentication flows.
-
 Restart OpenCode. The plugin will automatically start optimizing your sessions.
 
 ## How Pruning Works
@@ -49,6 +47,8 @@ LLM providers like Anthropic and OpenAI cache prompts based on exact prefix matc
 
 **Trade-off:** You lose some cache read benefits but gain larger token savings from reduced context size and performance improvements through reduced context poisoning. In most cases, the token savings outweigh the cache miss cost—especially in long sessions where context bloat becomes significant.
 
+> **Note:** In testing, cache hit rates were approximately 65% with DCP enabled vs. 85% without.
+
 **Best use case:** Providers that count usage in requests, such as GitHub Copilot and Google Antigravity, have no negative price impact.
 
 ## Configuration
diff --git a/dcp-demo4.png b/dcp-demo4.png
new file mode 100644
index 0000000..c2baf5f
Binary files /dev/null and b/dcp-demo4.png differ
diff --git a/dcp-demo5.png b/dcp-demo5.png
new file mode 100644
index 0000000..a86e035
Binary files /dev/null and b/dcp-demo5.png differ
diff --git a/index.ts b/index.ts
index 0802afb..b047850 100644
--- a/index.ts
+++ b/index.ts
@@ -3,7 +3,11 @@ import { getConfig } from "./lib/config"
 import { Logger } from "./lib/logger"
 import { createSessionState } from "./lib/state"
 import { createDiscardTool, createExtractTool } from "./lib/strategies"
-import { createChatMessageTransformHandler, createSystemPromptHandler } from "./lib/hooks"
+import {
+  createChatMessageTransformHandler,
+  createCommandExecuteHandler,
+  createSystemPromptHandler,
+} from "./lib/hooks"
 
 const plugin: Plugin = (async (ctx) => {
   const config = getConfig(ctx)
@@ -64,8 +68,17 @@ const plugin: Plugin = (async (ctx) => {
       }),
     },
     config: async (opencodeConfig) => {
-      // Add enabled tools to primary_tools by mutating the opencode config
-      // This works because config is cached and passed by reference
+      opencodeConfig.command ??= {}
+      opencodeConfig.command["dcp-stats"] = {
+        template: "",
+        description: "Show DCP pruning statistics",
+      }
+      opencodeConfig.command["dcp-context"] = {
+        template: "",
+        description: "Show token usage breakdown for current session",
+      }
+      logger.info("Registered /dcp-stats and /dcp-context commands")
+
       const toolsToAdd: string[] = []
       if (config.tools.discard.enabled) toolsToAdd.push("discard")
       if (config.tools.extract.enabled) toolsToAdd.push("extract")
@@ -81,6 +94,7 @@ const plugin: Plugin = (async (ctx) => {
         )
       }
     },
+    "command.execute.before": createCommandExecuteHandler(ctx.client, state, logger),
   }
 }) satisfies Plugin
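The `config` hook above registers the two slash commands by mutating OpenCode's cached config object, and the new `command.execute.before` hook intercepts them before their (empty) templates reach the model. A minimal sketch of that interception flow, assuming a dispatcher shaped like the one below (illustrative, not OpenCode's real internals):

```typescript
// Hypothetical dispatcher sketch; `hooks` stands for the object returned by
// the plugin above, and the input shape matches createCommandExecuteHandler.
type CommandHook = (
  input: { command: string; sessionID: string; arguments: string },
  output: { parts: any[] },
) => Promise<void>

async function dispatch(hooks: { "command.execute.before"?: CommandHook }) {
  try {
    await hooks["command.execute.before"]?.(
      { command: "dcp-stats", sessionID: "ses_123", arguments: "" },
      { parts: [] },
    )
    // ...normal template execution would continue here...
  } catch (err) {
    // The handler throws a sentinel after rendering its report, which
    // aborts the default command flow instead of sending it to the model.
    if (!(err as Error).message.startsWith("__DCP_")) throw err
  }
}
```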
diff --git a/lib/commands/context.ts b/lib/commands/context.ts
new file mode 100644
index 0000000..a7fba87
--- /dev/null
+++ b/lib/commands/context.ts
@@ -0,0 +1,243 @@
+/**
+ * DCP Context command handler.
+ * Shows a visual breakdown of token usage in the current session.
+ */
+
+import type { Logger } from "../logger"
+import type { SessionState, WithParts } from "../state"
+import { sendIgnoredMessage } from "../ui/notification"
+import { formatTokenCount } from "../ui/utils"
+import { isMessageCompacted } from "../shared-utils"
+import { isIgnoredUserMessage } from "../messages/utils"
+import { countTokens, getCurrentParams } from "../strategies/utils"
+import type { AssistantMessage, TextPart, ToolPart } from "@opencode-ai/sdk/v2"
+
+export interface ContextCommandContext {
+  client: any
+  state: SessionState
+  logger: Logger
+  sessionId: string
+  messages: WithParts[]
+}
+
+interface TokenBreakdown {
+  system: number
+  user: number
+  assistant: number
+  reasoning: number
+  tools: number
+  pruned: number
+  total: number
+}
+
+function analyzeTokens(state: SessionState, messages: WithParts[]): TokenBreakdown {
+  const breakdown: TokenBreakdown = {
+    system: 0,
+    user: 0,
+    assistant: 0,
+    reasoning: 0,
+    tools: 0,
+    pruned: state.stats.totalPruneTokens,
+    total: 0,
+  }
+
+  let firstAssistant: AssistantMessage | undefined
+  for (const msg of messages) {
+    if (msg.info.role === "assistant") {
+      const assistantInfo = msg.info as AssistantMessage
+      if (assistantInfo.tokens?.input > 0 || assistantInfo.tokens?.cache?.read > 0) {
+        firstAssistant = assistantInfo
+        break
+      }
+    }
+  }
+
+  let firstUserTokens = 0
+  for (const msg of messages) {
+    if (msg.info.role === "user" && !isIgnoredUserMessage(msg)) {
+      for (const part of msg.parts) {
+        if (part.type === "text") {
+          const textPart = part as TextPart
+          firstUserTokens += countTokens(textPart.text || "")
+        }
+      }
+      break
+    }
+  }
+
+  // Calculate system tokens: first response's total input minus first user message
+  if (firstAssistant) {
+    const firstInput =
+      (firstAssistant.tokens?.input || 0) + (firstAssistant.tokens?.cache?.read || 0)
+    breakdown.system = Math.max(0, firstInput - firstUserTokens)
+  }
+
+  let lastAssistant: AssistantMessage | undefined
+  for (let i = messages.length - 1; i >= 0; i--) {
+    const msg = messages[i]
+    if (msg.info.role === "assistant") {
+      const assistantInfo = msg.info as AssistantMessage
+      if (assistantInfo.tokens?.output > 0) {
+        lastAssistant = assistantInfo
+        break
+      }
+    }
+  }
+
+  // Get total from API
+  // Total = input + output + reasoning + cache.read + cache.write
+  const apiInput = lastAssistant?.tokens?.input || 0
+  const apiOutput = lastAssistant?.tokens?.output || 0
+  const apiReasoning = lastAssistant?.tokens?.reasoning || 0
+  const apiCacheRead = lastAssistant?.tokens?.cache?.read || 0
+  const apiCacheWrite = lastAssistant?.tokens?.cache?.write || 0
+  const apiTotal = apiInput + apiOutput + apiReasoning + apiCacheRead + apiCacheWrite
+
+  for (const msg of messages) {
+    if (isMessageCompacted(state, msg)) {
+      continue
+    }
+
+    if (msg.info.role === "user" && isIgnoredUserMessage(msg)) {
+      continue
+    }
+
+    const info = msg.info
+    const role = info.role
+
+    for (const part of msg.parts) {
+      switch (part.type) {
+        case "text": {
+          const textPart = part as TextPart
+          const tokens = countTokens(textPart.text || "")
+          if (role === "user") {
+            breakdown.user += tokens
+          } else {
+            breakdown.assistant += tokens
+          }
+          break
+        }
+        case "tool": {
+          const toolPart = part as ToolPart
+
+          if (toolPart.state?.input) {
+            const inputStr =
+              typeof toolPart.state.input === "string"
+                ? toolPart.state.input
+                : JSON.stringify(toolPart.state.input)
+            breakdown.tools += countTokens(inputStr)
+          }
+
+          if (toolPart.state?.status === "completed" && toolPart.state?.output) {
+            const outputStr =
+              typeof toolPart.state.output === "string"
+                ? toolPart.state.output
+                : JSON.stringify(toolPart.state.output)
+            breakdown.tools += countTokens(outputStr)
+          }
+          break
+        }
+      }
+    }
+  }
+
+  breakdown.tools = Math.max(0, breakdown.tools - breakdown.pruned)
+
+  // Calculate reasoning as the difference between API total and our counted parts
+  // This handles both interleaved thinking and non-interleaved models correctly
+  const countedParts = breakdown.system + breakdown.user + breakdown.assistant + breakdown.tools
+  breakdown.reasoning = Math.max(0, apiTotal - countedParts)
+
+  breakdown.total = apiTotal
+
+  return breakdown
+}
+
+function createBar(value: number, maxValue: number, width: number, char: string = "█"): string {
+  if (maxValue === 0) return ""
+  const filled = Math.round((value / maxValue) * width)
+  const bar = char.repeat(Math.max(0, filled))
+  return bar
+}
+
+function formatContextMessage(breakdown: TokenBreakdown): string {
+  const lines: string[] = []
+  const barWidth = 30
+
+  const values = [
+    breakdown.system,
+    breakdown.user,
+    breakdown.assistant,
+    breakdown.reasoning,
+    breakdown.tools,
+    breakdown.pruned,
+  ]
+  const maxValue = Math.max(...values)
+
+  const categories = [
+    { label: "System", value: breakdown.system, char: "█" },
+    { label: "User", value: breakdown.user, char: "▓" },
+    { label: "Assistant", value: breakdown.assistant, char: "▒" },
+    { label: "Reasoning", value: breakdown.reasoning, char: "░" },
+    { label: "Tools", value: breakdown.tools, char: "▓" },
+    { label: "Pruned", value: breakdown.pruned, char: "⣿", isSaved: true },
+  ] as const
+
+  lines.push("╭───────────────────────────────────────────────────────────╮")
+  lines.push("│                    DCP Context Analysis                   │")
+  lines.push("╰───────────────────────────────────────────────────────────╯")
+  lines.push("")
+  lines.push("Session Context Breakdown:")
+  lines.push("─".repeat(60))
+  lines.push("")
+
+  for (const cat of categories) {
+    const bar = createBar(cat.value, maxValue, barWidth, cat.char)
+
+    let labelWithPct: string
+    let valueStr: string
+    if ("isSaved" in cat && cat.isSaved) {
+      labelWithPct = cat.label.padEnd(16)
+      valueStr = `${formatTokenCount(cat.value).replace(" tokens", "").padStart(6)} saved`
+    } else {
+      const percentage =
+        breakdown.total > 0 ? ((cat.value / breakdown.total) * 100).toFixed(1) : "0.0"
+      labelWithPct = `${cat.label.padEnd(9)} ${percentage.padStart(5)}%`
+      valueStr = formatTokenCount(cat.value).padStart(13)
+    }
+
+    lines.push(`${labelWithPct}│${bar.padEnd(barWidth)}│${valueStr}`)
+  }
+
+  lines.push("")
+  lines.push("─".repeat(60))
+  lines.push("")
+
+  lines.push("Summary:")
+
+  if (breakdown.pruned > 0) {
+    const withoutPruning = breakdown.total + breakdown.pruned
+    const savingsPercent = ((breakdown.pruned / withoutPruning) * 100).toFixed(1)
+    lines.push(
+      `  Current context: ~${formatTokenCount(breakdown.total)} (${savingsPercent}% saved)`,
+    )
+    lines.push(`  Without DCP:     ~${formatTokenCount(withoutPruning)}`)
+  } else {
+    lines.push(`  Current context: ~${formatTokenCount(breakdown.total)}`)
+  }
+
+  lines.push("")
+
+  return lines.join("\n")
+}
+
+export async function handleContextCommand(ctx: ContextCommandContext): Promise<void> {
+  const { client, state, logger, sessionId, messages } = ctx
+
+  const breakdown = analyzeTokens(state, messages)
+
+  const message = formatContextMessage(breakdown)
+
+  const params = getCurrentParams(state, messages, logger)
+  await sendIgnoredMessage(client, sessionId, message, params, logger)
+}
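The reasoning bucket in `analyzeTokens` is derived by subtraction rather than counted directly. A worked example with invented numbers (nothing here comes from a real session):

```typescript
// Invented numbers, for illustration only.
// First assistant response: input=9_200, cache.read=0; first user message ≈ 200 tokens.
const system = Math.max(0, 9_200 + 0 - 200) // system prompt estimate: 9_000

// Counted message parts: user=1_300, assistant=4_800, tools=6_400.
const counted = system + 1_300 + 4_800 + 6_400 // 21_500

// The API-reported running total is 25_000; the remainder is attributed
// to reasoning, which also absorbs interleaved thinking tokens.
const reasoning = Math.max(0, 25_000 - counted) // 3_500
```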
diff --git a/lib/commands/stats.ts b/lib/commands/stats.ts
new file mode 100644
index 0000000..2463594
--- /dev/null
+++ b/lib/commands/stats.ts
@@ -0,0 +1,67 @@
+/**
+ * DCP Stats command handler.
+ * Shows pruning statistics for the current session and all-time totals.
+ */
+
+import type { Logger } from "../logger"
+import type { SessionState, WithParts } from "../state"
+import { sendIgnoredMessage } from "../ui/notification"
+import { formatTokenCount } from "../ui/utils"
+import { loadAllSessionStats, type AggregatedStats } from "../state/persistence"
+import { getCurrentParams } from "../strategies/utils"
+
+export interface StatsCommandContext {
+  client: any
+  state: SessionState
+  logger: Logger
+  sessionId: string
+  messages: WithParts[]
+}
+
+function formatStatsMessage(
+  sessionTokens: number,
+  sessionTools: number,
+  allTime: AggregatedStats,
+): string {
+  const lines: string[] = []
+
+  lines.push("╭───────────────────────────────────────────────────────────╮")
+  lines.push("│                       DCP Statistics                      │")
+  lines.push("╰───────────────────────────────────────────────────────────╯")
+  lines.push("")
+  lines.push("Session:")
+  lines.push("─".repeat(60))
+  lines.push(`  Tokens pruned: ~${formatTokenCount(sessionTokens)}`)
+  lines.push(`  Tools pruned:  ${sessionTools}`)
+  lines.push("")
+  lines.push("All-time:")
+  lines.push("─".repeat(60))
+  lines.push(`  Tokens saved:  ~${formatTokenCount(allTime.totalTokens)}`)
+  lines.push(`  Tools pruned:  ${allTime.totalTools}`)
+  lines.push(`  Sessions:      ${allTime.sessionCount}`)
+
+  return lines.join("\n")
+}
+
+export async function handleStatsCommand(ctx: StatsCommandContext): Promise<void> {
+  const { client, state, logger, sessionId, messages } = ctx
+
+  // Session stats from in-memory state
+  const sessionTokens = state.stats.totalPruneTokens
+  const sessionTools = state.prune.toolIds.length
+
+  // All-time stats from storage files
+  const allTime = await loadAllSessionStats(logger)
+
+  const message = formatStatsMessage(sessionTokens, sessionTools, allTime)
+
+  const params = getCurrentParams(state, messages, logger)
+  await sendIgnoredMessage(client, sessionId, message, params, logger)
+
+  logger.info("Stats command executed", {
+    sessionTokens,
+    sessionTools,
+    allTimeTokens: allTime.totalTokens,
+    allTimeTools: allTime.totalTools,
+  })
+}
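For a sense of what `/dcp-stats` renders, a hedged sketch that feeds made-up numbers through `formatStatsMessage` (the function is module-local, so this is illustrative only):

```typescript
// Illustrative only: formatStatsMessage is not exported, and the figures
// below are invented.
const report = formatStatsMessage(12_400, 17, {
  totalTokens: 1_250_000,
  totalTools: 903,
  sessionCount: 42,
})
console.log(report) // boxed "DCP Statistics" panel with Session and All-time sections
```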
diff --git a/lib/hooks.ts b/lib/hooks.ts
index fc5e479..ba029bf 100644
--- a/lib/hooks.ts
+++ b/lib/hooks.ts
@@ -6,6 +6,8 @@ import { deduplicate, supersedeWrites, purgeErrors } from "./strategies"
 import { prune, insertPruneToolContext } from "./messages"
 import { checkSession } from "./state"
 import { loadPrompt } from "./prompts"
+import { handleStatsCommand } from "./commands/stats"
+import { handleContextCommand } from "./commands/context"
 
 const INTERNAL_AGENT_SIGNATURES = [
   "You are a title generator",
@@ -76,3 +78,39 @@ export function createChatMessageTransformHandler(
     }
   }
 }
+
+export function createCommandExecuteHandler(client: any, state: SessionState, logger: Logger) {
+  return async (
+    input: { command: string; sessionID: string; arguments: string },
+    _output: { parts: any[] },
+  ) => {
+    if (input.command === "dcp-stats") {
+      const messagesResponse = await client.session.messages({
+        path: { id: input.sessionID },
+      })
+      const messages = (messagesResponse.data || messagesResponse) as WithParts[]
+      await handleStatsCommand({
+        client,
+        state,
+        logger,
+        sessionId: input.sessionID,
+        messages,
+      })
+      throw new Error("__DCP_STATS_HANDLED__")
+    }
+    if (input.command === "dcp-context") {
+      const messagesResponse = await client.session.messages({
+        path: { id: input.sessionID },
+      })
+      const messages = (messagesResponse.data || messagesResponse) as WithParts[]
+      await handleContextCommand({
+        client,
+        state,
+        logger,
+        sessionId: input.sessionID,
+        messages,
+      })
+      throw new Error("__DCP_CONTEXT_HANDLED__")
+    }
+  }
+}
diff --git a/lib/logger.ts b/lib/logger.ts
index c86a53d..972a1fb 100644
--- a/lib/logger.ts
+++ b/lib/logger.ts
@@ -168,12 +168,18 @@ export class Logger {
       callID: part.callID,
     }
 
+    if (part.state?.status) {
+      toolPart.status = part.state.status
+    }
     if (part.state?.input) {
       toolPart.input = part.state.input
     }
     if (part.state?.output) {
       toolPart.output = part.state.output
     }
+    if (part.state?.error) {
+      toolPart.error = part.state.error
+    }
 
     return toolPart
   }
diff --git a/lib/state/persistence.ts b/lib/state/persistence.ts
index ccd4859..172ff75 100644
--- a/lib/state/persistence.ts
+++ b/lib/state/persistence.ts
@@ -99,3 +99,48 @@ export async function loadSessionState(
     return null
   }
 }
+
+export interface AggregatedStats {
+  totalTokens: number
+  totalTools: number
+  sessionCount: number
+}
+
+export async function loadAllSessionStats(logger: Logger): Promise<AggregatedStats> {
+  const result: AggregatedStats = {
+    totalTokens: 0,
+    totalTools: 0,
+    sessionCount: 0,
+  }
+
+  try {
+    if (!existsSync(STORAGE_DIR)) {
+      return result
+    }
+
+    const files = await fs.readdir(STORAGE_DIR)
+    const jsonFiles = files.filter((f) => f.endsWith(".json"))
+
+    for (const file of jsonFiles) {
+      try {
+        const filePath = join(STORAGE_DIR, file)
+        const content = await fs.readFile(filePath, "utf-8")
+        const state = JSON.parse(content) as PersistedSessionState
+
+        if (state?.stats?.totalPruneTokens && state?.prune?.toolIds) {
+          result.totalTokens += state.stats.totalPruneTokens
+          result.totalTools += state.prune.toolIds.length
+          result.sessionCount++
+        }
+      } catch {
+        // Skip invalid files
+      }
+    }
+
+    logger.debug("Loaded all-time stats", result)
+  } catch (error: any) {
+    logger.warn("Failed to load all-time stats", { error: error?.message })
+  }
+
+  return result
+}
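A minimal usage sketch for the new aggregation helper, assuming a `Logger` instance is in scope (the import paths are relative to the repo root, and the `Logger` constructor call is an assumption; see lib/logger.ts):

```typescript
import { loadAllSessionStats } from "./lib/state/persistence"
import { Logger } from "./lib/logger"

// Walks STORAGE_DIR, summing stats from each session's JSON file;
// unreadable files are skipped rather than failing the aggregation.
const logger = new Logger() // assumed constructor signature
const allTime = await loadAllSessionStats(logger)
logger.info("DCP all-time savings", {
  tokens: allTime.totalTokens,   // total tokens pruned across sessions
  tools: allTime.totalTools,     // total tool outputs pruned
  sessions: allTime.sessionCount,
})
```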
diff --git a/lib/state/tool-cache.ts b/lib/state/tool-cache.ts
index f9d3d3c..057bcf1 100644
--- a/lib/state/tool-cache.ts
+++ b/lib/state/tool-cache.ts
@@ -42,7 +42,9 @@ export async function syncToolCache(
       turnProtectionTurns > 0 &&
       state.currentTurn - turnCounter < turnProtectionTurns
 
-    state.lastToolPrune = part.tool === "discard" || part.tool === "extract"
+    state.lastToolPrune =
+      (part.tool === "discard" || part.tool === "extract") &&
+      part.state.status === "completed"
 
     const allProtectedTools = config.tools.settings.protectedTools
diff --git a/lib/strategies/tools.ts b/lib/strategies/tools.ts
index e3d8e03..44f6742 100644
--- a/lib/strategies/tools.ts
+++ b/lib/strategies/tools.ts
@@ -39,7 +39,9 @@ async function executePruneOperation(
   if (!ids || ids.length === 0) {
     logger.debug(`${toolName} tool called but ids is empty or undefined`)
-    return `No IDs provided. Check the list for available IDs to ${toolName.toLowerCase()}.`
+    throw new Error(
+      `No IDs provided. Check the list for available IDs to ${toolName.toLowerCase()}.`,
+    )
   }
 
   const numericToolIds: number[] = ids
@@ -48,7 +50,7 @@
   if (numericToolIds.length === 0) {
     logger.debug(`No numeric tool IDs provided for ${toolName}: ` + JSON.stringify(ids))
-    return "No numeric IDs provided. Format: ids: [id1, id2, ...]"
+    throw new Error("No numeric IDs provided. Format: ids: [id1, id2, ...]")
   }
 
   // Fetch messages to calculate tokens and find current agent
@@ -65,7 +67,9 @@
   // Validate that all numeric IDs are within bounds
   if (numericToolIds.some((id) => id < 0 || id >= toolIdList.length)) {
     logger.debug("Invalid tool IDs provided: " + numericToolIds.join(", "))
-    return "Invalid IDs provided. Only use numeric IDs from the list."
+    throw new Error(
+      "Invalid IDs provided. Only use numeric IDs from the list.",
+    )
   }
 
   // Validate that all IDs exist in cache and aren't protected
@@ -78,7 +82,9 @@
         "Rejecting prune request - ID not in cache (turn-protected or hallucinated)",
         { index, id },
       )
-      return "Invalid IDs provided. Only use numeric IDs from the list."
+      throw new Error(
+        "Invalid IDs provided. Only use numeric IDs from the list.",
+      )
    }
    const allProtectedTools = config.tools.settings.protectedTools
    if (allProtectedTools.includes(metadata.tool)) {
@@ -87,7 +93,9 @@
        id,
        tool: metadata.tool,
      })
-      return "Invalid IDs provided. Only use numeric IDs from the list."
+      throw new Error(
+        "Invalid IDs provided. Only use numeric IDs from the list.",
+      )
    }
 
    const filePath = getFilePathFromParameters(metadata.parameters)
@@ -98,7 +106,9 @@
        tool: metadata.tool,
        filePath,
      })
-      return "Invalid IDs provided. Only use numeric IDs from the list."
+      throw new Error(
+        "Invalid IDs provided. Only use numeric IDs from the list.",
+      )
    }
  }
 
@@ -158,7 +168,9 @@ export function createDiscardTool(ctx: PruneToolContext): ReturnType
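The switch from returned strings to thrown errors changes how failed prune calls surface: a returned string reads as a successful tool result, while a throw marks the call as errored. A hedged sketch of the consumer-side difference (the runner below is an assumed caller shape, not OpenCode's actual internals):

```typescript
// Before this diff, validation failures were returned as plain strings, so
// the tool call completed "successfully" and the text was shown as output.
// After it, they throw, so a runner shaped roughly like this records an
// error state instead. That also interacts with lib/state/tool-cache.ts
// above: state.lastToolPrune is now set only when status === "completed",
// so failed prune attempts no longer count as prune turns.
async function runTool(run: () => Promise<string>) {
  try {
    return { status: "completed" as const, output: await run() }
  } catch (err) {
    return { status: "error" as const, error: (err as Error).message }
  }
}
```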
diff --git a/lib/strategies/utils.ts b/lib/strategies/utils.ts
--- a/lib/strategies/utils.ts
+++ b/lib/strategies/utils.ts
-    return texts.map((text) => encode(text).length)
+    return anthropicCountTokens(text)
   } catch {
-    return texts.map((text) => Math.round(text.length / 4))
+    return Math.round(text.length / 4)
   }
 }
 
-/**
- * Calculates approximate tokens saved by pruning the given tool call IDs.
- */
+function estimateTokensBatch(texts: string[]): number[] {
+  return texts.map(countTokens)
+}
+
 export const calculateTokensSaved = (
   state: SessionState,
   messages: WithParts[],
diff --git a/package-lock.json b/package-lock.json
index 0b7035a..77d4705 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -9,8 +9,8 @@
       "version": "1.2.3",
       "license": "MIT",
       "dependencies": {
+        "@anthropic-ai/tokenizer": "^0.0.4",
         "@opencode-ai/sdk": "^1.1.3",
-        "gpt-tokenizer": "^3.4.0",
         "jsonc-parser": "^3.3.1",
         "zod": "^4.1.13"
       },
@@ -25,6 +25,31 @@
         "@opencode-ai/plugin": ">=0.13.7"
       }
     },
+    "node_modules/@anthropic-ai/tokenizer": {
+      "version": "0.0.4",
+      "resolved": "https://registry.npmjs.org/@anthropic-ai/tokenizer/-/tokenizer-0.0.4.tgz",
+      "integrity": "sha512-EHRKbxlxlc8W4KCBEseByJ7YwyYCmgu9OyN59H9+IYIGPoKv8tXyQXinkeGDI+cI8Tiuz9wk2jZb/kK7AyvL7g==",
+      "license": "Apache-2.0",
+      "dependencies": {
+        "@types/node": "^18.11.18",
+        "tiktoken": "^1.0.10"
+      }
+    },
+    "node_modules/@anthropic-ai/tokenizer/node_modules/@types/node": {
+      "version": "18.19.130",
+      "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz",
+      "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==",
+      "license": "MIT",
+      "dependencies": {
+        "undici-types": "~5.26.4"
+      }
+    },
+    "node_modules/@anthropic-ai/tokenizer/node_modules/undici-types": {
+      "version": "5.26.5",
+      "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
+      "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
+      "license": "MIT"
+    },
     "node_modules/@esbuild/aix-ppc64": {
       "version": "0.27.0",
       "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.0.tgz",
@@ -579,12 +604,6 @@
         "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1"
       }
     },
-    "node_modules/gpt-tokenizer": {
-      "version": "3.4.0",
-      "resolved": "https://registry.npmjs.org/gpt-tokenizer/-/gpt-tokenizer-3.4.0.tgz",
-      "integrity": "sha512-wxFLnhIXTDjYebd9A9pGl3e31ZpSypbpIJSOswbgop5jLte/AsZVDvjlbEuVFlsqZixVKqbcoNmRlFDf6pz/UQ==",
-      "license": "MIT"
-    },
     "node_modules/jsonc-parser": {
       "version": "3.3.1",
       "resolved": "https://registry.npmjs.org/jsonc-parser/-/jsonc-parser-3.3.1.tgz",
@@ -617,6 +636,12 @@
         "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
       }
     },
+    "node_modules/tiktoken": {
+      "version": "1.0.22",
+      "resolved": "https://registry.npmjs.org/tiktoken/-/tiktoken-1.0.22.tgz",
+      "integrity": "sha512-PKvy1rVF1RibfF3JlXBSP0Jrcw2uq3yXdgcEXtKTYn3QJ/cBRBHDnrJ5jHky+MENZ6DIPwNUGWpkVx+7joCpNA==",
+      "license": "MIT"
+    },
     "node_modules/tsx": {
       "version": "4.21.0",
       "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz",
diff --git a/package.json b/package.json
index 13c9c72..052f0db 100644
--- a/package.json
+++ b/package.json
@@ -40,8 +40,8 @@
     "@opencode-ai/plugin": ">=0.13.7"
   },
   "dependencies": {
+    "@anthropic-ai/tokenizer": "^0.0.4",
     "@opencode-ai/sdk": "^1.1.3",
-    "gpt-tokenizer": "^3.4.0",
     "jsonc-parser": "^3.3.1",
    "zod": "^4.1.13"
   },
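Finally, the dependency swap above replaces `gpt-tokenizer` with `@anthropic-ai/tokenizer`, whose exported `countTokens(text: string): number` is presumably what `lib/strategies/utils.ts` aliases as `anthropicCountTokens`:

```typescript
import { countTokens } from "@anthropic-ai/tokenizer"

// Counts tokens with Anthropic's tiktoken-backed tokenizer; the catch
// branch in lib/strategies/utils.ts falls back to length / 4 if it fails.
const n = countTokens("discard tool outputs 3, 5, and 7")
console.log(n)
```

The new single-string `countTokens` also replaces the old batch API, with `estimateTokensBatch` kept as a thin `texts.map(countTokens)` wrapper for callers that still pass arrays.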