diff --git a/README.md b/README.md
index cb46a24..28cc84b 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,17 @@ DCP uses multiple tools and strategies to reduce context size:
 Your session history is never modified—DCP replaces pruned content with placeholders before sending requests to your LLM.
 
+### Preemptive Compaction (Experimental)
+
+An optional multi-phase context-reduction system that proactively manages context before it exceeds model limits. When enabled, it monitors token usage after each finished assistant message and triggers compaction once usage crosses a configurable threshold (default: 85%).
+
+The compaction flow proceeds in phases:
+1. **DCP Strategies**: relies on the enabled DCP strategies (deduplication, supersede writes, purge errors), which already run on every message transform
+2. **Tool Truncation**: if still over the threshold, truncates the largest tool outputs first (recent messages are protected)
+3. **Summarization**: if still over the threshold, triggers OpenCode's built-in summarization
+
+This prevents context overflow errors and maintains session continuity during long conversations. Disabled by default; enable via `preemptiveCompaction.enabled: true`.
+
 ## Impact on Prompt Caching
 
 LLM providers like Anthropic and OpenAI cache prompts based on exact prefix matching. When DCP prunes a tool output, it changes the message content, which invalidates cached prefixes from that point forward.
 
@@ -121,6 +132,23 @@ DCP uses its own config file:
       "protectedTools": [],
     },
   },
+  // Preemptive compaction (experimental) - proactively manages context before limits
+  "preemptiveCompaction": {
+    // Disabled by default - opt-in feature
+    "enabled": false,
+    // Context usage threshold to trigger compaction (0.5 - 0.95)
+    "threshold": 0.85,
+    // Cooldown between compaction attempts (ms)
+    "cooldownMs": 60000,
+    // Minimum tokens before compaction can trigger
+    "minTokens": 50000,
+    // Tool output truncation settings
+    "truncation": {
+      "enabled": true,
+      // Number of recent messages to protect from truncation
+      "protectedMessages": 3,
+    },
+  },
 }
 ```
 
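Editor's note: the threshold semantics above are easiest to see as arithmetic. A minimal sketch, assuming a 200k-token window and the default settings; the names here are illustrative, not the plugin's API:

```ts
// Sketch of the trigger condition described above (illustrative names only).
const contextLimit = 200_000 // assumed model window
const threshold = 0.85       // default from the config shown above

function needsCompaction(tokensUsed: number): boolean {
  return tokensUsed / contextLimit >= threshold
}

needsCompaction(180_000) // true  (90% of the window)
needsCompaction(150_000) // false (75% of the window)
```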
diff --git a/dcp.schema.json b/dcp.schema.json
index 39823f2..398fa85 100644
--- a/dcp.schema.json
+++ b/dcp.schema.json
@@ -202,6 +202,56 @@
         }
       }
     }
-    }
+    },
+    "preemptiveCompaction": {
+      "type": "object",
+      "description": "Preemptive compaction to avoid context overflow. Attempts DCP + truncation before falling back to summarization.",
+      "additionalProperties": false,
+      "properties": {
+        "enabled": {
+          "type": "boolean",
+          "default": false,
+          "description": "Enable preemptive compaction (opt-in feature)"
+        },
+        "threshold": {
+          "type": "number",
+          "minimum": 0.5,
+          "maximum": 0.95,
+          "default": 0.85,
+          "description": "Context usage ratio that triggers compaction (0.85 = 85%)"
+        },
+        "cooldownMs": {
+          "type": "number",
+          "minimum": 10000,
+          "maximum": 300000,
+          "default": 60000,
+          "description": "Minimum time between compaction attempts (milliseconds)"
+        },
+        "minTokens": {
+          "type": "number",
+          "default": 50000,
+          "description": "Minimum tokens before compaction can trigger"
+        },
+        "truncation": {
+          "type": "object",
+          "description": "Truncation phase configuration",
+          "additionalProperties": false,
+          "properties": {
+            "enabled": {
+              "type": "boolean",
+              "default": true,
+              "description": "Enable truncation of large tool outputs"
+            },
+            "protectedMessages": {
+              "type": "number",
+              "minimum": 1,
+              "maximum": 10,
+              "default": 3,
+              "description": "Number of recent messages to protect from truncation"
+            }
+          }
+        }
+      }
+    }
   }
 }
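Editor's note: the schema's bounds (threshold clamped to 0.5-0.95, cooldown to 10-300 seconds) can be exercised with any JSON Schema validator. A sketch using Ajv, which this project is not known to depend on; shown only to illustrate the constraints:

```ts
import Ajv from "ajv"
import schema from "./dcp.schema.json" // assumes resolveJsonModule is on

const ajv = new Ajv({ useDefaults: true })
const validate = ajv.compile(schema)

// threshold 0.99 exceeds the schema's "maximum": 0.95, so validation fails
const bad = { preemptiveCompaction: { enabled: true, threshold: 0.99 } }
console.log(validate(bad), validate.errors)
```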
diff --git a/index.ts b/index.ts
index 0802afb..d1b9c99 100644
--- a/index.ts
+++ b/index.ts
@@ -4,6 +4,7 @@ import { Logger } from "./lib/logger"
 import { createSessionState } from "./lib/state"
 import { createDiscardTool, createExtractTool } from "./lib/strategies"
 import { createChatMessageTransformHandler, createSystemPromptHandler } from "./lib/hooks"
+import { createPreemptiveCompactionHandler } from "./lib/preemptive"
 
 const plugin: Plugin = (async (ctx) => {
   const config = getConfig(ctx)
@@ -17,8 +18,14 @@ const plugin: Plugin = (async (ctx) => {
   logger.info("DCP initialized", {
     strategies: config.strategies,
+    preemptiveCompaction: config.preemptiveCompaction.enabled,
   })
 
+  // Create preemptive compaction handler if enabled
+  const preemptiveHandler = config.preemptiveCompaction.enabled
+    ? createPreemptiveCompactionHandler(ctx.client, ctx.directory, state, logger, config)
+    : null
+
   return {
     "experimental.chat.system.transform": createSystemPromptHandler(state, logger, config),
@@ -81,6 +88,8 @@ const plugin: Plugin = (async (ctx) => {
       )
     }
   },
+  // Preemptive compaction event hook (handles message.updated and session.deleted events)
+  ...(preemptiveHandler && { event: preemptiveHandler }),
 }
 }) satisfies Plugin
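Editor's note: the conditional spread at the end of the hook object is doing the real wiring work here. Spreading `null` (or `false`) into an object literal is a no-op, so the `event` key only exists when the feature is on. A standalone sketch of the same pattern:

```ts
const handler: (() => Promise<void>) | null = null // feature disabled

const hooks = {
  alwaysPresent: true,
  // spreading null is a no-op, so `event` is simply absent when disabled
  ...(handler && { event: handler }),
}

console.log("event" in hooks) // false; OpenCode never sees an event hook
```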
diff --git a/lib/config.ts b/lib/config.ts
index d1bbba5..fabbcc0 100644
--- a/lib/config.ts
+++ b/lib/config.ts
@@ -45,6 +45,19 @@ export interface TurnProtection {
   turns: number
 }
 
+export interface PreemptiveCompactionTruncation {
+  enabled: boolean
+  protectedMessages: number
+}
+
+export interface PreemptiveCompaction {
+  enabled: boolean
+  threshold: number
+  cooldownMs: number
+  minTokens: number
+  truncation: PreemptiveCompactionTruncation
+}
+
 export interface PluginConfig {
   enabled: boolean
   debug: boolean
@@ -57,6 +70,7 @@ export interface PluginConfig {
     supersedeWrites: SupersedeWrites
     purgeErrors: PurgeErrors
   }
+  preemptiveCompaction: PreemptiveCompaction
 }
 
 const DEFAULT_PROTECTED_TOOLS = [
@@ -107,6 +121,15 @@ export const VALID_CONFIG_KEYS = new Set([
   "strategies.purgeErrors.enabled",
   "strategies.purgeErrors.turns",
   "strategies.purgeErrors.protectedTools",
+  // preemptiveCompaction
+  "preemptiveCompaction",
+  "preemptiveCompaction.enabled",
+  "preemptiveCompaction.threshold",
+  "preemptiveCompaction.cooldownMs",
+  "preemptiveCompaction.minTokens",
+  "preemptiveCompaction.truncation",
+  "preemptiveCompaction.truncation.enabled",
+  "preemptiveCompaction.truncation.protectedMessages",
 ])
 
 // Extract all key paths from a config object for validation
@@ -421,6 +444,16 @@ const defaultConfig: PluginConfig = {
       protectedTools: [...DEFAULT_PROTECTED_TOOLS],
     },
   },
+  preemptiveCompaction: {
+    enabled: false, // Opt-in feature
+    threshold: 0.85,
+    cooldownMs: 60000,
+    minTokens: 50000,
+    truncation: {
+      enabled: true,
+      protectedMessages: 3,
+    },
+  },
 }
 
 const GLOBAL_CONFIG_DIR = join(homedir(), ".config", "opencode")
@@ -705,6 +738,16 @@ export function getConfig(ctx: PluginInput): PluginConfig {
       ],
       tools: mergeTools(config.tools, result.data.tools as any),
       strategies: mergeStrategies(config.strategies, result.data.strategies as any),
+      preemptiveCompaction: {
+        enabled: result.data.preemptiveCompaction?.enabled ?? config.preemptiveCompaction.enabled,
+        threshold: result.data.preemptiveCompaction?.threshold ?? config.preemptiveCompaction.threshold,
+        cooldownMs: result.data.preemptiveCompaction?.cooldownMs ?? config.preemptiveCompaction.cooldownMs,
+        minTokens: result.data.preemptiveCompaction?.minTokens ?? config.preemptiveCompaction.minTokens,
+        truncation: {
+          enabled: result.data.preemptiveCompaction?.truncation?.enabled ?? config.preemptiveCompaction.truncation.enabled,
+          protectedMessages: result.data.preemptiveCompaction?.truncation?.protectedMessages ?? config.preemptiveCompaction.truncation.protectedMessages,
+        },
+      },
     }
   }
 } else {
@@ -747,6 +790,16 @@ export function getConfig(ctx: PluginInput): PluginConfig {
       ],
       tools: mergeTools(config.tools, result.data.tools as any),
       strategies: mergeStrategies(config.strategies, result.data.strategies as any),
+      preemptiveCompaction: {
+        enabled: result.data.preemptiveCompaction?.enabled ?? config.preemptiveCompaction.enabled,
+        threshold: result.data.preemptiveCompaction?.threshold ?? config.preemptiveCompaction.threshold,
+        cooldownMs: result.data.preemptiveCompaction?.cooldownMs ?? config.preemptiveCompaction.cooldownMs,
+        minTokens: result.data.preemptiveCompaction?.minTokens ?? config.preemptiveCompaction.minTokens,
+        truncation: {
+          enabled: result.data.preemptiveCompaction?.truncation?.enabled ?? config.preemptiveCompaction.truncation.enabled,
+          protectedMessages: result.data.preemptiveCompaction?.truncation?.protectedMessages ?? config.preemptiveCompaction.truncation.protectedMessages,
+        },
+      },
     }
   }
@@ -786,6 +839,16 @@ export function getConfig(ctx: PluginInput): PluginConfig {
       ],
       tools: mergeTools(config.tools, result.data.tools as any),
       strategies: mergeStrategies(config.strategies, result.data.strategies as any),
+      preemptiveCompaction: {
+        enabled: result.data.preemptiveCompaction?.enabled ?? config.preemptiveCompaction.enabled,
+        threshold: result.data.preemptiveCompaction?.threshold ?? config.preemptiveCompaction.threshold,
+        cooldownMs: result.data.preemptiveCompaction?.cooldownMs ?? config.preemptiveCompaction.cooldownMs,
+        minTokens: result.data.preemptiveCompaction?.minTokens ?? config.preemptiveCompaction.minTokens,
+        truncation: {
+          enabled: result.data.preemptiveCompaction?.truncation?.enabled ?? config.preemptiveCompaction.truncation.enabled,
+          protectedMessages: result.data.preemptiveCompaction?.truncation?.protectedMessages ?? config.preemptiveCompaction.truncation.protectedMessages,
+        },
+      },
     }
   }
 }
diff --git a/lib/preemptive/constants.ts b/lib/preemptive/constants.ts
new file mode 100644
index 0000000..e27704a
--- /dev/null
+++ b/lib/preemptive/constants.ts
@@ -0,0 +1,40 @@
+/**
+ * Constants for preemptive compaction feature
+ */
+
+// Default configuration values
+export const DEFAULT_THRESHOLD = 0.85
+export const DEFAULT_COOLDOWN_MS = 60000
+export const MIN_TOKENS_FOR_COMPACTION = 50000
+export const DEFAULT_PROTECTED_MESSAGES = 3
+export const CHARS_PER_TOKEN = 4
+
+// Message shown when tool output is truncated
+export const TRUNCATION_MESSAGE =
+  "[TOOL RESULT TRUNCATED - Context limit exceeded. Original output was too large and has been truncated. Re-run this tool if you need the full output.]"
+
+// Model context limits for inference when not configured
+export const MODEL_CONTEXT_PATTERNS: Array<{ pattern: RegExp; limit: number }> = [
+  // Claude models (check for 1M context env vars)
+  { pattern: /claude-(opus|sonnet|haiku)/i, limit: 200_000 },
+  // GPT-5.x models (1M context)
+  { pattern: /gpt-5/i, limit: 1_000_000 },
+  // GPT-4 models
+  { pattern: /gpt-4-turbo|gpt-4o/i, limit: 128_000 },
+  { pattern: /gpt-4(?!o)/i, limit: 8_192 },
+  // OpenAI reasoning models
+  { pattern: /o1|o3/i, limit: 200_000 },
+  // Gemini models
+  { pattern: /gemini-3/i, limit: 2_000_000 },
+  { pattern: /gemini-2\.5-pro/i, limit: 2_000_000 },
+  { pattern: /gemini/i, limit: 1_000_000 },
+]
+
+// Fallback context limit when model is not recognized
+export const DEFAULT_CONTEXT_LIMIT = 200_000
+
+// Extended context for environments with 1M enabled
+export const EXTENDED_CONTEXT_LIMIT =
+  process.env.ANTHROPIC_1M_CONTEXT === "true" || process.env.VERTEX_ANTHROPIC_1M_CONTEXT === "true"
+    ? 1_000_000
+    : DEFAULT_CONTEXT_LIMIT
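Editor's note on `lib/config.ts` above: the same ten-line `preemptiveCompaction` merge literal appears three times in `getConfig`. A hypothetical helper (not part of this PR) in the spirit of the existing `mergeTools`/`mergeStrategies` could collapse the repetition; the input type and import path are assumptions:

```ts
import type { PreemptiveCompaction } from "./lib/config"

// Deep-partial shape of what user config may provide (hypothetical).
interface PreemptiveCompactionInput {
  enabled?: boolean
  threshold?: number
  cooldownMs?: number
  minTokens?: number
  truncation?: { enabled?: boolean; protectedMessages?: number }
}

function mergePreemptiveCompaction(
  base: PreemptiveCompaction,
  input?: PreemptiveCompactionInput
): PreemptiveCompaction {
  return {
    enabled: input?.enabled ?? base.enabled,
    threshold: input?.threshold ?? base.threshold,
    cooldownMs: input?.cooldownMs ?? base.cooldownMs,
    minTokens: input?.minTokens ?? base.minTokens,
    truncation: {
      enabled: input?.truncation?.enabled ?? base.truncation.enabled,
      protectedMessages: input?.truncation?.protectedMessages ?? base.truncation.protectedMessages,
    },
  }
}
```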
diff --git a/lib/preemptive/index.ts b/lib/preemptive/index.ts
new file mode 100644
index 0000000..688b7b8
--- /dev/null
+++ b/lib/preemptive/index.ts
@@ -0,0 +1,290 @@
+/**
+ * Preemptive Compaction Handler
+ *
+ * Multi-phase compaction that attempts DCP strategies + truncation before
+ * falling back to expensive summarization.
+ *
+ * Flow:
+ * 1. Check if usage >= threshold
+ * 2. Phase 1: Run DCP strategies (deduplication, supersede writes, purge errors)
+ * 3. Phase 2: Truncate large tool outputs (protect recent messages)
+ * 4. Decision: If usage < threshold, skip summarization
+ * 5. Fallback: Trigger summarize() if still over threshold
+ */
+
+import type { PluginConfig } from "../config"
+import type { Logger } from "../logger"
+import type { SessionState } from "../state"
+import type { PreemptiveCompactionState, MessageInfo } from "./types"
+import { CHARS_PER_TOKEN } from "./constants"
+import { inferContextLimit } from "./model-limits"
+import { truncateUntilTargetTokens } from "./storage"
+
+// Re-export types for external use
+export type { PreemptiveCompactionConfig, MessageInfo } from "./types"
+
+/**
+ * Create the preemptive compaction state
+ */
+function createState(): PreemptiveCompactionState {
+  return {
+    lastCompactionTime: new Map(),
+    compactionInProgress: new Set(),
+  }
+}
+
+/**
+ * Create the preemptive compaction event handler
+ */
+export function createPreemptiveCompactionHandler(
+  client: any,
+  directory: string,
+  dcpState: SessionState,
+  logger: Logger,
+  config: PluginConfig
+) {
+  const preemptiveConfig = config.preemptiveCompaction
+
+  // Not enabled - return no-op handler
+  if (!preemptiveConfig.enabled) {
+    return async () => {}
+  }
+
+  const state = createState()
+  const threshold = preemptiveConfig.threshold
+  const cooldownMs = preemptiveConfig.cooldownMs
+  const minTokens = preemptiveConfig.minTokens
+
+  logger.info("Preemptive compaction initialized", {
+    threshold,
+    cooldownMs,
+    minTokens,
+    truncation: preemptiveConfig.truncation,
+  })
+
+  /**
+   * Check and trigger compaction if needed
+   */
+  const checkAndTriggerCompaction = async (
+    sessionID: string,
+    lastAssistant: MessageInfo
+  ): Promise<void> => {
+    // Skip if already compacting this session
+    if (state.compactionInProgress.has(sessionID)) {
+      logger.debug("Compaction already in progress", { sessionID })
+      return
+    }
+
+    // Check cooldown
+    const lastCompaction = state.lastCompactionTime.get(sessionID) ?? 0
+    if (Date.now() - lastCompaction < cooldownMs) {
+      logger.debug("Compaction on cooldown", { sessionID })
+      return
+    }
+
+    // Skip if this is a summary message
+    if (lastAssistant.summary === true) {
+      logger.debug("Skipping summary message", { sessionID })
+      return
+    }
+
+    // Get token info
+    const tokens = lastAssistant.tokens
+    if (!tokens) {
+      logger.debug("No token info available", { sessionID })
+      return
+    }
+
+    const modelID = lastAssistant.modelID ?? ""
+    const providerID = lastAssistant.providerID ?? ""
+
+    // Infer context limit from model ID
+    const contextLimit = inferContextLimit(modelID)
+    const totalUsed = tokens.input + tokens.cache.read + tokens.output
+
+    // Skip if not enough tokens
+    if (totalUsed < minTokens) {
+      logger.debug("Below minimum tokens threshold", { sessionID, totalUsed, minTokens })
+      return
+    }
+
+    let usageRatio = totalUsed / contextLimit
+
+    logger.info("Checking preemptive compaction", {
+      sessionID,
+      totalUsed,
+      contextLimit,
+      usageRatio: usageRatio.toFixed(2),
+      threshold,
+    })
+
+    // Skip if under threshold
+    if (usageRatio < threshold) {
+      return
+    }
+
+    // Mark compaction in progress
+    state.compactionInProgress.add(sessionID)
+    state.lastCompactionTime.set(sessionID, Date.now())
+
+    // Validate provider/model info
+    if (!providerID || !modelID) {
+      logger.warn("Missing provider/model info", { sessionID })
+      state.compactionInProgress.delete(sessionID)
+      return
+    }
+
+    try {
+      // Show initial toast
+      await client.tui.showToast({
+        body: {
+          title: "Smart Compaction",
+          message: `Context at ${(usageRatio * 100).toFixed(0)}% - running DCP + truncation...`,
+          variant: "warning",
+          duration: 3000,
+        },
+      }).catch(() => {})
+
+      let tokensSaved = 0
+
+      // Phase 1: DCP is already running via message transform
+      // The strategies (deduplication, supersede writes, purge errors) run automatically
+      // on each message transform. Here we just log that DCP is active.
+      logger.info("Phase 1: DCP strategies active via message transform", { sessionID })
+
+      // Phase 2: Truncation
+      if (preemptiveConfig.truncation.enabled) {
+        logger.info("Phase 2: Running truncation", { sessionID })
+
+        const protectedMessages = preemptiveConfig.truncation.protectedMessages
+        const truncationResult = truncateUntilTargetTokens(
+          sessionID,
+          totalUsed - tokensSaved,
+          contextLimit,
+          threshold,
+          CHARS_PER_TOKEN,
+          protectedMessages
+        )
+
+        if (truncationResult.truncatedCount > 0) {
+          const truncationTokensSaved = Math.floor(
+            truncationResult.totalBytesRemoved / CHARS_PER_TOKEN
+          )
+          tokensSaved += truncationTokensSaved
+
+          logger.info("Truncation completed", {
+            sessionID,
+            truncatedCount: truncationResult.truncatedCount,
+            bytesRemoved: truncationResult.totalBytesRemoved,
+            tokensSaved: truncationTokensSaved,
+            tools: truncationResult.truncatedTools.map((t) => t.toolName),
+          })
+        } else {
+          logger.info("Truncation completed - nothing to truncate", { sessionID })
+        }
+      }
+
+      // Recalculate usage
+      const currentTokens = totalUsed - tokensSaved
+      usageRatio = currentTokens / contextLimit
+
+      logger.info("After DCP + Truncation", {
+        sessionID,
+        originalTokens: totalUsed,
+        tokensSaved,
+        currentTokens,
+        newUsageRatio: usageRatio.toFixed(2),
+        threshold,
+      })
+
+      // Decision: Skip summarization if under threshold
+      if (usageRatio < threshold) {
+        await client.tui.showToast({
+          body: {
+            title: "Smart Compaction Success",
+            message: `Reduced to ${(usageRatio * 100).toFixed(0)}% via DCP + truncation. No summarization needed.`,
+            variant: "success",
+            duration: 4000,
+          },
+        }).catch(() => {})
+
+        logger.info("Skipping summarization - pruning was sufficient", {
+          sessionID,
+          tokensSaved,
+          newUsageRatio: usageRatio.toFixed(2),
+        })
+
+        state.compactionInProgress.delete(sessionID)
+        return
+      }
+
+      // Fallback: Trigger summarization
+      await client.tui.showToast({
+        body: {
+          title: "Smart Compaction",
+          message: `Still at ${(usageRatio * 100).toFixed(0)}% after pruning. Summarizing...`,
+          variant: "warning",
+          duration: 3000,
+        },
+      }).catch(() => {})
+
+      logger.info("Triggering summarization", { sessionID, usageRatio })
+
+      const summarizeBody = { providerID, modelID, auto: true }
+      await client.session.summarize({
+        path: { id: sessionID },
+        body: summarizeBody as never,
+        query: { directory },
+      })
+
+      await client.tui.showToast({
+        body: {
+          title: "Compaction Complete",
+          message: "Session compacted successfully.",
+          variant: "success",
+          duration: 2000,
+        },
+      }).catch(() => {})
+
+      logger.info("Summarization completed", { sessionID })
+    } catch (err) {
+      logger.error("Compaction failed", { sessionID, error: String(err) })
+    } finally {
+      state.compactionInProgress.delete(sessionID)
+    }
+  }
+
+  /**
+   * Event handler for message.updated and session.deleted events
+   */
+  return async ({ event }: { event: { type: string; properties?: unknown } }) => {
+    const props = event.properties as Record<string, unknown> | undefined
+
+    // Handle session deletion - cleanup state
+    if (event.type === "session.deleted") {
+      const sessionInfo = props?.info as { id?: string } | undefined
+      if (sessionInfo?.id) {
+        state.lastCompactionTime.delete(sessionInfo.id)
+        state.compactionInProgress.delete(sessionInfo.id)
+        logger.debug("Cleaned up session state", { sessionID: sessionInfo.id })
+      }
+      return
+    }
+
+    // Handle message updated - check for compaction
+    if (event.type === "message.updated") {
+      const info = props?.info as MessageInfo | undefined
+      if (!info) return
+
+      // Only process finished assistant messages
+      if (info.role !== "assistant" || !info.finish) return
+
+      const sessionID = info.sessionID
+      if (!sessionID) return
+
+      await checkAndTriggerCompaction(sessionID, info)
+      return
+    }
+  }
+}
diff --git a/lib/preemptive/model-limits.ts b/lib/preemptive/model-limits.ts
new file mode 100644
index 0000000..9acded7
--- /dev/null
+++ b/lib/preemptive/model-limits.ts
@@ -0,0 +1,38 @@
+/**
+ * Model context limit inference utilities
+ */
+
+import { MODEL_CONTEXT_PATTERNS, DEFAULT_CONTEXT_LIMIT, EXTENDED_CONTEXT_LIMIT } from "./constants"
+
+/**
+ * Infer context limit from model ID pattern
+ * Returns extended context limit for Claude models if env vars are set
+ */
+export function inferContextLimit(modelID: string): number {
+  // Check for Claude models with extended context
+  if (/claude-(opus|sonnet|haiku)/i.test(modelID)) {
+    return EXTENDED_CONTEXT_LIMIT
+  }
+
+  // Check other model patterns
+  for (const { pattern, limit } of MODEL_CONTEXT_PATTERNS) {
+    if (pattern.test(modelID)) {
+      return limit
+    }
+  }
+
+  return DEFAULT_CONTEXT_LIMIT
+}
+
+/**
+ * Calculate token usage ratio
+ */
+export function calculateUsageRatio(
+  inputTokens: number,
+  outputTokens: number,
+  cacheReadTokens: number,
+  contextLimit: number
+): number {
+  const totalUsed = inputTokens + cacheReadTokens + outputTokens
+  return totalUsed / contextLimit
+}
diff --git a/lib/preemptive/storage.ts b/lib/preemptive/storage.ts
new file mode 100644
index 0000000..ce81680
--- /dev/null
+++ b/lib/preemptive/storage.ts
@@ -0,0 +1,297 @@
+/**
+ * Storage utilities for preemptive compaction
+ * Handles finding and truncating tool outputs
+ */
+
+import { existsSync, readdirSync, readFileSync, writeFileSync, statSync } from "fs"
+import { join } from "path"
+import { homedir } from "os"
+import type { TruncationResult, ToolResultInfo, StoredToolPart } from "./types"
+import { TRUNCATION_MESSAGE, CHARS_PER_TOKEN } from "./constants"
+
+// OpenCode storage directories
+const OPENCODE_STORAGE = join(homedir(), ".local", "share", "opencode")
+const MESSAGE_STORAGE = join(OPENCODE_STORAGE, "message")
+const PART_STORAGE = join(OPENCODE_STORAGE, "part")
+
+/**
+ * Get the message directory for a session
+ */
+function getMessageDir(sessionID: string): string | null {
+  if (!existsSync(MESSAGE_STORAGE)) return null
+
+  // Try direct path first
+  const directPath = join(MESSAGE_STORAGE, sessionID)
+  if (existsSync(directPath)) return directPath
+
+  // Search in subdirectories (for multi-project setups)
+  try {
+    for (const dir of readdirSync(MESSAGE_STORAGE)) {
+      const sessionPath = join(MESSAGE_STORAGE, dir, sessionID)
+      if (existsSync(sessionPath)) return sessionPath
+    }
+  } catch {
+    return null
+  }
+
+  return null
+}
+
+/**
+ * Get all message IDs for a session
+ */
+function getMessageIds(sessionID: string): string[] {
+  const messageDir = getMessageDir(sessionID)
+  if (!messageDir || !existsSync(messageDir)) return []
+
+  const messageIds: string[] = []
+  try {
+    for (const file of readdirSync(messageDir)) {
+      if (!file.endsWith(".json")) continue
+      const messageId = file.replace(".json", "")
+      messageIds.push(messageId)
+    }
+  } catch {
+    return []
+  }
+
+  return messageIds
+}
+
+/**
+ * Find tool results sorted by output size (largest first)
+ * Optionally protects the last N messages from truncation
+ */
+export function findToolResultsBySize(
+  sessionID: string,
+  protectedMessageCount: number = 0
+): ToolResultInfo[] {
+  const messageIds = getMessageIds(sessionID)
+  const results: ToolResultInfo[] = []
+
+  // Protect the last N messages from truncation
+  const protectedMessageIds = new Set<string>()
+  if (protectedMessageCount > 0 && messageIds.length > 0) {
+    const messageDir = getMessageDir(sessionID)
+    if (messageDir) {
+      const messageTimestamps: Array<{ id: string; mtime: number }> = []
+      for (const msgId of messageIds) {
+        try {
+          const msgPath = join(messageDir, `${msgId}.json`)
+          if (existsSync(msgPath)) {
+            const stat = statSync(msgPath)
+            messageTimestamps.push({ id: msgId, mtime: stat.mtimeMs })
+          }
+        } catch {
+          continue
+        }
+      }
+      // Sort by mtime descending (newest first)
+      messageTimestamps.sort((a, b) => b.mtime - a.mtime)
+      // Protect the most recent N messages
+      for (let i = 0; i < Math.min(protectedMessageCount, messageTimestamps.length); i++) {
+        protectedMessageIds.add(messageTimestamps[i].id)
+      }
+    }
+  }
+
+  for (const messageID of messageIds) {
+    // Skip protected messages
+    if (protectedMessageIds.has(messageID)) continue
+
+    const partDir = join(PART_STORAGE, messageID)
+    if (!existsSync(partDir)) continue
+
+    try {
+      for (const file of readdirSync(partDir)) {
+        if (!file.endsWith(".json")) continue
+        try {
+          const partPath = join(partDir, file)
+          const content = readFileSync(partPath, "utf-8")
+          const part = JSON.parse(content) as StoredToolPart
+
+          // Only include completed tool parts with output that aren't already truncated
+          if (part.type === "tool" && part.state?.output && !part.truncated) {
+            results.push({
+              partPath,
+              partId: part.id,
+              messageID,
+              toolName: part.tool,
+              outputSize: part.state.output.length,
+            })
+          }
+        } catch {
+          continue
+        }
+      }
+    } catch {
+      continue
+    }
+  }
+
+  // Sort by output size descending (largest first)
+  return results.sort((a, b) => b.outputSize - a.outputSize)
+}
+
+/**
+ * Find the largest tool result for a session
+ */
+export function findLargestToolResult(sessionID: string): ToolResultInfo | null {
+  const results = findToolResultsBySize(sessionID)
+  return results.length > 0 ? results[0] : null
+}
+
+/**
+ * Truncate a single tool result
+ */
+export function truncateToolResult(partPath: string): {
+  success: boolean
+  toolName?: string
+  originalSize?: number
+} {
+  try {
+    const content = readFileSync(partPath, "utf-8")
+    const part = JSON.parse(content) as StoredToolPart
+
+    if (!part.state?.output) {
+      return { success: false }
+    }
+
+    const originalSize = part.state.output.length
+    const toolName = part.tool
+
+    // Mark as truncated and replace output
+    part.truncated = true
+    part.originalSize = originalSize
+    part.state.output = TRUNCATION_MESSAGE
+
+    // Add compaction timestamp
+    if (!part.state.time) {
+      part.state.time = { start: Date.now() }
+    }
+    part.state.time.compacted = Date.now()
+
+    writeFileSync(partPath, JSON.stringify(part, null, 2))
+
+    return { success: true, toolName, originalSize }
+  } catch {
+    return { success: false }
+  }
+}
+
+/**
+ * Get total size of all tool outputs for a session
+ */
+export function getTotalToolOutputSize(sessionID: string): number {
+  const results = findToolResultsBySize(sessionID)
+  return results.reduce((sum, r) => sum + r.outputSize, 0)
+}
+
+/**
+ * Count how many tool results have been truncated in a session
+ */
+export function countTruncatedResults(sessionID: string): number {
+  const messageIds = getMessageIds(sessionID)
+  let count = 0
+
+  for (const messageID of messageIds) {
+    const partDir = join(PART_STORAGE, messageID)
+    if (!existsSync(partDir)) continue
+
+    try {
+      for (const file of readdirSync(partDir)) {
+        if (!file.endsWith(".json")) continue
+        try {
+          const content = readFileSync(join(partDir, file), "utf-8")
+          const part = JSON.parse(content)
+          if (part.truncated === true) {
+            count++
+          }
+        } catch {
+          continue
+        }
+      }
+    } catch {
+      continue
+    }
+  }
+
+  return count
+}
+
+/**
+ * Truncate tool outputs until we reach the target token count
+ * Returns information about what was truncated
+ */
+export function truncateUntilTargetTokens(
+  sessionID: string,
+  currentTokens: number,
+  maxTokens: number,
+  targetRatio: number = 0.8,
+  charsPerToken: number = CHARS_PER_TOKEN,
+  protectedMessageCount: number = 3
+): TruncationResult {
+  const targetTokens = Math.floor(maxTokens * targetRatio)
+  const tokensToReduce = currentTokens - targetTokens
+  const charsToReduce = tokensToReduce * charsPerToken
+
+  // Already under target
+  if (tokensToReduce <= 0) {
+    return {
+      success: true,
+      sufficient: true,
+      truncatedCount: 0,
+      totalBytesRemoved: 0,
+      targetBytesToRemove: 0,
+      truncatedTools: [],
+    }
+  }
+
+  const results = findToolResultsBySize(sessionID, protectedMessageCount)
+
+  // No tool results to truncate
+  if (results.length === 0) {
+    return {
+      success: false,
+      sufficient: false,
+      truncatedCount: 0,
+      totalBytesRemoved: 0,
+      targetBytesToRemove: charsToReduce,
+      truncatedTools: [],
+    }
+  }
+
+  let totalRemoved = 0
+  let truncatedCount = 0
+  const truncatedTools: Array<{ toolName: string; originalSize: number }> = []
+
+  // Truncate largest outputs first until we've removed enough
+  for (const result of results) {
+    const truncateResult = truncateToolResult(result.partPath)
+    if (truncateResult.success) {
+      truncatedCount++
+      const removedSize = truncateResult.originalSize ?? result.outputSize
+      totalRemoved += removedSize
+      truncatedTools.push({
+        toolName: truncateResult.toolName ?? result.toolName,
+        originalSize: removedSize,
+      })
+
+      // Stop if we've removed enough
+      if (totalRemoved >= charsToReduce) {
+        break
+      }
+    }
+  }
+
+  const sufficient = totalRemoved >= charsToReduce
+
+  return {
+    success: truncatedCount > 0,
+    sufficient,
+    truncatedCount,
+    totalBytesRemoved: totalRemoved,
+    targetBytesToRemove: charsToReduce,
+    truncatedTools,
+  }
+}
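Editor's note: the targeting math in `truncateUntilTargetTokens` is worth tracing once with concrete numbers. The caller in `lib/preemptive/index.ts` passes the configured threshold as `targetRatio` and the 4-chars-per-token heuristic from `constants.ts`; the session figures below are illustrative:

```ts
const currentTokens = 190_000
const maxTokens = 200_000
const targetRatio = 0.85 // index.ts passes the configured threshold here

const targetTokens = Math.floor(maxTokens * targetRatio) // 170_000
const tokensToReduce = currentTokens - targetTokens      // 20_000
const charsToReduce = tokensToReduce * 4                 // 80_000 chars of tool output
// Tool outputs are then truncated largest-first until >= 80_000 chars are gone;
// `sufficient: false` is reported if the unprotected outputs don't cover it.
```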
diff --git a/lib/preemptive/types.ts b/lib/preemptive/types.ts
new file mode 100644
index 0000000..f392d8e
--- /dev/null
+++ b/lib/preemptive/types.ts
@@ -0,0 +1,83 @@
+/**
+ * Types for preemptive compaction feature
+ */
+
+export interface PreemptiveCompactionState {
+  lastCompactionTime: Map<string, number>
+  compactionInProgress: Set<string>
+}
+
+export interface TokenInfo {
+  input: number
+  output: number
+  cache: {
+    read: number
+    write: number
+  }
+}
+
+export interface MessageInfo {
+  id: string
+  role: string
+  sessionID: string
+  providerID?: string
+  modelID?: string
+  tokens?: TokenInfo
+  summary?: boolean
+  finish?: boolean
+}
+
+export interface TruncationResult {
+  success: boolean
+  sufficient: boolean
+  truncatedCount: number
+  totalBytesRemoved: number
+  targetBytesToRemove: number
+  truncatedTools: Array<{ toolName: string; originalSize: number }>
+}
+
+export interface ToolResultInfo {
+  partPath: string
+  partId: string
+  messageID: string
+  toolName: string
+  outputSize: number
+}
+
+export interface StoredToolPart {
+  id: string
+  sessionID: string
+  messageID: string
+  type: "tool"
+  callID: string
+  tool: string
+  state: {
+    status: "pending" | "running" | "completed" | "error"
+    input: Record<string, unknown>
+    output?: string
+    error?: string
+    time?: {
+      start: number
+      end?: number
+      compacted?: number
+    }
+  }
+  truncated?: boolean
+  originalSize?: number
+}
+
+export interface PreemptiveCompactionConfig {
+  enabled: boolean
+  threshold: number
+  cooldownMs: number
+  minTokens: number
+  truncation: {
+    enabled: boolean
+    protectedMessages: number
+  }
+}
+
+export interface CompactionPhaseResult {
+  phase: "triggered" | "dcp" | "truncation" | "decision" | "skipped" | "summarized"
+  data: Record<string, unknown>
+}
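Editor's note: a sketch of how `StoredToolPart` might be narrowed when reading part files from disk. `findToolResultsBySize` performs these checks inline today; this helper is hypothetical, shown only to make the type's shape concrete:

```ts
import type { StoredToolPart } from "./lib/preemptive/types"

function isTruncatableToolPart(value: unknown): value is StoredToolPart {
  const part = value as Partial<StoredToolPart>
  return (
    typeof value === "object" &&
    value !== null &&
    part.type === "tool" &&
    typeof part.state?.output === "string" &&
    part.truncated !== true
  )
}

// JSON.parse returns unknown-shaped data; the guard makes the access safe:
// const part: unknown = JSON.parse(content)
// if (isTruncatableToolPart(part)) { /* part.state.output is a string here */ }
```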