diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..507b319 --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,606 @@ +# Agentic Kit Roadmap + +This document plans the next phases of work for `agentic-kit`. It supersedes +neither `REDESIGN_DECISIONS.md` nor `README.md` — those describe what exists. +This describes what will exist next, why, and what is explicitly out of scope. + +## Current State (snapshot) + +| Package | Status | +| ------------------------ | ------------------------------------------------------------------------------------------------------------ | +| `agentic-kit` | Core portability layer. Streaming, message model, providers registry, cross-provider transforms, usage/cost. | +| `@agentic-kit/agent` | Sequential agent loop. Tool execution, lifecycle events, abort/continue, JSON Schema validation. | +| `@agentic-kit/anthropic` | Provider adapter. Streaming, thinking, tool calls, multimodal, abort. | +| `@agentic-kit/openai` | Provider adapter. Streaming, reasoning, tool calls, multimodal, abort. OpenAI-compatible endpoints. | +| `@agentic-kit/ollama` | Provider adapter. Local inference, embeddings. **Tool execution in streaming is a stub.** | + +The agent loop today runs to completion in-process: it does not pause for +out-of-band input and has no transport layer above it. Consumers wire it into +their own HTTP layer and supply their own React bindings. + +## Design Principles (carried forward) + +- Provider-agnostic core; OpenAI-compatible is a compatibility class, not a brand. +- No schema-library coupling at the core (JSON Schema only). +- Normalize provider differences inward; do not leak them. +- Runtime-agnostic; consume standard Web platform primitives (`Response`, + `ReadableStream`, `AbortSignal`, `fetch`). +- Headless. The kit ships no opinionated UI. +- Composable. Core stays minimal; extensions are opt-in packages. +- Storage is pluggable. Defaults work for development; production swaps in. + +## Phase 0 — Test Infrastructure (do first) + +Phase 1 cannot land cleanly without a small set of shared test helpers. Build +these first; everything afterward inherits the same testing idiom. + +### 0.1 Test Conventions + +Three rules the kit follows: + +1. **Deterministic by default.** Every package's default `pnpm test` runs only + unit tests against scripted mocks. No network, no API keys, no flakes. +2. **Live tests are gated and opt-in.** Files named `*.live.test.ts` and + workspace scripts like `test:live:*` exist for exercising real provider + APIs. Never required in CI by default. +3. **One environment per package.** Most packages run `testEnvironment: 'node'`. + The single exception is `@agentic-kit/react`, which runs `jsdom`. There is + no workspace-wide jsdom; the asymmetry is intentional. + +### 0.2 Shared Test Helpers (repo-internal, not a package) + +The kit needs a small set of reusable test helpers — scripted providers, SSE +stubs, parsers, contract suites. These live as a **repo-internal directory**, +not a published package and not a workspace package. + +Layout: `tools/test/` at the repo root, plain `.ts` files, imported via a +tsconfig `paths` alias (e.g., `@test/scripted-provider`) from each package's +test config. No `package.json`, no version, no public API surface, no +publishing concerns. + +Why not a package: +- Dev-only code in a `"private": true` workspace package is a publishing + ceremony with no upside; the alternative is a directory. +- Promotes test code to a load-bearing public API the moment a consumer + installs it. +- Reference: AI SDK keeps its test helpers in-package, not as a separate + workspace package. + +Helpers live wherever they are simplest to maintain: shared idioms in +`tools/test/`, package-specific helpers co-located in that package's +`__tests__/`. Duplication of a 30-line scripted provider across packages is +acceptable; promotion to `tools/test/` happens when a third package needs the +same helper. + +```ts +// scripted mock provider — replaces inline streamFn boilerplate +function createScriptedProvider(opts: { + responses: AssistantMessageResponse[] + delayMs?: number +}): ProviderAdapter + +// SSE response stub for serialization tests and useChat fetch mocks +function createScriptedSSEResponse(events: AgentEvent[]): Response + +// SSE parser for assertions on emitted bytes +function parseSSEStream(stream: ReadableStream): AsyncIterable + +// portable contract suite for any RunStore implementation +function runRunStoreContractTests(makeStore: () => RunStore | Promise): void + +// small fixtures +function makeFakeModel(overrides?: Partial): ModelDescriptor +``` + +Existing tests that inline-construct a scripted provider migrate to use the +helper as part of this phase. No behavior change; cleanup only. + +If a consumer application later wants to write provider-mocking tests of its +own, it copies the relevant helper (each is small) rather than installing a +dep. That is intentional. + +### 0.3 Integration Test Lane + +A workspace-level `pnpm test:integration` script. Brings up +`http.createServer` in-process, runs `agent.start(...).toResponse()` against +it, exercises pause/resume across a real HTTP boundary via `fetch`. Mock +providers, real HTTP, real serialization. Catches wire-format and abort +regressions that pure unit tests miss. + +Optional in Phase 1 PRs; required for any 1.0 release of `@agentic-kit/agent`'s +new pause/resume APIs. + +### 0.4 SSE Wire-Format Tests + +A dedicated `__tests__/sse.test.ts` in `@agentic-kit/agent` covers parser +edge cases: chunks split mid-event, multi-line `data:` lines, comment lines, +event-type framing, trailing newlines, mid-event abort. Easy to under-test, +easy to break silently. Hand-crafted byte sequences only; no provider in +the loop. + +--- + +## Phase 1 — Pause/Resume + React Bindings (must) + +The single architectural change behind Phase 1: the agent loop becomes +**checkpoint-able**. Tools may declare a `decision` schema; when the loop hits +such a tool, it persists run state, emits a structured event, and waits for a +matching decision payload before continuing. Everything else in Phase 1 follows +from this. + +### 1.1 Pausable Tools + +#### Problem + +Many real agent flows need structured input from outside the loop before a +tool can be considered safe or actionable: human approval on destructive +operations, multi-choice routing on a generated proposal, signed authorization, +delayed completion of a long-running external job. Today the loop has no way +to express this — tools must either run unconditionally or be elided. + +#### Design + +Extend `AgentTool` with an optional `decision` JSON Schema. The agent loop: + +1. When the LLM emits a call to a tool that declares `decision`: + - Validate the LLM's input against `parameters` as today. + - Emit a `tool_decision_pending` event with the input and the schema. + - Persist the run via the configured `RunStore` (see 1.2). + - Halt the loop and return. +2. The host invokes `agent.resume(runId, decision)`: + - Load run state from the `RunStore`. + - Validate `decision` against the tool's `decision` schema. + - Call `tool.execute(input, decision, ctx)`. + - Continue the loop with the result. + +Tools without a `decision` schema run as today — synchronously inside the loop. + +#### API + +```ts +interface AgentTool extends ToolDefinition { + label: string + decision?: JsonSchema // optional; declares structured outside-input + execute( + toolCallId: string, + input: Record, + decision: unknown, // undefined for non-pausable tools + signal?: AbortSignal, + onUpdate?: (partial: AgentToolResult) => void, + ): Promise +} + +class Agent { + // existing + prompt(input: string | Message): Promise + abort(): void + + // new + resume(runId: string, decision: unknown): Promise +} + +type AgentEvent = + // ... existing events + | { type: 'tool_decision_pending' + runId: string + toolCallId: string + toolName: string + input: Record + schema: JsonSchema } +``` + +A pausable tool with no `decision` is invalid — the field's presence is the +mechanism. Validation runs before `execute` is called; a malformed decision +rejects with a typed error and does not consume the run. + +#### Naming + +The field is named **`decision`** because the dominant case is a user or +upstream system choosing how the tool should proceed. The variable inside +`execute` is also `decision`; React surfaces it as `respondWithDecision`. If +later phases introduce a categorically different out-of-band input (e.g., raw +results from a client-executed tool), it gets a sibling field with its own +shape — the kit does not over-generalize now. + +#### Testing + +Unit tests in `@agentic-kit/agent`. Uses `createScriptedProvider` from 0.2. + +- Scripted provider emits a tool call to a `decision`-bearing tool. Assert: + `tool_decision_pending` event emitted, `runStore.save` called, loop halted. +- `agent.resume(runId, valid)` with a fresh scripted response. Assert: + `tool.execute` invoked with the decision argument, loop continues, final + event emitted. +- Resume with a decision that fails schema validation. Assert: typed + validation error, run not consumed, retry permitted. +- Resume with non-existent `runId`. Assert: typed `RunNotFound` error. +- `agent.abort()` while paused. Assert: clean cancellation, run cleaned up. +- Tool without `decision` still runs synchronously (regression guard). + +### 1.2 RunStore + +#### Problem + +Pause/resume across HTTP requests requires the loop's state to survive between +the pause and the resume call. The kit must define where that state lives +without forcing a specific backend on consumers. + +#### Design + +A small interface plus a default implementation. The kit owns the schema of +what gets persisted (the run record); the consumer owns where it lands. + +```ts +interface AgentRun { + id: string + model: string + systemPrompt?: string + tools: ToolDefinition[] + messages: Message[] + pending?: { + toolCallId: string + toolName: string + input: Record + } + createdAt: number + updatedAt: number +} + +interface RunStore { + save(run: AgentRun): Promise + load(id: string): Promise + delete(id: string): Promise +} + +class MemoryRunStore implements RunStore { /* default, ephemeral */ } +``` + +`@agentic-kit/agent` ships `MemoryRunStore` for development and single-process +deployments. Production users supply a Redis-, KV-, or DB-backed implementation. +The kit ships no production backend. + +The kit deliberately does **not** persist final conversation history. That is a +consumer concern. See 1.4 for lifecycle hooks. + +#### Testing + +Unit tests in `@agentic-kit/agent`. + +- `MemoryRunStore`: save → load round-trip; `load` of missing id returns + `undefined`; `delete` is idempotent; `delete` then `load` returns `undefined`. +- `runRunStoreContractTests(makeMemoryStore)` from 0.2 runs the portable + contract suite against `MemoryRunStore`. The same export is consumed by + any third-party `RunStore` implementation. +- Concurrent save/load on the same id (last write wins, no torn reads). +- Re-pause `createdAt` preservation: a second `save()` of the same run id keeps + the original `createdAt`; only `updatedAt` advances. (1.1 does not yet + enforce this — fold into the contract suite.) +- Abort-during-save race: `agent.abort()` while a `runStore.save()` is + in-flight resolves without orphaning the persisted record or surfacing a + rejected save promise. +- Mixed-batch tool ordering: when an assistant turn contains a regular tool + call followed by a decision-bearing tool whose arguments fail validation, + the persisted `messages` order matches the LLM's tool-call order. (Latent + in 1.1's invalid-args branch; surfaces only via the contract suite.) + +### 1.3 Run Serialization Helpers + +#### Problem + +The agent emits a stream of typed events. To use it across an HTTP boundary — +or any boundary that requires bytes — the consumer needs to serialize. The +kit should ship the canonical form so consumers do not reinvent it. + +#### Design + +Standard Web primitives only. No framework helpers. The agent run object +exposes both pull-based and push-based access. + +```ts +interface AgentRunHandle { + events(): AsyncIterable + toReadableStream(): ReadableStream + toResponse(init?: ResponseInit): Response // SSE-shaped body +} + +const handle = agent.start({ messages, ... }) +return handle.toResponse() +``` + +`toResponse` returns a `Response` with `Content-Type: text/event-stream`, each +`AgentEvent` serialized as one SSE frame. Compatible with any runtime that +speaks standard `Response` and `ReadableStream`: Next.js App Router, Hono, +Bun, Deno, Cloudflare Workers, raw Node 18+. + +A symmetric pair handles resume: + +```ts +const handle = agent.resumeRun({ runId, decision, runStore }) +return handle.toResponse() +``` + +The wire format is the kit's `AgentEvent` discriminated union, serialized as +JSON in SSE `data:` lines. No translation to any third-party protocol; if a +consumer wants to bridge to one, they write the bridge. + +#### Testing + +Unit tests in `@agentic-kit/agent`. + +- `events()`: scripted provider events come out of the async iterable in + emission order with correct shapes. +- `toReadableStream()`: bytes parsed back via `parseSSEStream` (from 0.2) + reproduce the original event sequence. +- `toResponse()`: assert `Content-Type: text/event-stream`, no caching headers, + body parses as above. +- Wire-format edge cases live in `__tests__/sse.test.ts` (0.4): split chunks, + multi-line `data:`, comments, trailing newlines, mid-event abort. +- Backpressure: stream consumer pauses; producer respects it (no unbounded + buffer). + +### 1.4 `@agentic-kit/react` + +#### Problem + +The dominant consumer surface is browser UIs that stream from an agent endpoint. +A canonical React hook avoids every consumer reimplementing the same fetch / +parse / state-update / abort / resume loop. + +#### Design + +One hook. Headless — returns state and actions; renders nothing. Persistence +is delegated to the consumer via lifecycle callbacks. + +```ts +import { useChat } from '@agentic-kit/react' + +const chat = useChat({ + api: '/api/chat', + body: () => ({ /* extra request body fields */ }), + initialMessages: storedMessages, + onMessage: (m) => {}, // streaming partial state + onFinish: (m) => {}, // turn complete; consumer may persist + onDecisionPending: (event) => {}, // tool paused; consumer renders UI +}) + +chat.send('hello') +chat.respondWithDecision(value) // delivers decision to /resume +chat.abort() +chat.messages // Message[] +chat.isStreaming // boolean +chat.pendingDecision // event | undefined +chat.error // unknown | undefined +``` + +Behaviors the hook is responsible for: + +- POSTing to `api` with `messages` plus any consumer-supplied body fields. +- Parsing the SSE response into `AgentEvent`s and folding them into `messages`. +- Emitting `onMessage` per partial update, `onFinish` per turn end. +- Surfacing `tool_decision_pending` events as `chat.pendingDecision` and via + `onDecisionPending`. +- Rebroadcasting `respondWithDecision(value)` as a POST to `/resume` (path + configurable) with `{ runId, decision }`, and resuming stream consumption + from the response. +- Plumbing an `AbortSignal` through `chat.abort()`. + +The hook does not own persistence, modes, system prompts, or any UI shape. + +#### Testing + +The only package using `testEnvironment: 'jsdom'`. Adds devDeps: +`jest-environment-jsdom`, `@testing-library/react`, `react`, `react-dom`. Adds +peerDeps: `react`, `react-dom`. `globalThis.fetch` is stubbed per-test to +return `createScriptedSSEResponse(events)` from 0.2. + +- Send → stream → finish: messages assemble in order; `isStreaming` transitions; + `onMessage` and `onFinish` fire with correct payloads. +- `body()` callback's fields appear in the POST body. +- `chat.abort()` reaches the fetch mock's `AbortSignal`; state cleans up; no + late updates after abort. +- Decision-pending: `onDecisionPending` fires; `chat.pendingDecision` set; + `respondWithDecision(value)` POSTs to `/resume` with `{ runId, decision }`; + the resumed stream folds into `messages`. +- Network error / non-200 response: `chat.error` set; `messages` not corrupted. +- Malformed SSE bytes: hook surfaces an error rather than crashing. +- `initialMessages` hydrates state on mount. + +--- + +## Phase 2 — Production Polish (should) + +### 2.1 Prompt Caching API + +The kit currently reads `cacheRead` and `cacheWrite` from `Usage` but exposes +no API to *set* cache control on outgoing messages. Both Anthropic and OpenAI +(via Anthropic-compatible providers and recent OpenAI features) support +prompt caching, and the cost savings are material at scale. + +Design sketch: add an optional `cache?: 'short' | 'long'` flag at the message +level (or at content-block level). Each provider adapter translates to its +native control mechanism (Anthropic `cache_control: { type: 'ephemeral' }`, +OpenAI cache strategy hints). The flag is advisory; providers without support +ignore it. + +#### Testing + +Unit tests per provider adapter, matching the existing +`anthropic.test.ts` / `openai.test.ts` idiom. + +- Mock HTTP intercepts the outgoing request body and headers. +- Build a `Context` whose messages carry `cache: 'short' | 'long'`. +- Anthropic: assert `cache_control: { type: 'ephemeral' }` on flagged blocks. +- OpenAI: assert the corresponding native cache hint. +- Ollama and other no-support providers: assert the flag is silently ignored, + no error. +- `Usage.cacheRead` / `cacheWrite` are populated correctly on the assistant + response (existing usage assertion pattern). + +### 2.2 Telemetry / Middleware Hooks + +The agent loop today has no insertion points for observability or +interception. Production consumers need at minimum: + +- A `before/after` provider call hook (latency, errors, token counts). +- A `before/after` tool call hook (arguments, results, durations). +- Stream event tap (without buffering the stream). + +Design as middleware composition over the run, akin to a small async +interceptor chain. Standard error type for transient vs. terminal failures +to support upstream retry logic. + +#### Testing + +Unit tests in `@agentic-kit/agent`. + +- Register middleware, run a scripted loop, assert hook invocation order and + arguments (provider request, response, tool call, tool result). +- Multiple middlewares compose left-to-right with predictable ordering. +- A throwing middleware does not crash the loop; the error surfaces via the + defined channel. +- `before/after` pairs see matching correlation IDs (request ↔ response). +- Stream-event tap does not buffer or reorder events. + +--- + +## Phase 3 — Optional Extensions (could) + +### 3.1 Full Ollama Tool Support + +The Ollama adapter currently does not parse tool calls in streaming responses. +Bring it to feature parity with the Anthropic and OpenAI adapters: tool call +deltas, tool result round-trips, and live tests covering the full loop. + +#### Testing + +- Unit: parse scripted Ollama NDJSON tool-call chunks; assert the canonical + `AssistantMessageEvent` sequence is emitted. +- Live (gated, in `ollama.live.test.ts`): tool-using smoke test against a + known-good local Ollama model. Skipped when `OLLAMA_LIVE_MODEL` is unset. + +### 3.2 Retry / Backoff + +A small built-in retry policy for transient provider failures (HTTP 408, 425, +429, 500, 502, 503, 504; aborted-not-by-user network errors). Configurable +attempt count, jittered exponential backoff. Disabled by default — consumers +opt in. Layered above provider adapters, below the agent loop. + +#### Testing + +Unit tests using an injectable clock. + +- Provider mock returns scripted transient errors then success; assert retry + count and final outcome. +- Backoff timings match the configured curve (use a fake clock; never sleep + for real in tests). +- Non-retriable errors (400, 401, 403) fail immediately; no retries attempted. +- Abort during a retry wait cancels promptly; no further attempts. +- Retries respect a global deadline; total time bounded. + +### 3.3 Stream Resume on Disconnect + +If the agent loop is mid-run when the SSE connection drops, the client should +be able to reconnect with the run ID and pick up where it left off. The +machinery is largely a free side-effect of `RunStore` — the run survives; +only stream-position tracking and an event replay endpoint are new. Useful +for flaky-network and long-running flows. + +#### Testing + +- Unit: abort an in-flight `events()` iterator. Reload the run by id and call + `resumeRun`. Assert: events continue from the last-emitted checkpoint, no + duplicate side effects. +- Integration (lane from 0.3): same flow over real HTTP — drop the connection + mid-stream, reconnect with `runId`, assert event continuity and correct + `Last-Event-ID` semantics. + +### 3.4 Client-Side Tool Execution + +For tools that genuinely require browser-only capabilities (DOM access, +WebRTC, File System Access API, locally-running services, hardware bridges, +wallet signing), introduce a `runs: 'client'` flag. The mechanism reuses the +pause/resume rails: such tools emit a `tool_client_execute_pending` event, +the browser-side dispatcher runs the registered local executor, and the +result returns via the same resume endpoint shape. + +This is deferred until a real use case appears. Most agent applications do +not need it, and shipping it prematurely would constrain the design. + +#### Testing + +- Unit (in `@agentic-kit/agent`): protocol layer only. Scripted provider + emits a `runs: 'client'` tool call. Assert: `tool_client_execute_pending` + event fires, loop halts. `agent.resume(runId, { result })` continues with + the supplied result as the tool result. +- Unit (in `@agentic-kit/react`, jsdom): client dispatcher. Register a local + executor, fire a synthetic pending event, assert: executor runs with the + tool input, resulting POST to `/resume` includes the correct payload, the + resumed stream folds into `messages`. + +--- + +## Non-Goals + +The kit will not ship the following. They belong in consumer applications, +companion packages, or other ecosystems entirely. + +- **Conversation history persistence.** Lifecycle hooks expose what is needed; + storage is consumer-owned. Browser, server, sync model — none of it is the + kit's call. +- **Structured output / `generateObject` analog.** Tool calls already provide + typed structured outputs via JSON Schema. A second mechanism is redundant. +- **Schema library coupling.** No `@agentic-kit/zod`, no `@agentic-kit/typebox`. + Consumers convert their schema library of choice to JSON Schema at the + boundary; this is a one-line operation for every popular library. +- **Framework-specific helpers.** No Next.js, Hono, Express, Fastify packages. + Standard `Response` and `ReadableStream` cover all of them. +- **UI rendering / component library.** The kit is headless. React hook + exposes state and actions; consumers render however they want. +- **Embeddings as a primary capability.** Per `REDESIGN_DECISIONS.md` #14, + embeddings live behind an optional capability interface or companion + package, not in the conversational core. +- **System prompt construction utilities.** Prompt design is consumer-owned. +- **Conversation modes / agent personas.** Application concern. +- **Built-in production storage backends.** `MemoryRunStore` is the only + implementation the kit ships; Redis, KV, DB backends are for consumers. + +## Package Layout After Phase 1 + +| Package | Change | +| ------------------------ | ------------------------------------------------------------------------------------------- | +| `agentic-kit` | unchanged | +| `@agentic-kit/agent` | extended: pausable tools, `RunStore`, run serialization helpers, middleware hooks (Phase 2) | +| `@agentic-kit/anthropic` | unchanged in Phase 1; caching API in Phase 2 | +| `@agentic-kit/openai` | unchanged in Phase 1; caching API in Phase 2 | +| `@agentic-kit/ollama` | unchanged in Phase 1; tool support in Phase 3 | +| `@agentic-kit/react` | **new** — `useChat` hook | + +Shared test helpers live in `tools/test/` (repo-internal directory, not a +package). Phase 2 and 3 add no new packages; everything extends in place. + +## Open Questions + +- **Run record schema versioning.** Once `RunStore` is shipped, the on-disk + `AgentRun` shape becomes a compatibility surface. Decide on an explicit + version field and migration story before 1.0. +- **Decision schema validator scope.** Resolved (1.1): the decision validator + reuses `validateSchema` from `packages/agent/src/validation.ts` — same code + path as tool inputs. Discriminated-union and `oneOf` / `anyOf` coverage is + still untested; fold into the 1.2 contract suite. +- **Lifecycle events across pause boundaries.** On resume, `agent_start` + re-fires (each `runLoop` entry is a fresh start) but `turn_start` does not + (the persisted assistant message is reused, not regenerated). This + asymmetry is invisible to a single-prompt consumer but matters for any + listener that tracks turn vs. run lifecycle. Decide before 1.4 whether to + introduce a distinct `agent_resume` event or to redocument `agent_start` + with explicit "loop entry" semantics — the `@agentic-kit/react` hook will + codify whichever choice externally. +- **SSE vs. NDJSON.** SSE is the proposed default. NDJSON is simpler but lacks + reconnection semantics and event-type framing. Revisit if real-world + consumers report SSE problems behind specific proxies. +- **`onDecisionPending` ergonomics.** Whether the React hook should auto-route + the next stream from `/resume` or require the consumer to call a follow-up + method explicitly. Default to auto for ergonomics; expose an opt-out. +- **Live test policy for paid providers.** Anthropic/OpenAI live tests would + burn API credits. Default position: gated `*.live.test.ts` files with + env-var keys, manually triggered, never required by per-PR CI. diff --git a/package.json b/package.json index c5a1ff6..ad4e2f2 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "build": "pnpm -r run build", "build:dev": "pnpm -r run build:dev", "test": "pnpm -r run test", + "test:integration": "jest --config tests/integration/jest.config.js", "typecheck": "node ./scripts/typecheck.js", "test:live:ollama": "pnpm --filter @agentic-kit/ollama run test:live:smoke", "test:live:ollama:extended": "pnpm --filter @agentic-kit/ollama run test:live:extended", diff --git a/packages/agent/__tests__/agent.test.ts b/packages/agent/__tests__/agent.test.ts index aa6681c..d724c8f 100644 --- a/packages/agent/__tests__/agent.test.ts +++ b/packages/agent/__tests__/agent.test.ts @@ -4,117 +4,43 @@ import { createAssistantMessageEventStream, type ModelDescriptor, } from 'agentic-kit'; +import { + createScriptedProvider, + makeFakeAssistantMessage, + makeFakeModel, +} from '@test/index'; -import { Agent } from '../src'; - -function createModel(): ModelDescriptor { - return { - id: 'demo', - name: 'Demo', - api: 'fake', - provider: 'fake', - baseUrl: 'http://fake.local', - input: ['text'], - reasoning: false, - tools: true, - }; -} +import { + Agent, + type AgentEvent, + type AgentTool, + DecisionValidationError, + MemoryRunStore, + RunNotFoundError, + ToolNotRegisteredError, +} from '../src'; describe('@agentic-kit/agent', () => { it('runs a minimal sequential tool loop', async () => { const responses = [ - { - role: 'assistant' as const, - api: 'fake', - provider: 'fake', - model: 'demo', - usage: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 2, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: 'toolUse' as const, - timestamp: Date.now(), + makeFakeAssistantMessage({ + usage: makeUsage(), + stopReason: 'toolUse', content: [ - { type: 'toolCall' as const, id: 'tool_1', name: 'echo', arguments: { text: 'hello' } }, + { type: 'toolCall', id: 'tool_1', name: 'echo', arguments: { text: 'hello' } }, ], - }, - { - role: 'assistant' as const, - api: 'fake', - provider: 'fake', - model: 'demo', - usage: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 2, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: 'stop' as const, - timestamp: Date.now(), - content: [{ type: 'text' as const, text: 'done' }], - }, + }), + makeFakeAssistantMessage({ + usage: makeUsage(), + stopReason: 'stop', + content: [{ type: 'text', text: 'done' }], + }), ]; - let callIndex = 0; - const streamFn = (_model: ModelDescriptor, _context: Context) => { - const stream = createAssistantMessageEventStream(); - const response = responses[callIndex++]; - - queueMicrotask(() => { - stream.push({ type: 'start', partial: response }); - if (response.content[0].type === 'toolCall') { - stream.push({ - type: 'toolcall_start', - contentIndex: 0, - partial: response, - }); - stream.push({ - type: 'toolcall_end', - contentIndex: 0, - toolCall: response.content[0], - partial: response, - }); - } else { - stream.push({ - type: 'text_start', - contentIndex: 0, - partial: response, - }); - stream.push({ - type: 'text_delta', - contentIndex: 0, - delta: 'done', - partial: response, - }); - stream.push({ - type: 'text_end', - contentIndex: 0, - content: 'done', - partial: response, - }); - } - stream.push({ - type: 'done', - reason: response.stopReason === 'toolUse' ? 'toolUse' : 'stop', - message: response, - }); - stream.end(response); - }); - - return stream; - }; - + const provider = createScriptedProvider({ responses }); const agent = new Agent({ - initialState: { - model: createModel(), - }, - streamFn, + initialState: { model: makeFakeModel({ id: 'demo', name: 'Demo' }) }, + streamFn: provider.stream, }); agent.setTools([ @@ -129,7 +55,7 @@ describe('@agentic-kit/agent', () => { }, required: ['text'], }, - execute: async (_toolCallId, params) => ({ + execute: async (_toolCallId, params, _decision) => ({ content: [{ type: 'text', text: String(params.text) }], }), }, @@ -155,20 +81,20 @@ describe('@agentic-kit/agent', () => { it('turns tool argument validation failures into error tool results and continues', async () => { const responses = [ - createAssistantResponse({ + makeFakeAssistantMessage({ stopReason: 'toolUse', content: [{ type: 'toolCall', id: 'tool_1', name: 'echo', arguments: {} }], }), - createAssistantResponse({ + makeFakeAssistantMessage({ stopReason: 'stop', content: [{ type: 'text', text: 'recovered' }], }), ]; - let callIndex = 0; + const provider = createScriptedProvider({ responses }); const agent = new Agent({ - initialState: { model: createModel() }, - streamFn: () => streamMessage(responses[callIndex++]), + initialState: { model: makeFakeModel({ id: 'demo', name: 'Demo' }) }, + streamFn: provider.stream, }); const execute = jest.fn(async () => ({ @@ -211,10 +137,10 @@ describe('@agentic-kit/agent', () => { it('records aborted assistant turns when the active stream is cancelled', async () => { const agent = new Agent({ - initialState: { model: createModel() }, + initialState: { model: makeFakeModel({ id: 'demo', name: 'Demo' }) }, streamFn: (_model: ModelDescriptor, _context: Context, options) => { const stream = createAssistantMessageEventStream(); - const partial = createAssistantResponse({ + const partial = makeFakeAssistantMessage({ stopReason: 'stop', content: [{ type: 'text', text: '' }], }); @@ -225,7 +151,7 @@ describe('@agentic-kit/agent', () => { options?.signal?.addEventListener( 'abort', () => { - const aborted = createAssistantResponse({ + const aborted: AssistantMessage = makeFakeAssistantMessage({ stopReason: 'aborted', errorMessage: 'aborted by test', content: [], @@ -256,76 +182,213 @@ describe('@agentic-kit/agent', () => { }); }); -function createAssistantResponse(overrides: Partial): AssistantMessage { +function makeUsage() { return { - ...createAssistantResponseBase(), - ...overrides, + input: 1, + output: 1, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 2, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, }; } -function createAssistantResponseBase(): AssistantMessage { - return { - role: 'assistant' as const, - api: 'fake', - provider: 'fake', - model: 'demo', - usage: { - input: 1, - output: 1, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 2, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: 'stop' as const, - timestamp: Date.now(), - content: [] as AssistantMessage['content'], - }; -} +describe('@agentic-kit/agent — pausable tools', () => { + function makeApprovalTool(execute: AgentTool['execute']): AgentTool { + return { + name: 'approve', + label: 'Approve', + description: 'Tool that requires explicit approval', + parameters: { + type: 'object', + properties: { target: { type: 'string' } }, + required: ['target'], + }, + decision: { + type: 'object', + properties: { approved: { type: 'boolean' } }, + required: ['approved'], + }, + execute, + }; + } + + function pauseResponse() { + return makeFakeAssistantMessage({ + stopReason: 'toolUse', + content: [ + { type: 'toolCall', id: 'tool_1', name: 'approve', arguments: { target: 'thing' } }, + ], + }); + } + + function finalResponse() { + return makeFakeAssistantMessage({ + stopReason: 'stop', + content: [{ type: 'text', text: 'finalized' }], + }); + } + + it('pauses on a decision-bearing tool, persists the run, and emits tool_decision_pending', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse()] }); + const runStore = new MemoryRunStore(); + const saveSpy = jest.spyOn(runStore, 'save'); + const execute = jest.fn(); + const events: AgentEvent[] = []; + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + runStore, + }); + agent.subscribe((event) => events.push(event)); + agent.setTools([makeApprovalTool(execute)]); + + await agent.prompt('approve thing'); + + expect(execute).not.toHaveBeenCalled(); + expect(saveSpy).toHaveBeenCalledTimes(1); + + const pendingEvent = events.find((e) => e.type === 'tool_decision_pending'); + expect(pendingEvent).toMatchObject({ + type: 'tool_decision_pending', + toolCallId: 'tool_1', + toolName: 'approve', + input: { target: 'thing' }, + schema: expect.objectContaining({ type: 'object' }), + }); + + const runId = (pendingEvent as { runId: string }).runId; + expect(runId).toBeTruthy(); + expect(agent.pendingRunId).toBe(runId); + expect(agent.state.isStreaming).toBe(false); + + expect(events.some((e) => e.type === 'agent_end')).toBe(false); -function streamMessage(message: AssistantMessage) { - const stream = createAssistantMessageEventStream(); - - queueMicrotask(() => { - stream.push({ type: 'start', partial: message }); - if (message.content[0]?.type === 'toolCall') { - stream.push({ - type: 'toolcall_start', - contentIndex: 0, - partial: message, - }); - stream.push({ - type: 'toolcall_end', - contentIndex: 0, - toolCall: message.content[0], - partial: message, - }); - } else { - stream.push({ - type: 'text_start', - contentIndex: 0, - partial: message, - }); - stream.push({ - type: 'text_delta', - contentIndex: 0, - delta: message.content[0]?.type === 'text' ? message.content[0].text : '', - partial: message, - }); - stream.push({ - type: 'text_end', - contentIndex: 0, - content: message.content[0]?.type === 'text' ? message.content[0].text : '', - partial: message, - }); - } - stream.push({ - type: 'done', - reason: message.stopReason === 'toolUse' ? 'toolUse' : 'stop', - message, + const stored = await runStore.load(runId); + expect(stored).toMatchObject({ + id: runId, + pending: { toolCallId: 'tool_1', toolName: 'approve', input: { target: 'thing' } }, }); - stream.end(message); + expect(stored?.tools[0]).not.toHaveProperty('execute'); }); - return stream; -} + it('resume invokes execute with the decision argument and continues the loop', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] }); + const execute = jest.fn( + async (_id: string, _params: Record, decision: unknown) => ({ + content: [{ type: 'text' as const, text: `decision=${JSON.stringify(decision)}` }], + }) + ); + const events: AgentEvent[] = []; + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + agent.subscribe((event) => events.push(event)); + agent.setTools([makeApprovalTool(execute)]); + + await agent.prompt('approve thing'); + const runId = agent.pendingRunId!; + expect(runId).toBeTruthy(); + + await agent.resume(runId, { approved: true }); + + expect(execute).toHaveBeenCalledTimes(1); + expect(execute.mock.calls[0]?.[2]).toEqual({ approved: true }); + expect(agent.pendingRunId).toBeUndefined(); + + expect(agent.state.messages.at(-1)).toMatchObject({ + role: 'assistant', + content: [{ type: 'text', text: 'finalized' }], + }); + expect(events.some((e) => e.type === 'agent_end')).toBe(true); + }); + + it('rejects a malformed decision and leaves the run resumable', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] }); + const runStore = new MemoryRunStore(); + const execute = jest.fn( + async (_id: string, _params: Record, decision: unknown) => ({ + content: [{ type: 'text' as const, text: `decision=${JSON.stringify(decision)}` }], + }) + ); + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + runStore, + }); + agent.setTools([makeApprovalTool(execute)]); + + await agent.prompt('approve thing'); + const runId = agent.pendingRunId!; + + await expect(agent.resume(runId, { approved: 'yes' })).rejects.toBeInstanceOf( + DecisionValidationError + ); + expect(execute).not.toHaveBeenCalled(); + expect(agent.pendingRunId).toBe(runId); + expect(await runStore.load(runId)).toBeDefined(); + + await agent.resume(runId, { approved: true }); + + expect(execute).toHaveBeenCalledTimes(1); + expect(agent.pendingRunId).toBeUndefined(); + expect(await runStore.load(runId)).toBeUndefined(); + }); + + it('throws RunNotFoundError when resuming an unknown run', async () => { + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: createScriptedProvider({ responses: [] }).stream, + }); + + await expect(agent.resume('does-not-exist', { approved: true })).rejects.toBeInstanceOf( + RunNotFoundError + ); + }); + + it('cleans up the persisted run when abort() is called while paused', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse()] }); + const runStore = new MemoryRunStore(); + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + runStore, + }); + agent.setTools([makeApprovalTool(jest.fn())]); + + await agent.prompt('approve thing'); + const runId = agent.pendingRunId!; + expect(await runStore.load(runId)).toBeDefined(); + + agent.abort(); + await new Promise((resolve) => setImmediate(resolve)); + + expect(agent.pendingRunId).toBeUndefined(); + expect(await runStore.load(runId)).toBeUndefined(); + }); + + it('throws ToolNotRegisteredError when resuming after the tool has been removed', async () => { + const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] }); + const tool = makeApprovalTool(jest.fn()); + + const agent = new Agent({ + initialState: { model: makeFakeModel() }, + streamFn: provider.stream, + }); + agent.setTools([tool]); + + await agent.prompt('approve thing'); + const runId = agent.pendingRunId!; + + agent.setTools([]); + + await expect(agent.resume(runId, { approved: true })).rejects.toBeInstanceOf( + ToolNotRegisteredError + ); + }); +}); diff --git a/packages/agent/__tests__/sse.test.ts b/packages/agent/__tests__/sse.test.ts new file mode 100644 index 0000000..682e3dd --- /dev/null +++ b/packages/agent/__tests__/sse.test.ts @@ -0,0 +1,114 @@ +// Exercises the `parseSSEStream` helper from tools/test/, not a production +// parser. The kit ships no SSE parser today; consumers parse on their side. +// These tests pin down the helper's edge-case behavior so future parser work +// has a baseline to match. +import { parseSSEStream } from '@test/index'; + +import type { AgentEvent } from '../src'; + +const encoder = new TextEncoder(); + +function streamFromChunks(chunks: string[]): ReadableStream { + return new ReadableStream({ + start(controller) { + for (const chunk of chunks) { + controller.enqueue(encoder.encode(chunk)); + } + controller.close(); + }, + }); +} + +async function collect(stream: ReadableStream): Promise { + const out: AgentEvent[] = []; + for await (const event of parseSSEStream(stream)) { + out.push(event); + } + return out; +} + +describe('parseSSEStream', () => { + it('parses a single complete event', async () => { + const events = await collect(streamFromChunks(['data: {"type":"agent_start"}\n\n'])); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('reassembles a payload split across chunks', async () => { + const events = await collect( + streamFromChunks(['data: {"type":"agen', 't_start"}\n', '\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('joins multiple data: lines with newlines into a single payload', async () => { + const events = await collect( + streamFromChunks(['data: {"type":\ndata: "agent_start"}\n\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('ignores comment lines starting with `:`', async () => { + const events = await collect( + streamFromChunks([': keepalive\ndata: {"type":"turn_start"}\n\n']) + ); + expect(events).toEqual([{ type: 'turn_start' }]); + }); + + it('ignores event:, id:, and retry: framing fields', async () => { + const events = await collect( + streamFromChunks([ + 'event: turn_start\nid: 1\nretry: 1000\ndata: {"type":"turn_start"}\n\n', + ]) + ); + expect(events).toEqual([{ type: 'turn_start' }]); + }); + + it('skips a [DONE] marker without yielding an event', async () => { + const events = await collect( + streamFromChunks([ + 'data: {"type":"agent_start"}\n\ndata: [DONE]\n\n', + ]) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('handles trailing newlines without emitting a spurious event', async () => { + const events = await collect( + streamFromChunks(['data: {"type":"agent_start"}\n\n\n\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('handles CRLF line endings', async () => { + const events = await collect( + streamFromChunks(['data: {"type":"agent_start"}\r\n\r\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('drops a final incomplete event when the stream ends mid-event', async () => { + const events = await collect( + streamFromChunks([ + 'data: {"type":"agent_start"}\n\n', + 'data: {"type":"turn_start"}', + ]) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); + + it('yields multiple complete events in order', async () => { + const events = await collect( + streamFromChunks([ + 'data: {"type":"agent_start"}\n\ndata: {"type":"turn_start"}\n\n', + ]) + ); + expect(events).toEqual([{ type: 'agent_start' }, { type: 'turn_start' }]); + }); + + it('honors an optional space after the `data:` field name', async () => { + const events = await collect( + streamFromChunks(['data:{"type":"agent_start"}\n\n']) + ); + expect(events).toEqual([{ type: 'agent_start' }]); + }); +}); diff --git a/packages/agent/__tests__/tsconfig.json b/packages/agent/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/agent/__tests__/tsconfig.json +++ b/packages/agent/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/agent/jest.config.js b/packages/agent/jest.config.js index 6622fd1..2069518 100644 --- a/packages/agent/jest.config.js +++ b/packages/agent/jest.config.js @@ -17,6 +17,7 @@ module.exports = { modulePathIgnorePatterns: ['dist/*'], moduleNameMapper: { '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', '^agentic-kit$': '/../agentic-kit/src', '^@agentic-kit/(.*)$': '/../$1/src', }, diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts index 8fc8503..a7ff4fd 100644 --- a/packages/agent/src/agent.ts +++ b/packages/agent/src/agent.ts @@ -1,3 +1,5 @@ +import { randomUUID } from 'node:crypto'; + import { type AssistantMessage, type Context, @@ -6,8 +8,19 @@ import { type Message, stream, type StreamOptions, + type ToolCallContent, + type ToolDefinition, } from 'agentic-kit'; +import { + type AgentRun, + type AgentRunPending, + DecisionValidationError, + MemoryRunStore, + RunNotFoundError, + type RunStore, + ToolNotRegisteredError, +} from './run-store.js'; import type { AgentEvent, AgentOptions, @@ -15,15 +28,22 @@ import type { AgentTool, AgentToolResult, } from './types.js'; -import { validateToolArguments as defaultValidateToolArguments } from './validation.js'; +import { + validateSchema, + validateToolArguments as defaultValidateToolArguments, +} from './validation.js'; export class Agent { private readonly listeners = new Set<(event: AgentEvent) => void>(); private readonly transformContext?: AgentOptions['transformContext']; private readonly streamFn: NonNullable; private readonly validateToolArguments: NonNullable; + private readonly runStore: RunStore; + private readonly generateRunId: () => string; private abortController?: AbortController; private running?: Promise; + private currentRunId?: string; + private pausedRunId?: string; private _state: AgentState; @@ -40,6 +60,8 @@ export class Agent { this.streamFn = options.streamFn ?? stream; this.transformContext = options.transformContext; this.validateToolArguments = options.validateToolArguments ?? defaultValidateToolArguments; + this.runStore = options.runStore ?? new MemoryRunStore(); + this.generateRunId = options.generateRunId ?? randomUUID; } get state(): AgentState { @@ -88,6 +110,12 @@ export class Agent { } abort(): void { + if (this.pausedRunId) { + const runId = this.pausedRunId; + this.pausedRunId = undefined; + void this.runStore.delete(runId); + return; + } this.abortController?.abort(); } @@ -99,15 +127,21 @@ export class Agent { if (this._state.isStreaming) { throw new Error('Agent is already processing a prompt'); } + if (this.pausedRunId) { + throw new Error('Agent is paused awaiting a decision; call resume() or abort() first'); + } const message = typeof input === 'string' ? createUserMessage(input) : input; - await this.runLoop([message]); + await this.runLoop({ runId: this.generateRunId(), initialMessages: [message] }); } async continue(): Promise { if (this._state.isStreaming) { throw new Error('Agent is already processing'); } + if (this.pausedRunId) { + throw new Error('Agent is paused awaiting a decision; call resume() or abort() first'); + } const lastMessage = this._state.messages[this._state.messages.length - 1]; if (!lastMessage) { @@ -117,62 +151,131 @@ export class Agent { throw new Error('Cannot continue from message role: assistant'); } - await this.runLoop(); + await this.runLoop({ runId: this.generateRunId() }); + } + + get pendingRunId(): string | undefined { + return this.pausedRunId; } - private async runLoop(initialMessages?: Message[]): Promise { + async resume(runId: string, decision: unknown): Promise { + if (this._state.isStreaming) { + throw new Error('Agent is already processing'); + } + + const run = await this.runStore.load(runId); + if (!run) { + throw new RunNotFoundError(runId); + } + if (!run.pending) { + throw new Error(`Run ${runId} is not paused`); + } + + const tool = this._state.tools.find((t) => t.name === run.pending!.toolName); + if (!tool) { + throw new ToolNotRegisteredError(runId, run.pending.toolName); + } + if (!tool.decision) { + throw new Error( + `Tool '${tool.name}' has no decision schema; cannot resume run ${runId}` + ); + } + + const errors = validateSchema(tool.decision, decision, 'root'); + if (errors.length > 0) { + throw new DecisionValidationError(runId, tool.name, errors); + } + + this._state.model = run.model; + if (run.systemPrompt !== undefined) { + this._state.systemPrompt = run.systemPrompt; + } + this._state.messages = [...run.messages]; + this.pausedRunId = undefined; + + await this.runLoop({ + runId, + resumeDecision: { toolCallId: run.pending.toolCallId, decision }, + }); + } + + private async runLoop(opts: { + runId: string; + initialMessages?: Message[]; + resumeDecision?: { toolCallId: string; decision: unknown }; + }): Promise { this.running = (async () => { this.abortController = new AbortController(); this._state.isStreaming = true; this._state.streamMessage = null; this._state.error = undefined; + this.currentRunId = opts.runId; try { this.emit({ type: 'agent_start' }); - if (initialMessages && initialMessages.length > 0) { - for (const message of initialMessages) { + if (opts.initialMessages && opts.initialMessages.length > 0) { + for (const message of opts.initialMessages) { this.emit({ type: 'message_start', message }); this.appendMessage(message); this.emit({ type: 'message_end', message }); } } - while (true) { - this.emit({ type: 'turn_start' }); - - const assistantMessage = await this.generateAssistantMessage(this.abortController.signal); - this.appendMessage(assistantMessage); - this.emit({ type: 'message_end', message: assistantMessage }); + let resumeDecision = opts.resumeDecision; - if (assistantMessage.stopReason === 'error' || assistantMessage.stopReason === 'aborted') { - this._state.error = assistantMessage.errorMessage; - this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); - break; + while (true) { + let assistantMessage: AssistantMessage; + + if (resumeDecision) { + const last = this._state.messages[this._state.messages.length - 1]; + if (!last || last.role !== 'assistant') { + throw new Error('Cannot resume: last message is not an assistant message'); + } + assistantMessage = last; + } else { + this.emit({ type: 'turn_start' }); + assistantMessage = await this.generateAssistantMessage(this.abortController.signal); + this.appendMessage(assistantMessage); + this.emit({ type: 'message_end', message: assistantMessage }); + + if (assistantMessage.stopReason === 'error' || assistantMessage.stopReason === 'aborted') { + this._state.error = assistantMessage.errorMessage; + this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); + break; + } } - const toolCalls = assistantMessage.content.filter((block) => block.type === 'toolCall'); + const toolCalls = assistantMessage.content.filter( + (block): block is ToolCallContent => block.type === 'toolCall' + ); if (toolCalls.length === 0) { this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] }); break; } - const toolResults = await this.executeToolCalls(toolCalls, this.abortController.signal); - for (const toolResult of toolResults) { - this.emit({ type: 'message_start', message: toolResult }); - this.appendMessage(toolResult); - this.emit({ type: 'message_end', message: toolResult }); + const outcome = await this.executeToolCalls( + toolCalls, + this.abortController.signal, + resumeDecision + ); + resumeDecision = undefined; + + if (outcome.status === 'paused') { + return; } - this.emit({ type: 'turn_end', message: assistantMessage, toolResults }); + this.emit({ type: 'turn_end', message: assistantMessage, toolResults: outcome.results }); } this.emit({ type: 'agent_end', messages: [...this._state.messages] }); + await this.runStore.delete(opts.runId); } finally { this._state.isStreaming = false; this._state.streamMessage = null; this.abortController = undefined; this.running = undefined; + this.currentRunId = undefined; } })(); @@ -228,34 +331,143 @@ export class Agent { } private async executeToolCalls( - toolCalls: Array>, - signal: AbortSignal - ) { - const results = []; + toolCalls: ToolCallContent[], + signal: AbortSignal, + resumeDecision?: { toolCallId: string; decision: unknown } + ): Promise< + | { status: 'completed'; results: ReturnType[] } + | { status: 'paused' } + > { + const completedToolCallIds = new Set( + this._state.messages + .filter((m): m is Extract => m.role === 'toolResult') + .map((m) => m.toolCallId) + ); + + const results: ReturnType[] = []; for (const toolCall of toolCalls) { - const tool = this._state.tools.find((candidate) => candidate.name === toolCall.name); - this.emit({ - type: 'tool_execution_start', - toolCallId: toolCall.id, - toolName: toolCall.name, - args: toolCall.arguments as Record, - }); + if (completedToolCallIds.has(toolCall.id)) { + continue; + } - let result: AgentToolResult; - let isError = false; + const tool = this._state.tools.find((candidate) => candidate.name === toolCall.name); + const args = toolCall.arguments as Record; + const isResumeTarget = resumeDecision?.toolCallId === toolCall.id; + + if (tool?.decision && !isResumeTarget) { + let validatedArgs: Record; + try { + validatedArgs = this.validateToolArguments(tool.parameters, args); + } catch (error) { + const result: AgentToolResult = { + content: [ + { + type: 'text', + text: error instanceof Error ? error.message : String(error), + }, + ], + }; + this.emit({ + type: 'tool_execution_start', + toolCallId: toolCall.id, + toolName: toolCall.name, + args, + }); + this.emit({ + type: 'tool_execution_end', + toolCallId: toolCall.id, + toolName: toolCall.name, + result, + isError: true, + }); + const toolResult = createToolResultMessage(toolCall.id, toolCall.name, result.content, true); + this.appendMessageWithEvents(toolResult); + continue; + } - try { - if (!tool) { - throw new Error(`Tool '${toolCall.name}' not found`); + for (const toolResult of results) { + this.appendMessageWithEvents(toolResult); } - const validatedArgs = this.validateToolArguments( - tool.parameters, - toolCall.arguments as Record - ); + const runId = this.currentRunId!; + const pending: AgentRunPending = { + toolCallId: toolCall.id, + toolName: toolCall.name, + input: validatedArgs, + }; + const now = Date.now(); + const run: AgentRun = { + id: runId, + model: this._state.model, + systemPrompt: this._state.systemPrompt, + tools: this._state.tools.map(toToolDefinition), + messages: [...this._state.messages], + pending, + createdAt: now, + updatedAt: now, + }; + await this.runStore.save(run); + this.pausedRunId = runId; + this.emit({ + type: 'tool_decision_pending', + runId, + toolCallId: toolCall.id, + toolName: toolCall.name, + input: validatedArgs, + schema: tool.decision, + }); + return { status: 'paused' }; + } + + const decisionForExecute = isResumeTarget ? resumeDecision!.decision : undefined; + const toolResult = await this.executeOneTool( + tool, + toolCall, + args, + decisionForExecute, + signal + ); + results.push(toolResult); + } + + for (const toolResult of results) { + this.appendMessageWithEvents(toolResult); + } + + return { status: 'completed', results }; + } + + private async executeOneTool( + tool: AgentTool | undefined, + toolCall: ToolCallContent, + args: Record, + decision: unknown, + signal: AbortSignal + ): Promise> { + this.emit({ + type: 'tool_execution_start', + toolCallId: toolCall.id, + toolName: toolCall.name, + args, + }); + + let result: AgentToolResult; + let isError = false; + + try { + if (!tool) { + throw new Error(`Tool '${toolCall.name}' not found`); + } + + const validatedArgs = this.validateToolArguments(tool.parameters, args); - result = await tool.execute(toolCall.id, validatedArgs, signal, (partialResult) => { + result = await tool.execute( + toolCall.id, + validatedArgs, + decision, + signal, + (partialResult) => { this.emit({ type: 'tool_execution_update', toolCallId: toolCall.id, @@ -263,33 +475,35 @@ export class Agent { args: validatedArgs, partialResult, }); - }); - } catch (error) { - result = { - content: [ - { - type: 'text', - text: error instanceof Error ? error.message : String(error), - }, - ], - }; - isError = true; - } - - this.emit({ - type: 'tool_execution_end', - toolCallId: toolCall.id, - toolName: toolCall.name, - result, - isError, - }); - - results.push( - createToolResultMessage(toolCall.id, toolCall.name, result.content, isError) + } ); + } catch (error) { + result = { + content: [ + { + type: 'text', + text: error instanceof Error ? error.message : String(error), + }, + ], + }; + isError = true; } - return results; + this.emit({ + type: 'tool_execution_end', + toolCallId: toolCall.id, + toolName: toolCall.name, + result, + isError, + }); + + return createToolResultMessage(toolCall.id, toolCall.name, result.content, isError); + } + + private appendMessageWithEvents(message: Message): void { + this.emit({ type: 'message_start', message }); + this.appendMessage(message); + this.emit({ type: 'message_end', message }); } private emit(event: AgentEvent): void { @@ -298,3 +512,11 @@ export class Agent { } } } + +function toToolDefinition(tool: AgentTool): ToolDefinition { + return { + name: tool.name, + description: tool.description, + parameters: tool.parameters, + }; +} diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts index b8b99bb..bad2c86 100644 --- a/packages/agent/src/index.ts +++ b/packages/agent/src/index.ts @@ -1,3 +1,4 @@ export * from './agent.js'; +export * from './run-store.js'; export * from './types.js'; export * from './validation.js'; diff --git a/packages/agent/src/run-store.ts b/packages/agent/src/run-store.ts new file mode 100644 index 0000000..a658d69 --- /dev/null +++ b/packages/agent/src/run-store.ts @@ -0,0 +1,81 @@ +import type { Message, ModelDescriptor, ToolDefinition } from 'agentic-kit'; + +export interface AgentRunPending { + toolCallId: string; + toolName: string; + input: Record; +} + +export interface AgentRun { + id: string; + model: ModelDescriptor; + systemPrompt?: string; + tools: ToolDefinition[]; + messages: Message[]; + pending?: AgentRunPending; + createdAt: number; + updatedAt: number; +} + +export interface RunStore { + save(run: AgentRun): Promise; + load(id: string): Promise; + delete(id: string): Promise; +} + +export class MemoryRunStore implements RunStore { + private readonly runs = new Map(); + + async save(run: AgentRun): Promise { + this.runs.set(run.id, cloneRun(run)); + } + + async load(id: string): Promise { + const run = this.runs.get(id); + return run ? cloneRun(run) : undefined; + } + + async delete(id: string): Promise { + this.runs.delete(id); + } +} + +export class RunNotFoundError extends Error { + readonly runId: string; + + constructor(runId: string) { + super(`Run not found: ${runId}`); + this.name = 'RunNotFoundError'; + this.runId = runId; + } +} + +export class DecisionValidationError extends Error { + readonly runId: string; + readonly toolName: string; + readonly errors: string[]; + + constructor(runId: string, toolName: string, errors: string[]) { + super(`Decision validation failed for tool '${toolName}':\n${errors.map((e) => `- ${e}`).join('\n')}`); + this.name = 'DecisionValidationError'; + this.runId = runId; + this.toolName = toolName; + this.errors = errors; + } +} + +export class ToolNotRegisteredError extends Error { + readonly runId: string; + readonly toolName: string; + + constructor(runId: string, toolName: string) { + super(`Tool '${toolName}' is not registered on the agent resuming run ${runId}`); + this.name = 'ToolNotRegisteredError'; + this.runId = runId; + this.toolName = toolName; + } +} + +function cloneRun(run: AgentRun): AgentRun { + return JSON.parse(JSON.stringify(run)) as AgentRun; +} diff --git a/packages/agent/src/types.ts b/packages/agent/src/types.ts index 1486987..9e724d9 100644 --- a/packages/agent/src/types.ts +++ b/packages/agent/src/types.ts @@ -10,6 +10,8 @@ import type { ToolResultMessage, } from 'agentic-kit'; +import type { RunStore } from './run-store.js'; + export interface AgentToolResult { content: ToolResultMessage['content']; details?: TDetails; @@ -21,9 +23,11 @@ export type AgentToolUpdateCallback = ( export interface AgentTool extends ToolDefinition { label: string; + decision?: JsonSchema; execute: ( toolCallId: string, params: Record, + decision: unknown, signal?: AbortSignal, onUpdate?: AgentToolUpdateCallback ) => Promise>; @@ -66,6 +70,14 @@ export type AgentEvent = toolName: string; result: AgentToolResult; isError: boolean; + } + | { + type: 'tool_decision_pending'; + runId: string; + toolCallId: string; + toolName: string; + input: Record; + schema: JsonSchema; }; export interface AgentOptions { @@ -80,4 +92,6 @@ export interface AgentOptions { schema: JsonSchema, args: Record ) => Record; + runStore?: RunStore; + generateRunId?: () => string; } diff --git a/packages/agent/src/validation.ts b/packages/agent/src/validation.ts index 51634c7..3fb227e 100644 --- a/packages/agent/src/validation.ts +++ b/packages/agent/src/validation.ts @@ -12,7 +12,7 @@ export function validateToolArguments( throw new Error(`Tool argument validation failed:\n${errors.map((error) => `- ${error}`).join('\n')}`); } -function validateSchema(schema: JsonSchema, value: unknown, path: string): string[] { +export function validateSchema(schema: JsonSchema, value: unknown, path: string): string[] { if (!schema || Object.keys(schema).length === 0) { return []; } diff --git a/packages/agentic-kit/__tests__/adapter.test.ts b/packages/agentic-kit/__tests__/adapter.test.ts index b186f64..60dccdd 100644 --- a/packages/agentic-kit/__tests__/adapter.test.ts +++ b/packages/agentic-kit/__tests__/adapter.test.ts @@ -1,47 +1,28 @@ +import { + createScriptedProvider, + makeFakeAssistantMessage, + makeFakeModel, +} from '@test/index'; + import { AgentKit, type AssistantMessage, - createAssistantMessageEventStream, getMessageText, type ModelDescriptor, - type ProviderAdapter, transformMessages, } from '../src'; function createFakeModel(): ModelDescriptor { - return { - id: 'demo', - name: 'Demo', - api: 'fake-api', - provider: 'fake', - baseUrl: 'http://fake.local', - input: ['text'], - reasoning: false, - tools: true, - }; + return makeFakeModel({ name: 'Demo' }); } function createAssistantMessage( overrides: Partial = {} ): AssistantMessage { - return { - role: 'assistant', - api: 'fake-api', - provider: 'fake', - model: 'demo', - usage: { - input: 0, - output: 0, - cacheRead: 0, - cacheWrite: 0, - totalTokens: 0, - cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, - }, - stopReason: 'stop', - timestamp: Date.now(), + return makeFakeAssistantMessage({ content: [{ type: 'text', text: 'hello world' }], ...overrides, - }; + }); } describe('agentic-kit core', () => { @@ -236,43 +217,11 @@ describe('agentic-kit core', () => { }); it('keeps the legacy AgentKit generate API working through structured streams', async () => { - const provider: ProviderAdapter & { name: string } = { - api: 'fake-api', - provider: 'fake', - name: 'fake', - createModel: () => createFakeModel(), - stream: () => { - const stream = createAssistantMessageEventStream(); - const message = createAssistantMessage(); - - queueMicrotask(() => { - stream.push({ type: 'start', partial: { ...message, content: [{ type: 'text', text: '' }] } }); - stream.push({ - type: 'text_start', - contentIndex: 0, - partial: { ...message, content: [{ type: 'text', text: '' }] }, - }); - stream.push({ - type: 'text_delta', - contentIndex: 0, - delta: 'hello world', - partial: message, - }); - stream.push({ - type: 'text_end', - contentIndex: 0, - content: 'hello world', - partial: message, - }); - stream.push({ type: 'done', reason: 'stop', message }); - stream.end(message); - }); - - return stream; - }, - }; - - const kit = new AgentKit().addProvider(provider); + const kit = new AgentKit().addProvider( + createScriptedProvider({ + responses: [createAssistantMessage(), createAssistantMessage()], + }) + ); const chunks: string[] = []; await kit.generate( { model: 'demo', prompt: 'hi', stream: true }, @@ -284,29 +233,17 @@ describe('agentic-kit core', () => { }); it('rejects legacy generate when a provider returns a terminal error in non-stream mode', async () => { - const provider: ProviderAdapter & { name: string } = { - api: 'fake-api', - provider: 'fake', - name: 'fake', - createModel: () => createFakeModel(), - stream: () => { - const stream = createAssistantMessageEventStream(); - const failure = createAssistantMessage({ - stopReason: 'error', - errorMessage: 'provider failed', - content: [{ type: 'text', text: '' }], - }); - - queueMicrotask(() => { - stream.push({ type: 'error', reason: 'error', error: failure }); - stream.end(failure); - }); - - return stream; - }, - }; - - const kit = new AgentKit().addProvider(provider); + const kit = new AgentKit().addProvider( + createScriptedProvider({ + responses: [ + createAssistantMessage({ + stopReason: 'error', + errorMessage: 'provider failed', + content: [{ type: 'text', text: '' }], + }), + ], + }) + ); const onComplete = jest.fn(); const onError = jest.fn(); const onStateChange = jest.fn(); @@ -324,44 +261,17 @@ describe('agentic-kit core', () => { }); it('rejects legacy generate when a provider returns a terminal error in stream mode', async () => { - const provider: ProviderAdapter & { name: string } = { - api: 'fake-api', - provider: 'fake', - name: 'fake', - createModel: () => createFakeModel(), - stream: () => { - const stream = createAssistantMessageEventStream(); - const partial = createAssistantMessage({ - content: [{ type: 'text', text: 'partial' }], - }); - const failure = createAssistantMessage({ - stopReason: 'error', - errorMessage: 'provider failed', - content: [{ type: 'text', text: 'partial' }], - }); - - queueMicrotask(() => { - stream.push({ type: 'start', partial: { ...partial, content: [{ type: 'text', text: '' }] } }); - stream.push({ - type: 'text_start', - contentIndex: 0, - partial: { ...partial, content: [{ type: 'text', text: '' }] }, - }); - stream.push({ - type: 'text_delta', - contentIndex: 0, - delta: 'partial', - partial, - }); - stream.push({ type: 'error', reason: 'error', error: failure }); - stream.end(failure); - }); - - return stream; - }, - }; - - const kit = new AgentKit().addProvider(provider); + const kit = new AgentKit().addProvider( + createScriptedProvider({ + responses: [ + createAssistantMessage({ + stopReason: 'error', + errorMessage: 'provider failed', + content: [{ type: 'text', text: 'partial' }], + }), + ], + }) + ); const chunks: string[] = []; const onComplete = jest.fn(); const onError = jest.fn(); diff --git a/packages/agentic-kit/__tests__/tsconfig.json b/packages/agentic-kit/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/agentic-kit/__tests__/tsconfig.json +++ b/packages/agentic-kit/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/agentic-kit/jest.config.js b/packages/agentic-kit/jest.config.js index c539b86..79ccd00 100644 --- a/packages/agentic-kit/jest.config.js +++ b/packages/agentic-kit/jest.config.js @@ -17,6 +17,8 @@ module.exports = { modulePathIgnorePatterns: ['dist/*'], moduleNameMapper: { '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/src', '^@agentic-kit/(.*)$': '/../$1/src', }, setupFilesAfterEnv: ['/jest.setup.js'] diff --git a/packages/anthropic/__tests__/tsconfig.json b/packages/anthropic/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/anthropic/__tests__/tsconfig.json +++ b/packages/anthropic/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/anthropic/jest.config.js b/packages/anthropic/jest.config.js index e11f478..d0dfaaa 100644 --- a/packages/anthropic/jest.config.js +++ b/packages/anthropic/jest.config.js @@ -15,5 +15,11 @@ module.exports = { testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$', moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], modulePathIgnorePatterns: ['dist/*'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/../agentic-kit/src', + '^@agentic-kit/(.*)$': '/../$1/src', + }, setupFilesAfterEnv: ['/jest.setup.js'] }; diff --git a/packages/ollama/__tests__/tsconfig.json b/packages/ollama/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/ollama/__tests__/tsconfig.json +++ b/packages/ollama/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/ollama/jest.config.js b/packages/ollama/jest.config.js index 5b89d20..061b4b9 100644 --- a/packages/ollama/jest.config.js +++ b/packages/ollama/jest.config.js @@ -16,5 +16,11 @@ module.exports = { moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], modulePathIgnorePatterns: ['dist/*'], testPathIgnorePatterns: process.env.OLLAMA_LIVE_READY === '1' ? [] : ['\\.live\\.test\\.ts$'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/../agentic-kit/src', + '^@agentic-kit/(.*)$': '/../$1/src', + }, setupFilesAfterEnv: ['/jest.setup.js'] }; diff --git a/packages/openai/__tests__/tsconfig.json b/packages/openai/__tests__/tsconfig.json index 6c4fda5..3ae83c4 100644 --- a/packages/openai/__tests__/tsconfig.json +++ b/packages/openai/__tests__/tsconfig.json @@ -2,9 +2,19 @@ "extends": "../tsconfig.json", "compilerOptions": { "noEmit": true, - "rootDir": "..", + "rootDir": "../../..", + "baseUrl": "../../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"] + }, "types": ["jest", "node"] }, - "include": ["./**/*.ts", "../src/**/*.ts"], + "include": [ + "./**/*.ts", + "../src/**/*.ts", + "../../../tools/test/**/*.ts" + ], "exclude": ["../dist", "../node_modules"] } diff --git a/packages/openai/jest.config.js b/packages/openai/jest.config.js index e11f478..d0dfaaa 100644 --- a/packages/openai/jest.config.js +++ b/packages/openai/jest.config.js @@ -15,5 +15,11 @@ module.exports = { testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$', moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], modulePathIgnorePatterns: ['dist/*'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/../agentic-kit/src', + '^@agentic-kit/(.*)$': '/../$1/src', + }, setupFilesAfterEnv: ['/jest.setup.js'] }; diff --git a/tests/integration/README.md b/tests/integration/README.md new file mode 100644 index 0000000..3ec22dd --- /dev/null +++ b/tests/integration/README.md @@ -0,0 +1,14 @@ +# Integration tests + +Workspace-level lane that runs in-process integration tests against the kit's +HTTP boundary. Brings up `http.createServer`, exercises real serialization +and `fetch`, with mocked providers. + +Empty in Phase 0 — scaffolding only. First tests land with Phase 1.3 (run +serialization helpers) and 1.1 (pause/resume). Run with: + +```sh +pnpm test:integration +``` + +`passWithNoTests` is set, so the script is safe to run while empty. diff --git a/tests/integration/jest.config.js b/tests/integration/jest.config.js new file mode 100644 index 0000000..4507567 --- /dev/null +++ b/tests/integration/jest.config.js @@ -0,0 +1,24 @@ +/** @type {import('ts-jest').JestConfigWithTsJest} */ +module.exports = { + preset: 'ts-jest', + testEnvironment: 'node', + rootDir: '.', + passWithNoTests: true, + transform: { + '^.+\\.tsx?$': [ + 'ts-jest', + { + babelConfig: false, + tsconfig: '/tsconfig.json', + }, + ], + }, + testRegex: '\\.test\\.(jsx?|tsx?)$', + moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], + moduleNameMapper: { + '^(\\.{1,2}/.*)\\.js$': '$1', + '^@test/(.*)$': '/../../tools/test/$1', + '^agentic-kit$': '/../../packages/agentic-kit/src', + '^@agentic-kit/(.*)$': '/../../packages/$1/src', + }, +}; diff --git a/tests/integration/tsconfig.json b/tests/integration/tsconfig.json new file mode 100644 index 0000000..f287326 --- /dev/null +++ b/tests/integration/tsconfig.json @@ -0,0 +1,18 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "noEmit": true, + "rootDir": "../..", + "baseUrl": "../..", + "paths": { + "@test/*": ["tools/test/*"], + "agentic-kit": ["packages/agentic-kit/src"], + "@agentic-kit/agent": ["packages/agent/src"], + "@agentic-kit/anthropic": ["packages/anthropic/src"], + "@agentic-kit/openai": ["packages/openai/src"], + "@agentic-kit/ollama": ["packages/ollama/src"] + }, + "types": ["jest", "node"] + }, + "include": ["./**/*.ts", "../../tools/test/**/*.ts"] +} diff --git a/tools/test/README.md b/tools/test/README.md new file mode 100644 index 0000000..0513eb6 --- /dev/null +++ b/tools/test/README.md @@ -0,0 +1,24 @@ +# Shared test helpers + +Repo-internal helpers for unit tests. Imported via the `@test/*` tsconfig path +alias (see each package's `__tests__/tsconfig.json` and `jest.config.js`). +Not a workspace package, not published. + +## Helpers + +- `makeFakeModel(overrides?)` — `ModelDescriptor` with sane defaults. +- `makeFakeAssistantMessage(overrides?)` — `AssistantMessage` with zero usage and stop reason. +- `createScriptedProvider({ responses })` — `ProviderAdapter` that emits a derived event sequence per `AssistantMessage` in `responses` on successive `stream()` calls. `stopReason` of `error` or `aborted` produces an `error` terminal event; otherwise `done`. +- `createScriptedSSEResponse(events)` — `Response` whose body serializes each `AgentEvent` as one SSE frame (`data: \n\n`). +- `parseSSEStream(stream)` — async iterable that parses `AgentEvent` SSE frames from a `ReadableStream`. Handles split chunks, multi-line `data:`, comment lines, event-type framing, trailing newlines, and mid-event abort (incomplete trailing event is dropped, per SSE spec). + +## Deferred + +`runRunStoreContractTests(makeStore)` lands with Phase 1.2 alongside the +`RunStore` interface. Adding it now would be dead scaffolding. + +## Adding a helper + +Promote a helper to `tools/test/` only when a third package needs the same +idiom. Duplication of a 30-line scripted helper across two packages is fine; +duplicating across three is the trigger for promotion. diff --git a/tools/test/fixtures.ts b/tools/test/fixtures.ts new file mode 100644 index 0000000..003fa2b --- /dev/null +++ b/tools/test/fixtures.ts @@ -0,0 +1,40 @@ +import type { AssistantMessage, ModelDescriptor, Usage } from 'agentic-kit'; + +const ZERO_USAGE: Usage = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, + totalTokens: 0, + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 }, +}; + +export function makeFakeModel(overrides: Partial = {}): ModelDescriptor { + return { + id: 'demo', + name: 'Demo', + api: 'fake-api', + provider: 'fake', + baseUrl: 'http://fake.local', + input: ['text'], + reasoning: false, + tools: true, + ...overrides, + }; +} + +export function makeFakeAssistantMessage( + overrides: Partial = {} +): AssistantMessage { + return { + role: 'assistant', + api: 'fake-api', + provider: 'fake', + model: 'demo', + usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } }, + stopReason: 'stop', + timestamp: Date.now(), + content: [{ type: 'text', text: '' }], + ...overrides, + }; +} diff --git a/tools/test/index.ts b/tools/test/index.ts new file mode 100644 index 0000000..4c8ab2e --- /dev/null +++ b/tools/test/index.ts @@ -0,0 +1,3 @@ +export { makeFakeAssistantMessage, makeFakeModel } from './fixtures'; +export { createScriptedProvider, type ScriptedProviderOptions } from './scripted-provider'; +export { createScriptedSSEResponse, parseSSEStream } from './scripted-sse'; diff --git a/tools/test/scripted-provider.ts b/tools/test/scripted-provider.ts new file mode 100644 index 0000000..da43921 --- /dev/null +++ b/tools/test/scripted-provider.ts @@ -0,0 +1,115 @@ +import { + type AssistantMessage, + type AssistantMessageEvent, + createAssistantMessageEventStream, + type ModelDescriptor, + type ProviderAdapter, +} from 'agentic-kit'; + +import { makeFakeAssistantMessage, makeFakeModel } from './fixtures'; + +export interface ScriptedProviderOptions { + responses: AssistantMessage[]; + delayMs?: number; + api?: string; + provider?: string; +} + +export function createScriptedProvider(opts: ScriptedProviderOptions): ProviderAdapter { + const api = opts.api ?? 'fake-api'; + const provider = opts.provider ?? 'fake'; + let callIndex = 0; + + return { + api, + provider, + createModel: (modelId: string, overrides?: Partial) => + makeFakeModel({ id: modelId, api, provider, ...overrides }), + stream: () => { + const stream = createAssistantMessageEventStream(); + const message = + opts.responses[callIndex++] ?? + makeFakeAssistantMessage({ + api, + provider, + stopReason: 'error', + errorMessage: 'scripted provider: no response queued for this call', + content: [], + }); + + const events = deriveEventSequence(message); + const emit = () => { + for (const event of events) { + stream.push(event); + } + stream.end(message); + }; + + if (opts.delayMs && opts.delayMs > 0) { + setTimeout(emit, opts.delayMs); + } else { + queueMicrotask(emit); + } + + return stream; + }, + }; +} + +function deriveEventSequence(message: AssistantMessage): AssistantMessageEvent[] { + const events: AssistantMessageEvent[] = []; + events.push({ type: 'start', partial: message }); + + for (let i = 0; i < message.content.length; i++) { + const block = message.content[i]; + if (block.type === 'text') { + events.push({ type: 'text_start', contentIndex: i, partial: message }); + if (block.text.length > 0) { + events.push({ + type: 'text_delta', + contentIndex: i, + delta: block.text, + partial: message, + }); + } + events.push({ + type: 'text_end', + contentIndex: i, + content: block.text, + partial: message, + }); + } else if (block.type === 'thinking') { + events.push({ type: 'thinking_start', contentIndex: i, partial: message }); + if (block.thinking.length > 0) { + events.push({ + type: 'thinking_delta', + contentIndex: i, + delta: block.thinking, + partial: message, + }); + } + events.push({ + type: 'thinking_end', + contentIndex: i, + content: block.thinking, + partial: message, + }); + } else if (block.type === 'toolCall') { + events.push({ type: 'toolcall_start', contentIndex: i, partial: message }); + events.push({ + type: 'toolcall_end', + contentIndex: i, + toolCall: block, + partial: message, + }); + } + } + + if (message.stopReason === 'error' || message.stopReason === 'aborted') { + events.push({ type: 'error', reason: message.stopReason, error: message }); + } else { + events.push({ type: 'done', reason: message.stopReason, message }); + } + + return events; +} diff --git a/tools/test/scripted-sse.ts b/tools/test/scripted-sse.ts new file mode 100644 index 0000000..ae7497e --- /dev/null +++ b/tools/test/scripted-sse.ts @@ -0,0 +1,88 @@ +import type { AgentEvent } from '@agentic-kit/agent'; + +export function createScriptedSSEResponse(events: AgentEvent[]): Response { + const encoder = new TextEncoder(); + const body = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(encoder.encode(`data: ${JSON.stringify(event)}\n\n`)); + } + controller.close(); + }, + }); + + return new Response(body, { + status: 200, + headers: { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + Connection: 'keep-alive', + }, + }); +} + +export async function* parseSSEStream( + stream: ReadableStream +): AsyncIterable { + const reader = stream.getReader(); + const decoder = new TextDecoder('utf-8'); + let buffer = ''; + + try { + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + buffer += decoder.decode(value, { stream: true }); + buffer = buffer.replace(/\r\n/g, '\n').replace(/\r/g, '\n'); + + let blankIdx = buffer.indexOf('\n\n'); + while (blankIdx !== -1) { + const rawEvent = buffer.slice(0, blankIdx); + buffer = buffer.slice(blankIdx + 2); + const event = parseEvent(rawEvent); + if (event) { + yield event; + } + blankIdx = buffer.indexOf('\n\n'); + } + } + } finally { + reader.releaseLock(); + } +} + +function parseEvent(raw: string): AgentEvent | null { + const dataLines: string[] = []; + for (const line of raw.split('\n')) { + if (line === '' || line.startsWith(':')) { + continue; + } + const colon = line.indexOf(':'); + const field = colon === -1 ? line : line.slice(0, colon); + let value = colon === -1 ? '' : line.slice(colon + 1); + if (value.startsWith(' ')) { + value = value.slice(1); + } + if (field === 'data') { + dataLines.push(value); + } + } + + if (dataLines.length === 0) { + return null; + } + + const data = dataLines.join('\n'); + if (data === '[DONE]') { + return null; + } + + try { + return JSON.parse(data) as AgentEvent; + } catch { + return null; + } +}