diff --git a/ROADMAP.md b/ROADMAP.md
new file mode 100644
index 0000000..507b319
--- /dev/null
+++ b/ROADMAP.md
@@ -0,0 +1,606 @@
+# Agentic Kit Roadmap
+
+This document plans the next phases of work for `agentic-kit`. It supersedes
+neither `REDESIGN_DECISIONS.md` nor `README.md` — those describe what exists.
+This describes what will exist next, why, and what is explicitly out of scope.
+
+## Current State (snapshot)
+
+| Package                  | Status                                                                                                       |
+| ------------------------ | ------------------------------------------------------------------------------------------------------------ |
+| `agentic-kit`            | Core portability layer. Streaming, message model, providers registry, cross-provider transforms, usage/cost. |
+| `@agentic-kit/agent`     | Sequential agent loop. Tool execution, lifecycle events, abort/continue, JSON Schema validation.             |
+| `@agentic-kit/anthropic` | Provider adapter. Streaming, thinking, tool calls, multimodal, abort.                                        |
+| `@agentic-kit/openai`    | Provider adapter. Streaming, reasoning, tool calls, multimodal, abort. OpenAI-compatible endpoints.          |
+| `@agentic-kit/ollama`    | Provider adapter. Local inference, embeddings. **Tool execution in streaming is a stub.**                    |
+
+The agent loop today runs to completion in-process: it does not pause for
+out-of-band input and has no transport layer above it. Consumers wire it into
+their own HTTP layer and supply their own React bindings.
+
+## Design Principles (carried forward)
+
+- Provider-agnostic core; OpenAI-compatible is a compatibility class, not a brand.
+- No schema-library coupling at the core (JSON Schema only).
+- Normalize provider differences inward; do not leak them.
+- Runtime-agnostic; consume standard Web platform primitives (`Response`,
+  `ReadableStream`, `AbortSignal`, `fetch`).
+- Headless. The kit ships no opinionated UI.
+- Composable. Core stays minimal; extensions are opt-in packages.
+- Storage is pluggable. Defaults work for development; production swaps in.
+
+## Phase 0 — Test Infrastructure (do first)
+
+Phase 1 cannot land cleanly without a small set of shared test helpers. Build
+these first; everything afterward inherits the same testing idiom.
+
+### 0.1 Test Conventions
+
+Three rules the kit follows:
+
+1. **Deterministic by default.** Every package's default `pnpm test` runs only
+   unit tests against scripted mocks. No network, no API keys, no flakes.
+2. **Live tests are gated and opt-in.** Files named `*.live.test.ts` and
+   workspace scripts like `test:live:*` exist for exercising real provider
+   APIs. Never required in CI by default.
+3. **One environment per package.** Most packages run `testEnvironment: 'node'`.
+   The single exception is `@agentic-kit/react`, which runs `jsdom`. There is
+   no workspace-wide jsdom; the asymmetry is intentional.
+
+### 0.2 Shared Test Helpers (repo-internal, not a package)
+
+The kit needs a small set of reusable test helpers — scripted providers, SSE
+stubs, parsers, contract suites. These live as a **repo-internal directory**,
+not a published package and not a workspace package.
+
+Layout: `tools/test/` at the repo root, plain `.ts` files, imported via a
+tsconfig `paths` alias (e.g., `@test/scripted-provider`) from each package's
+test config. No `package.json`, no version, no public API surface, no
+publishing concerns.
+
+Why not a package:
+- Dev-only code in a `"private": true` workspace package is a publishing
+  ceremony with no upside; the alternative is a directory.
+- Promotes test code to a load-bearing public API the moment a consumer
+  installs it.
+- Reference: AI SDK keeps its test helpers in-package, not as a separate
+  workspace package.
+
+Helpers live wherever they are simplest to maintain: shared idioms in
+`tools/test/`, package-specific helpers co-located in that package's
+`__tests__/`. Duplication of a 30-line scripted provider across packages is
+acceptable; promotion to `tools/test/` happens when a third package needs the
+same helper.
+
+```ts
+// scripted mock provider — replaces inline streamFn boilerplate
+function createScriptedProvider(opts: {
+  responses: AssistantMessageResponse[]
+  delayMs?: number
+}): ProviderAdapter
+
+// SSE response stub for serialization tests and useChat fetch mocks
+function createScriptedSSEResponse(events: AgentEvent[]): Response
+
+// SSE parser for assertions on emitted bytes
+function parseSSEStream(stream: ReadableStream<Uint8Array>): AsyncIterable<AgentEvent>
+
+// portable contract suite for any RunStore implementation
+function runRunStoreContractTests(makeStore: () => RunStore | Promise<RunStore>): void
+
+// small fixtures
+function makeFakeModel(overrides?: Partial<ModelDescriptor>): ModelDescriptor
+```
+
+Existing tests that inline-construct a scripted provider migrate to use the
+helper as part of this phase. No behavior change; cleanup only.
+
+If a consumer application later wants to write provider-mocking tests of its
+own, it copies the relevant helper (each is small) rather than installing a
+dep. That is intentional.
+
+### 0.3 Integration Test Lane
+
+A workspace-level `pnpm test:integration` script. Brings up
+`http.createServer` in-process, runs `agent.start(...).toResponse()` against
+it, exercises pause/resume across a real HTTP boundary via `fetch`. Mock
+providers, real HTTP, real serialization. Catches wire-format and abort
+regressions that pure unit tests miss.
+
+Optional in Phase 1 PRs; required for any 1.0 release of `@agentic-kit/agent`'s
+new pause/resume APIs.
+
+### 0.4 SSE Wire-Format Tests
+
+A dedicated `__tests__/sse.test.ts` in `@agentic-kit/agent` covers parser
+edge cases: chunks split mid-event, multi-line `data:` lines, comment lines,
+event-type framing, trailing newlines, mid-event abort. Easy to under-test,
+easy to break silently. Hand-crafted byte sequences only; no provider in
+the loop.
+
+---
+
+## Phase 1 — Pause/Resume + React Bindings (must)
+
+The single architectural change behind Phase 1: the agent loop becomes
+**checkpoint-able**. Tools may declare a `decision` schema; when the loop hits
+such a tool, it persists run state, emits a structured event, and waits for a
+matching decision payload before continuing. Everything else in Phase 1 follows
+from this.
+
+### 1.1 Pausable Tools
+
+#### Problem
+
+Many real agent flows need structured input from outside the loop before a
+tool can be considered safe or actionable: human approval on destructive
+operations, multi-choice routing on a generated proposal, signed authorization,
+delayed completion of a long-running external job. Today the loop has no way
+to express this — tools must either run unconditionally or be elided.
+
+#### Design
+
+Extend `AgentTool` with an optional `decision` JSON Schema. The agent loop:
+
+1. When the LLM emits a call to a tool that declares `decision`:
+   - Validate the LLM's input against `parameters` as today.
+   - Emit a `tool_decision_pending` event with the input and the schema.
+   - Persist the run via the configured `RunStore` (see 1.2).
+   - Halt the loop and return.
+2. The host invokes `agent.resume(runId, decision)`:
+   - Load run state from the `RunStore`.
+   - Validate `decision` against the tool's `decision` schema.
+   - Call `tool.execute(input, decision, ctx)`.
+   - Continue the loop with the result.
+
+Tools without a `decision` schema run as today — synchronously inside the loop.
+
+#### API
+
+```ts
+interface AgentTool extends ToolDefinition {
+  label: string
+  decision?: JsonSchema           // optional; declares structured outside-input
+  execute(
+    toolCallId: string,
+    input: Record<string, unknown>,
+    decision: unknown,            // undefined for non-pausable tools
+    signal?: AbortSignal,
+    onUpdate?: (partial: AgentToolResult) => void,
+  ): Promise<AgentToolResult>
+}
+
+class Agent {
+  // existing
+  prompt(input: string | Message): Promise<void>
+  abort(): void
+
+  // new
+  resume(runId: string, decision: unknown): Promise<void>
+}
+
+type AgentEvent =
+  // ... existing events
+  | { type: 'tool_decision_pending'
+      runId: string
+      toolCallId: string
+      toolName: string
+      input: Record<string, unknown>
+      schema: JsonSchema }
+```
+
+A pausable tool with no `decision` is invalid — the field's presence is the
+mechanism. Validation runs before `execute` is called; a malformed decision
+rejects with a typed error and does not consume the run.
+
+#### Naming
+
+The field is named **`decision`** because the dominant case is a user or
+upstream system choosing how the tool should proceed. The variable inside
+`execute` is also `decision`; React surfaces it as `respondWithDecision`. If
+later phases introduce a categorically different out-of-band input (e.g., raw
+results from a client-executed tool), it gets a sibling field with its own
+shape — the kit does not over-generalize now.
+
+#### Testing
+
+Unit tests in `@agentic-kit/agent`. Uses `createScriptedProvider` from 0.2.
+
+- Scripted provider emits a tool call to a `decision`-bearing tool. Assert:
+  `tool_decision_pending` event emitted, `runStore.save` called, loop halted.
+- `agent.resume(runId, valid)` with a fresh scripted response. Assert:
+  `tool.execute` invoked with the decision argument, loop continues, final
+  event emitted.
+- Resume with a decision that fails schema validation. Assert: typed
+  validation error, run not consumed, retry permitted.
+- Resume with non-existent `runId`. Assert: typed `RunNotFound` error.
+- `agent.abort()` while paused. Assert: clean cancellation, run cleaned up.
+- Tool without `decision` still runs synchronously (regression guard).
+
+### 1.2 RunStore
+
+#### Problem
+
+Pause/resume across HTTP requests requires the loop's state to survive between
+the pause and the resume call. The kit must define where that state lives
+without forcing a specific backend on consumers.
+
+#### Design
+
+A small interface plus a default implementation. The kit owns the schema of
+what gets persisted (the run record); the consumer owns where it lands.
+
+```ts
+interface AgentRun {
+  id: string
+  model: string
+  systemPrompt?: string
+  tools: ToolDefinition[]
+  messages: Message[]
+  pending?: {
+    toolCallId: string
+    toolName: string
+    input: Record<string, unknown>
+  }
+  createdAt: number
+  updatedAt: number
+}
+
+interface RunStore {
+  save(run: AgentRun): Promise<void>
+  load(id: string): Promise<AgentRun | undefined>
+  delete(id: string): Promise<void>
+}
+
+class MemoryRunStore implements RunStore { /* default, ephemeral */ }
+```
+
+`@agentic-kit/agent` ships `MemoryRunStore` for development and single-process
+deployments. Production users supply a Redis-, KV-, or DB-backed implementation.
+The kit ships no production backend.
+
+The kit deliberately does **not** persist final conversation history. That is a
+consumer concern. See 1.4 for lifecycle hooks.
+
+#### Testing
+
+Unit tests in `@agentic-kit/agent`.
+
+- `MemoryRunStore`: save → load round-trip; `load` of missing id returns
+  `undefined`; `delete` is idempotent; `delete` then `load` returns `undefined`.
+- `runRunStoreContractTests(makeMemoryStore)` from 0.2 runs the portable
+  contract suite against `MemoryRunStore`. The same export is consumed by
+  any third-party `RunStore` implementation.
+- Concurrent save/load on the same id (last write wins, no torn reads).
+- Re-pause `createdAt` preservation: a second `save()` of the same run id keeps
+  the original `createdAt`; only `updatedAt` advances. (1.1 does not yet
+  enforce this — fold into the contract suite.)
+- Abort-during-save race: `agent.abort()` while a `runStore.save()` is
+  in-flight resolves without orphaning the persisted record or surfacing a
+  rejected save promise.
+- Mixed-batch tool ordering: when an assistant turn contains a regular tool
+  call followed by a decision-bearing tool whose arguments fail validation,
+  the persisted `messages` order matches the LLM's tool-call order. (Latent
+  in 1.1's invalid-args branch; surfaces only via the contract suite.)
+
+### 1.3 Run Serialization Helpers
+
+#### Problem
+
+The agent emits a stream of typed events. To use it across an HTTP boundary —
+or any boundary that requires bytes — the consumer needs to serialize. The
+kit should ship the canonical form so consumers do not reinvent it.
+
+#### Design
+
+Standard Web primitives only. No framework helpers. The agent run object
+exposes both pull-based and push-based access.
+
+```ts
+interface AgentRunHandle {
+  events(): AsyncIterable<AgentEvent>
+  toReadableStream(): ReadableStream<AgentEvent>
+  toResponse(init?: ResponseInit): Response   // SSE-shaped body
+}
+
+const handle = agent.start({ messages, ... })
+return handle.toResponse()
+```
+
+`toResponse` returns a `Response` with `Content-Type: text/event-stream`, each
+`AgentEvent` serialized as one SSE frame. Compatible with any runtime that
+speaks standard `Response` and `ReadableStream`: Next.js App Router, Hono,
+Bun, Deno, Cloudflare Workers, raw Node 18+.
+
+A symmetric pair handles resume:
+
+```ts
+const handle = agent.resumeRun({ runId, decision, runStore })
+return handle.toResponse()
+```
+
+The wire format is the kit's `AgentEvent` discriminated union, serialized as
+JSON in SSE `data:` lines. No translation to any third-party protocol; if a
+consumer wants to bridge to one, they write the bridge.
+
+#### Testing
+
+Unit tests in `@agentic-kit/agent`.
+
+- `events()`: scripted provider events come out of the async iterable in
+  emission order with correct shapes.
+- `toReadableStream()`: bytes parsed back via `parseSSEStream` (from 0.2)
+  reproduce the original event sequence.
+- `toResponse()`: assert `Content-Type: text/event-stream`, no caching headers,
+  body parses as above.
+- Wire-format edge cases live in `__tests__/sse.test.ts` (0.4): split chunks,
+  multi-line `data:`, comments, trailing newlines, mid-event abort.
+- Backpressure: stream consumer pauses; producer respects it (no unbounded
+  buffer).
+
+### 1.4 `@agentic-kit/react`
+
+#### Problem
+
+The dominant consumer surface is browser UIs that stream from an agent endpoint.
+A canonical React hook avoids every consumer reimplementing the same fetch /
+parse / state-update / abort / resume loop.
+
+#### Design
+
+One hook. Headless — returns state and actions; renders nothing. Persistence
+is delegated to the consumer via lifecycle callbacks.
+
+```ts
+import { useChat } from '@agentic-kit/react'
+
+const chat = useChat({
+  api: '/api/chat',
+  body: () => ({ /* extra request body fields */ }),
+  initialMessages: storedMessages,
+  onMessage: (m) => {},          // streaming partial state
+  onFinish: (m) => {},           // turn complete; consumer may persist
+  onDecisionPending: (event) => {},  // tool paused; consumer renders UI
+})
+
+chat.send('hello')
+chat.respondWithDecision(value)  // delivers decision to /resume
+chat.abort()
+chat.messages         // Message[]
+chat.isStreaming      // boolean
+chat.pendingDecision  // event | undefined
+chat.error            // unknown | undefined
+```
+
+Behaviors the hook is responsible for:
+
+- POSTing to `api` with `messages` plus any consumer-supplied body fields.
+- Parsing the SSE response into `AgentEvent`s and folding them into `messages`.
+- Emitting `onMessage` per partial update, `onFinish` per turn end.
+- Surfacing `tool_decision_pending` events as `chat.pendingDecision` and via
+  `onDecisionPending`.
+- Rebroadcasting `respondWithDecision(value)` as a POST to `/resume` (path
+  configurable) with `{ runId, decision }`, and resuming stream consumption
+  from the response.
+- Plumbing an `AbortSignal` through `chat.abort()`.
+
+The hook does not own persistence, modes, system prompts, or any UI shape.
+
+#### Testing
+
+The only package using `testEnvironment: 'jsdom'`. Adds devDeps:
+`jest-environment-jsdom`, `@testing-library/react`, `react`, `react-dom`. Adds
+peerDeps: `react`, `react-dom`. `globalThis.fetch` is stubbed per-test to
+return `createScriptedSSEResponse(events)` from 0.2.
+
+- Send → stream → finish: messages assemble in order; `isStreaming` transitions;
+  `onMessage` and `onFinish` fire with correct payloads.
+- `body()` callback's fields appear in the POST body.
+- `chat.abort()` reaches the fetch mock's `AbortSignal`; state cleans up; no
+  late updates after abort.
+- Decision-pending: `onDecisionPending` fires; `chat.pendingDecision` set;
+  `respondWithDecision(value)` POSTs to `/resume` with `{ runId, decision }`;
+  the resumed stream folds into `messages`.
+- Network error / non-200 response: `chat.error` set; `messages` not corrupted.
+- Malformed SSE bytes: hook surfaces an error rather than crashing.
+- `initialMessages` hydrates state on mount.
+
+---
+
+## Phase 2 — Production Polish (should)
+
+### 2.1 Prompt Caching API
+
+The kit currently reads `cacheRead` and `cacheWrite` from `Usage` but exposes
+no API to *set* cache control on outgoing messages. Both Anthropic and OpenAI
+(via Anthropic-compatible providers and recent OpenAI features) support
+prompt caching, and the cost savings are material at scale.
+
+Design sketch: add an optional `cache?: 'short' | 'long'` flag at the message
+level (or at content-block level). Each provider adapter translates to its
+native control mechanism (Anthropic `cache_control: { type: 'ephemeral' }`,
+OpenAI cache strategy hints). The flag is advisory; providers without support
+ignore it.
+
+#### Testing
+
+Unit tests per provider adapter, matching the existing
+`anthropic.test.ts` / `openai.test.ts` idiom.
+
+- Mock HTTP intercepts the outgoing request body and headers.
+- Build a `Context` whose messages carry `cache: 'short' | 'long'`.
+- Anthropic: assert `cache_control: { type: 'ephemeral' }` on flagged blocks.
+- OpenAI: assert the corresponding native cache hint.
+- Ollama and other no-support providers: assert the flag is silently ignored,
+  no error.
+- `Usage.cacheRead` / `cacheWrite` are populated correctly on the assistant
+  response (existing usage assertion pattern).
+
+### 2.2 Telemetry / Middleware Hooks
+
+The agent loop today has no insertion points for observability or
+interception. Production consumers need at minimum:
+
+- A `before/after` provider call hook (latency, errors, token counts).
+- A `before/after` tool call hook (arguments, results, durations).
+- Stream event tap (without buffering the stream).
+
+Design as middleware composition over the run, akin to a small async
+interceptor chain. Standard error type for transient vs. terminal failures
+to support upstream retry logic.
+
+#### Testing
+
+Unit tests in `@agentic-kit/agent`.
+
+- Register middleware, run a scripted loop, assert hook invocation order and
+  arguments (provider request, response, tool call, tool result).
+- Multiple middlewares compose left-to-right with predictable ordering.
+- A throwing middleware does not crash the loop; the error surfaces via the
+  defined channel.
+- `before/after` pairs see matching correlation IDs (request ↔ response).
+- Stream-event tap does not buffer or reorder events.
+
+---
+
+## Phase 3 — Optional Extensions (could)
+
+### 3.1 Full Ollama Tool Support
+
+The Ollama adapter currently does not parse tool calls in streaming responses.
+Bring it to feature parity with the Anthropic and OpenAI adapters: tool call
+deltas, tool result round-trips, and live tests covering the full loop.
+
+#### Testing
+
+- Unit: parse scripted Ollama NDJSON tool-call chunks; assert the canonical
+  `AssistantMessageEvent` sequence is emitted.
+- Live (gated, in `ollama.live.test.ts`): tool-using smoke test against a
+  known-good local Ollama model. Skipped when `OLLAMA_LIVE_MODEL` is unset.
+
+### 3.2 Retry / Backoff
+
+A small built-in retry policy for transient provider failures (HTTP 408, 425,
+429, 500, 502, 503, 504; aborted-not-by-user network errors). Configurable
+attempt count, jittered exponential backoff. Disabled by default — consumers
+opt in. Layered above provider adapters, below the agent loop.
+
+#### Testing
+
+Unit tests using an injectable clock.
+
+- Provider mock returns scripted transient errors then success; assert retry
+  count and final outcome.
+- Backoff timings match the configured curve (use a fake clock; never sleep
+  for real in tests).
+- Non-retriable errors (400, 401, 403) fail immediately; no retries attempted.
+- Abort during a retry wait cancels promptly; no further attempts.
+- Retries respect a global deadline; total time bounded.
+
+### 3.3 Stream Resume on Disconnect
+
+If the agent loop is mid-run when the SSE connection drops, the client should
+be able to reconnect with the run ID and pick up where it left off. The
+machinery is largely a free side-effect of `RunStore` — the run survives;
+only stream-position tracking and an event replay endpoint are new. Useful
+for flaky-network and long-running flows.
+
+#### Testing
+
+- Unit: abort an in-flight `events()` iterator. Reload the run by id and call
+  `resumeRun`. Assert: events continue from the last-emitted checkpoint, no
+  duplicate side effects.
+- Integration (lane from 0.3): same flow over real HTTP — drop the connection
+  mid-stream, reconnect with `runId`, assert event continuity and correct
+  `Last-Event-ID` semantics.
+
+### 3.4 Client-Side Tool Execution
+
+For tools that genuinely require browser-only capabilities (DOM access,
+WebRTC, File System Access API, locally-running services, hardware bridges,
+wallet signing), introduce a `runs: 'client'` flag. The mechanism reuses the
+pause/resume rails: such tools emit a `tool_client_execute_pending` event,
+the browser-side dispatcher runs the registered local executor, and the
+result returns via the same resume endpoint shape.
+
+This is deferred until a real use case appears. Most agent applications do
+not need it, and shipping it prematurely would constrain the design.
+
+#### Testing
+
+- Unit (in `@agentic-kit/agent`): protocol layer only. Scripted provider
+  emits a `runs: 'client'` tool call. Assert: `tool_client_execute_pending`
+  event fires, loop halts. `agent.resume(runId, { result })` continues with
+  the supplied result as the tool result.
+- Unit (in `@agentic-kit/react`, jsdom): client dispatcher. Register a local
+  executor, fire a synthetic pending event, assert: executor runs with the
+  tool input, resulting POST to `/resume` includes the correct payload, the
+  resumed stream folds into `messages`.
+
+---
+
+## Non-Goals
+
+The kit will not ship the following. They belong in consumer applications,
+companion packages, or other ecosystems entirely.
+
+- **Conversation history persistence.** Lifecycle hooks expose what is needed;
+  storage is consumer-owned. Browser, server, sync model — none of it is the
+  kit's call.
+- **Structured output / `generateObject` analog.** Tool calls already provide
+  typed structured outputs via JSON Schema. A second mechanism is redundant.
+- **Schema library coupling.** No `@agentic-kit/zod`, no `@agentic-kit/typebox`.
+  Consumers convert their schema library of choice to JSON Schema at the
+  boundary; this is a one-line operation for every popular library.
+- **Framework-specific helpers.** No Next.js, Hono, Express, Fastify packages.
+  Standard `Response` and `ReadableStream` cover all of them.
+- **UI rendering / component library.** The kit is headless. React hook
+  exposes state and actions; consumers render however they want.
+- **Embeddings as a primary capability.** Per `REDESIGN_DECISIONS.md` #14,
+  embeddings live behind an optional capability interface or companion
+  package, not in the conversational core.
+- **System prompt construction utilities.** Prompt design is consumer-owned.
+- **Conversation modes / agent personas.** Application concern.
+- **Built-in production storage backends.** `MemoryRunStore` is the only
+  implementation the kit ships; Redis, KV, DB backends are for consumers.
+
+## Package Layout After Phase 1
+
+| Package                  | Change                                                                                      |
+| ------------------------ | ------------------------------------------------------------------------------------------- |
+| `agentic-kit`            | unchanged                                                                                   |
+| `@agentic-kit/agent`     | extended: pausable tools, `RunStore`, run serialization helpers, middleware hooks (Phase 2) |
+| `@agentic-kit/anthropic` | unchanged in Phase 1; caching API in Phase 2                                                |
+| `@agentic-kit/openai`    | unchanged in Phase 1; caching API in Phase 2                                                |
+| `@agentic-kit/ollama`    | unchanged in Phase 1; tool support in Phase 3                                               |
+| `@agentic-kit/react`     | **new** — `useChat` hook                                                                    |
+
+Shared test helpers live in `tools/test/` (repo-internal directory, not a
+package). Phase 2 and 3 add no new packages; everything extends in place.
+
+## Open Questions
+
+- **Run record schema versioning.** Once `RunStore` is shipped, the on-disk
+  `AgentRun` shape becomes a compatibility surface. Decide on an explicit
+  version field and migration story before 1.0.
+- **Decision schema validator scope.** Resolved (1.1): the decision validator
+  reuses `validateSchema` from `packages/agent/src/validation.ts` — same code
+  path as tool inputs. Discriminated-union and `oneOf` / `anyOf` coverage is
+  still untested; fold into the 1.2 contract suite.
+- **Lifecycle events across pause boundaries.** On resume, `agent_start`
+  re-fires (each `runLoop` entry is a fresh start) but `turn_start` does not
+  (the persisted assistant message is reused, not regenerated). This
+  asymmetry is invisible to a single-prompt consumer but matters for any
+  listener that tracks turn vs. run lifecycle. Decide before 1.4 whether to
+  introduce a distinct `agent_resume` event or to redocument `agent_start`
+  with explicit "loop entry" semantics — the `@agentic-kit/react` hook will
+  codify whichever choice externally.
+- **SSE vs. NDJSON.** SSE is the proposed default. NDJSON is simpler but lacks
+  reconnection semantics and event-type framing. Revisit if real-world
+  consumers report SSE problems behind specific proxies.
+- **`onDecisionPending` ergonomics.** Whether the React hook should auto-route
+  the next stream from `/resume` or require the consumer to call a follow-up
+  method explicitly. Default to auto for ergonomics; expose an opt-out.
+- **Live test policy for paid providers.** Anthropic/OpenAI live tests would
+  burn API credits. Default position: gated `*.live.test.ts` files with
+  env-var keys, manually triggered, never required by per-PR CI.
diff --git a/package.json b/package.json
index c5a1ff6..ad4e2f2 100644
--- a/package.json
+++ b/package.json
@@ -19,6 +19,7 @@
     "build": "pnpm -r run build",
     "build:dev": "pnpm -r run build:dev",
     "test": "pnpm -r run test",
+    "test:integration": "jest --config tests/integration/jest.config.js",
     "typecheck": "node ./scripts/typecheck.js",
     "test:live:ollama": "pnpm --filter @agentic-kit/ollama run test:live:smoke",
     "test:live:ollama:extended": "pnpm --filter @agentic-kit/ollama run test:live:extended",
diff --git a/packages/agent/__tests__/agent.test.ts b/packages/agent/__tests__/agent.test.ts
index aa6681c..d724c8f 100644
--- a/packages/agent/__tests__/agent.test.ts
+++ b/packages/agent/__tests__/agent.test.ts
@@ -4,117 +4,43 @@ import {
   createAssistantMessageEventStream,
   type ModelDescriptor,
 } from 'agentic-kit';
+import {
+  createScriptedProvider,
+  makeFakeAssistantMessage,
+  makeFakeModel,
+} from '@test/index';
 
-import { Agent } from '../src';
-
-function createModel(): ModelDescriptor {
-  return {
-    id: 'demo',
-    name: 'Demo',
-    api: 'fake',
-    provider: 'fake',
-    baseUrl: 'http://fake.local',
-    input: ['text'],
-    reasoning: false,
-    tools: true,
-  };
-}
+import {
+  Agent,
+  type AgentEvent,
+  type AgentTool,
+  DecisionValidationError,
+  MemoryRunStore,
+  RunNotFoundError,
+  ToolNotRegisteredError,
+} from '../src';
 
 describe('@agentic-kit/agent', () => {
   it('runs a minimal sequential tool loop', async () => {
     const responses = [
-      {
-        role: 'assistant' as const,
-        api: 'fake',
-        provider: 'fake',
-        model: 'demo',
-        usage: {
-          input: 1,
-          output: 1,
-          cacheRead: 0,
-          cacheWrite: 0,
-          totalTokens: 2,
-          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-        },
-        stopReason: 'toolUse' as const,
-        timestamp: Date.now(),
+      makeFakeAssistantMessage({
+        usage: makeUsage(),
+        stopReason: 'toolUse',
         content: [
-          { type: 'toolCall' as const, id: 'tool_1', name: 'echo', arguments: { text: 'hello' } },
+          { type: 'toolCall', id: 'tool_1', name: 'echo', arguments: { text: 'hello' } },
         ],
-      },
-      {
-        role: 'assistant' as const,
-        api: 'fake',
-        provider: 'fake',
-        model: 'demo',
-        usage: {
-          input: 1,
-          output: 1,
-          cacheRead: 0,
-          cacheWrite: 0,
-          totalTokens: 2,
-          cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-        },
-        stopReason: 'stop' as const,
-        timestamp: Date.now(),
-        content: [{ type: 'text' as const, text: 'done' }],
-      },
+      }),
+      makeFakeAssistantMessage({
+        usage: makeUsage(),
+        stopReason: 'stop',
+        content: [{ type: 'text', text: 'done' }],
+      }),
     ];
 
-    let callIndex = 0;
-    const streamFn = (_model: ModelDescriptor, _context: Context) => {
-      const stream = createAssistantMessageEventStream();
-      const response = responses[callIndex++];
-
-      queueMicrotask(() => {
-        stream.push({ type: 'start', partial: response });
-        if (response.content[0].type === 'toolCall') {
-          stream.push({
-            type: 'toolcall_start',
-            contentIndex: 0,
-            partial: response,
-          });
-          stream.push({
-            type: 'toolcall_end',
-            contentIndex: 0,
-            toolCall: response.content[0],
-            partial: response,
-          });
-        } else {
-          stream.push({
-            type: 'text_start',
-            contentIndex: 0,
-            partial: response,
-          });
-          stream.push({
-            type: 'text_delta',
-            contentIndex: 0,
-            delta: 'done',
-            partial: response,
-          });
-          stream.push({
-            type: 'text_end',
-            contentIndex: 0,
-            content: 'done',
-            partial: response,
-          });
-        }
-        stream.push({
-          type: 'done',
-          reason: response.stopReason === 'toolUse' ? 'toolUse' : 'stop',
-          message: response,
-        });
-        stream.end(response);
-      });
-
-      return stream;
-    };
-
+    const provider = createScriptedProvider({ responses });
     const agent = new Agent({
-      initialState: {
-        model: createModel(),
-      },
-      streamFn,
+      initialState: { model: makeFakeModel({ id: 'demo', name: 'Demo' }) },
+      streamFn: provider.stream,
     });
 
     agent.setTools([
@@ -129,7 +55,7 @@ describe('@agentic-kit/agent', () => {
           },
           required: ['text'],
         },
-        execute: async (_toolCallId, params) => ({
+        execute: async (_toolCallId, params, _decision) => ({
           content: [{ type: 'text', text: String(params.text) }],
         }),
       },
@@ -155,20 +81,20 @@ describe('@agentic-kit/agent', () => {
 
   it('turns tool argument validation failures into error tool results and continues', async () => {
     const responses = [
-      createAssistantResponse({
+      makeFakeAssistantMessage({
         stopReason: 'toolUse',
         content: [{ type: 'toolCall', id: 'tool_1', name: 'echo', arguments: {} }],
       }),
-      createAssistantResponse({
+      makeFakeAssistantMessage({
         stopReason: 'stop',
         content: [{ type: 'text', text: 'recovered' }],
       }),
     ];
 
-    let callIndex = 0;
+    const provider = createScriptedProvider({ responses });
     const agent = new Agent({
-      initialState: { model: createModel() },
-      streamFn: () => streamMessage(responses[callIndex++]),
+      initialState: { model: makeFakeModel({ id: 'demo', name: 'Demo' }) },
+      streamFn: provider.stream,
     });
 
     const execute = jest.fn(async () => ({
@@ -211,10 +137,10 @@ describe('@agentic-kit/agent', () => {
 
   it('records aborted assistant turns when the active stream is cancelled', async () => {
     const agent = new Agent({
-      initialState: { model: createModel() },
+      initialState: { model: makeFakeModel({ id: 'demo', name: 'Demo' }) },
       streamFn: (_model: ModelDescriptor, _context: Context, options) => {
         const stream = createAssistantMessageEventStream();
-        const partial = createAssistantResponse({
+        const partial = makeFakeAssistantMessage({
           stopReason: 'stop',
           content: [{ type: 'text', text: '' }],
         });
@@ -225,7 +151,7 @@ describe('@agentic-kit/agent', () => {
           options?.signal?.addEventListener(
             'abort',
             () => {
-              const aborted = createAssistantResponse({
+              const aborted: AssistantMessage = makeFakeAssistantMessage({
                 stopReason: 'aborted',
                 errorMessage: 'aborted by test',
                 content: [],
@@ -256,76 +182,213 @@ describe('@agentic-kit/agent', () => {
   });
 });
 
-function createAssistantResponse(overrides: Partial<AssistantMessage>): AssistantMessage {
+function makeUsage() {
   return {
-    ...createAssistantResponseBase(),
-    ...overrides,
+    input: 1,
+    output: 1,
+    cacheRead: 0,
+    cacheWrite: 0,
+    totalTokens: 2,
+    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
   };
 }
 
-function createAssistantResponseBase(): AssistantMessage {
-  return {
-    role: 'assistant' as const,
-    api: 'fake',
-    provider: 'fake',
-    model: 'demo',
-    usage: {
-      input: 1,
-      output: 1,
-      cacheRead: 0,
-      cacheWrite: 0,
-      totalTokens: 2,
-      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-    },
-    stopReason: 'stop' as const,
-    timestamp: Date.now(),
-    content: [] as AssistantMessage['content'],
-  };
-}
+describe('@agentic-kit/agent — pausable tools', () => {
+  function makeApprovalTool(execute: AgentTool['execute']): AgentTool {
+    return {
+      name: 'approve',
+      label: 'Approve',
+      description: 'Tool that requires explicit approval',
+      parameters: {
+        type: 'object',
+        properties: { target: { type: 'string' } },
+        required: ['target'],
+      },
+      decision: {
+        type: 'object',
+        properties: { approved: { type: 'boolean' } },
+        required: ['approved'],
+      },
+      execute,
+    };
+  }
+
+  function pauseResponse() {
+    return makeFakeAssistantMessage({
+      stopReason: 'toolUse',
+      content: [
+        { type: 'toolCall', id: 'tool_1', name: 'approve', arguments: { target: 'thing' } },
+      ],
+    });
+  }
+
+  function finalResponse() {
+    return makeFakeAssistantMessage({
+      stopReason: 'stop',
+      content: [{ type: 'text', text: 'finalized' }],
+    });
+  }
+
+  it('pauses on a decision-bearing tool, persists the run, and emits tool_decision_pending', async () => {
+    const provider = createScriptedProvider({ responses: [pauseResponse()] });
+    const runStore = new MemoryRunStore();
+    const saveSpy = jest.spyOn(runStore, 'save');
+    const execute = jest.fn();
+    const events: AgentEvent[] = [];
+
+    const agent = new Agent({
+      initialState: { model: makeFakeModel() },
+      streamFn: provider.stream,
+      runStore,
+    });
+    agent.subscribe((event) => events.push(event));
+    agent.setTools([makeApprovalTool(execute)]);
+
+    await agent.prompt('approve thing');
+
+    expect(execute).not.toHaveBeenCalled();
+    expect(saveSpy).toHaveBeenCalledTimes(1);
+
+    const pendingEvent = events.find((e) => e.type === 'tool_decision_pending');
+    expect(pendingEvent).toMatchObject({
+      type: 'tool_decision_pending',
+      toolCallId: 'tool_1',
+      toolName: 'approve',
+      input: { target: 'thing' },
+      schema: expect.objectContaining({ type: 'object' }),
+    });
+
+    const runId = (pendingEvent as { runId: string }).runId;
+    expect(runId).toBeTruthy();
+    expect(agent.pendingRunId).toBe(runId);
+    expect(agent.state.isStreaming).toBe(false);
+
+    expect(events.some((e) => e.type === 'agent_end')).toBe(false);
 
-function streamMessage(message: AssistantMessage) {
-  const stream = createAssistantMessageEventStream();
-
-  queueMicrotask(() => {
-    stream.push({ type: 'start', partial: message });
-    if (message.content[0]?.type === 'toolCall') {
-      stream.push({
-        type: 'toolcall_start',
-        contentIndex: 0,
-        partial: message,
-      });
-      stream.push({
-        type: 'toolcall_end',
-        contentIndex: 0,
-        toolCall: message.content[0],
-        partial: message,
-      });
-    } else {
-      stream.push({
-        type: 'text_start',
-        contentIndex: 0,
-        partial: message,
-      });
-      stream.push({
-        type: 'text_delta',
-        contentIndex: 0,
-        delta: message.content[0]?.type === 'text' ? message.content[0].text : '',
-        partial: message,
-      });
-      stream.push({
-        type: 'text_end',
-        contentIndex: 0,
-        content: message.content[0]?.type === 'text' ? message.content[0].text : '',
-        partial: message,
-      });
-    }
-    stream.push({
-      type: 'done',
-      reason: message.stopReason === 'toolUse' ? 'toolUse' : 'stop',
-      message,
+    const stored = await runStore.load(runId);
+    expect(stored).toMatchObject({
+      id: runId,
+      pending: { toolCallId: 'tool_1', toolName: 'approve', input: { target: 'thing' } },
     });
-    stream.end(message);
+    expect(stored?.tools[0]).not.toHaveProperty('execute');
   });
 
-  return stream;
-}
+  it('resume invokes execute with the decision argument and continues the loop', async () => {
+    const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] });
+    const execute = jest.fn(
+      async (_id: string, _params: Record<string, unknown>, decision: unknown) => ({
+        content: [{ type: 'text' as const, text: `decision=${JSON.stringify(decision)}` }],
+      })
+    );
+    const events: AgentEvent[] = [];
+
+    const agent = new Agent({
+      initialState: { model: makeFakeModel() },
+      streamFn: provider.stream,
+    });
+    agent.subscribe((event) => events.push(event));
+    agent.setTools([makeApprovalTool(execute)]);
+
+    await agent.prompt('approve thing');
+    const runId = agent.pendingRunId!;
+    expect(runId).toBeTruthy();
+
+    await agent.resume(runId, { approved: true });
+
+    expect(execute).toHaveBeenCalledTimes(1);
+    expect(execute.mock.calls[0]?.[2]).toEqual({ approved: true });
+    expect(agent.pendingRunId).toBeUndefined();
+
+    expect(agent.state.messages.at(-1)).toMatchObject({
+      role: 'assistant',
+      content: [{ type: 'text', text: 'finalized' }],
+    });
+    expect(events.some((e) => e.type === 'agent_end')).toBe(true);
+  });
+
+  it('rejects a malformed decision and leaves the run resumable', async () => {
+    const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] });
+    const runStore = new MemoryRunStore();
+    const execute = jest.fn(
+      async (_id: string, _params: Record<string, unknown>, decision: unknown) => ({
+        content: [{ type: 'text' as const, text: `decision=${JSON.stringify(decision)}` }],
+      })
+    );
+
+    const agent = new Agent({
+      initialState: { model: makeFakeModel() },
+      streamFn: provider.stream,
+      runStore,
+    });
+    agent.setTools([makeApprovalTool(execute)]);
+
+    await agent.prompt('approve thing');
+    const runId = agent.pendingRunId!;
+
+    await expect(agent.resume(runId, { approved: 'yes' })).rejects.toBeInstanceOf(
+      DecisionValidationError
+    );
+    expect(execute).not.toHaveBeenCalled();
+    expect(agent.pendingRunId).toBe(runId);
+    expect(await runStore.load(runId)).toBeDefined();
+
+    await agent.resume(runId, { approved: true });
+
+    expect(execute).toHaveBeenCalledTimes(1);
+    expect(agent.pendingRunId).toBeUndefined();
+    expect(await runStore.load(runId)).toBeUndefined();
+  });
+
+  it('throws RunNotFoundError when resuming an unknown run', async () => {
+    const agent = new Agent({
+      initialState: { model: makeFakeModel() },
+      streamFn: createScriptedProvider({ responses: [] }).stream,
+    });
+
+    await expect(agent.resume('does-not-exist', { approved: true })).rejects.toBeInstanceOf(
+      RunNotFoundError
+    );
+  });
+
+  it('cleans up the persisted run when abort() is called while paused', async () => {
+    const provider = createScriptedProvider({ responses: [pauseResponse()] });
+    const runStore = new MemoryRunStore();
+
+    const agent = new Agent({
+      initialState: { model: makeFakeModel() },
+      streamFn: provider.stream,
+      runStore,
+    });
+    agent.setTools([makeApprovalTool(jest.fn())]);
+
+    await agent.prompt('approve thing');
+    const runId = agent.pendingRunId!;
+    expect(await runStore.load(runId)).toBeDefined();
+
+    agent.abort();
+    await new Promise((resolve) => setImmediate(resolve));
+
+    expect(agent.pendingRunId).toBeUndefined();
+    expect(await runStore.load(runId)).toBeUndefined();
+  });
+
+  it('throws ToolNotRegisteredError when resuming after the tool has been removed', async () => {
+    const provider = createScriptedProvider({ responses: [pauseResponse(), finalResponse()] });
+    const tool = makeApprovalTool(jest.fn());
+
+    const agent = new Agent({
+      initialState: { model: makeFakeModel() },
+      streamFn: provider.stream,
+    });
+    agent.setTools([tool]);
+
+    await agent.prompt('approve thing');
+    const runId = agent.pendingRunId!;
+
+    agent.setTools([]);
+
+    await expect(agent.resume(runId, { approved: true })).rejects.toBeInstanceOf(
+      ToolNotRegisteredError
+    );
+  });
+});
diff --git a/packages/agent/__tests__/sse.test.ts b/packages/agent/__tests__/sse.test.ts
new file mode 100644
index 0000000..682e3dd
--- /dev/null
+++ b/packages/agent/__tests__/sse.test.ts
@@ -0,0 +1,114 @@
+// Exercises the `parseSSEStream` helper from tools/test/, not a production
+// parser. The kit ships no SSE parser today; consumers parse on their side.
+// These tests pin down the helper's edge-case behavior so future parser work
+// has a baseline to match.
+import { parseSSEStream } from '@test/index';
+
+import type { AgentEvent } from '../src';
+
+const encoder = new TextEncoder();
+
+function streamFromChunks(chunks: string[]): ReadableStream<Uint8Array> {
+  return new ReadableStream<Uint8Array>({
+    start(controller) {
+      for (const chunk of chunks) {
+        controller.enqueue(encoder.encode(chunk));
+      }
+      controller.close();
+    },
+  });
+}
+
+async function collect(stream: ReadableStream<Uint8Array>): Promise<AgentEvent[]> {
+  const out: AgentEvent[] = [];
+  for await (const event of parseSSEStream(stream)) {
+    out.push(event);
+  }
+  return out;
+}
+
+describe('parseSSEStream', () => {
+  it('parses a single complete event', async () => {
+    const events = await collect(streamFromChunks(['data: {"type":"agent_start"}\n\n']));
+    expect(events).toEqual([{ type: 'agent_start' }]);
+  });
+
+  it('reassembles a payload split across chunks', async () => {
+    const events = await collect(
+      streamFromChunks(['data: {"type":"agen', 't_start"}\n', '\n'])
+    );
+    expect(events).toEqual([{ type: 'agent_start' }]);
+  });
+
+  it('joins multiple data: lines with newlines into a single payload', async () => {
+    const events = await collect(
+      streamFromChunks(['data: {"type":\ndata: "agent_start"}\n\n'])
+    );
+    expect(events).toEqual([{ type: 'agent_start' }]);
+  });
+
+  it('ignores comment lines starting with `:`', async () => {
+    const events = await collect(
+      streamFromChunks([': keepalive\ndata: {"type":"turn_start"}\n\n'])
+    );
+    expect(events).toEqual([{ type: 'turn_start' }]);
+  });
+
+  it('ignores event:, id:, and retry: framing fields', async () => {
+    const events = await collect(
+      streamFromChunks([
+        'event: turn_start\nid: 1\nretry: 1000\ndata: {"type":"turn_start"}\n\n',
+      ])
+    );
+    expect(events).toEqual([{ type: 'turn_start' }]);
+  });
+
+  it('skips a [DONE] marker without yielding an event', async () => {
+    const events = await collect(
+      streamFromChunks([
+        'data: {"type":"agent_start"}\n\ndata: [DONE]\n\n',
+      ])
+    );
+    expect(events).toEqual([{ type: 'agent_start' }]);
+  });
+
+  it('handles trailing newlines without emitting a spurious event', async () => {
+    const events = await collect(
+      streamFromChunks(['data: {"type":"agent_start"}\n\n\n\n'])
+    );
+    expect(events).toEqual([{ type: 'agent_start' }]);
+  });
+
+  it('handles CRLF line endings', async () => {
+    const events = await collect(
+      streamFromChunks(['data: {"type":"agent_start"}\r\n\r\n'])
+    );
+    expect(events).toEqual([{ type: 'agent_start' }]);
+  });
+
+  it('drops a final incomplete event when the stream ends mid-event', async () => {
+    const events = await collect(
+      streamFromChunks([
+        'data: {"type":"agent_start"}\n\n',
+        'data: {"type":"turn_start"}',
+      ])
+    );
+    expect(events).toEqual([{ type: 'agent_start' }]);
+  });
+
+  it('yields multiple complete events in order', async () => {
+    const events = await collect(
+      streamFromChunks([
+        'data: {"type":"agent_start"}\n\ndata: {"type":"turn_start"}\n\n',
+      ])
+    );
+    expect(events).toEqual([{ type: 'agent_start' }, { type: 'turn_start' }]);
+  });
+
+  it('honors an optional space after the `data:` field name', async () => {
+    const events = await collect(
+      streamFromChunks(['data:{"type":"agent_start"}\n\n'])
+    );
+    expect(events).toEqual([{ type: 'agent_start' }]);
+  });
+});
diff --git a/packages/agent/__tests__/tsconfig.json b/packages/agent/__tests__/tsconfig.json
index 6c4fda5..3ae83c4 100644
--- a/packages/agent/__tests__/tsconfig.json
+++ b/packages/agent/__tests__/tsconfig.json
@@ -2,9 +2,19 @@
   "extends": "../tsconfig.json",
   "compilerOptions": {
     "noEmit": true,
-    "rootDir": "..",
+    "rootDir": "../../..",
+    "baseUrl": "../../..",
+    "paths": {
+      "@test/*": ["tools/test/*"],
+      "agentic-kit": ["packages/agentic-kit/src"],
+      "@agentic-kit/agent": ["packages/agent/src"]
+    },
     "types": ["jest", "node"]
   },
-  "include": ["./**/*.ts", "../src/**/*.ts"],
+  "include": [
+    "./**/*.ts",
+    "../src/**/*.ts",
+    "../../../tools/test/**/*.ts"
+  ],
   "exclude": ["../dist", "../node_modules"]
 }
diff --git a/packages/agent/jest.config.js b/packages/agent/jest.config.js
index 6622fd1..2069518 100644
--- a/packages/agent/jest.config.js
+++ b/packages/agent/jest.config.js
@@ -17,6 +17,7 @@ module.exports = {
   modulePathIgnorePatterns: ['dist/*'],
   moduleNameMapper: {
     '^(\\.{1,2}/.*)\\.js$': '$1',
+    '^@test/(.*)$': '<rootDir>/../../tools/test/$1',
     '^agentic-kit$': '<rootDir>/../agentic-kit/src',
     '^@agentic-kit/(.*)$': '<rootDir>/../$1/src',
   },
diff --git a/packages/agent/src/agent.ts b/packages/agent/src/agent.ts
index 8fc8503..a7ff4fd 100644
--- a/packages/agent/src/agent.ts
+++ b/packages/agent/src/agent.ts
@@ -1,3 +1,5 @@
+import { randomUUID } from 'node:crypto';
+
 import {
   type AssistantMessage,
   type Context,
@@ -6,8 +8,19 @@ import {
   type Message,
   stream,
   type StreamOptions,
+  type ToolCallContent,
+  type ToolDefinition,
 } from 'agentic-kit';
 
+import {
+  type AgentRun,
+  type AgentRunPending,
+  DecisionValidationError,
+  MemoryRunStore,
+  RunNotFoundError,
+  type RunStore,
+  ToolNotRegisteredError,
+} from './run-store.js';
 import type {
   AgentEvent,
   AgentOptions,
@@ -15,15 +28,22 @@ import type {
   AgentTool,
   AgentToolResult,
 } from './types.js';
-import { validateToolArguments as defaultValidateToolArguments } from './validation.js';
+import {
+  validateSchema,
+  validateToolArguments as defaultValidateToolArguments,
+} from './validation.js';
 
 export class Agent {
   private readonly listeners = new Set<(event: AgentEvent) => void>();
   private readonly transformContext?: AgentOptions['transformContext'];
   private readonly streamFn: NonNullable<AgentOptions['streamFn']>;
   private readonly validateToolArguments: NonNullable<AgentOptions['validateToolArguments']>;
+  private readonly runStore: RunStore;
+  private readonly generateRunId: () => string;
   private abortController?: AbortController;
   private running?: Promise<void>;
+  private currentRunId?: string;
+  private pausedRunId?: string;
 
   private _state: AgentState;
 
@@ -40,6 +60,8 @@ export class Agent {
     this.streamFn = options.streamFn ?? stream;
     this.transformContext = options.transformContext;
     this.validateToolArguments = options.validateToolArguments ?? defaultValidateToolArguments;
+    this.runStore = options.runStore ?? new MemoryRunStore();
+    this.generateRunId = options.generateRunId ?? randomUUID;
   }
 
   get state(): AgentState {
@@ -88,6 +110,12 @@ export class Agent {
   }
 
   abort(): void {
+    if (this.pausedRunId) {
+      const runId = this.pausedRunId;
+      this.pausedRunId = undefined;
+      void this.runStore.delete(runId);
+      return;
+    }
     this.abortController?.abort();
   }
 
@@ -99,15 +127,21 @@ export class Agent {
     if (this._state.isStreaming) {
       throw new Error('Agent is already processing a prompt');
     }
+    if (this.pausedRunId) {
+      throw new Error('Agent is paused awaiting a decision; call resume() or abort() first');
+    }
 
     const message = typeof input === 'string' ? createUserMessage(input) : input;
-    await this.runLoop([message]);
+    await this.runLoop({ runId: this.generateRunId(), initialMessages: [message] });
   }
 
   async continue(): Promise<void> {
     if (this._state.isStreaming) {
       throw new Error('Agent is already processing');
     }
+    if (this.pausedRunId) {
+      throw new Error('Agent is paused awaiting a decision; call resume() or abort() first');
+    }
 
     const lastMessage = this._state.messages[this._state.messages.length - 1];
     if (!lastMessage) {
@@ -117,62 +151,131 @@ export class Agent {
       throw new Error('Cannot continue from message role: assistant');
     }
 
-    await this.runLoop();
+    await this.runLoop({ runId: this.generateRunId() });
+  }
+
+  get pendingRunId(): string | undefined {
+    return this.pausedRunId;
   }
 
-  private async runLoop(initialMessages?: Message[]): Promise<void> {
+  async resume(runId: string, decision: unknown): Promise<void> {
+    if (this._state.isStreaming) {
+      throw new Error('Agent is already processing');
+    }
+
+    const run = await this.runStore.load(runId);
+    if (!run) {
+      throw new RunNotFoundError(runId);
+    }
+    if (!run.pending) {
+      throw new Error(`Run ${runId} is not paused`);
+    }
+
+    const tool = this._state.tools.find((t) => t.name === run.pending!.toolName);
+    if (!tool) {
+      throw new ToolNotRegisteredError(runId, run.pending.toolName);
+    }
+    if (!tool.decision) {
+      throw new Error(
+        `Tool '${tool.name}' has no decision schema; cannot resume run ${runId}`
+      );
+    }
+
+    const errors = validateSchema(tool.decision, decision, 'root');
+    if (errors.length > 0) {
+      throw new DecisionValidationError(runId, tool.name, errors);
+    }
+
+    this._state.model = run.model;
+    if (run.systemPrompt !== undefined) {
+      this._state.systemPrompt = run.systemPrompt;
+    }
+    this._state.messages = [...run.messages];
+    this.pausedRunId = undefined;
+
+    await this.runLoop({
+      runId,
+      resumeDecision: { toolCallId: run.pending.toolCallId, decision },
+    });
+  }
+
+  private async runLoop(opts: {
+    runId: string;
+    initialMessages?: Message[];
+    resumeDecision?: { toolCallId: string; decision: unknown };
+  }): Promise<void> {
     this.running = (async () => {
       this.abortController = new AbortController();
       this._state.isStreaming = true;
       this._state.streamMessage = null;
       this._state.error = undefined;
+      this.currentRunId = opts.runId;
 
       try {
         this.emit({ type: 'agent_start' });
 
-        if (initialMessages && initialMessages.length > 0) {
-          for (const message of initialMessages) {
+        if (opts.initialMessages && opts.initialMessages.length > 0) {
+          for (const message of opts.initialMessages) {
             this.emit({ type: 'message_start', message });
             this.appendMessage(message);
             this.emit({ type: 'message_end', message });
           }
         }
 
-        while (true) {
-          this.emit({ type: 'turn_start' });
-
-          const assistantMessage = await this.generateAssistantMessage(this.abortController.signal);
-          this.appendMessage(assistantMessage);
-          this.emit({ type: 'message_end', message: assistantMessage });
+        let resumeDecision = opts.resumeDecision;
 
-          if (assistantMessage.stopReason === 'error' || assistantMessage.stopReason === 'aborted') {
-            this._state.error = assistantMessage.errorMessage;
-            this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] });
-            break;
+        while (true) {
+          let assistantMessage: AssistantMessage;
+
+          if (resumeDecision) {
+            const last = this._state.messages[this._state.messages.length - 1];
+            if (!last || last.role !== 'assistant') {
+              throw new Error('Cannot resume: last message is not an assistant message');
+            }
+            assistantMessage = last;
+          } else {
+            this.emit({ type: 'turn_start' });
+            assistantMessage = await this.generateAssistantMessage(this.abortController.signal);
+            this.appendMessage(assistantMessage);
+            this.emit({ type: 'message_end', message: assistantMessage });
+
+            if (assistantMessage.stopReason === 'error' || assistantMessage.stopReason === 'aborted') {
+              this._state.error = assistantMessage.errorMessage;
+              this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] });
+              break;
+            }
           }
 
-          const toolCalls = assistantMessage.content.filter((block) => block.type === 'toolCall');
+          const toolCalls = assistantMessage.content.filter(
+            (block): block is ToolCallContent => block.type === 'toolCall'
+          );
           if (toolCalls.length === 0) {
             this.emit({ type: 'turn_end', message: assistantMessage, toolResults: [] });
             break;
           }
 
-          const toolResults = await this.executeToolCalls(toolCalls, this.abortController.signal);
-          for (const toolResult of toolResults) {
-            this.emit({ type: 'message_start', message: toolResult });
-            this.appendMessage(toolResult);
-            this.emit({ type: 'message_end', message: toolResult });
+          const outcome = await this.executeToolCalls(
+            toolCalls,
+            this.abortController.signal,
+            resumeDecision
+          );
+          resumeDecision = undefined;
+
+          if (outcome.status === 'paused') {
+            return;
           }
 
-          this.emit({ type: 'turn_end', message: assistantMessage, toolResults });
+          this.emit({ type: 'turn_end', message: assistantMessage, toolResults: outcome.results });
         }
 
         this.emit({ type: 'agent_end', messages: [...this._state.messages] });
+        await this.runStore.delete(opts.runId);
       } finally {
         this._state.isStreaming = false;
         this._state.streamMessage = null;
         this.abortController = undefined;
         this.running = undefined;
+        this.currentRunId = undefined;
       }
     })();
 
@@ -228,34 +331,143 @@ export class Agent {
   }
 
   private async executeToolCalls(
-    toolCalls: Array<Extract<AssistantMessage['content'][number], { type: 'toolCall' }>>,
-    signal: AbortSignal
-  ) {
-    const results = [];
+    toolCalls: ToolCallContent[],
+    signal: AbortSignal,
+    resumeDecision?: { toolCallId: string; decision: unknown }
+  ): Promise<
+    | { status: 'completed'; results: ReturnType<typeof createToolResultMessage>[] }
+    | { status: 'paused' }
+  > {
+    const completedToolCallIds = new Set(
+      this._state.messages
+        .filter((m): m is Extract<Message, { role: 'toolResult' }> => m.role === 'toolResult')
+        .map((m) => m.toolCallId)
+    );
+
+    const results: ReturnType<typeof createToolResultMessage>[] = [];
 
     for (const toolCall of toolCalls) {
-      const tool = this._state.tools.find((candidate) => candidate.name === toolCall.name);
-      this.emit({
-        type: 'tool_execution_start',
-        toolCallId: toolCall.id,
-        toolName: toolCall.name,
-        args: toolCall.arguments as Record<string, unknown>,
-      });
+      if (completedToolCallIds.has(toolCall.id)) {
+        continue;
+      }
 
-      let result: AgentToolResult;
-      let isError = false;
+      const tool = this._state.tools.find((candidate) => candidate.name === toolCall.name);
+      const args = toolCall.arguments as Record<string, unknown>;
+      const isResumeTarget = resumeDecision?.toolCallId === toolCall.id;
+
+      if (tool?.decision && !isResumeTarget) {
+        let validatedArgs: Record<string, unknown>;
+        try {
+          validatedArgs = this.validateToolArguments(tool.parameters, args);
+        } catch (error) {
+          const result: AgentToolResult = {
+            content: [
+              {
+                type: 'text',
+                text: error instanceof Error ? error.message : String(error),
+              },
+            ],
+          };
+          this.emit({
+            type: 'tool_execution_start',
+            toolCallId: toolCall.id,
+            toolName: toolCall.name,
+            args,
+          });
+          this.emit({
+            type: 'tool_execution_end',
+            toolCallId: toolCall.id,
+            toolName: toolCall.name,
+            result,
+            isError: true,
+          });
+          const toolResult = createToolResultMessage(toolCall.id, toolCall.name, result.content, true);
+          this.appendMessageWithEvents(toolResult);
+          continue;
+        }
 
-      try {
-        if (!tool) {
-          throw new Error(`Tool '${toolCall.name}' not found`);
+        for (const toolResult of results) {
+          this.appendMessageWithEvents(toolResult);
         }
 
-        const validatedArgs = this.validateToolArguments(
-          tool.parameters,
-          toolCall.arguments as Record<string, unknown>
-        );
+        const runId = this.currentRunId!;
+        const pending: AgentRunPending = {
+          toolCallId: toolCall.id,
+          toolName: toolCall.name,
+          input: validatedArgs,
+        };
+        const now = Date.now();
+        const run: AgentRun = {
+          id: runId,
+          model: this._state.model,
+          systemPrompt: this._state.systemPrompt,
+          tools: this._state.tools.map(toToolDefinition),
+          messages: [...this._state.messages],
+          pending,
+          createdAt: now,
+          updatedAt: now,
+        };
+        await this.runStore.save(run);
+        this.pausedRunId = runId;
+        this.emit({
+          type: 'tool_decision_pending',
+          runId,
+          toolCallId: toolCall.id,
+          toolName: toolCall.name,
+          input: validatedArgs,
+          schema: tool.decision,
+        });
+        return { status: 'paused' };
+      }
+
+      const decisionForExecute = isResumeTarget ? resumeDecision!.decision : undefined;
+      const toolResult = await this.executeOneTool(
+        tool,
+        toolCall,
+        args,
+        decisionForExecute,
+        signal
+      );
+      results.push(toolResult);
+    }
+
+    for (const toolResult of results) {
+      this.appendMessageWithEvents(toolResult);
+    }
+
+    return { status: 'completed', results };
+  }
+
+  private async executeOneTool(
+    tool: AgentTool | undefined,
+    toolCall: ToolCallContent,
+    args: Record<string, unknown>,
+    decision: unknown,
+    signal: AbortSignal
+  ): Promise<ReturnType<typeof createToolResultMessage>> {
+    this.emit({
+      type: 'tool_execution_start',
+      toolCallId: toolCall.id,
+      toolName: toolCall.name,
+      args,
+    });
+
+    let result: AgentToolResult;
+    let isError = false;
+
+    try {
+      if (!tool) {
+        throw new Error(`Tool '${toolCall.name}' not found`);
+      }
+
+      const validatedArgs = this.validateToolArguments(tool.parameters, args);
 
-        result = await tool.execute(toolCall.id, validatedArgs, signal, (partialResult) => {
+      result = await tool.execute(
+        toolCall.id,
+        validatedArgs,
+        decision,
+        signal,
+        (partialResult) => {
           this.emit({
             type: 'tool_execution_update',
             toolCallId: toolCall.id,
@@ -263,33 +475,35 @@ export class Agent {
             args: validatedArgs,
             partialResult,
           });
-        });
-      } catch (error) {
-        result = {
-          content: [
-            {
-              type: 'text',
-              text: error instanceof Error ? error.message : String(error),
-            },
-          ],
-        };
-        isError = true;
-      }
-
-      this.emit({
-        type: 'tool_execution_end',
-        toolCallId: toolCall.id,
-        toolName: toolCall.name,
-        result,
-        isError,
-      });
-
-      results.push(
-        createToolResultMessage(toolCall.id, toolCall.name, result.content, isError)
+        }
       );
+    } catch (error) {
+      result = {
+        content: [
+          {
+            type: 'text',
+            text: error instanceof Error ? error.message : String(error),
+          },
+        ],
+      };
+      isError = true;
     }
 
-    return results;
+    this.emit({
+      type: 'tool_execution_end',
+      toolCallId: toolCall.id,
+      toolName: toolCall.name,
+      result,
+      isError,
+    });
+
+    return createToolResultMessage(toolCall.id, toolCall.name, result.content, isError);
+  }
+
+  private appendMessageWithEvents(message: Message): void {
+    this.emit({ type: 'message_start', message });
+    this.appendMessage(message);
+    this.emit({ type: 'message_end', message });
   }
 
   private emit(event: AgentEvent): void {
@@ -298,3 +512,11 @@ export class Agent {
     }
   }
 }
+
+function toToolDefinition(tool: AgentTool): ToolDefinition {
+  return {
+    name: tool.name,
+    description: tool.description,
+    parameters: tool.parameters,
+  };
+}
diff --git a/packages/agent/src/index.ts b/packages/agent/src/index.ts
index b8b99bb..bad2c86 100644
--- a/packages/agent/src/index.ts
+++ b/packages/agent/src/index.ts
@@ -1,3 +1,4 @@
 export * from './agent.js';
+export * from './run-store.js';
 export * from './types.js';
 export * from './validation.js';
diff --git a/packages/agent/src/run-store.ts b/packages/agent/src/run-store.ts
new file mode 100644
index 0000000..a658d69
--- /dev/null
+++ b/packages/agent/src/run-store.ts
@@ -0,0 +1,81 @@
+import type { Message, ModelDescriptor, ToolDefinition } from 'agentic-kit';
+
+export interface AgentRunPending {
+  toolCallId: string;
+  toolName: string;
+  input: Record<string, unknown>;
+}
+
+export interface AgentRun {
+  id: string;
+  model: ModelDescriptor;
+  systemPrompt?: string;
+  tools: ToolDefinition[];
+  messages: Message[];
+  pending?: AgentRunPending;
+  createdAt: number;
+  updatedAt: number;
+}
+
+export interface RunStore {
+  save(run: AgentRun): Promise<void>;
+  load(id: string): Promise<AgentRun | undefined>;
+  delete(id: string): Promise<void>;
+}
+
+export class MemoryRunStore implements RunStore {
+  private readonly runs = new Map<string, AgentRun>();
+
+  async save(run: AgentRun): Promise<void> {
+    this.runs.set(run.id, cloneRun(run));
+  }
+
+  async load(id: string): Promise<AgentRun | undefined> {
+    const run = this.runs.get(id);
+    return run ? cloneRun(run) : undefined;
+  }
+
+  async delete(id: string): Promise<void> {
+    this.runs.delete(id);
+  }
+}
+
+export class RunNotFoundError extends Error {
+  readonly runId: string;
+
+  constructor(runId: string) {
+    super(`Run not found: ${runId}`);
+    this.name = 'RunNotFoundError';
+    this.runId = runId;
+  }
+}
+
+export class DecisionValidationError extends Error {
+  readonly runId: string;
+  readonly toolName: string;
+  readonly errors: string[];
+
+  constructor(runId: string, toolName: string, errors: string[]) {
+    super(`Decision validation failed for tool '${toolName}':\n${errors.map((e) => `- ${e}`).join('\n')}`);
+    this.name = 'DecisionValidationError';
+    this.runId = runId;
+    this.toolName = toolName;
+    this.errors = errors;
+  }
+}
+
+export class ToolNotRegisteredError extends Error {
+  readonly runId: string;
+  readonly toolName: string;
+
+  constructor(runId: string, toolName: string) {
+    super(`Tool '${toolName}' is not registered on the agent resuming run ${runId}`);
+    this.name = 'ToolNotRegisteredError';
+    this.runId = runId;
+    this.toolName = toolName;
+  }
+}
+
+function cloneRun(run: AgentRun): AgentRun {
+  return JSON.parse(JSON.stringify(run)) as AgentRun;
+}
diff --git a/packages/agent/src/types.ts b/packages/agent/src/types.ts
index 1486987..9e724d9 100644
--- a/packages/agent/src/types.ts
+++ b/packages/agent/src/types.ts
@@ -10,6 +10,8 @@ import type {
   ToolResultMessage,
 } from 'agentic-kit';
 
+import type { RunStore } from './run-store.js';
+
 export interface AgentToolResult<TDetails = unknown> {
   content: ToolResultMessage<TDetails>['content'];
   details?: TDetails;
@@ -21,9 +23,11 @@ export type AgentToolUpdateCallback<TDetails = unknown> = (
 
 export interface AgentTool<TDetails = unknown> extends ToolDefinition {
   label: string;
+  decision?: JsonSchema;
   execute: (
     toolCallId: string,
     params: Record<string, unknown>,
+    decision: unknown,
     signal?: AbortSignal,
     onUpdate?: AgentToolUpdateCallback<TDetails>
   ) => Promise<AgentToolResult<TDetails>>;
@@ -66,6 +70,14 @@ export type AgentEvent =
       toolName: string;
       result: AgentToolResult;
       isError: boolean;
+    }
+  | {
+      type: 'tool_decision_pending';
+      runId: string;
+      toolCallId: string;
+      toolName: string;
+      input: Record<string, unknown>;
+      schema: JsonSchema;
     };
 
 export interface AgentOptions {
@@ -80,4 +92,6 @@ export interface AgentOptions {
     schema: JsonSchema,
     args: Record<string, unknown>
   ) => Record<string, unknown>;
+  runStore?: RunStore;
+  generateRunId?: () => string;
 }
diff --git a/packages/agent/src/validation.ts b/packages/agent/src/validation.ts
index 51634c7..3fb227e 100644
--- a/packages/agent/src/validation.ts
+++ b/packages/agent/src/validation.ts
@@ -12,7 +12,7 @@ export function validateToolArguments(
   throw new Error(`Tool argument validation failed:\n${errors.map((error) => `- ${error}`).join('\n')}`);
 }
 
-function validateSchema(schema: JsonSchema, value: unknown, path: string): string[] {
+export function validateSchema(schema: JsonSchema, value: unknown, path: string): string[] {
   if (!schema || Object.keys(schema).length === 0) {
     return [];
   }
diff --git a/packages/agentic-kit/__tests__/adapter.test.ts b/packages/agentic-kit/__tests__/adapter.test.ts
index b186f64..60dccdd 100644
--- a/packages/agentic-kit/__tests__/adapter.test.ts
+++ b/packages/agentic-kit/__tests__/adapter.test.ts
@@ -1,47 +1,28 @@
+import {
+  createScriptedProvider,
+  makeFakeAssistantMessage,
+  makeFakeModel,
+} from '@test/index';
+
 import {
   AgentKit,
   type AssistantMessage,
-  createAssistantMessageEventStream,
   getMessageText,
   type ModelDescriptor,
-  type ProviderAdapter,
   transformMessages,
 } from '../src';
 
 function createFakeModel(): ModelDescriptor {
-  return {
-    id: 'demo',
-    name: 'Demo',
-    api: 'fake-api',
-    provider: 'fake',
-    baseUrl: 'http://fake.local',
-    input: ['text'],
-    reasoning: false,
-    tools: true,
-  };
+  return makeFakeModel({ name: 'Demo' });
 }
 
 function createAssistantMessage(
   overrides: Partial<AssistantMessage> = {}
 ): AssistantMessage {
-  return {
-    role: 'assistant',
-    api: 'fake-api',
-    provider: 'fake',
-    model: 'demo',
-    usage: {
-      input: 0,
-      output: 0,
-      cacheRead: 0,
-      cacheWrite: 0,
-      totalTokens: 0,
-      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
-    },
-    stopReason: 'stop',
-    timestamp: Date.now(),
+  return makeFakeAssistantMessage({
     content: [{ type: 'text', text: 'hello world' }],
     ...overrides,
-  };
+  });
 }
 
 describe('agentic-kit core', () => {
@@ -236,43 +217,11 @@ describe('agentic-kit core', () => {
   });
 
   it('keeps the legacy AgentKit generate API working through structured streams', async () => {
-    const provider: ProviderAdapter & { name: string } = {
-      api: 'fake-api',
-      provider: 'fake',
-      name: 'fake',
-      createModel: () => createFakeModel(),
-      stream: () => {
-        const stream = createAssistantMessageEventStream();
-        const message = createAssistantMessage();
-
-        queueMicrotask(() => {
-          stream.push({ type: 'start', partial: { ...message, content: [{ type: 'text', text: '' }] } });
-          stream.push({
-            type: 'text_start',
-            contentIndex: 0,
-            partial: { ...message, content: [{ type: 'text', text: '' }] },
-          });
-          stream.push({
-            type: 'text_delta',
-            contentIndex: 0,
-            delta: 'hello world',
-            partial: message,
-          });
-          stream.push({
-            type: 'text_end',
-            contentIndex: 0,
-            content: 'hello world',
-            partial: message,
-          });
-          stream.push({ type: 'done', reason: 'stop', message });
-          stream.end(message);
-        });
-
-        return stream;
-      },
-    };
-
-    const kit = new AgentKit().addProvider(provider);
+    const kit = new AgentKit().addProvider(
+      createScriptedProvider({
+        responses: [createAssistantMessage(), createAssistantMessage()],
+      })
+    );
     const chunks: string[] = [];
     await kit.generate(
       { model: 'demo', prompt: 'hi', stream: true },
@@ -284,29 +233,17 @@ describe('agentic-kit core', () => {
   });
 
   it('rejects legacy generate when a provider returns a terminal error in non-stream mode', async () => {
-    const provider: ProviderAdapter & { name: string } = {
-      api: 'fake-api',
-      provider: 'fake',
-      name: 'fake',
-      createModel: () => createFakeModel(),
-      stream: () => {
-        const stream = createAssistantMessageEventStream();
-        const failure = createAssistantMessage({
-          stopReason: 'error',
-          errorMessage: 'provider failed',
-          content: [{ type: 'text', text: '' }],
-        });
-
-        queueMicrotask(() => {
-          stream.push({ type: 'error', reason: 'error', error: failure });
-          stream.end(failure);
-        });
-
-        return stream;
-      },
-    };
-
-    const kit = new AgentKit().addProvider(provider);
+    const kit = new AgentKit().addProvider(
+      createScriptedProvider({
+        responses: [
+          createAssistantMessage({
+            stopReason: 'error',
+            errorMessage: 'provider failed',
+            content: [{ type: 'text', text: '' }],
+          }),
+        ],
+      })
+    );
     const onComplete = jest.fn();
     const onError = jest.fn();
     const onStateChange = jest.fn();
@@ -324,44 +261,17 @@ describe('agentic-kit core', () => {
   });
 
   it('rejects legacy generate when a provider returns a terminal error in stream mode', async () => {
-    const provider: ProviderAdapter & { name: string } = {
-      api: 'fake-api',
-      provider: 'fake',
-      name: 'fake',
-      createModel: () => createFakeModel(),
-      stream: () => {
-        const stream = createAssistantMessageEventStream();
-        const partial = createAssistantMessage({
-          content: [{ type: 'text', text: 'partial' }],
-        });
-        const failure = createAssistantMessage({
-          stopReason: 'error',
-          errorMessage: 'provider failed',
-          content: [{ type: 'text', text: 'partial' }],
-        });
-
-        queueMicrotask(() => {
-          stream.push({ type: 'start', partial: { ...partial, content: [{ type: 'text', text: '' }] } });
-          stream.push({
-            type: 'text_start',
-            contentIndex: 0,
-            partial: { ...partial, content: [{ type: 'text', text: '' }] },
-          });
-          stream.push({
-            type: 'text_delta',
-            contentIndex: 0,
-            delta: 'partial',
-            partial,
-          });
-          stream.push({ type: 'error', reason: 'error', error: failure });
-          stream.end(failure);
-        });
-
-        return stream;
-      },
-    };
-
-    const kit = new AgentKit().addProvider(provider);
+    const kit = new AgentKit().addProvider(
+      createScriptedProvider({
+        responses: [
+          createAssistantMessage({
+            stopReason: 'error',
+            errorMessage: 'provider failed',
+            content: [{ type: 'text', text: 'partial' }],
+          }),
+        ],
+      })
+    );
     const chunks: string[] = [];
     const onComplete = jest.fn();
     const onError = jest.fn();
diff --git a/packages/agentic-kit/__tests__/tsconfig.json b/packages/agentic-kit/__tests__/tsconfig.json
index 6c4fda5..3ae83c4 100644
--- a/packages/agentic-kit/__tests__/tsconfig.json
+++ b/packages/agentic-kit/__tests__/tsconfig.json
@@ -2,9 +2,19 @@
   "extends": "../tsconfig.json",
   "compilerOptions": {
     "noEmit": true,
-    "rootDir": "..",
+    "rootDir": "../../..",
+    "baseUrl": "../../..",
+    "paths": {
+      "@test/*": ["tools/test/*"],
+      "agentic-kit": ["packages/agentic-kit/src"],
+      "@agentic-kit/agent": ["packages/agent/src"]
+    },
     "types": ["jest", "node"]
   },
-  "include": ["./**/*.ts", "../src/**/*.ts"],
+  "include": [
+    "./**/*.ts",
+    "../src/**/*.ts",
+    "../../../tools/test/**/*.ts"
+  ],
   "exclude": ["../dist", "../node_modules"]
 }
diff --git a/packages/agentic-kit/jest.config.js b/packages/agentic-kit/jest.config.js
index c539b86..79ccd00 100644
--- a/packages/agentic-kit/jest.config.js
+++ b/packages/agentic-kit/jest.config.js
@@ -17,6 +17,8 @@ module.exports = {
   modulePathIgnorePatterns: ['dist/*'],
   moduleNameMapper: {
     '^(\\.{1,2}/.*)\\.js$': '$1',
+    '^@test/(.*)$': '<rootDir>/../../tools/test/$1',
+    '^agentic-kit$': '<rootDir>/src',
     '^@agentic-kit/(.*)$': '<rootDir>/../$1/src',
   },
   setupFilesAfterEnv: ['<rootDir>/jest.setup.js']
diff --git a/packages/anthropic/__tests__/tsconfig.json b/packages/anthropic/__tests__/tsconfig.json
index 6c4fda5..3ae83c4 100644
--- a/packages/anthropic/__tests__/tsconfig.json
+++ b/packages/anthropic/__tests__/tsconfig.json
@@ -2,9 +2,19 @@
   "extends": "../tsconfig.json",
   "compilerOptions": {
     "noEmit": true,
-    "rootDir": "..",
+    "rootDir": "../../..",
+    "baseUrl": "../../..",
+    "paths": {
+      "@test/*": ["tools/test/*"],
+      "agentic-kit": ["packages/agentic-kit/src"],
+      "@agentic-kit/agent": ["packages/agent/src"]
+    },
     "types": ["jest", "node"]
   },
-  "include": ["./**/*.ts", "../src/**/*.ts"],
+  "include": [
+    "./**/*.ts",
+    "../src/**/*.ts",
+    "../../../tools/test/**/*.ts"
+  ],
   "exclude": ["../dist", "../node_modules"]
 }
diff --git a/packages/anthropic/jest.config.js b/packages/anthropic/jest.config.js
index e11f478..d0dfaaa 100644
--- a/packages/anthropic/jest.config.js
+++ b/packages/anthropic/jest.config.js
@@ -15,5 +15,11 @@ module.exports = {
   testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$',
   moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
   modulePathIgnorePatterns: ['dist/*'],
+  moduleNameMapper: {
+    '^(\\.{1,2}/.*)\\.js$': '$1',
+    '^@test/(.*)$': '<rootDir>/../../tools/test/$1',
+    '^agentic-kit$': '<rootDir>/../agentic-kit/src',
+    '^@agentic-kit/(.*)$': '<rootDir>/../$1/src',
+  },
   setupFilesAfterEnv: ['<rootDir>/jest.setup.js']
 };
diff --git a/packages/ollama/__tests__/tsconfig.json b/packages/ollama/__tests__/tsconfig.json
index 6c4fda5..3ae83c4 100644
--- a/packages/ollama/__tests__/tsconfig.json
+++ b/packages/ollama/__tests__/tsconfig.json
@@ -2,9 +2,19 @@
   "extends": "../tsconfig.json",
   "compilerOptions": {
     "noEmit": true,
-    "rootDir": "..",
+    "rootDir": "../../..",
+    "baseUrl": "../../..",
+    "paths": {
+      "@test/*": ["tools/test/*"],
+      "agentic-kit": ["packages/agentic-kit/src"],
+      "@agentic-kit/agent": ["packages/agent/src"]
+    },
     "types": ["jest", "node"]
   },
-  "include": ["./**/*.ts", "../src/**/*.ts"],
+  "include": [
+    "./**/*.ts",
+    "../src/**/*.ts",
+    "../../../tools/test/**/*.ts"
+  ],
   "exclude": ["../dist", "../node_modules"]
 }
diff --git a/packages/ollama/jest.config.js b/packages/ollama/jest.config.js
index 5b89d20..061b4b9 100644
--- a/packages/ollama/jest.config.js
+++ b/packages/ollama/jest.config.js
@@ -16,5 +16,11 @@ module.exports = {
   moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
   modulePathIgnorePatterns: ['dist/*'],
   testPathIgnorePatterns: process.env.OLLAMA_LIVE_READY === '1' ? [] : ['\\.live\\.test\\.ts$'],
+  moduleNameMapper: {
+    '^(\\.{1,2}/.*)\\.js$': '$1',
+    '^@test/(.*)$': '<rootDir>/../../tools/test/$1',
+    '^agentic-kit$': '<rootDir>/../agentic-kit/src',
+    '^@agentic-kit/(.*)$': '<rootDir>/../$1/src',
+  },
   setupFilesAfterEnv: ['<rootDir>/jest.setup.js']
 };
diff --git a/packages/openai/__tests__/tsconfig.json b/packages/openai/__tests__/tsconfig.json
index 6c4fda5..3ae83c4 100644
--- a/packages/openai/__tests__/tsconfig.json
+++ b/packages/openai/__tests__/tsconfig.json
@@ -2,9 +2,19 @@
   "extends": "../tsconfig.json",
   "compilerOptions": {
     "noEmit": true,
-    "rootDir": "..",
+    "rootDir": "../../..",
+    "baseUrl": "../../..",
+    "paths": {
+      "@test/*": ["tools/test/*"],
+      "agentic-kit": ["packages/agentic-kit/src"],
+      "@agentic-kit/agent": ["packages/agent/src"]
+    },
     "types": ["jest", "node"]
   },
-  "include": ["./**/*.ts", "../src/**/*.ts"],
+  "include": [
+    "./**/*.ts",
+    "../src/**/*.ts",
+    "../../../tools/test/**/*.ts"
+  ],
   "exclude": ["../dist", "../node_modules"]
 }
diff --git a/packages/openai/jest.config.js b/packages/openai/jest.config.js
index e11f478..d0dfaaa 100644
--- a/packages/openai/jest.config.js
+++ b/packages/openai/jest.config.js
@@ -15,5 +15,11 @@ module.exports = {
   testRegex: '(/__tests__/.*|(\\.|/)(test|spec))\\.(jsx?|tsx?)$',
   moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
   modulePathIgnorePatterns: ['dist/*'],
+  moduleNameMapper: {
+    '^(\\.{1,2}/.*)\\.js$': '$1',
+    '^@test/(.*)$': '<rootDir>/../../tools/test/$1',
+    '^agentic-kit$': '<rootDir>/../agentic-kit/src',
+    '^@agentic-kit/(.*)$': '<rootDir>/../$1/src',
+  },
   setupFilesAfterEnv: ['<rootDir>/jest.setup.js']
 };
diff --git a/tests/integration/README.md b/tests/integration/README.md
new file mode 100644
index 0000000..3ec22dd
--- /dev/null
+++ b/tests/integration/README.md
@@ -0,0 +1,14 @@
+# Integration tests
+
+Workspace-level lane that runs in-process integration tests against the kit's
+HTTP boundary. Brings up `http.createServer`, exercises real serialization
+and `fetch`, with mocked providers.
+
+Empty in Phase 0 — scaffolding only. First tests land with Phase 1.3 (run
+serialization helpers) and 1.1 (pause/resume). Run with:
+
+```sh
+pnpm test:integration
+```
+
+`passWithNoTests` is set, so the script is safe to run while empty.
diff --git a/tests/integration/jest.config.js b/tests/integration/jest.config.js
new file mode 100644
index 0000000..4507567
--- /dev/null
+++ b/tests/integration/jest.config.js
@@ -0,0 +1,24 @@
+/** @type {import('ts-jest').JestConfigWithTsJest} */
+module.exports = {
+  preset: 'ts-jest',
+  testEnvironment: 'node',
+  rootDir: '.',
+  passWithNoTests: true,
+  transform: {
+    '^.+\\.tsx?$': [
+      'ts-jest',
+      {
+        babelConfig: false,
+        tsconfig: '<rootDir>/tsconfig.json',
+      },
+    ],
+  },
+  testRegex: '\\.test\\.(jsx?|tsx?)$',
+  moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'],
+  moduleNameMapper: {
+    '^(\\.{1,2}/.*)\\.js$': '$1',
+    '^@test/(.*)$': '<rootDir>/../../tools/test/$1',
+    '^agentic-kit$': '<rootDir>/../../packages/agentic-kit/src',
+    '^@agentic-kit/(.*)$': '<rootDir>/../../packages/$1/src',
+  },
+};
diff --git a/tests/integration/tsconfig.json b/tests/integration/tsconfig.json
new file mode 100644
index 0000000..f287326
--- /dev/null
+++ b/tests/integration/tsconfig.json
@@ -0,0 +1,18 @@
+{
+  "extends": "../../tsconfig.json",
+  "compilerOptions": {
+    "noEmit": true,
+    "rootDir": "../..",
+    "baseUrl": "../..",
+    "paths": {
+      "@test/*": ["tools/test/*"],
+      "agentic-kit": ["packages/agentic-kit/src"],
+      "@agentic-kit/agent": ["packages/agent/src"],
+      "@agentic-kit/anthropic": ["packages/anthropic/src"],
+      "@agentic-kit/openai": ["packages/openai/src"],
+      "@agentic-kit/ollama": ["packages/ollama/src"]
+    },
+    "types": ["jest", "node"]
+  },
+  "include": ["./**/*.ts", "../../tools/test/**/*.ts"]
+}
diff --git a/tools/test/README.md b/tools/test/README.md
new file mode 100644
index 0000000..0513eb6
--- /dev/null
+++ b/tools/test/README.md
@@ -0,0 +1,24 @@
+# Shared test helpers
+
+Repo-internal helpers for unit tests. Imported via the `@test/*` tsconfig path
+alias (see each package's `__tests__/tsconfig.json` and `jest.config.js`).
+Not a workspace package, not published.
+
+## Helpers
+
+- `makeFakeModel(overrides?)` — `ModelDescriptor` with sane defaults.
+- `makeFakeAssistantMessage(overrides?)` — `AssistantMessage` with zero usage and stop reason.
+- `createScriptedProvider({ responses })` — `ProviderAdapter` that emits a derived event sequence per `AssistantMessage` in `responses` on successive `stream()` calls. `stopReason` of `error` or `aborted` produces an `error` terminal event; otherwise `done`.
+- `createScriptedSSEResponse(events)` — `Response` whose body serializes each `AgentEvent` as one SSE frame (`data: <json>\n\n`).
+- `parseSSEStream(stream)` — async iterable that parses `AgentEvent` SSE frames from a `ReadableStream<Uint8Array>`. Handles split chunks, multi-line `data:`, comment lines, event-type framing, trailing newlines, and mid-event abort (incomplete trailing event is dropped, per SSE spec).
+
+## Deferred
+
+`runRunStoreContractTests(makeStore)` lands with Phase 1.2 alongside the
+`RunStore` interface. Adding it now would be dead scaffolding.
+
+## Adding a helper
+
+Promote a helper to `tools/test/` only when a third package needs the same
+idiom. Duplication of a 30-line scripted helper across two packages is fine;
+duplicating across three is the trigger for promotion.
diff --git a/tools/test/fixtures.ts b/tools/test/fixtures.ts
new file mode 100644
index 0000000..003fa2b
--- /dev/null
+++ b/tools/test/fixtures.ts
@@ -0,0 +1,40 @@
+import type { AssistantMessage, ModelDescriptor, Usage } from 'agentic-kit';
+
+const ZERO_USAGE: Usage = {
+  input: 0,
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+  totalTokens: 0,
+  cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
+};
+
+export function makeFakeModel(overrides: Partial<ModelDescriptor> = {}): ModelDescriptor {
+  return {
+    id: 'demo',
+    name: 'Demo',
+    api: 'fake-api',
+    provider: 'fake',
+    baseUrl: 'http://fake.local',
+    input: ['text'],
+    reasoning: false,
+    tools: true,
+    ...overrides,
+  };
+}
+
+export function makeFakeAssistantMessage(
+  overrides: Partial<AssistantMessage> = {}
+): AssistantMessage {
+  return {
+    role: 'assistant',
+    api: 'fake-api',
+    provider: 'fake',
+    model: 'demo',
+    usage: { ...ZERO_USAGE, cost: { ...ZERO_USAGE.cost } },
+    stopReason: 'stop',
+    timestamp: Date.now(),
+    content: [{ type: 'text', text: '' }],
+    ...overrides,
+  };
+}
diff --git a/tools/test/index.ts b/tools/test/index.ts
new file mode 100644
index 0000000..4c8ab2e
--- /dev/null
+++ b/tools/test/index.ts
@@ -0,0 +1,3 @@
+export { makeFakeAssistantMessage, makeFakeModel } from './fixtures';
+export { createScriptedProvider, type ScriptedProviderOptions } from './scripted-provider';
+export { createScriptedSSEResponse, parseSSEStream } from './scripted-sse';
diff --git a/tools/test/scripted-provider.ts b/tools/test/scripted-provider.ts
new file mode 100644
index 0000000..da43921
--- /dev/null
+++ b/tools/test/scripted-provider.ts
@@ -0,0 +1,115 @@
+import {
+  type AssistantMessage,
+  type AssistantMessageEvent,
+  createAssistantMessageEventStream,
+  type ModelDescriptor,
+  type ProviderAdapter,
+} from 'agentic-kit';
+
+import { makeFakeAssistantMessage, makeFakeModel } from './fixtures';
+
+export interface ScriptedProviderOptions {
+  responses: AssistantMessage[];
+  delayMs?: number;
+  api?: string;
+  provider?: string;
+}
+
+export function createScriptedProvider(opts: ScriptedProviderOptions): ProviderAdapter {
+  const api = opts.api ?? 'fake-api';
+  const provider = opts.provider ?? 'fake';
+  let callIndex = 0;
+
+  return {
+    api,
+    provider,
+    createModel: (modelId: string, overrides?: Partial<ModelDescriptor>) =>
+      makeFakeModel({ id: modelId, api, provider, ...overrides }),
+    stream: () => {
+      const stream = createAssistantMessageEventStream();
+      const message =
+        opts.responses[callIndex++] ??
+        makeFakeAssistantMessage({
+          api,
+          provider,
+          stopReason: 'error',
+          errorMessage: 'scripted provider: no response queued for this call',
+          content: [],
+        });
+
+      const events = deriveEventSequence(message);
+      const emit = () => {
+        for (const event of events) {
+          stream.push(event);
+        }
+        stream.end(message);
+      };
+
+      if (opts.delayMs && opts.delayMs > 0) {
+        setTimeout(emit, opts.delayMs);
+      } else {
+        queueMicrotask(emit);
+      }
+
+      return stream;
+    },
+  };
+}
+
+function deriveEventSequence(message: AssistantMessage): AssistantMessageEvent[] {
+  const events: AssistantMessageEvent[] = [];
+  events.push({ type: 'start', partial: message });
+
+  for (let i = 0; i < message.content.length; i++) {
+    const block = message.content[i];
+    if (block.type === 'text') {
+      events.push({ type: 'text_start', contentIndex: i, partial: message });
+      if (block.text.length > 0) {
+        events.push({
+          type: 'text_delta',
+          contentIndex: i,
+          delta: block.text,
+          partial: message,
+        });
+      }
+      events.push({
+        type: 'text_end',
+        contentIndex: i,
+        content: block.text,
+        partial: message,
+      });
+    } else if (block.type === 'thinking') {
+      events.push({ type: 'thinking_start', contentIndex: i, partial: message });
+      if (block.thinking.length > 0) {
+        events.push({
+          type: 'thinking_delta',
+          contentIndex: i,
+          delta: block.thinking,
+          partial: message,
+        });
+      }
+      events.push({
+        type: 'thinking_end',
+        contentIndex: i,
+        content: block.thinking,
+        partial: message,
+      });
+    } else if (block.type === 'toolCall') {
+      events.push({ type: 'toolcall_start', contentIndex: i, partial: message });
+      events.push({
+        type: 'toolcall_end',
+        contentIndex: i,
+        toolCall: block,
+        partial: message,
+      });
+    }
+  }
+
+  if (message.stopReason === 'error' || message.stopReason === 'aborted') {
+    events.push({ type: 'error', reason: message.stopReason, error: message });
+  } else {
+    events.push({ type: 'done', reason: message.stopReason, message });
+  }
+
+  return events;
+}
diff --git a/tools/test/scripted-sse.ts b/tools/test/scripted-sse.ts
new file mode 100644
index 0000000..ae7497e
--- /dev/null
+++ b/tools/test/scripted-sse.ts
@@ -0,0 +1,88 @@
+import type { AgentEvent } from '@agentic-kit/agent';
+
+export function createScriptedSSEResponse(events: AgentEvent[]): Response {
+  const encoder = new TextEncoder();
+  const body = new ReadableStream<Uint8Array>({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(encoder.encode(`data: ${JSON.stringify(event)}\n\n`));
+      }
+      controller.close();
+    },
+  });
+
+  return new Response(body, {
+    status: 200,
+    headers: {
+      'Content-Type': 'text/event-stream',
+      'Cache-Control': 'no-cache',
+      Connection: 'keep-alive',
+    },
+  });
+}
+
+export async function* parseSSEStream(
+  stream: ReadableStream<Uint8Array>
+): AsyncIterable<AgentEvent> {
+  const reader = stream.getReader();
+  const decoder = new TextDecoder('utf-8');
+  let buffer = '';
+
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) {
+        break;
+      }
+
+      buffer += decoder.decode(value, { stream: true });
+      buffer = buffer.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
+
+      let blankIdx = buffer.indexOf('\n\n');
+      while (blankIdx !== -1) {
+        const rawEvent = buffer.slice(0, blankIdx);
+        buffer = buffer.slice(blankIdx + 2);
+        const event = parseEvent(rawEvent);
+        if (event) {
+          yield event;
+        }
+        blankIdx = buffer.indexOf('\n\n');
+      }
+    }
+  } finally {
+    reader.releaseLock();
+  }
+}
+
+function parseEvent(raw: string): AgentEvent | null {
+  const dataLines: string[] = [];
+  for (const line of raw.split('\n')) {
+    if (line === '' || line.startsWith(':')) {
+      continue;
+    }
+    const colon = line.indexOf(':');
+    const field = colon === -1 ? line : line.slice(0, colon);
+    let value = colon === -1 ? '' : line.slice(colon + 1);
+    if (value.startsWith(' ')) {
+      value = value.slice(1);
+    }
+    if (field === 'data') {
+      dataLines.push(value);
+    }
+  }
+
+  if (dataLines.length === 0) {
+    return null;
+  }
+
+  const data = dataLines.join('\n');
+  if (data === '[DONE]') {
+    return null;
+  }
+
+  try {
+    return JSON.parse(data) as AgentEvent;
+  } catch {
+    return null;
+  }
+}