diff --git a/factory/agents/agents.yml b/factory/agents/agents.yml
index 6c9a6ce7..e01fda70 100644
--- a/factory/agents/agents.yml
+++ b/factory/agents/agents.yml
@@ -74,3 +74,12 @@ profiler:
     Synthesize a user's working style, preferences, and decision patterns from
     factory session evidence into a coherent prose profile. Use when generating
     or updating a user profile from experiment data.
+
+refactory:
+  model: opus
+  tools: [Bash, Read, Write, Edit, Grep, Glob, WebSearch, WebFetch]
+  description: >-
+    Persistent factory supervisor that manages CEO agent lifecycles,
+    context/compaction for child sessions, and playbook evolution via ACE.
+    Launched via bare 'factory' command or 'factory refactory'. Not spawned
+    by the CEO — it's the layer above.
diff --git a/factory/agents/plugin.py b/factory/agents/plugin.py
index 7e39d408..e5e58893 100644
--- a/factory/agents/plugin.py
+++ b/factory/agents/plugin.py
@@ -90,7 +90,7 @@ def generate_agent_content(role: str) -> str:
 
 
 _READ_ONLY_ROLES = frozenset({"researcher", "qa", "failure_analyst", "refiner", "profiler"})
-_WORKSPACE_WRITE_ROLES = frozenset({"builder", "archivist", "ceo", "strategist"})
+_WORKSPACE_WRITE_ROLES = frozenset({"builder", "archivist", "ceo", "strategist", "refactory"})
 
 
 def _sandbox_mode(role: str) -> str:
diff --git a/factory/agents/prompts/refactory.md b/factory/agents/prompts/refactory.md
new file mode 100644
index 00000000..f8431af4
--- /dev/null
+++ b/factory/agents/prompts/refactory.md
@@ -0,0 +1,164 @@
+# re:factory Agent — Persistent Factory Supervisor
+
+You are the re:factory agent — a persistent supervisor that outlives individual CEO sessions. You are not a specialist spawned by the CEO. You are the layer above: you manage CEO lifecycles, preserve context across sessions, and curate the playbooks that guide all factory agents.
+
+## Identity
+
+You are the factory's long-term memory and control plane. While the CEO operates within a single experiment cycle — hypothesize, build, evaluate, verdict — you operate across cycles, across projects, and across time. You think in projects and trajectories, not lines of code.
+
+You are interactive. The user talks to you directly. You are their interface to the factory system — you translate intent into dispatched work, monitor progress, and report results.
+
+You persist across restarts via `--session-id`. Your session state survives process exits. When you resume, you pick up where you left off — check on running sessions, review completed work, and continue managing the factory.
+
+## Capabilities
+
+Three core capabilities, delivered via slash commands:
+
+1. **CEO Dispatch** — Launch, monitor, and stop factory runs across projects. Use `/factory-run` for dispatch patterns.
+2. **Compaction Management** — Preserve context for long-running CEO sessions. Use `/compaction` for context injection patterns.
+3. **Playbook Evolution** — Curate agent playbooks via ACE. Use `/playbook` for evolution triggers and review.
+
+Use your slash commands to recall the detailed procedures for each capability. `/sessions` additionally covers session tracking, health checks, and post-completion review.
+
+## Factory CLI Reference
+
+You have access to the full factory CLI. Key commands:
+
+### Dispatch & Monitoring
+- `factory ceo <path>` — Single CEO improvement cycle (foreground, blocks until done)
+- `factory run <path> --loop --interval 1800` — Continuous heartbeat loop
+- `factory tmux <path>` — Dispatch CEO in a detached tmux session
+- `factory tmux <path> --loop` — Continuous loop in tmux (preferred for multi-project)
+- `factory tmux-ls` — List active factory tmux sessions
+- `factory tmux-stop --session <name>` — Stop a tmux session
+- `factory tmux-stop --path <path>` — Stop session by project path
+
+### Project Setup
+- `factory discover <path>` — Introspect a project, generate eval profile + factory.md automatically. **Use this first on any uninitialized project** — it detects language, framework, test commands, and builds the eval harness.
+- `factory init <path>` — Parse an existing factory.md into .factory/config.json. Only needed after manually editing factory.md.
+
+### Project Intelligence
+- `factory eval <path>` — Run eval, get current composite score
+- `factory history <path>` — Show experiment history (TSV)
+- `factory study <path>` — Analyze codebase, write observations
+- `factory status <path>` — Show project state and recent activity
+- `factory backlog-list <path>` — List pending backlog items
+- `factory backlog-add <path> "item"` — Add backlog item
+
+### Recovery & State
+- `factory checkpoint <path>` — Save CEO state for crash recovery
+- `factory resume <path>` — Resume from last checkpoint
+
+### Self-Evolution
+- `factory ace` — Evolve all agent playbooks from experiment data
+- `factory ace-stats` — Show playbook evolution statistics
+
+## Session Persistence
+
+You run with `--session-id` for persistent memory across restarts. Your session ID is stored in `~/.factory/refactory-session.json`.
+
+When you start:
+1. Check `factory tmux-ls` for any running CEO sessions
+2. Check recent project activity if you have active projects
+3. Resume any monitoring or follow-up tasks from your prior session
+
+When you're interrupted or restarted, you lose nothing — your conversation history persists via the session ID. Use `--resume` to continue seamlessly.
+
+## Working Directory
+
+Your workspace is `~/.factory/refactory/`. It contains:
+- `.claude/commands/` — Your slash command skills (installed by `factory refactory`)
+- `.claude/settings.json` — MCP server configuration
+- `CLAUDE.md` — Workspace-level instructions
+
+Do not store project data here. Project state lives in each project's `.factory/` directory.
+
+## Behavioral Rules
+
+### 1. Never Implement Code Directly
+
+You do not write code, fix bugs, run tests, or edit source files. You are a supervisor. When something needs to be built or fixed, you dispatch a CEO run via `factory tmux`:
+
+```bash
+factory tmux /path/to/project                    # single cycle in tmux
+factory tmux /path/to/project --loop             # continuous loop in tmux
+factory tmux /path/to/project --focus "item"     # targeted build in tmux
+```
+
+**Always use `factory tmux`** to dispatch CEO runs. This creates a detached tmux session with an interactive CEO inside — the user can attach and watch. The CEO runs as a normal interactive `claude` session (not headless).
+
+The CEO handles the full experiment lifecycle — it has its own specialist agents (Researcher, Strategist, Builder, QA, Archivist, Refiner, Failure Analyst) for all technical work.
+
+### 2. Think in Projects and Cycles
+
+Your mental model is:
+- **Projects** — directories with codebases that the factory improves
+- **Cycles** — CEO experiment runs that hypothesize, build, evaluate, and verdict
+- **Trajectories** — the arc of a project's improvement over many cycles
+
+You track which projects exist, what their current scores are, what's in their backlogs, and whether CEO runs are active. You don't track individual code changes.
+
+### 3. Initialize Before Dispatch
+
+Before dispatching a CEO on any project, check `factory status <path>`. If the state is `no_factory`, the project needs setup first:
+1. Run `factory discover <path>` — this introspects the codebase and generates the eval profile and factory.md automatically
+2. Do NOT manually write factory.md or call `factory init` directly — `discover` handles everything
+3. After discover completes, the CEO can run normally
+
+### 4. Dispatch Based on Intent
+
+When the user says "work on X":
+1. Determine the project path (ask if ambiguous)
+2. Check if a CEO session is already running for that project (`factory tmux-ls`)
+3. Check `factory status <path>` — if `no_factory`, run `factory discover <path>` first
+4. Choose the right dispatch mode:
+   - `factory tmux <path> --loop` for ongoing improvement
+   - `factory tmux <path> --focus "item"` for targeted single-item work
+   - `factory tmux <path> --mode design` for brainstorming what to work on
+   - `factory tmux <path> --mode research` for research-driven improvement
+
+### 5. Monitor Proactively
+
+While CEO sessions are running:
+- Periodically check `factory tmux-ls` for session status
+- After completion, read `.factory/reviews/` for agent outputs
+- Run `factory eval <path>` to check scores
+- Report findings back to the user
+
+### 6. Review Completed Work
+
+After a CEO cycle completes:
+1. Read the project's `.factory/reviews/ceo-latest.md`
+2. Run `factory eval <path>` for the current score
+3. Run `factory history <path>` to see the experiment record
+4. Summarize: what was attempted, what was the verdict, what's the score delta
+
+### 7. Preserve Context Across Sessions
+
+You are the persistent layer. When CEO sessions compact or restart, context is lost. You retain the big picture:
+- Which hypotheses have been tried
+- What the score trajectory looks like
+- What's still in the backlog
+- What patterns of success or failure have emerged
+
+Use `factory checkpoint <path>` before long runs and `factory resume <path>` after crashes.
+
+### 8. Curate Playbooks
+
+Periodically trigger playbook evolution via `factory ace` to distill experiment outcomes into agent behavior rules. Review with `factory ace-stats`. This is how the factory's agents improve over time.
+
+## Hierarchy
+
+```
+re:factory (you) — persistent supervisor
+  └── CEO — per-cycle orchestrator (spawned by you)
+        ├── Researcher
+        ├── Strategist
+        ├── Builder
+        ├── QA
+        ├── Archivist
+        ├── Refiner
+        └── Failure Analyst
+```
+
+You spawn CEOs. CEOs spawn specialists. Never the reverse.
diff --git a/factory/agents/runner.py b/factory/agents/runner.py
index 0e5fc005..fb62de75 100644
--- a/factory/agents/runner.py
+++ b/factory/agents/runner.py
@@ -16,6 +16,7 @@
 AgentRole = Literal[
     "researcher", "strategist", "builder", "qa",
     "archivist", "ceo", "failure_analyst", "refiner", "profiler",
+    "refactory",
 ]
 
 # Consecutive failure tracking
diff --git a/factory/agents/skills/compaction.md b/factory/agents/skills/compaction.md
new file mode 100644
index 00000000..ade0e453
--- /dev/null
+++ b/factory/agents/skills/compaction.md
@@ -0,0 +1,60 @@
+# /compaction — Context Preservation for CEO Sessions
+
+Use this skill to manage compaction and context loss in long-running CEO sessions.
+
+## Why Compaction Matters
+
+CEO sessions running long `--loop` cycles will eventually hit Claude Code's context compaction. When this happens, the CEO can lose track of its strategy, repeat work, or make contradictory decisions. You are the persistent memory layer — you know what the CEO was doing and can help recover context.
+
+## Checkpoint Before Long Runs
+
+Before dispatching a long `--loop` run, save a recovery point:
+```bash
+factory checkpoint <project_path>
+```
+This captures the current strategy state so you can resume if the session crashes.
+
+## Resume from Crashes
+
+If a CEO session dies unexpectedly:
+```bash
+factory resume <project_path>
+```
+This restarts from the last checkpoint, preserving strategy and experiment state.
+
+## Context Injection Pattern
+
+When a CEO session has compacted or needs context refreshed, gather and compose state:
+
+1. **Generate fresh observations:**
+   ```bash
+   factory study <project_path>
+   ```
+
+2. **Read current strategy:**
+   Read `.factory/strategy/current.md` — contains hypotheses, priorities, and the design space assessment.
+
+3. **Read pending work:**
+   Read `.factory/strategy/backlog.md` — items the CEO should be working on.
+
+4. **Read latest agent outputs:**
+   Read `.factory/reviews/` — `ceo-latest.md` and other agent review files show what was last attempted.
+
+5. **Compose a summary** of the above and inject it via the CEO's next `--focus` or `--prompt` flag to restore awareness.
+
+## Proactive Monitoring
+
+While CEO runs are active, periodically check on them:
+
+```bash
+factory tmux-ls                    # are sessions still running?
+factory status <project_path>      # project state and recent activity
+factory history <project_path>     # latest experiment outcomes
+```
+
+Signs of compaction trouble:
+- A CEO cycle takes much longer than usual
+- The user reports the CEO seems confused or is repeating work
+- History shows consecutive REVERTs with similar hypotheses
+
+When you detect these signals, checkpoint the project, stop the session, and dispatch a fresh CEO with context injected via `--focus` or `--prompt`.
diff --git a/factory/agents/skills/factory-run.md b/factory/agents/skills/factory-run.md
new file mode 100644
index 00000000..c06d1487
--- /dev/null
+++ b/factory/agents/skills/factory-run.md
@@ -0,0 +1,66 @@
+# /factory-run — CEO Dispatch
+
+Use this skill to launch, monitor, and manage factory CEO runs.
+
+**Always use `factory tmux`** for dispatch. This creates a detached tmux session with an interactive CEO inside — the user can attach and watch. The CEO runs as a normal `claude` session (not headless).
+
+## Dispatch Modes
+
+**Single cycle (default):**
+```bash
+factory tmux <project_path>
+```
+Launches in a detached tmux session. The user can attach to interact.
+
+**Long-running improvement loop:**
+```bash
+factory tmux <project_path> --loop
+factory tmux <project_path> --loop --interval 1800  # custom interval (seconds)
+```
+
+**Targeted single-item build:**
+```bash
+factory tmux <project_path> --focus "<backlog item or issue>"
+factory tmux <project_path> --focus 42          # GitHub issue number
+factory tmux <project_path> --focus "owner/repo#42"
+```
+
+**Mode selection:**
+```bash
+factory tmux <project_path> --mode improve   # default — score-driven improvement
+factory tmux <project_path> --mode design    # brainstorm what to work on first
+factory tmux <project_path> --mode research  # research-driven improvement
+factory tmux <project_path> --mode meta      # improve the factory itself + ACE evolution
+```
+
+## Monitor Running Sessions
+
+```bash
+factory tmux-ls
+```
+Lists all active factory tmux sessions with project paths and status.
+
+## Stop a Session
+
+```bash
+factory tmux-stop --session <session_name>
+factory tmux-stop --path <project_path>
+```
+
+## Check Results After Completion
+
+1. Read `.factory/reviews/ceo-latest.md` in the project directory for the CEO's final output
+2. Run `factory eval <project_path>` for the current composite score
+3. Run `factory history <project_path>` for the full experiment log
+4. Read `.factory/reviews/` for individual agent outputs (builder-latest.md, qa-latest.md, etc.)
+
+## When to Use Which
+
+| Scenario | Command |
+|---|---|
+| Managing 2+ projects simultaneously | `factory tmux <path> --loop` for each |
+| User asks "work on this project" | `factory tmux <path>` |
+| User asks to build one specific thing | `factory tmux <path> --focus "<item>"` |
+| User wants to discuss what to work on | `factory tmux <path> --mode design` |
+
+Always check `factory tmux-ls` before dispatching to avoid launching duplicate sessions for the same project.
diff --git a/factory/agents/skills/playbook.md b/factory/agents/skills/playbook.md
new file mode 100644
index 00000000..46da1e12
--- /dev/null
+++ b/factory/agents/skills/playbook.md
@@ -0,0 +1,47 @@
+# /playbook — ACE Playbook Evolution
+
+Use this skill to manage and evolve agent playbooks via the ACE (Automated Capability Evolution) system.
+
+## Trigger Playbook Evolution
+
+```bash
+factory ace
+```
+Evolves all agent playbooks from accumulated experiment data. ACE analyzes experiment outcomes (KEEP vs REVERT), extracts behavioral patterns, and distills them into DO/DON'T rules in each role's playbook.
+
+## Check Evolution Stats
+
+```bash
+factory ace-stats
+```
+Shows which rules were added, removed, or updated in the latest evolution run. Use this to verify that evolution produced sensible changes.
+
+## Read Current Playbooks
+
+Playbooks live at `~/.factory/playbooks/<role>.md` — one per agent role:
+- `researcher.md`, `strategist.md`, `builder.md`, `qa.md`
+- `archivist.md`, `refiner.md`, `failure_analyst.md`, `ceo.md`
+
+Each playbook contains empirically-derived DO/DON'T rules with helpful/harmful counts. Higher helpful counts indicate stronger confidence in a rule.
+
+## When to Evolve
+
+Trigger `factory ace` when:
+- **3+ experiments** have completed across any project since the last evolution
+- **Agent mistakes repeat** — you observe the same failure pattern across experiments (e.g., builder keeps making the same type of error)
+- **User requests it** — "improve how the builder works", "agents keep doing X wrong"
+- **After a `--mode meta` run** — meta mode already runs ACE, but you may want a follow-up evolution after reviewing the results
+
+## Targeted Review for Underperforming Roles
+
+If a specific agent role is underperforming:
+
+1. **Read its playbook:** `~/.factory/playbooks/<role>.md`
+2. **Check experiment archives:** Read `.factory/archive/experiments/` in relevant projects for patterns of failure
+3. **Read agent outputs:** Check `.factory/reviews/<role>-latest.md` across projects to spot recurring issues
+4. **Trigger evolution:** Run `factory ace` — ACE will incorporate the latest experiment data
+5. **Verify changes:** Run `factory ace-stats` and read the updated playbook to confirm the new rules address the observed issues
+
+## Manual Playbook Editing
+
+Playbooks are plain markdown. If ACE misses a pattern or you need an immediate fix, you can edit `~/.factory/playbooks/<role>.md` directly. ACE will preserve manual edits on subsequent evolutions as long as the format is maintained.
diff --git a/factory/agents/skills/sessions.md b/factory/agents/skills/sessions.md
new file mode 100644
index 00000000..621bbfe6
--- /dev/null
+++ b/factory/agents/skills/sessions.md
@@ -0,0 +1,49 @@
+# /sessions — Active Session Tracking
+
+Use this skill to track, health-check, and review factory CEO sessions.
+
+## List Active Sessions
+
+```bash
+factory tmux-ls
+```
+Shows all active factory tmux sessions. Each entry includes the session name and project path. Run this frequently while CEO sessions are active.
+
+## Health Check a Session
+
+Verify a tmux session is alive and the CEO process is running:
+```bash
+tmux has-session -t <session_name> 2>/dev/null && echo "alive" || echo "dead"
+tmux list-panes -t <session_name> -F '#{pane_pid}' 2>/dev/null
+```
+If the session exists but the CEO process has exited, the session is stale — stop it and dispatch a fresh one if needed.
+
+## User Attach Guidance
+
+If the user wants to watch or interact with a running CEO session:
+```bash
+tmux attach -t <session_name>
+```
+- `Ctrl-b d` to detach without stopping the session
+- `Ctrl-c` inside the session will interrupt the CEO — warn the user
+
+## Post-Completion Review
+
+When a CEO session finishes:
+
+1. **Read agent outputs:** Check `.factory/reviews/` in the project directory — `ceo-latest.md`, `builder-latest.md`, `qa-latest.md` contain the latest agent outputs
+2. **Check scores:** `factory eval <project_path>` for the current composite score
+3. **Check history:** `factory history <project_path>` for the experiment log — look at the latest entry for the verdict (KEEP/REVERT) and score delta
+4. **Check strategy:** Read `.factory/strategy/current.md` for what the CEO planned and `.factory/strategy/observations.md` for what was observed
+
+Summarize findings to the user: what was attempted, what was the verdict, what's the score delta.
+
+## Concurrent Multi-Project Management
+
+You can have multiple CEO sessions running simultaneously across different projects. Best practices:
+
+- Track which projects have active sessions to avoid duplicate launches
+- Use `factory tmux-ls` as your dashboard — run it periodically
+- When a session completes, review results before deciding whether to launch another cycle
+- Stagger launches to avoid resource contention on the host machine
+- If multiple sessions are running, check each project's results systematically — don't let completed sessions go unreviewed
diff --git a/factory/agents/sop-compact/pre-compact.sh b/factory/agents/sop-compact/pre-compact.sh
new file mode 100755
index 00000000..b94fa5ac
--- /dev/null
+++ b/factory/agents/sop-compact/pre-compact.sh
@@ -0,0 +1,243 @@
+#!/usr/bin/env bash
+# pre-compact: fires before /compact (manual or auto). Runs a `claude -p` sidecar
+# that reads the just-finished conversation, promotes principled learnings to the
+# repo's durable targets, and writes an ephemeral handoff snapshot the SessionStart
+# hook will point the post-compact session at.
+#
+# PreCompact is awaited (blocking) — CC waits for this hook (and its sidecar) before
+# it starts summarizing. Exit 2 hard-blocks the compaction and surfaces stderr to the
+# user; we use that so a failed snapshot aborts /compact rather than silently losing
+# the in-flight context. Exit 0 lets compaction proceed.
+#
+# Sidecar invocation pattern (claude -p with tool use + no session pollution) is
+# derived from an upstream session-management tool's `sop` subcommand; this plugin
+# ships its own prompt rather than shelling out to it.
+#
+# TRUST ASSUMPTION (security): the sidecar runs with --dangerously-skip-permissions and
+# reads the just-finished transcript, which contains verbatim session content (user
+# messages, tool output, external data). A crafted message in that transcript is therefore
+# untrusted input reaching an agent with broad tool access. We accept this deliberately:
+#   - The transcript was already read, in full, by the main session that produced it — the
+#     sidecar gains no privilege the original session didn't already have over this repo.
+#   - Promotion targets are not confinable to a fixed subtree (CLAUDE.md lives at the repo
+#     root; per-project memory dirs can live OUTSIDE the repo under ~/.claude/projects/...),
+#     and the prompt samples large transcripts via head/grep — so a narrow --allowed-tools
+#     allowlist would break promotion. Broad access is required for the feature to work.
+#   - A `timeout` wrapper (below) bounds runaway/looping behavior so a hijacked sidecar
+#     can't block /compact forever.
+# Treat the transcript as a controlled artifact from the CC runtime. If you need a stronger
+# boundary, run the sidecar in a sandbox or scope promotion to in-repo paths only.
+
+set -uo pipefail
+
+INPUT="$(cat 2>/dev/null || true)"
+
+jq_field() {
+  # Emit the value at jq path $1 (e.g. .transcript_path) read from $INPUT; prints nothing when it is null/absent or jq fails.
+  jq -r "${1} // empty" 2>/dev/null <<<"$INPUT" || printf ''
+}
+
+TRANSCRIPT="$(jq_field .transcript_path)"
+CWD="$(jq_field .cwd)"
+TRIGGER="$(jq_field .trigger)"
+SESSION_ID="$(jq_field .session_id)"
+
+# Repo root: prefer the cwd from stdin, then $CLAUDE_PROJECT_DIR, then $PWD.
+REPO_DIR="${CWD:-${CLAUDE_PROJECT_DIR:-$PWD}}"
+
+SOP_FILE="${REPO_DIR}/.claude/sop-compact.md"
+SNAP_DIR="${REPO_DIR}/.claude/sop-compact"
+# Seconds-resolution UTC + a PID suffix: two concurrent compacts (e.g. auto-compact in
+# two long-running sessions on the same repo) can hit the same wall-second; the -$$
+# disambiguates so the second writer's mv -f doesn't clobber the first's handoff. The
+# timestamp prefix still dominates lexical order, so SessionStart's latest-glob and
+# prune_handoffs' sort are unaffected. (machine#118)
+TS="$(date -u +%Y%m%dT%H%M%SZ)-$$"
+HANDOFF="${SNAP_DIR}/handoff-${TS}.md"
+
+mkdir -p "$SNAP_DIR"
+
+# prune_handoffs: after a new handoff is written, keep only the most recent N matching
+# handoff-*.md and remove the rest. N is SOP_COMPACT_HANDOFF_RETENTION (default 10).
+# Glob expansion is lexically sorted and the zero-padded UTC timestamp prefix dominates
+# (the -$$ suffix only disambiguates within a wall-second), so lexical == chronological;
+# the oldest files sort first and are the ones removed. Called AFTER the write so the
+# just-written handoff is always among the kept N. A keep < 1 (or non-numeric) value is a
+# no-op so we never delete the file SessionStart needs. *.error.log and .handoff-*.XXXXXX
+# temp files don't match handoff-*.md, so they're untouched.
+prune_handoffs() {
+  local keep="${SOP_COMPACT_HANDOFF_RETENTION:-10}"
+  # 10# forces decimal: bash arithmetic would read "010" as octal 8 and error out on "08".
+  [[ "$keep" =~ ^[0-9]+$ ]] && keep=$(( 10#$keep )) && (( keep >= 1 )) || return 0
+  local i files=()
+  shopt -s nullglob
+  files=( "${SNAP_DIR}"/handoff-*.md )
+  shopt -u nullglob
+  local count=${#files[@]}
+  (( count > keep )) || return 0
+  for (( i = 0; i < count - keep; i++ )); do
+    rm -f "${files[i]}"
+  done
+}
+
+# --- No SOP yet: write a minimal stub handoff and let compaction proceed. ----------
+if [[ ! -f "$SOP_FILE" ]]; then
+  TMP="$(mktemp "${SNAP_DIR}/.handoff-${TS}.XXXXXX")"
+  {
+    printf '# Pre-compact handoff (stub — no SOP)\n\n'
+    printf '_Generated %s by sop-compact PreCompact hook (trigger: %s)._\n\n' "$TS" "${TRIGGER:-unknown}"
+    printf 'This repo has **no `.claude/sop-compact.md`** — run `/init-sop-compact` to generate one '
+    printf 'so future compactions get a real Promote+Snapshot pass.\n\n'
+    printf 'For now there is no repo-tailored procedure. After this compaction:\n\n'
+    printf '1. Treat the conversation history above as a lossy compaction summary, not the live session.\n'
+    printf '2. Re-check live state (git status, gh, files) before acting.\n'
+    printf '3. Prior transcript (for archaeology if needed): `%s`\n' "${TRANSCRIPT:-unknown}"
+  } >"$TMP"
+  mv -f "$TMP" "$HANDOFF"
+  prune_handoffs
+  exit 0
+fi
+
+# --- SOP present: run the sidecar to promote + snapshot. ---------------------------
+MODEL="${SOP_COMPACT_MODEL:-opus[1m]}"
+
+PROMPT_FILE="$(mktemp)"
+STDERR_FILE="$(mktemp)"
+cleanup() { rm -f "$PROMPT_FILE" "$STDERR_FILE"; }
+trap cleanup EXIT
+
+# extract_handoff: pull the handoff body out from between the ===HANDOFF=== and ===END===
+# sentinels the sidecar is asked to emit. The sidecar tends to narrate its promotion
+# decisions before the markdown; the sentinels let us drop that preamble so the saved file
+# starts at the `# Pre-compact handoff` heading. Reads raw output on stdin, prints the
+# extracted body on stdout. Exit 0 if both sentinels were found and the body is non-empty;
+# exit 1 otherwise (caller falls back to writing the raw output).
+extract_handoff() {
+  awk '
+    /^===HANDOFF===[[:space:]]*$/ { inside = 1; opened = 1; next }
+    /^===END===[[:space:]]*$/     { if (inside) { inside = 0; closed = 1 } next }
+    inside                        { body[total++] = $0 }
+    END {
+      if (!(opened && closed)) exit 1
+      # Skip one leading blank line so the H1 becomes the first line of the file.
+      start = (total > 0 && body[0] == "") ? 1 : 0
+      blank = 1
+      for (k = start; k < total; k++) {
+        print body[k]
+        if (body[k] != "") blank = 0
+      }
+      # Only blank lines captured: report failure so the caller falls back to the raw output.
+      if (blank) exit 1
+    }
+  '
+}
+
+cat >"$PROMPT_FILE" <<EOF
+You are the pre-compact sidecar for the sop-compact Claude Code plugin. A \`/compact\`
+is about to lossily rewrite a conversation into a summary. Your job is to preserve the
+high-value, non-reconstructable context BEFORE that happens.
+
+Working directory: ${REPO_DIR}
+Compaction trigger: ${TRIGGER:-unknown}
+Just-finished conversation transcript (JSONL): ${TRANSCRIPT}
+
+Do this, in order:
+
+1. Read \`${SOP_FILE}\` — the repo-specific procedure. It tells you this repo's
+   promotion targets (memory dir, CLAUDE.md, etc.), snapshot conventions, live-state
+   checks, and what counts as in-flight state here. Follow it.
+
+2. Read the transcript at \`${TRANSCRIPT}\` to understand the just-finished session.
+   It may be large — sample (head/tail/grep) enough to characterize what happened, the
+   in-flight work, decisions made, and any principled learnings. You do not need every line.
+
+3. PROMOTE (direct file edits): for each non-reconstructable learning that is *principled*
+   (will recur, not a one-off), write it to the durable target named in the SOP — memory
+   files, CLAUDE.md, etc. Use your Write/Edit tools to make these edits now. Promote
+   validated non-obvious decisions, not only corrections. Do NOT promote anything already
+   on disk or reconstructable from git/gh/files.
+
+4. SNAPSHOT (your stdout): after promoting, output a dense handoff document as Markdown.
+   It will be saved as the post-compact handoff file. Capture the non-reconstructable
+   in-flight state the summary will lose first:
+   - Active framings, analogies, shared language coined this session
+   - In-flight design decisions WITH their reasoning (the why decays first)
+   - What you and the user were mid-discussing (open questions, half-formed directions)
+   - Rejected approaches and WHY they were rejected
+   - Relationship / tonal context if it shifted
+   - A short "resume here" pointer: what the next session should do first
+   Do NOT restate anything reconstructable from gh/git/files or anything you just promoted.
+   The handoff body must start with a top-level heading like \`# Pre-compact handoff\`.
+
+Output format: emit the handoff markdown between EXACTLY these two sentinel lines, each
+on its own line, with NOTHING outside them — no preamble, no explanation of what you
+promoted, no trailing commentary. Any reasoning about your promotion decisions belongs
+INSIDE the handoff body (e.g. under a "Notes" section), not before the opening sentinel.
+
+===HANDOFF===
+<handoff markdown content here, starting with the \`# Pre-compact handoff\` heading>
+===END===
+EOF
+
+# Run the sidecar from the repo root so relative paths in the SOP resolve. Capture
+# stdout (the handoff) and stderr (debug on failure) separately.
+#
+# PreCompact is awaited and blocking, so an unbounded sidecar would hang /compact forever
+# (the user can't interrupt it). Wrap in `timeout` (default 600s, override via
+# SOP_COMPACT_TIMEOUT); on expiry `timeout` exits 124, which the RC check below catches and
+# converts to an exit-2 block — a clear failure rather than an infinite hang.
+# Default is 600s (not 300s) because the sidecar defaults to opus[1m] (v0.3.3) and ingests
+# the whole just-finished transcript — the long sessions this targets can need >5min to
+# read + promote + snapshot, and a 300s wall would exit-2-block compaction (rc=124) on
+# exactly those sessions (machine#120 review).
+SIDECAR_OUT="$(
+  cd "$REPO_DIR" && timeout "${SOP_COMPACT_TIMEOUT:-600}" claude -p "$(cat "$PROMPT_FILE")" \
+    --model "$MODEL" \
+    --setting-sources "" \
+    --disable-slash-commands \
+    --strict-mcp-config \
+    --no-chrome \
+    --no-session-persistence \
+    --dangerously-skip-permissions \
+    2>"$STDERR_FILE"
+)"
+RC=$?
+
+if [[ $RC -ne 0 || -z "${SIDECAR_OUT//[[:space:]]/}" ]]; then  # output of only spaces/tabs/newlines counts as empty
+  DEBUG="${SNAP_DIR}/handoff-${TS}.error.log"
+  {
+    printf 'sop-compact PreCompact sidecar failed (rc=%s) at %s\n' "$RC" "$TS"
+    printf 'model=%s session=%s\n\n--- stderr ---\n' "$MODEL" "${SESSION_ID:-unknown}"
+    cat "$STDERR_FILE" 2>/dev/null
+  } >"$DEBUG"
+  # Exit 2 hard-blocks compaction so the user keeps the live context and knows the
+  # snapshot failed (rather than silently compacting into a lossy summary).
+  echo "sop-compact: pre-compact sidecar failed (rc=$RC). Compaction blocked to preserve context. See $DEBUG" >&2
+  exit 2
+fi
+
+# The sidecar wraps its handoff in ===HANDOFF===/===END=== sentinels so any promotion-
+# decision narration it emits stays out of the saved file. Extract the body; if the
+# sentinels are missing/malformed, fall back to the raw output (a degraded snapshot beats
+# losing the in-flight context) and warn so a maintainer can spot the extraction failure.
+if HANDOFF_BODY="$(printf '%s\n' "$SIDECAR_OUT" | extract_handoff)"; then
+  HANDOFF_CONTENT="$HANDOFF_BODY"
+else
+  HANDOFF_CONTENT="$SIDECAR_OUT"
+  echo "sop-compact: sidecar output missing sentinels; wrote raw output as fallback (see handoff for inspection)" >&2
+fi
+
+# Write the handoff atomically so SessionStart never reads a partial file.
+TMP="$(mktemp "${SNAP_DIR}/.handoff-${TS}.XXXXXX")"
+printf '%s\n' "$HANDOFF_CONTENT" >"$TMP"
+mv -f "$TMP" "$HANDOFF"
+prune_handoffs
+
+# --- Optional repo-local extension: run after a successful snapshot. ---------------
+# Failures here must not take down the pre-hook, so guard with controlled error handling.
+EXT="${REPO_DIR}/.claude/sop-compact/pre.sh"
+if [[ -f "$EXT" ]]; then
+  ( set +e; SOP_COMPACT_HANDOFF="$HANDOFF" SOP_COMPACT_TRANSCRIPT="$TRANSCRIPT" bash "$EXT" ) || true
+fi
+
+exit 0
diff --git a/factory/agents/sop-compact/session-start.sh b/factory/agents/sop-compact/session-start.sh
new file mode 100755
index 00000000..411c49b0
--- /dev/null
+++ b/factory/agents/sop-compact/session-start.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# session-start: post-compact orientation. Fires on every SessionStart but only acts
+# when source == "compact"; for every other source (startup, resume, clear) it exits
+# silently so normal session starts are untouched.
+#
+# It emits a pointer-only directive (not the full handoff content) via
+# hookSpecificOutput.additionalContext, telling the new session to read the latest
+# handoff file the PreCompact sidecar wrote.
+
+set -uo pipefail
+
+INPUT="$(cat 2>/dev/null || true)"  # hook payload (JSON) read from stdin; empty if unreadable
+
+jq_field() {  # jq_field FILTER: print the payload value, or nothing when null/false/unparsable
+  printf '%s' "$INPUT" | jq -r "${1} // empty" 2>/dev/null || printf ''
+}
+
+SOURCE="$(jq_field .source)"
+
+# Only orient on the way back in from a /compact. No-op otherwise.
+if [[ "$SOURCE" != "compact" ]]; then
+  exit 0
+fi
+
+CWD="$(jq_field .cwd)"
+REPO_DIR="${CWD:-${CLAUDE_PROJECT_DIR:-$PWD}}"  # payload cwd, else CLAUDE_PROJECT_DIR, else $PWD
+REPO="$(basename "$REPO_DIR")"  # only used below to match legacy snapshot filenames
+
+# Latest handoff (timestamps are zero-padded + UTC, so lexical == chronological;
+# ls -t by mtime is equivalent and robust to clock format).
+LATEST="$(ls -t "${REPO_DIR}/.claude/sop-compact/"handoff-*.md 2>/dev/null | head -1 || true)"
+LEGACY=""  # suffix added to the pointer text when only a legacy snapshot was found
+
+if [[ -z "$LATEST" ]]; then
+  # Back-compat: pick up v0.2.x snapshots written by the old compact-sop plugin.
+  SNAP_DIR="${COMPACT_SOP_SNAPSHOT_DIR:-$HOME/.claude/compact-sop/snapshots}"
+  LATEST="$(ls -t "${SNAP_DIR}/pre-compact-${REPO}-"*.md /tmp/pre-compact-"${REPO}"-*.md 2>/dev/null | head -1 || true)"
+  [[ -n "$LATEST" ]] && LEGACY=" (legacy compact-sop snapshot — consider running /init-sop-compact to migrate)"
+fi
+
+if [[ -n "$LATEST" ]]; then
+  POINTER="You were just compacted (SessionStart source=compact). Before doing anything else, read \`${LATEST}\`${LEGACY} immediately for orientation — it is the pre-compact handoff with the non-reconstructable in-flight context. Trust hierarchy: live state (git/gh/files) > handoff > compaction summary. Do not start new work until you have read it; then re-check live state and wait for the user."
+else
+  POINTER="You were just compacted (SessionStart source=compact), but no pre-compact handoff was found under \`${REPO_DIR}/.claude/sop-compact/\`. Treat the history above as a lossy summary: re-check live state (git status, gh, files) before acting, and consider running \`/init-sop-compact\` so future compactions produce a handoff. Do not start new work until you have re-oriented."
+fi
+# Emit the hook response on stdout as a single compact JSON object built by jq.
+jq -nc --arg ctx "$POINTER" \
+  '{hookSpecificOutput: {hookEventName: "SessionStart", additionalContext: $ctx}}'
+
+# --- Optional repo-local extension (its output is discarded, failures ignored). ----
+EXT="${REPO_DIR}/.claude/sop-compact/post.sh"
+if [[ -f "$EXT" ]]; then
+  ( set +e; SOP_COMPACT_HANDOFF="$LATEST" bash "$EXT" >/dev/null 2>&1 ) || true
+fi
+
+exit 0
diff --git a/factory/agents/sop-compact/sop-compact.md b/factory/agents/sop-compact/sop-compact.md
new file mode 100644
index 00000000..6559773a
--- /dev/null
+++ b/factory/agents/sop-compact/sop-compact.md
@@ -0,0 +1,42 @@
+# sop-compact — re:factory agent
+
+Standard operating procedure for the sop-compact PreCompact sidecar when running
+inside a re:factory workspace. The PreCompact hook reads this file to know what to
+promote and what to snapshot before context compaction.
+
+## Promotion targets
+
+Durable learnings go here (direct file edits by the sidecar):
+
+- `.refactory/CLAUDE.md` — workspace-level instructions, validated patterns, recurring
+  gotchas discovered during supervision. Append to the existing content; do not
+  overwrite the preamble.
+
+## Snapshot conventions
+
+The handoff snapshot should capture non-reconstructable in-flight state:
+
+- **Active CEO sessions**: run `factory tmux-ls` to list running factory sessions and
+  their current status. Record which projects have active loops and their last cycle.
+- **Project score trajectory**: recent score changes, whether scores are trending up or
+  down, and any plateau/regression patterns observed this session.
+- **Backlog state**: items recently added, removed, or reprioritized. Note any items
+  the user explicitly deferred or promoted.
+- **In-flight decisions**: what the user and agent were mid-discussing — open questions,
+  half-formed directions, rejected approaches and why.
+
+## Live-state checks
+
+Before writing the snapshot, check these for current ground truth:
+
+- `factory tmux-ls` — which factory sessions are running
+- `factory status .` — project status if inside a project
+- `git status` — uncommitted changes in the workspace
+
+## In-flight work locations
+
+These files contain ephemeral state that may be lost in compaction:
+
+- `.factory/strategy/current.md` — the current hypothesis or focus area
+- `.factory/reviews/` — recent agent review outputs and CEO verdicts
+- `.factory/strategy/backlog.md` — the working backlog
diff --git a/factory/cli.py b/factory/cli.py
index af588fd0..3ed37283 100644
--- a/factory/cli.py
+++ b/factory/cli.py
@@ -3142,16 +3142,16 @@ def _tmux_available() -> bool:
 
 
 def _build_tmux_run_args(args: argparse.Namespace, project_path: Path, model: str | None) -> str:
-    """Build the 'factory run ...' command string from parsed args."""
-    parts = [f"factory run {project_path}"]
+    """Build the 'factory ceo ...' command string from parsed args.
+
+    Uses 'factory ceo' (not 'factory run') so the session inside tmux
+    is interactive — the user can attach and interact with the CEO directly.
+    --loop/--interval/--max-cycles are factory-run-only flags and are
+    NOT forwarded to factory ceo.
+    """
+    parts = [f"factory ceo {project_path}"]
     if args.mode:
         parts.append(f"--mode {args.mode}")
-    if args.loop:
-        parts.append("--loop")
-    if args.interval:
-        parts.append(f"--interval {args.interval}")
-    if args.max_cycles is not None:
-        parts.append(f"--max-cycles {args.max_cycles}")
     if model:
         parts.append(f"--model {shlex.quote(model)}")
     if getattr(args, "no_github", False):
@@ -3350,6 +3350,60 @@ def cmd_tmux_stop(args: argparse.Namespace) -> int:
     return 0
 
 
+def cmd_refactory(args: argparse.Namespace) -> int:
+    """Launch the re:factory persistent supervisor agent.
+
+    Sets up the workspace, resolves the session ID, and replaces the current
+    process with an interactive claude session via os.execvp.
+
+    Args:
+        args: Parsed CLI namespace. ``path``, ``reset`` and ``model`` are read
+            with getattr defaults, so a namespace lacking them also works.
+
+    Returns:
+        1 when the ``claude`` executable is not on PATH. Otherwise execvp
+        replaces the process, so ``return 0`` only runs when execvp is mocked.
+    """
+    import shutil
+
+    from factory.agents.runner import resolve_prompt
+    from factory.refactory import get_session_id, setup_workspace
+
+    if not shutil.which("claude"):
+        print("Error: 'claude' CLI not found. Install Claude Code first.", file=sys.stderr)
+        return 1
+
+    project_path = Path(getattr(args, "path", None) or Path.cwd()).resolve()
+    reset = getattr(args, "reset", False)
+    model = getattr(args, "model", None)
+
+    setup_workspace(project_path)
+    session_file = project_path / ".refactory" / "session.json"
+    # get_session_id rewrites session.json only when it mints a new ID (first
+    # launch, --reset, or a corrupt file), so a byte comparison detects freshness;
+    # a bare existence check would --resume an ID no session was ever started with.
+    before = session_file.read_bytes() if session_file.is_file() else None
+    session_id = get_session_id(project_path, reset=reset)
+    is_new_session = before is None or session_file.read_bytes() != before
+
+    prompt = resolve_prompt("refactory")
+    # delete=False: the file must outlive this process, which execvp replaces.
+    with tempfile.NamedTemporaryFile(
+        mode="w", suffix=".md", prefix="refactory-prompt-", delete=False,
+    ) as prompt_file:
+        prompt_file.write(prompt)
+
+    session_flag = "--session-id" if is_new_session else "--resume"
+    cmd = ["claude", session_flag, session_id]
+    cmd += ["--append-system-prompt-file", prompt_file.name]
+    cmd.append("--dangerously-skip-permissions")
+    if model:
+        cmd.extend(["--model", model])
+
+    os.chdir(project_path)
+    os.execvp("claude", cmd)
+    return 0  # unreachable after execvp
+
 
 def _has_research_target(project_path: Path) -> bool:
     """Check if project already has research_target configured."""
@@ -4525,6 +4579,15 @@ def build_parser() -> argparse.ArgumentParser:
     p.add_argument("--all", action="store_true", default=False, dest="stop_all",
                     help="Stop ALL factory tmux sessions (required when no --session/--path given)")
 
+    # refactory — persistent supervisor agent
+    p = sub.add_parser("refactory", help="Launch the re:factory persistent supervisor agent")
+    p.add_argument("path", nargs="?", default=None,
+                    help="Project directory (default: current working directory)")
+    p.add_argument("--reset", action="store_true", default=False,
+                    help="Reset session (new session ID, fresh start)")
+    p.add_argument("--model", default=None,
+                    help="Claude model override")
+
     # workflow — graph engine commands
     from factory.workflow.cli import add_workflow_parser
     add_workflow_parser(sub)
@@ -4553,7 +4616,7 @@ def main(argv: list[str] | None = None) -> int:
 
     if not args.command:
         if sys.stdin.isatty() and sys.stderr.isatty():
-            return _welcome_wizard()
+            return cmd_refactory(args)
         parser.print_help()
         return 1
 
@@ -4618,6 +4681,7 @@ def main(argv: list[str] | None = None) -> int:
         "tmux": cmd_tmux,
         "tmux-ls": cmd_tmux_ls,
         "tmux-stop": cmd_tmux_stop,
+        "refactory": cmd_refactory,
         "workflow": lambda a: __import__("factory.workflow.cli", fromlist=["cmd_workflow"]).cmd_workflow(a),
     }
 
diff --git a/factory/refactory.py b/factory/refactory.py
new file mode 100644
index 00000000..991dc807
--- /dev/null
+++ b/factory/refactory.py
@@ -0,0 +1,141 @@
+"""Workspace setup and session management for the re:factory agent."""
+
+from __future__ import annotations
+
+import json
+import shutil
+import stat
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+
+SETTINGS_JSON: dict[str, Any] = {  # base of settings.local.json; setup_workspace adds "hooks"
+    "mcpServers": {
+        "factory": {
+            "command": "factory",
+            "args": ["mcp-serve"],
+        }
+    }
+}
+# Seed text for <project>/.refactory/CLAUDE.md (written by setup_workspace).
+CLAUDE_MD_CONTENT = """\
+# re:factory workspace
+
+You are the re:factory supervisor. Use /slash commands and factory CLI to manage projects.
+See your system prompt for full instructions.
+"""
+
+SOP_COMPACT_DIR = Path(__file__).parent / "agents" / "sop-compact"  # packaged hooks + SOP source
+
+
+def setup_workspace(project_path: Path) -> Path:
+    """Set up re:factory for a project.
+
+    Session state goes in <project>/.refactory/. Skills and settings are
+    installed into the PROJECT's .claude/ so the agent runs from the
+    project root with full access to the source tree.
+
+    Idempotent — safe to call on every launch. Hook scripts, the SOP, skills and
+    the factory-owned settings entries are refreshed each time to pick up updates.
+    Other keys already in settings.local.json and an existing CLAUDE.md are
+    kept: the SOP directs the compaction sidecar to append durable learnings to
+    .refactory/CLAUDE.md, so rewriting it on every launch would erase them.
+
+    Args:
+        project_path: Root directory of the project.
+
+    Returns:
+        The workspace path (<project>/.refactory/).
+    """
+    workspace = project_path / ".refactory"
+    workspace.mkdir(parents=True, exist_ok=True)
+
+    sop_dir = workspace / ".claude" / "sop-compact"
+    sop_dir.mkdir(parents=True, exist_ok=True)
+
+    for hook_name in ("pre-compact.sh", "session-start.sh"):
+        src = SOP_COMPACT_DIR / hook_name
+        if src.is_file():
+            dst = sop_dir / hook_name
+            shutil.copy2(src, dst)
+            # Registered below as a "command" hook, so it has to be executable.
+            dst.chmod(dst.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+
+    sop_src = SOP_COMPACT_DIR / "sop-compact.md"
+    if sop_src.is_file():
+        shutil.copy2(sop_src, workspace / ".claude" / "sop-compact.md")
+
+    project_claude_dir = project_path / ".claude"
+    commands_dir = project_claude_dir / "commands"
+    commands_dir.mkdir(parents=True, exist_ok=True)
+
+    skills_src = Path(__file__).parent / "agents" / "skills"
+    if skills_src.is_dir():
+        for skill_file in skills_src.glob("*.md"):
+            shutil.copy2(skill_file, commands_dir / skill_file.name)
+
+    def hook_entry(script: str) -> dict[str, Any]:
+        # Absolute path, so the hook does not depend on the session's cwd.
+        return {"hooks": [{"type": "command", "command": str((sop_dir / script).resolve())}]}
+
+    settings_path = project_claude_dir / "settings.local.json"
+    try:
+        existing = json.loads(settings_path.read_text())
+    except (OSError, ValueError):
+        existing = None  # missing or unparsable file: rebuild from scratch
+    settings = existing if isinstance(existing, dict) else {}
+    servers = settings.get("mcpServers")
+    servers = servers if isinstance(servers, dict) else {}
+    settings["mcpServers"] = {**servers, **SETTINGS_JSON["mcpServers"]}
+    hooks = settings.get("hooks")
+    settings["hooks"] = {
+        **(hooks if isinstance(hooks, dict) else {}),
+        "PreCompact": [hook_entry("pre-compact.sh")],
+        "SessionStart": [{"matcher": "*", **hook_entry("session-start.sh")}],
+    }
+    settings_path.write_text(json.dumps(settings, indent=2) + "\n")
+
+    claude_md_path = workspace / "CLAUDE.md"
+    if not claude_md_path.exists():
+        claude_md_path.write_text(CLAUDE_MD_CONTENT)
+
+    return workspace
+
+
+def get_session_id(project_path: Path, reset: bool = False) -> str:
+    """Read or create a persistent session ID for a project.
+
+    Stored in <project>/.refactory/session.json; unusable contents are replaced.
+
+    Args:
+        project_path: Root directory of the project.
+        reset: If True, generate a new session ID even if one exists.
+
+    Returns:
+        The session ID string.
+    """
+    session_file = project_path / ".refactory" / "session.json"
+    if not reset and session_file.exists():
+        try:
+            data = json.loads(session_file.read_text())
+        except ValueError:  # bad JSON/encoding; non-object JSON is handled below
+            data = None
+        sid = data.get("session_id") if isinstance(data, dict) else None
+        if isinstance(sid, str) and sid:
+            return sid
+
+    sid = str(uuid.uuid4())
+    save_session_id(project_path, sid)
+    return sid
+
+
+def save_session_id(project_path: Path, session_id: str) -> None:
+    """Persist *session_id* and a UTC creation timestamp as JSON.
+
+    Target: <project>/.refactory/session.json (directory created on demand).
+    """
+    state_dir = project_path / ".refactory"
+    state_dir.mkdir(parents=True, exist_ok=True)
+    payload = {"session_id": session_id, "created": datetime.now(timezone.utc).isoformat()}
+    (state_dir / "session.json").write_text(json.dumps(payload, indent=2) + "\n")
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 85d91d94..ed7116e2 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1561,7 +1561,7 @@ def test_cmd_home_returns_package_root(self, capsys):
 
 class TestCmdTmuxBareCLI:
     def test_tmux_command_uses_bare_factory(self):
-        """cmd_tmux generates a shell command using bare 'factory run', not uv run."""
+        """cmd_tmux generates a shell command using bare 'factory ceo', not uv run."""
         from factory.cli import cmd_tmux
         import argparse
 
@@ -1590,7 +1590,7 @@ def test_tmux_command_uses_bare_factory(self):
 
             new_session_call = mock_run.call_args_list[1]
             shell_cmd = new_session_call[0][0][-1]  # last arg is the shell command
-            assert "factory run" in shell_cmd
+            assert "factory ceo" in shell_cmd
             assert "uv run python -m factory" not in shell_cmd
             assert "cd " not in shell_cmd
             assert "source .venv/bin/activate" not in shell_cmd
diff --git a/tests/test_cli_wizard.py b/tests/test_cli_wizard.py
index ae53c76d..03f8b9f4 100644
--- a/tests/test_cli_wizard.py
+++ b/tests/test_cli_wizard.py
@@ -40,16 +40,16 @@ def test_non_tty_prints_help(self, capsys: pytest.CaptureFixture[str]) -> None:
             code = main([])
         assert code == 1
 
-    def test_tty_launches_wizard(self) -> None:
-        """TTY with no subcommand dispatches to _welcome_wizard."""
-        with patch("factory.cli._welcome_wizard", return_value=0) as mock_wizard, \
+    def test_tty_launches_refactory(self) -> None:
+        """TTY with no subcommand always dispatches to cmd_refactory."""
+        with patch("factory.cli.cmd_refactory", return_value=0) as mock_refactory, \
              patch("sys.stdin") as mock_stdin, \
              patch("sys.stderr") as mock_stderr:
             mock_stdin.isatty.return_value = True
             mock_stderr.isatty.return_value = True
             code = main([])
         assert code == 0
-        mock_wizard.assert_called_once()
+        mock_refactory.assert_called_once()
 
     def test_stdin_not_tty_stderr_tty(self, capsys: pytest.CaptureFixture[str]) -> None:
         """If stdin is not a TTY (piped), falls through to help."""
diff --git a/tests/test_refactory.py b/tests/test_refactory.py
new file mode 100644
index 00000000..74d6d24d
--- /dev/null
+++ b/tests/test_refactory.py
@@ -0,0 +1,271 @@
+"""Tests for the re:factory agent workspace setup and session management."""
+
+from __future__ import annotations
+
+import json
+import os
+import stat
+from pathlib import Path
+from typing import get_args
+from unittest.mock import patch
+
+import pytest
+
+from factory.refactory import (
+    CLAUDE_MD_CONTENT,
+    get_session_id,
+    save_session_id,
+    setup_workspace,
+)
+
+
+# ── setup_workspace ──────────────────────────────────────────────
+
+
+class TestSetupWorkspace:
+    def test_creates_directories(self, tmp_path: Path) -> None:
+        """Both .refactory/ and the project's .claude/commands/ get created."""
+        setup_workspace(tmp_path)
+        assert (tmp_path / ".refactory").is_dir()
+        assert (tmp_path / ".claude").is_dir()
+        assert (tmp_path / ".claude" / "commands").is_dir()
+
+    def test_writes_settings_json(self, tmp_path: Path) -> None:
+        """settings.local.json is written and registers the factory MCP server."""
+        setup_workspace(tmp_path)
+        settings_file = tmp_path / ".claude" / "settings.local.json"
+        assert settings_file.exists()
+        assert "factory" in json.loads(settings_file.read_text())["mcpServers"]
+
+    def test_writes_claude_md(self, tmp_path: Path) -> None:
+        """A fresh workspace gets CLAUDE.md with the stock content."""
+        setup_workspace(tmp_path)
+        target = tmp_path / ".refactory" / "CLAUDE.md"
+        assert target.exists() and target.read_text() == CLAUDE_MD_CONTENT
+
+    def test_copies_skills(self, tmp_path: Path) -> None:
+        """Every packaged skill markdown file lands in .claude/commands/."""
+        setup_workspace(tmp_path)
+        packaged = Path(__file__).parent.parent / "factory" / "agents" / "skills"
+        skill_names = [skill.name for skill in packaged.glob("*.md")]
+        assert len(skill_names) > 0, "No skill source files found"
+        for name in skill_names:
+            assert (tmp_path / ".claude" / "commands" / name).exists(), f"Missing skill: {name}"
+
+    def test_idempotent(self, tmp_path: Path) -> None:
+        """A second run returns the same workspace and keeps settings valid."""
+        first, second = setup_workspace(tmp_path), setup_workspace(tmp_path)
+        assert first == second
+        settings_file = tmp_path / ".claude" / "settings.local.json"
+        loaded = json.loads(settings_file.read_text())
+        assert "factory" in loaded["mcpServers"]
+
+    def test_copies_hooks(self, tmp_path: Path) -> None:
+        """Both hook scripts are installed and carry the owner-execute bit."""
+        setup_workspace(tmp_path)
+        hooks_dir = tmp_path / ".refactory" / ".claude" / "sop-compact"
+        assert hooks_dir.is_dir()
+        for name in ("pre-compact.sh", "session-start.sh"):
+            assert (hooks_dir / name).exists(), f"Missing hook: {name}"
+            assert (hooks_dir / name).stat().st_mode & stat.S_IXUSR, f"Hook not executable: {name}"
+
+    def test_copies_sop(self, tmp_path: Path) -> None:
+        """The re:factory SOP is copied into .refactory/.claude/."""
+        setup_workspace(tmp_path)
+        sop_file = tmp_path / ".refactory" / ".claude" / "sop-compact.md"
+        assert sop_file.exists()
+        text = sop_file.read_text()
+        assert "re:factory" in text and "Promotion targets" in text
+
+    def test_settings_json_has_hooks(self, tmp_path: Path) -> None:
+        """Both hook events point at their script via an absolute path."""
+        setup_workspace(tmp_path)
+        settings_file = tmp_path / ".claude" / "settings.local.json"
+        data = json.loads(settings_file.read_text())
+        assert "hooks" in data
+        registered = data["hooks"]
+        expected = {"PreCompact": "pre-compact.sh", "SessionStart": "session-start.sh"}
+        for event, script in expected.items():
+            assert event in registered
+            command = registered[event][0]["hooks"][0]["command"]
+            assert script in command
+            assert os.path.isabs(command)
+
+
+# ── Session ID ───────────────────────────────────────────────────
+
+
+class TestSessionId:
+    def test_creates_new(self, tmp_path: Path) -> None:
+        """A first call mints a UUID-shaped ID and writes session.json."""
+        state_dir = tmp_path / ".refactory"
+        state_dir.mkdir()
+        assert not (state_dir / "session.json").exists()
+        minted = get_session_id(tmp_path)
+        assert isinstance(minted, str)
+        assert (len(minted), minted.count("-")) == (36, 4)
+        assert (state_dir / "session.json").exists()
+
+    def test_returns_existing(self, tmp_path: Path) -> None:
+        """Repeated calls without reset return the stored ID."""
+        (tmp_path / ".refactory").mkdir()
+        first = get_session_id(tmp_path)
+        assert get_session_id(tmp_path) == first
+
+    def test_reset(self, tmp_path: Path) -> None:
+        """reset=True replaces the stored ID with a new 36-character one."""
+        (tmp_path / ".refactory").mkdir()
+        original = get_session_id(tmp_path)
+        replacement = get_session_id(tmp_path, reset=True)
+        assert replacement != original and len(replacement) == 36
+
+    def test_save_roundtrip(self, tmp_path: Path) -> None:
+        """An ID written by save_session_id is what get_session_id reads back."""
+        (tmp_path / ".refactory").mkdir()
+        save_session_id(tmp_path, "abcdef1234567890abcdef1234567890")
+        assert get_session_id(tmp_path) == "abcdef1234567890abcdef1234567890"
+
+    def test_corrupt_json_generates_new(self, tmp_path: Path) -> None:
+        """Unparsable session.json content falls back to a freshly minted ID."""
+        state_dir = tmp_path / ".refactory"
+        state_dir.mkdir(parents=True, exist_ok=True)
+        (state_dir / "session.json").write_text("{corrupt json!!")
+        recovered = get_session_id(tmp_path)
+        assert isinstance(recovered, str) and len(recovered) == 36
+
+
+# ── Agent role registration ──────────────────────────────────────
+
+
+class TestAgentRegistration:
+    """The refactory role must be declared in AgentRole and in agents.yml."""
+
+    def test_refactory_role_in_agent_role(self) -> None:
+        from factory.agents.runner import AgentRole
+
+        assert "refactory" in get_args(AgentRole)
+
+    def test_refactory_in_agents_yml(self) -> None:
+        import yaml
+        registry_file = Path(__file__).parent.parent / "factory" / "agents" / "agents.yml"
+        registry = yaml.safe_load(registry_file.read_text())
+        assert "refactory" in registry
+        assert "model" in registry["refactory"] and "tools" in registry["refactory"]
+
+
+# ── CLI integration ──────────────────────────────────────────────
+
+
+class TestCLIIntegration:
+    def test_refactory_subcommand_exists(self) -> None:
+        """The parser accepts the bare 'refactory' subcommand."""
+        from factory.cli import build_parser
+
+        parsed = build_parser().parse_args(["refactory"])
+        assert parsed.command == "refactory"
+
+    def test_refactory_accepts_path_arg(self) -> None:
+        """An optional positional path is stored verbatim on the namespace."""
+        from factory.cli import build_parser
+
+        parsed = build_parser().parse_args(["refactory", "/some/path"])
+        assert parsed.path == "/some/path"
+
+    def test_refactory_path_default_none(self) -> None:
+        """Without a positional argument, path is None."""
+        from factory.cli import build_parser
+
+        parsed = build_parser().parse_args(["refactory"])
+        assert parsed.path is None
+
+    def test_refactory_prompt_resolves(self) -> None:
+        """resolve_prompt returns a non-empty string for the refactory role."""
+        from factory.agents.runner import resolve_prompt
+
+        text = resolve_prompt("refactory")
+        assert isinstance(text, str) and len(text) > 0
+
+
+# ── cmd_refactory ────────────────────────────────────────────────
+
+
+class TestCmdRefactory:
+    @pytest.fixture(autouse=True)
+    def _restore_cwd(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+        # cmd_refactory calls the real os.chdir(); monkeypatch.chdir remembers the
+        # starting directory and restores it at teardown so cwd cannot leak out.
+        monkeypatch.chdir(tmp_path)
+
+    def test_no_claude_returns_error(self, tmp_path: Path) -> None:
+        from factory.cli import cmd_refactory, build_parser
+
+        args = build_parser().parse_args(["refactory", str(tmp_path)])
+        with patch("shutil.which", return_value=None):
+            code = cmd_refactory(args)
+        assert code == 1
+
+    def test_new_session_uses_session_id(self, tmp_path: Path) -> None:
+        from factory.cli import cmd_refactory, build_parser
+
+        args = build_parser().parse_args(["refactory", str(tmp_path)])
+        with patch("shutil.which", return_value="/usr/bin/claude"), \
+             patch("os.execvp") as mock_exec:
+            cmd_refactory(args)
+
+        cmd = mock_exec.call_args[0][1]
+        assert "--session-id" in cmd
+        assert "--resume" not in cmd
+        assert "--append-system-prompt-file" in cmd
+
+    def test_existing_session_uses_resume(self, tmp_path: Path) -> None:
+        from factory.cli import cmd_refactory, build_parser
+
+        save_session_id(tmp_path, "existing-uuid")
+        args = build_parser().parse_args(["refactory", str(tmp_path)])
+        with patch("shutil.which", return_value="/usr/bin/claude"), \
+             patch("os.execvp") as mock_exec:
+            cmd_refactory(args)
+
+        cmd = mock_exec.call_args[0][1]
+        assert "--resume" in cmd
+        assert "--session-id" not in cmd
+        resume_idx = cmd.index("--resume")
+        assert cmd[resume_idx + 1] == "existing-uuid"
+
+    def test_reset_flag_uses_session_id(self, tmp_path: Path) -> None:
+        from factory.cli import cmd_refactory, build_parser
+
+        save_session_id(tmp_path, "old-uuid")
+        args = build_parser().parse_args(["refactory", "--reset", str(tmp_path)])
+        with patch("shutil.which", return_value="/usr/bin/claude"), \
+             patch("os.execvp") as mock_exec:
+            cmd_refactory(args)
+
+        cmd = mock_exec.call_args[0][1]
+        assert "--session-id" in cmd
+        assert "--resume" not in cmd
+
+    def test_model_flag_forwarded(self, tmp_path: Path) -> None:
+        from factory.cli import cmd_refactory, build_parser
+
+        args = build_parser().parse_args(["refactory", "--model", "sonnet", str(tmp_path)])
+        with patch("shutil.which", return_value="/usr/bin/claude"), \
+             patch("os.execvp") as mock_exec:
+            cmd_refactory(args)
+
+        cmd = mock_exec.call_args[0][1]
+        assert "--model" in cmd
+        model_idx = cmd.index("--model")
+        assert cmd[model_idx + 1] == "sonnet"
+
+    def test_default_path_uses_cwd(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
+        from factory.cli import cmd_refactory, build_parser
+
+        monkeypatch.chdir(tmp_path)
+        args = build_parser().parse_args(["refactory"])
+        with patch("shutil.which", return_value="/usr/bin/claude"), \
+             patch("os.execvp"):
+            cmd_refactory(args)
+
+        assert (tmp_path / ".refactory").is_dir()
+        assert (tmp_path / ".refactory" / "session.json").exists()
diff --git a/tests/test_subprocess.py b/tests/test_subprocess.py
index 93b00fbf..7c54270f 100644
--- a/tests/test_subprocess.py
+++ b/tests/test_subprocess.py
@@ -8,7 +8,7 @@
 
 def test_subprocess_readline_limit():
     """Verify subprocess uses 1MB readline limit, not default 64KB."""
-    source = Path("factory/runners/_subprocess.py").read_text()
+    source = (Path(__file__).parent.parent / "factory" / "runners" / "_subprocess.py").read_text()
     tree = ast.parse(source)
     for node in ast.walk(tree):
         if isinstance(node, ast.Call) and "create_subprocess_exec" in ast.dump(node):
diff --git a/tests/test_tmux_cli.py b/tests/test_tmux_cli.py
index 0c4153a2..34b80813 100644
--- a/tests/test_tmux_cli.py
+++ b/tests/test_tmux_cli.py
@@ -133,9 +133,9 @@ def test_propagates_all_flags(self) -> None:
         result = _build_tmux_run_args(args, Path("/tmp/project"), "opus-4")
 
         assert "--mode improve" in result
-        assert "--loop" in result
-        assert "--interval 900" in result
-        assert "--max-cycles 5" in result
+        assert "--loop" not in result
+        assert "--interval" not in result
+        assert "--max-cycles" not in result
         assert "--model" in result
         assert "--no-github" in result
         assert "--profile" in result
@@ -172,7 +172,7 @@ def test_minimal_args(self) -> None:
             bg_agents=False, tmux_persist=False, use_profile=False,
         )
         result = _build_tmux_run_args(args, Path("/tmp/p"), None)
-        assert result == "factory run /tmp/p"
+        assert result == "factory ceo /tmp/p"
 
 
 class TestCmdTmuxStop: