From e067ab9d122607bb8d4b140d535fa6af3ba2c478 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Wed, 24 Jun 2026 16:40:09 +0000 Subject: [PATCH 01/21] feat: add re:factory agent with skill files and workspace setup (#728) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces the re:factory persistent supervisor agent — a layer above the CEO that manages CEO lifecycles, context/compaction, and playbooks. Phase 1: Agent prompt (refactory.md), workspace module (refactory.py), AgentRole registration, agents.yml config, CLI subcommand + bare `factory` entry point. Phase 2-3: Four slash-command skill files (factory-run, sessions, compaction, playbook) copied to workspace on setup, giving the agent dispatch, monitoring, context preservation, and ACE evolution abilities. Co-Authored-By: Claude Opus 4.6 --- factory/agents/agents.yml | 9 ++ factory/agents/prompts/refactory.md | 149 +++++++++++++++++++++++++++ factory/agents/runner.py | 1 + factory/agents/skills/compaction.md | 60 +++++++++++ factory/agents/skills/factory-run.md | 66 ++++++++++++ factory/agents/skills/playbook.md | 47 +++++++++ factory/agents/skills/sessions.md | 49 +++++++++ factory/cli.py | 55 +++++++++- factory/refactory.py | 96 +++++++++++++++++ 9 files changed, 531 insertions(+), 1 deletion(-) create mode 100644 factory/agents/prompts/refactory.md create mode 100644 factory/agents/skills/compaction.md create mode 100644 factory/agents/skills/factory-run.md create mode 100644 factory/agents/skills/playbook.md create mode 100644 factory/agents/skills/sessions.md create mode 100644 factory/refactory.py diff --git a/factory/agents/agents.yml b/factory/agents/agents.yml index 6c9a6ce7..e01fda70 100644 --- a/factory/agents/agents.yml +++ b/factory/agents/agents.yml @@ -74,3 +74,12 @@ profiler: Synthesize a user's working style, preferences, and decision patterns from factory session evidence into a coherent prose profile. 
Use when generating or updating a user profile from experiment data. + +refactory: + model: opus + tools: [Bash, Read, Write, Edit, Grep, Glob, WebSearch, WebFetch] + description: >- + Persistent factory supervisor that manages CEO agent lifecycles, + context/compaction for child sessions, and playbook evolution via ACE. + Launched via bare 'factory' command or 'factory refactory'. Not spawned + by the CEO — it's the layer above. diff --git a/factory/agents/prompts/refactory.md b/factory/agents/prompts/refactory.md new file mode 100644 index 00000000..404060d3 --- /dev/null +++ b/factory/agents/prompts/refactory.md @@ -0,0 +1,149 @@ +# re:factory Agent — Persistent Factory Supervisor + +You are the re:factory agent — a persistent supervisor that outlives individual CEO sessions. You are not a specialist spawned by the CEO. You are the layer above: you manage CEO lifecycles, preserve context across sessions, and curate the playbooks that guide all factory agents. + +## Identity + +You are the factory's long-term memory and control plane. While the CEO operates within a single experiment cycle — hypothesize, build, evaluate, verdict — you operate across cycles, across projects, and across time. You think in projects and trajectories, not lines of code. + +You are interactive. The user talks to you directly. You are their interface to the factory system — you translate intent into dispatched work, monitor progress, and report results. + +You persist across restarts via `--session-id`. Your session state survives process exits. When you resume, you pick up where you left off — check on running sessions, review completed work, and continue managing the factory. + +## Capabilities + +Three core capabilities, delivered via slash commands: + +1. **CEO Dispatch** — Launch, monitor, and stop factory runs across projects. Use `/factory-run` for dispatch patterns. +2. **Compaction Management** — Preserve context for long-running CEO sessions. 
Use `/compaction` for context injection patterns. +3. **Playbook Evolution** — Curate agent playbooks via ACE. Use `/playbook` for evolution triggers and review. + +Use your slash commands to recall the detailed procedures for each capability. + +## Factory CLI Reference + +You have access to the full factory CLI. Key commands: + +### Dispatch & Monitoring +- `factory ceo <path>` — Single CEO improvement cycle (foreground, blocks until done) +- `factory run --loop --interval 1800` — Continuous heartbeat loop +- `factory tmux <path>` — Dispatch CEO in a detached tmux session +- `factory tmux <path> --loop` — Continuous loop in tmux (preferred for multi-project) +- `factory tmux-ls` — List active factory tmux sessions +- `factory tmux-stop --session <session-name>` — Stop a tmux session +- `factory tmux-stop --path <path>` — Stop session by project path + +### Project Intelligence +- `factory eval <path>` — Run eval, get current composite score +- `factory history <path>` — Show experiment history (TSV) +- `factory study <path>` — Analyze codebase, write observations +- `factory status <path>` — Show project state and recent activity +- `factory backlog-list <path>` — List pending backlog items +- `factory backlog-add "item"` — Add backlog item + +### Recovery & State +- `factory checkpoint <path>` — Save CEO state for crash recovery +- `factory resume <path>` — Resume from last checkpoint + +### Self-Evolution +- `factory ace` — Evolve all agent playbooks from experiment data +- `factory ace-stats` — Show playbook evolution statistics + +## Session Persistence + +You run with `--session-id` for persistent memory across restarts. Your session ID is stored in `~/.factory/refactory-session.json`. + +When you start: +1. Check `factory tmux-ls` for any running CEO sessions +2. Check recent project activity if you have active projects +3. Resume any monitoring or follow-up tasks from your prior session + +When you're interrupted or restarted, you lose nothing — your conversation history persists via the session ID.
Use `--resume` to continue seamlessly. + +## Working Directory + +Your workspace is `~/.factory/refactory/`. It contains: +- `.claude/commands/` — Your slash command skills (installed by `factory refactory`) +- `.claude/settings.json` — MCP server configuration +- `CLAUDE.md` — Workspace-level instructions + +Do not store project data here. Project state lives in each project's `.factory/` directory. + +## Behavioral Rules + +### 1. Never Implement Code Directly + +You do not write code, fix bugs, run tests, or edit source files. You are a supervisor. When something needs to be built or fixed, you dispatch a CEO run: + +```bash +factory ceo /path/to/project --focus "the thing to build" +factory tmux /path/to/project --loop +``` + +The CEO handles the full experiment lifecycle — it has its own specialist agents (Builder, QA, Researcher, Strategist, Archivist) for all technical work. + +### 2. Think in Projects and Cycles + +Your mental model is: +- **Projects** — directories with codebases that the factory improves +- **Cycles** — CEO experiment runs that hypothesize, build, evaluate, and verdict +- **Trajectories** — the arc of a project's improvement over many cycles + +You track which projects exist, what their current scores are, what's in their backlogs, and whether CEO runs are active. You don't track individual code changes. + +### 3. Dispatch Based on Intent + +When the user says "work on X": +1. Determine the project path (ask if ambiguous) +2. Check if a CEO session is already running for that project (`factory tmux-ls`) +3. Choose the right dispatch mode: + - `factory tmux --loop` for ongoing improvement + - `factory ceo --focus "item"` for targeted single-item work + - `factory ceo --mode design` for brainstorming what to work on + - `factory ceo --mode research` for research-driven improvement + +### 4. 
Monitor Proactively + +While CEO sessions are running: +- Periodically check `factory tmux-ls` for session status +- After completion, read `.factory/reviews/` for agent outputs +- Run `factory eval ` to check scores +- Report findings back to the user + +### 5. Review Completed Work + +After a CEO cycle completes: +1. Read the project's `.factory/reviews/ceo-latest.md` +2. Run `factory eval ` for the current score +3. Run `factory history ` to see the experiment record +4. Summarize: what was attempted, what was the verdict, what's the score delta + +### 6. Preserve Context Across Sessions + +You are the persistent layer. When CEO sessions compact or restart, context is lost. You retain the big picture: +- Which hypotheses have been tried +- What the score trajectory looks like +- What's still in the backlog +- What patterns of success or failure have emerged + +Use `factory checkpoint ` before long runs and `factory resume ` after crashes. + +### 7. Curate Playbooks + +Periodically trigger playbook evolution via `factory ace` to distill experiment outcomes into agent behavior rules. Review with `factory ace-stats`. This is how the factory's agents improve over time. + +## Hierarchy + +``` +re:factory (you) — persistent supervisor + └── CEO — per-cycle orchestrator (spawned by you) + ├── Researcher + ├── Strategist + ├── Builder + ├── QA + ├── Archivist + ├── Refiner + └── Failure Analyst +``` + +You spawn CEOs. CEOs spawn specialists. Never the reverse. 
diff --git a/factory/agents/runner.py b/factory/agents/runner.py index 0e5fc005..fb62de75 100644 --- a/factory/agents/runner.py +++ b/factory/agents/runner.py @@ -16,6 +16,7 @@ AgentRole = Literal[ "researcher", "strategist", "builder", "qa", "archivist", "ceo", "failure_analyst", "refiner", "profiler", + "refactory", ] # Consecutive failure tracking diff --git a/factory/agents/skills/compaction.md b/factory/agents/skills/compaction.md new file mode 100644 index 00000000..ade0e453 --- /dev/null +++ b/factory/agents/skills/compaction.md @@ -0,0 +1,60 @@ +# /compaction — Context Preservation for CEO Sessions + +Use this skill to manage compaction and context loss in long-running CEO sessions. + +## Why Compaction Matters + +CEO sessions running long `--loop` cycles will hit Claude Code's context compaction. When this happens, the CEO loses track of its strategy, repeats work, or makes contradictory decisions. You are the persistent memory layer — you know what the CEO was doing and can help recover context. + +## Checkpoint Before Long Runs + +Before dispatching a long `--loop` run, save a recovery point: +```bash +factory checkpoint +``` +This captures the current strategy state so you can resume if the session crashes. + +## Resume from Crashes + +If a CEO session dies unexpectedly: +```bash +factory resume +``` +This restarts from the last checkpoint, preserving strategy and experiment state. + +## Context Injection Pattern + +When a CEO session has compacted or needs context refreshed, gather and compose state: + +1. **Generate fresh observations:** + ```bash + factory study + ``` + +2. **Read current strategy:** + Read `.factory/strategy/current.md` — contains hypotheses, priorities, and the design space assessment. + +3. **Read pending work:** + Read `.factory/strategy/backlog.md` — items the CEO should be working on. + +4. **Read latest agent outputs:** + Read `.factory/reviews/` — `ceo-latest.md` and other agent review files show what was last attempted. 
+ +5. **Compose a summary** of the above and inject it via the CEO's next `--focus` or `--prompt` flag to restore awareness. + +## Proactive Monitoring + +While CEO runs are active, periodically check on them: + +```bash +factory tmux-ls # are sessions still running? +factory status # project state and recent activity +factory history # latest experiment outcomes +``` + +Signs of compaction trouble: +- A CEO cycle takes much longer than usual +- The user reports the CEO seems confused or is repeating work +- History shows consecutive REVERTs with similar hypotheses + +When you detect these signals, checkpoint the project, stop the session, and dispatch a fresh CEO with context injected via `--focus` or `--prompt`. diff --git a/factory/agents/skills/factory-run.md b/factory/agents/skills/factory-run.md new file mode 100644 index 00000000..90a96b01 --- /dev/null +++ b/factory/agents/skills/factory-run.md @@ -0,0 +1,66 @@ +# /factory-run — CEO Dispatch + +Use this skill to launch, monitor, and manage factory CEO runs. + +## Dispatch Modes + +**Long-running improvement (preferred for multi-project):** +```bash +factory tmux --loop +factory tmux --loop --interval 1800 # custom interval (seconds) +``` +Runs in a detached tmux session. Use this when managing multiple projects — sessions persist and you can check back later. + +**Single blocking cycle:** +```bash +factory ceo +``` +Runs in foreground, blocks until the cycle completes. Use when you want to immediately process results after completion. 
+ +**Targeted single-item build:** +```bash +factory ceo --focus "" +factory ceo --focus 42 # GitHub issue number +factory ceo --focus "owner/repo#42" +``` + +**Mode selection:** +```bash +factory ceo --mode improve # default — score-driven improvement +factory ceo --mode design # brainstorm what to work on first +factory ceo --mode research # research-driven improvement +factory ceo --mode meta # improve the factory itself + ACE evolution +``` + +## Monitor Running Sessions + +```bash +factory tmux-ls +``` +Lists all active factory tmux sessions with project paths and status. + +## Stop a Session + +```bash +factory tmux-stop --session +factory tmux-stop --path +``` + +## Check Results After Completion + +1. Read `.factory/reviews/ceo-latest.md` in the project directory for the CEO's final output +2. Run `factory eval ` for the current composite score +3. Run `factory history ` for the full experiment log +4. Read `.factory/reviews/` for individual agent outputs (builder-latest.md, qa-latest.md, etc.) + +## When to Use Which + +| Scenario | Command | +|---|---| +| Managing 2+ projects simultaneously | `factory tmux --loop` for each | +| User asks "work on this project" | `factory tmux --loop` | +| User asks to build one specific thing | `factory ceo --focus ""` | +| User wants to discuss what to work on | `factory ceo --mode design` | +| Quick one-off improvement | `factory ceo ` | + +Always check `factory tmux-ls` before dispatching to avoid launching duplicate sessions for the same project. diff --git a/factory/agents/skills/playbook.md b/factory/agents/skills/playbook.md new file mode 100644 index 00000000..46da1e12 --- /dev/null +++ b/factory/agents/skills/playbook.md @@ -0,0 +1,47 @@ +# /playbook — ACE Playbook Evolution + +Use this skill to manage and evolve agent playbooks via the ACE (Automated Capability Evolution) system. + +## Trigger Playbook Evolution + +```bash +factory ace +``` +Evolves all agent playbooks from accumulated experiment data. 
ACE analyzes experiment outcomes (KEEP vs REVERT), extracts behavioral patterns, and distills them into DO/DON'T rules in each role's playbook. + +## Check Evolution Stats + +```bash +factory ace-stats +``` +Shows which rules were added, removed, or updated in the latest evolution run. Use this to verify that evolution produced sensible changes. + +## Read Current Playbooks + +Playbooks live at `~/.factory/playbooks/<role>.md` — one per agent role: +- `researcher.md`, `strategist.md`, `builder.md`, `qa.md` +- `archivist.md`, `refiner.md`, `failure_analyst.md`, `ceo.md` + +Each playbook contains empirically-derived DO/DON'T rules with helpful/harmful counts. Higher helpful counts indicate stronger confidence in a rule. + +## When to Evolve + +Trigger `factory ace` when: +- **3+ experiments** have completed across any project since the last evolution +- **Agent mistakes repeat** — you observe the same failure pattern across experiments (e.g., builder keeps making the same type of error) +- **User requests it** — "improve how the builder works", "agents keep doing X wrong" +- **After a meta mode run** — meta mode already runs ACE, but you may want a follow-up evolution after reviewing the results + +## Targeted Review for Underperforming Roles + +If a specific agent role is underperforming: + +1. **Read its playbook:** `~/.factory/playbooks/<role>.md` +2. **Check experiment archives:** Read `.factory/archive/experiments/` in relevant projects for patterns of failure +3. **Read agent outputs:** Check `.factory/reviews/<role>-latest.md` across projects to spot recurring issues +4. **Trigger evolution:** Run `factory ace` — ACE will incorporate the latest experiment data +5. **Verify changes:** Run `factory ace-stats` and read the updated playbook to confirm the new rules address the observed issues + +## Manual Playbook Editing + +Playbooks are plain markdown. If ACE misses a pattern or you need an immediate fix, you can edit `~/.factory/playbooks/<role>.md` directly.
ACE will preserve manual edits on subsequent evolutions as long as the format is maintained. diff --git a/factory/agents/skills/sessions.md b/factory/agents/skills/sessions.md new file mode 100644 index 00000000..621bbfe6 --- /dev/null +++ b/factory/agents/skills/sessions.md @@ -0,0 +1,49 @@ +# /sessions — Active Session Tracking + +Use this skill to track, health-check, and review factory CEO sessions. + +## List Active Sessions + +```bash +factory tmux-ls +``` +Shows all active factory tmux sessions. Each entry includes the session name and project path. Run this frequently while CEO sessions are active. + +## Health Check a Session + +Verify a tmux session is alive and the CEO process is running: +```bash +tmux has-session -t <session-name> 2>/dev/null && echo "alive" || echo "dead" +tmux list-panes -t <session-name> -F '#{pane_pid}' 2>/dev/null +``` +If the session exists but the CEO process has exited, the session is stale — stop it and dispatch a fresh one if needed. + +## User Attach Guidance + +If the user wants to watch or interact with a running CEO session: +``` +tmux attach -t <session-name> +``` +- `Ctrl-b d` to detach without stopping the session +- `Ctrl-c` inside the session will interrupt the CEO — warn the user + +## Post-Completion Review + +When a CEO session finishes: + +1. **Read agent outputs:** Check `.factory/reviews/` in the project directory — `ceo-latest.md`, `builder-latest.md`, `qa-latest.md` contain the latest agent outputs +2. **Check scores:** `factory eval <path>` for the current composite score +3. **Check history:** `factory history <path>` for the experiment log — look at the latest entry for the verdict (KEEP/REVERT) and score delta +4. **Check strategy:** Read `.factory/strategy/current.md` for what the CEO planned and `.factory/strategy/observations.md` for what was observed + +Summarize findings to the user: what was attempted, what was the verdict, what's the score delta.
+ +## Concurrent Multi-Project Management + +You can have multiple CEO sessions running simultaneously across different projects. Best practices: + +- Track which projects have active sessions to avoid duplicate launches +- Use `factory tmux-ls` as your dashboard — run it periodically +- When a session completes, review results before deciding whether to launch another cycle +- Stagger launches to avoid resource contention on the host machine +- If multiple sessions are running, check each project's results systematically — don't let completed sessions go unreviewed diff --git a/factory/cli.py b/factory/cli.py index af588fd0..37c18b2b 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3350,6 +3350,51 @@ def cmd_tmux_stop(args: argparse.Namespace) -> int: return 0 +def cmd_refactory(args: argparse.Namespace) -> int: + """Launch the re:factory persistent supervisor agent. + + Sets up the workspace, resolves the session ID, and replaces the current + process with an interactive claude session via os.execvp. + """ + import shutil + + from factory.agents.runner import resolve_prompt + from factory.refactory import get_session_id, setup_workspace + + claude_path = shutil.which("claude") + if not claude_path: + print("Error: 'claude' CLI not found. 
Install Claude Code first.", file=sys.stderr) + return 1 + + workspace = setup_workspace() + reset = getattr(args, "reset", False) + is_new_session = reset or not (Path.home() / ".factory" / "refactory-session.json").exists() + session_id = get_session_id(reset=reset) + model = getattr(args, "model", None) + + prompt = resolve_prompt("refactory") + prompt_file = tempfile.NamedTemporaryFile( + mode="w", suffix=".md", prefix="refactory-prompt-", delete=False, + ) + prompt_file.write(prompt) + prompt_file.close() + + cmd = [ + "claude", + "--session-id", session_id, + "--append-system-prompt-file", prompt_file.name, + "--cwd", str(workspace), + ] + + if not is_new_session: + cmd.insert(3, "--resume") + + if model: + cmd.extend(["--model", model]) + + os.execvp("claude", cmd) + return 0 # unreachable after execvp + def _has_research_target(project_path: Path) -> bool: """Check if project already has research_target configured.""" @@ -4525,6 +4570,13 @@ def build_parser() -> argparse.ArgumentParser: p.add_argument("--all", action="store_true", default=False, dest="stop_all", help="Stop ALL factory tmux sessions (required when no --session/--path given)") + # refactory — persistent supervisor agent + p = sub.add_parser("refactory", help="Launch the re:factory persistent supervisor agent") + p.add_argument("--reset", action="store_true", default=False, + help="Reset session (new session ID, fresh start)") + p.add_argument("--model", default=None, + help="Claude model override") + # workflow — graph engine commands from factory.workflow.cli import add_workflow_parser add_workflow_parser(sub) @@ -4553,7 +4605,7 @@ def main(argv: list[str] | None = None) -> int: if not args.command: if sys.stdin.isatty() and sys.stderr.isatty(): - return _welcome_wizard() + return cmd_refactory(args) parser.print_help() return 1 @@ -4618,6 +4670,7 @@ def main(argv: list[str] | None = None) -> int: "tmux": cmd_tmux, "tmux-ls": cmd_tmux_ls, "tmux-stop": cmd_tmux_stop, + "refactory": 
cmd_refactory, "workflow": lambda a: __import__("factory.workflow.cli", fromlist=["cmd_workflow"]).cmd_workflow(a), } diff --git a/factory/refactory.py b/factory/refactory.py new file mode 100644 index 00000000..576823df --- /dev/null +++ b/factory/refactory.py @@ -0,0 +1,96 @@ +"""Workspace setup and session management for the re:factory agent.""" + +from __future__ import annotations + +import json +import shutil +import uuid +from datetime import datetime, timezone +from pathlib import Path + +WORKSPACE_DIR = Path.home() / ".factory" / "refactory" +SESSION_FILE = Path.home() / ".factory" / "refactory-session.json" + +SETTINGS_JSON = { + "mcpServers": { + "factory": { + "command": "factory", + "args": ["mcp-serve"], + } + } +} + +CLAUDE_MD_CONTENT = """\ +# re:factory workspace + +You are the re:factory supervisor. Use /slash commands and factory CLI to manage projects. +See your system prompt for full instructions. +""" + + +def setup_workspace() -> Path: + """Create the re:factory workspace at ~/.factory/refactory/. + + Idempotent — safe to call on every launch. Creates directory structure + and writes config files, overwriting settings.json and CLAUDE.md to + pick up any updates. + + Returns the workspace path. + """ + workspace = WORKSPACE_DIR + workspace.mkdir(parents=True, exist_ok=True) + + claude_dir = workspace / ".claude" + claude_dir.mkdir(exist_ok=True) + + commands_dir = claude_dir / "commands" + commands_dir.mkdir(exist_ok=True) + + settings_path = claude_dir / "settings.json" + settings_path.write_text(json.dumps(SETTINGS_JSON, indent=2) + "\n") + + claude_md_path = workspace / "CLAUDE.md" + claude_md_path.write_text(CLAUDE_MD_CONTENT) + + skills_src = Path(__file__).parent / "agents" / "skills" + if skills_src.is_dir(): + for skill_file in skills_src.glob("*.md"): + shutil.copy2(skill_file, commands_dir / skill_file.name) + + return workspace + + +def get_session_id(reset: bool = False) -> str: + """Read or create a persistent session ID. 
+ + The session ID is stored in ~/.factory/refactory-session.json (outside + the workspace, so it survives workspace regeneration). + + Args: + reset: If True, generate a new session ID even if one exists. + + Returns: + The session ID string. + """ + if not reset and SESSION_FILE.exists(): + try: + data = json.loads(SESSION_FILE.read_text()) + sid = data.get("session_id") + if isinstance(sid, str) and sid: + return sid + except (json.JSONDecodeError, KeyError): + pass + + sid = uuid.uuid4().hex + save_session_id(sid) + return sid + + +def save_session_id(session_id: str) -> None: + """Write session state to ~/.factory/refactory-session.json.""" + SESSION_FILE.parent.mkdir(parents=True, exist_ok=True) + data = { + "session_id": session_id, + "created": datetime.now(timezone.utc).isoformat(), + } + SESSION_FILE.write_text(json.dumps(data, indent=2) + "\n") From fa27d268d96953a9e95d6d2acb7ddcf573a03c12 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Wed, 24 Jun 2026 16:42:21 +0000 Subject: [PATCH 02/21] test: add tests for re:factory agent workspace and session management 13 test cases covering setup_workspace (directories, settings.json, CLAUDE.md, skill copying, idempotency), session ID lifecycle (create, existing, reset, roundtrip), agent role registration (AgentRole type, agents.yml), and CLI integration (subcommand parsing, prompt resolution). 
Co-Authored-By: Claude Opus 4.6 --- tests/test_refactory.py | 139 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) create mode 100644 tests/test_refactory.py diff --git a/tests/test_refactory.py b/tests/test_refactory.py new file mode 100644 index 00000000..ee1e581f --- /dev/null +++ b/tests/test_refactory.py @@ -0,0 +1,139 @@ +"""Tests for the re:factory agent workspace setup and session management.""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import get_args + +import pytest + +from factory.refactory import ( + CLAUDE_MD_CONTENT, + SETTINGS_JSON, + get_session_id, + save_session_id, + setup_workspace, +) + + +@pytest.fixture +def mock_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setattr( + "factory.refactory.WORKSPACE_DIR", tmp_path / ".factory" / "refactory" + ) + monkeypatch.setattr( + "factory.refactory.SESSION_FILE", + tmp_path / ".factory" / "refactory-session.json", + ) + return tmp_path + + +# ── setup_workspace ────────────────────────────────────────────── + + +class TestSetupWorkspace: + def test_creates_directories(self, mock_home: Path) -> None: + setup_workspace() + workspace = mock_home / ".factory" / "refactory" + assert workspace.is_dir() + assert (workspace / ".claude").is_dir() + assert (workspace / ".claude" / "commands").is_dir() + + def test_writes_settings_json(self, mock_home: Path) -> None: + setup_workspace() + settings = mock_home / ".factory" / "refactory" / ".claude" / "settings.json" + assert settings.exists() + data = json.loads(settings.read_text()) + assert data == SETTINGS_JSON + assert "factory" in data["mcpServers"] + + def test_writes_claude_md(self, mock_home: Path) -> None: + setup_workspace() + claude_md = mock_home / ".factory" / "refactory" / "CLAUDE.md" + assert claude_md.exists() + assert claude_md.read_text() == CLAUDE_MD_CONTENT + + def 
test_copies_skills(self, mock_home: Path) -> None: + setup_workspace() + commands_dir = mock_home / ".factory" / "refactory" / ".claude" / "commands" + skills_src = Path(__file__).parent.parent / "factory" / "agents" / "skills" + expected = list(skills_src.glob("*.md")) + assert len(expected) > 0, "No skill source files found" + for skill in expected: + assert (commands_dir / skill.name).exists(), f"Missing skill: {skill.name}" + + def test_idempotent(self, mock_home: Path) -> None: + ws1 = setup_workspace() + ws2 = setup_workspace() + assert ws1 == ws2 + settings = mock_home / ".factory" / "refactory" / ".claude" / "settings.json" + assert json.loads(settings.read_text()) == SETTINGS_JSON + + +# ── Session ID ─────────────────────────────────────────────────── + + +class TestSessionId: + def test_creates_new(self, mock_home: Path) -> None: + session_file = mock_home / ".factory" / "refactory-session.json" + assert not session_file.exists() + sid = get_session_id() + assert isinstance(sid, str) + assert len(sid) == 32 + assert session_file.exists() + + def test_returns_existing(self, mock_home: Path) -> None: + sid1 = get_session_id() + sid2 = get_session_id() + assert sid1 == sid2 + + def test_reset(self, mock_home: Path) -> None: + sid1 = get_session_id() + sid2 = get_session_id(reset=True) + assert sid1 != sid2 + assert len(sid2) == 32 + + def test_save_roundtrip(self, mock_home: Path) -> None: + custom_id = "abcdef1234567890abcdef1234567890" + save_session_id(custom_id) + assert get_session_id() == custom_id + + +# ── Agent role registration ────────────────────────────────────── + + +class TestAgentRegistration: + def test_refactory_role_in_agent_role(self) -> None: + from factory.agents.runner import AgentRole + + assert "refactory" in get_args(AgentRole) + + def test_refactory_in_agents_yml(self) -> None: + import yaml + + yml_path = Path(__file__).parent.parent / "factory" / "agents" / "agents.yml" + data = yaml.safe_load(yml_path.read_text()) + assert 
"refactory" in data + assert "model" in data["refactory"] + assert "tools" in data["refactory"] + + +# ── CLI integration ────────────────────────────────────────────── + + +class TestCLIIntegration: + def test_refactory_subcommand_exists(self) -> None: + from factory.cli import build_parser + + parser = build_parser() + args = parser.parse_args(["refactory"]) + assert args.command == "refactory" + + def test_refactory_prompt_resolves(self) -> None: + from factory.agents.runner import resolve_prompt + + prompt = resolve_prompt("refactory") + assert isinstance(prompt, str) + assert len(prompt) > 0 From f6cb2d9bd2ce57e0f4257e6c3fbab7a2aa88e505 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Wed, 24 Jun 2026 17:42:21 +0000 Subject: [PATCH 03/21] fix: add refactory to sandbox roles and update wizard test - Add "refactory" to _WORKSPACE_WRITE_ROLES in plugin.py so Codex sandbox mode handles the new role - Update test_tty_launches_wizard to mock cmd_refactory instead of _welcome_wizard, matching the new bare `factory` behavior Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/agents/plugin.py | 2 +- tests/test_cli_wizard.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/factory/agents/plugin.py b/factory/agents/plugin.py index 7e39d408..e5e58893 100644 --- a/factory/agents/plugin.py +++ b/factory/agents/plugin.py @@ -90,7 +90,7 @@ def generate_agent_content(role: str) -> str: _READ_ONLY_ROLES = frozenset({"researcher", "qa", "failure_analyst", "refiner", "profiler"}) -_WORKSPACE_WRITE_ROLES = frozenset({"builder", "archivist", "ceo", "strategist"}) +_WORKSPACE_WRITE_ROLES = frozenset({"builder", "archivist", "ceo", "strategist", "refactory"}) def _sandbox_mode(role: str) -> str: diff --git a/tests/test_cli_wizard.py b/tests/test_cli_wizard.py index ae53c76d..937c65c2 100644 --- a/tests/test_cli_wizard.py +++ b/tests/test_cli_wizard.py @@ -41,15 +41,15 @@ def test_non_tty_prints_help(self, capsys: pytest.CaptureFixture[str]) -> None: assert 
code == 1 def test_tty_launches_wizard(self) -> None: - """TTY with no subcommand dispatches to _welcome_wizard.""" - with patch("factory.cli._welcome_wizard", return_value=0) as mock_wizard, \ + """TTY with no subcommand dispatches to cmd_refactory.""" + with patch("factory.cli.cmd_refactory", return_value=0) as mock_refactory, \ patch("sys.stdin") as mock_stdin, \ patch("sys.stderr") as mock_stderr: mock_stdin.isatty.return_value = True mock_stderr.isatty.return_value = True code = main([]) assert code == 0 - mock_wizard.assert_called_once() + mock_refactory.assert_called_once() def test_stdin_not_tty_stderr_tty(self, capsys: pytest.CaptureFixture[str]) -> None: """If stdin is not a TTY (piped), falls through to help.""" From 36a7df1684b4a49b436278e755865bead14f61d5 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Thu, 25 Jun 2026 14:32:02 +0000 Subject: [PATCH 04/21] fix: use dashed UUID format for session IDs Claude Code's --session-id flag requires standard UUID format (with dashes), not hex-only. Found during E2E testing. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/refactory.py | 2 +- tests/test_refactory.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/factory/refactory.py b/factory/refactory.py index 576823df..a97fcca2 100644 --- a/factory/refactory.py +++ b/factory/refactory.py @@ -81,7 +81,7 @@ def get_session_id(reset: bool = False) -> str: except (json.JSONDecodeError, KeyError): pass - sid = uuid.uuid4().hex + sid = str(uuid.uuid4()) save_session_id(sid) return sid diff --git a/tests/test_refactory.py b/tests/test_refactory.py index ee1e581f..80f9dddc 100644 --- a/tests/test_refactory.py +++ b/tests/test_refactory.py @@ -81,7 +81,8 @@ def test_creates_new(self, mock_home: Path) -> None: assert not session_file.exists() sid = get_session_id() assert isinstance(sid, str) - assert len(sid) == 32 + assert len(sid) == 36 # UUID with dashes + assert sid.count("-") == 4 assert session_file.exists() def test_returns_existing(self, mock_home: Path) -> None: @@ -93,7 +94,7 @@ def test_reset(self, mock_home: Path) -> None: sid1 = get_session_id() sid2 = get_session_id(reset=True) assert sid1 != sid2 - assert len(sid2) == 32 + assert len(sid2) == 36 def test_save_roundtrip(self, mock_home: Path) -> None: custom_id = "abcdef1234567890abcdef1234567890" From f1db2c6917e265db5c5f55692d228e4100688514 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Thu, 25 Jun 2026 14:42:12 +0000 Subject: [PATCH 05/21] test: add coverage for cmd_refactory and corrupt JSON edge case - Add TestCmdRefactory: no_claude error, new/existing session resume flag, reset flag, model forwarding (all mock os.execvp) - Add test_corrupt_json_generates_new for the except branch - factory/refactory.py now at 100% coverage Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_refactory.py | 87 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/tests/test_refactory.py b/tests/test_refactory.py index 80f9dddc..2914f6fb 100644 --- 
a/tests/test_refactory.py +++ b/tests/test_refactory.py @@ -101,6 +101,14 @@ def test_save_roundtrip(self, mock_home: Path) -> None: save_session_id(custom_id) assert get_session_id() == custom_id + def test_corrupt_json_generates_new(self, mock_home: Path) -> None: + session_file = mock_home / ".factory" / "refactory-session.json" + session_file.parent.mkdir(parents=True, exist_ok=True) + session_file.write_text("{corrupt json!!") + sid = get_session_id() + assert isinstance(sid, str) + assert len(sid) == 36 + # ── Agent role registration ────────────────────────────────────── @@ -138,3 +146,82 @@ def test_refactory_prompt_resolves(self) -> None: prompt = resolve_prompt("refactory") assert isinstance(prompt, str) assert len(prompt) > 0 + + +# ── cmd_refactory ──────────────────────────────────────────────── + + +class TestCmdRefactory: + def test_no_claude_returns_error(self, mock_home: Path) -> None: + from unittest.mock import patch + + from factory.cli import cmd_refactory, build_parser + + parser = build_parser() + args = parser.parse_args(["refactory"]) + with patch("shutil.which", return_value=None): + code = cmd_refactory(args) + assert code == 1 + + def test_new_session_no_resume_flag(self, mock_home: Path) -> None: + from unittest.mock import patch + + from factory.cli import cmd_refactory, build_parser + + parser = build_parser() + args = parser.parse_args(["refactory"]) + with patch("shutil.which", return_value="/usr/bin/claude"), \ + patch("os.execvp") as mock_exec: + cmd_refactory(args) + + cmd = mock_exec.call_args[0][1] + assert "--session-id" in cmd + assert "--resume" not in cmd + assert "--append-system-prompt-file" in cmd + assert "--cwd" in cmd + + def test_existing_session_has_resume_flag(self, mock_home: Path) -> None: + from unittest.mock import patch + + from factory.cli import cmd_refactory, build_parser + + save_session_id("existing-uuid") + parser = build_parser() + args = parser.parse_args(["refactory"]) + with patch("shutil.which", 
return_value="/usr/bin/claude"), \ + patch("os.execvp") as mock_exec: + cmd_refactory(args) + + cmd = mock_exec.call_args[0][1] + assert "--resume" in cmd + + def test_reset_flag_no_resume(self, mock_home: Path) -> None: + from unittest.mock import patch + + from factory.cli import cmd_refactory, build_parser + + save_session_id("old-uuid") + parser = build_parser() + args = parser.parse_args(["refactory", "--reset"]) + with patch("shutil.which", return_value="/usr/bin/claude"), \ + patch("os.execvp") as mock_exec: + cmd_refactory(args) + + cmd = mock_exec.call_args[0][1] + assert "--resume" not in cmd + + def test_model_flag_forwarded(self, mock_home: Path) -> None: + from unittest.mock import patch + + from factory.cli import cmd_refactory, build_parser + + parser = build_parser() + args = parser.parse_args(["refactory", "--model", "sonnet"]) + with patch("shutil.which", return_value="/usr/bin/claude"), \ + patch("os.execvp") as mock_exec: + cmd_refactory(args) + + cmd = mock_exec.call_args[0][1] + assert "--model" in cmd + model_idx = cmd.index("--model") + assert cmd[model_idx + 1] == "sonnet" From 839e8fae3d958303a3baf9f9d8dbb720a98a33a1 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Thu, 25 Jun 2026 20:22:42 +0000 Subject: [PATCH 06/21] fix: replace --cwd with os.chdir for claude CLI compatibility Claude Code CLI doesn't support --cwd. Change to os.chdir(workspace) before os.execvp instead. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/cli.py | 2 +- tests/test_refactory.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/factory/cli.py b/factory/cli.py index 37c18b2b..af980d78 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3383,7 +3383,6 @@ def cmd_refactory(args: argparse.Namespace) -> int: "claude", "--session-id", session_id, "--append-system-prompt-file", prompt_file.name, - "--cwd", str(workspace), ] if not is_new_session: @@ -3392,6 +3391,7 @@ def cmd_refactory(args: argparse.Namespace) -> int: if model: cmd.extend(["--model", model]) + os.chdir(workspace) os.execvp("claude", cmd) return 0 # unreachable after execvp diff --git a/tests/test_refactory.py b/tests/test_refactory.py index 2914f6fb..cdc61c22 100644 --- a/tests/test_refactory.py +++ b/tests/test_refactory.py @@ -178,7 +178,7 @@ def test_new_session_no_resume_flag(self, mock_home: Path) -> None: assert "--session-id" in cmd assert "--resume" not in cmd assert "--append-system-prompt-file" in cmd - assert "--cwd" in cmd + assert "--cwd" not in cmd def test_existing_session_has_resume_flag(self, mock_home: Path) -> None: from unittest.mock import patch From d2a6a68d6e442b238c66fc96bf30575e99ea4450 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Thu, 25 Jun 2026 20:33:05 +0000 Subject: [PATCH 07/21] fix: use --resume for existing sessions, --session-id for new Claude Code doesn't allow --session-id combined with --resume. New sessions: --session-id (sets the ID). Existing sessions: --resume (resumes by ID). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/cli.py | 20 ++++++++++++-------- tests/test_refactory.py | 11 +++++++---- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/factory/cli.py b/factory/cli.py index af980d78..a5b83ed2 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3379,14 +3379,18 @@ def cmd_refactory(args: argparse.Namespace) -> int: prompt_file.write(prompt) prompt_file.close() - cmd = [ - "claude", - "--session-id", session_id, - "--append-system-prompt-file", prompt_file.name, - ] - - if not is_new_session: - cmd.insert(3, "--resume") + if is_new_session: + cmd = [ + "claude", + "--session-id", session_id, + "--append-system-prompt-file", prompt_file.name, + ] + else: + cmd = [ + "claude", + "--resume", session_id, + "--append-system-prompt-file", prompt_file.name, + ] if model: cmd.extend(["--model", model]) diff --git a/tests/test_refactory.py b/tests/test_refactory.py index cdc61c22..a7bf7274 100644 --- a/tests/test_refactory.py +++ b/tests/test_refactory.py @@ -163,7 +163,7 @@ def test_no_claude_returns_error(self, mock_home: Path) -> None: code = cmd_refactory(args) assert code == 1 - def test_new_session_no_resume_flag(self, mock_home: Path) -> None: + def test_new_session_uses_session_id(self, mock_home: Path) -> None: from unittest.mock import patch from factory.cli import cmd_refactory, build_parser @@ -178,9 +178,8 @@ def test_new_session_no_resume_flag(self, mock_home: Path) -> None: assert "--session-id" in cmd assert "--resume" not in cmd assert "--append-system-prompt-file" in cmd - assert "--cwd" not in cmd - def test_existing_session_has_resume_flag(self, mock_home: Path) -> None: + def test_existing_session_uses_resume(self, mock_home: Path) -> None: from unittest.mock import patch from factory.cli import cmd_refactory, build_parser @@ -194,8 +193,11 @@ def test_existing_session_has_resume_flag(self, mock_home: Path) -> None: cmd = mock_exec.call_args[0][1] assert "--resume" in cmd + assert "--session-id" 
not in cmd + resume_idx = cmd.index("--resume") + assert cmd[resume_idx + 1] == "existing-uuid" - def test_reset_flag_no_resume(self, mock_home: Path) -> None: + def test_reset_flag_uses_session_id(self, mock_home: Path) -> None: from unittest.mock import patch from factory.cli import cmd_refactory, build_parser @@ -208,6 +210,7 @@ def test_reset_flag_no_resume(self, mock_home: Path) -> None: cmd_refactory(args) cmd = mock_exec.call_args[0][1] + assert "--session-id" in cmd assert "--resume" not in cmd def test_model_flag_forwarded(self, mock_home: Path) -> None: From 7d3cc72a80ed33ef389ba189e016928292f23411 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Thu, 25 Jun 2026 21:32:20 +0000 Subject: [PATCH 08/21] feat: per-project refactory workspace + sop-compact hooks Move refactory workspace from global ~/.factory/refactory/ to per-project .refactory/ so each project gets its own session and config. Add sop-compact PreCompact and SessionStart hooks so the agent survives context compaction. Co-Authored-By: Claude Opus 4.6 --- factory/agents/sop-compact/pre-compact.sh | 243 ++++++++++++++++++++ factory/agents/sop-compact/session-start.sh | 56 +++++ factory/agents/sop-compact/sop-compact.md | 42 ++++ factory/cli.py | 15 +- factory/refactory.py | 80 +++++-- tests/test_refactory.py | 181 +++++++++------ 6 files changed, 524 insertions(+), 93 deletions(-) create mode 100755 factory/agents/sop-compact/pre-compact.sh create mode 100755 factory/agents/sop-compact/session-start.sh create mode 100644 factory/agents/sop-compact/sop-compact.md diff --git a/factory/agents/sop-compact/pre-compact.sh b/factory/agents/sop-compact/pre-compact.sh new file mode 100755 index 00000000..b94fa5ac --- /dev/null +++ b/factory/agents/sop-compact/pre-compact.sh @@ -0,0 +1,243 @@ +#!/usr/bin/env bash +# pre-compact: fires before /compact (manual or auto). 
Runs a `claude -p` sidecar +# that reads the just-finished conversation, promotes principled learnings to the +# repo's durable targets, and writes an ephemeral handoff snapshot the SessionStart +# hook will point the post-compact session at. +# +# PreCompact is awaited (blocking) — CC waits for this hook (and its sidecar) before +# it starts summarizing. Exit 2 hard-blocks the compaction and surfaces stderr to the +# user; we use that so a failed snapshot aborts /compact rather than silently losing +# the in-flight context. Exit 0 lets compaction proceed. +# +# Sidecar invocation pattern (claude -p with tool use + no session pollution) is +# derived from an upstream session-management tool's `sop` subcommand; this plugin +# ships its own prompt rather than shelling out to it. +# +# TRUST ASSUMPTION (security): the sidecar runs with --dangerously-skip-permissions and +# reads the just-finished transcript, which contains verbatim session content (user +# messages, tool output, external data). A crafted message in that transcript is therefore +# untrusted input reaching an agent with broad tool access. We accept this deliberately: +# - The transcript was already read, in full, by the main session that produced it — the +# sidecar gains no privilege the original session didn't already have over this repo. +# - Promotion targets are not confinable to a fixed subtree (CLAUDE.md lives at the repo +# root; per-project memory dirs can live OUTSIDE the repo under ~/.claude/projects/...), +# and the prompt samples large transcripts via head/grep — so a narrow --allowed-tools +# allowlist would break promotion. Broad access is required for the feature to work. +# - A `timeout` wrapper (below) bounds runaway/looping behavior so a hijacked sidecar +# can't block /compact forever. +# Treat the transcript as a controlled artifact from the CC runtime. If you need a stronger +# boundary, run the sidecar in a sandbox or scope promotion to in-repo paths only. 
+ +set -uo pipefail + +INPUT="$(cat 2>/dev/null || true)" + +jq_field() { + # $1 = jq path expression (e.g. .transcript_path). Prints value or empty string. + printf '%s' "$INPUT" | jq -r "${1} // empty" 2>/dev/null || printf '' +} + +TRANSCRIPT="$(jq_field .transcript_path)" +CWD="$(jq_field .cwd)" +TRIGGER="$(jq_field .trigger)" +SESSION_ID="$(jq_field .session_id)" + +# Repo root: prefer the cwd from stdin, then $CLAUDE_PROJECT_DIR, then $PWD. +REPO_DIR="${CWD:-${CLAUDE_PROJECT_DIR:-$PWD}}" + +SOP_FILE="${REPO_DIR}/.claude/sop-compact.md" +SNAP_DIR="${REPO_DIR}/.claude/sop-compact" +# Seconds-resolution UTC + a PID suffix: two concurrent compacts (e.g. auto-compact in +# two long-running sessions on the same repo) can hit the same wall-second; the -$$ +# disambiguates so the second writer's mv -f doesn't clobber the first's handoff. The +# timestamp prefix still dominates lexical order, so SessionStart's latest-glob and +# prune_handoffs' sort are unaffected. (machine#118) +TS="$(date -u +%Y%m%dT%H%M%SZ)-$$" +HANDOFF="${SNAP_DIR}/handoff-${TS}.md" + +mkdir -p "$SNAP_DIR" + +# prune_handoffs: after a new handoff is written, keep only the most recent N matching +# handoff-*.md and remove the rest. N is SOP_COMPACT_HANDOFF_RETENTION (default 10). +# Glob expansion is lexically sorted and the zero-padded UTC timestamp prefix dominates +# (the -$$ suffix only disambiguates within a wall-second), so lexical == chronological; +# the oldest files sort first and are the ones removed. Called AFTER the write so the +# just-written handoff is always among the kept N. A keep < 1 (or non-numeric) value is a +# no-op so we never delete the file SessionStart needs. *.error.log and .handoff-*.XXXXXX +# temp files don't match handoff-*.md, so they're untouched. 
+prune_handoffs() { + local keep="${SOP_COMPACT_HANDOFF_RETENTION:-10}" + [[ "$keep" =~ ^[0-9]+$ ]] && (( keep >= 1 )) || return 0 + local files=() + shopt -s nullglob + files=( "${SNAP_DIR}"/handoff-*.md ) + shopt -u nullglob + local count=${#files[@]} + (( count > keep )) || return 0 + local i + for (( i = 0; i < count - keep; i++ )); do + rm -f "${files[i]}" + done +} + +# --- No SOP yet: write a minimal stub handoff and let compaction proceed. ---------- +if [[ ! -f "$SOP_FILE" ]]; then + TMP="$(mktemp "${SNAP_DIR}/.handoff-${TS}.XXXXXX")" + { + printf '# Pre-compact handoff (stub — no SOP)\n\n' + printf '_Generated %s by sop-compact PreCompact hook (trigger: %s)._\n\n' "$TS" "${TRIGGER:-unknown}" + printf 'This repo has **no `.claude/sop-compact.md`** — run `/init-sop-compact` to generate one ' + printf 'so future compactions get a real Promote+Snapshot pass.\n\n' + printf 'For now there is no repo-tailored procedure. After this compaction:\n\n' + printf '1. Treat the conversation history above as a lossy compaction summary, not the live session.\n' + printf '2. Re-check live state (git status, gh, files) before acting.\n' + printf '3. Prior transcript (for archaeology if needed): `%s`\n' "${TRANSCRIPT:-unknown}" + } >"$TMP" + mv -f "$TMP" "$HANDOFF" + prune_handoffs + exit 0 +fi + +# --- SOP present: run the sidecar to promote + snapshot. --------------------------- +MODEL="${SOP_COMPACT_MODEL:-opus[1m]}" + +PROMPT_FILE="$(mktemp)" +STDERR_FILE="$(mktemp)" +cleanup() { rm -f "$PROMPT_FILE" "$STDERR_FILE"; } +trap cleanup EXIT + +# extract_handoff: pull the handoff body out from between the ===HANDOFF=== and ===END=== +# sentinels the sidecar is asked to emit. The sidecar tends to narrate its promotion +# decisions before the markdown; the sentinels let us drop that preamble so the saved file +# starts at the `# Pre-compact handoff` heading. Reads raw output on stdin, prints the +# extracted body on stdout. 
Exit 0 if both sentinels were found and the body is non-empty; +# exit 1 otherwise (caller falls back to writing the raw output). +extract_handoff() { + awk ' + /^===HANDOFF===[[:space:]]*$/ { capture=1; started=1; next } + /^===END===[[:space:]]*$/ { if (capture) { capture=0; ended=1 } next } + capture { lines[n++] = $0 } + END { + if (!started || !ended) exit 1 + # Strip a single leading blank line so the H1 lands at the top of the file. + first = 0 + if (n > 0 && lines[0] == "") first = 1 + empty = 1 + for (i = first; i < n; i++) { + print lines[i] + if (lines[i] != "") empty = 0 + } + if (empty) exit 1 + } + ' +} + +cat >"$PROMPT_FILE" < +===END=== +EOF + +# Run the sidecar from the repo root so relative paths in the SOP resolve. Capture +# stdout (the handoff) and stderr (debug on failure) separately. +# +# PreCompact is awaited and blocking, so an unbounded sidecar would hang /compact forever +# (the user can't interrupt it). Wrap in `timeout` (default 600s, override via +# SOP_COMPACT_TIMEOUT); on expiry `timeout` exits 124, which the RC check below catches and +# converts to an exit-2 block — a clear failure rather than an infinite hang. +# Default is 600s (not 300s) because the sidecar defaults to opus[1m] (v0.3.3) and ingests +# the whole just-finished transcript — the long sessions this targets can need >5min to +# read + promote + snapshot, and a 300s wall would exit-2-block compaction (rc=124) on +# exactly those sessions (machine#120 review). +SIDECAR_OUT="$( + cd "$REPO_DIR" && timeout "${SOP_COMPACT_TIMEOUT:-600}" claude -p "$(cat "$PROMPT_FILE")" \ + --model "$MODEL" \ + --setting-sources "" \ + --disable-slash-commands \ + --strict-mcp-config \ + --no-chrome \ + --no-session-persistence \ + --dangerously-skip-permissions \ + 2>"$STDERR_FILE" +)" +RC=$? 
+ +if [[ $RC -ne 0 || -z "${SIDECAR_OUT// /}" ]]; then + DEBUG="${SNAP_DIR}/handoff-${TS}.error.log" + { + printf 'sop-compact PreCompact sidecar failed (rc=%s) at %s\n' "$RC" "$TS" + printf 'model=%s session=%s\n\n--- stderr ---\n' "$MODEL" "${SESSION_ID:-unknown}" + cat "$STDERR_FILE" 2>/dev/null + } >"$DEBUG" + # Exit 2 hard-blocks compaction so the user keeps the live context and knows the + # snapshot failed (rather than silently compacting into a lossy summary). + echo "sop-compact: pre-compact sidecar failed (rc=$RC). Compaction blocked to preserve context. See $DEBUG" >&2 + exit 2 +fi + +# The sidecar wraps its handoff in ===HANDOFF===/===END=== sentinels so any promotion- +# decision narration it emits stays out of the saved file. Extract the body; if the +# sentinels are missing/malformed, fall back to the raw output (a degraded snapshot beats +# losing the in-flight context) and warn so a maintainer can spot the extraction failure. +if HANDOFF_BODY="$(printf '%s\n' "$SIDECAR_OUT" | extract_handoff)"; then + HANDOFF_CONTENT="$HANDOFF_BODY" +else + HANDOFF_CONTENT="$SIDECAR_OUT" + echo "sop-compact: sidecar output missing sentinels; wrote raw output as fallback (see handoff for inspection)" >&2 +fi + +# Write the handoff atomically so SessionStart never reads a partial file. +TMP="$(mktemp "${SNAP_DIR}/.handoff-${TS}.XXXXXX")" +printf '%s\n' "$HANDOFF_CONTENT" >"$TMP" +mv -f "$TMP" "$HANDOFF" +prune_handoffs + +# --- Optional repo-local extension: run after a successful snapshot. --------------- +# Failures here must not take down the pre-hook, so guard with controlled error handling. 
+EXT="${REPO_DIR}/.claude/sop-compact/pre.sh" +if [[ -f "$EXT" ]]; then + ( set +e; SOP_COMPACT_HANDOFF="$HANDOFF" SOP_COMPACT_TRANSCRIPT="$TRANSCRIPT" bash "$EXT" ) || true +fi + +exit 0 diff --git a/factory/agents/sop-compact/session-start.sh b/factory/agents/sop-compact/session-start.sh new file mode 100755 index 00000000..411c49b0 --- /dev/null +++ b/factory/agents/sop-compact/session-start.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# session-start: post-compact orientation. Fires on every SessionStart but only acts +# when source == "compact"; for every other source (startup, resume, clear) it exits +# silently so normal session starts are untouched. +# +# It emits a pointer-only directive (not the full handoff content) via +# hookSpecificOutput.additionalContext, telling the new session to read the latest +# handoff file the PreCompact sidecar wrote. + +set -uo pipefail + +INPUT="$(cat 2>/dev/null || true)" + +jq_field() { + printf '%s' "$INPUT" | jq -r "${1} // empty" 2>/dev/null || printf '' +} + +SOURCE="$(jq_field .source)" + +# Only orient on the way back in from a /compact. No-op otherwise. +if [[ "$SOURCE" != "compact" ]]; then + exit 0 +fi + +CWD="$(jq_field .cwd)" +REPO_DIR="${CWD:-${CLAUDE_PROJECT_DIR:-$PWD}}" +REPO="$(basename "$REPO_DIR")" + +# Latest handoff (timestamps are zero-padded + UTC, so lexical == chronological; +# ls -t by mtime is equivalent and robust to clock format). +LATEST="$(ls -t "${REPO_DIR}/.claude/sop-compact/"handoff-*.md 2>/dev/null | head -1 || true)" +LEGACY="" + +if [[ -z "$LATEST" ]]; then + # Back-compat: pick up v0.2.x snapshots written by the old compact-sop plugin. 
+ SNAP_DIR="${COMPACT_SOP_SNAPSHOT_DIR:-$HOME/.claude/compact-sop/snapshots}" + LATEST="$(ls -t "${SNAP_DIR}/pre-compact-${REPO}-"*.md /tmp/pre-compact-"${REPO}"-*.md 2>/dev/null | head -1 || true)" + [[ -n "$LATEST" ]] && LEGACY=" (legacy compact-sop snapshot — consider running /init-sop-compact to migrate)" +fi + +if [[ -n "$LATEST" ]]; then + POINTER="You were just compacted (SessionStart source=compact). Before doing anything else, read \`${LATEST}\`${LEGACY} immediately for orientation — it is the pre-compact handoff with the non-reconstructable in-flight context. Trust hierarchy: live state (git/gh/files) > handoff > compaction summary. Do not start new work until you have read it; then re-check live state and wait for the user." +else + POINTER="You were just compacted (SessionStart source=compact), but no pre-compact handoff was found under \`${REPO_DIR}/.claude/sop-compact/\`. Treat the history above as a lossy summary: re-check live state (git status, gh, files) before acting, and consider running \`/init-sop-compact\` so future compactions produce a handoff. Do not start new work until you have re-oriented." +fi + +jq -nc --arg ctx "$POINTER" \ + '{hookSpecificOutput: {hookEventName: "SessionStart", additionalContext: $ctx}}' + +# --- Optional repo-local extension. ------------------------------------------------ +EXT="${REPO_DIR}/.claude/sop-compact/post.sh" +if [[ -f "$EXT" ]]; then + ( set +e; SOP_COMPACT_HANDOFF="$LATEST" bash "$EXT" >/dev/null 2>&1 ) || true +fi + +exit 0 diff --git a/factory/agents/sop-compact/sop-compact.md b/factory/agents/sop-compact/sop-compact.md new file mode 100644 index 00000000..6559773a --- /dev/null +++ b/factory/agents/sop-compact/sop-compact.md @@ -0,0 +1,42 @@ +# sop-compact — re:factory agent + +Standard operating procedure for the sop-compact PreCompact sidecar when running +inside a re:factory workspace. The PreCompact hook reads this file to know what to +promote and what to snapshot before context compaction. 
+ +## Promotion targets + +Durable learnings go here (direct file edits by the sidecar): + +- `.refactory/CLAUDE.md` — workspace-level instructions, validated patterns, recurring + gotchas discovered during supervision. Append to the existing content; do not + overwrite the preamble. + +## Snapshot conventions + +The handoff snapshot should capture non-reconstructable in-flight state: + +- **Active CEO sessions**: run `factory tmux-ls` to list running factory sessions and + their current status. Record which projects have active loops and their last cycle. +- **Project score trajectory**: recent score changes, whether scores are trending up or + down, and any plateau/regression patterns observed this session. +- **Backlog state**: items recently added, removed, or reprioritized. Note any items + the user explicitly deferred or promoted. +- **In-flight decisions**: what the user and agent were mid-discussing — open questions, + half-formed directions, rejected approaches and why. + +## Live-state checks + +Before writing the snapshot, check these for current ground truth: + +- `factory tmux-ls` — which factory sessions are running +- `factory status .` — project status if inside a project +- `git status` — uncommitted changes in the workspace + +## In-flight work locations + +These files contain ephemeral state that may be lost in compaction: + +- `.factory/strategy/current.md` — the current hypothesis or focus area +- `.factory/reviews/` — recent agent review outputs and CEO verdicts +- `.factory/strategy/backlog.md` — the working backlog diff --git a/factory/cli.py b/factory/cli.py index a5b83ed2..83f6a8e2 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3366,10 +3366,13 @@ def cmd_refactory(args: argparse.Namespace) -> int: print("Error: 'claude' CLI not found. 
Install Claude Code first.", file=sys.stderr) return 1 - workspace = setup_workspace() + project_path = Path(getattr(args, "path", None) or Path.cwd()).resolve() + + workspace = setup_workspace(project_path) reset = getattr(args, "reset", False) - is_new_session = reset or not (Path.home() / ".factory" / "refactory-session.json").exists() - session_id = get_session_id(reset=reset) + session_file = project_path / ".refactory" / "session.json" + is_new_session = reset or not session_file.exists() + session_id = get_session_id(project_path, reset=reset) model = getattr(args, "model", None) prompt = resolve_prompt("refactory") @@ -4576,6 +4579,8 @@ def build_parser() -> argparse.ArgumentParser: # refactory — persistent supervisor agent p = sub.add_parser("refactory", help="Launch the re:factory persistent supervisor agent") + p.add_argument("path", nargs="?", default=None, + help="Project directory (default: current working directory)") p.add_argument("--reset", action="store_true", default=False, help="Reset session (new session ID, fresh start)") p.add_argument("--model", default=None, @@ -4609,7 +4614,9 @@ def main(argv: list[str] | None = None) -> int: if not args.command: if sys.stdin.isatty() and sys.stderr.isatty(): - return cmd_refactory(args) + if (Path.cwd() / ".git").is_dir(): + return cmd_refactory(args) + return _welcome_wizard() parser.print_help() return 1 diff --git a/factory/refactory.py b/factory/refactory.py index a97fcca2..a039930d 100644 --- a/factory/refactory.py +++ b/factory/refactory.py @@ -4,14 +4,13 @@ import json import shutil +import stat import uuid from datetime import datetime, timezone from pathlib import Path +from typing import Any -WORKSPACE_DIR = Path.home() / ".factory" / "refactory" -SESSION_FILE = Path.home() / ".factory" / "refactory-session.json" - -SETTINGS_JSON = { +SETTINGS_JSON: dict[str, Any] = { "mcpServers": { "factory": { "command": "factory", @@ -27,9 +26,11 @@ See your system prompt for full instructions. 
""" +SOP_COMPACT_DIR = Path(__file__).parent / "agents" / "sop-compact" + -def setup_workspace() -> Path: - """Create the re:factory workspace at ~/.factory/refactory/. +def setup_workspace(project_path: Path) -> Path: + """Create the re:factory workspace at /.refactory/. Idempotent — safe to call on every launch. Creates directory structure and writes config files, overwriting settings.json and CLAUDE.md to @@ -37,7 +38,7 @@ def setup_workspace() -> Path: Returns the workspace path. """ - workspace = WORKSPACE_DIR + workspace = project_path / ".refactory" workspace.mkdir(parents=True, exist_ok=True) claude_dir = workspace / ".claude" @@ -46,8 +47,47 @@ def setup_workspace() -> Path: commands_dir = claude_dir / "commands" commands_dir.mkdir(exist_ok=True) + sop_dir = claude_dir / "sop-compact" + sop_dir.mkdir(exist_ok=True) + + for hook_name in ("pre-compact.sh", "session-start.sh"): + src = SOP_COMPACT_DIR / hook_name + if src.is_file(): + dst = sop_dir / hook_name + shutil.copy2(src, dst) + dst.chmod(dst.stat().st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + + sop_src = SOP_COMPACT_DIR / "sop-compact.md" + if sop_src.is_file(): + shutil.copy2(sop_src, claude_dir / "sop-compact.md") + + settings = dict(SETTINGS_JSON) + settings["hooks"] = { + "PreCompact": [ + { + "hooks": [ + { + "type": "command", + "command": str((project_path / ".refactory" / ".claude" / "sop-compact" / "pre-compact.sh").resolve()), + } + ] + } + ], + "SessionStart": [ + { + "matcher": "*", + "hooks": [ + { + "type": "command", + "command": str((project_path / ".refactory" / ".claude" / "sop-compact" / "session-start.sh").resolve()), + } + ], + } + ], + } + settings_path = claude_dir / "settings.json" - settings_path.write_text(json.dumps(SETTINGS_JSON, indent=2) + "\n") + settings_path.write_text(json.dumps(settings, indent=2) + "\n") claude_md_path = workspace / "CLAUDE.md" claude_md_path.write_text(CLAUDE_MD_CONTENT) @@ -60,21 +100,22 @@ def setup_workspace() -> Path: return 
workspace -def get_session_id(reset: bool = False) -> str: - """Read or create a persistent session ID. +def get_session_id(project_path: Path, reset: bool = False) -> str: + """Read or create a persistent session ID for a project. - The session ID is stored in ~/.factory/refactory-session.json (outside - the workspace, so it survives workspace regeneration). + The session ID is stored in /.refactory/session.json. Args: + project_path: Root directory of the project. reset: If True, generate a new session ID even if one exists. Returns: The session ID string. """ - if not reset and SESSION_FILE.exists(): + session_file = project_path / ".refactory" / "session.json" + if not reset and session_file.exists(): try: - data = json.loads(SESSION_FILE.read_text()) + data = json.loads(session_file.read_text()) sid = data.get("session_id") if isinstance(sid, str) and sid: return sid @@ -82,15 +123,16 @@ def get_session_id(reset: bool = False) -> str: pass sid = str(uuid.uuid4()) - save_session_id(sid) + save_session_id(project_path, sid) return sid -def save_session_id(session_id: str) -> None: - """Write session state to ~/.factory/refactory-session.json.""" - SESSION_FILE.parent.mkdir(parents=True, exist_ok=True) +def save_session_id(project_path: Path, session_id: str) -> None: + """Write session state to /.refactory/session.json.""" + session_file = project_path / ".refactory" / "session.json" + session_file.parent.mkdir(parents=True, exist_ok=True) data = { "session_id": session_id, "created": datetime.now(timezone.utc).isoformat(), } - SESSION_FILE.write_text(json.dumps(data, indent=2) + "\n") + session_file.write_text(json.dumps(data, indent=2) + "\n") diff --git a/tests/test_refactory.py b/tests/test_refactory.py index a7bf7274..05df5167 100644 --- a/tests/test_refactory.py +++ b/tests/test_refactory.py @@ -3,109 +3,133 @@ from __future__ import annotations import json +import os +import stat from pathlib import Path from typing import get_args +from unittest.mock 
import patch import pytest from factory.refactory import ( CLAUDE_MD_CONTENT, - SETTINGS_JSON, get_session_id, save_session_id, setup_workspace, ) -@pytest.fixture -def mock_home(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: - monkeypatch.setattr(Path, "home", lambda: tmp_path) - monkeypatch.setattr( - "factory.refactory.WORKSPACE_DIR", tmp_path / ".factory" / "refactory" - ) - monkeypatch.setattr( - "factory.refactory.SESSION_FILE", - tmp_path / ".factory" / "refactory-session.json", - ) - return tmp_path - - # ── setup_workspace ────────────────────────────────────────────── class TestSetupWorkspace: - def test_creates_directories(self, mock_home: Path) -> None: - setup_workspace() - workspace = mock_home / ".factory" / "refactory" + def test_creates_directories(self, tmp_path: Path) -> None: + setup_workspace(tmp_path) + workspace = tmp_path / ".refactory" assert workspace.is_dir() assert (workspace / ".claude").is_dir() assert (workspace / ".claude" / "commands").is_dir() - def test_writes_settings_json(self, mock_home: Path) -> None: - setup_workspace() - settings = mock_home / ".factory" / "refactory" / ".claude" / "settings.json" + def test_writes_settings_json(self, tmp_path: Path) -> None: + setup_workspace(tmp_path) + settings = tmp_path / ".refactory" / ".claude" / "settings.json" assert settings.exists() data = json.loads(settings.read_text()) - assert data == SETTINGS_JSON assert "factory" in data["mcpServers"] - def test_writes_claude_md(self, mock_home: Path) -> None: - setup_workspace() - claude_md = mock_home / ".factory" / "refactory" / "CLAUDE.md" + def test_writes_claude_md(self, tmp_path: Path) -> None: + setup_workspace(tmp_path) + claude_md = tmp_path / ".refactory" / "CLAUDE.md" assert claude_md.exists() assert claude_md.read_text() == CLAUDE_MD_CONTENT - def test_copies_skills(self, mock_home: Path) -> None: - setup_workspace() - commands_dir = mock_home / ".factory" / "refactory" / ".claude" / "commands" + def 
test_copies_skills(self, tmp_path: Path) -> None: + setup_workspace(tmp_path) + commands_dir = tmp_path / ".refactory" / ".claude" / "commands" skills_src = Path(__file__).parent.parent / "factory" / "agents" / "skills" expected = list(skills_src.glob("*.md")) assert len(expected) > 0, "No skill source files found" for skill in expected: assert (commands_dir / skill.name).exists(), f"Missing skill: {skill.name}" - def test_idempotent(self, mock_home: Path) -> None: - ws1 = setup_workspace() - ws2 = setup_workspace() + def test_idempotent(self, tmp_path: Path) -> None: + ws1 = setup_workspace(tmp_path) + ws2 = setup_workspace(tmp_path) assert ws1 == ws2 - settings = mock_home / ".factory" / "refactory" / ".claude" / "settings.json" - assert json.loads(settings.read_text()) == SETTINGS_JSON + settings = tmp_path / ".refactory" / ".claude" / "settings.json" + data = json.loads(settings.read_text()) + assert "factory" in data["mcpServers"] + + def test_copies_hooks(self, tmp_path: Path) -> None: + setup_workspace(tmp_path) + sop_dir = tmp_path / ".refactory" / ".claude" / "sop-compact" + assert sop_dir.is_dir() + for name in ("pre-compact.sh", "session-start.sh"): + hook = sop_dir / name + assert hook.exists(), f"Missing hook: {name}" + assert hook.stat().st_mode & stat.S_IXUSR, f"Hook not executable: {name}" + + def test_copies_sop(self, tmp_path: Path) -> None: + setup_workspace(tmp_path) + sop = tmp_path / ".refactory" / ".claude" / "sop-compact.md" + assert sop.exists() + content = sop.read_text() + assert "re:factory" in content + assert "Promotion targets" in content + + def test_settings_json_has_hooks(self, tmp_path: Path) -> None: + setup_workspace(tmp_path) + settings = tmp_path / ".refactory" / ".claude" / "settings.json" + data = json.loads(settings.read_text()) + assert "hooks" in data + assert "PreCompact" in data["hooks"] + assert "SessionStart" in data["hooks"] + pre_cmd = data["hooks"]["PreCompact"][0]["hooks"][0]["command"] + assert "pre-compact.sh" 
in pre_cmd + assert os.path.isabs(pre_cmd) + session_cmd = data["hooks"]["SessionStart"][0]["hooks"][0]["command"] + assert "session-start.sh" in session_cmd + assert os.path.isabs(session_cmd) # ── Session ID ─────────────────────────────────────────────────── class TestSessionId: - def test_creates_new(self, mock_home: Path) -> None: - session_file = mock_home / ".factory" / "refactory-session.json" + def test_creates_new(self, tmp_path: Path) -> None: + (tmp_path / ".refactory").mkdir() + session_file = tmp_path / ".refactory" / "session.json" assert not session_file.exists() - sid = get_session_id() + sid = get_session_id(tmp_path) assert isinstance(sid, str) - assert len(sid) == 36 # UUID with dashes + assert len(sid) == 36 assert sid.count("-") == 4 assert session_file.exists() - def test_returns_existing(self, mock_home: Path) -> None: - sid1 = get_session_id() - sid2 = get_session_id() + def test_returns_existing(self, tmp_path: Path) -> None: + (tmp_path / ".refactory").mkdir() + sid1 = get_session_id(tmp_path) + sid2 = get_session_id(tmp_path) assert sid1 == sid2 - def test_reset(self, mock_home: Path) -> None: - sid1 = get_session_id() - sid2 = get_session_id(reset=True) + def test_reset(self, tmp_path: Path) -> None: + (tmp_path / ".refactory").mkdir() + sid1 = get_session_id(tmp_path) + sid2 = get_session_id(tmp_path, reset=True) assert sid1 != sid2 assert len(sid2) == 36 - def test_save_roundtrip(self, mock_home: Path) -> None: + def test_save_roundtrip(self, tmp_path: Path) -> None: + (tmp_path / ".refactory").mkdir() custom_id = "abcdef1234567890abcdef1234567890" - save_session_id(custom_id) - assert get_session_id() == custom_id + save_session_id(tmp_path, custom_id) + assert get_session_id(tmp_path) == custom_id - def test_corrupt_json_generates_new(self, mock_home: Path) -> None: - session_file = mock_home / ".factory" / "refactory-session.json" + def test_corrupt_json_generates_new(self, tmp_path: Path) -> None: + session_file = tmp_path / 
".refactory" / "session.json" session_file.parent.mkdir(parents=True, exist_ok=True) session_file.write_text("{corrupt json!!") - sid = get_session_id() + sid = get_session_id(tmp_path) assert isinstance(sid, str) assert len(sid) == 36 @@ -140,6 +164,20 @@ def test_refactory_subcommand_exists(self) -> None: args = parser.parse_args(["refactory"]) assert args.command == "refactory" + def test_refactory_accepts_path_arg(self) -> None: + from factory.cli import build_parser + + parser = build_parser() + args = parser.parse_args(["refactory", "/some/path"]) + assert args.path == "/some/path" + + def test_refactory_path_default_none(self) -> None: + from factory.cli import build_parser + + parser = build_parser() + args = parser.parse_args(["refactory"]) + assert args.path is None + def test_refactory_prompt_resolves(self) -> None: from factory.agents.runner import resolve_prompt @@ -152,24 +190,20 @@ def test_refactory_prompt_resolves(self) -> None: class TestCmdRefactory: - def test_no_claude_returns_error(self, mock_home: Path) -> None: - from unittest.mock import patch - + def test_no_claude_returns_error(self, tmp_path: Path) -> None: from factory.cli import cmd_refactory, build_parser parser = build_parser() - args = parser.parse_args(["refactory"]) + args = parser.parse_args(["refactory", str(tmp_path)]) with patch("shutil.which", return_value=None): code = cmd_refactory(args) assert code == 1 - def test_new_session_uses_session_id(self, mock_home: Path) -> None: - from unittest.mock import patch - + def test_new_session_uses_session_id(self, tmp_path: Path) -> None: from factory.cli import cmd_refactory, build_parser parser = build_parser() - args = parser.parse_args(["refactory"]) + args = parser.parse_args(["refactory", str(tmp_path)]) with patch("shutil.which", return_value="/usr/bin/claude"), \ patch("os.execvp") as mock_exec: cmd_refactory(args) @@ -179,14 +213,12 @@ def test_new_session_uses_session_id(self, mock_home: Path) -> None: assert "--resume" not 
in cmd assert "--append-system-prompt-file" in cmd - def test_existing_session_uses_resume(self, mock_home: Path) -> None: - from unittest.mock import patch - + def test_existing_session_uses_resume(self, tmp_path: Path) -> None: from factory.cli import cmd_refactory, build_parser - save_session_id("existing-uuid") + save_session_id(tmp_path, "existing-uuid") parser = build_parser() - args = parser.parse_args(["refactory"]) + args = parser.parse_args(["refactory", str(tmp_path)]) with patch("shutil.which", return_value="/usr/bin/claude"), \ patch("os.execvp") as mock_exec: cmd_refactory(args) @@ -197,14 +229,12 @@ def test_existing_session_uses_resume(self, mock_home: Path) -> None: resume_idx = cmd.index("--resume") assert cmd[resume_idx + 1] == "existing-uuid" - def test_reset_flag_uses_session_id(self, mock_home: Path) -> None: - from unittest.mock import patch - + def test_reset_flag_uses_session_id(self, tmp_path: Path) -> None: from factory.cli import cmd_refactory, build_parser - save_session_id("old-uuid") + save_session_id(tmp_path, "old-uuid") parser = build_parser() - args = parser.parse_args(["refactory", "--reset"]) + args = parser.parse_args(["refactory", "--reset", str(tmp_path)]) with patch("shutil.which", return_value="/usr/bin/claude"), \ patch("os.execvp") as mock_exec: cmd_refactory(args) @@ -213,13 +243,11 @@ def test_reset_flag_uses_session_id(self, mock_home: Path) -> None: assert "--session-id" in cmd assert "--resume" not in cmd - def test_model_flag_forwarded(self, mock_home: Path) -> None: - from unittest.mock import patch - + def test_model_flag_forwarded(self, tmp_path: Path) -> None: from factory.cli import cmd_refactory, build_parser parser = build_parser() - args = parser.parse_args(["refactory", "--model", "sonnet"]) + args = parser.parse_args(["refactory", "--model", "sonnet", str(tmp_path)]) with patch("shutil.which", return_value="/usr/bin/claude"), \ patch("os.execvp") as mock_exec: cmd_refactory(args) @@ -228,3 +256,16 @@ def 
test_model_flag_forwarded(self, mock_home: Path) -> None: assert "--model" in cmd model_idx = cmd.index("--model") assert cmd[model_idx + 1] == "sonnet" + + def test_default_path_uses_cwd(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + from factory.cli import cmd_refactory, build_parser + + monkeypatch.chdir(tmp_path) + parser = build_parser() + args = parser.parse_args(["refactory"]) + with patch("shutil.which", return_value="/usr/bin/claude"), \ + patch("os.execvp"): + cmd_refactory(args) + + assert (tmp_path / ".refactory").is_dir() + assert (tmp_path / ".refactory" / "session.json").exists() From 4d383d7529b9b55f048c75bb4add371040aa9173 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Thu, 25 Jun 2026 21:33:37 +0000 Subject: [PATCH 09/21] fix: update wizard tests for git-repo-conditional bare factory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bare `factory` now checks for .git/ — launches refactory in git repos, wizard otherwise. Split test into two: git repo and non-git-repo cases. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_cli_wizard.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/tests/test_cli_wizard.py b/tests/test_cli_wizard.py index 937c65c2..d257813f 100644 --- a/tests/test_cli_wizard.py +++ b/tests/test_cli_wizard.py @@ -40,17 +40,34 @@ def test_non_tty_prints_help(self, capsys: pytest.CaptureFixture[str]) -> None: code = main([]) assert code == 1 - def test_tty_launches_wizard(self) -> None: - """TTY with no subcommand dispatches to cmd_refactory.""" + def test_tty_in_git_repo_launches_refactory(self) -> None: + """TTY with no subcommand in a git repo dispatches to cmd_refactory.""" with patch("factory.cli.cmd_refactory", return_value=0) as mock_refactory, \ + patch("factory.cli.Path") as mock_path_cls, \ patch("sys.stdin") as mock_stdin, \ patch("sys.stderr") as mock_stderr: mock_stdin.isatty.return_value = True mock_stderr.isatty.return_value = True + mock_path_cls.cwd.return_value.__truediv__ = lambda self, x: mock_path_cls.cwd.return_value + mock_path_cls.cwd.return_value.is_dir.return_value = True code = main([]) assert code == 0 mock_refactory.assert_called_once() + def test_tty_no_git_repo_launches_wizard(self) -> None: + """TTY with no subcommand outside a git repo dispatches to _welcome_wizard.""" + with patch("factory.cli._welcome_wizard", return_value=0) as mock_wizard, \ + patch("factory.cli.Path") as mock_path_cls, \ + patch("sys.stdin") as mock_stdin, \ + patch("sys.stderr") as mock_stderr: + mock_stdin.isatty.return_value = True + mock_stderr.isatty.return_value = True + mock_path_cls.cwd.return_value.__truediv__ = lambda self, x: mock_path_cls.cwd.return_value + mock_path_cls.cwd.return_value.is_dir.return_value = False + code = main([]) + assert code == 0 + mock_wizard.assert_called_once() + def test_stdin_not_tty_stderr_tty(self, capsys: pytest.CaptureFixture[str]) -> None: """If stdin is not a TTY (piped), falls through to help.""" with patch("sys.stdin") 
as mock_stdin, \ From fab6b4d8defe004c48d2ffb8ab45b50c56af88b0 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Thu, 25 Jun 2026 21:39:59 +0000 Subject: [PATCH 10/21] feat: launch refactory agent with --dangerously-skip-permissions The refactory agent is fully trusted within its project workspace, same as the board agent in refactory-server. Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/cli.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/factory/cli.py b/factory/cli.py index 83f6a8e2..aaaa49ee 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3387,12 +3387,14 @@ def cmd_refactory(args: argparse.Namespace) -> int: "claude", "--session-id", session_id, "--append-system-prompt-file", prompt_file.name, + "--dangerously-skip-permissions", ] else: cmd = [ "claude", "--resume", session_id, "--append-system-prompt-file", prompt_file.name, + "--dangerously-skip-permissions", ] if model: From 053a1003f2788929124faeebed64dc3d5ff44e87 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 14:27:27 +0000 Subject: [PATCH 11/21] refactor: run refactory agent from project root instead of .refactory/ Skills install to project .claude/commands/, settings to .claude/settings.local.json, sop-compact hooks stay in .refactory/.claude/sop-compact/. Agent now chdir to project_path. 
Co-Authored-By: Claude Opus 4.6 --- factory/cli.py | 2 +- factory/refactory.py | 43 ++++++++++++++++++++++------------------- tests/test_refactory.py | 12 ++++++------ 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/factory/cli.py b/factory/cli.py index aaaa49ee..fe01bc69 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3400,7 +3400,7 @@ def cmd_refactory(args: argparse.Namespace) -> int: if model: cmd.extend(["--model", model]) - os.chdir(workspace) + os.chdir(project_path) os.execvp("claude", cmd) return 0 # unreachable after execvp diff --git a/factory/refactory.py b/factory/refactory.py index a039930d..991dc807 100644 --- a/factory/refactory.py +++ b/factory/refactory.py @@ -30,25 +30,22 @@ def setup_workspace(project_path: Path) -> Path: - """Create the re:factory workspace at /.refactory/. + """Set up re:factory for a project. - Idempotent — safe to call on every launch. Creates directory structure - and writes config files, overwriting settings.json and CLAUDE.md to - pick up any updates. + Session state goes in /.refactory/. Skills and settings are + installed into the PROJECT's .claude/ so the agent runs from the + project root with full access to the source tree. - Returns the workspace path. + Idempotent — safe to call on every launch. Overwrites settings and + skills to pick up updates. + + Returns the workspace path (.refactory/). 
""" workspace = project_path / ".refactory" workspace.mkdir(parents=True, exist_ok=True) - claude_dir = workspace / ".claude" - claude_dir.mkdir(exist_ok=True) - - commands_dir = claude_dir / "commands" - commands_dir.mkdir(exist_ok=True) - - sop_dir = claude_dir / "sop-compact" - sop_dir.mkdir(exist_ok=True) + sop_dir = workspace / ".claude" / "sop-compact" + sop_dir.mkdir(parents=True, exist_ok=True) for hook_name in ("pre-compact.sh", "session-start.sh"): src = SOP_COMPACT_DIR / hook_name @@ -59,7 +56,18 @@ def setup_workspace(project_path: Path) -> Path: sop_src = SOP_COMPACT_DIR / "sop-compact.md" if sop_src.is_file(): - shutil.copy2(sop_src, claude_dir / "sop-compact.md") + shutil.copy2(sop_src, workspace / ".claude" / "sop-compact.md") + + project_claude_dir = project_path / ".claude" + project_claude_dir.mkdir(exist_ok=True) + + commands_dir = project_claude_dir / "commands" + commands_dir.mkdir(exist_ok=True) + + skills_src = Path(__file__).parent / "agents" / "skills" + if skills_src.is_dir(): + for skill_file in skills_src.glob("*.md"): + shutil.copy2(skill_file, commands_dir / skill_file.name) settings = dict(SETTINGS_JSON) settings["hooks"] = { @@ -86,17 +94,12 @@ def setup_workspace(project_path: Path) -> Path: ], } - settings_path = claude_dir / "settings.json" + settings_path = project_claude_dir / "settings.local.json" settings_path.write_text(json.dumps(settings, indent=2) + "\n") claude_md_path = workspace / "CLAUDE.md" claude_md_path.write_text(CLAUDE_MD_CONTENT) - skills_src = Path(__file__).parent / "agents" / "skills" - if skills_src.is_dir(): - for skill_file in skills_src.glob("*.md"): - shutil.copy2(skill_file, commands_dir / skill_file.name) - return workspace diff --git a/tests/test_refactory.py b/tests/test_refactory.py index 05df5167..74d6d24d 100644 --- a/tests/test_refactory.py +++ b/tests/test_refactory.py @@ -27,12 +27,12 @@ def test_creates_directories(self, tmp_path: Path) -> None: setup_workspace(tmp_path) workspace = tmp_path / 
".refactory" assert workspace.is_dir() - assert (workspace / ".claude").is_dir() - assert (workspace / ".claude" / "commands").is_dir() + assert (tmp_path / ".claude").is_dir() + assert (tmp_path / ".claude" / "commands").is_dir() def test_writes_settings_json(self, tmp_path: Path) -> None: setup_workspace(tmp_path) - settings = tmp_path / ".refactory" / ".claude" / "settings.json" + settings = tmp_path / ".claude" / "settings.local.json" assert settings.exists() data = json.loads(settings.read_text()) assert "factory" in data["mcpServers"] @@ -45,7 +45,7 @@ def test_writes_claude_md(self, tmp_path: Path) -> None: def test_copies_skills(self, tmp_path: Path) -> None: setup_workspace(tmp_path) - commands_dir = tmp_path / ".refactory" / ".claude" / "commands" + commands_dir = tmp_path / ".claude" / "commands" skills_src = Path(__file__).parent.parent / "factory" / "agents" / "skills" expected = list(skills_src.glob("*.md")) assert len(expected) > 0, "No skill source files found" @@ -56,7 +56,7 @@ def test_idempotent(self, tmp_path: Path) -> None: ws1 = setup_workspace(tmp_path) ws2 = setup_workspace(tmp_path) assert ws1 == ws2 - settings = tmp_path / ".refactory" / ".claude" / "settings.json" + settings = tmp_path / ".claude" / "settings.local.json" data = json.loads(settings.read_text()) assert "factory" in data["mcpServers"] @@ -79,7 +79,7 @@ def test_copies_sop(self, tmp_path: Path) -> None: def test_settings_json_has_hooks(self, tmp_path: Path) -> None: setup_workspace(tmp_path) - settings = tmp_path / ".refactory" / ".claude" / "settings.json" + settings = tmp_path / ".claude" / "settings.local.json" data = json.loads(settings.read_text()) assert "hooks" in data assert "PreCompact" in data["hooks"] From 1012810acbf990225839ffb0c4d509b7aa8eb2e7 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 14:40:43 +0000 Subject: [PATCH 12/21] feat: bare `factory` always launches refactory agent, drop .git gate Bare `factory` on a TTY now always launches the 
refactory agent for cwd, regardless of whether cwd is a git repo. This handles both single-repo (cd repo && factory) and multi-repo parent dir cases. Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/cli.py | 4 +--- tests/test_cli_wizard.py | 21 ++------------------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/factory/cli.py b/factory/cli.py index fe01bc69..bbe4c628 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -4616,9 +4616,7 @@ def main(argv: list[str] | None = None) -> int: if not args.command: if sys.stdin.isatty() and sys.stderr.isatty(): - if (Path.cwd() / ".git").is_dir(): - return cmd_refactory(args) - return _welcome_wizard() + return cmd_refactory(args) parser.print_help() return 1 diff --git a/tests/test_cli_wizard.py b/tests/test_cli_wizard.py index d257813f..03f8b9f4 100644 --- a/tests/test_cli_wizard.py +++ b/tests/test_cli_wizard.py @@ -40,34 +40,17 @@ def test_non_tty_prints_help(self, capsys: pytest.CaptureFixture[str]) -> None: code = main([]) assert code == 1 - def test_tty_in_git_repo_launches_refactory(self) -> None: - """TTY with no subcommand in a git repo dispatches to cmd_refactory.""" + def test_tty_launches_refactory(self) -> None: + """TTY with no subcommand always dispatches to cmd_refactory.""" with patch("factory.cli.cmd_refactory", return_value=0) as mock_refactory, \ - patch("factory.cli.Path") as mock_path_cls, \ patch("sys.stdin") as mock_stdin, \ patch("sys.stderr") as mock_stderr: mock_stdin.isatty.return_value = True mock_stderr.isatty.return_value = True - mock_path_cls.cwd.return_value.__truediv__ = lambda self, x: mock_path_cls.cwd.return_value - mock_path_cls.cwd.return_value.is_dir.return_value = True code = main([]) assert code == 0 mock_refactory.assert_called_once() - def test_tty_no_git_repo_launches_wizard(self) -> None: - """TTY with no subcommand outside a git repo dispatches to _welcome_wizard.""" - with patch("factory.cli._welcome_wizard", return_value=0) as mock_wizard, \ - 
patch("factory.cli.Path") as mock_path_cls, \ - patch("sys.stdin") as mock_stdin, \ - patch("sys.stderr") as mock_stderr: - mock_stdin.isatty.return_value = True - mock_stderr.isatty.return_value = True - mock_path_cls.cwd.return_value.__truediv__ = lambda self, x: mock_path_cls.cwd.return_value - mock_path_cls.cwd.return_value.is_dir.return_value = False - code = main([]) - assert code == 0 - mock_wizard.assert_called_once() - def test_stdin_not_tty_stderr_tty(self, capsys: pytest.CaptureFixture[str]) -> None: """If stdin is not a TTY (piped), falls through to help.""" with patch("sys.stdin") as mock_stdin, \ From 61dd2a0af3ecda689cd8b97be2148379af6ad605 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 14:45:56 +0000 Subject: [PATCH 13/21] fix: remove unused workspace variable (F841 lint) Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/factory/cli.py b/factory/cli.py index bbe4c628..29929908 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3368,7 +3368,7 @@ def cmd_refactory(args: argparse.Namespace) -> int: project_path = Path(getattr(args, "path", None) or Path.cwd()).resolve() - workspace = setup_workspace(project_path) + setup_workspace(project_path) reset = getattr(args, "reset", False) session_file = project_path / ".refactory" / "session.json" is_new_session = reset or not session_file.exists() From 0b8f930a45cdd620e93973b43878756f2fc5191c Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 14:52:56 +0000 Subject: [PATCH 14/21] fix: use __file__-relative path in test_subprocess_readline_limit The test used a bare relative path that breaks when CWD isn't the repo root (e.g. in CI worktree checkouts). 
Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_subprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_subprocess.py b/tests/test_subprocess.py index 93b00fbf..7c54270f 100644 --- a/tests/test_subprocess.py +++ b/tests/test_subprocess.py @@ -8,7 +8,7 @@ def test_subprocess_readline_limit(): """Verify subprocess uses 1MB readline limit, not default 64KB.""" - source = Path("factory/runners/_subprocess.py").read_text() + source = (Path(__file__).parent.parent / "factory" / "runners" / "_subprocess.py").read_text() tree = ast.parse(source) for node in ast.walk(tree): if isinstance(node, ast.Call) and "create_subprocess_exec" in ast.dump(node): From 99b93b32953a3dddbdbe67f61d0ab295c7001909 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 16:16:37 +0000 Subject: [PATCH 15/21] feat: teach refactory agent about factory discover for project setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Project Setup section to CLI reference with discover + init. Add behavioral rule: always run `factory discover` on uninitialized projects before dispatching a CEO — don't manually write factory.md. Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/agents/prompts/refactory.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/factory/agents/prompts/refactory.md b/factory/agents/prompts/refactory.md index 404060d3..618b4051 100644 --- a/factory/agents/prompts/refactory.md +++ b/factory/agents/prompts/refactory.md @@ -33,6 +33,10 @@ You have access to the full factory CLI. Key commands: - `factory tmux-stop --session ` — Stop a tmux session - `factory tmux-stop --path ` — Stop session by project path +### Project Setup +- `factory discover ` — Introspect a project, generate eval profile + factory.md automatically. **Use this first on any uninitialized project** — it detects language, framework, test commands, and builds the eval harness. 
+- `factory init ` — Parse an existing factory.md into .factory/config.json. Only needed after manually editing factory.md. + ### Project Intelligence - `factory eval ` — Run eval, get current composite score - `factory history ` — Show experiment history (TSV) @@ -91,12 +95,20 @@ Your mental model is: You track which projects exist, what their current scores are, what's in their backlogs, and whether CEO runs are active. You don't track individual code changes. -### 3. Dispatch Based on Intent +### 3. Initialize Before Dispatch + +Before dispatching a CEO on any project, check `factory status `. If the state is `no_factory`, the project needs setup first: +1. Run `factory discover ` — this introspects the codebase and generates the eval profile and factory.md automatically +2. Do NOT manually write factory.md or call `factory init` directly — `discover` handles everything +3. After discover completes, the CEO can run normally + +### 4. Dispatch Based on Intent When the user says "work on X": 1. Determine the project path (ask if ambiguous) 2. Check if a CEO session is already running for that project (`factory tmux-ls`) -3. Choose the right dispatch mode: +3. Check `factory status ` — if `no_factory`, run `factory discover ` first +4. Choose the right dispatch mode: - `factory tmux --loop` for ongoing improvement - `factory ceo --focus "item"` for targeted single-item work - `factory ceo --mode design` for brainstorming what to work on From 1a9dccb5a22ce9274c97a1ecd2a7b13278034ec0 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 17:21:20 +0000 Subject: [PATCH 16/21] feat: enforce factory tmux over factory ceo in refactory prompt The refactory agent should always dispatch via `factory tmux` so CEO sessions run in detached tmux windows that survive independently. `factory ceo` blocks the shell and dies on compaction. Also fixed rule numbering after inserting the discover rule. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/agents/prompts/refactory.md | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/factory/agents/prompts/refactory.md b/factory/agents/prompts/refactory.md index 618b4051..97a0055f 100644 --- a/factory/agents/prompts/refactory.md +++ b/factory/agents/prompts/refactory.md @@ -77,13 +77,16 @@ Do not store project data here. Project state lives in each project's `.factory/ ### 1. Never Implement Code Directly -You do not write code, fix bugs, run tests, or edit source files. You are a supervisor. When something needs to be built or fixed, you dispatch a CEO run: +You do not write code, fix bugs, run tests, or edit source files. You are a supervisor. When something needs to be built or fixed, you dispatch a CEO run via `factory tmux`: ```bash -factory ceo /path/to/project --focus "the thing to build" -factory tmux /path/to/project --loop +factory tmux /path/to/project # single cycle in tmux +factory tmux /path/to/project --loop # continuous loop in tmux +factory tmux /path/to/project --focus "item" # targeted build in tmux ``` +**Always use `factory tmux`**, not `factory ceo`. The tmux dispatch runs CEO sessions in detached tmux windows that survive independently of your session. `factory ceo` runs in the foreground and blocks — never use it from your Bash tool, as it ties up a shell and dies if your session compacts. + The CEO handles the full experiment lifecycle — it has its own specialist agents (Builder, QA, Researcher, Strategist, Archivist) for all technical work. ### 2. Think in Projects and Cycles @@ -110,11 +113,11 @@ When the user says "work on X": 3. Check `factory status ` — if `no_factory`, run `factory discover ` first 4. 
Choose the right dispatch mode: - `factory tmux --loop` for ongoing improvement - - `factory ceo --focus "item"` for targeted single-item work - - `factory ceo --mode design` for brainstorming what to work on - - `factory ceo --mode research` for research-driven improvement + - `factory tmux --focus "item"` for targeted single-item work + - `factory tmux --mode design` for brainstorming what to work on + - `factory tmux --mode research` for research-driven improvement -### 4. Monitor Proactively +### 5. Monitor Proactively While CEO sessions are running: - Periodically check `factory tmux-ls` for session status @@ -122,7 +125,7 @@ While CEO sessions are running: - Run `factory eval ` to check scores - Report findings back to the user -### 5. Review Completed Work +### 6. Review Completed Work After a CEO cycle completes: 1. Read the project's `.factory/reviews/ceo-latest.md` @@ -130,7 +133,7 @@ After a CEO cycle completes: 3. Run `factory history ` to see the experiment record 4. Summarize: what was attempted, what was the verdict, what's the score delta -### 6. Preserve Context Across Sessions +### 7. Preserve Context Across Sessions You are the persistent layer. When CEO sessions compact or restart, context is lost. You retain the big picture: - Which hypotheses have been tried @@ -140,7 +143,7 @@ You are the persistent layer. When CEO sessions compact or restart, context is l Use `factory checkpoint ` before long runs and `factory resume ` after crashes. -### 7. Curate Playbooks +### 8. Curate Playbooks Periodically trigger playbook evolution via `factory ace` to distill experiment outcomes into agent behavior rules. Review with `factory ace-stats`. This is how the factory's agents improve over time. 
From c92276e7d6fc63c447f9e7747f9545821edabbdb Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 17:28:16 +0000 Subject: [PATCH 17/21] feat: enforce --tmux-persist for all CEO dispatch Always pass --tmux-persist so CEO runs are interactive in tmux windows, not headless. Users can attach and watch. Updated both the agent prompt and the factory-run skill file. Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/agents/prompts/refactory.md | 18 +++++++------- factory/agents/skills/factory-run.md | 36 ++++++++++++++-------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/factory/agents/prompts/refactory.md b/factory/agents/prompts/refactory.md index 97a0055f..45d74e7f 100644 --- a/factory/agents/prompts/refactory.md +++ b/factory/agents/prompts/refactory.md @@ -80,12 +80,12 @@ Do not store project data here. Project state lives in each project's `.factory/ You do not write code, fix bugs, run tests, or edit source files. You are a supervisor. When something needs to be built or fixed, you dispatch a CEO run via `factory tmux`: ```bash -factory tmux /path/to/project # single cycle in tmux -factory tmux /path/to/project --loop # continuous loop in tmux -factory tmux /path/to/project --focus "item" # targeted build in tmux +factory tmux /path/to/project --tmux-persist # single cycle in tmux +factory tmux /path/to/project --tmux-persist --loop # continuous loop in tmux +factory tmux /path/to/project --tmux-persist --focus "item" # targeted build in tmux ``` -**Always use `factory tmux`**, not `factory ceo`. The tmux dispatch runs CEO sessions in detached tmux windows that survive independently of your session. `factory ceo` runs in the foreground and blocks — never use it from your Bash tool, as it ties up a shell and dies if your session compacts. +**Always use `factory tmux --tmux-persist`**, not `factory ceo`. The `--tmux-persist` flag runs the CEO interactively in a tmux window (not headless) so the user can attach and watch. 
Without it, the CEO runs headless in the background. The tmux dispatch runs CEO sessions in detached tmux windows that survive independently of your session. The CEO handles the full experiment lifecycle — it has its own specialist agents (Builder, QA, Researcher, Strategist, Archivist) for all technical work. @@ -111,11 +111,11 @@ When the user says "work on X": 1. Determine the project path (ask if ambiguous) 2. Check if a CEO session is already running for that project (`factory tmux-ls`) 3. Check `factory status ` — if `no_factory`, run `factory discover ` first -4. Choose the right dispatch mode: - - `factory tmux --loop` for ongoing improvement - - `factory tmux --focus "item"` for targeted single-item work - - `factory tmux --mode design` for brainstorming what to work on - - `factory tmux --mode research` for research-driven improvement +4. Choose the right dispatch mode (always include `--tmux-persist`): + - `factory tmux --tmux-persist --loop` for ongoing improvement + - `factory tmux --tmux-persist --focus "item"` for targeted single-item work + - `factory tmux --tmux-persist --mode design` for brainstorming what to work on + - `factory tmux --tmux-persist --mode research` for research-driven improvement ### 5. Monitor Proactively diff --git a/factory/agents/skills/factory-run.md b/factory/agents/skills/factory-run.md index 90a96b01..5f3b9216 100644 --- a/factory/agents/skills/factory-run.md +++ b/factory/agents/skills/factory-run.md @@ -2,34 +2,35 @@ Use this skill to launch, monitor, and manage factory CEO runs. +**Always use `factory tmux --tmux-persist`** for all dispatch. This runs the CEO interactively in a tmux window (not headless) so you or the user can attach and watch. 
+ ## Dispatch Modes **Long-running improvement (preferred for multi-project):** ```bash -factory tmux --loop -factory tmux --loop --interval 1800 # custom interval (seconds) +factory tmux --tmux-persist --loop +factory tmux --tmux-persist --loop --interval 1800 # custom interval (seconds) ``` -Runs in a detached tmux session. Use this when managing multiple projects — sessions persist and you can check back later. +Runs in a detached tmux session. Sessions persist and you can check back later. -**Single blocking cycle:** +**Single cycle:** ```bash -factory ceo +factory tmux --tmux-persist ``` -Runs in foreground, blocks until the cycle completes. Use when you want to immediately process results after completion. **Targeted single-item build:** ```bash -factory ceo --focus "" -factory ceo --focus 42 # GitHub issue number -factory ceo --focus "owner/repo#42" +factory tmux --tmux-persist --focus "" +factory tmux --tmux-persist --focus 42 # GitHub issue number +factory tmux --tmux-persist --focus "owner/repo#42" ``` **Mode selection:** ```bash -factory ceo --mode improve # default — score-driven improvement -factory ceo --mode design # brainstorm what to work on first -factory ceo --mode research # research-driven improvement -factory ceo --mode meta # improve the factory itself + ACE evolution +factory tmux --tmux-persist --mode improve # default — score-driven improvement +factory tmux --tmux-persist --mode design # brainstorm what to work on first +factory tmux --tmux-persist --mode research # research-driven improvement +factory tmux --tmux-persist --mode meta # improve the factory itself + ACE evolution ``` ## Monitor Running Sessions @@ -57,10 +58,9 @@ factory tmux-stop --path | Scenario | Command | |---|---| -| Managing 2+ projects simultaneously | `factory tmux --loop` for each | -| User asks "work on this project" | `factory tmux --loop` | -| User asks to build one specific thing | `factory ceo --focus ""` | -| User wants to discuss what to work on | `factory 
ceo --mode design` | -| Quick one-off improvement | `factory ceo ` | +| Managing 2+ projects simultaneously | `factory tmux --tmux-persist --loop` for each | +| User asks "work on this project" | `factory tmux --tmux-persist` | +| User asks to build one specific thing | `factory tmux --tmux-persist --focus ""` | +| User wants to discuss what to work on | `factory tmux --tmux-persist --mode design` | Always check `factory tmux-ls` before dispatching to avoid launching duplicate sessions for the same project. From 27d71120e44e5a73119eb32e7ee801707fd0bfb4 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 17:45:31 +0000 Subject: [PATCH 18/21] fix: use factory ceo --tmux-persist, not factory tmux --tmux-persist factory tmux --tmux-persist creates double-nested tmux sessions. The correct command is factory ceo --tmux-persist which launches the CEO in its own tmux session directly. Updated both the agent prompt and the factory-run skill (skill was updated by the rf agent itself during testing). Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/agents/prompts/refactory.md | 18 +++++++------- factory/agents/skills/factory-run.md | 36 ++++++++++++++-------------- 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/factory/agents/prompts/refactory.md b/factory/agents/prompts/refactory.md index 45d74e7f..474cd6c5 100644 --- a/factory/agents/prompts/refactory.md +++ b/factory/agents/prompts/refactory.md @@ -77,15 +77,15 @@ Do not store project data here. Project state lives in each project's `.factory/ ### 1. Never Implement Code Directly -You do not write code, fix bugs, run tests, or edit source files. You are a supervisor. When something needs to be built or fixed, you dispatch a CEO run via `factory tmux`: +You do not write code, fix bugs, run tests, or edit source files. You are a supervisor. 
When something needs to be built or fixed, you dispatch a CEO run via `factory ceo --tmux-persist`: ```bash -factory tmux /path/to/project --tmux-persist # single cycle in tmux -factory tmux /path/to/project --tmux-persist --loop # continuous loop in tmux -factory tmux /path/to/project --tmux-persist --focus "item" # targeted build in tmux +factory ceo /path/to/project --tmux-persist # single cycle in tmux +factory ceo /path/to/project --tmux-persist --loop # continuous loop in tmux +factory ceo /path/to/project --tmux-persist --focus "item" # targeted build in tmux ``` -**Always use `factory tmux --tmux-persist`**, not `factory ceo`. The `--tmux-persist` flag runs the CEO interactively in a tmux window (not headless) so the user can attach and watch. Without it, the CEO runs headless in the background. The tmux dispatch runs CEO sessions in detached tmux windows that survive independently of your session. +**Always use `factory ceo --tmux-persist`**. The `--tmux-persist` flag runs the CEO interactively in its own tmux session (not headless) so the user can attach and watch. Do NOT use `factory tmux --tmux-persist` — that creates double-nested tmux sessions. The CEO handles the full experiment lifecycle — it has its own specialist agents (Builder, QA, Researcher, Strategist, Archivist) for all technical work. @@ -112,10 +112,10 @@ When the user says "work on X": 2. Check if a CEO session is already running for that project (`factory tmux-ls`) 3. Check `factory status ` — if `no_factory`, run `factory discover ` first 4. 
Choose the right dispatch mode (always include `--tmux-persist`): - - `factory tmux --tmux-persist --loop` for ongoing improvement - - `factory tmux --tmux-persist --focus "item"` for targeted single-item work - - `factory tmux --tmux-persist --mode design` for brainstorming what to work on - - `factory tmux --tmux-persist --mode research` for research-driven improvement + - `factory ceo --tmux-persist --loop` for ongoing improvement + - `factory ceo --tmux-persist --focus "item"` for targeted single-item work + - `factory ceo --tmux-persist --mode design` for brainstorming what to work on + - `factory ceo --tmux-persist --mode research` for research-driven improvement ### 5. Monitor Proactively diff --git a/factory/agents/skills/factory-run.md b/factory/agents/skills/factory-run.md index 5f3b9216..568975f9 100644 --- a/factory/agents/skills/factory-run.md +++ b/factory/agents/skills/factory-run.md @@ -2,35 +2,35 @@ Use this skill to launch, monitor, and manage factory CEO runs. -**Always use `factory tmux --tmux-persist`** for all dispatch. This runs the CEO interactively in a tmux window (not headless) so you or the user can attach and watch. +**Always use `factory ceo --tmux-persist`** for dispatch. This launches the CEO interactively in its own tmux session (not headless) so the user can attach and watch. Do NOT use `factory tmux --tmux-persist` — that creates double-nested tmux sessions. ## Dispatch Modes -**Long-running improvement (preferred for multi-project):** +**Single cycle (default):** ```bash -factory tmux --tmux-persist --loop -factory tmux --tmux-persist --loop --interval 1800 # custom interval (seconds) +factory ceo --tmux-persist ``` -Runs in a detached tmux session. Sessions persist and you can check back later. +Launches in a detached tmux session. The user can attach to interact. 
-**Single cycle:** +**Long-running improvement loop:** ```bash -factory tmux --tmux-persist +factory ceo --tmux-persist --loop +factory ceo --tmux-persist --loop --interval 1800 # custom interval (seconds) ``` **Targeted single-item build:** ```bash -factory tmux --tmux-persist --focus "" -factory tmux --tmux-persist --focus 42 # GitHub issue number -factory tmux --tmux-persist --focus "owner/repo#42" +factory ceo --tmux-persist --focus "" +factory ceo --tmux-persist --focus 42 # GitHub issue number +factory ceo --tmux-persist --focus "owner/repo#42" ``` **Mode selection:** ```bash -factory tmux --tmux-persist --mode improve # default — score-driven improvement -factory tmux --tmux-persist --mode design # brainstorm what to work on first -factory tmux --tmux-persist --mode research # research-driven improvement -factory tmux --tmux-persist --mode meta # improve the factory itself + ACE evolution +factory ceo --tmux-persist --mode improve # default — score-driven improvement +factory ceo --tmux-persist --mode design # brainstorm what to work on first +factory ceo --tmux-persist --mode research # research-driven improvement +factory ceo --tmux-persist --mode meta # improve the factory itself + ACE evolution ``` ## Monitor Running Sessions @@ -58,9 +58,9 @@ factory tmux-stop --path | Scenario | Command | |---|---| -| Managing 2+ projects simultaneously | `factory tmux --tmux-persist --loop` for each | -| User asks "work on this project" | `factory tmux --tmux-persist` | -| User asks to build one specific thing | `factory tmux --tmux-persist --focus ""` | -| User wants to discuss what to work on | `factory tmux --tmux-persist --mode design` | +| Managing 2+ projects simultaneously | `factory ceo --tmux-persist --loop` for each | +| User asks "work on this project" | `factory ceo --tmux-persist` | +| User asks to build one specific thing | `factory ceo --tmux-persist --focus ""` | +| User wants to discuss what to work on | `factory ceo --tmux-persist --mode design` | 
Always check `factory tmux-ls` before dispatching to avoid launching duplicate sessions for the same project. From 80b7abd3b2fde0967f7861196a4f6b1b8978166b Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 18:45:36 +0000 Subject: [PATCH 19/21] fix: factory tmux runs interactive CEO, not headless MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changed _build_tmux_run_args to use `factory ceo` (not `factory run`) inside the tmux session. `factory ceo` defaults to interactive mode — a normal claude session the user can attach to and interact with. `factory run` was headless, requiring --tmux-persist which caused double-nested tmux sessions. Updated agent prompt and factory-run skill to use plain `factory tmux` without --tmux-persist (no longer needed). Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/agents/prompts/refactory.md | 20 +++++++++---------- factory/agents/skills/factory-run.md | 30 ++++++++++++++-------------- factory/cli.py | 8 ++++++-- 3 files changed, 31 insertions(+), 27 deletions(-) diff --git a/factory/agents/prompts/refactory.md b/factory/agents/prompts/refactory.md index 474cd6c5..f8431af4 100644 --- a/factory/agents/prompts/refactory.md +++ b/factory/agents/prompts/refactory.md @@ -77,15 +77,15 @@ Do not store project data here. Project state lives in each project's `.factory/ ### 1. Never Implement Code Directly -You do not write code, fix bugs, run tests, or edit source files. You are a supervisor. When something needs to be built or fixed, you dispatch a CEO run via `factory ceo --tmux-persist`: +You do not write code, fix bugs, run tests, or edit source files. You are a supervisor. 
When something needs to be built or fixed, you dispatch a CEO run via `factory tmux`: ```bash -factory ceo /path/to/project --tmux-persist # single cycle in tmux -factory ceo /path/to/project --tmux-persist --loop # continuous loop in tmux -factory ceo /path/to/project --tmux-persist --focus "item" # targeted build in tmux +factory tmux /path/to/project # single cycle in tmux +factory tmux /path/to/project --loop # continuous loop in tmux +factory tmux /path/to/project --focus "item" # targeted build in tmux ``` -**Always use `factory ceo --tmux-persist`**. The `--tmux-persist` flag runs the CEO interactively in its own tmux session (not headless) so the user can attach and watch. Do NOT use `factory tmux --tmux-persist` — that creates double-nested tmux sessions. +**Always use `factory tmux`** to dispatch CEO runs. This creates a detached tmux session with an interactive CEO inside — the user can attach and watch. The CEO runs as a normal interactive `claude` session (not headless). The CEO handles the full experiment lifecycle — it has its own specialist agents (Builder, QA, Researcher, Strategist, Archivist) for all technical work. @@ -111,11 +111,11 @@ When the user says "work on X": 1. Determine the project path (ask if ambiguous) 2. Check if a CEO session is already running for that project (`factory tmux-ls`) 3. Check `factory status ` — if `no_factory`, run `factory discover ` first -4. Choose the right dispatch mode (always include `--tmux-persist`): - - `factory ceo --tmux-persist --loop` for ongoing improvement - - `factory ceo --tmux-persist --focus "item"` for targeted single-item work - - `factory ceo --tmux-persist --mode design` for brainstorming what to work on - - `factory ceo --tmux-persist --mode research` for research-driven improvement +4. 
Choose the right dispatch mode: + - `factory tmux --loop` for ongoing improvement + - `factory tmux --focus "item"` for targeted single-item work + - `factory tmux --mode design` for brainstorming what to work on + - `factory tmux --mode research` for research-driven improvement ### 5. Monitor Proactively diff --git a/factory/agents/skills/factory-run.md b/factory/agents/skills/factory-run.md index 568975f9..c06d1487 100644 --- a/factory/agents/skills/factory-run.md +++ b/factory/agents/skills/factory-run.md @@ -2,35 +2,35 @@ Use this skill to launch, monitor, and manage factory CEO runs. -**Always use `factory ceo --tmux-persist`** for dispatch. This launches the CEO interactively in its own tmux session (not headless) so the user can attach and watch. Do NOT use `factory tmux --tmux-persist` — that creates double-nested tmux sessions. +**Always use `factory tmux`** for dispatch. This creates a detached tmux session with an interactive CEO inside — the user can attach and watch. The CEO runs as a normal `claude` session (not headless). ## Dispatch Modes **Single cycle (default):** ```bash -factory ceo --tmux-persist +factory tmux ``` Launches in a detached tmux session. The user can attach to interact. 
**Long-running improvement loop:** ```bash -factory ceo --tmux-persist --loop -factory ceo --tmux-persist --loop --interval 1800 # custom interval (seconds) +factory tmux --loop +factory tmux --loop --interval 1800 # custom interval (seconds) ``` **Targeted single-item build:** ```bash -factory ceo --tmux-persist --focus "" -factory ceo --tmux-persist --focus 42 # GitHub issue number -factory ceo --tmux-persist --focus "owner/repo#42" +factory tmux --focus "" +factory tmux --focus 42 # GitHub issue number +factory tmux --focus "owner/repo#42" ``` **Mode selection:** ```bash -factory ceo --tmux-persist --mode improve # default — score-driven improvement -factory ceo --tmux-persist --mode design # brainstorm what to work on first -factory ceo --tmux-persist --mode research # research-driven improvement -factory ceo --tmux-persist --mode meta # improve the factory itself + ACE evolution +factory tmux --mode improve # default — score-driven improvement +factory tmux --mode design # brainstorm what to work on first +factory tmux --mode research # research-driven improvement +factory tmux --mode meta # improve the factory itself + ACE evolution ``` ## Monitor Running Sessions @@ -58,9 +58,9 @@ factory tmux-stop --path | Scenario | Command | |---|---| -| Managing 2+ projects simultaneously | `factory ceo --tmux-persist --loop` for each | -| User asks "work on this project" | `factory ceo --tmux-persist` | -| User asks to build one specific thing | `factory ceo --tmux-persist --focus ""` | -| User wants to discuss what to work on | `factory ceo --tmux-persist --mode design` | +| Managing 2+ projects simultaneously | `factory tmux --loop` for each | +| User asks "work on this project" | `factory tmux ` | +| User asks to build one specific thing | `factory tmux --focus ""` | +| User wants to discuss what to work on | `factory tmux --mode design` | Always check `factory tmux-ls` before dispatching to avoid launching duplicate sessions for the same project. 
diff --git a/factory/cli.py b/factory/cli.py index 29929908..258cae5b 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3142,8 +3142,12 @@ def _tmux_available() -> bool: def _build_tmux_run_args(args: argparse.Namespace, project_path: Path, model: str | None) -> str: - """Build the 'factory run ...' command string from parsed args.""" - parts = [f"factory run {project_path}"] + """Build the 'factory ceo ...' command string from parsed args. + + Uses 'factory ceo' (not 'factory run') so the session inside tmux + is interactive — the user can attach and interact with the CEO directly. + """ + parts = [f"factory ceo {project_path}"] if args.mode: parts.append(f"--mode {args.mode}") if args.loop: From a28a2731757dcd4c3b76cd0703076dbc374db632 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Fri, 26 Jun 2026 20:45:20 +0000 Subject: [PATCH 20/21] fix: strip --loop/--interval/--max-cycles from factory tmux command factory tmux now runs `factory ceo` inside tmux, but --loop, --interval, and --max-cycles are factory-run-only flags that factory ceo doesn't accept. Passing them caused factory ceo to crash with "unrecognized arguments" and the tmux session died immediately. Co-Authored-By: Claude Opus 4.6 (1M context) --- factory/cli.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/factory/cli.py b/factory/cli.py index 258cae5b..3ed37283 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3146,16 +3146,12 @@ def _build_tmux_run_args(args: argparse.Namespace, project_path: Path, model: st Uses 'factory ceo' (not 'factory run') so the session inside tmux is interactive — the user can attach and interact with the CEO directly. + --loop/--interval/--max-cycles are factory-run-only flags and are + NOT forwarded to factory ceo. 
""" parts = [f"factory ceo {project_path}"] if args.mode: parts.append(f"--mode {args.mode}") - if args.loop: - parts.append("--loop") - if args.interval: - parts.append(f"--interval {args.interval}") - if args.max_cycles is not None: - parts.append(f"--max-cycles {args.max_cycles}") if model: parts.append(f"--model {shlex.quote(model)}") if getattr(args, "no_github", False): From 83f5c0c7051f89eef94a3ea20888d3e7ac850e64 Mon Sep 17 00:00:00 2001 From: Kai Xu Date: Sat, 27 Jun 2026 03:20:07 +0000 Subject: [PATCH 21/21] fix: update tmux tests for factory ceo (not factory run) inside tmux Tests expected `factory run` and `--loop`/`--interval`/`--max-cycles` in the tmux command, but we changed to `factory ceo` (interactive) which doesn't accept those flags. Co-Authored-By: Claude Opus 4.6 (1M context) --- tests/test_cli.py | 4 ++-- tests/test_tmux_cli.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 85d91d94..ed7116e2 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1561,7 +1561,7 @@ def test_cmd_home_returns_package_root(self, capsys): class TestCmdTmuxBareCLI: def test_tmux_command_uses_bare_factory(self): - """cmd_tmux generates a shell command using bare 'factory run', not uv run.""" + """cmd_tmux generates a shell command using bare 'factory ceo', not uv run.""" from factory.cli import cmd_tmux import argparse @@ -1590,7 +1590,7 @@ def test_tmux_command_uses_bare_factory(self): new_session_call = mock_run.call_args_list[1] shell_cmd = new_session_call[0][0][-1] # last arg is the shell command - assert "factory run" in shell_cmd + assert "factory ceo" in shell_cmd assert "uv run python -m factory" not in shell_cmd assert "cd " not in shell_cmd assert "source .venv/bin/activate" not in shell_cmd diff --git a/tests/test_tmux_cli.py b/tests/test_tmux_cli.py index 0c4153a2..34b80813 100644 --- a/tests/test_tmux_cli.py +++ b/tests/test_tmux_cli.py @@ -133,9 +133,9 @@ def 
test_propagates_all_flags(self) -> None: result = _build_tmux_run_args(args, Path("/tmp/project"), "opus-4") assert "--mode improve" in result - assert "--loop" in result - assert "--interval 900" in result - assert "--max-cycles 5" in result + assert "--loop" not in result + assert "--interval" not in result + assert "--max-cycles" not in result assert "--model" in result assert "--no-github" in result assert "--profile" in result @@ -172,7 +172,7 @@ def test_minimal_args(self) -> None: bg_agents=False, tmux_persist=False, use_profile=False, ) result = _build_tmux_run_args(args, Path("/tmp/p"), None) - assert result == "factory run /tmp/p" + assert result == "factory ceo /tmp/p" class TestCmdTmuxStop: