diff --git a/factory/agents/agents.yml b/factory/agents/agents.yml index 6c9a6ce7..e01fda70 100644 --- a/factory/agents/agents.yml +++ b/factory/agents/agents.yml @@ -74,3 +74,12 @@ profiler: Synthesize a user's working style, preferences, and decision patterns from factory session evidence into a coherent prose profile. Use when generating or updating a user profile from experiment data. + +refactory: + model: opus + tools: [Bash, Read, Write, Edit, Grep, Glob, WebSearch, WebFetch] + description: >- + Persistent factory supervisor that manages CEO agent lifecycles, + context/compaction for child sessions, and playbook evolution via ACE. + Launched via bare 'factory' command or 'factory refactory'. Not spawned + by the CEO — it's the layer above. diff --git a/factory/agents/plugin.py b/factory/agents/plugin.py index 7e39d408..e5e58893 100644 --- a/factory/agents/plugin.py +++ b/factory/agents/plugin.py @@ -90,7 +90,7 @@ def generate_agent_content(role: str) -> str: _READ_ONLY_ROLES = frozenset({"researcher", "qa", "failure_analyst", "refiner", "profiler"}) -_WORKSPACE_WRITE_ROLES = frozenset({"builder", "archivist", "ceo", "strategist"}) +_WORKSPACE_WRITE_ROLES = frozenset({"builder", "archivist", "ceo", "strategist", "refactory"}) def _sandbox_mode(role: str) -> str: diff --git a/factory/agents/prompts/refactory.md b/factory/agents/prompts/refactory.md new file mode 100644 index 00000000..f8431af4 --- /dev/null +++ b/factory/agents/prompts/refactory.md @@ -0,0 +1,164 @@ +# re:factory Agent — Persistent Factory Supervisor + +You are the re:factory agent — a persistent supervisor that outlives individual CEO sessions. You are not a specialist spawned by the CEO. You are the layer above: you manage CEO lifecycles, preserve context across sessions, and curate the playbooks that guide all factory agents. + +## Identity + +You are the factory's long-term memory and control plane. 
While the CEO operates within a single experiment cycle — hypothesize, build, evaluate, verdict — you operate across cycles, across projects, and across time. You think in projects and trajectories, not lines of code. + +You are interactive. The user talks to you directly. You are their interface to the factory system — you translate intent into dispatched work, monitor progress, and report results. + +You persist across restarts via `--session-id`. Your session state survives process exits. When you resume, you pick up where you left off — check on running sessions, review completed work, and continue managing the factory. + +## Capabilities + +Three core capabilities, delivered via slash commands: + +1. **CEO Dispatch** — Launch, monitor, and stop factory runs across projects. Use `/factory-run` for dispatch patterns. +2. **Compaction Management** — Preserve context for long-running CEO sessions. Use `/compaction` for context injection patterns. +3. **Playbook Evolution** — Curate agent playbooks via ACE. Use `/playbook` for evolution triggers and review. + +Use your slash commands to recall the detailed procedures for each capability. + +## Factory CLI Reference + +You have access to the full factory CLI. Key commands: + +### Dispatch & Monitoring +- `factory ceo <path>` — Single CEO improvement cycle (foreground, blocks until done) +- `factory run <path> --loop --interval 1800` — Continuous heartbeat loop +- `factory tmux <path>` — Dispatch CEO in a detached tmux session +- `factory tmux <path> --loop` — Continuous loop in tmux (preferred for multi-project) +- `factory tmux-ls` — List active factory tmux sessions +- `factory tmux-stop --session <name>` — Stop a tmux session +- `factory tmux-stop --path <path>` — Stop session by project path + +### Project Setup +- `factory discover <path>` — Introspect a project, generate eval profile + factory.md automatically. **Use this first on any uninitialized project** — it detects language, framework, test commands, and builds the eval harness.
+- `factory init <path>` — Parse an existing factory.md into .factory/config.json. Only needed after manually editing factory.md. + +### Project Intelligence +- `factory eval <path>` — Run eval, get current composite score +- `factory history <path>` — Show experiment history (TSV) +- `factory study <path>` — Analyze codebase, write observations +- `factory status <path>` — Show project state and recent activity +- `factory backlog-list <path>` — List pending backlog items +- `factory backlog-add "item"` — Add backlog item + +### Recovery & State +- `factory checkpoint <path>` — Save CEO state for crash recovery +- `factory resume <path>` — Resume from last checkpoint + +### Self-Evolution +- `factory ace` — Evolve all agent playbooks from experiment data +- `factory ace-stats` — Show playbook evolution statistics + +## Session Persistence + +You run with `--session-id` for persistent memory across restarts. Your session ID is stored in `~/.factory/refactory-session.json`. + +When you start: +1. Check `factory tmux-ls` for any running CEO sessions +2. Check recent project activity if you have active projects +3. Resume any monitoring or follow-up tasks from your prior session + +When you're interrupted or restarted, you lose nothing — your conversation history persists via the session ID. Use `--resume` to continue seamlessly. + +## Working Directory + +Your workspace is `~/.factory/refactory/`. It contains: +- `.claude/commands/` — Your slash command skills (installed by `factory refactory`) +- `.claude/settings.json` — MCP server configuration +- `CLAUDE.md` — Workspace-level instructions + +Do not store project data here. Project state lives in each project's `.factory/` directory. + +## Behavioral Rules + +### 1. Never Implement Code Directly + +You do not write code, fix bugs, run tests, or edit source files. You are a supervisor.
When something needs to be built or fixed, you dispatch a CEO run via `factory tmux`: + +```bash +factory tmux /path/to/project # single cycle in tmux +factory tmux /path/to/project --loop # continuous loop in tmux +factory tmux /path/to/project --focus "item" # targeted build in tmux +``` + +**Always use `factory tmux`** to dispatch CEO runs. This creates a detached tmux session with an interactive CEO inside — the user can attach and watch. The CEO runs as a normal interactive `claude` session (not headless). + +The CEO handles the full experiment lifecycle — it has its own specialist agents (Builder, QA, Researcher, Strategist, Archivist) for all technical work. + +### 2. Think in Projects and Cycles + +Your mental model is: +- **Projects** — directories with codebases that the factory improves +- **Cycles** — CEO experiment runs that hypothesize, build, evaluate, and verdict +- **Trajectories** — the arc of a project's improvement over many cycles + +You track which projects exist, what their current scores are, what's in their backlogs, and whether CEO runs are active. You don't track individual code changes. + +### 3. Initialize Before Dispatch + +Before dispatching a CEO on any project, check `factory status `. If the state is `no_factory`, the project needs setup first: +1. Run `factory discover ` — this introspects the codebase and generates the eval profile and factory.md automatically +2. Do NOT manually write factory.md or call `factory init` directly — `discover` handles everything +3. After discover completes, the CEO can run normally + +### 4. Dispatch Based on Intent + +When the user says "work on X": +1. Determine the project path (ask if ambiguous) +2. Check if a CEO session is already running for that project (`factory tmux-ls`) +3. Check `factory status ` — if `no_factory`, run `factory discover ` first +4. 
Choose the right dispatch mode: + - `factory tmux --loop` for ongoing improvement + - `factory tmux --focus "item"` for targeted single-item work + - `factory tmux --mode design` for brainstorming what to work on + - `factory tmux --mode research` for research-driven improvement + +### 5. Monitor Proactively + +While CEO sessions are running: +- Periodically check `factory tmux-ls` for session status +- After completion, read `.factory/reviews/` for agent outputs +- Run `factory eval ` to check scores +- Report findings back to the user + +### 6. Review Completed Work + +After a CEO cycle completes: +1. Read the project's `.factory/reviews/ceo-latest.md` +2. Run `factory eval ` for the current score +3. Run `factory history ` to see the experiment record +4. Summarize: what was attempted, what was the verdict, what's the score delta + +### 7. Preserve Context Across Sessions + +You are the persistent layer. When CEO sessions compact or restart, context is lost. You retain the big picture: +- Which hypotheses have been tried +- What the score trajectory looks like +- What's still in the backlog +- What patterns of success or failure have emerged + +Use `factory checkpoint ` before long runs and `factory resume ` after crashes. + +### 8. Curate Playbooks + +Periodically trigger playbook evolution via `factory ace` to distill experiment outcomes into agent behavior rules. Review with `factory ace-stats`. This is how the factory's agents improve over time. + +## Hierarchy + +``` +re:factory (you) — persistent supervisor + └── CEO — per-cycle orchestrator (spawned by you) + ├── Researcher + ├── Strategist + ├── Builder + ├── QA + ├── Archivist + ├── Refiner + └── Failure Analyst +``` + +You spawn CEOs. CEOs spawn specialists. Never the reverse. 
diff --git a/factory/agents/runner.py b/factory/agents/runner.py index 0e5fc005..fb62de75 100644 --- a/factory/agents/runner.py +++ b/factory/agents/runner.py @@ -16,6 +16,7 @@ AgentRole = Literal[ "researcher", "strategist", "builder", "qa", "archivist", "ceo", "failure_analyst", "refiner", "profiler", + "refactory", ] # Consecutive failure tracking diff --git a/factory/agents/skills/compaction.md b/factory/agents/skills/compaction.md new file mode 100644 index 00000000..ade0e453 --- /dev/null +++ b/factory/agents/skills/compaction.md @@ -0,0 +1,60 @@ +# /compaction — Context Preservation for CEO Sessions + +Use this skill to manage compaction and context loss in long-running CEO sessions. + +## Why Compaction Matters + +CEO sessions running long `--loop` cycles will hit Claude Code's context compaction. When this happens, the CEO loses track of its strategy, repeats work, or makes contradictory decisions. You are the persistent memory layer — you know what the CEO was doing and can help recover context. + +## Checkpoint Before Long Runs + +Before dispatching a long `--loop` run, save a recovery point: +```bash +factory checkpoint +``` +This captures the current strategy state so you can resume if the session crashes. + +## Resume from Crashes + +If a CEO session dies unexpectedly: +```bash +factory resume +``` +This restarts from the last checkpoint, preserving strategy and experiment state. + +## Context Injection Pattern + +When a CEO session has compacted or needs context refreshed, gather and compose state: + +1. **Generate fresh observations:** + ```bash + factory study + ``` + +2. **Read current strategy:** + Read `.factory/strategy/current.md` — contains hypotheses, priorities, and the design space assessment. + +3. **Read pending work:** + Read `.factory/strategy/backlog.md` — items the CEO should be working on. + +4. **Read latest agent outputs:** + Read `.factory/reviews/` — `ceo-latest.md` and other agent review files show what was last attempted. 
+ +5. **Compose a summary** of the above and inject it via the CEO's next `--focus` or `--prompt` flag to restore awareness. + +## Proactive Monitoring + +While CEO runs are active, periodically check on them: + +```bash +factory tmux-ls # are sessions still running? +factory status # project state and recent activity +factory history # latest experiment outcomes +``` + +Signs of compaction trouble: +- A CEO cycle takes much longer than usual +- The user reports the CEO seems confused or is repeating work +- History shows consecutive REVERTs with similar hypotheses + +When you detect these signals, checkpoint the project, stop the session, and dispatch a fresh CEO with context injected via `--focus` or `--prompt`. diff --git a/factory/agents/skills/factory-run.md b/factory/agents/skills/factory-run.md new file mode 100644 index 00000000..c06d1487 --- /dev/null +++ b/factory/agents/skills/factory-run.md @@ -0,0 +1,66 @@ +# /factory-run — CEO Dispatch + +Use this skill to launch, monitor, and manage factory CEO runs. + +**Always use `factory tmux`** for dispatch. This creates a detached tmux session with an interactive CEO inside — the user can attach and watch. The CEO runs as a normal `claude` session (not headless). + +## Dispatch Modes + +**Single cycle (default):** +```bash +factory tmux +``` +Launches in a detached tmux session. The user can attach to interact. 
+ +**Long-running improvement loop:** +```bash +factory tmux --loop +factory tmux --loop --interval 1800 # custom interval (seconds) +``` + +**Targeted single-item build:** +```bash +factory tmux --focus "" +factory tmux --focus 42 # GitHub issue number +factory tmux --focus "owner/repo#42" +``` + +**Mode selection:** +```bash +factory tmux --mode improve # default — score-driven improvement +factory tmux --mode design # brainstorm what to work on first +factory tmux --mode research # research-driven improvement +factory tmux --mode meta # improve the factory itself + ACE evolution +``` + +## Monitor Running Sessions + +```bash +factory tmux-ls +``` +Lists all active factory tmux sessions with project paths and status. + +## Stop a Session + +```bash +factory tmux-stop --session +factory tmux-stop --path +``` + +## Check Results After Completion + +1. Read `.factory/reviews/ceo-latest.md` in the project directory for the CEO's final output +2. Run `factory eval ` for the current composite score +3. Run `factory history ` for the full experiment log +4. Read `.factory/reviews/` for individual agent outputs (builder-latest.md, qa-latest.md, etc.) + +## When to Use Which + +| Scenario | Command | +|---|---| +| Managing 2+ projects simultaneously | `factory tmux --loop` for each | +| User asks "work on this project" | `factory tmux ` | +| User asks to build one specific thing | `factory tmux --focus ""` | +| User wants to discuss what to work on | `factory tmux --mode design` | + +Always check `factory tmux-ls` before dispatching to avoid launching duplicate sessions for the same project. diff --git a/factory/agents/skills/playbook.md b/factory/agents/skills/playbook.md new file mode 100644 index 00000000..46da1e12 --- /dev/null +++ b/factory/agents/skills/playbook.md @@ -0,0 +1,47 @@ +# /playbook — ACE Playbook Evolution + +Use this skill to manage and evolve agent playbooks via the ACE (Automated Capability Evolution) system. 
+ +## Trigger Playbook Evolution + +```bash +factory ace +``` +Evolves all agent playbooks from accumulated experiment data. ACE analyzes experiment outcomes (KEEP vs REVERT), extracts behavioral patterns, and distills them into DO/DON'T rules in each role's playbook. + +## Check Evolution Stats + +```bash +factory ace-stats +``` +Shows which rules were added, removed, or updated in the latest evolution run. Use this to verify that evolution produced sensible changes. + +## Read Current Playbooks + +Playbooks live at `~/.factory/playbooks/<role>.md` — one per agent role: +- `researcher.md`, `strategist.md`, `builder.md`, `qa.md` +- `archivist.md`, `refiner.md`, `failure_analyst.md`, `ceo.md` + +Each playbook contains empirically-derived DO/DON'T rules with helpful/harmful counts. Higher helpful counts indicate stronger confidence in a rule. + +## When to Evolve + +Trigger `factory ace` when: +- **3+ experiments** have completed across any project since the last evolution +- **Agent mistakes repeat** — you observe the same failure pattern across experiments (e.g., builder keeps making the same type of error) +- **User requests it** — "improve how the builder works", "agents keep doing X wrong" +- **After a meta mode run** — meta mode already runs ACE, but you may want a follow-up evolution after reviewing the results + +## Targeted Review for Underperforming Roles + +If a specific agent role is underperforming: + +1. **Read its playbook:** `~/.factory/playbooks/<role>.md` +2. **Check experiment archives:** Read `.factory/archive/experiments/` in relevant projects for patterns of failure +3. **Read agent outputs:** Check `.factory/reviews/<role>-latest.md` across projects to spot recurring issues +4. **Trigger evolution:** Run `factory ace` — ACE will incorporate the latest experiment data +5. **Verify changes:** Run `factory ace-stats` and read the updated playbook to confirm the new rules address the observed issues + +## Manual Playbook Editing + +Playbooks are plain markdown.
If ACE misses a pattern or you need an immediate fix, you can edit `~/.factory/playbooks/<role>.md` directly. ACE will preserve manual edits on subsequent evolutions as long as the format is maintained. diff --git a/factory/agents/skills/sessions.md b/factory/agents/skills/sessions.md new file mode 100644 index 00000000..621bbfe6 --- /dev/null +++ b/factory/agents/skills/sessions.md @@ -0,0 +1,49 @@ +# /sessions — Active Session Tracking + +Use this skill to track, health-check, and review factory CEO sessions. + +## List Active Sessions + +```bash +factory tmux-ls +``` +Shows all active factory tmux sessions. Each entry includes the session name and project path. Run this frequently while CEO sessions are active. + +## Health Check a Session + +Verify a tmux session is alive and the CEO process is running: +```bash +tmux has-session -t <session> 2>/dev/null && echo "alive" || echo "dead" +tmux list-panes -t <session> -F '#{pane_pid}' 2>/dev/null +``` +If the session exists but the CEO process has exited, the session is stale — stop it and dispatch a fresh one if needed. + +## User Attach Guidance + +If the user wants to watch or interact with a running CEO session: +``` +tmux attach -t <session> +``` +- `Ctrl-b d` to detach without stopping the session +- `Ctrl-c` inside the session will interrupt the CEO — warn the user + +## Post-Completion Review + +When a CEO session finishes: + +1. **Read agent outputs:** Check `.factory/reviews/` in the project directory — `ceo-latest.md`, `builder-latest.md`, `qa-latest.md` contain the latest agent outputs +2. **Check scores:** `factory eval <path>` for the current composite score +3. **Check history:** `factory history <path>` for the experiment log — look at the latest entry for the verdict (KEEP/REVERT) and score delta +4. **Check strategy:** Read `.factory/strategy/current.md` for what the CEO planned and `.factory/strategy/observations.md` for what was observed + +Summarize findings to the user: what was attempted, what was the verdict, what's the score delta.
+ +## Concurrent Multi-Project Management + +You can have multiple CEO sessions running simultaneously across different projects. Best practices: + +- Track which projects have active sessions to avoid duplicate launches +- Use `factory tmux-ls` as your dashboard — run it periodically +- When a session completes, review results before deciding whether to launch another cycle +- Stagger launches to avoid resource contention on the host machine +- If multiple sessions are running, check each project's results systematically — don't let completed sessions go unreviewed diff --git a/factory/agents/sop-compact/pre-compact.sh b/factory/agents/sop-compact/pre-compact.sh new file mode 100755 index 00000000..b94fa5ac --- /dev/null +++ b/factory/agents/sop-compact/pre-compact.sh @@ -0,0 +1,243 @@ +#!/usr/bin/env bash +# pre-compact: fires before /compact (manual or auto). Runs a `claude -p` sidecar +# that reads the just-finished conversation, promotes principled learnings to the +# repo's durable targets, and writes an ephemeral handoff snapshot the SessionStart +# hook will point the post-compact session at. +# +# PreCompact is awaited (blocking) — CC waits for this hook (and its sidecar) before +# it starts summarizing. Exit 2 hard-blocks the compaction and surfaces stderr to the +# user; we use that so a failed snapshot aborts /compact rather than silently losing +# the in-flight context. Exit 0 lets compaction proceed. +# +# Sidecar invocation pattern (claude -p with tool use + no session pollution) is +# derived from an upstream session-management tool's `sop` subcommand; this plugin +# ships its own prompt rather than shelling out to it. +# +# TRUST ASSUMPTION (security): the sidecar runs with --dangerously-skip-permissions and +# reads the just-finished transcript, which contains verbatim session content (user +# messages, tool output, external data). A crafted message in that transcript is therefore +# untrusted input reaching an agent with broad tool access. 
We accept this deliberately: +# - The transcript was already read, in full, by the main session that produced it — the +# sidecar gains no privilege the original session didn't already have over this repo. +# - Promotion targets are not confinable to a fixed subtree (CLAUDE.md lives at the repo +# root; per-project memory dirs can live OUTSIDE the repo under ~/.claude/projects/...), +# and the prompt samples large transcripts via head/grep — so a narrow --allowed-tools +# allowlist would break promotion. Broad access is required for the feature to work. +# - A `timeout` wrapper (below) bounds runaway/looping behavior so a hijacked sidecar +# can't block /compact forever. +# Treat the transcript as a controlled artifact from the CC runtime. If you need a stronger +# boundary, run the sidecar in a sandbox or scope promotion to in-repo paths only. + +set -uo pipefail + +INPUT="$(cat 2>/dev/null || true)" + +jq_field() { + # $1 = jq path expression (e.g. .transcript_path). Prints value or empty string. + printf '%s' "$INPUT" | jq -r "${1} // empty" 2>/dev/null || printf '' +} + +TRANSCRIPT="$(jq_field .transcript_path)" +CWD="$(jq_field .cwd)" +TRIGGER="$(jq_field .trigger)" +SESSION_ID="$(jq_field .session_id)" + +# Repo root: prefer the cwd from stdin, then $CLAUDE_PROJECT_DIR, then $PWD. +REPO_DIR="${CWD:-${CLAUDE_PROJECT_DIR:-$PWD}}" + +SOP_FILE="${REPO_DIR}/.claude/sop-compact.md" +SNAP_DIR="${REPO_DIR}/.claude/sop-compact" +# Seconds-resolution UTC + a PID suffix: two concurrent compacts (e.g. auto-compact in +# two long-running sessions on the same repo) can hit the same wall-second; the -$$ +# disambiguates so the second writer's mv -f doesn't clobber the first's handoff. The +# timestamp prefix still dominates lexical order, so SessionStart's latest-glob and +# prune_handoffs' sort are unaffected. 
(machine#118) +TS="$(date -u +%Y%m%dT%H%M%SZ)-$$" +HANDOFF="${SNAP_DIR}/handoff-${TS}.md" + +mkdir -p "$SNAP_DIR" + +# prune_handoffs: after a new handoff is written, keep only the most recent N matching +# handoff-*.md and remove the rest. N is SOP_COMPACT_HANDOFF_RETENTION (default 10). +# Glob expansion is lexically sorted and the zero-padded UTC timestamp prefix dominates +# (the -$$ suffix only disambiguates within a wall-second), so lexical == chronological; +# the oldest files sort first and are the ones removed. Called AFTER the write so the +# just-written handoff is always among the kept N. A keep < 1 (or non-numeric) value is a +# no-op so we never delete the file SessionStart needs. *.error.log and .handoff-*.XXXXXX +# temp files don't match handoff-*.md, so they're untouched. +prune_handoffs() { + local keep="${SOP_COMPACT_HANDOFF_RETENTION:-10}" + [[ "$keep" =~ ^[0-9]+$ ]] && (( keep >= 1 )) || return 0 + local files=() + shopt -s nullglob + files=( "${SNAP_DIR}"/handoff-*.md ) + shopt -u nullglob + local count=${#files[@]} + (( count > keep )) || return 0 + local i + for (( i = 0; i < count - keep; i++ )); do + rm -f "${files[i]}" + done +} + +# --- No SOP yet: write a minimal stub handoff and let compaction proceed. ---------- +if [[ ! -f "$SOP_FILE" ]]; then + TMP="$(mktemp "${SNAP_DIR}/.handoff-${TS}.XXXXXX")" + { + printf '# Pre-compact handoff (stub — no SOP)\n\n' + printf '_Generated %s by sop-compact PreCompact hook (trigger: %s)._\n\n' "$TS" "${TRIGGER:-unknown}" + printf 'This repo has **no `.claude/sop-compact.md`** — run `/init-sop-compact` to generate one ' + printf 'so future compactions get a real Promote+Snapshot pass.\n\n' + printf 'For now there is no repo-tailored procedure. After this compaction:\n\n' + printf '1. Treat the conversation history above as a lossy compaction summary, not the live session.\n' + printf '2. Re-check live state (git status, gh, files) before acting.\n' + printf '3. 
Prior transcript (for archaeology if needed): `%s`\n' "${TRANSCRIPT:-unknown}" + } >"$TMP" + mv -f "$TMP" "$HANDOFF" + prune_handoffs + exit 0 +fi + +# --- SOP present: run the sidecar to promote + snapshot. --------------------------- +MODEL="${SOP_COMPACT_MODEL:-opus[1m]}" + +PROMPT_FILE="$(mktemp)" +STDERR_FILE="$(mktemp)" +cleanup() { rm -f "$PROMPT_FILE" "$STDERR_FILE"; } +trap cleanup EXIT + +# extract_handoff: pull the handoff body out from between the ===HANDOFF=== and ===END=== +# sentinels the sidecar is asked to emit. The sidecar tends to narrate its promotion +# decisions before the markdown; the sentinels let us drop that preamble so the saved file +# starts at the `# Pre-compact handoff` heading. Reads raw output on stdin, prints the +# extracted body on stdout. Exit 0 if both sentinels were found and the body is non-empty; +# exit 1 otherwise (caller falls back to writing the raw output). +extract_handoff() { + awk ' + /^===HANDOFF===[[:space:]]*$/ { capture=1; started=1; next } + /^===END===[[:space:]]*$/ { if (capture) { capture=0; ended=1 } next } + capture { lines[n++] = $0 } + END { + if (!started || !ended) exit 1 + # Strip a single leading blank line so the H1 lands at the top of the file. + first = 0 + if (n > 0 && lines[0] == "") first = 1 + empty = 1 + for (i = first; i < n; i++) { + print lines[i] + if (lines[i] != "") empty = 0 + } + if (empty) exit 1 + } + ' +} + +cat >"$PROMPT_FILE" < +===END=== +EOF + +# Run the sidecar from the repo root so relative paths in the SOP resolve. Capture +# stdout (the handoff) and stderr (debug on failure) separately. +# +# PreCompact is awaited and blocking, so an unbounded sidecar would hang /compact forever +# (the user can't interrupt it). Wrap in `timeout` (default 600s, override via +# SOP_COMPACT_TIMEOUT); on expiry `timeout` exits 124, which the RC check below catches and +# converts to an exit-2 block — a clear failure rather than an infinite hang. 
+# Default is 600s (not 300s) because the sidecar defaults to opus[1m] (v0.3.3) and ingests +# the whole just-finished transcript — the long sessions this targets can need >5min to +# read + promote + snapshot, and a 300s wall would exit-2-block compaction (rc=124) on +# exactly those sessions (machine#120 review). +SIDECAR_OUT="$( + cd "$REPO_DIR" && timeout "${SOP_COMPACT_TIMEOUT:-600}" claude -p "$(cat "$PROMPT_FILE")" \ + --model "$MODEL" \ + --setting-sources "" \ + --disable-slash-commands \ + --strict-mcp-config \ + --no-chrome \ + --no-session-persistence \ + --dangerously-skip-permissions \ + 2>"$STDERR_FILE" +)" +RC=$? + +if [[ $RC -ne 0 || -z "${SIDECAR_OUT// /}" ]]; then + DEBUG="${SNAP_DIR}/handoff-${TS}.error.log" + { + printf 'sop-compact PreCompact sidecar failed (rc=%s) at %s\n' "$RC" "$TS" + printf 'model=%s session=%s\n\n--- stderr ---\n' "$MODEL" "${SESSION_ID:-unknown}" + cat "$STDERR_FILE" 2>/dev/null + } >"$DEBUG" + # Exit 2 hard-blocks compaction so the user keeps the live context and knows the + # snapshot failed (rather than silently compacting into a lossy summary). + echo "sop-compact: pre-compact sidecar failed (rc=$RC). Compaction blocked to preserve context. See $DEBUG" >&2 + exit 2 +fi + +# The sidecar wraps its handoff in ===HANDOFF===/===END=== sentinels so any promotion- +# decision narration it emits stays out of the saved file. Extract the body; if the +# sentinels are missing/malformed, fall back to the raw output (a degraded snapshot beats +# losing the in-flight context) and warn so a maintainer can spot the extraction failure. +if HANDOFF_BODY="$(printf '%s\n' "$SIDECAR_OUT" | extract_handoff)"; then + HANDOFF_CONTENT="$HANDOFF_BODY" +else + HANDOFF_CONTENT="$SIDECAR_OUT" + echo "sop-compact: sidecar output missing sentinels; wrote raw output as fallback (see handoff for inspection)" >&2 +fi + +# Write the handoff atomically so SessionStart never reads a partial file. 
+TMP="$(mktemp "${SNAP_DIR}/.handoff-${TS}.XXXXXX")" +printf '%s\n' "$HANDOFF_CONTENT" >"$TMP" +mv -f "$TMP" "$HANDOFF" +prune_handoffs + +# --- Optional repo-local extension: run after a successful snapshot. --------------- +# Failures here must not take down the pre-hook, so guard with controlled error handling. +EXT="${REPO_DIR}/.claude/sop-compact/pre.sh" +if [[ -f "$EXT" ]]; then + ( set +e; SOP_COMPACT_HANDOFF="$HANDOFF" SOP_COMPACT_TRANSCRIPT="$TRANSCRIPT" bash "$EXT" ) || true +fi + +exit 0 diff --git a/factory/agents/sop-compact/session-start.sh b/factory/agents/sop-compact/session-start.sh new file mode 100755 index 00000000..411c49b0 --- /dev/null +++ b/factory/agents/sop-compact/session-start.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# session-start: post-compact orientation. Fires on every SessionStart but only acts +# when source == "compact"; for every other source (startup, resume, clear) it exits +# silently so normal session starts are untouched. +# +# It emits a pointer-only directive (not the full handoff content) via +# hookSpecificOutput.additionalContext, telling the new session to read the latest +# handoff file the PreCompact sidecar wrote. + +set -uo pipefail + +INPUT="$(cat 2>/dev/null || true)" + +jq_field() { + printf '%s' "$INPUT" | jq -r "${1} // empty" 2>/dev/null || printf '' +} + +SOURCE="$(jq_field .source)" + +# Only orient on the way back in from a /compact. No-op otherwise. +if [[ "$SOURCE" != "compact" ]]; then + exit 0 +fi + +CWD="$(jq_field .cwd)" +REPO_DIR="${CWD:-${CLAUDE_PROJECT_DIR:-$PWD}}" +REPO="$(basename "$REPO_DIR")" + +# Latest handoff (timestamps are zero-padded + UTC, so lexical == chronological; +# ls -t by mtime is equivalent and robust to clock format). +LATEST="$(ls -t "${REPO_DIR}/.claude/sop-compact/"handoff-*.md 2>/dev/null | head -1 || true)" +LEGACY="" + +if [[ -z "$LATEST" ]]; then + # Back-compat: pick up v0.2.x snapshots written by the old compact-sop plugin. 
+ SNAP_DIR="${COMPACT_SOP_SNAPSHOT_DIR:-$HOME/.claude/compact-sop/snapshots}" + LATEST="$(ls -t "${SNAP_DIR}/pre-compact-${REPO}-"*.md /tmp/pre-compact-"${REPO}"-*.md 2>/dev/null | head -1 || true)" + [[ -n "$LATEST" ]] && LEGACY=" (legacy compact-sop snapshot — consider running /init-sop-compact to migrate)" +fi + +if [[ -n "$LATEST" ]]; then + POINTER="You were just compacted (SessionStart source=compact). Before doing anything else, read \`${LATEST}\`${LEGACY} immediately for orientation — it is the pre-compact handoff with the non-reconstructable in-flight context. Trust hierarchy: live state (git/gh/files) > handoff > compaction summary. Do not start new work until you have read it; then re-check live state and wait for the user." +else + POINTER="You were just compacted (SessionStart source=compact), but no pre-compact handoff was found under \`${REPO_DIR}/.claude/sop-compact/\`. Treat the history above as a lossy summary: re-check live state (git status, gh, files) before acting, and consider running \`/init-sop-compact\` so future compactions produce a handoff. Do not start new work until you have re-oriented." +fi + +jq -nc --arg ctx "$POINTER" \ + '{hookSpecificOutput: {hookEventName: "SessionStart", additionalContext: $ctx}}' + +# --- Optional repo-local extension. ------------------------------------------------ +EXT="${REPO_DIR}/.claude/sop-compact/post.sh" +if [[ -f "$EXT" ]]; then + ( set +e; SOP_COMPACT_HANDOFF="$LATEST" bash "$EXT" >/dev/null 2>&1 ) || true +fi + +exit 0 diff --git a/factory/agents/sop-compact/sop-compact.md b/factory/agents/sop-compact/sop-compact.md new file mode 100644 index 00000000..6559773a --- /dev/null +++ b/factory/agents/sop-compact/sop-compact.md @@ -0,0 +1,42 @@ +# sop-compact — re:factory agent + +Standard operating procedure for the sop-compact PreCompact sidecar when running +inside a re:factory workspace. The PreCompact hook reads this file to know what to +promote and what to snapshot before context compaction. 
+ +## Promotion targets + +Durable learnings go here (direct file edits by the sidecar): + +- `.refactory/CLAUDE.md` — workspace-level instructions, validated patterns, recurring + gotchas discovered during supervision. Append to the existing content; do not + overwrite the preamble. + +## Snapshot conventions + +The handoff snapshot should capture non-reconstructable in-flight state: + +- **Active CEO sessions**: run `factory tmux-ls` to list running factory sessions and + their current status. Record which projects have active loops and their last cycle. +- **Project score trajectory**: recent score changes, whether scores are trending up or + down, and any plateau/regression patterns observed this session. +- **Backlog state**: items recently added, removed, or reprioritized. Note any items + the user explicitly deferred or promoted. +- **In-flight decisions**: what the user and agent were mid-discussing — open questions, + half-formed directions, rejected approaches and why. + +## Live-state checks + +Before writing the snapshot, check these for current ground truth: + +- `factory tmux-ls` — which factory sessions are running +- `factory status .` — project status if inside a project +- `git status` — uncommitted changes in the workspace + +## In-flight work locations + +These files contain ephemeral state that may be lost in compaction: + +- `.factory/strategy/current.md` — the current hypothesis or focus area +- `.factory/reviews/` — recent agent review outputs and CEO verdicts +- `.factory/strategy/backlog.md` — the working backlog diff --git a/factory/cli.py b/factory/cli.py index af588fd0..3ed37283 100644 --- a/factory/cli.py +++ b/factory/cli.py @@ -3142,16 +3142,16 @@ def _tmux_available() -> bool: def _build_tmux_run_args(args: argparse.Namespace, project_path: Path, model: str | None) -> str: - """Build the 'factory run ...' command string from parsed args.""" - parts = [f"factory run {project_path}"] + """Build the 'factory ceo ...' 
def cmd_refactory(args: argparse.Namespace) -> int:
    """Launch the re:factory persistent supervisor agent.

    Sets up the workspace, resolves the session ID, and replaces the current
    process with an interactive ``claude`` session via ``os.execvp``.

    Args:
        args: Parsed CLI namespace. ``path``, ``reset`` and ``model`` are all
            read with ``getattr`` defaults, so the namespace produced by a
            bare ``factory`` invocation (which has none of them) is accepted.

    Returns:
        1 when the ``claude`` executable is not on PATH. On success the
        process image is replaced and this function never returns; the final
        ``return 0`` is only reached when ``os.execvp`` is patched out.
    """
    import json
    import shutil

    claude_path = shutil.which("claude")
    if not claude_path:
        print("Error: 'claude' CLI not found. Install Claude Code first.", file=sys.stderr)
        return 1

    # Project imports are deferred until we know there is something to launch.
    from factory.agents.runner import resolve_prompt
    from factory.refactory import get_session_id, setup_workspace

    project_path = Path(getattr(args, "path", None) or Path.cwd()).resolve()
    reset = getattr(args, "reset", False)
    model = getattr(args, "model", None)

    workspace = setup_workspace(project_path)

    # Remember which ID (if any) is stored *before* asking for one.
    # get_session_id() silently mints a fresh ID when session.json is missing,
    # corrupt, or holds no usable ID; a fresh ID must be started with
    # --session-id, because --resume would point at a session that was never
    # created.
    stored_id = None
    if not reset:
        session_file = project_path / ".refactory" / "session.json"
        try:
            payload = json.loads(session_file.read_text())
        except (OSError, ValueError):
            payload = None
        if isinstance(payload, dict):
            stored_id = payload.get("session_id")

    session_id = get_session_id(project_path, reset=reset)
    is_new_session = session_id != stored_id

    # Keep the prompt at a stable path inside the workspace. execvp never
    # returns, so a delete=False temp file could not be cleaned up and one
    # would be leaked on every launch.
    prompt_path = workspace / "system-prompt.md"
    prompt_path.write_text(resolve_prompt("refactory"), encoding="utf-8")

    # NOTE(review): permission prompts are disabled for the supervisor session;
    # confirm this is intended for every workspace it is launched in.
    cmd = [
        "claude",
        "--session-id" if is_new_session else "--resume", session_id,
        "--append-system-prompt-file", str(prompt_path),
        "--dangerously-skip-permissions",
    ]
    if model:
        cmd.extend(["--model", model])

    os.chdir(project_path)
    # Exec the path resolved above rather than searching PATH a second time
    # after the working directory has changed.
    os.execvp(claude_path, cmd)
    return 0  # unreachable after execvp
"""Workspace setup and session management for the re:factory agent."""

from __future__ import annotations

import copy
import json
import shutil
import stat
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

# Settings keys that re:factory manages inside the project's settings.local.json.
SETTINGS_JSON: dict[str, Any] = {
    "mcpServers": {
        "factory": {
            "command": "factory",
            "args": ["mcp-serve"],
        }
    }
}

# Initial content of .refactory/CLAUDE.md (written only when the file is missing).
CLAUDE_MD_CONTENT = """\
# re:factory workspace

You are the re:factory supervisor. Use /slash commands and factory CLI to manage projects.
See your system prompt for full instructions.
"""

# Packaged sop-compact assets (hook scripts and the SOP document).
SOP_COMPACT_DIR = Path(__file__).parent / "agents" / "sop-compact"


def _read_json_object(path: Path) -> dict[str, Any]:
    """Return the JSON object stored at *path*, or ``{}`` if there is none.

    A missing or unreadable file, malformed JSON, and valid JSON whose top
    level is not an object are all treated as "nothing stored".
    """
    try:
        loaded = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return {}
    return loaded if isinstance(loaded, dict) else {}


def _command_hook(script: Path) -> dict[str, str]:
    """Build a settings hook entry that runs *script* by absolute path."""
    return {"type": "command", "command": str(script.resolve())}


def setup_workspace(project_path: Path) -> Path:
    """Set up re:factory for a project.

    Session state goes in ``<project>/.refactory/``. Skills and settings are
    installed into the project's own ``.claude/`` so the agent runs from the
    project root with full access to the source tree.

    Idempotent and safe to call on every launch. Hook scripts, the SOP and
    the skill files are re-copied each time to pick up updates. Two files are
    treated as user/agent-owned and are not clobbered:

    * ``.claude/settings.local.json`` is merged: the managed ``mcpServers``
      entries and the ``PreCompact`` / ``SessionStart`` hooks are replaced,
      every other key already in the file is kept.
    * ``.refactory/CLAUDE.md`` is created only when missing. The sop-compact
      SOP names it as the place durable learnings are appended, so rewriting
      it on each launch would discard them.

    Args:
        project_path: Root directory of the project.

    Returns:
        The workspace path (``<project>/.refactory``).
    """
    workspace = project_path / ".refactory"
    sop_dir = workspace / ".claude" / "sop-compact"
    sop_dir.mkdir(parents=True, exist_ok=True)  # also creates the workspace itself

    executable_bits = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    for hook_name in ("pre-compact.sh", "session-start.sh"):
        src = SOP_COMPACT_DIR / hook_name
        if src.is_file():
            dst = sop_dir / hook_name
            shutil.copy2(src, dst)
            dst.chmod(dst.stat().st_mode | executable_bits)

    sop_src = SOP_COMPACT_DIR / "sop-compact.md"
    if sop_src.is_file():
        shutil.copy2(sop_src, workspace / ".claude" / "sop-compact.md")

    project_claude_dir = project_path / ".claude"
    project_claude_dir.mkdir(exist_ok=True)

    commands_dir = project_claude_dir / "commands"
    commands_dir.mkdir(exist_ok=True)

    skills_src = Path(__file__).parent / "agents" / "skills"
    if skills_src.is_dir():
        for skill_file in skills_src.glob("*.md"):
            shutil.copy2(skill_file, commands_dir / skill_file.name)

    # Merge into whatever the user already has instead of replacing the file.
    settings_path = project_claude_dir / "settings.local.json"
    settings = _read_json_object(settings_path)

    for key, managed in SETTINGS_JSON.items():
        current = settings.get(key)
        # deepcopy keeps the module-level constant from being shared with,
        # and later mutated through, the merged settings.
        if isinstance(managed, dict) and isinstance(current, dict):
            current.update(copy.deepcopy(managed))
        else:
            settings[key] = copy.deepcopy(managed)

    hooks = settings.get("hooks")
    if not isinstance(hooks, dict):
        hooks = {}
    hooks["PreCompact"] = [
        {"hooks": [_command_hook(sop_dir / "pre-compact.sh")]},
    ]
    hooks["SessionStart"] = [
        {"matcher": "*", "hooks": [_command_hook(sop_dir / "session-start.sh")]},
    ]
    settings["hooks"] = hooks

    settings_path.write_text(json.dumps(settings, indent=2) + "\n", encoding="utf-8")

    claude_md_path = workspace / "CLAUDE.md"
    if not claude_md_path.exists():
        claude_md_path.write_text(CLAUDE_MD_CONTENT, encoding="utf-8")

    return workspace


def get_session_id(project_path: Path, reset: bool = False) -> str:
    """Read or create a persistent session ID for a project.

    The session ID is stored in ``<project>/.refactory/session.json``. When
    that file is missing, unreadable, malformed, not a JSON object, or holds
    no non-empty string ID, a new ID is generated and saved.

    Args:
        project_path: Root directory of the project.
        reset: If True, generate a new session ID even if one exists.

    Returns:
        The session ID string.
    """
    if not reset:
        session_file = project_path / ".refactory" / "session.json"
        stored = _read_json_object(session_file).get("session_id")
        if isinstance(stored, str) and stored:
            return stored

    sid = str(uuid.uuid4())
    save_session_id(project_path, sid)
    return sid


def save_session_id(project_path: Path, session_id: str) -> None:
    """Write session state to ``<project>/.refactory/session.json``.

    Creates the ``.refactory`` directory if needed and records the ID along
    with a UTC creation timestamp.
    """
    session_file = project_path / ".refactory" / "session.json"
    session_file.parent.mkdir(parents=True, exist_ok=True)
    data = {
        "session_id": session_id,
        "created": datetime.now(timezone.utc).isoformat(),
    }
    session_file.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
"""Tests for the re:factory agent workspace setup and session management."""

from __future__ import annotations

import json
import os
import stat
from pathlib import Path
from typing import get_args
from unittest.mock import patch

import pytest

from factory.refactory import (
    CLAUDE_MD_CONTENT,
    get_session_id,
    save_session_id,
    setup_workspace,
)


# ── setup_workspace ──────────────────────────────────────────────


class TestSetupWorkspace:
    """Files and directories that setup_workspace() lays down."""

    def test_creates_directories(self, tmp_path: Path) -> None:
        setup_workspace(tmp_path)
        claude_dir = tmp_path / ".claude"
        assert (tmp_path / ".refactory").is_dir()
        assert claude_dir.is_dir()
        assert (claude_dir / "commands").is_dir()

    def test_writes_settings_json(self, tmp_path: Path) -> None:
        setup_workspace(tmp_path)
        settings_file = tmp_path / ".claude" / "settings.local.json"
        assert settings_file.exists()
        parsed = json.loads(settings_file.read_text())
        assert "factory" in parsed["mcpServers"]

    def test_writes_claude_md(self, tmp_path: Path) -> None:
        setup_workspace(tmp_path)
        target = tmp_path / ".refactory" / "CLAUDE.md"
        assert target.exists()
        assert target.read_text() == CLAUDE_MD_CONTENT

    def test_copies_skills(self, tmp_path: Path) -> None:
        setup_workspace(tmp_path)
        installed_dir = tmp_path / ".claude" / "commands"
        source_dir = Path(__file__).parent.parent / "factory" / "agents" / "skills"
        skill_names = [path.name for path in source_dir.glob("*.md")]
        assert len(skill_names) > 0, "No skill source files found"
        for name in skill_names:
            assert (installed_dir / name).exists(), f"Missing skill: {name}"

    def test_idempotent(self, tmp_path: Path) -> None:
        first_run = setup_workspace(tmp_path)
        second_run = setup_workspace(tmp_path)
        assert first_run == second_run
        settings_file = tmp_path / ".claude" / "settings.local.json"
        assert "factory" in json.loads(settings_file.read_text())["mcpServers"]

    def test_copies_hooks(self, tmp_path: Path) -> None:
        setup_workspace(tmp_path)
        hook_dir = tmp_path / ".refactory" / ".claude" / "sop-compact"
        assert hook_dir.is_dir()
        for name in ("pre-compact.sh", "session-start.sh"):
            script = hook_dir / name
            assert script.exists(), f"Missing hook: {name}"
            assert script.stat().st_mode & stat.S_IXUSR, f"Hook not executable: {name}"

    def test_copies_sop(self, tmp_path: Path) -> None:
        setup_workspace(tmp_path)
        sop_file = tmp_path / ".refactory" / ".claude" / "sop-compact.md"
        assert sop_file.exists()
        text = sop_file.read_text()
        assert "re:factory" in text
        assert "Promotion targets" in text

    def test_settings_json_has_hooks(self, tmp_path: Path) -> None:
        setup_workspace(tmp_path)
        settings_file = tmp_path / ".claude" / "settings.local.json"
        parsed = json.loads(settings_file.read_text())
        assert "hooks" in parsed
        registered = parsed["hooks"]
        assert "PreCompact" in registered
        assert "SessionStart" in registered
        # Each event must point at its script by absolute path.
        for event, script_name in (
            ("PreCompact", "pre-compact.sh"),
            ("SessionStart", "session-start.sh"),
        ):
            command = registered[event][0]["hooks"][0]["command"]
            assert script_name in command
            assert os.path.isabs(command)


# ── Session ID ───────────────────────────────────────────────────


class TestSessionId:
    """Persistence of the session ID in .refactory/session.json."""

    def test_creates_new(self, tmp_path: Path) -> None:
        state_dir = tmp_path / ".refactory"
        state_dir.mkdir()
        session_file = state_dir / "session.json"
        assert not session_file.exists()
        sid = get_session_id(tmp_path)
        assert isinstance(sid, str)
        # Canonical UUID text form: 36 characters, four hyphens.
        assert len(sid) == 36
        assert sid.count("-") == 4
        assert session_file.exists()

    def test_returns_existing(self, tmp_path: Path) -> None:
        (tmp_path / ".refactory").mkdir()
        original = get_session_id(tmp_path)
        assert get_session_id(tmp_path) == original

    def test_reset(self, tmp_path: Path) -> None:
        (tmp_path / ".refactory").mkdir()
        before = get_session_id(tmp_path)
        after = get_session_id(tmp_path, reset=True)
        assert before != after
        assert len(after) == 36

    def test_save_roundtrip(self, tmp_path: Path) -> None:
        (tmp_path / ".refactory").mkdir()
        custom_id = "abcdef1234567890abcdef1234567890"
        save_session_id(tmp_path, custom_id)
        assert get_session_id(tmp_path) == custom_id

    def test_corrupt_json_generates_new(self, tmp_path: Path) -> None:
        session_file = tmp_path / ".refactory" / "session.json"
        session_file.parent.mkdir(parents=True, exist_ok=True)
        session_file.write_text("{corrupt json!!")
        sid = get_session_id(tmp_path)
        assert isinstance(sid, str)
        assert len(sid) == 36


# ── Agent role registration ──────────────────────────────────────


class TestAgentRegistration:
    """The refactory role is declared both in code and in agents.yml."""

    def test_refactory_role_in_agent_role(self) -> None:
        from factory.agents.runner import AgentRole

        assert "refactory" in get_args(AgentRole)

    def test_refactory_in_agents_yml(self) -> None:
        import yaml

        repo_root = Path(__file__).parent.parent
        registry = yaml.safe_load((repo_root / "factory" / "agents" / "agents.yml").read_text())
        assert "refactory" in registry
        entry = registry["refactory"]
        assert "model" in entry
        assert "tools" in entry
# ── CLI integration ──────────────────────────────────────────────


class TestCLIIntegration:
    """Argument parsing and prompt resolution for the refactory subcommand."""

    def test_refactory_subcommand_exists(self) -> None:
        from factory.cli import build_parser

        parser = build_parser()
        args = parser.parse_args(["refactory"])
        assert args.command == "refactory"

    def test_refactory_accepts_path_arg(self) -> None:
        from factory.cli import build_parser

        parser = build_parser()
        args = parser.parse_args(["refactory", "/some/path"])
        assert args.path == "/some/path"

    def test_refactory_path_default_none(self) -> None:
        from factory.cli import build_parser

        parser = build_parser()
        args = parser.parse_args(["refactory"])
        assert args.path is None

    def test_refactory_prompt_resolves(self) -> None:
        from factory.agents.runner import resolve_prompt

        prompt = resolve_prompt("refactory")
        assert isinstance(prompt, str)
        assert len(prompt) > 0


# ── cmd_refactory ────────────────────────────────────────────────


class TestCmdRefactory:
    """Command line that cmd_refactory() hands to os.execvp."""

    @pytest.fixture(autouse=True)
    def _restore_cwd(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        """Undo the real os.chdir() that cmd_refactory performs.

        Only os.execvp is patched in these tests, so cmd_refactory changes the
        working directory of the pytest process itself. monkeypatch.chdir
        records the directory we started in and restores it at teardown, so
        later tests do not inherit a changed cwd.
        """
        monkeypatch.chdir(tmp_path)

    @staticmethod
    def _launch(argv: list[str]) -> list[str]:
        """Run cmd_refactory with a fake claude on PATH; return the exec'd argv."""
        from factory.cli import build_parser, cmd_refactory

        args = build_parser().parse_args(argv)
        with patch("shutil.which", return_value="/usr/bin/claude"), \
             patch("os.execvp") as mock_exec:
            cmd_refactory(args)
        return mock_exec.call_args[0][1]

    def test_no_claude_returns_error(self, tmp_path: Path) -> None:
        from factory.cli import cmd_refactory, build_parser

        parser = build_parser()
        args = parser.parse_args(["refactory", str(tmp_path)])
        with patch("shutil.which", return_value=None):
            code = cmd_refactory(args)
        assert code == 1

    def test_new_session_uses_session_id(self, tmp_path: Path) -> None:
        cmd = self._launch(["refactory", str(tmp_path)])
        assert "--session-id" in cmd
        assert "--resume" not in cmd
        assert "--append-system-prompt-file" in cmd

    def test_existing_session_uses_resume(self, tmp_path: Path) -> None:
        save_session_id(tmp_path, "existing-uuid")
        cmd = self._launch(["refactory", str(tmp_path)])
        assert "--resume" in cmd
        assert "--session-id" not in cmd
        resume_idx = cmd.index("--resume")
        assert cmd[resume_idx + 1] == "existing-uuid"

    def test_reset_flag_uses_session_id(self, tmp_path: Path) -> None:
        save_session_id(tmp_path, "old-uuid")
        cmd = self._launch(["refactory", "--reset", str(tmp_path)])
        assert "--session-id" in cmd
        assert "--resume" not in cmd

    def test_model_flag_forwarded(self, tmp_path: Path) -> None:
        cmd = self._launch(["refactory", "--model", "sonnet", str(tmp_path)])
        assert "--model" in cmd
        model_idx = cmd.index("--model")
        assert cmd[model_idx + 1] == "sonnet"

    def test_default_path_uses_cwd(self, tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None:
        monkeypatch.chdir(tmp_path)
        self._launch(["refactory"])

        assert (tmp_path / ".refactory").is_dir()
        assert (tmp_path / ".refactory" / "session.json").exists()
"""Verify subprocess uses 1MB readline limit, not default 64KB.""" - source = Path("factory/runners/_subprocess.py").read_text() + source = (Path(__file__).parent.parent / "factory" / "runners" / "_subprocess.py").read_text() tree = ast.parse(source) for node in ast.walk(tree): if isinstance(node, ast.Call) and "create_subprocess_exec" in ast.dump(node): diff --git a/tests/test_tmux_cli.py b/tests/test_tmux_cli.py index 0c4153a2..34b80813 100644 --- a/tests/test_tmux_cli.py +++ b/tests/test_tmux_cli.py @@ -133,9 +133,9 @@ def test_propagates_all_flags(self) -> None: result = _build_tmux_run_args(args, Path("/tmp/project"), "opus-4") assert "--mode improve" in result - assert "--loop" in result - assert "--interval 900" in result - assert "--max-cycles 5" in result + assert "--loop" not in result + assert "--interval" not in result + assert "--max-cycles" not in result assert "--model" in result assert "--no-github" in result assert "--profile" in result @@ -172,7 +172,7 @@ def test_minimal_args(self) -> None: bg_agents=False, tmux_persist=False, use_profile=False, ) result = _build_tmux_run_args(args, Path("/tmp/p"), None) - assert result == "factory run /tmp/p" + assert result == "factory ceo /tmp/p" class TestCmdTmuxStop: