diff --git a/CLAUDE.md b/CLAUDE.md index d9f6a3145b..4a96a7e246 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -428,6 +428,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran - **Worktree-specific branches** - each git worktree gets its own shadow branch namespace, preventing conflicts - **Supports multiple concurrent sessions** - checkpoints from different sessions in the same directory interleave on the same shadow branch - Condenses session logs to permanent `entire/checkpoints/v1` branch on user commits +- Each committed session stores the raw transcript (`full.jsonl`, read by CLI rewind/resume/explain) plus a best-effort compact transcript (`transcript.jsonl`, generated via `transcript/compact` and pre-sliced to the checkpoint's `checkpoint_transcript_start`). The root `metadata.json` `sessions[].transcript` pointer targets `transcript.jsonl` when generated, falling back to `full.jsonl`. Both are pushed with the v1 branch. - When `checkpoints_version` is `1.1`, best-effort mirrors v1 metadata to the `refs/entire/checkpoints/v1.1` read ref after entire-managed v1 writes and fetches; mirror failures are logged, not fatal. The resolver also adds v1.1 to the push set, so `PrePush` pushes it to the configured remote alongside v1 (re-pointing the mirror at the current v1 tip first); v1.1 is a non-branch ref, so it gets no origin-tracking shadow and reads do not bootstrap it from origin (reads target v1.1 while Primary stays v1). The resume bootstrap that promotes local v1 from origin's remote-tracking ref is the deliberate exception — it does not mirror and is skipped entirely in v1.1 mode. Read paths use the configured ref as-is. - Uses the `post-rewrite` Git hook to keep local session linkage aligned after amend/rebase rewrites - Builds git trees in-memory using go-git plumbing APIs diff --git a/cmd/entire/cli/checkpoint/checkpoint.go b/cmd/entire/cli/checkpoint/checkpoint.go index 13a70d93dc..24c4cc35bb 100644 --- a/cmd/entire/cli/checkpoint/checkpoint.go +++ b/cmd/entire/cli/checkpoint/checkpoint.go @@ -562,7 +562,10 @@ func (m CommittedMetadata) GetTranscriptStart() int { // Paths include the full checkpoint path prefix (e.g., "/a1/b2c3d4e5f6/1/metadata.json"). // Used in CheckpointSummary.Sessions to map session IDs to their file locations. type SessionFilePaths struct { - Metadata string `json:"metadata"` + Metadata string `json:"metadata"` + // Transcript points at the compact transcript.jsonl when one was + // generated, otherwise at the raw full.jsonl. Checkpoints written by + // older CLI versions always point at full.jsonl. Transcript string `json:"transcript,omitempty"` ContentHash string `json:"content_hash,omitempty"` Prompt string `json:"prompt"` @@ -579,7 +582,8 @@ type SessionFilePaths struct { // ├── metadata.json # This CheckpointSummary // ├── 1/ # First session // │ ├── metadata.json # Session-specific CommittedMetadata -// │ ├── full.jsonl +// │ ├── full.jsonl # Raw agent transcript +// │ ├── transcript.jsonl # Compact transcript scoped to this checkpoint // │ ├── prompt.txt // │ └── content_hash.txt // ├── 2/ # Second session diff --git a/cmd/entire/cli/checkpoint/committed.go b/cmd/entire/cli/checkpoint/committed.go index dbfa1bebaa..1bc021ba30 100644 --- a/cmd/entire/cli/checkpoint/committed.go +++ b/cmd/entire/cli/checkpoint/committed.go @@ -26,6 +26,7 @@ import ( "github.com/entireio/cli/cmd/entire/cli/paths" "github.com/entireio/cli/cmd/entire/cli/settings" "github.com/entireio/cli/cmd/entire/cli/trailers" + transcriptcompact "github.com/entireio/cli/cmd/entire/cli/transcript/compact" "github.com/entireio/cli/cmd/entire/cli/validation" "github.com/entireio/cli/cmd/entire/cli/vercelconfig" "github.com/entireio/cli/cmd/entire/cli/versioninfo" @@ -292,7 +293,8 @@ func (s *GitStore) writeFinalTaskCheckpoint(ctx context.Context, opts WriteCommi // ├── metadata.json # CheckpointSummary (aggregated stats) // ├── 1/ # First session // │ ├── metadata.json # CommittedMetadata (session-specific, includes initial_attribution) -// │ ├── full.jsonl +// │ ├── full.jsonl # Raw agent transcript (CLI rewind/resume/explain) +// │ ├── transcript.jsonl # Compact transcript scoped to this checkpoint (referenced by metadata.json) // │ ├── prompt.txt // │ └── content_hash.txt // ├── 2/ # Second session @@ -402,13 +404,15 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom } } - // Write transcript - wroteTranscript, err := s.writeTranscript(ctx, opts, sessionPath, entries) + // Write transcript. The pointer targets the compact transcript.jsonl when + // available; full.jsonl stays in the tree for CLI rewind/resume/explain, + // which read it by filename rather than through these paths. + transcriptPointer, err := s.writeTranscript(ctx, opts, sessionPath, entries) if err != nil { return filePaths, err } - if wroteTranscript { - filePaths.Transcript = "/" + sessionPath + paths.TranscriptFileName + if transcriptPointer != "" { + filePaths.Transcript = "/" + sessionPath + transcriptPointer filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName } @@ -716,9 +720,12 @@ func aggregateTokenUsage(a, b *agent.TokenUsage) *agent.TokenUsage { return result } -// writeTranscript writes the transcript and content hash to the checkpoint entries. -// Returns (true, nil) if files were written, (false, nil) if transcript was empty. -func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry) (bool, error) { +// writeTranscript writes the transcript, compact transcript, and content hash +// to the checkpoint entries. Returns the session-relative filename the +// metadata transcript pointer should target: transcript.jsonl when the +// compact transcript was generated (best-effort), full.jsonl otherwise, or "" +// when the transcript was empty and nothing was written. +func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry) (string, error) { logCtx := logging.WithComponent(ctx, "checkpoint") transcriptBytes := opts.Transcript.Bytes() @@ -734,13 +741,13 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio if len(rawData) > 0 { redacted, redactErr := redact.JSONLBytes(rawData) if redactErr != nil { - return false, fmt.Errorf("failed to redact transcript from file: %w", redactErr) + return "", fmt.Errorf("failed to redact transcript from file: %w", redactErr) } transcriptBytes = redacted.Bytes() } } if len(transcriptBytes) == 0 { - return false, nil + return "", nil } if opts.Agent == agent.AgentTypeCodex { @@ -754,7 +761,7 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio if err != nil { chunkTranscriptSpan.RecordError(err) chunkTranscriptSpan.End() - return false, fmt.Errorf("failed to chunk transcript: %w", err) + return "", fmt.Errorf("failed to chunk transcript: %w", err) } chunkTranscriptSpan.End() chunkDuration := time.Since(chunkStart) @@ -768,7 +775,7 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio if err != nil { writeTranscriptBlobsSpan.RecordError(err) writeTranscriptBlobsSpan.End() - return false, err + return "", err } entries[chunkPath] = object.TreeEntry{ Name: chunkPath, @@ -787,7 +794,7 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio if err != nil { contentHashSpan.RecordError(err) contentHashSpan.End() - return false, err + return "", err } entries[basePath+paths.ContentHashFileName] = object.TreeEntry{ Name: basePath + paths.ContentHashFileName, @@ -796,6 +803,11 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio } contentHashSpan.End() + pointerFile := paths.TranscriptFileName + if s.writeCompactTranscript(logCtx, opts.Agent, opts.CheckpointTranscriptStart, transcriptBytes, basePath, entries) { + pointerFile = paths.CompactTranscriptFileName + } + logging.Debug(logCtx, "write transcript timings", slog.String("session_id", opts.SessionID), slog.String("checkpoint_id", opts.CheckpointID.String()), @@ -805,8 +817,72 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio slog.Int64("write_transcript_content_hash_ms", time.Since(contentHashStart).Milliseconds()), slog.Int("transcript_bytes", len(transcriptBytes)), slog.Int("chunk_count", len(chunks)), + slog.String("transcript_pointer", pointerFile), ) - return true, nil + return pointerFile, nil +} + +// compactAgentName resolves the agent slug used in compact transcript lines +// (e.g. "claude-code"). Falls back to the raw agent type string when the +// agent type is not registered. +func compactAgentName(agentType types.AgentType) string { + if ag, err := agent.GetByAgentType(agentType); err == nil { + return string(ag.Name()) + } + return string(agentType) +} + +// writeCompactTranscript converts the pre-redacted full transcript into the +// compact transcript.jsonl format, scoped to this checkpoint via startLine, +// and records it at sessionPath in the tree. Best-effort: the compact +// transcript is derived data, so failures are logged and never fail the +// checkpoint write. Returns true when transcript.jsonl was recorded. +func (s *GitStore) writeCompactTranscript(ctx context.Context, agentType types.AgentType, startLine int, transcriptBytes []byte, sessionPath string, entries map[string]object.TreeEntry) bool { + compactCtx, compactSpan := perf.Start(ctx, "write_compact_transcript") + defer compactSpan.End() + + compacted, err := transcriptcompact.Compact(redact.AlreadyRedacted(transcriptBytes), transcriptcompact.MetadataFields{ + Agent: compactAgentName(agentType), + CLIVersion: versioninfo.Version, + StartLine: startLine, + }) + if err != nil { + compactSpan.RecordError(err) + logging.Warn(compactCtx, "compact transcript generation failed, skipping transcript.jsonl", + slog.String("agent", string(agentType)), + slog.String("error", err.Error()), + ) + return false + } + if len(bytes.TrimSpace(compacted)) == 0 { + logging.Debug(compactCtx, "compact transcript empty, skipping transcript.jsonl", + slog.String("agent", string(agentType)), + ) + return false + } + if len(compacted) > agent.MaxChunkSize { + logging.Warn(compactCtx, "compact transcript exceeds max blob size, skipping transcript.jsonl", + slog.String("agent", string(agentType)), + slog.Int("compact_bytes", len(compacted)), + ) + return false + } + + blobHash, err := CreateBlobFromContent(s.repo, compacted) + if err != nil { + compactSpan.RecordError(err) + logging.Warn(compactCtx, "failed to create compact transcript blob, skipping transcript.jsonl", + slog.String("error", err.Error()), + ) + return false + } + compactPath := sessionPath + paths.CompactTranscriptFileName + entries[compactPath] = object.TreeEntry{ + Name: compactPath, + Mode: filemode.Regular, + Hash: blobHash, + } + return true } // mergeFilesTouched combines two file lists, removing duplicates. @@ -946,7 +1022,8 @@ type taskCheckpointData struct { // ├── metadata.json # CheckpointSummary with sessions map // ├── 0/ # First session // │ ├── metadata.json # Session-specific metadata -// │ └── full.jsonl # Transcript +// │ ├── full.jsonl # Raw agent transcript +// │ └── transcript.jsonl # Compact transcript (referenced by metadata.json) // ├── 1/ # Second session // └── ... func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*CheckpointSummary, error) { @@ -1474,12 +1551,14 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti // Find session index matching opts.SessionID sessionIndex := -1 + var sessionMeta *CommittedMetadata for i := range len(checkpointSummary.Sessions) { metaPath := fmt.Sprintf("%s%d/%s", basePath, i, paths.MetadataFileName) if metaEntry, metaExists := entries[metaPath]; metaExists { meta, metaErr := s.readMetadataFromBlob(metaEntry.Hash) if metaErr == nil && meta.SessionID == opts.SessionID { sessionIndex = i + sessionMeta = meta break } } @@ -1492,6 +1571,10 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti slog.String("checkpoint_id", string(opts.CheckpointID)), slog.Int("fallback_index", sessionIndex), ) + metaPath := fmt.Sprintf("%s%d/%s", basePath, sessionIndex, paths.MetadataFileName) + if metaEntry, metaExists := entries[metaPath]; metaExists { + sessionMeta, _ = s.readMetadataFromBlob(metaEntry.Hash) //nolint:errcheck // best-effort; nil meta means start 0 + } } sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex) @@ -1499,7 +1582,15 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti // Replace transcript (full replace, not append). // Transcript is pre-redacted by the caller (enforced by RedactedBytes type). if opts.Transcript.Len() > 0 { - if err := s.replaceTranscript(ctx, opts.Transcript, opts.Agent, opts.PrecomputedBlobs, sessionPath, entries); err != nil { + agentType := opts.Agent + startLine := 0 + if sessionMeta != nil { + startLine = sessionMeta.GetTranscriptStart() + if agentType == "" { + agentType = sessionMeta.Agent + } + } + if err := s.replaceTranscript(ctx, opts.Transcript, agentType, startLine, opts.PrecomputedBlobs, sessionPath, entries); err != nil { return fmt.Errorf("failed to replace transcript: %w", err) } } @@ -1574,14 +1665,19 @@ func (s *GitStore) replaceSkillEvents(skillEvents []agent.SkillEvent, sessionPat return nil } -// replaceTranscript writes the full transcript content, replacing any existing transcript. -// Also removes any chunk files from a previous write and updates the content hash. +// replaceTranscript writes the full transcript content, replacing any existing +// transcript, and regenerates the compact transcript.jsonl scoped at startLine +// (the checkpoint's transcript start). Also removes any chunk files from a +// previous write and updates the content hash. // // Short-circuits when the existing content_hash.txt already matches the new // transcript's sha256 — in that case the chunk entries are preserved as-is and // no chunking/zlib happens. Use precomputed (non-nil) to reuse blob hashes -// computed once across multiple checkpoints. -func (s *GitStore) replaceTranscript(ctx context.Context, transcript redact.RedactedBytes, agentType types.AgentType, precomputed *PrecomputedTranscriptBlobs, sessionPath string, entries map[string]object.TreeEntry) error { +// computed once across multiple checkpoints. The compact transcript cannot +// reuse precomputed blobs: each checkpoint in a turn shares the full +// transcript but has its own start offset, so the compact content differs per +// checkpoint. +func (s *GitStore) replaceTranscript(ctx context.Context, transcript redact.RedactedBytes, agentType types.AgentType, startLine int, precomputed *PrecomputedTranscriptBlobs, sessionPath string, entries map[string]object.TreeEntry) error { // Ignore precompute if invariants are violated — fall back to fresh chunking. if precomputed != nil && !precomputed.isUsable() { precomputed = nil @@ -1665,6 +1761,11 @@ func (s *GitStore) replaceTranscript(ctx context.Context, transcript redact.Reda Hash: hashBlob, } + // Regenerate the compact transcript from the new content. Best-effort: on + // generation failure the previous transcript.jsonl entry (if any) is left + // in place so the metadata.json transcript pointer never dangles. + s.writeCompactTranscript(ctx, agentType, startLine, transcript.Bytes(), sessionPath, entries) + return nil } diff --git a/cmd/entire/cli/checkpoint/committed_compact_transcript_test.go b/cmd/entire/cli/checkpoint/committed_compact_transcript_test.go new file mode 100644 index 0000000000..8410e053d6 --- /dev/null +++ b/cmd/entire/cli/checkpoint/committed_compact_transcript_test.go @@ -0,0 +1,241 @@ +package checkpoint + +import ( + "context" + "encoding/json" + "strings" + "testing" + + "github.com/entireio/cli/cmd/entire/cli/agent" + "github.com/entireio/cli/cmd/entire/cli/checkpoint/id" + "github.com/entireio/cli/cmd/entire/cli/paths" + "github.com/entireio/cli/redact" + + // Registers the Claude Code agent so compactAgentName resolves the + // "claude-code" slug instead of falling back to the raw agent type. + _ "github.com/entireio/cli/cmd/entire/cli/agent/claudecode" +) + +// claudeStyleTranscript returns a Claude Code-format JSONL transcript with two +// user/assistant exchanges (4 lines total). +func claudeStyleTranscript() []byte { + lines := []string{ + `{"type":"user","uuid":"u1","timestamp":"2026-01-01T00:00:00Z","message":{"role":"user","content":"hello one"}}`, + `{"type":"assistant","uuid":"a1","timestamp":"2026-01-01T00:00:01Z","message":{"id":"msg_1","role":"assistant","content":[{"type":"text","text":"reply one"}],"usage":{"input_tokens":5,"output_tokens":7}}}`, + `{"type":"user","uuid":"u2","timestamp":"2026-01-01T00:00:02Z","message":{"role":"user","content":"hello two"}}`, + `{"type":"assistant","uuid":"a2","timestamp":"2026-01-01T00:00:03Z","message":{"id":"msg_2","role":"assistant","content":[{"type":"text","text":"reply two"}],"usage":{"input_tokens":6,"output_tokens":8}}}`, + } + return []byte(strings.Join(lines, "\n") + "\n") +} + +// readBranchFile reads a file from the committed checkpoints branch tree. +// Returns ("", false) when the file does not exist. +func readBranchFile(t *testing.T, store *GitStore, path string) (string, bool) { + t.Helper() + tree, err := store.getSessionsBranchTree() + if err != nil { + t.Fatalf("getSessionsBranchTree() error = %v", err) + } + file, err := tree.File(path) + if err != nil { + return "", false + } + content, err := file.Contents() + if err != nil { + t.Fatalf("Contents(%s) error = %v", path, err) + } + return content, true +} + +// compactTranscriptLine is the subset of the compact transcript line format +// asserted in these tests. +type compactTranscriptLine struct { + V int `json:"v"` + Agent string `json:"agent"` + Type string `json:"type"` + Content json.RawMessage `json:"content"` +} + +func parseCompactLines(t *testing.T, content string) []compactTranscriptLine { + t.Helper() + var lines []compactTranscriptLine + for _, raw := range strings.Split(strings.TrimSpace(content), "\n") { + var line compactTranscriptLine + if err := json.Unmarshal([]byte(raw), &line); err != nil { + t.Fatalf("compact transcript line is not valid JSON: %v\nline: %s", err, raw) + } + lines = append(lines, line) + } + return lines +} + +func TestWriteCommitted_WritesCompactTranscript(t *testing.T) { + t.Parallel() + repo, _ := setupTestRepo(t) + store := NewGitStore(repo, DefaultV1Refs()) + cpID := id.MustCheckpointID("a1b2c3d4e5f6") + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-001", + Strategy: "manual-commit", + Transcript: redact.AlreadyRedacted(claudeStyleTranscript()), + Prompts: []string{"hello one"}, + Agent: agent.AgentTypeClaudeCode, + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + sessionPath := cpID.Path() + "/0/" + + // full.jsonl is still written for CLI read paths. + if _, ok := readBranchFile(t, store, sessionPath+paths.TranscriptFileName); !ok { + t.Error("full.jsonl missing from checkpoint tree") + } + + // transcript.jsonl holds the compact format. + compactContent, ok := readBranchFile(t, store, sessionPath+paths.CompactTranscriptFileName) + if !ok { + t.Fatal("transcript.jsonl missing from checkpoint tree") + } + lines := parseCompactLines(t, compactContent) + if len(lines) != 4 { + t.Fatalf("compact transcript line count = %d, want 4\ncontent: %s", len(lines), compactContent) + } + for i, line := range lines { + if line.V != 1 { + t.Errorf("line %d: v = %d, want 1", i, line.V) + } + if line.Agent != "claude-code" { + t.Errorf("line %d: agent = %q, want %q", i, line.Agent, "claude-code") + } + } + if lines[0].Type != "user" || lines[1].Type != "assistant" { + t.Errorf("unexpected line types: %q, %q", lines[0].Type, lines[1].Type) + } + if !strings.Contains(compactContent, "reply two") { + t.Error("compact transcript missing assistant content") + } + + // Root metadata.json points at the compact transcript. + summary := readSummaryFromBranch(t, repo, cpID) + if len(summary.Sessions) != 1 { + t.Fatalf("session count = %d, want 1", len(summary.Sessions)) + } + wantTranscript := "/" + sessionPath + paths.CompactTranscriptFileName + if summary.Sessions[0].Transcript != wantTranscript { + t.Errorf("sessions[0].transcript = %q, want %q", summary.Sessions[0].Transcript, wantTranscript) + } + wantHash := "/" + sessionPath + paths.ContentHashFileName + if summary.Sessions[0].ContentHash != wantHash { + t.Errorf("sessions[0].content_hash = %q, want %q", summary.Sessions[0].ContentHash, wantHash) + } +} + +func TestWriteCommitted_CompactTranscriptScopedToCheckpointStart(t *testing.T) { + t.Parallel() + repo, _ := setupTestRepo(t) + store := NewGitStore(repo, DefaultV1Refs()) + cpID := id.MustCheckpointID("b2c3d4e5f6a1") + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-001", + Strategy: "manual-commit", + Transcript: redact.AlreadyRedacted(claudeStyleTranscript()), + Agent: agent.AgentTypeClaudeCode, + CheckpointTranscriptStart: 2, + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + compactContent, ok := readBranchFile(t, store, cpID.Path()+"/0/"+paths.CompactTranscriptFileName) + if !ok { + t.Fatal("transcript.jsonl missing from checkpoint tree") + } + if strings.Contains(compactContent, "hello one") || strings.Contains(compactContent, "reply one") { + t.Errorf("compact transcript contains content before checkpoint start:\n%s", compactContent) + } + if !strings.Contains(compactContent, "hello two") || !strings.Contains(compactContent, "reply two") { + t.Errorf("compact transcript missing checkpoint-scoped content:\n%s", compactContent) + } +} + +func TestWriteCommitted_NonCompactableTranscriptPointsAtFull(t *testing.T) { + t.Parallel() + repo, _ := setupTestRepo(t) + store := NewGitStore(repo, DefaultV1Refs()) + cpID := id.MustCheckpointID("c3d4e5f6a1b2") + + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-001", + Strategy: "manual-commit", + Transcript: redact.AlreadyRedacted([]byte("not json at all\nstill not json\n")), + Agent: agent.AgentTypeClaudeCode, + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + sessionPath := cpID.Path() + "/0/" + if _, ok := readBranchFile(t, store, sessionPath+paths.CompactTranscriptFileName); ok { + t.Error("transcript.jsonl written for non-compactable transcript") + } + + summary := readSummaryFromBranch(t, repo, cpID) + wantTranscript := "/" + sessionPath + paths.TranscriptFileName + if summary.Sessions[0].Transcript != wantTranscript { + t.Errorf("sessions[0].transcript = %q, want %q", summary.Sessions[0].Transcript, wantTranscript) + } +} + +func TestUpdateCommitted_RegeneratesCompactTranscript(t *testing.T) { + t.Parallel() + repo, _ := setupTestRepo(t) + store := NewGitStore(repo, DefaultV1Refs()) + cpID := id.MustCheckpointID("d4e5f6a1b2c3") + + initial := claudeStyleTranscript() + err := store.WriteCommitted(context.Background(), WriteCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-001", + Strategy: "manual-commit", + Transcript: redact.AlreadyRedacted(initial), + Agent: agent.AgentTypeClaudeCode, + AuthorName: "Test", + AuthorEmail: "test@test.com", + }) + if err != nil { + t.Fatalf("WriteCommitted() error = %v", err) + } + + extended := append([]byte{}, initial...) + extended = append(extended, + []byte(`{"type":"user","uuid":"u3","timestamp":"2026-01-01T00:00:04Z","message":{"role":"user","content":"hello three"}}`+"\n")...) + err = store.UpdateCommitted(context.Background(), UpdateCommittedOptions{ + CheckpointID: cpID, + SessionID: "session-001", + Transcript: redact.AlreadyRedacted(extended), + Agent: agent.AgentTypeClaudeCode, + }) + if err != nil { + t.Fatalf("UpdateCommitted() error = %v", err) + } + + compactContent, ok := readBranchFile(t, store, cpID.Path()+"/0/"+paths.CompactTranscriptFileName) + if !ok { + t.Fatal("transcript.jsonl missing after UpdateCommitted") + } + if !strings.Contains(compactContent, "hello three") { + t.Errorf("compact transcript not regenerated with new content:\n%s", compactContent) + } +} diff --git a/cmd/entire/cli/paths/paths.go b/cmd/entire/cli/paths/paths.go index c21b49ef04..cb65f52b7e 100644 --- a/cmd/entire/cli/paths/paths.go +++ b/cmd/entire/cli/paths/paths.go @@ -29,10 +29,13 @@ const ( PromptFileName = "prompt.txt" TranscriptFileName = "full.jsonl" TranscriptFileNameLegacy = "full.log" - MetadataFileName = "metadata.json" - CheckpointFileName = "checkpoint.json" - ContentHashFileName = "content_hash.txt" - SettingsFileName = "settings.json" + // CompactTranscriptFileName is the compact transcript stored alongside + // full.jsonl, pre-sliced to the checkpoint's own portion. + CompactTranscriptFileName = "transcript.jsonl" + MetadataFileName = "metadata.json" + CheckpointFileName = "checkpoint.json" + ContentHashFileName = "content_hash.txt" + SettingsFileName = "settings.json" ) // MetadataBranchName is the orphan branch used by manual-commit strategy to store metadata diff --git a/docs/architecture/sessions-and-checkpoints.md b/docs/architecture/sessions-and-checkpoints.md index 1517cd430f..2103cc1692 100644 --- a/docs/architecture/sessions-and-checkpoints.md +++ b/docs/architecture/sessions-and-checkpoints.md @@ -194,9 +194,10 @@ Metadata only, sharded by checkpoint ID. Supports **multiple sessions per checkp ├── metadata.json # CheckpointSummary (aggregated stats) ├── 0/ # First session (0-based indexing) │ ├── metadata.json # Session-specific CommittedMetadata -│ ├── full.jsonl +│ ├── full.jsonl # Raw agent transcript (CLI rewind/resume/explain) +│ ├── transcript.jsonl # Compact transcript, scoped to this checkpoint │ ├── prompt.txt # Checkpoint-scoped user prompts -│ └── content_hash.txt +│ └── content_hash.txt # sha256 of full.jsonl (dedup short-circuit) ├── 1/ # Second session │ ├── metadata.json │ ├── full.jsonl @@ -204,6 +205,21 @@ Metadata only, sharded by checkpoint ID. Supports **multiple sessions per checkp └── 2/ # Third session... ``` +**Compact transcript (`transcript.jsonl`):** generated best-effort from +`full.jsonl` via `transcript/compact` on every committed write and on +transcript replacement during finalization. Unlike `full.jsonl` (the +cumulative session transcript, scoped at read time via +`checkpoint_transcript_start`), `transcript.jsonl` is pre-sliced to the +checkpoint's own portion (`compact.Compact` is called with +`StartLine = checkpoint_transcript_start`), so it needs no offset to consume. +The root `metadata.json` `sessions[].transcript` pointer targets +`transcript.jsonl` when it was generated and falls back to `full.jsonl` +otherwise (e.g. unparseable/external-agent transcripts, or checkpoints written +by older CLI versions). CLI read paths (rewind/resume/explain) ignore the +pointer and read `full.jsonl` by filename. Generation failures are logged but +never fail the checkpoint write; during finalization a failed regeneration +keeps the previous `transcript.jsonl` so the pointer never dangles. + #### v1.1 local read mirror `entire/checkpoints/v1` remains the durable source of truth: committed writes @@ -244,7 +260,7 @@ diagnosis in `entire-refs.txt`. "sessions": [ { "metadata": "/ab/c123def456/0/metadata.json", - "transcript": "/ab/c123def456/0/full.jsonl", + "transcript": "/ab/c123def456/0/transcript.jsonl", "content_hash": "/ab/c123def456/0/content_hash.txt", "prompt": "/ab/c123def456/0/prompt.txt" } @@ -350,6 +366,7 @@ are for human readability in `git log` only. The CLI always reads from the tree │ │ (checkpoint_id: "a3b2c4d5e6f7") │ │ ├── 0/ │ │ │ ├── full.jsonl │ +│ │ ├── transcript.jsonl │ │ │ └── prompt.txt │ │ └── ... │ │ │