Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran
- **Worktree-specific branches** - each git worktree gets its own shadow branch namespace, preventing conflicts
- **Supports multiple concurrent sessions** - checkpoints from different sessions in the same directory interleave on the same shadow branch
- Condenses session logs to permanent `entire/checkpoints/v1` branch on user commits
- Each committed session stores the raw transcript (`full.jsonl`, read by CLI rewind/resume/explain) plus a best-effort compact transcript (`transcript.jsonl`, generated via `transcript/compact` and pre-sliced to the checkpoint's `checkpoint_transcript_start`). The root `metadata.json` `sessions[].transcript` pointer targets `transcript.jsonl` when generated, falling back to `full.jsonl`. Both are pushed with the v1 branch.
- When `checkpoints_version` is `1.1`, best-effort mirrors v1 metadata to the `refs/entire/checkpoints/v1.1` read ref after entire-managed v1 writes and fetches; mirror failures are logged, not fatal. The resolver also adds v1.1 to the push set, so `PrePush` pushes it to the configured remote alongside v1 (re-pointing the mirror at the current v1 tip first); v1.1 is a non-branch ref, so it gets no origin-tracking shadow and reads do not bootstrap it from origin (reads target v1.1 while Primary stays v1). The resume bootstrap that promotes local v1 from origin's remote-tracking ref is the deliberate exception — it does not mirror and is skipped entirely in v1.1 mode. Read paths use the configured ref as-is.
- Uses the `post-rewrite` Git hook to keep local session linkage aligned after amend/rebase rewrites
- Builds git trees in-memory using go-git plumbing APIs
Expand Down
8 changes: 6 additions & 2 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -562,7 +562,10 @@ func (m CommittedMetadata) GetTranscriptStart() int {
// Paths include the full checkpoint path prefix (e.g., "/a1/b2c3d4e5f6/1/metadata.json").
// Used in CheckpointSummary.Sessions to map session IDs to their file locations.
type SessionFilePaths struct {
Metadata string `json:"metadata"`
Metadata string `json:"metadata"`
// Transcript points at the compact transcript.jsonl when one was
// generated, otherwise at the raw full.jsonl. Checkpoints written by
// older CLI versions always point at full.jsonl.
Transcript string `json:"transcript,omitempty"`
ContentHash string `json:"content_hash,omitempty"`
Prompt string `json:"prompt"`
Expand All @@ -579,7 +582,8 @@ type SessionFilePaths struct {
// ├── metadata.json # This CheckpointSummary
// ├── 1/ # First session
// │ ├── metadata.json # Session-specific CommittedMetadata
// │ ├── full.jsonl
// │ ├── full.jsonl # Raw agent transcript
// │ ├── transcript.jsonl # Compact transcript scoped to this checkpoint
// │ ├── prompt.txt
// │ └── content_hash.txt
// ├── 2/ # Second session
Expand Down
141 changes: 121 additions & 20 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/entireio/cli/cmd/entire/cli/paths"
"github.com/entireio/cli/cmd/entire/cli/settings"
"github.com/entireio/cli/cmd/entire/cli/trailers"
transcriptcompact "github.com/entireio/cli/cmd/entire/cli/transcript/compact"
"github.com/entireio/cli/cmd/entire/cli/validation"
"github.com/entireio/cli/cmd/entire/cli/vercelconfig"
"github.com/entireio/cli/cmd/entire/cli/versioninfo"
Expand Down Expand Up @@ -292,7 +293,8 @@ func (s *GitStore) writeFinalTaskCheckpoint(ctx context.Context, opts WriteCommi
// ├── metadata.json # CheckpointSummary (aggregated stats)
// ├── 1/ # First session
// │ ├── metadata.json # CommittedMetadata (session-specific, includes initial_attribution)
// │ ├── full.jsonl
// │ ├── full.jsonl # Raw agent transcript (CLI rewind/resume/explain)
// │ ├── transcript.jsonl # Compact transcript scoped to this checkpoint (referenced by metadata.json)
// │ ├── prompt.txt
// │ └── content_hash.txt
// ├── 2/ # Second session
Expand Down Expand Up @@ -402,13 +404,15 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom
}
}

// Write transcript
wroteTranscript, err := s.writeTranscript(ctx, opts, sessionPath, entries)
// Write transcript. The pointer targets the compact transcript.jsonl when
// available; full.jsonl stays in the tree for CLI rewind/resume/explain,
// which read it by filename rather than through these paths.
transcriptPointer, err := s.writeTranscript(ctx, opts, sessionPath, entries)
if err != nil {
return filePaths, err
}
if wroteTranscript {
filePaths.Transcript = "/" + sessionPath + paths.TranscriptFileName
if transcriptPointer != "" {
filePaths.Transcript = "/" + sessionPath + transcriptPointer
filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName
}

Expand Down Expand Up @@ -716,9 +720,12 @@ func aggregateTokenUsage(a, b *agent.TokenUsage) *agent.TokenUsage {
return result
}

// writeTranscript writes the transcript and content hash to the checkpoint entries.
// Returns (true, nil) if files were written, (false, nil) if transcript was empty.
func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry) (bool, error) {
// writeTranscript writes the transcript, compact transcript, and content hash
// to the checkpoint entries. Returns the session-relative filename the
// metadata transcript pointer should target: transcript.jsonl when the
// compact transcript was generated (best-effort), full.jsonl otherwise, or ""
// when the transcript was empty and nothing was written.
func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptions, basePath string, entries map[string]object.TreeEntry) (string, error) {
logCtx := logging.WithComponent(ctx, "checkpoint")
transcriptBytes := opts.Transcript.Bytes()

Expand All @@ -734,13 +741,13 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio
if len(rawData) > 0 {
redacted, redactErr := redact.JSONLBytes(rawData)
if redactErr != nil {
return false, fmt.Errorf("failed to redact transcript from file: %w", redactErr)
return "", fmt.Errorf("failed to redact transcript from file: %w", redactErr)
}
transcriptBytes = redacted.Bytes()
}
}
if len(transcriptBytes) == 0 {
return false, nil
return "", nil
}

if opts.Agent == agent.AgentTypeCodex {
Expand All @@ -754,7 +761,7 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio
if err != nil {
chunkTranscriptSpan.RecordError(err)
chunkTranscriptSpan.End()
return false, fmt.Errorf("failed to chunk transcript: %w", err)
return "", fmt.Errorf("failed to chunk transcript: %w", err)
}
chunkTranscriptSpan.End()
chunkDuration := time.Since(chunkStart)
Expand All @@ -768,7 +775,7 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio
if err != nil {
writeTranscriptBlobsSpan.RecordError(err)
writeTranscriptBlobsSpan.End()
return false, err
return "", err
}
entries[chunkPath] = object.TreeEntry{
Name: chunkPath,
Expand All @@ -787,7 +794,7 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio
if err != nil {
contentHashSpan.RecordError(err)
contentHashSpan.End()
return false, err
return "", err
}
entries[basePath+paths.ContentHashFileName] = object.TreeEntry{
Name: basePath + paths.ContentHashFileName,
Expand All @@ -796,6 +803,11 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio
}
contentHashSpan.End()

pointerFile := paths.TranscriptFileName
if s.writeCompactTranscript(logCtx, opts.Agent, opts.CheckpointTranscriptStart, transcriptBytes, basePath, entries) {
pointerFile = paths.CompactTranscriptFileName
}

logging.Debug(logCtx, "write transcript timings",
slog.String("session_id", opts.SessionID),
slog.String("checkpoint_id", opts.CheckpointID.String()),
Expand All @@ -805,8 +817,72 @@ func (s *GitStore) writeTranscript(ctx context.Context, opts WriteCommittedOptio
slog.Int64("write_transcript_content_hash_ms", time.Since(contentHashStart).Milliseconds()),
slog.Int("transcript_bytes", len(transcriptBytes)),
slog.Int("chunk_count", len(chunks)),
slog.String("transcript_pointer", pointerFile),
)
return true, nil
return pointerFile, nil
}

// compactAgentName returns the agent name to embed in compact transcript
// lines (e.g. "claude-code").
//
// The name comes from the agent registered for agentType. When the lookup
// returns an error, the agent type itself is returned, converted to a string.
func compactAgentName(agentType types.AgentType) string {
	registered, lookupErr := agent.GetByAgentType(agentType)
	if lookupErr != nil {
		// No registered agent: fall back to the raw type value.
		return string(agentType)
	}
	return string(registered.Name())
}

// writeCompactTranscript derives the compact transcript.jsonl from the full
// transcript and registers it in entries under
// sessionPath+paths.CompactTranscriptFileName.
//
// transcriptBytes is wrapped with redact.AlreadyRedacted without further
// processing, so callers must pass content that has already been redacted.
// startLine is forwarded to the compactor as MetadataFields.StartLine to scope
// the output to this checkpoint.
//
// The operation is best-effort: the compact transcript is derived data, so
// every failure is logged and reported through the return value instead of an
// error, and entries is left untouched. It returns true only when the
// transcript.jsonl entry was recorded. It returns false when compaction
// fails, when the result is blank, when the result is larger than
// agent.MaxChunkSize, or when the blob cannot be created.
func (s *GitStore) writeCompactTranscript(ctx context.Context, agentType types.AgentType, startLine int, transcriptBytes []byte, sessionPath string, entries map[string]object.TreeEntry) bool {
	spanCtx, span := perf.Start(ctx, "write_compact_transcript")
	defer span.End()

	// Shared by every log line that identifies the agent.
	agentAttr := slog.String("agent", string(agentType))

	input := redact.AlreadyRedacted(transcriptBytes)
	fields := transcriptcompact.MetadataFields{
		Agent:      compactAgentName(agentType),
		CLIVersion: versioninfo.Version,
		StartLine:  startLine,
	}
	output, compactErr := transcriptcompact.Compact(input, fields)
	if compactErr != nil {
		span.RecordError(compactErr)
		logging.Warn(spanCtx, "compact transcript generation failed, skipping transcript.jsonl",
			agentAttr,
			slog.String("error", compactErr.Error()),
		)
		return false
	}

	// Reject results that are not worth storing or cannot be stored.
	switch {
	case len(bytes.TrimSpace(output)) == 0:
		logging.Debug(spanCtx, "compact transcript empty, skipping transcript.jsonl",
			agentAttr,
		)
		return false
	case len(output) > agent.MaxChunkSize:
		logging.Warn(spanCtx, "compact transcript exceeds max blob size, skipping transcript.jsonl",
			agentAttr,
			slog.Int("compact_bytes", len(output)),
		)
		return false
	}

	hash, blobErr := CreateBlobFromContent(s.repo, output)
	if blobErr != nil {
		span.RecordError(blobErr)
		logging.Warn(spanCtx, "failed to create compact transcript blob, skipping transcript.jsonl",
			slog.String("error", blobErr.Error()),
		)
		return false
	}

	entryPath := sessionPath + paths.CompactTranscriptFileName
	entries[entryPath] = object.TreeEntry{
		Name: entryPath,
		Mode: filemode.Regular,
		Hash: hash,
	}
	return true
}

// mergeFilesTouched combines two file lists, removing duplicates.
Expand Down Expand Up @@ -946,7 +1022,8 @@ type taskCheckpointData struct {
// ├── metadata.json # CheckpointSummary with sessions map
// ├── 0/ # First session
// │ ├── metadata.json # Session-specific metadata
// │ └── full.jsonl # Transcript
// │ ├── full.jsonl # Raw agent transcript
// │ └── transcript.jsonl # Compact transcript (referenced by metadata.json)
// ├── 1/ # Second session
// └── ...
func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.CheckpointID) (*CheckpointSummary, error) {
Expand Down Expand Up @@ -1474,12 +1551,14 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti

// Find session index matching opts.SessionID
sessionIndex := -1
var sessionMeta *CommittedMetadata
for i := range len(checkpointSummary.Sessions) {
metaPath := fmt.Sprintf("%s%d/%s", basePath, i, paths.MetadataFileName)
if metaEntry, metaExists := entries[metaPath]; metaExists {
meta, metaErr := s.readMetadataFromBlob(metaEntry.Hash)
if metaErr == nil && meta.SessionID == opts.SessionID {
sessionIndex = i
sessionMeta = meta
break
}
}
Expand All @@ -1492,14 +1571,26 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti
slog.String("checkpoint_id", string(opts.CheckpointID)),
slog.Int("fallback_index", sessionIndex),
)
metaPath := fmt.Sprintf("%s%d/%s", basePath, sessionIndex, paths.MetadataFileName)
if metaEntry, metaExists := entries[metaPath]; metaExists {
sessionMeta, _ = s.readMetadataFromBlob(metaEntry.Hash) //nolint:errcheck // best-effort; nil meta means start 0
}
}

sessionPath := fmt.Sprintf("%s%d/", basePath, sessionIndex)

// Replace transcript (full replace, not append).
// Transcript is pre-redacted by the caller (enforced by RedactedBytes type).
if opts.Transcript.Len() > 0 {
if err := s.replaceTranscript(ctx, opts.Transcript, opts.Agent, opts.PrecomputedBlobs, sessionPath, entries); err != nil {
agentType := opts.Agent
startLine := 0
if sessionMeta != nil {
startLine = sessionMeta.GetTranscriptStart()
if agentType == "" {
agentType = sessionMeta.Agent
}
}
if err := s.replaceTranscript(ctx, opts.Transcript, agentType, startLine, opts.PrecomputedBlobs, sessionPath, entries); err != nil {

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Summary pointer not updated

Medium Severity

UpdateCommitted regenerates transcript.jsonl via replaceTranscript but never updates the checkpoint root CheckpointSummary sessions[].transcript path. If initial WriteCommitted pointed at full.jsonl because compaction failed or the transcript was not yet compactable, finalize can add a valid transcript.jsonl while metadata still points at full.jsonl.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit afd9428. Configure here.

return fmt.Errorf("failed to replace transcript: %w", err)
}
}
Expand Down Expand Up @@ -1574,14 +1665,19 @@ func (s *GitStore) replaceSkillEvents(skillEvents []agent.SkillEvent, sessionPat
return nil
}

// replaceTranscript writes the full transcript content, replacing any existing transcript.
// Also removes any chunk files from a previous write and updates the content hash.
// replaceTranscript writes the full transcript content, replacing any existing
// transcript, and regenerates the compact transcript.jsonl scoped at startLine
// (the checkpoint's transcript start). Also removes any chunk files from a
// previous write and updates the content hash.
//
// Short-circuits when the existing content_hash.txt already matches the new
// transcript's sha256 — in that case the chunk entries are preserved as-is and
// no chunking/zlib happens. Use precomputed (non-nil) to reuse blob hashes
// computed once across multiple checkpoints.
func (s *GitStore) replaceTranscript(ctx context.Context, transcript redact.RedactedBytes, agentType types.AgentType, precomputed *PrecomputedTranscriptBlobs, sessionPath string, entries map[string]object.TreeEntry) error {
// computed once across multiple checkpoints. The compact transcript cannot
// reuse precomputed blobs: each checkpoint in a turn shares the full
// transcript but has its own start offset, so the compact content differs per
// checkpoint.
func (s *GitStore) replaceTranscript(ctx context.Context, transcript redact.RedactedBytes, agentType types.AgentType, startLine int, precomputed *PrecomputedTranscriptBlobs, sessionPath string, entries map[string]object.TreeEntry) error {
// Ignore precompute if invariants are violated — fall back to fresh chunking.
if precomputed != nil && !precomputed.isUsable() {
precomputed = nil
Expand Down Expand Up @@ -1665,6 +1761,11 @@ func (s *GitStore) replaceTranscript(ctx context.Context, transcript redact.Reda
Hash: hashBlob,
}

// Regenerate the compact transcript from the new content. Best-effort: on
// generation failure the previous transcript.jsonl entry (if any) is left
// in place so the metadata.json transcript pointer never dangles.
s.writeCompactTranscript(ctx, agentType, startLine, transcript.Bytes(), sessionPath, entries)

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Codex sanitize skipped on finalize

Medium Severity

Initial committed writes run Codex transcripts through codex.SanitizePortableTranscript before compaction, but replaceTranscript passes raw bytes to writeCompactTranscript on finalize. Compact output after UpdateCommitted can diverge from the initial write path or fail where the initial write succeeded.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit afd9428. Configure here.


return nil
}

Expand Down
Loading
Loading