Skip to content
2 changes: 2 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran
- **Shadow branch migration** - if user does stash/pull/rebase (HEAD changes without commit), shadow branch is automatically moved to new base commit
- **Orphaned branch cleanup** - if a shadow branch exists without a corresponding session state file, it is automatically reset when a new session starts
- PrePush hook can push `entire/checkpoints/v1` branch alongside user pushes
- **OPF (OpenAI Privacy Filter) runs at pre-push, not post-commit**: when `redaction.openai_privacy_filter.enabled` is true, the PrePush hook re-redacts unpushed `entire/checkpoints/v1` commits with the OPF 8th layer, builds new commits carrying an `Entire-OPF-Applied: true` trailer, and atomically updates the local v1 ref before pushing. Per-commit condensation stays on the fast 7-layer pipeline. See `strategy/manual_commit_opf_rewrite.go` and `docs/security-and-privacy.md` for the full flow, including divergence detection, bootstrap caps, and CAS-on-conflict semantics.
- Safe to use on main/master since it never modifies commit history

#### Key Files
Expand All @@ -450,6 +451,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran
- `common.go` - Helpers for metadata extraction, tree building, rewind validation, `ListCheckpoints()`
- `session.go` - Session/checkpoint data structures
- `push_common.go` - PrePush logic for pushing `entire/checkpoints/v1` branch
- `manual_commit_opf_rewrite.go` - Pre-push OPF re-redaction: walks unpushed v1 commits, runs OPF over their blobs, rebuilds commits with `Entire-OPF-Applied: true` trailer, CAS-updates the local ref. Typed errors (match with `errors.As`): `V1DivergedError`, `BootstrapTooLargeError`, `V1RefMovedError`, `OPFRuntimeFailedError`.
- `manual_commit.go` - Manual-commit strategy main implementation
- `manual_commit_types.go` - Type definitions: `SessionState`, `CheckpointInfo`, `CondenseResult`
- `manual_commit_session.go` - Session state management (load/save/list session states)
Expand Down
27 changes: 5 additions & 22 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,22 +222,11 @@ type WriteCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains the raw user prompts from the session. These are NOT
// guaranteed to be redacted on entry — the writer always emits the typed
// PromptsRedacted blob below (running the safety-net pipeline if it is
// the zero value). Do not read Prompts independently for persistence; go
// through redactJoinedPrompts so the redaction guarantee is preserved.
// Prompts contains the raw user prompts from the session. They are run
// through redactedJoinedPrompts before being persisted; the writer does
// this itself inside writeSessionToSubdirectory.
Prompts []string

// PromptsRedacted, when set, is the pre-redacted joined-prompts blob the
// writer uses verbatim instead of re-running the safety-net pipeline.
// Used by finalizeAllTurnCheckpoints to avoid running the OpenAI
// Privacy Filter once per checkpoint over identical joined-prompt
// strings. The typed wrapper makes the "this content was produced by
// the redaction pipeline" claim a compile-time invariant — callers
// cannot assign an arbitrary string.
PromptsRedacted redact.RedactedJoinedPrompts

// FilesTouched are files modified during the session
FilesTouched []string

Expand Down Expand Up @@ -366,16 +355,10 @@ type UpdateCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains the raw user prompts (replaces existing). NOT
// guaranteed to be redacted on entry — see WriteCommittedOptions.Prompts
// for the relationship to PromptsRedacted.
// Prompts contains the raw user prompts (replaces existing).
// See WriteCommittedOptions.Prompts.
Prompts []string

// PromptsRedacted, when set, is the pre-redacted joined-prompts blob
// the writer uses verbatim instead of re-running the safety-net
// pipeline. See WriteCommittedOptions.PromptsRedacted for rationale.
PromptsRedacted redact.RedactedJoinedPrompts

// Agent identifies the agent type (needed for transcript chunking)
Agent types.AgentType

Expand Down
39 changes: 39 additions & 0 deletions cmd/entire/cli/checkpoint/checkpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4362,3 +4362,42 @@ func TestCheckpointSummary_HasReview(t *testing.T) {
t.Errorf(`expected zero-value summary to omit "has_review" key, got %s`, string(bZero))
}
}

// TestRedactBlobBytes_JSONMetadata pins the .json branch of RedactBlobBytes.
// Checkpoint metadata files (metadata.json) carry free-form fields such as
// Summary.Intent and ReviewPrompt; these previously bypassed JSON-aware
// redaction because the dispatcher only matched .jsonl, and the PR 1236 fix
// extended that branch to .json.
//
// The fixture embeds an AWS-access-key-shaped secret and calls
// RedactBlobBytes with usePrivacyFilter=false, so the assertions depend only
// on the 7-layer pipeline and stay deterministic without the OPF binary.
func TestRedactBlobBytes_JSONMetadata(t *testing.T) {
	t.Parallel()

	// Single source of truth for the planted secret so the fixture and the
	// leak assertion cannot drift apart.
	const secret = "AKIAYRWQG5EJLPZLBYNP"

	meta := CommittedMetadata{
		Kind:         "agent_review",
		ReviewPrompt: "credential leak: key=" + secret,
		Summary: &Summary{
			Intent: "leak: key=" + secret,
		},
	}
	raw, err := json.Marshal(meta)
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	got := RedactBlobBytes(context.Background(), raw, "metadata.json", false)
	redacted := string(got)
	if strings.Contains(redacted, secret) {
		t.Errorf("expected AWS key redacted in metadata.json blob, got %s", redacted)
	}
	if !strings.Contains(redacted, "REDACTED") {
		t.Errorf("expected REDACTED placeholder in metadata.json blob, got %s", redacted)
	}

	// JSON structure must survive — Kind is not redactable content, so it
	// should round-trip through the JSON-aware redactor.
	var roundTripped map[string]any
	if err := json.Unmarshal(got, &roundTripped); err != nil {
		// Fatal rather than Error: with an unparsable blob the map stays nil
		// and the "kind" check below would only add a misleading second
		// failure on top of the real one.
		t.Fatalf("redacted .json blob must remain valid JSON, got parse err %v (content: %s)", err, redacted)
	}
	if roundTripped["kind"] != "agent_review" {
		t.Errorf(`expected "kind":"agent_review" preserved after redaction, got %v`, roundTripped["kind"])
	}
}
77 changes: 38 additions & 39 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,12 +417,10 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom
filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName
}

// Write prompts. Uses the full 8-layer pipeline (including OPF) via
// redactedJoinedPrompts; the helper unwraps opts.PromptsRedacted when
// set so callers (finalizeAllTurnCheckpoints) that pre-redact once
// across multiple checkpoint writes don't pay OPF per checkpoint.
// Write prompts via the 7-layer pipeline. OPF runs only in the
// pre-push rewrite path (manual_commit_opf_rewrite.go).
if len(opts.Prompts) > 0 {
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return filePaths, err
Expand Down Expand Up @@ -1403,10 +1401,9 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti
}
}

// Replace prompts (apply redaction as safety net; unwraps
// opts.PromptsRedacted when set).
// Replace prompts with 7-layer-redacted content.
if len(opts.Prompts) > 0 {
promptContent := redactedJoinedPrompts(ctx, opts.Prompts, opts.PromptsRedacted)
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return fmt.Errorf("failed to create prompt blob: %w", err)
Expand Down Expand Up @@ -1725,11 +1722,14 @@ func (s *GitStore) copyMetadataDir(ctx context.Context, metadataDir, basePath st
return fmt.Errorf("path traversal detected: %s", relPath)
}

// Create blob from file with secrets redaction
// Committed-checkpoint write — run the full 8-layer pipeline
// including OPF. The per-turn temp-write path stays on plain
// redactors via the sibling createRedactedBlobFromFile.
blobHash, mode, err := createRedactedBlobFromFileWithPrivacyFilter(ctx, s.repo, path, relPath)
// Create blob from file with 7-layer secrets redaction.
// Post-commit emits 7-layer-only blobs; the pre-push rewrite
// (strategy/manual_commit_opf_rewrite.go) walks the resulting
// tree, re-redacts these blobs with OPF when enabled, and
// rewrites entire/checkpoints/v1 into 8-layer commits before
// they leave the local machine.
_ = ctx // ctx not needed by the 7-layer path; kept on caller signature for future use
blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath)
if err != nil {
return fmt.Errorf("failed to create blob for %s: %w", path, err)
}
Expand All @@ -1751,22 +1751,13 @@ func (s *GitStore) copyMetadataDir(ctx context.Context, metadataDir, basePath st
}

// createRedactedBlobFromFile reads a file, applies the 7-layer redaction
// pipeline, and creates a git blob. Used by per-turn temporary-checkpoint
// writes — the OpenAI Privacy Filter is intentionally NOT run here to
// keep per-turn latency inside the agent loop's budget.
// pipeline, and creates a git blob. Used by committed-checkpoint writes
// at post-commit time. The OpenAI Privacy Filter is intentionally NOT
// run here — OPF lives in the pre-push rewrite path
// (strategy/manual_commit_opf_rewrite.go), which re-redacts the 7-layer
// blobs into 8-layer commits before they leave the local machine.
// JSON-shaped files (.jsonl and .json) get JSON-aware redaction; all other files get plain byte redaction.
func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) {
return createRedactedBlobFromFileImpl(context.Background(), repo, filePath, treePath, false)
}

// createRedactedBlobFromFileWithPrivacyFilter reads a file, applies the full
// 8-layer pipeline (including the OpenAI Privacy Filter), and creates a git
// blob. Used by committed-checkpoint writes — slower but more thorough.
func createRedactedBlobFromFileWithPrivacyFilter(ctx context.Context, repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) {
return createRedactedBlobFromFileImpl(ctx, repo, filePath, treePath, true)
}

func createRedactedBlobFromFileImpl(ctx context.Context, repo *git.Repository, filePath, treePath string, usePrivacyFilter bool) (plumbing.Hash, filemode.FileMode, error) {
info, err := os.Stat(filePath)
if err != nil {
return plumbing.ZeroHash, 0, fmt.Errorf("failed to stat file: %w", err)
Expand All @@ -1793,7 +1784,7 @@ func createRedactedBlobFromFileImpl(ctx context.Context, repo *git.Repository, f
return hash, mode, nil
}

content = redactBytesForBlob(ctx, content, treePath, usePrivacyFilter)
content = RedactBlobBytes(context.Background(), content, treePath, false)

hash, err := CreateBlobFromContent(repo, content)
if err != nil {
Expand All @@ -1802,15 +1793,24 @@ func createRedactedBlobFromFileImpl(ctx context.Context, repo *git.Repository, f
return hash, mode, nil
}

// redactBytesForBlob applies the appropriate redaction pipeline to file
// content for a checkpoint blob. JSONL files get JSONL-aware redaction
// (falling back to plain byte redaction on parse failure so the regex
// layers still apply); other files get plain byte redaction.
// usePrivacyFilter selects the lighter 7-layer pipeline (per-turn temp
// writes) versus the full 8-layer pipeline including OPF (committed
// writes).
func redactBytesForBlob(ctx context.Context, content []byte, treePath string, usePrivacyFilter bool) []byte {
if strings.HasSuffix(treePath, ".jsonl") {
// RedactBlobBytes redacts a single blob's content given its tree path.
// JSON-shaped files (.jsonl or .json) get JSON-aware redaction (falling
// back to plain bytes on parse failure so regex/credential layers
// still apply); other files get plain byte redaction. When
// usePrivacyFilter is true the full 8-layer pipeline (including OPF)
// runs; otherwise the 7-layer pipeline.
//
// .json is handled alongside .jsonl because checkpoint metadata files
// (metadata.json, per-session metadata.json) carry free-form fields
// like Summary.Intent / Summary.Outcome / ReviewPrompt that can
// contain PII the regex layers miss. The JSON-aware redactor extracts
// string leaves and applies OPF only to those, preserving the JSON
// structure.
//
// Post-commit condensation uses false (fast path). The pre-push rewrite
// (strategy/manual_commit_opf_rewrite.go) uses true.
func RedactBlobBytes(ctx context.Context, content []byte, treePath string, usePrivacyFilter bool) []byte {
if strings.HasSuffix(treePath, ".jsonl") || strings.HasSuffix(treePath, ".json") {
var (
redacted redact.RedactedBytes
err error
Expand All @@ -1823,8 +1823,7 @@ func redactBytesForBlob(ctx context.Context, content []byte, treePath string, us
if err == nil {
return redacted.Bytes()
}
// JSONL parse failed — fall through so regex/credential layers
// still apply via the plain byte path.
// JSON/JSONL parse failed — fall through to plain bytes.
}
if usePrivacyFilter {
return redact.BytesWithPrivacyFilter(ctx, content)
Expand Down
70 changes: 70 additions & 0 deletions cmd/entire/cli/checkpoint/committed_opf_trailer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package checkpoint

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/testutil"
"github.com/entireio/cli/cmd/entire/cli/trailers"
"github.com/entireio/cli/redact"
"github.com/go-git/go-git/v6"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/object"
"github.com/stretchr/testify/require"
)

// TestWriteCommitted_DoesNotEmitOPFAppliedTrailer guards the contract that
// the standard post-commit writer produces 7-layer-only checkpoints and
// never stamps them with the Entire-OPF-Applied trailer. Only the pre-push
// rewrite path may emit that trailer. Were the standard writer to add it,
// the rewrite would treat those commits as already processed
// (HasOPFApplied true → reparent-only, no OPF run) and 7-layer content
// would ship as though it were 8-layer.
func TestWriteCommitted_DoesNotEmitOPFAppliedTrailer(t *testing.T) {
	t.Parallel()

	// Initialise a throwaway repository and open it.
	repoDir := t.TempDir()
	testutil.InitRepo(t, repoDir)
	repo, err := git.PlainOpen(repoDir)
	require.NoError(t, err)

	// Seed the repository with a single README commit.
	worktree, err := repo.Worktree()
	require.NoError(t, err)
	readmePath := filepath.Join(repoDir, "README.md")
	err = os.WriteFile(readmePath, []byte("# Test"), 0o644)
	require.NoError(t, err)
	_, err = worktree.Add("README.md")
	require.NoError(t, err)
	author := &object.Signature{Name: "Test", Email: "test@test.com"}
	_, err = worktree.Commit("Initial commit", &git.CommitOptions{Author: author})
	require.NoError(t, err)

	// Write one committed checkpoint through the standard writer.
	store := NewGitStore(repo)
	writeOpts := WriteCommittedOptions{
		CheckpointID: id.MustCheckpointID("a1b2c3d4e5f6"),
		SessionID:    "regression-no-opf-trailer",
		Strategy:     "manual-commit",
		Transcript:   redact.AlreadyRedacted([]byte(`{"role":"user","content":"hello"}` + "\n")),
		AuthorName:   "Test",
		AuthorEmail:  "test@test.com",
	}
	err = store.WriteCommitted(context.Background(), writeOpts)
	require.NoError(t, err)

	// Resolve the tip of entire/checkpoints/v1 — the commit the writer just
	// produced — and inspect its message for the trailer.
	v1Ref, err := repo.Reference(plumbing.NewBranchReferenceName("entire/checkpoints/v1"), true)
	require.NoError(t, err, "writer should have created entire/checkpoints/v1")
	tip, err := repo.CommitObject(v1Ref.Hash())
	require.NoError(t, err)

	if !trailers.HasOPFApplied(tip.Message) {
		return
	}
	t.Errorf("standard WriteCommitted emitted Entire-OPF-Applied trailer; commit message:\n%s", tip.Message)
}
18 changes: 5 additions & 13 deletions cmd/entire/cli/checkpoint/prompts.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package checkpoint

import (
"context"
"strings"

"github.com/entireio/cli/redact"
Expand Down Expand Up @@ -29,16 +28,9 @@ func SplitPromptContent(content string) []string {
return prompts
}

// redactedJoinedPrompts returns the redacted prompt-blob content for the
// supplied prompts. When preRedacted is set it is unwrapped verbatim;
// otherwise the prompts are joined and run through the full 8-layer
// pipeline as a safety net. Callers that share the same prompts across
// multiple checkpoint writes (finalizeAllTurnCheckpoints) should compute
// the redacted blob once via redact.JoinedPrompts and pass it through to
// avoid running OPF repeatedly over identical input.
func redactedJoinedPrompts(ctx context.Context, prompts []string, preRedacted redact.RedactedJoinedPrompts) string {
if preRedacted.IsSet() {
return preRedacted.String()
}
return redact.JoinedPrompts(ctx, prompts, PromptSeparator).String()
// redactedJoinedPrompts joins prompts and runs the 7-layer redaction
// pipeline. OPF runs exclusively in the pre-push rewrite (not here),
// so the writer's hot path stays predictable.
func redactedJoinedPrompts(prompts []string) string {
return redact.String(strings.Join(prompts, PromptSeparator))
}
36 changes: 7 additions & 29 deletions cmd/entire/cli/checkpoint/prompts_test.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
package checkpoint

import (
"context"
"testing"

"github.com/entireio/cli/redact"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
Expand All @@ -16,7 +14,6 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) {
"first line\nwith newline",
"second prompt",
}

joined := JoinPrompts(original)
split := SplitPromptContent(joined)

Expand All @@ -26,34 +23,15 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) {

func TestSplitPromptContent_EmptyContent(t *testing.T) {
t.Parallel()

assert.Nil(t, SplitPromptContent(""))
}

// TestRedactedJoinedPrompts_PreRedactedIsTrustedVerbatim verifies that when
// the caller supplies a set RedactedJoinedPrompts the helper unwraps it
// untouched and never re-invokes the redaction pipeline. The pre-redacted
// path is what finalizeAllTurnCheckpoints relies on to avoid running OPF
// once per checkpoint over identical joined-prompt strings.
func TestRedactedJoinedPrompts_PreRedactedIsTrustedVerbatim(t *testing.T) {
// TestRedactedJoinedPrompts_AppliesSafetyNet verifies the helper joins
// prompts with the canonical separator and runs them through the 7-layer
// pipeline. OPF runs only in the pre-push rewrite path, never here.
func TestRedactedJoinedPrompts_AppliesSafetyNet(t *testing.T) {
t.Parallel()

const preRedacted = "[REDACTED_PERSON] asked about [REDACTED_EMAIL]"
got := redactedJoinedPrompts(
context.Background(),
[]string{"raw prompt text"},
redact.AlreadyRedactedJoinedPrompts(preRedacted),
)
assert.Equal(t, preRedacted, got, "preRedacted should pass through verbatim")
}

// TestRedactedJoinedPrompts_ZeroValueFallsBackToRedaction verifies that
// when the typed preRedacted is the zero value the helper joins the
// prompts and runs the full pipeline as a safety net.
func TestRedactedJoinedPrompts_ZeroValueFallsBackToRedaction(t *testing.T) {
t.Parallel()

got := redactedJoinedPrompts(context.Background(), []string{"hello", "world"}, redact.RedactedJoinedPrompts{})
assert.NotEmpty(t, got, "zero-value preRedacted should fall back to running the redaction pipeline")
assert.Contains(t, got, PromptSeparator, "fallback output should preserve the prompt separator")
got := redactedJoinedPrompts([]string{"hello", "world"})
assert.NotEmpty(t, got)
assert.Contains(t, got, PromptSeparator)
}
Loading
Loading