Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran
- **Shadow branch migration** - if user does stash/pull/rebase (HEAD changes without commit), shadow branch is automatically moved to new base commit
- **Orphaned branch cleanup** - if a shadow branch exists without a corresponding session state file, it is automatically reset when a new session starts
- PrePush hook can push `entire/checkpoints/v1` branch alongside user pushes
- **OPF (OpenAI Privacy Filter) runs at pre-push, not post-commit**: when `redaction.openai_privacy_filter.enabled` is true, the PrePush hook re-redacts unpushed `entire/checkpoints/v1` commits with the OPF 8th layer, builds new commits carrying an `Entire-OPF-Applied: true` trailer, and atomically updates the local v1 ref before pushing. Per-commit condensation stays on the fast 7-layer pipeline. See `strategy/manual_commit_opf_rewrite.go` and `docs/security-and-privacy.md` for the full flow, including divergence detection, bootstrap caps, and CAS-on-conflict semantics.
- Safe to use on main/master since it never modifies commit history

#### Key Files
Expand All @@ -450,6 +451,7 @@ The manual-commit strategy (`manual_commit*.go`) does not modify the active bran
- `common.go` - Helpers for metadata extraction, tree building, rewind validation, `ListCheckpoints()`
- `session.go` - Session/checkpoint data structures
- `push_common.go` - PrePush logic for pushing `entire/checkpoints/v1` branch
- `manual_commit_opf_rewrite.go` - Pre-push OPF re-redaction: walks unpushed v1 commits, runs OPF over their blobs, rebuilds commits with `Entire-OPF-Applied: true` trailer, CAS-updates the local ref. Typed errors (match with `errors.As`): `V1DivergedError`, `BootstrapTooLargeError`, `V1RefMovedError`, `OPFRuntimeFailedError`.
- `manual_commit.go` - Manual-commit strategy main implementation
- `manual_commit_types.go` - Type definitions: `SessionState`, `CheckpointInfo`, `CondenseResult`
- `manual_commit_session.go` - Session state management (load/save/list session states)
Expand Down
7 changes: 5 additions & 2 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,9 @@ type WriteCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains user prompts from the session
// Prompts contains the raw (unredacted) user prompts from the session.
// Callers pass them as-is; the writer redacts them with
// redactedJoinedPrompts inside writeSessionToSubdirectory before persisting.
Prompts []string

// FilesTouched are files modified during the session
Expand Down Expand Up @@ -358,7 +360,8 @@ type UpdateCommittedOptions struct {
// Must be pre-redacted (via redact.JSONLBytes or redact.AlreadyRedacted for trusted sources).
Transcript redact.RedactedBytes

// Prompts contains all user prompts (replaces existing)
// Prompts contains the raw user prompts (replaces existing).
// See WriteCommittedOptions.Prompts.
Prompts []string

// Agent identifies the agent type (needed for transcript chunking)
Expand Down
43 changes: 41 additions & 2 deletions cmd/entire/cli/checkpoint/checkpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func TestCopyMetadataDir_SkipsSymlinks(t *testing.T) {
store := NewGitStore(repo)
entries := make(map[string]object.TreeEntry)

err = store.copyMetadataDir(metadataDir, "checkpoint/", entries)
err = store.copyMetadataDir(context.Background(), metadataDir, "checkpoint/", entries)
if err != nil {
t.Fatalf("copyMetadataDir failed: %v", err)
}
Expand Down Expand Up @@ -3409,7 +3409,7 @@ func TestCopyMetadataDir_RedactsSecrets(t *testing.T) {
store := NewGitStore(repo)
entries := make(map[string]object.TreeEntry)

if err := store.copyMetadataDir(metadataDir, "cp/", entries); err != nil {
if err := store.copyMetadataDir(context.Background(), metadataDir, "cp/", entries); err != nil {
t.Fatalf("copyMetadataDir() error = %v", err)
}

Expand Down Expand Up @@ -4410,6 +4410,45 @@ func TestCheckpointSummary_HasReview(t *testing.T) {
}
}

// TestRedactBlobBytes_JSONMetadata covers the .json path of RedactBlobBytes.
// Checkpoint metadata files (metadata.json) hold free-form fields such as
// Summary.Intent and ReviewPrompt; those used to skip redaction because the
// dispatcher matched only .jsonl, and the PR 1236 fix extended the JSON-aware
// branch to .json. The fixture is a low-entropy, AWS-key-shaped secret that
// the 7-layer pipeline catches, so the test stays deterministic without the
// OPF binary.
func TestRedactBlobBytes_JSONMetadata(t *testing.T) {
	t.Parallel()

	// Single source of truth for the planted secret, used both when building
	// the fixture and when checking that it is gone from the output.
	const awsKey = "AKIAYRWQG5EJLPZLBYNP"

	payload, err := json.Marshal(CommittedMetadata{
		Kind:         "agent_review",
		ReviewPrompt: "credential leak: key=" + awsKey,
		Summary:      &Summary{Intent: "leak: key=" + awsKey},
	})
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}

	// usePrivacyFilter=false selects the 7-layer pipeline.
	outBytes := RedactBlobBytes(context.Background(), payload, "metadata.json", false)
	outText := string(outBytes)

	if strings.Contains(outText, awsKey) {
		t.Errorf("expected AWS key redacted in metadata.json blob, got %s", outText)
	}
	if !strings.Contains(outText, "REDACTED") {
		t.Errorf("expected REDACTED placeholder in metadata.json blob, got %s", outText)
	}

	// The output must still parse as JSON, and Kind (which holds nothing
	// redactable) must come back unchanged.
	var decoded map[string]any
	if err := json.Unmarshal(outBytes, &decoded); err != nil {
		t.Errorf("redacted .json blob must remain valid JSON, got parse err %v (content: %s)", err, outText)
	}
	if kind := decoded["kind"]; kind != "agent_review" {
		t.Errorf(`expected "kind":"agent_review" preserved after redaction, got %v`, kind)
	}
}

// TestCheckpointSummary_HasInvestigation pins the JSON wire format for the
// HasInvestigation umbrella flag on CheckpointSummary. Mirrors the
// HasReview test: callers depend on the on-disk shape, so this asserts on
Expand Down
79 changes: 60 additions & 19 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ func (s *GitStore) writeStandardCheckpointEntries(ctx context.Context, opts Writ

// Copy additional metadata files from directory if specified (to session subdirectory)
if opts.MetadataDir != "" {
if err := s.copyMetadataDir(opts.MetadataDir, sessionPath, entries); err != nil {
if err := s.copyMetadataDir(ctx, opts.MetadataDir, sessionPath, entries); err != nil {
return fmt.Errorf("failed to copy metadata directory: %w", err)
}
}
Expand Down Expand Up @@ -418,9 +418,10 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom
filePaths.ContentHash = "/" + sessionPath + paths.ContentHashFileName
}

// Write prompts
// Write prompts via the 7-layer pipeline. OPF runs only in the
// pre-push rewrite path (manual_commit_opf_rewrite.go).
if len(opts.Prompts) > 0 {
promptContent := redact.String(JoinPrompts(opts.Prompts))
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return filePaths, err
Expand Down Expand Up @@ -1512,9 +1513,9 @@ func (s *GitStore) UpdateCommitted(ctx context.Context, opts UpdateCommittedOpti
}
}

// Replace prompts (apply redaction as safety net)
// Replace prompts with 7-layer-redacted content.
if len(opts.Prompts) > 0 {
promptContent := redact.String(JoinPrompts(opts.Prompts))
promptContent := redactedJoinedPrompts(opts.Prompts)
blobHash, err := CreateBlobFromContent(s.repo, []byte(promptContent))
if err != nil {
return fmt.Errorf("failed to create prompt blob: %w", err)
Expand Down Expand Up @@ -1793,7 +1794,7 @@ func CreateBlobFromContent(repo *git.Repository, content []byte) (plumbing.Hash,

// copyMetadataDir copies all files from a directory to the checkpoint path.
// Used to include additional metadata files like task checkpoints, subagent transcripts, etc.
func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[string]object.TreeEntry) error {
func (s *GitStore) copyMetadataDir(ctx context.Context, metadataDir, basePath string, entries map[string]object.TreeEntry) error {
err := filepath.Walk(metadataDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
Expand Down Expand Up @@ -1832,7 +1833,13 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str
return fmt.Errorf("path traversal detected: %s", relPath)
}

// Create blob from file with secrets redaction
// Create blob from file with 7-layer secrets redaction.
// Post-commit emits 7-layer-only blobs; the pre-push rewrite
// (strategy/manual_commit_opf_rewrite.go) walks the resulting
// tree, re-redacts these blobs with OPF when enabled, and
// rewrites entire/checkpoints/v1 into 8-layer commits before
// they leave the local machine.
_ = ctx // ctx not needed by the 7-layer path; kept on caller signature for future use
blobHash, mode, err := createRedactedBlobFromFile(s.repo, path, relPath)
if err != nil {
return fmt.Errorf("failed to create blob for %s: %w", path, err)
Expand All @@ -1854,8 +1861,13 @@ func (s *GitStore) copyMetadataDir(metadataDir, basePath string, entries map[str
return nil
}

// createRedactedBlobFromFile reads a file, applies secrets redaction, and creates a git blob.
// JSONL files get JSONL-aware redaction; all other files get plain string redaction.
// createRedactedBlobFromFile reads a file, applies the 7-layer redaction
// pipeline, and creates a git blob. Used by committed-checkpoint writes
// at post-commit time. The OpenAI Privacy Filter is intentionally NOT
// run here — OPF lives in the pre-push rewrite path
// (strategy/manual_commit_opf_rewrite.go), which re-redacts the 7-layer
// blobs into 8-layer commits before they leave the local machine.
// JSON-shaped files (.jsonl and .json) get JSON-aware redaction; all other files get plain byte redaction.
func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string) (plumbing.Hash, filemode.FileMode, error) {
info, err := os.Stat(filePath)
if err != nil {
Expand Down Expand Up @@ -1883,16 +1895,7 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string)
return hash, mode, nil
}

if strings.HasSuffix(treePath, ".jsonl") {
redacted, jsonlErr := redact.JSONLBytes(content)
if jsonlErr != nil {
content = redact.Bytes(content)
} else {
content = redacted.Bytes()
}
} else {
content = redact.Bytes(content)
}
content = RedactBlobBytes(context.Background(), content, treePath, false)

hash, err := CreateBlobFromContent(repo, content)
if err != nil {
Expand All @@ -1901,6 +1904,44 @@ func createRedactedBlobFromFile(repo *git.Repository, filePath, treePath string)
return hash, mode, nil
}

// RedactBlobBytes returns the redacted form of one blob, choosing the
// redactor from the blob's tree path and the requested pipeline.
//
// Paths ending in .jsonl or .json are first handed to the JSON-aware
// redactor. If that redactor reports an error, the content is redacted as
// plain bytes instead, so the regex/credential layers still apply. Every
// other path goes straight to plain byte redaction.
//
// usePrivacyFilter selects the pipeline: true runs the full 8-layer
// variants (including OPF) and passes ctx to them; false runs the 7-layer
// variants, which do not take ctx.
//
// .json is treated like .jsonl because checkpoint metadata files
// (metadata.json, per-session metadata.json) carry free-form fields such
// as Summary.Intent / Summary.Outcome / ReviewPrompt that can contain PII
// the regex layers miss. The JSON-aware redactor extracts string leaves
// and applies OPF only to those, preserving the JSON structure.
//
// Post-commit condensation passes false (fast path). The pre-push rewrite
// (strategy/manual_commit_opf_rewrite.go) passes true.
func RedactBlobBytes(ctx context.Context, content []byte, treePath string, usePrivacyFilter bool) []byte {
	jsonShaped := strings.HasSuffix(treePath, ".jsonl") || strings.HasSuffix(treePath, ".json")

	if !usePrivacyFilter {
		// 7-layer pipeline.
		if jsonShaped {
			if out, err := redact.JSONLBytes(content); err == nil {
				return out.Bytes()
			}
			// JSON-aware redaction failed; use plain bytes below.
		}
		return redact.Bytes(content)
	}

	// 8-layer pipeline (includes OPF).
	if jsonShaped {
		if out, err := redact.JSONLBytesWithPrivacyFilter(ctx, content); err == nil {
			return out.Bytes()
		}
		// JSON-aware redaction failed; use plain bytes below.
	}
	return redact.BytesWithPrivacyFilter(ctx, content)
}

// GetGitAuthorFromRepo retrieves the git user.name and user.email,
// checking both the repository-local config and the global ~/.gitconfig.
func GetGitAuthorFromRepo(repo *git.Repository) (name, email string) {
Expand Down
70 changes: 70 additions & 0 deletions cmd/entire/cli/checkpoint/committed_opf_trailer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package checkpoint

import (
"context"
"os"
"path/filepath"
"testing"

"github.com/entireio/cli/cmd/entire/cli/checkpoint/id"
"github.com/entireio/cli/cmd/entire/cli/testutil"
"github.com/entireio/cli/cmd/entire/cli/trailers"
"github.com/entireio/cli/redact"
"github.com/go-git/go-git/v6"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/object"
"github.com/stretchr/testify/require"
)

// TestWriteCommitted_DoesNotEmitOPFAppliedTrailer guards an architectural
// promise: standard post-commit condensation writes 7-layer-only blobs and
// MUST NOT tag them with the Entire-OPF-Applied trailer. Only the pre-push
// rewrite path emits that trailer. If the standard writer ever added it, the
// pre-push rewrite would treat those commits as already processed
// (HasOPFApplied true → reparent-only, no actual OPF run) and ship 7-layer
// content as if it were 8-layer.
func TestWriteCommitted_DoesNotEmitOPFAppliedTrailer(t *testing.T) {
	t.Parallel()

	repoDir := t.TempDir()
	testutil.InitRepo(t, repoDir)
	repo, err := git.PlainOpen(repoDir)
	require.NoError(t, err)

	// Give the repository one ordinary commit containing a README.
	worktree, err := repo.Worktree()
	require.NoError(t, err)
	readmePath := filepath.Join(repoDir, "README.md")
	err = os.WriteFile(readmePath, []byte("# Test"), 0o644)
	require.NoError(t, err)
	_, err = worktree.Add("README.md")
	require.NoError(t, err)
	author := &object.Signature{Name: "Test", Email: "test@test.com"}
	_, err = worktree.Commit("Initial commit", &git.CommitOptions{Author: author})
	require.NoError(t, err)

	gitStore := NewGitStore(repo)
	checkpointID := id.MustCheckpointID("a1b2c3d4e5f6")

	writeOpts := WriteCommittedOptions{
		CheckpointID: checkpointID,
		SessionID:    "regression-no-opf-trailer",
		Strategy:     "manual-commit",
		Transcript:   redact.AlreadyRedacted([]byte(`{"role":"user","content":"hello"}` + "\n")),
		AuthorName:   "Test",
		AuthorEmail:  "test@test.com",
	}
	err = gitStore.WriteCommitted(context.Background(), writeOpts)
	require.NoError(t, err)

	// Resolve the tip of entire/checkpoints/v1 and inspect that commit's
	// message for the trailer.
	branchName := plumbing.NewBranchReferenceName("entire/checkpoints/v1")
	branchRef, err := repo.Reference(branchName, true)
	require.NoError(t, err, "writer should have created entire/checkpoints/v1")
	tip, err := repo.CommitObject(branchRef.Hash())
	require.NoError(t, err)

	if !trailers.HasOPFApplied(tip.Message) {
		return
	}
	t.Errorf("standard WriteCommitted emitted Entire-OPF-Applied trailer; commit message:\n%s", tip.Message)
}
3 changes: 2 additions & 1 deletion cmd/entire/cli/checkpoint/committed_reader_resolve.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ type CommittedReaderOptions struct {
BlobFetcher BlobFetchFunc
}

func NewCommittedReader(ctx context.Context, repo *git.Repository, opts CommittedReaderOptions) (CommittedStore, error) { //nolint:ireturn // Factory selects between v1 and dual reader implementations.
//nolint:ireturn // Factory selects between v1 and dual reader implementations.
func NewCommittedReader(ctx context.Context, repo *git.Repository, opts CommittedReaderOptions) (CommittedStore, error) {
if repo == nil {
return nil, errors.New("git repository is required")
}
Expand Down
13 changes: 12 additions & 1 deletion cmd/entire/cli/checkpoint/prompts.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
package checkpoint

import "strings"
import (
"strings"

"github.com/entireio/cli/redact"
)

// PromptSeparator is the canonical separator used in prompt.txt when multiple
// prompts are stored in a single file.
Expand All @@ -23,3 +27,10 @@ func SplitPromptContent(content string) []string {
}
return prompts
}

// redactedJoinedPrompts joins prompts and runs the 7-layer redaction
// pipeline. OPF runs exclusively in the pre-push rewrite (not here),
// so the writer's hot path stays predictable.
func redactedJoinedPrompts(prompts []string) string {
return redact.String(strings.Join(prompts, PromptSeparator))
}
12 changes: 10 additions & 2 deletions cmd/entire/cli/checkpoint/prompts_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) {
"first line\nwith newline",
"second prompt",
}

joined := JoinPrompts(original)
split := SplitPromptContent(joined)

Expand All @@ -24,6 +23,15 @@ func TestJoinAndSplitPrompts_RoundTrip(t *testing.T) {

func TestSplitPromptContent_EmptyContent(t *testing.T) {
t.Parallel()

assert.Nil(t, SplitPromptContent(""))
}

// TestRedactedJoinedPrompts_AppliesSafetyNet verifies that the helper joins
// prompts with the canonical separator AND that the joined text goes through
// the 7-layer redaction pipeline. One prompt carries an AWS-key-shaped
// secret, so the test fails if redaction is ever dropped from the helper;
// without it, secret-free inputs would pass even with no redaction at all.
// OPF runs only in the pre-push rewrite path, never here.
func TestRedactedJoinedPrompts_AppliesSafetyNet(t *testing.T) {
	t.Parallel()

	// Low-entropy AWS-key-shaped fixture; not a real credential.
	const awsKey = "AKIAYRWQG5EJLPZLBYNP"

	got := redactedJoinedPrompts([]string{"hello", "world", "leak: key=" + awsKey})

	assert.NotEmpty(t, got)
	// Joining: the separator must survive redaction.
	assert.Contains(t, got, PromptSeparator)
	// Safety net: the planted secret must not appear in the output.
	assert.NotContains(t, got, awsKey)
}
Loading
Loading