diff --git a/cmd/bbox-init/main.go b/cmd/bbox-init/main.go
index fc90aa1..be72f47 100644
--- a/cmd/bbox-init/main.go
+++ b/cmd/bbox-init/main.go
@@ -86,6 +86,10 @@ func main() {
 			"error", err)
 	}
 
+	if err := mountExtras(logger); err != nil {
+		logger.Warn("failed to mount extras", "error", err)
+	}
+
 	if err := harden.ApplySeccomp(); err != nil {
 		logger.Error("seccomp filter failed", "error", err)
 		halt()
diff --git a/cmd/bbox-init/mounts.go b/cmd/bbox-init/mounts.go
new file mode 100644
index 0000000..b33dcbe
--- /dev/null
+++ b/cmd/bbox-init/mounts.go
@@ -0,0 +1,119 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+//go:build linux
+
+package main
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"log/slog"
+	"os"
+	"syscall"
+	"time"
+)
+
+// guestMountEntry matches the JSON written by InjectMountConfig on the host.
+type guestMountEntry struct {
+	Tag       string `json:"tag"`
+	GuestPath string `json:"guest_path"`
+	ReadOnly  bool   `json:"read_only"`
+}
+
+// mountConfigPath is the guest path where the host writes extra mount config.
+const mountConfigPath = "/etc/broodbox-mounts.json"
+
+// mountExtras reads /etc/broodbox-mounts.json and mounts each virtiofs share.
+// If the config file does not exist, it returns nil (no extra mounts needed).
+//
+// Mounting is best-effort per entry: a share that fails to mount does not
+// prevent the remaining shares from being attempted. Every per-entry failure
+// is collected and returned as a single joined error.
+func mountExtras(logger *slog.Logger) error {
+	data, err := os.ReadFile(mountConfigPath)
+	if err != nil {
+		if errors.Is(err, os.ErrNotExist) {
+			return nil
+		}
+		return fmt.Errorf("reading mount config: %w", err)
+	}
+
+	var entries []guestMountEntry
+	if err := json.Unmarshal(data, &entries); err != nil {
+		return fmt.Errorf("parsing mount config: %w", err)
+	}
+
+	var errs []error
+	for _, entry := range entries {
+		if err := mountOne(logger, entry); err != nil {
+			errs = append(errs, err)
+		}
+	}
+
+	// errors.Join returns nil when no entry failed.
+	return errors.Join(errs...)
+}
+
+// mountRetries is the number of attempts for each virtiofs mount.
+const mountRetries = 5
+
+// mountRetrySleep is the delay between mount retries.
+const mountRetrySleep = 500 * time.Millisecond
+
+// sandboxUID and sandboxGID are the UID/GID of the sandbox user in the guest.
+const (
+	sandboxUID = 1000
+	sandboxGID = 1000
+)
+
+// mountOne creates entry.GuestPath and mounts the virtiofs share identified by
+// entry.Tag on it with nosuid and nodev (plus read-only when requested). The
+// mount is attempted up to mountRetries times, pausing mountRetrySleep between
+// attempts. Writable mounts are then chowned to the sandbox user; a chown
+// failure is logged but is not an error.
+func mountOne(logger *slog.Logger, entry guestMountEntry) error {
+	if err := os.MkdirAll(entry.GuestPath, 0o755); err != nil {
+		return fmt.Errorf("creating mount point %s: %w", entry.GuestPath, err)
+	}
+
+	flags := uintptr(syscall.MS_NOSUID | syscall.MS_NODEV)
+	if entry.ReadOnly {
+		flags |= syscall.MS_RDONLY
+	}
+
+	var mountErr error
+	for attempt := 1; attempt <= mountRetries; attempt++ {
+		mountErr = syscall.Mount(entry.Tag, entry.GuestPath, "virtiofs", flags, "")
+		if mountErr == nil {
+			break
+		}
+		logger.Debug("virtiofs mount attempt failed",
+			"tag", entry.Tag,
+			"guest_path", entry.GuestPath,
+			"attempt", attempt,
+			"error", mountErr,
+		)
+		// Only pause when another attempt follows; sleeping after the last
+		// failure would just delay reporting the error.
+		if attempt < mountRetries {
+			time.Sleep(mountRetrySleep)
+		}
+	}
+	if mountErr != nil {
+		return fmt.Errorf("mounting virtiofs tag %q at %s after %d attempts: %w",
+			entry.Tag, entry.GuestPath, mountRetries, mountErr)
+	}
+
+	// A read-only mount rejects chown (EROFS), so only writable mount points
+	// are handed to the sandbox user.
+	if !entry.ReadOnly {
+		if err := os.Chown(entry.GuestPath, sandboxUID, sandboxGID); err != nil {
+			logger.Warn("failed to chown mount point", "path", entry.GuestPath, "error", err)
+		}
+	}
+
+	logger.Info("mounted extra virtiofs share", "tag", entry.Tag, "guest_path", entry.GuestPath, "read_only", entry.ReadOnly)
+	return nil
+}
diff --git a/cmd/bbox/main.go b/cmd/bbox/main.go
index 1ba5092..ad63c99 100644
--- a/cmd/bbox/main.go
+++ b/cmd/bbox/main.go
@@ -737,8 +737,9 @@ func run(parentCtx context.Context, agentName string, flags runFlags) error {
 	deps.Flusher = review.NewFSFlusher()
 	deps.Differ = diff.NewFSDiffer()
 
-	// Wire snapshot post-processors (git config sanitizer).
+	// Wire snapshot post-processors (worktree reconstruction, then git config sanitizer).
deps.SnapshotPostProcessors = []workspace.SnapshotPostProcessor{ + infragit.NewWorktreeProcessor(logger), infragit.NewConfigSanitizer(logger), } diff --git a/internal/infra/git/sanitizer.go b/internal/infra/git/sanitizer.go index c53e779..9c26830 100644 --- a/internal/infra/git/sanitizer.go +++ b/internal/infra/git/sanitizer.go @@ -52,24 +52,24 @@ func NewConfigSanitizer(logger *slog.Logger) *ConfigSanitizer { // // For external worktrees (where git metadata lives outside the workspace), // sanitization is skipped because the config is not present in the snapshot. -func (s *ConfigSanitizer) Process(_ context.Context, originalPath, snapshotPath string) error { +func (s *ConfigSanitizer) Process(_ context.Context, originalPath, snapshotPath string) (*workspace.PostProcessResult, error) { // Find the git config source on the host filesystem. srcPath, err := resolveGitConfigPath(originalPath) if err != nil { s.logger.Warn("could not resolve git config path, skipping sanitization", "path", originalPath, "error", err) - return nil + return nil, nil } if srcPath == "" { - return nil + return nil, nil } data, err := os.ReadFile(srcPath) if err != nil { if errors.Is(err, fs.ErrNotExist) { - return nil + return nil, nil } - return fmt.Errorf("reading git config: %w", err) + return nil, fmt.Errorf("reading git config: %w", err) } sanitized := SanitizeConfig(string(data)) @@ -77,20 +77,20 @@ func (s *ConfigSanitizer) Process(_ context.Context, originalPath, snapshotPath // Determine where the sanitized config should be written in the snapshot. dstPath := s.resolveSnapshotConfigDest(originalPath, snapshotPath) if dstPath == "" { - return nil + return nil, nil } // Ensure parent directory exists. Normally the snapshot creator copies // .git/ first, but be defensive for edge cases and tests. 
if err := os.MkdirAll(filepath.Dir(dstPath), 0o755); err != nil { - return fmt.Errorf("creating git config directory in snapshot: %w", err) + return nil, fmt.Errorf("creating git config directory in snapshot: %w", err) } if err := os.WriteFile(dstPath, []byte(sanitized), 0o644); err != nil { - return fmt.Errorf("writing sanitized git config: %w", err) + return nil, fmt.Errorf("writing sanitized git config: %w", err) } - return nil + return nil, nil } // resolveSnapshotConfigDest determines where to write the sanitized git config diff --git a/internal/infra/git/sanitizer_test.go b/internal/infra/git/sanitizer_test.go index 27309ab..bbc8f47 100644 --- a/internal/infra/git/sanitizer_test.go +++ b/internal/infra/git/sanitizer_test.go @@ -323,7 +323,7 @@ func TestProcess_ReadsAndWritesSanitizedConfig(t *testing.T) { logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) sanitizer := NewConfigSanitizer(logger) - err := sanitizer.Process(t.Context(), originalDir, snapshotDir) + _, err := sanitizer.Process(t.Context(), originalDir, snapshotDir) require.NoError(t, err) // Read the sanitized config from the snapshot. @@ -356,7 +356,7 @@ func TestProcess_NoGitConfig_NoOp(t *testing.T) { logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) sanitizer := NewConfigSanitizer(logger) - err := sanitizer.Process(t.Context(), originalDir, snapshotDir) + _, err := sanitizer.Process(t.Context(), originalDir, snapshotDir) require.NoError(t, err) // Snapshot should not have a .git directory. 
@@ -688,7 +688,7 @@ func TestProcess_ExternalWorktree_SkipsSanitization(t *testing.T) { logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) sanitizer := NewConfigSanitizer(logger) - err := sanitizer.Process(t.Context(), originalDir, snapshotDir) + _, err := sanitizer.Process(t.Context(), originalDir, snapshotDir) require.NoError(t, err) // The .git file should be PRESERVED (not replaced with a directory). @@ -756,7 +756,7 @@ func TestProcess_InWorkspaceWorktree_SanitizesConfig(t *testing.T) { logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) sanitizer := NewConfigSanitizer(logger) - err := sanitizer.Process(t.Context(), workspace, snapshotDir) + _, err := sanitizer.Process(t.Context(), workspace, snapshotDir) require.NoError(t, err) // The .git file should remain a file (not converted to directory). @@ -805,7 +805,7 @@ func TestProcess_ExternalWorktreeNoCommondir_SkipsSanitization(t *testing.T) { logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) sanitizer := NewConfigSanitizer(logger) - err := sanitizer.Process(t.Context(), originalDir, snapshotDir) + _, err := sanitizer.Process(t.Context(), originalDir, snapshotDir) require.NoError(t, err) // No config should be written for external worktrees. @@ -828,7 +828,7 @@ func TestProcess_NormalRepo_NoDoubleCreate(t *testing.T) { logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) sanitizer := NewConfigSanitizer(logger) - err := sanitizer.Process(t.Context(), originalDir, snapshotDir) + _, err := sanitizer.Process(t.Context(), originalDir, snapshotDir) require.NoError(t, err) // For normal repos, Process should NOT create HEAD/objects/refs @@ -868,7 +868,7 @@ func TestProcess_Worktree_MaliciousGitdir(t *testing.T) { sanitizer := NewConfigSanitizer(logger) // Process should succeed — external worktrees are skipped. 
- err := sanitizer.Process(t.Context(), originalDir, snapshotDir) + _, err := sanitizer.Process(t.Context(), originalDir, snapshotDir) require.NoError(t, err) // No config should be written since gitdir is external. @@ -922,7 +922,7 @@ func TestProcess_InWorkspaceWorktree_RelativeGitdir(t *testing.T) { logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) sanitizer := NewConfigSanitizer(logger) - err := sanitizer.Process(t.Context(), workspace, snapshotDir) + _, err := sanitizer.Process(t.Context(), workspace, snapshotDir) require.NoError(t, err) // Config should be sanitized at the correct location. @@ -978,7 +978,7 @@ func TestProcess_InWorkspaceWorktree_NoCommondir(t *testing.T) { logger := slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelError})) sanitizer := NewConfigSanitizer(logger) - err := sanitizer.Process(t.Context(), workspace, snapshotDir) + _, err := sanitizer.Process(t.Context(), workspace, snapshotDir) require.NoError(t, err) // Config should be sanitized at gitdir/config (no commondir fallback). @@ -1030,7 +1030,7 @@ func TestProcess_Worktree_CommondirEscapesSnapshot(t *testing.T) { sanitizer := NewConfigSanitizer(logger) // Process should succeed (escaping commondir is non-fatal, just skipped). - err := sanitizer.Process(t.Context(), workspace, snapshotDir) + _, err := sanitizer.Process(t.Context(), workspace, snapshotDir) require.NoError(t, err) // Should NOT have written to /tmp/config. @@ -1063,6 +1063,6 @@ func TestProcess_Worktree_MalformedGitFileInSnapshot(t *testing.T) { sanitizer := NewConfigSanitizer(logger) // Process should succeed (malformed snapshot .git is non-fatal). 
-	err := sanitizer.Process(t.Context(), workspace, snapshotDir)
+	_, err := sanitizer.Process(t.Context(), workspace, snapshotDir)
 	require.NoError(t, err)
 }
diff --git a/internal/infra/git/worktree.go b/internal/infra/git/worktree.go
new file mode 100644
index 0000000..3e61843
--- /dev/null
+++ b/internal/infra/git/worktree.go
@@ -0,0 +1,323 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package git
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"io/fs"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"github.com/stacklok/brood-box/pkg/domain/workspace"
+)
+
+const (
+	// gitObjectsTag is the virtiofs tag for the git objects mount.
+	gitObjectsTag = "git-objects"
+
+	// gitObjectsGuestPath is where git objects are mounted inside the guest VM.
+	gitObjectsGuestPath = "/mnt/git-objects"
+)
+
+// Ensure WorktreeProcessor implements workspace.SnapshotPostProcessor at compile time.
+var _ workspace.SnapshotPostProcessor = (*WorktreeProcessor)(nil)
+
+// WorktreeProcessor detects git worktrees and reconstructs a proper .git/
+// directory in the snapshot with an objects/info/alternates file pointing
+// to a guest mount path. It does not mount anything itself: Process returns
+// a read-only mount request for the host's git objects directory.
+type WorktreeProcessor struct {
+	logger *slog.Logger
+}
+
+// NewWorktreeProcessor returns a WorktreeProcessor that logs through logger.
+func NewWorktreeProcessor(logger *slog.Logger) *WorktreeProcessor {
+	return &WorktreeProcessor{logger: logger}
+}
+
+// Process checks whether the snapshot workspace is a git worktree (where
+// .git is a file, not a directory). If so, it reconstructs a proper .git/
+// directory in the snapshot containing refs, config, HEAD, and an
+// objects/info/alternates file that points to the guest mount path.
+//
+// Returns nil, nil when nothing is reconstructed: .git is missing, is a directory, or fails validation.
+func (w *WorktreeProcessor) Process(_ context.Context, originalPath, snapshotPath string) (*workspace.PostProcessResult, error) {
+	// Step 1: Check if .git is a regular file (worktree indicator).
+	dotGitPath := filepath.Join(snapshotPath, ".git")
+	info, err := os.Lstat(dotGitPath)
+	if err != nil {
+		if errors.Is(err, fs.ErrNotExist) {
+			return nil, nil
+		}
+		return nil, fmt.Errorf("checking .git in snapshot: %w", err)
+	}
+	if info.IsDir() {
+		// Normal repo with .git directory — nothing to do.
+		return nil, nil
+	}
+	if !info.Mode().IsRegular() {
+		return nil, nil // neither directory nor regular file (e.g. a symlink): leave untouched
+	}
+
+	// Step 2: Parse the gitdir pointer from the .git file.
+	data, err := os.ReadFile(dotGitPath)
+	if err != nil {
+		return nil, fmt.Errorf("reading .git file: %w", err)
+	}
+	content := strings.TrimSpace(string(data))
+	if !strings.HasPrefix(content, "gitdir: ") {
+		w.logger.Warn("malformed .git file: missing 'gitdir: ' prefix",
+			"path", dotGitPath, "content", content)
+		return nil, nil
+	}
+	gitdirPath := strings.TrimPrefix(content, "gitdir: ")
+
+	// Step 3: Resolve gitdir to absolute path (relative paths resolve against originalPath).
+	if !filepath.IsAbs(gitdirPath) {
+		gitdirPath = filepath.Join(originalPath, gitdirPath)
+	}
+	gitdirPath = filepath.Clean(gitdirPath)
+
+	// Defense-in-depth: validate the gitdir path looks like a git worktree.
+	// Legitimate worktree gitdirs live under {repo}/.git/worktrees/{name}/.
+	// Reject paths that don't match this pattern to prevent crafted .git files
+	// from pointing to arbitrary host directories.
+	if !isWorktreeGitdir(gitdirPath) {
+		w.logger.Warn("gitdir path does not appear to be a git worktree, skipping",
+			"path", gitdirPath)
+		return nil, nil
+	}
+
+	// Step 4: Read commondir to find the main repo .git directory.
+ mainGitDir := gitdirPath // default: gitdir IS the main .git dir + commondirData, err := os.ReadFile(filepath.Join(gitdirPath, "commondir")) + if err != nil { + if !errors.Is(err, fs.ErrNotExist) { + return nil, fmt.Errorf("reading commondir: %w", err) + } + // No commondir file — gitdir is the main .git dir (rare but valid). + w.logger.Debug("no commondir file, using gitdir as main git dir", + "gitdir", gitdirPath) + } else { + commondir := strings.TrimSpace(string(commondirData)) + if !filepath.IsAbs(commondir) { + commondir = filepath.Join(gitdirPath, commondir) + } + mainGitDir = filepath.Clean(commondir) + } + + // Step 5: Validate that mainGitDir looks like a git directory. + // Defense-in-depth: a crafted .git file could point anywhere on the host. + // Verify HEAD exists before treating the resolved path as a git directory. + if _, err := os.Stat(filepath.Join(mainGitDir, "HEAD")); err != nil { + w.logger.Warn("resolved git directory does not contain HEAD, skipping worktree reconstruction", + "path", mainGitDir, "error", err) + return nil, nil + } + + // Step 6: Determine the objects path. + objectsPath := filepath.Join(mainGitDir, "objects") + if _, err := os.Stat(objectsPath); err != nil { + w.logger.Warn("git objects directory not found, skipping worktree reconstruction", + "path", objectsPath, "error", err) + return nil, nil + } + + // Step 7: Reconstruct .git/ directory in the snapshot. + // 7a: Remove the .git file. + if err := os.Remove(dotGitPath); err != nil { + return nil, fmt.Errorf("removing .git file from snapshot: %w", err) + } + + // 7b: Create .git/ directory. + if err := os.MkdirAll(dotGitPath, 0o755); err != nil { + return nil, fmt.Errorf("creating .git directory in snapshot: %w", err) + } + + // 7c: Copy HEAD and optional state files from the worktree gitdir. 
+ requiredWorktreeFiles := []string{"HEAD"} + for _, name := range requiredWorktreeFiles { + src := filepath.Join(gitdirPath, name) + dst := filepath.Join(dotGitPath, name) + if err := copyFile(src, dst); err != nil { + return nil, fmt.Errorf("copying %s from worktree gitdir: %w", name, err) + } + } + optionalWorktreeFiles := []string{"MERGE_HEAD", "CHERRY_PICK_HEAD", "REVERT_HEAD", "BISECT_LOG"} + for _, name := range optionalWorktreeFiles { + src := filepath.Join(gitdirPath, name) + dst := filepath.Join(dotGitPath, name) + if cpErr := copyFile(src, dst); cpErr != nil { + if errors.Is(cpErr, fs.ErrNotExist) { + w.logger.Debug("optional worktree file not found, skipping", + "file", name) + continue + } + return nil, fmt.Errorf("copying %s from worktree gitdir: %w", name, cpErr) + } + } + + // 7d: Copy config (sanitized), packed-refs, and info/exclude from main repo .git/. + configData, err := os.ReadFile(filepath.Join(mainGitDir, "config")) + if err != nil { + if !errors.Is(err, fs.ErrNotExist) { + return nil, fmt.Errorf("reading git config from main repo: %w", err) + } + w.logger.Debug("no config file in main repo git dir") + } else { + sanitized := SanitizeConfig(string(configData)) + if err := os.WriteFile(filepath.Join(dotGitPath, "config"), []byte(sanitized), 0o644); err != nil { + return nil, fmt.Errorf("writing sanitized git config: %w", err) + } + } + + optionalMainFiles := []string{"packed-refs"} + for _, name := range optionalMainFiles { + src := filepath.Join(mainGitDir, name) + dst := filepath.Join(dotGitPath, name) + if cpErr := copyFile(src, dst); cpErr != nil { + if errors.Is(cpErr, fs.ErrNotExist) { + w.logger.Debug("optional main repo file not found, skipping", + "file", name) + continue + } + return nil, fmt.Errorf("copying %s from main repo: %w", name, cpErr) + } + } + + // info/exclude (optional). 
+ infoExcludeSrc := filepath.Join(mainGitDir, "info", "exclude") + infoExcludeDst := filepath.Join(dotGitPath, "info", "exclude") + if cpErr := copyFile(infoExcludeSrc, infoExcludeDst); cpErr != nil { + if !errors.Is(cpErr, fs.ErrNotExist) { + return nil, fmt.Errorf("copying info/exclude from main repo: %w", cpErr) + } + w.logger.Debug("info/exclude not found in main repo, skipping") + } + + // 7e: Copy refs/ from both main repo (shared) and worktree gitdir (worktree-specific). + // Main repo refs first (shared branches, tags). + mainRefsDir := filepath.Join(mainGitDir, "refs") + snapshotRefsDir := filepath.Join(dotGitPath, "refs") + if err := copyDirRecursive(mainRefsDir, snapshotRefsDir); err != nil { + return nil, fmt.Errorf("copying refs from main repo: %w", err) + } + + // Worktree-specific refs override (typically in refs/worktree/, no overlap). + worktreeRefsDir := filepath.Join(gitdirPath, "refs") + if err := copyDirRecursive(worktreeRefsDir, snapshotRefsDir); err != nil { + return nil, fmt.Errorf("copying refs from worktree gitdir: %w", err) + } + + // 7f: Create objects/ with info/alternates pointing to guest mount. + objectsInfoDir := filepath.Join(dotGitPath, "objects", "info") + if err := os.MkdirAll(objectsInfoDir, 0o755); err != nil { + return nil, fmt.Errorf("creating objects/info directory: %w", err) + } + alternatesContent := gitObjectsGuestPath + "\n" + if err := os.WriteFile(filepath.Join(objectsInfoDir, "alternates"), []byte(alternatesContent), 0o644); err != nil { + return nil, fmt.Errorf("writing objects/info/alternates: %w", err) + } + + // 7g: Create empty objects/pack/ directory (git expects it). + objectsPackDir := filepath.Join(dotGitPath, "objects", "pack") + if err := os.MkdirAll(objectsPackDir, 0o755); err != nil { + return nil, fmt.Errorf("creating objects/pack directory: %w", err) + } + + // Step 8: Return mount request and diff exclusions. 
+	return &workspace.PostProcessResult{
+		Mounts: []workspace.MountRequest{{
+			Tag:       gitObjectsTag,
+			HostPath:  objectsPath,
+			GuestPath: gitObjectsGuestPath,
+			ReadOnly:  true,
+		}},
+		DiffExclude: []string{".git", ".git/"},
+	}, nil
+}
+
+// isWorktreeGitdir reports whether path contains /.git/worktrees/ as a run of
+// path components. The check is purely lexical; nothing is read from disk.
+func isWorktreeGitdir(path string) bool {
+	sep := string(filepath.Separator)
+	return strings.Contains(path, sep+".git"+sep+"worktrees"+sep)
+}
+
+// copyFile copies the regular file src to dst, keeping src's permission bits
+// and creating dst's parent directories as needed. A symlink at src is
+// skipped (nil is returned and nothing is written) so that a crafted git
+// directory cannot make the copy read arbitrary host files.
+func copyFile(src, dst string) error {
+	info, err := os.Lstat(src)
+	if err != nil {
+		return err
+	}
+	if info.Mode()&os.ModeSymlink != 0 {
+		return nil // silently skip symlinks
+	}
+
+	data, err := os.ReadFile(src)
+	if err != nil {
+		return err
+	}
+
+	// Reuse the Lstat result for the mode: src is not a symlink, so a second
+	// os.Stat would only repeat the syscall and, because Stat follows links,
+	// could report another file's mode if src were replaced in the meantime.
+	perm := info.Mode().Perm()
+
+	if err := os.MkdirAll(filepath.Dir(dst), 0o755); err != nil {
+		return fmt.Errorf("creating parent directory for %s: %w", dst, err)
+	}
+
+	return os.WriteFile(dst, data, perm)
+}
+
+// copyDirRecursive walks src and recreates its structure in dst.
+// Only regular files and directories are copied (symlinks are skipped).
+// If src does not exist, this is a no-op (returns nil).
+func copyDirRecursive(src, dst string) error {
+	srcInfo, err := os.Stat(src)
+	if err != nil {
+		if errors.Is(err, fs.ErrNotExist) {
+			return nil
+		}
+		return fmt.Errorf("stat %s: %w", src, err)
+	}
+	if !srcInfo.IsDir() {
+		return fmt.Errorf("%s is not a directory", src)
+	}
+
+	return filepath.WalkDir(src, func(path string, d fs.DirEntry, walkErr error) error {
+		if walkErr != nil {
+			return walkErr // abort the walk on the first traversal error
+		}
+
+		rel, err := filepath.Rel(src, path)
+		if err != nil {
+			return fmt.Errorf("computing relative path: %w", err)
+		}
+		target := filepath.Join(dst, rel)
+
+		// Skip symlinks: they are neither followed nor recreated in dst.
+		if d.Type()&os.ModeSymlink != 0 {
+			return nil
+		}
+
+		if d.IsDir() {
+			return os.MkdirAll(target, 0o755) // directories get 0o755, not the source mode
+		}
+
+		if !d.Type().IsRegular() {
+			return nil // any other non-regular entry is left out of the copy
+		}
+
+		return copyFile(path, target)
+	})
+}
diff --git a/internal/infra/git/worktree_test.go b/internal/infra/git/worktree_test.go
new file mode 100644
index 0000000..6511705
--- /dev/null
+++ b/internal/infra/git/worktree_test.go
@@ -0,0 +1,410 @@
+// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc.
+// SPDX-License-Identifier: Apache-2.0
+
+package git
+
+import (
+	"context"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestWorktreeProcessor_NotAWorktree(t *testing.T) {
+	t.Parallel()
+
+	snapshot := t.TempDir()
+	// .git is a directory — normal repo, not a worktree.
+	require.NoError(t, os.MkdirAll(filepath.Join(snapshot, ".git"), 0o755))
+
+	proc := NewWorktreeProcessor(slog.Default())
+	result, err := proc.Process(context.Background(), snapshot, snapshot)
+
+	require.NoError(t, err)
+	assert.Nil(t, result)
+}
+
+func TestWorktreeProcessor_NoGitEntry(t *testing.T) {
+	t.Parallel()
+
+	snapshot := t.TempDir()
+	// No .git at all.
+ + proc := NewWorktreeProcessor(slog.Default()) + result, err := proc.Process(context.Background(), snapshot, snapshot) + + require.NoError(t, err) + assert.Nil(t, result) +} + +func TestWorktreeProcessor_ExternalWorktree(t *testing.T) { + t.Parallel() + + // Set up a simulated main repo .git directory. + mainRepo := t.TempDir() + mainGitDir := filepath.Join(mainRepo, ".git") + require.NoError(t, os.MkdirAll(mainGitDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "HEAD"), []byte("ref: refs/heads/main\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "config"), []byte("[core]\n\tbare = false\n"), 0o644)) + require.NoError(t, os.MkdirAll(filepath.Join(mainGitDir, "objects", "pack"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(mainGitDir, "refs", "heads"), 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(mainGitDir, "refs", "heads", "main"), + []byte("abcdef1234567890abcdef1234567890abcdef12\n"), + 0o644, + )) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "packed-refs"), []byte("# pack-refs with: peeled fully-peeled sorted\n"), 0o644)) + require.NoError(t, os.MkdirAll(filepath.Join(mainGitDir, "info"), 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "info", "exclude"), []byte("# git ls-files --others --exclude-from\n"), 0o644)) + + // Set up a simulated worktree gitdir (like .git/worktrees/wt1/). + worktreeGitDir := filepath.Join(mainGitDir, "worktrees", "wt1") + require.NoError(t, os.MkdirAll(worktreeGitDir, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(worktreeGitDir, "HEAD"), + []byte("ref: refs/heads/feature\n"), + 0o644, + )) + require.NoError(t, os.WriteFile( + filepath.Join(worktreeGitDir, "commondir"), + []byte("../..\n"), // relative: worktrees/wt1/../../ = .git/ + 0o644, + )) + require.NoError(t, os.MkdirAll(filepath.Join(worktreeGitDir, "refs", "heads"), 0o755)) + + // Set up the original workspace directory with a .git file. 
+ workspace := t.TempDir() + gitFileContent := "gitdir: " + worktreeGitDir + "\n" + require.NoError(t, os.WriteFile(filepath.Join(workspace, ".git"), []byte(gitFileContent), 0o644)) + + // Set up the snapshot directory with a copy of the .git file. + snapshot := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(snapshot, ".git"), []byte(gitFileContent), 0o644)) + + proc := NewWorktreeProcessor(slog.Default()) + result, err := proc.Process(context.Background(), workspace, snapshot) + + require.NoError(t, err) + require.NotNil(t, result) + + // Verify mount request. + require.Len(t, result.Mounts, 1) + assert.Equal(t, "git-objects", result.Mounts[0].Tag) + assert.Equal(t, filepath.Join(mainGitDir, "objects"), result.Mounts[0].HostPath) + assert.Equal(t, "/mnt/git-objects", result.Mounts[0].GuestPath) + assert.True(t, result.Mounts[0].ReadOnly) + + // Verify diff exclude. + assert.Contains(t, result.DiffExclude, ".git") + + // Verify .git is now a directory. + dotGitInfo, err := os.Lstat(filepath.Join(snapshot, ".git")) + require.NoError(t, err) + assert.True(t, dotGitInfo.IsDir()) + + // Verify HEAD is from the worktree (feature branch). + headData, err := os.ReadFile(filepath.Join(snapshot, ".git", "HEAD")) + require.NoError(t, err) + assert.Equal(t, "ref: refs/heads/feature\n", string(headData)) + + // Verify config exists (and is sanitized — covered in separate test). + _, err = os.Stat(filepath.Join(snapshot, ".git", "config")) + assert.NoError(t, err) + + // Verify objects/info/alternates. + alternatesData, err := os.ReadFile(filepath.Join(snapshot, ".git", "objects", "info", "alternates")) + require.NoError(t, err) + assert.Equal(t, "/mnt/git-objects\n", string(alternatesData)) + + // Verify objects/pack/ exists. + packInfo, err := os.Stat(filepath.Join(snapshot, ".git", "objects", "pack")) + require.NoError(t, err) + assert.True(t, packInfo.IsDir()) + + // Verify refs from main repo were copied. 
+ mainRefData, err := os.ReadFile(filepath.Join(snapshot, ".git", "refs", "heads", "main")) + require.NoError(t, err) + assert.Equal(t, "abcdef1234567890abcdef1234567890abcdef12\n", string(mainRefData)) + + // Verify packed-refs was copied. + _, err = os.Stat(filepath.Join(snapshot, ".git", "packed-refs")) + assert.NoError(t, err) + + // Verify info/exclude was copied. + _, err = os.Stat(filepath.Join(snapshot, ".git", "info", "exclude")) + assert.NoError(t, err) +} + +func TestWorktreeProcessor_MalformedGitFile(t *testing.T) { + t.Parallel() + + snapshot := t.TempDir() + // .git file with garbage content (no "gitdir: " prefix). + require.NoError(t, os.WriteFile(filepath.Join(snapshot, ".git"), []byte("this is garbage\n"), 0o644)) + + proc := NewWorktreeProcessor(slog.Default()) + result, err := proc.Process(context.Background(), snapshot, snapshot) + + require.NoError(t, err) + assert.Nil(t, result) +} + +func TestWorktreeProcessor_MissingObjects(t *testing.T) { + t.Parallel() + + // Set up main repo WITHOUT objects directory. + mainRepo := t.TempDir() + mainGitDir := filepath.Join(mainRepo, ".git") + require.NoError(t, os.MkdirAll(mainGitDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "HEAD"), []byte("ref: refs/heads/main\n"), 0o644)) + // Intentionally do NOT create objects/. + + // Set up worktree gitdir pointing to main repo. 
+	worktreeGitDir := filepath.Join(mainGitDir, "worktrees", "wt1")
+	require.NoError(t, os.MkdirAll(worktreeGitDir, 0o755))
+	require.NoError(t, os.WriteFile(
+		filepath.Join(worktreeGitDir, "HEAD"),
+		[]byte("ref: refs/heads/feature\n"),
+		0o644,
+	))
+	require.NoError(t, os.WriteFile(
+		filepath.Join(worktreeGitDir, "commondir"),
+		[]byte("../..\n"),
+		0o644,
+	))
+
+	workspace := t.TempDir()
+	gitFileContent := "gitdir: " + worktreeGitDir + "\n"
+	require.NoError(t, os.WriteFile(filepath.Join(workspace, ".git"), []byte(gitFileContent), 0o644))
+
+	snapshot := t.TempDir()
+	require.NoError(t, os.WriteFile(filepath.Join(snapshot, ".git"), []byte(gitFileContent), 0o644))
+
+	proc := NewWorktreeProcessor(slog.Default())
+	result, err := proc.Process(context.Background(), workspace, snapshot)
+
+	require.NoError(t, err)
+	assert.Nil(t, result)
+}
+
+func TestWorktreeProcessor_CraftedGitdir(t *testing.T) {
+	t.Parallel()
+
+	// Simulate a crafted repo whose .git file points to a gitdir that is
+	// NOT under {repo}/.git/worktrees/{name}/. This could be used to trick
+	// the processor into reading arbitrary host directories.
+	craftedDir := t.TempDir()
+	// Provide everything the processor would need if the path were accepted.
+ require.NoError(t, os.WriteFile(filepath.Join(craftedDir, "HEAD"), []byte("ref: refs/heads/main\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(craftedDir, "commondir"), []byte(".\n"), 0o644)) + require.NoError(t, os.MkdirAll(filepath.Join(craftedDir, "objects", "pack"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(craftedDir, "refs", "heads"), 0o755)) + + workspace := t.TempDir() + gitFileContent := "gitdir: " + craftedDir + "\n" + require.NoError(t, os.WriteFile(filepath.Join(workspace, ".git"), []byte(gitFileContent), 0o644)) + + snapshot := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(snapshot, ".git"), []byte(gitFileContent), 0o644)) + + proc := NewWorktreeProcessor(slog.Default()) + result, err := proc.Process(context.Background(), workspace, snapshot) + + require.NoError(t, err) + assert.Nil(t, result, "crafted gitdir path should be rejected (nil result)") + + // .git should remain a file (not converted to a directory). + dotGitInfo, err := os.Lstat(filepath.Join(snapshot, ".git")) + require.NoError(t, err) + assert.True(t, dotGitInfo.Mode().IsRegular(), ".git should still be a file") +} + +func TestWorktreeProcessor_SymlinkInGitdir(t *testing.T) { + t.Parallel() + + // Set up a legitimate-looking worktree structure, but with a symlinked + // packed-refs pointing to an external file. The copyFile function should + // skip the symlink rather than following it. 
+ mainRepo := t.TempDir() + mainGitDir := filepath.Join(mainRepo, ".git") + require.NoError(t, os.MkdirAll(mainGitDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "HEAD"), []byte("ref: refs/heads/main\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "config"), []byte("[core]\n\tbare = false\n"), 0o644)) + require.NoError(t, os.MkdirAll(filepath.Join(mainGitDir, "objects", "pack"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(mainGitDir, "refs", "heads"), 0o755)) + + // Create a secret file that should NOT be exposed. + secretFile := filepath.Join(t.TempDir(), "secret.txt") + secretContent := "TOP SECRET DATA" + require.NoError(t, os.WriteFile(secretFile, []byte(secretContent), 0o644)) + + // Replace packed-refs with a symlink to the secret file. + packedRefsPath := filepath.Join(mainGitDir, "packed-refs") + require.NoError(t, os.Symlink(secretFile, packedRefsPath)) + + // Set up worktree gitdir. + worktreeGitDir := filepath.Join(mainGitDir, "worktrees", "wt1") + require.NoError(t, os.MkdirAll(worktreeGitDir, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(worktreeGitDir, "HEAD"), + []byte("ref: refs/heads/feature\n"), + 0o644, + )) + require.NoError(t, os.WriteFile( + filepath.Join(worktreeGitDir, "commondir"), + []byte("../..\n"), + 0o644, + )) + + workspace := t.TempDir() + gitFileContent := "gitdir: " + worktreeGitDir + "\n" + require.NoError(t, os.WriteFile(filepath.Join(workspace, ".git"), []byte(gitFileContent), 0o644)) + + snapshot := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(snapshot, ".git"), []byte(gitFileContent), 0o644)) + + proc := NewWorktreeProcessor(slog.Default()) + result, err := proc.Process(context.Background(), workspace, snapshot) + + require.NoError(t, err) + require.NotNil(t, result, "worktree processing should succeed") + + // The symlinked packed-refs should NOT have been copied. 
+ snapshotPackedRefs := filepath.Join(snapshot, ".git", "packed-refs") + _, statErr := os.Stat(snapshotPackedRefs) + assert.True(t, os.IsNotExist(statErr), + "symlinked packed-refs should not be copied into the snapshot") +} + +func TestWorktreeProcessor_ConfigIsSanitized(t *testing.T) { + t.Parallel() + + // Set up main repo with a config containing credentials. + mainRepo := t.TempDir() + mainGitDir := filepath.Join(mainRepo, ".git") + require.NoError(t, os.MkdirAll(mainGitDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "HEAD"), []byte("ref: refs/heads/main\n"), 0o644)) + require.NoError(t, os.MkdirAll(filepath.Join(mainGitDir, "objects", "pack"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(mainGitDir, "refs", "heads"), 0o755)) + + // Config with credential section and URL containing user:token. + configContent := strings.Join([]string{ + "[core]", + "\tbare = false", + "[credential]", + "\thelper = store", + `[remote "origin"]`, + "\turl = https://user:token@github.com/org/repo.git", + "\tfetch = +refs/heads/*:refs/remotes/origin/*", + }, "\n") + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "config"), []byte(configContent), 0o644)) + + // Set up worktree gitdir. 
+ worktreeGitDir := filepath.Join(mainGitDir, "worktrees", "wt1") + require.NoError(t, os.MkdirAll(worktreeGitDir, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(worktreeGitDir, "HEAD"), + []byte("ref: refs/heads/feature\n"), + 0o644, + )) + require.NoError(t, os.WriteFile( + filepath.Join(worktreeGitDir, "commondir"), + []byte("../..\n"), + 0o644, + )) + + workspace := t.TempDir() + gitFileContent := "gitdir: " + worktreeGitDir + "\n" + require.NoError(t, os.WriteFile(filepath.Join(workspace, ".git"), []byte(gitFileContent), 0o644)) + + snapshot := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(snapshot, ".git"), []byte(gitFileContent), 0o644)) + + proc := NewWorktreeProcessor(slog.Default()) + result, err := proc.Process(context.Background(), workspace, snapshot) + + require.NoError(t, err) + require.NotNil(t, result) + + // Read the config that was written to the snapshot. + sanitizedConfig, err := os.ReadFile(filepath.Join(snapshot, ".git", "config")) + require.NoError(t, err) + + configStr := string(sanitizedConfig) + + // Credential section should be stripped. + assert.NotContains(t, configStr, "[credential]") + assert.NotContains(t, configStr, "helper = store") + + // URL should have credentials stripped. + assert.NotContains(t, configStr, "user:token") + + // Core section should still be present. + assert.Contains(t, configStr, "[core]") + assert.Contains(t, configStr, "bare = false") + + // Remote section should still be present (with sanitized URL). + assert.Contains(t, configStr, `[remote "origin"]`) + assert.Contains(t, configStr, "github.com/org/repo.git") +} + +func TestWorktreeProcessor_DetachedHEAD(t *testing.T) { + t.Parallel() + + detachedSHA := "a1b2c3d4e5f6a1b2c3d4e5f6a1b2c3d4e5f6a1b2" + + // Set up a simulated main repo .git directory. 
+ mainRepo := t.TempDir() + mainGitDir := filepath.Join(mainRepo, ".git") + require.NoError(t, os.MkdirAll(mainGitDir, 0o755)) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "HEAD"), []byte("ref: refs/heads/main\n"), 0o644)) + require.NoError(t, os.WriteFile(filepath.Join(mainGitDir, "config"), []byte("[core]\n\tbare = false\n"), 0o644)) + require.NoError(t, os.MkdirAll(filepath.Join(mainGitDir, "objects", "pack"), 0o755)) + require.NoError(t, os.MkdirAll(filepath.Join(mainGitDir, "refs", "heads"), 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(mainGitDir, "refs", "heads", "main"), + []byte("abcdef1234567890abcdef1234567890abcdef12\n"), + 0o644, + )) + + // Set up a simulated worktree gitdir with a detached HEAD (raw SHA, not a ref). + worktreeGitDir := filepath.Join(mainGitDir, "worktrees", "wt1") + require.NoError(t, os.MkdirAll(worktreeGitDir, 0o755)) + require.NoError(t, os.WriteFile( + filepath.Join(worktreeGitDir, "HEAD"), + []byte(detachedSHA+"\n"), + 0o644, + )) + require.NoError(t, os.WriteFile( + filepath.Join(worktreeGitDir, "commondir"), + []byte("../..\n"), + 0o644, + )) + require.NoError(t, os.MkdirAll(filepath.Join(worktreeGitDir, "refs", "heads"), 0o755)) + + // Set up the original workspace directory with a .git file. + workspace := t.TempDir() + gitFileContent := "gitdir: " + worktreeGitDir + "\n" + require.NoError(t, os.WriteFile(filepath.Join(workspace, ".git"), []byte(gitFileContent), 0o644)) + + // Set up the snapshot directory with a copy of the .git file. + snapshot := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(snapshot, ".git"), []byte(gitFileContent), 0o644)) + + proc := NewWorktreeProcessor(slog.Default()) + result, err := proc.Process(context.Background(), workspace, snapshot) + + require.NoError(t, err) + require.NotNil(t, result) + + // Verify the snapshot .git/HEAD contains the raw SHA (detached HEAD preserved), + // NOT "ref: refs/heads/main" from the main repo. 
+ headData, err := os.ReadFile(filepath.Join(snapshot, ".git", "HEAD")) + require.NoError(t, err) + assert.Equal(t, detachedSHA+"\n", string(headData)) +} diff --git a/internal/infra/vm/hooks_mounts.go b/internal/infra/vm/hooks_mounts.go new file mode 100644 index 0000000..70d4083 --- /dev/null +++ b/internal/infra/vm/hooks_mounts.go @@ -0,0 +1,57 @@ +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. +// SPDX-License-Identifier: Apache-2.0 + +package vm + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + + "github.com/stacklok/go-microvm/image" + + "github.com/stacklok/brood-box/pkg/domain/workspace" +) + +// mountConfigPath is the guest path where extra mount configuration is written. +const mountConfigPath = "/etc/broodbox-mounts.json" + +// mountEntry is the JSON-serializable form of a mount request for the guest. +type mountEntry struct { + Tag string `json:"tag"` + GuestPath string `json:"guest_path"` + ReadOnly bool `json:"read_only"` +} + +// InjectMountConfig returns a rootfs hook that writes extra mount configuration +// to /etc/broodbox-mounts.json. The guest init (bbox-init) reads this file to +// mount additional virtiofs shares. 
+func InjectMountConfig(mounts []workspace.MountRequest) func(string, *image.OCIConfig) error {
+	return func(rootfsDir string, _ *image.OCIConfig) error {
+		// Project each request onto the JSON shape read by the guest init.
+		entries := make([]mountEntry, 0, len(mounts))
+		for _, req := range mounts {
+			entries = append(entries, mountEntry{
+				Tag:       req.Tag,
+				GuestPath: req.GuestPath,
+				ReadOnly:  req.ReadOnly,
+			})
+		}
+
+		payload, err := json.Marshal(entries)
+		if err != nil {
+			return fmt.Errorf("marshaling mount config: %w", err)
+		}
+
+		// Ensure the parent directory exists inside the rootfs before writing.
+		target := filepath.Join(rootfsDir, mountConfigPath)
+		if err = os.MkdirAll(filepath.Dir(target), 0o755); err != nil {
+			return fmt.Errorf("creating mount config directory: %w", err)
+		}
+		if err = os.WriteFile(target, payload, 0o644); err != nil {
+			return fmt.Errorf("writing mount config: %w", err)
+		}
+		return nil
+	}
+}
diff --git a/internal/infra/vm/runner.go b/internal/infra/vm/runner.go
index c0069b5..a09cbfe 100644
--- a/internal/infra/vm/runner.go
+++ b/internal/infra/vm/runner.go
@@ -257,6 +257,13 @@ func (r *MicroVMRunner) Start(ctx context.Context, cfg domvm.VMConfig) (domvm.VM
 		))
 	}
 
+	// Add mount config injection hook if extra mounts are requested.
+	if len(cfg.ExtraMounts) > 0 {
+		opts = append(opts, microvm.WithRootFSHook(
+			InjectMountConfig(cfg.ExtraMounts),
+		))
+	}
+
 	// Add backend options if runner path, lib dir, or embedded sources are specified.
 	var backendOpts []libkrun.Option
 	if r.runnerPath != "" {
@@ -300,6 +307,14 @@ func (r *MicroVMRunner) Start(ctx context.Context, cfg domvm.VMConfig) (domvm.VM
 		}))
 	}
 
+	// Add extra mounts requested by post-processors (e.g. git objects).
+	for _, m := range cfg.ExtraMounts {
+		opts = append(opts, microvm.WithVirtioFS(microvm.VirtioFSMount{
+			Tag:      m.Tag,
+			HostPath: m.HostPath,
+		}))
+	}
+
 	// Run microvm.
 	start := time.Now()
 	pvm, err := microvm.Run(ctx, cfg.Image, opts...)
diff --git a/pkg/domain/vm/vm.go b/pkg/domain/vm/vm.go index 93e1839..40ab16d 100644 --- a/pkg/domain/vm/vm.go +++ b/pkg/domain/vm/vm.go @@ -15,6 +15,7 @@ import ( "github.com/stacklok/brood-box/pkg/domain/egress" "github.com/stacklok/brood-box/pkg/domain/git" "github.com/stacklok/brood-box/pkg/domain/settings" + "github.com/stacklok/brood-box/pkg/domain/workspace" ) // VMConfig holds the parameters needed to start a sandbox VM. @@ -78,6 +79,10 @@ type VMConfig struct { // SettingsManifest declares agent settings to inject into the rootfs. SettingsManifest *settings.Manifest + + // ExtraMounts are additional virtiofs mounts requested by snapshot + // post-processors (e.g. git objects for worktree support). + ExtraMounts []workspace.MountRequest } // HostService describes an HTTP service exposed from host to guest. diff --git a/pkg/domain/workspace/postprocessor.go b/pkg/domain/workspace/postprocessor.go index 65e519f..b6afecc 100644 --- a/pkg/domain/workspace/postprocessor.go +++ b/pkg/domain/workspace/postprocessor.go @@ -5,6 +5,33 @@ package workspace import "context" +// MountRequest describes an additional filesystem mount that a +// post-processor needs exposed inside the guest VM. +type MountRequest struct { + // Tag is the virtiofs tag visible in the guest (e.g. "git-objects"). + Tag string + + // HostPath is the host directory to mount. + HostPath string + + // GuestPath is the mount point inside the guest (e.g. "/mnt/git-objects"). + GuestPath string + + // ReadOnly requests a read-only mount when true. NOTE(review): appears to be enforced only by the guest's mount flags, not the host-side share — confirm. + ReadOnly bool +} + +// PostProcessResult carries side-effects produced by a post-processor +// that must be wired into the VM configuration or diff pipeline. +type PostProcessResult struct { + // Mounts are additional virtiofs mounts to expose inside the VM. + Mounts []MountRequest + + // DiffExclude lists additional path prefixes to exclude from the diff + // computation (e.g. ".git" excludes ".git" and everything under ".git/").
+ DiffExclude []string +} + // SnapshotPostProcessor runs a transformation on a workspace snapshot // after it has been created but before the VM is started. // @@ -12,6 +39,9 @@ import "context" // a post-processor must read files excluded from the snapshot (e.g., // .git/config is a security pattern excluded from the snapshot, but the // sanitizer needs to read it from the original workspace). +// +// Process returns a PostProcessResult describing any side-effects (extra +// mounts, diff excludes); a nil result means there are none. type SnapshotPostProcessor interface { - Process(ctx context.Context, originalPath, snapshotPath string) error + Process(ctx context.Context, originalPath, snapshotPath string) (*PostProcessResult, error) } diff --git a/pkg/sandbox/sandbox.go b/pkg/sandbox/sandbox.go index 8e539de..046147a 100644 --- a/pkg/sandbox/sandbox.go +++ b/pkg/sandbox/sandbox.go @@ -432,6 +432,9 @@ func (s *SandboxRunner) Prepare(ctx context.Context, agentName string, opts RunO diffMatcher = snapshot.NopMatcher } + var extraMounts []workspace.MountRequest + var extraDiffExclude []string + if opts.Snapshot.Enabled && s.workspaceCloner != nil { s.observer.Start(progress.PhaseCreatingSnapshot, "Creating workspace snapshot...") @@ -447,17 +450,27 @@ func (s *SandboxRunner) Prepare(ctx context.Context, agentName string, opts RunO "snapshot", snap.SnapshotPath, ) - // Run post-processors on the snapshot (e.g., git config sanitizer). + // Run post-processors on the snapshot (e.g., git config sanitizer, worktree processor). // Failures abort VM start — post-processors are security-relevant // (credential stripping) and must not be silently skipped.
for _, pp := range s.snapshotPostProcessors { - if ppErr := pp.Process(ctx, snap.OriginalPath, snap.SnapshotPath); ppErr != nil { + ppResult, ppErr := pp.Process(ctx, snap.OriginalPath, snap.SnapshotPath) + if ppErr != nil { s.observer.Fail("Snapshot post-processing failed") if cleanErr := snap.Cleanup(); cleanErr != nil { s.logger.Error("failed to clean up snapshot after post-processor failure", "error", cleanErr) } return nil, fmt.Errorf("snapshot post-processing: %w", ppErr) } + if ppResult != nil { + extraMounts = append(extraMounts, ppResult.Mounts...) + extraDiffExclude = append(extraDiffExclude, ppResult.DiffExclude...) + } + } + + // If post-processors added diff excludes, compose them with the existing matcher. + if len(extraDiffExclude) > 0 { + diffMatcher = composeMatcher(diffMatcher, extraDiffExclude) } workspacePath = snap.SnapshotPath @@ -485,6 +498,7 @@ func (s *SandboxRunner) Prepare(ctx context.Context, agentName string, opts RunO LogLevel: opts.LogLevel, TmpSize: ag.DefaultTmpSize, SettingsManifest: settingsManifest, + ExtraMounts: extraMounts, } sandboxVM, err := s.vmRunner.Start(ctx, vmCfg) @@ -787,3 +801,28 @@ func (s *SandboxRunner) resolveMCPConfig(cfg *SandboxConfig, agentName string) c return mcpCfg } + +// composeMatcher wraps an existing snapshot.Matcher with additional gitignore-style +// exclude patterns. Paths matching either the original matcher or any of the extra +// patterns are excluded from the diff. +func composeMatcher(base snapshot.Matcher, extraPatterns []string) snapshot.Matcher { + return &compositeMatcher{base: base, extra: extraPatterns} +} + +// compositeMatcher combines a base matcher with a set of literal path prefixes. 
+type compositeMatcher struct { + base snapshot.Matcher + extra []string +} + +func (c *compositeMatcher) Match(path string) bool { + if c.base != nil && c.base.Match(path) { + return true + } + for _, pattern := range c.extra { + if path == pattern || strings.HasPrefix(path, strings.TrimSuffix(pattern, "/")+"/") { + return true + } + } + return false +} diff --git a/pkg/sandbox/sandbox_test.go b/pkg/sandbox/sandbox_test.go index e8fa9c5..7e29163 100644 --- a/pkg/sandbox/sandbox_test.go +++ b/pkg/sandbox/sandbox_test.go @@ -1680,3 +1680,30 @@ func TestSandboxRunner_Prepare_MCPSuccess_AddsHostServices(t *testing.T) { assert.Equal(t, "mcp", sb.VMConfig.HostServices[0].Name) assert.Equal(t, uint16(4483), sb.VMConfig.HostServices[0].Port) } + +func TestComposeMatcher(t *testing.T) { + tests := []struct { + name string + patterns []string + path string + want bool + }{ + {"exact match", []string{".git"}, ".git", true}, + {"prefix match with slash", []string{".git"}, ".git/config", true}, + {"prefix match trailing slash pattern", []string{".git/"}, ".git/refs/heads", true}, + {"no false positive on .github", []string{".git"}, ".github/workflows/ci.yml", false}, + {"no false positive on .gitignore", []string{".git"}, ".gitignore", false}, + {"no false positive on .gitmodules", []string{".git"}, ".gitmodules", false}, + {"no match", []string{".git"}, "src/main.go", false}, + {"multiple patterns", []string{".git", "vendor"}, "vendor/lib/foo.go", true}, + {"base matcher delegates", []string{}, ".git", false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + m := composeMatcher(snapshot.NopMatcher, tt.patterns) + got := m.Match(tt.path) + assert.Equal(t, tt.want, got) + }) + } +}