Skip to content
74 changes: 74 additions & 0 deletions cmd/api/api/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,80 @@ func (s *ApiService) RestoreInstance(ctx context.Context, request oapi.RestoreIn
return oapi.RestoreInstance200JSONResponse(instanceToOAPI(*result)), nil
}

// PromoteInstanceToTemplate promotes a standby instance into a fork-only
// template and returns the updated instance.
//
// The id path parameter may be an instance ID, name, or ID prefix; it is
// resolved by the ResolveResource middleware before this handler runs. If no
// resolved instance is present in ctx the handler answers with a 500.
//
// Manager errors are mapped onto responses as follows:
//   - instances.ErrNotFound     -> 404 "not_found"
//   - instances.ErrInvalidState -> 409 "invalid_state" (message is err.Error())
//   - any other error           -> 500 "internal_error" (the error is logged)
//
// The returned Go error is always nil; failures are carried in the response.
func (s *ApiService) PromoteInstanceToTemplate(ctx context.Context, request oapi.PromoteInstanceToTemplateRequestObject) (oapi.PromoteInstanceToTemplateResponseObject, error) {
	resolved := mw.GetResolvedInstance[instances.Instance](ctx)
	if resolved == nil {
		return oapi.PromoteInstanceToTemplate500JSONResponse{
			Code:    "internal_error",
			Message: "resource not resolved",
		}, nil
	}
	log := logger.FromContext(ctx)

	promoted, err := s.InstanceManager.PromoteToTemplate(ctx, resolved.Id)
	if err == nil {
		return oapi.PromoteInstanceToTemplate200JSONResponse(instanceToOAPI(*promoted)), nil
	}

	if errors.Is(err, instances.ErrNotFound) {
		return oapi.PromoteInstanceToTemplate404JSONResponse{
			Code:    "not_found",
			Message: "instance not found",
		}, nil
	}
	if errors.Is(err, instances.ErrInvalidState) {
		// The manager's message explains which state blocked the promotion.
		return oapi.PromoteInstanceToTemplate409JSONResponse{
			Code:    "invalid_state",
			Message: err.Error(),
		}, nil
	}

	// Unclassified failure: log the detail, return a generic message.
	log.ErrorContext(ctx, "failed to promote instance to template", "error", err)
	return oapi.PromoteInstanceToTemplate500JSONResponse{
		Code:    "internal_error",
		Message: "failed to promote instance to template",
	}, nil
}

// DemoteInstanceTemplate demotes a template back to standby so it can be
// restored or deleted, and returns the updated instance.
//
// The id path parameter may be an instance ID, name, or ID prefix; it is
// resolved by the ResolveResource middleware before this handler runs. If no
// resolved instance is present in ctx the handler answers with a 500.
//
// Manager errors are mapped onto responses as follows:
//   - instances.ErrNotFound     -> 404 "not_found"
//   - instances.ErrInvalidState -> 409 "invalid_state" (message is err.Error())
//   - any other error           -> 500 "internal_error" (the error is logged)
//
// The returned Go error is always nil; failures are carried in the response.
func (s *ApiService) DemoteInstanceTemplate(ctx context.Context, request oapi.DemoteInstanceTemplateRequestObject) (oapi.DemoteInstanceTemplateResponseObject, error) {
	resolved := mw.GetResolvedInstance[instances.Instance](ctx)
	if resolved == nil {
		return oapi.DemoteInstanceTemplate500JSONResponse{
			Code:    "internal_error",
			Message: "resource not resolved",
		}, nil
	}
	log := logger.FromContext(ctx)

	demoted, err := s.InstanceManager.DemoteTemplate(ctx, resolved.Id)
	if err == nil {
		return oapi.DemoteInstanceTemplate200JSONResponse(instanceToOAPI(*demoted)), nil
	}

	if errors.Is(err, instances.ErrNotFound) {
		return oapi.DemoteInstanceTemplate404JSONResponse{
			Code:    "not_found",
			Message: "instance not found",
		}, nil
	}
	if errors.Is(err, instances.ErrInvalidState) {
		// The manager's message explains which state blocked the demotion.
		return oapi.DemoteInstanceTemplate409JSONResponse{
			Code:    "invalid_state",
			Message: err.Error(),
		}, nil
	}

	// Unclassified failure: log the detail, return a generic message.
	log.ErrorContext(ctx, "failed to demote template", "error", err)
	return oapi.DemoteInstanceTemplate500JSONResponse{
		Code:    "internal_error",
		Message: "failed to demote template",
	}, nil
}

// ForkInstance forks an instance from stopped or standby into a new instance.
// The id parameter can be an instance ID, name, or ID prefix.
// Note: Resolution is handled by ResolveResource middleware.
Expand Down
8 changes: 8 additions & 0 deletions lib/builds/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ func (m *mockInstanceManager) RestoreInstance(ctx context.Context, id string) (*
return nil, nil
}

// PromoteToTemplate is a no-op stub on the mock instance manager: it ignores
// its arguments and returns a nil instance with a nil error.
func (m *mockInstanceManager) PromoteToTemplate(ctx context.Context, id string) (*instances.Instance, error) {
return nil, nil
}

// DemoteTemplate is a no-op stub on the mock instance manager: it ignores its
// arguments and returns a nil instance with a nil error.
func (m *mockInstanceManager) DemoteTemplate(ctx context.Context, id string) (*instances.Instance, error) {
return nil, nil
}

// RestoreSnapshot is not implemented by the mock instance manager: it ignores
// its arguments and always returns instances.ErrNotSupported with a nil
// instance.
func (m *mockInstanceManager) RestoreSnapshot(ctx context.Context, id string, snapshotID string, req instances.RestoreSnapshotRequest) (*instances.Instance, error) {
return nil, instances.ErrNotSupported
}
Expand Down
29 changes: 29 additions & 0 deletions lib/forkvm/copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,30 @@ type copyState struct {
reflinkDead bool
}

// CopyOptions tunes CopyGuestDirectoryWithOptions behavior. The zero value
// reproduces the original full-copy semantics of CopyGuestDirectory; callers
// can opt into skipping specific paths when the consumer arranges its own
// substitute (e.g. a symlink to a template-shared mem-file).
type CopyOptions struct {
// SkipRelPaths lists paths, relative to srcDir, that should not be
// materialized in dstDir. Entries are matched exactly (no globbing or
// prefix matching) after conversion to forward-slash separators, so
// "a/b" is the portable spelling on all platforms. When an entry names
// a directory, the whole subtree beneath it is skipped.
SkipRelPaths []string
}

// CopyGuestDirectory recursively copies a guest directory to a new destination.
// Regular files are cloned via reflink (FICLONE) when the underlying filesystem
// supports it; otherwise we fall back to a sparse extent copy
// (SEEK_DATA/SEEK_HOLE). Runtime sockets and logs are skipped because they are
// host-runtime artifacts.
func CopyGuestDirectory(srcDir, dstDir string) error {
return CopyGuestDirectoryWithOptions(srcDir, dstDir, CopyOptions{})
}

// CopyGuestDirectoryWithOptions is the option-taking variant of
// CopyGuestDirectory. Use this when forking with template-shared assets, so
// the caller can install a symlink in place of a heavy copied file.
func CopyGuestDirectoryWithOptions(srcDir, dstDir string, opts CopyOptions) error {
srcInfo, err := os.Stat(srcDir)
if err != nil {
return fmt.Errorf("stat source directory: %w", err)
Expand All @@ -56,6 +74,11 @@ func CopyGuestDirectory(srcDir, dstDir string) error {
state.reflinkDead = true
}

skipSet := make(map[string]struct{}, len(opts.SkipRelPaths))
for _, p := range opts.SkipRelPaths {
skipSet[filepath.ToSlash(p)] = struct{}{}
}

return filepath.WalkDir(srcDir, func(path string, d fs.DirEntry, walkErr error) error {
if walkErr != nil {
return walkErr
Expand All @@ -68,6 +91,12 @@ func CopyGuestDirectory(srcDir, dstDir string) error {
if relPath == "." {
return nil
}
if _, skip := skipSet[filepath.ToSlash(relPath)]; skip {
if d.IsDir() {
return filepath.SkipDir
}
return nil
}
if d.IsDir() && shouldSkipDirectory(relPath) {
return filepath.SkipDir
}
Expand Down
19 changes: 19 additions & 0 deletions lib/forkvm/copy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,25 @@ func TestCopyGuestDirectory(t *testing.T) {
assert.Equal(t, "metadata.json", linkTarget)
}

// TestCopyGuestDirectory_SkipRelPaths checks that a file listed in
// CopyOptions.SkipRelPaths is left out of the destination while its siblings
// in the same directory are still copied.
func TestCopyGuestDirectory_SkipRelPaths(t *testing.T) {
	src := filepath.Join(t.TempDir(), "src")
	dst := filepath.Join(t.TempDir(), "dst")

	// Lay out a minimal snapshot directory in the source tree.
	srcSnapshot := filepath.Join(src, "snapshots", "snapshot-latest")
	require.NoError(t, os.MkdirAll(srcSnapshot, 0755))
	fixtures := []struct {
		name    string
		content string
	}{
		{name: "config.json", content: `{}`},
		{name: "memory", content: "the heavy mem-file"},
		{name: "state", content: "device state"},
	}
	for _, f := range fixtures {
		require.NoError(t, os.WriteFile(filepath.Join(srcSnapshot, f.name), []byte(f.content), 0644))
	}

	// Skip only the memory file; the path is given with forward slashes.
	opts := CopyOptions{
		SkipRelPaths: []string{"snapshots/snapshot-latest/memory"},
	}
	require.NoError(t, CopyGuestDirectoryWithOptions(src, dst, opts))

	dstSnapshot := filepath.Join(dst, "snapshots", "snapshot-latest")
	assert.NoFileExists(t, filepath.Join(dstSnapshot, "memory"))
	assert.FileExists(t, filepath.Join(dstSnapshot, "config.json"))
	assert.FileExists(t, filepath.Join(dstSnapshot, "state"))
}

func TestCopyGuestDirectory_DoesNotSkipTmpSuffixedDirectories(t *testing.T) {
src := filepath.Join(t.TempDir(), "src")
dst := filepath.Join(t.TempDir(), "dst")
Expand Down
10 changes: 10 additions & 0 deletions lib/instances/delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,16 @@ func (m *manager) deleteInstance(
stored := &meta.StoredMetadata
log.DebugContext(ctx, "loaded instance", "instance_id", id, "state", inst.State)

if inst.State == StateTemplate {
forks, err := m.countTemplateForks(id)
if err != nil {
return fmt.Errorf("count forks of template %s: %w", id, err)
}
if forks > 0 {
return fmt.Errorf("%w: cannot delete template %s with %d live fork(s); delete forks first", ErrInvalidState, id, forks)
}
}

target, err := m.cancelAndWaitCompressionJob(ctx, m.snapshotJobKeyForInstance(id))
if err != nil {
return fmt.Errorf("wait for instance compression to stop: %w", err)
Expand Down
65 changes: 54 additions & 11 deletions lib/instances/fork.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ func (m *manager) forkInstance(ctx context.Context, id string, req ForkInstanceR
return nil, "", fmt.Errorf("standby source instance: %w", err)
}

// Running fork is a one-shot clone that restores the source afterward.
// Promotion is now an explicit caller step, so the running flow simply
// doesn't promote — there's no skip flag to thread anymore.
forked, forkErr := m.forkInstanceFromStoppedOrStandby(ctx, id, req, true)
if forkErr == nil {
if err := m.rotateSourceVsockForRestore(ctx, id, forked.Id); err != nil {
Expand Down Expand Up @@ -104,14 +107,14 @@ func (m *manager) forkInstance(ctx context.Context, id string, req ForkInstanceR
return nil, "", forkErr
}
return forked, targetState, nil
case StateStopped, StateStandby:
case StateStopped, StateStandby, StateTemplate:
forked, err := m.forkInstanceFromStoppedOrStandby(ctx, id, req, false)
if err != nil {
return nil, "", err
}
return forked, targetState, nil
default:
return nil, "", fmt.Errorf("%w: cannot fork from state %s (must be Stopped or Standby, or Running with from_running=true)", ErrInvalidState, source.State)
return nil, "", fmt.Errorf("%w: cannot fork from state %s (must be Stopped, Standby, or Template, or Running with from_running=true)", ErrInvalidState, source.State)
}
}

Expand Down Expand Up @@ -205,10 +208,10 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
stored := &meta.StoredMetadata

switch source.State {
case StateStopped, StateStandby:
case StateStopped, StateStandby, StateTemplate:
// allowed
default:
return nil, fmt.Errorf("%w: cannot fork from state %s (must be Stopped or Standby)", ErrInvalidState, source.State)
return nil, fmt.Errorf("%w: cannot fork from state %s (must be Stopped, Standby, or Template)", ErrInvalidState, source.State)
}

if !supportValidated {
Expand Down Expand Up @@ -250,19 +253,41 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
})
defer cu.Clean()

if source.State == StateStandby {
fromSnapshot := source.State == StateStandby || source.State == StateTemplate

// shareMemFile gates mem-file fan-out from the source's standby snapshot.
// Firecracker only: it mmaps the snapshot mem-file MAP_PRIVATE on restore,
// so all forks safely COW from the same backing file. Cloud-hypervisor and
// other hypervisors take a copy-mode path and don't benefit. Restricted to
// Template sources because they are explicitly promoted as fork-only and
// can never be restored — sharing the mem-file with a non-Template source
// would let a later RestoreInstance mutate the file out from under live
// forks.
shareMemFile := source.State == StateTemplate && stored.HypervisorType == hypervisor.TypeFirecracker

if fromSnapshot {
if err := m.ensureSnapshotMemoryReady(ctx, m.paths.InstanceSnapshotLatest(id), m.snapshotJobKeyForInstance(id), stored.HypervisorType); err != nil {
return nil, fmt.Errorf("prepare standby snapshot for fork: %w", err)
}
}

if err := forkvm.CopyGuestDirectory(srcDir, dstDir); err != nil {
copyOpts := forkvm.CopyOptions{}
if shareMemFile {
copyOpts.SkipRelPaths = []string{templateSharedMemFileRelPath}
}
if err := forkvm.CopyGuestDirectoryWithOptions(srcDir, dstDir, copyOpts); err != nil {
if errors.Is(err, forkvm.ErrSparseCopyUnsupported) {
return nil, fmt.Errorf("fork requires sparse-capable filesystem (SEEK_DATA/SEEK_HOLE unsupported): %w", err)
}
return nil, fmt.Errorf("clone guest directory: %w", err)
}

if shareMemFile {
if err := m.installForkSharedMemFile(dstDir, id); err != nil {
return nil, fmt.Errorf("install shared mem-file: %w", err)
}
}

starter, err := m.getVMStarter(stored.HypervisorType)
if err != nil {
return nil, fmt.Errorf("get vm starter: %w", err)
Expand All @@ -286,17 +311,22 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
// phase (Standby for snapshot forks, Stopped for stopped forks) will be
// recorded by the appropriate operation when the fork is acted on.
forkMeta.Phases.Reset()
switch source.State {
case StateStandby:
if fromSnapshot {
forkMeta.Phases.Record(phasetracking.PhaseStandby, now)
case StateStopped:
} else {
forkMeta.Phases.Record(phasetracking.PhaseStopped, now)
}

// Template-only fields don't carry forward to the fork; the fork is a fresh
// instance regardless of whether the parent is a template.
forkMeta.IsTemplate = false
forkMeta.HotPagesPath = ""
forkMeta.ForkOfTemplate = ""

// Keep the original CID for snapshot-based forks.
// Rewriting CID in restored memory snapshots is not reliable across
// hypervisors.
if source.State == StateStandby {
if fromSnapshot {
forkMeta.VsockCID = stored.VsockCID
} else {
forkMeta.VsockCID = generateVsockCID(forkID)
Expand All @@ -309,7 +339,7 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
forkMeta.MAC = ""
}

if source.State == StateStandby {
if fromSnapshot {
snapshotConfigPath := m.paths.InstanceSnapshotConfig(forkID)
netCfg := (*hypervisor.ForkNetworkConfig)(nil)
if forkMeta.NetworkEnabled {
Expand All @@ -331,6 +361,15 @@ func (m *manager) forkInstanceFromStoppedOrStandby(ctx context.Context, id strin
}
}

// If the source is already a Template, record the parent linkage so it
// can be counted as a live fork. Live forks are counted at read time by
// scanning ForkOfTemplate across all instances. Plain Standby forks
// don't get this linkage — promotion is an explicit lifecycle step the
// caller must perform via PromoteToTemplate.
if fromSnapshot && stored.IsTemplate {
forkMeta.ForkOfTemplate = stored.Id
}

newMeta := &metadata{StoredMetadata: forkMeta}
if err := m.saveMetadata(newMeta); err != nil {
return nil, fmt.Errorf("save fork metadata: %w", err)
Expand Down Expand Up @@ -384,6 +423,10 @@ func resolveForkTargetState(requested State, sourceState State) (State, error) {
switch sourceState {
case StateRunning, StateStandby, StateStopped:
return sourceState, nil
case StateTemplate:
// Forks of a template are plain Standby instances; the fork itself
// is never a template.
return StateStandby, nil
default:
return "", fmt.Errorf("%w: cannot derive fork target state from source state %s", ErrInvalidState, sourceState)
}
Expand Down
Loading
Loading