From a8dc62475916019e81a61334164c5c01902045e2 Mon Sep 17 00:00:00 2001 From: Colin Weld <54096193+cweld510@users.noreply.github.com> Date: Mon, 30 Dec 2024 10:54:11 -0800 Subject: [PATCH] Add flag to disable container spec validation when restoring checkpoints This PR adds a new flag named `unsafe-skip-restore-spec-validation`, defaulting to false. If the flag is set, the container spec given when restoring a checkpoint will no longer be validated against the original container spec given when the checkpoint was taken. In practice, many spec differences are benign, and it can be useful to allow the container specs to vary somewhat between checkpoint and restore. See https://github.com/google/gvisor/issues/11307 . FUTURE_COPYBARA_INTEGRATE_REVIEW=https://github.com/google/gvisor/pull/11323 from cweld510:cweld/skip-spec-validation-unsafe 17b2c1b18bba24299165cf8a1e466c2ef20643e0 PiperOrigin-RevId: 710752422 --- runsc/boot/restore.go | 10 ++++++---- runsc/config/config.go | 4 ++++ runsc/config/flags.go | 1 + 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/runsc/boot/restore.go b/runsc/boot/restore.go index c9be9a5744..1a40804c05 100644 --- a/runsc/boot/restore.go +++ b/runsc/boot/restore.go @@ -132,7 +132,7 @@ func (r *restorer) restoreContainerInfo(l *Loader, info *containerInfo) error { if len(r.containers) == r.totalContainers { // Trigger the restore if this is the last container.
- return r.restore(l) + return r.restore(l, info.conf.UnsafeSkipRestoreSpecValidation) } return nil } @@ -544,7 +544,7 @@ func validateSpecs(oldSpecs, newSpecs map[string]*specs.Spec) error { return nil } -func (r *restorer) restore(l *Loader) error { +func (r *restorer) restore(l *Loader, unsafeSkipRestoreSpecValidation bool) error { log.Infof("Starting to restore %d containers", len(r.containers)) // Create a new root network namespace with the network stack of the @@ -650,8 +650,10 @@ func (r *restorer) restore(l *Loader) error { if err != nil { return fmt.Errorf("failed to pop container specs from checkpoint: %w", err) } - if err := validateSpecs(oldSpecs, l.containerSpecs); err != nil { - return fmt.Errorf("failed to validate restore spec: %w", err) + if !unsafeSkipRestoreSpecValidation { + if err := validateSpecs(oldSpecs, l.containerSpecs); err != nil { + return fmt.Errorf("failed to validate restore spec: %w", err) + } } // Since we have a new kernel we also must make a new watchdog. diff --git a/runsc/config/config.go b/runsc/config/config.go index 3783c6c5f4..5dfabc208b 100644 --- a/runsc/config/config.go +++ b/runsc/config/config.go @@ -384,6 +384,10 @@ type Config struct { // TestOnlySaveRestoreNetstack indicates netstack should be saved and restored. TestOnlySaveRestoreNetstack bool `flag:"TESTONLY-save-restore-netstack"` + + // UnsafeSkipRestoreSpecValidation optionally skips validation of the container spec for restored + // containers. + UnsafeSkipRestoreSpecValidation bool `flag:"unsafe-skip-restore-spec-validation"` } func (c *Config) validate() error { diff --git a/runsc/config/flags.go b/runsc/config/flags.go index 328a313130..b8fce2236b 100644 --- a/runsc/config/flags.go +++ b/runsc/config/flags.go @@ -106,6 +106,7 @@ func RegisterFlags(flagSet *flag.FlagSet) { flagSet.Bool("enable-core-tags", false, "enables core tagging. 
Requires host linux kernel >= 5.14.") flagSet.String("pod-init-config", "", "path to configuration file with additional steps to take during pod creation.") flagSet.Var(HostSettingsCheck.Ptr(), "host-settings", "how to handle non-optimal host kernel settings: check (default, advisory-only), ignore (do not check), adjust (best-effort auto-adjustment), or enforce (auto-adjustment must succeed).") + flagSet.Bool("unsafe-skip-restore-spec-validation", false, "Enables skipping validation of the restore-time container spec when restoring checkpoints.") // Flags that control sandbox runtime behavior: MM related. flagSet.Bool("app-huge-pages", true, "enable use of huge pages for application memory; requires /sys/kernel/mm/transparent_hugepage/shmem_enabled = advise")