@@ -962,6 +962,11 @@ def local_state_dict(self, *args: Any, **kwargs: Any) -> Any:
         so the resulting state_dict can only be loaded after the Module has been
         wrapped with FSDP.
         """
+        # Check state; specifically, we shouldn't be in SUMMON_FULL_PARAMS since
+        # that will produce full state, not sharded state.
+        self.assert_state(
+            [TrainingState.IDLE, TrainingState.FORWARD, TrainingState.BACKWARD_PRE, TrainingState.BACKWARD_POST]
+        )
         with contextlib.ExitStack() as stack:
             # Tell any nested FSDP instances not to auto summon full params.
             for module in self.modules():  # includes self
@@ -1025,6 +1030,11 @@ def load_local_state_dict(
         self, state_dict: Union[Dict[str, torch.Tensor], "OrderedDict[str, torch.Tensor]"], strict: bool = True
     ) -> NamedTuple:
         """Load a local (sharded) state_dict."""
+        # Check state; specifically, we shouldn't be in SUMMON_FULL_PARAMS since
+        # that will load full state, not sharded state.
+        self.assert_state(
+            [TrainingState.IDLE, TrainingState.FORWARD, TrainingState.BACKWARD_PRE, TrainingState.BACKWARD_POST]
+        )
         with contextlib.ExitStack() as stack:
             # Tell any nested FSDP instances not to auto summon full params.
             for module in self.modules():  # includes self
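
A minimal sketch of the behavior this diff enforces, assuming fairscale's `FullyShardedDataParallel` API (`local_state_dict()` and the `summon_full_params()` context manager) and using a single-process gloo group purely for illustration: calling `local_state_dict()` while full params are summoned now trips the `assert_state()` guard instead of silently returning unsharded state.

```python
# Minimal sketch (not part of the diff), assuming fairscale's
# FullyShardedDataParallel API and a single-process gloo group.
import os

import torch.distributed as dist
import torch.nn as nn
from fairscale.nn import FullyShardedDataParallel as FSDP

# Single-process "distributed" setup so FSDP can create its process group.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)

model = FSDP(nn.Linear(8, 8))

sharded = model.local_state_dict()  # fine: training state is IDLE

with model.summon_full_params():
    # Inside this context the training state is SUMMON_FULL_PARAMS, so the
    # new assert_state() check rejects the call rather than returning full
    # (unsharded) state. The exact exception type depends on how
    # assert_state() reports failures.
    try:
        model.local_state_dict()
    except (AssertionError, ValueError):
        print("local_state_dict() is rejected under summon_full_params()")

dist.destroy_process_group()
```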