Skip to content

Commit ef6315f

Browse files
authored
Removed extraneous usage of fsdp_config.load_monolith_rank0_only since that's unreliable (#3901)
1 parent 0e59217 commit ef6315f

File tree

2 files changed: 0 additions and 9 deletions.

composer/distributed/shared_utils.py

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -181,9 +181,6 @@ def update_sync_module_states_if_needed(model: nn.Module, fsdp_config: FSDP2Conf
181 181
dist.all_reduce(any_ranks_meta, reduce_operation='MAX')
182 182
requires_sync = all_ranks_meta.item() == 0 and any_ranks_meta.item() == 1
183 183

184-
if fsdp_config.load_monolith_rank0_only:
185-
fsdp_config.sync_module_states = True
186-
187 184
if not fsdp_config.sync_module_states and requires_sync:
188 185
fsdp_config.sync_module_states = True
189 186

composer/trainer/trainer.py

Lines changed: 0 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1701,12 +1701,6 @@ def __init__(
1701 1701
log.info('No previous autoresume checkpoint found')
1702 1702
# Actually load the checkpoint from potentially updated arguments
1703 1703
if load_path is not None:
1704-
# If we are using FSDP and load_monolith_rank0_only is True, then the state_dict must be `full`
1705-
# when we are loading a checkpoint
1706-
if self.state.fsdp_config and self.state.fsdp_config.load_monolith_rank0_only: # type: ignore
1707-
err_msg = 'state_dict_type must be `full` when load_monolith_rank0_only is True when loading a checkpoint'
1708-
assert self.state.fsdp_config.state_dict_type == 'full', err_msg # type: ignore
1709-
1710 1704
log.info(f'Loading checkpoint from {load_path}')
1711 1705
if load_object_store is None:
1712 1706
load_object_store = maybe_create_object_store_from_uri(load_path)

0 commit comments

Comments
 (0)