Skip to content

Commit

Permalink
fix planner
Browse files (browse the repository at this point in the history)
  • Loading branch information
mvpatel2000 committed Aug 9, 2024
1 parent fac2593 commit 9d90cc9
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions composer/utils/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -1163,11 +1163,16 @@ def _save_checkpoint(
  if expect_file:
      if version.parse(torch.__version__) >= version.parse('2.3.0'):
          save_planner = state.fsdp_config.save_planner
-         if version.parse(torch.__version__) < version.parse('2.4.0') and save_planner is None:
-             # Dedup is only broken on <2.4
-             from composer.trainer._patch_pytorch import SavePlannerWithDedupFix
+         if save_planner is None:
+             if version.parse(torch.__version__) < version.parse('2.4.0'):
+                 # Dedup is only broken on <2.4
+                 from composer.trainer._patch_pytorch import SavePlannerWithDedupFix

-             save_planner = SavePlannerWithDedupFix()
+                 save_planner = SavePlannerWithDedupFix()
+             else:
+                 from torch.distributed.checkpoint.default_planner import DefaultSavePlanner
+
+                 save_planner = DefaultSavePlanner(dedup_save_to_lowest_rank=True)
          dist_cp.save(
              state_dict=state_dict,
              storage_writer=dist_cp.FileSystemWriter(dirname),
Expand Down

0 comments on commit 9d90cc9

Please sign in to comment.