From 1344ffde9e61b3b67e8ab9d6fa1daa86f93f730d Mon Sep 17 00:00:00 2001 From: Hongwei Chen <33092912+hwchen2017@users.noreply.github.com> Date: Sat, 21 Jun 2025 10:20:30 -0700 Subject: [PATCH 1/4] Add file extension (#980) Signed-off-by: Hongwei Chen Signed-off-by: jouw --- deepnvme/model_checkpoint/{README => README.md} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename deepnvme/model_checkpoint/{README => README.md} (100%) diff --git a/deepnvme/model_checkpoint/README b/deepnvme/model_checkpoint/README.md similarity index 100% rename from deepnvme/model_checkpoint/README rename to deepnvme/model_checkpoint/README.md From 4c87e8de4866f99a87dc24d30b3fee6803358180 Mon Sep 17 00:00:00 2001 From: jouw Date: Thu, 26 Jun 2025 18:36:19 +0800 Subject: [PATCH 2/4] fix init weights issue for critic/reward model Signed-off-by: jouw --- applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py | 3 +-- applications/DeepSpeed-Chat/dschat/utils/model/model_utils.py | 4 +++- .../DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py | 3 +-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py b/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py index 22cba6be0..1801d42f9 100644 --- a/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py +++ b/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py @@ -236,8 +236,7 @@ def train_rlhf(self, inputs): value = self.critic_model.forward_value(**batch, return_value_only=True, use_cache=False)[:, :-1] - critic_loss = self.critic_loss_fn(value[:, start:], old_values[:, - start:], + critic_loss = self.critic_loss_fn(value[:, start:], old_values[:, start:], returns, action_mask[:, start:]) self.critic_model.backward(critic_loss) diff --git a/applications/DeepSpeed-Chat/dschat/utils/model/model_utils.py b/applications/DeepSpeed-Chat/dschat/utils/model/model_utils.py index 050819a22..0a37fa299 100644 --- a/applications/DeepSpeed-Chat/dschat/utils/model/model_utils.py +++ b/applications/DeepSpeed-Chat/dschat/utils/model/model_utils.py @@ -11,6 +11,7 @@ ) from huggingface_hub import snapshot_download from transformers.integrations.deepspeed import HfDeepSpeedConfig +from transformers.modeling_utils import no_init_weights from dschat.utils.model.reward_model import RewardModel from dschat.utils.utils import load_state_dict_into_model, print_rank_0 @@ -99,7 +100,8 @@ def create_hf_model(model_class, dschf = None if rlhf_training: # the weight loading is handled by create critic model - model = model_class.from_config(model_config) + with no_init_weights(): + model = model_class.from_config(model_config) else: model = model_class.from_pretrained( model_name_or_path, diff --git a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py b/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py index 1378dc4e6..a6647d92b 100644 --- a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py +++ b/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py @@ -594,8 +594,7 @@ def main(): "-------------------------------------------------------------------------------------", args.global_rank) - if args.enable_tensorboard and torch.distributed.get_rank( - ) == 0: + if args.enable_tensorboard and torch.distributed.get_rank() == 0: writer.add_scalar('reward', average_reward / inner_iter, global_step=step) From 9a7062bb14576b81f3a836b00fc9b88c76780e49 Mon Sep 17 00:00:00 2001 From: raviguptaamd Date: Thu, 3 Jul 2025 22:24:42 -0700 Subject: [PATCH 3/4] Update submodule link to reflect https style (#981) Signed-off-by: raviguptaamd Signed-off-by: jouw --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 1e192cfb3..5190f4b46 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "training/DeepSpeed-Domino/Megatron-LM"] path = training/DeepSpeed-Domino/Megatron-LM - url = git@github.com:NVIDIA/Megatron-LM.git + url = https://github.com/NVIDIA/Megatron-LM.git From 8000b6fcd1f7365a23bb73583b594b788346ed44 Mon Sep 17 00:00:00 2001 From: jouw Date: Mon, 7 Jul 2025 10:46:41 +0800 Subject: [PATCH 4/4] fix formatting issue Signed-off-by: jouw --- applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py | 3 ++- .../DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py b/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py index 1801d42f9..22cba6be0 100644 --- a/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py +++ b/applications/DeepSpeed-Chat/dschat/rlhf/ppo_trainer.py @@ -236,7 +236,8 @@ def train_rlhf(self, inputs): value = self.critic_model.forward_value(**batch, return_value_only=True, use_cache=False)[:, :-1] - critic_loss = self.critic_loss_fn(value[:, start:], old_values[:, start:], + critic_loss = self.critic_loss_fn(value[:, start:], old_values[:, + start:], returns, action_mask[:, start:]) self.critic_model.backward(critic_loss) diff --git a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py b/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py index a6647d92b..1378dc4e6 100644 --- a/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py +++ b/applications/DeepSpeed-Chat/training/step3_rlhf_finetuning/main.py @@ -594,7 +594,8 @@ def main(): "-------------------------------------------------------------------------------------", args.global_rank) - if args.enable_tensorboard and torch.distributed.get_rank() == 0: + if args.enable_tensorboard and torch.distributed.get_rank( + ) == 0: writer.add_scalar('reward', average_reward / inner_iter, global_step=step)