From 052a8e14b51161a5b7f7bc723ae2fa0144d76fce Mon Sep 17 00:00:00 2001
From: Doxie
Date: Tue, 16 Jul 2024 22:08:15 +0800
Subject: [PATCH] fix ppov2_trainer tensorboard log bugs (#1836)

---
 trl/trainer/ppov2_trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/trl/trainer/ppov2_trainer.py b/trl/trainer/ppov2_trainer.py
index 2352792335..806e135556 100644
--- a/trl/trainer/ppov2_trainer.py
+++ b/trl/trainer/ppov2_trainer.py
@@ -498,6 +498,7 @@ def repeat_generator():
                 metrics["lr"] = self.lr_scheduler.get_last_lr()[0]
                 metrics["episode"] = global_step
                 self.state.epoch = global_step / self.train_dataset_len  # used by self.log
+                self.state.global_step += 1
                 self.log(metrics)
             del kl, mean_kl, mean_entropy, mean_non_score_reward, scores, metrics, non_score_reward
             torch.cuda.empty_cache()