 import torch.distributed as dist
 from lightning import LightningModule, Trainer
 from lightning.pytorch.callbacks import Callback
-from lightning.pytorch.utilities import rank_zero_only
 from omegaconf import ListConfig
 from torch import Tensor, no_grad
 from torch.optim import Optimizer
@@ -77,9 +76,9 @@ def create_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
     conv_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" not in name]

     model_parameters = [
-        {"params": bias_params, "momentum": 0.8, "weight_decay": 0},
-        {"params": conv_params, "momentum": 0.8},
-        {"params": norm_params, "momentum": 0.8, "weight_decay": 0},
+        {"params": bias_params, "momentum": 0.937, "weight_decay": 0},
+        {"params": conv_params, "momentum": 0.937},
+        {"params": norm_params, "momentum": 0.937, "weight_decay": 0},
     ]

     def next_epoch(self, batch_num, epoch_idx):
@@ -89,8 +88,8 @@ def next_epoch(self, batch_num, epoch_idx):
         # 0.937: Start Momentum
         # 0.8  : Normal Momentum
         # 3    : The warm-up epoch num
-        self.min_mom = lerp(0.937, 0.8, max(epoch_idx, 3), 3)
-        self.max_mom = lerp(0.937, 0.8, max(epoch_idx + 1, 3), 3)
+        self.min_mom = lerp(0.937, 0.8, min(epoch_idx, 3), 3)
+        self.max_mom = lerp(0.937, 0.8, min(epoch_idx + 1, 3), 3)
         self.batch_num = batch_num
         self.batch_idx = 0

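The max to min swap above is the functional change in this hunk. A minimal standalone sketch of the resulting momentum warm-up, assuming lerp is the usual linear-interpolation helper of the form start + (end - start) * step / total (the helper name comes from this file; everything else below is illustrative only):

def lerp(start: float, end: float, step: float, total: int = 1) -> float:
    # plain linear interpolation from start to end over `total` steps
    return start + (end - start) * step / total

warmup_epochs = 3  # the hard-coded "3" from the hunk above
for epoch_idx in range(6):
    # min() clamps the schedule at the end value once warm-up is over; with max(),
    # every index below 3 is pushed up to 3, so momentum starts at 0.8 and then
    # extrapolates past it, instead of ramping 0.937 -> 0.8 over the first 3 epochs.
    momentum = lerp(0.937, 0.8, min(epoch_idx, warmup_epochs), warmup_epochs)
    print(epoch_idx, round(momentum, 4))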
@@ -100,7 +99,7 @@ def next_batch(self):
         for lr_idx, param_group in enumerate(self.param_groups):
             min_lr, max_lr = self.min_lr[lr_idx], self.max_lr[lr_idx]
             param_group["lr"] = lerp(min_lr, max_lr, self.batch_idx, self.batch_num)
-            param_group["momentum"] = lerp(self.min_mom, self.max_mom, self.batch_idx, self.batch_num)
+            # param_group["momentum"] = lerp(self.min_mom, self.max_mom, self.batch_idx, self.batch_num)
             lr_dict[f"LR/{lr_idx}"] = param_group["lr"]
         return lr_dict

@@ -125,7 +124,7 @@ def create_scheduler(optimizer: Optimizer, schedule_cfg: SchedulerConfig) -> _LRScheduler:
         lambda1 = lambda epoch: (epoch + 1) / wepoch if epoch < wepoch else 1
         lambda2 = lambda epoch: 10 - 9 * ((epoch + 1) / wepoch) if epoch < wepoch else 1
         warmup_schedule = LambdaLR(optimizer, lr_lambda=[lambda2, lambda1, lambda1])
-        schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[2])
+        schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[wepoch - 1])
     return schedule

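On the milestone change: a minimal sketch (not from this repo; wepoch, the toy model, and the learning rates below are made up) of how SequentialLR hands control from one scheduler to the next, which is why the switch point should track the configured warm-up length rather than a hard-coded 2:

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR, SequentialLR

wepoch = 5  # hypothetical warm-up length, standing in for schedule_cfg.warmup.epochs
model = torch.nn.Linear(4, 4)
optimizer = SGD(model.parameters(), lr=0.1)

# warm-up ramps the lr multiplier from 1/wepoch to 1; the "main" schedule holds it at 1
warmup = LambdaLR(optimizer, lr_lambda=lambda epoch: (epoch + 1) / wepoch)
main = LambdaLR(optimizer, lr_lambda=lambda epoch: 1.0)

# with milestones=[m], the first scheduler drives epochs 0..m-1 and the second
# takes over from epoch m onward, so m should be derived from wepoch
schedule = SequentialLR(optimizer, schedulers=[warmup, main], milestones=[wepoch - 1])

for epoch in range(wepoch + 2):
    print(epoch, round(optimizer.param_groups[0]["lr"], 4))
    schedule.step()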