henry000 committed
Commit 3e08dd8 · 1 Parent(s): 16c6705

🔨 [Add] Warm up in training process

yolo/config/config.py CHANGED
@@ -53,6 +53,7 @@ class SchedulerArgs:
 class SchedulerConfig:
     type: str
     args: SchedulerArgs
+    warmup: Dict[str, Union[str, int, float]]
 
 
 @dataclass
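
Note (not part of the commit): the hunk above adds a free-form warmup mapping to SchedulerConfig. A minimal sketch of how the updated dataclass reads in context, assuming the config module already imports Dict/Union from typing and that SchedulerArgs remains a plain container for scheduler keyword arguments:

from dataclasses import dataclass
from typing import Dict, Union

@dataclass
class SchedulerArgs:
    # Scheduler-specific keyword arguments, e.g. total_iters / start_factor (assumed shape).
    ...

@dataclass
class SchedulerConfig:
    type: str
    args: SchedulerArgs
    # Added by this commit: warmup settings such as {"epochs": 3.0}.
    warmup: Dict[str, Union[str, int, float]]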
yolo/config/hyper/default.yaml CHANGED
@@ -15,12 +15,13 @@ data:
   shuffle: True
   pin_memory: True
 train:
-  epoch: 10
+  epoch: 500
   optimizer:
-    type: Adam
+    type: SGD
     args:
-      lr: 0.001
-      weight_decay: 0.0001
+      lr: 0.01
+      weight_decay: 0.0005
+      momentum: 0.937
   loss:
     objective:
       BCELoss: 0.5
@@ -35,10 +36,13 @@ train:
       iou: 6.0
       cls: 0.5
   scheduler:
-    type: StepLR
+    type: LinearLR
+    warmup:
+      epochs: 3.0
     args:
-      step_size: 10
-      gamma: 0.1
+      total_iters: ${hyper.train.epoch}
+      start_factor: 1
+      end_factor: 0.01
   ema:
     enabled: true
     decay: 0.995
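
Note (not part of the commit): ${hyper.train.epoch} is an OmegaConf-style interpolation, so total_iters automatically tracks the new 500-epoch schedule. A minimal sketch of how it resolves, assuming the project loads this YAML under a hyper key (as the hyper. prefix in the reference suggests):

from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "hyper": {
            "train": {
                "epoch": 500,
                "scheduler": {
                    "type": "LinearLR",
                    "warmup": {"epochs": 3.0},
                    "args": {
                        "total_iters": "${hyper.train.epoch}",
                        "start_factor": 1,
                        "end_factor": 0.01,
                    },
                },
            }
        }
    }
)
# Interpolations resolve on access: total_iters follows hyper.train.epoch.
print(cfg.hyper.train.scheduler.args.total_iters)  # 500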
yolo/tools/model_helper.py CHANGED
@@ -2,9 +2,10 @@ from typing import Any, Dict, Type
 
 import torch
 from torch.optim import Optimizer
-from torch.optim.lr_scheduler import _LRScheduler
+from torch.optim.lr_scheduler import LambdaLR, SequentialLR, _LRScheduler
 
 from yolo.config.config import OptimizerConfig, SchedulerConfig
+from yolo.model.yolo import YOLO
 
 
 class EMA:
@@ -31,21 +32,38 @@ class EMA:
             self.shadow[name].copy_(param.data)
 
 
-def get_optimizer(model_parameters, optim_cfg: OptimizerConfig) -> Optimizer:
+def get_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
     """Create an optimizer for the given model parameters based on the configuration.
 
     Returns:
         An instance of the optimizer configured according to the provided settings.
     """
     optimizer_class: Type[Optimizer] = getattr(torch.optim, optim_cfg.type)
+
+    bias_params = [p for name, p in model.named_parameters() if "bias" in name]
+    norm_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" in name]
+    conv_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" not in name]
+
+    model_parameters = [
+        {"params": bias_params, "nestrov": True, "momentum": 0.937},
+        {"params": conv_params, "weight_decay": 0.0},
+        {"params": norm_params, "weight_decay": 1e-5},
+    ]
     return optimizer_class(model_parameters, **optim_cfg.args)
 
 
-def get_scheduler(optimizer: Optimizer, schedul_cfg: SchedulerConfig) -> _LRScheduler:
+def get_scheduler(optimizer: Optimizer, schedule_cfg: SchedulerConfig) -> _LRScheduler:
     """Create a learning rate scheduler for the given optimizer based on the configuration.
 
     Returns:
         An instance of the scheduler configured according to the provided settings.
     """
-    scheduler_class: Type[_LRScheduler] = getattr(torch.optim.lr_scheduler, schedul_cfg.type)
-    return scheduler_class(optimizer, **schedul_cfg.args)
+    scheduler_class: Type[_LRScheduler] = getattr(torch.optim.lr_scheduler, schedule_cfg.type)
+    schedule = scheduler_class(optimizer, **schedule_cfg.args)
+    if hasattr(schedule_cfg, "warmup"):
+        wepoch = schedule_cfg.warmup.epochs
+        lambda1 = lambda epoch: 0.1 + 0.9 * (epoch + 1 / wepoch) if epoch < wepoch else 1
+        lambda2 = lambda epoch: 10 - 9 * (epoch + 1 / wepoch) if epoch < wepoch else 1
+        warmup_schedule = LambdaLR(optimizer, lr_lambda=[lambda1, lambda2, lambda1])
+        schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[2])
+    return schedule
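
Note (not part of the commit): get_optimizer now builds three parameter groups (bias, conv weights, norm weights), and the warmup branch hands LambdaLR one lambda per group before chaining it to the main scheduler with SequentialLR. The committed code is kept verbatim above ("nestrov", epoch + 1 / wepoch). The sketch below shows the same chaining pattern in plain PyTorch, assuming the intended per-group ramp is (epoch + 1) / warmup_epochs and that the hand-off milestone equals the number of warmup epochs; names like ramp and warmup_epochs are illustrative only:

import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR, LinearLR, SequentialLR

# Toy model and two parameter groups standing in for the bias/conv/norm split.
model = torch.nn.Linear(10, 1)
optimizer = SGD(
    [
        {"params": [model.bias]},                         # group 0: biases
        {"params": [model.weight], "weight_decay": 0.0},  # group 1: weights
    ],
    lr=0.01,
    momentum=0.937,
)

warmup_epochs = 3
ramp = lambda epoch: (epoch + 1) / warmup_epochs if epoch < warmup_epochs else 1.0
warmup = LambdaLR(optimizer, lr_lambda=[ramp, ramp])  # one lambda per parameter group
main = LinearLR(optimizer, start_factor=1.0, end_factor=0.01, total_iters=500)
scheduler = SequentialLR(optimizer, schedulers=[warmup, main], milestones=[warmup_epochs])

for epoch in range(5):
    optimizer.step()   # placeholder for a real training epoch
    scheduler.step()
    print(epoch, [round(group["lr"], 5) for group in optimizer.param_groups])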
yolo/tools/trainer.py CHANGED
@@ -19,7 +19,7 @@ class Trainer:
 
         self.model = model.to(device)
         self.device = device
-        self.optimizer = get_optimizer(model.parameters(), train_cfg.optimizer)
+        self.optimizer = get_optimizer(model, train_cfg.optimizer)
         self.scheduler = get_scheduler(self.optimizer, train_cfg.scheduler)
         self.loss_fn = get_loss_function(cfg)
         self.progress = CustomProgress(cfg, save_path, use_wandb=True)
@@ -47,7 +47,6 @@ class Trainer:
     def train_one_epoch(self, dataloader):
         self.model.train()
         total_loss = 0
-        self.progress.start_batch(len(dataloader))
 
         for data, targets in dataloader:
             loss, loss_each = self.train_one_batch(data, targets)
@@ -58,7 +57,6 @@ class Trainer:
         if self.scheduler:
             self.scheduler.step()
 
-        self.progress.finish_batch()
         return total_loss / len(dataloader)
 
     def save_checkpoint(self, epoch: int, filename="checkpoint.pt"):
@@ -80,8 +78,9 @@ class Trainer:
         self.progress.start_train(num_epochs)
         for epoch in range(num_epochs):
 
+            self.progress.start_one_epoch(len(dataloader), self.optimizer, epoch)
             epoch_loss = self.train_one_epoch(dataloader)
-            self.progress.one_epoch()
+            self.progress.finish_one_epoch()
 
             logger.info(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
             if (epoch + 1) % 5 == 0:
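
Note (not part of the commit): passing self.optimizer into start_one_epoch presumably lets the progress/W&B reporter display the per-group learning rate as it ramps during warmup. What CustomProgress does internally is not shown in this diff; the snippet below only illustrates the PyTorch side of reading those values each epoch, and the helper name is hypothetical:

from torch.optim import Optimizer

def log_learning_rates(optimizer: Optimizer, epoch: int) -> None:
    # Each parameter group (bias / conv / norm in this commit) can carry its own lr.
    for idx, group in enumerate(optimizer.param_groups):
        print(f"epoch {epoch}: group {idx} lr={group['lr']:.6f}")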