🔨 [Add] Warm up in training process
Files changed:
- yolo/config/config.py (+1 -0)
- yolo/config/hyper/default.yaml (+11 -7)
- yolo/tools/model_helper.py (+23 -5)
- yolo/tools/trainer.py (+3 -4)
yolo/config/config.py

@@ -53,6 +53,7 @@ class SchedulerArgs:
 class SchedulerConfig:
     type: str
     args: SchedulerArgs
+    warmup: Dict[str, Union[str, int, float]]
 
 
 @dataclass
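The new field is a plain mapping, so whatever the YAML places under `scheduler.warmup` rides along with the scheduler config. A minimal sketch of that shape, using stand-in dataclass definitions rather than the project's actual ones (field names and defaults in `SchedulerArgs` here are illustrative assumptions):

```python
# Minimal sketch of the SchedulerConfig shape after this change.
# SchedulerArgs' fields and defaults are assumptions for illustration only.
from dataclasses import dataclass, field
from typing import Dict, Union


@dataclass
class SchedulerArgs:
    total_iters: int = 500
    start_factor: float = 1.0
    end_factor: float = 0.01


@dataclass
class SchedulerConfig:
    type: str
    args: SchedulerArgs
    warmup: Dict[str, Union[str, int, float]] = field(default_factory=dict)


cfg = SchedulerConfig(type="LinearLR", args=SchedulerArgs(), warmup={"epochs": 3.0})
print(cfg.warmup["epochs"])  # 3.0
```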
yolo/config/hyper/default.yaml

@@ -15,12 +15,13 @@ data:
   shuffle: True
   pin_memory: True
 train:
-  epoch:
+  epoch: 500
   optimizer:
-    type:
+    type: SGD
     args:
-      lr: 0.
-      weight_decay: 0.
+      lr: 0.01
+      weight_decay: 0.0005
+      momentum: 0.937
   loss:
     objective:
       BCELoss: 0.5
@@ -35,10 +36,13 @@ train:
       iou: 6.0
       cls: 0.5
   scheduler:
-    type:
+    type: LinearLR
+    warmup:
+      epochs: 3.0
     args:
-
-
+      total_iters: ${hyper.train.epoch}
+      start_factor: 1
+      end_factor: 0.01
   ema:
     enabled: true
     decay: 0.995
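The `${hyper.train.epoch}` reference keeps the scheduler's `total_iters` in sync with the epoch count. A small sketch of how that interpolation resolves, assuming the configs are loaded with OmegaConf (the `${...}` syntax is OmegaConf-style interpolation; the exact nesting under `hyper` is an assumption about how the files are composed):

```python
# Sketch of the ${hyper.train.epoch} interpolation, assuming OmegaConf-based
# config loading; the composition of the `hyper` tree shown here is an assumption.
from omegaconf import OmegaConf

yaml_text = """
hyper:
  train:
    epoch: 500
    scheduler:
      type: LinearLR
      warmup:
        epochs: 3.0
      args:
        total_iters: ${hyper.train.epoch}
        start_factor: 1
        end_factor: 0.01
"""

cfg = OmegaConf.create(yaml_text)
# total_iters resolves to whatever hyper.train.epoch currently holds
print(cfg.hyper.train.scheduler.args.total_iters)  # 500
```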
yolo/tools/model_helper.py

@@ -2,9 +2,10 @@ from typing import Any, Dict, Type
 
 import torch
 from torch.optim import Optimizer
-from torch.optim.lr_scheduler import _LRScheduler
+from torch.optim.lr_scheduler import LambdaLR, SequentialLR, _LRScheduler
 
 from yolo.config.config import OptimizerConfig, SchedulerConfig
+from yolo.model.yolo import YOLO
 
 
 class EMA:
@@ -31,21 +32,38 @@ class EMA:
             self.shadow[name].copy_(param.data)
 
 
-def get_optimizer(
+def get_optimizer(model: YOLO, optim_cfg: OptimizerConfig) -> Optimizer:
     """Create an optimizer for the given model parameters based on the configuration.
 
     Returns:
         An instance of the optimizer configured according to the provided settings.
     """
     optimizer_class: Type[Optimizer] = getattr(torch.optim, optim_cfg.type)
+
+    bias_params = [p for name, p in model.named_parameters() if "bias" in name]
+    norm_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" in name]
+    conv_params = [p for name, p in model.named_parameters() if "weight" in name and "bn" not in name]
+
+    model_parameters = [
+        {"params": bias_params, "nestrov": True, "momentum": 0.937},
+        {"params": conv_params, "weight_decay": 0.0},
+        {"params": norm_params, "weight_decay": 1e-5},
+    ]
     return optimizer_class(model_parameters, **optim_cfg.args)
 
 
-def get_scheduler(optimizer: Optimizer,
+def get_scheduler(optimizer: Optimizer, schedule_cfg: SchedulerConfig) -> _LRScheduler:
     """Create a learning rate scheduler for the given optimizer based on the configuration.
 
     Returns:
         An instance of the scheduler configured according to the provided settings.
     """
-    scheduler_class: Type[_LRScheduler] = getattr(torch.optim.lr_scheduler,
-
+    scheduler_class: Type[_LRScheduler] = getattr(torch.optim.lr_scheduler, schedule_cfg.type)
+    schedule = scheduler_class(optimizer, **schedule_cfg.args)
+    if hasattr(schedule_cfg, "warmup"):
+        wepoch = schedule_cfg.warmup.epochs
+        lambda1 = lambda epoch: 0.1 + 0.9 * (epoch + 1 / wepoch) if epoch < wepoch else 1
+        lambda2 = lambda epoch: 10 - 9 * (epoch + 1 / wepoch) if epoch < wepoch else 1
+        warmup_schedule = LambdaLR(optimizer, lr_lambda=[lambda1, lambda2, lambda1])
+        schedule = SequentialLR(optimizer, schedulers=[warmup_schedule, schedule], milestones=[2])
+    return schedule
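`LambdaLR` expects one lambda per parameter group, which is why the warm-up above passes a list of three lambdas matching the three groups built in `get_optimizer` (bias / conv / norm) before chaining into the main schedule with `SequentialLR`. A self-contained sketch of that composition; the toy model, the group split, and the simple linear `ramp` are stand-ins, not the commit's exact formulas:

```python
# Sketch of the warm-up composition: a per-group LambdaLR chained into the
# main LinearLR schedule through SequentialLR. Toy model, group split, and the
# simplified ramp lambda are assumptions for illustration, not the project's code.
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import LambdaLR, LinearLR, SequentialLR

model = torch.nn.Sequential(torch.nn.Linear(4, 4), torch.nn.BatchNorm1d(4))
bias_params = [p for n, p in model.named_parameters() if "bias" in n]
norm_params = [p for n, p in model.named_parameters() if n == "1.weight"]  # BatchNorm weight
conv_params = [p for n, p in model.named_parameters() if n == "0.weight"]  # Linear weight

optimizer = SGD(
    [
        {"params": bias_params},
        {"params": conv_params, "weight_decay": 0.0},
        {"params": norm_params, "weight_decay": 1e-5},
    ],
    lr=0.01,
    momentum=0.937,
)

wepochs = 3
ramp = lambda epoch: (epoch + 1) / wepochs if epoch < wepochs else 1.0  # simplified warm-up ramp
warmup = LambdaLR(optimizer, lr_lambda=[ramp, ramp, ramp])  # one lambda per param group
main = LinearLR(optimizer, start_factor=1.0, end_factor=0.01, total_iters=500)
schedule = SequentialLR(optimizer, schedulers=[warmup, main], milestones=[wepochs])

for epoch in range(6):
    print(epoch, [round(group["lr"], 5) for group in optimizer.param_groups])
    optimizer.step()
    schedule.step()
```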
yolo/tools/trainer.py

@@ -19,7 +19,7 @@ class Trainer:
 
         self.model = model.to(device)
         self.device = device
-        self.optimizer = get_optimizer(model
+        self.optimizer = get_optimizer(model, train_cfg.optimizer)
         self.scheduler = get_scheduler(self.optimizer, train_cfg.scheduler)
         self.loss_fn = get_loss_function(cfg)
         self.progress = CustomProgress(cfg, save_path, use_wandb=True)
@@ -47,7 +47,6 @@ class Trainer:
     def train_one_epoch(self, dataloader):
         self.model.train()
         total_loss = 0
-        self.progress.start_batch(len(dataloader))
 
         for data, targets in dataloader:
             loss, loss_each = self.train_one_batch(data, targets)
@@ -58,7 +57,6 @@ class Trainer:
         if self.scheduler:
             self.scheduler.step()
 
-        self.progress.finish_batch()
         return total_loss / len(dataloader)
 
     def save_checkpoint(self, epoch: int, filename="checkpoint.pt"):
@@ -80,8 +78,9 @@ class Trainer:
         self.progress.start_train(num_epochs)
         for epoch in range(num_epochs):
 
+            self.progress.start_one_epoch(len(dataloader), self.optimizer, epoch)
             epoch_loss = self.train_one_epoch(dataloader)
-            self.progress.
+            self.progress.finish_one_epoch()
 
             logger.info(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")
             if (epoch + 1) % 5 == 0: