|
|
|
from ...parallel import is_module_wrapper
|
|
from ..hooks.hook import HOOKS, Hook
|
|
|
|
|
|
@HOOKS.register_module()
class EMAHook(Hook):
    r"""Exponential Moving Average Hook.

    Use Exponential Moving Average on all parameters of model in training
    process. All parameters have a ema backup, which update by the formula
    as below. EMAHook takes priority over EvalHook and CheckpointSaverHook.

        .. math::

            \text{Xema\_{t+1}} = (1 - \text{momentum}) \times
            \text{Xema\_{t}} + \text{momentum} \times X_t

    Args:
        momentum (float): The momentum used for updating ema parameter.
            Must lie strictly between 0 and 1. The value actually stored
            on the hook is ``momentum**interval``. Defaults to 0.0002.
        interval (int): Update ema parameter every interval iteration.
            Must be a positive integer. Defaults to 1.
        warm_up (int): During first warm_up steps, we may use smaller momentum
            to update ema parameters more slowly. Defaults to 100.
        resume_from (str): The checkpoint path. Defaults to None.
    """

    def __init__(self,
                 momentum=0.0002,
                 interval=1,
                 warm_up=100,
                 resume_from=None):
        """Validate the arguments and store the hook configuration.

        Raises:
            AssertionError: If ``interval`` is not a positive int or
                ``momentum`` is not strictly between 0 and 1.
        """
        assert isinstance(interval, int) and interval > 0
        self.warm_up = warm_up
        self.interval = interval
        assert momentum > 0 and momentum < 1
        # NOTE(review): because 0 < momentum < 1, ``momentum**interval``
        # becomes smaller as ``interval`` grows -- confirm this scaling is
        # the intended compensation for updating less frequently.
        self.momentum = momentum**interval
        self.checkpoint = resume_from

    def before_run(self, runner):
        """To resume model with its ema parameters more friendly.

        Register ema parameter as ``named_buffer`` to model, one buffer per
        parameter, initialised with a copy of the parameter's current data.
        If ``resume_from`` was given, ``runner.resume`` is called with it
        after the buffers have been registered.

        Args:
            runner: The runner whose ``model`` receives the ema buffers.
        """
        model = runner.model
        if is_module_wrapper(model):
            # Work on the wrapped module so names carry no wrapper prefix.
            model = model.module
        # Maps a parameter name to the name of the buffer holding its ema.
        self.param_ema_buffer = {}
        self.model_parameters = dict(model.named_parameters(recurse=True))
        for name, value in self.model_parameters.items():
            # "." is not allowed in buffer names, so it is replaced by "_".
            buffer_name = f"ema_{name.replace('.', '_')}"
            self.param_ema_buffer[name] = buffer_name
            model.register_buffer(buffer_name, value.data.clone())
        # Snapshot taken after registration so the ema buffers are included.
        self.model_buffers = dict(model.named_buffers(recurse=True))
        if self.checkpoint is not None:
            runner.resume(self.checkpoint)

    def after_train_iter(self, runner):
        """Update ema parameter every self.interval iterations.

        Args:
            runner: The runner; ``runner.iter`` is read as the current step.
        """
        curr_step = runner.iter
        if curr_step % self.interval != 0:
            return
        # We warm up the momentum considering the instability at beginning:
        # the step-dependent ratio caps the configured momentum.
        momentum = min(self.momentum,
                       (1 + curr_step) / (self.warm_up + curr_step))
        for name, parameter in self.model_parameters.items():
            buffer_name = self.param_ema_buffer[name]
            buffer_parameter = self.model_buffers[buffer_name]
            # ema = (1 - momentum) * ema + momentum * parameter, in place.
            # ``alpha=`` replaces the deprecated ``add_(Number, Tensor)``
            # positional overload.
            buffer_parameter.mul_(1 - momentum).add_(
                parameter.data, alpha=momentum)

    def after_train_epoch(self, runner):
        """We load parameter values from ema backup to model before the
        EvalHook."""
        self._swap_ema_parameters()

    def before_train_epoch(self, runner):
        """We recover model's parameter from ema backup after last epoch's
        EvalHook."""
        self._swap_ema_parameters()

    def _swap_ema_parameters(self):
        """Swap the parameter of model with parameter in ema_buffer."""
        for name, value in self.model_parameters.items():
            # Keep a copy of the live weights; they are overwritten next.
            temp = value.data.clone()
            ema_buffer = self.model_buffers[self.param_ema_buffer[name]]
            value.data.copy_(ema_buffer.data)
            ema_buffer.data.copy_(temp)
|
|
|