File size: 2,232 Bytes
ad16788 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
from distutils.version import LooseVersion
from typing import Union
import warnings
import torch
from torch.optim.lr_scheduler import _LRScheduler
from typeguard import check_argument_types
from espnet2.schedulers.abs_scheduler import AbsBatchStepScheduler
class NoamLR(_LRScheduler, AbsBatchStepScheduler):
    """The LR scheduler proposed by Noam.

    Ref:
        "Attention Is All You Need", https://arxiv.org/pdf/1706.03762.pdf

    For the 1-based step number ``n`` the learning rate of every parameter
    group is::

        base_lr * model_size ** -0.5
                * min(n ** -0.5, n * warmup_steps ** -1.5)

    i.e. it grows linearly for the first ``warmup_steps`` steps and then
    decays proportionally to the inverse square root of the step number.

    FIXME(kamo): PyTorch doesn't provide _LRScheduler as public class,
     thus the behaviour isn't guaranteed at forward PyTorch version.

    NOTE(kamo): The "model_size" in original implementation is derived from
     the model, but in this implementation, this parameter is a constant value.
     You need to change it if the model is changed.

    Args:
        optimizer: Optimizer whose learning rates are scheduled.
        model_size: Constant used in the ``model_size ** -0.5`` scaling
            factor. Must be positive.
        warmup_steps: Number of steps over which the learning rate is
            increased linearly. Must be positive.
        last_epoch: Index of the last step, forwarded to ``_LRScheduler``.

    Raises:
        NotImplementedError: If the installed PyTorch is older than 1.1.0.
        ValueError: If ``model_size`` or ``warmup_steps`` is not positive.
    """

    def __init__(
        self,
        optimizer: torch.optim.Optimizer,
        model_size: Union[int, float] = 320,
        warmup_steps: Union[int, float] = 25000,
        last_epoch: int = -1,
    ):
        if LooseVersion(torch.__version__) < LooseVersion("1.1.0"):
            raise NotImplementedError(f"Require PyTorch>=1.1.0: {torch.__version__}")

        assert check_argument_types()

        # Non-positive values would otherwise surface as an obscure
        # ZeroDivisionError or as a complex-valued learning rate.
        if model_size <= 0:
            raise ValueError(f"model_size must be positive: {model_size}")
        if warmup_steps <= 0:
            raise ValueError(f"warmup_steps must be positive: {warmup_steps}")

        self.model_size = model_size
        self.warmup_steps = warmup_steps

        # Tell the user which learning rate gives the same schedule
        # with the recommended replacement scheduler.
        lr = optimizer.param_groups[0]["lr"]
        new_lr = self.lr_for_WarmupLR(lr)
        warnings.warn(
            "NoamLR is deprecated. "
            f"Use WarmupLR(warmup_steps={warmup_steps}) with Optimizer(lr={new_lr})",
            # Attribute the warning to the code that instantiates NoamLR.
            stacklevel=2,
        )

        # The fields above must be set before invoking the base __init__()
        # because it also invokes step(), and get_lr() reads these fields.
        super().__init__(optimizer, last_epoch)

    def lr_for_WarmupLR(self, lr: float) -> float:
        """Return ``lr`` divided by ``sqrt(model_size)`` and ``sqrt(warmup_steps)``.

        This is the value suggested as the optimizer learning rate in the
        deprecation warning emitted by ``__init__``.
        """
        return lr / self.model_size ** 0.5 / self.warmup_steps ** 0.5

    def __repr__(self):
        """Return the class name together with the two schedule parameters."""
        return (
            f"{self.__class__.__name__}(model_size={self.model_size}, "
            f"warmup_steps={self.warmup_steps})"
        )

    def get_lr(self):
        """Return the scheduled learning rate for each entry of ``base_lrs``."""
        # last_epoch counts from 0, whereas the formula uses a 1-based step.
        step_num = self.last_epoch + 1
        # Both factors are the same for every parameter group, so compute
        # them once; the multiplication order below is kept unchanged.
        size_factor = self.model_size ** -0.5
        step_factor = min(step_num ** -0.5, step_num * self.warmup_steps ** -1.5)
        return [lr * size_factor * step_factor for lr in self.base_lrs]
|