SummerTime / model /third_party /HMNet /Models /Optimizers /LnrWrmpInvSqRtDcyScheduler.py
akhaliq3
spaces demo
546a9ba
raw
history blame contribute delete
943 Bytes
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import math
from torch.optim.lr_scheduler import LambdaLR
class LnrWrmpInvSqRtDcyScheduler(LambdaLR):
"""Inverse Square Root learning rate schedule used in T5"""
def __init__(self, optimizer, warmup_steps, warmup_init_lr, warmup_end_lr):
self.warmup_steps = warmup_steps
self.warmup_init_lr = warmup_init_lr
self.warmup_end_lr = warmup_end_lr
self.lr_step = (warmup_end_lr - warmup_init_lr) / warmup_steps
super(LnrWrmpInvSqRtDcyScheduler, self).__init__(
optimizer, self.lr_lambda, last_epoch=-1
)
def lr_lambda(self, step):
if step < self.warmup_steps:
return (self.warmup_init_lr + step * self.lr_step) / self.warmup_end_lr
else:
return 1.0 / float(math.sqrt(step / float(self.warmup_steps)))
def get_last_lr(self):
return self.get_lr()