# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Dataclasses for learning rate schedule config."""
from typing import List, Optional

import dataclasses

from official.modeling.hyperparams import base_config
@dataclasses.dataclass
class ConstantLrConfig(base_config.Config):
  """Configuration for constant learning rate.

  This class is a container for the constant learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to Constant.
    learning_rate: A float. The learning rate. Defaults to 0.1.
  """
  name: str = 'Constant'
  learning_rate: float = 0.1
@dataclasses.dataclass
class StepwiseLrConfig(base_config.Config):
  """Configuration for stepwise learning rate decay.

  This class is a container for the piecewise constant learning rate scheduling
  configs. It will configure an instance of PiecewiseConstantDecay keras
  learning rate schedule.

  An example (from keras docs): use a learning rate that's 1.0 for the first
  100001 steps, 0.5 for the next 10000 steps, and 0.1 for any additional steps.

  ```python
  boundaries: [100000, 110000]
  values: [1.0, 0.5, 0.1]
  ```

  Attributes:
    name: The name of the learning rate schedule. Defaults to PiecewiseConstant.
    boundaries: A list of ints of strictly increasing entries. Defaults to None.
    values: A list of floats that specifies the values for the intervals defined
      by `boundaries`. It should have one more element than `boundaries`.
      The learning rate is computed as follows: [0, boundaries[0]] ->
      values[0] [boundaries[0], boundaries[1]] -> values[1]
      [boundaries[n-1], boundaries[n]] -> values[n] [boundaries[n],
      end] -> values[n+1] Defaults to None.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'PiecewiseConstantDecay'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None
  offset: int = 0
@dataclasses.dataclass
class ExponentialLrConfig(base_config.Config):
  """Configuration for exponential learning rate decay.

  This class is a container for the exponential learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to ExponentialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    decay_steps: A positive integer that is used for decay computation. Defaults
      to None.
    decay_rate: A float. Defaults to None.
    staircase: A boolean, if true, learning rate is decreased at discrete
      intervals. Defaults to False.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'ExponentialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  decay_rate: Optional[float] = None
  staircase: Optional[bool] = None
  offset: int = 0
@dataclasses.dataclass
class PolynomialLrConfig(base_config.Config):
  """Configuration for polynomial learning rate decay.

  This class is a container for the polynomial learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to PolynomialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    decay_steps: A positive integer that is used for decay computation. Defaults
      to None.
    end_learning_rate: A float. The minimal end learning rate.
    power: A float. The power of the polynomial. Defaults to linear, 1.0.
    cycle: A boolean, whether or not it should cycle beyond decay_steps.
      Defaults to False.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'PolynomialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  end_learning_rate: float = 0.0001
  power: float = 1.0
  cycle: bool = False
  offset: int = 0
@dataclasses.dataclass
class CosineLrConfig(base_config.Config):
  """Configuration for Cosine learning rate decay.

  This class is a container for the cosine learning rate decay configs,
  tf_keras.experimental.CosineDecay.

  Attributes:
    name: The name of the learning rate schedule. Defaults to CosineDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    decay_steps: A positive integer that is used for decay computation. Defaults
      to None.
    alpha: A float. Minimum learning rate value as a fraction of
      initial_learning_rate.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'CosineDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  alpha: float = 0.0
  offset: int = 0
@dataclasses.dataclass
class DirectPowerLrConfig(base_config.Config):
  """Configuration for DirectPower learning rate decay.

  This class configures a schedule that follows lr * (step)^power.

  Attributes:
    name: The name of the learning rate schedule. Defaults to DirectPowerDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    power: A float. Defaults to -0.5, for sqrt decay.
  """
  name: str = 'DirectPowerDecay'
  initial_learning_rate: Optional[float] = None
  power: float = -0.5
@dataclasses.dataclass
class PowerAndLinearDecayLrConfig(base_config.Config):
  """Configuration for power and linear learning rate decay.

  The schedule has the following behavior.
  Let offset_step = step - offset.
  1) offset_step < 0, the actual learning rate equals initial_learning_rate.
  2) offset_step <= total_decay_steps * (1 - linear_decay_fraction), the
     actual learning rate equals lr * offset_step^power.
  3) total_decay_steps * (1 - linear_decay_fraction) <= offset_step <
     total_decay_steps, the actual learning rate equals lr * offset_step^power *
     (total_decay_steps - offset_step) / (total_decay_steps *
     linear_decay_fraction).
  4) offset_step >= total_decay_steps, the actual learning rate equals zero.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PowerAndLinearDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    total_decay_steps: An int. The total number of steps for power + linear
      decay. Defaults to None.
    power: A float. The order of the polynomial. Defaults to -0.5, for sqrt
      decay.
    linear_decay_fraction: A float. In the last `linear_decay_fraction` steps,
      the learning rate will be multiplied by a linear decay. Defaults to 0.1.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'PowerAndLinearDecay'
  initial_learning_rate: Optional[float] = None
  total_decay_steps: Optional[int] = None
  power: float = -0.5
  linear_decay_fraction: float = 0.1
  offset: int = 0
@dataclasses.dataclass
class PowerDecayWithOffsetLrConfig(base_config.Config):
  """Configuration for power learning rate decay with step offset.

  Learning rate equals to `pre_offset_learning_rate` if `step` < `offset`.
  Otherwise, learning rate equals to lr * (step - offset)^power.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PowerDecayWithOffset.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    power: A float. Defaults to -0.5, for sqrt decay.
    offset: An integer. Power decay happens after `offset` steps.
    pre_offset_learning_rate: A float. The constant learning rate before
      `offset` steps.
  """
  name: str = 'PowerDecayWithOffset'
  initial_learning_rate: Optional[float] = None
  power: float = -0.5
  offset: int = 0
  pre_offset_learning_rate: float = 1.0e6
@dataclasses.dataclass
class StepCosineLrConfig(base_config.Config):
  """Configuration for stepwise cosine learning rate decay.

  This class is a container for the piecewise cosine learning rate scheduling
  configs. It will configure an instance of StepCosineDecayWithOffset keras
  learning rate schedule.

  ```python
  boundaries: [100000, 110000]
  values: [1.0, 0.5]
  lr_decayed_fn = (
      lr_schedule.StepCosineDecayWithOffset(
          boundaries,
          values))
  ```
  from 0 to 100000 step, it will cosine decay from 1.0 to 0.5
  from 100000 to 110000 step, it cosine decay from 0.5 to 0.0

  Attributes:
    name: The name of the learning rate schedule. Defaults to PiecewiseConstant.
    boundaries: A list of ints of strictly increasing entries. Defaults to None.
    values: A list of floats that specifies the values for the intervals defined
      by `boundaries`. It should have one more element than `boundaries`.
      The learning rate is computed as follows:
        [0, boundaries[0]] -> cosine from values[0] to values[1]
        [boundaries[0], boundaries[1]] -> values[1] to values[2]
        ...
        [boundaries[n-1], boundaries[n]] -> values[n] to values[n+1]
        [boundaries[n], end] -> values[n+1] to 0.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'StepCosineDecayWithOffset'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None
  offset: int = 0
@dataclasses.dataclass
class LinearWarmupConfig(base_config.Config):
  """Configuration for linear warmup schedule config.

  This class is a container for the linear warmup schedule configs.
  Warmup_learning_rate is the initial learning rate, the final learning rate of
  the warmup period is the learning_rate of the optimizer in use. The learning
  rate at each step linearly increased according to the following formula:
    warmup_learning_rate = warmup_learning_rate +
      step / warmup_steps * (final_learning_rate - warmup_learning_rate).
  Using warmup overrides the learning rate schedule by the number of warmup
  steps.

  Attributes:
    name: The name of warmup schedule. Defaults to linear.
    warmup_learning_rate: Initial learning rate for the warmup. Defaults to 0.
    warmup_steps: Warmup steps. Defaults to None.
  """
  name: str = 'linear'
  warmup_learning_rate: float = 0
  warmup_steps: Optional[int] = None
@dataclasses.dataclass
class PolynomialWarmupConfig(base_config.Config):
  """Configuration for polynomial warmup schedule config.

  This class is a container for the polynomial warmup schedule configs.

  Attributes:
    name: The name of warmup schedule. Defaults to Polynomial.
    power: Polynomial power. Defaults to 1.
    warmup_steps: Warmup steps. Defaults to None.
  """
  name: str = 'polynomial'
  power: float = 1
  warmup_steps: Optional[int] = None