# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Dataclasses for learning rate schedule config."""
from typing import List, Optional

import dataclasses
from official.modeling.hyperparams import base_config


@dataclasses.dataclass
class ConstantLrConfig(base_config.Config):
  """Configuration for constant learning rate.

  This class is a container for the constant learning rate configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to Constant.
    learning_rate: A float. The learning rate. Defaults to 0.1.
  """
  name: str = 'Constant'
  learning_rate: float = 0.1


@dataclasses.dataclass
class StepwiseLrConfig(base_config.Config):
  """Configuration for stepwise learning rate decay.

  This class is a container for the piecewise constant learning rate scheduling
  configs. It will configure an instance of PiecewiseConstantDecay keras
  learning rate schedule.

  An example (from the Keras docs): use a learning rate that's 1.0 for the
  first 100001 steps, 0.5 for the next 10000 steps, and 0.1 for any additional
  steps:
    ```python
    boundaries: [100000, 110000]
    values: [1.0, 0.5, 0.1]
    ```

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PiecewiseConstantDecay.
    boundaries: A list of ints of strictly increasing entries. Defaults to None.
    values: A list of floats that specifies the values for the intervals defined
      by `boundaries`. It should have one more element than `boundaries`.
      The learning rate is computed as follows:
        [0, boundaries[0]]               -> values[0]
        [boundaries[0], boundaries[1]]   -> values[1]
        ...
        [boundaries[n-1], boundaries[n]] -> values[n]
        [boundaries[n], end]             -> values[n+1]
      Defaults to None.
    offset: An int. The offset applied to steps. Defaults to 0.
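
  Example, a minimal construction sketch (the numbers are illustrative, taken
  from the example above):

    ```python
    config = StepwiseLrConfig(boundaries=[100000, 110000],
                              values=[1.0, 0.5, 0.1])
    ```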
  """
  name: str = 'PiecewiseConstantDecay'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None
  offset: int = 0


@dataclasses.dataclass
class ExponentialLrConfig(base_config.Config):
  """Configuration for exponential learning rate decay.

  This class is a container for the exponential learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to ExponentialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    decay_steps: A positive integer that is used for decay computation. Defaults
      to None.
    decay_rate: A float. Defaults to None.
    staircase: A boolean; if true, the learning rate is decayed at discrete
      intervals. Defaults to False.
    offset: An int. The offset applied to steps. Defaults to 0.
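
  Example, a minimal sketch assuming the standard Keras ExponentialDecay
  semantics (the numbers are illustrative):

    ```python
    config = ExponentialLrConfig(
        initial_learning_rate=0.1, decay_steps=1000, decay_rate=0.96)
    # lr(step) = 0.1 * 0.96 ** (step / 1000), e.g. lr(1000) == 0.096.
    ```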
  """
  name: str = 'ExponentialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  decay_rate: Optional[float] = None
  staircase: Optional[bool] = None
  offset: int = 0


@dataclasses.dataclass
class PolynomialLrConfig(base_config.Config):
  """Configuration for polynomial learning rate decay.

  This class is a container for the polynomial learning rate decay configs.

  Attributes:
    name: The name of the learning rate schedule. Defaults to PolynomialDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    decay_steps: A positive integer that is used for decay computation. Defaults
      to None.
    end_learning_rate: A float. The minimal end learning rate. Defaults to
      0.0001.
    power: A float. The power of the polynomial. Defaults to 1.0 (linear).
    cycle: A boolean, whether or not it should cycle beyond decay_steps.
      Defaults to False.
    offset: An int. The offset applied to steps. Defaults to 0.
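
  Example, a minimal sketch assuming the standard Keras PolynomialDecay
  semantics with cycle=False (the numbers are illustrative):

    ```python
    config = PolynomialLrConfig(
        initial_learning_rate=0.1, decay_steps=1000, power=1.0)
    # lr(step) = (0.1 - 0.0001) * (1 - step / 1000) ** 1.0 + 0.0001
    ```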
  """
  name: str = 'PolynomialDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  end_learning_rate: float = 0.0001
  power: float = 1.0
  cycle: bool = False
  offset: int = 0


@dataclasses.dataclass
class CosineLrConfig(base_config.Config):
  """Configuration for Cosine learning rate decay.

  This class is a container for the cosine learning rate decay configs,
  tf_keras.experimental.CosineDecay.

  Attributes:
    name: The name of the learning rate schedule. Defaults to CosineDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    decay_steps: A positive integer that is used for decay computation. Defaults
      to None.
    alpha: A float. The minimum learning rate value as a fraction of
      initial_learning_rate. Defaults to 0.0.
    offset: An int. The offset applied to steps. Defaults to 0.
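
  Example, a minimal sketch assuming the tf_keras.experimental.CosineDecay
  semantics named above (the numbers are illustrative):

    ```python
    config = CosineLrConfig(
        initial_learning_rate=0.1, decay_steps=1000, alpha=0.0)
    # cosine = 0.5 * (1 + cos(pi * step / 1000))
    # lr(step) = 0.1 * ((1 - 0.0) * cosine + 0.0)
    ```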
  """
  name: str = 'CosineDecay'
  initial_learning_rate: Optional[float] = None
  decay_steps: Optional[int] = None
  alpha: float = 0.0
  offset: int = 0


@dataclasses.dataclass
class DirectPowerLrConfig(base_config.Config):
  """Configuration for DirectPower learning rate decay.

  This class configures a schedule that follows lr * step^power.

  Attributes:
    name: The name of the learning rate schedule. Defaults to DirectPowerDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    power: A float. Defaults to -0.5, for sqrt decay.
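
  Example, a minimal sketch of the formula stated above (the numbers are
  illustrative):

    ```python
    config = DirectPowerLrConfig(initial_learning_rate=0.01, power=-0.5)
    # lr(step) = 0.01 * step ** -0.5, e.g. lr(100) == 0.001.
    ```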
  """
  name: str = 'DirectPowerDecay'
  initial_learning_rate: Optional[float] = None
  power: float = -0.5


@dataclasses.dataclass
class PowerAndLinearDecayLrConfig(base_config.Config):
  """Configuration for DirectPower learning rate decay.

  The schedule has the following behavoir.
  Let offset_step = step - offset.
  1) offset_step < 0, the actual learning rate equals initial_learning_rate.
  2) offset_step <= total_decay_steps * (1 - linear_decay_fraction), the
  actual learning rate equals lr * offset_step^power.
  3) total_decay_steps * (1 - linear_decay_fraction) <= offset_step <
  total_decay_steps, the actual learning rate equals lr * offset_step^power *
  (total_decay_steps - offset_step) / (total_decay_steps *
  linear_decay_fraction).
  4) offset_step >= total_decay_steps, the actual learning rate equals zero.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PowerAndLinearDecay.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    total_decay_steps: An int. The total number of steps for power + linear
      decay. Defaults to None.
    power: A float. The order of the polynomial. Defaults to -0.5, for sqrt
      decay.
    linear_decay_fraction: A float. In the last `linear_decay_fraction` steps,
      the learning rate will be multiplied by a linear decay. Defaults to 0.1.
    offset: An int. The offset applied to steps. Defaults to 0.
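
  Example, a plain-Python transcription of rules 1-4 above (an illustrative
  sketch, not the actual implementation; it assumes offset_step > 0 whenever
  power is negative):

    ```python
    def lr_at(step, lr, total_decay_steps, power, linear_decay_fraction,
              offset):
      offset_step = step - offset
      if offset_step < 0:                    # rule 1: before the offset
        return lr
      if offset_step >= total_decay_steps:   # rule 4: fully decayed
        return 0.0
      value = lr * offset_step**power        # rule 2: pure power decay
      knee = total_decay_steps * (1 - linear_decay_fraction)
      if offset_step > knee:                 # rule 3: linear tail
        value *= (total_decay_steps - offset_step) / (
            total_decay_steps * linear_decay_fraction)
      return value
    ```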
  """
  name: str = 'PowerAndLinearDecay'
  initial_learning_rate: Optional[float] = None
  total_decay_steps: Optional[int] = None
  power: float = -0.5
  linear_decay_fraction: float = 0.1
  offset: int = 0


@dataclasses.dataclass
class PowerDecayWithOffsetLrConfig(base_config.Config):
  """Configuration for power learning rate decay with step offset.

  The learning rate equals `pre_offset_learning_rate` if `step` < `offset`.
  Otherwise, it equals lr * (step - offset)^power.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      PowerDecayWithOffset.
    initial_learning_rate: A float. The initial learning rate. Defaults to None.
    power: A float. Defaults to -0.5, for sqrt decay.
    offset: An integer. Power decay happens after `offset` steps.
    pre_offset_learning_rate: A float. The constant learning rate before
      `offset` steps.
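
  Example, a minimal sketch of the rule above (the numbers are illustrative):

    ```python
    config = PowerDecayWithOffsetLrConfig(
        initial_learning_rate=1.0, power=-0.5, offset=10000,
        pre_offset_learning_rate=0.0001)
    # lr(step) = 0.0001                        for step < 10000
    # lr(step) = 1.0 * (step - 10000) ** -0.5  otherwise
    ```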
  """
  name: str = 'PowerDecayWithOffset'
  initial_learning_rate: Optional[float] = None
  power: float = -0.5
  offset: int = 0
  pre_offset_learning_rate: float = 1.0e6


@dataclasses.dataclass
class StepCosineLrConfig(base_config.Config):
  """Configuration for stepwise learning rate decay.

  This class is a container for the piecewise cosine learning rate scheduling
  configs. It will configure an instance of StepCosineDecayWithOffset keras
  learning rate schedule.

    ```python
    boundaries: [100000, 110000]
    values: [1.0, 0.5]
    lr_decayed_fn = (
        lr_schedule.StepCosineDecayWithOffset(
            boundaries,
            values))
    ```
    From step 0 to 100000, the learning rate cosine-decays from 1.0 to 0.5;
    from step 100000 to 110000, it cosine-decays from 0.5 to 0.0.

  Attributes:
    name: The name of the learning rate schedule. Defaults to
      StepCosineDecayWithOffset.
    boundaries: A list of ints of strictly increasing entries. Defaults to None.
    values: A list of floats that specifies the values for the intervals defined
      by `boundaries`. Consistent with the example above, it should have the
      same number of elements as `boundaries`.
      The learning rate is computed as follows:
        [0, boundaries[0]]               -> cosine from values[0] to values[1]
        [boundaries[0], boundaries[1]]   -> cosine from values[1] to values[2]
        ...
        [boundaries[n-1], boundaries[n]] -> cosine from values[n] to 0.
    offset: An int. The offset applied to steps. Defaults to 0.
  """
  name: str = 'StepCosineDecayWithOffset'
  boundaries: Optional[List[int]] = None
  values: Optional[List[float]] = None
  offset: int = 0


@dataclasses.dataclass
class LinearWarmupConfig(base_config.Config):
  """Configuration for linear warmup schedule config.

  This class is a container for the linear warmup schedule configs.
  Warmup_learning_rate is the initial learning rate, the final learning rate of
  the warmup period is the learning_rate of the optimizer in use. The learning
  rate at each step linearly increased according to the following formula:
    warmup_learning_rate = warmup_learning_rate +
    step / warmup_steps * (final_learning_rate - warmup_learning_rate).
  Using warmup overrides the learning rate schedule by the number of warmup
  steps.

  Attributes:
    name: The name of the warmup schedule. Defaults to linear.
    warmup_learning_rate: Initial learning rate for the warmup. Defaults to 0.
    warmup_steps: Warmup steps. Defaults to None.
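
  Example, a minimal sketch of the formula above, assuming a post-warmup
  optimizer learning rate of 0.1 (the numbers are illustrative):

    ```python
    config = LinearWarmupConfig(warmup_learning_rate=0.0, warmup_steps=1000)
    # lr(step) = 0.0 + step / 1000 * (0.1 - 0.0), e.g. lr(500) == 0.05.
    ```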
  """
  name: str = 'linear'
  warmup_learning_rate: float = 0
  warmup_steps: Optional[int] = None


@dataclasses.dataclass
class PolynomialWarmupConfig(base_config.Config):
  """Configuration for linear warmup schedule config.

  This class is a container for the polynomial warmup schedule configs.

  Attributes:
    name: The name of the warmup schedule. Defaults to polynomial.
    power: Polynomial power. Defaults to 1.
    warmup_steps: Warmup steps. Defaults to None.
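
  Example, a minimal sketch assuming the warmup scales the post-warmup target
  rate by (step / warmup_steps) ** power; the exact formula lives in the
  schedule implementation (the numbers are illustrative):

    ```python
    config = PolynomialWarmupConfig(power=2.0, warmup_steps=1000)
    # lr(step) ~= target_lr * (step / 1000) ** 2.0 for step <= 1000.
    ```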
  """
  name: str = 'polynomial'
  power: float = 1
  warmup_steps: Optional[int] = None