mtasic85 committed on
Commit
cfb3029
1 Parent(s): e66e4fa

sophia_opt.SophiaG

Browse files
scripts/pretrain-model.yaml CHANGED
@@ -115,21 +115,32 @@ eval:
115
  final_validation: true
116
 
117
  # Optimizer-related arguments
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  optimizer:
119
- # class_path: torch.optim.AdamW
120
- class_path: grokadamw.GrokAdamW
121
 
122
  init_args:
123
- # (type: float, default: 0.001)
124
- lr: 4e-04
125
-
126
- # (type: float, default: 0.01)
127
- weight_decay: 0.1
128
-
129
- # (type: tuple, default: (0.9,0.999))
130
  betas:
131
- - 0.9
132
- - 0.95
 
 
133
 
134
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
135
  devices: auto
 
115
  final_validation: true
116
 
117
  # Optimizer-related arguments
118
+ # optimizer:
119
+ # # class_path: torch.optim.AdamW
120
+ # class_path: grokadamw.GrokAdamW
121
+ #
122
+ # init_args:
123
+ # # (type: float, default: 0.001)
124
+ # lr: 4e-04
125
+ #
126
+ # # (type: float, default: 0.01)
127
+ # weight_decay: 0.1
128
+ #
129
+ # # (type: tuple, default: (0.9,0.999))
130
+ # betas:
131
+ # - 0.9
132
+ # - 0.95
133
+
134
  optimizer:
135
+ class_path: sophia_opt.SophiaG
 
136
 
137
  init_args:
138
+ lr: 2e-4
 
 
 
 
 
 
139
  betas:
140
+ - 0.965
141
+ - 0.99
142
+ rho: 0.01
143
+ weight_decay: 1e-1
144
 
145
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
146
  devices: auto
scripts/requirements.in CHANGED
@@ -17,3 +17,5 @@ grokadamw
17
  # pyzstd
18
  # zstd
19
  Pillow
 
 
 
17
  # pyzstd
18
  # zstd
19
  Pillow
20
+
21
+ sophia-opt