DeskDown committed on
Commit
b8fa365
·
1 Parent(s): a452dce

Training in progress, step 20000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc449c3c9aa977e62e8a253fc1964181c661fbd908462f8287ca14c67fa221d5
3
  size 751197645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eddb74787696dede4919ec271555a6350a568c321174a7c15dc3e1d67bee90d9
3
  size 751197645
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c647d2e5ab98d23af56502a4839e2d950f16e5f58d356abdc346b70f550298af
3
  size 376008389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab9255443ab93767d06e31e700d16c826a1b9c4bd217525b669b2851a871e8fd
3
  size 376008389
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:392a70bbd171fc077436dd500b1dfc29564709bf147ae66f4482dfc8047b6daf
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:955f2254b1e2583a0c202f4167da7de0b528b48ae4ee34006c7519a67f4f2166
3
+ size 14567
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5afed70c6b44287fbb97642baae95caed1fca70c0d2879628f6644dcd1c87ebf
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35de3af080a175fc9162ccb44a37ba550925fde9c9502c74bafce2c813e66388
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49ac74872413f57561e77e286f80f4be64cce2d235c693168b3f7d99462940fd
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:265e019a5d08011062ca46680adb46fea92faeb6e1ff9bcaf722eb0c685c75c6
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.7863522686673812,
5
- "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -136,11 +136,141 @@
136
  "eval_samples_per_second": 6.25,
137
  "eval_steps_per_second": 0.196,
138
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
  ],
141
  "max_steps": 27990,
142
  "num_train_epochs": 5,
143
- "total_flos": 2.16932586356736e+16,
144
  "trial_name": null,
145
  "trial_params": null
146
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.5727045373347623,
5
+ "global_step": 20000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
136
  "eval_samples_per_second": 6.25,
137
  "eval_steps_per_second": 0.196,
138
  "step": 10000
139
+ },
140
+ {
141
+ "epoch": 1.88,
142
+ "learning_rate": 6.251965689778412e-05,
143
+ "loss": 0.9,
144
+ "step": 10500
145
+ },
146
+ {
147
+ "epoch": 1.96,
148
+ "learning_rate": 6.073266619013581e-05,
149
+ "loss": 0.8792,
150
+ "step": 11000
151
+ },
152
+ {
153
+ "epoch": 2.05,
154
+ "learning_rate": 5.894924946390279e-05,
155
+ "loss": 0.8345,
156
+ "step": 11500
157
+ },
158
+ {
159
+ "epoch": 2.14,
160
+ "learning_rate": 5.716583273766977e-05,
161
+ "loss": 0.8162,
162
+ "step": 12000
163
+ },
164
+ {
165
+ "epoch": 2.23,
166
+ "learning_rate": 5.537884203002145e-05,
167
+ "loss": 0.8095,
168
+ "step": 12500
169
+ },
170
+ {
171
+ "epoch": 2.32,
172
+ "learning_rate": 5.359185132237312e-05,
173
+ "loss": 0.7957,
174
+ "step": 13000
175
+ },
176
+ {
177
+ "epoch": 2.41,
178
+ "learning_rate": 5.18048606147248e-05,
179
+ "loss": 0.7941,
180
+ "step": 13500
181
+ },
182
+ {
183
+ "epoch": 2.5,
184
+ "learning_rate": 5.0017869907076487e-05,
185
+ "loss": 0.7864,
186
+ "step": 14000
187
+ },
188
+ {
189
+ "epoch": 2.59,
190
+ "learning_rate": 4.823087919942816e-05,
191
+ "loss": 0.7805,
192
+ "step": 14500
193
+ },
194
+ {
195
+ "epoch": 2.68,
196
+ "learning_rate": 4.6443888491779846e-05,
197
+ "loss": 0.7744,
198
+ "step": 15000
199
+ },
200
+ {
201
+ "epoch": 2.77,
202
+ "learning_rate": 4.465689778413152e-05,
203
+ "loss": 0.7757,
204
+ "step": 15500
205
+ },
206
+ {
207
+ "epoch": 2.86,
208
+ "learning_rate": 4.2869907076483206e-05,
209
+ "loss": 0.7671,
210
+ "step": 16000
211
+ },
212
+ {
213
+ "epoch": 2.95,
214
+ "learning_rate": 4.108291636883488e-05,
215
+ "loss": 0.7618,
216
+ "step": 16500
217
+ },
218
+ {
219
+ "epoch": 3.04,
220
+ "learning_rate": 3.929592566118656e-05,
221
+ "loss": 0.7375,
222
+ "step": 17000
223
+ },
224
+ {
225
+ "epoch": 3.13,
226
+ "learning_rate": 3.750893495353824e-05,
227
+ "loss": 0.6979,
228
+ "step": 17500
229
+ },
230
+ {
231
+ "epoch": 3.22,
232
+ "learning_rate": 3.5729092208720514e-05,
233
+ "loss": 0.7052,
234
+ "step": 18000
235
+ },
236
+ {
237
+ "epoch": 3.3,
238
+ "learning_rate": 3.39421015010722e-05,
239
+ "loss": 0.6974,
240
+ "step": 18500
241
+ },
242
+ {
243
+ "epoch": 3.39,
244
+ "learning_rate": 3.2155110793423873e-05,
245
+ "loss": 0.6921,
246
+ "step": 19000
247
+ },
248
+ {
249
+ "epoch": 3.48,
250
+ "learning_rate": 3.0368120085775553e-05,
251
+ "loss": 0.696,
252
+ "step": 19500
253
+ },
254
+ {
255
+ "epoch": 3.57,
256
+ "learning_rate": 2.8581129378127237e-05,
257
+ "loss": 0.6971,
258
+ "step": 20000
259
+ },
260
+ {
261
+ "epoch": 3.57,
262
+ "eval_bleu": 5.3321,
263
+ "eval_gen_len": 44.1972,
264
+ "eval_loss": 0.8275578618049622,
265
+ "eval_runtime": 1432.7377,
266
+ "eval_samples_per_second": 6.233,
267
+ "eval_steps_per_second": 0.195,
268
+ "step": 20000
269
  }
270
  ],
271
  "max_steps": 27990,
272
  "num_train_epochs": 5,
273
+ "total_flos": 4.33848223531008e+16,
274
  "trial_name": null,
275
  "trial_params": null
276
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c647d2e5ab98d23af56502a4839e2d950f16e5f58d356abdc346b70f550298af
3
  size 376008389
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab9255443ab93767d06e31e700d16c826a1b9c4bd217525b669b2851a871e8fd
3
  size 376008389
runs/Feb08_19-24-11_fadcca9c27d0/events.out.tfevents.1644348258.fadcca9c27d0.35.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fb419f0fea41195134665abb1056ce1fcc92f38e8ba7eff4ff72a2e64885d36
3
- size 7218
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6d8d0dbfec2c2425f8dbde5ffbd5b91ba063ed34d1bef26590e5f7a9537720b
3
+ size 10759