CocoRoF committed
Commit 0a85874 · verified · 1 Parent(s): 6fbe521

Training in progress, step 600, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8807736dbffe8aef2eab4950916adaf7e0fa48b77a88e4d90c8b88ffef2ab45
+oid sha256:5e1934cf89fd4787f6fb31a354e6b6d38ef87d8331f840843fdd09707b90f1ae
 size 368988278
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:862a77bd5a51f96c0430dc229ffb8f0aba4ea0915c3e4e987d6984fbe2058151
+oid sha256:0aefa035f4f0db94e28330038485db17cc190dc4abebadfedd0459c73aa21f82
 size 1107079290
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:78d3f197f6c6558fa8056324f1563ab9e957255f5a1a959362aa4eed7a9545db
+oid sha256:74386f26f36ed67f56395205881e5db2d0c28ffcbeed50dd95b28771d2dac588
 size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c1a9c65c2869356282cad6b4a0f7dff7f4dd68ab3d9d216c72b7d6cb524f860
+oid sha256:41c88f9de084200454883a13c3717941ea3fd433e2f8735507fc30611f9c5501
 size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:896febe768e17bae5022a95960c041f6425783774ec8859d99d3b149063b1bf9
+oid sha256:965b00d4cb4710ebab57c8787b9925bb3f77b8eeba94a186ec4bc1c2f326ef3f
 size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eac482d57e966585467c8ef44dae2869bf7e5d92886f69c11ed7bccc34c07efe
+oid sha256:d5dc374b8b9a4c45c950f9d136feab85a767081fa59f0c7d68ed3a62060c4949
 size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1f27d227a20dc320ac283e0938fb2f6e5b475829a583f8c44d1a16a8c828307
+oid sha256:5c7c212fb779217f1edac0baf44f67b608eefc1e0e4e3f5a9dd7eb557032c1bc
 size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d05a7106aaeaec4b81704e3f4a998b5123cf9342a6733bd9fd2d578e99108c3b
+oid sha256:86e1effd626ce1e95dd68a0c8089fe19218f2b24dfe9e45ed2cab1c0ebc10ba1
 size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b94120d8d88502ec8d8b623ec7550315caca003b44fcffbb5767ab0de91baefe
+oid sha256:799cc83f60dfc1c4243cfd6403592112414a2eec494e6832f10221c96ff62c20
 size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:332e4d901be380f740b5d8578f7b80ef1865c7fba83bc288c8a35852205cc668
+oid sha256:586777c398770c3255d3a1f48c7fef44ea9d89117c627c9ea490e16bfd9a49ba
 size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6add71cc2ee097875f5cb7960ac29c9bf154d3226c75749cd7c39c08237a2bbe
+oid sha256:d9361adbf8fe7e919bd805eb1cb79b701fd3c8fe0ef4fe1c33df9e6bf132cee3
 size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.4847383170491555,
+  "epoch": 0.969476634098311,
   "eval_steps": 300,
-  "global_step": 300,
+  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -225,6 +225,224 @@
       "eval_samples_per_second": 1362.845,
       "eval_steps_per_second": 42.605,
       "step": 300
+    },
+    {
+      "epoch": 0.500896260950794,
+      "grad_norm": 62.3125,
+      "learning_rate": 9.990216931651065e-07,
+      "loss": 87.5903,
+      "step": 310
+    },
+    {
+      "epoch": 0.5170542048524325,
+      "grad_norm": 61.0625,
+      "learning_rate": 9.9899013488011e-07,
+      "loss": 89.8225,
+      "step": 320
+    },
+    {
+      "epoch": 0.5332121487540711,
+      "grad_norm": 65.6875,
+      "learning_rate": 9.989585765951135e-07,
+      "loss": 87.4679,
+      "step": 330
+    },
+    {
+      "epoch": 0.5493700926557096,
+      "grad_norm": 66.625,
+      "learning_rate": 9.98927018310117e-07,
+      "loss": 88.2141,
+      "step": 340
+    },
+    {
+      "epoch": 0.5655280365573481,
+      "grad_norm": 64.25,
+      "learning_rate": 9.988954600251204e-07,
+      "loss": 88.7629,
+      "step": 350
+    },
+    {
+      "epoch": 0.5816859804589866,
+      "grad_norm": 67.6875,
+      "learning_rate": 9.988639017401237e-07,
+      "loss": 87.7045,
+      "step": 360
+    },
+    {
+      "epoch": 0.5978439243606251,
+      "grad_norm": 68.3125,
+      "learning_rate": 9.988323434551273e-07,
+      "loss": 89.4493,
+      "step": 370
+    },
+    {
+      "epoch": 0.6140018682622637,
+      "grad_norm": 63.6875,
+      "learning_rate": 9.988007851701307e-07,
+      "loss": 87.7596,
+      "step": 380
+    },
+    {
+      "epoch": 0.6301598121639022,
+      "grad_norm": 64.875,
+      "learning_rate": 9.98769226885134e-07,
+      "loss": 88.0606,
+      "step": 390
+    },
+    {
+      "epoch": 0.6463177560655406,
+      "grad_norm": 64.1875,
+      "learning_rate": 9.987376686001376e-07,
+      "loss": 87.7883,
+      "step": 400
+    },
+    {
+      "epoch": 0.6624756999671791,
+      "grad_norm": 62.09375,
+      "learning_rate": 9.98706110315141e-07,
+      "loss": 88.77,
+      "step": 410
+    },
+    {
+      "epoch": 0.6786336438688176,
+      "grad_norm": 65.6875,
+      "learning_rate": 9.986745520301443e-07,
+      "loss": 88.4047,
+      "step": 420
+    },
+    {
+      "epoch": 0.6947915877704562,
+      "grad_norm": 65.3125,
+      "learning_rate": 9.986429937451479e-07,
+      "loss": 86.9815,
+      "step": 430
+    },
+    {
+      "epoch": 0.7109495316720947,
+      "grad_norm": 68.0625,
+      "learning_rate": 9.986114354601514e-07,
+      "loss": 88.4078,
+      "step": 440
+    },
+    {
+      "epoch": 0.7271074755737332,
+      "grad_norm": 64.4375,
+      "learning_rate": 9.985798771751548e-07,
+      "loss": 87.8946,
+      "step": 450
+    },
+    {
+      "epoch": 0.7432654194753717,
+      "grad_norm": 65.3125,
+      "learning_rate": 9.985483188901581e-07,
+      "loss": 88.9161,
+      "step": 460
+    },
+    {
+      "epoch": 0.7594233633770103,
+      "grad_norm": 64.875,
+      "learning_rate": 9.985167606051617e-07,
+      "loss": 87.7899,
+      "step": 470
+    },
+    {
+      "epoch": 0.7755813072786488,
+      "grad_norm": 68.75,
+      "learning_rate": 9.98485202320165e-07,
+      "loss": 87.9098,
+      "step": 480
+    },
+    {
+      "epoch": 0.7917392511802873,
+      "grad_norm": 64.625,
+      "learning_rate": 9.984536440351684e-07,
+      "loss": 88.2259,
+      "step": 490
+    },
+    {
+      "epoch": 0.8078971950819258,
+      "grad_norm": 62.96875,
+      "learning_rate": 9.98422085750172e-07,
+      "loss": 87.4765,
+      "step": 500
+    },
+    {
+      "epoch": 0.8240551389835643,
+      "grad_norm": 62.75,
+      "learning_rate": 9.983905274651753e-07,
+      "loss": 88.4725,
+      "step": 510
+    },
+    {
+      "epoch": 0.8402130828852029,
+      "grad_norm": 66.3125,
+      "learning_rate": 9.98358969180179e-07,
+      "loss": 88.8554,
+      "step": 520
+    },
+    {
+      "epoch": 0.8563710267868414,
+      "grad_norm": 64.75,
+      "learning_rate": 9.983274108951823e-07,
+      "loss": 88.1039,
+      "step": 530
+    },
+    {
+      "epoch": 0.8725289706884799,
+      "grad_norm": 70.3125,
+      "learning_rate": 9.982958526101856e-07,
+      "loss": 87.927,
+      "step": 540
+    },
+    {
+      "epoch": 0.8886869145901184,
+      "grad_norm": 61.03125,
+      "learning_rate": 9.982642943251892e-07,
+      "loss": 88.5062,
+      "step": 550
+    },
+    {
+      "epoch": 0.9048448584917569,
+      "grad_norm": 69.1875,
+      "learning_rate": 9.982327360401925e-07,
+      "loss": 89.0445,
+      "step": 560
+    },
+    {
+      "epoch": 0.9210028023933955,
+      "grad_norm": 64.625,
+      "learning_rate": 9.982011777551961e-07,
+      "loss": 87.5429,
+      "step": 570
+    },
+    {
+      "epoch": 0.937160746295034,
+      "grad_norm": 64.4375,
+      "learning_rate": 9.981696194701995e-07,
+      "loss": 87.194,
+      "step": 580
+    },
+    {
+      "epoch": 0.9533186901966725,
+      "grad_norm": 65.0,
+      "learning_rate": 9.98138061185203e-07,
+      "loss": 87.1017,
+      "step": 590
+    },
+    {
+      "epoch": 0.969476634098311,
+      "grad_norm": 78.375,
+      "learning_rate": 9.981065029002064e-07,
+      "loss": 87.2723,
+      "step": 600
+    },
+    {
+      "epoch": 0.969476634098311,
+      "eval_loss": 1.3628411293029785,
+      "eval_runtime": 22.8208,
+      "eval_samples_per_second": 1424.139,
+      "eval_steps_per_second": 44.521,
+      "step": 600
     }
   ],
   "logging_steps": 10,
@@ -244,7 +462,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.283349733238374e+17,
+  "total_flos": 1.656669946647675e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null