humbertonc commited on
Commit
0dc0fab
·
1 Parent(s): eb15342

Training in progress, step 75, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afee68c4cfaaf6b8cd20275f8c90ed251cf16d37374144e9a0b0a60faa5b47b6
3
  size 80013120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b74265b17ca020ad147ba93efef015a8c237e1c34da6d3dd21e759b0aef08a2
3
  size 80013120
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9dfeec10c4488a928944026c2d75408c2e1b33654f1376bb77c53629f1618bc1
3
  size 40570324
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8417b3f9d5b1359970536bb12f52bc99c995b94763d8ae32f24181d5adae6553
3
  size 40570324
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4d4d519233b1d715f1e302ab6f44d45be958cf79d89096c5f483d5bae7b8096
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7d8f7f8e1a76d94091cc701a4a00cc6fd2a3c1a746e29e297276a305cc6801f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1c7360581e177bcd8f1baa11a13d34ab7c59b5ff1e9e1739e68f1c3241102b6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1584b1b2cf9ecd956165af14d13e1fadf07b1f7d91e0a62867f52fe0a3a9ac7c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0012437810945273632,
5
  "eval_steps": 1000,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -307,13 +307,163 @@
307
  "learning_rate": 0.00010736842105263158,
308
  "loss": 1.6563,
309
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
  }
311
  ],
312
  "logging_steps": 1,
313
  "max_steps": 100,
314
  "num_train_epochs": 1,
315
  "save_steps": 25,
316
- "total_flos": 2741329648189440.0,
317
  "trial_name": null,
318
  "trial_params": null
319
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0018656716417910447,
5
  "eval_steps": 1000,
6
+ "global_step": 75,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
307
  "learning_rate": 0.00010736842105263158,
308
  "loss": 1.6563,
309
  "step": 50
310
+ },
311
+ {
312
+ "epoch": 0.0,
313
+ "learning_rate": 0.00010526315789473685,
314
+ "loss": 1.5786,
315
+ "step": 51
316
+ },
317
+ {
318
+ "epoch": 0.0,
319
+ "learning_rate": 0.00010315789473684211,
320
+ "loss": 1.5629,
321
+ "step": 52
322
+ },
323
+ {
324
+ "epoch": 0.0,
325
+ "learning_rate": 0.00010105263157894738,
326
+ "loss": 1.0232,
327
+ "step": 53
328
+ },
329
+ {
330
+ "epoch": 0.0,
331
+ "learning_rate": 9.894736842105263e-05,
332
+ "loss": 1.5671,
333
+ "step": 54
334
+ },
335
+ {
336
+ "epoch": 0.0,
337
+ "learning_rate": 9.68421052631579e-05,
338
+ "loss": 1.6186,
339
+ "step": 55
340
+ },
341
+ {
342
+ "epoch": 0.0,
343
+ "learning_rate": 9.473684210526316e-05,
344
+ "loss": 1.6145,
345
+ "step": 56
346
+ },
347
+ {
348
+ "epoch": 0.0,
349
+ "learning_rate": 9.263157894736843e-05,
350
+ "loss": 1.5494,
351
+ "step": 57
352
+ },
353
+ {
354
+ "epoch": 0.0,
355
+ "learning_rate": 9.052631578947369e-05,
356
+ "loss": 1.7099,
357
+ "step": 58
358
+ },
359
+ {
360
+ "epoch": 0.0,
361
+ "learning_rate": 8.842105263157894e-05,
362
+ "loss": 1.586,
363
+ "step": 59
364
+ },
365
+ {
366
+ "epoch": 0.0,
367
+ "learning_rate": 8.631578947368421e-05,
368
+ "loss": 1.2571,
369
+ "step": 60
370
+ },
371
+ {
372
+ "epoch": 0.0,
373
+ "learning_rate": 8.421052631578948e-05,
374
+ "loss": 1.4626,
375
+ "step": 61
376
+ },
377
+ {
378
+ "epoch": 0.0,
379
+ "learning_rate": 8.210526315789474e-05,
380
+ "loss": 1.4228,
381
+ "step": 62
382
+ },
383
+ {
384
+ "epoch": 0.0,
385
+ "learning_rate": 8e-05,
386
+ "loss": 1.4772,
387
+ "step": 63
388
+ },
389
+ {
390
+ "epoch": 0.0,
391
+ "learning_rate": 7.789473684210526e-05,
392
+ "loss": 1.6396,
393
+ "step": 64
394
+ },
395
+ {
396
+ "epoch": 0.0,
397
+ "learning_rate": 7.578947368421054e-05,
398
+ "loss": 1.6148,
399
+ "step": 65
400
+ },
401
+ {
402
+ "epoch": 0.0,
403
+ "learning_rate": 7.368421052631579e-05,
404
+ "loss": 1.4147,
405
+ "step": 66
406
+ },
407
+ {
408
+ "epoch": 0.0,
409
+ "learning_rate": 7.157894736842105e-05,
410
+ "loss": 1.9057,
411
+ "step": 67
412
+ },
413
+ {
414
+ "epoch": 0.0,
415
+ "learning_rate": 6.947368421052632e-05,
416
+ "loss": 1.2152,
417
+ "step": 68
418
+ },
419
+ {
420
+ "epoch": 0.0,
421
+ "learning_rate": 6.736842105263159e-05,
422
+ "loss": 1.611,
423
+ "step": 69
424
+ },
425
+ {
426
+ "epoch": 0.0,
427
+ "learning_rate": 6.526315789473685e-05,
428
+ "loss": 1.3557,
429
+ "step": 70
430
+ },
431
+ {
432
+ "epoch": 0.0,
433
+ "learning_rate": 6.31578947368421e-05,
434
+ "loss": 1.6397,
435
+ "step": 71
436
+ },
437
+ {
438
+ "epoch": 0.0,
439
+ "learning_rate": 6.105263157894737e-05,
440
+ "loss": 1.631,
441
+ "step": 72
442
+ },
443
+ {
444
+ "epoch": 0.0,
445
+ "learning_rate": 5.894736842105263e-05,
446
+ "loss": 1.6067,
447
+ "step": 73
448
+ },
449
+ {
450
+ "epoch": 0.0,
451
+ "learning_rate": 5.68421052631579e-05,
452
+ "loss": 1.7248,
453
+ "step": 74
454
+ },
455
+ {
456
+ "epoch": 0.0,
457
+ "learning_rate": 5.4736842105263165e-05,
458
+ "loss": 1.5881,
459
+ "step": 75
460
  }
461
  ],
462
  "logging_steps": 1,
463
  "max_steps": 100,
464
  "num_train_epochs": 1,
465
  "save_steps": 25,
466
+ "total_flos": 4183967298846720.0,
467
  "trial_name": null,
468
  "trial_params": null
469
  }