neuralwonderland committed on
Commit
42b64a3
·
verified ·
1 Parent(s): 6868ddb

Training in progress, step 4800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:522baa3295580d0a477ccbf634e4d7745722419e4d0a6ca3c0b036eba920c680
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa42da0a15c323cb30a8a23ad04a2f91dda438add8549801e89e35ad6851e5ff
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa37b5b222889c3f4b9685b74b8bc5a4c7e6211b9306d32cf4856d712f31e2ba
3
  size 640010002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af7a6113bbc3c7964b2223df922240b6cd5b2143269435fcabeaa7e2c8a5c570
3
  size 640010002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:216a35202103f6785993d6394e0cf3ddd33323b87f3ecab01cb5eb640ec670dd
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd517e1e1cfd43ab38eb9c513a110085b9d38a5d09c69d0052001093328ce0e8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:657bbe4e559d5f9e7581fbbb12237706f640d94bb8a67370d9ae890c56e7c61f
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3ecab210fbeef159cafb78be4b3e45fddac96a485c1a8431fd12fe8ae378aa1
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.5525649785995483,
3
- "best_model_checkpoint": "./output/checkpoint-4650",
4
- "epoch": 0.4378943403333647,
5
  "eval_steps": 150,
6
- "global_step": 4650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3510,6 +3510,119 @@
3510
  "eval_samples_per_second": 13.189,
3511
  "eval_steps_per_second": 13.189,
3512
  "step": 4650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3513
  }
3514
  ],
3515
  "logging_steps": 10,
@@ -3529,7 +3642,7 @@
3529
  "attributes": {}
3530
  }
3531
  },
3532
- "total_flos": 2.935943914686382e+17,
3533
  "train_batch_size": 4,
3534
  "trial_name": null,
3535
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.5524957180023193,
3
+ "best_model_checkpoint": "./output/checkpoint-4800",
4
+ "epoch": 0.45201996421508617,
5
  "eval_steps": 150,
6
+ "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3510
  "eval_samples_per_second": 13.189,
3511
  "eval_steps_per_second": 13.189,
3512
  "step": 4650
3513
+ },
3514
+ {
3515
+ "epoch": 0.43883604859214614,
3516
+ "grad_norm": 3.5562143325805664,
3517
+ "learning_rate": 8.874548095798464e-08,
3518
+ "loss": 1.3575,
3519
+ "step": 4660
3520
+ },
3521
+ {
3522
+ "epoch": 0.43977775685092757,
3523
+ "grad_norm": 4.702554702758789,
3524
+ "learning_rate": 8.362113243093245e-08,
3525
+ "loss": 1.5063,
3526
+ "step": 4670
3527
+ },
3528
+ {
3529
+ "epoch": 0.440719465109709,
3530
+ "grad_norm": 4.083225250244141,
3531
+ "learning_rate": 7.864749437890173e-08,
3532
+ "loss": 1.5242,
3533
+ "step": 4680
3534
+ },
3535
+ {
3536
+ "epoch": 0.4416611733684904,
3537
+ "grad_norm": 5.257496356964111,
3538
+ "learning_rate": 7.382477124867282e-08,
3539
+ "loss": 1.5159,
3540
+ "step": 4690
3541
+ },
3542
+ {
3543
+ "epoch": 0.44260288162727185,
3544
+ "grad_norm": 3.3656086921691895,
3545
+ "learning_rate": 6.915316128350461e-08,
3546
+ "loss": 1.4988,
3547
+ "step": 4700
3548
+ },
3549
+ {
3550
+ "epoch": 0.4435445898860533,
3551
+ "grad_norm": 2.969943046569824,
3552
+ "learning_rate": 6.463285651498563e-08,
3553
+ "loss": 1.3161,
3554
+ "step": 4710
3555
+ },
3556
+ {
3557
+ "epoch": 0.44448629814483476,
3558
+ "grad_norm": 4.175876140594482,
3559
+ "learning_rate": 6.026404275513875e-08,
3560
+ "loss": 1.259,
3561
+ "step": 4720
3562
+ },
3563
+ {
3564
+ "epoch": 0.4454280064036162,
3565
+ "grad_norm": 5.987540245056152,
3566
+ "learning_rate": 5.604689958878723e-08,
3567
+ "loss": 1.4463,
3568
+ "step": 4730
3569
+ },
3570
+ {
3571
+ "epoch": 0.4463697146623976,
3572
+ "grad_norm": 5.366607666015625,
3573
+ "learning_rate": 5.198160036616898e-08,
3574
+ "loss": 1.3034,
3575
+ "step": 4740
3576
+ },
3577
+ {
3578
+ "epoch": 0.44731142292117904,
3579
+ "grad_norm": 3.249971866607666,
3580
+ "learning_rate": 4.8068312195811847e-08,
3581
+ "loss": 1.2685,
3582
+ "step": 4750
3583
+ },
3584
+ {
3585
+ "epoch": 0.44825313117996046,
3586
+ "grad_norm": 4.363810062408447,
3587
+ "learning_rate": 4.4307195937666194e-08,
3588
+ "loss": 1.5111,
3589
+ "step": 4760
3590
+ },
3591
+ {
3592
+ "epoch": 0.4491948394387419,
3593
+ "grad_norm": 3.4074866771698,
3594
+ "learning_rate": 4.069840619648935e-08,
3595
+ "loss": 1.6763,
3596
+ "step": 4770
3597
+ },
3598
+ {
3599
+ "epoch": 0.4501365476975233,
3600
+ "grad_norm": 6.260430335998535,
3601
+ "learning_rate": 3.72420913154932e-08,
3602
+ "loss": 1.4295,
3603
+ "step": 4780
3604
+ },
3605
+ {
3606
+ "epoch": 0.45107825595630474,
3607
+ "grad_norm": 3.294346809387207,
3608
+ "learning_rate": 3.3938393370244876e-08,
3609
+ "loss": 1.4655,
3610
+ "step": 4790
3611
+ },
3612
+ {
3613
+ "epoch": 0.45201996421508617,
3614
+ "grad_norm": 4.4235639572143555,
3615
+ "learning_rate": 3.078744816282731e-08,
3616
+ "loss": 1.4596,
3617
+ "step": 4800
3618
+ },
3619
+ {
3620
+ "epoch": 0.45201996421508617,
3621
+ "eval_loss": 1.5524957180023193,
3622
+ "eval_runtime": 37.2391,
3623
+ "eval_samples_per_second": 13.427,
3624
+ "eval_steps_per_second": 13.427,
3625
+ "step": 4800
3626
  }
3627
  ],
3628
  "logging_steps": 10,
 
3642
  "attributes": {}
3643
  }
3644
  },
3645
+ "total_flos": 3.0222991715077325e+17,
3646
  "train_batch_size": 4,
3647
  "trial_name": null,
3648
  "trial_params": null