ProgramInNonsense committed on
Commit ffab012 · verified · 1 Parent(s): 18e8735

Training in progress, step 4950, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8999735af70410295de19f30e22966f6dd3004beff68125e830fdb7575c32b33
+oid sha256:6d82577e4315a0245aa3888fca2f71a8260373eb445fbad147c6fd76cb585817
 size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8286c1a06c14ad0ab5875b0a8c24a687654a9093c97b325c38cb13df4b32b10
+oid sha256:d986a4ba297308f83f2bc0a91fbe53d870040131c1b3ee9e78cb05739dfc0c9f
 size 671467026
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43f70beeab3045e8185d11bdb74b1a7618728d482bc5259a57ebd6fa0bd44177
+oid sha256:68a2c94f0c11b9467b4681897c349d42ab25acb9747f60932ec6d730da9a44df
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aee07848921afadf2e29f9d8e13f23709a7903803a3baa073dfe984ab9de3b14
+oid sha256:5c9be9776f3509ee5ee5abd9b7b0086172f78ba2db59e74bb85fa6d84d248249
 size 1256
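
Note on the four binary files above: they are tracked with Git LFS, so each diff only replaces the pointer file, where the "oid sha256:" line is the content hash of the new blob and "size" is its byte count. As a minimal sketch (assuming the checkpoint files have been pulled locally under the same last-checkpoint/ paths; the helper name sha256_of is just illustrative), a downloaded file can be checked against its pointer like this:

    import hashlib
    import os

    def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
        """Stream the file so large checkpoints (hundreds of MB) are not read into memory at once."""
        digest = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    # Expected values taken from the updated LFS pointers in this commit.
    expected = {
        "last-checkpoint/adapter_model.safetensors": (
            "6d82577e4315a0245aa3888fca2f71a8260373eb445fbad147c6fd76cb585817", 335604696),
        "last-checkpoint/optimizer.pt": (
            "d986a4ba297308f83f2bc0a91fbe53d870040131c1b3ee9e78cb05739dfc0c9f", 671467026),
    }

    for path, (oid, size) in expected.items():
        ok = os.path.getsize(path) == size and sha256_of(path) == oid
        print(f"{path}: {'OK' if ok else 'MISMATCH'}")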
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.5389729738235474,
   "best_model_checkpoint": "./output/checkpoint-4800",
-  "epoch": 0.11384389156369329,
+  "epoch": 0.1174015131750587,
   "eval_steps": 150,
-  "global_step": 4800,
+  "global_step": 4950,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
       "eval_samples_per_second": 9.502,
       "eval_steps_per_second": 9.502,
       "step": 4800
+    },
+    {
+      "epoch": 0.11408106633778431,
+      "grad_norm": 18.068140029907227,
+      "learning_rate": 4.631564202709354e-08,
+      "loss": 1.7176,
+      "step": 4810
+    },
+    {
+      "epoch": 0.11431824111187534,
+      "grad_norm": 12.530498504638672,
+      "learning_rate": 4.1573879615262184e-08,
+      "loss": 1.5955,
+      "step": 4820
+    },
+    {
+      "epoch": 0.11455541588596636,
+      "grad_norm": 14.457528114318848,
+      "learning_rate": 3.708732128449785e-08,
+      "loss": 1.5592,
+      "step": 4830
+    },
+    {
+      "epoch": 0.1147925906600574,
+      "grad_norm": 9.580428123474121,
+      "learning_rate": 3.2856151459641216e-08,
+      "loss": 1.527,
+      "step": 4840
+    },
+    {
+      "epoch": 0.11502976543414842,
+      "grad_norm": 31.357723236083984,
+      "learning_rate": 2.8880544067511063e-08,
+      "loss": 1.6266,
+      "step": 4850
+    },
+    {
+      "epoch": 0.11526694020823945,
+      "grad_norm": 14.516955375671387,
+      "learning_rate": 2.5160662529755823e-08,
+      "loss": 1.4625,
+      "step": 4860
+    },
+    {
+      "epoch": 0.11550411498233049,
+      "grad_norm": 17.862333297729492,
+      "learning_rate": 2.169665975613605e-08,
+      "loss": 1.6717,
+      "step": 4870
+    },
+    {
+      "epoch": 0.1157412897564215,
+      "grad_norm": 11.661571502685547,
+      "learning_rate": 1.8488678138238458e-08,
+      "loss": 1.5904,
+      "step": 4880
+    },
+    {
+      "epoch": 0.11597846453051254,
+      "grad_norm": 19.821514129638672,
+      "learning_rate": 1.5536849543621583e-08,
+      "loss": 1.633,
+      "step": 4890
+    },
+    {
+      "epoch": 0.11621563930460356,
+      "grad_norm": 13.340200424194336,
+      "learning_rate": 1.2841295310397906e-08,
+      "loss": 1.4345,
+      "step": 4900
+    },
+    {
+      "epoch": 0.1164528140786946,
+      "grad_norm": 14.068325996398926,
+      "learning_rate": 1.0402126242244764e-08,
+      "loss": 1.5872,
+      "step": 4910
+    },
+    {
+      "epoch": 0.11668998885278561,
+      "grad_norm": 13.935022354125977,
+      "learning_rate": 8.219442603847605e-09,
+      "loss": 1.5762,
+      "step": 4920
+    },
+    {
+      "epoch": 0.11692716362687665,
+      "grad_norm": 10.617624282836914,
+      "learning_rate": 6.293334116783817e-09,
+      "loss": 1.5457,
+      "step": 4930
+    },
+    {
+      "epoch": 0.11716433840096767,
+      "grad_norm": 11.610488891601562,
+      "learning_rate": 4.623879955827082e-09,
+      "loss": 1.4991,
+      "step": 4940
+    },
+    {
+      "epoch": 0.1174015131750587,
+      "grad_norm": 11.387807846069336,
+      "learning_rate": 3.211148745700665e-09,
+      "loss": 1.5349,
+      "step": 4950
+    },
+    {
+      "epoch": 0.1174015131750587,
+      "eval_loss": 1.5390245914459229,
+      "eval_runtime": 53.2681,
+      "eval_samples_per_second": 9.405,
+      "eval_steps_per_second": 9.405,
+      "step": 4950
     }
   ],
   "logging_steps": 10,
@@ -3642,7 +3755,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.6346743679687066e+17,
+  "total_flos": 3.7387334060399e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null