plip commited on
Commit
fc0c9a8
1 Parent(s): 5345174

Training in progress, step 240000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:133edef03c1d84d7f47c379bb8627e715102779ac7fbfbbc81cf76fdb3857138
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95219d59f6e21a9a0cd580ed60c0915e8162896fe27e6f7170a921f2e0d281bd
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef3e57cb5c11d79b27478df38200399f2d7e69d1e9da7282bcbb17ee802e3b1d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58d9a817ebedb7184a71cca1a13709576b2d1a51f03c46a318ad05be242513b5
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8419ed7583455ff96bd8dc55fc07544cbd4508b78a00a26bb6c6297cac401ebd
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8419ed7583455ff96bd8dc55fc07544cbd4508b78a00a26bb6c6297cac401ebd
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8419ed7583455ff96bd8dc55fc07544cbd4508b78a00a26bb6c6297cac401ebd
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8419ed7583455ff96bd8dc55fc07544cbd4508b78a00a26bb6c6297cac401ebd
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8419ed7583455ff96bd8dc55fc07544cbd4508b78a00a26bb6c6297cac401ebd
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8419ed7583455ff96bd8dc55fc07544cbd4508b78a00a26bb6c6297cac401ebd
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8419ed7583455ff96bd8dc55fc07544cbd4508b78a00a26bb6c6297cac401ebd
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a04cad25c1edacc0a2853d5253114151a48b6b991129f2e052e17228b772e940
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8419ed7583455ff96bd8dc55fc07544cbd4508b78a00a26bb6c6297cac401ebd
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a2bb37359d05bc7917d91ab1261ba8c4d8f00648cd8cb2d11c677b6c91ddb27
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a310455c0ab00dbcb4f4c26c03e0304d748fbc8c3e547006fb15346d32ac70da
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.358931941564342,
5
- "global_step": 230000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4606,11 +4606,211 @@
4606
  "eval_samples_per_second": 1525.752,
4607
  "eval_steps_per_second": 24.295,
4608
  "step": 230000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4609
  }
4610
  ],
4611
  "max_steps": 500000,
4612
  "num_train_epochs": 12,
4613
- "total_flos": 7.348191706858966e+21,
4614
  "trial_name": null,
4615
  "trial_params": null
4616
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.591928982501923,
5
+ "global_step": 240000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
4606
  "eval_samples_per_second": 1525.752,
4607
  "eval_steps_per_second": 24.295,
4608
  "step": 230000
4609
+ },
4610
+ {
4611
+ "epoch": 5.37,
4612
+ "learning_rate": 0.00018545980690870903,
4613
+ "loss": 0.276,
4614
+ "step": 230500
4615
+ },
4616
+ {
4617
+ "epoch": 5.38,
4618
+ "learning_rate": 0.00018499083425600648,
4619
+ "loss": 0.2753,
4620
+ "step": 231000
4621
+ },
4622
+ {
4623
+ "epoch": 5.38,
4624
+ "eval_loss": 0.2578088045120239,
4625
+ "eval_runtime": 1.4301,
4626
+ "eval_samples_per_second": 1536.977,
4627
+ "eval_steps_per_second": 24.474,
4628
+ "step": 231000
4629
+ },
4630
+ {
4631
+ "epoch": 5.39,
4632
+ "learning_rate": 0.00018452153362834552,
4633
+ "loss": 0.275,
4634
+ "step": 231500
4635
+ },
4636
+ {
4637
+ "epoch": 5.41,
4638
+ "learning_rate": 0.00018405191015792254,
4639
+ "loss": 0.2749,
4640
+ "step": 232000
4641
+ },
4642
+ {
4643
+ "epoch": 5.41,
4644
+ "eval_loss": 0.25682488083839417,
4645
+ "eval_runtime": 1.4507,
4646
+ "eval_samples_per_second": 1515.179,
4647
+ "eval_steps_per_second": 24.127,
4648
+ "step": 232000
4649
+ },
4650
+ {
4651
+ "epoch": 5.42,
4652
+ "learning_rate": 0.0001835819689804646,
4653
+ "loss": 0.2749,
4654
+ "step": 232500
4655
+ },
4656
+ {
4657
+ "epoch": 5.43,
4658
+ "learning_rate": 0.0001831117152351732,
4659
+ "loss": 0.2749,
4660
+ "step": 233000
4661
+ },
4662
+ {
4663
+ "epoch": 5.43,
4664
+ "eval_loss": 0.25751060247421265,
4665
+ "eval_runtime": 1.429,
4666
+ "eval_samples_per_second": 1538.092,
4667
+ "eval_steps_per_second": 24.492,
4668
+ "step": 233000
4669
+ },
4670
+ {
4671
+ "epoch": 5.44,
4672
+ "learning_rate": 0.00018264115406466778,
4673
+ "loss": 0.2752,
4674
+ "step": 233500
4675
+ },
4676
+ {
4677
+ "epoch": 5.45,
4678
+ "learning_rate": 0.00018217029061493007,
4679
+ "loss": 0.2748,
4680
+ "step": 234000
4681
+ },
4682
+ {
4683
+ "epoch": 5.45,
4684
+ "eval_loss": 0.2570262551307678,
4685
+ "eval_runtime": 1.4679,
4686
+ "eval_samples_per_second": 1497.347,
4687
+ "eval_steps_per_second": 23.843,
4688
+ "step": 234000
4689
+ },
4690
+ {
4691
+ "epoch": 5.46,
4692
+ "learning_rate": 0.00018169913003524717,
4693
+ "loss": 0.2748,
4694
+ "step": 234500
4695
+ },
4696
+ {
4697
+ "epoch": 5.48,
4698
+ "learning_rate": 0.00018122767747815594,
4699
+ "loss": 0.2744,
4700
+ "step": 235000
4701
+ },
4702
+ {
4703
+ "epoch": 5.48,
4704
+ "eval_loss": 0.2594066262245178,
4705
+ "eval_runtime": 1.449,
4706
+ "eval_samples_per_second": 1516.917,
4707
+ "eval_steps_per_second": 24.155,
4708
+ "step": 235000
4709
+ },
4710
+ {
4711
+ "epoch": 5.49,
4712
+ "learning_rate": 0.00018075593809938574,
4713
+ "loss": 0.2746,
4714
+ "step": 235500
4715
+ },
4716
+ {
4717
+ "epoch": 5.5,
4718
+ "learning_rate": 0.00018028391705780295,
4719
+ "loss": 0.2748,
4720
+ "step": 236000
4721
+ },
4722
+ {
4723
+ "epoch": 5.5,
4724
+ "eval_loss": 0.25915682315826416,
4725
+ "eval_runtime": 1.4494,
4726
+ "eval_samples_per_second": 1516.483,
4727
+ "eval_steps_per_second": 24.148,
4728
+ "step": 236000
4729
+ },
4730
+ {
4731
+ "epoch": 5.51,
4732
+ "learning_rate": 0.0001798116195153541,
4733
+ "loss": 0.2745,
4734
+ "step": 236500
4735
+ },
4736
+ {
4737
+ "epoch": 5.52,
4738
+ "learning_rate": 0.0001793390506370094,
4739
+ "loss": 0.2742,
4740
+ "step": 237000
4741
+ },
4742
+ {
4743
+ "epoch": 5.52,
4744
+ "eval_loss": 0.25850772857666016,
4745
+ "eval_runtime": 1.4467,
4746
+ "eval_samples_per_second": 1519.275,
4747
+ "eval_steps_per_second": 24.192,
4748
+ "step": 237000
4749
+ },
4750
+ {
4751
+ "epoch": 5.53,
4752
+ "learning_rate": 0.00017886621559070638,
4753
+ "loss": 0.2743,
4754
+ "step": 237500
4755
+ },
4756
+ {
4757
+ "epoch": 5.55,
4758
+ "learning_rate": 0.00017839311954729337,
4759
+ "loss": 0.2742,
4760
+ "step": 238000
4761
+ },
4762
+ {
4763
+ "epoch": 5.55,
4764
+ "eval_loss": 0.25702983140945435,
4765
+ "eval_runtime": 1.4245,
4766
+ "eval_samples_per_second": 1542.944,
4767
+ "eval_steps_per_second": 24.569,
4768
+ "step": 238000
4769
+ },
4770
+ {
4771
+ "epoch": 5.56,
4772
+ "learning_rate": 0.00017791976768047292,
4773
+ "loss": 0.2738,
4774
+ "step": 238500
4775
+ },
4776
+ {
4777
+ "epoch": 5.57,
4778
+ "learning_rate": 0.00017744616516674518,
4779
+ "loss": 0.2743,
4780
+ "step": 239000
4781
+ },
4782
+ {
4783
+ "epoch": 5.57,
4784
+ "eval_loss": 0.2567313611507416,
4785
+ "eval_runtime": 1.5014,
4786
+ "eval_samples_per_second": 1463.976,
4787
+ "eval_steps_per_second": 23.312,
4788
+ "step": 239000
4789
+ },
4790
+ {
4791
+ "epoch": 5.58,
4792
+ "learning_rate": 0.00017697231718535132,
4793
+ "loss": 0.2742,
4794
+ "step": 239500
4795
+ },
4796
+ {
4797
+ "epoch": 5.59,
4798
+ "learning_rate": 0.00017649822891821707,
4799
+ "loss": 0.274,
4800
+ "step": 240000
4801
+ },
4802
+ {
4803
+ "epoch": 5.59,
4804
+ "eval_loss": 0.25702497363090515,
4805
+ "eval_runtime": 1.442,
4806
+ "eval_samples_per_second": 1524.238,
4807
+ "eval_steps_per_second": 24.271,
4808
+ "step": 240000
4809
  }
4810
  ],
4811
  "max_steps": 500000,
4812
  "num_train_epochs": 12,
4813
+ "total_flos": 7.667678845633466e+21,
4814
  "trial_name": null,
4815
  "trial_params": null
4816
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef3e57cb5c11d79b27478df38200399f2d7e69d1e9da7282bcbb17ee802e3b1d
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58d9a817ebedb7184a71cca1a13709576b2d1a51f03c46a318ad05be242513b5
3
  size 102501541