mtzig committed · verified
Commit ed404a3 · 1 Parent(s): 69975d3

Training in progress, step 1100, checkpoint

last-checkpoint/optimizer_0/.metadata CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
 
last-checkpoint/optimizer_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8cd9f4e1d0bb326b818db1b6faa552753bc4a3328ac93e01b3631a83d08e1c95
+ oid sha256:e2283e0a60eecf0e26db089b1ea106db4a8943588d7c5d1ae539887caf2a0623
  size 13934748
last-checkpoint/optimizer_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9c1ce66274008394f36d101e20d4378dd480a6f7db7387a58eed60435a8f39a7
+ oid sha256:039e2daa021420447c63254cecab80fa9d8c4955bb82c268ed4412f9d7cc457e
  size 13999412
last-checkpoint/optimizer_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9b3e7a569d804afc7e9af01c045d344bcf8aa04435a748d8f22d80f77f68191f
+ oid sha256:3bb386b06abed058c971637632b15f30bf72e3f0fed658811dd9d1879b2cc249
  size 13990904
last-checkpoint/optimizer_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:84f72d90d6d6f96ffde5e12766b8aa3f0ebf70484ff977b4cc1380cfd2635d82
+ oid sha256:b690d2a646fe4b43296ed7a11b9ed92846b14116b36ab4f256a0c0a38193fec5
  size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
 
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b7ec10706acfd7aebf2e0313a26ad47f112db6494baa4011866a112fa6459782
+ oid sha256:40e60cce6b988d761a7cb0a778490b5bef357343f215f4368e74b271a3544add
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1d45fd8c4f5cac20eb0715bd7c3583b8b9d6d50be52eb3b819ead289c264bf4c
+ oid sha256:8fbc16fe00aa2316ff9c35dc5c163a58bd61d544a0b373903f53b1dd607744f0
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc7bbedf822084a972aaf7dbfdc31778a6b5afdff5f9d51666b28397948c4cf6
+ oid sha256:c26b5d36343cbeb9400248846e9572b9226655bba020bfffb57eb55ac503eaa0
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dc56dd27c16979078189d0168509b3491fac9a7018e2acd5413b0b5bfb9e62b8
+ oid sha256:80e37075e6a4c78861839fc539511c538b95c0905a026bfa2e725cda89b0c3ea
  size 6966784
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bbefc2a9b5877ac52b5c278c40b832840a445a83b4f45552eae9c8d8fd7025ab
+ oid sha256:47c3058111d1013a8c7bee77444904a0475c2f0bffb7d0c4d6c87291a641236d
  size 15088
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3f7ee2bc06c634de7d668e8f27eb2c655185598b0005a48f28db9b8c13871cf8
+ oid sha256:1ad5b62f0cc16eba86ed43478cd91d2660dc90a4ec6abf69d8eba3dc9fd166bb
  size 15088
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e89f6ffe398cd010021cbea856f31e9f12c086dc22192dd94cd4139ed13bc428
+ oid sha256:4b7a7201b10eea93ada67755ff9fd428efa7903fb7a7749a862485dd229f2c71
  size 15088
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1cac7e1aa01f996ea4ccf65c0edbca9c2218b27d0fee393e5dadf9e12f0a4ac0
+ oid sha256:ccc03cdb9d7cd36d375ad07cb7f8faa863f3ab0b15c5d442b25b00ccb4f627e1
  size 15088
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ca19ec64a3f37f86c1a9f3bd1615be54fe5912d912de79a7d3f808a593a2192d
+ oid sha256:b064707c1d587f8d90b9fc42a28979d7e7f25a60fc5fe43535927f2c04c08c63
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.847457627118644,
+ "epoch": 0.9322033898305084,
  "eval_steps": 20,
- "global_step": 1000,
+ "global_step": 1100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -7619,6 +7619,766 @@
  "eval_samples_per_second": 5.83,
  "eval_steps_per_second": 0.2,
  "step": 1000
7622
+ },
7623
+ {
7624
+ "epoch": 0.8483050847457627,
7625
+ "grad_norm": 1.054631233215332,
7626
+ "learning_rate": 1.3694792101976938e-06,
7627
+ "loss": 0.0064,
7628
+ "step": 1001
7629
+ },
7630
+ {
7631
+ "epoch": 0.8491525423728814,
7632
+ "grad_norm": 3.1577975749969482,
7633
+ "learning_rate": 1.354574758753363e-06,
7634
+ "loss": 0.0276,
7635
+ "step": 1002
7636
+ },
7637
+ {
7638
+ "epoch": 0.85,
7639
+ "grad_norm": 2.423903703689575,
7640
+ "learning_rate": 1.339745962155613e-06,
7641
+ "loss": 0.0236,
7642
+ "step": 1003
7643
+ },
7644
+ {
7645
+ "epoch": 0.8508474576271187,
7646
+ "grad_norm": 0.5103369951248169,
7647
+ "learning_rate": 1.324992950169065e-06,
7648
+ "loss": 0.0016,
7649
+ "step": 1004
7650
+ },
7651
+ {
7652
+ "epoch": 0.8516949152542372,
7653
+ "grad_norm": 2.461594343185425,
7654
+ "learning_rate": 1.310315851895162e-06,
7655
+ "loss": 0.0258,
7656
+ "step": 1005
7657
+ },
7658
+ {
7659
+ "epoch": 0.8525423728813559,
7660
+ "grad_norm": 0.4233887195587158,
7661
+ "learning_rate": 1.2957147957710292e-06,
7662
+ "loss": 0.0016,
7663
+ "step": 1006
7664
+ },
7665
+ {
7666
+ "epoch": 0.8533898305084746,
7667
+ "grad_norm": 1.1271721124649048,
7668
+ "learning_rate": 1.28118990956837e-06,
7669
+ "loss": 0.0051,
7670
+ "step": 1007
7671
+ },
7672
+ {
7673
+ "epoch": 0.8542372881355932,
7674
+ "grad_norm": 1.2144718170166016,
7675
+ "learning_rate": 1.2667413203923283e-06,
7676
+ "loss": 0.0059,
7677
+ "step": 1008
7678
+ },
7679
+ {
7680
+ "epoch": 0.8550847457627119,
7681
+ "grad_norm": 0.09200064092874527,
7682
+ "learning_rate": 1.2523691546803872e-06,
7683
+ "loss": 0.0003,
7684
+ "step": 1009
7685
+ },
7686
+ {
7687
+ "epoch": 0.8559322033898306,
7688
+ "grad_norm": 0.7451255917549133,
7689
+ "learning_rate": 1.2380735382012576e-06,
7690
+ "loss": 0.0047,
7691
+ "step": 1010
7692
+ },
7693
+ {
7694
+ "epoch": 0.8567796610169491,
7695
+ "grad_norm": 0.9666259288787842,
7696
+ "learning_rate": 1.2238545960537795e-06,
7697
+ "loss": 0.0044,
7698
+ "step": 1011
7699
+ },
7700
+ {
7701
+ "epoch": 0.8576271186440678,
7702
+ "grad_norm": 0.9461873769760132,
7703
+ "learning_rate": 1.2097124526658277e-06,
7704
+ "loss": 0.0052,
7705
+ "step": 1012
7706
+ },
7707
+ {
7708
+ "epoch": 0.8584745762711864,
7709
+ "grad_norm": 1.253847360610962,
7710
+ "learning_rate": 1.1956472317932211e-06,
7711
+ "loss": 0.0117,
7712
+ "step": 1013
7713
+ },
7714
+ {
7715
+ "epoch": 0.8593220338983051,
7716
+ "grad_norm": 2.8636152744293213,
7717
+ "learning_rate": 1.1816590565186414e-06,
7718
+ "loss": 0.0236,
7719
+ "step": 1014
7720
+ },
7721
+ {
7722
+ "epoch": 0.8601694915254238,
7723
+ "grad_norm": 0.7781552076339722,
7724
+ "learning_rate": 1.1677480492505589e-06,
7725
+ "loss": 0.0068,
7726
+ "step": 1015
7727
+ },
7728
+ {
7729
+ "epoch": 0.8610169491525423,
7730
+ "grad_norm": 2.7591519355773926,
7731
+ "learning_rate": 1.1539143317221524e-06,
7732
+ "loss": 0.0293,
7733
+ "step": 1016
7734
+ },
7735
+ {
7736
+ "epoch": 0.861864406779661,
7737
+ "grad_norm": 1.2667608261108398,
7738
+ "learning_rate": 1.1401580249902566e-06,
7739
+ "loss": 0.0135,
7740
+ "step": 1017
7741
+ },
7742
+ {
7743
+ "epoch": 0.8627118644067797,
7744
+ "grad_norm": 1.1776992082595825,
7745
+ "learning_rate": 1.1264792494342858e-06,
7746
+ "loss": 0.0072,
7747
+ "step": 1018
7748
+ },
7749
+ {
7750
+ "epoch": 0.8635593220338983,
7751
+ "grad_norm": 1.9288878440856934,
7752
+ "learning_rate": 1.112878124755199e-06,
7753
+ "loss": 0.0101,
7754
+ "step": 1019
7755
+ },
7756
+ {
7757
+ "epoch": 0.864406779661017,
7758
+ "grad_norm": 1.5513525009155273,
7759
+ "learning_rate": 1.0993547699744366e-06,
7760
+ "loss": 0.0182,
7761
+ "step": 1020
7762
+ },
7763
+ {
7764
+ "epoch": 0.864406779661017,
7765
+ "eval_accuracy": 1.0,
7766
+ "eval_f1": 1.0,
7767
+ "eval_loss": 0.00010546201519900933,
7768
+ "eval_precision": 1.0,
7769
+ "eval_recall": 1.0,
7770
+ "eval_runtime": 51.2385,
7771
+ "eval_samples_per_second": 5.699,
7772
+ "eval_steps_per_second": 0.195,
7773
+ "step": 1020
7774
+ },
7775
+ {
7776
+ "epoch": 0.8652542372881356,
7777
+ "grad_norm": 1.8383911848068237,
7778
+ "learning_rate": 1.0859093034328972e-06,
7779
+ "loss": 0.0058,
7780
+ "step": 1021
7781
+ },
7782
+ {
7783
+ "epoch": 0.8661016949152542,
7784
+ "grad_norm": 0.7148596048355103,
7785
+ "learning_rate": 1.0725418427898792e-06,
7786
+ "loss": 0.0033,
7787
+ "step": 1022
7788
+ },
7789
+ {
7790
+ "epoch": 0.8669491525423729,
7791
+ "grad_norm": 0.8727383017539978,
7792
+ "learning_rate": 1.0592525050220692e-06,
7793
+ "loss": 0.006,
7794
+ "step": 1023
7795
+ },
7796
+ {
7797
+ "epoch": 0.8677966101694915,
7798
+ "grad_norm": 1.3725224733352661,
7799
+ "learning_rate": 1.0460414064225099e-06,
7800
+ "loss": 0.007,
7801
+ "step": 1024
7802
+ },
7803
+ {
7804
+ "epoch": 0.8686440677966102,
7805
+ "grad_norm": 0.29742565751075745,
7806
+ "learning_rate": 1.0329086625995843e-06,
7807
+ "loss": 0.0015,
7808
+ "step": 1025
7809
+ },
7810
+ {
7811
+ "epoch": 0.8694915254237288,
7812
+ "grad_norm": 1.752211332321167,
7813
+ "learning_rate": 1.0198543884760049e-06,
7814
+ "loss": 0.0151,
7815
+ "step": 1026
7816
+ },
7817
+ {
7818
+ "epoch": 0.8703389830508474,
7819
+ "grad_norm": 0.8796065449714661,
7820
+ "learning_rate": 1.0068786982878087e-06,
7821
+ "loss": 0.0052,
7822
+ "step": 1027
7823
+ },
7824
+ {
7825
+ "epoch": 0.8711864406779661,
7826
+ "grad_norm": 1.1921021938323975,
7827
+ "learning_rate": 9.939817055833534e-07,
7828
+ "loss": 0.0079,
7829
+ "step": 1028
7830
+ },
7831
+ {
7832
+ "epoch": 0.8720338983050847,
7833
+ "grad_norm": 3.8833506107330322,
7834
+ "learning_rate": 9.811635232223283e-07,
7835
+ "loss": 0.0188,
7836
+ "step": 1029
7837
+ },
7838
+ {
7839
+ "epoch": 0.8728813559322034,
7840
+ "grad_norm": 0.4924924969673157,
7841
+ "learning_rate": 9.684242633747642e-07,
7842
+ "loss": 0.0018,
7843
+ "step": 1030
7844
+ },
7845
+ {
7846
+ "epoch": 0.8737288135593221,
7847
+ "grad_norm": 0.3568030893802643,
7848
+ "learning_rate": 9.55764037520055e-07,
7849
+ "loss": 0.0021,
7850
+ "step": 1031
7851
+ },
7852
+ {
7853
+ "epoch": 0.8745762711864407,
7854
+ "grad_norm": 1.1321516036987305,
7855
+ "learning_rate": 9.43182956445976e-07,
7856
+ "loss": 0.0187,
7857
+ "step": 1032
7858
+ },
7859
+ {
7860
+ "epoch": 0.8754237288135593,
7861
+ "grad_norm": 1.1234910488128662,
7862
+ "learning_rate": 9.306811302477214e-07,
7863
+ "loss": 0.0054,
7864
+ "step": 1033
7865
+ },
7866
+ {
7867
+ "epoch": 0.8762711864406779,
7868
+ "grad_norm": 1.4271575212478638,
7869
+ "learning_rate": 9.182586683269323e-07,
7870
+ "loss": 0.0063,
7871
+ "step": 1034
7872
+ },
7873
+ {
7874
+ "epoch": 0.8771186440677966,
7875
+ "grad_norm": 1.43450129032135,
7876
+ "learning_rate": 9.059156793907542e-07,
7877
+ "loss": 0.0189,
7878
+ "step": 1035
7879
+ },
7880
+ {
7881
+ "epoch": 0.8779661016949153,
7882
+ "grad_norm": 1.4197413921356201,
7883
+ "learning_rate": 8.936522714508678e-07,
7884
+ "loss": 0.0058,
7885
+ "step": 1036
7886
+ },
7887
+ {
7888
+ "epoch": 0.8788135593220339,
7889
+ "grad_norm": 0.9812485575675964,
7890
+ "learning_rate": 8.814685518225552e-07,
7891
+ "loss": 0.0074,
7892
+ "step": 1037
7893
+ },
7894
+ {
7895
+ "epoch": 0.8796610169491526,
7896
+ "grad_norm": 1.7358640432357788,
7897
+ "learning_rate": 8.693646271237577e-07,
7898
+ "loss": 0.0099,
7899
+ "step": 1038
7900
+ },
7901
+ {
7902
+ "epoch": 0.8805084745762712,
7903
+ "grad_norm": 0.25579047203063965,
7904
+ "learning_rate": 8.573406032741438e-07,
7905
+ "loss": 0.0014,
7906
+ "step": 1039
7907
+ },
7908
+ {
7909
+ "epoch": 0.8813559322033898,
7910
+ "grad_norm": 0.7132051587104797,
7911
+ "learning_rate": 8.453965854941748e-07,
7912
+ "loss": 0.0053,
7913
+ "step": 1040
7914
+ },
7915
+ {
7916
+ "epoch": 0.8813559322033898,
7917
+ "eval_accuracy": 1.0,
7918
+ "eval_f1": 1.0,
7919
+ "eval_loss": 0.00010203008423559368,
7920
+ "eval_precision": 1.0,
7921
+ "eval_recall": 1.0,
7922
+ "eval_runtime": 51.4316,
7923
+ "eval_samples_per_second": 5.677,
7924
+ "eval_steps_per_second": 0.194,
7925
+ "step": 1040
7926
+ },
7927
+ {
7928
+ "epoch": 0.8822033898305085,
7929
+ "grad_norm": 0.3814912736415863,
7930
+ "learning_rate": 8.33532678304203e-07,
7931
+ "loss": 0.0022,
7932
+ "step": 1041
7933
+ },
7934
+ {
7935
+ "epoch": 0.8830508474576271,
7936
+ "grad_norm": 0.48492759466171265,
7937
+ "learning_rate": 8.217489855235338e-07,
7938
+ "loss": 0.0026,
7939
+ "step": 1042
7940
+ },
7941
+ {
7942
+ "epoch": 0.8838983050847458,
7943
+ "grad_norm": 1.3368293046951294,
7944
+ "learning_rate": 8.100456102695342e-07,
7945
+ "loss": 0.0099,
7946
+ "step": 1043
7947
+ },
7948
+ {
7949
+ "epoch": 0.8847457627118644,
7950
+ "grad_norm": 0.33328455686569214,
7951
+ "learning_rate": 7.98422654956722e-07,
7952
+ "loss": 0.0014,
7953
+ "step": 1044
7954
+ },
7955
+ {
7956
+ "epoch": 0.885593220338983,
7957
+ "grad_norm": 2.369331121444702,
7958
+ "learning_rate": 7.868802212958704e-07,
7959
+ "loss": 0.0122,
7960
+ "step": 1045
7961
+ },
7962
+ {
7963
+ "epoch": 0.8864406779661017,
7964
+ "grad_norm": 1.2063324451446533,
7965
+ "learning_rate": 7.754184102931228e-07,
7966
+ "loss": 0.0174,
7967
+ "step": 1046
7968
+ },
7969
+ {
7970
+ "epoch": 0.8872881355932203,
7971
+ "grad_norm": 1.7367677688598633,
7972
+ "learning_rate": 7.640373222491038e-07,
7973
+ "loss": 0.0105,
7974
+ "step": 1047
7975
+ },
7976
+ {
7977
+ "epoch": 0.888135593220339,
7978
+ "grad_norm": 1.0690417289733887,
7979
+ "learning_rate": 7.527370567580416e-07,
7980
+ "loss": 0.0054,
7981
+ "step": 1048
7982
+ },
7983
+ {
7984
+ "epoch": 0.8889830508474577,
7985
+ "grad_norm": 2.0434165000915527,
7986
+ "learning_rate": 7.415177127069029e-07,
7987
+ "loss": 0.0161,
7988
+ "step": 1049
7989
+ },
7990
+ {
7991
+ "epoch": 0.8898305084745762,
7992
+ "grad_norm": 2.049274206161499,
7993
+ "learning_rate": 7.303793882745181e-07,
7994
+ "loss": 0.0097,
7995
+ "step": 1050
7996
+ },
7997
+ {
7998
+ "epoch": 0.8906779661016949,
7999
+ "grad_norm": 0.8351269960403442,
8000
+ "learning_rate": 7.193221809307304e-07,
8001
+ "loss": 0.0063,
8002
+ "step": 1051
8003
+ },
8004
+ {
8005
+ "epoch": 0.8915254237288136,
8006
+ "grad_norm": 0.6742042303085327,
8007
+ "learning_rate": 7.083461874355335e-07,
8008
+ "loss": 0.0047,
8009
+ "step": 1052
8010
+ },
8011
+ {
8012
+ "epoch": 0.8923728813559322,
8013
+ "grad_norm": 2.22886061668396,
8014
+ "learning_rate": 6.974515038382379e-07,
8015
+ "loss": 0.0125,
8016
+ "step": 1053
8017
+ },
8018
+ {
8019
+ "epoch": 0.8932203389830509,
8020
+ "grad_norm": 2.4455456733703613,
8021
+ "learning_rate": 6.866382254766158e-07,
8022
+ "loss": 0.0186,
8023
+ "step": 1054
8024
+ },
8025
+ {
8026
+ "epoch": 0.8940677966101694,
8027
+ "grad_norm": 2.354172945022583,
8028
+ "learning_rate": 6.759064469760823e-07,
8029
+ "loss": 0.0088,
8030
+ "step": 1055
8031
+ },
8032
+ {
8033
+ "epoch": 0.8949152542372881,
8034
+ "grad_norm": 0.5831468105316162,
8035
+ "learning_rate": 6.652562622488535e-07,
8036
+ "loss": 0.0033,
8037
+ "step": 1056
8038
+ },
8039
+ {
8040
+ "epoch": 0.8957627118644068,
8041
+ "grad_norm": 1.065117359161377,
8042
+ "learning_rate": 6.546877644931315e-07,
8043
+ "loss": 0.0129,
8044
+ "step": 1057
8045
+ },
8046
+ {
8047
+ "epoch": 0.8966101694915254,
8048
+ "grad_norm": 1.6987978219985962,
8049
+ "learning_rate": 6.442010461922888e-07,
8050
+ "loss": 0.0125,
8051
+ "step": 1058
8052
+ },
8053
+ {
8054
+ "epoch": 0.8974576271186441,
8055
+ "grad_norm": 0.6148873567581177,
8056
+ "learning_rate": 6.33796199114055e-07,
8057
+ "loss": 0.0027,
8058
+ "step": 1059
8059
+ },
8060
+ {
8061
+ "epoch": 0.8983050847457628,
8062
+ "grad_norm": 1.205604076385498,
8063
+ "learning_rate": 6.234733143097215e-07,
8064
+ "loss": 0.0087,
8065
+ "step": 1060
8066
+ },
8067
+ {
8068
+ "epoch": 0.8983050847457628,
8069
+ "eval_accuracy": 1.0,
8070
+ "eval_f1": 1.0,
8071
+ "eval_loss": 9.550240793032572e-05,
8072
+ "eval_precision": 1.0,
8073
+ "eval_recall": 1.0,
8074
+ "eval_runtime": 51.1494,
8075
+ "eval_samples_per_second": 5.709,
8076
+ "eval_steps_per_second": 0.196,
8077
+ "step": 1060
8078
+ },
8079
+ {
8080
+ "epoch": 0.8991525423728813,
8081
+ "grad_norm": 3.041253089904785,
8082
+ "learning_rate": 6.132324821133362e-07,
8083
+ "loss": 0.0132,
8084
+ "step": 1061
8085
+ },
8086
+ {
8087
+ "epoch": 0.9,
8088
+ "grad_norm": 0.2654479146003723,
8089
+ "learning_rate": 6.030737921409169e-07,
8090
+ "loss": 0.001,
8091
+ "step": 1062
8092
+ },
8093
+ {
8094
+ "epoch": 0.9008474576271186,
8095
+ "grad_norm": 0.14473998546600342,
8096
+ "learning_rate": 5.929973332896677e-07,
8097
+ "loss": 0.0009,
8098
+ "step": 1063
8099
+ },
8100
+ {
8101
+ "epoch": 0.9016949152542373,
8102
+ "grad_norm": 0.26278120279312134,
8103
+ "learning_rate": 5.830031937372005e-07,
8104
+ "loss": 0.001,
8105
+ "step": 1064
8106
+ },
8107
+ {
8108
+ "epoch": 0.902542372881356,
8109
+ "grad_norm": 1.2509464025497437,
8110
+ "learning_rate": 5.730914609407634e-07,
8111
+ "loss": 0.006,
8112
+ "step": 1065
8113
+ },
8114
+ {
8115
+ "epoch": 0.9033898305084745,
8116
+ "grad_norm": 0.7587932348251343,
8117
+ "learning_rate": 5.632622216364736e-07,
8118
+ "loss": 0.0052,
8119
+ "step": 1066
8120
+ },
8121
+ {
8122
+ "epoch": 0.9042372881355932,
8123
+ "grad_norm": 0.35142210125923157,
8124
+ "learning_rate": 5.535155618385612e-07,
8125
+ "loss": 0.0022,
8126
+ "step": 1067
8127
+ },
8128
+ {
8129
+ "epoch": 0.9050847457627119,
8130
+ "grad_norm": 1.582571268081665,
8131
+ "learning_rate": 5.438515668386124e-07,
8132
+ "loss": 0.0121,
8133
+ "step": 1068
8134
+ },
8135
+ {
8136
+ "epoch": 0.9059322033898305,
8137
+ "grad_norm": 0.8583706617355347,
8138
+ "learning_rate": 5.342703212048306e-07,
8139
+ "loss": 0.0033,
8140
+ "step": 1069
8141
+ },
8142
+ {
8143
+ "epoch": 0.9067796610169492,
8144
+ "grad_norm": 1.316669225692749,
8145
+ "learning_rate": 5.247719087812897e-07,
8146
+ "loss": 0.0052,
8147
+ "step": 1070
8148
+ },
8149
+ {
8150
+ "epoch": 0.9076271186440678,
8151
+ "grad_norm": 1.5841196775436401,
8152
+ "learning_rate": 5.153564126872002e-07,
8153
+ "loss": 0.0087,
8154
+ "step": 1071
8155
+ },
8156
+ {
8157
+ "epoch": 0.9084745762711864,
8158
+ "grad_norm": 0.39058223366737366,
8159
+ "learning_rate": 5.060239153161872e-07,
8160
+ "loss": 0.0013,
8161
+ "step": 1072
8162
+ },
8163
+ {
8164
+ "epoch": 0.9093220338983051,
8165
+ "grad_norm": 1.9693634510040283,
8166
+ "learning_rate": 4.967744983355638e-07,
8167
+ "loss": 0.0137,
8168
+ "step": 1073
8169
+ },
8170
+ {
8171
+ "epoch": 0.9101694915254237,
8172
+ "grad_norm": 0.7177203297615051,
8173
+ "learning_rate": 4.876082426856177e-07,
8174
+ "loss": 0.0027,
8175
+ "step": 1074
8176
+ },
8177
+ {
8178
+ "epoch": 0.9110169491525424,
8179
+ "grad_norm": 1.0042608976364136,
8180
+ "learning_rate": 4.785252285789077e-07,
8181
+ "loss": 0.021,
8182
+ "step": 1075
8183
+ },
8184
+ {
8185
+ "epoch": 0.911864406779661,
8186
+ "grad_norm": 1.2595585584640503,
8187
+ "learning_rate": 4.695255354995576e-07,
8188
+ "loss": 0.0072,
8189
+ "step": 1076
8190
+ },
8191
+ {
8192
+ "epoch": 0.9127118644067796,
8193
+ "grad_norm": 0.42359045147895813,
8194
+ "learning_rate": 4.6060924220255654e-07,
8195
+ "loss": 0.0029,
8196
+ "step": 1077
8197
+ },
8198
+ {
8199
+ "epoch": 0.9135593220338983,
8200
+ "grad_norm": 1.1068620681762695,
8201
+ "learning_rate": 4.5177642671308087e-07,
8202
+ "loss": 0.0053,
8203
+ "step": 1078
8204
+ },
8205
+ {
8206
+ "epoch": 0.9144067796610169,
8207
+ "grad_norm": 0.701837956905365,
8208
+ "learning_rate": 4.430271663258001e-07,
8209
+ "loss": 0.0047,
8210
+ "step": 1079
8211
+ },
8212
+ {
8213
+ "epoch": 0.9152542372881356,
8214
+ "grad_norm": 0.43353769183158875,
8215
+ "learning_rate": 4.343615376042065e-07,
8216
+ "loss": 0.0017,
8217
+ "step": 1080
8218
+ },
8219
+ {
8220
+ "epoch": 0.9152542372881356,
8221
+ "eval_accuracy": 1.0,
8222
+ "eval_f1": 1.0,
8223
+ "eval_loss": 9.550563845550641e-05,
8224
+ "eval_precision": 1.0,
8225
+ "eval_recall": 1.0,
8226
+ "eval_runtime": 51.4172,
8227
+ "eval_samples_per_second": 5.679,
8228
+ "eval_steps_per_second": 0.194,
8229
+ "step": 1080
8230
+ },
8231
+ {
8232
+ "epoch": 0.9161016949152543,
8233
+ "grad_norm": 1.3469489812850952,
8234
+ "learning_rate": 4.2577961637994544e-07,
8235
+ "loss": 0.0116,
8236
+ "step": 1081
8237
+ },
8238
+ {
8239
+ "epoch": 0.9169491525423729,
8240
+ "grad_norm": 1.7352731227874756,
8241
+ "learning_rate": 4.172814777521483e-07,
8242
+ "loss": 0.012,
8243
+ "step": 1082
8244
+ },
8245
+ {
8246
+ "epoch": 0.9177966101694915,
8247
+ "grad_norm": 1.6800487041473389,
8248
+ "learning_rate": 4.088671960867785e-07,
8249
+ "loss": 0.0248,
8250
+ "step": 1083
8251
+ },
8252
+ {
8253
+ "epoch": 0.9186440677966101,
8254
+ "grad_norm": 1.4252413511276245,
8255
+ "learning_rate": 4.0053684501598076e-07,
8256
+ "loss": 0.0083,
8257
+ "step": 1084
8258
+ },
8259
+ {
8260
+ "epoch": 0.9194915254237288,
8261
+ "grad_norm": 2.737250804901123,
8262
+ "learning_rate": 3.9229049743743087e-07,
8263
+ "loss": 0.0172,
8264
+ "step": 1085
8265
+ },
8266
+ {
8267
+ "epoch": 0.9203389830508475,
8268
+ "grad_norm": 1.0084242820739746,
8269
+ "learning_rate": 3.8412822551370797e-07,
8270
+ "loss": 0.0093,
8271
+ "step": 1086
8272
+ },
8273
+ {
8274
+ "epoch": 0.9211864406779661,
8275
+ "grad_norm": 1.396519422531128,
8276
+ "learning_rate": 3.7605010067165216e-07,
8277
+ "loss": 0.016,
8278
+ "step": 1087
8279
+ },
8280
+ {
8281
+ "epoch": 0.9220338983050848,
8282
+ "grad_norm": 3.1301279067993164,
8283
+ "learning_rate": 3.68056193601749e-07,
8284
+ "loss": 0.0136,
8285
+ "step": 1088
8286
+ },
8287
+ {
8288
+ "epoch": 0.9228813559322034,
8289
+ "grad_norm": 1.233527421951294,
8290
+ "learning_rate": 3.601465742575061e-07,
8291
+ "loss": 0.0053,
8292
+ "step": 1089
8293
+ },
8294
+ {
8295
+ "epoch": 0.923728813559322,
8296
+ "grad_norm": 1.5494545698165894,
8297
+ "learning_rate": 3.5232131185484075e-07,
8298
+ "loss": 0.0119,
8299
+ "step": 1090
8300
+ },
8301
+ {
8302
+ "epoch": 0.9245762711864407,
8303
+ "grad_norm": 0.7201801538467407,
8304
+ "learning_rate": 3.445804748714732e-07,
8305
+ "loss": 0.0071,
8306
+ "step": 1091
8307
+ },
8308
+ {
8309
+ "epoch": 0.9254237288135593,
8310
+ "grad_norm": 2.4075450897216797,
8311
+ "learning_rate": 3.3692413104633226e-07,
8312
+ "loss": 0.0131,
8313
+ "step": 1092
8314
+ },
8315
+ {
8316
+ "epoch": 0.926271186440678,
8317
+ "grad_norm": 0.0901576578617096,
8318
+ "learning_rate": 3.2935234737895837e-07,
8319
+ "loss": 0.0005,
8320
+ "step": 1093
8321
+ },
8322
+ {
8323
+ "epoch": 0.9271186440677966,
8324
+ "grad_norm": 0.39205268025398254,
8325
+ "learning_rate": 3.2186519012891823e-07,
8326
+ "loss": 0.0027,
8327
+ "step": 1094
8328
+ },
8329
+ {
8330
+ "epoch": 0.9279661016949152,
8331
+ "grad_norm": 1.3817288875579834,
8332
+ "learning_rate": 3.1446272481522544e-07,
8333
+ "loss": 0.0158,
8334
+ "step": 1095
8335
+ },
8336
+ {
8337
+ "epoch": 0.9288135593220339,
8338
+ "grad_norm": 2.032911777496338,
8339
+ "learning_rate": 3.071450162157663e-07,
8340
+ "loss": 0.0121,
8341
+ "step": 1096
8342
+ },
8343
+ {
8344
+ "epoch": 0.9296610169491526,
8345
+ "grad_norm": 1.8076006174087524,
8346
+ "learning_rate": 2.999121283667339e-07,
8347
+ "loss": 0.0057,
8348
+ "step": 1097
8349
+ },
8350
+ {
8351
+ "epoch": 0.9305084745762712,
8352
+ "grad_norm": 1.118698000907898,
8353
+ "learning_rate": 2.9276412456206826e-07,
8354
+ "loss": 0.0037,
8355
+ "step": 1098
8356
+ },
8357
+ {
8358
+ "epoch": 0.9313559322033899,
8359
+ "grad_norm": 0.7791540026664734,
8360
+ "learning_rate": 2.857010673529015e-07,
8361
+ "loss": 0.0063,
8362
+ "step": 1099
8363
+ },
8364
+ {
8365
+ "epoch": 0.9322033898305084,
8366
+ "grad_norm": 1.4591788053512573,
8367
+ "learning_rate": 2.78723018547008e-07,
8368
+ "loss": 0.0058,
8369
+ "step": 1100
8370
+ },
8371
+ {
8372
+ "epoch": 0.9322033898305084,
8373
+ "eval_accuracy": 1.0,
8374
+ "eval_f1": 1.0,
8375
+ "eval_loss": 0.00010077878687297925,
8376
+ "eval_precision": 1.0,
8377
+ "eval_recall": 1.0,
8378
+ "eval_runtime": 51.6426,
8379
+ "eval_samples_per_second": 5.654,
8380
+ "eval_steps_per_second": 0.194,
8381
+ "step": 1100
  }
  ],
  "logging_steps": 1,
@@ -7638,7 +8398,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.076975196163277e+17,
+ "total_flos": 3.384126928028959e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null