mtzig commited on
Commit
da56f8a
·
verified ·
1 Parent(s): 82a89ac

Training in progress, step 1078, checkpoint

Browse files
last-checkpoint/optimizer_0/.metadata CHANGED
Binary files a/last-checkpoint/optimizer_0/.metadata and b/last-checkpoint/optimizer_0/.metadata differ
 
last-checkpoint/optimizer_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a352f0d84009b1817ea378a4704c01130220431cda057a719176edb53b9ce38
3
  size 13934748
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1423094be38f2b5ae113f9990f9d961754f62cae91e49b685bfcf23e02eb7bb
3
  size 13934748
last-checkpoint/optimizer_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:192c82f34e86d685c6f351fd58c1000ddea9a13d640195ac79c49fbf42423aa5
3
  size 13999412
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08f90b95cf50f53920f37b950eff873dabfa8ac64d13dc56f42397849a07fc07
3
  size 13999412
last-checkpoint/optimizer_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b90ca6c9a0d45f633e326ad429b79dcb8a229254c394c0026c58947de8b6ccb1
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd93a0a0d0644200d9fcbccac4f34181e77bde63b6d95229e509a8a350855523
3
  size 13990904
last-checkpoint/optimizer_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1ddcd3b678ecc28638f1325c2c32db98cad1876b80914907eec102e20d65888
3
  size 13990904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8ecc9cc52a997dd5614bbdc7880c6e20c1e932861d4d32c5e916f3e3d25f364
3
  size 13990904
last-checkpoint/pytorch_model_fsdp_0/.metadata CHANGED
Binary files a/last-checkpoint/pytorch_model_fsdp_0/.metadata and b/last-checkpoint/pytorch_model_fsdp_0/.metadata differ
 
last-checkpoint/pytorch_model_fsdp_0/__0_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:441eb9b06b4fc0f3fa0a9291de25b8426d0d9f412df64f69773da2db1b4860b2
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e17140212e09dfb8383f39aada365a791d0a38991ff14289444e40e32fbaa42f
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__1_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01659b87d6d23358ab75fa4077af9feedf08b369b1c157aa83e98851b9c0d1ee
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:558da04d338fb499d79f7280741df812318a3c5c4aee5a107c7093df843fc4c8
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__2_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71450373e32f8a9a1b7bd7c09bbf7665cd2aab9935d9141b9e0d70c0fce7c3de
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4916f4007500b9580d9f5d30067dea1660a0a0d57759318fb72d6b96562ab22b
3
  size 6966784
last-checkpoint/pytorch_model_fsdp_0/__3_0.distcp CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:173eff09d590e65fe2dd1179e23f7fb059beaf649179bf2d537bde02e80545b0
3
  size 6966784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0548992fa9f7a727bc6d76af19be130091084cba363ceab858c1d0bb1f9cc034
3
  size 6966784
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6afe62f64f980792c5f93908f1252e0efd7d9d6dd9a401096016c0cf0f6e9df7
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13e6011477fddf03495b9f86e6d1296fafc3690e147017eae3cf4b4b45062459
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c96a88229c7cf8988c09092a9afef0bd222230400623a17d132e957aa024720
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8280d014b0e56eff05e43f5a8c43d0264e1d5386ee80569ca7b06fed8e39dd5
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50d2280d0785bc9b8dd3a1397de7a4d5f6e608d8e08010244249962de0f0c423
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc32ead699b38a508a624a41fb823b6da5d825da75c4d29d42299e951670eefa
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05e89f8a1132e0f0def133732be826c04d18fb1ddc8e499809e4f481802df182
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ff9068623665c0763ac309d5165fb01837d68f9e74aa1848d43a5612394a0e9
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ead26a1aba46fa0b3384e323e0349ee0e9c3d6b20dad4ce8e9c9bf15675155cc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:706495f69aeec71fe1992742f4e3f2241879821efb572c1f0819a3bb4ed3108f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9276437847866419,
5
  "eval_steps": 20,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7619,6 +7619,588 @@
7619
  "eval_samples_per_second": 5.766,
7620
  "eval_steps_per_second": 0.188,
7621
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7622
  }
7623
  ],
7624
  "logging_steps": 1,
@@ -7633,12 +8215,12 @@
7633
  "should_evaluate": false,
7634
  "should_log": false,
7635
  "should_save": true,
7636
- "should_training_stop": false
7637
  },
7638
  "attributes": {}
7639
  }
7640
  },
7641
- "total_flos": 3.198993040534405e+17,
7642
  "train_batch_size": 8,
7643
  "trial_name": null,
7644
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 20,
6
+ "global_step": 1078,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7619
  "eval_samples_per_second": 5.766,
7620
  "eval_steps_per_second": 0.188,
7621
  "step": 1000
7622
+ },
7623
+ {
7624
+ "epoch": 0.9285714285714286,
7625
+ "grad_norm": 6.499805927276611,
7626
+ "learning_rate": 3.093540055676958e-07,
7627
+ "loss": 0.2015,
7628
+ "step": 1001
7629
+ },
7630
+ {
7631
+ "epoch": 0.9294990723562152,
7632
+ "grad_norm": 4.742324352264404,
7633
+ "learning_rate": 3.0141136285129825e-07,
7634
+ "loss": 0.1585,
7635
+ "step": 1002
7636
+ },
7637
+ {
7638
+ "epoch": 0.9304267161410018,
7639
+ "grad_norm": 4.395940780639648,
7640
+ "learning_rate": 2.935704537404083e-07,
7641
+ "loss": 0.1249,
7642
+ "step": 1003
7643
+ },
7644
+ {
7645
+ "epoch": 0.9313543599257885,
7646
+ "grad_norm": 3.036573886871338,
7647
+ "learning_rate": 2.8583136048245697e-07,
7648
+ "loss": 0.1331,
7649
+ "step": 1004
7650
+ },
7651
+ {
7652
+ "epoch": 0.9322820037105751,
7653
+ "grad_norm": 4.401485919952393,
7654
+ "learning_rate": 2.781941642568686e-07,
7655
+ "loss": 0.2138,
7656
+ "step": 1005
7657
+ },
7658
+ {
7659
+ "epoch": 0.9332096474953617,
7660
+ "grad_norm": 4.973133087158203,
7661
+ "learning_rate": 2.706589451742181e-07,
7662
+ "loss": 0.2253,
7663
+ "step": 1006
7664
+ },
7665
+ {
7666
+ "epoch": 0.9341372912801484,
7667
+ "grad_norm": 6.711733818054199,
7668
+ "learning_rate": 2.632257822753881e-07,
7669
+ "loss": 0.2465,
7670
+ "step": 1007
7671
+ },
7672
+ {
7673
+ "epoch": 0.935064935064935,
7674
+ "grad_norm": 3.2245848178863525,
7675
+ "learning_rate": 2.5589475353073987e-07,
7676
+ "loss": 0.1524,
7677
+ "step": 1008
7678
+ },
7679
+ {
7680
+ "epoch": 0.9359925788497218,
7681
+ "grad_norm": 3.8495306968688965,
7682
+ "learning_rate": 2.486659358392951e-07,
7683
+ "loss": 0.1646,
7684
+ "step": 1009
7685
+ },
7686
+ {
7687
+ "epoch": 0.9369202226345084,
7688
+ "grad_norm": 5.713381290435791,
7689
+ "learning_rate": 2.4153940502793185e-07,
7690
+ "loss": 0.2161,
7691
+ "step": 1010
7692
+ },
7693
+ {
7694
+ "epoch": 0.937847866419295,
7695
+ "grad_norm": 3.7789957523345947,
7696
+ "learning_rate": 2.3451523585058756e-07,
7697
+ "loss": 0.1509,
7698
+ "step": 1011
7699
+ },
7700
+ {
7701
+ "epoch": 0.9387755102040817,
7702
+ "grad_norm": 4.0073652267456055,
7703
+ "learning_rate": 2.2759350198746978e-07,
7704
+ "loss": 0.1402,
7705
+ "step": 1012
7706
+ },
7707
+ {
7708
+ "epoch": 0.9397031539888683,
7709
+ "grad_norm": 3.3210415840148926,
7710
+ "learning_rate": 2.2077427604429435e-07,
7711
+ "loss": 0.1431,
7712
+ "step": 1013
7713
+ },
7714
+ {
7715
+ "epoch": 0.9406307977736549,
7716
+ "grad_norm": 5.2902750968933105,
7717
+ "learning_rate": 2.1405762955151178e-07,
7718
+ "loss": 0.1239,
7719
+ "step": 1014
7720
+ },
7721
+ {
7722
+ "epoch": 0.9415584415584416,
7723
+ "grad_norm": 5.931840419769287,
7724
+ "learning_rate": 2.0744363296356872e-07,
7725
+ "loss": 0.1965,
7726
+ "step": 1015
7727
+ },
7728
+ {
7729
+ "epoch": 0.9424860853432282,
7730
+ "grad_norm": 3.9065020084381104,
7731
+ "learning_rate": 2.009323556581566e-07,
7732
+ "loss": 0.1344,
7733
+ "step": 1016
7734
+ },
7735
+ {
7736
+ "epoch": 0.9434137291280148,
7737
+ "grad_norm": 3.194225311279297,
7738
+ "learning_rate": 1.9452386593549534e-07,
7739
+ "loss": 0.0979,
7740
+ "step": 1017
7741
+ },
7742
+ {
7743
+ "epoch": 0.9443413729128015,
7744
+ "grad_norm": 3.794304847717285,
7745
+ "learning_rate": 1.8821823101760949e-07,
7746
+ "loss": 0.2038,
7747
+ "step": 1018
7748
+ },
7749
+ {
7750
+ "epoch": 0.9452690166975881,
7751
+ "grad_norm": 3.638219118118286,
7752
+ "learning_rate": 1.8201551704762453e-07,
7753
+ "loss": 0.1254,
7754
+ "step": 1019
7755
+ },
7756
+ {
7757
+ "epoch": 0.9461966604823747,
7758
+ "grad_norm": 4.820856094360352,
7759
+ "learning_rate": 1.7591578908907724e-07,
7760
+ "loss": 0.1909,
7761
+ "step": 1020
7762
+ },
7763
+ {
7764
+ "epoch": 0.9461966604823747,
7765
+ "eval_accuracy": 0.8580931263858093,
7766
+ "eval_f1": 0.6966824644549763,
7767
+ "eval_loss": 0.3055438697338104,
7768
+ "eval_precision": 0.8698224852071006,
7769
+ "eval_recall": 0.5810276679841897,
7770
+ "eval_runtime": 47.7431,
7771
+ "eval_samples_per_second": 5.781,
7772
+ "eval_steps_per_second": 0.189,
7773
+ "step": 1020
7774
+ },
7775
+ {
7776
+ "epoch": 0.9471243042671614,
7777
+ "grad_norm": 3.5032293796539307,
7778
+ "learning_rate": 1.699191111252241e-07,
7779
+ "loss": 0.077,
7780
+ "step": 1021
7781
+ },
7782
+ {
7783
+ "epoch": 0.948051948051948,
7784
+ "grad_norm": 3.7761011123657227,
7785
+ "learning_rate": 1.6402554605838173e-07,
7786
+ "loss": 0.1564,
7787
+ "step": 1022
7788
+ },
7789
+ {
7790
+ "epoch": 0.9489795918367347,
7791
+ "grad_norm": 5.951882839202881,
7792
+ "learning_rate": 1.5823515570925763e-07,
7793
+ "loss": 0.2323,
7794
+ "step": 1023
7795
+ },
7796
+ {
7797
+ "epoch": 0.9499072356215214,
7798
+ "grad_norm": 4.935650825500488,
7799
+ "learning_rate": 1.5254800081630828e-07,
7800
+ "loss": 0.2172,
7801
+ "step": 1024
7802
+ },
7803
+ {
7804
+ "epoch": 0.950834879406308,
7805
+ "grad_norm": 5.279281139373779,
7806
+ "learning_rate": 1.469641410350964e-07,
7807
+ "loss": 0.1845,
7808
+ "step": 1025
7809
+ },
7810
+ {
7811
+ "epoch": 0.9517625231910947,
7812
+ "grad_norm": 5.034005641937256,
7813
+ "learning_rate": 1.4148363493766803e-07,
7814
+ "loss": 0.174,
7815
+ "step": 1026
7816
+ },
7817
+ {
7818
+ "epoch": 0.9526901669758813,
7819
+ "grad_norm": 3.964360237121582,
7820
+ "learning_rate": 1.361065400119399e-07,
7821
+ "loss": 0.0888,
7822
+ "step": 1027
7823
+ },
7824
+ {
7825
+ "epoch": 0.9536178107606679,
7826
+ "grad_norm": 4.862616062164307,
7827
+ "learning_rate": 1.30832912661093e-07,
7828
+ "loss": 0.2007,
7829
+ "step": 1028
7830
+ },
7831
+ {
7832
+ "epoch": 0.9545454545454546,
7833
+ "grad_norm": 4.140252590179443,
7834
+ "learning_rate": 1.2566280820298427e-07,
7835
+ "loss": 0.13,
7836
+ "step": 1029
7837
+ },
7838
+ {
7839
+ "epoch": 0.9554730983302412,
7840
+ "grad_norm": 5.299205780029297,
7841
+ "learning_rate": 1.2059628086956044e-07,
7842
+ "loss": 0.1795,
7843
+ "step": 1030
7844
+ },
7845
+ {
7846
+ "epoch": 0.9564007421150278,
7847
+ "grad_norm": 5.694372653961182,
7848
+ "learning_rate": 1.1563338380629618e-07,
7849
+ "loss": 0.2278,
7850
+ "step": 1031
7851
+ },
7852
+ {
7853
+ "epoch": 0.9573283858998145,
7854
+ "grad_norm": 4.10621452331543,
7855
+ "learning_rate": 1.1077416907163573e-07,
7856
+ "loss": 0.1832,
7857
+ "step": 1032
7858
+ },
7859
+ {
7860
+ "epoch": 0.9582560296846011,
7861
+ "grad_norm": 3.0105836391448975,
7862
+ "learning_rate": 1.0601868763643997e-07,
7863
+ "loss": 0.1126,
7864
+ "step": 1033
7865
+ },
7866
+ {
7867
+ "epoch": 0.9591836734693877,
7868
+ "grad_norm": 5.412391185760498,
7869
+ "learning_rate": 1.0136698938346012e-07,
7870
+ "loss": 0.1996,
7871
+ "step": 1034
7872
+ },
7873
+ {
7874
+ "epoch": 0.9601113172541744,
7875
+ "grad_norm": 3.507596492767334,
7876
+ "learning_rate": 9.68191231068083e-08,
7877
+ "loss": 0.1647,
7878
+ "step": 1035
7879
+ },
7880
+ {
7881
+ "epoch": 0.961038961038961,
7882
+ "grad_norm": 4.733442783355713,
7883
+ "learning_rate": 9.237513651145224e-08,
7884
+ "loss": 0.102,
7885
+ "step": 1036
7886
+ },
7887
+ {
7888
+ "epoch": 0.9619666048237476,
7889
+ "grad_norm": 6.855641841888428,
7890
+ "learning_rate": 8.80350762127058e-08,
7891
+ "loss": 0.1197,
7892
+ "step": 1037
7893
+ },
7894
+ {
7895
+ "epoch": 0.9628942486085343,
7896
+ "grad_norm": 3.809262275695801,
7897
+ "learning_rate": 8.379898773574924e-08,
7898
+ "loss": 0.1287,
7899
+ "step": 1038
7900
+ },
7901
+ {
7902
+ "epoch": 0.963821892393321,
7903
+ "grad_norm": 3.4764761924743652,
7904
+ "learning_rate": 7.966691551514527e-08,
7905
+ "loss": 0.1106,
7906
+ "step": 1039
7907
+ },
7908
+ {
7909
+ "epoch": 0.9647495361781077,
7910
+ "grad_norm": 5.395627021789551,
7911
+ "learning_rate": 7.563890289437825e-08,
7912
+ "loss": 0.2017,
7913
+ "step": 1040
7914
+ },
7915
+ {
7916
+ "epoch": 0.9647495361781077,
7917
+ "eval_accuracy": 0.8580931263858093,
7918
+ "eval_f1": 0.6952380952380952,
7919
+ "eval_loss": 0.30581432580947876,
7920
+ "eval_precision": 0.874251497005988,
7921
+ "eval_recall": 0.5770750988142292,
7922
+ "eval_runtime": 48.746,
7923
+ "eval_samples_per_second": 5.662,
7924
+ "eval_steps_per_second": 0.185,
7925
+ "step": 1040
7926
+ },
7927
+ {
7928
+ "epoch": 0.9656771799628943,
7929
+ "grad_norm": 3.9932026863098145,
7930
+ "learning_rate": 7.171499212539124e-08,
7931
+ "loss": 0.1513,
7932
+ "step": 1041
7933
+ },
7934
+ {
7935
+ "epoch": 0.9666048237476809,
7936
+ "grad_norm": 5.8107075691223145,
7937
+ "learning_rate": 6.78952243681541e-08,
7938
+ "loss": 0.172,
7939
+ "step": 1042
7940
+ },
7941
+ {
7942
+ "epoch": 0.9675324675324676,
7943
+ "grad_norm": 3.7357017993927,
7944
+ "learning_rate": 6.417963969022389e-08,
7945
+ "loss": 0.1422,
7946
+ "step": 1043
7947
+ },
7948
+ {
7949
+ "epoch": 0.9684601113172542,
7950
+ "grad_norm": 3.854876756668091,
7951
+ "learning_rate": 6.056827706632185e-08,
7952
+ "loss": 0.1587,
7953
+ "step": 1044
7954
+ },
7955
+ {
7956
+ "epoch": 0.9693877551020408,
7957
+ "grad_norm": 6.006348133087158,
7958
+ "learning_rate": 5.7061174377937015e-08,
7959
+ "loss": 0.2244,
7960
+ "step": 1045
7961
+ },
7962
+ {
7963
+ "epoch": 0.9703153988868275,
7964
+ "grad_norm": 4.745636463165283,
7965
+ "learning_rate": 5.365836841291439e-08,
7966
+ "loss": 0.1803,
7967
+ "step": 1046
7968
+ },
7969
+ {
7970
+ "epoch": 0.9712430426716141,
7971
+ "grad_norm": 3.8510711193084717,
7972
+ "learning_rate": 5.035989486508075e-08,
7973
+ "loss": 0.1635,
7974
+ "step": 1047
7975
+ },
7976
+ {
7977
+ "epoch": 0.9721706864564007,
7978
+ "grad_norm": 5.504276752471924,
7979
+ "learning_rate": 4.716578833386054e-08,
7980
+ "loss": 0.1517,
7981
+ "step": 1048
7982
+ },
7983
+ {
7984
+ "epoch": 0.9730983302411874,
7985
+ "grad_norm": 3.3400299549102783,
7986
+ "learning_rate": 4.4076082323920576e-08,
7987
+ "loss": 0.1494,
7988
+ "step": 1049
7989
+ },
7990
+ {
7991
+ "epoch": 0.974025974025974,
7992
+ "grad_norm": 5.584471225738525,
7993
+ "learning_rate": 4.109080924481479e-08,
7994
+ "loss": 0.1781,
7995
+ "step": 1050
7996
+ },
7997
+ {
7998
+ "epoch": 0.9749536178107606,
7999
+ "grad_norm": 4.575666904449463,
8000
+ "learning_rate": 3.82100004106456e-08,
8001
+ "loss": 0.1298,
8002
+ "step": 1051
8003
+ },
8004
+ {
8005
+ "epoch": 0.9758812615955473,
8006
+ "grad_norm": 3.4456560611724854,
8007
+ "learning_rate": 3.543368603973529e-08,
8008
+ "loss": 0.1292,
8009
+ "step": 1052
8010
+ },
8011
+ {
8012
+ "epoch": 0.9768089053803339,
8013
+ "grad_norm": 2.841853618621826,
8014
+ "learning_rate": 3.2761895254306285e-08,
8015
+ "loss": 0.076,
8016
+ "step": 1053
8017
+ },
8018
+ {
8019
+ "epoch": 0.9777365491651205,
8020
+ "grad_norm": 4.662397384643555,
8021
+ "learning_rate": 3.019465608018024e-08,
8022
+ "loss": 0.2181,
8023
+ "step": 1054
8024
+ },
8025
+ {
8026
+ "epoch": 0.9786641929499073,
8027
+ "grad_norm": 3.259526014328003,
8028
+ "learning_rate": 2.773199544648164e-08,
8029
+ "loss": 0.108,
8030
+ "step": 1055
8031
+ },
8032
+ {
8033
+ "epoch": 0.9795918367346939,
8034
+ "grad_norm": 4.32330322265625,
8035
+ "learning_rate": 2.537393918535358e-08,
8036
+ "loss": 0.2068,
8037
+ "step": 1056
8038
+ },
8039
+ {
8040
+ "epoch": 0.9805194805194806,
8041
+ "grad_norm": 4.7618536949157715,
8042
+ "learning_rate": 2.312051203169352e-08,
8043
+ "loss": 0.1936,
8044
+ "step": 1057
8045
+ },
8046
+ {
8047
+ "epoch": 0.9814471243042672,
8048
+ "grad_norm": 4.779612064361572,
8049
+ "learning_rate": 2.0971737622883515e-08,
8050
+ "loss": 0.1007,
8051
+ "step": 1058
8052
+ },
8053
+ {
8054
+ "epoch": 0.9823747680890538,
8055
+ "grad_norm": 4.346301078796387,
8056
+ "learning_rate": 1.8927638498551502e-08,
8057
+ "loss": 0.1594,
8058
+ "step": 1059
8059
+ },
8060
+ {
8061
+ "epoch": 0.9833024118738405,
8062
+ "grad_norm": 4.017016410827637,
8063
+ "learning_rate": 1.698823610032929e-08,
8064
+ "loss": 0.1828,
8065
+ "step": 1060
8066
+ },
8067
+ {
8068
+ "epoch": 0.9833024118738405,
8069
+ "eval_accuracy": 0.8603104212860311,
8070
+ "eval_f1": 0.7028301886792453,
8071
+ "eval_loss": 0.306577205657959,
8072
+ "eval_precision": 0.8713450292397661,
8073
+ "eval_recall": 0.5889328063241107,
8074
+ "eval_runtime": 47.7001,
8075
+ "eval_samples_per_second": 5.786,
8076
+ "eval_steps_per_second": 0.189,
8077
+ "step": 1060
8078
+ },
8079
+ {
8080
+ "epoch": 0.9842300556586271,
8081
+ "grad_norm": 5.6250505447387695,
8082
+ "learning_rate": 1.5153550771630498e-08,
8083
+ "loss": 0.17,
8084
+ "step": 1061
8085
+ },
8086
+ {
8087
+ "epoch": 0.9851576994434137,
8088
+ "grad_norm": 5.426425933837891,
8089
+ "learning_rate": 1.3423601757436289e-08,
8090
+ "loss": 0.2122,
8091
+ "step": 1062
8092
+ },
8093
+ {
8094
+ "epoch": 0.9860853432282004,
8095
+ "grad_norm": 4.71135950088501,
8096
+ "learning_rate": 1.179840720409331e-08,
8097
+ "loss": 0.1715,
8098
+ "step": 1063
8099
+ },
8100
+ {
8101
+ "epoch": 0.987012987012987,
8102
+ "grad_norm": 4.350978851318359,
8103
+ "learning_rate": 1.0277984159122734e-08,
8104
+ "loss": 0.1704,
8105
+ "step": 1064
8106
+ },
8107
+ {
8108
+ "epoch": 0.9879406307977736,
8109
+ "grad_norm": 5.211360931396484,
8110
+ "learning_rate": 8.862348571043733e-09,
8111
+ "loss": 0.166,
8112
+ "step": 1065
8113
+ },
8114
+ {
8115
+ "epoch": 0.9888682745825603,
8116
+ "grad_norm": 4.015779495239258,
8117
+ "learning_rate": 7.551515289203615e-09,
8118
+ "loss": 0.1616,
8119
+ "step": 1066
8120
+ },
8121
+ {
8122
+ "epoch": 0.9897959183673469,
8123
+ "grad_norm": 4.356948375701904,
8124
+ "learning_rate": 6.345498063622391e-09,
8125
+ "loss": 0.1961,
8126
+ "step": 1067
8127
+ },
8128
+ {
8129
+ "epoch": 0.9907235621521335,
8130
+ "grad_norm": 6.508297920227051,
8131
+ "learning_rate": 5.2443095448506674e-09,
8132
+ "loss": 0.1935,
8133
+ "step": 1068
8134
+ },
8135
+ {
8136
+ "epoch": 0.9916512059369202,
8137
+ "grad_norm": 4.595229625701904,
8138
+ "learning_rate": 4.247961283835311e-09,
8139
+ "loss": 0.2116,
8140
+ "step": 1069
8141
+ },
8142
+ {
8143
+ "epoch": 0.9925788497217068,
8144
+ "grad_norm": 4.39501428604126,
8145
+ "learning_rate": 3.3564637317984318e-09,
8146
+ "loss": 0.1568,
8147
+ "step": 1070
8148
+ },
8149
+ {
8150
+ "epoch": 0.9935064935064936,
8151
+ "grad_norm": 4.391909599304199,
8152
+ "learning_rate": 2.5698262401263607e-09,
8153
+ "loss": 0.1553,
8154
+ "step": 1071
8155
+ },
8156
+ {
8157
+ "epoch": 0.9944341372912802,
8158
+ "grad_norm": 3.195699453353882,
8159
+ "learning_rate": 1.888057060274173e-09,
8160
+ "loss": 0.1469,
8161
+ "step": 1072
8162
+ },
8163
+ {
8164
+ "epoch": 0.9953617810760668,
8165
+ "grad_norm": 5.479938507080078,
8166
+ "learning_rate": 1.3111633436779792e-09,
8167
+ "loss": 0.1662,
8168
+ "step": 1073
8169
+ },
8170
+ {
8171
+ "epoch": 0.9962894248608535,
8172
+ "grad_norm": 4.181588172912598,
8173
+ "learning_rate": 8.391511416816489e-10,
8174
+ "loss": 0.1746,
8175
+ "step": 1074
8176
+ },
8177
+ {
8178
+ "epoch": 0.9972170686456401,
8179
+ "grad_norm": 4.144800662994385,
8180
+ "learning_rate": 4.720254054679796e-10,
8181
+ "loss": 0.1624,
8182
+ "step": 1075
8183
+ },
8184
+ {
8185
+ "epoch": 0.9981447124304267,
8186
+ "grad_norm": 3.857682228088379,
8187
+ "learning_rate": 2.0978998601206558e-10,
8188
+ "loss": 0.1293,
8189
+ "step": 1076
8190
+ },
8191
+ {
8192
+ "epoch": 0.9990723562152134,
8193
+ "grad_norm": 5.734769344329834,
8194
+ "learning_rate": 5.244763404133046e-11,
8195
+ "loss": 0.1897,
8196
+ "step": 1077
8197
+ },
8198
+ {
8199
+ "epoch": 1.0,
8200
+ "grad_norm": 3.7049508094787598,
8201
+ "learning_rate": 0.0,
8202
+ "loss": 0.1706,
8203
+ "step": 1078
8204
  }
8205
  ],
8206
  "logging_steps": 1,
 
8215
  "should_evaluate": false,
8216
  "should_log": false,
8217
  "should_save": true,
8218
+ "should_training_stop": true
8219
  },
8220
  "attributes": {}
8221
  }
8222
  },
8223
+ "total_flos": 3.4499307937307034e+17,
8224
  "train_batch_size": 8,
8225
  "trial_name": null,
8226
  "trial_params": null