CocoRoF commited on
Commit
bf077c5
·
verified ·
1 Parent(s): bdcd2d3

Training in progress, step 10670, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb9f3b8a1efbafead71f0b4f3c0934de3258af1a9bfad9aecc866f7dc032377f
3
  size 738367848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3745bf4a87f162fa13cf355e199ab846b0247a904a1213366e15055a6bf2f43a
3
  size 738367848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:090bb424ceafc5d15809a08a16a11eb78b1ac54f4b1366d08eb4391ce4040896
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d1ed18d6fe47a085d91a34b52f0b0e9d63181b84ee4c405b93d5c8e09294fad
3
  size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c383f9e8151a96a9b2b8c275978c19aa387d72a92b0fa7ffae9836fb29ad4e1
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e73f3d2830bcf37e3d736f23b27e8ce733b3473d4cab28360690d45ce9f8fbaa
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c68bb140caa20e97fbacbd7b5bfac9f50a34da20ffb8898607809de5338939b7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0272fe6cd9dc84504ed48d7b132ccc945c2d44d9831efb836fdb17160ecec1c1
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84b2b03148e735c06e96e9718897d377bb259c4fc8d0d7eac4359e0df9fd59c3
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b990e0a8c37f6931ac353c659c83107fdc7ef191bb09facaeb9644d6874f096
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb82703e57f841f914dfb29dc3442d88d2c174cf8ce56f91f0c9c5f2849c5754
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c914e450f9b754db2d81a37a948ec5a8925105ae3c9bf9546862109977afdeb
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58d69503f206ccaaa32432817fe07a0b2fe6f226f63d9d38c4bb47f2804049c1
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d26231e7958cdd63301effbce7fbbecf14c30b13c5cf7b6c8e00c1f8efd5317
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e0fdfa3710c3ff050391030ae78220221b31a31e2ceea64687f7a428110d141
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0d7d94f216995135aa7d9b310b13d4e41fe010a347e4abe474e09fb0b16e836
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a7b71b6ee08059a6838f5a634279837dec0a6f331500089354f5a30e88cd0b0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c3ac9632d99d55f1b6c9a1a327344e69abc94e4998731b976e138a7dfbb679e
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b12210119d32a8e51c4b367e898fd3bee7de6dc9d70f1258ce5df806569ea8a4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f742e8fccfd64912166b4c2a12a9757a7223df0c62ca19cb8c5ef3c4dd1595a8
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cc7c9d1278bb28457a3d5ee38d783399f75651e4b34a536f0367d5f3082f3b4
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a60d80d2f480f1553b178ffde691b3d8251b3da0f0ce4460a958add33beb93ab
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.372071227741332,
5
  "eval_steps": 250,
6
- "global_step": 10000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7647,6 +7647,507 @@
7647
  "eval_spearman_manhattan": 0.7421316928799319,
7648
  "eval_steps_per_second": 7.269,
7649
  "step": 10000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7650
  }
7651
  ],
7652
  "logging_steps": 10,
@@ -7661,7 +8162,7 @@
7661
  "should_evaluate": false,
7662
  "should_log": false,
7663
  "should_save": true,
7664
- "should_training_stop": false
7665
  },
7666
  "attributes": {}
7667
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
  "eval_steps": 250,
6
+ "global_step": 10670,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7647
  "eval_spearman_manhattan": 0.7421316928799319,
7648
  "eval_steps_per_second": 7.269,
7649
  "step": 10000
7650
+ },
7651
+ {
7652
+ "epoch": 9.381443298969073,
7653
+ "grad_norm": 0.4994644522666931,
7654
+ "learning_rate": 9.926708547496669e-06,
7655
+ "loss": 0.0226,
7656
+ "step": 10010
7657
+ },
7658
+ {
7659
+ "epoch": 9.390815370196814,
7660
+ "grad_norm": 1.5270389318466187,
7661
+ "learning_rate": 9.926635329262401e-06,
7662
+ "loss": 0.0211,
7663
+ "step": 10020
7664
+ },
7665
+ {
7666
+ "epoch": 9.400187441424555,
7667
+ "grad_norm": 0.47197312116622925,
7668
+ "learning_rate": 9.92656211102813e-06,
7669
+ "loss": 0.0235,
7670
+ "step": 10030
7671
+ },
7672
+ {
7673
+ "epoch": 9.409559512652296,
7674
+ "grad_norm": 1.132454752922058,
7675
+ "learning_rate": 9.926488892793863e-06,
7676
+ "loss": 0.023,
7677
+ "step": 10040
7678
+ },
7679
+ {
7680
+ "epoch": 9.418931583880038,
7681
+ "grad_norm": 0.7693812251091003,
7682
+ "learning_rate": 9.926415674559593e-06,
7683
+ "loss": 0.0247,
7684
+ "step": 10050
7685
+ },
7686
+ {
7687
+ "epoch": 9.42830365510778,
7688
+ "grad_norm": 0.42411306500434875,
7689
+ "learning_rate": 9.926342456325324e-06,
7690
+ "loss": 0.0234,
7691
+ "step": 10060
7692
+ },
7693
+ {
7694
+ "epoch": 9.43767572633552,
7695
+ "grad_norm": 0.9110538959503174,
7696
+ "learning_rate": 9.926269238091055e-06,
7697
+ "loss": 0.0256,
7698
+ "step": 10070
7699
+ },
7700
+ {
7701
+ "epoch": 9.447047797563261,
7702
+ "grad_norm": 0.6932746171951294,
7703
+ "learning_rate": 9.926196019856786e-06,
7704
+ "loss": 0.0288,
7705
+ "step": 10080
7706
+ },
7707
+ {
7708
+ "epoch": 9.456419868791002,
7709
+ "grad_norm": 0.6196317076683044,
7710
+ "learning_rate": 9.926122801622516e-06,
7711
+ "loss": 0.0239,
7712
+ "step": 10090
7713
+ },
7714
+ {
7715
+ "epoch": 9.465791940018745,
7716
+ "grad_norm": 0.6985231637954712,
7717
+ "learning_rate": 9.926049583388247e-06,
7718
+ "loss": 0.0194,
7719
+ "step": 10100
7720
+ },
7721
+ {
7722
+ "epoch": 9.475164011246486,
7723
+ "grad_norm": 0.8828220963478088,
7724
+ "learning_rate": 9.925976365153978e-06,
7725
+ "loss": 0.0282,
7726
+ "step": 10110
7727
+ },
7728
+ {
7729
+ "epoch": 9.484536082474227,
7730
+ "grad_norm": 0.3887142241001129,
7731
+ "learning_rate": 9.92590314691971e-06,
7732
+ "loss": 0.0231,
7733
+ "step": 10120
7734
+ },
7735
+ {
7736
+ "epoch": 9.493908153701968,
7737
+ "grad_norm": 0.696250855922699,
7738
+ "learning_rate": 9.925829928685441e-06,
7739
+ "loss": 0.0241,
7740
+ "step": 10130
7741
+ },
7742
+ {
7743
+ "epoch": 9.503280224929709,
7744
+ "grad_norm": 0.9591291546821594,
7745
+ "learning_rate": 9.925756710451172e-06,
7746
+ "loss": 0.0237,
7747
+ "step": 10140
7748
+ },
7749
+ {
7750
+ "epoch": 9.512652296157452,
7751
+ "grad_norm": 0.6247865557670593,
7752
+ "learning_rate": 9.925683492216903e-06,
7753
+ "loss": 0.0225,
7754
+ "step": 10150
7755
+ },
7756
+ {
7757
+ "epoch": 9.522024367385193,
7758
+ "grad_norm": 0.8061539530754089,
7759
+ "learning_rate": 9.925610273982633e-06,
7760
+ "loss": 0.0248,
7761
+ "step": 10160
7762
+ },
7763
+ {
7764
+ "epoch": 9.531396438612934,
7765
+ "grad_norm": 0.5681460499763489,
7766
+ "learning_rate": 9.925537055748364e-06,
7767
+ "loss": 0.0216,
7768
+ "step": 10170
7769
+ },
7770
+ {
7771
+ "epoch": 9.540768509840674,
7772
+ "grad_norm": 0.7798430323600769,
7773
+ "learning_rate": 9.925463837514095e-06,
7774
+ "loss": 0.0205,
7775
+ "step": 10180
7776
+ },
7777
+ {
7778
+ "epoch": 9.550140581068415,
7779
+ "grad_norm": 0.633307695388794,
7780
+ "learning_rate": 9.925390619279827e-06,
7781
+ "loss": 0.0257,
7782
+ "step": 10190
7783
+ },
7784
+ {
7785
+ "epoch": 9.559512652296158,
7786
+ "grad_norm": 0.5352799892425537,
7787
+ "learning_rate": 9.925317401045558e-06,
7788
+ "loss": 0.0214,
7789
+ "step": 10200
7790
+ },
7791
+ {
7792
+ "epoch": 9.5688847235239,
7793
+ "grad_norm": 1.4367021322250366,
7794
+ "learning_rate": 9.925244182811287e-06,
7795
+ "loss": 0.0245,
7796
+ "step": 10210
7797
+ },
7798
+ {
7799
+ "epoch": 9.57825679475164,
7800
+ "grad_norm": 0.6616729497909546,
7801
+ "learning_rate": 9.92517096457702e-06,
7802
+ "loss": 0.0168,
7803
+ "step": 10220
7804
+ },
7805
+ {
7806
+ "epoch": 9.587628865979381,
7807
+ "grad_norm": 0.5232043862342834,
7808
+ "learning_rate": 9.92509774634275e-06,
7809
+ "loss": 0.0229,
7810
+ "step": 10230
7811
+ },
7812
+ {
7813
+ "epoch": 9.597000937207122,
7814
+ "grad_norm": 0.5471720099449158,
7815
+ "learning_rate": 9.925024528108481e-06,
7816
+ "loss": 0.0244,
7817
+ "step": 10240
7818
+ },
7819
+ {
7820
+ "epoch": 9.606373008434865,
7821
+ "grad_norm": 0.8130425214767456,
7822
+ "learning_rate": 9.924951309874212e-06,
7823
+ "loss": 0.0243,
7824
+ "step": 10250
7825
+ },
7826
+ {
7827
+ "epoch": 9.606373008434865,
7828
+ "eval_loss": 0.037354420870542526,
7829
+ "eval_pearson_cosine": 0.7731273770332336,
7830
+ "eval_pearson_dot": 0.7302557826042175,
7831
+ "eval_pearson_euclidean": 0.7300422191619873,
7832
+ "eval_pearson_manhattan": 0.7321226596832275,
7833
+ "eval_runtime": 25.5048,
7834
+ "eval_samples_per_second": 58.813,
7835
+ "eval_spearman_cosine": 0.7727287355752905,
7836
+ "eval_spearman_dot": 0.7305929253470385,
7837
+ "eval_spearman_euclidean": 0.7346168467659768,
7838
+ "eval_spearman_manhattan": 0.7364009847987945,
7839
+ "eval_steps_per_second": 7.371,
7840
+ "step": 10250
7841
+ },
7842
+ {
7843
+ "epoch": 9.615745079662606,
7844
+ "grad_norm": 0.497060626745224,
7845
+ "learning_rate": 9.924878091639943e-06,
7846
+ "loss": 0.0217,
7847
+ "step": 10260
7848
+ },
7849
+ {
7850
+ "epoch": 9.625117150890347,
7851
+ "grad_norm": 0.985636830329895,
7852
+ "learning_rate": 9.924804873405673e-06,
7853
+ "loss": 0.0238,
7854
+ "step": 10270
7855
+ },
7856
+ {
7857
+ "epoch": 9.634489222118088,
7858
+ "grad_norm": 0.8833957314491272,
7859
+ "learning_rate": 9.924731655171404e-06,
7860
+ "loss": 0.0215,
7861
+ "step": 10280
7862
+ },
7863
+ {
7864
+ "epoch": 9.643861293345829,
7865
+ "grad_norm": 0.7223436832427979,
7866
+ "learning_rate": 9.924658436937137e-06,
7867
+ "loss": 0.0257,
7868
+ "step": 10290
7869
+ },
7870
+ {
7871
+ "epoch": 9.653233364573572,
7872
+ "grad_norm": 1.0917994976043701,
7873
+ "learning_rate": 9.924585218702867e-06,
7874
+ "loss": 0.0272,
7875
+ "step": 10300
7876
+ },
7877
+ {
7878
+ "epoch": 9.662605435801312,
7879
+ "grad_norm": 0.79998779296875,
7880
+ "learning_rate": 9.924512000468598e-06,
7881
+ "loss": 0.0232,
7882
+ "step": 10310
7883
+ },
7884
+ {
7885
+ "epoch": 9.671977507029053,
7886
+ "grad_norm": 0.9708638191223145,
7887
+ "learning_rate": 9.924438782234329e-06,
7888
+ "loss": 0.0214,
7889
+ "step": 10320
7890
+ },
7891
+ {
7892
+ "epoch": 9.681349578256794,
7893
+ "grad_norm": 0.5575175881385803,
7894
+ "learning_rate": 9.92436556400006e-06,
7895
+ "loss": 0.0256,
7896
+ "step": 10330
7897
+ },
7898
+ {
7899
+ "epoch": 9.690721649484535,
7900
+ "grad_norm": 1.2645318508148193,
7901
+ "learning_rate": 9.92429234576579e-06,
7902
+ "loss": 0.0276,
7903
+ "step": 10340
7904
+ },
7905
+ {
7906
+ "epoch": 9.700093720712278,
7907
+ "grad_norm": 0.6546396017074585,
7908
+ "learning_rate": 9.924219127531521e-06,
7909
+ "loss": 0.024,
7910
+ "step": 10350
7911
+ },
7912
+ {
7913
+ "epoch": 9.70946579194002,
7914
+ "grad_norm": 0.8439049124717712,
7915
+ "learning_rate": 9.924145909297252e-06,
7916
+ "loss": 0.0259,
7917
+ "step": 10360
7918
+ },
7919
+ {
7920
+ "epoch": 9.71883786316776,
7921
+ "grad_norm": 0.9637166261672974,
7922
+ "learning_rate": 9.924072691062984e-06,
7923
+ "loss": 0.0225,
7924
+ "step": 10370
7925
+ },
7926
+ {
7927
+ "epoch": 9.728209934395501,
7928
+ "grad_norm": 0.6104253530502319,
7929
+ "learning_rate": 9.923999472828713e-06,
7930
+ "loss": 0.0254,
7931
+ "step": 10380
7932
+ },
7933
+ {
7934
+ "epoch": 9.737582005623242,
7935
+ "grad_norm": 0.5664217472076416,
7936
+ "learning_rate": 9.923926254594444e-06,
7937
+ "loss": 0.0192,
7938
+ "step": 10390
7939
+ },
7940
+ {
7941
+ "epoch": 9.746954076850985,
7942
+ "grad_norm": 0.6904122233390808,
7943
+ "learning_rate": 9.923853036360176e-06,
7944
+ "loss": 0.0213,
7945
+ "step": 10400
7946
+ },
7947
+ {
7948
+ "epoch": 9.756326148078726,
7949
+ "grad_norm": 1.0864416360855103,
7950
+ "learning_rate": 9.923779818125907e-06,
7951
+ "loss": 0.0254,
7952
+ "step": 10410
7953
+ },
7954
+ {
7955
+ "epoch": 9.765698219306467,
7956
+ "grad_norm": 0.791348397731781,
7957
+ "learning_rate": 9.923706599891638e-06,
7958
+ "loss": 0.0264,
7959
+ "step": 10420
7960
+ },
7961
+ {
7962
+ "epoch": 9.775070290534208,
7963
+ "grad_norm": 0.7972745895385742,
7964
+ "learning_rate": 9.923633381657369e-06,
7965
+ "loss": 0.0206,
7966
+ "step": 10430
7967
+ },
7968
+ {
7969
+ "epoch": 9.784442361761949,
7970
+ "grad_norm": 0.6930385231971741,
7971
+ "learning_rate": 9.9235601634231e-06,
7972
+ "loss": 0.0283,
7973
+ "step": 10440
7974
+ },
7975
+ {
7976
+ "epoch": 9.793814432989691,
7977
+ "grad_norm": 0.5096721053123474,
7978
+ "learning_rate": 9.92348694518883e-06,
7979
+ "loss": 0.0263,
7980
+ "step": 10450
7981
+ },
7982
+ {
7983
+ "epoch": 9.803186504217432,
7984
+ "grad_norm": 0.7492228150367737,
7985
+ "learning_rate": 9.923413726954561e-06,
7986
+ "loss": 0.0237,
7987
+ "step": 10460
7988
+ },
7989
+ {
7990
+ "epoch": 9.812558575445173,
7991
+ "grad_norm": 0.8097043037414551,
7992
+ "learning_rate": 9.923340508720293e-06,
7993
+ "loss": 0.0225,
7994
+ "step": 10470
7995
+ },
7996
+ {
7997
+ "epoch": 9.821930646672914,
7998
+ "grad_norm": 0.45464569330215454,
7999
+ "learning_rate": 9.923267290486024e-06,
8000
+ "loss": 0.0175,
8001
+ "step": 10480
8002
+ },
8003
+ {
8004
+ "epoch": 9.831302717900655,
8005
+ "grad_norm": 0.6172147393226624,
8006
+ "learning_rate": 9.923194072251753e-06,
8007
+ "loss": 0.0272,
8008
+ "step": 10490
8009
+ },
8010
+ {
8011
+ "epoch": 9.840674789128398,
8012
+ "grad_norm": 0.9826374650001526,
8013
+ "learning_rate": 9.923120854017486e-06,
8014
+ "loss": 0.0233,
8015
+ "step": 10500
8016
+ },
8017
+ {
8018
+ "epoch": 9.840674789128398,
8019
+ "eval_loss": 0.03700366988778114,
8020
+ "eval_pearson_cosine": 0.7760223746299744,
8021
+ "eval_pearson_dot": 0.7342942953109741,
8022
+ "eval_pearson_euclidean": 0.7316151857376099,
8023
+ "eval_pearson_manhattan": 0.7336723804473877,
8024
+ "eval_runtime": 22.135,
8025
+ "eval_samples_per_second": 67.766,
8026
+ "eval_spearman_cosine": 0.7753394120917871,
8027
+ "eval_spearman_dot": 0.7356003834746606,
8028
+ "eval_spearman_euclidean": 0.7371167930939387,
8029
+ "eval_spearman_manhattan": 0.7388623589601665,
8030
+ "eval_steps_per_second": 8.493,
8031
+ "step": 10500
8032
+ },
8033
+ {
8034
+ "epoch": 9.850046860356139,
8035
+ "grad_norm": 0.5944278240203857,
8036
+ "learning_rate": 9.923047635783216e-06,
8037
+ "loss": 0.0245,
8038
+ "step": 10510
8039
+ },
8040
+ {
8041
+ "epoch": 9.85941893158388,
8042
+ "grad_norm": 0.4207167625427246,
8043
+ "learning_rate": 9.922974417548947e-06,
8044
+ "loss": 0.0236,
8045
+ "step": 10520
8046
+ },
8047
+ {
8048
+ "epoch": 9.868791002811621,
8049
+ "grad_norm": 1.185616374015808,
8050
+ "learning_rate": 9.922901199314678e-06,
8051
+ "loss": 0.025,
8052
+ "step": 10530
8053
+ },
8054
+ {
8055
+ "epoch": 9.878163074039362,
8056
+ "grad_norm": 0.6041834354400635,
8057
+ "learning_rate": 9.92282798108041e-06,
8058
+ "loss": 0.0229,
8059
+ "step": 10540
8060
+ },
8061
+ {
8062
+ "epoch": 9.887535145267105,
8063
+ "grad_norm": 1.3135936260223389,
8064
+ "learning_rate": 9.92275476284614e-06,
8065
+ "loss": 0.022,
8066
+ "step": 10550
8067
+ },
8068
+ {
8069
+ "epoch": 9.896907216494846,
8070
+ "grad_norm": 0.7592184543609619,
8071
+ "learning_rate": 9.92268154461187e-06,
8072
+ "loss": 0.0251,
8073
+ "step": 10560
8074
+ },
8075
+ {
8076
+ "epoch": 9.906279287722587,
8077
+ "grad_norm": 0.5679847002029419,
8078
+ "learning_rate": 9.922608326377603e-06,
8079
+ "loss": 0.0218,
8080
+ "step": 10570
8081
+ },
8082
+ {
8083
+ "epoch": 9.915651358950328,
8084
+ "grad_norm": 1.1727142333984375,
8085
+ "learning_rate": 9.922535108143333e-06,
8086
+ "loss": 0.0266,
8087
+ "step": 10580
8088
+ },
8089
+ {
8090
+ "epoch": 9.925023430178069,
8091
+ "grad_norm": 1.2769267559051514,
8092
+ "learning_rate": 9.922461889909064e-06,
8093
+ "loss": 0.0237,
8094
+ "step": 10590
8095
+ },
8096
+ {
8097
+ "epoch": 9.934395501405811,
8098
+ "grad_norm": 0.6604001522064209,
8099
+ "learning_rate": 9.922388671674795e-06,
8100
+ "loss": 0.0206,
8101
+ "step": 10600
8102
+ },
8103
+ {
8104
+ "epoch": 9.943767572633552,
8105
+ "grad_norm": 0.8065370321273804,
8106
+ "learning_rate": 9.922315453440526e-06,
8107
+ "loss": 0.0272,
8108
+ "step": 10610
8109
+ },
8110
+ {
8111
+ "epoch": 9.953139643861293,
8112
+ "grad_norm": 1.0085433721542358,
8113
+ "learning_rate": 9.922242235206256e-06,
8114
+ "loss": 0.019,
8115
+ "step": 10620
8116
+ },
8117
+ {
8118
+ "epoch": 9.962511715089034,
8119
+ "grad_norm": 0.9662045240402222,
8120
+ "learning_rate": 9.922169016971987e-06,
8121
+ "loss": 0.0218,
8122
+ "step": 10630
8123
+ },
8124
+ {
8125
+ "epoch": 9.971883786316775,
8126
+ "grad_norm": 0.49303632974624634,
8127
+ "learning_rate": 9.922095798737718e-06,
8128
+ "loss": 0.0223,
8129
+ "step": 10640
8130
+ },
8131
+ {
8132
+ "epoch": 9.981255857544518,
8133
+ "grad_norm": 0.7215604186058044,
8134
+ "learning_rate": 9.92202258050345e-06,
8135
+ "loss": 0.0259,
8136
+ "step": 10650
8137
+ },
8138
+ {
8139
+ "epoch": 9.990627928772259,
8140
+ "grad_norm": 0.6104753017425537,
8141
+ "learning_rate": 9.92194936226918e-06,
8142
+ "loss": 0.0232,
8143
+ "step": 10660
8144
+ },
8145
+ {
8146
+ "epoch": 10.0,
8147
+ "grad_norm": 1.011549949645996,
8148
+ "learning_rate": 9.92187614403491e-06,
8149
+ "loss": 0.0234,
8150
+ "step": 10670
8151
  }
8152
  ],
8153
  "logging_steps": 10,
 
8162
  "should_evaluate": false,
8163
  "should_log": false,
8164
  "should_save": true,
8165
+ "should_training_stop": true
8166
  },
8167
  "attributes": {}
8168
  }