CocoRoF commited on
Commit
abdf212
·
verified ·
1 Parent(s): eed6aed

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb9e1bcee7f011fef03e2175c8251f4452754d982e429b6f1c85c49ad3df80dd
3
  size 738367848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba21b9e70b08c8157a0be6f7f43a461e7115ad8add195cfd8d7ecd41a4a559ba
3
  size 738367848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7edaedb3fab405157cc63ab8b3d7262e165878db58ddc86f23c506a7b06f9617
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f32f2ca1f450a8a345cef7a5a79a872c2bc4a1ba658bcfe499421b034c07bf73
3
  size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06fea830cf5ad73ec00d500ea6fb952740ac936f18e93fa2d32abde1ea3ead92
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ca51d4b33edcedf9568d5202767b896d828b5aeca18f2cdd82617688464b784
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be561d1df19be227394d8ea607c54262a06c9bf880af0aa5e04a52596a2a6cb0
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10e6ea705ea5a1704cd5773090c827a2013c8caab967a116ff24a5f57ce3ce90
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03f3e24417a59435f5a8450a4aeb0f09cc92734b5c3b45a0701b2c043c415c05
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6ff0b4da12dd0cdcb6e90b04160e41685d9ccc1fa1cc74bb7949edf700200d4
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bea02744c29f30024590ab1629a0e7b7dabbf1e8476456c2e7c5ce46dc35c28
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:322470b09ac4f5d9443d55c37c8b8e7d0e8a1702208c81e52e3a58a8de515b5b
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:041be966454b60c86af576fc1eb7f34189114689abff8f9622b947110f7334c8
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ababe8505205ca2bcb959a2abbe2fbc8b6ad677bd43b1f2ee9055b3cb400b061
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b85766f6596d15a810177d77dd259d9b50588cf100ec5f8ebff5fed881d57957
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed1c3ba656fdb40a72824e366a08e148a30e1089a6ecd019eaf28fa4a17859fa
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8be75d04b1ebe614241b88fd010a5dda1b7bf703c00c6ebe310ca07975830fe7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:042c62210b9e1c9ed394e0a3362b1c773c07591d94f2716a8e928676134742b7
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4699833a7ab4cb692996ef7567f934c0bac79d6a067963a873f89a38e412bd48
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb54f0278c663494261026658652f845bae43245e75ccc213c6897de179f542a
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:452f0ce45a0532872d3dafca4ac6f3ac9310f686f3f3a2026b82609fe1ad92b0
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fe8eacf1bb6bef5171d133da4e7818c1e7ae0192bd879d86e40a8d923aeb81f
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9372071227741331,
5
  "eval_steps": 250,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -771,6 +771,770 @@
771
  "eval_spearman_manhattan": 0.7778181970888292,
772
  "eval_steps_per_second": 8.583,
773
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
774
  }
775
  ],
776
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.8744142455482662,
5
  "eval_steps": 250,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
771
  "eval_spearman_manhattan": 0.7778181970888292,
772
  "eval_steps_per_second": 8.583,
773
  "step": 1000
774
+ },
775
+ {
776
+ "epoch": 0.9465791940018744,
777
+ "grad_norm": 1.1828556060791016,
778
+ "learning_rate": 9.992604958338825e-06,
779
+ "loss": 0.2168,
780
+ "step": 1010
781
+ },
782
+ {
783
+ "epoch": 0.9559512652296157,
784
+ "grad_norm": 1.2189664840698242,
785
+ "learning_rate": 9.992531740104556e-06,
786
+ "loss": 0.2072,
787
+ "step": 1020
788
+ },
789
+ {
790
+ "epoch": 0.9653233364573571,
791
+ "grad_norm": 1.6102409362792969,
792
+ "learning_rate": 9.992458521870287e-06,
793
+ "loss": 0.2228,
794
+ "step": 1030
795
+ },
796
+ {
797
+ "epoch": 0.9746954076850984,
798
+ "grad_norm": 1.6891916990280151,
799
+ "learning_rate": 9.99238530363602e-06,
800
+ "loss": 0.2404,
801
+ "step": 1040
802
+ },
803
+ {
804
+ "epoch": 0.9840674789128397,
805
+ "grad_norm": 1.2274008989334106,
806
+ "learning_rate": 9.99231208540175e-06,
807
+ "loss": 0.2225,
808
+ "step": 1050
809
+ },
810
+ {
811
+ "epoch": 0.993439550140581,
812
+ "grad_norm": 1.2388169765472412,
813
+ "learning_rate": 9.992238867167479e-06,
814
+ "loss": 0.2215,
815
+ "step": 1060
816
+ },
817
+ {
818
+ "epoch": 1.0028116213683225,
819
+ "grad_norm": 1.2347650527954102,
820
+ "learning_rate": 9.992165648933211e-06,
821
+ "loss": 0.2239,
822
+ "step": 1070
823
+ },
824
+ {
825
+ "epoch": 1.0121836925960637,
826
+ "grad_norm": 1.1266793012619019,
827
+ "learning_rate": 9.992092430698942e-06,
828
+ "loss": 0.1932,
829
+ "step": 1080
830
+ },
831
+ {
832
+ "epoch": 1.021555763823805,
833
+ "grad_norm": 1.5187146663665771,
834
+ "learning_rate": 9.992019212464673e-06,
835
+ "loss": 0.205,
836
+ "step": 1090
837
+ },
838
+ {
839
+ "epoch": 1.0309278350515463,
840
+ "grad_norm": 1.4463717937469482,
841
+ "learning_rate": 9.991945994230404e-06,
842
+ "loss": 0.1818,
843
+ "step": 1100
844
+ },
845
+ {
846
+ "epoch": 1.0402999062792877,
847
+ "grad_norm": 1.6186790466308594,
848
+ "learning_rate": 9.991872775996136e-06,
849
+ "loss": 0.2076,
850
+ "step": 1110
851
+ },
852
+ {
853
+ "epoch": 1.0496719775070291,
854
+ "grad_norm": 1.3895883560180664,
855
+ "learning_rate": 9.991799557761865e-06,
856
+ "loss": 0.2096,
857
+ "step": 1120
858
+ },
859
+ {
860
+ "epoch": 1.0590440487347703,
861
+ "grad_norm": 1.296912670135498,
862
+ "learning_rate": 9.991726339527596e-06,
863
+ "loss": 0.2046,
864
+ "step": 1130
865
+ },
866
+ {
867
+ "epoch": 1.0684161199625117,
868
+ "grad_norm": 1.5527839660644531,
869
+ "learning_rate": 9.991653121293328e-06,
870
+ "loss": 0.1972,
871
+ "step": 1140
872
+ },
873
+ {
874
+ "epoch": 1.077788191190253,
875
+ "grad_norm": 1.4777096509933472,
876
+ "learning_rate": 9.99157990305906e-06,
877
+ "loss": 0.2086,
878
+ "step": 1150
879
+ },
880
+ {
881
+ "epoch": 1.0871602624179943,
882
+ "grad_norm": 1.3155533075332642,
883
+ "learning_rate": 9.99150668482479e-06,
884
+ "loss": 0.1969,
885
+ "step": 1160
886
+ },
887
+ {
888
+ "epoch": 1.0965323336457358,
889
+ "grad_norm": 1.5277265310287476,
890
+ "learning_rate": 9.99143346659052e-06,
891
+ "loss": 0.1923,
892
+ "step": 1170
893
+ },
894
+ {
895
+ "epoch": 1.105904404873477,
896
+ "grad_norm": 1.3764179944992065,
897
+ "learning_rate": 9.991360248356251e-06,
898
+ "loss": 0.1916,
899
+ "step": 1180
900
+ },
901
+ {
902
+ "epoch": 1.1152764761012184,
903
+ "grad_norm": 1.6024688482284546,
904
+ "learning_rate": 9.991287030121982e-06,
905
+ "loss": 0.185,
906
+ "step": 1190
907
+ },
908
+ {
909
+ "epoch": 1.1246485473289598,
910
+ "grad_norm": 1.2752821445465088,
911
+ "learning_rate": 9.991213811887713e-06,
912
+ "loss": 0.1829,
913
+ "step": 1200
914
+ },
915
+ {
916
+ "epoch": 1.134020618556701,
917
+ "grad_norm": 1.4704368114471436,
918
+ "learning_rate": 9.991140593653444e-06,
919
+ "loss": 0.2006,
920
+ "step": 1210
921
+ },
922
+ {
923
+ "epoch": 1.1433926897844424,
924
+ "grad_norm": 1.3614213466644287,
925
+ "learning_rate": 9.991067375419176e-06,
926
+ "loss": 0.1776,
927
+ "step": 1220
928
+ },
929
+ {
930
+ "epoch": 1.1527647610121836,
931
+ "grad_norm": 1.2852075099945068,
932
+ "learning_rate": 9.990994157184905e-06,
933
+ "loss": 0.2116,
934
+ "step": 1230
935
+ },
936
+ {
937
+ "epoch": 1.162136832239925,
938
+ "grad_norm": 1.1774332523345947,
939
+ "learning_rate": 9.990920938950636e-06,
940
+ "loss": 0.1909,
941
+ "step": 1240
942
+ },
943
+ {
944
+ "epoch": 1.1715089034676662,
945
+ "grad_norm": 1.0442605018615723,
946
+ "learning_rate": 9.990847720716368e-06,
947
+ "loss": 0.1933,
948
+ "step": 1250
949
+ },
950
+ {
951
+ "epoch": 1.1715089034676662,
952
+ "eval_loss": 0.08017747104167938,
953
+ "eval_pearson_cosine": 0.7703680992126465,
954
+ "eval_pearson_dot": 0.6808142066001892,
955
+ "eval_pearson_euclidean": 0.7676056623458862,
956
+ "eval_pearson_manhattan": 0.7677772045135498,
957
+ "eval_runtime": 22.1599,
958
+ "eval_samples_per_second": 67.69,
959
+ "eval_spearman_cosine": 0.7790172740054649,
960
+ "eval_spearman_dot": 0.6796557194170769,
961
+ "eval_spearman_euclidean": 0.7739566900498013,
962
+ "eval_spearman_manhattan": 0.7741509176342483,
963
+ "eval_steps_per_second": 8.484,
964
+ "step": 1250
965
+ },
966
+ {
967
+ "epoch": 1.1808809746954076,
968
+ "grad_norm": 1.3561466932296753,
969
+ "learning_rate": 9.990774502482099e-06,
970
+ "loss": 0.1921,
971
+ "step": 1260
972
+ },
973
+ {
974
+ "epoch": 1.190253045923149,
975
+ "grad_norm": 1.2151105403900146,
976
+ "learning_rate": 9.99070128424783e-06,
977
+ "loss": 0.1865,
978
+ "step": 1270
979
+ },
980
+ {
981
+ "epoch": 1.1996251171508903,
982
+ "grad_norm": 1.4363489151000977,
983
+ "learning_rate": 9.99062806601356e-06,
984
+ "loss": 0.2071,
985
+ "step": 1280
986
+ },
987
+ {
988
+ "epoch": 1.2089971883786317,
989
+ "grad_norm": 1.1078994274139404,
990
+ "learning_rate": 9.990554847779291e-06,
991
+ "loss": 0.1984,
992
+ "step": 1290
993
+ },
994
+ {
995
+ "epoch": 1.218369259606373,
996
+ "grad_norm": 1.4608142375946045,
997
+ "learning_rate": 9.990481629545022e-06,
998
+ "loss": 0.1926,
999
+ "step": 1300
1000
+ },
1001
+ {
1002
+ "epoch": 1.2277413308341143,
1003
+ "grad_norm": 1.5290361642837524,
1004
+ "learning_rate": 9.990408411310753e-06,
1005
+ "loss": 0.1935,
1006
+ "step": 1310
1007
+ },
1008
+ {
1009
+ "epoch": 1.2371134020618557,
1010
+ "grad_norm": 1.09344482421875,
1011
+ "learning_rate": 9.990335193076485e-06,
1012
+ "loss": 0.2026,
1013
+ "step": 1320
1014
+ },
1015
+ {
1016
+ "epoch": 1.246485473289597,
1017
+ "grad_norm": 1.5567576885223389,
1018
+ "learning_rate": 9.990261974842216e-06,
1019
+ "loss": 0.1968,
1020
+ "step": 1330
1021
+ },
1022
+ {
1023
+ "epoch": 1.2558575445173383,
1024
+ "grad_norm": 1.243221402168274,
1025
+ "learning_rate": 9.990188756607947e-06,
1026
+ "loss": 0.1859,
1027
+ "step": 1340
1028
+ },
1029
+ {
1030
+ "epoch": 1.2652296157450795,
1031
+ "grad_norm": 1.5287493467330933,
1032
+ "learning_rate": 9.990115538373678e-06,
1033
+ "loss": 0.2067,
1034
+ "step": 1350
1035
+ },
1036
+ {
1037
+ "epoch": 1.274601686972821,
1038
+ "grad_norm": 1.1587677001953125,
1039
+ "learning_rate": 9.990042320139408e-06,
1040
+ "loss": 0.1848,
1041
+ "step": 1360
1042
+ },
1043
+ {
1044
+ "epoch": 1.2839737582005624,
1045
+ "grad_norm": 1.3521069288253784,
1046
+ "learning_rate": 9.989969101905139e-06,
1047
+ "loss": 0.1975,
1048
+ "step": 1370
1049
+ },
1050
+ {
1051
+ "epoch": 1.2933458294283038,
1052
+ "grad_norm": 1.1655584573745728,
1053
+ "learning_rate": 9.98989588367087e-06,
1054
+ "loss": 0.1963,
1055
+ "step": 1380
1056
+ },
1057
+ {
1058
+ "epoch": 1.302717900656045,
1059
+ "grad_norm": 1.1636890172958374,
1060
+ "learning_rate": 9.989822665436602e-06,
1061
+ "loss": 0.1768,
1062
+ "step": 1390
1063
+ },
1064
+ {
1065
+ "epoch": 1.3120899718837864,
1066
+ "grad_norm": 1.3106030225753784,
1067
+ "learning_rate": 9.989749447202333e-06,
1068
+ "loss": 0.1918,
1069
+ "step": 1400
1070
+ },
1071
+ {
1072
+ "epoch": 1.3214620431115276,
1073
+ "grad_norm": 1.314274787902832,
1074
+ "learning_rate": 9.989676228968062e-06,
1075
+ "loss": 0.1733,
1076
+ "step": 1410
1077
+ },
1078
+ {
1079
+ "epoch": 1.330834114339269,
1080
+ "grad_norm": 1.646234393119812,
1081
+ "learning_rate": 9.989603010733795e-06,
1082
+ "loss": 0.1797,
1083
+ "step": 1420
1084
+ },
1085
+ {
1086
+ "epoch": 1.3402061855670104,
1087
+ "grad_norm": 1.3321646451950073,
1088
+ "learning_rate": 9.989529792499525e-06,
1089
+ "loss": 0.1726,
1090
+ "step": 1430
1091
+ },
1092
+ {
1093
+ "epoch": 1.3495782567947516,
1094
+ "grad_norm": 1.3959871530532837,
1095
+ "learning_rate": 9.989456574265256e-06,
1096
+ "loss": 0.1889,
1097
+ "step": 1440
1098
+ },
1099
+ {
1100
+ "epoch": 1.358950328022493,
1101
+ "grad_norm": 1.1790053844451904,
1102
+ "learning_rate": 9.989383356030987e-06,
1103
+ "loss": 0.1779,
1104
+ "step": 1450
1105
+ },
1106
+ {
1107
+ "epoch": 1.3683223992502342,
1108
+ "grad_norm": 1.7612881660461426,
1109
+ "learning_rate": 9.989310137796718e-06,
1110
+ "loss": 0.1834,
1111
+ "step": 1460
1112
+ },
1113
+ {
1114
+ "epoch": 1.3776944704779757,
1115
+ "grad_norm": 1.2366232872009277,
1116
+ "learning_rate": 9.989236919562448e-06,
1117
+ "loss": 0.1996,
1118
+ "step": 1470
1119
+ },
1120
+ {
1121
+ "epoch": 1.387066541705717,
1122
+ "grad_norm": 1.550465703010559,
1123
+ "learning_rate": 9.989163701328179e-06,
1124
+ "loss": 0.1991,
1125
+ "step": 1480
1126
+ },
1127
+ {
1128
+ "epoch": 1.3964386129334583,
1129
+ "grad_norm": 1.2935107946395874,
1130
+ "learning_rate": 9.98909048309391e-06,
1131
+ "loss": 0.1956,
1132
+ "step": 1490
1133
+ },
1134
+ {
1135
+ "epoch": 1.4058106841611997,
1136
+ "grad_norm": 0.9709776639938354,
1137
+ "learning_rate": 9.989017264859642e-06,
1138
+ "loss": 0.1872,
1139
+ "step": 1500
1140
+ },
1141
+ {
1142
+ "epoch": 1.4058106841611997,
1143
+ "eval_loss": 0.07902642339468002,
1144
+ "eval_pearson_cosine": 0.7684531211853027,
1145
+ "eval_pearson_dot": 0.6580111980438232,
1146
+ "eval_pearson_euclidean": 0.768983006477356,
1147
+ "eval_pearson_manhattan": 0.7692690491676331,
1148
+ "eval_runtime": 23.5462,
1149
+ "eval_samples_per_second": 63.704,
1150
+ "eval_spearman_cosine": 0.7777241764238451,
1151
+ "eval_spearman_dot": 0.6568945327389543,
1152
+ "eval_spearman_euclidean": 0.7752386276211667,
1153
+ "eval_spearman_manhattan": 0.7755204438878311,
1154
+ "eval_steps_per_second": 7.984,
1155
+ "step": 1500
1156
+ },
1157
+ {
1158
+ "epoch": 1.415182755388941,
1159
+ "grad_norm": 1.5001726150512695,
1160
+ "learning_rate": 9.988944046625373e-06,
1161
+ "loss": 0.2094,
1162
+ "step": 1510
1163
+ },
1164
+ {
1165
+ "epoch": 1.4245548266166823,
1166
+ "grad_norm": 1.1697657108306885,
1167
+ "learning_rate": 9.988870828391102e-06,
1168
+ "loss": 0.1862,
1169
+ "step": 1520
1170
+ },
1171
+ {
1172
+ "epoch": 1.4339268978444237,
1173
+ "grad_norm": 1.3496723175048828,
1174
+ "learning_rate": 9.988797610156834e-06,
1175
+ "loss": 0.1863,
1176
+ "step": 1530
1177
+ },
1178
+ {
1179
+ "epoch": 1.443298969072165,
1180
+ "grad_norm": 1.3314088582992554,
1181
+ "learning_rate": 9.988724391922565e-06,
1182
+ "loss": 0.1809,
1183
+ "step": 1540
1184
+ },
1185
+ {
1186
+ "epoch": 1.4526710402999063,
1187
+ "grad_norm": 1.2966681718826294,
1188
+ "learning_rate": 9.988651173688296e-06,
1189
+ "loss": 0.1799,
1190
+ "step": 1550
1191
+ },
1192
+ {
1193
+ "epoch": 1.4620431115276475,
1194
+ "grad_norm": 1.141318917274475,
1195
+ "learning_rate": 9.988577955454027e-06,
1196
+ "loss": 0.1983,
1197
+ "step": 1560
1198
+ },
1199
+ {
1200
+ "epoch": 1.471415182755389,
1201
+ "grad_norm": 1.1170287132263184,
1202
+ "learning_rate": 9.98850473721976e-06,
1203
+ "loss": 0.1823,
1204
+ "step": 1570
1205
+ },
1206
+ {
1207
+ "epoch": 1.4807872539831304,
1208
+ "grad_norm": 1.4531837701797485,
1209
+ "learning_rate": 9.988431518985488e-06,
1210
+ "loss": 0.1693,
1211
+ "step": 1580
1212
+ },
1213
+ {
1214
+ "epoch": 1.4901593252108716,
1215
+ "grad_norm": 1.5249556303024292,
1216
+ "learning_rate": 9.988358300751219e-06,
1217
+ "loss": 0.2014,
1218
+ "step": 1590
1219
+ },
1220
+ {
1221
+ "epoch": 1.499531396438613,
1222
+ "grad_norm": 1.319170594215393,
1223
+ "learning_rate": 9.988285082516951e-06,
1224
+ "loss": 0.1841,
1225
+ "step": 1600
1226
+ },
1227
+ {
1228
+ "epoch": 1.5089034676663542,
1229
+ "grad_norm": 1.2907928228378296,
1230
+ "learning_rate": 9.988211864282682e-06,
1231
+ "loss": 0.1778,
1232
+ "step": 1610
1233
+ },
1234
+ {
1235
+ "epoch": 1.5182755388940956,
1236
+ "grad_norm": 1.170284628868103,
1237
+ "learning_rate": 9.988138646048413e-06,
1238
+ "loss": 0.1668,
1239
+ "step": 1620
1240
+ },
1241
+ {
1242
+ "epoch": 1.527647610121837,
1243
+ "grad_norm": 1.4182498455047607,
1244
+ "learning_rate": 9.988065427814144e-06,
1245
+ "loss": 0.1968,
1246
+ "step": 1630
1247
+ },
1248
+ {
1249
+ "epoch": 1.5370196813495782,
1250
+ "grad_norm": 1.3137290477752686,
1251
+ "learning_rate": 9.987992209579874e-06,
1252
+ "loss": 0.1734,
1253
+ "step": 1640
1254
+ },
1255
+ {
1256
+ "epoch": 1.5463917525773194,
1257
+ "grad_norm": 1.458721399307251,
1258
+ "learning_rate": 9.987918991345605e-06,
1259
+ "loss": 0.209,
1260
+ "step": 1650
1261
+ },
1262
+ {
1263
+ "epoch": 1.5557638238050608,
1264
+ "grad_norm": 1.1368082761764526,
1265
+ "learning_rate": 9.987845773111336e-06,
1266
+ "loss": 0.1831,
1267
+ "step": 1660
1268
+ },
1269
+ {
1270
+ "epoch": 1.5651358950328023,
1271
+ "grad_norm": 1.0743663311004639,
1272
+ "learning_rate": 9.987772554877068e-06,
1273
+ "loss": 0.1883,
1274
+ "step": 1670
1275
+ },
1276
+ {
1277
+ "epoch": 1.5745079662605437,
1278
+ "grad_norm": 1.4294681549072266,
1279
+ "learning_rate": 9.987699336642799e-06,
1280
+ "loss": 0.1851,
1281
+ "step": 1680
1282
+ },
1283
+ {
1284
+ "epoch": 1.5838800374882849,
1285
+ "grad_norm": 1.0537577867507935,
1286
+ "learning_rate": 9.987626118408528e-06,
1287
+ "loss": 0.1818,
1288
+ "step": 1690
1289
+ },
1290
+ {
1291
+ "epoch": 1.5932521087160263,
1292
+ "grad_norm": 1.3930073976516724,
1293
+ "learning_rate": 9.98755290017426e-06,
1294
+ "loss": 0.1876,
1295
+ "step": 1700
1296
+ },
1297
+ {
1298
+ "epoch": 1.6026241799437675,
1299
+ "grad_norm": 1.3290959596633911,
1300
+ "learning_rate": 9.987479681939991e-06,
1301
+ "loss": 0.1777,
1302
+ "step": 1710
1303
+ },
1304
+ {
1305
+ "epoch": 1.611996251171509,
1306
+ "grad_norm": 1.3895900249481201,
1307
+ "learning_rate": 9.987406463705722e-06,
1308
+ "loss": 0.1728,
1309
+ "step": 1720
1310
+ },
1311
+ {
1312
+ "epoch": 1.6213683223992503,
1313
+ "grad_norm": 1.336679220199585,
1314
+ "learning_rate": 9.987333245471453e-06,
1315
+ "loss": 0.202,
1316
+ "step": 1730
1317
+ },
1318
+ {
1319
+ "epoch": 1.6307403936269915,
1320
+ "grad_norm": 1.4338617324829102,
1321
+ "learning_rate": 9.987260027237184e-06,
1322
+ "loss": 0.1745,
1323
+ "step": 1740
1324
+ },
1325
+ {
1326
+ "epoch": 1.640112464854733,
1327
+ "grad_norm": 1.1854125261306763,
1328
+ "learning_rate": 9.987186809002914e-06,
1329
+ "loss": 0.1628,
1330
+ "step": 1750
1331
+ },
1332
+ {
1333
+ "epoch": 1.640112464854733,
1334
+ "eval_loss": 0.07191870361566544,
1335
+ "eval_pearson_cosine": 0.7651911973953247,
1336
+ "eval_pearson_dot": 0.6584045886993408,
1337
+ "eval_pearson_euclidean": 0.7615811228752136,
1338
+ "eval_pearson_manhattan": 0.7618914842605591,
1339
+ "eval_runtime": 22.2177,
1340
+ "eval_samples_per_second": 67.514,
1341
+ "eval_spearman_cosine": 0.7733826669765486,
1342
+ "eval_spearman_dot": 0.6574446699366203,
1343
+ "eval_spearman_euclidean": 0.7678793093449918,
1344
+ "eval_spearman_manhattan": 0.7684997409854779,
1345
+ "eval_steps_per_second": 8.462,
1346
+ "step": 1750
1347
+ },
1348
+ {
1349
+ "epoch": 1.6494845360824741,
1350
+ "grad_norm": 1.468126654624939,
1351
+ "learning_rate": 9.987113590768645e-06,
1352
+ "loss": 0.1714,
1353
+ "step": 1760
1354
+ },
1355
+ {
1356
+ "epoch": 1.6588566073102156,
1357
+ "grad_norm": 1.3639568090438843,
1358
+ "learning_rate": 9.987040372534378e-06,
1359
+ "loss": 0.1839,
1360
+ "step": 1770
1361
+ },
1362
+ {
1363
+ "epoch": 1.668228678537957,
1364
+ "grad_norm": 1.2494312524795532,
1365
+ "learning_rate": 9.986967154300108e-06,
1366
+ "loss": 0.1753,
1367
+ "step": 1780
1368
+ },
1369
+ {
1370
+ "epoch": 1.6776007497656982,
1371
+ "grad_norm": 1.2897909879684448,
1372
+ "learning_rate": 9.986893936065839e-06,
1373
+ "loss": 0.1704,
1374
+ "step": 1790
1375
+ },
1376
+ {
1377
+ "epoch": 1.6869728209934396,
1378
+ "grad_norm": 1.413866400718689,
1379
+ "learning_rate": 9.98682071783157e-06,
1380
+ "loss": 0.1868,
1381
+ "step": 1800
1382
+ },
1383
+ {
1384
+ "epoch": 1.6963448922211808,
1385
+ "grad_norm": 1.093849778175354,
1386
+ "learning_rate": 9.9867474995973e-06,
1387
+ "loss": 0.1889,
1388
+ "step": 1810
1389
+ },
1390
+ {
1391
+ "epoch": 1.7057169634489222,
1392
+ "grad_norm": 1.3857814073562622,
1393
+ "learning_rate": 9.986674281363031e-06,
1394
+ "loss": 0.1818,
1395
+ "step": 1820
1396
+ },
1397
+ {
1398
+ "epoch": 1.7150890346766636,
1399
+ "grad_norm": 1.3772344589233398,
1400
+ "learning_rate": 9.986601063128762e-06,
1401
+ "loss": 0.1683,
1402
+ "step": 1830
1403
+ },
1404
+ {
1405
+ "epoch": 1.7244611059044048,
1406
+ "grad_norm": 1.3299206495285034,
1407
+ "learning_rate": 9.986527844894493e-06,
1408
+ "loss": 0.1865,
1409
+ "step": 1840
1410
+ },
1411
+ {
1412
+ "epoch": 1.7338331771321462,
1413
+ "grad_norm": 1.3139843940734863,
1414
+ "learning_rate": 9.986454626660225e-06,
1415
+ "loss": 0.169,
1416
+ "step": 1850
1417
+ },
1418
+ {
1419
+ "epoch": 1.7432052483598874,
1420
+ "grad_norm": 1.3562296628952026,
1421
+ "learning_rate": 9.986381408425954e-06,
1422
+ "loss": 0.2012,
1423
+ "step": 1860
1424
+ },
1425
+ {
1426
+ "epoch": 1.7525773195876289,
1427
+ "grad_norm": 1.2332826852798462,
1428
+ "learning_rate": 9.986308190191685e-06,
1429
+ "loss": 0.1877,
1430
+ "step": 1870
1431
+ },
1432
+ {
1433
+ "epoch": 1.7619493908153703,
1434
+ "grad_norm": 1.083622932434082,
1435
+ "learning_rate": 9.986234971957418e-06,
1436
+ "loss": 0.2026,
1437
+ "step": 1880
1438
+ },
1439
+ {
1440
+ "epoch": 1.7713214620431117,
1441
+ "grad_norm": 1.6391818523406982,
1442
+ "learning_rate": 9.986161753723148e-06,
1443
+ "loss": 0.1902,
1444
+ "step": 1890
1445
+ },
1446
+ {
1447
+ "epoch": 1.780693533270853,
1448
+ "grad_norm": 1.0985593795776367,
1449
+ "learning_rate": 9.986088535488879e-06,
1450
+ "loss": 0.1845,
1451
+ "step": 1900
1452
+ },
1453
+ {
1454
+ "epoch": 1.790065604498594,
1455
+ "grad_norm": 1.609025001525879,
1456
+ "learning_rate": 9.98601531725461e-06,
1457
+ "loss": 0.1939,
1458
+ "step": 1910
1459
+ },
1460
+ {
1461
+ "epoch": 1.7994376757263355,
1462
+ "grad_norm": 1.0637205839157104,
1463
+ "learning_rate": 9.98594209902034e-06,
1464
+ "loss": 0.1775,
1465
+ "step": 1920
1466
+ },
1467
+ {
1468
+ "epoch": 1.808809746954077,
1469
+ "grad_norm": 1.159469723701477,
1470
+ "learning_rate": 9.985868880786071e-06,
1471
+ "loss": 0.161,
1472
+ "step": 1930
1473
+ },
1474
+ {
1475
+ "epoch": 1.8181818181818183,
1476
+ "grad_norm": 1.1251918077468872,
1477
+ "learning_rate": 9.985795662551802e-06,
1478
+ "loss": 0.1965,
1479
+ "step": 1940
1480
+ },
1481
+ {
1482
+ "epoch": 1.8275538894095595,
1483
+ "grad_norm": 1.3804899454116821,
1484
+ "learning_rate": 9.985722444317534e-06,
1485
+ "loss": 0.1768,
1486
+ "step": 1950
1487
+ },
1488
+ {
1489
+ "epoch": 1.8369259606373007,
1490
+ "grad_norm": 1.194275140762329,
1491
+ "learning_rate": 9.985649226083265e-06,
1492
+ "loss": 0.1782,
1493
+ "step": 1960
1494
+ },
1495
+ {
1496
+ "epoch": 1.8462980318650422,
1497
+ "grad_norm": 1.5173845291137695,
1498
+ "learning_rate": 9.985576007848996e-06,
1499
+ "loss": 0.193,
1500
+ "step": 1970
1501
+ },
1502
+ {
1503
+ "epoch": 1.8556701030927836,
1504
+ "grad_norm": 1.7733920812606812,
1505
+ "learning_rate": 9.985502789614727e-06,
1506
+ "loss": 0.1804,
1507
+ "step": 1980
1508
+ },
1509
+ {
1510
+ "epoch": 1.865042174320525,
1511
+ "grad_norm": 1.1430355310440063,
1512
+ "learning_rate": 9.985429571380457e-06,
1513
+ "loss": 0.1869,
1514
+ "step": 1990
1515
+ },
1516
+ {
1517
+ "epoch": 1.8744142455482662,
1518
+ "grad_norm": 1.3633067607879639,
1519
+ "learning_rate": 9.985356353146188e-06,
1520
+ "loss": 0.1983,
1521
+ "step": 2000
1522
+ },
1523
+ {
1524
+ "epoch": 1.8744142455482662,
1525
+ "eval_loss": 0.07371454685926437,
1526
+ "eval_pearson_cosine": 0.7772414684295654,
1527
+ "eval_pearson_dot": 0.660416841506958,
1528
+ "eval_pearson_euclidean": 0.7648824453353882,
1529
+ "eval_pearson_manhattan": 0.7654331922531128,
1530
+ "eval_runtime": 22.1973,
1531
+ "eval_samples_per_second": 67.576,
1532
+ "eval_spearman_cosine": 0.7863920785446639,
1533
+ "eval_spearman_dot": 0.6607574545837009,
1534
+ "eval_spearman_euclidean": 0.7740511645049805,
1535
+ "eval_spearman_manhattan": 0.7747616492851076,
1536
+ "eval_steps_per_second": 8.47,
1537
+ "step": 2000
1538
  }
1539
  ],
1540
  "logging_steps": 10,