CocoRoF commited on
Commit
c046b24
·
verified ·
1 Parent(s): e2a280d

Training in progress, step 6000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77bdd07402c0fe434c587ece44b2edeb5f86258e2a03ca9d156a6d48b5150f65
3
  size 738367848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee3ca4993c74fdba66a4a45cec937d889ea635d5a3363dbf41258ab3cdb82d1e
3
  size 738367848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cfa7170844cfd8c9e538ce6b712c134638973e02bcee820b7bd2a686a44027c
3
  size 1476823354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:703c818dd2c8e17f4fd71ce7085e8a08cd52a233c9ec0efa1da31b9b6249f59f
3
  size 1476823354
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5504b8d722b425f58bab6aedf9a43fc8129b02036307d31c7a21e224d2412ace
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b92a5a9232e10290e92a7ee43e17a65c2d7fd5bd9b7fae4a78bb653de6ff7f1e
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e555860fd7a2cfb8945f188f7232baf938ce622886881cc422b3eb0e7444eda4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027e63f43c97b9a1e6e633ec27654b2d81e59843c5c61895f16184d95b5ecfce
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea0c272cf77c9504efaa077bfa8f9229d461c16d6641be0e57a7f20f9b761399
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ce4c5bab7dcc40e8eda0dabcca4b51013677ae4eb8d8b9aae51fdbac3ff5302
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:921e0812b510be6ea788fc2c6aa7541f3ff4eb1bb3dd7c230340a35d8e1e764b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b04fd2cdaec73d40bd342c2736426a28ca23cb93fea46275f9c93f0355e8e51
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3603a5403f0489f4be4d27720a0fa7e0fe0d08dbde5d58c1060cef37b9084d2
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb08899b31a5efc329181f0ecc59c2d36f1c1b6251e03bcab322df2bd5b23a5
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b9f685b83b5545cc2db9c29e88184590e89acb7836b4bb92a6a1df01b4bf43f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1f2ce10faab494375937f049ced3ce1d0fb669dd4ede3a5d75a6c0bc4eebe50
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5843202dd0ab5bc42fd0b6fa35e7cc2dca365d38fb379a2faf93bf274ef023e6
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fb2fd90ebb124ea406837f497911487ba5c20d875615f6a03594328a7dafc26
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79f72ffc2afb7672fc32ddd050c69181c2c0c16f8eac79a352eecb064fb5a9c7
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de98b2885a6933e6cd867d0a9af94f7453c3971e0017fa67668e1a0ca515fd9d
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10f98a9bf3c827b7f13510b7bc00db936ca6b0dcd935745bc447f03aae03112f
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:662f374a56de952606a2f764f88941c488163b14a1fd8282c0553ed7f96dbcfe
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.686035613870665,
5
  "eval_steps": 250,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3827,6 +3827,770 @@
3827
  "eval_spearman_manhattan": 0.745568766414613,
3828
  "eval_steps_per_second": 8.354,
3829
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3830
  }
3831
  ],
3832
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.623242736644799,
5
  "eval_steps": 250,
6
+ "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3827
  "eval_spearman_manhattan": 0.745568766414613,
3828
  "eval_steps_per_second": 8.354,
3829
  "step": 5000
3830
+ },
3831
+ {
3832
+ "epoch": 4.695407685098407,
3833
+ "grad_norm": 1.6077407598495483,
3834
+ "learning_rate": 9.9633176646312e-06,
3835
+ "loss": 0.0993,
3836
+ "step": 5010
3837
+ },
3838
+ {
3839
+ "epoch": 4.704779756326148,
3840
+ "grad_norm": 1.206281065940857,
3841
+ "learning_rate": 9.963244446396931e-06,
3842
+ "loss": 0.082,
3843
+ "step": 5020
3844
+ },
3845
+ {
3846
+ "epoch": 4.71415182755389,
3847
+ "grad_norm": 1.168562650680542,
3848
+ "learning_rate": 9.963171228162662e-06,
3849
+ "loss": 0.075,
3850
+ "step": 5030
3851
+ },
3852
+ {
3853
+ "epoch": 4.723523898781631,
3854
+ "grad_norm": 1.0943313837051392,
3855
+ "learning_rate": 9.963098009928394e-06,
3856
+ "loss": 0.0907,
3857
+ "step": 5040
3858
+ },
3859
+ {
3860
+ "epoch": 4.7328959700093725,
3861
+ "grad_norm": 1.1832613945007324,
3862
+ "learning_rate": 9.963024791694125e-06,
3863
+ "loss": 0.0776,
3864
+ "step": 5050
3865
+ },
3866
+ {
3867
+ "epoch": 4.742268041237113,
3868
+ "grad_norm": 1.1568524837493896,
3869
+ "learning_rate": 9.962951573459856e-06,
3870
+ "loss": 0.0956,
3871
+ "step": 5060
3872
+ },
3873
+ {
3874
+ "epoch": 4.751640112464854,
3875
+ "grad_norm": 1.4179660081863403,
3876
+ "learning_rate": 9.962878355225586e-06,
3877
+ "loss": 0.079,
3878
+ "step": 5070
3879
+ },
3880
+ {
3881
+ "epoch": 4.761012183692596,
3882
+ "grad_norm": 1.56465744972229,
3883
+ "learning_rate": 9.962805136991317e-06,
3884
+ "loss": 0.0708,
3885
+ "step": 5080
3886
+ },
3887
+ {
3888
+ "epoch": 4.770384254920337,
3889
+ "grad_norm": 1.47963547706604,
3890
+ "learning_rate": 9.962731918757048e-06,
3891
+ "loss": 0.0817,
3892
+ "step": 5090
3893
+ },
3894
+ {
3895
+ "epoch": 4.779756326148079,
3896
+ "grad_norm": 1.4979149103164673,
3897
+ "learning_rate": 9.962658700522779e-06,
3898
+ "loss": 0.0859,
3899
+ "step": 5100
3900
+ },
3901
+ {
3902
+ "epoch": 4.78912839737582,
3903
+ "grad_norm": 1.0254287719726562,
3904
+ "learning_rate": 9.962585482288511e-06,
3905
+ "loss": 0.077,
3906
+ "step": 5110
3907
+ },
3908
+ {
3909
+ "epoch": 4.798500468603561,
3910
+ "grad_norm": 1.5644149780273438,
3911
+ "learning_rate": 9.96251226405424e-06,
3912
+ "loss": 0.0775,
3913
+ "step": 5120
3914
+ },
3915
+ {
3916
+ "epoch": 4.807872539831303,
3917
+ "grad_norm": 1.2777773141860962,
3918
+ "learning_rate": 9.962439045819971e-06,
3919
+ "loss": 0.0734,
3920
+ "step": 5130
3921
+ },
3922
+ {
3923
+ "epoch": 4.817244611059044,
3924
+ "grad_norm": 1.130614995956421,
3925
+ "learning_rate": 9.962365827585703e-06,
3926
+ "loss": 0.082,
3927
+ "step": 5140
3928
+ },
3929
+ {
3930
+ "epoch": 4.826616682286786,
3931
+ "grad_norm": 0.9016211032867432,
3932
+ "learning_rate": 9.962292609351434e-06,
3933
+ "loss": 0.08,
3934
+ "step": 5150
3935
+ },
3936
+ {
3937
+ "epoch": 4.835988753514527,
3938
+ "grad_norm": 1.4159069061279297,
3939
+ "learning_rate": 9.962219391117165e-06,
3940
+ "loss": 0.0841,
3941
+ "step": 5160
3942
+ },
3943
+ {
3944
+ "epoch": 4.845360824742268,
3945
+ "grad_norm": 1.600085973739624,
3946
+ "learning_rate": 9.962146172882896e-06,
3947
+ "loss": 0.0766,
3948
+ "step": 5170
3949
+ },
3950
+ {
3951
+ "epoch": 4.85473289597001,
3952
+ "grad_norm": 1.4401110410690308,
3953
+ "learning_rate": 9.962072954648626e-06,
3954
+ "loss": 0.0869,
3955
+ "step": 5180
3956
+ },
3957
+ {
3958
+ "epoch": 4.8641049671977505,
3959
+ "grad_norm": 1.4603939056396484,
3960
+ "learning_rate": 9.961999736414357e-06,
3961
+ "loss": 0.077,
3962
+ "step": 5190
3963
+ },
3964
+ {
3965
+ "epoch": 4.873477038425492,
3966
+ "grad_norm": 1.0498592853546143,
3967
+ "learning_rate": 9.961926518180088e-06,
3968
+ "loss": 0.0673,
3969
+ "step": 5200
3970
+ },
3971
+ {
3972
+ "epoch": 4.882849109653233,
3973
+ "grad_norm": 1.9157027006149292,
3974
+ "learning_rate": 9.96185329994582e-06,
3975
+ "loss": 0.0865,
3976
+ "step": 5210
3977
+ },
3978
+ {
3979
+ "epoch": 4.892221180880974,
3980
+ "grad_norm": 1.0183812379837036,
3981
+ "learning_rate": 9.961780081711551e-06,
3982
+ "loss": 0.0809,
3983
+ "step": 5220
3984
+ },
3985
+ {
3986
+ "epoch": 4.901593252108716,
3987
+ "grad_norm": 1.4563605785369873,
3988
+ "learning_rate": 9.96170686347728e-06,
3989
+ "loss": 0.086,
3990
+ "step": 5230
3991
+ },
3992
+ {
3993
+ "epoch": 4.910965323336457,
3994
+ "grad_norm": 1.1856083869934082,
3995
+ "learning_rate": 9.961633645243013e-06,
3996
+ "loss": 0.0802,
3997
+ "step": 5240
3998
+ },
3999
+ {
4000
+ "epoch": 4.920337394564199,
4001
+ "grad_norm": 1.3724653720855713,
4002
+ "learning_rate": 9.961560427008743e-06,
4003
+ "loss": 0.0839,
4004
+ "step": 5250
4005
+ },
4006
+ {
4007
+ "epoch": 4.920337394564199,
4008
+ "eval_loss": 0.04000931978225708,
4009
+ "eval_pearson_cosine": 0.7643105387687683,
4010
+ "eval_pearson_dot": 0.6954823732376099,
4011
+ "eval_pearson_euclidean": 0.7297146320343018,
4012
+ "eval_pearson_manhattan": 0.7310500144958496,
4013
+ "eval_runtime": 21.985,
4014
+ "eval_samples_per_second": 68.228,
4015
+ "eval_spearman_cosine": 0.7658903505068073,
4016
+ "eval_spearman_dot": 0.6968591888025883,
4017
+ "eval_spearman_euclidean": 0.7350736410651904,
4018
+ "eval_spearman_manhattan": 0.7366836781540181,
4019
+ "eval_steps_per_second": 8.551,
4020
+ "step": 5250
4021
+ },
4022
+ {
4023
+ "epoch": 4.92970946579194,
4024
+ "grad_norm": 1.7151585817337036,
4025
+ "learning_rate": 9.961487208774474e-06,
4026
+ "loss": 0.0791,
4027
+ "step": 5260
4028
+ },
4029
+ {
4030
+ "epoch": 4.939081537019681,
4031
+ "grad_norm": 1.6940653324127197,
4032
+ "learning_rate": 9.961413990540205e-06,
4033
+ "loss": 0.0893,
4034
+ "step": 5270
4035
+ },
4036
+ {
4037
+ "epoch": 4.948453608247423,
4038
+ "grad_norm": 1.5087528228759766,
4039
+ "learning_rate": 9.961340772305936e-06,
4040
+ "loss": 0.0801,
4041
+ "step": 5280
4042
+ },
4043
+ {
4044
+ "epoch": 4.957825679475164,
4045
+ "grad_norm": 1.2038474082946777,
4046
+ "learning_rate": 9.961267554071666e-06,
4047
+ "loss": 0.0791,
4048
+ "step": 5290
4049
+ },
4050
+ {
4051
+ "epoch": 4.967197750702906,
4052
+ "grad_norm": 1.4044734239578247,
4053
+ "learning_rate": 9.961194335837397e-06,
4054
+ "loss": 0.0832,
4055
+ "step": 5300
4056
+ },
4057
+ {
4058
+ "epoch": 4.976569821930647,
4059
+ "grad_norm": 1.057298183441162,
4060
+ "learning_rate": 9.96112111760313e-06,
4061
+ "loss": 0.0869,
4062
+ "step": 5310
4063
+ },
4064
+ {
4065
+ "epoch": 4.985941893158388,
4066
+ "grad_norm": 1.4192899465560913,
4067
+ "learning_rate": 9.96104789936886e-06,
4068
+ "loss": 0.0837,
4069
+ "step": 5320
4070
+ },
4071
+ {
4072
+ "epoch": 4.9953139643861295,
4073
+ "grad_norm": 1.7742289304733276,
4074
+ "learning_rate": 9.960974681134591e-06,
4075
+ "loss": 0.0858,
4076
+ "step": 5330
4077
+ },
4078
+ {
4079
+ "epoch": 5.0046860356138705,
4080
+ "grad_norm": 0.9188485741615295,
4081
+ "learning_rate": 9.960901462900322e-06,
4082
+ "loss": 0.0684,
4083
+ "step": 5340
4084
+ },
4085
+ {
4086
+ "epoch": 5.014058106841612,
4087
+ "grad_norm": 1.6541597843170166,
4088
+ "learning_rate": 9.960828244666052e-06,
4089
+ "loss": 0.0669,
4090
+ "step": 5350
4091
+ },
4092
+ {
4093
+ "epoch": 5.023430178069353,
4094
+ "grad_norm": 1.5705071687698364,
4095
+ "learning_rate": 9.960755026431783e-06,
4096
+ "loss": 0.0646,
4097
+ "step": 5360
4098
+ },
4099
+ {
4100
+ "epoch": 5.032802249297094,
4101
+ "grad_norm": 0.9007801413536072,
4102
+ "learning_rate": 9.960681808197514e-06,
4103
+ "loss": 0.0721,
4104
+ "step": 5370
4105
+ },
4106
+ {
4107
+ "epoch": 5.042174320524836,
4108
+ "grad_norm": 1.044138789176941,
4109
+ "learning_rate": 9.960608589963245e-06,
4110
+ "loss": 0.0585,
4111
+ "step": 5380
4112
+ },
4113
+ {
4114
+ "epoch": 5.051546391752577,
4115
+ "grad_norm": 1.455098032951355,
4116
+ "learning_rate": 9.960535371728977e-06,
4117
+ "loss": 0.0677,
4118
+ "step": 5390
4119
+ },
4120
+ {
4121
+ "epoch": 5.060918462980319,
4122
+ "grad_norm": 1.3480255603790283,
4123
+ "learning_rate": 9.960462153494708e-06,
4124
+ "loss": 0.0582,
4125
+ "step": 5400
4126
+ },
4127
+ {
4128
+ "epoch": 5.07029053420806,
4129
+ "grad_norm": 0.9733775854110718,
4130
+ "learning_rate": 9.960388935260437e-06,
4131
+ "loss": 0.057,
4132
+ "step": 5410
4133
+ },
4134
+ {
4135
+ "epoch": 5.079662605435801,
4136
+ "grad_norm": 1.202635645866394,
4137
+ "learning_rate": 9.96031571702617e-06,
4138
+ "loss": 0.0642,
4139
+ "step": 5420
4140
+ },
4141
+ {
4142
+ "epoch": 5.089034676663543,
4143
+ "grad_norm": 1.2410409450531006,
4144
+ "learning_rate": 9.9602424987919e-06,
4145
+ "loss": 0.055,
4146
+ "step": 5430
4147
+ },
4148
+ {
4149
+ "epoch": 5.098406747891284,
4150
+ "grad_norm": 1.341126799583435,
4151
+ "learning_rate": 9.960169280557631e-06,
4152
+ "loss": 0.066,
4153
+ "step": 5440
4154
+ },
4155
+ {
4156
+ "epoch": 5.107778819119026,
4157
+ "grad_norm": 1.070065975189209,
4158
+ "learning_rate": 9.960096062323362e-06,
4159
+ "loss": 0.0565,
4160
+ "step": 5450
4161
+ },
4162
+ {
4163
+ "epoch": 5.117150890346767,
4164
+ "grad_norm": 1.5855072736740112,
4165
+ "learning_rate": 9.960022844089092e-06,
4166
+ "loss": 0.0613,
4167
+ "step": 5460
4168
+ },
4169
+ {
4170
+ "epoch": 5.126522961574508,
4171
+ "grad_norm": 0.7614333629608154,
4172
+ "learning_rate": 9.959949625854823e-06,
4173
+ "loss": 0.0572,
4174
+ "step": 5470
4175
+ },
4176
+ {
4177
+ "epoch": 5.1358950328022495,
4178
+ "grad_norm": 1.0969761610031128,
4179
+ "learning_rate": 9.959876407620554e-06,
4180
+ "loss": 0.0557,
4181
+ "step": 5480
4182
+ },
4183
+ {
4184
+ "epoch": 5.14526710402999,
4185
+ "grad_norm": 1.7454636096954346,
4186
+ "learning_rate": 9.959803189386286e-06,
4187
+ "loss": 0.0647,
4188
+ "step": 5490
4189
+ },
4190
+ {
4191
+ "epoch": 5.154639175257732,
4192
+ "grad_norm": 0.9625281691551208,
4193
+ "learning_rate": 9.959729971152017e-06,
4194
+ "loss": 0.0499,
4195
+ "step": 5500
4196
+ },
4197
+ {
4198
+ "epoch": 5.154639175257732,
4199
+ "eval_loss": 0.03924967721104622,
4200
+ "eval_pearson_cosine": 0.7608553767204285,
4201
+ "eval_pearson_dot": 0.6993385553359985,
4202
+ "eval_pearson_euclidean": 0.732108473777771,
4203
+ "eval_pearson_manhattan": 0.7334935069084167,
4204
+ "eval_runtime": 28.2448,
4205
+ "eval_samples_per_second": 53.107,
4206
+ "eval_spearman_cosine": 0.7615678141531256,
4207
+ "eval_spearman_dot": 0.6999177956469285,
4208
+ "eval_spearman_euclidean": 0.7378738640113753,
4209
+ "eval_spearman_manhattan": 0.7392624046122273,
4210
+ "eval_steps_per_second": 6.656,
4211
+ "step": 5500
4212
+ },
4213
+ {
4214
+ "epoch": 5.164011246485473,
4215
+ "grad_norm": 1.4280071258544922,
4216
+ "learning_rate": 9.959656752917748e-06,
4217
+ "loss": 0.0557,
4218
+ "step": 5510
4219
+ },
4220
+ {
4221
+ "epoch": 5.173383317713214,
4222
+ "grad_norm": 1.6271259784698486,
4223
+ "learning_rate": 9.959583534683479e-06,
4224
+ "loss": 0.0602,
4225
+ "step": 5520
4226
+ },
4227
+ {
4228
+ "epoch": 5.182755388940956,
4229
+ "grad_norm": 1.2609021663665771,
4230
+ "learning_rate": 9.95951031644921e-06,
4231
+ "loss": 0.0545,
4232
+ "step": 5530
4233
+ },
4234
+ {
4235
+ "epoch": 5.192127460168697,
4236
+ "grad_norm": 1.2945165634155273,
4237
+ "learning_rate": 9.95943709821494e-06,
4238
+ "loss": 0.0592,
4239
+ "step": 5540
4240
+ },
4241
+ {
4242
+ "epoch": 5.201499531396439,
4243
+ "grad_norm": 1.3600184917449951,
4244
+ "learning_rate": 9.959363879980671e-06,
4245
+ "loss": 0.0492,
4246
+ "step": 5550
4247
+ },
4248
+ {
4249
+ "epoch": 5.21087160262418,
4250
+ "grad_norm": 1.3210471868515015,
4251
+ "learning_rate": 9.959290661746403e-06,
4252
+ "loss": 0.0558,
4253
+ "step": 5560
4254
+ },
4255
+ {
4256
+ "epoch": 5.220243673851921,
4257
+ "grad_norm": 0.8935280442237854,
4258
+ "learning_rate": 9.959217443512134e-06,
4259
+ "loss": 0.0566,
4260
+ "step": 5570
4261
+ },
4262
+ {
4263
+ "epoch": 5.229615745079663,
4264
+ "grad_norm": 0.9014615416526794,
4265
+ "learning_rate": 9.959144225277863e-06,
4266
+ "loss": 0.0578,
4267
+ "step": 5580
4268
+ },
4269
+ {
4270
+ "epoch": 5.238987816307404,
4271
+ "grad_norm": 0.9144461750984192,
4272
+ "learning_rate": 9.959071007043596e-06,
4273
+ "loss": 0.0642,
4274
+ "step": 5590
4275
+ },
4276
+ {
4277
+ "epoch": 5.248359887535146,
4278
+ "grad_norm": 1.1306620836257935,
4279
+ "learning_rate": 9.958997788809326e-06,
4280
+ "loss": 0.0645,
4281
+ "step": 5600
4282
+ },
4283
+ {
4284
+ "epoch": 5.257731958762887,
4285
+ "grad_norm": 1.6353179216384888,
4286
+ "learning_rate": 9.958924570575057e-06,
4287
+ "loss": 0.0563,
4288
+ "step": 5610
4289
+ },
4290
+ {
4291
+ "epoch": 5.2671040299906275,
4292
+ "grad_norm": 1.0438508987426758,
4293
+ "learning_rate": 9.958851352340788e-06,
4294
+ "loss": 0.0554,
4295
+ "step": 5620
4296
+ },
4297
+ {
4298
+ "epoch": 5.276476101218369,
4299
+ "grad_norm": 1.0287367105484009,
4300
+ "learning_rate": 9.958778134106519e-06,
4301
+ "loss": 0.0586,
4302
+ "step": 5630
4303
+ },
4304
+ {
4305
+ "epoch": 5.28584817244611,
4306
+ "grad_norm": 1.0613245964050293,
4307
+ "learning_rate": 9.95870491587225e-06,
4308
+ "loss": 0.0634,
4309
+ "step": 5640
4310
+ },
4311
+ {
4312
+ "epoch": 5.295220243673852,
4313
+ "grad_norm": 1.489405632019043,
4314
+ "learning_rate": 9.95863169763798e-06,
4315
+ "loss": 0.0474,
4316
+ "step": 5650
4317
+ },
4318
+ {
4319
+ "epoch": 5.304592314901593,
4320
+ "grad_norm": 1.4497292041778564,
4321
+ "learning_rate": 9.95855847940371e-06,
4322
+ "loss": 0.056,
4323
+ "step": 5660
4324
+ },
4325
+ {
4326
+ "epoch": 5.313964386129334,
4327
+ "grad_norm": 1.2881600856781006,
4328
+ "learning_rate": 9.958485261169443e-06,
4329
+ "loss": 0.0561,
4330
+ "step": 5670
4331
+ },
4332
+ {
4333
+ "epoch": 5.323336457357076,
4334
+ "grad_norm": 1.4863743782043457,
4335
+ "learning_rate": 9.958412042935174e-06,
4336
+ "loss": 0.0562,
4337
+ "step": 5680
4338
+ },
4339
+ {
4340
+ "epoch": 5.332708528584817,
4341
+ "grad_norm": 1.325191855430603,
4342
+ "learning_rate": 9.958338824700903e-06,
4343
+ "loss": 0.0569,
4344
+ "step": 5690
4345
+ },
4346
+ {
4347
+ "epoch": 5.342080599812559,
4348
+ "grad_norm": 1.0650861263275146,
4349
+ "learning_rate": 9.958265606466636e-06,
4350
+ "loss": 0.0574,
4351
+ "step": 5700
4352
+ },
4353
+ {
4354
+ "epoch": 5.3514526710403,
4355
+ "grad_norm": 1.7255184650421143,
4356
+ "learning_rate": 9.958192388232366e-06,
4357
+ "loss": 0.055,
4358
+ "step": 5710
4359
+ },
4360
+ {
4361
+ "epoch": 5.360824742268041,
4362
+ "grad_norm": 0.8258642554283142,
4363
+ "learning_rate": 9.958119169998097e-06,
4364
+ "loss": 0.0509,
4365
+ "step": 5720
4366
+ },
4367
+ {
4368
+ "epoch": 5.370196813495783,
4369
+ "grad_norm": 1.2811216115951538,
4370
+ "learning_rate": 9.958045951763828e-06,
4371
+ "loss": 0.0585,
4372
+ "step": 5730
4373
+ },
4374
+ {
4375
+ "epoch": 5.379568884723524,
4376
+ "grad_norm": 1.2582824230194092,
4377
+ "learning_rate": 9.95797273352956e-06,
4378
+ "loss": 0.0589,
4379
+ "step": 5740
4380
+ },
4381
+ {
4382
+ "epoch": 5.3889409559512655,
4383
+ "grad_norm": 1.3511929512023926,
4384
+ "learning_rate": 9.95789951529529e-06,
4385
+ "loss": 0.0542,
4386
+ "step": 5750
4387
+ },
4388
+ {
4389
+ "epoch": 5.3889409559512655,
4390
+ "eval_loss": 0.03850702941417694,
4391
+ "eval_pearson_cosine": 0.7663590312004089,
4392
+ "eval_pearson_dot": 0.7060524225234985,
4393
+ "eval_pearson_euclidean": 0.7385671734809875,
4394
+ "eval_pearson_manhattan": 0.7399072647094727,
4395
+ "eval_runtime": 27.6896,
4396
+ "eval_samples_per_second": 54.172,
4397
+ "eval_spearman_cosine": 0.7668814587849042,
4398
+ "eval_spearman_dot": 0.706466499232552,
4399
+ "eval_spearman_euclidean": 0.744533534662993,
4400
+ "eval_spearman_manhattan": 0.7454034343244123,
4401
+ "eval_steps_per_second": 6.79,
4402
+ "step": 5750
4403
+ },
4404
+ {
4405
+ "epoch": 5.3983130271790065,
4406
+ "grad_norm": 1.3905717134475708,
4407
+ "learning_rate": 9.95782629706102e-06,
4408
+ "loss": 0.0583,
4409
+ "step": 5760
4410
+ },
4411
+ {
4412
+ "epoch": 5.4076850984067475,
4413
+ "grad_norm": 1.5047788619995117,
4414
+ "learning_rate": 9.957753078826752e-06,
4415
+ "loss": 0.0605,
4416
+ "step": 5770
4417
+ },
4418
+ {
4419
+ "epoch": 5.417057169634489,
4420
+ "grad_norm": 1.280427098274231,
4421
+ "learning_rate": 9.957679860592483e-06,
4422
+ "loss": 0.0584,
4423
+ "step": 5780
4424
+ },
4425
+ {
4426
+ "epoch": 5.42642924086223,
4427
+ "grad_norm": 1.3530281782150269,
4428
+ "learning_rate": 9.957606642358214e-06,
4429
+ "loss": 0.0591,
4430
+ "step": 5790
4431
+ },
4432
+ {
4433
+ "epoch": 5.435801312089972,
4434
+ "grad_norm": 1.0610909461975098,
4435
+ "learning_rate": 9.957533424123945e-06,
4436
+ "loss": 0.0546,
4437
+ "step": 5800
4438
+ },
4439
+ {
4440
+ "epoch": 5.445173383317713,
4441
+ "grad_norm": 0.9637224674224854,
4442
+ "learning_rate": 9.957460205889675e-06,
4443
+ "loss": 0.0641,
4444
+ "step": 5810
4445
+ },
4446
+ {
4447
+ "epoch": 5.454545454545454,
4448
+ "grad_norm": 1.3324577808380127,
4449
+ "learning_rate": 9.957386987655406e-06,
4450
+ "loss": 0.0599,
4451
+ "step": 5820
4452
+ },
4453
+ {
4454
+ "epoch": 5.463917525773196,
4455
+ "grad_norm": 0.9660161137580872,
4456
+ "learning_rate": 9.957313769421137e-06,
4457
+ "loss": 0.0591,
4458
+ "step": 5830
4459
+ },
4460
+ {
4461
+ "epoch": 5.473289597000937,
4462
+ "grad_norm": 1.128570556640625,
4463
+ "learning_rate": 9.95724055118687e-06,
4464
+ "loss": 0.0579,
4465
+ "step": 5840
4466
+ },
4467
+ {
4468
+ "epoch": 5.482661668228679,
4469
+ "grad_norm": 1.444172739982605,
4470
+ "learning_rate": 9.9571673329526e-06,
4471
+ "loss": 0.0636,
4472
+ "step": 5850
4473
+ },
4474
+ {
4475
+ "epoch": 5.49203373945642,
4476
+ "grad_norm": 1.3510165214538574,
4477
+ "learning_rate": 9.95709411471833e-06,
4478
+ "loss": 0.0631,
4479
+ "step": 5860
4480
+ },
4481
+ {
4482
+ "epoch": 5.501405810684162,
4483
+ "grad_norm": 1.0439740419387817,
4484
+ "learning_rate": 9.957020896484062e-06,
4485
+ "loss": 0.0635,
4486
+ "step": 5870
4487
+ },
4488
+ {
4489
+ "epoch": 5.510777881911903,
4490
+ "grad_norm": 1.15412175655365,
4491
+ "learning_rate": 9.956947678249792e-06,
4492
+ "loss": 0.0595,
4493
+ "step": 5880
4494
+ },
4495
+ {
4496
+ "epoch": 5.520149953139644,
4497
+ "grad_norm": 1.221147894859314,
4498
+ "learning_rate": 9.956874460015523e-06,
4499
+ "loss": 0.0552,
4500
+ "step": 5890
4501
+ },
4502
+ {
4503
+ "epoch": 5.5295220243673855,
4504
+ "grad_norm": 1.4210234880447388,
4505
+ "learning_rate": 9.956801241781254e-06,
4506
+ "loss": 0.0593,
4507
+ "step": 5900
4508
+ },
4509
+ {
4510
+ "epoch": 5.5388940955951265,
4511
+ "grad_norm": 1.1082103252410889,
4512
+ "learning_rate": 9.956728023546985e-06,
4513
+ "loss": 0.0535,
4514
+ "step": 5910
4515
+ },
4516
+ {
4517
+ "epoch": 5.548266166822868,
4518
+ "grad_norm": 0.8931286334991455,
4519
+ "learning_rate": 9.956654805312715e-06,
4520
+ "loss": 0.0556,
4521
+ "step": 5920
4522
+ },
4523
+ {
4524
+ "epoch": 5.557638238050609,
4525
+ "grad_norm": 1.5182912349700928,
4526
+ "learning_rate": 9.956581587078446e-06,
4527
+ "loss": 0.0583,
4528
+ "step": 5930
4529
+ },
4530
+ {
4531
+ "epoch": 5.56701030927835,
4532
+ "grad_norm": 1.2056432962417603,
4533
+ "learning_rate": 9.956508368844177e-06,
4534
+ "loss": 0.064,
4535
+ "step": 5940
4536
+ },
4537
+ {
4538
+ "epoch": 5.576382380506092,
4539
+ "grad_norm": 1.5039522647857666,
4540
+ "learning_rate": 9.95643515060991e-06,
4541
+ "loss": 0.0708,
4542
+ "step": 5950
4543
+ },
4544
+ {
4545
+ "epoch": 5.585754451733833,
4546
+ "grad_norm": 1.2651883363723755,
4547
+ "learning_rate": 9.95636193237564e-06,
4548
+ "loss": 0.0596,
4549
+ "step": 5960
4550
+ },
4551
+ {
4552
+ "epoch": 5.595126522961575,
4553
+ "grad_norm": 1.317690134048462,
4554
+ "learning_rate": 9.956288714141371e-06,
4555
+ "loss": 0.0713,
4556
+ "step": 5970
4557
+ },
4558
+ {
4559
+ "epoch": 5.604498594189316,
4560
+ "grad_norm": 0.9705867767333984,
4561
+ "learning_rate": 9.956215495907102e-06,
4562
+ "loss": 0.0699,
4563
+ "step": 5980
4564
+ },
4565
+ {
4566
+ "epoch": 5.613870665417057,
4567
+ "grad_norm": 1.4250271320343018,
4568
+ "learning_rate": 9.956142277672832e-06,
4569
+ "loss": 0.0595,
4570
+ "step": 5990
4571
+ },
4572
+ {
4573
+ "epoch": 5.623242736644799,
4574
+ "grad_norm": 1.0857118368148804,
4575
+ "learning_rate": 9.956069059438563e-06,
4576
+ "loss": 0.0555,
4577
+ "step": 6000
4578
+ },
4579
+ {
4580
+ "epoch": 5.623242736644799,
4581
+ "eval_loss": 0.03963544964790344,
4582
+ "eval_pearson_cosine": 0.7571043968200684,
4583
+ "eval_pearson_dot": 0.700376570224762,
4584
+ "eval_pearson_euclidean": 0.7279260158538818,
4585
+ "eval_pearson_manhattan": 0.729307234287262,
4586
+ "eval_runtime": 25.5449,
4587
+ "eval_samples_per_second": 58.72,
4588
+ "eval_spearman_cosine": 0.7579022153365402,
4589
+ "eval_spearman_dot": 0.6992710065203335,
4590
+ "eval_spearman_euclidean": 0.7330627821557505,
4591
+ "eval_spearman_manhattan": 0.7343750357819732,
4592
+ "eval_steps_per_second": 7.36,
4593
+ "step": 6000
4594
  }
4595
  ],
4596
  "logging_steps": 10,