|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.8062622309197651, |
|
"eval_steps": 5, |
|
"global_step": 412, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0019569471624266144, |
|
"grad_norm": 3.932948112487793, |
|
"learning_rate": 7.8125e-08, |
|
"loss": 0.107, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.003913894324853229, |
|
"grad_norm": 4.482716083526611, |
|
"learning_rate": 1.5625e-07, |
|
"loss": 0.1529, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.005870841487279843, |
|
"grad_norm": 4.672689437866211, |
|
"learning_rate": 2.3437500000000003e-07, |
|
"loss": 0.1874, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.007827788649706457, |
|
"grad_norm": 4.226949214935303, |
|
"learning_rate": 3.125e-07, |
|
"loss": 0.1682, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.009784735812133072, |
|
"grad_norm": 4.327479362487793, |
|
"learning_rate": 3.90625e-07, |
|
"loss": 0.1438, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.009784735812133072, |
|
"eval_loss": 0.1470455378293991, |
|
"eval_runtime": 107.3614, |
|
"eval_samples_per_second": 28.427, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8861388036460539, |
|
"eval_sts-test_pearson_dot": 0.8769528313548112, |
|
"eval_sts-test_pearson_euclidean": 0.9079831987750276, |
|
"eval_sts-test_pearson_manhattan": 0.9086786527495163, |
|
"eval_sts-test_pearson_max": 0.9086786527495163, |
|
"eval_sts-test_spearman_cosine": 0.9077902566323186, |
|
"eval_sts-test_spearman_dot": 0.8794770733264693, |
|
"eval_sts-test_spearman_euclidean": 0.903967335376697, |
|
"eval_sts-test_spearman_manhattan": 0.9043498244078092, |
|
"eval_sts-test_spearman_max": 0.9077902566323186, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.011741682974559686, |
|
"grad_norm": 5.27250337600708, |
|
"learning_rate": 4.6875000000000006e-07, |
|
"loss": 0.2961, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0136986301369863, |
|
"grad_norm": 5.903276443481445, |
|
"learning_rate": 5.468750000000001e-07, |
|
"loss": 0.3019, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.015655577299412915, |
|
"grad_norm": 4.000335693359375, |
|
"learning_rate": 6.25e-07, |
|
"loss": 0.1184, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01761252446183953, |
|
"grad_norm": 5.876769065856934, |
|
"learning_rate": 7.03125e-07, |
|
"loss": 0.3176, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.019569471624266144, |
|
"grad_norm": 4.8437933921813965, |
|
"learning_rate": 7.8125e-07, |
|
"loss": 0.2234, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.019569471624266144, |
|
"eval_loss": 0.1467687040567398, |
|
"eval_runtime": 107.2549, |
|
"eval_samples_per_second": 28.456, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8861409457129842, |
|
"eval_sts-test_pearson_dot": 0.876972814890145, |
|
"eval_sts-test_pearson_euclidean": 0.9080268416052204, |
|
"eval_sts-test_pearson_manhattan": 0.9087444298597203, |
|
"eval_sts-test_pearson_max": 0.9087444298597203, |
|
"eval_sts-test_spearman_cosine": 0.9078342918735278, |
|
"eval_sts-test_spearman_dot": 0.8794190309404447, |
|
"eval_sts-test_spearman_euclidean": 0.9039501508923226, |
|
"eval_sts-test_spearman_manhattan": 0.9044244247605487, |
|
"eval_sts-test_spearman_max": 0.9078342918735278, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021526418786692758, |
|
"grad_norm": 4.726498603820801, |
|
"learning_rate": 8.59375e-07, |
|
"loss": 0.1881, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.023483365949119372, |
|
"grad_norm": 4.818070411682129, |
|
"learning_rate": 9.375000000000001e-07, |
|
"loss": 0.1593, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.025440313111545987, |
|
"grad_norm": 4.98201322555542, |
|
"learning_rate": 1.0156250000000001e-06, |
|
"loss": 0.1833, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0273972602739726, |
|
"grad_norm": 4.269514560699463, |
|
"learning_rate": 1.0937500000000001e-06, |
|
"loss": 0.1352, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.029354207436399216, |
|
"grad_norm": 6.1525492668151855, |
|
"learning_rate": 1.1718750000000001e-06, |
|
"loss": 0.3143, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.029354207436399216, |
|
"eval_loss": 0.1462097316980362, |
|
"eval_runtime": 107.0721, |
|
"eval_samples_per_second": 28.504, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8860829119688085, |
|
"eval_sts-test_pearson_dot": 0.8768990080043222, |
|
"eval_sts-test_pearson_euclidean": 0.9080646402781543, |
|
"eval_sts-test_pearson_manhattan": 0.9088063929836994, |
|
"eval_sts-test_pearson_max": 0.9088063929836994, |
|
"eval_sts-test_spearman_cosine": 0.907713597721555, |
|
"eval_sts-test_spearman_dot": 0.8795110842851269, |
|
"eval_sts-test_spearman_euclidean": 0.9040110126078148, |
|
"eval_sts-test_spearman_manhattan": 0.9045081991218733, |
|
"eval_sts-test_spearman_max": 0.907713597721555, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03131115459882583, |
|
"grad_norm": 4.751354694366455, |
|
"learning_rate": 1.25e-06, |
|
"loss": 0.1583, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.033268101761252444, |
|
"grad_norm": 5.435980319976807, |
|
"learning_rate": 1.328125e-06, |
|
"loss": 0.2015, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.03522504892367906, |
|
"grad_norm": 4.1765851974487305, |
|
"learning_rate": 1.40625e-06, |
|
"loss": 0.1476, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.03718199608610567, |
|
"grad_norm": 4.689794540405273, |
|
"learning_rate": 1.484375e-06, |
|
"loss": 0.1676, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.03913894324853229, |
|
"grad_norm": 4.203744888305664, |
|
"learning_rate": 1.5625e-06, |
|
"loss": 0.1525, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03913894324853229, |
|
"eval_loss": 0.14544810354709625, |
|
"eval_runtime": 107.1845, |
|
"eval_samples_per_second": 28.474, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8861436293943533, |
|
"eval_sts-test_pearson_dot": 0.8769239163708102, |
|
"eval_sts-test_pearson_euclidean": 0.9082269545633608, |
|
"eval_sts-test_pearson_manhattan": 0.9089828403051001, |
|
"eval_sts-test_pearson_max": 0.9089828403051001, |
|
"eval_sts-test_spearman_cosine": 0.907929343552723, |
|
"eval_sts-test_spearman_dot": 0.8796122221358714, |
|
"eval_sts-test_spearman_euclidean": 0.9043074002120102, |
|
"eval_sts-test_spearman_manhattan": 0.9047217521412333, |
|
"eval_sts-test_spearman_max": 0.907929343552723, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0410958904109589, |
|
"grad_norm": 5.152130603790283, |
|
"learning_rate": 1.640625e-06, |
|
"loss": 0.1717, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.043052837573385516, |
|
"grad_norm": 5.343059062957764, |
|
"learning_rate": 1.71875e-06, |
|
"loss": 0.198, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.04500978473581213, |
|
"grad_norm": 5.224748134613037, |
|
"learning_rate": 1.796875e-06, |
|
"loss": 0.3062, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.046966731898238745, |
|
"grad_norm": 4.6179423332214355, |
|
"learning_rate": 1.8750000000000003e-06, |
|
"loss": 0.1241, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.04892367906066536, |
|
"grad_norm": 4.200148105621338, |
|
"learning_rate": 1.953125e-06, |
|
"loss": 0.1087, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04892367906066536, |
|
"eval_loss": 0.14457188546657562, |
|
"eval_runtime": 107.3809, |
|
"eval_samples_per_second": 28.422, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8862905994058754, |
|
"eval_sts-test_pearson_dot": 0.877015249192232, |
|
"eval_sts-test_pearson_euclidean": 0.9085054742522269, |
|
"eval_sts-test_pearson_manhattan": 0.9092575877809899, |
|
"eval_sts-test_pearson_max": 0.9092575877809899, |
|
"eval_sts-test_spearman_cosine": 0.9082294902628751, |
|
"eval_sts-test_spearman_dot": 0.8798810429630494, |
|
"eval_sts-test_spearman_euclidean": 0.9047149499495015, |
|
"eval_sts-test_spearman_manhattan": 0.9051023616193669, |
|
"eval_sts-test_spearman_max": 0.9082294902628751, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.050880626223091974, |
|
"grad_norm": 4.890737533569336, |
|
"learning_rate": 2.0312500000000002e-06, |
|
"loss": 0.1767, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.05283757338551859, |
|
"grad_norm": 4.683767795562744, |
|
"learning_rate": 2.109375e-06, |
|
"loss": 0.1951, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0547945205479452, |
|
"grad_norm": 4.656280040740967, |
|
"learning_rate": 2.1875000000000002e-06, |
|
"loss": 0.1621, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.05675146771037182, |
|
"grad_norm": 4.446409702301025, |
|
"learning_rate": 2.265625e-06, |
|
"loss": 0.221, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.05870841487279843, |
|
"grad_norm": 5.765133857727051, |
|
"learning_rate": 2.3437500000000002e-06, |
|
"loss": 0.2241, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05870841487279843, |
|
"eval_loss": 0.14350731670856476, |
|
"eval_runtime": 107.3747, |
|
"eval_samples_per_second": 28.424, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8863784941826807, |
|
"eval_sts-test_pearson_dot": 0.8768948467465629, |
|
"eval_sts-test_pearson_euclidean": 0.9088066170487232, |
|
"eval_sts-test_pearson_manhattan": 0.9095658568102677, |
|
"eval_sts-test_pearson_max": 0.9095658568102677, |
|
"eval_sts-test_spearman_cosine": 0.9082580415676429, |
|
"eval_sts-test_spearman_dot": 0.8801849487791585, |
|
"eval_sts-test_spearman_euclidean": 0.9051721735871375, |
|
"eval_sts-test_spearman_manhattan": 0.9054862826908437, |
|
"eval_sts-test_spearman_max": 0.9082580415676429, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.060665362035225046, |
|
"grad_norm": 5.359245777130127, |
|
"learning_rate": 2.421875e-06, |
|
"loss": 0.2093, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.06262230919765166, |
|
"grad_norm": 4.439486503601074, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1615, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.06457925636007827, |
|
"grad_norm": 3.689824342727661, |
|
"learning_rate": 2.5781250000000004e-06, |
|
"loss": 0.1615, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.06653620352250489, |
|
"grad_norm": 4.842885494232178, |
|
"learning_rate": 2.65625e-06, |
|
"loss": 0.1772, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0684931506849315, |
|
"grad_norm": 5.209301948547363, |
|
"learning_rate": 2.7343750000000004e-06, |
|
"loss": 0.2324, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0684931506849315, |
|
"eval_loss": 0.14226235449314117, |
|
"eval_runtime": 107.3108, |
|
"eval_samples_per_second": 28.441, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8863574366132135, |
|
"eval_sts-test_pearson_dot": 0.8765683077424664, |
|
"eval_sts-test_pearson_euclidean": 0.9091012263251723, |
|
"eval_sts-test_pearson_manhattan": 0.9098631032540263, |
|
"eval_sts-test_pearson_max": 0.9098631032540263, |
|
"eval_sts-test_spearman_cosine": 0.9083728733043733, |
|
"eval_sts-test_spearman_dot": 0.8800282746130272, |
|
"eval_sts-test_spearman_euclidean": 0.9052579170039636, |
|
"eval_sts-test_spearman_manhattan": 0.9059997586640487, |
|
"eval_sts-test_spearman_max": 0.9083728733043733, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07045009784735812, |
|
"grad_norm": 4.740983009338379, |
|
"learning_rate": 2.8125e-06, |
|
"loss": 0.2611, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.07240704500978473, |
|
"grad_norm": 5.090059757232666, |
|
"learning_rate": 2.8906250000000004e-06, |
|
"loss": 0.214, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.07436399217221135, |
|
"grad_norm": 5.123153209686279, |
|
"learning_rate": 2.96875e-06, |
|
"loss": 0.1985, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.07632093933463796, |
|
"grad_norm": 5.401946067810059, |
|
"learning_rate": 3.0468750000000004e-06, |
|
"loss": 0.1855, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.07827788649706457, |
|
"grad_norm": 4.838700294494629, |
|
"learning_rate": 3.125e-06, |
|
"loss": 0.1234, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07827788649706457, |
|
"eval_loss": 0.14100149273872375, |
|
"eval_runtime": 107.3059, |
|
"eval_samples_per_second": 28.442, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8864265749012155, |
|
"eval_sts-test_pearson_dot": 0.8764612424174422, |
|
"eval_sts-test_pearson_euclidean": 0.9094092487009695, |
|
"eval_sts-test_pearson_manhattan": 0.9101707626021143, |
|
"eval_sts-test_pearson_max": 0.9101707626021143, |
|
"eval_sts-test_spearman_cosine": 0.908505695048183, |
|
"eval_sts-test_spearman_dot": 0.8802103674956289, |
|
"eval_sts-test_spearman_euclidean": 0.9054564783507572, |
|
"eval_sts-test_spearman_manhattan": 0.9063046490079084, |
|
"eval_sts-test_spearman_max": 0.908505695048183, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08023483365949119, |
|
"grad_norm": 3.8856801986694336, |
|
"learning_rate": 3.2031250000000004e-06, |
|
"loss": 0.1492, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0821917808219178, |
|
"grad_norm": 5.678151607513428, |
|
"learning_rate": 3.28125e-06, |
|
"loss": 0.2022, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.08414872798434442, |
|
"grad_norm": 5.104148864746094, |
|
"learning_rate": 3.3593750000000003e-06, |
|
"loss": 0.2146, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.08610567514677103, |
|
"grad_norm": 4.76043701171875, |
|
"learning_rate": 3.4375e-06, |
|
"loss": 0.1688, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.08806262230919765, |
|
"grad_norm": 5.128803730010986, |
|
"learning_rate": 3.5156250000000003e-06, |
|
"loss": 0.175, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.08806262230919765, |
|
"eval_loss": 0.13962982594966888, |
|
"eval_runtime": 107.4144, |
|
"eval_samples_per_second": 28.413, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.886410908658177, |
|
"eval_sts-test_pearson_dot": 0.8762836795862763, |
|
"eval_sts-test_pearson_euclidean": 0.9096890242379734, |
|
"eval_sts-test_pearson_manhattan": 0.9104590803642174, |
|
"eval_sts-test_pearson_max": 0.9104590803642174, |
|
"eval_sts-test_spearman_cosine": 0.9086694846648755, |
|
"eval_sts-test_spearman_dot": 0.8801346931126159, |
|
"eval_sts-test_spearman_euclidean": 0.9057376952773407, |
|
"eval_sts-test_spearman_manhattan": 0.9064708999439774, |
|
"eval_sts-test_spearman_max": 0.9086694846648755, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09001956947162426, |
|
"grad_norm": 4.968522548675537, |
|
"learning_rate": 3.59375e-06, |
|
"loss": 0.2123, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.09197651663405088, |
|
"grad_norm": 4.343472957611084, |
|
"learning_rate": 3.6718750000000003e-06, |
|
"loss": 0.1118, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.09393346379647749, |
|
"grad_norm": 6.252938270568848, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 0.3009, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0958904109589041, |
|
"grad_norm": 3.411029815673828, |
|
"learning_rate": 3.828125000000001e-06, |
|
"loss": 0.1071, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.09784735812133072, |
|
"grad_norm": 5.379226207733154, |
|
"learning_rate": 3.90625e-06, |
|
"loss": 0.2608, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09784735812133072, |
|
"eval_loss": 0.13823722302913666, |
|
"eval_runtime": 107.3656, |
|
"eval_samples_per_second": 28.426, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8863074884351817, |
|
"eval_sts-test_pearson_dot": 0.8763122134205692, |
|
"eval_sts-test_pearson_euclidean": 0.9097700018848961, |
|
"eval_sts-test_pearson_manhattan": 0.9105724410858811, |
|
"eval_sts-test_pearson_max": 0.9105724410858811, |
|
"eval_sts-test_spearman_cosine": 0.9085105281844131, |
|
"eval_sts-test_spearman_dot": 0.8801239975611433, |
|
"eval_sts-test_spearman_euclidean": 0.9059798443527296, |
|
"eval_sts-test_spearman_manhattan": 0.9065691737139927, |
|
"eval_sts-test_spearman_max": 0.9085105281844131, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09980430528375733, |
|
"grad_norm": 4.599095821380615, |
|
"learning_rate": 3.984375e-06, |
|
"loss": 0.1368, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.10176125244618395, |
|
"grad_norm": 5.634761333465576, |
|
"learning_rate": 4.0625000000000005e-06, |
|
"loss": 0.2307, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.10371819960861056, |
|
"grad_norm": 4.678525924682617, |
|
"learning_rate": 4.140625000000001e-06, |
|
"loss": 0.1366, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.10567514677103718, |
|
"grad_norm": 4.931070327758789, |
|
"learning_rate": 4.21875e-06, |
|
"loss": 0.1857, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.10763209393346379, |
|
"grad_norm": 4.903087139129639, |
|
"learning_rate": 4.296875e-06, |
|
"loss": 0.2155, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.10763209393346379, |
|
"eval_loss": 0.1367325782775879, |
|
"eval_runtime": 107.3012, |
|
"eval_samples_per_second": 28.443, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.88603017002284, |
|
"eval_sts-test_pearson_dot": 0.8761626193697236, |
|
"eval_sts-test_pearson_euclidean": 0.9096799681812165, |
|
"eval_sts-test_pearson_manhattan": 0.9104977957475867, |
|
"eval_sts-test_pearson_max": 0.9104977957475867, |
|
"eval_sts-test_spearman_cosine": 0.9084685067499666, |
|
"eval_sts-test_spearman_dot": 0.8802836700617878, |
|
"eval_sts-test_spearman_euclidean": 0.9058409364373706, |
|
"eval_sts-test_spearman_manhattan": 0.9064240006220393, |
|
"eval_sts-test_spearman_max": 0.9084685067499666, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1095890410958904, |
|
"grad_norm": 5.408311367034912, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 0.2022, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.11154598825831702, |
|
"grad_norm": 4.5926713943481445, |
|
"learning_rate": 4.453125000000001e-06, |
|
"loss": 0.2076, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.11350293542074363, |
|
"grad_norm": 6.475535869598389, |
|
"learning_rate": 4.53125e-06, |
|
"loss": 0.4133, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.11545988258317025, |
|
"grad_norm": 4.997581481933594, |
|
"learning_rate": 4.609375e-06, |
|
"loss": 0.1823, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.11741682974559686, |
|
"grad_norm": 3.899284601211548, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"loss": 0.1136, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11741682974559686, |
|
"eval_loss": 0.13528631627559662, |
|
"eval_runtime": 107.3435, |
|
"eval_samples_per_second": 28.432, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8860224650016398, |
|
"eval_sts-test_pearson_dot": 0.8762739756970772, |
|
"eval_sts-test_pearson_euclidean": 0.9099016820022997, |
|
"eval_sts-test_pearson_manhattan": 0.9107281338135995, |
|
"eval_sts-test_pearson_max": 0.9107281338135995, |
|
"eval_sts-test_spearman_cosine": 0.9087510214631306, |
|
"eval_sts-test_spearman_dot": 0.8808623486228402, |
|
"eval_sts-test_spearman_euclidean": 0.9060555634870038, |
|
"eval_sts-test_spearman_manhattan": 0.9067256241238172, |
|
"eval_sts-test_spearman_max": 0.9087510214631306, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.11937377690802348, |
|
"grad_norm": 4.476404190063477, |
|
"learning_rate": 4.765625000000001e-06, |
|
"loss": 0.1687, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.12133072407045009, |
|
"grad_norm": 4.893277168273926, |
|
"learning_rate": 4.84375e-06, |
|
"loss": 0.1591, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1232876712328767, |
|
"grad_norm": 4.510354042053223, |
|
"learning_rate": 4.921875e-06, |
|
"loss": 0.1653, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.12524461839530332, |
|
"grad_norm": 4.400285243988037, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1799, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.12720156555772993, |
|
"grad_norm": 4.631839752197266, |
|
"learning_rate": 5.078125000000001e-06, |
|
"loss": 0.1578, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.12720156555772993, |
|
"eval_loss": 0.1336735188961029, |
|
"eval_runtime": 107.4984, |
|
"eval_samples_per_second": 28.391, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.886014179849858, |
|
"eval_sts-test_pearson_dot": 0.8762492282837839, |
|
"eval_sts-test_pearson_euclidean": 0.9101155794045166, |
|
"eval_sts-test_pearson_manhattan": 0.9109538919103571, |
|
"eval_sts-test_pearson_max": 0.9109538919103571, |
|
"eval_sts-test_spearman_cosine": 0.9089514176116413, |
|
"eval_sts-test_spearman_dot": 0.8810853441583534, |
|
"eval_sts-test_spearman_euclidean": 0.9061670836303911, |
|
"eval_sts-test_spearman_manhattan": 0.9072153371772234, |
|
"eval_sts-test_spearman_max": 0.9089514176116413, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.12915851272015655, |
|
"grad_norm": 4.043459415435791, |
|
"learning_rate": 5.156250000000001e-06, |
|
"loss": 0.1844, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.13111545988258316, |
|
"grad_norm": 4.447835922241211, |
|
"learning_rate": 5.234375e-06, |
|
"loss": 0.1489, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.13307240704500978, |
|
"grad_norm": 5.372109889984131, |
|
"learning_rate": 5.3125e-06, |
|
"loss": 0.1845, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.1350293542074364, |
|
"grad_norm": 3.5112483501434326, |
|
"learning_rate": 5.390625000000001e-06, |
|
"loss": 0.1364, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.136986301369863, |
|
"grad_norm": 4.305239200592041, |
|
"learning_rate": 5.468750000000001e-06, |
|
"loss": 0.1584, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.136986301369863, |
|
"eval_loss": 0.1320798397064209, |
|
"eval_runtime": 107.505, |
|
"eval_samples_per_second": 28.389, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.88578311613969, |
|
"eval_sts-test_pearson_dot": 0.875928774505713, |
|
"eval_sts-test_pearson_euclidean": 0.91024619729973, |
|
"eval_sts-test_pearson_manhattan": 0.9110959495329505, |
|
"eval_sts-test_pearson_max": 0.9110959495329505, |
|
"eval_sts-test_spearman_cosine": 0.9086066538938818, |
|
"eval_sts-test_spearman_dot": 0.8801235500485294, |
|
"eval_sts-test_spearman_euclidean": 0.9060052183179386, |
|
"eval_sts-test_spearman_manhattan": 0.907439182986703, |
|
"eval_sts-test_spearman_max": 0.9086066538938818, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.13894324853228962, |
|
"grad_norm": 5.093306064605713, |
|
"learning_rate": 5.546875e-06, |
|
"loss": 0.2279, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.14090019569471623, |
|
"grad_norm": 4.953585147857666, |
|
"learning_rate": 5.625e-06, |
|
"loss": 0.2028, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 4.1561102867126465, |
|
"learning_rate": 5.7031250000000006e-06, |
|
"loss": 0.2291, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.14481409001956946, |
|
"grad_norm": 5.00941801071167, |
|
"learning_rate": 5.781250000000001e-06, |
|
"loss": 0.2419, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.14677103718199608, |
|
"grad_norm": 3.6476099491119385, |
|
"learning_rate": 5.859375e-06, |
|
"loss": 0.1329, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.14677103718199608, |
|
"eval_loss": 0.13061992824077606, |
|
"eval_runtime": 107.3395, |
|
"eval_samples_per_second": 28.433, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8854112983780439, |
|
"eval_sts-test_pearson_dot": 0.8752625071185561, |
|
"eval_sts-test_pearson_euclidean": 0.9103378320010516, |
|
"eval_sts-test_pearson_manhattan": 0.9112261622276095, |
|
"eval_sts-test_pearson_max": 0.9112261622276095, |
|
"eval_sts-test_spearman_cosine": 0.9082604133844965, |
|
"eval_sts-test_spearman_dot": 0.8794192099454903, |
|
"eval_sts-test_spearman_euclidean": 0.9060063370994732, |
|
"eval_sts-test_spearman_manhattan": 0.90766132824825, |
|
"eval_sts-test_spearman_max": 0.9082604133844965, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1487279843444227, |
|
"grad_norm": 4.10636568069458, |
|
"learning_rate": 5.9375e-06, |
|
"loss": 0.204, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.1506849315068493, |
|
"grad_norm": 4.767779350280762, |
|
"learning_rate": 6.0156250000000005e-06, |
|
"loss": 0.2239, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.15264187866927592, |
|
"grad_norm": 5.366302490234375, |
|
"learning_rate": 6.093750000000001e-06, |
|
"loss": 0.2181, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.15459882583170254, |
|
"grad_norm": 4.087960720062256, |
|
"learning_rate": 6.171875e-06, |
|
"loss": 0.1285, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.15655577299412915, |
|
"grad_norm": 3.7557668685913086, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.1067, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15655577299412915, |
|
"eval_loss": 0.12924787402153015, |
|
"eval_runtime": 107.2528, |
|
"eval_samples_per_second": 28.456, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8850894038300653, |
|
"eval_sts-test_pearson_dot": 0.874941916465686, |
|
"eval_sts-test_pearson_euclidean": 0.9101863990952803, |
|
"eval_sts-test_pearson_manhattan": 0.9110826056950171, |
|
"eval_sts-test_pearson_max": 0.9110826056950171, |
|
"eval_sts-test_spearman_cosine": 0.9078700928826409, |
|
"eval_sts-test_spearman_dot": 0.8792947566875607, |
|
"eval_sts-test_spearman_euclidean": 0.9059290069197888, |
|
"eval_sts-test_spearman_manhattan": 0.9075206750336968, |
|
"eval_sts-test_spearman_max": 0.9078700928826409, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.15851272015655576, |
|
"grad_norm": 3.5708839893341064, |
|
"learning_rate": 6.3281250000000005e-06, |
|
"loss": 0.1189, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.16046966731898238, |
|
"grad_norm": 4.602839469909668, |
|
"learning_rate": 6.406250000000001e-06, |
|
"loss": 0.236, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.162426614481409, |
|
"grad_norm": 4.304513931274414, |
|
"learning_rate": 6.484375000000001e-06, |
|
"loss": 0.1584, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.1643835616438356, |
|
"grad_norm": 4.165163516998291, |
|
"learning_rate": 6.5625e-06, |
|
"loss": 0.1925, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.16634050880626222, |
|
"grad_norm": 3.9157192707061768, |
|
"learning_rate": 6.6406250000000005e-06, |
|
"loss": 0.129, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.16634050880626222, |
|
"eval_loss": 0.1278335303068161, |
|
"eval_runtime": 107.1978, |
|
"eval_samples_per_second": 28.471, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8845993101894516, |
|
"eval_sts-test_pearson_dot": 0.8740701762146532, |
|
"eval_sts-test_pearson_euclidean": 0.9100055922999684, |
|
"eval_sts-test_pearson_manhattan": 0.9108899080028133, |
|
"eval_sts-test_pearson_max": 0.9108899080028133, |
|
"eval_sts-test_spearman_cosine": 0.9078923342595523, |
|
"eval_sts-test_spearman_dot": 0.8788126513485913, |
|
"eval_sts-test_spearman_euclidean": 0.9057257466905491, |
|
"eval_sts-test_spearman_manhattan": 0.9070083178420268, |
|
"eval_sts-test_spearman_max": 0.9078923342595523, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.16829745596868884, |
|
"grad_norm": 4.233823776245117, |
|
"learning_rate": 6.718750000000001e-06, |
|
"loss": 0.1376, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.17025440313111545, |
|
"grad_norm": 4.670790195465088, |
|
"learning_rate": 6.796875000000001e-06, |
|
"loss": 0.1691, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.17221135029354206, |
|
"grad_norm": 3.742030382156372, |
|
"learning_rate": 6.875e-06, |
|
"loss": 0.1045, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.17416829745596868, |
|
"grad_norm": 4.242702960968018, |
|
"learning_rate": 6.9531250000000004e-06, |
|
"loss": 0.165, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.1761252446183953, |
|
"grad_norm": 5.499476909637451, |
|
"learning_rate": 7.031250000000001e-06, |
|
"loss": 0.2926, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1761252446183953, |
|
"eval_loss": 0.12669824063777924, |
|
"eval_runtime": 107.2778, |
|
"eval_samples_per_second": 28.45, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8844194771150324, |
|
"eval_sts-test_pearson_dot": 0.873458365713796, |
|
"eval_sts-test_pearson_euclidean": 0.9099396625521212, |
|
"eval_sts-test_pearson_manhattan": 0.910745898918033, |
|
"eval_sts-test_pearson_max": 0.910745898918033, |
|
"eval_sts-test_spearman_cosine": 0.907622707909669, |
|
"eval_sts-test_spearman_dot": 0.8783740442356941, |
|
"eval_sts-test_spearman_euclidean": 0.9058808545625318, |
|
"eval_sts-test_spearman_manhattan": 0.906889458491771, |
|
"eval_sts-test_spearman_max": 0.907622707909669, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.1780821917808219, |
|
"grad_norm": 2.992021083831787, |
|
"learning_rate": 7.109375000000001e-06, |
|
"loss": 0.1048, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.18003913894324852, |
|
"grad_norm": 4.298286437988281, |
|
"learning_rate": 7.1875e-06, |
|
"loss": 0.1596, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.18199608610567514, |
|
"grad_norm": 5.210509300231934, |
|
"learning_rate": 7.265625e-06, |
|
"loss": 0.2474, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.18395303326810175, |
|
"grad_norm": 4.527407169342041, |
|
"learning_rate": 7.343750000000001e-06, |
|
"loss": 0.1652, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.18590998043052837, |
|
"grad_norm": 5.302050590515137, |
|
"learning_rate": 7.421875000000001e-06, |
|
"loss": 0.2483, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.18590998043052837, |
|
"eval_loss": 0.1252526491880417, |
|
"eval_runtime": 107.5519, |
|
"eval_samples_per_second": 28.377, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.884272350180128, |
|
"eval_sts-test_pearson_dot": 0.8727334938335432, |
|
"eval_sts-test_pearson_euclidean": 0.9099441972021025, |
|
"eval_sts-test_pearson_manhattan": 0.9106991509833859, |
|
"eval_sts-test_pearson_max": 0.9106991509833859, |
|
"eval_sts-test_spearman_cosine": 0.9075948278738224, |
|
"eval_sts-test_spearman_dot": 0.87780624023116, |
|
"eval_sts-test_spearman_euclidean": 0.9060086194138042, |
|
"eval_sts-test_spearman_manhattan": 0.9069788267607697, |
|
"eval_sts-test_spearman_max": 0.9075948278738224, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.18786692759295498, |
|
"grad_norm": 3.690441608428955, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 0.1623, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.1898238747553816, |
|
"grad_norm": 4.585984706878662, |
|
"learning_rate": 7.578125e-06, |
|
"loss": 0.1955, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.1917808219178082, |
|
"grad_norm": 4.493942737579346, |
|
"learning_rate": 7.656250000000001e-06, |
|
"loss": 0.2023, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.19373776908023482, |
|
"grad_norm": 4.569936275482178, |
|
"learning_rate": 7.734375e-06, |
|
"loss": 0.1886, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.19569471624266144, |
|
"grad_norm": 3.7703664302825928, |
|
"learning_rate": 7.8125e-06, |
|
"loss": 0.1284, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19569471624266144, |
|
"eval_loss": 0.12290485948324203, |
|
"eval_runtime": 107.6958, |
|
"eval_samples_per_second": 28.339, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8836376979322419, |
|
"eval_sts-test_pearson_dot": 0.8710695777275684, |
|
"eval_sts-test_pearson_euclidean": 0.9098265834859519, |
|
"eval_sts-test_pearson_manhattan": 0.9106248996071287, |
|
"eval_sts-test_pearson_max": 0.9106248996071287, |
|
"eval_sts-test_spearman_cosine": 0.9078868298544011, |
|
"eval_sts-test_spearman_dot": 0.8773200625274038, |
|
"eval_sts-test_spearman_euclidean": 0.9063156130669492, |
|
"eval_sts-test_spearman_manhattan": 0.9071474495136926, |
|
"eval_sts-test_spearman_max": 0.9078868298544011, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19765166340508805, |
|
"grad_norm": 4.356619358062744, |
|
"learning_rate": 7.890625e-06, |
|
"loss": 0.2005, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.19960861056751467, |
|
"grad_norm": 4.293449878692627, |
|
"learning_rate": 7.96875e-06, |
|
"loss": 0.2301, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.20156555772994128, |
|
"grad_norm": 4.654509544372559, |
|
"learning_rate": 8.046875e-06, |
|
"loss": 0.2249, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.2035225048923679, |
|
"grad_norm": 4.510340213775635, |
|
"learning_rate": 8.125000000000001e-06, |
|
"loss": 0.214, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2054794520547945, |
|
"grad_norm": 3.880908489227295, |
|
"learning_rate": 8.203125000000001e-06, |
|
"loss": 0.1429, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2054794520547945, |
|
"eval_loss": 0.12076468020677567, |
|
"eval_runtime": 107.7074, |
|
"eval_samples_per_second": 28.336, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8828542959864998, |
|
"eval_sts-test_pearson_dot": 0.8689355363147886, |
|
"eval_sts-test_pearson_euclidean": 0.9096459762354197, |
|
"eval_sts-test_pearson_manhattan": 0.9104979967855148, |
|
"eval_sts-test_pearson_max": 0.9104979967855148, |
|
"eval_sts-test_spearman_cosine": 0.9076751563880199, |
|
"eval_sts-test_spearman_dot": 0.8750991469270715, |
|
"eval_sts-test_spearman_euclidean": 0.906379383614432, |
|
"eval_sts-test_spearman_manhattan": 0.9071111562407043, |
|
"eval_sts-test_spearman_max": 0.9076751563880199, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.20743639921722112, |
|
"grad_norm": 3.8524463176727295, |
|
"learning_rate": 8.281250000000001e-06, |
|
"loss": 0.17, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.20939334637964774, |
|
"grad_norm": 4.660905838012695, |
|
"learning_rate": 8.359375e-06, |
|
"loss": 0.1955, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.21135029354207435, |
|
"grad_norm": 4.391407012939453, |
|
"learning_rate": 8.4375e-06, |
|
"loss": 0.1964, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.21330724070450097, |
|
"grad_norm": 3.908740758895874, |
|
"learning_rate": 8.515625e-06, |
|
"loss": 0.1246, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.21526418786692758, |
|
"grad_norm": 3.295600414276123, |
|
"learning_rate": 8.59375e-06, |
|
"loss": 0.1295, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.21526418786692758, |
|
"eval_loss": 0.11901199817657471, |
|
"eval_runtime": 107.5373, |
|
"eval_samples_per_second": 28.381, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8820675142963768, |
|
"eval_sts-test_pearson_dot": 0.8664913359514981, |
|
"eval_sts-test_pearson_euclidean": 0.9093761405951237, |
|
"eval_sts-test_pearson_manhattan": 0.910248319457324, |
|
"eval_sts-test_pearson_max": 0.910248319457324, |
|
"eval_sts-test_spearman_cosine": 0.9071699146469111, |
|
"eval_sts-test_spearman_dot": 0.8726812810253556, |
|
"eval_sts-test_spearman_euclidean": 0.9064896954737618, |
|
"eval_sts-test_spearman_manhattan": 0.9068174537121922, |
|
"eval_sts-test_spearman_max": 0.9071699146469111, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2172211350293542, |
|
"grad_norm": 5.0308518409729, |
|
"learning_rate": 8.671875e-06, |
|
"loss": 0.2203, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.2191780821917808, |
|
"grad_norm": 4.501624584197998, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 0.2195, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.22113502935420742, |
|
"grad_norm": 4.200097560882568, |
|
"learning_rate": 8.828125000000001e-06, |
|
"loss": 0.1823, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.22309197651663404, |
|
"grad_norm": 3.6750545501708984, |
|
"learning_rate": 8.906250000000001e-06, |
|
"loss": 0.174, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.22504892367906065, |
|
"grad_norm": 4.105295181274414, |
|
"learning_rate": 8.984375000000002e-06, |
|
"loss": 0.207, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.22504892367906065, |
|
"eval_loss": 0.11745984107255936, |
|
"eval_runtime": 107.5979, |
|
"eval_samples_per_second": 28.365, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.882042560326929, |
|
"eval_sts-test_pearson_dot": 0.8653067979173212, |
|
"eval_sts-test_pearson_euclidean": 0.9095832495385563, |
|
"eval_sts-test_pearson_manhattan": 0.9103602950988618, |
|
"eval_sts-test_pearson_max": 0.9103602950988618, |
|
"eval_sts-test_spearman_cosine": 0.9068824772949942, |
|
"eval_sts-test_spearman_dot": 0.8714208617482668, |
|
"eval_sts-test_spearman_euclidean": 0.906395180809703, |
|
"eval_sts-test_spearman_manhattan": 0.9068741088091138, |
|
"eval_sts-test_spearman_max": 0.9068824772949942, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.22700587084148727, |
|
"grad_norm": 4.654273509979248, |
|
"learning_rate": 9.0625e-06, |
|
"loss": 0.2156, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.22896281800391388, |
|
"grad_norm": 4.661588191986084, |
|
"learning_rate": 9.140625e-06, |
|
"loss": 0.2202, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.2309197651663405, |
|
"grad_norm": 5.366416931152344, |
|
"learning_rate": 9.21875e-06, |
|
"loss": 0.2718, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.2328767123287671, |
|
"grad_norm": 3.672802448272705, |
|
"learning_rate": 9.296875e-06, |
|
"loss": 0.1387, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.23483365949119372, |
|
"grad_norm": 3.7878501415252686, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 0.1506, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23483365949119372, |
|
"eval_loss": 0.11679373681545258, |
|
"eval_runtime": 107.6687, |
|
"eval_samples_per_second": 28.346, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.882107468031623, |
|
"eval_sts-test_pearson_dot": 0.8647556765462645, |
|
"eval_sts-test_pearson_euclidean": 0.9099443435071429, |
|
"eval_sts-test_pearson_manhattan": 0.9105934104125866, |
|
"eval_sts-test_pearson_max": 0.9105934104125866, |
|
"eval_sts-test_spearman_cosine": 0.9068624287298908, |
|
"eval_sts-test_spearman_dot": 0.8710628964083971, |
|
"eval_sts-test_spearman_euclidean": 0.906624531024334, |
|
"eval_sts-test_spearman_manhattan": 0.9069254385059298, |
|
"eval_sts-test_spearman_max": 0.9069254385059298, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.23679060665362034, |
|
"grad_norm": 3.4761197566986084, |
|
"learning_rate": 9.453125000000001e-06, |
|
"loss": 0.1185, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.23874755381604695, |
|
"grad_norm": 3.9917871952056885, |
|
"learning_rate": 9.531250000000001e-06, |
|
"loss": 0.1681, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.24070450097847357, |
|
"grad_norm": 4.491674423217773, |
|
"learning_rate": 9.609375000000001e-06, |
|
"loss": 0.2321, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.24266144814090018, |
|
"grad_norm": 3.903496503829956, |
|
"learning_rate": 9.6875e-06, |
|
"loss": 0.1457, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.2446183953033268, |
|
"grad_norm": 5.046339988708496, |
|
"learning_rate": 9.765625e-06, |
|
"loss": 0.2027, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2446183953033268, |
|
"eval_loss": 0.11647585779428482, |
|
"eval_runtime": 107.5396, |
|
"eval_samples_per_second": 28.38, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8824938293263067, |
|
"eval_sts-test_pearson_dot": 0.8653100788410637, |
|
"eval_sts-test_pearson_euclidean": 0.9104636052712812, |
|
"eval_sts-test_pearson_manhattan": 0.9109341151161342, |
|
"eval_sts-test_pearson_max": 0.9109341151161342, |
|
"eval_sts-test_spearman_cosine": 0.9070702535877924, |
|
"eval_sts-test_spearman_dot": 0.8716920543922986, |
|
"eval_sts-test_spearman_euclidean": 0.9070027239343528, |
|
"eval_sts-test_spearman_manhattan": 0.9073061822378479, |
|
"eval_sts-test_spearman_max": 0.9073061822378479, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2465753424657534, |
|
"grad_norm": 4.304446697235107, |
|
"learning_rate": 9.84375e-06, |
|
"loss": 0.1821, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.24853228962818003, |
|
"grad_norm": 3.208357810974121, |
|
"learning_rate": 9.921875e-06, |
|
"loss": 0.1258, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.25048923679060664, |
|
"grad_norm": 4.275379657745361, |
|
"learning_rate": 1e-05, |
|
"loss": 0.184, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.25244618395303325, |
|
"grad_norm": 4.408608436584473, |
|
"learning_rate": 1.0078125000000001e-05, |
|
"loss": 0.2015, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.25440313111545987, |
|
"grad_norm": 3.565253973007202, |
|
"learning_rate": 1.0156250000000001e-05, |
|
"loss": 0.1323, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.25440313111545987, |
|
"eval_loss": 0.1154385656118393, |
|
"eval_runtime": 107.5442, |
|
"eval_samples_per_second": 28.379, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8820850631122565, |
|
"eval_sts-test_pearson_dot": 0.8648589750662984, |
|
"eval_sts-test_pearson_euclidean": 0.9105884442785888, |
|
"eval_sts-test_pearson_manhattan": 0.9109040210291837, |
|
"eval_sts-test_pearson_max": 0.9109040210291837, |
|
"eval_sts-test_spearman_cosine": 0.9074317095260507, |
|
"eval_sts-test_spearman_dot": 0.8710452196601474, |
|
"eval_sts-test_spearman_euclidean": 0.9070635408985837, |
|
"eval_sts-test_spearman_manhattan": 0.9074422260724778, |
|
"eval_sts-test_spearman_max": 0.9074422260724778, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2563600782778865, |
|
"grad_norm": 4.261953353881836, |
|
"learning_rate": 1.0234375000000001e-05, |
|
"loss": 0.1939, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.2583170254403131, |
|
"grad_norm": 3.806480646133423, |
|
"learning_rate": 1.0312500000000002e-05, |
|
"loss": 0.1428, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.2602739726027397, |
|
"grad_norm": 2.824733257293701, |
|
"learning_rate": 1.0390625e-05, |
|
"loss": 0.1063, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.2622309197651663, |
|
"grad_norm": 4.076455116271973, |
|
"learning_rate": 1.046875e-05, |
|
"loss": 0.1602, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.26418786692759294, |
|
"grad_norm": 3.7571659088134766, |
|
"learning_rate": 1.0546875e-05, |
|
"loss": 0.1814, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.26418786692759294, |
|
"eval_loss": 0.11387230455875397, |
|
"eval_runtime": 107.5968, |
|
"eval_samples_per_second": 28.365, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8812889193869892, |
|
"eval_sts-test_pearson_dot": 0.8634898982579755, |
|
"eval_sts-test_pearson_euclidean": 0.9104977472627025, |
|
"eval_sts-test_pearson_manhattan": 0.9107178140804983, |
|
"eval_sts-test_pearson_max": 0.9107178140804983, |
|
"eval_sts-test_spearman_cosine": 0.9066986391131981, |
|
"eval_sts-test_spearman_dot": 0.870129116588204, |
|
"eval_sts-test_spearman_euclidean": 0.9070359293703052, |
|
"eval_sts-test_spearman_manhattan": 0.9073414909830857, |
|
"eval_sts-test_spearman_max": 0.9073414909830857, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.26614481409001955, |
|
"grad_norm": 3.864948034286499, |
|
"learning_rate": 1.0625e-05, |
|
"loss": 0.1518, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.26810176125244617, |
|
"grad_norm": 3.5900001525878906, |
|
"learning_rate": 1.0703125000000001e-05, |
|
"loss": 0.1379, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.2700587084148728, |
|
"grad_norm": 4.291954517364502, |
|
"learning_rate": 1.0781250000000001e-05, |
|
"loss": 0.1708, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.2720156555772994, |
|
"grad_norm": 3.8340342044830322, |
|
"learning_rate": 1.0859375000000001e-05, |
|
"loss": 0.2046, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"grad_norm": 3.749396562576294, |
|
"learning_rate": 1.0937500000000002e-05, |
|
"loss": 0.1259, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.273972602739726, |
|
"eval_loss": 0.1124362125992775, |
|
"eval_runtime": 107.5142, |
|
"eval_samples_per_second": 28.387, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8805714116282963, |
|
"eval_sts-test_pearson_dot": 0.8618911680351633, |
|
"eval_sts-test_pearson_euclidean": 0.9102979980912764, |
|
"eval_sts-test_pearson_manhattan": 0.9105232760600299, |
|
"eval_sts-test_pearson_max": 0.9105232760600299, |
|
"eval_sts-test_spearman_cosine": 0.9063180743863257, |
|
"eval_sts-test_spearman_dot": 0.8687826406354595, |
|
"eval_sts-test_spearman_euclidean": 0.9070556199253175, |
|
"eval_sts-test_spearman_manhattan": 0.9073570196707885, |
|
"eval_sts-test_spearman_max": 0.9073570196707885, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2759295499021526, |
|
"grad_norm": 2.8815276622772217, |
|
"learning_rate": 1.1015625e-05, |
|
"loss": 0.1181, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.27788649706457924, |
|
"grad_norm": 3.766554355621338, |
|
"learning_rate": 1.109375e-05, |
|
"loss": 0.2144, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.27984344422700586, |
|
"grad_norm": 4.289268493652344, |
|
"learning_rate": 1.1171875e-05, |
|
"loss": 0.1822, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.28180039138943247, |
|
"grad_norm": 3.9036617279052734, |
|
"learning_rate": 1.125e-05, |
|
"loss": 0.1667, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.2837573385518591, |
|
"grad_norm": 3.321366786956787, |
|
"learning_rate": 1.1328125000000001e-05, |
|
"loss": 0.0779, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2837573385518591, |
|
"eval_loss": 0.1118142157793045, |
|
"eval_runtime": 107.3173, |
|
"eval_samples_per_second": 28.439, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8796044904115364, |
|
"eval_sts-test_pearson_dot": 0.8607678603166254, |
|
"eval_sts-test_pearson_euclidean": 0.9097479995877322, |
|
"eval_sts-test_pearson_manhattan": 0.9098650580518599, |
|
"eval_sts-test_pearson_max": 0.9098650580518599, |
|
"eval_sts-test_spearman_cosine": 0.9059690592987342, |
|
"eval_sts-test_spearman_dot": 0.8685229490656053, |
|
"eval_sts-test_spearman_euclidean": 0.90680836920613, |
|
"eval_sts-test_spearman_manhattan": 0.9069437865231001, |
|
"eval_sts-test_spearman_max": 0.9069437865231001, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 3.460301160812378, |
|
"learning_rate": 1.1406250000000001e-05, |
|
"loss": 0.147, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.2876712328767123, |
|
"grad_norm": 3.8999266624450684, |
|
"learning_rate": 1.1484375000000001e-05, |
|
"loss": 0.1913, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.2896281800391389, |
|
"grad_norm": 3.539788007736206, |
|
"learning_rate": 1.1562500000000002e-05, |
|
"loss": 0.1357, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.29158512720156554, |
|
"grad_norm": 3.499439001083374, |
|
"learning_rate": 1.1640625000000002e-05, |
|
"loss": 0.1128, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.29354207436399216, |
|
"grad_norm": 3.2960240840911865, |
|
"learning_rate": 1.171875e-05, |
|
"loss": 0.0996, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.29354207436399216, |
|
"eval_loss": 0.11132737249135971, |
|
"eval_runtime": 107.5867, |
|
"eval_samples_per_second": 28.368, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8787852416493207, |
|
"eval_sts-test_pearson_dot": 0.8593025559452621, |
|
"eval_sts-test_pearson_euclidean": 0.9091617970047303, |
|
"eval_sts-test_pearson_manhattan": 0.9091664157178929, |
|
"eval_sts-test_pearson_max": 0.9091664157178929, |
|
"eval_sts-test_spearman_cosine": 0.9054375485671886, |
|
"eval_sts-test_spearman_dot": 0.867029912731804, |
|
"eval_sts-test_spearman_euclidean": 0.9062253050214613, |
|
"eval_sts-test_spearman_manhattan": 0.9062610165280517, |
|
"eval_sts-test_spearman_max": 0.9062610165280517, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.29549902152641877, |
|
"grad_norm": 4.271719932556152, |
|
"learning_rate": 1.1796875e-05, |
|
"loss": 0.1956, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.2974559686888454, |
|
"grad_norm": 3.168663501739502, |
|
"learning_rate": 1.1875e-05, |
|
"loss": 0.0942, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.299412915851272, |
|
"grad_norm": 3.816993236541748, |
|
"learning_rate": 1.1953125000000001e-05, |
|
"loss": 0.1406, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.3013698630136986, |
|
"grad_norm": 5.383023738861084, |
|
"learning_rate": 1.2031250000000001e-05, |
|
"loss": 0.2868, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.30332681017612523, |
|
"grad_norm": 3.123462677001953, |
|
"learning_rate": 1.2109375000000001e-05, |
|
"loss": 0.1102, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.30332681017612523, |
|
"eval_loss": 0.11142811924219131, |
|
"eval_runtime": 107.3019, |
|
"eval_samples_per_second": 28.443, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8780761726881443, |
|
"eval_sts-test_pearson_dot": 0.8581767032057357, |
|
"eval_sts-test_pearson_euclidean": 0.9081534036571242, |
|
"eval_sts-test_pearson_manhattan": 0.9081724370385316, |
|
"eval_sts-test_pearson_max": 0.9081724370385316, |
|
"eval_sts-test_spearman_cosine": 0.9048428490545583, |
|
"eval_sts-test_spearman_dot": 0.8670075818523697, |
|
"eval_sts-test_spearman_euclidean": 0.9052714766361651, |
|
"eval_sts-test_spearman_manhattan": 0.9054467225757737, |
|
"eval_sts-test_spearman_max": 0.9054467225757737, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.30528375733855184, |
|
"grad_norm": 4.1034979820251465, |
|
"learning_rate": 1.2187500000000001e-05, |
|
"loss": 0.1659, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.30724070450097846, |
|
"grad_norm": 3.60249400138855, |
|
"learning_rate": 1.2265625000000002e-05, |
|
"loss": 0.1645, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.30919765166340507, |
|
"grad_norm": 3.771853446960449, |
|
"learning_rate": 1.234375e-05, |
|
"loss": 0.151, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.3111545988258317, |
|
"grad_norm": 4.291686058044434, |
|
"learning_rate": 1.2421875e-05, |
|
"loss": 0.158, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.3131115459882583, |
|
"grad_norm": 5.1689453125, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.2323, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3131115459882583, |
|
"eval_loss": 0.11126424372196198, |
|
"eval_runtime": 107.301, |
|
"eval_samples_per_second": 28.443, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8777597983330929, |
|
"eval_sts-test_pearson_dot": 0.8577739588604719, |
|
"eval_sts-test_pearson_euclidean": 0.9075483317216817, |
|
"eval_sts-test_pearson_manhattan": 0.9075908461381532, |
|
"eval_sts-test_pearson_max": 0.9075908461381532, |
|
"eval_sts-test_spearman_cosine": 0.9047649818597372, |
|
"eval_sts-test_spearman_dot": 0.867389712873391, |
|
"eval_sts-test_spearman_euclidean": 0.9048189966322366, |
|
"eval_sts-test_spearman_manhattan": 0.9049692713679889, |
|
"eval_sts-test_spearman_max": 0.9049692713679889, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3150684931506849, |
|
"grad_norm": 3.304703712463379, |
|
"learning_rate": 1.2578125e-05, |
|
"loss": 0.1157, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.31702544031311153, |
|
"grad_norm": 4.064731121063232, |
|
"learning_rate": 1.2656250000000001e-05, |
|
"loss": 0.1507, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.31898238747553814, |
|
"grad_norm": 4.615545749664307, |
|
"learning_rate": 1.2734375000000001e-05, |
|
"loss": 0.1879, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.32093933463796476, |
|
"grad_norm": 3.767533540725708, |
|
"learning_rate": 1.2812500000000001e-05, |
|
"loss": 0.143, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.32289628180039137, |
|
"grad_norm": 4.727967262268066, |
|
"learning_rate": 1.2890625000000002e-05, |
|
"loss": 0.2227, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.32289628180039137, |
|
"eval_loss": 0.11155427247285843, |
|
"eval_runtime": 107.2898, |
|
"eval_samples_per_second": 28.446, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8775899700998113, |
|
"eval_sts-test_pearson_dot": 0.8571711542435376, |
|
"eval_sts-test_pearson_euclidean": 0.907399950708088, |
|
"eval_sts-test_pearson_manhattan": 0.9073879045697356, |
|
"eval_sts-test_pearson_max": 0.907399950708088, |
|
"eval_sts-test_spearman_cosine": 0.9049959431197784, |
|
"eval_sts-test_spearman_dot": 0.8667648957618442, |
|
"eval_sts-test_spearman_euclidean": 0.9048916279294749, |
|
"eval_sts-test_spearman_manhattan": 0.9050786882020909, |
|
"eval_sts-test_spearman_max": 0.9050786882020909, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.324853228962818, |
|
"grad_norm": 4.0150017738342285, |
|
"learning_rate": 1.2968750000000002e-05, |
|
"loss": 0.1624, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.3268101761252446, |
|
"grad_norm": 3.021153450012207, |
|
"learning_rate": 1.3046875e-05, |
|
"loss": 0.1345, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.3287671232876712, |
|
"grad_norm": 3.869710922241211, |
|
"learning_rate": 1.3125e-05, |
|
"loss": 0.1765, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.33072407045009783, |
|
"grad_norm": 3.538076162338257, |
|
"learning_rate": 1.3203125e-05, |
|
"loss": 0.1368, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.33268101761252444, |
|
"grad_norm": 3.378551483154297, |
|
"learning_rate": 1.3281250000000001e-05, |
|
"loss": 0.0962, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.33268101761252444, |
|
"eval_loss": 0.11131894588470459, |
|
"eval_runtime": 107.3532, |
|
"eval_samples_per_second": 28.43, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8782576778514848, |
|
"eval_sts-test_pearson_dot": 0.8576530243239538, |
|
"eval_sts-test_pearson_euclidean": 0.9077401564122008, |
|
"eval_sts-test_pearson_manhattan": 0.907609849534313, |
|
"eval_sts-test_pearson_max": 0.9077401564122008, |
|
"eval_sts-test_spearman_cosine": 0.9055560946586144, |
|
"eval_sts-test_spearman_dot": 0.8666707838591381, |
|
"eval_sts-test_spearman_euclidean": 0.9054064016892602, |
|
"eval_sts-test_spearman_manhattan": 0.9054834186101147, |
|
"eval_sts-test_spearman_max": 0.9055560946586144, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.33463796477495106, |
|
"grad_norm": 4.588249683380127, |
|
"learning_rate": 1.3359375000000001e-05, |
|
"loss": 0.1783, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.33659491193737767, |
|
"grad_norm": 4.370199680328369, |
|
"learning_rate": 1.3437500000000001e-05, |
|
"loss": 0.2019, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.3385518590998043, |
|
"grad_norm": 4.000157356262207, |
|
"learning_rate": 1.3515625000000002e-05, |
|
"loss": 0.1761, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.3405088062622309, |
|
"grad_norm": 4.3335862159729, |
|
"learning_rate": 1.3593750000000002e-05, |
|
"loss": 0.1855, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"grad_norm": 4.247244358062744, |
|
"learning_rate": 1.3671875e-05, |
|
"loss": 0.1922, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3424657534246575, |
|
"eval_loss": 0.1105586364865303, |
|
"eval_runtime": 107.3507, |
|
"eval_samples_per_second": 28.43, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8775475016000731, |
|
"eval_sts-test_pearson_dot": 0.8543732981082479, |
|
"eval_sts-test_pearson_euclidean": 0.9076643456809551, |
|
"eval_sts-test_pearson_manhattan": 0.9075054089199206, |
|
"eval_sts-test_pearson_max": 0.9076643456809551, |
|
"eval_sts-test_spearman_cosine": 0.905357578063082, |
|
"eval_sts-test_spearman_dot": 0.8628476388472094, |
|
"eval_sts-test_spearman_euclidean": 0.9054710672619708, |
|
"eval_sts-test_spearman_manhattan": 0.9055309444497123, |
|
"eval_sts-test_spearman_max": 0.9055309444497123, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.34442270058708413, |
|
"grad_norm": 3.881108522415161, |
|
"learning_rate": 1.375e-05, |
|
"loss": 0.1538, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.34637964774951074, |
|
"grad_norm": 3.4271416664123535, |
|
"learning_rate": 1.3828125e-05, |
|
"loss": 0.1049, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.34833659491193736, |
|
"grad_norm": 3.7847940921783447, |
|
"learning_rate": 1.3906250000000001e-05, |
|
"loss": 0.1619, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.350293542074364, |
|
"grad_norm": 2.3725311756134033, |
|
"learning_rate": 1.3984375000000001e-05, |
|
"loss": 0.0731, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.3522504892367906, |
|
"grad_norm": 3.6820032596588135, |
|
"learning_rate": 1.4062500000000001e-05, |
|
"loss": 0.1205, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3522504892367906, |
|
"eval_loss": 0.10974939167499542, |
|
"eval_runtime": 107.353, |
|
"eval_samples_per_second": 28.43, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8782123578217031, |
|
"eval_sts-test_pearson_dot": 0.852106566478191, |
|
"eval_sts-test_pearson_euclidean": 0.9088860377565003, |
|
"eval_sts-test_pearson_manhattan": 0.9087269620613702, |
|
"eval_sts-test_pearson_max": 0.9088860377565003, |
|
"eval_sts-test_spearman_cosine": 0.9058966517578029, |
|
"eval_sts-test_spearman_dot": 0.8595467858069799, |
|
"eval_sts-test_spearman_euclidean": 0.9064047128283795, |
|
"eval_sts-test_spearman_manhattan": 0.9067846510375924, |
|
"eval_sts-test_spearman_max": 0.9067846510375924, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3542074363992172, |
|
"grad_norm": 3.7714688777923584, |
|
"learning_rate": 1.4140625000000002e-05, |
|
"loss": 0.169, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.3561643835616438, |
|
"grad_norm": 3.7113559246063232, |
|
"learning_rate": 1.4218750000000002e-05, |
|
"loss": 0.1688, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.35812133072407043, |
|
"grad_norm": 3.1639597415924072, |
|
"learning_rate": 1.4296875000000002e-05, |
|
"loss": 0.1274, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.36007827788649704, |
|
"grad_norm": 4.144288539886475, |
|
"learning_rate": 1.4375e-05, |
|
"loss": 0.1477, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.36203522504892366, |
|
"grad_norm": 3.4342098236083984, |
|
"learning_rate": 1.4453125e-05, |
|
"loss": 0.1418, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.36203522504892366, |
|
"eval_loss": 0.10942607372999191, |
|
"eval_runtime": 107.2679, |
|
"eval_samples_per_second": 28.452, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8778855142398189, |
|
"eval_sts-test_pearson_dot": 0.8501658695420333, |
|
"eval_sts-test_pearson_euclidean": 0.9088432870055996, |
|
"eval_sts-test_pearson_manhattan": 0.9086435133118579, |
|
"eval_sts-test_pearson_max": 0.9088432870055996, |
|
"eval_sts-test_spearman_cosine": 0.9055185931015683, |
|
"eval_sts-test_spearman_dot": 0.8575025481866207, |
|
"eval_sts-test_spearman_euclidean": 0.9063994321795352, |
|
"eval_sts-test_spearman_manhattan": 0.9064969899293684, |
|
"eval_sts-test_spearman_max": 0.9064969899293684, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.3639921722113503, |
|
"grad_norm": 4.744626045227051, |
|
"learning_rate": 1.453125e-05, |
|
"loss": 0.2477, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.3659491193737769, |
|
"grad_norm": 4.062248229980469, |
|
"learning_rate": 1.4609375000000001e-05, |
|
"loss": 0.1713, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.3679060665362035, |
|
"grad_norm": 3.989694833755493, |
|
"learning_rate": 1.4687500000000001e-05, |
|
"loss": 0.1703, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.3698630136986301, |
|
"grad_norm": 3.3543660640716553, |
|
"learning_rate": 1.4765625000000001e-05, |
|
"loss": 0.1176, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.37181996086105673, |
|
"grad_norm": 4.307045936584473, |
|
"learning_rate": 1.4843750000000002e-05, |
|
"loss": 0.1811, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.37181996086105673, |
|
"eval_loss": 0.10837770998477936, |
|
"eval_runtime": 107.3429, |
|
"eval_samples_per_second": 28.432, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8774103555789884, |
|
"eval_sts-test_pearson_dot": 0.84942827650618, |
|
"eval_sts-test_pearson_euclidean": 0.9086430009253119, |
|
"eval_sts-test_pearson_manhattan": 0.9084642534632353, |
|
"eval_sts-test_pearson_max": 0.9086430009253119, |
|
"eval_sts-test_spearman_cosine": 0.9048482639571866, |
|
"eval_sts-test_spearman_dot": 0.8562155914115267, |
|
"eval_sts-test_spearman_euclidean": 0.9060070531196555, |
|
"eval_sts-test_spearman_manhattan": 0.9061608184537963, |
|
"eval_sts-test_spearman_max": 0.9061608184537963, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.37377690802348335, |
|
"grad_norm": 4.140930652618408, |
|
"learning_rate": 1.4921875000000002e-05, |
|
"loss": 0.162, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.37573385518590996, |
|
"grad_norm": 2.7555642127990723, |
|
"learning_rate": 1.5000000000000002e-05, |
|
"loss": 0.1141, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.3776908023483366, |
|
"grad_norm": 4.070343017578125, |
|
"learning_rate": 1.5078125e-05, |
|
"loss": 0.154, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.3796477495107632, |
|
"grad_norm": 4.453440189361572, |
|
"learning_rate": 1.515625e-05, |
|
"loss": 0.2461, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.3816046966731898, |
|
"grad_norm": 3.7656772136688232, |
|
"learning_rate": 1.5234375000000001e-05, |
|
"loss": 0.1573, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3816046966731898, |
|
"eval_loss": 0.10762027651071548, |
|
"eval_runtime": 107.299, |
|
"eval_samples_per_second": 28.444, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8779461080888007, |
|
"eval_sts-test_pearson_dot": 0.8521074278329072, |
|
"eval_sts-test_pearson_euclidean": 0.9087045359990432, |
|
"eval_sts-test_pearson_manhattan": 0.9086340705654771, |
|
"eval_sts-test_pearson_max": 0.9087045359990432, |
|
"eval_sts-test_spearman_cosine": 0.9045706718827756, |
|
"eval_sts-test_spearman_dot": 0.8584340456924826, |
|
"eval_sts-test_spearman_euclidean": 0.9055143864829975, |
|
"eval_sts-test_spearman_manhattan": 0.9058283613329196, |
|
"eval_sts-test_spearman_max": 0.9058283613329196, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3835616438356164, |
|
"grad_norm": 3.063400983810425, |
|
"learning_rate": 1.5312500000000003e-05, |
|
"loss": 0.1197, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.38551859099804303, |
|
"grad_norm": 3.893153429031372, |
|
"learning_rate": 1.5390625e-05, |
|
"loss": 0.1395, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.38747553816046965, |
|
"grad_norm": 2.95540714263916, |
|
"learning_rate": 1.546875e-05, |
|
"loss": 0.0847, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.38943248532289626, |
|
"grad_norm": 3.4665300846099854, |
|
"learning_rate": 1.5546875e-05, |
|
"loss": 0.1848, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.3913894324853229, |
|
"grad_norm": 3.6926543712615967, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 0.1377, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3913894324853229, |
|
"eval_loss": 0.10723523795604706, |
|
"eval_runtime": 107.245, |
|
"eval_samples_per_second": 28.458, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.877994665901344, |
|
"eval_sts-test_pearson_dot": 0.854134605280733, |
|
"eval_sts-test_pearson_euclidean": 0.9085191117850383, |
|
"eval_sts-test_pearson_manhattan": 0.9086424100414001, |
|
"eval_sts-test_pearson_max": 0.9086424100414001, |
|
"eval_sts-test_spearman_cosine": 0.904685279863199, |
|
"eval_sts-test_spearman_dot": 0.8598855528557127, |
|
"eval_sts-test_spearman_euclidean": 0.9052407772708506, |
|
"eval_sts-test_spearman_manhattan": 0.9058868959828196, |
|
"eval_sts-test_spearman_max": 0.9058868959828196, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3933463796477495, |
|
"grad_norm": 3.303112268447876, |
|
"learning_rate": 1.5703125e-05, |
|
"loss": 0.1109, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.3953033268101761, |
|
"grad_norm": 3.4490058422088623, |
|
"learning_rate": 1.578125e-05, |
|
"loss": 0.1051, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.3972602739726027, |
|
"grad_norm": 2.6598286628723145, |
|
"learning_rate": 1.5859375e-05, |
|
"loss": 0.0975, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.39921722113502933, |
|
"grad_norm": 3.373512029647827, |
|
"learning_rate": 1.59375e-05, |
|
"loss": 0.127, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.40117416829745595, |
|
"grad_norm": 3.1471354961395264, |
|
"learning_rate": 1.6015625e-05, |
|
"loss": 0.1297, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.40117416829745595, |
|
"eval_loss": 0.10685314983129501, |
|
"eval_runtime": 107.3321, |
|
"eval_samples_per_second": 28.435, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8785914848590666, |
|
"eval_sts-test_pearson_dot": 0.8570818659891223, |
|
"eval_sts-test_pearson_euclidean": 0.9086611488562145, |
|
"eval_sts-test_pearson_manhattan": 0.9087606701935215, |
|
"eval_sts-test_pearson_max": 0.9087606701935215, |
|
"eval_sts-test_spearman_cosine": 0.9048987433800361, |
|
"eval_sts-test_spearman_dot": 0.8616398023022556, |
|
"eval_sts-test_spearman_euclidean": 0.9052247563192726, |
|
"eval_sts-test_spearman_manhattan": 0.9056138237858093, |
|
"eval_sts-test_spearman_max": 0.9056138237858093, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.40313111545988256, |
|
"grad_norm": 2.6924684047698975, |
|
"learning_rate": 1.609375e-05, |
|
"loss": 0.0783, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.4050880626223092, |
|
"grad_norm": 2.1100542545318604, |
|
"learning_rate": 1.6171875000000002e-05, |
|
"loss": 0.053, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.4070450097847358, |
|
"grad_norm": 3.7984156608581543, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 0.1916, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.4090019569471624, |
|
"grad_norm": 4.329834461212158, |
|
"learning_rate": 1.6328125000000002e-05, |
|
"loss": 0.178, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.410958904109589, |
|
"grad_norm": 4.427723407745361, |
|
"learning_rate": 1.6406250000000002e-05, |
|
"loss": 0.2343, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.410958904109589, |
|
"eval_loss": 0.10670512914657593, |
|
"eval_runtime": 107.2313, |
|
"eval_samples_per_second": 28.462, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8788965355860006, |
|
"eval_sts-test_pearson_dot": 0.8580075676260999, |
|
"eval_sts-test_pearson_euclidean": 0.908776492246521, |
|
"eval_sts-test_pearson_manhattan": 0.9089340980301853, |
|
"eval_sts-test_pearson_max": 0.9089340980301853, |
|
"eval_sts-test_spearman_cosine": 0.90530862018312, |
|
"eval_sts-test_spearman_dot": 0.8630207814775328, |
|
"eval_sts-test_spearman_euclidean": 0.905449362900196, |
|
"eval_sts-test_spearman_manhattan": 0.9056519071092534, |
|
"eval_sts-test_spearman_max": 0.9056519071092534, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.41291585127201563, |
|
"grad_norm": 3.890899419784546, |
|
"learning_rate": 1.6484375000000003e-05, |
|
"loss": 0.1816, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.41487279843444225, |
|
"grad_norm": 4.071934700012207, |
|
"learning_rate": 1.6562500000000003e-05, |
|
"loss": 0.2522, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.41682974559686886, |
|
"grad_norm": 3.8046796321868896, |
|
"learning_rate": 1.6640625000000003e-05, |
|
"loss": 0.1787, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.4187866927592955, |
|
"grad_norm": 3.357276201248169, |
|
"learning_rate": 1.671875e-05, |
|
"loss": 0.1913, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.4207436399217221, |
|
"grad_norm": 3.8679873943328857, |
|
"learning_rate": 1.6796875e-05, |
|
"loss": 0.175, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4207436399217221, |
|
"eval_loss": 0.10552908480167389, |
|
"eval_runtime": 107.6412, |
|
"eval_samples_per_second": 28.353, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8791676817178924, |
|
"eval_sts-test_pearson_dot": 0.8573342496118925, |
|
"eval_sts-test_pearson_euclidean": 0.909475190469058, |
|
"eval_sts-test_pearson_manhattan": 0.9097533727394405, |
|
"eval_sts-test_pearson_max": 0.9097533727394405, |
|
"eval_sts-test_spearman_cosine": 0.9056468502167161, |
|
"eval_sts-test_spearman_dot": 0.8624976392318674, |
|
"eval_sts-test_spearman_euclidean": 0.9066117769148375, |
|
"eval_sts-test_spearman_manhattan": 0.9069566301351195, |
|
"eval_sts-test_spearman_max": 0.9069566301351195, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4227005870841487, |
|
"grad_norm": 3.436488389968872, |
|
"learning_rate": 1.6875e-05, |
|
"loss": 0.1533, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.4246575342465753, |
|
"grad_norm": 3.891040563583374, |
|
"learning_rate": 1.6953125e-05, |
|
"loss": 0.1819, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.42661448140900193, |
|
"grad_norm": 4.554884910583496, |
|
"learning_rate": 1.703125e-05, |
|
"loss": 0.2541, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 3.4431850910186768, |
|
"learning_rate": 1.7109375e-05, |
|
"loss": 0.1103, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.43052837573385516, |
|
"grad_norm": 3.5396361351013184, |
|
"learning_rate": 1.71875e-05, |
|
"loss": 0.1693, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.43052837573385516, |
|
"eval_loss": 0.10396925359964371, |
|
"eval_runtime": 107.33, |
|
"eval_samples_per_second": 28.436, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8794186180626897, |
|
"eval_sts-test_pearson_dot": 0.8555325369075935, |
|
"eval_sts-test_pearson_euclidean": 0.9099071011406157, |
|
"eval_sts-test_pearson_manhattan": 0.9104095617945829, |
|
"eval_sts-test_pearson_max": 0.9104095617945829, |
|
"eval_sts-test_spearman_cosine": 0.9061536582519738, |
|
"eval_sts-test_spearman_dot": 0.8609769018672648, |
|
"eval_sts-test_spearman_euclidean": 0.9068523149448162, |
|
"eval_sts-test_spearman_manhattan": 0.9075606826613808, |
|
"eval_sts-test_spearman_max": 0.9075606826613808, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4324853228962818, |
|
"grad_norm": 3.4416589736938477, |
|
"learning_rate": 1.7265625e-05, |
|
"loss": 0.1233, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.4344422700587084, |
|
"grad_norm": 2.9554316997528076, |
|
"learning_rate": 1.734375e-05, |
|
"loss": 0.0922, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.436399217221135, |
|
"grad_norm": 3.1570141315460205, |
|
"learning_rate": 1.7421875e-05, |
|
"loss": 0.1243, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.4383561643835616, |
|
"grad_norm": 3.8479344844818115, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 0.1613, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.44031311154598823, |
|
"grad_norm": 3.004990339279175, |
|
"learning_rate": 1.7578125000000002e-05, |
|
"loss": 0.1188, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.44031311154598823, |
|
"eval_loss": 0.1029738187789917, |
|
"eval_runtime": 107.2661, |
|
"eval_samples_per_second": 28.453, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8796765477862789, |
|
"eval_sts-test_pearson_dot": 0.8576485629522204, |
|
"eval_sts-test_pearson_euclidean": 0.9098263075403831, |
|
"eval_sts-test_pearson_manhattan": 0.9104321398639006, |
|
"eval_sts-test_pearson_max": 0.9104321398639006, |
|
"eval_sts-test_spearman_cosine": 0.9064603386462892, |
|
"eval_sts-test_spearman_dot": 0.8635142088856343, |
|
"eval_sts-test_spearman_euclidean": 0.9066103896257344, |
|
"eval_sts-test_spearman_manhattan": 0.9076328216947436, |
|
"eval_sts-test_spearman_max": 0.9076328216947436, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.44227005870841485, |
|
"grad_norm": 3.595667839050293, |
|
"learning_rate": 1.7656250000000002e-05, |
|
"loss": 0.196, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.44422700587084146, |
|
"grad_norm": 3.9599428176879883, |
|
"learning_rate": 1.7734375000000002e-05, |
|
"loss": 0.2254, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.4461839530332681, |
|
"grad_norm": 3.2490875720977783, |
|
"learning_rate": 1.7812500000000003e-05, |
|
"loss": 0.1162, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.4481409001956947, |
|
"grad_norm": 4.811342239379883, |
|
"learning_rate": 1.7890625000000003e-05, |
|
"loss": 0.2579, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.4500978473581213, |
|
"grad_norm": 2.993255138397217, |
|
"learning_rate": 1.7968750000000003e-05, |
|
"loss": 0.1203, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4500978473581213, |
|
"eval_loss": 0.102933868765831, |
|
"eval_runtime": 107.2515, |
|
"eval_samples_per_second": 28.456, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8799758353085696, |
|
"eval_sts-test_pearson_dot": 0.8592997081846103, |
|
"eval_sts-test_pearson_euclidean": 0.9101945793558552, |
|
"eval_sts-test_pearson_manhattan": 0.9106837055219174, |
|
"eval_sts-test_pearson_max": 0.9106837055219174, |
|
"eval_sts-test_spearman_cosine": 0.9071432428951217, |
|
"eval_sts-test_spearman_dot": 0.865314059867535, |
|
"eval_sts-test_spearman_euclidean": 0.9072587906520344, |
|
"eval_sts-test_spearman_manhattan": 0.9077949555147645, |
|
"eval_sts-test_spearman_max": 0.9077949555147645, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.4520547945205479, |
|
"grad_norm": 3.654191017150879, |
|
"learning_rate": 1.8046875e-05, |
|
"loss": 0.1654, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.45401174168297453, |
|
"grad_norm": 3.429565668106079, |
|
"learning_rate": 1.8125e-05, |
|
"loss": 0.1808, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.45596868884540115, |
|
"grad_norm": 3.5679566860198975, |
|
"learning_rate": 1.8203125e-05, |
|
"loss": 0.1397, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.45792563600782776, |
|
"grad_norm": 3.9862124919891357, |
|
"learning_rate": 1.828125e-05, |
|
"loss": 0.2177, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.4598825831702544, |
|
"grad_norm": 3.536984443664551, |
|
"learning_rate": 1.8359375e-05, |
|
"loss": 0.162, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.4598825831702544, |
|
"eval_loss": 0.10404225438833237, |
|
"eval_runtime": 107.254, |
|
"eval_samples_per_second": 28.456, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8802088610554777, |
|
"eval_sts-test_pearson_dot": 0.8618209119350905, |
|
"eval_sts-test_pearson_euclidean": 0.9103461475031536, |
|
"eval_sts-test_pearson_manhattan": 0.9106782364335553, |
|
"eval_sts-test_pearson_max": 0.9106782364335553, |
|
"eval_sts-test_spearman_cosine": 0.9077748174471387, |
|
"eval_sts-test_spearman_dot": 0.8686349167216066, |
|
"eval_sts-test_spearman_euclidean": 0.907571109705285, |
|
"eval_sts-test_spearman_manhattan": 0.9080472631264893, |
|
"eval_sts-test_spearman_max": 0.9080472631264893, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.461839530332681, |
|
"grad_norm": 3.2987570762634277, |
|
"learning_rate": 1.84375e-05, |
|
"loss": 0.177, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.4637964774951076, |
|
"grad_norm": 1.792919397354126, |
|
"learning_rate": 1.8515625e-05, |
|
"loss": 0.0556, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.4657534246575342, |
|
"grad_norm": 3.8270483016967773, |
|
"learning_rate": 1.859375e-05, |
|
"loss": 0.2285, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.46771037181996084, |
|
"grad_norm": 3.2458577156066895, |
|
"learning_rate": 1.8671875e-05, |
|
"loss": 0.1657, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.46966731898238745, |
|
"grad_norm": 4.352839469909668, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.2555, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.46966731898238745, |
|
"eval_loss": 0.10528620332479477, |
|
"eval_runtime": 107.3201, |
|
"eval_samples_per_second": 28.438, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8794205585889476, |
|
"eval_sts-test_pearson_dot": 0.8616236846471828, |
|
"eval_sts-test_pearson_euclidean": 0.9100171674371834, |
|
"eval_sts-test_pearson_manhattan": 0.9102120642982687, |
|
"eval_sts-test_pearson_max": 0.9102120642982687, |
|
"eval_sts-test_spearman_cosine": 0.9076779309662261, |
|
"eval_sts-test_spearman_dot": 0.8702396969551023, |
|
"eval_sts-test_spearman_euclidean": 0.9078436896384199, |
|
"eval_sts-test_spearman_manhattan": 0.9080407741935878, |
|
"eval_sts-test_spearman_max": 0.9080407741935878, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.47162426614481406, |
|
"grad_norm": 3.644327163696289, |
|
"learning_rate": 1.8828125000000002e-05, |
|
"loss": 0.1606, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.4735812133072407, |
|
"grad_norm": 3.0316474437713623, |
|
"learning_rate": 1.8906250000000002e-05, |
|
"loss": 0.1257, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.4755381604696673, |
|
"grad_norm": 3.8527326583862305, |
|
"learning_rate": 1.8984375000000002e-05, |
|
"loss": 0.1898, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.4774951076320939, |
|
"grad_norm": 3.91603422164917, |
|
"learning_rate": 1.9062500000000003e-05, |
|
"loss": 0.1621, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.4794520547945205, |
|
"grad_norm": 3.6845171451568604, |
|
"learning_rate": 1.9140625000000003e-05, |
|
"loss": 0.1606, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4794520547945205, |
|
"eval_loss": 0.10541080683469772, |
|
"eval_runtime": 107.3443, |
|
"eval_samples_per_second": 28.432, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8782579310286232, |
|
"eval_sts-test_pearson_dot": 0.8596847230641689, |
|
"eval_sts-test_pearson_euclidean": 0.909741577402618, |
|
"eval_sts-test_pearson_manhattan": 0.9098438643121189, |
|
"eval_sts-test_pearson_max": 0.9098438643121189, |
|
"eval_sts-test_spearman_cosine": 0.9078928712746891, |
|
"eval_sts-test_spearman_dot": 0.8682800392187727, |
|
"eval_sts-test_spearman_euclidean": 0.9083291960732551, |
|
"eval_sts-test_spearman_manhattan": 0.908423397478484, |
|
"eval_sts-test_spearman_max": 0.908423397478484, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.48140900195694714, |
|
"grad_norm": 3.31758451461792, |
|
"learning_rate": 1.9218750000000003e-05, |
|
"loss": 0.0983, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.48336594911937375, |
|
"grad_norm": 3.8613622188568115, |
|
"learning_rate": 1.9296875000000003e-05, |
|
"loss": 0.2028, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.48532289628180036, |
|
"grad_norm": 2.792924165725708, |
|
"learning_rate": 1.9375e-05, |
|
"loss": 0.0997, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.487279843444227, |
|
"grad_norm": 3.4162261486053467, |
|
"learning_rate": 1.9453125e-05, |
|
"loss": 0.1582, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.4892367906066536, |
|
"grad_norm": 4.499621391296387, |
|
"learning_rate": 1.953125e-05, |
|
"loss": 0.2394, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4892367906066536, |
|
"eval_loss": 0.10517927259206772, |
|
"eval_runtime": 107.2761, |
|
"eval_samples_per_second": 28.45, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8775291091187776, |
|
"eval_sts-test_pearson_dot": 0.8591957018286404, |
|
"eval_sts-test_pearson_euclidean": 0.9092406666480166, |
|
"eval_sts-test_pearson_manhattan": 0.909395200356788, |
|
"eval_sts-test_pearson_max": 0.909395200356788, |
|
"eval_sts-test_spearman_cosine": 0.9073655224104529, |
|
"eval_sts-test_spearman_dot": 0.866218124850164, |
|
"eval_sts-test_spearman_euclidean": 0.9077081380676655, |
|
"eval_sts-test_spearman_manhattan": 0.907968321901395, |
|
"eval_sts-test_spearman_max": 0.907968321901395, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4911937377690802, |
|
"grad_norm": 4.491675853729248, |
|
"learning_rate": 1.9609375e-05, |
|
"loss": 0.2186, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.4931506849315068, |
|
"grad_norm": 2.9051578044891357, |
|
"learning_rate": 1.96875e-05, |
|
"loss": 0.0993, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.49510763209393344, |
|
"grad_norm": 3.53365421295166, |
|
"learning_rate": 1.9765625e-05, |
|
"loss": 0.1805, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.49706457925636005, |
|
"grad_norm": 3.2181098461151123, |
|
"learning_rate": 1.984375e-05, |
|
"loss": 0.1178, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.49902152641878667, |
|
"grad_norm": 4.045453071594238, |
|
"learning_rate": 1.9921875e-05, |
|
"loss": 0.2198, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.49902152641878667, |
|
"eval_loss": 0.10428859293460846, |
|
"eval_runtime": 107.2698, |
|
"eval_samples_per_second": 28.452, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8777129617944619, |
|
"eval_sts-test_pearson_dot": 0.8588391652180615, |
|
"eval_sts-test_pearson_euclidean": 0.9093230964292308, |
|
"eval_sts-test_pearson_manhattan": 0.9095932968076137, |
|
"eval_sts-test_pearson_max": 0.9095932968076137, |
|
"eval_sts-test_spearman_cosine": 0.9069800350448274, |
|
"eval_sts-test_spearman_dot": 0.8639776976998651, |
|
"eval_sts-test_spearman_euclidean": 0.9072912800678044, |
|
"eval_sts-test_spearman_manhattan": 0.9080281095866138, |
|
"eval_sts-test_spearman_max": 0.9080281095866138, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5009784735812133, |
|
"grad_norm": 2.8251521587371826, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1064, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.50293542074364, |
|
"grad_norm": 3.3597464561462402, |
|
"learning_rate": 1.999924308128909e-05, |
|
"loss": 0.1436, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.5048923679060665, |
|
"grad_norm": 2.580488920211792, |
|
"learning_rate": 1.9996972439741537e-05, |
|
"loss": 0.0859, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.5068493150684932, |
|
"grad_norm": 3.937856674194336, |
|
"learning_rate": 1.9993188419095562e-05, |
|
"loss": 0.2157, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.5088062622309197, |
|
"grad_norm": 3.344531774520874, |
|
"learning_rate": 1.9987891592190367e-05, |
|
"loss": 0.1455, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5088062622309197, |
|
"eval_loss": 0.10292962938547134, |
|
"eval_runtime": 107.2285, |
|
"eval_samples_per_second": 28.463, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8767515459977318, |
|
"eval_sts-test_pearson_dot": 0.8564862360521637, |
|
"eval_sts-test_pearson_euclidean": 0.9083760527634203, |
|
"eval_sts-test_pearson_manhattan": 0.9086626400377007, |
|
"eval_sts-test_pearson_max": 0.9086626400377007, |
|
"eval_sts-test_spearman_cosine": 0.9057508521481897, |
|
"eval_sts-test_spearman_dot": 0.8601081456298736, |
|
"eval_sts-test_spearman_euclidean": 0.9063700753520626, |
|
"eval_sts-test_spearman_manhattan": 0.9068438122051519, |
|
"eval_sts-test_spearman_max": 0.9068438122051519, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5107632093933464, |
|
"grad_norm": 3.7637484073638916, |
|
"learning_rate": 1.9981082760879432e-05, |
|
"loss": 0.1974, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.512720156555773, |
|
"grad_norm": 3.182102918624878, |
|
"learning_rate": 1.997276295590912e-05, |
|
"loss": 0.1667, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.5146771037181996, |
|
"grad_norm": 3.7908170223236084, |
|
"learning_rate": 1.9962933436762644e-05, |
|
"loss": 0.1512, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.5166340508806262, |
|
"grad_norm": 3.4492650032043457, |
|
"learning_rate": 1.9951595691469397e-05, |
|
"loss": 0.1684, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.5185909980430529, |
|
"grad_norm": 3.816772222518921, |
|
"learning_rate": 1.9938751436379684e-05, |
|
"loss": 0.2132, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5185909980430529, |
|
"eval_loss": 0.10117975622415543, |
|
"eval_runtime": 107.3212, |
|
"eval_samples_per_second": 28.438, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8770393502752714, |
|
"eval_sts-test_pearson_dot": 0.8567524208989885, |
|
"eval_sts-test_pearson_euclidean": 0.9080912956763092, |
|
"eval_sts-test_pearson_manhattan": 0.908247948105785, |
|
"eval_sts-test_pearson_max": 0.908247948105785, |
|
"eval_sts-test_spearman_cosine": 0.9053279079767796, |
|
"eval_sts-test_spearman_dot": 0.8598375795035011, |
|
"eval_sts-test_spearman_euclidean": 0.9057662913333698, |
|
"eval_sts-test_spearman_manhattan": 0.9061448870047409, |
|
"eval_sts-test_spearman_max": 0.9061448870047409, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5205479452054794, |
|
"grad_norm": 3.5570499897003174, |
|
"learning_rate": 1.992440261590491e-05, |
|
"loss": 0.1645, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.5225048923679061, |
|
"grad_norm": 4.160579681396484, |
|
"learning_rate": 1.9908551402223218e-05, |
|
"loss": 0.203, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.5244618395303327, |
|
"grad_norm": 3.5718774795532227, |
|
"learning_rate": 1.9891200194950644e-05, |
|
"loss": 0.1539, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.5264187866927593, |
|
"grad_norm": 3.604438066482544, |
|
"learning_rate": 1.9872351620777883e-05, |
|
"loss": 0.1445, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.5283757338551859, |
|
"grad_norm": 3.4854915142059326, |
|
"learning_rate": 1.9852008533072627e-05, |
|
"loss": 0.1377, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5283757338551859, |
|
"eval_loss": 0.09936786442995071, |
|
"eval_runtime": 107.3119, |
|
"eval_samples_per_second": 28.44, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8772155384897071, |
|
"eval_sts-test_pearson_dot": 0.8577040756637748, |
|
"eval_sts-test_pearson_euclidean": 0.9081962404777727, |
|
"eval_sts-test_pearson_manhattan": 0.9082660411148933, |
|
"eval_sts-test_pearson_max": 0.9082660411148933, |
|
"eval_sts-test_spearman_cosine": 0.9056296657323417, |
|
"eval_sts-test_spearman_dot": 0.8627456954737598, |
|
"eval_sts-test_spearman_euclidean": 0.9061553587999066, |
|
"eval_sts-test_spearman_manhattan": 0.9063870360801298, |
|
"eval_sts-test_spearman_max": 0.9063870360801298, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5303326810176126, |
|
"grad_norm": 3.662992238998413, |
|
"learning_rate": 1.9830174011447617e-05, |
|
"loss": 0.1719, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.5322896281800391, |
|
"grad_norm": 3.5594613552093506, |
|
"learning_rate": 1.980685136129445e-05, |
|
"loss": 0.1896, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.5342465753424658, |
|
"grad_norm": 3.257335662841797, |
|
"learning_rate": 1.978204411328318e-05, |
|
"loss": 0.1452, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.5362035225048923, |
|
"grad_norm": 3.292863368988037, |
|
"learning_rate": 1.9755756022827847e-05, |
|
"loss": 0.1275, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.538160469667319, |
|
"grad_norm": 4.065443515777588, |
|
"learning_rate": 1.972799106951796e-05, |
|
"loss": 0.1883, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.538160469667319, |
|
"eval_loss": 0.09800439327955246, |
|
"eval_runtime": 107.2596, |
|
"eval_samples_per_second": 28.454, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8773598628753827, |
|
"eval_sts-test_pearson_dot": 0.8578251655808844, |
|
"eval_sts-test_pearson_euclidean": 0.9082603623937704, |
|
"eval_sts-test_pearson_manhattan": 0.9081101963076783, |
|
"eval_sts-test_pearson_max": 0.9082603623937704, |
|
"eval_sts-test_spearman_cosine": 0.9056689328319392, |
|
"eval_sts-test_spearman_dot": 0.8647132741833555, |
|
"eval_sts-test_spearman_euclidean": 0.9063065285608867, |
|
"eval_sts-test_spearman_manhattan": 0.9067770433231558, |
|
"eval_sts-test_spearman_max": 0.9067770433231558, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5401174168297456, |
|
"grad_norm": 3.7186553478240967, |
|
"learning_rate": 1.9698753456516047e-05, |
|
"loss": 0.1462, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.5420743639921722, |
|
"grad_norm": 3.5399951934814453, |
|
"learning_rate": 1.9668047609921382e-05, |
|
"loss": 0.1595, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.5440313111545988, |
|
"grad_norm": 3.6143035888671875, |
|
"learning_rate": 1.963587817809993e-05, |
|
"loss": 0.1693, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.5459882583170255, |
|
"grad_norm": 4.133859634399414, |
|
"learning_rate": 1.9602250030980657e-05, |
|
"loss": 0.1929, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"grad_norm": 3.6929726600646973, |
|
"learning_rate": 1.9567168259318324e-05, |
|
"loss": 0.154, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.547945205479452, |
|
"eval_loss": 0.0969705730676651, |
|
"eval_runtime": 107.333, |
|
"eval_samples_per_second": 28.435, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8772326487842304, |
|
"eval_sts-test_pearson_dot": 0.8584362564160372, |
|
"eval_sts-test_pearson_euclidean": 0.9077579223693962, |
|
"eval_sts-test_pearson_manhattan": 0.9072827835669532, |
|
"eval_sts-test_pearson_max": 0.9077579223693962, |
|
"eval_sts-test_spearman_cosine": 0.9052923754752349, |
|
"eval_sts-test_spearman_dot": 0.866326959917868, |
|
"eval_sts-test_spearman_euclidean": 0.9057464665245734, |
|
"eval_sts-test_spearman_manhattan": 0.9059635996448444, |
|
"eval_sts-test_spearman_max": 0.9059635996448444, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5499021526418787, |
|
"grad_norm": 3.515667200088501, |
|
"learning_rate": 1.953063817392281e-05, |
|
"loss": 0.1468, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.5518590998043053, |
|
"grad_norm": 2.3627371788024902, |
|
"learning_rate": 1.949266530485513e-05, |
|
"loss": 0.0898, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.5538160469667319, |
|
"grad_norm": 3.26710844039917, |
|
"learning_rate": 1.945325540059032e-05, |
|
"loss": 0.1425, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.5557729941291585, |
|
"grad_norm": 3.6672258377075195, |
|
"learning_rate": 1.941241442714716e-05, |
|
"loss": 0.1362, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.5577299412915852, |
|
"grad_norm": 3.306119203567505, |
|
"learning_rate": 1.9370148567185043e-05, |
|
"loss": 0.1025, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5577299412915852, |
|
"eval_loss": 0.09782103449106216, |
|
"eval_runtime": 107.2147, |
|
"eval_samples_per_second": 28.466, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8780373092719852, |
|
"eval_sts-test_pearson_dot": 0.861128415219923, |
|
"eval_sts-test_pearson_euclidean": 0.9076094585437832, |
|
"eval_sts-test_pearson_manhattan": 0.9068707688162918, |
|
"eval_sts-test_pearson_max": 0.9076094585437832, |
|
"eval_sts-test_spearman_cosine": 0.9052606468309083, |
|
"eval_sts-test_spearman_dot": 0.868469739815811, |
|
"eval_sts-test_spearman_euclidean": 0.9051151604801249, |
|
"eval_sts-test_spearman_manhattan": 0.9048908224067698, |
|
"eval_sts-test_spearman_max": 0.9052606468309083, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5596868884540117, |
|
"grad_norm": 3.2606685161590576, |
|
"learning_rate": 1.9326464219068023e-05, |
|
"loss": 0.1578, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.5616438356164384, |
|
"grad_norm": 3.5152740478515625, |
|
"learning_rate": 1.9281367995896187e-05, |
|
"loss": 0.1235, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.5636007827788649, |
|
"grad_norm": 2.8671882152557373, |
|
"learning_rate": 1.9234866724504554e-05, |
|
"loss": 0.1109, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.5655577299412916, |
|
"grad_norm": 2.315185785293579, |
|
"learning_rate": 1.9186967444429613e-05, |
|
"loss": 0.0746, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.5675146771037182, |
|
"grad_norm": 3.4961392879486084, |
|
"learning_rate": 1.913767740684362e-05, |
|
"loss": 0.1471, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5675146771037182, |
|
"eval_loss": 0.09924904257059097, |
|
"eval_runtime": 107.3422, |
|
"eval_samples_per_second": 28.432, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8783681424807572, |
|
"eval_sts-test_pearson_dot": 0.861120631953773, |
|
"eval_sts-test_pearson_euclidean": 0.9077238606316402, |
|
"eval_sts-test_pearson_manhattan": 0.9069786963498391, |
|
"eval_sts-test_pearson_max": 0.9077238606316402, |
|
"eval_sts-test_spearman_cosine": 0.9052591700392825, |
|
"eval_sts-test_spearman_dot": 0.8684268233561366, |
|
"eval_sts-test_spearman_euclidean": 0.9046835793152661, |
|
"eval_sts-test_spearman_manhattan": 0.9045985071673613, |
|
"eval_sts-test_spearman_max": 0.9052591700392825, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5694716242661448, |
|
"grad_norm": 4.221432209014893, |
|
"learning_rate": 1.9087004073456926e-05, |
|
"loss": 0.2631, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 3.4570438861846924, |
|
"learning_rate": 1.9034955115388364e-05, |
|
"loss": 0.11, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.5733855185909981, |
|
"grad_norm": 3.6059136390686035, |
|
"learning_rate": 1.898153841200398e-05, |
|
"loss": 0.1834, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.5753424657534246, |
|
"grad_norm": 3.3278088569641113, |
|
"learning_rate": 1.892676204972423e-05, |
|
"loss": 0.1277, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.5772994129158513, |
|
"grad_norm": 4.314577579498291, |
|
"learning_rate": 1.8870634320799822e-05, |
|
"loss": 0.2104, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5772994129158513, |
|
"eval_loss": 0.09903673827648163, |
|
"eval_runtime": 107.6434, |
|
"eval_samples_per_second": 28.353, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8781909348259072, |
|
"eval_sts-test_pearson_dot": 0.8596231931866185, |
|
"eval_sts-test_pearson_euclidean": 0.9076411156234586, |
|
"eval_sts-test_pearson_manhattan": 0.9069147632233857, |
|
"eval_sts-test_pearson_max": 0.9076411156234586, |
|
"eval_sts-test_spearman_cosine": 0.9042011607174669, |
|
"eval_sts-test_spearman_dot": 0.8660264551976247, |
|
"eval_sts-test_spearman_euclidean": 0.9044265280698341, |
|
"eval_sts-test_spearman_manhattan": 0.9041656729671835, |
|
"eval_sts-test_spearman_max": 0.9044265280698341, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5792563600782779, |
|
"grad_norm": 3.195991039276123, |
|
"learning_rate": 1.8813163722056397e-05, |
|
"loss": 0.1294, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.5812133072407045, |
|
"grad_norm": 3.6352145671844482, |
|
"learning_rate": 1.875435895360826e-05, |
|
"loss": 0.1672, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.5831702544031311, |
|
"grad_norm": 3.7248518466949463, |
|
"learning_rate": 1.8694228917541313e-05, |
|
"loss": 0.2171, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.5851272015655578, |
|
"grad_norm": 3.459801435470581, |
|
"learning_rate": 1.8632782716565438e-05, |
|
"loss": 0.1451, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.5870841487279843, |
|
"grad_norm": 2.6911542415618896, |
|
"learning_rate": 1.857002965263648e-05, |
|
"loss": 0.0871, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5870841487279843, |
|
"eval_loss": 0.09800251573324203, |
|
"eval_runtime": 107.2338, |
|
"eval_samples_per_second": 28.461, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8772925626670083, |
|
"eval_sts-test_pearson_dot": 0.8566016359384749, |
|
"eval_sts-test_pearson_euclidean": 0.9070931796775764, |
|
"eval_sts-test_pearson_manhattan": 0.9064105714529896, |
|
"eval_sts-test_pearson_max": 0.9070931796775764, |
|
"eval_sts-test_spearman_cosine": 0.9032592361677008, |
|
"eval_sts-test_spearman_dot": 0.8623085204012272, |
|
"eval_sts-test_spearman_euclidean": 0.9038942565668446, |
|
"eval_sts-test_spearman_manhattan": 0.9033954590073763, |
|
"eval_sts-test_spearman_max": 0.9038942565668446, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.589041095890411, |
|
"grad_norm": 2.913508653640747, |
|
"learning_rate": 1.850597922554809e-05, |
|
"loss": 0.0897, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.5909980430528375, |
|
"grad_norm": 3.2928783893585205, |
|
"learning_rate": 1.844064113149361e-05, |
|
"loss": 0.1296, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.5929549902152642, |
|
"grad_norm": 3.2551913261413574, |
|
"learning_rate": 1.8374025261598224e-05, |
|
"loss": 0.1206, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.5949119373776908, |
|
"grad_norm": 3.246716022491455, |
|
"learning_rate": 1.8306141700421606e-05, |
|
"loss": 0.1665, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.5968688845401174, |
|
"grad_norm": 3.980085611343384, |
|
"learning_rate": 1.8237000724431283e-05, |
|
"loss": 0.1511, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5968688845401174, |
|
"eval_loss": 0.09785618633031845, |
|
"eval_runtime": 107.3879, |
|
"eval_samples_per_second": 28.42, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8767817266460346, |
|
"eval_sts-test_pearson_dot": 0.8544828510438696, |
|
"eval_sts-test_pearson_euclidean": 0.9070553577944469, |
|
"eval_sts-test_pearson_manhattan": 0.9065146784679962, |
|
"eval_sts-test_pearson_max": 0.9070553577944469, |
|
"eval_sts-test_spearman_cosine": 0.9032290290662617, |
|
"eval_sts-test_spearman_dot": 0.8599922398628699, |
|
"eval_sts-test_spearman_euclidean": 0.9039456310149221, |
|
"eval_sts-test_spearman_manhattan": 0.9035283702537087, |
|
"eval_sts-test_spearman_max": 0.9039456310149221, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.598825831702544, |
|
"grad_norm": 3.576425790786743, |
|
"learning_rate": 1.8166612800446927e-05, |
|
"loss": 0.1566, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.6007827788649707, |
|
"grad_norm": 3.3370437622070312, |
|
"learning_rate": 1.809498858405589e-05, |
|
"loss": 0.1339, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.6027397260273972, |
|
"grad_norm": 3.3882863521575928, |
|
"learning_rate": 1.802213891800007e-05, |
|
"loss": 0.1474, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.6046966731898239, |
|
"grad_norm": 2.9576971530914307, |
|
"learning_rate": 1.7948074830534535e-05, |
|
"loss": 0.1022, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.6066536203522505, |
|
"grad_norm": 3.737396001815796, |
|
"learning_rate": 1.7872807533758007e-05, |
|
"loss": 0.1263, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6066536203522505, |
|
"eval_loss": 0.09827280789613724, |
|
"eval_runtime": 107.5911, |
|
"eval_samples_per_second": 28.367, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8774447336518928, |
|
"eval_sts-test_pearson_dot": 0.8526883126161577, |
|
"eval_sts-test_pearson_euclidean": 0.9083025051320742, |
|
"eval_sts-test_pearson_manhattan": 0.9079128512948802, |
|
"eval_sts-test_pearson_max": 0.9083025051320742, |
|
"eval_sts-test_spearman_cosine": 0.9043404713941784, |
|
"eval_sts-test_spearman_dot": 0.8595169367156317, |
|
"eval_sts-test_spearman_euclidean": 0.9055969973115261, |
|
"eval_sts-test_spearman_manhattan": 0.9051065234866762, |
|
"eval_sts-test_spearman_max": 0.9055969973115261, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6086105675146771, |
|
"grad_norm": 3.6634974479675293, |
|
"learning_rate": 1.7796348421915536e-05, |
|
"loss": 0.1713, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.6105675146771037, |
|
"grad_norm": 4.3175225257873535, |
|
"learning_rate": 1.7718709069673595e-05, |
|
"loss": 0.1628, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.6125244618395304, |
|
"grad_norm": 3.73574161529541, |
|
"learning_rate": 1.763990123036787e-05, |
|
"loss": 0.1585, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.6144814090019569, |
|
"grad_norm": 3.8439183235168457, |
|
"learning_rate": 1.7559936834223982e-05, |
|
"loss": 0.1419, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.6164383561643836, |
|
"grad_norm": 2.908531904220581, |
|
"learning_rate": 1.747882798655147e-05, |
|
"loss": 0.1136, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6164383561643836, |
|
"eval_loss": 0.09831386059522629, |
|
"eval_runtime": 107.369, |
|
"eval_samples_per_second": 28.425, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8766201899554078, |
|
"eval_sts-test_pearson_dot": 0.8489007626542151, |
|
"eval_sts-test_pearson_euclidean": 0.907814904603313, |
|
"eval_sts-test_pearson_manhattan": 0.9075136258935672, |
|
"eval_sts-test_pearson_max": 0.907814904603313, |
|
"eval_sts-test_spearman_cosine": 0.9040450683177336, |
|
"eval_sts-test_spearman_dot": 0.856758468963466, |
|
"eval_sts-test_spearman_euclidean": 0.9053801326988233, |
|
"eval_sts-test_spearman_manhattan": 0.9047017483273913, |
|
"eval_sts-test_spearman_max": 0.9053801326988233, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6183953033268101, |
|
"grad_norm": 4.284037113189697, |
|
"learning_rate": 1.739658696591121e-05, |
|
"loss": 0.255, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.6203522504892368, |
|
"grad_norm": 3.051182270050049, |
|
"learning_rate": 1.7313226222256675e-05, |
|
"loss": 0.1262, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.6223091976516634, |
|
"grad_norm": 3.270893096923828, |
|
"learning_rate": 1.7228758375049186e-05, |
|
"loss": 0.1393, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.62426614481409, |
|
"grad_norm": 3.4940428733825684, |
|
"learning_rate": 1.714319621134755e-05, |
|
"loss": 0.1134, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.6262230919765166, |
|
"grad_norm": 3.899348258972168, |
|
"learning_rate": 1.705655268387229e-05, |
|
"loss": 0.1441, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6262230919765166, |
|
"eval_loss": 0.09844871610403061, |
|
"eval_runtime": 107.3824, |
|
"eval_samples_per_second": 28.422, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8758902555910153, |
|
"eval_sts-test_pearson_dot": 0.8487463478874753, |
|
"eval_sts-test_pearson_euclidean": 0.9067076042499689, |
|
"eval_sts-test_pearson_manhattan": 0.9065947885559749, |
|
"eval_sts-test_pearson_max": 0.9067076042499689, |
|
"eval_sts-test_spearman_cosine": 0.9032847891379552, |
|
"eval_sts-test_spearman_dot": 0.8557776108162892, |
|
"eval_sts-test_spearman_euclidean": 0.9042920057780914, |
|
"eval_sts-test_spearman_manhattan": 0.9038587688165614, |
|
"eval_sts-test_spearman_max": 0.9042920057780914, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6281800391389433, |
|
"grad_norm": 4.422016143798828, |
|
"learning_rate": 1.696884090904484e-05, |
|
"loss": 0.1744, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.6301369863013698, |
|
"grad_norm": 3.950225353240967, |
|
"learning_rate": 1.6880074165001906e-05, |
|
"loss": 0.2124, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.6320939334637965, |
|
"grad_norm": 3.2186155319213867, |
|
"learning_rate": 1.6790265889585377e-05, |
|
"loss": 0.1267, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.6340508806262231, |
|
"grad_norm": 3.156022548675537, |
|
"learning_rate": 1.669942967830807e-05, |
|
"loss": 0.1435, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.6360078277886497, |
|
"grad_norm": 3.511422634124756, |
|
"learning_rate": 1.6607579282295572e-05, |
|
"loss": 0.1705, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6360078277886497, |
|
"eval_loss": 0.09894353151321411, |
|
"eval_runtime": 107.3913, |
|
"eval_samples_per_second": 28.419, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8748236722620941, |
|
"eval_sts-test_pearson_dot": 0.8489660029264176, |
|
"eval_sts-test_pearson_euclidean": 0.9056717167596496, |
|
"eval_sts-test_pearson_manhattan": 0.9057306950198961, |
|
"eval_sts-test_pearson_max": 0.9057306950198961, |
|
"eval_sts-test_spearman_cosine": 0.9023375391880836, |
|
"eval_sts-test_spearman_dot": 0.8556132394331987, |
|
"eval_sts-test_spearman_euclidean": 0.9032693499527753, |
|
"eval_sts-test_spearman_manhattan": 0.9032065639330431, |
|
"eval_sts-test_spearman_max": 0.9032693499527753, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6379647749510763, |
|
"grad_norm": 3.2097976207733154, |
|
"learning_rate": 1.651472860620455e-05, |
|
"loss": 0.1441, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.639921722113503, |
|
"grad_norm": 3.0201833248138428, |
|
"learning_rate": 1.6420891706117818e-05, |
|
"loss": 0.118, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.6418786692759295, |
|
"grad_norm": 3.370908737182617, |
|
"learning_rate": 1.6326082787416465e-05, |
|
"loss": 0.1956, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.6438356164383562, |
|
"grad_norm": 2.768566131591797, |
|
"learning_rate": 1.6230316202629393e-05, |
|
"loss": 0.0803, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.6457925636007827, |
|
"grad_norm": 3.2455928325653076, |
|
"learning_rate": 1.613360644926059e-05, |
|
"loss": 0.1651, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6457925636007827, |
|
"eval_loss": 0.09914453327655792, |
|
"eval_runtime": 107.2995, |
|
"eval_samples_per_second": 28.444, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8743455111936658, |
|
"eval_sts-test_pearson_dot": 0.8479229638933452, |
|
"eval_sts-test_pearson_euclidean": 0.9055198964101038, |
|
"eval_sts-test_pearson_manhattan": 0.9055992524553022, |
|
"eval_sts-test_pearson_max": 0.9055992524553022, |
|
"eval_sts-test_spearman_cosine": 0.9022275405875834, |
|
"eval_sts-test_spearman_dot": 0.8543106197166178, |
|
"eval_sts-test_spearman_euclidean": 0.9029672341871219, |
|
"eval_sts-test_spearman_manhattan": 0.9028108285285591, |
|
"eval_sts-test_spearman_max": 0.9029672341871219, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6477495107632094, |
|
"grad_norm": 3.465236186981201, |
|
"learning_rate": 1.603596816759442e-05, |
|
"loss": 0.1498, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.649706457925636, |
|
"grad_norm": 3.303255558013916, |
|
"learning_rate": 1.5937416138479344e-05, |
|
"loss": 0.1171, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.6516634050880626, |
|
"grad_norm": 3.893554449081421, |
|
"learning_rate": 1.5837965281090334e-05, |
|
"loss": 0.1976, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.6536203522504892, |
|
"grad_norm": 2.688338041305542, |
|
"learning_rate": 1.5737630650670336e-05, |
|
"loss": 0.0926, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.6555772994129159, |
|
"grad_norm": 3.4313673973083496, |
|
"learning_rate": 1.5636427436251182e-05, |
|
"loss": 0.1496, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6555772994129159, |
|
"eval_loss": 0.09911184757947922, |
|
"eval_runtime": 107.4795, |
|
"eval_samples_per_second": 28.396, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8746652537201496, |
|
"eval_sts-test_pearson_dot": 0.8492828619583224, |
|
"eval_sts-test_pearson_euclidean": 0.906047319803132, |
|
"eval_sts-test_pearson_manhattan": 0.9060247174283395, |
|
"eval_sts-test_pearson_max": 0.906047319803132, |
|
"eval_sts-test_spearman_cosine": 0.9026310179602884, |
|
"eval_sts-test_spearman_dot": 0.856069836553175, |
|
"eval_sts-test_spearman_euclidean": 0.9034376594468683, |
|
"eval_sts-test_spearman_manhattan": 0.9036356837785253, |
|
"eval_sts-test_spearman_max": 0.9036356837785253, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6575342465753424, |
|
"grad_norm": 3.2240829467773438, |
|
"learning_rate": 1.5534370958354184e-05, |
|
"loss": 0.1131, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.6594911937377691, |
|
"grad_norm": 3.2019200325012207, |
|
"learning_rate": 1.5431476666670885e-05, |
|
"loss": 0.1352, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.6614481409001957, |
|
"grad_norm": 3.5696215629577637, |
|
"learning_rate": 1.5327760137724213e-05, |
|
"loss": 0.1608, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.6634050880626223, |
|
"grad_norm": 3.2444350719451904, |
|
"learning_rate": 1.5223237072510433e-05, |
|
"loss": 0.1239, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.6653620352250489, |
|
"grad_norm": 3.1613712310791016, |
|
"learning_rate": 1.5117923294122312e-05, |
|
"loss": 0.1227, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6653620352250489, |
|
"eval_loss": 0.09929565340280533, |
|
"eval_runtime": 107.2604, |
|
"eval_samples_per_second": 28.454, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.874603261003018, |
|
"eval_sts-test_pearson_dot": 0.8513553588561518, |
|
"eval_sts-test_pearson_euclidean": 0.9056124511024704, |
|
"eval_sts-test_pearson_manhattan": 0.9053134930024975, |
|
"eval_sts-test_pearson_max": 0.9056124511024704, |
|
"eval_sts-test_spearman_cosine": 0.9019408192558488, |
|
"eval_sts-test_spearman_dot": 0.8587178581922269, |
|
"eval_sts-test_spearman_euclidean": 0.9026150865112329, |
|
"eval_sts-test_spearman_manhattan": 0.9023037967369943, |
|
"eval_sts-test_spearman_max": 0.9026150865112329, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6673189823874756, |
|
"grad_norm": 3.3650641441345215, |
|
"learning_rate": 1.5011834745353725e-05, |
|
"loss": 0.1452, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.6692759295499021, |
|
"grad_norm": 3.7061643600463867, |
|
"learning_rate": 1.4904987486286184e-05, |
|
"loss": 0.1992, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.6712328767123288, |
|
"grad_norm": 3.262500286102295, |
|
"learning_rate": 1.4797397691857614e-05, |
|
"loss": 0.1349, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.6731898238747553, |
|
"grad_norm": 3.4780774116516113, |
|
"learning_rate": 1.468908164941371e-05, |
|
"loss": 0.1702, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.675146771037182, |
|
"grad_norm": 2.908043146133423, |
|
"learning_rate": 1.4580055756242315e-05, |
|
"loss": 0.1033, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.675146771037182, |
|
"eval_loss": 0.09903653711080551, |
|
"eval_runtime": 107.5298, |
|
"eval_samples_per_second": 28.383, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8748590023241125, |
|
"eval_sts-test_pearson_dot": 0.8538817887560792, |
|
"eval_sts-test_pearson_euclidean": 0.905694726781384, |
|
"eval_sts-test_pearson_manhattan": 0.9051916896005284, |
|
"eval_sts-test_pearson_max": 0.905694726781384, |
|
"eval_sts-test_spearman_cosine": 0.9022368936012142, |
|
"eval_sts-test_spearman_dot": 0.86127843586652, |
|
"eval_sts-test_spearman_euclidean": 0.9024703161806319, |
|
"eval_sts-test_spearman_manhattan": 0.9023726241770144, |
|
"eval_sts-test_spearman_max": 0.9024703161806319, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6771037181996086, |
|
"grad_norm": 3.7118523120880127, |
|
"learning_rate": 1.4470336517091139e-05, |
|
"loss": 0.1788, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.6790606653620352, |
|
"grad_norm": 3.106895923614502, |
|
"learning_rate": 1.435994054166919e-05, |
|
"loss": 0.1084, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.6810176125244618, |
|
"grad_norm": 3.782027244567871, |
|
"learning_rate": 1.4248884542132348e-05, |
|
"loss": 0.1325, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.6829745596868885, |
|
"grad_norm": 3.8729352951049805, |
|
"learning_rate": 1.4137185330553416e-05, |
|
"loss": 0.1537, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"grad_norm": 3.7617311477661133, |
|
"learning_rate": 1.4024859816377046e-05, |
|
"loss": 0.2099, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.684931506849315, |
|
"eval_loss": 0.09886621683835983, |
|
"eval_runtime": 107.402, |
|
"eval_samples_per_second": 28.417, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8751440351237663, |
|
"eval_sts-test_pearson_dot": 0.8544171062513735, |
|
"eval_sts-test_pearson_euclidean": 0.906084032839116, |
|
"eval_sts-test_pearson_manhattan": 0.9052674845128671, |
|
"eval_sts-test_pearson_max": 0.906084032839116, |
|
"eval_sts-test_spearman_cosine": 0.9021938876390173, |
|
"eval_sts-test_spearman_dot": 0.861093657908235, |
|
"eval_sts-test_spearman_euclidean": 0.902945753581654, |
|
"eval_sts-test_spearman_manhattan": 0.9021002232489249, |
|
"eval_sts-test_spearman_max": 0.902945753581654, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6868884540117417, |
|
"grad_norm": 3.419968366622925, |
|
"learning_rate": 1.3911925003859907e-05, |
|
"loss": 0.1603, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.6888454011741683, |
|
"grad_norm": 3.050192356109619, |
|
"learning_rate": 1.3798397989496549e-05, |
|
"loss": 0.0982, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.6908023483365949, |
|
"grad_norm": 3.8518471717834473, |
|
"learning_rate": 1.3684295959431241e-05, |
|
"loss": 0.1537, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.6927592954990215, |
|
"grad_norm": 3.516019582748413, |
|
"learning_rate": 1.3569636186856286e-05, |
|
"loss": 0.1758, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.6947162426614482, |
|
"grad_norm": 3.678056240081787, |
|
"learning_rate": 1.3454436029397135e-05, |
|
"loss": 0.1521, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6947162426614482, |
|
"eval_loss": 0.09901077300310135, |
|
"eval_runtime": 107.3981, |
|
"eval_samples_per_second": 28.418, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8757904015245911, |
|
"eval_sts-test_pearson_dot": 0.8540145186864471, |
|
"eval_sts-test_pearson_euclidean": 0.9064035478905541, |
|
"eval_sts-test_pearson_manhattan": 0.9052325995566524, |
|
"eval_sts-test_pearson_max": 0.9064035478905541, |
|
"eval_sts-test_spearman_cosine": 0.9019130734737861, |
|
"eval_sts-test_spearman_dot": 0.859719212417121, |
|
"eval_sts-test_spearman_euclidean": 0.9030878388365718, |
|
"eval_sts-test_spearman_manhattan": 0.9015811981193075, |
|
"eval_sts-test_spearman_max": 0.9030878388365718, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.6966731898238747, |
|
"grad_norm": 2.466977119445801, |
|
"learning_rate": 1.3338712926484722e-05, |
|
"loss": 0.089, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.6986301369863014, |
|
"grad_norm": 3.8731167316436768, |
|
"learning_rate": 1.322248439671543e-05, |
|
"loss": 0.1509, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.700587084148728, |
|
"grad_norm": 4.406742572784424, |
|
"learning_rate": 1.3105768035199033e-05, |
|
"loss": 0.1943, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.7025440313111546, |
|
"grad_norm": 3.6811671257019043, |
|
"learning_rate": 1.2988581510895118e-05, |
|
"loss": 0.1582, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.7045009784735812, |
|
"grad_norm": 3.6019861698150635, |
|
"learning_rate": 1.2870942563938265e-05, |
|
"loss": 0.1527, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7045009784735812, |
|
"eval_loss": 0.09933393448591232, |
|
"eval_runtime": 107.4727, |
|
"eval_samples_per_second": 28.398, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8759901613929976, |
|
"eval_sts-test_pearson_dot": 0.8539317550786957, |
|
"eval_sts-test_pearson_euclidean": 0.9058198972317745, |
|
"eval_sts-test_pearson_manhattan": 0.9045671303429671, |
|
"eval_sts-test_pearson_max": 0.9058198972317745, |
|
"eval_sts-test_spearman_cosine": 0.9007536577936579, |
|
"eval_sts-test_spearman_dot": 0.8583788673871872, |
|
"eval_sts-test_spearman_euclidean": 0.9019342408204244, |
|
"eval_sts-test_spearman_manhattan": 0.9006401238435077, |
|
"eval_sts-test_spearman_max": 0.9019342408204244, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7064579256360078, |
|
"grad_norm": 2.7014875411987305, |
|
"learning_rate": 1.2752869002952492e-05, |
|
"loss": 0.0754, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.7084148727984344, |
|
"grad_norm": 3.4292407035827637, |
|
"learning_rate": 1.2634378702355317e-05, |
|
"loss": 0.122, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.7103718199608611, |
|
"grad_norm": 3.9553112983703613, |
|
"learning_rate": 1.2515489599651846e-05, |
|
"loss": 0.1727, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.7123287671232876, |
|
"grad_norm": 2.3133935928344727, |
|
"learning_rate": 1.2396219692719364e-05, |
|
"loss": 0.074, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 4.136401176452637, |
|
"learning_rate": 1.2276587037082707e-05, |
|
"loss": 0.1822, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"eval_loss": 0.09977750480175018, |
|
"eval_runtime": 107.5146, |
|
"eval_samples_per_second": 28.387, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8763860642424627, |
|
"eval_sts-test_pearson_dot": 0.8548955898076025, |
|
"eval_sts-test_pearson_euclidean": 0.9053617182443816, |
|
"eval_sts-test_pearson_manhattan": 0.9041710686717819, |
|
"eval_sts-test_pearson_max": 0.9053617182443816, |
|
"eval_sts-test_spearman_cosine": 0.9004566884230645, |
|
"eval_sts-test_spearman_dot": 0.8588638368068857, |
|
"eval_sts-test_spearman_euclidean": 0.90101585543415, |
|
"eval_sts-test_spearman_manhattan": 0.9002594696141124, |
|
"eval_sts-test_spearman_max": 0.90101585543415, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7162426614481409, |
|
"grad_norm": 3.4787533283233643, |
|
"learning_rate": 1.215660974318097e-05, |
|
"loss": 0.1344, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.7181996086105675, |
|
"grad_norm": 3.3478775024414062, |
|
"learning_rate": 1.2036305973625881e-05, |
|
"loss": 0.1819, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.7201565557729941, |
|
"grad_norm": 3.435234546661377, |
|
"learning_rate": 1.191569394045228e-05, |
|
"loss": 0.1811, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.7221135029354208, |
|
"grad_norm": 3.827272653579712, |
|
"learning_rate": 1.1794791902361095e-05, |
|
"loss": 0.1564, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.7240704500978473, |
|
"grad_norm": 4.088834762573242, |
|
"learning_rate": 1.1673618161955288e-05, |
|
"loss": 0.1522, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7240704500978473, |
|
"eval_loss": 0.09972013533115387, |
|
"eval_runtime": 107.5001, |
|
"eval_samples_per_second": 28.391, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8760486874181325, |
|
"eval_sts-test_pearson_dot": 0.8547648526750222, |
|
"eval_sts-test_pearson_euclidean": 0.9048215352945358, |
|
"eval_sts-test_pearson_manhattan": 0.9037328672460638, |
|
"eval_sts-test_pearson_max": 0.9048215352945358, |
|
"eval_sts-test_spearman_cosine": 0.8997958912973589, |
|
"eval_sts-test_spearman_dot": 0.859370958100973, |
|
"eval_sts-test_spearman_euclidean": 0.9005396125104228, |
|
"eval_sts-test_spearman_manhattan": 0.8996819545858564, |
|
"eval_sts-test_spearman_max": 0.9005396125104228, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.726027397260274, |
|
"grad_norm": 3.117750406265259, |
|
"learning_rate": 1.1552191062969147e-05, |
|
"loss": 0.1379, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7279843444227005, |
|
"grad_norm": 2.870415449142456, |
|
"learning_rate": 1.1430528987491303e-05, |
|
"loss": 0.082, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7299412915851272, |
|
"grad_norm": 3.0934267044067383, |
|
"learning_rate": 1.1308650353182036e-05, |
|
"loss": 0.1288, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7318982387475538, |
|
"grad_norm": 4.175031661987305, |
|
"learning_rate": 1.1186573610485099e-05, |
|
"loss": 0.1809, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.7338551859099804, |
|
"grad_norm": 4.580765724182129, |
|
"learning_rate": 1.1064317239834628e-05, |
|
"loss": 0.2418, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7338551859099804, |
|
"eval_loss": 0.0990942195057869, |
|
"eval_runtime": 107.4134, |
|
"eval_samples_per_second": 28.414, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8759927956331243, |
|
"eval_sts-test_pearson_dot": 0.8545842177056733, |
|
"eval_sts-test_pearson_euclidean": 0.9050231511967292, |
|
"eval_sts-test_pearson_manhattan": 0.9040000709018708, |
|
"eval_sts-test_pearson_max": 0.9050231511967292, |
|
"eval_sts-test_spearman_cosine": 0.9005535301527153, |
|
"eval_sts-test_spearman_dot": 0.8591386543030902, |
|
"eval_sts-test_spearman_euclidean": 0.9009244733583888, |
|
"eval_sts-test_spearman_manhattan": 0.9002575453098726, |
|
"eval_sts-test_spearman_max": 0.9009244733583888, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.735812133072407, |
|
"grad_norm": 2.8520517349243164, |
|
"learning_rate": 1.094189974885752e-05, |
|
"loss": 0.0789, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.7377690802348337, |
|
"grad_norm": 3.5163254737854004, |
|
"learning_rate": 1.081933966957167e-05, |
|
"loss": 0.132, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.7397260273972602, |
|
"grad_norm": 3.160409688949585, |
|
"learning_rate": 1.0696655555580527e-05, |
|
"loss": 0.1425, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.7416829745596869, |
|
"grad_norm": 3.013707160949707, |
|
"learning_rate": 1.0573865979264362e-05, |
|
"loss": 0.1514, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.7436399217221135, |
|
"grad_norm": 2.8930296897888184, |
|
"learning_rate": 1.0450989528968747e-05, |
|
"loss": 0.0997, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7436399217221135, |
|
"eval_loss": 0.09842444956302643, |
|
"eval_runtime": 107.304, |
|
"eval_samples_per_second": 28.443, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8764482210558782, |
|
"eval_sts-test_pearson_dot": 0.8556595611954538, |
|
"eval_sts-test_pearson_euclidean": 0.9055523774056253, |
|
"eval_sts-test_pearson_manhattan": 0.9046224074077234, |
|
"eval_sts-test_pearson_max": 0.9055523774056253, |
|
"eval_sts-test_spearman_cosine": 0.9017774324005088, |
|
"eval_sts-test_spearman_dot": 0.860274840931642, |
|
"eval_sts-test_spearman_euclidean": 0.9016713719110112, |
|
"eval_sts-test_spearman_manhattan": 0.9011951289872838, |
|
"eval_sts-test_spearman_max": 0.9017774324005088, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7455968688845401, |
|
"grad_norm": 4.028687477111816, |
|
"learning_rate": 1.0328044806190547e-05, |
|
"loss": 0.2002, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.7475538160469667, |
|
"grad_norm": 3.8006536960601807, |
|
"learning_rate": 1.0205050422761989e-05, |
|
"loss": 0.1943, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.7495107632093934, |
|
"grad_norm": 2.689953088760376, |
|
"learning_rate": 1.0082024998033092e-05, |
|
"loss": 0.1198, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.7514677103718199, |
|
"grad_norm": 3.1326684951782227, |
|
"learning_rate": 9.95898715605304e-06, |
|
"loss": 0.1171, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.7534246575342466, |
|
"grad_norm": 2.8200089931488037, |
|
"learning_rate": 9.835955522750789e-06, |
|
"loss": 0.0872, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7534246575342466, |
|
"eval_loss": 0.09781364351511002, |
|
"eval_runtime": 107.4026, |
|
"eval_samples_per_second": 28.416, |
|
"eval_steps_per_second": 0.223, |
|
"eval_sts-test_pearson_cosine": 0.8764091455338202, |
|
"eval_sts-test_pearson_dot": 0.8558042892931453, |
|
"eval_sts-test_pearson_euclidean": 0.9058212793054227, |
|
"eval_sts-test_pearson_manhattan": 0.9049414370234095, |
|
"eval_sts-test_pearson_max": 0.9058212793054227, |
|
"eval_sts-test_spearman_cosine": 0.902642877044557, |
|
"eval_sts-test_spearman_dot": 0.861618590204356, |
|
"eval_sts-test_spearman_euclidean": 0.9022327317339048, |
|
"eval_sts-test_spearman_manhattan": 0.9017312490987527, |
|
"eval_sts-test_spearman_max": 0.902642877044557, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7553816046966731, |
|
"grad_norm": 3.0478882789611816, |
|
"learning_rate": 9.712948723115384e-06, |
|
"loss": 0.0937, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7573385518590998, |
|
"grad_norm": 3.0198819637298584, |
|
"learning_rate": 9.589985378376474e-06, |
|
"loss": 0.0933, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7592954990215264, |
|
"grad_norm": 3.319575786590576, |
|
"learning_rate": 9.46708410318533e-06, |
|
"loss": 0.1109, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.761252446183953, |
|
"grad_norm": 3.1134960651397705, |
|
"learning_rate": 9.344263502796918e-06, |
|
"loss": 0.0999, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.7632093933463796, |
|
"grad_norm": 3.596510887145996, |
|
"learning_rate": 9.221542170253334e-06, |
|
"loss": 0.1625, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7632093933463796, |
|
"eval_loss": 0.09730728715658188, |
|
"eval_runtime": 107.3398, |
|
"eval_samples_per_second": 28.433, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8764763861944456, |
|
"eval_sts-test_pearson_dot": 0.8561556513692123, |
|
"eval_sts-test_pearson_euclidean": 0.9062056976600675, |
|
"eval_sts-test_pearson_manhattan": 0.9053202975786792, |
|
"eval_sts-test_pearson_max": 0.9062056976600675, |
|
"eval_sts-test_spearman_cosine": 0.9034457146739187, |
|
"eval_sts-test_spearman_dot": 0.8626341305791112, |
|
"eval_sts-test_spearman_euclidean": 0.9030662687285811, |
|
"eval_sts-test_spearman_manhattan": 0.9025959777226189, |
|
"eval_sts-test_spearman_max": 0.9034457146739187, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7651663405088063, |
|
"grad_norm": 2.9087250232696533, |
|
"learning_rate": 9.098938683569155e-06, |
|
"loss": 0.1357, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.7671232876712328, |
|
"grad_norm": 3.40970516204834, |
|
"learning_rate": 8.97647160291899e-06, |
|
"loss": 0.1202, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.7690802348336595, |
|
"grad_norm": 2.962822437286377, |
|
"learning_rate": 8.854159467827808e-06, |
|
"loss": 0.116, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.7710371819960861, |
|
"grad_norm": 3.2842860221862793, |
|
"learning_rate": 8.732020794364327e-06, |
|
"loss": 0.1256, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.7729941291585127, |
|
"grad_norm": 3.8854291439056396, |
|
"learning_rate": 8.610074072338006e-06, |
|
"loss": 0.2402, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7729941291585127, |
|
"eval_loss": 0.09690071642398834, |
|
"eval_runtime": 107.3243, |
|
"eval_samples_per_second": 28.437, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8764916753522476, |
|
"eval_sts-test_pearson_dot": 0.8559063099369891, |
|
"eval_sts-test_pearson_euclidean": 0.906553160229111, |
|
"eval_sts-test_pearson_manhattan": 0.9056446394888255, |
|
"eval_sts-test_pearson_max": 0.906553160229111, |
|
"eval_sts-test_spearman_cosine": 0.9036960084788808, |
|
"eval_sts-test_spearman_dot": 0.8628471465833341, |
|
"eval_sts-test_spearman_euclidean": 0.9032961559583487, |
|
"eval_sts-test_spearman_manhattan": 0.9026765747443849, |
|
"eval_sts-test_spearman_max": 0.9036960084788808, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7749510763209393, |
|
"grad_norm": 4.216285705566406, |
|
"learning_rate": 8.488337762499971e-06, |
|
"loss": 0.2413, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.776908023483366, |
|
"grad_norm": 3.007631540298462, |
|
"learning_rate": 8.366830293748364e-06, |
|
"loss": 0.1144, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.7788649706457925, |
|
"grad_norm": 3.3882946968078613, |
|
"learning_rate": 8.245570060338511e-06, |
|
"loss": 0.1198, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.7808219178082192, |
|
"grad_norm": 3.6439969539642334, |
|
"learning_rate": 8.124575419098321e-06, |
|
"loss": 0.1361, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.7827788649706457, |
|
"grad_norm": 3.3682761192321777, |
|
"learning_rate": 8.003864686649369e-06, |
|
"loss": 0.1496, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7827788649706457, |
|
"eval_loss": 0.09637484699487686, |
|
"eval_runtime": 107.2784, |
|
"eval_samples_per_second": 28.449, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8766720014290486, |
|
"eval_sts-test_pearson_dot": 0.8562700614904787, |
|
"eval_sts-test_pearson_euclidean": 0.9067249528004879, |
|
"eval_sts-test_pearson_manhattan": 0.905876462094101, |
|
"eval_sts-test_pearson_max": 0.9067249528004879, |
|
"eval_sts-test_spearman_cosine": 0.9038214015132995, |
|
"eval_sts-test_spearman_dot": 0.8632292328530939, |
|
"eval_sts-test_spearman_euclidean": 0.9036244959631774, |
|
"eval_sts-test_spearman_manhattan": 0.9027354226531145, |
|
"eval_sts-test_spearman_max": 0.9038214015132995, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7847358121330724, |
|
"grad_norm": 3.5519731044769287, |
|
"learning_rate": 7.883456136634053e-06, |
|
"loss": 0.1606, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.786692759295499, |
|
"grad_norm": 4.0106329917907715, |
|
"learning_rate": 7.763367996949262e-06, |
|
"loss": 0.1739, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.7886497064579256, |
|
"grad_norm": 3.347114086151123, |
|
"learning_rate": 7.64361844698699e-06, |
|
"loss": 0.1121, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.7906066536203522, |
|
"grad_norm": 3.429165840148926, |
|
"learning_rate": 7.524225614882216e-06, |
|
"loss": 0.1176, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.7925636007827789, |
|
"grad_norm": 3.169438362121582, |
|
"learning_rate": 7.4052075747686625e-06, |
|
"loss": 0.1024, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.7925636007827789, |
|
"eval_loss": 0.09552557021379471, |
|
"eval_runtime": 107.3797, |
|
"eval_samples_per_second": 28.423, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.8765829897566032, |
|
"eval_sts-test_pearson_dot": 0.856005218083783, |
|
"eval_sts-test_pearson_euclidean": 0.9066659750196452, |
|
"eval_sts-test_pearson_manhattan": 0.9058351541345429, |
|
"eval_sts-test_pearson_max": 0.9066659750196452, |
|
"eval_sts-test_spearman_cosine": 0.9033677132253135, |
|
"eval_sts-test_spearman_dot": 0.8628391808588065, |
|
"eval_sts-test_spearman_euclidean": 0.9033363425910781, |
|
"eval_sts-test_spearman_manhattan": 0.9027725662000693, |
|
"eval_sts-test_spearman_max": 0.9033677132253135, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.7945205479452054, |
|
"grad_norm": 3.571244239807129, |
|
"learning_rate": 7.286582344042625e-06, |
|
"loss": 0.1256, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.7964774951076321, |
|
"grad_norm": 3.144022226333618, |
|
"learning_rate": 7.168367880635454e-06, |
|
"loss": 0.1424, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.7984344422700587, |
|
"grad_norm": 3.899695634841919, |
|
"learning_rate": 7.050582080294996e-06, |
|
"loss": 0.181, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.8003913894324853, |
|
"grad_norm": 2.7152762413024902, |
|
"learning_rate": 6.933242773876481e-06, |
|
"loss": 0.0829, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8023483365949119, |
|
"grad_norm": 4.248819351196289, |
|
"learning_rate": 6.816367724643225e-06, |
|
"loss": 0.2329, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8023483365949119, |
|
"eval_loss": 0.09485543519258499, |
|
"eval_runtime": 107.35, |
|
"eval_samples_per_second": 28.43, |
|
"eval_steps_per_second": 0.224, |
|
"eval_sts-test_pearson_cosine": 0.876662263871305, |
|
"eval_sts-test_pearson_dot": 0.8565606973853341, |
|
"eval_sts-test_pearson_euclidean": 0.906503720630583, |
|
"eval_sts-test_pearson_manhattan": 0.9056869432887309, |
|
"eval_sts-test_pearson_max": 0.906503720630583, |
|
"eval_sts-test_spearman_cosine": 0.9031667800616662, |
|
"eval_sts-test_spearman_dot": 0.8635160884386128, |
|
"eval_sts-test_spearman_euclidean": 0.9027914064811151, |
|
"eval_sts-test_spearman_manhattan": 0.9026432798059095, |
|
"eval_sts-test_spearman_max": 0.9031667800616662, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8043052837573386, |
|
"grad_norm": 2.724637508392334, |
|
"learning_rate": 6.699974625577545e-06, |
|
"loss": 0.075, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8062622309197651, |
|
"grad_norm": 2.825380325317383, |
|
"learning_rate": 6.584081096702343e-06, |
|
"loss": 0.1157, |
|
"step": 412 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1022, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 103, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 320, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|