{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 7.497656982193065,
  "eval_steps": 250,
  "global_step": 8000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00937207122774133,
      "grad_norm": 0.573442816734314,
      "learning_rate": 9.999926781765732e-06,
      "loss": 1.312,
      "step": 10
    },
    {
      "epoch": 0.01874414245548266,
      "grad_norm": 1.0577057600021362,
      "learning_rate": 9.999853563531462e-06,
      "loss": 1.2611,
      "step": 20
    },
    {
      "epoch": 0.028116213683223992,
      "grad_norm": 1.358649492263794,
      "learning_rate": 9.999780345297193e-06,
      "loss": 1.1822,
      "step": 30
    },
    {
      "epoch": 0.03748828491096532,
      "grad_norm": 1.7219270467758179,
      "learning_rate": 9.999707127062924e-06,
      "loss": 1.062,
      "step": 40
    },
    {
      "epoch": 0.046860356138706656,
      "grad_norm": 1.7191277742385864,
      "learning_rate": 9.999633908828655e-06,
      "loss": 0.9325,
      "step": 50
    },
    {
      "epoch": 0.056232427366447985,
      "grad_norm": 1.6047089099884033,
      "learning_rate": 9.999560690594387e-06,
      "loss": 0.7909,
      "step": 60
    },
    {
      "epoch": 0.06560449859418932,
      "grad_norm": 1.1597000360488892,
      "learning_rate": 9.999487472360118e-06,
      "loss": 0.6858,
      "step": 70
    },
    {
      "epoch": 0.07497656982193064,
      "grad_norm": 1.4232110977172852,
      "learning_rate": 9.999414254125849e-06,
      "loss": 0.6554,
      "step": 80
    },
    {
      "epoch": 0.08434864104967198,
      "grad_norm": 1.3652020692825317,
      "learning_rate": 9.99934103589158e-06,
      "loss": 0.5937,
      "step": 90
    },
    {
      "epoch": 0.09372071227741331,
      "grad_norm": 1.299221396446228,
      "learning_rate": 9.99926781765731e-06,
      "loss": 0.5778,
      "step": 100
    },
    {
      "epoch": 0.10309278350515463,
      "grad_norm": 1.367699146270752,
      "learning_rate": 9.99919459942304e-06,
      "loss": 0.5562,
      "step": 110
    },
    {
      "epoch": 0.11246485473289597,
      "grad_norm": 1.2190635204315186,
      "learning_rate": 9.999121381188772e-06,
      "loss": 0.5259,
      "step": 120
    },
    {
      "epoch": 0.1218369259606373,
      "grad_norm": 1.1808373928070068,
      "learning_rate": 9.999048162954504e-06,
      "loss": 0.5158,
      "step": 130
    },
    {
      "epoch": 0.13120899718837864,
      "grad_norm": 1.5956122875213623,
      "learning_rate": 9.998974944720235e-06,
      "loss": 0.4877,
      "step": 140
    },
    {
      "epoch": 0.14058106841611998,
      "grad_norm": 1.2425106763839722,
      "learning_rate": 9.998901726485964e-06,
      "loss": 0.4858,
      "step": 150
    },
    {
      "epoch": 0.14995313964386128,
      "grad_norm": 1.284425139427185,
      "learning_rate": 9.998828508251696e-06,
      "loss": 0.4426,
      "step": 160
    },
    {
      "epoch": 0.15932521087160262,
      "grad_norm": 1.4248498678207397,
      "learning_rate": 9.998755290017427e-06,
      "loss": 0.4644,
      "step": 170
    },
    {
      "epoch": 0.16869728209934395,
      "grad_norm": 2.5712969303131104,
      "learning_rate": 9.998682071783158e-06,
      "loss": 0.4363,
      "step": 180
    },
    {
      "epoch": 0.1780693533270853,
      "grad_norm": 1.572169542312622,
      "learning_rate": 9.998608853548888e-06,
      "loss": 0.4206,
      "step": 190
    },
    {
      "epoch": 0.18744142455482662,
      "grad_norm": 1.4508352279663086,
      "learning_rate": 9.998535635314621e-06,
      "loss": 0.4247,
      "step": 200
    },
    {
      "epoch": 0.19681349578256796,
      "grad_norm": 1.2668938636779785,
      "learning_rate": 9.99846241708035e-06,
      "loss": 0.4302,
      "step": 210
    },
    {
      "epoch": 0.20618556701030927,
      "grad_norm": 1.0630348920822144,
      "learning_rate": 9.99838919884608e-06,
      "loss": 0.3987,
      "step": 220
    },
    {
      "epoch": 0.2155576382380506,
      "grad_norm": 1.1395602226257324,
      "learning_rate": 9.998315980611813e-06,
      "loss": 0.3746,
      "step": 230
    },
    {
      "epoch": 0.22492970946579194,
      "grad_norm": 1.6570693254470825,
      "learning_rate": 9.998242762377544e-06,
      "loss": 0.3954,
      "step": 240
    },
    {
      "epoch": 0.23430178069353327,
      "grad_norm": 1.2213038206100464,
      "learning_rate": 9.998169544143275e-06,
      "loss": 0.3877,
      "step": 250
    },
    {
      "epoch": 0.23430178069353327,
      "eval_loss": 0.15415821969509125,
      "eval_pearson_cosine": 0.7471039295196533,
      "eval_pearson_dot": 0.6414342522621155,
      "eval_pearson_euclidean": 0.739482581615448,
      "eval_pearson_manhattan": 0.7393465042114258,
      "eval_runtime": 29.8457,
      "eval_samples_per_second": 50.258,
      "eval_spearman_cosine": 0.7499078042299374,
      "eval_spearman_dot": 0.6346699933138464,
      "eval_spearman_euclidean": 0.7397365400334271,
      "eval_spearman_manhattan": 0.7393369553461101,
      "eval_steps_per_second": 6.299,
      "step": 250
    },
    {
      "epoch": 0.2436738519212746,
      "grad_norm": 1.3511942625045776,
      "learning_rate": 9.998096325909005e-06,
      "loss": 0.3685,
      "step": 260
    },
    {
      "epoch": 0.2530459231490159,
      "grad_norm": 1.3458188772201538,
      "learning_rate": 9.998023107674736e-06,
      "loss": 0.367,
      "step": 270
    },
    {
      "epoch": 0.2624179943767573,
      "grad_norm": 1.424850344657898,
      "learning_rate": 9.997949889440467e-06,
      "loss": 0.3511,
      "step": 280
    },
    {
      "epoch": 0.2717900656044986,
      "grad_norm": 1.4595459699630737,
      "learning_rate": 9.997876671206198e-06,
      "loss": 0.3389,
      "step": 290
    },
    {
      "epoch": 0.28116213683223995,
      "grad_norm": 1.167495608329773,
      "learning_rate": 9.997803452971928e-06,
      "loss": 0.3335,
      "step": 300
    },
    {
      "epoch": 0.29053420805998126,
      "grad_norm": 1.1749252080917358,
      "learning_rate": 9.997730234737661e-06,
      "loss": 0.3339,
      "step": 310
    },
    {
      "epoch": 0.29990627928772257,
      "grad_norm": 1.2500739097595215,
      "learning_rate": 9.99765701650339e-06,
      "loss": 0.3215,
      "step": 320
    },
    {
      "epoch": 0.30927835051546393,
      "grad_norm": 1.332942247390747,
      "learning_rate": 9.99758379826912e-06,
      "loss": 0.3093,
      "step": 330
    },
    {
      "epoch": 0.31865042174320524,
      "grad_norm": 1.173511266708374,
      "learning_rate": 9.997510580034853e-06,
      "loss": 0.3234,
      "step": 340
    },
    {
      "epoch": 0.3280224929709466,
      "grad_norm": 1.3587061166763306,
      "learning_rate": 9.997437361800584e-06,
      "loss": 0.3285,
      "step": 350
    },
    {
      "epoch": 0.3373945641986879,
      "grad_norm": 1.4196358919143677,
      "learning_rate": 9.997364143566315e-06,
      "loss": 0.3078,
      "step": 360
    },
    {
      "epoch": 0.3467666354264292,
      "grad_norm": 1.1899330615997314,
      "learning_rate": 9.997290925332045e-06,
      "loss": 0.2952,
      "step": 370
    },
    {
      "epoch": 0.3561387066541706,
      "grad_norm": 1.3728539943695068,
      "learning_rate": 9.997217707097776e-06,
      "loss": 0.2912,
      "step": 380
    },
    {
      "epoch": 0.3655107778819119,
      "grad_norm": 1.6375203132629395,
      "learning_rate": 9.997144488863507e-06,
      "loss": 0.3153,
      "step": 390
    },
    {
      "epoch": 0.37488284910965325,
      "grad_norm": 1.3330031633377075,
      "learning_rate": 9.997071270629238e-06,
      "loss": 0.2858,
      "step": 400
    },
    {
      "epoch": 0.38425492033739456,
      "grad_norm": 1.2047045230865479,
      "learning_rate": 9.99699805239497e-06,
      "loss": 0.3004,
      "step": 410
    },
    {
      "epoch": 0.3936269915651359,
      "grad_norm": 1.280134916305542,
      "learning_rate": 9.9969248341607e-06,
      "loss": 0.2819,
      "step": 420
    },
    {
      "epoch": 0.4029990627928772,
      "grad_norm": 1.2952693700790405,
      "learning_rate": 9.99685161592643e-06,
      "loss": 0.2772,
      "step": 430
    },
    {
      "epoch": 0.41237113402061853,
      "grad_norm": 1.1937365531921387,
      "learning_rate": 9.996778397692162e-06,
      "loss": 0.3024,
      "step": 440
    },
    {
      "epoch": 0.4217432052483599,
      "grad_norm": 1.226347804069519,
      "learning_rate": 9.996705179457893e-06,
      "loss": 0.2844,
      "step": 450
    },
    {
      "epoch": 0.4311152764761012,
      "grad_norm": 1.5503312349319458,
      "learning_rate": 9.996631961223624e-06,
      "loss": 0.2634,
      "step": 460
    },
    {
      "epoch": 0.44048734770384257,
      "grad_norm": 1.4498707056045532,
      "learning_rate": 9.996558742989355e-06,
      "loss": 0.2697,
      "step": 470
    },
    {
      "epoch": 0.4498594189315839,
      "grad_norm": 1.2823820114135742,
      "learning_rate": 9.996485524755087e-06,
      "loss": 0.2927,
      "step": 480
    },
    {
      "epoch": 0.4592314901593252,
      "grad_norm": 1.1089231967926025,
      "learning_rate": 9.996412306520816e-06,
      "loss": 0.2669,
      "step": 490
    },
    {
      "epoch": 0.46860356138706655,
      "grad_norm": 1.3862818479537964,
      "learning_rate": 9.996339088286547e-06,
      "loss": 0.2805,
      "step": 500
    },
    {
      "epoch": 0.46860356138706655,
      "eval_loss": 0.11416644603013992,
      "eval_pearson_cosine": 0.7577512264251709,
      "eval_pearson_dot": 0.6366492509841919,
      "eval_pearson_euclidean": 0.7618618011474609,
      "eval_pearson_manhattan": 0.7619431614875793,
      "eval_runtime": 22.679,
      "eval_samples_per_second": 66.14,
      "eval_spearman_cosine": 0.7643092952449725,
      "eval_spearman_dot": 0.6341280960850315,
      "eval_spearman_euclidean": 0.7653570734883524,
      "eval_spearman_manhattan": 0.7652284643248553,
      "eval_steps_per_second": 8.29,
      "step": 500
    },
    {
      "epoch": 0.47797563261480785,
      "grad_norm": 1.079265832901001,
      "learning_rate": 9.99626587005228e-06,
      "loss": 0.2649,
      "step": 510
    },
    {
      "epoch": 0.4873477038425492,
      "grad_norm": 1.3966060876846313,
      "learning_rate": 9.99619265181801e-06,
      "loss": 0.279,
      "step": 520
    },
    {
      "epoch": 0.4967197750702905,
      "grad_norm": 1.197001576423645,
      "learning_rate": 9.99611943358374e-06,
      "loss": 0.263,
      "step": 530
    },
    {
      "epoch": 0.5060918462980318,
      "grad_norm": 1.414509892463684,
      "learning_rate": 9.996046215349472e-06,
      "loss": 0.2816,
      "step": 540
    },
    {
      "epoch": 0.5154639175257731,
      "grad_norm": 1.4723501205444336,
      "learning_rate": 9.995972997115202e-06,
      "loss": 0.2696,
      "step": 550
    },
    {
      "epoch": 0.5248359887535146,
      "grad_norm": 1.1838375329971313,
      "learning_rate": 9.995899778880933e-06,
      "loss": 0.2686,
      "step": 560
    },
    {
      "epoch": 0.5342080599812559,
      "grad_norm": 1.2640224695205688,
      "learning_rate": 9.995826560646664e-06,
      "loss": 0.2842,
      "step": 570
    },
    {
      "epoch": 0.5435801312089972,
      "grad_norm": 1.2584717273712158,
      "learning_rate": 9.995753342412395e-06,
      "loss": 0.2505,
      "step": 580
    },
    {
      "epoch": 0.5529522024367385,
      "grad_norm": 1.3276816606521606,
      "learning_rate": 9.995680124178127e-06,
      "loss": 0.2764,
      "step": 590
    },
    {
      "epoch": 0.5623242736644799,
      "grad_norm": 1.5065838098526,
      "learning_rate": 9.995606905943858e-06,
      "loss": 0.2778,
      "step": 600
    },
    {
      "epoch": 0.5716963448922212,
      "grad_norm": 1.1485587358474731,
      "learning_rate": 9.995533687709588e-06,
      "loss": 0.2533,
      "step": 610
    },
    {
      "epoch": 0.5810684161199625,
      "grad_norm": 1.242677927017212,
      "learning_rate": 9.99546046947532e-06,
      "loss": 0.2549,
      "step": 620
    },
    {
      "epoch": 0.5904404873477038,
      "grad_norm": 1.4471759796142578,
      "learning_rate": 9.99538725124105e-06,
      "loss": 0.2734,
      "step": 630
    },
    {
      "epoch": 0.5998125585754451,
      "grad_norm": 1.3379895687103271,
      "learning_rate": 9.99531403300678e-06,
      "loss": 0.2551,
      "step": 640
    },
    {
      "epoch": 0.6091846298031866,
      "grad_norm": 1.2373607158660889,
      "learning_rate": 9.995240814772511e-06,
      "loss": 0.2358,
      "step": 650
    },
    {
      "epoch": 0.6185567010309279,
      "grad_norm": 1.2897976636886597,
      "learning_rate": 9.995167596538242e-06,
      "loss": 0.2572,
      "step": 660
    },
    {
      "epoch": 0.6279287722586692,
      "grad_norm": 1.3715548515319824,
      "learning_rate": 9.995094378303973e-06,
      "loss": 0.2554,
      "step": 670
    },
    {
      "epoch": 0.6373008434864105,
      "grad_norm": 1.3889539241790771,
      "learning_rate": 9.995021160069704e-06,
      "loss": 0.2502,
      "step": 680
    },
    {
      "epoch": 0.6466729147141518,
      "grad_norm": 1.3987656831741333,
      "learning_rate": 9.994947941835436e-06,
      "loss": 0.2449,
      "step": 690
    },
    {
      "epoch": 0.6560449859418932,
      "grad_norm": 1.4677623510360718,
      "learning_rate": 9.994874723601167e-06,
      "loss": 0.2438,
      "step": 700
    },
    {
      "epoch": 0.6654170571696345,
      "grad_norm": 1.238258719444275,
      "learning_rate": 9.994801505366898e-06,
      "loss": 0.2609,
      "step": 710
    },
    {
      "epoch": 0.6747891283973758,
      "grad_norm": 1.2697819471359253,
      "learning_rate": 9.994728287132628e-06,
      "loss": 0.2685,
      "step": 720
    },
    {
      "epoch": 0.6841611996251171,
      "grad_norm": 1.1607269048690796,
      "learning_rate": 9.99465506889836e-06,
      "loss": 0.2342,
      "step": 730
    },
    {
      "epoch": 0.6935332708528584,
      "grad_norm": 1.2666348218917847,
      "learning_rate": 9.99458185066409e-06,
      "loss": 0.2308,
      "step": 740
    },
    {
      "epoch": 0.7029053420805998,
      "grad_norm": 1.252940058708191,
      "learning_rate": 9.99450863242982e-06,
      "loss": 0.2331,
      "step": 750
    },
    {
      "epoch": 0.7029053420805998,
      "eval_loss": 0.09498214721679688,
      "eval_pearson_cosine": 0.7673527002334595,
      "eval_pearson_dot": 0.6584292054176331,
      "eval_pearson_euclidean": 0.7682392001152039,
      "eval_pearson_manhattan": 0.7685161232948303,
      "eval_runtime": 21.4883,
      "eval_samples_per_second": 69.805,
      "eval_spearman_cosine": 0.7771628917615258,
      "eval_spearman_dot": 0.6570265964452069,
      "eval_spearman_euclidean": 0.7740883932373563,
      "eval_spearman_manhattan": 0.7747253819422362,
      "eval_steps_per_second": 8.749,
      "step": 750
    },
    {
      "epoch": 0.7122774133083412,
      "grad_norm": 1.204959750175476,
      "learning_rate": 9.994435414195553e-06,
      "loss": 0.2514,
      "step": 760
    },
    {
      "epoch": 0.7216494845360825,
      "grad_norm": 2.5355069637298584,
      "learning_rate": 9.994362195961284e-06,
      "loss": 0.2473,
      "step": 770
    },
    {
      "epoch": 0.7310215557638238,
      "grad_norm": 1.2129027843475342,
      "learning_rate": 9.994288977727013e-06,
      "loss": 0.2302,
      "step": 780
    },
    {
      "epoch": 0.7403936269915652,
      "grad_norm": 1.109953761100769,
      "learning_rate": 9.994215759492745e-06,
      "loss": 0.2264,
      "step": 790
    },
    {
      "epoch": 0.7497656982193065,
      "grad_norm": 1.443888545036316,
      "learning_rate": 9.994142541258476e-06,
      "loss": 0.2372,
      "step": 800
    },
    {
      "epoch": 0.7591377694470478,
      "grad_norm": 1.3083347082138062,
      "learning_rate": 9.994069323024207e-06,
      "loss": 0.2417,
      "step": 810
    },
    {
      "epoch": 0.7685098406747891,
      "grad_norm": 1.0919073820114136,
      "learning_rate": 9.993996104789938e-06,
      "loss": 0.2331,
      "step": 820
    },
    {
      "epoch": 0.7778819119025304,
      "grad_norm": 1.3770041465759277,
      "learning_rate": 9.993922886555668e-06,
      "loss": 0.2692,
      "step": 830
    },
    {
      "epoch": 0.7872539831302718,
      "grad_norm": 1.2099621295928955,
      "learning_rate": 9.993849668321399e-06,
      "loss": 0.2279,
      "step": 840
    },
    {
      "epoch": 0.7966260543580131,
      "grad_norm": 1.1606112718582153,
      "learning_rate": 9.99377645008713e-06,
      "loss": 0.2474,
      "step": 850
    },
    {
      "epoch": 0.8059981255857545,
      "grad_norm": 1.472863793373108,
      "learning_rate": 9.993703231852862e-06,
      "loss": 0.2298,
      "step": 860
    },
    {
      "epoch": 0.8153701968134958,
      "grad_norm": 1.2455284595489502,
      "learning_rate": 9.993630013618593e-06,
      "loss": 0.2371,
      "step": 870
    },
    {
      "epoch": 0.8247422680412371,
      "grad_norm": 1.3777674436569214,
      "learning_rate": 9.993556795384324e-06,
      "loss": 0.2434,
      "step": 880
    },
    {
      "epoch": 0.8341143392689785,
      "grad_norm": 0.9551514983177185,
      "learning_rate": 9.993483577150055e-06,
      "loss": 0.2074,
      "step": 890
    },
    {
      "epoch": 0.8434864104967198,
      "grad_norm": 1.0588115453720093,
      "learning_rate": 9.993410358915785e-06,
      "loss": 0.2162,
      "step": 900
    },
    {
      "epoch": 0.8528584817244611,
      "grad_norm": 1.3450068235397339,
      "learning_rate": 9.993337140681516e-06,
      "loss": 0.2272,
      "step": 910
    },
    {
      "epoch": 0.8622305529522024,
      "grad_norm": 1.6997965574264526,
      "learning_rate": 9.993263922447247e-06,
      "loss": 0.2315,
      "step": 920
    },
    {
      "epoch": 0.8716026241799437,
      "grad_norm": 1.2186520099639893,
      "learning_rate": 9.993190704212978e-06,
      "loss": 0.2426,
      "step": 930
    },
    {
      "epoch": 0.8809746954076851,
      "grad_norm": 1.0515309572219849,
      "learning_rate": 9.99311748597871e-06,
      "loss": 0.2328,
      "step": 940
    },
    {
      "epoch": 0.8903467666354264,
      "grad_norm": 1.29239821434021,
      "learning_rate": 9.993044267744439e-06,
      "loss": 0.2263,
      "step": 950
    },
    {
      "epoch": 0.8997188378631678,
      "grad_norm": 1.7695139646530151,
      "learning_rate": 9.99297104951017e-06,
      "loss": 0.2466,
      "step": 960
    },
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 1.359837293624878,
      "learning_rate": 9.992897831275902e-06,
      "loss": 0.2215,
      "step": 970
    },
    {
      "epoch": 0.9184629803186504,
      "grad_norm": 1.2525417804718018,
      "learning_rate": 9.992824613041633e-06,
      "loss": 0.2295,
      "step": 980
    },
    {
      "epoch": 0.9278350515463918,
      "grad_norm": 1.2337384223937988,
      "learning_rate": 9.992751394807364e-06,
      "loss": 0.2101,
      "step": 990
    },
    {
      "epoch": 0.9372071227741331,
      "grad_norm": 1.1121580600738525,
      "learning_rate": 9.992678176573095e-06,
      "loss": 0.2455,
      "step": 1000
    },
    {
      "epoch": 0.9372071227741331,
      "eval_loss": 0.09235719591379166,
      "eval_pearson_cosine": 0.7676932215690613,
      "eval_pearson_dot": 0.6569437980651855,
      "eval_pearson_euclidean": 0.7712024450302124,
      "eval_pearson_manhattan": 0.7713895440101624,
      "eval_runtime": 21.9039,
      "eval_samples_per_second": 68.481,
      "eval_spearman_cosine": 0.7780572781571132,
      "eval_spearman_dot": 0.6557682135268442,
      "eval_spearman_euclidean": 0.7775782712174545,
      "eval_spearman_manhattan": 0.7778181970888292,
      "eval_steps_per_second": 8.583,
      "step": 1000
    },
    {
      "epoch": 0.9465791940018744,
      "grad_norm": 1.1828556060791016,
      "learning_rate": 9.992604958338825e-06,
      "loss": 0.2168,
      "step": 1010
    },
    {
      "epoch": 0.9559512652296157,
      "grad_norm": 1.2189664840698242,
      "learning_rate": 9.992531740104556e-06,
      "loss": 0.2072,
      "step": 1020
    },
    {
      "epoch": 0.9653233364573571,
      "grad_norm": 1.6102409362792969,
      "learning_rate": 9.992458521870287e-06,
      "loss": 0.2228,
      "step": 1030
    },
    {
      "epoch": 0.9746954076850984,
      "grad_norm": 1.6891916990280151,
      "learning_rate": 9.99238530363602e-06,
      "loss": 0.2404,
      "step": 1040
    },
    {
      "epoch": 0.9840674789128397,
      "grad_norm": 1.2274008989334106,
      "learning_rate": 9.99231208540175e-06,
      "loss": 0.2225,
      "step": 1050
    },
    {
      "epoch": 0.993439550140581,
      "grad_norm": 1.2388169765472412,
      "learning_rate": 9.992238867167479e-06,
      "loss": 0.2215,
      "step": 1060
    },
    {
      "epoch": 1.0028116213683225,
      "grad_norm": 1.2347650527954102,
      "learning_rate": 9.992165648933211e-06,
      "loss": 0.2239,
      "step": 1070
    },
    {
      "epoch": 1.0121836925960637,
      "grad_norm": 1.1266793012619019,
      "learning_rate": 9.992092430698942e-06,
      "loss": 0.1932,
      "step": 1080
    },
    {
      "epoch": 1.021555763823805,
      "grad_norm": 1.5187146663665771,
      "learning_rate": 9.992019212464673e-06,
      "loss": 0.205,
      "step": 1090
    },
    {
      "epoch": 1.0309278350515463,
      "grad_norm": 1.4463717937469482,
      "learning_rate": 9.991945994230404e-06,
      "loss": 0.1818,
      "step": 1100
    },
    {
      "epoch": 1.0402999062792877,
      "grad_norm": 1.6186790466308594,
      "learning_rate": 9.991872775996136e-06,
      "loss": 0.2076,
      "step": 1110
    },
    {
      "epoch": 1.0496719775070291,
      "grad_norm": 1.3895883560180664,
      "learning_rate": 9.991799557761865e-06,
      "loss": 0.2096,
      "step": 1120
    },
    {
      "epoch": 1.0590440487347703,
      "grad_norm": 1.296912670135498,
      "learning_rate": 9.991726339527596e-06,
      "loss": 0.2046,
      "step": 1130
    },
    {
      "epoch": 1.0684161199625117,
      "grad_norm": 1.5527839660644531,
      "learning_rate": 9.991653121293328e-06,
      "loss": 0.1972,
      "step": 1140
    },
    {
      "epoch": 1.077788191190253,
      "grad_norm": 1.4777096509933472,
      "learning_rate": 9.99157990305906e-06,
      "loss": 0.2086,
      "step": 1150
    },
    {
      "epoch": 1.0871602624179943,
      "grad_norm": 1.3155533075332642,
      "learning_rate": 9.99150668482479e-06,
      "loss": 0.1969,
      "step": 1160
    },
    {
      "epoch": 1.0965323336457358,
      "grad_norm": 1.5277265310287476,
      "learning_rate": 9.99143346659052e-06,
      "loss": 0.1923,
      "step": 1170
    },
    {
      "epoch": 1.105904404873477,
      "grad_norm": 1.3764179944992065,
      "learning_rate": 9.991360248356251e-06,
      "loss": 0.1916,
      "step": 1180
    },
    {
      "epoch": 1.1152764761012184,
      "grad_norm": 1.6024688482284546,
      "learning_rate": 9.991287030121982e-06,
      "loss": 0.185,
      "step": 1190
    },
    {
      "epoch": 1.1246485473289598,
      "grad_norm": 1.2752821445465088,
      "learning_rate": 9.991213811887713e-06,
      "loss": 0.1829,
      "step": 1200
    },
    {
      "epoch": 1.134020618556701,
      "grad_norm": 1.4704368114471436,
      "learning_rate": 9.991140593653444e-06,
      "loss": 0.2006,
      "step": 1210
    },
    {
      "epoch": 1.1433926897844424,
      "grad_norm": 1.3614213466644287,
      "learning_rate": 9.991067375419176e-06,
      "loss": 0.1776,
      "step": 1220
    },
    {
      "epoch": 1.1527647610121836,
      "grad_norm": 1.2852075099945068,
      "learning_rate": 9.990994157184905e-06,
      "loss": 0.2116,
      "step": 1230
    },
    {
      "epoch": 1.162136832239925,
      "grad_norm": 1.1774332523345947,
      "learning_rate": 9.990920938950636e-06,
      "loss": 0.1909,
      "step": 1240
    },
    {
      "epoch": 1.1715089034676662,
      "grad_norm": 1.0442605018615723,
      "learning_rate": 9.990847720716368e-06,
      "loss": 0.1933,
      "step": 1250
    },
    {
      "epoch": 1.1715089034676662,
      "eval_loss": 0.08017747104167938,
      "eval_pearson_cosine": 0.7703680992126465,
      "eval_pearson_dot": 0.6808142066001892,
      "eval_pearson_euclidean": 0.7676056623458862,
      "eval_pearson_manhattan": 0.7677772045135498,
      "eval_runtime": 22.1599,
      "eval_samples_per_second": 67.69,
      "eval_spearman_cosine": 0.7790172740054649,
      "eval_spearman_dot": 0.6796557194170769,
      "eval_spearman_euclidean": 0.7739566900498013,
      "eval_spearman_manhattan": 0.7741509176342483,
      "eval_steps_per_second": 8.484,
      "step": 1250
    },
    {
      "epoch": 1.1808809746954076,
      "grad_norm": 1.3561466932296753,
      "learning_rate": 9.990774502482099e-06,
      "loss": 0.1921,
      "step": 1260
    },
    {
      "epoch": 1.190253045923149,
      "grad_norm": 1.2151105403900146,
      "learning_rate": 9.99070128424783e-06,
      "loss": 0.1865,
      "step": 1270
    },
    {
      "epoch": 1.1996251171508903,
      "grad_norm": 1.4363489151000977,
      "learning_rate": 9.99062806601356e-06,
      "loss": 0.2071,
      "step": 1280
    },
    {
      "epoch": 1.2089971883786317,
      "grad_norm": 1.1078994274139404,
      "learning_rate": 9.990554847779291e-06,
      "loss": 0.1984,
      "step": 1290
    },
    {
      "epoch": 1.218369259606373,
      "grad_norm": 1.4608142375946045,
      "learning_rate": 9.990481629545022e-06,
      "loss": 0.1926,
      "step": 1300
    },
    {
      "epoch": 1.2277413308341143,
      "grad_norm": 1.5290361642837524,
      "learning_rate": 9.990408411310753e-06,
      "loss": 0.1935,
      "step": 1310
    },
    {
      "epoch": 1.2371134020618557,
      "grad_norm": 1.09344482421875,
      "learning_rate": 9.990335193076485e-06,
      "loss": 0.2026,
      "step": 1320
    },
    {
      "epoch": 1.246485473289597,
      "grad_norm": 1.5567576885223389,
      "learning_rate": 9.990261974842216e-06,
      "loss": 0.1968,
      "step": 1330
    },
    {
      "epoch": 1.2558575445173383,
      "grad_norm": 1.243221402168274,
      "learning_rate": 9.990188756607947e-06,
      "loss": 0.1859,
      "step": 1340
    },
    {
      "epoch": 1.2652296157450795,
      "grad_norm": 1.5287493467330933,
      "learning_rate": 9.990115538373678e-06,
      "loss": 0.2067,
      "step": 1350
    },
    {
      "epoch": 1.274601686972821,
      "grad_norm": 1.1587677001953125,
      "learning_rate": 9.990042320139408e-06,
      "loss": 0.1848,
      "step": 1360
    },
    {
      "epoch": 1.2839737582005624,
      "grad_norm": 1.3521069288253784,
      "learning_rate": 9.989969101905139e-06,
      "loss": 0.1975,
      "step": 1370
    },
    {
      "epoch": 1.2933458294283038,
      "grad_norm": 1.1655584573745728,
      "learning_rate": 9.98989588367087e-06,
      "loss": 0.1963,
      "step": 1380
    },
    {
      "epoch": 1.302717900656045,
      "grad_norm": 1.1636890172958374,
      "learning_rate": 9.989822665436602e-06,
      "loss": 0.1768,
      "step": 1390
    },
    {
      "epoch": 1.3120899718837864,
      "grad_norm": 1.3106030225753784,
      "learning_rate": 9.989749447202333e-06,
      "loss": 0.1918,
      "step": 1400
    },
    {
      "epoch": 1.3214620431115276,
      "grad_norm": 1.314274787902832,
      "learning_rate": 9.989676228968062e-06,
      "loss": 0.1733,
      "step": 1410
    },
    {
      "epoch": 1.330834114339269,
      "grad_norm": 1.646234393119812,
      "learning_rate": 9.989603010733795e-06,
      "loss": 0.1797,
      "step": 1420
    },
    {
      "epoch": 1.3402061855670104,
      "grad_norm": 1.3321646451950073,
      "learning_rate": 9.989529792499525e-06,
      "loss": 0.1726,
      "step": 1430
    },
    {
      "epoch": 1.3495782567947516,
      "grad_norm": 1.3959871530532837,
      "learning_rate": 9.989456574265256e-06,
      "loss": 0.1889,
      "step": 1440
    },
    {
      "epoch": 1.358950328022493,
      "grad_norm": 1.1790053844451904,
      "learning_rate": 9.989383356030987e-06,
      "loss": 0.1779,
      "step": 1450
    },
    {
      "epoch": 1.3683223992502342,
      "grad_norm": 1.7612881660461426,
      "learning_rate": 9.989310137796718e-06,
      "loss": 0.1834,
      "step": 1460
    },
    {
      "epoch": 1.3776944704779757,
      "grad_norm": 1.2366232872009277,
      "learning_rate": 9.989236919562448e-06,
      "loss": 0.1996,
      "step": 1470
    },
    {
      "epoch": 1.387066541705717,
      "grad_norm": 1.550465703010559,
      "learning_rate": 9.989163701328179e-06,
      "loss": 0.1991,
      "step": 1480
    },
    {
      "epoch": 1.3964386129334583,
      "grad_norm": 1.2935107946395874,
      "learning_rate": 9.98909048309391e-06,
      "loss": 0.1956,
      "step": 1490
    },
    {
      "epoch": 1.4058106841611997,
      "grad_norm": 0.9709776639938354,
      "learning_rate": 9.989017264859642e-06,
      "loss": 0.1872,
      "step": 1500
    },
    {
      "epoch": 1.4058106841611997,
      "eval_loss": 0.07902642339468002,
      "eval_pearson_cosine": 0.7684531211853027,
      "eval_pearson_dot": 0.6580111980438232,
      "eval_pearson_euclidean": 0.768983006477356,
      "eval_pearson_manhattan": 0.7692690491676331,
      "eval_runtime": 23.5462,
      "eval_samples_per_second": 63.704,
      "eval_spearman_cosine": 0.7777241764238451,
      "eval_spearman_dot": 0.6568945327389543,
      "eval_spearman_euclidean": 0.7752386276211667,
      "eval_spearman_manhattan": 0.7755204438878311,
      "eval_steps_per_second": 7.984,
      "step": 1500
    },
    {
      "epoch": 1.415182755388941,
      "grad_norm": 1.5001726150512695,
      "learning_rate": 9.988944046625373e-06,
      "loss": 0.2094,
      "step": 1510
    },
    {
      "epoch": 1.4245548266166823,
      "grad_norm": 1.1697657108306885,
      "learning_rate": 9.988870828391102e-06,
      "loss": 0.1862,
      "step": 1520
    },
    {
      "epoch": 1.4339268978444237,
      "grad_norm": 1.3496723175048828,
      "learning_rate": 9.988797610156834e-06,
      "loss": 0.1863,
      "step": 1530
    },
    {
      "epoch": 1.443298969072165,
      "grad_norm": 1.3314088582992554,
      "learning_rate": 9.988724391922565e-06,
      "loss": 0.1809,
      "step": 1540
    },
    {
      "epoch": 1.4526710402999063,
      "grad_norm": 1.2966681718826294,
      "learning_rate": 9.988651173688296e-06,
      "loss": 0.1799,
      "step": 1550
    },
    {
      "epoch": 1.4620431115276475,
      "grad_norm": 1.141318917274475,
      "learning_rate": 9.988577955454027e-06,
      "loss": 0.1983,
      "step": 1560
    },
    {
      "epoch": 1.471415182755389,
      "grad_norm": 1.1170287132263184,
      "learning_rate": 9.98850473721976e-06,
      "loss": 0.1823,
      "step": 1570
    },
    {
      "epoch": 1.4807872539831304,
      "grad_norm": 1.4531837701797485,
      "learning_rate": 9.988431518985488e-06,
      "loss": 0.1693,
      "step": 1580
    },
    {
      "epoch": 1.4901593252108716,
      "grad_norm": 1.5249556303024292,
      "learning_rate": 9.988358300751219e-06,
      "loss": 0.2014,
      "step": 1590
    },
    {
      "epoch": 1.499531396438613,
      "grad_norm": 1.319170594215393,
      "learning_rate": 9.988285082516951e-06,
      "loss": 0.1841,
      "step": 1600
    },
    {
      "epoch": 1.5089034676663542,
      "grad_norm": 1.2907928228378296,
      "learning_rate": 9.988211864282682e-06,
      "loss": 0.1778,
      "step": 1610
    },
    {
      "epoch": 1.5182755388940956,
      "grad_norm": 1.170284628868103,
      "learning_rate": 9.988138646048413e-06,
      "loss": 0.1668,
      "step": 1620
    },
    {
      "epoch": 1.527647610121837,
      "grad_norm": 1.4182498455047607,
      "learning_rate": 9.988065427814144e-06,
      "loss": 0.1968,
      "step": 1630
    },
    {
      "epoch": 1.5370196813495782,
      "grad_norm": 1.3137290477752686,
      "learning_rate": 9.987992209579874e-06,
      "loss": 0.1734,
      "step": 1640
    },
    {
      "epoch": 1.5463917525773194,
      "grad_norm": 1.458721399307251,
      "learning_rate": 9.987918991345605e-06,
      "loss": 0.209,
      "step": 1650
    },
    {
      "epoch": 1.5557638238050608,
      "grad_norm": 1.1368082761764526,
      "learning_rate": 9.987845773111336e-06,
      "loss": 0.1831,
      "step": 1660
    },
    {
      "epoch": 1.5651358950328023,
      "grad_norm": 1.0743663311004639,
      "learning_rate": 9.987772554877068e-06,
      "loss": 0.1883,
      "step": 1670
    },
    {
      "epoch": 1.5745079662605437,
      "grad_norm": 1.4294681549072266,
      "learning_rate": 9.987699336642799e-06,
      "loss": 0.1851,
      "step": 1680
    },
    {
      "epoch": 1.5838800374882849,
      "grad_norm": 1.0537577867507935,
      "learning_rate": 9.987626118408528e-06,
      "loss": 0.1818,
      "step": 1690
    },
    {
      "epoch": 1.5932521087160263,
      "grad_norm": 1.3930073976516724,
      "learning_rate": 9.98755290017426e-06,
      "loss": 0.1876,
      "step": 1700
    },
    {
      "epoch": 1.6026241799437675,
      "grad_norm": 1.3290959596633911,
      "learning_rate": 9.987479681939991e-06,
      "loss": 0.1777,
      "step": 1710
    },
    {
      "epoch": 1.611996251171509,
      "grad_norm": 1.3895900249481201,
      "learning_rate": 9.987406463705722e-06,
      "loss": 0.1728,
      "step": 1720
    },
    {
      "epoch": 1.6213683223992503,
      "grad_norm": 1.336679220199585,
      "learning_rate": 9.987333245471453e-06,
      "loss": 0.202,
      "step": 1730
    },
    {
      "epoch": 1.6307403936269915,
      "grad_norm": 1.4338617324829102,
      "learning_rate": 9.987260027237184e-06,
      "loss": 0.1745,
      "step": 1740
    },
    {
      "epoch": 1.640112464854733,
      "grad_norm": 1.1854125261306763,
      "learning_rate": 9.987186809002914e-06,
      "loss": 0.1628,
      "step": 1750
    },
    {
      "epoch": 1.640112464854733,
      "eval_loss": 0.07191870361566544,
      "eval_pearson_cosine": 0.7651911973953247,
      "eval_pearson_dot": 0.6584045886993408,
      "eval_pearson_euclidean": 0.7615811228752136,
      "eval_pearson_manhattan": 0.7618914842605591,
      "eval_runtime": 22.2177,
      "eval_samples_per_second": 67.514,
      "eval_spearman_cosine": 0.7733826669765486,
      "eval_spearman_dot": 0.6574446699366203,
      "eval_spearman_euclidean": 0.7678793093449918,
      "eval_spearman_manhattan": 0.7684997409854779,
      "eval_steps_per_second": 8.462,
      "step": 1750
    },
    {
      "epoch": 1.6494845360824741,
      "grad_norm": 1.468126654624939,
      "learning_rate": 9.987113590768645e-06,
      "loss": 0.1714,
      "step": 1760
    },
    {
      "epoch": 1.6588566073102156,
      "grad_norm": 1.3639568090438843,
      "learning_rate": 9.987040372534378e-06,
      "loss": 0.1839,
      "step": 1770
    },
    {
      "epoch": 1.668228678537957,
      "grad_norm": 1.2494312524795532,
      "learning_rate": 9.986967154300108e-06,
      "loss": 0.1753,
      "step": 1780
    },
    {
      "epoch": 1.6776007497656982,
      "grad_norm": 1.2897909879684448,
      "learning_rate": 9.986893936065839e-06,
      "loss": 0.1704,
      "step": 1790
    },
    {
      "epoch": 1.6869728209934396,
      "grad_norm": 1.413866400718689,
      "learning_rate": 9.98682071783157e-06,
      "loss": 0.1868,
      "step": 1800
    },
    {
      "epoch": 1.6963448922211808,
      "grad_norm": 1.093849778175354,
      "learning_rate": 9.9867474995973e-06,
      "loss": 0.1889,
      "step": 1810
    },
    {
      "epoch": 1.7057169634489222,
      "grad_norm": 1.3857814073562622,
      "learning_rate": 9.986674281363031e-06,
      "loss": 0.1818,
      "step": 1820
    },
    {
      "epoch": 1.7150890346766636,
      "grad_norm": 1.3772344589233398,
      "learning_rate": 9.986601063128762e-06,
      "loss": 0.1683,
      "step": 1830
    },
    {
      "epoch": 1.7244611059044048,
      "grad_norm": 1.3299206495285034,
      "learning_rate": 9.986527844894493e-06,
      "loss": 0.1865,
      "step": 1840
    },
    {
      "epoch": 1.7338331771321462,
      "grad_norm": 1.3139843940734863,
      "learning_rate": 9.986454626660225e-06,
      "loss": 0.169,
      "step": 1850
    },
    {
      "epoch": 1.7432052483598874,
      "grad_norm": 1.3562296628952026,
      "learning_rate": 9.986381408425954e-06,
      "loss": 0.2012,
      "step": 1860
    },
    {
      "epoch": 1.7525773195876289,
      "grad_norm": 1.2332826852798462,
      "learning_rate": 9.986308190191685e-06,
      "loss": 0.1877,
      "step": 1870
    },
    {
      "epoch": 1.7619493908153703,
      "grad_norm": 1.083622932434082,
      "learning_rate": 9.986234971957418e-06,
      "loss": 0.2026,
      "step": 1880
    },
    {
      "epoch": 1.7713214620431117,
      "grad_norm": 1.6391818523406982,
      "learning_rate": 9.986161753723148e-06,
      "loss": 0.1902,
      "step": 1890
    },
    {
      "epoch": 1.780693533270853,
      "grad_norm": 1.0985593795776367,
      "learning_rate": 9.986088535488879e-06,
      "loss": 0.1845,
      "step": 1900
    },
    {
      "epoch": 1.790065604498594,
      "grad_norm": 1.609025001525879,
      "learning_rate": 9.98601531725461e-06,
      "loss": 0.1939,
      "step": 1910
    },
    {
      "epoch": 1.7994376757263355,
      "grad_norm": 1.0637205839157104,
      "learning_rate": 9.98594209902034e-06,
      "loss": 0.1775,
      "step": 1920
    },
    {
      "epoch": 1.808809746954077,
      "grad_norm": 1.159469723701477,
      "learning_rate": 9.985868880786071e-06,
      "loss": 0.161,
      "step": 1930
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 1.1251918077468872,
      "learning_rate": 9.985795662551802e-06,
      "loss": 0.1965,
      "step": 1940
    },
    {
      "epoch": 1.8275538894095595,
      "grad_norm": 1.3804899454116821,
      "learning_rate": 9.985722444317534e-06,
      "loss": 0.1768,
      "step": 1950
    },
    {
      "epoch": 1.8369259606373007,
      "grad_norm": 1.194275140762329,
      "learning_rate": 9.985649226083265e-06,
      "loss": 0.1782,
      "step": 1960
    },
    {
      "epoch": 1.8462980318650422,
      "grad_norm": 1.5173845291137695,
      "learning_rate": 9.985576007848996e-06,
      "loss": 0.193,
      "step": 1970
    },
    {
      "epoch": 1.8556701030927836,
      "grad_norm": 1.7733920812606812,
      "learning_rate": 9.985502789614727e-06,
      "loss": 0.1804,
      "step": 1980
    },
    {
      "epoch": 1.865042174320525,
      "grad_norm": 1.1430355310440063,
      "learning_rate": 9.985429571380457e-06,
      "loss": 0.1869,
      "step": 1990
    },
    {
      "epoch": 1.8744142455482662,
      "grad_norm": 1.3633067607879639,
      "learning_rate": 9.985356353146188e-06,
      "loss": 0.1983,
      "step": 2000
    },
    {
      "epoch": 1.8744142455482662,
      "eval_loss": 0.07371454685926437,
      "eval_pearson_cosine": 0.7772414684295654,
      "eval_pearson_dot": 0.660416841506958,
      "eval_pearson_euclidean": 0.7648824453353882,
      "eval_pearson_manhattan": 0.7654331922531128,
      "eval_runtime": 22.1973,
      "eval_samples_per_second": 67.576,
      "eval_spearman_cosine": 0.7863920785446639,
      "eval_spearman_dot": 0.6607574545837009,
      "eval_spearman_euclidean": 0.7740511645049805,
      "eval_spearman_manhattan": 0.7747616492851076,
      "eval_steps_per_second": 8.47,
      "step": 2000
    },
    {
      "epoch": 1.8837863167760074,
      "grad_norm": 1.116107702255249,
      "learning_rate": 9.985283134911919e-06,
      "loss": 0.1775,
      "step": 2010
    },
    {
      "epoch": 1.8931583880037488,
      "grad_norm": 1.280927300453186,
      "learning_rate": 9.985209916677651e-06,
      "loss": 0.1853,
      "step": 2020
    },
    {
      "epoch": 1.9025304592314902,
      "grad_norm": 1.419044852256775,
      "learning_rate": 9.98513669844338e-06,
      "loss": 0.1767,
      "step": 2030
    },
    {
      "epoch": 1.9119025304592316,
      "grad_norm": 1.4140015840530396,
      "learning_rate": 9.985063480209111e-06,
      "loss": 0.1968,
      "step": 2040
    },
    {
      "epoch": 1.9212746016869728,
      "grad_norm": 1.23015296459198,
      "learning_rate": 9.984990261974844e-06,
      "loss": 0.1559,
      "step": 2050
    },
    {
      "epoch": 1.930646672914714,
      "grad_norm": 1.4209731817245483,
      "learning_rate": 9.984917043740574e-06,
      "loss": 0.18,
      "step": 2060
    },
    {
      "epoch": 1.9400187441424555,
      "grad_norm": 1.5270899534225464,
      "learning_rate": 9.984843825506305e-06,
      "loss": 0.1858,
      "step": 2070
    },
    {
      "epoch": 1.9493908153701969,
      "grad_norm": 2.0037920475006104,
      "learning_rate": 9.984770607272036e-06,
      "loss": 0.1812,
      "step": 2080
    },
    {
      "epoch": 1.9587628865979383,
      "grad_norm": 1.4397103786468506,
      "learning_rate": 9.984697389037767e-06,
      "loss": 0.1853,
      "step": 2090
    },
    {
      "epoch": 1.9681349578256795,
      "grad_norm": 1.555161476135254,
      "learning_rate": 9.984624170803497e-06,
      "loss": 0.1758,
      "step": 2100
    },
    {
      "epoch": 1.9775070290534207,
      "grad_norm": 1.1453354358673096,
      "learning_rate": 9.984550952569228e-06,
      "loss": 0.1821,
      "step": 2110
    },
    {
      "epoch": 1.986879100281162,
      "grad_norm": 1.3050484657287598,
      "learning_rate": 9.984477734334959e-06,
      "loss": 0.1828,
      "step": 2120
    },
    {
      "epoch": 1.9962511715089035,
      "grad_norm": 1.1858463287353516,
      "learning_rate": 9.984404516100691e-06,
      "loss": 0.1801,
      "step": 2130
    },
    {
      "epoch": 2.005623242736645,
      "grad_norm": 1.2467753887176514,
      "learning_rate": 9.984331297866422e-06,
      "loss": 0.1651,
      "step": 2140
    },
    {
      "epoch": 2.014995313964386,
      "grad_norm": 1.9730074405670166,
      "learning_rate": 9.984258079632151e-06,
      "loss": 0.1654,
      "step": 2150
    },
    {
      "epoch": 2.0243673851921273,
      "grad_norm": 1.384181261062622,
      "learning_rate": 9.984184861397884e-06,
      "loss": 0.151,
      "step": 2160
    },
    {
      "epoch": 2.0337394564198688,
      "grad_norm": 1.2262136936187744,
      "learning_rate": 9.984111643163614e-06,
      "loss": 0.1338,
      "step": 2170
    },
    {
      "epoch": 2.04311152764761,
      "grad_norm": 1.3417856693267822,
      "learning_rate": 9.984038424929345e-06,
      "loss": 0.1445,
      "step": 2180
    },
    {
      "epoch": 2.0524835988753516,
      "grad_norm": 1.3032526969909668,
      "learning_rate": 9.983965206695076e-06,
      "loss": 0.1675,
      "step": 2190
    },
    {
      "epoch": 2.0618556701030926,
      "grad_norm": 1.4586397409439087,
      "learning_rate": 9.983891988460808e-06,
      "loss": 0.1503,
      "step": 2200
    },
    {
      "epoch": 2.071227741330834,
      "grad_norm": 1.8017582893371582,
      "learning_rate": 9.983818770226537e-06,
      "loss": 0.1614,
      "step": 2210
    },
    {
      "epoch": 2.0805998125585754,
      "grad_norm": 1.1136542558670044,
      "learning_rate": 9.983745551992268e-06,
      "loss": 0.1385,
      "step": 2220
    },
    {
      "epoch": 2.089971883786317,
      "grad_norm": 1.48130202293396,
      "learning_rate": 9.983672333758e-06,
      "loss": 0.1448,
      "step": 2230
    },
    {
      "epoch": 2.0993439550140582,
      "grad_norm": 1.1847114562988281,
      "learning_rate": 9.983599115523731e-06,
      "loss": 0.1263,
      "step": 2240
    },
    {
      "epoch": 2.108716026241799,
      "grad_norm": 1.068515419960022,
      "learning_rate": 9.983525897289462e-06,
      "loss": 0.1448,
      "step": 2250
    },
    {
      "epoch": 2.108716026241799,
      "eval_loss": 0.0637284442782402,
      "eval_pearson_cosine": 0.766581654548645,
      "eval_pearson_dot": 0.652958333492279,
      "eval_pearson_euclidean": 0.76385897397995,
      "eval_pearson_manhattan": 0.7643536329269409,
      "eval_runtime": 24.9836,
      "eval_samples_per_second": 60.039,
      "eval_spearman_cosine": 0.7736502023043434,
      "eval_spearman_dot": 0.6506365364740643,
      "eval_spearman_euclidean": 0.7701725336122238,
      "eval_spearman_manhattan": 0.7705851416924343,
      "eval_steps_per_second": 7.525,
      "step": 2250
    },
    {
      "epoch": 2.1180880974695406,
      "grad_norm": 1.2607600688934326,
      "learning_rate": 9.983452679055193e-06,
      "loss": 0.1405,
      "step": 2260
    },
    {
      "epoch": 2.127460168697282,
      "grad_norm": 1.3096617460250854,
      "learning_rate": 9.983379460820924e-06,
      "loss": 0.159,
      "step": 2270
    },
    {
      "epoch": 2.1368322399250235,
      "grad_norm": 1.4220956563949585,
      "learning_rate": 9.983306242586654e-06,
      "loss": 0.1634,
      "step": 2280
    },
    {
      "epoch": 2.146204311152765,
      "grad_norm": 1.5565595626831055,
      "learning_rate": 9.983233024352385e-06,
      "loss": 0.1549,
      "step": 2290
    },
    {
      "epoch": 2.155576382380506,
      "grad_norm": 1.357906460762024,
      "learning_rate": 9.983159806118118e-06,
      "loss": 0.1503,
      "step": 2300
    },
    {
      "epoch": 2.1649484536082473,
      "grad_norm": 1.0181514024734497,
      "learning_rate": 9.983086587883848e-06,
      "loss": 0.1242,
      "step": 2310
    },
    {
      "epoch": 2.1743205248359887,
      "grad_norm": 1.2936785221099854,
      "learning_rate": 9.983013369649577e-06,
      "loss": 0.1516,
      "step": 2320
    },
    {
      "epoch": 2.18369259606373,
      "grad_norm": 1.353125810623169,
      "learning_rate": 9.98294015141531e-06,
      "loss": 0.1576,
      "step": 2330
    },
    {
      "epoch": 2.1930646672914715,
      "grad_norm": 1.5978926420211792,
      "learning_rate": 9.98286693318104e-06,
      "loss": 0.143,
      "step": 2340
    },
    {
      "epoch": 2.2024367385192125,
      "grad_norm": 1.643609642982483,
      "learning_rate": 9.982793714946771e-06,
      "loss": 0.1509,
      "step": 2350
    },
    {
      "epoch": 2.211808809746954,
      "grad_norm": 1.2868740558624268,
      "learning_rate": 9.982720496712502e-06,
      "loss": 0.1407,
      "step": 2360
    },
    {
      "epoch": 2.2211808809746953,
      "grad_norm": 1.662234902381897,
      "learning_rate": 9.982647278478233e-06,
      "loss": 0.1499,
      "step": 2370
    },
    {
      "epoch": 2.2305529522024368,
      "grad_norm": 1.7390748262405396,
      "learning_rate": 9.982574060243964e-06,
      "loss": 0.139,
      "step": 2380
    },
    {
      "epoch": 2.239925023430178,
      "grad_norm": 1.2645044326782227,
      "learning_rate": 9.982500842009694e-06,
      "loss": 0.1541,
      "step": 2390
    },
    {
      "epoch": 2.2492970946579196,
      "grad_norm": 1.5143808126449585,
      "learning_rate": 9.982427623775425e-06,
      "loss": 0.15,
      "step": 2400
    },
    {
      "epoch": 2.2586691658856606,
      "grad_norm": 1.516233205795288,
      "learning_rate": 9.982354405541158e-06,
      "loss": 0.1387,
      "step": 2410
    },
    {
      "epoch": 2.268041237113402,
      "grad_norm": 1.607926368713379,
      "learning_rate": 9.982281187306888e-06,
      "loss": 0.1459,
      "step": 2420
    },
    {
      "epoch": 2.2774133083411434,
      "grad_norm": 1.433325171470642,
      "learning_rate": 9.982207969072617e-06,
      "loss": 0.145,
      "step": 2430
    },
    {
      "epoch": 2.286785379568885,
      "grad_norm": 1.4051145315170288,
      "learning_rate": 9.98213475083835e-06,
      "loss": 0.1433,
      "step": 2440
    },
    {
      "epoch": 2.296157450796626,
      "grad_norm": 1.5076231956481934,
      "learning_rate": 9.98206153260408e-06,
      "loss": 0.1514,
      "step": 2450
    },
    {
      "epoch": 2.3055295220243672,
      "grad_norm": 1.185927152633667,
      "learning_rate": 9.981988314369811e-06,
      "loss": 0.1315,
      "step": 2460
    },
    {
      "epoch": 2.3149015932521086,
      "grad_norm": 1.1687299013137817,
      "learning_rate": 9.981915096135542e-06,
      "loss": 0.1611,
      "step": 2470
    },
    {
      "epoch": 2.32427366447985,
      "grad_norm": 1.205338716506958,
      "learning_rate": 9.981841877901274e-06,
      "loss": 0.1587,
      "step": 2480
    },
    {
      "epoch": 2.3336457357075915,
      "grad_norm": 1.1079684495925903,
      "learning_rate": 9.981768659667004e-06,
      "loss": 0.142,
      "step": 2490
    },
    {
      "epoch": 2.3430178069353325,
      "grad_norm": 1.1689645051956177,
      "learning_rate": 9.981695441432734e-06,
      "loss": 0.1449,
      "step": 2500
    },
    {
      "epoch": 2.3430178069353325,
      "eval_loss": 0.05785529315471649,
      "eval_pearson_cosine": 0.7640599012374878,
      "eval_pearson_dot": 0.6659318208694458,
      "eval_pearson_euclidean": 0.7584241628646851,
      "eval_pearson_manhattan": 0.7589800357818604,
      "eval_runtime": 27.3942,
      "eval_samples_per_second": 54.756,
      "eval_spearman_cosine": 0.7698402659202235,
      "eval_spearman_dot": 0.6637382071207051,
      "eval_spearman_euclidean": 0.765183939076614,
      "eval_spearman_manhattan": 0.7654494135153407,
      "eval_steps_per_second": 6.863,
      "step": 2500
    },
    {
      "epoch": 2.352389878163074,
      "grad_norm": 1.1410503387451172,
      "learning_rate": 9.981622223198467e-06,
      "loss": 0.1253,
      "step": 2510
    },
    {
      "epoch": 2.3617619493908153,
      "grad_norm": 1.6562408208847046,
      "learning_rate": 9.981549004964197e-06,
      "loss": 0.1363,
      "step": 2520
    },
    {
      "epoch": 2.3711340206185567,
      "grad_norm": 1.3503327369689941,
      "learning_rate": 9.981475786729928e-06,
      "loss": 0.141,
      "step": 2530
    },
    {
      "epoch": 2.380506091846298,
      "grad_norm": 1.4653688669204712,
      "learning_rate": 9.981402568495659e-06,
      "loss": 0.1452,
      "step": 2540
    },
    {
      "epoch": 2.3898781630740396,
      "grad_norm": 1.4135221242904663,
      "learning_rate": 9.98132935026139e-06,
      "loss": 0.1387,
      "step": 2550
    },
    {
      "epoch": 2.3992502343017805,
      "grad_norm": 1.1758474111557007,
      "learning_rate": 9.98125613202712e-06,
      "loss": 0.1402,
      "step": 2560
    },
    {
      "epoch": 2.408622305529522,
      "grad_norm": 1.6394227743148804,
      "learning_rate": 9.981182913792851e-06,
      "loss": 0.1434,
      "step": 2570
    },
    {
      "epoch": 2.4179943767572634,
      "grad_norm": 1.5223402976989746,
      "learning_rate": 9.981109695558584e-06,
      "loss": 0.1433,
      "step": 2580
    },
    {
      "epoch": 2.427366447985005,
      "grad_norm": 1.3722361326217651,
      "learning_rate": 9.981036477324314e-06,
      "loss": 0.145,
      "step": 2590
    },
    {
      "epoch": 2.436738519212746,
      "grad_norm": 1.4288251399993896,
      "learning_rate": 9.980963259090045e-06,
      "loss": 0.1419,
      "step": 2600
    },
    {
      "epoch": 2.446110590440487,
      "grad_norm": 1.3789891004562378,
      "learning_rate": 9.980890040855776e-06,
      "loss": 0.1428,
      "step": 2610
    },
    {
      "epoch": 2.4554826616682286,
      "grad_norm": 1.3833218812942505,
      "learning_rate": 9.980816822621507e-06,
      "loss": 0.163,
      "step": 2620
    },
    {
      "epoch": 2.46485473289597,
      "grad_norm": 1.2749391794204712,
      "learning_rate": 9.980743604387237e-06,
      "loss": 0.1457,
      "step": 2630
    },
    {
      "epoch": 2.4742268041237114,
      "grad_norm": 1.3677037954330444,
      "learning_rate": 9.980670386152968e-06,
      "loss": 0.1393,
      "step": 2640
    },
    {
      "epoch": 2.483598875351453,
      "grad_norm": 1.2386823892593384,
      "learning_rate": 9.980597167918699e-06,
      "loss": 0.1446,
      "step": 2650
    },
    {
      "epoch": 2.492970946579194,
      "grad_norm": 1.6553146839141846,
      "learning_rate": 9.98052394968443e-06,
      "loss": 0.1399,
      "step": 2660
    },
    {
      "epoch": 2.5023430178069352,
      "grad_norm": 1.2258574962615967,
      "learning_rate": 9.98045073145016e-06,
      "loss": 0.1557,
      "step": 2670
    },
    {
      "epoch": 2.5117150890346767,
      "grad_norm": 1.1680238246917725,
      "learning_rate": 9.980377513215891e-06,
      "loss": 0.14,
      "step": 2680
    },
    {
      "epoch": 2.521087160262418,
      "grad_norm": 1.3764533996582031,
      "learning_rate": 9.980304294981624e-06,
      "loss": 0.1429,
      "step": 2690
    },
    {
      "epoch": 2.530459231490159,
      "grad_norm": 1.1607757806777954,
      "learning_rate": 9.980231076747354e-06,
      "loss": 0.156,
      "step": 2700
    },
    {
      "epoch": 2.539831302717901,
      "grad_norm": 1.30258309841156,
      "learning_rate": 9.980157858513085e-06,
      "loss": 0.1334,
      "step": 2710
    },
    {
      "epoch": 2.549203373945642,
      "grad_norm": 1.3965803384780884,
      "learning_rate": 9.980084640278816e-06,
      "loss": 0.1532,
      "step": 2720
    },
    {
      "epoch": 2.5585754451733833,
      "grad_norm": 1.2492479085922241,
      "learning_rate": 9.980011422044547e-06,
      "loss": 0.1538,
      "step": 2730
    },
    {
      "epoch": 2.5679475164011247,
      "grad_norm": 1.5879229307174683,
      "learning_rate": 9.979938203810277e-06,
      "loss": 0.1393,
      "step": 2740
    },
    {
      "epoch": 2.5773195876288657,
      "grad_norm": 1.5499955415725708,
      "learning_rate": 9.979864985576008e-06,
      "loss": 0.1443,
      "step": 2750
    },
    {
      "epoch": 2.5773195876288657,
      "eval_loss": 0.059572458267211914,
      "eval_pearson_cosine": 0.7583234310150146,
      "eval_pearson_dot": 0.6585268378257751,
      "eval_pearson_euclidean": 0.7594324946403503,
      "eval_pearson_manhattan": 0.7599164843559265,
      "eval_runtime": 25.1198,
      "eval_samples_per_second": 59.714,
      "eval_spearman_cosine": 0.7658877891929784,
      "eval_spearman_dot": 0.6550703356470525,
      "eval_spearman_euclidean": 0.7651954936870381,
      "eval_spearman_manhattan": 0.7656066832066194,
      "eval_steps_per_second": 7.484,
      "step": 2750
    },
    {
      "epoch": 2.5866916588566076,
      "grad_norm": 1.1182575225830078,
      "learning_rate": 9.97979176734174e-06,
      "loss": 0.1449,
      "step": 2760
    },
    {
      "epoch": 2.5960637300843485,
      "grad_norm": 1.3228731155395508,
      "learning_rate": 9.979718549107471e-06,
      "loss": 0.1339,
      "step": 2770
    },
    {
      "epoch": 2.60543580131209,
      "grad_norm": 1.3763021230697632,
      "learning_rate": 9.9796453308732e-06,
      "loss": 0.1379,
      "step": 2780
    },
    {
      "epoch": 2.6148078725398314,
      "grad_norm": 1.6708637475967407,
      "learning_rate": 9.979572112638933e-06,
      "loss": 0.1491,
      "step": 2790
    },
    {
      "epoch": 2.624179943767573,
      "grad_norm": 1.0826717615127563,
      "learning_rate": 9.979498894404664e-06,
      "loss": 0.1447,
      "step": 2800
    },
    {
      "epoch": 2.633552014995314,
      "grad_norm": 1.4416155815124512,
      "learning_rate": 9.979425676170394e-06,
      "loss": 0.1398,
      "step": 2810
    },
    {
      "epoch": 2.642924086223055,
      "grad_norm": 1.3966304063796997,
      "learning_rate": 9.979352457936125e-06,
      "loss": 0.1332,
      "step": 2820
    },
    {
      "epoch": 2.6522961574507966,
      "grad_norm": 1.5255811214447021,
      "learning_rate": 9.979279239701856e-06,
      "loss": 0.1423,
      "step": 2830
    },
    {
      "epoch": 2.661668228678538,
      "grad_norm": 1.3866652250289917,
      "learning_rate": 9.979206021467587e-06,
      "loss": 0.1554,
      "step": 2840
    },
    {
      "epoch": 2.6710402999062794,
      "grad_norm": 1.3477802276611328,
      "learning_rate": 9.979132803233317e-06,
      "loss": 0.1547,
      "step": 2850
    },
    {
      "epoch": 2.680412371134021,
      "grad_norm": 1.540963053703308,
      "learning_rate": 9.97905958499905e-06,
      "loss": 0.1229,
      "step": 2860
    },
    {
      "epoch": 2.689784442361762,
      "grad_norm": 1.697350025177002,
      "learning_rate": 9.97898636676478e-06,
      "loss": 0.153,
      "step": 2870
    },
    {
      "epoch": 2.6991565135895033,
      "grad_norm": 1.6020257472991943,
      "learning_rate": 9.978913148530511e-06,
      "loss": 0.1334,
      "step": 2880
    },
    {
      "epoch": 2.7085285848172447,
      "grad_norm": 1.7637958526611328,
      "learning_rate": 9.978839930296242e-06,
      "loss": 0.1513,
      "step": 2890
    },
    {
      "epoch": 2.717900656044986,
      "grad_norm": 1.2917182445526123,
      "learning_rate": 9.978766712061973e-06,
      "loss": 0.1296,
      "step": 2900
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 1.42876136302948,
      "learning_rate": 9.978693493827704e-06,
      "loss": 0.1276,
      "step": 2910
    },
    {
      "epoch": 2.7366447985004685,
      "grad_norm": 1.340184211730957,
      "learning_rate": 9.978620275593434e-06,
      "loss": 0.164,
      "step": 2920
    },
    {
      "epoch": 2.74601686972821,
      "grad_norm": 1.1638396978378296,
      "learning_rate": 9.978547057359165e-06,
      "loss": 0.1372,
      "step": 2930
    },
    {
      "epoch": 2.7553889409559513,
      "grad_norm": 1.5060447454452515,
      "learning_rate": 9.978473839124897e-06,
      "loss": 0.1489,
      "step": 2940
    },
    {
      "epoch": 2.7647610121836927,
      "grad_norm": 1.3632638454437256,
      "learning_rate": 9.978400620890627e-06,
      "loss": 0.1242,
      "step": 2950
    },
    {
      "epoch": 2.774133083411434,
      "grad_norm": 1.6402980089187622,
      "learning_rate": 9.978327402656359e-06,
      "loss": 0.1395,
      "step": 2960
    },
    {
      "epoch": 2.783505154639175,
      "grad_norm": 1.8350452184677124,
      "learning_rate": 9.97825418442209e-06,
      "loss": 0.1501,
      "step": 2970
    },
    {
      "epoch": 2.7928772258669166,
      "grad_norm": 1.6517874002456665,
      "learning_rate": 9.97818096618782e-06,
      "loss": 0.1596,
      "step": 2980
    },
    {
      "epoch": 2.802249297094658,
      "grad_norm": 1.7441259622573853,
      "learning_rate": 9.978107747953551e-06,
      "loss": 0.1344,
      "step": 2990
    },
    {
      "epoch": 2.8116213683223994,
      "grad_norm": 1.4474517107009888,
      "learning_rate": 9.978034529719282e-06,
      "loss": 0.1363,
      "step": 3000
    },
    {
      "epoch": 2.8116213683223994,
      "eval_loss": 0.05750729516148567,
      "eval_pearson_cosine": 0.767126202583313,
      "eval_pearson_dot": 0.676889181137085,
      "eval_pearson_euclidean": 0.756407618522644,
      "eval_pearson_manhattan": 0.7570176124572754,
      "eval_runtime": 25.3699,
      "eval_samples_per_second": 59.125,
      "eval_spearman_cosine": 0.7727339030438767,
      "eval_spearman_dot": 0.6755843192398268,
      "eval_spearman_euclidean": 0.7624238185076594,
      "eval_spearman_manhattan": 0.7629469399526556,
      "eval_steps_per_second": 7.41,
      "step": 3000
    },
    {
      "epoch": 2.820993439550141,
      "grad_norm": 1.4202260971069336,
      "learning_rate": 9.977961311485013e-06,
      "loss": 0.1456,
      "step": 3010
    },
    {
      "epoch": 2.830365510777882,
      "grad_norm": 1.3678419589996338,
      "learning_rate": 9.977888093250743e-06,
      "loss": 0.1445,
      "step": 3020
    },
    {
      "epoch": 2.839737582005623,
      "grad_norm": 1.168271541595459,
      "learning_rate": 9.977814875016474e-06,
      "loss": 0.1428,
      "step": 3030
    },
    {
      "epoch": 2.8491096532333646,
      "grad_norm": 1.5929275751113892,
      "learning_rate": 9.977741656782207e-06,
      "loss": 0.1593,
      "step": 3040
    },
    {
      "epoch": 2.858481724461106,
      "grad_norm": 1.265101432800293,
      "learning_rate": 9.977668438547937e-06,
      "loss": 0.1519,
      "step": 3050
    },
    {
      "epoch": 2.8678537956888475,
      "grad_norm": 1.1187818050384521,
      "learning_rate": 9.977595220313666e-06,
      "loss": 0.1454,
      "step": 3060
    },
    {
      "epoch": 2.8772258669165884,
      "grad_norm": 1.1976639032363892,
      "learning_rate": 9.977522002079399e-06,
      "loss": 0.1321,
      "step": 3070
    },
    {
      "epoch": 2.88659793814433,
      "grad_norm": 1.7162209749221802,
      "learning_rate": 9.97744878384513e-06,
      "loss": 0.147,
      "step": 3080
    },
    {
      "epoch": 2.8959700093720713,
      "grad_norm": 1.3301661014556885,
      "learning_rate": 9.97737556561086e-06,
      "loss": 0.1341,
      "step": 3090
    },
    {
      "epoch": 2.9053420805998127,
      "grad_norm": 1.279984951019287,
      "learning_rate": 9.977302347376591e-06,
      "loss": 0.1342,
      "step": 3100
    },
    {
      "epoch": 2.914714151827554,
      "grad_norm": 1.6548879146575928,
      "learning_rate": 9.977229129142324e-06,
      "loss": 0.1429,
      "step": 3110
    },
    {
      "epoch": 2.924086223055295,
      "grad_norm": 0.9662721753120422,
      "learning_rate": 9.977155910908053e-06,
      "loss": 0.1524,
      "step": 3120
    },
    {
      "epoch": 2.9334582942830365,
      "grad_norm": 1.5336380004882812,
      "learning_rate": 9.977082692673783e-06,
      "loss": 0.1445,
      "step": 3130
    },
    {
      "epoch": 2.942830365510778,
      "grad_norm": 1.4380927085876465,
      "learning_rate": 9.977009474439516e-06,
      "loss": 0.1371,
      "step": 3140
    },
    {
      "epoch": 2.9522024367385193,
      "grad_norm": 1.551700472831726,
      "learning_rate": 9.976936256205247e-06,
      "loss": 0.135,
      "step": 3150
    },
    {
      "epoch": 2.9615745079662608,
      "grad_norm": 1.32683265209198,
      "learning_rate": 9.976863037970977e-06,
      "loss": 0.1444,
      "step": 3160
    },
    {
      "epoch": 2.9709465791940017,
      "grad_norm": 1.3574503660202026,
      "learning_rate": 9.976789819736708e-06,
      "loss": 0.1391,
      "step": 3170
    },
    {
      "epoch": 2.980318650421743,
      "grad_norm": 1.506625771522522,
      "learning_rate": 9.976716601502439e-06,
      "loss": 0.1552,
      "step": 3180
    },
    {
      "epoch": 2.9896907216494846,
      "grad_norm": 1.3970105648040771,
      "learning_rate": 9.97664338326817e-06,
      "loss": 0.147,
      "step": 3190
    },
    {
      "epoch": 2.999062792877226,
      "grad_norm": 1.4303011894226074,
      "learning_rate": 9.9765701650339e-06,
      "loss": 0.1559,
      "step": 3200
    },
    {
      "epoch": 3.0084348641049674,
      "grad_norm": 1.377488613128662,
      "learning_rate": 9.976496946799633e-06,
      "loss": 0.1187,
      "step": 3210
    },
    {
      "epoch": 3.0178069353327084,
      "grad_norm": 1.1664360761642456,
      "learning_rate": 9.976423728565364e-06,
      "loss": 0.1101,
      "step": 3220
    },
    {
      "epoch": 3.02717900656045,
      "grad_norm": 0.9129014015197754,
      "learning_rate": 9.976350510331093e-06,
      "loss": 0.111,
      "step": 3230
    },
    {
      "epoch": 3.036551077788191,
      "grad_norm": 1.2628843784332275,
      "learning_rate": 9.976277292096825e-06,
      "loss": 0.1141,
      "step": 3240
    },
    {
      "epoch": 3.0459231490159326,
      "grad_norm": 1.1534360647201538,
      "learning_rate": 9.976204073862556e-06,
      "loss": 0.1227,
      "step": 3250
    },
    {
      "epoch": 3.0459231490159326,
      "eval_loss": 0.051736850291490555,
      "eval_pearson_cosine": 0.763727605342865,
      "eval_pearson_dot": 0.673626720905304,
|
"eval_pearson_euclidean": 0.756030797958374, |
|
"eval_pearson_manhattan": 0.7567305564880371, |
|
"eval_runtime": 21.997, |
|
"eval_samples_per_second": 68.191, |
|
"eval_spearman_cosine": 0.7669834916269708, |
|
"eval_spearman_dot": 0.6714383880600381, |
|
"eval_spearman_euclidean": 0.7611960037220876, |
|
"eval_spearman_manhattan": 0.7615680957541558, |
|
"eval_steps_per_second": 8.547, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 3.055295220243674, |
|
"grad_norm": 1.4779927730560303, |
|
"learning_rate": 9.976130855628287e-06, |
|
"loss": 0.1186, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 3.064667291471415, |
|
"grad_norm": 1.2425293922424316, |
|
"learning_rate": 9.976057637394017e-06, |
|
"loss": 0.1213, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 3.0740393626991565, |
|
"grad_norm": 1.6161679029464722, |
|
"learning_rate": 9.975984419159748e-06, |
|
"loss": 0.1127, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 3.083411433926898, |
|
"grad_norm": 1.199263334274292, |
|
"learning_rate": 9.975911200925479e-06, |
|
"loss": 0.0971, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 3.0927835051546393, |
|
"grad_norm": 1.5749520063400269, |
|
"learning_rate": 9.97583798269121e-06, |
|
"loss": 0.1162, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.1021555763823807, |
|
"grad_norm": 1.558112382888794, |
|
"learning_rate": 9.97576476445694e-06, |
|
"loss": 0.125, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 3.1115276476101217, |
|
"grad_norm": 1.5197752714157104, |
|
"learning_rate": 9.975691546222673e-06, |
|
"loss": 0.1199, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 3.120899718837863, |
|
"grad_norm": 1.1978933811187744, |
|
"learning_rate": 9.975618327988404e-06, |
|
"loss": 0.0975, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 3.1302717900656045, |
|
"grad_norm": 1.0790154933929443, |
|
"learning_rate": 9.975545109754134e-06, |
|
"loss": 0.1078, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 3.139643861293346, |
|
"grad_norm": 1.7810611724853516, |
|
"learning_rate": 9.975471891519865e-06, |
|
"loss": 0.1065, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 3.1490159325210874, |
|
"grad_norm": 1.2899665832519531, |
|
"learning_rate": 9.975398673285596e-06, |
|
"loss": 0.1104, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 3.1583880037488283, |
|
"grad_norm": 1.1923859119415283, |
|
"learning_rate": 9.975325455051327e-06, |
|
"loss": 0.1143, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 3.1677600749765698, |
|
"grad_norm": 1.428306221961975, |
|
"learning_rate": 9.975252236817057e-06, |
|
"loss": 0.101, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 3.177132146204311, |
|
"grad_norm": 1.323941946029663, |
|
"learning_rate": 9.97517901858279e-06, |
|
"loss": 0.1115, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 3.1865042174320526, |
|
"grad_norm": 1.4079722166061401, |
|
"learning_rate": 9.97510580034852e-06, |
|
"loss": 0.1032, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.195876288659794, |
|
"grad_norm": 1.2919671535491943, |
|
"learning_rate": 9.97503258211425e-06, |
|
"loss": 0.1145, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 3.205248359887535, |
|
"grad_norm": 1.1800559759140015, |
|
"learning_rate": 9.974959363879982e-06, |
|
"loss": 0.106, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 3.2146204311152764, |
|
"grad_norm": 1.5425052642822266, |
|
"learning_rate": 9.974886145645713e-06, |
|
"loss": 0.1156, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 3.223992502343018, |
|
"grad_norm": 1.7271355390548706, |
|
"learning_rate": 9.974812927411443e-06, |
|
"loss": 0.1121, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 3.2333645735707592, |
|
"grad_norm": 1.3295711278915405, |
|
"learning_rate": 9.974739709177174e-06, |
|
"loss": 0.1072, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 3.2427366447985007, |
|
"grad_norm": 1.658498764038086, |
|
"learning_rate": 9.974666490942905e-06, |
|
"loss": 0.1131, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 3.2521087160262416, |
|
"grad_norm": 1.6077649593353271, |
|
"learning_rate": 9.974593272708636e-06, |
|
"loss": 0.1143, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 3.261480787253983, |
|
"grad_norm": 1.4552775621414185, |
|
"learning_rate": 9.974520054474366e-06, |
|
"loss": 0.1065, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 3.2708528584817245, |
|
"grad_norm": 1.586267113685608, |
|
"learning_rate": 9.974446836240099e-06, |
|
"loss": 0.1137, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 3.280224929709466, |
|
"grad_norm": 0.9890511631965637, |
|
"learning_rate": 9.97437361800583e-06, |
|
"loss": 0.103, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.280224929709466, |
|
"eval_loss": 0.04644956439733505, |
|
"eval_pearson_cosine": 0.760254442691803, |
|
"eval_pearson_dot": 0.6812557578086853, |
|
"eval_pearson_euclidean": 0.7475454807281494, |
|
"eval_pearson_manhattan": 0.7483712434768677, |
|
"eval_runtime": 22.2407, |
|
"eval_samples_per_second": 67.444, |
|
"eval_spearman_cosine": 0.7642516190492565, |
|
"eval_spearman_dot": 0.6795590047108491, |
|
"eval_spearman_euclidean": 0.7527436591109528, |
|
"eval_spearman_manhattan": 0.7534967017417152, |
|
"eval_steps_per_second": 8.453, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.2895970009372073, |
|
"grad_norm": 1.4361557960510254, |
|
"learning_rate": 9.97430039977156e-06, |
|
"loss": 0.1078, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 3.2989690721649483, |
|
"grad_norm": 1.307634949684143, |
|
"learning_rate": 9.974227181537291e-06, |
|
"loss": 0.105, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 3.3083411433926897, |
|
"grad_norm": 1.103812336921692, |
|
"learning_rate": 9.974153963303022e-06, |
|
"loss": 0.1021, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 3.317713214620431, |
|
"grad_norm": 1.485766887664795, |
|
"learning_rate": 9.974080745068753e-06, |
|
"loss": 0.1055, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 3.3270852858481725, |
|
"grad_norm": 1.4017934799194336, |
|
"learning_rate": 9.974007526834483e-06, |
|
"loss": 0.0991, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 3.336457357075914, |
|
"grad_norm": 1.1994048357009888, |
|
"learning_rate": 9.973934308600214e-06, |
|
"loss": 0.1176, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 3.345829428303655, |
|
"grad_norm": 1.0661845207214355, |
|
"learning_rate": 9.973861090365947e-06, |
|
"loss": 0.1036, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 3.3552014995313963, |
|
"grad_norm": 1.273992896080017, |
|
"learning_rate": 9.973787872131676e-06, |
|
"loss": 0.1069, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 3.3645735707591378, |
|
"grad_norm": 1.157599687576294, |
|
"learning_rate": 9.973714653897406e-06, |
|
"loss": 0.1154, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 3.373945641986879, |
|
"grad_norm": 1.567265272140503, |
|
"learning_rate": 9.973641435663139e-06, |
|
"loss": 0.1104, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.3833177132146206, |
|
"grad_norm": 1.509450078010559, |
|
"learning_rate": 9.97356821742887e-06, |
|
"loss": 0.1123, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 3.3926897844423616, |
|
"grad_norm": 1.6206624507904053, |
|
"learning_rate": 9.9734949991946e-06, |
|
"loss": 0.0915, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 3.402061855670103, |
|
"grad_norm": 1.3384416103363037, |
|
"learning_rate": 9.973421780960331e-06, |
|
"loss": 0.1286, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 3.4114339268978444, |
|
"grad_norm": 1.4834225177764893, |
|
"learning_rate": 9.973348562726062e-06, |
|
"loss": 0.1129, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 3.420805998125586, |
|
"grad_norm": 1.486007809638977, |
|
"learning_rate": 9.973275344491793e-06, |
|
"loss": 0.1037, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 3.4301780693533273, |
|
"grad_norm": 1.5038363933563232, |
|
"learning_rate": 9.973202126257523e-06, |
|
"loss": 0.104, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 3.4395501405810682, |
|
"grad_norm": 1.3018808364868164, |
|
"learning_rate": 9.973128908023256e-06, |
|
"loss": 0.1068, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 3.4489222118088096, |
|
"grad_norm": 1.733067512512207, |
|
"learning_rate": 9.973055689788987e-06, |
|
"loss": 0.1011, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 3.458294283036551, |
|
"grad_norm": 1.3246439695358276, |
|
"learning_rate": 9.972982471554716e-06, |
|
"loss": 0.0989, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 3.4676663542642925, |
|
"grad_norm": 1.7354522943496704, |
|
"learning_rate": 9.972909253320448e-06, |
|
"loss": 0.1174, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.477038425492034, |
|
"grad_norm": 1.5907713174819946, |
|
"learning_rate": 9.972836035086179e-06, |
|
"loss": 0.1067, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 3.486410496719775, |
|
"grad_norm": 1.4252599477767944, |
|
"learning_rate": 9.97276281685191e-06, |
|
"loss": 0.1064, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 3.4957825679475163, |
|
"grad_norm": 1.3505686521530151, |
|
"learning_rate": 9.97268959861764e-06, |
|
"loss": 0.1168, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 3.5051546391752577, |
|
"grad_norm": 1.3022727966308594, |
|
"learning_rate": 9.972616380383373e-06, |
|
"loss": 0.1111, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 3.514526710402999, |
|
"grad_norm": 1.080246090888977, |
|
"learning_rate": 9.972543162149102e-06, |
|
"loss": 0.0982, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.514526710402999, |
|
"eval_loss": 0.04514094442129135, |
|
"eval_pearson_cosine": 0.7656620144844055, |
|
"eval_pearson_dot": 0.6821019649505615, |
|
"eval_pearson_euclidean": 0.7441372871398926, |
|
"eval_pearson_manhattan": 0.7452259659767151, |
|
"eval_runtime": 22.4556, |
|
"eval_samples_per_second": 66.798, |
|
"eval_spearman_cosine": 0.7694518035767811, |
|
"eval_spearman_dot": 0.6821838150409313, |
|
"eval_spearman_euclidean": 0.7516165395512334, |
|
"eval_spearman_manhattan": 0.7527176854515762, |
|
"eval_steps_per_second": 8.372, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.5238987816307406, |
|
"grad_norm": 1.3396129608154297, |
|
"learning_rate": 9.972469943914833e-06, |
|
"loss": 0.1145, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 3.5332708528584815, |
|
"grad_norm": 1.5277647972106934, |
|
"learning_rate": 9.972396725680565e-06, |
|
"loss": 0.1101, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 3.542642924086223, |
|
"grad_norm": 1.8469972610473633, |
|
"learning_rate": 9.972323507446296e-06, |
|
"loss": 0.1129, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 3.5520149953139644, |
|
"grad_norm": 1.2464599609375, |
|
"learning_rate": 9.972250289212027e-06, |
|
"loss": 0.1103, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 3.561387066541706, |
|
"grad_norm": 1.7863965034484863, |
|
"learning_rate": 9.972177070977757e-06, |
|
"loss": 0.1084, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 3.570759137769447, |
|
"grad_norm": 1.3085591793060303, |
|
"learning_rate": 9.972103852743488e-06, |
|
"loss": 0.11, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 3.580131208997188, |
|
"grad_norm": 1.5875599384307861, |
|
"learning_rate": 9.972030634509219e-06, |
|
"loss": 0.1213, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 3.5895032802249296, |
|
"grad_norm": 1.2654856443405151, |
|
"learning_rate": 9.97195741627495e-06, |
|
"loss": 0.1045, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 3.598875351452671, |
|
"grad_norm": 1.4713581800460815, |
|
"learning_rate": 9.97188419804068e-06, |
|
"loss": 0.1123, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 3.6082474226804124, |
|
"grad_norm": 1.3559589385986328, |
|
"learning_rate": 9.971810979806413e-06, |
|
"loss": 0.1171, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 3.617619493908154, |
|
"grad_norm": 1.7482990026474, |
|
"learning_rate": 9.971737761572142e-06, |
|
"loss": 0.1141, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 3.626991565135895, |
|
"grad_norm": 1.7189960479736328, |
|
"learning_rate": 9.971664543337873e-06, |
|
"loss": 0.107, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"grad_norm": 1.8246538639068604, |
|
"learning_rate": 9.971591325103605e-06, |
|
"loss": 0.1161, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 3.6457357075913777, |
|
"grad_norm": 1.0778300762176514, |
|
"learning_rate": 9.971518106869336e-06, |
|
"loss": 0.1084, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 3.655107778819119, |
|
"grad_norm": 1.5588942766189575, |
|
"learning_rate": 9.971444888635066e-06, |
|
"loss": 0.1038, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.6644798500468605, |
|
"grad_norm": 1.3670451641082764, |
|
"learning_rate": 9.971371670400797e-06, |
|
"loss": 0.1069, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 3.6738519212746015, |
|
"grad_norm": 1.437696099281311, |
|
"learning_rate": 9.971298452166528e-06, |
|
"loss": 0.1129, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 3.683223992502343, |
|
"grad_norm": 1.39695143699646, |
|
"learning_rate": 9.971225233932259e-06, |
|
"loss": 0.1113, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 3.6925960637300843, |
|
"grad_norm": 1.3372693061828613, |
|
"learning_rate": 9.97115201569799e-06, |
|
"loss": 0.1042, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 3.7019681349578257, |
|
"grad_norm": 1.4336313009262085, |
|
"learning_rate": 9.971078797463722e-06, |
|
"loss": 0.1224, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 3.711340206185567, |
|
"grad_norm": 1.3641144037246704, |
|
"learning_rate": 9.971005579229453e-06, |
|
"loss": 0.1082, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 3.720712277413308, |
|
"grad_norm": 1.1231974363327026, |
|
"learning_rate": 9.970932360995183e-06, |
|
"loss": 0.1108, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 3.7300843486410495, |
|
"grad_norm": 1.0743800401687622, |
|
"learning_rate": 9.970859142760914e-06, |
|
"loss": 0.1148, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 3.739456419868791, |
|
"grad_norm": 1.5260711908340454, |
|
"learning_rate": 9.970785924526645e-06, |
|
"loss": 0.1248, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 3.7488284910965324, |
|
"grad_norm": 1.1183910369873047, |
|
"learning_rate": 9.970712706292376e-06, |
|
"loss": 0.0987, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.7488284910965324, |
|
"eval_loss": 0.046661876142024994, |
|
"eval_pearson_cosine": 0.7576525807380676, |
|
"eval_pearson_dot": 0.6644298434257507, |
|
"eval_pearson_euclidean": 0.7384845614433289, |
|
"eval_pearson_manhattan": 0.7396556735038757, |
|
"eval_runtime": 23.8808, |
|
"eval_samples_per_second": 62.812, |
|
"eval_spearman_cosine": 0.7607075839895016, |
|
"eval_spearman_dot": 0.6622737418861694, |
|
"eval_spearman_euclidean": 0.7433752629911805, |
|
"eval_spearman_manhattan": 0.7446298314535014, |
|
"eval_steps_per_second": 7.872, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.758200562324274, |
|
"grad_norm": 1.3608311414718628, |
|
"learning_rate": 9.970639488058106e-06, |
|
"loss": 0.1179, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 3.7675726335520148, |
|
"grad_norm": 1.6313430070877075, |
|
"learning_rate": 9.970566269823839e-06, |
|
"loss": 0.1186, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 3.776944704779756, |
|
"grad_norm": 1.4092051982879639, |
|
"learning_rate": 9.970493051589568e-06, |
|
"loss": 0.1048, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 3.7863167760074976, |
|
"grad_norm": 1.4106525182724, |
|
"learning_rate": 9.970419833355299e-06, |
|
"loss": 0.1233, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 3.795688847235239, |
|
"grad_norm": 1.498146891593933, |
|
"learning_rate": 9.970346615121031e-06, |
|
"loss": 0.1164, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 3.8050609184629804, |
|
"grad_norm": 1.68582284450531, |
|
"learning_rate": 9.970273396886762e-06, |
|
"loss": 0.1194, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 3.8144329896907214, |
|
"grad_norm": 1.329270362854004, |
|
"learning_rate": 9.970200178652493e-06, |
|
"loss": 0.1001, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 3.823805060918463, |
|
"grad_norm": 1.6010513305664062, |
|
"learning_rate": 9.970126960418223e-06, |
|
"loss": 0.107, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 3.8331771321462043, |
|
"grad_norm": 1.213576078414917, |
|
"learning_rate": 9.970053742183954e-06, |
|
"loss": 0.1108, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 3.8425492033739457, |
|
"grad_norm": 1.585524320602417, |
|
"learning_rate": 9.969980523949685e-06, |
|
"loss": 0.1079, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.851921274601687, |
|
"grad_norm": 1.6043713092803955, |
|
"learning_rate": 9.969907305715416e-06, |
|
"loss": 0.1141, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 3.861293345829428, |
|
"grad_norm": 1.3566473722457886, |
|
"learning_rate": 9.969834087481146e-06, |
|
"loss": 0.1148, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 3.8706654170571695, |
|
"grad_norm": 1.390787124633789, |
|
"learning_rate": 9.969760869246879e-06, |
|
"loss": 0.1024, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 3.880037488284911, |
|
"grad_norm": 1.689005970954895, |
|
"learning_rate": 9.96968765101261e-06, |
|
"loss": 0.111, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 3.8894095595126523, |
|
"grad_norm": 1.850071907043457, |
|
"learning_rate": 9.96961443277834e-06, |
|
"loss": 0.1097, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 3.8987816307403937, |
|
"grad_norm": 1.4834603071212769, |
|
"learning_rate": 9.969541214544071e-06, |
|
"loss": 0.1084, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 3.9081537019681347, |
|
"grad_norm": 1.3408997058868408, |
|
"learning_rate": 9.969467996309802e-06, |
|
"loss": 0.1194, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 3.917525773195876, |
|
"grad_norm": 1.3920304775238037, |
|
"learning_rate": 9.969394778075533e-06, |
|
"loss": 0.1091, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 3.9268978444236176, |
|
"grad_norm": 1.0026508569717407, |
|
"learning_rate": 9.969321559841263e-06, |
|
"loss": 0.119, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 3.936269915651359, |
|
"grad_norm": 1.7984665632247925, |
|
"learning_rate": 9.969248341606996e-06, |
|
"loss": 0.1065, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.9456419868791004, |
|
"grad_norm": 1.6500909328460693, |
|
"learning_rate": 9.969175123372725e-06, |
|
"loss": 0.1083, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 3.9550140581068414, |
|
"grad_norm": 1.7580713033676147, |
|
"learning_rate": 9.969101905138456e-06, |
|
"loss": 0.1237, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 3.964386129334583, |
|
"grad_norm": 1.8374171257019043, |
|
"learning_rate": 9.969028686904188e-06, |
|
"loss": 0.1003, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 3.973758200562324, |
|
"grad_norm": 1.5857341289520264, |
|
"learning_rate": 9.968955468669919e-06, |
|
"loss": 0.1012, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 3.9831302717900656, |
|
"grad_norm": 1.627947211265564, |
|
"learning_rate": 9.96888225043565e-06, |
|
"loss": 0.1111, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.9831302717900656, |
|
"eval_loss": 0.04063473269343376, |
|
"eval_pearson_cosine": 0.7690664529800415, |
|
"eval_pearson_dot": 0.6998196840286255, |
|
"eval_pearson_euclidean": 0.7456687092781067, |
|
"eval_pearson_manhattan": 0.7471497058868408, |
|
"eval_runtime": 23.0817, |
|
"eval_samples_per_second": 64.986, |
|
"eval_spearman_cosine": 0.7702784084250337, |
|
"eval_spearman_dot": 0.7005907360024843, |
|
"eval_spearman_euclidean": 0.7509877657044322, |
|
"eval_spearman_manhattan": 0.7524785559548752, |
|
"eval_steps_per_second": 8.145, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.992502343017807, |
|
"grad_norm": 1.3161486387252808, |
|
"learning_rate": 9.96880903220138e-06, |
|
"loss": 0.1114, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 4.001874414245548, |
|
"grad_norm": 0.9556475281715393, |
|
"learning_rate": 9.968735813967111e-06, |
|
"loss": 0.1141, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 4.01124648547329, |
|
"grad_norm": 1.0041595697402954, |
|
"learning_rate": 9.968662595732842e-06, |
|
"loss": 0.0807, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 4.020618556701031, |
|
"grad_norm": 1.1500684022903442, |
|
"learning_rate": 9.968589377498573e-06, |
|
"loss": 0.0701, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 4.029990627928772, |
|
"grad_norm": 1.3963230848312378, |
|
"learning_rate": 9.968516159264305e-06, |
|
"loss": 0.0863, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 4.039362699156514, |
|
"grad_norm": 1.4251878261566162, |
|
"learning_rate": 9.968442941030036e-06, |
|
"loss": 0.0746, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 4.048734770384255, |
|
"grad_norm": 1.0674968957901, |
|
"learning_rate": 9.968369722795765e-06, |
|
"loss": 0.0667, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 4.0581068416119965, |
|
"grad_norm": 1.2465558052062988, |
|
"learning_rate": 9.968296504561497e-06, |
|
"loss": 0.0773, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 4.0674789128397375, |
|
"grad_norm": 1.409511923789978, |
|
"learning_rate": 9.968223286327228e-06, |
|
"loss": 0.0775, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 4.0768509840674785, |
|
"grad_norm": 1.2048633098602295, |
|
"learning_rate": 9.968150068092959e-06, |
|
"loss": 0.0885, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 4.08622305529522, |
|
"grad_norm": 1.3504215478897095, |
|
"learning_rate": 9.96807684985869e-06, |
|
"loss": 0.0802, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 4.095595126522961, |
|
"grad_norm": 1.5094915628433228, |
|
"learning_rate": 9.96800363162442e-06, |
|
"loss": 0.0889, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 4.104967197750703, |
|
"grad_norm": 1.2075692415237427, |
|
"learning_rate": 9.967930413390151e-06, |
|
"loss": 0.0718, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 4.114339268978444, |
|
"grad_norm": 1.476462960243225, |
|
"learning_rate": 9.967857195155882e-06, |
|
"loss": 0.0809, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 4.123711340206185, |
|
"grad_norm": 1.4811893701553345, |
|
"learning_rate": 9.967783976921614e-06, |
|
"loss": 0.082, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 4.133083411433927, |
|
"grad_norm": 1.3016406297683716, |
|
"learning_rate": 9.967710758687345e-06, |
|
"loss": 0.0867, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 4.142455482661668, |
|
"grad_norm": 1.3254297971725464, |
|
"learning_rate": 9.967637540453076e-06, |
|
"loss": 0.0783, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 4.15182755388941, |
|
"grad_norm": 1.7814503908157349, |
|
"learning_rate": 9.967564322218806e-06, |
|
"loss": 0.0812, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 4.161199625117151, |
|
"grad_norm": 1.3375070095062256, |
|
"learning_rate": 9.967491103984537e-06, |
|
"loss": 0.0835, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 4.170571696344892, |
|
"grad_norm": 1.3573247194290161, |
|
"learning_rate": 9.967417885750268e-06, |
|
"loss": 0.0772, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 4.179943767572634, |
|
"grad_norm": 1.601321816444397, |
|
"learning_rate": 9.967344667515999e-06, |
|
"loss": 0.0785, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 4.189315838800375, |
|
"grad_norm": 1.0777158737182617, |
|
"learning_rate": 9.96727144928173e-06, |
|
"loss": 0.0789, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 4.1986879100281165, |
|
"grad_norm": 1.717281699180603, |
|
"learning_rate": 9.967198231047462e-06, |
|
"loss": 0.0876, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 4.2080599812558575, |
|
"grad_norm": 1.6537655591964722, |
|
"learning_rate": 9.967125012813191e-06, |
|
"loss": 0.0859, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 4.217432052483598, |
|
"grad_norm": 1.3347113132476807, |
|
"learning_rate": 9.967051794578922e-06, |
|
"loss": 0.0888, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.217432052483598, |
|
"eval_loss": 0.042121224105358124, |
|
"eval_pearson_cosine": 0.7580196857452393, |
|
"eval_pearson_dot": 0.6874213814735413, |
|
"eval_pearson_euclidean": 0.740117073059082, |
|
"eval_pearson_manhattan": 0.7411655187606812, |
|
"eval_runtime": 22.046, |
|
"eval_samples_per_second": 68.04, |
|
"eval_spearman_cosine": 0.7598083870591178, |
|
"eval_spearman_dot": 0.6866180590359211, |
|
"eval_spearman_euclidean": 0.7457408658977246, |
|
"eval_spearman_manhattan": 0.7467901472090236, |
|
"eval_steps_per_second": 8.528, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.22680412371134, |
|
"grad_norm": 1.283334732055664, |
|
"learning_rate": 9.966978576344654e-06, |
|
"loss": 0.0824, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 4.236176194939081, |
|
"grad_norm": 1.4807559251785278, |
|
"learning_rate": 9.966905358110385e-06, |
|
"loss": 0.0812, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 4.245548266166823, |
|
"grad_norm": 1.1873483657836914, |
|
"learning_rate": 9.966832139876116e-06, |
|
"loss": 0.0788, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 4.254920337394564, |
|
"grad_norm": 1.27379310131073, |
|
"learning_rate": 9.966758921641846e-06, |
|
"loss": 0.0802, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 4.264292408622305, |
|
"grad_norm": 1.3721706867218018, |
|
"learning_rate": 9.966685703407577e-06, |
|
"loss": 0.0776, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 4.273664479850047, |
|
"grad_norm": 1.4129197597503662, |
|
"learning_rate": 9.966612485173308e-06, |
|
"loss": 0.0924, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 4.283036551077788, |
|
"grad_norm": 1.453730821609497, |
|
"learning_rate": 9.966539266939039e-06, |
|
"loss": 0.0823, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 4.29240862230553, |
|
"grad_norm": 1.4608802795410156, |
|
"learning_rate": 9.966466048704771e-06, |
|
"loss": 0.0806, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 4.301780693533271, |
|
"grad_norm": 1.0814175605773926, |
|
"learning_rate": 9.966392830470502e-06, |
|
"loss": 0.0781, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 4.311152764761012, |
|
"grad_norm": 1.9891834259033203, |
|
"learning_rate": 9.966319612236233e-06, |
|
"loss": 0.0792, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 4.320524835988754, |
|
"grad_norm": 0.7774847745895386, |
|
"learning_rate": 9.966246394001963e-06, |
|
"loss": 0.0734, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 4.329896907216495, |
|
"grad_norm": 2.0921082496643066, |
|
"learning_rate": 9.966173175767694e-06, |
|
"loss": 0.0789, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 4.339268978444236, |
|
"grad_norm": 1.4378306865692139, |
|
"learning_rate": 9.966099957533425e-06, |
|
"loss": 0.0829, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 4.348641049671977, |
|
"grad_norm": 1.5577812194824219, |
|
"learning_rate": 9.966026739299156e-06, |
|
"loss": 0.0782, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 4.358013120899718, |
|
"grad_norm": 1.8791301250457764, |
|
"learning_rate": 9.965953521064888e-06, |
|
"loss": 0.088, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 4.36738519212746, |
|
"grad_norm": 0.8537359833717346, |
|
"learning_rate": 9.965880302830617e-06, |
|
"loss": 0.0766, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 4.376757263355201, |
|
"grad_norm": 1.258042573928833, |
|
"learning_rate": 9.965807084596348e-06, |
|
"loss": 0.0877, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 4.386129334582943, |
|
"grad_norm": 1.5519142150878906, |
|
"learning_rate": 9.96573386636208e-06, |
|
"loss": 0.0881, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 4.395501405810684, |
|
"grad_norm": 1.1437076330184937, |
|
"learning_rate": 9.965660648127811e-06, |
|
"loss": 0.0816, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 4.404873477038425, |
|
"grad_norm": 1.3333864212036133, |
|
"learning_rate": 9.965587429893542e-06, |
|
"loss": 0.0818, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 4.414245548266167, |
|
"grad_norm": 1.403075098991394, |
|
"learning_rate": 9.965514211659273e-06, |
|
"loss": 0.0771, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 4.423617619493908, |
|
"grad_norm": 1.3652963638305664, |
|
"learning_rate": 9.965440993425003e-06, |
|
"loss": 0.0692, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 4.43298969072165, |
|
"grad_norm": 1.4429869651794434, |
|
"learning_rate": 9.965367775190734e-06, |
|
"loss": 0.0846, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 4.442361761949391, |
|
"grad_norm": 1.291710376739502, |
|
"learning_rate": 9.965294556956465e-06, |
|
"loss": 0.0796, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 4.451733833177133, |
|
"grad_norm": 1.4110385179519653, |
|
"learning_rate": 9.965221338722196e-06, |
|
"loss": 0.0756, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 4.451733833177133, |
|
"eval_loss": 0.039456192404031754, |
|
"eval_pearson_cosine": 0.7664028406143188, |
|
"eval_pearson_dot": 0.7008457779884338, |
|
"eval_pearson_euclidean": 0.7418538928031921, |
|
"eval_pearson_manhattan": 0.7431594133377075, |
|
"eval_runtime": 23.3602, |
|
"eval_samples_per_second": 64.212, |
|
"eval_spearman_cosine": 0.7673929323503452, |
|
"eval_spearman_dot": 0.7011750025269451, |
|
"eval_spearman_euclidean": 0.7464768579915497, |
|
"eval_spearman_manhattan": 0.7479944496608657, |
|
"eval_steps_per_second": 8.048, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 4.4611059044048735, |
|
"grad_norm": 1.1584782600402832, |
|
"learning_rate": 9.965148120487928e-06, |
|
"loss": 0.0834, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 4.4704779756326145, |
|
"grad_norm": 1.2065712213516235, |
|
"learning_rate": 9.965074902253659e-06, |
|
"loss": 0.0865, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 4.479850046860356, |
|
"grad_norm": 1.3458271026611328, |
|
"learning_rate": 9.965001684019388e-06, |
|
"loss": 0.0764, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 4.489222118088097, |
|
"grad_norm": 2.0091888904571533, |
|
"learning_rate": 9.96492846578512e-06, |
|
"loss": 0.0773, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 4.498594189315839, |
|
"grad_norm": 1.3832370042800903, |
|
"learning_rate": 9.964855247550851e-06, |
|
"loss": 0.0806, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 4.50796626054358, |
|
"grad_norm": 1.4656741619110107, |
|
"learning_rate": 9.964782029316582e-06, |
|
"loss": 0.0852, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 4.517338331771321, |
|
"grad_norm": 1.3915668725967407, |
|
"learning_rate": 9.964708811082312e-06, |
|
"loss": 0.086, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 4.526710402999063, |
|
"grad_norm": 1.2182085514068604, |
|
"learning_rate": 9.964635592848043e-06, |
|
"loss": 0.0777, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 4.536082474226804, |
|
"grad_norm": 1.2041029930114746, |
|
"learning_rate": 9.964562374613774e-06, |
|
"loss": 0.0738, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 4.545454545454545, |
|
"grad_norm": 1.289475917816162, |
|
"learning_rate": 9.964489156379505e-06, |
|
"loss": 0.0723, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 4.554826616682287, |
|
"grad_norm": 1.8206441402435303, |
|
"learning_rate": 9.964415938145237e-06, |
|
"loss": 0.0823, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 4.564198687910028, |
|
"grad_norm": 1.393254280090332, |
|
"learning_rate": 9.964342719910968e-06, |
|
"loss": 0.0869, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 4.57357075913777, |
|
"grad_norm": 1.6424909830093384, |
|
"learning_rate": 9.964269501676699e-06, |
|
"loss": 0.0721, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 4.582942830365511, |
|
"grad_norm": 1.6760517358779907, |
|
"learning_rate": 9.96419628344243e-06, |
|
"loss": 0.0849, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 4.592314901593252, |
|
"grad_norm": 1.4797537326812744, |
|
"learning_rate": 9.96412306520816e-06, |
|
"loss": 0.0815, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 4.6016869728209935, |
|
"grad_norm": 1.3184549808502197, |
|
"learning_rate": 9.964049846973891e-06, |
|
"loss": 0.0875, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 4.6110590440487345, |
|
"grad_norm": 1.0524438619613647, |
|
"learning_rate": 9.963976628739622e-06, |
|
"loss": 0.0821, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 4.620431115276476, |
|
"grad_norm": 0.8284000158309937, |
|
"learning_rate": 9.963903410505354e-06, |
|
"loss": 0.0737, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 4.629803186504217, |
|
"grad_norm": 1.2979810237884521, |
|
"learning_rate": 9.963830192271085e-06, |
|
"loss": 0.1031, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 4.639175257731958, |
|
"grad_norm": 1.2484486103057861, |
|
"learning_rate": 9.963756974036814e-06, |
|
"loss": 0.0853, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 4.6485473289597, |
|
"grad_norm": 1.4267854690551758, |
|
"learning_rate": 9.963683755802546e-06, |
|
"loss": 0.0784, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 4.657919400187441, |
|
"grad_norm": 1.2631357908248901, |
|
"learning_rate": 9.963610537568277e-06, |
|
"loss": 0.0814, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 4.667291471415183, |
|
"grad_norm": 1.5679900646209717, |
|
"learning_rate": 9.963537319334008e-06, |
|
"loss": 0.0851, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 4.676663542642924, |
|
"grad_norm": 1.216604471206665, |
|
"learning_rate": 9.963464101099739e-06, |
|
"loss": 0.0747, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 4.686035613870665, |
|
"grad_norm": 1.3772624731063843, |
|
"learning_rate": 9.96339088286547e-06, |
|
"loss": 0.0871, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.686035613870665, |
|
"eval_loss": 0.041086822748184204, |
|
"eval_pearson_cosine": 0.7587878704071045, |
|
"eval_pearson_dot": 0.6872098445892334, |
|
"eval_pearson_euclidean": 0.7388917207717896, |
|
"eval_pearson_manhattan": 0.7404583692550659, |
|
"eval_runtime": 22.5042, |
|
"eval_samples_per_second": 66.654, |
|
"eval_spearman_cosine": 0.7603871650644157, |
|
"eval_spearman_dot": 0.6866960900397536, |
|
"eval_spearman_euclidean": 0.7440960862957542, |
|
"eval_spearman_manhattan": 0.745568766414613, |
|
"eval_steps_per_second": 8.354, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.695407685098407, |
|
"grad_norm": 1.6077407598495483, |
|
"learning_rate": 9.9633176646312e-06, |
|
"loss": 0.0993, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 4.704779756326148, |
|
"grad_norm": 1.206281065940857, |
|
"learning_rate": 9.963244446396931e-06, |
|
"loss": 0.082, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 4.71415182755389, |
|
"grad_norm": 1.168562650680542, |
|
"learning_rate": 9.963171228162662e-06, |
|
"loss": 0.075, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 4.723523898781631, |
|
"grad_norm": 1.0943313837051392, |
|
"learning_rate": 9.963098009928394e-06, |
|
"loss": 0.0907, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 4.7328959700093725, |
|
"grad_norm": 1.1832613945007324, |
|
"learning_rate": 9.963024791694125e-06, |
|
"loss": 0.0776, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 4.742268041237113, |
|
"grad_norm": 1.1568524837493896, |
|
"learning_rate": 9.962951573459856e-06, |
|
"loss": 0.0956, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 4.751640112464854, |
|
"grad_norm": 1.4179660081863403, |
|
"learning_rate": 9.962878355225586e-06, |
|
"loss": 0.079, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 4.761012183692596, |
|
"grad_norm": 1.56465744972229, |
|
"learning_rate": 9.962805136991317e-06, |
|
"loss": 0.0708, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 4.770384254920337, |
|
"grad_norm": 1.47963547706604, |
|
"learning_rate": 9.962731918757048e-06, |
|
"loss": 0.0817, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 4.779756326148079, |
|
"grad_norm": 1.4979149103164673, |
|
"learning_rate": 9.962658700522779e-06, |
|
"loss": 0.0859, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 4.78912839737582, |
|
"grad_norm": 1.0254287719726562, |
|
"learning_rate": 9.962585482288511e-06, |
|
"loss": 0.077, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 4.798500468603561, |
|
"grad_norm": 1.5644149780273438, |
|
"learning_rate": 9.96251226405424e-06, |
|
"loss": 0.0775, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 4.807872539831303, |
|
"grad_norm": 1.2777773141860962, |
|
"learning_rate": 9.962439045819971e-06, |
|
"loss": 0.0734, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 4.817244611059044, |
|
"grad_norm": 1.130614995956421, |
|
"learning_rate": 9.962365827585703e-06, |
|
"loss": 0.082, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 4.826616682286786, |
|
"grad_norm": 0.9016211032867432, |
|
"learning_rate": 9.962292609351434e-06, |
|
"loss": 0.08, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 4.835988753514527, |
|
"grad_norm": 1.4159069061279297, |
|
"learning_rate": 9.962219391117165e-06, |
|
"loss": 0.0841, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 4.845360824742268, |
|
"grad_norm": 1.600085973739624, |
|
"learning_rate": 9.962146172882896e-06, |
|
"loss": 0.0766, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 4.85473289597001, |
|
"grad_norm": 1.4401110410690308, |
|
"learning_rate": 9.962072954648626e-06, |
|
"loss": 0.0869, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 4.8641049671977505, |
|
"grad_norm": 1.4603939056396484, |
|
"learning_rate": 9.961999736414357e-06, |
|
"loss": 0.077, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 4.873477038425492, |
|
"grad_norm": 1.0498592853546143, |
|
"learning_rate": 9.961926518180088e-06, |
|
"loss": 0.0673, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 4.882849109653233, |
|
"grad_norm": 1.9157027006149292, |
|
"learning_rate": 9.96185329994582e-06, |
|
"loss": 0.0865, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 4.892221180880974, |
|
"grad_norm": 1.0183812379837036, |
|
"learning_rate": 9.961780081711551e-06, |
|
"loss": 0.0809, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 4.901593252108716, |
|
"grad_norm": 1.4563605785369873, |
|
"learning_rate": 9.96170686347728e-06, |
|
"loss": 0.086, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 4.910965323336457, |
|
"grad_norm": 1.1856083869934082, |
|
"learning_rate": 9.961633645243013e-06, |
|
"loss": 0.0802, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 4.920337394564199, |
|
"grad_norm": 1.3724653720855713, |
|
"learning_rate": 9.961560427008743e-06, |
|
"loss": 0.0839, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 4.920337394564199, |
|
"eval_loss": 0.04000931978225708, |
|
"eval_pearson_cosine": 0.7643105387687683, |
|
"eval_pearson_dot": 0.6954823732376099, |
|
"eval_pearson_euclidean": 0.7297146320343018, |
|
"eval_pearson_manhattan": 0.7310500144958496, |
|
"eval_runtime": 21.985, |
|
"eval_samples_per_second": 68.228, |
|
"eval_spearman_cosine": 0.7658903505068073, |
|
"eval_spearman_dot": 0.6968591888025883, |
|
"eval_spearman_euclidean": 0.7350736410651904, |
|
"eval_spearman_manhattan": 0.7366836781540181, |
|
"eval_steps_per_second": 8.551, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 4.92970946579194, |
|
"grad_norm": 1.7151585817337036, |
|
"learning_rate": 9.961487208774474e-06, |
|
"loss": 0.0791, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 4.939081537019681, |
|
"grad_norm": 1.6940653324127197, |
|
"learning_rate": 9.961413990540205e-06, |
|
"loss": 0.0893, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 4.948453608247423, |
|
"grad_norm": 1.5087528228759766, |
|
"learning_rate": 9.961340772305936e-06, |
|
"loss": 0.0801, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 4.957825679475164, |
|
"grad_norm": 1.2038474082946777, |
|
"learning_rate": 9.961267554071666e-06, |
|
"loss": 0.0791, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 4.967197750702906, |
|
"grad_norm": 1.4044734239578247, |
|
"learning_rate": 9.961194335837397e-06, |
|
"loss": 0.0832, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 4.976569821930647, |
|
"grad_norm": 1.057298183441162, |
|
"learning_rate": 9.96112111760313e-06, |
|
"loss": 0.0869, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 4.985941893158388, |
|
"grad_norm": 1.4192899465560913, |
|
"learning_rate": 9.96104789936886e-06, |
|
"loss": 0.0837, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 4.9953139643861295, |
|
"grad_norm": 1.7742289304733276, |
|
"learning_rate": 9.960974681134591e-06, |
|
"loss": 0.0858, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 5.0046860356138705, |
|
"grad_norm": 0.9188485741615295, |
|
"learning_rate": 9.960901462900322e-06, |
|
"loss": 0.0684, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 5.014058106841612, |
|
"grad_norm": 1.6541597843170166, |
|
"learning_rate": 9.960828244666052e-06, |
|
"loss": 0.0669, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 5.023430178069353, |
|
"grad_norm": 1.5705071687698364, |
|
"learning_rate": 9.960755026431783e-06, |
|
"loss": 0.0646, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 5.032802249297094, |
|
"grad_norm": 0.9007801413536072, |
|
"learning_rate": 9.960681808197514e-06, |
|
"loss": 0.0721, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 5.042174320524836, |
|
"grad_norm": 1.044138789176941, |
|
"learning_rate": 9.960608589963245e-06, |
|
"loss": 0.0585, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 5.051546391752577, |
|
"grad_norm": 1.455098032951355, |
|
"learning_rate": 9.960535371728977e-06, |
|
"loss": 0.0677, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 5.060918462980319, |
|
"grad_norm": 1.3480255603790283, |
|
"learning_rate": 9.960462153494708e-06, |
|
"loss": 0.0582, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 5.07029053420806, |
|
"grad_norm": 0.9733775854110718, |
|
"learning_rate": 9.960388935260437e-06, |
|
"loss": 0.057, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 5.079662605435801, |
|
"grad_norm": 1.202635645866394, |
|
"learning_rate": 9.96031571702617e-06, |
|
"loss": 0.0642, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 5.089034676663543, |
|
"grad_norm": 1.2410409450531006, |
|
"learning_rate": 9.9602424987919e-06, |
|
"loss": 0.055, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 5.098406747891284, |
|
"grad_norm": 1.341126799583435, |
|
"learning_rate": 9.960169280557631e-06, |
|
"loss": 0.066, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 5.107778819119026, |
|
"grad_norm": 1.070065975189209, |
|
"learning_rate": 9.960096062323362e-06, |
|
"loss": 0.0565, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 5.117150890346767, |
|
"grad_norm": 1.5855072736740112, |
|
"learning_rate": 9.960022844089092e-06, |
|
"loss": 0.0613, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 5.126522961574508, |
|
"grad_norm": 0.7614333629608154, |
|
"learning_rate": 9.959949625854823e-06, |
|
"loss": 0.0572, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 5.1358950328022495, |
|
"grad_norm": 1.0969761610031128, |
|
"learning_rate": 9.959876407620554e-06, |
|
"loss": 0.0557, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 5.14526710402999, |
|
"grad_norm": 1.7454636096954346, |
|
"learning_rate": 9.959803189386286e-06, |
|
"loss": 0.0647, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 5.154639175257732, |
|
"grad_norm": 0.9625281691551208, |
|
"learning_rate": 9.959729971152017e-06, |
|
"loss": 0.0499, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.154639175257732, |
|
"eval_loss": 0.03924967721104622, |
|
"eval_pearson_cosine": 0.7608553767204285, |
|
"eval_pearson_dot": 0.6993385553359985, |
|
"eval_pearson_euclidean": 0.732108473777771, |
|
"eval_pearson_manhattan": 0.7334935069084167, |
|
"eval_runtime": 28.2448, |
|
"eval_samples_per_second": 53.107, |
|
"eval_spearman_cosine": 0.7615678141531256, |
|
"eval_spearman_dot": 0.6999177956469285, |
|
"eval_spearman_euclidean": 0.7378738640113753, |
|
"eval_spearman_manhattan": 0.7392624046122273, |
|
"eval_steps_per_second": 6.656, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.164011246485473, |
|
"grad_norm": 1.4280071258544922, |
|
"learning_rate": 9.959656752917748e-06, |
|
"loss": 0.0557, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 5.173383317713214, |
|
"grad_norm": 1.6271259784698486, |
|
"learning_rate": 9.959583534683479e-06, |
|
"loss": 0.0602, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 5.182755388940956, |
|
"grad_norm": 1.2609021663665771, |
|
"learning_rate": 9.95951031644921e-06, |
|
"loss": 0.0545, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 5.192127460168697, |
|
"grad_norm": 1.2945165634155273, |
|
"learning_rate": 9.95943709821494e-06, |
|
"loss": 0.0592, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 5.201499531396439, |
|
"grad_norm": 1.3600184917449951, |
|
"learning_rate": 9.959363879980671e-06, |
|
"loss": 0.0492, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 5.21087160262418, |
|
"grad_norm": 1.3210471868515015, |
|
"learning_rate": 9.959290661746403e-06, |
|
"loss": 0.0558, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 5.220243673851921, |
|
"grad_norm": 0.8935280442237854, |
|
"learning_rate": 9.959217443512134e-06, |
|
"loss": 0.0566, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 5.229615745079663, |
|
"grad_norm": 0.9014615416526794, |
|
"learning_rate": 9.959144225277863e-06, |
|
"loss": 0.0578, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 5.238987816307404, |
|
"grad_norm": 0.9144461750984192, |
|
"learning_rate": 9.959071007043596e-06, |
|
"loss": 0.0642, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 5.248359887535146, |
|
"grad_norm": 1.1306620836257935, |
|
"learning_rate": 9.958997788809326e-06, |
|
"loss": 0.0645, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 5.257731958762887, |
|
"grad_norm": 1.6353179216384888, |
|
"learning_rate": 9.958924570575057e-06, |
|
"loss": 0.0563, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 5.2671040299906275, |
|
"grad_norm": 1.0438508987426758, |
|
"learning_rate": 9.958851352340788e-06, |
|
"loss": 0.0554, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 5.276476101218369, |
|
"grad_norm": 1.0287367105484009, |
|
"learning_rate": 9.958778134106519e-06, |
|
"loss": 0.0586, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 5.28584817244611, |
|
"grad_norm": 1.0613245964050293, |
|
"learning_rate": 9.95870491587225e-06, |
|
"loss": 0.0634, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 5.295220243673852, |
|
"grad_norm": 1.489405632019043, |
|
"learning_rate": 9.95863169763798e-06, |
|
"loss": 0.0474, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 5.304592314901593, |
|
"grad_norm": 1.4497292041778564, |
|
"learning_rate": 9.95855847940371e-06, |
|
"loss": 0.056, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 5.313964386129334, |
|
"grad_norm": 1.2881600856781006, |
|
"learning_rate": 9.958485261169443e-06, |
|
"loss": 0.0561, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 5.323336457357076, |
|
"grad_norm": 1.4863743782043457, |
|
"learning_rate": 9.958412042935174e-06, |
|
"loss": 0.0562, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 5.332708528584817, |
|
"grad_norm": 1.325191855430603, |
|
"learning_rate": 9.958338824700903e-06, |
|
"loss": 0.0569, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 5.342080599812559, |
|
"grad_norm": 1.0650861263275146, |
|
"learning_rate": 9.958265606466636e-06, |
|
"loss": 0.0574, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 5.3514526710403, |
|
"grad_norm": 1.7255184650421143, |
|
"learning_rate": 9.958192388232366e-06, |
|
"loss": 0.055, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 5.360824742268041, |
|
"grad_norm": 0.8258642554283142, |
|
"learning_rate": 9.958119169998097e-06, |
|
"loss": 0.0509, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 5.370196813495783, |
|
"grad_norm": 1.2811216115951538, |
|
"learning_rate": 9.958045951763828e-06, |
|
"loss": 0.0585, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 5.379568884723524, |
|
"grad_norm": 1.2582824230194092, |
|
"learning_rate": 9.95797273352956e-06, |
|
"loss": 0.0589, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 5.3889409559512655, |
|
"grad_norm": 1.3511929512023926, |
|
"learning_rate": 9.95789951529529e-06, |
|
"loss": 0.0542, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 5.3889409559512655, |
|
"eval_loss": 0.03850702941417694, |
|
"eval_pearson_cosine": 0.7663590312004089, |
|
"eval_pearson_dot": 0.7060524225234985, |
|
"eval_pearson_euclidean": 0.7385671734809875, |
|
"eval_pearson_manhattan": 0.7399072647094727, |
|
"eval_runtime": 27.6896, |
|
"eval_samples_per_second": 54.172, |
|
"eval_spearman_cosine": 0.7668814587849042, |
|
"eval_spearman_dot": 0.706466499232552, |
|
"eval_spearman_euclidean": 0.744533534662993, |
|
"eval_spearman_manhattan": 0.7454034343244123, |
|
"eval_steps_per_second": 6.79, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 5.3983130271790065, |
|
"grad_norm": 1.3905717134475708, |
|
"learning_rate": 9.95782629706102e-06, |
|
"loss": 0.0583, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 5.4076850984067475, |
|
"grad_norm": 1.5047788619995117, |
|
"learning_rate": 9.957753078826752e-06, |
|
"loss": 0.0605, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 5.417057169634489, |
|
"grad_norm": 1.280427098274231, |
|
"learning_rate": 9.957679860592483e-06, |
|
"loss": 0.0584, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 5.42642924086223, |
|
"grad_norm": 1.3530281782150269, |
|
"learning_rate": 9.957606642358214e-06, |
|
"loss": 0.0591, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 5.435801312089972, |
|
"grad_norm": 1.0610909461975098, |
|
"learning_rate": 9.957533424123945e-06, |
|
"loss": 0.0546, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 5.445173383317713, |
|
"grad_norm": 0.9637224674224854, |
|
"learning_rate": 9.957460205889675e-06, |
|
"loss": 0.0641, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 5.454545454545454, |
|
"grad_norm": 1.3324577808380127, |
|
"learning_rate": 9.957386987655406e-06, |
|
"loss": 0.0599, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 5.463917525773196, |
|
"grad_norm": 0.9660161137580872, |
|
"learning_rate": 9.957313769421137e-06, |
|
"loss": 0.0591, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 5.473289597000937, |
|
"grad_norm": 1.128570556640625, |
|
"learning_rate": 9.95724055118687e-06, |
|
"loss": 0.0579, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 5.482661668228679, |
|
"grad_norm": 1.444172739982605, |
|
"learning_rate": 9.9571673329526e-06, |
|
"loss": 0.0636, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 5.49203373945642, |
|
"grad_norm": 1.3510165214538574, |
|
"learning_rate": 9.95709411471833e-06, |
|
"loss": 0.0631, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 5.501405810684162, |
|
"grad_norm": 1.0439740419387817, |
|
"learning_rate": 9.957020896484062e-06, |
|
"loss": 0.0635, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 5.510777881911903, |
|
"grad_norm": 1.15412175655365, |
|
"learning_rate": 9.956947678249792e-06, |
|
"loss": 0.0595, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 5.520149953139644, |
|
"grad_norm": 1.221147894859314, |
|
"learning_rate": 9.956874460015523e-06, |
|
"loss": 0.0552, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 5.5295220243673855, |
|
"grad_norm": 1.4210234880447388, |
|
"learning_rate": 9.956801241781254e-06, |
|
"loss": 0.0593, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 5.5388940955951265, |
|
"grad_norm": 1.1082103252410889, |
|
"learning_rate": 9.956728023546985e-06, |
|
"loss": 0.0535, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 5.548266166822868, |
|
"grad_norm": 0.8931286334991455, |
|
"learning_rate": 9.956654805312715e-06, |
|
"loss": 0.0556, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 5.557638238050609, |
|
"grad_norm": 1.5182912349700928, |
|
"learning_rate": 9.956581587078446e-06, |
|
"loss": 0.0583, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 5.56701030927835, |
|
"grad_norm": 1.2056432962417603, |
|
"learning_rate": 9.956508368844177e-06, |
|
"loss": 0.064, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 5.576382380506092, |
|
"grad_norm": 1.5039522647857666, |
|
"learning_rate": 9.95643515060991e-06, |
|
"loss": 0.0708, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 5.585754451733833, |
|
"grad_norm": 1.2651883363723755, |
|
"learning_rate": 9.95636193237564e-06, |
|
"loss": 0.0596, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 5.595126522961575, |
|
"grad_norm": 1.317690134048462, |
|
"learning_rate": 9.956288714141371e-06, |
|
"loss": 0.0713, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 5.604498594189316, |
|
"grad_norm": 0.9705867767333984, |
|
"learning_rate": 9.956215495907102e-06, |
|
"loss": 0.0699, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 5.613870665417057, |
|
"grad_norm": 1.4250271320343018, |
|
"learning_rate": 9.956142277672832e-06, |
|
"loss": 0.0595, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 5.623242736644799, |
|
"grad_norm": 1.0857118368148804, |
|
"learning_rate": 9.956069059438563e-06, |
|
"loss": 0.0555, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.623242736644799, |
|
"eval_loss": 0.03963544964790344, |
|
"eval_pearson_cosine": 0.7571043968200684, |
|
"eval_pearson_dot": 0.700376570224762, |
|
"eval_pearson_euclidean": 0.7279260158538818, |
|
"eval_pearson_manhattan": 0.729307234287262, |
|
"eval_runtime": 25.5449, |
|
"eval_samples_per_second": 58.72, |
|
"eval_spearman_cosine": 0.7579022153365402, |
|
"eval_spearman_dot": 0.6992710065203335, |
|
"eval_spearman_euclidean": 0.7330627821557505, |
|
"eval_spearman_manhattan": 0.7343750357819732, |
|
"eval_steps_per_second": 7.36, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.63261480787254, |
|
"grad_norm": 1.2122074365615845, |
|
"learning_rate": 9.955995841204294e-06, |
|
"loss": 0.0665, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 5.641986879100282, |
|
"grad_norm": 1.7832310199737549, |
|
"learning_rate": 9.955922622970026e-06, |
|
"loss": 0.063, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 5.651358950328023, |
|
"grad_norm": 1.1854170560836792, |
|
"learning_rate": 9.955849404735755e-06, |
|
"loss": 0.0573, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 5.660731021555764, |
|
"grad_norm": 1.6633968353271484, |
|
"learning_rate": 9.955776186501486e-06, |
|
"loss": 0.0549, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 5.670103092783505, |
|
"grad_norm": 1.31834077835083, |
|
"learning_rate": 9.955702968267219e-06, |
|
"loss": 0.0478, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 5.679475164011246, |
|
"grad_norm": 0.8284873962402344, |
|
"learning_rate": 9.95562975003295e-06, |
|
"loss": 0.0639, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 5.688847235238988, |
|
"grad_norm": 1.2393404245376587, |
|
"learning_rate": 9.95555653179868e-06, |
|
"loss": 0.0593, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 5.698219306466729, |
|
"grad_norm": 1.5327643156051636, |
|
"learning_rate": 9.95548331356441e-06, |
|
"loss": 0.0644, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 5.70759137769447, |
|
"grad_norm": 1.8985389471054077, |
|
"learning_rate": 9.955410095330142e-06, |
|
"loss": 0.0646, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 5.716963448922212, |
|
"grad_norm": 1.5896059274673462, |
|
"learning_rate": 9.955336877095872e-06, |
|
"loss": 0.0716, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 5.726335520149953, |
|
"grad_norm": 1.21624755859375, |
|
"learning_rate": 9.955263658861603e-06, |
|
"loss": 0.0559, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 5.735707591377695, |
|
"grad_norm": 1.3084664344787598, |
|
"learning_rate": 9.955190440627336e-06, |
|
"loss": 0.065, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 5.745079662605436, |
|
"grad_norm": 0.9755469560623169, |
|
"learning_rate": 9.955117222393066e-06, |
|
"loss": 0.0601, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 5.754451733833177, |
|
"grad_norm": 1.1662402153015137, |
|
"learning_rate": 9.955044004158797e-06, |
|
"loss": 0.0588, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 5.763823805060919, |
|
"grad_norm": 1.313323974609375, |
|
"learning_rate": 9.954970785924528e-06, |
|
"loss": 0.0667, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 5.77319587628866, |
|
"grad_norm": 1.4725874662399292, |
|
"learning_rate": 9.954897567690259e-06, |
|
"loss": 0.0619, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 5.782567947516402, |
|
"grad_norm": 1.3176454305648804, |
|
"learning_rate": 9.95482434945599e-06, |
|
"loss": 0.056, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 5.7919400187441425, |
|
"grad_norm": 1.0566222667694092, |
|
"learning_rate": 9.95475113122172e-06, |
|
"loss": 0.0587, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 5.8013120899718835, |
|
"grad_norm": 1.0623878240585327, |
|
"learning_rate": 9.95467791298745e-06, |
|
"loss": 0.0591, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 5.810684161199625, |
|
"grad_norm": 1.6217368841171265, |
|
"learning_rate": 9.954604694753183e-06, |
|
"loss": 0.0536, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 5.820056232427366, |
|
"grad_norm": 1.2574353218078613, |
|
"learning_rate": 9.954531476518912e-06, |
|
"loss": 0.0552, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 5.829428303655108, |
|
"grad_norm": 1.2605924606323242, |
|
"learning_rate": 9.954458258284643e-06, |
|
"loss": 0.0669, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 5.838800374882849, |
|
"grad_norm": 1.8283051252365112, |
|
"learning_rate": 9.954385040050375e-06, |
|
"loss": 0.0631, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 5.84817244611059, |
|
"grad_norm": 1.2457951307296753, |
|
"learning_rate": 9.954311821816106e-06, |
|
"loss": 0.0578, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 5.857544517338332, |
|
"grad_norm": 1.1618739366531372, |
|
"learning_rate": 9.954238603581837e-06, |
|
"loss": 0.0547, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 5.857544517338332, |
|
"eval_loss": 0.03839369863271713, |
|
"eval_pearson_cosine": 0.7663547396659851, |
|
"eval_pearson_dot": 0.7110079526901245, |
|
"eval_pearson_euclidean": 0.7369804978370667, |
|
"eval_pearson_manhattan": 0.738224983215332, |
|
"eval_runtime": 28.702, |
|
"eval_samples_per_second": 52.261, |
|
"eval_spearman_cosine": 0.766680322110213, |
|
"eval_spearman_dot": 0.7118792296635837, |
|
"eval_spearman_euclidean": 0.7420173359570077, |
|
"eval_spearman_manhattan": 0.7431811125331302, |
|
"eval_steps_per_second": 6.55, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 5.866916588566073, |
|
"grad_norm": 1.565491795539856, |
|
"learning_rate": 9.954165385347568e-06, |
|
"loss": 0.0634, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 5.876288659793815, |
|
"grad_norm": 1.412607192993164, |
|
"learning_rate": 9.954092167113298e-06, |
|
"loss": 0.0641, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 5.885660731021556, |
|
"grad_norm": 1.5475645065307617, |
|
"learning_rate": 9.95401894887903e-06, |
|
"loss": 0.058, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 5.895032802249297, |
|
"grad_norm": 1.6942791938781738, |
|
"learning_rate": 9.95394573064476e-06, |
|
"loss": 0.0668, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 5.904404873477039, |
|
"grad_norm": 1.286224603652954, |
|
"learning_rate": 9.953872512410492e-06, |
|
"loss": 0.058, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 5.91377694470478, |
|
"grad_norm": 1.5031893253326416, |
|
"learning_rate": 9.953799294176223e-06, |
|
"loss": 0.062, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 5.9231490159325215, |
|
"grad_norm": 1.416455864906311, |
|
"learning_rate": 9.953726075941952e-06, |
|
"loss": 0.0596, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 5.9325210871602625, |
|
"grad_norm": 1.3160662651062012, |
|
"learning_rate": 9.953652857707685e-06, |
|
"loss": 0.062, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 5.9418931583880035, |
|
"grad_norm": 0.9542105793952942, |
|
"learning_rate": 9.953579639473415e-06, |
|
"loss": 0.0645, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 5.951265229615745, |
|
"grad_norm": 1.4458489418029785, |
|
"learning_rate": 9.953506421239146e-06, |
|
"loss": 0.0563, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 5.960637300843486, |
|
"grad_norm": 1.0310072898864746, |
|
"learning_rate": 9.953433203004877e-06, |
|
"loss": 0.0567, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 5.970009372071228, |
|
"grad_norm": 1.4674971103668213, |
|
"learning_rate": 9.95335998477061e-06, |
|
"loss": 0.0579, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 5.979381443298969, |
|
"grad_norm": 1.229636311531067, |
|
"learning_rate": 9.953286766536338e-06, |
|
"loss": 0.0589, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 5.98875351452671, |
|
"grad_norm": 1.4654268026351929, |
|
"learning_rate": 9.95321354830207e-06, |
|
"loss": 0.0519, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 5.998125585754452, |
|
"grad_norm": 1.276367425918579, |
|
"learning_rate": 9.953140330067802e-06, |
|
"loss": 0.066, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 6.007497656982193, |
|
"grad_norm": 1.0710258483886719, |
|
"learning_rate": 9.953067111833532e-06, |
|
"loss": 0.0462, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 6.016869728209935, |
|
"grad_norm": 0.9316133856773376, |
|
"learning_rate": 9.952993893599263e-06, |
|
"loss": 0.044, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 6.026241799437676, |
|
"grad_norm": 0.8318607211112976, |
|
"learning_rate": 9.952920675364994e-06, |
|
"loss": 0.0399, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 6.035613870665417, |
|
"grad_norm": 0.9682859182357788, |
|
"learning_rate": 9.952847457130725e-06, |
|
"loss": 0.0371, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 6.044985941893159, |
|
"grad_norm": 0.8720560669898987, |
|
"learning_rate": 9.952774238896455e-06, |
|
"loss": 0.0453, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 6.0543580131209, |
|
"grad_norm": 0.7835734486579895, |
|
"learning_rate": 9.952701020662186e-06, |
|
"loss": 0.0475, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 6.0637300843486415, |
|
"grad_norm": 1.4373115301132202, |
|
"learning_rate": 9.952627802427917e-06, |
|
"loss": 0.0416, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 6.073102155576382, |
|
"grad_norm": 1.317517638206482, |
|
"learning_rate": 9.95255458419365e-06, |
|
"loss": 0.0425, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 6.082474226804123, |
|
"grad_norm": 1.1831910610198975, |
|
"learning_rate": 9.952481365959378e-06, |
|
"loss": 0.0471, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 6.091846298031865, |
|
"grad_norm": 1.0449994802474976, |
|
"learning_rate": 9.95240814772511e-06, |
|
"loss": 0.0476, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.091846298031865, |
|
"eval_loss": 0.03876839950680733, |
|
"eval_pearson_cosine": 0.7637665867805481, |
|
"eval_pearson_dot": 0.7007623910903931, |
|
"eval_pearson_euclidean": 0.7322614192962646, |
|
"eval_pearson_manhattan": 0.7338271141052246, |
|
"eval_runtime": 22.3296, |
|
"eval_samples_per_second": 67.175, |
|
"eval_spearman_cosine": 0.7641548541194557, |
|
"eval_spearman_dot": 0.7012776165056044, |
|
"eval_spearman_euclidean": 0.7377602855270703, |
|
"eval_spearman_manhattan": 0.73918298594716, |
|
"eval_steps_per_second": 8.419, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.101218369259606, |
|
"grad_norm": 0.7369022965431213, |
|
"learning_rate": 9.952334929490842e-06, |
|
"loss": 0.0364, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 6.110590440487348, |
|
"grad_norm": 0.8673484325408936, |
|
"learning_rate": 9.952261711256572e-06, |
|
"loss": 0.0498, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 6.119962511715089, |
|
"grad_norm": 1.5341424942016602, |
|
"learning_rate": 9.952188493022303e-06, |
|
"loss": 0.045, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 6.12933458294283, |
|
"grad_norm": 0.8899186253547668, |
|
"learning_rate": 9.952115274788034e-06, |
|
"loss": 0.0441, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 6.138706654170572, |
|
"grad_norm": 1.0708824396133423, |
|
"learning_rate": 9.952042056553765e-06, |
|
"loss": 0.0458, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 6.148078725398313, |
|
"grad_norm": 1.1551895141601562, |
|
"learning_rate": 9.951968838319495e-06, |
|
"loss": 0.0421, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 6.157450796626055, |
|
"grad_norm": 1.0832526683807373, |
|
"learning_rate": 9.951895620085226e-06, |
|
"loss": 0.0462, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 6.166822867853796, |
|
"grad_norm": 1.303536295890808, |
|
"learning_rate": 9.951822401850959e-06, |
|
"loss": 0.0423, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 6.176194939081537, |
|
"grad_norm": 1.2826794385910034, |
|
"learning_rate": 9.95174918361669e-06, |
|
"loss": 0.0463, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 6.185567010309279, |
|
"grad_norm": 1.0724890232086182, |
|
"learning_rate": 9.95167596538242e-06, |
|
"loss": 0.043, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 6.1949390815370196, |
|
"grad_norm": 0.9407768249511719, |
|
"learning_rate": 9.95160274714815e-06, |
|
"loss": 0.045, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 6.204311152764761, |
|
"grad_norm": 1.1686878204345703, |
|
"learning_rate": 9.951529528913882e-06, |
|
"loss": 0.0407, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 6.213683223992502, |
|
"grad_norm": 1.5972820520401, |
|
"learning_rate": 9.951456310679612e-06, |
|
"loss": 0.0449, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 6.223055295220243, |
|
"grad_norm": 0.7610195875167847, |
|
"learning_rate": 9.951383092445343e-06, |
|
"loss": 0.0397, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 6.232427366447985, |
|
"grad_norm": 1.02704656124115, |
|
"learning_rate": 9.951309874211075e-06, |
|
"loss": 0.0448, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 6.241799437675726, |
|
"grad_norm": 0.8035688400268555, |
|
"learning_rate": 9.951236655976805e-06, |
|
"loss": 0.0445, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 6.251171508903468, |
|
"grad_norm": 1.019539475440979, |
|
"learning_rate": 9.951163437742535e-06, |
|
"loss": 0.0452, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 6.260543580131209, |
|
"grad_norm": 1.662574291229248, |
|
"learning_rate": 9.951090219508268e-06, |
|
"loss": 0.0517, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 6.26991565135895, |
|
"grad_norm": 1.1599600315093994, |
|
"learning_rate": 9.951017001273998e-06, |
|
"loss": 0.0493, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 6.279287722586692, |
|
"grad_norm": 0.7756074070930481, |
|
"learning_rate": 9.95094378303973e-06, |
|
"loss": 0.048, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 6.288659793814433, |
|
"grad_norm": 1.0959285497665405, |
|
"learning_rate": 9.95087056480546e-06, |
|
"loss": 0.0501, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 6.298031865042175, |
|
"grad_norm": 1.2311910390853882, |
|
"learning_rate": 9.95079734657119e-06, |
|
"loss": 0.0486, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 6.307403936269916, |
|
"grad_norm": 1.2149254083633423, |
|
"learning_rate": 9.950724128336921e-06, |
|
"loss": 0.0389, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 6.316776007497657, |
|
"grad_norm": 1.5355291366577148, |
|
"learning_rate": 9.950650910102652e-06, |
|
"loss": 0.0472, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 6.3261480787253985, |
|
"grad_norm": 1.1264081001281738, |
|
"learning_rate": 9.950577691868385e-06, |
|
"loss": 0.043, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 6.3261480787253985, |
|
"eval_loss": 0.03764544054865837, |
|
"eval_pearson_cosine": 0.7692497968673706, |
|
"eval_pearson_dot": 0.7138222455978394, |
|
"eval_pearson_euclidean": 0.7343003749847412, |
|
"eval_pearson_manhattan": 0.7356712818145752, |
|
"eval_runtime": 22.6897, |
|
"eval_samples_per_second": 66.109, |
|
"eval_spearman_cosine": 0.7695765922931803, |
|
"eval_spearman_dot": 0.7152262336240688, |
|
"eval_spearman_euclidean": 0.739557951171161, |
|
"eval_spearman_manhattan": 0.7408550126908494, |
|
"eval_steps_per_second": 8.286, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 6.3355201499531395, |
|
"grad_norm": 0.6277545690536499, |
|
"learning_rate": 9.950504473634115e-06, |
|
"loss": 0.0406, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 6.344892221180881, |
|
"grad_norm": 1.3999137878417969, |
|
"learning_rate": 9.950431255399846e-06, |
|
"loss": 0.0447, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 6.354264292408622, |
|
"grad_norm": 0.7465086579322815, |
|
"learning_rate": 9.950358037165577e-06, |
|
"loss": 0.0502, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 6.363636363636363, |
|
"grad_norm": 1.1154383420944214, |
|
"learning_rate": 9.950284818931308e-06, |
|
"loss": 0.05, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 6.373008434864105, |
|
"grad_norm": 1.1133472919464111, |
|
"learning_rate": 9.950211600697038e-06, |
|
"loss": 0.0473, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 6.382380506091846, |
|
"grad_norm": 1.0995352268218994, |
|
"learning_rate": 9.95013838246277e-06, |
|
"loss": 0.0414, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 6.391752577319588, |
|
"grad_norm": 0.9666862487792969, |
|
"learning_rate": 9.9500651642285e-06, |
|
"loss": 0.049, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 6.401124648547329, |
|
"grad_norm": 1.1517918109893799, |
|
"learning_rate": 9.94999194599423e-06, |
|
"loss": 0.0413, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 6.41049671977507, |
|
"grad_norm": 0.5381759405136108, |
|
"learning_rate": 9.949918727759961e-06, |
|
"loss": 0.0418, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 6.419868791002812, |
|
"grad_norm": 0.973006546497345, |
|
"learning_rate": 9.949845509525692e-06, |
|
"loss": 0.0495, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 6.429240862230553, |
|
"grad_norm": 1.126633882522583, |
|
"learning_rate": 9.949772291291425e-06, |
|
"loss": 0.0493, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 6.438612933458295, |
|
"grad_norm": 0.7894268035888672, |
|
"learning_rate": 9.949699073057155e-06, |
|
"loss": 0.0436, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 6.447985004686036, |
|
"grad_norm": 0.7125422358512878, |
|
"learning_rate": 9.949625854822886e-06, |
|
"loss": 0.0433, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 6.457357075913777, |
|
"grad_norm": 0.9013342261314392, |
|
"learning_rate": 9.949552636588617e-06, |
|
"loss": 0.0376, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 6.4667291471415185, |
|
"grad_norm": 1.132384181022644, |
|
"learning_rate": 9.949479418354348e-06, |
|
"loss": 0.0482, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 6.4761012183692594, |
|
"grad_norm": 1.0104179382324219, |
|
"learning_rate": 9.949406200120078e-06, |
|
"loss": 0.0485, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 6.485473289597001, |
|
"grad_norm": 1.233464241027832, |
|
"learning_rate": 9.949332981885809e-06, |
|
"loss": 0.0478, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 6.494845360824742, |
|
"grad_norm": 0.7077954411506653, |
|
"learning_rate": 9.949259763651542e-06, |
|
"loss": 0.0464, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 6.504217432052483, |
|
"grad_norm": 1.5273882150650024, |
|
"learning_rate": 9.949186545417272e-06, |
|
"loss": 0.0404, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 6.513589503280225, |
|
"grad_norm": 1.2204720973968506, |
|
"learning_rate": 9.949113327183001e-06, |
|
"loss": 0.0375, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 6.522961574507966, |
|
"grad_norm": 0.9539759755134583, |
|
"learning_rate": 9.949040108948734e-06, |
|
"loss": 0.0397, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 6.532333645735708, |
|
"grad_norm": 1.949201226234436, |
|
"learning_rate": 9.948966890714465e-06, |
|
"loss": 0.0476, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 6.541705716963449, |
|
"grad_norm": 1.046915888786316, |
|
"learning_rate": 9.948893672480195e-06, |
|
"loss": 0.0445, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 6.55107778819119, |
|
"grad_norm": 0.8392923474311829, |
|
"learning_rate": 9.948820454245926e-06, |
|
"loss": 0.0502, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 6.560449859418932, |
|
"grad_norm": 1.357014536857605, |
|
"learning_rate": 9.948747236011659e-06, |
|
"loss": 0.0436, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.560449859418932, |
|
"eval_loss": 0.03813355416059494, |
|
"eval_pearson_cosine": 0.7662351131439209, |
|
"eval_pearson_dot": 0.7104849219322205, |
|
"eval_pearson_euclidean": 0.7334129810333252, |
|
"eval_pearson_manhattan": 0.7350986003875732, |
|
"eval_runtime": 22.7512, |
|
"eval_samples_per_second": 65.931, |
|
"eval_spearman_cosine": 0.7662226343415417, |
|
"eval_spearman_dot": 0.7115825441503862, |
|
"eval_spearman_euclidean": 0.7384103552275764, |
|
"eval_spearman_manhattan": 0.7397995971405482, |
|
"eval_steps_per_second": 8.263, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.569821930646673, |
|
"grad_norm": 1.1269482374191284, |
|
"learning_rate": 9.948674017777388e-06, |
|
"loss": 0.0395, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 6.579194001874415, |
|
"grad_norm": 0.8978859782218933, |
|
"learning_rate": 9.948600799543118e-06, |
|
"loss": 0.0438, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 6.588566073102156, |
|
"grad_norm": 1.3999450206756592, |
|
"learning_rate": 9.94852758130885e-06, |
|
"loss": 0.0466, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 6.597938144329897, |
|
"grad_norm": 0.985998272895813, |
|
"learning_rate": 9.948454363074582e-06, |
|
"loss": 0.0474, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 6.607310215557638, |
|
"grad_norm": 0.7843828797340393, |
|
"learning_rate": 9.948381144840312e-06, |
|
"loss": 0.0417, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 6.616682286785379, |
|
"grad_norm": 1.64656400680542, |
|
"learning_rate": 9.948307926606043e-06, |
|
"loss": 0.045, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 6.626054358013121, |
|
"grad_norm": 0.6348075866699219, |
|
"learning_rate": 9.948234708371774e-06, |
|
"loss": 0.0501, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 6.635426429240862, |
|
"grad_norm": 1.8781590461730957, |
|
"learning_rate": 9.948161490137505e-06, |
|
"loss": 0.0445, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 6.644798500468603, |
|
"grad_norm": 1.0441402196884155, |
|
"learning_rate": 9.948088271903235e-06, |
|
"loss": 0.0457, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 6.654170571696345, |
|
"grad_norm": 1.2460689544677734, |
|
"learning_rate": 9.948015053668966e-06, |
|
"loss": 0.0471, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 6.663542642924086, |
|
"grad_norm": 0.993414580821991, |
|
"learning_rate": 9.947941835434698e-06, |
|
"loss": 0.0423, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 6.672914714151828, |
|
"grad_norm": 1.2848552465438843, |
|
"learning_rate": 9.947868617200428e-06, |
|
"loss": 0.0414, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 6.682286785379569, |
|
"grad_norm": 1.2903103828430176, |
|
"learning_rate": 9.947795398966158e-06, |
|
"loss": 0.0402, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 6.69165885660731, |
|
"grad_norm": 1.2319235801696777, |
|
"learning_rate": 9.94772218073189e-06, |
|
"loss": 0.0504, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 6.701030927835052, |
|
"grad_norm": 0.8465273976325989, |
|
"learning_rate": 9.947648962497621e-06, |
|
"loss": 0.0409, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 6.710402999062793, |
|
"grad_norm": 1.186928153038025, |
|
"learning_rate": 9.947575744263352e-06, |
|
"loss": 0.0458, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 6.719775070290535, |
|
"grad_norm": 1.3528752326965332, |
|
"learning_rate": 9.947502526029083e-06, |
|
"loss": 0.0433, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 6.7291471415182755, |
|
"grad_norm": 0.8908892273902893, |
|
"learning_rate": 9.947429307794814e-06, |
|
"loss": 0.0456, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 6.7385192127460165, |
|
"grad_norm": 1.1235069036483765, |
|
"learning_rate": 9.947356089560544e-06, |
|
"loss": 0.0481, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 6.747891283973758, |
|
"grad_norm": 1.6809895038604736, |
|
"learning_rate": 9.947282871326275e-06, |
|
"loss": 0.0454, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 6.757263355201499, |
|
"grad_norm": 0.8632039427757263, |
|
"learning_rate": 9.947209653092008e-06, |
|
"loss": 0.0481, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 6.766635426429241, |
|
"grad_norm": 1.2185996770858765, |
|
"learning_rate": 9.947136434857738e-06, |
|
"loss": 0.0383, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 6.776007497656982, |
|
"grad_norm": 0.6979696154594421, |
|
"learning_rate": 9.947063216623467e-06, |
|
"loss": 0.0435, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 6.785379568884723, |
|
"grad_norm": 1.459441065788269, |
|
"learning_rate": 9.9469899983892e-06, |
|
"loss": 0.0449, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 6.794751640112465, |
|
"grad_norm": 1.0957977771759033, |
|
"learning_rate": 9.94691678015493e-06, |
|
"loss": 0.032, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 6.794751640112465, |
|
"eval_loss": 0.03765299916267395, |
|
"eval_pearson_cosine": 0.7692482471466064, |
|
"eval_pearson_dot": 0.722366452217102, |
|
"eval_pearson_euclidean": 0.7316011190414429, |
|
"eval_pearson_manhattan": 0.7333144545555115, |
|
"eval_runtime": 22.5438, |
|
"eval_samples_per_second": 66.537, |
|
"eval_spearman_cosine": 0.7695046405395065, |
|
"eval_spearman_dot": 0.7242050912795406, |
|
"eval_spearman_euclidean": 0.7356828429817377, |
|
"eval_spearman_manhattan": 0.737487116385034, |
|
"eval_steps_per_second": 8.339, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 6.804123711340206, |
|
"grad_norm": 1.377066731452942, |
|
"learning_rate": 9.946843561920661e-06, |
|
"loss": 0.0529, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 6.813495782567948, |
|
"grad_norm": 0.714728057384491, |
|
"learning_rate": 9.946770343686392e-06, |
|
"loss": 0.0432, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 6.822867853795689, |
|
"grad_norm": 1.4324384927749634, |
|
"learning_rate": 9.946697125452125e-06, |
|
"loss": 0.046, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 6.83223992502343, |
|
"grad_norm": 1.2564704418182373, |
|
"learning_rate": 9.946623907217854e-06, |
|
"loss": 0.046, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 6.841611996251172, |
|
"grad_norm": 0.8522197008132935, |
|
"learning_rate": 9.946550688983584e-06, |
|
"loss": 0.0393, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 6.850984067478913, |
|
"grad_norm": 0.8751912117004395, |
|
"learning_rate": 9.946477470749317e-06, |
|
"loss": 0.0426, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 6.8603561387066545, |
|
"grad_norm": 0.8960391879081726, |
|
"learning_rate": 9.946404252515048e-06, |
|
"loss": 0.0445, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 6.8697282099343955, |
|
"grad_norm": 1.092128872871399, |
|
"learning_rate": 9.946331034280778e-06, |
|
"loss": 0.0459, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 6.8791002811621365, |
|
"grad_norm": 1.1840777397155762, |
|
"learning_rate": 9.946257816046509e-06, |
|
"loss": 0.0387, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 6.888472352389878, |
|
"grad_norm": 1.0283764600753784, |
|
"learning_rate": 9.94618459781224e-06, |
|
"loss": 0.0577, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 6.897844423617619, |
|
"grad_norm": 0.749761164188385, |
|
"learning_rate": 9.94611137957797e-06, |
|
"loss": 0.0414, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 6.907216494845361, |
|
"grad_norm": 0.8442000150680542, |
|
"learning_rate": 9.946038161343701e-06, |
|
"loss": 0.046, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 6.916588566073102, |
|
"grad_norm": 1.2296583652496338, |
|
"learning_rate": 9.945964943109432e-06, |
|
"loss": 0.0412, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 6.925960637300843, |
|
"grad_norm": 0.6515626311302185, |
|
"learning_rate": 9.945891724875165e-06, |
|
"loss": 0.0481, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 6.935332708528585, |
|
"grad_norm": 1.8992091417312622, |
|
"learning_rate": 9.945818506640895e-06, |
|
"loss": 0.0431, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 6.944704779756326, |
|
"grad_norm": 1.1663875579833984, |
|
"learning_rate": 9.945745288406624e-06, |
|
"loss": 0.0459, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 6.954076850984068, |
|
"grad_norm": 0.6695976853370667, |
|
"learning_rate": 9.945672070172357e-06, |
|
"loss": 0.0448, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 6.963448922211809, |
|
"grad_norm": 1.158563494682312, |
|
"learning_rate": 9.945598851938088e-06, |
|
"loss": 0.0398, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 6.97282099343955, |
|
"grad_norm": 1.2068713903427124, |
|
"learning_rate": 9.945525633703818e-06, |
|
"loss": 0.0443, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 6.982193064667292, |
|
"grad_norm": 0.9688456654548645, |
|
"learning_rate": 9.945452415469549e-06, |
|
"loss": 0.0452, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 6.991565135895033, |
|
"grad_norm": 1.5483156442642212, |
|
"learning_rate": 9.94537919723528e-06, |
|
"loss": 0.0498, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 7.0009372071227745, |
|
"grad_norm": 1.18287193775177, |
|
"learning_rate": 9.94530597900101e-06, |
|
"loss": 0.0445, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 7.010309278350515, |
|
"grad_norm": 0.7765620946884155, |
|
"learning_rate": 9.945232760766741e-06, |
|
"loss": 0.0346, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 7.019681349578256, |
|
"grad_norm": 0.948760986328125, |
|
"learning_rate": 9.945159542532474e-06, |
|
"loss": 0.0348, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 7.029053420805998, |
|
"grad_norm": 0.9965664744377136, |
|
"learning_rate": 9.945086324298205e-06, |
|
"loss": 0.0342, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.029053420805998, |
|
"eval_loss": 0.03782695531845093, |
|
"eval_pearson_cosine": 0.768491804599762, |
|
"eval_pearson_dot": 0.7183945775032043, |
|
"eval_pearson_euclidean": 0.7320147752761841, |
|
"eval_pearson_manhattan": 0.7333334684371948, |
|
"eval_runtime": 21.6515, |
|
"eval_samples_per_second": 69.279, |
|
"eval_spearman_cosine": 0.7677979499645443, |
|
"eval_spearman_dot": 0.7186610110098233, |
|
"eval_spearman_euclidean": 0.7364530110375347, |
|
"eval_spearman_manhattan": 0.737620665225201, |
|
"eval_steps_per_second": 8.683, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.038425492033739, |
|
"grad_norm": 0.8594346046447754, |
|
"learning_rate": 9.945013106063935e-06, |
|
"loss": 0.0318, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 7.047797563261481, |
|
"grad_norm": 1.62812340259552, |
|
"learning_rate": 9.944939887829666e-06, |
|
"loss": 0.0414, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 7.057169634489222, |
|
"grad_norm": 1.1017098426818848, |
|
"learning_rate": 9.944866669595397e-06, |
|
"loss": 0.0327, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 7.066541705716963, |
|
"grad_norm": 0.8536505699157715, |
|
"learning_rate": 9.944793451361128e-06, |
|
"loss": 0.0286, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 7.075913776944705, |
|
"grad_norm": 1.0389901399612427, |
|
"learning_rate": 9.944720233126858e-06, |
|
"loss": 0.0365, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 7.085285848172446, |
|
"grad_norm": 1.0682491064071655, |
|
"learning_rate": 9.94464701489259e-06, |
|
"loss": 0.034, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 7.094657919400188, |
|
"grad_norm": 0.8786489963531494, |
|
"learning_rate": 9.944573796658321e-06, |
|
"loss": 0.0373, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 7.104029990627929, |
|
"grad_norm": 1.3642008304595947, |
|
"learning_rate": 9.94450057842405e-06, |
|
"loss": 0.0314, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 7.11340206185567, |
|
"grad_norm": 0.7243325114250183, |
|
"learning_rate": 9.944427360189783e-06, |
|
"loss": 0.0299, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 7.122774133083412, |
|
"grad_norm": 0.6696385145187378, |
|
"learning_rate": 9.944354141955514e-06, |
|
"loss": 0.0311, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 7.1321462043111525, |
|
"grad_norm": 1.03152334690094, |
|
"learning_rate": 9.944280923721244e-06, |
|
"loss": 0.0355, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 7.141518275538894, |
|
"grad_norm": 0.8586616516113281, |
|
"learning_rate": 9.944207705486975e-06, |
|
"loss": 0.0394, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 7.150890346766635, |
|
"grad_norm": 0.9514285922050476, |
|
"learning_rate": 9.944134487252706e-06, |
|
"loss": 0.035, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 7.160262417994376, |
|
"grad_norm": 0.8053460717201233, |
|
"learning_rate": 9.944061269018437e-06, |
|
"loss": 0.0312, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 7.169634489222118, |
|
"grad_norm": 1.0056674480438232, |
|
"learning_rate": 9.943988050784167e-06, |
|
"loss": 0.0371, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 7.179006560449859, |
|
"grad_norm": 0.7738359570503235, |
|
"learning_rate": 9.943914832549898e-06, |
|
"loss": 0.0302, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 7.188378631677601, |
|
"grad_norm": 1.039197325706482, |
|
"learning_rate": 9.94384161431563e-06, |
|
"loss": 0.0316, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 7.197750702905342, |
|
"grad_norm": 1.578165888786316, |
|
"learning_rate": 9.943768396081361e-06, |
|
"loss": 0.0388, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 7.207122774133083, |
|
"grad_norm": 1.1753205060958862, |
|
"learning_rate": 9.943695177847092e-06, |
|
"loss": 0.0387, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 7.216494845360825, |
|
"grad_norm": 1.295299768447876, |
|
"learning_rate": 9.943621959612823e-06, |
|
"loss": 0.0417, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 7.225866916588566, |
|
"grad_norm": 0.9477363228797913, |
|
"learning_rate": 9.943548741378554e-06, |
|
"loss": 0.0305, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 7.235238987816308, |
|
"grad_norm": 1.0547223091125488, |
|
"learning_rate": 9.943475523144284e-06, |
|
"loss": 0.0314, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 7.244611059044049, |
|
"grad_norm": 1.4873117208480835, |
|
"learning_rate": 9.943402304910015e-06, |
|
"loss": 0.0302, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 7.25398313027179, |
|
"grad_norm": 0.9882778525352478, |
|
"learning_rate": 9.943329086675748e-06, |
|
"loss": 0.0328, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 7.2633552014995315, |
|
"grad_norm": 1.3187719583511353, |
|
"learning_rate": 9.943255868441477e-06, |
|
"loss": 0.0341, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 7.2633552014995315, |
|
"eval_loss": 0.03773624449968338, |
|
"eval_pearson_cosine": 0.7699387073516846, |
|
"eval_pearson_dot": 0.7237234115600586, |
|
"eval_pearson_euclidean": 0.7316513061523438, |
|
"eval_pearson_manhattan": 0.7335678339004517, |
|
"eval_runtime": 22.1612, |
|
"eval_samples_per_second": 67.686, |
|
"eval_spearman_cosine": 0.7694615753118931, |
|
"eval_spearman_dot": 0.7243788947148158, |
|
"eval_spearman_euclidean": 0.7361849268567764, |
|
"eval_spearman_manhattan": 0.7377945356892571, |
|
"eval_steps_per_second": 8.483, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 7.2727272727272725, |
|
"grad_norm": 1.0984870195388794, |
|
"learning_rate": 9.943182650207207e-06, |
|
"loss": 0.0329, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 7.282099343955014, |
|
"grad_norm": 0.7666100263595581, |
|
"learning_rate": 9.94310943197294e-06, |
|
"loss": 0.0358, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 7.291471415182755, |
|
"grad_norm": 0.9941838383674622, |
|
"learning_rate": 9.94303621373867e-06, |
|
"loss": 0.0351, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 7.300843486410496, |
|
"grad_norm": 1.3012335300445557, |
|
"learning_rate": 9.942962995504401e-06, |
|
"loss": 0.0296, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 7.310215557638238, |
|
"grad_norm": 1.1914719343185425, |
|
"learning_rate": 9.942889777270132e-06, |
|
"loss": 0.0333, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 7.319587628865979, |
|
"grad_norm": 1.1405929327011108, |
|
"learning_rate": 9.942816559035863e-06, |
|
"loss": 0.0408, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 7.328959700093721, |
|
"grad_norm": 0.665600061416626, |
|
"learning_rate": 9.942743340801594e-06, |
|
"loss": 0.0314, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 7.338331771321462, |
|
"grad_norm": 1.2029966115951538, |
|
"learning_rate": 9.942670122567324e-06, |
|
"loss": 0.041, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 7.347703842549203, |
|
"grad_norm": 0.44810751080513, |
|
"learning_rate": 9.942596904333057e-06, |
|
"loss": 0.0317, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 7.357075913776945, |
|
"grad_norm": 1.565082311630249, |
|
"learning_rate": 9.942523686098788e-06, |
|
"loss": 0.035, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 7.366447985004686, |
|
"grad_norm": 1.6850316524505615, |
|
"learning_rate": 9.942450467864517e-06, |
|
"loss": 0.0365, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 7.375820056232428, |
|
"grad_norm": 1.0027261972427368, |
|
"learning_rate": 9.942377249630249e-06, |
|
"loss": 0.0309, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 7.385192127460169, |
|
"grad_norm": 0.51674485206604, |
|
"learning_rate": 9.94230403139598e-06, |
|
"loss": 0.0321, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 7.39456419868791, |
|
"grad_norm": 1.0429599285125732, |
|
"learning_rate": 9.94223081316171e-06, |
|
"loss": 0.033, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 7.4039362699156515, |
|
"grad_norm": 0.618232250213623, |
|
"learning_rate": 9.942157594927441e-06, |
|
"loss": 0.0353, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 7.413308341143392, |
|
"grad_norm": 0.9780518412590027, |
|
"learning_rate": 9.942084376693174e-06, |
|
"loss": 0.0354, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 7.422680412371134, |
|
"grad_norm": 1.214362621307373, |
|
"learning_rate": 9.942011158458903e-06, |
|
"loss": 0.0338, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 7.432052483598875, |
|
"grad_norm": 1.202986240386963, |
|
"learning_rate": 9.941937940224634e-06, |
|
"loss": 0.0387, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 7.441424554826616, |
|
"grad_norm": 1.4128488302230835, |
|
"learning_rate": 9.941864721990366e-06, |
|
"loss": 0.0315, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 7.450796626054358, |
|
"grad_norm": 0.7198026180267334, |
|
"learning_rate": 9.941791503756097e-06, |
|
"loss": 0.0338, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 7.460168697282099, |
|
"grad_norm": 1.1124250888824463, |
|
"learning_rate": 9.941718285521828e-06, |
|
"loss": 0.0352, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 7.469540768509841, |
|
"grad_norm": 1.0420817136764526, |
|
"learning_rate": 9.941645067287558e-06, |
|
"loss": 0.0338, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 7.478912839737582, |
|
"grad_norm": 0.9638373255729675, |
|
"learning_rate": 9.941571849053289e-06, |
|
"loss": 0.0356, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 7.488284910965323, |
|
"grad_norm": 0.8584896922111511, |
|
"learning_rate": 9.94149863081902e-06, |
|
"loss": 0.0353, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 7.497656982193065, |
|
"grad_norm": 0.7161556482315063, |
|
"learning_rate": 9.94142541258475e-06, |
|
"loss": 0.0329, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 7.497656982193065, |
|
"eval_loss": 0.03753030672669411, |
|
"eval_pearson_cosine": 0.7705868482589722, |
|
"eval_pearson_dot": 0.7248358726501465, |
|
"eval_pearson_euclidean": 0.734631359577179, |
|
"eval_pearson_manhattan": 0.7363988161087036, |
|
"eval_runtime": 22.3628, |
|
"eval_samples_per_second": 67.076, |
|
"eval_spearman_cosine": 0.769708288306187, |
|
"eval_spearman_dot": 0.7249767839130733, |
|
"eval_spearman_euclidean": 0.7394619718544255, |
|
"eval_spearman_manhattan": 0.7409361299302836, |
|
"eval_steps_per_second": 8.407, |
|
"step": 8000 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 10670, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |