CocoRoF's picture
Training in progress, step 8000, checkpoint
7350bc0 verified
raw
history blame
159 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 7.497656982193065,
"eval_steps": 250,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00937207122774133,
"grad_norm": 0.573442816734314,
"learning_rate": 9.999926781765732e-06,
"loss": 1.312,
"step": 10
},
{
"epoch": 0.01874414245548266,
"grad_norm": 1.0577057600021362,
"learning_rate": 9.999853563531462e-06,
"loss": 1.2611,
"step": 20
},
{
"epoch": 0.028116213683223992,
"grad_norm": 1.358649492263794,
"learning_rate": 9.999780345297193e-06,
"loss": 1.1822,
"step": 30
},
{
"epoch": 0.03748828491096532,
"grad_norm": 1.7219270467758179,
"learning_rate": 9.999707127062924e-06,
"loss": 1.062,
"step": 40
},
{
"epoch": 0.046860356138706656,
"grad_norm": 1.7191277742385864,
"learning_rate": 9.999633908828655e-06,
"loss": 0.9325,
"step": 50
},
{
"epoch": 0.056232427366447985,
"grad_norm": 1.6047089099884033,
"learning_rate": 9.999560690594387e-06,
"loss": 0.7909,
"step": 60
},
{
"epoch": 0.06560449859418932,
"grad_norm": 1.1597000360488892,
"learning_rate": 9.999487472360118e-06,
"loss": 0.6858,
"step": 70
},
{
"epoch": 0.07497656982193064,
"grad_norm": 1.4232110977172852,
"learning_rate": 9.999414254125849e-06,
"loss": 0.6554,
"step": 80
},
{
"epoch": 0.08434864104967198,
"grad_norm": 1.3652020692825317,
"learning_rate": 9.99934103589158e-06,
"loss": 0.5937,
"step": 90
},
{
"epoch": 0.09372071227741331,
"grad_norm": 1.299221396446228,
"learning_rate": 9.99926781765731e-06,
"loss": 0.5778,
"step": 100
},
{
"epoch": 0.10309278350515463,
"grad_norm": 1.367699146270752,
"learning_rate": 9.99919459942304e-06,
"loss": 0.5562,
"step": 110
},
{
"epoch": 0.11246485473289597,
"grad_norm": 1.2190635204315186,
"learning_rate": 9.999121381188772e-06,
"loss": 0.5259,
"step": 120
},
{
"epoch": 0.1218369259606373,
"grad_norm": 1.1808373928070068,
"learning_rate": 9.999048162954504e-06,
"loss": 0.5158,
"step": 130
},
{
"epoch": 0.13120899718837864,
"grad_norm": 1.5956122875213623,
"learning_rate": 9.998974944720235e-06,
"loss": 0.4877,
"step": 140
},
{
"epoch": 0.14058106841611998,
"grad_norm": 1.2425106763839722,
"learning_rate": 9.998901726485964e-06,
"loss": 0.4858,
"step": 150
},
{
"epoch": 0.14995313964386128,
"grad_norm": 1.284425139427185,
"learning_rate": 9.998828508251696e-06,
"loss": 0.4426,
"step": 160
},
{
"epoch": 0.15932521087160262,
"grad_norm": 1.4248498678207397,
"learning_rate": 9.998755290017427e-06,
"loss": 0.4644,
"step": 170
},
{
"epoch": 0.16869728209934395,
"grad_norm": 2.5712969303131104,
"learning_rate": 9.998682071783158e-06,
"loss": 0.4363,
"step": 180
},
{
"epoch": 0.1780693533270853,
"grad_norm": 1.572169542312622,
"learning_rate": 9.998608853548888e-06,
"loss": 0.4206,
"step": 190
},
{
"epoch": 0.18744142455482662,
"grad_norm": 1.4508352279663086,
"learning_rate": 9.998535635314621e-06,
"loss": 0.4247,
"step": 200
},
{
"epoch": 0.19681349578256796,
"grad_norm": 1.2668938636779785,
"learning_rate": 9.99846241708035e-06,
"loss": 0.4302,
"step": 210
},
{
"epoch": 0.20618556701030927,
"grad_norm": 1.0630348920822144,
"learning_rate": 9.99838919884608e-06,
"loss": 0.3987,
"step": 220
},
{
"epoch": 0.2155576382380506,
"grad_norm": 1.1395602226257324,
"learning_rate": 9.998315980611813e-06,
"loss": 0.3746,
"step": 230
},
{
"epoch": 0.22492970946579194,
"grad_norm": 1.6570693254470825,
"learning_rate": 9.998242762377544e-06,
"loss": 0.3954,
"step": 240
},
{
"epoch": 0.23430178069353327,
"grad_norm": 1.2213038206100464,
"learning_rate": 9.998169544143275e-06,
"loss": 0.3877,
"step": 250
},
{
"epoch": 0.23430178069353327,
"eval_loss": 0.15415821969509125,
"eval_pearson_cosine": 0.7471039295196533,
"eval_pearson_dot": 0.6414342522621155,
"eval_pearson_euclidean": 0.739482581615448,
"eval_pearson_manhattan": 0.7393465042114258,
"eval_runtime": 29.8457,
"eval_samples_per_second": 50.258,
"eval_spearman_cosine": 0.7499078042299374,
"eval_spearman_dot": 0.6346699933138464,
"eval_spearman_euclidean": 0.7397365400334271,
"eval_spearman_manhattan": 0.7393369553461101,
"eval_steps_per_second": 6.299,
"step": 250
},
{
"epoch": 0.2436738519212746,
"grad_norm": 1.3511942625045776,
"learning_rate": 9.998096325909005e-06,
"loss": 0.3685,
"step": 260
},
{
"epoch": 0.2530459231490159,
"grad_norm": 1.3458188772201538,
"learning_rate": 9.998023107674736e-06,
"loss": 0.367,
"step": 270
},
{
"epoch": 0.2624179943767573,
"grad_norm": 1.424850344657898,
"learning_rate": 9.997949889440467e-06,
"loss": 0.3511,
"step": 280
},
{
"epoch": 0.2717900656044986,
"grad_norm": 1.4595459699630737,
"learning_rate": 9.997876671206198e-06,
"loss": 0.3389,
"step": 290
},
{
"epoch": 0.28116213683223995,
"grad_norm": 1.167495608329773,
"learning_rate": 9.997803452971928e-06,
"loss": 0.3335,
"step": 300
},
{
"epoch": 0.29053420805998126,
"grad_norm": 1.1749252080917358,
"learning_rate": 9.997730234737661e-06,
"loss": 0.3339,
"step": 310
},
{
"epoch": 0.29990627928772257,
"grad_norm": 1.2500739097595215,
"learning_rate": 9.99765701650339e-06,
"loss": 0.3215,
"step": 320
},
{
"epoch": 0.30927835051546393,
"grad_norm": 1.332942247390747,
"learning_rate": 9.99758379826912e-06,
"loss": 0.3093,
"step": 330
},
{
"epoch": 0.31865042174320524,
"grad_norm": 1.173511266708374,
"learning_rate": 9.997510580034853e-06,
"loss": 0.3234,
"step": 340
},
{
"epoch": 0.3280224929709466,
"grad_norm": 1.3587061166763306,
"learning_rate": 9.997437361800584e-06,
"loss": 0.3285,
"step": 350
},
{
"epoch": 0.3373945641986879,
"grad_norm": 1.4196358919143677,
"learning_rate": 9.997364143566315e-06,
"loss": 0.3078,
"step": 360
},
{
"epoch": 0.3467666354264292,
"grad_norm": 1.1899330615997314,
"learning_rate": 9.997290925332045e-06,
"loss": 0.2952,
"step": 370
},
{
"epoch": 0.3561387066541706,
"grad_norm": 1.3728539943695068,
"learning_rate": 9.997217707097776e-06,
"loss": 0.2912,
"step": 380
},
{
"epoch": 0.3655107778819119,
"grad_norm": 1.6375203132629395,
"learning_rate": 9.997144488863507e-06,
"loss": 0.3153,
"step": 390
},
{
"epoch": 0.37488284910965325,
"grad_norm": 1.3330031633377075,
"learning_rate": 9.997071270629238e-06,
"loss": 0.2858,
"step": 400
},
{
"epoch": 0.38425492033739456,
"grad_norm": 1.2047045230865479,
"learning_rate": 9.99699805239497e-06,
"loss": 0.3004,
"step": 410
},
{
"epoch": 0.3936269915651359,
"grad_norm": 1.280134916305542,
"learning_rate": 9.9969248341607e-06,
"loss": 0.2819,
"step": 420
},
{
"epoch": 0.4029990627928772,
"grad_norm": 1.2952693700790405,
"learning_rate": 9.99685161592643e-06,
"loss": 0.2772,
"step": 430
},
{
"epoch": 0.41237113402061853,
"grad_norm": 1.1937365531921387,
"learning_rate": 9.996778397692162e-06,
"loss": 0.3024,
"step": 440
},
{
"epoch": 0.4217432052483599,
"grad_norm": 1.226347804069519,
"learning_rate": 9.996705179457893e-06,
"loss": 0.2844,
"step": 450
},
{
"epoch": 0.4311152764761012,
"grad_norm": 1.5503312349319458,
"learning_rate": 9.996631961223624e-06,
"loss": 0.2634,
"step": 460
},
{
"epoch": 0.44048734770384257,
"grad_norm": 1.4498707056045532,
"learning_rate": 9.996558742989355e-06,
"loss": 0.2697,
"step": 470
},
{
"epoch": 0.4498594189315839,
"grad_norm": 1.2823820114135742,
"learning_rate": 9.996485524755087e-06,
"loss": 0.2927,
"step": 480
},
{
"epoch": 0.4592314901593252,
"grad_norm": 1.1089231967926025,
"learning_rate": 9.996412306520816e-06,
"loss": 0.2669,
"step": 490
},
{
"epoch": 0.46860356138706655,
"grad_norm": 1.3862818479537964,
"learning_rate": 9.996339088286547e-06,
"loss": 0.2805,
"step": 500
},
{
"epoch": 0.46860356138706655,
"eval_loss": 0.11416644603013992,
"eval_pearson_cosine": 0.7577512264251709,
"eval_pearson_dot": 0.6366492509841919,
"eval_pearson_euclidean": 0.7618618011474609,
"eval_pearson_manhattan": 0.7619431614875793,
"eval_runtime": 22.679,
"eval_samples_per_second": 66.14,
"eval_spearman_cosine": 0.7643092952449725,
"eval_spearman_dot": 0.6341280960850315,
"eval_spearman_euclidean": 0.7653570734883524,
"eval_spearman_manhattan": 0.7652284643248553,
"eval_steps_per_second": 8.29,
"step": 500
},
{
"epoch": 0.47797563261480785,
"grad_norm": 1.079265832901001,
"learning_rate": 9.99626587005228e-06,
"loss": 0.2649,
"step": 510
},
{
"epoch": 0.4873477038425492,
"grad_norm": 1.3966060876846313,
"learning_rate": 9.99619265181801e-06,
"loss": 0.279,
"step": 520
},
{
"epoch": 0.4967197750702905,
"grad_norm": 1.197001576423645,
"learning_rate": 9.99611943358374e-06,
"loss": 0.263,
"step": 530
},
{
"epoch": 0.5060918462980318,
"grad_norm": 1.414509892463684,
"learning_rate": 9.996046215349472e-06,
"loss": 0.2816,
"step": 540
},
{
"epoch": 0.5154639175257731,
"grad_norm": 1.4723501205444336,
"learning_rate": 9.995972997115202e-06,
"loss": 0.2696,
"step": 550
},
{
"epoch": 0.5248359887535146,
"grad_norm": 1.1838375329971313,
"learning_rate": 9.995899778880933e-06,
"loss": 0.2686,
"step": 560
},
{
"epoch": 0.5342080599812559,
"grad_norm": 1.2640224695205688,
"learning_rate": 9.995826560646664e-06,
"loss": 0.2842,
"step": 570
},
{
"epoch": 0.5435801312089972,
"grad_norm": 1.2584717273712158,
"learning_rate": 9.995753342412395e-06,
"loss": 0.2505,
"step": 580
},
{
"epoch": 0.5529522024367385,
"grad_norm": 1.3276816606521606,
"learning_rate": 9.995680124178127e-06,
"loss": 0.2764,
"step": 590
},
{
"epoch": 0.5623242736644799,
"grad_norm": 1.5065838098526,
"learning_rate": 9.995606905943858e-06,
"loss": 0.2778,
"step": 600
},
{
"epoch": 0.5716963448922212,
"grad_norm": 1.1485587358474731,
"learning_rate": 9.995533687709588e-06,
"loss": 0.2533,
"step": 610
},
{
"epoch": 0.5810684161199625,
"grad_norm": 1.242677927017212,
"learning_rate": 9.99546046947532e-06,
"loss": 0.2549,
"step": 620
},
{
"epoch": 0.5904404873477038,
"grad_norm": 1.4471759796142578,
"learning_rate": 9.99538725124105e-06,
"loss": 0.2734,
"step": 630
},
{
"epoch": 0.5998125585754451,
"grad_norm": 1.3379895687103271,
"learning_rate": 9.99531403300678e-06,
"loss": 0.2551,
"step": 640
},
{
"epoch": 0.6091846298031866,
"grad_norm": 1.2373607158660889,
"learning_rate": 9.995240814772511e-06,
"loss": 0.2358,
"step": 650
},
{
"epoch": 0.6185567010309279,
"grad_norm": 1.2897976636886597,
"learning_rate": 9.995167596538242e-06,
"loss": 0.2572,
"step": 660
},
{
"epoch": 0.6279287722586692,
"grad_norm": 1.3715548515319824,
"learning_rate": 9.995094378303973e-06,
"loss": 0.2554,
"step": 670
},
{
"epoch": 0.6373008434864105,
"grad_norm": 1.3889539241790771,
"learning_rate": 9.995021160069704e-06,
"loss": 0.2502,
"step": 680
},
{
"epoch": 0.6466729147141518,
"grad_norm": 1.3987656831741333,
"learning_rate": 9.994947941835436e-06,
"loss": 0.2449,
"step": 690
},
{
"epoch": 0.6560449859418932,
"grad_norm": 1.4677623510360718,
"learning_rate": 9.994874723601167e-06,
"loss": 0.2438,
"step": 700
},
{
"epoch": 0.6654170571696345,
"grad_norm": 1.238258719444275,
"learning_rate": 9.994801505366898e-06,
"loss": 0.2609,
"step": 710
},
{
"epoch": 0.6747891283973758,
"grad_norm": 1.2697819471359253,
"learning_rate": 9.994728287132628e-06,
"loss": 0.2685,
"step": 720
},
{
"epoch": 0.6841611996251171,
"grad_norm": 1.1607269048690796,
"learning_rate": 9.99465506889836e-06,
"loss": 0.2342,
"step": 730
},
{
"epoch": 0.6935332708528584,
"grad_norm": 1.2666348218917847,
"learning_rate": 9.99458185066409e-06,
"loss": 0.2308,
"step": 740
},
{
"epoch": 0.7029053420805998,
"grad_norm": 1.252940058708191,
"learning_rate": 9.99450863242982e-06,
"loss": 0.2331,
"step": 750
},
{
"epoch": 0.7029053420805998,
"eval_loss": 0.09498214721679688,
"eval_pearson_cosine": 0.7673527002334595,
"eval_pearson_dot": 0.6584292054176331,
"eval_pearson_euclidean": 0.7682392001152039,
"eval_pearson_manhattan": 0.7685161232948303,
"eval_runtime": 21.4883,
"eval_samples_per_second": 69.805,
"eval_spearman_cosine": 0.7771628917615258,
"eval_spearman_dot": 0.6570265964452069,
"eval_spearman_euclidean": 0.7740883932373563,
"eval_spearman_manhattan": 0.7747253819422362,
"eval_steps_per_second": 8.749,
"step": 750
},
{
"epoch": 0.7122774133083412,
"grad_norm": 1.204959750175476,
"learning_rate": 9.994435414195553e-06,
"loss": 0.2514,
"step": 760
},
{
"epoch": 0.7216494845360825,
"grad_norm": 2.5355069637298584,
"learning_rate": 9.994362195961284e-06,
"loss": 0.2473,
"step": 770
},
{
"epoch": 0.7310215557638238,
"grad_norm": 1.2129027843475342,
"learning_rate": 9.994288977727013e-06,
"loss": 0.2302,
"step": 780
},
{
"epoch": 0.7403936269915652,
"grad_norm": 1.109953761100769,
"learning_rate": 9.994215759492745e-06,
"loss": 0.2264,
"step": 790
},
{
"epoch": 0.7497656982193065,
"grad_norm": 1.443888545036316,
"learning_rate": 9.994142541258476e-06,
"loss": 0.2372,
"step": 800
},
{
"epoch": 0.7591377694470478,
"grad_norm": 1.3083347082138062,
"learning_rate": 9.994069323024207e-06,
"loss": 0.2417,
"step": 810
},
{
"epoch": 0.7685098406747891,
"grad_norm": 1.0919073820114136,
"learning_rate": 9.993996104789938e-06,
"loss": 0.2331,
"step": 820
},
{
"epoch": 0.7778819119025304,
"grad_norm": 1.3770041465759277,
"learning_rate": 9.993922886555668e-06,
"loss": 0.2692,
"step": 830
},
{
"epoch": 0.7872539831302718,
"grad_norm": 1.2099621295928955,
"learning_rate": 9.993849668321399e-06,
"loss": 0.2279,
"step": 840
},
{
"epoch": 0.7966260543580131,
"grad_norm": 1.1606112718582153,
"learning_rate": 9.99377645008713e-06,
"loss": 0.2474,
"step": 850
},
{
"epoch": 0.8059981255857545,
"grad_norm": 1.472863793373108,
"learning_rate": 9.993703231852862e-06,
"loss": 0.2298,
"step": 860
},
{
"epoch": 0.8153701968134958,
"grad_norm": 1.2455284595489502,
"learning_rate": 9.993630013618593e-06,
"loss": 0.2371,
"step": 870
},
{
"epoch": 0.8247422680412371,
"grad_norm": 1.3777674436569214,
"learning_rate": 9.993556795384324e-06,
"loss": 0.2434,
"step": 880
},
{
"epoch": 0.8341143392689785,
"grad_norm": 0.9551514983177185,
"learning_rate": 9.993483577150055e-06,
"loss": 0.2074,
"step": 890
},
{
"epoch": 0.8434864104967198,
"grad_norm": 1.0588115453720093,
"learning_rate": 9.993410358915785e-06,
"loss": 0.2162,
"step": 900
},
{
"epoch": 0.8528584817244611,
"grad_norm": 1.3450068235397339,
"learning_rate": 9.993337140681516e-06,
"loss": 0.2272,
"step": 910
},
{
"epoch": 0.8622305529522024,
"grad_norm": 1.6997965574264526,
"learning_rate": 9.993263922447247e-06,
"loss": 0.2315,
"step": 920
},
{
"epoch": 0.8716026241799437,
"grad_norm": 1.2186520099639893,
"learning_rate": 9.993190704212978e-06,
"loss": 0.2426,
"step": 930
},
{
"epoch": 0.8809746954076851,
"grad_norm": 1.0515309572219849,
"learning_rate": 9.99311748597871e-06,
"loss": 0.2328,
"step": 940
},
{
"epoch": 0.8903467666354264,
"grad_norm": 1.29239821434021,
"learning_rate": 9.993044267744439e-06,
"loss": 0.2263,
"step": 950
},
{
"epoch": 0.8997188378631678,
"grad_norm": 1.7695139646530151,
"learning_rate": 9.99297104951017e-06,
"loss": 0.2466,
"step": 960
},
{
"epoch": 0.9090909090909091,
"grad_norm": 1.359837293624878,
"learning_rate": 9.992897831275902e-06,
"loss": 0.2215,
"step": 970
},
{
"epoch": 0.9184629803186504,
"grad_norm": 1.2525417804718018,
"learning_rate": 9.992824613041633e-06,
"loss": 0.2295,
"step": 980
},
{
"epoch": 0.9278350515463918,
"grad_norm": 1.2337384223937988,
"learning_rate": 9.992751394807364e-06,
"loss": 0.2101,
"step": 990
},
{
"epoch": 0.9372071227741331,
"grad_norm": 1.1121580600738525,
"learning_rate": 9.992678176573095e-06,
"loss": 0.2455,
"step": 1000
},
{
"epoch": 0.9372071227741331,
"eval_loss": 0.09235719591379166,
"eval_pearson_cosine": 0.7676932215690613,
"eval_pearson_dot": 0.6569437980651855,
"eval_pearson_euclidean": 0.7712024450302124,
"eval_pearson_manhattan": 0.7713895440101624,
"eval_runtime": 21.9039,
"eval_samples_per_second": 68.481,
"eval_spearman_cosine": 0.7780572781571132,
"eval_spearman_dot": 0.6557682135268442,
"eval_spearman_euclidean": 0.7775782712174545,
"eval_spearman_manhattan": 0.7778181970888292,
"eval_steps_per_second": 8.583,
"step": 1000
},
{
"epoch": 0.9465791940018744,
"grad_norm": 1.1828556060791016,
"learning_rate": 9.992604958338825e-06,
"loss": 0.2168,
"step": 1010
},
{
"epoch": 0.9559512652296157,
"grad_norm": 1.2189664840698242,
"learning_rate": 9.992531740104556e-06,
"loss": 0.2072,
"step": 1020
},
{
"epoch": 0.9653233364573571,
"grad_norm": 1.6102409362792969,
"learning_rate": 9.992458521870287e-06,
"loss": 0.2228,
"step": 1030
},
{
"epoch": 0.9746954076850984,
"grad_norm": 1.6891916990280151,
"learning_rate": 9.99238530363602e-06,
"loss": 0.2404,
"step": 1040
},
{
"epoch": 0.9840674789128397,
"grad_norm": 1.2274008989334106,
"learning_rate": 9.99231208540175e-06,
"loss": 0.2225,
"step": 1050
},
{
"epoch": 0.993439550140581,
"grad_norm": 1.2388169765472412,
"learning_rate": 9.992238867167479e-06,
"loss": 0.2215,
"step": 1060
},
{
"epoch": 1.0028116213683225,
"grad_norm": 1.2347650527954102,
"learning_rate": 9.992165648933211e-06,
"loss": 0.2239,
"step": 1070
},
{
"epoch": 1.0121836925960637,
"grad_norm": 1.1266793012619019,
"learning_rate": 9.992092430698942e-06,
"loss": 0.1932,
"step": 1080
},
{
"epoch": 1.021555763823805,
"grad_norm": 1.5187146663665771,
"learning_rate": 9.992019212464673e-06,
"loss": 0.205,
"step": 1090
},
{
"epoch": 1.0309278350515463,
"grad_norm": 1.4463717937469482,
"learning_rate": 9.991945994230404e-06,
"loss": 0.1818,
"step": 1100
},
{
"epoch": 1.0402999062792877,
"grad_norm": 1.6186790466308594,
"learning_rate": 9.991872775996136e-06,
"loss": 0.2076,
"step": 1110
},
{
"epoch": 1.0496719775070291,
"grad_norm": 1.3895883560180664,
"learning_rate": 9.991799557761865e-06,
"loss": 0.2096,
"step": 1120
},
{
"epoch": 1.0590440487347703,
"grad_norm": 1.296912670135498,
"learning_rate": 9.991726339527596e-06,
"loss": 0.2046,
"step": 1130
},
{
"epoch": 1.0684161199625117,
"grad_norm": 1.5527839660644531,
"learning_rate": 9.991653121293328e-06,
"loss": 0.1972,
"step": 1140
},
{
"epoch": 1.077788191190253,
"grad_norm": 1.4777096509933472,
"learning_rate": 9.99157990305906e-06,
"loss": 0.2086,
"step": 1150
},
{
"epoch": 1.0871602624179943,
"grad_norm": 1.3155533075332642,
"learning_rate": 9.99150668482479e-06,
"loss": 0.1969,
"step": 1160
},
{
"epoch": 1.0965323336457358,
"grad_norm": 1.5277265310287476,
"learning_rate": 9.99143346659052e-06,
"loss": 0.1923,
"step": 1170
},
{
"epoch": 1.105904404873477,
"grad_norm": 1.3764179944992065,
"learning_rate": 9.991360248356251e-06,
"loss": 0.1916,
"step": 1180
},
{
"epoch": 1.1152764761012184,
"grad_norm": 1.6024688482284546,
"learning_rate": 9.991287030121982e-06,
"loss": 0.185,
"step": 1190
},
{
"epoch": 1.1246485473289598,
"grad_norm": 1.2752821445465088,
"learning_rate": 9.991213811887713e-06,
"loss": 0.1829,
"step": 1200
},
{
"epoch": 1.134020618556701,
"grad_norm": 1.4704368114471436,
"learning_rate": 9.991140593653444e-06,
"loss": 0.2006,
"step": 1210
},
{
"epoch": 1.1433926897844424,
"grad_norm": 1.3614213466644287,
"learning_rate": 9.991067375419176e-06,
"loss": 0.1776,
"step": 1220
},
{
"epoch": 1.1527647610121836,
"grad_norm": 1.2852075099945068,
"learning_rate": 9.990994157184905e-06,
"loss": 0.2116,
"step": 1230
},
{
"epoch": 1.162136832239925,
"grad_norm": 1.1774332523345947,
"learning_rate": 9.990920938950636e-06,
"loss": 0.1909,
"step": 1240
},
{
"epoch": 1.1715089034676662,
"grad_norm": 1.0442605018615723,
"learning_rate": 9.990847720716368e-06,
"loss": 0.1933,
"step": 1250
},
{
"epoch": 1.1715089034676662,
"eval_loss": 0.08017747104167938,
"eval_pearson_cosine": 0.7703680992126465,
"eval_pearson_dot": 0.6808142066001892,
"eval_pearson_euclidean": 0.7676056623458862,
"eval_pearson_manhattan": 0.7677772045135498,
"eval_runtime": 22.1599,
"eval_samples_per_second": 67.69,
"eval_spearman_cosine": 0.7790172740054649,
"eval_spearman_dot": 0.6796557194170769,
"eval_spearman_euclidean": 0.7739566900498013,
"eval_spearman_manhattan": 0.7741509176342483,
"eval_steps_per_second": 8.484,
"step": 1250
},
{
"epoch": 1.1808809746954076,
"grad_norm": 1.3561466932296753,
"learning_rate": 9.990774502482099e-06,
"loss": 0.1921,
"step": 1260
},
{
"epoch": 1.190253045923149,
"grad_norm": 1.2151105403900146,
"learning_rate": 9.99070128424783e-06,
"loss": 0.1865,
"step": 1270
},
{
"epoch": 1.1996251171508903,
"grad_norm": 1.4363489151000977,
"learning_rate": 9.99062806601356e-06,
"loss": 0.2071,
"step": 1280
},
{
"epoch": 1.2089971883786317,
"grad_norm": 1.1078994274139404,
"learning_rate": 9.990554847779291e-06,
"loss": 0.1984,
"step": 1290
},
{
"epoch": 1.218369259606373,
"grad_norm": 1.4608142375946045,
"learning_rate": 9.990481629545022e-06,
"loss": 0.1926,
"step": 1300
},
{
"epoch": 1.2277413308341143,
"grad_norm": 1.5290361642837524,
"learning_rate": 9.990408411310753e-06,
"loss": 0.1935,
"step": 1310
},
{
"epoch": 1.2371134020618557,
"grad_norm": 1.09344482421875,
"learning_rate": 9.990335193076485e-06,
"loss": 0.2026,
"step": 1320
},
{
"epoch": 1.246485473289597,
"grad_norm": 1.5567576885223389,
"learning_rate": 9.990261974842216e-06,
"loss": 0.1968,
"step": 1330
},
{
"epoch": 1.2558575445173383,
"grad_norm": 1.243221402168274,
"learning_rate": 9.990188756607947e-06,
"loss": 0.1859,
"step": 1340
},
{
"epoch": 1.2652296157450795,
"grad_norm": 1.5287493467330933,
"learning_rate": 9.990115538373678e-06,
"loss": 0.2067,
"step": 1350
},
{
"epoch": 1.274601686972821,
"grad_norm": 1.1587677001953125,
"learning_rate": 9.990042320139408e-06,
"loss": 0.1848,
"step": 1360
},
{
"epoch": 1.2839737582005624,
"grad_norm": 1.3521069288253784,
"learning_rate": 9.989969101905139e-06,
"loss": 0.1975,
"step": 1370
},
{
"epoch": 1.2933458294283038,
"grad_norm": 1.1655584573745728,
"learning_rate": 9.98989588367087e-06,
"loss": 0.1963,
"step": 1380
},
{
"epoch": 1.302717900656045,
"grad_norm": 1.1636890172958374,
"learning_rate": 9.989822665436602e-06,
"loss": 0.1768,
"step": 1390
},
{
"epoch": 1.3120899718837864,
"grad_norm": 1.3106030225753784,
"learning_rate": 9.989749447202333e-06,
"loss": 0.1918,
"step": 1400
},
{
"epoch": 1.3214620431115276,
"grad_norm": 1.314274787902832,
"learning_rate": 9.989676228968062e-06,
"loss": 0.1733,
"step": 1410
},
{
"epoch": 1.330834114339269,
"grad_norm": 1.646234393119812,
"learning_rate": 9.989603010733795e-06,
"loss": 0.1797,
"step": 1420
},
{
"epoch": 1.3402061855670104,
"grad_norm": 1.3321646451950073,
"learning_rate": 9.989529792499525e-06,
"loss": 0.1726,
"step": 1430
},
{
"epoch": 1.3495782567947516,
"grad_norm": 1.3959871530532837,
"learning_rate": 9.989456574265256e-06,
"loss": 0.1889,
"step": 1440
},
{
"epoch": 1.358950328022493,
"grad_norm": 1.1790053844451904,
"learning_rate": 9.989383356030987e-06,
"loss": 0.1779,
"step": 1450
},
{
"epoch": 1.3683223992502342,
"grad_norm": 1.7612881660461426,
"learning_rate": 9.989310137796718e-06,
"loss": 0.1834,
"step": 1460
},
{
"epoch": 1.3776944704779757,
"grad_norm": 1.2366232872009277,
"learning_rate": 9.989236919562448e-06,
"loss": 0.1996,
"step": 1470
},
{
"epoch": 1.387066541705717,
"grad_norm": 1.550465703010559,
"learning_rate": 9.989163701328179e-06,
"loss": 0.1991,
"step": 1480
},
{
"epoch": 1.3964386129334583,
"grad_norm": 1.2935107946395874,
"learning_rate": 9.98909048309391e-06,
"loss": 0.1956,
"step": 1490
},
{
"epoch": 1.4058106841611997,
"grad_norm": 0.9709776639938354,
"learning_rate": 9.989017264859642e-06,
"loss": 0.1872,
"step": 1500
},
{
"epoch": 1.4058106841611997,
"eval_loss": 0.07902642339468002,
"eval_pearson_cosine": 0.7684531211853027,
"eval_pearson_dot": 0.6580111980438232,
"eval_pearson_euclidean": 0.768983006477356,
"eval_pearson_manhattan": 0.7692690491676331,
"eval_runtime": 23.5462,
"eval_samples_per_second": 63.704,
"eval_spearman_cosine": 0.7777241764238451,
"eval_spearman_dot": 0.6568945327389543,
"eval_spearman_euclidean": 0.7752386276211667,
"eval_spearman_manhattan": 0.7755204438878311,
"eval_steps_per_second": 7.984,
"step": 1500
},
{
"epoch": 1.415182755388941,
"grad_norm": 1.5001726150512695,
"learning_rate": 9.988944046625373e-06,
"loss": 0.2094,
"step": 1510
},
{
"epoch": 1.4245548266166823,
"grad_norm": 1.1697657108306885,
"learning_rate": 9.988870828391102e-06,
"loss": 0.1862,
"step": 1520
},
{
"epoch": 1.4339268978444237,
"grad_norm": 1.3496723175048828,
"learning_rate": 9.988797610156834e-06,
"loss": 0.1863,
"step": 1530
},
{
"epoch": 1.443298969072165,
"grad_norm": 1.3314088582992554,
"learning_rate": 9.988724391922565e-06,
"loss": 0.1809,
"step": 1540
},
{
"epoch": 1.4526710402999063,
"grad_norm": 1.2966681718826294,
"learning_rate": 9.988651173688296e-06,
"loss": 0.1799,
"step": 1550
},
{
"epoch": 1.4620431115276475,
"grad_norm": 1.141318917274475,
"learning_rate": 9.988577955454027e-06,
"loss": 0.1983,
"step": 1560
},
{
"epoch": 1.471415182755389,
"grad_norm": 1.1170287132263184,
"learning_rate": 9.98850473721976e-06,
"loss": 0.1823,
"step": 1570
},
{
"epoch": 1.4807872539831304,
"grad_norm": 1.4531837701797485,
"learning_rate": 9.988431518985488e-06,
"loss": 0.1693,
"step": 1580
},
{
"epoch": 1.4901593252108716,
"grad_norm": 1.5249556303024292,
"learning_rate": 9.988358300751219e-06,
"loss": 0.2014,
"step": 1590
},
{
"epoch": 1.499531396438613,
"grad_norm": 1.319170594215393,
"learning_rate": 9.988285082516951e-06,
"loss": 0.1841,
"step": 1600
},
{
"epoch": 1.5089034676663542,
"grad_norm": 1.2907928228378296,
"learning_rate": 9.988211864282682e-06,
"loss": 0.1778,
"step": 1610
},
{
"epoch": 1.5182755388940956,
"grad_norm": 1.170284628868103,
"learning_rate": 9.988138646048413e-06,
"loss": 0.1668,
"step": 1620
},
{
"epoch": 1.527647610121837,
"grad_norm": 1.4182498455047607,
"learning_rate": 9.988065427814144e-06,
"loss": 0.1968,
"step": 1630
},
{
"epoch": 1.5370196813495782,
"grad_norm": 1.3137290477752686,
"learning_rate": 9.987992209579874e-06,
"loss": 0.1734,
"step": 1640
},
{
"epoch": 1.5463917525773194,
"grad_norm": 1.458721399307251,
"learning_rate": 9.987918991345605e-06,
"loss": 0.209,
"step": 1650
},
{
"epoch": 1.5557638238050608,
"grad_norm": 1.1368082761764526,
"learning_rate": 9.987845773111336e-06,
"loss": 0.1831,
"step": 1660
},
{
"epoch": 1.5651358950328023,
"grad_norm": 1.0743663311004639,
"learning_rate": 9.987772554877068e-06,
"loss": 0.1883,
"step": 1670
},
{
"epoch": 1.5745079662605437,
"grad_norm": 1.4294681549072266,
"learning_rate": 9.987699336642799e-06,
"loss": 0.1851,
"step": 1680
},
{
"epoch": 1.5838800374882849,
"grad_norm": 1.0537577867507935,
"learning_rate": 9.987626118408528e-06,
"loss": 0.1818,
"step": 1690
},
{
"epoch": 1.5932521087160263,
"grad_norm": 1.3930073976516724,
"learning_rate": 9.98755290017426e-06,
"loss": 0.1876,
"step": 1700
},
{
"epoch": 1.6026241799437675,
"grad_norm": 1.3290959596633911,
"learning_rate": 9.987479681939991e-06,
"loss": 0.1777,
"step": 1710
},
{
"epoch": 1.611996251171509,
"grad_norm": 1.3895900249481201,
"learning_rate": 9.987406463705722e-06,
"loss": 0.1728,
"step": 1720
},
{
"epoch": 1.6213683223992503,
"grad_norm": 1.336679220199585,
"learning_rate": 9.987333245471453e-06,
"loss": 0.202,
"step": 1730
},
{
"epoch": 1.6307403936269915,
"grad_norm": 1.4338617324829102,
"learning_rate": 9.987260027237184e-06,
"loss": 0.1745,
"step": 1740
},
{
"epoch": 1.640112464854733,
"grad_norm": 1.1854125261306763,
"learning_rate": 9.987186809002914e-06,
"loss": 0.1628,
"step": 1750
},
{
"epoch": 1.640112464854733,
"eval_loss": 0.07191870361566544,
"eval_pearson_cosine": 0.7651911973953247,
"eval_pearson_dot": 0.6584045886993408,
"eval_pearson_euclidean": 0.7615811228752136,
"eval_pearson_manhattan": 0.7618914842605591,
"eval_runtime": 22.2177,
"eval_samples_per_second": 67.514,
"eval_spearman_cosine": 0.7733826669765486,
"eval_spearman_dot": 0.6574446699366203,
"eval_spearman_euclidean": 0.7678793093449918,
"eval_spearman_manhattan": 0.7684997409854779,
"eval_steps_per_second": 8.462,
"step": 1750
},
{
"epoch": 1.6494845360824741,
"grad_norm": 1.468126654624939,
"learning_rate": 9.987113590768645e-06,
"loss": 0.1714,
"step": 1760
},
{
"epoch": 1.6588566073102156,
"grad_norm": 1.3639568090438843,
"learning_rate": 9.987040372534378e-06,
"loss": 0.1839,
"step": 1770
},
{
"epoch": 1.668228678537957,
"grad_norm": 1.2494312524795532,
"learning_rate": 9.986967154300108e-06,
"loss": 0.1753,
"step": 1780
},
{
"epoch": 1.6776007497656982,
"grad_norm": 1.2897909879684448,
"learning_rate": 9.986893936065839e-06,
"loss": 0.1704,
"step": 1790
},
{
"epoch": 1.6869728209934396,
"grad_norm": 1.413866400718689,
"learning_rate": 9.98682071783157e-06,
"loss": 0.1868,
"step": 1800
},
{
"epoch": 1.6963448922211808,
"grad_norm": 1.093849778175354,
"learning_rate": 9.9867474995973e-06,
"loss": 0.1889,
"step": 1810
},
{
"epoch": 1.7057169634489222,
"grad_norm": 1.3857814073562622,
"learning_rate": 9.986674281363031e-06,
"loss": 0.1818,
"step": 1820
},
{
"epoch": 1.7150890346766636,
"grad_norm": 1.3772344589233398,
"learning_rate": 9.986601063128762e-06,
"loss": 0.1683,
"step": 1830
},
{
"epoch": 1.7244611059044048,
"grad_norm": 1.3299206495285034,
"learning_rate": 9.986527844894493e-06,
"loss": 0.1865,
"step": 1840
},
{
"epoch": 1.7338331771321462,
"grad_norm": 1.3139843940734863,
"learning_rate": 9.986454626660225e-06,
"loss": 0.169,
"step": 1850
},
{
"epoch": 1.7432052483598874,
"grad_norm": 1.3562296628952026,
"learning_rate": 9.986381408425954e-06,
"loss": 0.2012,
"step": 1860
},
{
"epoch": 1.7525773195876289,
"grad_norm": 1.2332826852798462,
"learning_rate": 9.986308190191685e-06,
"loss": 0.1877,
"step": 1870
},
{
"epoch": 1.7619493908153703,
"grad_norm": 1.083622932434082,
"learning_rate": 9.986234971957418e-06,
"loss": 0.2026,
"step": 1880
},
{
"epoch": 1.7713214620431117,
"grad_norm": 1.6391818523406982,
"learning_rate": 9.986161753723148e-06,
"loss": 0.1902,
"step": 1890
},
{
"epoch": 1.780693533270853,
"grad_norm": 1.0985593795776367,
"learning_rate": 9.986088535488879e-06,
"loss": 0.1845,
"step": 1900
},
{
"epoch": 1.790065604498594,
"grad_norm": 1.609025001525879,
"learning_rate": 9.98601531725461e-06,
"loss": 0.1939,
"step": 1910
},
{
"epoch": 1.7994376757263355,
"grad_norm": 1.0637205839157104,
"learning_rate": 9.98594209902034e-06,
"loss": 0.1775,
"step": 1920
},
{
"epoch": 1.808809746954077,
"grad_norm": 1.159469723701477,
"learning_rate": 9.985868880786071e-06,
"loss": 0.161,
"step": 1930
},
{
"epoch": 1.8181818181818183,
"grad_norm": 1.1251918077468872,
"learning_rate": 9.985795662551802e-06,
"loss": 0.1965,
"step": 1940
},
{
"epoch": 1.8275538894095595,
"grad_norm": 1.3804899454116821,
"learning_rate": 9.985722444317534e-06,
"loss": 0.1768,
"step": 1950
},
{
"epoch": 1.8369259606373007,
"grad_norm": 1.194275140762329,
"learning_rate": 9.985649226083265e-06,
"loss": 0.1782,
"step": 1960
},
{
"epoch": 1.8462980318650422,
"grad_norm": 1.5173845291137695,
"learning_rate": 9.985576007848996e-06,
"loss": 0.193,
"step": 1970
},
{
"epoch": 1.8556701030927836,
"grad_norm": 1.7733920812606812,
"learning_rate": 9.985502789614727e-06,
"loss": 0.1804,
"step": 1980
},
{
"epoch": 1.865042174320525,
"grad_norm": 1.1430355310440063,
"learning_rate": 9.985429571380457e-06,
"loss": 0.1869,
"step": 1990
},
{
"epoch": 1.8744142455482662,
"grad_norm": 1.3633067607879639,
"learning_rate": 9.985356353146188e-06,
"loss": 0.1983,
"step": 2000
},
{
"epoch": 1.8744142455482662,
"eval_loss": 0.07371454685926437,
"eval_pearson_cosine": 0.7772414684295654,
"eval_pearson_dot": 0.660416841506958,
"eval_pearson_euclidean": 0.7648824453353882,
"eval_pearson_manhattan": 0.7654331922531128,
"eval_runtime": 22.1973,
"eval_samples_per_second": 67.576,
"eval_spearman_cosine": 0.7863920785446639,
"eval_spearman_dot": 0.6607574545837009,
"eval_spearman_euclidean": 0.7740511645049805,
"eval_spearman_manhattan": 0.7747616492851076,
"eval_steps_per_second": 8.47,
"step": 2000
},
{
"epoch": 1.8837863167760074,
"grad_norm": 1.116107702255249,
"learning_rate": 9.985283134911919e-06,
"loss": 0.1775,
"step": 2010
},
{
"epoch": 1.8931583880037488,
"grad_norm": 1.280927300453186,
"learning_rate": 9.985209916677651e-06,
"loss": 0.1853,
"step": 2020
},
{
"epoch": 1.9025304592314902,
"grad_norm": 1.419044852256775,
"learning_rate": 9.98513669844338e-06,
"loss": 0.1767,
"step": 2030
},
{
"epoch": 1.9119025304592316,
"grad_norm": 1.4140015840530396,
"learning_rate": 9.985063480209111e-06,
"loss": 0.1968,
"step": 2040
},
{
"epoch": 1.9212746016869728,
"grad_norm": 1.23015296459198,
"learning_rate": 9.984990261974844e-06,
"loss": 0.1559,
"step": 2050
},
{
"epoch": 1.930646672914714,
"grad_norm": 1.4209731817245483,
"learning_rate": 9.984917043740574e-06,
"loss": 0.18,
"step": 2060
},
{
"epoch": 1.9400187441424555,
"grad_norm": 1.5270899534225464,
"learning_rate": 9.984843825506305e-06,
"loss": 0.1858,
"step": 2070
},
{
"epoch": 1.9493908153701969,
"grad_norm": 2.0037920475006104,
"learning_rate": 9.984770607272036e-06,
"loss": 0.1812,
"step": 2080
},
{
"epoch": 1.9587628865979383,
"grad_norm": 1.4397103786468506,
"learning_rate": 9.984697389037767e-06,
"loss": 0.1853,
"step": 2090
},
{
"epoch": 1.9681349578256795,
"grad_norm": 1.555161476135254,
"learning_rate": 9.984624170803497e-06,
"loss": 0.1758,
"step": 2100
},
{
"epoch": 1.9775070290534207,
"grad_norm": 1.1453354358673096,
"learning_rate": 9.984550952569228e-06,
"loss": 0.1821,
"step": 2110
},
{
"epoch": 1.986879100281162,
"grad_norm": 1.3050484657287598,
"learning_rate": 9.984477734334959e-06,
"loss": 0.1828,
"step": 2120
},
{
"epoch": 1.9962511715089035,
"grad_norm": 1.1858463287353516,
"learning_rate": 9.984404516100691e-06,
"loss": 0.1801,
"step": 2130
},
{
"epoch": 2.005623242736645,
"grad_norm": 1.2467753887176514,
"learning_rate": 9.984331297866422e-06,
"loss": 0.1651,
"step": 2140
},
{
"epoch": 2.014995313964386,
"grad_norm": 1.9730074405670166,
"learning_rate": 9.984258079632151e-06,
"loss": 0.1654,
"step": 2150
},
{
"epoch": 2.0243673851921273,
"grad_norm": 1.384181261062622,
"learning_rate": 9.984184861397884e-06,
"loss": 0.151,
"step": 2160
},
{
"epoch": 2.0337394564198688,
"grad_norm": 1.2262136936187744,
"learning_rate": 9.984111643163614e-06,
"loss": 0.1338,
"step": 2170
},
{
"epoch": 2.04311152764761,
"grad_norm": 1.3417856693267822,
"learning_rate": 9.984038424929345e-06,
"loss": 0.1445,
"step": 2180
},
{
"epoch": 2.0524835988753516,
"grad_norm": 1.3032526969909668,
"learning_rate": 9.983965206695076e-06,
"loss": 0.1675,
"step": 2190
},
{
"epoch": 2.0618556701030926,
"grad_norm": 1.4586397409439087,
"learning_rate": 9.983891988460808e-06,
"loss": 0.1503,
"step": 2200
},
{
"epoch": 2.071227741330834,
"grad_norm": 1.8017582893371582,
"learning_rate": 9.983818770226537e-06,
"loss": 0.1614,
"step": 2210
},
{
"epoch": 2.0805998125585754,
"grad_norm": 1.1136542558670044,
"learning_rate": 9.983745551992268e-06,
"loss": 0.1385,
"step": 2220
},
{
"epoch": 2.089971883786317,
"grad_norm": 1.48130202293396,
"learning_rate": 9.983672333758e-06,
"loss": 0.1448,
"step": 2230
},
{
"epoch": 2.0993439550140582,
"grad_norm": 1.1847114562988281,
"learning_rate": 9.983599115523731e-06,
"loss": 0.1263,
"step": 2240
},
{
"epoch": 2.108716026241799,
"grad_norm": 1.068515419960022,
"learning_rate": 9.983525897289462e-06,
"loss": 0.1448,
"step": 2250
},
{
"epoch": 2.108716026241799,
"eval_loss": 0.0637284442782402,
"eval_pearson_cosine": 0.766581654548645,
"eval_pearson_dot": 0.652958333492279,
"eval_pearson_euclidean": 0.76385897397995,
"eval_pearson_manhattan": 0.7643536329269409,
"eval_runtime": 24.9836,
"eval_samples_per_second": 60.039,
"eval_spearman_cosine": 0.7736502023043434,
"eval_spearman_dot": 0.6506365364740643,
"eval_spearman_euclidean": 0.7701725336122238,
"eval_spearman_manhattan": 0.7705851416924343,
"eval_steps_per_second": 7.525,
"step": 2250
},
{
"epoch": 2.1180880974695406,
"grad_norm": 1.2607600688934326,
"learning_rate": 9.983452679055193e-06,
"loss": 0.1405,
"step": 2260
},
{
"epoch": 2.127460168697282,
"grad_norm": 1.3096617460250854,
"learning_rate": 9.983379460820924e-06,
"loss": 0.159,
"step": 2270
},
{
"epoch": 2.1368322399250235,
"grad_norm": 1.4220956563949585,
"learning_rate": 9.983306242586654e-06,
"loss": 0.1634,
"step": 2280
},
{
"epoch": 2.146204311152765,
"grad_norm": 1.5565595626831055,
"learning_rate": 9.983233024352385e-06,
"loss": 0.1549,
"step": 2290
},
{
"epoch": 2.155576382380506,
"grad_norm": 1.357906460762024,
"learning_rate": 9.983159806118118e-06,
"loss": 0.1503,
"step": 2300
},
{
"epoch": 2.1649484536082473,
"grad_norm": 1.0181514024734497,
"learning_rate": 9.983086587883848e-06,
"loss": 0.1242,
"step": 2310
},
{
"epoch": 2.1743205248359887,
"grad_norm": 1.2936785221099854,
"learning_rate": 9.983013369649577e-06,
"loss": 0.1516,
"step": 2320
},
{
"epoch": 2.18369259606373,
"grad_norm": 1.353125810623169,
"learning_rate": 9.98294015141531e-06,
"loss": 0.1576,
"step": 2330
},
{
"epoch": 2.1930646672914715,
"grad_norm": 1.5978926420211792,
"learning_rate": 9.98286693318104e-06,
"loss": 0.143,
"step": 2340
},
{
"epoch": 2.2024367385192125,
"grad_norm": 1.643609642982483,
"learning_rate": 9.982793714946771e-06,
"loss": 0.1509,
"step": 2350
},
{
"epoch": 2.211808809746954,
"grad_norm": 1.2868740558624268,
"learning_rate": 9.982720496712502e-06,
"loss": 0.1407,
"step": 2360
},
{
"epoch": 2.2211808809746953,
"grad_norm": 1.662234902381897,
"learning_rate": 9.982647278478233e-06,
"loss": 0.1499,
"step": 2370
},
{
"epoch": 2.2305529522024368,
"grad_norm": 1.7390748262405396,
"learning_rate": 9.982574060243964e-06,
"loss": 0.139,
"step": 2380
},
{
"epoch": 2.239925023430178,
"grad_norm": 1.2645044326782227,
"learning_rate": 9.982500842009694e-06,
"loss": 0.1541,
"step": 2390
},
{
"epoch": 2.2492970946579196,
"grad_norm": 1.5143808126449585,
"learning_rate": 9.982427623775425e-06,
"loss": 0.15,
"step": 2400
},
{
"epoch": 2.2586691658856606,
"grad_norm": 1.516233205795288,
"learning_rate": 9.982354405541158e-06,
"loss": 0.1387,
"step": 2410
},
{
"epoch": 2.268041237113402,
"grad_norm": 1.607926368713379,
"learning_rate": 9.982281187306888e-06,
"loss": 0.1459,
"step": 2420
},
{
"epoch": 2.2774133083411434,
"grad_norm": 1.433325171470642,
"learning_rate": 9.982207969072617e-06,
"loss": 0.145,
"step": 2430
},
{
"epoch": 2.286785379568885,
"grad_norm": 1.4051145315170288,
"learning_rate": 9.98213475083835e-06,
"loss": 0.1433,
"step": 2440
},
{
"epoch": 2.296157450796626,
"grad_norm": 1.5076231956481934,
"learning_rate": 9.98206153260408e-06,
"loss": 0.1514,
"step": 2450
},
{
"epoch": 2.3055295220243672,
"grad_norm": 1.185927152633667,
"learning_rate": 9.981988314369811e-06,
"loss": 0.1315,
"step": 2460
},
{
"epoch": 2.3149015932521086,
"grad_norm": 1.1687299013137817,
"learning_rate": 9.981915096135542e-06,
"loss": 0.1611,
"step": 2470
},
{
"epoch": 2.32427366447985,
"grad_norm": 1.205338716506958,
"learning_rate": 9.981841877901274e-06,
"loss": 0.1587,
"step": 2480
},
{
"epoch": 2.3336457357075915,
"grad_norm": 1.1079684495925903,
"learning_rate": 9.981768659667004e-06,
"loss": 0.142,
"step": 2490
},
{
"epoch": 2.3430178069353325,
"grad_norm": 1.1689645051956177,
"learning_rate": 9.981695441432734e-06,
"loss": 0.1449,
"step": 2500
},
{
"epoch": 2.3430178069353325,
"eval_loss": 0.05785529315471649,
"eval_pearson_cosine": 0.7640599012374878,
"eval_pearson_dot": 0.6659318208694458,
"eval_pearson_euclidean": 0.7584241628646851,
"eval_pearson_manhattan": 0.7589800357818604,
"eval_runtime": 27.3942,
"eval_samples_per_second": 54.756,
"eval_spearman_cosine": 0.7698402659202235,
"eval_spearman_dot": 0.6637382071207051,
"eval_spearman_euclidean": 0.765183939076614,
"eval_spearman_manhattan": 0.7654494135153407,
"eval_steps_per_second": 6.863,
"step": 2500
},
{
"epoch": 2.352389878163074,
"grad_norm": 1.1410503387451172,
"learning_rate": 9.981622223198467e-06,
"loss": 0.1253,
"step": 2510
},
{
"epoch": 2.3617619493908153,
"grad_norm": 1.6562408208847046,
"learning_rate": 9.981549004964197e-06,
"loss": 0.1363,
"step": 2520
},
{
"epoch": 2.3711340206185567,
"grad_norm": 1.3503327369689941,
"learning_rate": 9.981475786729928e-06,
"loss": 0.141,
"step": 2530
},
{
"epoch": 2.380506091846298,
"grad_norm": 1.4653688669204712,
"learning_rate": 9.981402568495659e-06,
"loss": 0.1452,
"step": 2540
},
{
"epoch": 2.3898781630740396,
"grad_norm": 1.4135221242904663,
"learning_rate": 9.98132935026139e-06,
"loss": 0.1387,
"step": 2550
},
{
"epoch": 2.3992502343017805,
"grad_norm": 1.1758474111557007,
"learning_rate": 9.98125613202712e-06,
"loss": 0.1402,
"step": 2560
},
{
"epoch": 2.408622305529522,
"grad_norm": 1.6394227743148804,
"learning_rate": 9.981182913792851e-06,
"loss": 0.1434,
"step": 2570
},
{
"epoch": 2.4179943767572634,
"grad_norm": 1.5223402976989746,
"learning_rate": 9.981109695558584e-06,
"loss": 0.1433,
"step": 2580
},
{
"epoch": 2.427366447985005,
"grad_norm": 1.3722361326217651,
"learning_rate": 9.981036477324314e-06,
"loss": 0.145,
"step": 2590
},
{
"epoch": 2.436738519212746,
"grad_norm": 1.4288251399993896,
"learning_rate": 9.980963259090045e-06,
"loss": 0.1419,
"step": 2600
},
{
"epoch": 2.446110590440487,
"grad_norm": 1.3789891004562378,
"learning_rate": 9.980890040855776e-06,
"loss": 0.1428,
"step": 2610
},
{
"epoch": 2.4554826616682286,
"grad_norm": 1.3833218812942505,
"learning_rate": 9.980816822621507e-06,
"loss": 0.163,
"step": 2620
},
{
"epoch": 2.46485473289597,
"grad_norm": 1.2749391794204712,
"learning_rate": 9.980743604387237e-06,
"loss": 0.1457,
"step": 2630
},
{
"epoch": 2.4742268041237114,
"grad_norm": 1.3677037954330444,
"learning_rate": 9.980670386152968e-06,
"loss": 0.1393,
"step": 2640
},
{
"epoch": 2.483598875351453,
"grad_norm": 1.2386823892593384,
"learning_rate": 9.980597167918699e-06,
"loss": 0.1446,
"step": 2650
},
{
"epoch": 2.492970946579194,
"grad_norm": 1.6553146839141846,
"learning_rate": 9.98052394968443e-06,
"loss": 0.1399,
"step": 2660
},
{
"epoch": 2.5023430178069352,
"grad_norm": 1.2258574962615967,
"learning_rate": 9.98045073145016e-06,
"loss": 0.1557,
"step": 2670
},
{
"epoch": 2.5117150890346767,
"grad_norm": 1.1680238246917725,
"learning_rate": 9.980377513215891e-06,
"loss": 0.14,
"step": 2680
},
{
"epoch": 2.521087160262418,
"grad_norm": 1.3764533996582031,
"learning_rate": 9.980304294981624e-06,
"loss": 0.1429,
"step": 2690
},
{
"epoch": 2.530459231490159,
"grad_norm": 1.1607757806777954,
"learning_rate": 9.980231076747354e-06,
"loss": 0.156,
"step": 2700
},
{
"epoch": 2.539831302717901,
"grad_norm": 1.30258309841156,
"learning_rate": 9.980157858513085e-06,
"loss": 0.1334,
"step": 2710
},
{
"epoch": 2.549203373945642,
"grad_norm": 1.3965803384780884,
"learning_rate": 9.980084640278816e-06,
"loss": 0.1532,
"step": 2720
},
{
"epoch": 2.5585754451733833,
"grad_norm": 1.2492479085922241,
"learning_rate": 9.980011422044547e-06,
"loss": 0.1538,
"step": 2730
},
{
"epoch": 2.5679475164011247,
"grad_norm": 1.5879229307174683,
"learning_rate": 9.979938203810277e-06,
"loss": 0.1393,
"step": 2740
},
{
"epoch": 2.5773195876288657,
"grad_norm": 1.5499955415725708,
"learning_rate": 9.979864985576008e-06,
"loss": 0.1443,
"step": 2750
},
{
"epoch": 2.5773195876288657,
"eval_loss": 0.059572458267211914,
"eval_pearson_cosine": 0.7583234310150146,
"eval_pearson_dot": 0.6585268378257751,
"eval_pearson_euclidean": 0.7594324946403503,
"eval_pearson_manhattan": 0.7599164843559265,
"eval_runtime": 25.1198,
"eval_samples_per_second": 59.714,
"eval_spearman_cosine": 0.7658877891929784,
"eval_spearman_dot": 0.6550703356470525,
"eval_spearman_euclidean": 0.7651954936870381,
"eval_spearman_manhattan": 0.7656066832066194,
"eval_steps_per_second": 7.484,
"step": 2750
},
{
"epoch": 2.5866916588566076,
"grad_norm": 1.1182575225830078,
"learning_rate": 9.97979176734174e-06,
"loss": 0.1449,
"step": 2760
},
{
"epoch": 2.5960637300843485,
"grad_norm": 1.3228731155395508,
"learning_rate": 9.979718549107471e-06,
"loss": 0.1339,
"step": 2770
},
{
"epoch": 2.60543580131209,
"grad_norm": 1.3763021230697632,
"learning_rate": 9.9796453308732e-06,
"loss": 0.1379,
"step": 2780
},
{
"epoch": 2.6148078725398314,
"grad_norm": 1.6708637475967407,
"learning_rate": 9.979572112638933e-06,
"loss": 0.1491,
"step": 2790
},
{
"epoch": 2.624179943767573,
"grad_norm": 1.0826717615127563,
"learning_rate": 9.979498894404664e-06,
"loss": 0.1447,
"step": 2800
},
{
"epoch": 2.633552014995314,
"grad_norm": 1.4416155815124512,
"learning_rate": 9.979425676170394e-06,
"loss": 0.1398,
"step": 2810
},
{
"epoch": 2.642924086223055,
"grad_norm": 1.3966304063796997,
"learning_rate": 9.979352457936125e-06,
"loss": 0.1332,
"step": 2820
},
{
"epoch": 2.6522961574507966,
"grad_norm": 1.5255811214447021,
"learning_rate": 9.979279239701856e-06,
"loss": 0.1423,
"step": 2830
},
{
"epoch": 2.661668228678538,
"grad_norm": 1.3866652250289917,
"learning_rate": 9.979206021467587e-06,
"loss": 0.1554,
"step": 2840
},
{
"epoch": 2.6710402999062794,
"grad_norm": 1.3477802276611328,
"learning_rate": 9.979132803233317e-06,
"loss": 0.1547,
"step": 2850
},
{
"epoch": 2.680412371134021,
"grad_norm": 1.540963053703308,
"learning_rate": 9.97905958499905e-06,
"loss": 0.1229,
"step": 2860
},
{
"epoch": 2.689784442361762,
"grad_norm": 1.697350025177002,
"learning_rate": 9.97898636676478e-06,
"loss": 0.153,
"step": 2870
},
{
"epoch": 2.6991565135895033,
"grad_norm": 1.6020257472991943,
"learning_rate": 9.978913148530511e-06,
"loss": 0.1334,
"step": 2880
},
{
"epoch": 2.7085285848172447,
"grad_norm": 1.7637958526611328,
"learning_rate": 9.978839930296242e-06,
"loss": 0.1513,
"step": 2890
},
{
"epoch": 2.717900656044986,
"grad_norm": 1.2917182445526123,
"learning_rate": 9.978766712061973e-06,
"loss": 0.1296,
"step": 2900
},
{
"epoch": 2.7272727272727275,
"grad_norm": 1.42876136302948,
"learning_rate": 9.978693493827704e-06,
"loss": 0.1276,
"step": 2910
},
{
"epoch": 2.7366447985004685,
"grad_norm": 1.340184211730957,
"learning_rate": 9.978620275593434e-06,
"loss": 0.164,
"step": 2920
},
{
"epoch": 2.74601686972821,
"grad_norm": 1.1638396978378296,
"learning_rate": 9.978547057359165e-06,
"loss": 0.1372,
"step": 2930
},
{
"epoch": 2.7553889409559513,
"grad_norm": 1.5060447454452515,
"learning_rate": 9.978473839124897e-06,
"loss": 0.1489,
"step": 2940
},
{
"epoch": 2.7647610121836927,
"grad_norm": 1.3632638454437256,
"learning_rate": 9.978400620890627e-06,
"loss": 0.1242,
"step": 2950
},
{
"epoch": 2.774133083411434,
"grad_norm": 1.6402980089187622,
"learning_rate": 9.978327402656359e-06,
"loss": 0.1395,
"step": 2960
},
{
"epoch": 2.783505154639175,
"grad_norm": 1.8350452184677124,
"learning_rate": 9.97825418442209e-06,
"loss": 0.1501,
"step": 2970
},
{
"epoch": 2.7928772258669166,
"grad_norm": 1.6517874002456665,
"learning_rate": 9.97818096618782e-06,
"loss": 0.1596,
"step": 2980
},
{
"epoch": 2.802249297094658,
"grad_norm": 1.7441259622573853,
"learning_rate": 9.978107747953551e-06,
"loss": 0.1344,
"step": 2990
},
{
"epoch": 2.8116213683223994,
"grad_norm": 1.4474517107009888,
"learning_rate": 9.978034529719282e-06,
"loss": 0.1363,
"step": 3000
},
{
"epoch": 2.8116213683223994,
"eval_loss": 0.05750729516148567,
"eval_pearson_cosine": 0.767126202583313,
"eval_pearson_dot": 0.676889181137085,
"eval_pearson_euclidean": 0.756407618522644,
"eval_pearson_manhattan": 0.7570176124572754,
"eval_runtime": 25.3699,
"eval_samples_per_second": 59.125,
"eval_spearman_cosine": 0.7727339030438767,
"eval_spearman_dot": 0.6755843192398268,
"eval_spearman_euclidean": 0.7624238185076594,
"eval_spearman_manhattan": 0.7629469399526556,
"eval_steps_per_second": 7.41,
"step": 3000
},
{
"epoch": 2.820993439550141,
"grad_norm": 1.4202260971069336,
"learning_rate": 9.977961311485013e-06,
"loss": 0.1456,
"step": 3010
},
{
"epoch": 2.830365510777882,
"grad_norm": 1.3678419589996338,
"learning_rate": 9.977888093250743e-06,
"loss": 0.1445,
"step": 3020
},
{
"epoch": 2.839737582005623,
"grad_norm": 1.168271541595459,
"learning_rate": 9.977814875016474e-06,
"loss": 0.1428,
"step": 3030
},
{
"epoch": 2.8491096532333646,
"grad_norm": 1.5929275751113892,
"learning_rate": 9.977741656782207e-06,
"loss": 0.1593,
"step": 3040
},
{
"epoch": 2.858481724461106,
"grad_norm": 1.265101432800293,
"learning_rate": 9.977668438547937e-06,
"loss": 0.1519,
"step": 3050
},
{
"epoch": 2.8678537956888475,
"grad_norm": 1.1187818050384521,
"learning_rate": 9.977595220313666e-06,
"loss": 0.1454,
"step": 3060
},
{
"epoch": 2.8772258669165884,
"grad_norm": 1.1976639032363892,
"learning_rate": 9.977522002079399e-06,
"loss": 0.1321,
"step": 3070
},
{
"epoch": 2.88659793814433,
"grad_norm": 1.7162209749221802,
"learning_rate": 9.97744878384513e-06,
"loss": 0.147,
"step": 3080
},
{
"epoch": 2.8959700093720713,
"grad_norm": 1.3301661014556885,
"learning_rate": 9.97737556561086e-06,
"loss": 0.1341,
"step": 3090
},
{
"epoch": 2.9053420805998127,
"grad_norm": 1.279984951019287,
"learning_rate": 9.977302347376591e-06,
"loss": 0.1342,
"step": 3100
},
{
"epoch": 2.914714151827554,
"grad_norm": 1.6548879146575928,
"learning_rate": 9.977229129142324e-06,
"loss": 0.1429,
"step": 3110
},
{
"epoch": 2.924086223055295,
"grad_norm": 0.9662721753120422,
"learning_rate": 9.977155910908053e-06,
"loss": 0.1524,
"step": 3120
},
{
"epoch": 2.9334582942830365,
"grad_norm": 1.5336380004882812,
"learning_rate": 9.977082692673783e-06,
"loss": 0.1445,
"step": 3130
},
{
"epoch": 2.942830365510778,
"grad_norm": 1.4380927085876465,
"learning_rate": 9.977009474439516e-06,
"loss": 0.1371,
"step": 3140
},
{
"epoch": 2.9522024367385193,
"grad_norm": 1.551700472831726,
"learning_rate": 9.976936256205247e-06,
"loss": 0.135,
"step": 3150
},
{
"epoch": 2.9615745079662608,
"grad_norm": 1.32683265209198,
"learning_rate": 9.976863037970977e-06,
"loss": 0.1444,
"step": 3160
},
{
"epoch": 2.9709465791940017,
"grad_norm": 1.3574503660202026,
"learning_rate": 9.976789819736708e-06,
"loss": 0.1391,
"step": 3170
},
{
"epoch": 2.980318650421743,
"grad_norm": 1.506625771522522,
"learning_rate": 9.976716601502439e-06,
"loss": 0.1552,
"step": 3180
},
{
"epoch": 2.9896907216494846,
"grad_norm": 1.3970105648040771,
"learning_rate": 9.97664338326817e-06,
"loss": 0.147,
"step": 3190
},
{
"epoch": 2.999062792877226,
"grad_norm": 1.4303011894226074,
"learning_rate": 9.9765701650339e-06,
"loss": 0.1559,
"step": 3200
},
{
"epoch": 3.0084348641049674,
"grad_norm": 1.377488613128662,
"learning_rate": 9.976496946799633e-06,
"loss": 0.1187,
"step": 3210
},
{
"epoch": 3.0178069353327084,
"grad_norm": 1.1664360761642456,
"learning_rate": 9.976423728565364e-06,
"loss": 0.1101,
"step": 3220
},
{
"epoch": 3.02717900656045,
"grad_norm": 0.9129014015197754,
"learning_rate": 9.976350510331093e-06,
"loss": 0.111,
"step": 3230
},
{
"epoch": 3.036551077788191,
"grad_norm": 1.2628843784332275,
"learning_rate": 9.976277292096825e-06,
"loss": 0.1141,
"step": 3240
},
{
"epoch": 3.0459231490159326,
"grad_norm": 1.1534360647201538,
"learning_rate": 9.976204073862556e-06,
"loss": 0.1227,
"step": 3250
},
{
"epoch": 3.0459231490159326,
"eval_loss": 0.051736850291490555,
"eval_pearson_cosine": 0.763727605342865,
"eval_pearson_dot": 0.673626720905304,
"eval_pearson_euclidean": 0.756030797958374,
"eval_pearson_manhattan": 0.7567305564880371,
"eval_runtime": 21.997,
"eval_samples_per_second": 68.191,
"eval_spearman_cosine": 0.7669834916269708,
"eval_spearman_dot": 0.6714383880600381,
"eval_spearman_euclidean": 0.7611960037220876,
"eval_spearman_manhattan": 0.7615680957541558,
"eval_steps_per_second": 8.547,
"step": 3250
},
{
"epoch": 3.055295220243674,
"grad_norm": 1.4779927730560303,
"learning_rate": 9.976130855628287e-06,
"loss": 0.1186,
"step": 3260
},
{
"epoch": 3.064667291471415,
"grad_norm": 1.2425293922424316,
"learning_rate": 9.976057637394017e-06,
"loss": 0.1213,
"step": 3270
},
{
"epoch": 3.0740393626991565,
"grad_norm": 1.6161679029464722,
"learning_rate": 9.975984419159748e-06,
"loss": 0.1127,
"step": 3280
},
{
"epoch": 3.083411433926898,
"grad_norm": 1.199263334274292,
"learning_rate": 9.975911200925479e-06,
"loss": 0.0971,
"step": 3290
},
{
"epoch": 3.0927835051546393,
"grad_norm": 1.5749520063400269,
"learning_rate": 9.97583798269121e-06,
"loss": 0.1162,
"step": 3300
},
{
"epoch": 3.1021555763823807,
"grad_norm": 1.558112382888794,
"learning_rate": 9.97576476445694e-06,
"loss": 0.125,
"step": 3310
},
{
"epoch": 3.1115276476101217,
"grad_norm": 1.5197752714157104,
"learning_rate": 9.975691546222673e-06,
"loss": 0.1199,
"step": 3320
},
{
"epoch": 3.120899718837863,
"grad_norm": 1.1978933811187744,
"learning_rate": 9.975618327988404e-06,
"loss": 0.0975,
"step": 3330
},
{
"epoch": 3.1302717900656045,
"grad_norm": 1.0790154933929443,
"learning_rate": 9.975545109754134e-06,
"loss": 0.1078,
"step": 3340
},
{
"epoch": 3.139643861293346,
"grad_norm": 1.7810611724853516,
"learning_rate": 9.975471891519865e-06,
"loss": 0.1065,
"step": 3350
},
{
"epoch": 3.1490159325210874,
"grad_norm": 1.2899665832519531,
"learning_rate": 9.975398673285596e-06,
"loss": 0.1104,
"step": 3360
},
{
"epoch": 3.1583880037488283,
"grad_norm": 1.1923859119415283,
"learning_rate": 9.975325455051327e-06,
"loss": 0.1143,
"step": 3370
},
{
"epoch": 3.1677600749765698,
"grad_norm": 1.428306221961975,
"learning_rate": 9.975252236817057e-06,
"loss": 0.101,
"step": 3380
},
{
"epoch": 3.177132146204311,
"grad_norm": 1.323941946029663,
"learning_rate": 9.97517901858279e-06,
"loss": 0.1115,
"step": 3390
},
{
"epoch": 3.1865042174320526,
"grad_norm": 1.4079722166061401,
"learning_rate": 9.97510580034852e-06,
"loss": 0.1032,
"step": 3400
},
{
"epoch": 3.195876288659794,
"grad_norm": 1.2919671535491943,
"learning_rate": 9.97503258211425e-06,
"loss": 0.1145,
"step": 3410
},
{
"epoch": 3.205248359887535,
"grad_norm": 1.1800559759140015,
"learning_rate": 9.974959363879982e-06,
"loss": 0.106,
"step": 3420
},
{
"epoch": 3.2146204311152764,
"grad_norm": 1.5425052642822266,
"learning_rate": 9.974886145645713e-06,
"loss": 0.1156,
"step": 3430
},
{
"epoch": 3.223992502343018,
"grad_norm": 1.7271355390548706,
"learning_rate": 9.974812927411443e-06,
"loss": 0.1121,
"step": 3440
},
{
"epoch": 3.2333645735707592,
"grad_norm": 1.3295711278915405,
"learning_rate": 9.974739709177174e-06,
"loss": 0.1072,
"step": 3450
},
{
"epoch": 3.2427366447985007,
"grad_norm": 1.658498764038086,
"learning_rate": 9.974666490942905e-06,
"loss": 0.1131,
"step": 3460
},
{
"epoch": 3.2521087160262416,
"grad_norm": 1.6077649593353271,
"learning_rate": 9.974593272708636e-06,
"loss": 0.1143,
"step": 3470
},
{
"epoch": 3.261480787253983,
"grad_norm": 1.4552775621414185,
"learning_rate": 9.974520054474366e-06,
"loss": 0.1065,
"step": 3480
},
{
"epoch": 3.2708528584817245,
"grad_norm": 1.586267113685608,
"learning_rate": 9.974446836240099e-06,
"loss": 0.1137,
"step": 3490
},
{
"epoch": 3.280224929709466,
"grad_norm": 0.9890511631965637,
"learning_rate": 9.97437361800583e-06,
"loss": 0.103,
"step": 3500
},
{
"epoch": 3.280224929709466,
"eval_loss": 0.04644956439733505,
"eval_pearson_cosine": 0.760254442691803,
"eval_pearson_dot": 0.6812557578086853,
"eval_pearson_euclidean": 0.7475454807281494,
"eval_pearson_manhattan": 0.7483712434768677,
"eval_runtime": 22.2407,
"eval_samples_per_second": 67.444,
"eval_spearman_cosine": 0.7642516190492565,
"eval_spearman_dot": 0.6795590047108491,
"eval_spearman_euclidean": 0.7527436591109528,
"eval_spearman_manhattan": 0.7534967017417152,
"eval_steps_per_second": 8.453,
"step": 3500
},
{
"epoch": 3.2895970009372073,
"grad_norm": 1.4361557960510254,
"learning_rate": 9.97430039977156e-06,
"loss": 0.1078,
"step": 3510
},
{
"epoch": 3.2989690721649483,
"grad_norm": 1.307634949684143,
"learning_rate": 9.974227181537291e-06,
"loss": 0.105,
"step": 3520
},
{
"epoch": 3.3083411433926897,
"grad_norm": 1.103812336921692,
"learning_rate": 9.974153963303022e-06,
"loss": 0.1021,
"step": 3530
},
{
"epoch": 3.317713214620431,
"grad_norm": 1.485766887664795,
"learning_rate": 9.974080745068753e-06,
"loss": 0.1055,
"step": 3540
},
{
"epoch": 3.3270852858481725,
"grad_norm": 1.4017934799194336,
"learning_rate": 9.974007526834483e-06,
"loss": 0.0991,
"step": 3550
},
{
"epoch": 3.336457357075914,
"grad_norm": 1.1994048357009888,
"learning_rate": 9.973934308600214e-06,
"loss": 0.1176,
"step": 3560
},
{
"epoch": 3.345829428303655,
"grad_norm": 1.0661845207214355,
"learning_rate": 9.973861090365947e-06,
"loss": 0.1036,
"step": 3570
},
{
"epoch": 3.3552014995313963,
"grad_norm": 1.273992896080017,
"learning_rate": 9.973787872131676e-06,
"loss": 0.1069,
"step": 3580
},
{
"epoch": 3.3645735707591378,
"grad_norm": 1.157599687576294,
"learning_rate": 9.973714653897406e-06,
"loss": 0.1154,
"step": 3590
},
{
"epoch": 3.373945641986879,
"grad_norm": 1.567265272140503,
"learning_rate": 9.973641435663139e-06,
"loss": 0.1104,
"step": 3600
},
{
"epoch": 3.3833177132146206,
"grad_norm": 1.509450078010559,
"learning_rate": 9.97356821742887e-06,
"loss": 0.1123,
"step": 3610
},
{
"epoch": 3.3926897844423616,
"grad_norm": 1.6206624507904053,
"learning_rate": 9.9734949991946e-06,
"loss": 0.0915,
"step": 3620
},
{
"epoch": 3.402061855670103,
"grad_norm": 1.3384416103363037,
"learning_rate": 9.973421780960331e-06,
"loss": 0.1286,
"step": 3630
},
{
"epoch": 3.4114339268978444,
"grad_norm": 1.4834225177764893,
"learning_rate": 9.973348562726062e-06,
"loss": 0.1129,
"step": 3640
},
{
"epoch": 3.420805998125586,
"grad_norm": 1.486007809638977,
"learning_rate": 9.973275344491793e-06,
"loss": 0.1037,
"step": 3650
},
{
"epoch": 3.4301780693533273,
"grad_norm": 1.5038363933563232,
"learning_rate": 9.973202126257523e-06,
"loss": 0.104,
"step": 3660
},
{
"epoch": 3.4395501405810682,
"grad_norm": 1.3018808364868164,
"learning_rate": 9.973128908023256e-06,
"loss": 0.1068,
"step": 3670
},
{
"epoch": 3.4489222118088096,
"grad_norm": 1.733067512512207,
"learning_rate": 9.973055689788987e-06,
"loss": 0.1011,
"step": 3680
},
{
"epoch": 3.458294283036551,
"grad_norm": 1.3246439695358276,
"learning_rate": 9.972982471554716e-06,
"loss": 0.0989,
"step": 3690
},
{
"epoch": 3.4676663542642925,
"grad_norm": 1.7354522943496704,
"learning_rate": 9.972909253320448e-06,
"loss": 0.1174,
"step": 3700
},
{
"epoch": 3.477038425492034,
"grad_norm": 1.5907713174819946,
"learning_rate": 9.972836035086179e-06,
"loss": 0.1067,
"step": 3710
},
{
"epoch": 3.486410496719775,
"grad_norm": 1.4252599477767944,
"learning_rate": 9.97276281685191e-06,
"loss": 0.1064,
"step": 3720
},
{
"epoch": 3.4957825679475163,
"grad_norm": 1.3505686521530151,
"learning_rate": 9.97268959861764e-06,
"loss": 0.1168,
"step": 3730
},
{
"epoch": 3.5051546391752577,
"grad_norm": 1.3022727966308594,
"learning_rate": 9.972616380383373e-06,
"loss": 0.1111,
"step": 3740
},
{
"epoch": 3.514526710402999,
"grad_norm": 1.080246090888977,
"learning_rate": 9.972543162149102e-06,
"loss": 0.0982,
"step": 3750
},
{
"epoch": 3.514526710402999,
"eval_loss": 0.04514094442129135,
"eval_pearson_cosine": 0.7656620144844055,
"eval_pearson_dot": 0.6821019649505615,
"eval_pearson_euclidean": 0.7441372871398926,
"eval_pearson_manhattan": 0.7452259659767151,
"eval_runtime": 22.4556,
"eval_samples_per_second": 66.798,
"eval_spearman_cosine": 0.7694518035767811,
"eval_spearman_dot": 0.6821838150409313,
"eval_spearman_euclidean": 0.7516165395512334,
"eval_spearman_manhattan": 0.7527176854515762,
"eval_steps_per_second": 8.372,
"step": 3750
},
{
"epoch": 3.5238987816307406,
"grad_norm": 1.3396129608154297,
"learning_rate": 9.972469943914833e-06,
"loss": 0.1145,
"step": 3760
},
{
"epoch": 3.5332708528584815,
"grad_norm": 1.5277647972106934,
"learning_rate": 9.972396725680565e-06,
"loss": 0.1101,
"step": 3770
},
{
"epoch": 3.542642924086223,
"grad_norm": 1.8469972610473633,
"learning_rate": 9.972323507446296e-06,
"loss": 0.1129,
"step": 3780
},
{
"epoch": 3.5520149953139644,
"grad_norm": 1.2464599609375,
"learning_rate": 9.972250289212027e-06,
"loss": 0.1103,
"step": 3790
},
{
"epoch": 3.561387066541706,
"grad_norm": 1.7863965034484863,
"learning_rate": 9.972177070977757e-06,
"loss": 0.1084,
"step": 3800
},
{
"epoch": 3.570759137769447,
"grad_norm": 1.3085591793060303,
"learning_rate": 9.972103852743488e-06,
"loss": 0.11,
"step": 3810
},
{
"epoch": 3.580131208997188,
"grad_norm": 1.5875599384307861,
"learning_rate": 9.972030634509219e-06,
"loss": 0.1213,
"step": 3820
},
{
"epoch": 3.5895032802249296,
"grad_norm": 1.2654856443405151,
"learning_rate": 9.97195741627495e-06,
"loss": 0.1045,
"step": 3830
},
{
"epoch": 3.598875351452671,
"grad_norm": 1.4713581800460815,
"learning_rate": 9.97188419804068e-06,
"loss": 0.1123,
"step": 3840
},
{
"epoch": 3.6082474226804124,
"grad_norm": 1.3559589385986328,
"learning_rate": 9.971810979806413e-06,
"loss": 0.1171,
"step": 3850
},
{
"epoch": 3.617619493908154,
"grad_norm": 1.7482990026474,
"learning_rate": 9.971737761572142e-06,
"loss": 0.1141,
"step": 3860
},
{
"epoch": 3.626991565135895,
"grad_norm": 1.7189960479736328,
"learning_rate": 9.971664543337873e-06,
"loss": 0.107,
"step": 3870
},
{
"epoch": 3.6363636363636362,
"grad_norm": 1.8246538639068604,
"learning_rate": 9.971591325103605e-06,
"loss": 0.1161,
"step": 3880
},
{
"epoch": 3.6457357075913777,
"grad_norm": 1.0778300762176514,
"learning_rate": 9.971518106869336e-06,
"loss": 0.1084,
"step": 3890
},
{
"epoch": 3.655107778819119,
"grad_norm": 1.5588942766189575,
"learning_rate": 9.971444888635066e-06,
"loss": 0.1038,
"step": 3900
},
{
"epoch": 3.6644798500468605,
"grad_norm": 1.3670451641082764,
"learning_rate": 9.971371670400797e-06,
"loss": 0.1069,
"step": 3910
},
{
"epoch": 3.6738519212746015,
"grad_norm": 1.437696099281311,
"learning_rate": 9.971298452166528e-06,
"loss": 0.1129,
"step": 3920
},
{
"epoch": 3.683223992502343,
"grad_norm": 1.39695143699646,
"learning_rate": 9.971225233932259e-06,
"loss": 0.1113,
"step": 3930
},
{
"epoch": 3.6925960637300843,
"grad_norm": 1.3372693061828613,
"learning_rate": 9.97115201569799e-06,
"loss": 0.1042,
"step": 3940
},
{
"epoch": 3.7019681349578257,
"grad_norm": 1.4336313009262085,
"learning_rate": 9.971078797463722e-06,
"loss": 0.1224,
"step": 3950
},
{
"epoch": 3.711340206185567,
"grad_norm": 1.3641144037246704,
"learning_rate": 9.971005579229453e-06,
"loss": 0.1082,
"step": 3960
},
{
"epoch": 3.720712277413308,
"grad_norm": 1.1231974363327026,
"learning_rate": 9.970932360995183e-06,
"loss": 0.1108,
"step": 3970
},
{
"epoch": 3.7300843486410495,
"grad_norm": 1.0743800401687622,
"learning_rate": 9.970859142760914e-06,
"loss": 0.1148,
"step": 3980
},
{
"epoch": 3.739456419868791,
"grad_norm": 1.5260711908340454,
"learning_rate": 9.970785924526645e-06,
"loss": 0.1248,
"step": 3990
},
{
"epoch": 3.7488284910965324,
"grad_norm": 1.1183910369873047,
"learning_rate": 9.970712706292376e-06,
"loss": 0.0987,
"step": 4000
},
{
"epoch": 3.7488284910965324,
"eval_loss": 0.046661876142024994,
"eval_pearson_cosine": 0.7576525807380676,
"eval_pearson_dot": 0.6644298434257507,
"eval_pearson_euclidean": 0.7384845614433289,
"eval_pearson_manhattan": 0.7396556735038757,
"eval_runtime": 23.8808,
"eval_samples_per_second": 62.812,
"eval_spearman_cosine": 0.7607075839895016,
"eval_spearman_dot": 0.6622737418861694,
"eval_spearman_euclidean": 0.7433752629911805,
"eval_spearman_manhattan": 0.7446298314535014,
"eval_steps_per_second": 7.872,
"step": 4000
},
{
"epoch": 3.758200562324274,
"grad_norm": 1.3608311414718628,
"learning_rate": 9.970639488058106e-06,
"loss": 0.1179,
"step": 4010
},
{
"epoch": 3.7675726335520148,
"grad_norm": 1.6313430070877075,
"learning_rate": 9.970566269823839e-06,
"loss": 0.1186,
"step": 4020
},
{
"epoch": 3.776944704779756,
"grad_norm": 1.4092051982879639,
"learning_rate": 9.970493051589568e-06,
"loss": 0.1048,
"step": 4030
},
{
"epoch": 3.7863167760074976,
"grad_norm": 1.4106525182724,
"learning_rate": 9.970419833355299e-06,
"loss": 0.1233,
"step": 4040
},
{
"epoch": 3.795688847235239,
"grad_norm": 1.498146891593933,
"learning_rate": 9.970346615121031e-06,
"loss": 0.1164,
"step": 4050
},
{
"epoch": 3.8050609184629804,
"grad_norm": 1.68582284450531,
"learning_rate": 9.970273396886762e-06,
"loss": 0.1194,
"step": 4060
},
{
"epoch": 3.8144329896907214,
"grad_norm": 1.329270362854004,
"learning_rate": 9.970200178652493e-06,
"loss": 0.1001,
"step": 4070
},
{
"epoch": 3.823805060918463,
"grad_norm": 1.6010513305664062,
"learning_rate": 9.970126960418223e-06,
"loss": 0.107,
"step": 4080
},
{
"epoch": 3.8331771321462043,
"grad_norm": 1.213576078414917,
"learning_rate": 9.970053742183954e-06,
"loss": 0.1108,
"step": 4090
},
{
"epoch": 3.8425492033739457,
"grad_norm": 1.585524320602417,
"learning_rate": 9.969980523949685e-06,
"loss": 0.1079,
"step": 4100
},
{
"epoch": 3.851921274601687,
"grad_norm": 1.6043713092803955,
"learning_rate": 9.969907305715416e-06,
"loss": 0.1141,
"step": 4110
},
{
"epoch": 3.861293345829428,
"grad_norm": 1.3566473722457886,
"learning_rate": 9.969834087481146e-06,
"loss": 0.1148,
"step": 4120
},
{
"epoch": 3.8706654170571695,
"grad_norm": 1.390787124633789,
"learning_rate": 9.969760869246879e-06,
"loss": 0.1024,
"step": 4130
},
{
"epoch": 3.880037488284911,
"grad_norm": 1.689005970954895,
"learning_rate": 9.96968765101261e-06,
"loss": 0.111,
"step": 4140
},
{
"epoch": 3.8894095595126523,
"grad_norm": 1.850071907043457,
"learning_rate": 9.96961443277834e-06,
"loss": 0.1097,
"step": 4150
},
{
"epoch": 3.8987816307403937,
"grad_norm": 1.4834603071212769,
"learning_rate": 9.969541214544071e-06,
"loss": 0.1084,
"step": 4160
},
{
"epoch": 3.9081537019681347,
"grad_norm": 1.3408997058868408,
"learning_rate": 9.969467996309802e-06,
"loss": 0.1194,
"step": 4170
},
{
"epoch": 3.917525773195876,
"grad_norm": 1.3920304775238037,
"learning_rate": 9.969394778075533e-06,
"loss": 0.1091,
"step": 4180
},
{
"epoch": 3.9268978444236176,
"grad_norm": 1.0026508569717407,
"learning_rate": 9.969321559841263e-06,
"loss": 0.119,
"step": 4190
},
{
"epoch": 3.936269915651359,
"grad_norm": 1.7984665632247925,
"learning_rate": 9.969248341606996e-06,
"loss": 0.1065,
"step": 4200
},
{
"epoch": 3.9456419868791004,
"grad_norm": 1.6500909328460693,
"learning_rate": 9.969175123372725e-06,
"loss": 0.1083,
"step": 4210
},
{
"epoch": 3.9550140581068414,
"grad_norm": 1.7580713033676147,
"learning_rate": 9.969101905138456e-06,
"loss": 0.1237,
"step": 4220
},
{
"epoch": 3.964386129334583,
"grad_norm": 1.8374171257019043,
"learning_rate": 9.969028686904188e-06,
"loss": 0.1003,
"step": 4230
},
{
"epoch": 3.973758200562324,
"grad_norm": 1.5857341289520264,
"learning_rate": 9.968955468669919e-06,
"loss": 0.1012,
"step": 4240
},
{
"epoch": 3.9831302717900656,
"grad_norm": 1.627947211265564,
"learning_rate": 9.96888225043565e-06,
"loss": 0.1111,
"step": 4250
},
{
"epoch": 3.9831302717900656,
"eval_loss": 0.04063473269343376,
"eval_pearson_cosine": 0.7690664529800415,
"eval_pearson_dot": 0.6998196840286255,
"eval_pearson_euclidean": 0.7456687092781067,
"eval_pearson_manhattan": 0.7471497058868408,
"eval_runtime": 23.0817,
"eval_samples_per_second": 64.986,
"eval_spearman_cosine": 0.7702784084250337,
"eval_spearman_dot": 0.7005907360024843,
"eval_spearman_euclidean": 0.7509877657044322,
"eval_spearman_manhattan": 0.7524785559548752,
"eval_steps_per_second": 8.145,
"step": 4250
},
{
"epoch": 3.992502343017807,
"grad_norm": 1.3161486387252808,
"learning_rate": 9.96880903220138e-06,
"loss": 0.1114,
"step": 4260
},
{
"epoch": 4.001874414245548,
"grad_norm": 0.9556475281715393,
"learning_rate": 9.968735813967111e-06,
"loss": 0.1141,
"step": 4270
},
{
"epoch": 4.01124648547329,
"grad_norm": 1.0041595697402954,
"learning_rate": 9.968662595732842e-06,
"loss": 0.0807,
"step": 4280
},
{
"epoch": 4.020618556701031,
"grad_norm": 1.1500684022903442,
"learning_rate": 9.968589377498573e-06,
"loss": 0.0701,
"step": 4290
},
{
"epoch": 4.029990627928772,
"grad_norm": 1.3963230848312378,
"learning_rate": 9.968516159264305e-06,
"loss": 0.0863,
"step": 4300
},
{
"epoch": 4.039362699156514,
"grad_norm": 1.4251878261566162,
"learning_rate": 9.968442941030036e-06,
"loss": 0.0746,
"step": 4310
},
{
"epoch": 4.048734770384255,
"grad_norm": 1.0674968957901,
"learning_rate": 9.968369722795765e-06,
"loss": 0.0667,
"step": 4320
},
{
"epoch": 4.0581068416119965,
"grad_norm": 1.2465558052062988,
"learning_rate": 9.968296504561497e-06,
"loss": 0.0773,
"step": 4330
},
{
"epoch": 4.0674789128397375,
"grad_norm": 1.409511923789978,
"learning_rate": 9.968223286327228e-06,
"loss": 0.0775,
"step": 4340
},
{
"epoch": 4.0768509840674785,
"grad_norm": 1.2048633098602295,
"learning_rate": 9.968150068092959e-06,
"loss": 0.0885,
"step": 4350
},
{
"epoch": 4.08622305529522,
"grad_norm": 1.3504215478897095,
"learning_rate": 9.96807684985869e-06,
"loss": 0.0802,
"step": 4360
},
{
"epoch": 4.095595126522961,
"grad_norm": 1.5094915628433228,
"learning_rate": 9.96800363162442e-06,
"loss": 0.0889,
"step": 4370
},
{
"epoch": 4.104967197750703,
"grad_norm": 1.2075692415237427,
"learning_rate": 9.967930413390151e-06,
"loss": 0.0718,
"step": 4380
},
{
"epoch": 4.114339268978444,
"grad_norm": 1.476462960243225,
"learning_rate": 9.967857195155882e-06,
"loss": 0.0809,
"step": 4390
},
{
"epoch": 4.123711340206185,
"grad_norm": 1.4811893701553345,
"learning_rate": 9.967783976921614e-06,
"loss": 0.082,
"step": 4400
},
{
"epoch": 4.133083411433927,
"grad_norm": 1.3016406297683716,
"learning_rate": 9.967710758687345e-06,
"loss": 0.0867,
"step": 4410
},
{
"epoch": 4.142455482661668,
"grad_norm": 1.3254297971725464,
"learning_rate": 9.967637540453076e-06,
"loss": 0.0783,
"step": 4420
},
{
"epoch": 4.15182755388941,
"grad_norm": 1.7814503908157349,
"learning_rate": 9.967564322218806e-06,
"loss": 0.0812,
"step": 4430
},
{
"epoch": 4.161199625117151,
"grad_norm": 1.3375070095062256,
"learning_rate": 9.967491103984537e-06,
"loss": 0.0835,
"step": 4440
},
{
"epoch": 4.170571696344892,
"grad_norm": 1.3573247194290161,
"learning_rate": 9.967417885750268e-06,
"loss": 0.0772,
"step": 4450
},
{
"epoch": 4.179943767572634,
"grad_norm": 1.601321816444397,
"learning_rate": 9.967344667515999e-06,
"loss": 0.0785,
"step": 4460
},
{
"epoch": 4.189315838800375,
"grad_norm": 1.0777158737182617,
"learning_rate": 9.96727144928173e-06,
"loss": 0.0789,
"step": 4470
},
{
"epoch": 4.1986879100281165,
"grad_norm": 1.717281699180603,
"learning_rate": 9.967198231047462e-06,
"loss": 0.0876,
"step": 4480
},
{
"epoch": 4.2080599812558575,
"grad_norm": 1.6537655591964722,
"learning_rate": 9.967125012813191e-06,
"loss": 0.0859,
"step": 4490
},
{
"epoch": 4.217432052483598,
"grad_norm": 1.3347113132476807,
"learning_rate": 9.967051794578922e-06,
"loss": 0.0888,
"step": 4500
},
{
"epoch": 4.217432052483598,
"eval_loss": 0.042121224105358124,
"eval_pearson_cosine": 0.7580196857452393,
"eval_pearson_dot": 0.6874213814735413,
"eval_pearson_euclidean": 0.740117073059082,
"eval_pearson_manhattan": 0.7411655187606812,
"eval_runtime": 22.046,
"eval_samples_per_second": 68.04,
"eval_spearman_cosine": 0.7598083870591178,
"eval_spearman_dot": 0.6866180590359211,
"eval_spearman_euclidean": 0.7457408658977246,
"eval_spearman_manhattan": 0.7467901472090236,
"eval_steps_per_second": 8.528,
"step": 4500
},
{
"epoch": 4.22680412371134,
"grad_norm": 1.283334732055664,
"learning_rate": 9.966978576344654e-06,
"loss": 0.0824,
"step": 4510
},
{
"epoch": 4.236176194939081,
"grad_norm": 1.4807559251785278,
"learning_rate": 9.966905358110385e-06,
"loss": 0.0812,
"step": 4520
},
{
"epoch": 4.245548266166823,
"grad_norm": 1.1873483657836914,
"learning_rate": 9.966832139876116e-06,
"loss": 0.0788,
"step": 4530
},
{
"epoch": 4.254920337394564,
"grad_norm": 1.27379310131073,
"learning_rate": 9.966758921641846e-06,
"loss": 0.0802,
"step": 4540
},
{
"epoch": 4.264292408622305,
"grad_norm": 1.3721706867218018,
"learning_rate": 9.966685703407577e-06,
"loss": 0.0776,
"step": 4550
},
{
"epoch": 4.273664479850047,
"grad_norm": 1.4129197597503662,
"learning_rate": 9.966612485173308e-06,
"loss": 0.0924,
"step": 4560
},
{
"epoch": 4.283036551077788,
"grad_norm": 1.453730821609497,
"learning_rate": 9.966539266939039e-06,
"loss": 0.0823,
"step": 4570
},
{
"epoch": 4.29240862230553,
"grad_norm": 1.4608802795410156,
"learning_rate": 9.966466048704771e-06,
"loss": 0.0806,
"step": 4580
},
{
"epoch": 4.301780693533271,
"grad_norm": 1.0814175605773926,
"learning_rate": 9.966392830470502e-06,
"loss": 0.0781,
"step": 4590
},
{
"epoch": 4.311152764761012,
"grad_norm": 1.9891834259033203,
"learning_rate": 9.966319612236233e-06,
"loss": 0.0792,
"step": 4600
},
{
"epoch": 4.320524835988754,
"grad_norm": 0.7774847745895386,
"learning_rate": 9.966246394001963e-06,
"loss": 0.0734,
"step": 4610
},
{
"epoch": 4.329896907216495,
"grad_norm": 2.0921082496643066,
"learning_rate": 9.966173175767694e-06,
"loss": 0.0789,
"step": 4620
},
{
"epoch": 4.339268978444236,
"grad_norm": 1.4378306865692139,
"learning_rate": 9.966099957533425e-06,
"loss": 0.0829,
"step": 4630
},
{
"epoch": 4.348641049671977,
"grad_norm": 1.5577812194824219,
"learning_rate": 9.966026739299156e-06,
"loss": 0.0782,
"step": 4640
},
{
"epoch": 4.358013120899718,
"grad_norm": 1.8791301250457764,
"learning_rate": 9.965953521064888e-06,
"loss": 0.088,
"step": 4650
},
{
"epoch": 4.36738519212746,
"grad_norm": 0.8537359833717346,
"learning_rate": 9.965880302830617e-06,
"loss": 0.0766,
"step": 4660
},
{
"epoch": 4.376757263355201,
"grad_norm": 1.258042573928833,
"learning_rate": 9.965807084596348e-06,
"loss": 0.0877,
"step": 4670
},
{
"epoch": 4.386129334582943,
"grad_norm": 1.5519142150878906,
"learning_rate": 9.96573386636208e-06,
"loss": 0.0881,
"step": 4680
},
{
"epoch": 4.395501405810684,
"grad_norm": 1.1437076330184937,
"learning_rate": 9.965660648127811e-06,
"loss": 0.0816,
"step": 4690
},
{
"epoch": 4.404873477038425,
"grad_norm": 1.3333864212036133,
"learning_rate": 9.965587429893542e-06,
"loss": 0.0818,
"step": 4700
},
{
"epoch": 4.414245548266167,
"grad_norm": 1.403075098991394,
"learning_rate": 9.965514211659273e-06,
"loss": 0.0771,
"step": 4710
},
{
"epoch": 4.423617619493908,
"grad_norm": 1.3652963638305664,
"learning_rate": 9.965440993425003e-06,
"loss": 0.0692,
"step": 4720
},
{
"epoch": 4.43298969072165,
"grad_norm": 1.4429869651794434,
"learning_rate": 9.965367775190734e-06,
"loss": 0.0846,
"step": 4730
},
{
"epoch": 4.442361761949391,
"grad_norm": 1.291710376739502,
"learning_rate": 9.965294556956465e-06,
"loss": 0.0796,
"step": 4740
},
{
"epoch": 4.451733833177133,
"grad_norm": 1.4110385179519653,
"learning_rate": 9.965221338722196e-06,
"loss": 0.0756,
"step": 4750
},
{
"epoch": 4.451733833177133,
"eval_loss": 0.039456192404031754,
"eval_pearson_cosine": 0.7664028406143188,
"eval_pearson_dot": 0.7008457779884338,
"eval_pearson_euclidean": 0.7418538928031921,
"eval_pearson_manhattan": 0.7431594133377075,
"eval_runtime": 23.3602,
"eval_samples_per_second": 64.212,
"eval_spearman_cosine": 0.7673929323503452,
"eval_spearman_dot": 0.7011750025269451,
"eval_spearman_euclidean": 0.7464768579915497,
"eval_spearman_manhattan": 0.7479944496608657,
"eval_steps_per_second": 8.048,
"step": 4750
},
{
"epoch": 4.4611059044048735,
"grad_norm": 1.1584782600402832,
"learning_rate": 9.965148120487928e-06,
"loss": 0.0834,
"step": 4760
},
{
"epoch": 4.4704779756326145,
"grad_norm": 1.2065712213516235,
"learning_rate": 9.965074902253659e-06,
"loss": 0.0865,
"step": 4770
},
{
"epoch": 4.479850046860356,
"grad_norm": 1.3458271026611328,
"learning_rate": 9.965001684019388e-06,
"loss": 0.0764,
"step": 4780
},
{
"epoch": 4.489222118088097,
"grad_norm": 2.0091888904571533,
"learning_rate": 9.96492846578512e-06,
"loss": 0.0773,
"step": 4790
},
{
"epoch": 4.498594189315839,
"grad_norm": 1.3832370042800903,
"learning_rate": 9.964855247550851e-06,
"loss": 0.0806,
"step": 4800
},
{
"epoch": 4.50796626054358,
"grad_norm": 1.4656741619110107,
"learning_rate": 9.964782029316582e-06,
"loss": 0.0852,
"step": 4810
},
{
"epoch": 4.517338331771321,
"grad_norm": 1.3915668725967407,
"learning_rate": 9.964708811082312e-06,
"loss": 0.086,
"step": 4820
},
{
"epoch": 4.526710402999063,
"grad_norm": 1.2182085514068604,
"learning_rate": 9.964635592848043e-06,
"loss": 0.0777,
"step": 4830
},
{
"epoch": 4.536082474226804,
"grad_norm": 1.2041029930114746,
"learning_rate": 9.964562374613774e-06,
"loss": 0.0738,
"step": 4840
},
{
"epoch": 4.545454545454545,
"grad_norm": 1.289475917816162,
"learning_rate": 9.964489156379505e-06,
"loss": 0.0723,
"step": 4850
},
{
"epoch": 4.554826616682287,
"grad_norm": 1.8206441402435303,
"learning_rate": 9.964415938145237e-06,
"loss": 0.0823,
"step": 4860
},
{
"epoch": 4.564198687910028,
"grad_norm": 1.393254280090332,
"learning_rate": 9.964342719910968e-06,
"loss": 0.0869,
"step": 4870
},
{
"epoch": 4.57357075913777,
"grad_norm": 1.6424909830093384,
"learning_rate": 9.964269501676699e-06,
"loss": 0.0721,
"step": 4880
},
{
"epoch": 4.582942830365511,
"grad_norm": 1.6760517358779907,
"learning_rate": 9.96419628344243e-06,
"loss": 0.0849,
"step": 4890
},
{
"epoch": 4.592314901593252,
"grad_norm": 1.4797537326812744,
"learning_rate": 9.96412306520816e-06,
"loss": 0.0815,
"step": 4900
},
{
"epoch": 4.6016869728209935,
"grad_norm": 1.3184549808502197,
"learning_rate": 9.964049846973891e-06,
"loss": 0.0875,
"step": 4910
},
{
"epoch": 4.6110590440487345,
"grad_norm": 1.0524438619613647,
"learning_rate": 9.963976628739622e-06,
"loss": 0.0821,
"step": 4920
},
{
"epoch": 4.620431115276476,
"grad_norm": 0.8284000158309937,
"learning_rate": 9.963903410505354e-06,
"loss": 0.0737,
"step": 4930
},
{
"epoch": 4.629803186504217,
"grad_norm": 1.2979810237884521,
"learning_rate": 9.963830192271085e-06,
"loss": 0.1031,
"step": 4940
},
{
"epoch": 4.639175257731958,
"grad_norm": 1.2484486103057861,
"learning_rate": 9.963756974036814e-06,
"loss": 0.0853,
"step": 4950
},
{
"epoch": 4.6485473289597,
"grad_norm": 1.4267854690551758,
"learning_rate": 9.963683755802546e-06,
"loss": 0.0784,
"step": 4960
},
{
"epoch": 4.657919400187441,
"grad_norm": 1.2631357908248901,
"learning_rate": 9.963610537568277e-06,
"loss": 0.0814,
"step": 4970
},
{
"epoch": 4.667291471415183,
"grad_norm": 1.5679900646209717,
"learning_rate": 9.963537319334008e-06,
"loss": 0.0851,
"step": 4980
},
{
"epoch": 4.676663542642924,
"grad_norm": 1.216604471206665,
"learning_rate": 9.963464101099739e-06,
"loss": 0.0747,
"step": 4990
},
{
"epoch": 4.686035613870665,
"grad_norm": 1.3772624731063843,
"learning_rate": 9.96339088286547e-06,
"loss": 0.0871,
"step": 5000
},
{
"epoch": 4.686035613870665,
"eval_loss": 0.041086822748184204,
"eval_pearson_cosine": 0.7587878704071045,
"eval_pearson_dot": 0.6872098445892334,
"eval_pearson_euclidean": 0.7388917207717896,
"eval_pearson_manhattan": 0.7404583692550659,
"eval_runtime": 22.5042,
"eval_samples_per_second": 66.654,
"eval_spearman_cosine": 0.7603871650644157,
"eval_spearman_dot": 0.6866960900397536,
"eval_spearman_euclidean": 0.7440960862957542,
"eval_spearman_manhattan": 0.745568766414613,
"eval_steps_per_second": 8.354,
"step": 5000
},
{
"epoch": 4.695407685098407,
"grad_norm": 1.6077407598495483,
"learning_rate": 9.9633176646312e-06,
"loss": 0.0993,
"step": 5010
},
{
"epoch": 4.704779756326148,
"grad_norm": 1.206281065940857,
"learning_rate": 9.963244446396931e-06,
"loss": 0.082,
"step": 5020
},
{
"epoch": 4.71415182755389,
"grad_norm": 1.168562650680542,
"learning_rate": 9.963171228162662e-06,
"loss": 0.075,
"step": 5030
},
{
"epoch": 4.723523898781631,
"grad_norm": 1.0943313837051392,
"learning_rate": 9.963098009928394e-06,
"loss": 0.0907,
"step": 5040
},
{
"epoch": 4.7328959700093725,
"grad_norm": 1.1832613945007324,
"learning_rate": 9.963024791694125e-06,
"loss": 0.0776,
"step": 5050
},
{
"epoch": 4.742268041237113,
"grad_norm": 1.1568524837493896,
"learning_rate": 9.962951573459856e-06,
"loss": 0.0956,
"step": 5060
},
{
"epoch": 4.751640112464854,
"grad_norm": 1.4179660081863403,
"learning_rate": 9.962878355225586e-06,
"loss": 0.079,
"step": 5070
},
{
"epoch": 4.761012183692596,
"grad_norm": 1.56465744972229,
"learning_rate": 9.962805136991317e-06,
"loss": 0.0708,
"step": 5080
},
{
"epoch": 4.770384254920337,
"grad_norm": 1.47963547706604,
"learning_rate": 9.962731918757048e-06,
"loss": 0.0817,
"step": 5090
},
{
"epoch": 4.779756326148079,
"grad_norm": 1.4979149103164673,
"learning_rate": 9.962658700522779e-06,
"loss": 0.0859,
"step": 5100
},
{
"epoch": 4.78912839737582,
"grad_norm": 1.0254287719726562,
"learning_rate": 9.962585482288511e-06,
"loss": 0.077,
"step": 5110
},
{
"epoch": 4.798500468603561,
"grad_norm": 1.5644149780273438,
"learning_rate": 9.96251226405424e-06,
"loss": 0.0775,
"step": 5120
},
{
"epoch": 4.807872539831303,
"grad_norm": 1.2777773141860962,
"learning_rate": 9.962439045819971e-06,
"loss": 0.0734,
"step": 5130
},
{
"epoch": 4.817244611059044,
"grad_norm": 1.130614995956421,
"learning_rate": 9.962365827585703e-06,
"loss": 0.082,
"step": 5140
},
{
"epoch": 4.826616682286786,
"grad_norm": 0.9016211032867432,
"learning_rate": 9.962292609351434e-06,
"loss": 0.08,
"step": 5150
},
{
"epoch": 4.835988753514527,
"grad_norm": 1.4159069061279297,
"learning_rate": 9.962219391117165e-06,
"loss": 0.0841,
"step": 5160
},
{
"epoch": 4.845360824742268,
"grad_norm": 1.600085973739624,
"learning_rate": 9.962146172882896e-06,
"loss": 0.0766,
"step": 5170
},
{
"epoch": 4.85473289597001,
"grad_norm": 1.4401110410690308,
"learning_rate": 9.962072954648626e-06,
"loss": 0.0869,
"step": 5180
},
{
"epoch": 4.8641049671977505,
"grad_norm": 1.4603939056396484,
"learning_rate": 9.961999736414357e-06,
"loss": 0.077,
"step": 5190
},
{
"epoch": 4.873477038425492,
"grad_norm": 1.0498592853546143,
"learning_rate": 9.961926518180088e-06,
"loss": 0.0673,
"step": 5200
},
{
"epoch": 4.882849109653233,
"grad_norm": 1.9157027006149292,
"learning_rate": 9.96185329994582e-06,
"loss": 0.0865,
"step": 5210
},
{
"epoch": 4.892221180880974,
"grad_norm": 1.0183812379837036,
"learning_rate": 9.961780081711551e-06,
"loss": 0.0809,
"step": 5220
},
{
"epoch": 4.901593252108716,
"grad_norm": 1.4563605785369873,
"learning_rate": 9.96170686347728e-06,
"loss": 0.086,
"step": 5230
},
{
"epoch": 4.910965323336457,
"grad_norm": 1.1856083869934082,
"learning_rate": 9.961633645243013e-06,
"loss": 0.0802,
"step": 5240
},
{
"epoch": 4.920337394564199,
"grad_norm": 1.3724653720855713,
"learning_rate": 9.961560427008743e-06,
"loss": 0.0839,
"step": 5250
},
{
"epoch": 4.920337394564199,
"eval_loss": 0.04000931978225708,
"eval_pearson_cosine": 0.7643105387687683,
"eval_pearson_dot": 0.6954823732376099,
"eval_pearson_euclidean": 0.7297146320343018,
"eval_pearson_manhattan": 0.7310500144958496,
"eval_runtime": 21.985,
"eval_samples_per_second": 68.228,
"eval_spearman_cosine": 0.7658903505068073,
"eval_spearman_dot": 0.6968591888025883,
"eval_spearman_euclidean": 0.7350736410651904,
"eval_spearman_manhattan": 0.7366836781540181,
"eval_steps_per_second": 8.551,
"step": 5250
},
{
"epoch": 4.92970946579194,
"grad_norm": 1.7151585817337036,
"learning_rate": 9.961487208774474e-06,
"loss": 0.0791,
"step": 5260
},
{
"epoch": 4.939081537019681,
"grad_norm": 1.6940653324127197,
"learning_rate": 9.961413990540205e-06,
"loss": 0.0893,
"step": 5270
},
{
"epoch": 4.948453608247423,
"grad_norm": 1.5087528228759766,
"learning_rate": 9.961340772305936e-06,
"loss": 0.0801,
"step": 5280
},
{
"epoch": 4.957825679475164,
"grad_norm": 1.2038474082946777,
"learning_rate": 9.961267554071666e-06,
"loss": 0.0791,
"step": 5290
},
{
"epoch": 4.967197750702906,
"grad_norm": 1.4044734239578247,
"learning_rate": 9.961194335837397e-06,
"loss": 0.0832,
"step": 5300
},
{
"epoch": 4.976569821930647,
"grad_norm": 1.057298183441162,
"learning_rate": 9.96112111760313e-06,
"loss": 0.0869,
"step": 5310
},
{
"epoch": 4.985941893158388,
"grad_norm": 1.4192899465560913,
"learning_rate": 9.96104789936886e-06,
"loss": 0.0837,
"step": 5320
},
{
"epoch": 4.9953139643861295,
"grad_norm": 1.7742289304733276,
"learning_rate": 9.960974681134591e-06,
"loss": 0.0858,
"step": 5330
},
{
"epoch": 5.0046860356138705,
"grad_norm": 0.9188485741615295,
"learning_rate": 9.960901462900322e-06,
"loss": 0.0684,
"step": 5340
},
{
"epoch": 5.014058106841612,
"grad_norm": 1.6541597843170166,
"learning_rate": 9.960828244666052e-06,
"loss": 0.0669,
"step": 5350
},
{
"epoch": 5.023430178069353,
"grad_norm": 1.5705071687698364,
"learning_rate": 9.960755026431783e-06,
"loss": 0.0646,
"step": 5360
},
{
"epoch": 5.032802249297094,
"grad_norm": 0.9007801413536072,
"learning_rate": 9.960681808197514e-06,
"loss": 0.0721,
"step": 5370
},
{
"epoch": 5.042174320524836,
"grad_norm": 1.044138789176941,
"learning_rate": 9.960608589963245e-06,
"loss": 0.0585,
"step": 5380
},
{
"epoch": 5.051546391752577,
"grad_norm": 1.455098032951355,
"learning_rate": 9.960535371728977e-06,
"loss": 0.0677,
"step": 5390
},
{
"epoch": 5.060918462980319,
"grad_norm": 1.3480255603790283,
"learning_rate": 9.960462153494708e-06,
"loss": 0.0582,
"step": 5400
},
{
"epoch": 5.07029053420806,
"grad_norm": 0.9733775854110718,
"learning_rate": 9.960388935260437e-06,
"loss": 0.057,
"step": 5410
},
{
"epoch": 5.079662605435801,
"grad_norm": 1.202635645866394,
"learning_rate": 9.96031571702617e-06,
"loss": 0.0642,
"step": 5420
},
{
"epoch": 5.089034676663543,
"grad_norm": 1.2410409450531006,
"learning_rate": 9.9602424987919e-06,
"loss": 0.055,
"step": 5430
},
{
"epoch": 5.098406747891284,
"grad_norm": 1.341126799583435,
"learning_rate": 9.960169280557631e-06,
"loss": 0.066,
"step": 5440
},
{
"epoch": 5.107778819119026,
"grad_norm": 1.070065975189209,
"learning_rate": 9.960096062323362e-06,
"loss": 0.0565,
"step": 5450
},
{
"epoch": 5.117150890346767,
"grad_norm": 1.5855072736740112,
"learning_rate": 9.960022844089092e-06,
"loss": 0.0613,
"step": 5460
},
{
"epoch": 5.126522961574508,
"grad_norm": 0.7614333629608154,
"learning_rate": 9.959949625854823e-06,
"loss": 0.0572,
"step": 5470
},
{
"epoch": 5.1358950328022495,
"grad_norm": 1.0969761610031128,
"learning_rate": 9.959876407620554e-06,
"loss": 0.0557,
"step": 5480
},
{
"epoch": 5.14526710402999,
"grad_norm": 1.7454636096954346,
"learning_rate": 9.959803189386286e-06,
"loss": 0.0647,
"step": 5490
},
{
"epoch": 5.154639175257732,
"grad_norm": 0.9625281691551208,
"learning_rate": 9.959729971152017e-06,
"loss": 0.0499,
"step": 5500
},
{
"epoch": 5.154639175257732,
"eval_loss": 0.03924967721104622,
"eval_pearson_cosine": 0.7608553767204285,
"eval_pearson_dot": 0.6993385553359985,
"eval_pearson_euclidean": 0.732108473777771,
"eval_pearson_manhattan": 0.7334935069084167,
"eval_runtime": 28.2448,
"eval_samples_per_second": 53.107,
"eval_spearman_cosine": 0.7615678141531256,
"eval_spearman_dot": 0.6999177956469285,
"eval_spearman_euclidean": 0.7378738640113753,
"eval_spearman_manhattan": 0.7392624046122273,
"eval_steps_per_second": 6.656,
"step": 5500
},
{
"epoch": 5.164011246485473,
"grad_norm": 1.4280071258544922,
"learning_rate": 9.959656752917748e-06,
"loss": 0.0557,
"step": 5510
},
{
"epoch": 5.173383317713214,
"grad_norm": 1.6271259784698486,
"learning_rate": 9.959583534683479e-06,
"loss": 0.0602,
"step": 5520
},
{
"epoch": 5.182755388940956,
"grad_norm": 1.2609021663665771,
"learning_rate": 9.95951031644921e-06,
"loss": 0.0545,
"step": 5530
},
{
"epoch": 5.192127460168697,
"grad_norm": 1.2945165634155273,
"learning_rate": 9.95943709821494e-06,
"loss": 0.0592,
"step": 5540
},
{
"epoch": 5.201499531396439,
"grad_norm": 1.3600184917449951,
"learning_rate": 9.959363879980671e-06,
"loss": 0.0492,
"step": 5550
},
{
"epoch": 5.21087160262418,
"grad_norm": 1.3210471868515015,
"learning_rate": 9.959290661746403e-06,
"loss": 0.0558,
"step": 5560
},
{
"epoch": 5.220243673851921,
"grad_norm": 0.8935280442237854,
"learning_rate": 9.959217443512134e-06,
"loss": 0.0566,
"step": 5570
},
{
"epoch": 5.229615745079663,
"grad_norm": 0.9014615416526794,
"learning_rate": 9.959144225277863e-06,
"loss": 0.0578,
"step": 5580
},
{
"epoch": 5.238987816307404,
"grad_norm": 0.9144461750984192,
"learning_rate": 9.959071007043596e-06,
"loss": 0.0642,
"step": 5590
},
{
"epoch": 5.248359887535146,
"grad_norm": 1.1306620836257935,
"learning_rate": 9.958997788809326e-06,
"loss": 0.0645,
"step": 5600
},
{
"epoch": 5.257731958762887,
"grad_norm": 1.6353179216384888,
"learning_rate": 9.958924570575057e-06,
"loss": 0.0563,
"step": 5610
},
{
"epoch": 5.2671040299906275,
"grad_norm": 1.0438508987426758,
"learning_rate": 9.958851352340788e-06,
"loss": 0.0554,
"step": 5620
},
{
"epoch": 5.276476101218369,
"grad_norm": 1.0287367105484009,
"learning_rate": 9.958778134106519e-06,
"loss": 0.0586,
"step": 5630
},
{
"epoch": 5.28584817244611,
"grad_norm": 1.0613245964050293,
"learning_rate": 9.95870491587225e-06,
"loss": 0.0634,
"step": 5640
},
{
"epoch": 5.295220243673852,
"grad_norm": 1.489405632019043,
"learning_rate": 9.95863169763798e-06,
"loss": 0.0474,
"step": 5650
},
{
"epoch": 5.304592314901593,
"grad_norm": 1.4497292041778564,
"learning_rate": 9.95855847940371e-06,
"loss": 0.056,
"step": 5660
},
{
"epoch": 5.313964386129334,
"grad_norm": 1.2881600856781006,
"learning_rate": 9.958485261169443e-06,
"loss": 0.0561,
"step": 5670
},
{
"epoch": 5.323336457357076,
"grad_norm": 1.4863743782043457,
"learning_rate": 9.958412042935174e-06,
"loss": 0.0562,
"step": 5680
},
{
"epoch": 5.332708528584817,
"grad_norm": 1.325191855430603,
"learning_rate": 9.958338824700903e-06,
"loss": 0.0569,
"step": 5690
},
{
"epoch": 5.342080599812559,
"grad_norm": 1.0650861263275146,
"learning_rate": 9.958265606466636e-06,
"loss": 0.0574,
"step": 5700
},
{
"epoch": 5.3514526710403,
"grad_norm": 1.7255184650421143,
"learning_rate": 9.958192388232366e-06,
"loss": 0.055,
"step": 5710
},
{
"epoch": 5.360824742268041,
"grad_norm": 0.8258642554283142,
"learning_rate": 9.958119169998097e-06,
"loss": 0.0509,
"step": 5720
},
{
"epoch": 5.370196813495783,
"grad_norm": 1.2811216115951538,
"learning_rate": 9.958045951763828e-06,
"loss": 0.0585,
"step": 5730
},
{
"epoch": 5.379568884723524,
"grad_norm": 1.2582824230194092,
"learning_rate": 9.95797273352956e-06,
"loss": 0.0589,
"step": 5740
},
{
"epoch": 5.3889409559512655,
"grad_norm": 1.3511929512023926,
"learning_rate": 9.95789951529529e-06,
"loss": 0.0542,
"step": 5750
},
{
"epoch": 5.3889409559512655,
"eval_loss": 0.03850702941417694,
"eval_pearson_cosine": 0.7663590312004089,
"eval_pearson_dot": 0.7060524225234985,
"eval_pearson_euclidean": 0.7385671734809875,
"eval_pearson_manhattan": 0.7399072647094727,
"eval_runtime": 27.6896,
"eval_samples_per_second": 54.172,
"eval_spearman_cosine": 0.7668814587849042,
"eval_spearman_dot": 0.706466499232552,
"eval_spearman_euclidean": 0.744533534662993,
"eval_spearman_manhattan": 0.7454034343244123,
"eval_steps_per_second": 6.79,
"step": 5750
},
{
"epoch": 5.3983130271790065,
"grad_norm": 1.3905717134475708,
"learning_rate": 9.95782629706102e-06,
"loss": 0.0583,
"step": 5760
},
{
"epoch": 5.4076850984067475,
"grad_norm": 1.5047788619995117,
"learning_rate": 9.957753078826752e-06,
"loss": 0.0605,
"step": 5770
},
{
"epoch": 5.417057169634489,
"grad_norm": 1.280427098274231,
"learning_rate": 9.957679860592483e-06,
"loss": 0.0584,
"step": 5780
},
{
"epoch": 5.42642924086223,
"grad_norm": 1.3530281782150269,
"learning_rate": 9.957606642358214e-06,
"loss": 0.0591,
"step": 5790
},
{
"epoch": 5.435801312089972,
"grad_norm": 1.0610909461975098,
"learning_rate": 9.957533424123945e-06,
"loss": 0.0546,
"step": 5800
},
{
"epoch": 5.445173383317713,
"grad_norm": 0.9637224674224854,
"learning_rate": 9.957460205889675e-06,
"loss": 0.0641,
"step": 5810
},
{
"epoch": 5.454545454545454,
"grad_norm": 1.3324577808380127,
"learning_rate": 9.957386987655406e-06,
"loss": 0.0599,
"step": 5820
},
{
"epoch": 5.463917525773196,
"grad_norm": 0.9660161137580872,
"learning_rate": 9.957313769421137e-06,
"loss": 0.0591,
"step": 5830
},
{
"epoch": 5.473289597000937,
"grad_norm": 1.128570556640625,
"learning_rate": 9.95724055118687e-06,
"loss": 0.0579,
"step": 5840
},
{
"epoch": 5.482661668228679,
"grad_norm": 1.444172739982605,
"learning_rate": 9.9571673329526e-06,
"loss": 0.0636,
"step": 5850
},
{
"epoch": 5.49203373945642,
"grad_norm": 1.3510165214538574,
"learning_rate": 9.95709411471833e-06,
"loss": 0.0631,
"step": 5860
},
{
"epoch": 5.501405810684162,
"grad_norm": 1.0439740419387817,
"learning_rate": 9.957020896484062e-06,
"loss": 0.0635,
"step": 5870
},
{
"epoch": 5.510777881911903,
"grad_norm": 1.15412175655365,
"learning_rate": 9.956947678249792e-06,
"loss": 0.0595,
"step": 5880
},
{
"epoch": 5.520149953139644,
"grad_norm": 1.221147894859314,
"learning_rate": 9.956874460015523e-06,
"loss": 0.0552,
"step": 5890
},
{
"epoch": 5.5295220243673855,
"grad_norm": 1.4210234880447388,
"learning_rate": 9.956801241781254e-06,
"loss": 0.0593,
"step": 5900
},
{
"epoch": 5.5388940955951265,
"grad_norm": 1.1082103252410889,
"learning_rate": 9.956728023546985e-06,
"loss": 0.0535,
"step": 5910
},
{
"epoch": 5.548266166822868,
"grad_norm": 0.8931286334991455,
"learning_rate": 9.956654805312715e-06,
"loss": 0.0556,
"step": 5920
},
{
"epoch": 5.557638238050609,
"grad_norm": 1.5182912349700928,
"learning_rate": 9.956581587078446e-06,
"loss": 0.0583,
"step": 5930
},
{
"epoch": 5.56701030927835,
"grad_norm": 1.2056432962417603,
"learning_rate": 9.956508368844177e-06,
"loss": 0.064,
"step": 5940
},
{
"epoch": 5.576382380506092,
"grad_norm": 1.5039522647857666,
"learning_rate": 9.95643515060991e-06,
"loss": 0.0708,
"step": 5950
},
{
"epoch": 5.585754451733833,
"grad_norm": 1.2651883363723755,
"learning_rate": 9.95636193237564e-06,
"loss": 0.0596,
"step": 5960
},
{
"epoch": 5.595126522961575,
"grad_norm": 1.317690134048462,
"learning_rate": 9.956288714141371e-06,
"loss": 0.0713,
"step": 5970
},
{
"epoch": 5.604498594189316,
"grad_norm": 0.9705867767333984,
"learning_rate": 9.956215495907102e-06,
"loss": 0.0699,
"step": 5980
},
{
"epoch": 5.613870665417057,
"grad_norm": 1.4250271320343018,
"learning_rate": 9.956142277672832e-06,
"loss": 0.0595,
"step": 5990
},
{
"epoch": 5.623242736644799,
"grad_norm": 1.0857118368148804,
"learning_rate": 9.956069059438563e-06,
"loss": 0.0555,
"step": 6000
},
{
"epoch": 5.623242736644799,
"eval_loss": 0.03963544964790344,
"eval_pearson_cosine": 0.7571043968200684,
"eval_pearson_dot": 0.700376570224762,
"eval_pearson_euclidean": 0.7279260158538818,
"eval_pearson_manhattan": 0.729307234287262,
"eval_runtime": 25.5449,
"eval_samples_per_second": 58.72,
"eval_spearman_cosine": 0.7579022153365402,
"eval_spearman_dot": 0.6992710065203335,
"eval_spearman_euclidean": 0.7330627821557505,
"eval_spearman_manhattan": 0.7343750357819732,
"eval_steps_per_second": 7.36,
"step": 6000
},
{
"epoch": 5.63261480787254,
"grad_norm": 1.2122074365615845,
"learning_rate": 9.955995841204294e-06,
"loss": 0.0665,
"step": 6010
},
{
"epoch": 5.641986879100282,
"grad_norm": 1.7832310199737549,
"learning_rate": 9.955922622970026e-06,
"loss": 0.063,
"step": 6020
},
{
"epoch": 5.651358950328023,
"grad_norm": 1.1854170560836792,
"learning_rate": 9.955849404735755e-06,
"loss": 0.0573,
"step": 6030
},
{
"epoch": 5.660731021555764,
"grad_norm": 1.6633968353271484,
"learning_rate": 9.955776186501486e-06,
"loss": 0.0549,
"step": 6040
},
{
"epoch": 5.670103092783505,
"grad_norm": 1.31834077835083,
"learning_rate": 9.955702968267219e-06,
"loss": 0.0478,
"step": 6050
},
{
"epoch": 5.679475164011246,
"grad_norm": 0.8284873962402344,
"learning_rate": 9.95562975003295e-06,
"loss": 0.0639,
"step": 6060
},
{
"epoch": 5.688847235238988,
"grad_norm": 1.2393404245376587,
"learning_rate": 9.95555653179868e-06,
"loss": 0.0593,
"step": 6070
},
{
"epoch": 5.698219306466729,
"grad_norm": 1.5327643156051636,
"learning_rate": 9.95548331356441e-06,
"loss": 0.0644,
"step": 6080
},
{
"epoch": 5.70759137769447,
"grad_norm": 1.8985389471054077,
"learning_rate": 9.955410095330142e-06,
"loss": 0.0646,
"step": 6090
},
{
"epoch": 5.716963448922212,
"grad_norm": 1.5896059274673462,
"learning_rate": 9.955336877095872e-06,
"loss": 0.0716,
"step": 6100
},
{
"epoch": 5.726335520149953,
"grad_norm": 1.21624755859375,
"learning_rate": 9.955263658861603e-06,
"loss": 0.0559,
"step": 6110
},
{
"epoch": 5.735707591377695,
"grad_norm": 1.3084664344787598,
"learning_rate": 9.955190440627336e-06,
"loss": 0.065,
"step": 6120
},
{
"epoch": 5.745079662605436,
"grad_norm": 0.9755469560623169,
"learning_rate": 9.955117222393066e-06,
"loss": 0.0601,
"step": 6130
},
{
"epoch": 5.754451733833177,
"grad_norm": 1.1662402153015137,
"learning_rate": 9.955044004158797e-06,
"loss": 0.0588,
"step": 6140
},
{
"epoch": 5.763823805060919,
"grad_norm": 1.313323974609375,
"learning_rate": 9.954970785924528e-06,
"loss": 0.0667,
"step": 6150
},
{
"epoch": 5.77319587628866,
"grad_norm": 1.4725874662399292,
"learning_rate": 9.954897567690259e-06,
"loss": 0.0619,
"step": 6160
},
{
"epoch": 5.782567947516402,
"grad_norm": 1.3176454305648804,
"learning_rate": 9.95482434945599e-06,
"loss": 0.056,
"step": 6170
},
{
"epoch": 5.7919400187441425,
"grad_norm": 1.0566222667694092,
"learning_rate": 9.95475113122172e-06,
"loss": 0.0587,
"step": 6180
},
{
"epoch": 5.8013120899718835,
"grad_norm": 1.0623878240585327,
"learning_rate": 9.95467791298745e-06,
"loss": 0.0591,
"step": 6190
},
{
"epoch": 5.810684161199625,
"grad_norm": 1.6217368841171265,
"learning_rate": 9.954604694753183e-06,
"loss": 0.0536,
"step": 6200
},
{
"epoch": 5.820056232427366,
"grad_norm": 1.2574353218078613,
"learning_rate": 9.954531476518912e-06,
"loss": 0.0552,
"step": 6210
},
{
"epoch": 5.829428303655108,
"grad_norm": 1.2605924606323242,
"learning_rate": 9.954458258284643e-06,
"loss": 0.0669,
"step": 6220
},
{
"epoch": 5.838800374882849,
"grad_norm": 1.8283051252365112,
"learning_rate": 9.954385040050375e-06,
"loss": 0.0631,
"step": 6230
},
{
"epoch": 5.84817244611059,
"grad_norm": 1.2457951307296753,
"learning_rate": 9.954311821816106e-06,
"loss": 0.0578,
"step": 6240
},
{
"epoch": 5.857544517338332,
"grad_norm": 1.1618739366531372,
"learning_rate": 9.954238603581837e-06,
"loss": 0.0547,
"step": 6250
},
{
"epoch": 5.857544517338332,
"eval_loss": 0.03839369863271713,
"eval_pearson_cosine": 0.7663547396659851,
"eval_pearson_dot": 0.7110079526901245,
"eval_pearson_euclidean": 0.7369804978370667,
"eval_pearson_manhattan": 0.738224983215332,
"eval_runtime": 28.702,
"eval_samples_per_second": 52.261,
"eval_spearman_cosine": 0.766680322110213,
"eval_spearman_dot": 0.7118792296635837,
"eval_spearman_euclidean": 0.7420173359570077,
"eval_spearman_manhattan": 0.7431811125331302,
"eval_steps_per_second": 6.55,
"step": 6250
},
{
"epoch": 5.866916588566073,
"grad_norm": 1.565491795539856,
"learning_rate": 9.954165385347568e-06,
"loss": 0.0634,
"step": 6260
},
{
"epoch": 5.876288659793815,
"grad_norm": 1.412607192993164,
"learning_rate": 9.954092167113298e-06,
"loss": 0.0641,
"step": 6270
},
{
"epoch": 5.885660731021556,
"grad_norm": 1.5475645065307617,
"learning_rate": 9.95401894887903e-06,
"loss": 0.058,
"step": 6280
},
{
"epoch": 5.895032802249297,
"grad_norm": 1.6942791938781738,
"learning_rate": 9.95394573064476e-06,
"loss": 0.0668,
"step": 6290
},
{
"epoch": 5.904404873477039,
"grad_norm": 1.286224603652954,
"learning_rate": 9.953872512410492e-06,
"loss": 0.058,
"step": 6300
},
{
"epoch": 5.91377694470478,
"grad_norm": 1.5031893253326416,
"learning_rate": 9.953799294176223e-06,
"loss": 0.062,
"step": 6310
},
{
"epoch": 5.9231490159325215,
"grad_norm": 1.416455864906311,
"learning_rate": 9.953726075941952e-06,
"loss": 0.0596,
"step": 6320
},
{
"epoch": 5.9325210871602625,
"grad_norm": 1.3160662651062012,
"learning_rate": 9.953652857707685e-06,
"loss": 0.062,
"step": 6330
},
{
"epoch": 5.9418931583880035,
"grad_norm": 0.9542105793952942,
"learning_rate": 9.953579639473415e-06,
"loss": 0.0645,
"step": 6340
},
{
"epoch": 5.951265229615745,
"grad_norm": 1.4458489418029785,
"learning_rate": 9.953506421239146e-06,
"loss": 0.0563,
"step": 6350
},
{
"epoch": 5.960637300843486,
"grad_norm": 1.0310072898864746,
"learning_rate": 9.953433203004877e-06,
"loss": 0.0567,
"step": 6360
},
{
"epoch": 5.970009372071228,
"grad_norm": 1.4674971103668213,
"learning_rate": 9.95335998477061e-06,
"loss": 0.0579,
"step": 6370
},
{
"epoch": 5.979381443298969,
"grad_norm": 1.229636311531067,
"learning_rate": 9.953286766536338e-06,
"loss": 0.0589,
"step": 6380
},
{
"epoch": 5.98875351452671,
"grad_norm": 1.4654268026351929,
"learning_rate": 9.95321354830207e-06,
"loss": 0.0519,
"step": 6390
},
{
"epoch": 5.998125585754452,
"grad_norm": 1.276367425918579,
"learning_rate": 9.953140330067802e-06,
"loss": 0.066,
"step": 6400
},
{
"epoch": 6.007497656982193,
"grad_norm": 1.0710258483886719,
"learning_rate": 9.953067111833532e-06,
"loss": 0.0462,
"step": 6410
},
{
"epoch": 6.016869728209935,
"grad_norm": 0.9316133856773376,
"learning_rate": 9.952993893599263e-06,
"loss": 0.044,
"step": 6420
},
{
"epoch": 6.026241799437676,
"grad_norm": 0.8318607211112976,
"learning_rate": 9.952920675364994e-06,
"loss": 0.0399,
"step": 6430
},
{
"epoch": 6.035613870665417,
"grad_norm": 0.9682859182357788,
"learning_rate": 9.952847457130725e-06,
"loss": 0.0371,
"step": 6440
},
{
"epoch": 6.044985941893159,
"grad_norm": 0.8720560669898987,
"learning_rate": 9.952774238896455e-06,
"loss": 0.0453,
"step": 6450
},
{
"epoch": 6.0543580131209,
"grad_norm": 0.7835734486579895,
"learning_rate": 9.952701020662186e-06,
"loss": 0.0475,
"step": 6460
},
{
"epoch": 6.0637300843486415,
"grad_norm": 1.4373115301132202,
"learning_rate": 9.952627802427917e-06,
"loss": 0.0416,
"step": 6470
},
{
"epoch": 6.073102155576382,
"grad_norm": 1.317517638206482,
"learning_rate": 9.95255458419365e-06,
"loss": 0.0425,
"step": 6480
},
{
"epoch": 6.082474226804123,
"grad_norm": 1.1831910610198975,
"learning_rate": 9.952481365959378e-06,
"loss": 0.0471,
"step": 6490
},
{
"epoch": 6.091846298031865,
"grad_norm": 1.0449994802474976,
"learning_rate": 9.95240814772511e-06,
"loss": 0.0476,
"step": 6500
},
{
"epoch": 6.091846298031865,
"eval_loss": 0.03876839950680733,
"eval_pearson_cosine": 0.7637665867805481,
"eval_pearson_dot": 0.7007623910903931,
"eval_pearson_euclidean": 0.7322614192962646,
"eval_pearson_manhattan": 0.7338271141052246,
"eval_runtime": 22.3296,
"eval_samples_per_second": 67.175,
"eval_spearman_cosine": 0.7641548541194557,
"eval_spearman_dot": 0.7012776165056044,
"eval_spearman_euclidean": 0.7377602855270703,
"eval_spearman_manhattan": 0.73918298594716,
"eval_steps_per_second": 8.419,
"step": 6500
},
{
"epoch": 6.101218369259606,
"grad_norm": 0.7369022965431213,
"learning_rate": 9.952334929490842e-06,
"loss": 0.0364,
"step": 6510
},
{
"epoch": 6.110590440487348,
"grad_norm": 0.8673484325408936,
"learning_rate": 9.952261711256572e-06,
"loss": 0.0498,
"step": 6520
},
{
"epoch": 6.119962511715089,
"grad_norm": 1.5341424942016602,
"learning_rate": 9.952188493022303e-06,
"loss": 0.045,
"step": 6530
},
{
"epoch": 6.12933458294283,
"grad_norm": 0.8899186253547668,
"learning_rate": 9.952115274788034e-06,
"loss": 0.0441,
"step": 6540
},
{
"epoch": 6.138706654170572,
"grad_norm": 1.0708824396133423,
"learning_rate": 9.952042056553765e-06,
"loss": 0.0458,
"step": 6550
},
{
"epoch": 6.148078725398313,
"grad_norm": 1.1551895141601562,
"learning_rate": 9.951968838319495e-06,
"loss": 0.0421,
"step": 6560
},
{
"epoch": 6.157450796626055,
"grad_norm": 1.0832526683807373,
"learning_rate": 9.951895620085226e-06,
"loss": 0.0462,
"step": 6570
},
{
"epoch": 6.166822867853796,
"grad_norm": 1.303536295890808,
"learning_rate": 9.951822401850959e-06,
"loss": 0.0423,
"step": 6580
},
{
"epoch": 6.176194939081537,
"grad_norm": 1.2826794385910034,
"learning_rate": 9.95174918361669e-06,
"loss": 0.0463,
"step": 6590
},
{
"epoch": 6.185567010309279,
"grad_norm": 1.0724890232086182,
"learning_rate": 9.95167596538242e-06,
"loss": 0.043,
"step": 6600
},
{
"epoch": 6.1949390815370196,
"grad_norm": 0.9407768249511719,
"learning_rate": 9.95160274714815e-06,
"loss": 0.045,
"step": 6610
},
{
"epoch": 6.204311152764761,
"grad_norm": 1.1686878204345703,
"learning_rate": 9.951529528913882e-06,
"loss": 0.0407,
"step": 6620
},
{
"epoch": 6.213683223992502,
"grad_norm": 1.5972820520401,
"learning_rate": 9.951456310679612e-06,
"loss": 0.0449,
"step": 6630
},
{
"epoch": 6.223055295220243,
"grad_norm": 0.7610195875167847,
"learning_rate": 9.951383092445343e-06,
"loss": 0.0397,
"step": 6640
},
{
"epoch": 6.232427366447985,
"grad_norm": 1.02704656124115,
"learning_rate": 9.951309874211075e-06,
"loss": 0.0448,
"step": 6650
},
{
"epoch": 6.241799437675726,
"grad_norm": 0.8035688400268555,
"learning_rate": 9.951236655976805e-06,
"loss": 0.0445,
"step": 6660
},
{
"epoch": 6.251171508903468,
"grad_norm": 1.019539475440979,
"learning_rate": 9.951163437742535e-06,
"loss": 0.0452,
"step": 6670
},
{
"epoch": 6.260543580131209,
"grad_norm": 1.662574291229248,
"learning_rate": 9.951090219508268e-06,
"loss": 0.0517,
"step": 6680
},
{
"epoch": 6.26991565135895,
"grad_norm": 1.1599600315093994,
"learning_rate": 9.951017001273998e-06,
"loss": 0.0493,
"step": 6690
},
{
"epoch": 6.279287722586692,
"grad_norm": 0.7756074070930481,
"learning_rate": 9.95094378303973e-06,
"loss": 0.048,
"step": 6700
},
{
"epoch": 6.288659793814433,
"grad_norm": 1.0959285497665405,
"learning_rate": 9.95087056480546e-06,
"loss": 0.0501,
"step": 6710
},
{
"epoch": 6.298031865042175,
"grad_norm": 1.2311910390853882,
"learning_rate": 9.95079734657119e-06,
"loss": 0.0486,
"step": 6720
},
{
"epoch": 6.307403936269916,
"grad_norm": 1.2149254083633423,
"learning_rate": 9.950724128336921e-06,
"loss": 0.0389,
"step": 6730
},
{
"epoch": 6.316776007497657,
"grad_norm": 1.5355291366577148,
"learning_rate": 9.950650910102652e-06,
"loss": 0.0472,
"step": 6740
},
{
"epoch": 6.3261480787253985,
"grad_norm": 1.1264081001281738,
"learning_rate": 9.950577691868385e-06,
"loss": 0.043,
"step": 6750
},
{
"epoch": 6.3261480787253985,
"eval_loss": 0.03764544054865837,
"eval_pearson_cosine": 0.7692497968673706,
"eval_pearson_dot": 0.7138222455978394,
"eval_pearson_euclidean": 0.7343003749847412,
"eval_pearson_manhattan": 0.7356712818145752,
"eval_runtime": 22.6897,
"eval_samples_per_second": 66.109,
"eval_spearman_cosine": 0.7695765922931803,
"eval_spearman_dot": 0.7152262336240688,
"eval_spearman_euclidean": 0.739557951171161,
"eval_spearman_manhattan": 0.7408550126908494,
"eval_steps_per_second": 8.286,
"step": 6750
},
{
"epoch": 6.3355201499531395,
"grad_norm": 0.6277545690536499,
"learning_rate": 9.950504473634115e-06,
"loss": 0.0406,
"step": 6760
},
{
"epoch": 6.344892221180881,
"grad_norm": 1.3999137878417969,
"learning_rate": 9.950431255399846e-06,
"loss": 0.0447,
"step": 6770
},
{
"epoch": 6.354264292408622,
"grad_norm": 0.7465086579322815,
"learning_rate": 9.950358037165577e-06,
"loss": 0.0502,
"step": 6780
},
{
"epoch": 6.363636363636363,
"grad_norm": 1.1154383420944214,
"learning_rate": 9.950284818931308e-06,
"loss": 0.05,
"step": 6790
},
{
"epoch": 6.373008434864105,
"grad_norm": 1.1133472919464111,
"learning_rate": 9.950211600697038e-06,
"loss": 0.0473,
"step": 6800
},
{
"epoch": 6.382380506091846,
"grad_norm": 1.0995352268218994,
"learning_rate": 9.95013838246277e-06,
"loss": 0.0414,
"step": 6810
},
{
"epoch": 6.391752577319588,
"grad_norm": 0.9666862487792969,
"learning_rate": 9.9500651642285e-06,
"loss": 0.049,
"step": 6820
},
{
"epoch": 6.401124648547329,
"grad_norm": 1.1517918109893799,
"learning_rate": 9.94999194599423e-06,
"loss": 0.0413,
"step": 6830
},
{
"epoch": 6.41049671977507,
"grad_norm": 0.5381759405136108,
"learning_rate": 9.949918727759961e-06,
"loss": 0.0418,
"step": 6840
},
{
"epoch": 6.419868791002812,
"grad_norm": 0.973006546497345,
"learning_rate": 9.949845509525692e-06,
"loss": 0.0495,
"step": 6850
},
{
"epoch": 6.429240862230553,
"grad_norm": 1.126633882522583,
"learning_rate": 9.949772291291425e-06,
"loss": 0.0493,
"step": 6860
},
{
"epoch": 6.438612933458295,
"grad_norm": 0.7894268035888672,
"learning_rate": 9.949699073057155e-06,
"loss": 0.0436,
"step": 6870
},
{
"epoch": 6.447985004686036,
"grad_norm": 0.7125422358512878,
"learning_rate": 9.949625854822886e-06,
"loss": 0.0433,
"step": 6880
},
{
"epoch": 6.457357075913777,
"grad_norm": 0.9013342261314392,
"learning_rate": 9.949552636588617e-06,
"loss": 0.0376,
"step": 6890
},
{
"epoch": 6.4667291471415185,
"grad_norm": 1.132384181022644,
"learning_rate": 9.949479418354348e-06,
"loss": 0.0482,
"step": 6900
},
{
"epoch": 6.4761012183692594,
"grad_norm": 1.0104179382324219,
"learning_rate": 9.949406200120078e-06,
"loss": 0.0485,
"step": 6910
},
{
"epoch": 6.485473289597001,
"grad_norm": 1.233464241027832,
"learning_rate": 9.949332981885809e-06,
"loss": 0.0478,
"step": 6920
},
{
"epoch": 6.494845360824742,
"grad_norm": 0.7077954411506653,
"learning_rate": 9.949259763651542e-06,
"loss": 0.0464,
"step": 6930
},
{
"epoch": 6.504217432052483,
"grad_norm": 1.5273882150650024,
"learning_rate": 9.949186545417272e-06,
"loss": 0.0404,
"step": 6940
},
{
"epoch": 6.513589503280225,
"grad_norm": 1.2204720973968506,
"learning_rate": 9.949113327183001e-06,
"loss": 0.0375,
"step": 6950
},
{
"epoch": 6.522961574507966,
"grad_norm": 0.9539759755134583,
"learning_rate": 9.949040108948734e-06,
"loss": 0.0397,
"step": 6960
},
{
"epoch": 6.532333645735708,
"grad_norm": 1.949201226234436,
"learning_rate": 9.948966890714465e-06,
"loss": 0.0476,
"step": 6970
},
{
"epoch": 6.541705716963449,
"grad_norm": 1.046915888786316,
"learning_rate": 9.948893672480195e-06,
"loss": 0.0445,
"step": 6980
},
{
"epoch": 6.55107778819119,
"grad_norm": 0.8392923474311829,
"learning_rate": 9.948820454245926e-06,
"loss": 0.0502,
"step": 6990
},
{
"epoch": 6.560449859418932,
"grad_norm": 1.357014536857605,
"learning_rate": 9.948747236011659e-06,
"loss": 0.0436,
"step": 7000
},
{
"epoch": 6.560449859418932,
"eval_loss": 0.03813355416059494,
"eval_pearson_cosine": 0.7662351131439209,
"eval_pearson_dot": 0.7104849219322205,
"eval_pearson_euclidean": 0.7334129810333252,
"eval_pearson_manhattan": 0.7350986003875732,
"eval_runtime": 22.7512,
"eval_samples_per_second": 65.931,
"eval_spearman_cosine": 0.7662226343415417,
"eval_spearman_dot": 0.7115825441503862,
"eval_spearman_euclidean": 0.7384103552275764,
"eval_spearman_manhattan": 0.7397995971405482,
"eval_steps_per_second": 8.263,
"step": 7000
},
{
"epoch": 6.569821930646673,
"grad_norm": 1.1269482374191284,
"learning_rate": 9.948674017777388e-06,
"loss": 0.0395,
"step": 7010
},
{
"epoch": 6.579194001874415,
"grad_norm": 0.8978859782218933,
"learning_rate": 9.948600799543118e-06,
"loss": 0.0438,
"step": 7020
},
{
"epoch": 6.588566073102156,
"grad_norm": 1.3999450206756592,
"learning_rate": 9.94852758130885e-06,
"loss": 0.0466,
"step": 7030
},
{
"epoch": 6.597938144329897,
"grad_norm": 0.985998272895813,
"learning_rate": 9.948454363074582e-06,
"loss": 0.0474,
"step": 7040
},
{
"epoch": 6.607310215557638,
"grad_norm": 0.7843828797340393,
"learning_rate": 9.948381144840312e-06,
"loss": 0.0417,
"step": 7050
},
{
"epoch": 6.616682286785379,
"grad_norm": 1.64656400680542,
"learning_rate": 9.948307926606043e-06,
"loss": 0.045,
"step": 7060
},
{
"epoch": 6.626054358013121,
"grad_norm": 0.6348075866699219,
"learning_rate": 9.948234708371774e-06,
"loss": 0.0501,
"step": 7070
},
{
"epoch": 6.635426429240862,
"grad_norm": 1.8781590461730957,
"learning_rate": 9.948161490137505e-06,
"loss": 0.0445,
"step": 7080
},
{
"epoch": 6.644798500468603,
"grad_norm": 1.0441402196884155,
"learning_rate": 9.948088271903235e-06,
"loss": 0.0457,
"step": 7090
},
{
"epoch": 6.654170571696345,
"grad_norm": 1.2460689544677734,
"learning_rate": 9.948015053668966e-06,
"loss": 0.0471,
"step": 7100
},
{
"epoch": 6.663542642924086,
"grad_norm": 0.993414580821991,
"learning_rate": 9.947941835434698e-06,
"loss": 0.0423,
"step": 7110
},
{
"epoch": 6.672914714151828,
"grad_norm": 1.2848552465438843,
"learning_rate": 9.947868617200428e-06,
"loss": 0.0414,
"step": 7120
},
{
"epoch": 6.682286785379569,
"grad_norm": 1.2903103828430176,
"learning_rate": 9.947795398966158e-06,
"loss": 0.0402,
"step": 7130
},
{
"epoch": 6.69165885660731,
"grad_norm": 1.2319235801696777,
"learning_rate": 9.94772218073189e-06,
"loss": 0.0504,
"step": 7140
},
{
"epoch": 6.701030927835052,
"grad_norm": 0.8465273976325989,
"learning_rate": 9.947648962497621e-06,
"loss": 0.0409,
"step": 7150
},
{
"epoch": 6.710402999062793,
"grad_norm": 1.186928153038025,
"learning_rate": 9.947575744263352e-06,
"loss": 0.0458,
"step": 7160
},
{
"epoch": 6.719775070290535,
"grad_norm": 1.3528752326965332,
"learning_rate": 9.947502526029083e-06,
"loss": 0.0433,
"step": 7170
},
{
"epoch": 6.7291471415182755,
"grad_norm": 0.8908892273902893,
"learning_rate": 9.947429307794814e-06,
"loss": 0.0456,
"step": 7180
},
{
"epoch": 6.7385192127460165,
"grad_norm": 1.1235069036483765,
"learning_rate": 9.947356089560544e-06,
"loss": 0.0481,
"step": 7190
},
{
"epoch": 6.747891283973758,
"grad_norm": 1.6809895038604736,
"learning_rate": 9.947282871326275e-06,
"loss": 0.0454,
"step": 7200
},
{
"epoch": 6.757263355201499,
"grad_norm": 0.8632039427757263,
"learning_rate": 9.947209653092008e-06,
"loss": 0.0481,
"step": 7210
},
{
"epoch": 6.766635426429241,
"grad_norm": 1.2185996770858765,
"learning_rate": 9.947136434857738e-06,
"loss": 0.0383,
"step": 7220
},
{
"epoch": 6.776007497656982,
"grad_norm": 0.6979696154594421,
"learning_rate": 9.947063216623467e-06,
"loss": 0.0435,
"step": 7230
},
{
"epoch": 6.785379568884723,
"grad_norm": 1.459441065788269,
"learning_rate": 9.9469899983892e-06,
"loss": 0.0449,
"step": 7240
},
{
"epoch": 6.794751640112465,
"grad_norm": 1.0957977771759033,
"learning_rate": 9.94691678015493e-06,
"loss": 0.032,
"step": 7250
},
{
"epoch": 6.794751640112465,
"eval_loss": 0.03765299916267395,
"eval_pearson_cosine": 0.7692482471466064,
"eval_pearson_dot": 0.722366452217102,
"eval_pearson_euclidean": 0.7316011190414429,
"eval_pearson_manhattan": 0.7333144545555115,
"eval_runtime": 22.5438,
"eval_samples_per_second": 66.537,
"eval_spearman_cosine": 0.7695046405395065,
"eval_spearman_dot": 0.7242050912795406,
"eval_spearman_euclidean": 0.7356828429817377,
"eval_spearman_manhattan": 0.737487116385034,
"eval_steps_per_second": 8.339,
"step": 7250
},
{
"epoch": 6.804123711340206,
"grad_norm": 1.377066731452942,
"learning_rate": 9.946843561920661e-06,
"loss": 0.0529,
"step": 7260
},
{
"epoch": 6.813495782567948,
"grad_norm": 0.714728057384491,
"learning_rate": 9.946770343686392e-06,
"loss": 0.0432,
"step": 7270
},
{
"epoch": 6.822867853795689,
"grad_norm": 1.4324384927749634,
"learning_rate": 9.946697125452125e-06,
"loss": 0.046,
"step": 7280
},
{
"epoch": 6.83223992502343,
"grad_norm": 1.2564704418182373,
"learning_rate": 9.946623907217854e-06,
"loss": 0.046,
"step": 7290
},
{
"epoch": 6.841611996251172,
"grad_norm": 0.8522197008132935,
"learning_rate": 9.946550688983584e-06,
"loss": 0.0393,
"step": 7300
},
{
"epoch": 6.850984067478913,
"grad_norm": 0.8751912117004395,
"learning_rate": 9.946477470749317e-06,
"loss": 0.0426,
"step": 7310
},
{
"epoch": 6.8603561387066545,
"grad_norm": 0.8960391879081726,
"learning_rate": 9.946404252515048e-06,
"loss": 0.0445,
"step": 7320
},
{
"epoch": 6.8697282099343955,
"grad_norm": 1.092128872871399,
"learning_rate": 9.946331034280778e-06,
"loss": 0.0459,
"step": 7330
},
{
"epoch": 6.8791002811621365,
"grad_norm": 1.1840777397155762,
"learning_rate": 9.946257816046509e-06,
"loss": 0.0387,
"step": 7340
},
{
"epoch": 6.888472352389878,
"grad_norm": 1.0283764600753784,
"learning_rate": 9.94618459781224e-06,
"loss": 0.0577,
"step": 7350
},
{
"epoch": 6.897844423617619,
"grad_norm": 0.749761164188385,
"learning_rate": 9.94611137957797e-06,
"loss": 0.0414,
"step": 7360
},
{
"epoch": 6.907216494845361,
"grad_norm": 0.8442000150680542,
"learning_rate": 9.946038161343701e-06,
"loss": 0.046,
"step": 7370
},
{
"epoch": 6.916588566073102,
"grad_norm": 1.2296583652496338,
"learning_rate": 9.945964943109432e-06,
"loss": 0.0412,
"step": 7380
},
{
"epoch": 6.925960637300843,
"grad_norm": 0.6515626311302185,
"learning_rate": 9.945891724875165e-06,
"loss": 0.0481,
"step": 7390
},
{
"epoch": 6.935332708528585,
"grad_norm": 1.8992091417312622,
"learning_rate": 9.945818506640895e-06,
"loss": 0.0431,
"step": 7400
},
{
"epoch": 6.944704779756326,
"grad_norm": 1.1663875579833984,
"learning_rate": 9.945745288406624e-06,
"loss": 0.0459,
"step": 7410
},
{
"epoch": 6.954076850984068,
"grad_norm": 0.6695976853370667,
"learning_rate": 9.945672070172357e-06,
"loss": 0.0448,
"step": 7420
},
{
"epoch": 6.963448922211809,
"grad_norm": 1.158563494682312,
"learning_rate": 9.945598851938088e-06,
"loss": 0.0398,
"step": 7430
},
{
"epoch": 6.97282099343955,
"grad_norm": 1.2068713903427124,
"learning_rate": 9.945525633703818e-06,
"loss": 0.0443,
"step": 7440
},
{
"epoch": 6.982193064667292,
"grad_norm": 0.9688456654548645,
"learning_rate": 9.945452415469549e-06,
"loss": 0.0452,
"step": 7450
},
{
"epoch": 6.991565135895033,
"grad_norm": 1.5483156442642212,
"learning_rate": 9.94537919723528e-06,
"loss": 0.0498,
"step": 7460
},
{
"epoch": 7.0009372071227745,
"grad_norm": 1.18287193775177,
"learning_rate": 9.94530597900101e-06,
"loss": 0.0445,
"step": 7470
},
{
"epoch": 7.010309278350515,
"grad_norm": 0.7765620946884155,
"learning_rate": 9.945232760766741e-06,
"loss": 0.0346,
"step": 7480
},
{
"epoch": 7.019681349578256,
"grad_norm": 0.948760986328125,
"learning_rate": 9.945159542532474e-06,
"loss": 0.0348,
"step": 7490
},
{
"epoch": 7.029053420805998,
"grad_norm": 0.9965664744377136,
"learning_rate": 9.945086324298205e-06,
"loss": 0.0342,
"step": 7500
},
{
"epoch": 7.029053420805998,
"eval_loss": 0.03782695531845093,
"eval_pearson_cosine": 0.768491804599762,
"eval_pearson_dot": 0.7183945775032043,
"eval_pearson_euclidean": 0.7320147752761841,
"eval_pearson_manhattan": 0.7333334684371948,
"eval_runtime": 21.6515,
"eval_samples_per_second": 69.279,
"eval_spearman_cosine": 0.7677979499645443,
"eval_spearman_dot": 0.7186610110098233,
"eval_spearman_euclidean": 0.7364530110375347,
"eval_spearman_manhattan": 0.737620665225201,
"eval_steps_per_second": 8.683,
"step": 7500
},
{
"epoch": 7.038425492033739,
"grad_norm": 0.8594346046447754,
"learning_rate": 9.945013106063935e-06,
"loss": 0.0318,
"step": 7510
},
{
"epoch": 7.047797563261481,
"grad_norm": 1.62812340259552,
"learning_rate": 9.944939887829666e-06,
"loss": 0.0414,
"step": 7520
},
{
"epoch": 7.057169634489222,
"grad_norm": 1.1017098426818848,
"learning_rate": 9.944866669595397e-06,
"loss": 0.0327,
"step": 7530
},
{
"epoch": 7.066541705716963,
"grad_norm": 0.8536505699157715,
"learning_rate": 9.944793451361128e-06,
"loss": 0.0286,
"step": 7540
},
{
"epoch": 7.075913776944705,
"grad_norm": 1.0389901399612427,
"learning_rate": 9.944720233126858e-06,
"loss": 0.0365,
"step": 7550
},
{
"epoch": 7.085285848172446,
"grad_norm": 1.0682491064071655,
"learning_rate": 9.94464701489259e-06,
"loss": 0.034,
"step": 7560
},
{
"epoch": 7.094657919400188,
"grad_norm": 0.8786489963531494,
"learning_rate": 9.944573796658321e-06,
"loss": 0.0373,
"step": 7570
},
{
"epoch": 7.104029990627929,
"grad_norm": 1.3642008304595947,
"learning_rate": 9.94450057842405e-06,
"loss": 0.0314,
"step": 7580
},
{
"epoch": 7.11340206185567,
"grad_norm": 0.7243325114250183,
"learning_rate": 9.944427360189783e-06,
"loss": 0.0299,
"step": 7590
},
{
"epoch": 7.122774133083412,
"grad_norm": 0.6696385145187378,
"learning_rate": 9.944354141955514e-06,
"loss": 0.0311,
"step": 7600
},
{
"epoch": 7.1321462043111525,
"grad_norm": 1.03152334690094,
"learning_rate": 9.944280923721244e-06,
"loss": 0.0355,
"step": 7610
},
{
"epoch": 7.141518275538894,
"grad_norm": 0.8586616516113281,
"learning_rate": 9.944207705486975e-06,
"loss": 0.0394,
"step": 7620
},
{
"epoch": 7.150890346766635,
"grad_norm": 0.9514285922050476,
"learning_rate": 9.944134487252706e-06,
"loss": 0.035,
"step": 7630
},
{
"epoch": 7.160262417994376,
"grad_norm": 0.8053460717201233,
"learning_rate": 9.944061269018437e-06,
"loss": 0.0312,
"step": 7640
},
{
"epoch": 7.169634489222118,
"grad_norm": 1.0056674480438232,
"learning_rate": 9.943988050784167e-06,
"loss": 0.0371,
"step": 7650
},
{
"epoch": 7.179006560449859,
"grad_norm": 0.7738359570503235,
"learning_rate": 9.943914832549898e-06,
"loss": 0.0302,
"step": 7660
},
{
"epoch": 7.188378631677601,
"grad_norm": 1.039197325706482,
"learning_rate": 9.94384161431563e-06,
"loss": 0.0316,
"step": 7670
},
{
"epoch": 7.197750702905342,
"grad_norm": 1.578165888786316,
"learning_rate": 9.943768396081361e-06,
"loss": 0.0388,
"step": 7680
},
{
"epoch": 7.207122774133083,
"grad_norm": 1.1753205060958862,
"learning_rate": 9.943695177847092e-06,
"loss": 0.0387,
"step": 7690
},
{
"epoch": 7.216494845360825,
"grad_norm": 1.295299768447876,
"learning_rate": 9.943621959612823e-06,
"loss": 0.0417,
"step": 7700
},
{
"epoch": 7.225866916588566,
"grad_norm": 0.9477363228797913,
"learning_rate": 9.943548741378554e-06,
"loss": 0.0305,
"step": 7710
},
{
"epoch": 7.235238987816308,
"grad_norm": 1.0547223091125488,
"learning_rate": 9.943475523144284e-06,
"loss": 0.0314,
"step": 7720
},
{
"epoch": 7.244611059044049,
"grad_norm": 1.4873117208480835,
"learning_rate": 9.943402304910015e-06,
"loss": 0.0302,
"step": 7730
},
{
"epoch": 7.25398313027179,
"grad_norm": 0.9882778525352478,
"learning_rate": 9.943329086675748e-06,
"loss": 0.0328,
"step": 7740
},
{
"epoch": 7.2633552014995315,
"grad_norm": 1.3187719583511353,
"learning_rate": 9.943255868441477e-06,
"loss": 0.0341,
"step": 7750
},
{
"epoch": 7.2633552014995315,
"eval_loss": 0.03773624449968338,
"eval_pearson_cosine": 0.7699387073516846,
"eval_pearson_dot": 0.7237234115600586,
"eval_pearson_euclidean": 0.7316513061523438,
"eval_pearson_manhattan": 0.7335678339004517,
"eval_runtime": 22.1612,
"eval_samples_per_second": 67.686,
"eval_spearman_cosine": 0.7694615753118931,
"eval_spearman_dot": 0.7243788947148158,
"eval_spearman_euclidean": 0.7361849268567764,
"eval_spearman_manhattan": 0.7377945356892571,
"eval_steps_per_second": 8.483,
"step": 7750
},
{
"epoch": 7.2727272727272725,
"grad_norm": 1.0984870195388794,
"learning_rate": 9.943182650207207e-06,
"loss": 0.0329,
"step": 7760
},
{
"epoch": 7.282099343955014,
"grad_norm": 0.7666100263595581,
"learning_rate": 9.94310943197294e-06,
"loss": 0.0358,
"step": 7770
},
{
"epoch": 7.291471415182755,
"grad_norm": 0.9941838383674622,
"learning_rate": 9.94303621373867e-06,
"loss": 0.0351,
"step": 7780
},
{
"epoch": 7.300843486410496,
"grad_norm": 1.3012335300445557,
"learning_rate": 9.942962995504401e-06,
"loss": 0.0296,
"step": 7790
},
{
"epoch": 7.310215557638238,
"grad_norm": 1.1914719343185425,
"learning_rate": 9.942889777270132e-06,
"loss": 0.0333,
"step": 7800
},
{
"epoch": 7.319587628865979,
"grad_norm": 1.1405929327011108,
"learning_rate": 9.942816559035863e-06,
"loss": 0.0408,
"step": 7810
},
{
"epoch": 7.328959700093721,
"grad_norm": 0.665600061416626,
"learning_rate": 9.942743340801594e-06,
"loss": 0.0314,
"step": 7820
},
{
"epoch": 7.338331771321462,
"grad_norm": 1.2029966115951538,
"learning_rate": 9.942670122567324e-06,
"loss": 0.041,
"step": 7830
},
{
"epoch": 7.347703842549203,
"grad_norm": 0.44810751080513,
"learning_rate": 9.942596904333057e-06,
"loss": 0.0317,
"step": 7840
},
{
"epoch": 7.357075913776945,
"grad_norm": 1.565082311630249,
"learning_rate": 9.942523686098788e-06,
"loss": 0.035,
"step": 7850
},
{
"epoch": 7.366447985004686,
"grad_norm": 1.6850316524505615,
"learning_rate": 9.942450467864517e-06,
"loss": 0.0365,
"step": 7860
},
{
"epoch": 7.375820056232428,
"grad_norm": 1.0027261972427368,
"learning_rate": 9.942377249630249e-06,
"loss": 0.0309,
"step": 7870
},
{
"epoch": 7.385192127460169,
"grad_norm": 0.51674485206604,
"learning_rate": 9.94230403139598e-06,
"loss": 0.0321,
"step": 7880
},
{
"epoch": 7.39456419868791,
"grad_norm": 1.0429599285125732,
"learning_rate": 9.94223081316171e-06,
"loss": 0.033,
"step": 7890
},
{
"epoch": 7.4039362699156515,
"grad_norm": 0.618232250213623,
"learning_rate": 9.942157594927441e-06,
"loss": 0.0353,
"step": 7900
},
{
"epoch": 7.413308341143392,
"grad_norm": 0.9780518412590027,
"learning_rate": 9.942084376693174e-06,
"loss": 0.0354,
"step": 7910
},
{
"epoch": 7.422680412371134,
"grad_norm": 1.214362621307373,
"learning_rate": 9.942011158458903e-06,
"loss": 0.0338,
"step": 7920
},
{
"epoch": 7.432052483598875,
"grad_norm": 1.202986240386963,
"learning_rate": 9.941937940224634e-06,
"loss": 0.0387,
"step": 7930
},
{
"epoch": 7.441424554826616,
"grad_norm": 1.4128488302230835,
"learning_rate": 9.941864721990366e-06,
"loss": 0.0315,
"step": 7940
},
{
"epoch": 7.450796626054358,
"grad_norm": 0.7198026180267334,
"learning_rate": 9.941791503756097e-06,
"loss": 0.0338,
"step": 7950
},
{
"epoch": 7.460168697282099,
"grad_norm": 1.1124250888824463,
"learning_rate": 9.941718285521828e-06,
"loss": 0.0352,
"step": 7960
},
{
"epoch": 7.469540768509841,
"grad_norm": 1.0420817136764526,
"learning_rate": 9.941645067287558e-06,
"loss": 0.0338,
"step": 7970
},
{
"epoch": 7.478912839737582,
"grad_norm": 0.9638373255729675,
"learning_rate": 9.941571849053289e-06,
"loss": 0.0356,
"step": 7980
},
{
"epoch": 7.488284910965323,
"grad_norm": 0.8584896922111511,
"learning_rate": 9.94149863081902e-06,
"loss": 0.0353,
"step": 7990
},
{
"epoch": 7.497656982193065,
"grad_norm": 0.7161556482315063,
"learning_rate": 9.94142541258475e-06,
"loss": 0.0329,
"step": 8000
},
{
"epoch": 7.497656982193065,
"eval_loss": 0.03753030672669411,
"eval_pearson_cosine": 0.7705868482589722,
"eval_pearson_dot": 0.7248358726501465,
"eval_pearson_euclidean": 0.734631359577179,
"eval_pearson_manhattan": 0.7363988161087036,
"eval_runtime": 22.3628,
"eval_samples_per_second": 67.076,
"eval_spearman_cosine": 0.769708288306187,
"eval_spearman_dot": 0.7249767839130733,
"eval_spearman_euclidean": 0.7394619718544255,
"eval_spearman_manhattan": 0.7409361299302836,
"eval_steps_per_second": 8.407,
"step": 8000
}
],
"logging_steps": 10,
"max_steps": 10670,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}