snowflake-arctic-embed-xs-zyda-2 / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 90669,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005514563963427412,
"grad_norm": 4.689566135406494,
"learning_rate": 4.972427180182863e-05,
"loss": 6.5185,
"step": 500
},
{
"epoch": 0.011029127926854823,
"grad_norm": 4.601550102233887,
"learning_rate": 4.944854360365726e-05,
"loss": 5.5109,
"step": 1000
},
{
"epoch": 0.016543691890282236,
"grad_norm": 4.204834461212158,
"learning_rate": 4.917281540548589e-05,
"loss": 5.1513,
"step": 1500
},
{
"epoch": 0.022058255853709647,
"grad_norm": 4.12762451171875,
"learning_rate": 4.889708720731452e-05,
"loss": 4.9528,
"step": 2000
},
{
"epoch": 0.027572819817137058,
"grad_norm": 4.254393100738525,
"learning_rate": 4.862135900914315e-05,
"loss": 4.7961,
"step": 2500
},
{
"epoch": 0.03308738378056447,
"grad_norm": 4.859085559844971,
"learning_rate": 4.834563081097178e-05,
"loss": 4.7004,
"step": 3000
},
{
"epoch": 0.03860194774399188,
"grad_norm": 4.5257368087768555,
"learning_rate": 4.806990261280041e-05,
"loss": 4.5864,
"step": 3500
},
{
"epoch": 0.044116511707419294,
"grad_norm": 4.193604946136475,
"learning_rate": 4.779417441462904e-05,
"loss": 4.5161,
"step": 4000
},
{
"epoch": 0.04963107567084671,
"grad_norm": 4.394799709320068,
"learning_rate": 4.7518446216457665e-05,
"loss": 4.4474,
"step": 4500
},
{
"epoch": 0.055145639634274116,
"grad_norm": 4.351430892944336,
"learning_rate": 4.72427180182863e-05,
"loss": 4.3782,
"step": 5000
},
{
"epoch": 0.06066020359770153,
"grad_norm": 4.718796730041504,
"learning_rate": 4.696698982011493e-05,
"loss": 4.3183,
"step": 5500
},
{
"epoch": 0.06617476756112894,
"grad_norm": 4.186001300811768,
"learning_rate": 4.6691261621943555e-05,
"loss": 4.2597,
"step": 6000
},
{
"epoch": 0.07168933152455635,
"grad_norm": 4.420439720153809,
"learning_rate": 4.641553342377218e-05,
"loss": 4.2321,
"step": 6500
},
{
"epoch": 0.07720389548798376,
"grad_norm": 4.082899570465088,
"learning_rate": 4.613980522560081e-05,
"loss": 4.1731,
"step": 7000
},
{
"epoch": 0.08271845945141118,
"grad_norm": 4.149295806884766,
"learning_rate": 4.5864077027429445e-05,
"loss": 4.1479,
"step": 7500
},
{
"epoch": 0.08823302341483859,
"grad_norm": 4.364389419555664,
"learning_rate": 4.558834882925807e-05,
"loss": 4.1119,
"step": 8000
},
{
"epoch": 0.093747587378266,
"grad_norm": 4.409417629241943,
"learning_rate": 4.53126206310867e-05,
"loss": 4.0806,
"step": 8500
},
{
"epoch": 0.09926215134169342,
"grad_norm": 4.639771938323975,
"learning_rate": 4.5036892432915335e-05,
"loss": 4.0431,
"step": 9000
},
{
"epoch": 0.10477671530512082,
"grad_norm": 4.332629203796387,
"learning_rate": 4.476116423474396e-05,
"loss": 4.0352,
"step": 9500
},
{
"epoch": 0.11029127926854823,
"grad_norm": 4.82522439956665,
"learning_rate": 4.448543603657259e-05,
"loss": 3.9858,
"step": 10000
},
{
"epoch": 0.11580584323197565,
"grad_norm": 4.305941104888916,
"learning_rate": 4.420970783840122e-05,
"loss": 3.9731,
"step": 10500
},
{
"epoch": 0.12132040719540306,
"grad_norm": 4.728514194488525,
"learning_rate": 4.393397964022985e-05,
"loss": 3.9417,
"step": 11000
},
{
"epoch": 0.12683497115883047,
"grad_norm": 4.233896732330322,
"learning_rate": 4.365825144205848e-05,
"loss": 3.9396,
"step": 11500
},
{
"epoch": 0.1323495351222579,
"grad_norm": 4.335183143615723,
"learning_rate": 4.338252324388711e-05,
"loss": 3.9091,
"step": 12000
},
{
"epoch": 0.13786409908568528,
"grad_norm": 4.590264797210693,
"learning_rate": 4.3106795045715735e-05,
"loss": 3.8935,
"step": 12500
},
{
"epoch": 0.1433786630491127,
"grad_norm": 4.33479642868042,
"learning_rate": 4.283106684754436e-05,
"loss": 3.8875,
"step": 13000
},
{
"epoch": 0.14889322701254012,
"grad_norm": 4.2722697257995605,
"learning_rate": 4.2555338649373e-05,
"loss": 3.8566,
"step": 13500
},
{
"epoch": 0.15440779097596752,
"grad_norm": 4.284050464630127,
"learning_rate": 4.2279610451201625e-05,
"loss": 3.8433,
"step": 14000
},
{
"epoch": 0.15992235493939494,
"grad_norm": 4.086195945739746,
"learning_rate": 4.200388225303025e-05,
"loss": 3.8381,
"step": 14500
},
{
"epoch": 0.16543691890282236,
"grad_norm": 4.229586124420166,
"learning_rate": 4.172815405485889e-05,
"loss": 3.8148,
"step": 15000
},
{
"epoch": 0.17095148286624975,
"grad_norm": 4.43237829208374,
"learning_rate": 4.1452425856687515e-05,
"loss": 3.7945,
"step": 15500
},
{
"epoch": 0.17646604682967718,
"grad_norm": 4.232430934906006,
"learning_rate": 4.117669765851614e-05,
"loss": 3.8078,
"step": 16000
},
{
"epoch": 0.1819806107931046,
"grad_norm": 5.106810092926025,
"learning_rate": 4.090096946034477e-05,
"loss": 3.7684,
"step": 16500
},
{
"epoch": 0.187495174756532,
"grad_norm": 4.939910411834717,
"learning_rate": 4.0625241262173405e-05,
"loss": 3.7634,
"step": 17000
},
{
"epoch": 0.1930097387199594,
"grad_norm": 4.215509414672852,
"learning_rate": 4.034951306400203e-05,
"loss": 3.7487,
"step": 17500
},
{
"epoch": 0.19852430268338683,
"grad_norm": 4.279122829437256,
"learning_rate": 4.007378486583066e-05,
"loss": 3.7697,
"step": 18000
},
{
"epoch": 0.20403886664681423,
"grad_norm": 4.503846168518066,
"learning_rate": 3.979805666765929e-05,
"loss": 3.7488,
"step": 18500
},
{
"epoch": 0.20955343061024165,
"grad_norm": 3.935098648071289,
"learning_rate": 3.9522328469487916e-05,
"loss": 3.7347,
"step": 19000
},
{
"epoch": 0.21506799457366907,
"grad_norm": 4.217621326446533,
"learning_rate": 3.924660027131655e-05,
"loss": 3.7063,
"step": 19500
},
{
"epoch": 0.22058255853709646,
"grad_norm": 4.404201030731201,
"learning_rate": 3.897087207314518e-05,
"loss": 3.6878,
"step": 20000
},
{
"epoch": 0.22609712250052388,
"grad_norm": 4.507588863372803,
"learning_rate": 3.869514387497381e-05,
"loss": 3.7021,
"step": 20500
},
{
"epoch": 0.2316116864639513,
"grad_norm": 4.7501220703125,
"learning_rate": 3.841941567680244e-05,
"loss": 3.6856,
"step": 21000
},
{
"epoch": 0.2371262504273787,
"grad_norm": 4.5834879875183105,
"learning_rate": 3.814368747863107e-05,
"loss": 3.6799,
"step": 21500
},
{
"epoch": 0.24264081439080612,
"grad_norm": 4.500739574432373,
"learning_rate": 3.7867959280459695e-05,
"loss": 3.6695,
"step": 22000
},
{
"epoch": 0.24815537835423354,
"grad_norm": 4.357424736022949,
"learning_rate": 3.759223108228832e-05,
"loss": 3.6648,
"step": 22500
},
{
"epoch": 0.25366994231766093,
"grad_norm": 4.667726039886475,
"learning_rate": 3.731650288411696e-05,
"loss": 3.6623,
"step": 23000
},
{
"epoch": 0.25918450628108836,
"grad_norm": 4.472695827484131,
"learning_rate": 3.7040774685945585e-05,
"loss": 3.6336,
"step": 23500
},
{
"epoch": 0.2646990702445158,
"grad_norm": 4.226781368255615,
"learning_rate": 3.676504648777421e-05,
"loss": 3.6362,
"step": 24000
},
{
"epoch": 0.2702136342079432,
"grad_norm": 4.575997829437256,
"learning_rate": 3.648931828960284e-05,
"loss": 3.6146,
"step": 24500
},
{
"epoch": 0.27572819817137056,
"grad_norm": 4.648991584777832,
"learning_rate": 3.621359009143147e-05,
"loss": 3.6352,
"step": 25000
},
{
"epoch": 0.281242762134798,
"grad_norm": 4.165131092071533,
"learning_rate": 3.59378618932601e-05,
"loss": 3.6165,
"step": 25500
},
{
"epoch": 0.2867573260982254,
"grad_norm": 4.220915794372559,
"learning_rate": 3.566213369508873e-05,
"loss": 3.6067,
"step": 26000
},
{
"epoch": 0.29227189006165283,
"grad_norm": 4.650350093841553,
"learning_rate": 3.5386405496917365e-05,
"loss": 3.5922,
"step": 26500
},
{
"epoch": 0.29778645402508025,
"grad_norm": 4.175040245056152,
"learning_rate": 3.511067729874599e-05,
"loss": 3.6063,
"step": 27000
},
{
"epoch": 0.30330101798850767,
"grad_norm": 4.40975284576416,
"learning_rate": 3.483494910057462e-05,
"loss": 3.5945,
"step": 27500
},
{
"epoch": 0.30881558195193504,
"grad_norm": 3.9698615074157715,
"learning_rate": 3.455922090240325e-05,
"loss": 3.5887,
"step": 28000
},
{
"epoch": 0.31433014591536246,
"grad_norm": 4.441317081451416,
"learning_rate": 3.4283492704231876e-05,
"loss": 3.5741,
"step": 28500
},
{
"epoch": 0.3198447098787899,
"grad_norm": 4.244263648986816,
"learning_rate": 3.400776450606051e-05,
"loss": 3.5688,
"step": 29000
},
{
"epoch": 0.3253592738422173,
"grad_norm": 4.017004013061523,
"learning_rate": 3.373203630788914e-05,
"loss": 3.568,
"step": 29500
},
{
"epoch": 0.3308738378056447,
"grad_norm": 4.664565563201904,
"learning_rate": 3.3456308109717765e-05,
"loss": 3.5571,
"step": 30000
},
{
"epoch": 0.33638840176907214,
"grad_norm": 4.073508262634277,
"learning_rate": 3.318057991154639e-05,
"loss": 3.5189,
"step": 30500
},
{
"epoch": 0.3419029657324995,
"grad_norm": 4.424101829528809,
"learning_rate": 3.290485171337502e-05,
"loss": 3.554,
"step": 31000
},
{
"epoch": 0.34741752969592693,
"grad_norm": 4.302523136138916,
"learning_rate": 3.2629123515203655e-05,
"loss": 3.5388,
"step": 31500
},
{
"epoch": 0.35293209365935435,
"grad_norm": 4.329090118408203,
"learning_rate": 3.235339531703228e-05,
"loss": 3.5485,
"step": 32000
},
{
"epoch": 0.3584466576227818,
"grad_norm": 4.2849531173706055,
"learning_rate": 3.207766711886092e-05,
"loss": 3.5388,
"step": 32500
},
{
"epoch": 0.3639612215862092,
"grad_norm": 4.334972381591797,
"learning_rate": 3.1801938920689545e-05,
"loss": 3.5225,
"step": 33000
},
{
"epoch": 0.3694757855496366,
"grad_norm": 4.848361492156982,
"learning_rate": 3.152621072251817e-05,
"loss": 3.5226,
"step": 33500
},
{
"epoch": 0.374990349513064,
"grad_norm": 4.436476230621338,
"learning_rate": 3.12504825243468e-05,
"loss": 3.5046,
"step": 34000
},
{
"epoch": 0.3805049134764914,
"grad_norm": 4.017549991607666,
"learning_rate": 3.097475432617543e-05,
"loss": 3.5091,
"step": 34500
},
{
"epoch": 0.3860194774399188,
"grad_norm": 4.507646083831787,
"learning_rate": 3.069902612800406e-05,
"loss": 3.4951,
"step": 35000
},
{
"epoch": 0.39153404140334624,
"grad_norm": 4.1406989097595215,
"learning_rate": 3.042329792983269e-05,
"loss": 3.496,
"step": 35500
},
{
"epoch": 0.39704860536677367,
"grad_norm": 4.320881366729736,
"learning_rate": 3.0147569731661318e-05,
"loss": 3.5025,
"step": 36000
},
{
"epoch": 0.4025631693302011,
"grad_norm": 4.030999183654785,
"learning_rate": 2.9871841533489946e-05,
"loss": 3.4974,
"step": 36500
},
{
"epoch": 0.40807773329362845,
"grad_norm": 4.489917755126953,
"learning_rate": 2.9596113335318577e-05,
"loss": 3.4867,
"step": 37000
},
{
"epoch": 0.4135922972570559,
"grad_norm": 4.384711742401123,
"learning_rate": 2.9320385137147204e-05,
"loss": 3.4753,
"step": 37500
},
{
"epoch": 0.4191068612204833,
"grad_norm": 4.64800500869751,
"learning_rate": 2.9044656938975836e-05,
"loss": 3.4703,
"step": 38000
},
{
"epoch": 0.4246214251839107,
"grad_norm": 4.490517616271973,
"learning_rate": 2.876892874080447e-05,
"loss": 3.471,
"step": 38500
},
{
"epoch": 0.43013598914733814,
"grad_norm": 4.496025085449219,
"learning_rate": 2.8493200542633098e-05,
"loss": 3.4579,
"step": 39000
},
{
"epoch": 0.43565055311076556,
"grad_norm": 4.765578746795654,
"learning_rate": 2.8217472344461725e-05,
"loss": 3.4646,
"step": 39500
},
{
"epoch": 0.4411651170741929,
"grad_norm": 4.592626094818115,
"learning_rate": 2.7941744146290356e-05,
"loss": 3.4656,
"step": 40000
},
{
"epoch": 0.44667968103762035,
"grad_norm": 4.292928695678711,
"learning_rate": 2.7666015948118984e-05,
"loss": 3.4533,
"step": 40500
},
{
"epoch": 0.45219424500104777,
"grad_norm": 4.014820098876953,
"learning_rate": 2.7390287749947612e-05,
"loss": 3.4576,
"step": 41000
},
{
"epoch": 0.4577088089644752,
"grad_norm": 4.129273891448975,
"learning_rate": 2.7114559551776243e-05,
"loss": 3.4509,
"step": 41500
},
{
"epoch": 0.4632233729279026,
"grad_norm": 4.679018497467041,
"learning_rate": 2.683883135360487e-05,
"loss": 3.4382,
"step": 42000
},
{
"epoch": 0.46873793689133,
"grad_norm": 4.382132053375244,
"learning_rate": 2.6563103155433498e-05,
"loss": 3.4435,
"step": 42500
},
{
"epoch": 0.4742525008547574,
"grad_norm": 4.3672380447387695,
"learning_rate": 2.628737495726213e-05,
"loss": 3.4398,
"step": 43000
},
{
"epoch": 0.4797670648181848,
"grad_norm": 4.159623622894287,
"learning_rate": 2.6011646759090757e-05,
"loss": 3.4529,
"step": 43500
},
{
"epoch": 0.48528162878161224,
"grad_norm": 4.100943565368652,
"learning_rate": 2.573591856091939e-05,
"loss": 3.411,
"step": 44000
},
{
"epoch": 0.49079619274503966,
"grad_norm": 4.237346649169922,
"learning_rate": 2.5460190362748023e-05,
"loss": 3.4153,
"step": 44500
},
{
"epoch": 0.4963107567084671,
"grad_norm": 4.697793960571289,
"learning_rate": 2.518446216457665e-05,
"loss": 3.4153,
"step": 45000
},
{
"epoch": 0.5018253206718944,
"grad_norm": 4.274381160736084,
"learning_rate": 2.4908733966405278e-05,
"loss": 3.4228,
"step": 45500
},
{
"epoch": 0.5073398846353219,
"grad_norm": 4.421125411987305,
"learning_rate": 2.463300576823391e-05,
"loss": 3.3963,
"step": 46000
},
{
"epoch": 0.5128544485987493,
"grad_norm": 4.356249809265137,
"learning_rate": 2.4357277570062537e-05,
"loss": 3.4186,
"step": 46500
},
{
"epoch": 0.5183690125621767,
"grad_norm": 4.516757965087891,
"learning_rate": 2.4081549371891164e-05,
"loss": 3.4222,
"step": 47000
},
{
"epoch": 0.5238835765256041,
"grad_norm": 5.137631416320801,
"learning_rate": 2.3805821173719796e-05,
"loss": 3.4121,
"step": 47500
},
{
"epoch": 0.5293981404890316,
"grad_norm": 4.224301338195801,
"learning_rate": 2.3530092975548423e-05,
"loss": 3.3854,
"step": 48000
},
{
"epoch": 0.534912704452459,
"grad_norm": 4.442605972290039,
"learning_rate": 2.3254364777377054e-05,
"loss": 3.3857,
"step": 48500
},
{
"epoch": 0.5404272684158864,
"grad_norm": 4.190525531768799,
"learning_rate": 2.2978636579205685e-05,
"loss": 3.3958,
"step": 49000
},
{
"epoch": 0.5459418323793138,
"grad_norm": 4.089470386505127,
"learning_rate": 2.2702908381034313e-05,
"loss": 3.4038,
"step": 49500
},
{
"epoch": 0.5514563963427411,
"grad_norm": 4.37148380279541,
"learning_rate": 2.242718018286294e-05,
"loss": 3.3908,
"step": 50000
},
{
"epoch": 0.5569709603061685,
"grad_norm": 4.484643936157227,
"learning_rate": 2.2151451984691572e-05,
"loss": 3.392,
"step": 50500
},
{
"epoch": 0.562485524269596,
"grad_norm": 4.480875492095947,
"learning_rate": 2.18757237865202e-05,
"loss": 3.38,
"step": 51000
},
{
"epoch": 0.5680000882330234,
"grad_norm": 4.415227890014648,
"learning_rate": 2.159999558834883e-05,
"loss": 3.4014,
"step": 51500
},
{
"epoch": 0.5735146521964508,
"grad_norm": 4.461363315582275,
"learning_rate": 2.132426739017746e-05,
"loss": 3.3768,
"step": 52000
},
{
"epoch": 0.5790292161598782,
"grad_norm": 4.364917755126953,
"learning_rate": 2.104853919200609e-05,
"loss": 3.3769,
"step": 52500
},
{
"epoch": 0.5845437801233057,
"grad_norm": 4.509827613830566,
"learning_rate": 2.0772810993834717e-05,
"loss": 3.3567,
"step": 53000
},
{
"epoch": 0.5900583440867331,
"grad_norm": 4.165256023406982,
"learning_rate": 2.0497082795663348e-05,
"loss": 3.3649,
"step": 53500
},
{
"epoch": 0.5955729080501605,
"grad_norm": 4.39963436126709,
"learning_rate": 2.0221354597491976e-05,
"loss": 3.3688,
"step": 54000
},
{
"epoch": 0.6010874720135879,
"grad_norm": 4.492909908294678,
"learning_rate": 1.9945626399320607e-05,
"loss": 3.3654,
"step": 54500
},
{
"epoch": 0.6066020359770153,
"grad_norm": 4.136989593505859,
"learning_rate": 1.9669898201149238e-05,
"loss": 3.3588,
"step": 55000
},
{
"epoch": 0.6121165999404428,
"grad_norm": 4.091104030609131,
"learning_rate": 1.9394170002977866e-05,
"loss": 3.3714,
"step": 55500
},
{
"epoch": 0.6176311639038701,
"grad_norm": 4.557612895965576,
"learning_rate": 1.9118441804806493e-05,
"loss": 3.3783,
"step": 56000
},
{
"epoch": 0.6231457278672975,
"grad_norm": 4.4669718742370605,
"learning_rate": 1.8842713606635124e-05,
"loss": 3.3576,
"step": 56500
},
{
"epoch": 0.6286602918307249,
"grad_norm": 4.214612007141113,
"learning_rate": 1.8566985408463752e-05,
"loss": 3.3769,
"step": 57000
},
{
"epoch": 0.6341748557941523,
"grad_norm": 4.079827785491943,
"learning_rate": 1.8291257210292383e-05,
"loss": 3.3584,
"step": 57500
},
{
"epoch": 0.6396894197575798,
"grad_norm": 4.0199713706970215,
"learning_rate": 1.8015529012121014e-05,
"loss": 3.3593,
"step": 58000
},
{
"epoch": 0.6452039837210072,
"grad_norm": 4.746074199676514,
"learning_rate": 1.7739800813949642e-05,
"loss": 3.3417,
"step": 58500
},
{
"epoch": 0.6507185476844346,
"grad_norm": 4.219590187072754,
"learning_rate": 1.746407261577827e-05,
"loss": 3.3518,
"step": 59000
},
{
"epoch": 0.656233111647862,
"grad_norm": 4.15669584274292,
"learning_rate": 1.71883444176069e-05,
"loss": 3.3401,
"step": 59500
},
{
"epoch": 0.6617476756112894,
"grad_norm": 4.129217147827148,
"learning_rate": 1.6912616219435532e-05,
"loss": 3.3504,
"step": 60000
},
{
"epoch": 0.6672622395747169,
"grad_norm": 4.176223278045654,
"learning_rate": 1.663688802126416e-05,
"loss": 3.3429,
"step": 60500
},
{
"epoch": 0.6727768035381443,
"grad_norm": 3.982861042022705,
"learning_rate": 1.636115982309279e-05,
"loss": 3.3239,
"step": 61000
},
{
"epoch": 0.6782913675015716,
"grad_norm": 4.495360851287842,
"learning_rate": 1.6085431624921418e-05,
"loss": 3.334,
"step": 61500
},
{
"epoch": 0.683805931464999,
"grad_norm": 4.5026679039001465,
"learning_rate": 1.5809703426750046e-05,
"loss": 3.3434,
"step": 62000
},
{
"epoch": 0.6893204954284264,
"grad_norm": 4.469930648803711,
"learning_rate": 1.5533975228578677e-05,
"loss": 3.3085,
"step": 62500
},
{
"epoch": 0.6948350593918539,
"grad_norm": 4.942314147949219,
"learning_rate": 1.5258247030407308e-05,
"loss": 3.3169,
"step": 63000
},
{
"epoch": 0.7003496233552813,
"grad_norm": 4.131747245788574,
"learning_rate": 1.4982518832235937e-05,
"loss": 3.3242,
"step": 63500
},
{
"epoch": 0.7058641873187087,
"grad_norm": 4.662265777587891,
"learning_rate": 1.4706790634064565e-05,
"loss": 3.3455,
"step": 64000
},
{
"epoch": 0.7113787512821361,
"grad_norm": 4.53313684463501,
"learning_rate": 1.4431062435893195e-05,
"loss": 3.3093,
"step": 64500
},
{
"epoch": 0.7168933152455635,
"grad_norm": 4.306014537811279,
"learning_rate": 1.4155334237721824e-05,
"loss": 3.3146,
"step": 65000
},
{
"epoch": 0.722407879208991,
"grad_norm": 4.205687999725342,
"learning_rate": 1.3879606039550452e-05,
"loss": 3.3182,
"step": 65500
},
{
"epoch": 0.7279224431724184,
"grad_norm": 4.351266384124756,
"learning_rate": 1.3603877841379084e-05,
"loss": 3.3363,
"step": 66000
},
{
"epoch": 0.7334370071358458,
"grad_norm": 4.580765724182129,
"learning_rate": 1.3328149643207714e-05,
"loss": 3.2988,
"step": 66500
},
{
"epoch": 0.7389515710992732,
"grad_norm": 4.511965274810791,
"learning_rate": 1.3052421445036341e-05,
"loss": 3.3153,
"step": 67000
},
{
"epoch": 0.7444661350627005,
"grad_norm": 4.1504950523376465,
"learning_rate": 1.277669324686497e-05,
"loss": 3.3193,
"step": 67500
},
{
"epoch": 0.749980699026128,
"grad_norm": 4.668148994445801,
"learning_rate": 1.25009650486936e-05,
"loss": 3.2916,
"step": 68000
},
{
"epoch": 0.7554952629895554,
"grad_norm": 4.106932163238525,
"learning_rate": 1.222523685052223e-05,
"loss": 3.3268,
"step": 68500
},
{
"epoch": 0.7610098269529828,
"grad_norm": 4.127325057983398,
"learning_rate": 1.1949508652350859e-05,
"loss": 3.3036,
"step": 69000
},
{
"epoch": 0.7665243909164102,
"grad_norm": 4.444930076599121,
"learning_rate": 1.1673780454179488e-05,
"loss": 3.3024,
"step": 69500
},
{
"epoch": 0.7720389548798376,
"grad_norm": 4.07660436630249,
"learning_rate": 1.1398052256008118e-05,
"loss": 3.3053,
"step": 70000
},
{
"epoch": 0.7775535188432651,
"grad_norm": 4.658594131469727,
"learning_rate": 1.1122324057836747e-05,
"loss": 3.3048,
"step": 70500
},
{
"epoch": 0.7830680828066925,
"grad_norm": 4.439772129058838,
"learning_rate": 1.0846595859665378e-05,
"loss": 3.2943,
"step": 71000
},
{
"epoch": 0.7885826467701199,
"grad_norm": 4.101642608642578,
"learning_rate": 1.0570867661494006e-05,
"loss": 3.2914,
"step": 71500
},
{
"epoch": 0.7940972107335473,
"grad_norm": 4.650053024291992,
"learning_rate": 1.0295139463322635e-05,
"loss": 3.2977,
"step": 72000
},
{
"epoch": 0.7996117746969748,
"grad_norm": 4.005575180053711,
"learning_rate": 1.0019411265151266e-05,
"loss": 3.2971,
"step": 72500
},
{
"epoch": 0.8051263386604022,
"grad_norm": 4.499767780303955,
"learning_rate": 9.743683066979894e-06,
"loss": 3.3002,
"step": 73000
},
{
"epoch": 0.8106409026238295,
"grad_norm": 4.143964767456055,
"learning_rate": 9.467954868808523e-06,
"loss": 3.2879,
"step": 73500
},
{
"epoch": 0.8161554665872569,
"grad_norm": 4.027842998504639,
"learning_rate": 9.192226670637155e-06,
"loss": 3.2974,
"step": 74000
},
{
"epoch": 0.8216700305506843,
"grad_norm": 4.330503463745117,
"learning_rate": 8.916498472465782e-06,
"loss": 3.2971,
"step": 74500
},
{
"epoch": 0.8271845945141117,
"grad_norm": 4.108890056610107,
"learning_rate": 8.640770274294412e-06,
"loss": 3.2951,
"step": 75000
},
{
"epoch": 0.8326991584775392,
"grad_norm": 4.396561622619629,
"learning_rate": 8.365042076123043e-06,
"loss": 3.3018,
"step": 75500
},
{
"epoch": 0.8382137224409666,
"grad_norm": 4.230642795562744,
"learning_rate": 8.08931387795167e-06,
"loss": 3.2832,
"step": 76000
},
{
"epoch": 0.843728286404394,
"grad_norm": 4.438147068023682,
"learning_rate": 7.8135856797803e-06,
"loss": 3.2845,
"step": 76500
},
{
"epoch": 0.8492428503678214,
"grad_norm": 4.5078325271606445,
"learning_rate": 7.53785748160893e-06,
"loss": 3.2977,
"step": 77000
},
{
"epoch": 0.8547574143312489,
"grad_norm": 4.344171524047852,
"learning_rate": 7.262129283437559e-06,
"loss": 3.2785,
"step": 77500
},
{
"epoch": 0.8602719782946763,
"grad_norm": 4.636903762817383,
"learning_rate": 6.986401085266188e-06,
"loss": 3.2784,
"step": 78000
},
{
"epoch": 0.8657865422581037,
"grad_norm": 4.955584526062012,
"learning_rate": 6.710672887094818e-06,
"loss": 3.2676,
"step": 78500
},
{
"epoch": 0.8713011062215311,
"grad_norm": 4.11828088760376,
"learning_rate": 6.4349446889234475e-06,
"loss": 3.2901,
"step": 79000
},
{
"epoch": 0.8768156701849584,
"grad_norm": 4.916619777679443,
"learning_rate": 6.159216490752077e-06,
"loss": 3.2964,
"step": 79500
},
{
"epoch": 0.8823302341483859,
"grad_norm": 4.217592239379883,
"learning_rate": 5.883488292580705e-06,
"loss": 3.2923,
"step": 80000
},
{
"epoch": 0.8878447981118133,
"grad_norm": 4.360821723937988,
"learning_rate": 5.607760094409336e-06,
"loss": 3.2807,
"step": 80500
},
{
"epoch": 0.8933593620752407,
"grad_norm": 4.062866687774658,
"learning_rate": 5.332031896237965e-06,
"loss": 3.2936,
"step": 81000
},
{
"epoch": 0.8988739260386681,
"grad_norm": 4.373243808746338,
"learning_rate": 5.0563036980665936e-06,
"loss": 3.2592,
"step": 81500
},
{
"epoch": 0.9043884900020955,
"grad_norm": 4.527072906494141,
"learning_rate": 4.780575499895224e-06,
"loss": 3.2754,
"step": 82000
},
{
"epoch": 0.909903053965523,
"grad_norm": 4.206862926483154,
"learning_rate": 4.504847301723853e-06,
"loss": 3.2609,
"step": 82500
},
{
"epoch": 0.9154176179289504,
"grad_norm": 4.414552688598633,
"learning_rate": 4.229119103552482e-06,
"loss": 3.2936,
"step": 83000
},
{
"epoch": 0.9209321818923778,
"grad_norm": 4.722365856170654,
"learning_rate": 3.953390905381112e-06,
"loss": 3.2733,
"step": 83500
},
{
"epoch": 0.9264467458558052,
"grad_norm": 4.194797515869141,
"learning_rate": 3.6776627072097413e-06,
"loss": 3.272,
"step": 84000
},
{
"epoch": 0.9319613098192326,
"grad_norm": 4.476502895355225,
"learning_rate": 3.4019345090383703e-06,
"loss": 3.286,
"step": 84500
},
{
"epoch": 0.93747587378266,
"grad_norm": 4.418745994567871,
"learning_rate": 3.1262063108669997e-06,
"loss": 3.2611,
"step": 85000
},
{
"epoch": 0.9429904377460874,
"grad_norm": 4.2887797355651855,
"learning_rate": 2.8504781126956295e-06,
"loss": 3.2749,
"step": 85500
},
{
"epoch": 0.9485050017095148,
"grad_norm": 4.276843547821045,
"learning_rate": 2.574749914524259e-06,
"loss": 3.2739,
"step": 86000
},
{
"epoch": 0.9540195656729422,
"grad_norm": 4.448587417602539,
"learning_rate": 2.299021716352888e-06,
"loss": 3.2756,
"step": 86500
},
{
"epoch": 0.9595341296363696,
"grad_norm": 4.589288711547852,
"learning_rate": 2.0232935181815176e-06,
"loss": 3.2734,
"step": 87000
},
{
"epoch": 0.9650486935997971,
"grad_norm": 4.560390472412109,
"learning_rate": 1.7475653200101468e-06,
"loss": 3.2652,
"step": 87500
},
{
"epoch": 0.9705632575632245,
"grad_norm": 4.633021831512451,
"learning_rate": 1.4718371218387762e-06,
"loss": 3.2831,
"step": 88000
},
{
"epoch": 0.9760778215266519,
"grad_norm": 4.438389301300049,
"learning_rate": 1.1961089236674058e-06,
"loss": 3.2584,
"step": 88500
},
{
"epoch": 0.9815923854900793,
"grad_norm": 4.40911340713501,
"learning_rate": 9.203807254960352e-07,
"loss": 3.2831,
"step": 89000
},
{
"epoch": 0.9871069494535067,
"grad_norm": 4.270487308502197,
"learning_rate": 6.446525273246646e-07,
"loss": 3.2702,
"step": 89500
},
{
"epoch": 0.9926215134169342,
"grad_norm": 4.149001598358154,
"learning_rate": 3.689243291532939e-07,
"loss": 3.2504,
"step": 90000
},
{
"epoch": 0.9981360773803616,
"grad_norm": 4.047245502471924,
"learning_rate": 9.319613098192327e-08,
"loss": 3.2622,
"step": 90500
},
{
"epoch": 1.0,
"step": 90669,
"total_flos": 2.4124793739319296e+16,
"train_loss": 3.571538051929227,
"train_runtime": 3663.4284,
"train_samples_per_second": 197.997,
"train_steps_per_second": 24.75
}
],
"logging_steps": 500,
"max_steps": 90669,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.4124793739319296e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
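
For reference, a minimal sketch of how the log_history above can be read back and plotted as training loss versus optimizer step. This is not part of the uploaded files; the local file path and the use of matplotlib are assumptions for illustration.

# Minimal sketch: plot loss vs. step from this trainer_state.json.
# Assumes a local copy of the file and matplotlib installed.
import json
import matplotlib.pyplot as plt

with open("trainer_state.json") as f:  # assumed local path to this file
    state = json.load(f)

# Keep only the per-step logging entries; the final summary entry
# reports "train_loss" instead of "loss" and is skipped here.
entries = [e for e in state["log_history"] if "loss" in e and "step" in e]
steps = [e["step"] for e in entries]
losses = [e["loss"] for e in entries]

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("snowflake-arctic-embed-xs-zyda-2 training loss")
plt.show()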