llm_model_m_unigram_hm_1e_v0.1 / trainer_state.json
RefalMachine's picture
load model
061a7e0
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999954795108831,
"eval_steps": 1000,
"global_step": 110607,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00024521369940743435,
"loss": 5.4472,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 0.0002927618508292659,
"loss": 3.467,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.0002997989257927486,
"loss": 3.3454,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 0.00029952720389105764,
"loss": 3.2872,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.00029925548198936656,
"loss": 3.2489,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 0.0002989837600876756,
"loss": 3.2458,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 0.00029871475540500146,
"loss": 3.2073,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 0.00029844303350331044,
"loss": 3.1957,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 0.0002981713116016194,
"loss": 3.1817,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 0.00029789958969992844,
"loss": 3.1631,
"step": 1000
},
{
"epoch": 0.01,
"eval_accuracy": 0.4182117332510669,
"eval_loss": 3.180420398712158,
"eval_runtime": 43.6723,
"eval_samples_per_second": 148.447,
"eval_steps_per_second": 2.496,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.0002976278677982374,
"loss": 3.1598,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 0.0002973561458965464,
"loss": 3.1584,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.00029708442399485537,
"loss": 3.144,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 0.00029681270209316434,
"loss": 3.1346,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.0002965409801914733,
"loss": 3.1359,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 0.00029626925828978235,
"loss": 3.1268,
"step": 1600
},
{
"epoch": 0.02,
"learning_rate": 0.0002959975363880913,
"loss": 3.1175,
"step": 1700
},
{
"epoch": 0.02,
"learning_rate": 0.0002957258144864003,
"loss": 3.1189,
"step": 1800
},
{
"epoch": 0.02,
"learning_rate": 0.0002954540925847093,
"loss": 3.1057,
"step": 1900
},
{
"epoch": 0.02,
"learning_rate": 0.00029518237068301825,
"loss": 3.1124,
"step": 2000
},
{
"epoch": 0.02,
"eval_accuracy": 0.4272265623818554,
"eval_loss": 3.106520891189575,
"eval_runtime": 43.4484,
"eval_samples_per_second": 149.212,
"eval_steps_per_second": 2.509,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 0.0002949106487813272,
"loss": 3.1004,
"step": 2100
},
{
"epoch": 0.02,
"learning_rate": 0.0002946389268796362,
"loss": 3.1018,
"step": 2200
},
{
"epoch": 0.02,
"learning_rate": 0.00029436720497794523,
"loss": 3.0864,
"step": 2300
},
{
"epoch": 0.02,
"learning_rate": 0.0002940954830762542,
"loss": 3.0872,
"step": 2400
},
{
"epoch": 0.02,
"learning_rate": 0.0002938237611745632,
"loss": 3.0883,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 0.00029355475649188906,
"loss": 3.0843,
"step": 2600
},
{
"epoch": 0.02,
"learning_rate": 0.0002932830345901981,
"loss": 3.0815,
"step": 2700
},
{
"epoch": 0.03,
"learning_rate": 0.00029301131268850706,
"loss": 3.0784,
"step": 2800
},
{
"epoch": 0.03,
"learning_rate": 0.00029273959078681604,
"loss": 3.0728,
"step": 2900
},
{
"epoch": 0.03,
"learning_rate": 0.000292467868885125,
"loss": 3.0757,
"step": 3000
},
{
"epoch": 0.03,
"eval_accuracy": 0.42875514543315396,
"eval_loss": 3.0894298553466797,
"eval_runtime": 43.8742,
"eval_samples_per_second": 147.763,
"eval_steps_per_second": 2.484,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 0.000292196146983434,
"loss": 3.0754,
"step": 3100
},
{
"epoch": 0.03,
"learning_rate": 0.000291924425081743,
"loss": 3.0634,
"step": 3200
},
{
"epoch": 0.03,
"learning_rate": 0.00029165270318005194,
"loss": 3.0652,
"step": 3300
},
{
"epoch": 0.03,
"learning_rate": 0.00029138098127836097,
"loss": 3.0566,
"step": 3400
},
{
"epoch": 0.03,
"learning_rate": 0.00029110925937666994,
"loss": 3.067,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 0.0002908375374749789,
"loss": 3.0525,
"step": 3600
},
{
"epoch": 0.03,
"learning_rate": 0.0002905658155732879,
"loss": 3.0595,
"step": 3700
},
{
"epoch": 0.03,
"learning_rate": 0.00029029409367159687,
"loss": 3.0586,
"step": 3800
},
{
"epoch": 0.04,
"learning_rate": 0.0002900223717699059,
"loss": 3.0499,
"step": 3900
},
{
"epoch": 0.04,
"learning_rate": 0.0002897506498682148,
"loss": 3.0488,
"step": 4000
},
{
"epoch": 0.04,
"eval_accuracy": 0.43185525945686004,
"eval_loss": 3.062988758087158,
"eval_runtime": 44.0507,
"eval_samples_per_second": 147.171,
"eval_steps_per_second": 2.474,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 0.00028947892796652385,
"loss": 3.0426,
"step": 4100
},
{
"epoch": 0.04,
"learning_rate": 0.0002892072060648328,
"loss": 3.0433,
"step": 4200
},
{
"epoch": 0.04,
"learning_rate": 0.0002889354841631418,
"loss": 3.0428,
"step": 4300
},
{
"epoch": 0.04,
"learning_rate": 0.00028866376226145083,
"loss": 3.0359,
"step": 4400
},
{
"epoch": 0.04,
"learning_rate": 0.00028839204035975975,
"loss": 3.0386,
"step": 4500
},
{
"epoch": 0.04,
"learning_rate": 0.0002881203184580688,
"loss": 3.034,
"step": 4600
},
{
"epoch": 0.04,
"learning_rate": 0.00028784859655637776,
"loss": 3.0456,
"step": 4700
},
{
"epoch": 0.04,
"learning_rate": 0.00028757687465468673,
"loss": 3.0361,
"step": 4800
},
{
"epoch": 0.04,
"learning_rate": 0.0002873051527529957,
"loss": 3.0412,
"step": 4900
},
{
"epoch": 0.05,
"learning_rate": 0.0002870334308513047,
"loss": 3.0403,
"step": 5000
},
{
"epoch": 0.05,
"eval_accuracy": 0.43361975362410893,
"eval_loss": 3.0423271656036377,
"eval_runtime": 43.1088,
"eval_samples_per_second": 150.387,
"eval_steps_per_second": 2.528,
"step": 5000
},
{
"epoch": 0.05,
"learning_rate": 0.0002867617089496137,
"loss": 3.0341,
"step": 5100
},
{
"epoch": 0.05,
"learning_rate": 0.00028648998704792263,
"loss": 3.0297,
"step": 5200
},
{
"epoch": 0.05,
"learning_rate": 0.00028621826514623166,
"loss": 3.0324,
"step": 5300
},
{
"epoch": 0.05,
"learning_rate": 0.00028594654324454064,
"loss": 3.0317,
"step": 5400
},
{
"epoch": 0.05,
"learning_rate": 0.0002856748213428496,
"loss": 3.0167,
"step": 5500
},
{
"epoch": 0.05,
"learning_rate": 0.0002854030994411586,
"loss": 3.0202,
"step": 5600
},
{
"epoch": 0.05,
"learning_rate": 0.00028513137753946756,
"loss": 3.0231,
"step": 5700
},
{
"epoch": 0.05,
"learning_rate": 0.0002848596556377766,
"loss": 3.0166,
"step": 5800
},
{
"epoch": 0.05,
"learning_rate": 0.0002845879337360855,
"loss": 3.0246,
"step": 5900
},
{
"epoch": 0.05,
"learning_rate": 0.00028431621183439454,
"loss": 3.0172,
"step": 6000
},
{
"epoch": 0.05,
"eval_accuracy": 0.434315993866311,
"eval_loss": 3.038356304168701,
"eval_runtime": 43.5763,
"eval_samples_per_second": 148.774,
"eval_steps_per_second": 2.501,
"step": 6000
},
{
"epoch": 0.06,
"learning_rate": 0.0002840444899327035,
"loss": 3.0123,
"step": 6100
},
{
"epoch": 0.06,
"learning_rate": 0.0002837727680310125,
"loss": 3.0177,
"step": 6200
},
{
"epoch": 0.06,
"learning_rate": 0.0002835010461293215,
"loss": 3.0195,
"step": 6300
},
{
"epoch": 0.06,
"learning_rate": 0.0002832320414466474,
"loss": 3.0175,
"step": 6400
},
{
"epoch": 0.06,
"learning_rate": 0.0002829603195449564,
"loss": 3.0192,
"step": 6500
},
{
"epoch": 0.06,
"learning_rate": 0.00028268859764326535,
"loss": 3.0079,
"step": 6600
},
{
"epoch": 0.06,
"learning_rate": 0.0002824168757415743,
"loss": 3.0138,
"step": 6700
},
{
"epoch": 0.06,
"learning_rate": 0.0002821451538398833,
"loss": 3.0175,
"step": 6800
},
{
"epoch": 0.06,
"learning_rate": 0.00028187343193819233,
"loss": 3.0148,
"step": 6900
},
{
"epoch": 0.06,
"learning_rate": 0.00028160171003650125,
"loss": 3.0102,
"step": 7000
},
{
"epoch": 0.06,
"eval_accuracy": 0.43602120780442366,
"eval_loss": 3.026742696762085,
"eval_runtime": 43.2189,
"eval_samples_per_second": 150.004,
"eval_steps_per_second": 2.522,
"step": 7000
},
{
"epoch": 0.06,
"learning_rate": 0.0002813299881348103,
"loss": 3.0111,
"step": 7100
},
{
"epoch": 0.07,
"learning_rate": 0.00028105826623311925,
"loss": 3.0077,
"step": 7200
},
{
"epoch": 0.07,
"learning_rate": 0.00028078654433142823,
"loss": 3.0055,
"step": 7300
},
{
"epoch": 0.07,
"learning_rate": 0.0002805148224297372,
"loss": 3.0084,
"step": 7400
},
{
"epoch": 0.07,
"learning_rate": 0.0002802431005280462,
"loss": 3.0124,
"step": 7500
},
{
"epoch": 0.07,
"learning_rate": 0.0002799713786263552,
"loss": 3.0051,
"step": 7600
},
{
"epoch": 0.07,
"learning_rate": 0.00027969965672466413,
"loss": 3.0039,
"step": 7700
},
{
"epoch": 0.07,
"learning_rate": 0.00027942793482297316,
"loss": 3.0033,
"step": 7800
},
{
"epoch": 0.07,
"learning_rate": 0.00027915621292128214,
"loss": 3.0044,
"step": 7900
},
{
"epoch": 0.07,
"learning_rate": 0.0002788844910195911,
"loss": 2.9888,
"step": 8000
},
{
"epoch": 0.07,
"eval_accuracy": 0.4361119428490199,
"eval_loss": 3.0189716815948486,
"eval_runtime": 43.5746,
"eval_samples_per_second": 148.779,
"eval_steps_per_second": 2.501,
"step": 8000
},
{
"epoch": 0.07,
"learning_rate": 0.00027861276911790014,
"loss": 3.0097,
"step": 8100
},
{
"epoch": 0.07,
"learning_rate": 0.00027834104721620906,
"loss": 3.008,
"step": 8200
},
{
"epoch": 0.08,
"learning_rate": 0.0002780693253145181,
"loss": 2.9979,
"step": 8300
},
{
"epoch": 0.08,
"learning_rate": 0.00027779760341282707,
"loss": 2.994,
"step": 8400
},
{
"epoch": 0.08,
"learning_rate": 0.00027752588151113604,
"loss": 2.9985,
"step": 8500
},
{
"epoch": 0.08,
"learning_rate": 0.000277254159609445,
"loss": 2.9966,
"step": 8600
},
{
"epoch": 0.08,
"learning_rate": 0.000276982437707754,
"loss": 2.9968,
"step": 8700
},
{
"epoch": 0.08,
"learning_rate": 0.000276710715806063,
"loss": 2.9999,
"step": 8800
},
{
"epoch": 0.08,
"learning_rate": 0.000276438993904372,
"loss": 2.9973,
"step": 8900
},
{
"epoch": 0.08,
"learning_rate": 0.00027616727200268097,
"loss": 3.0024,
"step": 9000
},
{
"epoch": 0.08,
"eval_accuracy": 0.4384970647213073,
"eval_loss": 3.0039989948272705,
"eval_runtime": 44.0911,
"eval_samples_per_second": 147.036,
"eval_steps_per_second": 2.472,
"step": 9000
},
{
"epoch": 0.08,
"learning_rate": 0.00027589555010098995,
"loss": 3.0015,
"step": 9100
},
{
"epoch": 0.08,
"learning_rate": 0.0002756238281992989,
"loss": 2.9946,
"step": 9200
},
{
"epoch": 0.08,
"learning_rate": 0.0002753521062976079,
"loss": 2.9932,
"step": 9300
},
{
"epoch": 0.08,
"learning_rate": 0.0002750803843959169,
"loss": 2.9985,
"step": 9400
},
{
"epoch": 0.09,
"learning_rate": 0.0002748086624942259,
"loss": 2.9913,
"step": 9500
},
{
"epoch": 0.09,
"learning_rate": 0.0002745369405925349,
"loss": 2.9946,
"step": 9600
},
{
"epoch": 0.09,
"learning_rate": 0.00027426521869084385,
"loss": 2.99,
"step": 9700
},
{
"epoch": 0.09,
"learning_rate": 0.00027399349678915283,
"loss": 2.9927,
"step": 9800
},
{
"epoch": 0.09,
"learning_rate": 0.0002737217748874618,
"loss": 2.9883,
"step": 9900
},
{
"epoch": 0.09,
"learning_rate": 0.00027345005298577083,
"loss": 2.9948,
"step": 10000
},
{
"epoch": 0.09,
"eval_accuracy": 0.43782985969337607,
"eval_loss": 3.0057804584503174,
"eval_runtime": 43.6419,
"eval_samples_per_second": 148.55,
"eval_steps_per_second": 2.498,
"step": 10000
},
{
"epoch": 0.09,
"learning_rate": 0.0002731810483030967,
"loss": 2.983,
"step": 10100
},
{
"epoch": 0.09,
"learning_rate": 0.0002729093264014057,
"loss": 2.9806,
"step": 10200
},
{
"epoch": 0.09,
"learning_rate": 0.00027263760449971466,
"loss": 2.9881,
"step": 10300
},
{
"epoch": 0.09,
"learning_rate": 0.00027236588259802364,
"loss": 2.9814,
"step": 10400
},
{
"epoch": 0.09,
"learning_rate": 0.00027209416069633267,
"loss": 2.9824,
"step": 10500
},
{
"epoch": 0.1,
"learning_rate": 0.00027182243879464164,
"loss": 2.9885,
"step": 10600
},
{
"epoch": 0.1,
"learning_rate": 0.0002715507168929506,
"loss": 2.989,
"step": 10700
},
{
"epoch": 0.1,
"learning_rate": 0.0002712789949912596,
"loss": 2.986,
"step": 10800
},
{
"epoch": 0.1,
"learning_rate": 0.00027100727308956857,
"loss": 2.9856,
"step": 10900
},
{
"epoch": 0.1,
"learning_rate": 0.00027073555118787754,
"loss": 2.9774,
"step": 11000
},
{
"epoch": 0.1,
"eval_accuracy": 0.438878756808909,
"eval_loss": 2.9962034225463867,
"eval_runtime": 43.3441,
"eval_samples_per_second": 149.57,
"eval_steps_per_second": 2.515,
"step": 11000
},
{
"epoch": 0.1,
"learning_rate": 0.0002704638292861865,
"loss": 2.9941,
"step": 11100
},
{
"epoch": 0.1,
"learning_rate": 0.00027019210738449555,
"loss": 2.9799,
"step": 11200
},
{
"epoch": 0.1,
"learning_rate": 0.0002699203854828045,
"loss": 2.9834,
"step": 11300
},
{
"epoch": 0.1,
"learning_rate": 0.0002696513808001304,
"loss": 2.9767,
"step": 11400
},
{
"epoch": 0.1,
"learning_rate": 0.0002693796588984394,
"loss": 2.9772,
"step": 11500
},
{
"epoch": 0.1,
"learning_rate": 0.0002691079369967484,
"loss": 2.9891,
"step": 11600
},
{
"epoch": 0.11,
"learning_rate": 0.0002688362150950573,
"loss": 2.9787,
"step": 11700
},
{
"epoch": 0.11,
"learning_rate": 0.00026856449319336635,
"loss": 2.987,
"step": 11800
},
{
"epoch": 0.11,
"learning_rate": 0.00026829277129167533,
"loss": 2.979,
"step": 11900
},
{
"epoch": 0.11,
"learning_rate": 0.0002680210493899843,
"loss": 2.9818,
"step": 12000
},
{
"epoch": 0.11,
"eval_accuracy": 0.4390166740766953,
"eval_loss": 2.9964208602905273,
"eval_runtime": 44.0294,
"eval_samples_per_second": 147.242,
"eval_steps_per_second": 2.476,
"step": 12000
},
{
"epoch": 0.11,
"learning_rate": 0.0002677493274882933,
"loss": 2.9798,
"step": 12100
},
{
"epoch": 0.11,
"learning_rate": 0.00026747760558660225,
"loss": 2.9833,
"step": 12200
},
{
"epoch": 0.11,
"learning_rate": 0.0002672058836849113,
"loss": 2.9787,
"step": 12300
},
{
"epoch": 0.11,
"learning_rate": 0.00026693416178322026,
"loss": 2.9807,
"step": 12400
},
{
"epoch": 0.11,
"learning_rate": 0.00026666515710054614,
"loss": 2.9846,
"step": 12500
},
{
"epoch": 0.11,
"learning_rate": 0.0002663934351988551,
"loss": 2.9758,
"step": 12600
},
{
"epoch": 0.11,
"learning_rate": 0.00026612171329716414,
"loss": 2.9749,
"step": 12700
},
{
"epoch": 0.12,
"learning_rate": 0.00026584999139547306,
"loss": 2.9688,
"step": 12800
},
{
"epoch": 0.12,
"learning_rate": 0.0002655782694937821,
"loss": 2.9886,
"step": 12900
},
{
"epoch": 0.12,
"learning_rate": 0.00026530654759209107,
"loss": 2.9771,
"step": 13000
},
{
"epoch": 0.12,
"eval_accuracy": 0.4395580598427864,
"eval_loss": 2.991270065307617,
"eval_runtime": 43.0298,
"eval_samples_per_second": 150.663,
"eval_steps_per_second": 2.533,
"step": 13000
},
{
"epoch": 0.12,
"learning_rate": 0.00026503482569040004,
"loss": 2.9802,
"step": 13100
},
{
"epoch": 0.12,
"learning_rate": 0.000264763103788709,
"loss": 2.9711,
"step": 13200
},
{
"epoch": 0.12,
"learning_rate": 0.000264491381887018,
"loss": 2.9845,
"step": 13300
},
{
"epoch": 0.12,
"learning_rate": 0.000264219659985327,
"loss": 2.9735,
"step": 13400
},
{
"epoch": 0.12,
"learning_rate": 0.00026394793808363594,
"loss": 2.9731,
"step": 13500
},
{
"epoch": 0.12,
"learning_rate": 0.00026367621618194497,
"loss": 2.9717,
"step": 13600
},
{
"epoch": 0.12,
"learning_rate": 0.00026340449428025395,
"loss": 2.9718,
"step": 13700
},
{
"epoch": 0.12,
"learning_rate": 0.0002631327723785629,
"loss": 2.9766,
"step": 13800
},
{
"epoch": 0.13,
"learning_rate": 0.0002628637676958888,
"loss": 2.9812,
"step": 13900
},
{
"epoch": 0.13,
"learning_rate": 0.00026259204579419783,
"loss": 2.9786,
"step": 14000
},
{
"epoch": 0.13,
"eval_accuracy": 0.43911829732664315,
"eval_loss": 2.9915201663970947,
"eval_runtime": 43.7467,
"eval_samples_per_second": 148.194,
"eval_steps_per_second": 2.492,
"step": 14000
},
{
"epoch": 0.13,
"learning_rate": 0.0002623203238925068,
"loss": 2.9757,
"step": 14100
},
{
"epoch": 0.13,
"learning_rate": 0.0002620486019908158,
"loss": 2.9781,
"step": 14200
},
{
"epoch": 0.13,
"learning_rate": 0.00026177959730814166,
"loss": 2.9733,
"step": 14300
},
{
"epoch": 0.13,
"learning_rate": 0.0002615078754064507,
"loss": 2.9773,
"step": 14400
},
{
"epoch": 0.13,
"learning_rate": 0.0002612361535047596,
"loss": 2.9755,
"step": 14500
},
{
"epoch": 0.13,
"learning_rate": 0.00026096443160306864,
"loss": 2.9837,
"step": 14600
},
{
"epoch": 0.13,
"learning_rate": 0.0002606927097013776,
"loss": 2.9786,
"step": 14700
},
{
"epoch": 0.13,
"learning_rate": 0.0002604209877996866,
"loss": 2.9709,
"step": 14800
},
{
"epoch": 0.13,
"learning_rate": 0.00026014926589799556,
"loss": 2.9797,
"step": 14900
},
{
"epoch": 0.14,
"learning_rate": 0.00025987754399630454,
"loss": 2.9866,
"step": 15000
},
{
"epoch": 0.14,
"eval_accuracy": 0.4393814289559723,
"eval_loss": 2.9924139976501465,
"eval_runtime": 43.2705,
"eval_samples_per_second": 149.825,
"eval_steps_per_second": 2.519,
"step": 15000
},
{
"epoch": 0.14,
"learning_rate": 0.00025960582209461357,
"loss": 2.976,
"step": 15100
},
{
"epoch": 0.14,
"learning_rate": 0.0002593341001929225,
"loss": 2.9674,
"step": 15200
},
{
"epoch": 0.14,
"learning_rate": 0.0002590623782912315,
"loss": 2.98,
"step": 15300
},
{
"epoch": 0.14,
"learning_rate": 0.0002587906563895405,
"loss": 2.9805,
"step": 15400
},
{
"epoch": 0.14,
"learning_rate": 0.00025851893448784947,
"loss": 2.9738,
"step": 15500
},
{
"epoch": 0.14,
"learning_rate": 0.00025824721258615844,
"loss": 2.9702,
"step": 15600
},
{
"epoch": 0.14,
"learning_rate": 0.0002579754906844674,
"loss": 2.9678,
"step": 15700
},
{
"epoch": 0.14,
"learning_rate": 0.00025770376878277645,
"loss": 2.9699,
"step": 15800
},
{
"epoch": 0.14,
"learning_rate": 0.0002574320468810854,
"loss": 2.9717,
"step": 15900
},
{
"epoch": 0.14,
"learning_rate": 0.0002571603249793944,
"loss": 2.9751,
"step": 16000
},
{
"epoch": 0.14,
"eval_accuracy": 0.43892109982972055,
"eval_loss": 2.9917728900909424,
"eval_runtime": 44.2385,
"eval_samples_per_second": 146.547,
"eval_steps_per_second": 2.464,
"step": 16000
},
{
"epoch": 0.15,
"learning_rate": 0.0002568886030777034,
"loss": 2.9653,
"step": 16100
},
{
"epoch": 0.15,
"learning_rate": 0.00025661688117601235,
"loss": 2.9817,
"step": 16200
},
{
"epoch": 0.15,
"learning_rate": 0.0002563451592743214,
"loss": 2.9652,
"step": 16300
},
{
"epoch": 0.15,
"learning_rate": 0.00025607343737263035,
"loss": 2.9704,
"step": 16400
},
{
"epoch": 0.15,
"learning_rate": 0.00025580171547093933,
"loss": 2.9727,
"step": 16500
},
{
"epoch": 0.15,
"learning_rate": 0.0002555299935692483,
"loss": 2.9743,
"step": 16600
},
{
"epoch": 0.15,
"learning_rate": 0.0002552582716675573,
"loss": 2.9719,
"step": 16700
},
{
"epoch": 0.15,
"learning_rate": 0.00025498654976586626,
"loss": 2.9615,
"step": 16800
},
{
"epoch": 0.15,
"learning_rate": 0.00025471482786417523,
"loss": 2.973,
"step": 16900
},
{
"epoch": 0.15,
"learning_rate": 0.00025444310596248426,
"loss": 2.9702,
"step": 17000
},
{
"epoch": 0.15,
"eval_accuracy": 0.4393215438265388,
"eval_loss": 2.992605447769165,
"eval_runtime": 45.7096,
"eval_samples_per_second": 141.83,
"eval_steps_per_second": 2.385,
"step": 17000
},
{
"epoch": 0.15,
"learning_rate": 0.00025417410127981014,
"loss": 2.9689,
"step": 17100
},
{
"epoch": 0.16,
"learning_rate": 0.0002539023793781191,
"loss": 2.9727,
"step": 17200
},
{
"epoch": 0.16,
"learning_rate": 0.0002536306574764281,
"loss": 2.9669,
"step": 17300
},
{
"epoch": 0.16,
"learning_rate": 0.00025335893557473706,
"loss": 2.9717,
"step": 17400
},
{
"epoch": 0.16,
"learning_rate": 0.0002530872136730461,
"loss": 2.9646,
"step": 17500
},
{
"epoch": 0.16,
"learning_rate": 0.00025281549177135507,
"loss": 2.9757,
"step": 17600
},
{
"epoch": 0.16,
"learning_rate": 0.00025254376986966404,
"loss": 2.9679,
"step": 17700
},
{
"epoch": 0.16,
"learning_rate": 0.000252272047967973,
"loss": 2.9691,
"step": 17800
},
{
"epoch": 0.16,
"learning_rate": 0.000252000326066282,
"loss": 2.9718,
"step": 17900
},
{
"epoch": 0.16,
"learning_rate": 0.00025172860416459097,
"loss": 2.9695,
"step": 18000
},
{
"epoch": 0.16,
"eval_accuracy": 0.44013513472641874,
"eval_loss": 2.981644868850708,
"eval_runtime": 43.6409,
"eval_samples_per_second": 148.553,
"eval_steps_per_second": 2.498,
"step": 18000
},
{
"epoch": 0.16,
"learning_rate": 0.0002514568822629,
"loss": 2.9666,
"step": 18100
},
{
"epoch": 0.16,
"learning_rate": 0.0002511878775802259,
"loss": 2.9696,
"step": 18200
},
{
"epoch": 0.17,
"learning_rate": 0.00025091615567853485,
"loss": 2.9687,
"step": 18300
},
{
"epoch": 0.17,
"learning_rate": 0.0002506444337768438,
"loss": 2.9674,
"step": 18400
},
{
"epoch": 0.17,
"learning_rate": 0.0002503727118751528,
"loss": 2.9655,
"step": 18500
},
{
"epoch": 0.17,
"learning_rate": 0.00025010098997346183,
"loss": 2.9661,
"step": 18600
},
{
"epoch": 0.17,
"learning_rate": 0.0002498292680717708,
"loss": 2.9673,
"step": 18700
},
{
"epoch": 0.17,
"learning_rate": 0.0002495575461700798,
"loss": 2.9641,
"step": 18800
},
{
"epoch": 0.17,
"learning_rate": 0.00024928582426838876,
"loss": 2.9598,
"step": 18900
},
{
"epoch": 0.17,
"learning_rate": 0.00024901410236669773,
"loss": 2.9615,
"step": 19000
},
{
"epoch": 0.17,
"eval_accuracy": 0.44022042566833924,
"eval_loss": 2.982591390609741,
"eval_runtime": 43.6998,
"eval_samples_per_second": 148.353,
"eval_steps_per_second": 2.494,
"step": 19000
},
{
"epoch": 0.17,
"learning_rate": 0.00024874238046500676,
"loss": 2.958,
"step": 19100
},
{
"epoch": 0.17,
"learning_rate": 0.0002484706585633157,
"loss": 2.9688,
"step": 19200
},
{
"epoch": 0.17,
"learning_rate": 0.0002481989366616247,
"loss": 2.9603,
"step": 19300
},
{
"epoch": 0.18,
"learning_rate": 0.0002479272147599337,
"loss": 2.9625,
"step": 19400
},
{
"epoch": 0.18,
"learning_rate": 0.00024765549285824266,
"loss": 2.9611,
"step": 19500
},
{
"epoch": 0.18,
"learning_rate": 0.00024738377095655164,
"loss": 2.9594,
"step": 19600
},
{
"epoch": 0.18,
"learning_rate": 0.0002471120490548606,
"loss": 2.9648,
"step": 19700
},
{
"epoch": 0.18,
"learning_rate": 0.00024684032715316964,
"loss": 2.961,
"step": 19800
},
{
"epoch": 0.18,
"learning_rate": 0.0002465686052514786,
"loss": 2.9589,
"step": 19900
},
{
"epoch": 0.18,
"learning_rate": 0.0002462968833497876,
"loss": 2.9609,
"step": 20000
},
{
"epoch": 0.18,
"eval_accuracy": 0.4406414362752659,
"eval_loss": 2.9791083335876465,
"eval_runtime": 44.2323,
"eval_samples_per_second": 146.567,
"eval_steps_per_second": 2.464,
"step": 20000
},
{
"epoch": 0.18,
"learning_rate": 0.00024602516144809657,
"loss": 2.962,
"step": 20100
},
{
"epoch": 0.18,
"learning_rate": 0.00024575343954640554,
"loss": 2.9566,
"step": 20200
},
{
"epoch": 0.18,
"learning_rate": 0.00024548171764471457,
"loss": 2.964,
"step": 20300
},
{
"epoch": 0.18,
"learning_rate": 0.0002452099957430235,
"loss": 2.9573,
"step": 20400
},
{
"epoch": 0.19,
"learning_rate": 0.0002449409910603494,
"loss": 2.9621,
"step": 20500
},
{
"epoch": 0.19,
"learning_rate": 0.0002446692691586584,
"loss": 2.9568,
"step": 20600
},
{
"epoch": 0.19,
"learning_rate": 0.0002443975472569674,
"loss": 2.9643,
"step": 20700
},
{
"epoch": 0.19,
"learning_rate": 0.00024412582535527635,
"loss": 2.9614,
"step": 20800
},
{
"epoch": 0.19,
"learning_rate": 0.00024385410345358535,
"loss": 2.9546,
"step": 20900
},
{
"epoch": 0.19,
"learning_rate": 0.00024358238155189433,
"loss": 2.9607,
"step": 21000
},
{
"epoch": 0.19,
"eval_accuracy": 0.44158508073906716,
"eval_loss": 2.9684245586395264,
"eval_runtime": 43.092,
"eval_samples_per_second": 150.446,
"eval_steps_per_second": 2.529,
"step": 21000
},
{
"epoch": 0.19,
"learning_rate": 0.00024331065965020333,
"loss": 2.9608,
"step": 21100
},
{
"epoch": 0.19,
"learning_rate": 0.00024303893774851228,
"loss": 2.9556,
"step": 21200
},
{
"epoch": 0.19,
"learning_rate": 0.00024276721584682128,
"loss": 2.9579,
"step": 21300
},
{
"epoch": 0.19,
"learning_rate": 0.00024249549394513028,
"loss": 2.9585,
"step": 21400
},
{
"epoch": 0.19,
"learning_rate": 0.00024222377204343926,
"loss": 2.9544,
"step": 21500
},
{
"epoch": 0.2,
"learning_rate": 0.00024195205014174823,
"loss": 2.9614,
"step": 21600
},
{
"epoch": 0.2,
"learning_rate": 0.0002416803282400572,
"loss": 2.9536,
"step": 21700
},
{
"epoch": 0.2,
"learning_rate": 0.00024141132355738314,
"loss": 2.9556,
"step": 21800
},
{
"epoch": 0.2,
"learning_rate": 0.0002411396016556921,
"loss": 2.9559,
"step": 21900
},
{
"epoch": 0.2,
"learning_rate": 0.0002408678797540011,
"loss": 2.9533,
"step": 22000
},
{
"epoch": 0.2,
"eval_accuracy": 0.4422038937432138,
"eval_loss": 2.967719554901123,
"eval_runtime": 42.9224,
"eval_samples_per_second": 151.04,
"eval_steps_per_second": 2.539,
"step": 22000
},
{
"epoch": 0.2,
"learning_rate": 0.00024059615785231007,
"loss": 2.9493,
"step": 22100
},
{
"epoch": 0.2,
"learning_rate": 0.00024032443595061907,
"loss": 2.9543,
"step": 22200
},
{
"epoch": 0.2,
"learning_rate": 0.00024005271404892804,
"loss": 2.9565,
"step": 22300
},
{
"epoch": 0.2,
"learning_rate": 0.00023978099214723702,
"loss": 2.9501,
"step": 22400
},
{
"epoch": 0.2,
"learning_rate": 0.00023950927024554602,
"loss": 2.9395,
"step": 22500
},
{
"epoch": 0.2,
"learning_rate": 0.00023923754834385497,
"loss": 2.9598,
"step": 22600
},
{
"epoch": 0.21,
"learning_rate": 0.00023896854366118087,
"loss": 2.9492,
"step": 22700
},
{
"epoch": 0.21,
"learning_rate": 0.00023869682175948988,
"loss": 2.947,
"step": 22800
},
{
"epoch": 0.21,
"learning_rate": 0.00023842509985779885,
"loss": 2.9573,
"step": 22900
},
{
"epoch": 0.21,
"learning_rate": 0.00023815337795610783,
"loss": 2.9513,
"step": 23000
},
{
"epoch": 0.21,
"eval_accuracy": 0.4420853332849413,
"eval_loss": 2.9676427841186523,
"eval_runtime": 43.0836,
"eval_samples_per_second": 150.475,
"eval_steps_per_second": 2.53,
"step": 23000
},
{
"epoch": 0.21,
"learning_rate": 0.00023788165605441683,
"loss": 2.9472,
"step": 23100
},
{
"epoch": 0.21,
"learning_rate": 0.00023760993415272578,
"loss": 2.9513,
"step": 23200
},
{
"epoch": 0.21,
"learning_rate": 0.00023733821225103478,
"loss": 2.9542,
"step": 23300
},
{
"epoch": 0.21,
"learning_rate": 0.00023706649034934378,
"loss": 2.9497,
"step": 23400
},
{
"epoch": 0.21,
"learning_rate": 0.00023679476844765276,
"loss": 2.9565,
"step": 23500
},
{
"epoch": 0.21,
"learning_rate": 0.00023652576376497863,
"loss": 2.9518,
"step": 23600
},
{
"epoch": 0.21,
"learning_rate": 0.00023625404186328764,
"loss": 2.9471,
"step": 23700
},
{
"epoch": 0.22,
"learning_rate": 0.00023598231996159664,
"loss": 2.956,
"step": 23800
},
{
"epoch": 0.22,
"learning_rate": 0.00023571059805990559,
"loss": 2.953,
"step": 23900
},
{
"epoch": 0.22,
"learning_rate": 0.0002354388761582146,
"loss": 2.9563,
"step": 24000
},
{
"epoch": 0.22,
"eval_accuracy": 0.4428523468619285,
"eval_loss": 2.9609880447387695,
"eval_runtime": 42.9764,
"eval_samples_per_second": 150.85,
"eval_steps_per_second": 2.536,
"step": 24000
},
{
"epoch": 0.22,
"learning_rate": 0.00023516715425652356,
"loss": 2.9458,
"step": 24100
},
{
"epoch": 0.22,
"learning_rate": 0.00023489543235483257,
"loss": 2.9539,
"step": 24200
},
{
"epoch": 0.22,
"learning_rate": 0.00023462371045314151,
"loss": 2.9549,
"step": 24300
},
{
"epoch": 0.22,
"learning_rate": 0.00023435198855145052,
"loss": 2.9496,
"step": 24400
},
{
"epoch": 0.22,
"learning_rate": 0.00023408026664975952,
"loss": 2.9514,
"step": 24500
},
{
"epoch": 0.22,
"learning_rate": 0.0002338085447480685,
"loss": 2.9471,
"step": 24600
},
{
"epoch": 0.22,
"learning_rate": 0.00023353682284637747,
"loss": 2.9448,
"step": 24700
},
{
"epoch": 0.22,
"learning_rate": 0.00023326510094468644,
"loss": 2.948,
"step": 24800
},
{
"epoch": 0.23,
"learning_rate": 0.00023299337904299545,
"loss": 2.9454,
"step": 24900
},
{
"epoch": 0.23,
"learning_rate": 0.00023272165714130445,
"loss": 2.9466,
"step": 25000
},
{
"epoch": 0.23,
"eval_accuracy": 0.44241621374756906,
"eval_loss": 2.9626522064208984,
"eval_runtime": 43.5013,
"eval_samples_per_second": 149.03,
"eval_steps_per_second": 2.506,
"step": 25000
},
{
"epoch": 0.23,
"learning_rate": 0.0002324499352396134,
"loss": 2.9417,
"step": 25100
},
{
"epoch": 0.23,
"learning_rate": 0.0002321782133379224,
"loss": 2.9452,
"step": 25200
},
{
"epoch": 0.23,
"learning_rate": 0.00023190649143623138,
"loss": 2.9406,
"step": 25300
},
{
"epoch": 0.23,
"learning_rate": 0.00023163476953454038,
"loss": 2.945,
"step": 25400
},
{
"epoch": 0.23,
"learning_rate": 0.00023136304763284933,
"loss": 2.9419,
"step": 25500
},
{
"epoch": 0.23,
"learning_rate": 0.00023109132573115833,
"loss": 2.9452,
"step": 25600
},
{
"epoch": 0.23,
"learning_rate": 0.00023081960382946733,
"loss": 2.9435,
"step": 25700
},
{
"epoch": 0.23,
"learning_rate": 0.00023054788192777628,
"loss": 2.947,
"step": 25800
},
{
"epoch": 0.23,
"learning_rate": 0.00023027616002608528,
"loss": 2.9343,
"step": 25900
},
{
"epoch": 0.24,
"learning_rate": 0.00023000443812439426,
"loss": 2.9431,
"step": 26000
},
{
"epoch": 0.24,
"eval_accuracy": 0.442384758932109,
"eval_loss": 2.9589717388153076,
"eval_runtime": 43.1206,
"eval_samples_per_second": 150.346,
"eval_steps_per_second": 2.528,
"step": 26000
},
{
"epoch": 0.24,
"learning_rate": 0.00022973271622270326,
"loss": 2.9431,
"step": 26100
},
{
"epoch": 0.24,
"learning_rate": 0.00022946099432101226,
"loss": 2.9477,
"step": 26200
},
{
"epoch": 0.24,
"learning_rate": 0.0002291892724193212,
"loss": 2.939,
"step": 26300
},
{
"epoch": 0.24,
"learning_rate": 0.0002289175505176302,
"loss": 2.9385,
"step": 26400
},
{
"epoch": 0.24,
"learning_rate": 0.0002286458286159392,
"loss": 2.944,
"step": 26500
},
{
"epoch": 0.24,
"learning_rate": 0.00022837410671424816,
"loss": 2.9404,
"step": 26600
},
{
"epoch": 0.24,
"learning_rate": 0.00022810238481255716,
"loss": 2.9334,
"step": 26700
},
{
"epoch": 0.24,
"learning_rate": 0.00022783066291086614,
"loss": 2.9419,
"step": 26800
},
{
"epoch": 0.24,
"learning_rate": 0.00022755894100917514,
"loss": 2.9432,
"step": 26900
},
{
"epoch": 0.24,
"learning_rate": 0.00022728993632650102,
"loss": 2.9412,
"step": 27000
},
{
"epoch": 0.24,
"eval_accuracy": 0.4435655243124552,
"eval_loss": 2.952514410018921,
"eval_runtime": 43.0804,
"eval_samples_per_second": 150.486,
"eval_steps_per_second": 2.53,
"step": 27000
},
{
"epoch": 0.25,
"learning_rate": 0.00022701821442481,
"loss": 2.9359,
"step": 27100
},
{
"epoch": 0.25,
"learning_rate": 0.000226746492523119,
"loss": 2.9426,
"step": 27200
},
{
"epoch": 0.25,
"learning_rate": 0.00022647477062142797,
"loss": 2.9307,
"step": 27300
},
{
"epoch": 0.25,
"learning_rate": 0.00022620304871973695,
"loss": 2.9353,
"step": 27400
},
{
"epoch": 0.25,
"learning_rate": 0.00022593132681804595,
"loss": 2.9353,
"step": 27500
},
{
"epoch": 0.25,
"learning_rate": 0.0002256596049163549,
"loss": 2.9403,
"step": 27600
},
{
"epoch": 0.25,
"learning_rate": 0.0002253878830146639,
"loss": 2.9393,
"step": 27700
},
{
"epoch": 0.25,
"learning_rate": 0.0002251161611129729,
"loss": 2.9313,
"step": 27800
},
{
"epoch": 0.25,
"learning_rate": 0.00022484443921128188,
"loss": 2.9348,
"step": 27900
},
{
"epoch": 0.25,
"learning_rate": 0.00022457543452860775,
"loss": 2.9299,
"step": 28000
},
{
"epoch": 0.25,
"eval_accuracy": 0.4434947509776701,
"eval_loss": 2.9504144191741943,
"eval_runtime": 43.7459,
"eval_samples_per_second": 148.197,
"eval_steps_per_second": 2.492,
"step": 28000
},
{
"epoch": 0.25,
"learning_rate": 0.00022430371262691676,
"loss": 2.938,
"step": 28100
},
{
"epoch": 0.25,
"learning_rate": 0.00022403199072522576,
"loss": 2.9353,
"step": 28200
},
{
"epoch": 0.26,
"learning_rate": 0.00022376298604255164,
"loss": 2.9329,
"step": 28300
},
{
"epoch": 0.26,
"learning_rate": 0.0002234912641408606,
"loss": 2.9311,
"step": 28400
},
{
"epoch": 0.26,
"learning_rate": 0.0002232195422391696,
"loss": 2.9377,
"step": 28500
},
{
"epoch": 0.26,
"learning_rate": 0.00022294782033747856,
"loss": 2.9303,
"step": 28600
},
{
"epoch": 0.26,
"learning_rate": 0.00022267609843578756,
"loss": 2.9278,
"step": 28700
},
{
"epoch": 0.26,
"learning_rate": 0.00022240437653409657,
"loss": 2.9394,
"step": 28800
},
{
"epoch": 0.26,
"learning_rate": 0.00022213265463240551,
"loss": 2.9332,
"step": 28900
},
{
"epoch": 0.26,
"learning_rate": 0.00022186093273071452,
"loss": 2.9332,
"step": 29000
},
{
"epoch": 0.26,
"eval_accuracy": 0.4434814431711293,
"eval_loss": 2.9485716819763184,
"eval_runtime": 41.8653,
"eval_samples_per_second": 154.854,
"eval_steps_per_second": 2.604,
"step": 29000
},
{
"epoch": 0.26,
"learning_rate": 0.0002215892108290235,
"loss": 2.9339,
"step": 29100
},
{
"epoch": 0.26,
"learning_rate": 0.0002213174889273325,
"loss": 2.9322,
"step": 29200
},
{
"epoch": 0.26,
"learning_rate": 0.0002210457670256415,
"loss": 2.9305,
"step": 29300
},
{
"epoch": 0.27,
"learning_rate": 0.00022077404512395044,
"loss": 2.9321,
"step": 29400
},
{
"epoch": 0.27,
"learning_rate": 0.00022050232322225945,
"loss": 2.9265,
"step": 29500
},
{
"epoch": 0.27,
"learning_rate": 0.00022023331853958532,
"loss": 2.9247,
"step": 29600
},
{
"epoch": 0.27,
"learning_rate": 0.00021996159663789433,
"loss": 2.9312,
"step": 29700
},
{
"epoch": 0.27,
"learning_rate": 0.0002196898747362033,
"loss": 2.9288,
"step": 29800
},
{
"epoch": 0.27,
"learning_rate": 0.0002194181528345123,
"loss": 2.9328,
"step": 29900
},
{
"epoch": 0.27,
"learning_rate": 0.00021914643093282125,
"loss": 2.9255,
"step": 30000
},
{
"epoch": 0.27,
"eval_accuracy": 0.444235753841873,
"eval_loss": 2.942479372024536,
"eval_runtime": 41.7184,
"eval_samples_per_second": 155.399,
"eval_steps_per_second": 2.613,
"step": 30000
},
{
"epoch": 0.27,
"learning_rate": 0.00021887470903113025,
"loss": 2.9265,
"step": 30100
},
{
"epoch": 0.27,
"learning_rate": 0.00021860298712943923,
"loss": 2.9184,
"step": 30200
},
{
"epoch": 0.27,
"learning_rate": 0.00021833126522774823,
"loss": 2.9271,
"step": 30300
},
{
"epoch": 0.27,
"learning_rate": 0.0002180622605450741,
"loss": 2.9232,
"step": 30400
},
{
"epoch": 0.28,
"learning_rate": 0.0002177905386433831,
"loss": 2.9303,
"step": 30500
},
{
"epoch": 0.28,
"learning_rate": 0.00021751881674169206,
"loss": 2.9348,
"step": 30600
},
{
"epoch": 0.28,
"learning_rate": 0.00021724709484000106,
"loss": 2.9218,
"step": 30700
},
{
"epoch": 0.28,
"learning_rate": 0.00021697537293831006,
"loss": 2.9324,
"step": 30800
},
{
"epoch": 0.28,
"learning_rate": 0.00021670365103661904,
"loss": 2.9294,
"step": 30900
},
{
"epoch": 0.28,
"learning_rate": 0.00021643192913492801,
"loss": 2.9242,
"step": 31000
},
{
"epoch": 0.28,
"eval_accuracy": 0.44344756875448005,
"eval_loss": 2.945934534072876,
"eval_runtime": 43.5276,
"eval_samples_per_second": 148.94,
"eval_steps_per_second": 2.504,
"step": 31000
},
{
"epoch": 0.28,
"learning_rate": 0.000216160207233237,
"loss": 2.9231,
"step": 31100
},
{
"epoch": 0.28,
"learning_rate": 0.000215888485331546,
"loss": 2.9269,
"step": 31200
},
{
"epoch": 0.28,
"learning_rate": 0.000215616763429855,
"loss": 2.9247,
"step": 31300
},
{
"epoch": 0.28,
"learning_rate": 0.00021534504152816394,
"loss": 2.9236,
"step": 31400
},
{
"epoch": 0.28,
"learning_rate": 0.00021507331962647295,
"loss": 2.9296,
"step": 31500
},
{
"epoch": 0.29,
"learning_rate": 0.00021480159772478192,
"loss": 2.9267,
"step": 31600
},
{
"epoch": 0.29,
"learning_rate": 0.00021452987582309092,
"loss": 2.9259,
"step": 31700
},
{
"epoch": 0.29,
"learning_rate": 0.00021425815392139987,
"loss": 2.9259,
"step": 31800
},
{
"epoch": 0.29,
"learning_rate": 0.00021398643201970887,
"loss": 2.9236,
"step": 31900
},
{
"epoch": 0.29,
"learning_rate": 0.00021371471011801788,
"loss": 2.9242,
"step": 32000
},
{
"epoch": 0.29,
"eval_accuracy": 0.4445194520813107,
"eval_loss": 2.9377670288085938,
"eval_runtime": 43.8729,
"eval_samples_per_second": 147.768,
"eval_steps_per_second": 2.484,
"step": 32000
},
{
"epoch": 0.29,
"learning_rate": 0.00021344298821632685,
"loss": 2.9178,
"step": 32100
},
{
"epoch": 0.29,
"learning_rate": 0.00021317126631463583,
"loss": 2.9257,
"step": 32200
},
{
"epoch": 0.29,
"learning_rate": 0.0002128995444129448,
"loss": 2.9227,
"step": 32300
},
{
"epoch": 0.29,
"learning_rate": 0.0002126278225112538,
"loss": 2.9228,
"step": 32400
},
{
"epoch": 0.29,
"learning_rate": 0.0002123561006095628,
"loss": 2.9183,
"step": 32500
},
{
"epoch": 0.29,
"learning_rate": 0.00021208437870787175,
"loss": 2.9196,
"step": 32600
},
{
"epoch": 0.3,
"learning_rate": 0.00021181265680618076,
"loss": 2.9143,
"step": 32700
},
{
"epoch": 0.3,
"learning_rate": 0.00021154093490448973,
"loss": 2.9192,
"step": 32800
},
{
"epoch": 0.3,
"learning_rate": 0.0002112692130027987,
"loss": 2.9187,
"step": 32900
},
{
"epoch": 0.3,
"learning_rate": 0.00021099749110110768,
"loss": 2.9267,
"step": 33000
},
{
"epoch": 0.3,
"eval_accuracy": 0.4452544059425405,
"eval_loss": 2.9316306114196777,
"eval_runtime": 45.514,
"eval_samples_per_second": 142.44,
"eval_steps_per_second": 2.395,
"step": 33000
},
{
"epoch": 0.3,
"learning_rate": 0.00021072576919941669,
"loss": 2.9169,
"step": 33100
},
{
"epoch": 0.3,
"learning_rate": 0.0002104540472977257,
"loss": 2.9219,
"step": 33200
},
{
"epoch": 0.3,
"learning_rate": 0.00021018232539603464,
"loss": 2.9096,
"step": 33300
},
{
"epoch": 0.3,
"learning_rate": 0.00020991060349434364,
"loss": 2.9202,
"step": 33400
},
{
"epoch": 0.3,
"learning_rate": 0.0002096388815926526,
"loss": 2.9241,
"step": 33500
},
{
"epoch": 0.3,
"learning_rate": 0.00020936987690997852,
"loss": 2.9148,
"step": 33600
},
{
"epoch": 0.3,
"learning_rate": 0.0002090981550082875,
"loss": 2.9194,
"step": 33700
},
{
"epoch": 0.31,
"learning_rate": 0.0002088264331065965,
"loss": 2.9267,
"step": 33800
},
{
"epoch": 0.31,
"learning_rate": 0.00020855471120490544,
"loss": 2.9164,
"step": 33900
},
{
"epoch": 0.31,
"learning_rate": 0.00020828298930321445,
"loss": 2.9151,
"step": 34000
},
{
"epoch": 0.31,
"eval_accuracy": 0.44544252993500344,
"eval_loss": 2.931532382965088,
"eval_runtime": 43.496,
"eval_samples_per_second": 149.048,
"eval_steps_per_second": 2.506,
"step": 34000
},
{
"epoch": 0.31,
"learning_rate": 0.00020801126740152345,
"loss": 2.9178,
"step": 34100
},
{
"epoch": 0.31,
"learning_rate": 0.00020773954549983242,
"loss": 2.9119,
"step": 34200
},
{
"epoch": 0.31,
"learning_rate": 0.00020746782359814143,
"loss": 2.9143,
"step": 34300
},
{
"epoch": 0.31,
"learning_rate": 0.00020719610169645037,
"loss": 2.9084,
"step": 34400
},
{
"epoch": 0.31,
"learning_rate": 0.00020692437979475938,
"loss": 2.9227,
"step": 34500
},
{
"epoch": 0.31,
"learning_rate": 0.00020665265789306835,
"loss": 2.9159,
"step": 34600
},
{
"epoch": 0.31,
"learning_rate": 0.00020638365321039425,
"loss": 2.9151,
"step": 34700
},
{
"epoch": 0.31,
"learning_rate": 0.00020611193130870323,
"loss": 2.9218,
"step": 34800
},
{
"epoch": 0.32,
"learning_rate": 0.00020584020940701223,
"loss": 2.9169,
"step": 34900
},
{
"epoch": 0.32,
"learning_rate": 0.00020556848750532118,
"loss": 2.9105,
"step": 35000
},
{
"epoch": 0.32,
"eval_accuracy": 0.4455647197950598,
"eval_loss": 2.928622245788574,
"eval_runtime": 45.1155,
"eval_samples_per_second": 143.698,
"eval_steps_per_second": 2.416,
"step": 35000
},
{
"epoch": 0.32,
"learning_rate": 0.00020529676560363018,
"loss": 2.9135,
"step": 35100
},
{
"epoch": 0.32,
"learning_rate": 0.00020502504370193919,
"loss": 2.9099,
"step": 35200
},
{
"epoch": 0.32,
"learning_rate": 0.00020475332180024816,
"loss": 2.9114,
"step": 35300
},
{
"epoch": 0.32,
"learning_rate": 0.00020448159989855714,
"loss": 2.9169,
"step": 35400
},
{
"epoch": 0.32,
"learning_rate": 0.0002042098779968661,
"loss": 2.9098,
"step": 35500
},
{
"epoch": 0.32,
"learning_rate": 0.00020393815609517511,
"loss": 2.9126,
"step": 35600
},
{
"epoch": 0.32,
"learning_rate": 0.00020366643419348412,
"loss": 2.9095,
"step": 35700
},
{
"epoch": 0.32,
"learning_rate": 0.00020339471229179306,
"loss": 2.9086,
"step": 35800
},
{
"epoch": 0.32,
"learning_rate": 0.00020312299039010207,
"loss": 2.9077,
"step": 35900
},
{
"epoch": 0.33,
"learning_rate": 0.00020285126848841104,
"loss": 2.9053,
"step": 36000
},
{
"epoch": 0.33,
"eval_accuracy": 0.4457353016789008,
"eval_loss": 2.924194097518921,
"eval_runtime": 41.9708,
"eval_samples_per_second": 154.464,
"eval_steps_per_second": 2.597,
"step": 36000
},
{
"epoch": 0.33,
"learning_rate": 0.00020257954658672004,
"loss": 2.9099,
"step": 36100
},
{
"epoch": 0.33,
"learning_rate": 0.000202307824685029,
"loss": 2.9118,
"step": 36200
},
{
"epoch": 0.33,
"learning_rate": 0.000202036102783338,
"loss": 2.91,
"step": 36300
},
{
"epoch": 0.33,
"learning_rate": 0.000201764380881647,
"loss": 2.8983,
"step": 36400
},
{
"epoch": 0.33,
"learning_rate": 0.00020149265897995595,
"loss": 2.8964,
"step": 36500
},
{
"epoch": 0.33,
"learning_rate": 0.00020122093707826495,
"loss": 2.9024,
"step": 36600
},
{
"epoch": 0.33,
"learning_rate": 0.00020095193239559085,
"loss": 2.9057,
"step": 36700
},
{
"epoch": 0.33,
"learning_rate": 0.00020068021049389983,
"loss": 2.9094,
"step": 36800
},
{
"epoch": 0.33,
"learning_rate": 0.0002004084885922088,
"loss": 2.9071,
"step": 36900
},
{
"epoch": 0.33,
"learning_rate": 0.0002001367666905178,
"loss": 2.9023,
"step": 37000
},
{
"epoch": 0.33,
"eval_accuracy": 0.44664325702516083,
"eval_loss": 2.9194602966308594,
"eval_runtime": 42.9573,
"eval_samples_per_second": 150.917,
"eval_steps_per_second": 2.537,
"step": 37000
},
{
"epoch": 0.34,
"learning_rate": 0.00019986504478882678,
"loss": 2.9047,
"step": 37100
},
{
"epoch": 0.34,
"learning_rate": 0.00019959332288713575,
"loss": 2.9097,
"step": 37200
},
{
"epoch": 0.34,
"learning_rate": 0.00019932160098544476,
"loss": 2.908,
"step": 37300
},
{
"epoch": 0.34,
"learning_rate": 0.00019905259630277066,
"loss": 2.9019,
"step": 37400
},
{
"epoch": 0.34,
"learning_rate": 0.0001987808744010796,
"loss": 2.9105,
"step": 37500
},
{
"epoch": 0.34,
"learning_rate": 0.0001985091524993886,
"loss": 2.9064,
"step": 37600
},
{
"epoch": 0.34,
"learning_rate": 0.0001982401478167145,
"loss": 2.9053,
"step": 37700
},
{
"epoch": 0.34,
"learning_rate": 0.0001979684259150235,
"loss": 2.906,
"step": 37800
},
{
"epoch": 0.34,
"learning_rate": 0.00019769670401333247,
"loss": 2.8997,
"step": 37900
},
{
"epoch": 0.34,
"learning_rate": 0.00019742498211164147,
"loss": 2.8946,
"step": 38000
},
{
"epoch": 0.34,
"eval_accuracy": 0.4468059752051368,
"eval_loss": 2.917731285095215,
"eval_runtime": 43.2928,
"eval_samples_per_second": 149.748,
"eval_steps_per_second": 2.518,
"step": 38000
},
{
"epoch": 0.34,
"learning_rate": 0.00019715326020995042,
"loss": 2.9018,
"step": 38100
},
{
"epoch": 0.35,
"learning_rate": 0.00019688153830825942,
"loss": 2.8969,
"step": 38200
},
{
"epoch": 0.35,
"learning_rate": 0.00019660981640656842,
"loss": 2.9104,
"step": 38300
},
{
"epoch": 0.35,
"learning_rate": 0.0001963380945048774,
"loss": 2.9057,
"step": 38400
},
{
"epoch": 0.35,
"learning_rate": 0.00019606637260318637,
"loss": 2.9094,
"step": 38500
},
{
"epoch": 0.35,
"learning_rate": 0.00019579465070149535,
"loss": 2.9008,
"step": 38600
},
{
"epoch": 0.35,
"learning_rate": 0.00019552292879980435,
"loss": 2.8998,
"step": 38700
},
{
"epoch": 0.35,
"learning_rate": 0.00019525120689811335,
"loss": 2.9019,
"step": 38800
},
{
"epoch": 0.35,
"learning_rate": 0.0001949794849964223,
"loss": 2.8925,
"step": 38900
},
{
"epoch": 0.35,
"learning_rate": 0.0001947077630947313,
"loss": 2.9037,
"step": 39000
},
{
"epoch": 0.35,
"eval_accuracy": 0.44703039321543825,
"eval_loss": 2.9147427082061768,
"eval_runtime": 43.7223,
"eval_samples_per_second": 148.277,
"eval_steps_per_second": 2.493,
"step": 39000
},
{
"epoch": 0.35,
"learning_rate": 0.00019443604119304028,
"loss": 2.9052,
"step": 39100
},
{
"epoch": 0.35,
"learning_rate": 0.00019416431929134928,
"loss": 2.9038,
"step": 39200
},
{
"epoch": 0.36,
"learning_rate": 0.00019389259738965823,
"loss": 2.9046,
"step": 39300
},
{
"epoch": 0.36,
"learning_rate": 0.00019362087548796723,
"loss": 2.903,
"step": 39400
},
{
"epoch": 0.36,
"learning_rate": 0.00019334915358627623,
"loss": 2.8919,
"step": 39500
},
{
"epoch": 0.36,
"learning_rate": 0.00019307743168458518,
"loss": 2.8936,
"step": 39600
},
{
"epoch": 0.36,
"learning_rate": 0.00019280570978289418,
"loss": 2.8985,
"step": 39700
},
{
"epoch": 0.36,
"learning_rate": 0.00019253398788120316,
"loss": 2.8955,
"step": 39800
},
{
"epoch": 0.36,
"learning_rate": 0.00019226226597951216,
"loss": 2.8943,
"step": 39900
},
{
"epoch": 0.36,
"learning_rate": 0.00019199326129683804,
"loss": 2.8893,
"step": 40000
},
{
"epoch": 0.36,
"eval_accuracy": 0.44681383890900184,
"eval_loss": 2.9129724502563477,
"eval_runtime": 42.9613,
"eval_samples_per_second": 150.903,
"eval_steps_per_second": 2.537,
"step": 40000
},
{
"epoch": 0.36,
"learning_rate": 0.00019172153939514704,
"loss": 2.8923,
"step": 40100
},
{
"epoch": 0.36,
"learning_rate": 0.00019144981749345602,
"loss": 2.8998,
"step": 40200
},
{
"epoch": 0.36,
"learning_rate": 0.000191178095591765,
"loss": 2.8931,
"step": 40300
},
{
"epoch": 0.37,
"learning_rate": 0.00019090637369007397,
"loss": 2.8965,
"step": 40400
},
{
"epoch": 0.37,
"learning_rate": 0.00019063465178838297,
"loss": 2.8992,
"step": 40500
},
{
"epoch": 0.37,
"learning_rate": 0.00019036292988669197,
"loss": 2.8974,
"step": 40600
},
{
"epoch": 0.37,
"learning_rate": 0.00019009120798500092,
"loss": 2.8929,
"step": 40700
},
{
"epoch": 0.37,
"learning_rate": 0.00018981948608330992,
"loss": 2.8919,
"step": 40800
},
{
"epoch": 0.37,
"learning_rate": 0.0001895477641816189,
"loss": 2.8907,
"step": 40900
},
{
"epoch": 0.37,
"learning_rate": 0.0001892760422799279,
"loss": 2.8891,
"step": 41000
},
{
"epoch": 0.37,
"eval_accuracy": 0.4481204235511882,
"eval_loss": 2.9055044651031494,
"eval_runtime": 43.4382,
"eval_samples_per_second": 149.246,
"eval_steps_per_second": 2.509,
"step": 41000
},
{
"epoch": 0.37,
"learning_rate": 0.00018900432037823687,
"loss": 2.8892,
"step": 41100
},
{
"epoch": 0.37,
"learning_rate": 0.00018873259847654585,
"loss": 2.8979,
"step": 41200
},
{
"epoch": 0.37,
"learning_rate": 0.00018846087657485485,
"loss": 2.8864,
"step": 41300
},
{
"epoch": 0.37,
"learning_rate": 0.0001881891546731638,
"loss": 2.8905,
"step": 41400
},
{
"epoch": 0.38,
"learning_rate": 0.0001879174327714728,
"loss": 2.8849,
"step": 41500
},
{
"epoch": 0.38,
"learning_rate": 0.0001876457108697818,
"loss": 2.8959,
"step": 41600
},
{
"epoch": 0.38,
"learning_rate": 0.00018737398896809078,
"loss": 2.8923,
"step": 41700
},
{
"epoch": 0.38,
"learning_rate": 0.00018710226706639978,
"loss": 2.8878,
"step": 41800
},
{
"epoch": 0.38,
"learning_rate": 0.00018683326238372566,
"loss": 2.8848,
"step": 41900
},
{
"epoch": 0.38,
"learning_rate": 0.00018656154048203463,
"loss": 2.8851,
"step": 42000
},
{
"epoch": 0.38,
"eval_accuracy": 0.4484996960376006,
"eval_loss": 2.90169358253479,
"eval_runtime": 44.5924,
"eval_samples_per_second": 145.384,
"eval_steps_per_second": 2.444,
"step": 42000
},
{
"epoch": 0.38,
"learning_rate": 0.0001862898185803436,
"loss": 2.8892,
"step": 42100
},
{
"epoch": 0.38,
"learning_rate": 0.0001860180966786526,
"loss": 2.8835,
"step": 42200
},
{
"epoch": 0.38,
"learning_rate": 0.0001857463747769616,
"loss": 2.8868,
"step": 42300
},
{
"epoch": 0.38,
"learning_rate": 0.0001854746528752706,
"loss": 2.89,
"step": 42400
},
{
"epoch": 0.38,
"learning_rate": 0.00018520293097357954,
"loss": 2.8903,
"step": 42500
},
{
"epoch": 0.39,
"learning_rate": 0.00018493120907188854,
"loss": 2.8868,
"step": 42600
},
{
"epoch": 0.39,
"learning_rate": 0.00018466220438921442,
"loss": 2.8882,
"step": 42700
},
{
"epoch": 0.39,
"learning_rate": 0.00018439048248752342,
"loss": 2.8788,
"step": 42800
},
{
"epoch": 0.39,
"learning_rate": 0.0001841187605858324,
"loss": 2.8884,
"step": 42900
},
{
"epoch": 0.39,
"learning_rate": 0.0001838470386841414,
"loss": 2.8909,
"step": 43000
},
{
"epoch": 0.39,
"eval_accuracy": 0.44834423666119233,
"eval_loss": 2.9010777473449707,
"eval_runtime": 43.3319,
"eval_samples_per_second": 149.613,
"eval_steps_per_second": 2.515,
"step": 43000
},
{
"epoch": 0.39,
"learning_rate": 0.0001835753167824504,
"loss": 2.8868,
"step": 43100
},
{
"epoch": 0.39,
"learning_rate": 0.00018330359488075935,
"loss": 2.8935,
"step": 43200
},
{
"epoch": 0.39,
"learning_rate": 0.00018303187297906835,
"loss": 2.883,
"step": 43300
},
{
"epoch": 0.39,
"learning_rate": 0.00018276015107737733,
"loss": 2.8895,
"step": 43400
},
{
"epoch": 0.39,
"learning_rate": 0.0001824911463947032,
"loss": 2.8958,
"step": 43500
},
{
"epoch": 0.39,
"learning_rate": 0.0001822194244930122,
"loss": 2.8916,
"step": 43600
},
{
"epoch": 0.4,
"learning_rate": 0.0001819477025913212,
"loss": 2.8949,
"step": 43700
},
{
"epoch": 0.4,
"learning_rate": 0.00018167869790864708,
"loss": 2.8898,
"step": 43800
},
{
"epoch": 0.4,
"learning_rate": 0.00018140697600695606,
"loss": 2.8887,
"step": 43900
},
{
"epoch": 0.4,
"learning_rate": 0.00018113525410526506,
"loss": 2.896,
"step": 44000
},
{
"epoch": 0.4,
"eval_accuracy": 0.4478663654263186,
"eval_loss": 2.9061102867126465,
"eval_runtime": 43.1173,
"eval_samples_per_second": 150.357,
"eval_steps_per_second": 2.528,
"step": 44000
},
{
"epoch": 0.4,
"learning_rate": 0.00018086353220357404,
"loss": 2.8965,
"step": 44100
},
{
"epoch": 0.4,
"learning_rate": 0.000180591810301883,
"loss": 2.8969,
"step": 44200
},
{
"epoch": 0.4,
"learning_rate": 0.00018032008840019201,
"loss": 2.8913,
"step": 44300
},
{
"epoch": 0.4,
"learning_rate": 0.00018004836649850096,
"loss": 2.8897,
"step": 44400
},
{
"epoch": 0.4,
"learning_rate": 0.00017977664459680996,
"loss": 2.8952,
"step": 44500
},
{
"epoch": 0.4,
"learning_rate": 0.00017950492269511897,
"loss": 2.9008,
"step": 44600
},
{
"epoch": 0.4,
"learning_rate": 0.00017923320079342794,
"loss": 2.8884,
"step": 44700
},
{
"epoch": 0.41,
"learning_rate": 0.00017896147889173694,
"loss": 2.8971,
"step": 44800
},
{
"epoch": 0.41,
"learning_rate": 0.0001786897569900459,
"loss": 2.8824,
"step": 44900
},
{
"epoch": 0.41,
"learning_rate": 0.0001784180350883549,
"loss": 2.8918,
"step": 45000
},
{
"epoch": 0.41,
"eval_accuracy": 0.44788874673731904,
"eval_loss": 2.90425443649292,
"eval_runtime": 45.928,
"eval_samples_per_second": 141.156,
"eval_steps_per_second": 2.373,
"step": 45000
},
{
"epoch": 0.41,
"learning_rate": 0.00017814631318666387,
"loss": 2.886,
"step": 45100
},
{
"epoch": 0.41,
"learning_rate": 0.00017787459128497285,
"loss": 2.8935,
"step": 45200
},
{
"epoch": 0.41,
"learning_rate": 0.00017760286938328185,
"loss": 2.8851,
"step": 45300
},
{
"epoch": 0.41,
"learning_rate": 0.00017733114748159082,
"loss": 2.8869,
"step": 45400
},
{
"epoch": 0.41,
"learning_rate": 0.00017705942557989983,
"loss": 2.8816,
"step": 45500
},
{
"epoch": 0.41,
"learning_rate": 0.00017678770367820877,
"loss": 2.8726,
"step": 45600
},
{
"epoch": 0.41,
"learning_rate": 0.00017651598177651778,
"loss": 2.8815,
"step": 45700
},
{
"epoch": 0.41,
"learning_rate": 0.00017624425987482678,
"loss": 2.8835,
"step": 45800
},
{
"epoch": 0.41,
"learning_rate": 0.00017597253797313575,
"loss": 2.8814,
"step": 45900
},
{
"epoch": 0.42,
"learning_rate": 0.00017570081607144473,
"loss": 2.8847,
"step": 46000
},
{
"epoch": 0.42,
"eval_accuracy": 0.4490059975864478,
"eval_loss": 2.89544415473938,
"eval_runtime": 42.9804,
"eval_samples_per_second": 150.836,
"eval_steps_per_second": 2.536,
"step": 46000
},
{
"epoch": 0.42,
"learning_rate": 0.0001754290941697537,
"loss": 2.8699,
"step": 46100
},
{
"epoch": 0.42,
"learning_rate": 0.0001751573722680627,
"loss": 2.8829,
"step": 46200
},
{
"epoch": 0.42,
"learning_rate": 0.00017488565036637165,
"loss": 2.8773,
"step": 46300
},
{
"epoch": 0.42,
"learning_rate": 0.00017461392846468066,
"loss": 2.8812,
"step": 46400
},
{
"epoch": 0.42,
"learning_rate": 0.00017434220656298966,
"loss": 2.8805,
"step": 46500
},
{
"epoch": 0.42,
"learning_rate": 0.00017407048466129863,
"loss": 2.8812,
"step": 46600
},
{
"epoch": 0.42,
"learning_rate": 0.00017379876275960764,
"loss": 2.8826,
"step": 46700
},
{
"epoch": 0.42,
"learning_rate": 0.00017352704085791659,
"loss": 2.8801,
"step": 46800
},
{
"epoch": 0.42,
"learning_rate": 0.00017325803617524252,
"loss": 2.8787,
"step": 46900
},
{
"epoch": 0.42,
"learning_rate": 0.00017298631427355146,
"loss": 2.8749,
"step": 47000
},
{
"epoch": 0.42,
"eval_accuracy": 0.44940160238088755,
"eval_loss": 2.8912456035614014,
"eval_runtime": 43.8328,
"eval_samples_per_second": 147.903,
"eval_steps_per_second": 2.487,
"step": 47000
},
{
"epoch": 0.43,
"learning_rate": 0.00017271730959087737,
"loss": 2.8715,
"step": 47100
},
{
"epoch": 0.43,
"learning_rate": 0.00017244558768918637,
"loss": 2.8804,
"step": 47200
},
{
"epoch": 0.43,
"learning_rate": 0.00017217386578749535,
"loss": 2.8802,
"step": 47300
},
{
"epoch": 0.43,
"learning_rate": 0.00017190214388580432,
"loss": 2.8779,
"step": 47400
},
{
"epoch": 0.43,
"learning_rate": 0.00017163042198411332,
"loss": 2.878,
"step": 47500
},
{
"epoch": 0.43,
"learning_rate": 0.00017135870008242227,
"loss": 2.8835,
"step": 47600
},
{
"epoch": 0.43,
"learning_rate": 0.00017108697818073127,
"loss": 2.8758,
"step": 47700
},
{
"epoch": 0.43,
"learning_rate": 0.00017081525627904025,
"loss": 2.8751,
"step": 47800
},
{
"epoch": 0.43,
"learning_rate": 0.00017054353437734925,
"loss": 2.8737,
"step": 47900
},
{
"epoch": 0.43,
"learning_rate": 0.00017027181247565825,
"loss": 2.8832,
"step": 48000
},
{
"epoch": 0.43,
"eval_accuracy": 0.4496018243792967,
"eval_loss": 2.891221761703491,
"eval_runtime": 43.1479,
"eval_samples_per_second": 150.251,
"eval_steps_per_second": 2.526,
"step": 48000
},
{
"epoch": 0.43,
"learning_rate": 0.0001700000905739672,
"loss": 2.8757,
"step": 48100
},
{
"epoch": 0.44,
"learning_rate": 0.0001697283686722762,
"loss": 2.8725,
"step": 48200
},
{
"epoch": 0.44,
"learning_rate": 0.00016945664677058518,
"loss": 2.8749,
"step": 48300
},
{
"epoch": 0.44,
"learning_rate": 0.00016918492486889416,
"loss": 2.8747,
"step": 48400
},
{
"epoch": 0.44,
"learning_rate": 0.00016891320296720316,
"loss": 2.8724,
"step": 48500
},
{
"epoch": 0.44,
"learning_rate": 0.00016864148106551213,
"loss": 2.8717,
"step": 48600
},
{
"epoch": 0.44,
"learning_rate": 0.00016836975916382114,
"loss": 2.8653,
"step": 48700
},
{
"epoch": 0.44,
"learning_rate": 0.00016809803726213008,
"loss": 2.869,
"step": 48800
},
{
"epoch": 0.44,
"learning_rate": 0.00016782631536043909,
"loss": 2.8763,
"step": 48900
},
{
"epoch": 0.44,
"learning_rate": 0.0001675545934587481,
"loss": 2.8745,
"step": 49000
},
{
"epoch": 0.44,
"eval_accuracy": 0.45002646438800725,
"eval_loss": 2.8852970600128174,
"eval_runtime": 43.6365,
"eval_samples_per_second": 148.568,
"eval_steps_per_second": 2.498,
"step": 49000
},
{
"epoch": 0.44,
"learning_rate": 0.00016728287155705706,
"loss": 2.8753,
"step": 49100
},
{
"epoch": 0.44,
"learning_rate": 0.00016701114965536604,
"loss": 2.8684,
"step": 49200
},
{
"epoch": 0.45,
"learning_rate": 0.00016673942775367501,
"loss": 2.8711,
"step": 49300
},
{
"epoch": 0.45,
"learning_rate": 0.00016646770585198402,
"loss": 2.8646,
"step": 49400
},
{
"epoch": 0.45,
"learning_rate": 0.00016619598395029296,
"loss": 2.865,
"step": 49500
},
{
"epoch": 0.45,
"learning_rate": 0.0001659269792676189,
"loss": 2.8773,
"step": 49600
},
{
"epoch": 0.45,
"learning_rate": 0.00016565525736592787,
"loss": 2.8703,
"step": 49700
},
{
"epoch": 0.45,
"learning_rate": 0.00016538353546423687,
"loss": 2.8722,
"step": 49800
},
{
"epoch": 0.45,
"learning_rate": 0.00016511181356254582,
"loss": 2.8713,
"step": 49900
},
{
"epoch": 0.45,
"learning_rate": 0.00016484009166085482,
"loss": 2.8717,
"step": 50000
},
{
"epoch": 0.45,
"eval_accuracy": 0.45021942758284866,
"eval_loss": 2.8834283351898193,
"eval_runtime": 43.5477,
"eval_samples_per_second": 148.871,
"eval_steps_per_second": 2.503,
"step": 50000
},
{
"epoch": 0.45,
"learning_rate": 0.00016456836975916383,
"loss": 2.8727,
"step": 50100
},
{
"epoch": 0.45,
"learning_rate": 0.00016429664785747277,
"loss": 2.8622,
"step": 50200
},
{
"epoch": 0.45,
"learning_rate": 0.00016402492595578178,
"loss": 2.8707,
"step": 50300
},
{
"epoch": 0.46,
"learning_rate": 0.00016375320405409075,
"loss": 2.8645,
"step": 50400
},
{
"epoch": 0.46,
"learning_rate": 0.00016348148215239975,
"loss": 2.8642,
"step": 50500
},
{
"epoch": 0.46,
"learning_rate": 0.00016321247746972563,
"loss": 2.8679,
"step": 50600
},
{
"epoch": 0.46,
"learning_rate": 0.00016294075556803463,
"loss": 2.871,
"step": 50700
},
{
"epoch": 0.46,
"learning_rate": 0.0001626690336663436,
"loss": 2.867,
"step": 50800
},
{
"epoch": 0.46,
"learning_rate": 0.00016239731176465258,
"loss": 2.8643,
"step": 50900
},
{
"epoch": 0.46,
"learning_rate": 0.00016212558986296156,
"loss": 2.8659,
"step": 51000
},
{
"epoch": 0.46,
"eval_accuracy": 0.45029624992060685,
"eval_loss": 2.883072853088379,
"eval_runtime": 43.5545,
"eval_samples_per_second": 148.848,
"eval_steps_per_second": 2.503,
"step": 51000
},
{
"epoch": 0.46,
"learning_rate": 0.00016185386796127056,
"loss": 2.8694,
"step": 51100
},
{
"epoch": 0.46,
"learning_rate": 0.00016158214605957956,
"loss": 2.8671,
"step": 51200
},
{
"epoch": 0.46,
"learning_rate": 0.0001613104241578885,
"loss": 2.8624,
"step": 51300
},
{
"epoch": 0.46,
"learning_rate": 0.00016103870225619751,
"loss": 2.8665,
"step": 51400
},
{
"epoch": 0.47,
"learning_rate": 0.0001607669803545065,
"loss": 2.8613,
"step": 51500
},
{
"epoch": 0.47,
"learning_rate": 0.00016049525845281547,
"loss": 2.8637,
"step": 51600
},
{
"epoch": 0.47,
"learning_rate": 0.00016022353655112447,
"loss": 2.8662,
"step": 51700
},
{
"epoch": 0.47,
"learning_rate": 0.00015995181464943344,
"loss": 2.8652,
"step": 51800
},
{
"epoch": 0.47,
"learning_rate": 0.00015968009274774245,
"loss": 2.8673,
"step": 51900
},
{
"epoch": 0.47,
"learning_rate": 0.0001594083708460514,
"loss": 2.865,
"step": 52000
},
{
"epoch": 0.47,
"eval_accuracy": 0.450486793514259,
"eval_loss": 2.878352403640747,
"eval_runtime": 43.3417,
"eval_samples_per_second": 149.579,
"eval_steps_per_second": 2.515,
"step": 52000
},
{
"epoch": 0.47,
"learning_rate": 0.0001591366489443604,
"loss": 2.8688,
"step": 52100
},
{
"epoch": 0.47,
"learning_rate": 0.00015886492704266937,
"loss": 2.862,
"step": 52200
},
{
"epoch": 0.47,
"learning_rate": 0.00015859320514097837,
"loss": 2.8646,
"step": 52300
},
{
"epoch": 0.47,
"learning_rate": 0.00015832148323928735,
"loss": 2.8672,
"step": 52400
},
{
"epoch": 0.47,
"learning_rate": 0.00015804976133759632,
"loss": 2.8594,
"step": 52500
},
{
"epoch": 0.48,
"learning_rate": 0.00015777803943590533,
"loss": 2.8558,
"step": 52600
},
{
"epoch": 0.48,
"learning_rate": 0.0001575063175342143,
"loss": 2.8576,
"step": 52700
},
{
"epoch": 0.48,
"learning_rate": 0.0001572373128515402,
"loss": 2.8597,
"step": 52800
},
{
"epoch": 0.48,
"learning_rate": 0.00015696559094984918,
"loss": 2.8615,
"step": 52900
},
{
"epoch": 0.48,
"learning_rate": 0.00015669386904815818,
"loss": 2.8575,
"step": 53000
},
{
"epoch": 0.48,
"eval_accuracy": 0.45082372297985984,
"eval_loss": 2.8763039112091064,
"eval_runtime": 43.6525,
"eval_samples_per_second": 148.514,
"eval_steps_per_second": 2.497,
"step": 53000
},
{
"epoch": 0.48,
"learning_rate": 0.00015642214714646713,
"loss": 2.8673,
"step": 53100
},
{
"epoch": 0.48,
"learning_rate": 0.00015615042524477613,
"loss": 2.854,
"step": 53200
},
{
"epoch": 0.48,
"learning_rate": 0.00015587870334308514,
"loss": 2.8652,
"step": 53300
},
{
"epoch": 0.48,
"learning_rate": 0.000155609698660411,
"loss": 2.8596,
"step": 53400
},
{
"epoch": 0.48,
"learning_rate": 0.00015533797675872,
"loss": 2.8641,
"step": 53500
},
{
"epoch": 0.48,
"learning_rate": 0.000155066254857029,
"loss": 2.8595,
"step": 53600
},
{
"epoch": 0.49,
"learning_rate": 0.000154794532955338,
"loss": 2.8562,
"step": 53700
},
{
"epoch": 0.49,
"learning_rate": 0.00015452281105364694,
"loss": 2.8529,
"step": 53800
},
{
"epoch": 0.49,
"learning_rate": 0.00015425108915195594,
"loss": 2.8629,
"step": 53900
},
{
"epoch": 0.49,
"learning_rate": 0.00015397936725026492,
"loss": 2.8571,
"step": 54000
},
{
"epoch": 0.49,
"eval_accuracy": 0.4512689295986789,
"eval_loss": 2.874122142791748,
"eval_runtime": 43.0942,
"eval_samples_per_second": 150.438,
"eval_steps_per_second": 2.529,
"step": 54000
},
{
"epoch": 0.49,
"learning_rate": 0.0001537076453485739,
"loss": 2.8605,
"step": 54100
},
{
"epoch": 0.49,
"learning_rate": 0.00015343592344688287,
"loss": 2.8668,
"step": 54200
},
{
"epoch": 0.49,
"learning_rate": 0.00015316420154519187,
"loss": 2.8604,
"step": 54300
},
{
"epoch": 0.49,
"learning_rate": 0.00015289247964350087,
"loss": 2.857,
"step": 54400
},
{
"epoch": 0.49,
"learning_rate": 0.00015262075774180982,
"loss": 2.8599,
"step": 54500
},
{
"epoch": 0.49,
"learning_rate": 0.00015234903584011882,
"loss": 2.8653,
"step": 54600
},
{
"epoch": 0.49,
"learning_rate": 0.0001520773139384278,
"loss": 2.857,
"step": 54700
},
{
"epoch": 0.5,
"learning_rate": 0.0001518055920367368,
"loss": 2.8543,
"step": 54800
},
{
"epoch": 0.5,
"learning_rate": 0.00015153658735406268,
"loss": 2.8495,
"step": 54900
},
{
"epoch": 0.5,
"learning_rate": 0.00015126486545237168,
"loss": 2.8554,
"step": 55000
},
{
"epoch": 0.5,
"eval_accuracy": 0.4514479800866822,
"eval_loss": 2.870398998260498,
"eval_runtime": 43.838,
"eval_samples_per_second": 147.885,
"eval_steps_per_second": 2.486,
"step": 55000
},
{
"epoch": 0.5,
"learning_rate": 0.00015099314355068063,
"loss": 2.8595,
"step": 55100
},
{
"epoch": 0.5,
"learning_rate": 0.00015072142164898963,
"loss": 2.855,
"step": 55200
},
{
"epoch": 0.5,
"learning_rate": 0.0001504496997472986,
"loss": 2.8663,
"step": 55300
},
{
"epoch": 0.5,
"learning_rate": 0.0001501779778456076,
"loss": 2.8555,
"step": 55400
},
{
"epoch": 0.5,
"learning_rate": 0.00014990625594391658,
"loss": 2.8596,
"step": 55500
},
{
"epoch": 0.5,
"learning_rate": 0.00014963453404222556,
"loss": 2.8589,
"step": 55600
},
{
"epoch": 0.5,
"learning_rate": 0.00014936281214053456,
"loss": 2.8568,
"step": 55700
},
{
"epoch": 0.5,
"learning_rate": 0.00014909109023884354,
"loss": 2.8474,
"step": 55800
},
{
"epoch": 0.51,
"learning_rate": 0.0001488193683371525,
"loss": 2.8515,
"step": 55900
},
{
"epoch": 0.51,
"learning_rate": 0.00014854764643546151,
"loss": 2.8526,
"step": 56000
},
{
"epoch": 0.51,
"eval_accuracy": 0.45189379160579857,
"eval_loss": 2.86692214012146,
"eval_runtime": 43.3506,
"eval_samples_per_second": 149.548,
"eval_steps_per_second": 2.514,
"step": 56000
},
{
"epoch": 0.51,
"learning_rate": 0.0001482759245337705,
"loss": 2.8504,
"step": 56100
},
{
"epoch": 0.51,
"learning_rate": 0.0001480042026320795,
"loss": 2.854,
"step": 56200
},
{
"epoch": 0.51,
"learning_rate": 0.00014773248073038847,
"loss": 2.8512,
"step": 56300
},
{
"epoch": 0.51,
"learning_rate": 0.00014746075882869744,
"loss": 2.8515,
"step": 56400
},
{
"epoch": 0.51,
"learning_rate": 0.00014718903692700642,
"loss": 2.8492,
"step": 56500
},
{
"epoch": 0.51,
"learning_rate": 0.00014691731502531542,
"loss": 2.8491,
"step": 56600
},
{
"epoch": 0.51,
"learning_rate": 0.0001466455931236244,
"loss": 2.8466,
"step": 56700
},
{
"epoch": 0.51,
"learning_rate": 0.0001463738712219334,
"loss": 2.8508,
"step": 56800
},
{
"epoch": 0.51,
"learning_rate": 0.00014610214932024237,
"loss": 2.8567,
"step": 56900
},
{
"epoch": 0.52,
"learning_rate": 0.00014583042741855135,
"loss": 2.8521,
"step": 57000
},
{
"epoch": 0.52,
"eval_accuracy": 0.45249203799983667,
"eval_loss": 2.861818552017212,
"eval_runtime": 43.168,
"eval_samples_per_second": 150.181,
"eval_steps_per_second": 2.525,
"step": 57000
},
{
"epoch": 0.52,
"learning_rate": 0.00014555870551686032,
"loss": 2.8463,
"step": 57100
},
{
"epoch": 0.52,
"learning_rate": 0.0001452869836151693,
"loss": 2.8433,
"step": 57200
},
{
"epoch": 0.52,
"learning_rate": 0.0001450152617134783,
"loss": 2.8446,
"step": 57300
},
{
"epoch": 0.52,
"learning_rate": 0.00014474353981178728,
"loss": 2.8477,
"step": 57400
},
{
"epoch": 0.52,
"learning_rate": 0.00014447181791009628,
"loss": 2.8439,
"step": 57500
},
{
"epoch": 0.52,
"learning_rate": 0.00014420009600840525,
"loss": 2.8459,
"step": 57600
},
{
"epoch": 0.52,
"learning_rate": 0.00014392837410671423,
"loss": 2.8445,
"step": 57700
},
{
"epoch": 0.52,
"learning_rate": 0.0001436566522050232,
"loss": 2.8455,
"step": 57800
},
{
"epoch": 0.52,
"learning_rate": 0.0001433876475223491,
"loss": 2.8474,
"step": 57900
},
{
"epoch": 0.52,
"learning_rate": 0.000143118642839675,
"loss": 2.8398,
"step": 58000
},
{
"epoch": 0.52,
"eval_accuracy": 0.45218656334969587,
"eval_loss": 2.8599517345428467,
"eval_runtime": 43.8444,
"eval_samples_per_second": 147.864,
"eval_steps_per_second": 2.486,
"step": 58000
},
{
"epoch": 0.53,
"learning_rate": 0.000142846920937984,
"loss": 2.8492,
"step": 58100
},
{
"epoch": 0.53,
"learning_rate": 0.00014257519903629296,
"loss": 2.8434,
"step": 58200
},
{
"epoch": 0.53,
"learning_rate": 0.00014230347713460197,
"loss": 2.8483,
"step": 58300
},
{
"epoch": 0.53,
"learning_rate": 0.00014203175523291094,
"loss": 2.8441,
"step": 58400
},
{
"epoch": 0.53,
"learning_rate": 0.00014176003333121992,
"loss": 2.8474,
"step": 58500
},
{
"epoch": 0.53,
"learning_rate": 0.00014148831142952892,
"loss": 2.8385,
"step": 58600
},
{
"epoch": 0.53,
"learning_rate": 0.0001412165895278379,
"loss": 2.8424,
"step": 58700
},
{
"epoch": 0.53,
"learning_rate": 0.00014094486762614687,
"loss": 2.847,
"step": 58800
},
{
"epoch": 0.53,
"learning_rate": 0.00014067314572445587,
"loss": 2.8511,
"step": 58900
},
{
"epoch": 0.53,
"learning_rate": 0.00014040142382276485,
"loss": 2.8398,
"step": 59000
},
{
"epoch": 0.53,
"eval_accuracy": 0.45275395982857125,
"eval_loss": 2.8576090335845947,
"eval_runtime": 43.2028,
"eval_samples_per_second": 150.06,
"eval_steps_per_second": 2.523,
"step": 59000
},
{
"epoch": 0.53,
"learning_rate": 0.00014012970192107382,
"loss": 2.8386,
"step": 59100
},
{
"epoch": 0.54,
"learning_rate": 0.00013985798001938282,
"loss": 2.8458,
"step": 59200
},
{
"epoch": 0.54,
"learning_rate": 0.0001395862581176918,
"loss": 2.8356,
"step": 59300
},
{
"epoch": 0.54,
"learning_rate": 0.00013931453621600078,
"loss": 2.8379,
"step": 59400
},
{
"epoch": 0.54,
"learning_rate": 0.00013904281431430978,
"loss": 2.8325,
"step": 59500
},
{
"epoch": 0.54,
"learning_rate": 0.00013877109241261875,
"loss": 2.8461,
"step": 59600
},
{
"epoch": 0.54,
"learning_rate": 0.00013849937051092773,
"loss": 2.8521,
"step": 59700
},
{
"epoch": 0.54,
"learning_rate": 0.00013823036582825363,
"loss": 2.8273,
"step": 59800
},
{
"epoch": 0.54,
"learning_rate": 0.00013795864392656263,
"loss": 2.8318,
"step": 59900
},
{
"epoch": 0.54,
"learning_rate": 0.0001376869220248716,
"loss": 2.837,
"step": 60000
},
{
"epoch": 0.54,
"eval_accuracy": 0.4528289674654375,
"eval_loss": 2.8535568714141846,
"eval_runtime": 43.1874,
"eval_samples_per_second": 150.113,
"eval_steps_per_second": 2.524,
"step": 60000
},
{
"epoch": 0.54,
"learning_rate": 0.00013741520012318058,
"loss": 2.8396,
"step": 60100
},
{
"epoch": 0.54,
"learning_rate": 0.00013714347822148956,
"loss": 2.8395,
"step": 60200
},
{
"epoch": 0.55,
"learning_rate": 0.00013687447353881546,
"loss": 2.8325,
"step": 60300
},
{
"epoch": 0.55,
"learning_rate": 0.00013660275163712444,
"loss": 2.8412,
"step": 60400
},
{
"epoch": 0.55,
"learning_rate": 0.00013633102973543344,
"loss": 2.8392,
"step": 60500
},
{
"epoch": 0.55,
"learning_rate": 0.00013605930783374242,
"loss": 2.843,
"step": 60600
},
{
"epoch": 0.55,
"learning_rate": 0.0001357875859320514,
"loss": 2.8337,
"step": 60700
},
{
"epoch": 0.55,
"learning_rate": 0.00013551586403036037,
"loss": 2.8452,
"step": 60800
},
{
"epoch": 0.55,
"learning_rate": 0.00013524414212866937,
"loss": 2.8448,
"step": 60900
},
{
"epoch": 0.55,
"learning_rate": 0.00013497242022697835,
"loss": 2.837,
"step": 61000
},
{
"epoch": 0.55,
"eval_accuracy": 0.4534701617805845,
"eval_loss": 2.851900577545166,
"eval_runtime": 43.1282,
"eval_samples_per_second": 150.319,
"eval_steps_per_second": 2.527,
"step": 61000
},
{
"epoch": 0.55,
"learning_rate": 0.00013470069832528735,
"loss": 2.8331,
"step": 61100
},
{
"epoch": 0.55,
"learning_rate": 0.00013442897642359632,
"loss": 2.832,
"step": 61200
},
{
"epoch": 0.55,
"learning_rate": 0.0001341572545219053,
"loss": 2.8255,
"step": 61300
},
{
"epoch": 0.56,
"learning_rate": 0.00013388553262021427,
"loss": 2.8327,
"step": 61400
},
{
"epoch": 0.56,
"learning_rate": 0.00013361381071852328,
"loss": 2.8386,
"step": 61500
},
{
"epoch": 0.56,
"learning_rate": 0.00013334208881683225,
"loss": 2.8315,
"step": 61600
},
{
"epoch": 0.56,
"learning_rate": 0.00013307036691514125,
"loss": 2.824,
"step": 61700
},
{
"epoch": 0.56,
"learning_rate": 0.00013279864501345023,
"loss": 2.8296,
"step": 61800
},
{
"epoch": 0.56,
"learning_rate": 0.0001325269231117592,
"loss": 2.8378,
"step": 61900
},
{
"epoch": 0.56,
"learning_rate": 0.0001322579184290851,
"loss": 2.8427,
"step": 62000
},
{
"epoch": 0.56,
"eval_accuracy": 0.4535663409278566,
"eval_loss": 2.8492891788482666,
"eval_runtime": 43.4858,
"eval_samples_per_second": 149.083,
"eval_steps_per_second": 2.507,
"step": 62000
},
{
"epoch": 0.56,
"learning_rate": 0.00013198619652739408,
"loss": 2.8329,
"step": 62100
},
{
"epoch": 0.56,
"learning_rate": 0.00013171447462570306,
"loss": 2.8389,
"step": 62200
},
{
"epoch": 0.56,
"learning_rate": 0.00013144275272401206,
"loss": 2.8358,
"step": 62300
},
{
"epoch": 0.56,
"learning_rate": 0.00013117103082232104,
"loss": 2.8369,
"step": 62400
},
{
"epoch": 0.57,
"learning_rate": 0.00013089930892063,
"loss": 2.8294,
"step": 62500
},
{
"epoch": 0.57,
"learning_rate": 0.000130627587018939,
"loss": 2.834,
"step": 62600
},
{
"epoch": 0.57,
"learning_rate": 0.000130355865117248,
"loss": 2.8414,
"step": 62700
},
{
"epoch": 0.57,
"learning_rate": 0.00013008414321555696,
"loss": 2.8384,
"step": 62800
},
{
"epoch": 0.57,
"learning_rate": 0.00012981242131386597,
"loss": 2.8384,
"step": 62900
},
{
"epoch": 0.57,
"learning_rate": 0.00012954069941217494,
"loss": 2.8365,
"step": 63000
},
{
"epoch": 0.57,
"eval_accuracy": 0.45409986299008265,
"eval_loss": 2.8467965126037598,
"eval_runtime": 47.1796,
"eval_samples_per_second": 137.411,
"eval_steps_per_second": 2.31,
"step": 63000
},
{
"epoch": 0.57,
"learning_rate": 0.00012926897751048392,
"loss": 2.8281,
"step": 63100
},
{
"epoch": 0.57,
"learning_rate": 0.00012899725560879292,
"loss": 2.8197,
"step": 63200
},
{
"epoch": 0.57,
"learning_rate": 0.0001287255337071019,
"loss": 2.8233,
"step": 63300
},
{
"epoch": 0.57,
"learning_rate": 0.00012845652902442777,
"loss": 2.828,
"step": 63400
},
{
"epoch": 0.57,
"learning_rate": 0.00012818480712273677,
"loss": 2.8334,
"step": 63500
},
{
"epoch": 0.58,
"learning_rate": 0.00012791308522104578,
"loss": 2.8332,
"step": 63600
},
{
"epoch": 0.58,
"learning_rate": 0.00012764136331935475,
"loss": 2.8279,
"step": 63700
},
{
"epoch": 0.58,
"learning_rate": 0.00012736964141766373,
"loss": 2.8271,
"step": 63800
},
{
"epoch": 0.58,
"learning_rate": 0.0001270979195159727,
"loss": 2.8306,
"step": 63900
},
{
"epoch": 0.58,
"learning_rate": 0.00012682619761428168,
"loss": 2.8327,
"step": 64000
},
{
"epoch": 0.58,
"eval_accuracy": 0.4538736302788893,
"eval_loss": 2.8447225093841553,
"eval_runtime": 44.4204,
"eval_samples_per_second": 145.946,
"eval_steps_per_second": 2.454,
"step": 64000
},
{
"epoch": 0.58,
"learning_rate": 0.00012655447571259068,
"loss": 2.836,
"step": 64100
},
{
"epoch": 0.58,
"learning_rate": 0.00012628275381089965,
"loss": 2.8337,
"step": 64200
},
{
"epoch": 0.58,
"learning_rate": 0.00012601103190920866,
"loss": 2.8333,
"step": 64300
},
{
"epoch": 0.58,
"learning_rate": 0.00012573931000751763,
"loss": 2.8298,
"step": 64400
},
{
"epoch": 0.58,
"learning_rate": 0.0001254675881058266,
"loss": 2.8285,
"step": 64500
},
{
"epoch": 0.58,
"learning_rate": 0.00012519586620413558,
"loss": 2.8252,
"step": 64600
},
{
"epoch": 0.58,
"learning_rate": 0.00012492414430244459,
"loss": 2.8227,
"step": 64700
},
{
"epoch": 0.59,
"learning_rate": 0.00012465242240075356,
"loss": 2.8286,
"step": 64800
},
{
"epoch": 0.59,
"learning_rate": 0.00012438070049906256,
"loss": 2.8218,
"step": 64900
},
{
"epoch": 0.59,
"learning_rate": 0.00012410897859737154,
"loss": 2.8289,
"step": 65000
},
{
"epoch": 0.59,
"eval_accuracy": 0.4545583774154425,
"eval_loss": 2.838773012161255,
"eval_runtime": 43.8892,
"eval_samples_per_second": 147.713,
"eval_steps_per_second": 2.484,
"step": 65000
},
{
"epoch": 0.59,
"learning_rate": 0.0001238372566956805,
"loss": 2.8198,
"step": 65100
},
{
"epoch": 0.59,
"learning_rate": 0.0001235655347939895,
"loss": 2.8207,
"step": 65200
},
{
"epoch": 0.59,
"learning_rate": 0.00012329381289229846,
"loss": 2.8296,
"step": 65300
},
{
"epoch": 0.59,
"learning_rate": 0.00012302209099060747,
"loss": 2.8293,
"step": 65400
},
{
"epoch": 0.59,
"learning_rate": 0.00012275036908891647,
"loss": 2.8188,
"step": 65500
},
{
"epoch": 0.59,
"learning_rate": 0.00012247864718722544,
"loss": 2.819,
"step": 65600
},
{
"epoch": 0.59,
"learning_rate": 0.00012220692528553442,
"loss": 2.8219,
"step": 65700
},
{
"epoch": 0.59,
"learning_rate": 0.0001219352033838434,
"loss": 2.8199,
"step": 65800
},
{
"epoch": 0.6,
"learning_rate": 0.0001216634814821524,
"loss": 2.8282,
"step": 65900
},
{
"epoch": 0.6,
"learning_rate": 0.00012139175958046137,
"loss": 2.8166,
"step": 66000
},
{
"epoch": 0.6,
"eval_accuracy": 0.45473863770404044,
"eval_loss": 2.834634780883789,
"eval_runtime": 43.1108,
"eval_samples_per_second": 150.38,
"eval_steps_per_second": 2.528,
"step": 66000
},
{
"epoch": 0.6,
"learning_rate": 0.00012112003767877036,
"loss": 2.8226,
"step": 66100
},
{
"epoch": 0.6,
"learning_rate": 0.00012084831577707934,
"loss": 2.8135,
"step": 66200
},
{
"epoch": 0.6,
"learning_rate": 0.00012057659387538832,
"loss": 2.8134,
"step": 66300
},
{
"epoch": 0.6,
"learning_rate": 0.0001203048719736973,
"loss": 2.8214,
"step": 66400
},
{
"epoch": 0.6,
"learning_rate": 0.0001200358672910232,
"loss": 2.8142,
"step": 66500
},
{
"epoch": 0.6,
"learning_rate": 0.00011976414538933219,
"loss": 2.8196,
"step": 66600
},
{
"epoch": 0.6,
"learning_rate": 0.00011949242348764117,
"loss": 2.8145,
"step": 66700
},
{
"epoch": 0.6,
"learning_rate": 0.00011922070158595016,
"loss": 2.8093,
"step": 66800
},
{
"epoch": 0.6,
"learning_rate": 0.00011894897968425913,
"loss": 2.8168,
"step": 66900
},
{
"epoch": 0.61,
"learning_rate": 0.00011867725778256813,
"loss": 2.8171,
"step": 67000
},
{
"epoch": 0.61,
"eval_accuracy": 0.45580810142968187,
"eval_loss": 2.8293869495391846,
"eval_runtime": 44.4137,
"eval_samples_per_second": 145.968,
"eval_steps_per_second": 2.454,
"step": 67000
},
{
"epoch": 0.61,
"learning_rate": 0.00011840553588087711,
"loss": 2.8123,
"step": 67100
},
{
"epoch": 0.61,
"learning_rate": 0.0001181338139791861,
"loss": 2.8121,
"step": 67200
},
{
"epoch": 0.61,
"learning_rate": 0.00011786209207749507,
"loss": 2.8083,
"step": 67300
},
{
"epoch": 0.61,
"learning_rate": 0.00011759037017580405,
"loss": 2.8156,
"step": 67400
},
{
"epoch": 0.61,
"learning_rate": 0.00011731864827411304,
"loss": 2.8225,
"step": 67500
},
{
"epoch": 0.61,
"learning_rate": 0.00011704692637242204,
"loss": 2.8109,
"step": 67600
},
{
"epoch": 0.61,
"learning_rate": 0.00011677520447073102,
"loss": 2.8137,
"step": 67700
},
{
"epoch": 0.61,
"learning_rate": 0.0001165061997880569,
"loss": 2.8097,
"step": 67800
},
{
"epoch": 0.61,
"learning_rate": 0.00011623447788636588,
"loss": 2.8099,
"step": 67900
},
{
"epoch": 0.61,
"learning_rate": 0.00011596275598467488,
"loss": 2.8184,
"step": 68000
},
{
"epoch": 0.61,
"eval_accuracy": 0.4556344950443543,
"eval_loss": 2.826944589614868,
"eval_runtime": 43.7297,
"eval_samples_per_second": 148.252,
"eval_steps_per_second": 2.493,
"step": 68000
},
{
"epoch": 0.62,
"learning_rate": 0.00011569103408298386,
"loss": 2.8164,
"step": 68100
},
{
"epoch": 0.62,
"learning_rate": 0.00011541931218129285,
"loss": 2.8137,
"step": 68200
},
{
"epoch": 0.62,
"learning_rate": 0.00011514759027960182,
"loss": 2.8168,
"step": 68300
},
{
"epoch": 0.62,
"learning_rate": 0.00011487858559692771,
"loss": 2.8156,
"step": 68400
},
{
"epoch": 0.62,
"learning_rate": 0.00011460686369523672,
"loss": 2.8114,
"step": 68500
},
{
"epoch": 0.62,
"learning_rate": 0.00011433514179354569,
"loss": 2.8066,
"step": 68600
},
{
"epoch": 0.62,
"learning_rate": 0.00011406341989185468,
"loss": 2.8124,
"step": 68700
},
{
"epoch": 0.62,
"learning_rate": 0.00011379169799016366,
"loss": 2.8093,
"step": 68800
},
{
"epoch": 0.62,
"learning_rate": 0.00011351997608847263,
"loss": 2.8131,
"step": 68900
},
{
"epoch": 0.62,
"learning_rate": 0.00011324825418678162,
"loss": 2.8102,
"step": 69000
},
{
"epoch": 0.62,
"eval_accuracy": 0.45632710588477254,
"eval_loss": 2.8243494033813477,
"eval_runtime": 42.7646,
"eval_samples_per_second": 151.597,
"eval_steps_per_second": 2.549,
"step": 69000
},
{
"epoch": 0.62,
"learning_rate": 0.00011297653228509062,
"loss": 2.8064,
"step": 69100
},
{
"epoch": 0.63,
"learning_rate": 0.0001127048103833996,
"loss": 2.8075,
"step": 69200
},
{
"epoch": 0.63,
"learning_rate": 0.00011243308848170857,
"loss": 2.8146,
"step": 69300
},
{
"epoch": 0.63,
"learning_rate": 0.00011216136658001756,
"loss": 2.8166,
"step": 69400
},
{
"epoch": 0.63,
"learning_rate": 0.00011188964467832654,
"loss": 2.8073,
"step": 69500
},
{
"epoch": 0.63,
"learning_rate": 0.00011161792277663554,
"loss": 2.8116,
"step": 69600
},
{
"epoch": 0.63,
"learning_rate": 0.00011134620087494451,
"loss": 2.807,
"step": 69700
},
{
"epoch": 0.63,
"learning_rate": 0.0001110744789732535,
"loss": 2.8066,
"step": 69800
},
{
"epoch": 0.63,
"learning_rate": 0.00011080547429057939,
"loss": 2.8101,
"step": 69900
},
{
"epoch": 0.63,
"learning_rate": 0.00011053375238888837,
"loss": 2.8153,
"step": 70000
},
{
"epoch": 0.63,
"eval_accuracy": 0.45636279500231375,
"eval_loss": 2.821134328842163,
"eval_runtime": 42.931,
"eval_samples_per_second": 151.01,
"eval_steps_per_second": 2.539,
"step": 70000
},
{
"epoch": 0.63,
"learning_rate": 0.00011026203048719737,
"loss": 2.8109,
"step": 70100
},
{
"epoch": 0.63,
"learning_rate": 0.00010999030858550635,
"loss": 2.8025,
"step": 70200
},
{
"epoch": 0.64,
"learning_rate": 0.00010971858668381533,
"loss": 2.8055,
"step": 70300
},
{
"epoch": 0.64,
"learning_rate": 0.00010944686478212431,
"loss": 2.8047,
"step": 70400
},
{
"epoch": 0.64,
"learning_rate": 0.00010917514288043329,
"loss": 2.8095,
"step": 70500
},
{
"epoch": 0.64,
"learning_rate": 0.00010890342097874227,
"loss": 2.805,
"step": 70600
},
{
"epoch": 0.64,
"learning_rate": 0.00010863169907705128,
"loss": 2.8079,
"step": 70700
},
{
"epoch": 0.64,
"learning_rate": 0.00010835997717536025,
"loss": 2.8071,
"step": 70800
},
{
"epoch": 0.64,
"learning_rate": 0.00010809097249268614,
"loss": 2.8016,
"step": 70900
},
{
"epoch": 0.64,
"learning_rate": 0.00010781925059099512,
"loss": 2.8035,
"step": 71000
},
{
"epoch": 0.64,
"eval_accuracy": 0.4569090199707833,
"eval_loss": 2.8184897899627686,
"eval_runtime": 43.5955,
"eval_samples_per_second": 148.708,
"eval_steps_per_second": 2.5,
"step": 71000
},
{
"epoch": 0.64,
"learning_rate": 0.00010755024590832102,
"loss": 2.8002,
"step": 71100
},
{
"epoch": 0.64,
"learning_rate": 0.00010727852400663001,
"loss": 2.8186,
"step": 71200
},
{
"epoch": 0.64,
"learning_rate": 0.00010700680210493899,
"loss": 2.8036,
"step": 71300
},
{
"epoch": 0.65,
"learning_rate": 0.00010673508020324797,
"loss": 2.8077,
"step": 71400
},
{
"epoch": 0.65,
"learning_rate": 0.00010646335830155695,
"loss": 2.8111,
"step": 71500
},
{
"epoch": 0.65,
"learning_rate": 0.00010619163639986595,
"loss": 2.8018,
"step": 71600
},
{
"epoch": 0.65,
"learning_rate": 0.00010591991449817493,
"loss": 2.8079,
"step": 71700
},
{
"epoch": 0.65,
"learning_rate": 0.00010564819259648392,
"loss": 2.8124,
"step": 71800
},
{
"epoch": 0.65,
"learning_rate": 0.00010537647069479289,
"loss": 2.807,
"step": 71900
},
{
"epoch": 0.65,
"learning_rate": 0.00010510474879310187,
"loss": 2.8042,
"step": 72000
},
{
"epoch": 0.65,
"eval_accuracy": 0.4569186983755403,
"eval_loss": 2.8206183910369873,
"eval_runtime": 44.1793,
"eval_samples_per_second": 146.743,
"eval_steps_per_second": 2.467,
"step": 72000
},
{
"epoch": 0.65,
"learning_rate": 0.00010483302689141086,
"loss": 2.8066,
"step": 72100
},
{
"epoch": 0.65,
"learning_rate": 0.00010456130498971986,
"loss": 2.8088,
"step": 72200
},
{
"epoch": 0.65,
"learning_rate": 0.00010428958308802883,
"loss": 2.8036,
"step": 72300
},
{
"epoch": 0.65,
"learning_rate": 0.00010401786118633781,
"loss": 2.7985,
"step": 72400
},
{
"epoch": 0.66,
"learning_rate": 0.0001037461392846468,
"loss": 2.7981,
"step": 72500
},
{
"epoch": 0.66,
"learning_rate": 0.00010347441738295577,
"loss": 2.7993,
"step": 72600
},
{
"epoch": 0.66,
"learning_rate": 0.00010320269548126476,
"loss": 2.7999,
"step": 72700
},
{
"epoch": 0.66,
"learning_rate": 0.00010293097357957375,
"loss": 2.8009,
"step": 72800
},
{
"epoch": 0.66,
"learning_rate": 0.00010265925167788274,
"loss": 2.7943,
"step": 72900
},
{
"epoch": 0.66,
"learning_rate": 0.00010238752977619171,
"loss": 2.7984,
"step": 73000
},
{
"epoch": 0.66,
"eval_accuracy": 0.457420160722009,
"eval_loss": 2.8137617111206055,
"eval_runtime": 43.507,
"eval_samples_per_second": 149.01,
"eval_steps_per_second": 2.505,
"step": 73000
},
{
"epoch": 0.66,
"learning_rate": 0.0001021158078745007,
"loss": 2.7913,
"step": 73100
},
{
"epoch": 0.66,
"learning_rate": 0.00010184408597280968,
"loss": 2.8016,
"step": 73200
},
{
"epoch": 0.66,
"learning_rate": 0.00010157236407111868,
"loss": 2.7988,
"step": 73300
},
{
"epoch": 0.66,
"learning_rate": 0.00010130064216942766,
"loss": 2.792,
"step": 73400
},
{
"epoch": 0.66,
"learning_rate": 0.00010103163748675355,
"loss": 2.7926,
"step": 73500
},
{
"epoch": 0.67,
"learning_rate": 0.00010075991558506253,
"loss": 2.7796,
"step": 73600
},
{
"epoch": 0.67,
"learning_rate": 0.00010048819368337151,
"loss": 2.7971,
"step": 73700
},
{
"epoch": 0.67,
"learning_rate": 0.00010021647178168051,
"loss": 2.7974,
"step": 73800
},
{
"epoch": 0.67,
"learning_rate": 9.994474987998949e-05,
"loss": 2.7951,
"step": 73900
},
{
"epoch": 0.67,
"learning_rate": 9.967302797829848e-05,
"loss": 2.7883,
"step": 74000
},
{
"epoch": 0.67,
"eval_accuracy": 0.45740261861338705,
"eval_loss": 2.8111917972564697,
"eval_runtime": 44.0953,
"eval_samples_per_second": 147.023,
"eval_steps_per_second": 2.472,
"step": 74000
},
{
"epoch": 0.67,
"learning_rate": 9.940130607660745e-05,
"loss": 2.7898,
"step": 74100
},
{
"epoch": 0.67,
"learning_rate": 9.912958417491643e-05,
"loss": 2.7914,
"step": 74200
},
{
"epoch": 0.67,
"learning_rate": 9.885786227322542e-05,
"loss": 2.798,
"step": 74300
},
{
"epoch": 0.67,
"learning_rate": 9.85861403715344e-05,
"loss": 2.7938,
"step": 74400
},
{
"epoch": 0.67,
"learning_rate": 9.83144184698434e-05,
"loss": 2.7927,
"step": 74500
},
{
"epoch": 0.67,
"learning_rate": 9.804269656815237e-05,
"loss": 2.7967,
"step": 74600
},
{
"epoch": 0.68,
"learning_rate": 9.777369188547826e-05,
"loss": 2.7933,
"step": 74700
},
{
"epoch": 0.68,
"learning_rate": 9.750196998378726e-05,
"loss": 2.7913,
"step": 74800
},
{
"epoch": 0.68,
"learning_rate": 9.723024808209624e-05,
"loss": 2.7924,
"step": 74900
},
{
"epoch": 0.68,
"learning_rate": 9.695852618040523e-05,
"loss": 2.7962,
"step": 75000
},
{
"epoch": 0.68,
"eval_accuracy": 0.4583686443881887,
"eval_loss": 2.8055942058563232,
"eval_runtime": 44.8912,
"eval_samples_per_second": 144.416,
"eval_steps_per_second": 2.428,
"step": 75000
},
{
"epoch": 0.68,
"learning_rate": 9.66868042787142e-05,
"loss": 2.7848,
"step": 75100
},
{
"epoch": 0.68,
"learning_rate": 9.641779959604009e-05,
"loss": 2.7935,
"step": 75200
},
{
"epoch": 0.68,
"learning_rate": 9.61460776943491e-05,
"loss": 2.7961,
"step": 75300
},
{
"epoch": 0.68,
"learning_rate": 9.587435579265807e-05,
"loss": 2.788,
"step": 75400
},
{
"epoch": 0.68,
"learning_rate": 9.560263389096706e-05,
"loss": 2.7934,
"step": 75500
},
{
"epoch": 0.68,
"learning_rate": 9.533091198927603e-05,
"loss": 2.7888,
"step": 75600
},
{
"epoch": 0.68,
"learning_rate": 9.505919008758501e-05,
"loss": 2.7954,
"step": 75700
},
{
"epoch": 0.69,
"learning_rate": 9.4787468185894e-05,
"loss": 2.7934,
"step": 75800
},
{
"epoch": 0.69,
"learning_rate": 9.451574628420299e-05,
"loss": 2.7867,
"step": 75900
},
{
"epoch": 0.69,
"learning_rate": 9.424402438251197e-05,
"loss": 2.7937,
"step": 76000
},
{
"epoch": 0.69,
"eval_accuracy": 0.4582416153257539,
"eval_loss": 2.8068454265594482,
"eval_runtime": 44.3778,
"eval_samples_per_second": 146.087,
"eval_steps_per_second": 2.456,
"step": 76000
},
{
"epoch": 0.69,
"learning_rate": 9.397230248082095e-05,
"loss": 2.7933,
"step": 76100
},
{
"epoch": 0.69,
"learning_rate": 9.370058057912994e-05,
"loss": 2.7876,
"step": 76200
},
{
"epoch": 0.69,
"learning_rate": 9.342885867743891e-05,
"loss": 2.7885,
"step": 76300
},
{
"epoch": 0.69,
"learning_rate": 9.31571367757479e-05,
"loss": 2.7859,
"step": 76400
},
{
"epoch": 0.69,
"learning_rate": 9.288541487405689e-05,
"loss": 2.7867,
"step": 76500
},
{
"epoch": 0.69,
"learning_rate": 9.261369297236588e-05,
"loss": 2.7882,
"step": 76600
},
{
"epoch": 0.69,
"learning_rate": 9.234197107067486e-05,
"loss": 2.7874,
"step": 76700
},
{
"epoch": 0.69,
"learning_rate": 9.207024916898384e-05,
"loss": 2.79,
"step": 76800
},
{
"epoch": 0.7,
"learning_rate": 9.179852726729282e-05,
"loss": 2.7828,
"step": 76900
},
{
"epoch": 0.7,
"learning_rate": 9.152680536560182e-05,
"loss": 2.7853,
"step": 77000
},
{
"epoch": 0.7,
"eval_accuracy": 0.4587721128864935,
"eval_loss": 2.801090955734253,
"eval_runtime": 43.1479,
"eval_samples_per_second": 150.251,
"eval_steps_per_second": 2.526,
"step": 77000
},
{
"epoch": 0.7,
"learning_rate": 9.12550834639108e-05,
"loss": 2.7861,
"step": 77100
},
{
"epoch": 0.7,
"learning_rate": 9.098336156221979e-05,
"loss": 2.793,
"step": 77200
},
{
"epoch": 0.7,
"learning_rate": 9.071163966052876e-05,
"loss": 2.7914,
"step": 77300
},
{
"epoch": 0.7,
"learning_rate": 9.043991775883774e-05,
"loss": 2.7774,
"step": 77400
},
{
"epoch": 0.7,
"learning_rate": 9.016819585714673e-05,
"loss": 2.7791,
"step": 77500
},
{
"epoch": 0.7,
"learning_rate": 8.989647395545573e-05,
"loss": 2.7837,
"step": 77600
},
{
"epoch": 0.7,
"learning_rate": 8.96247520537647e-05,
"loss": 2.779,
"step": 77700
},
{
"epoch": 0.7,
"learning_rate": 8.935303015207368e-05,
"loss": 2.7807,
"step": 77800
},
{
"epoch": 0.7,
"learning_rate": 8.908130825038267e-05,
"loss": 2.7832,
"step": 77900
},
{
"epoch": 0.71,
"learning_rate": 8.880958634869164e-05,
"loss": 2.7798,
"step": 78000
},
{
"epoch": 0.71,
"eval_accuracy": 0.4596697849276993,
"eval_loss": 2.795370578765869,
"eval_runtime": 43.9941,
"eval_samples_per_second": 147.361,
"eval_steps_per_second": 2.478,
"step": 78000
},
{
"epoch": 0.71,
"learning_rate": 8.853786444700063e-05,
"loss": 2.7851,
"step": 78100
},
{
"epoch": 0.71,
"learning_rate": 8.826885976432654e-05,
"loss": 2.7819,
"step": 78200
},
{
"epoch": 0.71,
"learning_rate": 8.799713786263551e-05,
"loss": 2.7767,
"step": 78300
},
{
"epoch": 0.71,
"learning_rate": 8.77254159609445e-05,
"loss": 2.7745,
"step": 78400
},
{
"epoch": 0.71,
"learning_rate": 8.745369405925347e-05,
"loss": 2.7807,
"step": 78500
},
{
"epoch": 0.71,
"learning_rate": 8.718197215756246e-05,
"loss": 2.7828,
"step": 78600
},
{
"epoch": 0.71,
"learning_rate": 8.691025025587145e-05,
"loss": 2.7768,
"step": 78700
},
{
"epoch": 0.71,
"learning_rate": 8.663852835418044e-05,
"loss": 2.7749,
"step": 78800
},
{
"epoch": 0.71,
"learning_rate": 8.636680645248942e-05,
"loss": 2.7782,
"step": 78900
},
{
"epoch": 0.71,
"learning_rate": 8.60950845507984e-05,
"loss": 2.7851,
"step": 79000
},
{
"epoch": 0.71,
"eval_accuracy": 0.4597998384916206,
"eval_loss": 2.7913172245025635,
"eval_runtime": 43.6998,
"eval_samples_per_second": 148.353,
"eval_steps_per_second": 2.494,
"step": 79000
},
{
"epoch": 0.72,
"learning_rate": 8.582336264910738e-05,
"loss": 2.7722,
"step": 79100
},
{
"epoch": 0.72,
"learning_rate": 8.555435796643328e-05,
"loss": 2.7695,
"step": 79200
},
{
"epoch": 0.72,
"learning_rate": 8.528535328375917e-05,
"loss": 2.7732,
"step": 79300
},
{
"epoch": 0.72,
"learning_rate": 8.501363138206815e-05,
"loss": 2.7714,
"step": 79400
},
{
"epoch": 0.72,
"learning_rate": 8.474190948037714e-05,
"loss": 2.7739,
"step": 79500
},
{
"epoch": 0.72,
"learning_rate": 8.447018757868613e-05,
"loss": 2.7733,
"step": 79600
},
{
"epoch": 0.72,
"learning_rate": 8.419846567699512e-05,
"loss": 2.773,
"step": 79700
},
{
"epoch": 0.72,
"learning_rate": 8.392674377530409e-05,
"loss": 2.7754,
"step": 79800
},
{
"epoch": 0.72,
"learning_rate": 8.365502187361308e-05,
"loss": 2.7817,
"step": 79900
},
{
"epoch": 0.72,
"learning_rate": 8.338329997192206e-05,
"loss": 2.7831,
"step": 80000
},
{
"epoch": 0.72,
"eval_accuracy": 0.46004845251381443,
"eval_loss": 2.78973126411438,
"eval_runtime": 44.9439,
"eval_samples_per_second": 144.247,
"eval_steps_per_second": 2.425,
"step": 80000
},
{
"epoch": 0.72,
"learning_rate": 8.311157807023106e-05,
"loss": 2.7739,
"step": 80100
},
{
"epoch": 0.73,
"learning_rate": 8.283985616854003e-05,
"loss": 2.781,
"step": 80200
},
{
"epoch": 0.73,
"learning_rate": 8.256813426684902e-05,
"loss": 2.7773,
"step": 80300
},
{
"epoch": 0.73,
"learning_rate": 8.2296412365158e-05,
"loss": 2.7688,
"step": 80400
},
{
"epoch": 0.73,
"learning_rate": 8.202469046346699e-05,
"loss": 2.7765,
"step": 80500
},
{
"epoch": 0.73,
"learning_rate": 8.175568578079289e-05,
"loss": 2.7735,
"step": 80600
},
{
"epoch": 0.73,
"learning_rate": 8.148396387910187e-05,
"loss": 2.7692,
"step": 80700
},
{
"epoch": 0.73,
"learning_rate": 8.121224197741084e-05,
"loss": 2.7661,
"step": 80800
},
{
"epoch": 0.73,
"learning_rate": 8.094052007571983e-05,
"loss": 2.7714,
"step": 80900
},
{
"epoch": 0.73,
"learning_rate": 8.06687981740288e-05,
"loss": 2.7773,
"step": 81000
},
{
"epoch": 0.73,
"eval_accuracy": 0.4603297311520629,
"eval_loss": 2.786165475845337,
"eval_runtime": 45.3636,
"eval_samples_per_second": 142.912,
"eval_steps_per_second": 2.403,
"step": 81000
},
{
"epoch": 0.73,
"learning_rate": 8.03970762723378e-05,
"loss": 2.77,
"step": 81100
},
{
"epoch": 0.73,
"learning_rate": 8.012535437064678e-05,
"loss": 2.772,
"step": 81200
},
{
"epoch": 0.74,
"learning_rate": 7.985363246895577e-05,
"loss": 2.7751,
"step": 81300
},
{
"epoch": 0.74,
"learning_rate": 7.958191056726475e-05,
"loss": 2.7705,
"step": 81400
},
{
"epoch": 0.74,
"learning_rate": 7.931018866557374e-05,
"loss": 2.7711,
"step": 81500
},
{
"epoch": 0.74,
"learning_rate": 7.903846676388271e-05,
"loss": 2.7666,
"step": 81600
},
{
"epoch": 0.74,
"learning_rate": 7.87667448621917e-05,
"loss": 2.7678,
"step": 81700
},
{
"epoch": 0.74,
"learning_rate": 7.84977401795176e-05,
"loss": 2.7707,
"step": 81800
},
{
"epoch": 0.74,
"learning_rate": 7.822601827782658e-05,
"loss": 2.7624,
"step": 81900
},
{
"epoch": 0.74,
"learning_rate": 7.795429637613557e-05,
"loss": 2.7688,
"step": 82000
},
{
"epoch": 0.74,
"eval_accuracy": 0.4608795855223163,
"eval_loss": 2.7835707664489746,
"eval_runtime": 44.1206,
"eval_samples_per_second": 146.938,
"eval_steps_per_second": 2.47,
"step": 82000
},
{
"epoch": 0.74,
"learning_rate": 7.768257447444454e-05,
"loss": 2.7652,
"step": 82100
},
{
"epoch": 0.74,
"learning_rate": 7.741085257275354e-05,
"loss": 2.763,
"step": 82200
},
{
"epoch": 0.74,
"learning_rate": 7.713913067106252e-05,
"loss": 2.7718,
"step": 82300
},
{
"epoch": 0.74,
"learning_rate": 7.686740876937151e-05,
"loss": 2.774,
"step": 82400
},
{
"epoch": 0.75,
"learning_rate": 7.659568686768048e-05,
"loss": 2.7624,
"step": 82500
},
{
"epoch": 0.75,
"learning_rate": 7.632396496598946e-05,
"loss": 2.7672,
"step": 82600
},
{
"epoch": 0.75,
"learning_rate": 7.605224306429845e-05,
"loss": 2.7646,
"step": 82700
},
{
"epoch": 0.75,
"learning_rate": 7.578052116260744e-05,
"loss": 2.7643,
"step": 82800
},
{
"epoch": 0.75,
"learning_rate": 7.550879926091643e-05,
"loss": 2.7636,
"step": 82900
},
{
"epoch": 0.75,
"learning_rate": 7.523979457824232e-05,
"loss": 2.7658,
"step": 83000
},
{
"epoch": 0.75,
"eval_accuracy": 0.4610453282037788,
"eval_loss": 2.7798171043395996,
"eval_runtime": 44.7143,
"eval_samples_per_second": 144.987,
"eval_steps_per_second": 2.438,
"step": 83000
},
{
"epoch": 0.75,
"learning_rate": 7.49680726765513e-05,
"loss": 2.7694,
"step": 83100
},
{
"epoch": 0.75,
"learning_rate": 7.469635077486028e-05,
"loss": 2.7662,
"step": 83200
},
{
"epoch": 0.75,
"learning_rate": 7.442734609218618e-05,
"loss": 2.7624,
"step": 83300
},
{
"epoch": 0.75,
"learning_rate": 7.415562419049516e-05,
"loss": 2.7632,
"step": 83400
},
{
"epoch": 0.75,
"learning_rate": 7.388390228880415e-05,
"loss": 2.7697,
"step": 83500
},
{
"epoch": 0.76,
"learning_rate": 7.361218038711314e-05,
"loss": 2.7663,
"step": 83600
},
{
"epoch": 0.76,
"learning_rate": 7.334045848542211e-05,
"loss": 2.7623,
"step": 83700
},
{
"epoch": 0.76,
"learning_rate": 7.306873658373109e-05,
"loss": 2.7685,
"step": 83800
},
{
"epoch": 0.76,
"learning_rate": 7.279701468204009e-05,
"loss": 2.7702,
"step": 83900
},
{
"epoch": 0.76,
"learning_rate": 7.252529278034907e-05,
"loss": 2.7622,
"step": 84000
},
{
"epoch": 0.76,
"eval_accuracy": 0.4611511857558078,
"eval_loss": 2.781484603881836,
"eval_runtime": 43.3638,
"eval_samples_per_second": 149.503,
"eval_steps_per_second": 2.514,
"step": 84000
},
{
"epoch": 0.76,
"learning_rate": 7.225357087865804e-05,
"loss": 2.7672,
"step": 84100
},
{
"epoch": 0.76,
"learning_rate": 7.198184897696703e-05,
"loss": 2.7652,
"step": 84200
},
{
"epoch": 0.76,
"learning_rate": 7.171012707527602e-05,
"loss": 2.7671,
"step": 84300
},
{
"epoch": 0.76,
"learning_rate": 7.143840517358501e-05,
"loss": 2.7621,
"step": 84400
},
{
"epoch": 0.76,
"learning_rate": 7.11694004909109e-05,
"loss": 2.7662,
"step": 84500
},
{
"epoch": 0.76,
"learning_rate": 7.089767858921989e-05,
"loss": 2.7684,
"step": 84600
},
{
"epoch": 0.77,
"learning_rate": 7.062595668752886e-05,
"loss": 2.7662,
"step": 84700
},
{
"epoch": 0.77,
"learning_rate": 7.035423478583785e-05,
"loss": 2.7638,
"step": 84800
},
{
"epoch": 0.77,
"learning_rate": 7.008251288414684e-05,
"loss": 2.7639,
"step": 84900
},
{
"epoch": 0.77,
"learning_rate": 6.981079098245581e-05,
"loss": 2.7691,
"step": 85000
},
{
"epoch": 0.77,
"eval_accuracy": 0.46120986108464673,
"eval_loss": 2.7783455848693848,
"eval_runtime": 43.5919,
"eval_samples_per_second": 148.72,
"eval_steps_per_second": 2.5,
"step": 85000
},
{
"epoch": 0.77,
"learning_rate": 6.95390690807648e-05,
"loss": 2.7649,
"step": 85100
},
{
"epoch": 0.77,
"learning_rate": 6.926734717907379e-05,
"loss": 2.7638,
"step": 85200
},
{
"epoch": 0.77,
"learning_rate": 6.899562527738277e-05,
"loss": 2.7675,
"step": 85300
},
{
"epoch": 0.77,
"learning_rate": 6.872390337569176e-05,
"loss": 2.7657,
"step": 85400
},
{
"epoch": 0.77,
"learning_rate": 6.845218147400074e-05,
"loss": 2.7612,
"step": 85500
},
{
"epoch": 0.77,
"learning_rate": 6.818045957230972e-05,
"loss": 2.7682,
"step": 85600
},
{
"epoch": 0.77,
"learning_rate": 6.79087376706187e-05,
"loss": 2.7588,
"step": 85700
},
{
"epoch": 0.78,
"learning_rate": 6.763701576892768e-05,
"loss": 2.765,
"step": 85800
},
{
"epoch": 0.78,
"learning_rate": 6.736529386723667e-05,
"loss": 2.7556,
"step": 85900
},
{
"epoch": 0.78,
"learning_rate": 6.709357196554565e-05,
"loss": 2.7579,
"step": 86000
},
{
"epoch": 0.78,
"eval_accuracy": 0.4619333218402277,
"eval_loss": 2.7711987495422363,
"eval_runtime": 43.3357,
"eval_samples_per_second": 149.6,
"eval_steps_per_second": 2.515,
"step": 86000
},
{
"epoch": 0.78,
"learning_rate": 6.682185006385464e-05,
"loss": 2.7538,
"step": 86100
},
{
"epoch": 0.78,
"learning_rate": 6.655012816216363e-05,
"loss": 2.7596,
"step": 86200
},
{
"epoch": 0.78,
"learning_rate": 6.62784062604726e-05,
"loss": 2.7512,
"step": 86300
},
{
"epoch": 0.78,
"learning_rate": 6.600668435878159e-05,
"loss": 2.7559,
"step": 86400
},
{
"epoch": 0.78,
"learning_rate": 6.573496245709058e-05,
"loss": 2.7574,
"step": 86500
},
{
"epoch": 0.78,
"learning_rate": 6.546324055539957e-05,
"loss": 2.7614,
"step": 86600
},
{
"epoch": 0.78,
"learning_rate": 6.519423587272546e-05,
"loss": 2.7501,
"step": 86700
},
{
"epoch": 0.78,
"learning_rate": 6.492251397103445e-05,
"loss": 2.7488,
"step": 86800
},
{
"epoch": 0.79,
"learning_rate": 6.465079206934342e-05,
"loss": 2.7497,
"step": 86900
},
{
"epoch": 0.79,
"learning_rate": 6.437907016765241e-05,
"loss": 2.7614,
"step": 87000
},
{
"epoch": 0.79,
"eval_accuracy": 0.46246986840394033,
"eval_loss": 2.7673110961914062,
"eval_runtime": 43.038,
"eval_samples_per_second": 150.634,
"eval_steps_per_second": 2.533,
"step": 87000
},
{
"epoch": 0.79,
"learning_rate": 6.41073482659614e-05,
"loss": 2.7544,
"step": 87100
},
{
"epoch": 0.79,
"learning_rate": 6.383834358328728e-05,
"loss": 2.7546,
"step": 87200
},
{
"epoch": 0.79,
"learning_rate": 6.356662168159627e-05,
"loss": 2.7564,
"step": 87300
},
{
"epoch": 0.79,
"learning_rate": 6.329489977990525e-05,
"loss": 2.759,
"step": 87400
},
{
"epoch": 0.79,
"learning_rate": 6.302317787821423e-05,
"loss": 2.7586,
"step": 87500
},
{
"epoch": 0.79,
"learning_rate": 6.275145597652322e-05,
"loss": 2.7546,
"step": 87600
},
{
"epoch": 0.79,
"learning_rate": 6.247973407483221e-05,
"loss": 2.7548,
"step": 87700
},
{
"epoch": 0.79,
"learning_rate": 6.220801217314118e-05,
"loss": 2.7527,
"step": 87800
},
{
"epoch": 0.79,
"learning_rate": 6.193629027145017e-05,
"loss": 2.7607,
"step": 87900
},
{
"epoch": 0.8,
"learning_rate": 6.166456836975916e-05,
"loss": 2.7592,
"step": 88000
},
{
"epoch": 0.8,
"eval_accuracy": 0.46232166783109974,
"eval_loss": 2.7691469192504883,
"eval_runtime": 43.5697,
"eval_samples_per_second": 148.796,
"eval_steps_per_second": 2.502,
"step": 88000
},
{
"epoch": 0.8,
"learning_rate": 6.139284646806815e-05,
"loss": 2.7481,
"step": 88100
},
{
"epoch": 0.8,
"learning_rate": 6.112112456637712e-05,
"loss": 2.7579,
"step": 88200
},
{
"epoch": 0.8,
"learning_rate": 6.0849402664686106e-05,
"loss": 2.7559,
"step": 88300
},
{
"epoch": 0.8,
"learning_rate": 6.05776807629951e-05,
"loss": 2.7515,
"step": 88400
},
{
"epoch": 0.8,
"learning_rate": 6.030595886130408e-05,
"loss": 2.7524,
"step": 88500
},
{
"epoch": 0.8,
"learning_rate": 6.003423695961306e-05,
"loss": 2.7395,
"step": 88600
},
{
"epoch": 0.8,
"learning_rate": 5.976251505792205e-05,
"loss": 2.7438,
"step": 88700
},
{
"epoch": 0.8,
"learning_rate": 5.949079315623103e-05,
"loss": 2.7468,
"step": 88800
},
{
"epoch": 0.8,
"learning_rate": 5.921907125454001e-05,
"loss": 2.7423,
"step": 88900
},
{
"epoch": 0.8,
"learning_rate": 5.8947349352849e-05,
"loss": 2.7551,
"step": 89000
},
{
"epoch": 0.8,
"eval_accuracy": 0.4633808482516869,
"eval_loss": 2.760658025741577,
"eval_runtime": 43.7777,
"eval_samples_per_second": 148.089,
"eval_steps_per_second": 2.49,
"step": 89000
},
{
"epoch": 0.81,
"learning_rate": 5.867562745115798e-05,
"loss": 2.7352,
"step": 89100
},
{
"epoch": 0.81,
"learning_rate": 5.8403905549466965e-05,
"loss": 2.751,
"step": 89200
},
{
"epoch": 0.81,
"learning_rate": 5.813490086679286e-05,
"loss": 2.7456,
"step": 89300
},
{
"epoch": 0.81,
"learning_rate": 5.7863178965101844e-05,
"loss": 2.7491,
"step": 89400
},
{
"epoch": 0.81,
"learning_rate": 5.759145706341083e-05,
"loss": 2.7477,
"step": 89500
},
{
"epoch": 0.81,
"learning_rate": 5.7319735161719815e-05,
"loss": 2.7431,
"step": 89600
},
{
"epoch": 0.81,
"learning_rate": 5.70480132600288e-05,
"loss": 2.7406,
"step": 89700
},
{
"epoch": 0.81,
"learning_rate": 5.6776291358337786e-05,
"loss": 2.7444,
"step": 89800
},
{
"epoch": 0.81,
"learning_rate": 5.650456945664677e-05,
"loss": 2.7437,
"step": 89900
},
{
"epoch": 0.81,
"learning_rate": 5.623284755495574e-05,
"loss": 2.7397,
"step": 90000
},
{
"epoch": 0.81,
"eval_accuracy": 0.4636597072887461,
"eval_loss": 2.7578768730163574,
"eval_runtime": 43.3807,
"eval_samples_per_second": 149.444,
"eval_steps_per_second": 2.513,
"step": 90000
},
{
"epoch": 0.81,
"learning_rate": 5.596112565326473e-05,
"loss": 2.7456,
"step": 90100
},
{
"epoch": 0.82,
"learning_rate": 5.5689403751573714e-05,
"loss": 2.7393,
"step": 90200
},
{
"epoch": 0.82,
"learning_rate": 5.54176818498827e-05,
"loss": 2.74,
"step": 90300
},
{
"epoch": 0.82,
"learning_rate": 5.5145959948191685e-05,
"loss": 2.7411,
"step": 90400
},
{
"epoch": 0.82,
"learning_rate": 5.487695526551758e-05,
"loss": 2.747,
"step": 90500
},
{
"epoch": 0.82,
"learning_rate": 5.4605233363826564e-05,
"loss": 2.741,
"step": 90600
},
{
"epoch": 0.82,
"learning_rate": 5.433622868115246e-05,
"loss": 2.7441,
"step": 90700
},
{
"epoch": 0.82,
"learning_rate": 5.406450677946144e-05,
"loss": 2.7447,
"step": 90800
},
{
"epoch": 0.82,
"learning_rate": 5.3792784877770425e-05,
"loss": 2.7517,
"step": 90900
},
{
"epoch": 0.82,
"learning_rate": 5.3521062976079414e-05,
"loss": 2.7357,
"step": 91000
},
{
"epoch": 0.82,
"eval_accuracy": 0.4636022417605018,
"eval_loss": 2.758023738861084,
"eval_runtime": 43.2538,
"eval_samples_per_second": 149.883,
"eval_steps_per_second": 2.52,
"step": 91000
},
{
"epoch": 0.82,
"learning_rate": 5.3249341074388396e-05,
"loss": 2.7429,
"step": 91100
},
{
"epoch": 0.82,
"learning_rate": 5.297761917269738e-05,
"loss": 2.7445,
"step": 91200
},
{
"epoch": 0.83,
"learning_rate": 5.270589727100637e-05,
"loss": 2.7473,
"step": 91300
},
{
"epoch": 0.83,
"learning_rate": 5.243417536931535e-05,
"loss": 2.7404,
"step": 91400
},
{
"epoch": 0.83,
"learning_rate": 5.216245346762434e-05,
"loss": 2.7401,
"step": 91500
},
{
"epoch": 0.83,
"learning_rate": 5.189073156593331e-05,
"loss": 2.7441,
"step": 91600
},
{
"epoch": 0.83,
"learning_rate": 5.1619009664242295e-05,
"loss": 2.737,
"step": 91700
},
{
"epoch": 0.83,
"learning_rate": 5.1347287762551284e-05,
"loss": 2.7337,
"step": 91800
},
{
"epoch": 0.83,
"learning_rate": 5.1075565860860266e-05,
"loss": 2.7422,
"step": 91900
},
{
"epoch": 0.83,
"learning_rate": 5.080384395916925e-05,
"loss": 2.7452,
"step": 92000
},
{
"epoch": 0.83,
"eval_accuracy": 0.46426944678843307,
"eval_loss": 2.751744031906128,
"eval_runtime": 44.8905,
"eval_samples_per_second": 144.418,
"eval_steps_per_second": 2.428,
"step": 92000
},
{
"epoch": 0.83,
"learning_rate": 5.0532122057478237e-05,
"loss": 2.7387,
"step": 92100
},
{
"epoch": 0.83,
"learning_rate": 5.026311737480413e-05,
"loss": 2.7342,
"step": 92200
},
{
"epoch": 0.83,
"learning_rate": 4.9991395473113116e-05,
"loss": 2.7349,
"step": 92300
},
{
"epoch": 0.84,
"learning_rate": 4.97196735714221e-05,
"loss": 2.7388,
"step": 92400
},
{
"epoch": 0.84,
"learning_rate": 4.944795166973108e-05,
"loss": 2.7397,
"step": 92500
},
{
"epoch": 0.84,
"learning_rate": 4.917622976804007e-05,
"loss": 2.7352,
"step": 92600
},
{
"epoch": 0.84,
"learning_rate": 4.890450786634905e-05,
"loss": 2.7392,
"step": 92700
},
{
"epoch": 0.84,
"learning_rate": 4.863278596465803e-05,
"loss": 2.7419,
"step": 92800
},
{
"epoch": 0.84,
"learning_rate": 4.836106406296702e-05,
"loss": 2.738,
"step": 92900
},
{
"epoch": 0.84,
"learning_rate": 4.8089342161276004e-05,
"loss": 2.7418,
"step": 93000
},
{
"epoch": 0.84,
"eval_accuracy": 0.46412548051767366,
"eval_loss": 2.7533059120178223,
"eval_runtime": 43.1643,
"eval_samples_per_second": 150.193,
"eval_steps_per_second": 2.525,
"step": 93000
},
{
"epoch": 0.84,
"learning_rate": 4.781762025958498e-05,
"loss": 2.7372,
"step": 93100
},
{
"epoch": 0.84,
"learning_rate": 4.7545898357893974e-05,
"loss": 2.7369,
"step": 93200
},
{
"epoch": 0.84,
"learning_rate": 4.727417645620295e-05,
"loss": 2.7331,
"step": 93300
},
{
"epoch": 0.84,
"learning_rate": 4.700245455451193e-05,
"loss": 2.7379,
"step": 93400
},
{
"epoch": 0.85,
"learning_rate": 4.673073265282092e-05,
"loss": 2.7341,
"step": 93500
},
{
"epoch": 0.85,
"learning_rate": 4.64590107511299e-05,
"loss": 2.7359,
"step": 93600
},
{
"epoch": 0.85,
"learning_rate": 4.618728884943889e-05,
"loss": 2.737,
"step": 93700
},
{
"epoch": 0.85,
"learning_rate": 4.5915566947747873e-05,
"loss": 2.7343,
"step": 93800
},
{
"epoch": 0.85,
"learning_rate": 4.564656226507377e-05,
"loss": 2.7346,
"step": 93900
},
{
"epoch": 0.85,
"learning_rate": 4.537484036338275e-05,
"loss": 2.7379,
"step": 94000
},
{
"epoch": 0.85,
"eval_accuracy": 0.46473280041617143,
"eval_loss": 2.748091697692871,
"eval_runtime": 43.4169,
"eval_samples_per_second": 149.32,
"eval_steps_per_second": 2.511,
"step": 94000
},
{
"epoch": 0.85,
"learning_rate": 4.5103118461691735e-05,
"loss": 2.7341,
"step": 94100
},
{
"epoch": 0.85,
"learning_rate": 4.4831396560000724e-05,
"loss": 2.7431,
"step": 94200
},
{
"epoch": 0.85,
"learning_rate": 4.4559674658309706e-05,
"loss": 2.7347,
"step": 94300
},
{
"epoch": 0.85,
"learning_rate": 4.428795275661869e-05,
"loss": 2.7366,
"step": 94400
},
{
"epoch": 0.85,
"learning_rate": 4.4016230854927676e-05,
"loss": 2.7344,
"step": 94500
},
{
"epoch": 0.86,
"learning_rate": 4.374450895323666e-05,
"loss": 2.7382,
"step": 94600
},
{
"epoch": 0.86,
"learning_rate": 4.347278705154564e-05,
"loss": 2.7279,
"step": 94700
},
{
"epoch": 0.86,
"learning_rate": 4.320106514985463e-05,
"loss": 2.7307,
"step": 94800
},
{
"epoch": 0.86,
"learning_rate": 4.292934324816361e-05,
"loss": 2.7275,
"step": 94900
},
{
"epoch": 0.86,
"learning_rate": 4.26603385654895e-05,
"loss": 2.7308,
"step": 95000
},
{
"epoch": 0.86,
"eval_accuracy": 0.4653649212268588,
"eval_loss": 2.7459847927093506,
"eval_runtime": 43.1356,
"eval_samples_per_second": 150.294,
"eval_steps_per_second": 2.527,
"step": 95000
},
{
"epoch": 0.86,
"learning_rate": 4.2388616663798484e-05,
"loss": 2.7304,
"step": 95100
},
{
"epoch": 0.86,
"learning_rate": 4.211689476210747e-05,
"loss": 2.7334,
"step": 95200
},
{
"epoch": 0.86,
"learning_rate": 4.1845172860416455e-05,
"loss": 2.7324,
"step": 95300
},
{
"epoch": 0.86,
"learning_rate": 4.157345095872544e-05,
"loss": 2.7338,
"step": 95400
},
{
"epoch": 0.86,
"learning_rate": 4.1301729057034425e-05,
"loss": 2.7334,
"step": 95500
},
{
"epoch": 0.86,
"learning_rate": 4.103000715534341e-05,
"loss": 2.7323,
"step": 95600
},
{
"epoch": 0.87,
"learning_rate": 4.075828525365239e-05,
"loss": 2.7338,
"step": 95700
},
{
"epoch": 0.87,
"learning_rate": 4.048656335196138e-05,
"loss": 2.73,
"step": 95800
},
{
"epoch": 0.87,
"learning_rate": 4.021484145027036e-05,
"loss": 2.7367,
"step": 95900
},
{
"epoch": 0.87,
"learning_rate": 3.994311954857934e-05,
"loss": 2.727,
"step": 96000
},
{
"epoch": 0.87,
"eval_accuracy": 0.46549799929226665,
"eval_loss": 2.740849018096924,
"eval_runtime": 43.5693,
"eval_samples_per_second": 148.797,
"eval_steps_per_second": 2.502,
"step": 96000
},
{
"epoch": 0.87,
"learning_rate": 3.967139764688833e-05,
"loss": 2.7257,
"step": 96100
},
{
"epoch": 0.87,
"learning_rate": 3.939967574519731e-05,
"loss": 2.7251,
"step": 96200
},
{
"epoch": 0.87,
"learning_rate": 3.9127953843506295e-05,
"loss": 2.7236,
"step": 96300
},
{
"epoch": 0.87,
"learning_rate": 3.8856231941815284e-05,
"loss": 2.7224,
"step": 96400
},
{
"epoch": 0.87,
"learning_rate": 3.8584510040124266e-05,
"loss": 2.7204,
"step": 96500
},
{
"epoch": 0.87,
"learning_rate": 3.831278813843325e-05,
"loss": 2.7249,
"step": 96600
},
{
"epoch": 0.87,
"learning_rate": 3.804106623674224e-05,
"loss": 2.7214,
"step": 96700
},
{
"epoch": 0.88,
"learning_rate": 3.776934433505122e-05,
"loss": 2.7242,
"step": 96800
},
{
"epoch": 0.88,
"learning_rate": 3.74976224333602e-05,
"loss": 2.7147,
"step": 96900
},
{
"epoch": 0.88,
"learning_rate": 3.722861775068609e-05,
"loss": 2.7282,
"step": 97000
},
{
"epoch": 0.88,
"eval_accuracy": 0.4663823635269317,
"eval_loss": 2.7350597381591797,
"eval_runtime": 43.4285,
"eval_samples_per_second": 149.28,
"eval_steps_per_second": 2.51,
"step": 97000
},
{
"epoch": 0.88,
"learning_rate": 3.695689584899508e-05,
"loss": 2.718,
"step": 97100
},
{
"epoch": 0.88,
"learning_rate": 3.668517394730406e-05,
"loss": 2.7174,
"step": 97200
},
{
"epoch": 0.88,
"learning_rate": 3.6413452045613044e-05,
"loss": 2.7205,
"step": 97300
},
{
"epoch": 0.88,
"learning_rate": 3.614173014392203e-05,
"loss": 2.7195,
"step": 97400
},
{
"epoch": 0.88,
"learning_rate": 3.5870008242231015e-05,
"loss": 2.7172,
"step": 97500
},
{
"epoch": 0.88,
"learning_rate": 3.559828634054e-05,
"loss": 2.7128,
"step": 97600
},
{
"epoch": 0.88,
"learning_rate": 3.532656443884898e-05,
"loss": 2.7192,
"step": 97700
},
{
"epoch": 0.88,
"learning_rate": 3.505484253715797e-05,
"loss": 2.7191,
"step": 97800
},
{
"epoch": 0.89,
"learning_rate": 3.478312063546695e-05,
"loss": 2.7178,
"step": 97900
},
{
"epoch": 0.89,
"learning_rate": 3.451139873377593e-05,
"loss": 2.7133,
"step": 98000
},
{
"epoch": 0.89,
"eval_accuracy": 0.46685176615764307,
"eval_loss": 2.730079412460327,
"eval_runtime": 43.3235,
"eval_samples_per_second": 149.642,
"eval_steps_per_second": 2.516,
"step": 98000
},
{
"epoch": 0.89,
"learning_rate": 3.423967683208492e-05,
"loss": 2.7164,
"step": 98100
},
{
"epoch": 0.89,
"learning_rate": 3.39679549303939e-05,
"loss": 2.7106,
"step": 98200
},
{
"epoch": 0.89,
"learning_rate": 3.3696233028702885e-05,
"loss": 2.715,
"step": 98300
},
{
"epoch": 0.89,
"learning_rate": 3.3424511127011874e-05,
"loss": 2.7091,
"step": 98400
},
{
"epoch": 0.89,
"learning_rate": 3.3152789225320856e-05,
"loss": 2.7093,
"step": 98500
},
{
"epoch": 0.89,
"learning_rate": 3.288106732362984e-05,
"loss": 2.7116,
"step": 98600
},
{
"epoch": 0.89,
"learning_rate": 3.260934542193883e-05,
"loss": 2.7172,
"step": 98700
},
{
"epoch": 0.89,
"learning_rate": 3.233762352024781e-05,
"loss": 2.7072,
"step": 98800
},
{
"epoch": 0.89,
"learning_rate": 3.206590161855679e-05,
"loss": 2.7165,
"step": 98900
},
{
"epoch": 0.9,
"learning_rate": 3.179417971686577e-05,
"loss": 2.7136,
"step": 99000
},
{
"epoch": 0.9,
"eval_accuracy": 0.4673356863954899,
"eval_loss": 2.7250616550445557,
"eval_runtime": 43.1535,
"eval_samples_per_second": 150.231,
"eval_steps_per_second": 2.526,
"step": 99000
},
{
"epoch": 0.9,
"learning_rate": 3.152245781517476e-05,
"loss": 2.7117,
"step": 99100
},
{
"epoch": 0.9,
"learning_rate": 3.1250735913483744e-05,
"loss": 2.7099,
"step": 99200
},
{
"epoch": 0.9,
"learning_rate": 3.0979014011792726e-05,
"loss": 2.715,
"step": 99300
},
{
"epoch": 0.9,
"learning_rate": 3.0707292110101715e-05,
"loss": 2.7119,
"step": 99400
},
{
"epoch": 0.9,
"learning_rate": 3.0435570208410697e-05,
"loss": 2.7136,
"step": 99500
},
{
"epoch": 0.9,
"learning_rate": 3.016384830671968e-05,
"loss": 2.7069,
"step": 99600
},
{
"epoch": 0.9,
"learning_rate": 2.9892126405028664e-05,
"loss": 2.7092,
"step": 99700
},
{
"epoch": 0.9,
"learning_rate": 2.962040450333765e-05,
"loss": 2.7052,
"step": 99800
},
{
"epoch": 0.9,
"learning_rate": 2.934868260164663e-05,
"loss": 2.7099,
"step": 99900
},
{
"epoch": 0.9,
"learning_rate": 2.9076960699955617e-05,
"loss": 2.7108,
"step": 100000
},
{
"epoch": 0.9,
"eval_accuracy": 0.46786981335801325,
"eval_loss": 2.7208478450775146,
"eval_runtime": 43.4331,
"eval_samples_per_second": 149.264,
"eval_steps_per_second": 2.51,
"step": 100000
},
{
"epoch": 0.91,
"learning_rate": 2.8807956017281514e-05,
"loss": 2.7137,
"step": 100100
},
{
"epoch": 0.91,
"learning_rate": 2.8536234115590493e-05,
"loss": 2.7069,
"step": 100200
},
{
"epoch": 0.91,
"learning_rate": 2.8264512213899478e-05,
"loss": 2.698,
"step": 100300
},
{
"epoch": 0.91,
"learning_rate": 2.7992790312208464e-05,
"loss": 2.7027,
"step": 100400
},
{
"epoch": 0.91,
"learning_rate": 2.7721068410517446e-05,
"loss": 2.7062,
"step": 100500
},
{
"epoch": 0.91,
"learning_rate": 2.744934650882643e-05,
"loss": 2.7064,
"step": 100600
},
{
"epoch": 0.91,
"learning_rate": 2.718034182615232e-05,
"loss": 2.7059,
"step": 100700
},
{
"epoch": 0.91,
"learning_rate": 2.691133714347822e-05,
"loss": 2.7146,
"step": 100800
},
{
"epoch": 0.91,
"learning_rate": 2.6639615241787204e-05,
"loss": 2.7036,
"step": 100900
},
{
"epoch": 0.91,
"learning_rate": 2.6367893340096186e-05,
"loss": 2.7051,
"step": 101000
},
{
"epoch": 0.91,
"eval_accuracy": 0.46807245495761163,
"eval_loss": 2.7191717624664307,
"eval_runtime": 43.4633,
"eval_samples_per_second": 149.16,
"eval_steps_per_second": 2.508,
"step": 101000
},
{
"epoch": 0.91,
"learning_rate": 2.609617143840517e-05,
"loss": 2.7007,
"step": 101100
},
{
"epoch": 0.91,
"learning_rate": 2.5824449536714157e-05,
"loss": 2.7024,
"step": 101200
},
{
"epoch": 0.92,
"learning_rate": 2.555272763502314e-05,
"loss": 2.7027,
"step": 101300
},
{
"epoch": 0.92,
"learning_rate": 2.5281005733332124e-05,
"loss": 2.7082,
"step": 101400
},
{
"epoch": 0.92,
"learning_rate": 2.500928383164111e-05,
"loss": 2.7067,
"step": 101500
},
{
"epoch": 0.92,
"learning_rate": 2.4737561929950092e-05,
"loss": 2.7044,
"step": 101600
},
{
"epoch": 0.92,
"learning_rate": 2.4465840028259074e-05,
"loss": 2.705,
"step": 101700
},
{
"epoch": 0.92,
"learning_rate": 2.419411812656806e-05,
"loss": 2.7069,
"step": 101800
},
{
"epoch": 0.92,
"learning_rate": 2.3922396224877045e-05,
"loss": 2.7005,
"step": 101900
},
{
"epoch": 0.92,
"learning_rate": 2.3650674323186027e-05,
"loss": 2.7013,
"step": 102000
},
{
"epoch": 0.92,
"eval_accuracy": 0.4687317962816779,
"eval_loss": 2.7151107788085938,
"eval_runtime": 43.2863,
"eval_samples_per_second": 149.77,
"eval_steps_per_second": 2.518,
"step": 102000
},
{
"epoch": 0.92,
"learning_rate": 2.3378952421495012e-05,
"loss": 2.7029,
"step": 102100
},
{
"epoch": 0.92,
"learning_rate": 2.3107230519803998e-05,
"loss": 2.7007,
"step": 102200
},
{
"epoch": 0.92,
"learning_rate": 2.283550861811298e-05,
"loss": 2.7089,
"step": 102300
},
{
"epoch": 0.93,
"learning_rate": 2.2563786716421965e-05,
"loss": 2.7018,
"step": 102400
},
{
"epoch": 0.93,
"learning_rate": 2.229206481473095e-05,
"loss": 2.6984,
"step": 102500
},
{
"epoch": 0.93,
"learning_rate": 2.202306013205684e-05,
"loss": 2.7011,
"step": 102600
},
{
"epoch": 0.93,
"learning_rate": 2.1751338230365826e-05,
"loss": 2.6968,
"step": 102700
},
{
"epoch": 0.93,
"learning_rate": 2.1479616328674812e-05,
"loss": 2.701,
"step": 102800
},
{
"epoch": 0.93,
"learning_rate": 2.1207894426983794e-05,
"loss": 2.7079,
"step": 102900
},
{
"epoch": 0.93,
"learning_rate": 2.093617252529278e-05,
"loss": 2.6996,
"step": 103000
},
{
"epoch": 0.93,
"eval_accuracy": 0.46891387127116774,
"eval_loss": 2.7129361629486084,
"eval_runtime": 43.7353,
"eval_samples_per_second": 148.233,
"eval_steps_per_second": 2.492,
"step": 103000
},
{
"epoch": 0.93,
"learning_rate": 2.0664450623601765e-05,
"loss": 2.6985,
"step": 103100
},
{
"epoch": 0.93,
"learning_rate": 2.0392728721910743e-05,
"loss": 2.6945,
"step": 103200
},
{
"epoch": 0.93,
"learning_rate": 2.012100682021973e-05,
"loss": 2.6988,
"step": 103300
},
{
"epoch": 0.93,
"learning_rate": 1.9849284918528714e-05,
"loss": 2.701,
"step": 103400
},
{
"epoch": 0.94,
"learning_rate": 1.9577563016837696e-05,
"loss": 2.7044,
"step": 103500
},
{
"epoch": 0.94,
"learning_rate": 1.930584111514668e-05,
"loss": 2.6897,
"step": 103600
},
{
"epoch": 0.94,
"learning_rate": 1.9034119213455667e-05,
"loss": 2.6993,
"step": 103700
},
{
"epoch": 0.94,
"learning_rate": 1.8762397311764652e-05,
"loss": 2.6978,
"step": 103800
},
{
"epoch": 0.94,
"learning_rate": 1.8490675410073634e-05,
"loss": 2.6965,
"step": 103900
},
{
"epoch": 0.94,
"learning_rate": 1.821895350838262e-05,
"loss": 2.6898,
"step": 104000
},
{
"epoch": 0.94,
"eval_accuracy": 0.46940021111020375,
"eval_loss": 2.7084131240844727,
"eval_runtime": 44.0036,
"eval_samples_per_second": 147.329,
"eval_steps_per_second": 2.477,
"step": 104000
},
{
"epoch": 0.94,
"learning_rate": 1.7947231606691602e-05,
"loss": 2.6918,
"step": 104100
},
{
"epoch": 0.94,
"learning_rate": 1.7675509705000587e-05,
"loss": 2.6941,
"step": 104200
},
{
"epoch": 0.94,
"learning_rate": 1.7403787803309573e-05,
"loss": 2.6954,
"step": 104300
},
{
"epoch": 0.94,
"learning_rate": 1.7132065901618555e-05,
"loss": 2.7015,
"step": 104400
},
{
"epoch": 0.94,
"learning_rate": 1.686034399992754e-05,
"loss": 2.698,
"step": 104500
},
{
"epoch": 0.95,
"learning_rate": 1.6588622098236522e-05,
"loss": 2.6922,
"step": 104600
},
{
"epoch": 0.95,
"learning_rate": 1.6319617415562416e-05,
"loss": 2.6932,
"step": 104700
},
{
"epoch": 0.95,
"learning_rate": 1.60478955138714e-05,
"loss": 2.6887,
"step": 104800
},
{
"epoch": 0.95,
"learning_rate": 1.5776173612180387e-05,
"loss": 2.6887,
"step": 104900
},
{
"epoch": 0.95,
"learning_rate": 1.550445171048937e-05,
"loss": 2.688,
"step": 105000
},
{
"epoch": 0.95,
"eval_accuracy": 0.4697316964731288,
"eval_loss": 2.705327272415161,
"eval_runtime": 43.7246,
"eval_samples_per_second": 148.269,
"eval_steps_per_second": 2.493,
"step": 105000
},
{
"epoch": 0.95,
"learning_rate": 1.5232729808798354e-05,
"loss": 2.6933,
"step": 105100
},
{
"epoch": 0.95,
"learning_rate": 1.4961007907107338e-05,
"loss": 2.6992,
"step": 105200
},
{
"epoch": 0.95,
"learning_rate": 1.468928600541632e-05,
"loss": 2.6943,
"step": 105300
},
{
"epoch": 0.95,
"learning_rate": 1.4417564103725306e-05,
"loss": 2.6919,
"step": 105400
},
{
"epoch": 0.95,
"learning_rate": 1.414584220203429e-05,
"loss": 2.6961,
"step": 105500
},
{
"epoch": 0.95,
"learning_rate": 1.3874120300343275e-05,
"loss": 2.6942,
"step": 105600
},
{
"epoch": 0.96,
"learning_rate": 1.3602398398652258e-05,
"loss": 2.6936,
"step": 105700
},
{
"epoch": 0.96,
"learning_rate": 1.3330676496961242e-05,
"loss": 2.6851,
"step": 105800
},
{
"epoch": 0.96,
"learning_rate": 1.3058954595270228e-05,
"loss": 2.6929,
"step": 105900
},
{
"epoch": 0.96,
"learning_rate": 1.278723269357921e-05,
"loss": 2.6855,
"step": 106000
},
{
"epoch": 0.96,
"eval_accuracy": 0.4701273012675686,
"eval_loss": 2.701770305633545,
"eval_runtime": 44.1379,
"eval_samples_per_second": 146.881,
"eval_steps_per_second": 2.47,
"step": 106000
},
{
"epoch": 0.96,
"learning_rate": 1.2515510791888195e-05,
"loss": 2.6922,
"step": 106100
},
{
"epoch": 0.96,
"learning_rate": 1.2243788890197179e-05,
"loss": 2.6811,
"step": 106200
},
{
"epoch": 0.96,
"learning_rate": 1.1972066988506163e-05,
"loss": 2.6819,
"step": 106300
},
{
"epoch": 0.96,
"learning_rate": 1.1700345086815148e-05,
"loss": 2.6882,
"step": 106400
},
{
"epoch": 0.96,
"learning_rate": 1.142862318512413e-05,
"loss": 2.685,
"step": 106500
},
{
"epoch": 0.96,
"learning_rate": 1.1159618502450025e-05,
"loss": 2.6841,
"step": 106600
},
{
"epoch": 0.96,
"learning_rate": 1.0887896600759008e-05,
"loss": 2.6806,
"step": 106700
},
{
"epoch": 0.97,
"learning_rate": 1.0616174699067993e-05,
"loss": 2.6896,
"step": 106800
},
{
"epoch": 0.97,
"learning_rate": 1.0344452797376977e-05,
"loss": 2.6807,
"step": 106900
},
{
"epoch": 0.97,
"learning_rate": 1.0072730895685962e-05,
"loss": 2.6852,
"step": 107000
},
{
"epoch": 0.97,
"eval_accuracy": 0.4704999198507106,
"eval_loss": 2.698939085006714,
"eval_runtime": 43.9086,
"eval_samples_per_second": 147.648,
"eval_steps_per_second": 2.482,
"step": 107000
},
{
"epoch": 0.97,
"learning_rate": 9.803726213011856e-06,
"loss": 2.6861,
"step": 107100
},
{
"epoch": 0.97,
"learning_rate": 9.53200431132084e-06,
"loss": 2.6886,
"step": 107200
},
{
"epoch": 0.97,
"learning_rate": 9.260282409629823e-06,
"loss": 2.6872,
"step": 107300
},
{
"epoch": 0.97,
"learning_rate": 8.988560507938807e-06,
"loss": 2.685,
"step": 107400
},
{
"epoch": 0.97,
"learning_rate": 8.71683860624779e-06,
"loss": 2.6892,
"step": 107500
},
{
"epoch": 0.97,
"learning_rate": 8.445116704556776e-06,
"loss": 2.6815,
"step": 107600
},
{
"epoch": 0.97,
"learning_rate": 8.17339480286576e-06,
"loss": 2.6879,
"step": 107700
},
{
"epoch": 0.97,
"learning_rate": 7.901672901174744e-06,
"loss": 2.6822,
"step": 107800
},
{
"epoch": 0.98,
"learning_rate": 7.629950999483727e-06,
"loss": 2.6806,
"step": 107900
},
{
"epoch": 0.98,
"learning_rate": 7.360946316809621e-06,
"loss": 2.689,
"step": 108000
},
{
"epoch": 0.98,
"eval_accuracy": 0.4705204864608191,
"eval_loss": 2.6981818675994873,
"eval_runtime": 43.1633,
"eval_samples_per_second": 150.197,
"eval_steps_per_second": 2.525,
"step": 108000
},
{
"epoch": 0.98,
"learning_rate": 7.089224415118606e-06,
"loss": 2.6872,
"step": 108100
},
{
"epoch": 0.98,
"learning_rate": 6.81750251342759e-06,
"loss": 2.6962,
"step": 108200
},
{
"epoch": 0.98,
"learning_rate": 6.545780611736574e-06,
"loss": 2.6831,
"step": 108300
},
{
"epoch": 0.98,
"learning_rate": 6.274058710045559e-06,
"loss": 2.6877,
"step": 108400
},
{
"epoch": 0.98,
"learning_rate": 6.0023368083545415e-06,
"loss": 2.6956,
"step": 108500
},
{
"epoch": 0.98,
"learning_rate": 5.730614906663526e-06,
"loss": 2.6936,
"step": 108600
},
{
"epoch": 0.98,
"learning_rate": 5.458893004972511e-06,
"loss": 2.6864,
"step": 108700
},
{
"epoch": 0.98,
"learning_rate": 5.187171103281495e-06,
"loss": 2.6838,
"step": 108800
},
{
"epoch": 0.98,
"learning_rate": 4.915449201590478e-06,
"loss": 2.6867,
"step": 108900
},
{
"epoch": 0.99,
"learning_rate": 4.643727299899463e-06,
"loss": 2.6868,
"step": 109000
},
{
"epoch": 0.99,
"eval_accuracy": 0.4707297819636878,
"eval_loss": 2.6994001865386963,
"eval_runtime": 43.0302,
"eval_samples_per_second": 150.662,
"eval_steps_per_second": 2.533,
"step": 109000
},
{
"epoch": 0.99,
"learning_rate": 4.3720053982084465e-06,
"loss": 2.689,
"step": 109100
},
{
"epoch": 0.99,
"learning_rate": 4.10028349651743e-06,
"loss": 2.6831,
"step": 109200
},
{
"epoch": 0.99,
"learning_rate": 3.831278813843325e-06,
"loss": 2.6825,
"step": 109300
},
{
"epoch": 0.99,
"learning_rate": 3.559556912152309e-06,
"loss": 2.6851,
"step": 109400
},
{
"epoch": 0.99,
"learning_rate": 3.2878350104612927e-06,
"loss": 2.6798,
"step": 109500
},
{
"epoch": 0.99,
"learning_rate": 3.016113108770277e-06,
"loss": 2.6773,
"step": 109600
},
{
"epoch": 0.99,
"learning_rate": 2.744391207079261e-06,
"loss": 2.6829,
"step": 109700
},
{
"epoch": 0.99,
"learning_rate": 2.472669305388245e-06,
"loss": 2.6819,
"step": 109800
},
{
"epoch": 0.99,
"learning_rate": 2.2036646227141394e-06,
"loss": 2.6827,
"step": 109900
},
{
"epoch": 0.99,
"learning_rate": 1.931942721023123e-06,
"loss": 2.6901,
"step": 110000
},
{
"epoch": 0.99,
"eval_accuracy": 0.47069106834466007,
"eval_loss": 2.700648307800293,
"eval_runtime": 43.0535,
"eval_samples_per_second": 150.58,
"eval_steps_per_second": 2.532,
"step": 110000
},
{
"epoch": 1.0,
"learning_rate": 1.6602208193321073e-06,
"loss": 2.6809,
"step": 110100
},
{
"epoch": 1.0,
"learning_rate": 1.3884989176410914e-06,
"loss": 2.6866,
"step": 110200
},
{
"epoch": 1.0,
"learning_rate": 1.1167770159500756e-06,
"loss": 2.6863,
"step": 110300
},
{
"epoch": 1.0,
"learning_rate": 8.450551142590596e-07,
"loss": 2.6912,
"step": 110400
},
{
"epoch": 1.0,
"learning_rate": 5.733332125680437e-07,
"loss": 2.6916,
"step": 110500
},
{
"epoch": 1.0,
"learning_rate": 3.0161131087702765e-07,
"loss": 2.684,
"step": 110600
},
{
"epoch": 1.0,
"step": 110607,
"total_flos": 2.899312376933253e+20,
"train_loss": 2.8584754099769967,
"train_runtime": 318077.2613,
"train_samples_per_second": 83.457,
"train_steps_per_second": 0.348
}
],
"logging_steps": 100,
"max_steps": 110607,
"num_train_epochs": 1,
"save_steps": 11061,
"total_flos": 2.899312376933253e+20,
"trial_name": null,
"trial_params": null
}