{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999954795108831, "eval_steps": 1000, "global_step": 110607, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00024521369940743435, "loss": 5.4472, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.0002927618508292659, "loss": 3.467, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.0002997989257927486, "loss": 3.3454, "step": 300 }, { "epoch": 0.0, "learning_rate": 0.00029952720389105764, "loss": 3.2872, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.00029925548198936656, "loss": 3.2489, "step": 500 }, { "epoch": 0.01, "learning_rate": 0.0002989837600876756, "loss": 3.2458, "step": 600 }, { "epoch": 0.01, "learning_rate": 0.00029871475540500146, "loss": 3.2073, "step": 700 }, { "epoch": 0.01, "learning_rate": 0.00029844303350331044, "loss": 3.1957, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.0002981713116016194, "loss": 3.1817, "step": 900 }, { "epoch": 0.01, "learning_rate": 0.00029789958969992844, "loss": 3.1631, "step": 1000 }, { "epoch": 0.01, "eval_accuracy": 0.4182117332510669, "eval_loss": 3.180420398712158, "eval_runtime": 43.6723, "eval_samples_per_second": 148.447, "eval_steps_per_second": 2.496, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.0002976278677982374, "loss": 3.1598, "step": 1100 }, { "epoch": 0.01, "learning_rate": 0.0002973561458965464, "loss": 3.1584, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.00029708442399485537, "loss": 3.144, "step": 1300 }, { "epoch": 0.01, "learning_rate": 0.00029681270209316434, "loss": 3.1346, "step": 1400 }, { "epoch": 0.01, "learning_rate": 0.0002965409801914733, "loss": 3.1359, "step": 1500 }, { "epoch": 0.01, "learning_rate": 0.00029626925828978235, "loss": 3.1268, "step": 1600 }, { "epoch": 0.02, "learning_rate": 0.0002959975363880913, "loss": 3.1175, "step": 1700 }, { "epoch": 0.02, "learning_rate": 0.0002957258144864003, "loss": 3.1189, "step": 1800 }, { "epoch": 0.02, "learning_rate": 0.0002954540925847093, "loss": 3.1057, "step": 1900 }, { "epoch": 0.02, "learning_rate": 0.00029518237068301825, "loss": 3.1124, "step": 2000 }, { "epoch": 0.02, "eval_accuracy": 0.4272265623818554, "eval_loss": 3.106520891189575, "eval_runtime": 43.4484, "eval_samples_per_second": 149.212, "eval_steps_per_second": 2.509, "step": 2000 }, { "epoch": 0.02, "learning_rate": 0.0002949106487813272, "loss": 3.1004, "step": 2100 }, { "epoch": 0.02, "learning_rate": 0.0002946389268796362, "loss": 3.1018, "step": 2200 }, { "epoch": 0.02, "learning_rate": 0.00029436720497794523, "loss": 3.0864, "step": 2300 }, { "epoch": 0.02, "learning_rate": 0.0002940954830762542, "loss": 3.0872, "step": 2400 }, { "epoch": 0.02, "learning_rate": 0.0002938237611745632, "loss": 3.0883, "step": 2500 }, { "epoch": 0.02, "learning_rate": 0.00029355475649188906, "loss": 3.0843, "step": 2600 }, { "epoch": 0.02, "learning_rate": 0.0002932830345901981, "loss": 3.0815, "step": 2700 }, { "epoch": 0.03, "learning_rate": 0.00029301131268850706, "loss": 3.0784, "step": 2800 }, { "epoch": 0.03, "learning_rate": 0.00029273959078681604, "loss": 3.0728, "step": 2900 }, { "epoch": 0.03, "learning_rate": 0.000292467868885125, "loss": 3.0757, "step": 3000 }, { "epoch": 0.03, "eval_accuracy": 0.42875514543315396, "eval_loss": 3.0894298553466797, "eval_runtime": 43.8742, "eval_samples_per_second": 147.763, "eval_steps_per_second": 2.484, "step": 3000 }, { "epoch": 0.03, "learning_rate": 0.000292196146983434, "loss": 3.0754, "step": 3100 }, { "epoch": 0.03, "learning_rate": 0.000291924425081743, "loss": 3.0634, "step": 3200 }, { "epoch": 0.03, "learning_rate": 0.00029165270318005194, "loss": 3.0652, "step": 3300 }, { "epoch": 0.03, "learning_rate": 0.00029138098127836097, "loss": 3.0566, "step": 3400 }, { "epoch": 0.03, "learning_rate": 0.00029110925937666994, "loss": 3.067, "step": 3500 }, { "epoch": 0.03, "learning_rate": 0.0002908375374749789, "loss": 3.0525, "step": 3600 }, { "epoch": 0.03, "learning_rate": 0.0002905658155732879, "loss": 3.0595, "step": 3700 }, { "epoch": 0.03, "learning_rate": 0.00029029409367159687, "loss": 3.0586, "step": 3800 }, { "epoch": 0.04, "learning_rate": 0.0002900223717699059, "loss": 3.0499, "step": 3900 }, { "epoch": 0.04, "learning_rate": 0.0002897506498682148, "loss": 3.0488, "step": 4000 }, { "epoch": 0.04, "eval_accuracy": 0.43185525945686004, "eval_loss": 3.062988758087158, "eval_runtime": 44.0507, "eval_samples_per_second": 147.171, "eval_steps_per_second": 2.474, "step": 4000 }, { "epoch": 0.04, "learning_rate": 0.00028947892796652385, "loss": 3.0426, "step": 4100 }, { "epoch": 0.04, "learning_rate": 0.0002892072060648328, "loss": 3.0433, "step": 4200 }, { "epoch": 0.04, "learning_rate": 0.0002889354841631418, "loss": 3.0428, "step": 4300 }, { "epoch": 0.04, "learning_rate": 0.00028866376226145083, "loss": 3.0359, "step": 4400 }, { "epoch": 0.04, "learning_rate": 0.00028839204035975975, "loss": 3.0386, "step": 4500 }, { "epoch": 0.04, "learning_rate": 0.0002881203184580688, "loss": 3.034, "step": 4600 }, { "epoch": 0.04, "learning_rate": 0.00028784859655637776, "loss": 3.0456, "step": 4700 }, { "epoch": 0.04, "learning_rate": 0.00028757687465468673, "loss": 3.0361, "step": 4800 }, { "epoch": 0.04, "learning_rate": 0.0002873051527529957, "loss": 3.0412, "step": 4900 }, { "epoch": 0.05, "learning_rate": 0.0002870334308513047, "loss": 3.0403, "step": 5000 }, { "epoch": 0.05, "eval_accuracy": 0.43361975362410893, "eval_loss": 3.0423271656036377, "eval_runtime": 43.1088, "eval_samples_per_second": 150.387, "eval_steps_per_second": 2.528, "step": 5000 }, { "epoch": 0.05, "learning_rate": 0.0002867617089496137, "loss": 3.0341, "step": 5100 }, { "epoch": 0.05, "learning_rate": 0.00028648998704792263, "loss": 3.0297, "step": 5200 }, { "epoch": 0.05, "learning_rate": 0.00028621826514623166, "loss": 3.0324, "step": 5300 }, { "epoch": 0.05, "learning_rate": 0.00028594654324454064, "loss": 3.0317, "step": 5400 }, { "epoch": 0.05, "learning_rate": 0.0002856748213428496, "loss": 3.0167, "step": 5500 }, { "epoch": 0.05, "learning_rate": 0.0002854030994411586, "loss": 3.0202, "step": 5600 }, { "epoch": 0.05, "learning_rate": 0.00028513137753946756, "loss": 3.0231, "step": 5700 }, { "epoch": 0.05, "learning_rate": 0.0002848596556377766, "loss": 3.0166, "step": 5800 }, { "epoch": 0.05, "learning_rate": 0.0002845879337360855, "loss": 3.0246, "step": 5900 }, { "epoch": 0.05, "learning_rate": 0.00028431621183439454, "loss": 3.0172, "step": 6000 }, { "epoch": 0.05, "eval_accuracy": 0.434315993866311, "eval_loss": 3.038356304168701, "eval_runtime": 43.5763, "eval_samples_per_second": 148.774, "eval_steps_per_second": 2.501, "step": 6000 }, { "epoch": 0.06, "learning_rate": 0.0002840444899327035, "loss": 3.0123, "step": 6100 }, { "epoch": 0.06, "learning_rate": 0.0002837727680310125, "loss": 3.0177, "step": 6200 }, { "epoch": 0.06, "learning_rate": 0.0002835010461293215, "loss": 3.0195, "step": 6300 }, { "epoch": 0.06, "learning_rate": 0.0002832320414466474, "loss": 3.0175, "step": 6400 }, { "epoch": 0.06, "learning_rate": 0.0002829603195449564, "loss": 3.0192, "step": 6500 }, { "epoch": 0.06, "learning_rate": 0.00028268859764326535, "loss": 3.0079, "step": 6600 }, { "epoch": 0.06, "learning_rate": 0.0002824168757415743, "loss": 3.0138, "step": 6700 }, { "epoch": 0.06, "learning_rate": 0.0002821451538398833, "loss": 3.0175, "step": 6800 }, { "epoch": 0.06, "learning_rate": 0.00028187343193819233, "loss": 3.0148, "step": 6900 }, { "epoch": 0.06, "learning_rate": 0.00028160171003650125, "loss": 3.0102, "step": 7000 }, { "epoch": 0.06, "eval_accuracy": 0.43602120780442366, "eval_loss": 3.026742696762085, "eval_runtime": 43.2189, "eval_samples_per_second": 150.004, "eval_steps_per_second": 2.522, "step": 7000 }, { "epoch": 0.06, "learning_rate": 0.0002813299881348103, "loss": 3.0111, "step": 7100 }, { "epoch": 0.07, "learning_rate": 0.00028105826623311925, "loss": 3.0077, "step": 7200 }, { "epoch": 0.07, "learning_rate": 0.00028078654433142823, "loss": 3.0055, "step": 7300 }, { "epoch": 0.07, "learning_rate": 0.0002805148224297372, "loss": 3.0084, "step": 7400 }, { "epoch": 0.07, "learning_rate": 0.0002802431005280462, "loss": 3.0124, "step": 7500 }, { "epoch": 0.07, "learning_rate": 0.0002799713786263552, "loss": 3.0051, "step": 7600 }, { "epoch": 0.07, "learning_rate": 0.00027969965672466413, "loss": 3.0039, "step": 7700 }, { "epoch": 0.07, "learning_rate": 0.00027942793482297316, "loss": 3.0033, "step": 7800 }, { "epoch": 0.07, "learning_rate": 0.00027915621292128214, "loss": 3.0044, "step": 7900 }, { "epoch": 0.07, "learning_rate": 0.0002788844910195911, "loss": 2.9888, "step": 8000 }, { "epoch": 0.07, "eval_accuracy": 0.4361119428490199, "eval_loss": 3.0189716815948486, "eval_runtime": 43.5746, "eval_samples_per_second": 148.779, "eval_steps_per_second": 2.501, "step": 8000 }, { "epoch": 0.07, "learning_rate": 0.00027861276911790014, "loss": 3.0097, "step": 8100 }, { "epoch": 0.07, "learning_rate": 0.00027834104721620906, "loss": 3.008, "step": 8200 }, { "epoch": 0.08, "learning_rate": 0.0002780693253145181, "loss": 2.9979, "step": 8300 }, { "epoch": 0.08, "learning_rate": 0.00027779760341282707, "loss": 2.994, "step": 8400 }, { "epoch": 0.08, "learning_rate": 0.00027752588151113604, "loss": 2.9985, "step": 8500 }, { "epoch": 0.08, "learning_rate": 0.000277254159609445, "loss": 2.9966, "step": 8600 }, { "epoch": 0.08, "learning_rate": 0.000276982437707754, "loss": 2.9968, "step": 8700 }, { "epoch": 0.08, "learning_rate": 0.000276710715806063, "loss": 2.9999, "step": 8800 }, { "epoch": 0.08, "learning_rate": 0.000276438993904372, "loss": 2.9973, "step": 8900 }, { "epoch": 0.08, "learning_rate": 0.00027616727200268097, "loss": 3.0024, "step": 9000 }, { "epoch": 0.08, "eval_accuracy": 0.4384970647213073, "eval_loss": 3.0039989948272705, "eval_runtime": 44.0911, "eval_samples_per_second": 147.036, "eval_steps_per_second": 2.472, "step": 9000 }, { "epoch": 0.08, "learning_rate": 0.00027589555010098995, "loss": 3.0015, "step": 9100 }, { "epoch": 0.08, "learning_rate": 0.0002756238281992989, "loss": 2.9946, "step": 9200 }, { "epoch": 0.08, "learning_rate": 0.0002753521062976079, "loss": 2.9932, "step": 9300 }, { "epoch": 0.08, "learning_rate": 0.0002750803843959169, "loss": 2.9985, "step": 9400 }, { "epoch": 0.09, "learning_rate": 0.0002748086624942259, "loss": 2.9913, "step": 9500 }, { "epoch": 0.09, "learning_rate": 0.0002745369405925349, "loss": 2.9946, "step": 9600 }, { "epoch": 0.09, "learning_rate": 0.00027426521869084385, "loss": 2.99, "step": 9700 }, { "epoch": 0.09, "learning_rate": 0.00027399349678915283, "loss": 2.9927, "step": 9800 }, { "epoch": 0.09, "learning_rate": 0.0002737217748874618, "loss": 2.9883, "step": 9900 }, { "epoch": 0.09, "learning_rate": 0.00027345005298577083, "loss": 2.9948, "step": 10000 }, { "epoch": 0.09, "eval_accuracy": 0.43782985969337607, "eval_loss": 3.0057804584503174, "eval_runtime": 43.6419, "eval_samples_per_second": 148.55, "eval_steps_per_second": 2.498, "step": 10000 }, { "epoch": 0.09, "learning_rate": 0.0002731810483030967, "loss": 2.983, "step": 10100 }, { "epoch": 0.09, "learning_rate": 0.0002729093264014057, "loss": 2.9806, "step": 10200 }, { "epoch": 0.09, "learning_rate": 0.00027263760449971466, "loss": 2.9881, "step": 10300 }, { "epoch": 0.09, "learning_rate": 0.00027236588259802364, "loss": 2.9814, "step": 10400 }, { "epoch": 0.09, "learning_rate": 0.00027209416069633267, "loss": 2.9824, "step": 10500 }, { "epoch": 0.1, "learning_rate": 0.00027182243879464164, "loss": 2.9885, "step": 10600 }, { "epoch": 0.1, "learning_rate": 0.0002715507168929506, "loss": 2.989, "step": 10700 }, { "epoch": 0.1, "learning_rate": 0.0002712789949912596, "loss": 2.986, "step": 10800 }, { "epoch": 0.1, "learning_rate": 0.00027100727308956857, "loss": 2.9856, "step": 10900 }, { "epoch": 0.1, "learning_rate": 0.00027073555118787754, "loss": 2.9774, "step": 11000 }, { "epoch": 0.1, "eval_accuracy": 0.438878756808909, "eval_loss": 2.9962034225463867, "eval_runtime": 43.3441, "eval_samples_per_second": 149.57, "eval_steps_per_second": 2.515, "step": 11000 }, { "epoch": 0.1, "learning_rate": 0.0002704638292861865, "loss": 2.9941, "step": 11100 }, { "epoch": 0.1, "learning_rate": 0.00027019210738449555, "loss": 2.9799, "step": 11200 }, { "epoch": 0.1, "learning_rate": 0.0002699203854828045, "loss": 2.9834, "step": 11300 }, { "epoch": 0.1, "learning_rate": 0.0002696513808001304, "loss": 2.9767, "step": 11400 }, { "epoch": 0.1, "learning_rate": 0.0002693796588984394, "loss": 2.9772, "step": 11500 }, { "epoch": 0.1, "learning_rate": 0.0002691079369967484, "loss": 2.9891, "step": 11600 }, { "epoch": 0.11, "learning_rate": 0.0002688362150950573, "loss": 2.9787, "step": 11700 }, { "epoch": 0.11, "learning_rate": 0.00026856449319336635, "loss": 2.987, "step": 11800 }, { "epoch": 0.11, "learning_rate": 0.00026829277129167533, "loss": 2.979, "step": 11900 }, { "epoch": 0.11, "learning_rate": 0.0002680210493899843, "loss": 2.9818, "step": 12000 }, { "epoch": 0.11, "eval_accuracy": 0.4390166740766953, "eval_loss": 2.9964208602905273, "eval_runtime": 44.0294, "eval_samples_per_second": 147.242, "eval_steps_per_second": 2.476, "step": 12000 }, { "epoch": 0.11, "learning_rate": 0.0002677493274882933, "loss": 2.9798, "step": 12100 }, { "epoch": 0.11, "learning_rate": 0.00026747760558660225, "loss": 2.9833, "step": 12200 }, { "epoch": 0.11, "learning_rate": 0.0002672058836849113, "loss": 2.9787, "step": 12300 }, { "epoch": 0.11, "learning_rate": 0.00026693416178322026, "loss": 2.9807, "step": 12400 }, { "epoch": 0.11, "learning_rate": 0.00026666515710054614, "loss": 2.9846, "step": 12500 }, { "epoch": 0.11, "learning_rate": 0.0002663934351988551, "loss": 2.9758, "step": 12600 }, { "epoch": 0.11, "learning_rate": 0.00026612171329716414, "loss": 2.9749, "step": 12700 }, { "epoch": 0.12, "learning_rate": 0.00026584999139547306, "loss": 2.9688, "step": 12800 }, { "epoch": 0.12, "learning_rate": 0.0002655782694937821, "loss": 2.9886, "step": 12900 }, { "epoch": 0.12, "learning_rate": 0.00026530654759209107, "loss": 2.9771, "step": 13000 }, { "epoch": 0.12, "eval_accuracy": 0.4395580598427864, "eval_loss": 2.991270065307617, "eval_runtime": 43.0298, "eval_samples_per_second": 150.663, "eval_steps_per_second": 2.533, "step": 13000 }, { "epoch": 0.12, "learning_rate": 0.00026503482569040004, "loss": 2.9802, "step": 13100 }, { "epoch": 0.12, "learning_rate": 0.000264763103788709, "loss": 2.9711, "step": 13200 }, { "epoch": 0.12, "learning_rate": 0.000264491381887018, "loss": 2.9845, "step": 13300 }, { "epoch": 0.12, "learning_rate": 0.000264219659985327, "loss": 2.9735, "step": 13400 }, { "epoch": 0.12, "learning_rate": 0.00026394793808363594, "loss": 2.9731, "step": 13500 }, { "epoch": 0.12, "learning_rate": 0.00026367621618194497, "loss": 2.9717, "step": 13600 }, { "epoch": 0.12, "learning_rate": 0.00026340449428025395, "loss": 2.9718, "step": 13700 }, { "epoch": 0.12, "learning_rate": 0.0002631327723785629, "loss": 2.9766, "step": 13800 }, { "epoch": 0.13, "learning_rate": 0.0002628637676958888, "loss": 2.9812, "step": 13900 }, { "epoch": 0.13, "learning_rate": 0.00026259204579419783, "loss": 2.9786, "step": 14000 }, { "epoch": 0.13, "eval_accuracy": 0.43911829732664315, "eval_loss": 2.9915201663970947, "eval_runtime": 43.7467, "eval_samples_per_second": 148.194, "eval_steps_per_second": 2.492, "step": 14000 }, { "epoch": 0.13, "learning_rate": 0.0002623203238925068, "loss": 2.9757, "step": 14100 }, { "epoch": 0.13, "learning_rate": 0.0002620486019908158, "loss": 2.9781, "step": 14200 }, { "epoch": 0.13, "learning_rate": 0.00026177959730814166, "loss": 2.9733, "step": 14300 }, { "epoch": 0.13, "learning_rate": 0.0002615078754064507, "loss": 2.9773, "step": 14400 }, { "epoch": 0.13, "learning_rate": 0.0002612361535047596, "loss": 2.9755, "step": 14500 }, { "epoch": 0.13, "learning_rate": 0.00026096443160306864, "loss": 2.9837, "step": 14600 }, { "epoch": 0.13, "learning_rate": 0.0002606927097013776, "loss": 2.9786, "step": 14700 }, { "epoch": 0.13, "learning_rate": 0.0002604209877996866, "loss": 2.9709, "step": 14800 }, { "epoch": 0.13, "learning_rate": 0.00026014926589799556, "loss": 2.9797, "step": 14900 }, { "epoch": 0.14, "learning_rate": 0.00025987754399630454, "loss": 2.9866, "step": 15000 }, { "epoch": 0.14, "eval_accuracy": 0.4393814289559723, "eval_loss": 2.9924139976501465, "eval_runtime": 43.2705, "eval_samples_per_second": 149.825, "eval_steps_per_second": 2.519, "step": 15000 }, { "epoch": 0.14, "learning_rate": 0.00025960582209461357, "loss": 2.976, "step": 15100 }, { "epoch": 0.14, "learning_rate": 0.0002593341001929225, "loss": 2.9674, "step": 15200 }, { "epoch": 0.14, "learning_rate": 0.0002590623782912315, "loss": 2.98, "step": 15300 }, { "epoch": 0.14, "learning_rate": 0.0002587906563895405, "loss": 2.9805, "step": 15400 }, { "epoch": 0.14, "learning_rate": 0.00025851893448784947, "loss": 2.9738, "step": 15500 }, { "epoch": 0.14, "learning_rate": 0.00025824721258615844, "loss": 2.9702, "step": 15600 }, { "epoch": 0.14, "learning_rate": 0.0002579754906844674, "loss": 2.9678, "step": 15700 }, { "epoch": 0.14, "learning_rate": 0.00025770376878277645, "loss": 2.9699, "step": 15800 }, { "epoch": 0.14, "learning_rate": 0.0002574320468810854, "loss": 2.9717, "step": 15900 }, { "epoch": 0.14, "learning_rate": 0.0002571603249793944, "loss": 2.9751, "step": 16000 }, { "epoch": 0.14, "eval_accuracy": 0.43892109982972055, "eval_loss": 2.9917728900909424, "eval_runtime": 44.2385, "eval_samples_per_second": 146.547, "eval_steps_per_second": 2.464, "step": 16000 }, { "epoch": 0.15, "learning_rate": 0.0002568886030777034, "loss": 2.9653, "step": 16100 }, { "epoch": 0.15, "learning_rate": 0.00025661688117601235, "loss": 2.9817, "step": 16200 }, { "epoch": 0.15, "learning_rate": 0.0002563451592743214, "loss": 2.9652, "step": 16300 }, { "epoch": 0.15, "learning_rate": 0.00025607343737263035, "loss": 2.9704, "step": 16400 }, { "epoch": 0.15, "learning_rate": 0.00025580171547093933, "loss": 2.9727, "step": 16500 }, { "epoch": 0.15, "learning_rate": 0.0002555299935692483, "loss": 2.9743, "step": 16600 }, { "epoch": 0.15, "learning_rate": 0.0002552582716675573, "loss": 2.9719, "step": 16700 }, { "epoch": 0.15, "learning_rate": 0.00025498654976586626, "loss": 2.9615, "step": 16800 }, { "epoch": 0.15, "learning_rate": 0.00025471482786417523, "loss": 2.973, "step": 16900 }, { "epoch": 0.15, "learning_rate": 0.00025444310596248426, "loss": 2.9702, "step": 17000 }, { "epoch": 0.15, "eval_accuracy": 0.4393215438265388, "eval_loss": 2.992605447769165, "eval_runtime": 45.7096, "eval_samples_per_second": 141.83, "eval_steps_per_second": 2.385, "step": 17000 }, { "epoch": 0.15, "learning_rate": 0.00025417410127981014, "loss": 2.9689, "step": 17100 }, { "epoch": 0.16, "learning_rate": 0.0002539023793781191, "loss": 2.9727, "step": 17200 }, { "epoch": 0.16, "learning_rate": 0.0002536306574764281, "loss": 2.9669, "step": 17300 }, { "epoch": 0.16, "learning_rate": 0.00025335893557473706, "loss": 2.9717, "step": 17400 }, { "epoch": 0.16, "learning_rate": 0.0002530872136730461, "loss": 2.9646, "step": 17500 }, { "epoch": 0.16, "learning_rate": 0.00025281549177135507, "loss": 2.9757, "step": 17600 }, { "epoch": 0.16, "learning_rate": 0.00025254376986966404, "loss": 2.9679, "step": 17700 }, { "epoch": 0.16, "learning_rate": 0.000252272047967973, "loss": 2.9691, "step": 17800 }, { "epoch": 0.16, "learning_rate": 0.000252000326066282, "loss": 2.9718, "step": 17900 }, { "epoch": 0.16, "learning_rate": 0.00025172860416459097, "loss": 2.9695, "step": 18000 }, { "epoch": 0.16, "eval_accuracy": 0.44013513472641874, "eval_loss": 2.981644868850708, "eval_runtime": 43.6409, "eval_samples_per_second": 148.553, "eval_steps_per_second": 2.498, "step": 18000 }, { "epoch": 0.16, "learning_rate": 0.0002514568822629, "loss": 2.9666, "step": 18100 }, { "epoch": 0.16, "learning_rate": 0.0002511878775802259, "loss": 2.9696, "step": 18200 }, { "epoch": 0.17, "learning_rate": 0.00025091615567853485, "loss": 2.9687, "step": 18300 }, { "epoch": 0.17, "learning_rate": 0.0002506444337768438, "loss": 2.9674, "step": 18400 }, { "epoch": 0.17, "learning_rate": 0.0002503727118751528, "loss": 2.9655, "step": 18500 }, { "epoch": 0.17, "learning_rate": 0.00025010098997346183, "loss": 2.9661, "step": 18600 }, { "epoch": 0.17, "learning_rate": 0.0002498292680717708, "loss": 2.9673, "step": 18700 }, { "epoch": 0.17, "learning_rate": 0.0002495575461700798, "loss": 2.9641, "step": 18800 }, { "epoch": 0.17, "learning_rate": 0.00024928582426838876, "loss": 2.9598, "step": 18900 }, { "epoch": 0.17, "learning_rate": 0.00024901410236669773, "loss": 2.9615, "step": 19000 }, { "epoch": 0.17, "eval_accuracy": 0.44022042566833924, "eval_loss": 2.982591390609741, "eval_runtime": 43.6998, "eval_samples_per_second": 148.353, "eval_steps_per_second": 2.494, "step": 19000 }, { "epoch": 0.17, "learning_rate": 0.00024874238046500676, "loss": 2.958, "step": 19100 }, { "epoch": 0.17, "learning_rate": 0.0002484706585633157, "loss": 2.9688, "step": 19200 }, { "epoch": 0.17, "learning_rate": 0.0002481989366616247, "loss": 2.9603, "step": 19300 }, { "epoch": 0.18, "learning_rate": 0.0002479272147599337, "loss": 2.9625, "step": 19400 }, { "epoch": 0.18, "learning_rate": 0.00024765549285824266, "loss": 2.9611, "step": 19500 }, { "epoch": 0.18, "learning_rate": 0.00024738377095655164, "loss": 2.9594, "step": 19600 }, { "epoch": 0.18, "learning_rate": 0.0002471120490548606, "loss": 2.9648, "step": 19700 }, { "epoch": 0.18, "learning_rate": 0.00024684032715316964, "loss": 2.961, "step": 19800 }, { "epoch": 0.18, "learning_rate": 0.0002465686052514786, "loss": 2.9589, "step": 19900 }, { "epoch": 0.18, "learning_rate": 0.0002462968833497876, "loss": 2.9609, "step": 20000 }, { "epoch": 0.18, "eval_accuracy": 0.4406414362752659, "eval_loss": 2.9791083335876465, "eval_runtime": 44.2323, "eval_samples_per_second": 146.567, "eval_steps_per_second": 2.464, "step": 20000 }, { "epoch": 0.18, "learning_rate": 0.00024602516144809657, "loss": 2.962, "step": 20100 }, { "epoch": 0.18, "learning_rate": 0.00024575343954640554, "loss": 2.9566, "step": 20200 }, { "epoch": 0.18, "learning_rate": 0.00024548171764471457, "loss": 2.964, "step": 20300 }, { "epoch": 0.18, "learning_rate": 0.0002452099957430235, "loss": 2.9573, "step": 20400 }, { "epoch": 0.19, "learning_rate": 0.0002449409910603494, "loss": 2.9621, "step": 20500 }, { "epoch": 0.19, "learning_rate": 0.0002446692691586584, "loss": 2.9568, "step": 20600 }, { "epoch": 0.19, "learning_rate": 0.0002443975472569674, "loss": 2.9643, "step": 20700 }, { "epoch": 0.19, "learning_rate": 0.00024412582535527635, "loss": 2.9614, "step": 20800 }, { "epoch": 0.19, "learning_rate": 0.00024385410345358535, "loss": 2.9546, "step": 20900 }, { "epoch": 0.19, "learning_rate": 0.00024358238155189433, "loss": 2.9607, "step": 21000 }, { "epoch": 0.19, "eval_accuracy": 0.44158508073906716, "eval_loss": 2.9684245586395264, "eval_runtime": 43.092, "eval_samples_per_second": 150.446, "eval_steps_per_second": 2.529, "step": 21000 }, { "epoch": 0.19, "learning_rate": 0.00024331065965020333, "loss": 2.9608, "step": 21100 }, { "epoch": 0.19, "learning_rate": 0.00024303893774851228, "loss": 2.9556, "step": 21200 }, { "epoch": 0.19, "learning_rate": 0.00024276721584682128, "loss": 2.9579, "step": 21300 }, { "epoch": 0.19, "learning_rate": 0.00024249549394513028, "loss": 2.9585, "step": 21400 }, { "epoch": 0.19, "learning_rate": 0.00024222377204343926, "loss": 2.9544, "step": 21500 }, { "epoch": 0.2, "learning_rate": 0.00024195205014174823, "loss": 2.9614, "step": 21600 }, { "epoch": 0.2, "learning_rate": 0.0002416803282400572, "loss": 2.9536, "step": 21700 }, { "epoch": 0.2, "learning_rate": 0.00024141132355738314, "loss": 2.9556, "step": 21800 }, { "epoch": 0.2, "learning_rate": 0.0002411396016556921, "loss": 2.9559, "step": 21900 }, { "epoch": 0.2, "learning_rate": 0.0002408678797540011, "loss": 2.9533, "step": 22000 }, { "epoch": 0.2, "eval_accuracy": 0.4422038937432138, "eval_loss": 2.967719554901123, "eval_runtime": 42.9224, "eval_samples_per_second": 151.04, "eval_steps_per_second": 2.539, "step": 22000 }, { "epoch": 0.2, "learning_rate": 0.00024059615785231007, "loss": 2.9493, "step": 22100 }, { "epoch": 0.2, "learning_rate": 0.00024032443595061907, "loss": 2.9543, "step": 22200 }, { "epoch": 0.2, "learning_rate": 0.00024005271404892804, "loss": 2.9565, "step": 22300 }, { "epoch": 0.2, "learning_rate": 0.00023978099214723702, "loss": 2.9501, "step": 22400 }, { "epoch": 0.2, "learning_rate": 0.00023950927024554602, "loss": 2.9395, "step": 22500 }, { "epoch": 0.2, "learning_rate": 0.00023923754834385497, "loss": 2.9598, "step": 22600 }, { "epoch": 0.21, "learning_rate": 0.00023896854366118087, "loss": 2.9492, "step": 22700 }, { "epoch": 0.21, "learning_rate": 0.00023869682175948988, "loss": 2.947, "step": 22800 }, { "epoch": 0.21, "learning_rate": 0.00023842509985779885, "loss": 2.9573, "step": 22900 }, { "epoch": 0.21, "learning_rate": 0.00023815337795610783, "loss": 2.9513, "step": 23000 }, { "epoch": 0.21, "eval_accuracy": 0.4420853332849413, "eval_loss": 2.9676427841186523, "eval_runtime": 43.0836, "eval_samples_per_second": 150.475, "eval_steps_per_second": 2.53, "step": 23000 }, { "epoch": 0.21, "learning_rate": 0.00023788165605441683, "loss": 2.9472, "step": 23100 }, { "epoch": 0.21, "learning_rate": 0.00023760993415272578, "loss": 2.9513, "step": 23200 }, { "epoch": 0.21, "learning_rate": 0.00023733821225103478, "loss": 2.9542, "step": 23300 }, { "epoch": 0.21, "learning_rate": 0.00023706649034934378, "loss": 2.9497, "step": 23400 }, { "epoch": 0.21, "learning_rate": 0.00023679476844765276, "loss": 2.9565, "step": 23500 }, { "epoch": 0.21, "learning_rate": 0.00023652576376497863, "loss": 2.9518, "step": 23600 }, { "epoch": 0.21, "learning_rate": 0.00023625404186328764, "loss": 2.9471, "step": 23700 }, { "epoch": 0.22, "learning_rate": 0.00023598231996159664, "loss": 2.956, "step": 23800 }, { "epoch": 0.22, "learning_rate": 0.00023571059805990559, "loss": 2.953, "step": 23900 }, { "epoch": 0.22, "learning_rate": 0.0002354388761582146, "loss": 2.9563, "step": 24000 }, { "epoch": 0.22, "eval_accuracy": 0.4428523468619285, "eval_loss": 2.9609880447387695, "eval_runtime": 42.9764, "eval_samples_per_second": 150.85, "eval_steps_per_second": 2.536, "step": 24000 }, { "epoch": 0.22, "learning_rate": 0.00023516715425652356, "loss": 2.9458, "step": 24100 }, { "epoch": 0.22, "learning_rate": 0.00023489543235483257, "loss": 2.9539, "step": 24200 }, { "epoch": 0.22, "learning_rate": 0.00023462371045314151, "loss": 2.9549, "step": 24300 }, { "epoch": 0.22, "learning_rate": 0.00023435198855145052, "loss": 2.9496, "step": 24400 }, { "epoch": 0.22, "learning_rate": 0.00023408026664975952, "loss": 2.9514, "step": 24500 }, { "epoch": 0.22, "learning_rate": 0.0002338085447480685, "loss": 2.9471, "step": 24600 }, { "epoch": 0.22, "learning_rate": 0.00023353682284637747, "loss": 2.9448, "step": 24700 }, { "epoch": 0.22, "learning_rate": 0.00023326510094468644, "loss": 2.948, "step": 24800 }, { "epoch": 0.23, "learning_rate": 0.00023299337904299545, "loss": 2.9454, "step": 24900 }, { "epoch": 0.23, "learning_rate": 0.00023272165714130445, "loss": 2.9466, "step": 25000 }, { "epoch": 0.23, "eval_accuracy": 0.44241621374756906, "eval_loss": 2.9626522064208984, "eval_runtime": 43.5013, "eval_samples_per_second": 149.03, "eval_steps_per_second": 2.506, "step": 25000 }, { "epoch": 0.23, "learning_rate": 0.0002324499352396134, "loss": 2.9417, "step": 25100 }, { "epoch": 0.23, "learning_rate": 0.0002321782133379224, "loss": 2.9452, "step": 25200 }, { "epoch": 0.23, "learning_rate": 0.00023190649143623138, "loss": 2.9406, "step": 25300 }, { "epoch": 0.23, "learning_rate": 0.00023163476953454038, "loss": 2.945, "step": 25400 }, { "epoch": 0.23, "learning_rate": 0.00023136304763284933, "loss": 2.9419, "step": 25500 }, { "epoch": 0.23, "learning_rate": 0.00023109132573115833, "loss": 2.9452, "step": 25600 }, { "epoch": 0.23, "learning_rate": 0.00023081960382946733, "loss": 2.9435, "step": 25700 }, { "epoch": 0.23, "learning_rate": 0.00023054788192777628, "loss": 2.947, "step": 25800 }, { "epoch": 0.23, "learning_rate": 0.00023027616002608528, "loss": 2.9343, "step": 25900 }, { "epoch": 0.24, "learning_rate": 0.00023000443812439426, "loss": 2.9431, "step": 26000 }, { "epoch": 0.24, "eval_accuracy": 0.442384758932109, "eval_loss": 2.9589717388153076, "eval_runtime": 43.1206, "eval_samples_per_second": 150.346, "eval_steps_per_second": 2.528, "step": 26000 }, { "epoch": 0.24, "learning_rate": 0.00022973271622270326, "loss": 2.9431, "step": 26100 }, { "epoch": 0.24, "learning_rate": 0.00022946099432101226, "loss": 2.9477, "step": 26200 }, { "epoch": 0.24, "learning_rate": 0.0002291892724193212, "loss": 2.939, "step": 26300 }, { "epoch": 0.24, "learning_rate": 0.0002289175505176302, "loss": 2.9385, "step": 26400 }, { "epoch": 0.24, "learning_rate": 0.0002286458286159392, "loss": 2.944, "step": 26500 }, { "epoch": 0.24, "learning_rate": 0.00022837410671424816, "loss": 2.9404, "step": 26600 }, { "epoch": 0.24, "learning_rate": 0.00022810238481255716, "loss": 2.9334, "step": 26700 }, { "epoch": 0.24, "learning_rate": 0.00022783066291086614, "loss": 2.9419, "step": 26800 }, { "epoch": 0.24, "learning_rate": 0.00022755894100917514, "loss": 2.9432, "step": 26900 }, { "epoch": 0.24, "learning_rate": 0.00022728993632650102, "loss": 2.9412, "step": 27000 }, { "epoch": 0.24, "eval_accuracy": 0.4435655243124552, "eval_loss": 2.952514410018921, "eval_runtime": 43.0804, "eval_samples_per_second": 150.486, "eval_steps_per_second": 2.53, "step": 27000 }, { "epoch": 0.25, "learning_rate": 0.00022701821442481, "loss": 2.9359, "step": 27100 }, { "epoch": 0.25, "learning_rate": 0.000226746492523119, "loss": 2.9426, "step": 27200 }, { "epoch": 0.25, "learning_rate": 0.00022647477062142797, "loss": 2.9307, "step": 27300 }, { "epoch": 0.25, "learning_rate": 0.00022620304871973695, "loss": 2.9353, "step": 27400 }, { "epoch": 0.25, "learning_rate": 0.00022593132681804595, "loss": 2.9353, "step": 27500 }, { "epoch": 0.25, "learning_rate": 0.0002256596049163549, "loss": 2.9403, "step": 27600 }, { "epoch": 0.25, "learning_rate": 0.0002253878830146639, "loss": 2.9393, "step": 27700 }, { "epoch": 0.25, "learning_rate": 0.0002251161611129729, "loss": 2.9313, "step": 27800 }, { "epoch": 0.25, "learning_rate": 0.00022484443921128188, "loss": 2.9348, "step": 27900 }, { "epoch": 0.25, "learning_rate": 0.00022457543452860775, "loss": 2.9299, "step": 28000 }, { "epoch": 0.25, "eval_accuracy": 0.4434947509776701, "eval_loss": 2.9504144191741943, "eval_runtime": 43.7459, "eval_samples_per_second": 148.197, "eval_steps_per_second": 2.492, "step": 28000 }, { "epoch": 0.25, "learning_rate": 0.00022430371262691676, "loss": 2.938, "step": 28100 }, { "epoch": 0.25, "learning_rate": 0.00022403199072522576, "loss": 2.9353, "step": 28200 }, { "epoch": 0.26, "learning_rate": 0.00022376298604255164, "loss": 2.9329, "step": 28300 }, { "epoch": 0.26, "learning_rate": 0.0002234912641408606, "loss": 2.9311, "step": 28400 }, { "epoch": 0.26, "learning_rate": 0.0002232195422391696, "loss": 2.9377, "step": 28500 }, { "epoch": 0.26, "learning_rate": 0.00022294782033747856, "loss": 2.9303, "step": 28600 }, { "epoch": 0.26, "learning_rate": 0.00022267609843578756, "loss": 2.9278, "step": 28700 }, { "epoch": 0.26, "learning_rate": 0.00022240437653409657, "loss": 2.9394, "step": 28800 }, { "epoch": 0.26, "learning_rate": 0.00022213265463240551, "loss": 2.9332, "step": 28900 }, { "epoch": 0.26, "learning_rate": 0.00022186093273071452, "loss": 2.9332, "step": 29000 }, { "epoch": 0.26, "eval_accuracy": 0.4434814431711293, "eval_loss": 2.9485716819763184, "eval_runtime": 41.8653, "eval_samples_per_second": 154.854, "eval_steps_per_second": 2.604, "step": 29000 }, { "epoch": 0.26, "learning_rate": 0.0002215892108290235, "loss": 2.9339, "step": 29100 }, { "epoch": 0.26, "learning_rate": 0.0002213174889273325, "loss": 2.9322, "step": 29200 }, { "epoch": 0.26, "learning_rate": 0.0002210457670256415, "loss": 2.9305, "step": 29300 }, { "epoch": 0.27, "learning_rate": 0.00022077404512395044, "loss": 2.9321, "step": 29400 }, { "epoch": 0.27, "learning_rate": 0.00022050232322225945, "loss": 2.9265, "step": 29500 }, { "epoch": 0.27, "learning_rate": 0.00022023331853958532, "loss": 2.9247, "step": 29600 }, { "epoch": 0.27, "learning_rate": 0.00021996159663789433, "loss": 2.9312, "step": 29700 }, { "epoch": 0.27, "learning_rate": 0.0002196898747362033, "loss": 2.9288, "step": 29800 }, { "epoch": 0.27, "learning_rate": 0.0002194181528345123, "loss": 2.9328, "step": 29900 }, { "epoch": 0.27, "learning_rate": 0.00021914643093282125, "loss": 2.9255, "step": 30000 }, { "epoch": 0.27, "eval_accuracy": 0.444235753841873, "eval_loss": 2.942479372024536, "eval_runtime": 41.7184, "eval_samples_per_second": 155.399, "eval_steps_per_second": 2.613, "step": 30000 }, { "epoch": 0.27, "learning_rate": 0.00021887470903113025, "loss": 2.9265, "step": 30100 }, { "epoch": 0.27, "learning_rate": 0.00021860298712943923, "loss": 2.9184, "step": 30200 }, { "epoch": 0.27, "learning_rate": 0.00021833126522774823, "loss": 2.9271, "step": 30300 }, { "epoch": 0.27, "learning_rate": 0.0002180622605450741, "loss": 2.9232, "step": 30400 }, { "epoch": 0.28, "learning_rate": 0.0002177905386433831, "loss": 2.9303, "step": 30500 }, { "epoch": 0.28, "learning_rate": 0.00021751881674169206, "loss": 2.9348, "step": 30600 }, { "epoch": 0.28, "learning_rate": 0.00021724709484000106, "loss": 2.9218, "step": 30700 }, { "epoch": 0.28, "learning_rate": 0.00021697537293831006, "loss": 2.9324, "step": 30800 }, { "epoch": 0.28, "learning_rate": 0.00021670365103661904, "loss": 2.9294, "step": 30900 }, { "epoch": 0.28, "learning_rate": 0.00021643192913492801, "loss": 2.9242, "step": 31000 }, { "epoch": 0.28, "eval_accuracy": 0.44344756875448005, "eval_loss": 2.945934534072876, "eval_runtime": 43.5276, "eval_samples_per_second": 148.94, "eval_steps_per_second": 2.504, "step": 31000 }, { "epoch": 0.28, "learning_rate": 0.000216160207233237, "loss": 2.9231, "step": 31100 }, { "epoch": 0.28, "learning_rate": 0.000215888485331546, "loss": 2.9269, "step": 31200 }, { "epoch": 0.28, "learning_rate": 0.000215616763429855, "loss": 2.9247, "step": 31300 }, { "epoch": 0.28, "learning_rate": 0.00021534504152816394, "loss": 2.9236, "step": 31400 }, { "epoch": 0.28, "learning_rate": 0.00021507331962647295, "loss": 2.9296, "step": 31500 }, { "epoch": 0.29, "learning_rate": 0.00021480159772478192, "loss": 2.9267, "step": 31600 }, { "epoch": 0.29, "learning_rate": 0.00021452987582309092, "loss": 2.9259, "step": 31700 }, { "epoch": 0.29, "learning_rate": 0.00021425815392139987, "loss": 2.9259, "step": 31800 }, { "epoch": 0.29, "learning_rate": 0.00021398643201970887, "loss": 2.9236, "step": 31900 }, { "epoch": 0.29, "learning_rate": 0.00021371471011801788, "loss": 2.9242, "step": 32000 }, { "epoch": 0.29, "eval_accuracy": 0.4445194520813107, "eval_loss": 2.9377670288085938, "eval_runtime": 43.8729, "eval_samples_per_second": 147.768, "eval_steps_per_second": 2.484, "step": 32000 }, { "epoch": 0.29, "learning_rate": 0.00021344298821632685, "loss": 2.9178, "step": 32100 }, { "epoch": 0.29, "learning_rate": 0.00021317126631463583, "loss": 2.9257, "step": 32200 }, { "epoch": 0.29, "learning_rate": 0.0002128995444129448, "loss": 2.9227, "step": 32300 }, { "epoch": 0.29, "learning_rate": 0.0002126278225112538, "loss": 2.9228, "step": 32400 }, { "epoch": 0.29, "learning_rate": 0.0002123561006095628, "loss": 2.9183, "step": 32500 }, { "epoch": 0.29, "learning_rate": 0.00021208437870787175, "loss": 2.9196, "step": 32600 }, { "epoch": 0.3, "learning_rate": 0.00021181265680618076, "loss": 2.9143, "step": 32700 }, { "epoch": 0.3, "learning_rate": 0.00021154093490448973, "loss": 2.9192, "step": 32800 }, { "epoch": 0.3, "learning_rate": 0.0002112692130027987, "loss": 2.9187, "step": 32900 }, { "epoch": 0.3, "learning_rate": 0.00021099749110110768, "loss": 2.9267, "step": 33000 }, { "epoch": 0.3, "eval_accuracy": 0.4452544059425405, "eval_loss": 2.9316306114196777, "eval_runtime": 45.514, "eval_samples_per_second": 142.44, "eval_steps_per_second": 2.395, "step": 33000 }, { "epoch": 0.3, "learning_rate": 0.00021072576919941669, "loss": 2.9169, "step": 33100 }, { "epoch": 0.3, "learning_rate": 0.0002104540472977257, "loss": 2.9219, "step": 33200 }, { "epoch": 0.3, "learning_rate": 0.00021018232539603464, "loss": 2.9096, "step": 33300 }, { "epoch": 0.3, "learning_rate": 0.00020991060349434364, "loss": 2.9202, "step": 33400 }, { "epoch": 0.3, "learning_rate": 0.0002096388815926526, "loss": 2.9241, "step": 33500 }, { "epoch": 0.3, "learning_rate": 0.00020936987690997852, "loss": 2.9148, "step": 33600 }, { "epoch": 0.3, "learning_rate": 0.0002090981550082875, "loss": 2.9194, "step": 33700 }, { "epoch": 0.31, "learning_rate": 0.0002088264331065965, "loss": 2.9267, "step": 33800 }, { "epoch": 0.31, "learning_rate": 0.00020855471120490544, "loss": 2.9164, "step": 33900 }, { "epoch": 0.31, "learning_rate": 0.00020828298930321445, "loss": 2.9151, "step": 34000 }, { "epoch": 0.31, "eval_accuracy": 0.44544252993500344, "eval_loss": 2.931532382965088, "eval_runtime": 43.496, "eval_samples_per_second": 149.048, "eval_steps_per_second": 2.506, "step": 34000 }, { "epoch": 0.31, "learning_rate": 0.00020801126740152345, "loss": 2.9178, "step": 34100 }, { "epoch": 0.31, "learning_rate": 0.00020773954549983242, "loss": 2.9119, "step": 34200 }, { "epoch": 0.31, "learning_rate": 0.00020746782359814143, "loss": 2.9143, "step": 34300 }, { "epoch": 0.31, "learning_rate": 0.00020719610169645037, "loss": 2.9084, "step": 34400 }, { "epoch": 0.31, "learning_rate": 0.00020692437979475938, "loss": 2.9227, "step": 34500 }, { "epoch": 0.31, "learning_rate": 0.00020665265789306835, "loss": 2.9159, "step": 34600 }, { "epoch": 0.31, "learning_rate": 0.00020638365321039425, "loss": 2.9151, "step": 34700 }, { "epoch": 0.31, "learning_rate": 0.00020611193130870323, "loss": 2.9218, "step": 34800 }, { "epoch": 0.32, "learning_rate": 0.00020584020940701223, "loss": 2.9169, "step": 34900 }, { "epoch": 0.32, "learning_rate": 0.00020556848750532118, "loss": 2.9105, "step": 35000 }, { "epoch": 0.32, "eval_accuracy": 0.4455647197950598, "eval_loss": 2.928622245788574, "eval_runtime": 45.1155, "eval_samples_per_second": 143.698, "eval_steps_per_second": 2.416, "step": 35000 }, { "epoch": 0.32, "learning_rate": 0.00020529676560363018, "loss": 2.9135, "step": 35100 }, { "epoch": 0.32, "learning_rate": 0.00020502504370193919, "loss": 2.9099, "step": 35200 }, { "epoch": 0.32, "learning_rate": 0.00020475332180024816, "loss": 2.9114, "step": 35300 }, { "epoch": 0.32, "learning_rate": 0.00020448159989855714, "loss": 2.9169, "step": 35400 }, { "epoch": 0.32, "learning_rate": 0.0002042098779968661, "loss": 2.9098, "step": 35500 }, { "epoch": 0.32, "learning_rate": 0.00020393815609517511, "loss": 2.9126, "step": 35600 }, { "epoch": 0.32, "learning_rate": 0.00020366643419348412, "loss": 2.9095, "step": 35700 }, { "epoch": 0.32, "learning_rate": 0.00020339471229179306, "loss": 2.9086, "step": 35800 }, { "epoch": 0.32, "learning_rate": 0.00020312299039010207, "loss": 2.9077, "step": 35900 }, { "epoch": 0.33, "learning_rate": 0.00020285126848841104, "loss": 2.9053, "step": 36000 }, { "epoch": 0.33, "eval_accuracy": 0.4457353016789008, "eval_loss": 2.924194097518921, "eval_runtime": 41.9708, "eval_samples_per_second": 154.464, "eval_steps_per_second": 2.597, "step": 36000 }, { "epoch": 0.33, "learning_rate": 0.00020257954658672004, "loss": 2.9099, "step": 36100 }, { "epoch": 0.33, "learning_rate": 0.000202307824685029, "loss": 2.9118, "step": 36200 }, { "epoch": 0.33, "learning_rate": 0.000202036102783338, "loss": 2.91, "step": 36300 }, { "epoch": 0.33, "learning_rate": 0.000201764380881647, "loss": 2.8983, "step": 36400 }, { "epoch": 0.33, "learning_rate": 0.00020149265897995595, "loss": 2.8964, "step": 36500 }, { "epoch": 0.33, "learning_rate": 0.00020122093707826495, "loss": 2.9024, "step": 36600 }, { "epoch": 0.33, "learning_rate": 0.00020095193239559085, "loss": 2.9057, "step": 36700 }, { "epoch": 0.33, "learning_rate": 0.00020068021049389983, "loss": 2.9094, "step": 36800 }, { "epoch": 0.33, "learning_rate": 0.0002004084885922088, "loss": 2.9071, "step": 36900 }, { "epoch": 0.33, "learning_rate": 0.0002001367666905178, "loss": 2.9023, "step": 37000 }, { "epoch": 0.33, "eval_accuracy": 0.44664325702516083, "eval_loss": 2.9194602966308594, "eval_runtime": 42.9573, "eval_samples_per_second": 150.917, "eval_steps_per_second": 2.537, "step": 37000 }, { "epoch": 0.34, "learning_rate": 0.00019986504478882678, "loss": 2.9047, "step": 37100 }, { "epoch": 0.34, "learning_rate": 0.00019959332288713575, "loss": 2.9097, "step": 37200 }, { "epoch": 0.34, "learning_rate": 0.00019932160098544476, "loss": 2.908, "step": 37300 }, { "epoch": 0.34, "learning_rate": 0.00019905259630277066, "loss": 2.9019, "step": 37400 }, { "epoch": 0.34, "learning_rate": 0.0001987808744010796, "loss": 2.9105, "step": 37500 }, { "epoch": 0.34, "learning_rate": 0.0001985091524993886, "loss": 2.9064, "step": 37600 }, { "epoch": 0.34, "learning_rate": 0.0001982401478167145, "loss": 2.9053, "step": 37700 }, { "epoch": 0.34, "learning_rate": 0.0001979684259150235, "loss": 2.906, "step": 37800 }, { "epoch": 0.34, "learning_rate": 0.00019769670401333247, "loss": 2.8997, "step": 37900 }, { "epoch": 0.34, "learning_rate": 0.00019742498211164147, "loss": 2.8946, "step": 38000 }, { "epoch": 0.34, "eval_accuracy": 0.4468059752051368, "eval_loss": 2.917731285095215, "eval_runtime": 43.2928, "eval_samples_per_second": 149.748, "eval_steps_per_second": 2.518, "step": 38000 }, { "epoch": 0.34, "learning_rate": 0.00019715326020995042, "loss": 2.9018, "step": 38100 }, { "epoch": 0.35, "learning_rate": 0.00019688153830825942, "loss": 2.8969, "step": 38200 }, { "epoch": 0.35, "learning_rate": 0.00019660981640656842, "loss": 2.9104, "step": 38300 }, { "epoch": 0.35, "learning_rate": 0.0001963380945048774, "loss": 2.9057, "step": 38400 }, { "epoch": 0.35, "learning_rate": 0.00019606637260318637, "loss": 2.9094, "step": 38500 }, { "epoch": 0.35, "learning_rate": 0.00019579465070149535, "loss": 2.9008, "step": 38600 }, { "epoch": 0.35, "learning_rate": 0.00019552292879980435, "loss": 2.8998, "step": 38700 }, { "epoch": 0.35, "learning_rate": 0.00019525120689811335, "loss": 2.9019, "step": 38800 }, { "epoch": 0.35, "learning_rate": 0.0001949794849964223, "loss": 2.8925, "step": 38900 }, { "epoch": 0.35, "learning_rate": 0.0001947077630947313, "loss": 2.9037, "step": 39000 }, { "epoch": 0.35, "eval_accuracy": 0.44703039321543825, "eval_loss": 2.9147427082061768, "eval_runtime": 43.7223, "eval_samples_per_second": 148.277, "eval_steps_per_second": 2.493, "step": 39000 }, { "epoch": 0.35, "learning_rate": 0.00019443604119304028, "loss": 2.9052, "step": 39100 }, { "epoch": 0.35, "learning_rate": 0.00019416431929134928, "loss": 2.9038, "step": 39200 }, { "epoch": 0.36, "learning_rate": 0.00019389259738965823, "loss": 2.9046, "step": 39300 }, { "epoch": 0.36, "learning_rate": 0.00019362087548796723, "loss": 2.903, "step": 39400 }, { "epoch": 0.36, "learning_rate": 0.00019334915358627623, "loss": 2.8919, "step": 39500 }, { "epoch": 0.36, "learning_rate": 0.00019307743168458518, "loss": 2.8936, "step": 39600 }, { "epoch": 0.36, "learning_rate": 0.00019280570978289418, "loss": 2.8985, "step": 39700 }, { "epoch": 0.36, "learning_rate": 0.00019253398788120316, "loss": 2.8955, "step": 39800 }, { "epoch": 0.36, "learning_rate": 0.00019226226597951216, "loss": 2.8943, "step": 39900 }, { "epoch": 0.36, "learning_rate": 0.00019199326129683804, "loss": 2.8893, "step": 40000 }, { "epoch": 0.36, "eval_accuracy": 0.44681383890900184, "eval_loss": 2.9129724502563477, "eval_runtime": 42.9613, "eval_samples_per_second": 150.903, "eval_steps_per_second": 2.537, "step": 40000 }, { "epoch": 0.36, "learning_rate": 0.00019172153939514704, "loss": 2.8923, "step": 40100 }, { "epoch": 0.36, "learning_rate": 0.00019144981749345602, "loss": 2.8998, "step": 40200 }, { "epoch": 0.36, "learning_rate": 0.000191178095591765, "loss": 2.8931, "step": 40300 }, { "epoch": 0.37, "learning_rate": 0.00019090637369007397, "loss": 2.8965, "step": 40400 }, { "epoch": 0.37, "learning_rate": 0.00019063465178838297, "loss": 2.8992, "step": 40500 }, { "epoch": 0.37, "learning_rate": 0.00019036292988669197, "loss": 2.8974, "step": 40600 }, { "epoch": 0.37, "learning_rate": 0.00019009120798500092, "loss": 2.8929, "step": 40700 }, { "epoch": 0.37, "learning_rate": 0.00018981948608330992, "loss": 2.8919, "step": 40800 }, { "epoch": 0.37, "learning_rate": 0.0001895477641816189, "loss": 2.8907, "step": 40900 }, { "epoch": 0.37, "learning_rate": 0.0001892760422799279, "loss": 2.8891, "step": 41000 }, { "epoch": 0.37, "eval_accuracy": 0.4481204235511882, "eval_loss": 2.9055044651031494, "eval_runtime": 43.4382, "eval_samples_per_second": 149.246, "eval_steps_per_second": 2.509, "step": 41000 }, { "epoch": 0.37, "learning_rate": 0.00018900432037823687, "loss": 2.8892, "step": 41100 }, { "epoch": 0.37, "learning_rate": 0.00018873259847654585, "loss": 2.8979, "step": 41200 }, { "epoch": 0.37, "learning_rate": 0.00018846087657485485, "loss": 2.8864, "step": 41300 }, { "epoch": 0.37, "learning_rate": 0.0001881891546731638, "loss": 2.8905, "step": 41400 }, { "epoch": 0.38, "learning_rate": 0.0001879174327714728, "loss": 2.8849, "step": 41500 }, { "epoch": 0.38, "learning_rate": 0.0001876457108697818, "loss": 2.8959, "step": 41600 }, { "epoch": 0.38, "learning_rate": 0.00018737398896809078, "loss": 2.8923, "step": 41700 }, { "epoch": 0.38, "learning_rate": 0.00018710226706639978, "loss": 2.8878, "step": 41800 }, { "epoch": 0.38, "learning_rate": 0.00018683326238372566, "loss": 2.8848, "step": 41900 }, { "epoch": 0.38, "learning_rate": 0.00018656154048203463, "loss": 2.8851, "step": 42000 }, { "epoch": 0.38, "eval_accuracy": 0.4484996960376006, "eval_loss": 2.90169358253479, "eval_runtime": 44.5924, "eval_samples_per_second": 145.384, "eval_steps_per_second": 2.444, "step": 42000 }, { "epoch": 0.38, "learning_rate": 0.0001862898185803436, "loss": 2.8892, "step": 42100 }, { "epoch": 0.38, "learning_rate": 0.0001860180966786526, "loss": 2.8835, "step": 42200 }, { "epoch": 0.38, "learning_rate": 0.0001857463747769616, "loss": 2.8868, "step": 42300 }, { "epoch": 0.38, "learning_rate": 0.0001854746528752706, "loss": 2.89, "step": 42400 }, { "epoch": 0.38, "learning_rate": 0.00018520293097357954, "loss": 2.8903, "step": 42500 }, { "epoch": 0.39, "learning_rate": 0.00018493120907188854, "loss": 2.8868, "step": 42600 }, { "epoch": 0.39, "learning_rate": 0.00018466220438921442, "loss": 2.8882, "step": 42700 }, { "epoch": 0.39, "learning_rate": 0.00018439048248752342, "loss": 2.8788, "step": 42800 }, { "epoch": 0.39, "learning_rate": 0.0001841187605858324, "loss": 2.8884, "step": 42900 }, { "epoch": 0.39, "learning_rate": 0.0001838470386841414, "loss": 2.8909, "step": 43000 }, { "epoch": 0.39, "eval_accuracy": 0.44834423666119233, "eval_loss": 2.9010777473449707, "eval_runtime": 43.3319, "eval_samples_per_second": 149.613, "eval_steps_per_second": 2.515, "step": 43000 }, { "epoch": 0.39, "learning_rate": 0.0001835753167824504, "loss": 2.8868, "step": 43100 }, { "epoch": 0.39, "learning_rate": 0.00018330359488075935, "loss": 2.8935, "step": 43200 }, { "epoch": 0.39, "learning_rate": 0.00018303187297906835, "loss": 2.883, "step": 43300 }, { "epoch": 0.39, "learning_rate": 0.00018276015107737733, "loss": 2.8895, "step": 43400 }, { "epoch": 0.39, "learning_rate": 0.0001824911463947032, "loss": 2.8958, "step": 43500 }, { "epoch": 0.39, "learning_rate": 0.0001822194244930122, "loss": 2.8916, "step": 43600 }, { "epoch": 0.4, "learning_rate": 0.0001819477025913212, "loss": 2.8949, "step": 43700 }, { "epoch": 0.4, "learning_rate": 0.00018167869790864708, "loss": 2.8898, "step": 43800 }, { "epoch": 0.4, "learning_rate": 0.00018140697600695606, "loss": 2.8887, "step": 43900 }, { "epoch": 0.4, "learning_rate": 0.00018113525410526506, "loss": 2.896, "step": 44000 }, { "epoch": 0.4, "eval_accuracy": 0.4478663654263186, "eval_loss": 2.9061102867126465, "eval_runtime": 43.1173, "eval_samples_per_second": 150.357, "eval_steps_per_second": 2.528, "step": 44000 }, { "epoch": 0.4, "learning_rate": 0.00018086353220357404, "loss": 2.8965, "step": 44100 }, { "epoch": 0.4, "learning_rate": 0.000180591810301883, "loss": 2.8969, "step": 44200 }, { "epoch": 0.4, "learning_rate": 0.00018032008840019201, "loss": 2.8913, "step": 44300 }, { "epoch": 0.4, "learning_rate": 0.00018004836649850096, "loss": 2.8897, "step": 44400 }, { "epoch": 0.4, "learning_rate": 0.00017977664459680996, "loss": 2.8952, "step": 44500 }, { "epoch": 0.4, "learning_rate": 0.00017950492269511897, "loss": 2.9008, "step": 44600 }, { "epoch": 0.4, "learning_rate": 0.00017923320079342794, "loss": 2.8884, "step": 44700 }, { "epoch": 0.41, "learning_rate": 0.00017896147889173694, "loss": 2.8971, "step": 44800 }, { "epoch": 0.41, "learning_rate": 0.0001786897569900459, "loss": 2.8824, "step": 44900 }, { "epoch": 0.41, "learning_rate": 0.0001784180350883549, "loss": 2.8918, "step": 45000 }, { "epoch": 0.41, "eval_accuracy": 0.44788874673731904, "eval_loss": 2.90425443649292, "eval_runtime": 45.928, "eval_samples_per_second": 141.156, "eval_steps_per_second": 2.373, "step": 45000 }, { "epoch": 0.41, "learning_rate": 0.00017814631318666387, "loss": 2.886, "step": 45100 }, { "epoch": 0.41, "learning_rate": 0.00017787459128497285, "loss": 2.8935, "step": 45200 }, { "epoch": 0.41, "learning_rate": 0.00017760286938328185, "loss": 2.8851, "step": 45300 }, { "epoch": 0.41, "learning_rate": 0.00017733114748159082, "loss": 2.8869, "step": 45400 }, { "epoch": 0.41, "learning_rate": 0.00017705942557989983, "loss": 2.8816, "step": 45500 }, { "epoch": 0.41, "learning_rate": 0.00017678770367820877, "loss": 2.8726, "step": 45600 }, { "epoch": 0.41, "learning_rate": 0.00017651598177651778, "loss": 2.8815, "step": 45700 }, { "epoch": 0.41, "learning_rate": 0.00017624425987482678, "loss": 2.8835, "step": 45800 }, { "epoch": 0.41, "learning_rate": 0.00017597253797313575, "loss": 2.8814, "step": 45900 }, { "epoch": 0.42, "learning_rate": 0.00017570081607144473, "loss": 2.8847, "step": 46000 }, { "epoch": 0.42, "eval_accuracy": 0.4490059975864478, "eval_loss": 2.89544415473938, "eval_runtime": 42.9804, "eval_samples_per_second": 150.836, "eval_steps_per_second": 2.536, "step": 46000 }, { "epoch": 0.42, "learning_rate": 0.0001754290941697537, "loss": 2.8699, "step": 46100 }, { "epoch": 0.42, "learning_rate": 0.0001751573722680627, "loss": 2.8829, "step": 46200 }, { "epoch": 0.42, "learning_rate": 0.00017488565036637165, "loss": 2.8773, "step": 46300 }, { "epoch": 0.42, "learning_rate": 0.00017461392846468066, "loss": 2.8812, "step": 46400 }, { "epoch": 0.42, "learning_rate": 0.00017434220656298966, "loss": 2.8805, "step": 46500 }, { "epoch": 0.42, "learning_rate": 0.00017407048466129863, "loss": 2.8812, "step": 46600 }, { "epoch": 0.42, "learning_rate": 0.00017379876275960764, "loss": 2.8826, "step": 46700 }, { "epoch": 0.42, "learning_rate": 0.00017352704085791659, "loss": 2.8801, "step": 46800 }, { "epoch": 0.42, "learning_rate": 0.00017325803617524252, "loss": 2.8787, "step": 46900 }, { "epoch": 0.42, "learning_rate": 0.00017298631427355146, "loss": 2.8749, "step": 47000 }, { "epoch": 0.42, "eval_accuracy": 0.44940160238088755, "eval_loss": 2.8912456035614014, "eval_runtime": 43.8328, "eval_samples_per_second": 147.903, "eval_steps_per_second": 2.487, "step": 47000 }, { "epoch": 0.43, "learning_rate": 0.00017271730959087737, "loss": 2.8715, "step": 47100 }, { "epoch": 0.43, "learning_rate": 0.00017244558768918637, "loss": 2.8804, "step": 47200 }, { "epoch": 0.43, "learning_rate": 0.00017217386578749535, "loss": 2.8802, "step": 47300 }, { "epoch": 0.43, "learning_rate": 0.00017190214388580432, "loss": 2.8779, "step": 47400 }, { "epoch": 0.43, "learning_rate": 0.00017163042198411332, "loss": 2.878, "step": 47500 }, { "epoch": 0.43, "learning_rate": 0.00017135870008242227, "loss": 2.8835, "step": 47600 }, { "epoch": 0.43, "learning_rate": 0.00017108697818073127, "loss": 2.8758, "step": 47700 }, { "epoch": 0.43, "learning_rate": 0.00017081525627904025, "loss": 2.8751, "step": 47800 }, { "epoch": 0.43, "learning_rate": 0.00017054353437734925, "loss": 2.8737, "step": 47900 }, { "epoch": 0.43, "learning_rate": 0.00017027181247565825, "loss": 2.8832, "step": 48000 }, { "epoch": 0.43, "eval_accuracy": 0.4496018243792967, "eval_loss": 2.891221761703491, "eval_runtime": 43.1479, "eval_samples_per_second": 150.251, "eval_steps_per_second": 2.526, "step": 48000 }, { "epoch": 0.43, "learning_rate": 0.0001700000905739672, "loss": 2.8757, "step": 48100 }, { "epoch": 0.44, "learning_rate": 0.0001697283686722762, "loss": 2.8725, "step": 48200 }, { "epoch": 0.44, "learning_rate": 0.00016945664677058518, "loss": 2.8749, "step": 48300 }, { "epoch": 0.44, "learning_rate": 0.00016918492486889416, "loss": 2.8747, "step": 48400 }, { "epoch": 0.44, "learning_rate": 0.00016891320296720316, "loss": 2.8724, "step": 48500 }, { "epoch": 0.44, "learning_rate": 0.00016864148106551213, "loss": 2.8717, "step": 48600 }, { "epoch": 0.44, "learning_rate": 0.00016836975916382114, "loss": 2.8653, "step": 48700 }, { "epoch": 0.44, "learning_rate": 0.00016809803726213008, "loss": 2.869, "step": 48800 }, { "epoch": 0.44, "learning_rate": 0.00016782631536043909, "loss": 2.8763, "step": 48900 }, { "epoch": 0.44, "learning_rate": 0.0001675545934587481, "loss": 2.8745, "step": 49000 }, { "epoch": 0.44, "eval_accuracy": 0.45002646438800725, "eval_loss": 2.8852970600128174, "eval_runtime": 43.6365, "eval_samples_per_second": 148.568, "eval_steps_per_second": 2.498, "step": 49000 }, { "epoch": 0.44, "learning_rate": 0.00016728287155705706, "loss": 2.8753, "step": 49100 }, { "epoch": 0.44, "learning_rate": 0.00016701114965536604, "loss": 2.8684, "step": 49200 }, { "epoch": 0.45, "learning_rate": 0.00016673942775367501, "loss": 2.8711, "step": 49300 }, { "epoch": 0.45, "learning_rate": 0.00016646770585198402, "loss": 2.8646, "step": 49400 }, { "epoch": 0.45, "learning_rate": 0.00016619598395029296, "loss": 2.865, "step": 49500 }, { "epoch": 0.45, "learning_rate": 0.0001659269792676189, "loss": 2.8773, "step": 49600 }, { "epoch": 0.45, "learning_rate": 0.00016565525736592787, "loss": 2.8703, "step": 49700 }, { "epoch": 0.45, "learning_rate": 0.00016538353546423687, "loss": 2.8722, "step": 49800 }, { "epoch": 0.45, "learning_rate": 0.00016511181356254582, "loss": 2.8713, "step": 49900 }, { "epoch": 0.45, "learning_rate": 0.00016484009166085482, "loss": 2.8717, "step": 50000 }, { "epoch": 0.45, "eval_accuracy": 0.45021942758284866, "eval_loss": 2.8834283351898193, "eval_runtime": 43.5477, "eval_samples_per_second": 148.871, "eval_steps_per_second": 2.503, "step": 50000 }, { "epoch": 0.45, "learning_rate": 0.00016456836975916383, "loss": 2.8727, "step": 50100 }, { "epoch": 0.45, "learning_rate": 0.00016429664785747277, "loss": 2.8622, "step": 50200 }, { "epoch": 0.45, "learning_rate": 0.00016402492595578178, "loss": 2.8707, "step": 50300 }, { "epoch": 0.46, "learning_rate": 0.00016375320405409075, "loss": 2.8645, "step": 50400 }, { "epoch": 0.46, "learning_rate": 0.00016348148215239975, "loss": 2.8642, "step": 50500 }, { "epoch": 0.46, "learning_rate": 0.00016321247746972563, "loss": 2.8679, "step": 50600 }, { "epoch": 0.46, "learning_rate": 0.00016294075556803463, "loss": 2.871, "step": 50700 }, { "epoch": 0.46, "learning_rate": 0.0001626690336663436, "loss": 2.867, "step": 50800 }, { "epoch": 0.46, "learning_rate": 0.00016239731176465258, "loss": 2.8643, "step": 50900 }, { "epoch": 0.46, "learning_rate": 0.00016212558986296156, "loss": 2.8659, "step": 51000 }, { "epoch": 0.46, "eval_accuracy": 0.45029624992060685, "eval_loss": 2.883072853088379, "eval_runtime": 43.5545, "eval_samples_per_second": 148.848, "eval_steps_per_second": 2.503, "step": 51000 }, { "epoch": 0.46, "learning_rate": 0.00016185386796127056, "loss": 2.8694, "step": 51100 }, { "epoch": 0.46, "learning_rate": 0.00016158214605957956, "loss": 2.8671, "step": 51200 }, { "epoch": 0.46, "learning_rate": 0.0001613104241578885, "loss": 2.8624, "step": 51300 }, { "epoch": 0.46, "learning_rate": 0.00016103870225619751, "loss": 2.8665, "step": 51400 }, { "epoch": 0.47, "learning_rate": 0.0001607669803545065, "loss": 2.8613, "step": 51500 }, { "epoch": 0.47, "learning_rate": 0.00016049525845281547, "loss": 2.8637, "step": 51600 }, { "epoch": 0.47, "learning_rate": 0.00016022353655112447, "loss": 2.8662, "step": 51700 }, { "epoch": 0.47, "learning_rate": 0.00015995181464943344, "loss": 2.8652, "step": 51800 }, { "epoch": 0.47, "learning_rate": 0.00015968009274774245, "loss": 2.8673, "step": 51900 }, { "epoch": 0.47, "learning_rate": 0.0001594083708460514, "loss": 2.865, "step": 52000 }, { "epoch": 0.47, "eval_accuracy": 0.450486793514259, "eval_loss": 2.878352403640747, "eval_runtime": 43.3417, "eval_samples_per_second": 149.579, "eval_steps_per_second": 2.515, "step": 52000 }, { "epoch": 0.47, "learning_rate": 0.0001591366489443604, "loss": 2.8688, "step": 52100 }, { "epoch": 0.47, "learning_rate": 0.00015886492704266937, "loss": 2.862, "step": 52200 }, { "epoch": 0.47, "learning_rate": 0.00015859320514097837, "loss": 2.8646, "step": 52300 }, { "epoch": 0.47, "learning_rate": 0.00015832148323928735, "loss": 2.8672, "step": 52400 }, { "epoch": 0.47, "learning_rate": 0.00015804976133759632, "loss": 2.8594, "step": 52500 }, { "epoch": 0.48, "learning_rate": 0.00015777803943590533, "loss": 2.8558, "step": 52600 }, { "epoch": 0.48, "learning_rate": 0.0001575063175342143, "loss": 2.8576, "step": 52700 }, { "epoch": 0.48, "learning_rate": 0.0001572373128515402, "loss": 2.8597, "step": 52800 }, { "epoch": 0.48, "learning_rate": 0.00015696559094984918, "loss": 2.8615, "step": 52900 }, { "epoch": 0.48, "learning_rate": 0.00015669386904815818, "loss": 2.8575, "step": 53000 }, { "epoch": 0.48, "eval_accuracy": 0.45082372297985984, "eval_loss": 2.8763039112091064, "eval_runtime": 43.6525, "eval_samples_per_second": 148.514, "eval_steps_per_second": 2.497, "step": 53000 }, { "epoch": 0.48, "learning_rate": 0.00015642214714646713, "loss": 2.8673, "step": 53100 }, { "epoch": 0.48, "learning_rate": 0.00015615042524477613, "loss": 2.854, "step": 53200 }, { "epoch": 0.48, "learning_rate": 0.00015587870334308514, "loss": 2.8652, "step": 53300 }, { "epoch": 0.48, "learning_rate": 0.000155609698660411, "loss": 2.8596, "step": 53400 }, { "epoch": 0.48, "learning_rate": 0.00015533797675872, "loss": 2.8641, "step": 53500 }, { "epoch": 0.48, "learning_rate": 0.000155066254857029, "loss": 2.8595, "step": 53600 }, { "epoch": 0.49, "learning_rate": 0.000154794532955338, "loss": 2.8562, "step": 53700 }, { "epoch": 0.49, "learning_rate": 0.00015452281105364694, "loss": 2.8529, "step": 53800 }, { "epoch": 0.49, "learning_rate": 0.00015425108915195594, "loss": 2.8629, "step": 53900 }, { "epoch": 0.49, "learning_rate": 0.00015397936725026492, "loss": 2.8571, "step": 54000 }, { "epoch": 0.49, "eval_accuracy": 0.4512689295986789, "eval_loss": 2.874122142791748, "eval_runtime": 43.0942, "eval_samples_per_second": 150.438, "eval_steps_per_second": 2.529, "step": 54000 }, { "epoch": 0.49, "learning_rate": 0.0001537076453485739, "loss": 2.8605, "step": 54100 }, { "epoch": 0.49, "learning_rate": 0.00015343592344688287, "loss": 2.8668, "step": 54200 }, { "epoch": 0.49, "learning_rate": 0.00015316420154519187, "loss": 2.8604, "step": 54300 }, { "epoch": 0.49, "learning_rate": 0.00015289247964350087, "loss": 2.857, "step": 54400 }, { "epoch": 0.49, "learning_rate": 0.00015262075774180982, "loss": 2.8599, "step": 54500 }, { "epoch": 0.49, "learning_rate": 0.00015234903584011882, "loss": 2.8653, "step": 54600 }, { "epoch": 0.49, "learning_rate": 0.0001520773139384278, "loss": 2.857, "step": 54700 }, { "epoch": 0.5, "learning_rate": 0.0001518055920367368, "loss": 2.8543, "step": 54800 }, { "epoch": 0.5, "learning_rate": 0.00015153658735406268, "loss": 2.8495, "step": 54900 }, { "epoch": 0.5, "learning_rate": 0.00015126486545237168, "loss": 2.8554, "step": 55000 }, { "epoch": 0.5, "eval_accuracy": 0.4514479800866822, "eval_loss": 2.870398998260498, "eval_runtime": 43.838, "eval_samples_per_second": 147.885, "eval_steps_per_second": 2.486, "step": 55000 }, { "epoch": 0.5, "learning_rate": 0.00015099314355068063, "loss": 2.8595, "step": 55100 }, { "epoch": 0.5, "learning_rate": 0.00015072142164898963, "loss": 2.855, "step": 55200 }, { "epoch": 0.5, "learning_rate": 0.0001504496997472986, "loss": 2.8663, "step": 55300 }, { "epoch": 0.5, "learning_rate": 0.0001501779778456076, "loss": 2.8555, "step": 55400 }, { "epoch": 0.5, "learning_rate": 0.00014990625594391658, "loss": 2.8596, "step": 55500 }, { "epoch": 0.5, "learning_rate": 0.00014963453404222556, "loss": 2.8589, "step": 55600 }, { "epoch": 0.5, "learning_rate": 0.00014936281214053456, "loss": 2.8568, "step": 55700 }, { "epoch": 0.5, "learning_rate": 0.00014909109023884354, "loss": 2.8474, "step": 55800 }, { "epoch": 0.51, "learning_rate": 0.0001488193683371525, "loss": 2.8515, "step": 55900 }, { "epoch": 0.51, "learning_rate": 0.00014854764643546151, "loss": 2.8526, "step": 56000 }, { "epoch": 0.51, "eval_accuracy": 0.45189379160579857, "eval_loss": 2.86692214012146, "eval_runtime": 43.3506, "eval_samples_per_second": 149.548, "eval_steps_per_second": 2.514, "step": 56000 }, { "epoch": 0.51, "learning_rate": 0.0001482759245337705, "loss": 2.8504, "step": 56100 }, { "epoch": 0.51, "learning_rate": 0.0001480042026320795, "loss": 2.854, "step": 56200 }, { "epoch": 0.51, "learning_rate": 0.00014773248073038847, "loss": 2.8512, "step": 56300 }, { "epoch": 0.51, "learning_rate": 0.00014746075882869744, "loss": 2.8515, "step": 56400 }, { "epoch": 0.51, "learning_rate": 0.00014718903692700642, "loss": 2.8492, "step": 56500 }, { "epoch": 0.51, "learning_rate": 0.00014691731502531542, "loss": 2.8491, "step": 56600 }, { "epoch": 0.51, "learning_rate": 0.0001466455931236244, "loss": 2.8466, "step": 56700 }, { "epoch": 0.51, "learning_rate": 0.0001463738712219334, "loss": 2.8508, "step": 56800 }, { "epoch": 0.51, "learning_rate": 0.00014610214932024237, "loss": 2.8567, "step": 56900 }, { "epoch": 0.52, "learning_rate": 0.00014583042741855135, "loss": 2.8521, "step": 57000 }, { "epoch": 0.52, "eval_accuracy": 0.45249203799983667, "eval_loss": 2.861818552017212, "eval_runtime": 43.168, "eval_samples_per_second": 150.181, "eval_steps_per_second": 2.525, "step": 57000 }, { "epoch": 0.52, "learning_rate": 0.00014555870551686032, "loss": 2.8463, "step": 57100 }, { "epoch": 0.52, "learning_rate": 0.0001452869836151693, "loss": 2.8433, "step": 57200 }, { "epoch": 0.52, "learning_rate": 0.0001450152617134783, "loss": 2.8446, "step": 57300 }, { "epoch": 0.52, "learning_rate": 0.00014474353981178728, "loss": 2.8477, "step": 57400 }, { "epoch": 0.52, "learning_rate": 0.00014447181791009628, "loss": 2.8439, "step": 57500 }, { "epoch": 0.52, "learning_rate": 0.00014420009600840525, "loss": 2.8459, "step": 57600 }, { "epoch": 0.52, "learning_rate": 0.00014392837410671423, "loss": 2.8445, "step": 57700 }, { "epoch": 0.52, "learning_rate": 0.0001436566522050232, "loss": 2.8455, "step": 57800 }, { "epoch": 0.52, "learning_rate": 0.0001433876475223491, "loss": 2.8474, "step": 57900 }, { "epoch": 0.52, "learning_rate": 0.000143118642839675, "loss": 2.8398, "step": 58000 }, { "epoch": 0.52, "eval_accuracy": 0.45218656334969587, "eval_loss": 2.8599517345428467, "eval_runtime": 43.8444, "eval_samples_per_second": 147.864, "eval_steps_per_second": 2.486, "step": 58000 }, { "epoch": 0.53, "learning_rate": 0.000142846920937984, "loss": 2.8492, "step": 58100 }, { "epoch": 0.53, "learning_rate": 0.00014257519903629296, "loss": 2.8434, "step": 58200 }, { "epoch": 0.53, "learning_rate": 0.00014230347713460197, "loss": 2.8483, "step": 58300 }, { "epoch": 0.53, "learning_rate": 0.00014203175523291094, "loss": 2.8441, "step": 58400 }, { "epoch": 0.53, "learning_rate": 0.00014176003333121992, "loss": 2.8474, "step": 58500 }, { "epoch": 0.53, "learning_rate": 0.00014148831142952892, "loss": 2.8385, "step": 58600 }, { "epoch": 0.53, "learning_rate": 0.0001412165895278379, "loss": 2.8424, "step": 58700 }, { "epoch": 0.53, "learning_rate": 0.00014094486762614687, "loss": 2.847, "step": 58800 }, { "epoch": 0.53, "learning_rate": 0.00014067314572445587, "loss": 2.8511, "step": 58900 }, { "epoch": 0.53, "learning_rate": 0.00014040142382276485, "loss": 2.8398, "step": 59000 }, { "epoch": 0.53, "eval_accuracy": 0.45275395982857125, "eval_loss": 2.8576090335845947, "eval_runtime": 43.2028, "eval_samples_per_second": 150.06, "eval_steps_per_second": 2.523, "step": 59000 }, { "epoch": 0.53, "learning_rate": 0.00014012970192107382, "loss": 2.8386, "step": 59100 }, { "epoch": 0.54, "learning_rate": 0.00013985798001938282, "loss": 2.8458, "step": 59200 }, { "epoch": 0.54, "learning_rate": 0.0001395862581176918, "loss": 2.8356, "step": 59300 }, { "epoch": 0.54, "learning_rate": 0.00013931453621600078, "loss": 2.8379, "step": 59400 }, { "epoch": 0.54, "learning_rate": 0.00013904281431430978, "loss": 2.8325, "step": 59500 }, { "epoch": 0.54, "learning_rate": 0.00013877109241261875, "loss": 2.8461, "step": 59600 }, { "epoch": 0.54, "learning_rate": 0.00013849937051092773, "loss": 2.8521, "step": 59700 }, { "epoch": 0.54, "learning_rate": 0.00013823036582825363, "loss": 2.8273, "step": 59800 }, { "epoch": 0.54, "learning_rate": 0.00013795864392656263, "loss": 2.8318, "step": 59900 }, { "epoch": 0.54, "learning_rate": 0.0001376869220248716, "loss": 2.837, "step": 60000 }, { "epoch": 0.54, "eval_accuracy": 0.4528289674654375, "eval_loss": 2.8535568714141846, "eval_runtime": 43.1874, "eval_samples_per_second": 150.113, "eval_steps_per_second": 2.524, "step": 60000 }, { "epoch": 0.54, "learning_rate": 0.00013741520012318058, "loss": 2.8396, "step": 60100 }, { "epoch": 0.54, "learning_rate": 0.00013714347822148956, "loss": 2.8395, "step": 60200 }, { "epoch": 0.55, "learning_rate": 0.00013687447353881546, "loss": 2.8325, "step": 60300 }, { "epoch": 0.55, "learning_rate": 0.00013660275163712444, "loss": 2.8412, "step": 60400 }, { "epoch": 0.55, "learning_rate": 0.00013633102973543344, "loss": 2.8392, "step": 60500 }, { "epoch": 0.55, "learning_rate": 0.00013605930783374242, "loss": 2.843, "step": 60600 }, { "epoch": 0.55, "learning_rate": 0.0001357875859320514, "loss": 2.8337, "step": 60700 }, { "epoch": 0.55, "learning_rate": 0.00013551586403036037, "loss": 2.8452, "step": 60800 }, { "epoch": 0.55, "learning_rate": 0.00013524414212866937, "loss": 2.8448, "step": 60900 }, { "epoch": 0.55, "learning_rate": 0.00013497242022697835, "loss": 2.837, "step": 61000 }, { "epoch": 0.55, "eval_accuracy": 0.4534701617805845, "eval_loss": 2.851900577545166, "eval_runtime": 43.1282, "eval_samples_per_second": 150.319, "eval_steps_per_second": 2.527, "step": 61000 }, { "epoch": 0.55, "learning_rate": 0.00013470069832528735, "loss": 2.8331, "step": 61100 }, { "epoch": 0.55, "learning_rate": 0.00013442897642359632, "loss": 2.832, "step": 61200 }, { "epoch": 0.55, "learning_rate": 0.0001341572545219053, "loss": 2.8255, "step": 61300 }, { "epoch": 0.56, "learning_rate": 0.00013388553262021427, "loss": 2.8327, "step": 61400 }, { "epoch": 0.56, "learning_rate": 0.00013361381071852328, "loss": 2.8386, "step": 61500 }, { "epoch": 0.56, "learning_rate": 0.00013334208881683225, "loss": 2.8315, "step": 61600 }, { "epoch": 0.56, "learning_rate": 0.00013307036691514125, "loss": 2.824, "step": 61700 }, { "epoch": 0.56, "learning_rate": 0.00013279864501345023, "loss": 2.8296, "step": 61800 }, { "epoch": 0.56, "learning_rate": 0.0001325269231117592, "loss": 2.8378, "step": 61900 }, { "epoch": 0.56, "learning_rate": 0.0001322579184290851, "loss": 2.8427, "step": 62000 }, { "epoch": 0.56, "eval_accuracy": 0.4535663409278566, "eval_loss": 2.8492891788482666, "eval_runtime": 43.4858, "eval_samples_per_second": 149.083, "eval_steps_per_second": 2.507, "step": 62000 }, { "epoch": 0.56, "learning_rate": 0.00013198619652739408, "loss": 2.8329, "step": 62100 }, { "epoch": 0.56, "learning_rate": 0.00013171447462570306, "loss": 2.8389, "step": 62200 }, { "epoch": 0.56, "learning_rate": 0.00013144275272401206, "loss": 2.8358, "step": 62300 }, { "epoch": 0.56, "learning_rate": 0.00013117103082232104, "loss": 2.8369, "step": 62400 }, { "epoch": 0.57, "learning_rate": 0.00013089930892063, "loss": 2.8294, "step": 62500 }, { "epoch": 0.57, "learning_rate": 0.000130627587018939, "loss": 2.834, "step": 62600 }, { "epoch": 0.57, "learning_rate": 0.000130355865117248, "loss": 2.8414, "step": 62700 }, { "epoch": 0.57, "learning_rate": 0.00013008414321555696, "loss": 2.8384, "step": 62800 }, { "epoch": 0.57, "learning_rate": 0.00012981242131386597, "loss": 2.8384, "step": 62900 }, { "epoch": 0.57, "learning_rate": 0.00012954069941217494, "loss": 2.8365, "step": 63000 }, { "epoch": 0.57, "eval_accuracy": 0.45409986299008265, "eval_loss": 2.8467965126037598, "eval_runtime": 47.1796, "eval_samples_per_second": 137.411, "eval_steps_per_second": 2.31, "step": 63000 }, { "epoch": 0.57, "learning_rate": 0.00012926897751048392, "loss": 2.8281, "step": 63100 }, { "epoch": 0.57, "learning_rate": 0.00012899725560879292, "loss": 2.8197, "step": 63200 }, { "epoch": 0.57, "learning_rate": 0.0001287255337071019, "loss": 2.8233, "step": 63300 }, { "epoch": 0.57, "learning_rate": 0.00012845652902442777, "loss": 2.828, "step": 63400 }, { "epoch": 0.57, "learning_rate": 0.00012818480712273677, "loss": 2.8334, "step": 63500 }, { "epoch": 0.58, "learning_rate": 0.00012791308522104578, "loss": 2.8332, "step": 63600 }, { "epoch": 0.58, "learning_rate": 0.00012764136331935475, "loss": 2.8279, "step": 63700 }, { "epoch": 0.58, "learning_rate": 0.00012736964141766373, "loss": 2.8271, "step": 63800 }, { "epoch": 0.58, "learning_rate": 0.0001270979195159727, "loss": 2.8306, "step": 63900 }, { "epoch": 0.58, "learning_rate": 0.00012682619761428168, "loss": 2.8327, "step": 64000 }, { "epoch": 0.58, "eval_accuracy": 0.4538736302788893, "eval_loss": 2.8447225093841553, "eval_runtime": 44.4204, "eval_samples_per_second": 145.946, "eval_steps_per_second": 2.454, "step": 64000 }, { "epoch": 0.58, "learning_rate": 0.00012655447571259068, "loss": 2.836, "step": 64100 }, { "epoch": 0.58, "learning_rate": 0.00012628275381089965, "loss": 2.8337, "step": 64200 }, { "epoch": 0.58, "learning_rate": 0.00012601103190920866, "loss": 2.8333, "step": 64300 }, { "epoch": 0.58, "learning_rate": 0.00012573931000751763, "loss": 2.8298, "step": 64400 }, { "epoch": 0.58, "learning_rate": 0.0001254675881058266, "loss": 2.8285, "step": 64500 }, { "epoch": 0.58, "learning_rate": 0.00012519586620413558, "loss": 2.8252, "step": 64600 }, { "epoch": 0.58, "learning_rate": 0.00012492414430244459, "loss": 2.8227, "step": 64700 }, { "epoch": 0.59, "learning_rate": 0.00012465242240075356, "loss": 2.8286, "step": 64800 }, { "epoch": 0.59, "learning_rate": 0.00012438070049906256, "loss": 2.8218, "step": 64900 }, { "epoch": 0.59, "learning_rate": 0.00012410897859737154, "loss": 2.8289, "step": 65000 }, { "epoch": 0.59, "eval_accuracy": 0.4545583774154425, "eval_loss": 2.838773012161255, "eval_runtime": 43.8892, "eval_samples_per_second": 147.713, "eval_steps_per_second": 2.484, "step": 65000 }, { "epoch": 0.59, "learning_rate": 0.0001238372566956805, "loss": 2.8198, "step": 65100 }, { "epoch": 0.59, "learning_rate": 0.0001235655347939895, "loss": 2.8207, "step": 65200 }, { "epoch": 0.59, "learning_rate": 0.00012329381289229846, "loss": 2.8296, "step": 65300 }, { "epoch": 0.59, "learning_rate": 0.00012302209099060747, "loss": 2.8293, "step": 65400 }, { "epoch": 0.59, "learning_rate": 0.00012275036908891647, "loss": 2.8188, "step": 65500 }, { "epoch": 0.59, "learning_rate": 0.00012247864718722544, "loss": 2.819, "step": 65600 }, { "epoch": 0.59, "learning_rate": 0.00012220692528553442, "loss": 2.8219, "step": 65700 }, { "epoch": 0.59, "learning_rate": 0.0001219352033838434, "loss": 2.8199, "step": 65800 }, { "epoch": 0.6, "learning_rate": 0.0001216634814821524, "loss": 2.8282, "step": 65900 }, { "epoch": 0.6, "learning_rate": 0.00012139175958046137, "loss": 2.8166, "step": 66000 }, { "epoch": 0.6, "eval_accuracy": 0.45473863770404044, "eval_loss": 2.834634780883789, "eval_runtime": 43.1108, "eval_samples_per_second": 150.38, "eval_steps_per_second": 2.528, "step": 66000 }, { "epoch": 0.6, "learning_rate": 0.00012112003767877036, "loss": 2.8226, "step": 66100 }, { "epoch": 0.6, "learning_rate": 0.00012084831577707934, "loss": 2.8135, "step": 66200 }, { "epoch": 0.6, "learning_rate": 0.00012057659387538832, "loss": 2.8134, "step": 66300 }, { "epoch": 0.6, "learning_rate": 0.0001203048719736973, "loss": 2.8214, "step": 66400 }, { "epoch": 0.6, "learning_rate": 0.0001200358672910232, "loss": 2.8142, "step": 66500 }, { "epoch": 0.6, "learning_rate": 0.00011976414538933219, "loss": 2.8196, "step": 66600 }, { "epoch": 0.6, "learning_rate": 0.00011949242348764117, "loss": 2.8145, "step": 66700 }, { "epoch": 0.6, "learning_rate": 0.00011922070158595016, "loss": 2.8093, "step": 66800 }, { "epoch": 0.6, "learning_rate": 0.00011894897968425913, "loss": 2.8168, "step": 66900 }, { "epoch": 0.61, "learning_rate": 0.00011867725778256813, "loss": 2.8171, "step": 67000 }, { "epoch": 0.61, "eval_accuracy": 0.45580810142968187, "eval_loss": 2.8293869495391846, "eval_runtime": 44.4137, "eval_samples_per_second": 145.968, "eval_steps_per_second": 2.454, "step": 67000 }, { "epoch": 0.61, "learning_rate": 0.00011840553588087711, "loss": 2.8123, "step": 67100 }, { "epoch": 0.61, "learning_rate": 0.0001181338139791861, "loss": 2.8121, "step": 67200 }, { "epoch": 0.61, "learning_rate": 0.00011786209207749507, "loss": 2.8083, "step": 67300 }, { "epoch": 0.61, "learning_rate": 0.00011759037017580405, "loss": 2.8156, "step": 67400 }, { "epoch": 0.61, "learning_rate": 0.00011731864827411304, "loss": 2.8225, "step": 67500 }, { "epoch": 0.61, "learning_rate": 0.00011704692637242204, "loss": 2.8109, "step": 67600 }, { "epoch": 0.61, "learning_rate": 0.00011677520447073102, "loss": 2.8137, "step": 67700 }, { "epoch": 0.61, "learning_rate": 0.0001165061997880569, "loss": 2.8097, "step": 67800 }, { "epoch": 0.61, "learning_rate": 0.00011623447788636588, "loss": 2.8099, "step": 67900 }, { "epoch": 0.61, "learning_rate": 0.00011596275598467488, "loss": 2.8184, "step": 68000 }, { "epoch": 0.61, "eval_accuracy": 0.4556344950443543, "eval_loss": 2.826944589614868, "eval_runtime": 43.7297, "eval_samples_per_second": 148.252, "eval_steps_per_second": 2.493, "step": 68000 }, { "epoch": 0.62, "learning_rate": 0.00011569103408298386, "loss": 2.8164, "step": 68100 }, { "epoch": 0.62, "learning_rate": 0.00011541931218129285, "loss": 2.8137, "step": 68200 }, { "epoch": 0.62, "learning_rate": 0.00011514759027960182, "loss": 2.8168, "step": 68300 }, { "epoch": 0.62, "learning_rate": 0.00011487858559692771, "loss": 2.8156, "step": 68400 }, { "epoch": 0.62, "learning_rate": 0.00011460686369523672, "loss": 2.8114, "step": 68500 }, { "epoch": 0.62, "learning_rate": 0.00011433514179354569, "loss": 2.8066, "step": 68600 }, { "epoch": 0.62, "learning_rate": 0.00011406341989185468, "loss": 2.8124, "step": 68700 }, { "epoch": 0.62, "learning_rate": 0.00011379169799016366, "loss": 2.8093, "step": 68800 }, { "epoch": 0.62, "learning_rate": 0.00011351997608847263, "loss": 2.8131, "step": 68900 }, { "epoch": 0.62, "learning_rate": 0.00011324825418678162, "loss": 2.8102, "step": 69000 }, { "epoch": 0.62, "eval_accuracy": 0.45632710588477254, "eval_loss": 2.8243494033813477, "eval_runtime": 42.7646, "eval_samples_per_second": 151.597, "eval_steps_per_second": 2.549, "step": 69000 }, { "epoch": 0.62, "learning_rate": 0.00011297653228509062, "loss": 2.8064, "step": 69100 }, { "epoch": 0.63, "learning_rate": 0.0001127048103833996, "loss": 2.8075, "step": 69200 }, { "epoch": 0.63, "learning_rate": 0.00011243308848170857, "loss": 2.8146, "step": 69300 }, { "epoch": 0.63, "learning_rate": 0.00011216136658001756, "loss": 2.8166, "step": 69400 }, { "epoch": 0.63, "learning_rate": 0.00011188964467832654, "loss": 2.8073, "step": 69500 }, { "epoch": 0.63, "learning_rate": 0.00011161792277663554, "loss": 2.8116, "step": 69600 }, { "epoch": 0.63, "learning_rate": 0.00011134620087494451, "loss": 2.807, "step": 69700 }, { "epoch": 0.63, "learning_rate": 0.0001110744789732535, "loss": 2.8066, "step": 69800 }, { "epoch": 0.63, "learning_rate": 0.00011080547429057939, "loss": 2.8101, "step": 69900 }, { "epoch": 0.63, "learning_rate": 0.00011053375238888837, "loss": 2.8153, "step": 70000 }, { "epoch": 0.63, "eval_accuracy": 0.45636279500231375, "eval_loss": 2.821134328842163, "eval_runtime": 42.931, "eval_samples_per_second": 151.01, "eval_steps_per_second": 2.539, "step": 70000 }, { "epoch": 0.63, "learning_rate": 0.00011026203048719737, "loss": 2.8109, "step": 70100 }, { "epoch": 0.63, "learning_rate": 0.00010999030858550635, "loss": 2.8025, "step": 70200 }, { "epoch": 0.64, "learning_rate": 0.00010971858668381533, "loss": 2.8055, "step": 70300 }, { "epoch": 0.64, "learning_rate": 0.00010944686478212431, "loss": 2.8047, "step": 70400 }, { "epoch": 0.64, "learning_rate": 0.00010917514288043329, "loss": 2.8095, "step": 70500 }, { "epoch": 0.64, "learning_rate": 0.00010890342097874227, "loss": 2.805, "step": 70600 }, { "epoch": 0.64, "learning_rate": 0.00010863169907705128, "loss": 2.8079, "step": 70700 }, { "epoch": 0.64, "learning_rate": 0.00010835997717536025, "loss": 2.8071, "step": 70800 }, { "epoch": 0.64, "learning_rate": 0.00010809097249268614, "loss": 2.8016, "step": 70900 }, { "epoch": 0.64, "learning_rate": 0.00010781925059099512, "loss": 2.8035, "step": 71000 }, { "epoch": 0.64, "eval_accuracy": 0.4569090199707833, "eval_loss": 2.8184897899627686, "eval_runtime": 43.5955, "eval_samples_per_second": 148.708, "eval_steps_per_second": 2.5, "step": 71000 }, { "epoch": 0.64, "learning_rate": 0.00010755024590832102, "loss": 2.8002, "step": 71100 }, { "epoch": 0.64, "learning_rate": 0.00010727852400663001, "loss": 2.8186, "step": 71200 }, { "epoch": 0.64, "learning_rate": 0.00010700680210493899, "loss": 2.8036, "step": 71300 }, { "epoch": 0.65, "learning_rate": 0.00010673508020324797, "loss": 2.8077, "step": 71400 }, { "epoch": 0.65, "learning_rate": 0.00010646335830155695, "loss": 2.8111, "step": 71500 }, { "epoch": 0.65, "learning_rate": 0.00010619163639986595, "loss": 2.8018, "step": 71600 }, { "epoch": 0.65, "learning_rate": 0.00010591991449817493, "loss": 2.8079, "step": 71700 }, { "epoch": 0.65, "learning_rate": 0.00010564819259648392, "loss": 2.8124, "step": 71800 }, { "epoch": 0.65, "learning_rate": 0.00010537647069479289, "loss": 2.807, "step": 71900 }, { "epoch": 0.65, "learning_rate": 0.00010510474879310187, "loss": 2.8042, "step": 72000 }, { "epoch": 0.65, "eval_accuracy": 0.4569186983755403, "eval_loss": 2.8206183910369873, "eval_runtime": 44.1793, "eval_samples_per_second": 146.743, "eval_steps_per_second": 2.467, "step": 72000 }, { "epoch": 0.65, "learning_rate": 0.00010483302689141086, "loss": 2.8066, "step": 72100 }, { "epoch": 0.65, "learning_rate": 0.00010456130498971986, "loss": 2.8088, "step": 72200 }, { "epoch": 0.65, "learning_rate": 0.00010428958308802883, "loss": 2.8036, "step": 72300 }, { "epoch": 0.65, "learning_rate": 0.00010401786118633781, "loss": 2.7985, "step": 72400 }, { "epoch": 0.66, "learning_rate": 0.0001037461392846468, "loss": 2.7981, "step": 72500 }, { "epoch": 0.66, "learning_rate": 0.00010347441738295577, "loss": 2.7993, "step": 72600 }, { "epoch": 0.66, "learning_rate": 0.00010320269548126476, "loss": 2.7999, "step": 72700 }, { "epoch": 0.66, "learning_rate": 0.00010293097357957375, "loss": 2.8009, "step": 72800 }, { "epoch": 0.66, "learning_rate": 0.00010265925167788274, "loss": 2.7943, "step": 72900 }, { "epoch": 0.66, "learning_rate": 0.00010238752977619171, "loss": 2.7984, "step": 73000 }, { "epoch": 0.66, "eval_accuracy": 0.457420160722009, "eval_loss": 2.8137617111206055, "eval_runtime": 43.507, "eval_samples_per_second": 149.01, "eval_steps_per_second": 2.505, "step": 73000 }, { "epoch": 0.66, "learning_rate": 0.0001021158078745007, "loss": 2.7913, "step": 73100 }, { "epoch": 0.66, "learning_rate": 0.00010184408597280968, "loss": 2.8016, "step": 73200 }, { "epoch": 0.66, "learning_rate": 0.00010157236407111868, "loss": 2.7988, "step": 73300 }, { "epoch": 0.66, "learning_rate": 0.00010130064216942766, "loss": 2.792, "step": 73400 }, { "epoch": 0.66, "learning_rate": 0.00010103163748675355, "loss": 2.7926, "step": 73500 }, { "epoch": 0.67, "learning_rate": 0.00010075991558506253, "loss": 2.7796, "step": 73600 }, { "epoch": 0.67, "learning_rate": 0.00010048819368337151, "loss": 2.7971, "step": 73700 }, { "epoch": 0.67, "learning_rate": 0.00010021647178168051, "loss": 2.7974, "step": 73800 }, { "epoch": 0.67, "learning_rate": 9.994474987998949e-05, "loss": 2.7951, "step": 73900 }, { "epoch": 0.67, "learning_rate": 9.967302797829848e-05, "loss": 2.7883, "step": 74000 }, { "epoch": 0.67, "eval_accuracy": 0.45740261861338705, "eval_loss": 2.8111917972564697, "eval_runtime": 44.0953, "eval_samples_per_second": 147.023, "eval_steps_per_second": 2.472, "step": 74000 }, { "epoch": 0.67, "learning_rate": 9.940130607660745e-05, "loss": 2.7898, "step": 74100 }, { "epoch": 0.67, "learning_rate": 9.912958417491643e-05, "loss": 2.7914, "step": 74200 }, { "epoch": 0.67, "learning_rate": 9.885786227322542e-05, "loss": 2.798, "step": 74300 }, { "epoch": 0.67, "learning_rate": 9.85861403715344e-05, "loss": 2.7938, "step": 74400 }, { "epoch": 0.67, "learning_rate": 9.83144184698434e-05, "loss": 2.7927, "step": 74500 }, { "epoch": 0.67, "learning_rate": 9.804269656815237e-05, "loss": 2.7967, "step": 74600 }, { "epoch": 0.68, "learning_rate": 9.777369188547826e-05, "loss": 2.7933, "step": 74700 }, { "epoch": 0.68, "learning_rate": 9.750196998378726e-05, "loss": 2.7913, "step": 74800 }, { "epoch": 0.68, "learning_rate": 9.723024808209624e-05, "loss": 2.7924, "step": 74900 }, { "epoch": 0.68, "learning_rate": 9.695852618040523e-05, "loss": 2.7962, "step": 75000 }, { "epoch": 0.68, "eval_accuracy": 0.4583686443881887, "eval_loss": 2.8055942058563232, "eval_runtime": 44.8912, "eval_samples_per_second": 144.416, "eval_steps_per_second": 2.428, "step": 75000 }, { "epoch": 0.68, "learning_rate": 9.66868042787142e-05, "loss": 2.7848, "step": 75100 }, { "epoch": 0.68, "learning_rate": 9.641779959604009e-05, "loss": 2.7935, "step": 75200 }, { "epoch": 0.68, "learning_rate": 9.61460776943491e-05, "loss": 2.7961, "step": 75300 }, { "epoch": 0.68, "learning_rate": 9.587435579265807e-05, "loss": 2.788, "step": 75400 }, { "epoch": 0.68, "learning_rate": 9.560263389096706e-05, "loss": 2.7934, "step": 75500 }, { "epoch": 0.68, "learning_rate": 9.533091198927603e-05, "loss": 2.7888, "step": 75600 }, { "epoch": 0.68, "learning_rate": 9.505919008758501e-05, "loss": 2.7954, "step": 75700 }, { "epoch": 0.69, "learning_rate": 9.4787468185894e-05, "loss": 2.7934, "step": 75800 }, { "epoch": 0.69, "learning_rate": 9.451574628420299e-05, "loss": 2.7867, "step": 75900 }, { "epoch": 0.69, "learning_rate": 9.424402438251197e-05, "loss": 2.7937, "step": 76000 }, { "epoch": 0.69, "eval_accuracy": 0.4582416153257539, "eval_loss": 2.8068454265594482, "eval_runtime": 44.3778, "eval_samples_per_second": 146.087, "eval_steps_per_second": 2.456, "step": 76000 }, { "epoch": 0.69, "learning_rate": 9.397230248082095e-05, "loss": 2.7933, "step": 76100 }, { "epoch": 0.69, "learning_rate": 9.370058057912994e-05, "loss": 2.7876, "step": 76200 }, { "epoch": 0.69, "learning_rate": 9.342885867743891e-05, "loss": 2.7885, "step": 76300 }, { "epoch": 0.69, "learning_rate": 9.31571367757479e-05, "loss": 2.7859, "step": 76400 }, { "epoch": 0.69, "learning_rate": 9.288541487405689e-05, "loss": 2.7867, "step": 76500 }, { "epoch": 0.69, "learning_rate": 9.261369297236588e-05, "loss": 2.7882, "step": 76600 }, { "epoch": 0.69, "learning_rate": 9.234197107067486e-05, "loss": 2.7874, "step": 76700 }, { "epoch": 0.69, "learning_rate": 9.207024916898384e-05, "loss": 2.79, "step": 76800 }, { "epoch": 0.7, "learning_rate": 9.179852726729282e-05, "loss": 2.7828, "step": 76900 }, { "epoch": 0.7, "learning_rate": 9.152680536560182e-05, "loss": 2.7853, "step": 77000 }, { "epoch": 0.7, "eval_accuracy": 0.4587721128864935, "eval_loss": 2.801090955734253, "eval_runtime": 43.1479, "eval_samples_per_second": 150.251, "eval_steps_per_second": 2.526, "step": 77000 }, { "epoch": 0.7, "learning_rate": 9.12550834639108e-05, "loss": 2.7861, "step": 77100 }, { "epoch": 0.7, "learning_rate": 9.098336156221979e-05, "loss": 2.793, "step": 77200 }, { "epoch": 0.7, "learning_rate": 9.071163966052876e-05, "loss": 2.7914, "step": 77300 }, { "epoch": 0.7, "learning_rate": 9.043991775883774e-05, "loss": 2.7774, "step": 77400 }, { "epoch": 0.7, "learning_rate": 9.016819585714673e-05, "loss": 2.7791, "step": 77500 }, { "epoch": 0.7, "learning_rate": 8.989647395545573e-05, "loss": 2.7837, "step": 77600 }, { "epoch": 0.7, "learning_rate": 8.96247520537647e-05, "loss": 2.779, "step": 77700 }, { "epoch": 0.7, "learning_rate": 8.935303015207368e-05, "loss": 2.7807, "step": 77800 }, { "epoch": 0.7, "learning_rate": 8.908130825038267e-05, "loss": 2.7832, "step": 77900 }, { "epoch": 0.71, "learning_rate": 8.880958634869164e-05, "loss": 2.7798, "step": 78000 }, { "epoch": 0.71, "eval_accuracy": 0.4596697849276993, "eval_loss": 2.795370578765869, "eval_runtime": 43.9941, "eval_samples_per_second": 147.361, "eval_steps_per_second": 2.478, "step": 78000 }, { "epoch": 0.71, "learning_rate": 8.853786444700063e-05, "loss": 2.7851, "step": 78100 }, { "epoch": 0.71, "learning_rate": 8.826885976432654e-05, "loss": 2.7819, "step": 78200 }, { "epoch": 0.71, "learning_rate": 8.799713786263551e-05, "loss": 2.7767, "step": 78300 }, { "epoch": 0.71, "learning_rate": 8.77254159609445e-05, "loss": 2.7745, "step": 78400 }, { "epoch": 0.71, "learning_rate": 8.745369405925347e-05, "loss": 2.7807, "step": 78500 }, { "epoch": 0.71, "learning_rate": 8.718197215756246e-05, "loss": 2.7828, "step": 78600 }, { "epoch": 0.71, "learning_rate": 8.691025025587145e-05, "loss": 2.7768, "step": 78700 }, { "epoch": 0.71, "learning_rate": 8.663852835418044e-05, "loss": 2.7749, "step": 78800 }, { "epoch": 0.71, "learning_rate": 8.636680645248942e-05, "loss": 2.7782, "step": 78900 }, { "epoch": 0.71, "learning_rate": 8.60950845507984e-05, "loss": 2.7851, "step": 79000 }, { "epoch": 0.71, "eval_accuracy": 0.4597998384916206, "eval_loss": 2.7913172245025635, "eval_runtime": 43.6998, "eval_samples_per_second": 148.353, "eval_steps_per_second": 2.494, "step": 79000 }, { "epoch": 0.72, "learning_rate": 8.582336264910738e-05, "loss": 2.7722, "step": 79100 }, { "epoch": 0.72, "learning_rate": 8.555435796643328e-05, "loss": 2.7695, "step": 79200 }, { "epoch": 0.72, "learning_rate": 8.528535328375917e-05, "loss": 2.7732, "step": 79300 }, { "epoch": 0.72, "learning_rate": 8.501363138206815e-05, "loss": 2.7714, "step": 79400 }, { "epoch": 0.72, "learning_rate": 8.474190948037714e-05, "loss": 2.7739, "step": 79500 }, { "epoch": 0.72, "learning_rate": 8.447018757868613e-05, "loss": 2.7733, "step": 79600 }, { "epoch": 0.72, "learning_rate": 8.419846567699512e-05, "loss": 2.773, "step": 79700 }, { "epoch": 0.72, "learning_rate": 8.392674377530409e-05, "loss": 2.7754, "step": 79800 }, { "epoch": 0.72, "learning_rate": 8.365502187361308e-05, "loss": 2.7817, "step": 79900 }, { "epoch": 0.72, "learning_rate": 8.338329997192206e-05, "loss": 2.7831, "step": 80000 }, { "epoch": 0.72, "eval_accuracy": 0.46004845251381443, "eval_loss": 2.78973126411438, "eval_runtime": 44.9439, "eval_samples_per_second": 144.247, "eval_steps_per_second": 2.425, "step": 80000 }, { "epoch": 0.72, "learning_rate": 8.311157807023106e-05, "loss": 2.7739, "step": 80100 }, { "epoch": 0.73, "learning_rate": 8.283985616854003e-05, "loss": 2.781, "step": 80200 }, { "epoch": 0.73, "learning_rate": 8.256813426684902e-05, "loss": 2.7773, "step": 80300 }, { "epoch": 0.73, "learning_rate": 8.2296412365158e-05, "loss": 2.7688, "step": 80400 }, { "epoch": 0.73, "learning_rate": 8.202469046346699e-05, "loss": 2.7765, "step": 80500 }, { "epoch": 0.73, "learning_rate": 8.175568578079289e-05, "loss": 2.7735, "step": 80600 }, { "epoch": 0.73, "learning_rate": 8.148396387910187e-05, "loss": 2.7692, "step": 80700 }, { "epoch": 0.73, "learning_rate": 8.121224197741084e-05, "loss": 2.7661, "step": 80800 }, { "epoch": 0.73, "learning_rate": 8.094052007571983e-05, "loss": 2.7714, "step": 80900 }, { "epoch": 0.73, "learning_rate": 8.06687981740288e-05, "loss": 2.7773, "step": 81000 }, { "epoch": 0.73, "eval_accuracy": 0.4603297311520629, "eval_loss": 2.786165475845337, "eval_runtime": 45.3636, "eval_samples_per_second": 142.912, "eval_steps_per_second": 2.403, "step": 81000 }, { "epoch": 0.73, "learning_rate": 8.03970762723378e-05, "loss": 2.77, "step": 81100 }, { "epoch": 0.73, "learning_rate": 8.012535437064678e-05, "loss": 2.772, "step": 81200 }, { "epoch": 0.74, "learning_rate": 7.985363246895577e-05, "loss": 2.7751, "step": 81300 }, { "epoch": 0.74, "learning_rate": 7.958191056726475e-05, "loss": 2.7705, "step": 81400 }, { "epoch": 0.74, "learning_rate": 7.931018866557374e-05, "loss": 2.7711, "step": 81500 }, { "epoch": 0.74, "learning_rate": 7.903846676388271e-05, "loss": 2.7666, "step": 81600 }, { "epoch": 0.74, "learning_rate": 7.87667448621917e-05, "loss": 2.7678, "step": 81700 }, { "epoch": 0.74, "learning_rate": 7.84977401795176e-05, "loss": 2.7707, "step": 81800 }, { "epoch": 0.74, "learning_rate": 7.822601827782658e-05, "loss": 2.7624, "step": 81900 }, { "epoch": 0.74, "learning_rate": 7.795429637613557e-05, "loss": 2.7688, "step": 82000 }, { "epoch": 0.74, "eval_accuracy": 0.4608795855223163, "eval_loss": 2.7835707664489746, "eval_runtime": 44.1206, "eval_samples_per_second": 146.938, "eval_steps_per_second": 2.47, "step": 82000 }, { "epoch": 0.74, "learning_rate": 7.768257447444454e-05, "loss": 2.7652, "step": 82100 }, { "epoch": 0.74, "learning_rate": 7.741085257275354e-05, "loss": 2.763, "step": 82200 }, { "epoch": 0.74, "learning_rate": 7.713913067106252e-05, "loss": 2.7718, "step": 82300 }, { "epoch": 0.74, "learning_rate": 7.686740876937151e-05, "loss": 2.774, "step": 82400 }, { "epoch": 0.75, "learning_rate": 7.659568686768048e-05, "loss": 2.7624, "step": 82500 }, { "epoch": 0.75, "learning_rate": 7.632396496598946e-05, "loss": 2.7672, "step": 82600 }, { "epoch": 0.75, "learning_rate": 7.605224306429845e-05, "loss": 2.7646, "step": 82700 }, { "epoch": 0.75, "learning_rate": 7.578052116260744e-05, "loss": 2.7643, "step": 82800 }, { "epoch": 0.75, "learning_rate": 7.550879926091643e-05, "loss": 2.7636, "step": 82900 }, { "epoch": 0.75, "learning_rate": 7.523979457824232e-05, "loss": 2.7658, "step": 83000 }, { "epoch": 0.75, "eval_accuracy": 0.4610453282037788, "eval_loss": 2.7798171043395996, "eval_runtime": 44.7143, "eval_samples_per_second": 144.987, "eval_steps_per_second": 2.438, "step": 83000 }, { "epoch": 0.75, "learning_rate": 7.49680726765513e-05, "loss": 2.7694, "step": 83100 }, { "epoch": 0.75, "learning_rate": 7.469635077486028e-05, "loss": 2.7662, "step": 83200 }, { "epoch": 0.75, "learning_rate": 7.442734609218618e-05, "loss": 2.7624, "step": 83300 }, { "epoch": 0.75, "learning_rate": 7.415562419049516e-05, "loss": 2.7632, "step": 83400 }, { "epoch": 0.75, "learning_rate": 7.388390228880415e-05, "loss": 2.7697, "step": 83500 }, { "epoch": 0.76, "learning_rate": 7.361218038711314e-05, "loss": 2.7663, "step": 83600 }, { "epoch": 0.76, "learning_rate": 7.334045848542211e-05, "loss": 2.7623, "step": 83700 }, { "epoch": 0.76, "learning_rate": 7.306873658373109e-05, "loss": 2.7685, "step": 83800 }, { "epoch": 0.76, "learning_rate": 7.279701468204009e-05, "loss": 2.7702, "step": 83900 }, { "epoch": 0.76, "learning_rate": 7.252529278034907e-05, "loss": 2.7622, "step": 84000 }, { "epoch": 0.76, "eval_accuracy": 0.4611511857558078, "eval_loss": 2.781484603881836, "eval_runtime": 43.3638, "eval_samples_per_second": 149.503, "eval_steps_per_second": 2.514, "step": 84000 }, { "epoch": 0.76, "learning_rate": 7.225357087865804e-05, "loss": 2.7672, "step": 84100 }, { "epoch": 0.76, "learning_rate": 7.198184897696703e-05, "loss": 2.7652, "step": 84200 }, { "epoch": 0.76, "learning_rate": 7.171012707527602e-05, "loss": 2.7671, "step": 84300 }, { "epoch": 0.76, "learning_rate": 7.143840517358501e-05, "loss": 2.7621, "step": 84400 }, { "epoch": 0.76, "learning_rate": 7.11694004909109e-05, "loss": 2.7662, "step": 84500 }, { "epoch": 0.76, "learning_rate": 7.089767858921989e-05, "loss": 2.7684, "step": 84600 }, { "epoch": 0.77, "learning_rate": 7.062595668752886e-05, "loss": 2.7662, "step": 84700 }, { "epoch": 0.77, "learning_rate": 7.035423478583785e-05, "loss": 2.7638, "step": 84800 }, { "epoch": 0.77, "learning_rate": 7.008251288414684e-05, "loss": 2.7639, "step": 84900 }, { "epoch": 0.77, "learning_rate": 6.981079098245581e-05, "loss": 2.7691, "step": 85000 }, { "epoch": 0.77, "eval_accuracy": 0.46120986108464673, "eval_loss": 2.7783455848693848, "eval_runtime": 43.5919, "eval_samples_per_second": 148.72, "eval_steps_per_second": 2.5, "step": 85000 }, { "epoch": 0.77, "learning_rate": 6.95390690807648e-05, "loss": 2.7649, "step": 85100 }, { "epoch": 0.77, "learning_rate": 6.926734717907379e-05, "loss": 2.7638, "step": 85200 }, { "epoch": 0.77, "learning_rate": 6.899562527738277e-05, "loss": 2.7675, "step": 85300 }, { "epoch": 0.77, "learning_rate": 6.872390337569176e-05, "loss": 2.7657, "step": 85400 }, { "epoch": 0.77, "learning_rate": 6.845218147400074e-05, "loss": 2.7612, "step": 85500 }, { "epoch": 0.77, "learning_rate": 6.818045957230972e-05, "loss": 2.7682, "step": 85600 }, { "epoch": 0.77, "learning_rate": 6.79087376706187e-05, "loss": 2.7588, "step": 85700 }, { "epoch": 0.78, "learning_rate": 6.763701576892768e-05, "loss": 2.765, "step": 85800 }, { "epoch": 0.78, "learning_rate": 6.736529386723667e-05, "loss": 2.7556, "step": 85900 }, { "epoch": 0.78, "learning_rate": 6.709357196554565e-05, "loss": 2.7579, "step": 86000 }, { "epoch": 0.78, "eval_accuracy": 0.4619333218402277, "eval_loss": 2.7711987495422363, "eval_runtime": 43.3357, "eval_samples_per_second": 149.6, "eval_steps_per_second": 2.515, "step": 86000 }, { "epoch": 0.78, "learning_rate": 6.682185006385464e-05, "loss": 2.7538, "step": 86100 }, { "epoch": 0.78, "learning_rate": 6.655012816216363e-05, "loss": 2.7596, "step": 86200 }, { "epoch": 0.78, "learning_rate": 6.62784062604726e-05, "loss": 2.7512, "step": 86300 }, { "epoch": 0.78, "learning_rate": 6.600668435878159e-05, "loss": 2.7559, "step": 86400 }, { "epoch": 0.78, "learning_rate": 6.573496245709058e-05, "loss": 2.7574, "step": 86500 }, { "epoch": 0.78, "learning_rate": 6.546324055539957e-05, "loss": 2.7614, "step": 86600 }, { "epoch": 0.78, "learning_rate": 6.519423587272546e-05, "loss": 2.7501, "step": 86700 }, { "epoch": 0.78, "learning_rate": 6.492251397103445e-05, "loss": 2.7488, "step": 86800 }, { "epoch": 0.79, "learning_rate": 6.465079206934342e-05, "loss": 2.7497, "step": 86900 }, { "epoch": 0.79, "learning_rate": 6.437907016765241e-05, "loss": 2.7614, "step": 87000 }, { "epoch": 0.79, "eval_accuracy": 0.46246986840394033, "eval_loss": 2.7673110961914062, "eval_runtime": 43.038, "eval_samples_per_second": 150.634, "eval_steps_per_second": 2.533, "step": 87000 }, { "epoch": 0.79, "learning_rate": 6.41073482659614e-05, "loss": 2.7544, "step": 87100 }, { "epoch": 0.79, "learning_rate": 6.383834358328728e-05, "loss": 2.7546, "step": 87200 }, { "epoch": 0.79, "learning_rate": 6.356662168159627e-05, "loss": 2.7564, "step": 87300 }, { "epoch": 0.79, "learning_rate": 6.329489977990525e-05, "loss": 2.759, "step": 87400 }, { "epoch": 0.79, "learning_rate": 6.302317787821423e-05, "loss": 2.7586, "step": 87500 }, { "epoch": 0.79, "learning_rate": 6.275145597652322e-05, "loss": 2.7546, "step": 87600 }, { "epoch": 0.79, "learning_rate": 6.247973407483221e-05, "loss": 2.7548, "step": 87700 }, { "epoch": 0.79, "learning_rate": 6.220801217314118e-05, "loss": 2.7527, "step": 87800 }, { "epoch": 0.79, "learning_rate": 6.193629027145017e-05, "loss": 2.7607, "step": 87900 }, { "epoch": 0.8, "learning_rate": 6.166456836975916e-05, "loss": 2.7592, "step": 88000 }, { "epoch": 0.8, "eval_accuracy": 0.46232166783109974, "eval_loss": 2.7691469192504883, "eval_runtime": 43.5697, "eval_samples_per_second": 148.796, "eval_steps_per_second": 2.502, "step": 88000 }, { "epoch": 0.8, "learning_rate": 6.139284646806815e-05, "loss": 2.7481, "step": 88100 }, { "epoch": 0.8, "learning_rate": 6.112112456637712e-05, "loss": 2.7579, "step": 88200 }, { "epoch": 0.8, "learning_rate": 6.0849402664686106e-05, "loss": 2.7559, "step": 88300 }, { "epoch": 0.8, "learning_rate": 6.05776807629951e-05, "loss": 2.7515, "step": 88400 }, { "epoch": 0.8, "learning_rate": 6.030595886130408e-05, "loss": 2.7524, "step": 88500 }, { "epoch": 0.8, "learning_rate": 6.003423695961306e-05, "loss": 2.7395, "step": 88600 }, { "epoch": 0.8, "learning_rate": 5.976251505792205e-05, "loss": 2.7438, "step": 88700 }, { "epoch": 0.8, "learning_rate": 5.949079315623103e-05, "loss": 2.7468, "step": 88800 }, { "epoch": 0.8, "learning_rate": 5.921907125454001e-05, "loss": 2.7423, "step": 88900 }, { "epoch": 0.8, "learning_rate": 5.8947349352849e-05, "loss": 2.7551, "step": 89000 }, { "epoch": 0.8, "eval_accuracy": 0.4633808482516869, "eval_loss": 2.760658025741577, "eval_runtime": 43.7777, "eval_samples_per_second": 148.089, "eval_steps_per_second": 2.49, "step": 89000 }, { "epoch": 0.81, "learning_rate": 5.867562745115798e-05, "loss": 2.7352, "step": 89100 }, { "epoch": 0.81, "learning_rate": 5.8403905549466965e-05, "loss": 2.751, "step": 89200 }, { "epoch": 0.81, "learning_rate": 5.813490086679286e-05, "loss": 2.7456, "step": 89300 }, { "epoch": 0.81, "learning_rate": 5.7863178965101844e-05, "loss": 2.7491, "step": 89400 }, { "epoch": 0.81, "learning_rate": 5.759145706341083e-05, "loss": 2.7477, "step": 89500 }, { "epoch": 0.81, "learning_rate": 5.7319735161719815e-05, "loss": 2.7431, "step": 89600 }, { "epoch": 0.81, "learning_rate": 5.70480132600288e-05, "loss": 2.7406, "step": 89700 }, { "epoch": 0.81, "learning_rate": 5.6776291358337786e-05, "loss": 2.7444, "step": 89800 }, { "epoch": 0.81, "learning_rate": 5.650456945664677e-05, "loss": 2.7437, "step": 89900 }, { "epoch": 0.81, "learning_rate": 5.623284755495574e-05, "loss": 2.7397, "step": 90000 }, { "epoch": 0.81, "eval_accuracy": 0.4636597072887461, "eval_loss": 2.7578768730163574, "eval_runtime": 43.3807, "eval_samples_per_second": 149.444, "eval_steps_per_second": 2.513, "step": 90000 }, { "epoch": 0.81, "learning_rate": 5.596112565326473e-05, "loss": 2.7456, "step": 90100 }, { "epoch": 0.82, "learning_rate": 5.5689403751573714e-05, "loss": 2.7393, "step": 90200 }, { "epoch": 0.82, "learning_rate": 5.54176818498827e-05, "loss": 2.74, "step": 90300 }, { "epoch": 0.82, "learning_rate": 5.5145959948191685e-05, "loss": 2.7411, "step": 90400 }, { "epoch": 0.82, "learning_rate": 5.487695526551758e-05, "loss": 2.747, "step": 90500 }, { "epoch": 0.82, "learning_rate": 5.4605233363826564e-05, "loss": 2.741, "step": 90600 }, { "epoch": 0.82, "learning_rate": 5.433622868115246e-05, "loss": 2.7441, "step": 90700 }, { "epoch": 0.82, "learning_rate": 5.406450677946144e-05, "loss": 2.7447, "step": 90800 }, { "epoch": 0.82, "learning_rate": 5.3792784877770425e-05, "loss": 2.7517, "step": 90900 }, { "epoch": 0.82, "learning_rate": 5.3521062976079414e-05, "loss": 2.7357, "step": 91000 }, { "epoch": 0.82, "eval_accuracy": 0.4636022417605018, "eval_loss": 2.758023738861084, "eval_runtime": 43.2538, "eval_samples_per_second": 149.883, "eval_steps_per_second": 2.52, "step": 91000 }, { "epoch": 0.82, "learning_rate": 5.3249341074388396e-05, "loss": 2.7429, "step": 91100 }, { "epoch": 0.82, "learning_rate": 5.297761917269738e-05, "loss": 2.7445, "step": 91200 }, { "epoch": 0.83, "learning_rate": 5.270589727100637e-05, "loss": 2.7473, "step": 91300 }, { "epoch": 0.83, "learning_rate": 5.243417536931535e-05, "loss": 2.7404, "step": 91400 }, { "epoch": 0.83, "learning_rate": 5.216245346762434e-05, "loss": 2.7401, "step": 91500 }, { "epoch": 0.83, "learning_rate": 5.189073156593331e-05, "loss": 2.7441, "step": 91600 }, { "epoch": 0.83, "learning_rate": 5.1619009664242295e-05, "loss": 2.737, "step": 91700 }, { "epoch": 0.83, "learning_rate": 5.1347287762551284e-05, "loss": 2.7337, "step": 91800 }, { "epoch": 0.83, "learning_rate": 5.1075565860860266e-05, "loss": 2.7422, "step": 91900 }, { "epoch": 0.83, "learning_rate": 5.080384395916925e-05, "loss": 2.7452, "step": 92000 }, { "epoch": 0.83, "eval_accuracy": 0.46426944678843307, "eval_loss": 2.751744031906128, "eval_runtime": 44.8905, "eval_samples_per_second": 144.418, "eval_steps_per_second": 2.428, "step": 92000 }, { "epoch": 0.83, "learning_rate": 5.0532122057478237e-05, "loss": 2.7387, "step": 92100 }, { "epoch": 0.83, "learning_rate": 5.026311737480413e-05, "loss": 2.7342, "step": 92200 }, { "epoch": 0.83, "learning_rate": 4.9991395473113116e-05, "loss": 2.7349, "step": 92300 }, { "epoch": 0.84, "learning_rate": 4.97196735714221e-05, "loss": 2.7388, "step": 92400 }, { "epoch": 0.84, "learning_rate": 4.944795166973108e-05, "loss": 2.7397, "step": 92500 }, { "epoch": 0.84, "learning_rate": 4.917622976804007e-05, "loss": 2.7352, "step": 92600 }, { "epoch": 0.84, "learning_rate": 4.890450786634905e-05, "loss": 2.7392, "step": 92700 }, { "epoch": 0.84, "learning_rate": 4.863278596465803e-05, "loss": 2.7419, "step": 92800 }, { "epoch": 0.84, "learning_rate": 4.836106406296702e-05, "loss": 2.738, "step": 92900 }, { "epoch": 0.84, "learning_rate": 4.8089342161276004e-05, "loss": 2.7418, "step": 93000 }, { "epoch": 0.84, "eval_accuracy": 0.46412548051767366, "eval_loss": 2.7533059120178223, "eval_runtime": 43.1643, "eval_samples_per_second": 150.193, "eval_steps_per_second": 2.525, "step": 93000 }, { "epoch": 0.84, "learning_rate": 4.781762025958498e-05, "loss": 2.7372, "step": 93100 }, { "epoch": 0.84, "learning_rate": 4.7545898357893974e-05, "loss": 2.7369, "step": 93200 }, { "epoch": 0.84, "learning_rate": 4.727417645620295e-05, "loss": 2.7331, "step": 93300 }, { "epoch": 0.84, "learning_rate": 4.700245455451193e-05, "loss": 2.7379, "step": 93400 }, { "epoch": 0.85, "learning_rate": 4.673073265282092e-05, "loss": 2.7341, "step": 93500 }, { "epoch": 0.85, "learning_rate": 4.64590107511299e-05, "loss": 2.7359, "step": 93600 }, { "epoch": 0.85, "learning_rate": 4.618728884943889e-05, "loss": 2.737, "step": 93700 }, { "epoch": 0.85, "learning_rate": 4.5915566947747873e-05, "loss": 2.7343, "step": 93800 }, { "epoch": 0.85, "learning_rate": 4.564656226507377e-05, "loss": 2.7346, "step": 93900 }, { "epoch": 0.85, "learning_rate": 4.537484036338275e-05, "loss": 2.7379, "step": 94000 }, { "epoch": 0.85, "eval_accuracy": 0.46473280041617143, "eval_loss": 2.748091697692871, "eval_runtime": 43.4169, "eval_samples_per_second": 149.32, "eval_steps_per_second": 2.511, "step": 94000 }, { "epoch": 0.85, "learning_rate": 4.5103118461691735e-05, "loss": 2.7341, "step": 94100 }, { "epoch": 0.85, "learning_rate": 4.4831396560000724e-05, "loss": 2.7431, "step": 94200 }, { "epoch": 0.85, "learning_rate": 4.4559674658309706e-05, "loss": 2.7347, "step": 94300 }, { "epoch": 0.85, "learning_rate": 4.428795275661869e-05, "loss": 2.7366, "step": 94400 }, { "epoch": 0.85, "learning_rate": 4.4016230854927676e-05, "loss": 2.7344, "step": 94500 }, { "epoch": 0.86, "learning_rate": 4.374450895323666e-05, "loss": 2.7382, "step": 94600 }, { "epoch": 0.86, "learning_rate": 4.347278705154564e-05, "loss": 2.7279, "step": 94700 }, { "epoch": 0.86, "learning_rate": 4.320106514985463e-05, "loss": 2.7307, "step": 94800 }, { "epoch": 0.86, "learning_rate": 4.292934324816361e-05, "loss": 2.7275, "step": 94900 }, { "epoch": 0.86, "learning_rate": 4.26603385654895e-05, "loss": 2.7308, "step": 95000 }, { "epoch": 0.86, "eval_accuracy": 0.4653649212268588, "eval_loss": 2.7459847927093506, "eval_runtime": 43.1356, "eval_samples_per_second": 150.294, "eval_steps_per_second": 2.527, "step": 95000 }, { "epoch": 0.86, "learning_rate": 4.2388616663798484e-05, "loss": 2.7304, "step": 95100 }, { "epoch": 0.86, "learning_rate": 4.211689476210747e-05, "loss": 2.7334, "step": 95200 }, { "epoch": 0.86, "learning_rate": 4.1845172860416455e-05, "loss": 2.7324, "step": 95300 }, { "epoch": 0.86, "learning_rate": 4.157345095872544e-05, "loss": 2.7338, "step": 95400 }, { "epoch": 0.86, "learning_rate": 4.1301729057034425e-05, "loss": 2.7334, "step": 95500 }, { "epoch": 0.86, "learning_rate": 4.103000715534341e-05, "loss": 2.7323, "step": 95600 }, { "epoch": 0.87, "learning_rate": 4.075828525365239e-05, "loss": 2.7338, "step": 95700 }, { "epoch": 0.87, "learning_rate": 4.048656335196138e-05, "loss": 2.73, "step": 95800 }, { "epoch": 0.87, "learning_rate": 4.021484145027036e-05, "loss": 2.7367, "step": 95900 }, { "epoch": 0.87, "learning_rate": 3.994311954857934e-05, "loss": 2.727, "step": 96000 }, { "epoch": 0.87, "eval_accuracy": 0.46549799929226665, "eval_loss": 2.740849018096924, "eval_runtime": 43.5693, "eval_samples_per_second": 148.797, "eval_steps_per_second": 2.502, "step": 96000 }, { "epoch": 0.87, "learning_rate": 3.967139764688833e-05, "loss": 2.7257, "step": 96100 }, { "epoch": 0.87, "learning_rate": 3.939967574519731e-05, "loss": 2.7251, "step": 96200 }, { "epoch": 0.87, "learning_rate": 3.9127953843506295e-05, "loss": 2.7236, "step": 96300 }, { "epoch": 0.87, "learning_rate": 3.8856231941815284e-05, "loss": 2.7224, "step": 96400 }, { "epoch": 0.87, "learning_rate": 3.8584510040124266e-05, "loss": 2.7204, "step": 96500 }, { "epoch": 0.87, "learning_rate": 3.831278813843325e-05, "loss": 2.7249, "step": 96600 }, { "epoch": 0.87, "learning_rate": 3.804106623674224e-05, "loss": 2.7214, "step": 96700 }, { "epoch": 0.88, "learning_rate": 3.776934433505122e-05, "loss": 2.7242, "step": 96800 }, { "epoch": 0.88, "learning_rate": 3.74976224333602e-05, "loss": 2.7147, "step": 96900 }, { "epoch": 0.88, "learning_rate": 3.722861775068609e-05, "loss": 2.7282, "step": 97000 }, { "epoch": 0.88, "eval_accuracy": 0.4663823635269317, "eval_loss": 2.7350597381591797, "eval_runtime": 43.4285, "eval_samples_per_second": 149.28, "eval_steps_per_second": 2.51, "step": 97000 }, { "epoch": 0.88, "learning_rate": 3.695689584899508e-05, "loss": 2.718, "step": 97100 }, { "epoch": 0.88, "learning_rate": 3.668517394730406e-05, "loss": 2.7174, "step": 97200 }, { "epoch": 0.88, "learning_rate": 3.6413452045613044e-05, "loss": 2.7205, "step": 97300 }, { "epoch": 0.88, "learning_rate": 3.614173014392203e-05, "loss": 2.7195, "step": 97400 }, { "epoch": 0.88, "learning_rate": 3.5870008242231015e-05, "loss": 2.7172, "step": 97500 }, { "epoch": 0.88, "learning_rate": 3.559828634054e-05, "loss": 2.7128, "step": 97600 }, { "epoch": 0.88, "learning_rate": 3.532656443884898e-05, "loss": 2.7192, "step": 97700 }, { "epoch": 0.88, "learning_rate": 3.505484253715797e-05, "loss": 2.7191, "step": 97800 }, { "epoch": 0.89, "learning_rate": 3.478312063546695e-05, "loss": 2.7178, "step": 97900 }, { "epoch": 0.89, "learning_rate": 3.451139873377593e-05, "loss": 2.7133, "step": 98000 }, { "epoch": 0.89, "eval_accuracy": 0.46685176615764307, "eval_loss": 2.730079412460327, "eval_runtime": 43.3235, "eval_samples_per_second": 149.642, "eval_steps_per_second": 2.516, "step": 98000 }, { "epoch": 0.89, "learning_rate": 3.423967683208492e-05, "loss": 2.7164, "step": 98100 }, { "epoch": 0.89, "learning_rate": 3.39679549303939e-05, "loss": 2.7106, "step": 98200 }, { "epoch": 0.89, "learning_rate": 3.3696233028702885e-05, "loss": 2.715, "step": 98300 }, { "epoch": 0.89, "learning_rate": 3.3424511127011874e-05, "loss": 2.7091, "step": 98400 }, { "epoch": 0.89, "learning_rate": 3.3152789225320856e-05, "loss": 2.7093, "step": 98500 }, { "epoch": 0.89, "learning_rate": 3.288106732362984e-05, "loss": 2.7116, "step": 98600 }, { "epoch": 0.89, "learning_rate": 3.260934542193883e-05, "loss": 2.7172, "step": 98700 }, { "epoch": 0.89, "learning_rate": 3.233762352024781e-05, "loss": 2.7072, "step": 98800 }, { "epoch": 0.89, "learning_rate": 3.206590161855679e-05, "loss": 2.7165, "step": 98900 }, { "epoch": 0.9, "learning_rate": 3.179417971686577e-05, "loss": 2.7136, "step": 99000 }, { "epoch": 0.9, "eval_accuracy": 0.4673356863954899, "eval_loss": 2.7250616550445557, "eval_runtime": 43.1535, "eval_samples_per_second": 150.231, "eval_steps_per_second": 2.526, "step": 99000 }, { "epoch": 0.9, "learning_rate": 3.152245781517476e-05, "loss": 2.7117, "step": 99100 }, { "epoch": 0.9, "learning_rate": 3.1250735913483744e-05, "loss": 2.7099, "step": 99200 }, { "epoch": 0.9, "learning_rate": 3.0979014011792726e-05, "loss": 2.715, "step": 99300 }, { "epoch": 0.9, "learning_rate": 3.0707292110101715e-05, "loss": 2.7119, "step": 99400 }, { "epoch": 0.9, "learning_rate": 3.0435570208410697e-05, "loss": 2.7136, "step": 99500 }, { "epoch": 0.9, "learning_rate": 3.016384830671968e-05, "loss": 2.7069, "step": 99600 }, { "epoch": 0.9, "learning_rate": 2.9892126405028664e-05, "loss": 2.7092, "step": 99700 }, { "epoch": 0.9, "learning_rate": 2.962040450333765e-05, "loss": 2.7052, "step": 99800 }, { "epoch": 0.9, "learning_rate": 2.934868260164663e-05, "loss": 2.7099, "step": 99900 }, { "epoch": 0.9, "learning_rate": 2.9076960699955617e-05, "loss": 2.7108, "step": 100000 }, { "epoch": 0.9, "eval_accuracy": 0.46786981335801325, "eval_loss": 2.7208478450775146, "eval_runtime": 43.4331, "eval_samples_per_second": 149.264, "eval_steps_per_second": 2.51, "step": 100000 }, { "epoch": 0.91, "learning_rate": 2.8807956017281514e-05, "loss": 2.7137, "step": 100100 }, { "epoch": 0.91, "learning_rate": 2.8536234115590493e-05, "loss": 2.7069, "step": 100200 }, { "epoch": 0.91, "learning_rate": 2.8264512213899478e-05, "loss": 2.698, "step": 100300 }, { "epoch": 0.91, "learning_rate": 2.7992790312208464e-05, "loss": 2.7027, "step": 100400 }, { "epoch": 0.91, "learning_rate": 2.7721068410517446e-05, "loss": 2.7062, "step": 100500 }, { "epoch": 0.91, "learning_rate": 2.744934650882643e-05, "loss": 2.7064, "step": 100600 }, { "epoch": 0.91, "learning_rate": 2.718034182615232e-05, "loss": 2.7059, "step": 100700 }, { "epoch": 0.91, "learning_rate": 2.691133714347822e-05, "loss": 2.7146, "step": 100800 }, { "epoch": 0.91, "learning_rate": 2.6639615241787204e-05, "loss": 2.7036, "step": 100900 }, { "epoch": 0.91, "learning_rate": 2.6367893340096186e-05, "loss": 2.7051, "step": 101000 }, { "epoch": 0.91, "eval_accuracy": 0.46807245495761163, "eval_loss": 2.7191717624664307, "eval_runtime": 43.4633, "eval_samples_per_second": 149.16, "eval_steps_per_second": 2.508, "step": 101000 }, { "epoch": 0.91, "learning_rate": 2.609617143840517e-05, "loss": 2.7007, "step": 101100 }, { "epoch": 0.91, "learning_rate": 2.5824449536714157e-05, "loss": 2.7024, "step": 101200 }, { "epoch": 0.92, "learning_rate": 2.555272763502314e-05, "loss": 2.7027, "step": 101300 }, { "epoch": 0.92, "learning_rate": 2.5281005733332124e-05, "loss": 2.7082, "step": 101400 }, { "epoch": 0.92, "learning_rate": 2.500928383164111e-05, "loss": 2.7067, "step": 101500 }, { "epoch": 0.92, "learning_rate": 2.4737561929950092e-05, "loss": 2.7044, "step": 101600 }, { "epoch": 0.92, "learning_rate": 2.4465840028259074e-05, "loss": 2.705, "step": 101700 }, { "epoch": 0.92, "learning_rate": 2.419411812656806e-05, "loss": 2.7069, "step": 101800 }, { "epoch": 0.92, "learning_rate": 2.3922396224877045e-05, "loss": 2.7005, "step": 101900 }, { "epoch": 0.92, "learning_rate": 2.3650674323186027e-05, "loss": 2.7013, "step": 102000 }, { "epoch": 0.92, "eval_accuracy": 0.4687317962816779, "eval_loss": 2.7151107788085938, "eval_runtime": 43.2863, "eval_samples_per_second": 149.77, "eval_steps_per_second": 2.518, "step": 102000 }, { "epoch": 0.92, "learning_rate": 2.3378952421495012e-05, "loss": 2.7029, "step": 102100 }, { "epoch": 0.92, "learning_rate": 2.3107230519803998e-05, "loss": 2.7007, "step": 102200 }, { "epoch": 0.92, "learning_rate": 2.283550861811298e-05, "loss": 2.7089, "step": 102300 }, { "epoch": 0.93, "learning_rate": 2.2563786716421965e-05, "loss": 2.7018, "step": 102400 }, { "epoch": 0.93, "learning_rate": 2.229206481473095e-05, "loss": 2.6984, "step": 102500 }, { "epoch": 0.93, "learning_rate": 2.202306013205684e-05, "loss": 2.7011, "step": 102600 }, { "epoch": 0.93, "learning_rate": 2.1751338230365826e-05, "loss": 2.6968, "step": 102700 }, { "epoch": 0.93, "learning_rate": 2.1479616328674812e-05, "loss": 2.701, "step": 102800 }, { "epoch": 0.93, "learning_rate": 2.1207894426983794e-05, "loss": 2.7079, "step": 102900 }, { "epoch": 0.93, "learning_rate": 2.093617252529278e-05, "loss": 2.6996, "step": 103000 }, { "epoch": 0.93, "eval_accuracy": 0.46891387127116774, "eval_loss": 2.7129361629486084, "eval_runtime": 43.7353, "eval_samples_per_second": 148.233, "eval_steps_per_second": 2.492, "step": 103000 }, { "epoch": 0.93, "learning_rate": 2.0664450623601765e-05, "loss": 2.6985, "step": 103100 }, { "epoch": 0.93, "learning_rate": 2.0392728721910743e-05, "loss": 2.6945, "step": 103200 }, { "epoch": 0.93, "learning_rate": 2.012100682021973e-05, "loss": 2.6988, "step": 103300 }, { "epoch": 0.93, "learning_rate": 1.9849284918528714e-05, "loss": 2.701, "step": 103400 }, { "epoch": 0.94, "learning_rate": 1.9577563016837696e-05, "loss": 2.7044, "step": 103500 }, { "epoch": 0.94, "learning_rate": 1.930584111514668e-05, "loss": 2.6897, "step": 103600 }, { "epoch": 0.94, "learning_rate": 1.9034119213455667e-05, "loss": 2.6993, "step": 103700 }, { "epoch": 0.94, "learning_rate": 1.8762397311764652e-05, "loss": 2.6978, "step": 103800 }, { "epoch": 0.94, "learning_rate": 1.8490675410073634e-05, "loss": 2.6965, "step": 103900 }, { "epoch": 0.94, "learning_rate": 1.821895350838262e-05, "loss": 2.6898, "step": 104000 }, { "epoch": 0.94, "eval_accuracy": 0.46940021111020375, "eval_loss": 2.7084131240844727, "eval_runtime": 44.0036, "eval_samples_per_second": 147.329, "eval_steps_per_second": 2.477, "step": 104000 }, { "epoch": 0.94, "learning_rate": 1.7947231606691602e-05, "loss": 2.6918, "step": 104100 }, { "epoch": 0.94, "learning_rate": 1.7675509705000587e-05, "loss": 2.6941, "step": 104200 }, { "epoch": 0.94, "learning_rate": 1.7403787803309573e-05, "loss": 2.6954, "step": 104300 }, { "epoch": 0.94, "learning_rate": 1.7132065901618555e-05, "loss": 2.7015, "step": 104400 }, { "epoch": 0.94, "learning_rate": 1.686034399992754e-05, "loss": 2.698, "step": 104500 }, { "epoch": 0.95, "learning_rate": 1.6588622098236522e-05, "loss": 2.6922, "step": 104600 }, { "epoch": 0.95, "learning_rate": 1.6319617415562416e-05, "loss": 2.6932, "step": 104700 }, { "epoch": 0.95, "learning_rate": 1.60478955138714e-05, "loss": 2.6887, "step": 104800 }, { "epoch": 0.95, "learning_rate": 1.5776173612180387e-05, "loss": 2.6887, "step": 104900 }, { "epoch": 0.95, "learning_rate": 1.550445171048937e-05, "loss": 2.688, "step": 105000 }, { "epoch": 0.95, "eval_accuracy": 0.4697316964731288, "eval_loss": 2.705327272415161, "eval_runtime": 43.7246, "eval_samples_per_second": 148.269, "eval_steps_per_second": 2.493, "step": 105000 }, { "epoch": 0.95, "learning_rate": 1.5232729808798354e-05, "loss": 2.6933, "step": 105100 }, { "epoch": 0.95, "learning_rate": 1.4961007907107338e-05, "loss": 2.6992, "step": 105200 }, { "epoch": 0.95, "learning_rate": 1.468928600541632e-05, "loss": 2.6943, "step": 105300 }, { "epoch": 0.95, "learning_rate": 1.4417564103725306e-05, "loss": 2.6919, "step": 105400 }, { "epoch": 0.95, "learning_rate": 1.414584220203429e-05, "loss": 2.6961, "step": 105500 }, { "epoch": 0.95, "learning_rate": 1.3874120300343275e-05, "loss": 2.6942, "step": 105600 }, { "epoch": 0.96, "learning_rate": 1.3602398398652258e-05, "loss": 2.6936, "step": 105700 }, { "epoch": 0.96, "learning_rate": 1.3330676496961242e-05, "loss": 2.6851, "step": 105800 }, { "epoch": 0.96, "learning_rate": 1.3058954595270228e-05, "loss": 2.6929, "step": 105900 }, { "epoch": 0.96, "learning_rate": 1.278723269357921e-05, "loss": 2.6855, "step": 106000 }, { "epoch": 0.96, "eval_accuracy": 0.4701273012675686, "eval_loss": 2.701770305633545, "eval_runtime": 44.1379, "eval_samples_per_second": 146.881, "eval_steps_per_second": 2.47, "step": 106000 }, { "epoch": 0.96, "learning_rate": 1.2515510791888195e-05, "loss": 2.6922, "step": 106100 }, { "epoch": 0.96, "learning_rate": 1.2243788890197179e-05, "loss": 2.6811, "step": 106200 }, { "epoch": 0.96, "learning_rate": 1.1972066988506163e-05, "loss": 2.6819, "step": 106300 }, { "epoch": 0.96, "learning_rate": 1.1700345086815148e-05, "loss": 2.6882, "step": 106400 }, { "epoch": 0.96, "learning_rate": 1.142862318512413e-05, "loss": 2.685, "step": 106500 }, { "epoch": 0.96, "learning_rate": 1.1159618502450025e-05, "loss": 2.6841, "step": 106600 }, { "epoch": 0.96, "learning_rate": 1.0887896600759008e-05, "loss": 2.6806, "step": 106700 }, { "epoch": 0.97, "learning_rate": 1.0616174699067993e-05, "loss": 2.6896, "step": 106800 }, { "epoch": 0.97, "learning_rate": 1.0344452797376977e-05, "loss": 2.6807, "step": 106900 }, { "epoch": 0.97, "learning_rate": 1.0072730895685962e-05, "loss": 2.6852, "step": 107000 }, { "epoch": 0.97, "eval_accuracy": 0.4704999198507106, "eval_loss": 2.698939085006714, "eval_runtime": 43.9086, "eval_samples_per_second": 147.648, "eval_steps_per_second": 2.482, "step": 107000 }, { "epoch": 0.97, "learning_rate": 9.803726213011856e-06, "loss": 2.6861, "step": 107100 }, { "epoch": 0.97, "learning_rate": 9.53200431132084e-06, "loss": 2.6886, "step": 107200 }, { "epoch": 0.97, "learning_rate": 9.260282409629823e-06, "loss": 2.6872, "step": 107300 }, { "epoch": 0.97, "learning_rate": 8.988560507938807e-06, "loss": 2.685, "step": 107400 }, { "epoch": 0.97, "learning_rate": 8.71683860624779e-06, "loss": 2.6892, "step": 107500 }, { "epoch": 0.97, "learning_rate": 8.445116704556776e-06, "loss": 2.6815, "step": 107600 }, { "epoch": 0.97, "learning_rate": 8.17339480286576e-06, "loss": 2.6879, "step": 107700 }, { "epoch": 0.97, "learning_rate": 7.901672901174744e-06, "loss": 2.6822, "step": 107800 }, { "epoch": 0.98, "learning_rate": 7.629950999483727e-06, "loss": 2.6806, "step": 107900 }, { "epoch": 0.98, "learning_rate": 7.360946316809621e-06, "loss": 2.689, "step": 108000 }, { "epoch": 0.98, "eval_accuracy": 0.4705204864608191, "eval_loss": 2.6981818675994873, "eval_runtime": 43.1633, "eval_samples_per_second": 150.197, "eval_steps_per_second": 2.525, "step": 108000 }, { "epoch": 0.98, "learning_rate": 7.089224415118606e-06, "loss": 2.6872, "step": 108100 }, { "epoch": 0.98, "learning_rate": 6.81750251342759e-06, "loss": 2.6962, "step": 108200 }, { "epoch": 0.98, "learning_rate": 6.545780611736574e-06, "loss": 2.6831, "step": 108300 }, { "epoch": 0.98, "learning_rate": 6.274058710045559e-06, "loss": 2.6877, "step": 108400 }, { "epoch": 0.98, "learning_rate": 6.0023368083545415e-06, "loss": 2.6956, "step": 108500 }, { "epoch": 0.98, "learning_rate": 5.730614906663526e-06, "loss": 2.6936, "step": 108600 }, { "epoch": 0.98, "learning_rate": 5.458893004972511e-06, "loss": 2.6864, "step": 108700 }, { "epoch": 0.98, "learning_rate": 5.187171103281495e-06, "loss": 2.6838, "step": 108800 }, { "epoch": 0.98, "learning_rate": 4.915449201590478e-06, "loss": 2.6867, "step": 108900 }, { "epoch": 0.99, "learning_rate": 4.643727299899463e-06, "loss": 2.6868, "step": 109000 }, { "epoch": 0.99, "eval_accuracy": 0.4707297819636878, "eval_loss": 2.6994001865386963, "eval_runtime": 43.0302, "eval_samples_per_second": 150.662, "eval_steps_per_second": 2.533, "step": 109000 }, { "epoch": 0.99, "learning_rate": 4.3720053982084465e-06, "loss": 2.689, "step": 109100 }, { "epoch": 0.99, "learning_rate": 4.10028349651743e-06, "loss": 2.6831, "step": 109200 }, { "epoch": 0.99, "learning_rate": 3.831278813843325e-06, "loss": 2.6825, "step": 109300 }, { "epoch": 0.99, "learning_rate": 3.559556912152309e-06, "loss": 2.6851, "step": 109400 }, { "epoch": 0.99, "learning_rate": 3.2878350104612927e-06, "loss": 2.6798, "step": 109500 }, { "epoch": 0.99, "learning_rate": 3.016113108770277e-06, "loss": 2.6773, "step": 109600 }, { "epoch": 0.99, "learning_rate": 2.744391207079261e-06, "loss": 2.6829, "step": 109700 }, { "epoch": 0.99, "learning_rate": 2.472669305388245e-06, "loss": 2.6819, "step": 109800 }, { "epoch": 0.99, "learning_rate": 2.2036646227141394e-06, "loss": 2.6827, "step": 109900 }, { "epoch": 0.99, "learning_rate": 1.931942721023123e-06, "loss": 2.6901, "step": 110000 }, { "epoch": 0.99, "eval_accuracy": 0.47069106834466007, "eval_loss": 2.700648307800293, "eval_runtime": 43.0535, "eval_samples_per_second": 150.58, "eval_steps_per_second": 2.532, "step": 110000 }, { "epoch": 1.0, "learning_rate": 1.6602208193321073e-06, "loss": 2.6809, "step": 110100 }, { "epoch": 1.0, "learning_rate": 1.3884989176410914e-06, "loss": 2.6866, "step": 110200 }, { "epoch": 1.0, "learning_rate": 1.1167770159500756e-06, "loss": 2.6863, "step": 110300 }, { "epoch": 1.0, "learning_rate": 8.450551142590596e-07, "loss": 2.6912, "step": 110400 }, { "epoch": 1.0, "learning_rate": 5.733332125680437e-07, "loss": 2.6916, "step": 110500 }, { "epoch": 1.0, "learning_rate": 3.0161131087702765e-07, "loss": 2.684, "step": 110600 }, { "epoch": 1.0, "step": 110607, "total_flos": 2.899312376933253e+20, "train_loss": 2.8584754099769967, "train_runtime": 318077.2613, "train_samples_per_second": 83.457, "train_steps_per_second": 0.348 } ], "logging_steps": 100, "max_steps": 110607, "num_train_epochs": 1, "save_steps": 11061, "total_flos": 2.899312376933253e+20, "trial_name": null, "trial_params": null }