|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999954795108831, |
|
"eval_steps": 1000, |
|
"global_step": 110607, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00024521369940743435, |
|
"loss": 5.4472, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002927618508292659, |
|
"loss": 3.467, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0002997989257927486, |
|
"loss": 3.3454, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00029952720389105764, |
|
"loss": 3.2872, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00029925548198936656, |
|
"loss": 3.2489, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002989837600876756, |
|
"loss": 3.2458, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029871475540500146, |
|
"loss": 3.2073, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029844303350331044, |
|
"loss": 3.1957, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002981713116016194, |
|
"loss": 3.1817, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029789958969992844, |
|
"loss": 3.1631, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.4182117332510669, |
|
"eval_loss": 3.180420398712158, |
|
"eval_runtime": 43.6723, |
|
"eval_samples_per_second": 148.447, |
|
"eval_steps_per_second": 2.496, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002976278677982374, |
|
"loss": 3.1598, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002973561458965464, |
|
"loss": 3.1584, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029708442399485537, |
|
"loss": 3.144, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029681270209316434, |
|
"loss": 3.1346, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0002965409801914733, |
|
"loss": 3.1359, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00029626925828978235, |
|
"loss": 3.1268, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002959975363880913, |
|
"loss": 3.1175, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002957258144864003, |
|
"loss": 3.1189, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002954540925847093, |
|
"loss": 3.1057, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029518237068301825, |
|
"loss": 3.1124, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.4272265623818554, |
|
"eval_loss": 3.106520891189575, |
|
"eval_runtime": 43.4484, |
|
"eval_samples_per_second": 149.212, |
|
"eval_steps_per_second": 2.509, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002949106487813272, |
|
"loss": 3.1004, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002946389268796362, |
|
"loss": 3.1018, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029436720497794523, |
|
"loss": 3.0864, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002940954830762542, |
|
"loss": 3.0872, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002938237611745632, |
|
"loss": 3.0883, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00029355475649188906, |
|
"loss": 3.0843, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002932830345901981, |
|
"loss": 3.0815, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029301131268850706, |
|
"loss": 3.0784, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029273959078681604, |
|
"loss": 3.0728, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000292467868885125, |
|
"loss": 3.0757, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.42875514543315396, |
|
"eval_loss": 3.0894298553466797, |
|
"eval_runtime": 43.8742, |
|
"eval_samples_per_second": 147.763, |
|
"eval_steps_per_second": 2.484, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000292196146983434, |
|
"loss": 3.0754, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.000291924425081743, |
|
"loss": 3.0634, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029165270318005194, |
|
"loss": 3.0652, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029138098127836097, |
|
"loss": 3.0566, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029110925937666994, |
|
"loss": 3.067, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002908375374749789, |
|
"loss": 3.0525, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002905658155732879, |
|
"loss": 3.0595, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029029409367159687, |
|
"loss": 3.0586, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002900223717699059, |
|
"loss": 3.0499, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002897506498682148, |
|
"loss": 3.0488, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.43185525945686004, |
|
"eval_loss": 3.062988758087158, |
|
"eval_runtime": 44.0507, |
|
"eval_samples_per_second": 147.171, |
|
"eval_steps_per_second": 2.474, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00028947892796652385, |
|
"loss": 3.0426, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002892072060648328, |
|
"loss": 3.0433, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002889354841631418, |
|
"loss": 3.0428, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00028866376226145083, |
|
"loss": 3.0359, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00028839204035975975, |
|
"loss": 3.0386, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002881203184580688, |
|
"loss": 3.034, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00028784859655637776, |
|
"loss": 3.0456, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00028757687465468673, |
|
"loss": 3.0361, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002873051527529957, |
|
"loss": 3.0412, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002870334308513047, |
|
"loss": 3.0403, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.43361975362410893, |
|
"eval_loss": 3.0423271656036377, |
|
"eval_runtime": 43.1088, |
|
"eval_samples_per_second": 150.387, |
|
"eval_steps_per_second": 2.528, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002867617089496137, |
|
"loss": 3.0341, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00028648998704792263, |
|
"loss": 3.0297, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00028621826514623166, |
|
"loss": 3.0324, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00028594654324454064, |
|
"loss": 3.0317, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002856748213428496, |
|
"loss": 3.0167, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002854030994411586, |
|
"loss": 3.0202, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00028513137753946756, |
|
"loss": 3.0231, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002848596556377766, |
|
"loss": 3.0166, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002845879337360855, |
|
"loss": 3.0246, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00028431621183439454, |
|
"loss": 3.0172, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.434315993866311, |
|
"eval_loss": 3.038356304168701, |
|
"eval_runtime": 43.5763, |
|
"eval_samples_per_second": 148.774, |
|
"eval_steps_per_second": 2.501, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002840444899327035, |
|
"loss": 3.0123, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002837727680310125, |
|
"loss": 3.0177, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002835010461293215, |
|
"loss": 3.0195, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002832320414466474, |
|
"loss": 3.0175, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002829603195449564, |
|
"loss": 3.0192, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028268859764326535, |
|
"loss": 3.0079, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002824168757415743, |
|
"loss": 3.0138, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002821451538398833, |
|
"loss": 3.0175, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028187343193819233, |
|
"loss": 3.0148, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00028160171003650125, |
|
"loss": 3.0102, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.43602120780442366, |
|
"eval_loss": 3.026742696762085, |
|
"eval_runtime": 43.2189, |
|
"eval_samples_per_second": 150.004, |
|
"eval_steps_per_second": 2.522, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002813299881348103, |
|
"loss": 3.0111, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00028105826623311925, |
|
"loss": 3.0077, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00028078654433142823, |
|
"loss": 3.0055, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002805148224297372, |
|
"loss": 3.0084, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002802431005280462, |
|
"loss": 3.0124, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002799713786263552, |
|
"loss": 3.0051, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00027969965672466413, |
|
"loss": 3.0039, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00027942793482297316, |
|
"loss": 3.0033, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00027915621292128214, |
|
"loss": 3.0044, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002788844910195911, |
|
"loss": 2.9888, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.4361119428490199, |
|
"eval_loss": 3.0189716815948486, |
|
"eval_runtime": 43.5746, |
|
"eval_samples_per_second": 148.779, |
|
"eval_steps_per_second": 2.501, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00027861276911790014, |
|
"loss": 3.0097, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00027834104721620906, |
|
"loss": 3.008, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002780693253145181, |
|
"loss": 2.9979, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00027779760341282707, |
|
"loss": 2.994, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00027752588151113604, |
|
"loss": 2.9985, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000277254159609445, |
|
"loss": 2.9966, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000276982437707754, |
|
"loss": 2.9968, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000276710715806063, |
|
"loss": 2.9999, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000276438993904372, |
|
"loss": 2.9973, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00027616727200268097, |
|
"loss": 3.0024, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.4384970647213073, |
|
"eval_loss": 3.0039989948272705, |
|
"eval_runtime": 44.0911, |
|
"eval_samples_per_second": 147.036, |
|
"eval_steps_per_second": 2.472, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00027589555010098995, |
|
"loss": 3.0015, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002756238281992989, |
|
"loss": 2.9946, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002753521062976079, |
|
"loss": 2.9932, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002750803843959169, |
|
"loss": 2.9985, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002748086624942259, |
|
"loss": 2.9913, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002745369405925349, |
|
"loss": 2.9946, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027426521869084385, |
|
"loss": 2.99, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027399349678915283, |
|
"loss": 2.9927, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002737217748874618, |
|
"loss": 2.9883, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027345005298577083, |
|
"loss": 2.9948, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.43782985969337607, |
|
"eval_loss": 3.0057804584503174, |
|
"eval_runtime": 43.6419, |
|
"eval_samples_per_second": 148.55, |
|
"eval_steps_per_second": 2.498, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002731810483030967, |
|
"loss": 2.983, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002729093264014057, |
|
"loss": 2.9806, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027263760449971466, |
|
"loss": 2.9881, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027236588259802364, |
|
"loss": 2.9814, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00027209416069633267, |
|
"loss": 2.9824, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027182243879464164, |
|
"loss": 2.9885, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002715507168929506, |
|
"loss": 2.989, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002712789949912596, |
|
"loss": 2.986, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027100727308956857, |
|
"loss": 2.9856, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027073555118787754, |
|
"loss": 2.9774, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.438878756808909, |
|
"eval_loss": 2.9962034225463867, |
|
"eval_runtime": 43.3441, |
|
"eval_samples_per_second": 149.57, |
|
"eval_steps_per_second": 2.515, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002704638292861865, |
|
"loss": 2.9941, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00027019210738449555, |
|
"loss": 2.9799, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002699203854828045, |
|
"loss": 2.9834, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002696513808001304, |
|
"loss": 2.9767, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002693796588984394, |
|
"loss": 2.9772, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002691079369967484, |
|
"loss": 2.9891, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002688362150950573, |
|
"loss": 2.9787, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026856449319336635, |
|
"loss": 2.987, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026829277129167533, |
|
"loss": 2.979, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002680210493899843, |
|
"loss": 2.9818, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.4390166740766953, |
|
"eval_loss": 2.9964208602905273, |
|
"eval_runtime": 44.0294, |
|
"eval_samples_per_second": 147.242, |
|
"eval_steps_per_second": 2.476, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002677493274882933, |
|
"loss": 2.9798, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026747760558660225, |
|
"loss": 2.9833, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002672058836849113, |
|
"loss": 2.9787, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026693416178322026, |
|
"loss": 2.9807, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026666515710054614, |
|
"loss": 2.9846, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002663934351988551, |
|
"loss": 2.9758, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00026612171329716414, |
|
"loss": 2.9749, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00026584999139547306, |
|
"loss": 2.9688, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002655782694937821, |
|
"loss": 2.9886, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00026530654759209107, |
|
"loss": 2.9771, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.4395580598427864, |
|
"eval_loss": 2.991270065307617, |
|
"eval_runtime": 43.0298, |
|
"eval_samples_per_second": 150.663, |
|
"eval_steps_per_second": 2.533, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00026503482569040004, |
|
"loss": 2.9802, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000264763103788709, |
|
"loss": 2.9711, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000264491381887018, |
|
"loss": 2.9845, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000264219659985327, |
|
"loss": 2.9735, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00026394793808363594, |
|
"loss": 2.9731, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00026367621618194497, |
|
"loss": 2.9717, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00026340449428025395, |
|
"loss": 2.9718, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002631327723785629, |
|
"loss": 2.9766, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002628637676958888, |
|
"loss": 2.9812, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026259204579419783, |
|
"loss": 2.9786, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.43911829732664315, |
|
"eval_loss": 2.9915201663970947, |
|
"eval_runtime": 43.7467, |
|
"eval_samples_per_second": 148.194, |
|
"eval_steps_per_second": 2.492, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002623203238925068, |
|
"loss": 2.9757, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002620486019908158, |
|
"loss": 2.9781, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026177959730814166, |
|
"loss": 2.9733, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002615078754064507, |
|
"loss": 2.9773, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002612361535047596, |
|
"loss": 2.9755, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026096443160306864, |
|
"loss": 2.9837, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002606927097013776, |
|
"loss": 2.9786, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002604209877996866, |
|
"loss": 2.9709, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00026014926589799556, |
|
"loss": 2.9797, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025987754399630454, |
|
"loss": 2.9866, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.4393814289559723, |
|
"eval_loss": 2.9924139976501465, |
|
"eval_runtime": 43.2705, |
|
"eval_samples_per_second": 149.825, |
|
"eval_steps_per_second": 2.519, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025960582209461357, |
|
"loss": 2.976, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002593341001929225, |
|
"loss": 2.9674, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002590623782912315, |
|
"loss": 2.98, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002587906563895405, |
|
"loss": 2.9805, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025851893448784947, |
|
"loss": 2.9738, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025824721258615844, |
|
"loss": 2.9702, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002579754906844674, |
|
"loss": 2.9678, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00025770376878277645, |
|
"loss": 2.9699, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002574320468810854, |
|
"loss": 2.9717, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002571603249793944, |
|
"loss": 2.9751, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.43892109982972055, |
|
"eval_loss": 2.9917728900909424, |
|
"eval_runtime": 44.2385, |
|
"eval_samples_per_second": 146.547, |
|
"eval_steps_per_second": 2.464, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002568886030777034, |
|
"loss": 2.9653, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025661688117601235, |
|
"loss": 2.9817, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002563451592743214, |
|
"loss": 2.9652, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025607343737263035, |
|
"loss": 2.9704, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025580171547093933, |
|
"loss": 2.9727, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002555299935692483, |
|
"loss": 2.9743, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002552582716675573, |
|
"loss": 2.9719, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025498654976586626, |
|
"loss": 2.9615, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025471482786417523, |
|
"loss": 2.973, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025444310596248426, |
|
"loss": 2.9702, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.4393215438265388, |
|
"eval_loss": 2.992605447769165, |
|
"eval_runtime": 45.7096, |
|
"eval_samples_per_second": 141.83, |
|
"eval_steps_per_second": 2.385, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00025417410127981014, |
|
"loss": 2.9689, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002539023793781191, |
|
"loss": 2.9727, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002536306574764281, |
|
"loss": 2.9669, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00025335893557473706, |
|
"loss": 2.9717, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002530872136730461, |
|
"loss": 2.9646, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00025281549177135507, |
|
"loss": 2.9757, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00025254376986966404, |
|
"loss": 2.9679, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000252272047967973, |
|
"loss": 2.9691, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000252000326066282, |
|
"loss": 2.9718, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00025172860416459097, |
|
"loss": 2.9695, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.44013513472641874, |
|
"eval_loss": 2.981644868850708, |
|
"eval_runtime": 43.6409, |
|
"eval_samples_per_second": 148.553, |
|
"eval_steps_per_second": 2.498, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002514568822629, |
|
"loss": 2.9666, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002511878775802259, |
|
"loss": 2.9696, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00025091615567853485, |
|
"loss": 2.9687, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002506444337768438, |
|
"loss": 2.9674, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002503727118751528, |
|
"loss": 2.9655, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00025010098997346183, |
|
"loss": 2.9661, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002498292680717708, |
|
"loss": 2.9673, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002495575461700798, |
|
"loss": 2.9641, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00024928582426838876, |
|
"loss": 2.9598, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00024901410236669773, |
|
"loss": 2.9615, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.44022042566833924, |
|
"eval_loss": 2.982591390609741, |
|
"eval_runtime": 43.6998, |
|
"eval_samples_per_second": 148.353, |
|
"eval_steps_per_second": 2.494, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00024874238046500676, |
|
"loss": 2.958, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002484706585633157, |
|
"loss": 2.9688, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002481989366616247, |
|
"loss": 2.9603, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002479272147599337, |
|
"loss": 2.9625, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024765549285824266, |
|
"loss": 2.9611, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024738377095655164, |
|
"loss": 2.9594, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002471120490548606, |
|
"loss": 2.9648, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024684032715316964, |
|
"loss": 2.961, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002465686052514786, |
|
"loss": 2.9589, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002462968833497876, |
|
"loss": 2.9609, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.4406414362752659, |
|
"eval_loss": 2.9791083335876465, |
|
"eval_runtime": 44.2323, |
|
"eval_samples_per_second": 146.567, |
|
"eval_steps_per_second": 2.464, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024602516144809657, |
|
"loss": 2.962, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024575343954640554, |
|
"loss": 2.9566, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00024548171764471457, |
|
"loss": 2.964, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002452099957430235, |
|
"loss": 2.9573, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002449409910603494, |
|
"loss": 2.9621, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002446692691586584, |
|
"loss": 2.9568, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002443975472569674, |
|
"loss": 2.9643, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024412582535527635, |
|
"loss": 2.9614, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024385410345358535, |
|
"loss": 2.9546, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024358238155189433, |
|
"loss": 2.9607, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.44158508073906716, |
|
"eval_loss": 2.9684245586395264, |
|
"eval_runtime": 43.092, |
|
"eval_samples_per_second": 150.446, |
|
"eval_steps_per_second": 2.529, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024331065965020333, |
|
"loss": 2.9608, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024303893774851228, |
|
"loss": 2.9556, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024276721584682128, |
|
"loss": 2.9579, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024249549394513028, |
|
"loss": 2.9585, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00024222377204343926, |
|
"loss": 2.9544, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024195205014174823, |
|
"loss": 2.9614, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002416803282400572, |
|
"loss": 2.9536, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024141132355738314, |
|
"loss": 2.9556, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002411396016556921, |
|
"loss": 2.9559, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002408678797540011, |
|
"loss": 2.9533, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.4422038937432138, |
|
"eval_loss": 2.967719554901123, |
|
"eval_runtime": 42.9224, |
|
"eval_samples_per_second": 151.04, |
|
"eval_steps_per_second": 2.539, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024059615785231007, |
|
"loss": 2.9493, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024032443595061907, |
|
"loss": 2.9543, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00024005271404892804, |
|
"loss": 2.9565, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00023978099214723702, |
|
"loss": 2.9501, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00023950927024554602, |
|
"loss": 2.9395, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00023923754834385497, |
|
"loss": 2.9598, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023896854366118087, |
|
"loss": 2.9492, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023869682175948988, |
|
"loss": 2.947, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023842509985779885, |
|
"loss": 2.9573, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023815337795610783, |
|
"loss": 2.9513, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.4420853332849413, |
|
"eval_loss": 2.9676427841186523, |
|
"eval_runtime": 43.0836, |
|
"eval_samples_per_second": 150.475, |
|
"eval_steps_per_second": 2.53, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023788165605441683, |
|
"loss": 2.9472, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023760993415272578, |
|
"loss": 2.9513, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023733821225103478, |
|
"loss": 2.9542, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023706649034934378, |
|
"loss": 2.9497, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023679476844765276, |
|
"loss": 2.9565, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023652576376497863, |
|
"loss": 2.9518, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00023625404186328764, |
|
"loss": 2.9471, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023598231996159664, |
|
"loss": 2.956, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023571059805990559, |
|
"loss": 2.953, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002354388761582146, |
|
"loss": 2.9563, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.4428523468619285, |
|
"eval_loss": 2.9609880447387695, |
|
"eval_runtime": 42.9764, |
|
"eval_samples_per_second": 150.85, |
|
"eval_steps_per_second": 2.536, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023516715425652356, |
|
"loss": 2.9458, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023489543235483257, |
|
"loss": 2.9539, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023462371045314151, |
|
"loss": 2.9549, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023435198855145052, |
|
"loss": 2.9496, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023408026664975952, |
|
"loss": 2.9514, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002338085447480685, |
|
"loss": 2.9471, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023353682284637747, |
|
"loss": 2.9448, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00023326510094468644, |
|
"loss": 2.948, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023299337904299545, |
|
"loss": 2.9454, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023272165714130445, |
|
"loss": 2.9466, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.44241621374756906, |
|
"eval_loss": 2.9626522064208984, |
|
"eval_runtime": 43.5013, |
|
"eval_samples_per_second": 149.03, |
|
"eval_steps_per_second": 2.506, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002324499352396134, |
|
"loss": 2.9417, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002321782133379224, |
|
"loss": 2.9452, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023190649143623138, |
|
"loss": 2.9406, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023163476953454038, |
|
"loss": 2.945, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023136304763284933, |
|
"loss": 2.9419, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023109132573115833, |
|
"loss": 2.9452, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023081960382946733, |
|
"loss": 2.9435, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023054788192777628, |
|
"loss": 2.947, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00023027616002608528, |
|
"loss": 2.9343, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00023000443812439426, |
|
"loss": 2.9431, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.442384758932109, |
|
"eval_loss": 2.9589717388153076, |
|
"eval_runtime": 43.1206, |
|
"eval_samples_per_second": 150.346, |
|
"eval_steps_per_second": 2.528, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022973271622270326, |
|
"loss": 2.9431, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022946099432101226, |
|
"loss": 2.9477, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002291892724193212, |
|
"loss": 2.939, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002289175505176302, |
|
"loss": 2.9385, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002286458286159392, |
|
"loss": 2.944, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022837410671424816, |
|
"loss": 2.9404, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022810238481255716, |
|
"loss": 2.9334, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022783066291086614, |
|
"loss": 2.9419, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022755894100917514, |
|
"loss": 2.9432, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00022728993632650102, |
|
"loss": 2.9412, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.4435655243124552, |
|
"eval_loss": 2.952514410018921, |
|
"eval_runtime": 43.0804, |
|
"eval_samples_per_second": 150.486, |
|
"eval_steps_per_second": 2.53, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022701821442481, |
|
"loss": 2.9359, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.000226746492523119, |
|
"loss": 2.9426, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022647477062142797, |
|
"loss": 2.9307, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022620304871973695, |
|
"loss": 2.9353, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022593132681804595, |
|
"loss": 2.9353, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002256596049163549, |
|
"loss": 2.9403, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002253878830146639, |
|
"loss": 2.9393, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002251161611129729, |
|
"loss": 2.9313, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022484443921128188, |
|
"loss": 2.9348, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022457543452860775, |
|
"loss": 2.9299, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.4434947509776701, |
|
"eval_loss": 2.9504144191741943, |
|
"eval_runtime": 43.7459, |
|
"eval_samples_per_second": 148.197, |
|
"eval_steps_per_second": 2.492, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022430371262691676, |
|
"loss": 2.938, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00022403199072522576, |
|
"loss": 2.9353, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022376298604255164, |
|
"loss": 2.9329, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002234912641408606, |
|
"loss": 2.9311, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002232195422391696, |
|
"loss": 2.9377, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022294782033747856, |
|
"loss": 2.9303, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022267609843578756, |
|
"loss": 2.9278, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022240437653409657, |
|
"loss": 2.9394, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022213265463240551, |
|
"loss": 2.9332, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00022186093273071452, |
|
"loss": 2.9332, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.4434814431711293, |
|
"eval_loss": 2.9485716819763184, |
|
"eval_runtime": 41.8653, |
|
"eval_samples_per_second": 154.854, |
|
"eval_steps_per_second": 2.604, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002215892108290235, |
|
"loss": 2.9339, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002213174889273325, |
|
"loss": 2.9322, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002210457670256415, |
|
"loss": 2.9305, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00022077404512395044, |
|
"loss": 2.9321, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00022050232322225945, |
|
"loss": 2.9265, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00022023331853958532, |
|
"loss": 2.9247, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021996159663789433, |
|
"loss": 2.9312, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002196898747362033, |
|
"loss": 2.9288, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002194181528345123, |
|
"loss": 2.9328, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021914643093282125, |
|
"loss": 2.9255, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.444235753841873, |
|
"eval_loss": 2.942479372024536, |
|
"eval_runtime": 41.7184, |
|
"eval_samples_per_second": 155.399, |
|
"eval_steps_per_second": 2.613, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021887470903113025, |
|
"loss": 2.9265, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021860298712943923, |
|
"loss": 2.9184, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00021833126522774823, |
|
"loss": 2.9271, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002180622605450741, |
|
"loss": 2.9232, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002177905386433831, |
|
"loss": 2.9303, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021751881674169206, |
|
"loss": 2.9348, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021724709484000106, |
|
"loss": 2.9218, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021697537293831006, |
|
"loss": 2.9324, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021670365103661904, |
|
"loss": 2.9294, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021643192913492801, |
|
"loss": 2.9242, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.44344756875448005, |
|
"eval_loss": 2.945934534072876, |
|
"eval_runtime": 43.5276, |
|
"eval_samples_per_second": 148.94, |
|
"eval_steps_per_second": 2.504, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000216160207233237, |
|
"loss": 2.9231, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000215888485331546, |
|
"loss": 2.9269, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000215616763429855, |
|
"loss": 2.9247, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021534504152816394, |
|
"loss": 2.9236, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00021507331962647295, |
|
"loss": 2.9296, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021480159772478192, |
|
"loss": 2.9267, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021452987582309092, |
|
"loss": 2.9259, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021425815392139987, |
|
"loss": 2.9259, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021398643201970887, |
|
"loss": 2.9236, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021371471011801788, |
|
"loss": 2.9242, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.4445194520813107, |
|
"eval_loss": 2.9377670288085938, |
|
"eval_runtime": 43.8729, |
|
"eval_samples_per_second": 147.768, |
|
"eval_steps_per_second": 2.484, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021344298821632685, |
|
"loss": 2.9178, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021317126631463583, |
|
"loss": 2.9257, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002128995444129448, |
|
"loss": 2.9227, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002126278225112538, |
|
"loss": 2.9228, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002123561006095628, |
|
"loss": 2.9183, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00021208437870787175, |
|
"loss": 2.9196, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021181265680618076, |
|
"loss": 2.9143, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021154093490448973, |
|
"loss": 2.9192, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002112692130027987, |
|
"loss": 2.9187, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021099749110110768, |
|
"loss": 2.9267, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.4452544059425405, |
|
"eval_loss": 2.9316306114196777, |
|
"eval_runtime": 45.514, |
|
"eval_samples_per_second": 142.44, |
|
"eval_steps_per_second": 2.395, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021072576919941669, |
|
"loss": 2.9169, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002104540472977257, |
|
"loss": 2.9219, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00021018232539603464, |
|
"loss": 2.9096, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00020991060349434364, |
|
"loss": 2.9202, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002096388815926526, |
|
"loss": 2.9241, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00020936987690997852, |
|
"loss": 2.9148, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002090981550082875, |
|
"loss": 2.9194, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002088264331065965, |
|
"loss": 2.9267, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020855471120490544, |
|
"loss": 2.9164, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020828298930321445, |
|
"loss": 2.9151, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.44544252993500344, |
|
"eval_loss": 2.931532382965088, |
|
"eval_runtime": 43.496, |
|
"eval_samples_per_second": 149.048, |
|
"eval_steps_per_second": 2.506, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020801126740152345, |
|
"loss": 2.9178, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020773954549983242, |
|
"loss": 2.9119, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020746782359814143, |
|
"loss": 2.9143, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020719610169645037, |
|
"loss": 2.9084, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020692437979475938, |
|
"loss": 2.9227, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020665265789306835, |
|
"loss": 2.9159, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020638365321039425, |
|
"loss": 2.9151, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00020611193130870323, |
|
"loss": 2.9218, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020584020940701223, |
|
"loss": 2.9169, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020556848750532118, |
|
"loss": 2.9105, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.4455647197950598, |
|
"eval_loss": 2.928622245788574, |
|
"eval_runtime": 45.1155, |
|
"eval_samples_per_second": 143.698, |
|
"eval_steps_per_second": 2.416, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020529676560363018, |
|
"loss": 2.9135, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020502504370193919, |
|
"loss": 2.9099, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020475332180024816, |
|
"loss": 2.9114, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020448159989855714, |
|
"loss": 2.9169, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002042098779968661, |
|
"loss": 2.9098, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020393815609517511, |
|
"loss": 2.9126, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020366643419348412, |
|
"loss": 2.9095, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020339471229179306, |
|
"loss": 2.9086, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00020312299039010207, |
|
"loss": 2.9077, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020285126848841104, |
|
"loss": 2.9053, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.4457353016789008, |
|
"eval_loss": 2.924194097518921, |
|
"eval_runtime": 41.9708, |
|
"eval_samples_per_second": 154.464, |
|
"eval_steps_per_second": 2.597, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020257954658672004, |
|
"loss": 2.9099, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000202307824685029, |
|
"loss": 2.9118, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000202036102783338, |
|
"loss": 2.91, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.000201764380881647, |
|
"loss": 2.8983, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020149265897995595, |
|
"loss": 2.8964, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020122093707826495, |
|
"loss": 2.9024, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020095193239559085, |
|
"loss": 2.9057, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00020068021049389983, |
|
"loss": 2.9094, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002004084885922088, |
|
"loss": 2.9071, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002001367666905178, |
|
"loss": 2.9023, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.44664325702516083, |
|
"eval_loss": 2.9194602966308594, |
|
"eval_runtime": 42.9573, |
|
"eval_samples_per_second": 150.917, |
|
"eval_steps_per_second": 2.537, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019986504478882678, |
|
"loss": 2.9047, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019959332288713575, |
|
"loss": 2.9097, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019932160098544476, |
|
"loss": 2.908, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019905259630277066, |
|
"loss": 2.9019, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001987808744010796, |
|
"loss": 2.9105, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001985091524993886, |
|
"loss": 2.9064, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001982401478167145, |
|
"loss": 2.9053, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001979684259150235, |
|
"loss": 2.906, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019769670401333247, |
|
"loss": 2.8997, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019742498211164147, |
|
"loss": 2.8946, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.4468059752051368, |
|
"eval_loss": 2.917731285095215, |
|
"eval_runtime": 43.2928, |
|
"eval_samples_per_second": 149.748, |
|
"eval_steps_per_second": 2.518, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019715326020995042, |
|
"loss": 2.9018, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019688153830825942, |
|
"loss": 2.8969, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019660981640656842, |
|
"loss": 2.9104, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001963380945048774, |
|
"loss": 2.9057, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019606637260318637, |
|
"loss": 2.9094, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019579465070149535, |
|
"loss": 2.9008, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019552292879980435, |
|
"loss": 2.8998, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019525120689811335, |
|
"loss": 2.9019, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001949794849964223, |
|
"loss": 2.8925, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001947077630947313, |
|
"loss": 2.9037, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.44703039321543825, |
|
"eval_loss": 2.9147427082061768, |
|
"eval_runtime": 43.7223, |
|
"eval_samples_per_second": 148.277, |
|
"eval_steps_per_second": 2.493, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019443604119304028, |
|
"loss": 2.9052, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019416431929134928, |
|
"loss": 2.9038, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019389259738965823, |
|
"loss": 2.9046, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019362087548796723, |
|
"loss": 2.903, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019334915358627623, |
|
"loss": 2.8919, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019307743168458518, |
|
"loss": 2.8936, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019280570978289418, |
|
"loss": 2.8985, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019253398788120316, |
|
"loss": 2.8955, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019226226597951216, |
|
"loss": 2.8943, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019199326129683804, |
|
"loss": 2.8893, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.44681383890900184, |
|
"eval_loss": 2.9129724502563477, |
|
"eval_runtime": 42.9613, |
|
"eval_samples_per_second": 150.903, |
|
"eval_steps_per_second": 2.537, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019172153939514704, |
|
"loss": 2.8923, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019144981749345602, |
|
"loss": 2.8998, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000191178095591765, |
|
"loss": 2.8931, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019090637369007397, |
|
"loss": 2.8965, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019063465178838297, |
|
"loss": 2.8992, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019036292988669197, |
|
"loss": 2.8974, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019009120798500092, |
|
"loss": 2.8929, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018981948608330992, |
|
"loss": 2.8919, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001895477641816189, |
|
"loss": 2.8907, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001892760422799279, |
|
"loss": 2.8891, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.4481204235511882, |
|
"eval_loss": 2.9055044651031494, |
|
"eval_runtime": 43.4382, |
|
"eval_samples_per_second": 149.246, |
|
"eval_steps_per_second": 2.509, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018900432037823687, |
|
"loss": 2.8892, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018873259847654585, |
|
"loss": 2.8979, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00018846087657485485, |
|
"loss": 2.8864, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001881891546731638, |
|
"loss": 2.8905, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001879174327714728, |
|
"loss": 2.8849, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001876457108697818, |
|
"loss": 2.8959, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018737398896809078, |
|
"loss": 2.8923, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018710226706639978, |
|
"loss": 2.8878, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018683326238372566, |
|
"loss": 2.8848, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018656154048203463, |
|
"loss": 2.8851, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.4484996960376006, |
|
"eval_loss": 2.90169358253479, |
|
"eval_runtime": 44.5924, |
|
"eval_samples_per_second": 145.384, |
|
"eval_steps_per_second": 2.444, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001862898185803436, |
|
"loss": 2.8892, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001860180966786526, |
|
"loss": 2.8835, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001857463747769616, |
|
"loss": 2.8868, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001854746528752706, |
|
"loss": 2.89, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00018520293097357954, |
|
"loss": 2.8903, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018493120907188854, |
|
"loss": 2.8868, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018466220438921442, |
|
"loss": 2.8882, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018439048248752342, |
|
"loss": 2.8788, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001841187605858324, |
|
"loss": 2.8884, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001838470386841414, |
|
"loss": 2.8909, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.44834423666119233, |
|
"eval_loss": 2.9010777473449707, |
|
"eval_runtime": 43.3319, |
|
"eval_samples_per_second": 149.613, |
|
"eval_steps_per_second": 2.515, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001835753167824504, |
|
"loss": 2.8868, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018330359488075935, |
|
"loss": 2.8935, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018303187297906835, |
|
"loss": 2.883, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00018276015107737733, |
|
"loss": 2.8895, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001824911463947032, |
|
"loss": 2.8958, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001822194244930122, |
|
"loss": 2.8916, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001819477025913212, |
|
"loss": 2.8949, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018167869790864708, |
|
"loss": 2.8898, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018140697600695606, |
|
"loss": 2.8887, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018113525410526506, |
|
"loss": 2.896, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.4478663654263186, |
|
"eval_loss": 2.9061102867126465, |
|
"eval_runtime": 43.1173, |
|
"eval_samples_per_second": 150.357, |
|
"eval_steps_per_second": 2.528, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018086353220357404, |
|
"loss": 2.8965, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.000180591810301883, |
|
"loss": 2.8969, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018032008840019201, |
|
"loss": 2.8913, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00018004836649850096, |
|
"loss": 2.8897, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00017977664459680996, |
|
"loss": 2.8952, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00017950492269511897, |
|
"loss": 2.9008, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00017923320079342794, |
|
"loss": 2.8884, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017896147889173694, |
|
"loss": 2.8971, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001786897569900459, |
|
"loss": 2.8824, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001784180350883549, |
|
"loss": 2.8918, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.44788874673731904, |
|
"eval_loss": 2.90425443649292, |
|
"eval_runtime": 45.928, |
|
"eval_samples_per_second": 141.156, |
|
"eval_steps_per_second": 2.373, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017814631318666387, |
|
"loss": 2.886, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017787459128497285, |
|
"loss": 2.8935, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017760286938328185, |
|
"loss": 2.8851, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017733114748159082, |
|
"loss": 2.8869, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017705942557989983, |
|
"loss": 2.8816, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017678770367820877, |
|
"loss": 2.8726, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017651598177651778, |
|
"loss": 2.8815, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017624425987482678, |
|
"loss": 2.8835, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00017597253797313575, |
|
"loss": 2.8814, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017570081607144473, |
|
"loss": 2.8847, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.4490059975864478, |
|
"eval_loss": 2.89544415473938, |
|
"eval_runtime": 42.9804, |
|
"eval_samples_per_second": 150.836, |
|
"eval_steps_per_second": 2.536, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001754290941697537, |
|
"loss": 2.8699, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001751573722680627, |
|
"loss": 2.8829, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017488565036637165, |
|
"loss": 2.8773, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017461392846468066, |
|
"loss": 2.8812, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017434220656298966, |
|
"loss": 2.8805, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017407048466129863, |
|
"loss": 2.8812, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017379876275960764, |
|
"loss": 2.8826, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017352704085791659, |
|
"loss": 2.8801, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017325803617524252, |
|
"loss": 2.8787, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00017298631427355146, |
|
"loss": 2.8749, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.44940160238088755, |
|
"eval_loss": 2.8912456035614014, |
|
"eval_runtime": 43.8328, |
|
"eval_samples_per_second": 147.903, |
|
"eval_steps_per_second": 2.487, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017271730959087737, |
|
"loss": 2.8715, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017244558768918637, |
|
"loss": 2.8804, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017217386578749535, |
|
"loss": 2.8802, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017190214388580432, |
|
"loss": 2.8779, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017163042198411332, |
|
"loss": 2.878, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017135870008242227, |
|
"loss": 2.8835, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017108697818073127, |
|
"loss": 2.8758, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017081525627904025, |
|
"loss": 2.8751, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017054353437734925, |
|
"loss": 2.8737, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00017027181247565825, |
|
"loss": 2.8832, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.4496018243792967, |
|
"eval_loss": 2.891221761703491, |
|
"eval_runtime": 43.1479, |
|
"eval_samples_per_second": 150.251, |
|
"eval_steps_per_second": 2.526, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001700000905739672, |
|
"loss": 2.8757, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001697283686722762, |
|
"loss": 2.8725, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016945664677058518, |
|
"loss": 2.8749, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016918492486889416, |
|
"loss": 2.8747, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016891320296720316, |
|
"loss": 2.8724, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016864148106551213, |
|
"loss": 2.8717, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016836975916382114, |
|
"loss": 2.8653, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016809803726213008, |
|
"loss": 2.869, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016782631536043909, |
|
"loss": 2.8763, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001675545934587481, |
|
"loss": 2.8745, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.45002646438800725, |
|
"eval_loss": 2.8852970600128174, |
|
"eval_runtime": 43.6365, |
|
"eval_samples_per_second": 148.568, |
|
"eval_steps_per_second": 2.498, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016728287155705706, |
|
"loss": 2.8753, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00016701114965536604, |
|
"loss": 2.8684, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016673942775367501, |
|
"loss": 2.8711, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016646770585198402, |
|
"loss": 2.8646, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016619598395029296, |
|
"loss": 2.865, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001659269792676189, |
|
"loss": 2.8773, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016565525736592787, |
|
"loss": 2.8703, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016538353546423687, |
|
"loss": 2.8722, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016511181356254582, |
|
"loss": 2.8713, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016484009166085482, |
|
"loss": 2.8717, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.45021942758284866, |
|
"eval_loss": 2.8834283351898193, |
|
"eval_runtime": 43.5477, |
|
"eval_samples_per_second": 148.871, |
|
"eval_steps_per_second": 2.503, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016456836975916383, |
|
"loss": 2.8727, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016429664785747277, |
|
"loss": 2.8622, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00016402492595578178, |
|
"loss": 2.8707, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016375320405409075, |
|
"loss": 2.8645, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016348148215239975, |
|
"loss": 2.8642, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016321247746972563, |
|
"loss": 2.8679, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016294075556803463, |
|
"loss": 2.871, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001626690336663436, |
|
"loss": 2.867, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016239731176465258, |
|
"loss": 2.8643, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016212558986296156, |
|
"loss": 2.8659, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.45029624992060685, |
|
"eval_loss": 2.883072853088379, |
|
"eval_runtime": 43.5545, |
|
"eval_samples_per_second": 148.848, |
|
"eval_steps_per_second": 2.503, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016185386796127056, |
|
"loss": 2.8694, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016158214605957956, |
|
"loss": 2.8671, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001613104241578885, |
|
"loss": 2.8624, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00016103870225619751, |
|
"loss": 2.8665, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001607669803545065, |
|
"loss": 2.8613, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00016049525845281547, |
|
"loss": 2.8637, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00016022353655112447, |
|
"loss": 2.8662, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015995181464943344, |
|
"loss": 2.8652, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015968009274774245, |
|
"loss": 2.8673, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001594083708460514, |
|
"loss": 2.865, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.450486793514259, |
|
"eval_loss": 2.878352403640747, |
|
"eval_runtime": 43.3417, |
|
"eval_samples_per_second": 149.579, |
|
"eval_steps_per_second": 2.515, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001591366489443604, |
|
"loss": 2.8688, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015886492704266937, |
|
"loss": 2.862, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015859320514097837, |
|
"loss": 2.8646, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015832148323928735, |
|
"loss": 2.8672, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00015804976133759632, |
|
"loss": 2.8594, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015777803943590533, |
|
"loss": 2.8558, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001575063175342143, |
|
"loss": 2.8576, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001572373128515402, |
|
"loss": 2.8597, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015696559094984918, |
|
"loss": 2.8615, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015669386904815818, |
|
"loss": 2.8575, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.45082372297985984, |
|
"eval_loss": 2.8763039112091064, |
|
"eval_runtime": 43.6525, |
|
"eval_samples_per_second": 148.514, |
|
"eval_steps_per_second": 2.497, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015642214714646713, |
|
"loss": 2.8673, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015615042524477613, |
|
"loss": 2.854, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015587870334308514, |
|
"loss": 2.8652, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000155609698660411, |
|
"loss": 2.8596, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00015533797675872, |
|
"loss": 2.8641, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.000155066254857029, |
|
"loss": 2.8595, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.000154794532955338, |
|
"loss": 2.8562, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015452281105364694, |
|
"loss": 2.8529, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015425108915195594, |
|
"loss": 2.8629, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015397936725026492, |
|
"loss": 2.8571, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.4512689295986789, |
|
"eval_loss": 2.874122142791748, |
|
"eval_runtime": 43.0942, |
|
"eval_samples_per_second": 150.438, |
|
"eval_steps_per_second": 2.529, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001537076453485739, |
|
"loss": 2.8605, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015343592344688287, |
|
"loss": 2.8668, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015316420154519187, |
|
"loss": 2.8604, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015289247964350087, |
|
"loss": 2.857, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015262075774180982, |
|
"loss": 2.8599, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00015234903584011882, |
|
"loss": 2.8653, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001520773139384278, |
|
"loss": 2.857, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001518055920367368, |
|
"loss": 2.8543, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015153658735406268, |
|
"loss": 2.8495, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015126486545237168, |
|
"loss": 2.8554, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.4514479800866822, |
|
"eval_loss": 2.870398998260498, |
|
"eval_runtime": 43.838, |
|
"eval_samples_per_second": 147.885, |
|
"eval_steps_per_second": 2.486, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015099314355068063, |
|
"loss": 2.8595, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015072142164898963, |
|
"loss": 2.855, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001504496997472986, |
|
"loss": 2.8663, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001501779778456076, |
|
"loss": 2.8555, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014990625594391658, |
|
"loss": 2.8596, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014963453404222556, |
|
"loss": 2.8589, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014936281214053456, |
|
"loss": 2.8568, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014909109023884354, |
|
"loss": 2.8474, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001488193683371525, |
|
"loss": 2.8515, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014854764643546151, |
|
"loss": 2.8526, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.45189379160579857, |
|
"eval_loss": 2.86692214012146, |
|
"eval_runtime": 43.3506, |
|
"eval_samples_per_second": 149.548, |
|
"eval_steps_per_second": 2.514, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001482759245337705, |
|
"loss": 2.8504, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001480042026320795, |
|
"loss": 2.854, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014773248073038847, |
|
"loss": 2.8512, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014746075882869744, |
|
"loss": 2.8515, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014718903692700642, |
|
"loss": 2.8492, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014691731502531542, |
|
"loss": 2.8491, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001466455931236244, |
|
"loss": 2.8466, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001463738712219334, |
|
"loss": 2.8508, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014610214932024237, |
|
"loss": 2.8567, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014583042741855135, |
|
"loss": 2.8521, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.45249203799983667, |
|
"eval_loss": 2.861818552017212, |
|
"eval_runtime": 43.168, |
|
"eval_samples_per_second": 150.181, |
|
"eval_steps_per_second": 2.525, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014555870551686032, |
|
"loss": 2.8463, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001452869836151693, |
|
"loss": 2.8433, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001450152617134783, |
|
"loss": 2.8446, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014474353981178728, |
|
"loss": 2.8477, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014447181791009628, |
|
"loss": 2.8439, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014420009600840525, |
|
"loss": 2.8459, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014392837410671423, |
|
"loss": 2.8445, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001436566522050232, |
|
"loss": 2.8455, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001433876475223491, |
|
"loss": 2.8474, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.000143118642839675, |
|
"loss": 2.8398, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.45218656334969587, |
|
"eval_loss": 2.8599517345428467, |
|
"eval_runtime": 43.8444, |
|
"eval_samples_per_second": 147.864, |
|
"eval_steps_per_second": 2.486, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.000142846920937984, |
|
"loss": 2.8492, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014257519903629296, |
|
"loss": 2.8434, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014230347713460197, |
|
"loss": 2.8483, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014203175523291094, |
|
"loss": 2.8441, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014176003333121992, |
|
"loss": 2.8474, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014148831142952892, |
|
"loss": 2.8385, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001412165895278379, |
|
"loss": 2.8424, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014094486762614687, |
|
"loss": 2.847, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014067314572445587, |
|
"loss": 2.8511, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014040142382276485, |
|
"loss": 2.8398, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.45275395982857125, |
|
"eval_loss": 2.8576090335845947, |
|
"eval_runtime": 43.2028, |
|
"eval_samples_per_second": 150.06, |
|
"eval_steps_per_second": 2.523, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014012970192107382, |
|
"loss": 2.8386, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013985798001938282, |
|
"loss": 2.8458, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001395862581176918, |
|
"loss": 2.8356, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013931453621600078, |
|
"loss": 2.8379, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013904281431430978, |
|
"loss": 2.8325, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013877109241261875, |
|
"loss": 2.8461, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013849937051092773, |
|
"loss": 2.8521, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013823036582825363, |
|
"loss": 2.8273, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013795864392656263, |
|
"loss": 2.8318, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001376869220248716, |
|
"loss": 2.837, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.4528289674654375, |
|
"eval_loss": 2.8535568714141846, |
|
"eval_runtime": 43.1874, |
|
"eval_samples_per_second": 150.113, |
|
"eval_steps_per_second": 2.524, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013741520012318058, |
|
"loss": 2.8396, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013714347822148956, |
|
"loss": 2.8395, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013687447353881546, |
|
"loss": 2.8325, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013660275163712444, |
|
"loss": 2.8412, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013633102973543344, |
|
"loss": 2.8392, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013605930783374242, |
|
"loss": 2.843, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001357875859320514, |
|
"loss": 2.8337, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013551586403036037, |
|
"loss": 2.8452, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013524414212866937, |
|
"loss": 2.8448, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013497242022697835, |
|
"loss": 2.837, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.4534701617805845, |
|
"eval_loss": 2.851900577545166, |
|
"eval_runtime": 43.1282, |
|
"eval_samples_per_second": 150.319, |
|
"eval_steps_per_second": 2.527, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013470069832528735, |
|
"loss": 2.8331, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013442897642359632, |
|
"loss": 2.832, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001341572545219053, |
|
"loss": 2.8255, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013388553262021427, |
|
"loss": 2.8327, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013361381071852328, |
|
"loss": 2.8386, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013334208881683225, |
|
"loss": 2.8315, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013307036691514125, |
|
"loss": 2.824, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013279864501345023, |
|
"loss": 2.8296, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001325269231117592, |
|
"loss": 2.8378, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001322579184290851, |
|
"loss": 2.8427, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.4535663409278566, |
|
"eval_loss": 2.8492891788482666, |
|
"eval_runtime": 43.4858, |
|
"eval_samples_per_second": 149.083, |
|
"eval_steps_per_second": 2.507, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013198619652739408, |
|
"loss": 2.8329, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013171447462570306, |
|
"loss": 2.8389, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013144275272401206, |
|
"loss": 2.8358, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013117103082232104, |
|
"loss": 2.8369, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00013089930892063, |
|
"loss": 2.8294, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000130627587018939, |
|
"loss": 2.834, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000130355865117248, |
|
"loss": 2.8414, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00013008414321555696, |
|
"loss": 2.8384, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012981242131386597, |
|
"loss": 2.8384, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012954069941217494, |
|
"loss": 2.8365, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.45409986299008265, |
|
"eval_loss": 2.8467965126037598, |
|
"eval_runtime": 47.1796, |
|
"eval_samples_per_second": 137.411, |
|
"eval_steps_per_second": 2.31, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012926897751048392, |
|
"loss": 2.8281, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012899725560879292, |
|
"loss": 2.8197, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001287255337071019, |
|
"loss": 2.8233, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012845652902442777, |
|
"loss": 2.828, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012818480712273677, |
|
"loss": 2.8334, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012791308522104578, |
|
"loss": 2.8332, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012764136331935475, |
|
"loss": 2.8279, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012736964141766373, |
|
"loss": 2.8271, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001270979195159727, |
|
"loss": 2.8306, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012682619761428168, |
|
"loss": 2.8327, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.4538736302788893, |
|
"eval_loss": 2.8447225093841553, |
|
"eval_runtime": 44.4204, |
|
"eval_samples_per_second": 145.946, |
|
"eval_steps_per_second": 2.454, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012655447571259068, |
|
"loss": 2.836, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012628275381089965, |
|
"loss": 2.8337, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012601103190920866, |
|
"loss": 2.8333, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012573931000751763, |
|
"loss": 2.8298, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001254675881058266, |
|
"loss": 2.8285, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012519586620413558, |
|
"loss": 2.8252, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012492414430244459, |
|
"loss": 2.8227, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012465242240075356, |
|
"loss": 2.8286, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012438070049906256, |
|
"loss": 2.8218, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012410897859737154, |
|
"loss": 2.8289, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.4545583774154425, |
|
"eval_loss": 2.838773012161255, |
|
"eval_runtime": 43.8892, |
|
"eval_samples_per_second": 147.713, |
|
"eval_steps_per_second": 2.484, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001238372566956805, |
|
"loss": 2.8198, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001235655347939895, |
|
"loss": 2.8207, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012329381289229846, |
|
"loss": 2.8296, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012302209099060747, |
|
"loss": 2.8293, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012275036908891647, |
|
"loss": 2.8188, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012247864718722544, |
|
"loss": 2.819, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00012220692528553442, |
|
"loss": 2.8219, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001219352033838434, |
|
"loss": 2.8199, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001216634814821524, |
|
"loss": 2.8282, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012139175958046137, |
|
"loss": 2.8166, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.45473863770404044, |
|
"eval_loss": 2.834634780883789, |
|
"eval_runtime": 43.1108, |
|
"eval_samples_per_second": 150.38, |
|
"eval_steps_per_second": 2.528, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012112003767877036, |
|
"loss": 2.8226, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012084831577707934, |
|
"loss": 2.8135, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00012057659387538832, |
|
"loss": 2.8134, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001203048719736973, |
|
"loss": 2.8214, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001200358672910232, |
|
"loss": 2.8142, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011976414538933219, |
|
"loss": 2.8196, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011949242348764117, |
|
"loss": 2.8145, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011922070158595016, |
|
"loss": 2.8093, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011894897968425913, |
|
"loss": 2.8168, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011867725778256813, |
|
"loss": 2.8171, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.45580810142968187, |
|
"eval_loss": 2.8293869495391846, |
|
"eval_runtime": 44.4137, |
|
"eval_samples_per_second": 145.968, |
|
"eval_steps_per_second": 2.454, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011840553588087711, |
|
"loss": 2.8123, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001181338139791861, |
|
"loss": 2.8121, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011786209207749507, |
|
"loss": 2.8083, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011759037017580405, |
|
"loss": 2.8156, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011731864827411304, |
|
"loss": 2.8225, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011704692637242204, |
|
"loss": 2.8109, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011677520447073102, |
|
"loss": 2.8137, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001165061997880569, |
|
"loss": 2.8097, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011623447788636588, |
|
"loss": 2.8099, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00011596275598467488, |
|
"loss": 2.8184, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.4556344950443543, |
|
"eval_loss": 2.826944589614868, |
|
"eval_runtime": 43.7297, |
|
"eval_samples_per_second": 148.252, |
|
"eval_steps_per_second": 2.493, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011569103408298386, |
|
"loss": 2.8164, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011541931218129285, |
|
"loss": 2.8137, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011514759027960182, |
|
"loss": 2.8168, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011487858559692771, |
|
"loss": 2.8156, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011460686369523672, |
|
"loss": 2.8114, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011433514179354569, |
|
"loss": 2.8066, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011406341989185468, |
|
"loss": 2.8124, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011379169799016366, |
|
"loss": 2.8093, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011351997608847263, |
|
"loss": 2.8131, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011324825418678162, |
|
"loss": 2.8102, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.45632710588477254, |
|
"eval_loss": 2.8243494033813477, |
|
"eval_runtime": 42.7646, |
|
"eval_samples_per_second": 151.597, |
|
"eval_steps_per_second": 2.549, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00011297653228509062, |
|
"loss": 2.8064, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001127048103833996, |
|
"loss": 2.8075, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011243308848170857, |
|
"loss": 2.8146, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011216136658001756, |
|
"loss": 2.8166, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011188964467832654, |
|
"loss": 2.8073, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011161792277663554, |
|
"loss": 2.8116, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011134620087494451, |
|
"loss": 2.807, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001110744789732535, |
|
"loss": 2.8066, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011080547429057939, |
|
"loss": 2.8101, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011053375238888837, |
|
"loss": 2.8153, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.45636279500231375, |
|
"eval_loss": 2.821134328842163, |
|
"eval_runtime": 42.931, |
|
"eval_samples_per_second": 151.01, |
|
"eval_steps_per_second": 2.539, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00011026203048719737, |
|
"loss": 2.8109, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00010999030858550635, |
|
"loss": 2.8025, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010971858668381533, |
|
"loss": 2.8055, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010944686478212431, |
|
"loss": 2.8047, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010917514288043329, |
|
"loss": 2.8095, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010890342097874227, |
|
"loss": 2.805, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010863169907705128, |
|
"loss": 2.8079, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010835997717536025, |
|
"loss": 2.8071, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010809097249268614, |
|
"loss": 2.8016, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010781925059099512, |
|
"loss": 2.8035, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.4569090199707833, |
|
"eval_loss": 2.8184897899627686, |
|
"eval_runtime": 43.5955, |
|
"eval_samples_per_second": 148.708, |
|
"eval_steps_per_second": 2.5, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010755024590832102, |
|
"loss": 2.8002, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010727852400663001, |
|
"loss": 2.8186, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00010700680210493899, |
|
"loss": 2.8036, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010673508020324797, |
|
"loss": 2.8077, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010646335830155695, |
|
"loss": 2.8111, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010619163639986595, |
|
"loss": 2.8018, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010591991449817493, |
|
"loss": 2.8079, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010564819259648392, |
|
"loss": 2.8124, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010537647069479289, |
|
"loss": 2.807, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010510474879310187, |
|
"loss": 2.8042, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.4569186983755403, |
|
"eval_loss": 2.8206183910369873, |
|
"eval_runtime": 44.1793, |
|
"eval_samples_per_second": 146.743, |
|
"eval_steps_per_second": 2.467, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010483302689141086, |
|
"loss": 2.8066, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010456130498971986, |
|
"loss": 2.8088, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010428958308802883, |
|
"loss": 2.8036, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00010401786118633781, |
|
"loss": 2.7985, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001037461392846468, |
|
"loss": 2.7981, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010347441738295577, |
|
"loss": 2.7993, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010320269548126476, |
|
"loss": 2.7999, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010293097357957375, |
|
"loss": 2.8009, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010265925167788274, |
|
"loss": 2.7943, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010238752977619171, |
|
"loss": 2.7984, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.457420160722009, |
|
"eval_loss": 2.8137617111206055, |
|
"eval_runtime": 43.507, |
|
"eval_samples_per_second": 149.01, |
|
"eval_steps_per_second": 2.505, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001021158078745007, |
|
"loss": 2.7913, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010184408597280968, |
|
"loss": 2.8016, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010157236407111868, |
|
"loss": 2.7988, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010130064216942766, |
|
"loss": 2.792, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00010103163748675355, |
|
"loss": 2.7926, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00010075991558506253, |
|
"loss": 2.7796, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00010048819368337151, |
|
"loss": 2.7971, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00010021647178168051, |
|
"loss": 2.7974, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.994474987998949e-05, |
|
"loss": 2.7951, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.967302797829848e-05, |
|
"loss": 2.7883, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.45740261861338705, |
|
"eval_loss": 2.8111917972564697, |
|
"eval_runtime": 44.0953, |
|
"eval_samples_per_second": 147.023, |
|
"eval_steps_per_second": 2.472, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.940130607660745e-05, |
|
"loss": 2.7898, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.912958417491643e-05, |
|
"loss": 2.7914, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.885786227322542e-05, |
|
"loss": 2.798, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.85861403715344e-05, |
|
"loss": 2.7938, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.83144184698434e-05, |
|
"loss": 2.7927, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.804269656815237e-05, |
|
"loss": 2.7967, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.777369188547826e-05, |
|
"loss": 2.7933, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.750196998378726e-05, |
|
"loss": 2.7913, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.723024808209624e-05, |
|
"loss": 2.7924, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.695852618040523e-05, |
|
"loss": 2.7962, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.4583686443881887, |
|
"eval_loss": 2.8055942058563232, |
|
"eval_runtime": 44.8912, |
|
"eval_samples_per_second": 144.416, |
|
"eval_steps_per_second": 2.428, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.66868042787142e-05, |
|
"loss": 2.7848, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.641779959604009e-05, |
|
"loss": 2.7935, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.61460776943491e-05, |
|
"loss": 2.7961, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.587435579265807e-05, |
|
"loss": 2.788, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.560263389096706e-05, |
|
"loss": 2.7934, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.533091198927603e-05, |
|
"loss": 2.7888, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.505919008758501e-05, |
|
"loss": 2.7954, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.4787468185894e-05, |
|
"loss": 2.7934, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.451574628420299e-05, |
|
"loss": 2.7867, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.424402438251197e-05, |
|
"loss": 2.7937, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.4582416153257539, |
|
"eval_loss": 2.8068454265594482, |
|
"eval_runtime": 44.3778, |
|
"eval_samples_per_second": 146.087, |
|
"eval_steps_per_second": 2.456, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.397230248082095e-05, |
|
"loss": 2.7933, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.370058057912994e-05, |
|
"loss": 2.7876, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.342885867743891e-05, |
|
"loss": 2.7885, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.31571367757479e-05, |
|
"loss": 2.7859, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.288541487405689e-05, |
|
"loss": 2.7867, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.261369297236588e-05, |
|
"loss": 2.7882, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.234197107067486e-05, |
|
"loss": 2.7874, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.207024916898384e-05, |
|
"loss": 2.79, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.179852726729282e-05, |
|
"loss": 2.7828, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.152680536560182e-05, |
|
"loss": 2.7853, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.4587721128864935, |
|
"eval_loss": 2.801090955734253, |
|
"eval_runtime": 43.1479, |
|
"eval_samples_per_second": 150.251, |
|
"eval_steps_per_second": 2.526, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.12550834639108e-05, |
|
"loss": 2.7861, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.098336156221979e-05, |
|
"loss": 2.793, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.071163966052876e-05, |
|
"loss": 2.7914, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.043991775883774e-05, |
|
"loss": 2.7774, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 9.016819585714673e-05, |
|
"loss": 2.7791, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.989647395545573e-05, |
|
"loss": 2.7837, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.96247520537647e-05, |
|
"loss": 2.779, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.935303015207368e-05, |
|
"loss": 2.7807, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 8.908130825038267e-05, |
|
"loss": 2.7832, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.880958634869164e-05, |
|
"loss": 2.7798, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.4596697849276993, |
|
"eval_loss": 2.795370578765869, |
|
"eval_runtime": 43.9941, |
|
"eval_samples_per_second": 147.361, |
|
"eval_steps_per_second": 2.478, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.853786444700063e-05, |
|
"loss": 2.7851, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.826885976432654e-05, |
|
"loss": 2.7819, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.799713786263551e-05, |
|
"loss": 2.7767, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.77254159609445e-05, |
|
"loss": 2.7745, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.745369405925347e-05, |
|
"loss": 2.7807, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.718197215756246e-05, |
|
"loss": 2.7828, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.691025025587145e-05, |
|
"loss": 2.7768, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.663852835418044e-05, |
|
"loss": 2.7749, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.636680645248942e-05, |
|
"loss": 2.7782, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 8.60950845507984e-05, |
|
"loss": 2.7851, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.4597998384916206, |
|
"eval_loss": 2.7913172245025635, |
|
"eval_runtime": 43.6998, |
|
"eval_samples_per_second": 148.353, |
|
"eval_steps_per_second": 2.494, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.582336264910738e-05, |
|
"loss": 2.7722, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.555435796643328e-05, |
|
"loss": 2.7695, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.528535328375917e-05, |
|
"loss": 2.7732, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.501363138206815e-05, |
|
"loss": 2.7714, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.474190948037714e-05, |
|
"loss": 2.7739, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.447018757868613e-05, |
|
"loss": 2.7733, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.419846567699512e-05, |
|
"loss": 2.773, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.392674377530409e-05, |
|
"loss": 2.7754, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.365502187361308e-05, |
|
"loss": 2.7817, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.338329997192206e-05, |
|
"loss": 2.7831, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.46004845251381443, |
|
"eval_loss": 2.78973126411438, |
|
"eval_runtime": 44.9439, |
|
"eval_samples_per_second": 144.247, |
|
"eval_steps_per_second": 2.425, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 8.311157807023106e-05, |
|
"loss": 2.7739, |
|
"step": 80100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.283985616854003e-05, |
|
"loss": 2.781, |
|
"step": 80200 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.256813426684902e-05, |
|
"loss": 2.7773, |
|
"step": 80300 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.2296412365158e-05, |
|
"loss": 2.7688, |
|
"step": 80400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.202469046346699e-05, |
|
"loss": 2.7765, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.175568578079289e-05, |
|
"loss": 2.7735, |
|
"step": 80600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.148396387910187e-05, |
|
"loss": 2.7692, |
|
"step": 80700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.121224197741084e-05, |
|
"loss": 2.7661, |
|
"step": 80800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.094052007571983e-05, |
|
"loss": 2.7714, |
|
"step": 80900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.06687981740288e-05, |
|
"loss": 2.7773, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.4603297311520629, |
|
"eval_loss": 2.786165475845337, |
|
"eval_runtime": 45.3636, |
|
"eval_samples_per_second": 142.912, |
|
"eval_steps_per_second": 2.403, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.03970762723378e-05, |
|
"loss": 2.77, |
|
"step": 81100 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 8.012535437064678e-05, |
|
"loss": 2.772, |
|
"step": 81200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.985363246895577e-05, |
|
"loss": 2.7751, |
|
"step": 81300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.958191056726475e-05, |
|
"loss": 2.7705, |
|
"step": 81400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.931018866557374e-05, |
|
"loss": 2.7711, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.903846676388271e-05, |
|
"loss": 2.7666, |
|
"step": 81600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.87667448621917e-05, |
|
"loss": 2.7678, |
|
"step": 81700 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.84977401795176e-05, |
|
"loss": 2.7707, |
|
"step": 81800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.822601827782658e-05, |
|
"loss": 2.7624, |
|
"step": 81900 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.795429637613557e-05, |
|
"loss": 2.7688, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.4608795855223163, |
|
"eval_loss": 2.7835707664489746, |
|
"eval_runtime": 44.1206, |
|
"eval_samples_per_second": 146.938, |
|
"eval_steps_per_second": 2.47, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.768257447444454e-05, |
|
"loss": 2.7652, |
|
"step": 82100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.741085257275354e-05, |
|
"loss": 2.763, |
|
"step": 82200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.713913067106252e-05, |
|
"loss": 2.7718, |
|
"step": 82300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 7.686740876937151e-05, |
|
"loss": 2.774, |
|
"step": 82400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.659568686768048e-05, |
|
"loss": 2.7624, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.632396496598946e-05, |
|
"loss": 2.7672, |
|
"step": 82600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.605224306429845e-05, |
|
"loss": 2.7646, |
|
"step": 82700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.578052116260744e-05, |
|
"loss": 2.7643, |
|
"step": 82800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.550879926091643e-05, |
|
"loss": 2.7636, |
|
"step": 82900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.523979457824232e-05, |
|
"loss": 2.7658, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.4610453282037788, |
|
"eval_loss": 2.7798171043395996, |
|
"eval_runtime": 44.7143, |
|
"eval_samples_per_second": 144.987, |
|
"eval_steps_per_second": 2.438, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.49680726765513e-05, |
|
"loss": 2.7694, |
|
"step": 83100 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.469635077486028e-05, |
|
"loss": 2.7662, |
|
"step": 83200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.442734609218618e-05, |
|
"loss": 2.7624, |
|
"step": 83300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.415562419049516e-05, |
|
"loss": 2.7632, |
|
"step": 83400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 7.388390228880415e-05, |
|
"loss": 2.7697, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.361218038711314e-05, |
|
"loss": 2.7663, |
|
"step": 83600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.334045848542211e-05, |
|
"loss": 2.7623, |
|
"step": 83700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.306873658373109e-05, |
|
"loss": 2.7685, |
|
"step": 83800 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.279701468204009e-05, |
|
"loss": 2.7702, |
|
"step": 83900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.252529278034907e-05, |
|
"loss": 2.7622, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.4611511857558078, |
|
"eval_loss": 2.781484603881836, |
|
"eval_runtime": 43.3638, |
|
"eval_samples_per_second": 149.503, |
|
"eval_steps_per_second": 2.514, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.225357087865804e-05, |
|
"loss": 2.7672, |
|
"step": 84100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.198184897696703e-05, |
|
"loss": 2.7652, |
|
"step": 84200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.171012707527602e-05, |
|
"loss": 2.7671, |
|
"step": 84300 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.143840517358501e-05, |
|
"loss": 2.7621, |
|
"step": 84400 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.11694004909109e-05, |
|
"loss": 2.7662, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 7.089767858921989e-05, |
|
"loss": 2.7684, |
|
"step": 84600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.062595668752886e-05, |
|
"loss": 2.7662, |
|
"step": 84700 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.035423478583785e-05, |
|
"loss": 2.7638, |
|
"step": 84800 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.008251288414684e-05, |
|
"loss": 2.7639, |
|
"step": 84900 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.981079098245581e-05, |
|
"loss": 2.7691, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.46120986108464673, |
|
"eval_loss": 2.7783455848693848, |
|
"eval_runtime": 43.5919, |
|
"eval_samples_per_second": 148.72, |
|
"eval_steps_per_second": 2.5, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.95390690807648e-05, |
|
"loss": 2.7649, |
|
"step": 85100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.926734717907379e-05, |
|
"loss": 2.7638, |
|
"step": 85200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.899562527738277e-05, |
|
"loss": 2.7675, |
|
"step": 85300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.872390337569176e-05, |
|
"loss": 2.7657, |
|
"step": 85400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.845218147400074e-05, |
|
"loss": 2.7612, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.818045957230972e-05, |
|
"loss": 2.7682, |
|
"step": 85600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 6.79087376706187e-05, |
|
"loss": 2.7588, |
|
"step": 85700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.763701576892768e-05, |
|
"loss": 2.765, |
|
"step": 85800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.736529386723667e-05, |
|
"loss": 2.7556, |
|
"step": 85900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.709357196554565e-05, |
|
"loss": 2.7579, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.4619333218402277, |
|
"eval_loss": 2.7711987495422363, |
|
"eval_runtime": 43.3357, |
|
"eval_samples_per_second": 149.6, |
|
"eval_steps_per_second": 2.515, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.682185006385464e-05, |
|
"loss": 2.7538, |
|
"step": 86100 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.655012816216363e-05, |
|
"loss": 2.7596, |
|
"step": 86200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.62784062604726e-05, |
|
"loss": 2.7512, |
|
"step": 86300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.600668435878159e-05, |
|
"loss": 2.7559, |
|
"step": 86400 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.573496245709058e-05, |
|
"loss": 2.7574, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.546324055539957e-05, |
|
"loss": 2.7614, |
|
"step": 86600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.519423587272546e-05, |
|
"loss": 2.7501, |
|
"step": 86700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 6.492251397103445e-05, |
|
"loss": 2.7488, |
|
"step": 86800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.465079206934342e-05, |
|
"loss": 2.7497, |
|
"step": 86900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.437907016765241e-05, |
|
"loss": 2.7614, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.46246986840394033, |
|
"eval_loss": 2.7673110961914062, |
|
"eval_runtime": 43.038, |
|
"eval_samples_per_second": 150.634, |
|
"eval_steps_per_second": 2.533, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.41073482659614e-05, |
|
"loss": 2.7544, |
|
"step": 87100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.383834358328728e-05, |
|
"loss": 2.7546, |
|
"step": 87200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.356662168159627e-05, |
|
"loss": 2.7564, |
|
"step": 87300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.329489977990525e-05, |
|
"loss": 2.759, |
|
"step": 87400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.302317787821423e-05, |
|
"loss": 2.7586, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.275145597652322e-05, |
|
"loss": 2.7546, |
|
"step": 87600 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.247973407483221e-05, |
|
"loss": 2.7548, |
|
"step": 87700 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.220801217314118e-05, |
|
"loss": 2.7527, |
|
"step": 87800 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.193629027145017e-05, |
|
"loss": 2.7607, |
|
"step": 87900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.166456836975916e-05, |
|
"loss": 2.7592, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.46232166783109974, |
|
"eval_loss": 2.7691469192504883, |
|
"eval_runtime": 43.5697, |
|
"eval_samples_per_second": 148.796, |
|
"eval_steps_per_second": 2.502, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.139284646806815e-05, |
|
"loss": 2.7481, |
|
"step": 88100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.112112456637712e-05, |
|
"loss": 2.7579, |
|
"step": 88200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.0849402664686106e-05, |
|
"loss": 2.7559, |
|
"step": 88300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.05776807629951e-05, |
|
"loss": 2.7515, |
|
"step": 88400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.030595886130408e-05, |
|
"loss": 2.7524, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 6.003423695961306e-05, |
|
"loss": 2.7395, |
|
"step": 88600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.976251505792205e-05, |
|
"loss": 2.7438, |
|
"step": 88700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.949079315623103e-05, |
|
"loss": 2.7468, |
|
"step": 88800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.921907125454001e-05, |
|
"loss": 2.7423, |
|
"step": 88900 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.8947349352849e-05, |
|
"loss": 2.7551, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.4633808482516869, |
|
"eval_loss": 2.760658025741577, |
|
"eval_runtime": 43.7777, |
|
"eval_samples_per_second": 148.089, |
|
"eval_steps_per_second": 2.49, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.867562745115798e-05, |
|
"loss": 2.7352, |
|
"step": 89100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.8403905549466965e-05, |
|
"loss": 2.751, |
|
"step": 89200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.813490086679286e-05, |
|
"loss": 2.7456, |
|
"step": 89300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.7863178965101844e-05, |
|
"loss": 2.7491, |
|
"step": 89400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.759145706341083e-05, |
|
"loss": 2.7477, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.7319735161719815e-05, |
|
"loss": 2.7431, |
|
"step": 89600 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.70480132600288e-05, |
|
"loss": 2.7406, |
|
"step": 89700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.6776291358337786e-05, |
|
"loss": 2.7444, |
|
"step": 89800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.650456945664677e-05, |
|
"loss": 2.7437, |
|
"step": 89900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.623284755495574e-05, |
|
"loss": 2.7397, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.4636597072887461, |
|
"eval_loss": 2.7578768730163574, |
|
"eval_runtime": 43.3807, |
|
"eval_samples_per_second": 149.444, |
|
"eval_steps_per_second": 2.513, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.596112565326473e-05, |
|
"loss": 2.7456, |
|
"step": 90100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.5689403751573714e-05, |
|
"loss": 2.7393, |
|
"step": 90200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.54176818498827e-05, |
|
"loss": 2.74, |
|
"step": 90300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.5145959948191685e-05, |
|
"loss": 2.7411, |
|
"step": 90400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.487695526551758e-05, |
|
"loss": 2.747, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.4605233363826564e-05, |
|
"loss": 2.741, |
|
"step": 90600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.433622868115246e-05, |
|
"loss": 2.7441, |
|
"step": 90700 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.406450677946144e-05, |
|
"loss": 2.7447, |
|
"step": 90800 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.3792784877770425e-05, |
|
"loss": 2.7517, |
|
"step": 90900 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.3521062976079414e-05, |
|
"loss": 2.7357, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.4636022417605018, |
|
"eval_loss": 2.758023738861084, |
|
"eval_runtime": 43.2538, |
|
"eval_samples_per_second": 149.883, |
|
"eval_steps_per_second": 2.52, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.3249341074388396e-05, |
|
"loss": 2.7429, |
|
"step": 91100 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 5.297761917269738e-05, |
|
"loss": 2.7445, |
|
"step": 91200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.270589727100637e-05, |
|
"loss": 2.7473, |
|
"step": 91300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.243417536931535e-05, |
|
"loss": 2.7404, |
|
"step": 91400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.216245346762434e-05, |
|
"loss": 2.7401, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.189073156593331e-05, |
|
"loss": 2.7441, |
|
"step": 91600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.1619009664242295e-05, |
|
"loss": 2.737, |
|
"step": 91700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.1347287762551284e-05, |
|
"loss": 2.7337, |
|
"step": 91800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.1075565860860266e-05, |
|
"loss": 2.7422, |
|
"step": 91900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.080384395916925e-05, |
|
"loss": 2.7452, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.46426944678843307, |
|
"eval_loss": 2.751744031906128, |
|
"eval_runtime": 44.8905, |
|
"eval_samples_per_second": 144.418, |
|
"eval_steps_per_second": 2.428, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.0532122057478237e-05, |
|
"loss": 2.7387, |
|
"step": 92100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 5.026311737480413e-05, |
|
"loss": 2.7342, |
|
"step": 92200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.9991395473113116e-05, |
|
"loss": 2.7349, |
|
"step": 92300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.97196735714221e-05, |
|
"loss": 2.7388, |
|
"step": 92400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.944795166973108e-05, |
|
"loss": 2.7397, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.917622976804007e-05, |
|
"loss": 2.7352, |
|
"step": 92600 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.890450786634905e-05, |
|
"loss": 2.7392, |
|
"step": 92700 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.863278596465803e-05, |
|
"loss": 2.7419, |
|
"step": 92800 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.836106406296702e-05, |
|
"loss": 2.738, |
|
"step": 92900 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.8089342161276004e-05, |
|
"loss": 2.7418, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.46412548051767366, |
|
"eval_loss": 2.7533059120178223, |
|
"eval_runtime": 43.1643, |
|
"eval_samples_per_second": 150.193, |
|
"eval_steps_per_second": 2.525, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.781762025958498e-05, |
|
"loss": 2.7372, |
|
"step": 93100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.7545898357893974e-05, |
|
"loss": 2.7369, |
|
"step": 93200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.727417645620295e-05, |
|
"loss": 2.7331, |
|
"step": 93300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.700245455451193e-05, |
|
"loss": 2.7379, |
|
"step": 93400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.673073265282092e-05, |
|
"loss": 2.7341, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.64590107511299e-05, |
|
"loss": 2.7359, |
|
"step": 93600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.618728884943889e-05, |
|
"loss": 2.737, |
|
"step": 93700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.5915566947747873e-05, |
|
"loss": 2.7343, |
|
"step": 93800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.564656226507377e-05, |
|
"loss": 2.7346, |
|
"step": 93900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.537484036338275e-05, |
|
"loss": 2.7379, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.46473280041617143, |
|
"eval_loss": 2.748091697692871, |
|
"eval_runtime": 43.4169, |
|
"eval_samples_per_second": 149.32, |
|
"eval_steps_per_second": 2.511, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.5103118461691735e-05, |
|
"loss": 2.7341, |
|
"step": 94100 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.4831396560000724e-05, |
|
"loss": 2.7431, |
|
"step": 94200 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.4559674658309706e-05, |
|
"loss": 2.7347, |
|
"step": 94300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.428795275661869e-05, |
|
"loss": 2.7366, |
|
"step": 94400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.4016230854927676e-05, |
|
"loss": 2.7344, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.374450895323666e-05, |
|
"loss": 2.7382, |
|
"step": 94600 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.347278705154564e-05, |
|
"loss": 2.7279, |
|
"step": 94700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.320106514985463e-05, |
|
"loss": 2.7307, |
|
"step": 94800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.292934324816361e-05, |
|
"loss": 2.7275, |
|
"step": 94900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.26603385654895e-05, |
|
"loss": 2.7308, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.4653649212268588, |
|
"eval_loss": 2.7459847927093506, |
|
"eval_runtime": 43.1356, |
|
"eval_samples_per_second": 150.294, |
|
"eval_steps_per_second": 2.527, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.2388616663798484e-05, |
|
"loss": 2.7304, |
|
"step": 95100 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.211689476210747e-05, |
|
"loss": 2.7334, |
|
"step": 95200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.1845172860416455e-05, |
|
"loss": 2.7324, |
|
"step": 95300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.157345095872544e-05, |
|
"loss": 2.7338, |
|
"step": 95400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.1301729057034425e-05, |
|
"loss": 2.7334, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.103000715534341e-05, |
|
"loss": 2.7323, |
|
"step": 95600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.075828525365239e-05, |
|
"loss": 2.7338, |
|
"step": 95700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.048656335196138e-05, |
|
"loss": 2.73, |
|
"step": 95800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.021484145027036e-05, |
|
"loss": 2.7367, |
|
"step": 95900 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.994311954857934e-05, |
|
"loss": 2.727, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.46549799929226665, |
|
"eval_loss": 2.740849018096924, |
|
"eval_runtime": 43.5693, |
|
"eval_samples_per_second": 148.797, |
|
"eval_steps_per_second": 2.502, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.967139764688833e-05, |
|
"loss": 2.7257, |
|
"step": 96100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.939967574519731e-05, |
|
"loss": 2.7251, |
|
"step": 96200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.9127953843506295e-05, |
|
"loss": 2.7236, |
|
"step": 96300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.8856231941815284e-05, |
|
"loss": 2.7224, |
|
"step": 96400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.8584510040124266e-05, |
|
"loss": 2.7204, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.831278813843325e-05, |
|
"loss": 2.7249, |
|
"step": 96600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.804106623674224e-05, |
|
"loss": 2.7214, |
|
"step": 96700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.776934433505122e-05, |
|
"loss": 2.7242, |
|
"step": 96800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.74976224333602e-05, |
|
"loss": 2.7147, |
|
"step": 96900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.722861775068609e-05, |
|
"loss": 2.7282, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.4663823635269317, |
|
"eval_loss": 2.7350597381591797, |
|
"eval_runtime": 43.4285, |
|
"eval_samples_per_second": 149.28, |
|
"eval_steps_per_second": 2.51, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.695689584899508e-05, |
|
"loss": 2.718, |
|
"step": 97100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.668517394730406e-05, |
|
"loss": 2.7174, |
|
"step": 97200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.6413452045613044e-05, |
|
"loss": 2.7205, |
|
"step": 97300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.614173014392203e-05, |
|
"loss": 2.7195, |
|
"step": 97400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5870008242231015e-05, |
|
"loss": 2.7172, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.559828634054e-05, |
|
"loss": 2.7128, |
|
"step": 97600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.532656443884898e-05, |
|
"loss": 2.7192, |
|
"step": 97700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.505484253715797e-05, |
|
"loss": 2.7191, |
|
"step": 97800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.478312063546695e-05, |
|
"loss": 2.7178, |
|
"step": 97900 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.451139873377593e-05, |
|
"loss": 2.7133, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.46685176615764307, |
|
"eval_loss": 2.730079412460327, |
|
"eval_runtime": 43.3235, |
|
"eval_samples_per_second": 149.642, |
|
"eval_steps_per_second": 2.516, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.423967683208492e-05, |
|
"loss": 2.7164, |
|
"step": 98100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.39679549303939e-05, |
|
"loss": 2.7106, |
|
"step": 98200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.3696233028702885e-05, |
|
"loss": 2.715, |
|
"step": 98300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.3424511127011874e-05, |
|
"loss": 2.7091, |
|
"step": 98400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.3152789225320856e-05, |
|
"loss": 2.7093, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.288106732362984e-05, |
|
"loss": 2.7116, |
|
"step": 98600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.260934542193883e-05, |
|
"loss": 2.7172, |
|
"step": 98700 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.233762352024781e-05, |
|
"loss": 2.7072, |
|
"step": 98800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.206590161855679e-05, |
|
"loss": 2.7165, |
|
"step": 98900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.179417971686577e-05, |
|
"loss": 2.7136, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.4673356863954899, |
|
"eval_loss": 2.7250616550445557, |
|
"eval_runtime": 43.1535, |
|
"eval_samples_per_second": 150.231, |
|
"eval_steps_per_second": 2.526, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.152245781517476e-05, |
|
"loss": 2.7117, |
|
"step": 99100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.1250735913483744e-05, |
|
"loss": 2.7099, |
|
"step": 99200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.0979014011792726e-05, |
|
"loss": 2.715, |
|
"step": 99300 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.0707292110101715e-05, |
|
"loss": 2.7119, |
|
"step": 99400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.0435570208410697e-05, |
|
"loss": 2.7136, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.016384830671968e-05, |
|
"loss": 2.7069, |
|
"step": 99600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9892126405028664e-05, |
|
"loss": 2.7092, |
|
"step": 99700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.962040450333765e-05, |
|
"loss": 2.7052, |
|
"step": 99800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.934868260164663e-05, |
|
"loss": 2.7099, |
|
"step": 99900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.9076960699955617e-05, |
|
"loss": 2.7108, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.46786981335801325, |
|
"eval_loss": 2.7208478450775146, |
|
"eval_runtime": 43.4331, |
|
"eval_samples_per_second": 149.264, |
|
"eval_steps_per_second": 2.51, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8807956017281514e-05, |
|
"loss": 2.7137, |
|
"step": 100100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8536234115590493e-05, |
|
"loss": 2.7069, |
|
"step": 100200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.8264512213899478e-05, |
|
"loss": 2.698, |
|
"step": 100300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.7992790312208464e-05, |
|
"loss": 2.7027, |
|
"step": 100400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.7721068410517446e-05, |
|
"loss": 2.7062, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.744934650882643e-05, |
|
"loss": 2.7064, |
|
"step": 100600 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.718034182615232e-05, |
|
"loss": 2.7059, |
|
"step": 100700 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.691133714347822e-05, |
|
"loss": 2.7146, |
|
"step": 100800 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.6639615241787204e-05, |
|
"loss": 2.7036, |
|
"step": 100900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.6367893340096186e-05, |
|
"loss": 2.7051, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.46807245495761163, |
|
"eval_loss": 2.7191717624664307, |
|
"eval_runtime": 43.4633, |
|
"eval_samples_per_second": 149.16, |
|
"eval_steps_per_second": 2.508, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.609617143840517e-05, |
|
"loss": 2.7007, |
|
"step": 101100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.5824449536714157e-05, |
|
"loss": 2.7024, |
|
"step": 101200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.555272763502314e-05, |
|
"loss": 2.7027, |
|
"step": 101300 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.5281005733332124e-05, |
|
"loss": 2.7082, |
|
"step": 101400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.500928383164111e-05, |
|
"loss": 2.7067, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.4737561929950092e-05, |
|
"loss": 2.7044, |
|
"step": 101600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.4465840028259074e-05, |
|
"loss": 2.705, |
|
"step": 101700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.419411812656806e-05, |
|
"loss": 2.7069, |
|
"step": 101800 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3922396224877045e-05, |
|
"loss": 2.7005, |
|
"step": 101900 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3650674323186027e-05, |
|
"loss": 2.7013, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.4687317962816779, |
|
"eval_loss": 2.7151107788085938, |
|
"eval_runtime": 43.2863, |
|
"eval_samples_per_second": 149.77, |
|
"eval_steps_per_second": 2.518, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3378952421495012e-05, |
|
"loss": 2.7029, |
|
"step": 102100 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.3107230519803998e-05, |
|
"loss": 2.7007, |
|
"step": 102200 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.283550861811298e-05, |
|
"loss": 2.7089, |
|
"step": 102300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.2563786716421965e-05, |
|
"loss": 2.7018, |
|
"step": 102400 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.229206481473095e-05, |
|
"loss": 2.6984, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.202306013205684e-05, |
|
"loss": 2.7011, |
|
"step": 102600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.1751338230365826e-05, |
|
"loss": 2.6968, |
|
"step": 102700 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.1479616328674812e-05, |
|
"loss": 2.701, |
|
"step": 102800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.1207894426983794e-05, |
|
"loss": 2.7079, |
|
"step": 102900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.093617252529278e-05, |
|
"loss": 2.6996, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.46891387127116774, |
|
"eval_loss": 2.7129361629486084, |
|
"eval_runtime": 43.7353, |
|
"eval_samples_per_second": 148.233, |
|
"eval_steps_per_second": 2.492, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.0664450623601765e-05, |
|
"loss": 2.6985, |
|
"step": 103100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.0392728721910743e-05, |
|
"loss": 2.6945, |
|
"step": 103200 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.012100682021973e-05, |
|
"loss": 2.6988, |
|
"step": 103300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.9849284918528714e-05, |
|
"loss": 2.701, |
|
"step": 103400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.9577563016837696e-05, |
|
"loss": 2.7044, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.930584111514668e-05, |
|
"loss": 2.6897, |
|
"step": 103600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.9034119213455667e-05, |
|
"loss": 2.6993, |
|
"step": 103700 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.8762397311764652e-05, |
|
"loss": 2.6978, |
|
"step": 103800 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.8490675410073634e-05, |
|
"loss": 2.6965, |
|
"step": 103900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.821895350838262e-05, |
|
"loss": 2.6898, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.46940021111020375, |
|
"eval_loss": 2.7084131240844727, |
|
"eval_runtime": 44.0036, |
|
"eval_samples_per_second": 147.329, |
|
"eval_steps_per_second": 2.477, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7947231606691602e-05, |
|
"loss": 2.6918, |
|
"step": 104100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7675509705000587e-05, |
|
"loss": 2.6941, |
|
"step": 104200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7403787803309573e-05, |
|
"loss": 2.6954, |
|
"step": 104300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7132065901618555e-05, |
|
"loss": 2.7015, |
|
"step": 104400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.686034399992754e-05, |
|
"loss": 2.698, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6588622098236522e-05, |
|
"loss": 2.6922, |
|
"step": 104600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.6319617415562416e-05, |
|
"loss": 2.6932, |
|
"step": 104700 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.60478955138714e-05, |
|
"loss": 2.6887, |
|
"step": 104800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5776173612180387e-05, |
|
"loss": 2.6887, |
|
"step": 104900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.550445171048937e-05, |
|
"loss": 2.688, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.4697316964731288, |
|
"eval_loss": 2.705327272415161, |
|
"eval_runtime": 43.7246, |
|
"eval_samples_per_second": 148.269, |
|
"eval_steps_per_second": 2.493, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.5232729808798354e-05, |
|
"loss": 2.6933, |
|
"step": 105100 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.4961007907107338e-05, |
|
"loss": 2.6992, |
|
"step": 105200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.468928600541632e-05, |
|
"loss": 2.6943, |
|
"step": 105300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.4417564103725306e-05, |
|
"loss": 2.6919, |
|
"step": 105400 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.414584220203429e-05, |
|
"loss": 2.6961, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3874120300343275e-05, |
|
"loss": 2.6942, |
|
"step": 105600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3602398398652258e-05, |
|
"loss": 2.6936, |
|
"step": 105700 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3330676496961242e-05, |
|
"loss": 2.6851, |
|
"step": 105800 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.3058954595270228e-05, |
|
"loss": 2.6929, |
|
"step": 105900 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.278723269357921e-05, |
|
"loss": 2.6855, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.4701273012675686, |
|
"eval_loss": 2.701770305633545, |
|
"eval_runtime": 44.1379, |
|
"eval_samples_per_second": 146.881, |
|
"eval_steps_per_second": 2.47, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2515510791888195e-05, |
|
"loss": 2.6922, |
|
"step": 106100 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.2243788890197179e-05, |
|
"loss": 2.6811, |
|
"step": 106200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1972066988506163e-05, |
|
"loss": 2.6819, |
|
"step": 106300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1700345086815148e-05, |
|
"loss": 2.6882, |
|
"step": 106400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.142862318512413e-05, |
|
"loss": 2.685, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1159618502450025e-05, |
|
"loss": 2.6841, |
|
"step": 106600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0887896600759008e-05, |
|
"loss": 2.6806, |
|
"step": 106700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0616174699067993e-05, |
|
"loss": 2.6896, |
|
"step": 106800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0344452797376977e-05, |
|
"loss": 2.6807, |
|
"step": 106900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.0072730895685962e-05, |
|
"loss": 2.6852, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.4704999198507106, |
|
"eval_loss": 2.698939085006714, |
|
"eval_runtime": 43.9086, |
|
"eval_samples_per_second": 147.648, |
|
"eval_steps_per_second": 2.482, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.803726213011856e-06, |
|
"loss": 2.6861, |
|
"step": 107100 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.53200431132084e-06, |
|
"loss": 2.6886, |
|
"step": 107200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 9.260282409629823e-06, |
|
"loss": 2.6872, |
|
"step": 107300 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.988560507938807e-06, |
|
"loss": 2.685, |
|
"step": 107400 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.71683860624779e-06, |
|
"loss": 2.6892, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.445116704556776e-06, |
|
"loss": 2.6815, |
|
"step": 107600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.17339480286576e-06, |
|
"loss": 2.6879, |
|
"step": 107700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.901672901174744e-06, |
|
"loss": 2.6822, |
|
"step": 107800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.629950999483727e-06, |
|
"loss": 2.6806, |
|
"step": 107900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.360946316809621e-06, |
|
"loss": 2.689, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.4705204864608191, |
|
"eval_loss": 2.6981818675994873, |
|
"eval_runtime": 43.1633, |
|
"eval_samples_per_second": 150.197, |
|
"eval_steps_per_second": 2.525, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 7.089224415118606e-06, |
|
"loss": 2.6872, |
|
"step": 108100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.81750251342759e-06, |
|
"loss": 2.6962, |
|
"step": 108200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.545780611736574e-06, |
|
"loss": 2.6831, |
|
"step": 108300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.274058710045559e-06, |
|
"loss": 2.6877, |
|
"step": 108400 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.0023368083545415e-06, |
|
"loss": 2.6956, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.730614906663526e-06, |
|
"loss": 2.6936, |
|
"step": 108600 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.458893004972511e-06, |
|
"loss": 2.6864, |
|
"step": 108700 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.187171103281495e-06, |
|
"loss": 2.6838, |
|
"step": 108800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.915449201590478e-06, |
|
"loss": 2.6867, |
|
"step": 108900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.643727299899463e-06, |
|
"loss": 2.6868, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.4707297819636878, |
|
"eval_loss": 2.6994001865386963, |
|
"eval_runtime": 43.0302, |
|
"eval_samples_per_second": 150.662, |
|
"eval_steps_per_second": 2.533, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.3720053982084465e-06, |
|
"loss": 2.689, |
|
"step": 109100 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.10028349651743e-06, |
|
"loss": 2.6831, |
|
"step": 109200 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.831278813843325e-06, |
|
"loss": 2.6825, |
|
"step": 109300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.559556912152309e-06, |
|
"loss": 2.6851, |
|
"step": 109400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.2878350104612927e-06, |
|
"loss": 2.6798, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.016113108770277e-06, |
|
"loss": 2.6773, |
|
"step": 109600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.744391207079261e-06, |
|
"loss": 2.6829, |
|
"step": 109700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.472669305388245e-06, |
|
"loss": 2.6819, |
|
"step": 109800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.2036646227141394e-06, |
|
"loss": 2.6827, |
|
"step": 109900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.931942721023123e-06, |
|
"loss": 2.6901, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.47069106834466007, |
|
"eval_loss": 2.700648307800293, |
|
"eval_runtime": 43.0535, |
|
"eval_samples_per_second": 150.58, |
|
"eval_steps_per_second": 2.532, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.6602208193321073e-06, |
|
"loss": 2.6809, |
|
"step": 110100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.3884989176410914e-06, |
|
"loss": 2.6866, |
|
"step": 110200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.1167770159500756e-06, |
|
"loss": 2.6863, |
|
"step": 110300 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 8.450551142590596e-07, |
|
"loss": 2.6912, |
|
"step": 110400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5.733332125680437e-07, |
|
"loss": 2.6916, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.0161131087702765e-07, |
|
"loss": 2.684, |
|
"step": 110600 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 110607, |
|
"total_flos": 2.899312376933253e+20, |
|
"train_loss": 2.8584754099769967, |
|
"train_runtime": 318077.2613, |
|
"train_samples_per_second": 83.457, |
|
"train_steps_per_second": 0.348 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 110607, |
|
"num_train_epochs": 1, |
|
"save_steps": 11061, |
|
"total_flos": 2.899312376933253e+20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|