|
{ |
|
"best_metric": 3.962663412094116, |
|
"best_model_checkpoint": "output_hemo_aug_4/checkpoint-990", |
|
"epoch": 200.0, |
|
"eval_steps": 500, |
|
"global_step": 1200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 9.950000000000001e-06, |
|
"loss": 9.4301, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.024437927663734114, |
|
"eval_loss": 8.684534072875977, |
|
"eval_runtime": 3.4336, |
|
"eval_samples_per_second": 0.291, |
|
"eval_steps_per_second": 0.291, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 9.9e-06, |
|
"loss": 8.4172, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.08113391984359726, |
|
"eval_loss": 7.8148698806762695, |
|
"eval_runtime": 3.5386, |
|
"eval_samples_per_second": 0.283, |
|
"eval_steps_per_second": 0.283, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 9.85e-06, |
|
"loss": 7.6869, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.15542521994134897, |
|
"eval_loss": 7.201221942901611, |
|
"eval_runtime": 3.2749, |
|
"eval_samples_per_second": 0.305, |
|
"eval_steps_per_second": 0.305, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 7.1731, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.1827956989247312, |
|
"eval_loss": 6.913944721221924, |
|
"eval_runtime": 3.2424, |
|
"eval_samples_per_second": 0.308, |
|
"eval_steps_per_second": 0.308, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 9.75e-06, |
|
"loss": 6.8807, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.19550342130987292, |
|
"eval_loss": 6.623848915100098, |
|
"eval_runtime": 3.4613, |
|
"eval_samples_per_second": 0.289, |
|
"eval_steps_per_second": 0.289, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 9.7e-06, |
|
"loss": 6.6009, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.1935483870967742, |
|
"eval_loss": 6.384663105010986, |
|
"eval_runtime": 4.2768, |
|
"eval_samples_per_second": 0.234, |
|
"eval_steps_per_second": 0.234, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 9.65e-06, |
|
"loss": 6.4347, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.20625610948191594, |
|
"eval_loss": 6.234148979187012, |
|
"eval_runtime": 3.3994, |
|
"eval_samples_per_second": 0.294, |
|
"eval_steps_per_second": 0.294, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 6.2831, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.21407624633431085, |
|
"eval_loss": 6.096428394317627, |
|
"eval_runtime": 3.1809, |
|
"eval_samples_per_second": 0.314, |
|
"eval_steps_per_second": 0.314, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 9.55e-06, |
|
"loss": 6.1728, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.2209188660801564, |
|
"eval_loss": 5.98640775680542, |
|
"eval_runtime": 3.4302, |
|
"eval_samples_per_second": 0.292, |
|
"eval_steps_per_second": 0.292, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 9.5e-06, |
|
"loss": 6.0805, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.2316715542521994, |
|
"eval_loss": 5.893611907958984, |
|
"eval_runtime": 3.6923, |
|
"eval_samples_per_second": 0.271, |
|
"eval_steps_per_second": 0.271, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 9.450000000000001e-06, |
|
"loss": 5.9959, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.2404692082111437, |
|
"eval_loss": 5.81611967086792, |
|
"eval_runtime": 3.2748, |
|
"eval_samples_per_second": 0.305, |
|
"eval_steps_per_second": 0.305, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 9.4e-06, |
|
"loss": 5.925, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.23851417399804498, |
|
"eval_loss": 5.745607852935791, |
|
"eval_runtime": 3.4474, |
|
"eval_samples_per_second": 0.29, |
|
"eval_steps_per_second": 0.29, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 9.350000000000002e-06, |
|
"loss": 5.8787, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.2482893450635386, |
|
"eval_loss": 5.664583683013916, |
|
"eval_runtime": 3.5432, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.282, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 9.3e-06, |
|
"loss": 5.7996, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.24926686217008798, |
|
"eval_loss": 5.590141296386719, |
|
"eval_runtime": 3.2321, |
|
"eval_samples_per_second": 0.309, |
|
"eval_steps_per_second": 0.309, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 9.250000000000001e-06, |
|
"loss": 5.7312, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.25317693059628543, |
|
"eval_loss": 5.521559238433838, |
|
"eval_runtime": 3.5703, |
|
"eval_samples_per_second": 0.28, |
|
"eval_steps_per_second": 0.28, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 5.6751, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.2590420332355816, |
|
"eval_loss": 5.469524383544922, |
|
"eval_runtime": 3.5655, |
|
"eval_samples_per_second": 0.28, |
|
"eval_steps_per_second": 0.28, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"learning_rate": 9.15e-06, |
|
"loss": 5.6076, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.2619745845552297, |
|
"eval_loss": 5.421563148498535, |
|
"eval_runtime": 3.4299, |
|
"eval_samples_per_second": 0.292, |
|
"eval_steps_per_second": 0.292, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 9.100000000000001e-06, |
|
"loss": 5.569, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.2619745845552297, |
|
"eval_loss": 5.373498916625977, |
|
"eval_runtime": 3.4799, |
|
"eval_samples_per_second": 0.287, |
|
"eval_steps_per_second": 0.287, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"learning_rate": 9.050000000000001e-06, |
|
"loss": 5.5037, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.26295210166177907, |
|
"eval_loss": 5.327227592468262, |
|
"eval_runtime": 3.5293, |
|
"eval_samples_per_second": 0.283, |
|
"eval_steps_per_second": 0.283, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 9e-06, |
|
"loss": 5.4681, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.2697947214076246, |
|
"eval_loss": 5.285594463348389, |
|
"eval_runtime": 3.6414, |
|
"eval_samples_per_second": 0.275, |
|
"eval_steps_per_second": 0.275, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"learning_rate": 8.95e-06, |
|
"loss": 5.4225, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.27174975562072334, |
|
"eval_loss": 5.255927085876465, |
|
"eval_runtime": 3.6672, |
|
"eval_samples_per_second": 0.273, |
|
"eval_steps_per_second": 0.273, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 8.900000000000001e-06, |
|
"loss": 5.3805, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.27663734115347016, |
|
"eval_loss": 5.212594509124756, |
|
"eval_runtime": 3.547, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.282, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"learning_rate": 8.85e-06, |
|
"loss": 5.3527, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.2756598240469208, |
|
"eval_loss": 5.1883745193481445, |
|
"eval_runtime": 3.3405, |
|
"eval_samples_per_second": 0.299, |
|
"eval_steps_per_second": 0.299, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 8.8e-06, |
|
"loss": 5.3033, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.27956989247311825, |
|
"eval_loss": 5.153918266296387, |
|
"eval_runtime": 3.6205, |
|
"eval_samples_per_second": 0.276, |
|
"eval_steps_per_second": 0.276, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 8.750000000000001e-06, |
|
"loss": 5.2635, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.2854349951124145, |
|
"eval_loss": 5.111028671264648, |
|
"eval_runtime": 3.658, |
|
"eval_samples_per_second": 0.273, |
|
"eval_steps_per_second": 0.273, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 8.700000000000001e-06, |
|
"loss": 5.2411, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.2854349951124145, |
|
"eval_loss": 5.0881805419921875, |
|
"eval_runtime": 3.5731, |
|
"eval_samples_per_second": 0.28, |
|
"eval_steps_per_second": 0.28, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"learning_rate": 8.65e-06, |
|
"loss": 5.1972, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.2903225806451613, |
|
"eval_loss": 5.057516098022461, |
|
"eval_runtime": 3.6003, |
|
"eval_samples_per_second": 0.278, |
|
"eval_steps_per_second": 0.278, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 8.6e-06, |
|
"loss": 5.163, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.2913000977517107, |
|
"eval_loss": 5.029298782348633, |
|
"eval_runtime": 3.5678, |
|
"eval_samples_per_second": 0.28, |
|
"eval_steps_per_second": 0.28, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"learning_rate": 8.550000000000001e-06, |
|
"loss": 5.1273, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.2903225806451613, |
|
"eval_loss": 5.00468111038208, |
|
"eval_runtime": 3.5535, |
|
"eval_samples_per_second": 0.281, |
|
"eval_steps_per_second": 0.281, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 8.5e-06, |
|
"loss": 5.1032, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.29521016617790813, |
|
"eval_loss": 4.981687545776367, |
|
"eval_runtime": 3.5476, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.282, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"learning_rate": 8.45e-06, |
|
"loss": 5.0726, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.29521016617790813, |
|
"eval_loss": 4.958263874053955, |
|
"eval_runtime": 3.574, |
|
"eval_samples_per_second": 0.28, |
|
"eval_steps_per_second": 0.28, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 5.0405, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.29521016617790813, |
|
"eval_loss": 4.9354777336120605, |
|
"eval_runtime": 3.2861, |
|
"eval_samples_per_second": 0.304, |
|
"eval_steps_per_second": 0.304, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"learning_rate": 8.35e-06, |
|
"loss": 5.007, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.29521016617790813, |
|
"eval_loss": 4.918440818786621, |
|
"eval_runtime": 3.5379, |
|
"eval_samples_per_second": 0.283, |
|
"eval_steps_per_second": 0.283, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 8.3e-06, |
|
"loss": 4.9897, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.29716520039100686, |
|
"eval_loss": 4.891113758087158, |
|
"eval_runtime": 3.2282, |
|
"eval_samples_per_second": 0.31, |
|
"eval_steps_per_second": 0.31, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 8.25e-06, |
|
"loss": 4.9416, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.29716520039100686, |
|
"eval_loss": 4.862751483917236, |
|
"eval_runtime": 3.5473, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.282, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 8.2e-06, |
|
"loss": 4.9245, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.2981427174975562, |
|
"eval_loss": 4.849916934967041, |
|
"eval_runtime": 3.5627, |
|
"eval_samples_per_second": 0.281, |
|
"eval_steps_per_second": 0.281, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"learning_rate": 8.15e-06, |
|
"loss": 4.901, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.30303030303030304, |
|
"eval_loss": 4.826337814331055, |
|
"eval_runtime": 3.475, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.288, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 8.1e-06, |
|
"loss": 4.8713, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.30303030303030304, |
|
"eval_loss": 4.803491115570068, |
|
"eval_runtime": 3.4779, |
|
"eval_samples_per_second": 0.288, |
|
"eval_steps_per_second": 0.288, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"learning_rate": 8.050000000000001e-06, |
|
"loss": 4.845, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.30596285434995113, |
|
"eval_loss": 4.7873663902282715, |
|
"eval_runtime": 3.3398, |
|
"eval_samples_per_second": 0.299, |
|
"eval_steps_per_second": 0.299, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 4.8052, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.3040078201368524, |
|
"eval_loss": 4.753478527069092, |
|
"eval_runtime": 3.1758, |
|
"eval_samples_per_second": 0.315, |
|
"eval_steps_per_second": 0.315, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"learning_rate": 7.950000000000002e-06, |
|
"loss": 4.7786, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.30596285434995113, |
|
"eval_loss": 4.731250762939453, |
|
"eval_runtime": 3.2392, |
|
"eval_samples_per_second": 0.309, |
|
"eval_steps_per_second": 0.309, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 7.9e-06, |
|
"loss": 4.7501, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.3088954056695992, |
|
"eval_loss": 4.717497825622559, |
|
"eval_runtime": 3.5586, |
|
"eval_samples_per_second": 0.281, |
|
"eval_steps_per_second": 0.281, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"learning_rate": 7.850000000000001e-06, |
|
"loss": 4.7221, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.3118279569892473, |
|
"eval_loss": 4.697778701782227, |
|
"eval_runtime": 3.4899, |
|
"eval_samples_per_second": 0.287, |
|
"eval_steps_per_second": 0.287, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 4.7038, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.31085043988269795, |
|
"eval_loss": 4.678452014923096, |
|
"eval_runtime": 3.2849, |
|
"eval_samples_per_second": 0.304, |
|
"eval_steps_per_second": 0.304, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 7.75e-06, |
|
"loss": 4.681, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.3128054740957967, |
|
"eval_loss": 4.666133403778076, |
|
"eval_runtime": 3.4251, |
|
"eval_samples_per_second": 0.292, |
|
"eval_steps_per_second": 0.292, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 7.7e-06, |
|
"loss": 4.6566, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.31573802541544477, |
|
"eval_loss": 4.653237342834473, |
|
"eval_runtime": 3.1995, |
|
"eval_samples_per_second": 0.313, |
|
"eval_steps_per_second": 0.313, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"learning_rate": 7.650000000000001e-06, |
|
"loss": 4.632, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.31573802541544477, |
|
"eval_loss": 4.636072635650635, |
|
"eval_runtime": 3.5333, |
|
"eval_samples_per_second": 0.283, |
|
"eval_steps_per_second": 0.283, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 4.618, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.3196480938416422, |
|
"eval_loss": 4.6162004470825195, |
|
"eval_runtime": 3.2635, |
|
"eval_samples_per_second": 0.306, |
|
"eval_steps_per_second": 0.306, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"learning_rate": 7.5500000000000006e-06, |
|
"loss": 4.5928, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.32453567937438904, |
|
"eval_loss": 4.598696708679199, |
|
"eval_runtime": 3.5544, |
|
"eval_samples_per_second": 0.281, |
|
"eval_steps_per_second": 0.281, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 4.5716, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.32160312805474095, |
|
"eval_loss": 4.584750175476074, |
|
"eval_runtime": 3.2866, |
|
"eval_samples_per_second": 0.304, |
|
"eval_steps_per_second": 0.304, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"learning_rate": 7.450000000000001e-06, |
|
"loss": 4.5485, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.32453567937438904, |
|
"eval_loss": 4.572133541107178, |
|
"eval_runtime": 4.8161, |
|
"eval_samples_per_second": 0.208, |
|
"eval_steps_per_second": 0.208, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 7.4e-06, |
|
"loss": 4.5324, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.3196480938416422, |
|
"eval_loss": 4.557907581329346, |
|
"eval_runtime": 3.1695, |
|
"eval_samples_per_second": 0.316, |
|
"eval_steps_per_second": 0.316, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 7.350000000000001e-06, |
|
"loss": 4.5038, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.3196480938416422, |
|
"eval_loss": 4.542334079742432, |
|
"eval_runtime": 3.3713, |
|
"eval_samples_per_second": 0.297, |
|
"eval_steps_per_second": 0.297, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 7.3e-06, |
|
"loss": 4.4831, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.32649071358748777, |
|
"eval_loss": 4.524044036865234, |
|
"eval_runtime": 3.1022, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 7.25e-06, |
|
"loss": 4.4347, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.3255131964809384, |
|
"eval_loss": 4.508722305297852, |
|
"eval_runtime": 3.0991, |
|
"eval_samples_per_second": 0.323, |
|
"eval_steps_per_second": 0.323, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 4.4218, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.3255131964809384, |
|
"eval_loss": 4.484982013702393, |
|
"eval_runtime": 3.3635, |
|
"eval_samples_per_second": 0.297, |
|
"eval_steps_per_second": 0.297, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"learning_rate": 7.15e-06, |
|
"loss": 4.3939, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.32746823069403713, |
|
"eval_loss": 4.479069709777832, |
|
"eval_runtime": 3.1386, |
|
"eval_samples_per_second": 0.319, |
|
"eval_steps_per_second": 0.319, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 7.100000000000001e-06, |
|
"loss": 4.3766, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.32649071358748777, |
|
"eval_loss": 4.463999271392822, |
|
"eval_runtime": 3.2574, |
|
"eval_samples_per_second": 0.307, |
|
"eval_steps_per_second": 0.307, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"learning_rate": 7.05e-06, |
|
"loss": 4.3472, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.32746823069403713, |
|
"eval_loss": 4.4470648765563965, |
|
"eval_runtime": 3.1788, |
|
"eval_samples_per_second": 0.315, |
|
"eval_steps_per_second": 0.315, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 7e-06, |
|
"loss": 4.3241, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.32746823069403713, |
|
"eval_loss": 4.433444499969482, |
|
"eval_runtime": 3.1305, |
|
"eval_samples_per_second": 0.319, |
|
"eval_steps_per_second": 0.319, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"learning_rate": 6.95e-06, |
|
"loss": 4.2919, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.3304007820136852, |
|
"eval_loss": 4.42958927154541, |
|
"eval_runtime": 3.1351, |
|
"eval_samples_per_second": 0.319, |
|
"eval_steps_per_second": 0.319, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 6.9e-06, |
|
"loss": 4.2678, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.3343108504398827, |
|
"eval_loss": 4.428121566772461, |
|
"eval_runtime": 3.0787, |
|
"eval_samples_per_second": 0.325, |
|
"eval_steps_per_second": 0.325, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"learning_rate": 6.850000000000001e-06, |
|
"loss": 4.2515, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.33724340175953077, |
|
"eval_loss": 4.412038803100586, |
|
"eval_runtime": 3.3246, |
|
"eval_samples_per_second": 0.301, |
|
"eval_steps_per_second": 0.301, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 4.2244, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.3343108504398827, |
|
"eval_loss": 4.403759002685547, |
|
"eval_runtime": 3.1858, |
|
"eval_samples_per_second": 0.314, |
|
"eval_steps_per_second": 0.314, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 6.750000000000001e-06, |
|
"loss": 4.2129, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.3391984359726295, |
|
"eval_loss": 4.382594585418701, |
|
"eval_runtime": 3.1111, |
|
"eval_samples_per_second": 0.321, |
|
"eval_steps_per_second": 0.321, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 6.700000000000001e-06, |
|
"loss": 4.1882, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.33724340175953077, |
|
"eval_loss": 4.383427143096924, |
|
"eval_runtime": 3.2114, |
|
"eval_samples_per_second": 0.311, |
|
"eval_steps_per_second": 0.311, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"learning_rate": 6.650000000000001e-06, |
|
"loss": 4.1503, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.33724340175953077, |
|
"eval_loss": 4.37380838394165, |
|
"eval_runtime": 3.1592, |
|
"eval_samples_per_second": 0.317, |
|
"eval_steps_per_second": 0.317, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 4.1398, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.33724340175953077, |
|
"eval_loss": 4.359556674957275, |
|
"eval_runtime": 3.2732, |
|
"eval_samples_per_second": 0.306, |
|
"eval_steps_per_second": 0.306, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"learning_rate": 6.550000000000001e-06, |
|
"loss": 4.115, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.3411534701857282, |
|
"eval_loss": 4.337615966796875, |
|
"eval_runtime": 3.5266, |
|
"eval_samples_per_second": 0.284, |
|
"eval_steps_per_second": 0.284, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 4.1052, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.3411534701857282, |
|
"eval_loss": 4.333048343658447, |
|
"eval_runtime": 3.1824, |
|
"eval_samples_per_second": 0.314, |
|
"eval_steps_per_second": 0.314, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"learning_rate": 6.450000000000001e-06, |
|
"loss": 4.0932, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.3411534701857282, |
|
"eval_loss": 4.329537391662598, |
|
"eval_runtime": 3.1403, |
|
"eval_samples_per_second": 0.318, |
|
"eval_steps_per_second": 0.318, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 4.0573, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.3411534701857282, |
|
"eval_loss": 4.311087131500244, |
|
"eval_runtime": 3.3734, |
|
"eval_samples_per_second": 0.296, |
|
"eval_steps_per_second": 0.296, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"learning_rate": 6.35e-06, |
|
"loss": 4.0449, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.34408602150537637, |
|
"eval_loss": 4.304787635803223, |
|
"eval_runtime": 3.3551, |
|
"eval_samples_per_second": 0.298, |
|
"eval_steps_per_second": 0.298, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 6.300000000000001e-06, |
|
"loss": 4.0165, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.3460410557184751, |
|
"eval_loss": 4.293551445007324, |
|
"eval_runtime": 3.1253, |
|
"eval_samples_per_second": 0.32, |
|
"eval_steps_per_second": 0.32, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 6.25e-06, |
|
"loss": 3.9936, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.3509286412512219, |
|
"eval_loss": 4.281482696533203, |
|
"eval_runtime": 3.2326, |
|
"eval_samples_per_second": 0.309, |
|
"eval_steps_per_second": 0.309, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 3.967, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.35386119257087, |
|
"eval_loss": 4.268586158752441, |
|
"eval_runtime": 3.1926, |
|
"eval_samples_per_second": 0.313, |
|
"eval_steps_per_second": 0.313, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"learning_rate": 6.15e-06, |
|
"loss": 3.9524, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.3509286412512219, |
|
"eval_loss": 4.269726276397705, |
|
"eval_runtime": 3.0654, |
|
"eval_samples_per_second": 0.326, |
|
"eval_steps_per_second": 0.326, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 6.1e-06, |
|
"loss": 3.9287, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.35288367546432065, |
|
"eval_loss": 4.254611015319824, |
|
"eval_runtime": 3.2483, |
|
"eval_samples_per_second": 0.308, |
|
"eval_steps_per_second": 0.308, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"learning_rate": 6.0500000000000005e-06, |
|
"loss": 3.9092, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.35386119257087, |
|
"eval_loss": 4.248389720916748, |
|
"eval_runtime": 3.279, |
|
"eval_samples_per_second": 0.305, |
|
"eval_steps_per_second": 0.305, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 6e-06, |
|
"loss": 3.8907, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.35386119257087, |
|
"eval_loss": 4.242007255554199, |
|
"eval_runtime": 3.1903, |
|
"eval_samples_per_second": 0.313, |
|
"eval_steps_per_second": 0.313, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"learning_rate": 5.950000000000001e-06, |
|
"loss": 3.8704, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.35288367546432065, |
|
"eval_loss": 4.241766929626465, |
|
"eval_runtime": 3.0667, |
|
"eval_samples_per_second": 0.326, |
|
"eval_steps_per_second": 0.326, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 5.9e-06, |
|
"loss": 3.8499, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.3548387096774194, |
|
"eval_loss": 4.226541519165039, |
|
"eval_runtime": 3.1032, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"learning_rate": 5.85e-06, |
|
"loss": 3.8325, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.3548387096774194, |
|
"eval_loss": 4.208946704864502, |
|
"eval_runtime": 3.0764, |
|
"eval_samples_per_second": 0.325, |
|
"eval_steps_per_second": 0.325, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 5.8e-06, |
|
"loss": 3.8024, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.3567937438905181, |
|
"eval_loss": 4.205758094787598, |
|
"eval_runtime": 3.0957, |
|
"eval_samples_per_second": 0.323, |
|
"eval_steps_per_second": 0.323, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 5.75e-06, |
|
"loss": 3.8058, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.35581622678396874, |
|
"eval_loss": 4.203883647918701, |
|
"eval_runtime": 3.1095, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 5.7e-06, |
|
"loss": 3.7888, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.35777126099706746, |
|
"eval_loss": 4.190591335296631, |
|
"eval_runtime": 3.1, |
|
"eval_samples_per_second": 0.323, |
|
"eval_steps_per_second": 0.323, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"learning_rate": 5.65e-06, |
|
"loss": 3.7622, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.3616813294232649, |
|
"eval_loss": 4.179159164428711, |
|
"eval_runtime": 3.1107, |
|
"eval_samples_per_second": 0.321, |
|
"eval_steps_per_second": 0.321, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 3.746, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.35777126099706746, |
|
"eval_loss": 4.18191385269165, |
|
"eval_runtime": 3.1176, |
|
"eval_samples_per_second": 0.321, |
|
"eval_steps_per_second": 0.321, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"learning_rate": 5.550000000000001e-06, |
|
"loss": 3.7196, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.3597262952101662, |
|
"eval_loss": 4.178895473480225, |
|
"eval_runtime": 3.3763, |
|
"eval_samples_per_second": 0.296, |
|
"eval_steps_per_second": 0.296, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 5.500000000000001e-06, |
|
"loss": 3.7046, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.36070381231671556, |
|
"eval_loss": 4.160989284515381, |
|
"eval_runtime": 3.3055, |
|
"eval_samples_per_second": 0.303, |
|
"eval_steps_per_second": 0.303, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"learning_rate": 5.450000000000001e-06, |
|
"loss": 3.7078, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.36070381231671556, |
|
"eval_loss": 4.151462078094482, |
|
"eval_runtime": 3.0842, |
|
"eval_samples_per_second": 0.324, |
|
"eval_steps_per_second": 0.324, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 5.400000000000001e-06, |
|
"loss": 3.6687, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.36070381231671556, |
|
"eval_loss": 4.17518424987793, |
|
"eval_runtime": 3.097, |
|
"eval_samples_per_second": 0.323, |
|
"eval_steps_per_second": 0.323, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"learning_rate": 5.3500000000000004e-06, |
|
"loss": 3.6559, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.36363636363636365, |
|
"eval_loss": 4.128724575042725, |
|
"eval_runtime": 3.2229, |
|
"eval_samples_per_second": 0.31, |
|
"eval_steps_per_second": 0.31, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"learning_rate": 5.300000000000001e-06, |
|
"loss": 3.6401, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.364613880742913, |
|
"eval_loss": 4.156914234161377, |
|
"eval_runtime": 3.0553, |
|
"eval_samples_per_second": 0.327, |
|
"eval_steps_per_second": 0.327, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 5.2500000000000006e-06, |
|
"loss": 3.6281, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.3626588465298143, |
|
"eval_loss": 4.123438358306885, |
|
"eval_runtime": 3.127, |
|
"eval_samples_per_second": 0.32, |
|
"eval_steps_per_second": 0.32, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 5.2e-06, |
|
"loss": 3.5978, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.36950146627565983, |
|
"eval_loss": 4.1269941329956055, |
|
"eval_runtime": 3.0893, |
|
"eval_samples_per_second": 0.324, |
|
"eval_steps_per_second": 0.324, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"learning_rate": 5.150000000000001e-06, |
|
"loss": 3.5951, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.364613880742913, |
|
"eval_loss": 4.118751049041748, |
|
"eval_runtime": 3.148, |
|
"eval_samples_per_second": 0.318, |
|
"eval_steps_per_second": 0.318, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"learning_rate": 5.1e-06, |
|
"loss": 3.5679, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.36852394916911047, |
|
"eval_loss": 4.128164291381836, |
|
"eval_runtime": 3.0607, |
|
"eval_samples_per_second": 0.327, |
|
"eval_steps_per_second": 0.327, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"learning_rate": 5.050000000000001e-06, |
|
"loss": 3.5618, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.364613880742913, |
|
"eval_loss": 4.1089253425598145, |
|
"eval_runtime": 3.0857, |
|
"eval_samples_per_second": 0.324, |
|
"eval_steps_per_second": 0.324, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 5e-06, |
|
"loss": 3.5404, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.36950146627565983, |
|
"eval_loss": 4.1090216636657715, |
|
"eval_runtime": 3.1735, |
|
"eval_samples_per_second": 0.315, |
|
"eval_steps_per_second": 0.315, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"learning_rate": 4.95e-06, |
|
"loss": 3.5255, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.364613880742913, |
|
"eval_loss": 4.103869915008545, |
|
"eval_runtime": 3.1307, |
|
"eval_samples_per_second": 0.319, |
|
"eval_steps_per_second": 0.319, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"learning_rate": 4.9000000000000005e-06, |
|
"loss": 3.5111, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.36950146627565983, |
|
"eval_loss": 4.100970268249512, |
|
"eval_runtime": 3.116, |
|
"eval_samples_per_second": 0.321, |
|
"eval_steps_per_second": 0.321, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"learning_rate": 4.85e-06, |
|
"loss": 3.5015, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.3704789833822092, |
|
"eval_loss": 4.088879585266113, |
|
"eval_runtime": 3.3403, |
|
"eval_samples_per_second": 0.299, |
|
"eval_steps_per_second": 0.299, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 3.493, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.3704789833822092, |
|
"eval_loss": 4.082551956176758, |
|
"eval_runtime": 3.3467, |
|
"eval_samples_per_second": 0.299, |
|
"eval_steps_per_second": 0.299, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 4.75e-06, |
|
"loss": 3.5643, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.375366568914956, |
|
"eval_loss": 4.091529846191406, |
|
"eval_runtime": 3.5246, |
|
"eval_samples_per_second": 0.284, |
|
"eval_steps_per_second": 0.284, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"learning_rate": 4.7e-06, |
|
"loss": 3.4543, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.3724340175953079, |
|
"eval_loss": 4.091198921203613, |
|
"eval_runtime": 3.1421, |
|
"eval_samples_per_second": 0.318, |
|
"eval_steps_per_second": 0.318, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"learning_rate": 4.65e-06, |
|
"loss": 3.4517, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.375366568914956, |
|
"eval_loss": 4.084360599517822, |
|
"eval_runtime": 3.5262, |
|
"eval_samples_per_second": 0.284, |
|
"eval_steps_per_second": 0.284, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 3.4387, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.375366568914956, |
|
"eval_loss": 4.066359996795654, |
|
"eval_runtime": 3.1772, |
|
"eval_samples_per_second": 0.315, |
|
"eval_steps_per_second": 0.315, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"learning_rate": 4.5500000000000005e-06, |
|
"loss": 3.4274, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.3763440860215054, |
|
"eval_loss": 4.088470458984375, |
|
"eval_runtime": 3.106, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 4.5e-06, |
|
"loss": 3.4241, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.37927663734115347, |
|
"eval_loss": 4.058298587799072, |
|
"eval_runtime": 3.0985, |
|
"eval_samples_per_second": 0.323, |
|
"eval_steps_per_second": 0.323, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"learning_rate": 4.450000000000001e-06, |
|
"loss": 3.4016, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.38025415444770283, |
|
"eval_loss": 4.0626630783081055, |
|
"eval_runtime": 3.5144, |
|
"eval_samples_per_second": 0.285, |
|
"eval_steps_per_second": 0.285, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 4.4e-06, |
|
"loss": 3.383, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.3812316715542522, |
|
"eval_loss": 4.062616348266602, |
|
"eval_runtime": 3.2502, |
|
"eval_samples_per_second": 0.308, |
|
"eval_steps_per_second": 0.308, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"learning_rate": 4.350000000000001e-06, |
|
"loss": 3.3709, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.3870967741935484, |
|
"eval_loss": 4.0414323806762695, |
|
"eval_runtime": 3.4468, |
|
"eval_samples_per_second": 0.29, |
|
"eval_steps_per_second": 0.29, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"learning_rate": 4.3e-06, |
|
"loss": 3.3646, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.38220918866080156, |
|
"eval_loss": 4.0561604499816895, |
|
"eval_runtime": 3.0962, |
|
"eval_samples_per_second": 0.323, |
|
"eval_steps_per_second": 0.323, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 4.25e-06, |
|
"loss": 3.3456, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.386119257086999, |
|
"eval_loss": 4.036128044128418, |
|
"eval_runtime": 3.3227, |
|
"eval_samples_per_second": 0.301, |
|
"eval_steps_per_second": 0.301, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 3.3369, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.38514173998044965, |
|
"eval_loss": 4.052359104156494, |
|
"eval_runtime": 3.0856, |
|
"eval_samples_per_second": 0.324, |
|
"eval_steps_per_second": 0.324, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"learning_rate": 4.15e-06, |
|
"loss": 3.3136, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.3841642228739003, |
|
"eval_loss": 4.042443752288818, |
|
"eval_runtime": 3.358, |
|
"eval_samples_per_second": 0.298, |
|
"eval_steps_per_second": 0.298, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"learning_rate": 4.1e-06, |
|
"loss": 3.307, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.386119257086999, |
|
"eval_loss": 4.047730922698975, |
|
"eval_runtime": 3.2939, |
|
"eval_samples_per_second": 0.304, |
|
"eval_steps_per_second": 0.304, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"learning_rate": 4.05e-06, |
|
"loss": 3.2954, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.38514173998044965, |
|
"eval_loss": 4.028741359710693, |
|
"eval_runtime": 3.1849, |
|
"eval_samples_per_second": 0.314, |
|
"eval_steps_per_second": 0.314, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 3.2887, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.39002932551319647, |
|
"eval_loss": 4.0391716957092285, |
|
"eval_runtime": 3.373, |
|
"eval_samples_per_second": 0.296, |
|
"eval_steps_per_second": 0.296, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"learning_rate": 3.95e-06, |
|
"loss": 3.2776, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.39100684261974583, |
|
"eval_loss": 4.019059658050537, |
|
"eval_runtime": 3.0671, |
|
"eval_samples_per_second": 0.326, |
|
"eval_steps_per_second": 0.326, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"learning_rate": 3.900000000000001e-06, |
|
"loss": 3.2527, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.39100684261974583, |
|
"eval_loss": 4.03394889831543, |
|
"eval_runtime": 3.608, |
|
"eval_samples_per_second": 0.277, |
|
"eval_steps_per_second": 0.277, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"learning_rate": 3.85e-06, |
|
"loss": 3.259, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.39296187683284456, |
|
"eval_loss": 4.006428241729736, |
|
"eval_runtime": 3.37, |
|
"eval_samples_per_second": 0.297, |
|
"eval_steps_per_second": 0.297, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 3.2559, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.38807429130009774, |
|
"eval_loss": 4.028494834899902, |
|
"eval_runtime": 3.4043, |
|
"eval_samples_per_second": 0.294, |
|
"eval_steps_per_second": 0.294, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 3.2335, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.39296187683284456, |
|
"eval_loss": 4.015052795410156, |
|
"eval_runtime": 3.1425, |
|
"eval_samples_per_second": 0.318, |
|
"eval_steps_per_second": 0.318, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"learning_rate": 3.7e-06, |
|
"loss": 3.2318, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.39002932551319647, |
|
"eval_loss": 4.027667999267578, |
|
"eval_runtime": 3.3897, |
|
"eval_samples_per_second": 0.295, |
|
"eval_steps_per_second": 0.295, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"learning_rate": 3.65e-06, |
|
"loss": 3.2266, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 3.992929697036743, |
|
"eval_runtime": 3.1289, |
|
"eval_samples_per_second": 0.32, |
|
"eval_steps_per_second": 0.32, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 3.2051, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 3.9944605827331543, |
|
"eval_runtime": 3.5639, |
|
"eval_samples_per_second": 0.281, |
|
"eval_steps_per_second": 0.281, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"learning_rate": 3.5500000000000003e-06, |
|
"loss": 3.2009, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_accuracy": 0.39296187683284456, |
|
"eval_loss": 4.02908182144165, |
|
"eval_runtime": 3.1278, |
|
"eval_samples_per_second": 0.32, |
|
"eval_steps_per_second": 0.32, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 3.5e-06, |
|
"loss": 3.1791, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.39296187683284456, |
|
"eval_loss": 3.9955568313598633, |
|
"eval_runtime": 3.1553, |
|
"eval_samples_per_second": 0.317, |
|
"eval_steps_per_second": 0.317, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"learning_rate": 3.45e-06, |
|
"loss": 3.1759, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_accuracy": 0.396871945259042, |
|
"eval_loss": 4.001156806945801, |
|
"eval_runtime": 3.1012, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 3.1622, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.3949169110459433, |
|
"eval_loss": 4.010651111602783, |
|
"eval_runtime": 3.2622, |
|
"eval_samples_per_second": 0.307, |
|
"eval_steps_per_second": 0.307, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"learning_rate": 3.3500000000000005e-06, |
|
"loss": 3.1559, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_accuracy": 0.3939393939393939, |
|
"eval_loss": 4.009001731872559, |
|
"eval_runtime": 3.147, |
|
"eval_samples_per_second": 0.318, |
|
"eval_steps_per_second": 0.318, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"learning_rate": 3.3000000000000006e-06, |
|
"loss": 3.1521, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_accuracy": 0.39100684261974583, |
|
"eval_loss": 4.00282621383667, |
|
"eval_runtime": 3.2649, |
|
"eval_samples_per_second": 0.306, |
|
"eval_steps_per_second": 0.306, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"loss": 3.1353, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_accuracy": 0.3939393939393939, |
|
"eval_loss": 4.0033183097839355, |
|
"eval_runtime": 3.0829, |
|
"eval_samples_per_second": 0.324, |
|
"eval_steps_per_second": 0.324, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 3.1427, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.3939393939393939, |
|
"eval_loss": 3.999516248703003, |
|
"eval_runtime": 3.3725, |
|
"eval_samples_per_second": 0.297, |
|
"eval_steps_per_second": 0.297, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"learning_rate": 3.1500000000000003e-06, |
|
"loss": 3.1276, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_accuracy": 0.3919843597262952, |
|
"eval_loss": 3.9962964057922363, |
|
"eval_runtime": 3.3881, |
|
"eval_samples_per_second": 0.295, |
|
"eval_steps_per_second": 0.295, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"learning_rate": 3.1000000000000004e-06, |
|
"loss": 3.1228, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 3.9996395111083984, |
|
"eval_runtime": 3.1889, |
|
"eval_samples_per_second": 0.314, |
|
"eval_steps_per_second": 0.314, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"learning_rate": 3.05e-06, |
|
"loss": 3.1039, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_accuracy": 0.39882697947214074, |
|
"eval_loss": 3.992779493331909, |
|
"eval_runtime": 3.1905, |
|
"eval_samples_per_second": 0.313, |
|
"eval_steps_per_second": 0.313, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"learning_rate": 3e-06, |
|
"loss": 3.097, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.396871945259042, |
|
"eval_loss": 3.9969444274902344, |
|
"eval_runtime": 3.2194, |
|
"eval_samples_per_second": 0.311, |
|
"eval_steps_per_second": 0.311, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"learning_rate": 2.95e-06, |
|
"loss": 3.083, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_accuracy": 0.3949169110459433, |
|
"eval_loss": 3.991848945617676, |
|
"eval_runtime": 3.2252, |
|
"eval_samples_per_second": 0.31, |
|
"eval_steps_per_second": 0.31, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"learning_rate": 2.9e-06, |
|
"loss": 3.0844, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_accuracy": 0.396871945259042, |
|
"eval_loss": 3.9899985790252686, |
|
"eval_runtime": 3.3224, |
|
"eval_samples_per_second": 0.301, |
|
"eval_steps_per_second": 0.301, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"learning_rate": 2.85e-06, |
|
"loss": 3.077, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_accuracy": 0.39589442815249265, |
|
"eval_loss": 3.981245756149292, |
|
"eval_runtime": 3.3063, |
|
"eval_samples_per_second": 0.302, |
|
"eval_steps_per_second": 0.302, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 3.0601, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.39589442815249265, |
|
"eval_loss": 3.9948182106018066, |
|
"eval_runtime": 3.5418, |
|
"eval_samples_per_second": 0.282, |
|
"eval_steps_per_second": 0.282, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"loss": 3.0669, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_accuracy": 0.39589442815249265, |
|
"eval_loss": 3.9938085079193115, |
|
"eval_runtime": 3.3604, |
|
"eval_samples_per_second": 0.298, |
|
"eval_steps_per_second": 0.298, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"learning_rate": 2.7000000000000004e-06, |
|
"loss": 3.0515, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 3.9895379543304443, |
|
"eval_runtime": 3.1997, |
|
"eval_samples_per_second": 0.313, |
|
"eval_steps_per_second": 0.313, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"learning_rate": 2.6500000000000005e-06, |
|
"loss": 3.0405, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_accuracy": 0.39882697947214074, |
|
"eval_loss": 3.9802987575531006, |
|
"eval_runtime": 3.2702, |
|
"eval_samples_per_second": 0.306, |
|
"eval_steps_per_second": 0.306, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"learning_rate": 2.6e-06, |
|
"loss": 3.029, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.396871945259042, |
|
"eval_loss": 3.985629081726074, |
|
"eval_runtime": 3.064, |
|
"eval_samples_per_second": 0.326, |
|
"eval_steps_per_second": 0.326, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"learning_rate": 2.55e-06, |
|
"loss": 3.0342, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_accuracy": 0.396871945259042, |
|
"eval_loss": 3.982790231704712, |
|
"eval_runtime": 3.2872, |
|
"eval_samples_per_second": 0.304, |
|
"eval_steps_per_second": 0.304, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 2.5e-06, |
|
"loss": 3.0137, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 3.997673273086548, |
|
"eval_runtime": 3.3166, |
|
"eval_samples_per_second": 0.302, |
|
"eval_steps_per_second": 0.302, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 2.4500000000000003e-06, |
|
"loss": 3.0277, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_accuracy": 0.3998044965786901, |
|
"eval_loss": 3.9793360233306885, |
|
"eval_runtime": 3.1051, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 3.0005, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.3998044965786901, |
|
"eval_loss": 3.9779205322265625, |
|
"eval_runtime": 3.1637, |
|
"eval_samples_per_second": 0.316, |
|
"eval_steps_per_second": 0.316, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"learning_rate": 2.35e-06, |
|
"loss": 3.0027, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_accuracy": 0.39882697947214074, |
|
"eval_loss": 3.989084482192993, |
|
"eval_runtime": 3.2887, |
|
"eval_samples_per_second": 0.304, |
|
"eval_steps_per_second": 0.304, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"learning_rate": 2.3000000000000004e-06, |
|
"loss": 3.0034, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_accuracy": 0.40078201368523947, |
|
"eval_loss": 3.9687039852142334, |
|
"eval_runtime": 3.2813, |
|
"eval_samples_per_second": 0.305, |
|
"eval_steps_per_second": 0.305, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"learning_rate": 2.25e-06, |
|
"loss": 2.9853, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 3.988722801208496, |
|
"eval_runtime": 3.4968, |
|
"eval_samples_per_second": 0.286, |
|
"eval_steps_per_second": 0.286, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"learning_rate": 2.2e-06, |
|
"loss": 2.9947, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.4027370478983382, |
|
"eval_loss": 3.9860475063323975, |
|
"eval_runtime": 3.29, |
|
"eval_samples_per_second": 0.304, |
|
"eval_steps_per_second": 0.304, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"learning_rate": 2.15e-06, |
|
"loss": 2.9768, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_accuracy": 0.4027370478983382, |
|
"eval_loss": 3.989997148513794, |
|
"eval_runtime": 3.3737, |
|
"eval_samples_per_second": 0.296, |
|
"eval_steps_per_second": 0.296, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"learning_rate": 2.1000000000000002e-06, |
|
"loss": 2.9752, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_accuracy": 0.39882697947214074, |
|
"eval_loss": 3.9992799758911133, |
|
"eval_runtime": 3.3931, |
|
"eval_samples_per_second": 0.295, |
|
"eval_steps_per_second": 0.295, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"learning_rate": 2.05e-06, |
|
"loss": 2.9773, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_accuracy": 0.40175953079178883, |
|
"eval_loss": 3.9693987369537354, |
|
"eval_runtime": 3.1141, |
|
"eval_samples_per_second": 0.321, |
|
"eval_steps_per_second": 0.321, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.9662, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.3998044965786901, |
|
"eval_loss": 3.9923715591430664, |
|
"eval_runtime": 3.2181, |
|
"eval_samples_per_second": 0.311, |
|
"eval_steps_per_second": 0.311, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"learning_rate": 1.9500000000000004e-06, |
|
"loss": 2.9661, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_accuracy": 0.39882697947214074, |
|
"eval_loss": 4.008890151977539, |
|
"eval_runtime": 3.1472, |
|
"eval_samples_per_second": 0.318, |
|
"eval_steps_per_second": 0.318, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"learning_rate": 1.9000000000000002e-06, |
|
"loss": 2.9488, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_accuracy": 0.39882697947214074, |
|
"eval_loss": 3.974891424179077, |
|
"eval_runtime": 3.2045, |
|
"eval_samples_per_second": 0.312, |
|
"eval_steps_per_second": 0.312, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"learning_rate": 1.85e-06, |
|
"loss": 2.9487, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 3.9931938648223877, |
|
"eval_runtime": 2.9879, |
|
"eval_samples_per_second": 0.335, |
|
"eval_steps_per_second": 0.335, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 2.9482, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.39882697947214074, |
|
"eval_loss": 3.998748540878296, |
|
"eval_runtime": 3.0761, |
|
"eval_samples_per_second": 0.325, |
|
"eval_steps_per_second": 0.325, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"learning_rate": 1.75e-06, |
|
"loss": 2.9624, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_accuracy": 0.3978494623655914, |
|
"eval_loss": 3.962663412094116, |
|
"eval_runtime": 3.2497, |
|
"eval_samples_per_second": 0.308, |
|
"eval_steps_per_second": 0.308, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"learning_rate": 1.7000000000000002e-06, |
|
"loss": 2.9524, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_accuracy": 0.40078201368523947, |
|
"eval_loss": 3.979146957397461, |
|
"eval_runtime": 3.2339, |
|
"eval_samples_per_second": 0.309, |
|
"eval_steps_per_second": 0.309, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"learning_rate": 1.6500000000000003e-06, |
|
"loss": 2.9357, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_accuracy": 0.3998044965786901, |
|
"eval_loss": 3.996922731399536, |
|
"eval_runtime": 3.0918, |
|
"eval_samples_per_second": 0.323, |
|
"eval_steps_per_second": 0.323, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 2.9323, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_accuracy": 0.40078201368523947, |
|
"eval_loss": 3.9853515625, |
|
"eval_runtime": 3.2542, |
|
"eval_samples_per_second": 0.307, |
|
"eval_steps_per_second": 0.307, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"learning_rate": 1.5500000000000002e-06, |
|
"loss": 2.9334, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"eval_accuracy": 0.40078201368523947, |
|
"eval_loss": 3.977756977081299, |
|
"eval_runtime": 3.2318, |
|
"eval_samples_per_second": 0.309, |
|
"eval_steps_per_second": 0.309, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"learning_rate": 1.5e-06, |
|
"loss": 2.9228, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_accuracy": 0.4027370478983382, |
|
"eval_loss": 3.9858651161193848, |
|
"eval_runtime": 3.1222, |
|
"eval_samples_per_second": 0.32, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"learning_rate": 1.45e-06, |
|
"loss": 2.9305, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"eval_accuracy": 0.40371456500488756, |
|
"eval_loss": 3.9820897579193115, |
|
"eval_runtime": 3.1725, |
|
"eval_samples_per_second": 0.315, |
|
"eval_steps_per_second": 0.315, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 2.9239, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 3.987579107284546, |
|
"eval_runtime": 3.1039, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"learning_rate": 1.3500000000000002e-06, |
|
"loss": 2.9181, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.9791972637176514, |
|
"eval_runtime": 3.503, |
|
"eval_samples_per_second": 0.285, |
|
"eval_steps_per_second": 0.285, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"learning_rate": 1.3e-06, |
|
"loss": 2.9162, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_accuracy": 0.40371456500488756, |
|
"eval_loss": 3.9731061458587646, |
|
"eval_runtime": 3.0863, |
|
"eval_samples_per_second": 0.324, |
|
"eval_steps_per_second": 0.324, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 1.25e-06, |
|
"loss": 2.9171, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 3.9795916080474854, |
|
"eval_runtime": 3.0684, |
|
"eval_samples_per_second": 0.326, |
|
"eval_steps_per_second": 0.326, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 2.9132, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.4046920821114369, |
|
"eval_loss": 3.9914140701293945, |
|
"eval_runtime": 3.5253, |
|
"eval_samples_per_second": 0.284, |
|
"eval_steps_per_second": 0.284, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"learning_rate": 1.1500000000000002e-06, |
|
"loss": 2.9168, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"eval_accuracy": 0.4046920821114369, |
|
"eval_loss": 3.9826488494873047, |
|
"eval_runtime": 3.0836, |
|
"eval_samples_per_second": 0.324, |
|
"eval_steps_per_second": 0.324, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"learning_rate": 1.1e-06, |
|
"loss": 2.8974, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.9753177165985107, |
|
"eval_runtime": 3.165, |
|
"eval_samples_per_second": 0.316, |
|
"eval_steps_per_second": 0.316, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"learning_rate": 1.0500000000000001e-06, |
|
"loss": 2.8954, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.976564884185791, |
|
"eval_runtime": 3.123, |
|
"eval_samples_per_second": 0.32, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 2.9003, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_accuracy": 0.4027370478983382, |
|
"eval_loss": 3.986520290374756, |
|
"eval_runtime": 3.106, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"learning_rate": 9.500000000000001e-07, |
|
"loss": 2.9012, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"eval_accuracy": 0.4046920821114369, |
|
"eval_loss": 3.983490228652954, |
|
"eval_runtime": 3.1013, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"learning_rate": 9.000000000000001e-07, |
|
"loss": 2.8994, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"eval_accuracy": 0.4046920821114369, |
|
"eval_loss": 3.980245590209961, |
|
"eval_runtime": 3.2714, |
|
"eval_samples_per_second": 0.306, |
|
"eval_steps_per_second": 0.306, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"learning_rate": 8.500000000000001e-07, |
|
"loss": 2.8918, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 3.9810588359832764, |
|
"eval_runtime": 3.2463, |
|
"eval_samples_per_second": 0.308, |
|
"eval_steps_per_second": 0.308, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 2.8893, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.981043815612793, |
|
"eval_runtime": 3.5146, |
|
"eval_samples_per_second": 0.285, |
|
"eval_steps_per_second": 0.285, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"learning_rate": 7.5e-07, |
|
"loss": 2.8865, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"eval_accuracy": 0.40762463343108507, |
|
"eval_loss": 3.9851670265197754, |
|
"eval_runtime": 3.121, |
|
"eval_samples_per_second": 0.32, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"learning_rate": 7.000000000000001e-07, |
|
"loss": 2.8784, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.9805047512054443, |
|
"eval_runtime": 3.1275, |
|
"eval_samples_per_second": 0.32, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"learning_rate": 6.5e-07, |
|
"loss": 2.8875, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 3.978147029876709, |
|
"eval_runtime": 3.3509, |
|
"eval_samples_per_second": 0.298, |
|
"eval_steps_per_second": 0.298, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 2.8948, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.9830515384674072, |
|
"eval_runtime": 3.3453, |
|
"eval_samples_per_second": 0.299, |
|
"eval_steps_per_second": 0.299, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"learning_rate": 5.5e-07, |
|
"loss": 2.8927, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 3.9836947917938232, |
|
"eval_runtime": 3.1414, |
|
"eval_samples_per_second": 0.318, |
|
"eval_steps_per_second": 0.318, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"learning_rate": 5.000000000000001e-07, |
|
"loss": 2.8739, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.9822254180908203, |
|
"eval_runtime": 3.3757, |
|
"eval_samples_per_second": 0.296, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"learning_rate": 4.5000000000000003e-07, |
|
"loss": 2.8919, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"eval_accuracy": 0.4066471163245357, |
|
"eval_loss": 3.9791526794433594, |
|
"eval_runtime": 3.5323, |
|
"eval_samples_per_second": 0.283, |
|
"eval_steps_per_second": 0.283, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 2.8713, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.9800055027008057, |
|
"eval_runtime": 3.4653, |
|
"eval_samples_per_second": 0.289, |
|
"eval_steps_per_second": 0.289, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"learning_rate": 3.5000000000000004e-07, |
|
"loss": 2.8798, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"eval_accuracy": 0.4046920821114369, |
|
"eval_loss": 3.985433578491211, |
|
"eval_runtime": 3.0972, |
|
"eval_samples_per_second": 0.323, |
|
"eval_steps_per_second": 0.323, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"learning_rate": 3.0000000000000004e-07, |
|
"loss": 2.8835, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.984498977661133, |
|
"eval_runtime": 4.5165, |
|
"eval_samples_per_second": 0.221, |
|
"eval_steps_per_second": 0.221, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"loss": 2.878, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.981985330581665, |
|
"eval_runtime": 3.1036, |
|
"eval_samples_per_second": 0.322, |
|
"eval_steps_per_second": 0.322, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 2.8931, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.9816386699676514, |
|
"eval_runtime": 3.2982, |
|
"eval_samples_per_second": 0.303, |
|
"eval_steps_per_second": 0.303, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"learning_rate": 1.5000000000000002e-07, |
|
"loss": 2.8662, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.982978105545044, |
|
"eval_runtime": 4.8102, |
|
"eval_samples_per_second": 0.208, |
|
"eval_steps_per_second": 0.208, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"learning_rate": 1.0000000000000001e-07, |
|
"loss": 2.8734, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.984077215194702, |
|
"eval_runtime": 3.1139, |
|
"eval_samples_per_second": 0.321, |
|
"eval_steps_per_second": 0.321, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"learning_rate": 5.0000000000000004e-08, |
|
"loss": 2.8825, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.98300838470459, |
|
"eval_runtime": 3.644, |
|
"eval_samples_per_second": 0.274, |
|
"eval_steps_per_second": 0.274, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 0.0, |
|
"loss": 2.8825, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.4056695992179863, |
|
"eval_loss": 3.982684373855591, |
|
"eval_runtime": 3.4062, |
|
"eval_samples_per_second": 0.294, |
|
"eval_steps_per_second": 0.294, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"step": 1200, |
|
"total_flos": 1.044564148224e+16, |
|
"train_loss": 3.9290491278966266, |
|
"train_runtime": 32687.0411, |
|
"train_samples_per_second": 0.073, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 1200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 200, |
|
"save_steps": 500, |
|
"total_flos": 1.044564148224e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|