|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 3338128, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9925557078698005e-05, |
|
"loss": 5.3279, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_accuracy": 0.31327971235572855, |
|
"eval_loss": 3.994140625, |
|
"eval_runtime": 39.5933, |
|
"eval_samples_per_second": 90.116, |
|
"eval_steps_per_second": 11.265, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.985067978220128e-05, |
|
"loss": 3.5754, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_accuracy": 0.3823957607318666, |
|
"eval_loss": 3.310546875, |
|
"eval_runtime": 39.5685, |
|
"eval_samples_per_second": 90.173, |
|
"eval_steps_per_second": 11.272, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9775862399524525e-05, |
|
"loss": 3.6102, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_accuracy": 0.39768097216925513, |
|
"eval_loss": 3.166015625, |
|
"eval_runtime": 39.5825, |
|
"eval_samples_per_second": 90.141, |
|
"eval_steps_per_second": 11.268, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.970101505993779e-05, |
|
"loss": 3.0639, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.41336891627105715, |
|
"eval_loss": 3.021484375, |
|
"eval_runtime": 39.5961, |
|
"eval_samples_per_second": 90.11, |
|
"eval_steps_per_second": 11.264, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.962618269880604e-05, |
|
"loss": 2.9477, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.425242680676284, |
|
"eval_loss": 2.919921875, |
|
"eval_runtime": 39.6071, |
|
"eval_samples_per_second": 90.085, |
|
"eval_steps_per_second": 11.261, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9551335359219304e-05, |
|
"loss": 2.8589, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.4315466797294513, |
|
"eval_loss": 2.8671875, |
|
"eval_runtime": 39.6079, |
|
"eval_samples_per_second": 90.083, |
|
"eval_steps_per_second": 11.26, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.947647304117757e-05, |
|
"loss": 2.8063, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.43875449855125825, |
|
"eval_loss": 2.802734375, |
|
"eval_runtime": 39.6873, |
|
"eval_samples_per_second": 89.903, |
|
"eval_steps_per_second": 11.238, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.940162570159084e-05, |
|
"loss": 2.7646, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.44185690990623727, |
|
"eval_loss": 2.771484375, |
|
"eval_runtime": 39.6738, |
|
"eval_samples_per_second": 89.933, |
|
"eval_steps_per_second": 11.242, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.932679334045909e-05, |
|
"loss": 2.7306, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.4467244957896629, |
|
"eval_loss": 2.736328125, |
|
"eval_runtime": 39.7305, |
|
"eval_samples_per_second": 89.805, |
|
"eval_steps_per_second": 11.226, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9251960979327336e-05, |
|
"loss": 2.7106, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.4492548623804952, |
|
"eval_loss": 2.712890625, |
|
"eval_runtime": 39.6605, |
|
"eval_samples_per_second": 89.964, |
|
"eval_steps_per_second": 11.245, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.917712861819559e-05, |
|
"loss": 2.6829, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.45224302916332426, |
|
"eval_loss": 2.689453125, |
|
"eval_runtime": 39.6345, |
|
"eval_samples_per_second": 90.023, |
|
"eval_steps_per_second": 11.253, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9102311235518835e-05, |
|
"loss": 2.6703, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.45370464737056665, |
|
"eval_loss": 2.67578125, |
|
"eval_runtime": 39.6085, |
|
"eval_samples_per_second": 90.082, |
|
"eval_steps_per_second": 11.26, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.90274489174771e-05, |
|
"loss": 2.6522, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.4559714569388372, |
|
"eval_loss": 2.66015625, |
|
"eval_runtime": 39.7158, |
|
"eval_samples_per_second": 89.838, |
|
"eval_steps_per_second": 11.23, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.895260157789037e-05, |
|
"loss": 2.6377, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.4573840349100728, |
|
"eval_loss": 2.6484375, |
|
"eval_runtime": 40.0228, |
|
"eval_samples_per_second": 89.149, |
|
"eval_steps_per_second": 11.144, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.8877769216758615e-05, |
|
"loss": 2.6241, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.4586505880444836, |
|
"eval_loss": 2.634765625, |
|
"eval_runtime": 39.747, |
|
"eval_samples_per_second": 89.768, |
|
"eval_steps_per_second": 11.221, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.880295183408186e-05, |
|
"loss": 2.6159, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.46040042037619067, |
|
"eval_loss": 2.625, |
|
"eval_runtime": 39.8297, |
|
"eval_samples_per_second": 89.581, |
|
"eval_steps_per_second": 11.198, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.872807453758514e-05, |
|
"loss": 2.5959, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.4612864322379005, |
|
"eval_loss": 2.61328125, |
|
"eval_runtime": 39.842, |
|
"eval_samples_per_second": 89.554, |
|
"eval_steps_per_second": 11.194, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.86532271979984e-05, |
|
"loss": 2.5877, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.4624083303744811, |
|
"eval_loss": 2.603515625, |
|
"eval_runtime": 39.7194, |
|
"eval_samples_per_second": 89.83, |
|
"eval_steps_per_second": 11.229, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.857840981532165e-05, |
|
"loss": 2.5832, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.46323187757803697, |
|
"eval_loss": 2.599609375, |
|
"eval_runtime": 40.0969, |
|
"eval_samples_per_second": 88.984, |
|
"eval_steps_per_second": 11.123, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8503562475734907e-05, |
|
"loss": 2.5726, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.46476143979941176, |
|
"eval_loss": 2.5859375, |
|
"eval_runtime": 39.6873, |
|
"eval_samples_per_second": 89.903, |
|
"eval_steps_per_second": 11.238, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8428775049968125e-05, |
|
"loss": 2.5723, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.46553211121777593, |
|
"eval_loss": 2.580078125, |
|
"eval_runtime": 39.7946, |
|
"eval_samples_per_second": 89.66, |
|
"eval_steps_per_second": 11.208, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.83539127319264e-05, |
|
"loss": 2.5584, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.46414501225183996, |
|
"eval_loss": 2.59375, |
|
"eval_runtime": 39.8172, |
|
"eval_samples_per_second": 89.61, |
|
"eval_steps_per_second": 11.201, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.827905041388467e-05, |
|
"loss": 2.5541, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.4673400247228542, |
|
"eval_loss": 2.56640625, |
|
"eval_runtime": 39.875, |
|
"eval_samples_per_second": 89.48, |
|
"eval_steps_per_second": 11.185, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.820423303120791e-05, |
|
"loss": 2.541, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.46835206177206756, |
|
"eval_loss": 2.55859375, |
|
"eval_runtime": 39.7895, |
|
"eval_samples_per_second": 89.672, |
|
"eval_steps_per_second": 11.209, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.812941564853116e-05, |
|
"loss": 2.5359, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.4673909827334534, |
|
"eval_loss": 2.564453125, |
|
"eval_runtime": 39.8856, |
|
"eval_samples_per_second": 89.456, |
|
"eval_steps_per_second": 11.182, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.805458328739941e-05, |
|
"loss": 2.5298, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.4699054591919484, |
|
"eval_loss": 2.544921875, |
|
"eval_runtime": 39.8462, |
|
"eval_samples_per_second": 89.544, |
|
"eval_steps_per_second": 11.193, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.797972096935768e-05, |
|
"loss": 2.5258, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.47030819185636197, |
|
"eval_loss": 2.541015625, |
|
"eval_runtime": 39.8831, |
|
"eval_samples_per_second": 89.461, |
|
"eval_steps_per_second": 11.183, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.790488860822593e-05, |
|
"loss": 2.5207, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.47090352388341683, |
|
"eval_loss": 2.537109375, |
|
"eval_runtime": 40.0239, |
|
"eval_samples_per_second": 89.147, |
|
"eval_steps_per_second": 11.143, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.783005624709418e-05, |
|
"loss": 2.5167, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.47193419074295684, |
|
"eval_loss": 2.53125, |
|
"eval_runtime": 39.9144, |
|
"eval_samples_per_second": 89.391, |
|
"eval_steps_per_second": 11.174, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7755223885962435e-05, |
|
"loss": 2.5101, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.4701947691876088, |
|
"eval_loss": 2.544921875, |
|
"eval_runtime": 40.0269, |
|
"eval_samples_per_second": 89.14, |
|
"eval_steps_per_second": 11.143, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.768039152483069e-05, |
|
"loss": 2.5058, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.4730330755844281, |
|
"eval_loss": 2.521484375, |
|
"eval_runtime": 39.9333, |
|
"eval_samples_per_second": 89.349, |
|
"eval_steps_per_second": 11.169, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.760554418524395e-05, |
|
"loss": 2.5021, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.473403480048569, |
|
"eval_loss": 2.51953125, |
|
"eval_runtime": 40.0331, |
|
"eval_samples_per_second": 89.126, |
|
"eval_steps_per_second": 11.141, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.753084663020711e-05, |
|
"loss": 2.8135, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.4317466762226635, |
|
"eval_loss": 2.83203125, |
|
"eval_runtime": 40.0127, |
|
"eval_samples_per_second": 89.172, |
|
"eval_steps_per_second": 11.146, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7455954355255405e-05, |
|
"loss": 2.7932, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.4729974597705684, |
|
"eval_loss": 2.521484375, |
|
"eval_runtime": 39.9612, |
|
"eval_samples_per_second": 89.287, |
|
"eval_steps_per_second": 11.161, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7381077058758686e-05, |
|
"loss": 2.4914, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.4751818050313638, |
|
"eval_loss": 2.505859375, |
|
"eval_runtime": 40.3867, |
|
"eval_samples_per_second": 88.346, |
|
"eval_steps_per_second": 11.043, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.730621474071695e-05, |
|
"loss": 2.487, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.4753875548483533, |
|
"eval_loss": 2.50390625, |
|
"eval_runtime": 40.0145, |
|
"eval_samples_per_second": 89.168, |
|
"eval_steps_per_second": 11.146, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.723138237958521e-05, |
|
"loss": 2.4829, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.47510947753244875, |
|
"eval_loss": 2.50390625, |
|
"eval_runtime": 40.0338, |
|
"eval_samples_per_second": 89.125, |
|
"eval_steps_per_second": 11.141, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.7156505083088486e-05, |
|
"loss": 2.4778, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.47625877244892145, |
|
"eval_loss": 2.49609375, |
|
"eval_runtime": 40.0473, |
|
"eval_samples_per_second": 89.095, |
|
"eval_steps_per_second": 11.137, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.708170267886672e-05, |
|
"loss": 2.4779, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.4770064305721763, |
|
"eval_loss": 2.4921875, |
|
"eval_runtime": 40.1061, |
|
"eval_samples_per_second": 88.964, |
|
"eval_steps_per_second": 11.121, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.700687031773498e-05, |
|
"loss": 2.4685, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.4765894515822188, |
|
"eval_loss": 2.494140625, |
|
"eval_runtime": 40.1514, |
|
"eval_samples_per_second": 88.864, |
|
"eval_steps_per_second": 11.108, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.693202297814823e-05, |
|
"loss": 2.4661, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.47763491270290054, |
|
"eval_loss": 2.484375, |
|
"eval_runtime": 40.1854, |
|
"eval_samples_per_second": 88.788, |
|
"eval_steps_per_second": 11.099, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.6857190617016483e-05, |
|
"loss": 2.4579, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.47826366880142374, |
|
"eval_loss": 2.48046875, |
|
"eval_runtime": 40.1794, |
|
"eval_samples_per_second": 88.802, |
|
"eval_steps_per_second": 11.1, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.6782358255884736e-05, |
|
"loss": 2.4589, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.4787937964923355, |
|
"eval_loss": 2.478515625, |
|
"eval_runtime": 40.1613, |
|
"eval_samples_per_second": 88.842, |
|
"eval_steps_per_second": 11.105, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6707540873207975e-05, |
|
"loss": 2.4571, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.47927871949642525, |
|
"eval_loss": 2.474609375, |
|
"eval_runtime": 40.1097, |
|
"eval_samples_per_second": 88.956, |
|
"eval_steps_per_second": 11.119, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.663269353362124e-05, |
|
"loss": 2.4504, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.47965542521994137, |
|
"eval_loss": 2.47265625, |
|
"eval_runtime": 40.1284, |
|
"eval_samples_per_second": 88.915, |
|
"eval_steps_per_second": 11.114, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.65578461940345e-05, |
|
"loss": 2.4538, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.47995733773435206, |
|
"eval_loss": 2.46875, |
|
"eval_runtime": 40.2242, |
|
"eval_samples_per_second": 88.703, |
|
"eval_steps_per_second": 11.088, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6483013832902755e-05, |
|
"loss": 2.4481, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.4806011620618159, |
|
"eval_loss": 2.466796875, |
|
"eval_runtime": 40.1909, |
|
"eval_samples_per_second": 88.776, |
|
"eval_steps_per_second": 11.097, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.640815151486103e-05, |
|
"loss": 2.4454, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.4809529367156302, |
|
"eval_loss": 2.4609375, |
|
"eval_runtime": 40.319, |
|
"eval_samples_per_second": 88.494, |
|
"eval_steps_per_second": 11.062, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6333319153729274e-05, |
|
"loss": 2.44, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.4811408786256898, |
|
"eval_loss": 2.458984375, |
|
"eval_runtime": 40.235, |
|
"eval_samples_per_second": 88.679, |
|
"eval_steps_per_second": 11.085, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.625850177105252e-05, |
|
"loss": 2.4392, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.4810606060606061, |
|
"eval_loss": 2.458984375, |
|
"eval_runtime": 40.2635, |
|
"eval_samples_per_second": 88.616, |
|
"eval_steps_per_second": 11.077, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.618366940992077e-05, |
|
"loss": 2.431, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.48131512214580346, |
|
"eval_loss": 2.45703125, |
|
"eval_runtime": 40.2108, |
|
"eval_samples_per_second": 88.732, |
|
"eval_steps_per_second": 11.092, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.610885202724401e-05, |
|
"loss": 2.4377, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.482264420569064, |
|
"eval_loss": 2.451171875, |
|
"eval_runtime": 40.1835, |
|
"eval_samples_per_second": 88.793, |
|
"eval_steps_per_second": 11.099, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.6033959752292307e-05, |
|
"loss": 2.4299, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.4825972914447528, |
|
"eval_loss": 2.447265625, |
|
"eval_runtime": 40.2876, |
|
"eval_samples_per_second": 88.563, |
|
"eval_steps_per_second": 11.07, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.5959142369615546e-05, |
|
"loss": 2.4283, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.482810712360112, |
|
"eval_loss": 2.447265625, |
|
"eval_runtime": 40.3402, |
|
"eval_samples_per_second": 88.448, |
|
"eval_steps_per_second": 11.056, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.5884295030028805e-05, |
|
"loss": 2.4256, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.48325371829096697, |
|
"eval_loss": 2.443359375, |
|
"eval_runtime": 40.3191, |
|
"eval_samples_per_second": 88.494, |
|
"eval_steps_per_second": 11.062, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.5809462668897065e-05, |
|
"loss": 2.4198, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.48383589986367365, |
|
"eval_loss": 2.44140625, |
|
"eval_runtime": 40.3148, |
|
"eval_samples_per_second": 88.503, |
|
"eval_steps_per_second": 11.063, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.573461532931032e-05, |
|
"loss": 2.4174, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.4840175405143581, |
|
"eval_loss": 2.44140625, |
|
"eval_runtime": 40.703, |
|
"eval_samples_per_second": 87.659, |
|
"eval_steps_per_second": 10.957, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.565978296817857e-05, |
|
"loss": 2.4151, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.4844402728280929, |
|
"eval_loss": 2.435546875, |
|
"eval_runtime": 41.2392, |
|
"eval_samples_per_second": 86.52, |
|
"eval_steps_per_second": 10.815, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.558493562859184e-05, |
|
"loss": 2.4191, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.4847421853425036, |
|
"eval_loss": 2.43359375, |
|
"eval_runtime": 40.6977, |
|
"eval_samples_per_second": 87.671, |
|
"eval_steps_per_second": 10.959, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.55100882890051e-05, |
|
"loss": 2.4071, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.4848361562975334, |
|
"eval_loss": 2.431640625, |
|
"eval_runtime": 40.2058, |
|
"eval_samples_per_second": 88.744, |
|
"eval_steps_per_second": 11.093, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.543524094941836e-05, |
|
"loss": 2.4126, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.48549011743355736, |
|
"eval_loss": 2.427734375, |
|
"eval_runtime": 40.4122, |
|
"eval_samples_per_second": 88.29, |
|
"eval_steps_per_second": 11.036, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.536045352365158e-05, |
|
"loss": 2.4053, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.48513834277974305, |
|
"eval_loss": 2.4296875, |
|
"eval_runtime": 40.2632, |
|
"eval_samples_per_second": 88.617, |
|
"eval_steps_per_second": 11.077, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.528559120560985e-05, |
|
"loss": 2.4071, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.4857840848817993, |
|
"eval_loss": 2.42578125, |
|
"eval_runtime": 40.3587, |
|
"eval_samples_per_second": 88.407, |
|
"eval_steps_per_second": 11.051, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5210743866023116e-05, |
|
"loss": 2.4027, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.486615303183725, |
|
"eval_loss": 2.421875, |
|
"eval_runtime": 40.2594, |
|
"eval_samples_per_second": 88.625, |
|
"eval_steps_per_second": 11.078, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.513591150489136e-05, |
|
"loss": 2.4013, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.4867040687505753, |
|
"eval_loss": 2.41796875, |
|
"eval_runtime": 40.2109, |
|
"eval_samples_per_second": 88.732, |
|
"eval_steps_per_second": 11.092, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.5061049186849636e-05, |
|
"loss": 2.4032, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.4866416040924214, |
|
"eval_loss": 2.41796875, |
|
"eval_runtime": 40.2756, |
|
"eval_samples_per_second": 88.59, |
|
"eval_steps_per_second": 11.074, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.498621682571789e-05, |
|
"loss": 2.3919, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.4870514599196069, |
|
"eval_loss": 2.416015625, |
|
"eval_runtime": 40.3049, |
|
"eval_samples_per_second": 88.525, |
|
"eval_steps_per_second": 11.066, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.491136948613115e-05, |
|
"loss": 2.3936, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.4872848804842874, |
|
"eval_loss": 2.4140625, |
|
"eval_runtime": 40.3719, |
|
"eval_samples_per_second": 88.378, |
|
"eval_steps_per_second": 11.047, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.48365371249994e-05, |
|
"loss": 2.3905, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.48784870621446635, |
|
"eval_loss": 2.41015625, |
|
"eval_runtime": 40.4162, |
|
"eval_samples_per_second": 88.281, |
|
"eval_steps_per_second": 11.035, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.4761704763867654e-05, |
|
"loss": 2.3889, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.4881240438523818, |
|
"eval_loss": 2.41015625, |
|
"eval_runtime": 40.2942, |
|
"eval_samples_per_second": 88.549, |
|
"eval_steps_per_second": 11.069, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.468688738119089e-05, |
|
"loss": 2.3866, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.48837801200198133, |
|
"eval_loss": 2.408203125, |
|
"eval_runtime": 40.2476, |
|
"eval_samples_per_second": 88.651, |
|
"eval_steps_per_second": 11.081, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4612025063149173e-05, |
|
"loss": 2.3823, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.48875663550008985, |
|
"eval_loss": 2.40625, |
|
"eval_runtime": 40.3148, |
|
"eval_samples_per_second": 88.504, |
|
"eval_steps_per_second": 11.063, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4537177723562427e-05, |
|
"loss": 2.3828, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.48881882619044487, |
|
"eval_loss": 2.40234375, |
|
"eval_runtime": 40.2734, |
|
"eval_samples_per_second": 88.594, |
|
"eval_steps_per_second": 11.074, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.446233038397569e-05, |
|
"loss": 2.3795, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.48893718027957866, |
|
"eval_loss": 2.400390625, |
|
"eval_runtime": 40.2791, |
|
"eval_samples_per_second": 88.582, |
|
"eval_steps_per_second": 11.073, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.43875729151189e-05, |
|
"loss": 2.3812, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.48680735461076846, |
|
"eval_loss": 2.416015625, |
|
"eval_runtime": 40.6108, |
|
"eval_samples_per_second": 87.858, |
|
"eval_steps_per_second": 10.982, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.431269561862217e-05, |
|
"loss": 2.3789, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.4895744293798684, |
|
"eval_loss": 2.396484375, |
|
"eval_runtime": 40.2591, |
|
"eval_samples_per_second": 88.626, |
|
"eval_steps_per_second": 11.078, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.423786325749043e-05, |
|
"loss": 2.372, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.48950730726913283, |
|
"eval_loss": 2.396484375, |
|
"eval_runtime": 40.2108, |
|
"eval_samples_per_second": 88.732, |
|
"eval_steps_per_second": 11.092, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.41630009394487e-05, |
|
"loss": 2.3732, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.4898645612789255, |
|
"eval_loss": 2.396484375, |
|
"eval_runtime": 40.4903, |
|
"eval_samples_per_second": 88.12, |
|
"eval_steps_per_second": 11.015, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.408815359986196e-05, |
|
"loss": 2.3725, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.4903272928913027, |
|
"eval_loss": 2.392578125, |
|
"eval_runtime": 40.3547, |
|
"eval_samples_per_second": 88.416, |
|
"eval_steps_per_second": 11.052, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.401332123873022e-05, |
|
"loss": 2.3716, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.49036948393233654, |
|
"eval_loss": 2.390625, |
|
"eval_runtime": 40.3654, |
|
"eval_samples_per_second": 88.392, |
|
"eval_steps_per_second": 11.049, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.393848887759846e-05, |
|
"loss": 2.3709, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.49040482577839734, |
|
"eval_loss": 2.390625, |
|
"eval_runtime": 40.3034, |
|
"eval_samples_per_second": 88.529, |
|
"eval_steps_per_second": 11.066, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.3863656516466716e-05, |
|
"loss": 2.3619, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.4906423558600616, |
|
"eval_loss": 2.388671875, |
|
"eval_runtime": 40.362, |
|
"eval_samples_per_second": 88.4, |
|
"eval_steps_per_second": 11.05, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.378877921997e-05, |
|
"loss": 2.367, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.49115138803045644, |
|
"eval_loss": 2.38671875, |
|
"eval_runtime": 40.2804, |
|
"eval_samples_per_second": 88.579, |
|
"eval_steps_per_second": 11.072, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3713961837293236e-05, |
|
"loss": 2.3639, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.49116152483901654, |
|
"eval_loss": 2.384765625, |
|
"eval_runtime": 40.3366, |
|
"eval_samples_per_second": 88.456, |
|
"eval_steps_per_second": 11.057, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.363914445461648e-05, |
|
"loss": 2.3621, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.4918730192128138, |
|
"eval_loss": 2.3828125, |
|
"eval_runtime": 40.2687, |
|
"eval_samples_per_second": 88.605, |
|
"eval_steps_per_second": 11.076, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3564282136574755e-05, |
|
"loss": 2.3578, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.4919609628762674, |
|
"eval_loss": 2.380859375, |
|
"eval_runtime": 40.2478, |
|
"eval_samples_per_second": 88.651, |
|
"eval_steps_per_second": 11.081, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3489449775443e-05, |
|
"loss": 2.3608, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.4921738358560288, |
|
"eval_loss": 2.37890625, |
|
"eval_runtime": 40.4074, |
|
"eval_samples_per_second": 88.301, |
|
"eval_steps_per_second": 11.038, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.341461741431126e-05, |
|
"loss": 2.3541, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.4923423260523651, |
|
"eval_loss": 2.376953125, |
|
"eval_runtime": 40.2757, |
|
"eval_samples_per_second": 88.589, |
|
"eval_steps_per_second": 11.074, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.333978505317951e-05, |
|
"loss": 2.3556, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.4925938284917744, |
|
"eval_loss": 2.376953125, |
|
"eval_runtime": 40.292, |
|
"eval_samples_per_second": 88.553, |
|
"eval_steps_per_second": 11.069, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.326493771359277e-05, |
|
"loss": 2.3562, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.49278067453063834, |
|
"eval_loss": 2.376953125, |
|
"eval_runtime": 40.1882, |
|
"eval_samples_per_second": 88.782, |
|
"eval_steps_per_second": 11.098, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9925197595778234e-05, |
|
"loss": 2.3641, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.4910004317732511, |
|
"eval_loss": 2.38671875, |
|
"eval_runtime": 39.6555, |
|
"eval_samples_per_second": 89.975, |
|
"eval_steps_per_second": 11.247, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.985036523464649e-05, |
|
"loss": 2.3641, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.4911015258910529, |
|
"eval_loss": 2.38671875, |
|
"eval_runtime": 39.5455, |
|
"eval_samples_per_second": 90.225, |
|
"eval_steps_per_second": 11.278, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977551789505975e-05, |
|
"loss": 2.3646, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.4910639923026007, |
|
"eval_loss": 2.38671875, |
|
"eval_runtime": 39.4635, |
|
"eval_samples_per_second": 90.413, |
|
"eval_steps_per_second": 11.302, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.970071549083798e-05, |
|
"loss": 2.3629, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.4911439908998856, |
|
"eval_loss": 2.384765625, |
|
"eval_runtime": 39.5626, |
|
"eval_samples_per_second": 90.186, |
|
"eval_steps_per_second": 11.273, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9625868151251246e-05, |
|
"loss": 2.3659, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.4913645349780168, |
|
"eval_loss": 2.3828125, |
|
"eval_runtime": 39.4888, |
|
"eval_samples_per_second": 90.355, |
|
"eval_steps_per_second": 11.294, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.9551020811664506e-05, |
|
"loss": 2.3651, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.4916360370667473, |
|
"eval_loss": 2.3828125, |
|
"eval_runtime": 38.5403, |
|
"eval_samples_per_second": 92.578, |
|
"eval_steps_per_second": 11.572, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9476173472077765e-05, |
|
"loss": 2.3608, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.4917579527372671, |
|
"eval_loss": 2.380859375, |
|
"eval_runtime": 39.5737, |
|
"eval_samples_per_second": 90.161, |
|
"eval_steps_per_second": 11.27, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9401356089401005e-05, |
|
"loss": 2.3612, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.49203685195656843, |
|
"eval_loss": 2.380859375, |
|
"eval_runtime": 38.5594, |
|
"eval_samples_per_second": 92.533, |
|
"eval_steps_per_second": 11.567, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.932649377135928e-05, |
|
"loss": 2.3569, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.49215328827110977, |
|
"eval_loss": 2.37890625, |
|
"eval_runtime": 39.5649, |
|
"eval_samples_per_second": 90.181, |
|
"eval_steps_per_second": 11.273, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9251676388682524e-05, |
|
"loss": 2.3557, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.4923439698591586, |
|
"eval_loss": 2.37890625, |
|
"eval_runtime": 39.6114, |
|
"eval_samples_per_second": 90.075, |
|
"eval_steps_per_second": 11.259, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9176829049095784e-05, |
|
"loss": 2.3541, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.49218726027817594, |
|
"eval_loss": 2.376953125, |
|
"eval_runtime": 39.5989, |
|
"eval_samples_per_second": 90.104, |
|
"eval_steps_per_second": 11.263, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.910196673105406e-05, |
|
"loss": 2.351, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.49274588062017544, |
|
"eval_loss": 2.375, |
|
"eval_runtime": 38.6221, |
|
"eval_samples_per_second": 92.382, |
|
"eval_steps_per_second": 11.548, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.9027134369922304e-05, |
|
"loss": 2.3504, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.49260917068851395, |
|
"eval_loss": 2.375, |
|
"eval_runtime": 39.6516, |
|
"eval_samples_per_second": 89.984, |
|
"eval_steps_per_second": 11.248, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.895231698724555e-05, |
|
"loss": 2.3479, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.492896014973984, |
|
"eval_loss": 2.373046875, |
|
"eval_runtime": 39.6699, |
|
"eval_samples_per_second": 89.942, |
|
"eval_steps_per_second": 11.243, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.887745466920382e-05, |
|
"loss": 2.3451, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.4929382060150178, |
|
"eval_loss": 2.37109375, |
|
"eval_runtime": 39.6216, |
|
"eval_samples_per_second": 90.052, |
|
"eval_steps_per_second": 11.256, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.880262230807207e-05, |
|
"loss": 2.3505, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.49343929311924395, |
|
"eval_loss": 2.369140625, |
|
"eval_runtime": 39.6785, |
|
"eval_samples_per_second": 89.923, |
|
"eval_steps_per_second": 11.24, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.8727804925395315e-05, |
|
"loss": 2.3457, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.4933929925612263, |
|
"eval_loss": 2.369140625, |
|
"eval_runtime": 39.6502, |
|
"eval_samples_per_second": 89.987, |
|
"eval_steps_per_second": 11.248, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.865297256426357e-05, |
|
"loss": 2.3479, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.4937496986354212, |
|
"eval_loss": 2.369140625, |
|
"eval_runtime": 39.6648, |
|
"eval_samples_per_second": 89.954, |
|
"eval_steps_per_second": 11.244, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.8578110246221835e-05, |
|
"loss": 2.3421, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.4935861398594655, |
|
"eval_loss": 2.3671875, |
|
"eval_runtime": 39.7026, |
|
"eval_samples_per_second": 89.868, |
|
"eval_steps_per_second": 11.234, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.850327788509009e-05, |
|
"loss": 2.3433, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.4937406576980568, |
|
"eval_loss": 2.3671875, |
|
"eval_runtime": 39.7646, |
|
"eval_samples_per_second": 89.728, |
|
"eval_steps_per_second": 11.216, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.842846050241333e-05, |
|
"loss": 2.3425, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.4939384624488776, |
|
"eval_loss": 2.365234375, |
|
"eval_runtime": 39.7934, |
|
"eval_samples_per_second": 89.663, |
|
"eval_steps_per_second": 11.208, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.835361316282659e-05, |
|
"loss": 2.3403, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.49420284137483617, |
|
"eval_loss": 2.36328125, |
|
"eval_runtime": 39.7702, |
|
"eval_samples_per_second": 89.715, |
|
"eval_steps_per_second": 11.214, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.827876582323985e-05, |
|
"loss": 2.3417, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.49440420770704296, |
|
"eval_loss": 2.361328125, |
|
"eval_runtime": 40.0918, |
|
"eval_samples_per_second": 88.996, |
|
"eval_steps_per_second": 11.124, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.82039484405631e-05, |
|
"loss": 2.3382, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.4947474893590907, |
|
"eval_loss": 2.361328125, |
|
"eval_runtime": 39.7167, |
|
"eval_samples_per_second": 89.836, |
|
"eval_steps_per_second": 11.23, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.812913105788634e-05, |
|
"loss": 2.3354, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.4949266642995849, |
|
"eval_loss": 2.359375, |
|
"eval_runtime": 39.8562, |
|
"eval_samples_per_second": 89.522, |
|
"eval_steps_per_second": 11.19, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.805425376138962e-05, |
|
"loss": 2.3366, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.4946513266616695, |
|
"eval_loss": 2.359375, |
|
"eval_runtime": 38.7841, |
|
"eval_samples_per_second": 91.997, |
|
"eval_steps_per_second": 11.5, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.797942140025787e-05, |
|
"loss": 2.3373, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.49454119160650334, |
|
"eval_loss": 2.359375, |
|
"eval_runtime": 38.7732, |
|
"eval_samples_per_second": 92.022, |
|
"eval_steps_per_second": 11.503, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.790460401758111e-05, |
|
"loss": 2.3365, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.49488255548395865, |
|
"eval_loss": 2.359375, |
|
"eval_runtime": 39.8158, |
|
"eval_samples_per_second": 89.613, |
|
"eval_steps_per_second": 11.202, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.782975667799438e-05, |
|
"loss": 2.3318, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.49525295994809954, |
|
"eval_loss": 2.35546875, |
|
"eval_runtime": 39.8567, |
|
"eval_samples_per_second": 89.521, |
|
"eval_steps_per_second": 11.19, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7754894359952644e-05, |
|
"loss": 2.3278, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.4957521292777332, |
|
"eval_loss": 2.353515625, |
|
"eval_runtime": 39.8687, |
|
"eval_samples_per_second": 89.494, |
|
"eval_steps_per_second": 11.187, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.768004702036591e-05, |
|
"loss": 2.3277, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.4959060991807267, |
|
"eval_loss": 2.3515625, |
|
"eval_runtime": 40.0704, |
|
"eval_samples_per_second": 89.043, |
|
"eval_steps_per_second": 11.13, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7605214659234157e-05, |
|
"loss": 2.326, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.49614417719798887, |
|
"eval_loss": 2.3515625, |
|
"eval_runtime": 39.9671, |
|
"eval_samples_per_second": 89.273, |
|
"eval_steps_per_second": 11.159, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.753036731964742e-05, |
|
"loss": 2.3273, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.49605705543793205, |
|
"eval_loss": 2.3515625, |
|
"eval_runtime": 39.9201, |
|
"eval_samples_per_second": 89.378, |
|
"eval_steps_per_second": 11.172, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.745551998006068e-05, |
|
"loss": 2.3284, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.49654992350819055, |
|
"eval_loss": 2.349609375, |
|
"eval_runtime": 39.956, |
|
"eval_samples_per_second": 89.298, |
|
"eval_steps_per_second": 11.162, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.7380687618928936e-05, |
|
"loss": 2.3276, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.49658115583726753, |
|
"eval_loss": 2.34765625, |
|
"eval_runtime": 39.9741, |
|
"eval_samples_per_second": 89.258, |
|
"eval_steps_per_second": 11.157, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.7305840279342196e-05, |
|
"loss": 2.3228, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.4966945785060207, |
|
"eval_loss": 2.345703125, |
|
"eval_runtime": 39.9089, |
|
"eval_samples_per_second": 89.404, |
|
"eval_steps_per_second": 11.175, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.723103787512043e-05, |
|
"loss": 2.3219, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.49684580873102496, |
|
"eval_loss": 2.345703125, |
|
"eval_runtime": 39.9108, |
|
"eval_samples_per_second": 89.399, |
|
"eval_steps_per_second": 11.175, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.715619053553369e-05, |
|
"loss": 2.326, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.49703155889869327, |
|
"eval_loss": 2.34375, |
|
"eval_runtime": 40.0352, |
|
"eval_samples_per_second": 89.122, |
|
"eval_steps_per_second": 11.14, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.708135817440194e-05, |
|
"loss": 2.3191, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.4972345690376936, |
|
"eval_loss": 2.341796875, |
|
"eval_runtime": 40.0269, |
|
"eval_samples_per_second": 89.14, |
|
"eval_steps_per_second": 11.143, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.70065258132702e-05, |
|
"loss": 2.3167, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.4972822394347058, |
|
"eval_loss": 2.34375, |
|
"eval_runtime": 40.0234, |
|
"eval_samples_per_second": 89.148, |
|
"eval_steps_per_second": 11.143, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.693166349522847e-05, |
|
"loss": 2.3172, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.4974205931731608, |
|
"eval_loss": 2.341796875, |
|
"eval_runtime": 40.0416, |
|
"eval_samples_per_second": 89.107, |
|
"eval_steps_per_second": 11.138, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.685683113409672e-05, |
|
"loss": 2.3194, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.4977205879129791, |
|
"eval_loss": 2.337890625, |
|
"eval_runtime": 40.5115, |
|
"eval_samples_per_second": 88.074, |
|
"eval_steps_per_second": 11.009, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.678198379450998e-05, |
|
"loss": 2.3204, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.49760332969504095, |
|
"eval_loss": 2.33984375, |
|
"eval_runtime": 40.059, |
|
"eval_samples_per_second": 89.069, |
|
"eval_steps_per_second": 11.134, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.670716641183322e-05, |
|
"loss": 2.309, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.49802085662059625, |
|
"eval_loss": 2.3359375, |
|
"eval_runtime": 40.172, |
|
"eval_samples_per_second": 88.818, |
|
"eval_steps_per_second": 11.102, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.663233405070147e-05, |
|
"loss": 2.3147, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.49805729433785273, |
|
"eval_loss": 2.337890625, |
|
"eval_runtime": 40.0906, |
|
"eval_samples_per_second": 88.999, |
|
"eval_steps_per_second": 11.125, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.655745675420475e-05, |
|
"loss": 2.3122, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.4980255140731779, |
|
"eval_loss": 2.3359375, |
|
"eval_runtime": 40.0778, |
|
"eval_samples_per_second": 89.027, |
|
"eval_steps_per_second": 11.128, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6482624393073005e-05, |
|
"loss": 2.3096, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.4984096169272648, |
|
"eval_loss": 2.333984375, |
|
"eval_runtime": 40.3028, |
|
"eval_samples_per_second": 88.53, |
|
"eval_steps_per_second": 11.066, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.640780701039625e-05, |
|
"loss": 2.3093, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.49861701055104785, |
|
"eval_loss": 2.333984375, |
|
"eval_runtime": 40.1409, |
|
"eval_samples_per_second": 88.887, |
|
"eval_steps_per_second": 11.111, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.633295967080951e-05, |
|
"loss": 2.3048, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.498526601177404, |
|
"eval_loss": 2.33203125, |
|
"eval_runtime": 40.2295, |
|
"eval_samples_per_second": 88.691, |
|
"eval_steps_per_second": 11.086, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6258127309677764e-05, |
|
"loss": 2.3111, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.4988186508510536, |
|
"eval_loss": 2.330078125, |
|
"eval_runtime": 40.2382, |
|
"eval_samples_per_second": 88.672, |
|
"eval_steps_per_second": 11.084, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6183279970091023e-05, |
|
"loss": 2.3074, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.4989449500063561, |
|
"eval_loss": 2.330078125, |
|
"eval_runtime": 40.2221, |
|
"eval_samples_per_second": 88.707, |
|
"eval_steps_per_second": 11.088, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.610843263050428e-05, |
|
"loss": 2.3082, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.49918768547619985, |
|
"eval_loss": 2.330078125, |
|
"eval_runtime": 40.2424, |
|
"eval_samples_per_second": 88.663, |
|
"eval_steps_per_second": 11.083, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.603357031246256e-05, |
|
"loss": 2.3093, |
|
"step": 715000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.4993685042234876, |
|
"eval_loss": 2.328125, |
|
"eval_runtime": 39.2194, |
|
"eval_samples_per_second": 90.975, |
|
"eval_steps_per_second": 11.372, |
|
"step": 715000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.595873795133081e-05, |
|
"loss": 2.3011, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.4995095976399318, |
|
"eval_loss": 2.328125, |
|
"eval_runtime": 40.3274, |
|
"eval_samples_per_second": 88.476, |
|
"eval_steps_per_second": 11.059, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.588390559019906e-05, |
|
"loss": 2.2998, |
|
"step": 725000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.4994558999513433, |
|
"eval_loss": 2.326171875, |
|
"eval_runtime": 40.2634, |
|
"eval_samples_per_second": 88.616, |
|
"eval_steps_per_second": 11.077, |
|
"step": 725000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.580907322906731e-05, |
|
"loss": 2.3012, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.49959206194740696, |
|
"eval_loss": 2.326171875, |
|
"eval_runtime": 40.2894, |
|
"eval_samples_per_second": 88.559, |
|
"eval_steps_per_second": 11.07, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.573421091102558e-05, |
|
"loss": 2.3002, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.4997052106483612, |
|
"eval_loss": 2.32421875, |
|
"eval_runtime": 40.3059, |
|
"eval_samples_per_second": 88.523, |
|
"eval_steps_per_second": 11.065, |
|
"step": 735000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.5659378549893835e-05, |
|
"loss": 2.2994, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.5000197256815223, |
|
"eval_loss": 2.32421875, |
|
"eval_runtime": 40.7124, |
|
"eval_samples_per_second": 87.639, |
|
"eval_steps_per_second": 10.955, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.558454618876209e-05, |
|
"loss": 2.299, |
|
"step": 745000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.5000953407940244, |
|
"eval_loss": 2.322265625, |
|
"eval_runtime": 40.3194, |
|
"eval_samples_per_second": 88.493, |
|
"eval_steps_per_second": 11.062, |
|
"step": 745000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.550971382763034e-05, |
|
"loss": 2.2969, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.5002605433767736, |
|
"eval_loss": 2.322265625, |
|
"eval_runtime": 40.3665, |
|
"eval_samples_per_second": 88.39, |
|
"eval_steps_per_second": 11.049, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.543489644495358e-05, |
|
"loss": 2.2934, |
|
"step": 755000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.5003739660455269, |
|
"eval_loss": 2.3203125, |
|
"eval_runtime": 40.3284, |
|
"eval_samples_per_second": 88.474, |
|
"eval_steps_per_second": 11.059, |
|
"step": 755000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.536004910536684e-05, |
|
"loss": 2.2988, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.5004895804566715, |
|
"eval_loss": 2.318359375, |
|
"eval_runtime": 40.3305, |
|
"eval_samples_per_second": 88.469, |
|
"eval_steps_per_second": 11.059, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.5285186787325113e-05, |
|
"loss": 2.2911, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.5007449184452656, |
|
"eval_loss": 2.318359375, |
|
"eval_runtime": 39.3805, |
|
"eval_samples_per_second": 90.603, |
|
"eval_steps_per_second": 11.325, |
|
"step": 765000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.5210354426193366e-05, |
|
"loss": 2.2929, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.5008427249494803, |
|
"eval_loss": 2.318359375, |
|
"eval_runtime": 40.4207, |
|
"eval_samples_per_second": 88.272, |
|
"eval_steps_per_second": 11.034, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.5135567000426584e-05, |
|
"loss": 2.2926, |
|
"step": 775000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.5008994362838569, |
|
"eval_loss": 2.31640625, |
|
"eval_runtime": 40.4103, |
|
"eval_samples_per_second": 88.294, |
|
"eval_steps_per_second": 11.037, |
|
"step": 775000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.506070468238486e-05, |
|
"loss": 2.292, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.5011701164691906, |
|
"eval_loss": 2.31640625, |
|
"eval_runtime": 40.3936, |
|
"eval_samples_per_second": 88.331, |
|
"eval_steps_per_second": 11.041, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.498587232125311e-05, |
|
"loss": 2.2932, |
|
"step": 785000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5014183312950129, |
|
"eval_loss": 2.314453125, |
|
"eval_runtime": 40.4142, |
|
"eval_samples_per_second": 88.286, |
|
"eval_steps_per_second": 11.036, |
|
"step": 785000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.4911039960121364e-05, |
|
"loss": 2.2903, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5013958659355015, |
|
"eval_loss": 2.314453125, |
|
"eval_runtime": 40.3913, |
|
"eval_samples_per_second": 88.336, |
|
"eval_steps_per_second": 11.042, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.483620759898962e-05, |
|
"loss": 2.2886, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5015205212840104, |
|
"eval_loss": 2.3125, |
|
"eval_runtime": 40.3641, |
|
"eval_samples_per_second": 88.395, |
|
"eval_steps_per_second": 11.049, |
|
"step": 795000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.476137523785787e-05, |
|
"loss": 2.2924, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5014750426293895, |
|
"eval_loss": 2.3125, |
|
"eval_runtime": 40.359, |
|
"eval_samples_per_second": 88.407, |
|
"eval_steps_per_second": 11.051, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.468652789827113e-05, |
|
"loss": 2.2891, |
|
"step": 805000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5018673645174441, |
|
"eval_loss": 2.310546875, |
|
"eval_runtime": 40.4253, |
|
"eval_samples_per_second": 88.261, |
|
"eval_steps_per_second": 11.033, |
|
"step": 805000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.461168055868439e-05, |
|
"loss": 2.2862, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5019873624133714, |
|
"eval_loss": 2.30859375, |
|
"eval_runtime": 40.3854, |
|
"eval_samples_per_second": 88.349, |
|
"eval_steps_per_second": 11.044, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.453686317600763e-05, |
|
"loss": 2.2858, |
|
"step": 815000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5021665373538656, |
|
"eval_loss": 2.30859375, |
|
"eval_runtime": 40.4075, |
|
"eval_samples_per_second": 88.301, |
|
"eval_steps_per_second": 11.038, |
|
"step": 815000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.44620008579659e-05, |
|
"loss": 2.2841, |
|
"step": 820000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.502265439729276, |
|
"eval_loss": 2.306640625, |
|
"eval_runtime": 40.4403, |
|
"eval_samples_per_second": 88.229, |
|
"eval_steps_per_second": 11.029, |
|
"step": 820000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.438718347528915e-05, |
|
"loss": 2.2843, |
|
"step": 825000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.5022361251747914, |
|
"eval_loss": 2.30859375, |
|
"eval_runtime": 40.4536, |
|
"eval_samples_per_second": 88.2, |
|
"eval_steps_per_second": 11.025, |
|
"step": 825000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.431233613570241e-05, |
|
"loss": 2.2832, |
|
"step": 830000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.5024813263548256, |
|
"eval_loss": 2.306640625, |
|
"eval_runtime": 40.4096, |
|
"eval_samples_per_second": 88.296, |
|
"eval_steps_per_second": 11.037, |
|
"step": 830000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.423756368839062e-05, |
|
"loss": 2.2846, |
|
"step": 835000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.502600776315155, |
|
"eval_loss": 2.306640625, |
|
"eval_runtime": 39.3247, |
|
"eval_samples_per_second": 90.732, |
|
"eval_steps_per_second": 11.341, |
|
"step": 835000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.416267141343891e-05, |
|
"loss": 2.2784, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.5026766653954561, |
|
"eval_loss": 2.3046875, |
|
"eval_runtime": 40.3768, |
|
"eval_samples_per_second": 88.367, |
|
"eval_steps_per_second": 11.046, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.408782407385217e-05, |
|
"loss": 2.277, |
|
"step": 845000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.5028424159138032, |
|
"eval_loss": 2.302734375, |
|
"eval_runtime": 40.4265, |
|
"eval_samples_per_second": 88.259, |
|
"eval_steps_per_second": 11.032, |
|
"step": 845000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.4013021669630405e-05, |
|
"loss": 2.276, |
|
"step": 850000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.5025583113063223, |
|
"eval_loss": 2.306640625, |
|
"eval_runtime": 40.7923, |
|
"eval_samples_per_second": 87.468, |
|
"eval_steps_per_second": 10.933, |
|
"step": 850000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.393818930849866e-05, |
|
"loss": 2.2802, |
|
"step": 855000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.5031032332583757, |
|
"eval_loss": 2.302734375, |
|
"eval_runtime": 40.3592, |
|
"eval_samples_per_second": 88.406, |
|
"eval_steps_per_second": 11.051, |
|
"step": 855000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.3863356947366904e-05, |
|
"loss": 2.2781, |
|
"step": 860000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.5032018616659872, |
|
"eval_loss": 2.30078125, |
|
"eval_runtime": 40.4036, |
|
"eval_samples_per_second": 88.309, |
|
"eval_steps_per_second": 11.039, |
|
"step": 860000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.378850960778017e-05, |
|
"loss": 2.2749, |
|
"step": 865000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.5038193850847547, |
|
"eval_loss": 2.298828125, |
|
"eval_runtime": 40.3881, |
|
"eval_samples_per_second": 88.343, |
|
"eval_steps_per_second": 11.043, |
|
"step": 865000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.371366226819343e-05, |
|
"loss": 2.2729, |
|
"step": 870000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.5037152773211648, |
|
"eval_loss": 2.296875, |
|
"eval_runtime": 40.4456, |
|
"eval_samples_per_second": 88.217, |
|
"eval_steps_per_second": 11.027, |
|
"step": 870000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.363882990706168e-05, |
|
"loss": 2.2708, |
|
"step": 875000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.5038993836820396, |
|
"eval_loss": 2.296875, |
|
"eval_runtime": 40.4095, |
|
"eval_samples_per_second": 88.296, |
|
"eval_steps_per_second": 11.037, |
|
"step": 875000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.3563997545929936e-05, |
|
"loss": 2.2754, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.5038591104155982, |
|
"eval_loss": 2.296875, |
|
"eval_runtime": 40.397, |
|
"eval_samples_per_second": 88.323, |
|
"eval_steps_per_second": 11.04, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.3489150206343196e-05, |
|
"loss": 2.2761, |
|
"step": 885000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5040640383291909, |
|
"eval_loss": 2.294921875, |
|
"eval_runtime": 40.3166, |
|
"eval_samples_per_second": 88.499, |
|
"eval_steps_per_second": 11.062, |
|
"step": 885000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.341434780212143e-05, |
|
"loss": 2.2742, |
|
"step": 890000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5041032157244366, |
|
"eval_loss": 2.294921875, |
|
"eval_runtime": 40.3381, |
|
"eval_samples_per_second": 88.452, |
|
"eval_steps_per_second": 11.057, |
|
"step": 890000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.3339470505624715e-05, |
|
"loss": 2.2734, |
|
"step": 895000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5040837640107132, |
|
"eval_loss": 2.294921875, |
|
"eval_runtime": 40.3833, |
|
"eval_samples_per_second": 88.353, |
|
"eval_steps_per_second": 11.044, |
|
"step": 895000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.326463814449296e-05, |
|
"loss": 2.2682, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5043944434946894, |
|
"eval_loss": 2.29296875, |
|
"eval_runtime": 40.4818, |
|
"eval_samples_per_second": 88.138, |
|
"eval_steps_per_second": 11.017, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.318982076181621e-05, |
|
"loss": 2.2667, |
|
"step": 905000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5045489613332808, |
|
"eval_loss": 2.29296875, |
|
"eval_runtime": 40.3614, |
|
"eval_samples_per_second": 88.401, |
|
"eval_steps_per_second": 11.05, |
|
"step": 905000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.311498840068446e-05, |
|
"loss": 2.2676, |
|
"step": 910000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5045801936623577, |
|
"eval_loss": 2.29296875, |
|
"eval_runtime": 40.4147, |
|
"eval_samples_per_second": 88.285, |
|
"eval_steps_per_second": 11.036, |
|
"step": 910000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.304012608264273e-05, |
|
"loss": 2.2707, |
|
"step": 915000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5046547129036641, |
|
"eval_loss": 2.291015625, |
|
"eval_runtime": 40.4009, |
|
"eval_samples_per_second": 88.315, |
|
"eval_steps_per_second": 11.039, |
|
"step": 915000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.296529372151098e-05, |
|
"loss": 2.265, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.5047700533470098, |
|
"eval_loss": 2.291015625, |
|
"eval_runtime": 40.3704, |
|
"eval_samples_per_second": 88.382, |
|
"eval_steps_per_second": 11.048, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.289046136037923e-05, |
|
"loss": 2.2676, |
|
"step": 925000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.5046149875728206, |
|
"eval_loss": 2.291015625, |
|
"eval_runtime": 40.3019, |
|
"eval_samples_per_second": 88.532, |
|
"eval_steps_per_second": 11.066, |
|
"step": 925000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.281564397770247e-05, |
|
"loss": 2.2662, |
|
"step": 930000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.5051503206519119, |
|
"eval_loss": 2.2890625, |
|
"eval_runtime": 40.3203, |
|
"eval_samples_per_second": 88.491, |
|
"eval_steps_per_second": 11.061, |
|
"step": 930000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.274079663811574e-05, |
|
"loss": 2.2706, |
|
"step": 935000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.5050968969311223, |
|
"eval_loss": 2.2890625, |
|
"eval_runtime": 40.3497, |
|
"eval_samples_per_second": 88.427, |
|
"eval_steps_per_second": 11.053, |
|
"step": 935000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.2665949298529e-05, |
|
"loss": 2.2657, |
|
"step": 940000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.5048788185631814, |
|
"eval_loss": 2.2890625, |
|
"eval_runtime": 40.7581, |
|
"eval_samples_per_second": 87.541, |
|
"eval_steps_per_second": 10.943, |
|
"step": 940000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.259110195894226e-05, |
|
"loss": 2.2672, |
|
"step": 945000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.5050453909849252, |
|
"eval_loss": 2.287109375, |
|
"eval_runtime": 40.3332, |
|
"eval_samples_per_second": 88.463, |
|
"eval_steps_per_second": 11.058, |
|
"step": 945000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.9925167638868255e-05, |
|
"loss": 2.2716, |
|
"step": 950000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.5037065103515993, |
|
"eval_loss": 2.296875, |
|
"eval_runtime": 38.5412, |
|
"eval_samples_per_second": 92.576, |
|
"eval_steps_per_second": 11.572, |
|
"step": 950000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9850290342371536e-05, |
|
"loss": 2.2702, |
|
"step": 955000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.5036908941870608, |
|
"eval_loss": 2.298828125, |
|
"eval_runtime": 39.4253, |
|
"eval_samples_per_second": 90.5, |
|
"eval_steps_per_second": 11.313, |
|
"step": 955000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.977548793814977e-05, |
|
"loss": 2.2708, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.5035029522770011, |
|
"eval_loss": 2.298828125, |
|
"eval_runtime": 39.5074, |
|
"eval_samples_per_second": 90.312, |
|
"eval_steps_per_second": 11.289, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.970064059856303e-05, |
|
"loss": 2.2738, |
|
"step": 965000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.5035369242840674, |
|
"eval_loss": 2.298828125, |
|
"eval_runtime": 39.5917, |
|
"eval_samples_per_second": 90.12, |
|
"eval_steps_per_second": 11.265, |
|
"step": 965000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.962582321588627e-05, |
|
"loss": 2.2737, |
|
"step": 970000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.5035577458367854, |
|
"eval_loss": 2.298828125, |
|
"eval_runtime": 39.4627, |
|
"eval_samples_per_second": 90.415, |
|
"eval_steps_per_second": 11.302, |
|
"step": 970000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9550990854754526e-05, |
|
"loss": 2.2763, |
|
"step": 975000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.49873289892999134, |
|
"eval_loss": 2.330078125, |
|
"eval_runtime": 39.5211, |
|
"eval_samples_per_second": 90.281, |
|
"eval_steps_per_second": 11.285, |
|
"step": 975000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.9476143515167786e-05, |
|
"loss": 2.2738, |
|
"step": 980000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.5034662405919458, |
|
"eval_loss": 2.296875, |
|
"eval_runtime": 39.4696, |
|
"eval_samples_per_second": 90.399, |
|
"eval_steps_per_second": 11.3, |
|
"step": 980000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9401296175581046e-05, |
|
"loss": 2.2737, |
|
"step": 985000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5036182927203469, |
|
"eval_loss": 2.296875, |
|
"eval_runtime": 39.6371, |
|
"eval_samples_per_second": 90.017, |
|
"eval_steps_per_second": 11.252, |
|
"step": 985000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.932644883599431e-05, |
|
"loss": 2.2748, |
|
"step": 990000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5036056902015965, |
|
"eval_loss": 2.296875, |
|
"eval_runtime": 39.6139, |
|
"eval_samples_per_second": 90.069, |
|
"eval_steps_per_second": 11.259, |
|
"step": 990000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9251631453317545e-05, |
|
"loss": 2.2724, |
|
"step": 995000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5038232206339396, |
|
"eval_loss": 2.296875, |
|
"eval_runtime": 39.4746, |
|
"eval_samples_per_second": 90.387, |
|
"eval_steps_per_second": 11.298, |
|
"step": 995000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.917678411373081e-05, |
|
"loss": 2.2744, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5032999421380009, |
|
"eval_loss": 2.298828125, |
|
"eval_runtime": 39.6576, |
|
"eval_samples_per_second": 89.97, |
|
"eval_steps_per_second": 11.246, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9101951752599065e-05, |
|
"loss": 2.2694, |
|
"step": 1005000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5033465166638174, |
|
"eval_loss": 2.298828125, |
|
"eval_runtime": 39.6623, |
|
"eval_samples_per_second": 89.959, |
|
"eval_steps_per_second": 11.245, |
|
"step": 1005000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.9027104413012324e-05, |
|
"loss": 2.2684, |
|
"step": 1010000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5039421226586712, |
|
"eval_loss": 2.294921875, |
|
"eval_runtime": 39.6275, |
|
"eval_samples_per_second": 90.039, |
|
"eval_steps_per_second": 11.255, |
|
"step": 1010000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.895231698724555e-05, |
|
"loss": 2.2731, |
|
"step": 1015000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5039547251774216, |
|
"eval_loss": 2.294921875, |
|
"eval_runtime": 39.6334, |
|
"eval_samples_per_second": 90.025, |
|
"eval_steps_per_second": 11.253, |
|
"step": 1015000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.887746964765881e-05, |
|
"loss": 2.2714, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5042445831086797, |
|
"eval_loss": 2.294921875, |
|
"eval_runtime": 39.6384, |
|
"eval_samples_per_second": 90.014, |
|
"eval_steps_per_second": 11.252, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.880263728652706e-05, |
|
"loss": 2.2687, |
|
"step": 1025000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5045218385211876, |
|
"eval_loss": 2.29296875, |
|
"eval_runtime": 39.6654, |
|
"eval_samples_per_second": 89.952, |
|
"eval_steps_per_second": 11.244, |
|
"step": 1025000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.872778994694032e-05, |
|
"loss": 2.2673, |
|
"step": 1030000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5046215627999947, |
|
"eval_loss": 2.29296875, |
|
"eval_runtime": 39.7655, |
|
"eval_samples_per_second": 89.726, |
|
"eval_steps_per_second": 11.216, |
|
"step": 1030000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8652957585808575e-05, |
|
"loss": 2.2677, |
|
"step": 1035000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5044056761744452, |
|
"eval_loss": 2.29296875, |
|
"eval_runtime": 39.7885, |
|
"eval_samples_per_second": 89.674, |
|
"eval_steps_per_second": 11.209, |
|
"step": 1035000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.8578110246221835e-05, |
|
"loss": 2.265, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5046538910002674, |
|
"eval_loss": 2.291015625, |
|
"eval_runtime": 39.7963, |
|
"eval_samples_per_second": 89.657, |
|
"eval_steps_per_second": 11.207, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.850327788509009e-05, |
|
"loss": 2.2659, |
|
"step": 1045000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.504468688768197, |
|
"eval_loss": 2.291015625, |
|
"eval_runtime": 40.1905, |
|
"eval_samples_per_second": 88.777, |
|
"eval_steps_per_second": 11.097, |
|
"step": 1045000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.842849045932331e-05, |
|
"loss": 2.2633, |
|
"step": 1050000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5042100631660157, |
|
"eval_loss": 2.294921875, |
|
"eval_runtime": 39.7629, |
|
"eval_samples_per_second": 89.732, |
|
"eval_steps_per_second": 11.216, |
|
"step": 1050000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.835361316282659e-05, |
|
"loss": 2.2689, |
|
"step": 1055000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.5049952548777227, |
|
"eval_loss": 2.2890625, |
|
"eval_runtime": 39.8901, |
|
"eval_samples_per_second": 89.446, |
|
"eval_steps_per_second": 11.181, |
|
"step": 1055000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.827876582323985e-05, |
|
"loss": 2.2617, |
|
"step": 1060000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.5049081331176659, |
|
"eval_loss": 2.2890625, |
|
"eval_runtime": 39.8913, |
|
"eval_samples_per_second": 89.443, |
|
"eval_steps_per_second": 11.18, |
|
"step": 1060000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.820390350519813e-05, |
|
"loss": 2.2613, |
|
"step": 1065000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.5052440176391427, |
|
"eval_loss": 2.287109375, |
|
"eval_runtime": 39.9185, |
|
"eval_samples_per_second": 89.382, |
|
"eval_steps_per_second": 11.173, |
|
"step": 1065000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.8129086122521366e-05, |
|
"loss": 2.2649, |
|
"step": 1070000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.5047081366244537, |
|
"eval_loss": 2.2890625, |
|
"eval_runtime": 39.8452, |
|
"eval_samples_per_second": 89.547, |
|
"eval_steps_per_second": 11.193, |
|
"step": 1070000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.805422380447964e-05, |
|
"loss": 2.2587, |
|
"step": 1075000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.505284564873383, |
|
"eval_loss": 2.287109375, |
|
"eval_runtime": 39.8746, |
|
"eval_samples_per_second": 89.481, |
|
"eval_steps_per_second": 11.185, |
|
"step": 1075000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.797939144334789e-05, |
|
"loss": 2.2641, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.5054223706762402, |
|
"eval_loss": 2.28515625, |
|
"eval_runtime": 39.914, |
|
"eval_samples_per_second": 89.392, |
|
"eval_steps_per_second": 11.174, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.7904559082216145e-05, |
|
"loss": 2.2634, |
|
"step": 1085000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.5056552433053229, |
|
"eval_loss": 2.28515625, |
|
"eval_runtime": 39.8319, |
|
"eval_samples_per_second": 89.576, |
|
"eval_steps_per_second": 11.197, |
|
"step": 1085000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.7829741699539384e-05, |
|
"loss": 2.2597, |
|
"step": 1090000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.5057451047433689, |
|
"eval_loss": 2.283203125, |
|
"eval_runtime": 39.976, |
|
"eval_samples_per_second": 89.254, |
|
"eval_steps_per_second": 11.157, |
|
"step": 1090000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.775487938149766e-05, |
|
"loss": 2.2572, |
|
"step": 1095000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.5059667446926958, |
|
"eval_loss": 2.283203125, |
|
"eval_runtime": 40.0116, |
|
"eval_samples_per_second": 89.174, |
|
"eval_steps_per_second": 11.147, |
|
"step": 1095000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.768003204191092e-05, |
|
"loss": 2.2566, |
|
"step": 1100000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.5055771624826304, |
|
"eval_loss": 2.283203125, |
|
"eval_runtime": 39.963, |
|
"eval_samples_per_second": 89.283, |
|
"eval_steps_per_second": 11.16, |
|
"step": 1100000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.760524461614414e-05, |
|
"loss": 2.2576, |
|
"step": 1105000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.5055905869047775, |
|
"eval_loss": 2.283203125, |
|
"eval_runtime": 39.8683, |
|
"eval_samples_per_second": 89.495, |
|
"eval_steps_per_second": 11.187, |
|
"step": 1105000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.753038229810241e-05, |
|
"loss": 2.2612, |
|
"step": 1110000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.505675790890242, |
|
"eval_loss": 2.283203125, |
|
"eval_runtime": 39.9091, |
|
"eval_samples_per_second": 89.403, |
|
"eval_steps_per_second": 11.175, |
|
"step": 1110000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.7455534958515676e-05, |
|
"loss": 2.2585, |
|
"step": 1115000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.505924553651662, |
|
"eval_loss": 2.28125, |
|
"eval_runtime": 39.8428, |
|
"eval_samples_per_second": 89.552, |
|
"eval_steps_per_second": 11.194, |
|
"step": 1115000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.738073255429391e-05, |
|
"loss": 2.2528, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5059566078841358, |
|
"eval_loss": 2.28125, |
|
"eval_runtime": 40.0018, |
|
"eval_samples_per_second": 89.196, |
|
"eval_steps_per_second": 11.149, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.730588521470717e-05, |
|
"loss": 2.2599, |
|
"step": 1125000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5059996208285663, |
|
"eval_loss": 2.28125, |
|
"eval_runtime": 39.9769, |
|
"eval_samples_per_second": 89.251, |
|
"eval_steps_per_second": 11.156, |
|
"step": 1125000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.723105285357542e-05, |
|
"loss": 2.2556, |
|
"step": 1130000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5065730354317075, |
|
"eval_loss": 2.27734375, |
|
"eval_runtime": 39.975, |
|
"eval_samples_per_second": 89.256, |
|
"eval_steps_per_second": 11.157, |
|
"step": 1130000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.715619053553369e-05, |
|
"loss": 2.2519, |
|
"step": 1135000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5063790662300716, |
|
"eval_loss": 2.279296875, |
|
"eval_runtime": 40.0596, |
|
"eval_samples_per_second": 89.067, |
|
"eval_steps_per_second": 11.133, |
|
"step": 1135000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.708135817440194e-05, |
|
"loss": 2.2567, |
|
"step": 1140000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5067524843400006, |
|
"eval_loss": 2.27734375, |
|
"eval_runtime": 40.0409, |
|
"eval_samples_per_second": 89.109, |
|
"eval_steps_per_second": 11.139, |
|
"step": 1140000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.700651083481521e-05, |
|
"loss": 2.2516, |
|
"step": 1145000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.506862071459569, |
|
"eval_loss": 2.275390625, |
|
"eval_runtime": 39.8652, |
|
"eval_samples_per_second": 89.502, |
|
"eval_steps_per_second": 11.188, |
|
"step": 1145000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.693166349522847e-05, |
|
"loss": 2.2533, |
|
"step": 1150000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5067752236673111, |
|
"eval_loss": 2.275390625, |
|
"eval_runtime": 39.9361, |
|
"eval_samples_per_second": 89.343, |
|
"eval_steps_per_second": 11.168, |
|
"step": 1150000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.6856846112551706e-05, |
|
"loss": 2.2532, |
|
"step": 1155000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5069639874807674, |
|
"eval_loss": 2.275390625, |
|
"eval_runtime": 40.0505, |
|
"eval_samples_per_second": 89.088, |
|
"eval_steps_per_second": 11.136, |
|
"step": 1155000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.6781998772964966e-05, |
|
"loss": 2.2572, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5063629021299353, |
|
"eval_loss": 2.279296875, |
|
"eval_runtime": 40.0931, |
|
"eval_samples_per_second": 88.993, |
|
"eval_steps_per_second": 11.124, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.670712147646825e-05, |
|
"loss": 2.2514, |
|
"step": 1165000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5071686414265613, |
|
"eval_loss": 2.2734375, |
|
"eval_runtime": 40.4455, |
|
"eval_samples_per_second": 88.217, |
|
"eval_steps_per_second": 11.027, |
|
"step": 1165000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.66322891153365e-05, |
|
"loss": 2.2471, |
|
"step": 1170000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5073409671720824, |
|
"eval_loss": 2.2734375, |
|
"eval_runtime": 40.0562, |
|
"eval_samples_per_second": 89.075, |
|
"eval_steps_per_second": 11.134, |
|
"step": 1170000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.655745675420475e-05, |
|
"loss": 2.2524, |
|
"step": 1175000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.50760562006584, |
|
"eval_loss": 2.271484375, |
|
"eval_runtime": 40.09, |
|
"eval_samples_per_second": 89.0, |
|
"eval_steps_per_second": 11.125, |
|
"step": 1175000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.648260941461802e-05, |
|
"loss": 2.247, |
|
"step": 1180000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5072979542276519, |
|
"eval_loss": 2.271484375, |
|
"eval_runtime": 40.3075, |
|
"eval_samples_per_second": 88.52, |
|
"eval_steps_per_second": 11.065, |
|
"step": 1180000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.640780701039625e-05, |
|
"loss": 2.2491, |
|
"step": 1185000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5076653450460047, |
|
"eval_loss": 2.271484375, |
|
"eval_runtime": 40.1522, |
|
"eval_samples_per_second": 88.862, |
|
"eval_steps_per_second": 11.108, |
|
"step": 1185000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.63329746492645e-05, |
|
"loss": 2.2481, |
|
"step": 1190000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.5078031508488619, |
|
"eval_loss": 2.26953125, |
|
"eval_runtime": 40.1488, |
|
"eval_samples_per_second": 88.869, |
|
"eval_steps_per_second": 11.109, |
|
"step": 1190000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.992515266041327e-05, |
|
"loss": 2.2465, |
|
"step": 1195000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.5069333030872883, |
|
"eval_loss": 2.2734375, |
|
"eval_runtime": 39.5233, |
|
"eval_samples_per_second": 90.276, |
|
"eval_steps_per_second": 11.284, |
|
"step": 1195000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.985030532082652e-05, |
|
"loss": 2.2494, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.5067053618785863, |
|
"eval_loss": 2.279296875, |
|
"eval_runtime": 38.5856, |
|
"eval_samples_per_second": 92.47, |
|
"eval_steps_per_second": 11.559, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.977545798123979e-05, |
|
"loss": 2.2541, |
|
"step": 1205000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.5068631673307646, |
|
"eval_loss": 2.275390625, |
|
"eval_runtime": 39.5375, |
|
"eval_samples_per_second": 90.244, |
|
"eval_steps_per_second": 11.28, |
|
"step": 1205000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.9700625620108035e-05, |
|
"loss": 2.25, |
|
"step": 1210000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.5067459091128265, |
|
"eval_loss": 2.275390625, |
|
"eval_runtime": 39.4824, |
|
"eval_samples_per_second": 90.369, |
|
"eval_steps_per_second": 11.296, |
|
"step": 1210000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.962582321588627e-05, |
|
"loss": 2.25, |
|
"step": 1215000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.5064313940796654, |
|
"eval_loss": 2.279296875, |
|
"eval_runtime": 39.3855, |
|
"eval_samples_per_second": 90.592, |
|
"eval_steps_per_second": 11.324, |
|
"step": 1215000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.955096089784455e-05, |
|
"loss": 2.2508, |
|
"step": 1220000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.507028095945715, |
|
"eval_loss": 2.2734375, |
|
"eval_runtime": 39.5116, |
|
"eval_samples_per_second": 90.303, |
|
"eval_steps_per_second": 11.288, |
|
"step": 1220000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.947612853671279e-05, |
|
"loss": 2.2496, |
|
"step": 1225000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.507010562006584, |
|
"eval_loss": 2.2734375, |
|
"eval_runtime": 39.5165, |
|
"eval_samples_per_second": 90.291, |
|
"eval_steps_per_second": 11.286, |
|
"step": 1225000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.940131115403604e-05, |
|
"loss": 2.2499, |
|
"step": 1230000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.5073475423992566, |
|
"eval_loss": 2.2734375, |
|
"eval_runtime": 39.4273, |
|
"eval_samples_per_second": 90.496, |
|
"eval_steps_per_second": 11.312, |
|
"step": 1230000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.932647879290429e-05, |
|
"loss": 2.2467, |
|
"step": 1235000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.5075538401518439, |
|
"eval_loss": 2.271484375, |
|
"eval_runtime": 39.5247, |
|
"eval_samples_per_second": 90.273, |
|
"eval_steps_per_second": 11.284, |
|
"step": 1235000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.925164643177254e-05, |
|
"loss": 2.2497, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.5073163100701796, |
|
"eval_loss": 2.271484375, |
|
"eval_runtime": 38.6276, |
|
"eval_samples_per_second": 92.369, |
|
"eval_steps_per_second": 11.546, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.917678411373081e-05, |
|
"loss": 2.2463, |
|
"step": 1245000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.5073409671720824, |
|
"eval_loss": 2.271484375, |
|
"eval_runtime": 39.5689, |
|
"eval_samples_per_second": 90.172, |
|
"eval_steps_per_second": 11.271, |
|
"step": 1245000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.910196673105406e-05, |
|
"loss": 2.2479, |
|
"step": 1250000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.5077573982264421, |
|
"eval_loss": 2.26953125, |
|
"eval_runtime": 39.6608, |
|
"eval_samples_per_second": 89.963, |
|
"eval_steps_per_second": 11.245, |
|
"step": 1250000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.902711939146731e-05, |
|
"loss": 2.2445, |
|
"step": 1255000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5078749304121791, |
|
"eval_loss": 2.26953125, |
|
"eval_runtime": 39.6577, |
|
"eval_samples_per_second": 89.97, |
|
"eval_steps_per_second": 11.246, |
|
"step": 1255000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.89522420949706e-05, |
|
"loss": 2.247, |
|
"step": 1260000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5078483555356837, |
|
"eval_loss": 2.26953125, |
|
"eval_runtime": 39.6203, |
|
"eval_samples_per_second": 90.055, |
|
"eval_steps_per_second": 11.257, |
|
"step": 1260000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.887743969074883e-05, |
|
"loss": 2.2443, |
|
"step": 1265000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5078826015105489, |
|
"eval_loss": 2.267578125, |
|
"eval_runtime": 39.6508, |
|
"eval_samples_per_second": 89.986, |
|
"eval_steps_per_second": 11.248, |
|
"step": 1265000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.880262230807207e-05, |
|
"loss": 2.243, |
|
"step": 1270000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5080672558070215, |
|
"eval_loss": 2.267578125, |
|
"eval_runtime": 39.6639, |
|
"eval_samples_per_second": 89.956, |
|
"eval_steps_per_second": 11.244, |
|
"step": 1270000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.8727804925395315e-05, |
|
"loss": 2.2454, |
|
"step": 1275000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5076889062767118, |
|
"eval_loss": 2.271484375, |
|
"eval_runtime": 39.6373, |
|
"eval_samples_per_second": 90.016, |
|
"eval_steps_per_second": 11.252, |
|
"step": 1275000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.86529126504436e-05, |
|
"loss": 2.2451, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5080541053526733, |
|
"eval_loss": 2.26953125, |
|
"eval_runtime": 39.7457, |
|
"eval_samples_per_second": 89.771, |
|
"eval_steps_per_second": 11.221, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.8578080289311855e-05, |
|
"loss": 2.2455, |
|
"step": 1285000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5083853324215685, |
|
"eval_loss": 2.265625, |
|
"eval_runtime": 39.6403, |
|
"eval_samples_per_second": 90.009, |
|
"eval_steps_per_second": 11.251, |
|
"step": 1285000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.850324792818011e-05, |
|
"loss": 2.241, |
|
"step": 1290000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.508259307234065, |
|
"eval_loss": 2.267578125, |
|
"eval_runtime": 40.1652, |
|
"eval_samples_per_second": 88.833, |
|
"eval_steps_per_second": 11.104, |
|
"step": 1290000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8428415567048354e-05, |
|
"loss": 2.243, |
|
"step": 1295000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.5085801235266012, |
|
"eval_loss": 2.263671875, |
|
"eval_runtime": 39.68, |
|
"eval_samples_per_second": 89.919, |
|
"eval_steps_per_second": 11.24, |
|
"step": 1295000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8353553249006635e-05, |
|
"loss": 2.2408, |
|
"step": 1300000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.5084371123355645, |
|
"eval_loss": 2.263671875, |
|
"eval_runtime": 39.9223, |
|
"eval_samples_per_second": 89.374, |
|
"eval_steps_per_second": 11.172, |
|
"step": 1300000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.827876582323985e-05, |
|
"loss": 2.2508, |
|
"step": 1305000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.5063459161264021, |
|
"eval_loss": 2.279296875, |
|
"eval_runtime": 39.7588, |
|
"eval_samples_per_second": 89.741, |
|
"eval_steps_per_second": 11.218, |
|
"step": 1305000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.820396341901809e-05, |
|
"loss": 2.252, |
|
"step": 1310000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.504651699257876, |
|
"eval_loss": 2.291015625, |
|
"eval_runtime": 39.8037, |
|
"eval_samples_per_second": 89.64, |
|
"eval_steps_per_second": 11.205, |
|
"step": 1310000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8129146036341324e-05, |
|
"loss": 2.7482, |
|
"step": 1315000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.4505951128528157, |
|
"eval_loss": 2.646484375, |
|
"eval_runtime": 39.8831, |
|
"eval_samples_per_second": 89.462, |
|
"eval_steps_per_second": 11.183, |
|
"step": 1315000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.805426873984461e-05, |
|
"loss": 2.4189, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5070354930762858, |
|
"eval_loss": 2.275390625, |
|
"eval_runtime": 39.9021, |
|
"eval_samples_per_second": 89.419, |
|
"eval_steps_per_second": 11.177, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.797939144334789e-05, |
|
"loss": 2.2446, |
|
"step": 1325000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5081412271127301, |
|
"eval_loss": 2.267578125, |
|
"eval_runtime": 39.8679, |
|
"eval_samples_per_second": 89.495, |
|
"eval_steps_per_second": 11.187, |
|
"step": 1325000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.790457406067113e-05, |
|
"loss": 2.2416, |
|
"step": 1330000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5086858750969846, |
|
"eval_loss": 2.263671875, |
|
"eval_runtime": 39.8423, |
|
"eval_samples_per_second": 89.553, |
|
"eval_steps_per_second": 11.194, |
|
"step": 1330000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.78297267210844e-05, |
|
"loss": 2.2421, |
|
"step": 1335000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5087765584384274, |
|
"eval_loss": 2.26171875, |
|
"eval_runtime": 39.9585, |
|
"eval_samples_per_second": 89.293, |
|
"eval_steps_per_second": 11.162, |
|
"step": 1335000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.775487938149766e-05, |
|
"loss": 2.2367, |
|
"step": 1340000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.509168606358683, |
|
"eval_loss": 2.26171875, |
|
"eval_runtime": 39.8472, |
|
"eval_samples_per_second": 89.542, |
|
"eval_steps_per_second": 11.193, |
|
"step": 1340000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.768001706345593e-05, |
|
"loss": 2.2355, |
|
"step": 1345000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5090864160190068, |
|
"eval_loss": 2.259765625, |
|
"eval_runtime": 39.8651, |
|
"eval_samples_per_second": 89.502, |
|
"eval_steps_per_second": 11.188, |
|
"step": 1345000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.760518470232418e-05, |
|
"loss": 2.2379, |
|
"step": 1350000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5093981913741786, |
|
"eval_loss": 2.259765625, |
|
"eval_runtime": 39.8769, |
|
"eval_samples_per_second": 89.475, |
|
"eval_steps_per_second": 11.184, |
|
"step": 1350000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7530337362737444e-05, |
|
"loss": 2.2365, |
|
"step": 1355000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.509393259953798, |
|
"eval_loss": 2.259765625, |
|
"eval_runtime": 39.8496, |
|
"eval_samples_per_second": 89.537, |
|
"eval_steps_per_second": 11.192, |
|
"step": 1355000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.745550500160569e-05, |
|
"loss": 2.2379, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.5091491546449597, |
|
"eval_loss": 2.2578125, |
|
"eval_runtime": 39.8235, |
|
"eval_samples_per_second": 89.595, |
|
"eval_steps_per_second": 11.199, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.738067264047394e-05, |
|
"loss": 2.235, |
|
"step": 1365000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.5094527109661638, |
|
"eval_loss": 2.2578125, |
|
"eval_runtime": 38.8651, |
|
"eval_samples_per_second": 91.805, |
|
"eval_steps_per_second": 11.476, |
|
"step": 1365000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.730582530088721e-05, |
|
"loss": 2.236, |
|
"step": 1370000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.5093398362330085, |
|
"eval_loss": 2.2578125, |
|
"eval_runtime": 40.9049, |
|
"eval_samples_per_second": 87.227, |
|
"eval_steps_per_second": 10.903, |
|
"step": 1370000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7230992939755456e-05, |
|
"loss": 2.2344, |
|
"step": 1375000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.5095472298567916, |
|
"eval_loss": 2.2578125, |
|
"eval_runtime": 39.9487, |
|
"eval_samples_per_second": 89.314, |
|
"eval_steps_per_second": 11.164, |
|
"step": 1375000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.715614560016872e-05, |
|
"loss": 2.2348, |
|
"step": 1380000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.5095688733129063, |
|
"eval_loss": 2.255859375, |
|
"eval_runtime": 39.951, |
|
"eval_samples_per_second": 89.309, |
|
"eval_steps_per_second": 11.164, |
|
"step": 1380000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7081313239036975e-05, |
|
"loss": 2.2306, |
|
"step": 1385000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.5097368155736447, |
|
"eval_loss": 2.255859375, |
|
"eval_runtime": 40.0156, |
|
"eval_samples_per_second": 89.165, |
|
"eval_steps_per_second": 11.146, |
|
"step": 1385000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.7006495856360214e-05, |
|
"loss": 2.2293, |
|
"step": 1390000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.5097521577703843, |
|
"eval_loss": 2.255859375, |
|
"eval_runtime": 40.0042, |
|
"eval_samples_per_second": 89.191, |
|
"eval_steps_per_second": 11.149, |
|
"step": 1390000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.693166349522847e-05, |
|
"loss": 2.2311, |
|
"step": 1395000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.510102562585204, |
|
"eval_loss": 2.25390625, |
|
"eval_runtime": 39.9554, |
|
"eval_samples_per_second": 89.3, |
|
"eval_steps_per_second": 11.162, |
|
"step": 1395000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.685683113409672e-05, |
|
"loss": 2.231, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.5101116035225683, |
|
"eval_loss": 2.25390625, |
|
"eval_runtime": 39.9882, |
|
"eval_samples_per_second": 89.226, |
|
"eval_steps_per_second": 11.153, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.678201375141996e-05, |
|
"loss": 2.2272, |
|
"step": 1405000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.5102126976403701, |
|
"eval_loss": 2.251953125, |
|
"eval_runtime": 40.3186, |
|
"eval_samples_per_second": 88.495, |
|
"eval_steps_per_second": 11.062, |
|
"step": 1405000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.670718139028821e-05, |
|
"loss": 2.2264, |
|
"step": 1410000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.5102436560016482, |
|
"eval_loss": 2.25390625, |
|
"eval_runtime": 40.0007, |
|
"eval_samples_per_second": 89.198, |
|
"eval_steps_per_second": 11.15, |
|
"step": 1410000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.6632349029156465e-05, |
|
"loss": 2.2295, |
|
"step": 1415000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.5104469401084474, |
|
"eval_loss": 2.251953125, |
|
"eval_runtime": 40.1353, |
|
"eval_samples_per_second": 88.899, |
|
"eval_steps_per_second": 11.112, |
|
"step": 1415000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.655753164647971e-05, |
|
"loss": 2.2281, |
|
"step": 1420000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.5103937903554567, |
|
"eval_loss": 2.251953125, |
|
"eval_runtime": 40.0088, |
|
"eval_samples_per_second": 89.18, |
|
"eval_steps_per_second": 11.148, |
|
"step": 1420000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.6482699285347956e-05, |
|
"loss": 2.2234, |
|
"step": 1425000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.5106672102187797, |
|
"eval_loss": 2.25, |
|
"eval_runtime": 40.1918, |
|
"eval_samples_per_second": 88.774, |
|
"eval_steps_per_second": 11.097, |
|
"step": 1425000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.64078819026712e-05, |
|
"loss": 2.2293, |
|
"step": 1430000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.5107220037785639, |
|
"eval_loss": 2.25, |
|
"eval_runtime": 40.1338, |
|
"eval_samples_per_second": 88.903, |
|
"eval_steps_per_second": 11.113, |
|
"step": 1430000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.633306451999444e-05, |
|
"loss": 2.2256, |
|
"step": 1435000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.5108652889373995, |
|
"eval_loss": 2.25, |
|
"eval_runtime": 40.1228, |
|
"eval_samples_per_second": 88.927, |
|
"eval_steps_per_second": 11.116, |
|
"step": 1435000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.6258232158862694e-05, |
|
"loss": 2.2247, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.5107954271486747, |
|
"eval_loss": 2.25, |
|
"eval_runtime": 40.0563, |
|
"eval_samples_per_second": 89.075, |
|
"eval_steps_per_second": 11.134, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.6183384819275954e-05, |
|
"loss": 2.222, |
|
"step": 1445000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.510766386561989, |
|
"eval_loss": 2.25, |
|
"eval_runtime": 40.2217, |
|
"eval_samples_per_second": 88.708, |
|
"eval_steps_per_second": 11.089, |
|
"step": 1445000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.61085674365992e-05, |
|
"loss": 2.2228, |
|
"step": 1450000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.5106184439505719, |
|
"eval_loss": 2.248046875, |
|
"eval_runtime": 40.5305, |
|
"eval_samples_per_second": 88.032, |
|
"eval_steps_per_second": 11.004, |
|
"step": 1450000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.603372009701246e-05, |
|
"loss": 2.2241, |
|
"step": 1455000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.5110554225898505, |
|
"eval_loss": 2.248046875, |
|
"eval_runtime": 40.2413, |
|
"eval_samples_per_second": 88.665, |
|
"eval_steps_per_second": 11.083, |
|
"step": 1455000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.595891769279069e-05, |
|
"loss": 2.2219, |
|
"step": 1460000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.511077613981563, |
|
"eval_loss": 2.24609375, |
|
"eval_runtime": 40.2762, |
|
"eval_samples_per_second": 88.588, |
|
"eval_steps_per_second": 11.074, |
|
"step": 1460000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.5884085331658944e-05, |
|
"loss": 2.2219, |
|
"step": 1465000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.5112833637985525, |
|
"eval_loss": 2.24609375, |
|
"eval_runtime": 40.2334, |
|
"eval_samples_per_second": 88.682, |
|
"eval_steps_per_second": 11.085, |
|
"step": 1465000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.58092529705272e-05, |
|
"loss": 2.2215, |
|
"step": 1470000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.5112595286000464, |
|
"eval_loss": 2.24609375, |
|
"eval_runtime": 40.2987, |
|
"eval_samples_per_second": 88.539, |
|
"eval_steps_per_second": 11.067, |
|
"step": 1470000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.573445056630543e-05, |
|
"loss": 2.2193, |
|
"step": 1475000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.5116091115114694, |
|
"eval_loss": 2.244140625, |
|
"eval_runtime": 40.1594, |
|
"eval_samples_per_second": 88.846, |
|
"eval_steps_per_second": 11.106, |
|
"step": 1475000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.56595882482637e-05, |
|
"loss": 2.2183, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.5114707577730144, |
|
"eval_loss": 2.244140625, |
|
"eval_runtime": 40.9626, |
|
"eval_samples_per_second": 87.104, |
|
"eval_steps_per_second": 10.888, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.5584755887131956e-05, |
|
"loss": 2.2177, |
|
"step": 1485000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.5116211660946219, |
|
"eval_loss": 2.244140625, |
|
"eval_runtime": 40.3714, |
|
"eval_samples_per_second": 88.379, |
|
"eval_steps_per_second": 11.047, |
|
"step": 1485000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.55099235260002e-05, |
|
"loss": 2.2211, |
|
"step": 1490000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5115852763129632, |
|
"eval_loss": 2.2421875, |
|
"eval_runtime": 40.2564, |
|
"eval_samples_per_second": 88.632, |
|
"eval_steps_per_second": 11.079, |
|
"step": 1490000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.5435091164868455e-05, |
|
"loss": 2.2183, |
|
"step": 1495000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5118184229098449, |
|
"eval_loss": 2.2421875, |
|
"eval_runtime": 40.2315, |
|
"eval_samples_per_second": 88.687, |
|
"eval_steps_per_second": 11.086, |
|
"step": 1495000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.5360243825281715e-05, |
|
"loss": 2.2182, |
|
"step": 1500000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5120186933708559, |
|
"eval_loss": 2.240234375, |
|
"eval_runtime": 40.3125, |
|
"eval_samples_per_second": 88.509, |
|
"eval_steps_per_second": 11.064, |
|
"step": 1500000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.528539648569498e-05, |
|
"loss": 2.2148, |
|
"step": 1505000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5121849918248009, |
|
"eval_loss": 2.240234375, |
|
"eval_runtime": 40.3172, |
|
"eval_samples_per_second": 88.498, |
|
"eval_steps_per_second": 11.062, |
|
"step": 1505000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.521059408147321e-05, |
|
"loss": 2.2217, |
|
"step": 1510000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5122685520034718, |
|
"eval_loss": 2.240234375, |
|
"eval_runtime": 40.2766, |
|
"eval_samples_per_second": 88.587, |
|
"eval_steps_per_second": 11.073, |
|
"step": 1510000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.5135761720341466e-05, |
|
"loss": 2.2117, |
|
"step": 1515000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5123701940568713, |
|
"eval_loss": 2.23828125, |
|
"eval_runtime": 40.4763, |
|
"eval_samples_per_second": 88.15, |
|
"eval_steps_per_second": 11.019, |
|
"step": 1515000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.5060944337664705e-05, |
|
"loss": 2.2152, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.5123003322681465, |
|
"eval_loss": 2.23828125, |
|
"eval_runtime": 40.309, |
|
"eval_samples_per_second": 88.516, |
|
"eval_steps_per_second": 11.065, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.4986096998077965e-05, |
|
"loss": 2.2148, |
|
"step": 1525000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.5124967671799727, |
|
"eval_loss": 2.23828125, |
|
"eval_runtime": 40.2971, |
|
"eval_samples_per_second": 88.542, |
|
"eval_steps_per_second": 11.068, |
|
"step": 1525000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.491127961540121e-05, |
|
"loss": 2.2151, |
|
"step": 1530000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.5127488175549798, |
|
"eval_loss": 2.236328125, |
|
"eval_runtime": 40.3675, |
|
"eval_samples_per_second": 88.388, |
|
"eval_steps_per_second": 11.049, |
|
"step": 1530000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.483646223272445e-05, |
|
"loss": 2.2129, |
|
"step": 1535000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.5127022430291633, |
|
"eval_loss": 2.236328125, |
|
"eval_runtime": 40.4605, |
|
"eval_samples_per_second": 88.185, |
|
"eval_steps_per_second": 11.023, |
|
"step": 1535000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.47616298715927e-05, |
|
"loss": 2.2145, |
|
"step": 1540000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.5127690911721, |
|
"eval_loss": 2.236328125, |
|
"eval_runtime": 40.376, |
|
"eval_samples_per_second": 88.369, |
|
"eval_steps_per_second": 11.046, |
|
"step": 1540000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.468681248891595e-05, |
|
"loss": 2.2099, |
|
"step": 1545000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.5128871712934349, |
|
"eval_loss": 2.236328125, |
|
"eval_runtime": 40.3585, |
|
"eval_samples_per_second": 88.408, |
|
"eval_steps_per_second": 11.051, |
|
"step": 1545000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.46119651493292e-05, |
|
"loss": 2.2125, |
|
"step": 1550000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.5131964809384164, |
|
"eval_loss": 2.234375, |
|
"eval_runtime": 40.3163, |
|
"eval_samples_per_second": 88.5, |
|
"eval_steps_per_second": 11.063, |
|
"step": 1550000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.453713278819746e-05, |
|
"loss": 2.2101, |
|
"step": 1555000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.5130504561015916, |
|
"eval_loss": 2.234375, |
|
"eval_runtime": 40.2993, |
|
"eval_samples_per_second": 88.537, |
|
"eval_steps_per_second": 11.067, |
|
"step": 1555000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.4462300427065714e-05, |
|
"loss": 2.211, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.5132159326521398, |
|
"eval_loss": 2.234375, |
|
"eval_runtime": 40.3465, |
|
"eval_samples_per_second": 88.434, |
|
"eval_steps_per_second": 11.054, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.438743810902398e-05, |
|
"loss": 2.2086, |
|
"step": 1565000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.5131567556075729, |
|
"eval_loss": 2.234375, |
|
"eval_runtime": 40.256, |
|
"eval_samples_per_second": 88.633, |
|
"eval_steps_per_second": 11.079, |
|
"step": 1565000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.4312605747892234e-05, |
|
"loss": 2.2137, |
|
"step": 1570000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.5131545638651815, |
|
"eval_loss": 2.232421875, |
|
"eval_runtime": 40.2935, |
|
"eval_samples_per_second": 88.55, |
|
"eval_steps_per_second": 11.069, |
|
"step": 1570000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.423778836521548e-05, |
|
"loss": 2.2122, |
|
"step": 1575000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.5134236002437218, |
|
"eval_loss": 2.232421875, |
|
"eval_runtime": 40.3698, |
|
"eval_samples_per_second": 88.383, |
|
"eval_steps_per_second": 11.048, |
|
"step": 1575000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.416297098253872e-05, |
|
"loss": 2.2053, |
|
"step": 1580000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.5133767517501063, |
|
"eval_loss": 2.232421875, |
|
"eval_runtime": 40.3058, |
|
"eval_samples_per_second": 88.523, |
|
"eval_steps_per_second": 11.065, |
|
"step": 1580000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.408813862140697e-05, |
|
"loss": 2.208, |
|
"step": 1585000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.513388258397661, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 40.3539, |
|
"eval_samples_per_second": 88.418, |
|
"eval_steps_per_second": 11.052, |
|
"step": 1585000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.401329128182023e-05, |
|
"loss": 2.2081, |
|
"step": 1590000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5135512692380189, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 42.8555, |
|
"eval_samples_per_second": 83.257, |
|
"eval_steps_per_second": 10.407, |
|
"step": 1590000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.3938458920688484e-05, |
|
"loss": 2.2077, |
|
"step": 1595000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5137540054092202, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 40.2657, |
|
"eval_samples_per_second": 88.611, |
|
"eval_steps_per_second": 11.076, |
|
"step": 1595000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.3863611581101744e-05, |
|
"loss": 2.2061, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5135893507620688, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 41.6767, |
|
"eval_samples_per_second": 85.611, |
|
"eval_steps_per_second": 10.701, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.378880917687998e-05, |
|
"loss": 2.2055, |
|
"step": 1605000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5138961946968601, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 40.3609, |
|
"eval_samples_per_second": 88.402, |
|
"eval_steps_per_second": 11.05, |
|
"step": 1605000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.371397681574823e-05, |
|
"loss": 2.2065, |
|
"step": 1610000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5138923591476752, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 40.453, |
|
"eval_samples_per_second": 88.201, |
|
"eval_steps_per_second": 11.025, |
|
"step": 1610000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.363914445461648e-05, |
|
"loss": 2.2054, |
|
"step": 1615000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5138997562782461, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 41.6251, |
|
"eval_samples_per_second": 85.718, |
|
"eval_steps_per_second": 10.715, |
|
"step": 1615000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.356432707193973e-05, |
|
"loss": 2.2035, |
|
"step": 1620000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.5140274252725432, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 40.4365, |
|
"eval_samples_per_second": 88.237, |
|
"eval_steps_per_second": 11.03, |
|
"step": 1620000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.348947973235298e-05, |
|
"loss": 2.2021, |
|
"step": 1625000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.5139844123281126, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 40.3492, |
|
"eval_samples_per_second": 88.428, |
|
"eval_steps_per_second": 11.054, |
|
"step": 1625000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.3414662349676226e-05, |
|
"loss": 2.2036, |
|
"step": 1630000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.5138233192623471, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 40.3662, |
|
"eval_samples_per_second": 88.391, |
|
"eval_steps_per_second": 11.049, |
|
"step": 1630000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.333981501008949e-05, |
|
"loss": 2.204, |
|
"step": 1635000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.5139803028111288, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 40.2896, |
|
"eval_samples_per_second": 88.559, |
|
"eval_steps_per_second": 11.07, |
|
"step": 1635000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.3264967670502746e-05, |
|
"loss": 2.2042, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.5140860543815122, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 42.4068, |
|
"eval_samples_per_second": 84.137, |
|
"eval_steps_per_second": 10.517, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.3190135309371006e-05, |
|
"loss": 2.2024, |
|
"step": 1645000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.514173450109368, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 40.3018, |
|
"eval_samples_per_second": 88.532, |
|
"eval_steps_per_second": 11.067, |
|
"step": 1645000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.311530294823926e-05, |
|
"loss": 2.2023, |
|
"step": 1650000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.5144172814504074, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 40.2694, |
|
"eval_samples_per_second": 88.603, |
|
"eval_steps_per_second": 11.075, |
|
"step": 1650000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.30404855655625e-05, |
|
"loss": 2.1976, |
|
"step": 1655000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5145805662585642, |
|
"eval_loss": 2.224609375, |
|
"eval_runtime": 40.3729, |
|
"eval_samples_per_second": 88.376, |
|
"eval_steps_per_second": 11.047, |
|
"step": 1655000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.296565320443075e-05, |
|
"loss": 2.2028, |
|
"step": 1660000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5147172761902257, |
|
"eval_loss": 2.224609375, |
|
"eval_runtime": 40.282, |
|
"eval_samples_per_second": 88.576, |
|
"eval_steps_per_second": 11.072, |
|
"step": 1660000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.289080586484401e-05, |
|
"loss": 2.1971, |
|
"step": 1665000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5146457705947074, |
|
"eval_loss": 2.224609375, |
|
"eval_runtime": 40.4909, |
|
"eval_samples_per_second": 88.119, |
|
"eval_steps_per_second": 11.015, |
|
"step": 1665000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.281595852525727e-05, |
|
"loss": 2.1978, |
|
"step": 1670000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5146065931994617, |
|
"eval_loss": 2.224609375, |
|
"eval_runtime": 40.3534, |
|
"eval_samples_per_second": 88.419, |
|
"eval_steps_per_second": 11.052, |
|
"step": 1670000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.27411561210355e-05, |
|
"loss": 2.1955, |
|
"step": 1675000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5148249455352015, |
|
"eval_loss": 2.22265625, |
|
"eval_runtime": 39.3164, |
|
"eval_samples_per_second": 90.751, |
|
"eval_steps_per_second": 11.344, |
|
"step": 1675000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.266630878144877e-05, |
|
"loss": 2.1967, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5146874137001434, |
|
"eval_loss": 2.22265625, |
|
"eval_runtime": 40.3287, |
|
"eval_samples_per_second": 88.473, |
|
"eval_steps_per_second": 11.059, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.259149139877201e-05, |
|
"loss": 2.1975, |
|
"step": 1685000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5151745284466245, |
|
"eval_loss": 2.22265625, |
|
"eval_runtime": 40.2734, |
|
"eval_samples_per_second": 88.594, |
|
"eval_steps_per_second": 11.074, |
|
"step": 1685000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.251668899455024e-05, |
|
"loss": 2.1972, |
|
"step": 1690000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.5148622051558548, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 40.2657, |
|
"eval_samples_per_second": 88.611, |
|
"eval_steps_per_second": 11.076, |
|
"step": 1690000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.2441841654963506e-05, |
|
"loss": 2.1967, |
|
"step": 1695000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.5150745302000184, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 40.2491, |
|
"eval_samples_per_second": 88.648, |
|
"eval_steps_per_second": 11.081, |
|
"step": 1695000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.236699431537676e-05, |
|
"loss": 2.194, |
|
"step": 1700000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.5150706946508335, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 40.3323, |
|
"eval_samples_per_second": 88.465, |
|
"eval_steps_per_second": 11.058, |
|
"step": 1700000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.9925167638868255e-05, |
|
"loss": 2.2009, |
|
"step": 1705000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.5139096191190072, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 39.4304, |
|
"eval_samples_per_second": 90.489, |
|
"eval_steps_per_second": 11.311, |
|
"step": 1705000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.9850350256191494e-05, |
|
"loss": 2.2085, |
|
"step": 1710000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.5136279802217166, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.2325, |
|
"eval_samples_per_second": 90.945, |
|
"eval_steps_per_second": 11.368, |
|
"step": 1710000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.977551789505975e-05, |
|
"loss": 2.2077, |
|
"step": 1715000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.5136983899460393, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.2977, |
|
"eval_samples_per_second": 90.794, |
|
"eval_steps_per_second": 11.349, |
|
"step": 1715000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9700670555473014e-05, |
|
"loss": 2.205, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.51339866917402, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.3375, |
|
"eval_samples_per_second": 90.702, |
|
"eval_steps_per_second": 11.338, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.962583819434126e-05, |
|
"loss": 2.2063, |
|
"step": 1725000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.5134490792490214, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.3134, |
|
"eval_samples_per_second": 90.758, |
|
"eval_steps_per_second": 11.345, |
|
"step": 1725000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9551020811664506e-05, |
|
"loss": 2.2076, |
|
"step": 1730000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.5134718185763318, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.2618, |
|
"eval_samples_per_second": 90.877, |
|
"eval_steps_per_second": 11.36, |
|
"step": 1730000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9476173472077765e-05, |
|
"loss": 2.2036, |
|
"step": 1735000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.5133134651885556, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.3642, |
|
"eval_samples_per_second": 90.641, |
|
"eval_steps_per_second": 11.33, |
|
"step": 1735000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9401356089401005e-05, |
|
"loss": 2.2064, |
|
"step": 1740000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.5138024977096292, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.4343, |
|
"eval_samples_per_second": 90.48, |
|
"eval_steps_per_second": 11.31, |
|
"step": 1740000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.932650874981427e-05, |
|
"loss": 2.2053, |
|
"step": 1745000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.5136904448798706, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.4148, |
|
"eval_samples_per_second": 90.524, |
|
"eval_steps_per_second": 11.316, |
|
"step": 1745000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.9251676388682524e-05, |
|
"loss": 2.2048, |
|
"step": 1750000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.5138953727934633, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.3715, |
|
"eval_samples_per_second": 90.624, |
|
"eval_steps_per_second": 11.328, |
|
"step": 1750000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.917684402755077e-05, |
|
"loss": 2.2075, |
|
"step": 1755000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.5137868815450907, |
|
"eval_loss": 2.23046875, |
|
"eval_runtime": 39.4167, |
|
"eval_samples_per_second": 90.52, |
|
"eval_steps_per_second": 11.315, |
|
"step": 1755000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.910201166641902e-05, |
|
"loss": 2.2041, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.5136414046438638, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 39.46, |
|
"eval_samples_per_second": 90.421, |
|
"eval_steps_per_second": 11.303, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.902716432683229e-05, |
|
"loss": 2.2057, |
|
"step": 1765000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.5138789347255281, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 39.4616, |
|
"eval_samples_per_second": 90.417, |
|
"eval_steps_per_second": 11.302, |
|
"step": 1765000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.8952331965700536e-05, |
|
"loss": 2.2054, |
|
"step": 1770000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.5138929070832731, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 39.5693, |
|
"eval_samples_per_second": 90.171, |
|
"eval_steps_per_second": 11.271, |
|
"step": 1770000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.887752956147877e-05, |
|
"loss": 2.2085, |
|
"step": 1775000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.5139266051225403, |
|
"eval_loss": 2.228515625, |
|
"eval_runtime": 39.4709, |
|
"eval_samples_per_second": 90.396, |
|
"eval_steps_per_second": 11.299, |
|
"step": 1775000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.880269720034703e-05, |
|
"loss": 2.2051, |
|
"step": 1780000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.5141471492006715, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 39.4552, |
|
"eval_samples_per_second": 90.432, |
|
"eval_steps_per_second": 11.304, |
|
"step": 1780000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.872786483921527e-05, |
|
"loss": 2.2023, |
|
"step": 1785000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.5139211257665619, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 39.5212, |
|
"eval_samples_per_second": 90.281, |
|
"eval_steps_per_second": 11.285, |
|
"step": 1785000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.8653032478083526e-05, |
|
"loss": 2.205, |
|
"step": 1790000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.5140561918914298, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 39.4725, |
|
"eval_samples_per_second": 90.392, |
|
"eval_steps_per_second": 11.299, |
|
"step": 1790000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.857818513849679e-05, |
|
"loss": 2.2009, |
|
"step": 1795000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.5141463272972748, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 39.6114, |
|
"eval_samples_per_second": 90.075, |
|
"eval_steps_per_second": 11.259, |
|
"step": 1795000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.850335277736504e-05, |
|
"loss": 2.1998, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.5143134476546165, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 39.8498, |
|
"eval_samples_per_second": 89.536, |
|
"eval_steps_per_second": 11.192, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.8428505437778305e-05, |
|
"loss": 2.2009, |
|
"step": 1805000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.5143608440838298, |
|
"eval_loss": 2.224609375, |
|
"eval_runtime": 38.3896, |
|
"eval_samples_per_second": 92.942, |
|
"eval_steps_per_second": 11.618, |
|
"step": 1805000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.8353688055101544e-05, |
|
"loss": 2.2027, |
|
"step": 1810000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.5143013930714639, |
|
"eval_loss": 2.2265625, |
|
"eval_runtime": 39.6231, |
|
"eval_samples_per_second": 90.048, |
|
"eval_steps_per_second": 11.256, |
|
"step": 1810000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.8278840715514804e-05, |
|
"loss": 2.2007, |
|
"step": 1815000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.5145857716467437, |
|
"eval_loss": 2.224609375, |
|
"eval_runtime": 39.6832, |
|
"eval_samples_per_second": 89.912, |
|
"eval_steps_per_second": 11.239, |
|
"step": 1815000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.820402333283805e-05, |
|
"loss": 2.1978, |
|
"step": 1820000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.5144972800476924, |
|
"eval_loss": 2.224609375, |
|
"eval_runtime": 39.721, |
|
"eval_samples_per_second": 89.827, |
|
"eval_steps_per_second": 11.228, |
|
"step": 1820000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.812920595016129e-05, |
|
"loss": 2.1999, |
|
"step": 1825000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.5145970043264995, |
|
"eval_loss": 2.22265625, |
|
"eval_runtime": 39.6521, |
|
"eval_samples_per_second": 89.983, |
|
"eval_steps_per_second": 11.248, |
|
"step": 1825000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.8054388567484535e-05, |
|
"loss": 2.1978, |
|
"step": 1830000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.5148150826944404, |
|
"eval_loss": 2.22265625, |
|
"eval_runtime": 39.5596, |
|
"eval_samples_per_second": 90.193, |
|
"eval_steps_per_second": 11.274, |
|
"step": 1830000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.79795262494428e-05, |
|
"loss": 2.1989, |
|
"step": 1835000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.5147271390309869, |
|
"eval_loss": 2.22265625, |
|
"eval_runtime": 39.6103, |
|
"eval_samples_per_second": 90.077, |
|
"eval_steps_per_second": 11.26, |
|
"step": 1835000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.790467890985607e-05, |
|
"loss": 2.1989, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.5147980966909074, |
|
"eval_loss": 2.22265625, |
|
"eval_runtime": 39.6511, |
|
"eval_samples_per_second": 89.985, |
|
"eval_steps_per_second": 11.248, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.7829876505634294e-05, |
|
"loss": 2.1982, |
|
"step": 1845000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.5149764497280047, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.6684, |
|
"eval_samples_per_second": 89.946, |
|
"eval_steps_per_second": 11.243, |
|
"step": 1845000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.775501418759257e-05, |
|
"loss": 2.1974, |
|
"step": 1850000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.515062201649067, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.5826, |
|
"eval_samples_per_second": 90.141, |
|
"eval_steps_per_second": 11.268, |
|
"step": 1850000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.768016684800583e-05, |
|
"loss": 2.1972, |
|
"step": 1855000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.5151389126327648, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.774, |
|
"eval_samples_per_second": 89.707, |
|
"eval_steps_per_second": 11.213, |
|
"step": 1855000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.760533448687408e-05, |
|
"loss": 2.1966, |
|
"step": 1860000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.515106858400291, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.7218, |
|
"eval_samples_per_second": 89.825, |
|
"eval_steps_per_second": 11.228, |
|
"step": 1860000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.753050212574233e-05, |
|
"loss": 2.198, |
|
"step": 1865000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.5150282296420008, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.6783, |
|
"eval_samples_per_second": 89.923, |
|
"eval_steps_per_second": 11.24, |
|
"step": 1865000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.7455669764610586e-05, |
|
"loss": 2.1978, |
|
"step": 1870000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.5151887747721684, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.753, |
|
"eval_samples_per_second": 89.754, |
|
"eval_steps_per_second": 11.219, |
|
"step": 1870000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.738083740347884e-05, |
|
"loss": 2.1938, |
|
"step": 1875000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.5152000074519242, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.7549, |
|
"eval_samples_per_second": 89.75, |
|
"eval_steps_per_second": 11.219, |
|
"step": 1875000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.730600504234709e-05, |
|
"loss": 2.1908, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.5152191851978486, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.6445, |
|
"eval_samples_per_second": 90.0, |
|
"eval_steps_per_second": 11.25, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.723118765967034e-05, |
|
"loss": 2.1899, |
|
"step": 1885000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.5151602821210807, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.7932, |
|
"eval_samples_per_second": 89.664, |
|
"eval_steps_per_second": 11.208, |
|
"step": 1885000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.715634032008359e-05, |
|
"loss": 2.1938, |
|
"step": 1890000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.5151682271872493, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.7818, |
|
"eval_samples_per_second": 89.689, |
|
"eval_steps_per_second": 11.211, |
|
"step": 1890000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.708150795895185e-05, |
|
"loss": 2.1909, |
|
"step": 1895000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.5153520595803252, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.8181, |
|
"eval_samples_per_second": 89.607, |
|
"eval_steps_per_second": 11.201, |
|
"step": 1895000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.700669057627509e-05, |
|
"loss": 2.1921, |
|
"step": 1900000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.5155320564242162, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 40.2538, |
|
"eval_samples_per_second": 88.638, |
|
"eval_steps_per_second": 11.08, |
|
"step": 1900000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.693187319359833e-05, |
|
"loss": 2.1926, |
|
"step": 1905000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.5155928772755766, |
|
"eval_loss": 2.216796875, |
|
"eval_runtime": 39.9117, |
|
"eval_samples_per_second": 89.397, |
|
"eval_steps_per_second": 11.175, |
|
"step": 1905000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.685704083246658e-05, |
|
"loss": 2.194, |
|
"step": 1910000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.5154164420130716, |
|
"eval_loss": 2.216796875, |
|
"eval_runtime": 39.9039, |
|
"eval_samples_per_second": 89.415, |
|
"eval_steps_per_second": 11.177, |
|
"step": 1910000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.992518261732325e-05, |
|
"loss": 2.1942, |
|
"step": 1915000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.5151652135414612, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.5459, |
|
"eval_samples_per_second": 90.224, |
|
"eval_steps_per_second": 11.278, |
|
"step": 1915000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.985036523464649e-05, |
|
"loss": 2.1947, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.5150559003896918, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.6198, |
|
"eval_samples_per_second": 90.056, |
|
"eval_steps_per_second": 11.257, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.977553287351474e-05, |
|
"loss": 2.1941, |
|
"step": 1925000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.5150868587509698, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.5078, |
|
"eval_samples_per_second": 90.311, |
|
"eval_steps_per_second": 11.289, |
|
"step": 1925000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.970071549083798e-05, |
|
"loss": 2.1984, |
|
"step": 1930000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.5151849392229835, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.6089, |
|
"eval_samples_per_second": 90.081, |
|
"eval_steps_per_second": 11.26, |
|
"step": 1930000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.9625868151251246e-05, |
|
"loss": 2.1929, |
|
"step": 1935000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.5150846670085785, |
|
"eval_loss": 2.220703125, |
|
"eval_runtime": 39.574, |
|
"eval_samples_per_second": 90.16, |
|
"eval_steps_per_second": 11.27, |
|
"step": 1935000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.955106574702947e-05, |
|
"loss": 2.1921, |
|
"step": 1940000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.515442742921768, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.6485, |
|
"eval_samples_per_second": 89.991, |
|
"eval_steps_per_second": 11.249, |
|
"step": 1940000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.947621840744274e-05, |
|
"loss": 2.1932, |
|
"step": 1945000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.5153391830937759, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.6644, |
|
"eval_samples_per_second": 89.955, |
|
"eval_steps_per_second": 11.244, |
|
"step": 1945000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.940138604631099e-05, |
|
"loss": 2.1959, |
|
"step": 1950000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.5154117845604899, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.6108, |
|
"eval_samples_per_second": 90.077, |
|
"eval_steps_per_second": 11.26, |
|
"step": 1950000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.932656866363423e-05, |
|
"loss": 2.1927, |
|
"step": 1955000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.5153761687466302, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.6613, |
|
"eval_samples_per_second": 89.962, |
|
"eval_steps_per_second": 11.245, |
|
"step": 1955000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.9251751280957475e-05, |
|
"loss": 2.1949, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.5154917831577748, |
|
"eval_loss": 2.21875, |
|
"eval_runtime": 39.8742, |
|
"eval_samples_per_second": 89.481, |
|
"eval_steps_per_second": 11.185, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.9176933898280714e-05, |
|
"loss": 2.1918, |
|
"step": 1965000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.5153786344568205, |
|
"eval_loss": 2.216796875, |
|
"eval_runtime": 39.9956, |
|
"eval_samples_per_second": 89.21, |
|
"eval_steps_per_second": 11.151, |
|
"step": 1965000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.910210153714897e-05, |
|
"loss": 2.1957, |
|
"step": 1970000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.5154745231864427, |
|
"eval_loss": 2.216796875, |
|
"eval_runtime": 39.6981, |
|
"eval_samples_per_second": 89.878, |
|
"eval_steps_per_second": 11.235, |
|
"step": 1970000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.902726917601722e-05, |
|
"loss": 2.1884, |
|
"step": 1975000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.51571890246308, |
|
"eval_loss": 2.216796875, |
|
"eval_runtime": 39.9761, |
|
"eval_samples_per_second": 89.253, |
|
"eval_steps_per_second": 11.157, |
|
"step": 1975000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.895245179334046e-05, |
|
"loss": 2.1942, |
|
"step": 1980000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.5156073975689194, |
|
"eval_loss": 2.21484375, |
|
"eval_runtime": 39.9647, |
|
"eval_samples_per_second": 89.279, |
|
"eval_steps_per_second": 11.16, |
|
"step": 1980000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.8877604453753726e-05, |
|
"loss": 2.1938, |
|
"step": 1985000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.5155802747568262, |
|
"eval_loss": 2.216796875, |
|
"eval_runtime": 39.7886, |
|
"eval_samples_per_second": 89.674, |
|
"eval_steps_per_second": 11.209, |
|
"step": 1985000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.8802787071076965e-05, |
|
"loss": 2.1935, |
|
"step": 1990000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.5160071165875447, |
|
"eval_loss": 2.21484375, |
|
"eval_runtime": 40.1621, |
|
"eval_samples_per_second": 88.84, |
|
"eval_steps_per_second": 11.105, |
|
"step": 1990000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.872795470994522e-05, |
|
"loss": 2.1902, |
|
"step": 1995000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.5157413678225916, |
|
"eval_loss": 2.21484375, |
|
"eval_runtime": 39.8406, |
|
"eval_samples_per_second": 89.557, |
|
"eval_steps_per_second": 11.195, |
|
"step": 1995000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.865310737035848e-05, |
|
"loss": 2.188, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.5158473933607739, |
|
"eval_loss": 2.21484375, |
|
"eval_runtime": 39.7424, |
|
"eval_samples_per_second": 89.778, |
|
"eval_steps_per_second": 11.222, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.8578289987681716e-05, |
|
"loss": 2.1862, |
|
"step": 2005000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.5159235564088739, |
|
"eval_loss": 2.212890625, |
|
"eval_runtime": 39.7499, |
|
"eval_samples_per_second": 89.761, |
|
"eval_steps_per_second": 11.22, |
|
"step": 2005000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.850347260500496e-05, |
|
"loss": 2.1886, |
|
"step": 2010000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.5160695812456987, |
|
"eval_loss": 2.212890625, |
|
"eval_runtime": 40.1441, |
|
"eval_samples_per_second": 88.88, |
|
"eval_steps_per_second": 11.11, |
|
"step": 2010000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.8428610286963236e-05, |
|
"loss": 2.1811, |
|
"step": 2015000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.516141360809016, |
|
"eval_loss": 2.212890625, |
|
"eval_runtime": 40.0164, |
|
"eval_samples_per_second": 89.163, |
|
"eval_steps_per_second": 11.145, |
|
"step": 2015000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.835377792583148e-05, |
|
"loss": 2.19, |
|
"step": 2020000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.5160435543048012, |
|
"eval_loss": 2.212890625, |
|
"eval_runtime": 39.7821, |
|
"eval_samples_per_second": 89.689, |
|
"eval_steps_per_second": 11.211, |
|
"step": 2020000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.827893058624475e-05, |
|
"loss": 2.1895, |
|
"step": 2025000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.5164950532374227, |
|
"eval_loss": 2.212890625, |
|
"eval_runtime": 39.7945, |
|
"eval_samples_per_second": 89.661, |
|
"eval_steps_per_second": 11.208, |
|
"step": 2025000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.820411320356799e-05, |
|
"loss": 2.1904, |
|
"step": 2030000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.516082457732248, |
|
"eval_loss": 2.212890625, |
|
"eval_runtime": 39.8538, |
|
"eval_samples_per_second": 89.527, |
|
"eval_steps_per_second": 11.191, |
|
"step": 2030000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.812928084243624e-05, |
|
"loss": 2.1854, |
|
"step": 2035000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.516504368142586, |
|
"eval_loss": 2.212890625, |
|
"eval_runtime": 39.7999, |
|
"eval_samples_per_second": 89.649, |
|
"eval_steps_per_second": 11.206, |
|
"step": 2035000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.805447843821447e-05, |
|
"loss": 2.1883, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.51652875127669, |
|
"eval_loss": 2.2109375, |
|
"eval_runtime": 39.8414, |
|
"eval_samples_per_second": 89.555, |
|
"eval_steps_per_second": 11.194, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.7979646077082725e-05, |
|
"loss": 2.1859, |
|
"step": 2045000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.5165314909546791, |
|
"eval_loss": 2.2109375, |
|
"eval_runtime": 39.9091, |
|
"eval_samples_per_second": 89.403, |
|
"eval_steps_per_second": 11.175, |
|
"step": 2045000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.790481371595098e-05, |
|
"loss": 2.1849, |
|
"step": 2050000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.5167750483279198, |
|
"eval_loss": 2.208984375, |
|
"eval_runtime": 39.8945, |
|
"eval_samples_per_second": 89.436, |
|
"eval_steps_per_second": 11.179, |
|
"step": 2050000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.782996637636424e-05, |
|
"loss": 2.1844, |
|
"step": 2055000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5167309395122934, |
|
"eval_loss": 2.2109375, |
|
"eval_runtime": 39.9417, |
|
"eval_samples_per_second": 89.33, |
|
"eval_steps_per_second": 11.166, |
|
"step": 2055000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.7755148993687484e-05, |
|
"loss": 2.1866, |
|
"step": 2060000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5166953236984337, |
|
"eval_loss": 2.208984375, |
|
"eval_runtime": 39.9499, |
|
"eval_samples_per_second": 89.312, |
|
"eval_steps_per_second": 11.164, |
|
"step": 2060000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.768031663255573e-05, |
|
"loss": 2.1865, |
|
"step": 2065000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5167821714906917, |
|
"eval_loss": 2.208984375, |
|
"eval_runtime": 39.8531, |
|
"eval_samples_per_second": 89.529, |
|
"eval_steps_per_second": 11.191, |
|
"step": 2065000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.7605499249878976e-05, |
|
"loss": 2.1846, |
|
"step": 2070000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5171333982089081, |
|
"eval_loss": 2.20703125, |
|
"eval_runtime": 39.9613, |
|
"eval_samples_per_second": 89.286, |
|
"eval_steps_per_second": 11.161, |
|
"step": 2070000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.753066688874723e-05, |
|
"loss": 2.1821, |
|
"step": 2075000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5169824419517027, |
|
"eval_loss": 2.20703125, |
|
"eval_runtime": 39.9564, |
|
"eval_samples_per_second": 89.297, |
|
"eval_steps_per_second": 11.162, |
|
"step": 2075000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.7455849506070474e-05, |
|
"loss": 2.184, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5170257288639323, |
|
"eval_loss": 2.20703125, |
|
"eval_runtime": 41.0867, |
|
"eval_samples_per_second": 86.841, |
|
"eval_steps_per_second": 10.855, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.738101714493872e-05, |
|
"loss": 2.1847, |
|
"step": 2085000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5173227099579624, |
|
"eval_loss": 2.205078125, |
|
"eval_runtime": 39.9314, |
|
"eval_samples_per_second": 89.353, |
|
"eval_steps_per_second": 11.169, |
|
"step": 2085000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.730618478380697e-05, |
|
"loss": 2.1836, |
|
"step": 2090000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.5173509286412512, |
|
"eval_loss": 2.205078125, |
|
"eval_runtime": 40.0844, |
|
"eval_samples_per_second": 89.012, |
|
"eval_steps_per_second": 11.127, |
|
"step": 2090000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.723136740113022e-05, |
|
"loss": 2.1791, |
|
"step": 2095000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.5174243520113619, |
|
"eval_loss": 2.205078125, |
|
"eval_runtime": 39.9803, |
|
"eval_samples_per_second": 89.244, |
|
"eval_steps_per_second": 11.156, |
|
"step": 2095000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.715652006154348e-05, |
|
"loss": 2.1812, |
|
"step": 2100000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.5173136690205979, |
|
"eval_loss": 2.205078125, |
|
"eval_runtime": 40.0609, |
|
"eval_samples_per_second": 89.064, |
|
"eval_steps_per_second": 11.133, |
|
"step": 2100000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.708168770041173e-05, |
|
"loss": 2.1835, |
|
"step": 2105000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.5175572263938386, |
|
"eval_loss": 2.205078125, |
|
"eval_runtime": 40.0398, |
|
"eval_samples_per_second": 89.111, |
|
"eval_steps_per_second": 11.139, |
|
"step": 2105000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.700687031773498e-05, |
|
"loss": 2.1806, |
|
"step": 2110000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.517552294973458, |
|
"eval_loss": 2.205078125, |
|
"eval_runtime": 40.3766, |
|
"eval_samples_per_second": 88.368, |
|
"eval_steps_per_second": 11.046, |
|
"step": 2110000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.6932037956603224e-05, |
|
"loss": 2.1832, |
|
"step": 2115000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_accuracy": 0.5174777757321516, |
|
"eval_loss": 2.205078125, |
|
"eval_runtime": 41.019, |
|
"eval_samples_per_second": 86.984, |
|
"eval_steps_per_second": 10.873, |
|
"step": 2115000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6857205595471476e-05, |
|
"loss": 2.1766, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5177577708226486, |
|
"eval_loss": 2.203125, |
|
"eval_runtime": 40.432, |
|
"eval_samples_per_second": 88.247, |
|
"eval_steps_per_second": 11.031, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.678237323433973e-05, |
|
"loss": 2.1775, |
|
"step": 2125000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5178353037097432, |
|
"eval_loss": 2.203125, |
|
"eval_runtime": 41.1107, |
|
"eval_samples_per_second": 86.79, |
|
"eval_steps_per_second": 10.849, |
|
"step": 2125000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6707540873207975e-05, |
|
"loss": 2.1801, |
|
"step": 2130000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5176514713166673, |
|
"eval_loss": 2.203125, |
|
"eval_runtime": 41.0472, |
|
"eval_samples_per_second": 86.924, |
|
"eval_steps_per_second": 10.866, |
|
"step": 2130000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6632708512076235e-05, |
|
"loss": 2.1789, |
|
"step": 2135000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5177799622143612, |
|
"eval_loss": 2.203125, |
|
"eval_runtime": 41.4469, |
|
"eval_samples_per_second": 86.086, |
|
"eval_steps_per_second": 10.761, |
|
"step": 2135000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.655787615094449e-05, |
|
"loss": 2.1794, |
|
"step": 2140000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5178131123180306, |
|
"eval_loss": 2.203125, |
|
"eval_runtime": 40.4764, |
|
"eval_samples_per_second": 88.15, |
|
"eval_steps_per_second": 11.019, |
|
"step": 2140000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6483043789812734e-05, |
|
"loss": 2.1799, |
|
"step": 2145000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5178917410763209, |
|
"eval_loss": 2.201171875, |
|
"eval_runtime": 40.376, |
|
"eval_samples_per_second": 88.369, |
|
"eval_steps_per_second": 11.046, |
|
"step": 2145000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.6408196450226e-05, |
|
"loss": 2.1746, |
|
"step": 2150000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5180391357521402, |
|
"eval_loss": 2.201171875, |
|
"eval_runtime": 40.5387, |
|
"eval_samples_per_second": 88.015, |
|
"eval_steps_per_second": 11.002, |
|
"step": 2150000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.633336408909425e-05, |
|
"loss": 2.1766, |
|
"step": 2155000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.5178873575915381, |
|
"eval_loss": 2.201171875, |
|
"eval_runtime": 40.514, |
|
"eval_samples_per_second": 88.068, |
|
"eval_steps_per_second": 11.009, |
|
"step": 2155000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.62585317279625e-05, |
|
"loss": 2.1754, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.5177369492699306, |
|
"eval_loss": 2.201171875, |
|
"eval_runtime": 42.6971, |
|
"eval_samples_per_second": 83.565, |
|
"eval_steps_per_second": 10.446, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.618369936683075e-05, |
|
"loss": 2.1764, |
|
"step": 2165000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.5177405108513166, |
|
"eval_loss": 2.201171875, |
|
"eval_runtime": 40.5808, |
|
"eval_samples_per_second": 87.923, |
|
"eval_steps_per_second": 10.99, |
|
"step": 2165000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.6108881984154e-05, |
|
"loss": 2.1745, |
|
"step": 2170000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.51831337751886, |
|
"eval_loss": 2.19921875, |
|
"eval_runtime": 42.1403, |
|
"eval_samples_per_second": 84.67, |
|
"eval_steps_per_second": 10.584, |
|
"step": 2170000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.6034049623022244e-05, |
|
"loss": 2.1735, |
|
"step": 2175000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.5180128348434438, |
|
"eval_loss": 2.19921875, |
|
"eval_runtime": 40.2519, |
|
"eval_samples_per_second": 88.642, |
|
"eval_steps_per_second": 11.08, |
|
"step": 2175000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.59592172618905e-05, |
|
"loss": 2.1778, |
|
"step": 2180000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.5180588614336625, |
|
"eval_loss": 2.19921875, |
|
"eval_runtime": 42.4462, |
|
"eval_samples_per_second": 84.059, |
|
"eval_steps_per_second": 10.507, |
|
"step": 2180000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.5884384900758757e-05, |
|
"loss": 2.1717, |
|
"step": 2185000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.5183484453971218, |
|
"eval_loss": 2.19921875, |
|
"eval_runtime": 41.2843, |
|
"eval_samples_per_second": 86.425, |
|
"eval_steps_per_second": 10.803, |
|
"step": 2185000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.580953756117201e-05, |
|
"loss": 2.1752, |
|
"step": 2190000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.518520497174844, |
|
"eval_loss": 2.197265625, |
|
"eval_runtime": 40.4537, |
|
"eval_samples_per_second": 88.2, |
|
"eval_steps_per_second": 11.025, |
|
"step": 2190000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.5734720178495255e-05, |
|
"loss": 2.1747, |
|
"step": 2195000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.5184799499406038, |
|
"eval_loss": 2.197265625, |
|
"eval_runtime": 43.6458, |
|
"eval_samples_per_second": 81.749, |
|
"eval_steps_per_second": 10.219, |
|
"step": 2195000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.56599027958185e-05, |
|
"loss": 2.1754, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.5186040573535149, |
|
"eval_loss": 2.197265625, |
|
"eval_runtime": 41.0029, |
|
"eval_samples_per_second": 87.018, |
|
"eval_steps_per_second": 10.877, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.558508541314174e-05, |
|
"loss": 2.1728, |
|
"step": 2205000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.5187728215176501, |
|
"eval_loss": 2.197265625, |
|
"eval_runtime": 40.4446, |
|
"eval_samples_per_second": 88.22, |
|
"eval_steps_per_second": 11.027, |
|
"step": 2205000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.551025305200999e-05, |
|
"loss": 2.1684, |
|
"step": 2210000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.5185736469278347, |
|
"eval_loss": 2.197265625, |
|
"eval_runtime": 40.6605, |
|
"eval_samples_per_second": 87.751, |
|
"eval_steps_per_second": 10.969, |
|
"step": 2210000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.543540571242325e-05, |
|
"loss": 2.1722, |
|
"step": 2215000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.5188199439790645, |
|
"eval_loss": 2.1953125, |
|
"eval_runtime": 40.5747, |
|
"eval_samples_per_second": 87.937, |
|
"eval_steps_per_second": 10.992, |
|
"step": 2215000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.536058832974649e-05, |
|
"loss": 2.1692, |
|
"step": 2220000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.519004872243336, |
|
"eval_loss": 2.1953125, |
|
"eval_runtime": 41.1424, |
|
"eval_samples_per_second": 86.723, |
|
"eval_steps_per_second": 10.84, |
|
"step": 2220000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.528577094706974e-05, |
|
"loss": 2.176, |
|
"step": 2225000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.519122404429073, |
|
"eval_loss": 2.1953125, |
|
"eval_runtime": 42.7268, |
|
"eval_samples_per_second": 83.507, |
|
"eval_steps_per_second": 10.438, |
|
"step": 2225000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.5210923607483e-05, |
|
"loss": 2.1697, |
|
"step": 2230000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.5190202144400756, |
|
"eval_loss": 2.1953125, |
|
"eval_runtime": 40.4249, |
|
"eval_samples_per_second": 88.262, |
|
"eval_steps_per_second": 11.033, |
|
"step": 2230000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.5136106224806244e-05, |
|
"loss": 2.1731, |
|
"step": 2235000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.5190837749694251, |
|
"eval_loss": 2.1953125, |
|
"eval_runtime": 42.0959, |
|
"eval_samples_per_second": 84.759, |
|
"eval_steps_per_second": 10.595, |
|
"step": 2235000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.506124390676451e-05, |
|
"loss": 2.173, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.5191125415883119, |
|
"eval_loss": 2.193359375, |
|
"eval_runtime": 40.9494, |
|
"eval_samples_per_second": 87.132, |
|
"eval_steps_per_second": 10.892, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.498644150254274e-05, |
|
"loss": 2.1714, |
|
"step": 2245000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.5192900727220126, |
|
"eval_loss": 2.193359375, |
|
"eval_runtime": 40.4515, |
|
"eval_samples_per_second": 88.204, |
|
"eval_steps_per_second": 11.026, |
|
"step": 2245000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 4.4911594162956e-05, |
|
"loss": 2.1719, |
|
"step": 2250000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.5192147315773094, |
|
"eval_loss": 2.193359375, |
|
"eval_runtime": 40.7422, |
|
"eval_samples_per_second": 87.575, |
|
"eval_steps_per_second": 10.947, |
|
"step": 2250000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.483674682336927e-05, |
|
"loss": 2.1667, |
|
"step": 2255000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5189793932380363, |
|
"eval_loss": 2.193359375, |
|
"eval_runtime": 40.4467, |
|
"eval_samples_per_second": 88.215, |
|
"eval_steps_per_second": 11.027, |
|
"step": 2255000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.476191446223752e-05, |
|
"loss": 2.1653, |
|
"step": 2260000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5191834992482324, |
|
"eval_loss": 2.193359375, |
|
"eval_runtime": 40.3493, |
|
"eval_samples_per_second": 88.428, |
|
"eval_steps_per_second": 11.053, |
|
"step": 2260000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.468709707956076e-05, |
|
"loss": 2.1656, |
|
"step": 2265000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5193434964428021, |
|
"eval_loss": 2.19140625, |
|
"eval_runtime": 44.129, |
|
"eval_samples_per_second": 80.854, |
|
"eval_steps_per_second": 10.107, |
|
"step": 2265000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.4612264718429014e-05, |
|
"loss": 2.1695, |
|
"step": 2270000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5194328099452503, |
|
"eval_loss": 2.19140625, |
|
"eval_runtime": 42.07, |
|
"eval_samples_per_second": 84.811, |
|
"eval_steps_per_second": 10.601, |
|
"step": 2270000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.453744733575226e-05, |
|
"loss": 2.17, |
|
"step": 2275000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5195643144887323, |
|
"eval_loss": 2.19140625, |
|
"eval_runtime": 40.4092, |
|
"eval_samples_per_second": 88.297, |
|
"eval_steps_per_second": 11.037, |
|
"step": 2275000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.44626299530755e-05, |
|
"loss": 2.1628, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5197062298085732, |
|
"eval_loss": 2.19140625, |
|
"eval_runtime": 40.3663, |
|
"eval_samples_per_second": 88.391, |
|
"eval_steps_per_second": 11.049, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.438779759194375e-05, |
|
"loss": 2.1648, |
|
"step": 2285000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5196210258231089, |
|
"eval_loss": 2.189453125, |
|
"eval_runtime": 41.4265, |
|
"eval_samples_per_second": 86.128, |
|
"eval_steps_per_second": 10.766, |
|
"step": 2285000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.431298020926699e-05, |
|
"loss": 2.1647, |
|
"step": 2290000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.5199106097865681, |
|
"eval_loss": 2.189453125, |
|
"eval_runtime": 43.7368, |
|
"eval_samples_per_second": 81.579, |
|
"eval_steps_per_second": 10.197, |
|
"step": 2290000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.423813286968026e-05, |
|
"loss": 2.1648, |
|
"step": 2295000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.5198015706025977, |
|
"eval_loss": 2.189453125, |
|
"eval_runtime": 40.3741, |
|
"eval_samples_per_second": 88.374, |
|
"eval_steps_per_second": 11.047, |
|
"step": 2295000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.4163315487003496e-05, |
|
"loss": 2.168, |
|
"step": 2300000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.5196733536727027, |
|
"eval_loss": 2.189453125, |
|
"eval_runtime": 40.4104, |
|
"eval_samples_per_second": 88.294, |
|
"eval_steps_per_second": 11.037, |
|
"step": 2300000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.4088498104326735e-05, |
|
"loss": 2.1607, |
|
"step": 2305000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.5197840366634667, |
|
"eval_loss": 2.189453125, |
|
"eval_runtime": 40.7993, |
|
"eval_samples_per_second": 87.452, |
|
"eval_steps_per_second": 10.932, |
|
"step": 2305000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.401365076474e-05, |
|
"loss": 2.1674, |
|
"step": 2310000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.5199684169921404, |
|
"eval_loss": 2.1875, |
|
"eval_runtime": 40.3824, |
|
"eval_samples_per_second": 88.355, |
|
"eval_steps_per_second": 11.044, |
|
"step": 2310000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.393881840360825e-05, |
|
"loss": 2.1656, |
|
"step": 2315000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.5199689649277383, |
|
"eval_loss": 2.1875, |
|
"eval_runtime": 40.7535, |
|
"eval_samples_per_second": 87.551, |
|
"eval_steps_per_second": 10.944, |
|
"step": 2315000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.38639860424765e-05, |
|
"loss": 2.1637, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.5201547150954066, |
|
"eval_loss": 2.1875, |
|
"eval_runtime": 40.6083, |
|
"eval_samples_per_second": 87.864, |
|
"eval_steps_per_second": 10.983, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.3789168659799746e-05, |
|
"loss": 2.1649, |
|
"step": 2325000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.5201152637323619, |
|
"eval_loss": 2.1875, |
|
"eval_runtime": 41.4342, |
|
"eval_samples_per_second": 86.112, |
|
"eval_steps_per_second": 10.764, |
|
"step": 2325000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.371436625557798e-05, |
|
"loss": 2.1625, |
|
"step": 2330000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.5200544428810016, |
|
"eval_loss": 2.1875, |
|
"eval_runtime": 44.1316, |
|
"eval_samples_per_second": 80.849, |
|
"eval_steps_per_second": 10.106, |
|
"step": 2330000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.363950393753625e-05, |
|
"loss": 2.1627, |
|
"step": 2335000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.5202593707945943, |
|
"eval_loss": 2.1875, |
|
"eval_runtime": 42.6548, |
|
"eval_samples_per_second": 83.648, |
|
"eval_steps_per_second": 10.456, |
|
"step": 2335000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.356468655485949e-05, |
|
"loss": 2.1598, |
|
"step": 2340000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.5203048494492152, |
|
"eval_loss": 2.185546875, |
|
"eval_runtime": 43.9859, |
|
"eval_samples_per_second": 81.117, |
|
"eval_steps_per_second": 10.14, |
|
"step": 2340000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.348982423681776e-05, |
|
"loss": 2.1638, |
|
"step": 2345000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.5201473179648357, |
|
"eval_loss": 2.1875, |
|
"eval_runtime": 44.5632, |
|
"eval_samples_per_second": 80.066, |
|
"eval_steps_per_second": 10.008, |
|
"step": 2345000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.341499187568602e-05, |
|
"loss": 2.1588, |
|
"step": 2350000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.5204785450337309, |
|
"eval_loss": 2.185546875, |
|
"eval_runtime": 41.6749, |
|
"eval_samples_per_second": 85.615, |
|
"eval_steps_per_second": 10.702, |
|
"step": 2350000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.334015951455427e-05, |
|
"loss": 2.1633, |
|
"step": 2355000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.5204648466437849, |
|
"eval_loss": 2.185546875, |
|
"eval_runtime": 40.4241, |
|
"eval_samples_per_second": 88.264, |
|
"eval_steps_per_second": 11.033, |
|
"step": 2355000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.326534213187751e-05, |
|
"loss": 2.1621, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.5205470369834612, |
|
"eval_loss": 2.185546875, |
|
"eval_runtime": 40.3566, |
|
"eval_samples_per_second": 88.412, |
|
"eval_steps_per_second": 11.051, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.319049479229077e-05, |
|
"loss": 2.165, |
|
"step": 2365000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.5207188147933844, |
|
"eval_loss": 2.18359375, |
|
"eval_runtime": 40.8467, |
|
"eval_samples_per_second": 87.351, |
|
"eval_steps_per_second": 10.919, |
|
"step": 2365000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.311566243115902e-05, |
|
"loss": 2.159, |
|
"step": 2370000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.52062347399936, |
|
"eval_loss": 2.18359375, |
|
"eval_runtime": 43.216, |
|
"eval_samples_per_second": 82.562, |
|
"eval_steps_per_second": 10.32, |
|
"step": 2370000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.3040830070027275e-05, |
|
"loss": 2.1573, |
|
"step": 2375000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.5207314173121348, |
|
"eval_loss": 2.18359375, |
|
"eval_runtime": 44.6428, |
|
"eval_samples_per_second": 79.923, |
|
"eval_steps_per_second": 9.99, |
|
"step": 2375000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.2966012687350514e-05, |
|
"loss": 2.1556, |
|
"step": 2380000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.520848401562274, |
|
"eval_loss": 2.18359375, |
|
"eval_runtime": 40.6164, |
|
"eval_samples_per_second": 87.846, |
|
"eval_steps_per_second": 10.981, |
|
"step": 2380000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.289119530467376e-05, |
|
"loss": 2.1562, |
|
"step": 2385000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.5209563448750487, |
|
"eval_loss": 2.18359375, |
|
"eval_runtime": 40.356, |
|
"eval_samples_per_second": 88.413, |
|
"eval_steps_per_second": 11.052, |
|
"step": 2385000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.281636294354201e-05, |
|
"loss": 2.1572, |
|
"step": 2390000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.5209188112865967, |
|
"eval_loss": 2.18359375, |
|
"eval_runtime": 44.3639, |
|
"eval_samples_per_second": 80.426, |
|
"eval_steps_per_second": 10.053, |
|
"step": 2390000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.274154556086525e-05, |
|
"loss": 2.1577, |
|
"step": 2395000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.5208686751793942, |
|
"eval_loss": 2.181640625, |
|
"eval_runtime": 42.2402, |
|
"eval_samples_per_second": 84.469, |
|
"eval_steps_per_second": 10.559, |
|
"step": 2395000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2666713199733505e-05, |
|
"loss": 2.1529, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.5209933305279031, |
|
"eval_loss": 2.181640625, |
|
"eval_runtime": 42.0012, |
|
"eval_samples_per_second": 84.95, |
|
"eval_steps_per_second": 10.619, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.259188083860176e-05, |
|
"loss": 2.1636, |
|
"step": 2405000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.5210516856690732, |
|
"eval_loss": 2.181640625, |
|
"eval_runtime": 40.8766, |
|
"eval_samples_per_second": 87.287, |
|
"eval_steps_per_second": 10.911, |
|
"step": 2405000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2517063455924996e-05, |
|
"loss": 2.1521, |
|
"step": 2410000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.5212941471711181, |
|
"eval_loss": 2.181640625, |
|
"eval_runtime": 44.8987, |
|
"eval_samples_per_second": 79.468, |
|
"eval_steps_per_second": 9.933, |
|
"step": 2410000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.244221611633826e-05, |
|
"loss": 2.1574, |
|
"step": 2415000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.5213697622836202, |
|
"eval_loss": 2.181640625, |
|
"eval_runtime": 40.8755, |
|
"eval_samples_per_second": 87.289, |
|
"eval_steps_per_second": 10.911, |
|
"step": 2415000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.236736877675152e-05, |
|
"loss": 2.1546, |
|
"step": 2420000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.5213067496898685, |
|
"eval_loss": 2.1796875, |
|
"eval_runtime": 43.2862, |
|
"eval_samples_per_second": 82.428, |
|
"eval_steps_per_second": 10.304, |
|
"step": 2420000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2292536415619776e-05, |
|
"loss": 2.1572, |
|
"step": 2425000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.521194148924512, |
|
"eval_loss": 2.1796875, |
|
"eval_runtime": 42.5113, |
|
"eval_samples_per_second": 83.931, |
|
"eval_steps_per_second": 10.491, |
|
"step": 2425000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.2217719032943015e-05, |
|
"loss": 2.1544, |
|
"step": 2430000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.5212160663484257, |
|
"eval_loss": 2.1796875, |
|
"eval_runtime": 42.4347, |
|
"eval_samples_per_second": 84.082, |
|
"eval_steps_per_second": 10.51, |
|
"step": 2430000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.214290165026626e-05, |
|
"loss": 2.15, |
|
"step": 2435000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.5213122290458468, |
|
"eval_loss": 2.1796875, |
|
"eval_runtime": 41.657, |
|
"eval_samples_per_second": 85.652, |
|
"eval_steps_per_second": 10.706, |
|
"step": 2435000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.206805431067952e-05, |
|
"loss": 2.1537, |
|
"step": 2440000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.5217242766154238, |
|
"eval_loss": 2.177734375, |
|
"eval_runtime": 45.9893, |
|
"eval_samples_per_second": 77.583, |
|
"eval_steps_per_second": 9.698, |
|
"step": 2440000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.199322194954777e-05, |
|
"loss": 2.1552, |
|
"step": 2445000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.5215689368734356, |
|
"eval_loss": 2.177734375, |
|
"eval_runtime": 43.8458, |
|
"eval_samples_per_second": 81.376, |
|
"eval_steps_per_second": 10.172, |
|
"step": 2445000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.1918389588416026e-05, |
|
"loss": 2.1522, |
|
"step": 2450000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.5215245540900105, |
|
"eval_loss": 2.177734375, |
|
"eval_runtime": 45.304, |
|
"eval_samples_per_second": 78.757, |
|
"eval_steps_per_second": 9.845, |
|
"step": 2450000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1843542248829286e-05, |
|
"loss": 2.1487, |
|
"step": 2455000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.5214875684371562, |
|
"eval_loss": 2.177734375, |
|
"eval_runtime": 41.9232, |
|
"eval_samples_per_second": 85.108, |
|
"eval_steps_per_second": 10.639, |
|
"step": 2455000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.176870988769754e-05, |
|
"loss": 2.1582, |
|
"step": 2460000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.52146373323865, |
|
"eval_loss": 2.177734375, |
|
"eval_runtime": 44.2104, |
|
"eval_samples_per_second": 80.705, |
|
"eval_steps_per_second": 10.088, |
|
"step": 2460000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.169390748347577e-05, |
|
"loss": 2.1582, |
|
"step": 2465000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.5218020834703172, |
|
"eval_loss": 2.177734375, |
|
"eval_runtime": 43.1549, |
|
"eval_samples_per_second": 82.679, |
|
"eval_steps_per_second": 10.335, |
|
"step": 2465000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.161907512234402e-05, |
|
"loss": 2.1529, |
|
"step": 2470000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.5217976999855345, |
|
"eval_loss": 2.177734375, |
|
"eval_runtime": 45.1081, |
|
"eval_samples_per_second": 79.099, |
|
"eval_steps_per_second": 9.887, |
|
"step": 2470000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1544242761212276e-05, |
|
"loss": 2.1549, |
|
"step": 2475000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.5219418070477668, |
|
"eval_loss": 2.17578125, |
|
"eval_runtime": 40.8292, |
|
"eval_samples_per_second": 87.388, |
|
"eval_steps_per_second": 10.924, |
|
"step": 2475000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.1469425378535516e-05, |
|
"loss": 2.1525, |
|
"step": 2480000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.521930574368011, |
|
"eval_loss": 2.17578125, |
|
"eval_runtime": 43.1177, |
|
"eval_samples_per_second": 82.75, |
|
"eval_steps_per_second": 10.344, |
|
"step": 2480000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.139459301740377e-05, |
|
"loss": 2.1478, |
|
"step": 2485000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.5221272832476362, |
|
"eval_loss": 2.17578125, |
|
"eval_runtime": 44.4519, |
|
"eval_samples_per_second": 80.266, |
|
"eval_steps_per_second": 10.033, |
|
"step": 2485000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.131977563472701e-05, |
|
"loss": 2.1524, |
|
"step": 2490000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.5219949568007575, |
|
"eval_loss": 2.17578125, |
|
"eval_runtime": 43.8567, |
|
"eval_samples_per_second": 81.356, |
|
"eval_steps_per_second": 10.169, |
|
"step": 2490000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.124494327359526e-05, |
|
"loss": 2.1477, |
|
"step": 2495000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.5220256411942366, |
|
"eval_loss": 2.173828125, |
|
"eval_runtime": 40.3311, |
|
"eval_samples_per_second": 88.468, |
|
"eval_steps_per_second": 11.058, |
|
"step": 2495000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.117011091246352e-05, |
|
"loss": 2.1524, |
|
"step": 2500000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.5221511184461423, |
|
"eval_loss": 2.173828125, |
|
"eval_runtime": 42.2823, |
|
"eval_samples_per_second": 84.385, |
|
"eval_steps_per_second": 10.548, |
|
"step": 2500000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.1095278551331766e-05, |
|
"loss": 2.147, |
|
"step": 2505000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.5221815288718226, |
|
"eval_loss": 2.173828125, |
|
"eval_runtime": 40.7144, |
|
"eval_samples_per_second": 87.635, |
|
"eval_steps_per_second": 10.954, |
|
"step": 2505000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.102046116865501e-05, |
|
"loss": 2.1481, |
|
"step": 2510000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.5222954994761736, |
|
"eval_loss": 2.173828125, |
|
"eval_runtime": 42.7002, |
|
"eval_samples_per_second": 83.559, |
|
"eval_steps_per_second": 10.445, |
|
"step": 2510000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.094559885061328e-05, |
|
"loss": 2.1494, |
|
"step": 2515000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.5222659109538901, |
|
"eval_loss": 2.173828125, |
|
"eval_runtime": 42.0725, |
|
"eval_samples_per_second": 84.806, |
|
"eval_steps_per_second": 10.601, |
|
"step": 2515000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.087078146793652e-05, |
|
"loss": 2.1484, |
|
"step": 2520000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.5222675547606836, |
|
"eval_loss": 2.173828125, |
|
"eval_runtime": 44.1957, |
|
"eval_samples_per_second": 80.732, |
|
"eval_steps_per_second": 10.091, |
|
"step": 2520000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.079594910680478e-05, |
|
"loss": 2.1474, |
|
"step": 2525000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.5223297454510387, |
|
"eval_loss": 2.173828125, |
|
"eval_runtime": 43.3427, |
|
"eval_samples_per_second": 82.321, |
|
"eval_steps_per_second": 10.29, |
|
"step": 2525000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.072110176721804e-05, |
|
"loss": 2.1487, |
|
"step": 2530000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.5222724861810643, |
|
"eval_loss": 2.173828125, |
|
"eval_runtime": 44.0468, |
|
"eval_samples_per_second": 81.005, |
|
"eval_steps_per_second": 10.126, |
|
"step": 2530000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.064626940608628e-05, |
|
"loss": 2.1465, |
|
"step": 2535000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.5224768661590591, |
|
"eval_loss": 2.171875, |
|
"eval_runtime": 40.4028, |
|
"eval_samples_per_second": 88.311, |
|
"eval_steps_per_second": 11.039, |
|
"step": 2535000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.057145202340953e-05, |
|
"loss": 2.1456, |
|
"step": 2540000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.5226201513178947, |
|
"eval_loss": 2.171875, |
|
"eval_runtime": 42.5959, |
|
"eval_samples_per_second": 83.764, |
|
"eval_steps_per_second": 10.47, |
|
"step": 2540000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.049661966227778e-05, |
|
"loss": 2.1482, |
|
"step": 2545000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.5223516628749524, |
|
"eval_loss": 2.171875, |
|
"eval_runtime": 43.342, |
|
"eval_samples_per_second": 82.322, |
|
"eval_steps_per_second": 10.29, |
|
"step": 2545000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.042180227960102e-05, |
|
"loss": 2.1451, |
|
"step": 2550000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.5226286443196613, |
|
"eval_loss": 2.171875, |
|
"eval_runtime": 42.1401, |
|
"eval_samples_per_second": 84.67, |
|
"eval_steps_per_second": 10.584, |
|
"step": 2550000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.0346969918469274e-05, |
|
"loss": 2.143, |
|
"step": 2555000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5225609742733278, |
|
"eval_loss": 2.171875, |
|
"eval_runtime": 44.8351, |
|
"eval_samples_per_second": 79.581, |
|
"eval_steps_per_second": 9.948, |
|
"step": 2555000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.027210760042755e-05, |
|
"loss": 2.1463, |
|
"step": 2560000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5225061807135436, |
|
"eval_loss": 2.171875, |
|
"eval_runtime": 42.3643, |
|
"eval_samples_per_second": 84.222, |
|
"eval_steps_per_second": 10.528, |
|
"step": 2560000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.0197290217750786e-05, |
|
"loss": 2.1466, |
|
"step": 2565000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5227820662870569, |
|
"eval_loss": 2.169921875, |
|
"eval_runtime": 44.9199, |
|
"eval_samples_per_second": 79.43, |
|
"eval_steps_per_second": 9.929, |
|
"step": 2565000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.012247283507403e-05, |
|
"loss": 2.1423, |
|
"step": 2570000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5229272692204849, |
|
"eval_loss": 2.169921875, |
|
"eval_runtime": 43.6123, |
|
"eval_samples_per_second": 81.812, |
|
"eval_steps_per_second": 10.226, |
|
"step": 2570000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.004765545239728e-05, |
|
"loss": 2.1423, |
|
"step": 2575000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5230557601181788, |
|
"eval_loss": 2.169921875, |
|
"eval_runtime": 44.819, |
|
"eval_samples_per_second": 79.609, |
|
"eval_steps_per_second": 9.951, |
|
"step": 2575000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.997283806972052e-05, |
|
"loss": 2.1444, |
|
"step": 2580000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5230245277891018, |
|
"eval_loss": 2.169921875, |
|
"eval_runtime": 42.2873, |
|
"eval_samples_per_second": 84.375, |
|
"eval_steps_per_second": 10.547, |
|
"step": 2580000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.9898020687043756e-05, |
|
"loss": 2.1402, |
|
"step": 2585000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5230417877604338, |
|
"eval_loss": 2.16796875, |
|
"eval_runtime": 44.6974, |
|
"eval_samples_per_second": 79.826, |
|
"eval_steps_per_second": 9.978, |
|
"step": 2585000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.9823203304367e-05, |
|
"loss": 2.1376, |
|
"step": 2590000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5230924718032341, |
|
"eval_loss": 2.16796875, |
|
"eval_runtime": 43.4573, |
|
"eval_samples_per_second": 82.104, |
|
"eval_steps_per_second": 10.263, |
|
"step": 2590000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.974838592169024e-05, |
|
"loss": 2.1395, |
|
"step": 2595000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5231894564040521, |
|
"eval_loss": 2.16796875, |
|
"eval_runtime": 43.8137, |
|
"eval_samples_per_second": 81.436, |
|
"eval_steps_per_second": 10.179, |
|
"step": 2595000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.9673553560558494e-05, |
|
"loss": 2.1399, |
|
"step": 2600000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5232801397454949, |
|
"eval_loss": 2.16796875, |
|
"eval_runtime": 41.9685, |
|
"eval_samples_per_second": 85.016, |
|
"eval_steps_per_second": 10.627, |
|
"step": 2600000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.9598721199426747e-05, |
|
"loss": 2.1379, |
|
"step": 2605000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5230908279964406, |
|
"eval_loss": 2.16796875, |
|
"eval_runtime": 42.9652, |
|
"eval_samples_per_second": 83.044, |
|
"eval_steps_per_second": 10.38, |
|
"step": 2605000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.952390381674999e-05, |
|
"loss": 2.1411, |
|
"step": 2610000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5233708230869376, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 41.4989, |
|
"eval_samples_per_second": 85.978, |
|
"eval_steps_per_second": 10.747, |
|
"step": 2610000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.944908643407323e-05, |
|
"loss": 2.1421, |
|
"step": 2615000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5232478115452223, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 41.1264, |
|
"eval_samples_per_second": 86.757, |
|
"eval_steps_per_second": 10.845, |
|
"step": 2615000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.9374254072941484e-05, |
|
"loss": 2.1412, |
|
"step": 2620000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5236650645029786, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 43.7159, |
|
"eval_samples_per_second": 81.618, |
|
"eval_steps_per_second": 10.202, |
|
"step": 2620000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.929942171180974e-05, |
|
"loss": 2.1381, |
|
"step": 2625000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.5235886274870797, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 43.6678, |
|
"eval_samples_per_second": 81.708, |
|
"eval_steps_per_second": 10.213, |
|
"step": 2625000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.922458935067799e-05, |
|
"loss": 2.142, |
|
"step": 2630000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.523625339172135, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 42.3354, |
|
"eval_samples_per_second": 84.279, |
|
"eval_steps_per_second": 10.535, |
|
"step": 2630000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.9149756989546236e-05, |
|
"loss": 2.1394, |
|
"step": 2635000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.5236212296551512, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 42.9131, |
|
"eval_samples_per_second": 83.145, |
|
"eval_steps_per_second": 10.393, |
|
"step": 2635000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.907493960686948e-05, |
|
"loss": 2.1384, |
|
"step": 2640000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.5233793160887042, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 40.2525, |
|
"eval_samples_per_second": 88.64, |
|
"eval_steps_per_second": 11.08, |
|
"step": 2640000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.900012222419273e-05, |
|
"loss": 2.138, |
|
"step": 2645000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.5235653402241714, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 43.9803, |
|
"eval_samples_per_second": 81.127, |
|
"eval_steps_per_second": 10.141, |
|
"step": 2645000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.892530484151597e-05, |
|
"loss": 2.1346, |
|
"step": 2650000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.523867526706381, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 44.5226, |
|
"eval_samples_per_second": 80.139, |
|
"eval_steps_per_second": 10.017, |
|
"step": 2650000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.885047248038421e-05, |
|
"loss": 2.1376, |
|
"step": 2655000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.5239204024915728, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 42.646, |
|
"eval_samples_per_second": 83.666, |
|
"eval_steps_per_second": 10.458, |
|
"step": 2655000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.877564011925247e-05, |
|
"loss": 2.1409, |
|
"step": 2660000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.5239705385987753, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 40.3234, |
|
"eval_samples_per_second": 88.485, |
|
"eval_steps_per_second": 11.061, |
|
"step": 2660000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.8700807758120725e-05, |
|
"loss": 2.1343, |
|
"step": 2665000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.5239877985701072, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 42.9773, |
|
"eval_samples_per_second": 83.021, |
|
"eval_steps_per_second": 10.378, |
|
"step": 2665000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.862597539698897e-05, |
|
"loss": 2.1363, |
|
"step": 2670000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.5240447838722828, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 44.9261, |
|
"eval_samples_per_second": 79.419, |
|
"eval_steps_per_second": 9.927, |
|
"step": 2670000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.855112805740224e-05, |
|
"loss": 2.1343, |
|
"step": 2675000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.5241869731599227, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 43.4218, |
|
"eval_samples_per_second": 82.171, |
|
"eval_steps_per_second": 10.271, |
|
"step": 2675000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.847631067472548e-05, |
|
"loss": 2.1381, |
|
"step": 2680000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.5243439567087043, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 43.0405, |
|
"eval_samples_per_second": 82.899, |
|
"eval_steps_per_second": 10.362, |
|
"step": 2680000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.840147831359373e-05, |
|
"loss": 2.1355, |
|
"step": 2685000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.5241456040222856, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 43.7247, |
|
"eval_samples_per_second": 81.601, |
|
"eval_steps_per_second": 10.2, |
|
"step": 2685000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.8326615995552e-05, |
|
"loss": 2.1394, |
|
"step": 2690000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5242305340399511, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 40.8207, |
|
"eval_samples_per_second": 87.407, |
|
"eval_steps_per_second": 10.926, |
|
"step": 2690000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.825179861287524e-05, |
|
"loss": 2.1359, |
|
"step": 2695000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5244875158353388, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 44.3125, |
|
"eval_samples_per_second": 80.519, |
|
"eval_steps_per_second": 10.065, |
|
"step": 2695000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.817698123019848e-05, |
|
"loss": 2.1365, |
|
"step": 2700000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.524362312551232, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 42.9757, |
|
"eval_samples_per_second": 83.024, |
|
"eval_steps_per_second": 10.378, |
|
"step": 2700000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.810216384752173e-05, |
|
"loss": 2.131, |
|
"step": 2705000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5244225854669946, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 44.5469, |
|
"eval_samples_per_second": 80.095, |
|
"eval_steps_per_second": 10.012, |
|
"step": 2705000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.802731650793499e-05, |
|
"loss": 2.1337, |
|
"step": 2710000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5244025858176733, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 43.6203, |
|
"eval_samples_per_second": 81.797, |
|
"eval_steps_per_second": 10.225, |
|
"step": 2710000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.7952499125258226e-05, |
|
"loss": 2.1307, |
|
"step": 2715000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5245954591481136, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 43.7473, |
|
"eval_samples_per_second": 81.559, |
|
"eval_steps_per_second": 10.195, |
|
"step": 2715000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.787765178567149e-05, |
|
"loss": 2.1333, |
|
"step": 2720000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.524749429051107, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 43.6989, |
|
"eval_samples_per_second": 81.65, |
|
"eval_steps_per_second": 10.206, |
|
"step": 2720000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.780283440299474e-05, |
|
"loss": 2.1354, |
|
"step": 2725000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.5246316228975711, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 43.001, |
|
"eval_samples_per_second": 82.975, |
|
"eval_steps_per_second": 10.372, |
|
"step": 2725000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.7728002041862985e-05, |
|
"loss": 2.1372, |
|
"step": 2730000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.5248077841922771, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 41.555, |
|
"eval_samples_per_second": 85.862, |
|
"eval_steps_per_second": 10.733, |
|
"step": 2730000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.765316968073124e-05, |
|
"loss": 2.1323, |
|
"step": 2735000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.5248480574587185, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 41.1337, |
|
"eval_samples_per_second": 86.742, |
|
"eval_steps_per_second": 10.843, |
|
"step": 2735000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.757835229805448e-05, |
|
"loss": 2.1315, |
|
"step": 2740000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.5249064125998887, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 43.6977, |
|
"eval_samples_per_second": 81.652, |
|
"eval_steps_per_second": 10.206, |
|
"step": 2740000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.750351993692273e-05, |
|
"loss": 2.1341, |
|
"step": 2745000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.5249143576660573, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 44.5031, |
|
"eval_samples_per_second": 80.174, |
|
"eval_steps_per_second": 10.022, |
|
"step": 2745000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.7428702554245975e-05, |
|
"loss": 2.132, |
|
"step": 2750000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.5249768223242113, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 40.671, |
|
"eval_samples_per_second": 87.728, |
|
"eval_steps_per_second": 10.966, |
|
"step": 2750000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7353885171569214e-05, |
|
"loss": 2.1322, |
|
"step": 2755000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.5251954486277501, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 43.2998, |
|
"eval_samples_per_second": 82.402, |
|
"eval_steps_per_second": 10.3, |
|
"step": 2755000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.727906778889246e-05, |
|
"loss": 2.1298, |
|
"step": 2760000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.5252044895651145, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 44.6651, |
|
"eval_samples_per_second": 79.883, |
|
"eval_steps_per_second": 9.985, |
|
"step": 2760000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.720422044930572e-05, |
|
"loss": 2.1285, |
|
"step": 2765000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.5252362698297893, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 43.7277, |
|
"eval_samples_per_second": 81.596, |
|
"eval_steps_per_second": 10.199, |
|
"step": 2765000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7129403066628966e-05, |
|
"loss": 2.1299, |
|
"step": 2770000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.5251601067816893, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 43.8556, |
|
"eval_samples_per_second": 81.358, |
|
"eval_steps_per_second": 10.17, |
|
"step": 2770000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.7054585683952205e-05, |
|
"loss": 2.1304, |
|
"step": 2775000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.5252631186740835, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 44.0024, |
|
"eval_samples_per_second": 81.086, |
|
"eval_steps_per_second": 10.136, |
|
"step": 2775000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6979768301275444e-05, |
|
"loss": 2.1288, |
|
"step": 2780000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.5254154447702835, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 43.1187, |
|
"eval_samples_per_second": 82.748, |
|
"eval_steps_per_second": 10.344, |
|
"step": 2780000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6904935940143697e-05, |
|
"loss": 2.1295, |
|
"step": 2785000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.5253255833322374, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 44.6338, |
|
"eval_samples_per_second": 79.939, |
|
"eval_steps_per_second": 9.992, |
|
"step": 2785000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.683010357901195e-05, |
|
"loss": 2.129, |
|
"step": 2790000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.5255368125052053, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 44.4544, |
|
"eval_samples_per_second": 80.262, |
|
"eval_steps_per_second": 10.033, |
|
"step": 2790000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6755286196335195e-05, |
|
"loss": 2.1285, |
|
"step": 2795000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.5253598293071026, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 44.2524, |
|
"eval_samples_per_second": 80.628, |
|
"eval_steps_per_second": 10.079, |
|
"step": 2795000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.668045383520344e-05, |
|
"loss": 2.1292, |
|
"step": 2800000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.5252880497437853, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 40.6928, |
|
"eval_samples_per_second": 87.681, |
|
"eval_steps_per_second": 10.96, |
|
"step": 2800000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.660563645252669e-05, |
|
"loss": 2.1278, |
|
"step": 2805000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.5256472215281704, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 43.3931, |
|
"eval_samples_per_second": 82.225, |
|
"eval_steps_per_second": 10.278, |
|
"step": 2805000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.653080409139494e-05, |
|
"loss": 2.1239, |
|
"step": 2810000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.5255110595321069, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 42.2966, |
|
"eval_samples_per_second": 84.357, |
|
"eval_steps_per_second": 10.545, |
|
"step": 2810000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.645598670871818e-05, |
|
"loss": 2.1241, |
|
"step": 2815000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.5259063950659495, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 42.6516, |
|
"eval_samples_per_second": 83.655, |
|
"eval_steps_per_second": 10.457, |
|
"step": 2815000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.638115434758643e-05, |
|
"loss": 2.1232, |
|
"step": 2820000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.5256781798894485, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 46.2947, |
|
"eval_samples_per_second": 77.071, |
|
"eval_steps_per_second": 9.634, |
|
"step": 2820000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.6306321986454685e-05, |
|
"loss": 2.1241, |
|
"step": 2825000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.525697357635373, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 43.287, |
|
"eval_samples_per_second": 82.427, |
|
"eval_steps_per_second": 10.303, |
|
"step": 2825000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.623148962532294e-05, |
|
"loss": 2.1236, |
|
"step": 2830000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.5259439286544016, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 43.0484, |
|
"eval_samples_per_second": 82.884, |
|
"eval_steps_per_second": 10.36, |
|
"step": 2830000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.6156672242646177e-05, |
|
"loss": 2.1272, |
|
"step": 2835000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.5259442026222007, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 42.9436, |
|
"eval_samples_per_second": 83.086, |
|
"eval_steps_per_second": 10.386, |
|
"step": 2835000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.608183988151443e-05, |
|
"loss": 2.1271, |
|
"step": 2840000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.5260592690977474, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 40.596, |
|
"eval_samples_per_second": 87.89, |
|
"eval_steps_per_second": 10.986, |
|
"step": 2840000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.6007022498837675e-05, |
|
"loss": 2.1249, |
|
"step": 2845000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.52616776034612, |
|
"eval_loss": 2.1484375, |
|
"eval_runtime": 43.4159, |
|
"eval_samples_per_second": 82.182, |
|
"eval_steps_per_second": 10.273, |
|
"step": 2845000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5932175159250935e-05, |
|
"loss": 2.1245, |
|
"step": 2850000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.5260307764466595, |
|
"eval_loss": 2.1484375, |
|
"eval_runtime": 43.8356, |
|
"eval_samples_per_second": 81.395, |
|
"eval_steps_per_second": 10.174, |
|
"step": 2850000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.585735777657418e-05, |
|
"loss": 2.1222, |
|
"step": 2855000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5261137886897326, |
|
"eval_loss": 2.1484375, |
|
"eval_runtime": 43.0232, |
|
"eval_samples_per_second": 82.932, |
|
"eval_steps_per_second": 10.367, |
|
"step": 2855000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5782525415442434e-05, |
|
"loss": 2.125, |
|
"step": 2860000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5263050182133793, |
|
"eval_loss": 2.1484375, |
|
"eval_runtime": 43.0096, |
|
"eval_samples_per_second": 82.958, |
|
"eval_steps_per_second": 10.37, |
|
"step": 2860000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.570770803276567e-05, |
|
"loss": 2.1261, |
|
"step": 2865000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5260803646182642, |
|
"eval_loss": 2.1484375, |
|
"eval_runtime": 40.5259, |
|
"eval_samples_per_second": 88.042, |
|
"eval_steps_per_second": 11.005, |
|
"step": 2865000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.563289065008891e-05, |
|
"loss": 2.1247, |
|
"step": 2870000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5262392659416383, |
|
"eval_loss": 2.1484375, |
|
"eval_runtime": 46.7614, |
|
"eval_samples_per_second": 76.302, |
|
"eval_steps_per_second": 9.538, |
|
"step": 2870000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5558028332047185e-05, |
|
"loss": 2.1225, |
|
"step": 2875000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5263167988287328, |
|
"eval_loss": 2.1484375, |
|
"eval_runtime": 43.7319, |
|
"eval_samples_per_second": 81.588, |
|
"eval_steps_per_second": 10.199, |
|
"step": 2875000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5483210949370424e-05, |
|
"loss": 2.122, |
|
"step": 2880000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5261085833015531, |
|
"eval_loss": 2.1484375, |
|
"eval_runtime": 46.8934, |
|
"eval_samples_per_second": 76.087, |
|
"eval_steps_per_second": 9.511, |
|
"step": 2880000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.540839356669367e-05, |
|
"loss": 2.1237, |
|
"step": 2885000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5261107750439444, |
|
"eval_loss": 2.146484375, |
|
"eval_runtime": 42.8485, |
|
"eval_samples_per_second": 83.27, |
|
"eval_steps_per_second": 10.409, |
|
"step": 2885000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.533356120556192e-05, |
|
"loss": 2.1219, |
|
"step": 2890000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.5261825546072617, |
|
"eval_loss": 2.146484375, |
|
"eval_runtime": 44.3826, |
|
"eval_samples_per_second": 80.392, |
|
"eval_steps_per_second": 10.049, |
|
"step": 2890000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.525874382288516e-05, |
|
"loss": 2.1248, |
|
"step": 2895000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.526191595544626, |
|
"eval_loss": 2.146484375, |
|
"eval_runtime": 40.9038, |
|
"eval_samples_per_second": 87.229, |
|
"eval_steps_per_second": 10.904, |
|
"step": 2895000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.51839264402084e-05, |
|
"loss": 2.1191, |
|
"step": 2900000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.526354332417185, |
|
"eval_loss": 2.146484375, |
|
"eval_runtime": 44.4026, |
|
"eval_samples_per_second": 80.356, |
|
"eval_steps_per_second": 10.044, |
|
"step": 2900000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.510909407907666e-05, |
|
"loss": 2.1181, |
|
"step": 2905000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.5264343310144699, |
|
"eval_loss": 2.146484375, |
|
"eval_runtime": 43.5492, |
|
"eval_samples_per_second": 81.93, |
|
"eval_steps_per_second": 10.241, |
|
"step": 2905000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.50342766963999e-05, |
|
"loss": 2.1176, |
|
"step": 2910000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.5263020045675911, |
|
"eval_loss": 2.146484375, |
|
"eval_runtime": 46.4423, |
|
"eval_samples_per_second": 76.827, |
|
"eval_steps_per_second": 9.603, |
|
"step": 2910000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.495944433526815e-05, |
|
"loss": 2.1191, |
|
"step": 2915000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.5266524093824109, |
|
"eval_loss": 2.146484375, |
|
"eval_runtime": 42.6675, |
|
"eval_samples_per_second": 83.623, |
|
"eval_steps_per_second": 10.453, |
|
"step": 2915000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.488462695259139e-05, |
|
"loss": 2.1206, |
|
"step": 2920000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.5267954205734475, |
|
"eval_loss": 2.14453125, |
|
"eval_runtime": 42.9737, |
|
"eval_samples_per_second": 83.028, |
|
"eval_steps_per_second": 10.378, |
|
"step": 2920000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.4809794591459645e-05, |
|
"loss": 2.1148, |
|
"step": 2925000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5267219972033367, |
|
"eval_loss": 2.14453125, |
|
"eval_runtime": 44.1462, |
|
"eval_samples_per_second": 80.822, |
|
"eval_steps_per_second": 10.103, |
|
"step": 2925000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.473497720878289e-05, |
|
"loss": 2.1188, |
|
"step": 2930000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5270244576533453, |
|
"eval_loss": 2.14453125, |
|
"eval_runtime": 44.0298, |
|
"eval_samples_per_second": 81.036, |
|
"eval_steps_per_second": 10.129, |
|
"step": 2930000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.466015982610613e-05, |
|
"loss": 2.1118, |
|
"step": 2935000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5270036361006273, |
|
"eval_loss": 2.14453125, |
|
"eval_runtime": 41.5147, |
|
"eval_samples_per_second": 85.945, |
|
"eval_steps_per_second": 10.743, |
|
"step": 2935000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.992518261732325e-05, |
|
"loss": 2.1283, |
|
"step": 2940000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5243740931665856, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 39.6741, |
|
"eval_samples_per_second": 89.933, |
|
"eval_steps_per_second": 11.242, |
|
"step": 2940000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.9850350256191494e-05, |
|
"loss": 2.1336, |
|
"step": 2945000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5240346470637227, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 39.6588, |
|
"eval_samples_per_second": 89.967, |
|
"eval_steps_per_second": 11.246, |
|
"step": 2945000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.977551789505975e-05, |
|
"loss": 2.1311, |
|
"step": 2950000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5237494465850462, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 39.7632, |
|
"eval_samples_per_second": 89.731, |
|
"eval_steps_per_second": 11.216, |
|
"step": 2950000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.970070051238299e-05, |
|
"loss": 2.1377, |
|
"step": 2955000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.523618763944961, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 39.7355, |
|
"eval_samples_per_second": 89.794, |
|
"eval_steps_per_second": 11.224, |
|
"step": 2955000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.962588312970624e-05, |
|
"loss": 2.136, |
|
"step": 2960000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.523584244002297, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 39.6172, |
|
"eval_samples_per_second": 90.062, |
|
"eval_steps_per_second": 11.258, |
|
"step": 2960000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.955103579011949e-05, |
|
"loss": 2.1394, |
|
"step": 2965000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.5233458920172359, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 39.7138, |
|
"eval_samples_per_second": 89.843, |
|
"eval_steps_per_second": 11.23, |
|
"step": 2965000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.947621840744274e-05, |
|
"loss": 2.1405, |
|
"step": 2970000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.5233305498204963, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.7403, |
|
"eval_samples_per_second": 89.783, |
|
"eval_steps_per_second": 11.223, |
|
"step": 2970000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.9401401024765983e-05, |
|
"loss": 2.1391, |
|
"step": 2975000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.5235795865497153, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.791, |
|
"eval_samples_per_second": 89.668, |
|
"eval_steps_per_second": 11.209, |
|
"step": 2975000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.9326553685179237e-05, |
|
"loss": 2.1353, |
|
"step": 2980000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.5233823297344923, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.6943, |
|
"eval_samples_per_second": 89.887, |
|
"eval_steps_per_second": 11.236, |
|
"step": 2980000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.925173630250248e-05, |
|
"loss": 2.1392, |
|
"step": 2985000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.5233889049616665, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.7244, |
|
"eval_samples_per_second": 89.819, |
|
"eval_steps_per_second": 11.227, |
|
"step": 2985000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.917691891982573e-05, |
|
"loss": 2.1384, |
|
"step": 2990000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5235058892118056, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.7383, |
|
"eval_samples_per_second": 89.787, |
|
"eval_steps_per_second": 11.223, |
|
"step": 2990000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.910210153714897e-05, |
|
"loss": 2.1373, |
|
"step": 2995000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.523321234915333, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.8394, |
|
"eval_samples_per_second": 89.56, |
|
"eval_steps_per_second": 11.195, |
|
"step": 2995000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.9027284154472206e-05, |
|
"loss": 2.1346, |
|
"step": 3000000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.523394110349846, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.7248, |
|
"eval_samples_per_second": 89.818, |
|
"eval_steps_per_second": 11.227, |
|
"step": 3000000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.895246677179545e-05, |
|
"loss": 2.1368, |
|
"step": 3005000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.523494108596452, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.8116, |
|
"eval_samples_per_second": 89.622, |
|
"eval_steps_per_second": 11.203, |
|
"step": 3005000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.8877634410663705e-05, |
|
"loss": 2.1383, |
|
"step": 3010000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5232738384861197, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.8277, |
|
"eval_samples_per_second": 89.586, |
|
"eval_steps_per_second": 11.198, |
|
"step": 3010000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.880280204953195e-05, |
|
"loss": 2.1447, |
|
"step": 3015000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5233116460423708, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.9037, |
|
"eval_samples_per_second": 89.415, |
|
"eval_steps_per_second": 11.177, |
|
"step": 3015000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.87279846668552e-05, |
|
"loss": 2.1392, |
|
"step": 3020000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5234119182567758, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.8551, |
|
"eval_samples_per_second": 89.524, |
|
"eval_steps_per_second": 11.191, |
|
"step": 3020000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.865315230572345e-05, |
|
"loss": 2.1359, |
|
"step": 3025000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.5233072625575881, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.8861, |
|
"eval_samples_per_second": 89.455, |
|
"eval_steps_per_second": 11.182, |
|
"step": 3025000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.8578334923046695e-05, |
|
"loss": 2.1408, |
|
"step": 3030000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.5233184952373438, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.8536, |
|
"eval_samples_per_second": 89.528, |
|
"eval_steps_per_second": 11.191, |
|
"step": 3030000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.850350256191494e-05, |
|
"loss": 2.1437, |
|
"step": 3035000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.5232642496131574, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.8602, |
|
"eval_samples_per_second": 89.513, |
|
"eval_steps_per_second": 11.189, |
|
"step": 3035000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.8428715136148166e-05, |
|
"loss": 2.1354, |
|
"step": 3040000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.5233253444323168, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.8447, |
|
"eval_samples_per_second": 89.548, |
|
"eval_steps_per_second": 11.193, |
|
"step": 3040000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.835383783965145e-05, |
|
"loss": 2.1371, |
|
"step": 3045000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.5234626022995761, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 40.0007, |
|
"eval_samples_per_second": 89.199, |
|
"eval_steps_per_second": 11.15, |
|
"step": 3045000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.82790054785197e-05, |
|
"loss": 2.1399, |
|
"step": 3050000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.5233957541566394, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.8876, |
|
"eval_samples_per_second": 89.451, |
|
"eval_steps_per_second": 11.181, |
|
"step": 3050000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.820420307429793e-05, |
|
"loss": 2.1387, |
|
"step": 3055000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5234242468077273, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.9685, |
|
"eval_samples_per_second": 89.27, |
|
"eval_steps_per_second": 11.159, |
|
"step": 3055000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.8129340756256206e-05, |
|
"loss": 2.1406, |
|
"step": 3060000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5232119217635636, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.9746, |
|
"eval_samples_per_second": 89.257, |
|
"eval_steps_per_second": 11.157, |
|
"step": 3060000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.805450839512445e-05, |
|
"loss": 2.1387, |
|
"step": 3065000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5234650680097664, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 40.0463, |
|
"eval_samples_per_second": 89.097, |
|
"eval_steps_per_second": 11.137, |
|
"step": 3065000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.797966105553772e-05, |
|
"loss": 2.1413, |
|
"step": 3070000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5234842457556909, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 39.9277, |
|
"eval_samples_per_second": 89.362, |
|
"eval_steps_per_second": 11.17, |
|
"step": 3070000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.790484367286096e-05, |
|
"loss": 2.1371, |
|
"step": 3075000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.523473561011533, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 39.9812, |
|
"eval_samples_per_second": 89.242, |
|
"eval_steps_per_second": 11.155, |
|
"step": 3075000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.782999633327422e-05, |
|
"loss": 2.138, |
|
"step": 3080000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5234982181134358, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 39.9703, |
|
"eval_samples_per_second": 89.266, |
|
"eval_steps_per_second": 11.158, |
|
"step": 3080000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.7755148993687484e-05, |
|
"loss": 2.1385, |
|
"step": 3085000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5236360239162929, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 40.1115, |
|
"eval_samples_per_second": 88.952, |
|
"eval_steps_per_second": 11.119, |
|
"step": 3085000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.768031663255573e-05, |
|
"loss": 2.135, |
|
"step": 3090000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5233746586361225, |
|
"eval_loss": 2.166015625, |
|
"eval_runtime": 40.1304, |
|
"eval_samples_per_second": 88.91, |
|
"eval_steps_per_second": 11.114, |
|
"step": 3090000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.760548427142398e-05, |
|
"loss": 2.1401, |
|
"step": 3095000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5235689018055574, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 40.1365, |
|
"eval_samples_per_second": 88.897, |
|
"eval_steps_per_second": 11.112, |
|
"step": 3095000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.753066688874723e-05, |
|
"loss": 2.1374, |
|
"step": 3100000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5235552034156113, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 40.133, |
|
"eval_samples_per_second": 88.904, |
|
"eval_steps_per_second": 11.113, |
|
"step": 3100000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.745586448452546e-05, |
|
"loss": 2.1358, |
|
"step": 3105000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5237198580627628, |
|
"eval_loss": 2.1640625, |
|
"eval_runtime": 40.0267, |
|
"eval_samples_per_second": 89.141, |
|
"eval_steps_per_second": 11.143, |
|
"step": 3105000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.7381032123393713e-05, |
|
"loss": 2.1344, |
|
"step": 3110000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5239288954933393, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 40.1156, |
|
"eval_samples_per_second": 88.943, |
|
"eval_steps_per_second": 11.118, |
|
"step": 3110000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.730621474071695e-05, |
|
"loss": 2.1368, |
|
"step": 3115000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5238793073217346, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 40.1212, |
|
"eval_samples_per_second": 88.93, |
|
"eval_steps_per_second": 11.116, |
|
"step": 3115000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.723136740113022e-05, |
|
"loss": 2.1345, |
|
"step": 3120000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5236836943133052, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 40.203, |
|
"eval_samples_per_second": 88.75, |
|
"eval_steps_per_second": 11.094, |
|
"step": 3120000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.7156535039998465e-05, |
|
"loss": 2.1358, |
|
"step": 3125000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.5238979371320612, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 40.2344, |
|
"eval_samples_per_second": 88.68, |
|
"eval_steps_per_second": 11.085, |
|
"step": 3125000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.708171765732171e-05, |
|
"loss": 2.1395, |
|
"step": 3130000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.5239330050103231, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 40.4542, |
|
"eval_samples_per_second": 88.199, |
|
"eval_steps_per_second": 11.025, |
|
"step": 3130000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.700690027464496e-05, |
|
"loss": 2.1359, |
|
"step": 3135000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.5242872453743277, |
|
"eval_loss": 2.162109375, |
|
"eval_runtime": 40.3355, |
|
"eval_samples_per_second": 88.458, |
|
"eval_steps_per_second": 11.057, |
|
"step": 3135000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.693205293505821e-05, |
|
"loss": 2.1373, |
|
"step": 3140000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.5241631379614166, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 40.2137, |
|
"eval_samples_per_second": 88.726, |
|
"eval_steps_per_second": 11.091, |
|
"step": 3140000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.6857235552381456e-05, |
|
"loss": 2.1357, |
|
"step": 3145000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.5243412170307151, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 40.3184, |
|
"eval_samples_per_second": 88.496, |
|
"eval_steps_per_second": 11.062, |
|
"step": 3145000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.67824181697047e-05, |
|
"loss": 2.1354, |
|
"step": 3150000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.5243636823902266, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 40.1892, |
|
"eval_samples_per_second": 88.78, |
|
"eval_steps_per_second": 11.098, |
|
"step": 3150000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.670757083011796e-05, |
|
"loss": 2.1323, |
|
"step": 3155000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.5243839560073468, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 40.3635, |
|
"eval_samples_per_second": 88.397, |
|
"eval_steps_per_second": 11.05, |
|
"step": 3155000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.66327534474412e-05, |
|
"loss": 2.133, |
|
"step": 3160000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.5242327257823425, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 41.1497, |
|
"eval_samples_per_second": 86.708, |
|
"eval_steps_per_second": 10.838, |
|
"step": 3160000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.655790610785447e-05, |
|
"loss": 2.1315, |
|
"step": 3165000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.524407243270255, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 41.4927, |
|
"eval_samples_per_second": 85.991, |
|
"eval_steps_per_second": 10.749, |
|
"step": 3165000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.6483088725177706e-05, |
|
"loss": 2.1363, |
|
"step": 3170000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.5242845056963384, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 43.5552, |
|
"eval_samples_per_second": 81.919, |
|
"eval_steps_per_second": 10.24, |
|
"step": 3170000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.640825636404596e-05, |
|
"loss": 2.1349, |
|
"step": 3175000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.5245045018388719, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 41.5498, |
|
"eval_samples_per_second": 85.873, |
|
"eval_steps_per_second": 10.734, |
|
"step": 3175000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.633342400291421e-05, |
|
"loss": 2.1336, |
|
"step": 3180000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.524365600164819, |
|
"eval_loss": 2.16015625, |
|
"eval_runtime": 45.093, |
|
"eval_samples_per_second": 79.125, |
|
"eval_steps_per_second": 9.891, |
|
"step": 3180000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.625860662023746e-05, |
|
"loss": 2.1364, |
|
"step": 3185000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.5243612166800363, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 42.4027, |
|
"eval_samples_per_second": 84.146, |
|
"eval_steps_per_second": 10.518, |
|
"step": 3185000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.618375928065071e-05, |
|
"loss": 2.133, |
|
"step": 3190000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.5243256008661766, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 40.6817, |
|
"eval_samples_per_second": 87.705, |
|
"eval_steps_per_second": 10.963, |
|
"step": 3190000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.6108941897973956e-05, |
|
"loss": 2.1349, |
|
"step": 3195000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.5245045018388719, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 42.2213, |
|
"eval_samples_per_second": 84.507, |
|
"eval_steps_per_second": 10.563, |
|
"step": 3195000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.60341245152972e-05, |
|
"loss": 2.134, |
|
"step": 3200000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.5245702541106129, |
|
"eval_loss": 2.158203125, |
|
"eval_runtime": 41.8771, |
|
"eval_samples_per_second": 85.202, |
|
"eval_steps_per_second": 10.65, |
|
"step": 3200000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.5959277175710455e-05, |
|
"loss": 2.1308, |
|
"step": 3205000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.5249423023815473, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 42.7291, |
|
"eval_samples_per_second": 83.503, |
|
"eval_steps_per_second": 10.438, |
|
"step": 3205000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.5884444814578715e-05, |
|
"loss": 2.1302, |
|
"step": 3210000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.5246628552266481, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 41.7473, |
|
"eval_samples_per_second": 85.467, |
|
"eval_steps_per_second": 10.683, |
|
"step": 3210000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.580964241035695e-05, |
|
"loss": 2.1302, |
|
"step": 3215000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.5246708002928168, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 43.5638, |
|
"eval_samples_per_second": 81.903, |
|
"eval_steps_per_second": 10.238, |
|
"step": 3215000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.5734825027680186e-05, |
|
"loss": 2.1331, |
|
"step": 3220000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.5247650452156455, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 42.8533, |
|
"eval_samples_per_second": 83.261, |
|
"eval_steps_per_second": 10.408, |
|
"step": 3220000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.565999266654844e-05, |
|
"loss": 2.1273, |
|
"step": 3225000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.5247250459170031, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 42.4534, |
|
"eval_samples_per_second": 84.045, |
|
"eval_steps_per_second": 10.506, |
|
"step": 3225000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.5585175283871685e-05, |
|
"loss": 2.1286, |
|
"step": 3230000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.5249765483564124, |
|
"eval_loss": 2.15625, |
|
"eval_runtime": 43.0626, |
|
"eval_samples_per_second": 82.856, |
|
"eval_steps_per_second": 10.357, |
|
"step": 3230000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.551034292273994e-05, |
|
"loss": 2.1282, |
|
"step": 3235000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.525047232048534, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 42.4424, |
|
"eval_samples_per_second": 84.067, |
|
"eval_steps_per_second": 10.508, |
|
"step": 3235000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.543552554006317e-05, |
|
"loss": 2.1309, |
|
"step": 3240000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.5250951764133451, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 44.2507, |
|
"eval_samples_per_second": 80.631, |
|
"eval_steps_per_second": 10.079, |
|
"step": 3240000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.5360708157386415e-05, |
|
"loss": 2.1295, |
|
"step": 3245000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.5253592813715047, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 44.2616, |
|
"eval_samples_per_second": 80.612, |
|
"eval_steps_per_second": 10.076, |
|
"step": 3245000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.528589077470966e-05, |
|
"loss": 2.1275, |
|
"step": 3250000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.5253707880190593, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 43.0103, |
|
"eval_samples_per_second": 82.957, |
|
"eval_steps_per_second": 10.37, |
|
"step": 3250000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.5211058413577914e-05, |
|
"loss": 2.133, |
|
"step": 3255000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5253924314751741, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 41.4169, |
|
"eval_samples_per_second": 86.148, |
|
"eval_steps_per_second": 10.769, |
|
"step": 3255000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.513622605244616e-05, |
|
"loss": 2.1301, |
|
"step": 3260000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5251132582880739, |
|
"eval_loss": 2.154296875, |
|
"eval_runtime": 42.1862, |
|
"eval_samples_per_second": 84.577, |
|
"eval_steps_per_second": 10.572, |
|
"step": 3260000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.5061408669769406e-05, |
|
"loss": 2.1314, |
|
"step": 3265000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5253096931999001, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 43.9727, |
|
"eval_samples_per_second": 81.141, |
|
"eval_steps_per_second": 10.143, |
|
"step": 3265000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.498656133018267e-05, |
|
"loss": 2.1258, |
|
"step": 3270000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5254751697504482, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 42.0779, |
|
"eval_samples_per_second": 84.795, |
|
"eval_steps_per_second": 10.599, |
|
"step": 3270000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.491172896905092e-05, |
|
"loss": 2.1286, |
|
"step": 3275000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.525354897886722, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 41.8893, |
|
"eval_samples_per_second": 85.177, |
|
"eval_steps_per_second": 10.647, |
|
"step": 3275000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.4836911586374165e-05, |
|
"loss": 2.1267, |
|
"step": 3280000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5253622950172928, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 42.0241, |
|
"eval_samples_per_second": 84.904, |
|
"eval_steps_per_second": 10.613, |
|
"step": 3280000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.476207922524242e-05, |
|
"loss": 2.13, |
|
"step": 3285000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5254332526772133, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 43.4043, |
|
"eval_samples_per_second": 82.204, |
|
"eval_steps_per_second": 10.275, |
|
"step": 3285000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.4687246864110663e-05, |
|
"loss": 2.1284, |
|
"step": 3290000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5254976351099597, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 43.4747, |
|
"eval_samples_per_second": 82.071, |
|
"eval_steps_per_second": 10.259, |
|
"step": 3290000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.461242948143391e-05, |
|
"loss": 2.1295, |
|
"step": 3295000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5254190063516695, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 44.0831, |
|
"eval_samples_per_second": 80.938, |
|
"eval_steps_per_second": 10.117, |
|
"step": 3295000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.453759712030216e-05, |
|
"loss": 2.1241, |
|
"step": 3300000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5255573600901244, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 43.4229, |
|
"eval_samples_per_second": 82.169, |
|
"eval_steps_per_second": 10.271, |
|
"step": 3300000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.446276475917041e-05, |
|
"loss": 2.1297, |
|
"step": 3305000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5257825616208374, |
|
"eval_loss": 2.15234375, |
|
"eval_runtime": 41.6675, |
|
"eval_samples_per_second": 85.63, |
|
"eval_steps_per_second": 10.704, |
|
"step": 3305000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.4387947376493654e-05, |
|
"loss": 2.126, |
|
"step": 3310000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5256425640755888, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 43.5829, |
|
"eval_samples_per_second": 81.867, |
|
"eval_steps_per_second": 10.233, |
|
"step": 3310000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.43131299938169e-05, |
|
"loss": 2.1263, |
|
"step": 3315000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5255672229308856, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 40.6138, |
|
"eval_samples_per_second": 87.852, |
|
"eval_steps_per_second": 10.981, |
|
"step": 3315000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.423828265423016e-05, |
|
"loss": 2.1273, |
|
"step": 3320000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.525589962258196, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 41.1643, |
|
"eval_samples_per_second": 86.677, |
|
"eval_steps_per_second": 10.835, |
|
"step": 3320000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.4163450293098406e-05, |
|
"loss": 2.1214, |
|
"step": 3325000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5255433877323795, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 40.8274, |
|
"eval_samples_per_second": 87.392, |
|
"eval_steps_per_second": 10.924, |
|
"step": 3325000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.408863291042165e-05, |
|
"loss": 2.1275, |
|
"step": 3330000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5255872225802068, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 42.63, |
|
"eval_samples_per_second": 83.697, |
|
"eval_steps_per_second": 10.462, |
|
"step": 3330000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.40138155277449e-05, |
|
"loss": 2.1227, |
|
"step": 3335000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5258348894704312, |
|
"eval_loss": 2.150390625, |
|
"eval_runtime": 42.4569, |
|
"eval_samples_per_second": 84.038, |
|
"eval_steps_per_second": 10.505, |
|
"step": 3335000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3338128, |
|
"total_flos": 1.0872066371139498e+21, |
|
"train_loss": 0.2576859601399347, |
|
"train_runtime": 150388.7494, |
|
"train_samples_per_second": 177.573, |
|
"train_steps_per_second": 22.197 |
|
} |
|
], |
|
"max_steps": 3338128, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.0872066371139498e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|