|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.025573961057832027, |
|
"eval_steps": 1, |
|
"global_step": 352, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 7.265329845975008e-05, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8438, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 7.265329845975008e-05, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 267.5371, |
|
"eval_samples_per_second": 126.214, |
|
"eval_steps_per_second": 2.631, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00014530659691950015, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8359, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00014530659691950015, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 267.9301, |
|
"eval_samples_per_second": 126.029, |
|
"eval_steps_per_second": 2.628, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00021795989537925023, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8438, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00021795989537925023, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 267.74, |
|
"eval_samples_per_second": 126.119, |
|
"eval_steps_per_second": 2.629, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0002906131938390003, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8359, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0002906131938390003, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 267.3271, |
|
"eval_samples_per_second": 126.313, |
|
"eval_steps_per_second": 2.633, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0003632664922987504, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8438, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0003632664922987504, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 267.207, |
|
"eval_samples_per_second": 126.37, |
|
"eval_steps_per_second": 2.635, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00043591979075850045, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8438, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00043591979075850045, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 268.9045, |
|
"eval_samples_per_second": 125.572, |
|
"eval_steps_per_second": 2.618, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0005085730892182505, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8359, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0005085730892182505, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 269.4441, |
|
"eval_samples_per_second": 125.321, |
|
"eval_steps_per_second": 2.613, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0005812263876780006, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8359, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0005812263876780006, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 265.7225, |
|
"eval_samples_per_second": 127.076, |
|
"eval_steps_per_second": 2.649, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0006538796861377507, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8438, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0006538796861377507, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 265.6793, |
|
"eval_samples_per_second": 127.097, |
|
"eval_steps_per_second": 2.65, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0007265329845975008, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8359, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0007265329845975008, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 266.2481, |
|
"eval_samples_per_second": 126.825, |
|
"eval_steps_per_second": 2.644, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0007991862830572508, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8359, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0007991862830572508, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 265.3262, |
|
"eval_samples_per_second": 127.266, |
|
"eval_steps_per_second": 2.653, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0008718395815170009, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8438, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0008718395815170009, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 265.9213, |
|
"eval_samples_per_second": 126.981, |
|
"eval_steps_per_second": 2.647, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.000944492879976751, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8438, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.000944492879976751, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 266.6935, |
|
"eval_samples_per_second": 126.614, |
|
"eval_steps_per_second": 2.64, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.001017146178436501, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8359, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.001017146178436501, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 265.5702, |
|
"eval_samples_per_second": 127.149, |
|
"eval_steps_per_second": 2.651, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.001089799476896251, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8359, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.001089799476896251, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 266.1801, |
|
"eval_samples_per_second": 126.858, |
|
"eval_steps_per_second": 2.645, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0011624527753560012, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8438, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0011624527753560012, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 265.2069, |
|
"eval_samples_per_second": 127.323, |
|
"eval_steps_per_second": 2.655, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0012351060738157512, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8359, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0012351060738157512, |
|
"eval_accuracy": 0.010348185357762373, |
|
"eval_loss": 10.84375, |
|
"eval_runtime": 265.4707, |
|
"eval_samples_per_second": 127.197, |
|
"eval_steps_per_second": 2.652, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0013077593722755014, |
|
"grad_norm": 2.8743269443511963, |
|
"learning_rate": 9.999992734670155e-06, |
|
"loss": 10.8438, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0013077593722755014, |
|
"eval_accuracy": 0.011272349663430095, |
|
"eval_loss": 10.828125, |
|
"eval_runtime": 265.1122, |
|
"eval_samples_per_second": 127.369, |
|
"eval_steps_per_second": 2.655, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0013804126707352513, |
|
"grad_norm": 2.8402953147888184, |
|
"learning_rate": 9.999985469340309e-06, |
|
"loss": 10.8203, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0013804126707352513, |
|
"eval_accuracy": 0.011634905538764718, |
|
"eval_loss": 10.8125, |
|
"eval_runtime": 265.8546, |
|
"eval_samples_per_second": 127.013, |
|
"eval_steps_per_second": 2.648, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0014530659691950015, |
|
"grad_norm": 2.8661510944366455, |
|
"learning_rate": 9.999978204010463e-06, |
|
"loss": 10.8203, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0014530659691950015, |
|
"eval_accuracy": 0.01173857150727105, |
|
"eval_loss": 10.8046875, |
|
"eval_runtime": 265.3217, |
|
"eval_samples_per_second": 127.268, |
|
"eval_steps_per_second": 2.653, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0015257192676547515, |
|
"grad_norm": 2.8541078567504883, |
|
"learning_rate": 9.999970938680617e-06, |
|
"loss": 10.8047, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0015257192676547515, |
|
"eval_accuracy": 0.011759009422313067, |
|
"eval_loss": 10.7890625, |
|
"eval_runtime": 265.864, |
|
"eval_samples_per_second": 127.009, |
|
"eval_steps_per_second": 2.648, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0015983725661145017, |
|
"grad_norm": 2.8900887966156006, |
|
"learning_rate": 9.99996367335077e-06, |
|
"loss": 10.7969, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0015983725661145017, |
|
"eval_accuracy": 0.011769199430945915, |
|
"eval_loss": 10.7734375, |
|
"eval_runtime": 266.281, |
|
"eval_samples_per_second": 126.81, |
|
"eval_steps_per_second": 2.644, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0016710258645742516, |
|
"grad_norm": 2.884963035583496, |
|
"learning_rate": 9.999956408020926e-06, |
|
"loss": 10.7812, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0016710258645742516, |
|
"eval_accuracy": 0.01179959576351549, |
|
"eval_loss": 10.765625, |
|
"eval_runtime": 266.3138, |
|
"eval_samples_per_second": 126.794, |
|
"eval_steps_per_second": 2.643, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0017436791630340018, |
|
"grad_norm": 2.8954319953918457, |
|
"learning_rate": 9.999949142691078e-06, |
|
"loss": 10.7656, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0017436791630340018, |
|
"eval_accuracy": 0.011878857819301676, |
|
"eval_loss": 10.75, |
|
"eval_runtime": 265.8429, |
|
"eval_samples_per_second": 127.019, |
|
"eval_steps_per_second": 2.648, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0018163324614937518, |
|
"grad_norm": 2.713453769683838, |
|
"learning_rate": 9.999941877361234e-06, |
|
"loss": 10.7578, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0018163324614937518, |
|
"eval_accuracy": 0.012068501985647663, |
|
"eval_loss": 10.734375, |
|
"eval_runtime": 266.534, |
|
"eval_samples_per_second": 126.689, |
|
"eval_steps_per_second": 2.641, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.001888985759953502, |
|
"grad_norm": 2.663592576980591, |
|
"learning_rate": 9.999934612031386e-06, |
|
"loss": 10.75, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.001888985759953502, |
|
"eval_accuracy": 0.012414296454736778, |
|
"eval_loss": 10.7265625, |
|
"eval_runtime": 265.503, |
|
"eval_samples_per_second": 127.181, |
|
"eval_steps_per_second": 2.652, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.001961639058413252, |
|
"grad_norm": 2.4643020629882812, |
|
"learning_rate": 9.999927346701542e-06, |
|
"loss": 10.7344, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.001961639058413252, |
|
"eval_accuracy": 0.013084492164563661, |
|
"eval_loss": 10.71875, |
|
"eval_runtime": 264.7117, |
|
"eval_samples_per_second": 127.561, |
|
"eval_steps_per_second": 2.659, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.002034292356873002, |
|
"grad_norm": 2.2399826049804688, |
|
"learning_rate": 9.999920081371694e-06, |
|
"loss": 10.7266, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.002034292356873002, |
|
"eval_accuracy": 0.014443063485982847, |
|
"eval_loss": 10.703125, |
|
"eval_runtime": 264.8704, |
|
"eval_samples_per_second": 127.485, |
|
"eval_steps_per_second": 2.658, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0021069456553327523, |
|
"grad_norm": 2.138185977935791, |
|
"learning_rate": 9.99991281604185e-06, |
|
"loss": 10.7109, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0021069456553327523, |
|
"eval_accuracy": 0.016544752766507735, |
|
"eval_loss": 10.6953125, |
|
"eval_runtime": 264.0295, |
|
"eval_samples_per_second": 127.891, |
|
"eval_steps_per_second": 2.666, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.002179598953792502, |
|
"grad_norm": 1.8671512603759766, |
|
"learning_rate": 9.999905550712004e-06, |
|
"loss": 10.7031, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.002179598953792502, |
|
"eval_accuracy": 0.01964894204406536, |
|
"eval_loss": 10.6875, |
|
"eval_runtime": 264.1126, |
|
"eval_samples_per_second": 127.851, |
|
"eval_steps_per_second": 2.666, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0022522522522522522, |
|
"grad_norm": 1.72816002368927, |
|
"learning_rate": 9.999898285382156e-06, |
|
"loss": 10.7031, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0022522522522522522, |
|
"eval_accuracy": 0.023590159473924593, |
|
"eval_loss": 10.6796875, |
|
"eval_runtime": 265.2939, |
|
"eval_samples_per_second": 127.281, |
|
"eval_steps_per_second": 2.654, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0023249055507120024, |
|
"grad_norm": 1.6541900634765625, |
|
"learning_rate": 9.999891020052312e-06, |
|
"loss": 10.6875, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0023249055507120024, |
|
"eval_accuracy": 0.028234313806121365, |
|
"eval_loss": 10.671875, |
|
"eval_runtime": 264.9943, |
|
"eval_samples_per_second": 127.425, |
|
"eval_steps_per_second": 2.657, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0023975588491717526, |
|
"grad_norm": 1.4378719329833984, |
|
"learning_rate": 9.999883754722464e-06, |
|
"loss": 10.6797, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0023975588491717526, |
|
"eval_accuracy": 0.03299875076862917, |
|
"eval_loss": 10.6640625, |
|
"eval_runtime": 264.4617, |
|
"eval_samples_per_second": 127.682, |
|
"eval_steps_per_second": 2.662, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0024702121476315024, |
|
"grad_norm": 1.3948858976364136, |
|
"learning_rate": 9.99987648939262e-06, |
|
"loss": 10.6719, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0024702121476315024, |
|
"eval_accuracy": 0.03746090344095459, |
|
"eval_loss": 10.6640625, |
|
"eval_runtime": 263.8541, |
|
"eval_samples_per_second": 127.976, |
|
"eval_steps_per_second": 2.668, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0025428654460912525, |
|
"grad_norm": 1.2194068431854248, |
|
"learning_rate": 9.999869224062774e-06, |
|
"loss": 10.6719, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0025428654460912525, |
|
"eval_accuracy": 0.04094886812886922, |
|
"eval_loss": 10.65625, |
|
"eval_runtime": 263.4632, |
|
"eval_samples_per_second": 128.166, |
|
"eval_steps_per_second": 2.672, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0026155187445510027, |
|
"grad_norm": 1.2569856643676758, |
|
"learning_rate": 9.999861958732927e-06, |
|
"loss": 10.6719, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0026155187445510027, |
|
"eval_accuracy": 0.04373679080326246, |
|
"eval_loss": 10.6484375, |
|
"eval_runtime": 265.8278, |
|
"eval_samples_per_second": 127.026, |
|
"eval_steps_per_second": 2.648, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.002688172043010753, |
|
"grad_norm": 1.16013503074646, |
|
"learning_rate": 9.999854693403081e-06, |
|
"loss": 10.6484, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.002688172043010753, |
|
"eval_accuracy": 0.046063847178124624, |
|
"eval_loss": 10.6484375, |
|
"eval_runtime": 266.8235, |
|
"eval_samples_per_second": 126.552, |
|
"eval_steps_per_second": 2.638, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0027608253414705027, |
|
"grad_norm": 1.1432477235794067, |
|
"learning_rate": 9.999847428073235e-06, |
|
"loss": 10.6562, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0027608253414705027, |
|
"eval_accuracy": 0.04811105465112957, |
|
"eval_loss": 10.640625, |
|
"eval_runtime": 266.3014, |
|
"eval_samples_per_second": 126.8, |
|
"eval_steps_per_second": 2.644, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.002833478639930253, |
|
"grad_norm": 1.071315050125122, |
|
"learning_rate": 9.99984016274339e-06, |
|
"loss": 10.6484, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.002833478639930253, |
|
"eval_accuracy": 0.04980491199523524, |
|
"eval_loss": 10.640625, |
|
"eval_runtime": 266.126, |
|
"eval_samples_per_second": 126.883, |
|
"eval_steps_per_second": 2.645, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.002906131938390003, |
|
"grad_norm": 1.0130771398544312, |
|
"learning_rate": 9.999832897413543e-06, |
|
"loss": 10.6484, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.002906131938390003, |
|
"eval_accuracy": 0.05107903940988734, |
|
"eval_loss": 10.6328125, |
|
"eval_runtime": 265.8854, |
|
"eval_samples_per_second": 126.998, |
|
"eval_steps_per_second": 2.648, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0029787852368497528, |
|
"grad_norm": 1.014347791671753, |
|
"learning_rate": 9.999825632083697e-06, |
|
"loss": 10.6406, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0029787852368497528, |
|
"eval_accuracy": 0.052067614991714396, |
|
"eval_loss": 10.6328125, |
|
"eval_runtime": 266.8406, |
|
"eval_samples_per_second": 126.544, |
|
"eval_steps_per_second": 2.638, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.003051438535309503, |
|
"grad_norm": 1.0095568895339966, |
|
"learning_rate": 9.999818366753851e-06, |
|
"loss": 10.6406, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.003051438535309503, |
|
"eval_accuracy": 0.05287193090039351, |
|
"eval_loss": 10.625, |
|
"eval_runtime": 266.7685, |
|
"eval_samples_per_second": 126.578, |
|
"eval_steps_per_second": 2.639, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.003124091833769253, |
|
"grad_norm": 0.9412463307380676, |
|
"learning_rate": 9.999811101424005e-06, |
|
"loss": 10.6406, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.003124091833769253, |
|
"eval_accuracy": 0.053470536009796996, |
|
"eval_loss": 10.625, |
|
"eval_runtime": 267.0069, |
|
"eval_samples_per_second": 126.465, |
|
"eval_steps_per_second": 2.637, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0031967451322290033, |
|
"grad_norm": 0.952081561088562, |
|
"learning_rate": 9.999803836094159e-06, |
|
"loss": 10.625, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0031967451322290033, |
|
"eval_accuracy": 0.05392726261832098, |
|
"eval_loss": 10.6171875, |
|
"eval_runtime": 269.2581, |
|
"eval_samples_per_second": 125.408, |
|
"eval_steps_per_second": 2.615, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.003269398430688753, |
|
"grad_norm": 0.9194355607032776, |
|
"learning_rate": 9.999796570764313e-06, |
|
"loss": 10.625, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.003269398430688753, |
|
"eval_accuracy": 0.05422120962871285, |
|
"eval_loss": 10.6171875, |
|
"eval_runtime": 269.3844, |
|
"eval_samples_per_second": 125.349, |
|
"eval_steps_per_second": 2.613, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0033420517291485033, |
|
"grad_norm": 0.9257526993751526, |
|
"learning_rate": 9.999789305434467e-06, |
|
"loss": 10.625, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0033420517291485033, |
|
"eval_accuracy": 0.054314019764158616, |
|
"eval_loss": 10.6171875, |
|
"eval_runtime": 269.7126, |
|
"eval_samples_per_second": 125.196, |
|
"eval_steps_per_second": 2.61, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0034147050276082534, |
|
"grad_norm": 0.9701704382896423, |
|
"learning_rate": 9.999782040104623e-06, |
|
"loss": 10.6172, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0034147050276082534, |
|
"eval_accuracy": 0.05444489768753676, |
|
"eval_loss": 10.609375, |
|
"eval_runtime": 269.4275, |
|
"eval_samples_per_second": 125.329, |
|
"eval_steps_per_second": 2.613, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0034873583260680036, |
|
"grad_norm": 0.8972945809364319, |
|
"learning_rate": 9.999774774774775e-06, |
|
"loss": 10.625, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0034873583260680036, |
|
"eval_accuracy": 0.05449599247514181, |
|
"eval_loss": 10.609375, |
|
"eval_runtime": 268.4057, |
|
"eval_samples_per_second": 125.806, |
|
"eval_steps_per_second": 2.623, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0035600116245277534, |
|
"grad_norm": 0.9347382187843323, |
|
"learning_rate": 9.99976750944493e-06, |
|
"loss": 10.6172, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0035600116245277534, |
|
"eval_accuracy": 0.05453342338753463, |
|
"eval_loss": 10.6015625, |
|
"eval_runtime": 269.5777, |
|
"eval_samples_per_second": 125.259, |
|
"eval_steps_per_second": 2.611, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0036326649229875036, |
|
"grad_norm": 0.9273884892463684, |
|
"learning_rate": 9.999760244115083e-06, |
|
"loss": 10.6016, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0036326649229875036, |
|
"eval_accuracy": 0.05452213332115164, |
|
"eval_loss": 10.6015625, |
|
"eval_runtime": 268.6914, |
|
"eval_samples_per_second": 125.672, |
|
"eval_steps_per_second": 2.62, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0037053182214472537, |
|
"grad_norm": 0.9508588910102844, |
|
"learning_rate": 9.999752978785238e-06, |
|
"loss": 10.6016, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0037053182214472537, |
|
"eval_accuracy": 0.054549547918240585, |
|
"eval_loss": 10.6015625, |
|
"eval_runtime": 269.6867, |
|
"eval_samples_per_second": 125.208, |
|
"eval_steps_per_second": 2.61, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.003777971519907004, |
|
"grad_norm": 0.97487872838974, |
|
"learning_rate": 9.999745713455392e-06, |
|
"loss": 10.6016, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.003777971519907004, |
|
"eval_accuracy": 0.05455154539152372, |
|
"eval_loss": 10.59375, |
|
"eval_runtime": 269.8783, |
|
"eval_samples_per_second": 125.119, |
|
"eval_steps_per_second": 2.609, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0038506248183667537, |
|
"grad_norm": 1.050345540046692, |
|
"learning_rate": 9.999738448125546e-06, |
|
"loss": 10.6016, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0038506248183667537, |
|
"eval_accuracy": 0.054549403173799776, |
|
"eval_loss": 10.59375, |
|
"eval_runtime": 269.476, |
|
"eval_samples_per_second": 125.306, |
|
"eval_steps_per_second": 2.612, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.003923278116826504, |
|
"grad_norm": 0.9317484498023987, |
|
"learning_rate": 9.9997311827957e-06, |
|
"loss": 10.5938, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.003923278116826504, |
|
"eval_accuracy": 0.05454671092720075, |
|
"eval_loss": 10.59375, |
|
"eval_runtime": 268.2865, |
|
"eval_samples_per_second": 125.862, |
|
"eval_steps_per_second": 2.624, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.003995931415286254, |
|
"grad_norm": 0.9053019285202026, |
|
"learning_rate": 9.999723917465854e-06, |
|
"loss": 10.6016, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.003995931415286254, |
|
"eval_accuracy": 0.054510206379229105, |
|
"eval_loss": 10.5859375, |
|
"eval_runtime": 268.6755, |
|
"eval_samples_per_second": 125.68, |
|
"eval_steps_per_second": 2.62, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.004068584713746004, |
|
"grad_norm": 1.051640272140503, |
|
"learning_rate": 9.999716652136008e-06, |
|
"loss": 10.5859, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.004068584713746004, |
|
"eval_accuracy": 0.05450499557936003, |
|
"eval_loss": 10.5859375, |
|
"eval_runtime": 268.9754, |
|
"eval_samples_per_second": 125.539, |
|
"eval_steps_per_second": 2.617, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.004141238012205754, |
|
"grad_norm": 0.8980646729469299, |
|
"learning_rate": 9.999709386806162e-06, |
|
"loss": 10.6016, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.004141238012205754, |
|
"eval_accuracy": 0.05452688093881013, |
|
"eval_loss": 10.5859375, |
|
"eval_runtime": 269.557, |
|
"eval_samples_per_second": 125.269, |
|
"eval_steps_per_second": 2.612, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.004213891310665505, |
|
"grad_norm": 0.9363867044448853, |
|
"learning_rate": 9.999702121476316e-06, |
|
"loss": 10.5859, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.004213891310665505, |
|
"eval_accuracy": 0.054599542648095495, |
|
"eval_loss": 10.5859375, |
|
"eval_runtime": 268.413, |
|
"eval_samples_per_second": 125.802, |
|
"eval_steps_per_second": 2.623, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.004286544609125254, |
|
"grad_norm": 0.9355424642562866, |
|
"learning_rate": 9.99969485614647e-06, |
|
"loss": 10.5859, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.004286544609125254, |
|
"eval_accuracy": 0.05472584664714412, |
|
"eval_loss": 10.578125, |
|
"eval_runtime": 267.1942, |
|
"eval_samples_per_second": 126.376, |
|
"eval_steps_per_second": 2.635, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.004359197907585004, |
|
"grad_norm": 0.9955667853355408, |
|
"learning_rate": 9.999687590816624e-06, |
|
"loss": 10.5781, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004359197907585004, |
|
"eval_accuracy": 0.05484039739759917, |
|
"eval_loss": 10.578125, |
|
"eval_runtime": 268.4725, |
|
"eval_samples_per_second": 125.774, |
|
"eval_steps_per_second": 2.622, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.004431851206044755, |
|
"grad_norm": 0.9198755025863647, |
|
"learning_rate": 9.999680325486778e-06, |
|
"loss": 10.5781, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.004431851206044755, |
|
"eval_accuracy": 0.054993913351519604, |
|
"eval_loss": 10.578125, |
|
"eval_runtime": 268.5476, |
|
"eval_samples_per_second": 125.739, |
|
"eval_steps_per_second": 2.622, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0045045045045045045, |
|
"grad_norm": 0.9875515699386597, |
|
"learning_rate": 9.999673060156932e-06, |
|
"loss": 10.5781, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0045045045045045045, |
|
"eval_accuracy": 0.055311424756874936, |
|
"eval_loss": 10.5703125, |
|
"eval_runtime": 267.2035, |
|
"eval_samples_per_second": 126.372, |
|
"eval_steps_per_second": 2.635, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.004577157802964254, |
|
"grad_norm": 0.9037775993347168, |
|
"learning_rate": 9.999665794827086e-06, |
|
"loss": 10.5781, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.004577157802964254, |
|
"eval_accuracy": 0.05571670919113593, |
|
"eval_loss": 10.5703125, |
|
"eval_runtime": 267.7809, |
|
"eval_samples_per_second": 126.099, |
|
"eval_steps_per_second": 2.629, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.004649811101424005, |
|
"grad_norm": 0.9087035655975342, |
|
"learning_rate": 9.99965852949724e-06, |
|
"loss": 10.5703, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.004649811101424005, |
|
"eval_accuracy": 0.056142518387103435, |
|
"eval_loss": 10.5703125, |
|
"eval_runtime": 267.3757, |
|
"eval_samples_per_second": 126.29, |
|
"eval_steps_per_second": 2.633, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.004722464399883755, |
|
"grad_norm": 0.8892097473144531, |
|
"learning_rate": 9.999651264167394e-06, |
|
"loss": 10.5781, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.004722464399883755, |
|
"eval_accuracy": 0.05656430368761649, |
|
"eval_loss": 10.5625, |
|
"eval_runtime": 266.879, |
|
"eval_samples_per_second": 126.525, |
|
"eval_steps_per_second": 2.638, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.004795117698343505, |
|
"grad_norm": 0.9172134399414062, |
|
"learning_rate": 9.999643998837548e-06, |
|
"loss": 10.5625, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.004795117698343505, |
|
"eval_accuracy": 0.0569930367212883, |
|
"eval_loss": 10.5625, |
|
"eval_runtime": 266.0917, |
|
"eval_samples_per_second": 126.9, |
|
"eval_steps_per_second": 2.646, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.004867770996803255, |
|
"grad_norm": 0.9037718176841736, |
|
"learning_rate": 9.999636733507701e-06, |
|
"loss": 10.5781, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.004867770996803255, |
|
"eval_accuracy": 0.05732160660192132, |
|
"eval_loss": 10.5625, |
|
"eval_runtime": 267.3756, |
|
"eval_samples_per_second": 126.291, |
|
"eval_steps_per_second": 2.633, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.004940424295263005, |
|
"grad_norm": 0.8923665881156921, |
|
"learning_rate": 9.999629468177855e-06, |
|
"loss": 10.5703, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.004940424295263005, |
|
"eval_accuracy": 0.05754022860531697, |
|
"eval_loss": 10.5546875, |
|
"eval_runtime": 267.5062, |
|
"eval_samples_per_second": 126.229, |
|
"eval_steps_per_second": 2.632, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.005013077593722755, |
|
"grad_norm": 0.9167753458023071, |
|
"learning_rate": 9.999622202848011e-06, |
|
"loss": 10.5625, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.005013077593722755, |
|
"eval_accuracy": 0.0576851756883416, |
|
"eval_loss": 10.5546875, |
|
"eval_runtime": 269.0923, |
|
"eval_samples_per_second": 125.485, |
|
"eval_steps_per_second": 2.616, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.005085730892182505, |
|
"grad_norm": 0.9031029343605042, |
|
"learning_rate": 9.999614937518163e-06, |
|
"loss": 10.5625, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.005085730892182505, |
|
"eval_accuracy": 0.05778192287257733, |
|
"eval_loss": 10.5546875, |
|
"eval_runtime": 269.1242, |
|
"eval_samples_per_second": 125.47, |
|
"eval_steps_per_second": 2.616, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.005158384190642255, |
|
"grad_norm": 0.8912838101387024, |
|
"learning_rate": 9.999607672188319e-06, |
|
"loss": 10.5625, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.005158384190642255, |
|
"eval_accuracy": 0.05789719734523642, |
|
"eval_loss": 10.5546875, |
|
"eval_runtime": 269.7373, |
|
"eval_samples_per_second": 125.185, |
|
"eval_steps_per_second": 2.61, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0052310374891020054, |
|
"grad_norm": 0.8998405933380127, |
|
"learning_rate": 9.999600406858471e-06, |
|
"loss": 10.5547, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0052310374891020054, |
|
"eval_accuracy": 0.057985607249681645, |
|
"eval_loss": 10.546875, |
|
"eval_runtime": 269.5868, |
|
"eval_samples_per_second": 125.255, |
|
"eval_steps_per_second": 2.611, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.005303690787561755, |
|
"grad_norm": 0.9078417420387268, |
|
"learning_rate": 9.999593141528627e-06, |
|
"loss": 10.5469, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.005303690787561755, |
|
"eval_accuracy": 0.05800511880030249, |
|
"eval_loss": 10.546875, |
|
"eval_runtime": 271.0227, |
|
"eval_samples_per_second": 124.591, |
|
"eval_steps_per_second": 2.598, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.005376344086021506, |
|
"grad_norm": 0.8995553851127625, |
|
"learning_rate": 9.99958587619878e-06, |
|
"loss": 10.5469, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.005376344086021506, |
|
"eval_accuracy": 0.05802399347538379, |
|
"eval_loss": 10.546875, |
|
"eval_runtime": 270.6941, |
|
"eval_samples_per_second": 124.742, |
|
"eval_steps_per_second": 2.601, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.0054489973844812556, |
|
"grad_norm": 0.8786413073539734, |
|
"learning_rate": 9.999578610868935e-06, |
|
"loss": 10.5547, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0054489973844812556, |
|
"eval_accuracy": 0.05797920954539795, |
|
"eval_loss": 10.5390625, |
|
"eval_runtime": 268.937, |
|
"eval_samples_per_second": 125.557, |
|
"eval_steps_per_second": 2.618, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.005521650682941005, |
|
"grad_norm": 0.9166957139968872, |
|
"learning_rate": 9.999571345539089e-06, |
|
"loss": 10.5547, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.005521650682941005, |
|
"eval_accuracy": 0.05800387399811155, |
|
"eval_loss": 10.5390625, |
|
"eval_runtime": 270.6425, |
|
"eval_samples_per_second": 124.766, |
|
"eval_steps_per_second": 2.601, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.005594303981400756, |
|
"grad_norm": 0.9106067419052124, |
|
"learning_rate": 9.999564080209243e-06, |
|
"loss": 10.5469, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.005594303981400756, |
|
"eval_accuracy": 0.058152873925478785, |
|
"eval_loss": 10.5390625, |
|
"eval_runtime": 268.7053, |
|
"eval_samples_per_second": 125.666, |
|
"eval_steps_per_second": 2.62, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.005666957279860506, |
|
"grad_norm": 0.9021939039230347, |
|
"learning_rate": 9.999556814879397e-06, |
|
"loss": 10.5469, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.005666957279860506, |
|
"eval_accuracy": 0.05821331920396, |
|
"eval_loss": 10.5390625, |
|
"eval_runtime": 268.8573, |
|
"eval_samples_per_second": 125.594, |
|
"eval_steps_per_second": 2.618, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.005739610578320255, |
|
"grad_norm": 0.935400664806366, |
|
"learning_rate": 9.99954954954955e-06, |
|
"loss": 10.5312, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.005739610578320255, |
|
"eval_accuracy": 0.05838906790398846, |
|
"eval_loss": 10.53125, |
|
"eval_runtime": 270.5471, |
|
"eval_samples_per_second": 124.81, |
|
"eval_steps_per_second": 2.602, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.005812263876780006, |
|
"grad_norm": 0.9361926317214966, |
|
"learning_rate": 9.999542284219704e-06, |
|
"loss": 10.5312, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.005812263876780006, |
|
"eval_accuracy": 0.05863825993328266, |
|
"eval_loss": 10.53125, |
|
"eval_runtime": 270.2854, |
|
"eval_samples_per_second": 124.931, |
|
"eval_steps_per_second": 2.605, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.005884917175239756, |
|
"grad_norm": 0.9991462826728821, |
|
"learning_rate": 9.999535018889858e-06, |
|
"loss": 10.5312, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.005884917175239756, |
|
"eval_accuracy": 0.058984343891253385, |
|
"eval_loss": 10.53125, |
|
"eval_runtime": 269.5128, |
|
"eval_samples_per_second": 125.289, |
|
"eval_steps_per_second": 2.612, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0059575704736995055, |
|
"grad_norm": 0.8942323327064514, |
|
"learning_rate": 9.999527753560012e-06, |
|
"loss": 10.5312, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0059575704736995055, |
|
"eval_accuracy": 0.05927577234837521, |
|
"eval_loss": 10.53125, |
|
"eval_runtime": 267.9968, |
|
"eval_samples_per_second": 125.998, |
|
"eval_steps_per_second": 2.627, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.006030223772159256, |
|
"grad_norm": 0.9410443902015686, |
|
"learning_rate": 9.999520488230166e-06, |
|
"loss": 10.5312, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.006030223772159256, |
|
"eval_accuracy": 0.05966313742086423, |
|
"eval_loss": 10.5234375, |
|
"eval_runtime": 269.3505, |
|
"eval_samples_per_second": 125.365, |
|
"eval_steps_per_second": 2.614, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.006102877070619006, |
|
"grad_norm": 0.9418770670890808, |
|
"learning_rate": 9.99951322290032e-06, |
|
"loss": 10.5234, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.006102877070619006, |
|
"eval_accuracy": 0.06000398163007773, |
|
"eval_loss": 10.5234375, |
|
"eval_runtime": 268.3417, |
|
"eval_samples_per_second": 125.836, |
|
"eval_steps_per_second": 2.624, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0061755303690787565, |
|
"grad_norm": 0.8822703957557678, |
|
"learning_rate": 9.999505957570474e-06, |
|
"loss": 10.5312, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0061755303690787565, |
|
"eval_accuracy": 0.060200255091812704, |
|
"eval_loss": 10.5234375, |
|
"eval_runtime": 269.365, |
|
"eval_samples_per_second": 125.358, |
|
"eval_steps_per_second": 2.614, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.006248183667538506, |
|
"grad_norm": 0.8689332604408264, |
|
"learning_rate": 9.999498692240628e-06, |
|
"loss": 10.5312, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.006248183667538506, |
|
"eval_accuracy": 0.06028594380077074, |
|
"eval_loss": 10.5234375, |
|
"eval_runtime": 267.6304, |
|
"eval_samples_per_second": 126.17, |
|
"eval_steps_per_second": 2.63, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.006320836965998256, |
|
"grad_norm": 0.8931795954704285, |
|
"learning_rate": 9.999491426910782e-06, |
|
"loss": 10.5234, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.006320836965998256, |
|
"eval_accuracy": 0.060404489497792084, |
|
"eval_loss": 10.515625, |
|
"eval_runtime": 267.5381, |
|
"eval_samples_per_second": 126.214, |
|
"eval_steps_per_second": 2.631, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.006393490264458007, |
|
"grad_norm": 0.8975218534469604, |
|
"learning_rate": 9.999484161580936e-06, |
|
"loss": 10.5156, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.006393490264458007, |
|
"eval_accuracy": 0.06048001714700543, |
|
"eval_loss": 10.515625, |
|
"eval_runtime": 266.4763, |
|
"eval_samples_per_second": 126.717, |
|
"eval_steps_per_second": 2.642, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.006466143562917756, |
|
"grad_norm": 0.8878839015960693, |
|
"learning_rate": 9.99947689625109e-06, |
|
"loss": 10.5234, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.006466143562917756, |
|
"eval_accuracy": 0.060569556058088954, |
|
"eval_loss": 10.515625, |
|
"eval_runtime": 265.4792, |
|
"eval_samples_per_second": 127.193, |
|
"eval_steps_per_second": 2.652, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.006538796861377506, |
|
"grad_norm": 0.8937884569168091, |
|
"learning_rate": 9.999469630921244e-06, |
|
"loss": 10.5156, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.006538796861377506, |
|
"eval_accuracy": 0.06063920708300553, |
|
"eval_loss": 10.515625, |
|
"eval_runtime": 264.9514, |
|
"eval_samples_per_second": 127.446, |
|
"eval_steps_per_second": 2.657, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.006611450159837257, |
|
"grad_norm": 0.9820625185966492, |
|
"learning_rate": 9.9994623655914e-06, |
|
"loss": 10.5156, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.006611450159837257, |
|
"eval_accuracy": 0.06055398155625807, |
|
"eval_loss": 10.5078125, |
|
"eval_runtime": 265.7628, |
|
"eval_samples_per_second": 127.057, |
|
"eval_steps_per_second": 2.649, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0066841034582970065, |
|
"grad_norm": 0.8808642029762268, |
|
"learning_rate": 9.999455100261552e-06, |
|
"loss": 10.5156, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0066841034582970065, |
|
"eval_accuracy": 0.060472056202761026, |
|
"eval_loss": 10.5078125, |
|
"eval_runtime": 266.9758, |
|
"eval_samples_per_second": 126.48, |
|
"eval_steps_per_second": 2.637, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.006756756756756757, |
|
"grad_norm": 0.8804787993431091, |
|
"learning_rate": 9.999447834931707e-06, |
|
"loss": 10.5156, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.006756756756756757, |
|
"eval_accuracy": 0.06028976505400806, |
|
"eval_loss": 10.5078125, |
|
"eval_runtime": 267.6988, |
|
"eval_samples_per_second": 126.138, |
|
"eval_steps_per_second": 2.63, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.006829410055216507, |
|
"grad_norm": 0.8913342356681824, |
|
"learning_rate": 9.99944056960186e-06, |
|
"loss": 10.5156, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.006829410055216507, |
|
"eval_accuracy": 0.06017234836362501, |
|
"eval_loss": 10.5078125, |
|
"eval_runtime": 268.8716, |
|
"eval_samples_per_second": 125.588, |
|
"eval_steps_per_second": 2.618, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.006902063353676257, |
|
"grad_norm": 0.8503950834274292, |
|
"learning_rate": 9.999433304272015e-06, |
|
"loss": 10.5234, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.006902063353676257, |
|
"eval_accuracy": 0.0601403308933184, |
|
"eval_loss": 10.5, |
|
"eval_runtime": 267.2705, |
|
"eval_samples_per_second": 126.34, |
|
"eval_steps_per_second": 2.634, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.006974716652136007, |
|
"grad_norm": 0.912339985370636, |
|
"learning_rate": 9.99942603894217e-06, |
|
"loss": 10.5156, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.006974716652136007, |
|
"eval_accuracy": 0.06016896134371012, |
|
"eval_loss": 10.5, |
|
"eval_runtime": 268.9999, |
|
"eval_samples_per_second": 125.528, |
|
"eval_steps_per_second": 2.617, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.007047369950595757, |
|
"grad_norm": 0.8949794769287109, |
|
"learning_rate": 9.999418773612323e-06, |
|
"loss": 10.5078, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.007047369950595757, |
|
"eval_accuracy": 0.06026235045691912, |
|
"eval_loss": 10.5, |
|
"eval_runtime": 267.9858, |
|
"eval_samples_per_second": 126.003, |
|
"eval_steps_per_second": 2.627, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.007120023249055507, |
|
"grad_norm": 0.8988801836967468, |
|
"learning_rate": 9.999411508282477e-06, |
|
"loss": 10.5, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.007120023249055507, |
|
"eval_accuracy": 0.06031005822460927, |
|
"eval_loss": 10.5, |
|
"eval_runtime": 268.4028, |
|
"eval_samples_per_second": 125.807, |
|
"eval_steps_per_second": 2.623, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.007192676547515257, |
|
"grad_norm": 0.8954498767852783, |
|
"learning_rate": 9.999404242952631e-06, |
|
"loss": 10.5078, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.007192676547515257, |
|
"eval_accuracy": 0.06044918658111344, |
|
"eval_loss": 10.5, |
|
"eval_runtime": 266.438, |
|
"eval_samples_per_second": 126.735, |
|
"eval_steps_per_second": 2.642, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.007265329845975007, |
|
"grad_norm": 0.8816587328910828, |
|
"learning_rate": 9.999396977622785e-06, |
|
"loss": 10.5078, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.007265329845975007, |
|
"eval_accuracy": 0.060579746066721805, |
|
"eval_loss": 10.4921875, |
|
"eval_runtime": 268.404, |
|
"eval_samples_per_second": 125.807, |
|
"eval_steps_per_second": 2.623, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.007337983144434758, |
|
"grad_norm": 0.9182707071304321, |
|
"learning_rate": 9.999389712292939e-06, |
|
"loss": 10.5, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.007337983144434758, |
|
"eval_accuracy": 0.0607209587431736, |
|
"eval_loss": 10.4921875, |
|
"eval_runtime": 269.5331, |
|
"eval_samples_per_second": 125.28, |
|
"eval_steps_per_second": 2.612, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0074106364428945075, |
|
"grad_norm": 0.9346348643302917, |
|
"learning_rate": 9.999382446963093e-06, |
|
"loss": 10.4922, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0074106364428945075, |
|
"eval_accuracy": 0.06087233247937008, |
|
"eval_loss": 10.4921875, |
|
"eval_runtime": 268.6739, |
|
"eval_samples_per_second": 125.68, |
|
"eval_steps_per_second": 2.62, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.007483289741354257, |
|
"grad_norm": 1.0120168924331665, |
|
"learning_rate": 9.999375181633247e-06, |
|
"loss": 10.4922, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.007483289741354257, |
|
"eval_accuracy": 0.06115695794777395, |
|
"eval_loss": 10.4921875, |
|
"eval_runtime": 269.4069, |
|
"eval_samples_per_second": 125.338, |
|
"eval_steps_per_second": 2.613, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.007555943039814008, |
|
"grad_norm": 0.9036211967468262, |
|
"learning_rate": 9.999367916303401e-06, |
|
"loss": 10.4844, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.007555943039814008, |
|
"eval_accuracy": 0.06140849483700922, |
|
"eval_loss": 10.484375, |
|
"eval_runtime": 267.3403, |
|
"eval_samples_per_second": 126.307, |
|
"eval_steps_per_second": 2.633, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.007628596338273758, |
|
"grad_norm": 0.895473837852478, |
|
"learning_rate": 9.999360650973555e-06, |
|
"loss": 10.4922, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.007628596338273758, |
|
"eval_accuracy": 0.0616540103575069, |
|
"eval_loss": 10.484375, |
|
"eval_runtime": 268.6172, |
|
"eval_samples_per_second": 125.707, |
|
"eval_steps_per_second": 2.621, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.007701249636733507, |
|
"grad_norm": 0.908990204334259, |
|
"learning_rate": 9.999353385643709e-06, |
|
"loss": 10.4922, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.007701249636733507, |
|
"eval_accuracy": 0.06192879320393586, |
|
"eval_loss": 10.484375, |
|
"eval_runtime": 269.3754, |
|
"eval_samples_per_second": 125.353, |
|
"eval_steps_per_second": 2.613, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.007773902935193258, |
|
"grad_norm": 0.929440975189209, |
|
"learning_rate": 9.999346120313863e-06, |
|
"loss": 10.4844, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.007773902935193258, |
|
"eval_accuracy": 0.062228703685288995, |
|
"eval_loss": 10.484375, |
|
"eval_runtime": 268.7245, |
|
"eval_samples_per_second": 125.657, |
|
"eval_steps_per_second": 2.62, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.007846556233653008, |
|
"grad_norm": 0.9397541880607605, |
|
"learning_rate": 9.999338854984018e-06, |
|
"loss": 10.4922, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.007846556233653008, |
|
"eval_accuracy": 0.06252207171791763, |
|
"eval_loss": 10.4765625, |
|
"eval_runtime": 267.6862, |
|
"eval_samples_per_second": 126.144, |
|
"eval_steps_per_second": 2.63, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.007919209532112758, |
|
"grad_norm": 0.9697725772857666, |
|
"learning_rate": 9.99933158965417e-06, |
|
"loss": 10.4844, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.007919209532112758, |
|
"eval_accuracy": 0.06282742459024514, |
|
"eval_loss": 10.4765625, |
|
"eval_runtime": 266.1334, |
|
"eval_samples_per_second": 126.88, |
|
"eval_steps_per_second": 2.645, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.007991862830572507, |
|
"grad_norm": 0.8942638039588928, |
|
"learning_rate": 9.999324324324326e-06, |
|
"loss": 10.4766, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.007991862830572507, |
|
"eval_accuracy": 0.06300488127467513, |
|
"eval_loss": 10.4765625, |
|
"eval_runtime": 267.3455, |
|
"eval_samples_per_second": 126.305, |
|
"eval_steps_per_second": 2.633, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.008064516129032258, |
|
"grad_norm": 0.8714835047721863, |
|
"learning_rate": 9.999317058994478e-06, |
|
"loss": 10.4844, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.008064516129032258, |
|
"eval_accuracy": 0.06321102630727317, |
|
"eval_loss": 10.4765625, |
|
"eval_runtime": 266.4815, |
|
"eval_samples_per_second": 126.714, |
|
"eval_steps_per_second": 2.642, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.008137169427492008, |
|
"grad_norm": 0.8855974078178406, |
|
"learning_rate": 9.999309793664634e-06, |
|
"loss": 10.4766, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.008137169427492008, |
|
"eval_accuracy": 0.06342374273748387, |
|
"eval_loss": 10.4765625, |
|
"eval_runtime": 265.4409, |
|
"eval_samples_per_second": 127.211, |
|
"eval_steps_per_second": 2.652, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.008209822725951759, |
|
"grad_norm": 0.8999938368797302, |
|
"learning_rate": 9.999302528334788e-06, |
|
"loss": 10.4844, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.008209822725951759, |
|
"eval_accuracy": 0.06359850717531484, |
|
"eval_loss": 10.46875, |
|
"eval_runtime": 264.9726, |
|
"eval_samples_per_second": 127.436, |
|
"eval_steps_per_second": 2.657, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.008282476024411508, |
|
"grad_norm": 0.8735718727111816, |
|
"learning_rate": 9.99929526300494e-06, |
|
"loss": 10.4766, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.008282476024411508, |
|
"eval_accuracy": 0.0637932173970891, |
|
"eval_loss": 10.46875, |
|
"eval_runtime": 264.7038, |
|
"eval_samples_per_second": 127.565, |
|
"eval_steps_per_second": 2.66, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.008355129322871259, |
|
"grad_norm": 0.9054996371269226, |
|
"learning_rate": 9.999287997675096e-06, |
|
"loss": 10.4766, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.008355129322871259, |
|
"eval_accuracy": 0.06404524641742311, |
|
"eval_loss": 10.46875, |
|
"eval_runtime": 265.6768, |
|
"eval_samples_per_second": 127.098, |
|
"eval_steps_per_second": 2.65, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.00842778262133101, |
|
"grad_norm": 0.9062832593917847, |
|
"learning_rate": 9.999280732345248e-06, |
|
"loss": 10.4844, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.00842778262133101, |
|
"eval_accuracy": 0.06427657698272166, |
|
"eval_loss": 10.46875, |
|
"eval_runtime": 264.6617, |
|
"eval_samples_per_second": 127.586, |
|
"eval_steps_per_second": 2.66, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.008500435919790758, |
|
"grad_norm": 0.9465892910957336, |
|
"learning_rate": 9.999273467015404e-06, |
|
"loss": 10.4531, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.008500435919790758, |
|
"eval_accuracy": 0.06444393050518328, |
|
"eval_loss": 10.4609375, |
|
"eval_runtime": 265.0601, |
|
"eval_samples_per_second": 127.394, |
|
"eval_steps_per_second": 2.656, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.008573089218250509, |
|
"grad_norm": 0.9980528354644775, |
|
"learning_rate": 9.999266201685556e-06, |
|
"loss": 10.4609, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.008573089218250509, |
|
"eval_accuracy": 0.06469538054775407, |
|
"eval_loss": 10.4609375, |
|
"eval_runtime": 265.9103, |
|
"eval_samples_per_second": 126.986, |
|
"eval_steps_per_second": 2.648, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.00864574251671026, |
|
"grad_norm": 0.9110475778579712, |
|
"learning_rate": 9.999258936355712e-06, |
|
"loss": 10.4609, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.00864574251671026, |
|
"eval_accuracy": 0.06482475312894781, |
|
"eval_loss": 10.4609375, |
|
"eval_runtime": 268.5579, |
|
"eval_samples_per_second": 125.735, |
|
"eval_steps_per_second": 2.621, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.008718395815170008, |
|
"grad_norm": 0.8688368797302246, |
|
"learning_rate": 9.999251671025866e-06, |
|
"loss": 10.4688, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.008718395815170008, |
|
"eval_accuracy": 0.06493464310840887, |
|
"eval_loss": 10.4609375, |
|
"eval_runtime": 268.4968, |
|
"eval_samples_per_second": 125.763, |
|
"eval_steps_per_second": 2.622, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.008791049113629759, |
|
"grad_norm": 0.8964656591415405, |
|
"learning_rate": 9.99924440569602e-06, |
|
"loss": 10.4609, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.008791049113629759, |
|
"eval_accuracy": 0.06505721270088466, |
|
"eval_loss": 10.4609375, |
|
"eval_runtime": 268.2266, |
|
"eval_samples_per_second": 125.89, |
|
"eval_steps_per_second": 2.625, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.00886370241208951, |
|
"grad_norm": 0.917405903339386, |
|
"learning_rate": 9.999237140366174e-06, |
|
"loss": 10.4609, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.00886370241208951, |
|
"eval_accuracy": 0.06532562679191808, |
|
"eval_loss": 10.453125, |
|
"eval_runtime": 267.3486, |
|
"eval_samples_per_second": 126.303, |
|
"eval_steps_per_second": 2.633, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.008936355710549258, |
|
"grad_norm": 0.9321388602256775, |
|
"learning_rate": 9.999229875036328e-06, |
|
"loss": 10.4531, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.008936355710549258, |
|
"eval_accuracy": 0.06563454037748945, |
|
"eval_loss": 10.453125, |
|
"eval_runtime": 268.6664, |
|
"eval_samples_per_second": 125.684, |
|
"eval_steps_per_second": 2.62, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.009009009009009009, |
|
"grad_norm": 1.0306340456008911, |
|
"learning_rate": 9.999222609706481e-06, |
|
"loss": 10.4531, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.009009009009009009, |
|
"eval_accuracy": 0.06589062224216607, |
|
"eval_loss": 10.453125, |
|
"eval_runtime": 269.4306, |
|
"eval_samples_per_second": 125.327, |
|
"eval_steps_per_second": 2.613, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.00908166230746876, |
|
"grad_norm": 0.90235435962677, |
|
"learning_rate": 9.999215344376635e-06, |
|
"loss": 10.4531, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.00908166230746876, |
|
"eval_accuracy": 0.06600340711044328, |
|
"eval_loss": 10.453125, |
|
"eval_runtime": 268.5113, |
|
"eval_samples_per_second": 125.756, |
|
"eval_steps_per_second": 2.622, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.009154315605928508, |
|
"grad_norm": 0.8829610347747803, |
|
"learning_rate": 9.99920807904679e-06, |
|
"loss": 10.4531, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.009154315605928508, |
|
"eval_accuracy": 0.06620975478525845, |
|
"eval_loss": 10.4453125, |
|
"eval_runtime": 268.5597, |
|
"eval_samples_per_second": 125.734, |
|
"eval_steps_per_second": 2.621, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.009226968904388259, |
|
"grad_norm": 0.9231570959091187, |
|
"learning_rate": 9.999200813716943e-06, |
|
"loss": 10.4531, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.009226968904388259, |
|
"eval_accuracy": 0.06642637931537096, |
|
"eval_loss": 10.4453125, |
|
"eval_runtime": 266.401, |
|
"eval_samples_per_second": 126.753, |
|
"eval_steps_per_second": 2.643, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.00929962220284801, |
|
"grad_norm": 0.9046792984008789, |
|
"learning_rate": 9.999193548387097e-06, |
|
"loss": 10.4453, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.00929962220284801, |
|
"eval_accuracy": 0.06673063212994831, |
|
"eval_loss": 10.4453125, |
|
"eval_runtime": 268.2718, |
|
"eval_samples_per_second": 125.869, |
|
"eval_steps_per_second": 2.624, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.009372275501307759, |
|
"grad_norm": 0.9026487469673157, |
|
"learning_rate": 9.999186283057251e-06, |
|
"loss": 10.4531, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.009372275501307759, |
|
"eval_accuracy": 0.06701204427176626, |
|
"eval_loss": 10.4453125, |
|
"eval_runtime": 266.0718, |
|
"eval_samples_per_second": 126.909, |
|
"eval_steps_per_second": 2.646, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.00944492879976751, |
|
"grad_norm": 0.918516218662262, |
|
"learning_rate": 9.999179017727407e-06, |
|
"loss": 10.4375, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.00944492879976751, |
|
"eval_accuracy": 0.0673045727866382, |
|
"eval_loss": 10.4453125, |
|
"eval_runtime": 267.2937, |
|
"eval_samples_per_second": 126.329, |
|
"eval_steps_per_second": 2.634, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.00951758209822726, |
|
"grad_norm": 0.9067806601524353, |
|
"learning_rate": 9.999171752397559e-06, |
|
"loss": 10.4453, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.00951758209822726, |
|
"eval_accuracy": 0.06758094782191605, |
|
"eval_loss": 10.4375, |
|
"eval_runtime": 266.3825, |
|
"eval_samples_per_second": 126.761, |
|
"eval_steps_per_second": 2.643, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.00959023539668701, |
|
"grad_norm": 0.9461184740066528, |
|
"learning_rate": 9.999164487067715e-06, |
|
"loss": 10.4375, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.00959023539668701, |
|
"eval_accuracy": 0.06776827607720912, |
|
"eval_loss": 10.4375, |
|
"eval_runtime": 266.9747, |
|
"eval_samples_per_second": 126.48, |
|
"eval_steps_per_second": 2.637, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.00966288869514676, |
|
"grad_norm": 0.9130184054374695, |
|
"learning_rate": 9.999157221737867e-06, |
|
"loss": 10.4375, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.00966288869514676, |
|
"eval_accuracy": 0.06790248312272583, |
|
"eval_loss": 10.4375, |
|
"eval_runtime": 267.3131, |
|
"eval_samples_per_second": 126.32, |
|
"eval_steps_per_second": 2.634, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.00973554199360651, |
|
"grad_norm": 0.9464238882064819, |
|
"learning_rate": 9.999149956408023e-06, |
|
"loss": 10.4297, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.00973554199360651, |
|
"eval_accuracy": 0.06788108989437448, |
|
"eval_loss": 10.4375, |
|
"eval_runtime": 267.1185, |
|
"eval_samples_per_second": 126.412, |
|
"eval_steps_per_second": 2.636, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.00980819529206626, |
|
"grad_norm": 0.8799238204956055, |
|
"learning_rate": 9.999142691078175e-06, |
|
"loss": 10.4453, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.00980819529206626, |
|
"eval_accuracy": 0.06779870135866685, |
|
"eval_loss": 10.4296875, |
|
"eval_runtime": 266.9212, |
|
"eval_samples_per_second": 126.505, |
|
"eval_steps_per_second": 2.637, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.00988084859052601, |
|
"grad_norm": 0.8914628624916077, |
|
"learning_rate": 9.99913542574833e-06, |
|
"loss": 10.4375, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.00988084859052601, |
|
"eval_accuracy": 0.06771660231184085, |
|
"eval_loss": 10.4296875, |
|
"eval_runtime": 265.5018, |
|
"eval_samples_per_second": 127.182, |
|
"eval_steps_per_second": 2.652, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.00995350188898576, |
|
"grad_norm": 0.9166758060455322, |
|
"learning_rate": 9.999128160418484e-06, |
|
"loss": 10.4375, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.00995350188898576, |
|
"eval_accuracy": 0.0677075992076226, |
|
"eval_loss": 10.4296875, |
|
"eval_runtime": 266.035, |
|
"eval_samples_per_second": 126.927, |
|
"eval_steps_per_second": 2.646, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.01002615518744551, |
|
"grad_norm": 0.9371738433837891, |
|
"learning_rate": 9.999120895088638e-06, |
|
"loss": 10.4219, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.01002615518744551, |
|
"eval_accuracy": 0.0677291950781911, |
|
"eval_loss": 10.4296875, |
|
"eval_runtime": 266.3525, |
|
"eval_samples_per_second": 126.776, |
|
"eval_steps_per_second": 2.643, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.01009880848590526, |
|
"grad_norm": 0.8814043998718262, |
|
"learning_rate": 9.999113629758792e-06, |
|
"loss": 10.4375, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.01009880848590526, |
|
"eval_accuracy": 0.06779661703871923, |
|
"eval_loss": 10.421875, |
|
"eval_runtime": 266.1042, |
|
"eval_samples_per_second": 126.894, |
|
"eval_steps_per_second": 2.646, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.01017146178436501, |
|
"grad_norm": 0.9055945873260498, |
|
"learning_rate": 9.999106364428946e-06, |
|
"loss": 10.4297, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01017146178436501, |
|
"eval_accuracy": 0.06796996298103028, |
|
"eval_loss": 10.421875, |
|
"eval_runtime": 267.0593, |
|
"eval_samples_per_second": 126.44, |
|
"eval_steps_per_second": 2.636, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.01024411508282476, |
|
"grad_norm": 0.8938325643539429, |
|
"learning_rate": 9.9990990990991e-06, |
|
"loss": 10.4297, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.01024411508282476, |
|
"eval_accuracy": 0.06815485952971778, |
|
"eval_loss": 10.421875, |
|
"eval_runtime": 267.7346, |
|
"eval_samples_per_second": 126.121, |
|
"eval_steps_per_second": 2.629, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.01031676838128451, |
|
"grad_norm": 0.9098795056343079, |
|
"learning_rate": 9.999091833769254e-06, |
|
"loss": 10.4219, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.01031676838128451, |
|
"eval_accuracy": 0.06841157826993396, |
|
"eval_loss": 10.421875, |
|
"eval_runtime": 268.1424, |
|
"eval_samples_per_second": 125.929, |
|
"eval_steps_per_second": 2.625, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.01038942167974426, |
|
"grad_norm": 0.907673716545105, |
|
"learning_rate": 9.999084568439408e-06, |
|
"loss": 10.4219, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.01038942167974426, |
|
"eval_accuracy": 0.06867860281433565, |
|
"eval_loss": 10.421875, |
|
"eval_runtime": 268.3436, |
|
"eval_samples_per_second": 125.835, |
|
"eval_steps_per_second": 2.624, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.010462074978204011, |
|
"grad_norm": 0.9119425415992737, |
|
"learning_rate": 9.999077303109562e-06, |
|
"loss": 10.4219, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.010462074978204011, |
|
"eval_accuracy": 0.06893219507463037, |
|
"eval_loss": 10.4140625, |
|
"eval_runtime": 267.8804, |
|
"eval_samples_per_second": 126.053, |
|
"eval_steps_per_second": 2.628, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.01053472827666376, |
|
"grad_norm": 0.8991184830665588, |
|
"learning_rate": 9.999070037779716e-06, |
|
"loss": 10.4219, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.01053472827666376, |
|
"eval_accuracy": 0.0691721234597129, |
|
"eval_loss": 10.4140625, |
|
"eval_runtime": 268.0954, |
|
"eval_samples_per_second": 125.951, |
|
"eval_steps_per_second": 2.626, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.01060738157512351, |
|
"grad_norm": 1.0204856395721436, |
|
"learning_rate": 9.99906277244987e-06, |
|
"loss": 10.4141, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.01060738157512351, |
|
"eval_accuracy": 0.06934083757991812, |
|
"eval_loss": 10.4140625, |
|
"eval_runtime": 267.5128, |
|
"eval_samples_per_second": 126.226, |
|
"eval_steps_per_second": 2.632, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.010680034873583261, |
|
"grad_norm": 0.9581719040870667, |
|
"learning_rate": 9.999055507120024e-06, |
|
"loss": 10.4062, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.010680034873583261, |
|
"eval_accuracy": 0.06947478408544137, |
|
"eval_loss": 10.4140625, |
|
"eval_runtime": 267.7655, |
|
"eval_samples_per_second": 126.107, |
|
"eval_steps_per_second": 2.629, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.010752688172043012, |
|
"grad_norm": 0.9252108931541443, |
|
"learning_rate": 9.999048241790178e-06, |
|
"loss": 10.4141, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.010752688172043012, |
|
"eval_accuracy": 0.06958163443164546, |
|
"eval_loss": 10.40625, |
|
"eval_runtime": 266.9258, |
|
"eval_samples_per_second": 126.503, |
|
"eval_steps_per_second": 2.637, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.01082534147050276, |
|
"grad_norm": 0.8792810440063477, |
|
"learning_rate": 9.999040976460332e-06, |
|
"loss": 10.4141, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.01082534147050276, |
|
"eval_accuracy": 0.0696740971804333, |
|
"eval_loss": 10.40625, |
|
"eval_runtime": 267.4905, |
|
"eval_samples_per_second": 126.236, |
|
"eval_steps_per_second": 2.632, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.010897994768962511, |
|
"grad_norm": 0.8908605575561523, |
|
"learning_rate": 9.999033711130486e-06, |
|
"loss": 10.4219, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.010897994768962511, |
|
"eval_accuracy": 0.06974991431852827, |
|
"eval_loss": 10.40625, |
|
"eval_runtime": 268.0407, |
|
"eval_samples_per_second": 125.977, |
|
"eval_steps_per_second": 2.626, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.010970648067422262, |
|
"grad_norm": 0.9047368764877319, |
|
"learning_rate": 9.99902644580064e-06, |
|
"loss": 10.4062, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.010970648067422262, |
|
"eval_accuracy": 0.06984810894717207, |
|
"eval_loss": 10.40625, |
|
"eval_runtime": 268.9337, |
|
"eval_samples_per_second": 125.559, |
|
"eval_steps_per_second": 2.618, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.01104330136588201, |
|
"grad_norm": 0.9523606300354004, |
|
"learning_rate": 9.999019180470794e-06, |
|
"loss": 10.4141, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.01104330136588201, |
|
"eval_accuracy": 0.06996532299533799, |
|
"eval_loss": 10.40625, |
|
"eval_runtime": 268.7819, |
|
"eval_samples_per_second": 125.63, |
|
"eval_steps_per_second": 2.619, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.011115954664341761, |
|
"grad_norm": 0.9536779522895813, |
|
"learning_rate": 9.999011915140948e-06, |
|
"loss": 10.4141, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.011115954664341761, |
|
"eval_accuracy": 0.0701369030554712, |
|
"eval_loss": 10.3984375, |
|
"eval_runtime": 267.3646, |
|
"eval_samples_per_second": 126.296, |
|
"eval_steps_per_second": 2.633, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.011188607962801512, |
|
"grad_norm": 0.8978484272956848, |
|
"learning_rate": 9.999004649811103e-06, |
|
"loss": 10.4219, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.011188607962801512, |
|
"eval_accuracy": 0.07024372445278713, |
|
"eval_loss": 10.3984375, |
|
"eval_runtime": 267.0372, |
|
"eval_samples_per_second": 126.451, |
|
"eval_steps_per_second": 2.636, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.01126126126126126, |
|
"grad_norm": 0.9067463874816895, |
|
"learning_rate": 9.998997384481255e-06, |
|
"loss": 10.4141, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.01126126126126126, |
|
"eval_accuracy": 0.07035488818332729, |
|
"eval_loss": 10.3984375, |
|
"eval_runtime": 266.8902, |
|
"eval_samples_per_second": 126.52, |
|
"eval_steps_per_second": 2.638, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.011333914559721011, |
|
"grad_norm": 0.8969941735267639, |
|
"learning_rate": 9.998990119151411e-06, |
|
"loss": 10.4062, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.011333914559721011, |
|
"eval_accuracy": 0.07053625296765909, |
|
"eval_loss": 10.3984375, |
|
"eval_runtime": 266.2466, |
|
"eval_samples_per_second": 126.826, |
|
"eval_steps_per_second": 2.644, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.011406567858180762, |
|
"grad_norm": 0.9160457253456116, |
|
"learning_rate": 9.998982853821563e-06, |
|
"loss": 10.4062, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.011406567858180762, |
|
"eval_accuracy": 0.07071909414528711, |
|
"eval_loss": 10.390625, |
|
"eval_runtime": 266.1843, |
|
"eval_samples_per_second": 126.856, |
|
"eval_steps_per_second": 2.645, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.01147922115664051, |
|
"grad_norm": 0.9947687387466431, |
|
"learning_rate": 9.998975588491719e-06, |
|
"loss": 10.3906, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.01147922115664051, |
|
"eval_accuracy": 0.07083940572448631, |
|
"eval_loss": 10.390625, |
|
"eval_runtime": 266.0309, |
|
"eval_samples_per_second": 126.929, |
|
"eval_steps_per_second": 2.646, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.011551874455100261, |
|
"grad_norm": 0.9923911690711975, |
|
"learning_rate": 9.998968323161873e-06, |
|
"loss": 10.3906, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.011551874455100261, |
|
"eval_accuracy": 0.07099616395388084, |
|
"eval_loss": 10.390625, |
|
"eval_runtime": 265.8514, |
|
"eval_samples_per_second": 127.015, |
|
"eval_steps_per_second": 2.648, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.011624527753560012, |
|
"grad_norm": 0.8818120360374451, |
|
"learning_rate": 9.998961057832027e-06, |
|
"loss": 10.3984, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.011624527753560012, |
|
"eval_accuracy": 0.07109409804253118, |
|
"eval_loss": 10.390625, |
|
"eval_runtime": 265.1784, |
|
"eval_samples_per_second": 127.337, |
|
"eval_steps_per_second": 2.655, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.011697181052019761, |
|
"grad_norm": 0.8733471035957336, |
|
"learning_rate": 9.99895379250218e-06, |
|
"loss": 10.3984, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.011697181052019761, |
|
"eval_accuracy": 0.07113795560809585, |
|
"eval_loss": 10.390625, |
|
"eval_runtime": 266.4251, |
|
"eval_samples_per_second": 126.741, |
|
"eval_steps_per_second": 2.642, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.011769834350479512, |
|
"grad_norm": 0.8769287467002869, |
|
"learning_rate": 9.998946527172335e-06, |
|
"loss": 10.3906, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.011769834350479512, |
|
"eval_accuracy": 0.07117098628948813, |
|
"eval_loss": 10.3828125, |
|
"eval_runtime": 266.1962, |
|
"eval_samples_per_second": 126.85, |
|
"eval_steps_per_second": 2.645, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.011842487648939262, |
|
"grad_norm": 0.8957408666610718, |
|
"learning_rate": 9.998939261842489e-06, |
|
"loss": 10.3906, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.011842487648939262, |
|
"eval_accuracy": 0.07124170842326667, |
|
"eval_loss": 10.3828125, |
|
"eval_runtime": 266.0683, |
|
"eval_samples_per_second": 126.911, |
|
"eval_steps_per_second": 2.646, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.011915140947399011, |
|
"grad_norm": 0.9207865595817566, |
|
"learning_rate": 9.998931996512643e-06, |
|
"loss": 10.3906, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.011915140947399011, |
|
"eval_accuracy": 0.07135113522051714, |
|
"eval_loss": 10.3828125, |
|
"eval_runtime": 266.3681, |
|
"eval_samples_per_second": 126.768, |
|
"eval_steps_per_second": 2.643, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.011987794245858762, |
|
"grad_norm": 1.0381028652191162, |
|
"learning_rate": 9.998924731182797e-06, |
|
"loss": 10.3828, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.011987794245858762, |
|
"eval_accuracy": 0.0714838079749613, |
|
"eval_loss": 10.3828125, |
|
"eval_runtime": 266.8907, |
|
"eval_samples_per_second": 126.52, |
|
"eval_steps_per_second": 2.638, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.012060447544318512, |
|
"grad_norm": 1.0229851007461548, |
|
"learning_rate": 9.99891746585295e-06, |
|
"loss": 10.375, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.012060447544318512, |
|
"eval_accuracy": 0.07162267579147201, |
|
"eval_loss": 10.375, |
|
"eval_runtime": 268.0765, |
|
"eval_samples_per_second": 125.96, |
|
"eval_steps_per_second": 2.626, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.012133100842778263, |
|
"grad_norm": 0.8751774430274963, |
|
"learning_rate": 9.998910200523105e-06, |
|
"loss": 10.3828, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.012133100842778263, |
|
"eval_accuracy": 0.07171681757577321, |
|
"eval_loss": 10.375, |
|
"eval_runtime": 266.5398, |
|
"eval_samples_per_second": 126.687, |
|
"eval_steps_per_second": 2.641, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.012205754141238012, |
|
"grad_norm": 0.9067946672439575, |
|
"learning_rate": 9.998902935193258e-06, |
|
"loss": 10.3828, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.012205754141238012, |
|
"eval_accuracy": 0.07183573960834065, |
|
"eval_loss": 10.375, |
|
"eval_runtime": 269.0409, |
|
"eval_samples_per_second": 125.509, |
|
"eval_steps_per_second": 2.617, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.012278407439697762, |
|
"grad_norm": 0.8819664120674133, |
|
"learning_rate": 9.998895669863412e-06, |
|
"loss": 10.3828, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.012278407439697762, |
|
"eval_accuracy": 0.07194832183440072, |
|
"eval_loss": 10.375, |
|
"eval_runtime": 269.4359, |
|
"eval_samples_per_second": 125.325, |
|
"eval_steps_per_second": 2.613, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.012351060738157513, |
|
"grad_norm": 0.8808407783508301, |
|
"learning_rate": 9.998888404533566e-06, |
|
"loss": 10.3828, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.012351060738157513, |
|
"eval_accuracy": 0.0720716440979687, |
|
"eval_loss": 10.375, |
|
"eval_runtime": 269.1418, |
|
"eval_samples_per_second": 125.462, |
|
"eval_steps_per_second": 2.616, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.012423714036617262, |
|
"grad_norm": 0.9482495188713074, |
|
"learning_rate": 9.99888113920372e-06, |
|
"loss": 10.3672, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.012423714036617262, |
|
"eval_accuracy": 0.07210840918593382, |
|
"eval_loss": 10.3671875, |
|
"eval_runtime": 268.1791, |
|
"eval_samples_per_second": 125.912, |
|
"eval_steps_per_second": 2.625, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.012496367335077013, |
|
"grad_norm": 0.8792570233345032, |
|
"learning_rate": 9.998873873873874e-06, |
|
"loss": 10.375, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.012496367335077013, |
|
"eval_accuracy": 0.07209656909067576, |
|
"eval_loss": 10.3671875, |
|
"eval_runtime": 267.4096, |
|
"eval_samples_per_second": 126.274, |
|
"eval_steps_per_second": 2.633, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.012569020633536763, |
|
"grad_norm": 1.0035219192504883, |
|
"learning_rate": 9.998866608544028e-06, |
|
"loss": 10.3594, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.012569020633536763, |
|
"eval_accuracy": 0.07205164041624912, |
|
"eval_loss": 10.3671875, |
|
"eval_runtime": 267.2327, |
|
"eval_samples_per_second": 126.358, |
|
"eval_steps_per_second": 2.634, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.012641673931996512, |
|
"grad_norm": 0.9102580547332764, |
|
"learning_rate": 9.998859343214182e-06, |
|
"loss": 10.375, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.012641673931996512, |
|
"eval_accuracy": 0.07202917607903579, |
|
"eval_loss": 10.3671875, |
|
"eval_runtime": 267.9463, |
|
"eval_samples_per_second": 126.022, |
|
"eval_steps_per_second": 2.627, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.012714327230456263, |
|
"grad_norm": 0.9545760154724121, |
|
"learning_rate": 9.998852077884336e-06, |
|
"loss": 10.3594, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.012714327230456263, |
|
"eval_accuracy": 0.0720582697116381, |
|
"eval_loss": 10.359375, |
|
"eval_runtime": 266.8569, |
|
"eval_samples_per_second": 126.536, |
|
"eval_steps_per_second": 2.638, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.012786980528916013, |
|
"grad_norm": 0.9329653978347778, |
|
"learning_rate": 9.998844812554492e-06, |
|
"loss": 10.3672, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.012786980528916013, |
|
"eval_accuracy": 0.07217727859087003, |
|
"eval_loss": 10.359375, |
|
"eval_runtime": 265.4766, |
|
"eval_samples_per_second": 127.194, |
|
"eval_steps_per_second": 2.652, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.012859633827375762, |
|
"grad_norm": 0.903916597366333, |
|
"learning_rate": 9.998837547224644e-06, |
|
"loss": 10.375, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.012859633827375762, |
|
"eval_accuracy": 0.07231886760286792, |
|
"eval_loss": 10.359375, |
|
"eval_runtime": 266.8553, |
|
"eval_samples_per_second": 126.537, |
|
"eval_steps_per_second": 2.638, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.012932287125835513, |
|
"grad_norm": 0.9190238118171692, |
|
"learning_rate": 9.9988302818948e-06, |
|
"loss": 10.3672, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.012932287125835513, |
|
"eval_accuracy": 0.07257023079877423, |
|
"eval_loss": 10.359375, |
|
"eval_runtime": 266.7383, |
|
"eval_samples_per_second": 126.592, |
|
"eval_steps_per_second": 2.639, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.013004940424295263, |
|
"grad_norm": 0.8731828927993774, |
|
"learning_rate": 9.998823016564952e-06, |
|
"loss": 10.3672, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.013004940424295263, |
|
"eval_accuracy": 0.0727295075814388, |
|
"eval_loss": 10.359375, |
|
"eval_runtime": 266.7924, |
|
"eval_samples_per_second": 126.567, |
|
"eval_steps_per_second": 2.639, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.013077593722755012, |
|
"grad_norm": 0.8964665532112122, |
|
"learning_rate": 9.998815751235107e-06, |
|
"loss": 10.3594, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.013077593722755012, |
|
"eval_accuracy": 0.0728463452940586, |
|
"eval_loss": 10.3515625, |
|
"eval_runtime": 264.1125, |
|
"eval_samples_per_second": 127.851, |
|
"eval_steps_per_second": 2.666, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.013150247021214763, |
|
"grad_norm": 0.8609874844551086, |
|
"learning_rate": 9.998808485905261e-06, |
|
"loss": 10.3672, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.013150247021214763, |
|
"eval_accuracy": 0.07293759218954365, |
|
"eval_loss": 10.3515625, |
|
"eval_runtime": 264.4015, |
|
"eval_samples_per_second": 127.711, |
|
"eval_steps_per_second": 2.663, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.013222900319674514, |
|
"grad_norm": 0.9078623056411743, |
|
"learning_rate": 9.998801220575415e-06, |
|
"loss": 10.3594, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.013222900319674514, |
|
"eval_accuracy": 0.0730243230584755, |
|
"eval_loss": 10.3515625, |
|
"eval_runtime": 265.1597, |
|
"eval_samples_per_second": 127.346, |
|
"eval_steps_per_second": 2.655, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.013295553618134264, |
|
"grad_norm": 0.9211888909339905, |
|
"learning_rate": 9.99879395524557e-06, |
|
"loss": 10.3516, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.013295553618134264, |
|
"eval_accuracy": 0.07311939120719788, |
|
"eval_loss": 10.3515625, |
|
"eval_runtime": 265.0761, |
|
"eval_samples_per_second": 127.386, |
|
"eval_steps_per_second": 2.656, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.013368206916594013, |
|
"grad_norm": 0.9223811030387878, |
|
"learning_rate": 9.998786689915723e-06, |
|
"loss": 10.3594, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.013368206916594013, |
|
"eval_accuracy": 0.07324948751059565, |
|
"eval_loss": 10.3515625, |
|
"eval_runtime": 267.2168, |
|
"eval_samples_per_second": 126.366, |
|
"eval_steps_per_second": 2.635, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.013440860215053764, |
|
"grad_norm": 0.8940264582633972, |
|
"learning_rate": 9.998779424585877e-06, |
|
"loss": 10.3516, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.013440860215053764, |
|
"eval_accuracy": 0.07333268661517181, |
|
"eval_loss": 10.34375, |
|
"eval_runtime": 266.2508, |
|
"eval_samples_per_second": 126.824, |
|
"eval_steps_per_second": 2.644, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.013513513513513514, |
|
"grad_norm": 0.8663957118988037, |
|
"learning_rate": 9.998772159256031e-06, |
|
"loss": 10.3516, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.013513513513513514, |
|
"eval_accuracy": 0.07334869535032512, |
|
"eval_loss": 10.34375, |
|
"eval_runtime": 268.3928, |
|
"eval_samples_per_second": 125.812, |
|
"eval_steps_per_second": 2.623, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.013586166811973263, |
|
"grad_norm": 0.899961531162262, |
|
"learning_rate": 9.998764893926185e-06, |
|
"loss": 10.3438, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.013586166811973263, |
|
"eval_accuracy": 0.07337804952292087, |
|
"eval_loss": 10.34375, |
|
"eval_runtime": 268.2225, |
|
"eval_samples_per_second": 125.892, |
|
"eval_steps_per_second": 2.625, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.013658820110433014, |
|
"grad_norm": 0.8721891045570374, |
|
"learning_rate": 9.998757628596339e-06, |
|
"loss": 10.3516, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.013658820110433014, |
|
"eval_accuracy": 0.07342506251729515, |
|
"eval_loss": 10.34375, |
|
"eval_runtime": 268.3043, |
|
"eval_samples_per_second": 125.853, |
|
"eval_steps_per_second": 2.624, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.013731473408892764, |
|
"grad_norm": 0.9019783735275269, |
|
"learning_rate": 9.998750363266493e-06, |
|
"loss": 10.3516, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.013731473408892764, |
|
"eval_accuracy": 0.07348243921363125, |
|
"eval_loss": 10.3359375, |
|
"eval_runtime": 267.616, |
|
"eval_samples_per_second": 126.177, |
|
"eval_steps_per_second": 2.631, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.013804126707352513, |
|
"grad_norm": 0.9109626412391663, |
|
"learning_rate": 9.998743097936647e-06, |
|
"loss": 10.3438, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.013804126707352513, |
|
"eval_accuracy": 0.07349436615555378, |
|
"eval_loss": 10.3359375, |
|
"eval_runtime": 268.802, |
|
"eval_samples_per_second": 125.62, |
|
"eval_steps_per_second": 2.619, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.013876780005812264, |
|
"grad_norm": 0.8707013130187988, |
|
"learning_rate": 9.998735832606801e-06, |
|
"loss": 10.3516, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.013876780005812264, |
|
"eval_accuracy": 0.07354867426974475, |
|
"eval_loss": 10.3359375, |
|
"eval_runtime": 269.0333, |
|
"eval_samples_per_second": 125.512, |
|
"eval_steps_per_second": 2.617, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.013949433304272014, |
|
"grad_norm": 0.9611899256706238, |
|
"learning_rate": 9.998728567276955e-06, |
|
"loss": 10.3359, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.013949433304272014, |
|
"eval_accuracy": 0.07366001169361389, |
|
"eval_loss": 10.3359375, |
|
"eval_runtime": 266.4167, |
|
"eval_samples_per_second": 126.745, |
|
"eval_steps_per_second": 2.642, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.014022086602731763, |
|
"grad_norm": 0.8997408151626587, |
|
"learning_rate": 9.99872130194711e-06, |
|
"loss": 10.3359, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.014022086602731763, |
|
"eval_accuracy": 0.07367448613769463, |
|
"eval_loss": 10.3359375, |
|
"eval_runtime": 267.3071, |
|
"eval_samples_per_second": 126.323, |
|
"eval_steps_per_second": 2.634, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.014094739901191514, |
|
"grad_norm": 0.8796170949935913, |
|
"learning_rate": 9.998714036617263e-06, |
|
"loss": 10.3359, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.014094739901191514, |
|
"eval_accuracy": 0.07364093437631546, |
|
"eval_loss": 10.328125, |
|
"eval_runtime": 267.1483, |
|
"eval_samples_per_second": 126.398, |
|
"eval_steps_per_second": 2.635, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.014167393199651265, |
|
"grad_norm": 0.9038819670677185, |
|
"learning_rate": 9.998706771287417e-06, |
|
"loss": 10.3359, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.014167393199651265, |
|
"eval_accuracy": 0.0735885658376313, |
|
"eval_loss": 10.328125, |
|
"eval_runtime": 267.3865, |
|
"eval_samples_per_second": 126.285, |
|
"eval_steps_per_second": 2.633, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.014240046498111014, |
|
"grad_norm": 0.9091231822967529, |
|
"learning_rate": 9.99869950595757e-06, |
|
"loss": 10.3359, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.014240046498111014, |
|
"eval_accuracy": 0.0736078168482587, |
|
"eval_loss": 10.328125, |
|
"eval_runtime": 267.9665, |
|
"eval_samples_per_second": 126.012, |
|
"eval_steps_per_second": 2.627, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.014312699796570764, |
|
"grad_norm": 0.9065552949905396, |
|
"learning_rate": 9.998692240627725e-06, |
|
"loss": 10.3281, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.014312699796570764, |
|
"eval_accuracy": 0.07369920848818455, |
|
"eval_loss": 10.328125, |
|
"eval_runtime": 267.2862, |
|
"eval_samples_per_second": 126.333, |
|
"eval_steps_per_second": 2.634, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.014385353095030515, |
|
"grad_norm": 0.8987371921539307, |
|
"learning_rate": 9.99868497529788e-06, |
|
"loss": 10.3359, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.014385353095030515, |
|
"eval_accuracy": 0.07384696361336085, |
|
"eval_loss": 10.328125, |
|
"eval_runtime": 267.5117, |
|
"eval_samples_per_second": 126.226, |
|
"eval_steps_per_second": 2.632, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.014458006393490264, |
|
"grad_norm": 0.891822338104248, |
|
"learning_rate": 9.998677709968032e-06, |
|
"loss": 10.3203, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.014458006393490264, |
|
"eval_accuracy": 0.07396643567480336, |
|
"eval_loss": 10.3203125, |
|
"eval_runtime": 265.9448, |
|
"eval_samples_per_second": 126.97, |
|
"eval_steps_per_second": 2.647, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.014530659691950014, |
|
"grad_norm": 0.8724116683006287, |
|
"learning_rate": 9.998670444638188e-06, |
|
"loss": 10.3359, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.014530659691950014, |
|
"eval_accuracy": 0.07408226017633752, |
|
"eval_loss": 10.3203125, |
|
"eval_runtime": 266.1838, |
|
"eval_samples_per_second": 126.856, |
|
"eval_steps_per_second": 2.645, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.014603312990409765, |
|
"grad_norm": 0.8940464854240417, |
|
"learning_rate": 9.99866317930834e-06, |
|
"loss": 10.3359, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.014603312990409765, |
|
"eval_accuracy": 0.07420095061779967, |
|
"eval_loss": 10.3203125, |
|
"eval_runtime": 264.9987, |
|
"eval_samples_per_second": 127.423, |
|
"eval_steps_per_second": 2.657, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.014675966288869515, |
|
"grad_norm": 0.9207845330238342, |
|
"learning_rate": 9.998655913978496e-06, |
|
"loss": 10.3281, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.014675966288869515, |
|
"eval_accuracy": 0.0742804732135793, |
|
"eval_loss": 10.3203125, |
|
"eval_runtime": 264.7467, |
|
"eval_samples_per_second": 127.545, |
|
"eval_steps_per_second": 2.659, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.014748619587329264, |
|
"grad_norm": 0.8840688467025757, |
|
"learning_rate": 9.998648648648648e-06, |
|
"loss": 10.3203, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.014748619587329264, |
|
"eval_accuracy": 0.07432360705693994, |
|
"eval_loss": 10.3125, |
|
"eval_runtime": 264.0804, |
|
"eval_samples_per_second": 127.866, |
|
"eval_steps_per_second": 2.666, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.014821272885789015, |
|
"grad_norm": 0.8885826468467712, |
|
"learning_rate": 9.998641383318804e-06, |
|
"loss": 10.3203, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.014821272885789015, |
|
"eval_accuracy": 0.07433359442335566, |
|
"eval_loss": 10.3125, |
|
"eval_runtime": 266.5522, |
|
"eval_samples_per_second": 126.681, |
|
"eval_steps_per_second": 2.641, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.014893926184248766, |
|
"grad_norm": 0.897081732749939, |
|
"learning_rate": 9.998634117988958e-06, |
|
"loss": 10.3281, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.014893926184248766, |
|
"eval_accuracy": 0.07427410445818378, |
|
"eval_loss": 10.3125, |
|
"eval_runtime": 267.4051, |
|
"eval_samples_per_second": 126.277, |
|
"eval_steps_per_second": 2.633, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.014966579482708514, |
|
"grad_norm": 0.9606081247329712, |
|
"learning_rate": 9.998626852659112e-06, |
|
"loss": 10.3125, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.014966579482708514, |
|
"eval_accuracy": 0.07412779677741556, |
|
"eval_loss": 10.3125, |
|
"eval_runtime": 265.1816, |
|
"eval_samples_per_second": 127.335, |
|
"eval_steps_per_second": 2.655, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.015039232781168265, |
|
"grad_norm": 0.9314731955528259, |
|
"learning_rate": 9.998619587329266e-06, |
|
"loss": 10.3125, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.015039232781168265, |
|
"eval_accuracy": 0.07401284074252625, |
|
"eval_loss": 10.3125, |
|
"eval_runtime": 266.8876, |
|
"eval_samples_per_second": 126.521, |
|
"eval_steps_per_second": 2.638, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.015111886079628016, |
|
"grad_norm": 0.9583424925804138, |
|
"learning_rate": 9.99861232199942e-06, |
|
"loss": 10.3047, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.015111886079628016, |
|
"eval_accuracy": 0.07404071852182577, |
|
"eval_loss": 10.3046875, |
|
"eval_runtime": 266.703, |
|
"eval_samples_per_second": 126.609, |
|
"eval_steps_per_second": 2.64, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.015184539378087765, |
|
"grad_norm": 0.9071934223175049, |
|
"learning_rate": 9.998605056669574e-06, |
|
"loss": 10.3125, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.015184539378087765, |
|
"eval_accuracy": 0.07412215174422407, |
|
"eval_loss": 10.3046875, |
|
"eval_runtime": 266.5993, |
|
"eval_samples_per_second": 126.658, |
|
"eval_steps_per_second": 2.641, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.015257192676547515, |
|
"grad_norm": 0.8879753351211548, |
|
"learning_rate": 9.998597791339728e-06, |
|
"loss": 10.3125, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.015257192676547515, |
|
"eval_accuracy": 0.07420225331776693, |
|
"eval_loss": 10.3046875, |
|
"eval_runtime": 267.0745, |
|
"eval_samples_per_second": 126.433, |
|
"eval_steps_per_second": 2.636, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.015329845975007266, |
|
"grad_norm": 0.9064663052558899, |
|
"learning_rate": 9.998590526009882e-06, |
|
"loss": 10.3203, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.015329845975007266, |
|
"eval_accuracy": 0.07434856099853515, |
|
"eval_loss": 10.3046875, |
|
"eval_runtime": 267.2364, |
|
"eval_samples_per_second": 126.356, |
|
"eval_steps_per_second": 2.634, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.015402499273467015, |
|
"grad_norm": 0.888227105140686, |
|
"learning_rate": 9.998583260680035e-06, |
|
"loss": 10.3047, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.015402499273467015, |
|
"eval_accuracy": 0.07441022213031916, |
|
"eval_loss": 10.3046875, |
|
"eval_runtime": 267.6856, |
|
"eval_samples_per_second": 126.144, |
|
"eval_steps_per_second": 2.63, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.015475152571926765, |
|
"grad_norm": 0.873029887676239, |
|
"learning_rate": 9.99857599535019e-06, |
|
"loss": 10.3203, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.015475152571926765, |
|
"eval_accuracy": 0.07452069108754343, |
|
"eval_loss": 10.296875, |
|
"eval_runtime": 267.3911, |
|
"eval_samples_per_second": 126.283, |
|
"eval_steps_per_second": 2.633, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.015547805870386516, |
|
"grad_norm": 0.9147621989250183, |
|
"learning_rate": 9.998568730020343e-06, |
|
"loss": 10.3125, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.015547805870386516, |
|
"eval_accuracy": 0.07470987207167884, |
|
"eval_loss": 10.296875, |
|
"eval_runtime": 267.6606, |
|
"eval_samples_per_second": 126.156, |
|
"eval_steps_per_second": 2.63, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.015620459168846265, |
|
"grad_norm": 0.9260271787643433, |
|
"learning_rate": 9.998561464690499e-06, |
|
"loss": 10.3047, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.015620459168846265, |
|
"eval_accuracy": 0.07488478125395062, |
|
"eval_loss": 10.296875, |
|
"eval_runtime": 266.1126, |
|
"eval_samples_per_second": 126.89, |
|
"eval_steps_per_second": 2.645, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.015693112467306015, |
|
"grad_norm": 0.9096031785011292, |
|
"learning_rate": 9.998554199360651e-06, |
|
"loss": 10.2969, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.015693112467306015, |
|
"eval_accuracy": 0.07498534969142368, |
|
"eval_loss": 10.296875, |
|
"eval_runtime": 266.7055, |
|
"eval_samples_per_second": 126.608, |
|
"eval_steps_per_second": 2.64, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.015765765765765764, |
|
"grad_norm": 0.9063442945480347, |
|
"learning_rate": 9.998546934030807e-06, |
|
"loss": 10.3047, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.015765765765765764, |
|
"eval_accuracy": 0.07498459702033147, |
|
"eval_loss": 10.296875, |
|
"eval_runtime": 267.4598, |
|
"eval_samples_per_second": 126.251, |
|
"eval_steps_per_second": 2.632, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.015838419064225517, |
|
"grad_norm": 0.9258260130882263, |
|
"learning_rate": 9.998539668700959e-06, |
|
"loss": 10.2969, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.015838419064225517, |
|
"eval_accuracy": 0.0748771966452523, |
|
"eval_loss": 10.2890625, |
|
"eval_runtime": 267.3776, |
|
"eval_samples_per_second": 126.29, |
|
"eval_steps_per_second": 2.633, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.015911072362685266, |
|
"grad_norm": 0.9510604739189148, |
|
"learning_rate": 9.998532403371115e-06, |
|
"loss": 10.2891, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.015911072362685266, |
|
"eval_accuracy": 0.07466393018616653, |
|
"eval_loss": 10.2890625, |
|
"eval_runtime": 266.9849, |
|
"eval_samples_per_second": 126.475, |
|
"eval_steps_per_second": 2.637, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.015983725661145014, |
|
"grad_norm": 0.888974130153656, |
|
"learning_rate": 9.998525138041267e-06, |
|
"loss": 10.2969, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.015983725661145014, |
|
"eval_accuracy": 0.07440356388604201, |
|
"eval_loss": 10.2890625, |
|
"eval_runtime": 265.0185, |
|
"eval_samples_per_second": 127.414, |
|
"eval_steps_per_second": 2.656, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.016056378959604767, |
|
"grad_norm": 0.9004181623458862, |
|
"learning_rate": 9.998517872711423e-06, |
|
"loss": 10.2969, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.016056378959604767, |
|
"eval_accuracy": 0.07420781150429394, |
|
"eval_loss": 10.2890625, |
|
"eval_runtime": 265.4243, |
|
"eval_samples_per_second": 127.219, |
|
"eval_steps_per_second": 2.652, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.016129032258064516, |
|
"grad_norm": 0.8965704441070557, |
|
"learning_rate": 9.998510607381577e-06, |
|
"loss": 10.2891, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.016129032258064516, |
|
"eval_accuracy": 0.07409245018497036, |
|
"eval_loss": 10.2890625, |
|
"eval_runtime": 265.3546, |
|
"eval_samples_per_second": 127.252, |
|
"eval_steps_per_second": 2.653, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.016201685556524265, |
|
"grad_norm": 0.9500789046287537, |
|
"learning_rate": 9.998503342051729e-06, |
|
"loss": 10.2891, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.016201685556524265, |
|
"eval_accuracy": 0.07416858576083511, |
|
"eval_loss": 10.28125, |
|
"eval_runtime": 265.6881, |
|
"eval_samples_per_second": 127.093, |
|
"eval_steps_per_second": 2.65, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.016274338854984017, |
|
"grad_norm": 0.9081275463104248, |
|
"learning_rate": 9.998496076721884e-06, |
|
"loss": 10.2891, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.016274338854984017, |
|
"eval_accuracy": 0.07432068321923563, |
|
"eval_loss": 10.28125, |
|
"eval_runtime": 265.8739, |
|
"eval_samples_per_second": 127.004, |
|
"eval_steps_per_second": 2.648, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.016346992153443766, |
|
"grad_norm": 0.9124018549919128, |
|
"learning_rate": 9.998488811392037e-06, |
|
"loss": 10.2891, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.016346992153443766, |
|
"eval_accuracy": 0.07457499920173441, |
|
"eval_loss": 10.28125, |
|
"eval_runtime": 265.6262, |
|
"eval_samples_per_second": 127.122, |
|
"eval_steps_per_second": 2.65, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.016419645451903518, |
|
"grad_norm": 0.8811033368110657, |
|
"learning_rate": 9.998481546062192e-06, |
|
"loss": 10.2969, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.016419645451903518, |
|
"eval_accuracy": 0.07480795090477, |
|
"eval_loss": 10.28125, |
|
"eval_runtime": 265.1697, |
|
"eval_samples_per_second": 127.341, |
|
"eval_steps_per_second": 2.655, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.016492298750363267, |
|
"grad_norm": 0.8931852579116821, |
|
"learning_rate": 9.998474280732346e-06, |
|
"loss": 10.2812, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.016492298750363267, |
|
"eval_accuracy": 0.07488660503390479, |
|
"eval_loss": 10.2734375, |
|
"eval_runtime": 265.5292, |
|
"eval_samples_per_second": 127.169, |
|
"eval_steps_per_second": 2.651, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.016564952048823016, |
|
"grad_norm": 0.9159207344055176, |
|
"learning_rate": 9.9984670154025e-06, |
|
"loss": 10.2891, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.016564952048823016, |
|
"eval_accuracy": 0.07503638658125239, |
|
"eval_loss": 10.2734375, |
|
"eval_runtime": 264.9834, |
|
"eval_samples_per_second": 127.431, |
|
"eval_steps_per_second": 2.657, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.01663760534728277, |
|
"grad_norm": 0.9291688203811646, |
|
"learning_rate": 9.998459750072654e-06, |
|
"loss": 10.2734, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.01663760534728277, |
|
"eval_accuracy": 0.0751111905082617, |
|
"eval_loss": 10.2734375, |
|
"eval_runtime": 265.3402, |
|
"eval_samples_per_second": 127.259, |
|
"eval_steps_per_second": 2.653, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.016710258645742517, |
|
"grad_norm": 0.8605514168739319, |
|
"learning_rate": 9.998452484742808e-06, |
|
"loss": 10.2969, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.016710258645742517, |
|
"eval_accuracy": 0.075049760967583, |
|
"eval_loss": 10.2734375, |
|
"eval_runtime": 266.7587, |
|
"eval_samples_per_second": 126.583, |
|
"eval_steps_per_second": 2.639, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.016782911944202266, |
|
"grad_norm": 0.9553351998329163, |
|
"learning_rate": 9.998445219412962e-06, |
|
"loss": 10.2656, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.016782911944202266, |
|
"eval_accuracy": 0.07486694873884313, |
|
"eval_loss": 10.2734375, |
|
"eval_runtime": 266.6123, |
|
"eval_samples_per_second": 126.652, |
|
"eval_steps_per_second": 2.641, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.01685556524266202, |
|
"grad_norm": 0.9175562262535095, |
|
"learning_rate": 9.998437954083116e-06, |
|
"loss": 10.2734, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.01685556524266202, |
|
"eval_accuracy": 0.07473213376667504, |
|
"eval_loss": 10.265625, |
|
"eval_runtime": 267.4978, |
|
"eval_samples_per_second": 126.233, |
|
"eval_steps_per_second": 2.632, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.016928218541121767, |
|
"grad_norm": 0.9021575450897217, |
|
"learning_rate": 9.99843068875327e-06, |
|
"loss": 10.2734, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.016928218541121767, |
|
"eval_accuracy": 0.0746572719418894, |
|
"eval_loss": 10.265625, |
|
"eval_runtime": 267.5513, |
|
"eval_samples_per_second": 126.208, |
|
"eval_steps_per_second": 2.631, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.017000871839581516, |
|
"grad_norm": 0.8851971626281738, |
|
"learning_rate": 9.998423423423424e-06, |
|
"loss": 10.2734, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.017000871839581516, |
|
"eval_accuracy": 0.07463700772017634, |
|
"eval_loss": 10.265625, |
|
"eval_runtime": 267.13, |
|
"eval_samples_per_second": 126.407, |
|
"eval_steps_per_second": 2.635, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.01707352513804127, |
|
"grad_norm": 0.9394397139549255, |
|
"learning_rate": 9.998416158093578e-06, |
|
"loss": 10.2656, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.01707352513804127, |
|
"eval_accuracy": 0.07467038578822655, |
|
"eval_loss": 10.265625, |
|
"eval_runtime": 265.7106, |
|
"eval_samples_per_second": 127.082, |
|
"eval_steps_per_second": 2.649, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.017146178436501017, |
|
"grad_norm": 0.9121464490890503, |
|
"learning_rate": 9.998408892763732e-06, |
|
"loss": 10.2656, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.017146178436501017, |
|
"eval_accuracy": 0.07478282326984581, |
|
"eval_loss": 10.265625, |
|
"eval_runtime": 267.6727, |
|
"eval_samples_per_second": 126.15, |
|
"eval_steps_per_second": 2.63, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.017218831734960766, |
|
"grad_norm": 0.8910766839981079, |
|
"learning_rate": 9.998401627433887e-06, |
|
"loss": 10.2734, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.017218831734960766, |
|
"eval_accuracy": 0.07491986730640236, |
|
"eval_loss": 10.2578125, |
|
"eval_runtime": 268.4345, |
|
"eval_samples_per_second": 125.792, |
|
"eval_steps_per_second": 2.623, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.01729148503342052, |
|
"grad_norm": 0.9403276443481445, |
|
"learning_rate": 9.99839436210404e-06, |
|
"loss": 10.2656, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.01729148503342052, |
|
"eval_accuracy": 0.07518078363540195, |
|
"eval_loss": 10.2578125, |
|
"eval_runtime": 267.0058, |
|
"eval_samples_per_second": 126.465, |
|
"eval_steps_per_second": 2.637, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.017364138331880268, |
|
"grad_norm": 0.8892084956169128, |
|
"learning_rate": 9.998387096774195e-06, |
|
"loss": 10.2734, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.017364138331880268, |
|
"eval_accuracy": 0.07545047147751449, |
|
"eval_loss": 10.2578125, |
|
"eval_runtime": 265.2789, |
|
"eval_samples_per_second": 127.289, |
|
"eval_steps_per_second": 2.654, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.017436791630340016, |
|
"grad_norm": 0.9249821305274963, |
|
"learning_rate": 9.998379831444348e-06, |
|
"loss": 10.2578, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.017436791630340016, |
|
"eval_accuracy": 0.07559730023826962, |
|
"eval_loss": 10.2578125, |
|
"eval_runtime": 265.5188, |
|
"eval_samples_per_second": 127.174, |
|
"eval_steps_per_second": 2.651, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.01750944492879977, |
|
"grad_norm": 0.8785547614097595, |
|
"learning_rate": 9.998372566114503e-06, |
|
"loss": 10.2734, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.01750944492879977, |
|
"eval_accuracy": 0.07564480536374263, |
|
"eval_loss": 10.2578125, |
|
"eval_runtime": 265.4778, |
|
"eval_samples_per_second": 127.193, |
|
"eval_steps_per_second": 2.652, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.017582098227259518, |
|
"grad_norm": 0.9142479300498962, |
|
"learning_rate": 9.998365300784656e-06, |
|
"loss": 10.2656, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.017582098227259518, |
|
"eval_accuracy": 0.07562578594422054, |
|
"eval_loss": 10.25, |
|
"eval_runtime": 265.3917, |
|
"eval_samples_per_second": 127.235, |
|
"eval_steps_per_second": 2.653, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.017654751525719267, |
|
"grad_norm": 0.924387514591217, |
|
"learning_rate": 9.998358035454811e-06, |
|
"loss": 10.2578, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.017654751525719267, |
|
"eval_accuracy": 0.07555361636603392, |
|
"eval_loss": 10.25, |
|
"eval_runtime": 263.7272, |
|
"eval_samples_per_second": 128.038, |
|
"eval_steps_per_second": 2.669, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.01772740482417902, |
|
"grad_norm": 0.9198188185691833, |
|
"learning_rate": 9.998350770124965e-06, |
|
"loss": 10.2578, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.01772740482417902, |
|
"eval_accuracy": 0.07558369426083371, |
|
"eval_loss": 10.25, |
|
"eval_runtime": 264.6264, |
|
"eval_samples_per_second": 127.603, |
|
"eval_steps_per_second": 2.66, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.017800058122638768, |
|
"grad_norm": 0.9178450703620911, |
|
"learning_rate": 9.998343504795119e-06, |
|
"loss": 10.2578, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.017800058122638768, |
|
"eval_accuracy": 0.07555222681940216, |
|
"eval_loss": 10.25, |
|
"eval_runtime": 264.3962, |
|
"eval_samples_per_second": 127.714, |
|
"eval_steps_per_second": 2.663, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.017872711421098517, |
|
"grad_norm": 0.8939234614372253, |
|
"learning_rate": 9.998336239465273e-06, |
|
"loss": 10.2578, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.017872711421098517, |
|
"eval_accuracy": 0.07556374847689043, |
|
"eval_loss": 10.25, |
|
"eval_runtime": 264.4976, |
|
"eval_samples_per_second": 127.665, |
|
"eval_steps_per_second": 2.662, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.01794536471955827, |
|
"grad_norm": 0.9162428379058838, |
|
"learning_rate": 9.998328974135427e-06, |
|
"loss": 10.2578, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.01794536471955827, |
|
"eval_accuracy": 0.07567158308529202, |
|
"eval_loss": 10.2421875, |
|
"eval_runtime": 264.7261, |
|
"eval_samples_per_second": 127.554, |
|
"eval_steps_per_second": 2.659, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.018018018018018018, |
|
"grad_norm": 0.8900968432426453, |
|
"learning_rate": 9.998321708805581e-06, |
|
"loss": 10.2578, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.018018018018018018, |
|
"eval_accuracy": 0.07578150201364124, |
|
"eval_loss": 10.2421875, |
|
"eval_runtime": 264.8491, |
|
"eval_samples_per_second": 127.495, |
|
"eval_steps_per_second": 2.658, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.018090671316477767, |
|
"grad_norm": 0.9296072721481323, |
|
"learning_rate": 9.998314443475735e-06, |
|
"loss": 10.2422, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.018090671316477767, |
|
"eval_accuracy": 0.07588282312220648, |
|
"eval_loss": 10.2421875, |
|
"eval_runtime": 265.9596, |
|
"eval_samples_per_second": 126.963, |
|
"eval_steps_per_second": 2.647, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.01816332461493752, |
|
"grad_norm": 0.9414094686508179, |
|
"learning_rate": 9.998307178145889e-06, |
|
"loss": 10.2422, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01816332461493752, |
|
"eval_accuracy": 0.07589431583080661, |
|
"eval_loss": 10.2421875, |
|
"eval_runtime": 265.5852, |
|
"eval_samples_per_second": 127.142, |
|
"eval_steps_per_second": 2.651, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.018235977913397268, |
|
"grad_norm": 0.9078280329704285, |
|
"learning_rate": 9.998299912816043e-06, |
|
"loss": 10.2422, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.018235977913397268, |
|
"eval_accuracy": 0.07588244678666038, |
|
"eval_loss": 10.2421875, |
|
"eval_runtime": 266.4837, |
|
"eval_samples_per_second": 126.713, |
|
"eval_steps_per_second": 2.642, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.018308631211857017, |
|
"grad_norm": 0.9042601585388184, |
|
"learning_rate": 9.998292647486197e-06, |
|
"loss": 10.2422, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.018308631211857017, |
|
"eval_accuracy": 0.07590384001501174, |
|
"eval_loss": 10.234375, |
|
"eval_runtime": 265.9021, |
|
"eval_samples_per_second": 126.99, |
|
"eval_steps_per_second": 2.648, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.01838128451031677, |
|
"grad_norm": 0.9029207825660706, |
|
"learning_rate": 9.99828538215635e-06, |
|
"loss": 10.2422, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.01838128451031677, |
|
"eval_accuracy": 0.0758776991690019, |
|
"eval_loss": 10.234375, |
|
"eval_runtime": 264.8706, |
|
"eval_samples_per_second": 127.485, |
|
"eval_steps_per_second": 2.658, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.018453937808776518, |
|
"grad_norm": 0.901042640209198, |
|
"learning_rate": 9.998278116826505e-06, |
|
"loss": 10.2422, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.018453937808776518, |
|
"eval_accuracy": 0.075944397407326, |
|
"eval_loss": 10.234375, |
|
"eval_runtime": 266.8196, |
|
"eval_samples_per_second": 126.554, |
|
"eval_steps_per_second": 2.638, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.018526591107236267, |
|
"grad_norm": 0.921441376209259, |
|
"learning_rate": 9.998270851496658e-06, |
|
"loss": 10.2422, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.018526591107236267, |
|
"eval_accuracy": 0.07608468371935663, |
|
"eval_loss": 10.234375, |
|
"eval_runtime": 266.5157, |
|
"eval_samples_per_second": 126.698, |
|
"eval_steps_per_second": 2.641, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.01859924440569602, |
|
"grad_norm": 0.9235514998435974, |
|
"learning_rate": 9.998263586166812e-06, |
|
"loss": 10.2422, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.01859924440569602, |
|
"eval_accuracy": 0.07609151565696273, |
|
"eval_loss": 10.234375, |
|
"eval_runtime": 265.5975, |
|
"eval_samples_per_second": 127.136, |
|
"eval_steps_per_second": 2.651, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.018671897704155768, |
|
"grad_norm": 0.8886791467666626, |
|
"learning_rate": 9.998256320836966e-06, |
|
"loss": 10.2422, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.018671897704155768, |
|
"eval_accuracy": 0.07604354734927914, |
|
"eval_loss": 10.2265625, |
|
"eval_runtime": 266.183, |
|
"eval_samples_per_second": 126.856, |
|
"eval_steps_per_second": 2.645, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.018744551002615517, |
|
"grad_norm": 0.8807479739189148, |
|
"learning_rate": 9.99824905550712e-06, |
|
"loss": 10.2422, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.018744551002615517, |
|
"eval_accuracy": 0.07599326313054261, |
|
"eval_loss": 10.2265625, |
|
"eval_runtime": 266.5367, |
|
"eval_samples_per_second": 126.688, |
|
"eval_steps_per_second": 2.641, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.01881720430107527, |
|
"grad_norm": 0.902275025844574, |
|
"learning_rate": 9.998241790177274e-06, |
|
"loss": 10.2344, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.01881720430107527, |
|
"eval_accuracy": 0.07592763600108049, |
|
"eval_loss": 10.2265625, |
|
"eval_runtime": 265.7243, |
|
"eval_samples_per_second": 127.075, |
|
"eval_steps_per_second": 2.649, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.01888985759953502, |
|
"grad_norm": 0.8911043405532837, |
|
"learning_rate": 9.998234524847428e-06, |
|
"loss": 10.2344, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01888985759953502, |
|
"eval_accuracy": 0.07592697017665277, |
|
"eval_loss": 10.2265625, |
|
"eval_runtime": 265.6968, |
|
"eval_samples_per_second": 127.088, |
|
"eval_steps_per_second": 2.65, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.01896251089799477, |
|
"grad_norm": 0.9092383980751038, |
|
"learning_rate": 9.998227259517584e-06, |
|
"loss": 10.2266, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.01896251089799477, |
|
"eval_accuracy": 0.0759951448082731, |
|
"eval_loss": 10.2265625, |
|
"eval_runtime": 265.7199, |
|
"eval_samples_per_second": 127.077, |
|
"eval_steps_per_second": 2.649, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.01903516419645452, |
|
"grad_norm": 0.928420901298523, |
|
"learning_rate": 9.998219994187736e-06, |
|
"loss": 10.2188, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.01903516419645452, |
|
"eval_accuracy": 0.07604878709803636, |
|
"eval_loss": 10.21875, |
|
"eval_runtime": 265.3942, |
|
"eval_samples_per_second": 127.233, |
|
"eval_steps_per_second": 2.653, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.01910781749491427, |
|
"grad_norm": 0.9022119641304016, |
|
"learning_rate": 9.998212728857892e-06, |
|
"loss": 10.2266, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.01910781749491427, |
|
"eval_accuracy": 0.07615966133969491, |
|
"eval_loss": 10.21875, |
|
"eval_runtime": 265.8823, |
|
"eval_samples_per_second": 127.0, |
|
"eval_steps_per_second": 2.648, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.01918047079337402, |
|
"grad_norm": 0.8958231210708618, |
|
"learning_rate": 9.998205463528044e-06, |
|
"loss": 10.2266, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.01918047079337402, |
|
"eval_accuracy": 0.07618090982360545, |
|
"eval_loss": 10.21875, |
|
"eval_runtime": 264.9067, |
|
"eval_samples_per_second": 127.468, |
|
"eval_steps_per_second": 2.658, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.01925312409183377, |
|
"grad_norm": 0.9452428817749023, |
|
"learning_rate": 9.9981981981982e-06, |
|
"loss": 10.2188, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.01925312409183377, |
|
"eval_accuracy": 0.07618519425905335, |
|
"eval_loss": 10.21875, |
|
"eval_runtime": 265.6026, |
|
"eval_samples_per_second": 127.134, |
|
"eval_steps_per_second": 2.651, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.01932577739029352, |
|
"grad_norm": 0.8848786354064941, |
|
"learning_rate": 9.998190932868354e-06, |
|
"loss": 10.2266, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.01932577739029352, |
|
"eval_accuracy": 0.07619544216546252, |
|
"eval_loss": 10.21875, |
|
"eval_runtime": 264.5225, |
|
"eval_samples_per_second": 127.653, |
|
"eval_steps_per_second": 2.661, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.01939843068875327, |
|
"grad_norm": 0.89435875415802, |
|
"learning_rate": 9.998183667538508e-06, |
|
"loss": 10.2188, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.01939843068875327, |
|
"eval_accuracy": 0.07622743068688098, |
|
"eval_loss": 10.2109375, |
|
"eval_runtime": 263.8523, |
|
"eval_samples_per_second": 127.977, |
|
"eval_steps_per_second": 2.668, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.01947108398721302, |
|
"grad_norm": 0.9147275686264038, |
|
"learning_rate": 9.998176402208661e-06, |
|
"loss": 10.2109, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.01947108398721302, |
|
"eval_accuracy": 0.076288020709803, |
|
"eval_loss": 10.2109375, |
|
"eval_runtime": 264.7583, |
|
"eval_samples_per_second": 127.539, |
|
"eval_steps_per_second": 2.659, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.01954373728567277, |
|
"grad_norm": 0.9651392102241516, |
|
"learning_rate": 9.998169136878815e-06, |
|
"loss": 10.2109, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.01954373728567277, |
|
"eval_accuracy": 0.07618924710339596, |
|
"eval_loss": 10.2109375, |
|
"eval_runtime": 263.97, |
|
"eval_samples_per_second": 127.92, |
|
"eval_steps_per_second": 2.667, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.01961639058413252, |
|
"grad_norm": 0.9311045408248901, |
|
"learning_rate": 9.99816187154897e-06, |
|
"loss": 10.2109, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.01961639058413252, |
|
"eval_accuracy": 0.07608592852154757, |
|
"eval_loss": 10.2109375, |
|
"eval_runtime": 263.7922, |
|
"eval_samples_per_second": 128.006, |
|
"eval_steps_per_second": 2.669, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.01968904388259227, |
|
"grad_norm": 0.9008721113204956, |
|
"learning_rate": 9.998154606219123e-06, |
|
"loss": 10.2188, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.01968904388259227, |
|
"eval_accuracy": 0.07606844339309803, |
|
"eval_loss": 10.2109375, |
|
"eval_runtime": 263.8126, |
|
"eval_samples_per_second": 127.996, |
|
"eval_steps_per_second": 2.669, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.01976169718105202, |
|
"grad_norm": 0.9026838541030884, |
|
"learning_rate": 9.998147340889277e-06, |
|
"loss": 10.2109, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.01976169718105202, |
|
"eval_accuracy": 0.07600967715013018, |
|
"eval_loss": 10.203125, |
|
"eval_runtime": 265.1164, |
|
"eval_samples_per_second": 127.367, |
|
"eval_steps_per_second": 2.655, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.01983435047951177, |
|
"grad_norm": 0.9332795143127441, |
|
"learning_rate": 9.998140075559431e-06, |
|
"loss": 10.2188, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.01983435047951177, |
|
"eval_accuracy": 0.07610422421886563, |
|
"eval_loss": 10.203125, |
|
"eval_runtime": 266.0904, |
|
"eval_samples_per_second": 126.9, |
|
"eval_steps_per_second": 2.646, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.01990700377797152, |
|
"grad_norm": 0.8622159361839294, |
|
"learning_rate": 9.998132810229585e-06, |
|
"loss": 10.2266, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.01990700377797152, |
|
"eval_accuracy": 0.07618247306356617, |
|
"eval_loss": 10.203125, |
|
"eval_runtime": 265.3346, |
|
"eval_samples_per_second": 127.262, |
|
"eval_steps_per_second": 2.653, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.01997965707643127, |
|
"grad_norm": 0.8802500367164612, |
|
"learning_rate": 9.998125544899739e-06, |
|
"loss": 10.2188, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.01997965707643127, |
|
"eval_accuracy": 0.07621292729391207, |
|
"eval_loss": 10.203125, |
|
"eval_runtime": 263.5634, |
|
"eval_samples_per_second": 128.117, |
|
"eval_steps_per_second": 2.671, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.02005231037489102, |
|
"grad_norm": 0.8940539956092834, |
|
"learning_rate": 9.998118279569893e-06, |
|
"loss": 10.2109, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.02005231037489102, |
|
"eval_accuracy": 0.07612616747609205, |
|
"eval_loss": 10.1953125, |
|
"eval_runtime": 264.8033, |
|
"eval_samples_per_second": 127.517, |
|
"eval_steps_per_second": 2.659, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.02012496367335077, |
|
"grad_norm": 0.9146431684494019, |
|
"learning_rate": 9.998111014240047e-06, |
|
"loss": 10.2109, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.02012496367335077, |
|
"eval_accuracy": 0.07617022768387385, |
|
"eval_loss": 10.1953125, |
|
"eval_runtime": 264.0677, |
|
"eval_samples_per_second": 127.873, |
|
"eval_steps_per_second": 2.666, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.02019761697181052, |
|
"grad_norm": 0.9410712122917175, |
|
"learning_rate": 9.998103748910201e-06, |
|
"loss": 10.1953, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.02019761697181052, |
|
"eval_accuracy": 0.07624436578645546, |
|
"eval_loss": 10.1953125, |
|
"eval_runtime": 263.8716, |
|
"eval_samples_per_second": 127.968, |
|
"eval_steps_per_second": 2.668, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.02027027027027027, |
|
"grad_norm": 0.8908507227897644, |
|
"learning_rate": 9.998096483580355e-06, |
|
"loss": 10.2031, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.02027027027027027, |
|
"eval_accuracy": 0.07630177143167971, |
|
"eval_loss": 10.1953125, |
|
"eval_runtime": 263.3209, |
|
"eval_samples_per_second": 128.235, |
|
"eval_steps_per_second": 2.674, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.02034292356873002, |
|
"grad_norm": 0.9145093560218811, |
|
"learning_rate": 9.998089218250509e-06, |
|
"loss": 10.2188, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02034292356873002, |
|
"eval_accuracy": 0.07648229669825482, |
|
"eval_loss": 10.1953125, |
|
"eval_runtime": 262.7765, |
|
"eval_samples_per_second": 128.501, |
|
"eval_steps_per_second": 2.679, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02041557686718977, |
|
"grad_norm": 0.9509057402610779, |
|
"learning_rate": 9.998081952920663e-06, |
|
"loss": 10.1953, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.02041557686718977, |
|
"eval_accuracy": 0.07658419678458331, |
|
"eval_loss": 10.1875, |
|
"eval_runtime": 264.7797, |
|
"eval_samples_per_second": 127.529, |
|
"eval_steps_per_second": 2.659, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.02048823016564952, |
|
"grad_norm": 0.9156680107116699, |
|
"learning_rate": 9.998074687590817e-06, |
|
"loss": 10.1953, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.02048823016564952, |
|
"eval_accuracy": 0.07667509629341042, |
|
"eval_loss": 10.1875, |
|
"eval_runtime": 264.7774, |
|
"eval_samples_per_second": 127.53, |
|
"eval_steps_per_second": 2.659, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.02056088346410927, |
|
"grad_norm": 0.9039434194564819, |
|
"learning_rate": 9.998067422260972e-06, |
|
"loss": 10.2031, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.02056088346410927, |
|
"eval_accuracy": 0.07671203507470449, |
|
"eval_loss": 10.1875, |
|
"eval_runtime": 264.8864, |
|
"eval_samples_per_second": 127.477, |
|
"eval_steps_per_second": 2.658, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.02063353676256902, |
|
"grad_norm": 0.9945496320724487, |
|
"learning_rate": 9.998060156931125e-06, |
|
"loss": 10.1797, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.02063353676256902, |
|
"eval_accuracy": 0.0766314992678392, |
|
"eval_loss": 10.1875, |
|
"eval_runtime": 264.5241, |
|
"eval_samples_per_second": 127.652, |
|
"eval_steps_per_second": 2.661, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.02070619006102877, |
|
"grad_norm": 1.0437395572662354, |
|
"learning_rate": 9.99805289160128e-06, |
|
"loss": 10.1953, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.02070619006102877, |
|
"eval_accuracy": 0.07648548107595259, |
|
"eval_loss": 10.1875, |
|
"eval_runtime": 265.3496, |
|
"eval_samples_per_second": 127.255, |
|
"eval_steps_per_second": 2.653, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.02077884335948852, |
|
"grad_norm": 0.9568849802017212, |
|
"learning_rate": 9.998045626271433e-06, |
|
"loss": 10.1953, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.02077884335948852, |
|
"eval_accuracy": 0.07641924601983908, |
|
"eval_loss": 10.1796875, |
|
"eval_runtime": 264.6514, |
|
"eval_samples_per_second": 127.59, |
|
"eval_steps_per_second": 2.66, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.02085149665794827, |
|
"grad_norm": 0.9541803002357483, |
|
"learning_rate": 9.998038360941588e-06, |
|
"loss": 10.1875, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.02085149665794827, |
|
"eval_accuracy": 0.07642651219076761, |
|
"eval_loss": 10.1796875, |
|
"eval_runtime": 264.1269, |
|
"eval_samples_per_second": 127.844, |
|
"eval_steps_per_second": 2.665, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.020924149956408022, |
|
"grad_norm": 0.8972413539886475, |
|
"learning_rate": 9.998031095611742e-06, |
|
"loss": 10.1953, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.020924149956408022, |
|
"eval_accuracy": 0.0764929209402101, |
|
"eval_loss": 10.1796875, |
|
"eval_runtime": 262.3613, |
|
"eval_samples_per_second": 128.704, |
|
"eval_steps_per_second": 2.683, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.02099680325486777, |
|
"grad_norm": 0.9032208323478699, |
|
"learning_rate": 9.998023830281896e-06, |
|
"loss": 10.1875, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.02099680325486777, |
|
"eval_accuracy": 0.07653272566143215, |
|
"eval_loss": 10.1796875, |
|
"eval_runtime": 264.0713, |
|
"eval_samples_per_second": 127.871, |
|
"eval_steps_per_second": 2.666, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.02106945655332752, |
|
"grad_norm": 0.9714264869689941, |
|
"learning_rate": 9.99801656495205e-06, |
|
"loss": 10.1875, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.02106945655332752, |
|
"eval_accuracy": 0.07676037971793419, |
|
"eval_loss": 10.1796875, |
|
"eval_runtime": 265.0307, |
|
"eval_samples_per_second": 127.408, |
|
"eval_steps_per_second": 2.656, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.021142109851787272, |
|
"grad_norm": 0.9713578820228577, |
|
"learning_rate": 9.998009299622204e-06, |
|
"loss": 10.1797, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.021142109851787272, |
|
"eval_accuracy": 0.07695943227293267, |
|
"eval_loss": 10.171875, |
|
"eval_runtime": 264.6088, |
|
"eval_samples_per_second": 127.611, |
|
"eval_steps_per_second": 2.661, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.02121476315024702, |
|
"grad_norm": 0.947812557220459, |
|
"learning_rate": 9.998002034292358e-06, |
|
"loss": 10.1719, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.02121476315024702, |
|
"eval_accuracy": 0.07713083863973691, |
|
"eval_loss": 10.171875, |
|
"eval_runtime": 263.4033, |
|
"eval_samples_per_second": 128.195, |
|
"eval_steps_per_second": 2.673, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.02128741644870677, |
|
"grad_norm": 0.980165421962738, |
|
"learning_rate": 9.997994768962512e-06, |
|
"loss": 10.1719, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.02128741644870677, |
|
"eval_accuracy": 0.07723297031717068, |
|
"eval_loss": 10.171875, |
|
"eval_runtime": 264.0562, |
|
"eval_samples_per_second": 127.878, |
|
"eval_steps_per_second": 2.666, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.021360069747166522, |
|
"grad_norm": 0.9119016528129578, |
|
"learning_rate": 9.997987503632666e-06, |
|
"loss": 10.1797, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.021360069747166522, |
|
"eval_accuracy": 0.07731897746389849, |
|
"eval_loss": 10.171875, |
|
"eval_runtime": 265.096, |
|
"eval_samples_per_second": 127.377, |
|
"eval_steps_per_second": 2.656, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.02143272304562627, |
|
"grad_norm": 0.9215472936630249, |
|
"learning_rate": 9.99798023830282e-06, |
|
"loss": 10.1797, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.02143272304562627, |
|
"eval_accuracy": 0.07734911325647462, |
|
"eval_loss": 10.171875, |
|
"eval_runtime": 264.1241, |
|
"eval_samples_per_second": 127.845, |
|
"eval_steps_per_second": 2.665, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.021505376344086023, |
|
"grad_norm": 0.915708601474762, |
|
"learning_rate": 9.997972972972974e-06, |
|
"loss": 10.1641, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.021505376344086023, |
|
"eval_accuracy": 0.07730580571978501, |
|
"eval_loss": 10.1640625, |
|
"eval_runtime": 262.6233, |
|
"eval_samples_per_second": 128.576, |
|
"eval_steps_per_second": 2.681, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.021578029642545772, |
|
"grad_norm": 0.9310121536254883, |
|
"learning_rate": 9.997965707643128e-06, |
|
"loss": 10.1719, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.021578029642545772, |
|
"eval_accuracy": 0.07730317137096232, |
|
"eval_loss": 10.1640625, |
|
"eval_runtime": 263.411, |
|
"eval_samples_per_second": 128.191, |
|
"eval_steps_per_second": 2.673, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.02165068294100552, |
|
"grad_norm": 0.9275549650192261, |
|
"learning_rate": 9.997958442313282e-06, |
|
"loss": 10.1719, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.02165068294100552, |
|
"eval_accuracy": 0.07726713000520125, |
|
"eval_loss": 10.1640625, |
|
"eval_runtime": 263.956, |
|
"eval_samples_per_second": 127.927, |
|
"eval_steps_per_second": 2.667, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.021723336239465273, |
|
"grad_norm": 0.9178668260574341, |
|
"learning_rate": 9.997951176983435e-06, |
|
"loss": 10.1719, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.021723336239465273, |
|
"eval_accuracy": 0.07729367613564535, |
|
"eval_loss": 10.1640625, |
|
"eval_runtime": 263.6087, |
|
"eval_samples_per_second": 128.095, |
|
"eval_steps_per_second": 2.671, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.021795989537925022, |
|
"grad_norm": 0.9181063175201416, |
|
"learning_rate": 9.997943911653591e-06, |
|
"loss": 10.1719, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.021795989537925022, |
|
"eval_accuracy": 0.0773372442123284, |
|
"eval_loss": 10.1640625, |
|
"eval_runtime": 264.6871, |
|
"eval_samples_per_second": 127.573, |
|
"eval_steps_per_second": 2.66, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.02186864283638477, |
|
"grad_norm": 0.9063278436660767, |
|
"learning_rate": 9.997936646323743e-06, |
|
"loss": 10.1641, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.02186864283638477, |
|
"eval_accuracy": 0.07729164971347403, |
|
"eval_loss": 10.1640625, |
|
"eval_runtime": 263.4469, |
|
"eval_samples_per_second": 128.174, |
|
"eval_steps_per_second": 2.672, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.021941296134844523, |
|
"grad_norm": 0.9040680527687073, |
|
"learning_rate": 9.997929380993899e-06, |
|
"loss": 10.1562, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.021941296134844523, |
|
"eval_accuracy": 0.07716485358332667, |
|
"eval_loss": 10.15625, |
|
"eval_runtime": 264.4717, |
|
"eval_samples_per_second": 127.677, |
|
"eval_steps_per_second": 2.662, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.022013949433304272, |
|
"grad_norm": 0.9027392864227295, |
|
"learning_rate": 9.997922115664051e-06, |
|
"loss": 10.1719, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.022013949433304272, |
|
"eval_accuracy": 0.0771164799912088, |
|
"eval_loss": 10.15625, |
|
"eval_runtime": 263.5132, |
|
"eval_samples_per_second": 128.142, |
|
"eval_steps_per_second": 2.672, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.02208660273176402, |
|
"grad_norm": 0.9688916802406311, |
|
"learning_rate": 9.997914850334205e-06, |
|
"loss": 10.1562, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.02208660273176402, |
|
"eval_accuracy": 0.07716798006324811, |
|
"eval_loss": 10.15625, |
|
"eval_runtime": 265.6792, |
|
"eval_samples_per_second": 127.097, |
|
"eval_steps_per_second": 2.65, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.022159256030223774, |
|
"grad_norm": 0.9013357162475586, |
|
"learning_rate": 9.997907585004361e-06, |
|
"loss": 10.1641, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.022159256030223774, |
|
"eval_accuracy": 0.07729283661788866, |
|
"eval_loss": 10.15625, |
|
"eval_runtime": 263.3689, |
|
"eval_samples_per_second": 128.212, |
|
"eval_steps_per_second": 2.673, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.022231909328683522, |
|
"grad_norm": 0.9209669828414917, |
|
"learning_rate": 9.997900319674513e-06, |
|
"loss": 10.1562, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.022231909328683522, |
|
"eval_accuracy": 0.07727607521164315, |
|
"eval_loss": 10.1484375, |
|
"eval_runtime": 262.7117, |
|
"eval_samples_per_second": 128.533, |
|
"eval_steps_per_second": 2.68, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.02230456262714327, |
|
"grad_norm": 0.9404518604278564, |
|
"learning_rate": 9.997893054344669e-06, |
|
"loss": 10.1641, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.02230456262714327, |
|
"eval_accuracy": 0.07732798056811672, |
|
"eval_loss": 10.1484375, |
|
"eval_runtime": 264.5099, |
|
"eval_samples_per_second": 127.659, |
|
"eval_steps_per_second": 2.662, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.022377215925603024, |
|
"grad_norm": 0.8949778079986572, |
|
"learning_rate": 9.997885789014821e-06, |
|
"loss": 10.1719, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.022377215925603024, |
|
"eval_accuracy": 0.07748395717753088, |
|
"eval_loss": 10.1484375, |
|
"eval_runtime": 265.2866, |
|
"eval_samples_per_second": 127.285, |
|
"eval_steps_per_second": 2.654, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.022449869224062773, |
|
"grad_norm": 0.9001926183700562, |
|
"learning_rate": 9.997878523684977e-06, |
|
"loss": 10.1562, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.022449869224062773, |
|
"eval_accuracy": 0.07751218234348835, |
|
"eval_loss": 10.1484375, |
|
"eval_runtime": 265.0408, |
|
"eval_samples_per_second": 127.403, |
|
"eval_steps_per_second": 2.656, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.02252252252252252, |
|
"grad_norm": 0.9069272875785828, |
|
"learning_rate": 9.997871258355129e-06, |
|
"loss": 10.1719, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.02252252252252252, |
|
"eval_accuracy": 0.07750650836140868, |
|
"eval_loss": 10.1484375, |
|
"eval_runtime": 265.3373, |
|
"eval_samples_per_second": 127.261, |
|
"eval_steps_per_second": 2.653, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.022595175820982274, |
|
"grad_norm": 0.92779940366745, |
|
"learning_rate": 9.997863993025285e-06, |
|
"loss": 10.1562, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.022595175820982274, |
|
"eval_accuracy": 0.07742909903446483, |
|
"eval_loss": 10.140625, |
|
"eval_runtime": 264.244, |
|
"eval_samples_per_second": 127.787, |
|
"eval_steps_per_second": 2.664, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.022667829119442023, |
|
"grad_norm": 0.9007747769355774, |
|
"learning_rate": 9.997856727695438e-06, |
|
"loss": 10.1562, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.022667829119442023, |
|
"eval_accuracy": 0.07736051911841024, |
|
"eval_loss": 10.140625, |
|
"eval_runtime": 265.1787, |
|
"eval_samples_per_second": 127.337, |
|
"eval_steps_per_second": 2.655, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.02274048241790177, |
|
"grad_norm": 0.9027653336524963, |
|
"learning_rate": 9.997849462365592e-06, |
|
"loss": 10.1562, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.02274048241790177, |
|
"eval_accuracy": 0.07730719526641676, |
|
"eval_loss": 10.140625, |
|
"eval_runtime": 262.9549, |
|
"eval_samples_per_second": 128.414, |
|
"eval_steps_per_second": 2.677, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.022813135716361524, |
|
"grad_norm": 0.9862774610519409, |
|
"learning_rate": 9.997842197035746e-06, |
|
"loss": 10.1406, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.022813135716361524, |
|
"eval_accuracy": 0.07735834795179813, |
|
"eval_loss": 10.140625, |
|
"eval_runtime": 263.4521, |
|
"eval_samples_per_second": 128.171, |
|
"eval_steps_per_second": 2.672, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.022885789014821273, |
|
"grad_norm": 0.9319806694984436, |
|
"learning_rate": 9.9978349317059e-06, |
|
"loss": 10.1406, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.022885789014821273, |
|
"eval_accuracy": 0.07738877323325587, |
|
"eval_loss": 10.140625, |
|
"eval_runtime": 262.7183, |
|
"eval_samples_per_second": 128.529, |
|
"eval_steps_per_second": 2.68, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.02295844231328102, |
|
"grad_norm": 0.9190651774406433, |
|
"learning_rate": 9.997827666376054e-06, |
|
"loss": 10.1406, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.02295844231328102, |
|
"eval_accuracy": 0.07740159759071141, |
|
"eval_loss": 10.140625, |
|
"eval_runtime": 264.3693, |
|
"eval_samples_per_second": 127.727, |
|
"eval_steps_per_second": 2.663, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.023031095611740774, |
|
"grad_norm": 0.9385405778884888, |
|
"learning_rate": 9.997820401046208e-06, |
|
"loss": 10.1328, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.023031095611740774, |
|
"eval_accuracy": 0.07745408192494821, |
|
"eval_loss": 10.1328125, |
|
"eval_runtime": 263.8288, |
|
"eval_samples_per_second": 127.988, |
|
"eval_steps_per_second": 2.668, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.023103748910200523, |
|
"grad_norm": 0.9594412446022034, |
|
"learning_rate": 9.997813135716362e-06, |
|
"loss": 10.1484, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.023103748910200523, |
|
"eval_accuracy": 0.07749652099499298, |
|
"eval_loss": 10.1328125, |
|
"eval_runtime": 262.8505, |
|
"eval_samples_per_second": 128.465, |
|
"eval_steps_per_second": 2.678, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.023176402208660272, |
|
"grad_norm": 0.9393614530563354, |
|
"learning_rate": 9.997805870386516e-06, |
|
"loss": 10.1328, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.023176402208660272, |
|
"eval_accuracy": 0.07747440404443759, |
|
"eval_loss": 10.1328125, |
|
"eval_runtime": 263.8311, |
|
"eval_samples_per_second": 127.987, |
|
"eval_steps_per_second": 2.668, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.023249055507120024, |
|
"grad_norm": 0.9211113452911377, |
|
"learning_rate": 9.99779860505667e-06, |
|
"loss": 10.1328, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.023249055507120024, |
|
"eval_accuracy": 0.07746320082471908, |
|
"eval_loss": 10.1328125, |
|
"eval_runtime": 264.0976, |
|
"eval_samples_per_second": 127.858, |
|
"eval_steps_per_second": 2.666, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.023321708805579773, |
|
"grad_norm": 0.9568068385124207, |
|
"learning_rate": 9.997791339726824e-06, |
|
"loss": 10.125, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.023321708805579773, |
|
"eval_accuracy": 0.07749640519944033, |
|
"eval_loss": 10.1328125, |
|
"eval_runtime": 264.3338, |
|
"eval_samples_per_second": 127.744, |
|
"eval_steps_per_second": 2.663, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.023394362104039522, |
|
"grad_norm": 0.9372284412384033, |
|
"learning_rate": 9.99778407439698e-06, |
|
"loss": 10.1406, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.023394362104039522, |
|
"eval_accuracy": 0.07760533986559205, |
|
"eval_loss": 10.125, |
|
"eval_runtime": 264.2267, |
|
"eval_samples_per_second": 127.796, |
|
"eval_steps_per_second": 2.664, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.023467015402499274, |
|
"grad_norm": 0.9022813439369202, |
|
"learning_rate": 9.997776809067132e-06, |
|
"loss": 10.1328, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.023467015402499274, |
|
"eval_accuracy": 0.07770527142752555, |
|
"eval_loss": 10.125, |
|
"eval_runtime": 263.5487, |
|
"eval_samples_per_second": 128.124, |
|
"eval_steps_per_second": 2.671, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.023539668700959023, |
|
"grad_norm": 0.9569028615951538, |
|
"learning_rate": 9.997769543737288e-06, |
|
"loss": 10.125, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.023539668700959023, |
|
"eval_accuracy": 0.07776036116169688, |
|
"eval_loss": 10.125, |
|
"eval_runtime": 264.1074, |
|
"eval_samples_per_second": 127.853, |
|
"eval_steps_per_second": 2.666, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.023612321999418772, |
|
"grad_norm": 0.926621675491333, |
|
"learning_rate": 9.99776227840744e-06, |
|
"loss": 10.125, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.023612321999418772, |
|
"eval_accuracy": 0.07774163123105639, |
|
"eval_loss": 10.125, |
|
"eval_runtime": 264.9077, |
|
"eval_samples_per_second": 127.467, |
|
"eval_steps_per_second": 2.658, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.023684975297878524, |
|
"grad_norm": 0.8989631533622742, |
|
"learning_rate": 9.997755013077595e-06, |
|
"loss": 10.125, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.023684975297878524, |
|
"eval_accuracy": 0.0777147087650662, |
|
"eval_loss": 10.125, |
|
"eval_runtime": 262.9955, |
|
"eval_samples_per_second": 128.394, |
|
"eval_steps_per_second": 2.677, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.023757628596338273, |
|
"grad_norm": 0.918336033821106, |
|
"learning_rate": 9.997747747747748e-06, |
|
"loss": 10.1328, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.023757628596338273, |
|
"eval_accuracy": 0.07771042432961829, |
|
"eval_loss": 10.1171875, |
|
"eval_runtime": 264.3981, |
|
"eval_samples_per_second": 127.713, |
|
"eval_steps_per_second": 2.663, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.023830281894798022, |
|
"grad_norm": 0.9403995275497437, |
|
"learning_rate": 9.997740482417903e-06, |
|
"loss": 10.1172, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.023830281894798022, |
|
"eval_accuracy": 0.07768630990577977, |
|
"eval_loss": 10.1171875, |
|
"eval_runtime": 262.3713, |
|
"eval_samples_per_second": 128.699, |
|
"eval_steps_per_second": 2.683, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.023902935193257775, |
|
"grad_norm": 0.9186561703681946, |
|
"learning_rate": 9.997733217088057e-06, |
|
"loss": 10.1172, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.023902935193257775, |
|
"eval_accuracy": 0.07772889372026533, |
|
"eval_loss": 10.1171875, |
|
"eval_runtime": 264.6641, |
|
"eval_samples_per_second": 127.584, |
|
"eval_steps_per_second": 2.66, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.023975588491717523, |
|
"grad_norm": 0.9268199801445007, |
|
"learning_rate": 9.997725951758211e-06, |
|
"loss": 10.125, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.023975588491717523, |
|
"eval_accuracy": 0.07778919425430574, |
|
"eval_loss": 10.1171875, |
|
"eval_runtime": 264.2577, |
|
"eval_samples_per_second": 127.781, |
|
"eval_steps_per_second": 2.664, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.024048241790177276, |
|
"grad_norm": 0.9123356342315674, |
|
"learning_rate": 9.997718686428365e-06, |
|
"loss": 10.1094, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.024048241790177276, |
|
"eval_accuracy": 0.07782868053775802, |
|
"eval_loss": 10.1171875, |
|
"eval_runtime": 264.6854, |
|
"eval_samples_per_second": 127.574, |
|
"eval_steps_per_second": 2.66, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.024120895088637025, |
|
"grad_norm": 0.9475653767585754, |
|
"learning_rate": 9.997711421098517e-06, |
|
"loss": 10.1094, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.024120895088637025, |
|
"eval_accuracy": 0.07771977482049446, |
|
"eval_loss": 10.109375, |
|
"eval_runtime": 264.176, |
|
"eval_samples_per_second": 127.82, |
|
"eval_steps_per_second": 2.665, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.024193548387096774, |
|
"grad_norm": 0.9262251853942871, |
|
"learning_rate": 9.997704155768673e-06, |
|
"loss": 10.1094, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.024193548387096774, |
|
"eval_accuracy": 0.07764413137572845, |
|
"eval_loss": 10.109375, |
|
"eval_runtime": 266.5418, |
|
"eval_samples_per_second": 126.686, |
|
"eval_steps_per_second": 2.641, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.024266201685556526, |
|
"grad_norm": 0.9046162962913513, |
|
"learning_rate": 9.997696890438827e-06, |
|
"loss": 10.1172, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.024266201685556526, |
|
"eval_accuracy": 0.07753328608295808, |
|
"eval_loss": 10.109375, |
|
"eval_runtime": 264.8485, |
|
"eval_samples_per_second": 127.496, |
|
"eval_steps_per_second": 2.658, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.024338854984016275, |
|
"grad_norm": 0.8864550590515137, |
|
"learning_rate": 9.997689625108981e-06, |
|
"loss": 10.125, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.024338854984016275, |
|
"eval_accuracy": 0.07736963801818111, |
|
"eval_loss": 10.109375, |
|
"eval_runtime": 262.4932, |
|
"eval_samples_per_second": 128.64, |
|
"eval_steps_per_second": 2.682, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.024411508282476024, |
|
"grad_norm": 0.8957669138908386, |
|
"learning_rate": 9.997682359779135e-06, |
|
"loss": 10.1172, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.024411508282476024, |
|
"eval_accuracy": 0.07715223186808826, |
|
"eval_loss": 10.109375, |
|
"eval_runtime": 264.8989, |
|
"eval_samples_per_second": 127.471, |
|
"eval_steps_per_second": 2.658, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.024484161580935776, |
|
"grad_norm": 0.9608045816421509, |
|
"learning_rate": 9.997675094449289e-06, |
|
"loss": 10.1016, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.024484161580935776, |
|
"eval_accuracy": 0.07713784427067198, |
|
"eval_loss": 10.1015625, |
|
"eval_runtime": 264.4843, |
|
"eval_samples_per_second": 127.671, |
|
"eval_steps_per_second": 2.662, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.024556814879395525, |
|
"grad_norm": 0.9367948770523071, |
|
"learning_rate": 9.997667829119443e-06, |
|
"loss": 10.1094, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.024556814879395525, |
|
"eval_accuracy": 0.0773351309434926, |
|
"eval_loss": 10.1015625, |
|
"eval_runtime": 264.3155, |
|
"eval_samples_per_second": 127.753, |
|
"eval_steps_per_second": 2.663, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.024629468177855274, |
|
"grad_norm": 0.9086586833000183, |
|
"learning_rate": 9.997660563789597e-06, |
|
"loss": 10.1172, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.024629468177855274, |
|
"eval_accuracy": 0.07753942324724832, |
|
"eval_loss": 10.1015625, |
|
"eval_runtime": 263.7541, |
|
"eval_samples_per_second": 128.025, |
|
"eval_steps_per_second": 2.669, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.024702121476315026, |
|
"grad_norm": 0.936314046382904, |
|
"learning_rate": 9.99765329845975e-06, |
|
"loss": 10.1094, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.024702121476315026, |
|
"eval_accuracy": 0.07769429979891233, |
|
"eval_loss": 10.1015625, |
|
"eval_runtime": 264.8284, |
|
"eval_samples_per_second": 127.505, |
|
"eval_steps_per_second": 2.658, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.024774774774774775, |
|
"grad_norm": 0.8729653358459473, |
|
"learning_rate": 9.997646033129905e-06, |
|
"loss": 10.1172, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.024774774774774775, |
|
"eval_accuracy": 0.07776137437278254, |
|
"eval_loss": 10.1015625, |
|
"eval_runtime": 264.1666, |
|
"eval_samples_per_second": 127.825, |
|
"eval_steps_per_second": 2.665, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.024847428073234524, |
|
"grad_norm": 0.9122793078422546, |
|
"learning_rate": 9.997638767800059e-06, |
|
"loss": 10.0938, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.024847428073234524, |
|
"eval_accuracy": 0.07787676464099427, |
|
"eval_loss": 10.09375, |
|
"eval_runtime": 264.3725, |
|
"eval_samples_per_second": 127.725, |
|
"eval_steps_per_second": 2.663, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.024920081371694276, |
|
"grad_norm": 0.9096229076385498, |
|
"learning_rate": 9.997631502470212e-06, |
|
"loss": 10.1016, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.024920081371694276, |
|
"eval_accuracy": 0.077991257493673, |
|
"eval_loss": 10.09375, |
|
"eval_runtime": 265.889, |
|
"eval_samples_per_second": 126.997, |
|
"eval_steps_per_second": 2.648, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.024992734670154025, |
|
"grad_norm": 0.9116566181182861, |
|
"learning_rate": 9.997624237140366e-06, |
|
"loss": 10.0938, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.024992734670154025, |
|
"eval_accuracy": 0.07803271230152027, |
|
"eval_loss": 10.09375, |
|
"eval_runtime": 263.9618, |
|
"eval_samples_per_second": 127.924, |
|
"eval_steps_per_second": 2.667, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.025065387968613774, |
|
"grad_norm": 0.9252493381500244, |
|
"learning_rate": 9.99761697181052e-06, |
|
"loss": 10.0938, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.025065387968613774, |
|
"eval_accuracy": 0.0780303674415792, |
|
"eval_loss": 10.09375, |
|
"eval_runtime": 264.8551, |
|
"eval_samples_per_second": 127.492, |
|
"eval_steps_per_second": 2.658, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.025138041267073526, |
|
"grad_norm": 0.8922543525695801, |
|
"learning_rate": 9.997609706480676e-06, |
|
"loss": 10.1016, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.025138041267073526, |
|
"eval_accuracy": 0.07808099904697366, |
|
"eval_loss": 10.09375, |
|
"eval_runtime": 264.1901, |
|
"eval_samples_per_second": 127.813, |
|
"eval_steps_per_second": 2.665, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.025210694565533275, |
|
"grad_norm": 0.8663190603256226, |
|
"learning_rate": 9.997602441150828e-06, |
|
"loss": 10.1094, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.025210694565533275, |
|
"eval_accuracy": 0.07801609563971557, |
|
"eval_loss": 10.0859375, |
|
"eval_runtime": 262.4011, |
|
"eval_samples_per_second": 128.685, |
|
"eval_steps_per_second": 2.683, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.025283347863993024, |
|
"grad_norm": 0.9128501415252686, |
|
"learning_rate": 9.997595175820984e-06, |
|
"loss": 10.0938, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.025283347863993024, |
|
"eval_accuracy": 0.07798708885377775, |
|
"eval_loss": 10.0859375, |
|
"eval_runtime": 263.7953, |
|
"eval_samples_per_second": 128.005, |
|
"eval_steps_per_second": 2.669, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.025356001162452776, |
|
"grad_norm": 0.9011194705963135, |
|
"learning_rate": 9.997587910491136e-06, |
|
"loss": 10.0938, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.025356001162452776, |
|
"eval_accuracy": 0.07796170067886012, |
|
"eval_loss": 10.0859375, |
|
"eval_runtime": 263.0115, |
|
"eval_samples_per_second": 128.386, |
|
"eval_steps_per_second": 2.677, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.025428654460912525, |
|
"grad_norm": 0.9395301342010498, |
|
"learning_rate": 9.997580645161292e-06, |
|
"loss": 10.0859, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.025428654460912525, |
|
"eval_accuracy": 0.0779401337571798, |
|
"eval_loss": 10.0859375, |
|
"eval_runtime": 265.4304, |
|
"eval_samples_per_second": 127.216, |
|
"eval_steps_per_second": 2.652, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.025501307759372274, |
|
"grad_norm": 0.9046230316162109, |
|
"learning_rate": 9.997573379831446e-06, |
|
"loss": 10.0859, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.025501307759372274, |
|
"eval_accuracy": 0.07796905369645313, |
|
"eval_loss": 10.0859375, |
|
"eval_runtime": 264.1028, |
|
"eval_samples_per_second": 127.856, |
|
"eval_steps_per_second": 2.666, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.025573961057832027, |
|
"grad_norm": 0.9076169729232788, |
|
"learning_rate": 9.9975661145016e-06, |
|
"loss": 10.0938, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.025573961057832027, |
|
"eval_accuracy": 0.07807518032045319, |
|
"eval_loss": 10.078125, |
|
"eval_runtime": 263.5124, |
|
"eval_samples_per_second": 128.142, |
|
"eval_steps_per_second": 2.672, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.025573961057832027, |
|
"step": 352, |
|
"total_flos": 247015648788480.0, |
|
"train_loss": 10.390092329545455, |
|
"train_runtime": 94034.2968, |
|
"train_samples_per_second": 702.555, |
|
"train_steps_per_second": 14.637 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1376400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 247015648788480.0, |
|
"train_batch_size": 48, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|