{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9261208166701747,
  "eval_steps": 100,
  "global_step": 2200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "grad_norm": 2.6430506706237793,
      "learning_rate": 9.5e-06,
      "loss": 5.6141,
      "step": 100
    },
    {
      "epoch": 0.04,
      "eval_cer": 0.9770320871299519,
      "eval_loss": 2.355087995529175,
      "eval_runtime": 329.8418,
      "eval_samples_per_second": 28.735,
      "eval_steps_per_second": 3.593,
      "step": 100
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.838773012161255,
      "learning_rate": 1.9500000000000003e-05,
      "loss": 2.1498,
      "step": 200
    },
    {
      "epoch": 0.08,
      "eval_cer": 0.6385162879824802,
      "eval_loss": 1.961960792541504,
      "eval_runtime": 334.0577,
      "eval_samples_per_second": 28.372,
      "eval_steps_per_second": 3.547,
      "step": 200
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.848980665206909,
      "learning_rate": 2.95e-05,
      "loss": 2.6461,
      "step": 300
    },
    {
      "epoch": 0.13,
      "eval_cer": 0.5852479371162645,
      "eval_loss": 2.6564223766326904,
      "eval_runtime": 350.7705,
      "eval_samples_per_second": 27.021,
      "eval_steps_per_second": 3.378,
      "step": 300
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.9299519062042236,
      "learning_rate": 3.9500000000000005e-05,
      "loss": 2.359,
      "step": 400
    },
    {
      "epoch": 0.17,
      "eval_cer": 0.5907913261116108,
      "eval_loss": 2.6195201873779297,
      "eval_runtime": 341.3159,
      "eval_samples_per_second": 27.769,
      "eval_steps_per_second": 3.472,
      "step": 400
    },
    {
      "epoch": 0.21,
      "grad_norm": 4.236645698547363,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 4.2363,
      "step": 500
    },
    {
      "epoch": 0.21,
      "eval_cer": 0.624139650385202,
      "eval_loss": 2.457566261291504,
      "eval_runtime": 356.3471,
      "eval_samples_per_second": 26.598,
      "eval_steps_per_second": 3.325,
      "step": 500
    },
    {
      "epoch": 0.25,
      "grad_norm": 13.769584655761719,
      "learning_rate": 4.9932862190812725e-05,
      "loss": 2.6933,
      "step": 600
    },
    {
      "epoch": 0.25,
      "eval_cer": 0.6483369833013961,
      "eval_loss": 3.4720070362091064,
      "eval_runtime": 345.5879,
      "eval_samples_per_second": 27.426,
      "eval_steps_per_second": 3.429,
      "step": 600
    },
    {
      "epoch": 0.29,
      "grad_norm": 4.292990207672119,
      "learning_rate": 4.986289752650177e-05,
      "loss": 4.1075,
      "step": 700
    },
    {
      "epoch": 0.29,
      "eval_cer": 0.6181098705564898,
      "eval_loss": 3.3849761486053467,
      "eval_runtime": 353.3198,
      "eval_samples_per_second": 26.826,
      "eval_steps_per_second": 3.354,
      "step": 700
    },
    {
      "epoch": 0.34,
      "grad_norm": 4.32611083984375,
      "learning_rate": 4.97922261484099e-05,
      "loss": 2.6907,
      "step": 800
    },
    {
      "epoch": 0.34,
      "eval_cer": 0.6191510891243988,
      "eval_loss": 2.5412750244140625,
      "eval_runtime": 343.4967,
      "eval_samples_per_second": 27.593,
      "eval_steps_per_second": 3.45,
      "step": 800
    },
    {
      "epoch": 0.38,
      "grad_norm": 3.173403263092041,
      "learning_rate": 4.972155477031802e-05,
      "loss": 4.2294,
      "step": 900
    },
    {
      "epoch": 0.38,
      "eval_cer": 0.5840918423213797,
      "eval_loss": 2.6817102432250977,
      "eval_runtime": 354.5341,
      "eval_samples_per_second": 26.734,
      "eval_steps_per_second": 3.342,
      "step": 900
    },
    {
      "epoch": 0.42,
      "grad_norm": 4.8158440589904785,
      "learning_rate": 4.965088339222615e-05,
      "loss": 2.2949,
      "step": 1000
    },
    {
      "epoch": 0.42,
      "eval_cer": 0.5736649915920379,
      "eval_loss": 1.9513429403305054,
      "eval_runtime": 345.1224,
      "eval_samples_per_second": 27.463,
      "eval_steps_per_second": 3.434,
      "step": 1000
    },
    {
      "epoch": 0.46,
      "grad_norm": 3.0650887489318848,
      "learning_rate": 4.958021201413428e-05,
      "loss": 2.6115,
      "step": 1100
    },
    {
      "epoch": 0.46,
      "eval_cer": 0.5645799929607759,
      "eval_loss": 2.8409903049468994,
      "eval_runtime": 352.0824,
      "eval_samples_per_second": 26.92,
      "eval_steps_per_second": 3.366,
      "step": 1100
    },
    {
      "epoch": 0.51,
      "grad_norm": 3.0540664196014404,
      "learning_rate": 4.950954063604241e-05,
      "loss": 2.3453,
      "step": 1200
    },
    {
      "epoch": 0.51,
      "eval_cer": 0.598292499315631,
      "eval_loss": 1.8713030815124512,
      "eval_runtime": 344.1368,
      "eval_samples_per_second": 27.541,
      "eval_steps_per_second": 3.443,
      "step": 1200
    },
    {
      "epoch": 0.55,
      "grad_norm": 4.366485118865967,
      "learning_rate": 4.9438869257950535e-05,
      "loss": 3.2578,
      "step": 1300
    },
    {
      "epoch": 0.55,
      "eval_cer": 0.5734816784638849,
      "eval_loss": 2.000699758529663,
      "eval_runtime": 350.6902,
      "eval_samples_per_second": 27.027,
      "eval_steps_per_second": 3.379,
      "step": 1300
    },
    {
      "epoch": 0.59,
      "grad_norm": 81.67564392089844,
      "learning_rate": 4.936819787985866e-05,
      "loss": 1.9657,
      "step": 1400
    },
    {
      "epoch": 0.59,
      "eval_cer": 0.6325427241797349,
      "eval_loss": 3.403754472732544,
      "eval_runtime": 340.0489,
      "eval_samples_per_second": 27.872,
      "eval_steps_per_second": 3.485,
      "step": 1400
    },
    {
      "epoch": 0.63,
      "grad_norm": 3.837653875350952,
      "learning_rate": 4.929752650176679e-05,
      "loss": 2.6865,
      "step": 1500
    },
    {
      "epoch": 0.63,
      "eval_cer": 0.5574918853388604,
      "eval_loss": 3.5201330184936523,
      "eval_runtime": 351.432,
      "eval_samples_per_second": 26.97,
      "eval_steps_per_second": 3.372,
      "step": 1500
    },
    {
      "epoch": 0.67,
      "grad_norm": 2.9047458171844482,
      "learning_rate": 4.922685512367491e-05,
      "loss": 2.4105,
      "step": 1600
    },
    {
      "epoch": 0.67,
      "eval_cer": 0.5886624496499941,
      "eval_loss": 3.5269789695739746,
      "eval_runtime": 347.023,
      "eval_samples_per_second": 27.312,
      "eval_steps_per_second": 3.415,
      "step": 1600
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.9270007610321045,
      "learning_rate": 4.915618374558304e-05,
      "loss": 2.418,
      "step": 1700
    },
    {
      "epoch": 0.72,
      "eval_cer": 0.624877791247898,
      "eval_loss": 2.498126983642578,
      "eval_runtime": 355.9419,
      "eval_samples_per_second": 26.628,
      "eval_steps_per_second": 3.329,
      "step": 1700
    },
    {
      "epoch": 0.76,
      "grad_norm": 3.458078622817993,
      "learning_rate": 4.908551236749117e-05,
      "loss": 2.6118,
      "step": 1800
    },
    {
      "epoch": 0.76,
      "eval_cer": 0.5718074185600877,
      "eval_loss": 3.407224655151367,
      "eval_runtime": 343.8365,
      "eval_samples_per_second": 27.565,
      "eval_steps_per_second": 3.446,
      "step": 1800
    },
    {
      "epoch": 0.8,
      "grad_norm": 1.9933278560638428,
      "learning_rate": 4.90148409893993e-05,
      "loss": 3.2046,
      "step": 1900
    },
    {
      "epoch": 0.8,
      "eval_cer": 0.5539844941535333,
      "eval_loss": 1.928993821144104,
      "eval_runtime": 357.2509,
      "eval_samples_per_second": 26.53,
      "eval_steps_per_second": 3.317,
      "step": 1900
    },
    {
      "epoch": 0.84,
      "grad_norm": 4.917328357696533,
      "learning_rate": 4.8944169611307425e-05,
      "loss": 3.1904,
      "step": 2000
    },
    {
      "epoch": 0.84,
      "eval_cer": 0.5568001838019632,
      "eval_loss": 2.729193925857544,
      "eval_runtime": 341.3463,
      "eval_samples_per_second": 27.767,
      "eval_steps_per_second": 3.472,
      "step": 2000
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.842353343963623,
      "learning_rate": 4.8873498233215547e-05,
      "loss": 2.1649,
      "step": 2100
    },
    {
      "epoch": 0.88,
      "eval_cer": 0.544757733369833,
      "eval_loss": 2.421653985977173,
      "eval_runtime": 358.7261,
      "eval_samples_per_second": 26.421,
      "eval_steps_per_second": 3.303,
      "step": 2100
    },
    {
      "epoch": 0.93,
      "grad_norm": 4.818225383758545,
      "learning_rate": 4.880282685512368e-05,
      "loss": 2.2775,
      "step": 2200
    },
    {
      "epoch": 0.93,
      "eval_cer": 0.5533832270931915,
      "eval_loss": 2.188101291656494,
      "eval_runtime": 354.4215,
      "eval_samples_per_second": 26.742,
      "eval_steps_per_second": 3.343,
      "step": 2200
    }
  ],
  "logging_steps": 100,
  "max_steps": 71250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 100,
  "total_flos": 2.411832086421037e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}