|
{ |
|
"best_metric": 1.0819282531738281, |
|
"best_model_checkpoint": "./jako_mbartLarge_13p_run1/checkpoint-7500", |
|
"epoch": 3.5055829654635158, |
|
"eval_steps": 1500, |
|
"global_step": 13500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.97382884061764e-05, |
|
"loss": 1.9323, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.9084009421617376e-05, |
|
"loss": 1.487, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.8429730437058366e-05, |
|
"loss": 1.3826, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_bleu": 22.0729, |
|
"eval_gen_len": 18.9717, |
|
"eval_loss": 1.2989100217819214, |
|
"eval_runtime": 581.8634, |
|
"eval_samples_per_second": 13.235, |
|
"eval_steps_per_second": 0.828, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.777545145249935e-05, |
|
"loss": 1.2846, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.712117246794033e-05, |
|
"loss": 1.2292, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.646689348338132e-05, |
|
"loss": 1.1964, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_bleu": 25.4863, |
|
"eval_gen_len": 19.1908, |
|
"eval_loss": 1.1629722118377686, |
|
"eval_runtime": 599.5574, |
|
"eval_samples_per_second": 12.844, |
|
"eval_steps_per_second": 0.804, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.58126144988223e-05, |
|
"loss": 1.1636, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.515833551426328e-05, |
|
"loss": 1.1243, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.450405652970427e-05, |
|
"loss": 0.9449, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_bleu": 27.385, |
|
"eval_gen_len": 18.2955, |
|
"eval_loss": 1.112542748451233, |
|
"eval_runtime": 534.7746, |
|
"eval_samples_per_second": 14.4, |
|
"eval_steps_per_second": 0.901, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.3849777545145253e-05, |
|
"loss": 0.8708, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.3195498560586236e-05, |
|
"loss": 0.8446, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.254121957602722e-05, |
|
"loss": 0.8102, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_bleu": 28.0041, |
|
"eval_gen_len": 18.6572, |
|
"eval_loss": 1.0919792652130127, |
|
"eval_runtime": 549.0767, |
|
"eval_samples_per_second": 14.025, |
|
"eval_steps_per_second": 0.878, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.18869405914682e-05, |
|
"loss": 0.7903, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 4.123266160690919e-05, |
|
"loss": 0.7814, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 4.057838262235017e-05, |
|
"loss": 0.7692, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_bleu": 29.1055, |
|
"eval_gen_len": 18.2731, |
|
"eval_loss": 1.0819282531738281, |
|
"eval_runtime": 529.4168, |
|
"eval_samples_per_second": 14.546, |
|
"eval_steps_per_second": 0.91, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.992410363779115e-05, |
|
"loss": 0.7152, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.926982465323214e-05, |
|
"loss": 0.6146, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.8615545668673124e-05, |
|
"loss": 0.5741, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_bleu": 28.1574, |
|
"eval_gen_len": 18.3485, |
|
"eval_loss": 1.1368560791015625, |
|
"eval_runtime": 531.4915, |
|
"eval_samples_per_second": 14.489, |
|
"eval_steps_per_second": 0.907, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.7961266684114114e-05, |
|
"loss": 0.5501, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.730698769955509e-05, |
|
"loss": 0.5317, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 3.665270871499607e-05, |
|
"loss": 0.5198, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_bleu": 28.657, |
|
"eval_gen_len": 18.4527, |
|
"eval_loss": 1.1538134813308716, |
|
"eval_runtime": 540.4496, |
|
"eval_samples_per_second": 14.249, |
|
"eval_steps_per_second": 0.892, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 3.599842973043706e-05, |
|
"loss": 0.5201, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 3.5344150745878046e-05, |
|
"loss": 0.5022, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.468987176131903e-05, |
|
"loss": 0.4532, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_bleu": 28.6914, |
|
"eval_gen_len": 18.4562, |
|
"eval_loss": 1.158207893371582, |
|
"eval_runtime": 536.18, |
|
"eval_samples_per_second": 14.363, |
|
"eval_steps_per_second": 0.899, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.403559277676001e-05, |
|
"loss": 0.3846, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.3381313792200995e-05, |
|
"loss": 0.3652, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.2727034807641985e-05, |
|
"loss": 0.3466, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_bleu": 28.8955, |
|
"eval_gen_len": 18.427, |
|
"eval_loss": 1.2048161029815674, |
|
"eval_runtime": 531.5751, |
|
"eval_samples_per_second": 14.487, |
|
"eval_steps_per_second": 0.907, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"step": 13500, |
|
"total_flos": 4.682040518346015e+17, |
|
"train_loss": 0.8403220813892506, |
|
"train_runtime": 20423.5684, |
|
"train_samples_per_second": 30.169, |
|
"train_steps_per_second": 1.886 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 38510, |
|
"num_train_epochs": 10, |
|
"save_steps": 1500, |
|
"total_flos": 4.682040518346015e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|