|
{ |
|
"best_metric": 0.436233788728714, |
|
"best_model_checkpoint": "flan_large_ft_adam_filtd/checkpoint-20000", |
|
"epoch": 8.32639467110741, |
|
"global_step": 20000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.8e-05, |
|
"loss": 1.6461, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_f1": 0.7336931843203959, |
|
"eval_loss": 0.543613076210022, |
|
"eval_precision": 0.7462491253306569, |
|
"eval_recall": 0.7237358282616747, |
|
"eval_runtime": 339.9295, |
|
"eval_sacrebleu": 5.478519586104642, |
|
"eval_samples_per_second": 28.262, |
|
"eval_steps_per_second": 0.885, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.6266, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_f1": 0.7411911502965993, |
|
"eval_loss": 0.5126737356185913, |
|
"eval_precision": 0.7532233481006616, |
|
"eval_recall": 0.7315065660194513, |
|
"eval_runtime": 330.4901, |
|
"eval_sacrebleu": 5.581437847090348, |
|
"eval_samples_per_second": 29.069, |
|
"eval_steps_per_second": 0.911, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.4e-05, |
|
"loss": 0.591, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_f1": 0.744462796992661, |
|
"eval_loss": 0.49647408723831177, |
|
"eval_precision": 0.7574931879507412, |
|
"eval_recall": 0.7338014463765621, |
|
"eval_runtime": 328.9086, |
|
"eval_sacrebleu": 5.712014723912081, |
|
"eval_samples_per_second": 29.209, |
|
"eval_steps_per_second": 0.915, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.2000000000000005e-05, |
|
"loss": 0.5723, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_f1": 0.7482227764808884, |
|
"eval_loss": 0.4846822917461395, |
|
"eval_precision": 0.7609444108747153, |
|
"eval_recall": 0.7378004755756169, |
|
"eval_runtime": 328.5657, |
|
"eval_sacrebleu": 5.759016321774072, |
|
"eval_samples_per_second": 29.239, |
|
"eval_steps_per_second": 0.916, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.0000000000000004e-05, |
|
"loss": 0.5576, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_f1": 0.7495765637889841, |
|
"eval_loss": 0.4766782820224762, |
|
"eval_precision": 0.7626191373513915, |
|
"eval_recall": 0.7388611246473127, |
|
"eval_runtime": 326.106, |
|
"eval_sacrebleu": 5.811597759442428, |
|
"eval_samples_per_second": 29.46, |
|
"eval_steps_per_second": 0.923, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.8e-05, |
|
"loss": 0.5393, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_f1": 0.7505085720869012, |
|
"eval_loss": 0.47017449140548706, |
|
"eval_precision": 0.7641689959470273, |
|
"eval_recall": 0.7391936523188186, |
|
"eval_runtime": 328.1093, |
|
"eval_sacrebleu": 5.885476099157212, |
|
"eval_samples_per_second": 29.28, |
|
"eval_steps_per_second": 0.917, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 0.5318, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_f1": 0.7533312253634614, |
|
"eval_loss": 0.46316930651664734, |
|
"eval_precision": 0.7659040383844901, |
|
"eval_recall": 0.7430121202963726, |
|
"eval_runtime": 330.001, |
|
"eval_sacrebleu": 5.928167844047436, |
|
"eval_samples_per_second": 29.112, |
|
"eval_steps_per_second": 0.912, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 2.4e-05, |
|
"loss": 0.514, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_f1": 0.7534677800234317, |
|
"eval_loss": 0.45805472135543823, |
|
"eval_precision": 0.7670932310055877, |
|
"eval_recall": 0.7421523856331236, |
|
"eval_runtime": 326.6901, |
|
"eval_sacrebleu": 5.977964366842188, |
|
"eval_samples_per_second": 29.407, |
|
"eval_steps_per_second": 0.921, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 0.5084, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_f1": 0.7547050486360937, |
|
"eval_loss": 0.4548051953315735, |
|
"eval_precision": 0.7675773970595853, |
|
"eval_recall": 0.7440757734278951, |
|
"eval_runtime": 329.4886, |
|
"eval_sacrebleu": 5.986154752583561, |
|
"eval_samples_per_second": 29.157, |
|
"eval_steps_per_second": 0.914, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 2e-05, |
|
"loss": 0.4987, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_f1": 0.7559209833584902, |
|
"eval_loss": 0.45204678177833557, |
|
"eval_precision": 0.7687782156203135, |
|
"eval_recall": 0.7452970269140299, |
|
"eval_runtime": 328.8363, |
|
"eval_sacrebleu": 6.112626970123219, |
|
"eval_samples_per_second": 29.215, |
|
"eval_steps_per_second": 0.915, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.4916, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_f1": 0.7562574939163441, |
|
"eval_loss": 0.44847676157951355, |
|
"eval_precision": 0.7693036325555866, |
|
"eval_recall": 0.7454933088714478, |
|
"eval_runtime": 328.9876, |
|
"eval_sacrebleu": 6.110669366899824, |
|
"eval_samples_per_second": 29.202, |
|
"eval_steps_per_second": 0.915, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.4855, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_f1": 0.7573605920082946, |
|
"eval_loss": 0.44527965784072876, |
|
"eval_precision": 0.7699207649359262, |
|
"eval_recall": 0.7470148278161243, |
|
"eval_runtime": 326.1238, |
|
"eval_sacrebleu": 6.199619598523688, |
|
"eval_samples_per_second": 29.458, |
|
"eval_steps_per_second": 0.923, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 1.4e-05, |
|
"loss": 0.4735, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_f1": 0.757606573654931, |
|
"eval_loss": 0.44323351979255676, |
|
"eval_precision": 0.7707262306620629, |
|
"eval_recall": 0.7467577414308141, |
|
"eval_runtime": 325.2009, |
|
"eval_sacrebleu": 6.170302966384815, |
|
"eval_samples_per_second": 29.542, |
|
"eval_steps_per_second": 0.926, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.4714, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_f1": 0.7582219671704239, |
|
"eval_loss": 0.44084230065345764, |
|
"eval_precision": 0.7707039175730187, |
|
"eval_recall": 0.7479663680666155, |
|
"eval_runtime": 328.6225, |
|
"eval_sacrebleu": 6.217397957778798, |
|
"eval_samples_per_second": 29.234, |
|
"eval_steps_per_second": 0.916, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4619, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"eval_f1": 0.7582127043374982, |
|
"eval_loss": 0.44005897641181946, |
|
"eval_precision": 0.7708874883637588, |
|
"eval_recall": 0.7477565708537125, |
|
"eval_runtime": 329.0722, |
|
"eval_sacrebleu": 6.251480154755987, |
|
"eval_samples_per_second": 29.194, |
|
"eval_steps_per_second": 0.915, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.4594, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"eval_f1": 0.7590658383431736, |
|
"eval_loss": 0.4385643005371094, |
|
"eval_precision": 0.7722058984135644, |
|
"eval_recall": 0.7481865650026411, |
|
"eval_runtime": 329.0887, |
|
"eval_sacrebleu": 6.274907412338864, |
|
"eval_samples_per_second": 29.193, |
|
"eval_steps_per_second": 0.915, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 6e-06, |
|
"loss": 0.4548, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"eval_f1": 0.7591303865470561, |
|
"eval_loss": 0.4375361502170563, |
|
"eval_precision": 0.7716383277265539, |
|
"eval_recall": 0.7488526040952809, |
|
"eval_runtime": 326.0628, |
|
"eval_sacrebleu": 6.2691323707280056, |
|
"eval_samples_per_second": 29.464, |
|
"eval_steps_per_second": 0.923, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.4496, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"eval_f1": 0.7595453337276964, |
|
"eval_loss": 0.4368315041065216, |
|
"eval_precision": 0.7720739444758674, |
|
"eval_recall": 0.7492328676017723, |
|
"eval_runtime": 326.1933, |
|
"eval_sacrebleu": 6.332761567803722, |
|
"eval_samples_per_second": 29.452, |
|
"eval_steps_per_second": 0.923, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.4484, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"eval_f1": 0.7595180715794393, |
|
"eval_loss": 0.4362909495830536, |
|
"eval_precision": 0.7722789199881863, |
|
"eval_recall": 0.7489893756232178, |
|
"eval_runtime": 326.3639, |
|
"eval_sacrebleu": 6.311690383163712, |
|
"eval_samples_per_second": 29.436, |
|
"eval_steps_per_second": 0.922, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 0.0, |
|
"loss": 0.4446, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_f1": 0.7592586470783173, |
|
"eval_loss": 0.436233788728714, |
|
"eval_precision": 0.7721431963591666, |
|
"eval_recall": 0.7486093006745032, |
|
"eval_runtime": 326.7518, |
|
"eval_sacrebleu": 6.312650510263652, |
|
"eval_samples_per_second": 29.402, |
|
"eval_steps_per_second": 0.921, |
|
"step": 20000 |
|
} |
|
], |
|
"max_steps": 20000, |
|
"num_train_epochs": 9, |
|
"total_flos": 2.033175326799954e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|