FLAN-T5-Paraphraser / trainer_state.json
alykassem's picture
Upload 13 files
e84dea6 verified
raw
history blame
10 kB
{
"best_metric": 0.436233788728714,
"best_model_checkpoint": "flan_large_ft_adam_filtd/checkpoint-20000",
"epoch": 8.32639467110741,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.42,
"learning_rate": 3.8e-05,
"loss": 1.6461,
"step": 1000
},
{
"epoch": 0.42,
"eval_f1": 0.7336931843203959,
"eval_loss": 0.543613076210022,
"eval_precision": 0.7462491253306569,
"eval_recall": 0.7237358282616747,
"eval_runtime": 339.9295,
"eval_sacrebleu": 5.478519586104642,
"eval_samples_per_second": 28.262,
"eval_steps_per_second": 0.885,
"step": 1000
},
{
"epoch": 0.83,
"learning_rate": 3.6e-05,
"loss": 0.6266,
"step": 2000
},
{
"epoch": 0.83,
"eval_f1": 0.7411911502965993,
"eval_loss": 0.5126737356185913,
"eval_precision": 0.7532233481006616,
"eval_recall": 0.7315065660194513,
"eval_runtime": 330.4901,
"eval_sacrebleu": 5.581437847090348,
"eval_samples_per_second": 29.069,
"eval_steps_per_second": 0.911,
"step": 2000
},
{
"epoch": 1.25,
"learning_rate": 3.4e-05,
"loss": 0.591,
"step": 3000
},
{
"epoch": 1.25,
"eval_f1": 0.744462796992661,
"eval_loss": 0.49647408723831177,
"eval_precision": 0.7574931879507412,
"eval_recall": 0.7338014463765621,
"eval_runtime": 328.9086,
"eval_sacrebleu": 5.712014723912081,
"eval_samples_per_second": 29.209,
"eval_steps_per_second": 0.915,
"step": 3000
},
{
"epoch": 1.67,
"learning_rate": 3.2000000000000005e-05,
"loss": 0.5723,
"step": 4000
},
{
"epoch": 1.67,
"eval_f1": 0.7482227764808884,
"eval_loss": 0.4846822917461395,
"eval_precision": 0.7609444108747153,
"eval_recall": 0.7378004755756169,
"eval_runtime": 328.5657,
"eval_sacrebleu": 5.759016321774072,
"eval_samples_per_second": 29.239,
"eval_steps_per_second": 0.916,
"step": 4000
},
{
"epoch": 2.08,
"learning_rate": 3.0000000000000004e-05,
"loss": 0.5576,
"step": 5000
},
{
"epoch": 2.08,
"eval_f1": 0.7495765637889841,
"eval_loss": 0.4766782820224762,
"eval_precision": 0.7626191373513915,
"eval_recall": 0.7388611246473127,
"eval_runtime": 326.106,
"eval_sacrebleu": 5.811597759442428,
"eval_samples_per_second": 29.46,
"eval_steps_per_second": 0.923,
"step": 5000
},
{
"epoch": 2.5,
"learning_rate": 2.8e-05,
"loss": 0.5393,
"step": 6000
},
{
"epoch": 2.5,
"eval_f1": 0.7505085720869012,
"eval_loss": 0.47017449140548706,
"eval_precision": 0.7641689959470273,
"eval_recall": 0.7391936523188186,
"eval_runtime": 328.1093,
"eval_sacrebleu": 5.885476099157212,
"eval_samples_per_second": 29.28,
"eval_steps_per_second": 0.917,
"step": 6000
},
{
"epoch": 2.91,
"learning_rate": 2.6000000000000002e-05,
"loss": 0.5318,
"step": 7000
},
{
"epoch": 2.91,
"eval_f1": 0.7533312253634614,
"eval_loss": 0.46316930651664734,
"eval_precision": 0.7659040383844901,
"eval_recall": 0.7430121202963726,
"eval_runtime": 330.001,
"eval_sacrebleu": 5.928167844047436,
"eval_samples_per_second": 29.112,
"eval_steps_per_second": 0.912,
"step": 7000
},
{
"epoch": 3.33,
"learning_rate": 2.4e-05,
"loss": 0.514,
"step": 8000
},
{
"epoch": 3.33,
"eval_f1": 0.7534677800234317,
"eval_loss": 0.45805472135543823,
"eval_precision": 0.7670932310055877,
"eval_recall": 0.7421523856331236,
"eval_runtime": 326.6901,
"eval_sacrebleu": 5.977964366842188,
"eval_samples_per_second": 29.407,
"eval_steps_per_second": 0.921,
"step": 8000
},
{
"epoch": 3.75,
"learning_rate": 2.2000000000000003e-05,
"loss": 0.5084,
"step": 9000
},
{
"epoch": 3.75,
"eval_f1": 0.7547050486360937,
"eval_loss": 0.4548051953315735,
"eval_precision": 0.7675773970595853,
"eval_recall": 0.7440757734278951,
"eval_runtime": 329.4886,
"eval_sacrebleu": 5.986154752583561,
"eval_samples_per_second": 29.157,
"eval_steps_per_second": 0.914,
"step": 9000
},
{
"epoch": 4.16,
"learning_rate": 2e-05,
"loss": 0.4987,
"step": 10000
},
{
"epoch": 4.16,
"eval_f1": 0.7559209833584902,
"eval_loss": 0.45204678177833557,
"eval_precision": 0.7687782156203135,
"eval_recall": 0.7452970269140299,
"eval_runtime": 328.8363,
"eval_sacrebleu": 6.112626970123219,
"eval_samples_per_second": 29.215,
"eval_steps_per_second": 0.915,
"step": 10000
},
{
"epoch": 4.58,
"learning_rate": 1.8e-05,
"loss": 0.4916,
"step": 11000
},
{
"epoch": 4.58,
"eval_f1": 0.7562574939163441,
"eval_loss": 0.44847676157951355,
"eval_precision": 0.7693036325555866,
"eval_recall": 0.7454933088714478,
"eval_runtime": 328.9876,
"eval_sacrebleu": 6.110669366899824,
"eval_samples_per_second": 29.202,
"eval_steps_per_second": 0.915,
"step": 11000
},
{
"epoch": 5.0,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.4855,
"step": 12000
},
{
"epoch": 5.0,
"eval_f1": 0.7573605920082946,
"eval_loss": 0.44527965784072876,
"eval_precision": 0.7699207649359262,
"eval_recall": 0.7470148278161243,
"eval_runtime": 326.1238,
"eval_sacrebleu": 6.199619598523688,
"eval_samples_per_second": 29.458,
"eval_steps_per_second": 0.923,
"step": 12000
},
{
"epoch": 5.41,
"learning_rate": 1.4e-05,
"loss": 0.4735,
"step": 13000
},
{
"epoch": 5.41,
"eval_f1": 0.757606573654931,
"eval_loss": 0.44323351979255676,
"eval_precision": 0.7707262306620629,
"eval_recall": 0.7467577414308141,
"eval_runtime": 325.2009,
"eval_sacrebleu": 6.170302966384815,
"eval_samples_per_second": 29.542,
"eval_steps_per_second": 0.926,
"step": 13000
},
{
"epoch": 5.83,
"learning_rate": 1.2e-05,
"loss": 0.4714,
"step": 14000
},
{
"epoch": 5.83,
"eval_f1": 0.7582219671704239,
"eval_loss": 0.44084230065345764,
"eval_precision": 0.7707039175730187,
"eval_recall": 0.7479663680666155,
"eval_runtime": 328.6225,
"eval_sacrebleu": 6.217397957778798,
"eval_samples_per_second": 29.234,
"eval_steps_per_second": 0.916,
"step": 14000
},
{
"epoch": 6.24,
"learning_rate": 1e-05,
"loss": 0.4619,
"step": 15000
},
{
"epoch": 6.24,
"eval_f1": 0.7582127043374982,
"eval_loss": 0.44005897641181946,
"eval_precision": 0.7708874883637588,
"eval_recall": 0.7477565708537125,
"eval_runtime": 329.0722,
"eval_sacrebleu": 6.251480154755987,
"eval_samples_per_second": 29.194,
"eval_steps_per_second": 0.915,
"step": 15000
},
{
"epoch": 6.66,
"learning_rate": 8.000000000000001e-06,
"loss": 0.4594,
"step": 16000
},
{
"epoch": 6.66,
"eval_f1": 0.7590658383431736,
"eval_loss": 0.4385643005371094,
"eval_precision": 0.7722058984135644,
"eval_recall": 0.7481865650026411,
"eval_runtime": 329.0887,
"eval_sacrebleu": 6.274907412338864,
"eval_samples_per_second": 29.193,
"eval_steps_per_second": 0.915,
"step": 16000
},
{
"epoch": 7.08,
"learning_rate": 6e-06,
"loss": 0.4548,
"step": 17000
},
{
"epoch": 7.08,
"eval_f1": 0.7591303865470561,
"eval_loss": 0.4375361502170563,
"eval_precision": 0.7716383277265539,
"eval_recall": 0.7488526040952809,
"eval_runtime": 326.0628,
"eval_sacrebleu": 6.2691323707280056,
"eval_samples_per_second": 29.464,
"eval_steps_per_second": 0.923,
"step": 17000
},
{
"epoch": 7.49,
"learning_rate": 4.000000000000001e-06,
"loss": 0.4496,
"step": 18000
},
{
"epoch": 7.49,
"eval_f1": 0.7595453337276964,
"eval_loss": 0.4368315041065216,
"eval_precision": 0.7720739444758674,
"eval_recall": 0.7492328676017723,
"eval_runtime": 326.1933,
"eval_sacrebleu": 6.332761567803722,
"eval_samples_per_second": 29.452,
"eval_steps_per_second": 0.923,
"step": 18000
},
{
"epoch": 7.91,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.4484,
"step": 19000
},
{
"epoch": 7.91,
"eval_f1": 0.7595180715794393,
"eval_loss": 0.4362909495830536,
"eval_precision": 0.7722789199881863,
"eval_recall": 0.7489893756232178,
"eval_runtime": 326.3639,
"eval_sacrebleu": 6.311690383163712,
"eval_samples_per_second": 29.436,
"eval_steps_per_second": 0.922,
"step": 19000
},
{
"epoch": 8.33,
"learning_rate": 0.0,
"loss": 0.4446,
"step": 20000
},
{
"epoch": 8.33,
"eval_f1": 0.7592586470783173,
"eval_loss": 0.436233788728714,
"eval_precision": 0.7721431963591666,
"eval_recall": 0.7486093006745032,
"eval_runtime": 326.7518,
"eval_sacrebleu": 6.312650510263652,
"eval_samples_per_second": 29.402,
"eval_steps_per_second": 0.921,
"step": 20000
}
],
"max_steps": 20000,
"num_train_epochs": 9,
"total_flos": 2.033175326799954e+17,
"trial_name": null,
"trial_params": null
}