Omar
update
5e0c3c5
{
"best_metric": 0.8108949416342413,
"best_model_checkpoint": "finetune_results/omarmomen/structformer_s2_final_with_pos/qqp/checkpoint-10000",
"epoch": 5.911330049261084,
"global_step": 12000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"eval_accuracy": 0.7234556674957275,
"eval_f1": 0.6753688989784338,
"eval_loss": 0.5372109413146973,
"eval_mcc": 0.4346909424161479,
"eval_runtime": 51.7649,
"eval_samples_per_second": 519.444,
"eval_steps_per_second": 64.947,
"step": 400
},
{
"epoch": 0.25,
"learning_rate": 4.876847290640394e-05,
"loss": 0.5934,
"step": 500
},
{
"epoch": 0.39,
"eval_accuracy": 0.7503811717033386,
"eval_f1": 0.7230337542295948,
"eval_loss": 0.5017327070236206,
"eval_mcc": 0.5014557368856788,
"eval_runtime": 51.7433,
"eval_samples_per_second": 519.661,
"eval_steps_per_second": 64.975,
"step": 800
},
{
"epoch": 0.49,
"learning_rate": 4.753694581280788e-05,
"loss": 0.5159,
"step": 1000
},
{
"epoch": 0.59,
"eval_accuracy": 0.7581911087036133,
"eval_f1": 0.7443579460564599,
"eval_loss": 0.48863014578819275,
"eval_mcc": 0.5320171029709727,
"eval_runtime": 51.7388,
"eval_samples_per_second": 519.707,
"eval_steps_per_second": 64.98,
"step": 1200
},
{
"epoch": 0.74,
"learning_rate": 4.630541871921182e-05,
"loss": 0.4792,
"step": 1500
},
{
"epoch": 0.79,
"eval_accuracy": 0.7796124815940857,
"eval_f1": 0.7622944243882872,
"eval_loss": 0.4617643356323242,
"eval_mcc": 0.5687201331096594,
"eval_runtime": 51.762,
"eval_samples_per_second": 519.474,
"eval_steps_per_second": 64.951,
"step": 1600
},
{
"epoch": 0.99,
"learning_rate": 4.507389162561577e-05,
"loss": 0.4542,
"step": 2000
},
{
"epoch": 0.99,
"eval_accuracy": 0.789095938205719,
"eval_f1": 0.7523039965057874,
"eval_loss": 0.44058433175086975,
"eval_mcc": 0.5688664310414974,
"eval_runtime": 51.7652,
"eval_samples_per_second": 519.442,
"eval_steps_per_second": 64.947,
"step": 2000
},
{
"epoch": 1.18,
"eval_accuracy": 0.792219877243042,
"eval_f1": 0.7745268170628354,
"eval_loss": 0.4416797161102295,
"eval_mcc": 0.59239707340152,
"eval_runtime": 51.7802,
"eval_samples_per_second": 519.291,
"eval_steps_per_second": 64.928,
"step": 2400
},
{
"epoch": 1.23,
"learning_rate": 4.384236453201971e-05,
"loss": 0.3933,
"step": 2500
},
{
"epoch": 1.38,
"eval_accuracy": 0.7879801988601685,
"eval_f1": 0.7779206108059679,
"eval_loss": 0.45901182293891907,
"eval_mcc": 0.5959266879901132,
"eval_runtime": 51.7681,
"eval_samples_per_second": 519.412,
"eval_steps_per_second": 64.943,
"step": 2800
},
{
"epoch": 1.48,
"learning_rate": 4.261083743842365e-05,
"loss": 0.3802,
"step": 3000
},
{
"epoch": 1.58,
"eval_accuracy": 0.8091784715652466,
"eval_f1": 0.7775995838931992,
"eval_loss": 0.41946539282798767,
"eval_mcc": 0.6110078146235389,
"eval_runtime": 51.7792,
"eval_samples_per_second": 519.301,
"eval_steps_per_second": 64.93,
"step": 3200
},
{
"epoch": 1.72,
"learning_rate": 4.1379310344827587e-05,
"loss": 0.3713,
"step": 3500
},
{
"epoch": 1.77,
"eval_accuracy": 0.8115586042404175,
"eval_f1": 0.7876629091061476,
"eval_loss": 0.4056394398212433,
"eval_mcc": 0.6219198677315879,
"eval_runtime": 51.7731,
"eval_samples_per_second": 519.362,
"eval_steps_per_second": 64.937,
"step": 3600
},
{
"epoch": 1.97,
"learning_rate": 4.014778325123153e-05,
"loss": 0.3679,
"step": 4000
},
{
"epoch": 1.97,
"eval_accuracy": 0.8167652487754822,
"eval_f1": 0.7969168624541445,
"eval_loss": 0.40430837869644165,
"eval_mcc": 0.6363171595176773,
"eval_runtime": 51.7659,
"eval_samples_per_second": 519.434,
"eval_steps_per_second": 64.946,
"step": 4000
},
{
"epoch": 2.17,
"eval_accuracy": 0.8047528862953186,
"eval_f1": 0.7935509241053873,
"eval_loss": 0.45002058148384094,
"eval_mcc": 0.6264120987418264,
"eval_runtime": 51.7934,
"eval_samples_per_second": 519.159,
"eval_steps_per_second": 64.912,
"step": 4400
},
{
"epoch": 2.22,
"learning_rate": 3.891625615763547e-05,
"loss": 0.2934,
"step": 4500
},
{
"epoch": 2.36,
"eval_accuracy": 0.8004016280174255,
"eval_f1": 0.7906541326988337,
"eval_loss": 0.4479910731315613,
"eval_mcc": 0.6206352234514999,
"eval_runtime": 51.7752,
"eval_samples_per_second": 519.341,
"eval_steps_per_second": 64.935,
"step": 4800
},
{
"epoch": 2.46,
"learning_rate": 3.768472906403941e-05,
"loss": 0.2895,
"step": 5000
},
{
"epoch": 2.56,
"eval_accuracy": 0.8060173392295837,
"eval_f1": 0.7936219039328954,
"eval_loss": 0.4439913034439087,
"eval_mcc": 0.6267733245835018,
"eval_runtime": 51.7787,
"eval_samples_per_second": 519.306,
"eval_steps_per_second": 64.93,
"step": 5200
},
{
"epoch": 2.71,
"learning_rate": 3.645320197044335e-05,
"loss": 0.2901,
"step": 5500
},
{
"epoch": 2.76,
"eval_accuracy": 0.8209676742553711,
"eval_f1": 0.8036864856047631,
"eval_loss": 0.4141245186328888,
"eval_mcc": 0.6476398578041523,
"eval_runtime": 51.7588,
"eval_samples_per_second": 519.505,
"eval_steps_per_second": 64.955,
"step": 5600
},
{
"epoch": 2.96,
"learning_rate": 3.522167487684729e-05,
"loss": 0.2882,
"step": 6000
},
{
"epoch": 2.96,
"eval_accuracy": 0.8188106417655945,
"eval_f1": 0.8016609672691744,
"eval_loss": 0.4020179808139801,
"eval_mcc": 0.643743269882208,
"eval_runtime": 51.7887,
"eval_samples_per_second": 519.206,
"eval_steps_per_second": 64.918,
"step": 6000
},
{
"epoch": 3.15,
"eval_accuracy": 0.8268436789512634,
"eval_f1": 0.8052534716412916,
"eval_loss": 0.47344014048576355,
"eval_mcc": 0.6533536137012623,
"eval_runtime": 51.8108,
"eval_samples_per_second": 518.985,
"eval_steps_per_second": 64.89,
"step": 6400
},
{
"epoch": 3.2,
"learning_rate": 3.399014778325123e-05,
"loss": 0.2247,
"step": 6500
},
{
"epoch": 3.35,
"eval_accuracy": 0.8171743154525757,
"eval_f1": 0.8004546192563728,
"eval_loss": 0.47986865043640137,
"eval_mcc": 0.6412550092053579,
"eval_runtime": 51.7925,
"eval_samples_per_second": 519.168,
"eval_steps_per_second": 64.913,
"step": 6800
},
{
"epoch": 3.45,
"learning_rate": 3.275862068965517e-05,
"loss": 0.2198,
"step": 7000
},
{
"epoch": 3.55,
"eval_accuracy": 0.8249841928482056,
"eval_f1": 0.8056656755863892,
"eval_loss": 0.44043195247650146,
"eval_mcc": 0.6525207506387493,
"eval_runtime": 51.8061,
"eval_samples_per_second": 519.032,
"eval_steps_per_second": 64.896,
"step": 7200
},
{
"epoch": 3.69,
"learning_rate": 3.152709359605912e-05,
"loss": 0.2178,
"step": 7500
},
{
"epoch": 3.74,
"eval_accuracy": 0.825653612613678,
"eval_f1": 0.8072685413583294,
"eval_loss": 0.4325750470161438,
"eval_mcc": 0.6549889472807147,
"eval_runtime": 51.7769,
"eval_samples_per_second": 519.324,
"eval_steps_per_second": 64.932,
"step": 7600
},
{
"epoch": 3.94,
"learning_rate": 3.0295566502463057e-05,
"loss": 0.2234,
"step": 8000
},
{
"epoch": 3.94,
"eval_accuracy": 0.8143478631973267,
"eval_f1": 0.8024222274994064,
"eval_loss": 0.46264368295669556,
"eval_mcc": 0.643552030907142,
"eval_runtime": 51.7905,
"eval_samples_per_second": 519.188,
"eval_steps_per_second": 64.915,
"step": 8000
},
{
"epoch": 4.14,
"eval_accuracy": 0.8114470839500427,
"eval_f1": 0.7995096488453021,
"eval_loss": 0.5472643375396729,
"eval_mcc": 0.6379765063766293,
"eval_runtime": 51.7718,
"eval_samples_per_second": 519.375,
"eval_steps_per_second": 64.939,
"step": 8400
},
{
"epoch": 4.19,
"learning_rate": 2.9064039408866993e-05,
"loss": 0.1747,
"step": 8500
},
{
"epoch": 4.33,
"eval_accuracy": 0.8158354759216309,
"eval_f1": 0.801172408255039,
"eval_loss": 0.5199980735778809,
"eval_mcc": 0.6418079417002838,
"eval_runtime": 51.8043,
"eval_samples_per_second": 519.049,
"eval_steps_per_second": 64.898,
"step": 8800
},
{
"epoch": 4.43,
"learning_rate": 2.7832512315270936e-05,
"loss": 0.1652,
"step": 9000
},
{
"epoch": 4.53,
"eval_accuracy": 0.8288519382476807,
"eval_f1": 0.8095040980213594,
"eval_loss": 0.4734426736831665,
"eval_mcc": 0.6597842225635926,
"eval_runtime": 51.7806,
"eval_samples_per_second": 519.287,
"eval_steps_per_second": 64.928,
"step": 9200
},
{
"epoch": 4.68,
"learning_rate": 2.660098522167488e-05,
"loss": 0.1639,
"step": 9500
},
{
"epoch": 4.73,
"eval_accuracy": 0.8255420327186584,
"eval_f1": 0.809083879369989,
"eval_loss": 0.5020354390144348,
"eval_mcc": 0.657461047074956,
"eval_runtime": 51.784,
"eval_samples_per_second": 519.253,
"eval_steps_per_second": 64.924,
"step": 9600
},
{
"epoch": 4.93,
"learning_rate": 2.5369458128078822e-05,
"loss": 0.1684,
"step": 10000
},
{
"epoch": 4.93,
"eval_accuracy": 0.8282940983772278,
"eval_f1": 0.8108949416342413,
"eval_loss": 0.4710612893104553,
"eval_mcc": 0.6613084269281064,
"eval_runtime": 51.7629,
"eval_samples_per_second": 519.465,
"eval_steps_per_second": 64.95,
"step": 10000
},
{
"epoch": 5.12,
"eval_accuracy": 0.8276618719100952,
"eval_f1": 0.8096450870851134,
"eval_loss": 0.5808185338973999,
"eval_mcc": 0.6592695922742537,
"eval_runtime": 51.8111,
"eval_samples_per_second": 518.982,
"eval_steps_per_second": 64.89,
"step": 10400
},
{
"epoch": 5.17,
"learning_rate": 2.413793103448276e-05,
"loss": 0.1323,
"step": 10500
},
{
"epoch": 5.32,
"eval_accuracy": 0.8322362303733826,
"eval_f1": 0.8096222831821059,
"eval_loss": 0.5204941034317017,
"eval_mcc": 0.6624782529759075,
"eval_runtime": 51.8048,
"eval_samples_per_second": 519.045,
"eval_steps_per_second": 64.897,
"step": 10800
},
{
"epoch": 5.42,
"learning_rate": 2.29064039408867e-05,
"loss": 0.1216,
"step": 11000
},
{
"epoch": 5.52,
"eval_accuracy": 0.8215999007225037,
"eval_f1": 0.8064555174500707,
"eval_loss": 0.5831347703933716,
"eval_mcc": 0.651990774420572,
"eval_runtime": 51.808,
"eval_samples_per_second": 519.013,
"eval_steps_per_second": 64.893,
"step": 11200
},
{
"epoch": 5.67,
"learning_rate": 2.1674876847290644e-05,
"loss": 0.1237,
"step": 11500
},
{
"epoch": 5.71,
"eval_accuracy": 0.8242031931877136,
"eval_f1": 0.8080250172602851,
"eval_loss": 0.6121179461479187,
"eval_mcc": 0.6553446821187937,
"eval_runtime": 51.8296,
"eval_samples_per_second": 518.797,
"eval_steps_per_second": 64.866,
"step": 11600
},
{
"epoch": 5.91,
"learning_rate": 2.0443349753694584e-05,
"loss": 0.1248,
"step": 12000
},
{
"epoch": 5.91,
"eval_accuracy": 0.8253932595252991,
"eval_f1": 0.805871407897457,
"eval_loss": 0.6064007878303528,
"eval_mcc": 0.6530389230916024,
"eval_runtime": 51.78,
"eval_samples_per_second": 519.293,
"eval_steps_per_second": 64.928,
"step": 12000
},
{
"epoch": 5.91,
"step": 12000,
"total_flos": 1.2015943752264192e+17,
"train_loss": 0.28238233852386474,
"train_runtime": 8038.3604,
"train_samples_per_second": 302.92,
"train_steps_per_second": 2.525
}
],
"max_steps": 20300,
"num_train_epochs": 10,
"total_flos": 1.2015943752264192e+17,
"trial_name": null,
"trial_params": null
}