sara-nabhani's picture
Training in progress, step 1600
e6ab595
{
"best_metric": 0.37634159533224454,
"best_model_checkpoint": "/home2/s5432073/language-tech-project/results/ltp-roberta-large-defaultltp-roberta-large-default-2/checkpoint-1600",
"epoch": 9.467455621301776,
"global_step": 1600,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.18,
"learning_rate": 9.28348909657321e-06,
"loss": 0.4492,
"step": 200
},
{
"epoch": 1.18,
"eval_f1": 0.11572875154130194,
"eval_f1_all": [
0.0,
0.05847953216374269,
0.0,
0.0,
0.4682713347921225,
0.0,
0.0,
0.0,
0.6747759282970549,
0.520393811533052,
0.0,
0.0,
0.0,
0.0,
0.0,
0.0,
0.5926544240400667,
0.0,
0.0,
0.0
],
"eval_loss": 0.36748331785202026,
"eval_runtime": 3.8035,
"eval_samples_per_second": 498.483,
"eval_steps_per_second": 15.775,
"step": 200
},
{
"epoch": 2.37,
"learning_rate": 8.037383177570094e-06,
"loss": 0.3449,
"step": 400
},
{
"epoch": 2.37,
"eval_f1": 0.22644858426717676,
"eval_f1_all": [
0.3419354838709677,
0.30434782608695654,
0.0,
0.0,
0.5748898678414096,
0.0,
0.3260869565217391,
0.0,
0.7539203860072377,
0.5495118549511855,
0.0879120879120879,
0.23971377459749552,
0.0,
0.0,
0.055045871559633024,
0.0,
0.6266548984995588,
0.5684210526315789,
0.008928571428571428,
0.09160305343511452
],
"eval_loss": 0.33438944816589355,
"eval_runtime": 2.9482,
"eval_samples_per_second": 643.096,
"eval_steps_per_second": 20.351,
"step": 400
},
{
"epoch": 3.55,
"learning_rate": 6.791277258566978e-06,
"loss": 0.3163,
"step": 600
},
{
"epoch": 3.55,
"eval_f1": 0.29790838893606486,
"eval_f1_all": [
0.4838709677419356,
0.4791965566714491,
0.0,
0.0,
0.5882352941176471,
0.012121212121212121,
0.303030303030303,
0.0,
0.7144866385372716,
0.574385510996119,
0.3069767441860465,
0.4,
0.0,
0.0,
0.40190249702734837,
0.0,
0.6844840386043058,
0.6761904761904762,
0.034482758620689655,
0.29880478087649404
],
"eval_loss": 0.3200507164001465,
"eval_runtime": 2.9119,
"eval_samples_per_second": 651.111,
"eval_steps_per_second": 20.605,
"step": 600
},
{
"epoch": 4.73,
"learning_rate": 5.545171339563863e-06,
"loss": 0.3021,
"step": 800
},
{
"epoch": 4.73,
"eval_f1": 0.33206438169614216,
"eval_f1_all": [
0.5162907268170427,
0.5621761658031088,
0.0,
0.0,
0.6198347107438016,
0.08139534883720931,
0.36734693877551017,
0.0,
0.7533632286995516,
0.6167290886392011,
0.36111111111111105,
0.42990654205607476,
0.0,
0.0,
0.5192909280500522,
0.014814814814814815,
0.6877828054298644,
0.6568627450980392,
0.08906882591093117,
0.36531365313653136
],
"eval_loss": 0.3112545609474182,
"eval_runtime": 3.7309,
"eval_samples_per_second": 508.183,
"eval_steps_per_second": 16.082,
"step": 800
},
{
"epoch": 5.92,
"learning_rate": 4.299065420560748e-06,
"loss": 0.2879,
"step": 1000
},
{
"epoch": 5.92,
"eval_f1": 0.3556641957175606,
"eval_f1_all": [
0.5228426395939088,
0.5761843790012804,
0.014285714285714285,
0.05660377358490566,
0.6353166986564299,
0.09195402298850576,
0.4059405940594059,
0.0,
0.7578814627994955,
0.639225181598063,
0.3873873873873874,
0.49717514124293793,
0.0,
0.015503875968992248,
0.5467059980334316,
0.08934707903780069,
0.6688907422852377,
0.6995515695067265,
0.09876543209876544,
0.4097222222222222
],
"eval_loss": 0.3077404499053955,
"eval_runtime": 3.283,
"eval_samples_per_second": 577.521,
"eval_steps_per_second": 18.276,
"step": 1000
},
{
"epoch": 7.1,
"learning_rate": 3.0529595015576325e-06,
"loss": 0.2783,
"step": 1200
},
{
"epoch": 7.1,
"eval_f1": 0.3662892905615983,
"eval_f1_all": [
0.5064599483204134,
0.5771643663739021,
0.028169014084507046,
0.125,
0.6408730158730159,
0.1222222222222222,
0.35789473684210527,
0.0,
0.7578947368421052,
0.6680988184747583,
0.4291497975708502,
0.5216178521617852,
0.0,
0.015503875968992248,
0.5635864592863677,
0.10135135135135136,
0.6790622473726757,
0.7058823529411764,
0.10317460317460318,
0.422680412371134
],
"eval_loss": 0.3067641854286194,
"eval_runtime": 3.0738,
"eval_samples_per_second": 616.829,
"eval_steps_per_second": 19.52,
"step": 1200
},
{
"epoch": 8.28,
"learning_rate": 1.8068535825545173e-06,
"loss": 0.2721,
"step": 1400
},
{
"epoch": 8.28,
"eval_f1": 0.36379633008264506,
"eval_f1_all": [
0.5247524752475247,
0.5838509316770186,
0.028368794326241138,
0.07407407407407407,
0.6463654223968567,
0.13259668508287292,
0.35106382978723405,
0.0,
0.7543075941289088,
0.6308243727598566,
0.4,
0.5212620027434842,
0.0,
0.015503875968992248,
0.5748709122203098,
0.07612456747404846,
0.6950959488272921,
0.6986899563318777,
0.14785992217898833,
0.42031523642732044
],
"eval_loss": 0.3058605194091797,
"eval_runtime": 3.1337,
"eval_samples_per_second": 605.037,
"eval_steps_per_second": 19.147,
"step": 1400
},
{
"epoch": 9.47,
"learning_rate": 5.607476635514019e-07,
"loss": 0.2672,
"step": 1600
},
{
"epoch": 9.47,
"eval_f1": 0.37634159533224454,
"eval_f1_all": [
0.5393794749403341,
0.5859085290482077,
0.06896551724137931,
0.15384615384615385,
0.6564885496183206,
0.12972972972972974,
0.38000000000000006,
0.0,
0.7552715654952076,
0.6600441501103753,
0.41463414634146345,
0.5263157894736842,
0.0,
0.015503875968992248,
0.5658263305322129,
0.10596026490066227,
0.6955848179705655,
0.7085201793721972,
0.13076923076923078,
0.4340836012861736
],
"eval_loss": 0.3051939308643341,
"eval_runtime": 3.2008,
"eval_samples_per_second": 592.358,
"eval_steps_per_second": 18.746,
"step": 1600
}
],
"max_steps": 1690,
"num_train_epochs": 10,
"total_flos": 1.1949029604729552e+16,
"trial_name": null,
"trial_params": null
}