lombardata's picture
Evaluation on the test set completed on 2024_11_04.
2db9804 verified
raw
history blame
32.6 kB
{
"best_metric": 0.3492669463157654,
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria_binary-probabilities-large-2024_11_03-batch-size64_freeze_probs/checkpoint-10498",
"epoch": 68.0,
"eval_steps": 500,
"global_step": 12308,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_explained_variance": 0.3009445369243622,
"eval_loss": 0.3794757127761841,
"eval_mae": 0.14892756938934326,
"eval_r2": 0.2894136607646942,
"eval_rmse": 0.20668388903141022,
"eval_runtime": 77.6082,
"eval_samples_per_second": 49.531,
"eval_steps_per_second": 0.786,
"learning_rate": 0.001,
"step": 181
},
{
"epoch": 2.0,
"eval_explained_variance": 0.3548472821712494,
"eval_loss": 0.3673744201660156,
"eval_mae": 0.1374034583568573,
"eval_r2": 0.35173478722572327,
"eval_rmse": 0.198309525847435,
"eval_runtime": 76.8329,
"eval_samples_per_second": 50.031,
"eval_steps_per_second": 0.794,
"learning_rate": 0.001,
"step": 362
},
{
"epoch": 2.7624309392265194,
"grad_norm": 0.26878172159194946,
"learning_rate": 0.001,
"loss": 0.4416,
"step": 500
},
{
"epoch": 3.0,
"eval_explained_variance": 0.35690778493881226,
"eval_loss": 0.3671453297138214,
"eval_mae": 0.1413687765598297,
"eval_r2": 0.35207054018974304,
"eval_rmse": 0.198079913854599,
"eval_runtime": 77.6227,
"eval_samples_per_second": 49.522,
"eval_steps_per_second": 0.786,
"learning_rate": 0.001,
"step": 543
},
{
"epoch": 4.0,
"eval_explained_variance": 0.37491223216056824,
"eval_loss": 0.36317145824432373,
"eval_mae": 0.1391323208808899,
"eval_r2": 0.3708474040031433,
"eval_rmse": 0.19521364569664001,
"eval_runtime": 77.1791,
"eval_samples_per_second": 49.806,
"eval_steps_per_second": 0.79,
"learning_rate": 0.001,
"step": 724
},
{
"epoch": 5.0,
"eval_explained_variance": 0.3614368736743927,
"eval_loss": 0.3678734302520752,
"eval_mae": 0.141828715801239,
"eval_r2": 0.3453221321105957,
"eval_rmse": 0.19933271408081055,
"eval_runtime": 79.1374,
"eval_samples_per_second": 48.574,
"eval_steps_per_second": 0.771,
"learning_rate": 0.001,
"step": 905
},
{
"epoch": 5.524861878453039,
"grad_norm": 0.19997762143611908,
"learning_rate": 0.001,
"loss": 0.3813,
"step": 1000
},
{
"epoch": 6.0,
"eval_explained_variance": 0.3743399381637573,
"eval_loss": 0.36250030994415283,
"eval_mae": 0.13803647458553314,
"eval_r2": 0.37177884578704834,
"eval_rmse": 0.19508354365825653,
"eval_runtime": 77.2448,
"eval_samples_per_second": 49.764,
"eval_steps_per_second": 0.79,
"learning_rate": 0.001,
"step": 1086
},
{
"epoch": 7.0,
"eval_explained_variance": 0.38368356227874756,
"eval_loss": 0.36188462376594543,
"eval_mae": 0.1347939670085907,
"eval_r2": 0.3771490454673767,
"eval_rmse": 0.19410446286201477,
"eval_runtime": 77.3383,
"eval_samples_per_second": 49.704,
"eval_steps_per_second": 0.789,
"learning_rate": 0.001,
"step": 1267
},
{
"epoch": 8.0,
"eval_explained_variance": 0.3808881640434265,
"eval_loss": 0.36125126481056213,
"eval_mae": 0.13681310415267944,
"eval_r2": 0.37883251905441284,
"eval_rmse": 0.19354337453842163,
"eval_runtime": 80.1691,
"eval_samples_per_second": 47.949,
"eval_steps_per_second": 0.761,
"learning_rate": 0.001,
"step": 1448
},
{
"epoch": 8.287292817679559,
"grad_norm": 0.1677914261817932,
"learning_rate": 0.001,
"loss": 0.3785,
"step": 1500
},
{
"epoch": 9.0,
"eval_explained_variance": 0.38331514596939087,
"eval_loss": 0.3603876233100891,
"eval_mae": 0.135352224111557,
"eval_r2": 0.3812035918235779,
"eval_rmse": 0.19339486956596375,
"eval_runtime": 77.1781,
"eval_samples_per_second": 49.807,
"eval_steps_per_second": 0.79,
"learning_rate": 0.001,
"step": 1629
},
{
"epoch": 10.0,
"eval_explained_variance": 0.3843615651130676,
"eval_loss": 0.3612792491912842,
"eval_mae": 0.13375206291675568,
"eval_r2": 0.38122493028640747,
"eval_rmse": 0.19321060180664062,
"eval_runtime": 77.2138,
"eval_samples_per_second": 49.784,
"eval_steps_per_second": 0.79,
"learning_rate": 0.001,
"step": 1810
},
{
"epoch": 11.0,
"eval_explained_variance": 0.3856731057167053,
"eval_loss": 0.3603772521018982,
"eval_mae": 0.13226205110549927,
"eval_r2": 0.3844555616378784,
"eval_rmse": 0.19312407076358795,
"eval_runtime": 77.7929,
"eval_samples_per_second": 49.413,
"eval_steps_per_second": 0.784,
"learning_rate": 0.001,
"step": 1991
},
{
"epoch": 11.049723756906078,
"grad_norm": 1.5252280235290527,
"learning_rate": 0.001,
"loss": 0.3743,
"step": 2000
},
{
"epoch": 12.0,
"eval_explained_variance": 0.38444802165031433,
"eval_loss": 0.361823707818985,
"eval_mae": 0.13859649002552032,
"eval_r2": 0.3774065375328064,
"eval_rmse": 0.1941623091697693,
"eval_runtime": 76.0433,
"eval_samples_per_second": 50.55,
"eval_steps_per_second": 0.802,
"learning_rate": 0.001,
"step": 2172
},
{
"epoch": 13.0,
"eval_explained_variance": 0.3894338309764862,
"eval_loss": 0.35931822657585144,
"eval_mae": 0.13433586061000824,
"eval_r2": 0.3875495195388794,
"eval_rmse": 0.1924724578857422,
"eval_runtime": 80.7793,
"eval_samples_per_second": 47.586,
"eval_steps_per_second": 0.755,
"learning_rate": 0.001,
"step": 2353
},
{
"epoch": 13.812154696132596,
"grad_norm": 0.13441379368305206,
"learning_rate": 0.001,
"loss": 0.3732,
"step": 2500
},
{
"epoch": 14.0,
"eval_explained_variance": 0.3862605392932892,
"eval_loss": 0.3604746460914612,
"eval_mae": 0.13521355390548706,
"eval_r2": 0.38306838274002075,
"eval_rmse": 0.19319292902946472,
"eval_runtime": 77.1543,
"eval_samples_per_second": 49.822,
"eval_steps_per_second": 0.791,
"learning_rate": 0.001,
"step": 2534
},
{
"epoch": 15.0,
"eval_explained_variance": 0.3836004436016083,
"eval_loss": 0.36050480604171753,
"eval_mae": 0.13660094141960144,
"eval_r2": 0.3816676735877991,
"eval_rmse": 0.19354429841041565,
"eval_runtime": 78.1543,
"eval_samples_per_second": 49.185,
"eval_steps_per_second": 0.781,
"learning_rate": 0.001,
"step": 2715
},
{
"epoch": 16.0,
"eval_explained_variance": 0.39098840951919556,
"eval_loss": 0.3599933683872223,
"eval_mae": 0.13121920824050903,
"eval_r2": 0.388213574886322,
"eval_rmse": 0.1921611875295639,
"eval_runtime": 78.6199,
"eval_samples_per_second": 48.893,
"eval_steps_per_second": 0.776,
"learning_rate": 0.001,
"step": 2896
},
{
"epoch": 16.574585635359117,
"grad_norm": 0.13405688107013702,
"learning_rate": 0.001,
"loss": 0.3733,
"step": 3000
},
{
"epoch": 17.0,
"eval_explained_variance": 0.38817232847213745,
"eval_loss": 0.3628774583339691,
"eval_mae": 0.13783428072929382,
"eval_r2": 0.38425371050834656,
"eval_rmse": 0.1932491511106491,
"eval_runtime": 80.2352,
"eval_samples_per_second": 47.909,
"eval_steps_per_second": 0.76,
"learning_rate": 0.001,
"step": 3077
},
{
"epoch": 18.0,
"eval_explained_variance": 0.3839576840400696,
"eval_loss": 0.36154037714004517,
"eval_mae": 0.1323489546775818,
"eval_r2": 0.37684857845306396,
"eval_rmse": 0.19430074095726013,
"eval_runtime": 85.0559,
"eval_samples_per_second": 45.194,
"eval_steps_per_second": 0.717,
"learning_rate": 0.001,
"step": 3258
},
{
"epoch": 19.0,
"eval_explained_variance": 0.3911250829696655,
"eval_loss": 0.3594801127910614,
"eval_mae": 0.13296250998973846,
"eval_r2": 0.38950252532958984,
"eval_rmse": 0.19218452274799347,
"eval_runtime": 83.3283,
"eval_samples_per_second": 46.131,
"eval_steps_per_second": 0.732,
"learning_rate": 0.001,
"step": 3439
},
{
"epoch": 19.337016574585636,
"grad_norm": 0.1141289696097374,
"learning_rate": 0.0001,
"loss": 0.3723,
"step": 3500
},
{
"epoch": 20.0,
"eval_explained_variance": 0.4041489064693451,
"eval_loss": 0.3565874397754669,
"eval_mae": 0.13302744925022125,
"eval_r2": 0.40064936876296997,
"eval_rmse": 0.19017010927200317,
"eval_runtime": 81.1931,
"eval_samples_per_second": 47.344,
"eval_steps_per_second": 0.751,
"learning_rate": 0.0001,
"step": 3620
},
{
"epoch": 21.0,
"eval_explained_variance": 0.4089391827583313,
"eval_loss": 0.35486647486686707,
"eval_mae": 0.13062793016433716,
"eval_r2": 0.40758493542671204,
"eval_rmse": 0.18895885348320007,
"eval_runtime": 83.1097,
"eval_samples_per_second": 46.252,
"eval_steps_per_second": 0.734,
"learning_rate": 0.0001,
"step": 3801
},
{
"epoch": 22.0,
"eval_explained_variance": 0.41082069277763367,
"eval_loss": 0.35447388887405396,
"eval_mae": 0.13081100583076477,
"eval_r2": 0.4096067547798157,
"eval_rmse": 0.18863680958747864,
"eval_runtime": 83.0406,
"eval_samples_per_second": 46.291,
"eval_steps_per_second": 0.735,
"learning_rate": 0.0001,
"step": 3982
},
{
"epoch": 22.099447513812155,
"grad_norm": 0.1349981278181076,
"learning_rate": 0.0001,
"loss": 0.3683,
"step": 4000
},
{
"epoch": 23.0,
"eval_explained_variance": 0.4124269485473633,
"eval_loss": 0.3544616997241974,
"eval_mae": 0.13033078610897064,
"eval_r2": 0.411631315946579,
"eval_rmse": 0.18823565542697906,
"eval_runtime": 84.9131,
"eval_samples_per_second": 45.27,
"eval_steps_per_second": 0.718,
"learning_rate": 0.0001,
"step": 4163
},
{
"epoch": 24.0,
"eval_explained_variance": 0.4130716025829315,
"eval_loss": 0.3539991080760956,
"eval_mae": 0.1316699981689453,
"eval_r2": 0.4120980501174927,
"eval_rmse": 0.1881898045539856,
"eval_runtime": 83.8568,
"eval_samples_per_second": 45.84,
"eval_steps_per_second": 0.727,
"learning_rate": 0.0001,
"step": 4344
},
{
"epoch": 24.861878453038674,
"grad_norm": 0.10933272540569305,
"learning_rate": 0.0001,
"loss": 0.3654,
"step": 4500
},
{
"epoch": 25.0,
"eval_explained_variance": 0.4125872850418091,
"eval_loss": 0.3545873463153839,
"eval_mae": 0.12844440340995789,
"eval_r2": 0.41126883029937744,
"eval_rmse": 0.18831981718540192,
"eval_runtime": 83.2028,
"eval_samples_per_second": 46.2,
"eval_steps_per_second": 0.733,
"learning_rate": 0.0001,
"step": 4525
},
{
"epoch": 26.0,
"eval_explained_variance": 0.4165402948856354,
"eval_loss": 0.3529074192047119,
"eval_mae": 0.1263934224843979,
"eval_r2": 0.4154190421104431,
"eval_rmse": 0.18757320940494537,
"eval_runtime": 81.7389,
"eval_samples_per_second": 47.028,
"eval_steps_per_second": 0.746,
"learning_rate": 0.0001,
"step": 4706
},
{
"epoch": 27.0,
"eval_explained_variance": 0.4176566004753113,
"eval_loss": 0.3532767593860626,
"eval_mae": 0.129387766122818,
"eval_r2": 0.41658732295036316,
"eval_rmse": 0.187411367893219,
"eval_runtime": 81.9775,
"eval_samples_per_second": 46.891,
"eval_steps_per_second": 0.744,
"learning_rate": 0.0001,
"step": 4887
},
{
"epoch": 27.624309392265193,
"grad_norm": 0.11634723842144012,
"learning_rate": 0.0001,
"loss": 0.3652,
"step": 5000
},
{
"epoch": 28.0,
"eval_explained_variance": 0.41691651940345764,
"eval_loss": 0.3532498776912689,
"eval_mae": 0.12938687205314636,
"eval_r2": 0.41600102186203003,
"eval_rmse": 0.18755248188972473,
"eval_runtime": 84.518,
"eval_samples_per_second": 45.481,
"eval_steps_per_second": 0.722,
"learning_rate": 0.0001,
"step": 5068
},
{
"epoch": 29.0,
"eval_explained_variance": 0.41915032267570496,
"eval_loss": 0.35306474566459656,
"eval_mae": 0.1302015781402588,
"eval_r2": 0.41835859417915344,
"eval_rmse": 0.1871432662010193,
"eval_runtime": 81.5924,
"eval_samples_per_second": 47.112,
"eval_steps_per_second": 0.748,
"learning_rate": 0.0001,
"step": 5249
},
{
"epoch": 30.0,
"eval_explained_variance": 0.4160480499267578,
"eval_loss": 0.3536038398742676,
"eval_mae": 0.1291646808385849,
"eval_r2": 0.414754718542099,
"eval_rmse": 0.18775980174541473,
"eval_runtime": 81.1332,
"eval_samples_per_second": 47.379,
"eval_steps_per_second": 0.752,
"learning_rate": 0.0001,
"step": 5430
},
{
"epoch": 30.386740331491712,
"grad_norm": 0.12858645617961884,
"learning_rate": 0.0001,
"loss": 0.3628,
"step": 5500
},
{
"epoch": 31.0,
"eval_explained_variance": 0.41752102971076965,
"eval_loss": 0.3530591130256653,
"eval_mae": 0.1267225444316864,
"eval_r2": 0.415239542722702,
"eval_rmse": 0.1876552253961563,
"eval_runtime": 82.6896,
"eval_samples_per_second": 46.487,
"eval_steps_per_second": 0.738,
"learning_rate": 0.0001,
"step": 5611
},
{
"epoch": 32.0,
"eval_explained_variance": 0.4167982339859009,
"eval_loss": 0.3528367877006531,
"eval_mae": 0.12877780199050903,
"eval_r2": 0.4161965548992157,
"eval_rmse": 0.18764065206050873,
"eval_runtime": 81.5086,
"eval_samples_per_second": 47.161,
"eval_steps_per_second": 0.748,
"learning_rate": 0.0001,
"step": 5792
},
{
"epoch": 33.0,
"eval_explained_variance": 0.4230208098888397,
"eval_loss": 0.35152381658554077,
"eval_mae": 0.12729588150978088,
"eval_r2": 0.4225224256515503,
"eval_rmse": 0.18640562891960144,
"eval_runtime": 83.3119,
"eval_samples_per_second": 46.14,
"eval_steps_per_second": 0.732,
"learning_rate": 0.0001,
"step": 5973
},
{
"epoch": 33.149171270718234,
"grad_norm": 0.12355350703001022,
"learning_rate": 0.0001,
"loss": 0.3638,
"step": 6000
},
{
"epoch": 34.0,
"eval_explained_variance": 0.4216320216655731,
"eval_loss": 0.35195404291152954,
"eval_mae": 0.12629321217536926,
"eval_r2": 0.4202421009540558,
"eval_rmse": 0.18677598237991333,
"eval_runtime": 81.9599,
"eval_samples_per_second": 46.901,
"eval_steps_per_second": 0.744,
"learning_rate": 0.0001,
"step": 6154
},
{
"epoch": 35.0,
"eval_explained_variance": 0.42201581597328186,
"eval_loss": 0.35178276896476746,
"eval_mae": 0.12782610952854156,
"eval_r2": 0.42147499322891235,
"eval_rmse": 0.18657900393009186,
"eval_runtime": 82.9654,
"eval_samples_per_second": 46.333,
"eval_steps_per_second": 0.735,
"learning_rate": 0.0001,
"step": 6335
},
{
"epoch": 35.91160220994475,
"grad_norm": 0.12306394428014755,
"learning_rate": 0.0001,
"loss": 0.3618,
"step": 6500
},
{
"epoch": 36.0,
"eval_explained_variance": 0.41956183314323425,
"eval_loss": 0.35231974720954895,
"eval_mae": 0.12849368155002594,
"eval_r2": 0.4192589223384857,
"eval_rmse": 0.18713095784187317,
"eval_runtime": 81.841,
"eval_samples_per_second": 46.969,
"eval_steps_per_second": 0.745,
"learning_rate": 0.0001,
"step": 6516
},
{
"epoch": 37.0,
"eval_explained_variance": 0.4224753677845001,
"eval_loss": 0.3515876829624176,
"eval_mae": 0.12726719677448273,
"eval_r2": 0.4216739237308502,
"eval_rmse": 0.18659605085849762,
"eval_runtime": 81.3618,
"eval_samples_per_second": 47.246,
"eval_steps_per_second": 0.75,
"learning_rate": 0.0001,
"step": 6697
},
{
"epoch": 38.0,
"eval_explained_variance": 0.4183831810951233,
"eval_loss": 0.35274896025657654,
"eval_mae": 0.12742024660110474,
"eval_r2": 0.41570571064949036,
"eval_rmse": 0.1878250390291214,
"eval_runtime": 82.5613,
"eval_samples_per_second": 46.559,
"eval_steps_per_second": 0.739,
"learning_rate": 0.0001,
"step": 6878
},
{
"epoch": 38.67403314917127,
"grad_norm": 0.12681056559085846,
"learning_rate": 0.0001,
"loss": 0.3611,
"step": 7000
},
{
"epoch": 39.0,
"eval_explained_variance": 0.42493724822998047,
"eval_loss": 0.35124146938323975,
"eval_mae": 0.12662582099437714,
"eval_r2": 0.4241558611392975,
"eval_rmse": 0.18624022603034973,
"eval_runtime": 83.3424,
"eval_samples_per_second": 46.123,
"eval_steps_per_second": 0.732,
"learning_rate": 0.0001,
"step": 7059
},
{
"epoch": 40.0,
"eval_explained_variance": 0.42367231845855713,
"eval_loss": 0.35209622979164124,
"eval_mae": 0.13019172847270966,
"eval_r2": 0.4224165081977844,
"eval_rmse": 0.18663105368614197,
"eval_runtime": 82.8483,
"eval_samples_per_second": 46.398,
"eval_steps_per_second": 0.736,
"learning_rate": 0.0001,
"step": 7240
},
{
"epoch": 41.0,
"eval_explained_variance": 0.4274601340293884,
"eval_loss": 0.35067644715309143,
"eval_mae": 0.1265629082918167,
"eval_r2": 0.42641735076904297,
"eval_rmse": 0.18584123253822327,
"eval_runtime": 83.6418,
"eval_samples_per_second": 45.958,
"eval_steps_per_second": 0.729,
"learning_rate": 0.0001,
"step": 7421
},
{
"epoch": 41.43646408839779,
"grad_norm": 0.13862788677215576,
"learning_rate": 0.0001,
"loss": 0.3613,
"step": 7500
},
{
"epoch": 42.0,
"eval_explained_variance": 0.4272458851337433,
"eval_loss": 0.3512935936450958,
"eval_mae": 0.12775851786136627,
"eval_r2": 0.4262687563896179,
"eval_rmse": 0.18596960604190826,
"eval_runtime": 83.9115,
"eval_samples_per_second": 45.81,
"eval_steps_per_second": 0.727,
"learning_rate": 0.0001,
"step": 7602
},
{
"epoch": 43.0,
"eval_explained_variance": 0.4272707402706146,
"eval_loss": 0.3510710895061493,
"eval_mae": 0.12741515040397644,
"eval_r2": 0.42624664306640625,
"eval_rmse": 0.1859511435031891,
"eval_runtime": 82.5626,
"eval_samples_per_second": 46.559,
"eval_steps_per_second": 0.739,
"learning_rate": 0.0001,
"step": 7783
},
{
"epoch": 44.0,
"eval_explained_variance": 0.42821547389030457,
"eval_loss": 0.35139599442481995,
"eval_mae": 0.12441141903400421,
"eval_r2": 0.4265681505203247,
"eval_rmse": 0.18587811291217804,
"eval_runtime": 82.7673,
"eval_samples_per_second": 46.443,
"eval_steps_per_second": 0.737,
"learning_rate": 0.0001,
"step": 7964
},
{
"epoch": 44.19889502762431,
"grad_norm": 0.15475843846797943,
"learning_rate": 0.0001,
"loss": 0.3603,
"step": 8000
},
{
"epoch": 45.0,
"eval_explained_variance": 0.42755335569381714,
"eval_loss": 0.35247302055358887,
"eval_mae": 0.127328023314476,
"eval_r2": 0.42492759227752686,
"eval_rmse": 0.18626871705055237,
"eval_runtime": 84.2463,
"eval_samples_per_second": 45.628,
"eval_steps_per_second": 0.724,
"learning_rate": 0.0001,
"step": 8145
},
{
"epoch": 46.0,
"eval_explained_variance": 0.4285746216773987,
"eval_loss": 0.3505423069000244,
"eval_mae": 0.12581512331962585,
"eval_r2": 0.4274958372116089,
"eval_rmse": 0.18559609353542328,
"eval_runtime": 82.9445,
"eval_samples_per_second": 46.344,
"eval_steps_per_second": 0.735,
"learning_rate": 0.0001,
"step": 8326
},
{
"epoch": 46.96132596685083,
"grad_norm": 0.172088161110878,
"learning_rate": 0.0001,
"loss": 0.3603,
"step": 8500
},
{
"epoch": 47.0,
"eval_explained_variance": 0.42584168910980225,
"eval_loss": 0.3517468571662903,
"eval_mae": 0.1250177025794983,
"eval_r2": 0.4231443405151367,
"eval_rmse": 0.18658187985420227,
"eval_runtime": 84.7488,
"eval_samples_per_second": 45.358,
"eval_steps_per_second": 0.72,
"learning_rate": 0.0001,
"step": 8507
},
{
"epoch": 48.0,
"eval_explained_variance": 0.4292495548725128,
"eval_loss": 0.35043978691101074,
"eval_mae": 0.12591718137264252,
"eval_r2": 0.42857199907302856,
"eval_rmse": 0.18564504384994507,
"eval_runtime": 80.6864,
"eval_samples_per_second": 47.641,
"eval_steps_per_second": 0.756,
"learning_rate": 0.0001,
"step": 8688
},
{
"epoch": 49.0,
"eval_explained_variance": 0.42835286259651184,
"eval_loss": 0.35074281692504883,
"eval_mae": 0.12717720866203308,
"eval_r2": 0.4274061322212219,
"eval_rmse": 0.1857146918773651,
"eval_runtime": 81.6357,
"eval_samples_per_second": 47.087,
"eval_steps_per_second": 0.747,
"learning_rate": 0.0001,
"step": 8869
},
{
"epoch": 49.72375690607735,
"grad_norm": 0.17033293843269348,
"learning_rate": 0.0001,
"loss": 0.3604,
"step": 9000
},
{
"epoch": 50.0,
"eval_explained_variance": 0.42889854311943054,
"eval_loss": 0.3515849709510803,
"eval_mae": 0.1283276230096817,
"eval_r2": 0.42797213792800903,
"eval_rmse": 0.1857057511806488,
"eval_runtime": 82.7487,
"eval_samples_per_second": 46.454,
"eval_steps_per_second": 0.737,
"learning_rate": 0.0001,
"step": 9050
},
{
"epoch": 51.0,
"eval_explained_variance": 0.4282180070877075,
"eval_loss": 0.35289809107780457,
"eval_mae": 0.1288221776485443,
"eval_r2": 0.42265036702156067,
"eval_rmse": 0.1866857409477234,
"eval_runtime": 83.8305,
"eval_samples_per_second": 45.854,
"eval_steps_per_second": 0.728,
"learning_rate": 0.0001,
"step": 9231
},
{
"epoch": 52.0,
"eval_explained_variance": 0.42951008677482605,
"eval_loss": 0.3505743443965912,
"eval_mae": 0.12677451968193054,
"eval_r2": 0.4281761944293976,
"eval_rmse": 0.18569740653038025,
"eval_runtime": 83.1138,
"eval_samples_per_second": 46.25,
"eval_steps_per_second": 0.734,
"learning_rate": 0.0001,
"step": 9412
},
{
"epoch": 52.48618784530387,
"grad_norm": 0.19461286067962646,
"learning_rate": 0.0001,
"loss": 0.3592,
"step": 9500
},
{
"epoch": 53.0,
"eval_explained_variance": 0.4302181005477905,
"eval_loss": 0.35052910447120667,
"eval_mae": 0.1273086667060852,
"eval_r2": 0.4285990595817566,
"eval_rmse": 0.18561594188213348,
"eval_runtime": 82.8342,
"eval_samples_per_second": 46.406,
"eval_steps_per_second": 0.736,
"learning_rate": 0.0001,
"step": 9593
},
{
"epoch": 54.0,
"eval_explained_variance": 0.43035173416137695,
"eval_loss": 0.35016006231307983,
"eval_mae": 0.12655657529830933,
"eval_r2": 0.4299810826778412,
"eval_rmse": 0.1853920817375183,
"eval_runtime": 83.2383,
"eval_samples_per_second": 46.181,
"eval_steps_per_second": 0.733,
"learning_rate": 0.0001,
"step": 9774
},
{
"epoch": 55.0,
"eval_explained_variance": 0.4318656921386719,
"eval_loss": 0.35006165504455566,
"eval_mae": 0.12509843707084656,
"eval_r2": 0.42986157536506653,
"eval_rmse": 0.18541744351387024,
"eval_runtime": 82.8153,
"eval_samples_per_second": 46.417,
"eval_steps_per_second": 0.737,
"learning_rate": 0.0001,
"step": 9955
},
{
"epoch": 55.248618784530386,
"grad_norm": 0.179665207862854,
"learning_rate": 0.0001,
"loss": 0.3601,
"step": 10000
},
{
"epoch": 56.0,
"eval_explained_variance": 0.42942577600479126,
"eval_loss": 0.35072585940361023,
"eval_mae": 0.12430255115032196,
"eval_r2": 0.4273306131362915,
"eval_rmse": 0.18582786619663239,
"eval_runtime": 83.8049,
"eval_samples_per_second": 45.868,
"eval_steps_per_second": 0.728,
"learning_rate": 0.0001,
"step": 10136
},
{
"epoch": 57.0,
"eval_explained_variance": 0.42968177795410156,
"eval_loss": 0.3508891463279724,
"eval_mae": 0.12534378468990326,
"eval_r2": 0.4273567497730255,
"eval_rmse": 0.18598994612693787,
"eval_runtime": 86.999,
"eval_samples_per_second": 44.184,
"eval_steps_per_second": 0.701,
"learning_rate": 0.0001,
"step": 10317
},
{
"epoch": 58.0,
"eval_explained_variance": 0.43535250425338745,
"eval_loss": 0.3492669463157654,
"eval_mae": 0.12510134279727936,
"eval_r2": 0.4338167607784271,
"eval_rmse": 0.1846422404050827,
"eval_runtime": 84.7392,
"eval_samples_per_second": 45.363,
"eval_steps_per_second": 0.72,
"learning_rate": 0.0001,
"step": 10498
},
{
"epoch": 58.011049723756905,
"grad_norm": 0.23822635412216187,
"learning_rate": 0.0001,
"loss": 0.3601,
"step": 10500
},
{
"epoch": 59.0,
"eval_explained_variance": 0.42991289496421814,
"eval_loss": 0.3500733971595764,
"eval_mae": 0.12414979934692383,
"eval_r2": 0.42818644642829895,
"eval_rmse": 0.18548892438411713,
"eval_runtime": 83.0713,
"eval_samples_per_second": 46.274,
"eval_steps_per_second": 0.734,
"learning_rate": 0.0001,
"step": 10679
},
{
"epoch": 60.0,
"eval_explained_variance": 0.43251222372055054,
"eval_loss": 0.350059449672699,
"eval_mae": 0.12591439485549927,
"eval_r2": 0.43032628297805786,
"eval_rmse": 0.18521927297115326,
"eval_runtime": 82.5828,
"eval_samples_per_second": 46.547,
"eval_steps_per_second": 0.739,
"learning_rate": 0.0001,
"step": 10860
},
{
"epoch": 60.773480662983424,
"grad_norm": 0.2104698270559311,
"learning_rate": 0.0001,
"loss": 0.3588,
"step": 11000
},
{
"epoch": 61.0,
"eval_explained_variance": 0.4309556186199188,
"eval_loss": 0.34978389739990234,
"eval_mae": 0.126389279961586,
"eval_r2": 0.43050628900527954,
"eval_rmse": 0.18503333628177643,
"eval_runtime": 84.1032,
"eval_samples_per_second": 45.706,
"eval_steps_per_second": 0.725,
"learning_rate": 0.0001,
"step": 11041
},
{
"epoch": 62.0,
"eval_explained_variance": 0.43332165479660034,
"eval_loss": 0.34984564781188965,
"eval_mae": 0.1265084147453308,
"eval_r2": 0.4322562515735626,
"eval_rmse": 0.18499605357646942,
"eval_runtime": 83.3683,
"eval_samples_per_second": 46.109,
"eval_steps_per_second": 0.732,
"learning_rate": 0.0001,
"step": 11222
},
{
"epoch": 63.0,
"eval_explained_variance": 0.4338870644569397,
"eval_loss": 0.35018646717071533,
"eval_mae": 0.1270289421081543,
"eval_r2": 0.4321424067020416,
"eval_rmse": 0.18513011932373047,
"eval_runtime": 82.3267,
"eval_samples_per_second": 46.692,
"eval_steps_per_second": 0.741,
"learning_rate": 0.0001,
"step": 11403
},
{
"epoch": 63.53591160220994,
"grad_norm": 0.18755941092967987,
"learning_rate": 0.0001,
"loss": 0.3579,
"step": 11500
},
{
"epoch": 64.0,
"eval_explained_variance": 0.43124625086784363,
"eval_loss": 0.34996479749679565,
"eval_mae": 0.12558279931545258,
"eval_r2": 0.43004974722862244,
"eval_rmse": 0.1853456199169159,
"eval_runtime": 82.1284,
"eval_samples_per_second": 46.805,
"eval_steps_per_second": 0.743,
"learning_rate": 0.0001,
"step": 11584
},
{
"epoch": 65.0,
"eval_explained_variance": 0.4304056167602539,
"eval_loss": 0.3501463234424591,
"eval_mae": 0.1280103474855423,
"eval_r2": 0.42989540100097656,
"eval_rmse": 0.1853969395160675,
"eval_runtime": 83.5593,
"eval_samples_per_second": 46.003,
"eval_steps_per_second": 0.73,
"learning_rate": 1e-05,
"step": 11765
},
{
"epoch": 66.0,
"eval_explained_variance": 0.43423348665237427,
"eval_loss": 0.34930846095085144,
"eval_mae": 0.1253172904253006,
"eval_r2": 0.43362313508987427,
"eval_rmse": 0.1847212016582489,
"eval_runtime": 82.5157,
"eval_samples_per_second": 46.585,
"eval_steps_per_second": 0.739,
"learning_rate": 1e-05,
"step": 11946
},
{
"epoch": 66.29834254143647,
"grad_norm": 0.23534314334392548,
"learning_rate": 1e-05,
"loss": 0.3564,
"step": 12000
},
{
"epoch": 67.0,
"eval_explained_variance": 0.43399736285209656,
"eval_loss": 0.3493542969226837,
"eval_mae": 0.12613731622695923,
"eval_r2": 0.43344247341156006,
"eval_rmse": 0.18472003936767578,
"eval_runtime": 83.448,
"eval_samples_per_second": 46.065,
"eval_steps_per_second": 0.731,
"learning_rate": 1e-05,
"step": 12127
},
{
"epoch": 68.0,
"eval_explained_variance": 0.4307097792625427,
"eval_loss": 0.3500206172466278,
"eval_mae": 0.12607118487358093,
"eval_r2": 0.4291488826274872,
"eval_rmse": 0.18558326363563538,
"eval_runtime": 81.73,
"eval_samples_per_second": 47.033,
"eval_steps_per_second": 0.746,
"learning_rate": 1e-05,
"step": 12308
},
{
"epoch": 68.0,
"learning_rate": 1e-05,
"step": 12308,
"total_flos": 1.159646636554683e+20,
"train_loss": 0.36796005863937264,
"train_runtime": 25085.7825,
"train_samples_per_second": 68.896,
"train_steps_per_second": 1.082
}
],
"logging_steps": 500,
"max_steps": 27150,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.159646636554683e+20,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}