{ "best_metric": 0.3492669463157654, "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria_binary-probabilities-large-2024_11_03-batch-size64_freeze_probs/checkpoint-10498", "epoch": 68.0, "eval_steps": 500, "global_step": 12308, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_explained_variance": 0.3009445369243622, "eval_loss": 0.3794757127761841, "eval_mae": 0.14892756938934326, "eval_r2": 0.2894136607646942, "eval_rmse": 0.20668388903141022, "eval_runtime": 77.6082, "eval_samples_per_second": 49.531, "eval_steps_per_second": 0.786, "learning_rate": 0.001, "step": 181 }, { "epoch": 2.0, "eval_explained_variance": 0.3548472821712494, "eval_loss": 0.3673744201660156, "eval_mae": 0.1374034583568573, "eval_r2": 0.35173478722572327, "eval_rmse": 0.198309525847435, "eval_runtime": 76.8329, "eval_samples_per_second": 50.031, "eval_steps_per_second": 0.794, "learning_rate": 0.001, "step": 362 }, { "epoch": 2.7624309392265194, "grad_norm": 0.26878172159194946, "learning_rate": 0.001, "loss": 0.4416, "step": 500 }, { "epoch": 3.0, "eval_explained_variance": 0.35690778493881226, "eval_loss": 0.3671453297138214, "eval_mae": 0.1413687765598297, "eval_r2": 0.35207054018974304, "eval_rmse": 0.198079913854599, "eval_runtime": 77.6227, "eval_samples_per_second": 49.522, "eval_steps_per_second": 0.786, "learning_rate": 0.001, "step": 543 }, { "epoch": 4.0, "eval_explained_variance": 0.37491223216056824, "eval_loss": 0.36317145824432373, "eval_mae": 0.1391323208808899, "eval_r2": 0.3708474040031433, "eval_rmse": 0.19521364569664001, "eval_runtime": 77.1791, "eval_samples_per_second": 49.806, "eval_steps_per_second": 0.79, "learning_rate": 0.001, "step": 724 }, { "epoch": 5.0, "eval_explained_variance": 0.3614368736743927, "eval_loss": 0.3678734302520752, "eval_mae": 0.141828715801239, "eval_r2": 0.3453221321105957, "eval_rmse": 0.19933271408081055, "eval_runtime": 79.1374, "eval_samples_per_second": 48.574, "eval_steps_per_second": 0.771, "learning_rate": 0.001, "step": 905 }, { "epoch": 5.524861878453039, "grad_norm": 0.19997762143611908, "learning_rate": 0.001, "loss": 0.3813, "step": 1000 }, { "epoch": 6.0, "eval_explained_variance": 0.3743399381637573, "eval_loss": 0.36250030994415283, "eval_mae": 0.13803647458553314, "eval_r2": 0.37177884578704834, "eval_rmse": 0.19508354365825653, "eval_runtime": 77.2448, "eval_samples_per_second": 49.764, "eval_steps_per_second": 0.79, "learning_rate": 0.001, "step": 1086 }, { "epoch": 7.0, "eval_explained_variance": 0.38368356227874756, "eval_loss": 0.36188462376594543, "eval_mae": 0.1347939670085907, "eval_r2": 0.3771490454673767, "eval_rmse": 0.19410446286201477, "eval_runtime": 77.3383, "eval_samples_per_second": 49.704, "eval_steps_per_second": 0.789, "learning_rate": 0.001, "step": 1267 }, { "epoch": 8.0, "eval_explained_variance": 0.3808881640434265, "eval_loss": 0.36125126481056213, "eval_mae": 0.13681310415267944, "eval_r2": 0.37883251905441284, "eval_rmse": 0.19354337453842163, "eval_runtime": 80.1691, "eval_samples_per_second": 47.949, "eval_steps_per_second": 0.761, "learning_rate": 0.001, "step": 1448 }, { "epoch": 8.287292817679559, "grad_norm": 0.1677914261817932, "learning_rate": 0.001, "loss": 0.3785, "step": 1500 }, { "epoch": 9.0, "eval_explained_variance": 0.38331514596939087, "eval_loss": 0.3603876233100891, "eval_mae": 0.135352224111557, "eval_r2": 0.3812035918235779, "eval_rmse": 0.19339486956596375, "eval_runtime": 77.1781, "eval_samples_per_second": 49.807, "eval_steps_per_second": 0.79, "learning_rate": 0.001, "step": 1629 }, { "epoch": 10.0, "eval_explained_variance": 0.3843615651130676, "eval_loss": 0.3612792491912842, "eval_mae": 0.13375206291675568, "eval_r2": 0.38122493028640747, "eval_rmse": 0.19321060180664062, "eval_runtime": 77.2138, "eval_samples_per_second": 49.784, "eval_steps_per_second": 0.79, "learning_rate": 0.001, "step": 1810 }, { "epoch": 11.0, "eval_explained_variance": 0.3856731057167053, "eval_loss": 0.3603772521018982, "eval_mae": 0.13226205110549927, "eval_r2": 0.3844555616378784, "eval_rmse": 0.19312407076358795, "eval_runtime": 77.7929, "eval_samples_per_second": 49.413, "eval_steps_per_second": 0.784, "learning_rate": 0.001, "step": 1991 }, { "epoch": 11.049723756906078, "grad_norm": 1.5252280235290527, "learning_rate": 0.001, "loss": 0.3743, "step": 2000 }, { "epoch": 12.0, "eval_explained_variance": 0.38444802165031433, "eval_loss": 0.361823707818985, "eval_mae": 0.13859649002552032, "eval_r2": 0.3774065375328064, "eval_rmse": 0.1941623091697693, "eval_runtime": 76.0433, "eval_samples_per_second": 50.55, "eval_steps_per_second": 0.802, "learning_rate": 0.001, "step": 2172 }, { "epoch": 13.0, "eval_explained_variance": 0.3894338309764862, "eval_loss": 0.35931822657585144, "eval_mae": 0.13433586061000824, "eval_r2": 0.3875495195388794, "eval_rmse": 0.1924724578857422, "eval_runtime": 80.7793, "eval_samples_per_second": 47.586, "eval_steps_per_second": 0.755, "learning_rate": 0.001, "step": 2353 }, { "epoch": 13.812154696132596, "grad_norm": 0.13441379368305206, "learning_rate": 0.001, "loss": 0.3732, "step": 2500 }, { "epoch": 14.0, "eval_explained_variance": 0.3862605392932892, "eval_loss": 0.3604746460914612, "eval_mae": 0.13521355390548706, "eval_r2": 0.38306838274002075, "eval_rmse": 0.19319292902946472, "eval_runtime": 77.1543, "eval_samples_per_second": 49.822, "eval_steps_per_second": 0.791, "learning_rate": 0.001, "step": 2534 }, { "epoch": 15.0, "eval_explained_variance": 0.3836004436016083, "eval_loss": 0.36050480604171753, "eval_mae": 0.13660094141960144, "eval_r2": 0.3816676735877991, "eval_rmse": 0.19354429841041565, "eval_runtime": 78.1543, "eval_samples_per_second": 49.185, "eval_steps_per_second": 0.781, "learning_rate": 0.001, "step": 2715 }, { "epoch": 16.0, "eval_explained_variance": 0.39098840951919556, "eval_loss": 0.3599933683872223, "eval_mae": 0.13121920824050903, "eval_r2": 0.388213574886322, "eval_rmse": 0.1921611875295639, "eval_runtime": 78.6199, "eval_samples_per_second": 48.893, "eval_steps_per_second": 0.776, "learning_rate": 0.001, "step": 2896 }, { "epoch": 16.574585635359117, "grad_norm": 0.13405688107013702, "learning_rate": 0.001, "loss": 0.3733, "step": 3000 }, { "epoch": 17.0, "eval_explained_variance": 0.38817232847213745, "eval_loss": 0.3628774583339691, "eval_mae": 0.13783428072929382, "eval_r2": 0.38425371050834656, "eval_rmse": 0.1932491511106491, "eval_runtime": 80.2352, "eval_samples_per_second": 47.909, "eval_steps_per_second": 0.76, "learning_rate": 0.001, "step": 3077 }, { "epoch": 18.0, "eval_explained_variance": 0.3839576840400696, "eval_loss": 0.36154037714004517, "eval_mae": 0.1323489546775818, "eval_r2": 0.37684857845306396, "eval_rmse": 0.19430074095726013, "eval_runtime": 85.0559, "eval_samples_per_second": 45.194, "eval_steps_per_second": 0.717, "learning_rate": 0.001, "step": 3258 }, { "epoch": 19.0, "eval_explained_variance": 0.3911250829696655, "eval_loss": 0.3594801127910614, "eval_mae": 0.13296250998973846, "eval_r2": 0.38950252532958984, "eval_rmse": 0.19218452274799347, "eval_runtime": 83.3283, "eval_samples_per_second": 46.131, "eval_steps_per_second": 0.732, "learning_rate": 0.001, "step": 3439 }, { "epoch": 19.337016574585636, "grad_norm": 0.1141289696097374, "learning_rate": 0.0001, "loss": 0.3723, "step": 3500 }, { "epoch": 20.0, "eval_explained_variance": 0.4041489064693451, "eval_loss": 0.3565874397754669, "eval_mae": 0.13302744925022125, "eval_r2": 0.40064936876296997, "eval_rmse": 0.19017010927200317, "eval_runtime": 81.1931, "eval_samples_per_second": 47.344, "eval_steps_per_second": 0.751, "learning_rate": 0.0001, "step": 3620 }, { "epoch": 21.0, "eval_explained_variance": 0.4089391827583313, "eval_loss": 0.35486647486686707, "eval_mae": 0.13062793016433716, "eval_r2": 0.40758493542671204, "eval_rmse": 0.18895885348320007, "eval_runtime": 83.1097, "eval_samples_per_second": 46.252, "eval_steps_per_second": 0.734, "learning_rate": 0.0001, "step": 3801 }, { "epoch": 22.0, "eval_explained_variance": 0.41082069277763367, "eval_loss": 0.35447388887405396, "eval_mae": 0.13081100583076477, "eval_r2": 0.4096067547798157, "eval_rmse": 0.18863680958747864, "eval_runtime": 83.0406, "eval_samples_per_second": 46.291, "eval_steps_per_second": 0.735, "learning_rate": 0.0001, "step": 3982 }, { "epoch": 22.099447513812155, "grad_norm": 0.1349981278181076, "learning_rate": 0.0001, "loss": 0.3683, "step": 4000 }, { "epoch": 23.0, "eval_explained_variance": 0.4124269485473633, "eval_loss": 0.3544616997241974, "eval_mae": 0.13033078610897064, "eval_r2": 0.411631315946579, "eval_rmse": 0.18823565542697906, "eval_runtime": 84.9131, "eval_samples_per_second": 45.27, "eval_steps_per_second": 0.718, "learning_rate": 0.0001, "step": 4163 }, { "epoch": 24.0, "eval_explained_variance": 0.4130716025829315, "eval_loss": 0.3539991080760956, "eval_mae": 0.1316699981689453, "eval_r2": 0.4120980501174927, "eval_rmse": 0.1881898045539856, "eval_runtime": 83.8568, "eval_samples_per_second": 45.84, "eval_steps_per_second": 0.727, "learning_rate": 0.0001, "step": 4344 }, { "epoch": 24.861878453038674, "grad_norm": 0.10933272540569305, "learning_rate": 0.0001, "loss": 0.3654, "step": 4500 }, { "epoch": 25.0, "eval_explained_variance": 0.4125872850418091, "eval_loss": 0.3545873463153839, "eval_mae": 0.12844440340995789, "eval_r2": 0.41126883029937744, "eval_rmse": 0.18831981718540192, "eval_runtime": 83.2028, "eval_samples_per_second": 46.2, "eval_steps_per_second": 0.733, "learning_rate": 0.0001, "step": 4525 }, { "epoch": 26.0, "eval_explained_variance": 0.4165402948856354, "eval_loss": 0.3529074192047119, "eval_mae": 0.1263934224843979, "eval_r2": 0.4154190421104431, "eval_rmse": 0.18757320940494537, "eval_runtime": 81.7389, "eval_samples_per_second": 47.028, "eval_steps_per_second": 0.746, "learning_rate": 0.0001, "step": 4706 }, { "epoch": 27.0, "eval_explained_variance": 0.4176566004753113, "eval_loss": 0.3532767593860626, "eval_mae": 0.129387766122818, "eval_r2": 0.41658732295036316, "eval_rmse": 0.187411367893219, "eval_runtime": 81.9775, "eval_samples_per_second": 46.891, "eval_steps_per_second": 0.744, "learning_rate": 0.0001, "step": 4887 }, { "epoch": 27.624309392265193, "grad_norm": 0.11634723842144012, "learning_rate": 0.0001, "loss": 0.3652, "step": 5000 }, { "epoch": 28.0, "eval_explained_variance": 0.41691651940345764, "eval_loss": 0.3532498776912689, "eval_mae": 0.12938687205314636, "eval_r2": 0.41600102186203003, "eval_rmse": 0.18755248188972473, "eval_runtime": 84.518, "eval_samples_per_second": 45.481, "eval_steps_per_second": 0.722, "learning_rate": 0.0001, "step": 5068 }, { "epoch": 29.0, "eval_explained_variance": 0.41915032267570496, "eval_loss": 0.35306474566459656, "eval_mae": 0.1302015781402588, "eval_r2": 0.41835859417915344, "eval_rmse": 0.1871432662010193, "eval_runtime": 81.5924, "eval_samples_per_second": 47.112, "eval_steps_per_second": 0.748, "learning_rate": 0.0001, "step": 5249 }, { "epoch": 30.0, "eval_explained_variance": 0.4160480499267578, "eval_loss": 0.3536038398742676, "eval_mae": 0.1291646808385849, "eval_r2": 0.414754718542099, "eval_rmse": 0.18775980174541473, "eval_runtime": 81.1332, "eval_samples_per_second": 47.379, "eval_steps_per_second": 0.752, "learning_rate": 0.0001, "step": 5430 }, { "epoch": 30.386740331491712, "grad_norm": 0.12858645617961884, "learning_rate": 0.0001, "loss": 0.3628, "step": 5500 }, { "epoch": 31.0, "eval_explained_variance": 0.41752102971076965, "eval_loss": 0.3530591130256653, "eval_mae": 0.1267225444316864, "eval_r2": 0.415239542722702, "eval_rmse": 0.1876552253961563, "eval_runtime": 82.6896, "eval_samples_per_second": 46.487, "eval_steps_per_second": 0.738, "learning_rate": 0.0001, "step": 5611 }, { "epoch": 32.0, "eval_explained_variance": 0.4167982339859009, "eval_loss": 0.3528367877006531, "eval_mae": 0.12877780199050903, "eval_r2": 0.4161965548992157, "eval_rmse": 0.18764065206050873, "eval_runtime": 81.5086, "eval_samples_per_second": 47.161, "eval_steps_per_second": 0.748, "learning_rate": 0.0001, "step": 5792 }, { "epoch": 33.0, "eval_explained_variance": 0.4230208098888397, "eval_loss": 0.35152381658554077, "eval_mae": 0.12729588150978088, "eval_r2": 0.4225224256515503, "eval_rmse": 0.18640562891960144, "eval_runtime": 83.3119, "eval_samples_per_second": 46.14, "eval_steps_per_second": 0.732, "learning_rate": 0.0001, "step": 5973 }, { "epoch": 33.149171270718234, "grad_norm": 0.12355350703001022, "learning_rate": 0.0001, "loss": 0.3638, "step": 6000 }, { "epoch": 34.0, "eval_explained_variance": 0.4216320216655731, "eval_loss": 0.35195404291152954, "eval_mae": 0.12629321217536926, "eval_r2": 0.4202421009540558, "eval_rmse": 0.18677598237991333, "eval_runtime": 81.9599, "eval_samples_per_second": 46.901, "eval_steps_per_second": 0.744, "learning_rate": 0.0001, "step": 6154 }, { "epoch": 35.0, "eval_explained_variance": 0.42201581597328186, "eval_loss": 0.35178276896476746, "eval_mae": 0.12782610952854156, "eval_r2": 0.42147499322891235, "eval_rmse": 0.18657900393009186, "eval_runtime": 82.9654, "eval_samples_per_second": 46.333, "eval_steps_per_second": 0.735, "learning_rate": 0.0001, "step": 6335 }, { "epoch": 35.91160220994475, "grad_norm": 0.12306394428014755, "learning_rate": 0.0001, "loss": 0.3618, "step": 6500 }, { "epoch": 36.0, "eval_explained_variance": 0.41956183314323425, "eval_loss": 0.35231974720954895, "eval_mae": 0.12849368155002594, "eval_r2": 0.4192589223384857, "eval_rmse": 0.18713095784187317, "eval_runtime": 81.841, "eval_samples_per_second": 46.969, "eval_steps_per_second": 0.745, "learning_rate": 0.0001, "step": 6516 }, { "epoch": 37.0, "eval_explained_variance": 0.4224753677845001, "eval_loss": 0.3515876829624176, "eval_mae": 0.12726719677448273, "eval_r2": 0.4216739237308502, "eval_rmse": 0.18659605085849762, "eval_runtime": 81.3618, "eval_samples_per_second": 47.246, "eval_steps_per_second": 0.75, "learning_rate": 0.0001, "step": 6697 }, { "epoch": 38.0, "eval_explained_variance": 0.4183831810951233, "eval_loss": 0.35274896025657654, "eval_mae": 0.12742024660110474, "eval_r2": 0.41570571064949036, "eval_rmse": 0.1878250390291214, "eval_runtime": 82.5613, "eval_samples_per_second": 46.559, "eval_steps_per_second": 0.739, "learning_rate": 0.0001, "step": 6878 }, { "epoch": 38.67403314917127, "grad_norm": 0.12681056559085846, "learning_rate": 0.0001, "loss": 0.3611, "step": 7000 }, { "epoch": 39.0, "eval_explained_variance": 0.42493724822998047, "eval_loss": 0.35124146938323975, "eval_mae": 0.12662582099437714, "eval_r2": 0.4241558611392975, "eval_rmse": 0.18624022603034973, "eval_runtime": 83.3424, "eval_samples_per_second": 46.123, "eval_steps_per_second": 0.732, "learning_rate": 0.0001, "step": 7059 }, { "epoch": 40.0, "eval_explained_variance": 0.42367231845855713, "eval_loss": 0.35209622979164124, "eval_mae": 0.13019172847270966, "eval_r2": 0.4224165081977844, "eval_rmse": 0.18663105368614197, "eval_runtime": 82.8483, "eval_samples_per_second": 46.398, "eval_steps_per_second": 0.736, "learning_rate": 0.0001, "step": 7240 }, { "epoch": 41.0, "eval_explained_variance": 0.4274601340293884, "eval_loss": 0.35067644715309143, "eval_mae": 0.1265629082918167, "eval_r2": 0.42641735076904297, "eval_rmse": 0.18584123253822327, "eval_runtime": 83.6418, "eval_samples_per_second": 45.958, "eval_steps_per_second": 0.729, "learning_rate": 0.0001, "step": 7421 }, { "epoch": 41.43646408839779, "grad_norm": 0.13862788677215576, "learning_rate": 0.0001, "loss": 0.3613, "step": 7500 }, { "epoch": 42.0, "eval_explained_variance": 0.4272458851337433, "eval_loss": 0.3512935936450958, "eval_mae": 0.12775851786136627, "eval_r2": 0.4262687563896179, "eval_rmse": 0.18596960604190826, "eval_runtime": 83.9115, "eval_samples_per_second": 45.81, "eval_steps_per_second": 0.727, "learning_rate": 0.0001, "step": 7602 }, { "epoch": 43.0, "eval_explained_variance": 0.4272707402706146, "eval_loss": 0.3510710895061493, "eval_mae": 0.12741515040397644, "eval_r2": 0.42624664306640625, "eval_rmse": 0.1859511435031891, "eval_runtime": 82.5626, "eval_samples_per_second": 46.559, "eval_steps_per_second": 0.739, "learning_rate": 0.0001, "step": 7783 }, { "epoch": 44.0, "eval_explained_variance": 0.42821547389030457, "eval_loss": 0.35139599442481995, "eval_mae": 0.12441141903400421, "eval_r2": 0.4265681505203247, "eval_rmse": 0.18587811291217804, "eval_runtime": 82.7673, "eval_samples_per_second": 46.443, "eval_steps_per_second": 0.737, "learning_rate": 0.0001, "step": 7964 }, { "epoch": 44.19889502762431, "grad_norm": 0.15475843846797943, "learning_rate": 0.0001, "loss": 0.3603, "step": 8000 }, { "epoch": 45.0, "eval_explained_variance": 0.42755335569381714, "eval_loss": 0.35247302055358887, "eval_mae": 0.127328023314476, "eval_r2": 0.42492759227752686, "eval_rmse": 0.18626871705055237, "eval_runtime": 84.2463, "eval_samples_per_second": 45.628, "eval_steps_per_second": 0.724, "learning_rate": 0.0001, "step": 8145 }, { "epoch": 46.0, "eval_explained_variance": 0.4285746216773987, "eval_loss": 0.3505423069000244, "eval_mae": 0.12581512331962585, "eval_r2": 0.4274958372116089, "eval_rmse": 0.18559609353542328, "eval_runtime": 82.9445, "eval_samples_per_second": 46.344, "eval_steps_per_second": 0.735, "learning_rate": 0.0001, "step": 8326 }, { "epoch": 46.96132596685083, "grad_norm": 0.172088161110878, "learning_rate": 0.0001, "loss": 0.3603, "step": 8500 }, { "epoch": 47.0, "eval_explained_variance": 0.42584168910980225, "eval_loss": 0.3517468571662903, "eval_mae": 0.1250177025794983, "eval_r2": 0.4231443405151367, "eval_rmse": 0.18658187985420227, "eval_runtime": 84.7488, "eval_samples_per_second": 45.358, "eval_steps_per_second": 0.72, "learning_rate": 0.0001, "step": 8507 }, { "epoch": 48.0, "eval_explained_variance": 0.4292495548725128, "eval_loss": 0.35043978691101074, "eval_mae": 0.12591718137264252, "eval_r2": 0.42857199907302856, "eval_rmse": 0.18564504384994507, "eval_runtime": 80.6864, "eval_samples_per_second": 47.641, "eval_steps_per_second": 0.756, "learning_rate": 0.0001, "step": 8688 }, { "epoch": 49.0, "eval_explained_variance": 0.42835286259651184, "eval_loss": 0.35074281692504883, "eval_mae": 0.12717720866203308, "eval_r2": 0.4274061322212219, "eval_rmse": 0.1857146918773651, "eval_runtime": 81.6357, "eval_samples_per_second": 47.087, "eval_steps_per_second": 0.747, "learning_rate": 0.0001, "step": 8869 }, { "epoch": 49.72375690607735, "grad_norm": 0.17033293843269348, "learning_rate": 0.0001, "loss": 0.3604, "step": 9000 }, { "epoch": 50.0, "eval_explained_variance": 0.42889854311943054, "eval_loss": 0.3515849709510803, "eval_mae": 0.1283276230096817, "eval_r2": 0.42797213792800903, "eval_rmse": 0.1857057511806488, "eval_runtime": 82.7487, "eval_samples_per_second": 46.454, "eval_steps_per_second": 0.737, "learning_rate": 0.0001, "step": 9050 }, { "epoch": 51.0, "eval_explained_variance": 0.4282180070877075, "eval_loss": 0.35289809107780457, "eval_mae": 0.1288221776485443, "eval_r2": 0.42265036702156067, "eval_rmse": 0.1866857409477234, "eval_runtime": 83.8305, "eval_samples_per_second": 45.854, "eval_steps_per_second": 0.728, "learning_rate": 0.0001, "step": 9231 }, { "epoch": 52.0, "eval_explained_variance": 0.42951008677482605, "eval_loss": 0.3505743443965912, "eval_mae": 0.12677451968193054, "eval_r2": 0.4281761944293976, "eval_rmse": 0.18569740653038025, "eval_runtime": 83.1138, "eval_samples_per_second": 46.25, "eval_steps_per_second": 0.734, "learning_rate": 0.0001, "step": 9412 }, { "epoch": 52.48618784530387, "grad_norm": 0.19461286067962646, "learning_rate": 0.0001, "loss": 0.3592, "step": 9500 }, { "epoch": 53.0, "eval_explained_variance": 0.4302181005477905, "eval_loss": 0.35052910447120667, "eval_mae": 0.1273086667060852, "eval_r2": 0.4285990595817566, "eval_rmse": 0.18561594188213348, "eval_runtime": 82.8342, "eval_samples_per_second": 46.406, "eval_steps_per_second": 0.736, "learning_rate": 0.0001, "step": 9593 }, { "epoch": 54.0, "eval_explained_variance": 0.43035173416137695, "eval_loss": 0.35016006231307983, "eval_mae": 0.12655657529830933, "eval_r2": 0.4299810826778412, "eval_rmse": 0.1853920817375183, "eval_runtime": 83.2383, "eval_samples_per_second": 46.181, "eval_steps_per_second": 0.733, "learning_rate": 0.0001, "step": 9774 }, { "epoch": 55.0, "eval_explained_variance": 0.4318656921386719, "eval_loss": 0.35006165504455566, "eval_mae": 0.12509843707084656, "eval_r2": 0.42986157536506653, "eval_rmse": 0.18541744351387024, "eval_runtime": 82.8153, "eval_samples_per_second": 46.417, "eval_steps_per_second": 0.737, "learning_rate": 0.0001, "step": 9955 }, { "epoch": 55.248618784530386, "grad_norm": 0.179665207862854, "learning_rate": 0.0001, "loss": 0.3601, "step": 10000 }, { "epoch": 56.0, "eval_explained_variance": 0.42942577600479126, "eval_loss": 0.35072585940361023, "eval_mae": 0.12430255115032196, "eval_r2": 0.4273306131362915, "eval_rmse": 0.18582786619663239, "eval_runtime": 83.8049, "eval_samples_per_second": 45.868, "eval_steps_per_second": 0.728, "learning_rate": 0.0001, "step": 10136 }, { "epoch": 57.0, "eval_explained_variance": 0.42968177795410156, "eval_loss": 0.3508891463279724, "eval_mae": 0.12534378468990326, "eval_r2": 0.4273567497730255, "eval_rmse": 0.18598994612693787, "eval_runtime": 86.999, "eval_samples_per_second": 44.184, "eval_steps_per_second": 0.701, "learning_rate": 0.0001, "step": 10317 }, { "epoch": 58.0, "eval_explained_variance": 0.43535250425338745, "eval_loss": 0.3492669463157654, "eval_mae": 0.12510134279727936, "eval_r2": 0.4338167607784271, "eval_rmse": 0.1846422404050827, "eval_runtime": 84.7392, "eval_samples_per_second": 45.363, "eval_steps_per_second": 0.72, "learning_rate": 0.0001, "step": 10498 }, { "epoch": 58.011049723756905, "grad_norm": 0.23822635412216187, "learning_rate": 0.0001, "loss": 0.3601, "step": 10500 }, { "epoch": 59.0, "eval_explained_variance": 0.42991289496421814, "eval_loss": 0.3500733971595764, "eval_mae": 0.12414979934692383, "eval_r2": 0.42818644642829895, "eval_rmse": 0.18548892438411713, "eval_runtime": 83.0713, "eval_samples_per_second": 46.274, "eval_steps_per_second": 0.734, "learning_rate": 0.0001, "step": 10679 }, { "epoch": 60.0, "eval_explained_variance": 0.43251222372055054, "eval_loss": 0.350059449672699, "eval_mae": 0.12591439485549927, "eval_r2": 0.43032628297805786, "eval_rmse": 0.18521927297115326, "eval_runtime": 82.5828, "eval_samples_per_second": 46.547, "eval_steps_per_second": 0.739, "learning_rate": 0.0001, "step": 10860 }, { "epoch": 60.773480662983424, "grad_norm": 0.2104698270559311, "learning_rate": 0.0001, "loss": 0.3588, "step": 11000 }, { "epoch": 61.0, "eval_explained_variance": 0.4309556186199188, "eval_loss": 0.34978389739990234, "eval_mae": 0.126389279961586, "eval_r2": 0.43050628900527954, "eval_rmse": 0.18503333628177643, "eval_runtime": 84.1032, "eval_samples_per_second": 45.706, "eval_steps_per_second": 0.725, "learning_rate": 0.0001, "step": 11041 }, { "epoch": 62.0, "eval_explained_variance": 0.43332165479660034, "eval_loss": 0.34984564781188965, "eval_mae": 0.1265084147453308, "eval_r2": 0.4322562515735626, "eval_rmse": 0.18499605357646942, "eval_runtime": 83.3683, "eval_samples_per_second": 46.109, "eval_steps_per_second": 0.732, "learning_rate": 0.0001, "step": 11222 }, { "epoch": 63.0, "eval_explained_variance": 0.4338870644569397, "eval_loss": 0.35018646717071533, "eval_mae": 0.1270289421081543, "eval_r2": 0.4321424067020416, "eval_rmse": 0.18513011932373047, "eval_runtime": 82.3267, "eval_samples_per_second": 46.692, "eval_steps_per_second": 0.741, "learning_rate": 0.0001, "step": 11403 }, { "epoch": 63.53591160220994, "grad_norm": 0.18755941092967987, "learning_rate": 0.0001, "loss": 0.3579, "step": 11500 }, { "epoch": 64.0, "eval_explained_variance": 0.43124625086784363, "eval_loss": 0.34996479749679565, "eval_mae": 0.12558279931545258, "eval_r2": 0.43004974722862244, "eval_rmse": 0.1853456199169159, "eval_runtime": 82.1284, "eval_samples_per_second": 46.805, "eval_steps_per_second": 0.743, "learning_rate": 0.0001, "step": 11584 }, { "epoch": 65.0, "eval_explained_variance": 0.4304056167602539, "eval_loss": 0.3501463234424591, "eval_mae": 0.1280103474855423, "eval_r2": 0.42989540100097656, "eval_rmse": 0.1853969395160675, "eval_runtime": 83.5593, "eval_samples_per_second": 46.003, "eval_steps_per_second": 0.73, "learning_rate": 1e-05, "step": 11765 }, { "epoch": 66.0, "eval_explained_variance": 0.43423348665237427, "eval_loss": 0.34930846095085144, "eval_mae": 0.1253172904253006, "eval_r2": 0.43362313508987427, "eval_rmse": 0.1847212016582489, "eval_runtime": 82.5157, "eval_samples_per_second": 46.585, "eval_steps_per_second": 0.739, "learning_rate": 1e-05, "step": 11946 }, { "epoch": 66.29834254143647, "grad_norm": 0.23534314334392548, "learning_rate": 1e-05, "loss": 0.3564, "step": 12000 }, { "epoch": 67.0, "eval_explained_variance": 0.43399736285209656, "eval_loss": 0.3493542969226837, "eval_mae": 0.12613731622695923, "eval_r2": 0.43344247341156006, "eval_rmse": 0.18472003936767578, "eval_runtime": 83.448, "eval_samples_per_second": 46.065, "eval_steps_per_second": 0.731, "learning_rate": 1e-05, "step": 12127 }, { "epoch": 68.0, "eval_explained_variance": 0.4307097792625427, "eval_loss": 0.3500206172466278, "eval_mae": 0.12607118487358093, "eval_r2": 0.4291488826274872, "eval_rmse": 0.18558326363563538, "eval_runtime": 81.73, "eval_samples_per_second": 47.033, "eval_steps_per_second": 0.746, "learning_rate": 1e-05, "step": 12308 }, { "epoch": 68.0, "learning_rate": 1e-05, "step": 12308, "total_flos": 1.159646636554683e+20, "train_loss": 0.36796005863937264, "train_runtime": 25085.7825, "train_samples_per_second": 68.896, "train_steps_per_second": 1.082 } ], "logging_steps": 500, "max_steps": 27150, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.159646636554683e+20, "train_batch_size": 64, "trial_name": null, "trial_params": null }