|
{ |
|
"best_metric": 0.3492669463157654, |
|
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria_binary-probabilities-large-2024_11_03-batch-size64_freeze_probs/checkpoint-10498", |
|
"epoch": 68.0, |
|
"eval_steps": 500, |
|
"global_step": 12308, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_explained_variance": 0.3009445369243622, |
|
"eval_loss": 0.3794757127761841, |
|
"eval_mae": 0.14892756938934326, |
|
"eval_r2": 0.2894136607646942, |
|
"eval_rmse": 0.20668388903141022, |
|
"eval_runtime": 77.6082, |
|
"eval_samples_per_second": 49.531, |
|
"eval_steps_per_second": 0.786, |
|
"learning_rate": 0.001, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_explained_variance": 0.3548472821712494, |
|
"eval_loss": 0.3673744201660156, |
|
"eval_mae": 0.1374034583568573, |
|
"eval_r2": 0.35173478722572327, |
|
"eval_rmse": 0.198309525847435, |
|
"eval_runtime": 76.8329, |
|
"eval_samples_per_second": 50.031, |
|
"eval_steps_per_second": 0.794, |
|
"learning_rate": 0.001, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.7624309392265194, |
|
"grad_norm": 0.26878172159194946, |
|
"learning_rate": 0.001, |
|
"loss": 0.4416, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_explained_variance": 0.35690778493881226, |
|
"eval_loss": 0.3671453297138214, |
|
"eval_mae": 0.1413687765598297, |
|
"eval_r2": 0.35207054018974304, |
|
"eval_rmse": 0.198079913854599, |
|
"eval_runtime": 77.6227, |
|
"eval_samples_per_second": 49.522, |
|
"eval_steps_per_second": 0.786, |
|
"learning_rate": 0.001, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_explained_variance": 0.37491223216056824, |
|
"eval_loss": 0.36317145824432373, |
|
"eval_mae": 0.1391323208808899, |
|
"eval_r2": 0.3708474040031433, |
|
"eval_rmse": 0.19521364569664001, |
|
"eval_runtime": 77.1791, |
|
"eval_samples_per_second": 49.806, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 0.001, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_explained_variance": 0.3614368736743927, |
|
"eval_loss": 0.3678734302520752, |
|
"eval_mae": 0.141828715801239, |
|
"eval_r2": 0.3453221321105957, |
|
"eval_rmse": 0.19933271408081055, |
|
"eval_runtime": 79.1374, |
|
"eval_samples_per_second": 48.574, |
|
"eval_steps_per_second": 0.771, |
|
"learning_rate": 0.001, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 5.524861878453039, |
|
"grad_norm": 0.19997762143611908, |
|
"learning_rate": 0.001, |
|
"loss": 0.3813, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_explained_variance": 0.3743399381637573, |
|
"eval_loss": 0.36250030994415283, |
|
"eval_mae": 0.13803647458553314, |
|
"eval_r2": 0.37177884578704834, |
|
"eval_rmse": 0.19508354365825653, |
|
"eval_runtime": 77.2448, |
|
"eval_samples_per_second": 49.764, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 0.001, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_explained_variance": 0.38368356227874756, |
|
"eval_loss": 0.36188462376594543, |
|
"eval_mae": 0.1347939670085907, |
|
"eval_r2": 0.3771490454673767, |
|
"eval_rmse": 0.19410446286201477, |
|
"eval_runtime": 77.3383, |
|
"eval_samples_per_second": 49.704, |
|
"eval_steps_per_second": 0.789, |
|
"learning_rate": 0.001, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_explained_variance": 0.3808881640434265, |
|
"eval_loss": 0.36125126481056213, |
|
"eval_mae": 0.13681310415267944, |
|
"eval_r2": 0.37883251905441284, |
|
"eval_rmse": 0.19354337453842163, |
|
"eval_runtime": 80.1691, |
|
"eval_samples_per_second": 47.949, |
|
"eval_steps_per_second": 0.761, |
|
"learning_rate": 0.001, |
|
"step": 1448 |
|
}, |
|
{ |
|
"epoch": 8.287292817679559, |
|
"grad_norm": 0.1677914261817932, |
|
"learning_rate": 0.001, |
|
"loss": 0.3785, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_explained_variance": 0.38331514596939087, |
|
"eval_loss": 0.3603876233100891, |
|
"eval_mae": 0.135352224111557, |
|
"eval_r2": 0.3812035918235779, |
|
"eval_rmse": 0.19339486956596375, |
|
"eval_runtime": 77.1781, |
|
"eval_samples_per_second": 49.807, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 0.001, |
|
"step": 1629 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_explained_variance": 0.3843615651130676, |
|
"eval_loss": 0.3612792491912842, |
|
"eval_mae": 0.13375206291675568, |
|
"eval_r2": 0.38122493028640747, |
|
"eval_rmse": 0.19321060180664062, |
|
"eval_runtime": 77.2138, |
|
"eval_samples_per_second": 49.784, |
|
"eval_steps_per_second": 0.79, |
|
"learning_rate": 0.001, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_explained_variance": 0.3856731057167053, |
|
"eval_loss": 0.3603772521018982, |
|
"eval_mae": 0.13226205110549927, |
|
"eval_r2": 0.3844555616378784, |
|
"eval_rmse": 0.19312407076358795, |
|
"eval_runtime": 77.7929, |
|
"eval_samples_per_second": 49.413, |
|
"eval_steps_per_second": 0.784, |
|
"learning_rate": 0.001, |
|
"step": 1991 |
|
}, |
|
{ |
|
"epoch": 11.049723756906078, |
|
"grad_norm": 1.5252280235290527, |
|
"learning_rate": 0.001, |
|
"loss": 0.3743, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_explained_variance": 0.38444802165031433, |
|
"eval_loss": 0.361823707818985, |
|
"eval_mae": 0.13859649002552032, |
|
"eval_r2": 0.3774065375328064, |
|
"eval_rmse": 0.1941623091697693, |
|
"eval_runtime": 76.0433, |
|
"eval_samples_per_second": 50.55, |
|
"eval_steps_per_second": 0.802, |
|
"learning_rate": 0.001, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_explained_variance": 0.3894338309764862, |
|
"eval_loss": 0.35931822657585144, |
|
"eval_mae": 0.13433586061000824, |
|
"eval_r2": 0.3875495195388794, |
|
"eval_rmse": 0.1924724578857422, |
|
"eval_runtime": 80.7793, |
|
"eval_samples_per_second": 47.586, |
|
"eval_steps_per_second": 0.755, |
|
"learning_rate": 0.001, |
|
"step": 2353 |
|
}, |
|
{ |
|
"epoch": 13.812154696132596, |
|
"grad_norm": 0.13441379368305206, |
|
"learning_rate": 0.001, |
|
"loss": 0.3732, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_explained_variance": 0.3862605392932892, |
|
"eval_loss": 0.3604746460914612, |
|
"eval_mae": 0.13521355390548706, |
|
"eval_r2": 0.38306838274002075, |
|
"eval_rmse": 0.19319292902946472, |
|
"eval_runtime": 77.1543, |
|
"eval_samples_per_second": 49.822, |
|
"eval_steps_per_second": 0.791, |
|
"learning_rate": 0.001, |
|
"step": 2534 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_explained_variance": 0.3836004436016083, |
|
"eval_loss": 0.36050480604171753, |
|
"eval_mae": 0.13660094141960144, |
|
"eval_r2": 0.3816676735877991, |
|
"eval_rmse": 0.19354429841041565, |
|
"eval_runtime": 78.1543, |
|
"eval_samples_per_second": 49.185, |
|
"eval_steps_per_second": 0.781, |
|
"learning_rate": 0.001, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_explained_variance": 0.39098840951919556, |
|
"eval_loss": 0.3599933683872223, |
|
"eval_mae": 0.13121920824050903, |
|
"eval_r2": 0.388213574886322, |
|
"eval_rmse": 0.1921611875295639, |
|
"eval_runtime": 78.6199, |
|
"eval_samples_per_second": 48.893, |
|
"eval_steps_per_second": 0.776, |
|
"learning_rate": 0.001, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 16.574585635359117, |
|
"grad_norm": 0.13405688107013702, |
|
"learning_rate": 0.001, |
|
"loss": 0.3733, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_explained_variance": 0.38817232847213745, |
|
"eval_loss": 0.3628774583339691, |
|
"eval_mae": 0.13783428072929382, |
|
"eval_r2": 0.38425371050834656, |
|
"eval_rmse": 0.1932491511106491, |
|
"eval_runtime": 80.2352, |
|
"eval_samples_per_second": 47.909, |
|
"eval_steps_per_second": 0.76, |
|
"learning_rate": 0.001, |
|
"step": 3077 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_explained_variance": 0.3839576840400696, |
|
"eval_loss": 0.36154037714004517, |
|
"eval_mae": 0.1323489546775818, |
|
"eval_r2": 0.37684857845306396, |
|
"eval_rmse": 0.19430074095726013, |
|
"eval_runtime": 85.0559, |
|
"eval_samples_per_second": 45.194, |
|
"eval_steps_per_second": 0.717, |
|
"learning_rate": 0.001, |
|
"step": 3258 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_explained_variance": 0.3911250829696655, |
|
"eval_loss": 0.3594801127910614, |
|
"eval_mae": 0.13296250998973846, |
|
"eval_r2": 0.38950252532958984, |
|
"eval_rmse": 0.19218452274799347, |
|
"eval_runtime": 83.3283, |
|
"eval_samples_per_second": 46.131, |
|
"eval_steps_per_second": 0.732, |
|
"learning_rate": 0.001, |
|
"step": 3439 |
|
}, |
|
{ |
|
"epoch": 19.337016574585636, |
|
"grad_norm": 0.1141289696097374, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3723, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_explained_variance": 0.4041489064693451, |
|
"eval_loss": 0.3565874397754669, |
|
"eval_mae": 0.13302744925022125, |
|
"eval_r2": 0.40064936876296997, |
|
"eval_rmse": 0.19017010927200317, |
|
"eval_runtime": 81.1931, |
|
"eval_samples_per_second": 47.344, |
|
"eval_steps_per_second": 0.751, |
|
"learning_rate": 0.0001, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_explained_variance": 0.4089391827583313, |
|
"eval_loss": 0.35486647486686707, |
|
"eval_mae": 0.13062793016433716, |
|
"eval_r2": 0.40758493542671204, |
|
"eval_rmse": 0.18895885348320007, |
|
"eval_runtime": 83.1097, |
|
"eval_samples_per_second": 46.252, |
|
"eval_steps_per_second": 0.734, |
|
"learning_rate": 0.0001, |
|
"step": 3801 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_explained_variance": 0.41082069277763367, |
|
"eval_loss": 0.35447388887405396, |
|
"eval_mae": 0.13081100583076477, |
|
"eval_r2": 0.4096067547798157, |
|
"eval_rmse": 0.18863680958747864, |
|
"eval_runtime": 83.0406, |
|
"eval_samples_per_second": 46.291, |
|
"eval_steps_per_second": 0.735, |
|
"learning_rate": 0.0001, |
|
"step": 3982 |
|
}, |
|
{ |
|
"epoch": 22.099447513812155, |
|
"grad_norm": 0.1349981278181076, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3683, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_explained_variance": 0.4124269485473633, |
|
"eval_loss": 0.3544616997241974, |
|
"eval_mae": 0.13033078610897064, |
|
"eval_r2": 0.411631315946579, |
|
"eval_rmse": 0.18823565542697906, |
|
"eval_runtime": 84.9131, |
|
"eval_samples_per_second": 45.27, |
|
"eval_steps_per_second": 0.718, |
|
"learning_rate": 0.0001, |
|
"step": 4163 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_explained_variance": 0.4130716025829315, |
|
"eval_loss": 0.3539991080760956, |
|
"eval_mae": 0.1316699981689453, |
|
"eval_r2": 0.4120980501174927, |
|
"eval_rmse": 0.1881898045539856, |
|
"eval_runtime": 83.8568, |
|
"eval_samples_per_second": 45.84, |
|
"eval_steps_per_second": 0.727, |
|
"learning_rate": 0.0001, |
|
"step": 4344 |
|
}, |
|
{ |
|
"epoch": 24.861878453038674, |
|
"grad_norm": 0.10933272540569305, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3654, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_explained_variance": 0.4125872850418091, |
|
"eval_loss": 0.3545873463153839, |
|
"eval_mae": 0.12844440340995789, |
|
"eval_r2": 0.41126883029937744, |
|
"eval_rmse": 0.18831981718540192, |
|
"eval_runtime": 83.2028, |
|
"eval_samples_per_second": 46.2, |
|
"eval_steps_per_second": 0.733, |
|
"learning_rate": 0.0001, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_explained_variance": 0.4165402948856354, |
|
"eval_loss": 0.3529074192047119, |
|
"eval_mae": 0.1263934224843979, |
|
"eval_r2": 0.4154190421104431, |
|
"eval_rmse": 0.18757320940494537, |
|
"eval_runtime": 81.7389, |
|
"eval_samples_per_second": 47.028, |
|
"eval_steps_per_second": 0.746, |
|
"learning_rate": 0.0001, |
|
"step": 4706 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_explained_variance": 0.4176566004753113, |
|
"eval_loss": 0.3532767593860626, |
|
"eval_mae": 0.129387766122818, |
|
"eval_r2": 0.41658732295036316, |
|
"eval_rmse": 0.187411367893219, |
|
"eval_runtime": 81.9775, |
|
"eval_samples_per_second": 46.891, |
|
"eval_steps_per_second": 0.744, |
|
"learning_rate": 0.0001, |
|
"step": 4887 |
|
}, |
|
{ |
|
"epoch": 27.624309392265193, |
|
"grad_norm": 0.11634723842144012, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3652, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_explained_variance": 0.41691651940345764, |
|
"eval_loss": 0.3532498776912689, |
|
"eval_mae": 0.12938687205314636, |
|
"eval_r2": 0.41600102186203003, |
|
"eval_rmse": 0.18755248188972473, |
|
"eval_runtime": 84.518, |
|
"eval_samples_per_second": 45.481, |
|
"eval_steps_per_second": 0.722, |
|
"learning_rate": 0.0001, |
|
"step": 5068 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_explained_variance": 0.41915032267570496, |
|
"eval_loss": 0.35306474566459656, |
|
"eval_mae": 0.1302015781402588, |
|
"eval_r2": 0.41835859417915344, |
|
"eval_rmse": 0.1871432662010193, |
|
"eval_runtime": 81.5924, |
|
"eval_samples_per_second": 47.112, |
|
"eval_steps_per_second": 0.748, |
|
"learning_rate": 0.0001, |
|
"step": 5249 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_explained_variance": 0.4160480499267578, |
|
"eval_loss": 0.3536038398742676, |
|
"eval_mae": 0.1291646808385849, |
|
"eval_r2": 0.414754718542099, |
|
"eval_rmse": 0.18775980174541473, |
|
"eval_runtime": 81.1332, |
|
"eval_samples_per_second": 47.379, |
|
"eval_steps_per_second": 0.752, |
|
"learning_rate": 0.0001, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 30.386740331491712, |
|
"grad_norm": 0.12858645617961884, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3628, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_explained_variance": 0.41752102971076965, |
|
"eval_loss": 0.3530591130256653, |
|
"eval_mae": 0.1267225444316864, |
|
"eval_r2": 0.415239542722702, |
|
"eval_rmse": 0.1876552253961563, |
|
"eval_runtime": 82.6896, |
|
"eval_samples_per_second": 46.487, |
|
"eval_steps_per_second": 0.738, |
|
"learning_rate": 0.0001, |
|
"step": 5611 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_explained_variance": 0.4167982339859009, |
|
"eval_loss": 0.3528367877006531, |
|
"eval_mae": 0.12877780199050903, |
|
"eval_r2": 0.4161965548992157, |
|
"eval_rmse": 0.18764065206050873, |
|
"eval_runtime": 81.5086, |
|
"eval_samples_per_second": 47.161, |
|
"eval_steps_per_second": 0.748, |
|
"learning_rate": 0.0001, |
|
"step": 5792 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_explained_variance": 0.4230208098888397, |
|
"eval_loss": 0.35152381658554077, |
|
"eval_mae": 0.12729588150978088, |
|
"eval_r2": 0.4225224256515503, |
|
"eval_rmse": 0.18640562891960144, |
|
"eval_runtime": 83.3119, |
|
"eval_samples_per_second": 46.14, |
|
"eval_steps_per_second": 0.732, |
|
"learning_rate": 0.0001, |
|
"step": 5973 |
|
}, |
|
{ |
|
"epoch": 33.149171270718234, |
|
"grad_norm": 0.12355350703001022, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3638, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_explained_variance": 0.4216320216655731, |
|
"eval_loss": 0.35195404291152954, |
|
"eval_mae": 0.12629321217536926, |
|
"eval_r2": 0.4202421009540558, |
|
"eval_rmse": 0.18677598237991333, |
|
"eval_runtime": 81.9599, |
|
"eval_samples_per_second": 46.901, |
|
"eval_steps_per_second": 0.744, |
|
"learning_rate": 0.0001, |
|
"step": 6154 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_explained_variance": 0.42201581597328186, |
|
"eval_loss": 0.35178276896476746, |
|
"eval_mae": 0.12782610952854156, |
|
"eval_r2": 0.42147499322891235, |
|
"eval_rmse": 0.18657900393009186, |
|
"eval_runtime": 82.9654, |
|
"eval_samples_per_second": 46.333, |
|
"eval_steps_per_second": 0.735, |
|
"learning_rate": 0.0001, |
|
"step": 6335 |
|
}, |
|
{ |
|
"epoch": 35.91160220994475, |
|
"grad_norm": 0.12306394428014755, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3618, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_explained_variance": 0.41956183314323425, |
|
"eval_loss": 0.35231974720954895, |
|
"eval_mae": 0.12849368155002594, |
|
"eval_r2": 0.4192589223384857, |
|
"eval_rmse": 0.18713095784187317, |
|
"eval_runtime": 81.841, |
|
"eval_samples_per_second": 46.969, |
|
"eval_steps_per_second": 0.745, |
|
"learning_rate": 0.0001, |
|
"step": 6516 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_explained_variance": 0.4224753677845001, |
|
"eval_loss": 0.3515876829624176, |
|
"eval_mae": 0.12726719677448273, |
|
"eval_r2": 0.4216739237308502, |
|
"eval_rmse": 0.18659605085849762, |
|
"eval_runtime": 81.3618, |
|
"eval_samples_per_second": 47.246, |
|
"eval_steps_per_second": 0.75, |
|
"learning_rate": 0.0001, |
|
"step": 6697 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_explained_variance": 0.4183831810951233, |
|
"eval_loss": 0.35274896025657654, |
|
"eval_mae": 0.12742024660110474, |
|
"eval_r2": 0.41570571064949036, |
|
"eval_rmse": 0.1878250390291214, |
|
"eval_runtime": 82.5613, |
|
"eval_samples_per_second": 46.559, |
|
"eval_steps_per_second": 0.739, |
|
"learning_rate": 0.0001, |
|
"step": 6878 |
|
}, |
|
{ |
|
"epoch": 38.67403314917127, |
|
"grad_norm": 0.12681056559085846, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3611, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_explained_variance": 0.42493724822998047, |
|
"eval_loss": 0.35124146938323975, |
|
"eval_mae": 0.12662582099437714, |
|
"eval_r2": 0.4241558611392975, |
|
"eval_rmse": 0.18624022603034973, |
|
"eval_runtime": 83.3424, |
|
"eval_samples_per_second": 46.123, |
|
"eval_steps_per_second": 0.732, |
|
"learning_rate": 0.0001, |
|
"step": 7059 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_explained_variance": 0.42367231845855713, |
|
"eval_loss": 0.35209622979164124, |
|
"eval_mae": 0.13019172847270966, |
|
"eval_r2": 0.4224165081977844, |
|
"eval_rmse": 0.18663105368614197, |
|
"eval_runtime": 82.8483, |
|
"eval_samples_per_second": 46.398, |
|
"eval_steps_per_second": 0.736, |
|
"learning_rate": 0.0001, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_explained_variance": 0.4274601340293884, |
|
"eval_loss": 0.35067644715309143, |
|
"eval_mae": 0.1265629082918167, |
|
"eval_r2": 0.42641735076904297, |
|
"eval_rmse": 0.18584123253822327, |
|
"eval_runtime": 83.6418, |
|
"eval_samples_per_second": 45.958, |
|
"eval_steps_per_second": 0.729, |
|
"learning_rate": 0.0001, |
|
"step": 7421 |
|
}, |
|
{ |
|
"epoch": 41.43646408839779, |
|
"grad_norm": 0.13862788677215576, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3613, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_explained_variance": 0.4272458851337433, |
|
"eval_loss": 0.3512935936450958, |
|
"eval_mae": 0.12775851786136627, |
|
"eval_r2": 0.4262687563896179, |
|
"eval_rmse": 0.18596960604190826, |
|
"eval_runtime": 83.9115, |
|
"eval_samples_per_second": 45.81, |
|
"eval_steps_per_second": 0.727, |
|
"learning_rate": 0.0001, |
|
"step": 7602 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_explained_variance": 0.4272707402706146, |
|
"eval_loss": 0.3510710895061493, |
|
"eval_mae": 0.12741515040397644, |
|
"eval_r2": 0.42624664306640625, |
|
"eval_rmse": 0.1859511435031891, |
|
"eval_runtime": 82.5626, |
|
"eval_samples_per_second": 46.559, |
|
"eval_steps_per_second": 0.739, |
|
"learning_rate": 0.0001, |
|
"step": 7783 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_explained_variance": 0.42821547389030457, |
|
"eval_loss": 0.35139599442481995, |
|
"eval_mae": 0.12441141903400421, |
|
"eval_r2": 0.4265681505203247, |
|
"eval_rmse": 0.18587811291217804, |
|
"eval_runtime": 82.7673, |
|
"eval_samples_per_second": 46.443, |
|
"eval_steps_per_second": 0.737, |
|
"learning_rate": 0.0001, |
|
"step": 7964 |
|
}, |
|
{ |
|
"epoch": 44.19889502762431, |
|
"grad_norm": 0.15475843846797943, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3603, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_explained_variance": 0.42755335569381714, |
|
"eval_loss": 0.35247302055358887, |
|
"eval_mae": 0.127328023314476, |
|
"eval_r2": 0.42492759227752686, |
|
"eval_rmse": 0.18626871705055237, |
|
"eval_runtime": 84.2463, |
|
"eval_samples_per_second": 45.628, |
|
"eval_steps_per_second": 0.724, |
|
"learning_rate": 0.0001, |
|
"step": 8145 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_explained_variance": 0.4285746216773987, |
|
"eval_loss": 0.3505423069000244, |
|
"eval_mae": 0.12581512331962585, |
|
"eval_r2": 0.4274958372116089, |
|
"eval_rmse": 0.18559609353542328, |
|
"eval_runtime": 82.9445, |
|
"eval_samples_per_second": 46.344, |
|
"eval_steps_per_second": 0.735, |
|
"learning_rate": 0.0001, |
|
"step": 8326 |
|
}, |
|
{ |
|
"epoch": 46.96132596685083, |
|
"grad_norm": 0.172088161110878, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3603, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_explained_variance": 0.42584168910980225, |
|
"eval_loss": 0.3517468571662903, |
|
"eval_mae": 0.1250177025794983, |
|
"eval_r2": 0.4231443405151367, |
|
"eval_rmse": 0.18658187985420227, |
|
"eval_runtime": 84.7488, |
|
"eval_samples_per_second": 45.358, |
|
"eval_steps_per_second": 0.72, |
|
"learning_rate": 0.0001, |
|
"step": 8507 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_explained_variance": 0.4292495548725128, |
|
"eval_loss": 0.35043978691101074, |
|
"eval_mae": 0.12591718137264252, |
|
"eval_r2": 0.42857199907302856, |
|
"eval_rmse": 0.18564504384994507, |
|
"eval_runtime": 80.6864, |
|
"eval_samples_per_second": 47.641, |
|
"eval_steps_per_second": 0.756, |
|
"learning_rate": 0.0001, |
|
"step": 8688 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_explained_variance": 0.42835286259651184, |
|
"eval_loss": 0.35074281692504883, |
|
"eval_mae": 0.12717720866203308, |
|
"eval_r2": 0.4274061322212219, |
|
"eval_rmse": 0.1857146918773651, |
|
"eval_runtime": 81.6357, |
|
"eval_samples_per_second": 47.087, |
|
"eval_steps_per_second": 0.747, |
|
"learning_rate": 0.0001, |
|
"step": 8869 |
|
}, |
|
{ |
|
"epoch": 49.72375690607735, |
|
"grad_norm": 0.17033293843269348, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3604, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_explained_variance": 0.42889854311943054, |
|
"eval_loss": 0.3515849709510803, |
|
"eval_mae": 0.1283276230096817, |
|
"eval_r2": 0.42797213792800903, |
|
"eval_rmse": 0.1857057511806488, |
|
"eval_runtime": 82.7487, |
|
"eval_samples_per_second": 46.454, |
|
"eval_steps_per_second": 0.737, |
|
"learning_rate": 0.0001, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_explained_variance": 0.4282180070877075, |
|
"eval_loss": 0.35289809107780457, |
|
"eval_mae": 0.1288221776485443, |
|
"eval_r2": 0.42265036702156067, |
|
"eval_rmse": 0.1866857409477234, |
|
"eval_runtime": 83.8305, |
|
"eval_samples_per_second": 45.854, |
|
"eval_steps_per_second": 0.728, |
|
"learning_rate": 0.0001, |
|
"step": 9231 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_explained_variance": 0.42951008677482605, |
|
"eval_loss": 0.3505743443965912, |
|
"eval_mae": 0.12677451968193054, |
|
"eval_r2": 0.4281761944293976, |
|
"eval_rmse": 0.18569740653038025, |
|
"eval_runtime": 83.1138, |
|
"eval_samples_per_second": 46.25, |
|
"eval_steps_per_second": 0.734, |
|
"learning_rate": 0.0001, |
|
"step": 9412 |
|
}, |
|
{ |
|
"epoch": 52.48618784530387, |
|
"grad_norm": 0.19461286067962646, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3592, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_explained_variance": 0.4302181005477905, |
|
"eval_loss": 0.35052910447120667, |
|
"eval_mae": 0.1273086667060852, |
|
"eval_r2": 0.4285990595817566, |
|
"eval_rmse": 0.18561594188213348, |
|
"eval_runtime": 82.8342, |
|
"eval_samples_per_second": 46.406, |
|
"eval_steps_per_second": 0.736, |
|
"learning_rate": 0.0001, |
|
"step": 9593 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_explained_variance": 0.43035173416137695, |
|
"eval_loss": 0.35016006231307983, |
|
"eval_mae": 0.12655657529830933, |
|
"eval_r2": 0.4299810826778412, |
|
"eval_rmse": 0.1853920817375183, |
|
"eval_runtime": 83.2383, |
|
"eval_samples_per_second": 46.181, |
|
"eval_steps_per_second": 0.733, |
|
"learning_rate": 0.0001, |
|
"step": 9774 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_explained_variance": 0.4318656921386719, |
|
"eval_loss": 0.35006165504455566, |
|
"eval_mae": 0.12509843707084656, |
|
"eval_r2": 0.42986157536506653, |
|
"eval_rmse": 0.18541744351387024, |
|
"eval_runtime": 82.8153, |
|
"eval_samples_per_second": 46.417, |
|
"eval_steps_per_second": 0.737, |
|
"learning_rate": 0.0001, |
|
"step": 9955 |
|
}, |
|
{ |
|
"epoch": 55.248618784530386, |
|
"grad_norm": 0.179665207862854, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3601, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_explained_variance": 0.42942577600479126, |
|
"eval_loss": 0.35072585940361023, |
|
"eval_mae": 0.12430255115032196, |
|
"eval_r2": 0.4273306131362915, |
|
"eval_rmse": 0.18582786619663239, |
|
"eval_runtime": 83.8049, |
|
"eval_samples_per_second": 45.868, |
|
"eval_steps_per_second": 0.728, |
|
"learning_rate": 0.0001, |
|
"step": 10136 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_explained_variance": 0.42968177795410156, |
|
"eval_loss": 0.3508891463279724, |
|
"eval_mae": 0.12534378468990326, |
|
"eval_r2": 0.4273567497730255, |
|
"eval_rmse": 0.18598994612693787, |
|
"eval_runtime": 86.999, |
|
"eval_samples_per_second": 44.184, |
|
"eval_steps_per_second": 0.701, |
|
"learning_rate": 0.0001, |
|
"step": 10317 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_explained_variance": 0.43535250425338745, |
|
"eval_loss": 0.3492669463157654, |
|
"eval_mae": 0.12510134279727936, |
|
"eval_r2": 0.4338167607784271, |
|
"eval_rmse": 0.1846422404050827, |
|
"eval_runtime": 84.7392, |
|
"eval_samples_per_second": 45.363, |
|
"eval_steps_per_second": 0.72, |
|
"learning_rate": 0.0001, |
|
"step": 10498 |
|
}, |
|
{ |
|
"epoch": 58.011049723756905, |
|
"grad_norm": 0.23822635412216187, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3601, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_explained_variance": 0.42991289496421814, |
|
"eval_loss": 0.3500733971595764, |
|
"eval_mae": 0.12414979934692383, |
|
"eval_r2": 0.42818644642829895, |
|
"eval_rmse": 0.18548892438411713, |
|
"eval_runtime": 83.0713, |
|
"eval_samples_per_second": 46.274, |
|
"eval_steps_per_second": 0.734, |
|
"learning_rate": 0.0001, |
|
"step": 10679 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_explained_variance": 0.43251222372055054, |
|
"eval_loss": 0.350059449672699, |
|
"eval_mae": 0.12591439485549927, |
|
"eval_r2": 0.43032628297805786, |
|
"eval_rmse": 0.18521927297115326, |
|
"eval_runtime": 82.5828, |
|
"eval_samples_per_second": 46.547, |
|
"eval_steps_per_second": 0.739, |
|
"learning_rate": 0.0001, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 60.773480662983424, |
|
"grad_norm": 0.2104698270559311, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3588, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_explained_variance": 0.4309556186199188, |
|
"eval_loss": 0.34978389739990234, |
|
"eval_mae": 0.126389279961586, |
|
"eval_r2": 0.43050628900527954, |
|
"eval_rmse": 0.18503333628177643, |
|
"eval_runtime": 84.1032, |
|
"eval_samples_per_second": 45.706, |
|
"eval_steps_per_second": 0.725, |
|
"learning_rate": 0.0001, |
|
"step": 11041 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_explained_variance": 0.43332165479660034, |
|
"eval_loss": 0.34984564781188965, |
|
"eval_mae": 0.1265084147453308, |
|
"eval_r2": 0.4322562515735626, |
|
"eval_rmse": 0.18499605357646942, |
|
"eval_runtime": 83.3683, |
|
"eval_samples_per_second": 46.109, |
|
"eval_steps_per_second": 0.732, |
|
"learning_rate": 0.0001, |
|
"step": 11222 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_explained_variance": 0.4338870644569397, |
|
"eval_loss": 0.35018646717071533, |
|
"eval_mae": 0.1270289421081543, |
|
"eval_r2": 0.4321424067020416, |
|
"eval_rmse": 0.18513011932373047, |
|
"eval_runtime": 82.3267, |
|
"eval_samples_per_second": 46.692, |
|
"eval_steps_per_second": 0.741, |
|
"learning_rate": 0.0001, |
|
"step": 11403 |
|
}, |
|
{ |
|
"epoch": 63.53591160220994, |
|
"grad_norm": 0.18755941092967987, |
|
"learning_rate": 0.0001, |
|
"loss": 0.3579, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_explained_variance": 0.43124625086784363, |
|
"eval_loss": 0.34996479749679565, |
|
"eval_mae": 0.12558279931545258, |
|
"eval_r2": 0.43004974722862244, |
|
"eval_rmse": 0.1853456199169159, |
|
"eval_runtime": 82.1284, |
|
"eval_samples_per_second": 46.805, |
|
"eval_steps_per_second": 0.743, |
|
"learning_rate": 0.0001, |
|
"step": 11584 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_explained_variance": 0.4304056167602539, |
|
"eval_loss": 0.3501463234424591, |
|
"eval_mae": 0.1280103474855423, |
|
"eval_r2": 0.42989540100097656, |
|
"eval_rmse": 0.1853969395160675, |
|
"eval_runtime": 83.5593, |
|
"eval_samples_per_second": 46.003, |
|
"eval_steps_per_second": 0.73, |
|
"learning_rate": 1e-05, |
|
"step": 11765 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_explained_variance": 0.43423348665237427, |
|
"eval_loss": 0.34930846095085144, |
|
"eval_mae": 0.1253172904253006, |
|
"eval_r2": 0.43362313508987427, |
|
"eval_rmse": 0.1847212016582489, |
|
"eval_runtime": 82.5157, |
|
"eval_samples_per_second": 46.585, |
|
"eval_steps_per_second": 0.739, |
|
"learning_rate": 1e-05, |
|
"step": 11946 |
|
}, |
|
{ |
|
"epoch": 66.29834254143647, |
|
"grad_norm": 0.23534314334392548, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3564, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_explained_variance": 0.43399736285209656, |
|
"eval_loss": 0.3493542969226837, |
|
"eval_mae": 0.12613731622695923, |
|
"eval_r2": 0.43344247341156006, |
|
"eval_rmse": 0.18472003936767578, |
|
"eval_runtime": 83.448, |
|
"eval_samples_per_second": 46.065, |
|
"eval_steps_per_second": 0.731, |
|
"learning_rate": 1e-05, |
|
"step": 12127 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_explained_variance": 0.4307097792625427, |
|
"eval_loss": 0.3500206172466278, |
|
"eval_mae": 0.12607118487358093, |
|
"eval_r2": 0.4291488826274872, |
|
"eval_rmse": 0.18558326363563538, |
|
"eval_runtime": 81.73, |
|
"eval_samples_per_second": 47.033, |
|
"eval_steps_per_second": 0.746, |
|
"learning_rate": 1e-05, |
|
"step": 12308 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 1e-05, |
|
"step": 12308, |
|
"total_flos": 1.159646636554683e+20, |
|
"train_loss": 0.36796005863937264, |
|
"train_runtime": 25085.7825, |
|
"train_samples_per_second": 68.896, |
|
"train_steps_per_second": 1.082 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 27150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 150, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.159646636554683e+20, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|