drone-DinoVdeau-produttoria-probabilities-large-2024_11_04-batch-size64_freeze_probs
/
trainer_state.json
{ | |
"best_metric": 0.31807705760002136, | |
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria-probabilities-large-2024_11_04-batch-size64_freeze_probs/checkpoint-10679", | |
"epoch": 69.0, | |
"eval_steps": 500, | |
"global_step": 12489, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 1.0, | |
"eval_explained_variance": 0.30368584394454956, | |
"eval_loss": 0.36101797223091125, | |
"eval_mae": 0.18781842291355133, | |
"eval_r2": 0.28179988265037537, | |
"eval_rmse": 0.2644973397254944, | |
"eval_runtime": 77.9895, | |
"eval_samples_per_second": 49.289, | |
"eval_steps_per_second": 0.782, | |
"learning_rate": 0.001, | |
"step": 181 | |
}, | |
{ | |
"epoch": 2.0, | |
"eval_explained_variance": 0.3478943109512329, | |
"eval_loss": 0.3464529514312744, | |
"eval_mae": 0.17783641815185547, | |
"eval_r2": 0.33490002155303955, | |
"eval_rmse": 0.25657808780670166, | |
"eval_runtime": 82.5528, | |
"eval_samples_per_second": 46.564, | |
"eval_steps_per_second": 0.739, | |
"learning_rate": 0.001, | |
"step": 362 | |
}, | |
{ | |
"epoch": 2.7624309392265194, | |
"grad_norm": 0.36520126461982727, | |
"learning_rate": 0.001, | |
"loss": 0.4149, | |
"step": 500 | |
}, | |
{ | |
"epoch": 3.0, | |
"eval_explained_variance": 0.35995176434516907, | |
"eval_loss": 0.34133487939834595, | |
"eval_mae": 0.17307622730731964, | |
"eval_r2": 0.3535836637020111, | |
"eval_rmse": 0.25322917103767395, | |
"eval_runtime": 82.3103, | |
"eval_samples_per_second": 46.701, | |
"eval_steps_per_second": 0.741, | |
"learning_rate": 0.001, | |
"step": 543 | |
}, | |
{ | |
"epoch": 4.0, | |
"eval_explained_variance": 0.35911357402801514, | |
"eval_loss": 0.3406243324279785, | |
"eval_mae": 0.17434848845005035, | |
"eval_r2": 0.35186487436294556, | |
"eval_rmse": 0.25316712260246277, | |
"eval_runtime": 82.3198, | |
"eval_samples_per_second": 46.696, | |
"eval_steps_per_second": 0.741, | |
"learning_rate": 0.001, | |
"step": 724 | |
}, | |
{ | |
"epoch": 5.0, | |
"eval_explained_variance": 0.3730728328227997, | |
"eval_loss": 0.3341675400733948, | |
"eval_mae": 0.1660642772912979, | |
"eval_r2": 0.37022241950035095, | |
"eval_rmse": 0.24957779049873352, | |
"eval_runtime": 81.2913, | |
"eval_samples_per_second": 47.287, | |
"eval_steps_per_second": 0.75, | |
"learning_rate": 0.001, | |
"step": 905 | |
}, | |
{ | |
"epoch": 5.524861878453039, | |
"grad_norm": 0.2786722779273987, | |
"learning_rate": 0.001, | |
"loss": 0.3486, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 6.0, | |
"eval_explained_variance": 0.37237703800201416, | |
"eval_loss": 0.33848145604133606, | |
"eval_mae": 0.17393800616264343, | |
"eval_r2": 0.36507394909858704, | |
"eval_rmse": 0.2512134909629822, | |
"eval_runtime": 81.4015, | |
"eval_samples_per_second": 47.223, | |
"eval_steps_per_second": 0.749, | |
"learning_rate": 0.001, | |
"step": 1086 | |
}, | |
{ | |
"epoch": 7.0, | |
"eval_explained_variance": 0.38455089926719666, | |
"eval_loss": 0.3320676386356354, | |
"eval_mae": 0.16495750844478607, | |
"eval_r2": 0.38360098004341125, | |
"eval_rmse": 0.24762088060379028, | |
"eval_runtime": 80.8916, | |
"eval_samples_per_second": 47.52, | |
"eval_steps_per_second": 0.754, | |
"learning_rate": 0.001, | |
"step": 1267 | |
}, | |
{ | |
"epoch": 8.0, | |
"eval_explained_variance": 0.38114219903945923, | |
"eval_loss": 0.3332081437110901, | |
"eval_mae": 0.1629333347082138, | |
"eval_r2": 0.38021257519721985, | |
"eval_rmse": 0.24838922917842865, | |
"eval_runtime": 81.8669, | |
"eval_samples_per_second": 46.954, | |
"eval_steps_per_second": 0.745, | |
"learning_rate": 0.001, | |
"step": 1448 | |
}, | |
{ | |
"epoch": 8.287292817679559, | |
"grad_norm": 0.24299436807632446, | |
"learning_rate": 0.001, | |
"loss": 0.3462, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 9.0, | |
"eval_explained_variance": 0.3872137665748596, | |
"eval_loss": 0.3305376172065735, | |
"eval_mae": 0.16518667340278625, | |
"eval_r2": 0.38587409257888794, | |
"eval_rmse": 0.24679909646511078, | |
"eval_runtime": 80.7757, | |
"eval_samples_per_second": 47.589, | |
"eval_steps_per_second": 0.755, | |
"learning_rate": 0.001, | |
"step": 1629 | |
}, | |
{ | |
"epoch": 10.0, | |
"eval_explained_variance": 0.3849840760231018, | |
"eval_loss": 0.33136793971061707, | |
"eval_mae": 0.16550396382808685, | |
"eval_r2": 0.38265591859817505, | |
"eval_rmse": 0.2476043999195099, | |
"eval_runtime": 81.3286, | |
"eval_samples_per_second": 47.265, | |
"eval_steps_per_second": 0.75, | |
"learning_rate": 0.001, | |
"step": 1810 | |
}, | |
{ | |
"epoch": 11.0, | |
"eval_explained_variance": 0.3866196274757385, | |
"eval_loss": 0.3319685757160187, | |
"eval_mae": 0.16017739474773407, | |
"eval_r2": 0.3840087652206421, | |
"eval_rmse": 0.2473573535680771, | |
"eval_runtime": 80.4031, | |
"eval_samples_per_second": 47.809, | |
"eval_steps_per_second": 0.759, | |
"learning_rate": 0.001, | |
"step": 1991 | |
}, | |
{ | |
"epoch": 11.049723756906078, | |
"grad_norm": 0.1871100217103958, | |
"learning_rate": 0.001, | |
"loss": 0.3391, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 12.0, | |
"eval_explained_variance": 0.3842613399028778, | |
"eval_loss": 0.3341500759124756, | |
"eval_mae": 0.16828522086143494, | |
"eval_r2": 0.37614381313323975, | |
"eval_rmse": 0.2493605613708496, | |
"eval_runtime": 85.712, | |
"eval_samples_per_second": 44.848, | |
"eval_steps_per_second": 0.712, | |
"learning_rate": 0.001, | |
"step": 2172 | |
}, | |
{ | |
"epoch": 13.0, | |
"eval_explained_variance": 0.38362985849380493, | |
"eval_loss": 0.33248215913772583, | |
"eval_mae": 0.16487525403499603, | |
"eval_r2": 0.38205400109291077, | |
"eval_rmse": 0.2479550540447235, | |
"eval_runtime": 81.2132, | |
"eval_samples_per_second": 47.332, | |
"eval_steps_per_second": 0.751, | |
"learning_rate": 0.001, | |
"step": 2353 | |
}, | |
{ | |
"epoch": 13.812154696132596, | |
"grad_norm": 0.19864079356193542, | |
"learning_rate": 0.001, | |
"loss": 0.3372, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 14.0, | |
"eval_explained_variance": 0.3930034041404724, | |
"eval_loss": 0.33228376507759094, | |
"eval_mae": 0.17001599073410034, | |
"eval_r2": 0.38782283663749695, | |
"eval_rmse": 0.24722439050674438, | |
"eval_runtime": 81.4831, | |
"eval_samples_per_second": 47.175, | |
"eval_steps_per_second": 0.749, | |
"learning_rate": 0.001, | |
"step": 2534 | |
}, | |
{ | |
"epoch": 15.0, | |
"eval_explained_variance": 0.38280433416366577, | |
"eval_loss": 0.334873229265213, | |
"eval_mae": 0.1702941358089447, | |
"eval_r2": 0.37488964200019836, | |
"eval_rmse": 0.24934227764606476, | |
"eval_runtime": 82.0523, | |
"eval_samples_per_second": 46.848, | |
"eval_steps_per_second": 0.743, | |
"learning_rate": 0.001, | |
"step": 2715 | |
}, | |
{ | |
"epoch": 16.0, | |
"eval_explained_variance": 0.4018595516681671, | |
"eval_loss": 0.3279329538345337, | |
"eval_mae": 0.16485117375850677, | |
"eval_r2": 0.3982715308666229, | |
"eval_rmse": 0.2447740137577057, | |
"eval_runtime": 80.4915, | |
"eval_samples_per_second": 47.757, | |
"eval_steps_per_second": 0.758, | |
"learning_rate": 0.0001, | |
"step": 2896 | |
}, | |
{ | |
"epoch": 16.574585635359117, | |
"grad_norm": 0.18467135727405548, | |
"learning_rate": 0.0001, | |
"loss": 0.3343, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 17.0, | |
"eval_explained_variance": 0.4041018486022949, | |
"eval_loss": 0.3279244005680084, | |
"eval_mae": 0.1647535115480423, | |
"eval_r2": 0.39838364720344543, | |
"eval_rmse": 0.24482907354831696, | |
"eval_runtime": 80.8242, | |
"eval_samples_per_second": 47.56, | |
"eval_steps_per_second": 0.755, | |
"learning_rate": 0.0001, | |
"step": 3077 | |
}, | |
{ | |
"epoch": 18.0, | |
"eval_explained_variance": 0.40324074029922485, | |
"eval_loss": 0.3262367248535156, | |
"eval_mae": 0.16224807500839233, | |
"eval_r2": 0.40254056453704834, | |
"eval_rmse": 0.24403510987758636, | |
"eval_runtime": 79.9167, | |
"eval_samples_per_second": 48.1, | |
"eval_steps_per_second": 0.763, | |
"learning_rate": 0.0001, | |
"step": 3258 | |
}, | |
{ | |
"epoch": 19.0, | |
"eval_explained_variance": 0.4051153361797333, | |
"eval_loss": 0.3247373402118683, | |
"eval_mae": 0.15879866480827332, | |
"eval_r2": 0.4045550227165222, | |
"eval_rmse": 0.24321436882019043, | |
"eval_runtime": 78.6921, | |
"eval_samples_per_second": 48.849, | |
"eval_steps_per_second": 0.775, | |
"learning_rate": 0.0001, | |
"step": 3439 | |
}, | |
{ | |
"epoch": 19.337016574585636, | |
"grad_norm": 0.19419366121292114, | |
"learning_rate": 0.0001, | |
"loss": 0.3271, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 20.0, | |
"eval_explained_variance": 0.41063615679740906, | |
"eval_loss": 0.32612329721450806, | |
"eval_mae": 0.16252389550209045, | |
"eval_r2": 0.4059467911720276, | |
"eval_rmse": 0.24334031343460083, | |
"eval_runtime": 79.0343, | |
"eval_samples_per_second": 48.637, | |
"eval_steps_per_second": 0.772, | |
"learning_rate": 0.0001, | |
"step": 3620 | |
}, | |
{ | |
"epoch": 21.0, | |
"eval_explained_variance": 0.411923885345459, | |
"eval_loss": 0.3241129517555237, | |
"eval_mae": 0.16064995527267456, | |
"eval_r2": 0.4094885587692261, | |
"eval_rmse": 0.2424389272928238, | |
"eval_runtime": 79.7663, | |
"eval_samples_per_second": 48.191, | |
"eval_steps_per_second": 0.765, | |
"learning_rate": 0.0001, | |
"step": 3801 | |
}, | |
{ | |
"epoch": 22.0, | |
"eval_explained_variance": 0.41316869854927063, | |
"eval_loss": 0.32355180382728577, | |
"eval_mae": 0.1587211936712265, | |
"eval_r2": 0.4111122488975525, | |
"eval_rmse": 0.24220238626003265, | |
"eval_runtime": 79.4498, | |
"eval_samples_per_second": 48.383, | |
"eval_steps_per_second": 0.768, | |
"learning_rate": 0.0001, | |
"step": 3982 | |
}, | |
{ | |
"epoch": 22.099447513812155, | |
"grad_norm": 0.1995573192834854, | |
"learning_rate": 0.0001, | |
"loss": 0.3275, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 23.0, | |
"eval_explained_variance": 0.41342437267303467, | |
"eval_loss": 0.3242079019546509, | |
"eval_mae": 0.16008274257183075, | |
"eval_r2": 0.4106961488723755, | |
"eval_rmse": 0.2422674000263214, | |
"eval_runtime": 78.6301, | |
"eval_samples_per_second": 48.887, | |
"eval_steps_per_second": 0.776, | |
"learning_rate": 0.0001, | |
"step": 4163 | |
}, | |
{ | |
"epoch": 24.0, | |
"eval_explained_variance": 0.4160999655723572, | |
"eval_loss": 0.3227241337299347, | |
"eval_mae": 0.15855328738689423, | |
"eval_r2": 0.41498833894729614, | |
"eval_rmse": 0.2413560003042221, | |
"eval_runtime": 78.9117, | |
"eval_samples_per_second": 48.713, | |
"eval_steps_per_second": 0.773, | |
"learning_rate": 0.0001, | |
"step": 4344 | |
}, | |
{ | |
"epoch": 24.861878453038674, | |
"grad_norm": 0.19915054738521576, | |
"learning_rate": 0.0001, | |
"loss": 0.3247, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 25.0, | |
"eval_explained_variance": 0.4161580204963684, | |
"eval_loss": 0.3223778307437897, | |
"eval_mae": 0.1586799919605255, | |
"eval_r2": 0.4147694408893585, | |
"eval_rmse": 0.24130770564079285, | |
"eval_runtime": 79.8112, | |
"eval_samples_per_second": 48.164, | |
"eval_steps_per_second": 0.764, | |
"learning_rate": 0.0001, | |
"step": 4525 | |
}, | |
{ | |
"epoch": 26.0, | |
"eval_explained_variance": 0.4154631793498993, | |
"eval_loss": 0.3217927813529968, | |
"eval_mae": 0.15566809475421906, | |
"eval_r2": 0.4142845571041107, | |
"eval_rmse": 0.24133403599262238, | |
"eval_runtime": 80.3582, | |
"eval_samples_per_second": 47.836, | |
"eval_steps_per_second": 0.759, | |
"learning_rate": 0.0001, | |
"step": 4706 | |
}, | |
{ | |
"epoch": 27.0, | |
"eval_explained_variance": 0.4153793156147003, | |
"eval_loss": 0.3227355182170868, | |
"eval_mae": 0.16026344895362854, | |
"eval_r2": 0.4137920141220093, | |
"eval_rmse": 0.2415631115436554, | |
"eval_runtime": 81.132, | |
"eval_samples_per_second": 47.38, | |
"eval_steps_per_second": 0.752, | |
"learning_rate": 0.0001, | |
"step": 4887 | |
}, | |
{ | |
"epoch": 27.624309392265193, | |
"grad_norm": 0.21681347489356995, | |
"learning_rate": 0.0001, | |
"loss": 0.3231, | |
"step": 5000 | |
}, | |
{ | |
"epoch": 28.0, | |
"eval_explained_variance": 0.41966134309768677, | |
"eval_loss": 0.32067713141441345, | |
"eval_mae": 0.15620459616184235, | |
"eval_r2": 0.4186200499534607, | |
"eval_rmse": 0.24054032564163208, | |
"eval_runtime": 81.32, | |
"eval_samples_per_second": 47.27, | |
"eval_steps_per_second": 0.75, | |
"learning_rate": 0.0001, | |
"step": 5068 | |
}, | |
{ | |
"epoch": 29.0, | |
"eval_explained_variance": 0.4175001382827759, | |
"eval_loss": 0.32205939292907715, | |
"eval_mae": 0.15971842408180237, | |
"eval_r2": 0.41626471281051636, | |
"eval_rmse": 0.24114808440208435, | |
"eval_runtime": 80.4359, | |
"eval_samples_per_second": 47.79, | |
"eval_steps_per_second": 0.758, | |
"learning_rate": 0.0001, | |
"step": 5249 | |
}, | |
{ | |
"epoch": 30.0, | |
"eval_explained_variance": 0.4190256893634796, | |
"eval_loss": 0.32246074080467224, | |
"eval_mae": 0.16082431375980377, | |
"eval_r2": 0.41641533374786377, | |
"eval_rmse": 0.24128668010234833, | |
"eval_runtime": 80.4517, | |
"eval_samples_per_second": 47.78, | |
"eval_steps_per_second": 0.758, | |
"learning_rate": 0.0001, | |
"step": 5430 | |
}, | |
{ | |
"epoch": 30.386740331491712, | |
"grad_norm": 0.20712591707706451, | |
"learning_rate": 0.0001, | |
"loss": 0.3215, | |
"step": 5500 | |
}, | |
{ | |
"epoch": 31.0, | |
"eval_explained_variance": 0.41637617349624634, | |
"eval_loss": 0.3223503530025482, | |
"eval_mae": 0.15350204706192017, | |
"eval_r2": 0.4133574366569519, | |
"eval_rmse": 0.2416228950023651, | |
"eval_runtime": 83.145, | |
"eval_samples_per_second": 46.232, | |
"eval_steps_per_second": 0.734, | |
"learning_rate": 0.0001, | |
"step": 5611 | |
}, | |
{ | |
"epoch": 32.0, | |
"eval_explained_variance": 0.41848501563072205, | |
"eval_loss": 0.3212696313858032, | |
"eval_mae": 0.1552996039390564, | |
"eval_r2": 0.4179571568965912, | |
"eval_rmse": 0.24075190722942352, | |
"eval_runtime": 80.1214, | |
"eval_samples_per_second": 47.977, | |
"eval_steps_per_second": 0.761, | |
"learning_rate": 0.0001, | |
"step": 5792 | |
}, | |
{ | |
"epoch": 33.0, | |
"eval_explained_variance": 0.4142448604106903, | |
"eval_loss": 0.32156360149383545, | |
"eval_mae": 0.15830956399440765, | |
"eval_r2": 0.4123082458972931, | |
"eval_rmse": 0.24135708808898926, | |
"eval_runtime": 80.4898, | |
"eval_samples_per_second": 47.758, | |
"eval_steps_per_second": 0.758, | |
"learning_rate": 0.0001, | |
"step": 5973 | |
}, | |
{ | |
"epoch": 33.149171270718234, | |
"grad_norm": 0.22835813462734222, | |
"learning_rate": 0.0001, | |
"loss": 0.3227, | |
"step": 6000 | |
}, | |
{ | |
"epoch": 34.0, | |
"eval_explained_variance": 0.4181065559387207, | |
"eval_loss": 0.3205103278160095, | |
"eval_mae": 0.15616008639335632, | |
"eval_r2": 0.41721493005752563, | |
"eval_rmse": 0.24057045578956604, | |
"eval_runtime": 80.866, | |
"eval_samples_per_second": 47.535, | |
"eval_steps_per_second": 0.754, | |
"learning_rate": 0.0001, | |
"step": 6154 | |
}, | |
{ | |
"epoch": 35.0, | |
"eval_explained_variance": 0.422443687915802, | |
"eval_loss": 0.3197581171989441, | |
"eval_mae": 0.15349127352237701, | |
"eval_r2": 0.42148736119270325, | |
"eval_rmse": 0.23994095623493195, | |
"eval_runtime": 81.1391, | |
"eval_samples_per_second": 47.375, | |
"eval_steps_per_second": 0.752, | |
"learning_rate": 0.0001, | |
"step": 6335 | |
}, | |
{ | |
"epoch": 35.91160220994475, | |
"grad_norm": 0.18953262269496918, | |
"learning_rate": 0.0001, | |
"loss": 0.3202, | |
"step": 6500 | |
}, | |
{ | |
"epoch": 36.0, | |
"eval_explained_variance": 0.4194072186946869, | |
"eval_loss": 0.3211075961589813, | |
"eval_mae": 0.15770210325717926, | |
"eval_r2": 0.4186839759349823, | |
"eval_rmse": 0.24057835340499878, | |
"eval_runtime": 80.0524, | |
"eval_samples_per_second": 48.019, | |
"eval_steps_per_second": 0.762, | |
"learning_rate": 0.0001, | |
"step": 6516 | |
}, | |
{ | |
"epoch": 37.0, | |
"eval_explained_variance": 0.4203309416770935, | |
"eval_loss": 0.3203599154949188, | |
"eval_mae": 0.1520441472530365, | |
"eval_r2": 0.4187575578689575, | |
"eval_rmse": 0.24031898379325867, | |
"eval_runtime": 79.535, | |
"eval_samples_per_second": 48.331, | |
"eval_steps_per_second": 0.767, | |
"learning_rate": 0.0001, | |
"step": 6697 | |
}, | |
{ | |
"epoch": 38.0, | |
"eval_explained_variance": 0.41849103569984436, | |
"eval_loss": 0.32143038511276245, | |
"eval_mae": 0.15602850914001465, | |
"eval_r2": 0.41701728105545044, | |
"eval_rmse": 0.24094969034194946, | |
"eval_runtime": 78.4033, | |
"eval_samples_per_second": 49.029, | |
"eval_steps_per_second": 0.778, | |
"learning_rate": 0.0001, | |
"step": 6878 | |
}, | |
{ | |
"epoch": 38.67403314917127, | |
"grad_norm": 0.19431360065937042, | |
"learning_rate": 0.0001, | |
"loss": 0.3195, | |
"step": 7000 | |
}, | |
{ | |
"epoch": 39.0, | |
"eval_explained_variance": 0.42322617769241333, | |
"eval_loss": 0.3195198178291321, | |
"eval_mae": 0.15203459560871124, | |
"eval_r2": 0.42263996601104736, | |
"eval_rmse": 0.23966462910175323, | |
"eval_runtime": 77.9703, | |
"eval_samples_per_second": 49.301, | |
"eval_steps_per_second": 0.782, | |
"learning_rate": 0.0001, | |
"step": 7059 | |
}, | |
{ | |
"epoch": 40.0, | |
"eval_explained_variance": 0.42311784625053406, | |
"eval_loss": 0.3207896649837494, | |
"eval_mae": 0.1577463448047638, | |
"eval_r2": 0.4204080402851105, | |
"eval_rmse": 0.2403651773929596, | |
"eval_runtime": 77.4683, | |
"eval_samples_per_second": 49.62, | |
"eval_steps_per_second": 0.787, | |
"learning_rate": 0.0001, | |
"step": 7240 | |
}, | |
{ | |
"epoch": 41.0, | |
"eval_explained_variance": 0.42329341173171997, | |
"eval_loss": 0.3197501003742218, | |
"eval_mae": 0.15471500158309937, | |
"eval_r2": 0.4217017889022827, | |
"eval_rmse": 0.23984502255916595, | |
"eval_runtime": 78.5707, | |
"eval_samples_per_second": 48.924, | |
"eval_steps_per_second": 0.776, | |
"learning_rate": 0.0001, | |
"step": 7421 | |
}, | |
{ | |
"epoch": 41.43646408839779, | |
"grad_norm": 0.22206854820251465, | |
"learning_rate": 0.0001, | |
"loss": 0.3192, | |
"step": 7500 | |
}, | |
{ | |
"epoch": 42.0, | |
"eval_explained_variance": 0.42175009846687317, | |
"eval_loss": 0.32175716757774353, | |
"eval_mae": 0.15892744064331055, | |
"eval_r2": 0.4173850417137146, | |
"eval_rmse": 0.24101921916007996, | |
"eval_runtime": 80.0006, | |
"eval_samples_per_second": 48.05, | |
"eval_steps_per_second": 0.762, | |
"learning_rate": 0.0001, | |
"step": 7602 | |
}, | |
{ | |
"epoch": 43.0, | |
"eval_explained_variance": 0.4254191517829895, | |
"eval_loss": 0.3189575970172882, | |
"eval_mae": 0.1544325351715088, | |
"eval_r2": 0.4235268831253052, | |
"eval_rmse": 0.23960073292255402, | |
"eval_runtime": 77.8217, | |
"eval_samples_per_second": 49.395, | |
"eval_steps_per_second": 0.784, | |
"learning_rate": 0.0001, | |
"step": 7783 | |
}, | |
{ | |
"epoch": 44.0, | |
"eval_explained_variance": 0.42385637760162354, | |
"eval_loss": 0.31898385286331177, | |
"eval_mae": 0.15337544679641724, | |
"eval_r2": 0.42299988865852356, | |
"eval_rmse": 0.23955906927585602, | |
"eval_runtime": 77.9179, | |
"eval_samples_per_second": 49.334, | |
"eval_steps_per_second": 0.783, | |
"learning_rate": 0.0001, | |
"step": 7964 | |
}, | |
{ | |
"epoch": 44.19889502762431, | |
"grad_norm": 0.2577707767486572, | |
"learning_rate": 0.0001, | |
"loss": 0.3178, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 45.0, | |
"eval_explained_variance": 0.42595985531806946, | |
"eval_loss": 0.31977778673171997, | |
"eval_mae": 0.1566278040409088, | |
"eval_r2": 0.423930287361145, | |
"eval_rmse": 0.2397065907716751, | |
"eval_runtime": 78.7581, | |
"eval_samples_per_second": 48.808, | |
"eval_steps_per_second": 0.775, | |
"learning_rate": 0.0001, | |
"step": 8145 | |
}, | |
{ | |
"epoch": 46.0, | |
"eval_explained_variance": 0.42309415340423584, | |
"eval_loss": 0.3193351626396179, | |
"eval_mae": 0.15561553835868835, | |
"eval_r2": 0.421285480260849, | |
"eval_rmse": 0.23979297280311584, | |
"eval_runtime": 77.9122, | |
"eval_samples_per_second": 49.338, | |
"eval_steps_per_second": 0.783, | |
"learning_rate": 0.0001, | |
"step": 8326 | |
}, | |
{ | |
"epoch": 46.96132596685083, | |
"grad_norm": 0.24199023842811584, | |
"learning_rate": 0.0001, | |
"loss": 0.3175, | |
"step": 8500 | |
}, | |
{ | |
"epoch": 47.0, | |
"eval_explained_variance": 0.4257267415523529, | |
"eval_loss": 0.31895366311073303, | |
"eval_mae": 0.15244629979133606, | |
"eval_r2": 0.42454099655151367, | |
"eval_rmse": 0.23932920396327972, | |
"eval_runtime": 78.1903, | |
"eval_samples_per_second": 49.162, | |
"eval_steps_per_second": 0.78, | |
"learning_rate": 0.0001, | |
"step": 8507 | |
}, | |
{ | |
"epoch": 48.0, | |
"eval_explained_variance": 0.4229738414287567, | |
"eval_loss": 0.3192996680736542, | |
"eval_mae": 0.15246888995170593, | |
"eval_r2": 0.4215165376663208, | |
"eval_rmse": 0.2398112416267395, | |
"eval_runtime": 78.4361, | |
"eval_samples_per_second": 49.008, | |
"eval_steps_per_second": 0.778, | |
"learning_rate": 0.0001, | |
"step": 8688 | |
}, | |
{ | |
"epoch": 49.0, | |
"eval_explained_variance": 0.4195903539657593, | |
"eval_loss": 0.32073548436164856, | |
"eval_mae": 0.15583214163780212, | |
"eval_r2": 0.41874682903289795, | |
"eval_rmse": 0.24045619368553162, | |
"eval_runtime": 78.3955, | |
"eval_samples_per_second": 49.033, | |
"eval_steps_per_second": 0.778, | |
"learning_rate": 0.0001, | |
"step": 8869 | |
}, | |
{ | |
"epoch": 49.72375690607735, | |
"grad_norm": 0.23354829847812653, | |
"learning_rate": 1e-05, | |
"loss": 0.3174, | |
"step": 9000 | |
}, | |
{ | |
"epoch": 50.0, | |
"eval_explained_variance": 0.4236743748188019, | |
"eval_loss": 0.3198453485965729, | |
"eval_mae": 0.15717381238937378, | |
"eval_r2": 0.4218462109565735, | |
"eval_rmse": 0.23996752500534058, | |
"eval_runtime": 78.1702, | |
"eval_samples_per_second": 49.175, | |
"eval_steps_per_second": 0.78, | |
"learning_rate": 1e-05, | |
"step": 9050 | |
}, | |
{ | |
"epoch": 51.0, | |
"eval_explained_variance": 0.4172780215740204, | |
"eval_loss": 0.32436585426330566, | |
"eval_mae": 0.16015969216823578, | |
"eval_r2": 0.4091569483280182, | |
"eval_rmse": 0.24263373017311096, | |
"eval_runtime": 77.8337, | |
"eval_samples_per_second": 49.387, | |
"eval_steps_per_second": 0.784, | |
"learning_rate": 1e-05, | |
"step": 9231 | |
}, | |
{ | |
"epoch": 52.0, | |
"eval_explained_variance": 0.42350247502326965, | |
"eval_loss": 0.31899821758270264, | |
"eval_mae": 0.15497206151485443, | |
"eval_r2": 0.42265191674232483, | |
"eval_rmse": 0.23958924412727356, | |
"eval_runtime": 77.2812, | |
"eval_samples_per_second": 49.74, | |
"eval_steps_per_second": 0.789, | |
"learning_rate": 1e-05, | |
"step": 9412 | |
}, | |
{ | |
"epoch": 52.48618784530387, | |
"grad_norm": 0.29434072971343994, | |
"learning_rate": 1e-05, | |
"loss": 0.3152, | |
"step": 9500 | |
}, | |
{ | |
"epoch": 53.0, | |
"eval_explained_variance": 0.4269993007183075, | |
"eval_loss": 0.31892043352127075, | |
"eval_mae": 0.1552329957485199, | |
"eval_r2": 0.4248761534690857, | |
"eval_rmse": 0.23935101926326752, | |
"eval_runtime": 78.9075, | |
"eval_samples_per_second": 48.715, | |
"eval_steps_per_second": 0.773, | |
"learning_rate": 1e-05, | |
"step": 9593 | |
}, | |
{ | |
"epoch": 54.0, | |
"eval_explained_variance": 0.4239312410354614, | |
"eval_loss": 0.3194037675857544, | |
"eval_mae": 0.15401999652385712, | |
"eval_r2": 0.42274925112724304, | |
"eval_rmse": 0.23955823481082916, | |
"eval_runtime": 77.3784, | |
"eval_samples_per_second": 49.678, | |
"eval_steps_per_second": 0.788, | |
"learning_rate": 1e-05, | |
"step": 9774 | |
}, | |
{ | |
"epoch": 55.0, | |
"eval_explained_variance": 0.4257669150829315, | |
"eval_loss": 0.3184601366519928, | |
"eval_mae": 0.15387418866157532, | |
"eval_r2": 0.42496535181999207, | |
"eval_rmse": 0.2391066700220108, | |
"eval_runtime": 77.4594, | |
"eval_samples_per_second": 49.626, | |
"eval_steps_per_second": 0.788, | |
"learning_rate": 1e-05, | |
"step": 9955 | |
}, | |
{ | |
"epoch": 55.248618784530386, | |
"grad_norm": 0.21759380400180817, | |
"learning_rate": 1e-05, | |
"loss": 0.317, | |
"step": 10000 | |
}, | |
{ | |
"epoch": 56.0, | |
"eval_explained_variance": 0.4281463027000427, | |
"eval_loss": 0.318115234375, | |
"eval_mae": 0.15269973874092102, | |
"eval_r2": 0.4272992014884949, | |
"eval_rmse": 0.23881223797798157, | |
"eval_runtime": 77.3587, | |
"eval_samples_per_second": 49.691, | |
"eval_steps_per_second": 0.789, | |
"learning_rate": 1e-05, | |
"step": 10136 | |
}, | |
{ | |
"epoch": 57.0, | |
"eval_explained_variance": 0.4273688495159149, | |
"eval_loss": 0.31871330738067627, | |
"eval_mae": 0.15321487188339233, | |
"eval_r2": 0.42585498094558716, | |
"eval_rmse": 0.23920726776123047, | |
"eval_runtime": 77.7128, | |
"eval_samples_per_second": 49.464, | |
"eval_steps_per_second": 0.785, | |
"learning_rate": 1e-05, | |
"step": 10317 | |
}, | |
{ | |
"epoch": 58.0, | |
"eval_explained_variance": 0.4258714020252228, | |
"eval_loss": 0.32010164856910706, | |
"eval_mae": 0.15668974816799164, | |
"eval_r2": 0.42169713973999023, | |
"eval_rmse": 0.2401351034641266, | |
"eval_runtime": 78.5354, | |
"eval_samples_per_second": 48.946, | |
"eval_steps_per_second": 0.777, | |
"learning_rate": 1e-05, | |
"step": 10498 | |
}, | |
{ | |
"epoch": 58.011049723756905, | |
"grad_norm": 0.26900964975357056, | |
"learning_rate": 1e-05, | |
"loss": 0.314, | |
"step": 10500 | |
}, | |
{ | |
"epoch": 59.0, | |
"eval_explained_variance": 0.42817097902297974, | |
"eval_loss": 0.31807705760002136, | |
"eval_mae": 0.15281866490840912, | |
"eval_r2": 0.42699384689331055, | |
"eval_rmse": 0.23875540494918823, | |
"eval_runtime": 78.4147, | |
"eval_samples_per_second": 49.021, | |
"eval_steps_per_second": 0.778, | |
"learning_rate": 1e-05, | |
"step": 10679 | |
}, | |
{ | |
"epoch": 60.0, | |
"eval_explained_variance": 0.4267863631248474, | |
"eval_loss": 0.3181913495063782, | |
"eval_mae": 0.15336427092552185, | |
"eval_r2": 0.4256437420845032, | |
"eval_rmse": 0.23891934752464294, | |
"eval_runtime": 78.5798, | |
"eval_samples_per_second": 48.918, | |
"eval_steps_per_second": 0.776, | |
"learning_rate": 1e-05, | |
"step": 10860 | |
}, | |
{ | |
"epoch": 60.773480662983424, | |
"grad_norm": 0.276735782623291, | |
"learning_rate": 1e-05, | |
"loss": 0.314, | |
"step": 11000 | |
}, | |
{ | |
"epoch": 61.0, | |
"eval_explained_variance": 0.4265504479408264, | |
"eval_loss": 0.3185857832431793, | |
"eval_mae": 0.1509746015071869, | |
"eval_r2": 0.42550772428512573, | |
"eval_rmse": 0.23908774554729462, | |
"eval_runtime": 78.0232, | |
"eval_samples_per_second": 49.267, | |
"eval_steps_per_second": 0.782, | |
"learning_rate": 1e-05, | |
"step": 11041 | |
}, | |
{ | |
"epoch": 62.0, | |
"eval_explained_variance": 0.4262467920780182, | |
"eval_loss": 0.32031872868537903, | |
"eval_mae": 0.15960827469825745, | |
"eval_r2": 0.4240296185016632, | |
"eval_rmse": 0.23980861902236938, | |
"eval_runtime": 78.7243, | |
"eval_samples_per_second": 48.829, | |
"eval_steps_per_second": 0.775, | |
"learning_rate": 1e-05, | |
"step": 11222 | |
}, | |
{ | |
"epoch": 63.0, | |
"eval_explained_variance": 0.42764127254486084, | |
"eval_loss": 0.31964218616485596, | |
"eval_mae": 0.1570112109184265, | |
"eval_r2": 0.42423343658447266, | |
"eval_rmse": 0.23966987431049347, | |
"eval_runtime": 79.5804, | |
"eval_samples_per_second": 48.303, | |
"eval_steps_per_second": 0.767, | |
"learning_rate": 1e-05, | |
"step": 11403 | |
}, | |
{ | |
"epoch": 63.53591160220994, | |
"grad_norm": 0.25174766778945923, | |
"learning_rate": 1e-05, | |
"loss": 0.3142, | |
"step": 11500 | |
}, | |
{ | |
"epoch": 64.0, | |
"eval_explained_variance": 0.4253126382827759, | |
"eval_loss": 0.31808170676231384, | |
"eval_mae": 0.15270139276981354, | |
"eval_r2": 0.42435672879219055, | |
"eval_rmse": 0.23907549679279327, | |
"eval_runtime": 79.2944, | |
"eval_samples_per_second": 48.478, | |
"eval_steps_per_second": 0.769, | |
"learning_rate": 1e-05, | |
"step": 11584 | |
}, | |
{ | |
"epoch": 65.0, | |
"eval_explained_variance": 0.4263868033885956, | |
"eval_loss": 0.31850185990333557, | |
"eval_mae": 0.1550404131412506, | |
"eval_r2": 0.4259207546710968, | |
"eval_rmse": 0.23902735114097595, | |
"eval_runtime": 78.9958, | |
"eval_samples_per_second": 48.661, | |
"eval_steps_per_second": 0.772, | |
"learning_rate": 1e-05, | |
"step": 11765 | |
}, | |
{ | |
"epoch": 66.0, | |
"eval_explained_variance": 0.4291055500507355, | |
"eval_loss": 0.3186076879501343, | |
"eval_mae": 0.15623149275779724, | |
"eval_r2": 0.42775318026542664, | |
"eval_rmse": 0.23892195522785187, | |
"eval_runtime": 79.1276, | |
"eval_samples_per_second": 48.58, | |
"eval_steps_per_second": 0.771, | |
"learning_rate": 1.0000000000000002e-06, | |
"step": 11946 | |
}, | |
{ | |
"epoch": 66.29834254143647, | |
"grad_norm": 0.31120502948760986, | |
"learning_rate": 1.0000000000000002e-06, | |
"loss": 0.3131, | |
"step": 12000 | |
}, | |
{ | |
"epoch": 67.0, | |
"eval_explained_variance": 0.4278748631477356, | |
"eval_loss": 0.3181016743183136, | |
"eval_mae": 0.15263643860816956, | |
"eval_r2": 0.42703741788864136, | |
"eval_rmse": 0.2387426644563675, | |
"eval_runtime": 77.8773, | |
"eval_samples_per_second": 49.36, | |
"eval_steps_per_second": 0.783, | |
"learning_rate": 1.0000000000000002e-06, | |
"step": 12127 | |
}, | |
{ | |
"epoch": 68.0, | |
"eval_explained_variance": 0.42565029859542847, | |
"eval_loss": 0.3194774389266968, | |
"eval_mae": 0.15491320192813873, | |
"eval_r2": 0.4220888018608093, | |
"eval_rmse": 0.23974499106407166, | |
"eval_runtime": 77.5869, | |
"eval_samples_per_second": 49.544, | |
"eval_steps_per_second": 0.786, | |
"learning_rate": 1.0000000000000002e-06, | |
"step": 12308 | |
}, | |
{ | |
"epoch": 69.0, | |
"eval_explained_variance": 0.4274521470069885, | |
"eval_loss": 0.3183264136314392, | |
"eval_mae": 0.15404105186462402, | |
"eval_r2": 0.4259319305419922, | |
"eval_rmse": 0.23901674151420593, | |
"eval_runtime": 78.3061, | |
"eval_samples_per_second": 49.089, | |
"eval_steps_per_second": 0.779, | |
"learning_rate": 1.0000000000000002e-06, | |
"step": 12489 | |
}, | |
{ | |
"epoch": 69.0, | |
"learning_rate": 1.0000000000000002e-06, | |
"step": 12489, | |
"total_flos": 1.1767002635628401e+20, | |
"train_loss": 0.32720188785662635, | |
"train_runtime": 24708.1609, | |
"train_samples_per_second": 69.949, | |
"train_steps_per_second": 1.099 | |
} | |
], | |
"logging_steps": 500, | |
"max_steps": 27150, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 150, | |
"save_steps": 500, | |
"stateful_callbacks": { | |
"EarlyStoppingCallback": { | |
"args": { | |
"early_stopping_patience": 10, | |
"early_stopping_threshold": 0.0 | |
}, | |
"attributes": { | |
"early_stopping_patience_counter": 0 | |
} | |
}, | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": true, | |
"should_training_stop": true | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 1.1767002635628401e+20, | |
"train_batch_size": 64, | |
"trial_name": null, | |
"trial_params": null | |
} | |