{ "best_metric": 0.31807705760002136, "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria-probabilities-large-2024_11_04-batch-size64_freeze_probs/checkpoint-10679", "epoch": 69.0, "eval_steps": 500, "global_step": 12489, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_explained_variance": 0.30368584394454956, "eval_loss": 0.36101797223091125, "eval_mae": 0.18781842291355133, "eval_r2": 0.28179988265037537, "eval_rmse": 0.2644973397254944, "eval_runtime": 77.9895, "eval_samples_per_second": 49.289, "eval_steps_per_second": 0.782, "learning_rate": 0.001, "step": 181 }, { "epoch": 2.0, "eval_explained_variance": 0.3478943109512329, "eval_loss": 0.3464529514312744, "eval_mae": 0.17783641815185547, "eval_r2": 0.33490002155303955, "eval_rmse": 0.25657808780670166, "eval_runtime": 82.5528, "eval_samples_per_second": 46.564, "eval_steps_per_second": 0.739, "learning_rate": 0.001, "step": 362 }, { "epoch": 2.7624309392265194, "grad_norm": 0.36520126461982727, "learning_rate": 0.001, "loss": 0.4149, "step": 500 }, { "epoch": 3.0, "eval_explained_variance": 0.35995176434516907, "eval_loss": 0.34133487939834595, "eval_mae": 0.17307622730731964, "eval_r2": 0.3535836637020111, "eval_rmse": 0.25322917103767395, "eval_runtime": 82.3103, "eval_samples_per_second": 46.701, "eval_steps_per_second": 0.741, "learning_rate": 0.001, "step": 543 }, { "epoch": 4.0, "eval_explained_variance": 0.35911357402801514, "eval_loss": 0.3406243324279785, "eval_mae": 0.17434848845005035, "eval_r2": 0.35186487436294556, "eval_rmse": 0.25316712260246277, "eval_runtime": 82.3198, "eval_samples_per_second": 46.696, "eval_steps_per_second": 0.741, "learning_rate": 0.001, "step": 724 }, { "epoch": 5.0, "eval_explained_variance": 0.3730728328227997, "eval_loss": 0.3341675400733948, "eval_mae": 0.1660642772912979, "eval_r2": 0.37022241950035095, "eval_rmse": 0.24957779049873352, "eval_runtime": 81.2913, "eval_samples_per_second": 47.287, "eval_steps_per_second": 0.75, "learning_rate": 0.001, "step": 905 }, { "epoch": 5.524861878453039, "grad_norm": 0.2786722779273987, "learning_rate": 0.001, "loss": 0.3486, "step": 1000 }, { "epoch": 6.0, "eval_explained_variance": 0.37237703800201416, "eval_loss": 0.33848145604133606, "eval_mae": 0.17393800616264343, "eval_r2": 0.36507394909858704, "eval_rmse": 0.2512134909629822, "eval_runtime": 81.4015, "eval_samples_per_second": 47.223, "eval_steps_per_second": 0.749, "learning_rate": 0.001, "step": 1086 }, { "epoch": 7.0, "eval_explained_variance": 0.38455089926719666, "eval_loss": 0.3320676386356354, "eval_mae": 0.16495750844478607, "eval_r2": 0.38360098004341125, "eval_rmse": 0.24762088060379028, "eval_runtime": 80.8916, "eval_samples_per_second": 47.52, "eval_steps_per_second": 0.754, "learning_rate": 0.001, "step": 1267 }, { "epoch": 8.0, "eval_explained_variance": 0.38114219903945923, "eval_loss": 0.3332081437110901, "eval_mae": 0.1629333347082138, "eval_r2": 0.38021257519721985, "eval_rmse": 0.24838922917842865, "eval_runtime": 81.8669, "eval_samples_per_second": 46.954, "eval_steps_per_second": 0.745, "learning_rate": 0.001, "step": 1448 }, { "epoch": 8.287292817679559, "grad_norm": 0.24299436807632446, "learning_rate": 0.001, "loss": 0.3462, "step": 1500 }, { "epoch": 9.0, "eval_explained_variance": 0.3872137665748596, "eval_loss": 0.3305376172065735, "eval_mae": 0.16518667340278625, "eval_r2": 0.38587409257888794, "eval_rmse": 0.24679909646511078, "eval_runtime": 80.7757, "eval_samples_per_second": 47.589, "eval_steps_per_second": 0.755, "learning_rate": 0.001, "step": 1629 }, { "epoch": 10.0, "eval_explained_variance": 0.3849840760231018, "eval_loss": 0.33136793971061707, "eval_mae": 0.16550396382808685, "eval_r2": 0.38265591859817505, "eval_rmse": 0.2476043999195099, "eval_runtime": 81.3286, "eval_samples_per_second": 47.265, "eval_steps_per_second": 0.75, "learning_rate": 0.001, "step": 1810 }, { "epoch": 11.0, "eval_explained_variance": 0.3866196274757385, "eval_loss": 0.3319685757160187, "eval_mae": 0.16017739474773407, "eval_r2": 0.3840087652206421, "eval_rmse": 0.2473573535680771, "eval_runtime": 80.4031, "eval_samples_per_second": 47.809, "eval_steps_per_second": 0.759, "learning_rate": 0.001, "step": 1991 }, { "epoch": 11.049723756906078, "grad_norm": 0.1871100217103958, "learning_rate": 0.001, "loss": 0.3391, "step": 2000 }, { "epoch": 12.0, "eval_explained_variance": 0.3842613399028778, "eval_loss": 0.3341500759124756, "eval_mae": 0.16828522086143494, "eval_r2": 0.37614381313323975, "eval_rmse": 0.2493605613708496, "eval_runtime": 85.712, "eval_samples_per_second": 44.848, "eval_steps_per_second": 0.712, "learning_rate": 0.001, "step": 2172 }, { "epoch": 13.0, "eval_explained_variance": 0.38362985849380493, "eval_loss": 0.33248215913772583, "eval_mae": 0.16487525403499603, "eval_r2": 0.38205400109291077, "eval_rmse": 0.2479550540447235, "eval_runtime": 81.2132, "eval_samples_per_second": 47.332, "eval_steps_per_second": 0.751, "learning_rate": 0.001, "step": 2353 }, { "epoch": 13.812154696132596, "grad_norm": 0.19864079356193542, "learning_rate": 0.001, "loss": 0.3372, "step": 2500 }, { "epoch": 14.0, "eval_explained_variance": 0.3930034041404724, "eval_loss": 0.33228376507759094, "eval_mae": 0.17001599073410034, "eval_r2": 0.38782283663749695, "eval_rmse": 0.24722439050674438, "eval_runtime": 81.4831, "eval_samples_per_second": 47.175, "eval_steps_per_second": 0.749, "learning_rate": 0.001, "step": 2534 }, { "epoch": 15.0, "eval_explained_variance": 0.38280433416366577, "eval_loss": 0.334873229265213, "eval_mae": 0.1702941358089447, "eval_r2": 0.37488964200019836, "eval_rmse": 0.24934227764606476, "eval_runtime": 82.0523, "eval_samples_per_second": 46.848, "eval_steps_per_second": 0.743, "learning_rate": 0.001, "step": 2715 }, { "epoch": 16.0, "eval_explained_variance": 0.4018595516681671, "eval_loss": 0.3279329538345337, "eval_mae": 0.16485117375850677, "eval_r2": 0.3982715308666229, "eval_rmse": 0.2447740137577057, "eval_runtime": 80.4915, "eval_samples_per_second": 47.757, "eval_steps_per_second": 0.758, "learning_rate": 0.0001, "step": 2896 }, { "epoch": 16.574585635359117, "grad_norm": 0.18467135727405548, "learning_rate": 0.0001, "loss": 0.3343, "step": 3000 }, { "epoch": 17.0, "eval_explained_variance": 0.4041018486022949, "eval_loss": 0.3279244005680084, "eval_mae": 0.1647535115480423, "eval_r2": 0.39838364720344543, "eval_rmse": 0.24482907354831696, "eval_runtime": 80.8242, "eval_samples_per_second": 47.56, "eval_steps_per_second": 0.755, "learning_rate": 0.0001, "step": 3077 }, { "epoch": 18.0, "eval_explained_variance": 0.40324074029922485, "eval_loss": 0.3262367248535156, "eval_mae": 0.16224807500839233, "eval_r2": 0.40254056453704834, "eval_rmse": 0.24403510987758636, "eval_runtime": 79.9167, "eval_samples_per_second": 48.1, "eval_steps_per_second": 0.763, "learning_rate": 0.0001, "step": 3258 }, { "epoch": 19.0, "eval_explained_variance": 0.4051153361797333, "eval_loss": 0.3247373402118683, "eval_mae": 0.15879866480827332, "eval_r2": 0.4045550227165222, "eval_rmse": 0.24321436882019043, "eval_runtime": 78.6921, "eval_samples_per_second": 48.849, "eval_steps_per_second": 0.775, "learning_rate": 0.0001, "step": 3439 }, { "epoch": 19.337016574585636, "grad_norm": 0.19419366121292114, "learning_rate": 0.0001, "loss": 0.3271, "step": 3500 }, { "epoch": 20.0, "eval_explained_variance": 0.41063615679740906, "eval_loss": 0.32612329721450806, "eval_mae": 0.16252389550209045, "eval_r2": 0.4059467911720276, "eval_rmse": 0.24334031343460083, "eval_runtime": 79.0343, "eval_samples_per_second": 48.637, "eval_steps_per_second": 0.772, "learning_rate": 0.0001, "step": 3620 }, { "epoch": 21.0, "eval_explained_variance": 0.411923885345459, "eval_loss": 0.3241129517555237, "eval_mae": 0.16064995527267456, "eval_r2": 0.4094885587692261, "eval_rmse": 0.2424389272928238, "eval_runtime": 79.7663, "eval_samples_per_second": 48.191, "eval_steps_per_second": 0.765, "learning_rate": 0.0001, "step": 3801 }, { "epoch": 22.0, "eval_explained_variance": 0.41316869854927063, "eval_loss": 0.32355180382728577, "eval_mae": 0.1587211936712265, "eval_r2": 0.4111122488975525, "eval_rmse": 0.24220238626003265, "eval_runtime": 79.4498, "eval_samples_per_second": 48.383, "eval_steps_per_second": 0.768, "learning_rate": 0.0001, "step": 3982 }, { "epoch": 22.099447513812155, "grad_norm": 0.1995573192834854, "learning_rate": 0.0001, "loss": 0.3275, "step": 4000 }, { "epoch": 23.0, "eval_explained_variance": 0.41342437267303467, "eval_loss": 0.3242079019546509, "eval_mae": 0.16008274257183075, "eval_r2": 0.4106961488723755, "eval_rmse": 0.2422674000263214, "eval_runtime": 78.6301, "eval_samples_per_second": 48.887, "eval_steps_per_second": 0.776, "learning_rate": 0.0001, "step": 4163 }, { "epoch": 24.0, "eval_explained_variance": 0.4160999655723572, "eval_loss": 0.3227241337299347, "eval_mae": 0.15855328738689423, "eval_r2": 0.41498833894729614, "eval_rmse": 0.2413560003042221, "eval_runtime": 78.9117, "eval_samples_per_second": 48.713, "eval_steps_per_second": 0.773, "learning_rate": 0.0001, "step": 4344 }, { "epoch": 24.861878453038674, "grad_norm": 0.19915054738521576, "learning_rate": 0.0001, "loss": 0.3247, "step": 4500 }, { "epoch": 25.0, "eval_explained_variance": 0.4161580204963684, "eval_loss": 0.3223778307437897, "eval_mae": 0.1586799919605255, "eval_r2": 0.4147694408893585, "eval_rmse": 0.24130770564079285, "eval_runtime": 79.8112, "eval_samples_per_second": 48.164, "eval_steps_per_second": 0.764, "learning_rate": 0.0001, "step": 4525 }, { "epoch": 26.0, "eval_explained_variance": 0.4154631793498993, "eval_loss": 0.3217927813529968, "eval_mae": 0.15566809475421906, "eval_r2": 0.4142845571041107, "eval_rmse": 0.24133403599262238, "eval_runtime": 80.3582, "eval_samples_per_second": 47.836, "eval_steps_per_second": 0.759, "learning_rate": 0.0001, "step": 4706 }, { "epoch": 27.0, "eval_explained_variance": 0.4153793156147003, "eval_loss": 0.3227355182170868, "eval_mae": 0.16026344895362854, "eval_r2": 0.4137920141220093, "eval_rmse": 0.2415631115436554, "eval_runtime": 81.132, "eval_samples_per_second": 47.38, "eval_steps_per_second": 0.752, "learning_rate": 0.0001, "step": 4887 }, { "epoch": 27.624309392265193, "grad_norm": 0.21681347489356995, "learning_rate": 0.0001, "loss": 0.3231, "step": 5000 }, { "epoch": 28.0, "eval_explained_variance": 0.41966134309768677, "eval_loss": 0.32067713141441345, "eval_mae": 0.15620459616184235, "eval_r2": 0.4186200499534607, "eval_rmse": 0.24054032564163208, "eval_runtime": 81.32, "eval_samples_per_second": 47.27, "eval_steps_per_second": 0.75, "learning_rate": 0.0001, "step": 5068 }, { "epoch": 29.0, "eval_explained_variance": 0.4175001382827759, "eval_loss": 0.32205939292907715, "eval_mae": 0.15971842408180237, "eval_r2": 0.41626471281051636, "eval_rmse": 0.24114808440208435, "eval_runtime": 80.4359, "eval_samples_per_second": 47.79, "eval_steps_per_second": 0.758, "learning_rate": 0.0001, "step": 5249 }, { "epoch": 30.0, "eval_explained_variance": 0.4190256893634796, "eval_loss": 0.32246074080467224, "eval_mae": 0.16082431375980377, "eval_r2": 0.41641533374786377, "eval_rmse": 0.24128668010234833, "eval_runtime": 80.4517, "eval_samples_per_second": 47.78, "eval_steps_per_second": 0.758, "learning_rate": 0.0001, "step": 5430 }, { "epoch": 30.386740331491712, "grad_norm": 0.20712591707706451, "learning_rate": 0.0001, "loss": 0.3215, "step": 5500 }, { "epoch": 31.0, "eval_explained_variance": 0.41637617349624634, "eval_loss": 0.3223503530025482, "eval_mae": 0.15350204706192017, "eval_r2": 0.4133574366569519, "eval_rmse": 0.2416228950023651, "eval_runtime": 83.145, "eval_samples_per_second": 46.232, "eval_steps_per_second": 0.734, "learning_rate": 0.0001, "step": 5611 }, { "epoch": 32.0, "eval_explained_variance": 0.41848501563072205, "eval_loss": 0.3212696313858032, "eval_mae": 0.1552996039390564, "eval_r2": 0.4179571568965912, "eval_rmse": 0.24075190722942352, "eval_runtime": 80.1214, "eval_samples_per_second": 47.977, "eval_steps_per_second": 0.761, "learning_rate": 0.0001, "step": 5792 }, { "epoch": 33.0, "eval_explained_variance": 0.4142448604106903, "eval_loss": 0.32156360149383545, "eval_mae": 0.15830956399440765, "eval_r2": 0.4123082458972931, "eval_rmse": 0.24135708808898926, "eval_runtime": 80.4898, "eval_samples_per_second": 47.758, "eval_steps_per_second": 0.758, "learning_rate": 0.0001, "step": 5973 }, { "epoch": 33.149171270718234, "grad_norm": 0.22835813462734222, "learning_rate": 0.0001, "loss": 0.3227, "step": 6000 }, { "epoch": 34.0, "eval_explained_variance": 0.4181065559387207, "eval_loss": 0.3205103278160095, "eval_mae": 0.15616008639335632, "eval_r2": 0.41721493005752563, "eval_rmse": 0.24057045578956604, "eval_runtime": 80.866, "eval_samples_per_second": 47.535, "eval_steps_per_second": 0.754, "learning_rate": 0.0001, "step": 6154 }, { "epoch": 35.0, "eval_explained_variance": 0.422443687915802, "eval_loss": 0.3197581171989441, "eval_mae": 0.15349127352237701, "eval_r2": 0.42148736119270325, "eval_rmse": 0.23994095623493195, "eval_runtime": 81.1391, "eval_samples_per_second": 47.375, "eval_steps_per_second": 0.752, "learning_rate": 0.0001, "step": 6335 }, { "epoch": 35.91160220994475, "grad_norm": 0.18953262269496918, "learning_rate": 0.0001, "loss": 0.3202, "step": 6500 }, { "epoch": 36.0, "eval_explained_variance": 0.4194072186946869, "eval_loss": 0.3211075961589813, "eval_mae": 0.15770210325717926, "eval_r2": 0.4186839759349823, "eval_rmse": 0.24057835340499878, "eval_runtime": 80.0524, "eval_samples_per_second": 48.019, "eval_steps_per_second": 0.762, "learning_rate": 0.0001, "step": 6516 }, { "epoch": 37.0, "eval_explained_variance": 0.4203309416770935, "eval_loss": 0.3203599154949188, "eval_mae": 0.1520441472530365, "eval_r2": 0.4187575578689575, "eval_rmse": 0.24031898379325867, "eval_runtime": 79.535, "eval_samples_per_second": 48.331, "eval_steps_per_second": 0.767, "learning_rate": 0.0001, "step": 6697 }, { "epoch": 38.0, "eval_explained_variance": 0.41849103569984436, "eval_loss": 0.32143038511276245, "eval_mae": 0.15602850914001465, "eval_r2": 0.41701728105545044, "eval_rmse": 0.24094969034194946, "eval_runtime": 78.4033, "eval_samples_per_second": 49.029, "eval_steps_per_second": 0.778, "learning_rate": 0.0001, "step": 6878 }, { "epoch": 38.67403314917127, "grad_norm": 0.19431360065937042, "learning_rate": 0.0001, "loss": 0.3195, "step": 7000 }, { "epoch": 39.0, "eval_explained_variance": 0.42322617769241333, "eval_loss": 0.3195198178291321, "eval_mae": 0.15203459560871124, "eval_r2": 0.42263996601104736, "eval_rmse": 0.23966462910175323, "eval_runtime": 77.9703, "eval_samples_per_second": 49.301, "eval_steps_per_second": 0.782, "learning_rate": 0.0001, "step": 7059 }, { "epoch": 40.0, "eval_explained_variance": 0.42311784625053406, "eval_loss": 0.3207896649837494, "eval_mae": 0.1577463448047638, "eval_r2": 0.4204080402851105, "eval_rmse": 0.2403651773929596, "eval_runtime": 77.4683, "eval_samples_per_second": 49.62, "eval_steps_per_second": 0.787, "learning_rate": 0.0001, "step": 7240 }, { "epoch": 41.0, "eval_explained_variance": 0.42329341173171997, "eval_loss": 0.3197501003742218, "eval_mae": 0.15471500158309937, "eval_r2": 0.4217017889022827, "eval_rmse": 0.23984502255916595, "eval_runtime": 78.5707, "eval_samples_per_second": 48.924, "eval_steps_per_second": 0.776, "learning_rate": 0.0001, "step": 7421 }, { "epoch": 41.43646408839779, "grad_norm": 0.22206854820251465, "learning_rate": 0.0001, "loss": 0.3192, "step": 7500 }, { "epoch": 42.0, "eval_explained_variance": 0.42175009846687317, "eval_loss": 0.32175716757774353, "eval_mae": 0.15892744064331055, "eval_r2": 0.4173850417137146, "eval_rmse": 0.24101921916007996, "eval_runtime": 80.0006, "eval_samples_per_second": 48.05, "eval_steps_per_second": 0.762, "learning_rate": 0.0001, "step": 7602 }, { "epoch": 43.0, "eval_explained_variance": 0.4254191517829895, "eval_loss": 0.3189575970172882, "eval_mae": 0.1544325351715088, "eval_r2": 0.4235268831253052, "eval_rmse": 0.23960073292255402, "eval_runtime": 77.8217, "eval_samples_per_second": 49.395, "eval_steps_per_second": 0.784, "learning_rate": 0.0001, "step": 7783 }, { "epoch": 44.0, "eval_explained_variance": 0.42385637760162354, "eval_loss": 0.31898385286331177, "eval_mae": 0.15337544679641724, "eval_r2": 0.42299988865852356, "eval_rmse": 0.23955906927585602, "eval_runtime": 77.9179, "eval_samples_per_second": 49.334, "eval_steps_per_second": 0.783, "learning_rate": 0.0001, "step": 7964 }, { "epoch": 44.19889502762431, "grad_norm": 0.2577707767486572, "learning_rate": 0.0001, "loss": 0.3178, "step": 8000 }, { "epoch": 45.0, "eval_explained_variance": 0.42595985531806946, "eval_loss": 0.31977778673171997, "eval_mae": 0.1566278040409088, "eval_r2": 0.423930287361145, "eval_rmse": 0.2397065907716751, "eval_runtime": 78.7581, "eval_samples_per_second": 48.808, "eval_steps_per_second": 0.775, "learning_rate": 0.0001, "step": 8145 }, { "epoch": 46.0, "eval_explained_variance": 0.42309415340423584, "eval_loss": 0.3193351626396179, "eval_mae": 0.15561553835868835, "eval_r2": 0.421285480260849, "eval_rmse": 0.23979297280311584, "eval_runtime": 77.9122, "eval_samples_per_second": 49.338, "eval_steps_per_second": 0.783, "learning_rate": 0.0001, "step": 8326 }, { "epoch": 46.96132596685083, "grad_norm": 0.24199023842811584, "learning_rate": 0.0001, "loss": 0.3175, "step": 8500 }, { "epoch": 47.0, "eval_explained_variance": 0.4257267415523529, "eval_loss": 0.31895366311073303, "eval_mae": 0.15244629979133606, "eval_r2": 0.42454099655151367, "eval_rmse": 0.23932920396327972, "eval_runtime": 78.1903, "eval_samples_per_second": 49.162, "eval_steps_per_second": 0.78, "learning_rate": 0.0001, "step": 8507 }, { "epoch": 48.0, "eval_explained_variance": 0.4229738414287567, "eval_loss": 0.3192996680736542, "eval_mae": 0.15246888995170593, "eval_r2": 0.4215165376663208, "eval_rmse": 0.2398112416267395, "eval_runtime": 78.4361, "eval_samples_per_second": 49.008, "eval_steps_per_second": 0.778, "learning_rate": 0.0001, "step": 8688 }, { "epoch": 49.0, "eval_explained_variance": 0.4195903539657593, "eval_loss": 0.32073548436164856, "eval_mae": 0.15583214163780212, "eval_r2": 0.41874682903289795, "eval_rmse": 0.24045619368553162, "eval_runtime": 78.3955, "eval_samples_per_second": 49.033, "eval_steps_per_second": 0.778, "learning_rate": 0.0001, "step": 8869 }, { "epoch": 49.72375690607735, "grad_norm": 0.23354829847812653, "learning_rate": 1e-05, "loss": 0.3174, "step": 9000 }, { "epoch": 50.0, "eval_explained_variance": 0.4236743748188019, "eval_loss": 0.3198453485965729, "eval_mae": 0.15717381238937378, "eval_r2": 0.4218462109565735, "eval_rmse": 0.23996752500534058, "eval_runtime": 78.1702, "eval_samples_per_second": 49.175, "eval_steps_per_second": 0.78, "learning_rate": 1e-05, "step": 9050 }, { "epoch": 51.0, "eval_explained_variance": 0.4172780215740204, "eval_loss": 0.32436585426330566, "eval_mae": 0.16015969216823578, "eval_r2": 0.4091569483280182, "eval_rmse": 0.24263373017311096, "eval_runtime": 77.8337, "eval_samples_per_second": 49.387, "eval_steps_per_second": 0.784, "learning_rate": 1e-05, "step": 9231 }, { "epoch": 52.0, "eval_explained_variance": 0.42350247502326965, "eval_loss": 0.31899821758270264, "eval_mae": 0.15497206151485443, "eval_r2": 0.42265191674232483, "eval_rmse": 0.23958924412727356, "eval_runtime": 77.2812, "eval_samples_per_second": 49.74, "eval_steps_per_second": 0.789, "learning_rate": 1e-05, "step": 9412 }, { "epoch": 52.48618784530387, "grad_norm": 0.29434072971343994, "learning_rate": 1e-05, "loss": 0.3152, "step": 9500 }, { "epoch": 53.0, "eval_explained_variance": 0.4269993007183075, "eval_loss": 0.31892043352127075, "eval_mae": 0.1552329957485199, "eval_r2": 0.4248761534690857, "eval_rmse": 0.23935101926326752, "eval_runtime": 78.9075, "eval_samples_per_second": 48.715, "eval_steps_per_second": 0.773, "learning_rate": 1e-05, "step": 9593 }, { "epoch": 54.0, "eval_explained_variance": 0.4239312410354614, "eval_loss": 0.3194037675857544, "eval_mae": 0.15401999652385712, "eval_r2": 0.42274925112724304, "eval_rmse": 0.23955823481082916, "eval_runtime": 77.3784, "eval_samples_per_second": 49.678, "eval_steps_per_second": 0.788, "learning_rate": 1e-05, "step": 9774 }, { "epoch": 55.0, "eval_explained_variance": 0.4257669150829315, "eval_loss": 0.3184601366519928, "eval_mae": 0.15387418866157532, "eval_r2": 0.42496535181999207, "eval_rmse": 0.2391066700220108, "eval_runtime": 77.4594, "eval_samples_per_second": 49.626, "eval_steps_per_second": 0.788, "learning_rate": 1e-05, "step": 9955 }, { "epoch": 55.248618784530386, "grad_norm": 0.21759380400180817, "learning_rate": 1e-05, "loss": 0.317, "step": 10000 }, { "epoch": 56.0, "eval_explained_variance": 0.4281463027000427, "eval_loss": 0.318115234375, "eval_mae": 0.15269973874092102, "eval_r2": 0.4272992014884949, "eval_rmse": 0.23881223797798157, "eval_runtime": 77.3587, "eval_samples_per_second": 49.691, "eval_steps_per_second": 0.789, "learning_rate": 1e-05, "step": 10136 }, { "epoch": 57.0, "eval_explained_variance": 0.4273688495159149, "eval_loss": 0.31871330738067627, "eval_mae": 0.15321487188339233, "eval_r2": 0.42585498094558716, "eval_rmse": 0.23920726776123047, "eval_runtime": 77.7128, "eval_samples_per_second": 49.464, "eval_steps_per_second": 0.785, "learning_rate": 1e-05, "step": 10317 }, { "epoch": 58.0, "eval_explained_variance": 0.4258714020252228, "eval_loss": 0.32010164856910706, "eval_mae": 0.15668974816799164, "eval_r2": 0.42169713973999023, "eval_rmse": 0.2401351034641266, "eval_runtime": 78.5354, "eval_samples_per_second": 48.946, "eval_steps_per_second": 0.777, "learning_rate": 1e-05, "step": 10498 }, { "epoch": 58.011049723756905, "grad_norm": 0.26900964975357056, "learning_rate": 1e-05, "loss": 0.314, "step": 10500 }, { "epoch": 59.0, "eval_explained_variance": 0.42817097902297974, "eval_loss": 0.31807705760002136, "eval_mae": 0.15281866490840912, "eval_r2": 0.42699384689331055, "eval_rmse": 0.23875540494918823, "eval_runtime": 78.4147, "eval_samples_per_second": 49.021, "eval_steps_per_second": 0.778, "learning_rate": 1e-05, "step": 10679 }, { "epoch": 60.0, "eval_explained_variance": 0.4267863631248474, "eval_loss": 0.3181913495063782, "eval_mae": 0.15336427092552185, "eval_r2": 0.4256437420845032, "eval_rmse": 0.23891934752464294, "eval_runtime": 78.5798, "eval_samples_per_second": 48.918, "eval_steps_per_second": 0.776, "learning_rate": 1e-05, "step": 10860 }, { "epoch": 60.773480662983424, "grad_norm": 0.276735782623291, "learning_rate": 1e-05, "loss": 0.314, "step": 11000 }, { "epoch": 61.0, "eval_explained_variance": 0.4265504479408264, "eval_loss": 0.3185857832431793, "eval_mae": 0.1509746015071869, "eval_r2": 0.42550772428512573, "eval_rmse": 0.23908774554729462, "eval_runtime": 78.0232, "eval_samples_per_second": 49.267, "eval_steps_per_second": 0.782, "learning_rate": 1e-05, "step": 11041 }, { "epoch": 62.0, "eval_explained_variance": 0.4262467920780182, "eval_loss": 0.32031872868537903, "eval_mae": 0.15960827469825745, "eval_r2": 0.4240296185016632, "eval_rmse": 0.23980861902236938, "eval_runtime": 78.7243, "eval_samples_per_second": 48.829, "eval_steps_per_second": 0.775, "learning_rate": 1e-05, "step": 11222 }, { "epoch": 63.0, "eval_explained_variance": 0.42764127254486084, "eval_loss": 0.31964218616485596, "eval_mae": 0.1570112109184265, "eval_r2": 0.42423343658447266, "eval_rmse": 0.23966987431049347, "eval_runtime": 79.5804, "eval_samples_per_second": 48.303, "eval_steps_per_second": 0.767, "learning_rate": 1e-05, "step": 11403 }, { "epoch": 63.53591160220994, "grad_norm": 0.25174766778945923, "learning_rate": 1e-05, "loss": 0.3142, "step": 11500 }, { "epoch": 64.0, "eval_explained_variance": 0.4253126382827759, "eval_loss": 0.31808170676231384, "eval_mae": 0.15270139276981354, "eval_r2": 0.42435672879219055, "eval_rmse": 0.23907549679279327, "eval_runtime": 79.2944, "eval_samples_per_second": 48.478, "eval_steps_per_second": 0.769, "learning_rate": 1e-05, "step": 11584 }, { "epoch": 65.0, "eval_explained_variance": 0.4263868033885956, "eval_loss": 0.31850185990333557, "eval_mae": 0.1550404131412506, "eval_r2": 0.4259207546710968, "eval_rmse": 0.23902735114097595, "eval_runtime": 78.9958, "eval_samples_per_second": 48.661, "eval_steps_per_second": 0.772, "learning_rate": 1e-05, "step": 11765 }, { "epoch": 66.0, "eval_explained_variance": 0.4291055500507355, "eval_loss": 0.3186076879501343, "eval_mae": 0.15623149275779724, "eval_r2": 0.42775318026542664, "eval_rmse": 0.23892195522785187, "eval_runtime": 79.1276, "eval_samples_per_second": 48.58, "eval_steps_per_second": 0.771, "learning_rate": 1.0000000000000002e-06, "step": 11946 }, { "epoch": 66.29834254143647, "grad_norm": 0.31120502948760986, "learning_rate": 1.0000000000000002e-06, "loss": 0.3131, "step": 12000 }, { "epoch": 67.0, "eval_explained_variance": 0.4278748631477356, "eval_loss": 0.3181016743183136, "eval_mae": 0.15263643860816956, "eval_r2": 0.42703741788864136, "eval_rmse": 0.2387426644563675, "eval_runtime": 77.8773, "eval_samples_per_second": 49.36, "eval_steps_per_second": 0.783, "learning_rate": 1.0000000000000002e-06, "step": 12127 }, { "epoch": 68.0, "eval_explained_variance": 0.42565029859542847, "eval_loss": 0.3194774389266968, "eval_mae": 0.15491320192813873, "eval_r2": 0.4220888018608093, "eval_rmse": 0.23974499106407166, "eval_runtime": 77.5869, "eval_samples_per_second": 49.544, "eval_steps_per_second": 0.786, "learning_rate": 1.0000000000000002e-06, "step": 12308 }, { "epoch": 69.0, "eval_explained_variance": 0.4274521470069885, "eval_loss": 0.3183264136314392, "eval_mae": 0.15404105186462402, "eval_r2": 0.4259319305419922, "eval_rmse": 0.23901674151420593, "eval_runtime": 78.3061, "eval_samples_per_second": 49.089, "eval_steps_per_second": 0.779, "learning_rate": 1.0000000000000002e-06, "step": 12489 }, { "epoch": 69.0, "learning_rate": 1.0000000000000002e-06, "step": 12489, "total_flos": 1.1767002635628401e+20, "train_loss": 0.32720188785662635, "train_runtime": 24708.1609, "train_samples_per_second": 69.949, "train_steps_per_second": 1.099 } ], "logging_steps": 500, "max_steps": 27150, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1767002635628401e+20, "train_batch_size": 64, "trial_name": null, "trial_params": null }