{ "best_metric": 0.32638314366340637, "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria-probabilities-large-2024_11_06-batch-size16_freeze_probs/checkpoint-25235", "epoch": 45.0, "eval_steps": 500, "global_step": 32445, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6934812760055479, "grad_norm": 0.6020499467849731, "learning_rate": 0.001, "loss": 0.4549, "step": 500 }, { "epoch": 1.0, "eval_explained_variance": 0.28087472915649414, "eval_loss": 0.36246591806411743, "eval_mae": 0.18796318769454956, "eval_r2": 0.2743779420852661, "eval_rmse": 0.26685553789138794, "eval_runtime": 84.6117, "eval_samples_per_second": 45.431, "eval_steps_per_second": 2.848, "learning_rate": 0.001, "step": 721 }, { "epoch": 1.3869625520110958, "grad_norm": 0.48873767256736755, "learning_rate": 0.001, "loss": 0.3806, "step": 1000 }, { "epoch": 2.0, "eval_explained_variance": 0.339480996131897, "eval_loss": 0.3457428216934204, "eval_mae": 0.16845344007015228, "eval_r2": 0.3366556167602539, "eval_rmse": 0.25599098205566406, "eval_runtime": 81.9342, "eval_samples_per_second": 46.916, "eval_steps_per_second": 2.941, "learning_rate": 0.001, "step": 1442 }, { "epoch": 2.0804438280166435, "grad_norm": 0.4343818128108978, "learning_rate": 0.001, "loss": 0.3697, "step": 1500 }, { "epoch": 2.7739251040221915, "grad_norm": 0.44139501452445984, "learning_rate": 0.001, "loss": 0.368, "step": 2000 }, { "epoch": 3.0, "eval_explained_variance": 0.3180062472820282, "eval_loss": 0.3518487811088562, "eval_mae": 0.17466770112514496, "eval_r2": 0.3157402276992798, "eval_rmse": 0.25968268513679504, "eval_runtime": 85.9096, "eval_samples_per_second": 44.745, "eval_steps_per_second": 2.805, "learning_rate": 0.001, "step": 2163 }, { "epoch": 3.4674063800277395, "grad_norm": 0.4190344512462616, "learning_rate": 0.001, "loss": 0.3637, "step": 2500 }, { "epoch": 4.0, "eval_explained_variance": 0.3375174403190613, "eval_loss": 0.3507988750934601, "eval_mae": 0.17512458562850952, "eval_r2": 0.3344818949699402, "eval_rmse": 0.2562903165817261, "eval_runtime": 82.3152, "eval_samples_per_second": 46.699, "eval_steps_per_second": 2.928, "learning_rate": 0.001, "step": 2884 }, { "epoch": 4.160887656033287, "grad_norm": 0.46223729848861694, "learning_rate": 0.001, "loss": 0.3604, "step": 3000 }, { "epoch": 4.854368932038835, "grad_norm": 0.397588312625885, "learning_rate": 0.001, "loss": 0.36, "step": 3500 }, { "epoch": 5.0, "eval_explained_variance": 0.3404175043106079, "eval_loss": 0.3436409533023834, "eval_mae": 0.16958864033222198, "eval_r2": 0.33709797263145447, "eval_rmse": 0.25463980436325073, "eval_runtime": 87.0977, "eval_samples_per_second": 44.134, "eval_steps_per_second": 2.767, "learning_rate": 0.001, "step": 3605 }, { "epoch": 5.547850208044383, "grad_norm": 0.3559507429599762, "learning_rate": 0.001, "loss": 0.3585, "step": 4000 }, { "epoch": 6.0, "eval_explained_variance": 0.321065753698349, "eval_loss": 0.35096481442451477, "eval_mae": 0.17673969268798828, "eval_r2": 0.31747376918792725, "eval_rmse": 0.25984567403793335, "eval_runtime": 88.6251, "eval_samples_per_second": 43.374, "eval_steps_per_second": 2.719, "learning_rate": 0.001, "step": 4326 }, { "epoch": 6.2413314840499305, "grad_norm": 0.3629322946071625, "learning_rate": 0.001, "loss": 0.3617, "step": 4500 }, { "epoch": 6.934812760055479, "grad_norm": 0.26215413212776184, "learning_rate": 0.001, "loss": 0.3581, "step": 5000 }, { "epoch": 7.0, "eval_explained_variance": 0.35101062059402466, "eval_loss": 0.3412320613861084, "eval_mae": 0.1749519258737564, "eval_r2": 0.3471425771713257, "eval_rmse": 0.2537590265274048, "eval_runtime": 86.9964, "eval_samples_per_second": 44.186, "eval_steps_per_second": 2.77, "learning_rate": 0.001, "step": 5047 }, { "epoch": 7.6282940360610265, "grad_norm": 0.23537978529930115, "learning_rate": 0.001, "loss": 0.3601, "step": 5500 }, { "epoch": 8.0, "eval_explained_variance": 0.3551764190196991, "eval_loss": 0.3456409275531769, "eval_mae": 0.167846217751503, "eval_r2": 0.34348151087760925, "eval_rmse": 0.25611478090286255, "eval_runtime": 87.0061, "eval_samples_per_second": 44.181, "eval_steps_per_second": 2.77, "learning_rate": 0.001, "step": 5768 }, { "epoch": 8.321775312066574, "grad_norm": 0.24255254864692688, "learning_rate": 0.001, "loss": 0.3619, "step": 6000 }, { "epoch": 9.0, "eval_explained_variance": 0.3427416980266571, "eval_loss": 0.3425351679325104, "eval_mae": 0.17405511438846588, "eval_r2": 0.3409159481525421, "eval_rmse": 0.25445955991744995, "eval_runtime": 87.2735, "eval_samples_per_second": 44.045, "eval_steps_per_second": 2.761, "learning_rate": 0.001, "step": 6489 }, { "epoch": 9.015256588072122, "grad_norm": 0.2687969207763672, "learning_rate": 0.001, "loss": 0.3527, "step": 6500 }, { "epoch": 9.70873786407767, "grad_norm": 0.22893770039081573, "learning_rate": 0.001, "loss": 0.355, "step": 7000 }, { "epoch": 10.0, "eval_explained_variance": 0.36017459630966187, "eval_loss": 0.33964109420776367, "eval_mae": 0.1710653305053711, "eval_r2": 0.3582787811756134, "eval_rmse": 0.25249695777893066, "eval_runtime": 92.8944, "eval_samples_per_second": 41.38, "eval_steps_per_second": 2.594, "learning_rate": 0.001, "step": 7210 }, { "epoch": 10.402219140083218, "grad_norm": 0.2397356480360031, "learning_rate": 0.001, "loss": 0.3574, "step": 7500 }, { "epoch": 11.0, "eval_explained_variance": 0.35241732001304626, "eval_loss": 0.34479108452796936, "eval_mae": 0.17209044098854065, "eval_r2": 0.3498174250125885, "eval_rmse": 0.25417372584342957, "eval_runtime": 93.2158, "eval_samples_per_second": 41.238, "eval_steps_per_second": 2.585, "learning_rate": 0.001, "step": 7931 }, { "epoch": 11.095700416088766, "grad_norm": 0.23666760325431824, "learning_rate": 0.001, "loss": 0.3548, "step": 8000 }, { "epoch": 11.789181692094314, "grad_norm": 0.17148445546627045, "learning_rate": 0.001, "loss": 0.3549, "step": 8500 }, { "epoch": 12.0, "eval_explained_variance": 0.3604218363761902, "eval_loss": 0.3415849804878235, "eval_mae": 0.17670249938964844, "eval_r2": 0.35767847299575806, "eval_rmse": 0.2527333199977875, "eval_runtime": 92.5773, "eval_samples_per_second": 41.522, "eval_steps_per_second": 2.603, "learning_rate": 0.001, "step": 8652 }, { "epoch": 12.482662968099861, "grad_norm": 0.2034599930047989, "learning_rate": 0.001, "loss": 0.354, "step": 9000 }, { "epoch": 13.0, "eval_explained_variance": 0.35451599955558777, "eval_loss": 0.33990854024887085, "eval_mae": 0.16771164536476135, "eval_r2": 0.3523372411727905, "eval_rmse": 0.25265443325042725, "eval_runtime": 91.5276, "eval_samples_per_second": 41.998, "eval_steps_per_second": 2.633, "learning_rate": 0.001, "step": 9373 }, { "epoch": 13.176144244105409, "grad_norm": 0.21185149252414703, "learning_rate": 0.001, "loss": 0.3555, "step": 9500 }, { "epoch": 13.869625520110956, "grad_norm": 0.24503760039806366, "learning_rate": 0.001, "loss": 0.3566, "step": 10000 }, { "epoch": 14.0, "eval_explained_variance": 0.34794220328330994, "eval_loss": 0.34520208835601807, "eval_mae": 0.1745852380990982, "eval_r2": 0.34426644444465637, "eval_rmse": 0.25396791100502014, "eval_runtime": 93.4248, "eval_samples_per_second": 41.145, "eval_steps_per_second": 2.58, "learning_rate": 0.001, "step": 10094 }, { "epoch": 14.563106796116505, "grad_norm": 0.29965028166770935, "learning_rate": 0.001, "loss": 0.3553, "step": 10500 }, { "epoch": 15.0, "eval_explained_variance": 0.3463059067726135, "eval_loss": 0.34849879145622253, "eval_mae": 0.18007972836494446, "eval_r2": 0.3333212435245514, "eval_rmse": 0.2568492293357849, "eval_runtime": 100.4245, "eval_samples_per_second": 38.278, "eval_steps_per_second": 2.4, "learning_rate": 0.001, "step": 10815 }, { "epoch": 15.256588072122053, "grad_norm": 0.26795288920402527, "learning_rate": 0.001, "loss": 0.3571, "step": 11000 }, { "epoch": 15.9500693481276, "grad_norm": 0.19320347905158997, "learning_rate": 0.001, "loss": 0.3536, "step": 11500 }, { "epoch": 16.0, "eval_explained_variance": 0.34989917278289795, "eval_loss": 0.34347954392433167, "eval_mae": 0.17179666459560394, "eval_r2": 0.34726396203041077, "eval_rmse": 0.2537030875682831, "eval_runtime": 103.999, "eval_samples_per_second": 36.962, "eval_steps_per_second": 2.317, "learning_rate": 0.001, "step": 11536 }, { "epoch": 16.643550624133148, "grad_norm": 0.19884039461612701, "learning_rate": 0.0001, "loss": 0.3518, "step": 12000 }, { "epoch": 17.0, "eval_explained_variance": 0.366793692111969, "eval_loss": 0.341246634721756, "eval_mae": 0.17114569246768951, "eval_r2": 0.36331599950790405, "eval_rmse": 0.25078731775283813, "eval_runtime": 92.1763, "eval_samples_per_second": 41.703, "eval_steps_per_second": 2.615, "learning_rate": 0.0001, "step": 12257 }, { "epoch": 17.337031900138697, "grad_norm": 0.19435305893421173, "learning_rate": 0.0001, "loss": 0.3475, "step": 12500 }, { "epoch": 18.0, "eval_explained_variance": 0.36558443307876587, "eval_loss": 0.3398562967777252, "eval_mae": 0.17082427442073822, "eval_r2": 0.36493533849716187, "eval_rmse": 0.25065672397613525, "eval_runtime": 98.7078, "eval_samples_per_second": 38.943, "eval_steps_per_second": 2.442, "learning_rate": 0.0001, "step": 12978 }, { "epoch": 18.030513176144243, "grad_norm": 0.2573840022087097, "learning_rate": 0.0001, "loss": 0.3449, "step": 13000 }, { "epoch": 18.723994452149793, "grad_norm": 0.21889527142047882, "learning_rate": 0.0001, "loss": 0.347, "step": 13500 }, { "epoch": 19.0, "eval_explained_variance": 0.3787304162979126, "eval_loss": 0.3332718312740326, "eval_mae": 0.16749390959739685, "eval_r2": 0.3774765133857727, "eval_rmse": 0.24829652905464172, "eval_runtime": 108.1714, "eval_samples_per_second": 35.536, "eval_steps_per_second": 2.228, "learning_rate": 0.0001, "step": 13699 }, { "epoch": 19.41747572815534, "grad_norm": 0.3126258850097656, "learning_rate": 0.0001, "loss": 0.3445, "step": 14000 }, { "epoch": 20.0, "eval_explained_variance": 0.3822024166584015, "eval_loss": 0.333162784576416, "eval_mae": 0.16876617074012756, "eval_r2": 0.3809906542301178, "eval_rmse": 0.2478322684764862, "eval_runtime": 93.5874, "eval_samples_per_second": 41.074, "eval_steps_per_second": 2.575, "learning_rate": 0.0001, "step": 14420 }, { "epoch": 20.110957004160888, "grad_norm": 0.25440576672554016, "learning_rate": 0.0001, "loss": 0.3474, "step": 14500 }, { "epoch": 20.804438280166437, "grad_norm": 0.23914161324501038, "learning_rate": 0.0001, "loss": 0.3447, "step": 15000 }, { "epoch": 21.0, "eval_explained_variance": 0.38327154517173767, "eval_loss": 0.3324449062347412, "eval_mae": 0.16733573377132416, "eval_r2": 0.38100260496139526, "eval_rmse": 0.24757729470729828, "eval_runtime": 97.5169, "eval_samples_per_second": 39.419, "eval_steps_per_second": 2.471, "learning_rate": 0.0001, "step": 15141 }, { "epoch": 21.497919556171983, "grad_norm": 0.23553606867790222, "learning_rate": 0.0001, "loss": 0.3445, "step": 15500 }, { "epoch": 22.0, "eval_explained_variance": 0.38486921787261963, "eval_loss": 0.3320053517818451, "eval_mae": 0.16713765263557434, "eval_r2": 0.38355034589767456, "eval_rmse": 0.2471724897623062, "eval_runtime": 95.4307, "eval_samples_per_second": 40.281, "eval_steps_per_second": 2.525, "learning_rate": 0.0001, "step": 15862 }, { "epoch": 22.191400832177532, "grad_norm": 0.23152843117713928, "learning_rate": 0.0001, "loss": 0.3441, "step": 16000 }, { "epoch": 22.884882108183078, "grad_norm": 0.23495590686798096, "learning_rate": 0.0001, "loss": 0.3398, "step": 16500 }, { "epoch": 23.0, "eval_explained_variance": 0.39000746607780457, "eval_loss": 0.3301050662994385, "eval_mae": 0.16577981412410736, "eval_r2": 0.3889786899089813, "eval_rmse": 0.24611681699752808, "eval_runtime": 100.934, "eval_samples_per_second": 38.084, "eval_steps_per_second": 2.388, "learning_rate": 0.0001, "step": 16583 }, { "epoch": 23.578363384188627, "grad_norm": 0.24516963958740234, "learning_rate": 0.0001, "loss": 0.3417, "step": 17000 }, { "epoch": 24.0, "eval_explained_variance": 0.3905264139175415, "eval_loss": 0.3298528492450714, "eval_mae": 0.16478358209133148, "eval_r2": 0.3899492621421814, "eval_rmse": 0.24583497643470764, "eval_runtime": 96.7182, "eval_samples_per_second": 39.744, "eval_steps_per_second": 2.492, "learning_rate": 0.0001, "step": 17304 }, { "epoch": 24.271844660194176, "grad_norm": 0.2366987019777298, "learning_rate": 0.0001, "loss": 0.3394, "step": 17500 }, { "epoch": 24.965325936199722, "grad_norm": 0.22849540412425995, "learning_rate": 0.0001, "loss": 0.3406, "step": 18000 }, { "epoch": 25.0, "eval_explained_variance": 0.39103788137435913, "eval_loss": 0.32962867617607117, "eval_mae": 0.1640922725200653, "eval_r2": 0.3903038799762726, "eval_rmse": 0.2458016723394394, "eval_runtime": 94.6291, "eval_samples_per_second": 40.622, "eval_steps_per_second": 2.547, "learning_rate": 0.0001, "step": 18025 }, { "epoch": 25.65880721220527, "grad_norm": 0.24230694770812988, "learning_rate": 0.0001, "loss": 0.3381, "step": 18500 }, { "epoch": 26.0, "eval_explained_variance": 0.39299964904785156, "eval_loss": 0.32889437675476074, "eval_mae": 0.1631881594657898, "eval_r2": 0.3926166296005249, "eval_rmse": 0.24544604122638702, "eval_runtime": 93.5737, "eval_samples_per_second": 41.08, "eval_steps_per_second": 2.576, "learning_rate": 0.0001, "step": 18746 }, { "epoch": 26.352288488210817, "grad_norm": 0.2334737330675125, "learning_rate": 0.0001, "loss": 0.3399, "step": 19000 }, { "epoch": 27.0, "eval_explained_variance": 0.39082765579223633, "eval_loss": 0.33042922616004944, "eval_mae": 0.1674499809741974, "eval_r2": 0.38912835717201233, "eval_rmse": 0.2460869997739792, "eval_runtime": 90.4822, "eval_samples_per_second": 42.483, "eval_steps_per_second": 2.664, "learning_rate": 0.0001, "step": 19467 }, { "epoch": 27.045769764216367, "grad_norm": 0.23623766005039215, "learning_rate": 0.0001, "loss": 0.339, "step": 19500 }, { "epoch": 27.739251040221912, "grad_norm": 0.237477108836174, "learning_rate": 0.0001, "loss": 0.3377, "step": 20000 }, { "epoch": 28.0, "eval_explained_variance": 0.3972352147102356, "eval_loss": 0.32880541682243347, "eval_mae": 0.16454365849494934, "eval_r2": 0.3955116868019104, "eval_rmse": 0.24511639773845673, "eval_runtime": 89.1819, "eval_samples_per_second": 43.103, "eval_steps_per_second": 2.702, "learning_rate": 0.0001, "step": 20188 }, { "epoch": 28.43273231622746, "grad_norm": 0.23042118549346924, "learning_rate": 0.0001, "loss": 0.3384, "step": 20500 }, { "epoch": 29.0, "eval_explained_variance": 0.39730560779571533, "eval_loss": 0.3293789327144623, "eval_mae": 0.16559576988220215, "eval_r2": 0.3961379826068878, "eval_rmse": 0.24510112404823303, "eval_runtime": 88.0385, "eval_samples_per_second": 43.663, "eval_steps_per_second": 2.737, "learning_rate": 0.0001, "step": 20909 }, { "epoch": 29.12621359223301, "grad_norm": 0.4347997307777405, "learning_rate": 0.0001, "loss": 0.3396, "step": 21000 }, { "epoch": 29.819694868238557, "grad_norm": 0.38524329662323, "learning_rate": 0.0001, "loss": 0.3372, "step": 21500 }, { "epoch": 30.0, "eval_explained_variance": 0.3955426812171936, "eval_loss": 0.33135533332824707, "eval_mae": 0.16844958066940308, "eval_r2": 0.3913615643978119, "eval_rmse": 0.24635259807109833, "eval_runtime": 91.08, "eval_samples_per_second": 42.205, "eval_steps_per_second": 2.646, "learning_rate": 0.0001, "step": 21630 }, { "epoch": 30.513176144244106, "grad_norm": 0.3073582947254181, "learning_rate": 0.0001, "loss": 0.3375, "step": 22000 }, { "epoch": 31.0, "eval_explained_variance": 0.3935950696468353, "eval_loss": 0.32911789417266846, "eval_mae": 0.16081956028938293, "eval_r2": 0.3904249966144562, "eval_rmse": 0.24574027955532074, "eval_runtime": 90.421, "eval_samples_per_second": 42.512, "eval_steps_per_second": 2.665, "learning_rate": 0.0001, "step": 22351 }, { "epoch": 31.206657420249652, "grad_norm": 0.31049737334251404, "learning_rate": 0.0001, "loss": 0.339, "step": 22500 }, { "epoch": 31.9001386962552, "grad_norm": 0.40785181522369385, "learning_rate": 0.0001, "loss": 0.3373, "step": 23000 }, { "epoch": 32.0, "eval_explained_variance": 0.3971378207206726, "eval_loss": 0.3289436399936676, "eval_mae": 0.16307200491428375, "eval_r2": 0.3959096372127533, "eval_rmse": 0.24528969824314117, "eval_runtime": 92.2146, "eval_samples_per_second": 41.685, "eval_steps_per_second": 2.613, "learning_rate": 0.0001, "step": 23072 }, { "epoch": 32.59361997226075, "grad_norm": 0.3135410249233246, "learning_rate": 0.0001, "loss": 0.3362, "step": 23500 }, { "epoch": 33.0, "eval_explained_variance": 0.3988523483276367, "eval_loss": 0.3271527588367462, "eval_mae": 0.16276519000530243, "eval_r2": 0.3971821069717407, "eval_rmse": 0.24443718791007996, "eval_runtime": 89.5278, "eval_samples_per_second": 42.936, "eval_steps_per_second": 2.692, "learning_rate": 0.0001, "step": 23793 }, { "epoch": 33.287101248266296, "grad_norm": 0.22212661802768707, "learning_rate": 0.0001, "loss": 0.337, "step": 24000 }, { "epoch": 33.980582524271846, "grad_norm": 0.25206565856933594, "learning_rate": 0.0001, "loss": 0.3371, "step": 24500 }, { "epoch": 34.0, "eval_explained_variance": 0.39812082052230835, "eval_loss": 0.32699429988861084, "eval_mae": 0.16210177540779114, "eval_r2": 0.39756229519844055, "eval_rmse": 0.24429556727409363, "eval_runtime": 90.0899, "eval_samples_per_second": 42.668, "eval_steps_per_second": 2.675, "learning_rate": 0.0001, "step": 24514 }, { "epoch": 34.674063800277395, "grad_norm": 0.27033254504203796, "learning_rate": 0.0001, "loss": 0.3342, "step": 25000 }, { "epoch": 35.0, "eval_explained_variance": 0.3996908366680145, "eval_loss": 0.32638314366340637, "eval_mae": 0.1614546775817871, "eval_r2": 0.39874374866485596, "eval_rmse": 0.24386192858219147, "eval_runtime": 88.9919, "eval_samples_per_second": 43.195, "eval_steps_per_second": 2.708, "learning_rate": 0.0001, "step": 25235 }, { "epoch": 35.36754507628294, "grad_norm": 0.3120824098587036, "learning_rate": 0.0001, "loss": 0.3367, "step": 25500 }, { "epoch": 36.0, "eval_explained_variance": 0.3955422639846802, "eval_loss": 0.3293066918849945, "eval_mae": 0.16555820405483246, "eval_r2": 0.39463794231414795, "eval_rmse": 0.24545253813266754, "eval_runtime": 91.9694, "eval_samples_per_second": 41.796, "eval_steps_per_second": 2.62, "learning_rate": 0.0001, "step": 25956 }, { "epoch": 36.061026352288486, "grad_norm": 0.3190695345401764, "learning_rate": 0.0001, "loss": 0.3386, "step": 26000 }, { "epoch": 36.754507628294036, "grad_norm": 0.2679268717765808, "learning_rate": 0.0001, "loss": 0.3363, "step": 26500 }, { "epoch": 37.0, "eval_explained_variance": 0.4032209515571594, "eval_loss": 0.3271186649799347, "eval_mae": 0.1597272753715515, "eval_r2": 0.39962705969810486, "eval_rmse": 0.24424488842487335, "eval_runtime": 88.1563, "eval_samples_per_second": 43.604, "eval_steps_per_second": 2.734, "learning_rate": 0.0001, "step": 26677 }, { "epoch": 37.447988904299585, "grad_norm": 0.2553563714027405, "learning_rate": 0.0001, "loss": 0.3357, "step": 27000 }, { "epoch": 38.0, "eval_explained_variance": 0.4041087031364441, "eval_loss": 0.32695677876472473, "eval_mae": 0.16126109659671783, "eval_r2": 0.402229368686676, "eval_rmse": 0.24366919696331024, "eval_runtime": 91.2875, "eval_samples_per_second": 42.109, "eval_steps_per_second": 2.64, "learning_rate": 0.0001, "step": 27398 }, { "epoch": 38.141470180305134, "grad_norm": 0.3718196451663971, "learning_rate": 0.0001, "loss": 0.3348, "step": 27500 }, { "epoch": 38.83495145631068, "grad_norm": 0.30295658111572266, "learning_rate": 0.0001, "loss": 0.3377, "step": 28000 }, { "epoch": 39.0, "eval_explained_variance": 0.4026513695716858, "eval_loss": 0.33263665437698364, "eval_mae": 0.1575259119272232, "eval_r2": 0.4006690979003906, "eval_rmse": 0.24382774531841278, "eval_runtime": 93.1613, "eval_samples_per_second": 41.262, "eval_steps_per_second": 2.587, "learning_rate": 0.0001, "step": 28119 }, { "epoch": 39.528432732316226, "grad_norm": 0.3047119081020355, "learning_rate": 0.0001, "loss": 0.3354, "step": 28500 }, { "epoch": 40.0, "eval_explained_variance": 0.40179282426834106, "eval_loss": 0.33278176188468933, "eval_mae": 0.16507098078727722, "eval_r2": 0.4002922475337982, "eval_rmse": 0.2442423701286316, "eval_runtime": 98.0451, "eval_samples_per_second": 39.206, "eval_steps_per_second": 2.458, "learning_rate": 0.0001, "step": 28840 }, { "epoch": 40.221914008321775, "grad_norm": 0.2754528522491455, "learning_rate": 0.0001, "loss": 0.3367, "step": 29000 }, { "epoch": 40.915395284327325, "grad_norm": 0.5425918102264404, "learning_rate": 0.0001, "loss": 0.3363, "step": 29500 }, { "epoch": 41.0, "eval_explained_variance": 0.4044828712940216, "eval_loss": 0.33069443702697754, "eval_mae": 0.16271242499351501, "eval_r2": 0.4031254053115845, "eval_rmse": 0.24350450932979584, "eval_runtime": 93.471, "eval_samples_per_second": 41.125, "eval_steps_per_second": 2.578, "learning_rate": 0.0001, "step": 29561 }, { "epoch": 41.608876560332874, "grad_norm": 0.4664023518562317, "learning_rate": 1e-05, "loss": 0.335, "step": 30000 }, { "epoch": 42.0, "eval_explained_variance": 0.4039740562438965, "eval_loss": 0.3310275375843048, "eval_mae": 0.1640516221523285, "eval_r2": 0.40303775668144226, "eval_rmse": 0.24363353848457336, "eval_runtime": 91.0645, "eval_samples_per_second": 42.212, "eval_steps_per_second": 2.646, "learning_rate": 1e-05, "step": 30282 }, { "epoch": 42.302357836338416, "grad_norm": 0.30786848068237305, "learning_rate": 1e-05, "loss": 0.3348, "step": 30500 }, { "epoch": 42.995839112343965, "grad_norm": 0.3475857079029083, "learning_rate": 1e-05, "loss": 0.334, "step": 31000 }, { "epoch": 43.0, "eval_explained_variance": 0.4058697521686554, "eval_loss": 0.32956016063690186, "eval_mae": 0.16028979420661926, "eval_r2": 0.40524527430534363, "eval_rmse": 0.2429088056087494, "eval_runtime": 91.4351, "eval_samples_per_second": 42.041, "eval_steps_per_second": 2.636, "learning_rate": 1e-05, "step": 31003 }, { "epoch": 43.689320388349515, "grad_norm": 0.38462796807289124, "learning_rate": 1e-05, "loss": 0.3366, "step": 31500 }, { "epoch": 44.0, "eval_explained_variance": 0.40545278787612915, "eval_loss": 0.33022987842559814, "eval_mae": 0.16247908771038055, "eval_r2": 0.4038069546222687, "eval_rmse": 0.2432354837656021, "eval_runtime": 95.2656, "eval_samples_per_second": 40.35, "eval_steps_per_second": 2.53, "learning_rate": 1e-05, "step": 31724 }, { "epoch": 44.382801664355064, "grad_norm": 0.3637019395828247, "learning_rate": 1e-05, "loss": 0.3326, "step": 32000 }, { "epoch": 45.0, "eval_explained_variance": 0.405474990606308, "eval_loss": 0.3266430199146271, "eval_mae": 0.16172955930233002, "eval_r2": 0.4047379195690155, "eval_rmse": 0.24298621714115143, "eval_runtime": 92.749, "eval_samples_per_second": 41.445, "eval_steps_per_second": 2.598, "learning_rate": 1e-05, "step": 32445 }, { "epoch": 45.0, "learning_rate": 1e-05, "step": 32445, "total_flos": 7.674132153670687e+19, "train_loss": 0.3477805222434721, "train_runtime": 18758.0673, "train_samples_per_second": 92.136, "train_steps_per_second": 5.766 } ], "logging_steps": 500, "max_steps": 108150, "num_input_tokens_seen": 0, "num_train_epochs": 150, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.674132153670687e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }