{
  "best_metric": 0.32638314366340637,
  "best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria-probabilities-large-2024_11_06-batch-size16_freeze_probs/checkpoint-25235",
  "epoch": 45.0,
  "eval_steps": 500,
  "global_step": 32445,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.6934812760055479,
      "grad_norm": 0.6020499467849731,
      "learning_rate": 0.001,
      "loss": 0.4549,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_explained_variance": 0.28087472915649414,
      "eval_loss": 0.36246591806411743,
      "eval_mae": 0.18796318769454956,
      "eval_r2": 0.2743779420852661,
      "eval_rmse": 0.26685553789138794,
      "eval_runtime": 84.6117,
      "eval_samples_per_second": 45.431,
      "eval_steps_per_second": 2.848,
      "learning_rate": 0.001,
      "step": 721
    },
    {
      "epoch": 1.3869625520110958,
      "grad_norm": 0.48873767256736755,
      "learning_rate": 0.001,
      "loss": 0.3806,
      "step": 1000
    },
    {
      "epoch": 2.0,
      "eval_explained_variance": 0.339480996131897,
      "eval_loss": 0.3457428216934204,
      "eval_mae": 0.16845344007015228,
      "eval_r2": 0.3366556167602539,
      "eval_rmse": 0.25599098205566406,
      "eval_runtime": 81.9342,
      "eval_samples_per_second": 46.916,
      "eval_steps_per_second": 2.941,
      "learning_rate": 0.001,
      "step": 1442
    },
    {
      "epoch": 2.0804438280166435,
      "grad_norm": 0.4343818128108978,
      "learning_rate": 0.001,
      "loss": 0.3697,
      "step": 1500
    },
    {
      "epoch": 2.7739251040221915,
      "grad_norm": 0.44139501452445984,
      "learning_rate": 0.001,
      "loss": 0.368,
      "step": 2000
    },
    {
      "epoch": 3.0,
      "eval_explained_variance": 0.3180062472820282,
      "eval_loss": 0.3518487811088562,
      "eval_mae": 0.17466770112514496,
      "eval_r2": 0.3157402276992798,
      "eval_rmse": 0.25968268513679504,
      "eval_runtime": 85.9096,
      "eval_samples_per_second": 44.745,
      "eval_steps_per_second": 2.805,
      "learning_rate": 0.001,
      "step": 2163
    },
    {
      "epoch": 3.4674063800277395,
      "grad_norm": 0.4190344512462616,
      "learning_rate": 0.001,
      "loss": 0.3637,
      "step": 2500
    },
    {
      "epoch": 4.0,
      "eval_explained_variance": 0.3375174403190613,
      "eval_loss": 0.3507988750934601,
      "eval_mae": 0.17512458562850952,
      "eval_r2": 0.3344818949699402,
      "eval_rmse": 0.2562903165817261,
      "eval_runtime": 82.3152,
      "eval_samples_per_second": 46.699,
      "eval_steps_per_second": 2.928,
      "learning_rate": 0.001,
      "step": 2884
    },
    {
      "epoch": 4.160887656033287,
      "grad_norm": 0.46223729848861694,
      "learning_rate": 0.001,
      "loss": 0.3604,
      "step": 3000
    },
    {
      "epoch": 4.854368932038835,
      "grad_norm": 0.397588312625885,
      "learning_rate": 0.001,
      "loss": 0.36,
      "step": 3500
    },
    {
      "epoch": 5.0,
      "eval_explained_variance": 0.3404175043106079,
      "eval_loss": 0.3436409533023834,
      "eval_mae": 0.16958864033222198,
      "eval_r2": 0.33709797263145447,
      "eval_rmse": 0.25463980436325073,
      "eval_runtime": 87.0977,
      "eval_samples_per_second": 44.134,
      "eval_steps_per_second": 2.767,
      "learning_rate": 0.001,
      "step": 3605
    },
    {
      "epoch": 5.547850208044383,
      "grad_norm": 0.3559507429599762,
      "learning_rate": 0.001,
      "loss": 0.3585,
      "step": 4000
    },
    {
      "epoch": 6.0,
      "eval_explained_variance": 0.321065753698349,
      "eval_loss": 0.35096481442451477,
      "eval_mae": 0.17673969268798828,
      "eval_r2": 0.31747376918792725,
      "eval_rmse": 0.25984567403793335,
      "eval_runtime": 88.6251,
      "eval_samples_per_second": 43.374,
      "eval_steps_per_second": 2.719,
      "learning_rate": 0.001,
      "step": 4326
    },
    {
      "epoch": 6.2413314840499305,
      "grad_norm": 0.3629322946071625,
      "learning_rate": 0.001,
      "loss": 0.3617,
      "step": 4500
    },
    {
      "epoch": 6.934812760055479,
      "grad_norm": 0.26215413212776184,
      "learning_rate": 0.001,
      "loss": 0.3581,
      "step": 5000
    },
    {
      "epoch": 7.0,
      "eval_explained_variance": 0.35101062059402466,
      "eval_loss": 0.3412320613861084,
      "eval_mae": 0.1749519258737564,
      "eval_r2": 0.3471425771713257,
      "eval_rmse": 0.2537590265274048,
      "eval_runtime": 86.9964,
      "eval_samples_per_second": 44.186,
      "eval_steps_per_second": 2.77,
      "learning_rate": 0.001,
      "step": 5047
    },
    {
      "epoch": 7.6282940360610265,
      "grad_norm": 0.23537978529930115,
      "learning_rate": 0.001,
      "loss": 0.3601,
      "step": 5500
    },
    {
      "epoch": 8.0,
      "eval_explained_variance": 0.3551764190196991,
      "eval_loss": 0.3456409275531769,
      "eval_mae": 0.167846217751503,
      "eval_r2": 0.34348151087760925,
      "eval_rmse": 0.25611478090286255,
      "eval_runtime": 87.0061,
      "eval_samples_per_second": 44.181,
      "eval_steps_per_second": 2.77,
      "learning_rate": 0.001,
      "step": 5768
    },
    {
      "epoch": 8.321775312066574,
      "grad_norm": 0.24255254864692688,
      "learning_rate": 0.001,
      "loss": 0.3619,
      "step": 6000
    },
    {
      "epoch": 9.0,
      "eval_explained_variance": 0.3427416980266571,
      "eval_loss": 0.3425351679325104,
      "eval_mae": 0.17405511438846588,
      "eval_r2": 0.3409159481525421,
      "eval_rmse": 0.25445955991744995,
      "eval_runtime": 87.2735,
      "eval_samples_per_second": 44.045,
      "eval_steps_per_second": 2.761,
      "learning_rate": 0.001,
      "step": 6489
    },
    {
      "epoch": 9.015256588072122,
      "grad_norm": 0.2687969207763672,
      "learning_rate": 0.001,
      "loss": 0.3527,
      "step": 6500
    },
    {
      "epoch": 9.70873786407767,
      "grad_norm": 0.22893770039081573,
      "learning_rate": 0.001,
      "loss": 0.355,
      "step": 7000
    },
    {
      "epoch": 10.0,
      "eval_explained_variance": 0.36017459630966187,
      "eval_loss": 0.33964109420776367,
      "eval_mae": 0.1710653305053711,
      "eval_r2": 0.3582787811756134,
      "eval_rmse": 0.25249695777893066,
      "eval_runtime": 92.8944,
      "eval_samples_per_second": 41.38,
      "eval_steps_per_second": 2.594,
      "learning_rate": 0.001,
      "step": 7210
    },
    {
      "epoch": 10.402219140083218,
      "grad_norm": 0.2397356480360031,
      "learning_rate": 0.001,
      "loss": 0.3574,
      "step": 7500
    },
    {
      "epoch": 11.0,
      "eval_explained_variance": 0.35241732001304626,
      "eval_loss": 0.34479108452796936,
      "eval_mae": 0.17209044098854065,
      "eval_r2": 0.3498174250125885,
      "eval_rmse": 0.25417372584342957,
      "eval_runtime": 93.2158,
      "eval_samples_per_second": 41.238,
      "eval_steps_per_second": 2.585,
      "learning_rate": 0.001,
      "step": 7931
    },
    {
      "epoch": 11.095700416088766,
      "grad_norm": 0.23666760325431824,
      "learning_rate": 0.001,
      "loss": 0.3548,
      "step": 8000
    },
    {
      "epoch": 11.789181692094314,
      "grad_norm": 0.17148445546627045,
      "learning_rate": 0.001,
      "loss": 0.3549,
      "step": 8500
    },
    {
      "epoch": 12.0,
      "eval_explained_variance": 0.3604218363761902,
      "eval_loss": 0.3415849804878235,
      "eval_mae": 0.17670249938964844,
      "eval_r2": 0.35767847299575806,
      "eval_rmse": 0.2527333199977875,
      "eval_runtime": 92.5773,
      "eval_samples_per_second": 41.522,
      "eval_steps_per_second": 2.603,
      "learning_rate": 0.001,
      "step": 8652
    },
    {
      "epoch": 12.482662968099861,
      "grad_norm": 0.2034599930047989,
      "learning_rate": 0.001,
      "loss": 0.354,
      "step": 9000
    },
    {
      "epoch": 13.0,
      "eval_explained_variance": 0.35451599955558777,
      "eval_loss": 0.33990854024887085,
      "eval_mae": 0.16771164536476135,
      "eval_r2": 0.3523372411727905,
      "eval_rmse": 0.25265443325042725,
      "eval_runtime": 91.5276,
      "eval_samples_per_second": 41.998,
      "eval_steps_per_second": 2.633,
      "learning_rate": 0.001,
      "step": 9373
    },
    {
      "epoch": 13.176144244105409,
      "grad_norm": 0.21185149252414703,
      "learning_rate": 0.001,
      "loss": 0.3555,
      "step": 9500
    },
    {
      "epoch": 13.869625520110956,
      "grad_norm": 0.24503760039806366,
      "learning_rate": 0.001,
      "loss": 0.3566,
      "step": 10000
    },
    {
      "epoch": 14.0,
      "eval_explained_variance": 0.34794220328330994,
      "eval_loss": 0.34520208835601807,
      "eval_mae": 0.1745852380990982,
      "eval_r2": 0.34426644444465637,
      "eval_rmse": 0.25396791100502014,
      "eval_runtime": 93.4248,
      "eval_samples_per_second": 41.145,
      "eval_steps_per_second": 2.58,
      "learning_rate": 0.001,
      "step": 10094
    },
    {
      "epoch": 14.563106796116505,
      "grad_norm": 0.29965028166770935,
      "learning_rate": 0.001,
      "loss": 0.3553,
      "step": 10500
    },
    {
      "epoch": 15.0,
      "eval_explained_variance": 0.3463059067726135,
      "eval_loss": 0.34849879145622253,
      "eval_mae": 0.18007972836494446,
      "eval_r2": 0.3333212435245514,
      "eval_rmse": 0.2568492293357849,
      "eval_runtime": 100.4245,
      "eval_samples_per_second": 38.278,
      "eval_steps_per_second": 2.4,
      "learning_rate": 0.001,
      "step": 10815
    },
    {
      "epoch": 15.256588072122053,
      "grad_norm": 0.26795288920402527,
      "learning_rate": 0.001,
      "loss": 0.3571,
      "step": 11000
    },
    {
      "epoch": 15.9500693481276,
      "grad_norm": 0.19320347905158997,
      "learning_rate": 0.001,
      "loss": 0.3536,
      "step": 11500
    },
    {
      "epoch": 16.0,
      "eval_explained_variance": 0.34989917278289795,
      "eval_loss": 0.34347954392433167,
      "eval_mae": 0.17179666459560394,
      "eval_r2": 0.34726396203041077,
      "eval_rmse": 0.2537030875682831,
      "eval_runtime": 103.999,
      "eval_samples_per_second": 36.962,
      "eval_steps_per_second": 2.317,
      "learning_rate": 0.001,
      "step": 11536
    },
    {
      "epoch": 16.643550624133148,
      "grad_norm": 0.19884039461612701,
      "learning_rate": 0.0001,
      "loss": 0.3518,
      "step": 12000
    },
    {
      "epoch": 17.0,
      "eval_explained_variance": 0.366793692111969,
      "eval_loss": 0.341246634721756,
      "eval_mae": 0.17114569246768951,
      "eval_r2": 0.36331599950790405,
      "eval_rmse": 0.25078731775283813,
      "eval_runtime": 92.1763,
      "eval_samples_per_second": 41.703,
      "eval_steps_per_second": 2.615,
      "learning_rate": 0.0001,
      "step": 12257
    },
    {
      "epoch": 17.337031900138697,
      "grad_norm": 0.19435305893421173,
      "learning_rate": 0.0001,
      "loss": 0.3475,
      "step": 12500
    },
    {
      "epoch": 18.0,
      "eval_explained_variance": 0.36558443307876587,
      "eval_loss": 0.3398562967777252,
      "eval_mae": 0.17082427442073822,
      "eval_r2": 0.36493533849716187,
      "eval_rmse": 0.25065672397613525,
      "eval_runtime": 98.7078,
      "eval_samples_per_second": 38.943,
      "eval_steps_per_second": 2.442,
      "learning_rate": 0.0001,
      "step": 12978
    },
    {
      "epoch": 18.030513176144243,
      "grad_norm": 0.2573840022087097,
      "learning_rate": 0.0001,
      "loss": 0.3449,
      "step": 13000
    },
    {
      "epoch": 18.723994452149793,
      "grad_norm": 0.21889527142047882,
      "learning_rate": 0.0001,
      "loss": 0.347,
      "step": 13500
    },
    {
      "epoch": 19.0,
      "eval_explained_variance": 0.3787304162979126,
      "eval_loss": 0.3332718312740326,
      "eval_mae": 0.16749390959739685,
      "eval_r2": 0.3774765133857727,
      "eval_rmse": 0.24829652905464172,
      "eval_runtime": 108.1714,
      "eval_samples_per_second": 35.536,
      "eval_steps_per_second": 2.228,
      "learning_rate": 0.0001,
      "step": 13699
    },
    {
      "epoch": 19.41747572815534,
      "grad_norm": 0.3126258850097656,
      "learning_rate": 0.0001,
      "loss": 0.3445,
      "step": 14000
    },
    {
      "epoch": 20.0,
      "eval_explained_variance": 0.3822024166584015,
      "eval_loss": 0.333162784576416,
      "eval_mae": 0.16876617074012756,
      "eval_r2": 0.3809906542301178,
      "eval_rmse": 0.2478322684764862,
      "eval_runtime": 93.5874,
      "eval_samples_per_second": 41.074,
      "eval_steps_per_second": 2.575,
      "learning_rate": 0.0001,
      "step": 14420
    },
    {
      "epoch": 20.110957004160888,
      "grad_norm": 0.25440576672554016,
      "learning_rate": 0.0001,
      "loss": 0.3474,
      "step": 14500
    },
    {
      "epoch": 20.804438280166437,
      "grad_norm": 0.23914161324501038,
      "learning_rate": 0.0001,
      "loss": 0.3447,
      "step": 15000
    },
    {
      "epoch": 21.0,
      "eval_explained_variance": 0.38327154517173767,
      "eval_loss": 0.3324449062347412,
      "eval_mae": 0.16733573377132416,
      "eval_r2": 0.38100260496139526,
      "eval_rmse": 0.24757729470729828,
      "eval_runtime": 97.5169,
      "eval_samples_per_second": 39.419,
      "eval_steps_per_second": 2.471,
      "learning_rate": 0.0001,
      "step": 15141
    },
    {
      "epoch": 21.497919556171983,
      "grad_norm": 0.23553606867790222,
      "learning_rate": 0.0001,
      "loss": 0.3445,
      "step": 15500
    },
    {
      "epoch": 22.0,
      "eval_explained_variance": 0.38486921787261963,
      "eval_loss": 0.3320053517818451,
      "eval_mae": 0.16713765263557434,
      "eval_r2": 0.38355034589767456,
      "eval_rmse": 0.2471724897623062,
      "eval_runtime": 95.4307,
      "eval_samples_per_second": 40.281,
      "eval_steps_per_second": 2.525,
      "learning_rate": 0.0001,
      "step": 15862
    },
    {
      "epoch": 22.191400832177532,
      "grad_norm": 0.23152843117713928,
      "learning_rate": 0.0001,
      "loss": 0.3441,
      "step": 16000
    },
    {
      "epoch": 22.884882108183078,
      "grad_norm": 0.23495590686798096,
      "learning_rate": 0.0001,
      "loss": 0.3398,
      "step": 16500
    },
    {
      "epoch": 23.0,
      "eval_explained_variance": 0.39000746607780457,
      "eval_loss": 0.3301050662994385,
      "eval_mae": 0.16577981412410736,
      "eval_r2": 0.3889786899089813,
      "eval_rmse": 0.24611681699752808,
      "eval_runtime": 100.934,
      "eval_samples_per_second": 38.084,
      "eval_steps_per_second": 2.388,
      "learning_rate": 0.0001,
      "step": 16583
    },
    {
      "epoch": 23.578363384188627,
      "grad_norm": 0.24516963958740234,
      "learning_rate": 0.0001,
      "loss": 0.3417,
      "step": 17000
    },
    {
      "epoch": 24.0,
      "eval_explained_variance": 0.3905264139175415,
      "eval_loss": 0.3298528492450714,
      "eval_mae": 0.16478358209133148,
      "eval_r2": 0.3899492621421814,
      "eval_rmse": 0.24583497643470764,
      "eval_runtime": 96.7182,
      "eval_samples_per_second": 39.744,
      "eval_steps_per_second": 2.492,
      "learning_rate": 0.0001,
      "step": 17304
    },
    {
      "epoch": 24.271844660194176,
      "grad_norm": 0.2366987019777298,
      "learning_rate": 0.0001,
      "loss": 0.3394,
      "step": 17500
    },
    {
      "epoch": 24.965325936199722,
      "grad_norm": 0.22849540412425995,
      "learning_rate": 0.0001,
      "loss": 0.3406,
      "step": 18000
    },
    {
      "epoch": 25.0,
      "eval_explained_variance": 0.39103788137435913,
      "eval_loss": 0.32962867617607117,
      "eval_mae": 0.1640922725200653,
      "eval_r2": 0.3903038799762726,
      "eval_rmse": 0.2458016723394394,
      "eval_runtime": 94.6291,
      "eval_samples_per_second": 40.622,
      "eval_steps_per_second": 2.547,
      "learning_rate": 0.0001,
      "step": 18025
    },
    {
      "epoch": 25.65880721220527,
      "grad_norm": 0.24230694770812988,
      "learning_rate": 0.0001,
      "loss": 0.3381,
      "step": 18500
    },
    {
      "epoch": 26.0,
      "eval_explained_variance": 0.39299964904785156,
      "eval_loss": 0.32889437675476074,
      "eval_mae": 0.1631881594657898,
      "eval_r2": 0.3926166296005249,
      "eval_rmse": 0.24544604122638702,
      "eval_runtime": 93.5737,
      "eval_samples_per_second": 41.08,
      "eval_steps_per_second": 2.576,
      "learning_rate": 0.0001,
      "step": 18746
    },
    {
      "epoch": 26.352288488210817,
      "grad_norm": 0.2334737330675125,
      "learning_rate": 0.0001,
      "loss": 0.3399,
      "step": 19000
    },
    {
      "epoch": 27.0,
      "eval_explained_variance": 0.39082765579223633,
      "eval_loss": 0.33042922616004944,
      "eval_mae": 0.1674499809741974,
      "eval_r2": 0.38912835717201233,
      "eval_rmse": 0.2460869997739792,
      "eval_runtime": 90.4822,
      "eval_samples_per_second": 42.483,
      "eval_steps_per_second": 2.664,
      "learning_rate": 0.0001,
      "step": 19467
    },
    {
      "epoch": 27.045769764216367,
      "grad_norm": 0.23623766005039215,
      "learning_rate": 0.0001,
      "loss": 0.339,
      "step": 19500
    },
    {
      "epoch": 27.739251040221912,
      "grad_norm": 0.237477108836174,
      "learning_rate": 0.0001,
      "loss": 0.3377,
      "step": 20000
    },
    {
      "epoch": 28.0,
      "eval_explained_variance": 0.3972352147102356,
      "eval_loss": 0.32880541682243347,
      "eval_mae": 0.16454365849494934,
      "eval_r2": 0.3955116868019104,
      "eval_rmse": 0.24511639773845673,
      "eval_runtime": 89.1819,
      "eval_samples_per_second": 43.103,
      "eval_steps_per_second": 2.702,
      "learning_rate": 0.0001,
      "step": 20188
    },
    {
      "epoch": 28.43273231622746,
      "grad_norm": 0.23042118549346924,
      "learning_rate": 0.0001,
      "loss": 0.3384,
      "step": 20500
    },
    {
      "epoch": 29.0,
      "eval_explained_variance": 0.39730560779571533,
      "eval_loss": 0.3293789327144623,
      "eval_mae": 0.16559576988220215,
      "eval_r2": 0.3961379826068878,
      "eval_rmse": 0.24510112404823303,
      "eval_runtime": 88.0385,
      "eval_samples_per_second": 43.663,
      "eval_steps_per_second": 2.737,
      "learning_rate": 0.0001,
      "step": 20909
    },
    {
      "epoch": 29.12621359223301,
      "grad_norm": 0.4347997307777405,
      "learning_rate": 0.0001,
      "loss": 0.3396,
      "step": 21000
    },
    {
      "epoch": 29.819694868238557,
      "grad_norm": 0.38524329662323,
      "learning_rate": 0.0001,
      "loss": 0.3372,
      "step": 21500
    },
    {
      "epoch": 30.0,
      "eval_explained_variance": 0.3955426812171936,
      "eval_loss": 0.33135533332824707,
      "eval_mae": 0.16844958066940308,
      "eval_r2": 0.3913615643978119,
      "eval_rmse": 0.24635259807109833,
      "eval_runtime": 91.08,
      "eval_samples_per_second": 42.205,
      "eval_steps_per_second": 2.646,
      "learning_rate": 0.0001,
      "step": 21630
    },
    {
      "epoch": 30.513176144244106,
      "grad_norm": 0.3073582947254181,
      "learning_rate": 0.0001,
      "loss": 0.3375,
      "step": 22000
    },
    {
      "epoch": 31.0,
      "eval_explained_variance": 0.3935950696468353,
      "eval_loss": 0.32911789417266846,
      "eval_mae": 0.16081956028938293,
      "eval_r2": 0.3904249966144562,
      "eval_rmse": 0.24574027955532074,
      "eval_runtime": 90.421,
      "eval_samples_per_second": 42.512,
      "eval_steps_per_second": 2.665,
      "learning_rate": 0.0001,
      "step": 22351
    },
    {
      "epoch": 31.206657420249652,
      "grad_norm": 0.31049737334251404,
      "learning_rate": 0.0001,
      "loss": 0.339,
      "step": 22500
    },
    {
      "epoch": 31.9001386962552,
      "grad_norm": 0.40785181522369385,
      "learning_rate": 0.0001,
      "loss": 0.3373,
      "step": 23000
    },
    {
      "epoch": 32.0,
      "eval_explained_variance": 0.3971378207206726,
      "eval_loss": 0.3289436399936676,
      "eval_mae": 0.16307200491428375,
      "eval_r2": 0.3959096372127533,
      "eval_rmse": 0.24528969824314117,
      "eval_runtime": 92.2146,
      "eval_samples_per_second": 41.685,
      "eval_steps_per_second": 2.613,
      "learning_rate": 0.0001,
      "step": 23072
    },
    {
      "epoch": 32.59361997226075,
      "grad_norm": 0.3135410249233246,
      "learning_rate": 0.0001,
      "loss": 0.3362,
      "step": 23500
    },
    {
      "epoch": 33.0,
      "eval_explained_variance": 0.3988523483276367,
      "eval_loss": 0.3271527588367462,
      "eval_mae": 0.16276519000530243,
      "eval_r2": 0.3971821069717407,
      "eval_rmse": 0.24443718791007996,
      "eval_runtime": 89.5278,
      "eval_samples_per_second": 42.936,
      "eval_steps_per_second": 2.692,
      "learning_rate": 0.0001,
      "step": 23793
    },
    {
      "epoch": 33.287101248266296,
      "grad_norm": 0.22212661802768707,
      "learning_rate": 0.0001,
      "loss": 0.337,
      "step": 24000
    },
    {
      "epoch": 33.980582524271846,
      "grad_norm": 0.25206565856933594,
      "learning_rate": 0.0001,
      "loss": 0.3371,
      "step": 24500
    },
    {
      "epoch": 34.0,
      "eval_explained_variance": 0.39812082052230835,
      "eval_loss": 0.32699429988861084,
      "eval_mae": 0.16210177540779114,
      "eval_r2": 0.39756229519844055,
      "eval_rmse": 0.24429556727409363,
      "eval_runtime": 90.0899,
      "eval_samples_per_second": 42.668,
      "eval_steps_per_second": 2.675,
      "learning_rate": 0.0001,
      "step": 24514
    },
    {
      "epoch": 34.674063800277395,
      "grad_norm": 0.27033254504203796,
      "learning_rate": 0.0001,
      "loss": 0.3342,
      "step": 25000
    },
    {
      "epoch": 35.0,
      "eval_explained_variance": 0.3996908366680145,
      "eval_loss": 0.32638314366340637,
      "eval_mae": 0.1614546775817871,
      "eval_r2": 0.39874374866485596,
      "eval_rmse": 0.24386192858219147,
      "eval_runtime": 88.9919,
      "eval_samples_per_second": 43.195,
      "eval_steps_per_second": 2.708,
      "learning_rate": 0.0001,
      "step": 25235
    },
    {
      "epoch": 35.36754507628294,
      "grad_norm": 0.3120824098587036,
      "learning_rate": 0.0001,
      "loss": 0.3367,
      "step": 25500
    },
    {
      "epoch": 36.0,
      "eval_explained_variance": 0.3955422639846802,
      "eval_loss": 0.3293066918849945,
      "eval_mae": 0.16555820405483246,
      "eval_r2": 0.39463794231414795,
      "eval_rmse": 0.24545253813266754,
      "eval_runtime": 91.9694,
      "eval_samples_per_second": 41.796,
      "eval_steps_per_second": 2.62,
      "learning_rate": 0.0001,
      "step": 25956
    },
    {
      "epoch": 36.061026352288486,
      "grad_norm": 0.3190695345401764,
      "learning_rate": 0.0001,
      "loss": 0.3386,
      "step": 26000
    },
    {
      "epoch": 36.754507628294036,
      "grad_norm": 0.2679268717765808,
      "learning_rate": 0.0001,
      "loss": 0.3363,
      "step": 26500
    },
    {
      "epoch": 37.0,
      "eval_explained_variance": 0.4032209515571594,
      "eval_loss": 0.3271186649799347,
      "eval_mae": 0.1597272753715515,
      "eval_r2": 0.39962705969810486,
      "eval_rmse": 0.24424488842487335,
      "eval_runtime": 88.1563,
      "eval_samples_per_second": 43.604,
      "eval_steps_per_second": 2.734,
      "learning_rate": 0.0001,
      "step": 26677
    },
    {
      "epoch": 37.447988904299585,
      "grad_norm": 0.2553563714027405,
      "learning_rate": 0.0001,
      "loss": 0.3357,
      "step": 27000
    },
    {
      "epoch": 38.0,
      "eval_explained_variance": 0.4041087031364441,
      "eval_loss": 0.32695677876472473,
      "eval_mae": 0.16126109659671783,
      "eval_r2": 0.402229368686676,
      "eval_rmse": 0.24366919696331024,
      "eval_runtime": 91.2875,
      "eval_samples_per_second": 42.109,
      "eval_steps_per_second": 2.64,
      "learning_rate": 0.0001,
      "step": 27398
    },
    {
      "epoch": 38.141470180305134,
      "grad_norm": 0.3718196451663971,
      "learning_rate": 0.0001,
      "loss": 0.3348,
      "step": 27500
    },
    {
      "epoch": 38.83495145631068,
      "grad_norm": 0.30295658111572266,
      "learning_rate": 0.0001,
      "loss": 0.3377,
      "step": 28000
    },
    {
      "epoch": 39.0,
      "eval_explained_variance": 0.4026513695716858,
      "eval_loss": 0.33263665437698364,
      "eval_mae": 0.1575259119272232,
      "eval_r2": 0.4006690979003906,
      "eval_rmse": 0.24382774531841278,
      "eval_runtime": 93.1613,
      "eval_samples_per_second": 41.262,
      "eval_steps_per_second": 2.587,
      "learning_rate": 0.0001,
      "step": 28119
    },
    {
      "epoch": 39.528432732316226,
      "grad_norm": 0.3047119081020355,
      "learning_rate": 0.0001,
      "loss": 0.3354,
      "step": 28500
    },
    {
      "epoch": 40.0,
      "eval_explained_variance": 0.40179282426834106,
      "eval_loss": 0.33278176188468933,
      "eval_mae": 0.16507098078727722,
      "eval_r2": 0.4002922475337982,
      "eval_rmse": 0.2442423701286316,
      "eval_runtime": 98.0451,
      "eval_samples_per_second": 39.206,
      "eval_steps_per_second": 2.458,
      "learning_rate": 0.0001,
      "step": 28840
    },
    {
      "epoch": 40.221914008321775,
      "grad_norm": 0.2754528522491455,
      "learning_rate": 0.0001,
      "loss": 0.3367,
      "step": 29000
    },
    {
      "epoch": 40.915395284327325,
      "grad_norm": 0.5425918102264404,
      "learning_rate": 0.0001,
      "loss": 0.3363,
      "step": 29500
    },
    {
      "epoch": 41.0,
      "eval_explained_variance": 0.4044828712940216,
      "eval_loss": 0.33069443702697754,
      "eval_mae": 0.16271242499351501,
      "eval_r2": 0.4031254053115845,
      "eval_rmse": 0.24350450932979584,
      "eval_runtime": 93.471,
      "eval_samples_per_second": 41.125,
      "eval_steps_per_second": 2.578,
      "learning_rate": 0.0001,
      "step": 29561
    },
    {
      "epoch": 41.608876560332874,
      "grad_norm": 0.4664023518562317,
      "learning_rate": 1e-05,
      "loss": 0.335,
      "step": 30000
    },
    {
      "epoch": 42.0,
      "eval_explained_variance": 0.4039740562438965,
      "eval_loss": 0.3310275375843048,
      "eval_mae": 0.1640516221523285,
      "eval_r2": 0.40303775668144226,
      "eval_rmse": 0.24363353848457336,
      "eval_runtime": 91.0645,
      "eval_samples_per_second": 42.212,
      "eval_steps_per_second": 2.646,
      "learning_rate": 1e-05,
      "step": 30282
    },
    {
      "epoch": 42.302357836338416,
      "grad_norm": 0.30786848068237305,
      "learning_rate": 1e-05,
      "loss": 0.3348,
      "step": 30500
    },
    {
      "epoch": 42.995839112343965,
      "grad_norm": 0.3475857079029083,
      "learning_rate": 1e-05,
      "loss": 0.334,
      "step": 31000
    },
    {
      "epoch": 43.0,
      "eval_explained_variance": 0.4058697521686554,
      "eval_loss": 0.32956016063690186,
      "eval_mae": 0.16028979420661926,
      "eval_r2": 0.40524527430534363,
      "eval_rmse": 0.2429088056087494,
      "eval_runtime": 91.4351,
      "eval_samples_per_second": 42.041,
      "eval_steps_per_second": 2.636,
      "learning_rate": 1e-05,
      "step": 31003
    },
    {
      "epoch": 43.689320388349515,
      "grad_norm": 0.38462796807289124,
      "learning_rate": 1e-05,
      "loss": 0.3366,
      "step": 31500
    },
    {
      "epoch": 44.0,
      "eval_explained_variance": 0.40545278787612915,
      "eval_loss": 0.33022987842559814,
      "eval_mae": 0.16247908771038055,
      "eval_r2": 0.4038069546222687,
      "eval_rmse": 0.2432354837656021,
      "eval_runtime": 95.2656,
      "eval_samples_per_second": 40.35,
      "eval_steps_per_second": 2.53,
      "learning_rate": 1e-05,
      "step": 31724
    },
    {
      "epoch": 44.382801664355064,
      "grad_norm": 0.3637019395828247,
      "learning_rate": 1e-05,
      "loss": 0.3326,
      "step": 32000
    },
    {
      "epoch": 45.0,
      "eval_explained_variance": 0.405474990606308,
      "eval_loss": 0.3266430199146271,
      "eval_mae": 0.16172955930233002,
      "eval_r2": 0.4047379195690155,
      "eval_rmse": 0.24298621714115143,
      "eval_runtime": 92.749,
      "eval_samples_per_second": 41.445,
      "eval_steps_per_second": 2.598,
      "learning_rate": 1e-05,
      "step": 32445
    },
    {
      "epoch": 45.0,
      "learning_rate": 1e-05,
      "step": 32445,
      "total_flos": 7.674132153670687e+19,
      "train_loss": 0.3477805222434721,
      "train_runtime": 18758.0673,
      "train_samples_per_second": 92.136,
      "train_steps_per_second": 5.766
    }
  ],
  "logging_steps": 500,
  "max_steps": 108150,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 150,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 10,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 7.674132153670687e+19,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}