lombardata's picture
Evaluation on the test set completed on 2024_11_06.
c377a3c verified
{
"best_metric": 0.32638314366340637,
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria-probabilities-large-2024_11_06-batch-size16_freeze_probs/checkpoint-25235",
"epoch": 45.0,
"eval_steps": 500,
"global_step": 32445,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.6934812760055479,
"grad_norm": 0.6020499467849731,
"learning_rate": 0.001,
"loss": 0.4549,
"step": 500
},
{
"epoch": 1.0,
"eval_explained_variance": 0.28087472915649414,
"eval_loss": 0.36246591806411743,
"eval_mae": 0.18796318769454956,
"eval_r2": 0.2743779420852661,
"eval_rmse": 0.26685553789138794,
"eval_runtime": 84.6117,
"eval_samples_per_second": 45.431,
"eval_steps_per_second": 2.848,
"learning_rate": 0.001,
"step": 721
},
{
"epoch": 1.3869625520110958,
"grad_norm": 0.48873767256736755,
"learning_rate": 0.001,
"loss": 0.3806,
"step": 1000
},
{
"epoch": 2.0,
"eval_explained_variance": 0.339480996131897,
"eval_loss": 0.3457428216934204,
"eval_mae": 0.16845344007015228,
"eval_r2": 0.3366556167602539,
"eval_rmse": 0.25599098205566406,
"eval_runtime": 81.9342,
"eval_samples_per_second": 46.916,
"eval_steps_per_second": 2.941,
"learning_rate": 0.001,
"step": 1442
},
{
"epoch": 2.0804438280166435,
"grad_norm": 0.4343818128108978,
"learning_rate": 0.001,
"loss": 0.3697,
"step": 1500
},
{
"epoch": 2.7739251040221915,
"grad_norm": 0.44139501452445984,
"learning_rate": 0.001,
"loss": 0.368,
"step": 2000
},
{
"epoch": 3.0,
"eval_explained_variance": 0.3180062472820282,
"eval_loss": 0.3518487811088562,
"eval_mae": 0.17466770112514496,
"eval_r2": 0.3157402276992798,
"eval_rmse": 0.25968268513679504,
"eval_runtime": 85.9096,
"eval_samples_per_second": 44.745,
"eval_steps_per_second": 2.805,
"learning_rate": 0.001,
"step": 2163
},
{
"epoch": 3.4674063800277395,
"grad_norm": 0.4190344512462616,
"learning_rate": 0.001,
"loss": 0.3637,
"step": 2500
},
{
"epoch": 4.0,
"eval_explained_variance": 0.3375174403190613,
"eval_loss": 0.3507988750934601,
"eval_mae": 0.17512458562850952,
"eval_r2": 0.3344818949699402,
"eval_rmse": 0.2562903165817261,
"eval_runtime": 82.3152,
"eval_samples_per_second": 46.699,
"eval_steps_per_second": 2.928,
"learning_rate": 0.001,
"step": 2884
},
{
"epoch": 4.160887656033287,
"grad_norm": 0.46223729848861694,
"learning_rate": 0.001,
"loss": 0.3604,
"step": 3000
},
{
"epoch": 4.854368932038835,
"grad_norm": 0.397588312625885,
"learning_rate": 0.001,
"loss": 0.36,
"step": 3500
},
{
"epoch": 5.0,
"eval_explained_variance": 0.3404175043106079,
"eval_loss": 0.3436409533023834,
"eval_mae": 0.16958864033222198,
"eval_r2": 0.33709797263145447,
"eval_rmse": 0.25463980436325073,
"eval_runtime": 87.0977,
"eval_samples_per_second": 44.134,
"eval_steps_per_second": 2.767,
"learning_rate": 0.001,
"step": 3605
},
{
"epoch": 5.547850208044383,
"grad_norm": 0.3559507429599762,
"learning_rate": 0.001,
"loss": 0.3585,
"step": 4000
},
{
"epoch": 6.0,
"eval_explained_variance": 0.321065753698349,
"eval_loss": 0.35096481442451477,
"eval_mae": 0.17673969268798828,
"eval_r2": 0.31747376918792725,
"eval_rmse": 0.25984567403793335,
"eval_runtime": 88.6251,
"eval_samples_per_second": 43.374,
"eval_steps_per_second": 2.719,
"learning_rate": 0.001,
"step": 4326
},
{
"epoch": 6.2413314840499305,
"grad_norm": 0.3629322946071625,
"learning_rate": 0.001,
"loss": 0.3617,
"step": 4500
},
{
"epoch": 6.934812760055479,
"grad_norm": 0.26215413212776184,
"learning_rate": 0.001,
"loss": 0.3581,
"step": 5000
},
{
"epoch": 7.0,
"eval_explained_variance": 0.35101062059402466,
"eval_loss": 0.3412320613861084,
"eval_mae": 0.1749519258737564,
"eval_r2": 0.3471425771713257,
"eval_rmse": 0.2537590265274048,
"eval_runtime": 86.9964,
"eval_samples_per_second": 44.186,
"eval_steps_per_second": 2.77,
"learning_rate": 0.001,
"step": 5047
},
{
"epoch": 7.6282940360610265,
"grad_norm": 0.23537978529930115,
"learning_rate": 0.001,
"loss": 0.3601,
"step": 5500
},
{
"epoch": 8.0,
"eval_explained_variance": 0.3551764190196991,
"eval_loss": 0.3456409275531769,
"eval_mae": 0.167846217751503,
"eval_r2": 0.34348151087760925,
"eval_rmse": 0.25611478090286255,
"eval_runtime": 87.0061,
"eval_samples_per_second": 44.181,
"eval_steps_per_second": 2.77,
"learning_rate": 0.001,
"step": 5768
},
{
"epoch": 8.321775312066574,
"grad_norm": 0.24255254864692688,
"learning_rate": 0.001,
"loss": 0.3619,
"step": 6000
},
{
"epoch": 9.0,
"eval_explained_variance": 0.3427416980266571,
"eval_loss": 0.3425351679325104,
"eval_mae": 0.17405511438846588,
"eval_r2": 0.3409159481525421,
"eval_rmse": 0.25445955991744995,
"eval_runtime": 87.2735,
"eval_samples_per_second": 44.045,
"eval_steps_per_second": 2.761,
"learning_rate": 0.001,
"step": 6489
},
{
"epoch": 9.015256588072122,
"grad_norm": 0.2687969207763672,
"learning_rate": 0.001,
"loss": 0.3527,
"step": 6500
},
{
"epoch": 9.70873786407767,
"grad_norm": 0.22893770039081573,
"learning_rate": 0.001,
"loss": 0.355,
"step": 7000
},
{
"epoch": 10.0,
"eval_explained_variance": 0.36017459630966187,
"eval_loss": 0.33964109420776367,
"eval_mae": 0.1710653305053711,
"eval_r2": 0.3582787811756134,
"eval_rmse": 0.25249695777893066,
"eval_runtime": 92.8944,
"eval_samples_per_second": 41.38,
"eval_steps_per_second": 2.594,
"learning_rate": 0.001,
"step": 7210
},
{
"epoch": 10.402219140083218,
"grad_norm": 0.2397356480360031,
"learning_rate": 0.001,
"loss": 0.3574,
"step": 7500
},
{
"epoch": 11.0,
"eval_explained_variance": 0.35241732001304626,
"eval_loss": 0.34479108452796936,
"eval_mae": 0.17209044098854065,
"eval_r2": 0.3498174250125885,
"eval_rmse": 0.25417372584342957,
"eval_runtime": 93.2158,
"eval_samples_per_second": 41.238,
"eval_steps_per_second": 2.585,
"learning_rate": 0.001,
"step": 7931
},
{
"epoch": 11.095700416088766,
"grad_norm": 0.23666760325431824,
"learning_rate": 0.001,
"loss": 0.3548,
"step": 8000
},
{
"epoch": 11.789181692094314,
"grad_norm": 0.17148445546627045,
"learning_rate": 0.001,
"loss": 0.3549,
"step": 8500
},
{
"epoch": 12.0,
"eval_explained_variance": 0.3604218363761902,
"eval_loss": 0.3415849804878235,
"eval_mae": 0.17670249938964844,
"eval_r2": 0.35767847299575806,
"eval_rmse": 0.2527333199977875,
"eval_runtime": 92.5773,
"eval_samples_per_second": 41.522,
"eval_steps_per_second": 2.603,
"learning_rate": 0.001,
"step": 8652
},
{
"epoch": 12.482662968099861,
"grad_norm": 0.2034599930047989,
"learning_rate": 0.001,
"loss": 0.354,
"step": 9000
},
{
"epoch": 13.0,
"eval_explained_variance": 0.35451599955558777,
"eval_loss": 0.33990854024887085,
"eval_mae": 0.16771164536476135,
"eval_r2": 0.3523372411727905,
"eval_rmse": 0.25265443325042725,
"eval_runtime": 91.5276,
"eval_samples_per_second": 41.998,
"eval_steps_per_second": 2.633,
"learning_rate": 0.001,
"step": 9373
},
{
"epoch": 13.176144244105409,
"grad_norm": 0.21185149252414703,
"learning_rate": 0.001,
"loss": 0.3555,
"step": 9500
},
{
"epoch": 13.869625520110956,
"grad_norm": 0.24503760039806366,
"learning_rate": 0.001,
"loss": 0.3566,
"step": 10000
},
{
"epoch": 14.0,
"eval_explained_variance": 0.34794220328330994,
"eval_loss": 0.34520208835601807,
"eval_mae": 0.1745852380990982,
"eval_r2": 0.34426644444465637,
"eval_rmse": 0.25396791100502014,
"eval_runtime": 93.4248,
"eval_samples_per_second": 41.145,
"eval_steps_per_second": 2.58,
"learning_rate": 0.001,
"step": 10094
},
{
"epoch": 14.563106796116505,
"grad_norm": 0.29965028166770935,
"learning_rate": 0.001,
"loss": 0.3553,
"step": 10500
},
{
"epoch": 15.0,
"eval_explained_variance": 0.3463059067726135,
"eval_loss": 0.34849879145622253,
"eval_mae": 0.18007972836494446,
"eval_r2": 0.3333212435245514,
"eval_rmse": 0.2568492293357849,
"eval_runtime": 100.4245,
"eval_samples_per_second": 38.278,
"eval_steps_per_second": 2.4,
"learning_rate": 0.001,
"step": 10815
},
{
"epoch": 15.256588072122053,
"grad_norm": 0.26795288920402527,
"learning_rate": 0.001,
"loss": 0.3571,
"step": 11000
},
{
"epoch": 15.9500693481276,
"grad_norm": 0.19320347905158997,
"learning_rate": 0.001,
"loss": 0.3536,
"step": 11500
},
{
"epoch": 16.0,
"eval_explained_variance": 0.34989917278289795,
"eval_loss": 0.34347954392433167,
"eval_mae": 0.17179666459560394,
"eval_r2": 0.34726396203041077,
"eval_rmse": 0.2537030875682831,
"eval_runtime": 103.999,
"eval_samples_per_second": 36.962,
"eval_steps_per_second": 2.317,
"learning_rate": 0.001,
"step": 11536
},
{
"epoch": 16.643550624133148,
"grad_norm": 0.19884039461612701,
"learning_rate": 0.0001,
"loss": 0.3518,
"step": 12000
},
{
"epoch": 17.0,
"eval_explained_variance": 0.366793692111969,
"eval_loss": 0.341246634721756,
"eval_mae": 0.17114569246768951,
"eval_r2": 0.36331599950790405,
"eval_rmse": 0.25078731775283813,
"eval_runtime": 92.1763,
"eval_samples_per_second": 41.703,
"eval_steps_per_second": 2.615,
"learning_rate": 0.0001,
"step": 12257
},
{
"epoch": 17.337031900138697,
"grad_norm": 0.19435305893421173,
"learning_rate": 0.0001,
"loss": 0.3475,
"step": 12500
},
{
"epoch": 18.0,
"eval_explained_variance": 0.36558443307876587,
"eval_loss": 0.3398562967777252,
"eval_mae": 0.17082427442073822,
"eval_r2": 0.36493533849716187,
"eval_rmse": 0.25065672397613525,
"eval_runtime": 98.7078,
"eval_samples_per_second": 38.943,
"eval_steps_per_second": 2.442,
"learning_rate": 0.0001,
"step": 12978
},
{
"epoch": 18.030513176144243,
"grad_norm": 0.2573840022087097,
"learning_rate": 0.0001,
"loss": 0.3449,
"step": 13000
},
{
"epoch": 18.723994452149793,
"grad_norm": 0.21889527142047882,
"learning_rate": 0.0001,
"loss": 0.347,
"step": 13500
},
{
"epoch": 19.0,
"eval_explained_variance": 0.3787304162979126,
"eval_loss": 0.3332718312740326,
"eval_mae": 0.16749390959739685,
"eval_r2": 0.3774765133857727,
"eval_rmse": 0.24829652905464172,
"eval_runtime": 108.1714,
"eval_samples_per_second": 35.536,
"eval_steps_per_second": 2.228,
"learning_rate": 0.0001,
"step": 13699
},
{
"epoch": 19.41747572815534,
"grad_norm": 0.3126258850097656,
"learning_rate": 0.0001,
"loss": 0.3445,
"step": 14000
},
{
"epoch": 20.0,
"eval_explained_variance": 0.3822024166584015,
"eval_loss": 0.333162784576416,
"eval_mae": 0.16876617074012756,
"eval_r2": 0.3809906542301178,
"eval_rmse": 0.2478322684764862,
"eval_runtime": 93.5874,
"eval_samples_per_second": 41.074,
"eval_steps_per_second": 2.575,
"learning_rate": 0.0001,
"step": 14420
},
{
"epoch": 20.110957004160888,
"grad_norm": 0.25440576672554016,
"learning_rate": 0.0001,
"loss": 0.3474,
"step": 14500
},
{
"epoch": 20.804438280166437,
"grad_norm": 0.23914161324501038,
"learning_rate": 0.0001,
"loss": 0.3447,
"step": 15000
},
{
"epoch": 21.0,
"eval_explained_variance": 0.38327154517173767,
"eval_loss": 0.3324449062347412,
"eval_mae": 0.16733573377132416,
"eval_r2": 0.38100260496139526,
"eval_rmse": 0.24757729470729828,
"eval_runtime": 97.5169,
"eval_samples_per_second": 39.419,
"eval_steps_per_second": 2.471,
"learning_rate": 0.0001,
"step": 15141
},
{
"epoch": 21.497919556171983,
"grad_norm": 0.23553606867790222,
"learning_rate": 0.0001,
"loss": 0.3445,
"step": 15500
},
{
"epoch": 22.0,
"eval_explained_variance": 0.38486921787261963,
"eval_loss": 0.3320053517818451,
"eval_mae": 0.16713765263557434,
"eval_r2": 0.38355034589767456,
"eval_rmse": 0.2471724897623062,
"eval_runtime": 95.4307,
"eval_samples_per_second": 40.281,
"eval_steps_per_second": 2.525,
"learning_rate": 0.0001,
"step": 15862
},
{
"epoch": 22.191400832177532,
"grad_norm": 0.23152843117713928,
"learning_rate": 0.0001,
"loss": 0.3441,
"step": 16000
},
{
"epoch": 22.884882108183078,
"grad_norm": 0.23495590686798096,
"learning_rate": 0.0001,
"loss": 0.3398,
"step": 16500
},
{
"epoch": 23.0,
"eval_explained_variance": 0.39000746607780457,
"eval_loss": 0.3301050662994385,
"eval_mae": 0.16577981412410736,
"eval_r2": 0.3889786899089813,
"eval_rmse": 0.24611681699752808,
"eval_runtime": 100.934,
"eval_samples_per_second": 38.084,
"eval_steps_per_second": 2.388,
"learning_rate": 0.0001,
"step": 16583
},
{
"epoch": 23.578363384188627,
"grad_norm": 0.24516963958740234,
"learning_rate": 0.0001,
"loss": 0.3417,
"step": 17000
},
{
"epoch": 24.0,
"eval_explained_variance": 0.3905264139175415,
"eval_loss": 0.3298528492450714,
"eval_mae": 0.16478358209133148,
"eval_r2": 0.3899492621421814,
"eval_rmse": 0.24583497643470764,
"eval_runtime": 96.7182,
"eval_samples_per_second": 39.744,
"eval_steps_per_second": 2.492,
"learning_rate": 0.0001,
"step": 17304
},
{
"epoch": 24.271844660194176,
"grad_norm": 0.2366987019777298,
"learning_rate": 0.0001,
"loss": 0.3394,
"step": 17500
},
{
"epoch": 24.965325936199722,
"grad_norm": 0.22849540412425995,
"learning_rate": 0.0001,
"loss": 0.3406,
"step": 18000
},
{
"epoch": 25.0,
"eval_explained_variance": 0.39103788137435913,
"eval_loss": 0.32962867617607117,
"eval_mae": 0.1640922725200653,
"eval_r2": 0.3903038799762726,
"eval_rmse": 0.2458016723394394,
"eval_runtime": 94.6291,
"eval_samples_per_second": 40.622,
"eval_steps_per_second": 2.547,
"learning_rate": 0.0001,
"step": 18025
},
{
"epoch": 25.65880721220527,
"grad_norm": 0.24230694770812988,
"learning_rate": 0.0001,
"loss": 0.3381,
"step": 18500
},
{
"epoch": 26.0,
"eval_explained_variance": 0.39299964904785156,
"eval_loss": 0.32889437675476074,
"eval_mae": 0.1631881594657898,
"eval_r2": 0.3926166296005249,
"eval_rmse": 0.24544604122638702,
"eval_runtime": 93.5737,
"eval_samples_per_second": 41.08,
"eval_steps_per_second": 2.576,
"learning_rate": 0.0001,
"step": 18746
},
{
"epoch": 26.352288488210817,
"grad_norm": 0.2334737330675125,
"learning_rate": 0.0001,
"loss": 0.3399,
"step": 19000
},
{
"epoch": 27.0,
"eval_explained_variance": 0.39082765579223633,
"eval_loss": 0.33042922616004944,
"eval_mae": 0.1674499809741974,
"eval_r2": 0.38912835717201233,
"eval_rmse": 0.2460869997739792,
"eval_runtime": 90.4822,
"eval_samples_per_second": 42.483,
"eval_steps_per_second": 2.664,
"learning_rate": 0.0001,
"step": 19467
},
{
"epoch": 27.045769764216367,
"grad_norm": 0.23623766005039215,
"learning_rate": 0.0001,
"loss": 0.339,
"step": 19500
},
{
"epoch": 27.739251040221912,
"grad_norm": 0.237477108836174,
"learning_rate": 0.0001,
"loss": 0.3377,
"step": 20000
},
{
"epoch": 28.0,
"eval_explained_variance": 0.3972352147102356,
"eval_loss": 0.32880541682243347,
"eval_mae": 0.16454365849494934,
"eval_r2": 0.3955116868019104,
"eval_rmse": 0.24511639773845673,
"eval_runtime": 89.1819,
"eval_samples_per_second": 43.103,
"eval_steps_per_second": 2.702,
"learning_rate": 0.0001,
"step": 20188
},
{
"epoch": 28.43273231622746,
"grad_norm": 0.23042118549346924,
"learning_rate": 0.0001,
"loss": 0.3384,
"step": 20500
},
{
"epoch": 29.0,
"eval_explained_variance": 0.39730560779571533,
"eval_loss": 0.3293789327144623,
"eval_mae": 0.16559576988220215,
"eval_r2": 0.3961379826068878,
"eval_rmse": 0.24510112404823303,
"eval_runtime": 88.0385,
"eval_samples_per_second": 43.663,
"eval_steps_per_second": 2.737,
"learning_rate": 0.0001,
"step": 20909
},
{
"epoch": 29.12621359223301,
"grad_norm": 0.4347997307777405,
"learning_rate": 0.0001,
"loss": 0.3396,
"step": 21000
},
{
"epoch": 29.819694868238557,
"grad_norm": 0.38524329662323,
"learning_rate": 0.0001,
"loss": 0.3372,
"step": 21500
},
{
"epoch": 30.0,
"eval_explained_variance": 0.3955426812171936,
"eval_loss": 0.33135533332824707,
"eval_mae": 0.16844958066940308,
"eval_r2": 0.3913615643978119,
"eval_rmse": 0.24635259807109833,
"eval_runtime": 91.08,
"eval_samples_per_second": 42.205,
"eval_steps_per_second": 2.646,
"learning_rate": 0.0001,
"step": 21630
},
{
"epoch": 30.513176144244106,
"grad_norm": 0.3073582947254181,
"learning_rate": 0.0001,
"loss": 0.3375,
"step": 22000
},
{
"epoch": 31.0,
"eval_explained_variance": 0.3935950696468353,
"eval_loss": 0.32911789417266846,
"eval_mae": 0.16081956028938293,
"eval_r2": 0.3904249966144562,
"eval_rmse": 0.24574027955532074,
"eval_runtime": 90.421,
"eval_samples_per_second": 42.512,
"eval_steps_per_second": 2.665,
"learning_rate": 0.0001,
"step": 22351
},
{
"epoch": 31.206657420249652,
"grad_norm": 0.31049737334251404,
"learning_rate": 0.0001,
"loss": 0.339,
"step": 22500
},
{
"epoch": 31.9001386962552,
"grad_norm": 0.40785181522369385,
"learning_rate": 0.0001,
"loss": 0.3373,
"step": 23000
},
{
"epoch": 32.0,
"eval_explained_variance": 0.3971378207206726,
"eval_loss": 0.3289436399936676,
"eval_mae": 0.16307200491428375,
"eval_r2": 0.3959096372127533,
"eval_rmse": 0.24528969824314117,
"eval_runtime": 92.2146,
"eval_samples_per_second": 41.685,
"eval_steps_per_second": 2.613,
"learning_rate": 0.0001,
"step": 23072
},
{
"epoch": 32.59361997226075,
"grad_norm": 0.3135410249233246,
"learning_rate": 0.0001,
"loss": 0.3362,
"step": 23500
},
{
"epoch": 33.0,
"eval_explained_variance": 0.3988523483276367,
"eval_loss": 0.3271527588367462,
"eval_mae": 0.16276519000530243,
"eval_r2": 0.3971821069717407,
"eval_rmse": 0.24443718791007996,
"eval_runtime": 89.5278,
"eval_samples_per_second": 42.936,
"eval_steps_per_second": 2.692,
"learning_rate": 0.0001,
"step": 23793
},
{
"epoch": 33.287101248266296,
"grad_norm": 0.22212661802768707,
"learning_rate": 0.0001,
"loss": 0.337,
"step": 24000
},
{
"epoch": 33.980582524271846,
"grad_norm": 0.25206565856933594,
"learning_rate": 0.0001,
"loss": 0.3371,
"step": 24500
},
{
"epoch": 34.0,
"eval_explained_variance": 0.39812082052230835,
"eval_loss": 0.32699429988861084,
"eval_mae": 0.16210177540779114,
"eval_r2": 0.39756229519844055,
"eval_rmse": 0.24429556727409363,
"eval_runtime": 90.0899,
"eval_samples_per_second": 42.668,
"eval_steps_per_second": 2.675,
"learning_rate": 0.0001,
"step": 24514
},
{
"epoch": 34.674063800277395,
"grad_norm": 0.27033254504203796,
"learning_rate": 0.0001,
"loss": 0.3342,
"step": 25000
},
{
"epoch": 35.0,
"eval_explained_variance": 0.3996908366680145,
"eval_loss": 0.32638314366340637,
"eval_mae": 0.1614546775817871,
"eval_r2": 0.39874374866485596,
"eval_rmse": 0.24386192858219147,
"eval_runtime": 88.9919,
"eval_samples_per_second": 43.195,
"eval_steps_per_second": 2.708,
"learning_rate": 0.0001,
"step": 25235
},
{
"epoch": 35.36754507628294,
"grad_norm": 0.3120824098587036,
"learning_rate": 0.0001,
"loss": 0.3367,
"step": 25500
},
{
"epoch": 36.0,
"eval_explained_variance": 0.3955422639846802,
"eval_loss": 0.3293066918849945,
"eval_mae": 0.16555820405483246,
"eval_r2": 0.39463794231414795,
"eval_rmse": 0.24545253813266754,
"eval_runtime": 91.9694,
"eval_samples_per_second": 41.796,
"eval_steps_per_second": 2.62,
"learning_rate": 0.0001,
"step": 25956
},
{
"epoch": 36.061026352288486,
"grad_norm": 0.3190695345401764,
"learning_rate": 0.0001,
"loss": 0.3386,
"step": 26000
},
{
"epoch": 36.754507628294036,
"grad_norm": 0.2679268717765808,
"learning_rate": 0.0001,
"loss": 0.3363,
"step": 26500
},
{
"epoch": 37.0,
"eval_explained_variance": 0.4032209515571594,
"eval_loss": 0.3271186649799347,
"eval_mae": 0.1597272753715515,
"eval_r2": 0.39962705969810486,
"eval_rmse": 0.24424488842487335,
"eval_runtime": 88.1563,
"eval_samples_per_second": 43.604,
"eval_steps_per_second": 2.734,
"learning_rate": 0.0001,
"step": 26677
},
{
"epoch": 37.447988904299585,
"grad_norm": 0.2553563714027405,
"learning_rate": 0.0001,
"loss": 0.3357,
"step": 27000
},
{
"epoch": 38.0,
"eval_explained_variance": 0.4041087031364441,
"eval_loss": 0.32695677876472473,
"eval_mae": 0.16126109659671783,
"eval_r2": 0.402229368686676,
"eval_rmse": 0.24366919696331024,
"eval_runtime": 91.2875,
"eval_samples_per_second": 42.109,
"eval_steps_per_second": 2.64,
"learning_rate": 0.0001,
"step": 27398
},
{
"epoch": 38.141470180305134,
"grad_norm": 0.3718196451663971,
"learning_rate": 0.0001,
"loss": 0.3348,
"step": 27500
},
{
"epoch": 38.83495145631068,
"grad_norm": 0.30295658111572266,
"learning_rate": 0.0001,
"loss": 0.3377,
"step": 28000
},
{
"epoch": 39.0,
"eval_explained_variance": 0.4026513695716858,
"eval_loss": 0.33263665437698364,
"eval_mae": 0.1575259119272232,
"eval_r2": 0.4006690979003906,
"eval_rmse": 0.24382774531841278,
"eval_runtime": 93.1613,
"eval_samples_per_second": 41.262,
"eval_steps_per_second": 2.587,
"learning_rate": 0.0001,
"step": 28119
},
{
"epoch": 39.528432732316226,
"grad_norm": 0.3047119081020355,
"learning_rate": 0.0001,
"loss": 0.3354,
"step": 28500
},
{
"epoch": 40.0,
"eval_explained_variance": 0.40179282426834106,
"eval_loss": 0.33278176188468933,
"eval_mae": 0.16507098078727722,
"eval_r2": 0.4002922475337982,
"eval_rmse": 0.2442423701286316,
"eval_runtime": 98.0451,
"eval_samples_per_second": 39.206,
"eval_steps_per_second": 2.458,
"learning_rate": 0.0001,
"step": 28840
},
{
"epoch": 40.221914008321775,
"grad_norm": 0.2754528522491455,
"learning_rate": 0.0001,
"loss": 0.3367,
"step": 29000
},
{
"epoch": 40.915395284327325,
"grad_norm": 0.5425918102264404,
"learning_rate": 0.0001,
"loss": 0.3363,
"step": 29500
},
{
"epoch": 41.0,
"eval_explained_variance": 0.4044828712940216,
"eval_loss": 0.33069443702697754,
"eval_mae": 0.16271242499351501,
"eval_r2": 0.4031254053115845,
"eval_rmse": 0.24350450932979584,
"eval_runtime": 93.471,
"eval_samples_per_second": 41.125,
"eval_steps_per_second": 2.578,
"learning_rate": 0.0001,
"step": 29561
},
{
"epoch": 41.608876560332874,
"grad_norm": 0.4664023518562317,
"learning_rate": 1e-05,
"loss": 0.335,
"step": 30000
},
{
"epoch": 42.0,
"eval_explained_variance": 0.4039740562438965,
"eval_loss": 0.3310275375843048,
"eval_mae": 0.1640516221523285,
"eval_r2": 0.40303775668144226,
"eval_rmse": 0.24363353848457336,
"eval_runtime": 91.0645,
"eval_samples_per_second": 42.212,
"eval_steps_per_second": 2.646,
"learning_rate": 1e-05,
"step": 30282
},
{
"epoch": 42.302357836338416,
"grad_norm": 0.30786848068237305,
"learning_rate": 1e-05,
"loss": 0.3348,
"step": 30500
},
{
"epoch": 42.995839112343965,
"grad_norm": 0.3475857079029083,
"learning_rate": 1e-05,
"loss": 0.334,
"step": 31000
},
{
"epoch": 43.0,
"eval_explained_variance": 0.4058697521686554,
"eval_loss": 0.32956016063690186,
"eval_mae": 0.16028979420661926,
"eval_r2": 0.40524527430534363,
"eval_rmse": 0.2429088056087494,
"eval_runtime": 91.4351,
"eval_samples_per_second": 42.041,
"eval_steps_per_second": 2.636,
"learning_rate": 1e-05,
"step": 31003
},
{
"epoch": 43.689320388349515,
"grad_norm": 0.38462796807289124,
"learning_rate": 1e-05,
"loss": 0.3366,
"step": 31500
},
{
"epoch": 44.0,
"eval_explained_variance": 0.40545278787612915,
"eval_loss": 0.33022987842559814,
"eval_mae": 0.16247908771038055,
"eval_r2": 0.4038069546222687,
"eval_rmse": 0.2432354837656021,
"eval_runtime": 95.2656,
"eval_samples_per_second": 40.35,
"eval_steps_per_second": 2.53,
"learning_rate": 1e-05,
"step": 31724
},
{
"epoch": 44.382801664355064,
"grad_norm": 0.3637019395828247,
"learning_rate": 1e-05,
"loss": 0.3326,
"step": 32000
},
{
"epoch": 45.0,
"eval_explained_variance": 0.405474990606308,
"eval_loss": 0.3266430199146271,
"eval_mae": 0.16172955930233002,
"eval_r2": 0.4047379195690155,
"eval_rmse": 0.24298621714115143,
"eval_runtime": 92.749,
"eval_samples_per_second": 41.445,
"eval_steps_per_second": 2.598,
"learning_rate": 1e-05,
"step": 32445
},
{
"epoch": 45.0,
"learning_rate": 1e-05,
"step": 32445,
"total_flos": 7.674132153670687e+19,
"train_loss": 0.3477805222434721,
"train_runtime": 18758.0673,
"train_samples_per_second": 92.136,
"train_steps_per_second": 5.766
}
],
"logging_steps": 500,
"max_steps": 108150,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 7.674132153670687e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}