lombardata's picture
Evaluation on the test set completed on 2024_11_05.
2ae096e verified
{
"best_metric": 0.31807705760002136,
"best_model_checkpoint": "/home/datawork-iot-nos/Seatizen/models/multilabel/drone/drone-DinoVdeau-produttoria-probabilities-large-2024_11_04-batch-size64_freeze_probs/checkpoint-10679",
"epoch": 69.0,
"eval_steps": 500,
"global_step": 12489,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_explained_variance": 0.30368584394454956,
"eval_loss": 0.36101797223091125,
"eval_mae": 0.18781842291355133,
"eval_r2": 0.28179988265037537,
"eval_rmse": 0.2644973397254944,
"eval_runtime": 77.9895,
"eval_samples_per_second": 49.289,
"eval_steps_per_second": 0.782,
"learning_rate": 0.001,
"step": 181
},
{
"epoch": 2.0,
"eval_explained_variance": 0.3478943109512329,
"eval_loss": 0.3464529514312744,
"eval_mae": 0.17783641815185547,
"eval_r2": 0.33490002155303955,
"eval_rmse": 0.25657808780670166,
"eval_runtime": 82.5528,
"eval_samples_per_second": 46.564,
"eval_steps_per_second": 0.739,
"learning_rate": 0.001,
"step": 362
},
{
"epoch": 2.7624309392265194,
"grad_norm": 0.36520126461982727,
"learning_rate": 0.001,
"loss": 0.4149,
"step": 500
},
{
"epoch": 3.0,
"eval_explained_variance": 0.35995176434516907,
"eval_loss": 0.34133487939834595,
"eval_mae": 0.17307622730731964,
"eval_r2": 0.3535836637020111,
"eval_rmse": 0.25322917103767395,
"eval_runtime": 82.3103,
"eval_samples_per_second": 46.701,
"eval_steps_per_second": 0.741,
"learning_rate": 0.001,
"step": 543
},
{
"epoch": 4.0,
"eval_explained_variance": 0.35911357402801514,
"eval_loss": 0.3406243324279785,
"eval_mae": 0.17434848845005035,
"eval_r2": 0.35186487436294556,
"eval_rmse": 0.25316712260246277,
"eval_runtime": 82.3198,
"eval_samples_per_second": 46.696,
"eval_steps_per_second": 0.741,
"learning_rate": 0.001,
"step": 724
},
{
"epoch": 5.0,
"eval_explained_variance": 0.3730728328227997,
"eval_loss": 0.3341675400733948,
"eval_mae": 0.1660642772912979,
"eval_r2": 0.37022241950035095,
"eval_rmse": 0.24957779049873352,
"eval_runtime": 81.2913,
"eval_samples_per_second": 47.287,
"eval_steps_per_second": 0.75,
"learning_rate": 0.001,
"step": 905
},
{
"epoch": 5.524861878453039,
"grad_norm": 0.2786722779273987,
"learning_rate": 0.001,
"loss": 0.3486,
"step": 1000
},
{
"epoch": 6.0,
"eval_explained_variance": 0.37237703800201416,
"eval_loss": 0.33848145604133606,
"eval_mae": 0.17393800616264343,
"eval_r2": 0.36507394909858704,
"eval_rmse": 0.2512134909629822,
"eval_runtime": 81.4015,
"eval_samples_per_second": 47.223,
"eval_steps_per_second": 0.749,
"learning_rate": 0.001,
"step": 1086
},
{
"epoch": 7.0,
"eval_explained_variance": 0.38455089926719666,
"eval_loss": 0.3320676386356354,
"eval_mae": 0.16495750844478607,
"eval_r2": 0.38360098004341125,
"eval_rmse": 0.24762088060379028,
"eval_runtime": 80.8916,
"eval_samples_per_second": 47.52,
"eval_steps_per_second": 0.754,
"learning_rate": 0.001,
"step": 1267
},
{
"epoch": 8.0,
"eval_explained_variance": 0.38114219903945923,
"eval_loss": 0.3332081437110901,
"eval_mae": 0.1629333347082138,
"eval_r2": 0.38021257519721985,
"eval_rmse": 0.24838922917842865,
"eval_runtime": 81.8669,
"eval_samples_per_second": 46.954,
"eval_steps_per_second": 0.745,
"learning_rate": 0.001,
"step": 1448
},
{
"epoch": 8.287292817679559,
"grad_norm": 0.24299436807632446,
"learning_rate": 0.001,
"loss": 0.3462,
"step": 1500
},
{
"epoch": 9.0,
"eval_explained_variance": 0.3872137665748596,
"eval_loss": 0.3305376172065735,
"eval_mae": 0.16518667340278625,
"eval_r2": 0.38587409257888794,
"eval_rmse": 0.24679909646511078,
"eval_runtime": 80.7757,
"eval_samples_per_second": 47.589,
"eval_steps_per_second": 0.755,
"learning_rate": 0.001,
"step": 1629
},
{
"epoch": 10.0,
"eval_explained_variance": 0.3849840760231018,
"eval_loss": 0.33136793971061707,
"eval_mae": 0.16550396382808685,
"eval_r2": 0.38265591859817505,
"eval_rmse": 0.2476043999195099,
"eval_runtime": 81.3286,
"eval_samples_per_second": 47.265,
"eval_steps_per_second": 0.75,
"learning_rate": 0.001,
"step": 1810
},
{
"epoch": 11.0,
"eval_explained_variance": 0.3866196274757385,
"eval_loss": 0.3319685757160187,
"eval_mae": 0.16017739474773407,
"eval_r2": 0.3840087652206421,
"eval_rmse": 0.2473573535680771,
"eval_runtime": 80.4031,
"eval_samples_per_second": 47.809,
"eval_steps_per_second": 0.759,
"learning_rate": 0.001,
"step": 1991
},
{
"epoch": 11.049723756906078,
"grad_norm": 0.1871100217103958,
"learning_rate": 0.001,
"loss": 0.3391,
"step": 2000
},
{
"epoch": 12.0,
"eval_explained_variance": 0.3842613399028778,
"eval_loss": 0.3341500759124756,
"eval_mae": 0.16828522086143494,
"eval_r2": 0.37614381313323975,
"eval_rmse": 0.2493605613708496,
"eval_runtime": 85.712,
"eval_samples_per_second": 44.848,
"eval_steps_per_second": 0.712,
"learning_rate": 0.001,
"step": 2172
},
{
"epoch": 13.0,
"eval_explained_variance": 0.38362985849380493,
"eval_loss": 0.33248215913772583,
"eval_mae": 0.16487525403499603,
"eval_r2": 0.38205400109291077,
"eval_rmse": 0.2479550540447235,
"eval_runtime": 81.2132,
"eval_samples_per_second": 47.332,
"eval_steps_per_second": 0.751,
"learning_rate": 0.001,
"step": 2353
},
{
"epoch": 13.812154696132596,
"grad_norm": 0.19864079356193542,
"learning_rate": 0.001,
"loss": 0.3372,
"step": 2500
},
{
"epoch": 14.0,
"eval_explained_variance": 0.3930034041404724,
"eval_loss": 0.33228376507759094,
"eval_mae": 0.17001599073410034,
"eval_r2": 0.38782283663749695,
"eval_rmse": 0.24722439050674438,
"eval_runtime": 81.4831,
"eval_samples_per_second": 47.175,
"eval_steps_per_second": 0.749,
"learning_rate": 0.001,
"step": 2534
},
{
"epoch": 15.0,
"eval_explained_variance": 0.38280433416366577,
"eval_loss": 0.334873229265213,
"eval_mae": 0.1702941358089447,
"eval_r2": 0.37488964200019836,
"eval_rmse": 0.24934227764606476,
"eval_runtime": 82.0523,
"eval_samples_per_second": 46.848,
"eval_steps_per_second": 0.743,
"learning_rate": 0.001,
"step": 2715
},
{
"epoch": 16.0,
"eval_explained_variance": 0.4018595516681671,
"eval_loss": 0.3279329538345337,
"eval_mae": 0.16485117375850677,
"eval_r2": 0.3982715308666229,
"eval_rmse": 0.2447740137577057,
"eval_runtime": 80.4915,
"eval_samples_per_second": 47.757,
"eval_steps_per_second": 0.758,
"learning_rate": 0.0001,
"step": 2896
},
{
"epoch": 16.574585635359117,
"grad_norm": 0.18467135727405548,
"learning_rate": 0.0001,
"loss": 0.3343,
"step": 3000
},
{
"epoch": 17.0,
"eval_explained_variance": 0.4041018486022949,
"eval_loss": 0.3279244005680084,
"eval_mae": 0.1647535115480423,
"eval_r2": 0.39838364720344543,
"eval_rmse": 0.24482907354831696,
"eval_runtime": 80.8242,
"eval_samples_per_second": 47.56,
"eval_steps_per_second": 0.755,
"learning_rate": 0.0001,
"step": 3077
},
{
"epoch": 18.0,
"eval_explained_variance": 0.40324074029922485,
"eval_loss": 0.3262367248535156,
"eval_mae": 0.16224807500839233,
"eval_r2": 0.40254056453704834,
"eval_rmse": 0.24403510987758636,
"eval_runtime": 79.9167,
"eval_samples_per_second": 48.1,
"eval_steps_per_second": 0.763,
"learning_rate": 0.0001,
"step": 3258
},
{
"epoch": 19.0,
"eval_explained_variance": 0.4051153361797333,
"eval_loss": 0.3247373402118683,
"eval_mae": 0.15879866480827332,
"eval_r2": 0.4045550227165222,
"eval_rmse": 0.24321436882019043,
"eval_runtime": 78.6921,
"eval_samples_per_second": 48.849,
"eval_steps_per_second": 0.775,
"learning_rate": 0.0001,
"step": 3439
},
{
"epoch": 19.337016574585636,
"grad_norm": 0.19419366121292114,
"learning_rate": 0.0001,
"loss": 0.3271,
"step": 3500
},
{
"epoch": 20.0,
"eval_explained_variance": 0.41063615679740906,
"eval_loss": 0.32612329721450806,
"eval_mae": 0.16252389550209045,
"eval_r2": 0.4059467911720276,
"eval_rmse": 0.24334031343460083,
"eval_runtime": 79.0343,
"eval_samples_per_second": 48.637,
"eval_steps_per_second": 0.772,
"learning_rate": 0.0001,
"step": 3620
},
{
"epoch": 21.0,
"eval_explained_variance": 0.411923885345459,
"eval_loss": 0.3241129517555237,
"eval_mae": 0.16064995527267456,
"eval_r2": 0.4094885587692261,
"eval_rmse": 0.2424389272928238,
"eval_runtime": 79.7663,
"eval_samples_per_second": 48.191,
"eval_steps_per_second": 0.765,
"learning_rate": 0.0001,
"step": 3801
},
{
"epoch": 22.0,
"eval_explained_variance": 0.41316869854927063,
"eval_loss": 0.32355180382728577,
"eval_mae": 0.1587211936712265,
"eval_r2": 0.4111122488975525,
"eval_rmse": 0.24220238626003265,
"eval_runtime": 79.4498,
"eval_samples_per_second": 48.383,
"eval_steps_per_second": 0.768,
"learning_rate": 0.0001,
"step": 3982
},
{
"epoch": 22.099447513812155,
"grad_norm": 0.1995573192834854,
"learning_rate": 0.0001,
"loss": 0.3275,
"step": 4000
},
{
"epoch": 23.0,
"eval_explained_variance": 0.41342437267303467,
"eval_loss": 0.3242079019546509,
"eval_mae": 0.16008274257183075,
"eval_r2": 0.4106961488723755,
"eval_rmse": 0.2422674000263214,
"eval_runtime": 78.6301,
"eval_samples_per_second": 48.887,
"eval_steps_per_second": 0.776,
"learning_rate": 0.0001,
"step": 4163
},
{
"epoch": 24.0,
"eval_explained_variance": 0.4160999655723572,
"eval_loss": 0.3227241337299347,
"eval_mae": 0.15855328738689423,
"eval_r2": 0.41498833894729614,
"eval_rmse": 0.2413560003042221,
"eval_runtime": 78.9117,
"eval_samples_per_second": 48.713,
"eval_steps_per_second": 0.773,
"learning_rate": 0.0001,
"step": 4344
},
{
"epoch": 24.861878453038674,
"grad_norm": 0.19915054738521576,
"learning_rate": 0.0001,
"loss": 0.3247,
"step": 4500
},
{
"epoch": 25.0,
"eval_explained_variance": 0.4161580204963684,
"eval_loss": 0.3223778307437897,
"eval_mae": 0.1586799919605255,
"eval_r2": 0.4147694408893585,
"eval_rmse": 0.24130770564079285,
"eval_runtime": 79.8112,
"eval_samples_per_second": 48.164,
"eval_steps_per_second": 0.764,
"learning_rate": 0.0001,
"step": 4525
},
{
"epoch": 26.0,
"eval_explained_variance": 0.4154631793498993,
"eval_loss": 0.3217927813529968,
"eval_mae": 0.15566809475421906,
"eval_r2": 0.4142845571041107,
"eval_rmse": 0.24133403599262238,
"eval_runtime": 80.3582,
"eval_samples_per_second": 47.836,
"eval_steps_per_second": 0.759,
"learning_rate": 0.0001,
"step": 4706
},
{
"epoch": 27.0,
"eval_explained_variance": 0.4153793156147003,
"eval_loss": 0.3227355182170868,
"eval_mae": 0.16026344895362854,
"eval_r2": 0.4137920141220093,
"eval_rmse": 0.2415631115436554,
"eval_runtime": 81.132,
"eval_samples_per_second": 47.38,
"eval_steps_per_second": 0.752,
"learning_rate": 0.0001,
"step": 4887
},
{
"epoch": 27.624309392265193,
"grad_norm": 0.21681347489356995,
"learning_rate": 0.0001,
"loss": 0.3231,
"step": 5000
},
{
"epoch": 28.0,
"eval_explained_variance": 0.41966134309768677,
"eval_loss": 0.32067713141441345,
"eval_mae": 0.15620459616184235,
"eval_r2": 0.4186200499534607,
"eval_rmse": 0.24054032564163208,
"eval_runtime": 81.32,
"eval_samples_per_second": 47.27,
"eval_steps_per_second": 0.75,
"learning_rate": 0.0001,
"step": 5068
},
{
"epoch": 29.0,
"eval_explained_variance": 0.4175001382827759,
"eval_loss": 0.32205939292907715,
"eval_mae": 0.15971842408180237,
"eval_r2": 0.41626471281051636,
"eval_rmse": 0.24114808440208435,
"eval_runtime": 80.4359,
"eval_samples_per_second": 47.79,
"eval_steps_per_second": 0.758,
"learning_rate": 0.0001,
"step": 5249
},
{
"epoch": 30.0,
"eval_explained_variance": 0.4190256893634796,
"eval_loss": 0.32246074080467224,
"eval_mae": 0.16082431375980377,
"eval_r2": 0.41641533374786377,
"eval_rmse": 0.24128668010234833,
"eval_runtime": 80.4517,
"eval_samples_per_second": 47.78,
"eval_steps_per_second": 0.758,
"learning_rate": 0.0001,
"step": 5430
},
{
"epoch": 30.386740331491712,
"grad_norm": 0.20712591707706451,
"learning_rate": 0.0001,
"loss": 0.3215,
"step": 5500
},
{
"epoch": 31.0,
"eval_explained_variance": 0.41637617349624634,
"eval_loss": 0.3223503530025482,
"eval_mae": 0.15350204706192017,
"eval_r2": 0.4133574366569519,
"eval_rmse": 0.2416228950023651,
"eval_runtime": 83.145,
"eval_samples_per_second": 46.232,
"eval_steps_per_second": 0.734,
"learning_rate": 0.0001,
"step": 5611
},
{
"epoch": 32.0,
"eval_explained_variance": 0.41848501563072205,
"eval_loss": 0.3212696313858032,
"eval_mae": 0.1552996039390564,
"eval_r2": 0.4179571568965912,
"eval_rmse": 0.24075190722942352,
"eval_runtime": 80.1214,
"eval_samples_per_second": 47.977,
"eval_steps_per_second": 0.761,
"learning_rate": 0.0001,
"step": 5792
},
{
"epoch": 33.0,
"eval_explained_variance": 0.4142448604106903,
"eval_loss": 0.32156360149383545,
"eval_mae": 0.15830956399440765,
"eval_r2": 0.4123082458972931,
"eval_rmse": 0.24135708808898926,
"eval_runtime": 80.4898,
"eval_samples_per_second": 47.758,
"eval_steps_per_second": 0.758,
"learning_rate": 0.0001,
"step": 5973
},
{
"epoch": 33.149171270718234,
"grad_norm": 0.22835813462734222,
"learning_rate": 0.0001,
"loss": 0.3227,
"step": 6000
},
{
"epoch": 34.0,
"eval_explained_variance": 0.4181065559387207,
"eval_loss": 0.3205103278160095,
"eval_mae": 0.15616008639335632,
"eval_r2": 0.41721493005752563,
"eval_rmse": 0.24057045578956604,
"eval_runtime": 80.866,
"eval_samples_per_second": 47.535,
"eval_steps_per_second": 0.754,
"learning_rate": 0.0001,
"step": 6154
},
{
"epoch": 35.0,
"eval_explained_variance": 0.422443687915802,
"eval_loss": 0.3197581171989441,
"eval_mae": 0.15349127352237701,
"eval_r2": 0.42148736119270325,
"eval_rmse": 0.23994095623493195,
"eval_runtime": 81.1391,
"eval_samples_per_second": 47.375,
"eval_steps_per_second": 0.752,
"learning_rate": 0.0001,
"step": 6335
},
{
"epoch": 35.91160220994475,
"grad_norm": 0.18953262269496918,
"learning_rate": 0.0001,
"loss": 0.3202,
"step": 6500
},
{
"epoch": 36.0,
"eval_explained_variance": 0.4194072186946869,
"eval_loss": 0.3211075961589813,
"eval_mae": 0.15770210325717926,
"eval_r2": 0.4186839759349823,
"eval_rmse": 0.24057835340499878,
"eval_runtime": 80.0524,
"eval_samples_per_second": 48.019,
"eval_steps_per_second": 0.762,
"learning_rate": 0.0001,
"step": 6516
},
{
"epoch": 37.0,
"eval_explained_variance": 0.4203309416770935,
"eval_loss": 0.3203599154949188,
"eval_mae": 0.1520441472530365,
"eval_r2": 0.4187575578689575,
"eval_rmse": 0.24031898379325867,
"eval_runtime": 79.535,
"eval_samples_per_second": 48.331,
"eval_steps_per_second": 0.767,
"learning_rate": 0.0001,
"step": 6697
},
{
"epoch": 38.0,
"eval_explained_variance": 0.41849103569984436,
"eval_loss": 0.32143038511276245,
"eval_mae": 0.15602850914001465,
"eval_r2": 0.41701728105545044,
"eval_rmse": 0.24094969034194946,
"eval_runtime": 78.4033,
"eval_samples_per_second": 49.029,
"eval_steps_per_second": 0.778,
"learning_rate": 0.0001,
"step": 6878
},
{
"epoch": 38.67403314917127,
"grad_norm": 0.19431360065937042,
"learning_rate": 0.0001,
"loss": 0.3195,
"step": 7000
},
{
"epoch": 39.0,
"eval_explained_variance": 0.42322617769241333,
"eval_loss": 0.3195198178291321,
"eval_mae": 0.15203459560871124,
"eval_r2": 0.42263996601104736,
"eval_rmse": 0.23966462910175323,
"eval_runtime": 77.9703,
"eval_samples_per_second": 49.301,
"eval_steps_per_second": 0.782,
"learning_rate": 0.0001,
"step": 7059
},
{
"epoch": 40.0,
"eval_explained_variance": 0.42311784625053406,
"eval_loss": 0.3207896649837494,
"eval_mae": 0.1577463448047638,
"eval_r2": 0.4204080402851105,
"eval_rmse": 0.2403651773929596,
"eval_runtime": 77.4683,
"eval_samples_per_second": 49.62,
"eval_steps_per_second": 0.787,
"learning_rate": 0.0001,
"step": 7240
},
{
"epoch": 41.0,
"eval_explained_variance": 0.42329341173171997,
"eval_loss": 0.3197501003742218,
"eval_mae": 0.15471500158309937,
"eval_r2": 0.4217017889022827,
"eval_rmse": 0.23984502255916595,
"eval_runtime": 78.5707,
"eval_samples_per_second": 48.924,
"eval_steps_per_second": 0.776,
"learning_rate": 0.0001,
"step": 7421
},
{
"epoch": 41.43646408839779,
"grad_norm": 0.22206854820251465,
"learning_rate": 0.0001,
"loss": 0.3192,
"step": 7500
},
{
"epoch": 42.0,
"eval_explained_variance": 0.42175009846687317,
"eval_loss": 0.32175716757774353,
"eval_mae": 0.15892744064331055,
"eval_r2": 0.4173850417137146,
"eval_rmse": 0.24101921916007996,
"eval_runtime": 80.0006,
"eval_samples_per_second": 48.05,
"eval_steps_per_second": 0.762,
"learning_rate": 0.0001,
"step": 7602
},
{
"epoch": 43.0,
"eval_explained_variance": 0.4254191517829895,
"eval_loss": 0.3189575970172882,
"eval_mae": 0.1544325351715088,
"eval_r2": 0.4235268831253052,
"eval_rmse": 0.23960073292255402,
"eval_runtime": 77.8217,
"eval_samples_per_second": 49.395,
"eval_steps_per_second": 0.784,
"learning_rate": 0.0001,
"step": 7783
},
{
"epoch": 44.0,
"eval_explained_variance": 0.42385637760162354,
"eval_loss": 0.31898385286331177,
"eval_mae": 0.15337544679641724,
"eval_r2": 0.42299988865852356,
"eval_rmse": 0.23955906927585602,
"eval_runtime": 77.9179,
"eval_samples_per_second": 49.334,
"eval_steps_per_second": 0.783,
"learning_rate": 0.0001,
"step": 7964
},
{
"epoch": 44.19889502762431,
"grad_norm": 0.2577707767486572,
"learning_rate": 0.0001,
"loss": 0.3178,
"step": 8000
},
{
"epoch": 45.0,
"eval_explained_variance": 0.42595985531806946,
"eval_loss": 0.31977778673171997,
"eval_mae": 0.1566278040409088,
"eval_r2": 0.423930287361145,
"eval_rmse": 0.2397065907716751,
"eval_runtime": 78.7581,
"eval_samples_per_second": 48.808,
"eval_steps_per_second": 0.775,
"learning_rate": 0.0001,
"step": 8145
},
{
"epoch": 46.0,
"eval_explained_variance": 0.42309415340423584,
"eval_loss": 0.3193351626396179,
"eval_mae": 0.15561553835868835,
"eval_r2": 0.421285480260849,
"eval_rmse": 0.23979297280311584,
"eval_runtime": 77.9122,
"eval_samples_per_second": 49.338,
"eval_steps_per_second": 0.783,
"learning_rate": 0.0001,
"step": 8326
},
{
"epoch": 46.96132596685083,
"grad_norm": 0.24199023842811584,
"learning_rate": 0.0001,
"loss": 0.3175,
"step": 8500
},
{
"epoch": 47.0,
"eval_explained_variance": 0.4257267415523529,
"eval_loss": 0.31895366311073303,
"eval_mae": 0.15244629979133606,
"eval_r2": 0.42454099655151367,
"eval_rmse": 0.23932920396327972,
"eval_runtime": 78.1903,
"eval_samples_per_second": 49.162,
"eval_steps_per_second": 0.78,
"learning_rate": 0.0001,
"step": 8507
},
{
"epoch": 48.0,
"eval_explained_variance": 0.4229738414287567,
"eval_loss": 0.3192996680736542,
"eval_mae": 0.15246888995170593,
"eval_r2": 0.4215165376663208,
"eval_rmse": 0.2398112416267395,
"eval_runtime": 78.4361,
"eval_samples_per_second": 49.008,
"eval_steps_per_second": 0.778,
"learning_rate": 0.0001,
"step": 8688
},
{
"epoch": 49.0,
"eval_explained_variance": 0.4195903539657593,
"eval_loss": 0.32073548436164856,
"eval_mae": 0.15583214163780212,
"eval_r2": 0.41874682903289795,
"eval_rmse": 0.24045619368553162,
"eval_runtime": 78.3955,
"eval_samples_per_second": 49.033,
"eval_steps_per_second": 0.778,
"learning_rate": 0.0001,
"step": 8869
},
{
"epoch": 49.72375690607735,
"grad_norm": 0.23354829847812653,
"learning_rate": 1e-05,
"loss": 0.3174,
"step": 9000
},
{
"epoch": 50.0,
"eval_explained_variance": 0.4236743748188019,
"eval_loss": 0.3198453485965729,
"eval_mae": 0.15717381238937378,
"eval_r2": 0.4218462109565735,
"eval_rmse": 0.23996752500534058,
"eval_runtime": 78.1702,
"eval_samples_per_second": 49.175,
"eval_steps_per_second": 0.78,
"learning_rate": 1e-05,
"step": 9050
},
{
"epoch": 51.0,
"eval_explained_variance": 0.4172780215740204,
"eval_loss": 0.32436585426330566,
"eval_mae": 0.16015969216823578,
"eval_r2": 0.4091569483280182,
"eval_rmse": 0.24263373017311096,
"eval_runtime": 77.8337,
"eval_samples_per_second": 49.387,
"eval_steps_per_second": 0.784,
"learning_rate": 1e-05,
"step": 9231
},
{
"epoch": 52.0,
"eval_explained_variance": 0.42350247502326965,
"eval_loss": 0.31899821758270264,
"eval_mae": 0.15497206151485443,
"eval_r2": 0.42265191674232483,
"eval_rmse": 0.23958924412727356,
"eval_runtime": 77.2812,
"eval_samples_per_second": 49.74,
"eval_steps_per_second": 0.789,
"learning_rate": 1e-05,
"step": 9412
},
{
"epoch": 52.48618784530387,
"grad_norm": 0.29434072971343994,
"learning_rate": 1e-05,
"loss": 0.3152,
"step": 9500
},
{
"epoch": 53.0,
"eval_explained_variance": 0.4269993007183075,
"eval_loss": 0.31892043352127075,
"eval_mae": 0.1552329957485199,
"eval_r2": 0.4248761534690857,
"eval_rmse": 0.23935101926326752,
"eval_runtime": 78.9075,
"eval_samples_per_second": 48.715,
"eval_steps_per_second": 0.773,
"learning_rate": 1e-05,
"step": 9593
},
{
"epoch": 54.0,
"eval_explained_variance": 0.4239312410354614,
"eval_loss": 0.3194037675857544,
"eval_mae": 0.15401999652385712,
"eval_r2": 0.42274925112724304,
"eval_rmse": 0.23955823481082916,
"eval_runtime": 77.3784,
"eval_samples_per_second": 49.678,
"eval_steps_per_second": 0.788,
"learning_rate": 1e-05,
"step": 9774
},
{
"epoch": 55.0,
"eval_explained_variance": 0.4257669150829315,
"eval_loss": 0.3184601366519928,
"eval_mae": 0.15387418866157532,
"eval_r2": 0.42496535181999207,
"eval_rmse": 0.2391066700220108,
"eval_runtime": 77.4594,
"eval_samples_per_second": 49.626,
"eval_steps_per_second": 0.788,
"learning_rate": 1e-05,
"step": 9955
},
{
"epoch": 55.248618784530386,
"grad_norm": 0.21759380400180817,
"learning_rate": 1e-05,
"loss": 0.317,
"step": 10000
},
{
"epoch": 56.0,
"eval_explained_variance": 0.4281463027000427,
"eval_loss": 0.318115234375,
"eval_mae": 0.15269973874092102,
"eval_r2": 0.4272992014884949,
"eval_rmse": 0.23881223797798157,
"eval_runtime": 77.3587,
"eval_samples_per_second": 49.691,
"eval_steps_per_second": 0.789,
"learning_rate": 1e-05,
"step": 10136
},
{
"epoch": 57.0,
"eval_explained_variance": 0.4273688495159149,
"eval_loss": 0.31871330738067627,
"eval_mae": 0.15321487188339233,
"eval_r2": 0.42585498094558716,
"eval_rmse": 0.23920726776123047,
"eval_runtime": 77.7128,
"eval_samples_per_second": 49.464,
"eval_steps_per_second": 0.785,
"learning_rate": 1e-05,
"step": 10317
},
{
"epoch": 58.0,
"eval_explained_variance": 0.4258714020252228,
"eval_loss": 0.32010164856910706,
"eval_mae": 0.15668974816799164,
"eval_r2": 0.42169713973999023,
"eval_rmse": 0.2401351034641266,
"eval_runtime": 78.5354,
"eval_samples_per_second": 48.946,
"eval_steps_per_second": 0.777,
"learning_rate": 1e-05,
"step": 10498
},
{
"epoch": 58.011049723756905,
"grad_norm": 0.26900964975357056,
"learning_rate": 1e-05,
"loss": 0.314,
"step": 10500
},
{
"epoch": 59.0,
"eval_explained_variance": 0.42817097902297974,
"eval_loss": 0.31807705760002136,
"eval_mae": 0.15281866490840912,
"eval_r2": 0.42699384689331055,
"eval_rmse": 0.23875540494918823,
"eval_runtime": 78.4147,
"eval_samples_per_second": 49.021,
"eval_steps_per_second": 0.778,
"learning_rate": 1e-05,
"step": 10679
},
{
"epoch": 60.0,
"eval_explained_variance": 0.4267863631248474,
"eval_loss": 0.3181913495063782,
"eval_mae": 0.15336427092552185,
"eval_r2": 0.4256437420845032,
"eval_rmse": 0.23891934752464294,
"eval_runtime": 78.5798,
"eval_samples_per_second": 48.918,
"eval_steps_per_second": 0.776,
"learning_rate": 1e-05,
"step": 10860
},
{
"epoch": 60.773480662983424,
"grad_norm": 0.276735782623291,
"learning_rate": 1e-05,
"loss": 0.314,
"step": 11000
},
{
"epoch": 61.0,
"eval_explained_variance": 0.4265504479408264,
"eval_loss": 0.3185857832431793,
"eval_mae": 0.1509746015071869,
"eval_r2": 0.42550772428512573,
"eval_rmse": 0.23908774554729462,
"eval_runtime": 78.0232,
"eval_samples_per_second": 49.267,
"eval_steps_per_second": 0.782,
"learning_rate": 1e-05,
"step": 11041
},
{
"epoch": 62.0,
"eval_explained_variance": 0.4262467920780182,
"eval_loss": 0.32031872868537903,
"eval_mae": 0.15960827469825745,
"eval_r2": 0.4240296185016632,
"eval_rmse": 0.23980861902236938,
"eval_runtime": 78.7243,
"eval_samples_per_second": 48.829,
"eval_steps_per_second": 0.775,
"learning_rate": 1e-05,
"step": 11222
},
{
"epoch": 63.0,
"eval_explained_variance": 0.42764127254486084,
"eval_loss": 0.31964218616485596,
"eval_mae": 0.1570112109184265,
"eval_r2": 0.42423343658447266,
"eval_rmse": 0.23966987431049347,
"eval_runtime": 79.5804,
"eval_samples_per_second": 48.303,
"eval_steps_per_second": 0.767,
"learning_rate": 1e-05,
"step": 11403
},
{
"epoch": 63.53591160220994,
"grad_norm": 0.25174766778945923,
"learning_rate": 1e-05,
"loss": 0.3142,
"step": 11500
},
{
"epoch": 64.0,
"eval_explained_variance": 0.4253126382827759,
"eval_loss": 0.31808170676231384,
"eval_mae": 0.15270139276981354,
"eval_r2": 0.42435672879219055,
"eval_rmse": 0.23907549679279327,
"eval_runtime": 79.2944,
"eval_samples_per_second": 48.478,
"eval_steps_per_second": 0.769,
"learning_rate": 1e-05,
"step": 11584
},
{
"epoch": 65.0,
"eval_explained_variance": 0.4263868033885956,
"eval_loss": 0.31850185990333557,
"eval_mae": 0.1550404131412506,
"eval_r2": 0.4259207546710968,
"eval_rmse": 0.23902735114097595,
"eval_runtime": 78.9958,
"eval_samples_per_second": 48.661,
"eval_steps_per_second": 0.772,
"learning_rate": 1e-05,
"step": 11765
},
{
"epoch": 66.0,
"eval_explained_variance": 0.4291055500507355,
"eval_loss": 0.3186076879501343,
"eval_mae": 0.15623149275779724,
"eval_r2": 0.42775318026542664,
"eval_rmse": 0.23892195522785187,
"eval_runtime": 79.1276,
"eval_samples_per_second": 48.58,
"eval_steps_per_second": 0.771,
"learning_rate": 1.0000000000000002e-06,
"step": 11946
},
{
"epoch": 66.29834254143647,
"grad_norm": 0.31120502948760986,
"learning_rate": 1.0000000000000002e-06,
"loss": 0.3131,
"step": 12000
},
{
"epoch": 67.0,
"eval_explained_variance": 0.4278748631477356,
"eval_loss": 0.3181016743183136,
"eval_mae": 0.15263643860816956,
"eval_r2": 0.42703741788864136,
"eval_rmse": 0.2387426644563675,
"eval_runtime": 77.8773,
"eval_samples_per_second": 49.36,
"eval_steps_per_second": 0.783,
"learning_rate": 1.0000000000000002e-06,
"step": 12127
},
{
"epoch": 68.0,
"eval_explained_variance": 0.42565029859542847,
"eval_loss": 0.3194774389266968,
"eval_mae": 0.15491320192813873,
"eval_r2": 0.4220888018608093,
"eval_rmse": 0.23974499106407166,
"eval_runtime": 77.5869,
"eval_samples_per_second": 49.544,
"eval_steps_per_second": 0.786,
"learning_rate": 1.0000000000000002e-06,
"step": 12308
},
{
"epoch": 69.0,
"eval_explained_variance": 0.4274521470069885,
"eval_loss": 0.3183264136314392,
"eval_mae": 0.15404105186462402,
"eval_r2": 0.4259319305419922,
"eval_rmse": 0.23901674151420593,
"eval_runtime": 78.3061,
"eval_samples_per_second": 49.089,
"eval_steps_per_second": 0.779,
"learning_rate": 1.0000000000000002e-06,
"step": 12489
},
{
"epoch": 69.0,
"learning_rate": 1.0000000000000002e-06,
"step": 12489,
"total_flos": 1.1767002635628401e+20,
"train_loss": 0.32720188785662635,
"train_runtime": 24708.1609,
"train_samples_per_second": 69.949,
"train_steps_per_second": 1.099
}
],
"logging_steps": 500,
"max_steps": 27150,
"num_input_tokens_seen": 0,
"num_train_epochs": 150,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 10,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1767002635628401e+20,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}