hogru's picture
Update tokenizer, bump hf versions
ce10699
{
"best_metric": 0.39055171608924866,
"best_model_checkpoint": "/home/stephan/code/molreactgen/checkpoints/2023-11-07_18-29-28_experiment/checkpoint-6895",
"epoch": 30.996954314720814,
"eval_steps": 500,
"global_step": 7633,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.41,
"learning_rate": 4.065040650406504e-05,
"loss": 6.0993,
"step": 100
},
{
"epoch": 0.81,
"learning_rate": 8.130081300813008e-05,
"loss": 5.0338,
"step": 200
},
{
"epoch": 1.0,
"eval_accuracy": 0.2072072072072072,
"eval_loss": 3.800293445587158,
"eval_runtime": 1.7789,
"eval_samples_per_second": 1357.054,
"eval_steps_per_second": 84.886,
"step": 246
},
{
"epoch": 1.22,
"learning_rate": 0.00012195121951219512,
"loss": 3.8428,
"step": 300
},
{
"epoch": 1.62,
"learning_rate": 0.00016260162601626016,
"loss": 2.8076,
"step": 400
},
{
"epoch": 2.0,
"eval_accuracy": 0.25823462236055444,
"eval_loss": 1.976719856262207,
"eval_runtime": 1.774,
"eval_samples_per_second": 1360.786,
"eval_steps_per_second": 85.12,
"step": 492
},
{
"epoch": 2.03,
"learning_rate": 0.0002032520325203252,
"loss": 2.2493,
"step": 500
},
{
"epoch": 2.44,
"learning_rate": 0.00024390243902439024,
"loss": 1.9599,
"step": 600
},
{
"epoch": 2.84,
"learning_rate": 0.0002845528455284553,
"loss": 1.7151,
"step": 700
},
{
"epoch": 3.0,
"eval_accuracy": 0.2894563992327041,
"eval_loss": 1.4048570394515991,
"eval_runtime": 1.775,
"eval_samples_per_second": 1360.028,
"eval_steps_per_second": 85.072,
"step": 738
},
{
"epoch": 3.25,
"learning_rate": 0.0003252032520325203,
"loss": 1.5375,
"step": 800
},
{
"epoch": 3.65,
"learning_rate": 0.00036585365853658537,
"loss": 1.3954,
"step": 900
},
{
"epoch": 4.0,
"eval_accuracy": 0.312210304753801,
"eval_loss": 1.0894988775253296,
"eval_runtime": 1.8067,
"eval_samples_per_second": 1336.115,
"eval_steps_per_second": 83.576,
"step": 985
},
{
"epoch": 4.06,
"learning_rate": 0.0004065040650406504,
"loss": 1.2718,
"step": 1000
},
{
"epoch": 4.47,
"learning_rate": 0.00044715447154471545,
"loss": 1.1709,
"step": 1100
},
{
"epoch": 4.87,
"learning_rate": 0.0004878048780487805,
"loss": 1.0895,
"step": 1200
},
{
"epoch": 5.0,
"eval_accuracy": 0.3280339162692104,
"eval_loss": 0.8805840015411377,
"eval_runtime": 1.7957,
"eval_samples_per_second": 1344.309,
"eval_steps_per_second": 84.089,
"step": 1231
},
{
"epoch": 5.28,
"learning_rate": 0.0004999506716812021,
"loss": 0.9914,
"step": 1300
},
{
"epoch": 5.69,
"learning_rate": 0.0004997091104496882,
"loss": 0.9375,
"step": 1400
},
{
"epoch": 6.0,
"eval_accuracy": 0.3402263075005411,
"eval_loss": 0.7313582301139832,
"eval_runtime": 1.7974,
"eval_samples_per_second": 1343.047,
"eval_steps_per_second": 84.01,
"step": 1477
},
{
"epoch": 6.09,
"learning_rate": 0.0004992664502959351,
"loss": 0.8598,
"step": 1500
},
{
"epoch": 6.5,
"learning_rate": 0.0004986230477086575,
"loss": 0.8097,
"step": 1600
},
{
"epoch": 6.9,
"learning_rate": 0.0004977794208410241,
"loss": 0.7668,
"step": 1700
},
{
"epoch": 7.0,
"eval_accuracy": 0.3481269173067019,
"eval_loss": 0.6367093324661255,
"eval_runtime": 1.8149,
"eval_samples_per_second": 1330.094,
"eval_steps_per_second": 83.2,
"step": 1723
},
{
"epoch": 7.31,
"learning_rate": 0.0004967362490933723,
"loss": 0.716,
"step": 1800
},
{
"epoch": 7.72,
"learning_rate": 0.0004954943725660643,
"loss": 0.6978,
"step": 1900
},
{
"epoch": 8.0,
"eval_accuracy": 0.35578121617889635,
"eval_loss": 0.5603917241096497,
"eval_runtime": 1.8199,
"eval_samples_per_second": 1326.454,
"eval_steps_per_second": 82.972,
"step": 1970
},
{
"epoch": 8.12,
"learning_rate": 0.0004940547913829275,
"loss": 0.6556,
"step": 2000
},
{
"epoch": 8.53,
"learning_rate": 0.0004924186648858207,
"loss": 0.627,
"step": 2100
},
{
"epoch": 8.93,
"learning_rate": 0.0004905873107009799,
"loss": 0.6133,
"step": 2200
},
{
"epoch": 9.0,
"eval_accuracy": 0.3603864842472962,
"eval_loss": 0.5122300386428833,
"eval_runtime": 1.8088,
"eval_samples_per_second": 1334.561,
"eval_steps_per_second": 83.479,
"step": 2216
},
{
"epoch": 9.34,
"learning_rate": 0.0004885622036778897,
"loss": 0.5846,
"step": 2300
},
{
"epoch": 9.75,
"learning_rate": 0.0004863449747015384,
"loss": 0.5681,
"step": 2400
},
{
"epoch": 10.0,
"eval_accuracy": 0.36240548750905005,
"eval_loss": 0.48733416199684143,
"eval_runtime": 1.781,
"eval_samples_per_second": 1355.443,
"eval_steps_per_second": 84.785,
"step": 2462
},
{
"epoch": 10.15,
"learning_rate": 0.0004839374093790139,
"loss": 0.5537,
"step": 2500
},
{
"epoch": 10.56,
"learning_rate": 0.00048134144660149535,
"loss": 0.5314,
"step": 2600
},
{
"epoch": 10.96,
"learning_rate": 0.0004785591769828005,
"loss": 0.536,
"step": 2700
},
{
"epoch": 11.0,
"eval_accuracy": 0.36355120655037804,
"eval_loss": 0.47042036056518555,
"eval_runtime": 1.8486,
"eval_samples_per_second": 1305.878,
"eval_steps_per_second": 81.685,
"step": 2708
},
{
"epoch": 11.37,
"learning_rate": 0.00047559284117574613,
"loss": 0.5126,
"step": 2800
},
{
"epoch": 11.78,
"learning_rate": 0.0004724448280676768,
"loss": 0.511,
"step": 2900
},
{
"epoch": 12.0,
"eval_accuracy": 0.3647827612202094,
"eval_loss": 0.4570145606994629,
"eval_runtime": 1.8132,
"eval_samples_per_second": 1331.315,
"eval_steps_per_second": 83.276,
"step": 2955
},
{
"epoch": 12.18,
"learning_rate": 0.00046911767285661587,
"loss": 0.4918,
"step": 3000
},
{
"epoch": 12.59,
"learning_rate": 0.0004656140550095876,
"loss": 0.4883,
"step": 3100
},
{
"epoch": 12.99,
"learning_rate": 0.00046193679610475414,
"loss": 0.4929,
"step": 3200
},
{
"epoch": 13.0,
"eval_accuracy": 0.3655067660867164,
"eval_loss": 0.4465464651584625,
"eval_runtime": 1.8353,
"eval_samples_per_second": 1315.328,
"eval_steps_per_second": 82.276,
"step": 3201
},
{
"epoch": 13.4,
"learning_rate": 0.0004580888575591068,
"loss": 0.4634,
"step": 3300
},
{
"epoch": 13.81,
"learning_rate": 0.00045407333824353966,
"loss": 0.4757,
"step": 3400
},
{
"epoch": 14.0,
"eval_accuracy": 0.36605909969621653,
"eval_loss": 0.43762096762657166,
"eval_runtime": 1.8426,
"eval_samples_per_second": 1310.127,
"eval_steps_per_second": 81.951,
"step": 3447
},
{
"epoch": 14.21,
"learning_rate": 0.00044989347198722777,
"loss": 0.4605,
"step": 3500
},
{
"epoch": 14.62,
"learning_rate": 0.00044555262497331783,
"loss": 0.4507,
"step": 3600
},
{
"epoch": 15.0,
"eval_accuracy": 0.3666450211603484,
"eval_loss": 0.42967188358306885,
"eval_runtime": 1.7693,
"eval_samples_per_second": 1364.379,
"eval_steps_per_second": 85.344,
"step": 3693
},
{
"epoch": 15.03,
"learning_rate": 0.0004410542930280316,
"loss": 0.4591,
"step": 3700
},
{
"epoch": 15.43,
"learning_rate": 0.0004364020988053623,
"loss": 0.4366,
"step": 3800
},
{
"epoch": 15.84,
"learning_rate": 0.00043159978886963223,
"loss": 0.4449,
"step": 3900
},
{
"epoch": 16.0,
"eval_accuracy": 0.3675183053807743,
"eval_loss": 0.4223393499851227,
"eval_runtime": 1.82,
"eval_samples_per_second": 1326.352,
"eval_steps_per_second": 82.966,
"step": 3940
},
{
"epoch": 16.24,
"learning_rate": 0.0004266512306782628,
"loss": 0.4323,
"step": 4000
},
{
"epoch": 16.65,
"learning_rate": 0.00042156040946718344,
"loss": 0.4312,
"step": 4100
},
{
"epoch": 17.0,
"eval_accuracy": 0.36820125842495355,
"eval_loss": 0.4195675849914551,
"eval_runtime": 1.8599,
"eval_samples_per_second": 1297.945,
"eval_steps_per_second": 81.189,
"step": 4186
},
{
"epoch": 17.06,
"learning_rate": 0.00041633142504139133,
"loss": 0.4315,
"step": 4200
},
{
"epoch": 17.46,
"learning_rate": 0.00041096848847324417,
"loss": 0.4158,
"step": 4300
},
{
"epoch": 17.87,
"learning_rate": 0.0004054759187111451,
"loss": 0.4252,
"step": 4400
},
{
"epoch": 18.0,
"eval_accuracy": 0.36844383737507186,
"eval_loss": 0.41086554527282715,
"eval_runtime": 1.8281,
"eval_samples_per_second": 1320.494,
"eval_steps_per_second": 82.599,
"step": 4432
},
{
"epoch": 18.27,
"learning_rate": 0.00039985813910135305,
"loss": 0.4129,
"step": 4500
},
{
"epoch": 18.68,
"learning_rate": 0.00039411967382571643,
"loss": 0.4102,
"step": 4600
},
{
"epoch": 19.0,
"eval_accuracy": 0.3685520649066631,
"eval_loss": 0.40959808230400085,
"eval_runtime": 1.835,
"eval_samples_per_second": 1315.545,
"eval_steps_per_second": 82.29,
"step": 4678
},
{
"epoch": 19.09,
"learning_rate": 0.0003882651442582019,
"loss": 0.4104,
"step": 4700
},
{
"epoch": 19.49,
"learning_rate": 0.00038229926524315015,
"loss": 0.3982,
"step": 4800
},
{
"epoch": 19.9,
"learning_rate": 0.0003762268412982577,
"loss": 0.4092,
"step": 4900
},
{
"epoch": 20.0,
"eval_accuracy": 0.3690036349522679,
"eval_loss": 0.4085357189178467,
"eval_runtime": 1.7636,
"eval_samples_per_second": 1368.753,
"eval_steps_per_second": 85.618,
"step": 4925
},
{
"epoch": 20.3,
"learning_rate": 0.00037005276274534144,
"loss": 0.3863,
"step": 5000
},
{
"epoch": 20.71,
"learning_rate": 0.0003637820017720022,
"loss": 0.3941,
"step": 5100
},
{
"epoch": 21.0,
"eval_accuracy": 0.36924248191853826,
"eval_loss": 0.4053109884262085,
"eval_runtime": 1.7869,
"eval_samples_per_second": 1350.961,
"eval_steps_per_second": 84.505,
"step": 5171
},
{
"epoch": 21.12,
"learning_rate": 0.00035741960842735953,
"loss": 0.3992,
"step": 5200
},
{
"epoch": 21.52,
"learning_rate": 0.0003509707065550817,
"loss": 0.3846,
"step": 5300
},
{
"epoch": 21.93,
"learning_rate": 0.00034444048966698643,
"loss": 0.3882,
"step": 5400
},
{
"epoch": 22.0,
"eval_accuracy": 0.3694365450786329,
"eval_loss": 0.40214401483535767,
"eval_runtime": 1.8091,
"eval_samples_per_second": 1334.335,
"eval_steps_per_second": 83.465,
"step": 5417
},
{
"epoch": 22.34,
"learning_rate": 0.0003378342167605362,
"loss": 0.3787,
"step": 5500
},
{
"epoch": 22.74,
"learning_rate": 0.00033115720808359495,
"loss": 0.3821,
"step": 5600
},
{
"epoch": 23.0,
"eval_accuracy": 0.3694477410301768,
"eval_loss": 0.4013039767742157,
"eval_runtime": 1.8106,
"eval_samples_per_second": 1333.273,
"eval_steps_per_second": 83.399,
"step": 5663
},
{
"epoch": 23.15,
"learning_rate": 0.0003244148408498587,
"loss": 0.3724,
"step": 5700
},
{
"epoch": 23.55,
"learning_rate": 0.000317612544908409,
"loss": 0.372,
"step": 5800
},
{
"epoch": 23.96,
"learning_rate": 0.000310755798370878,
"loss": 0.3769,
"step": 5900
},
{
"epoch": 24.0,
"eval_accuracy": 0.36991797099502155,
"eval_loss": 0.399305135011673,
"eval_runtime": 1.7971,
"eval_samples_per_second": 1343.28,
"eval_steps_per_second": 84.025,
"step": 5910
},
{
"epoch": 24.37,
"learning_rate": 0.00030385012319974537,
"loss": 0.3671,
"step": 6000
},
{
"epoch": 24.77,
"learning_rate": 0.00029690108076132154,
"loss": 0.3696,
"step": 6100
},
{
"epoch": 25.0,
"eval_accuracy": 0.3692051620800585,
"eval_loss": 0.3980158865451813,
"eval_runtime": 1.8534,
"eval_samples_per_second": 1302.488,
"eval_steps_per_second": 81.473,
"step": 6156
},
{
"epoch": 25.18,
"learning_rate": 0.0002899142673469971,
"loss": 0.3577,
"step": 6200
},
{
"epoch": 25.58,
"learning_rate": 0.00028289530966636625,
"loss": 0.3604,
"step": 6300
},
{
"epoch": 25.99,
"learning_rate": 0.000275849860315853,
"loss": 0.3628,
"step": 6400
},
{
"epoch": 26.0,
"eval_accuracy": 0.36994782686580535,
"eval_loss": 0.39616090059280396,
"eval_runtime": 1.7695,
"eval_samples_per_second": 1364.248,
"eval_steps_per_second": 85.336,
"step": 6402
},
{
"epoch": 26.4,
"learning_rate": 0.0002687835932264908,
"loss": 0.3498,
"step": 6500
},
{
"epoch": 26.8,
"learning_rate": 0.0002617021990945197,
"loss": 0.3587,
"step": 6600
},
{
"epoch": 27.0,
"eval_accuracy": 0.37061958395844063,
"eval_loss": 0.3926030397415161,
"eval_runtime": 1.8394,
"eval_samples_per_second": 1312.376,
"eval_steps_per_second": 82.091,
"step": 6648
},
{
"epoch": 27.21,
"learning_rate": 0.0002546113807984821,
"loss": 0.3534,
"step": 6700
},
{
"epoch": 27.61,
"learning_rate": 0.00024751684880650884,
"loss": 0.3492,
"step": 6800
},
{
"epoch": 28.0,
"eval_accuracy": 0.37057106816841695,
"eval_loss": 0.39055171608924866,
"eval_runtime": 1.8305,
"eval_samples_per_second": 1318.747,
"eval_steps_per_second": 82.49,
"step": 6895
},
{
"epoch": 28.02,
"learning_rate": 0.00024042431657749118,
"loss": 0.3534,
"step": 6900
},
{
"epoch": 28.43,
"learning_rate": 0.0002333394959598461,
"loss": 0.3418,
"step": 7000
},
{
"epoch": 28.83,
"learning_rate": 0.00022626809259157726,
"loss": 0.3461,
"step": 7100
},
{
"epoch": 29.0,
"eval_accuracy": 0.3706457078453764,
"eval_loss": 0.3932913541793823,
"eval_runtime": 1.7675,
"eval_samples_per_second": 1365.756,
"eval_steps_per_second": 85.43,
"step": 7141
},
{
"epoch": 29.24,
"learning_rate": 0.00021921580130533828,
"loss": 0.3412,
"step": 7200
},
{
"epoch": 29.64,
"learning_rate": 0.0002121883015421973,
"loss": 0.3363,
"step": 7300
},
{
"epoch": 30.0,
"eval_accuracy": 0.3706942236354001,
"eval_loss": 0.39353010058403015,
"eval_runtime": 1.8169,
"eval_samples_per_second": 1328.629,
"eval_steps_per_second": 83.108,
"step": 7387
},
{
"epoch": 30.05,
"learning_rate": 0.00020519125277779733,
"loss": 0.3422,
"step": 7400
},
{
"epoch": 30.46,
"learning_rate": 0.00019823028996459485,
"loss": 0.3356,
"step": 7500
},
{
"epoch": 30.86,
"learning_rate": 0.00019131101899384867,
"loss": 0.3337,
"step": 7600
},
{
"epoch": 31.0,
"eval_accuracy": 0.3702277256544034,
"eval_loss": 0.3950214684009552,
"eval_runtime": 1.7977,
"eval_samples_per_second": 1342.849,
"eval_steps_per_second": 83.998,
"step": 7633
},
{
"epoch": 31.0,
"step": 7633,
"total_flos": 564540063409152.0,
"train_loss": 0.7912082670869731,
"train_runtime": 664.7096,
"train_samples_per_second": 592.59,
"train_steps_per_second": 18.504
}
],
"logging_steps": 100,
"max_steps": 12300,
"num_train_epochs": 50,
"save_steps": 500,
"total_flos": 564540063409152.0,
"trial_name": null,
"trial_params": null
}