{ "best_metric": 0.39055171608924866, "best_model_checkpoint": "/home/stephan/code/molreactgen/checkpoints/2023-11-07_18-29-28_experiment/checkpoint-6895", "epoch": 30.996954314720814, "eval_steps": 500, "global_step": 7633, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.41, "learning_rate": 4.065040650406504e-05, "loss": 6.0993, "step": 100 }, { "epoch": 0.81, "learning_rate": 8.130081300813008e-05, "loss": 5.0338, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.2072072072072072, "eval_loss": 3.800293445587158, "eval_runtime": 1.7789, "eval_samples_per_second": 1357.054, "eval_steps_per_second": 84.886, "step": 246 }, { "epoch": 1.22, "learning_rate": 0.00012195121951219512, "loss": 3.8428, "step": 300 }, { "epoch": 1.62, "learning_rate": 0.00016260162601626016, "loss": 2.8076, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.25823462236055444, "eval_loss": 1.976719856262207, "eval_runtime": 1.774, "eval_samples_per_second": 1360.786, "eval_steps_per_second": 85.12, "step": 492 }, { "epoch": 2.03, "learning_rate": 0.0002032520325203252, "loss": 2.2493, "step": 500 }, { "epoch": 2.44, "learning_rate": 0.00024390243902439024, "loss": 1.9599, "step": 600 }, { "epoch": 2.84, "learning_rate": 0.0002845528455284553, "loss": 1.7151, "step": 700 }, { "epoch": 3.0, "eval_accuracy": 0.2894563992327041, "eval_loss": 1.4048570394515991, "eval_runtime": 1.775, "eval_samples_per_second": 1360.028, "eval_steps_per_second": 85.072, "step": 738 }, { "epoch": 3.25, "learning_rate": 0.0003252032520325203, "loss": 1.5375, "step": 800 }, { "epoch": 3.65, "learning_rate": 0.00036585365853658537, "loss": 1.3954, "step": 900 }, { "epoch": 4.0, "eval_accuracy": 0.312210304753801, "eval_loss": 1.0894988775253296, "eval_runtime": 1.8067, "eval_samples_per_second": 1336.115, "eval_steps_per_second": 83.576, "step": 985 }, { "epoch": 4.06, "learning_rate": 0.0004065040650406504, "loss": 1.2718, "step": 1000 }, { "epoch": 4.47, "learning_rate": 0.00044715447154471545, "loss": 1.1709, "step": 1100 }, { "epoch": 4.87, "learning_rate": 0.0004878048780487805, "loss": 1.0895, "step": 1200 }, { "epoch": 5.0, "eval_accuracy": 0.3280339162692104, "eval_loss": 0.8805840015411377, "eval_runtime": 1.7957, "eval_samples_per_second": 1344.309, "eval_steps_per_second": 84.089, "step": 1231 }, { "epoch": 5.28, "learning_rate": 0.0004999506716812021, "loss": 0.9914, "step": 1300 }, { "epoch": 5.69, "learning_rate": 0.0004997091104496882, "loss": 0.9375, "step": 1400 }, { "epoch": 6.0, "eval_accuracy": 0.3402263075005411, "eval_loss": 0.7313582301139832, "eval_runtime": 1.7974, "eval_samples_per_second": 1343.047, "eval_steps_per_second": 84.01, "step": 1477 }, { "epoch": 6.09, "learning_rate": 0.0004992664502959351, "loss": 0.8598, "step": 1500 }, { "epoch": 6.5, "learning_rate": 0.0004986230477086575, "loss": 0.8097, "step": 1600 }, { "epoch": 6.9, "learning_rate": 0.0004977794208410241, "loss": 0.7668, "step": 1700 }, { "epoch": 7.0, "eval_accuracy": 0.3481269173067019, "eval_loss": 0.6367093324661255, "eval_runtime": 1.8149, "eval_samples_per_second": 1330.094, "eval_steps_per_second": 83.2, "step": 1723 }, { "epoch": 7.31, "learning_rate": 0.0004967362490933723, "loss": 0.716, "step": 1800 }, { "epoch": 7.72, "learning_rate": 0.0004954943725660643, "loss": 0.6978, "step": 1900 }, { "epoch": 8.0, "eval_accuracy": 0.35578121617889635, "eval_loss": 0.5603917241096497, "eval_runtime": 1.8199, "eval_samples_per_second": 1326.454, "eval_steps_per_second": 82.972, "step": 1970 }, { "epoch": 8.12, "learning_rate": 0.0004940547913829275, "loss": 0.6556, "step": 2000 }, { "epoch": 8.53, "learning_rate": 0.0004924186648858207, "loss": 0.627, "step": 2100 }, { "epoch": 8.93, "learning_rate": 0.0004905873107009799, "loss": 0.6133, "step": 2200 }, { "epoch": 9.0, "eval_accuracy": 0.3603864842472962, "eval_loss": 0.5122300386428833, "eval_runtime": 1.8088, "eval_samples_per_second": 1334.561, "eval_steps_per_second": 83.479, "step": 2216 }, { "epoch": 9.34, "learning_rate": 0.0004885622036778897, "loss": 0.5846, "step": 2300 }, { "epoch": 9.75, "learning_rate": 0.0004863449747015384, "loss": 0.5681, "step": 2400 }, { "epoch": 10.0, "eval_accuracy": 0.36240548750905005, "eval_loss": 0.48733416199684143, "eval_runtime": 1.781, "eval_samples_per_second": 1355.443, "eval_steps_per_second": 84.785, "step": 2462 }, { "epoch": 10.15, "learning_rate": 0.0004839374093790139, "loss": 0.5537, "step": 2500 }, { "epoch": 10.56, "learning_rate": 0.00048134144660149535, "loss": 0.5314, "step": 2600 }, { "epoch": 10.96, "learning_rate": 0.0004785591769828005, "loss": 0.536, "step": 2700 }, { "epoch": 11.0, "eval_accuracy": 0.36355120655037804, "eval_loss": 0.47042036056518555, "eval_runtime": 1.8486, "eval_samples_per_second": 1305.878, "eval_steps_per_second": 81.685, "step": 2708 }, { "epoch": 11.37, "learning_rate": 0.00047559284117574613, "loss": 0.5126, "step": 2800 }, { "epoch": 11.78, "learning_rate": 0.0004724448280676768, "loss": 0.511, "step": 2900 }, { "epoch": 12.0, "eval_accuracy": 0.3647827612202094, "eval_loss": 0.4570145606994629, "eval_runtime": 1.8132, "eval_samples_per_second": 1331.315, "eval_steps_per_second": 83.276, "step": 2955 }, { "epoch": 12.18, "learning_rate": 0.00046911767285661587, "loss": 0.4918, "step": 3000 }, { "epoch": 12.59, "learning_rate": 0.0004656140550095876, "loss": 0.4883, "step": 3100 }, { "epoch": 12.99, "learning_rate": 0.00046193679610475414, "loss": 0.4929, "step": 3200 }, { "epoch": 13.0, "eval_accuracy": 0.3655067660867164, "eval_loss": 0.4465464651584625, "eval_runtime": 1.8353, "eval_samples_per_second": 1315.328, "eval_steps_per_second": 82.276, "step": 3201 }, { "epoch": 13.4, "learning_rate": 0.0004580888575591068, "loss": 0.4634, "step": 3300 }, { "epoch": 13.81, "learning_rate": 0.00045407333824353966, "loss": 0.4757, "step": 3400 }, { "epoch": 14.0, "eval_accuracy": 0.36605909969621653, "eval_loss": 0.43762096762657166, "eval_runtime": 1.8426, "eval_samples_per_second": 1310.127, "eval_steps_per_second": 81.951, "step": 3447 }, { "epoch": 14.21, "learning_rate": 0.00044989347198722777, "loss": 0.4605, "step": 3500 }, { "epoch": 14.62, "learning_rate": 0.00044555262497331783, "loss": 0.4507, "step": 3600 }, { "epoch": 15.0, "eval_accuracy": 0.3666450211603484, "eval_loss": 0.42967188358306885, "eval_runtime": 1.7693, "eval_samples_per_second": 1364.379, "eval_steps_per_second": 85.344, "step": 3693 }, { "epoch": 15.03, "learning_rate": 0.0004410542930280316, "loss": 0.4591, "step": 3700 }, { "epoch": 15.43, "learning_rate": 0.0004364020988053623, "loss": 0.4366, "step": 3800 }, { "epoch": 15.84, "learning_rate": 0.00043159978886963223, "loss": 0.4449, "step": 3900 }, { "epoch": 16.0, "eval_accuracy": 0.3675183053807743, "eval_loss": 0.4223393499851227, "eval_runtime": 1.82, "eval_samples_per_second": 1326.352, "eval_steps_per_second": 82.966, "step": 3940 }, { "epoch": 16.24, "learning_rate": 0.0004266512306782628, "loss": 0.4323, "step": 4000 }, { "epoch": 16.65, "learning_rate": 0.00042156040946718344, "loss": 0.4312, "step": 4100 }, { "epoch": 17.0, "eval_accuracy": 0.36820125842495355, "eval_loss": 0.4195675849914551, "eval_runtime": 1.8599, "eval_samples_per_second": 1297.945, "eval_steps_per_second": 81.189, "step": 4186 }, { "epoch": 17.06, "learning_rate": 0.00041633142504139133, "loss": 0.4315, "step": 4200 }, { "epoch": 17.46, "learning_rate": 0.00041096848847324417, "loss": 0.4158, "step": 4300 }, { "epoch": 17.87, "learning_rate": 0.0004054759187111451, "loss": 0.4252, "step": 4400 }, { "epoch": 18.0, "eval_accuracy": 0.36844383737507186, "eval_loss": 0.41086554527282715, "eval_runtime": 1.8281, "eval_samples_per_second": 1320.494, "eval_steps_per_second": 82.599, "step": 4432 }, { "epoch": 18.27, "learning_rate": 0.00039985813910135305, "loss": 0.4129, "step": 4500 }, { "epoch": 18.68, "learning_rate": 0.00039411967382571643, "loss": 0.4102, "step": 4600 }, { "epoch": 19.0, "eval_accuracy": 0.3685520649066631, "eval_loss": 0.40959808230400085, "eval_runtime": 1.835, "eval_samples_per_second": 1315.545, "eval_steps_per_second": 82.29, "step": 4678 }, { "epoch": 19.09, "learning_rate": 0.0003882651442582019, "loss": 0.4104, "step": 4700 }, { "epoch": 19.49, "learning_rate": 0.00038229926524315015, "loss": 0.3982, "step": 4800 }, { "epoch": 19.9, "learning_rate": 0.0003762268412982577, "loss": 0.4092, "step": 4900 }, { "epoch": 20.0, "eval_accuracy": 0.3690036349522679, "eval_loss": 0.4085357189178467, "eval_runtime": 1.7636, "eval_samples_per_second": 1368.753, "eval_steps_per_second": 85.618, "step": 4925 }, { "epoch": 20.3, "learning_rate": 0.00037005276274534144, "loss": 0.3863, "step": 5000 }, { "epoch": 20.71, "learning_rate": 0.0003637820017720022, "loss": 0.3941, "step": 5100 }, { "epoch": 21.0, "eval_accuracy": 0.36924248191853826, "eval_loss": 0.4053109884262085, "eval_runtime": 1.7869, "eval_samples_per_second": 1350.961, "eval_steps_per_second": 84.505, "step": 5171 }, { "epoch": 21.12, "learning_rate": 0.00035741960842735953, "loss": 0.3992, "step": 5200 }, { "epoch": 21.52, "learning_rate": 0.0003509707065550817, "loss": 0.3846, "step": 5300 }, { "epoch": 21.93, "learning_rate": 0.00034444048966698643, "loss": 0.3882, "step": 5400 }, { "epoch": 22.0, "eval_accuracy": 0.3694365450786329, "eval_loss": 0.40214401483535767, "eval_runtime": 1.8091, "eval_samples_per_second": 1334.335, "eval_steps_per_second": 83.465, "step": 5417 }, { "epoch": 22.34, "learning_rate": 0.0003378342167605362, "loss": 0.3787, "step": 5500 }, { "epoch": 22.74, "learning_rate": 0.00033115720808359495, "loss": 0.3821, "step": 5600 }, { "epoch": 23.0, "eval_accuracy": 0.3694477410301768, "eval_loss": 0.4013039767742157, "eval_runtime": 1.8106, "eval_samples_per_second": 1333.273, "eval_steps_per_second": 83.399, "step": 5663 }, { "epoch": 23.15, "learning_rate": 0.0003244148408498587, "loss": 0.3724, "step": 5700 }, { "epoch": 23.55, "learning_rate": 0.000317612544908409, "loss": 0.372, "step": 5800 }, { "epoch": 23.96, "learning_rate": 0.000310755798370878, "loss": 0.3769, "step": 5900 }, { "epoch": 24.0, "eval_accuracy": 0.36991797099502155, "eval_loss": 0.399305135011673, "eval_runtime": 1.7971, "eval_samples_per_second": 1343.28, "eval_steps_per_second": 84.025, "step": 5910 }, { "epoch": 24.37, "learning_rate": 0.00030385012319974537, "loss": 0.3671, "step": 6000 }, { "epoch": 24.77, "learning_rate": 0.00029690108076132154, "loss": 0.3696, "step": 6100 }, { "epoch": 25.0, "eval_accuracy": 0.3692051620800585, "eval_loss": 0.3980158865451813, "eval_runtime": 1.8534, "eval_samples_per_second": 1302.488, "eval_steps_per_second": 81.473, "step": 6156 }, { "epoch": 25.18, "learning_rate": 0.0002899142673469971, "loss": 0.3577, "step": 6200 }, { "epoch": 25.58, "learning_rate": 0.00028289530966636625, "loss": 0.3604, "step": 6300 }, { "epoch": 25.99, "learning_rate": 0.000275849860315853, "loss": 0.3628, "step": 6400 }, { "epoch": 26.0, "eval_accuracy": 0.36994782686580535, "eval_loss": 0.39616090059280396, "eval_runtime": 1.7695, "eval_samples_per_second": 1364.248, "eval_steps_per_second": 85.336, "step": 6402 }, { "epoch": 26.4, "learning_rate": 0.0002687835932264908, "loss": 0.3498, "step": 6500 }, { "epoch": 26.8, "learning_rate": 0.0002617021990945197, "loss": 0.3587, "step": 6600 }, { "epoch": 27.0, "eval_accuracy": 0.37061958395844063, "eval_loss": 0.3926030397415161, "eval_runtime": 1.8394, "eval_samples_per_second": 1312.376, "eval_steps_per_second": 82.091, "step": 6648 }, { "epoch": 27.21, "learning_rate": 0.0002546113807984821, "loss": 0.3534, "step": 6700 }, { "epoch": 27.61, "learning_rate": 0.00024751684880650884, "loss": 0.3492, "step": 6800 }, { "epoch": 28.0, "eval_accuracy": 0.37057106816841695, "eval_loss": 0.39055171608924866, "eval_runtime": 1.8305, "eval_samples_per_second": 1318.747, "eval_steps_per_second": 82.49, "step": 6895 }, { "epoch": 28.02, "learning_rate": 0.00024042431657749118, "loss": 0.3534, "step": 6900 }, { "epoch": 28.43, "learning_rate": 0.0002333394959598461, "loss": 0.3418, "step": 7000 }, { "epoch": 28.83, "learning_rate": 0.00022626809259157726, "loss": 0.3461, "step": 7100 }, { "epoch": 29.0, "eval_accuracy": 0.3706457078453764, "eval_loss": 0.3932913541793823, "eval_runtime": 1.7675, "eval_samples_per_second": 1365.756, "eval_steps_per_second": 85.43, "step": 7141 }, { "epoch": 29.24, "learning_rate": 0.00021921580130533828, "loss": 0.3412, "step": 7200 }, { "epoch": 29.64, "learning_rate": 0.0002121883015421973, "loss": 0.3363, "step": 7300 }, { "epoch": 30.0, "eval_accuracy": 0.3706942236354001, "eval_loss": 0.39353010058403015, "eval_runtime": 1.8169, "eval_samples_per_second": 1328.629, "eval_steps_per_second": 83.108, "step": 7387 }, { "epoch": 30.05, "learning_rate": 0.00020519125277779733, "loss": 0.3422, "step": 7400 }, { "epoch": 30.46, "learning_rate": 0.00019823028996459485, "loss": 0.3356, "step": 7500 }, { "epoch": 30.86, "learning_rate": 0.00019131101899384867, "loss": 0.3337, "step": 7600 }, { "epoch": 31.0, "eval_accuracy": 0.3702277256544034, "eval_loss": 0.3950214684009552, "eval_runtime": 1.7977, "eval_samples_per_second": 1342.849, "eval_steps_per_second": 83.998, "step": 7633 }, { "epoch": 31.0, "step": 7633, "total_flos": 564540063409152.0, "train_loss": 0.7912082670869731, "train_runtime": 664.7096, "train_samples_per_second": 592.59, "train_steps_per_second": 18.504 } ], "logging_steps": 100, "max_steps": 12300, "num_train_epochs": 50, "save_steps": 500, "total_flos": 564540063409152.0, "trial_name": null, "trial_params": null }