|
{ |
|
"best_metric": 0.8415765069551777, |
|
"best_model_checkpoint": "cvt-13-384-22k-fv-finetuned-memes/checkpoint-380", |
|
"epoch": 19.987654320987655, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6042, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6e-05, |
|
"loss": 1.3821, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.49690880989180836, |
|
"eval_f1": 0.4458280336574581, |
|
"eval_loss": 1.2779731750488281, |
|
"eval_precision": 0.5083478748744594, |
|
"eval_recall": 0.49690880989180836, |
|
"eval_runtime": 11.7189, |
|
"eval_samples_per_second": 110.42, |
|
"eval_steps_per_second": 1.792, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 9e-05, |
|
"loss": 1.2697, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00012, |
|
"loss": 1.0785, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_accuracy": 0.6669242658423493, |
|
"eval_f1": 0.6499601011830624, |
|
"eval_loss": 0.8633419275283813, |
|
"eval_precision": 0.6658463652527506, |
|
"eval_recall": 0.6669242658423493, |
|
"eval_runtime": 10.8339, |
|
"eval_samples_per_second": 119.44, |
|
"eval_steps_per_second": 1.938, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00011666666666666667, |
|
"loss": 0.8908, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00011333333333333333, |
|
"loss": 0.8862, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.7217928902627512, |
|
"eval_f1": 0.701310570825714, |
|
"eval_loss": 0.71103435754776, |
|
"eval_precision": 0.7257543548866834, |
|
"eval_recall": 0.7217928902627512, |
|
"eval_runtime": 11.1739, |
|
"eval_samples_per_second": 115.806, |
|
"eval_steps_per_second": 1.879, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 0.00011, |
|
"loss": 0.7375, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 0.00010666666666666667, |
|
"loss": 0.665, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_accuracy": 0.8044822256568779, |
|
"eval_f1": 0.805017946518536, |
|
"eval_loss": 0.5514629483222961, |
|
"eval_precision": 0.8136720242079513, |
|
"eval_recall": 0.8044822256568779, |
|
"eval_runtime": 10.847, |
|
"eval_samples_per_second": 119.295, |
|
"eval_steps_per_second": 1.936, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.00010333333333333334, |
|
"loss": 0.6256, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 0.0001, |
|
"loss": 0.6056, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.7959814528593508, |
|
"eval_f1": 0.7846029654723703, |
|
"eval_loss": 0.5956416726112366, |
|
"eval_precision": 0.8040705084962495, |
|
"eval_recall": 0.7959814528593508, |
|
"eval_runtime": 11.1412, |
|
"eval_samples_per_second": 116.145, |
|
"eval_steps_per_second": 1.885, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 9.666666666666667e-05, |
|
"loss": 0.5889, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 9.333333333333334e-05, |
|
"loss": 0.4779, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"eval_accuracy": 0.7936630602782071, |
|
"eval_f1": 0.7856708002513016, |
|
"eval_loss": 0.622891366481781, |
|
"eval_precision": 0.7944777820954576, |
|
"eval_recall": 0.7936630602782071, |
|
"eval_runtime": 12.2285, |
|
"eval_samples_per_second": 105.818, |
|
"eval_steps_per_second": 1.717, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 9e-05, |
|
"loss": 0.4969, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 8.666666666666667e-05, |
|
"loss": 0.4554, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.8098918083462133, |
|
"eval_f1": 0.8086497823932973, |
|
"eval_loss": 0.5355423092842102, |
|
"eval_precision": 0.8125537937361387, |
|
"eval_recall": 0.8098918083462133, |
|
"eval_runtime": 11.0611, |
|
"eval_samples_per_second": 116.986, |
|
"eval_steps_per_second": 1.899, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 8.333333333333333e-05, |
|
"loss": 0.475, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 7.999999999999999e-05, |
|
"loss": 0.4249, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_accuracy": 0.8268933539412674, |
|
"eval_f1": 0.8235690575721668, |
|
"eval_loss": 0.5447452664375305, |
|
"eval_precision": 0.8274671013460899, |
|
"eval_recall": 0.8268933539412674, |
|
"eval_runtime": 10.9569, |
|
"eval_samples_per_second": 118.1, |
|
"eval_steps_per_second": 1.917, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 7.666666666666667e-05, |
|
"loss": 0.4241, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 7.333333333333334e-05, |
|
"loss": 0.4313, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.8153013910355487, |
|
"eval_f1": 0.8132391167433421, |
|
"eval_loss": 0.5530120730400085, |
|
"eval_precision": 0.8139736548329274, |
|
"eval_recall": 0.8153013910355487, |
|
"eval_runtime": 10.8836, |
|
"eval_samples_per_second": 118.895, |
|
"eval_steps_per_second": 1.93, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 7.000000000000001e-05, |
|
"loss": 0.4092, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 0.423, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.99, |
|
"eval_accuracy": 0.8238021638330757, |
|
"eval_f1": 0.8222730304295096, |
|
"eval_loss": 0.5345684885978699, |
|
"eval_precision": 0.8230247813283141, |
|
"eval_recall": 0.8238021638330757, |
|
"eval_runtime": 10.942, |
|
"eval_samples_per_second": 118.259, |
|
"eval_steps_per_second": 1.919, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.49, |
|
"learning_rate": 6.333333333333333e-05, |
|
"loss": 0.3905, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 6e-05, |
|
"loss": 0.3997, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.8338485316846986, |
|
"eval_f1": 0.8338050888395913, |
|
"eval_loss": 0.5413401126861572, |
|
"eval_precision": 0.8346663333915662, |
|
"eval_recall": 0.8338485316846986, |
|
"eval_runtime": 11.954, |
|
"eval_samples_per_second": 108.248, |
|
"eval_steps_per_second": 1.757, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 5.6666666666666664e-05, |
|
"loss": 0.3824, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 0.4095, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 11.99, |
|
"eval_accuracy": 0.8207109737248841, |
|
"eval_f1": 0.8177361362413736, |
|
"eval_loss": 0.5999411344528198, |
|
"eval_precision": 0.8230816829155458, |
|
"eval_recall": 0.8207109737248841, |
|
"eval_runtime": 11.094, |
|
"eval_samples_per_second": 116.639, |
|
"eval_steps_per_second": 1.893, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 12.49, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3849, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 0.3979, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.8284389489953632, |
|
"eval_f1": 0.825049010410594, |
|
"eval_loss": 0.563246488571167, |
|
"eval_precision": 0.8254511686180086, |
|
"eval_recall": 0.8284389489953632, |
|
"eval_runtime": 11.1372, |
|
"eval_samples_per_second": 116.187, |
|
"eval_steps_per_second": 1.886, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 13.49, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 0.3571, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"learning_rate": 3.9999999999999996e-05, |
|
"loss": 0.3408, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 13.99, |
|
"eval_accuracy": 0.8207109737248841, |
|
"eval_f1": 0.819562228886707, |
|
"eval_loss": 0.5724937319755554, |
|
"eval_precision": 0.8198440242244688, |
|
"eval_recall": 0.8207109737248841, |
|
"eval_runtime": 10.994, |
|
"eval_samples_per_second": 117.7, |
|
"eval_steps_per_second": 1.91, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 3.666666666666667e-05, |
|
"loss": 0.3746, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.3828, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.8276661514683153, |
|
"eval_f1": 0.8260480131366693, |
|
"eval_loss": 0.5630865693092346, |
|
"eval_precision": 0.8257927020758725, |
|
"eval_recall": 0.8276661514683153, |
|
"eval_runtime": 10.9146, |
|
"eval_samples_per_second": 118.557, |
|
"eval_steps_per_second": 1.924, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3506, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 0.3595, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 15.99, |
|
"eval_accuracy": 0.8307573415765069, |
|
"eval_f1": 0.8275327108334436, |
|
"eval_loss": 0.6005313396453857, |
|
"eval_precision": 0.8296678468094887, |
|
"eval_recall": 0.8307573415765069, |
|
"eval_runtime": 11.7668, |
|
"eval_samples_per_second": 109.971, |
|
"eval_steps_per_second": 1.785, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 16.49, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 0.3535, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 0.3789, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.8299845440494591, |
|
"eval_f1": 0.8273370558566532, |
|
"eval_loss": 0.5840371251106262, |
|
"eval_precision": 0.827140388432744, |
|
"eval_recall": 0.8299845440494591, |
|
"eval_runtime": 10.837, |
|
"eval_samples_per_second": 119.405, |
|
"eval_steps_per_second": 1.938, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 17.49, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.389, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 0.3545, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 17.99, |
|
"eval_accuracy": 0.8245749613601236, |
|
"eval_f1": 0.8222146970644545, |
|
"eval_loss": 0.5983098745346069, |
|
"eval_precision": 0.8226259868849637, |
|
"eval_recall": 0.8245749613601236, |
|
"eval_runtime": 10.7069, |
|
"eval_samples_per_second": 120.857, |
|
"eval_steps_per_second": 1.961, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 18.49, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 0.3661, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.3472, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.8415765069551777, |
|
"eval_f1": 0.8390361807029366, |
|
"eval_loss": 0.5795101523399353, |
|
"eval_precision": 0.838184001941316, |
|
"eval_recall": 0.8415765069551777, |
|
"eval_runtime": 10.7681, |
|
"eval_samples_per_second": 120.17, |
|
"eval_steps_per_second": 1.95, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.3654, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"learning_rate": 0.0, |
|
"loss": 0.355, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"eval_accuracy": 0.8315301391035549, |
|
"eval_f1": 0.8292026505769348, |
|
"eval_loss": 0.5761234760284424, |
|
"eval_precision": 0.8302128280229624, |
|
"eval_recall": 0.8315301391035549, |
|
"eval_runtime": 11.7079, |
|
"eval_samples_per_second": 110.524, |
|
"eval_steps_per_second": 1.794, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 19.99, |
|
"step": 400, |
|
"total_flos": 1.832202236159447e+18, |
|
"train_loss": 0.5447965228557586, |
|
"train_runtime": 1430.1645, |
|
"train_samples_per_second": 72.355, |
|
"train_steps_per_second": 0.28 |
|
} |
|
], |
|
"max_steps": 400, |
|
"num_train_epochs": 20, |
|
"total_flos": 1.832202236159447e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|