|
{ |
|
"best_metric": 0.19081147015094757, |
|
"best_model_checkpoint": "Resneteau-50-2024_09_23-batch-size32_freeze/checkpoint-4914", |
|
"epoch": 28.0, |
|
"eval_steps": 500, |
|
"global_step": 7644, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.08766458766458766, |
|
"eval_f1_macro": 0.226738844317642, |
|
"eval_f1_micro": 0.5801698557249565, |
|
"eval_loss": 0.24598382413387299, |
|
"eval_runtime": 416.4206, |
|
"eval_samples_per_second": 6.93, |
|
"eval_steps_per_second": 0.219, |
|
"learning_rate": 0.001, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.8315018315018317, |
|
"grad_norm": 0.0508086197078228, |
|
"learning_rate": 0.001, |
|
"loss": 0.2786, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.13686763686763687, |
|
"eval_f1_macro": 0.3160165508599939, |
|
"eval_f1_micro": 0.6411905904944791, |
|
"eval_loss": 0.22168199717998505, |
|
"eval_runtime": 395.166, |
|
"eval_samples_per_second": 7.303, |
|
"eval_steps_per_second": 0.23, |
|
"learning_rate": 0.001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.14864864864864866, |
|
"eval_f1_macro": 0.3580673052862397, |
|
"eval_f1_micro": 0.6595584072466503, |
|
"eval_loss": 0.21166761219501495, |
|
"eval_runtime": 401.6658, |
|
"eval_samples_per_second": 7.185, |
|
"eval_steps_per_second": 0.227, |
|
"learning_rate": 0.001, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 3.663003663003663, |
|
"grad_norm": 0.04649221897125244, |
|
"learning_rate": 0.001, |
|
"loss": 0.231, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.16181566181566182, |
|
"eval_f1_macro": 0.3831121485565155, |
|
"eval_f1_micro": 0.6673936750272628, |
|
"eval_loss": 0.20492619276046753, |
|
"eval_runtime": 411.5182, |
|
"eval_samples_per_second": 7.013, |
|
"eval_steps_per_second": 0.221, |
|
"learning_rate": 0.001, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.1677061677061677, |
|
"eval_f1_macro": 0.3964602797407069, |
|
"eval_f1_micro": 0.6707461695365495, |
|
"eval_loss": 0.20162147283554077, |
|
"eval_runtime": 414.4686, |
|
"eval_samples_per_second": 6.963, |
|
"eval_steps_per_second": 0.22, |
|
"learning_rate": 0.001, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.03698631748557091, |
|
"learning_rate": 0.001, |
|
"loss": 0.2206, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.1677061677061677, |
|
"eval_f1_macro": 0.40758628553731013, |
|
"eval_f1_micro": 0.6719734660033168, |
|
"eval_loss": 0.20019273459911346, |
|
"eval_runtime": 422.4235, |
|
"eval_samples_per_second": 6.832, |
|
"eval_steps_per_second": 0.215, |
|
"learning_rate": 0.001, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.17463617463617465, |
|
"eval_f1_macro": 0.4142080471846538, |
|
"eval_f1_micro": 0.6751762240426747, |
|
"eval_loss": 0.19761690497398376, |
|
"eval_runtime": 419.1011, |
|
"eval_samples_per_second": 6.886, |
|
"eval_steps_per_second": 0.217, |
|
"learning_rate": 0.001, |
|
"step": 1911 |
|
}, |
|
{ |
|
"epoch": 7.326007326007326, |
|
"grad_norm": 0.05039607360959053, |
|
"learning_rate": 0.001, |
|
"loss": 0.2157, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.17636867636867637, |
|
"eval_f1_macro": 0.42809095916498113, |
|
"eval_f1_micro": 0.6823529411764706, |
|
"eval_loss": 0.19706940650939941, |
|
"eval_runtime": 419.7988, |
|
"eval_samples_per_second": 6.875, |
|
"eval_steps_per_second": 0.217, |
|
"learning_rate": 0.001, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.17636867636867637, |
|
"eval_f1_macro": 0.43000179684162393, |
|
"eval_f1_micro": 0.6844589857443328, |
|
"eval_loss": 0.19613835215568542, |
|
"eval_runtime": 418.2306, |
|
"eval_samples_per_second": 6.9, |
|
"eval_steps_per_second": 0.218, |
|
"learning_rate": 0.001, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 9.157509157509157, |
|
"grad_norm": 0.05213358625769615, |
|
"learning_rate": 0.001, |
|
"loss": 0.2127, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.18052668052668053, |
|
"eval_f1_macro": 0.4264062108185488, |
|
"eval_f1_micro": 0.676261056657901, |
|
"eval_loss": 0.19443827867507935, |
|
"eval_runtime": 412.2909, |
|
"eval_samples_per_second": 7.0, |
|
"eval_steps_per_second": 0.221, |
|
"learning_rate": 0.001, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 10.989010989010989, |
|
"grad_norm": 0.035016052424907684, |
|
"learning_rate": 0.001, |
|
"loss": 0.2117, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.1781011781011781, |
|
"eval_f1_macro": 0.43914447135579204, |
|
"eval_f1_micro": 0.6902341199514971, |
|
"eval_loss": 0.19399969279766083, |
|
"eval_runtime": 415.1772, |
|
"eval_samples_per_second": 6.951, |
|
"eval_steps_per_second": 0.219, |
|
"learning_rate": 0.001, |
|
"step": 3003 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.1729036729036729, |
|
"eval_f1_macro": 0.45234247782022446, |
|
"eval_f1_micro": 0.6938511326860841, |
|
"eval_loss": 0.19451384246349335, |
|
"eval_runtime": 421.6946, |
|
"eval_samples_per_second": 6.844, |
|
"eval_steps_per_second": 0.216, |
|
"learning_rate": 0.001, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 12.820512820512821, |
|
"grad_norm": 0.051621340215206146, |
|
"learning_rate": 0.001, |
|
"loss": 0.2107, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.1794871794871795, |
|
"eval_f1_macro": 0.44605482120784584, |
|
"eval_f1_micro": 0.6907971453892439, |
|
"eval_loss": 0.19363747537136078, |
|
"eval_runtime": 400.266, |
|
"eval_samples_per_second": 7.21, |
|
"eval_steps_per_second": 0.227, |
|
"learning_rate": 0.001, |
|
"step": 3549 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.1781011781011781, |
|
"eval_f1_macro": 0.44244925103284655, |
|
"eval_f1_micro": 0.6916442548455903, |
|
"eval_loss": 0.1931454837322235, |
|
"eval_runtime": 399.4345, |
|
"eval_samples_per_second": 7.225, |
|
"eval_steps_per_second": 0.228, |
|
"learning_rate": 0.001, |
|
"step": 3822 |
|
}, |
|
{ |
|
"epoch": 14.652014652014651, |
|
"grad_norm": 0.044662874191999435, |
|
"learning_rate": 0.001, |
|
"loss": 0.2105, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.18087318087318088, |
|
"eval_f1_macro": 0.44307178033824657, |
|
"eval_f1_micro": 0.6936180088187515, |
|
"eval_loss": 0.1935158371925354, |
|
"eval_runtime": 402.2391, |
|
"eval_samples_per_second": 7.175, |
|
"eval_steps_per_second": 0.226, |
|
"learning_rate": 0.001, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.18052668052668053, |
|
"eval_f1_macro": 0.4428841041517678, |
|
"eval_f1_micro": 0.6895936942854461, |
|
"eval_loss": 0.19309590756893158, |
|
"eval_runtime": 394.2567, |
|
"eval_samples_per_second": 7.32, |
|
"eval_steps_per_second": 0.231, |
|
"learning_rate": 0.001, |
|
"step": 4368 |
|
}, |
|
{ |
|
"epoch": 16.483516483516482, |
|
"grad_norm": 0.041027914732694626, |
|
"learning_rate": 0.001, |
|
"loss": 0.2086, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.18191268191268192, |
|
"eval_f1_macro": 0.4411042424961882, |
|
"eval_f1_micro": 0.6953186376449928, |
|
"eval_loss": 0.19311168789863586, |
|
"eval_runtime": 406.8435, |
|
"eval_samples_per_second": 7.094, |
|
"eval_steps_per_second": 0.224, |
|
"learning_rate": 0.001, |
|
"step": 4641 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.18572418572418573, |
|
"eval_f1_macro": 0.4490480976278912, |
|
"eval_f1_micro": 0.6983818770226538, |
|
"eval_loss": 0.19081147015094757, |
|
"eval_runtime": 398.4396, |
|
"eval_samples_per_second": 7.243, |
|
"eval_steps_per_second": 0.228, |
|
"learning_rate": 0.001, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 18.315018315018314, |
|
"grad_norm": 0.05783214792609215, |
|
"learning_rate": 0.001, |
|
"loss": 0.2101, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.1812196812196812, |
|
"eval_f1_macro": 0.4428453523216445, |
|
"eval_f1_micro": 0.6878854936673101, |
|
"eval_loss": 0.19249168038368225, |
|
"eval_runtime": 397.6758, |
|
"eval_samples_per_second": 7.257, |
|
"eval_steps_per_second": 0.229, |
|
"learning_rate": 0.001, |
|
"step": 5187 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.1774081774081774, |
|
"eval_f1_macro": 0.43568338344914237, |
|
"eval_f1_micro": 0.6796580216840999, |
|
"eval_loss": 0.19134406745433807, |
|
"eval_runtime": 404.3345, |
|
"eval_samples_per_second": 7.138, |
|
"eval_steps_per_second": 0.225, |
|
"learning_rate": 0.001, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 20.146520146520146, |
|
"grad_norm": 0.04509862884879112, |
|
"learning_rate": 0.001, |
|
"loss": 0.2088, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.18225918225918225, |
|
"eval_f1_macro": 0.4381469652060519, |
|
"eval_f1_micro": 0.6957772621809745, |
|
"eval_loss": 0.19149190187454224, |
|
"eval_runtime": 403.3745, |
|
"eval_samples_per_second": 7.155, |
|
"eval_steps_per_second": 0.226, |
|
"learning_rate": 0.001, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 21.978021978021978, |
|
"grad_norm": 0.04410397261381149, |
|
"learning_rate": 0.001, |
|
"loss": 0.2084, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.1826056826056826, |
|
"eval_f1_macro": 0.4534807464842353, |
|
"eval_f1_micro": 0.7038712011577424, |
|
"eval_loss": 0.19192616641521454, |
|
"eval_runtime": 394.3882, |
|
"eval_samples_per_second": 7.318, |
|
"eval_steps_per_second": 0.231, |
|
"learning_rate": 0.001, |
|
"step": 6006 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.17983367983367984, |
|
"eval_f1_macro": 0.4363028843794499, |
|
"eval_f1_micro": 0.6907461850762985, |
|
"eval_loss": 0.19255639612674713, |
|
"eval_runtime": 404.3195, |
|
"eval_samples_per_second": 7.138, |
|
"eval_steps_per_second": 0.225, |
|
"learning_rate": 0.001, |
|
"step": 6279 |
|
}, |
|
{ |
|
"epoch": 23.80952380952381, |
|
"grad_norm": 0.0686459094285965, |
|
"learning_rate": 0.001, |
|
"loss": 0.2083, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.18052668052668053, |
|
"eval_f1_macro": 0.45443118252910614, |
|
"eval_f1_micro": 0.6952745610758312, |
|
"eval_loss": 0.19186602532863617, |
|
"eval_runtime": 397.0622, |
|
"eval_samples_per_second": 7.268, |
|
"eval_steps_per_second": 0.229, |
|
"learning_rate": 0.001, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.1781011781011781, |
|
"eval_f1_macro": 0.4465566917300777, |
|
"eval_f1_micro": 0.6961779911373708, |
|
"eval_loss": 0.19193170964717865, |
|
"eval_runtime": 397.2293, |
|
"eval_samples_per_second": 7.265, |
|
"eval_steps_per_second": 0.229, |
|
"learning_rate": 0.0001, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 25.641025641025642, |
|
"grad_norm": 0.04814450815320015, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2076, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.18225918225918225, |
|
"eval_f1_macro": 0.441825214268795, |
|
"eval_f1_micro": 0.6942802624842929, |
|
"eval_loss": 0.19118554890155792, |
|
"eval_runtime": 398.0912, |
|
"eval_samples_per_second": 7.25, |
|
"eval_steps_per_second": 0.229, |
|
"learning_rate": 0.0001, |
|
"step": 7098 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.18087318087318088, |
|
"eval_f1_macro": 0.449975636684123, |
|
"eval_f1_micro": 0.6971996137398262, |
|
"eval_loss": 0.19123922288417816, |
|
"eval_runtime": 398.8223, |
|
"eval_samples_per_second": 7.236, |
|
"eval_steps_per_second": 0.228, |
|
"learning_rate": 0.0001, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 27.47252747252747, |
|
"grad_norm": 0.05590255931019783, |
|
"learning_rate": 0.0001, |
|
"loss": 0.2081, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.18572418572418573, |
|
"eval_f1_macro": 0.44543509037683293, |
|
"eval_f1_micro": 0.6943913469159402, |
|
"eval_loss": 0.19151046872138977, |
|
"eval_runtime": 405.2259, |
|
"eval_samples_per_second": 7.122, |
|
"eval_steps_per_second": 0.225, |
|
"learning_rate": 0.0001, |
|
"step": 7644 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 0.0001, |
|
"step": 7644, |
|
"total_flos": 2.778404267780425e+19, |
|
"train_loss": 0.2165746406882549, |
|
"train_runtime": 45987.1682, |
|
"train_samples_per_second": 75.812, |
|
"train_steps_per_second": 2.375 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 109200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 400, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.778404267780425e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|