|
{
  "best_metric": 70.54026450257521,
  "best_model_checkpoint": "/home/jcanete/ft-data/all_results/tar/albeto_base_4/epochs_3_bs_16_lr_5e-5/checkpoint-9000",
  "epoch": 3.0,
  "global_step": 16455,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "eval_exact_match": 34.645222327341536,
      "eval_f1": 52.13460975937906,
      "step": 300
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.848678213309025e-05,
      "loss": 2.8212,
      "step": 500
    },
    {
      "epoch": 0.11,
      "eval_exact_match": 40.38789025543993,
      "eval_f1": 58.80108695949609,
      "step": 600
    },
    {
      "epoch": 0.16,
      "eval_exact_match": 44.1438032166509,
      "eval_f1": 61.68021475413032,
      "step": 900
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.6967487085992104e-05,
      "loss": 2.147,
      "step": 1000
    },
    {
      "epoch": 0.22,
      "eval_exact_match": 45.08041627246925,
      "eval_f1": 63.22958368722072,
      "step": 1200
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.544819203889396e-05,
      "loss": 2.0333,
      "step": 1500
    },
    {
      "epoch": 0.27,
      "eval_exact_match": 46.09271523178808,
      "eval_f1": 63.34953738572746,
      "step": 1500
    },
    {
      "epoch": 0.33,
      "eval_exact_match": 47.13339640491959,
      "eval_f1": 64.76366505969077,
      "step": 1800
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.392889699179581e-05,
      "loss": 1.9667,
      "step": 2000
    },
    {
      "epoch": 0.38,
      "eval_exact_match": 47.4077578051088,
      "eval_f1": 65.80706119842145,
      "step": 2100
    },
    {
      "epoch": 0.44,
      "eval_exact_match": 48.55250709555345,
      "eval_f1": 66.49751567340297,
      "step": 2400
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.2409601944697666e-05,
      "loss": 1.8743,
      "step": 2500
    },
    {
      "epoch": 0.49,
      "eval_exact_match": 48.84578997161778,
      "eval_f1": 66.76044504218349,
      "step": 2700
    },
    {
      "epoch": 0.55,
      "learning_rate": 4.089030689759952e-05,
      "loss": 1.8655,
      "step": 3000
    },
    {
      "epoch": 0.55,
      "eval_exact_match": 49.55534531693472,
      "eval_f1": 67.55609852983001,
      "step": 3000
    },
    {
      "epoch": 0.6,
      "eval_exact_match": 49.45127719962157,
      "eval_f1": 67.28955811469469,
      "step": 3300
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.9371011850501374e-05,
      "loss": 1.8223,
      "step": 3500
    },
    {
      "epoch": 0.66,
      "eval_exact_match": 50.08514664143803,
      "eval_f1": 68.11294509259712,
      "step": 3600
    },
    {
      "epoch": 0.71,
      "eval_exact_match": 50.38789025543993,
      "eval_f1": 68.15488930053279,
      "step": 3900
    },
    {
      "epoch": 0.73,
      "learning_rate": 3.785171680340323e-05,
      "loss": 1.7684,
      "step": 4000
    },
    {
      "epoch": 0.77,
      "eval_exact_match": 51.05960264900662,
      "eval_f1": 69.07576620387981,
      "step": 4200
    },
    {
      "epoch": 0.82,
      "learning_rate": 3.6335460346399276e-05,
      "loss": 1.7677,
      "step": 4500
    },
    {
      "epoch": 0.82,
      "eval_exact_match": 51.561021759697255,
      "eval_f1": 69.20806146876814,
      "step": 4500
    },
    {
      "epoch": 0.88,
      "eval_exact_match": 51.61778618732261,
      "eval_f1": 69.20269381359182,
      "step": 4800
    },
    {
      "epoch": 0.91,
      "learning_rate": 3.481616529930112e-05,
      "loss": 1.7566,
      "step": 5000
    },
    {
      "epoch": 0.93,
      "eval_exact_match": 51.68401135288553,
      "eval_f1": 69.22415299954444,
      "step": 5100
    },
    {
      "epoch": 0.98,
      "eval_exact_match": 51.82592242194891,
      "eval_f1": 69.68048492944583,
      "step": 5400
    },
    {
      "epoch": 1.0,
      "learning_rate": 3.329990884229718e-05,
      "loss": 1.6976,
      "step": 5500
    },
    {
      "epoch": 1.04,
      "eval_exact_match": 51.84484389782403,
      "eval_f1": 69.61844469538968,
      "step": 5700
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.1780613795199024e-05,
      "loss": 1.3996,
      "step": 6000
    },
    {
      "epoch": 1.09,
      "eval_exact_match": 51.79754020813623,
      "eval_f1": 69.51904893791095,
      "step": 6000
    },
    {
      "epoch": 1.15,
      "eval_exact_match": 51.72185430463576,
      "eval_f1": 69.18971361078592,
      "step": 6300
    },
    {
      "epoch": 1.19,
      "learning_rate": 3.0261318748100882e-05,
      "loss": 1.3947,
      "step": 6500
    },
    {
      "epoch": 1.2,
      "eval_exact_match": 51.82592242194891,
      "eval_f1": 69.9010405722466,
      "step": 6600
    },
    {
      "epoch": 1.26,
      "eval_exact_match": 51.74077578051088,
      "eval_f1": 69.21425877541581,
      "step": 6900
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.8742023701002736e-05,
      "loss": 1.3898,
      "step": 7000
    },
    {
      "epoch": 1.31,
      "eval_exact_match": 52.64900662251656,
      "eval_f1": 70.03047551437781,
      "step": 7200
    },
    {
      "epoch": 1.37,
      "learning_rate": 2.722272865390459e-05,
      "loss": 1.3861,
      "step": 7500
    },
    {
      "epoch": 1.37,
      "eval_exact_match": 52.232734153263955,
      "eval_f1": 69.84348950849247,
      "step": 7500
    },
    {
      "epoch": 1.42,
      "eval_exact_match": 52.05298013245033,
      "eval_f1": 69.85616305536696,
      "step": 7800
    },
    {
      "epoch": 1.46,
      "learning_rate": 2.5703433606806444e-05,
      "loss": 1.4272,
      "step": 8000
    },
    {
      "epoch": 1.48,
      "eval_exact_match": 52.42194891201514,
      "eval_f1": 70.12191052992236,
      "step": 8100
    },
    {
      "epoch": 1.53,
      "eval_exact_match": 52.327341532639544,
      "eval_f1": 70.1945849880174,
      "step": 8400
    },
    {
      "epoch": 1.55,
      "learning_rate": 2.4184138559708297e-05,
      "loss": 1.387,
      "step": 8500
    },
    {
      "epoch": 1.59,
      "eval_exact_match": 52.73415326395459,
      "eval_f1": 70.53063216515172,
      "step": 8700
    },
    {
      "epoch": 1.64,
      "learning_rate": 2.266484351261015e-05,
      "loss": 1.4181,
      "step": 9000
    },
    {
      "epoch": 1.64,
      "eval_exact_match": 52.980132450331126,
      "eval_f1": 70.54026450257521,
      "step": 9000
    },
    {
      "epoch": 1.7,
      "eval_exact_match": 52.327341532639544,
      "eval_f1": 70.03067034409061,
      "step": 9300
    },
    {
      "epoch": 1.73,
      "learning_rate": 2.1145548465512002e-05,
      "loss": 1.3941,
      "step": 9500
    },
    {
      "epoch": 1.75,
      "eval_exact_match": 52.75307473982971,
      "eval_f1": 70.53593438103856,
      "step": 9600
    },
    {
      "epoch": 1.8,
      "eval_exact_match": 52.37464522232734,
      "eval_f1": 69.9459164585827,
      "step": 9900
    },
    {
      "epoch": 1.82,
      "learning_rate": 1.9629292008508053e-05,
      "loss": 1.4019,
      "step": 10000
    },
    {
      "epoch": 1.86,
      "eval_exact_match": 52.544938505203405,
      "eval_f1": 70.19906201457854,
      "step": 10200
    },
    {
      "epoch": 1.91,
      "learning_rate": 1.8109996961409907e-05,
      "loss": 1.4031,
      "step": 10500
    },
    {
      "epoch": 1.91,
      "eval_exact_match": 52.61116367076632,
      "eval_f1": 70.3522720546097,
      "step": 10500
    },
    {
      "epoch": 1.97,
      "eval_exact_match": 53.02743614001892,
      "eval_f1": 70.4807280516286,
      "step": 10800
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.659070191431176e-05,
      "loss": 1.3625,
      "step": 11000
    },
    {
      "epoch": 2.02,
      "eval_exact_match": 51.88268684957427,
      "eval_f1": 69.74458138686404,
      "step": 11100
    },
    {
      "epoch": 2.08,
      "eval_exact_match": 51.66508987701041,
      "eval_f1": 69.55251413256356,
      "step": 11400
    },
    {
      "epoch": 2.1,
      "learning_rate": 1.5071406867213613e-05,
      "loss": 1.0261,
      "step": 11500
    },
    {
      "epoch": 2.13,
      "eval_exact_match": 51.12582781456954,
      "eval_f1": 69.45293644822506,
      "step": 11700
    },
    {
      "epoch": 2.19,
      "learning_rate": 1.3555150410209664e-05,
      "loss": 0.9969,
      "step": 12000
    },
    {
      "epoch": 2.19,
      "eval_exact_match": 51.84484389782403,
      "eval_f1": 69.82132514934786,
      "step": 12000
    },
    {
      "epoch": 2.24,
      "eval_exact_match": 51.106906338694415,
      "eval_f1": 69.26705027187695,
      "step": 12300
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.2035855363111518e-05,
      "loss": 1.0186,
      "step": 12500
    },
    {
      "epoch": 2.3,
      "eval_exact_match": 51.59886471144749,
      "eval_f1": 69.76823383346489,
      "step": 12600
    },
    {
      "epoch": 2.35,
      "eval_exact_match": 51.45695364238411,
      "eval_f1": 69.76810827781901,
      "step": 12900
    },
    {
      "epoch": 2.37,
      "learning_rate": 1.051656031601337e-05,
      "loss": 1.0272,
      "step": 13000
    },
    {
      "epoch": 2.41,
      "eval_exact_match": 51.35288552507095,
      "eval_f1": 69.64753731877455,
      "step": 13200
    },
    {
      "epoch": 2.46,
      "learning_rate": 8.997265268915224e-06,
      "loss": 1.0469,
      "step": 13500
    },
    {
      "epoch": 2.46,
      "eval_exact_match": 51.324503311258276,
      "eval_f1": 69.46890832651873,
      "step": 13500
    },
    {
      "epoch": 2.52,
      "eval_exact_match": 51.343424787133394,
      "eval_f1": 69.28540200400431,
      "step": 13800
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.4810088119112735e-06,
      "loss": 1.0324,
      "step": 14000
    },
    {
      "epoch": 2.57,
      "eval_exact_match": 51.27719962157048,
      "eval_f1": 69.21410385147476,
      "step": 14100
    },
    {
      "epoch": 2.63,
      "eval_exact_match": 51.400189214758754,
      "eval_f1": 69.48987451431931,
      "step": 14400
    },
    {
      "epoch": 2.64,
      "learning_rate": 5.964752354907324e-06,
      "loss": 1.026,
      "step": 14500
    },
    {
      "epoch": 2.68,
      "eval_exact_match": 51.14474929044466,
      "eval_f1": 69.48096843416343,
      "step": 14700
    },
    {
      "epoch": 2.73,
      "learning_rate": 4.445457307809177e-06,
      "loss": 1.0191,
      "step": 15000
    },
    {
      "epoch": 2.73,
      "eval_exact_match": 51.333964049195835,
      "eval_f1": 69.47772191155812,
      "step": 15000
    },
    {
      "epoch": 2.79,
      "eval_exact_match": 51.68401135288553,
      "eval_f1": 69.54969415984502,
      "step": 15300
    },
    {
      "epoch": 2.83,
      "learning_rate": 2.9261622607110303e-06,
      "loss": 0.9864,
      "step": 15500
    },
    {
      "epoch": 2.84,
      "eval_exact_match": 51.37180700094607,
      "eval_f1": 69.56621150352814,
      "step": 15600
    },
    {
      "epoch": 2.9,
      "eval_exact_match": 51.561021759697255,
      "eval_f1": 69.58651422829355,
      "step": 15900
    },
    {
      "epoch": 2.92,
      "learning_rate": 1.4068672136128838e-06,
      "loss": 1.0319,
      "step": 16000
    },
    {
      "epoch": 2.95,
      "eval_exact_match": 51.627246925260174,
      "eval_f1": 69.53955054930253,
      "step": 16200
    },
    {
      "epoch": 3.0,
      "step": 16455,
      "total_flos": 3819871314614016.0,
      "train_loss": 1.4580001696595593,
      "train_runtime": 2422.3692,
      "train_samples_per_second": 108.671,
      "train_steps_per_second": 6.793
    }
  ],
  "max_steps": 16455,
  "num_train_epochs": 3,
  "total_flos": 3819871314614016.0,
  "trial_name": null,
  "trial_params": null
}
|
|