{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.9465930018416207,
  "global_step": 24000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 1.9590750971966443e-05,
      "loss": 2.4744,
      "step": 500
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9181501943932885e-05,
      "loss": 1.9135,
      "step": 1000
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8772252915899326e-05,
      "loss": 1.796,
      "step": 1500
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.8363003887865768e-05,
      "loss": 1.7038,
      "step": 2000
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.795375485983221e-05,
      "loss": 1.6516,
      "step": 2500
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.754450583179865e-05,
      "loss": 1.6246,
      "step": 3000
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.7135256803765093e-05,
      "loss": 1.5577,
      "step": 3500
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.6726007775731534e-05,
      "loss": 1.5208,
      "step": 4000
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.6316758747697976e-05,
      "loss": 1.5118,
      "step": 4500
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.5907509719664418e-05,
      "loss": 1.5089,
      "step": 5000
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.5498260691630856e-05,
      "loss": 1.4391,
      "step": 5500
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.50890116635973e-05,
      "loss": 1.4344,
      "step": 6000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.467976263556374e-05,
      "loss": 1.4238,
      "step": 6500
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.4270513607530182e-05,
      "loss": 1.4384,
      "step": 7000
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.3861264579496626e-05,
      "loss": 1.3665,
      "step": 7500
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.3452015551463067e-05,
      "loss": 1.3999,
      "step": 8000
    },
    {
      "epoch": 1.0,
      "eval_em": 0.5297689323663349,
      "eval_f1": 0.6106438572272215,
      "eval_loss": 1.3775073289871216,
      "eval_runtime": 131.0166,
      "eval_samples_per_second": 90.508,
      "eval_steps_per_second": 5.663,
      "step": 8145
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.3042766523429509e-05,
      "loss": 1.1856,
      "step": 8500
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.263351749539595e-05,
      "loss": 1.0984,
      "step": 9000
    },
    {
      "epoch": 1.17,
      "learning_rate": 1.222426846736239e-05,
      "loss": 1.1179,
      "step": 9500
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.1815019439328832e-05,
      "loss": 1.0886,
      "step": 10000
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.1405770411295274e-05,
      "loss": 1.1104,
      "step": 10500
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.0996521383261715e-05,
      "loss": 1.1339,
      "step": 11000
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.0587272355228157e-05,
      "loss": 1.1361,
      "step": 11500
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.01780233271946e-05,
      "loss": 1.1057,
      "step": 12000
    },
    {
      "epoch": 1.53,
      "learning_rate": 9.76877429916104e-06,
      "loss": 1.0967,
      "step": 12500
    },
    {
      "epoch": 1.6,
      "learning_rate": 9.359525271127482e-06,
      "loss": 1.1119,
      "step": 13000
    },
    {
      "epoch": 1.66,
      "learning_rate": 8.950276243093923e-06,
      "loss": 1.0718,
      "step": 13500
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.541027215060365e-06,
      "loss": 1.093,
      "step": 14000
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.131778187026806e-06,
      "loss": 1.0782,
      "step": 14500
    },
    {
      "epoch": 1.84,
      "learning_rate": 7.722529158993248e-06,
      "loss": 1.0655,
      "step": 15000
    },
    {
      "epoch": 1.9,
      "learning_rate": 7.313280130959689e-06,
      "loss": 1.0953,
      "step": 15500
    },
    {
      "epoch": 1.96,
      "learning_rate": 6.904031102926131e-06,
      "loss": 1.0765,
      "step": 16000
    },
    {
      "epoch": 2.0,
      "eval_em": 0.5461291954798448,
      "eval_f1": 0.6263308103966033,
      "eval_loss": 1.385632038116455,
      "eval_runtime": 134.345,
      "eval_samples_per_second": 88.265,
      "eval_steps_per_second": 5.523,
      "step": 16290
    },
    {
      "epoch": 2.03,
      "learning_rate": 6.494782074892573e-06,
      "loss": 0.9839,
      "step": 16500
    },
    {
      "epoch": 2.09,
      "learning_rate": 6.0855330468590145e-06,
      "loss": 0.8102,
      "step": 17000
    },
    {
      "epoch": 2.15,
      "learning_rate": 5.676284018825455e-06,
      "loss": 0.8344,
      "step": 17500
    },
    {
      "epoch": 2.21,
      "learning_rate": 5.267034990791897e-06,
      "loss": 0.8254,
      "step": 18000
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.857785962758339e-06,
      "loss": 0.8348,
      "step": 18500
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.44853693472478e-06,
      "loss": 0.8377,
      "step": 19000
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.0392879066912225e-06,
      "loss": 0.8376,
      "step": 19500
    },
    {
      "epoch": 2.46,
      "learning_rate": 3.6300388786576637e-06,
      "loss": 0.8231,
      "step": 20000
    },
    {
      "epoch": 2.52,
      "learning_rate": 3.220789850624105e-06,
      "loss": 0.8427,
      "step": 20500
    },
    {
      "epoch": 2.58,
      "learning_rate": 2.8115408225905465e-06,
      "loss": 0.8219,
      "step": 21000
    },
    {
      "epoch": 2.64,
      "learning_rate": 2.402291794556988e-06,
      "loss": 0.8272,
      "step": 21500
    },
    {
      "epoch": 2.7,
      "learning_rate": 1.9930427665234297e-06,
      "loss": 0.8353,
      "step": 22000
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.5837937384898713e-06,
      "loss": 0.8305,
      "step": 22500
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.1745447104563129e-06,
      "loss": 0.8231,
      "step": 23000
    },
    {
      "epoch": 2.89,
      "learning_rate": 7.652956824227544e-07,
      "loss": 0.8398,
      "step": 23500
    },
    {
      "epoch": 2.95,
      "learning_rate": 3.5604665438919586e-07,
      "loss": 0.8243,
      "step": 24000
    }
  ],
  "max_steps": 24435,
  "num_train_epochs": 3,
  "total_flos": 7.525072232402227e+16,
  "trial_name": null,
  "trial_params": null
}