{
  "best_metric": 0.8582048416137695,
  "best_model_checkpoint": "/content/drive/My Drive/Colab Notebooks/aai520-project/checkpoints/roberta-finetuned/checkpoint-1600",
  "epoch": 3.9215686274509802,
  "eval_steps": 100,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "learning_rate": 1.811764705882353e-05,
      "loss": 2.9129,
      "step": 100
    },
    {
      "epoch": 0.2,
      "eval_loss": 1.4699586629867554,
      "eval_runtime": 17.6815,
      "eval_samples_per_second": 676.131,
      "eval_steps_per_second": 10.576,
      "step": 100
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.6176470588235296e-05,
      "loss": 1.4395,
      "step": 200
    },
    {
      "epoch": 0.39,
      "eval_loss": 1.240740418434143,
      "eval_runtime": 17.6813,
      "eval_samples_per_second": 676.14,
      "eval_steps_per_second": 10.576,
      "step": 200
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.4215686274509805e-05,
      "loss": 1.2356,
      "step": 300
    },
    {
      "epoch": 0.59,
      "eval_loss": 1.0324599742889404,
      "eval_runtime": 17.7184,
      "eval_samples_per_second": 674.723,
      "eval_steps_per_second": 10.554,
      "step": 300
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.2254901960784315e-05,
      "loss": 1.1284,
      "step": 400
    },
    {
      "epoch": 0.78,
      "eval_loss": 0.9749970436096191,
      "eval_runtime": 17.7173,
      "eval_samples_per_second": 674.763,
      "eval_steps_per_second": 10.555,
      "step": 400
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.0294117647058823e-05,
      "loss": 1.0821,
      "step": 500
    },
    {
      "epoch": 0.98,
      "eval_loss": 0.9345471858978271,
      "eval_runtime": 17.7101,
      "eval_samples_per_second": 675.038,
      "eval_steps_per_second": 10.559,
      "step": 500
    },
    {
      "epoch": 1.18,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.9978,
      "step": 600
    },
    {
      "epoch": 1.18,
      "eval_loss": 0.9892988801002502,
      "eval_runtime": 17.6864,
      "eval_samples_per_second": 675.943,
      "eval_steps_per_second": 10.573,
      "step": 600
    },
    {
      "epoch": 1.37,
      "learning_rate": 6.372549019607843e-06,
      "loss": 0.9697,
      "step": 700
    },
    {
      "epoch": 1.37,
      "eval_loss": 0.9299613237380981,
      "eval_runtime": 17.7225,
      "eval_samples_per_second": 674.565,
      "eval_steps_per_second": 10.552,
      "step": 700
    },
    {
      "epoch": 1.57,
      "learning_rate": 4.411764705882353e-06,
      "loss": 0.9455,
      "step": 800
    },
    {
      "epoch": 1.57,
      "eval_loss": 0.9351199865341187,
      "eval_runtime": 17.7361,
      "eval_samples_per_second": 674.051,
      "eval_steps_per_second": 10.543,
      "step": 800
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.450980392156863e-06,
      "loss": 0.9322,
      "step": 900
    },
    {
      "epoch": 1.76,
      "eval_loss": 0.9450510144233704,
      "eval_runtime": 17.7032,
      "eval_samples_per_second": 675.301,
      "eval_steps_per_second": 10.563,
      "step": 900
    },
    {
      "epoch": 1.96,
      "learning_rate": 4.901960784313725e-07,
      "loss": 0.9269,
      "step": 1000
    },
    {
      "epoch": 1.96,
      "eval_loss": 0.9063502550125122,
      "eval_runtime": 17.6937,
      "eval_samples_per_second": 675.663,
      "eval_steps_per_second": 10.569,
      "step": 1000
    },
    {
      "epoch": 2.16,
      "learning_rate": 9.284313725490197e-06,
      "loss": 0.9105,
      "step": 1100
    },
    {
      "epoch": 2.16,
      "eval_loss": 0.8837365508079529,
      "eval_runtime": 17.5298,
      "eval_samples_per_second": 681.981,
      "eval_steps_per_second": 10.668,
      "step": 1100
    },
    {
      "epoch": 2.35,
      "learning_rate": 8.303921568627452e-06,
      "loss": 0.8805,
      "step": 1200
    },
    {
      "epoch": 2.35,
      "eval_loss": 0.8875929713249207,
      "eval_runtime": 17.5814,
      "eval_samples_per_second": 679.978,
      "eval_steps_per_second": 10.636,
      "step": 1200
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.333333333333333e-06,
      "loss": 0.8703,
      "step": 1300
    },
    {
      "epoch": 2.55,
      "eval_loss": 0.9852611422538757,
      "eval_runtime": 17.5854,
      "eval_samples_per_second": 679.824,
      "eval_steps_per_second": 10.634,
      "step": 1300
    },
    {
      "epoch": 2.75,
      "learning_rate": 6.352941176470589e-06,
      "loss": 0.8699,
      "step": 1400
    },
    {
      "epoch": 2.75,
      "eval_loss": 0.9235011339187622,
      "eval_runtime": 17.5815,
      "eval_samples_per_second": 679.975,
      "eval_steps_per_second": 10.636,
      "step": 1400
    },
    {
      "epoch": 2.94,
      "learning_rate": 5.372549019607843e-06,
      "loss": 0.8633,
      "step": 1500
    },
    {
      "epoch": 2.94,
      "eval_loss": 0.8929564356803894,
      "eval_runtime": 17.5589,
      "eval_samples_per_second": 680.85,
      "eval_steps_per_second": 10.65,
      "step": 1500
    },
    {
      "epoch": 3.14,
      "learning_rate": 4.392156862745098e-06,
      "loss": 0.828,
      "step": 1600
    },
    {
      "epoch": 3.14,
      "eval_loss": 0.8582048416137695,
      "eval_runtime": 17.5663,
      "eval_samples_per_second": 680.564,
      "eval_steps_per_second": 10.645,
      "step": 1600
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.421568627450981e-06,
      "loss": 0.8284,
      "step": 1700
    },
    {
      "epoch": 3.33,
      "eval_loss": 0.920342743396759,
      "eval_runtime": 17.6216,
      "eval_samples_per_second": 678.428,
      "eval_steps_per_second": 10.612,
      "step": 1700
    },
    {
      "epoch": 3.53,
      "learning_rate": 2.4411764705882356e-06,
      "loss": 0.8076,
      "step": 1800
    },
    {
      "epoch": 3.53,
      "eval_loss": 0.8865646719932556,
      "eval_runtime": 17.6165,
      "eval_samples_per_second": 678.626,
      "eval_steps_per_second": 10.615,
      "step": 1800
    },
    {
      "epoch": 3.73,
      "learning_rate": 1.4607843137254903e-06,
      "loss": 0.7805,
      "step": 1900
    },
    {
      "epoch": 3.73,
      "eval_loss": 0.9098581075668335,
      "eval_runtime": 17.5589,
      "eval_samples_per_second": 680.85,
      "eval_steps_per_second": 10.65,
      "step": 1900
    },
    {
      "epoch": 3.92,
      "learning_rate": 4.901960784313725e-07,
      "loss": 0.7974,
      "step": 2000
    },
    {
      "epoch": 3.92,
      "eval_loss": 0.8746156096458435,
      "eval_runtime": 17.5409,
      "eval_samples_per_second": 681.548,
      "eval_steps_per_second": 10.661,
      "step": 2000
    },
    {
      "epoch": 3.92,
      "step": 2000,
      "total_flos": 1.3377688443640013e+17,
      "train_loss": 0.0,
      "train_runtime": 0.6732,
      "train_samples_per_second": 775720.158,
      "train_steps_per_second": 1515.183
    },
    {
      "epoch": 3.92,
      "eval_loss": 0.8582085371017456,
      "eval_runtime": 17.3486,
      "eval_samples_per_second": 689.103,
      "eval_steps_per_second": 5.418,
      "step": 2000
    }
  ],
  "logging_steps": 100,
  "max_steps": 1020,
  "num_train_epochs": 4,
  "save_steps": 100,
  "total_flos": 1.3377688443640013e+17,
  "trial_name": null,
  "trial_params": null
}