|
{ |
|
"best_metric": 0.7291533946990967, |
|
"best_model_checkpoint": "finetune_results/omarmomen/structformer_s1_final_with_pos/mnli-mm/checkpoint-6000", |
|
"epoch": 3.695150115473441, |
|
"global_step": 8000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.614417552947998, |
|
"eval_loss": 0.8566737771034241, |
|
"eval_runtime": 12.1288, |
|
"eval_samples_per_second": 518.105, |
|
"eval_steps_per_second": 64.804, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.884526558891455e-05, |
|
"loss": 0.9715, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.6376512050628662, |
|
"eval_loss": 0.8208665251731873, |
|
"eval_runtime": 12.1366, |
|
"eval_samples_per_second": 517.771, |
|
"eval_steps_per_second": 64.763, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.7690531177829104e-05, |
|
"loss": 0.8412, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.6712285280227661, |
|
"eval_loss": 0.7672932147979736, |
|
"eval_runtime": 12.1384, |
|
"eval_samples_per_second": 517.695, |
|
"eval_steps_per_second": 64.753, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.653579676674365e-05, |
|
"loss": 0.8026, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.6904837489128113, |
|
"eval_loss": 0.7384242415428162, |
|
"eval_runtime": 12.1361, |
|
"eval_samples_per_second": 517.794, |
|
"eval_steps_per_second": 64.765, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.53810623556582e-05, |
|
"loss": 0.7778, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.6818904876708984, |
|
"eval_loss": 0.7472041249275208, |
|
"eval_runtime": 12.1396, |
|
"eval_samples_per_second": 517.643, |
|
"eval_steps_per_second": 64.747, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.7019414305686951, |
|
"eval_loss": 0.7058711647987366, |
|
"eval_runtime": 12.1415, |
|
"eval_samples_per_second": 517.566, |
|
"eval_steps_per_second": 64.737, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.422632794457275e-05, |
|
"loss": 0.7107, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.6974856853485107, |
|
"eval_loss": 0.7071972489356995, |
|
"eval_runtime": 12.1307, |
|
"eval_samples_per_second": 518.025, |
|
"eval_steps_per_second": 64.794, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.30715935334873e-05, |
|
"loss": 0.6742, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.7054423689842224, |
|
"eval_loss": 0.702299177646637, |
|
"eval_runtime": 12.1415, |
|
"eval_samples_per_second": 517.563, |
|
"eval_steps_per_second": 64.737, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 4.1916859122401844e-05, |
|
"loss": 0.67, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.7143539190292358, |
|
"eval_loss": 0.6831963062286377, |
|
"eval_runtime": 12.1328, |
|
"eval_samples_per_second": 517.935, |
|
"eval_steps_per_second": 64.783, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.07621247113164e-05, |
|
"loss": 0.6684, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.7118077874183655, |
|
"eval_loss": 0.695106029510498, |
|
"eval_runtime": 12.1396, |
|
"eval_samples_per_second": 517.643, |
|
"eval_steps_per_second": 64.747, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.7141947746276855, |
|
"eval_loss": 0.7270315885543823, |
|
"eval_runtime": 12.134, |
|
"eval_samples_per_second": 517.885, |
|
"eval_steps_per_second": 64.777, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.960739030023095e-05, |
|
"loss": 0.6182, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 0.7051241397857666, |
|
"eval_loss": 0.7234853506088257, |
|
"eval_runtime": 12.1395, |
|
"eval_samples_per_second": 517.649, |
|
"eval_steps_per_second": 64.747, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.84526558891455e-05, |
|
"loss": 0.5444, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.7169000506401062, |
|
"eval_loss": 0.6947318911552429, |
|
"eval_runtime": 12.151, |
|
"eval_samples_per_second": 517.16, |
|
"eval_steps_per_second": 64.686, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.729792147806005e-05, |
|
"loss": 0.5451, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_accuracy": 0.7250159382820129, |
|
"eval_loss": 0.701257050037384, |
|
"eval_runtime": 12.1376, |
|
"eval_samples_per_second": 517.728, |
|
"eval_steps_per_second": 64.757, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.61431870669746e-05, |
|
"loss": 0.5473, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_accuracy": 0.7291533946990967, |
|
"eval_loss": 0.6873072981834412, |
|
"eval_runtime": 12.137, |
|
"eval_samples_per_second": 517.754, |
|
"eval_steps_per_second": 64.76, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.7162635326385498, |
|
"eval_loss": 0.7030926942825317, |
|
"eval_runtime": 12.1357, |
|
"eval_samples_per_second": 517.813, |
|
"eval_steps_per_second": 64.768, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.498845265588915e-05, |
|
"loss": 0.5482, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_accuracy": 0.7113303542137146, |
|
"eval_loss": 0.7891779541969299, |
|
"eval_runtime": 12.1379, |
|
"eval_samples_per_second": 517.715, |
|
"eval_steps_per_second": 64.756, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.38337182448037e-05, |
|
"loss": 0.4137, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_accuracy": 0.7176957130432129, |
|
"eval_loss": 0.7793703675270081, |
|
"eval_runtime": 12.1394, |
|
"eval_samples_per_second": 517.654, |
|
"eval_steps_per_second": 64.748, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"learning_rate": 3.2678983833718243e-05, |
|
"loss": 0.4196, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_accuracy": 0.7189688086509705, |
|
"eval_loss": 0.773220419883728, |
|
"eval_runtime": 12.1325, |
|
"eval_samples_per_second": 517.947, |
|
"eval_steps_per_second": 64.785, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 3.1524249422632794e-05, |
|
"loss": 0.4286, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_accuracy": 0.7173774838447571, |
|
"eval_loss": 0.7898668646812439, |
|
"eval_runtime": 12.1342, |
|
"eval_samples_per_second": 517.877, |
|
"eval_steps_per_second": 64.776, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"step": 8000, |
|
"total_flos": 8.013030200197632e+16, |
|
"train_loss": 0.6363458843231201, |
|
"train_runtime": 4859.8552, |
|
"train_samples_per_second": 534.543, |
|
"train_steps_per_second": 4.455 |
|
} |
|
], |
|
"max_steps": 21650, |
|
"num_train_epochs": 10, |
|
"total_flos": 8.013030200197632e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|