{ "best_metric": 0.7291533946990967, "best_model_checkpoint": "finetune_results/omarmomen/structformer_s1_final_with_pos/mnli-mm/checkpoint-6000", "epoch": 3.695150115473441, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "eval_accuracy": 0.614417552947998, "eval_loss": 0.8566737771034241, "eval_runtime": 12.1288, "eval_samples_per_second": 518.105, "eval_steps_per_second": 64.804, "step": 400 }, { "epoch": 0.23, "learning_rate": 4.884526558891455e-05, "loss": 0.9715, "step": 500 }, { "epoch": 0.37, "eval_accuracy": 0.6376512050628662, "eval_loss": 0.8208665251731873, "eval_runtime": 12.1366, "eval_samples_per_second": 517.771, "eval_steps_per_second": 64.763, "step": 800 }, { "epoch": 0.46, "learning_rate": 4.7690531177829104e-05, "loss": 0.8412, "step": 1000 }, { "epoch": 0.55, "eval_accuracy": 0.6712285280227661, "eval_loss": 0.7672932147979736, "eval_runtime": 12.1384, "eval_samples_per_second": 517.695, "eval_steps_per_second": 64.753, "step": 1200 }, { "epoch": 0.69, "learning_rate": 4.653579676674365e-05, "loss": 0.8026, "step": 1500 }, { "epoch": 0.74, "eval_accuracy": 0.6904837489128113, "eval_loss": 0.7384242415428162, "eval_runtime": 12.1361, "eval_samples_per_second": 517.794, "eval_steps_per_second": 64.765, "step": 1600 }, { "epoch": 0.92, "learning_rate": 4.53810623556582e-05, "loss": 0.7778, "step": 2000 }, { "epoch": 0.92, "eval_accuracy": 0.6818904876708984, "eval_loss": 0.7472041249275208, "eval_runtime": 12.1396, "eval_samples_per_second": 517.643, "eval_steps_per_second": 64.747, "step": 2000 }, { "epoch": 1.11, "eval_accuracy": 0.7019414305686951, "eval_loss": 0.7058711647987366, "eval_runtime": 12.1415, "eval_samples_per_second": 517.566, "eval_steps_per_second": 64.737, "step": 2400 }, { "epoch": 1.15, "learning_rate": 4.422632794457275e-05, "loss": 0.7107, "step": 2500 }, { "epoch": 1.29, "eval_accuracy": 0.6974856853485107, "eval_loss": 0.7071972489356995, "eval_runtime": 12.1307, "eval_samples_per_second": 518.025, "eval_steps_per_second": 64.794, "step": 2800 }, { "epoch": 1.39, "learning_rate": 4.30715935334873e-05, "loss": 0.6742, "step": 3000 }, { "epoch": 1.48, "eval_accuracy": 0.7054423689842224, "eval_loss": 0.702299177646637, "eval_runtime": 12.1415, "eval_samples_per_second": 517.563, "eval_steps_per_second": 64.737, "step": 3200 }, { "epoch": 1.62, "learning_rate": 4.1916859122401844e-05, "loss": 0.67, "step": 3500 }, { "epoch": 1.66, "eval_accuracy": 0.7143539190292358, "eval_loss": 0.6831963062286377, "eval_runtime": 12.1328, "eval_samples_per_second": 517.935, "eval_steps_per_second": 64.783, "step": 3600 }, { "epoch": 1.85, "learning_rate": 4.07621247113164e-05, "loss": 0.6684, "step": 4000 }, { "epoch": 1.85, "eval_accuracy": 0.7118077874183655, "eval_loss": 0.695106029510498, "eval_runtime": 12.1396, "eval_samples_per_second": 517.643, "eval_steps_per_second": 64.747, "step": 4000 }, { "epoch": 2.03, "eval_accuracy": 0.7141947746276855, "eval_loss": 0.7270315885543823, "eval_runtime": 12.134, "eval_samples_per_second": 517.885, "eval_steps_per_second": 64.777, "step": 4400 }, { "epoch": 2.08, "learning_rate": 3.960739030023095e-05, "loss": 0.6182, "step": 4500 }, { "epoch": 2.22, "eval_accuracy": 0.7051241397857666, "eval_loss": 0.7234853506088257, "eval_runtime": 12.1395, "eval_samples_per_second": 517.649, "eval_steps_per_second": 64.747, "step": 4800 }, { "epoch": 2.31, "learning_rate": 3.84526558891455e-05, "loss": 0.5444, "step": 5000 }, { "epoch": 2.4, "eval_accuracy": 0.7169000506401062, "eval_loss": 0.6947318911552429, "eval_runtime": 12.151, "eval_samples_per_second": 517.16, "eval_steps_per_second": 64.686, "step": 5200 }, { "epoch": 2.54, "learning_rate": 3.729792147806005e-05, "loss": 0.5451, "step": 5500 }, { "epoch": 2.59, "eval_accuracy": 0.7250159382820129, "eval_loss": 0.701257050037384, "eval_runtime": 12.1376, "eval_samples_per_second": 517.728, "eval_steps_per_second": 64.757, "step": 5600 }, { "epoch": 2.77, "learning_rate": 3.61431870669746e-05, "loss": 0.5473, "step": 6000 }, { "epoch": 2.77, "eval_accuracy": 0.7291533946990967, "eval_loss": 0.6873072981834412, "eval_runtime": 12.137, "eval_samples_per_second": 517.754, "eval_steps_per_second": 64.76, "step": 6000 }, { "epoch": 2.96, "eval_accuracy": 0.7162635326385498, "eval_loss": 0.7030926942825317, "eval_runtime": 12.1357, "eval_samples_per_second": 517.813, "eval_steps_per_second": 64.768, "step": 6400 }, { "epoch": 3.0, "learning_rate": 3.498845265588915e-05, "loss": 0.5482, "step": 6500 }, { "epoch": 3.14, "eval_accuracy": 0.7113303542137146, "eval_loss": 0.7891779541969299, "eval_runtime": 12.1379, "eval_samples_per_second": 517.715, "eval_steps_per_second": 64.756, "step": 6800 }, { "epoch": 3.23, "learning_rate": 3.38337182448037e-05, "loss": 0.4137, "step": 7000 }, { "epoch": 3.33, "eval_accuracy": 0.7176957130432129, "eval_loss": 0.7793703675270081, "eval_runtime": 12.1394, "eval_samples_per_second": 517.654, "eval_steps_per_second": 64.748, "step": 7200 }, { "epoch": 3.46, "learning_rate": 3.2678983833718243e-05, "loss": 0.4196, "step": 7500 }, { "epoch": 3.51, "eval_accuracy": 0.7189688086509705, "eval_loss": 0.773220419883728, "eval_runtime": 12.1325, "eval_samples_per_second": 517.947, "eval_steps_per_second": 64.785, "step": 7600 }, { "epoch": 3.7, "learning_rate": 3.1524249422632794e-05, "loss": 0.4286, "step": 8000 }, { "epoch": 3.7, "eval_accuracy": 0.7173774838447571, "eval_loss": 0.7898668646812439, "eval_runtime": 12.1342, "eval_samples_per_second": 517.877, "eval_steps_per_second": 64.776, "step": 8000 }, { "epoch": 3.7, "step": 8000, "total_flos": 8.013030200197632e+16, "train_loss": 0.6363458843231201, "train_runtime": 4859.8552, "train_samples_per_second": 534.543, "train_steps_per_second": 4.455 } ], "max_steps": 21650, "num_train_epochs": 10, "total_flos": 8.013030200197632e+16, "trial_name": null, "trial_params": null }