bobox's picture
Training in progress, step 774, checkpoint
4b4d7d3 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.40082858622475404,
"eval_steps": 97,
"global_step": 774,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.010357327809425169,
"grad_norm": 160.78689575195312,
"learning_rate": 3.529411764705882e-07,
"loss": 10.2062,
"step": 20
},
{
"epoch": 0.020714655618850338,
"grad_norm": 303.3471374511719,
"learning_rate": 8.000000000000001e-07,
"loss": 7.9221,
"step": 40
},
{
"epoch": 0.031071983428275506,
"grad_norm": 81.0146484375,
"learning_rate": 1.2235294117647059e-06,
"loss": 5.9499,
"step": 60
},
{
"epoch": 0.041429311237700675,
"grad_norm": 88.74897003173828,
"learning_rate": 1.6470588235294118e-06,
"loss": 6.0555,
"step": 80
},
{
"epoch": 0.050233039875712066,
"eval_Qnli-dev_cosine_accuracy": 0.62109375,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.9108127355575562,
"eval_Qnli-dev_cosine_ap": 0.6197524033200674,
"eval_Qnli-dev_cosine_f1": 0.6396396396396397,
"eval_Qnli-dev_cosine_f1_threshold": 0.8376526832580566,
"eval_Qnli-dev_cosine_precision": 0.49534883720930234,
"eval_Qnli-dev_cosine_recall": 0.902542372881356,
"eval_Qnli-dev_dot_accuracy": 0.58984375,
"eval_Qnli-dev_dot_accuracy_threshold": 865.2555541992188,
"eval_Qnli-dev_dot_ap": 0.5567642852275692,
"eval_Qnli-dev_dot_f1": 0.6363636363636364,
"eval_Qnli-dev_dot_f1_threshold": 691.9456787109375,
"eval_Qnli-dev_dot_precision": 0.47863247863247865,
"eval_Qnli-dev_dot_recall": 0.9491525423728814,
"eval_Qnli-dev_euclidean_accuracy": 0.609375,
"eval_Qnli-dev_euclidean_accuracy_threshold": 13.323524475097656,
"eval_Qnli-dev_euclidean_ap": 0.6115116478210071,
"eval_Qnli-dev_euclidean_f1": 0.6449612403100775,
"eval_Qnli-dev_euclidean_f1_threshold": 16.585830688476562,
"eval_Qnli-dev_euclidean_precision": 0.508557457212714,
"eval_Qnli-dev_euclidean_recall": 0.8813559322033898,
"eval_Qnli-dev_manhattan_accuracy": 0.619140625,
"eval_Qnli-dev_manhattan_accuracy_threshold": 406.1038818359375,
"eval_Qnli-dev_manhattan_ap": 0.609904024113499,
"eval_Qnli-dev_manhattan_f1": 0.6494345718901454,
"eval_Qnli-dev_manhattan_f1_threshold": 484.52716064453125,
"eval_Qnli-dev_manhattan_precision": 0.5248041775456919,
"eval_Qnli-dev_manhattan_recall": 0.8516949152542372,
"eval_Qnli-dev_max_accuracy": 0.62109375,
"eval_Qnli-dev_max_accuracy_threshold": 865.2555541992188,
"eval_Qnli-dev_max_ap": 0.6197524033200674,
"eval_Qnli-dev_max_f1": 0.6494345718901454,
"eval_Qnli-dev_max_f1_threshold": 691.9456787109375,
"eval_Qnli-dev_max_precision": 0.5248041775456919,
"eval_Qnli-dev_max_recall": 0.9491525423728814,
"eval_allNLI-dev_cosine_accuracy": 0.67578125,
"eval_allNLI-dev_cosine_accuracy_threshold": 0.9652533531188965,
"eval_allNLI-dev_cosine_ap": 0.4282858392784667,
"eval_allNLI-dev_cosine_f1": 0.515527950310559,
"eval_allNLI-dev_cosine_f1_threshold": 0.798592746257782,
"eval_allNLI-dev_cosine_precision": 0.3524416135881104,
"eval_allNLI-dev_cosine_recall": 0.9595375722543352,
"eval_allNLI-dev_dot_accuracy": 0.666015625,
"eval_allNLI-dev_dot_accuracy_threshold": 968.9529418945312,
"eval_allNLI-dev_dot_ap": 0.36425260705842155,
"eval_allNLI-dev_dot_f1": 0.5162287480680062,
"eval_allNLI-dev_dot_f1_threshold": 686.5814208984375,
"eval_allNLI-dev_dot_precision": 0.35232067510548526,
"eval_allNLI-dev_dot_recall": 0.9653179190751445,
"eval_allNLI-dev_euclidean_accuracy": 0.67578125,
"eval_allNLI-dev_euclidean_accuracy_threshold": 8.16073226928711,
"eval_allNLI-dev_euclidean_ap": 0.4333583117036793,
"eval_allNLI-dev_euclidean_f1": 0.5164319248826291,
"eval_allNLI-dev_euclidean_f1_threshold": 18.877037048339844,
"eval_allNLI-dev_euclidean_precision": 0.3540772532188841,
"eval_allNLI-dev_euclidean_recall": 0.953757225433526,
"eval_allNLI-dev_manhattan_accuracy": 0.67578125,
"eval_allNLI-dev_manhattan_accuracy_threshold": 226.18099975585938,
"eval_allNLI-dev_manhattan_ap": 0.4400955405569059,
"eval_allNLI-dev_manhattan_f1": 0.5179407176287052,
"eval_allNLI-dev_manhattan_f1_threshold": 570.2012329101562,
"eval_allNLI-dev_manhattan_precision": 0.3547008547008547,
"eval_allNLI-dev_manhattan_recall": 0.9595375722543352,
"eval_allNLI-dev_max_accuracy": 0.67578125,
"eval_allNLI-dev_max_accuracy_threshold": 968.9529418945312,
"eval_allNLI-dev_max_ap": 0.4400955405569059,
"eval_allNLI-dev_max_f1": 0.5179407176287052,
"eval_allNLI-dev_max_f1_threshold": 686.5814208984375,
"eval_allNLI-dev_max_precision": 0.3547008547008547,
"eval_allNLI-dev_max_recall": 0.9653179190751445,
"eval_sequential_score": 0.6197524033200674,
"eval_sts-test_pearson_cosine": 0.6170839897033953,
"eval_sts-test_pearson_dot": 0.43346770865150264,
"eval_sts-test_pearson_euclidean": 0.6474775644966124,
"eval_sts-test_pearson_manhattan": 0.6616828287248389,
"eval_sts-test_pearson_max": 0.6616828287248389,
"eval_sts-test_spearman_cosine": 0.6552392427969004,
"eval_sts-test_spearman_dot": 0.4585595522909849,
"eval_sts-test_spearman_euclidean": 0.652406174691995,
"eval_sts-test_spearman_manhattan": 0.6662387448368152,
"eval_sts-test_spearman_max": 0.6662387448368152,
"eval_vitaminc-pairs_loss": 3.7554073333740234,
"eval_vitaminc-pairs_runtime": 4.7418,
"eval_vitaminc-pairs_samples_per_second": 26.994,
"eval_vitaminc-pairs_steps_per_second": 0.211,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_negation-triplets_loss": 3.6897997856140137,
"eval_negation-triplets_runtime": 3.1578,
"eval_negation-triplets_samples_per_second": 40.534,
"eval_negation-triplets_steps_per_second": 0.317,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_scitail-pairs-pos_loss": 0.45563364028930664,
"eval_scitail-pairs-pos_runtime": 2.7223,
"eval_scitail-pairs-pos_samples_per_second": 47.02,
"eval_scitail-pairs-pos_steps_per_second": 0.367,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_scitail-pairs-qa_loss": 1.115855097770691,
"eval_scitail-pairs-qa_runtime": 2.2597,
"eval_scitail-pairs-qa_samples_per_second": 56.646,
"eval_scitail-pairs-qa_steps_per_second": 0.443,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_xsum-pairs_loss": 3.2118453979492188,
"eval_xsum-pairs_runtime": 3.0538,
"eval_xsum-pairs_samples_per_second": 41.915,
"eval_xsum-pairs_steps_per_second": 0.327,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_sciq_pairs_loss": 0.484823614358902,
"eval_sciq_pairs_runtime": 3.959,
"eval_sciq_pairs_samples_per_second": 32.331,
"eval_sciq_pairs_steps_per_second": 0.253,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_qasc_pairs_loss": 2.8566131591796875,
"eval_qasc_pairs_runtime": 2.1087,
"eval_qasc_pairs_samples_per_second": 60.701,
"eval_qasc_pairs_steps_per_second": 0.474,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_openbookqa_pairs_loss": 2.1501104831695557,
"eval_openbookqa_pairs_runtime": 2.2555,
"eval_openbookqa_pairs_samples_per_second": 56.751,
"eval_openbookqa_pairs_steps_per_second": 0.443,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_msmarco_pairs_loss": 4.395960807800293,
"eval_msmarco_pairs_runtime": 2.2407,
"eval_msmarco_pairs_samples_per_second": 57.125,
"eval_msmarco_pairs_steps_per_second": 0.446,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_nq_pairs_loss": 4.488173484802246,
"eval_nq_pairs_runtime": 2.7484,
"eval_nq_pairs_samples_per_second": 46.572,
"eval_nq_pairs_steps_per_second": 0.364,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_trivia_pairs_loss": 4.023955345153809,
"eval_trivia_pairs_runtime": 3.7908,
"eval_trivia_pairs_samples_per_second": 33.766,
"eval_trivia_pairs_steps_per_second": 0.264,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_gooaq_pairs_loss": 3.383638858795166,
"eval_gooaq_pairs_runtime": 2.1349,
"eval_gooaq_pairs_samples_per_second": 59.957,
"eval_gooaq_pairs_steps_per_second": 0.468,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_paws-pos_loss": 0.12275903671979904,
"eval_paws-pos_runtime": 2.2818,
"eval_paws-pos_samples_per_second": 56.095,
"eval_paws-pos_steps_per_second": 0.438,
"step": 97
},
{
"epoch": 0.050233039875712066,
"eval_global_dataset_loss": 1.9564138650894165,
"eval_global_dataset_runtime": 10.2217,
"eval_global_dataset_samples_per_second": 40.698,
"eval_global_dataset_steps_per_second": 0.391,
"step": 97
},
{
"epoch": 0.05178663904712584,
"grad_norm": 83.8360824584961,
"learning_rate": 2.1176470588235296e-06,
"loss": 4.0315,
"step": 100
},
{
"epoch": 0.06214396685655101,
"grad_norm": 325.5680236816406,
"learning_rate": 2.588235294117647e-06,
"loss": 1.6348,
"step": 120
},
{
"epoch": 0.07250129466597618,
"grad_norm": 106.99758911132812,
"learning_rate": 3.0588235294117647e-06,
"loss": 1.1866,
"step": 140
},
{
"epoch": 0.08285862247540135,
"grad_norm": 30.390771865844727,
"learning_rate": 3.5294117647058825e-06,
"loss": 0.6138,
"step": 160
},
{
"epoch": 0.09321595028482652,
"grad_norm": 39.691532135009766,
"learning_rate": 4e-06,
"loss": 0.5244,
"step": 180
},
{
"epoch": 0.10046607975142413,
"eval_Qnli-dev_cosine_accuracy": 0.666015625,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7696025967597961,
"eval_Qnli-dev_cosine_ap": 0.693851901846308,
"eval_Qnli-dev_cosine_f1": 0.6625000000000001,
"eval_Qnli-dev_cosine_f1_threshold": 0.6638460159301758,
"eval_Qnli-dev_cosine_precision": 0.5247524752475248,
"eval_Qnli-dev_cosine_recall": 0.8983050847457628,
"eval_Qnli-dev_dot_accuracy": 0.6796875,
"eval_Qnli-dev_dot_accuracy_threshold": 822.6981201171875,
"eval_Qnli-dev_dot_ap": 0.6603086879421342,
"eval_Qnli-dev_dot_f1": 0.657856093979442,
"eval_Qnli-dev_dot_f1_threshold": 618.4547119140625,
"eval_Qnli-dev_dot_precision": 0.503370786516854,
"eval_Qnli-dev_dot_recall": 0.9491525423728814,
"eval_Qnli-dev_euclidean_accuracy": 0.666015625,
"eval_Qnli-dev_euclidean_accuracy_threshold": 19.874027252197266,
"eval_Qnli-dev_euclidean_ap": 0.694158709095853,
"eval_Qnli-dev_euclidean_f1": 0.6630236794171221,
"eval_Qnli-dev_euclidean_f1_threshold": 23.005264282226562,
"eval_Qnli-dev_euclidean_precision": 0.5814696485623003,
"eval_Qnli-dev_euclidean_recall": 0.7711864406779662,
"eval_Qnli-dev_manhattan_accuracy": 0.66796875,
"eval_Qnli-dev_manhattan_accuracy_threshold": 624.285888671875,
"eval_Qnli-dev_manhattan_ap": 0.692633721446368,
"eval_Qnli-dev_manhattan_f1": 0.6641366223908918,
"eval_Qnli-dev_manhattan_f1_threshold": 696.709716796875,
"eval_Qnli-dev_manhattan_precision": 0.6013745704467354,
"eval_Qnli-dev_manhattan_recall": 0.7415254237288136,
"eval_Qnli-dev_max_accuracy": 0.6796875,
"eval_Qnli-dev_max_accuracy_threshold": 822.6981201171875,
"eval_Qnli-dev_max_ap": 0.694158709095853,
"eval_Qnli-dev_max_f1": 0.6641366223908918,
"eval_Qnli-dev_max_f1_threshold": 696.709716796875,
"eval_Qnli-dev_max_precision": 0.6013745704467354,
"eval_Qnli-dev_max_recall": 0.9491525423728814,
"eval_allNLI-dev_cosine_accuracy": 0.701171875,
"eval_allNLI-dev_cosine_accuracy_threshold": 0.854247510433197,
"eval_allNLI-dev_cosine_ap": 0.5504250327111149,
"eval_allNLI-dev_cosine_f1": 0.567287784679089,
"eval_allNLI-dev_cosine_f1_threshold": 0.7080726623535156,
"eval_allNLI-dev_cosine_precision": 0.44193548387096776,
"eval_allNLI-dev_cosine_recall": 0.791907514450867,
"eval_allNLI-dev_dot_accuracy": 0.69921875,
"eval_allNLI-dev_dot_accuracy_threshold": 885.8963623046875,
"eval_allNLI-dev_dot_ap": 0.5371398846089106,
"eval_allNLI-dev_dot_f1": 0.5720338983050848,
"eval_allNLI-dev_dot_f1_threshold": 732.1597290039062,
"eval_allNLI-dev_dot_precision": 0.451505016722408,
"eval_allNLI-dev_dot_recall": 0.7803468208092486,
"eval_allNLI-dev_euclidean_accuracy": 0.701171875,
"eval_allNLI-dev_euclidean_accuracy_threshold": 16.9801082611084,
"eval_allNLI-dev_euclidean_ap": 0.5503780840587245,
"eval_allNLI-dev_euclidean_f1": 0.5671641791044777,
"eval_allNLI-dev_euclidean_f1_threshold": 24.19074821472168,
"eval_allNLI-dev_euclidean_precision": 0.44932432432432434,
"eval_allNLI-dev_euclidean_recall": 0.7687861271676301,
"eval_allNLI-dev_manhattan_accuracy": 0.703125,
"eval_allNLI-dev_manhattan_accuracy_threshold": 529.9462280273438,
"eval_allNLI-dev_manhattan_ap": 0.5524969745859143,
"eval_allNLI-dev_manhattan_f1": 0.5638297872340425,
"eval_allNLI-dev_manhattan_f1_threshold": 826.8560791015625,
"eval_allNLI-dev_manhattan_precision": 0.40664961636828645,
"eval_allNLI-dev_manhattan_recall": 0.9190751445086706,
"eval_allNLI-dev_max_accuracy": 0.703125,
"eval_allNLI-dev_max_accuracy_threshold": 885.8963623046875,
"eval_allNLI-dev_max_ap": 0.5524969745859143,
"eval_allNLI-dev_max_f1": 0.5720338983050848,
"eval_allNLI-dev_max_f1_threshold": 826.8560791015625,
"eval_allNLI-dev_max_precision": 0.451505016722408,
"eval_allNLI-dev_max_recall": 0.9190751445086706,
"eval_sequential_score": 0.694158709095853,
"eval_sts-test_pearson_cosine": 0.8866994033223972,
"eval_sts-test_pearson_dot": 0.8712266973511624,
"eval_sts-test_pearson_euclidean": 0.9028053322103908,
"eval_sts-test_pearson_manhattan": 0.9029714248344419,
"eval_sts-test_pearson_max": 0.9029714248344419,
"eval_sts-test_spearman_cosine": 0.8941879764786184,
"eval_sts-test_spearman_dot": 0.8632849034222648,
"eval_sts-test_spearman_euclidean": 0.8944520984233506,
"eval_sts-test_spearman_manhattan": 0.8945218656398598,
"eval_sts-test_spearman_max": 0.8945218656398598,
"eval_vitaminc-pairs_loss": 3.507073163986206,
"eval_vitaminc-pairs_runtime": 4.4774,
"eval_vitaminc-pairs_samples_per_second": 28.588,
"eval_vitaminc-pairs_steps_per_second": 0.223,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_negation-triplets_loss": 1.1223009824752808,
"eval_negation-triplets_runtime": 3.102,
"eval_negation-triplets_samples_per_second": 41.264,
"eval_negation-triplets_steps_per_second": 0.322,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_scitail-pairs-pos_loss": 0.06560208648443222,
"eval_scitail-pairs-pos_runtime": 2.6151,
"eval_scitail-pairs-pos_samples_per_second": 48.946,
"eval_scitail-pairs-pos_steps_per_second": 0.382,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_scitail-pairs-qa_loss": 0.044671397656202316,
"eval_scitail-pairs-qa_runtime": 2.2115,
"eval_scitail-pairs-qa_samples_per_second": 57.879,
"eval_scitail-pairs-qa_steps_per_second": 0.452,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_xsum-pairs_loss": 0.07691845297813416,
"eval_xsum-pairs_runtime": 3.043,
"eval_xsum-pairs_samples_per_second": 42.064,
"eval_xsum-pairs_steps_per_second": 0.329,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_sciq_pairs_loss": 0.12039273232221603,
"eval_sciq_pairs_runtime": 3.878,
"eval_sciq_pairs_samples_per_second": 33.007,
"eval_sciq_pairs_steps_per_second": 0.258,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_qasc_pairs_loss": 0.36198654770851135,
"eval_qasc_pairs_runtime": 2.0543,
"eval_qasc_pairs_samples_per_second": 62.307,
"eval_qasc_pairs_steps_per_second": 0.487,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_openbookqa_pairs_loss": 0.5711529850959778,
"eval_openbookqa_pairs_runtime": 2.2213,
"eval_openbookqa_pairs_samples_per_second": 57.624,
"eval_openbookqa_pairs_steps_per_second": 0.45,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_msmarco_pairs_loss": 0.3250836133956909,
"eval_msmarco_pairs_runtime": 2.22,
"eval_msmarco_pairs_samples_per_second": 57.657,
"eval_msmarco_pairs_steps_per_second": 0.45,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_nq_pairs_loss": 0.4249531030654907,
"eval_nq_pairs_runtime": 2.7189,
"eval_nq_pairs_samples_per_second": 47.079,
"eval_nq_pairs_steps_per_second": 0.368,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_trivia_pairs_loss": 0.2965388894081116,
"eval_trivia_pairs_runtime": 3.7556,
"eval_trivia_pairs_samples_per_second": 34.082,
"eval_trivia_pairs_steps_per_second": 0.266,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_gooaq_pairs_loss": 0.2151084989309311,
"eval_gooaq_pairs_runtime": 2.1122,
"eval_gooaq_pairs_samples_per_second": 60.601,
"eval_gooaq_pairs_steps_per_second": 0.473,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_paws-pos_loss": 0.0295370165258646,
"eval_paws-pos_runtime": 2.2123,
"eval_paws-pos_samples_per_second": 57.86,
"eval_paws-pos_steps_per_second": 0.452,
"step": 194
},
{
"epoch": 0.10046607975142413,
"eval_global_dataset_loss": 0.35498398542404175,
"eval_global_dataset_runtime": 10.1407,
"eval_global_dataset_samples_per_second": 41.023,
"eval_global_dataset_steps_per_second": 0.394,
"step": 194
},
{
"epoch": 0.10357327809425168,
"grad_norm": 43.11693572998047,
"learning_rate": 4.470588235294118e-06,
"loss": 0.376,
"step": 200
},
{
"epoch": 0.11393060590367685,
"grad_norm": 31.125375747680664,
"learning_rate": 4.941176470588235e-06,
"loss": 0.2782,
"step": 220
},
{
"epoch": 0.12428793371310203,
"grad_norm": 14.048110961914062,
"learning_rate": 5.411764705882353e-06,
"loss": 0.2391,
"step": 240
},
{
"epoch": 0.13464526152252718,
"grad_norm": 5.956579685211182,
"learning_rate": 5.882352941176471e-06,
"loss": 0.2767,
"step": 260
},
{
"epoch": 0.14500258933195237,
"grad_norm": 14.470146179199219,
"learning_rate": 6.352941176470589e-06,
"loss": 0.2359,
"step": 280
},
{
"epoch": 0.1506991196271362,
"eval_Qnli-dev_cosine_accuracy": 0.6875,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.7567152976989746,
"eval_Qnli-dev_cosine_ap": 0.7133123361631746,
"eval_Qnli-dev_cosine_f1": 0.6853146853146853,
"eval_Qnli-dev_cosine_f1_threshold": 0.6536699533462524,
"eval_Qnli-dev_cosine_precision": 0.5833333333333334,
"eval_Qnli-dev_cosine_recall": 0.8305084745762712,
"eval_Qnli-dev_dot_accuracy": 0.673828125,
"eval_Qnli-dev_dot_accuracy_threshold": 731.5150756835938,
"eval_Qnli-dev_dot_ap": 0.6890325242500185,
"eval_Qnli-dev_dot_f1": 0.6782006920415226,
"eval_Qnli-dev_dot_f1_threshold": 621.156982421875,
"eval_Qnli-dev_dot_precision": 0.5730994152046783,
"eval_Qnli-dev_dot_recall": 0.8305084745762712,
"eval_Qnli-dev_euclidean_accuracy": 0.6875,
"eval_Qnli-dev_euclidean_accuracy_threshold": 21.166996002197266,
"eval_Qnli-dev_euclidean_ap": 0.717782618584373,
"eval_Qnli-dev_euclidean_f1": 0.6832740213523131,
"eval_Qnli-dev_euclidean_f1_threshold": 25.534191131591797,
"eval_Qnli-dev_euclidean_precision": 0.588957055214724,
"eval_Qnli-dev_euclidean_recall": 0.8135593220338984,
"eval_Qnli-dev_manhattan_accuracy": 0.689453125,
"eval_Qnli-dev_manhattan_accuracy_threshold": 717.0855712890625,
"eval_Qnli-dev_manhattan_ap": 0.7178394918687495,
"eval_Qnli-dev_manhattan_f1": 0.6815068493150686,
"eval_Qnli-dev_manhattan_f1_threshold": 809.9966430664062,
"eval_Qnli-dev_manhattan_precision": 0.5718390804597702,
"eval_Qnli-dev_manhattan_recall": 0.8432203389830508,
"eval_Qnli-dev_max_accuracy": 0.689453125,
"eval_Qnli-dev_max_accuracy_threshold": 731.5150756835938,
"eval_Qnli-dev_max_ap": 0.7178394918687495,
"eval_Qnli-dev_max_f1": 0.6853146853146853,
"eval_Qnli-dev_max_f1_threshold": 809.9966430664062,
"eval_Qnli-dev_max_precision": 0.588957055214724,
"eval_Qnli-dev_max_recall": 0.8432203389830508,
"eval_allNLI-dev_cosine_accuracy": 0.71484375,
"eval_allNLI-dev_cosine_accuracy_threshold": 0.8485724329948425,
"eval_allNLI-dev_cosine_ap": 0.5777522094864251,
"eval_allNLI-dev_cosine_f1": 0.5925925925925926,
"eval_allNLI-dev_cosine_f1_threshold": 0.7124052047729492,
"eval_allNLI-dev_cosine_precision": 0.4942084942084942,
"eval_allNLI-dev_cosine_recall": 0.7398843930635838,
"eval_allNLI-dev_dot_accuracy": 0.71484375,
"eval_allNLI-dev_dot_accuracy_threshold": 835.6192016601562,
"eval_allNLI-dev_dot_ap": 0.5708546535940942,
"eval_allNLI-dev_dot_f1": 0.5931372549019609,
"eval_allNLI-dev_dot_f1_threshold": 712.94482421875,
"eval_allNLI-dev_dot_precision": 0.5148936170212766,
"eval_allNLI-dev_dot_recall": 0.6994219653179191,
"eval_allNLI-dev_euclidean_accuracy": 0.712890625,
"eval_allNLI-dev_euclidean_accuracy_threshold": 15.772256851196289,
"eval_allNLI-dev_euclidean_ap": 0.5773033114664347,
"eval_allNLI-dev_euclidean_f1": 0.5957446808510639,
"eval_allNLI-dev_euclidean_f1_threshold": 24.513042449951172,
"eval_allNLI-dev_euclidean_precision": 0.4713804713804714,
"eval_allNLI-dev_euclidean_recall": 0.8092485549132948,
"eval_allNLI-dev_manhattan_accuracy": 0.71484375,
"eval_allNLI-dev_manhattan_accuracy_threshold": 494.4720153808594,
"eval_allNLI-dev_manhattan_ap": 0.5787277750430182,
"eval_allNLI-dev_manhattan_f1": 0.597457627118644,
"eval_allNLI-dev_manhattan_f1_threshold": 764.1075439453125,
"eval_allNLI-dev_manhattan_precision": 0.47157190635451507,
"eval_allNLI-dev_manhattan_recall": 0.815028901734104,
"eval_allNLI-dev_max_accuracy": 0.71484375,
"eval_allNLI-dev_max_accuracy_threshold": 835.6192016601562,
"eval_allNLI-dev_max_ap": 0.5787277750430182,
"eval_allNLI-dev_max_f1": 0.597457627118644,
"eval_allNLI-dev_max_f1_threshold": 764.1075439453125,
"eval_allNLI-dev_max_precision": 0.5148936170212766,
"eval_allNLI-dev_max_recall": 0.815028901734104,
"eval_sequential_score": 0.7178394918687495,
"eval_sts-test_pearson_cosine": 0.9080888281681364,
"eval_sts-test_pearson_dot": 0.8993720999648187,
"eval_sts-test_pearson_euclidean": 0.9185021221297063,
"eval_sts-test_pearson_manhattan": 0.9182084064307341,
"eval_sts-test_pearson_max": 0.9185021221297063,
"eval_sts-test_spearman_cosine": 0.9145502926755805,
"eval_sts-test_spearman_dot": 0.8990795555767088,
"eval_sts-test_spearman_euclidean": 0.9143005806370166,
"eval_sts-test_spearman_manhattan": 0.9141107457861942,
"eval_sts-test_spearman_max": 0.9145502926755805,
"eval_vitaminc-pairs_loss": 3.4645299911499023,
"eval_vitaminc-pairs_runtime": 4.4497,
"eval_vitaminc-pairs_samples_per_second": 28.766,
"eval_vitaminc-pairs_steps_per_second": 0.225,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_negation-triplets_loss": 0.8774887323379517,
"eval_negation-triplets_runtime": 3.1401,
"eval_negation-triplets_samples_per_second": 40.764,
"eval_negation-triplets_steps_per_second": 0.318,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_scitail-pairs-pos_loss": 0.029673559591174126,
"eval_scitail-pairs-pos_runtime": 2.6642,
"eval_scitail-pairs-pos_samples_per_second": 48.044,
"eval_scitail-pairs-pos_steps_per_second": 0.375,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_scitail-pairs-qa_loss": 0.011800204403698444,
"eval_scitail-pairs-qa_runtime": 2.1861,
"eval_scitail-pairs-qa_samples_per_second": 58.551,
"eval_scitail-pairs-qa_steps_per_second": 0.457,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_xsum-pairs_loss": 0.017930012196302414,
"eval_xsum-pairs_runtime": 3.0255,
"eval_xsum-pairs_samples_per_second": 42.307,
"eval_xsum-pairs_steps_per_second": 0.331,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_sciq_pairs_loss": 0.09765021502971649,
"eval_sciq_pairs_runtime": 3.8726,
"eval_sciq_pairs_samples_per_second": 33.053,
"eval_sciq_pairs_steps_per_second": 0.258,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_qasc_pairs_loss": 0.3064229488372803,
"eval_qasc_pairs_runtime": 2.1307,
"eval_qasc_pairs_samples_per_second": 60.075,
"eval_qasc_pairs_steps_per_second": 0.469,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_openbookqa_pairs_loss": 0.46111759543418884,
"eval_openbookqa_pairs_runtime": 2.2685,
"eval_openbookqa_pairs_samples_per_second": 56.424,
"eval_openbookqa_pairs_steps_per_second": 0.441,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_msmarco_pairs_loss": 0.08168309926986694,
"eval_msmarco_pairs_runtime": 2.2657,
"eval_msmarco_pairs_samples_per_second": 56.495,
"eval_msmarco_pairs_steps_per_second": 0.441,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_nq_pairs_loss": 0.13220462203025818,
"eval_nq_pairs_runtime": 2.7139,
"eval_nq_pairs_samples_per_second": 47.164,
"eval_nq_pairs_steps_per_second": 0.368,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_trivia_pairs_loss": 0.1532345414161682,
"eval_trivia_pairs_runtime": 3.76,
"eval_trivia_pairs_samples_per_second": 34.043,
"eval_trivia_pairs_steps_per_second": 0.266,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_gooaq_pairs_loss": 0.10126010328531265,
"eval_gooaq_pairs_runtime": 2.1372,
"eval_gooaq_pairs_samples_per_second": 59.892,
"eval_gooaq_pairs_steps_per_second": 0.468,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_paws-pos_loss": 0.021147189661860466,
"eval_paws-pos_runtime": 2.2138,
"eval_paws-pos_samples_per_second": 57.819,
"eval_paws-pos_steps_per_second": 0.452,
"step": 291
},
{
"epoch": 0.1506991196271362,
"eval_global_dataset_loss": 0.2509276270866394,
"eval_global_dataset_runtime": 10.154,
"eval_global_dataset_samples_per_second": 40.969,
"eval_global_dataset_steps_per_second": 0.394,
"step": 291
},
{
"epoch": 0.15535991714137753,
"grad_norm": 6.319842338562012,
"learning_rate": 6.823529411764706e-06,
"loss": 0.1505,
"step": 300
},
{
"epoch": 0.1657172449508027,
"grad_norm": 16.11246109008789,
"learning_rate": 7.294117647058823e-06,
"loss": 0.1473,
"step": 320
},
{
"epoch": 0.17607457276022787,
"grad_norm": 9.587472915649414,
"learning_rate": 7.764705882352943e-06,
"loss": 0.1614,
"step": 340
},
{
"epoch": 0.18643190056965303,
"grad_norm": 21.551036834716797,
"learning_rate": 8.23529411764706e-06,
"loss": 0.1834,
"step": 360
},
{
"epoch": 0.1967892283790782,
"grad_norm": 7.1668548583984375,
"learning_rate": 8.705882352941177e-06,
"loss": 0.164,
"step": 380
},
{
"epoch": 0.20093215950284826,
"eval_Qnli-dev_cosine_accuracy": 0.685546875,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.6639062166213989,
"eval_Qnli-dev_cosine_ap": 0.7263308094103806,
"eval_Qnli-dev_cosine_f1": 0.6986754966887417,
"eval_Qnli-dev_cosine_f1_threshold": 0.576126754283905,
"eval_Qnli-dev_cosine_precision": 0.5733695652173914,
"eval_Qnli-dev_cosine_recall": 0.8940677966101694,
"eval_Qnli-dev_dot_accuracy": 0.677734375,
"eval_Qnli-dev_dot_accuracy_threshold": 591.922607421875,
"eval_Qnli-dev_dot_ap": 0.7011149935901715,
"eval_Qnli-dev_dot_f1": 0.6837606837606838,
"eval_Qnli-dev_dot_f1_threshold": 505.88946533203125,
"eval_Qnli-dev_dot_precision": 0.5730659025787965,
"eval_Qnli-dev_dot_recall": 0.847457627118644,
"eval_Qnli-dev_euclidean_accuracy": 0.693359375,
"eval_Qnli-dev_euclidean_accuracy_threshold": 23.223190307617188,
"eval_Qnli-dev_euclidean_ap": 0.7305021849937567,
"eval_Qnli-dev_euclidean_f1": 0.698205546492659,
"eval_Qnli-dev_euclidean_f1_threshold": 27.346588134765625,
"eval_Qnli-dev_euclidean_precision": 0.5676392572944297,
"eval_Qnli-dev_euclidean_recall": 0.9067796610169492,
"eval_Qnli-dev_manhattan_accuracy": 0.6953125,
"eval_Qnli-dev_manhattan_accuracy_threshold": 733.809814453125,
"eval_Qnli-dev_manhattan_ap": 0.7286452491858623,
"eval_Qnli-dev_manhattan_f1": 0.7008264462809918,
"eval_Qnli-dev_manhattan_f1_threshold": 845.1378784179688,
"eval_Qnli-dev_manhattan_precision": 0.5745257452574526,
"eval_Qnli-dev_manhattan_recall": 0.8983050847457628,
"eval_Qnli-dev_max_accuracy": 0.6953125,
"eval_Qnli-dev_max_accuracy_threshold": 733.809814453125,
"eval_Qnli-dev_max_ap": 0.7305021849937567,
"eval_Qnli-dev_max_f1": 0.7008264462809918,
"eval_Qnli-dev_max_f1_threshold": 845.1378784179688,
"eval_Qnli-dev_max_precision": 0.5745257452574526,
"eval_Qnli-dev_max_recall": 0.9067796610169492,
"eval_allNLI-dev_cosine_accuracy": 0.720703125,
"eval_allNLI-dev_cosine_accuracy_threshold": 0.8122999668121338,
"eval_allNLI-dev_cosine_ap": 0.6057945129739214,
"eval_allNLI-dev_cosine_f1": 0.6099585062240664,
"eval_allNLI-dev_cosine_f1_threshold": 0.6289657950401306,
"eval_allNLI-dev_cosine_precision": 0.47572815533980584,
"eval_allNLI-dev_cosine_recall": 0.8497109826589595,
"eval_allNLI-dev_dot_accuracy": 0.71875,
"eval_allNLI-dev_dot_accuracy_threshold": 745.8334350585938,
"eval_allNLI-dev_dot_ap": 0.5916353965674287,
"eval_allNLI-dev_dot_f1": 0.610655737704918,
"eval_allNLI-dev_dot_f1_threshold": 540.4627075195312,
"eval_allNLI-dev_dot_precision": 0.473015873015873,
"eval_allNLI-dev_dot_recall": 0.861271676300578,
"eval_allNLI-dev_euclidean_accuracy": 0.71875,
"eval_allNLI-dev_euclidean_accuracy_threshold": 18.420812606811523,
"eval_allNLI-dev_euclidean_ap": 0.6036307863078971,
"eval_allNLI-dev_euclidean_f1": 0.6182572614107884,
"eval_allNLI-dev_euclidean_f1_threshold": 25.34260368347168,
"eval_allNLI-dev_euclidean_precision": 0.48220064724919093,
"eval_allNLI-dev_euclidean_recall": 0.861271676300578,
"eval_allNLI-dev_manhattan_accuracy": 0.72265625,
"eval_allNLI-dev_manhattan_accuracy_threshold": 600.5528564453125,
"eval_allNLI-dev_manhattan_ap": 0.60455800678133,
"eval_allNLI-dev_manhattan_f1": 0.6170212765957447,
"eval_allNLI-dev_manhattan_f1_threshold": 781.2642822265625,
"eval_allNLI-dev_manhattan_precision": 0.4882154882154882,
"eval_allNLI-dev_manhattan_recall": 0.838150289017341,
"eval_allNLI-dev_max_accuracy": 0.72265625,
"eval_allNLI-dev_max_accuracy_threshold": 745.8334350585938,
"eval_allNLI-dev_max_ap": 0.6057945129739214,
"eval_allNLI-dev_max_f1": 0.6182572614107884,
"eval_allNLI-dev_max_f1_threshold": 781.2642822265625,
"eval_allNLI-dev_max_precision": 0.4882154882154882,
"eval_allNLI-dev_max_recall": 0.861271676300578,
"eval_sequential_score": 0.7305021849937567,
"eval_sts-test_pearson_cosine": 0.9127094359947289,
"eval_sts-test_pearson_dot": 0.895861018162462,
"eval_sts-test_pearson_euclidean": 0.9199643208978237,
"eval_sts-test_pearson_manhattan": 0.9196227536115376,
"eval_sts-test_pearson_max": 0.9199643208978237,
"eval_sts-test_spearman_cosine": 0.9147098309224408,
"eval_sts-test_spearman_dot": 0.8908736085574486,
"eval_sts-test_spearman_euclidean": 0.9157628281029806,
"eval_sts-test_spearman_manhattan": 0.9154038334840987,
"eval_sts-test_spearman_max": 0.9157628281029806,
"eval_vitaminc-pairs_loss": 2.9898574352264404,
"eval_vitaminc-pairs_runtime": 4.5557,
"eval_vitaminc-pairs_samples_per_second": 28.097,
"eval_vitaminc-pairs_steps_per_second": 0.22,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_negation-triplets_loss": 0.761246919631958,
"eval_negation-triplets_runtime": 3.3164,
"eval_negation-triplets_samples_per_second": 38.596,
"eval_negation-triplets_steps_per_second": 0.302,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_scitail-pairs-pos_loss": 0.0324205681681633,
"eval_scitail-pairs-pos_runtime": 2.7411,
"eval_scitail-pairs-pos_samples_per_second": 46.696,
"eval_scitail-pairs-pos_steps_per_second": 0.365,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_scitail-pairs-qa_loss": 0.0026867901906371117,
"eval_scitail-pairs-qa_runtime": 2.4223,
"eval_scitail-pairs-qa_samples_per_second": 52.843,
"eval_scitail-pairs-qa_steps_per_second": 0.413,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_xsum-pairs_loss": 0.00942266546189785,
"eval_xsum-pairs_runtime": 3.1951,
"eval_xsum-pairs_samples_per_second": 40.061,
"eval_xsum-pairs_steps_per_second": 0.313,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_sciq_pairs_loss": 0.09475678950548172,
"eval_sciq_pairs_runtime": 4.1048,
"eval_sciq_pairs_samples_per_second": 31.183,
"eval_sciq_pairs_steps_per_second": 0.244,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_qasc_pairs_loss": 0.2342282086610794,
"eval_qasc_pairs_runtime": 2.27,
"eval_qasc_pairs_samples_per_second": 56.388,
"eval_qasc_pairs_steps_per_second": 0.441,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_openbookqa_pairs_loss": 0.4459604024887085,
"eval_openbookqa_pairs_runtime": 2.3513,
"eval_openbookqa_pairs_samples_per_second": 54.438,
"eval_openbookqa_pairs_steps_per_second": 0.425,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_msmarco_pairs_loss": 0.2047792673110962,
"eval_msmarco_pairs_runtime": 2.3749,
"eval_msmarco_pairs_samples_per_second": 53.897,
"eval_msmarco_pairs_steps_per_second": 0.421,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_nq_pairs_loss": 0.12687399983406067,
"eval_nq_pairs_runtime": 2.8216,
"eval_nq_pairs_samples_per_second": 45.365,
"eval_nq_pairs_steps_per_second": 0.354,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_trivia_pairs_loss": 0.208355113863945,
"eval_trivia_pairs_runtime": 3.8421,
"eval_trivia_pairs_samples_per_second": 33.315,
"eval_trivia_pairs_steps_per_second": 0.26,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_gooaq_pairs_loss": 0.10170701891183853,
"eval_gooaq_pairs_runtime": 2.3264,
"eval_gooaq_pairs_samples_per_second": 55.02,
"eval_gooaq_pairs_steps_per_second": 0.43,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_paws-pos_loss": 0.0226531233638525,
"eval_paws-pos_runtime": 2.4028,
"eval_paws-pos_samples_per_second": 53.271,
"eval_paws-pos_steps_per_second": 0.416,
"step": 388
},
{
"epoch": 0.20093215950284826,
"eval_global_dataset_loss": 0.22992311418056488,
"eval_global_dataset_runtime": 10.4483,
"eval_global_dataset_samples_per_second": 39.815,
"eval_global_dataset_steps_per_second": 0.383,
"step": 388
},
{
"epoch": 0.20714655618850336,
"grad_norm": 0.033974967896938324,
"learning_rate": 9.176470588235295e-06,
"loss": 0.1426,
"step": 400
},
{
"epoch": 0.21750388399792853,
"grad_norm": 8.04489517211914,
"learning_rate": 9.647058823529412e-06,
"loss": 0.1838,
"step": 420
},
{
"epoch": 0.2278612118073537,
"grad_norm": 37.961544036865234,
"learning_rate": 1.0117647058823531e-05,
"loss": 0.1324,
"step": 440
},
{
"epoch": 0.23821853961677888,
"grad_norm": 9.86117172241211,
"learning_rate": 1.0588235294117648e-05,
"loss": 0.1242,
"step": 460
},
{
"epoch": 0.24857586742620405,
"grad_norm": 15.386984825134277,
"learning_rate": 1.1058823529411766e-05,
"loss": 0.2166,
"step": 480
},
{
"epoch": 0.25116519937856036,
"eval_Qnli-dev_cosine_accuracy": 0.70703125,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.6728878021240234,
"eval_Qnli-dev_cosine_ap": 0.7490927864840249,
"eval_Qnli-dev_cosine_f1": 0.7092198581560283,
"eval_Qnli-dev_cosine_f1_threshold": 0.6182924509048462,
"eval_Qnli-dev_cosine_precision": 0.6097560975609756,
"eval_Qnli-dev_cosine_recall": 0.847457627118644,
"eval_Qnli-dev_dot_accuracy": 0.67578125,
"eval_Qnli-dev_dot_accuracy_threshold": 664.639404296875,
"eval_Qnli-dev_dot_ap": 0.7143774472576185,
"eval_Qnli-dev_dot_f1": 0.7084019769357496,
"eval_Qnli-dev_dot_f1_threshold": 506.4283447265625,
"eval_Qnli-dev_dot_precision": 0.5795148247978437,
"eval_Qnli-dev_dot_recall": 0.9110169491525424,
"eval_Qnli-dev_euclidean_accuracy": 0.7109375,
"eval_Qnli-dev_euclidean_accuracy_threshold": 23.347135543823242,
"eval_Qnli-dev_euclidean_ap": 0.7578270539486094,
"eval_Qnli-dev_euclidean_f1": 0.712041884816754,
"eval_Qnli-dev_euclidean_f1_threshold": 26.101980209350586,
"eval_Qnli-dev_euclidean_precision": 0.6053412462908012,
"eval_Qnli-dev_euclidean_recall": 0.864406779661017,
"eval_Qnli-dev_manhattan_accuracy": 0.71484375,
"eval_Qnli-dev_manhattan_accuracy_threshold": 734.9889526367188,
"eval_Qnli-dev_manhattan_ap": 0.7578518420666434,
"eval_Qnli-dev_manhattan_f1": 0.7160940325497287,
"eval_Qnli-dev_manhattan_f1_threshold": 797.8458251953125,
"eval_Qnli-dev_manhattan_precision": 0.6246056782334385,
"eval_Qnli-dev_manhattan_recall": 0.8389830508474576,
"eval_Qnli-dev_max_accuracy": 0.71484375,
"eval_Qnli-dev_max_accuracy_threshold": 734.9889526367188,
"eval_Qnli-dev_max_ap": 0.7578518420666434,
"eval_Qnli-dev_max_f1": 0.7160940325497287,
"eval_Qnli-dev_max_f1_threshold": 797.8458251953125,
"eval_Qnli-dev_max_precision": 0.6246056782334385,
"eval_Qnli-dev_max_recall": 0.9110169491525424,
"eval_allNLI-dev_cosine_accuracy": 0.712890625,
"eval_allNLI-dev_cosine_accuracy_threshold": 0.8226721286773682,
"eval_allNLI-dev_cosine_ap": 0.608903927832523,
"eval_allNLI-dev_cosine_f1": 0.6211764705882353,
"eval_allNLI-dev_cosine_f1_threshold": 0.6668639183044434,
"eval_allNLI-dev_cosine_precision": 0.5238095238095238,
"eval_allNLI-dev_cosine_recall": 0.7630057803468208,
"eval_allNLI-dev_dot_accuracy": 0.72265625,
"eval_allNLI-dev_dot_accuracy_threshold": 701.8555908203125,
"eval_allNLI-dev_dot_ap": 0.6006292150580212,
"eval_allNLI-dev_dot_f1": 0.6206896551724138,
"eval_allNLI-dev_dot_f1_threshold": 543.5947265625,
"eval_allNLI-dev_dot_precision": 0.4948453608247423,
"eval_allNLI-dev_dot_recall": 0.8323699421965318,
"eval_allNLI-dev_euclidean_accuracy": 0.716796875,
"eval_allNLI-dev_euclidean_accuracy_threshold": 21.63890266418457,
"eval_allNLI-dev_euclidean_ap": 0.6064044650997461,
"eval_allNLI-dev_euclidean_f1": 0.6169354838709676,
"eval_allNLI-dev_euclidean_f1_threshold": 25.579940795898438,
"eval_allNLI-dev_euclidean_precision": 0.47368421052631576,
"eval_allNLI-dev_euclidean_recall": 0.884393063583815,
"eval_allNLI-dev_manhattan_accuracy": 0.71484375,
"eval_allNLI-dev_manhattan_accuracy_threshold": 673.708251953125,
"eval_allNLI-dev_manhattan_ap": 0.6078798861215969,
"eval_allNLI-dev_manhattan_f1": 0.6170212765957447,
"eval_allNLI-dev_manhattan_f1_threshold": 779.1580200195312,
"eval_allNLI-dev_manhattan_precision": 0.4882154882154882,
"eval_allNLI-dev_manhattan_recall": 0.838150289017341,
"eval_allNLI-dev_max_accuracy": 0.72265625,
"eval_allNLI-dev_max_accuracy_threshold": 701.8555908203125,
"eval_allNLI-dev_max_ap": 0.608903927832523,
"eval_allNLI-dev_max_f1": 0.6211764705882353,
"eval_allNLI-dev_max_f1_threshold": 779.1580200195312,
"eval_allNLI-dev_max_precision": 0.5238095238095238,
"eval_allNLI-dev_max_recall": 0.884393063583815,
"eval_sequential_score": 0.7578518420666434,
"eval_sts-test_pearson_cosine": 0.9085589003585355,
"eval_sts-test_pearson_dot": 0.8934066815875845,
"eval_sts-test_pearson_euclidean": 0.9132129682245754,
"eval_sts-test_pearson_manhattan": 0.9127682865746231,
"eval_sts-test_pearson_max": 0.9132129682245754,
"eval_sts-test_spearman_cosine": 0.9109138499261769,
"eval_sts-test_spearman_dot": 0.8920668114399275,
"eval_sts-test_spearman_euclidean": 0.912336089764457,
"eval_sts-test_spearman_manhattan": 0.9119349842086059,
"eval_sts-test_spearman_max": 0.912336089764457,
"eval_vitaminc-pairs_loss": 2.359689474105835,
"eval_vitaminc-pairs_runtime": 4.4921,
"eval_vitaminc-pairs_samples_per_second": 28.494,
"eval_vitaminc-pairs_steps_per_second": 0.223,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_negation-triplets_loss": 0.6828347444534302,
"eval_negation-triplets_runtime": 3.0767,
"eval_negation-triplets_samples_per_second": 41.603,
"eval_negation-triplets_steps_per_second": 0.325,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_scitail-pairs-pos_loss": 0.020232411101460457,
"eval_scitail-pairs-pos_runtime": 2.6103,
"eval_scitail-pairs-pos_samples_per_second": 49.036,
"eval_scitail-pairs-pos_steps_per_second": 0.383,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_scitail-pairs-qa_loss": 0.0017561395652592182,
"eval_scitail-pairs-qa_runtime": 2.2409,
"eval_scitail-pairs-qa_samples_per_second": 57.12,
"eval_scitail-pairs-qa_steps_per_second": 0.446,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_xsum-pairs_loss": 0.009539155289530754,
"eval_xsum-pairs_runtime": 3.0343,
"eval_xsum-pairs_samples_per_second": 42.184,
"eval_xsum-pairs_steps_per_second": 0.33,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_sciq_pairs_loss": 0.07515428215265274,
"eval_sciq_pairs_runtime": 3.8288,
"eval_sciq_pairs_samples_per_second": 33.431,
"eval_sciq_pairs_steps_per_second": 0.261,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_qasc_pairs_loss": 0.16715534031391144,
"eval_qasc_pairs_runtime": 2.0736,
"eval_qasc_pairs_samples_per_second": 61.729,
"eval_qasc_pairs_steps_per_second": 0.482,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_openbookqa_pairs_loss": 0.5365710854530334,
"eval_openbookqa_pairs_runtime": 2.2749,
"eval_openbookqa_pairs_samples_per_second": 56.267,
"eval_openbookqa_pairs_steps_per_second": 0.44,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_msmarco_pairs_loss": 0.183290034532547,
"eval_msmarco_pairs_runtime": 2.2376,
"eval_msmarco_pairs_samples_per_second": 57.204,
"eval_msmarco_pairs_steps_per_second": 0.447,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_nq_pairs_loss": 0.13633984327316284,
"eval_nq_pairs_runtime": 2.7168,
"eval_nq_pairs_samples_per_second": 47.115,
"eval_nq_pairs_steps_per_second": 0.368,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_trivia_pairs_loss": 0.13907591998577118,
"eval_trivia_pairs_runtime": 3.7638,
"eval_trivia_pairs_samples_per_second": 34.008,
"eval_trivia_pairs_steps_per_second": 0.266,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_gooaq_pairs_loss": 0.15382522344589233,
"eval_gooaq_pairs_runtime": 2.1349,
"eval_gooaq_pairs_samples_per_second": 59.955,
"eval_gooaq_pairs_steps_per_second": 0.468,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_paws-pos_loss": 0.02764580212533474,
"eval_paws-pos_runtime": 2.299,
"eval_paws-pos_samples_per_second": 55.676,
"eval_paws-pos_steps_per_second": 0.435,
"step": 485
},
{
"epoch": 0.25116519937856036,
"eval_global_dataset_loss": 0.26656147837638855,
"eval_global_dataset_runtime": 10.0817,
"eval_global_dataset_samples_per_second": 41.263,
"eval_global_dataset_steps_per_second": 0.397,
"step": 485
},
{
"epoch": 0.2589331952356292,
"grad_norm": 8.639649391174316,
"learning_rate": 1.1529411764705883e-05,
"loss": 0.1781,
"step": 500
},
{
"epoch": 0.26929052304505435,
"grad_norm": 14.192313194274902,
"learning_rate": 1.2e-05,
"loss": 0.2177,
"step": 520
},
{
"epoch": 0.2796478508544795,
"grad_norm": 0.47864726185798645,
"learning_rate": 1.2470588235294119e-05,
"loss": 0.5771,
"step": 540
},
{
"epoch": 0.29000517866390474,
"grad_norm": 94.00303649902344,
"learning_rate": 1.291764705882353e-05,
"loss": 2.2303,
"step": 560
},
{
"epoch": 0.3003625064733299,
"grad_norm": 284.8737487792969,
"learning_rate": 1.3364705882352942e-05,
"loss": 1.0045,
"step": 580
},
{
"epoch": 0.3013982392542724,
"eval_Qnli-dev_cosine_accuracy": 0.6953125,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.8107659816741943,
"eval_Qnli-dev_cosine_ap": 0.7306763373744987,
"eval_Qnli-dev_cosine_f1": 0.6955074875207986,
"eval_Qnli-dev_cosine_f1_threshold": 0.7081253528594971,
"eval_Qnli-dev_cosine_precision": 0.5726027397260274,
"eval_Qnli-dev_cosine_recall": 0.885593220338983,
"eval_Qnli-dev_dot_accuracy": 0.671875,
"eval_Qnli-dev_dot_accuracy_threshold": 875.9421997070312,
"eval_Qnli-dev_dot_ap": 0.6876970673529026,
"eval_Qnli-dev_dot_f1": 0.6821192052980133,
"eval_Qnli-dev_dot_f1_threshold": 786.2505493164062,
"eval_Qnli-dev_dot_precision": 0.5597826086956522,
"eval_Qnli-dev_dot_recall": 0.8728813559322034,
"eval_Qnli-dev_euclidean_accuracy": 0.69921875,
"eval_Qnli-dev_euclidean_accuracy_threshold": 21.132396697998047,
"eval_Qnli-dev_euclidean_ap": 0.7325235937497143,
"eval_Qnli-dev_euclidean_f1": 0.6955074875207986,
"eval_Qnli-dev_euclidean_f1_threshold": 25.694360733032227,
"eval_Qnli-dev_euclidean_precision": 0.5726027397260274,
"eval_Qnli-dev_euclidean_recall": 0.885593220338983,
"eval_Qnli-dev_manhattan_accuracy": 0.701171875,
"eval_Qnli-dev_manhattan_accuracy_threshold": 650.69775390625,
"eval_Qnli-dev_manhattan_ap": 0.73351057649253,
"eval_Qnli-dev_manhattan_f1": 0.6923076923076924,
"eval_Qnli-dev_manhattan_f1_threshold": 790.9528198242188,
"eval_Qnli-dev_manhattan_precision": 0.5718232044198895,
"eval_Qnli-dev_manhattan_recall": 0.8771186440677966,
"eval_Qnli-dev_max_accuracy": 0.701171875,
"eval_Qnli-dev_max_accuracy_threshold": 875.9421997070312,
"eval_Qnli-dev_max_ap": 0.73351057649253,
"eval_Qnli-dev_max_f1": 0.6955074875207986,
"eval_Qnli-dev_max_f1_threshold": 790.9528198242188,
"eval_Qnli-dev_max_precision": 0.5726027397260274,
"eval_Qnli-dev_max_recall": 0.885593220338983,
"eval_allNLI-dev_cosine_accuracy": 0.71875,
"eval_allNLI-dev_cosine_accuracy_threshold": 0.8345531225204468,
"eval_allNLI-dev_cosine_ap": 0.5833256810054208,
"eval_allNLI-dev_cosine_f1": 0.6093366093366094,
"eval_allNLI-dev_cosine_f1_threshold": 0.7519584894180298,
"eval_allNLI-dev_cosine_precision": 0.5299145299145299,
"eval_allNLI-dev_cosine_recall": 0.7167630057803468,
"eval_allNLI-dev_dot_accuracy": 0.71875,
"eval_allNLI-dev_dot_accuracy_threshold": 932.83544921875,
"eval_allNLI-dev_dot_ap": 0.5730668161963208,
"eval_allNLI-dev_dot_f1": 0.6140350877192983,
"eval_allNLI-dev_dot_f1_threshold": 790.3121337890625,
"eval_allNLI-dev_dot_precision": 0.49469964664310956,
"eval_allNLI-dev_dot_recall": 0.8092485549132948,
"eval_allNLI-dev_euclidean_accuracy": 0.712890625,
"eval_allNLI-dev_euclidean_accuracy_threshold": 16.462337493896484,
"eval_allNLI-dev_euclidean_ap": 0.5830290393354319,
"eval_allNLI-dev_euclidean_f1": 0.6080760095011876,
"eval_allNLI-dev_euclidean_f1_threshold": 23.817108154296875,
"eval_allNLI-dev_euclidean_precision": 0.5161290322580645,
"eval_allNLI-dev_euclidean_recall": 0.7398843930635838,
"eval_allNLI-dev_manhattan_accuracy": 0.71484375,
"eval_allNLI-dev_manhattan_accuracy_threshold": 514.4776611328125,
"eval_allNLI-dev_manhattan_ap": 0.5824345257218883,
"eval_allNLI-dev_manhattan_f1": 0.6029411764705882,
"eval_allNLI-dev_manhattan_f1_threshold": 725.9110717773438,
"eval_allNLI-dev_manhattan_precision": 0.5234042553191489,
"eval_allNLI-dev_manhattan_recall": 0.7109826589595376,
"eval_allNLI-dev_max_accuracy": 0.71875,
"eval_allNLI-dev_max_accuracy_threshold": 932.83544921875,
"eval_allNLI-dev_max_ap": 0.5833256810054208,
"eval_allNLI-dev_max_f1": 0.6140350877192983,
"eval_allNLI-dev_max_f1_threshold": 790.3121337890625,
"eval_allNLI-dev_max_precision": 0.5299145299145299,
"eval_allNLI-dev_max_recall": 0.8092485549132948,
"eval_sequential_score": 0.73351057649253,
"eval_sts-test_pearson_cosine": 0.911958388742002,
"eval_sts-test_pearson_dot": 0.8881053452310657,
"eval_sts-test_pearson_euclidean": 0.9250703199093523,
"eval_sts-test_pearson_manhattan": 0.9254282934479543,
"eval_sts-test_pearson_max": 0.9254282934479543,
"eval_sts-test_spearman_cosine": 0.9182240579769849,
"eval_sts-test_spearman_dot": 0.8777027753148232,
"eval_sts-test_spearman_euclidean": 0.9183138737585973,
"eval_sts-test_spearman_manhattan": 0.9189913183535404,
"eval_sts-test_spearman_max": 0.9189913183535404,
"eval_vitaminc-pairs_loss": 3.1416079998016357,
"eval_vitaminc-pairs_runtime": 4.5,
"eval_vitaminc-pairs_samples_per_second": 28.445,
"eval_vitaminc-pairs_steps_per_second": 0.222,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_negation-triplets_loss": 0.7199142575263977,
"eval_negation-triplets_runtime": 3.1016,
"eval_negation-triplets_samples_per_second": 41.27,
"eval_negation-triplets_steps_per_second": 0.322,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_scitail-pairs-pos_loss": 0.020503610372543335,
"eval_scitail-pairs-pos_runtime": 2.6474,
"eval_scitail-pairs-pos_samples_per_second": 48.35,
"eval_scitail-pairs-pos_steps_per_second": 0.378,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_scitail-pairs-qa_loss": 0.014284193515777588,
"eval_scitail-pairs-qa_runtime": 2.2281,
"eval_scitail-pairs-qa_samples_per_second": 57.447,
"eval_scitail-pairs-qa_steps_per_second": 0.449,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_xsum-pairs_loss": 0.020332960411906242,
"eval_xsum-pairs_runtime": 3.0646,
"eval_xsum-pairs_samples_per_second": 41.767,
"eval_xsum-pairs_steps_per_second": 0.326,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_sciq_pairs_loss": 0.07365372776985168,
"eval_sciq_pairs_runtime": 3.9037,
"eval_sciq_pairs_samples_per_second": 32.789,
"eval_sciq_pairs_steps_per_second": 0.256,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_qasc_pairs_loss": 0.9374014735221863,
"eval_qasc_pairs_runtime": 2.0898,
"eval_qasc_pairs_samples_per_second": 61.249,
"eval_qasc_pairs_steps_per_second": 0.479,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_openbookqa_pairs_loss": 0.5403007864952087,
"eval_openbookqa_pairs_runtime": 2.1959,
"eval_openbookqa_pairs_samples_per_second": 58.291,
"eval_openbookqa_pairs_steps_per_second": 0.455,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_msmarco_pairs_loss": 2.8998327255249023,
"eval_msmarco_pairs_runtime": 2.2455,
"eval_msmarco_pairs_samples_per_second": 57.004,
"eval_msmarco_pairs_steps_per_second": 0.445,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_nq_pairs_loss": 0.1996317207813263,
"eval_nq_pairs_runtime": 2.7271,
"eval_nq_pairs_samples_per_second": 46.936,
"eval_nq_pairs_steps_per_second": 0.367,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_trivia_pairs_loss": 0.16345469653606415,
"eval_trivia_pairs_runtime": 3.7445,
"eval_trivia_pairs_samples_per_second": 34.183,
"eval_trivia_pairs_steps_per_second": 0.267,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_gooaq_pairs_loss": 1.6607106924057007,
"eval_gooaq_pairs_runtime": 2.1231,
"eval_gooaq_pairs_samples_per_second": 60.289,
"eval_gooaq_pairs_steps_per_second": 0.471,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_paws-pos_loss": 0.026082171127200127,
"eval_paws-pos_runtime": 2.2328,
"eval_paws-pos_samples_per_second": 57.327,
"eval_paws-pos_steps_per_second": 0.448,
"step": 582
},
{
"epoch": 0.3013982392542724,
"eval_global_dataset_loss": 0.4393865168094635,
"eval_global_dataset_runtime": 10.0711,
"eval_global_dataset_samples_per_second": 41.307,
"eval_global_dataset_steps_per_second": 0.397,
"step": 582
},
{
"epoch": 0.31071983428275507,
"grad_norm": 2.5726535320281982,
"learning_rate": 1.3835294117647059e-05,
"loss": 0.5632,
"step": 600
},
{
"epoch": 0.32107716209218023,
"grad_norm": 65.14546966552734,
"learning_rate": 1.4305882352941177e-05,
"loss": 0.2533,
"step": 620
},
{
"epoch": 0.3314344899016054,
"grad_norm": 4.207058906555176,
"learning_rate": 1.4776470588235294e-05,
"loss": 0.2559,
"step": 640
},
{
"epoch": 0.34179181771103057,
"grad_norm": 278.2864990234375,
"learning_rate": 1.5223529411764707e-05,
"loss": 0.2664,
"step": 660
},
{
"epoch": 0.3516312791299845,
"eval_Qnli-dev_cosine_accuracy": 0.69140625,
"eval_Qnli-dev_cosine_accuracy_threshold": 0.677927553653717,
"eval_Qnli-dev_cosine_ap": 0.7135273513673487,
"eval_Qnli-dev_cosine_f1": 0.7079037800687284,
"eval_Qnli-dev_cosine_f1_threshold": 0.6066854000091553,
"eval_Qnli-dev_cosine_precision": 0.5953757225433526,
"eval_Qnli-dev_cosine_recall": 0.8728813559322034,
"eval_Qnli-dev_dot_accuracy": 0.669921875,
"eval_Qnli-dev_dot_accuracy_threshold": 615.3697509765625,
"eval_Qnli-dev_dot_ap": 0.6697900654196967,
"eval_Qnli-dev_dot_f1": 0.6946308724832215,
"eval_Qnli-dev_dot_f1_threshold": 583.5701293945312,
"eval_Qnli-dev_dot_precision": 0.575,
"eval_Qnli-dev_dot_recall": 0.8771186440677966,
"eval_Qnli-dev_euclidean_accuracy": 0.70703125,
"eval_Qnli-dev_euclidean_accuracy_threshold": 24.364826202392578,
"eval_Qnli-dev_euclidean_ap": 0.720573540901372,
"eval_Qnli-dev_euclidean_f1": 0.7003367003367004,
"eval_Qnli-dev_euclidean_f1_threshold": 28.203102111816406,
"eval_Qnli-dev_euclidean_precision": 0.5810055865921788,
"eval_Qnli-dev_euclidean_recall": 0.8813559322033898,
"eval_Qnli-dev_manhattan_accuracy": 0.705078125,
"eval_Qnli-dev_manhattan_accuracy_threshold": 754.4404296875,
"eval_Qnli-dev_manhattan_ap": 0.7236352677013607,
"eval_Qnli-dev_manhattan_f1": 0.701168614357262,
"eval_Qnli-dev_manhattan_f1_threshold": 882.8988037109375,
"eval_Qnli-dev_manhattan_precision": 0.5785123966942148,
"eval_Qnli-dev_manhattan_recall": 0.8898305084745762,
"eval_Qnli-dev_max_accuracy": 0.70703125,
"eval_Qnli-dev_max_accuracy_threshold": 754.4404296875,
"eval_Qnli-dev_max_ap": 0.7236352677013607,
"eval_Qnli-dev_max_f1": 0.7079037800687284,
"eval_Qnli-dev_max_f1_threshold": 882.8988037109375,
"eval_Qnli-dev_max_precision": 0.5953757225433526,
"eval_Qnli-dev_max_recall": 0.8898305084745762,
"eval_allNLI-dev_cosine_accuracy": 0.7109375,
"eval_allNLI-dev_cosine_accuracy_threshold": 0.8685251474380493,
"eval_allNLI-dev_cosine_ap": 0.5715847909509861,
"eval_allNLI-dev_cosine_f1": 0.5822222222222222,
"eval_allNLI-dev_cosine_f1_threshold": 0.6784489154815674,
"eval_allNLI-dev_cosine_precision": 0.4729241877256318,
"eval_allNLI-dev_cosine_recall": 0.7572254335260116,
"eval_allNLI-dev_dot_accuracy": 0.705078125,
"eval_allNLI-dev_dot_accuracy_threshold": 886.351318359375,
"eval_allNLI-dev_dot_ap": 0.5644148142455192,
"eval_allNLI-dev_dot_f1": 0.5940170940170941,
"eval_allNLI-dev_dot_f1_threshold": 659.7857666015625,
"eval_allNLI-dev_dot_precision": 0.4711864406779661,
"eval_allNLI-dev_dot_recall": 0.8034682080924855,
"eval_allNLI-dev_euclidean_accuracy": 0.708984375,
"eval_allNLI-dev_euclidean_accuracy_threshold": 15.756305694580078,
"eval_allNLI-dev_euclidean_ap": 0.5670650775218166,
"eval_allNLI-dev_euclidean_f1": 0.5840000000000001,
"eval_allNLI-dev_euclidean_f1_threshold": 26.798587799072266,
"eval_allNLI-dev_euclidean_precision": 0.44648318042813456,
"eval_allNLI-dev_euclidean_recall": 0.8439306358381503,
"eval_allNLI-dev_manhattan_accuracy": 0.712890625,
"eval_allNLI-dev_manhattan_accuracy_threshold": 551.571533203125,
"eval_allNLI-dev_manhattan_ap": 0.5708799354607844,
"eval_allNLI-dev_manhattan_f1": 0.5863453815261045,
"eval_allNLI-dev_manhattan_f1_threshold": 831.1856689453125,
"eval_allNLI-dev_manhattan_precision": 0.4492307692307692,
"eval_allNLI-dev_manhattan_recall": 0.8439306358381503,
"eval_allNLI-dev_max_accuracy": 0.712890625,
"eval_allNLI-dev_max_accuracy_threshold": 886.351318359375,
"eval_allNLI-dev_max_ap": 0.5715847909509861,
"eval_allNLI-dev_max_f1": 0.5940170940170941,
"eval_allNLI-dev_max_f1_threshold": 831.1856689453125,
"eval_allNLI-dev_max_precision": 0.4729241877256318,
"eval_allNLI-dev_max_recall": 0.8439306358381503,
"eval_sequential_score": 0.7236352677013607,
"eval_sts-test_pearson_cosine": 0.9021470099112117,
"eval_sts-test_pearson_dot": 0.8917251782363848,
"eval_sts-test_pearson_euclidean": 0.9135854372994908,
"eval_sts-test_pearson_manhattan": 0.9142235212520329,
"eval_sts-test_pearson_max": 0.9142235212520329,
"eval_sts-test_spearman_cosine": 0.907700664607013,
"eval_sts-test_spearman_dot": 0.8887621545424798,
"eval_sts-test_spearman_euclidean": 0.9072292995707775,
"eval_sts-test_spearman_manhattan": 0.9078543851898925,
"eval_sts-test_spearman_max": 0.9078543851898925,
"eval_vitaminc-pairs_loss": 2.5545308589935303,
"eval_vitaminc-pairs_runtime": 4.4484,
"eval_vitaminc-pairs_samples_per_second": 28.775,
"eval_vitaminc-pairs_steps_per_second": 0.225,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_negation-triplets_loss": 0.792955756187439,
"eval_negation-triplets_runtime": 3.0828,
"eval_negation-triplets_samples_per_second": 41.521,
"eval_negation-triplets_steps_per_second": 0.324,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_scitail-pairs-pos_loss": 0.011221353895962238,
"eval_scitail-pairs-pos_runtime": 2.6324,
"eval_scitail-pairs-pos_samples_per_second": 48.624,
"eval_scitail-pairs-pos_steps_per_second": 0.38,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_scitail-pairs-qa_loss": 0.008626868017017841,
"eval_scitail-pairs-qa_runtime": 2.1929,
"eval_scitail-pairs-qa_samples_per_second": 58.369,
"eval_scitail-pairs-qa_steps_per_second": 0.456,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_xsum-pairs_loss": 0.004584914073348045,
"eval_xsum-pairs_runtime": 3.0494,
"eval_xsum-pairs_samples_per_second": 41.975,
"eval_xsum-pairs_steps_per_second": 0.328,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_sciq_pairs_loss": 0.07755079865455627,
"eval_sciq_pairs_runtime": 3.8446,
"eval_sciq_pairs_samples_per_second": 33.294,
"eval_sciq_pairs_steps_per_second": 0.26,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_qasc_pairs_loss": 0.2504812777042389,
"eval_qasc_pairs_runtime": 2.0696,
"eval_qasc_pairs_samples_per_second": 61.849,
"eval_qasc_pairs_steps_per_second": 0.483,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_openbookqa_pairs_loss": 0.615034818649292,
"eval_openbookqa_pairs_runtime": 2.1876,
"eval_openbookqa_pairs_samples_per_second": 58.512,
"eval_openbookqa_pairs_steps_per_second": 0.457,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_msmarco_pairs_loss": 0.12970499694347382,
"eval_msmarco_pairs_runtime": 2.2547,
"eval_msmarco_pairs_samples_per_second": 56.771,
"eval_msmarco_pairs_steps_per_second": 0.444,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_nq_pairs_loss": 0.11113697290420532,
"eval_nq_pairs_runtime": 2.7434,
"eval_nq_pairs_samples_per_second": 46.658,
"eval_nq_pairs_steps_per_second": 0.365,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_trivia_pairs_loss": 0.13746751844882965,
"eval_trivia_pairs_runtime": 3.7502,
"eval_trivia_pairs_samples_per_second": 34.131,
"eval_trivia_pairs_steps_per_second": 0.267,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_gooaq_pairs_loss": 0.15882055461406708,
"eval_gooaq_pairs_runtime": 2.1544,
"eval_gooaq_pairs_samples_per_second": 59.413,
"eval_gooaq_pairs_steps_per_second": 0.464,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_paws-pos_loss": 0.024272503331303596,
"eval_paws-pos_runtime": 2.2216,
"eval_paws-pos_samples_per_second": 57.616,
"eval_paws-pos_steps_per_second": 0.45,
"step": 679
},
{
"epoch": 0.3516312791299845,
"eval_global_dataset_loss": 0.19072183966636658,
"eval_global_dataset_runtime": 10.0803,
"eval_global_dataset_samples_per_second": 41.269,
"eval_global_dataset_steps_per_second": 0.397,
"step": 679
},
{
"epoch": 0.35214914552045573,
"grad_norm": 13.81802749633789,
"learning_rate": 1.5694117647058825e-05,
"loss": 0.2108,
"step": 680
},
{
"epoch": 0.3625064733298809,
"grad_norm": 1107.8421630859375,
"learning_rate": 1.6164705882352942e-05,
"loss": 0.2936,
"step": 700
},
{
"epoch": 0.37286380113930606,
"grad_norm": 9.161267280578613,
"learning_rate": 1.6635294117647062e-05,
"loss": 1.13,
"step": 720
},
{
"epoch": 0.3832211289487312,
"grad_norm": 523.1478881835938,
"learning_rate": 1.7105882352941176e-05,
"loss": 0.2598,
"step": 740
},
{
"epoch": 0.3935784567581564,
"grad_norm": 0.9771941304206848,
"learning_rate": 1.7576470588235296e-05,
"loss": 0.1599,
"step": 760
}
],
"logging_steps": 20,
"max_steps": 3862,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 387,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}