{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 20, "global_step": 2916, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00102880658436214, "grad_norm": NaN, "learning_rate": 0.0, "loss": 6.6792, "step": 1 }, { "epoch": 0.00205761316872428, "grad_norm": NaN, "learning_rate": 0.0, "loss": 7.1528, "step": 2 }, { "epoch": 0.0030864197530864196, "grad_norm": 56.1685905456543, "learning_rate": 3.634475597092419e-08, "loss": 7.5608, "step": 3 }, { "epoch": 0.00411522633744856, "grad_norm": 47.3843994140625, "learning_rate": 7.268951194184838e-08, "loss": 7.9785, "step": 4 }, { "epoch": 0.0051440329218107, "grad_norm": Infinity, "learning_rate": 7.268951194184838e-08, "loss": 8.3006, "step": 5 }, { "epoch": 0.006172839506172839, "grad_norm": 30.015766143798828, "learning_rate": 1.0903426791277258e-07, "loss": 7.2339, "step": 6 }, { "epoch": 0.00720164609053498, "grad_norm": 38.63535690307617, "learning_rate": 1.4537902388369677e-07, "loss": 6.9116, "step": 7 }, { "epoch": 0.00823045267489712, "grad_norm": 43.006290435791016, "learning_rate": 1.8172377985462097e-07, "loss": 7.5568, "step": 8 }, { "epoch": 0.009259259259259259, "grad_norm": 32.70055389404297, "learning_rate": 2.1806853582554515e-07, "loss": 7.3119, "step": 9 }, { "epoch": 0.0102880658436214, "grad_norm": 34.09101486206055, "learning_rate": 2.5441329179646936e-07, "loss": 7.3456, "step": 10 }, { "epoch": 0.01131687242798354, "grad_norm": 46.04302978515625, "learning_rate": 2.9075804776739353e-07, "loss": 7.3899, "step": 11 }, { "epoch": 0.012345679012345678, "grad_norm": 39.30464172363281, "learning_rate": 3.271028037383177e-07, "loss": 7.1603, "step": 12 }, { "epoch": 0.013374485596707819, "grad_norm": 45.96063995361328, "learning_rate": 3.6344755970924194e-07, "loss": 7.5501, "step": 13 }, { "epoch": 0.01440329218106996, "grad_norm": 31.248769760131836, "learning_rate": 3.997923156801661e-07, "loss": 7.1211, "step": 14 }, { "epoch": 0.015432098765432098, "grad_norm": 37.31939697265625, "learning_rate": 4.361370716510903e-07, "loss": 6.6898, "step": 15 }, { "epoch": 0.01646090534979424, "grad_norm": 57.4151725769043, "learning_rate": 4.724818276220145e-07, "loss": 7.9275, "step": 16 }, { "epoch": 0.01748971193415638, "grad_norm": 60.12082290649414, "learning_rate": 5.088265835929387e-07, "loss": 8.8934, "step": 17 }, { "epoch": 0.018518518518518517, "grad_norm": 46.84602355957031, "learning_rate": 5.451713395638628e-07, "loss": 7.7481, "step": 18 }, { "epoch": 0.01954732510288066, "grad_norm": 49.21991729736328, "learning_rate": 5.815160955347871e-07, "loss": 7.9482, "step": 19 }, { "epoch": 0.0205761316872428, "grad_norm": 28.904695510864258, "learning_rate": 6.178608515057113e-07, "loss": 7.2578, "step": 20 }, { "epoch": 0.0205761316872428, "eval_Qnli-dev_cosine_accuracy": 0.599609375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.9141477346420288, "eval_Qnli-dev_cosine_ap": 0.5547693808475234, "eval_Qnli-dev_cosine_f1": 0.6315789473684211, "eval_Qnli-dev_cosine_f1_threshold": 0.6694607138633728, "eval_Qnli-dev_cosine_precision": 0.4633663366336634, "eval_Qnli-dev_cosine_recall": 0.9915254237288136, "eval_Qnli-dev_dot_accuracy": 0.576171875, "eval_Qnli-dev_dot_accuracy_threshold": 375.9344177246094, "eval_Qnli-dev_dot_ap": 0.4951635671727113, "eval_Qnli-dev_dot_f1": 0.6291834002677376, "eval_Qnli-dev_dot_f1_threshold": 237.4730682373047, "eval_Qnli-dev_dot_precision": 0.4598825831702544, "eval_Qnli-dev_dot_recall": 0.9957627118644068, "eval_Qnli-dev_euclidean_accuracy": 0.603515625, "eval_Qnli-dev_euclidean_accuracy_threshold": 8.203678131103516, "eval_Qnli-dev_euclidean_ap": 0.5621167645444726, "eval_Qnli-dev_euclidean_f1": 0.6307277628032345, "eval_Qnli-dev_euclidean_f1_threshold": 17.37430763244629, "eval_Qnli-dev_euclidean_precision": 0.4624505928853755, "eval_Qnli-dev_euclidean_recall": 0.9915254237288136, "eval_Qnli-dev_manhattan_accuracy": 0.615234375, "eval_Qnli-dev_manhattan_accuracy_threshold": 168.66110229492188, "eval_Qnli-dev_manhattan_ap": 0.6068938574265019, "eval_Qnli-dev_manhattan_f1": 0.629878869448183, "eval_Qnli-dev_manhattan_f1_threshold": 250.46356201171875, "eval_Qnli-dev_manhattan_precision": 0.46153846153846156, "eval_Qnli-dev_manhattan_recall": 0.9915254237288136, "eval_Qnli-dev_max_accuracy": 0.615234375, "eval_Qnli-dev_max_accuracy_threshold": 375.9344177246094, "eval_Qnli-dev_max_ap": 0.6068938574265019, "eval_Qnli-dev_max_f1": 0.6315789473684211, "eval_Qnli-dev_max_f1_threshold": 250.46356201171875, "eval_Qnli-dev_max_precision": 0.4633663366336634, "eval_Qnli-dev_max_recall": 0.9957627118644068, "eval_allNLI-dev_cosine_accuracy": 0.6640625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9848551154136658, "eval_allNLI-dev_cosine_ap": 0.34665869156342183, "eval_allNLI-dev_cosine_f1": 0.5105105105105106, "eval_allNLI-dev_cosine_f1_threshold": 0.726775050163269, "eval_allNLI-dev_cosine_precision": 0.3448275862068966, "eval_allNLI-dev_cosine_recall": 0.9826589595375722, "eval_allNLI-dev_dot_accuracy": 0.66015625, "eval_allNLI-dev_dot_accuracy_threshold": 510.3038330078125, "eval_allNLI-dev_dot_ap": 0.3325722102020561, "eval_allNLI-dev_dot_f1": 0.5081240768094535, "eval_allNLI-dev_dot_f1_threshold": 321.1283264160156, "eval_allNLI-dev_dot_precision": 0.3412698412698413, "eval_allNLI-dev_dot_recall": 0.9942196531791907, "eval_allNLI-dev_euclidean_accuracy": 0.6640625, "eval_allNLI-dev_euclidean_accuracy_threshold": 3.5479774475097656, "eval_allNLI-dev_euclidean_ap": 0.35150722956160885, "eval_allNLI-dev_euclidean_f1": 0.5120481927710844, "eval_allNLI-dev_euclidean_f1_threshold": 16.336387634277344, "eval_allNLI-dev_euclidean_precision": 0.34623217922606925, "eval_allNLI-dev_euclidean_recall": 0.9826589595375722, "eval_allNLI-dev_manhattan_accuracy": 0.6640625, "eval_allNLI-dev_manhattan_accuracy_threshold": 59.594974517822266, "eval_allNLI-dev_manhattan_ap": 0.3707715964225075, "eval_allNLI-dev_manhattan_f1": 0.5096870342771982, "eval_allNLI-dev_manhattan_f1_threshold": 246.92552185058594, "eval_allNLI-dev_manhattan_precision": 0.3433734939759036, "eval_allNLI-dev_manhattan_recall": 0.9884393063583815, "eval_allNLI-dev_max_accuracy": 0.6640625, "eval_allNLI-dev_max_accuracy_threshold": 510.3038330078125, "eval_allNLI-dev_max_ap": 0.3707715964225075, "eval_allNLI-dev_max_f1": 0.5120481927710844, "eval_allNLI-dev_max_f1_threshold": 321.1283264160156, "eval_allNLI-dev_max_precision": 0.34623217922606925, "eval_allNLI-dev_max_recall": 0.9942196531791907, "eval_sequential_score": 0.6068938574265019, "eval_sts-test_pearson_cosine": 0.15158128737060533, "eval_sts-test_pearson_dot": 0.28519318322703113, "eval_sts-test_pearson_euclidean": 0.14101979920513222, "eval_sts-test_pearson_manhattan": 0.18765507958122332, "eval_sts-test_pearson_max": 0.28519318322703113, "eval_sts-test_spearman_cosine": 0.19495891500289336, "eval_sts-test_spearman_dot": 0.2996743605881303, "eval_sts-test_spearman_euclidean": 0.16263986728485438, "eval_sts-test_spearman_manhattan": 0.20827944121487316, "eval_sts-test_spearman_max": 0.2996743605881303, "eval_vitaminc-pairs_loss": 3.0276453495025635, "eval_vitaminc-pairs_runtime": 3.2256, "eval_vitaminc-pairs_samples_per_second": 39.683, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 20 }, { "epoch": 0.0205761316872428, "eval_negation-triplets_loss": 4.588223457336426, "eval_negation-triplets_runtime": 0.7341, "eval_negation-triplets_samples_per_second": 174.361, "eval_negation-triplets_steps_per_second": 1.362, "step": 20 }, { "epoch": 0.0205761316872428, "eval_scitail-pairs-pos_loss": 2.112419366836548, "eval_scitail-pairs-pos_runtime": 0.8038, "eval_scitail-pairs-pos_samples_per_second": 159.242, "eval_scitail-pairs-pos_steps_per_second": 1.244, "step": 20 }, { "epoch": 0.0205761316872428, "eval_scitail-pairs-qa_loss": 2.4077870845794678, "eval_scitail-pairs-qa_runtime": 0.5735, "eval_scitail-pairs-qa_samples_per_second": 223.199, "eval_scitail-pairs-qa_steps_per_second": 1.744, "step": 20 }, { "epoch": 0.0205761316872428, "eval_xsum-pairs_loss": 7.2197651863098145, "eval_xsum-pairs_runtime": 3.0069, "eval_xsum-pairs_samples_per_second": 42.568, "eval_xsum-pairs_steps_per_second": 0.333, "step": 20 }, { "epoch": 0.0205761316872428, "eval_sciq_pairs_loss": 0.8614505529403687, "eval_sciq_pairs_runtime": 3.4174, "eval_sciq_pairs_samples_per_second": 37.455, "eval_sciq_pairs_steps_per_second": 0.293, "step": 20 }, { "epoch": 0.0205761316872428, "eval_qasc_pairs_loss": 5.234526634216309, "eval_qasc_pairs_runtime": 0.5917, "eval_qasc_pairs_samples_per_second": 216.327, "eval_qasc_pairs_steps_per_second": 1.69, "step": 20 }, { "epoch": 0.0205761316872428, "eval_openbookqa_pairs_loss": 5.655325889587402, "eval_openbookqa_pairs_runtime": 0.5683, "eval_openbookqa_pairs_samples_per_second": 225.252, "eval_openbookqa_pairs_steps_per_second": 1.76, "step": 20 }, { "epoch": 0.0205761316872428, "eval_msmarco_pairs_loss": 15.688506126403809, "eval_msmarco_pairs_runtime": 1.5377, "eval_msmarco_pairs_samples_per_second": 83.243, "eval_msmarco_pairs_steps_per_second": 0.65, "step": 20 }, { "epoch": 0.0205761316872428, "eval_nq_pairs_loss": 14.804839134216309, "eval_nq_pairs_runtime": 2.884, "eval_nq_pairs_samples_per_second": 44.382, "eval_nq_pairs_steps_per_second": 0.347, "step": 20 }, { "epoch": 0.0205761316872428, "eval_trivia_pairs_loss": 9.255401611328125, "eval_trivia_pairs_runtime": 3.4138, "eval_trivia_pairs_samples_per_second": 37.495, "eval_trivia_pairs_steps_per_second": 0.293, "step": 20 }, { "epoch": 0.0205761316872428, "eval_gooaq_pairs_loss": 10.233977317810059, "eval_gooaq_pairs_runtime": 0.951, "eval_gooaq_pairs_samples_per_second": 134.592, "eval_gooaq_pairs_steps_per_second": 1.051, "step": 20 }, { "epoch": 0.0205761316872428, "eval_paws-pos_loss": 1.2437409162521362, "eval_paws-pos_runtime": 0.6909, "eval_paws-pos_samples_per_second": 185.275, "eval_paws-pos_steps_per_second": 1.447, "step": 20 }, { "epoch": 0.0205761316872428, "eval_global_dataset_loss": 6.9472880363464355, "eval_global_dataset_runtime": 13.3358, "eval_global_dataset_samples_per_second": 31.194, "eval_global_dataset_steps_per_second": 0.3, "step": 20 }, { "epoch": 0.021604938271604937, "grad_norm": 129.84130859375, "learning_rate": 6.542056074766354e-07, "loss": 14.6214, "step": 21 }, { "epoch": 0.02263374485596708, "grad_norm": 25.52641487121582, "learning_rate": 6.905503634475597e-07, "loss": 2.7071, "step": 22 }, { "epoch": 0.023662551440329218, "grad_norm": 52.22842025756836, "learning_rate": 7.268951194184839e-07, "loss": 7.3533, "step": 23 }, { "epoch": 0.024691358024691357, "grad_norm": 18.928892135620117, "learning_rate": 7.63239875389408e-07, "loss": 5.8828, "step": 24 }, { "epoch": 0.0257201646090535, "grad_norm": 18.747142791748047, "learning_rate": 7.995846313603322e-07, "loss": 5.549, "step": 25 }, { "epoch": 0.026748971193415638, "grad_norm": 39.345096588134766, "learning_rate": 8.359293873312565e-07, "loss": 7.0614, "step": 26 }, { "epoch": 0.027777777777777776, "grad_norm": 16.357666015625, "learning_rate": 8.722741433021806e-07, "loss": 5.4115, "step": 27 }, { "epoch": 0.02880658436213992, "grad_norm": 143.72604370117188, "learning_rate": 9.086188992731048e-07, "loss": 14.986, "step": 28 }, { "epoch": 0.029835390946502057, "grad_norm": 29.933956146240234, "learning_rate": 9.44963655244029e-07, "loss": 6.5017, "step": 29 }, { "epoch": 0.030864197530864196, "grad_norm": 24.71169662475586, "learning_rate": 9.813084112149532e-07, "loss": 6.8621, "step": 30 }, { "epoch": 0.03189300411522634, "grad_norm": 48.559242248535156, "learning_rate": 1.0176531671858774e-06, "loss": 7.6911, "step": 31 }, { "epoch": 0.03292181069958848, "grad_norm": 43.564395904541016, "learning_rate": 1.0539979231568014e-06, "loss": 7.3478, "step": 32 }, { "epoch": 0.033950617283950615, "grad_norm": 71.6847152709961, "learning_rate": 1.0903426791277257e-06, "loss": 9.8953, "step": 33 }, { "epoch": 0.03497942386831276, "grad_norm": 130.1976776123047, "learning_rate": 1.12668743509865e-06, "loss": 14.7971, "step": 34 }, { "epoch": 0.0360082304526749, "grad_norm": 25.184886932373047, "learning_rate": 1.1630321910695741e-06, "loss": 6.6194, "step": 35 }, { "epoch": 0.037037037037037035, "grad_norm": 15.403931617736816, "learning_rate": 1.1993769470404982e-06, "loss": 5.397, "step": 36 }, { "epoch": 0.03806584362139918, "grad_norm": 97.28205871582031, "learning_rate": 1.2357217030114226e-06, "loss": 9.3816, "step": 37 }, { "epoch": 0.03909465020576132, "grad_norm": 113.59951782226562, "learning_rate": 1.2720664589823466e-06, "loss": 13.3627, "step": 38 }, { "epoch": 0.040123456790123455, "grad_norm": 91.30632781982422, "learning_rate": 1.3084112149532708e-06, "loss": 9.0198, "step": 39 }, { "epoch": 0.0411522633744856, "grad_norm": 34.121768951416016, "learning_rate": 1.344755970924195e-06, "loss": 6.3785, "step": 40 }, { "epoch": 0.0411522633744856, "eval_Qnli-dev_cosine_accuracy": 0.599609375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.9141815304756165, "eval_Qnli-dev_cosine_ap": 0.560800067413789, "eval_Qnli-dev_cosine_f1": 0.6315789473684211, "eval_Qnli-dev_cosine_f1_threshold": 0.71217280626297, "eval_Qnli-dev_cosine_precision": 0.4633663366336634, "eval_Qnli-dev_cosine_recall": 0.9915254237288136, "eval_Qnli-dev_dot_accuracy": 0.580078125, "eval_Qnli-dev_dot_accuracy_threshold": 383.35107421875, "eval_Qnli-dev_dot_ap": 0.4975321617530368, "eval_Qnli-dev_dot_f1": 0.6291834002677376, "eval_Qnli-dev_dot_f1_threshold": 237.07284545898438, "eval_Qnli-dev_dot_precision": 0.4598825831702544, "eval_Qnli-dev_dot_recall": 0.9957627118644068, "eval_Qnli-dev_euclidean_accuracy": 0.59375, "eval_Qnli-dev_euclidean_accuracy_threshold": 8.007088661193848, "eval_Qnli-dev_euclidean_ap": 0.5669399990959784, "eval_Qnli-dev_euclidean_f1": 0.6307277628032345, "eval_Qnli-dev_euclidean_f1_threshold": 16.091142654418945, "eval_Qnli-dev_euclidean_precision": 0.4624505928853755, "eval_Qnli-dev_euclidean_recall": 0.9915254237288136, "eval_Qnli-dev_manhattan_accuracy": 0.6171875, "eval_Qnli-dev_manhattan_accuracy_threshold": 165.9488067626953, "eval_Qnli-dev_manhattan_ap": 0.6135192533670535, "eval_Qnli-dev_manhattan_f1": 0.629878869448183, "eval_Qnli-dev_manhattan_f1_threshold": 239.3395233154297, "eval_Qnli-dev_manhattan_precision": 0.46153846153846156, "eval_Qnli-dev_manhattan_recall": 0.9915254237288136, "eval_Qnli-dev_max_accuracy": 0.6171875, "eval_Qnli-dev_max_accuracy_threshold": 383.35107421875, "eval_Qnli-dev_max_ap": 0.6135192533670535, "eval_Qnli-dev_max_f1": 0.6315789473684211, "eval_Qnli-dev_max_f1_threshold": 239.3395233154297, "eval_Qnli-dev_max_precision": 0.4633663366336634, "eval_Qnli-dev_max_recall": 0.9957627118644068, "eval_allNLI-dev_cosine_accuracy": 0.6640625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9857255220413208, "eval_allNLI-dev_cosine_ap": 0.35080477217886274, "eval_allNLI-dev_cosine_f1": 0.5090361445783133, "eval_allNLI-dev_cosine_f1_threshold": 0.7498464584350586, "eval_allNLI-dev_cosine_precision": 0.34419551934826886, "eval_allNLI-dev_cosine_recall": 0.976878612716763, "eval_allNLI-dev_dot_accuracy": 0.66015625, "eval_allNLI-dev_dot_accuracy_threshold": 511.11175537109375, "eval_allNLI-dev_dot_ap": 0.33267543574243635, "eval_allNLI-dev_dot_f1": 0.5066273932253312, "eval_allNLI-dev_dot_f1_threshold": 327.406494140625, "eval_allNLI-dev_dot_precision": 0.33992094861660077, "eval_allNLI-dev_dot_recall": 0.9942196531791907, "eval_allNLI-dev_euclidean_accuracy": 0.666015625, "eval_allNLI-dev_euclidean_accuracy_threshold": 4.716782569885254, "eval_allNLI-dev_euclidean_ap": 0.3570960285605865, "eval_allNLI-dev_euclidean_f1": 0.5113464447806354, "eval_allNLI-dev_euclidean_f1_threshold": 15.28095817565918, "eval_allNLI-dev_euclidean_precision": 0.3463114754098361, "eval_allNLI-dev_euclidean_recall": 0.976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.6640625, "eval_allNLI-dev_manhattan_accuracy_threshold": 58.618408203125, "eval_allNLI-dev_manhattan_ap": 0.3738907800968901, "eval_allNLI-dev_manhattan_f1": 0.5096296296296297, "eval_allNLI-dev_manhattan_f1_threshold": 251.27957153320312, "eval_allNLI-dev_manhattan_precision": 0.3426294820717131, "eval_allNLI-dev_manhattan_recall": 0.9942196531791907, "eval_allNLI-dev_max_accuracy": 0.666015625, "eval_allNLI-dev_max_accuracy_threshold": 511.11175537109375, "eval_allNLI-dev_max_ap": 0.3738907800968901, "eval_allNLI-dev_max_f1": 0.5113464447806354, "eval_allNLI-dev_max_f1_threshold": 327.406494140625, "eval_allNLI-dev_max_precision": 0.3463114754098361, "eval_allNLI-dev_max_recall": 0.9942196531791907, "eval_sequential_score": 0.6135192533670535, "eval_sts-test_pearson_cosine": 0.15947289948306198, "eval_sts-test_pearson_dot": 0.30037019316788005, "eval_sts-test_pearson_euclidean": 0.14704475799070915, "eval_sts-test_pearson_manhattan": 0.1919977257434266, "eval_sts-test_pearson_max": 0.30037019316788005, "eval_sts-test_spearman_cosine": 0.2043480876529001, "eval_sts-test_spearman_dot": 0.312789299505278, "eval_sts-test_spearman_euclidean": 0.16989717934469764, "eval_sts-test_spearman_manhattan": 0.21343563680112884, "eval_sts-test_spearman_max": 0.312789299505278, "eval_vitaminc-pairs_loss": 2.966029167175293, "eval_vitaminc-pairs_runtime": 3.1856, "eval_vitaminc-pairs_samples_per_second": 40.18, "eval_vitaminc-pairs_steps_per_second": 0.314, "step": 40 }, { "epoch": 0.0411522633744856, "eval_negation-triplets_loss": 4.4332098960876465, "eval_negation-triplets_runtime": 0.7499, "eval_negation-triplets_samples_per_second": 170.686, "eval_negation-triplets_steps_per_second": 1.333, "step": 40 }, { "epoch": 0.0411522633744856, "eval_scitail-pairs-pos_loss": 2.064517021179199, "eval_scitail-pairs-pos_runtime": 0.7727, "eval_scitail-pairs-pos_samples_per_second": 165.651, "eval_scitail-pairs-pos_steps_per_second": 1.294, "step": 40 }, { "epoch": 0.0411522633744856, "eval_scitail-pairs-qa_loss": 2.3564093112945557, "eval_scitail-pairs-qa_runtime": 0.561, "eval_scitail-pairs-qa_samples_per_second": 228.169, "eval_scitail-pairs-qa_steps_per_second": 1.783, "step": 40 }, { "epoch": 0.0411522633744856, "eval_xsum-pairs_loss": 6.930158615112305, "eval_xsum-pairs_runtime": 3.0064, "eval_xsum-pairs_samples_per_second": 42.575, "eval_xsum-pairs_steps_per_second": 0.333, "step": 40 }, { "epoch": 0.0411522633744856, "eval_sciq_pairs_loss": 0.781018078327179, "eval_sciq_pairs_runtime": 3.3616, "eval_sciq_pairs_samples_per_second": 38.077, "eval_sciq_pairs_steps_per_second": 0.297, "step": 40 }, { "epoch": 0.0411522633744856, "eval_qasc_pairs_loss": 4.685440540313721, "eval_qasc_pairs_runtime": 0.5973, "eval_qasc_pairs_samples_per_second": 214.304, "eval_qasc_pairs_steps_per_second": 1.674, "step": 40 }, { "epoch": 0.0411522633744856, "eval_openbookqa_pairs_loss": 5.424518585205078, "eval_openbookqa_pairs_runtime": 0.5716, "eval_openbookqa_pairs_samples_per_second": 223.932, "eval_openbookqa_pairs_steps_per_second": 1.749, "step": 40 }, { "epoch": 0.0411522633744856, "eval_msmarco_pairs_loss": 13.714217185974121, "eval_msmarco_pairs_runtime": 1.5089, "eval_msmarco_pairs_samples_per_second": 84.831, "eval_msmarco_pairs_steps_per_second": 0.663, "step": 40 }, { "epoch": 0.0411522633744856, "eval_nq_pairs_loss": 12.863033294677734, "eval_nq_pairs_runtime": 2.8862, "eval_nq_pairs_samples_per_second": 44.35, "eval_nq_pairs_steps_per_second": 0.346, "step": 40 }, { "epoch": 0.0411522633744856, "eval_trivia_pairs_loss": 8.43865966796875, "eval_trivia_pairs_runtime": 3.4314, "eval_trivia_pairs_samples_per_second": 37.303, "eval_trivia_pairs_steps_per_second": 0.291, "step": 40 }, { "epoch": 0.0411522633744856, "eval_gooaq_pairs_loss": 9.148645401000977, "eval_gooaq_pairs_runtime": 0.9461, "eval_gooaq_pairs_samples_per_second": 135.299, "eval_gooaq_pairs_steps_per_second": 1.057, "step": 40 }, { "epoch": 0.0411522633744856, "eval_paws-pos_loss": 1.288989543914795, "eval_paws-pos_runtime": 0.6728, "eval_paws-pos_samples_per_second": 190.262, "eval_paws-pos_steps_per_second": 1.486, "step": 40 }, { "epoch": 0.0411522633744856, "eval_global_dataset_loss": 6.3770952224731445, "eval_global_dataset_runtime": 13.329, "eval_global_dataset_samples_per_second": 31.21, "eval_global_dataset_steps_per_second": 0.3, "step": 40 }, { "epoch": 0.04218106995884774, "grad_norm": 58.63786315917969, "learning_rate": 1.3811007268951193e-06, "loss": 8.8223, "step": 41 }, { "epoch": 0.043209876543209874, "grad_norm": 19.849580764770508, "learning_rate": 1.4174454828660433e-06, "loss": 5.7515, "step": 42 }, { "epoch": 0.044238683127572016, "grad_norm": 32.95113754272461, "learning_rate": 1.4537902388369678e-06, "loss": 6.6943, "step": 43 }, { "epoch": 0.04526748971193416, "grad_norm": 115.43840026855469, "learning_rate": 1.4901349948078918e-06, "loss": 12.7157, "step": 44 }, { "epoch": 0.046296296296296294, "grad_norm": 16.027889251708984, "learning_rate": 1.526479750778816e-06, "loss": 5.729, "step": 45 }, { "epoch": 0.047325102880658436, "grad_norm": 55.49090576171875, "learning_rate": 1.5628245067497403e-06, "loss": 8.843, "step": 46 }, { "epoch": 0.04835390946502058, "grad_norm": 20.623491287231445, "learning_rate": 1.5991692627206645e-06, "loss": 6.8743, "step": 47 }, { "epoch": 0.04938271604938271, "grad_norm": 19.15467643737793, "learning_rate": 1.6355140186915885e-06, "loss": 5.5015, "step": 48 }, { "epoch": 0.050411522633744855, "grad_norm": 24.568716049194336, "learning_rate": 1.671858774662513e-06, "loss": 2.7761, "step": 49 }, { "epoch": 0.051440329218107, "grad_norm": 17.216365814208984, "learning_rate": 1.708203530633437e-06, "loss": 4.6785, "step": 50 }, { "epoch": 0.05246913580246913, "grad_norm": 27.83530616760254, "learning_rate": 1.7445482866043612e-06, "loss": 6.6596, "step": 51 }, { "epoch": 0.053497942386831275, "grad_norm": 14.741978645324707, "learning_rate": 1.7808930425752854e-06, "loss": 5.4409, "step": 52 }, { "epoch": 0.05452674897119342, "grad_norm": 27.180707931518555, "learning_rate": 1.8172377985462097e-06, "loss": 6.3967, "step": 53 }, { "epoch": 0.05555555555555555, "grad_norm": 26.400497436523438, "learning_rate": 1.8535825545171337e-06, "loss": 6.3174, "step": 54 }, { "epoch": 0.056584362139917695, "grad_norm": 19.098752975463867, "learning_rate": 1.889927310488058e-06, "loss": 5.5442, "step": 55 }, { "epoch": 0.05761316872427984, "grad_norm": 21.40766716003418, "learning_rate": 1.9262720664589824e-06, "loss": 5.9004, "step": 56 }, { "epoch": 0.05864197530864197, "grad_norm": 25.238555908203125, "learning_rate": 1.9626168224299064e-06, "loss": 2.9543, "step": 57 }, { "epoch": 0.059670781893004114, "grad_norm": 21.333162307739258, "learning_rate": 1.9989615784008304e-06, "loss": 6.4092, "step": 58 }, { "epoch": 0.060699588477366256, "grad_norm": 24.3674373626709, "learning_rate": 2.035306334371755e-06, "loss": 3.083, "step": 59 }, { "epoch": 0.06172839506172839, "grad_norm": 77.95449829101562, "learning_rate": 2.071651090342679e-06, "loss": 10.6811, "step": 60 }, { "epoch": 0.06172839506172839, "eval_Qnli-dev_cosine_accuracy": 0.591796875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.9277275800704956, "eval_Qnli-dev_cosine_ap": 0.5663256424153785, "eval_Qnli-dev_cosine_f1": 0.6302864938608458, "eval_Qnli-dev_cosine_f1_threshold": 0.7895882725715637, "eval_Qnli-dev_cosine_precision": 0.4647887323943662, "eval_Qnli-dev_cosine_recall": 0.9788135593220338, "eval_Qnli-dev_dot_accuracy": 0.58203125, "eval_Qnli-dev_dot_accuracy_threshold": 386.7135009765625, "eval_Qnli-dev_dot_ap": 0.5015283426358628, "eval_Qnli-dev_dot_f1": 0.6291834002677376, "eval_Qnli-dev_dot_f1_threshold": 233.70668029785156, "eval_Qnli-dev_dot_precision": 0.4598825831702544, "eval_Qnli-dev_dot_recall": 0.9957627118644068, "eval_Qnli-dev_euclidean_accuracy": 0.591796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 7.363377571105957, "eval_Qnli-dev_euclidean_ap": 0.5754970319906212, "eval_Qnli-dev_euclidean_f1": 0.6322930800542741, "eval_Qnli-dev_euclidean_f1_threshold": 13.553762435913086, "eval_Qnli-dev_euclidean_precision": 0.46506986027944114, "eval_Qnli-dev_euclidean_recall": 0.9872881355932204, "eval_Qnli-dev_manhattan_accuracy": 0.62890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 166.58721923828125, "eval_Qnli-dev_manhattan_ap": 0.6222630621246192, "eval_Qnli-dev_manhattan_f1": 0.6346483704974271, "eval_Qnli-dev_manhattan_f1_threshold": 178.5355224609375, "eval_Qnli-dev_manhattan_precision": 0.5331412103746398, "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, "eval_Qnli-dev_max_accuracy": 0.62890625, "eval_Qnli-dev_max_accuracy_threshold": 386.7135009765625, "eval_Qnli-dev_max_ap": 0.6222630621246192, "eval_Qnli-dev_max_f1": 0.6346483704974271, "eval_Qnli-dev_max_f1_threshold": 233.70668029785156, "eval_Qnli-dev_max_precision": 0.5331412103746398, "eval_Qnli-dev_max_recall": 0.9957627118644068, "eval_allNLI-dev_cosine_accuracy": 0.6640625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9866780042648315, "eval_allNLI-dev_cosine_ap": 0.3639636732129889, "eval_allNLI-dev_cosine_f1": 0.5089285714285714, "eval_allNLI-dev_cosine_f1_threshold": 0.7668333053588867, "eval_allNLI-dev_cosine_precision": 0.342685370741483, "eval_allNLI-dev_cosine_recall": 0.9884393063583815, "eval_allNLI-dev_dot_accuracy": 0.662109375, "eval_allNLI-dev_dot_accuracy_threshold": 494.3717956542969, "eval_allNLI-dev_dot_ap": 0.3315337132453944, "eval_allNLI-dev_dot_f1": 0.5065885797950219, "eval_allNLI-dev_dot_f1_threshold": 322.2677001953125, "eval_allNLI-dev_dot_precision": 0.3392156862745098, "eval_allNLI-dev_dot_recall": 1.0, "eval_allNLI-dev_euclidean_accuracy": 0.66796875, "eval_allNLI-dev_euclidean_accuracy_threshold": 4.485382080078125, "eval_allNLI-dev_euclidean_ap": 0.36871559335230386, "eval_allNLI-dev_euclidean_f1": 0.5096870342771982, "eval_allNLI-dev_euclidean_f1_threshold": 14.86199951171875, "eval_allNLI-dev_euclidean_precision": 0.3433734939759036, "eval_allNLI-dev_euclidean_recall": 0.9884393063583815, "eval_allNLI-dev_manhattan_accuracy": 0.6640625, "eval_allNLI-dev_manhattan_accuracy_threshold": 57.50782012939453, "eval_allNLI-dev_manhattan_ap": 0.3800340904251822, "eval_allNLI-dev_manhattan_f1": 0.5074183976261127, "eval_allNLI-dev_manhattan_f1_threshold": 237.91455078125, "eval_allNLI-dev_manhattan_precision": 0.3413173652694611, "eval_allNLI-dev_manhattan_recall": 0.9884393063583815, "eval_allNLI-dev_max_accuracy": 0.66796875, "eval_allNLI-dev_max_accuracy_threshold": 494.3717956542969, "eval_allNLI-dev_max_ap": 0.3800340904251822, "eval_allNLI-dev_max_f1": 0.5096870342771982, "eval_allNLI-dev_max_f1_threshold": 322.2677001953125, "eval_allNLI-dev_max_precision": 0.3433734939759036, "eval_allNLI-dev_max_recall": 1.0, "eval_sequential_score": 0.6222630621246192, "eval_sts-test_pearson_cosine": 0.18291082738946113, "eval_sts-test_pearson_dot": 0.316123119088567, "eval_sts-test_pearson_euclidean": 0.16506396318167735, "eval_sts-test_pearson_manhattan": 0.20347659235425056, "eval_sts-test_pearson_max": 0.316123119088567, "eval_sts-test_spearman_cosine": 0.22625803672256098, "eval_sts-test_spearman_dot": 0.32449976483491805, "eval_sts-test_spearman_euclidean": 0.18659512800514774, "eval_sts-test_spearman_manhattan": 0.2238469730125765, "eval_sts-test_spearman_max": 0.32449976483491805, "eval_vitaminc-pairs_loss": 2.9115335941314697, "eval_vitaminc-pairs_runtime": 3.1703, "eval_vitaminc-pairs_samples_per_second": 40.374, "eval_vitaminc-pairs_steps_per_second": 0.315, "step": 60 }, { "epoch": 0.06172839506172839, "eval_negation-triplets_loss": 4.1277852058410645, "eval_negation-triplets_runtime": 0.7484, "eval_negation-triplets_samples_per_second": 171.033, "eval_negation-triplets_steps_per_second": 1.336, "step": 60 }, { "epoch": 0.06172839506172839, "eval_scitail-pairs-pos_loss": 1.9703718423843384, "eval_scitail-pairs-pos_runtime": 0.8012, "eval_scitail-pairs-pos_samples_per_second": 159.764, "eval_scitail-pairs-pos_steps_per_second": 1.248, "step": 60 }, { "epoch": 0.06172839506172839, "eval_scitail-pairs-qa_loss": 2.240999221801758, "eval_scitail-pairs-qa_runtime": 0.5627, "eval_scitail-pairs-qa_samples_per_second": 227.467, "eval_scitail-pairs-qa_steps_per_second": 1.777, "step": 60 }, { "epoch": 0.06172839506172839, "eval_xsum-pairs_loss": 6.690690994262695, "eval_xsum-pairs_runtime": 3.0003, "eval_xsum-pairs_samples_per_second": 42.663, "eval_xsum-pairs_steps_per_second": 0.333, "step": 60 }, { "epoch": 0.06172839506172839, "eval_sciq_pairs_loss": 0.703199565410614, "eval_sciq_pairs_runtime": 3.4121, "eval_sciq_pairs_samples_per_second": 37.513, "eval_sciq_pairs_steps_per_second": 0.293, "step": 60 }, { "epoch": 0.06172839506172839, "eval_qasc_pairs_loss": 4.066890239715576, "eval_qasc_pairs_runtime": 0.6223, "eval_qasc_pairs_samples_per_second": 205.675, "eval_qasc_pairs_steps_per_second": 1.607, "step": 60 }, { "epoch": 0.06172839506172839, "eval_openbookqa_pairs_loss": 5.092636585235596, "eval_openbookqa_pairs_runtime": 0.5896, "eval_openbookqa_pairs_samples_per_second": 217.085, "eval_openbookqa_pairs_steps_per_second": 1.696, "step": 60 }, { "epoch": 0.06172839506172839, "eval_msmarco_pairs_loss": 11.276179313659668, "eval_msmarco_pairs_runtime": 1.5132, "eval_msmarco_pairs_samples_per_second": 84.591, "eval_msmarco_pairs_steps_per_second": 0.661, "step": 60 }, { "epoch": 0.06172839506172839, "eval_nq_pairs_loss": 10.514344215393066, "eval_nq_pairs_runtime": 2.9064, "eval_nq_pairs_samples_per_second": 44.041, "eval_nq_pairs_steps_per_second": 0.344, "step": 60 }, { "epoch": 0.06172839506172839, "eval_trivia_pairs_loss": 7.659719467163086, "eval_trivia_pairs_runtime": 3.436, "eval_trivia_pairs_samples_per_second": 37.253, "eval_trivia_pairs_steps_per_second": 0.291, "step": 60 }, { "epoch": 0.06172839506172839, "eval_gooaq_pairs_loss": 7.905792236328125, "eval_gooaq_pairs_runtime": 0.9586, "eval_gooaq_pairs_samples_per_second": 133.534, "eval_gooaq_pairs_steps_per_second": 1.043, "step": 60 }, { "epoch": 0.06172839506172839, "eval_paws-pos_loss": 1.2882633209228516, "eval_paws-pos_runtime": 0.6775, "eval_paws-pos_samples_per_second": 188.929, "eval_paws-pos_steps_per_second": 1.476, "step": 60 }, { "epoch": 0.06172839506172839, "eval_global_dataset_loss": 5.7225518226623535, "eval_global_dataset_runtime": 13.3827, "eval_global_dataset_samples_per_second": 31.085, "eval_global_dataset_steps_per_second": 0.299, "step": 60 }, { "epoch": 0.06275720164609054, "grad_norm": 18.45367431640625, "learning_rate": 2.107995846313603e-06, "loss": 4.6024, "step": 61 }, { "epoch": 0.06378600823045268, "grad_norm": 17.7939510345459, "learning_rate": 2.1443406022845273e-06, "loss": 4.4378, "step": 62 }, { "epoch": 0.06481481481481481, "grad_norm": 19.061763763427734, "learning_rate": 2.1806853582554513e-06, "loss": 6.4802, "step": 63 }, { "epoch": 0.06584362139917696, "grad_norm": 77.05914306640625, "learning_rate": 2.2170301142263758e-06, "loss": 10.9004, "step": 64 }, { "epoch": 0.0668724279835391, "grad_norm": 20.099227905273438, "learning_rate": 2.2533748701973e-06, "loss": 6.7516, "step": 65 }, { "epoch": 0.06790123456790123, "grad_norm": 53.35956573486328, "learning_rate": 2.289719626168224e-06, "loss": 7.7821, "step": 66 }, { "epoch": 0.06893004115226338, "grad_norm": 23.51174545288086, "learning_rate": 2.3260643821391483e-06, "loss": 6.1714, "step": 67 }, { "epoch": 0.06995884773662552, "grad_norm": 11.979568481445312, "learning_rate": 2.3624091381100727e-06, "loss": 5.3013, "step": 68 }, { "epoch": 0.07098765432098765, "grad_norm": 50.14888381958008, "learning_rate": 2.3987538940809963e-06, "loss": 9.0397, "step": 69 }, { "epoch": 0.0720164609053498, "grad_norm": 52.993473052978516, "learning_rate": 2.4350986500519208e-06, "loss": 9.3361, "step": 70 }, { "epoch": 0.07304526748971193, "grad_norm": 16.7055721282959, "learning_rate": 2.471443406022845e-06, "loss": 5.1927, "step": 71 }, { "epoch": 0.07407407407407407, "grad_norm": 17.894912719726562, "learning_rate": 2.5077881619937692e-06, "loss": 5.6994, "step": 72 }, { "epoch": 0.07510288065843622, "grad_norm": 29.04665184020996, "learning_rate": 2.5441329179646932e-06, "loss": 7.5132, "step": 73 }, { "epoch": 0.07613168724279835, "grad_norm": 14.857793807983398, "learning_rate": 2.5804776739356177e-06, "loss": 5.4796, "step": 74 }, { "epoch": 0.07716049382716049, "grad_norm": 24.775344848632812, "learning_rate": 2.6168224299065417e-06, "loss": 2.7714, "step": 75 }, { "epoch": 0.07818930041152264, "grad_norm": 49.390663146972656, "learning_rate": 2.653167185877466e-06, "loss": 8.9842, "step": 76 }, { "epoch": 0.07921810699588477, "grad_norm": 65.65110778808594, "learning_rate": 2.68951194184839e-06, "loss": 10.1764, "step": 77 }, { "epoch": 0.08024691358024691, "grad_norm": 13.745916366577148, "learning_rate": 2.725856697819314e-06, "loss": 5.0512, "step": 78 }, { "epoch": 0.08127572016460906, "grad_norm": 14.591425895690918, "learning_rate": 2.7622014537902386e-06, "loss": 5.5013, "step": 79 }, { "epoch": 0.0823045267489712, "grad_norm": 14.892078399658203, "learning_rate": 2.798546209761163e-06, "loss": 5.4496, "step": 80 }, { "epoch": 0.0823045267489712, "eval_Qnli-dev_cosine_accuracy": 0.591796875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.9341771602630615, "eval_Qnli-dev_cosine_ap": 0.576947319869376, "eval_Qnli-dev_cosine_f1": 0.6346153846153846, "eval_Qnli-dev_cosine_f1_threshold": 0.8134556412696838, "eval_Qnli-dev_cosine_precision": 0.4695121951219512, "eval_Qnli-dev_cosine_recall": 0.9788135593220338, "eval_Qnli-dev_dot_accuracy": 0.580078125, "eval_Qnli-dev_dot_accuracy_threshold": 388.09979248046875, "eval_Qnli-dev_dot_ap": 0.5032087471570361, "eval_Qnli-dev_dot_f1": 0.6291834002677376, "eval_Qnli-dev_dot_f1_threshold": 230.6592254638672, "eval_Qnli-dev_dot_precision": 0.4598825831702544, "eval_Qnli-dev_dot_recall": 0.9957627118644068, "eval_Qnli-dev_euclidean_accuracy": 0.6015625, "eval_Qnli-dev_euclidean_accuracy_threshold": 7.462021827697754, "eval_Qnli-dev_euclidean_ap": 0.5838970485633856, "eval_Qnli-dev_euclidean_f1": 0.6344827586206897, "eval_Qnli-dev_euclidean_f1_threshold": 12.409799575805664, "eval_Qnli-dev_euclidean_precision": 0.4703476482617587, "eval_Qnli-dev_euclidean_recall": 0.9745762711864406, "eval_Qnli-dev_manhattan_accuracy": 0.62890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 163.1259002685547, "eval_Qnli-dev_manhattan_ap": 0.630845221732911, "eval_Qnli-dev_manhattan_f1": 0.6355421686746988, "eval_Qnli-dev_manhattan_f1_threshold": 187.08981323242188, "eval_Qnli-dev_manhattan_precision": 0.4929906542056075, "eval_Qnli-dev_manhattan_recall": 0.8940677966101694, "eval_Qnli-dev_max_accuracy": 0.62890625, "eval_Qnli-dev_max_accuracy_threshold": 388.09979248046875, "eval_Qnli-dev_max_ap": 0.630845221732911, "eval_Qnli-dev_max_f1": 0.6355421686746988, "eval_Qnli-dev_max_f1_threshold": 230.6592254638672, "eval_Qnli-dev_max_precision": 0.4929906542056075, "eval_Qnli-dev_max_recall": 0.9957627118644068, "eval_allNLI-dev_cosine_accuracy": 0.666015625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.97718346118927, "eval_allNLI-dev_cosine_ap": 0.3756015628227595, "eval_allNLI-dev_cosine_f1": 0.5068702290076336, "eval_allNLI-dev_cosine_f1_threshold": 0.8239856958389282, "eval_allNLI-dev_cosine_precision": 0.34439834024896265, "eval_allNLI-dev_cosine_recall": 0.9595375722543352, "eval_allNLI-dev_dot_accuracy": 0.662109375, "eval_allNLI-dev_dot_accuracy_threshold": 495.46832275390625, "eval_allNLI-dev_dot_ap": 0.332020946884521, "eval_allNLI-dev_dot_f1": 0.5036603221083455, "eval_allNLI-dev_dot_f1_threshold": 312.1241760253906, "eval_allNLI-dev_dot_precision": 0.33725490196078434, "eval_allNLI-dev_dot_recall": 0.9942196531791907, "eval_allNLI-dev_euclidean_accuracy": 0.66796875, "eval_allNLI-dev_euclidean_accuracy_threshold": 4.271183013916016, "eval_allNLI-dev_euclidean_ap": 0.3777199146320434, "eval_allNLI-dev_euclidean_f1": 0.5091463414634146, "eval_allNLI-dev_euclidean_f1_threshold": 12.89515209197998, "eval_allNLI-dev_euclidean_precision": 0.34575569358178054, "eval_allNLI-dev_euclidean_recall": 0.9653179190751445, "eval_allNLI-dev_manhattan_accuracy": 0.666015625, "eval_allNLI-dev_manhattan_accuracy_threshold": 88.01801300048828, "eval_allNLI-dev_manhattan_ap": 0.38882171851359393, "eval_allNLI-dev_manhattan_f1": 0.5068285280728376, "eval_allNLI-dev_manhattan_f1_threshold": 208.61183166503906, "eval_allNLI-dev_manhattan_precision": 0.3436213991769547, "eval_allNLI-dev_manhattan_recall": 0.9653179190751445, "eval_allNLI-dev_max_accuracy": 0.66796875, "eval_allNLI-dev_max_accuracy_threshold": 495.46832275390625, "eval_allNLI-dev_max_ap": 0.38882171851359393, "eval_allNLI-dev_max_f1": 0.5091463414634146, "eval_allNLI-dev_max_f1_threshold": 312.1241760253906, "eval_allNLI-dev_max_precision": 0.34575569358178054, "eval_allNLI-dev_max_recall": 0.9942196531791907, "eval_sequential_score": 0.630845221732911, "eval_sts-test_pearson_cosine": 0.2082090854077526, "eval_sts-test_pearson_dot": 0.31968251773175477, "eval_sts-test_pearson_euclidean": 0.18565762314607082, "eval_sts-test_pearson_manhattan": 0.21750467365326087, "eval_sts-test_pearson_max": 0.31968251773175477, "eval_sts-test_spearman_cosine": 0.2475509554001572, "eval_sts-test_spearman_dot": 0.32583854357070313, "eval_sts-test_spearman_euclidean": 0.20592825469263046, "eval_sts-test_spearman_manhattan": 0.23787152606876585, "eval_sts-test_spearman_max": 0.32583854357070313, "eval_vitaminc-pairs_loss": 2.887739896774292, "eval_vitaminc-pairs_runtime": 3.1934, "eval_vitaminc-pairs_samples_per_second": 40.083, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 80 }, { "epoch": 0.0823045267489712, "eval_negation-triplets_loss": 3.878300666809082, "eval_negation-triplets_runtime": 0.7531, "eval_negation-triplets_samples_per_second": 169.96, "eval_negation-triplets_steps_per_second": 1.328, "step": 80 }, { "epoch": 0.0823045267489712, "eval_scitail-pairs-pos_loss": 1.8523993492126465, "eval_scitail-pairs-pos_runtime": 0.7885, "eval_scitail-pairs-pos_samples_per_second": 162.341, "eval_scitail-pairs-pos_steps_per_second": 1.268, "step": 80 }, { "epoch": 0.0823045267489712, "eval_scitail-pairs-qa_loss": 2.0656681060791016, "eval_scitail-pairs-qa_runtime": 0.5722, "eval_scitail-pairs-qa_samples_per_second": 223.714, "eval_scitail-pairs-qa_steps_per_second": 1.748, "step": 80 }, { "epoch": 0.0823045267489712, "eval_xsum-pairs_loss": 6.511655807495117, "eval_xsum-pairs_runtime": 3.0191, "eval_xsum-pairs_samples_per_second": 42.397, "eval_xsum-pairs_steps_per_second": 0.331, "step": 80 }, { "epoch": 0.0823045267489712, "eval_sciq_pairs_loss": 0.6626698970794678, "eval_sciq_pairs_runtime": 3.445, "eval_sciq_pairs_samples_per_second": 37.156, "eval_sciq_pairs_steps_per_second": 0.29, "step": 80 }, { "epoch": 0.0823045267489712, "eval_qasc_pairs_loss": 3.637084722518921, "eval_qasc_pairs_runtime": 0.6139, "eval_qasc_pairs_samples_per_second": 208.501, "eval_qasc_pairs_steps_per_second": 1.629, "step": 80 }, { "epoch": 0.0823045267489712, "eval_openbookqa_pairs_loss": 4.819972038269043, "eval_openbookqa_pairs_runtime": 0.5735, "eval_openbookqa_pairs_samples_per_second": 223.194, "eval_openbookqa_pairs_steps_per_second": 1.744, "step": 80 }, { "epoch": 0.0823045267489712, "eval_msmarco_pairs_loss": 9.547957420349121, "eval_msmarco_pairs_runtime": 1.5165, "eval_msmarco_pairs_samples_per_second": 84.404, "eval_msmarco_pairs_steps_per_second": 0.659, "step": 80 }, { "epoch": 0.0823045267489712, "eval_nq_pairs_loss": 8.830368995666504, "eval_nq_pairs_runtime": 2.8979, "eval_nq_pairs_samples_per_second": 44.17, "eval_nq_pairs_steps_per_second": 0.345, "step": 80 }, { "epoch": 0.0823045267489712, "eval_trivia_pairs_loss": 7.168319225311279, "eval_trivia_pairs_runtime": 3.4425, "eval_trivia_pairs_samples_per_second": 37.182, "eval_trivia_pairs_steps_per_second": 0.29, "step": 80 }, { "epoch": 0.0823045267489712, "eval_gooaq_pairs_loss": 7.121779918670654, "eval_gooaq_pairs_runtime": 0.9493, "eval_gooaq_pairs_samples_per_second": 134.835, "eval_gooaq_pairs_steps_per_second": 1.053, "step": 80 }, { "epoch": 0.0823045267489712, "eval_paws-pos_loss": 1.2316638231277466, "eval_paws-pos_runtime": 0.6943, "eval_paws-pos_samples_per_second": 184.348, "eval_paws-pos_steps_per_second": 1.44, "step": 80 }, { "epoch": 0.0823045267489712, "eval_global_dataset_loss": 5.2695698738098145, "eval_global_dataset_runtime": 13.3665, "eval_global_dataset_samples_per_second": 31.123, "eval_global_dataset_steps_per_second": 0.299, "step": 80 }, { "epoch": 0.08333333333333333, "grad_norm": 48.79065704345703, "learning_rate": 2.8348909657320867e-06, "loss": 9.1986, "step": 81 }, { "epoch": 0.08436213991769548, "grad_norm": 13.215801239013672, "learning_rate": 2.871235721703011e-06, "loss": 5.5779, "step": 82 }, { "epoch": 0.08539094650205761, "grad_norm": 14.010391235351562, "learning_rate": 2.9075804776739355e-06, "loss": 5.446, "step": 83 }, { "epoch": 0.08641975308641975, "grad_norm": 14.401032447814941, "learning_rate": 2.9439252336448596e-06, "loss": 5.5707, "step": 84 }, { "epoch": 0.0874485596707819, "grad_norm": 11.690423011779785, "learning_rate": 2.9802699896157836e-06, "loss": 5.064, "step": 85 }, { "epoch": 0.08847736625514403, "grad_norm": 14.510086059570312, "learning_rate": 3.016614745586708e-06, "loss": 5.1192, "step": 86 }, { "epoch": 0.08950617283950617, "grad_norm": 11.915549278259277, "learning_rate": 3.052959501557632e-06, "loss": 5.0992, "step": 87 }, { "epoch": 0.09053497942386832, "grad_norm": 11.183893203735352, "learning_rate": 3.0893042575285565e-06, "loss": 5.1639, "step": 88 }, { "epoch": 0.09156378600823045, "grad_norm": 23.76273536682129, "learning_rate": 3.1256490134994805e-06, "loss": 6.4692, "step": 89 }, { "epoch": 0.09259259259259259, "grad_norm": 13.50161075592041, "learning_rate": 3.1619937694704045e-06, "loss": 5.1285, "step": 90 }, { "epoch": 0.09362139917695474, "grad_norm": 25.397741317749023, "learning_rate": 3.198338525441329e-06, "loss": 2.8464, "step": 91 }, { "epoch": 0.09465020576131687, "grad_norm": 12.421465873718262, "learning_rate": 3.2346832814122534e-06, "loss": 4.9592, "step": 92 }, { "epoch": 0.09567901234567901, "grad_norm": 12.573847770690918, "learning_rate": 3.271028037383177e-06, "loss": 5.1014, "step": 93 }, { "epoch": 0.09670781893004116, "grad_norm": 34.48383331298828, "learning_rate": 3.3073727933541015e-06, "loss": 8.0528, "step": 94 }, { "epoch": 0.09773662551440329, "grad_norm": 22.98038673400879, "learning_rate": 3.343717549325026e-06, "loss": 6.5803, "step": 95 }, { "epoch": 0.09876543209876543, "grad_norm": 18.991193771362305, "learning_rate": 3.38006230529595e-06, "loss": 3.9696, "step": 96 }, { "epoch": 0.09979423868312758, "grad_norm": 14.325688362121582, "learning_rate": 3.416407061266874e-06, "loss": 5.2509, "step": 97 }, { "epoch": 0.10082304526748971, "grad_norm": 32.09270095825195, "learning_rate": 3.4527518172377984e-06, "loss": 7.8866, "step": 98 }, { "epoch": 0.10185185185185185, "grad_norm": 28.032167434692383, "learning_rate": 3.4890965732087224e-06, "loss": 2.4669, "step": 99 }, { "epoch": 0.102880658436214, "grad_norm": 19.722026824951172, "learning_rate": 3.525441329179647e-06, "loss": 6.8252, "step": 100 }, { "epoch": 0.102880658436214, "eval_Qnli-dev_cosine_accuracy": 0.60546875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.9359708428382874, "eval_Qnli-dev_cosine_ap": 0.5859495580627428, "eval_Qnli-dev_cosine_f1": 0.6305278174037089, "eval_Qnli-dev_cosine_f1_threshold": 0.8434731960296631, "eval_Qnli-dev_cosine_precision": 0.4752688172043011, "eval_Qnli-dev_cosine_recall": 0.9364406779661016, "eval_Qnli-dev_dot_accuracy": 0.58203125, "eval_Qnli-dev_dot_accuracy_threshold": 392.71923828125, "eval_Qnli-dev_dot_ap": 0.5087577253973941, "eval_Qnli-dev_dot_f1": 0.6291834002677376, "eval_Qnli-dev_dot_f1_threshold": 236.47132873535156, "eval_Qnli-dev_dot_precision": 0.4598825831702544, "eval_Qnli-dev_dot_recall": 0.9957627118644068, "eval_Qnli-dev_euclidean_accuracy": 0.603515625, "eval_Qnli-dev_euclidean_accuracy_threshold": 7.414036750793457, "eval_Qnli-dev_euclidean_ap": 0.59330387039405, "eval_Qnli-dev_euclidean_f1": 0.6291834002677376, "eval_Qnli-dev_euclidean_f1_threshold": 18.49761962890625, "eval_Qnli-dev_euclidean_precision": 0.4598825831702544, "eval_Qnli-dev_euclidean_recall": 0.9957627118644068, "eval_Qnli-dev_manhattan_accuracy": 0.6328125, "eval_Qnli-dev_manhattan_accuracy_threshold": 158.1238555908203, "eval_Qnli-dev_manhattan_ap": 0.636242439203504, "eval_Qnli-dev_manhattan_f1": 0.640746500777605, "eval_Qnli-dev_manhattan_f1_threshold": 185.45480346679688, "eval_Qnli-dev_manhattan_precision": 0.5061425061425061, "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, "eval_Qnli-dev_max_accuracy": 0.6328125, "eval_Qnli-dev_max_accuracy_threshold": 392.71923828125, "eval_Qnli-dev_max_ap": 0.636242439203504, "eval_Qnli-dev_max_f1": 0.640746500777605, "eval_Qnli-dev_max_f1_threshold": 236.47132873535156, "eval_Qnli-dev_max_precision": 0.5061425061425061, "eval_Qnli-dev_max_recall": 0.9957627118644068, "eval_allNLI-dev_cosine_accuracy": 0.671875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9702135324478149, "eval_allNLI-dev_cosine_ap": 0.38635245174664545, "eval_allNLI-dev_cosine_f1": 0.5051698670605613, "eval_allNLI-dev_cosine_f1_threshold": 0.7689170837402344, "eval_allNLI-dev_cosine_precision": 0.3392857142857143, "eval_allNLI-dev_cosine_recall": 0.9884393063583815, "eval_allNLI-dev_dot_accuracy": 0.662109375, "eval_allNLI-dev_dot_accuracy_threshold": 499.2386779785156, "eval_allNLI-dev_dot_ap": 0.33354410040006655, "eval_allNLI-dev_dot_f1": 0.5036603221083455, "eval_allNLI-dev_dot_f1_threshold": 310.7790222167969, "eval_allNLI-dev_dot_precision": 0.33725490196078434, "eval_allNLI-dev_dot_recall": 0.9942196531791907, "eval_allNLI-dev_euclidean_accuracy": 0.669921875, "eval_allNLI-dev_euclidean_accuracy_threshold": 5.469601154327393, "eval_allNLI-dev_euclidean_ap": 0.38522819959781573, "eval_allNLI-dev_euclidean_f1": 0.5059171597633136, "eval_allNLI-dev_euclidean_f1_threshold": 14.400506019592285, "eval_allNLI-dev_euclidean_precision": 0.3399602385685885, "eval_allNLI-dev_euclidean_recall": 0.9884393063583815, "eval_allNLI-dev_manhattan_accuracy": 0.66796875, "eval_allNLI-dev_manhattan_accuracy_threshold": 83.476806640625, "eval_allNLI-dev_manhattan_ap": 0.398882755775317, "eval_allNLI-dev_manhattan_f1": 0.5051395007342143, "eval_allNLI-dev_manhattan_f1_threshold": 256.5009765625, "eval_allNLI-dev_manhattan_precision": 0.33858267716535434, "eval_allNLI-dev_manhattan_recall": 0.9942196531791907, "eval_allNLI-dev_max_accuracy": 0.671875, "eval_allNLI-dev_max_accuracy_threshold": 499.2386779785156, "eval_allNLI-dev_max_ap": 0.398882755775317, "eval_allNLI-dev_max_f1": 0.5059171597633136, "eval_allNLI-dev_max_f1_threshold": 310.7790222167969, "eval_allNLI-dev_max_precision": 0.3399602385685885, "eval_allNLI-dev_max_recall": 0.9942196531791907, "eval_sequential_score": 0.636242439203504, "eval_sts-test_pearson_cosine": 0.233326009931931, "eval_sts-test_pearson_dot": 0.3167806500856212, "eval_sts-test_pearson_euclidean": 0.20945664323942717, "eval_sts-test_pearson_manhattan": 0.23559165515257938, "eval_sts-test_pearson_max": 0.3167806500856212, "eval_sts-test_spearman_cosine": 0.2687911570918344, "eval_sts-test_spearman_dot": 0.32229956906860985, "eval_sts-test_spearman_euclidean": 0.22929892968536797, "eval_sts-test_spearman_manhattan": 0.25574708751351516, "eval_sts-test_spearman_max": 0.32229956906860985, "eval_vitaminc-pairs_loss": 2.8645708560943604, "eval_vitaminc-pairs_runtime": 3.1781, "eval_vitaminc-pairs_samples_per_second": 40.275, "eval_vitaminc-pairs_steps_per_second": 0.315, "step": 100 }, { "epoch": 0.102880658436214, "eval_negation-triplets_loss": 3.715083599090576, "eval_negation-triplets_runtime": 0.7412, "eval_negation-triplets_samples_per_second": 172.701, "eval_negation-triplets_steps_per_second": 1.349, "step": 100 }, { "epoch": 0.102880658436214, "eval_scitail-pairs-pos_loss": 1.6467901468276978, "eval_scitail-pairs-pos_runtime": 0.828, "eval_scitail-pairs-pos_samples_per_second": 154.583, "eval_scitail-pairs-pos_steps_per_second": 1.208, "step": 100 }, { "epoch": 0.102880658436214, "eval_scitail-pairs-qa_loss": 1.8661956787109375, "eval_scitail-pairs-qa_runtime": 0.5663, "eval_scitail-pairs-qa_samples_per_second": 226.026, "eval_scitail-pairs-qa_steps_per_second": 1.766, "step": 100 }, { "epoch": 0.102880658436214, "eval_xsum-pairs_loss": 6.297423839569092, "eval_xsum-pairs_runtime": 3.0214, "eval_xsum-pairs_samples_per_second": 42.364, "eval_xsum-pairs_steps_per_second": 0.331, "step": 100 }, { "epoch": 0.102880658436214, "eval_sciq_pairs_loss": 0.6386430263519287, "eval_sciq_pairs_runtime": 3.404, "eval_sciq_pairs_samples_per_second": 37.603, "eval_sciq_pairs_steps_per_second": 0.294, "step": 100 }, { "epoch": 0.102880658436214, "eval_qasc_pairs_loss": 3.3296892642974854, "eval_qasc_pairs_runtime": 0.5903, "eval_qasc_pairs_samples_per_second": 216.831, "eval_qasc_pairs_steps_per_second": 1.694, "step": 100 }, { "epoch": 0.102880658436214, "eval_openbookqa_pairs_loss": 4.621798038482666, "eval_openbookqa_pairs_runtime": 0.5726, "eval_openbookqa_pairs_samples_per_second": 223.561, "eval_openbookqa_pairs_steps_per_second": 1.747, "step": 100 }, { "epoch": 0.102880658436214, "eval_msmarco_pairs_loss": 8.393180847167969, "eval_msmarco_pairs_runtime": 1.5114, "eval_msmarco_pairs_samples_per_second": 84.687, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 100 }, { "epoch": 0.102880658436214, "eval_nq_pairs_loss": 7.865816116333008, "eval_nq_pairs_runtime": 2.888, "eval_nq_pairs_samples_per_second": 44.321, "eval_nq_pairs_steps_per_second": 0.346, "step": 100 }, { "epoch": 0.102880658436214, "eval_trivia_pairs_loss": 6.768343925476074, "eval_trivia_pairs_runtime": 3.4313, "eval_trivia_pairs_samples_per_second": 37.303, "eval_trivia_pairs_steps_per_second": 0.291, "step": 100 }, { "epoch": 0.102880658436214, "eval_gooaq_pairs_loss": 6.616071701049805, "eval_gooaq_pairs_runtime": 0.9398, "eval_gooaq_pairs_samples_per_second": 136.205, "eval_gooaq_pairs_steps_per_second": 1.064, "step": 100 }, { "epoch": 0.102880658436214, "eval_paws-pos_loss": 1.1048204898834229, "eval_paws-pos_runtime": 0.6813, "eval_paws-pos_samples_per_second": 187.888, "eval_paws-pos_steps_per_second": 1.468, "step": 100 }, { "epoch": 0.102880658436214, "eval_global_dataset_loss": 4.85481595993042, "eval_global_dataset_runtime": 13.3418, "eval_global_dataset_samples_per_second": 31.18, "eval_global_dataset_steps_per_second": 0.3, "step": 100 }, { "epoch": 0.10390946502057613, "grad_norm": 16.656429290771484, "learning_rate": 3.561786085150571e-06, "loss": 5.3891, "step": 101 }, { "epoch": 0.10493827160493827, "grad_norm": 35.718448638916016, "learning_rate": 3.598130841121495e-06, "loss": 8.2981, "step": 102 }, { "epoch": 0.10596707818930041, "grad_norm": 33.725162506103516, "learning_rate": 3.6344755970924193e-06, "loss": 8.0844, "step": 103 }, { "epoch": 0.10699588477366255, "grad_norm": 19.359039306640625, "learning_rate": 3.6708203530633433e-06, "loss": 4.0626, "step": 104 }, { "epoch": 0.10802469135802469, "grad_norm": 14.194345474243164, "learning_rate": 3.7071651090342674e-06, "loss": 4.8851, "step": 105 }, { "epoch": 0.10905349794238683, "grad_norm": 14.311044692993164, "learning_rate": 3.743509865005192e-06, "loss": 5.1174, "step": 106 }, { "epoch": 0.11008230452674897, "grad_norm": 13.10085678100586, "learning_rate": 3.779854620976116e-06, "loss": 4.973, "step": 107 }, { "epoch": 0.1111111111111111, "grad_norm": 14.114293098449707, "learning_rate": 3.81619937694704e-06, "loss": 5.2879, "step": 108 }, { "epoch": 0.11213991769547325, "grad_norm": 13.561037063598633, "learning_rate": 3.852544132917965e-06, "loss": 4.9833, "step": 109 }, { "epoch": 0.11316872427983539, "grad_norm": 14.047689437866211, "learning_rate": 3.888888888888889e-06, "loss": 5.1619, "step": 110 }, { "epoch": 0.11419753086419752, "grad_norm": 37.19677734375, "learning_rate": 3.925233644859813e-06, "loss": 8.2926, "step": 111 }, { "epoch": 0.11522633744855967, "grad_norm": 12.727770805358887, "learning_rate": 3.961578400830737e-06, "loss": 4.7478, "step": 112 }, { "epoch": 0.11625514403292181, "grad_norm": 12.889444351196289, "learning_rate": 3.997923156801661e-06, "loss": 4.7644, "step": 113 }, { "epoch": 0.11728395061728394, "grad_norm": 20.46539878845215, "learning_rate": 4.034267912772586e-06, "loss": 6.379, "step": 114 }, { "epoch": 0.1183127572016461, "grad_norm": 14.000577926635742, "learning_rate": 4.07061266874351e-06, "loss": 4.8567, "step": 115 }, { "epoch": 0.11934156378600823, "grad_norm": 14.417937278747559, "learning_rate": 4.106957424714434e-06, "loss": 4.939, "step": 116 }, { "epoch": 0.12037037037037036, "grad_norm": 36.433433532714844, "learning_rate": 4.143302180685358e-06, "loss": 6.4653, "step": 117 }, { "epoch": 0.12139917695473251, "grad_norm": 15.122117042541504, "learning_rate": 4.179646936656283e-06, "loss": 5.0902, "step": 118 }, { "epoch": 0.12242798353909465, "grad_norm": 15.600722312927246, "learning_rate": 4.215991692627206e-06, "loss": 4.4886, "step": 119 }, { "epoch": 0.12345679012345678, "grad_norm": 18.391870498657227, "learning_rate": 4.252336448598131e-06, "loss": 6.2223, "step": 120 }, { "epoch": 0.12345679012345678, "eval_Qnli-dev_cosine_accuracy": 0.619140625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.9281325340270996, "eval_Qnli-dev_cosine_ap": 0.6014574414783835, "eval_Qnli-dev_cosine_f1": 0.6309341500765697, "eval_Qnli-dev_cosine_f1_threshold": 0.8621190786361694, "eval_Qnli-dev_cosine_precision": 0.4940047961630695, "eval_Qnli-dev_cosine_recall": 0.8728813559322034, "eval_Qnli-dev_dot_accuracy": 0.58984375, "eval_Qnli-dev_dot_accuracy_threshold": 388.7757568359375, "eval_Qnli-dev_dot_ap": 0.5127748615151599, "eval_Qnli-dev_dot_f1": 0.6304044630404463, "eval_Qnli-dev_dot_f1_threshold": 322.849853515625, "eval_Qnli-dev_dot_precision": 0.4698544698544699, "eval_Qnli-dev_dot_recall": 0.9576271186440678, "eval_Qnli-dev_euclidean_accuracy": 0.6171875, "eval_Qnli-dev_euclidean_accuracy_threshold": 8.507330894470215, "eval_Qnli-dev_euclidean_ap": 0.6089590025180598, "eval_Qnli-dev_euclidean_f1": 0.6291834002677376, "eval_Qnli-dev_euclidean_f1_threshold": 18.0284423828125, "eval_Qnli-dev_euclidean_precision": 0.4598825831702544, "eval_Qnli-dev_euclidean_recall": 0.9957627118644068, "eval_Qnli-dev_manhattan_accuracy": 0.642578125, "eval_Qnli-dev_manhattan_accuracy_threshold": 169.31954956054688, "eval_Qnli-dev_manhattan_ap": 0.6439314246828807, "eval_Qnli-dev_manhattan_f1": 0.6509433962264151, "eval_Qnli-dev_manhattan_f1_threshold": 195.28048706054688, "eval_Qnli-dev_manhattan_precision": 0.5175, "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, "eval_Qnli-dev_max_accuracy": 0.642578125, "eval_Qnli-dev_max_accuracy_threshold": 388.7757568359375, "eval_Qnli-dev_max_ap": 0.6439314246828807, "eval_Qnli-dev_max_f1": 0.6509433962264151, "eval_Qnli-dev_max_f1_threshold": 322.849853515625, "eval_Qnli-dev_max_precision": 0.5175, "eval_Qnli-dev_max_recall": 0.9957627118644068, "eval_allNLI-dev_cosine_accuracy": 0.66796875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9790990948677063, "eval_allNLI-dev_cosine_ap": 0.3955241297150008, "eval_allNLI-dev_cosine_f1": 0.5052005943536404, "eval_allNLI-dev_cosine_f1_threshold": 0.7795530557632446, "eval_allNLI-dev_cosine_precision": 0.34, "eval_allNLI-dev_cosine_recall": 0.9826589595375722, "eval_allNLI-dev_dot_accuracy": 0.6640625, "eval_allNLI-dev_dot_accuracy_threshold": 514.1408081054688, "eval_allNLI-dev_dot_ap": 0.3428427300114505, "eval_allNLI-dev_dot_f1": 0.5043988269794721, "eval_allNLI-dev_dot_f1_threshold": 316.1231994628906, "eval_allNLI-dev_dot_precision": 0.3379174852652259, "eval_allNLI-dev_dot_recall": 0.9942196531791907, "eval_allNLI-dev_euclidean_accuracy": 0.669921875, "eval_allNLI-dev_euclidean_accuracy_threshold": 4.674912452697754, "eval_allNLI-dev_euclidean_ap": 0.394931606062899, "eval_allNLI-dev_euclidean_f1": 0.5067873303167421, "eval_allNLI-dev_euclidean_f1_threshold": 13.242253303527832, "eval_allNLI-dev_euclidean_precision": 0.34285714285714286, "eval_allNLI-dev_euclidean_recall": 0.9710982658959537, "eval_allNLI-dev_manhattan_accuracy": 0.669921875, "eval_allNLI-dev_manhattan_accuracy_threshold": 84.45820617675781, "eval_allNLI-dev_manhattan_ap": 0.40742769361596887, "eval_allNLI-dev_manhattan_f1": 0.5058479532163743, "eval_allNLI-dev_manhattan_f1_threshold": 293.953369140625, "eval_allNLI-dev_manhattan_precision": 0.3385518590998043, "eval_allNLI-dev_manhattan_recall": 1.0, "eval_allNLI-dev_max_accuracy": 0.669921875, "eval_allNLI-dev_max_accuracy_threshold": 514.1408081054688, "eval_allNLI-dev_max_ap": 0.40742769361596887, "eval_allNLI-dev_max_f1": 0.5067873303167421, "eval_allNLI-dev_max_f1_threshold": 316.1231994628906, "eval_allNLI-dev_max_precision": 0.34285714285714286, "eval_allNLI-dev_max_recall": 1.0, "eval_sequential_score": 0.6439314246828807, "eval_sts-test_pearson_cosine": 0.25252985635600256, "eval_sts-test_pearson_dot": 0.3099351189652281, "eval_sts-test_pearson_euclidean": 0.23142843084411574, "eval_sts-test_pearson_manhattan": 0.2502258002878053, "eval_sts-test_pearson_max": 0.3099351189652281, "eval_sts-test_spearman_cosine": 0.28591643554731094, "eval_sts-test_spearman_dot": 0.3177811684597045, "eval_sts-test_spearman_euclidean": 0.24943896636699894, "eval_sts-test_spearman_manhattan": 0.2700833945157724, "eval_sts-test_spearman_max": 0.3177811684597045, "eval_vitaminc-pairs_loss": 2.8456013202667236, "eval_vitaminc-pairs_runtime": 3.1683, "eval_vitaminc-pairs_samples_per_second": 40.4, "eval_vitaminc-pairs_steps_per_second": 0.316, "step": 120 }, { "epoch": 0.12345679012345678, "eval_negation-triplets_loss": 3.5346930027008057, "eval_negation-triplets_runtime": 0.7273, "eval_negation-triplets_samples_per_second": 175.983, "eval_negation-triplets_steps_per_second": 1.375, "step": 120 }, { "epoch": 0.12345679012345678, "eval_scitail-pairs-pos_loss": 1.3952267169952393, "eval_scitail-pairs-pos_runtime": 0.7901, "eval_scitail-pairs-pos_samples_per_second": 162.002, "eval_scitail-pairs-pos_steps_per_second": 1.266, "step": 120 }, { "epoch": 0.12345679012345678, "eval_scitail-pairs-qa_loss": 1.6222929954528809, "eval_scitail-pairs-qa_runtime": 0.5623, "eval_scitail-pairs-qa_samples_per_second": 227.629, "eval_scitail-pairs-qa_steps_per_second": 1.778, "step": 120 }, { "epoch": 0.12345679012345678, "eval_xsum-pairs_loss": 6.103888988494873, "eval_xsum-pairs_runtime": 3.0165, "eval_xsum-pairs_samples_per_second": 42.433, "eval_xsum-pairs_steps_per_second": 0.332, "step": 120 }, { "epoch": 0.12345679012345678, "eval_sciq_pairs_loss": 0.6113746762275696, "eval_sciq_pairs_runtime": 3.3757, "eval_sciq_pairs_samples_per_second": 37.918, "eval_sciq_pairs_steps_per_second": 0.296, "step": 120 }, { "epoch": 0.12345679012345678, "eval_qasc_pairs_loss": 3.058934450149536, "eval_qasc_pairs_runtime": 0.59, "eval_qasc_pairs_samples_per_second": 216.943, "eval_qasc_pairs_steps_per_second": 1.695, "step": 120 }, { "epoch": 0.12345679012345678, "eval_openbookqa_pairs_loss": 4.499716758728027, "eval_openbookqa_pairs_runtime": 0.5708, "eval_openbookqa_pairs_samples_per_second": 224.263, "eval_openbookqa_pairs_steps_per_second": 1.752, "step": 120 }, { "epoch": 0.12345679012345678, "eval_msmarco_pairs_loss": 7.5348734855651855, "eval_msmarco_pairs_runtime": 1.514, "eval_msmarco_pairs_samples_per_second": 84.546, "eval_msmarco_pairs_steps_per_second": 0.661, "step": 120 }, { "epoch": 0.12345679012345678, "eval_nq_pairs_loss": 7.202226638793945, "eval_nq_pairs_runtime": 2.8915, "eval_nq_pairs_samples_per_second": 44.268, "eval_nq_pairs_steps_per_second": 0.346, "step": 120 }, { "epoch": 0.12345679012345678, "eval_trivia_pairs_loss": 6.395583152770996, "eval_trivia_pairs_runtime": 3.4281, "eval_trivia_pairs_samples_per_second": 37.338, "eval_trivia_pairs_steps_per_second": 0.292, "step": 120 }, { "epoch": 0.12345679012345678, "eval_gooaq_pairs_loss": 6.247038841247559, "eval_gooaq_pairs_runtime": 0.9411, "eval_gooaq_pairs_samples_per_second": 136.018, "eval_gooaq_pairs_steps_per_second": 1.063, "step": 120 }, { "epoch": 0.12345679012345678, "eval_paws-pos_loss": 0.8818368911743164, "eval_paws-pos_runtime": 0.7042, "eval_paws-pos_samples_per_second": 181.755, "eval_paws-pos_steps_per_second": 1.42, "step": 120 }, { "epoch": 0.12345679012345678, "eval_global_dataset_loss": 4.362409591674805, "eval_global_dataset_runtime": 13.3877, "eval_global_dataset_samples_per_second": 31.073, "eval_global_dataset_steps_per_second": 0.299, "step": 120 }, { "epoch": 0.12448559670781893, "grad_norm": 14.784011840820312, "learning_rate": 4.288681204569055e-06, "loss": 4.5958, "step": 121 }, { "epoch": 0.12551440329218108, "grad_norm": 16.07524871826172, "learning_rate": 4.325025960539979e-06, "loss": 6.2355, "step": 122 }, { "epoch": 0.12654320987654322, "grad_norm": 25.21320915222168, "learning_rate": 4.361370716510903e-06, "loss": 6.0763, "step": 123 }, { "epoch": 0.12757201646090535, "grad_norm": 13.882258415222168, "learning_rate": 4.3977154724818276e-06, "loss": 4.719, "step": 124 }, { "epoch": 0.1286008230452675, "grad_norm": 25.57428741455078, "learning_rate": 4.4340602284527516e-06, "loss": 6.0796, "step": 125 }, { "epoch": 0.12962962962962962, "grad_norm": 14.860637664794922, "learning_rate": 4.470404984423675e-06, "loss": 4.284, "step": 126 }, { "epoch": 0.13065843621399176, "grad_norm": 14.258697509765625, "learning_rate": 4.5067497403946e-06, "loss": 4.4948, "step": 127 }, { "epoch": 0.13168724279835392, "grad_norm": 12.680214881896973, "learning_rate": 4.543094496365524e-06, "loss": 5.5456, "step": 128 }, { "epoch": 0.13271604938271606, "grad_norm": 18.65047836303711, "learning_rate": 4.579439252336448e-06, "loss": 5.8322, "step": 129 }, { "epoch": 0.1337448559670782, "grad_norm": 14.29658031463623, "learning_rate": 4.6157840083073725e-06, "loss": 4.0772, "step": 130 }, { "epoch": 0.13477366255144033, "grad_norm": 14.54943561553955, "learning_rate": 4.6521287642782965e-06, "loss": 4.3008, "step": 131 }, { "epoch": 0.13580246913580246, "grad_norm": 25.34575080871582, "learning_rate": 4.6884735202492206e-06, "loss": 6.878, "step": 132 }, { "epoch": 0.1368312757201646, "grad_norm": 20.41341781616211, "learning_rate": 4.724818276220145e-06, "loss": 6.1897, "step": 133 }, { "epoch": 0.13786008230452676, "grad_norm": 12.791062355041504, "learning_rate": 4.7611630321910694e-06, "loss": 4.142, "step": 134 }, { "epoch": 0.1388888888888889, "grad_norm": 32.09108352661133, "learning_rate": 4.797507788161993e-06, "loss": 1.7782, "step": 135 }, { "epoch": 0.13991769547325103, "grad_norm": 15.483809471130371, "learning_rate": 4.8338525441329175e-06, "loss": 3.9578, "step": 136 }, { "epoch": 0.14094650205761317, "grad_norm": 17.372329711914062, "learning_rate": 4.8701973001038415e-06, "loss": 5.8774, "step": 137 }, { "epoch": 0.1419753086419753, "grad_norm": 31.082347869873047, "learning_rate": 4.9065420560747655e-06, "loss": 1.6068, "step": 138 }, { "epoch": 0.14300411522633744, "grad_norm": 13.522706985473633, "learning_rate": 4.94288681204569e-06, "loss": 4.164, "step": 139 }, { "epoch": 0.1440329218106996, "grad_norm": 12.907632827758789, "learning_rate": 4.979231568016614e-06, "loss": 3.8015, "step": 140 }, { "epoch": 0.1440329218106996, "eval_Qnli-dev_cosine_accuracy": 0.6328125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.9141117334365845, "eval_Qnli-dev_cosine_ap": 0.6198480681016185, "eval_Qnli-dev_cosine_f1": 0.6397608370702541, "eval_Qnli-dev_cosine_f1_threshold": 0.8335354328155518, "eval_Qnli-dev_cosine_precision": 0.4942263279445728, "eval_Qnli-dev_cosine_recall": 0.9067796610169492, "eval_Qnli-dev_dot_accuracy": 0.599609375, "eval_Qnli-dev_dot_accuracy_threshold": 405.96319580078125, "eval_Qnli-dev_dot_ap": 0.5257457885237911, "eval_Qnli-dev_dot_f1": 0.6291834002677376, "eval_Qnli-dev_dot_f1_threshold": 256.84857177734375, "eval_Qnli-dev_dot_precision": 0.4598825831702544, "eval_Qnli-dev_dot_recall": 0.9957627118644068, "eval_Qnli-dev_euclidean_accuracy": 0.63671875, "eval_Qnli-dev_euclidean_accuracy_threshold": 9.261069297790527, "eval_Qnli-dev_euclidean_ap": 0.6306496803563475, "eval_Qnli-dev_euclidean_f1": 0.636094674556213, "eval_Qnli-dev_euclidean_f1_threshold": 12.323160171508789, "eval_Qnli-dev_euclidean_precision": 0.48863636363636365, "eval_Qnli-dev_euclidean_recall": 0.9110169491525424, "eval_Qnli-dev_manhattan_accuracy": 0.646484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 171.06039428710938, "eval_Qnli-dev_manhattan_ap": 0.6564076451753581, "eval_Qnli-dev_manhattan_f1": 0.6487341772151899, "eval_Qnli-dev_manhattan_f1_threshold": 217.7759552001953, "eval_Qnli-dev_manhattan_precision": 0.5176767676767676, "eval_Qnli-dev_manhattan_recall": 0.8686440677966102, "eval_Qnli-dev_max_accuracy": 0.646484375, "eval_Qnli-dev_max_accuracy_threshold": 405.96319580078125, "eval_Qnli-dev_max_ap": 0.6564076451753581, "eval_Qnli-dev_max_f1": 0.6487341772151899, "eval_Qnli-dev_max_f1_threshold": 256.84857177734375, "eval_Qnli-dev_max_precision": 0.5176767676767676, "eval_Qnli-dev_max_recall": 0.9957627118644068, "eval_allNLI-dev_cosine_accuracy": 0.66796875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9803125262260437, "eval_allNLI-dev_cosine_ap": 0.40304954675643245, "eval_allNLI-dev_cosine_f1": 0.5073313782991202, "eval_allNLI-dev_cosine_f1_threshold": 0.7168662548065186, "eval_allNLI-dev_cosine_precision": 0.33988212180746563, "eval_allNLI-dev_cosine_recall": 1.0, "eval_allNLI-dev_dot_accuracy": 0.662109375, "eval_allNLI-dev_dot_accuracy_threshold": 522.0433959960938, "eval_allNLI-dev_dot_ap": 0.3516359548665584, "eval_allNLI-dev_dot_f1": 0.5065885797950219, "eval_allNLI-dev_dot_f1_threshold": 309.74951171875, "eval_allNLI-dev_dot_precision": 0.3392156862745098, "eval_allNLI-dev_dot_recall": 1.0, "eval_allNLI-dev_euclidean_accuracy": 0.669921875, "eval_allNLI-dev_euclidean_accuracy_threshold": 6.128700256347656, "eval_allNLI-dev_euclidean_ap": 0.4017412298983858, "eval_allNLI-dev_euclidean_f1": 0.5088235294117648, "eval_allNLI-dev_euclidean_f1_threshold": 16.340839385986328, "eval_allNLI-dev_euclidean_precision": 0.34122287968441817, "eval_allNLI-dev_euclidean_recall": 1.0, "eval_allNLI-dev_manhattan_accuracy": 0.66796875, "eval_allNLI-dev_manhattan_accuracy_threshold": 83.17814636230469, "eval_allNLI-dev_manhattan_ap": 0.41319055063996046, "eval_allNLI-dev_manhattan_f1": 0.5186567164179104, "eval_allNLI-dev_manhattan_f1_threshold": 201.40753173828125, "eval_allNLI-dev_manhattan_precision": 0.38292011019283745, "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, "eval_allNLI-dev_max_accuracy": 0.669921875, "eval_allNLI-dev_max_accuracy_threshold": 522.0433959960938, "eval_allNLI-dev_max_ap": 0.41319055063996046, "eval_allNLI-dev_max_f1": 0.5186567164179104, "eval_allNLI-dev_max_f1_threshold": 309.74951171875, "eval_allNLI-dev_max_precision": 0.38292011019283745, "eval_allNLI-dev_max_recall": 1.0, "eval_sequential_score": 0.6564076451753581, "eval_sts-test_pearson_cosine": 0.2550498328876235, "eval_sts-test_pearson_dot": 0.29981841169421564, "eval_sts-test_pearson_euclidean": 0.24239763397446795, "eval_sts-test_pearson_manhattan": 0.25344327521082516, "eval_sts-test_pearson_max": 0.29981841169421564, "eval_sts-test_spearman_cosine": 0.29254387360307027, "eval_sts-test_spearman_dot": 0.3123169499412918, "eval_sts-test_spearman_euclidean": 0.26282456091304185, "eval_sts-test_spearman_manhattan": 0.27282288773310837, "eval_sts-test_spearman_max": 0.3123169499412918, "eval_vitaminc-pairs_loss": 2.815100908279419, "eval_vitaminc-pairs_runtime": 3.1739, "eval_vitaminc-pairs_samples_per_second": 40.329, "eval_vitaminc-pairs_steps_per_second": 0.315, "step": 140 }, { "epoch": 0.1440329218106996, "eval_negation-triplets_loss": 3.359119176864624, "eval_negation-triplets_runtime": 0.7309, "eval_negation-triplets_samples_per_second": 175.129, "eval_negation-triplets_steps_per_second": 1.368, "step": 140 }, { "epoch": 0.1440329218106996, "eval_scitail-pairs-pos_loss": 1.217439889907837, "eval_scitail-pairs-pos_runtime": 0.8032, "eval_scitail-pairs-pos_samples_per_second": 159.359, "eval_scitail-pairs-pos_steps_per_second": 1.245, "step": 140 }, { "epoch": 0.1440329218106996, "eval_scitail-pairs-qa_loss": 1.2241069078445435, "eval_scitail-pairs-qa_runtime": 0.5639, "eval_scitail-pairs-qa_samples_per_second": 226.986, "eval_scitail-pairs-qa_steps_per_second": 1.773, "step": 140 }, { "epoch": 0.1440329218106996, "eval_xsum-pairs_loss": 5.798659801483154, "eval_xsum-pairs_runtime": 3.0162, "eval_xsum-pairs_samples_per_second": 42.437, "eval_xsum-pairs_steps_per_second": 0.332, "step": 140 }, { "epoch": 0.1440329218106996, "eval_sciq_pairs_loss": 0.5781325697898865, "eval_sciq_pairs_runtime": 3.3872, "eval_sciq_pairs_samples_per_second": 37.789, "eval_sciq_pairs_steps_per_second": 0.295, "step": 140 }, { "epoch": 0.1440329218106996, "eval_qasc_pairs_loss": 2.687833070755005, "eval_qasc_pairs_runtime": 0.5882, "eval_qasc_pairs_samples_per_second": 217.599, "eval_qasc_pairs_steps_per_second": 1.7, "step": 140 }, { "epoch": 0.1440329218106996, "eval_openbookqa_pairs_loss": 4.54829216003418, "eval_openbookqa_pairs_runtime": 0.5719, "eval_openbookqa_pairs_samples_per_second": 223.814, "eval_openbookqa_pairs_steps_per_second": 1.749, "step": 140 }, { "epoch": 0.1440329218106996, "eval_msmarco_pairs_loss": 6.890722274780273, "eval_msmarco_pairs_runtime": 1.5121, "eval_msmarco_pairs_samples_per_second": 84.65, "eval_msmarco_pairs_steps_per_second": 0.661, "step": 140 }, { "epoch": 0.1440329218106996, "eval_nq_pairs_loss": 6.53361701965332, "eval_nq_pairs_runtime": 2.8908, "eval_nq_pairs_samples_per_second": 44.278, "eval_nq_pairs_steps_per_second": 0.346, "step": 140 }, { "epoch": 0.1440329218106996, "eval_trivia_pairs_loss": 6.02173376083374, "eval_trivia_pairs_runtime": 3.4272, "eval_trivia_pairs_samples_per_second": 37.348, "eval_trivia_pairs_steps_per_second": 0.292, "step": 140 }, { "epoch": 0.1440329218106996, "eval_gooaq_pairs_loss": 5.6636834144592285, "eval_gooaq_pairs_runtime": 0.9412, "eval_gooaq_pairs_samples_per_second": 135.991, "eval_gooaq_pairs_steps_per_second": 1.062, "step": 140 }, { "epoch": 0.1440329218106996, "eval_paws-pos_loss": 0.4957215487957001, "eval_paws-pos_runtime": 0.6753, "eval_paws-pos_samples_per_second": 189.545, "eval_paws-pos_steps_per_second": 1.481, "step": 140 }, { "epoch": 0.1440329218106996, "eval_global_dataset_loss": 3.7551324367523193, "eval_global_dataset_runtime": 13.3267, "eval_global_dataset_samples_per_second": 31.215, "eval_global_dataset_steps_per_second": 0.3, "step": 140 }, { "epoch": 0.14506172839506173, "grad_norm": 15.316786766052246, "learning_rate": 5.0155763239875384e-06, "loss": 4.1039, "step": 141 }, { "epoch": 0.14609053497942387, "grad_norm": 40.62092971801758, "learning_rate": 5.051921079958463e-06, "loss": 6.598, "step": 142 }, { "epoch": 0.147119341563786, "grad_norm": 15.075995445251465, "learning_rate": 5.0882658359293865e-06, "loss": 3.8367, "step": 143 }, { "epoch": 0.14814814814814814, "grad_norm": 35.782997131347656, "learning_rate": 5.1246105919003105e-06, "loss": 6.798, "step": 144 }, { "epoch": 0.14917695473251028, "grad_norm": 18.818031311035156, "learning_rate": 5.160955347871235e-06, "loss": 5.9084, "step": 145 }, { "epoch": 0.15020576131687244, "grad_norm": 13.719802856445312, "learning_rate": 5.197300103842159e-06, "loss": 3.8655, "step": 146 }, { "epoch": 0.15123456790123457, "grad_norm": 14.322517395019531, "learning_rate": 5.233644859813083e-06, "loss": 3.493, "step": 147 }, { "epoch": 0.1522633744855967, "grad_norm": 13.363450050354004, "learning_rate": 5.269989615784008e-06, "loss": 3.4505, "step": 148 }, { "epoch": 0.15329218106995884, "grad_norm": 15.056668281555176, "learning_rate": 5.306334371754932e-06, "loss": 3.9404, "step": 149 }, { "epoch": 0.15432098765432098, "grad_norm": 22.7237491607666, "learning_rate": 5.3426791277258555e-06, "loss": 0.977, "step": 150 }, { "epoch": 0.15534979423868311, "grad_norm": 12.547760009765625, "learning_rate": 5.37902388369678e-06, "loss": 3.5493, "step": 151 }, { "epoch": 0.15637860082304528, "grad_norm": 13.1975679397583, "learning_rate": 5.415368639667704e-06, "loss": 3.2979, "step": 152 }, { "epoch": 0.1574074074074074, "grad_norm": 13.909899711608887, "learning_rate": 5.451713395638628e-06, "loss": 3.375, "step": 153 }, { "epoch": 0.15843621399176955, "grad_norm": 28.727937698364258, "learning_rate": 5.488058151609553e-06, "loss": 6.1056, "step": 154 }, { "epoch": 0.15946502057613168, "grad_norm": 14.287629127502441, "learning_rate": 5.524402907580477e-06, "loss": 3.4521, "step": 155 }, { "epoch": 0.16049382716049382, "grad_norm": 13.95241641998291, "learning_rate": 5.560747663551401e-06, "loss": 3.3153, "step": 156 }, { "epoch": 0.16152263374485595, "grad_norm": 16.688383102416992, "learning_rate": 5.597092419522326e-06, "loss": 3.4398, "step": 157 }, { "epoch": 0.16255144032921812, "grad_norm": 14.100769996643066, "learning_rate": 5.63343717549325e-06, "loss": 3.393, "step": 158 }, { "epoch": 0.16358024691358025, "grad_norm": 36.206336975097656, "learning_rate": 5.669781931464173e-06, "loss": 6.2424, "step": 159 }, { "epoch": 0.1646090534979424, "grad_norm": 25.714920043945312, "learning_rate": 5.706126687435098e-06, "loss": 5.7107, "step": 160 }, { "epoch": 0.1646090534979424, "eval_Qnli-dev_cosine_accuracy": 0.650390625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.881838858127594, "eval_Qnli-dev_cosine_ap": 0.6388216177805902, "eval_Qnli-dev_cosine_f1": 0.6594090202177294, "eval_Qnli-dev_cosine_f1_threshold": 0.7928681969642639, "eval_Qnli-dev_cosine_precision": 0.5208845208845209, "eval_Qnli-dev_cosine_recall": 0.8983050847457628, "eval_Qnli-dev_dot_accuracy": 0.599609375, "eval_Qnli-dev_dot_accuracy_threshold": 383.6549072265625, "eval_Qnli-dev_dot_ap": 0.530471167859375, "eval_Qnli-dev_dot_f1": 0.6400000000000001, "eval_Qnli-dev_dot_f1_threshold": 328.07598876953125, "eval_Qnli-dev_dot_precision": 0.5024154589371981, "eval_Qnli-dev_dot_recall": 0.8813559322033898, "eval_Qnli-dev_euclidean_accuracy": 0.6640625, "eval_Qnli-dev_euclidean_accuracy_threshold": 10.708932876586914, "eval_Qnli-dev_euclidean_ap": 0.6617842095463597, "eval_Qnli-dev_euclidean_f1": 0.65086887835703, "eval_Qnli-dev_euclidean_f1_threshold": 13.247828483581543, "eval_Qnli-dev_euclidean_precision": 0.5188916876574308, "eval_Qnli-dev_euclidean_recall": 0.8728813559322034, "eval_Qnli-dev_manhattan_accuracy": 0.671875, "eval_Qnli-dev_manhattan_accuracy_threshold": 203.75650024414062, "eval_Qnli-dev_manhattan_ap": 0.6718309029852861, "eval_Qnli-dev_manhattan_f1": 0.6581875993640699, "eval_Qnli-dev_manhattan_f1_threshold": 251.2660675048828, "eval_Qnli-dev_manhattan_precision": 0.5267175572519084, "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, "eval_Qnli-dev_max_accuracy": 0.671875, "eval_Qnli-dev_max_accuracy_threshold": 383.6549072265625, "eval_Qnli-dev_max_ap": 0.6718309029852861, "eval_Qnli-dev_max_f1": 0.6594090202177294, "eval_Qnli-dev_max_f1_threshold": 328.07598876953125, "eval_Qnli-dev_max_precision": 0.5267175572519084, "eval_Qnli-dev_max_recall": 0.8983050847457628, "eval_allNLI-dev_cosine_accuracy": 0.669921875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.97170090675354, "eval_allNLI-dev_cosine_ap": 0.42392325835068695, "eval_allNLI-dev_cosine_f1": 0.532319391634981, "eval_allNLI-dev_cosine_f1_threshold": 0.8698199987411499, "eval_allNLI-dev_cosine_precision": 0.39660056657223797, "eval_allNLI-dev_cosine_recall": 0.8092485549132948, "eval_allNLI-dev_dot_accuracy": 0.666015625, "eval_allNLI-dev_dot_accuracy_threshold": 489.699951171875, "eval_allNLI-dev_dot_ap": 0.37488661345316393, "eval_allNLI-dev_dot_f1": 0.5079365079365079, "eval_allNLI-dev_dot_f1_threshold": 370.46728515625, "eval_allNLI-dev_dot_precision": 0.350109409190372, "eval_allNLI-dev_dot_recall": 0.9248554913294798, "eval_allNLI-dev_euclidean_accuracy": 0.669921875, "eval_allNLI-dev_euclidean_accuracy_threshold": 5.109055995941162, "eval_allNLI-dev_euclidean_ap": 0.42414702832207185, "eval_allNLI-dev_euclidean_f1": 0.5325670498084292, "eval_allNLI-dev_euclidean_f1_threshold": 11.284603118896484, "eval_allNLI-dev_euclidean_precision": 0.3982808022922636, "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, "eval_allNLI-dev_manhattan_accuracy": 0.671875, "eval_allNLI-dev_manhattan_accuracy_threshold": 118.02589416503906, "eval_allNLI-dev_manhattan_ap": 0.42975205717472725, "eval_allNLI-dev_manhattan_f1": 0.5330948121645797, "eval_allNLI-dev_manhattan_f1_threshold": 225.3105010986328, "eval_allNLI-dev_manhattan_precision": 0.3860103626943005, "eval_allNLI-dev_manhattan_recall": 0.861271676300578, "eval_allNLI-dev_max_accuracy": 0.671875, "eval_allNLI-dev_max_accuracy_threshold": 489.699951171875, "eval_allNLI-dev_max_ap": 0.42975205717472725, "eval_allNLI-dev_max_f1": 0.5330948121645797, "eval_allNLI-dev_max_f1_threshold": 370.46728515625, "eval_allNLI-dev_max_precision": 0.3982808022922636, "eval_allNLI-dev_max_recall": 0.9248554913294798, "eval_sequential_score": 0.6718309029852861, "eval_sts-test_pearson_cosine": 0.2593214673103316, "eval_sts-test_pearson_dot": 0.2856451479091534, "eval_sts-test_pearson_euclidean": 0.2633545673906765, "eval_sts-test_pearson_manhattan": 0.27094196150257477, "eval_sts-test_pearson_max": 0.2856451479091534, "eval_sts-test_spearman_cosine": 0.3239060946012997, "eval_sts-test_spearman_dot": 0.3020791143702586, "eval_sts-test_spearman_euclidean": 0.29537649419536166, "eval_sts-test_spearman_manhattan": 0.30477367732115745, "eval_sts-test_spearman_max": 0.3239060946012997, "eval_vitaminc-pairs_loss": 2.7820005416870117, "eval_vitaminc-pairs_runtime": 3.1613, "eval_vitaminc-pairs_samples_per_second": 40.489, "eval_vitaminc-pairs_steps_per_second": 0.316, "step": 160 }, { "epoch": 0.1646090534979424, "eval_negation-triplets_loss": 3.2458996772766113, "eval_negation-triplets_runtime": 0.7284, "eval_negation-triplets_samples_per_second": 175.738, "eval_negation-triplets_steps_per_second": 1.373, "step": 160 }, { "epoch": 0.1646090534979424, "eval_scitail-pairs-pos_loss": 0.9951260685920715, "eval_scitail-pairs-pos_runtime": 0.7927, "eval_scitail-pairs-pos_samples_per_second": 161.465, "eval_scitail-pairs-pos_steps_per_second": 1.261, "step": 160 }, { "epoch": 0.1646090534979424, "eval_scitail-pairs-qa_loss": 0.8330278396606445, "eval_scitail-pairs-qa_runtime": 0.5647, "eval_scitail-pairs-qa_samples_per_second": 226.68, "eval_scitail-pairs-qa_steps_per_second": 1.771, "step": 160 }, { "epoch": 0.1646090534979424, "eval_xsum-pairs_loss": 5.1889238357543945, "eval_xsum-pairs_runtime": 3.014, "eval_xsum-pairs_samples_per_second": 42.469, "eval_xsum-pairs_steps_per_second": 0.332, "step": 160 }, { "epoch": 0.1646090534979424, "eval_sciq_pairs_loss": 0.5267525315284729, "eval_sciq_pairs_runtime": 3.3969, "eval_sciq_pairs_samples_per_second": 37.682, "eval_sciq_pairs_steps_per_second": 0.294, "step": 160 }, { "epoch": 0.1646090534979424, "eval_qasc_pairs_loss": 2.3833937644958496, "eval_qasc_pairs_runtime": 0.5986, "eval_qasc_pairs_samples_per_second": 213.816, "eval_qasc_pairs_steps_per_second": 1.67, "step": 160 }, { "epoch": 0.1646090534979424, "eval_openbookqa_pairs_loss": 4.070493221282959, "eval_openbookqa_pairs_runtime": 0.5688, "eval_openbookqa_pairs_samples_per_second": 225.041, "eval_openbookqa_pairs_steps_per_second": 1.758, "step": 160 }, { "epoch": 0.1646090534979424, "eval_msmarco_pairs_loss": 5.934054374694824, "eval_msmarco_pairs_runtime": 1.5142, "eval_msmarco_pairs_samples_per_second": 84.535, "eval_msmarco_pairs_steps_per_second": 0.66, "step": 160 }, { "epoch": 0.1646090534979424, "eval_nq_pairs_loss": 5.961860656738281, "eval_nq_pairs_runtime": 2.8919, "eval_nq_pairs_samples_per_second": 44.261, "eval_nq_pairs_steps_per_second": 0.346, "step": 160 }, { "epoch": 0.1646090534979424, "eval_trivia_pairs_loss": 5.741409778594971, "eval_trivia_pairs_runtime": 3.4249, "eval_trivia_pairs_samples_per_second": 37.373, "eval_trivia_pairs_steps_per_second": 0.292, "step": 160 }, { "epoch": 0.1646090534979424, "eval_gooaq_pairs_loss": 4.762550354003906, "eval_gooaq_pairs_runtime": 0.9413, "eval_gooaq_pairs_samples_per_second": 135.983, "eval_gooaq_pairs_steps_per_second": 1.062, "step": 160 }, { "epoch": 0.1646090534979424, "eval_paws-pos_loss": 0.17168374359607697, "eval_paws-pos_runtime": 0.6912, "eval_paws-pos_samples_per_second": 185.177, "eval_paws-pos_steps_per_second": 1.447, "step": 160 }, { "epoch": 0.1646090534979424, "eval_global_dataset_loss": 3.0964090824127197, "eval_global_dataset_runtime": 13.333, "eval_global_dataset_samples_per_second": 31.201, "eval_global_dataset_steps_per_second": 0.3, "step": 160 }, { "epoch": 0.16563786008230452, "grad_norm": 16.593652725219727, "learning_rate": 5.742471443406022e-06, "loss": 4.6423, "step": 161 }, { "epoch": 0.16666666666666666, "grad_norm": 26.777013778686523, "learning_rate": 5.778816199376946e-06, "loss": 5.7346, "step": 162 }, { "epoch": 0.16769547325102882, "grad_norm": 15.474895477294922, "learning_rate": 5.815160955347871e-06, "loss": 3.3701, "step": 163 }, { "epoch": 0.16872427983539096, "grad_norm": 17.254573822021484, "learning_rate": 5.851505711318795e-06, "loss": 3.2528, "step": 164 }, { "epoch": 0.1697530864197531, "grad_norm": 17.283357620239258, "learning_rate": 5.887850467289719e-06, "loss": 4.5692, "step": 165 }, { "epoch": 0.17078189300411523, "grad_norm": 18.08893585205078, "learning_rate": 5.924195223260644e-06, "loss": 3.4966, "step": 166 }, { "epoch": 0.17181069958847736, "grad_norm": 17.073596954345703, "learning_rate": 5.960539979231567e-06, "loss": 4.5332, "step": 167 }, { "epoch": 0.1728395061728395, "grad_norm": 14.176384925842285, "learning_rate": 5.996884735202491e-06, "loss": 3.7013, "step": 168 }, { "epoch": 0.17386831275720166, "grad_norm": 21.731842041015625, "learning_rate": 6.033229491173416e-06, "loss": 5.3538, "step": 169 }, { "epoch": 0.1748971193415638, "grad_norm": 13.646337509155273, "learning_rate": 6.06957424714434e-06, "loss": 2.8866, "step": 170 }, { "epoch": 0.17592592592592593, "grad_norm": 17.945281982421875, "learning_rate": 6.105919003115264e-06, "loss": 2.9547, "step": 171 }, { "epoch": 0.17695473251028807, "grad_norm": 22.94412612915039, "learning_rate": 6.142263759086189e-06, "loss": 5.2232, "step": 172 }, { "epoch": 0.1779835390946502, "grad_norm": 33.40188980102539, "learning_rate": 6.178608515057113e-06, "loss": 5.6116, "step": 173 }, { "epoch": 0.17901234567901234, "grad_norm": 13.978586196899414, "learning_rate": 6.214953271028036e-06, "loss": 2.8644, "step": 174 }, { "epoch": 0.1800411522633745, "grad_norm": 22.077098846435547, "learning_rate": 6.251298026998961e-06, "loss": 1.9015, "step": 175 }, { "epoch": 0.18106995884773663, "grad_norm": 20.8638858795166, "learning_rate": 6.287642782969885e-06, "loss": 4.6564, "step": 176 }, { "epoch": 0.18209876543209877, "grad_norm": 14.965616226196289, "learning_rate": 6.323987538940809e-06, "loss": 3.0869, "step": 177 }, { "epoch": 0.1831275720164609, "grad_norm": 20.76239013671875, "learning_rate": 6.360332294911734e-06, "loss": 1.7242, "step": 178 }, { "epoch": 0.18415637860082304, "grad_norm": 16.349937438964844, "learning_rate": 6.396677050882658e-06, "loss": 4.275, "step": 179 }, { "epoch": 0.18518518518518517, "grad_norm": 23.983245849609375, "learning_rate": 6.433021806853582e-06, "loss": 5.6429, "step": 180 }, { "epoch": 0.18518518518518517, "eval_Qnli-dev_cosine_accuracy": 0.6484375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8662997484207153, "eval_Qnli-dev_cosine_ap": 0.6524497730088078, "eval_Qnli-dev_cosine_f1": 0.6678023850085179, "eval_Qnli-dev_cosine_f1_threshold": 0.8081307411193848, "eval_Qnli-dev_cosine_precision": 0.5584045584045584, "eval_Qnli-dev_cosine_recall": 0.8305084745762712, "eval_Qnli-dev_dot_accuracy": 0.623046875, "eval_Qnli-dev_dot_accuracy_threshold": 385.58721923828125, "eval_Qnli-dev_dot_ap": 0.5552533197510849, "eval_Qnli-dev_dot_f1": 0.6540145985401459, "eval_Qnli-dev_dot_f1_threshold": 316.07781982421875, "eval_Qnli-dev_dot_precision": 0.49888641425389757, "eval_Qnli-dev_dot_recall": 0.9491525423728814, "eval_Qnli-dev_euclidean_accuracy": 0.65625, "eval_Qnli-dev_euclidean_accuracy_threshold": 10.853160858154297, "eval_Qnli-dev_euclidean_ap": 0.6669108151611487, "eval_Qnli-dev_euclidean_f1": 0.6678200692041524, "eval_Qnli-dev_euclidean_f1_threshold": 12.968579292297363, "eval_Qnli-dev_euclidean_precision": 0.564327485380117, "eval_Qnli-dev_euclidean_recall": 0.8177966101694916, "eval_Qnli-dev_manhattan_accuracy": 0.6640625, "eval_Qnli-dev_manhattan_accuracy_threshold": 223.08535766601562, "eval_Qnli-dev_manhattan_ap": 0.6760180782489211, "eval_Qnli-dev_manhattan_f1": 0.6678082191780822, "eval_Qnli-dev_manhattan_f1_threshold": 253.64254760742188, "eval_Qnli-dev_manhattan_precision": 0.5603448275862069, "eval_Qnli-dev_manhattan_recall": 0.826271186440678, "eval_Qnli-dev_max_accuracy": 0.6640625, "eval_Qnli-dev_max_accuracy_threshold": 385.58721923828125, "eval_Qnli-dev_max_ap": 0.6760180782489211, "eval_Qnli-dev_max_f1": 0.6678200692041524, "eval_Qnli-dev_max_f1_threshold": 316.07781982421875, "eval_Qnli-dev_max_precision": 0.564327485380117, "eval_Qnli-dev_max_recall": 0.9491525423728814, "eval_allNLI-dev_cosine_accuracy": 0.671875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9789707064628601, "eval_allNLI-dev_cosine_ap": 0.4524477715499502, "eval_allNLI-dev_cosine_f1": 0.5444444444444444, "eval_allNLI-dev_cosine_f1_threshold": 0.8516685962677002, "eval_allNLI-dev_cosine_precision": 0.40054495912806537, "eval_allNLI-dev_cosine_recall": 0.8497109826589595, "eval_allNLI-dev_dot_accuracy": 0.66796875, "eval_allNLI-dev_dot_accuracy_threshold": 524.80029296875, "eval_allNLI-dev_dot_ap": 0.39641106298067524, "eval_allNLI-dev_dot_f1": 0.5137614678899083, "eval_allNLI-dev_dot_f1_threshold": 341.9022216796875, "eval_allNLI-dev_dot_precision": 0.3492723492723493, "eval_allNLI-dev_dot_recall": 0.9710982658959537, "eval_allNLI-dev_euclidean_accuracy": 0.671875, "eval_allNLI-dev_euclidean_accuracy_threshold": 4.58498477935791, "eval_allNLI-dev_euclidean_ap": 0.4516531171082357, "eval_allNLI-dev_euclidean_f1": 0.552, "eval_allNLI-dev_euclidean_f1_threshold": 11.532356262207031, "eval_allNLI-dev_euclidean_precision": 0.42201834862385323, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.671875, "eval_allNLI-dev_manhattan_accuracy_threshold": 107.1644058227539, "eval_allNLI-dev_manhattan_ap": 0.45547703653651306, "eval_allNLI-dev_manhattan_f1": 0.5475285171102661, "eval_allNLI-dev_manhattan_f1_threshold": 226.54490661621094, "eval_allNLI-dev_manhattan_precision": 0.40793201133144474, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.671875, "eval_allNLI-dev_max_accuracy_threshold": 524.80029296875, "eval_allNLI-dev_max_ap": 0.45547703653651306, "eval_allNLI-dev_max_f1": 0.552, "eval_allNLI-dev_max_f1_threshold": 341.9022216796875, "eval_allNLI-dev_max_precision": 0.42201834862385323, "eval_allNLI-dev_max_recall": 0.9710982658959537, "eval_sequential_score": 0.6760180782489211, "eval_sts-test_pearson_cosine": 0.3078712504181006, "eval_sts-test_pearson_dot": 0.30687493336995575, "eval_sts-test_pearson_euclidean": 0.3149216629485325, "eval_sts-test_pearson_manhattan": 0.32260490964216404, "eval_sts-test_pearson_max": 0.32260490964216404, "eval_sts-test_spearman_cosine": 0.3756490245649231, "eval_sts-test_spearman_dot": 0.32074983288662573, "eval_sts-test_spearman_euclidean": 0.34933389557767713, "eval_sts-test_spearman_manhattan": 0.35486430121168766, "eval_sts-test_spearman_max": 0.3756490245649231, "eval_vitaminc-pairs_loss": 2.734696388244629, "eval_vitaminc-pairs_runtime": 3.1523, "eval_vitaminc-pairs_samples_per_second": 40.606, "eval_vitaminc-pairs_steps_per_second": 0.317, "step": 180 }, { "epoch": 0.18518518518518517, "eval_negation-triplets_loss": 3.099722146987915, "eval_negation-triplets_runtime": 0.7375, "eval_negation-triplets_samples_per_second": 173.567, "eval_negation-triplets_steps_per_second": 1.356, "step": 180 }, { "epoch": 0.18518518518518517, "eval_scitail-pairs-pos_loss": 0.6654092073440552, "eval_scitail-pairs-pos_runtime": 0.7978, "eval_scitail-pairs-pos_samples_per_second": 160.446, "eval_scitail-pairs-pos_steps_per_second": 1.253, "step": 180 }, { "epoch": 0.18518518518518517, "eval_scitail-pairs-qa_loss": 0.4432713985443115, "eval_scitail-pairs-qa_runtime": 0.5613, "eval_scitail-pairs-qa_samples_per_second": 228.051, "eval_scitail-pairs-qa_steps_per_second": 1.782, "step": 180 }, { "epoch": 0.18518518518518517, "eval_xsum-pairs_loss": 4.390190601348877, "eval_xsum-pairs_runtime": 3.0147, "eval_xsum-pairs_samples_per_second": 42.459, "eval_xsum-pairs_steps_per_second": 0.332, "step": 180 }, { "epoch": 0.18518518518518517, "eval_sciq_pairs_loss": 0.43316569924354553, "eval_sciq_pairs_runtime": 3.4401, "eval_sciq_pairs_samples_per_second": 37.208, "eval_sciq_pairs_steps_per_second": 0.291, "step": 180 }, { "epoch": 0.18518518518518517, "eval_qasc_pairs_loss": 2.034595251083374, "eval_qasc_pairs_runtime": 0.5991, "eval_qasc_pairs_samples_per_second": 213.665, "eval_qasc_pairs_steps_per_second": 1.669, "step": 180 }, { "epoch": 0.18518518518518517, "eval_openbookqa_pairs_loss": 3.2861831188201904, "eval_openbookqa_pairs_runtime": 0.5738, "eval_openbookqa_pairs_samples_per_second": 223.062, "eval_openbookqa_pairs_steps_per_second": 1.743, "step": 180 }, { "epoch": 0.18518518518518517, "eval_msmarco_pairs_loss": 5.0437421798706055, "eval_msmarco_pairs_runtime": 1.529, "eval_msmarco_pairs_samples_per_second": 83.713, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 180 }, { "epoch": 0.18518518518518517, "eval_nq_pairs_loss": 5.238871097564697, "eval_nq_pairs_runtime": 2.9133, "eval_nq_pairs_samples_per_second": 43.936, "eval_nq_pairs_steps_per_second": 0.343, "step": 180 }, { "epoch": 0.18518518518518517, "eval_trivia_pairs_loss": 5.040083885192871, "eval_trivia_pairs_runtime": 3.4307, "eval_trivia_pairs_samples_per_second": 37.311, "eval_trivia_pairs_steps_per_second": 0.291, "step": 180 }, { "epoch": 0.18518518518518517, "eval_gooaq_pairs_loss": 4.043346881866455, "eval_gooaq_pairs_runtime": 0.947, "eval_gooaq_pairs_samples_per_second": 135.164, "eval_gooaq_pairs_steps_per_second": 1.056, "step": 180 }, { "epoch": 0.18518518518518517, "eval_paws-pos_loss": 0.15505897998809814, "eval_paws-pos_runtime": 0.6791, "eval_paws-pos_samples_per_second": 188.472, "eval_paws-pos_steps_per_second": 1.472, "step": 180 }, { "epoch": 0.18518518518518517, "eval_global_dataset_loss": 2.481849193572998, "eval_global_dataset_runtime": 13.3357, "eval_global_dataset_samples_per_second": 31.194, "eval_global_dataset_steps_per_second": 0.3, "step": 180 }, { "epoch": 0.18621399176954734, "grad_norm": 23.770732879638672, "learning_rate": 6.469366562824507e-06, "loss": 4.1785, "step": 181 }, { "epoch": 0.18724279835390947, "grad_norm": 15.494194984436035, "learning_rate": 6.505711318795431e-06, "loss": 2.8654, "step": 182 }, { "epoch": 0.1882716049382716, "grad_norm": 15.12114429473877, "learning_rate": 6.542056074766354e-06, "loss": 2.9405, "step": 183 }, { "epoch": 0.18930041152263374, "grad_norm": 14.066164016723633, "learning_rate": 6.578400830737279e-06, "loss": 2.6342, "step": 184 }, { "epoch": 0.19032921810699588, "grad_norm": 15.717785835266113, "learning_rate": 6.614745586708203e-06, "loss": 3.8401, "step": 185 }, { "epoch": 0.19135802469135801, "grad_norm": 13.966800689697266, "learning_rate": 6.651090342679127e-06, "loss": 3.6136, "step": 186 }, { "epoch": 0.19238683127572018, "grad_norm": 15.663467407226562, "learning_rate": 6.687435098650052e-06, "loss": 3.0736, "step": 187 }, { "epoch": 0.1934156378600823, "grad_norm": 26.5308837890625, "learning_rate": 6.723779854620976e-06, "loss": 5.4694, "step": 188 }, { "epoch": 0.19444444444444445, "grad_norm": 15.630946159362793, "learning_rate": 6.7601246105919e-06, "loss": 2.7903, "step": 189 }, { "epoch": 0.19547325102880658, "grad_norm": 20.857738494873047, "learning_rate": 6.796469366562825e-06, "loss": 3.8552, "step": 190 }, { "epoch": 0.19650205761316872, "grad_norm": 21.9176082611084, "learning_rate": 6.832814122533748e-06, "loss": 4.133, "step": 191 }, { "epoch": 0.19753086419753085, "grad_norm": 13.483908653259277, "learning_rate": 6.869158878504672e-06, "loss": 3.3172, "step": 192 }, { "epoch": 0.19855967078189302, "grad_norm": 22.11282730102539, "learning_rate": 6.905503634475597e-06, "loss": 4.5717, "step": 193 }, { "epoch": 0.19958847736625515, "grad_norm": 14.112103462219238, "learning_rate": 6.941848390446521e-06, "loss": 2.6895, "step": 194 }, { "epoch": 0.2006172839506173, "grad_norm": 26.939516067504883, "learning_rate": 6.978193146417445e-06, "loss": 4.8326, "step": 195 }, { "epoch": 0.20164609053497942, "grad_norm": 21.34830665588379, "learning_rate": 7.01453790238837e-06, "loss": 4.6939, "step": 196 }, { "epoch": 0.20267489711934156, "grad_norm": 15.555133819580078, "learning_rate": 7.050882658359294e-06, "loss": 2.6114, "step": 197 }, { "epoch": 0.2037037037037037, "grad_norm": 18.566102981567383, "learning_rate": 7.087227414330217e-06, "loss": 4.0035, "step": 198 }, { "epoch": 0.20473251028806586, "grad_norm": 20.652629852294922, "learning_rate": 7.123572170301142e-06, "loss": 4.6457, "step": 199 }, { "epoch": 0.205761316872428, "grad_norm": 13.168004989624023, "learning_rate": 7.159916926272066e-06, "loss": 2.8982, "step": 200 }, { "epoch": 0.205761316872428, "eval_Qnli-dev_cosine_accuracy": 0.6640625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8493491411209106, "eval_Qnli-dev_cosine_ap": 0.6558066754883848, "eval_Qnli-dev_cosine_f1": 0.6813559322033899, "eval_Qnli-dev_cosine_f1_threshold": 0.8149238228797913, "eval_Qnli-dev_cosine_precision": 0.5677966101694916, "eval_Qnli-dev_cosine_recall": 0.8516949152542372, "eval_Qnli-dev_dot_accuracy": 0.625, "eval_Qnli-dev_dot_accuracy_threshold": 415.6668395996094, "eval_Qnli-dev_dot_ap": 0.5566403929041909, "eval_Qnli-dev_dot_f1": 0.6607929515418502, "eval_Qnli-dev_dot_f1_threshold": 356.9716491699219, "eval_Qnli-dev_dot_precision": 0.5056179775280899, "eval_Qnli-dev_dot_recall": 0.9533898305084746, "eval_Qnli-dev_euclidean_accuracy": 0.666015625, "eval_Qnli-dev_euclidean_accuracy_threshold": 11.828110694885254, "eval_Qnli-dev_euclidean_ap": 0.6719522122361475, "eval_Qnli-dev_euclidean_f1": 0.6812080536912752, "eval_Qnli-dev_euclidean_f1_threshold": 13.571544647216797, "eval_Qnli-dev_euclidean_precision": 0.5638888888888889, "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, "eval_Qnli-dev_manhattan_accuracy": 0.666015625, "eval_Qnli-dev_manhattan_accuracy_threshold": 251.22454833984375, "eval_Qnli-dev_manhattan_ap": 0.6748374333603533, "eval_Qnli-dev_manhattan_f1": 0.687813021702838, "eval_Qnli-dev_manhattan_f1_threshold": 265.57940673828125, "eval_Qnli-dev_manhattan_precision": 0.5674931129476584, "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, "eval_Qnli-dev_max_accuracy": 0.666015625, "eval_Qnli-dev_max_accuracy_threshold": 415.6668395996094, "eval_Qnli-dev_max_ap": 0.6748374333603533, "eval_Qnli-dev_max_f1": 0.687813021702838, "eval_Qnli-dev_max_f1_threshold": 356.9716491699219, "eval_Qnli-dev_max_precision": 0.5677966101694916, "eval_Qnli-dev_max_recall": 0.9533898305084746, "eval_allNLI-dev_cosine_accuracy": 0.673828125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9595370292663574, "eval_allNLI-dev_cosine_ap": 0.4708419415503453, "eval_allNLI-dev_cosine_f1": 0.5435684647302905, "eval_allNLI-dev_cosine_f1_threshold": 0.8678731918334961, "eval_allNLI-dev_cosine_precision": 0.42394822006472493, "eval_allNLI-dev_cosine_recall": 0.7572254335260116, "eval_allNLI-dev_dot_accuracy": 0.669921875, "eval_allNLI-dev_dot_accuracy_threshold": 541.4039306640625, "eval_allNLI-dev_dot_ap": 0.4068383335197935, "eval_allNLI-dev_dot_f1": 0.521875, "eval_allNLI-dev_dot_f1_threshold": 372.6112060546875, "eval_allNLI-dev_dot_precision": 0.3576017130620985, "eval_allNLI-dev_dot_recall": 0.9653179190751445, "eval_allNLI-dev_euclidean_accuracy": 0.673828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 6.293747901916504, "eval_allNLI-dev_euclidean_ap": 0.47115509667092914, "eval_allNLI-dev_euclidean_f1": 0.544, "eval_allNLI-dev_euclidean_f1_threshold": 11.993677139282227, "eval_allNLI-dev_euclidean_precision": 0.41590214067278286, "eval_allNLI-dev_euclidean_recall": 0.7861271676300579, "eval_allNLI-dev_manhattan_accuracy": 0.67578125, "eval_allNLI-dev_manhattan_accuracy_threshold": 141.41348266601562, "eval_allNLI-dev_manhattan_ap": 0.4773047143908946, "eval_allNLI-dev_manhattan_f1": 0.5521235521235521, "eval_allNLI-dev_manhattan_f1_threshold": 238.16036987304688, "eval_allNLI-dev_manhattan_precision": 0.4144927536231884, "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, "eval_allNLI-dev_max_accuracy": 0.67578125, "eval_allNLI-dev_max_accuracy_threshold": 541.4039306640625, "eval_allNLI-dev_max_ap": 0.4773047143908946, "eval_allNLI-dev_max_f1": 0.5521235521235521, "eval_allNLI-dev_max_f1_threshold": 372.6112060546875, "eval_allNLI-dev_max_precision": 0.42394822006472493, "eval_allNLI-dev_max_recall": 0.9653179190751445, "eval_sequential_score": 0.6748374333603533, "eval_sts-test_pearson_cosine": 0.37650448121246105, "eval_sts-test_pearson_dot": 0.34722884377459334, "eval_sts-test_pearson_euclidean": 0.3832908339538646, "eval_sts-test_pearson_manhattan": 0.3852968520690805, "eval_sts-test_pearson_max": 0.3852968520690805, "eval_sts-test_spearman_cosine": 0.4334008406493539, "eval_sts-test_spearman_dot": 0.35710334107288355, "eval_sts-test_spearman_euclidean": 0.408594276683612, "eval_sts-test_spearman_manhattan": 0.4087942700707702, "eval_sts-test_spearman_max": 0.4334008406493539, "eval_vitaminc-pairs_loss": 2.7392194271087646, "eval_vitaminc-pairs_runtime": 3.1693, "eval_vitaminc-pairs_samples_per_second": 40.387, "eval_vitaminc-pairs_steps_per_second": 0.316, "step": 200 }, { "epoch": 0.205761316872428, "eval_negation-triplets_loss": 2.8204259872436523, "eval_negation-triplets_runtime": 0.7414, "eval_negation-triplets_samples_per_second": 172.651, "eval_negation-triplets_steps_per_second": 1.349, "step": 200 }, { "epoch": 0.205761316872428, "eval_scitail-pairs-pos_loss": 0.5787031054496765, "eval_scitail-pairs-pos_runtime": 0.8028, "eval_scitail-pairs-pos_samples_per_second": 159.448, "eval_scitail-pairs-pos_steps_per_second": 1.246, "step": 200 }, { "epoch": 0.205761316872428, "eval_scitail-pairs-qa_loss": 0.3260263204574585, "eval_scitail-pairs-qa_runtime": 0.5665, "eval_scitail-pairs-qa_samples_per_second": 225.934, "eval_scitail-pairs-qa_steps_per_second": 1.765, "step": 200 }, { "epoch": 0.205761316872428, "eval_xsum-pairs_loss": 3.6776349544525146, "eval_xsum-pairs_runtime": 3.0162, "eval_xsum-pairs_samples_per_second": 42.437, "eval_xsum-pairs_steps_per_second": 0.332, "step": 200 }, { "epoch": 0.205761316872428, "eval_sciq_pairs_loss": 0.3696608543395996, "eval_sciq_pairs_runtime": 3.4228, "eval_sciq_pairs_samples_per_second": 37.396, "eval_sciq_pairs_steps_per_second": 0.292, "step": 200 }, { "epoch": 0.205761316872428, "eval_qasc_pairs_loss": 1.7308318614959717, "eval_qasc_pairs_runtime": 0.5951, "eval_qasc_pairs_samples_per_second": 215.086, "eval_qasc_pairs_steps_per_second": 1.68, "step": 200 }, { "epoch": 0.205761316872428, "eval_openbookqa_pairs_loss": 2.8745713233947754, "eval_openbookqa_pairs_runtime": 0.5756, "eval_openbookqa_pairs_samples_per_second": 222.393, "eval_openbookqa_pairs_steps_per_second": 1.737, "step": 200 }, { "epoch": 0.205761316872428, "eval_msmarco_pairs_loss": 4.048874378204346, "eval_msmarco_pairs_runtime": 1.5159, "eval_msmarco_pairs_samples_per_second": 84.439, "eval_msmarco_pairs_steps_per_second": 0.66, "step": 200 }, { "epoch": 0.205761316872428, "eval_nq_pairs_loss": 4.402989387512207, "eval_nq_pairs_runtime": 2.8983, "eval_nq_pairs_samples_per_second": 44.163, "eval_nq_pairs_steps_per_second": 0.345, "step": 200 }, { "epoch": 0.205761316872428, "eval_trivia_pairs_loss": 4.454685688018799, "eval_trivia_pairs_runtime": 3.4474, "eval_trivia_pairs_samples_per_second": 37.129, "eval_trivia_pairs_steps_per_second": 0.29, "step": 200 }, { "epoch": 0.205761316872428, "eval_gooaq_pairs_loss": 3.200054168701172, "eval_gooaq_pairs_runtime": 0.956, "eval_gooaq_pairs_samples_per_second": 133.894, "eval_gooaq_pairs_steps_per_second": 1.046, "step": 200 }, { "epoch": 0.205761316872428, "eval_paws-pos_loss": 0.17940819263458252, "eval_paws-pos_runtime": 0.6752, "eval_paws-pos_samples_per_second": 189.577, "eval_paws-pos_steps_per_second": 1.481, "step": 200 }, { "epoch": 0.205761316872428, "eval_global_dataset_loss": 2.0389692783355713, "eval_global_dataset_runtime": 13.3595, "eval_global_dataset_samples_per_second": 31.139, "eval_global_dataset_steps_per_second": 0.299, "step": 200 }, { "epoch": 0.20679012345679013, "grad_norm": 20.6440372467041, "learning_rate": 7.19626168224299e-06, "loss": 4.1212, "step": 201 }, { "epoch": 0.20781893004115226, "grad_norm": 18.542131423950195, "learning_rate": 7.232606438213915e-06, "loss": 3.712, "step": 202 }, { "epoch": 0.2088477366255144, "grad_norm": 13.649810791015625, "learning_rate": 7.268951194184839e-06, "loss": 2.4475, "step": 203 }, { "epoch": 0.20987654320987653, "grad_norm": 14.254504203796387, "learning_rate": 7.305295950155763e-06, "loss": 2.0375, "step": 204 }, { "epoch": 0.2109053497942387, "grad_norm": 16.941804885864258, "learning_rate": 7.341640706126687e-06, "loss": 3.6423, "step": 205 }, { "epoch": 0.21193415637860083, "grad_norm": 24.76467514038086, "learning_rate": 7.3779854620976116e-06, "loss": 5.0227, "step": 206 }, { "epoch": 0.21296296296296297, "grad_norm": 27.70640754699707, "learning_rate": 7.414330218068535e-06, "loss": 4.743, "step": 207 }, { "epoch": 0.2139917695473251, "grad_norm": 19.96710777282715, "learning_rate": 7.450674974039459e-06, "loss": 4.502, "step": 208 }, { "epoch": 0.21502057613168724, "grad_norm": 13.25556468963623, "learning_rate": 7.487019730010384e-06, "loss": 2.2948, "step": 209 }, { "epoch": 0.21604938271604937, "grad_norm": 14.281882286071777, "learning_rate": 7.523364485981308e-06, "loss": 3.3056, "step": 210 }, { "epoch": 0.21707818930041153, "grad_norm": 12.938163757324219, "learning_rate": 7.559709241952232e-06, "loss": 2.1324, "step": 211 }, { "epoch": 0.21810699588477367, "grad_norm": 13.252862930297852, "learning_rate": 7.5960539979231565e-06, "loss": 2.2595, "step": 212 }, { "epoch": 0.2191358024691358, "grad_norm": 13.162984848022461, "learning_rate": 7.63239875389408e-06, "loss": 2.3108, "step": 213 }, { "epoch": 0.22016460905349794, "grad_norm": 12.221834182739258, "learning_rate": 7.668743509865004e-06, "loss": 2.7378, "step": 214 }, { "epoch": 0.22119341563786007, "grad_norm": 12.927008628845215, "learning_rate": 7.70508826583593e-06, "loss": 2.3095, "step": 215 }, { "epoch": 0.2222222222222222, "grad_norm": 18.692045211791992, "learning_rate": 7.741433021806853e-06, "loss": 3.4556, "step": 216 }, { "epoch": 0.22325102880658437, "grad_norm": 15.472359657287598, "learning_rate": 7.777777777777777e-06, "loss": 2.9786, "step": 217 }, { "epoch": 0.2242798353909465, "grad_norm": 12.372124671936035, "learning_rate": 7.814122533748701e-06, "loss": 1.9408, "step": 218 }, { "epoch": 0.22530864197530864, "grad_norm": 13.370574951171875, "learning_rate": 7.850467289719626e-06, "loss": 2.9416, "step": 219 }, { "epoch": 0.22633744855967078, "grad_norm": 12.76834487915039, "learning_rate": 7.88681204569055e-06, "loss": 2.0087, "step": 220 }, { "epoch": 0.22633744855967078, "eval_Qnli-dev_cosine_accuracy": 0.666015625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8367502689361572, "eval_Qnli-dev_cosine_ap": 0.657497384714659, "eval_Qnli-dev_cosine_f1": 0.6850828729281769, "eval_Qnli-dev_cosine_f1_threshold": 0.8328432440757751, "eval_Qnli-dev_cosine_precision": 0.6058631921824105, "eval_Qnli-dev_cosine_recall": 0.788135593220339, "eval_Qnli-dev_dot_accuracy": 0.62109375, "eval_Qnli-dev_dot_accuracy_threshold": 443.7711181640625, "eval_Qnli-dev_dot_ap": 0.561375704126675, "eval_Qnli-dev_dot_f1": 0.6607407407407409, "eval_Qnli-dev_dot_f1_threshold": 383.77728271484375, "eval_Qnli-dev_dot_precision": 0.5079726651480638, "eval_Qnli-dev_dot_recall": 0.9449152542372882, "eval_Qnli-dev_euclidean_accuracy": 0.666015625, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.143888473510742, "eval_Qnli-dev_euclidean_ap": 0.669642308468768, "eval_Qnli-dev_euclidean_f1": 0.6845637583892616, "eval_Qnli-dev_euclidean_f1_threshold": 14.205205917358398, "eval_Qnli-dev_euclidean_precision": 0.5666666666666667, "eval_Qnli-dev_euclidean_recall": 0.864406779661017, "eval_Qnli-dev_manhattan_accuracy": 0.658203125, "eval_Qnli-dev_manhattan_accuracy_threshold": 212.78713989257812, "eval_Qnli-dev_manhattan_ap": 0.6716707737018695, "eval_Qnli-dev_manhattan_f1": 0.6907894736842105, "eval_Qnli-dev_manhattan_f1_threshold": 283.0830383300781, "eval_Qnli-dev_manhattan_precision": 0.5645161290322581, "eval_Qnli-dev_manhattan_recall": 0.8898305084745762, "eval_Qnli-dev_max_accuracy": 0.666015625, "eval_Qnli-dev_max_accuracy_threshold": 443.7711181640625, "eval_Qnli-dev_max_ap": 0.6716707737018695, "eval_Qnli-dev_max_f1": 0.6907894736842105, "eval_Qnli-dev_max_f1_threshold": 383.77728271484375, "eval_Qnli-dev_max_precision": 0.6058631921824105, "eval_Qnli-dev_max_recall": 0.9449152542372882, "eval_allNLI-dev_cosine_accuracy": 0.677734375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9583283066749573, "eval_allNLI-dev_cosine_ap": 0.4885246068143708, "eval_allNLI-dev_cosine_f1": 0.5432937181663838, "eval_allNLI-dev_cosine_f1_threshold": 0.8263977766036987, "eval_allNLI-dev_cosine_precision": 0.38461538461538464, "eval_allNLI-dev_cosine_recall": 0.9248554913294798, "eval_allNLI-dev_dot_accuracy": 0.66796875, "eval_allNLI-dev_dot_accuracy_threshold": 568.2660522460938, "eval_allNLI-dev_dot_ap": 0.4122244378970726, "eval_allNLI-dev_dot_f1": 0.5243328100470958, "eval_allNLI-dev_dot_f1_threshold": 404.83544921875, "eval_allNLI-dev_dot_precision": 0.3599137931034483, "eval_allNLI-dev_dot_recall": 0.9653179190751445, "eval_allNLI-dev_euclidean_accuracy": 0.673828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 5.9007158279418945, "eval_allNLI-dev_euclidean_ap": 0.4870900115925687, "eval_allNLI-dev_euclidean_f1": 0.5494880546075086, "eval_allNLI-dev_euclidean_f1_threshold": 13.606775283813477, "eval_allNLI-dev_euclidean_precision": 0.3898305084745763, "eval_allNLI-dev_euclidean_recall": 0.930635838150289, "eval_allNLI-dev_manhattan_accuracy": 0.6796875, "eval_allNLI-dev_manhattan_accuracy_threshold": 149.96470642089844, "eval_allNLI-dev_manhattan_ap": 0.48775989657604024, "eval_allNLI-dev_manhattan_f1": 0.5509433962264151, "eval_allNLI-dev_manhattan_f1_threshold": 245.671875, "eval_allNLI-dev_manhattan_precision": 0.40896358543417366, "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, "eval_allNLI-dev_max_accuracy": 0.6796875, "eval_allNLI-dev_max_accuracy_threshold": 568.2660522460938, "eval_allNLI-dev_max_ap": 0.4885246068143708, "eval_allNLI-dev_max_f1": 0.5509433962264151, "eval_allNLI-dev_max_f1_threshold": 404.83544921875, "eval_allNLI-dev_max_precision": 0.40896358543417366, "eval_allNLI-dev_max_recall": 0.9653179190751445, "eval_sequential_score": 0.6716707737018695, "eval_sts-test_pearson_cosine": 0.47370132819582667, "eval_sts-test_pearson_dot": 0.4090770475954118, "eval_sts-test_pearson_euclidean": 0.47821395607635725, "eval_sts-test_pearson_manhattan": 0.4805462866477066, "eval_sts-test_pearson_max": 0.4805462866477066, "eval_sts-test_spearman_cosine": 0.5169709124658022, "eval_sts-test_spearman_dot": 0.4142341886542473, "eval_sts-test_spearman_euclidean": 0.4963594659966741, "eval_sts-test_spearman_manhattan": 0.49648047340747653, "eval_sts-test_spearman_max": 0.5169709124658022, "eval_vitaminc-pairs_loss": 2.7133967876434326, "eval_vitaminc-pairs_runtime": 3.1964, "eval_vitaminc-pairs_samples_per_second": 40.045, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 220 }, { "epoch": 0.22633744855967078, "eval_negation-triplets_loss": 2.680220603942871, "eval_negation-triplets_runtime": 0.7348, "eval_negation-triplets_samples_per_second": 174.208, "eval_negation-triplets_steps_per_second": 1.361, "step": 220 }, { "epoch": 0.22633744855967078, "eval_scitail-pairs-pos_loss": 0.5108461380004883, "eval_scitail-pairs-pos_runtime": 0.8205, "eval_scitail-pairs-pos_samples_per_second": 155.993, "eval_scitail-pairs-pos_steps_per_second": 1.219, "step": 220 }, { "epoch": 0.22633744855967078, "eval_scitail-pairs-qa_loss": 0.27794376015663147, "eval_scitail-pairs-qa_runtime": 0.5655, "eval_scitail-pairs-qa_samples_per_second": 226.329, "eval_scitail-pairs-qa_steps_per_second": 1.768, "step": 220 }, { "epoch": 0.22633744855967078, "eval_xsum-pairs_loss": 2.8043901920318604, "eval_xsum-pairs_runtime": 3.0149, "eval_xsum-pairs_samples_per_second": 42.456, "eval_xsum-pairs_steps_per_second": 0.332, "step": 220 }, { "epoch": 0.22633744855967078, "eval_sciq_pairs_loss": 0.32811373472213745, "eval_sciq_pairs_runtime": 3.3848, "eval_sciq_pairs_samples_per_second": 37.816, "eval_sciq_pairs_steps_per_second": 0.295, "step": 220 }, { "epoch": 0.22633744855967078, "eval_qasc_pairs_loss": 1.6458420753479004, "eval_qasc_pairs_runtime": 0.5968, "eval_qasc_pairs_samples_per_second": 214.461, "eval_qasc_pairs_steps_per_second": 1.675, "step": 220 }, { "epoch": 0.22633744855967078, "eval_openbookqa_pairs_loss": 2.5592660903930664, "eval_openbookqa_pairs_runtime": 0.5683, "eval_openbookqa_pairs_samples_per_second": 225.246, "eval_openbookqa_pairs_steps_per_second": 1.76, "step": 220 }, { "epoch": 0.22633744855967078, "eval_msmarco_pairs_loss": 3.5379371643066406, "eval_msmarco_pairs_runtime": 1.5102, "eval_msmarco_pairs_samples_per_second": 84.756, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 220 }, { "epoch": 0.22633744855967078, "eval_nq_pairs_loss": 3.847370147705078, "eval_nq_pairs_runtime": 2.8908, "eval_nq_pairs_samples_per_second": 44.278, "eval_nq_pairs_steps_per_second": 0.346, "step": 220 }, { "epoch": 0.22633744855967078, "eval_trivia_pairs_loss": 3.6181681156158447, "eval_trivia_pairs_runtime": 3.4242, "eval_trivia_pairs_samples_per_second": 37.381, "eval_trivia_pairs_steps_per_second": 0.292, "step": 220 }, { "epoch": 0.22633744855967078, "eval_gooaq_pairs_loss": 2.7982828617095947, "eval_gooaq_pairs_runtime": 0.9365, "eval_gooaq_pairs_samples_per_second": 136.676, "eval_gooaq_pairs_steps_per_second": 1.068, "step": 220 }, { "epoch": 0.22633744855967078, "eval_paws-pos_loss": 0.1660136878490448, "eval_paws-pos_runtime": 0.6838, "eval_paws-pos_samples_per_second": 187.193, "eval_paws-pos_steps_per_second": 1.462, "step": 220 }, { "epoch": 0.22633744855967078, "eval_global_dataset_loss": 1.749915361404419, "eval_global_dataset_runtime": 13.3423, "eval_global_dataset_samples_per_second": 31.179, "eval_global_dataset_steps_per_second": 0.3, "step": 220 }, { "epoch": 0.2273662551440329, "grad_norm": 20.361539840698242, "learning_rate": 7.923156801661474e-06, "loss": 3.9528, "step": 221 }, { "epoch": 0.22839506172839505, "grad_norm": 15.792684555053711, "learning_rate": 7.959501557632398e-06, "loss": 1.1222, "step": 222 }, { "epoch": 0.2294238683127572, "grad_norm": 15.900016784667969, "learning_rate": 7.995846313603322e-06, "loss": 1.2289, "step": 223 }, { "epoch": 0.23045267489711935, "grad_norm": 14.649103164672852, "learning_rate": 8.032191069574247e-06, "loss": 1.193, "step": 224 }, { "epoch": 0.23148148148148148, "grad_norm": 24.6876277923584, "learning_rate": 8.068535825545171e-06, "loss": 3.8686, "step": 225 }, { "epoch": 0.23251028806584362, "grad_norm": 7.946255683898926, "learning_rate": 8.104880581516094e-06, "loss": 0.3667, "step": 226 }, { "epoch": 0.23353909465020575, "grad_norm": 14.419116020202637, "learning_rate": 8.14122533748702e-06, "loss": 2.1571, "step": 227 }, { "epoch": 0.2345679012345679, "grad_norm": 13.824968338012695, "learning_rate": 8.177570093457943e-06, "loss": 2.1233, "step": 228 }, { "epoch": 0.23559670781893005, "grad_norm": 15.247499465942383, "learning_rate": 8.213914849428867e-06, "loss": 3.0183, "step": 229 }, { "epoch": 0.2366255144032922, "grad_norm": 7.50793981552124, "learning_rate": 8.250259605399791e-06, "loss": 0.4344, "step": 230 }, { "epoch": 0.23765432098765432, "grad_norm": 13.505939483642578, "learning_rate": 8.286604361370715e-06, "loss": 2.6828, "step": 231 }, { "epoch": 0.23868312757201646, "grad_norm": 26.35114288330078, "learning_rate": 8.32294911734164e-06, "loss": 4.1836, "step": 232 }, { "epoch": 0.2397119341563786, "grad_norm": 15.033428192138672, "learning_rate": 8.359293873312565e-06, "loss": 1.8063, "step": 233 }, { "epoch": 0.24074074074074073, "grad_norm": 15.326811790466309, "learning_rate": 8.395638629283488e-06, "loss": 3.4165, "step": 234 }, { "epoch": 0.2417695473251029, "grad_norm": 17.31609344482422, "learning_rate": 8.431983385254412e-06, "loss": 3.1603, "step": 235 }, { "epoch": 0.24279835390946503, "grad_norm": 23.062973022460938, "learning_rate": 8.468328141225337e-06, "loss": 2.9251, "step": 236 }, { "epoch": 0.24382716049382716, "grad_norm": 15.594389915466309, "learning_rate": 8.504672897196261e-06, "loss": 1.726, "step": 237 }, { "epoch": 0.2448559670781893, "grad_norm": 10.86409854888916, "learning_rate": 8.541017653167185e-06, "loss": 0.7677, "step": 238 }, { "epoch": 0.24588477366255143, "grad_norm": 24.200529098510742, "learning_rate": 8.57736240913811e-06, "loss": 3.7962, "step": 239 }, { "epoch": 0.24691358024691357, "grad_norm": 23.083824157714844, "learning_rate": 8.613707165109033e-06, "loss": 3.6562, "step": 240 }, { "epoch": 0.24691358024691357, "eval_Qnli-dev_cosine_accuracy": 0.677734375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8187533020973206, "eval_Qnli-dev_cosine_ap": 0.6835720202375161, "eval_Qnli-dev_cosine_f1": 0.6900958466453675, "eval_Qnli-dev_cosine_f1_threshold": 0.7529827356338501, "eval_Qnli-dev_cosine_precision": 0.5538461538461539, "eval_Qnli-dev_cosine_recall": 0.9152542372881356, "eval_Qnli-dev_dot_accuracy": 0.62890625, "eval_Qnli-dev_dot_accuracy_threshold": 405.27545166015625, "eval_Qnli-dev_dot_ap": 0.5877863096211339, "eval_Qnli-dev_dot_f1": 0.6722408026755853, "eval_Qnli-dev_dot_f1_threshold": 367.1414794921875, "eval_Qnli-dev_dot_precision": 0.5552486187845304, "eval_Qnli-dev_dot_recall": 0.8516949152542372, "eval_Qnli-dev_euclidean_accuracy": 0.6796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.904159545898438, "eval_Qnli-dev_euclidean_ap": 0.6913218676999153, "eval_Qnli-dev_euclidean_f1": 0.6865148861646235, "eval_Qnli-dev_euclidean_f1_threshold": 14.621212005615234, "eval_Qnli-dev_euclidean_precision": 0.5850746268656717, "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, "eval_Qnli-dev_manhattan_accuracy": 0.677734375, "eval_Qnli-dev_manhattan_accuracy_threshold": 254.7897491455078, "eval_Qnli-dev_manhattan_ap": 0.698230594032758, "eval_Qnli-dev_manhattan_f1": 0.6965517241379311, "eval_Qnli-dev_manhattan_f1_threshold": 293.552734375, "eval_Qnli-dev_manhattan_precision": 0.5872093023255814, "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, "eval_Qnli-dev_max_accuracy": 0.6796875, "eval_Qnli-dev_max_accuracy_threshold": 405.27545166015625, "eval_Qnli-dev_max_ap": 0.698230594032758, "eval_Qnli-dev_max_f1": 0.6965517241379311, "eval_Qnli-dev_max_f1_threshold": 367.1414794921875, "eval_Qnli-dev_max_precision": 0.5872093023255814, "eval_Qnli-dev_max_recall": 0.9152542372881356, "eval_allNLI-dev_cosine_accuracy": 0.6796875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9396699666976929, "eval_allNLI-dev_cosine_ap": 0.49305382035680395, "eval_allNLI-dev_cosine_f1": 0.5477477477477477, "eval_allNLI-dev_cosine_f1_threshold": 0.8165856003761292, "eval_allNLI-dev_cosine_precision": 0.39790575916230364, "eval_allNLI-dev_cosine_recall": 0.8786127167630058, "eval_allNLI-dev_dot_accuracy": 0.66796875, "eval_allNLI-dev_dot_accuracy_threshold": 519.2574462890625, "eval_allNLI-dev_dot_ap": 0.41169754525200597, "eval_allNLI-dev_dot_f1": 0.5198098256735342, "eval_allNLI-dev_dot_f1_threshold": 369.37896728515625, "eval_allNLI-dev_dot_precision": 0.35807860262008734, "eval_allNLI-dev_dot_recall": 0.9479768786127167, "eval_allNLI-dev_euclidean_accuracy": 0.681640625, "eval_allNLI-dev_euclidean_accuracy_threshold": 8.951443672180176, "eval_allNLI-dev_euclidean_ap": 0.4972712047553571, "eval_allNLI-dev_euclidean_f1": 0.5478424015009381, "eval_allNLI-dev_euclidean_f1_threshold": 13.30331802368164, "eval_allNLI-dev_euclidean_precision": 0.40555555555555556, "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, "eval_allNLI-dev_manhattan_accuracy": 0.681640625, "eval_allNLI-dev_manhattan_accuracy_threshold": 168.29837036132812, "eval_allNLI-dev_manhattan_ap": 0.49849435311963386, "eval_allNLI-dev_manhattan_f1": 0.555765595463138, "eval_allNLI-dev_manhattan_f1_threshold": 261.11334228515625, "eval_allNLI-dev_manhattan_precision": 0.41292134831460675, "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, "eval_allNLI-dev_max_accuracy": 0.681640625, "eval_allNLI-dev_max_accuracy_threshold": 519.2574462890625, "eval_allNLI-dev_max_ap": 0.49849435311963386, "eval_allNLI-dev_max_f1": 0.555765595463138, "eval_allNLI-dev_max_f1_threshold": 369.37896728515625, "eval_allNLI-dev_max_precision": 0.41292134831460675, "eval_allNLI-dev_max_recall": 0.9479768786127167, "eval_sequential_score": 0.698230594032758, "eval_sts-test_pearson_cosine": 0.5422399822302852, "eval_sts-test_pearson_dot": 0.47800101935982187, "eval_sts-test_pearson_euclidean": 0.5410879554786593, "eval_sts-test_pearson_manhattan": 0.5428179293731825, "eval_sts-test_pearson_max": 0.5428179293731825, "eval_sts-test_spearman_cosine": 0.5703833329868931, "eval_sts-test_spearman_dot": 0.48296505545213714, "eval_sts-test_spearman_euclidean": 0.5529472570210532, "eval_sts-test_spearman_manhattan": 0.5558088764307263, "eval_sts-test_spearman_max": 0.5703833329868931, "eval_vitaminc-pairs_loss": 2.7294111251831055, "eval_vitaminc-pairs_runtime": 3.173, "eval_vitaminc-pairs_samples_per_second": 40.34, "eval_vitaminc-pairs_steps_per_second": 0.315, "step": 240 }, { "epoch": 0.24691358024691357, "eval_negation-triplets_loss": 2.5460636615753174, "eval_negation-triplets_runtime": 0.7253, "eval_negation-triplets_samples_per_second": 176.49, "eval_negation-triplets_steps_per_second": 1.379, "step": 240 }, { "epoch": 0.24691358024691357, "eval_scitail-pairs-pos_loss": 0.45181718468666077, "eval_scitail-pairs-pos_runtime": 0.7815, "eval_scitail-pairs-pos_samples_per_second": 163.781, "eval_scitail-pairs-pos_steps_per_second": 1.28, "step": 240 }, { "epoch": 0.24691358024691357, "eval_scitail-pairs-qa_loss": 0.11772796511650085, "eval_scitail-pairs-qa_runtime": 0.5646, "eval_scitail-pairs-qa_samples_per_second": 226.703, "eval_scitail-pairs-qa_steps_per_second": 1.771, "step": 240 }, { "epoch": 0.24691358024691357, "eval_xsum-pairs_loss": 2.541783571243286, "eval_xsum-pairs_runtime": 3.0187, "eval_xsum-pairs_samples_per_second": 42.402, "eval_xsum-pairs_steps_per_second": 0.331, "step": 240 }, { "epoch": 0.24691358024691357, "eval_sciq_pairs_loss": 0.2976074516773224, "eval_sciq_pairs_runtime": 3.4151, "eval_sciq_pairs_samples_per_second": 37.48, "eval_sciq_pairs_steps_per_second": 0.293, "step": 240 }, { "epoch": 0.24691358024691357, "eval_qasc_pairs_loss": 1.538482427597046, "eval_qasc_pairs_runtime": 0.5897, "eval_qasc_pairs_samples_per_second": 217.058, "eval_qasc_pairs_steps_per_second": 1.696, "step": 240 }, { "epoch": 0.24691358024691357, "eval_openbookqa_pairs_loss": 2.2948145866394043, "eval_openbookqa_pairs_runtime": 0.5656, "eval_openbookqa_pairs_samples_per_second": 226.301, "eval_openbookqa_pairs_steps_per_second": 1.768, "step": 240 }, { "epoch": 0.24691358024691357, "eval_msmarco_pairs_loss": 3.3644864559173584, "eval_msmarco_pairs_runtime": 1.5156, "eval_msmarco_pairs_samples_per_second": 84.457, "eval_msmarco_pairs_steps_per_second": 0.66, "step": 240 }, { "epoch": 0.24691358024691357, "eval_nq_pairs_loss": 3.7770235538482666, "eval_nq_pairs_runtime": 2.9074, "eval_nq_pairs_samples_per_second": 44.025, "eval_nq_pairs_steps_per_second": 0.344, "step": 240 }, { "epoch": 0.24691358024691357, "eval_trivia_pairs_loss": 3.4960672855377197, "eval_trivia_pairs_runtime": 3.4254, "eval_trivia_pairs_samples_per_second": 37.368, "eval_trivia_pairs_steps_per_second": 0.292, "step": 240 }, { "epoch": 0.24691358024691357, "eval_gooaq_pairs_loss": 2.5963170528411865, "eval_gooaq_pairs_runtime": 0.9352, "eval_gooaq_pairs_samples_per_second": 136.874, "eval_gooaq_pairs_steps_per_second": 1.069, "step": 240 }, { "epoch": 0.24691358024691357, "eval_paws-pos_loss": 0.09364856779575348, "eval_paws-pos_runtime": 0.6826, "eval_paws-pos_samples_per_second": 187.531, "eval_paws-pos_steps_per_second": 1.465, "step": 240 }, { "epoch": 0.24691358024691357, "eval_global_dataset_loss": 1.6046679019927979, "eval_global_dataset_runtime": 13.3573, "eval_global_dataset_samples_per_second": 31.144, "eval_global_dataset_steps_per_second": 0.299, "step": 240 }, { "epoch": 0.24794238683127573, "grad_norm": 18.60301971435547, "learning_rate": 8.650051921079957e-06, "loss": 2.8804, "step": 241 }, { "epoch": 0.24897119341563786, "grad_norm": 15.278526306152344, "learning_rate": 8.686396677050883e-06, "loss": 1.7607, "step": 242 }, { "epoch": 0.25, "grad_norm": 23.43361473083496, "learning_rate": 8.722741433021805e-06, "loss": 3.877, "step": 243 }, { "epoch": 0.25102880658436216, "grad_norm": 13.10021686553955, "learning_rate": 8.75908618899273e-06, "loss": 1.6768, "step": 244 }, { "epoch": 0.25205761316872427, "grad_norm": 11.166913032531738, "learning_rate": 8.795430944963655e-06, "loss": 0.8368, "step": 245 }, { "epoch": 0.25308641975308643, "grad_norm": 12.863570213317871, "learning_rate": 8.831775700934577e-06, "loss": 1.6864, "step": 246 }, { "epoch": 0.25411522633744854, "grad_norm": 12.436037063598633, "learning_rate": 8.868120456905503e-06, "loss": 1.3478, "step": 247 }, { "epoch": 0.2551440329218107, "grad_norm": 13.372901916503906, "learning_rate": 8.904465212876427e-06, "loss": 1.7764, "step": 248 }, { "epoch": 0.25617283950617287, "grad_norm": 14.600456237792969, "learning_rate": 8.94080996884735e-06, "loss": 1.8495, "step": 249 }, { "epoch": 0.257201646090535, "grad_norm": 5.363873481750488, "learning_rate": 8.977154724818275e-06, "loss": 0.285, "step": 250 }, { "epoch": 0.25823045267489714, "grad_norm": 12.498610496520996, "learning_rate": 9.0134994807892e-06, "loss": 1.6516, "step": 251 }, { "epoch": 0.25925925925925924, "grad_norm": 13.058953285217285, "learning_rate": 9.049844236760123e-06, "loss": 1.5781, "step": 252 }, { "epoch": 0.2602880658436214, "grad_norm": 27.41628074645996, "learning_rate": 9.086188992731047e-06, "loss": 4.791, "step": 253 }, { "epoch": 0.2613168724279835, "grad_norm": 21.457780838012695, "learning_rate": 9.122533748701973e-06, "loss": 3.7502, "step": 254 }, { "epoch": 0.2623456790123457, "grad_norm": 13.806361198425293, "learning_rate": 9.158878504672895e-06, "loss": 2.6088, "step": 255 }, { "epoch": 0.26337448559670784, "grad_norm": 20.073028564453125, "learning_rate": 9.195223260643821e-06, "loss": 3.2271, "step": 256 }, { "epoch": 0.26440329218106995, "grad_norm": 10.656987190246582, "learning_rate": 9.231568016614745e-06, "loss": 1.3969, "step": 257 }, { "epoch": 0.2654320987654321, "grad_norm": 15.233261108398438, "learning_rate": 9.267912772585667e-06, "loss": 2.5281, "step": 258 }, { "epoch": 0.2664609053497942, "grad_norm": 17.79701805114746, "learning_rate": 9.304257528556593e-06, "loss": 2.8561, "step": 259 }, { "epoch": 0.2674897119341564, "grad_norm": 21.97925567626953, "learning_rate": 9.340602284527517e-06, "loss": 3.9495, "step": 260 }, { "epoch": 0.2674897119341564, "eval_Qnli-dev_cosine_accuracy": 0.689453125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8466764092445374, "eval_Qnli-dev_cosine_ap": 0.6871140391818324, "eval_Qnli-dev_cosine_f1": 0.6923076923076923, "eval_Qnli-dev_cosine_f1_threshold": 0.7720080018043518, "eval_Qnli-dev_cosine_precision": 0.5567010309278351, "eval_Qnli-dev_cosine_recall": 0.9152542372881356, "eval_Qnli-dev_dot_accuracy": 0.62890625, "eval_Qnli-dev_dot_accuracy_threshold": 437.21807861328125, "eval_Qnli-dev_dot_ap": 0.5853275156115014, "eval_Qnli-dev_dot_f1": 0.6721581548599671, "eval_Qnli-dev_dot_f1_threshold": 389.718017578125, "eval_Qnli-dev_dot_precision": 0.5498652291105122, "eval_Qnli-dev_dot_recall": 0.864406779661017, "eval_Qnli-dev_euclidean_accuracy": 0.681640625, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.65598201751709, "eval_Qnli-dev_euclidean_ap": 0.6939343189181921, "eval_Qnli-dev_euclidean_f1": 0.692436974789916, "eval_Qnli-dev_euclidean_f1_threshold": 14.59019660949707, "eval_Qnli-dev_euclidean_precision": 0.5738161559888579, "eval_Qnli-dev_euclidean_recall": 0.8728813559322034, "eval_Qnli-dev_manhattan_accuracy": 0.6796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 252.4490966796875, "eval_Qnli-dev_manhattan_ap": 0.6988409881438541, "eval_Qnli-dev_manhattan_f1": 0.6989966555183945, "eval_Qnli-dev_manhattan_f1_threshold": 291.64801025390625, "eval_Qnli-dev_manhattan_precision": 0.5773480662983426, "eval_Qnli-dev_manhattan_recall": 0.885593220338983, "eval_Qnli-dev_max_accuracy": 0.689453125, "eval_Qnli-dev_max_accuracy_threshold": 437.21807861328125, "eval_Qnli-dev_max_ap": 0.6988409881438541, "eval_Qnli-dev_max_f1": 0.6989966555183945, "eval_Qnli-dev_max_f1_threshold": 389.718017578125, "eval_Qnli-dev_max_precision": 0.5773480662983426, "eval_Qnli-dev_max_recall": 0.9152542372881356, "eval_allNLI-dev_cosine_accuracy": 0.6875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9255372285842896, "eval_allNLI-dev_cosine_ap": 0.5019030389232606, "eval_allNLI-dev_cosine_f1": 0.5451263537906137, "eval_allNLI-dev_cosine_f1_threshold": 0.8344321250915527, "eval_allNLI-dev_cosine_precision": 0.3963254593175853, "eval_allNLI-dev_cosine_recall": 0.8728323699421965, "eval_allNLI-dev_dot_accuracy": 0.666015625, "eval_allNLI-dev_dot_accuracy_threshold": 537.950439453125, "eval_allNLI-dev_dot_ap": 0.4104683717008714, "eval_allNLI-dev_dot_f1": 0.5224111282843895, "eval_allNLI-dev_dot_f1_threshold": 388.04473876953125, "eval_allNLI-dev_dot_precision": 0.35654008438818563, "eval_allNLI-dev_dot_recall": 0.976878612716763, "eval_allNLI-dev_euclidean_accuracy": 0.68359375, "eval_allNLI-dev_euclidean_accuracy_threshold": 8.841395378112793, "eval_allNLI-dev_euclidean_ap": 0.5047465503781696, "eval_allNLI-dev_euclidean_f1": 0.5511811023622047, "eval_allNLI-dev_euclidean_f1_threshold": 12.596972465515137, "eval_allNLI-dev_euclidean_precision": 0.417910447761194, "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, "eval_allNLI-dev_manhattan_accuracy": 0.689453125, "eval_allNLI-dev_manhattan_accuracy_threshold": 179.2438201904297, "eval_allNLI-dev_manhattan_ap": 0.5057091077225248, "eval_allNLI-dev_manhattan_f1": 0.5523012552301255, "eval_allNLI-dev_manhattan_f1_threshold": 238.46624755859375, "eval_allNLI-dev_manhattan_precision": 0.43278688524590164, "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, "eval_allNLI-dev_max_accuracy": 0.689453125, "eval_allNLI-dev_max_accuracy_threshold": 537.950439453125, "eval_allNLI-dev_max_ap": 0.5057091077225248, "eval_allNLI-dev_max_f1": 0.5523012552301255, "eval_allNLI-dev_max_f1_threshold": 388.04473876953125, "eval_allNLI-dev_max_precision": 0.43278688524590164, "eval_allNLI-dev_max_recall": 0.976878612716763, "eval_sequential_score": 0.6988409881438541, "eval_sts-test_pearson_cosine": 0.6101007482373539, "eval_sts-test_pearson_dot": 0.5171078610309542, "eval_sts-test_pearson_euclidean": 0.6100203378514052, "eval_sts-test_pearson_manhattan": 0.6071628090659706, "eval_sts-test_pearson_max": 0.6101007482373539, "eval_sts-test_spearman_cosine": 0.628839936686977, "eval_sts-test_spearman_dot": 0.5099761113052808, "eval_sts-test_spearman_euclidean": 0.6134216055947527, "eval_sts-test_spearman_manhattan": 0.6119040008184474, "eval_sts-test_spearman_max": 0.628839936686977, "eval_vitaminc-pairs_loss": 2.755634069442749, "eval_vitaminc-pairs_runtime": 3.1645, "eval_vitaminc-pairs_samples_per_second": 40.448, "eval_vitaminc-pairs_steps_per_second": 0.316, "step": 260 }, { "epoch": 0.2674897119341564, "eval_negation-triplets_loss": 2.3755366802215576, "eval_negation-triplets_runtime": 0.7383, "eval_negation-triplets_samples_per_second": 173.38, "eval_negation-triplets_steps_per_second": 1.355, "step": 260 }, { "epoch": 0.2674897119341564, "eval_scitail-pairs-pos_loss": 0.38322263956069946, "eval_scitail-pairs-pos_runtime": 0.7914, "eval_scitail-pairs-pos_samples_per_second": 161.747, "eval_scitail-pairs-pos_steps_per_second": 1.264, "step": 260 }, { "epoch": 0.2674897119341564, "eval_scitail-pairs-qa_loss": 0.10294085741043091, "eval_scitail-pairs-qa_runtime": 0.5772, "eval_scitail-pairs-qa_samples_per_second": 221.746, "eval_scitail-pairs-qa_steps_per_second": 1.732, "step": 260 }, { "epoch": 0.2674897119341564, "eval_xsum-pairs_loss": 2.2755026817321777, "eval_xsum-pairs_runtime": 3.0186, "eval_xsum-pairs_samples_per_second": 42.403, "eval_xsum-pairs_steps_per_second": 0.331, "step": 260 }, { "epoch": 0.2674897119341564, "eval_sciq_pairs_loss": 0.24845057725906372, "eval_sciq_pairs_runtime": 3.4052, "eval_sciq_pairs_samples_per_second": 37.59, "eval_sciq_pairs_steps_per_second": 0.294, "step": 260 }, { "epoch": 0.2674897119341564, "eval_qasc_pairs_loss": 1.4006080627441406, "eval_qasc_pairs_runtime": 0.5915, "eval_qasc_pairs_samples_per_second": 216.401, "eval_qasc_pairs_steps_per_second": 1.691, "step": 260 }, { "epoch": 0.2674897119341564, "eval_openbookqa_pairs_loss": 2.0268588066101074, "eval_openbookqa_pairs_runtime": 0.5688, "eval_openbookqa_pairs_samples_per_second": 225.017, "eval_openbookqa_pairs_steps_per_second": 1.758, "step": 260 }, { "epoch": 0.2674897119341564, "eval_msmarco_pairs_loss": 2.9229013919830322, "eval_msmarco_pairs_runtime": 1.5117, "eval_msmarco_pairs_samples_per_second": 84.673, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 260 }, { "epoch": 0.2674897119341564, "eval_nq_pairs_loss": 3.4232370853424072, "eval_nq_pairs_runtime": 2.8888, "eval_nq_pairs_samples_per_second": 44.309, "eval_nq_pairs_steps_per_second": 0.346, "step": 260 }, { "epoch": 0.2674897119341564, "eval_trivia_pairs_loss": 3.135023832321167, "eval_trivia_pairs_runtime": 3.4305, "eval_trivia_pairs_samples_per_second": 37.312, "eval_trivia_pairs_steps_per_second": 0.291, "step": 260 }, { "epoch": 0.2674897119341564, "eval_gooaq_pairs_loss": 2.3922266960144043, "eval_gooaq_pairs_runtime": 0.9351, "eval_gooaq_pairs_samples_per_second": 136.888, "eval_gooaq_pairs_steps_per_second": 1.069, "step": 260 }, { "epoch": 0.2674897119341564, "eval_paws-pos_loss": 0.08843281120061874, "eval_paws-pos_runtime": 0.6715, "eval_paws-pos_samples_per_second": 190.608, "eval_paws-pos_steps_per_second": 1.489, "step": 260 }, { "epoch": 0.2674897119341564, "eval_global_dataset_loss": 1.4206469058990479, "eval_global_dataset_runtime": 13.3286, "eval_global_dataset_samples_per_second": 31.211, "eval_global_dataset_steps_per_second": 0.3, "step": 260 }, { "epoch": 0.26851851851851855, "grad_norm": 12.555780410766602, "learning_rate": 9.376947040498441e-06, "loss": 1.7349, "step": 261 }, { "epoch": 0.26954732510288065, "grad_norm": 13.038395881652832, "learning_rate": 9.413291796469365e-06, "loss": 1.7189, "step": 262 }, { "epoch": 0.2705761316872428, "grad_norm": 13.202376365661621, "learning_rate": 9.44963655244029e-06, "loss": 1.7282, "step": 263 }, { "epoch": 0.2716049382716049, "grad_norm": 17.815078735351562, "learning_rate": 9.485981308411213e-06, "loss": 2.9146, "step": 264 }, { "epoch": 0.2726337448559671, "grad_norm": 13.262603759765625, "learning_rate": 9.522326064382139e-06, "loss": 1.7603, "step": 265 }, { "epoch": 0.2736625514403292, "grad_norm": 12.508451461791992, "learning_rate": 9.558670820353063e-06, "loss": 1.823, "step": 266 }, { "epoch": 0.27469135802469136, "grad_norm": 12.313492774963379, "learning_rate": 9.595015576323985e-06, "loss": 1.5984, "step": 267 }, { "epoch": 0.2757201646090535, "grad_norm": 12.14000415802002, "learning_rate": 9.631360332294911e-06, "loss": 1.651, "step": 268 }, { "epoch": 0.2767489711934156, "grad_norm": 14.698229789733887, "learning_rate": 9.667705088265835e-06, "loss": 2.615, "step": 269 }, { "epoch": 0.2777777777777778, "grad_norm": 12.209722518920898, "learning_rate": 9.704049844236759e-06, "loss": 2.6608, "step": 270 }, { "epoch": 0.2788065843621399, "grad_norm": 13.545384407043457, "learning_rate": 9.740394600207683e-06, "loss": 2.2558, "step": 271 }, { "epoch": 0.27983539094650206, "grad_norm": 11.335700988769531, "learning_rate": 9.776739356178609e-06, "loss": 1.2155, "step": 272 }, { "epoch": 0.2808641975308642, "grad_norm": 11.750332832336426, "learning_rate": 9.813084112149531e-06, "loss": 1.4727, "step": 273 }, { "epoch": 0.28189300411522633, "grad_norm": 20.200624465942383, "learning_rate": 9.849428868120455e-06, "loss": 3.6394, "step": 274 }, { "epoch": 0.2829218106995885, "grad_norm": 11.674026489257812, "learning_rate": 9.88577362409138e-06, "loss": 2.1385, "step": 275 }, { "epoch": 0.2839506172839506, "grad_norm": 16.86899185180664, "learning_rate": 9.922118380062303e-06, "loss": 2.3953, "step": 276 }, { "epoch": 0.28497942386831276, "grad_norm": 11.407617568969727, "learning_rate": 9.958463136033229e-06, "loss": 1.488, "step": 277 }, { "epoch": 0.28600823045267487, "grad_norm": 11.607297897338867, "learning_rate": 9.994807892004153e-06, "loss": 1.2893, "step": 278 }, { "epoch": 0.28703703703703703, "grad_norm": 11.757554054260254, "learning_rate": 1.0031152647975077e-05, "loss": 0.7678, "step": 279 }, { "epoch": 0.2880658436213992, "grad_norm": 12.077320098876953, "learning_rate": 1.0067497403946001e-05, "loss": 2.6801, "step": 280 }, { "epoch": 0.2880658436213992, "eval_Qnli-dev_cosine_accuracy": 0.67578125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8415871262550354, "eval_Qnli-dev_cosine_ap": 0.6912671058237406, "eval_Qnli-dev_cosine_f1": 0.6869983948635633, "eval_Qnli-dev_cosine_f1_threshold": 0.7735732197761536, "eval_Qnli-dev_cosine_precision": 0.5529715762273901, "eval_Qnli-dev_cosine_recall": 0.9067796610169492, "eval_Qnli-dev_dot_accuracy": 0.62109375, "eval_Qnli-dev_dot_accuracy_threshold": 432.6428527832031, "eval_Qnli-dev_dot_ap": 0.5797796012757845, "eval_Qnli-dev_dot_f1": 0.6719492868462758, "eval_Qnli-dev_dot_f1_threshold": 393.2371826171875, "eval_Qnli-dev_dot_precision": 0.5367088607594936, "eval_Qnli-dev_dot_recall": 0.8983050847457628, "eval_Qnli-dev_euclidean_accuracy": 0.67578125, "eval_Qnli-dev_euclidean_accuracy_threshold": 11.884414672851562, "eval_Qnli-dev_euclidean_ap": 0.6992665036179804, "eval_Qnli-dev_euclidean_f1": 0.6915584415584415, "eval_Qnli-dev_euclidean_f1_threshold": 15.279256820678711, "eval_Qnli-dev_euclidean_precision": 0.5605263157894737, "eval_Qnli-dev_euclidean_recall": 0.902542372881356, "eval_Qnli-dev_manhattan_accuracy": 0.67578125, "eval_Qnli-dev_manhattan_accuracy_threshold": 254.77352905273438, "eval_Qnli-dev_manhattan_ap": 0.7020052347678023, "eval_Qnli-dev_manhattan_f1": 0.6893039049235994, "eval_Qnli-dev_manhattan_f1_threshold": 293.3916931152344, "eval_Qnli-dev_manhattan_precision": 0.5750708215297451, "eval_Qnli-dev_manhattan_recall": 0.8601694915254238, "eval_Qnli-dev_max_accuracy": 0.67578125, "eval_Qnli-dev_max_accuracy_threshold": 432.6428527832031, "eval_Qnli-dev_max_ap": 0.7020052347678023, "eval_Qnli-dev_max_f1": 0.6915584415584415, "eval_Qnli-dev_max_f1_threshold": 393.2371826171875, "eval_Qnli-dev_max_precision": 0.5750708215297451, "eval_Qnli-dev_max_recall": 0.9067796610169492, "eval_allNLI-dev_cosine_accuracy": 0.703125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.923446536064148, "eval_allNLI-dev_cosine_ap": 0.5118589589722005, "eval_allNLI-dev_cosine_f1": 0.5532786885245901, "eval_allNLI-dev_cosine_f1_threshold": 0.849584698677063, "eval_allNLI-dev_cosine_precision": 0.42857142857142855, "eval_allNLI-dev_cosine_recall": 0.7803468208092486, "eval_allNLI-dev_dot_accuracy": 0.6640625, "eval_allNLI-dev_dot_accuracy_threshold": 588.6370239257812, "eval_allNLI-dev_dot_ap": 0.4159723261021614, "eval_allNLI-dev_dot_f1": 0.5263157894736842, "eval_allNLI-dev_dot_f1_threshold": 431.5047607421875, "eval_allNLI-dev_dot_precision": 0.3835978835978836, "eval_allNLI-dev_dot_recall": 0.838150289017341, "eval_allNLI-dev_euclidean_accuracy": 0.705078125, "eval_allNLI-dev_euclidean_accuracy_threshold": 9.087307929992676, "eval_allNLI-dev_euclidean_ap": 0.5172051717681727, "eval_allNLI-dev_euclidean_f1": 0.5603112840466926, "eval_allNLI-dev_euclidean_f1_threshold": 13.293811798095703, "eval_allNLI-dev_euclidean_precision": 0.4222873900293255, "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, "eval_allNLI-dev_manhattan_accuracy": 0.701171875, "eval_allNLI-dev_manhattan_accuracy_threshold": 185.59756469726562, "eval_allNLI-dev_manhattan_ap": 0.5124717600689392, "eval_allNLI-dev_manhattan_f1": 0.5559999999999999, "eval_allNLI-dev_manhattan_f1_threshold": 259.2859802246094, "eval_allNLI-dev_manhattan_precision": 0.42507645259938837, "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, "eval_allNLI-dev_max_accuracy": 0.705078125, "eval_allNLI-dev_max_accuracy_threshold": 588.6370239257812, "eval_allNLI-dev_max_ap": 0.5172051717681727, "eval_allNLI-dev_max_f1": 0.5603112840466926, "eval_allNLI-dev_max_f1_threshold": 431.5047607421875, "eval_allNLI-dev_max_precision": 0.42857142857142855, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7020052347678023, "eval_sts-test_pearson_cosine": 0.6687245612921815, "eval_sts-test_pearson_dot": 0.5734739396427984, "eval_sts-test_pearson_euclidean": 0.6718719757622864, "eval_sts-test_pearson_manhattan": 0.6670343168111552, "eval_sts-test_pearson_max": 0.6718719757622864, "eval_sts-test_spearman_cosine": 0.6819400829060788, "eval_sts-test_spearman_dot": 0.556021400171074, "eval_sts-test_spearman_euclidean": 0.6684485621243225, "eval_sts-test_spearman_manhattan": 0.665065859027008, "eval_sts-test_spearman_max": 0.6819400829060788, "eval_vitaminc-pairs_loss": 2.8099753856658936, "eval_vitaminc-pairs_runtime": 3.1872, "eval_vitaminc-pairs_samples_per_second": 40.16, "eval_vitaminc-pairs_steps_per_second": 0.314, "step": 280 }, { "epoch": 0.2880658436213992, "eval_negation-triplets_loss": 2.1310224533081055, "eval_negation-triplets_runtime": 0.735, "eval_negation-triplets_samples_per_second": 174.146, "eval_negation-triplets_steps_per_second": 1.361, "step": 280 }, { "epoch": 0.2880658436213992, "eval_scitail-pairs-pos_loss": 0.33672308921813965, "eval_scitail-pairs-pos_runtime": 0.7788, "eval_scitail-pairs-pos_samples_per_second": 164.351, "eval_scitail-pairs-pos_steps_per_second": 1.284, "step": 280 }, { "epoch": 0.2880658436213992, "eval_scitail-pairs-qa_loss": 0.10085483640432358, "eval_scitail-pairs-qa_runtime": 0.5632, "eval_scitail-pairs-qa_samples_per_second": 227.27, "eval_scitail-pairs-qa_steps_per_second": 1.776, "step": 280 }, { "epoch": 0.2880658436213992, "eval_xsum-pairs_loss": 1.8792424201965332, "eval_xsum-pairs_runtime": 3.0158, "eval_xsum-pairs_samples_per_second": 42.443, "eval_xsum-pairs_steps_per_second": 0.332, "step": 280 }, { "epoch": 0.2880658436213992, "eval_sciq_pairs_loss": 0.21897011995315552, "eval_sciq_pairs_runtime": 3.4085, "eval_sciq_pairs_samples_per_second": 37.553, "eval_sciq_pairs_steps_per_second": 0.293, "step": 280 }, { "epoch": 0.2880658436213992, "eval_qasc_pairs_loss": 1.3423388004302979, "eval_qasc_pairs_runtime": 0.5957, "eval_qasc_pairs_samples_per_second": 214.885, "eval_qasc_pairs_steps_per_second": 1.679, "step": 280 }, { "epoch": 0.2880658436213992, "eval_openbookqa_pairs_loss": 1.8888919353485107, "eval_openbookqa_pairs_runtime": 0.5744, "eval_openbookqa_pairs_samples_per_second": 222.832, "eval_openbookqa_pairs_steps_per_second": 1.741, "step": 280 }, { "epoch": 0.2880658436213992, "eval_msmarco_pairs_loss": 2.50892972946167, "eval_msmarco_pairs_runtime": 1.5144, "eval_msmarco_pairs_samples_per_second": 84.524, "eval_msmarco_pairs_steps_per_second": 0.66, "step": 280 }, { "epoch": 0.2880658436213992, "eval_nq_pairs_loss": 3.0089173316955566, "eval_nq_pairs_runtime": 2.9076, "eval_nq_pairs_samples_per_second": 44.022, "eval_nq_pairs_steps_per_second": 0.344, "step": 280 }, { "epoch": 0.2880658436213992, "eval_trivia_pairs_loss": 2.672011137008667, "eval_trivia_pairs_runtime": 3.4362, "eval_trivia_pairs_samples_per_second": 37.25, "eval_trivia_pairs_steps_per_second": 0.291, "step": 280 }, { "epoch": 0.2880658436213992, "eval_gooaq_pairs_loss": 2.007201671600342, "eval_gooaq_pairs_runtime": 0.9479, "eval_gooaq_pairs_samples_per_second": 135.03, "eval_gooaq_pairs_steps_per_second": 1.055, "step": 280 }, { "epoch": 0.2880658436213992, "eval_paws-pos_loss": 0.0831906795501709, "eval_paws-pos_runtime": 0.6861, "eval_paws-pos_samples_per_second": 186.568, "eval_paws-pos_steps_per_second": 1.458, "step": 280 }, { "epoch": 0.2880658436213992, "eval_global_dataset_loss": 1.250847578048706, "eval_global_dataset_runtime": 13.3637, "eval_global_dataset_samples_per_second": 31.129, "eval_global_dataset_steps_per_second": 0.299, "step": 280 }, { "epoch": 0.2890946502057613, "grad_norm": 11.982275009155273, "learning_rate": 1.0103842159916927e-05, "loss": 1.4147, "step": 281 }, { "epoch": 0.29012345679012347, "grad_norm": 4.2981367111206055, "learning_rate": 1.0140186915887849e-05, "loss": 0.2152, "step": 282 }, { "epoch": 0.2911522633744856, "grad_norm": 11.808545112609863, "learning_rate": 1.0176531671858773e-05, "loss": 1.3908, "step": 283 }, { "epoch": 0.29218106995884774, "grad_norm": 17.394630432128906, "learning_rate": 1.0212876427829699e-05, "loss": 2.6877, "step": 284 }, { "epoch": 0.2932098765432099, "grad_norm": 12.021160125732422, "learning_rate": 1.0249221183800621e-05, "loss": 1.1107, "step": 285 }, { "epoch": 0.294238683127572, "grad_norm": 21.536834716796875, "learning_rate": 1.0285565939771547e-05, "loss": 3.3415, "step": 286 }, { "epoch": 0.2952674897119342, "grad_norm": 13.677733421325684, "learning_rate": 1.032191069574247e-05, "loss": 1.6332, "step": 287 }, { "epoch": 0.2962962962962963, "grad_norm": 12.351741790771484, "learning_rate": 1.0358255451713395e-05, "loss": 2.5232, "step": 288 }, { "epoch": 0.29732510288065844, "grad_norm": 23.78185272216797, "learning_rate": 1.0394600207684319e-05, "loss": 4.1329, "step": 289 }, { "epoch": 0.29835390946502055, "grad_norm": 13.199095726013184, "learning_rate": 1.0430944963655244e-05, "loss": 1.3592, "step": 290 }, { "epoch": 0.2993827160493827, "grad_norm": 3.1243560314178467, "learning_rate": 1.0467289719626167e-05, "loss": 0.1327, "step": 291 }, { "epoch": 0.3004115226337449, "grad_norm": 14.93930721282959, "learning_rate": 1.050363447559709e-05, "loss": 2.2552, "step": 292 }, { "epoch": 0.301440329218107, "grad_norm": 13.743828773498535, "learning_rate": 1.0539979231568017e-05, "loss": 1.5245, "step": 293 }, { "epoch": 0.30246913580246915, "grad_norm": 14.517548561096191, "learning_rate": 1.0576323987538939e-05, "loss": 2.9299, "step": 294 }, { "epoch": 0.30349794238683125, "grad_norm": 12.055514335632324, "learning_rate": 1.0612668743509865e-05, "loss": 0.6583, "step": 295 }, { "epoch": 0.3045267489711934, "grad_norm": 15.698110580444336, "learning_rate": 1.0649013499480789e-05, "loss": 2.0637, "step": 296 }, { "epoch": 0.3055555555555556, "grad_norm": 19.17127227783203, "learning_rate": 1.0685358255451711e-05, "loss": 2.6771, "step": 297 }, { "epoch": 0.3065843621399177, "grad_norm": 13.811963081359863, "learning_rate": 1.0721703011422637e-05, "loss": 2.755, "step": 298 }, { "epoch": 0.30761316872427985, "grad_norm": 24.679113388061523, "learning_rate": 1.075804776739356e-05, "loss": 3.3638, "step": 299 }, { "epoch": 0.30864197530864196, "grad_norm": 10.830105781555176, "learning_rate": 1.0794392523364485e-05, "loss": 0.7053, "step": 300 }, { "epoch": 0.30864197530864196, "eval_Qnli-dev_cosine_accuracy": 0.66796875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8199188113212585, "eval_Qnli-dev_cosine_ap": 0.6995325123375109, "eval_Qnli-dev_cosine_f1": 0.6897689768976898, "eval_Qnli-dev_cosine_f1_threshold": 0.7571749091148376, "eval_Qnli-dev_cosine_precision": 0.5648648648648649, "eval_Qnli-dev_cosine_recall": 0.885593220338983, "eval_Qnli-dev_dot_accuracy": 0.62890625, "eval_Qnli-dev_dot_accuracy_threshold": 423.8924255371094, "eval_Qnli-dev_dot_ap": 0.5832842248431986, "eval_Qnli-dev_dot_f1": 0.6645865834633385, "eval_Qnli-dev_dot_f1_threshold": 364.68280029296875, "eval_Qnli-dev_dot_precision": 0.5259259259259259, "eval_Qnli-dev_dot_recall": 0.902542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.689453125, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.101522445678711, "eval_Qnli-dev_euclidean_ap": 0.7073708901870908, "eval_Qnli-dev_euclidean_f1": 0.6832061068702291, "eval_Qnli-dev_euclidean_f1_threshold": 14.215328216552734, "eval_Qnli-dev_euclidean_precision": 0.6215277777777778, "eval_Qnli-dev_euclidean_recall": 0.7584745762711864, "eval_Qnli-dev_manhattan_accuracy": 0.689453125, "eval_Qnli-dev_manhattan_accuracy_threshold": 253.76065063476562, "eval_Qnli-dev_manhattan_ap": 0.709921595260574, "eval_Qnli-dev_manhattan_f1": 0.6918032786885246, "eval_Qnli-dev_manhattan_f1_threshold": 316.2789306640625, "eval_Qnli-dev_manhattan_precision": 0.5641711229946524, "eval_Qnli-dev_manhattan_recall": 0.8940677966101694, "eval_Qnli-dev_max_accuracy": 0.689453125, "eval_Qnli-dev_max_accuracy_threshold": 423.8924255371094, "eval_Qnli-dev_max_ap": 0.709921595260574, "eval_Qnli-dev_max_f1": 0.6918032786885246, "eval_Qnli-dev_max_f1_threshold": 364.68280029296875, "eval_Qnli-dev_max_precision": 0.6215277777777778, "eval_Qnli-dev_max_recall": 0.902542372881356, "eval_allNLI-dev_cosine_accuracy": 0.701171875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9037463068962097, "eval_allNLI-dev_cosine_ap": 0.5128913377360289, "eval_allNLI-dev_cosine_f1": 0.5613305613305613, "eval_allNLI-dev_cosine_f1_threshold": 0.8335003852844238, "eval_allNLI-dev_cosine_precision": 0.4383116883116883, "eval_allNLI-dev_cosine_recall": 0.7803468208092486, "eval_allNLI-dev_dot_accuracy": 0.6640625, "eval_allNLI-dev_dot_accuracy_threshold": 571.2269897460938, "eval_allNLI-dev_dot_ap": 0.41426010345468745, "eval_allNLI-dev_dot_f1": 0.5328330206378986, "eval_allNLI-dev_dot_f1_threshold": 416.7803955078125, "eval_allNLI-dev_dot_precision": 0.39444444444444443, "eval_allNLI-dev_dot_recall": 0.8208092485549133, "eval_allNLI-dev_euclidean_accuracy": 0.70703125, "eval_allNLI-dev_euclidean_accuracy_threshold": 9.502985954284668, "eval_allNLI-dev_euclidean_ap": 0.5173963035988031, "eval_allNLI-dev_euclidean_f1": 0.563600782778865, "eval_allNLI-dev_euclidean_f1_threshold": 13.729564666748047, "eval_allNLI-dev_euclidean_precision": 0.4260355029585799, "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, "eval_allNLI-dev_manhattan_accuracy": 0.703125, "eval_allNLI-dev_manhattan_accuracy_threshold": 195.28097534179688, "eval_allNLI-dev_manhattan_ap": 0.5158767394143106, "eval_allNLI-dev_manhattan_f1": 0.5584158415841585, "eval_allNLI-dev_manhattan_f1_threshold": 274.1793212890625, "eval_allNLI-dev_manhattan_precision": 0.4246987951807229, "eval_allNLI-dev_manhattan_recall": 0.815028901734104, "eval_allNLI-dev_max_accuracy": 0.70703125, "eval_allNLI-dev_max_accuracy_threshold": 571.2269897460938, "eval_allNLI-dev_max_ap": 0.5173963035988031, "eval_allNLI-dev_max_f1": 0.563600782778865, "eval_allNLI-dev_max_f1_threshold": 416.7803955078125, "eval_allNLI-dev_max_precision": 0.4383116883116883, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.709921595260574, "eval_sts-test_pearson_cosine": 0.6984052089046398, "eval_sts-test_pearson_dot": 0.6135443185288071, "eval_sts-test_pearson_euclidean": 0.7020042331376551, "eval_sts-test_pearson_manhattan": 0.6988284227220897, "eval_sts-test_pearson_max": 0.7020042331376551, "eval_sts-test_spearman_cosine": 0.7071684275104347, "eval_sts-test_spearman_dot": 0.5928461838910447, "eval_sts-test_spearman_euclidean": 0.6956956808813058, "eval_sts-test_spearman_manhattan": 0.6929552031362194, "eval_sts-test_spearman_max": 0.7071684275104347, "eval_vitaminc-pairs_loss": 2.8123340606689453, "eval_vitaminc-pairs_runtime": 3.2467, "eval_vitaminc-pairs_samples_per_second": 39.424, "eval_vitaminc-pairs_steps_per_second": 0.308, "step": 300 }, { "epoch": 0.30864197530864196, "eval_negation-triplets_loss": 1.9351751804351807, "eval_negation-triplets_runtime": 0.7607, "eval_negation-triplets_samples_per_second": 168.261, "eval_negation-triplets_steps_per_second": 1.315, "step": 300 }, { "epoch": 0.30864197530864196, "eval_scitail-pairs-pos_loss": 0.314525842666626, "eval_scitail-pairs-pos_runtime": 0.834, "eval_scitail-pairs-pos_samples_per_second": 153.478, "eval_scitail-pairs-pos_steps_per_second": 1.199, "step": 300 }, { "epoch": 0.30864197530864196, "eval_scitail-pairs-qa_loss": 0.07327353954315186, "eval_scitail-pairs-qa_runtime": 0.645, "eval_scitail-pairs-qa_samples_per_second": 198.438, "eval_scitail-pairs-qa_steps_per_second": 1.55, "step": 300 }, { "epoch": 0.30864197530864196, "eval_xsum-pairs_loss": 1.707858920097351, "eval_xsum-pairs_runtime": 3.0454, "eval_xsum-pairs_samples_per_second": 42.031, "eval_xsum-pairs_steps_per_second": 0.328, "step": 300 }, { "epoch": 0.30864197530864196, "eval_sciq_pairs_loss": 0.19993656873703003, "eval_sciq_pairs_runtime": 3.4559, "eval_sciq_pairs_samples_per_second": 37.038, "eval_sciq_pairs_steps_per_second": 0.289, "step": 300 }, { "epoch": 0.30864197530864196, "eval_qasc_pairs_loss": 1.3199025392532349, "eval_qasc_pairs_runtime": 0.5957, "eval_qasc_pairs_samples_per_second": 214.886, "eval_qasc_pairs_steps_per_second": 1.679, "step": 300 }, { "epoch": 0.30864197530864196, "eval_openbookqa_pairs_loss": 1.777836799621582, "eval_openbookqa_pairs_runtime": 0.5685, "eval_openbookqa_pairs_samples_per_second": 225.137, "eval_openbookqa_pairs_steps_per_second": 1.759, "step": 300 }, { "epoch": 0.30864197530864196, "eval_msmarco_pairs_loss": 2.3514328002929688, "eval_msmarco_pairs_runtime": 1.5104, "eval_msmarco_pairs_samples_per_second": 84.746, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 300 }, { "epoch": 0.30864197530864196, "eval_nq_pairs_loss": 2.9204494953155518, "eval_nq_pairs_runtime": 2.8927, "eval_nq_pairs_samples_per_second": 44.249, "eval_nq_pairs_steps_per_second": 0.346, "step": 300 }, { "epoch": 0.30864197530864196, "eval_trivia_pairs_loss": 2.5231525897979736, "eval_trivia_pairs_runtime": 3.4302, "eval_trivia_pairs_samples_per_second": 37.315, "eval_trivia_pairs_steps_per_second": 0.292, "step": 300 }, { "epoch": 0.30864197530864196, "eval_gooaq_pairs_loss": 1.7379323244094849, "eval_gooaq_pairs_runtime": 0.956, "eval_gooaq_pairs_samples_per_second": 133.895, "eval_gooaq_pairs_steps_per_second": 1.046, "step": 300 }, { "epoch": 0.30864197530864196, "eval_paws-pos_loss": 0.06269291788339615, "eval_paws-pos_runtime": 0.6964, "eval_paws-pos_samples_per_second": 183.815, "eval_paws-pos_steps_per_second": 1.436, "step": 300 }, { "epoch": 0.30864197530864196, "eval_global_dataset_loss": 1.1462713479995728, "eval_global_dataset_runtime": 13.4482, "eval_global_dataset_samples_per_second": 30.933, "eval_global_dataset_steps_per_second": 0.297, "step": 300 }, { "epoch": 0.3096707818930041, "grad_norm": 22.287065505981445, "learning_rate": 1.0830737279335409e-05, "loss": 3.1007, "step": 301 }, { "epoch": 0.31069958847736623, "grad_norm": 11.660481452941895, "learning_rate": 1.0867082035306334e-05, "loss": 1.0158, "step": 302 }, { "epoch": 0.3117283950617284, "grad_norm": 21.95073699951172, "learning_rate": 1.0903426791277257e-05, "loss": 2.8023, "step": 303 }, { "epoch": 0.31275720164609055, "grad_norm": 22.914106369018555, "learning_rate": 1.0939771547248182e-05, "loss": 3.0464, "step": 304 }, { "epoch": 0.31378600823045266, "grad_norm": 11.899238586425781, "learning_rate": 1.0976116303219106e-05, "loss": 1.1718, "step": 305 }, { "epoch": 0.3148148148148148, "grad_norm": 11.318571090698242, "learning_rate": 1.1012461059190029e-05, "loss": 1.0429, "step": 306 }, { "epoch": 0.31584362139917693, "grad_norm": 13.055448532104492, "learning_rate": 1.1048805815160954e-05, "loss": 1.3403, "step": 307 }, { "epoch": 0.3168724279835391, "grad_norm": 17.78560447692871, "learning_rate": 1.1085150571131879e-05, "loss": 2.5188, "step": 308 }, { "epoch": 0.31790123456790126, "grad_norm": 13.747932434082031, "learning_rate": 1.1121495327102803e-05, "loss": 1.3855, "step": 309 }, { "epoch": 0.31893004115226337, "grad_norm": 12.256226539611816, "learning_rate": 1.1157840083073727e-05, "loss": 1.1986, "step": 310 }, { "epoch": 0.31995884773662553, "grad_norm": 16.054819107055664, "learning_rate": 1.1194184839044652e-05, "loss": 2.2651, "step": 311 }, { "epoch": 0.32098765432098764, "grad_norm": 16.100807189941406, "learning_rate": 1.1230529595015575e-05, "loss": 2.5339, "step": 312 }, { "epoch": 0.3220164609053498, "grad_norm": 11.739322662353516, "learning_rate": 1.12668743509865e-05, "loss": 1.3687, "step": 313 }, { "epoch": 0.3230452674897119, "grad_norm": 13.98962116241455, "learning_rate": 1.1303219106957424e-05, "loss": 2.1529, "step": 314 }, { "epoch": 0.32407407407407407, "grad_norm": 16.564695358276367, "learning_rate": 1.1339563862928347e-05, "loss": 2.3677, "step": 315 }, { "epoch": 0.32510288065843623, "grad_norm": 18.29457664489746, "learning_rate": 1.1375908618899272e-05, "loss": 2.6956, "step": 316 }, { "epoch": 0.32613168724279834, "grad_norm": 17.147747039794922, "learning_rate": 1.1412253374870196e-05, "loss": 2.4821, "step": 317 }, { "epoch": 0.3271604938271605, "grad_norm": 11.078995704650879, "learning_rate": 1.144859813084112e-05, "loss": 1.2667, "step": 318 }, { "epoch": 0.3281893004115226, "grad_norm": 16.17939567565918, "learning_rate": 1.1484942886812044e-05, "loss": 2.4697, "step": 319 }, { "epoch": 0.3292181069958848, "grad_norm": 17.299062728881836, "learning_rate": 1.152128764278297e-05, "loss": 2.488, "step": 320 }, { "epoch": 0.3292181069958848, "eval_Qnli-dev_cosine_accuracy": 0.671875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8567001223564148, "eval_Qnli-dev_cosine_ap": 0.7064956495603609, "eval_Qnli-dev_cosine_f1": 0.6941580756013745, "eval_Qnli-dev_cosine_f1_threshold": 0.8039928674697876, "eval_Qnli-dev_cosine_precision": 0.5838150289017341, "eval_Qnli-dev_cosine_recall": 0.8559322033898306, "eval_Qnli-dev_dot_accuracy": 0.638671875, "eval_Qnli-dev_dot_accuracy_threshold": 439.119384765625, "eval_Qnli-dev_dot_ap": 0.5828187078602152, "eval_Qnli-dev_dot_f1": 0.6656534954407294, "eval_Qnli-dev_dot_f1_threshold": 394.38739013671875, "eval_Qnli-dev_dot_precision": 0.518957345971564, "eval_Qnli-dev_dot_recall": 0.9279661016949152, "eval_Qnli-dev_euclidean_accuracy": 0.689453125, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.089064598083496, "eval_Qnli-dev_euclidean_ap": 0.7175927858077272, "eval_Qnli-dev_euclidean_f1": 0.6872964169381107, "eval_Qnli-dev_euclidean_f1_threshold": 15.139961242675781, "eval_Qnli-dev_euclidean_precision": 0.5582010582010583, "eval_Qnli-dev_euclidean_recall": 0.8940677966101694, "eval_Qnli-dev_manhattan_accuracy": 0.685546875, "eval_Qnli-dev_manhattan_accuracy_threshold": 247.93148803710938, "eval_Qnli-dev_manhattan_ap": 0.7165821170066472, "eval_Qnli-dev_manhattan_f1": 0.6962457337883959, "eval_Qnli-dev_manhattan_f1_threshold": 293.9665222167969, "eval_Qnli-dev_manhattan_precision": 0.5828571428571429, "eval_Qnli-dev_manhattan_recall": 0.864406779661017, "eval_Qnli-dev_max_accuracy": 0.689453125, "eval_Qnli-dev_max_accuracy_threshold": 439.119384765625, "eval_Qnli-dev_max_ap": 0.7175927858077272, "eval_Qnli-dev_max_f1": 0.6962457337883959, "eval_Qnli-dev_max_f1_threshold": 394.38739013671875, "eval_Qnli-dev_max_precision": 0.5838150289017341, "eval_Qnli-dev_max_recall": 0.9279661016949152, "eval_allNLI-dev_cosine_accuracy": 0.705078125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9175689816474915, "eval_allNLI-dev_cosine_ap": 0.5186646269211405, "eval_allNLI-dev_cosine_f1": 0.5641025641025641, "eval_allNLI-dev_cosine_f1_threshold": 0.8398832082748413, "eval_allNLI-dev_cosine_precision": 0.4281437125748503, "eval_allNLI-dev_cosine_recall": 0.8265895953757225, "eval_allNLI-dev_dot_accuracy": 0.662109375, "eval_allNLI-dev_dot_accuracy_threshold": 579.238037109375, "eval_allNLI-dev_dot_ap": 0.4102009424801914, "eval_allNLI-dev_dot_f1": 0.5353159851301116, "eval_allNLI-dev_dot_f1_threshold": 434.8841552734375, "eval_allNLI-dev_dot_precision": 0.39452054794520547, "eval_allNLI-dev_dot_recall": 0.8323699421965318, "eval_allNLI-dev_euclidean_accuracy": 0.705078125, "eval_allNLI-dev_euclidean_accuracy_threshold": 8.86550521850586, "eval_allNLI-dev_euclidean_ap": 0.521605930352391, "eval_allNLI-dev_euclidean_f1": 0.5725971370143149, "eval_allNLI-dev_euclidean_f1_threshold": 12.825733184814453, "eval_allNLI-dev_euclidean_precision": 0.4430379746835443, "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, "eval_allNLI-dev_manhattan_accuracy": 0.697265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 182.48983764648438, "eval_allNLI-dev_manhattan_ap": 0.5205552000244451, "eval_allNLI-dev_manhattan_f1": 0.5609756097560975, "eval_allNLI-dev_manhattan_f1_threshold": 258.8172302246094, "eval_allNLI-dev_manhattan_precision": 0.43260188087774293, "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, "eval_allNLI-dev_max_accuracy": 0.705078125, "eval_allNLI-dev_max_accuracy_threshold": 579.238037109375, "eval_allNLI-dev_max_ap": 0.521605930352391, "eval_allNLI-dev_max_f1": 0.5725971370143149, "eval_allNLI-dev_max_f1_threshold": 434.8841552734375, "eval_allNLI-dev_max_precision": 0.4430379746835443, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.7175927858077272, "eval_sts-test_pearson_cosine": 0.7136972276507711, "eval_sts-test_pearson_dot": 0.6211895317774989, "eval_sts-test_pearson_euclidean": 0.7206975023993004, "eval_sts-test_pearson_manhattan": 0.7173500334495002, "eval_sts-test_pearson_max": 0.7206975023993004, "eval_sts-test_spearman_cosine": 0.7248006482549532, "eval_sts-test_spearman_dot": 0.6004732756237651, "eval_sts-test_spearman_euclidean": 0.7142591753753792, "eval_sts-test_spearman_manhattan": 0.709346784661184, "eval_sts-test_spearman_max": 0.7248006482549532, "eval_vitaminc-pairs_loss": 2.848787546157837, "eval_vitaminc-pairs_runtime": 3.1955, "eval_vitaminc-pairs_samples_per_second": 40.057, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 320 }, { "epoch": 0.3292181069958848, "eval_negation-triplets_loss": 1.9014121294021606, "eval_negation-triplets_runtime": 0.7402, "eval_negation-triplets_samples_per_second": 172.919, "eval_negation-triplets_steps_per_second": 1.351, "step": 320 }, { "epoch": 0.3292181069958848, "eval_scitail-pairs-pos_loss": 0.2929946184158325, "eval_scitail-pairs-pos_runtime": 0.8038, "eval_scitail-pairs-pos_samples_per_second": 159.243, "eval_scitail-pairs-pos_steps_per_second": 1.244, "step": 320 }, { "epoch": 0.3292181069958848, "eval_scitail-pairs-qa_loss": 0.05852370336651802, "eval_scitail-pairs-qa_runtime": 0.5794, "eval_scitail-pairs-qa_samples_per_second": 220.912, "eval_scitail-pairs-qa_steps_per_second": 1.726, "step": 320 }, { "epoch": 0.3292181069958848, "eval_xsum-pairs_loss": 1.6594665050506592, "eval_xsum-pairs_runtime": 3.0252, "eval_xsum-pairs_samples_per_second": 42.312, "eval_xsum-pairs_steps_per_second": 0.331, "step": 320 }, { "epoch": 0.3292181069958848, "eval_sciq_pairs_loss": 0.18286681175231934, "eval_sciq_pairs_runtime": 3.4208, "eval_sciq_pairs_samples_per_second": 37.419, "eval_sciq_pairs_steps_per_second": 0.292, "step": 320 }, { "epoch": 0.3292181069958848, "eval_qasc_pairs_loss": 1.2028858661651611, "eval_qasc_pairs_runtime": 0.5972, "eval_qasc_pairs_samples_per_second": 214.327, "eval_qasc_pairs_steps_per_second": 1.674, "step": 320 }, { "epoch": 0.3292181069958848, "eval_openbookqa_pairs_loss": 1.7855560779571533, "eval_openbookqa_pairs_runtime": 0.584, "eval_openbookqa_pairs_samples_per_second": 219.164, "eval_openbookqa_pairs_steps_per_second": 1.712, "step": 320 }, { "epoch": 0.3292181069958848, "eval_msmarco_pairs_loss": 2.167186975479126, "eval_msmarco_pairs_runtime": 1.5188, "eval_msmarco_pairs_samples_per_second": 84.278, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 320 }, { "epoch": 0.3292181069958848, "eval_nq_pairs_loss": 2.60081148147583, "eval_nq_pairs_runtime": 2.8945, "eval_nq_pairs_samples_per_second": 44.222, "eval_nq_pairs_steps_per_second": 0.345, "step": 320 }, { "epoch": 0.3292181069958848, "eval_trivia_pairs_loss": 2.2225193977355957, "eval_trivia_pairs_runtime": 3.4352, "eval_trivia_pairs_samples_per_second": 37.262, "eval_trivia_pairs_steps_per_second": 0.291, "step": 320 }, { "epoch": 0.3292181069958848, "eval_gooaq_pairs_loss": 1.6083563566207886, "eval_gooaq_pairs_runtime": 0.9378, "eval_gooaq_pairs_samples_per_second": 136.489, "eval_gooaq_pairs_steps_per_second": 1.066, "step": 320 }, { "epoch": 0.3292181069958848, "eval_paws-pos_loss": 0.06881627440452576, "eval_paws-pos_runtime": 0.6797, "eval_paws-pos_samples_per_second": 188.32, "eval_paws-pos_steps_per_second": 1.471, "step": 320 }, { "epoch": 0.3292181069958848, "eval_global_dataset_loss": 1.0821770429611206, "eval_global_dataset_runtime": 13.336, "eval_global_dataset_samples_per_second": 31.194, "eval_global_dataset_steps_per_second": 0.3, "step": 320 }, { "epoch": 0.33024691358024694, "grad_norm": 11.460700035095215, "learning_rate": 1.1557632398753892e-05, "loss": 1.1381, "step": 321 }, { "epoch": 0.33127572016460904, "grad_norm": 17.357484817504883, "learning_rate": 1.1593977154724816e-05, "loss": 1.74, "step": 322 }, { "epoch": 0.3323045267489712, "grad_norm": 11.96126937866211, "learning_rate": 1.1630321910695742e-05, "loss": 1.2228, "step": 323 }, { "epoch": 0.3333333333333333, "grad_norm": 11.488797187805176, "learning_rate": 1.1666666666666665e-05, "loss": 1.1824, "step": 324 }, { "epoch": 0.3343621399176955, "grad_norm": 9.767298698425293, "learning_rate": 1.170301142263759e-05, "loss": 0.9803, "step": 325 }, { "epoch": 0.33539094650205764, "grad_norm": 3.6967291831970215, "learning_rate": 1.1739356178608514e-05, "loss": 0.2092, "step": 326 }, { "epoch": 0.33641975308641975, "grad_norm": 15.058168411254883, "learning_rate": 1.1775700934579438e-05, "loss": 1.9639, "step": 327 }, { "epoch": 0.3374485596707819, "grad_norm": 13.334756851196289, "learning_rate": 1.1812045690550362e-05, "loss": 2.6183, "step": 328 }, { "epoch": 0.338477366255144, "grad_norm": 11.534461975097656, "learning_rate": 1.1848390446521288e-05, "loss": 1.2629, "step": 329 }, { "epoch": 0.3395061728395062, "grad_norm": 17.404090881347656, "learning_rate": 1.188473520249221e-05, "loss": 2.3154, "step": 330 }, { "epoch": 0.3405349794238683, "grad_norm": 21.002731323242188, "learning_rate": 1.1921079958463134e-05, "loss": 2.9437, "step": 331 }, { "epoch": 0.34156378600823045, "grad_norm": 3.4560208320617676, "learning_rate": 1.195742471443406e-05, "loss": 0.1481, "step": 332 }, { "epoch": 0.3425925925925926, "grad_norm": 16.013757705688477, "learning_rate": 1.1993769470404982e-05, "loss": 2.0438, "step": 333 }, { "epoch": 0.3436213991769547, "grad_norm": 12.2532377243042, "learning_rate": 1.2030114226375908e-05, "loss": 1.1147, "step": 334 }, { "epoch": 0.3446502057613169, "grad_norm": 18.853534698486328, "learning_rate": 1.2066458982346832e-05, "loss": 2.4102, "step": 335 }, { "epoch": 0.345679012345679, "grad_norm": 22.490856170654297, "learning_rate": 1.2102803738317756e-05, "loss": 3.1869, "step": 336 }, { "epoch": 0.34670781893004116, "grad_norm": 8.61929702758789, "learning_rate": 1.213914849428868e-05, "loss": 0.464, "step": 337 }, { "epoch": 0.3477366255144033, "grad_norm": 18.195655822753906, "learning_rate": 1.2175493250259606e-05, "loss": 2.3863, "step": 338 }, { "epoch": 0.3487654320987654, "grad_norm": 17.072919845581055, "learning_rate": 1.2211838006230528e-05, "loss": 2.1807, "step": 339 }, { "epoch": 0.3497942386831276, "grad_norm": 24.929763793945312, "learning_rate": 1.2248182762201452e-05, "loss": 2.6664, "step": 340 }, { "epoch": 0.3497942386831276, "eval_Qnli-dev_cosine_accuracy": 0.677734375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8400751352310181, "eval_Qnli-dev_cosine_ap": 0.7066658175282814, "eval_Qnli-dev_cosine_f1": 0.6932409012131717, "eval_Qnli-dev_cosine_f1_threshold": 0.7699183225631714, "eval_Qnli-dev_cosine_precision": 0.5865102639296188, "eval_Qnli-dev_cosine_recall": 0.847457627118644, "eval_Qnli-dev_dot_accuracy": 0.650390625, "eval_Qnli-dev_dot_accuracy_threshold": 403.3339538574219, "eval_Qnli-dev_dot_ap": 0.6108280098987123, "eval_Qnli-dev_dot_f1": 0.6724738675958188, "eval_Qnli-dev_dot_f1_threshold": 391.1240234375, "eval_Qnli-dev_dot_precision": 0.5710059171597633, "eval_Qnli-dev_dot_recall": 0.8177966101694916, "eval_Qnli-dev_euclidean_accuracy": 0.69140625, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.067573547363281, "eval_Qnli-dev_euclidean_ap": 0.7152312759485101, "eval_Qnli-dev_euclidean_f1": 0.6883561643835616, "eval_Qnli-dev_euclidean_f1_threshold": 15.466879844665527, "eval_Qnli-dev_euclidean_precision": 0.5775862068965517, "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, "eval_Qnli-dev_manhattan_accuracy": 0.689453125, "eval_Qnli-dev_manhattan_accuracy_threshold": 263.4106140136719, "eval_Qnli-dev_manhattan_ap": 0.7125157018628507, "eval_Qnli-dev_manhattan_f1": 0.6950596252129473, "eval_Qnli-dev_manhattan_f1_threshold": 310.44476318359375, "eval_Qnli-dev_manhattan_precision": 0.5811965811965812, "eval_Qnli-dev_manhattan_recall": 0.864406779661017, "eval_Qnli-dev_max_accuracy": 0.69140625, "eval_Qnli-dev_max_accuracy_threshold": 403.3339538574219, "eval_Qnli-dev_max_ap": 0.7152312759485101, "eval_Qnli-dev_max_f1": 0.6950596252129473, "eval_Qnli-dev_max_f1_threshold": 391.1240234375, "eval_Qnli-dev_max_precision": 0.5865102639296188, "eval_Qnli-dev_max_recall": 0.864406779661017, "eval_allNLI-dev_cosine_accuracy": 0.697265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9136956930160522, "eval_allNLI-dev_cosine_ap": 0.5195967953345066, "eval_allNLI-dev_cosine_f1": 0.5657370517928287, "eval_allNLI-dev_cosine_f1_threshold": 0.8129154443740845, "eval_allNLI-dev_cosine_precision": 0.4316109422492401, "eval_allNLI-dev_cosine_recall": 0.8208092485549133, "eval_allNLI-dev_dot_accuracy": 0.673828125, "eval_allNLI-dev_dot_accuracy_threshold": 493.11651611328125, "eval_allNLI-dev_dot_ap": 0.43280102596891956, "eval_allNLI-dev_dot_f1": 0.5358361774744028, "eval_allNLI-dev_dot_f1_threshold": 382.6061096191406, "eval_allNLI-dev_dot_precision": 0.3801452784503632, "eval_allNLI-dev_dot_recall": 0.9075144508670521, "eval_allNLI-dev_euclidean_accuracy": 0.701171875, "eval_allNLI-dev_euclidean_accuracy_threshold": 9.347522735595703, "eval_allNLI-dev_euclidean_ap": 0.5242211625716244, "eval_allNLI-dev_euclidean_f1": 0.5731958762886599, "eval_allNLI-dev_euclidean_f1_threshold": 13.492986679077148, "eval_allNLI-dev_euclidean_precision": 0.44551282051282054, "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, "eval_allNLI-dev_manhattan_accuracy": 0.6953125, "eval_allNLI-dev_manhattan_accuracy_threshold": 197.63035583496094, "eval_allNLI-dev_manhattan_ap": 0.5226190870456224, "eval_allNLI-dev_manhattan_f1": 0.570281124497992, "eval_allNLI-dev_manhattan_f1_threshold": 274.56903076171875, "eval_allNLI-dev_manhattan_precision": 0.4369230769230769, "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, "eval_allNLI-dev_max_accuracy": 0.701171875, "eval_allNLI-dev_max_accuracy_threshold": 493.11651611328125, "eval_allNLI-dev_max_ap": 0.5242211625716244, "eval_allNLI-dev_max_f1": 0.5731958762886599, "eval_allNLI-dev_max_f1_threshold": 382.6061096191406, "eval_allNLI-dev_max_precision": 0.44551282051282054, "eval_allNLI-dev_max_recall": 0.9075144508670521, "eval_sequential_score": 0.7152312759485101, "eval_sts-test_pearson_cosine": 0.7271689674461207, "eval_sts-test_pearson_dot": 0.6547412928047769, "eval_sts-test_pearson_euclidean": 0.7347019450426402, "eval_sts-test_pearson_manhattan": 0.7265236751697546, "eval_sts-test_pearson_max": 0.7347019450426402, "eval_sts-test_spearman_cosine": 0.7360126740352158, "eval_sts-test_spearman_dot": 0.6379495482070717, "eval_sts-test_spearman_euclidean": 0.7249498937116934, "eval_sts-test_spearman_manhattan": 0.7182097719797506, "eval_sts-test_spearman_max": 0.7360126740352158, "eval_vitaminc-pairs_loss": 2.921300172805786, "eval_vitaminc-pairs_runtime": 3.1577, "eval_vitaminc-pairs_samples_per_second": 40.536, "eval_vitaminc-pairs_steps_per_second": 0.317, "step": 340 }, { "epoch": 0.3497942386831276, "eval_negation-triplets_loss": 1.800155758857727, "eval_negation-triplets_runtime": 0.73, "eval_negation-triplets_samples_per_second": 175.338, "eval_negation-triplets_steps_per_second": 1.37, "step": 340 }, { "epoch": 0.3497942386831276, "eval_scitail-pairs-pos_loss": 0.24910371005535126, "eval_scitail-pairs-pos_runtime": 0.7849, "eval_scitail-pairs-pos_samples_per_second": 163.07, "eval_scitail-pairs-pos_steps_per_second": 1.274, "step": 340 }, { "epoch": 0.3497942386831276, "eval_scitail-pairs-qa_loss": 0.03286855295300484, "eval_scitail-pairs-qa_runtime": 0.5749, "eval_scitail-pairs-qa_samples_per_second": 222.634, "eval_scitail-pairs-qa_steps_per_second": 1.739, "step": 340 }, { "epoch": 0.3497942386831276, "eval_xsum-pairs_loss": 1.4957503080368042, "eval_xsum-pairs_runtime": 3.0086, "eval_xsum-pairs_samples_per_second": 42.545, "eval_xsum-pairs_steps_per_second": 0.332, "step": 340 }, { "epoch": 0.3497942386831276, "eval_sciq_pairs_loss": 0.17354349792003632, "eval_sciq_pairs_runtime": 3.4138, "eval_sciq_pairs_samples_per_second": 37.494, "eval_sciq_pairs_steps_per_second": 0.293, "step": 340 }, { "epoch": 0.3497942386831276, "eval_qasc_pairs_loss": 1.0957040786743164, "eval_qasc_pairs_runtime": 0.5968, "eval_qasc_pairs_samples_per_second": 214.493, "eval_qasc_pairs_steps_per_second": 1.676, "step": 340 }, { "epoch": 0.3497942386831276, "eval_openbookqa_pairs_loss": 1.6939177513122559, "eval_openbookqa_pairs_runtime": 0.5747, "eval_openbookqa_pairs_samples_per_second": 222.741, "eval_openbookqa_pairs_steps_per_second": 1.74, "step": 340 }, { "epoch": 0.3497942386831276, "eval_msmarco_pairs_loss": 2.079050302505493, "eval_msmarco_pairs_runtime": 1.5095, "eval_msmarco_pairs_samples_per_second": 84.798, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 340 }, { "epoch": 0.3497942386831276, "eval_nq_pairs_loss": 2.6143176555633545, "eval_nq_pairs_runtime": 2.8884, "eval_nq_pairs_samples_per_second": 44.314, "eval_nq_pairs_steps_per_second": 0.346, "step": 340 }, { "epoch": 0.3497942386831276, "eval_trivia_pairs_loss": 2.1714344024658203, "eval_trivia_pairs_runtime": 3.4339, "eval_trivia_pairs_samples_per_second": 37.275, "eval_trivia_pairs_steps_per_second": 0.291, "step": 340 }, { "epoch": 0.3497942386831276, "eval_gooaq_pairs_loss": 1.5302671194076538, "eval_gooaq_pairs_runtime": 0.9419, "eval_gooaq_pairs_samples_per_second": 135.901, "eval_gooaq_pairs_steps_per_second": 1.062, "step": 340 }, { "epoch": 0.3497942386831276, "eval_paws-pos_loss": 0.05603673681616783, "eval_paws-pos_runtime": 0.6803, "eval_paws-pos_samples_per_second": 188.158, "eval_paws-pos_steps_per_second": 1.47, "step": 340 }, { "epoch": 0.3497942386831276, "eval_global_dataset_loss": 1.029583215713501, "eval_global_dataset_runtime": 13.3497, "eval_global_dataset_samples_per_second": 31.162, "eval_global_dataset_steps_per_second": 0.3, "step": 340 }, { "epoch": 0.3508230452674897, "grad_norm": 13.51473331451416, "learning_rate": 1.2284527518172378e-05, "loss": 2.0315, "step": 341 }, { "epoch": 0.35185185185185186, "grad_norm": 13.707486152648926, "learning_rate": 1.23208722741433e-05, "loss": 2.025, "step": 342 }, { "epoch": 0.35288065843621397, "grad_norm": 14.628217697143555, "learning_rate": 1.2357217030114226e-05, "loss": 2.7561, "step": 343 }, { "epoch": 0.35390946502057613, "grad_norm": 16.766042709350586, "learning_rate": 1.239356178608515e-05, "loss": 2.2758, "step": 344 }, { "epoch": 0.3549382716049383, "grad_norm": 12.628337860107422, "learning_rate": 1.2429906542056072e-05, "loss": 1.2086, "step": 345 }, { "epoch": 0.3559670781893004, "grad_norm": 12.519665718078613, "learning_rate": 1.2466251298026998e-05, "loss": 1.1789, "step": 346 }, { "epoch": 0.35699588477366256, "grad_norm": 15.279417037963867, "learning_rate": 1.2502596053997922e-05, "loss": 2.0743, "step": 347 }, { "epoch": 0.35802469135802467, "grad_norm": 2.5973308086395264, "learning_rate": 1.2538940809968846e-05, "loss": 0.1388, "step": 348 }, { "epoch": 0.35905349794238683, "grad_norm": 11.533571243286133, "learning_rate": 1.257528556593977e-05, "loss": 1.007, "step": 349 }, { "epoch": 0.360082304526749, "grad_norm": 12.749200820922852, "learning_rate": 1.2611630321910696e-05, "loss": 1.0298, "step": 350 }, { "epoch": 0.3611111111111111, "grad_norm": 19.385435104370117, "learning_rate": 1.2647975077881618e-05, "loss": 2.3117, "step": 351 }, { "epoch": 0.36213991769547327, "grad_norm": 13.895768165588379, "learning_rate": 1.2684319833852544e-05, "loss": 1.1729, "step": 352 }, { "epoch": 0.3631687242798354, "grad_norm": 14.044111251831055, "learning_rate": 1.2720664589823468e-05, "loss": 1.8233, "step": 353 }, { "epoch": 0.36419753086419754, "grad_norm": 13.1631441116333, "learning_rate": 1.275700934579439e-05, "loss": 1.2639, "step": 354 }, { "epoch": 0.36522633744855965, "grad_norm": 12.106592178344727, "learning_rate": 1.2793354101765316e-05, "loss": 1.1652, "step": 355 }, { "epoch": 0.3662551440329218, "grad_norm": 12.02451229095459, "learning_rate": 1.282969885773624e-05, "loss": 1.1368, "step": 356 }, { "epoch": 0.36728395061728397, "grad_norm": 11.719282150268555, "learning_rate": 1.2866043613707164e-05, "loss": 1.122, "step": 357 }, { "epoch": 0.3683127572016461, "grad_norm": 12.295735359191895, "learning_rate": 1.2902388369678088e-05, "loss": 1.2503, "step": 358 }, { "epoch": 0.36934156378600824, "grad_norm": 12.906529426574707, "learning_rate": 1.2938733125649014e-05, "loss": 2.2025, "step": 359 }, { "epoch": 0.37037037037037035, "grad_norm": 11.613821983337402, "learning_rate": 1.2975077881619936e-05, "loss": 0.9197, "step": 360 }, { "epoch": 0.37037037037037035, "eval_Qnli-dev_cosine_accuracy": 0.67578125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8274970650672913, "eval_Qnli-dev_cosine_ap": 0.7107856399181414, "eval_Qnli-dev_cosine_f1": 0.6886446886446885, "eval_Qnli-dev_cosine_f1_threshold": 0.7976377010345459, "eval_Qnli-dev_cosine_precision": 0.6064516129032258, "eval_Qnli-dev_cosine_recall": 0.7966101694915254, "eval_Qnli-dev_dot_accuracy": 0.638671875, "eval_Qnli-dev_dot_accuracy_threshold": 421.1475830078125, "eval_Qnli-dev_dot_ap": 0.6026157605644281, "eval_Qnli-dev_dot_f1": 0.6688, "eval_Qnli-dev_dot_f1_threshold": 383.72686767578125, "eval_Qnli-dev_dot_precision": 0.5372750642673522, "eval_Qnli-dev_dot_recall": 0.885593220338983, "eval_Qnli-dev_euclidean_accuracy": 0.69140625, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.509709358215332, "eval_Qnli-dev_euclidean_ap": 0.7205377940978139, "eval_Qnli-dev_euclidean_f1": 0.6884681583476765, "eval_Qnli-dev_euclidean_f1_threshold": 15.236268997192383, "eval_Qnli-dev_euclidean_precision": 0.5797101449275363, "eval_Qnli-dev_euclidean_recall": 0.847457627118644, "eval_Qnli-dev_manhattan_accuracy": 0.6875, "eval_Qnli-dev_manhattan_accuracy_threshold": 255.33294677734375, "eval_Qnli-dev_manhattan_ap": 0.7196447998198872, "eval_Qnli-dev_manhattan_f1": 0.6936026936026936, "eval_Qnli-dev_manhattan_f1_threshold": 311.228271484375, "eval_Qnli-dev_manhattan_precision": 0.5754189944134078, "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, "eval_Qnli-dev_max_accuracy": 0.69140625, "eval_Qnli-dev_max_accuracy_threshold": 421.1475830078125, "eval_Qnli-dev_max_ap": 0.7205377940978139, "eval_Qnli-dev_max_f1": 0.6936026936026936, "eval_Qnli-dev_max_f1_threshold": 383.72686767578125, "eval_Qnli-dev_max_precision": 0.6064516129032258, "eval_Qnli-dev_max_recall": 0.885593220338983, "eval_allNLI-dev_cosine_accuracy": 0.71484375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9123337864875793, "eval_allNLI-dev_cosine_ap": 0.5454586491658389, "eval_allNLI-dev_cosine_f1": 0.5625, "eval_allNLI-dev_cosine_f1_threshold": 0.8062101006507874, "eval_allNLI-dev_cosine_precision": 0.4247787610619469, "eval_allNLI-dev_cosine_recall": 0.8323699421965318, "eval_allNLI-dev_dot_accuracy": 0.673828125, "eval_allNLI-dev_dot_accuracy_threshold": 486.6114807128906, "eval_allNLI-dev_dot_ap": 0.45389873900128597, "eval_allNLI-dev_dot_f1": 0.5345132743362832, "eval_allNLI-dev_dot_f1_threshold": 395.1532897949219, "eval_allNLI-dev_dot_precision": 0.3852040816326531, "eval_allNLI-dev_dot_recall": 0.8728323699421965, "eval_allNLI-dev_euclidean_accuracy": 0.71484375, "eval_allNLI-dev_euclidean_accuracy_threshold": 9.256704330444336, "eval_allNLI-dev_euclidean_ap": 0.5468473795344413, "eval_allNLI-dev_euclidean_f1": 0.5690721649484537, "eval_allNLI-dev_euclidean_f1_threshold": 13.748187065124512, "eval_allNLI-dev_euclidean_precision": 0.4423076923076923, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.7109375, "eval_allNLI-dev_manhattan_accuracy_threshold": 186.86061096191406, "eval_allNLI-dev_manhattan_ap": 0.5447298734564391, "eval_allNLI-dev_manhattan_f1": 0.5643153526970954, "eval_allNLI-dev_manhattan_f1_threshold": 275.2322082519531, "eval_allNLI-dev_manhattan_precision": 0.4401294498381877, "eval_allNLI-dev_manhattan_recall": 0.7861271676300579, "eval_allNLI-dev_max_accuracy": 0.71484375, "eval_allNLI-dev_max_accuracy_threshold": 486.6114807128906, "eval_allNLI-dev_max_ap": 0.5468473795344413, "eval_allNLI-dev_max_f1": 0.5690721649484537, "eval_allNLI-dev_max_f1_threshold": 395.1532897949219, "eval_allNLI-dev_max_precision": 0.4423076923076923, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.7205377940978139, "eval_sts-test_pearson_cosine": 0.7499734901337567, "eval_sts-test_pearson_dot": 0.6944181989389289, "eval_sts-test_pearson_euclidean": 0.7581460883701887, "eval_sts-test_pearson_manhattan": 0.7494485428076179, "eval_sts-test_pearson_max": 0.7581460883701887, "eval_sts-test_spearman_cosine": 0.7613686939883212, "eval_sts-test_spearman_dot": 0.6777020937010519, "eval_sts-test_spearman_euclidean": 0.748221668416756, "eval_sts-test_spearman_manhattan": 0.7396471477291182, "eval_sts-test_spearman_max": 0.7613686939883212, "eval_vitaminc-pairs_loss": 2.936203718185425, "eval_vitaminc-pairs_runtime": 3.1741, "eval_vitaminc-pairs_samples_per_second": 40.326, "eval_vitaminc-pairs_steps_per_second": 0.315, "step": 360 }, { "epoch": 0.37037037037037035, "eval_negation-triplets_loss": 1.7166328430175781, "eval_negation-triplets_runtime": 0.7398, "eval_negation-triplets_samples_per_second": 173.027, "eval_negation-triplets_steps_per_second": 1.352, "step": 360 }, { "epoch": 0.37037037037037035, "eval_scitail-pairs-pos_loss": 0.24884574115276337, "eval_scitail-pairs-pos_runtime": 0.7997, "eval_scitail-pairs-pos_samples_per_second": 160.064, "eval_scitail-pairs-pos_steps_per_second": 1.25, "step": 360 }, { "epoch": 0.37037037037037035, "eval_scitail-pairs-qa_loss": 0.03187813237309456, "eval_scitail-pairs-qa_runtime": 0.5642, "eval_scitail-pairs-qa_samples_per_second": 226.873, "eval_scitail-pairs-qa_steps_per_second": 1.772, "step": 360 }, { "epoch": 0.37037037037037035, "eval_xsum-pairs_loss": 1.3931405544281006, "eval_xsum-pairs_runtime": 3.0204, "eval_xsum-pairs_samples_per_second": 42.378, "eval_xsum-pairs_steps_per_second": 0.331, "step": 360 }, { "epoch": 0.37037037037037035, "eval_sciq_pairs_loss": 0.16648775339126587, "eval_sciq_pairs_runtime": 3.3956, "eval_sciq_pairs_samples_per_second": 37.695, "eval_sciq_pairs_steps_per_second": 0.294, "step": 360 }, { "epoch": 0.37037037037037035, "eval_qasc_pairs_loss": 0.9755259156227112, "eval_qasc_pairs_runtime": 0.5919, "eval_qasc_pairs_samples_per_second": 216.262, "eval_qasc_pairs_steps_per_second": 1.69, "step": 360 }, { "epoch": 0.37037037037037035, "eval_openbookqa_pairs_loss": 1.6076797246932983, "eval_openbookqa_pairs_runtime": 0.5876, "eval_openbookqa_pairs_samples_per_second": 217.828, "eval_openbookqa_pairs_steps_per_second": 1.702, "step": 360 }, { "epoch": 0.37037037037037035, "eval_msmarco_pairs_loss": 2.0155866146087646, "eval_msmarco_pairs_runtime": 1.5287, "eval_msmarco_pairs_samples_per_second": 83.731, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 360 }, { "epoch": 0.37037037037037035, "eval_nq_pairs_loss": 2.5515902042388916, "eval_nq_pairs_runtime": 2.8881, "eval_nq_pairs_samples_per_second": 44.319, "eval_nq_pairs_steps_per_second": 0.346, "step": 360 }, { "epoch": 0.37037037037037035, "eval_trivia_pairs_loss": 1.8931869268417358, "eval_trivia_pairs_runtime": 3.4335, "eval_trivia_pairs_samples_per_second": 37.28, "eval_trivia_pairs_steps_per_second": 0.291, "step": 360 }, { "epoch": 0.37037037037037035, "eval_gooaq_pairs_loss": 1.4498214721679688, "eval_gooaq_pairs_runtime": 0.9445, "eval_gooaq_pairs_samples_per_second": 135.517, "eval_gooaq_pairs_steps_per_second": 1.059, "step": 360 }, { "epoch": 0.37037037037037035, "eval_paws-pos_loss": 0.05163538083434105, "eval_paws-pos_runtime": 0.6805, "eval_paws-pos_samples_per_second": 188.1, "eval_paws-pos_steps_per_second": 1.47, "step": 360 }, { "epoch": 0.37037037037037035, "eval_global_dataset_loss": 0.9714978933334351, "eval_global_dataset_runtime": 13.3457, "eval_global_dataset_samples_per_second": 31.171, "eval_global_dataset_steps_per_second": 0.3, "step": 360 }, { "epoch": 0.3713991769547325, "grad_norm": 8.986026763916016, "learning_rate": 1.3011422637590862e-05, "loss": 0.4565, "step": 361 }, { "epoch": 0.3724279835390947, "grad_norm": 10.55431079864502, "learning_rate": 1.3047767393561786e-05, "loss": 0.9519, "step": 362 }, { "epoch": 0.3734567901234568, "grad_norm": 22.92361831665039, "learning_rate": 1.3084112149532708e-05, "loss": 2.876, "step": 363 }, { "epoch": 0.37448559670781895, "grad_norm": 13.740486145019531, "learning_rate": 1.3120456905503634e-05, "loss": 1.5415, "step": 364 }, { "epoch": 0.37551440329218105, "grad_norm": 12.367791175842285, "learning_rate": 1.3156801661474558e-05, "loss": 1.017, "step": 365 }, { "epoch": 0.3765432098765432, "grad_norm": 13.127511978149414, "learning_rate": 1.3193146417445482e-05, "loss": 1.4156, "step": 366 }, { "epoch": 0.3775720164609053, "grad_norm": 24.860748291015625, "learning_rate": 1.3229491173416406e-05, "loss": 3.272, "step": 367 }, { "epoch": 0.3786008230452675, "grad_norm": 10.243440628051758, "learning_rate": 1.3265835929387332e-05, "loss": 0.8205, "step": 368 }, { "epoch": 0.37962962962962965, "grad_norm": 19.33579444885254, "learning_rate": 1.3302180685358254e-05, "loss": 2.7472, "step": 369 }, { "epoch": 0.38065843621399176, "grad_norm": 2.6139795780181885, "learning_rate": 1.3338525441329178e-05, "loss": 0.1251, "step": 370 }, { "epoch": 0.3816872427983539, "grad_norm": 13.30015754699707, "learning_rate": 1.3374870197300104e-05, "loss": 1.7165, "step": 371 }, { "epoch": 0.38271604938271603, "grad_norm": 15.629612922668457, "learning_rate": 1.3411214953271026e-05, "loss": 2.0682, "step": 372 }, { "epoch": 0.3837448559670782, "grad_norm": 2.9140241146087646, "learning_rate": 1.3447559709241952e-05, "loss": 0.1312, "step": 373 }, { "epoch": 0.38477366255144035, "grad_norm": 14.68766975402832, "learning_rate": 1.3483904465212876e-05, "loss": 1.473, "step": 374 }, { "epoch": 0.38580246913580246, "grad_norm": 11.001675605773926, "learning_rate": 1.35202492211838e-05, "loss": 0.9278, "step": 375 }, { "epoch": 0.3868312757201646, "grad_norm": 18.111684799194336, "learning_rate": 1.3556593977154724e-05, "loss": 1.9585, "step": 376 }, { "epoch": 0.38786008230452673, "grad_norm": 19.245637893676758, "learning_rate": 1.359293873312565e-05, "loss": 2.3601, "step": 377 }, { "epoch": 0.3888888888888889, "grad_norm": 11.811524391174316, "learning_rate": 1.3629283489096572e-05, "loss": 1.0816, "step": 378 }, { "epoch": 0.389917695473251, "grad_norm": 15.023236274719238, "learning_rate": 1.3665628245067496e-05, "loss": 2.3182, "step": 379 }, { "epoch": 0.39094650205761317, "grad_norm": 16.875747680664062, "learning_rate": 1.3701973001038421e-05, "loss": 2.0826, "step": 380 }, { "epoch": 0.39094650205761317, "eval_Qnli-dev_cosine_accuracy": 0.669921875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8552969098091125, "eval_Qnli-dev_cosine_ap": 0.7113127062240453, "eval_Qnli-dev_cosine_f1": 0.6916221033868093, "eval_Qnli-dev_cosine_f1_threshold": 0.781796932220459, "eval_Qnli-dev_cosine_precision": 0.5969230769230769, "eval_Qnli-dev_cosine_recall": 0.8220338983050848, "eval_Qnli-dev_dot_accuracy": 0.638671875, "eval_Qnli-dev_dot_accuracy_threshold": 427.3189697265625, "eval_Qnli-dev_dot_ap": 0.5994490036489998, "eval_Qnli-dev_dot_f1": 0.6635367762128326, "eval_Qnli-dev_dot_f1_threshold": 378.4907531738281, "eval_Qnli-dev_dot_precision": 0.5260545905707196, "eval_Qnli-dev_dot_recall": 0.8983050847457628, "eval_Qnli-dev_euclidean_accuracy": 0.677734375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.223026275634766, "eval_Qnli-dev_euclidean_ap": 0.7220014624049731, "eval_Qnli-dev_euclidean_f1": 0.6837606837606838, "eval_Qnli-dev_euclidean_f1_threshold": 15.54850959777832, "eval_Qnli-dev_euclidean_precision": 0.5730659025787965, "eval_Qnli-dev_euclidean_recall": 0.847457627118644, "eval_Qnli-dev_manhattan_accuracy": 0.685546875, "eval_Qnli-dev_manhattan_accuracy_threshold": 273.4452209472656, "eval_Qnli-dev_manhattan_ap": 0.7213585266821472, "eval_Qnli-dev_manhattan_f1": 0.6920415224913494, "eval_Qnli-dev_manhattan_f1_threshold": 312.36419677734375, "eval_Qnli-dev_manhattan_precision": 0.5847953216374269, "eval_Qnli-dev_manhattan_recall": 0.847457627118644, "eval_Qnli-dev_max_accuracy": 0.685546875, "eval_Qnli-dev_max_accuracy_threshold": 427.3189697265625, "eval_Qnli-dev_max_ap": 0.7220014624049731, "eval_Qnli-dev_max_f1": 0.6920415224913494, "eval_Qnli-dev_max_f1_threshold": 378.4907531738281, "eval_Qnli-dev_max_precision": 0.5969230769230769, "eval_Qnli-dev_max_recall": 0.8983050847457628, "eval_allNLI-dev_cosine_accuracy": 0.712890625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9154093861579895, "eval_allNLI-dev_cosine_ap": 0.53447469371308, "eval_allNLI-dev_cosine_f1": 0.5582329317269077, "eval_allNLI-dev_cosine_f1_threshold": 0.8176555633544922, "eval_allNLI-dev_cosine_precision": 0.4276923076923077, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.6640625, "eval_allNLI-dev_dot_accuracy_threshold": 565.86181640625, "eval_allNLI-dev_dot_ap": 0.4315444199034263, "eval_allNLI-dev_dot_f1": 0.5361552028218695, "eval_allNLI-dev_dot_f1_threshold": 401.46759033203125, "eval_allNLI-dev_dot_precision": 0.38578680203045684, "eval_allNLI-dev_dot_recall": 0.8786127167630058, "eval_allNLI-dev_euclidean_accuracy": 0.708984375, "eval_allNLI-dev_euclidean_accuracy_threshold": 9.178808212280273, "eval_allNLI-dev_euclidean_ap": 0.5387071754068331, "eval_allNLI-dev_euclidean_f1": 0.5644171779141105, "eval_allNLI-dev_euclidean_f1_threshold": 13.749273300170898, "eval_allNLI-dev_euclidean_precision": 0.43670886075949367, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.708984375, "eval_allNLI-dev_manhattan_accuracy_threshold": 188.05450439453125, "eval_allNLI-dev_manhattan_ap": 0.536761796295294, "eval_allNLI-dev_manhattan_f1": 0.5641025641025641, "eval_allNLI-dev_manhattan_f1_threshold": 269.14678955078125, "eval_allNLI-dev_manhattan_precision": 0.44745762711864406, "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, "eval_allNLI-dev_max_accuracy": 0.712890625, "eval_allNLI-dev_max_accuracy_threshold": 565.86181640625, "eval_allNLI-dev_max_ap": 0.5387071754068331, "eval_allNLI-dev_max_f1": 0.5644171779141105, "eval_allNLI-dev_max_f1_threshold": 401.46759033203125, "eval_allNLI-dev_max_precision": 0.44745762711864406, "eval_allNLI-dev_max_recall": 0.8786127167630058, "eval_sequential_score": 0.7220014624049731, "eval_sts-test_pearson_cosine": 0.751982795791559, "eval_sts-test_pearson_dot": 0.6746803146097003, "eval_sts-test_pearson_euclidean": 0.7662220636606787, "eval_sts-test_pearson_manhattan": 0.7605555708771703, "eval_sts-test_pearson_max": 0.7662220636606787, "eval_sts-test_spearman_cosine": 0.7683159693092451, "eval_sts-test_spearman_dot": 0.6569558116813516, "eval_sts-test_spearman_euclidean": 0.7569410489375177, "eval_sts-test_spearman_manhattan": 0.7506770806267793, "eval_sts-test_spearman_max": 0.7683159693092451, "eval_vitaminc-pairs_loss": 3.076357841491699, "eval_vitaminc-pairs_runtime": 3.1717, "eval_vitaminc-pairs_samples_per_second": 40.357, "eval_vitaminc-pairs_steps_per_second": 0.315, "step": 380 }, { "epoch": 0.39094650205761317, "eval_negation-triplets_loss": 1.6979268789291382, "eval_negation-triplets_runtime": 0.7352, "eval_negation-triplets_samples_per_second": 174.1, "eval_negation-triplets_steps_per_second": 1.36, "step": 380 }, { "epoch": 0.39094650205761317, "eval_scitail-pairs-pos_loss": 0.24082684516906738, "eval_scitail-pairs-pos_runtime": 0.7875, "eval_scitail-pairs-pos_samples_per_second": 162.532, "eval_scitail-pairs-pos_steps_per_second": 1.27, "step": 380 }, { "epoch": 0.39094650205761317, "eval_scitail-pairs-qa_loss": 0.027928592637181282, "eval_scitail-pairs-qa_runtime": 0.5645, "eval_scitail-pairs-qa_samples_per_second": 226.73, "eval_scitail-pairs-qa_steps_per_second": 1.771, "step": 380 }, { "epoch": 0.39094650205761317, "eval_xsum-pairs_loss": 1.2622292041778564, "eval_xsum-pairs_runtime": 3.0159, "eval_xsum-pairs_samples_per_second": 42.441, "eval_xsum-pairs_steps_per_second": 0.332, "step": 380 }, { "epoch": 0.39094650205761317, "eval_sciq_pairs_loss": 0.15188440680503845, "eval_sciq_pairs_runtime": 3.4598, "eval_sciq_pairs_samples_per_second": 36.996, "eval_sciq_pairs_steps_per_second": 0.289, "step": 380 }, { "epoch": 0.39094650205761317, "eval_qasc_pairs_loss": 1.0029363632202148, "eval_qasc_pairs_runtime": 0.6147, "eval_qasc_pairs_samples_per_second": 208.233, "eval_qasc_pairs_steps_per_second": 1.627, "step": 380 }, { "epoch": 0.39094650205761317, "eval_openbookqa_pairs_loss": 1.6912751197814941, "eval_openbookqa_pairs_runtime": 0.574, "eval_openbookqa_pairs_samples_per_second": 222.989, "eval_openbookqa_pairs_steps_per_second": 1.742, "step": 380 }, { "epoch": 0.39094650205761317, "eval_msmarco_pairs_loss": 1.9394227266311646, "eval_msmarco_pairs_runtime": 1.5149, "eval_msmarco_pairs_samples_per_second": 84.493, "eval_msmarco_pairs_steps_per_second": 0.66, "step": 380 }, { "epoch": 0.39094650205761317, "eval_nq_pairs_loss": 2.323462963104248, "eval_nq_pairs_runtime": 2.8969, "eval_nq_pairs_samples_per_second": 44.185, "eval_nq_pairs_steps_per_second": 0.345, "step": 380 }, { "epoch": 0.39094650205761317, "eval_trivia_pairs_loss": 1.7298884391784668, "eval_trivia_pairs_runtime": 3.4558, "eval_trivia_pairs_samples_per_second": 37.039, "eval_trivia_pairs_steps_per_second": 0.289, "step": 380 }, { "epoch": 0.39094650205761317, "eval_gooaq_pairs_loss": 1.3238028287887573, "eval_gooaq_pairs_runtime": 0.9462, "eval_gooaq_pairs_samples_per_second": 135.282, "eval_gooaq_pairs_steps_per_second": 1.057, "step": 380 }, { "epoch": 0.39094650205761317, "eval_paws-pos_loss": 0.04966222867369652, "eval_paws-pos_runtime": 0.68, "eval_paws-pos_samples_per_second": 188.239, "eval_paws-pos_steps_per_second": 1.471, "step": 380 }, { "epoch": 0.39094650205761317, "eval_global_dataset_loss": 0.941063642501831, "eval_global_dataset_runtime": 13.3724, "eval_global_dataset_samples_per_second": 31.109, "eval_global_dataset_steps_per_second": 0.299, "step": 380 }, { "epoch": 0.39197530864197533, "grad_norm": 7.882116317749023, "learning_rate": 1.3738317757009344e-05, "loss": 0.4021, "step": 381 }, { "epoch": 0.39300411522633744, "grad_norm": 11.462610244750977, "learning_rate": 1.377466251298027e-05, "loss": 1.0139, "step": 382 }, { "epoch": 0.3940329218106996, "grad_norm": 14.762428283691406, "learning_rate": 1.3811007268951194e-05, "loss": 1.4995, "step": 383 }, { "epoch": 0.3950617283950617, "grad_norm": 22.418067932128906, "learning_rate": 1.3847352024922118e-05, "loss": 2.4814, "step": 384 }, { "epoch": 0.39609053497942387, "grad_norm": 22.518835067749023, "learning_rate": 1.3883696780893042e-05, "loss": 2.715, "step": 385 }, { "epoch": 0.39711934156378603, "grad_norm": 16.43521499633789, "learning_rate": 1.3920041536863967e-05, "loss": 2.1024, "step": 386 }, { "epoch": 0.39814814814814814, "grad_norm": 11.84013557434082, "learning_rate": 1.395638629283489e-05, "loss": 0.9607, "step": 387 }, { "epoch": 0.3991769547325103, "grad_norm": 11.819180488586426, "learning_rate": 1.3992731048805814e-05, "loss": 0.9005, "step": 388 }, { "epoch": 0.4002057613168724, "grad_norm": 2.0303947925567627, "learning_rate": 1.402907580477674e-05, "loss": 0.0972, "step": 389 }, { "epoch": 0.4012345679012346, "grad_norm": 14.67570686340332, "learning_rate": 1.4065420560747662e-05, "loss": 1.7057, "step": 390 }, { "epoch": 0.4022633744855967, "grad_norm": 13.796504974365234, "learning_rate": 1.4101765316718587e-05, "loss": 2.1665, "step": 391 }, { "epoch": 0.40329218106995884, "grad_norm": 11.881211280822754, "learning_rate": 1.4138110072689511e-05, "loss": 1.2397, "step": 392 }, { "epoch": 0.404320987654321, "grad_norm": 14.59404182434082, "learning_rate": 1.4174454828660434e-05, "loss": 2.2046, "step": 393 }, { "epoch": 0.4053497942386831, "grad_norm": 2.643915891647339, "learning_rate": 1.421079958463136e-05, "loss": 0.1259, "step": 394 }, { "epoch": 0.4063786008230453, "grad_norm": 14.334051132202148, "learning_rate": 1.4247144340602283e-05, "loss": 1.5374, "step": 395 }, { "epoch": 0.4074074074074074, "grad_norm": 13.22938060760498, "learning_rate": 1.4283489096573207e-05, "loss": 1.0678, "step": 396 }, { "epoch": 0.40843621399176955, "grad_norm": 14.21168041229248, "learning_rate": 1.4319833852544132e-05, "loss": 1.6494, "step": 397 }, { "epoch": 0.4094650205761317, "grad_norm": 12.661002159118652, "learning_rate": 1.4356178608515057e-05, "loss": 1.8241, "step": 398 }, { "epoch": 0.4104938271604938, "grad_norm": 20.38874053955078, "learning_rate": 1.439252336448598e-05, "loss": 2.5338, "step": 399 }, { "epoch": 0.411522633744856, "grad_norm": 10.472373962402344, "learning_rate": 1.4428868120456905e-05, "loss": 0.728, "step": 400 }, { "epoch": 0.411522633744856, "eval_Qnli-dev_cosine_accuracy": 0.6640625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8612580299377441, "eval_Qnli-dev_cosine_ap": 0.7008833240002428, "eval_Qnli-dev_cosine_f1": 0.690391459074733, "eval_Qnli-dev_cosine_f1_threshold": 0.7775630950927734, "eval_Qnli-dev_cosine_precision": 0.5950920245398773, "eval_Qnli-dev_cosine_recall": 0.8220338983050848, "eval_Qnli-dev_dot_accuracy": 0.65625, "eval_Qnli-dev_dot_accuracy_threshold": 427.90985107421875, "eval_Qnli-dev_dot_ap": 0.5927778156562893, "eval_Qnli-dev_dot_f1": 0.662379421221865, "eval_Qnli-dev_dot_f1_threshold": 380.0942077636719, "eval_Qnli-dev_dot_precision": 0.533678756476684, "eval_Qnli-dev_dot_recall": 0.8728813559322034, "eval_Qnli-dev_euclidean_accuracy": 0.673828125, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.01819133758545, "eval_Qnli-dev_euclidean_ap": 0.7113131862333142, "eval_Qnli-dev_euclidean_f1": 0.6801470588235294, "eval_Qnli-dev_euclidean_f1_threshold": 14.90008544921875, "eval_Qnli-dev_euclidean_precision": 0.6006493506493507, "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, "eval_Qnli-dev_manhattan_accuracy": 0.673828125, "eval_Qnli-dev_manhattan_accuracy_threshold": 239.12254333496094, "eval_Qnli-dev_manhattan_ap": 0.7095040002502047, "eval_Qnli-dev_manhattan_f1": 0.6803418803418803, "eval_Qnli-dev_manhattan_f1_threshold": 318.11749267578125, "eval_Qnli-dev_manhattan_precision": 0.5702005730659025, "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, "eval_Qnli-dev_max_accuracy": 0.673828125, "eval_Qnli-dev_max_accuracy_threshold": 427.90985107421875, "eval_Qnli-dev_max_ap": 0.7113131862333142, "eval_Qnli-dev_max_f1": 0.690391459074733, "eval_Qnli-dev_max_f1_threshold": 380.0942077636719, "eval_Qnli-dev_max_precision": 0.6006493506493507, "eval_Qnli-dev_max_recall": 0.8728813559322034, "eval_allNLI-dev_cosine_accuracy": 0.720703125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9071913957595825, "eval_allNLI-dev_cosine_ap": 0.5455260839546557, "eval_allNLI-dev_cosine_f1": 0.5627376425855514, "eval_allNLI-dev_cosine_f1_threshold": 0.7851958274841309, "eval_allNLI-dev_cosine_precision": 0.4192634560906516, "eval_allNLI-dev_cosine_recall": 0.8554913294797688, "eval_allNLI-dev_dot_accuracy": 0.673828125, "eval_allNLI-dev_dot_accuracy_threshold": 495.9515380859375, "eval_allNLI-dev_dot_ap": 0.4440619711184598, "eval_allNLI-dev_dot_f1": 0.5368248772504091, "eval_allNLI-dev_dot_f1_threshold": 365.49859619140625, "eval_allNLI-dev_dot_precision": 0.3744292237442922, "eval_allNLI-dev_dot_recall": 0.9479768786127167, "eval_allNLI-dev_euclidean_accuracy": 0.716796875, "eval_allNLI-dev_euclidean_accuracy_threshold": 9.831633567810059, "eval_allNLI-dev_euclidean_ap": 0.5498277088824723, "eval_allNLI-dev_euclidean_f1": 0.5628997867803838, "eval_allNLI-dev_euclidean_f1_threshold": 13.911539077758789, "eval_allNLI-dev_euclidean_precision": 0.44594594594594594, "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, "eval_allNLI-dev_manhattan_accuracy": 0.716796875, "eval_allNLI-dev_manhattan_accuracy_threshold": 193.09060668945312, "eval_allNLI-dev_manhattan_ap": 0.5499837592661042, "eval_allNLI-dev_manhattan_f1": 0.5671641791044776, "eval_allNLI-dev_manhattan_f1_threshold": 302.8914489746094, "eval_allNLI-dev_manhattan_precision": 0.418732782369146, "eval_allNLI-dev_manhattan_recall": 0.8786127167630058, "eval_allNLI-dev_max_accuracy": 0.720703125, "eval_allNLI-dev_max_accuracy_threshold": 495.9515380859375, "eval_allNLI-dev_max_ap": 0.5499837592661042, "eval_allNLI-dev_max_f1": 0.5671641791044776, "eval_allNLI-dev_max_f1_threshold": 365.49859619140625, "eval_allNLI-dev_max_precision": 0.44594594594594594, "eval_allNLI-dev_max_recall": 0.9479768786127167, "eval_sequential_score": 0.7113131862333142, "eval_sts-test_pearson_cosine": 0.7693717840654692, "eval_sts-test_pearson_dot": 0.7198031376100279, "eval_sts-test_pearson_euclidean": 0.783845420495406, "eval_sts-test_pearson_manhattan": 0.7789636540660673, "eval_sts-test_pearson_max": 0.783845420495406, "eval_sts-test_spearman_cosine": 0.7856816515409163, "eval_sts-test_spearman_dot": 0.6995584755108273, "eval_sts-test_spearman_euclidean": 0.7742835947670496, "eval_sts-test_spearman_manhattan": 0.766819666133362, "eval_sts-test_spearman_max": 0.7856816515409163, "eval_vitaminc-pairs_loss": 3.1967031955718994, "eval_vitaminc-pairs_runtime": 3.1694, "eval_vitaminc-pairs_samples_per_second": 40.387, "eval_vitaminc-pairs_steps_per_second": 0.316, "step": 400 }, { "epoch": 0.411522633744856, "eval_negation-triplets_loss": 1.6347670555114746, "eval_negation-triplets_runtime": 0.7366, "eval_negation-triplets_samples_per_second": 173.76, "eval_negation-triplets_steps_per_second": 1.358, "step": 400 }, { "epoch": 0.411522633744856, "eval_scitail-pairs-pos_loss": 0.2562161684036255, "eval_scitail-pairs-pos_runtime": 0.7943, "eval_scitail-pairs-pos_samples_per_second": 161.157, "eval_scitail-pairs-pos_steps_per_second": 1.259, "step": 400 }, { "epoch": 0.411522633744856, "eval_scitail-pairs-qa_loss": 0.026608988642692566, "eval_scitail-pairs-qa_runtime": 0.5671, "eval_scitail-pairs-qa_samples_per_second": 225.717, "eval_scitail-pairs-qa_steps_per_second": 1.763, "step": 400 }, { "epoch": 0.411522633744856, "eval_xsum-pairs_loss": 1.1463801860809326, "eval_xsum-pairs_runtime": 3.0137, "eval_xsum-pairs_samples_per_second": 42.472, "eval_xsum-pairs_steps_per_second": 0.332, "step": 400 }, { "epoch": 0.411522633744856, "eval_sciq_pairs_loss": 0.14410454034805298, "eval_sciq_pairs_runtime": 3.4298, "eval_sciq_pairs_samples_per_second": 37.319, "eval_sciq_pairs_steps_per_second": 0.292, "step": 400 }, { "epoch": 0.411522633744856, "eval_qasc_pairs_loss": 0.9308192729949951, "eval_qasc_pairs_runtime": 0.6033, "eval_qasc_pairs_samples_per_second": 212.183, "eval_qasc_pairs_steps_per_second": 1.658, "step": 400 }, { "epoch": 0.411522633744856, "eval_openbookqa_pairs_loss": 1.5551338195800781, "eval_openbookqa_pairs_runtime": 0.5715, "eval_openbookqa_pairs_samples_per_second": 223.986, "eval_openbookqa_pairs_steps_per_second": 1.75, "step": 400 }, { "epoch": 0.411522633744856, "eval_msmarco_pairs_loss": 1.754774570465088, "eval_msmarco_pairs_runtime": 1.5121, "eval_msmarco_pairs_samples_per_second": 84.651, "eval_msmarco_pairs_steps_per_second": 0.661, "step": 400 }, { "epoch": 0.411522633744856, "eval_nq_pairs_loss": 2.163573980331421, "eval_nq_pairs_runtime": 2.8927, "eval_nq_pairs_samples_per_second": 44.249, "eval_nq_pairs_steps_per_second": 0.346, "step": 400 }, { "epoch": 0.411522633744856, "eval_trivia_pairs_loss": 1.6104655265808105, "eval_trivia_pairs_runtime": 3.4302, "eval_trivia_pairs_samples_per_second": 37.316, "eval_trivia_pairs_steps_per_second": 0.292, "step": 400 }, { "epoch": 0.411522633744856, "eval_gooaq_pairs_loss": 1.2485657930374146, "eval_gooaq_pairs_runtime": 0.9371, "eval_gooaq_pairs_samples_per_second": 136.585, "eval_gooaq_pairs_steps_per_second": 1.067, "step": 400 }, { "epoch": 0.411522633744856, "eval_paws-pos_loss": 0.04668813571333885, "eval_paws-pos_runtime": 0.6799, "eval_paws-pos_samples_per_second": 188.263, "eval_paws-pos_steps_per_second": 1.471, "step": 400 }, { "epoch": 0.411522633744856, "eval_global_dataset_loss": 0.9041730165481567, "eval_global_dataset_runtime": 13.3404, "eval_global_dataset_samples_per_second": 31.183, "eval_global_dataset_steps_per_second": 0.3, "step": 400 }, { "epoch": 0.4125514403292181, "grad_norm": 12.87179183959961, "learning_rate": 1.446521287642783e-05, "loss": 1.6954, "step": 401 }, { "epoch": 0.41358024691358025, "grad_norm": 11.029813766479492, "learning_rate": 1.4501557632398752e-05, "loss": 0.8749, "step": 402 }, { "epoch": 0.41460905349794236, "grad_norm": 12.643345832824707, "learning_rate": 1.4537902388369677e-05, "loss": 1.6856, "step": 403 }, { "epoch": 0.4156378600823045, "grad_norm": 3.3279924392700195, "learning_rate": 1.4574247144340601e-05, "loss": 0.1254, "step": 404 }, { "epoch": 0.4166666666666667, "grad_norm": 10.091893196105957, "learning_rate": 1.4610591900311525e-05, "loss": 0.6986, "step": 405 }, { "epoch": 0.4176954732510288, "grad_norm": 18.337682723999023, "learning_rate": 1.464693665628245e-05, "loss": 2.179, "step": 406 }, { "epoch": 0.41872427983539096, "grad_norm": 19.924663543701172, "learning_rate": 1.4683281412253373e-05, "loss": 2.7119, "step": 407 }, { "epoch": 0.41975308641975306, "grad_norm": 11.52767562866211, "learning_rate": 1.4719626168224297e-05, "loss": 1.0071, "step": 408 }, { "epoch": 0.4207818930041152, "grad_norm": 13.5806884765625, "learning_rate": 1.4755970924195223e-05, "loss": 1.1775, "step": 409 }, { "epoch": 0.4218106995884774, "grad_norm": 12.70963191986084, "learning_rate": 1.4792315680166145e-05, "loss": 1.1221, "step": 410 }, { "epoch": 0.4228395061728395, "grad_norm": 15.391998291015625, "learning_rate": 1.482866043613707e-05, "loss": 2.146, "step": 411 }, { "epoch": 0.42386831275720166, "grad_norm": 13.149142265319824, "learning_rate": 1.4865005192107995e-05, "loss": 1.1117, "step": 412 }, { "epoch": 0.42489711934156377, "grad_norm": 12.455025672912598, "learning_rate": 1.4901349948078918e-05, "loss": 1.1356, "step": 413 }, { "epoch": 0.42592592592592593, "grad_norm": 11.366116523742676, "learning_rate": 1.4937694704049843e-05, "loss": 1.0309, "step": 414 }, { "epoch": 0.4269547325102881, "grad_norm": 18.122159957885742, "learning_rate": 1.4974039460020767e-05, "loss": 2.0714, "step": 415 }, { "epoch": 0.4279835390946502, "grad_norm": 15.322531700134277, "learning_rate": 1.501038421599169e-05, "loss": 2.4046, "step": 416 }, { "epoch": 0.42901234567901236, "grad_norm": 8.748584747314453, "learning_rate": 1.5046728971962615e-05, "loss": 0.3751, "step": 417 }, { "epoch": 0.43004115226337447, "grad_norm": 11.135249137878418, "learning_rate": 1.508307372793354e-05, "loss": 0.9194, "step": 418 }, { "epoch": 0.43106995884773663, "grad_norm": 2.534362554550171, "learning_rate": 1.5119418483904463e-05, "loss": 0.0876, "step": 419 }, { "epoch": 0.43209876543209874, "grad_norm": 11.441560745239258, "learning_rate": 1.5155763239875387e-05, "loss": 1.0256, "step": 420 }, { "epoch": 0.43209876543209874, "eval_Qnli-dev_cosine_accuracy": 0.669921875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.82877516746521, "eval_Qnli-dev_cosine_ap": 0.7039769311253984, "eval_Qnli-dev_cosine_f1": 0.6916524701873935, "eval_Qnli-dev_cosine_f1_threshold": 0.760260820388794, "eval_Qnli-dev_cosine_precision": 0.5783475783475783, "eval_Qnli-dev_cosine_recall": 0.8601694915254238, "eval_Qnli-dev_dot_accuracy": 0.65234375, "eval_Qnli-dev_dot_accuracy_threshold": 410.722412109375, "eval_Qnli-dev_dot_ap": 0.5998569073160949, "eval_Qnli-dev_dot_f1": 0.6757679180887372, "eval_Qnli-dev_dot_f1_threshold": 395.8144226074219, "eval_Qnli-dev_dot_precision": 0.5657142857142857, "eval_Qnli-dev_dot_recall": 0.8389830508474576, "eval_Qnli-dev_euclidean_accuracy": 0.67578125, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.777875900268555, "eval_Qnli-dev_euclidean_ap": 0.7165015785234907, "eval_Qnli-dev_euclidean_f1": 0.6878306878306878, "eval_Qnli-dev_euclidean_f1_threshold": 15.445184707641602, "eval_Qnli-dev_euclidean_precision": 0.5891238670694864, "eval_Qnli-dev_euclidean_recall": 0.826271186440678, "eval_Qnli-dev_manhattan_accuracy": 0.6796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 282.639892578125, "eval_Qnli-dev_manhattan_ap": 0.7163039161971565, "eval_Qnli-dev_manhattan_f1": 0.6854304635761589, "eval_Qnli-dev_manhattan_f1_threshold": 328.2414855957031, "eval_Qnli-dev_manhattan_precision": 0.5625, "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, "eval_Qnli-dev_max_accuracy": 0.6796875, "eval_Qnli-dev_max_accuracy_threshold": 410.722412109375, "eval_Qnli-dev_max_ap": 0.7165015785234907, "eval_Qnli-dev_max_f1": 0.6916524701873935, "eval_Qnli-dev_max_f1_threshold": 395.8144226074219, "eval_Qnli-dev_max_precision": 0.5891238670694864, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.712890625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9146419167518616, "eval_allNLI-dev_cosine_ap": 0.5483284911998406, "eval_allNLI-dev_cosine_f1": 0.5627376425855514, "eval_allNLI-dev_cosine_f1_threshold": 0.8004182577133179, "eval_allNLI-dev_cosine_precision": 0.4192634560906516, "eval_allNLI-dev_cosine_recall": 0.8554913294797688, "eval_allNLI-dev_dot_accuracy": 0.669921875, "eval_allNLI-dev_dot_accuracy_threshold": 488.09686279296875, "eval_allNLI-dev_dot_ap": 0.4365952351888237, "eval_allNLI-dev_dot_f1": 0.5369774919614149, "eval_allNLI-dev_dot_f1_threshold": 376.2415466308594, "eval_allNLI-dev_dot_precision": 0.37193763919821826, "eval_allNLI-dev_dot_recall": 0.9653179190751445, "eval_allNLI-dev_euclidean_accuracy": 0.716796875, "eval_allNLI-dev_euclidean_accuracy_threshold": 9.584402084350586, "eval_allNLI-dev_euclidean_ap": 0.552898886980593, "eval_allNLI-dev_euclidean_f1": 0.5714285714285714, "eval_allNLI-dev_euclidean_f1_threshold": 12.487444877624512, "eval_allNLI-dev_euclidean_precision": 0.4978540772532189, "eval_allNLI-dev_euclidean_recall": 0.6705202312138728, "eval_allNLI-dev_manhattan_accuracy": 0.71484375, "eval_allNLI-dev_manhattan_accuracy_threshold": 189.19187927246094, "eval_allNLI-dev_manhattan_ap": 0.5494702966109603, "eval_allNLI-dev_manhattan_f1": 0.5689655172413792, "eval_allNLI-dev_manhattan_f1_threshold": 273.392578125, "eval_allNLI-dev_manhattan_precision": 0.4536082474226804, "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, "eval_allNLI-dev_max_accuracy": 0.716796875, "eval_allNLI-dev_max_accuracy_threshold": 488.09686279296875, "eval_allNLI-dev_max_ap": 0.552898886980593, "eval_allNLI-dev_max_f1": 0.5714285714285714, "eval_allNLI-dev_max_f1_threshold": 376.2415466308594, "eval_allNLI-dev_max_precision": 0.4978540772532189, "eval_allNLI-dev_max_recall": 0.9653179190751445, "eval_sequential_score": 0.7165015785234907, "eval_sts-test_pearson_cosine": 0.7669433117508329, "eval_sts-test_pearson_dot": 0.7202911200663573, "eval_sts-test_pearson_euclidean": 0.7808069960273615, "eval_sts-test_pearson_manhattan": 0.7749037661158105, "eval_sts-test_pearson_max": 0.7808069960273615, "eval_sts-test_spearman_cosine": 0.7837364484620162, "eval_sts-test_spearman_dot": 0.6980170630634619, "eval_sts-test_spearman_euclidean": 0.7726895995875491, "eval_sts-test_spearman_manhattan": 0.7648159284045623, "eval_sts-test_spearman_max": 0.7837364484620162, "eval_vitaminc-pairs_loss": 3.1363868713378906, "eval_vitaminc-pairs_runtime": 3.1813, "eval_vitaminc-pairs_samples_per_second": 40.235, "eval_vitaminc-pairs_steps_per_second": 0.314, "step": 420 }, { "epoch": 0.43209876543209874, "eval_negation-triplets_loss": 1.596004843711853, "eval_negation-triplets_runtime": 0.7373, "eval_negation-triplets_samples_per_second": 173.6, "eval_negation-triplets_steps_per_second": 1.356, "step": 420 }, { "epoch": 0.43209876543209874, "eval_scitail-pairs-pos_loss": 0.25697287917137146, "eval_scitail-pairs-pos_runtime": 0.8101, "eval_scitail-pairs-pos_samples_per_second": 158.012, "eval_scitail-pairs-pos_steps_per_second": 1.234, "step": 420 }, { "epoch": 0.43209876543209874, "eval_scitail-pairs-qa_loss": 0.025879494845867157, "eval_scitail-pairs-qa_runtime": 0.5751, "eval_scitail-pairs-qa_samples_per_second": 222.558, "eval_scitail-pairs-qa_steps_per_second": 1.739, "step": 420 }, { "epoch": 0.43209876543209874, "eval_xsum-pairs_loss": 1.168808102607727, "eval_xsum-pairs_runtime": 3.0187, "eval_xsum-pairs_samples_per_second": 42.402, "eval_xsum-pairs_steps_per_second": 0.331, "step": 420 }, { "epoch": 0.43209876543209874, "eval_sciq_pairs_loss": 0.1468716710805893, "eval_sciq_pairs_runtime": 3.3982, "eval_sciq_pairs_samples_per_second": 37.667, "eval_sciq_pairs_steps_per_second": 0.294, "step": 420 }, { "epoch": 0.43209876543209874, "eval_qasc_pairs_loss": 0.9004628658294678, "eval_qasc_pairs_runtime": 0.6001, "eval_qasc_pairs_samples_per_second": 213.312, "eval_qasc_pairs_steps_per_second": 1.666, "step": 420 }, { "epoch": 0.43209876543209874, "eval_openbookqa_pairs_loss": 1.666839361190796, "eval_openbookqa_pairs_runtime": 0.572, "eval_openbookqa_pairs_samples_per_second": 223.786, "eval_openbookqa_pairs_steps_per_second": 1.748, "step": 420 }, { "epoch": 0.43209876543209874, "eval_msmarco_pairs_loss": 1.7236199378967285, "eval_msmarco_pairs_runtime": 1.524, "eval_msmarco_pairs_samples_per_second": 83.991, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 420 }, { "epoch": 0.43209876543209874, "eval_nq_pairs_loss": 2.0736727714538574, "eval_nq_pairs_runtime": 2.901, "eval_nq_pairs_samples_per_second": 44.123, "eval_nq_pairs_steps_per_second": 0.345, "step": 420 }, { "epoch": 0.43209876543209874, "eval_trivia_pairs_loss": 1.4644691944122314, "eval_trivia_pairs_runtime": 3.4467, "eval_trivia_pairs_samples_per_second": 37.137, "eval_trivia_pairs_steps_per_second": 0.29, "step": 420 }, { "epoch": 0.43209876543209874, "eval_gooaq_pairs_loss": 1.197310209274292, "eval_gooaq_pairs_runtime": 0.9409, "eval_gooaq_pairs_samples_per_second": 136.041, "eval_gooaq_pairs_steps_per_second": 1.063, "step": 420 }, { "epoch": 0.43209876543209874, "eval_paws-pos_loss": 0.04723614454269409, "eval_paws-pos_runtime": 0.6946, "eval_paws-pos_samples_per_second": 184.277, "eval_paws-pos_steps_per_second": 1.44, "step": 420 }, { "epoch": 0.43209876543209874, "eval_global_dataset_loss": 0.8761696219444275, "eval_global_dataset_runtime": 13.3489, "eval_global_dataset_samples_per_second": 31.164, "eval_global_dataset_steps_per_second": 0.3, "step": 420 }, { "epoch": 0.4331275720164609, "grad_norm": 11.209942817687988, "learning_rate": 1.5192107995846313e-05, "loss": 0.75, "step": 421 }, { "epoch": 0.43415637860082307, "grad_norm": 12.36668872833252, "learning_rate": 1.5228452751817235e-05, "loss": 1.0858, "step": 422 }, { "epoch": 0.4351851851851852, "grad_norm": 20.955825805664062, "learning_rate": 1.526479750778816e-05, "loss": 2.2561, "step": 423 }, { "epoch": 0.43621399176954734, "grad_norm": 6.835966110229492, "learning_rate": 1.5301142263759087e-05, "loss": 0.2822, "step": 424 }, { "epoch": 0.43724279835390945, "grad_norm": 14.905786514282227, "learning_rate": 1.5337487019730007e-05, "loss": 1.286, "step": 425 }, { "epoch": 0.4382716049382716, "grad_norm": 16.917980194091797, "learning_rate": 1.537383177570093e-05, "loss": 1.6777, "step": 426 }, { "epoch": 0.43930041152263377, "grad_norm": 16.51511573791504, "learning_rate": 1.541017653167186e-05, "loss": 1.609, "step": 427 }, { "epoch": 0.4403292181069959, "grad_norm": 12.164166450500488, "learning_rate": 1.544652128764278e-05, "loss": 0.8033, "step": 428 }, { "epoch": 0.44135802469135804, "grad_norm": 23.55919647216797, "learning_rate": 1.5482866043613707e-05, "loss": 3.845, "step": 429 }, { "epoch": 0.44238683127572015, "grad_norm": 12.458250999450684, "learning_rate": 1.551921079958463e-05, "loss": 1.0592, "step": 430 }, { "epoch": 0.4434156378600823, "grad_norm": 11.092578887939453, "learning_rate": 1.5555555555555555e-05, "loss": 0.7032, "step": 431 }, { "epoch": 0.4444444444444444, "grad_norm": 1.7349342107772827, "learning_rate": 1.559190031152648e-05, "loss": 0.0603, "step": 432 }, { "epoch": 0.4454732510288066, "grad_norm": 18.856273651123047, "learning_rate": 1.5628245067497403e-05, "loss": 2.1237, "step": 433 }, { "epoch": 0.44650205761316875, "grad_norm": 14.356558799743652, "learning_rate": 1.5664589823468327e-05, "loss": 1.3715, "step": 434 }, { "epoch": 0.44753086419753085, "grad_norm": 17.297388076782227, "learning_rate": 1.570093457943925e-05, "loss": 2.1316, "step": 435 }, { "epoch": 0.448559670781893, "grad_norm": 14.070610046386719, "learning_rate": 1.5737279335410175e-05, "loss": 1.2195, "step": 436 }, { "epoch": 0.4495884773662551, "grad_norm": 19.30897331237793, "learning_rate": 1.57736240913811e-05, "loss": 3.4846, "step": 437 }, { "epoch": 0.4506172839506173, "grad_norm": 14.064212799072266, "learning_rate": 1.5809968847352023e-05, "loss": 2.1621, "step": 438 }, { "epoch": 0.45164609053497945, "grad_norm": 21.56043815612793, "learning_rate": 1.5846313603322947e-05, "loss": 2.6488, "step": 439 }, { "epoch": 0.45267489711934156, "grad_norm": 13.354293823242188, "learning_rate": 1.588265835929387e-05, "loss": 1.1381, "step": 440 }, { "epoch": 0.45267489711934156, "eval_Qnli-dev_cosine_accuracy": 0.6796875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8328311443328857, "eval_Qnli-dev_cosine_ap": 0.7086015466946073, "eval_Qnli-dev_cosine_f1": 0.684297520661157, "eval_Qnli-dev_cosine_f1_threshold": 0.7668030858039856, "eval_Qnli-dev_cosine_precision": 0.5609756097560976, "eval_Qnli-dev_cosine_recall": 0.8771186440677966, "eval_Qnli-dev_dot_accuracy": 0.638671875, "eval_Qnli-dev_dot_accuracy_threshold": 458.50982666015625, "eval_Qnli-dev_dot_ap": 0.5955959473763655, "eval_Qnli-dev_dot_f1": 0.6643598615916955, "eval_Qnli-dev_dot_f1_threshold": 421.7713623046875, "eval_Qnli-dev_dot_precision": 0.5614035087719298, "eval_Qnli-dev_dot_recall": 0.8135593220338984, "eval_Qnli-dev_euclidean_accuracy": 0.681640625, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.273839950561523, "eval_Qnli-dev_euclidean_ap": 0.7198905948186887, "eval_Qnli-dev_euclidean_f1": 0.686106346483705, "eval_Qnli-dev_euclidean_f1_threshold": 15.542667388916016, "eval_Qnli-dev_euclidean_precision": 0.5763688760806917, "eval_Qnli-dev_euclidean_recall": 0.847457627118644, "eval_Qnli-dev_manhattan_accuracy": 0.67578125, "eval_Qnli-dev_manhattan_accuracy_threshold": 271.1224060058594, "eval_Qnli-dev_manhattan_ap": 0.7163103084602304, "eval_Qnli-dev_manhattan_f1": 0.689655172413793, "eval_Qnli-dev_manhattan_f1_threshold": 314.0755310058594, "eval_Qnli-dev_manhattan_precision": 0.5813953488372093, "eval_Qnli-dev_manhattan_recall": 0.847457627118644, "eval_Qnli-dev_max_accuracy": 0.681640625, "eval_Qnli-dev_max_accuracy_threshold": 458.50982666015625, "eval_Qnli-dev_max_ap": 0.7198905948186887, "eval_Qnli-dev_max_f1": 0.689655172413793, "eval_Qnli-dev_max_f1_threshold": 421.7713623046875, "eval_Qnli-dev_max_precision": 0.5813953488372093, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.712890625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9271770715713501, "eval_allNLI-dev_cosine_ap": 0.5491311356331465, "eval_allNLI-dev_cosine_f1": 0.5708245243128964, "eval_allNLI-dev_cosine_f1_threshold": 0.8302508592605591, "eval_allNLI-dev_cosine_precision": 0.45, "eval_allNLI-dev_cosine_recall": 0.7803468208092486, "eval_allNLI-dev_dot_accuracy": 0.669921875, "eval_allNLI-dev_dot_accuracy_threshold": 526.2755126953125, "eval_allNLI-dev_dot_ap": 0.4493470756775462, "eval_allNLI-dev_dot_f1": 0.5302782324058919, "eval_allNLI-dev_dot_f1_threshold": 409.7859802246094, "eval_allNLI-dev_dot_precision": 0.3698630136986301, "eval_allNLI-dev_dot_recall": 0.9364161849710982, "eval_allNLI-dev_euclidean_accuracy": 0.712890625, "eval_allNLI-dev_euclidean_accuracy_threshold": 8.942924499511719, "eval_allNLI-dev_euclidean_ap": 0.5529455148329905, "eval_allNLI-dev_euclidean_f1": 0.5751633986928105, "eval_allNLI-dev_euclidean_f1_threshold": 13.387319564819336, "eval_allNLI-dev_euclidean_precision": 0.46153846153846156, "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, "eval_allNLI-dev_manhattan_accuracy": 0.71875, "eval_allNLI-dev_manhattan_accuracy_threshold": 201.3372039794922, "eval_allNLI-dev_manhattan_ap": 0.5535429305922366, "eval_allNLI-dev_manhattan_f1": 0.5675057208237987, "eval_allNLI-dev_manhattan_f1_threshold": 263.4776611328125, "eval_allNLI-dev_manhattan_precision": 0.4696969696969697, "eval_allNLI-dev_manhattan_recall": 0.7167630057803468, "eval_allNLI-dev_max_accuracy": 0.71875, "eval_allNLI-dev_max_accuracy_threshold": 526.2755126953125, "eval_allNLI-dev_max_ap": 0.5535429305922366, "eval_allNLI-dev_max_f1": 0.5751633986928105, "eval_allNLI-dev_max_f1_threshold": 409.7859802246094, "eval_allNLI-dev_max_precision": 0.4696969696969697, "eval_allNLI-dev_max_recall": 0.9364161849710982, "eval_sequential_score": 0.7198905948186887, "eval_sts-test_pearson_cosine": 0.7695750855183039, "eval_sts-test_pearson_dot": 0.7238414788807679, "eval_sts-test_pearson_euclidean": 0.7903775285225014, "eval_sts-test_pearson_manhattan": 0.7850145963227658, "eval_sts-test_pearson_max": 0.7903775285225014, "eval_sts-test_spearman_cosine": 0.7907903212875741, "eval_sts-test_spearman_dot": 0.7040900777418432, "eval_sts-test_spearman_euclidean": 0.7829523168599161, "eval_sts-test_spearman_manhattan": 0.7755189085864977, "eval_sts-test_spearman_max": 0.7907903212875741, "eval_vitaminc-pairs_loss": 3.1597630977630615, "eval_vitaminc-pairs_runtime": 3.1844, "eval_vitaminc-pairs_samples_per_second": 40.196, "eval_vitaminc-pairs_steps_per_second": 0.314, "step": 440 }, { "epoch": 0.45267489711934156, "eval_negation-triplets_loss": 1.5735217332839966, "eval_negation-triplets_runtime": 0.7333, "eval_negation-triplets_samples_per_second": 174.547, "eval_negation-triplets_steps_per_second": 1.364, "step": 440 }, { "epoch": 0.45267489711934156, "eval_scitail-pairs-pos_loss": 0.23437997698783875, "eval_scitail-pairs-pos_runtime": 0.8055, "eval_scitail-pairs-pos_samples_per_second": 158.909, "eval_scitail-pairs-pos_steps_per_second": 1.241, "step": 440 }, { "epoch": 0.45267489711934156, "eval_scitail-pairs-qa_loss": 0.026873519644141197, "eval_scitail-pairs-qa_runtime": 0.5709, "eval_scitail-pairs-qa_samples_per_second": 224.215, "eval_scitail-pairs-qa_steps_per_second": 1.752, "step": 440 }, { "epoch": 0.45267489711934156, "eval_xsum-pairs_loss": 1.0596333742141724, "eval_xsum-pairs_runtime": 3.0101, "eval_xsum-pairs_samples_per_second": 42.523, "eval_xsum-pairs_steps_per_second": 0.332, "step": 440 }, { "epoch": 0.45267489711934156, "eval_sciq_pairs_loss": 0.14231224358081818, "eval_sciq_pairs_runtime": 3.4147, "eval_sciq_pairs_samples_per_second": 37.485, "eval_sciq_pairs_steps_per_second": 0.293, "step": 440 }, { "epoch": 0.45267489711934156, "eval_qasc_pairs_loss": 0.8660905361175537, "eval_qasc_pairs_runtime": 0.5984, "eval_qasc_pairs_samples_per_second": 213.886, "eval_qasc_pairs_steps_per_second": 1.671, "step": 440 }, { "epoch": 0.45267489711934156, "eval_openbookqa_pairs_loss": 1.5507510900497437, "eval_openbookqa_pairs_runtime": 0.576, "eval_openbookqa_pairs_samples_per_second": 222.233, "eval_openbookqa_pairs_steps_per_second": 1.736, "step": 440 }, { "epoch": 0.45267489711934156, "eval_msmarco_pairs_loss": 1.6068974733352661, "eval_msmarco_pairs_runtime": 1.5129, "eval_msmarco_pairs_samples_per_second": 84.608, "eval_msmarco_pairs_steps_per_second": 0.661, "step": 440 }, { "epoch": 0.45267489711934156, "eval_nq_pairs_loss": 2.067472457885742, "eval_nq_pairs_runtime": 2.8922, "eval_nq_pairs_samples_per_second": 44.258, "eval_nq_pairs_steps_per_second": 0.346, "step": 440 }, { "epoch": 0.45267489711934156, "eval_trivia_pairs_loss": 1.4165655374526978, "eval_trivia_pairs_runtime": 3.4314, "eval_trivia_pairs_samples_per_second": 37.303, "eval_trivia_pairs_steps_per_second": 0.291, "step": 440 }, { "epoch": 0.45267489711934156, "eval_gooaq_pairs_loss": 1.204696536064148, "eval_gooaq_pairs_runtime": 0.9383, "eval_gooaq_pairs_samples_per_second": 136.423, "eval_gooaq_pairs_steps_per_second": 1.066, "step": 440 }, { "epoch": 0.45267489711934156, "eval_paws-pos_loss": 0.04588289558887482, "eval_paws-pos_runtime": 0.6831, "eval_paws-pos_samples_per_second": 187.395, "eval_paws-pos_steps_per_second": 1.464, "step": 440 }, { "epoch": 0.45267489711934156, "eval_global_dataset_loss": 0.8645310997962952, "eval_global_dataset_runtime": 13.3758, "eval_global_dataset_samples_per_second": 31.101, "eval_global_dataset_steps_per_second": 0.299, "step": 440 }, { "epoch": 0.4537037037037037, "grad_norm": 14.464020729064941, "learning_rate": 1.5919003115264795e-05, "loss": 1.7843, "step": 441 }, { "epoch": 0.4547325102880658, "grad_norm": 15.444217681884766, "learning_rate": 1.595534787123572e-05, "loss": 1.6954, "step": 442 }, { "epoch": 0.455761316872428, "grad_norm": 10.515376091003418, "learning_rate": 1.5991692627206643e-05, "loss": 0.8673, "step": 443 }, { "epoch": 0.4567901234567901, "grad_norm": 10.527128219604492, "learning_rate": 1.6028037383177567e-05, "loss": 0.8696, "step": 444 }, { "epoch": 0.45781893004115226, "grad_norm": 13.480452537536621, "learning_rate": 1.6064382139148495e-05, "loss": 1.5461, "step": 445 }, { "epoch": 0.4588477366255144, "grad_norm": 11.253717422485352, "learning_rate": 1.6100726895119415e-05, "loss": 0.9683, "step": 446 }, { "epoch": 0.45987654320987653, "grad_norm": 12.138679504394531, "learning_rate": 1.6137071651090343e-05, "loss": 1.2983, "step": 447 }, { "epoch": 0.4609053497942387, "grad_norm": 2.5345211029052734, "learning_rate": 1.6173416407061267e-05, "loss": 0.0942, "step": 448 }, { "epoch": 0.4619341563786008, "grad_norm": 10.980514526367188, "learning_rate": 1.6209761163032187e-05, "loss": 0.8264, "step": 449 }, { "epoch": 0.46296296296296297, "grad_norm": 16.59669303894043, "learning_rate": 1.6246105919003115e-05, "loss": 2.1522, "step": 450 }, { "epoch": 0.46399176954732513, "grad_norm": 21.501604080200195, "learning_rate": 1.628245067497404e-05, "loss": 2.6668, "step": 451 }, { "epoch": 0.46502057613168724, "grad_norm": 11.803515434265137, "learning_rate": 1.6318795430944963e-05, "loss": 0.9999, "step": 452 }, { "epoch": 0.4660493827160494, "grad_norm": 13.230558395385742, "learning_rate": 1.6355140186915887e-05, "loss": 0.9551, "step": 453 }, { "epoch": 0.4670781893004115, "grad_norm": 11.019618034362793, "learning_rate": 1.639148494288681e-05, "loss": 0.8174, "step": 454 }, { "epoch": 0.46810699588477367, "grad_norm": 14.335307121276855, "learning_rate": 1.6427829698857735e-05, "loss": 1.6169, "step": 455 }, { "epoch": 0.4691358024691358, "grad_norm": 5.958987236022949, "learning_rate": 1.646417445482866e-05, "loss": 0.2584, "step": 456 }, { "epoch": 0.47016460905349794, "grad_norm": 14.919219970703125, "learning_rate": 1.6500519210799583e-05, "loss": 1.2947, "step": 457 }, { "epoch": 0.4711934156378601, "grad_norm": 12.892438888549805, "learning_rate": 1.6536863966770507e-05, "loss": 1.0283, "step": 458 }, { "epoch": 0.4722222222222222, "grad_norm": 12.579314231872559, "learning_rate": 1.657320872274143e-05, "loss": 1.0379, "step": 459 }, { "epoch": 0.4732510288065844, "grad_norm": 17.32071876525879, "learning_rate": 1.6609553478712355e-05, "loss": 2.4063, "step": 460 }, { "epoch": 0.4732510288065844, "eval_Qnli-dev_cosine_accuracy": 0.66015625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8584603071212769, "eval_Qnli-dev_cosine_ap": 0.6987965309483122, "eval_Qnli-dev_cosine_f1": 0.6860068259385665, "eval_Qnli-dev_cosine_f1_threshold": 0.767835259437561, "eval_Qnli-dev_cosine_precision": 0.5742857142857143, "eval_Qnli-dev_cosine_recall": 0.8516949152542372, "eval_Qnli-dev_dot_accuracy": 0.64453125, "eval_Qnli-dev_dot_accuracy_threshold": 446.875, "eval_Qnli-dev_dot_ap": 0.5901482043145834, "eval_Qnli-dev_dot_f1": 0.6643109540636043, "eval_Qnli-dev_dot_f1_threshold": 406.9656982421875, "eval_Qnli-dev_dot_precision": 0.5696969696969697, "eval_Qnli-dev_dot_recall": 0.7966101694915254, "eval_Qnli-dev_euclidean_accuracy": 0.66796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 11.938894271850586, "eval_Qnli-dev_euclidean_ap": 0.7074711630770054, "eval_Qnli-dev_euclidean_f1": 0.6917808219178082, "eval_Qnli-dev_euclidean_f1_threshold": 15.646432876586914, "eval_Qnli-dev_euclidean_precision": 0.5804597701149425, "eval_Qnli-dev_euclidean_recall": 0.8559322033898306, "eval_Qnli-dev_manhattan_accuracy": 0.6640625, "eval_Qnli-dev_manhattan_accuracy_threshold": 258.43310546875, "eval_Qnli-dev_manhattan_ap": 0.7074831376971712, "eval_Qnli-dev_manhattan_f1": 0.6837606837606838, "eval_Qnli-dev_manhattan_f1_threshold": 317.3417053222656, "eval_Qnli-dev_manhattan_precision": 0.5730659025787965, "eval_Qnli-dev_manhattan_recall": 0.847457627118644, "eval_Qnli-dev_max_accuracy": 0.66796875, "eval_Qnli-dev_max_accuracy_threshold": 446.875, "eval_Qnli-dev_max_ap": 0.7074831376971712, "eval_Qnli-dev_max_f1": 0.6917808219178082, "eval_Qnli-dev_max_f1_threshold": 406.9656982421875, "eval_Qnli-dev_max_precision": 0.5804597701149425, "eval_Qnli-dev_max_recall": 0.8559322033898306, "eval_allNLI-dev_cosine_accuracy": 0.71484375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9075762033462524, "eval_allNLI-dev_cosine_ap": 0.5556209722685957, "eval_allNLI-dev_cosine_f1": 0.5688487584650113, "eval_allNLI-dev_cosine_f1_threshold": 0.822675347328186, "eval_allNLI-dev_cosine_precision": 0.4666666666666667, "eval_allNLI-dev_cosine_recall": 0.7283236994219653, "eval_allNLI-dev_dot_accuracy": 0.666015625, "eval_allNLI-dev_dot_accuracy_threshold": 479.59765625, "eval_allNLI-dev_dot_ap": 0.4395722804668881, "eval_allNLI-dev_dot_f1": 0.5365853658536586, "eval_allNLI-dev_dot_f1_threshold": 378.3732604980469, "eval_allNLI-dev_dot_precision": 0.38403990024937656, "eval_allNLI-dev_dot_recall": 0.8901734104046243, "eval_allNLI-dev_euclidean_accuracy": 0.7265625, "eval_allNLI-dev_euclidean_accuracy_threshold": 9.884578704833984, "eval_allNLI-dev_euclidean_ap": 0.5607678101966321, "eval_allNLI-dev_euclidean_f1": 0.5841121495327103, "eval_allNLI-dev_euclidean_f1_threshold": 13.222391128540039, "eval_allNLI-dev_euclidean_precision": 0.49019607843137253, "eval_allNLI-dev_euclidean_recall": 0.7225433526011561, "eval_allNLI-dev_manhattan_accuracy": 0.71875, "eval_allNLI-dev_manhattan_accuracy_threshold": 207.45559692382812, "eval_allNLI-dev_manhattan_ap": 0.5594530995989421, "eval_allNLI-dev_manhattan_f1": 0.5664062499999999, "eval_allNLI-dev_manhattan_f1_threshold": 298.21136474609375, "eval_allNLI-dev_manhattan_precision": 0.4277286135693215, "eval_allNLI-dev_manhattan_recall": 0.838150289017341, "eval_allNLI-dev_max_accuracy": 0.7265625, "eval_allNLI-dev_max_accuracy_threshold": 479.59765625, "eval_allNLI-dev_max_ap": 0.5607678101966321, "eval_allNLI-dev_max_f1": 0.5841121495327103, "eval_allNLI-dev_max_f1_threshold": 378.3732604980469, "eval_allNLI-dev_max_precision": 0.49019607843137253, "eval_allNLI-dev_max_recall": 0.8901734104046243, "eval_sequential_score": 0.7074831376971712, "eval_sts-test_pearson_cosine": 0.7825801667596759, "eval_sts-test_pearson_dot": 0.7405682776064579, "eval_sts-test_pearson_euclidean": 0.8045260928771718, "eval_sts-test_pearson_manhattan": 0.8000832846763656, "eval_sts-test_pearson_max": 0.8045260928771718, "eval_sts-test_spearman_cosine": 0.8065376636535482, "eval_sts-test_spearman_dot": 0.7210651262128288, "eval_sts-test_spearman_euclidean": 0.7970397901896217, "eval_sts-test_spearman_manhattan": 0.790139056180545, "eval_sts-test_spearman_max": 0.8065376636535482, "eval_vitaminc-pairs_loss": 3.132262945175171, "eval_vitaminc-pairs_runtime": 3.1567, "eval_vitaminc-pairs_samples_per_second": 40.548, "eval_vitaminc-pairs_steps_per_second": 0.317, "step": 460 }, { "epoch": 0.4732510288065844, "eval_negation-triplets_loss": 1.4925687313079834, "eval_negation-triplets_runtime": 0.7314, "eval_negation-triplets_samples_per_second": 175.004, "eval_negation-triplets_steps_per_second": 1.367, "step": 460 }, { "epoch": 0.4732510288065844, "eval_scitail-pairs-pos_loss": 0.20003551244735718, "eval_scitail-pairs-pos_runtime": 0.7903, "eval_scitail-pairs-pos_samples_per_second": 161.967, "eval_scitail-pairs-pos_steps_per_second": 1.265, "step": 460 }, { "epoch": 0.4732510288065844, "eval_scitail-pairs-qa_loss": 0.019925443455576897, "eval_scitail-pairs-qa_runtime": 0.5973, "eval_scitail-pairs-qa_samples_per_second": 214.291, "eval_scitail-pairs-qa_steps_per_second": 1.674, "step": 460 }, { "epoch": 0.4732510288065844, "eval_xsum-pairs_loss": 1.011654019355774, "eval_xsum-pairs_runtime": 3.0219, "eval_xsum-pairs_samples_per_second": 42.358, "eval_xsum-pairs_steps_per_second": 0.331, "step": 460 }, { "epoch": 0.4732510288065844, "eval_sciq_pairs_loss": 0.1401093304157257, "eval_sciq_pairs_runtime": 3.4024, "eval_sciq_pairs_samples_per_second": 37.621, "eval_sciq_pairs_steps_per_second": 0.294, "step": 460 }, { "epoch": 0.4732510288065844, "eval_qasc_pairs_loss": 0.8895432949066162, "eval_qasc_pairs_runtime": 0.5956, "eval_qasc_pairs_samples_per_second": 214.909, "eval_qasc_pairs_steps_per_second": 1.679, "step": 460 }, { "epoch": 0.4732510288065844, "eval_openbookqa_pairs_loss": 1.5750139951705933, "eval_openbookqa_pairs_runtime": 0.5749, "eval_openbookqa_pairs_samples_per_second": 222.663, "eval_openbookqa_pairs_steps_per_second": 1.74, "step": 460 }, { "epoch": 0.4732510288065844, "eval_msmarco_pairs_loss": 1.5957564115524292, "eval_msmarco_pairs_runtime": 1.5166, "eval_msmarco_pairs_samples_per_second": 84.401, "eval_msmarco_pairs_steps_per_second": 0.659, "step": 460 }, { "epoch": 0.4732510288065844, "eval_nq_pairs_loss": 1.8501969575881958, "eval_nq_pairs_runtime": 2.9017, "eval_nq_pairs_samples_per_second": 44.113, "eval_nq_pairs_steps_per_second": 0.345, "step": 460 }, { "epoch": 0.4732510288065844, "eval_trivia_pairs_loss": 1.3718889951705933, "eval_trivia_pairs_runtime": 3.4316, "eval_trivia_pairs_samples_per_second": 37.3, "eval_trivia_pairs_steps_per_second": 0.291, "step": 460 }, { "epoch": 0.4732510288065844, "eval_gooaq_pairs_loss": 1.0226097106933594, "eval_gooaq_pairs_runtime": 0.9449, "eval_gooaq_pairs_samples_per_second": 135.461, "eval_gooaq_pairs_steps_per_second": 1.058, "step": 460 }, { "epoch": 0.4732510288065844, "eval_paws-pos_loss": 0.04436105117201805, "eval_paws-pos_runtime": 0.6816, "eval_paws-pos_samples_per_second": 187.781, "eval_paws-pos_steps_per_second": 1.467, "step": 460 }, { "epoch": 0.4732510288065844, "eval_global_dataset_loss": 0.8342341184616089, "eval_global_dataset_runtime": 13.3662, "eval_global_dataset_samples_per_second": 31.123, "eval_global_dataset_steps_per_second": 0.299, "step": 460 }, { "epoch": 0.4742798353909465, "grad_norm": 20.145654678344727, "learning_rate": 1.664589823468328e-05, "loss": 3.1972, "step": 461 }, { "epoch": 0.47530864197530864, "grad_norm": 10.836761474609375, "learning_rate": 1.6682242990654203e-05, "loss": 0.6914, "step": 462 }, { "epoch": 0.4763374485596708, "grad_norm": 13.806187629699707, "learning_rate": 1.671858774662513e-05, "loss": 2.1495, "step": 463 }, { "epoch": 0.4773662551440329, "grad_norm": 15.314764022827148, "learning_rate": 1.675493250259605e-05, "loss": 1.9195, "step": 464 }, { "epoch": 0.4783950617283951, "grad_norm": 2.3654873371124268, "learning_rate": 1.6791277258566975e-05, "loss": 0.0819, "step": 465 }, { "epoch": 0.4794238683127572, "grad_norm": 6.35114860534668, "learning_rate": 1.6827622014537902e-05, "loss": 0.2882, "step": 466 }, { "epoch": 0.48045267489711935, "grad_norm": 13.575540542602539, "learning_rate": 1.6863966770508823e-05, "loss": 1.3187, "step": 467 }, { "epoch": 0.48148148148148145, "grad_norm": 13.726608276367188, "learning_rate": 1.690031152647975e-05, "loss": 2.0175, "step": 468 }, { "epoch": 0.4825102880658436, "grad_norm": 12.422574996948242, "learning_rate": 1.6936656282450674e-05, "loss": 1.1298, "step": 469 }, { "epoch": 0.4835390946502058, "grad_norm": 10.693941116333008, "learning_rate": 1.69730010384216e-05, "loss": 0.751, "step": 470 }, { "epoch": 0.4845679012345679, "grad_norm": 17.281755447387695, "learning_rate": 1.7009345794392523e-05, "loss": 1.7641, "step": 471 }, { "epoch": 0.48559670781893005, "grad_norm": 13.825311660766602, "learning_rate": 1.7045690550363447e-05, "loss": 1.2676, "step": 472 }, { "epoch": 0.48662551440329216, "grad_norm": 13.023504257202148, "learning_rate": 1.708203530633437e-05, "loss": 1.2802, "step": 473 }, { "epoch": 0.4876543209876543, "grad_norm": 6.976680755615234, "learning_rate": 1.7118380062305295e-05, "loss": 0.2798, "step": 474 }, { "epoch": 0.4886831275720165, "grad_norm": 12.474639892578125, "learning_rate": 1.715472481827622e-05, "loss": 1.786, "step": 475 }, { "epoch": 0.4897119341563786, "grad_norm": 11.611064910888672, "learning_rate": 1.7191069574247143e-05, "loss": 0.9421, "step": 476 }, { "epoch": 0.49074074074074076, "grad_norm": 17.27467155456543, "learning_rate": 1.7227414330218067e-05, "loss": 1.8988, "step": 477 }, { "epoch": 0.49176954732510286, "grad_norm": 11.986361503601074, "learning_rate": 1.726375908618899e-05, "loss": 1.0397, "step": 478 }, { "epoch": 0.492798353909465, "grad_norm": 19.697477340698242, "learning_rate": 1.7300103842159915e-05, "loss": 2.2289, "step": 479 }, { "epoch": 0.49382716049382713, "grad_norm": 11.965368270874023, "learning_rate": 1.733644859813084e-05, "loss": 0.8923, "step": 480 }, { "epoch": 0.49382716049382713, "eval_Qnli-dev_cosine_accuracy": 0.677734375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8423784971237183, "eval_Qnli-dev_cosine_ap": 0.7145917918948612, "eval_Qnli-dev_cosine_f1": 0.6890459363957597, "eval_Qnli-dev_cosine_f1_threshold": 0.7728449106216431, "eval_Qnli-dev_cosine_precision": 0.5909090909090909, "eval_Qnli-dev_cosine_recall": 0.826271186440678, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 412.736083984375, "eval_Qnli-dev_dot_ap": 0.6183902376998758, "eval_Qnli-dev_dot_f1": 0.673040152963671, "eval_Qnli-dev_dot_f1_threshold": 410.0682373046875, "eval_Qnli-dev_dot_precision": 0.6132404181184669, "eval_Qnli-dev_dot_recall": 0.7457627118644068, "eval_Qnli-dev_euclidean_accuracy": 0.6796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.48289966583252, "eval_Qnli-dev_euclidean_ap": 0.7218061519598871, "eval_Qnli-dev_euclidean_f1": 0.6889279437609841, "eval_Qnli-dev_euclidean_f1_threshold": 15.510814666748047, "eval_Qnli-dev_euclidean_precision": 0.5885885885885885, "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, "eval_Qnli-dev_manhattan_accuracy": 0.6796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 277.7557678222656, "eval_Qnli-dev_manhattan_ap": 0.7243976667802744, "eval_Qnli-dev_manhattan_f1": 0.6917808219178082, "eval_Qnli-dev_manhattan_f1_threshold": 320.653564453125, "eval_Qnli-dev_manhattan_precision": 0.5804597701149425, "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, "eval_Qnli-dev_max_accuracy": 0.6796875, "eval_Qnli-dev_max_accuracy_threshold": 412.736083984375, "eval_Qnli-dev_max_ap": 0.7243976667802744, "eval_Qnli-dev_max_f1": 0.6917808219178082, "eval_Qnli-dev_max_f1_threshold": 410.0682373046875, "eval_Qnli-dev_max_precision": 0.6132404181184669, "eval_Qnli-dev_max_recall": 0.8559322033898306, "eval_allNLI-dev_cosine_accuracy": 0.712890625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9155895113945007, "eval_allNLI-dev_cosine_ap": 0.5587323061807457, "eval_allNLI-dev_cosine_f1": 0.569377990430622, "eval_allNLI-dev_cosine_f1_threshold": 0.8306180238723755, "eval_allNLI-dev_cosine_precision": 0.4857142857142857, "eval_allNLI-dev_cosine_recall": 0.6878612716763006, "eval_allNLI-dev_dot_accuracy": 0.669921875, "eval_allNLI-dev_dot_accuracy_threshold": 486.188232421875, "eval_allNLI-dev_dot_ap": 0.44895440516126245, "eval_allNLI-dev_dot_f1": 0.5326633165829145, "eval_allNLI-dev_dot_f1_threshold": 373.0961608886719, "eval_allNLI-dev_dot_precision": 0.375, "eval_allNLI-dev_dot_recall": 0.9190751445086706, "eval_allNLI-dev_euclidean_accuracy": 0.720703125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.15213680267334, "eval_allNLI-dev_euclidean_ap": 0.5670459225360986, "eval_allNLI-dev_euclidean_f1": 0.5797101449275361, "eval_allNLI-dev_euclidean_f1_threshold": 13.239068984985352, "eval_allNLI-dev_euclidean_precision": 0.4979253112033195, "eval_allNLI-dev_euclidean_recall": 0.6936416184971098, "eval_allNLI-dev_manhattan_accuracy": 0.720703125, "eval_allNLI-dev_manhattan_accuracy_threshold": 198.6392822265625, "eval_allNLI-dev_manhattan_ap": 0.5637178226555747, "eval_allNLI-dev_manhattan_f1": 0.569620253164557, "eval_allNLI-dev_manhattan_f1_threshold": 287.952392578125, "eval_allNLI-dev_manhattan_precision": 0.4485049833887043, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.720703125, "eval_allNLI-dev_max_accuracy_threshold": 486.188232421875, "eval_allNLI-dev_max_ap": 0.5670459225360986, "eval_allNLI-dev_max_f1": 0.5797101449275361, "eval_allNLI-dev_max_f1_threshold": 373.0961608886719, "eval_allNLI-dev_max_precision": 0.4979253112033195, "eval_allNLI-dev_max_recall": 0.9190751445086706, "eval_sequential_score": 0.7243976667802744, "eval_sts-test_pearson_cosine": 0.7891034120839744, "eval_sts-test_pearson_dot": 0.7520122002590104, "eval_sts-test_pearson_euclidean": 0.8084749326758871, "eval_sts-test_pearson_manhattan": 0.8035797835971765, "eval_sts-test_pearson_max": 0.8084749326758871, "eval_sts-test_spearman_cosine": 0.8092891054576755, "eval_sts-test_spearman_dot": 0.729727493626578, "eval_sts-test_spearman_euclidean": 0.7991726353075358, "eval_sts-test_spearman_manhattan": 0.7930649384015762, "eval_sts-test_spearman_max": 0.8092891054576755, "eval_vitaminc-pairs_loss": 2.9393234252929688, "eval_vitaminc-pairs_runtime": 3.181, "eval_vitaminc-pairs_samples_per_second": 40.239, "eval_vitaminc-pairs_steps_per_second": 0.314, "step": 480 }, { "epoch": 0.49382716049382713, "eval_negation-triplets_loss": 1.4414068460464478, "eval_negation-triplets_runtime": 0.752, "eval_negation-triplets_samples_per_second": 170.205, "eval_negation-triplets_steps_per_second": 1.33, "step": 480 }, { "epoch": 0.49382716049382713, "eval_scitail-pairs-pos_loss": 0.19124868512153625, "eval_scitail-pairs-pos_runtime": 0.801, "eval_scitail-pairs-pos_samples_per_second": 159.801, "eval_scitail-pairs-pos_steps_per_second": 1.248, "step": 480 }, { "epoch": 0.49382716049382713, "eval_scitail-pairs-qa_loss": 0.015640273690223694, "eval_scitail-pairs-qa_runtime": 0.5674, "eval_scitail-pairs-qa_samples_per_second": 225.595, "eval_scitail-pairs-qa_steps_per_second": 1.762, "step": 480 }, { "epoch": 0.49382716049382713, "eval_xsum-pairs_loss": 0.9755306839942932, "eval_xsum-pairs_runtime": 3.0208, "eval_xsum-pairs_samples_per_second": 42.373, "eval_xsum-pairs_steps_per_second": 0.331, "step": 480 }, { "epoch": 0.49382716049382713, "eval_sciq_pairs_loss": 0.14197379350662231, "eval_sciq_pairs_runtime": 3.4128, "eval_sciq_pairs_samples_per_second": 37.506, "eval_sciq_pairs_steps_per_second": 0.293, "step": 480 }, { "epoch": 0.49382716049382713, "eval_qasc_pairs_loss": 0.8245877623558044, "eval_qasc_pairs_runtime": 0.6116, "eval_qasc_pairs_samples_per_second": 209.289, "eval_qasc_pairs_steps_per_second": 1.635, "step": 480 }, { "epoch": 0.49382716049382713, "eval_openbookqa_pairs_loss": 1.38233482837677, "eval_openbookqa_pairs_runtime": 0.5798, "eval_openbookqa_pairs_samples_per_second": 220.762, "eval_openbookqa_pairs_steps_per_second": 1.725, "step": 480 }, { "epoch": 0.49382716049382713, "eval_msmarco_pairs_loss": 1.583013653755188, "eval_msmarco_pairs_runtime": 1.5116, "eval_msmarco_pairs_samples_per_second": 84.681, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 480 }, { "epoch": 0.49382716049382713, "eval_nq_pairs_loss": 1.8946471214294434, "eval_nq_pairs_runtime": 2.889, "eval_nq_pairs_samples_per_second": 44.307, "eval_nq_pairs_steps_per_second": 0.346, "step": 480 }, { "epoch": 0.49382716049382713, "eval_trivia_pairs_loss": 1.2537095546722412, "eval_trivia_pairs_runtime": 3.4426, "eval_trivia_pairs_samples_per_second": 37.181, "eval_trivia_pairs_steps_per_second": 0.29, "step": 480 }, { "epoch": 0.49382716049382713, "eval_gooaq_pairs_loss": 0.978269636631012, "eval_gooaq_pairs_runtime": 0.946, "eval_gooaq_pairs_samples_per_second": 135.311, "eval_gooaq_pairs_steps_per_second": 1.057, "step": 480 }, { "epoch": 0.49382716049382713, "eval_paws-pos_loss": 0.04379463195800781, "eval_paws-pos_runtime": 0.6992, "eval_paws-pos_samples_per_second": 183.062, "eval_paws-pos_steps_per_second": 1.43, "step": 480 }, { "epoch": 0.49382716049382713, "eval_global_dataset_loss": 0.784004807472229, "eval_global_dataset_runtime": 13.3627, "eval_global_dataset_samples_per_second": 31.131, "eval_global_dataset_steps_per_second": 0.299, "step": 480 }, { "epoch": 0.4948559670781893, "grad_norm": 15.596723556518555, "learning_rate": 1.7372793354101766e-05, "loss": 1.5281, "step": 481 }, { "epoch": 0.49588477366255146, "grad_norm": 11.775968551635742, "learning_rate": 1.7409138110072687e-05, "loss": 1.4874, "step": 482 }, { "epoch": 0.49691358024691357, "grad_norm": 10.956304550170898, "learning_rate": 1.744548286604361e-05, "loss": 1.3973, "step": 483 }, { "epoch": 0.49794238683127573, "grad_norm": 9.66591739654541, "learning_rate": 1.7481827622014538e-05, "loss": 0.6967, "step": 484 }, { "epoch": 0.49897119341563784, "grad_norm": 15.71474838256836, "learning_rate": 1.751817237798546e-05, "loss": 1.8954, "step": 485 }, { "epoch": 0.5, "grad_norm": 16.29734992980957, "learning_rate": 1.7554517133956383e-05, "loss": 2.1666, "step": 486 }, { "epoch": 0.5010288065843621, "grad_norm": 11.766134262084961, "learning_rate": 1.759086188992731e-05, "loss": 0.9414, "step": 487 }, { "epoch": 0.5020576131687243, "grad_norm": 19.231468200683594, "learning_rate": 1.762720664589823e-05, "loss": 2.1697, "step": 488 }, { "epoch": 0.5030864197530864, "grad_norm": 14.636868476867676, "learning_rate": 1.7663551401869155e-05, "loss": 2.2224, "step": 489 }, { "epoch": 0.5041152263374485, "grad_norm": 9.892867088317871, "learning_rate": 1.7699896157840082e-05, "loss": 0.7158, "step": 490 }, { "epoch": 0.5051440329218106, "grad_norm": 10.343125343322754, "learning_rate": 1.7736240913811006e-05, "loss": 0.8864, "step": 491 }, { "epoch": 0.5061728395061729, "grad_norm": 11.846784591674805, "learning_rate": 1.7772585669781927e-05, "loss": 1.7706, "step": 492 }, { "epoch": 0.507201646090535, "grad_norm": 11.437203407287598, "learning_rate": 1.7808930425752854e-05, "loss": 1.0602, "step": 493 }, { "epoch": 0.5082304526748971, "grad_norm": 12.174988746643066, "learning_rate": 1.784527518172378e-05, "loss": 1.6377, "step": 494 }, { "epoch": 0.5092592592592593, "grad_norm": 2.9324963092803955, "learning_rate": 1.78816199376947e-05, "loss": 0.1079, "step": 495 }, { "epoch": 0.5102880658436214, "grad_norm": 9.480378150939941, "learning_rate": 1.7917964693665626e-05, "loss": 0.7662, "step": 496 }, { "epoch": 0.5113168724279835, "grad_norm": 11.27574348449707, "learning_rate": 1.795430944963655e-05, "loss": 1.662, "step": 497 }, { "epoch": 0.5123456790123457, "grad_norm": 11.860407829284668, "learning_rate": 1.7990654205607474e-05, "loss": 2.0872, "step": 498 }, { "epoch": 0.5133744855967078, "grad_norm": 9.084991455078125, "learning_rate": 1.80269989615784e-05, "loss": 0.6517, "step": 499 }, { "epoch": 0.51440329218107, "grad_norm": 10.730901718139648, "learning_rate": 1.8063343717549322e-05, "loss": 0.8729, "step": 500 }, { "epoch": 0.51440329218107, "eval_Qnli-dev_cosine_accuracy": 0.69140625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8430161476135254, "eval_Qnli-dev_cosine_ap": 0.7175942339872103, "eval_Qnli-dev_cosine_f1": 0.6925795053003534, "eval_Qnli-dev_cosine_f1_threshold": 0.7885958552360535, "eval_Qnli-dev_cosine_precision": 0.593939393939394, "eval_Qnli-dev_cosine_recall": 0.8305084745762712, "eval_Qnli-dev_dot_accuracy": 0.654296875, "eval_Qnli-dev_dot_accuracy_threshold": 444.99591064453125, "eval_Qnli-dev_dot_ap": 0.6038099250184231, "eval_Qnli-dev_dot_f1": 0.6687797147385103, "eval_Qnli-dev_dot_f1_threshold": 398.5889892578125, "eval_Qnli-dev_dot_precision": 0.5341772151898734, "eval_Qnli-dev_dot_recall": 0.8940677966101694, "eval_Qnli-dev_euclidean_accuracy": 0.6953125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.040979385375977, "eval_Qnli-dev_euclidean_ap": 0.7271286610454261, "eval_Qnli-dev_euclidean_f1": 0.6927175843694494, "eval_Qnli-dev_euclidean_f1_threshold": 15.024581909179688, "eval_Qnli-dev_euclidean_precision": 0.5963302752293578, "eval_Qnli-dev_euclidean_recall": 0.826271186440678, "eval_Qnli-dev_manhattan_accuracy": 0.697265625, "eval_Qnli-dev_manhattan_accuracy_threshold": 281.1022033691406, "eval_Qnli-dev_manhattan_ap": 0.728019969713725, "eval_Qnli-dev_manhattan_f1": 0.6934306569343065, "eval_Qnli-dev_manhattan_f1_threshold": 299.29119873046875, "eval_Qnli-dev_manhattan_precision": 0.6089743589743589, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.697265625, "eval_Qnli-dev_max_accuracy_threshold": 444.99591064453125, "eval_Qnli-dev_max_ap": 0.728019969713725, "eval_Qnli-dev_max_f1": 0.6934306569343065, "eval_Qnli-dev_max_f1_threshold": 398.5889892578125, "eval_Qnli-dev_max_precision": 0.6089743589743589, "eval_Qnli-dev_max_recall": 0.8940677966101694, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8899899125099182, "eval_allNLI-dev_cosine_ap": 0.5657808168208326, "eval_allNLI-dev_cosine_f1": 0.5817409766454352, "eval_allNLI-dev_cosine_f1_threshold": 0.8096699714660645, "eval_allNLI-dev_cosine_precision": 0.4597315436241611, "eval_allNLI-dev_cosine_recall": 0.791907514450867, "eval_allNLI-dev_dot_accuracy": 0.67578125, "eval_allNLI-dev_dot_accuracy_threshold": 478.03387451171875, "eval_allNLI-dev_dot_ap": 0.4579389209157686, "eval_allNLI-dev_dot_f1": 0.5588822355289421, "eval_allNLI-dev_dot_f1_threshold": 411.92333984375, "eval_allNLI-dev_dot_precision": 0.4268292682926829, "eval_allNLI-dev_dot_recall": 0.8092485549132948, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.441316604614258, "eval_allNLI-dev_euclidean_ap": 0.5691945577806491, "eval_allNLI-dev_euclidean_f1": 0.5872340425531914, "eval_allNLI-dev_euclidean_f1_threshold": 14.106014251708984, "eval_allNLI-dev_euclidean_precision": 0.46464646464646464, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 219.55010986328125, "eval_allNLI-dev_manhattan_ap": 0.5696159330415428, "eval_allNLI-dev_manhattan_f1": 0.5752808988764044, "eval_allNLI-dev_manhattan_f1_threshold": 278.9423828125, "eval_allNLI-dev_manhattan_precision": 0.47058823529411764, "eval_allNLI-dev_manhattan_recall": 0.7398843930635838, "eval_allNLI-dev_max_accuracy": 0.73046875, "eval_allNLI-dev_max_accuracy_threshold": 478.03387451171875, "eval_allNLI-dev_max_ap": 0.5696159330415428, "eval_allNLI-dev_max_f1": 0.5872340425531914, "eval_allNLI-dev_max_f1_threshold": 411.92333984375, "eval_allNLI-dev_max_precision": 0.47058823529411764, "eval_allNLI-dev_max_recall": 0.8092485549132948, "eval_sequential_score": 0.728019969713725, "eval_sts-test_pearson_cosine": 0.7967354149956867, "eval_sts-test_pearson_dot": 0.7587343105275375, "eval_sts-test_pearson_euclidean": 0.8180154478758743, "eval_sts-test_pearson_manhattan": 0.8161849279054585, "eval_sts-test_pearson_max": 0.8180154478758743, "eval_sts-test_spearman_cosine": 0.8158280702696641, "eval_sts-test_spearman_dot": 0.7368859501500076, "eval_sts-test_spearman_euclidean": 0.8091461699287915, "eval_sts-test_spearman_manhattan": 0.8057763999460191, "eval_sts-test_spearman_max": 0.8158280702696641, "eval_vitaminc-pairs_loss": 2.9438083171844482, "eval_vitaminc-pairs_runtime": 3.189, "eval_vitaminc-pairs_samples_per_second": 40.138, "eval_vitaminc-pairs_steps_per_second": 0.314, "step": 500 }, { "epoch": 0.51440329218107, "eval_negation-triplets_loss": 1.3407632112503052, "eval_negation-triplets_runtime": 0.7388, "eval_negation-triplets_samples_per_second": 173.246, "eval_negation-triplets_steps_per_second": 1.353, "step": 500 }, { "epoch": 0.51440329218107, "eval_scitail-pairs-pos_loss": 0.2115849405527115, "eval_scitail-pairs-pos_runtime": 0.7971, "eval_scitail-pairs-pos_samples_per_second": 160.573, "eval_scitail-pairs-pos_steps_per_second": 1.254, "step": 500 }, { "epoch": 0.51440329218107, "eval_scitail-pairs-qa_loss": 0.018660105764865875, "eval_scitail-pairs-qa_runtime": 0.5693, "eval_scitail-pairs-qa_samples_per_second": 224.852, "eval_scitail-pairs-qa_steps_per_second": 1.757, "step": 500 }, { "epoch": 0.51440329218107, "eval_xsum-pairs_loss": 0.9552733898162842, "eval_xsum-pairs_runtime": 3.0194, "eval_xsum-pairs_samples_per_second": 42.392, "eval_xsum-pairs_steps_per_second": 0.331, "step": 500 }, { "epoch": 0.51440329218107, "eval_sciq_pairs_loss": 0.13849374651908875, "eval_sciq_pairs_runtime": 3.458, "eval_sciq_pairs_samples_per_second": 37.015, "eval_sciq_pairs_steps_per_second": 0.289, "step": 500 }, { "epoch": 0.51440329218107, "eval_qasc_pairs_loss": 0.8119698166847229, "eval_qasc_pairs_runtime": 0.5998, "eval_qasc_pairs_samples_per_second": 213.389, "eval_qasc_pairs_steps_per_second": 1.667, "step": 500 }, { "epoch": 0.51440329218107, "eval_openbookqa_pairs_loss": 1.512932538986206, "eval_openbookqa_pairs_runtime": 0.5734, "eval_openbookqa_pairs_samples_per_second": 223.22, "eval_openbookqa_pairs_steps_per_second": 1.744, "step": 500 }, { "epoch": 0.51440329218107, "eval_msmarco_pairs_loss": 1.4880919456481934, "eval_msmarco_pairs_runtime": 1.5132, "eval_msmarco_pairs_samples_per_second": 84.588, "eval_msmarco_pairs_steps_per_second": 0.661, "step": 500 }, { "epoch": 0.51440329218107, "eval_nq_pairs_loss": 1.750890851020813, "eval_nq_pairs_runtime": 2.8955, "eval_nq_pairs_samples_per_second": 44.206, "eval_nq_pairs_steps_per_second": 0.345, "step": 500 }, { "epoch": 0.51440329218107, "eval_trivia_pairs_loss": 1.3733922243118286, "eval_trivia_pairs_runtime": 3.4378, "eval_trivia_pairs_samples_per_second": 37.233, "eval_trivia_pairs_steps_per_second": 0.291, "step": 500 }, { "epoch": 0.51440329218107, "eval_gooaq_pairs_loss": 0.938717782497406, "eval_gooaq_pairs_runtime": 0.95, "eval_gooaq_pairs_samples_per_second": 134.741, "eval_gooaq_pairs_steps_per_second": 1.053, "step": 500 }, { "epoch": 0.51440329218107, "eval_paws-pos_loss": 0.04237303510308266, "eval_paws-pos_runtime": 0.6799, "eval_paws-pos_samples_per_second": 188.26, "eval_paws-pos_steps_per_second": 1.471, "step": 500 }, { "epoch": 0.51440329218107, "eval_global_dataset_loss": 0.7602720856666565, "eval_global_dataset_runtime": 13.3515, "eval_global_dataset_samples_per_second": 31.157, "eval_global_dataset_steps_per_second": 0.3, "step": 500 }, { "epoch": 0.5154320987654321, "grad_norm": 20.222156524658203, "learning_rate": 1.8099688473520246e-05, "loss": 3.6159, "step": 501 }, { "epoch": 0.5164609053497943, "grad_norm": 6.827728271484375, "learning_rate": 1.8136033229491174e-05, "loss": 0.2539, "step": 502 }, { "epoch": 0.5174897119341564, "grad_norm": 11.333172798156738, "learning_rate": 1.8172377985462095e-05, "loss": 0.8589, "step": 503 }, { "epoch": 0.5185185185185185, "grad_norm": 12.576927185058594, "learning_rate": 1.820872274143302e-05, "loss": 1.7416, "step": 504 }, { "epoch": 0.5195473251028807, "grad_norm": 15.945344924926758, "learning_rate": 1.8245067497403946e-05, "loss": 1.7693, "step": 505 }, { "epoch": 0.5205761316872428, "grad_norm": 17.440074920654297, "learning_rate": 1.8281412253374867e-05, "loss": 1.5639, "step": 506 }, { "epoch": 0.5216049382716049, "grad_norm": 11.141048431396484, "learning_rate": 1.831775700934579e-05, "loss": 0.8746, "step": 507 }, { "epoch": 0.522633744855967, "grad_norm": 15.599634170532227, "learning_rate": 1.8354101765316718e-05, "loss": 1.5769, "step": 508 }, { "epoch": 0.5236625514403292, "grad_norm": 10.608887672424316, "learning_rate": 1.8390446521287642e-05, "loss": 0.6175, "step": 509 }, { "epoch": 0.5246913580246914, "grad_norm": 11.312731742858887, "learning_rate": 1.8426791277258563e-05, "loss": 0.8312, "step": 510 }, { "epoch": 0.5257201646090535, "grad_norm": 9.91249942779541, "learning_rate": 1.846313603322949e-05, "loss": 0.867, "step": 511 }, { "epoch": 0.5267489711934157, "grad_norm": 11.7357816696167, "learning_rate": 1.8499480789200414e-05, "loss": 1.2859, "step": 512 }, { "epoch": 0.5277777777777778, "grad_norm": 21.4658203125, "learning_rate": 1.8535825545171335e-05, "loss": 2.2659, "step": 513 }, { "epoch": 0.5288065843621399, "grad_norm": 18.00661849975586, "learning_rate": 1.8572170301142262e-05, "loss": 1.7138, "step": 514 }, { "epoch": 0.529835390946502, "grad_norm": 7.337871074676514, "learning_rate": 1.8608515057113186e-05, "loss": 0.3393, "step": 515 }, { "epoch": 0.5308641975308642, "grad_norm": 12.568946838378906, "learning_rate": 1.864485981308411e-05, "loss": 0.9776, "step": 516 }, { "epoch": 0.5318930041152263, "grad_norm": 10.954802513122559, "learning_rate": 1.8681204569055034e-05, "loss": 0.6971, "step": 517 }, { "epoch": 0.5329218106995884, "grad_norm": 10.687813758850098, "learning_rate": 1.8717549325025958e-05, "loss": 0.6725, "step": 518 }, { "epoch": 0.5339506172839507, "grad_norm": 11.719423294067383, "learning_rate": 1.8753894080996882e-05, "loss": 0.6854, "step": 519 }, { "epoch": 0.5349794238683128, "grad_norm": 16.232799530029297, "learning_rate": 1.879023883696781e-05, "loss": 1.7726, "step": 520 }, { "epoch": 0.5349794238683128, "eval_Qnli-dev_cosine_accuracy": 0.6875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8248189687728882, "eval_Qnli-dev_cosine_ap": 0.7196731202506679, "eval_Qnli-dev_cosine_f1": 0.6947368421052632, "eval_Qnli-dev_cosine_f1_threshold": 0.7689546346664429, "eval_Qnli-dev_cosine_precision": 0.592814371257485, "eval_Qnli-dev_cosine_recall": 0.8389830508474576, "eval_Qnli-dev_dot_accuracy": 0.66015625, "eval_Qnli-dev_dot_accuracy_threshold": 419.0325927734375, "eval_Qnli-dev_dot_ap": 0.616348530166337, "eval_Qnli-dev_dot_f1": 0.6724436741767765, "eval_Qnli-dev_dot_f1_threshold": 393.9245300292969, "eval_Qnli-dev_dot_precision": 0.5689149560117303, "eval_Qnli-dev_dot_recall": 0.8220338983050848, "eval_Qnli-dev_euclidean_accuracy": 0.693359375, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.748929977416992, "eval_Qnli-dev_euclidean_ap": 0.7309618868427656, "eval_Qnli-dev_euclidean_f1": 0.6943942133815552, "eval_Qnli-dev_euclidean_f1_threshold": 15.1475830078125, "eval_Qnli-dev_euclidean_precision": 0.6056782334384858, "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, "eval_Qnli-dev_manhattan_accuracy": 0.685546875, "eval_Qnli-dev_manhattan_accuracy_threshold": 265.4633483886719, "eval_Qnli-dev_manhattan_ap": 0.730577397962383, "eval_Qnli-dev_manhattan_f1": 0.6940298507462687, "eval_Qnli-dev_manhattan_f1_threshold": 303.4216613769531, "eval_Qnli-dev_manhattan_precision": 0.62, "eval_Qnli-dev_manhattan_recall": 0.788135593220339, "eval_Qnli-dev_max_accuracy": 0.693359375, "eval_Qnli-dev_max_accuracy_threshold": 419.0325927734375, "eval_Qnli-dev_max_ap": 0.7309618868427656, "eval_Qnli-dev_max_f1": 0.6947368421052632, "eval_Qnli-dev_max_f1_threshold": 393.9245300292969, "eval_Qnli-dev_max_precision": 0.62, "eval_Qnli-dev_max_recall": 0.8389830508474576, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8910384178161621, "eval_allNLI-dev_cosine_ap": 0.5627746050790838, "eval_allNLI-dev_cosine_f1": 0.5838509316770187, "eval_allNLI-dev_cosine_f1_threshold": 0.8071809411048889, "eval_allNLI-dev_cosine_precision": 0.45483870967741935, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.67578125, "eval_allNLI-dev_dot_accuracy_threshold": 508.07659912109375, "eval_allNLI-dev_dot_ap": 0.4588019812939956, "eval_allNLI-dev_dot_f1": 0.5421686746987953, "eval_allNLI-dev_dot_f1_threshold": 413.8941650390625, "eval_allNLI-dev_dot_precision": 0.4153846153846154, "eval_allNLI-dev_dot_recall": 0.7803468208092486, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.289400100708008, "eval_allNLI-dev_euclidean_ap": 0.5694426258019529, "eval_allNLI-dev_euclidean_f1": 0.5922746781115881, "eval_allNLI-dev_euclidean_f1_threshold": 14.015277862548828, "eval_allNLI-dev_euclidean_precision": 0.4709897610921502, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 220.96963500976562, "eval_allNLI-dev_manhattan_ap": 0.5723061411584658, "eval_allNLI-dev_manhattan_f1": 0.5785876993166287, "eval_allNLI-dev_manhattan_f1_threshold": 278.1605224609375, "eval_allNLI-dev_manhattan_precision": 0.4774436090225564, "eval_allNLI-dev_manhattan_recall": 0.7341040462427746, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 508.07659912109375, "eval_allNLI-dev_max_ap": 0.5723061411584658, "eval_allNLI-dev_max_f1": 0.5922746781115881, "eval_allNLI-dev_max_f1_threshold": 413.8941650390625, "eval_allNLI-dev_max_precision": 0.4774436090225564, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.7309618868427656, "eval_sts-test_pearson_cosine": 0.7944016690558295, "eval_sts-test_pearson_dot": 0.7340676184460866, "eval_sts-test_pearson_euclidean": 0.8206810004337891, "eval_sts-test_pearson_manhattan": 0.8198751359187904, "eval_sts-test_pearson_max": 0.8206810004337891, "eval_sts-test_spearman_cosine": 0.8158374232832949, "eval_sts-test_spearman_dot": 0.712276783998263, "eval_sts-test_spearman_euclidean": 0.8117007509340581, "eval_sts-test_spearman_manhattan": 0.8093512202084868, "eval_sts-test_spearman_max": 0.8158374232832949, "eval_vitaminc-pairs_loss": 2.9273321628570557, "eval_vitaminc-pairs_runtime": 3.1718, "eval_vitaminc-pairs_samples_per_second": 40.356, "eval_vitaminc-pairs_steps_per_second": 0.315, "step": 520 }, { "epoch": 0.5349794238683128, "eval_negation-triplets_loss": 1.3328778743743896, "eval_negation-triplets_runtime": 0.7347, "eval_negation-triplets_samples_per_second": 174.211, "eval_negation-triplets_steps_per_second": 1.361, "step": 520 }, { "epoch": 0.5349794238683128, "eval_scitail-pairs-pos_loss": 0.18687528371810913, "eval_scitail-pairs-pos_runtime": 0.8151, "eval_scitail-pairs-pos_samples_per_second": 157.044, "eval_scitail-pairs-pos_steps_per_second": 1.227, "step": 520 }, { "epoch": 0.5349794238683128, "eval_scitail-pairs-qa_loss": 0.014874367974698544, "eval_scitail-pairs-qa_runtime": 0.5765, "eval_scitail-pairs-qa_samples_per_second": 222.025, "eval_scitail-pairs-qa_steps_per_second": 1.735, "step": 520 }, { "epoch": 0.5349794238683128, "eval_xsum-pairs_loss": 0.86911940574646, "eval_xsum-pairs_runtime": 3.017, "eval_xsum-pairs_samples_per_second": 42.427, "eval_xsum-pairs_steps_per_second": 0.331, "step": 520 }, { "epoch": 0.5349794238683128, "eval_sciq_pairs_loss": 0.14434820413589478, "eval_sciq_pairs_runtime": 3.4284, "eval_sciq_pairs_samples_per_second": 37.335, "eval_sciq_pairs_steps_per_second": 0.292, "step": 520 }, { "epoch": 0.5349794238683128, "eval_qasc_pairs_loss": 0.7873150110244751, "eval_qasc_pairs_runtime": 0.6008, "eval_qasc_pairs_samples_per_second": 213.056, "eval_qasc_pairs_steps_per_second": 1.665, "step": 520 }, { "epoch": 0.5349794238683128, "eval_openbookqa_pairs_loss": 1.5795769691467285, "eval_openbookqa_pairs_runtime": 0.5771, "eval_openbookqa_pairs_samples_per_second": 221.803, "eval_openbookqa_pairs_steps_per_second": 1.733, "step": 520 }, { "epoch": 0.5349794238683128, "eval_msmarco_pairs_loss": 1.4810850620269775, "eval_msmarco_pairs_runtime": 1.525, "eval_msmarco_pairs_samples_per_second": 83.934, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 520 }, { "epoch": 0.5349794238683128, "eval_nq_pairs_loss": 1.7317595481872559, "eval_nq_pairs_runtime": 2.8997, "eval_nq_pairs_samples_per_second": 44.143, "eval_nq_pairs_steps_per_second": 0.345, "step": 520 }, { "epoch": 0.5349794238683128, "eval_trivia_pairs_loss": 1.2999101877212524, "eval_trivia_pairs_runtime": 3.4365, "eval_trivia_pairs_samples_per_second": 37.247, "eval_trivia_pairs_steps_per_second": 0.291, "step": 520 }, { "epoch": 0.5349794238683128, "eval_gooaq_pairs_loss": 0.903529167175293, "eval_gooaq_pairs_runtime": 0.9492, "eval_gooaq_pairs_samples_per_second": 134.844, "eval_gooaq_pairs_steps_per_second": 1.053, "step": 520 }, { "epoch": 0.5349794238683128, "eval_paws-pos_loss": 0.04194509983062744, "eval_paws-pos_runtime": 0.705, "eval_paws-pos_samples_per_second": 181.572, "eval_paws-pos_steps_per_second": 1.419, "step": 520 }, { "epoch": 0.5349794238683128, "eval_global_dataset_loss": 0.7329986095428467, "eval_global_dataset_runtime": 13.3667, "eval_global_dataset_samples_per_second": 31.122, "eval_global_dataset_steps_per_second": 0.299, "step": 520 }, { "epoch": 0.5360082304526749, "grad_norm": 9.531018257141113, "learning_rate": 1.882658359293873e-05, "loss": 0.6841, "step": 521 }, { "epoch": 0.5370370370370371, "grad_norm": 14.136958122253418, "learning_rate": 1.8862928348909654e-05, "loss": 1.4999, "step": 522 }, { "epoch": 0.5380658436213992, "grad_norm": 16.56440544128418, "learning_rate": 1.889927310488058e-05, "loss": 1.8423, "step": 523 }, { "epoch": 0.5390946502057613, "grad_norm": 18.816726684570312, "learning_rate": 1.8935617860851502e-05, "loss": 3.2063, "step": 524 }, { "epoch": 0.5401234567901234, "grad_norm": 9.336271286010742, "learning_rate": 1.8971962616822426e-05, "loss": 0.7876, "step": 525 }, { "epoch": 0.5411522633744856, "grad_norm": 9.695099830627441, "learning_rate": 1.9008307372793354e-05, "loss": 0.7463, "step": 526 }, { "epoch": 0.5421810699588477, "grad_norm": 16.809635162353516, "learning_rate": 1.9044652128764278e-05, "loss": 1.317, "step": 527 }, { "epoch": 0.5432098765432098, "grad_norm": 11.21884536743164, "learning_rate": 1.90809968847352e-05, "loss": 1.533, "step": 528 }, { "epoch": 0.5442386831275721, "grad_norm": 11.746585845947266, "learning_rate": 1.9117341640706126e-05, "loss": 0.9414, "step": 529 }, { "epoch": 0.5452674897119342, "grad_norm": 11.7705078125, "learning_rate": 1.915368639667705e-05, "loss": 0.8405, "step": 530 }, { "epoch": 0.5462962962962963, "grad_norm": 11.811210632324219, "learning_rate": 1.919003115264797e-05, "loss": 1.1217, "step": 531 }, { "epoch": 0.5473251028806584, "grad_norm": 8.906420707702637, "learning_rate": 1.9226375908618898e-05, "loss": 0.6404, "step": 532 }, { "epoch": 0.5483539094650206, "grad_norm": 8.888873100280762, "learning_rate": 1.9262720664589822e-05, "loss": 0.6283, "step": 533 }, { "epoch": 0.5493827160493827, "grad_norm": 2.18764591217041, "learning_rate": 1.9299065420560746e-05, "loss": 0.0678, "step": 534 }, { "epoch": 0.5504115226337448, "grad_norm": 8.759835243225098, "learning_rate": 1.933541017653167e-05, "loss": 0.5242, "step": 535 }, { "epoch": 0.551440329218107, "grad_norm": 18.4666748046875, "learning_rate": 1.9371754932502594e-05, "loss": 1.9928, "step": 536 }, { "epoch": 0.5524691358024691, "grad_norm": 11.737098693847656, "learning_rate": 1.9408099688473518e-05, "loss": 0.8622, "step": 537 }, { "epoch": 0.5534979423868313, "grad_norm": 14.750716209411621, "learning_rate": 1.9444444444444442e-05, "loss": 1.2746, "step": 538 }, { "epoch": 0.5545267489711934, "grad_norm": 11.672311782836914, "learning_rate": 1.9480789200415366e-05, "loss": 0.7844, "step": 539 }, { "epoch": 0.5555555555555556, "grad_norm": 12.69827651977539, "learning_rate": 1.951713395638629e-05, "loss": 1.041, "step": 540 }, { "epoch": 0.5555555555555556, "eval_Qnli-dev_cosine_accuracy": 0.677734375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7994829416275024, "eval_Qnli-dev_cosine_ap": 0.717657619195893, "eval_Qnli-dev_cosine_f1": 0.6919275123558485, "eval_Qnli-dev_cosine_f1_threshold": 0.7339121103286743, "eval_Qnli-dev_cosine_precision": 0.5660377358490566, "eval_Qnli-dev_cosine_recall": 0.8898305084745762, "eval_Qnli-dev_dot_accuracy": 0.68359375, "eval_Qnli-dev_dot_accuracy_threshold": 409.82696533203125, "eval_Qnli-dev_dot_ap": 0.6260001258234368, "eval_Qnli-dev_dot_f1": 0.6723549488054607, "eval_Qnli-dev_dot_f1_threshold": 380.0247802734375, "eval_Qnli-dev_dot_precision": 0.5628571428571428, "eval_Qnli-dev_dot_recall": 0.8347457627118644, "eval_Qnli-dev_euclidean_accuracy": 0.6796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.7105712890625, "eval_Qnli-dev_euclidean_ap": 0.7249308269630148, "eval_Qnli-dev_euclidean_f1": 0.6906710310965629, "eval_Qnli-dev_euclidean_f1_threshold": 16.837154388427734, "eval_Qnli-dev_euclidean_precision": 0.5626666666666666, "eval_Qnli-dev_euclidean_recall": 0.8940677966101694, "eval_Qnli-dev_manhattan_accuracy": 0.685546875, "eval_Qnli-dev_manhattan_accuracy_threshold": 283.3619384765625, "eval_Qnli-dev_manhattan_ap": 0.7235444857124764, "eval_Qnli-dev_manhattan_f1": 0.6901172529313233, "eval_Qnli-dev_manhattan_f1_threshold": 334.96246337890625, "eval_Qnli-dev_manhattan_precision": 0.5706371191135734, "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, "eval_Qnli-dev_max_accuracy": 0.685546875, "eval_Qnli-dev_max_accuracy_threshold": 409.82696533203125, "eval_Qnli-dev_max_ap": 0.7249308269630148, "eval_Qnli-dev_max_f1": 0.6919275123558485, "eval_Qnli-dev_max_f1_threshold": 380.0247802734375, "eval_Qnli-dev_max_precision": 0.5706371191135734, "eval_Qnli-dev_max_recall": 0.8940677966101694, "eval_allNLI-dev_cosine_accuracy": 0.720703125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.897883415222168, "eval_allNLI-dev_cosine_ap": 0.559606374648369, "eval_allNLI-dev_cosine_f1": 0.5806451612903226, "eval_allNLI-dev_cosine_f1_threshold": 0.7978842854499817, "eval_allNLI-dev_cosine_precision": 0.4623287671232877, "eval_allNLI-dev_cosine_recall": 0.7803468208092486, "eval_allNLI-dev_dot_accuracy": 0.6796875, "eval_allNLI-dev_dot_accuracy_threshold": 470.4619140625, "eval_allNLI-dev_dot_ap": 0.45733111663306314, "eval_allNLI-dev_dot_f1": 0.5478841870824054, "eval_allNLI-dev_dot_f1_threshold": 410.201171875, "eval_allNLI-dev_dot_precision": 0.44565217391304346, "eval_allNLI-dev_dot_recall": 0.7109826589595376, "eval_allNLI-dev_euclidean_accuracy": 0.720703125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.147970199584961, "eval_allNLI-dev_euclidean_ap": 0.5660004356159096, "eval_allNLI-dev_euclidean_f1": 0.591792656587473, "eval_allNLI-dev_euclidean_f1_threshold": 14.38115119934082, "eval_allNLI-dev_euclidean_precision": 0.4724137931034483, "eval_allNLI-dev_euclidean_recall": 0.791907514450867, "eval_allNLI-dev_manhattan_accuracy": 0.71875, "eval_allNLI-dev_manhattan_accuracy_threshold": 207.6907958984375, "eval_allNLI-dev_manhattan_ap": 0.56719407577034, "eval_allNLI-dev_manhattan_f1": 0.587737843551797, "eval_allNLI-dev_manhattan_f1_threshold": 296.9386901855469, "eval_allNLI-dev_manhattan_precision": 0.4633333333333333, "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, "eval_allNLI-dev_max_accuracy": 0.720703125, "eval_allNLI-dev_max_accuracy_threshold": 470.4619140625, "eval_allNLI-dev_max_ap": 0.56719407577034, "eval_allNLI-dev_max_f1": 0.591792656587473, "eval_allNLI-dev_max_f1_threshold": 410.201171875, "eval_allNLI-dev_max_precision": 0.4724137931034483, "eval_allNLI-dev_max_recall": 0.8034682080924855, "eval_sequential_score": 0.7249308269630148, "eval_sts-test_pearson_cosine": 0.7981570472860724, "eval_sts-test_pearson_dot": 0.7528095037431898, "eval_sts-test_pearson_euclidean": 0.8221585052591076, "eval_sts-test_pearson_manhattan": 0.8186301303511336, "eval_sts-test_pearson_max": 0.8221585052591076, "eval_sts-test_spearman_cosine": 0.820562977481181, "eval_sts-test_spearman_dot": 0.7361068754404446, "eval_sts-test_spearman_euclidean": 0.8129253244507724, "eval_sts-test_spearman_manhattan": 0.8097035916406826, "eval_sts-test_spearman_max": 0.820562977481181, "eval_vitaminc-pairs_loss": 2.9952337741851807, "eval_vitaminc-pairs_runtime": 3.166, "eval_vitaminc-pairs_samples_per_second": 40.43, "eval_vitaminc-pairs_steps_per_second": 0.316, "step": 540 }, { "epoch": 0.5555555555555556, "eval_negation-triplets_loss": 1.2927732467651367, "eval_negation-triplets_runtime": 0.7377, "eval_negation-triplets_samples_per_second": 173.504, "eval_negation-triplets_steps_per_second": 1.355, "step": 540 }, { "epoch": 0.5555555555555556, "eval_scitail-pairs-pos_loss": 0.1593194603919983, "eval_scitail-pairs-pos_runtime": 0.8171, "eval_scitail-pairs-pos_samples_per_second": 156.657, "eval_scitail-pairs-pos_steps_per_second": 1.224, "step": 540 }, { "epoch": 0.5555555555555556, "eval_scitail-pairs-qa_loss": 0.016190586611628532, "eval_scitail-pairs-qa_runtime": 0.5737, "eval_scitail-pairs-qa_samples_per_second": 223.102, "eval_scitail-pairs-qa_steps_per_second": 1.743, "step": 540 }, { "epoch": 0.5555555555555556, "eval_xsum-pairs_loss": 0.7690907120704651, "eval_xsum-pairs_runtime": 3.0195, "eval_xsum-pairs_samples_per_second": 42.392, "eval_xsum-pairs_steps_per_second": 0.331, "step": 540 }, { "epoch": 0.5555555555555556, "eval_sciq_pairs_loss": 0.14176045358181, "eval_sciq_pairs_runtime": 3.4232, "eval_sciq_pairs_samples_per_second": 37.392, "eval_sciq_pairs_steps_per_second": 0.292, "step": 540 }, { "epoch": 0.5555555555555556, "eval_qasc_pairs_loss": 0.754072904586792, "eval_qasc_pairs_runtime": 0.599, "eval_qasc_pairs_samples_per_second": 213.697, "eval_qasc_pairs_steps_per_second": 1.67, "step": 540 }, { "epoch": 0.5555555555555556, "eval_openbookqa_pairs_loss": 1.468189001083374, "eval_openbookqa_pairs_runtime": 0.5764, "eval_openbookqa_pairs_samples_per_second": 222.08, "eval_openbookqa_pairs_steps_per_second": 1.735, "step": 540 }, { "epoch": 0.5555555555555556, "eval_msmarco_pairs_loss": 1.443937063217163, "eval_msmarco_pairs_runtime": 1.5215, "eval_msmarco_pairs_samples_per_second": 84.128, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 540 }, { "epoch": 0.5555555555555556, "eval_nq_pairs_loss": 1.7499854564666748, "eval_nq_pairs_runtime": 2.9123, "eval_nq_pairs_samples_per_second": 43.951, "eval_nq_pairs_steps_per_second": 0.343, "step": 540 }, { "epoch": 0.5555555555555556, "eval_trivia_pairs_loss": 1.284538984298706, "eval_trivia_pairs_runtime": 3.4581, "eval_trivia_pairs_samples_per_second": 37.015, "eval_trivia_pairs_steps_per_second": 0.289, "step": 540 }, { "epoch": 0.5555555555555556, "eval_gooaq_pairs_loss": 0.8851069808006287, "eval_gooaq_pairs_runtime": 0.9412, "eval_gooaq_pairs_samples_per_second": 135.997, "eval_gooaq_pairs_steps_per_second": 1.062, "step": 540 }, { "epoch": 0.5555555555555556, "eval_paws-pos_loss": 0.04284976050257683, "eval_paws-pos_runtime": 0.678, "eval_paws-pos_samples_per_second": 188.793, "eval_paws-pos_steps_per_second": 1.475, "step": 540 }, { "epoch": 0.5555555555555556, "eval_global_dataset_loss": 0.7442251443862915, "eval_global_dataset_runtime": 13.3627, "eval_global_dataset_samples_per_second": 31.132, "eval_global_dataset_steps_per_second": 0.299, "step": 540 }, { "epoch": 0.5565843621399177, "grad_norm": 12.537612915039062, "learning_rate": 1.9553478712357217e-05, "loss": 0.9339, "step": 541 }, { "epoch": 0.5576131687242798, "grad_norm": 1.0051987171173096, "learning_rate": 1.9589823468328138e-05, "loss": 0.0237, "step": 542 }, { "epoch": 0.558641975308642, "grad_norm": 9.488045692443848, "learning_rate": 1.9626168224299062e-05, "loss": 0.4569, "step": 543 }, { "epoch": 0.5596707818930041, "grad_norm": 11.0010986328125, "learning_rate": 1.966251298026999e-05, "loss": 0.6537, "step": 544 }, { "epoch": 0.5606995884773662, "grad_norm": 16.367504119873047, "learning_rate": 1.969885773624091e-05, "loss": 1.5957, "step": 545 }, { "epoch": 0.5617283950617284, "grad_norm": 0.978878378868103, "learning_rate": 1.9735202492211834e-05, "loss": 0.0269, "step": 546 }, { "epoch": 0.5627572016460906, "grad_norm": 12.36868667602539, "learning_rate": 1.977154724818276e-05, "loss": 0.7591, "step": 547 }, { "epoch": 0.5637860082304527, "grad_norm": 11.471710205078125, "learning_rate": 1.9807892004153686e-05, "loss": 0.7064, "step": 548 }, { "epoch": 0.5648148148148148, "grad_norm": 15.039127349853516, "learning_rate": 1.9844236760124606e-05, "loss": 1.201, "step": 549 }, { "epoch": 0.565843621399177, "grad_norm": 11.709723472595215, "learning_rate": 1.9880581516095534e-05, "loss": 0.7516, "step": 550 }, { "epoch": 0.5668724279835391, "grad_norm": 2.1083853244781494, "learning_rate": 1.9916926272066458e-05, "loss": 0.0917, "step": 551 }, { "epoch": 0.5679012345679012, "grad_norm": 12.638484954833984, "learning_rate": 1.9953271028037378e-05, "loss": 0.9826, "step": 552 }, { "epoch": 0.5689300411522634, "grad_norm": 11.251784324645996, "learning_rate": 1.9989615784008306e-05, "loss": 0.8362, "step": 553 }, { "epoch": 0.5699588477366255, "grad_norm": 13.69099235534668, "learning_rate": 2.002596053997923e-05, "loss": 1.5957, "step": 554 }, { "epoch": 0.5709876543209876, "grad_norm": 15.196340560913086, "learning_rate": 2.0062305295950154e-05, "loss": 1.2807, "step": 555 }, { "epoch": 0.5720164609053497, "grad_norm": 14.767230987548828, "learning_rate": 2.0098650051921078e-05, "loss": 1.6863, "step": 556 }, { "epoch": 0.573045267489712, "grad_norm": 11.55445671081543, "learning_rate": 2.0134994807892002e-05, "loss": 1.5643, "step": 557 }, { "epoch": 0.5740740740740741, "grad_norm": 13.466323852539062, "learning_rate": 2.0171339563862926e-05, "loss": 1.2279, "step": 558 }, { "epoch": 0.5751028806584362, "grad_norm": 10.434534072875977, "learning_rate": 2.0207684319833853e-05, "loss": 0.7398, "step": 559 }, { "epoch": 0.5761316872427984, "grad_norm": 16.75852394104004, "learning_rate": 2.0244029075804774e-05, "loss": 1.7229, "step": 560 }, { "epoch": 0.5761316872427984, "eval_Qnli-dev_cosine_accuracy": 0.6875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7987607717514038, "eval_Qnli-dev_cosine_ap": 0.7245772032010487, "eval_Qnli-dev_cosine_f1": 0.7073608617594255, "eval_Qnli-dev_cosine_f1_threshold": 0.7755422592163086, "eval_Qnli-dev_cosine_precision": 0.6137071651090342, "eval_Qnli-dev_cosine_recall": 0.8347457627118644, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 429.08099365234375, "eval_Qnli-dev_dot_ap": 0.618896987535733, "eval_Qnli-dev_dot_f1": 0.6784565916398714, "eval_Qnli-dev_dot_f1_threshold": 389.2666015625, "eval_Qnli-dev_dot_precision": 0.5466321243523317, "eval_Qnli-dev_dot_recall": 0.8940677966101694, "eval_Qnli-dev_euclidean_accuracy": 0.6953125, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.390548706054688, "eval_Qnli-dev_euclidean_ap": 0.7347399680383467, "eval_Qnli-dev_euclidean_f1": 0.6974169741697418, "eval_Qnli-dev_euclidean_f1_threshold": 15.172780990600586, "eval_Qnli-dev_euclidean_precision": 0.6176470588235294, "eval_Qnli-dev_euclidean_recall": 0.8008474576271186, "eval_Qnli-dev_manhattan_accuracy": 0.7109375, "eval_Qnli-dev_manhattan_accuracy_threshold": 299.5706787109375, "eval_Qnli-dev_manhattan_ap": 0.7368729396225034, "eval_Qnli-dev_manhattan_f1": 0.7120622568093385, "eval_Qnli-dev_manhattan_f1_threshold": 299.5706787109375, "eval_Qnli-dev_manhattan_precision": 0.658273381294964, "eval_Qnli-dev_manhattan_recall": 0.7754237288135594, "eval_Qnli-dev_max_accuracy": 0.7109375, "eval_Qnli-dev_max_accuracy_threshold": 429.08099365234375, "eval_Qnli-dev_max_ap": 0.7368729396225034, "eval_Qnli-dev_max_f1": 0.7120622568093385, "eval_Qnli-dev_max_f1_threshold": 389.2666015625, "eval_Qnli-dev_max_precision": 0.658273381294964, "eval_Qnli-dev_max_recall": 0.8940677966101694, "eval_allNLI-dev_cosine_accuracy": 0.71875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8877571821212769, "eval_allNLI-dev_cosine_ap": 0.5702315132181276, "eval_allNLI-dev_cosine_f1": 0.5930735930735931, "eval_allNLI-dev_cosine_f1_threshold": 0.8116433620452881, "eval_allNLI-dev_cosine_precision": 0.4740484429065744, "eval_allNLI-dev_cosine_recall": 0.791907514450867, "eval_allNLI-dev_dot_accuracy": 0.67578125, "eval_allNLI-dev_dot_accuracy_threshold": 478.5546875, "eval_allNLI-dev_dot_ap": 0.4739609661272707, "eval_allNLI-dev_dot_f1": 0.5494949494949496, "eval_allNLI-dev_dot_f1_threshold": 413.8797912597656, "eval_allNLI-dev_dot_precision": 0.422360248447205, "eval_allNLI-dev_dot_recall": 0.7861271676300579, "eval_allNLI-dev_euclidean_accuracy": 0.72265625, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.04772663116455, "eval_allNLI-dev_euclidean_ap": 0.57668991696855, "eval_allNLI-dev_euclidean_f1": 0.5995525727069352, "eval_allNLI-dev_euclidean_f1_threshold": 13.817825317382812, "eval_allNLI-dev_euclidean_precision": 0.48905109489051096, "eval_allNLI-dev_euclidean_recall": 0.7745664739884393, "eval_allNLI-dev_manhattan_accuracy": 0.71875, "eval_allNLI-dev_manhattan_accuracy_threshold": 211.58740234375, "eval_allNLI-dev_manhattan_ap": 0.578607497519579, "eval_allNLI-dev_manhattan_f1": 0.5882352941176471, "eval_allNLI-dev_manhattan_f1_threshold": 281.086181640625, "eval_allNLI-dev_manhattan_precision": 0.483271375464684, "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, "eval_allNLI-dev_max_accuracy": 0.72265625, "eval_allNLI-dev_max_accuracy_threshold": 478.5546875, "eval_allNLI-dev_max_ap": 0.578607497519579, "eval_allNLI-dev_max_f1": 0.5995525727069352, "eval_allNLI-dev_max_f1_threshold": 413.8797912597656, "eval_allNLI-dev_max_precision": 0.48905109489051096, "eval_allNLI-dev_max_recall": 0.791907514450867, "eval_sequential_score": 0.7368729396225034, "eval_sts-test_pearson_cosine": 0.7951957837142611, "eval_sts-test_pearson_dot": 0.7487270214140551, "eval_sts-test_pearson_euclidean": 0.8178215451497555, "eval_sts-test_pearson_manhattan": 0.8154922571151692, "eval_sts-test_pearson_max": 0.8178215451497555, "eval_sts-test_spearman_cosine": 0.8174810476116783, "eval_sts-test_spearman_dot": 0.7310933468755048, "eval_sts-test_spearman_euclidean": 0.8105849677337864, "eval_sts-test_spearman_manhattan": 0.8080193779182173, "eval_sts-test_spearman_max": 0.8174810476116783, "eval_vitaminc-pairs_loss": 2.8546268939971924, "eval_vitaminc-pairs_runtime": 3.164, "eval_vitaminc-pairs_samples_per_second": 40.455, "eval_vitaminc-pairs_steps_per_second": 0.316, "step": 560 }, { "epoch": 0.5761316872427984, "eval_negation-triplets_loss": 1.2844172716140747, "eval_negation-triplets_runtime": 0.7354, "eval_negation-triplets_samples_per_second": 174.063, "eval_negation-triplets_steps_per_second": 1.36, "step": 560 }, { "epoch": 0.5761316872427984, "eval_scitail-pairs-pos_loss": 0.17617923021316528, "eval_scitail-pairs-pos_runtime": 0.804, "eval_scitail-pairs-pos_samples_per_second": 159.198, "eval_scitail-pairs-pos_steps_per_second": 1.244, "step": 560 }, { "epoch": 0.5761316872427984, "eval_scitail-pairs-qa_loss": 0.013183332979679108, "eval_scitail-pairs-qa_runtime": 0.5639, "eval_scitail-pairs-qa_samples_per_second": 226.973, "eval_scitail-pairs-qa_steps_per_second": 1.773, "step": 560 }, { "epoch": 0.5761316872427984, "eval_xsum-pairs_loss": 0.8270187973976135, "eval_xsum-pairs_runtime": 3.0144, "eval_xsum-pairs_samples_per_second": 42.463, "eval_xsum-pairs_steps_per_second": 0.332, "step": 560 }, { "epoch": 0.5761316872427984, "eval_sciq_pairs_loss": 0.1439501792192459, "eval_sciq_pairs_runtime": 3.4768, "eval_sciq_pairs_samples_per_second": 36.816, "eval_sciq_pairs_steps_per_second": 0.288, "step": 560 }, { "epoch": 0.5761316872427984, "eval_qasc_pairs_loss": 0.6848240494728088, "eval_qasc_pairs_runtime": 0.6196, "eval_qasc_pairs_samples_per_second": 206.597, "eval_qasc_pairs_steps_per_second": 1.614, "step": 560 }, { "epoch": 0.5761316872427984, "eval_openbookqa_pairs_loss": 1.4732991456985474, "eval_openbookqa_pairs_runtime": 0.5734, "eval_openbookqa_pairs_samples_per_second": 223.235, "eval_openbookqa_pairs_steps_per_second": 1.744, "step": 560 }, { "epoch": 0.5761316872427984, "eval_msmarco_pairs_loss": 1.4930459260940552, "eval_msmarco_pairs_runtime": 1.5133, "eval_msmarco_pairs_samples_per_second": 84.581, "eval_msmarco_pairs_steps_per_second": 0.661, "step": 560 }, { "epoch": 0.5761316872427984, "eval_nq_pairs_loss": 1.7120836973190308, "eval_nq_pairs_runtime": 2.8949, "eval_nq_pairs_samples_per_second": 44.216, "eval_nq_pairs_steps_per_second": 0.345, "step": 560 }, { "epoch": 0.5761316872427984, "eval_trivia_pairs_loss": 1.3425896167755127, "eval_trivia_pairs_runtime": 3.4363, "eval_trivia_pairs_samples_per_second": 37.249, "eval_trivia_pairs_steps_per_second": 0.291, "step": 560 }, { "epoch": 0.5761316872427984, "eval_gooaq_pairs_loss": 0.828025758266449, "eval_gooaq_pairs_runtime": 0.9422, "eval_gooaq_pairs_samples_per_second": 135.847, "eval_gooaq_pairs_steps_per_second": 1.061, "step": 560 }, { "epoch": 0.5761316872427984, "eval_paws-pos_loss": 0.039411623030900955, "eval_paws-pos_runtime": 0.6819, "eval_paws-pos_samples_per_second": 187.706, "eval_paws-pos_steps_per_second": 1.466, "step": 560 }, { "epoch": 0.5761316872427984, "eval_global_dataset_loss": 0.7242797613143921, "eval_global_dataset_runtime": 13.3545, "eval_global_dataset_samples_per_second": 31.151, "eval_global_dataset_steps_per_second": 0.3, "step": 560 }, { "epoch": 0.5771604938271605, "grad_norm": 8.372831344604492, "learning_rate": 2.0280373831775698e-05, "loss": 0.593, "step": 561 }, { "epoch": 0.5781893004115226, "grad_norm": 19.26259422302246, "learning_rate": 2.0316718587746625e-05, "loss": 1.8963, "step": 562 }, { "epoch": 0.5792181069958847, "grad_norm": 11.283585548400879, "learning_rate": 2.0353063343717546e-05, "loss": 0.743, "step": 563 }, { "epoch": 0.5802469135802469, "grad_norm": 8.997882843017578, "learning_rate": 2.038940809968847e-05, "loss": 0.5824, "step": 564 }, { "epoch": 0.581275720164609, "grad_norm": 13.550999641418457, "learning_rate": 2.0425752855659397e-05, "loss": 1.7532, "step": 565 }, { "epoch": 0.5823045267489712, "grad_norm": 8.910313606262207, "learning_rate": 2.046209761163032e-05, "loss": 0.6509, "step": 566 }, { "epoch": 0.5833333333333334, "grad_norm": 10.5217866897583, "learning_rate": 2.0498442367601242e-05, "loss": 0.7318, "step": 567 }, { "epoch": 0.5843621399176955, "grad_norm": 13.271885871887207, "learning_rate": 2.053478712357217e-05, "loss": 1.3168, "step": 568 }, { "epoch": 0.5853909465020576, "grad_norm": 9.908731460571289, "learning_rate": 2.0571131879543093e-05, "loss": 0.599, "step": 569 }, { "epoch": 0.5864197530864198, "grad_norm": 14.152383804321289, "learning_rate": 2.0607476635514014e-05, "loss": 1.672, "step": 570 }, { "epoch": 0.5874485596707819, "grad_norm": 9.812310218811035, "learning_rate": 2.064382139148494e-05, "loss": 0.7583, "step": 571 }, { "epoch": 0.588477366255144, "grad_norm": 5.6503825187683105, "learning_rate": 2.0680166147455865e-05, "loss": 0.1891, "step": 572 }, { "epoch": 0.5895061728395061, "grad_norm": 10.130154609680176, "learning_rate": 2.071651090342679e-05, "loss": 0.6344, "step": 573 }, { "epoch": 0.5905349794238683, "grad_norm": 15.343293190002441, "learning_rate": 2.0752855659397713e-05, "loss": 1.303, "step": 574 }, { "epoch": 0.5915637860082305, "grad_norm": 21.49701499938965, "learning_rate": 2.0789200415368637e-05, "loss": 2.2275, "step": 575 }, { "epoch": 0.5925925925925926, "grad_norm": 14.206128120422363, "learning_rate": 2.082554517133956e-05, "loss": 1.3081, "step": 576 }, { "epoch": 0.5936213991769548, "grad_norm": 9.159503936767578, "learning_rate": 2.086188992731049e-05, "loss": 0.5681, "step": 577 }, { "epoch": 0.5946502057613169, "grad_norm": 10.146199226379395, "learning_rate": 2.089823468328141e-05, "loss": 0.6258, "step": 578 }, { "epoch": 0.595679012345679, "grad_norm": 12.96678638458252, "learning_rate": 2.0934579439252334e-05, "loss": 1.1454, "step": 579 }, { "epoch": 0.5967078189300411, "grad_norm": 14.751097679138184, "learning_rate": 2.097092419522326e-05, "loss": 1.3416, "step": 580 }, { "epoch": 0.5967078189300411, "eval_Qnli-dev_cosine_accuracy": 0.66796875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8469637632369995, "eval_Qnli-dev_cosine_ap": 0.7090284432654561, "eval_Qnli-dev_cosine_f1": 0.6897689768976898, "eval_Qnli-dev_cosine_f1_threshold": 0.7387524843215942, "eval_Qnli-dev_cosine_precision": 0.5648648648648649, "eval_Qnli-dev_cosine_recall": 0.885593220338983, "eval_Qnli-dev_dot_accuracy": 0.6640625, "eval_Qnli-dev_dot_accuracy_threshold": 408.5235595703125, "eval_Qnli-dev_dot_ap": 0.6097543105824177, "eval_Qnli-dev_dot_f1": 0.6701754385964912, "eval_Qnli-dev_dot_f1_threshold": 390.4075012207031, "eval_Qnli-dev_dot_precision": 0.5718562874251497, "eval_Qnli-dev_dot_recall": 0.809322033898305, "eval_Qnli-dev_euclidean_accuracy": 0.677734375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.064620971679688, "eval_Qnli-dev_euclidean_ap": 0.7199645621423693, "eval_Qnli-dev_euclidean_f1": 0.6836734693877551, "eval_Qnli-dev_euclidean_f1_threshold": 16.033926010131836, "eval_Qnli-dev_euclidean_precision": 0.5710227272727273, "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, "eval_Qnli-dev_manhattan_accuracy": 0.681640625, "eval_Qnli-dev_manhattan_accuracy_threshold": 294.104248046875, "eval_Qnli-dev_manhattan_ap": 0.721231392124396, "eval_Qnli-dev_manhattan_f1": 0.6897810218978102, "eval_Qnli-dev_manhattan_f1_threshold": 310.521728515625, "eval_Qnli-dev_manhattan_precision": 0.6057692307692307, "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, "eval_Qnli-dev_max_accuracy": 0.681640625, "eval_Qnli-dev_max_accuracy_threshold": 408.5235595703125, "eval_Qnli-dev_max_ap": 0.721231392124396, "eval_Qnli-dev_max_f1": 0.6897810218978102, "eval_Qnli-dev_max_f1_threshold": 390.4075012207031, "eval_Qnli-dev_max_precision": 0.6057692307692307, "eval_Qnli-dev_max_recall": 0.885593220338983, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8930063247680664, "eval_allNLI-dev_cosine_ap": 0.580516831193243, "eval_allNLI-dev_cosine_f1": 0.5932203389830509, "eval_allNLI-dev_cosine_f1_threshold": 0.792042076587677, "eval_allNLI-dev_cosine_precision": 0.4682274247491639, "eval_allNLI-dev_cosine_recall": 0.8092485549132948, "eval_allNLI-dev_dot_accuracy": 0.681640625, "eval_allNLI-dev_dot_accuracy_threshold": 479.3341064453125, "eval_allNLI-dev_dot_ap": 0.48669798557045457, "eval_allNLI-dev_dot_f1": 0.560919540229885, "eval_allNLI-dev_dot_f1_threshold": 413.0164794921875, "eval_allNLI-dev_dot_precision": 0.46564885496183206, "eval_allNLI-dev_dot_recall": 0.7052023121387283, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.375900268554688, "eval_allNLI-dev_euclidean_ap": 0.586159821151409, "eval_allNLI-dev_euclidean_f1": 0.5925925925925926, "eval_allNLI-dev_euclidean_f1_threshold": 13.825302124023438, "eval_allNLI-dev_euclidean_precision": 0.4942084942084942, "eval_allNLI-dev_euclidean_recall": 0.7398843930635838, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 213.74179077148438, "eval_allNLI-dev_manhattan_ap": 0.5867922982953583, "eval_allNLI-dev_manhattan_f1": 0.5903890160183066, "eval_allNLI-dev_manhattan_f1_threshold": 286.81524658203125, "eval_allNLI-dev_manhattan_precision": 0.48863636363636365, "eval_allNLI-dev_manhattan_recall": 0.7456647398843931, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 479.3341064453125, "eval_allNLI-dev_max_ap": 0.5867922982953583, "eval_allNLI-dev_max_f1": 0.5932203389830509, "eval_allNLI-dev_max_f1_threshold": 413.0164794921875, "eval_allNLI-dev_max_precision": 0.4942084942084942, "eval_allNLI-dev_max_recall": 0.8092485549132948, "eval_sequential_score": 0.721231392124396, "eval_sts-test_pearson_cosine": 0.8031708345006614, "eval_sts-test_pearson_dot": 0.7716469990772233, "eval_sts-test_pearson_euclidean": 0.8293403363982195, "eval_sts-test_pearson_manhattan": 0.8269704942343952, "eval_sts-test_pearson_max": 0.8293403363982195, "eval_sts-test_spearman_cosine": 0.8293793339853779, "eval_sts-test_spearman_dot": 0.7565175229997094, "eval_sts-test_spearman_euclidean": 0.8224314768980562, "eval_sts-test_spearman_manhattan": 0.81979553809958, "eval_sts-test_spearman_max": 0.8293793339853779, "eval_vitaminc-pairs_loss": 2.9443347454071045, "eval_vitaminc-pairs_runtime": 3.1898, "eval_vitaminc-pairs_samples_per_second": 40.127, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 580 }, { "epoch": 0.5967078189300411, "eval_negation-triplets_loss": 1.221449851989746, "eval_negation-triplets_runtime": 0.7486, "eval_negation-triplets_samples_per_second": 170.975, "eval_negation-triplets_steps_per_second": 1.336, "step": 580 }, { "epoch": 0.5967078189300411, "eval_scitail-pairs-pos_loss": 0.1803685873746872, "eval_scitail-pairs-pos_runtime": 0.829, "eval_scitail-pairs-pos_samples_per_second": 154.409, "eval_scitail-pairs-pos_steps_per_second": 1.206, "step": 580 }, { "epoch": 0.5967078189300411, "eval_scitail-pairs-qa_loss": 0.015901347622275352, "eval_scitail-pairs-qa_runtime": 0.5704, "eval_scitail-pairs-qa_samples_per_second": 224.404, "eval_scitail-pairs-qa_steps_per_second": 1.753, "step": 580 }, { "epoch": 0.5967078189300411, "eval_xsum-pairs_loss": 0.7095991969108582, "eval_xsum-pairs_runtime": 3.0163, "eval_xsum-pairs_samples_per_second": 42.436, "eval_xsum-pairs_steps_per_second": 0.332, "step": 580 }, { "epoch": 0.5967078189300411, "eval_sciq_pairs_loss": 0.13398276269435883, "eval_sciq_pairs_runtime": 3.4459, "eval_sciq_pairs_samples_per_second": 37.145, "eval_sciq_pairs_steps_per_second": 0.29, "step": 580 }, { "epoch": 0.5967078189300411, "eval_qasc_pairs_loss": 0.681054413318634, "eval_qasc_pairs_runtime": 0.6052, "eval_qasc_pairs_samples_per_second": 211.516, "eval_qasc_pairs_steps_per_second": 1.652, "step": 580 }, { "epoch": 0.5967078189300411, "eval_openbookqa_pairs_loss": 1.32936692237854, "eval_openbookqa_pairs_runtime": 0.578, "eval_openbookqa_pairs_samples_per_second": 221.445, "eval_openbookqa_pairs_steps_per_second": 1.73, "step": 580 }, { "epoch": 0.5967078189300411, "eval_msmarco_pairs_loss": 1.3513559103012085, "eval_msmarco_pairs_runtime": 1.5095, "eval_msmarco_pairs_samples_per_second": 84.796, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 580 }, { "epoch": 0.5967078189300411, "eval_nq_pairs_loss": 1.6727423667907715, "eval_nq_pairs_runtime": 2.8997, "eval_nq_pairs_samples_per_second": 44.143, "eval_nq_pairs_steps_per_second": 0.345, "step": 580 }, { "epoch": 0.5967078189300411, "eval_trivia_pairs_loss": 1.1192874908447266, "eval_trivia_pairs_runtime": 3.4386, "eval_trivia_pairs_samples_per_second": 37.225, "eval_trivia_pairs_steps_per_second": 0.291, "step": 580 }, { "epoch": 0.5967078189300411, "eval_gooaq_pairs_loss": 0.8172786235809326, "eval_gooaq_pairs_runtime": 0.9533, "eval_gooaq_pairs_samples_per_second": 134.272, "eval_gooaq_pairs_steps_per_second": 1.049, "step": 580 }, { "epoch": 0.5967078189300411, "eval_paws-pos_loss": 0.03949186950922012, "eval_paws-pos_runtime": 0.6806, "eval_paws-pos_samples_per_second": 188.056, "eval_paws-pos_steps_per_second": 1.469, "step": 580 }, { "epoch": 0.5967078189300411, "eval_global_dataset_loss": 0.6970628499984741, "eval_global_dataset_runtime": 13.3615, "eval_global_dataset_samples_per_second": 31.134, "eval_global_dataset_steps_per_second": 0.299, "step": 580 }, { "epoch": 0.5977366255144033, "grad_norm": 14.620248794555664, "learning_rate": 2.100726895119418e-05, "loss": 1.6136, "step": 581 }, { "epoch": 0.5987654320987654, "grad_norm": 9.990836143493652, "learning_rate": 2.1043613707165106e-05, "loss": 0.5856, "step": 582 }, { "epoch": 0.5997942386831275, "grad_norm": 11.57479190826416, "learning_rate": 2.1079958463136033e-05, "loss": 0.7762, "step": 583 }, { "epoch": 0.6008230452674898, "grad_norm": 16.514976501464844, "learning_rate": 2.1116303219106954e-05, "loss": 2.0577, "step": 584 }, { "epoch": 0.6018518518518519, "grad_norm": 19.117877960205078, "learning_rate": 2.1152647975077878e-05, "loss": 1.8893, "step": 585 }, { "epoch": 0.602880658436214, "grad_norm": 1.2878212928771973, "learning_rate": 2.1188992731048805e-05, "loss": 0.0455, "step": 586 }, { "epoch": 0.6039094650205762, "grad_norm": 15.874303817749023, "learning_rate": 2.122533748701973e-05, "loss": 2.5615, "step": 587 }, { "epoch": 0.6049382716049383, "grad_norm": 9.337711334228516, "learning_rate": 2.126168224299065e-05, "loss": 0.593, "step": 588 }, { "epoch": 0.6059670781893004, "grad_norm": 10.22465991973877, "learning_rate": 2.1298026998961577e-05, "loss": 0.8033, "step": 589 }, { "epoch": 0.6069958847736625, "grad_norm": 9.863337516784668, "learning_rate": 2.13343717549325e-05, "loss": 0.694, "step": 590 }, { "epoch": 0.6080246913580247, "grad_norm": 12.331180572509766, "learning_rate": 2.1370716510903422e-05, "loss": 1.0183, "step": 591 }, { "epoch": 0.6090534979423868, "grad_norm": 9.044501304626465, "learning_rate": 2.140706126687435e-05, "loss": 0.6388, "step": 592 }, { "epoch": 0.6100823045267489, "grad_norm": 9.711915969848633, "learning_rate": 2.1443406022845273e-05, "loss": 0.7858, "step": 593 }, { "epoch": 0.6111111111111112, "grad_norm": 5.571502208709717, "learning_rate": 2.1479750778816197e-05, "loss": 0.1627, "step": 594 }, { "epoch": 0.6121399176954733, "grad_norm": 10.834738731384277, "learning_rate": 2.151609553478712e-05, "loss": 1.2084, "step": 595 }, { "epoch": 0.6131687242798354, "grad_norm": 11.250519752502441, "learning_rate": 2.1552440290758045e-05, "loss": 0.8371, "step": 596 }, { "epoch": 0.6141975308641975, "grad_norm": 12.769804000854492, "learning_rate": 2.158878504672897e-05, "loss": 1.0759, "step": 597 }, { "epoch": 0.6152263374485597, "grad_norm": 9.822973251342773, "learning_rate": 2.1625129802699897e-05, "loss": 0.6237, "step": 598 }, { "epoch": 0.6162551440329218, "grad_norm": 12.792522430419922, "learning_rate": 2.1661474558670817e-05, "loss": 0.9396, "step": 599 }, { "epoch": 0.6172839506172839, "grad_norm": 11.624062538146973, "learning_rate": 2.169781931464174e-05, "loss": 0.7352, "step": 600 }, { "epoch": 0.6172839506172839, "eval_Qnli-dev_cosine_accuracy": 0.685546875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7914708852767944, "eval_Qnli-dev_cosine_ap": 0.7227066968429299, "eval_Qnli-dev_cosine_f1": 0.6948529411764706, "eval_Qnli-dev_cosine_f1_threshold": 0.766169548034668, "eval_Qnli-dev_cosine_precision": 0.6136363636363636, "eval_Qnli-dev_cosine_recall": 0.8008474576271186, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 405.1741943359375, "eval_Qnli-dev_dot_ap": 0.6291761267009413, "eval_Qnli-dev_dot_f1": 0.6897810218978102, "eval_Qnli-dev_dot_f1_threshold": 382.8020935058594, "eval_Qnli-dev_dot_precision": 0.6057692307692307, "eval_Qnli-dev_dot_recall": 0.8008474576271186, "eval_Qnli-dev_euclidean_accuracy": 0.69140625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.424887657165527, "eval_Qnli-dev_euclidean_ap": 0.7307017217323966, "eval_Qnli-dev_euclidean_f1": 0.6906710310965629, "eval_Qnli-dev_euclidean_f1_threshold": 17.00006675720215, "eval_Qnli-dev_euclidean_precision": 0.5626666666666666, "eval_Qnli-dev_euclidean_recall": 0.8940677966101694, "eval_Qnli-dev_manhattan_accuracy": 0.689453125, "eval_Qnli-dev_manhattan_accuracy_threshold": 309.230712890625, "eval_Qnli-dev_manhattan_ap": 0.7325013115093475, "eval_Qnli-dev_manhattan_f1": 0.6953528399311533, "eval_Qnli-dev_manhattan_f1_threshold": 332.23504638671875, "eval_Qnli-dev_manhattan_precision": 0.5855072463768116, "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, "eval_Qnli-dev_max_accuracy": 0.69140625, "eval_Qnli-dev_max_accuracy_threshold": 405.1741943359375, "eval_Qnli-dev_max_ap": 0.7325013115093475, "eval_Qnli-dev_max_f1": 0.6953528399311533, "eval_Qnli-dev_max_f1_threshold": 382.8020935058594, "eval_Qnli-dev_max_precision": 0.6136363636363636, "eval_Qnli-dev_max_recall": 0.8940677966101694, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8877395391464233, "eval_allNLI-dev_cosine_ap": 0.5814109945041677, "eval_allNLI-dev_cosine_f1": 0.5892116182572614, "eval_allNLI-dev_cosine_f1_threshold": 0.7833628058433533, "eval_allNLI-dev_cosine_precision": 0.459546925566343, "eval_allNLI-dev_cosine_recall": 0.8208092485549133, "eval_allNLI-dev_dot_accuracy": 0.68359375, "eval_allNLI-dev_dot_accuracy_threshold": 498.7593994140625, "eval_allNLI-dev_dot_ap": 0.49817236088425526, "eval_allNLI-dev_dot_f1": 0.5469728601252609, "eval_allNLI-dev_dot_f1_threshold": 396.20513916015625, "eval_allNLI-dev_dot_precision": 0.42810457516339867, "eval_allNLI-dev_dot_recall": 0.7572254335260116, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.987224578857422, "eval_allNLI-dev_euclidean_ap": 0.5868737853118521, "eval_allNLI-dev_euclidean_f1": 0.5991735537190083, "eval_allNLI-dev_euclidean_f1_threshold": 14.847602844238281, "eval_allNLI-dev_euclidean_precision": 0.4662379421221865, "eval_allNLI-dev_euclidean_recall": 0.838150289017341, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 217.56982421875, "eval_allNLI-dev_manhattan_ap": 0.5854235635053637, "eval_allNLI-dev_manhattan_f1": 0.5908096280087528, "eval_allNLI-dev_manhattan_f1_threshold": 296.2995300292969, "eval_allNLI-dev_manhattan_precision": 0.4753521126760563, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 498.7593994140625, "eval_allNLI-dev_max_ap": 0.5868737853118521, "eval_allNLI-dev_max_f1": 0.5991735537190083, "eval_allNLI-dev_max_f1_threshold": 396.20513916015625, "eval_allNLI-dev_max_precision": 0.4753521126760563, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7325013115093475, "eval_sts-test_pearson_cosine": 0.8070716873912918, "eval_sts-test_pearson_dot": 0.7619397369954762, "eval_sts-test_pearson_euclidean": 0.8282272675602773, "eval_sts-test_pearson_manhattan": 0.8241390313463588, "eval_sts-test_pearson_max": 0.8282272675602773, "eval_sts-test_spearman_cosine": 0.8247862882724717, "eval_sts-test_spearman_dot": 0.7450420017923742, "eval_sts-test_spearman_euclidean": 0.819151701701942, "eval_sts-test_spearman_manhattan": 0.8149713968728485, "eval_sts-test_spearman_max": 0.8247862882724717, "eval_vitaminc-pairs_loss": 2.7805817127227783, "eval_vitaminc-pairs_runtime": 3.1769, "eval_vitaminc-pairs_samples_per_second": 40.291, "eval_vitaminc-pairs_steps_per_second": 0.315, "step": 600 }, { "epoch": 0.6172839506172839, "eval_negation-triplets_loss": 1.2412256002426147, "eval_negation-triplets_runtime": 0.7403, "eval_negation-triplets_samples_per_second": 172.908, "eval_negation-triplets_steps_per_second": 1.351, "step": 600 }, { "epoch": 0.6172839506172839, "eval_scitail-pairs-pos_loss": 0.19108502566814423, "eval_scitail-pairs-pos_runtime": 0.8102, "eval_scitail-pairs-pos_samples_per_second": 157.986, "eval_scitail-pairs-pos_steps_per_second": 1.234, "step": 600 }, { "epoch": 0.6172839506172839, "eval_scitail-pairs-qa_loss": 0.011316634714603424, "eval_scitail-pairs-qa_runtime": 0.5692, "eval_scitail-pairs-qa_samples_per_second": 224.889, "eval_scitail-pairs-qa_steps_per_second": 1.757, "step": 600 }, { "epoch": 0.6172839506172839, "eval_xsum-pairs_loss": 0.6977664232254028, "eval_xsum-pairs_runtime": 3.0198, "eval_xsum-pairs_samples_per_second": 42.387, "eval_xsum-pairs_steps_per_second": 0.331, "step": 600 }, { "epoch": 0.6172839506172839, "eval_sciq_pairs_loss": 0.13763564825057983, "eval_sciq_pairs_runtime": 3.413, "eval_sciq_pairs_samples_per_second": 37.503, "eval_sciq_pairs_steps_per_second": 0.293, "step": 600 }, { "epoch": 0.6172839506172839, "eval_qasc_pairs_loss": 0.6264404058456421, "eval_qasc_pairs_runtime": 0.5999, "eval_qasc_pairs_samples_per_second": 213.376, "eval_qasc_pairs_steps_per_second": 1.667, "step": 600 }, { "epoch": 0.6172839506172839, "eval_openbookqa_pairs_loss": 1.2759621143341064, "eval_openbookqa_pairs_runtime": 0.5867, "eval_openbookqa_pairs_samples_per_second": 218.169, "eval_openbookqa_pairs_steps_per_second": 1.704, "step": 600 }, { "epoch": 0.6172839506172839, "eval_msmarco_pairs_loss": 1.4110215902328491, "eval_msmarco_pairs_runtime": 1.5228, "eval_msmarco_pairs_samples_per_second": 84.054, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 600 }, { "epoch": 0.6172839506172839, "eval_nq_pairs_loss": 1.654952883720398, "eval_nq_pairs_runtime": 2.9213, "eval_nq_pairs_samples_per_second": 43.816, "eval_nq_pairs_steps_per_second": 0.342, "step": 600 }, { "epoch": 0.6172839506172839, "eval_trivia_pairs_loss": 1.11814284324646, "eval_trivia_pairs_runtime": 3.4571, "eval_trivia_pairs_samples_per_second": 37.025, "eval_trivia_pairs_steps_per_second": 0.289, "step": 600 }, { "epoch": 0.6172839506172839, "eval_gooaq_pairs_loss": 0.8059184551239014, "eval_gooaq_pairs_runtime": 0.9451, "eval_gooaq_pairs_samples_per_second": 135.437, "eval_gooaq_pairs_steps_per_second": 1.058, "step": 600 }, { "epoch": 0.6172839506172839, "eval_paws-pos_loss": 0.038612980395555496, "eval_paws-pos_runtime": 0.6792, "eval_paws-pos_samples_per_second": 188.462, "eval_paws-pos_steps_per_second": 1.472, "step": 600 }, { "epoch": 0.6172839506172839, "eval_global_dataset_loss": 0.6731630563735962, "eval_global_dataset_runtime": 13.3755, "eval_global_dataset_samples_per_second": 31.102, "eval_global_dataset_steps_per_second": 0.299, "step": 600 }, { "epoch": 0.6183127572016461, "grad_norm": 9.29102611541748, "learning_rate": 2.173416407061267e-05, "loss": 0.5273, "step": 601 }, { "epoch": 0.6193415637860082, "grad_norm": 18.072662353515625, "learning_rate": 2.177050882658359e-05, "loss": 1.925, "step": 602 }, { "epoch": 0.6203703703703703, "grad_norm": 11.696316719055176, "learning_rate": 2.1806853582554513e-05, "loss": 0.8177, "step": 603 }, { "epoch": 0.6213991769547325, "grad_norm": 8.543580055236816, "learning_rate": 2.184319833852544e-05, "loss": 0.4747, "step": 604 }, { "epoch": 0.6224279835390947, "grad_norm": 11.905756950378418, "learning_rate": 2.1879543094496365e-05, "loss": 0.9485, "step": 605 }, { "epoch": 0.6234567901234568, "grad_norm": 13.481616020202637, "learning_rate": 2.1915887850467285e-05, "loss": 1.7983, "step": 606 }, { "epoch": 0.6244855967078189, "grad_norm": 4.5081787109375, "learning_rate": 2.1952232606438213e-05, "loss": 0.1446, "step": 607 }, { "epoch": 0.6255144032921811, "grad_norm": 10.28495979309082, "learning_rate": 2.1988577362409137e-05, "loss": 0.6929, "step": 608 }, { "epoch": 0.6265432098765432, "grad_norm": 0.8422635197639465, "learning_rate": 2.2024922118380058e-05, "loss": 0.056, "step": 609 }, { "epoch": 0.6275720164609053, "grad_norm": 10.7501220703125, "learning_rate": 2.2061266874350985e-05, "loss": 0.6738, "step": 610 }, { "epoch": 0.6286008230452675, "grad_norm": 13.118562698364258, "learning_rate": 2.209761163032191e-05, "loss": 1.4398, "step": 611 }, { "epoch": 0.6296296296296297, "grad_norm": 19.016132354736328, "learning_rate": 2.2133956386292833e-05, "loss": 3.152, "step": 612 }, { "epoch": 0.6306584362139918, "grad_norm": 16.179283142089844, "learning_rate": 2.2170301142263757e-05, "loss": 1.8703, "step": 613 }, { "epoch": 0.6316872427983539, "grad_norm": 1.413341999053955, "learning_rate": 2.220664589823468e-05, "loss": 0.0766, "step": 614 }, { "epoch": 0.6327160493827161, "grad_norm": 19.418697357177734, "learning_rate": 2.2242990654205605e-05, "loss": 2.4434, "step": 615 }, { "epoch": 0.6337448559670782, "grad_norm": 13.95297622680664, "learning_rate": 2.2279335410176532e-05, "loss": 1.4074, "step": 616 }, { "epoch": 0.6347736625514403, "grad_norm": 9.78261947631836, "learning_rate": 2.2315680166147453e-05, "loss": 0.7425, "step": 617 }, { "epoch": 0.6358024691358025, "grad_norm": 7.618975639343262, "learning_rate": 2.2352024922118377e-05, "loss": 0.466, "step": 618 }, { "epoch": 0.6368312757201646, "grad_norm": 11.607491493225098, "learning_rate": 2.2388369678089305e-05, "loss": 1.6586, "step": 619 }, { "epoch": 0.6378600823045267, "grad_norm": 7.107526779174805, "learning_rate": 2.2424714434060225e-05, "loss": 0.3817, "step": 620 }, { "epoch": 0.6378600823045267, "eval_Qnli-dev_cosine_accuracy": 0.693359375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8104921579360962, "eval_Qnli-dev_cosine_ap": 0.7372700421671432, "eval_Qnli-dev_cosine_f1": 0.7011070110701106, "eval_Qnli-dev_cosine_f1_threshold": 0.7957046627998352, "eval_Qnli-dev_cosine_precision": 0.6209150326797386, "eval_Qnli-dev_cosine_recall": 0.8050847457627118, "eval_Qnli-dev_dot_accuracy": 0.66015625, "eval_Qnli-dev_dot_accuracy_threshold": 438.48602294921875, "eval_Qnli-dev_dot_ap": 0.6254364606240859, "eval_Qnli-dev_dot_f1": 0.6798561151079136, "eval_Qnli-dev_dot_f1_threshold": 417.19720458984375, "eval_Qnli-dev_dot_precision": 0.590625, "eval_Qnli-dev_dot_recall": 0.8008474576271186, "eval_Qnli-dev_euclidean_accuracy": 0.701171875, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.722761154174805, "eval_Qnli-dev_euclidean_ap": 0.7476820851309197, "eval_Qnli-dev_euclidean_f1": 0.6962457337883959, "eval_Qnli-dev_euclidean_f1_threshold": 15.658858299255371, "eval_Qnli-dev_euclidean_precision": 0.5828571428571429, "eval_Qnli-dev_euclidean_recall": 0.864406779661017, "eval_Qnli-dev_manhattan_accuracy": 0.693359375, "eval_Qnli-dev_manhattan_accuracy_threshold": 277.64154052734375, "eval_Qnli-dev_manhattan_ap": 0.747576429030092, "eval_Qnli-dev_manhattan_f1": 0.6969147005444646, "eval_Qnli-dev_manhattan_f1_threshold": 306.7862548828125, "eval_Qnli-dev_manhattan_precision": 0.6095238095238096, "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, "eval_Qnli-dev_max_accuracy": 0.701171875, "eval_Qnli-dev_max_accuracy_threshold": 438.48602294921875, "eval_Qnli-dev_max_ap": 0.7476820851309197, "eval_Qnli-dev_max_f1": 0.7011070110701106, "eval_Qnli-dev_max_f1_threshold": 417.19720458984375, "eval_Qnli-dev_max_precision": 0.6209150326797386, "eval_Qnli-dev_max_recall": 0.864406779661017, "eval_allNLI-dev_cosine_accuracy": 0.734375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.9023975133895874, "eval_allNLI-dev_cosine_ap": 0.5865878816400992, "eval_allNLI-dev_cosine_f1": 0.5961123110151189, "eval_allNLI-dev_cosine_f1_threshold": 0.815485954284668, "eval_allNLI-dev_cosine_precision": 0.47586206896551725, "eval_allNLI-dev_cosine_recall": 0.7976878612716763, "eval_allNLI-dev_dot_accuracy": 0.6796875, "eval_allNLI-dev_dot_accuracy_threshold": 520.5687255859375, "eval_allNLI-dev_dot_ap": 0.50417908457673, "eval_allNLI-dev_dot_f1": 0.5764705882352941, "eval_allNLI-dev_dot_f1_threshold": 419.378662109375, "eval_allNLI-dev_dot_precision": 0.4362017804154303, "eval_allNLI-dev_dot_recall": 0.8497109826589595, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.461543083190918, "eval_allNLI-dev_euclidean_ap": 0.5891739143142342, "eval_allNLI-dev_euclidean_f1": 0.6018099547511312, "eval_allNLI-dev_euclidean_f1_threshold": 13.633740425109863, "eval_allNLI-dev_euclidean_precision": 0.4944237918215613, "eval_allNLI-dev_euclidean_recall": 0.7687861271676301, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 205.95645141601562, "eval_allNLI-dev_manhattan_ap": 0.5909121718301882, "eval_allNLI-dev_manhattan_f1": 0.5978947368421053, "eval_allNLI-dev_manhattan_f1_threshold": 292.0635681152344, "eval_allNLI-dev_manhattan_precision": 0.47019867549668876, "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 520.5687255859375, "eval_allNLI-dev_max_ap": 0.5909121718301882, "eval_allNLI-dev_max_f1": 0.6018099547511312, "eval_allNLI-dev_max_f1_threshold": 419.378662109375, "eval_allNLI-dev_max_precision": 0.4944237918215613, "eval_allNLI-dev_max_recall": 0.8497109826589595, "eval_sequential_score": 0.7476820851309197, "eval_sts-test_pearson_cosine": 0.811803599688079, "eval_sts-test_pearson_dot": 0.7763025780752795, "eval_sts-test_pearson_euclidean": 0.834182762862252, "eval_sts-test_pearson_manhattan": 0.8306831599881925, "eval_sts-test_pearson_max": 0.834182762862252, "eval_sts-test_spearman_cosine": 0.8279280953297161, "eval_sts-test_spearman_dot": 0.7618572435089312, "eval_sts-test_spearman_euclidean": 0.8235176795145484, "eval_sts-test_spearman_manhattan": 0.8203718448437786, "eval_sts-test_spearman_max": 0.8279280953297161, "eval_vitaminc-pairs_loss": 2.7285807132720947, "eval_vitaminc-pairs_runtime": 3.1675, "eval_vitaminc-pairs_samples_per_second": 40.41, "eval_vitaminc-pairs_steps_per_second": 0.316, "step": 620 }, { "epoch": 0.6378600823045267, "eval_negation-triplets_loss": 1.2768163681030273, "eval_negation-triplets_runtime": 0.7451, "eval_negation-triplets_samples_per_second": 171.791, "eval_negation-triplets_steps_per_second": 1.342, "step": 620 }, { "epoch": 0.6378600823045267, "eval_scitail-pairs-pos_loss": 0.221151664853096, "eval_scitail-pairs-pos_runtime": 0.8023, "eval_scitail-pairs-pos_samples_per_second": 159.546, "eval_scitail-pairs-pos_steps_per_second": 1.246, "step": 620 }, { "epoch": 0.6378600823045267, "eval_scitail-pairs-qa_loss": 0.011276349425315857, "eval_scitail-pairs-qa_runtime": 0.5728, "eval_scitail-pairs-qa_samples_per_second": 223.455, "eval_scitail-pairs-qa_steps_per_second": 1.746, "step": 620 }, { "epoch": 0.6378600823045267, "eval_xsum-pairs_loss": 0.6888625025749207, "eval_xsum-pairs_runtime": 3.022, "eval_xsum-pairs_samples_per_second": 42.356, "eval_xsum-pairs_steps_per_second": 0.331, "step": 620 }, { "epoch": 0.6378600823045267, "eval_sciq_pairs_loss": 0.12679386138916016, "eval_sciq_pairs_runtime": 3.4396, "eval_sciq_pairs_samples_per_second": 37.213, "eval_sciq_pairs_steps_per_second": 0.291, "step": 620 }, { "epoch": 0.6378600823045267, "eval_qasc_pairs_loss": 0.6138037443161011, "eval_qasc_pairs_runtime": 0.6116, "eval_qasc_pairs_samples_per_second": 209.28, "eval_qasc_pairs_steps_per_second": 1.635, "step": 620 }, { "epoch": 0.6378600823045267, "eval_openbookqa_pairs_loss": 1.2520498037338257, "eval_openbookqa_pairs_runtime": 0.575, "eval_openbookqa_pairs_samples_per_second": 222.626, "eval_openbookqa_pairs_steps_per_second": 1.739, "step": 620 }, { "epoch": 0.6378600823045267, "eval_msmarco_pairs_loss": 1.2622545957565308, "eval_msmarco_pairs_runtime": 1.5106, "eval_msmarco_pairs_samples_per_second": 84.736, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 620 }, { "epoch": 0.6378600823045267, "eval_nq_pairs_loss": 1.5863006114959717, "eval_nq_pairs_runtime": 2.9147, "eval_nq_pairs_samples_per_second": 43.915, "eval_nq_pairs_steps_per_second": 0.343, "step": 620 }, { "epoch": 0.6378600823045267, "eval_trivia_pairs_loss": 1.1821491718292236, "eval_trivia_pairs_runtime": 3.4369, "eval_trivia_pairs_samples_per_second": 37.243, "eval_trivia_pairs_steps_per_second": 0.291, "step": 620 }, { "epoch": 0.6378600823045267, "eval_gooaq_pairs_loss": 0.7643461227416992, "eval_gooaq_pairs_runtime": 0.9406, "eval_gooaq_pairs_samples_per_second": 136.089, "eval_gooaq_pairs_steps_per_second": 1.063, "step": 620 }, { "epoch": 0.6378600823045267, "eval_paws-pos_loss": 0.03508833795785904, "eval_paws-pos_runtime": 0.6812, "eval_paws-pos_samples_per_second": 187.907, "eval_paws-pos_steps_per_second": 1.468, "step": 620 }, { "epoch": 0.6378600823045267, "eval_global_dataset_loss": 0.6339895725250244, "eval_global_dataset_runtime": 13.3641, "eval_global_dataset_samples_per_second": 31.128, "eval_global_dataset_steps_per_second": 0.299, "step": 620 }, { "epoch": 0.6388888888888888, "grad_norm": 13.532258033752441, "learning_rate": 2.246105919003115e-05, "loss": 1.4414, "step": 621 }, { "epoch": 0.6399176954732511, "grad_norm": 9.563913345336914, "learning_rate": 2.2497403946002077e-05, "loss": 0.7481, "step": 622 }, { "epoch": 0.6409465020576132, "grad_norm": 10.86938762664795, "learning_rate": 2.2533748701973e-05, "loss": 0.8256, "step": 623 }, { "epoch": 0.6419753086419753, "grad_norm": 4.665733814239502, "learning_rate": 2.257009345794392e-05, "loss": 0.1559, "step": 624 }, { "epoch": 0.6430041152263375, "grad_norm": 10.261479377746582, "learning_rate": 2.260643821391485e-05, "loss": 0.8878, "step": 625 }, { "epoch": 0.6440329218106996, "grad_norm": 9.72616958618164, "learning_rate": 2.2642782969885773e-05, "loss": 0.5888, "step": 626 }, { "epoch": 0.6450617283950617, "grad_norm": 11.944307327270508, "learning_rate": 2.2679127725856693e-05, "loss": 1.0332, "step": 627 }, { "epoch": 0.6460905349794238, "grad_norm": 10.020615577697754, "learning_rate": 2.271547248182762e-05, "loss": 1.0121, "step": 628 }, { "epoch": 0.647119341563786, "grad_norm": 8.791054725646973, "learning_rate": 2.2751817237798545e-05, "loss": 0.6393, "step": 629 }, { "epoch": 0.6481481481481481, "grad_norm": 12.706099510192871, "learning_rate": 2.278816199376947e-05, "loss": 0.7494, "step": 630 }, { "epoch": 0.6491769547325102, "grad_norm": 3.587538480758667, "learning_rate": 2.2824506749740393e-05, "loss": 0.1088, "step": 631 }, { "epoch": 0.6502057613168725, "grad_norm": 16.609806060791016, "learning_rate": 2.2860851505711317e-05, "loss": 1.3588, "step": 632 }, { "epoch": 0.6512345679012346, "grad_norm": 1.4342639446258545, "learning_rate": 2.289719626168224e-05, "loss": 0.0403, "step": 633 }, { "epoch": 0.6522633744855967, "grad_norm": 25.457242965698242, "learning_rate": 2.2933541017653165e-05, "loss": 3.6884, "step": 634 }, { "epoch": 0.6532921810699589, "grad_norm": 19.651193618774414, "learning_rate": 2.296988577362409e-05, "loss": 1.6915, "step": 635 }, { "epoch": 0.654320987654321, "grad_norm": 10.904431343078613, "learning_rate": 2.3006230529595013e-05, "loss": 0.5166, "step": 636 }, { "epoch": 0.6553497942386831, "grad_norm": 20.06137466430664, "learning_rate": 2.304257528556594e-05, "loss": 1.8266, "step": 637 }, { "epoch": 0.6563786008230452, "grad_norm": 17.062715530395508, "learning_rate": 2.307892004153686e-05, "loss": 1.3875, "step": 638 }, { "epoch": 0.6574074074074074, "grad_norm": 21.51274299621582, "learning_rate": 2.3115264797507785e-05, "loss": 1.8874, "step": 639 }, { "epoch": 0.6584362139917695, "grad_norm": 1.2121050357818604, "learning_rate": 2.3151609553478712e-05, "loss": 0.0379, "step": 640 }, { "epoch": 0.6584362139917695, "eval_Qnli-dev_cosine_accuracy": 0.697265625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7842894792556763, "eval_Qnli-dev_cosine_ap": 0.7349992877103873, "eval_Qnli-dev_cosine_f1": 0.7003610108303249, "eval_Qnli-dev_cosine_f1_threshold": 0.7629624605178833, "eval_Qnli-dev_cosine_precision": 0.610062893081761, "eval_Qnli-dev_cosine_recall": 0.8220338983050848, "eval_Qnli-dev_dot_accuracy": 0.662109375, "eval_Qnli-dev_dot_accuracy_threshold": 385.76885986328125, "eval_Qnli-dev_dot_ap": 0.6359639073801129, "eval_Qnli-dev_dot_f1": 0.6838709677419356, "eval_Qnli-dev_dot_f1_threshold": 354.2484436035156, "eval_Qnli-dev_dot_precision": 0.5520833333333334, "eval_Qnli-dev_dot_recall": 0.8983050847457628, "eval_Qnli-dev_euclidean_accuracy": 0.701171875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.231593132019043, "eval_Qnli-dev_euclidean_ap": 0.7462767063117786, "eval_Qnli-dev_euclidean_f1": 0.7047970479704797, "eval_Qnli-dev_euclidean_f1_threshold": 15.258886337280273, "eval_Qnli-dev_euclidean_precision": 0.6241830065359477, "eval_Qnli-dev_euclidean_recall": 0.809322033898305, "eval_Qnli-dev_manhattan_accuracy": 0.69921875, "eval_Qnli-dev_manhattan_accuracy_threshold": 265.3671875, "eval_Qnli-dev_manhattan_ap": 0.744418854148787, "eval_Qnli-dev_manhattan_f1": 0.708029197080292, "eval_Qnli-dev_manhattan_f1_threshold": 314.21258544921875, "eval_Qnli-dev_manhattan_precision": 0.6217948717948718, "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, "eval_Qnli-dev_max_accuracy": 0.701171875, "eval_Qnli-dev_max_accuracy_threshold": 385.76885986328125, "eval_Qnli-dev_max_ap": 0.7462767063117786, "eval_Qnli-dev_max_f1": 0.708029197080292, "eval_Qnli-dev_max_f1_threshold": 354.2484436035156, "eval_Qnli-dev_max_precision": 0.6241830065359477, "eval_Qnli-dev_max_recall": 0.8983050847457628, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8901729583740234, "eval_allNLI-dev_cosine_ap": 0.5909042367020411, "eval_allNLI-dev_cosine_f1": 0.6091127098321343, "eval_allNLI-dev_cosine_f1_threshold": 0.8110285401344299, "eval_allNLI-dev_cosine_precision": 0.5204918032786885, "eval_allNLI-dev_cosine_recall": 0.7341040462427746, "eval_allNLI-dev_dot_accuracy": 0.6796875, "eval_allNLI-dev_dot_accuracy_threshold": 481.55474853515625, "eval_allNLI-dev_dot_ap": 0.4948903950504878, "eval_allNLI-dev_dot_f1": 0.569672131147541, "eval_allNLI-dev_dot_f1_threshold": 379.9951171875, "eval_allNLI-dev_dot_precision": 0.44126984126984126, "eval_allNLI-dev_dot_recall": 0.8034682080924855, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.585214614868164, "eval_allNLI-dev_euclidean_ap": 0.5947416283923239, "eval_allNLI-dev_euclidean_f1": 0.6009852216748768, "eval_allNLI-dev_euclidean_f1_threshold": 13.550745010375977, "eval_allNLI-dev_euclidean_precision": 0.5236051502145923, "eval_allNLI-dev_euclidean_recall": 0.7052023121387283, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 228.28366088867188, "eval_allNLI-dev_manhattan_ap": 0.5918176918420521, "eval_allNLI-dev_manhattan_f1": 0.5991379310344827, "eval_allNLI-dev_manhattan_f1_threshold": 301.08868408203125, "eval_allNLI-dev_manhattan_precision": 0.47766323024054985, "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 481.55474853515625, "eval_allNLI-dev_max_ap": 0.5947416283923239, "eval_allNLI-dev_max_f1": 0.6091127098321343, "eval_allNLI-dev_max_f1_threshold": 379.9951171875, "eval_allNLI-dev_max_precision": 0.5236051502145923, "eval_allNLI-dev_max_recall": 0.8034682080924855, "eval_sequential_score": 0.7462767063117786, "eval_sts-test_pearson_cosine": 0.8118059789516554, "eval_sts-test_pearson_dot": 0.7734818279888613, "eval_sts-test_pearson_euclidean": 0.8386920311953987, "eval_sts-test_pearson_manhattan": 0.8356441135209492, "eval_sts-test_pearson_max": 0.8386920311953987, "eval_sts-test_spearman_cosine": 0.8328721251857153, "eval_sts-test_spearman_dot": 0.7551982558138911, "eval_sts-test_spearman_euclidean": 0.8285452152243036, "eval_sts-test_spearman_manhattan": 0.8259300410111131, "eval_sts-test_spearman_max": 0.8328721251857153, "eval_vitaminc-pairs_loss": 2.8136911392211914, "eval_vitaminc-pairs_runtime": 3.1765, "eval_vitaminc-pairs_samples_per_second": 40.296, "eval_vitaminc-pairs_steps_per_second": 0.315, "step": 640 }, { "epoch": 0.6584362139917695, "eval_negation-triplets_loss": 1.244739055633545, "eval_negation-triplets_runtime": 0.7519, "eval_negation-triplets_samples_per_second": 170.238, "eval_negation-triplets_steps_per_second": 1.33, "step": 640 }, { "epoch": 0.6584362139917695, "eval_scitail-pairs-pos_loss": 0.22537671029567719, "eval_scitail-pairs-pos_runtime": 0.8268, "eval_scitail-pairs-pos_samples_per_second": 154.805, "eval_scitail-pairs-pos_steps_per_second": 1.209, "step": 640 }, { "epoch": 0.6584362139917695, "eval_scitail-pairs-qa_loss": 0.014203112572431564, "eval_scitail-pairs-qa_runtime": 0.5719, "eval_scitail-pairs-qa_samples_per_second": 223.816, "eval_scitail-pairs-qa_steps_per_second": 1.749, "step": 640 }, { "epoch": 0.6584362139917695, "eval_xsum-pairs_loss": 0.6345345377922058, "eval_xsum-pairs_runtime": 3.0155, "eval_xsum-pairs_samples_per_second": 42.447, "eval_xsum-pairs_steps_per_second": 0.332, "step": 640 }, { "epoch": 0.6584362139917695, "eval_sciq_pairs_loss": 0.1278018057346344, "eval_sciq_pairs_runtime": 3.4439, "eval_sciq_pairs_samples_per_second": 37.167, "eval_sciq_pairs_steps_per_second": 0.29, "step": 640 }, { "epoch": 0.6584362139917695, "eval_qasc_pairs_loss": 0.5951372385025024, "eval_qasc_pairs_runtime": 0.6218, "eval_qasc_pairs_samples_per_second": 205.857, "eval_qasc_pairs_steps_per_second": 1.608, "step": 640 }, { "epoch": 0.6584362139917695, "eval_openbookqa_pairs_loss": 1.232675552368164, "eval_openbookqa_pairs_runtime": 0.582, "eval_openbookqa_pairs_samples_per_second": 219.948, "eval_openbookqa_pairs_steps_per_second": 1.718, "step": 640 }, { "epoch": 0.6584362139917695, "eval_msmarco_pairs_loss": 1.3142263889312744, "eval_msmarco_pairs_runtime": 1.5099, "eval_msmarco_pairs_samples_per_second": 84.773, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 640 }, { "epoch": 0.6584362139917695, "eval_nq_pairs_loss": 1.6414275169372559, "eval_nq_pairs_runtime": 2.9022, "eval_nq_pairs_samples_per_second": 44.104, "eval_nq_pairs_steps_per_second": 0.345, "step": 640 }, { "epoch": 0.6584362139917695, "eval_trivia_pairs_loss": 1.201471209526062, "eval_trivia_pairs_runtime": 3.4361, "eval_trivia_pairs_samples_per_second": 37.252, "eval_trivia_pairs_steps_per_second": 0.291, "step": 640 }, { "epoch": 0.6584362139917695, "eval_gooaq_pairs_loss": 0.7273324131965637, "eval_gooaq_pairs_runtime": 0.9436, "eval_gooaq_pairs_samples_per_second": 135.656, "eval_gooaq_pairs_steps_per_second": 1.06, "step": 640 }, { "epoch": 0.6584362139917695, "eval_paws-pos_loss": 0.033227745443582535, "eval_paws-pos_runtime": 0.6799, "eval_paws-pos_samples_per_second": 188.253, "eval_paws-pos_steps_per_second": 1.471, "step": 640 }, { "epoch": 0.6584362139917695, "eval_global_dataset_loss": 0.644037663936615, "eval_global_dataset_runtime": 13.3785, "eval_global_dataset_samples_per_second": 31.095, "eval_global_dataset_steps_per_second": 0.299, "step": 640 }, { "epoch": 0.6594650205761317, "grad_norm": 2.2254586219787598, "learning_rate": 2.3187954309449633e-05, "loss": 0.2144, "step": 641 }, { "epoch": 0.6604938271604939, "grad_norm": 8.457268714904785, "learning_rate": 2.3224299065420557e-05, "loss": 0.5899, "step": 642 }, { "epoch": 0.661522633744856, "grad_norm": 16.62227439880371, "learning_rate": 2.3260643821391484e-05, "loss": 1.7055, "step": 643 }, { "epoch": 0.6625514403292181, "grad_norm": 9.388711929321289, "learning_rate": 2.329698857736241e-05, "loss": 0.5673, "step": 644 }, { "epoch": 0.6635802469135802, "grad_norm": 3.408893346786499, "learning_rate": 2.333333333333333e-05, "loss": 0.0845, "step": 645 }, { "epoch": 0.6646090534979424, "grad_norm": 11.298724174499512, "learning_rate": 2.3369678089304256e-05, "loss": 0.7168, "step": 646 }, { "epoch": 0.6656378600823045, "grad_norm": 16.72682762145996, "learning_rate": 2.340602284527518e-05, "loss": 2.6358, "step": 647 }, { "epoch": 0.6666666666666666, "grad_norm": 7.872361660003662, "learning_rate": 2.34423676012461e-05, "loss": 0.3951, "step": 648 }, { "epoch": 0.6676954732510288, "grad_norm": 9.12248420715332, "learning_rate": 2.347871235721703e-05, "loss": 0.584, "step": 649 }, { "epoch": 0.668724279835391, "grad_norm": 11.847990036010742, "learning_rate": 2.3515057113187953e-05, "loss": 0.9239, "step": 650 }, { "epoch": 0.6697530864197531, "grad_norm": 8.815132141113281, "learning_rate": 2.3551401869158877e-05, "loss": 0.576, "step": 651 }, { "epoch": 0.6707818930041153, "grad_norm": 13.088105201721191, "learning_rate": 2.35877466251298e-05, "loss": 1.2842, "step": 652 }, { "epoch": 0.6718106995884774, "grad_norm": 9.663747787475586, "learning_rate": 2.3624091381100725e-05, "loss": 0.7108, "step": 653 }, { "epoch": 0.6728395061728395, "grad_norm": 10.207884788513184, "learning_rate": 2.366043613707165e-05, "loss": 0.6935, "step": 654 }, { "epoch": 0.6738683127572016, "grad_norm": 10.963897705078125, "learning_rate": 2.3696780893042576e-05, "loss": 0.8278, "step": 655 }, { "epoch": 0.6748971193415638, "grad_norm": 9.319234848022461, "learning_rate": 2.3733125649013497e-05, "loss": 0.6456, "step": 656 }, { "epoch": 0.6759259259259259, "grad_norm": 14.43174934387207, "learning_rate": 2.376947040498442e-05, "loss": 1.8842, "step": 657 }, { "epoch": 0.676954732510288, "grad_norm": 13.448914527893066, "learning_rate": 2.3805815160955348e-05, "loss": 1.2572, "step": 658 }, { "epoch": 0.6779835390946503, "grad_norm": 8.692782402038574, "learning_rate": 2.384215991692627e-05, "loss": 0.6718, "step": 659 }, { "epoch": 0.6790123456790124, "grad_norm": 4.224426746368408, "learning_rate": 2.3878504672897193e-05, "loss": 0.1434, "step": 660 }, { "epoch": 0.6790123456790124, "eval_Qnli-dev_cosine_accuracy": 0.705078125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.819502055644989, "eval_Qnli-dev_cosine_ap": 0.7429310249805731, "eval_Qnli-dev_cosine_f1": 0.7023411371237458, "eval_Qnli-dev_cosine_f1_threshold": 0.7529304623603821, "eval_Qnli-dev_cosine_precision": 0.580110497237569, "eval_Qnli-dev_cosine_recall": 0.8898305084745762, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 392.81878662109375, "eval_Qnli-dev_dot_ap": 0.6658795733353435, "eval_Qnli-dev_dot_f1": 0.684981684981685, "eval_Qnli-dev_dot_f1_threshold": 388.4842529296875, "eval_Qnli-dev_dot_precision": 0.603225806451613, "eval_Qnli-dev_dot_recall": 0.7923728813559322, "eval_Qnli-dev_euclidean_accuracy": 0.70703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.631841659545898, "eval_Qnli-dev_euclidean_ap": 0.7487350788106928, "eval_Qnli-dev_euclidean_f1": 0.7015706806282722, "eval_Qnli-dev_euclidean_f1_threshold": 15.337552070617676, "eval_Qnli-dev_euclidean_precision": 0.5964391691394659, "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, "eval_Qnli-dev_manhattan_accuracy": 0.701171875, "eval_Qnli-dev_manhattan_accuracy_threshold": 295.4067077636719, "eval_Qnli-dev_manhattan_ap": 0.7482376569453756, "eval_Qnli-dev_manhattan_f1": 0.7113594040968343, "eval_Qnli-dev_manhattan_f1_threshold": 299.4460754394531, "eval_Qnli-dev_manhattan_precision": 0.6345514950166113, "eval_Qnli-dev_manhattan_recall": 0.809322033898305, "eval_Qnli-dev_max_accuracy": 0.70703125, "eval_Qnli-dev_max_accuracy_threshold": 392.81878662109375, "eval_Qnli-dev_max_ap": 0.7487350788106928, "eval_Qnli-dev_max_f1": 0.7113594040968343, "eval_Qnli-dev_max_f1_threshold": 388.4842529296875, "eval_Qnli-dev_max_precision": 0.6345514950166113, "eval_Qnli-dev_max_recall": 0.8898305084745762, "eval_allNLI-dev_cosine_accuracy": 0.734375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8959517478942871, "eval_allNLI-dev_cosine_ap": 0.5960858156370117, "eval_allNLI-dev_cosine_f1": 0.5995717344753748, "eval_allNLI-dev_cosine_f1_threshold": 0.7982358932495117, "eval_allNLI-dev_cosine_precision": 0.47619047619047616, "eval_allNLI-dev_cosine_recall": 0.8092485549132948, "eval_allNLI-dev_dot_accuracy": 0.689453125, "eval_allNLI-dev_dot_accuracy_threshold": 450.1946716308594, "eval_allNLI-dev_dot_ap": 0.5096208353059024, "eval_allNLI-dev_dot_f1": 0.5690021231422505, "eval_allNLI-dev_dot_f1_threshold": 398.77850341796875, "eval_allNLI-dev_dot_precision": 0.44966442953020136, "eval_allNLI-dev_dot_recall": 0.7745664739884393, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.649042129516602, "eval_allNLI-dev_euclidean_ap": 0.5979924892509634, "eval_allNLI-dev_euclidean_f1": 0.6090534979423868, "eval_allNLI-dev_euclidean_f1_threshold": 14.710177421569824, "eval_allNLI-dev_euclidean_precision": 0.4728434504792332, "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 218.83389282226562, "eval_allNLI-dev_manhattan_ap": 0.5954291033762709, "eval_allNLI-dev_manhattan_f1": 0.5973451327433628, "eval_allNLI-dev_manhattan_f1_threshold": 288.9541015625, "eval_allNLI-dev_manhattan_precision": 0.4838709677419355, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 450.1946716308594, "eval_allNLI-dev_max_ap": 0.5979924892509634, "eval_allNLI-dev_max_f1": 0.6090534979423868, "eval_allNLI-dev_max_f1_threshold": 398.77850341796875, "eval_allNLI-dev_max_precision": 0.4838709677419355, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7487350788106928, "eval_sts-test_pearson_cosine": 0.809748141776852, "eval_sts-test_pearson_dot": 0.7852622986479767, "eval_sts-test_pearson_euclidean": 0.8383482677548499, "eval_sts-test_pearson_manhattan": 0.8356178836101067, "eval_sts-test_pearson_max": 0.8383482677548499, "eval_sts-test_spearman_cosine": 0.8342041017297689, "eval_sts-test_spearman_dot": 0.7727315762707344, "eval_sts-test_spearman_euclidean": 0.8310839542830377, "eval_sts-test_spearman_manhattan": 0.8265729823835233, "eval_sts-test_spearman_max": 0.8342041017297689, "eval_vitaminc-pairs_loss": 2.8169939517974854, "eval_vitaminc-pairs_runtime": 3.1955, "eval_vitaminc-pairs_samples_per_second": 40.056, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 660 }, { "epoch": 0.6790123456790124, "eval_negation-triplets_loss": 1.216970443725586, "eval_negation-triplets_runtime": 0.7501, "eval_negation-triplets_samples_per_second": 170.642, "eval_negation-triplets_steps_per_second": 1.333, "step": 660 }, { "epoch": 0.6790123456790124, "eval_scitail-pairs-pos_loss": 0.2154267579317093, "eval_scitail-pairs-pos_runtime": 0.8251, "eval_scitail-pairs-pos_samples_per_second": 155.127, "eval_scitail-pairs-pos_steps_per_second": 1.212, "step": 660 }, { "epoch": 0.6790123456790124, "eval_scitail-pairs-qa_loss": 0.008771178312599659, "eval_scitail-pairs-qa_runtime": 0.5793, "eval_scitail-pairs-qa_samples_per_second": 220.954, "eval_scitail-pairs-qa_steps_per_second": 1.726, "step": 660 }, { "epoch": 0.6790123456790124, "eval_xsum-pairs_loss": 0.6624985933303833, "eval_xsum-pairs_runtime": 3.0194, "eval_xsum-pairs_samples_per_second": 42.393, "eval_xsum-pairs_steps_per_second": 0.331, "step": 660 }, { "epoch": 0.6790123456790124, "eval_sciq_pairs_loss": 0.12456458061933517, "eval_sciq_pairs_runtime": 3.4544, "eval_sciq_pairs_samples_per_second": 37.055, "eval_sciq_pairs_steps_per_second": 0.289, "step": 660 }, { "epoch": 0.6790123456790124, "eval_qasc_pairs_loss": 0.5933777093887329, "eval_qasc_pairs_runtime": 0.6095, "eval_qasc_pairs_samples_per_second": 209.991, "eval_qasc_pairs_steps_per_second": 1.641, "step": 660 }, { "epoch": 0.6790123456790124, "eval_openbookqa_pairs_loss": 1.2264533042907715, "eval_openbookqa_pairs_runtime": 0.5907, "eval_openbookqa_pairs_samples_per_second": 216.708, "eval_openbookqa_pairs_steps_per_second": 1.693, "step": 660 }, { "epoch": 0.6790123456790124, "eval_msmarco_pairs_loss": 1.2734606266021729, "eval_msmarco_pairs_runtime": 1.5181, "eval_msmarco_pairs_samples_per_second": 84.315, "eval_msmarco_pairs_steps_per_second": 0.659, "step": 660 }, { "epoch": 0.6790123456790124, "eval_nq_pairs_loss": 1.6421589851379395, "eval_nq_pairs_runtime": 2.8912, "eval_nq_pairs_samples_per_second": 44.272, "eval_nq_pairs_steps_per_second": 0.346, "step": 660 }, { "epoch": 0.6790123456790124, "eval_trivia_pairs_loss": 1.1045206785202026, "eval_trivia_pairs_runtime": 3.4335, "eval_trivia_pairs_samples_per_second": 37.28, "eval_trivia_pairs_steps_per_second": 0.291, "step": 660 }, { "epoch": 0.6790123456790124, "eval_gooaq_pairs_loss": 0.7241554856300354, "eval_gooaq_pairs_runtime": 0.9492, "eval_gooaq_pairs_samples_per_second": 134.856, "eval_gooaq_pairs_steps_per_second": 1.054, "step": 660 }, { "epoch": 0.6790123456790124, "eval_paws-pos_loss": 0.03431744873523712, "eval_paws-pos_runtime": 0.6884, "eval_paws-pos_samples_per_second": 185.934, "eval_paws-pos_steps_per_second": 1.453, "step": 660 }, { "epoch": 0.6790123456790124, "eval_global_dataset_loss": 0.6402216553688049, "eval_global_dataset_runtime": 13.4067, "eval_global_dataset_samples_per_second": 31.029, "eval_global_dataset_steps_per_second": 0.298, "step": 660 }, { "epoch": 0.6800411522633745, "grad_norm": 20.211734771728516, "learning_rate": 2.391484942886812e-05, "loss": 2.1395, "step": 661 }, { "epoch": 0.6810699588477366, "grad_norm": 7.7893218994140625, "learning_rate": 2.3951194184839044e-05, "loss": 0.6218, "step": 662 }, { "epoch": 0.6820987654320988, "grad_norm": 16.382932662963867, "learning_rate": 2.3987538940809965e-05, "loss": 1.691, "step": 663 }, { "epoch": 0.6831275720164609, "grad_norm": 13.506409645080566, "learning_rate": 2.4023883696780892e-05, "loss": 1.3362, "step": 664 }, { "epoch": 0.684156378600823, "grad_norm": 13.324780464172363, "learning_rate": 2.4060228452751816e-05, "loss": 1.1382, "step": 665 }, { "epoch": 0.6851851851851852, "grad_norm": 10.345579147338867, "learning_rate": 2.4096573208722737e-05, "loss": 1.0932, "step": 666 }, { "epoch": 0.6862139917695473, "grad_norm": 10.737591743469238, "learning_rate": 2.4132917964693664e-05, "loss": 0.9572, "step": 667 }, { "epoch": 0.6872427983539094, "grad_norm": 17.071697235107422, "learning_rate": 2.4169262720664588e-05, "loss": 1.9663, "step": 668 }, { "epoch": 0.6882716049382716, "grad_norm": 11.74267292022705, "learning_rate": 2.4205607476635512e-05, "loss": 0.8968, "step": 669 }, { "epoch": 0.6893004115226338, "grad_norm": 11.056696891784668, "learning_rate": 2.4241952232606436e-05, "loss": 0.7906, "step": 670 }, { "epoch": 0.6903292181069959, "grad_norm": 10.595043182373047, "learning_rate": 2.427829698857736e-05, "loss": 0.7443, "step": 671 }, { "epoch": 0.691358024691358, "grad_norm": 9.793761253356934, "learning_rate": 2.4314641744548284e-05, "loss": 0.6939, "step": 672 }, { "epoch": 0.6923868312757202, "grad_norm": 10.305285453796387, "learning_rate": 2.4350986500519212e-05, "loss": 1.202, "step": 673 }, { "epoch": 0.6934156378600823, "grad_norm": 1.1254714727401733, "learning_rate": 2.4387331256490132e-05, "loss": 0.0276, "step": 674 }, { "epoch": 0.6944444444444444, "grad_norm": 10.750346183776855, "learning_rate": 2.4423676012461056e-05, "loss": 1.121, "step": 675 }, { "epoch": 0.6954732510288066, "grad_norm": 9.77961254119873, "learning_rate": 2.4460020768431984e-05, "loss": 0.721, "step": 676 }, { "epoch": 0.6965020576131687, "grad_norm": 10.97049331665039, "learning_rate": 2.4496365524402904e-05, "loss": 1.0949, "step": 677 }, { "epoch": 0.6975308641975309, "grad_norm": 13.591765403747559, "learning_rate": 2.453271028037383e-05, "loss": 1.3044, "step": 678 }, { "epoch": 0.698559670781893, "grad_norm": 10.30559253692627, "learning_rate": 2.4569055036344756e-05, "loss": 0.6867, "step": 679 }, { "epoch": 0.6995884773662552, "grad_norm": 9.589376449584961, "learning_rate": 2.4605399792315676e-05, "loss": 0.6253, "step": 680 }, { "epoch": 0.6995884773662552, "eval_Qnli-dev_cosine_accuracy": 0.689453125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8195374011993408, "eval_Qnli-dev_cosine_ap": 0.7393646924153436, "eval_Qnli-dev_cosine_f1": 0.702054794520548, "eval_Qnli-dev_cosine_f1_threshold": 0.7384560704231262, "eval_Qnli-dev_cosine_precision": 0.5890804597701149, "eval_Qnli-dev_cosine_recall": 0.8686440677966102, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 395.6339111328125, "eval_Qnli-dev_dot_ap": 0.6696734110834349, "eval_Qnli-dev_dot_f1": 0.6894308943089431, "eval_Qnli-dev_dot_f1_threshold": 355.788330078125, "eval_Qnli-dev_dot_precision": 0.5593667546174143, "eval_Qnli-dev_dot_recall": 0.8983050847457628, "eval_Qnli-dev_euclidean_accuracy": 0.701171875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.906505584716797, "eval_Qnli-dev_euclidean_ap": 0.7444803762790224, "eval_Qnli-dev_euclidean_f1": 0.7016949152542373, "eval_Qnli-dev_euclidean_f1_threshold": 16.160581588745117, "eval_Qnli-dev_euclidean_precision": 0.5847457627118644, "eval_Qnli-dev_euclidean_recall": 0.8771186440677966, "eval_Qnli-dev_manhattan_accuracy": 0.703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 292.0366516113281, "eval_Qnli-dev_manhattan_ap": 0.7455488536354595, "eval_Qnli-dev_manhattan_f1": 0.7037037037037036, "eval_Qnli-dev_manhattan_f1_threshold": 331.2184753417969, "eval_Qnli-dev_manhattan_precision": 0.5837988826815642, "eval_Qnli-dev_manhattan_recall": 0.885593220338983, "eval_Qnli-dev_max_accuracy": 0.703125, "eval_Qnli-dev_max_accuracy_threshold": 395.6339111328125, "eval_Qnli-dev_max_ap": 0.7455488536354595, "eval_Qnli-dev_max_f1": 0.7037037037037036, "eval_Qnli-dev_max_f1_threshold": 355.788330078125, "eval_Qnli-dev_max_precision": 0.5890804597701149, "eval_Qnli-dev_max_recall": 0.8983050847457628, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8807967901229858, "eval_allNLI-dev_cosine_ap": 0.5923755189013276, "eval_allNLI-dev_cosine_f1": 0.5885286783042394, "eval_allNLI-dev_cosine_f1_threshold": 0.8102627992630005, "eval_allNLI-dev_cosine_precision": 0.5175438596491229, "eval_allNLI-dev_cosine_recall": 0.6820809248554913, "eval_allNLI-dev_dot_accuracy": 0.689453125, "eval_allNLI-dev_dot_accuracy_threshold": 468.3880615234375, "eval_allNLI-dev_dot_ap": 0.5099487314518958, "eval_allNLI-dev_dot_f1": 0.5726872246696035, "eval_allNLI-dev_dot_f1_threshold": 388.5802001953125, "eval_allNLI-dev_dot_precision": 0.4626334519572954, "eval_allNLI-dev_dot_recall": 0.7514450867052023, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.58234691619873, "eval_allNLI-dev_euclidean_ap": 0.5960351196666029, "eval_allNLI-dev_euclidean_f1": 0.5934065934065934, "eval_allNLI-dev_euclidean_f1_threshold": 14.820318222045898, "eval_allNLI-dev_euclidean_precision": 0.4787234042553192, "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 219.4961700439453, "eval_allNLI-dev_manhattan_ap": 0.5953606180151316, "eval_allNLI-dev_manhattan_f1": 0.5929411764705882, "eval_allNLI-dev_manhattan_f1_threshold": 293.4901428222656, "eval_allNLI-dev_manhattan_precision": 0.5, "eval_allNLI-dev_manhattan_recall": 0.7283236994219653, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 468.3880615234375, "eval_allNLI-dev_max_ap": 0.5960351196666029, "eval_allNLI-dev_max_f1": 0.5934065934065934, "eval_allNLI-dev_max_f1_threshold": 388.5802001953125, "eval_allNLI-dev_max_precision": 0.5175438596491229, "eval_allNLI-dev_max_recall": 0.7803468208092486, "eval_sequential_score": 0.7455488536354595, "eval_sts-test_pearson_cosine": 0.8159881240293081, "eval_sts-test_pearson_dot": 0.7825955488055716, "eval_sts-test_pearson_euclidean": 0.8454112920840406, "eval_sts-test_pearson_manhattan": 0.8444832657606673, "eval_sts-test_pearson_max": 0.8454112920840406, "eval_sts-test_spearman_cosine": 0.8368029417325517, "eval_sts-test_spearman_dot": 0.7614820041821643, "eval_sts-test_spearman_euclidean": 0.8350227813056632, "eval_sts-test_spearman_manhattan": 0.8336858821228565, "eval_sts-test_spearman_max": 0.8368029417325517, "eval_vitaminc-pairs_loss": 2.8485310077667236, "eval_vitaminc-pairs_runtime": 3.1999, "eval_vitaminc-pairs_samples_per_second": 40.001, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 680 }, { "epoch": 0.6995884773662552, "eval_negation-triplets_loss": 1.1648355722427368, "eval_negation-triplets_runtime": 0.7448, "eval_negation-triplets_samples_per_second": 171.851, "eval_negation-triplets_steps_per_second": 1.343, "step": 680 }, { "epoch": 0.6995884773662552, "eval_scitail-pairs-pos_loss": 0.21600204706192017, "eval_scitail-pairs-pos_runtime": 0.8346, "eval_scitail-pairs-pos_samples_per_second": 153.362, "eval_scitail-pairs-pos_steps_per_second": 1.198, "step": 680 }, { "epoch": 0.6995884773662552, "eval_scitail-pairs-qa_loss": 0.00846769753843546, "eval_scitail-pairs-qa_runtime": 0.5928, "eval_scitail-pairs-qa_samples_per_second": 215.932, "eval_scitail-pairs-qa_steps_per_second": 1.687, "step": 680 }, { "epoch": 0.6995884773662552, "eval_xsum-pairs_loss": 0.6605619192123413, "eval_xsum-pairs_runtime": 3.025, "eval_xsum-pairs_samples_per_second": 42.314, "eval_xsum-pairs_steps_per_second": 0.331, "step": 680 }, { "epoch": 0.6995884773662552, "eval_sciq_pairs_loss": 0.12335162609815598, "eval_sciq_pairs_runtime": 3.4321, "eval_sciq_pairs_samples_per_second": 37.295, "eval_sciq_pairs_steps_per_second": 0.291, "step": 680 }, { "epoch": 0.6995884773662552, "eval_qasc_pairs_loss": 0.5843737721443176, "eval_qasc_pairs_runtime": 0.6047, "eval_qasc_pairs_samples_per_second": 211.678, "eval_qasc_pairs_steps_per_second": 1.654, "step": 680 }, { "epoch": 0.6995884773662552, "eval_openbookqa_pairs_loss": 1.2838267087936401, "eval_openbookqa_pairs_runtime": 0.5755, "eval_openbookqa_pairs_samples_per_second": 222.41, "eval_openbookqa_pairs_steps_per_second": 1.738, "step": 680 }, { "epoch": 0.6995884773662552, "eval_msmarco_pairs_loss": 1.3720968961715698, "eval_msmarco_pairs_runtime": 1.518, "eval_msmarco_pairs_samples_per_second": 84.323, "eval_msmarco_pairs_steps_per_second": 0.659, "step": 680 }, { "epoch": 0.6995884773662552, "eval_nq_pairs_loss": 1.5162333250045776, "eval_nq_pairs_runtime": 2.9004, "eval_nq_pairs_samples_per_second": 44.131, "eval_nq_pairs_steps_per_second": 0.345, "step": 680 }, { "epoch": 0.6995884773662552, "eval_trivia_pairs_loss": 1.12861168384552, "eval_trivia_pairs_runtime": 3.4369, "eval_trivia_pairs_samples_per_second": 37.243, "eval_trivia_pairs_steps_per_second": 0.291, "step": 680 }, { "epoch": 0.6995884773662552, "eval_gooaq_pairs_loss": 0.6519899368286133, "eval_gooaq_pairs_runtime": 0.9806, "eval_gooaq_pairs_samples_per_second": 130.53, "eval_gooaq_pairs_steps_per_second": 1.02, "step": 680 }, { "epoch": 0.6995884773662552, "eval_paws-pos_loss": 0.03412044420838356, "eval_paws-pos_runtime": 0.6854, "eval_paws-pos_samples_per_second": 186.764, "eval_paws-pos_steps_per_second": 1.459, "step": 680 }, { "epoch": 0.6995884773662552, "eval_global_dataset_loss": 0.6541453003883362, "eval_global_dataset_runtime": 13.4093, "eval_global_dataset_samples_per_second": 31.023, "eval_global_dataset_steps_per_second": 0.298, "step": 680 }, { "epoch": 0.7006172839506173, "grad_norm": 1.0433952808380127, "learning_rate": 2.46417445482866e-05, "loss": 0.063, "step": 681 }, { "epoch": 0.7016460905349794, "grad_norm": 16.610177993774414, "learning_rate": 2.4678089304257528e-05, "loss": 1.4254, "step": 682 }, { "epoch": 0.7026748971193416, "grad_norm": 19.412683486938477, "learning_rate": 2.4714434060228452e-05, "loss": 3.1631, "step": 683 }, { "epoch": 0.7037037037037037, "grad_norm": 13.261174201965332, "learning_rate": 2.4750778816199373e-05, "loss": 1.2375, "step": 684 }, { "epoch": 0.7047325102880658, "grad_norm": 9.231230735778809, "learning_rate": 2.47871235721703e-05, "loss": 0.5716, "step": 685 }, { "epoch": 0.7057613168724279, "grad_norm": 16.746212005615234, "learning_rate": 2.4823468328141224e-05, "loss": 2.939, "step": 686 }, { "epoch": 0.7067901234567902, "grad_norm": 16.490650177001953, "learning_rate": 2.4859813084112145e-05, "loss": 1.7054, "step": 687 }, { "epoch": 0.7078189300411523, "grad_norm": 8.707398414611816, "learning_rate": 2.4896157840083072e-05, "loss": 0.4784, "step": 688 }, { "epoch": 0.7088477366255144, "grad_norm": 9.790912628173828, "learning_rate": 2.4932502596053996e-05, "loss": 0.7157, "step": 689 }, { "epoch": 0.7098765432098766, "grad_norm": 8.632383346557617, "learning_rate": 2.496884735202492e-05, "loss": 0.6421, "step": 690 }, { "epoch": 0.7109053497942387, "grad_norm": 8.732678413391113, "learning_rate": 2.5005192107995844e-05, "loss": 0.6502, "step": 691 }, { "epoch": 0.7119341563786008, "grad_norm": 16.7855281829834, "learning_rate": 2.5041536863966768e-05, "loss": 3.4679, "step": 692 }, { "epoch": 0.7129629629629629, "grad_norm": 8.66584587097168, "learning_rate": 2.5077881619937692e-05, "loss": 0.5872, "step": 693 }, { "epoch": 0.7139917695473251, "grad_norm": 14.179039001464844, "learning_rate": 2.511422637590862e-05, "loss": 1.5769, "step": 694 }, { "epoch": 0.7150205761316872, "grad_norm": 8.276007652282715, "learning_rate": 2.515057113187954e-05, "loss": 0.5454, "step": 695 }, { "epoch": 0.7160493827160493, "grad_norm": 12.96976375579834, "learning_rate": 2.5186915887850464e-05, "loss": 1.4251, "step": 696 }, { "epoch": 0.7170781893004116, "grad_norm": 8.970144271850586, "learning_rate": 2.522326064382139e-05, "loss": 0.6667, "step": 697 }, { "epoch": 0.7181069958847737, "grad_norm": 1.4171106815338135, "learning_rate": 2.5259605399792312e-05, "loss": 0.0382, "step": 698 }, { "epoch": 0.7191358024691358, "grad_norm": 4.66494607925415, "learning_rate": 2.5295950155763236e-05, "loss": 0.1808, "step": 699 }, { "epoch": 0.720164609053498, "grad_norm": 9.647722244262695, "learning_rate": 2.5332294911734164e-05, "loss": 0.8819, "step": 700 }, { "epoch": 0.720164609053498, "eval_Qnli-dev_cosine_accuracy": 0.6875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8055030107498169, "eval_Qnli-dev_cosine_ap": 0.7346523188251083, "eval_Qnli-dev_cosine_f1": 0.7008849557522123, "eval_Qnli-dev_cosine_f1_threshold": 0.7691887021064758, "eval_Qnli-dev_cosine_precision": 0.601823708206687, "eval_Qnli-dev_cosine_recall": 0.8389830508474576, "eval_Qnli-dev_dot_accuracy": 0.66015625, "eval_Qnli-dev_dot_accuracy_threshold": 403.0814208984375, "eval_Qnli-dev_dot_ap": 0.6423809971933063, "eval_Qnli-dev_dot_f1": 0.6771929824561403, "eval_Qnli-dev_dot_f1_threshold": 380.7566833496094, "eval_Qnli-dev_dot_precision": 0.5778443113772455, "eval_Qnli-dev_dot_recall": 0.8177966101694916, "eval_Qnli-dev_euclidean_accuracy": 0.70703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.891797065734863, "eval_Qnli-dev_euclidean_ap": 0.7419509834416282, "eval_Qnli-dev_euclidean_f1": 0.7024221453287196, "eval_Qnli-dev_euclidean_f1_threshold": 15.521956443786621, "eval_Qnli-dev_euclidean_precision": 0.5935672514619883, "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, "eval_Qnli-dev_manhattan_accuracy": 0.705078125, "eval_Qnli-dev_manhattan_accuracy_threshold": 284.7353515625, "eval_Qnli-dev_manhattan_ap": 0.7404308497091309, "eval_Qnli-dev_manhattan_f1": 0.6989619377162629, "eval_Qnli-dev_manhattan_f1_threshold": 318.97943115234375, "eval_Qnli-dev_manhattan_precision": 0.5906432748538012, "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, "eval_Qnli-dev_max_accuracy": 0.70703125, "eval_Qnli-dev_max_accuracy_threshold": 403.0814208984375, "eval_Qnli-dev_max_ap": 0.7419509834416282, "eval_Qnli-dev_max_f1": 0.7024221453287196, "eval_Qnli-dev_max_f1_threshold": 380.7566833496094, "eval_Qnli-dev_max_precision": 0.601823708206687, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.73828125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8836915493011475, "eval_allNLI-dev_cosine_ap": 0.5879168509874412, "eval_allNLI-dev_cosine_f1": 0.5914893617021276, "eval_allNLI-dev_cosine_f1_threshold": 0.7854909896850586, "eval_allNLI-dev_cosine_precision": 0.468013468013468, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.689453125, "eval_allNLI-dev_dot_accuracy_threshold": 468.509765625, "eval_allNLI-dev_dot_ap": 0.5290950515284383, "eval_allNLI-dev_dot_f1": 0.576923076923077, "eval_allNLI-dev_dot_f1_threshold": 394.4248352050781, "eval_allNLI-dev_dot_precision": 0.4576271186440678, "eval_allNLI-dev_dot_recall": 0.7803468208092486, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.812106132507324, "eval_allNLI-dev_euclidean_ap": 0.5902832492425357, "eval_allNLI-dev_euclidean_f1": 0.5927505330490405, "eval_allNLI-dev_euclidean_f1_threshold": 14.791348457336426, "eval_allNLI-dev_euclidean_precision": 0.46959459459459457, "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 228.82778930664062, "eval_allNLI-dev_manhattan_ap": 0.5867522032562185, "eval_allNLI-dev_manhattan_f1": 0.592255125284738, "eval_allNLI-dev_manhattan_f1_threshold": 288.2390441894531, "eval_allNLI-dev_manhattan_precision": 0.48872180451127817, "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 468.509765625, "eval_allNLI-dev_max_ap": 0.5902832492425357, "eval_allNLI-dev_max_f1": 0.5927505330490405, "eval_allNLI-dev_max_f1_threshold": 394.4248352050781, "eval_allNLI-dev_max_precision": 0.48872180451127817, "eval_allNLI-dev_max_recall": 0.8034682080924855, "eval_sequential_score": 0.7419509834416282, "eval_sts-test_pearson_cosine": 0.814122807787653, "eval_sts-test_pearson_dot": 0.7849759177486642, "eval_sts-test_pearson_euclidean": 0.8421714998904108, "eval_sts-test_pearson_manhattan": 0.8394866389200708, "eval_sts-test_pearson_max": 0.8421714998904108, "eval_sts-test_spearman_cosine": 0.837628602505223, "eval_sts-test_spearman_dot": 0.7737345862922999, "eval_sts-test_spearman_euclidean": 0.8339600731014016, "eval_sts-test_spearman_manhattan": 0.831537105555887, "eval_sts-test_spearman_max": 0.837628602505223, "eval_vitaminc-pairs_loss": 2.8523178100585938, "eval_vitaminc-pairs_runtime": 3.1968, "eval_vitaminc-pairs_samples_per_second": 40.039, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 700 }, { "epoch": 0.720164609053498, "eval_negation-triplets_loss": 1.1572741270065308, "eval_negation-triplets_runtime": 0.7545, "eval_negation-triplets_samples_per_second": 169.646, "eval_negation-triplets_steps_per_second": 1.325, "step": 700 }, { "epoch": 0.720164609053498, "eval_scitail-pairs-pos_loss": 0.21792583167552948, "eval_scitail-pairs-pos_runtime": 0.8512, "eval_scitail-pairs-pos_samples_per_second": 150.374, "eval_scitail-pairs-pos_steps_per_second": 1.175, "step": 700 }, { "epoch": 0.720164609053498, "eval_scitail-pairs-qa_loss": 0.014304843731224537, "eval_scitail-pairs-qa_runtime": 0.5821, "eval_scitail-pairs-qa_samples_per_second": 219.895, "eval_scitail-pairs-qa_steps_per_second": 1.718, "step": 700 }, { "epoch": 0.720164609053498, "eval_xsum-pairs_loss": 0.688365638256073, "eval_xsum-pairs_runtime": 3.0302, "eval_xsum-pairs_samples_per_second": 42.242, "eval_xsum-pairs_steps_per_second": 0.33, "step": 700 }, { "epoch": 0.720164609053498, "eval_sciq_pairs_loss": 0.12412170320749283, "eval_sciq_pairs_runtime": 3.4839, "eval_sciq_pairs_samples_per_second": 36.741, "eval_sciq_pairs_steps_per_second": 0.287, "step": 700 }, { "epoch": 0.720164609053498, "eval_qasc_pairs_loss": 0.5808658003807068, "eval_qasc_pairs_runtime": 0.6151, "eval_qasc_pairs_samples_per_second": 208.103, "eval_qasc_pairs_steps_per_second": 1.626, "step": 700 }, { "epoch": 0.720164609053498, "eval_openbookqa_pairs_loss": 1.136744499206543, "eval_openbookqa_pairs_runtime": 0.5932, "eval_openbookqa_pairs_samples_per_second": 215.777, "eval_openbookqa_pairs_steps_per_second": 1.686, "step": 700 }, { "epoch": 0.720164609053498, "eval_msmarco_pairs_loss": 1.2205469608306885, "eval_msmarco_pairs_runtime": 1.5248, "eval_msmarco_pairs_samples_per_second": 83.947, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 700 }, { "epoch": 0.720164609053498, "eval_nq_pairs_loss": 1.6330437660217285, "eval_nq_pairs_runtime": 2.9004, "eval_nq_pairs_samples_per_second": 44.131, "eval_nq_pairs_steps_per_second": 0.345, "step": 700 }, { "epoch": 0.720164609053498, "eval_trivia_pairs_loss": 1.195753812789917, "eval_trivia_pairs_runtime": 3.4466, "eval_trivia_pairs_samples_per_second": 37.138, "eval_trivia_pairs_steps_per_second": 0.29, "step": 700 }, { "epoch": 0.720164609053498, "eval_gooaq_pairs_loss": 0.7037076950073242, "eval_gooaq_pairs_runtime": 0.9565, "eval_gooaq_pairs_samples_per_second": 133.823, "eval_gooaq_pairs_steps_per_second": 1.045, "step": 700 }, { "epoch": 0.720164609053498, "eval_paws-pos_loss": 0.03305948153138161, "eval_paws-pos_runtime": 0.6963, "eval_paws-pos_samples_per_second": 183.824, "eval_paws-pos_steps_per_second": 1.436, "step": 700 }, { "epoch": 0.720164609053498, "eval_global_dataset_loss": 0.6367413401603699, "eval_global_dataset_runtime": 13.4188, "eval_global_dataset_samples_per_second": 31.001, "eval_global_dataset_steps_per_second": 0.298, "step": 700 }, { "epoch": 0.7211934156378601, "grad_norm": 9.350117683410645, "learning_rate": 2.5368639667705088e-05, "loss": 0.6013, "step": 701 }, { "epoch": 0.7222222222222222, "grad_norm": 11.008674621582031, "learning_rate": 2.540498442367601e-05, "loss": 1.181, "step": 702 }, { "epoch": 0.7232510288065843, "grad_norm": 12.882591247558594, "learning_rate": 2.5441329179646936e-05, "loss": 1.1574, "step": 703 }, { "epoch": 0.7242798353909465, "grad_norm": 10.404853820800781, "learning_rate": 2.547767393561786e-05, "loss": 0.6094, "step": 704 }, { "epoch": 0.7253086419753086, "grad_norm": 10.375190734863281, "learning_rate": 2.551401869158878e-05, "loss": 0.6303, "step": 705 }, { "epoch": 0.7263374485596708, "grad_norm": 11.026881217956543, "learning_rate": 2.5550363447559708e-05, "loss": 0.626, "step": 706 }, { "epoch": 0.727366255144033, "grad_norm": 9.781618118286133, "learning_rate": 2.5586708203530632e-05, "loss": 0.5284, "step": 707 }, { "epoch": 0.7283950617283951, "grad_norm": 2.4945054054260254, "learning_rate": 2.5623052959501556e-05, "loss": 0.0619, "step": 708 }, { "epoch": 0.7294238683127572, "grad_norm": 14.84467887878418, "learning_rate": 2.565939771547248e-05, "loss": 1.3394, "step": 709 }, { "epoch": 0.7304526748971193, "grad_norm": 3.6432929039001465, "learning_rate": 2.5695742471443404e-05, "loss": 0.0922, "step": 710 }, { "epoch": 0.7314814814814815, "grad_norm": 3.2191617488861084, "learning_rate": 2.5732087227414328e-05, "loss": 0.068, "step": 711 }, { "epoch": 0.7325102880658436, "grad_norm": 10.091761589050293, "learning_rate": 2.5768431983385255e-05, "loss": 0.5414, "step": 712 }, { "epoch": 0.7335390946502057, "grad_norm": 9.839192390441895, "learning_rate": 2.5804776739356176e-05, "loss": 0.5332, "step": 713 }, { "epoch": 0.7345679012345679, "grad_norm": 9.548250198364258, "learning_rate": 2.58411214953271e-05, "loss": 0.5112, "step": 714 }, { "epoch": 0.73559670781893, "grad_norm": 23.554458618164062, "learning_rate": 2.5877466251298027e-05, "loss": 3.5468, "step": 715 }, { "epoch": 0.7366255144032922, "grad_norm": 1.0547456741333008, "learning_rate": 2.5913811007268948e-05, "loss": 0.0244, "step": 716 }, { "epoch": 0.7376543209876543, "grad_norm": 10.332133293151855, "learning_rate": 2.5950155763239872e-05, "loss": 0.528, "step": 717 }, { "epoch": 0.7386831275720165, "grad_norm": 16.862545013427734, "learning_rate": 2.59865005192108e-05, "loss": 1.7134, "step": 718 }, { "epoch": 0.7397119341563786, "grad_norm": 9.824862480163574, "learning_rate": 2.6022845275181723e-05, "loss": 0.6181, "step": 719 }, { "epoch": 0.7407407407407407, "grad_norm": 17.680917739868164, "learning_rate": 2.6059190031152644e-05, "loss": 1.7897, "step": 720 }, { "epoch": 0.7407407407407407, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8089187741279602, "eval_Qnli-dev_cosine_ap": 0.7445288564241996, "eval_Qnli-dev_cosine_f1": 0.7088607594936709, "eval_Qnli-dev_cosine_f1_threshold": 0.7645823955535889, "eval_Qnli-dev_cosine_precision": 0.6182965299684543, "eval_Qnli-dev_cosine_recall": 0.8305084745762712, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 374.24700927734375, "eval_Qnli-dev_dot_ap": 0.6643934387620949, "eval_Qnli-dev_dot_f1": 0.6875, "eval_Qnli-dev_dot_f1_threshold": 374.24700927734375, "eval_Qnli-dev_dot_precision": 0.6071428571428571, "eval_Qnli-dev_dot_recall": 0.7923728813559322, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.01733684539795, "eval_Qnli-dev_euclidean_ap": 0.7511376116503252, "eval_Qnli-dev_euclidean_f1": 0.7107750472589792, "eval_Qnli-dev_euclidean_f1_threshold": 14.925470352172852, "eval_Qnli-dev_euclidean_precision": 0.6416382252559727, "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, "eval_Qnli-dev_manhattan_accuracy": 0.708984375, "eval_Qnli-dev_manhattan_accuracy_threshold": 279.2970886230469, "eval_Qnli-dev_manhattan_ap": 0.7531882826368892, "eval_Qnli-dev_manhattan_f1": 0.7052810902896082, "eval_Qnli-dev_manhattan_f1_threshold": 327.6318359375, "eval_Qnli-dev_manhattan_precision": 0.5897435897435898, "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 374.24700927734375, "eval_Qnli-dev_max_ap": 0.7531882826368892, "eval_Qnli-dev_max_f1": 0.7107750472589792, "eval_Qnli-dev_max_f1_threshold": 374.24700927734375, "eval_Qnli-dev_max_precision": 0.6416382252559727, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.740234375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8845050930976868, "eval_allNLI-dev_cosine_ap": 0.6029211833635529, "eval_allNLI-dev_cosine_f1": 0.6096033402922756, "eval_allNLI-dev_cosine_f1_threshold": 0.7970777750015259, "eval_allNLI-dev_cosine_precision": 0.477124183006536, "eval_allNLI-dev_cosine_recall": 0.8439306358381503, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 465.4620361328125, "eval_allNLI-dev_dot_ap": 0.512993085572406, "eval_allNLI-dev_dot_f1": 0.5753968253968255, "eval_allNLI-dev_dot_f1_threshold": 391.34271240234375, "eval_allNLI-dev_dot_precision": 0.4380664652567976, "eval_allNLI-dev_dot_recall": 0.838150289017341, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.847936630249023, "eval_allNLI-dev_euclidean_ap": 0.6067823005817112, "eval_allNLI-dev_euclidean_f1": 0.60813704496788, "eval_allNLI-dev_euclidean_f1_threshold": 14.172441482543945, "eval_allNLI-dev_euclidean_precision": 0.48299319727891155, "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, "eval_allNLI-dev_manhattan_accuracy": 0.73828125, "eval_allNLI-dev_manhattan_accuracy_threshold": 217.6175537109375, "eval_allNLI-dev_manhattan_ap": 0.5978323891873064, "eval_allNLI-dev_manhattan_f1": 0.5991902834008097, "eval_allNLI-dev_manhattan_f1_threshold": 298.9595031738281, "eval_allNLI-dev_manhattan_precision": 0.46105919003115264, "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, "eval_allNLI-dev_max_accuracy": 0.7421875, "eval_allNLI-dev_max_accuracy_threshold": 465.4620361328125, "eval_allNLI-dev_max_ap": 0.6067823005817112, "eval_allNLI-dev_max_f1": 0.6096033402922756, "eval_allNLI-dev_max_f1_threshold": 391.34271240234375, "eval_allNLI-dev_max_precision": 0.48299319727891155, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7531882826368892, "eval_sts-test_pearson_cosine": 0.7943928851510986, "eval_sts-test_pearson_dot": 0.7406480169219867, "eval_sts-test_pearson_euclidean": 0.8200699159277771, "eval_sts-test_pearson_manhattan": 0.8153052752015822, "eval_sts-test_pearson_max": 0.8200699159277771, "eval_sts-test_spearman_cosine": 0.814777534408501, "eval_sts-test_spearman_dot": 0.7252969844950452, "eval_sts-test_spearman_euclidean": 0.8124804521612804, "eval_sts-test_spearman_manhattan": 0.8084946543855285, "eval_sts-test_spearman_max": 0.814777534408501, "eval_vitaminc-pairs_loss": 2.5636518001556396, "eval_vitaminc-pairs_runtime": 3.2076, "eval_vitaminc-pairs_samples_per_second": 39.905, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 720 }, { "epoch": 0.7407407407407407, "eval_negation-triplets_loss": 1.1352839469909668, "eval_negation-triplets_runtime": 0.749, "eval_negation-triplets_samples_per_second": 170.903, "eval_negation-triplets_steps_per_second": 1.335, "step": 720 }, { "epoch": 0.7407407407407407, "eval_scitail-pairs-pos_loss": 0.2415001094341278, "eval_scitail-pairs-pos_runtime": 0.8417, "eval_scitail-pairs-pos_samples_per_second": 152.073, "eval_scitail-pairs-pos_steps_per_second": 1.188, "step": 720 }, { "epoch": 0.7407407407407407, "eval_scitail-pairs-qa_loss": 0.0037513382267206907, "eval_scitail-pairs-qa_runtime": 0.5837, "eval_scitail-pairs-qa_samples_per_second": 219.305, "eval_scitail-pairs-qa_steps_per_second": 1.713, "step": 720 }, { "epoch": 0.7407407407407407, "eval_xsum-pairs_loss": 0.7015084624290466, "eval_xsum-pairs_runtime": 3.0329, "eval_xsum-pairs_samples_per_second": 42.204, "eval_xsum-pairs_steps_per_second": 0.33, "step": 720 }, { "epoch": 0.7407407407407407, "eval_sciq_pairs_loss": 0.13029436767101288, "eval_sciq_pairs_runtime": 3.454, "eval_sciq_pairs_samples_per_second": 37.059, "eval_sciq_pairs_steps_per_second": 0.29, "step": 720 }, { "epoch": 0.7407407407407407, "eval_qasc_pairs_loss": 0.5081034302711487, "eval_qasc_pairs_runtime": 0.6041, "eval_qasc_pairs_samples_per_second": 211.882, "eval_qasc_pairs_steps_per_second": 1.655, "step": 720 }, { "epoch": 0.7407407407407407, "eval_openbookqa_pairs_loss": 1.2555147409439087, "eval_openbookqa_pairs_runtime": 0.5953, "eval_openbookqa_pairs_samples_per_second": 215.03, "eval_openbookqa_pairs_steps_per_second": 1.68, "step": 720 }, { "epoch": 0.7407407407407407, "eval_msmarco_pairs_loss": 1.305182695388794, "eval_msmarco_pairs_runtime": 1.5199, "eval_msmarco_pairs_samples_per_second": 84.214, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 720 }, { "epoch": 0.7407407407407407, "eval_nq_pairs_loss": 1.5818196535110474, "eval_nq_pairs_runtime": 2.8983, "eval_nq_pairs_samples_per_second": 44.163, "eval_nq_pairs_steps_per_second": 0.345, "step": 720 }, { "epoch": 0.7407407407407407, "eval_trivia_pairs_loss": 1.2283203601837158, "eval_trivia_pairs_runtime": 3.4398, "eval_trivia_pairs_samples_per_second": 37.212, "eval_trivia_pairs_steps_per_second": 0.291, "step": 720 }, { "epoch": 0.7407407407407407, "eval_gooaq_pairs_loss": 0.7275317907333374, "eval_gooaq_pairs_runtime": 0.948, "eval_gooaq_pairs_samples_per_second": 135.023, "eval_gooaq_pairs_steps_per_second": 1.055, "step": 720 }, { "epoch": 0.7407407407407407, "eval_paws-pos_loss": 0.03339088708162308, "eval_paws-pos_runtime": 0.6932, "eval_paws-pos_samples_per_second": 184.64, "eval_paws-pos_steps_per_second": 1.442, "step": 720 }, { "epoch": 0.7407407407407407, "eval_global_dataset_loss": 0.5928239226341248, "eval_global_dataset_runtime": 13.3878, "eval_global_dataset_samples_per_second": 31.073, "eval_global_dataset_steps_per_second": 0.299, "step": 720 }, { "epoch": 0.7417695473251029, "grad_norm": 10.516244888305664, "learning_rate": 2.609553478712357e-05, "loss": 0.7104, "step": 721 }, { "epoch": 0.742798353909465, "grad_norm": 0.8260862827301025, "learning_rate": 2.6131879543094495e-05, "loss": 0.0219, "step": 722 }, { "epoch": 0.7438271604938271, "grad_norm": 14.152036666870117, "learning_rate": 2.6168224299065416e-05, "loss": 1.3516, "step": 723 }, { "epoch": 0.7448559670781894, "grad_norm": 8.1348237991333, "learning_rate": 2.6204569055036344e-05, "loss": 0.5472, "step": 724 }, { "epoch": 0.7458847736625515, "grad_norm": 8.534761428833008, "learning_rate": 2.6240913811007268e-05, "loss": 0.5357, "step": 725 }, { "epoch": 0.7469135802469136, "grad_norm": 11.620552062988281, "learning_rate": 2.627725856697819e-05, "loss": 1.0346, "step": 726 }, { "epoch": 0.7479423868312757, "grad_norm": 10.823874473571777, "learning_rate": 2.6313603322949116e-05, "loss": 0.8461, "step": 727 }, { "epoch": 0.7489711934156379, "grad_norm": 14.860071182250977, "learning_rate": 2.634994807892004e-05, "loss": 1.7762, "step": 728 }, { "epoch": 0.75, "grad_norm": 9.170268058776855, "learning_rate": 2.6386292834890964e-05, "loss": 0.6121, "step": 729 }, { "epoch": 0.7510288065843621, "grad_norm": 3.6571240425109863, "learning_rate": 2.6422637590861888e-05, "loss": 0.1051, "step": 730 }, { "epoch": 0.7520576131687243, "grad_norm": 7.615705966949463, "learning_rate": 2.645898234683281e-05, "loss": 0.5804, "step": 731 }, { "epoch": 0.7530864197530864, "grad_norm": 11.42629337310791, "learning_rate": 2.6495327102803736e-05, "loss": 1.0625, "step": 732 }, { "epoch": 0.7541152263374485, "grad_norm": 1.1732608079910278, "learning_rate": 2.6531671858774663e-05, "loss": 0.0471, "step": 733 }, { "epoch": 0.7551440329218106, "grad_norm": 10.805855751037598, "learning_rate": 2.6568016614745584e-05, "loss": 0.767, "step": 734 }, { "epoch": 0.7561728395061729, "grad_norm": 7.8192009925842285, "learning_rate": 2.6604361370716508e-05, "loss": 0.4262, "step": 735 }, { "epoch": 0.757201646090535, "grad_norm": 14.414314270019531, "learning_rate": 2.6640706126687435e-05, "loss": 1.4077, "step": 736 }, { "epoch": 0.7582304526748971, "grad_norm": 18.263036727905273, "learning_rate": 2.6677050882658356e-05, "loss": 1.5963, "step": 737 }, { "epoch": 0.7592592592592593, "grad_norm": 11.086414337158203, "learning_rate": 2.671339563862928e-05, "loss": 1.2141, "step": 738 }, { "epoch": 0.7602880658436214, "grad_norm": 13.789649963378906, "learning_rate": 2.6749740394600207e-05, "loss": 1.454, "step": 739 }, { "epoch": 0.7613168724279835, "grad_norm": 9.959060668945312, "learning_rate": 2.678608515057113e-05, "loss": 0.696, "step": 740 }, { "epoch": 0.7613168724279835, "eval_Qnli-dev_cosine_accuracy": 0.701171875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7968876361846924, "eval_Qnli-dev_cosine_ap": 0.727602546794372, "eval_Qnli-dev_cosine_f1": 0.6979166666666667, "eval_Qnli-dev_cosine_f1_threshold": 0.7464833855628967, "eval_Qnli-dev_cosine_precision": 0.5911764705882353, "eval_Qnli-dev_cosine_recall": 0.8516949152542372, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 399.69769287109375, "eval_Qnli-dev_dot_ap": 0.645670123752458, "eval_Qnli-dev_dot_f1": 0.6833631484794276, "eval_Qnli-dev_dot_f1_threshold": 367.0381774902344, "eval_Qnli-dev_dot_precision": 0.5913312693498453, "eval_Qnli-dev_dot_recall": 0.809322033898305, "eval_Qnli-dev_euclidean_accuracy": 0.69921875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.117036819458008, "eval_Qnli-dev_euclidean_ap": 0.7337639674568743, "eval_Qnli-dev_euclidean_f1": 0.7001675041876045, "eval_Qnli-dev_euclidean_f1_threshold": 16.333152770996094, "eval_Qnli-dev_euclidean_precision": 0.5789473684210527, "eval_Qnli-dev_euclidean_recall": 0.885593220338983, "eval_Qnli-dev_manhattan_accuracy": 0.703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 294.5778503417969, "eval_Qnli-dev_manhattan_ap": 0.7366181110833769, "eval_Qnli-dev_manhattan_f1": 0.6973180076628352, "eval_Qnli-dev_manhattan_f1_threshold": 302.7152099609375, "eval_Qnli-dev_manhattan_precision": 0.6363636363636364, "eval_Qnli-dev_manhattan_recall": 0.7711864406779662, "eval_Qnli-dev_max_accuracy": 0.703125, "eval_Qnli-dev_max_accuracy_threshold": 399.69769287109375, "eval_Qnli-dev_max_ap": 0.7366181110833769, "eval_Qnli-dev_max_f1": 0.7001675041876045, "eval_Qnli-dev_max_f1_threshold": 367.0381774902344, "eval_Qnli-dev_max_precision": 0.6363636363636364, "eval_Qnli-dev_max_recall": 0.885593220338983, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8710236549377441, "eval_allNLI-dev_cosine_ap": 0.5945373648797287, "eval_allNLI-dev_cosine_f1": 0.5975609756097561, "eval_allNLI-dev_cosine_f1_threshold": 0.7772917747497559, "eval_allNLI-dev_cosine_precision": 0.4608150470219436, "eval_allNLI-dev_cosine_recall": 0.8497109826589595, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 456.685546875, "eval_allNLI-dev_dot_ap": 0.516668964052817, "eval_allNLI-dev_dot_f1": 0.5790554414784395, "eval_allNLI-dev_dot_f1_threshold": 387.36737060546875, "eval_allNLI-dev_dot_precision": 0.44904458598726116, "eval_allNLI-dev_dot_recall": 0.815028901734104, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.068269729614258, "eval_allNLI-dev_euclidean_ap": 0.5966992226114267, "eval_allNLI-dev_euclidean_f1": 0.5970772442588727, "eval_allNLI-dev_euclidean_f1_threshold": 14.66142463684082, "eval_allNLI-dev_euclidean_precision": 0.4673202614379085, "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 237.41098022460938, "eval_allNLI-dev_manhattan_ap": 0.5880205832464749, "eval_allNLI-dev_manhattan_f1": 0.5914893617021276, "eval_allNLI-dev_manhattan_f1_threshold": 297.3165283203125, "eval_allNLI-dev_manhattan_precision": 0.468013468013468, "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 456.685546875, "eval_allNLI-dev_max_ap": 0.5966992226114267, "eval_allNLI-dev_max_f1": 0.5975609756097561, "eval_allNLI-dev_max_f1_threshold": 387.36737060546875, "eval_allNLI-dev_max_precision": 0.468013468013468, "eval_allNLI-dev_max_recall": 0.8497109826589595, "eval_sequential_score": 0.7366181110833769, "eval_sts-test_pearson_cosine": 0.8088324955753331, "eval_sts-test_pearson_dot": 0.7775578039423507, "eval_sts-test_pearson_euclidean": 0.8322159624410153, "eval_sts-test_pearson_manhattan": 0.8289905701496498, "eval_sts-test_pearson_max": 0.8322159624410153, "eval_sts-test_spearman_cosine": 0.829002036100587, "eval_sts-test_spearman_dot": 0.7651558142348298, "eval_sts-test_spearman_euclidean": 0.8238636515163652, "eval_sts-test_spearman_manhattan": 0.8193701326087933, "eval_sts-test_spearman_max": 0.829002036100587, "eval_vitaminc-pairs_loss": 2.652156114578247, "eval_vitaminc-pairs_runtime": 3.195, "eval_vitaminc-pairs_samples_per_second": 40.062, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 740 }, { "epoch": 0.7613168724279835, "eval_negation-triplets_loss": 1.1374459266662598, "eval_negation-triplets_runtime": 0.7568, "eval_negation-triplets_samples_per_second": 169.13, "eval_negation-triplets_steps_per_second": 1.321, "step": 740 }, { "epoch": 0.7613168724279835, "eval_scitail-pairs-pos_loss": 0.18683280050754547, "eval_scitail-pairs-pos_runtime": 0.8273, "eval_scitail-pairs-pos_samples_per_second": 154.717, "eval_scitail-pairs-pos_steps_per_second": 1.209, "step": 740 }, { "epoch": 0.7613168724279835, "eval_scitail-pairs-qa_loss": 0.004593902267515659, "eval_scitail-pairs-qa_runtime": 0.5777, "eval_scitail-pairs-qa_samples_per_second": 221.553, "eval_scitail-pairs-qa_steps_per_second": 1.731, "step": 740 }, { "epoch": 0.7613168724279835, "eval_xsum-pairs_loss": 0.7033074498176575, "eval_xsum-pairs_runtime": 3.0213, "eval_xsum-pairs_samples_per_second": 42.366, "eval_xsum-pairs_steps_per_second": 0.331, "step": 740 }, { "epoch": 0.7613168724279835, "eval_sciq_pairs_loss": 0.12240559607744217, "eval_sciq_pairs_runtime": 3.4526, "eval_sciq_pairs_samples_per_second": 37.074, "eval_sciq_pairs_steps_per_second": 0.29, "step": 740 }, { "epoch": 0.7613168724279835, "eval_qasc_pairs_loss": 0.5442161560058594, "eval_qasc_pairs_runtime": 0.609, "eval_qasc_pairs_samples_per_second": 210.184, "eval_qasc_pairs_steps_per_second": 1.642, "step": 740 }, { "epoch": 0.7613168724279835, "eval_openbookqa_pairs_loss": 1.1632599830627441, "eval_openbookqa_pairs_runtime": 0.5933, "eval_openbookqa_pairs_samples_per_second": 215.749, "eval_openbookqa_pairs_steps_per_second": 1.686, "step": 740 }, { "epoch": 0.7613168724279835, "eval_msmarco_pairs_loss": 1.1908891201019287, "eval_msmarco_pairs_runtime": 1.5422, "eval_msmarco_pairs_samples_per_second": 83.0, "eval_msmarco_pairs_steps_per_second": 0.648, "step": 740 }, { "epoch": 0.7613168724279835, "eval_nq_pairs_loss": 1.4470250606536865, "eval_nq_pairs_runtime": 2.8983, "eval_nq_pairs_samples_per_second": 44.165, "eval_nq_pairs_steps_per_second": 0.345, "step": 740 }, { "epoch": 0.7613168724279835, "eval_trivia_pairs_loss": 1.1257771253585815, "eval_trivia_pairs_runtime": 3.4458, "eval_trivia_pairs_samples_per_second": 37.147, "eval_trivia_pairs_steps_per_second": 0.29, "step": 740 }, { "epoch": 0.7613168724279835, "eval_gooaq_pairs_loss": 0.6294673085212708, "eval_gooaq_pairs_runtime": 0.9529, "eval_gooaq_pairs_samples_per_second": 134.331, "eval_gooaq_pairs_steps_per_second": 1.049, "step": 740 }, { "epoch": 0.7613168724279835, "eval_paws-pos_loss": 0.029657872393727303, "eval_paws-pos_runtime": 0.6916, "eval_paws-pos_samples_per_second": 185.091, "eval_paws-pos_steps_per_second": 1.446, "step": 740 }, { "epoch": 0.7613168724279835, "eval_global_dataset_loss": 0.574967622756958, "eval_global_dataset_runtime": 13.3853, "eval_global_dataset_samples_per_second": 31.079, "eval_global_dataset_steps_per_second": 0.299, "step": 740 }, { "epoch": 0.7623456790123457, "grad_norm": 8.358269691467285, "learning_rate": 2.6822429906542052e-05, "loss": 0.5052, "step": 741 }, { "epoch": 0.7633744855967078, "grad_norm": 3.5208804607391357, "learning_rate": 2.685877466251298e-05, "loss": 0.101, "step": 742 }, { "epoch": 0.76440329218107, "grad_norm": 14.886555671691895, "learning_rate": 2.6895119418483903e-05, "loss": 1.6467, "step": 743 }, { "epoch": 0.7654320987654321, "grad_norm": 10.37888240814209, "learning_rate": 2.6931464174454824e-05, "loss": 0.7924, "step": 744 }, { "epoch": 0.7664609053497943, "grad_norm": 14.076517105102539, "learning_rate": 2.696780893042575e-05, "loss": 1.6842, "step": 745 }, { "epoch": 0.7674897119341564, "grad_norm": 16.620922088623047, "learning_rate": 2.7004153686396675e-05, "loss": 2.809, "step": 746 }, { "epoch": 0.7685185185185185, "grad_norm": 22.974336624145508, "learning_rate": 2.70404984423676e-05, "loss": 1.9317, "step": 747 }, { "epoch": 0.7695473251028807, "grad_norm": 7.3669657707214355, "learning_rate": 2.7076843198338523e-05, "loss": 0.4177, "step": 748 }, { "epoch": 0.7705761316872428, "grad_norm": 10.947649002075195, "learning_rate": 2.7113187954309447e-05, "loss": 0.9269, "step": 749 }, { "epoch": 0.7716049382716049, "grad_norm": 9.538216590881348, "learning_rate": 2.714953271028037e-05, "loss": 0.9832, "step": 750 }, { "epoch": 0.772633744855967, "grad_norm": 7.307182312011719, "learning_rate": 2.71858774662513e-05, "loss": 0.4875, "step": 751 }, { "epoch": 0.7736625514403292, "grad_norm": 3.3512260913848877, "learning_rate": 2.722222222222222e-05, "loss": 0.1066, "step": 752 }, { "epoch": 0.7746913580246914, "grad_norm": 8.798376083374023, "learning_rate": 2.7258566978193143e-05, "loss": 0.4801, "step": 753 }, { "epoch": 0.7757201646090535, "grad_norm": 9.195924758911133, "learning_rate": 2.729491173416407e-05, "loss": 0.4494, "step": 754 }, { "epoch": 0.7767489711934157, "grad_norm": 6.361667156219482, "learning_rate": 2.733125649013499e-05, "loss": 0.254, "step": 755 }, { "epoch": 0.7777777777777778, "grad_norm": 11.094511985778809, "learning_rate": 2.7367601246105916e-05, "loss": 0.5735, "step": 756 }, { "epoch": 0.7788065843621399, "grad_norm": 13.668522834777832, "learning_rate": 2.7403946002076843e-05, "loss": 1.109, "step": 757 }, { "epoch": 0.779835390946502, "grad_norm": 9.678313255310059, "learning_rate": 2.7440290758047767e-05, "loss": 0.5538, "step": 758 }, { "epoch": 0.7808641975308642, "grad_norm": 18.492931365966797, "learning_rate": 2.7476635514018688e-05, "loss": 1.6073, "step": 759 }, { "epoch": 0.7818930041152263, "grad_norm": 20.688257217407227, "learning_rate": 2.7512980269989615e-05, "loss": 3.0436, "step": 760 }, { "epoch": 0.7818930041152263, "eval_Qnli-dev_cosine_accuracy": 0.703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7550844550132751, "eval_Qnli-dev_cosine_ap": 0.7364566550443425, "eval_Qnli-dev_cosine_f1": 0.7132075471698114, "eval_Qnli-dev_cosine_f1_threshold": 0.7550844550132751, "eval_Qnli-dev_cosine_precision": 0.6428571428571429, "eval_Qnli-dev_cosine_recall": 0.8008474576271186, "eval_Qnli-dev_dot_accuracy": 0.677734375, "eval_Qnli-dev_dot_accuracy_threshold": 369.6612854003906, "eval_Qnli-dev_dot_ap": 0.6549287118943474, "eval_Qnli-dev_dot_f1": 0.6805555555555555, "eval_Qnli-dev_dot_f1_threshold": 328.00164794921875, "eval_Qnli-dev_dot_precision": 0.5764705882352941, "eval_Qnli-dev_dot_recall": 0.8305084745762712, "eval_Qnli-dev_euclidean_accuracy": 0.705078125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.8548583984375, "eval_Qnli-dev_euclidean_ap": 0.7442510510869947, "eval_Qnli-dev_euclidean_f1": 0.7076923076923077, "eval_Qnli-dev_euclidean_f1_threshold": 16.119770050048828, "eval_Qnli-dev_euclidean_precision": 0.5931232091690545, "eval_Qnli-dev_euclidean_recall": 0.8771186440677966, "eval_Qnli-dev_manhattan_accuracy": 0.701171875, "eval_Qnli-dev_manhattan_accuracy_threshold": 297.2847595214844, "eval_Qnli-dev_manhattan_ap": 0.7470615407792083, "eval_Qnli-dev_manhattan_f1": 0.7087198515769945, "eval_Qnli-dev_manhattan_f1_threshold": 312.7979431152344, "eval_Qnli-dev_manhattan_precision": 0.6303630363036303, "eval_Qnli-dev_manhattan_recall": 0.809322033898305, "eval_Qnli-dev_max_accuracy": 0.705078125, "eval_Qnli-dev_max_accuracy_threshold": 369.6612854003906, "eval_Qnli-dev_max_ap": 0.7470615407792083, "eval_Qnli-dev_max_f1": 0.7132075471698114, "eval_Qnli-dev_max_f1_threshold": 328.00164794921875, "eval_Qnli-dev_max_precision": 0.6428571428571429, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8610843420028687, "eval_allNLI-dev_cosine_ap": 0.5881774055043343, "eval_allNLI-dev_cosine_f1": 0.5910064239828695, "eval_allNLI-dev_cosine_f1_threshold": 0.7733876705169678, "eval_allNLI-dev_cosine_precision": 0.46938775510204084, "eval_allNLI-dev_cosine_recall": 0.7976878612716763, "eval_allNLI-dev_dot_accuracy": 0.681640625, "eval_allNLI-dev_dot_accuracy_threshold": 442.53680419921875, "eval_allNLI-dev_dot_ap": 0.5120414811620706, "eval_allNLI-dev_dot_f1": 0.5700934579439252, "eval_allNLI-dev_dot_f1_threshold": 351.6019592285156, "eval_allNLI-dev_dot_precision": 0.47843137254901963, "eval_allNLI-dev_dot_recall": 0.7052023121387283, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.514341354370117, "eval_allNLI-dev_euclidean_ap": 0.5926528852791054, "eval_allNLI-dev_euclidean_f1": 0.596, "eval_allNLI-dev_euclidean_f1_threshold": 14.976218223571777, "eval_allNLI-dev_euclidean_precision": 0.45565749235474007, "eval_allNLI-dev_euclidean_recall": 0.861271676300578, "eval_allNLI-dev_manhattan_accuracy": 0.71875, "eval_allNLI-dev_manhattan_accuracy_threshold": 222.68905639648438, "eval_allNLI-dev_manhattan_ap": 0.5863936149481368, "eval_allNLI-dev_manhattan_f1": 0.5978947368421053, "eval_allNLI-dev_manhattan_f1_threshold": 297.7838134765625, "eval_allNLI-dev_manhattan_precision": 0.47019867549668876, "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 442.53680419921875, "eval_allNLI-dev_max_ap": 0.5926528852791054, "eval_allNLI-dev_max_f1": 0.5978947368421053, "eval_allNLI-dev_max_f1_threshold": 351.6019592285156, "eval_allNLI-dev_max_precision": 0.47843137254901963, "eval_allNLI-dev_max_recall": 0.861271676300578, "eval_sequential_score": 0.7470615407792083, "eval_sts-test_pearson_cosine": 0.8068314455509153, "eval_sts-test_pearson_dot": 0.772425389013349, "eval_sts-test_pearson_euclidean": 0.8289777456195899, "eval_sts-test_pearson_manhattan": 0.8263159059644403, "eval_sts-test_pearson_max": 0.8289777456195899, "eval_sts-test_spearman_cosine": 0.8266359474083009, "eval_sts-test_spearman_dot": 0.7547315896601016, "eval_sts-test_spearman_euclidean": 0.8200646274343266, "eval_sts-test_spearman_manhattan": 0.8175935970340776, "eval_sts-test_spearman_max": 0.8266359474083009, "eval_vitaminc-pairs_loss": 2.7475264072418213, "eval_vitaminc-pairs_runtime": 3.1935, "eval_vitaminc-pairs_samples_per_second": 40.081, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 760 }, { "epoch": 0.7818930041152263, "eval_negation-triplets_loss": 1.0812993049621582, "eval_negation-triplets_runtime": 0.744, "eval_negation-triplets_samples_per_second": 172.045, "eval_negation-triplets_steps_per_second": 1.344, "step": 760 }, { "epoch": 0.7818930041152263, "eval_scitail-pairs-pos_loss": 0.1583121418952942, "eval_scitail-pairs-pos_runtime": 0.8387, "eval_scitail-pairs-pos_samples_per_second": 152.624, "eval_scitail-pairs-pos_steps_per_second": 1.192, "step": 760 }, { "epoch": 0.7818930041152263, "eval_scitail-pairs-qa_loss": 0.0030275785829871893, "eval_scitail-pairs-qa_runtime": 0.5811, "eval_scitail-pairs-qa_samples_per_second": 220.288, "eval_scitail-pairs-qa_steps_per_second": 1.721, "step": 760 }, { "epoch": 0.7818930041152263, "eval_xsum-pairs_loss": 0.6426714658737183, "eval_xsum-pairs_runtime": 3.0216, "eval_xsum-pairs_samples_per_second": 42.361, "eval_xsum-pairs_steps_per_second": 0.331, "step": 760 }, { "epoch": 0.7818930041152263, "eval_sciq_pairs_loss": 0.12087687849998474, "eval_sciq_pairs_runtime": 3.4733, "eval_sciq_pairs_samples_per_second": 36.852, "eval_sciq_pairs_steps_per_second": 0.288, "step": 760 }, { "epoch": 0.7818930041152263, "eval_qasc_pairs_loss": 0.5539246201515198, "eval_qasc_pairs_runtime": 0.6065, "eval_qasc_pairs_samples_per_second": 211.043, "eval_qasc_pairs_steps_per_second": 1.649, "step": 760 }, { "epoch": 0.7818930041152263, "eval_openbookqa_pairs_loss": 1.1023366451263428, "eval_openbookqa_pairs_runtime": 0.5847, "eval_openbookqa_pairs_samples_per_second": 218.917, "eval_openbookqa_pairs_steps_per_second": 1.71, "step": 760 }, { "epoch": 0.7818930041152263, "eval_msmarco_pairs_loss": 1.2618669271469116, "eval_msmarco_pairs_runtime": 1.5194, "eval_msmarco_pairs_samples_per_second": 84.242, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 760 }, { "epoch": 0.7818930041152263, "eval_nq_pairs_loss": 1.4234434366226196, "eval_nq_pairs_runtime": 2.9033, "eval_nq_pairs_samples_per_second": 44.088, "eval_nq_pairs_steps_per_second": 0.344, "step": 760 }, { "epoch": 0.7818930041152263, "eval_trivia_pairs_loss": 1.1620062589645386, "eval_trivia_pairs_runtime": 3.4422, "eval_trivia_pairs_samples_per_second": 37.185, "eval_trivia_pairs_steps_per_second": 0.291, "step": 760 }, { "epoch": 0.7818930041152263, "eval_gooaq_pairs_loss": 0.622553825378418, "eval_gooaq_pairs_runtime": 0.9454, "eval_gooaq_pairs_samples_per_second": 135.393, "eval_gooaq_pairs_steps_per_second": 1.058, "step": 760 }, { "epoch": 0.7818930041152263, "eval_paws-pos_loss": 0.029666246846318245, "eval_paws-pos_runtime": 0.6886, "eval_paws-pos_samples_per_second": 185.877, "eval_paws-pos_steps_per_second": 1.452, "step": 760 }, { "epoch": 0.7818930041152263, "eval_global_dataset_loss": 0.599385678768158, "eval_global_dataset_runtime": 13.3915, "eval_global_dataset_samples_per_second": 31.064, "eval_global_dataset_steps_per_second": 0.299, "step": 760 }, { "epoch": 0.7829218106995884, "grad_norm": 13.270237922668457, "learning_rate": 2.754932502596054e-05, "loss": 0.9703, "step": 761 }, { "epoch": 0.7839506172839507, "grad_norm": 10.215360641479492, "learning_rate": 2.758566978193146e-05, "loss": 1.0167, "step": 762 }, { "epoch": 0.7849794238683128, "grad_norm": 14.256012916564941, "learning_rate": 2.7622014537902387e-05, "loss": 1.8575, "step": 763 }, { "epoch": 0.7860082304526749, "grad_norm": 14.234450340270996, "learning_rate": 2.765835929387331e-05, "loss": 1.48, "step": 764 }, { "epoch": 0.7870370370370371, "grad_norm": 15.287798881530762, "learning_rate": 2.7694704049844235e-05, "loss": 1.5257, "step": 765 }, { "epoch": 0.7880658436213992, "grad_norm": 12.686257362365723, "learning_rate": 2.773104880581516e-05, "loss": 1.2119, "step": 766 }, { "epoch": 0.7890946502057613, "grad_norm": 11.21288013458252, "learning_rate": 2.7767393561786083e-05, "loss": 1.0656, "step": 767 }, { "epoch": 0.7901234567901234, "grad_norm": 9.147239685058594, "learning_rate": 2.7803738317757007e-05, "loss": 0.5485, "step": 768 }, { "epoch": 0.7911522633744856, "grad_norm": 8.927838325500488, "learning_rate": 2.7840083073727935e-05, "loss": 0.6264, "step": 769 }, { "epoch": 0.7921810699588477, "grad_norm": 12.626420974731445, "learning_rate": 2.7876427829698855e-05, "loss": 1.0876, "step": 770 }, { "epoch": 0.7932098765432098, "grad_norm": 8.545890808105469, "learning_rate": 2.791277258566978e-05, "loss": 0.5902, "step": 771 }, { "epoch": 0.7942386831275721, "grad_norm": 12.124262809753418, "learning_rate": 2.7949117341640707e-05, "loss": 0.9689, "step": 772 }, { "epoch": 0.7952674897119342, "grad_norm": 8.3804292678833, "learning_rate": 2.7985462097611627e-05, "loss": 0.5276, "step": 773 }, { "epoch": 0.7962962962962963, "grad_norm": 12.29673957824707, "learning_rate": 2.802180685358255e-05, "loss": 1.2571, "step": 774 }, { "epoch": 0.7973251028806584, "grad_norm": 6.740438938140869, "learning_rate": 2.805815160955348e-05, "loss": 0.3492, "step": 775 }, { "epoch": 0.7983539094650206, "grad_norm": 13.983535766601562, "learning_rate": 2.80944963655244e-05, "loss": 1.4877, "step": 776 }, { "epoch": 0.7993827160493827, "grad_norm": 10.374014854431152, "learning_rate": 2.8130841121495323e-05, "loss": 1.2044, "step": 777 }, { "epoch": 0.8004115226337448, "grad_norm": 14.681657791137695, "learning_rate": 2.816718587746625e-05, "loss": 1.2838, "step": 778 }, { "epoch": 0.801440329218107, "grad_norm": 8.073484420776367, "learning_rate": 2.8203530633437175e-05, "loss": 0.4491, "step": 779 }, { "epoch": 0.8024691358024691, "grad_norm": 14.766283988952637, "learning_rate": 2.8239875389408095e-05, "loss": 1.5724, "step": 780 }, { "epoch": 0.8024691358024691, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8231204152107239, "eval_Qnli-dev_cosine_ap": 0.7391395822952389, "eval_Qnli-dev_cosine_f1": 0.7054545454545453, "eval_Qnli-dev_cosine_f1_threshold": 0.7827090620994568, "eval_Qnli-dev_cosine_precision": 0.6178343949044586, "eval_Qnli-dev_cosine_recall": 0.8220338983050848, "eval_Qnli-dev_dot_accuracy": 0.6640625, "eval_Qnli-dev_dot_accuracy_threshold": 410.32037353515625, "eval_Qnli-dev_dot_ap": 0.6504819630539224, "eval_Qnli-dev_dot_f1": 0.6780238500851788, "eval_Qnli-dev_dot_f1_threshold": 381.0080871582031, "eval_Qnli-dev_dot_precision": 0.5669515669515669, "eval_Qnli-dev_dot_recall": 0.8432203389830508, "eval_Qnli-dev_euclidean_accuracy": 0.70703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.04183578491211, "eval_Qnli-dev_euclidean_ap": 0.7438731947506383, "eval_Qnli-dev_euclidean_f1": 0.7050847457627119, "eval_Qnli-dev_euclidean_f1_threshold": 15.714797019958496, "eval_Qnli-dev_euclidean_precision": 0.5875706214689266, "eval_Qnli-dev_euclidean_recall": 0.8813559322033898, "eval_Qnli-dev_manhattan_accuracy": 0.701171875, "eval_Qnli-dev_manhattan_accuracy_threshold": 290.35009765625, "eval_Qnli-dev_manhattan_ap": 0.7446632934882194, "eval_Qnli-dev_manhattan_f1": 0.7015503875968992, "eval_Qnli-dev_manhattan_f1_threshold": 293.35986328125, "eval_Qnli-dev_manhattan_precision": 0.6464285714285715, "eval_Qnli-dev_manhattan_recall": 0.7669491525423728, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 410.32037353515625, "eval_Qnli-dev_max_ap": 0.7446632934882194, "eval_Qnli-dev_max_f1": 0.7054545454545453, "eval_Qnli-dev_max_f1_threshold": 381.0080871582031, "eval_Qnli-dev_max_precision": 0.6464285714285715, "eval_Qnli-dev_max_recall": 0.8813559322033898, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8960142135620117, "eval_allNLI-dev_cosine_ap": 0.5873660686997946, "eval_allNLI-dev_cosine_f1": 0.6017316017316017, "eval_allNLI-dev_cosine_f1_threshold": 0.8052390813827515, "eval_allNLI-dev_cosine_precision": 0.4809688581314879, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 469.1497497558594, "eval_allNLI-dev_dot_ap": 0.5278322808998677, "eval_allNLI-dev_dot_f1": 0.5864978902953586, "eval_allNLI-dev_dot_f1_threshold": 398.7422790527344, "eval_allNLI-dev_dot_precision": 0.46179401993355484, "eval_allNLI-dev_dot_recall": 0.8034682080924855, "eval_allNLI-dev_euclidean_accuracy": 0.724609375, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.34986400604248, "eval_allNLI-dev_euclidean_ap": 0.5891503709712753, "eval_allNLI-dev_euclidean_f1": 0.6061855670103092, "eval_allNLI-dev_euclidean_f1_threshold": 14.381561279296875, "eval_allNLI-dev_euclidean_precision": 0.47115384615384615, "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, "eval_allNLI-dev_manhattan_accuracy": 0.72265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 202.1298828125, "eval_allNLI-dev_manhattan_ap": 0.5845629157897831, "eval_allNLI-dev_manhattan_f1": 0.6017316017316017, "eval_allNLI-dev_manhattan_f1_threshold": 290.66619873046875, "eval_allNLI-dev_manhattan_precision": 0.4809688581314879, "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, "eval_allNLI-dev_max_accuracy": 0.7265625, "eval_allNLI-dev_max_accuracy_threshold": 469.1497497558594, "eval_allNLI-dev_max_ap": 0.5891503709712753, "eval_allNLI-dev_max_f1": 0.6061855670103092, "eval_allNLI-dev_max_f1_threshold": 398.7422790527344, "eval_allNLI-dev_max_precision": 0.4809688581314879, "eval_allNLI-dev_max_recall": 0.8497109826589595, "eval_sequential_score": 0.7446632934882194, "eval_sts-test_pearson_cosine": 0.8117461676559997, "eval_sts-test_pearson_dot": 0.7886477825036372, "eval_sts-test_pearson_euclidean": 0.8403273274655056, "eval_sts-test_pearson_manhattan": 0.8379940892338228, "eval_sts-test_pearson_max": 0.8403273274655056, "eval_sts-test_spearman_cosine": 0.8390338816154358, "eval_sts-test_spearman_dot": 0.7805059891559553, "eval_sts-test_spearman_euclidean": 0.8335163643447059, "eval_sts-test_spearman_manhattan": 0.8315699082304869, "eval_sts-test_spearman_max": 0.8390338816154358, "eval_vitaminc-pairs_loss": 2.659418821334839, "eval_vitaminc-pairs_runtime": 3.2064, "eval_vitaminc-pairs_samples_per_second": 39.92, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 780 }, { "epoch": 0.8024691358024691, "eval_negation-triplets_loss": 1.0590914487838745, "eval_negation-triplets_runtime": 0.7504, "eval_negation-triplets_samples_per_second": 170.58, "eval_negation-triplets_steps_per_second": 1.333, "step": 780 }, { "epoch": 0.8024691358024691, "eval_scitail-pairs-pos_loss": 0.1532289683818817, "eval_scitail-pairs-pos_runtime": 0.8417, "eval_scitail-pairs-pos_samples_per_second": 152.081, "eval_scitail-pairs-pos_steps_per_second": 1.188, "step": 780 }, { "epoch": 0.8024691358024691, "eval_scitail-pairs-qa_loss": 0.0047495742328464985, "eval_scitail-pairs-qa_runtime": 0.6048, "eval_scitail-pairs-qa_samples_per_second": 211.629, "eval_scitail-pairs-qa_steps_per_second": 1.653, "step": 780 }, { "epoch": 0.8024691358024691, "eval_xsum-pairs_loss": 0.6486428380012512, "eval_xsum-pairs_runtime": 3.0654, "eval_xsum-pairs_samples_per_second": 41.757, "eval_xsum-pairs_steps_per_second": 0.326, "step": 780 }, { "epoch": 0.8024691358024691, "eval_sciq_pairs_loss": 0.11410364508628845, "eval_sciq_pairs_runtime": 3.4977, "eval_sciq_pairs_samples_per_second": 36.596, "eval_sciq_pairs_steps_per_second": 0.286, "step": 780 }, { "epoch": 0.8024691358024691, "eval_qasc_pairs_loss": 0.5198172330856323, "eval_qasc_pairs_runtime": 0.6108, "eval_qasc_pairs_samples_per_second": 209.558, "eval_qasc_pairs_steps_per_second": 1.637, "step": 780 }, { "epoch": 0.8024691358024691, "eval_openbookqa_pairs_loss": 1.029971957206726, "eval_openbookqa_pairs_runtime": 0.5904, "eval_openbookqa_pairs_samples_per_second": 216.82, "eval_openbookqa_pairs_steps_per_second": 1.694, "step": 780 }, { "epoch": 0.8024691358024691, "eval_msmarco_pairs_loss": 1.185034990310669, "eval_msmarco_pairs_runtime": 1.5164, "eval_msmarco_pairs_samples_per_second": 84.41, "eval_msmarco_pairs_steps_per_second": 0.659, "step": 780 }, { "epoch": 0.8024691358024691, "eval_nq_pairs_loss": 1.3883589506149292, "eval_nq_pairs_runtime": 2.9107, "eval_nq_pairs_samples_per_second": 43.976, "eval_nq_pairs_steps_per_second": 0.344, "step": 780 }, { "epoch": 0.8024691358024691, "eval_trivia_pairs_loss": 1.0724446773529053, "eval_trivia_pairs_runtime": 3.4605, "eval_trivia_pairs_samples_per_second": 36.989, "eval_trivia_pairs_steps_per_second": 0.289, "step": 780 }, { "epoch": 0.8024691358024691, "eval_gooaq_pairs_loss": 0.5710882544517517, "eval_gooaq_pairs_runtime": 0.9518, "eval_gooaq_pairs_samples_per_second": 134.481, "eval_gooaq_pairs_steps_per_second": 1.051, "step": 780 }, { "epoch": 0.8024691358024691, "eval_paws-pos_loss": 0.031913165003061295, "eval_paws-pos_runtime": 0.6967, "eval_paws-pos_samples_per_second": 183.728, "eval_paws-pos_steps_per_second": 1.435, "step": 780 }, { "epoch": 0.8024691358024691, "eval_global_dataset_loss": 0.5888291597366333, "eval_global_dataset_runtime": 13.3816, "eval_global_dataset_samples_per_second": 31.088, "eval_global_dataset_steps_per_second": 0.299, "step": 780 }, { "epoch": 0.8034979423868313, "grad_norm": 12.456541061401367, "learning_rate": 2.8276220145379023e-05, "loss": 1.4439, "step": 781 }, { "epoch": 0.8045267489711934, "grad_norm": 1.0020017623901367, "learning_rate": 2.8312564901349947e-05, "loss": 0.0386, "step": 782 }, { "epoch": 0.8055555555555556, "grad_norm": 9.883146286010742, "learning_rate": 2.8348909657320867e-05, "loss": 0.5761, "step": 783 }, { "epoch": 0.8065843621399177, "grad_norm": 6.427492141723633, "learning_rate": 2.8385254413291795e-05, "loss": 0.2763, "step": 784 }, { "epoch": 0.8076131687242798, "grad_norm": 18.895719528198242, "learning_rate": 2.842159916926272e-05, "loss": 1.7799, "step": 785 }, { "epoch": 0.808641975308642, "grad_norm": 13.417122840881348, "learning_rate": 2.8457943925233643e-05, "loss": 1.6238, "step": 786 }, { "epoch": 0.8096707818930041, "grad_norm": 14.21626091003418, "learning_rate": 2.8494288681204567e-05, "loss": 1.2406, "step": 787 }, { "epoch": 0.8106995884773662, "grad_norm": 10.437925338745117, "learning_rate": 2.853063343717549e-05, "loss": 1.2021, "step": 788 }, { "epoch": 0.8117283950617284, "grad_norm": 9.257889747619629, "learning_rate": 2.8566978193146415e-05, "loss": 0.5357, "step": 789 }, { "epoch": 0.8127572016460906, "grad_norm": 7.7459940910339355, "learning_rate": 2.8603322949117342e-05, "loss": 0.3974, "step": 790 }, { "epoch": 0.8137860082304527, "grad_norm": 9.664865493774414, "learning_rate": 2.8639667705088263e-05, "loss": 0.681, "step": 791 }, { "epoch": 0.8148148148148148, "grad_norm": 1.9434237480163574, "learning_rate": 2.8676012461059187e-05, "loss": 0.0454, "step": 792 }, { "epoch": 0.815843621399177, "grad_norm": 11.688820838928223, "learning_rate": 2.8712357217030114e-05, "loss": 0.8601, "step": 793 }, { "epoch": 0.8168724279835391, "grad_norm": 8.6393461227417, "learning_rate": 2.8748701973001035e-05, "loss": 0.5149, "step": 794 }, { "epoch": 0.8179012345679012, "grad_norm": 3.408317804336548, "learning_rate": 2.878504672897196e-05, "loss": 0.1049, "step": 795 }, { "epoch": 0.8189300411522634, "grad_norm": 2.3510513305664062, "learning_rate": 2.8821391484942886e-05, "loss": 0.0591, "step": 796 }, { "epoch": 0.8199588477366255, "grad_norm": 19.143835067749023, "learning_rate": 2.885773624091381e-05, "loss": 1.7556, "step": 797 }, { "epoch": 0.8209876543209876, "grad_norm": 2.234999895095825, "learning_rate": 2.889408099688473e-05, "loss": 0.0651, "step": 798 }, { "epoch": 0.8220164609053497, "grad_norm": 7.49348783493042, "learning_rate": 2.893042575285566e-05, "loss": 0.3813, "step": 799 }, { "epoch": 0.823045267489712, "grad_norm": 8.669596672058105, "learning_rate": 2.8966770508826583e-05, "loss": 0.4154, "step": 800 }, { "epoch": 0.823045267489712, "eval_Qnli-dev_cosine_accuracy": 0.701171875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7969362139701843, "eval_Qnli-dev_cosine_ap": 0.7404462233066612, "eval_Qnli-dev_cosine_f1": 0.6980802792321116, "eval_Qnli-dev_cosine_f1_threshold": 0.7222884893417358, "eval_Qnli-dev_cosine_precision": 0.5934718100890207, "eval_Qnli-dev_cosine_recall": 0.847457627118644, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 375.7672119140625, "eval_Qnli-dev_dot_ap": 0.6479436940211677, "eval_Qnli-dev_dot_f1": 0.6717325227963526, "eval_Qnli-dev_dot_f1_threshold": 306.73577880859375, "eval_Qnli-dev_dot_precision": 0.523696682464455, "eval_Qnli-dev_dot_recall": 0.9364406779661016, "eval_Qnli-dev_euclidean_accuracy": 0.712890625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.082931518554688, "eval_Qnli-dev_euclidean_ap": 0.7512343790385024, "eval_Qnli-dev_euclidean_f1": 0.7015706806282722, "eval_Qnli-dev_euclidean_f1_threshold": 16.305587768554688, "eval_Qnli-dev_euclidean_precision": 0.5964391691394659, "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, "eval_Qnli-dev_manhattan_accuracy": 0.70703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 294.2919006347656, "eval_Qnli-dev_manhattan_ap": 0.7497633403333601, "eval_Qnli-dev_manhattan_f1": 0.7025089605734768, "eval_Qnli-dev_manhattan_f1_threshold": 333.9628601074219, "eval_Qnli-dev_manhattan_precision": 0.6086956521739131, "eval_Qnli-dev_manhattan_recall": 0.8305084745762712, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 375.7672119140625, "eval_Qnli-dev_max_ap": 0.7512343790385024, "eval_Qnli-dev_max_f1": 0.7025089605734768, "eval_Qnli-dev_max_f1_threshold": 333.9628601074219, "eval_Qnli-dev_max_precision": 0.6086956521739131, "eval_Qnli-dev_max_recall": 0.9364406779661016, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8626278638839722, "eval_allNLI-dev_cosine_ap": 0.5970787093998088, "eval_allNLI-dev_cosine_f1": 0.6073752711496746, "eval_allNLI-dev_cosine_f1_threshold": 0.7643657326698303, "eval_allNLI-dev_cosine_precision": 0.4861111111111111, "eval_allNLI-dev_cosine_recall": 0.8092485549132948, "eval_allNLI-dev_dot_accuracy": 0.69921875, "eval_allNLI-dev_dot_accuracy_threshold": 427.77294921875, "eval_allNLI-dev_dot_ap": 0.5307372759630803, "eval_allNLI-dev_dot_f1": 0.5900900900900902, "eval_allNLI-dev_dot_f1_threshold": 365.810302734375, "eval_allNLI-dev_dot_precision": 0.4833948339483395, "eval_allNLI-dev_dot_recall": 0.7572254335260116, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.048674583435059, "eval_allNLI-dev_euclidean_ap": 0.6004011776083211, "eval_allNLI-dev_euclidean_f1": 0.6140724946695097, "eval_allNLI-dev_euclidean_f1_threshold": 15.054520606994629, "eval_allNLI-dev_euclidean_precision": 0.4864864864864865, "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 219.22201538085938, "eval_allNLI-dev_manhattan_ap": 0.5983608065576813, "eval_allNLI-dev_manhattan_f1": 0.6170678336980306, "eval_allNLI-dev_manhattan_f1_threshold": 307.1187744140625, "eval_allNLI-dev_manhattan_precision": 0.4964788732394366, "eval_allNLI-dev_manhattan_recall": 0.815028901734104, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 427.77294921875, "eval_allNLI-dev_max_ap": 0.6004011776083211, "eval_allNLI-dev_max_f1": 0.6170678336980306, "eval_allNLI-dev_max_f1_threshold": 365.810302734375, "eval_allNLI-dev_max_precision": 0.4964788732394366, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.7512343790385024, "eval_sts-test_pearson_cosine": 0.8198131802310901, "eval_sts-test_pearson_dot": 0.8060682717309235, "eval_sts-test_pearson_euclidean": 0.8431733382027362, "eval_sts-test_pearson_manhattan": 0.8415106486610984, "eval_sts-test_pearson_max": 0.8431733382027362, "eval_sts-test_spearman_cosine": 0.8405863476243647, "eval_sts-test_spearman_dot": 0.7927496657650738, "eval_sts-test_spearman_euclidean": 0.8335884138755459, "eval_sts-test_spearman_manhattan": 0.8322680279081929, "eval_sts-test_spearman_max": 0.8405863476243647, "eval_vitaminc-pairs_loss": 2.8224048614501953, "eval_vitaminc-pairs_runtime": 3.191, "eval_vitaminc-pairs_samples_per_second": 40.112, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 800 }, { "epoch": 0.823045267489712, "eval_negation-triplets_loss": 1.0728732347488403, "eval_negation-triplets_runtime": 0.7461, "eval_negation-triplets_samples_per_second": 171.567, "eval_negation-triplets_steps_per_second": 1.34, "step": 800 }, { "epoch": 0.823045267489712, "eval_scitail-pairs-pos_loss": 0.16487395763397217, "eval_scitail-pairs-pos_runtime": 0.8425, "eval_scitail-pairs-pos_samples_per_second": 151.933, "eval_scitail-pairs-pos_steps_per_second": 1.187, "step": 800 }, { "epoch": 0.823045267489712, "eval_scitail-pairs-qa_loss": 0.002279088133946061, "eval_scitail-pairs-qa_runtime": 0.5921, "eval_scitail-pairs-qa_samples_per_second": 216.187, "eval_scitail-pairs-qa_steps_per_second": 1.689, "step": 800 }, { "epoch": 0.823045267489712, "eval_xsum-pairs_loss": 0.6205843687057495, "eval_xsum-pairs_runtime": 3.0236, "eval_xsum-pairs_samples_per_second": 42.333, "eval_xsum-pairs_steps_per_second": 0.331, "step": 800 }, { "epoch": 0.823045267489712, "eval_sciq_pairs_loss": 0.13013440370559692, "eval_sciq_pairs_runtime": 3.4722, "eval_sciq_pairs_samples_per_second": 36.864, "eval_sciq_pairs_steps_per_second": 0.288, "step": 800 }, { "epoch": 0.823045267489712, "eval_qasc_pairs_loss": 0.45994842052459717, "eval_qasc_pairs_runtime": 0.6086, "eval_qasc_pairs_samples_per_second": 210.312, "eval_qasc_pairs_steps_per_second": 1.643, "step": 800 }, { "epoch": 0.823045267489712, "eval_openbookqa_pairs_loss": 1.1286119222640991, "eval_openbookqa_pairs_runtime": 0.5867, "eval_openbookqa_pairs_samples_per_second": 218.188, "eval_openbookqa_pairs_steps_per_second": 1.705, "step": 800 }, { "epoch": 0.823045267489712, "eval_msmarco_pairs_loss": 1.2176563739776611, "eval_msmarco_pairs_runtime": 1.5198, "eval_msmarco_pairs_samples_per_second": 84.221, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 800 }, { "epoch": 0.823045267489712, "eval_nq_pairs_loss": 1.3381102085113525, "eval_nq_pairs_runtime": 2.9052, "eval_nq_pairs_samples_per_second": 44.059, "eval_nq_pairs_steps_per_second": 0.344, "step": 800 }, { "epoch": 0.823045267489712, "eval_trivia_pairs_loss": 1.1009663343429565, "eval_trivia_pairs_runtime": 3.4646, "eval_trivia_pairs_samples_per_second": 36.946, "eval_trivia_pairs_steps_per_second": 0.289, "step": 800 }, { "epoch": 0.823045267489712, "eval_gooaq_pairs_loss": 0.6627429723739624, "eval_gooaq_pairs_runtime": 0.9514, "eval_gooaq_pairs_samples_per_second": 134.542, "eval_gooaq_pairs_steps_per_second": 1.051, "step": 800 }, { "epoch": 0.823045267489712, "eval_paws-pos_loss": 0.03013201802968979, "eval_paws-pos_runtime": 0.6964, "eval_paws-pos_samples_per_second": 183.798, "eval_paws-pos_steps_per_second": 1.436, "step": 800 }, { "epoch": 0.823045267489712, "eval_global_dataset_loss": 0.5941927433013916, "eval_global_dataset_runtime": 13.3686, "eval_global_dataset_samples_per_second": 31.118, "eval_global_dataset_steps_per_second": 0.299, "step": 800 }, { "epoch": 0.8240740740740741, "grad_norm": 14.879446029663086, "learning_rate": 2.9003115264797503e-05, "loss": 1.0372, "step": 801 }, { "epoch": 0.8251028806584362, "grad_norm": 9.06614875793457, "learning_rate": 2.903946002076843e-05, "loss": 0.4029, "step": 802 }, { "epoch": 0.8261316872427984, "grad_norm": 8.139265060424805, "learning_rate": 2.9075804776739355e-05, "loss": 0.376, "step": 803 }, { "epoch": 0.8271604938271605, "grad_norm": 13.07675838470459, "learning_rate": 2.911214953271028e-05, "loss": 1.4204, "step": 804 }, { "epoch": 0.8281893004115226, "grad_norm": 13.634737968444824, "learning_rate": 2.9148494288681203e-05, "loss": 0.1015, "step": 805 }, { "epoch": 0.8292181069958847, "grad_norm": 9.257582664489746, "learning_rate": 2.9184839044652127e-05, "loss": 0.7088, "step": 806 }, { "epoch": 0.8302469135802469, "grad_norm": 11.305009841918945, "learning_rate": 2.922118380062305e-05, "loss": 0.8444, "step": 807 }, { "epoch": 0.831275720164609, "grad_norm": 17.285337448120117, "learning_rate": 2.925752855659397e-05, "loss": 1.4104, "step": 808 }, { "epoch": 0.8323045267489712, "grad_norm": 21.70269012451172, "learning_rate": 2.92938733125649e-05, "loss": 3.4062, "step": 809 }, { "epoch": 0.8333333333333334, "grad_norm": 10.347410202026367, "learning_rate": 2.9330218068535823e-05, "loss": 0.632, "step": 810 }, { "epoch": 0.8343621399176955, "grad_norm": 12.212241172790527, "learning_rate": 2.9366562824506747e-05, "loss": 0.9835, "step": 811 }, { "epoch": 0.8353909465020576, "grad_norm": 12.607038497924805, "learning_rate": 2.940290758047767e-05, "loss": 1.6676, "step": 812 }, { "epoch": 0.8364197530864198, "grad_norm": 13.61136531829834, "learning_rate": 2.9439252336448595e-05, "loss": 1.1451, "step": 813 }, { "epoch": 0.8374485596707819, "grad_norm": 9.604448318481445, "learning_rate": 2.947559709241952e-05, "loss": 0.6491, "step": 814 }, { "epoch": 0.838477366255144, "grad_norm": 8.173309326171875, "learning_rate": 2.9511941848390446e-05, "loss": 0.4395, "step": 815 }, { "epoch": 0.8395061728395061, "grad_norm": 16.461246490478516, "learning_rate": 2.9548286604361367e-05, "loss": 1.5228, "step": 816 }, { "epoch": 0.8405349794238683, "grad_norm": 12.087141036987305, "learning_rate": 2.958463136033229e-05, "loss": 1.1335, "step": 817 }, { "epoch": 0.8415637860082305, "grad_norm": 10.887614250183105, "learning_rate": 2.962097611630322e-05, "loss": 1.034, "step": 818 }, { "epoch": 0.8425925925925926, "grad_norm": 9.939887046813965, "learning_rate": 2.965732087227414e-05, "loss": 0.8548, "step": 819 }, { "epoch": 0.8436213991769548, "grad_norm": 11.749360084533691, "learning_rate": 2.9693665628245063e-05, "loss": 1.0941, "step": 820 }, { "epoch": 0.8436213991769548, "eval_Qnli-dev_cosine_accuracy": 0.69921875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8176131248474121, "eval_Qnli-dev_cosine_ap": 0.7388220297886944, "eval_Qnli-dev_cosine_f1": 0.7024029574861368, "eval_Qnli-dev_cosine_f1_threshold": 0.7822612524032593, "eval_Qnli-dev_cosine_precision": 0.6229508196721312, "eval_Qnli-dev_cosine_recall": 0.8050847457627118, "eval_Qnli-dev_dot_accuracy": 0.658203125, "eval_Qnli-dev_dot_accuracy_threshold": 418.55694580078125, "eval_Qnli-dev_dot_ap": 0.6532207993640291, "eval_Qnli-dev_dot_f1": 0.6723842195540308, "eval_Qnli-dev_dot_f1_threshold": 375.86895751953125, "eval_Qnli-dev_dot_precision": 0.5648414985590778, "eval_Qnli-dev_dot_recall": 0.8305084745762712, "eval_Qnli-dev_euclidean_accuracy": 0.70703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.418533325195312, "eval_Qnli-dev_euclidean_ap": 0.7439997782812441, "eval_Qnli-dev_euclidean_f1": 0.7044673539518899, "eval_Qnli-dev_euclidean_f1_threshold": 15.451794624328613, "eval_Qnli-dev_euclidean_precision": 0.5924855491329479, "eval_Qnli-dev_euclidean_recall": 0.8686440677966102, "eval_Qnli-dev_manhattan_accuracy": 0.69921875, "eval_Qnli-dev_manhattan_accuracy_threshold": 281.7642517089844, "eval_Qnli-dev_manhattan_ap": 0.745379018581688, "eval_Qnli-dev_manhattan_f1": 0.70223752151463, "eval_Qnli-dev_manhattan_f1_threshold": 318.57647705078125, "eval_Qnli-dev_manhattan_precision": 0.591304347826087, "eval_Qnli-dev_manhattan_recall": 0.864406779661017, "eval_Qnli-dev_max_accuracy": 0.70703125, "eval_Qnli-dev_max_accuracy_threshold": 418.55694580078125, "eval_Qnli-dev_max_ap": 0.745379018581688, "eval_Qnli-dev_max_f1": 0.7044673539518899, "eval_Qnli-dev_max_f1_threshold": 375.86895751953125, "eval_Qnli-dev_max_precision": 0.6229508196721312, "eval_Qnli-dev_max_recall": 0.8686440677966102, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8881274461746216, "eval_allNLI-dev_cosine_ap": 0.6029662836602825, "eval_allNLI-dev_cosine_f1": 0.6047619047619047, "eval_allNLI-dev_cosine_f1_threshold": 0.8256221413612366, "eval_allNLI-dev_cosine_precision": 0.5141700404858299, "eval_allNLI-dev_cosine_recall": 0.7341040462427746, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 443.00537109375, "eval_allNLI-dev_dot_ap": 0.5307385620455893, "eval_allNLI-dev_dot_f1": 0.5751633986928105, "eval_allNLI-dev_dot_f1_threshold": 395.2938232421875, "eval_allNLI-dev_dot_precision": 0.46153846153846156, "eval_allNLI-dev_dot_recall": 0.7630057803468208, "eval_allNLI-dev_euclidean_accuracy": 0.7265625, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.200397491455078, "eval_allNLI-dev_euclidean_ap": 0.6054028078040996, "eval_allNLI-dev_euclidean_f1": 0.6042154566744731, "eval_allNLI-dev_euclidean_f1_threshold": 13.174400329589844, "eval_allNLI-dev_euclidean_precision": 0.5078740157480315, "eval_allNLI-dev_euclidean_recall": 0.7456647398843931, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 204.298828125, "eval_allNLI-dev_manhattan_ap": 0.6039169640925666, "eval_allNLI-dev_manhattan_f1": 0.5994694960212201, "eval_allNLI-dev_manhattan_f1_threshold": 254.79234313964844, "eval_allNLI-dev_manhattan_precision": 0.553921568627451, "eval_allNLI-dev_manhattan_recall": 0.653179190751445, "eval_allNLI-dev_max_accuracy": 0.73046875, "eval_allNLI-dev_max_accuracy_threshold": 443.00537109375, "eval_allNLI-dev_max_ap": 0.6054028078040996, "eval_allNLI-dev_max_f1": 0.6047619047619047, "eval_allNLI-dev_max_f1_threshold": 395.2938232421875, "eval_allNLI-dev_max_precision": 0.553921568627451, "eval_allNLI-dev_max_recall": 0.7630057803468208, "eval_sequential_score": 0.745379018581688, "eval_sts-test_pearson_cosine": 0.8219060292244447, "eval_sts-test_pearson_dot": 0.7914174536034212, "eval_sts-test_pearson_euclidean": 0.8498595001040936, "eval_sts-test_pearson_manhattan": 0.8479607961602269, "eval_sts-test_pearson_max": 0.8498595001040936, "eval_sts-test_spearman_cosine": 0.8433385949511971, "eval_sts-test_spearman_dot": 0.7770873060444821, "eval_sts-test_spearman_euclidean": 0.8418306116960912, "eval_sts-test_spearman_manhattan": 0.8404455601560273, "eval_sts-test_spearman_max": 0.8433385949511971, "eval_vitaminc-pairs_loss": 2.736114501953125, "eval_vitaminc-pairs_runtime": 3.2033, "eval_vitaminc-pairs_samples_per_second": 39.958, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 820 }, { "epoch": 0.8436213991769548, "eval_negation-triplets_loss": 1.0854538679122925, "eval_negation-triplets_runtime": 0.7435, "eval_negation-triplets_samples_per_second": 172.17, "eval_negation-triplets_steps_per_second": 1.345, "step": 820 }, { "epoch": 0.8436213991769548, "eval_scitail-pairs-pos_loss": 0.1739039570093155, "eval_scitail-pairs-pos_runtime": 0.8433, "eval_scitail-pairs-pos_samples_per_second": 151.783, "eval_scitail-pairs-pos_steps_per_second": 1.186, "step": 820 }, { "epoch": 0.8436213991769548, "eval_scitail-pairs-qa_loss": 0.003931767772883177, "eval_scitail-pairs-qa_runtime": 0.5977, "eval_scitail-pairs-qa_samples_per_second": 214.159, "eval_scitail-pairs-qa_steps_per_second": 1.673, "step": 820 }, { "epoch": 0.8436213991769548, "eval_xsum-pairs_loss": 0.6559375524520874, "eval_xsum-pairs_runtime": 3.0363, "eval_xsum-pairs_samples_per_second": 42.157, "eval_xsum-pairs_steps_per_second": 0.329, "step": 820 }, { "epoch": 0.8436213991769548, "eval_sciq_pairs_loss": 0.11851135641336441, "eval_sciq_pairs_runtime": 3.4675, "eval_sciq_pairs_samples_per_second": 36.914, "eval_sciq_pairs_steps_per_second": 0.288, "step": 820 }, { "epoch": 0.8436213991769548, "eval_qasc_pairs_loss": 0.3914608359336853, "eval_qasc_pairs_runtime": 0.6158, "eval_qasc_pairs_samples_per_second": 207.872, "eval_qasc_pairs_steps_per_second": 1.624, "step": 820 }, { "epoch": 0.8436213991769548, "eval_openbookqa_pairs_loss": 1.059507131576538, "eval_openbookqa_pairs_runtime": 0.5868, "eval_openbookqa_pairs_samples_per_second": 218.116, "eval_openbookqa_pairs_steps_per_second": 1.704, "step": 820 }, { "epoch": 0.8436213991769548, "eval_msmarco_pairs_loss": 1.0388420820236206, "eval_msmarco_pairs_runtime": 1.5195, "eval_msmarco_pairs_samples_per_second": 84.238, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 820 }, { "epoch": 0.8436213991769548, "eval_nq_pairs_loss": 1.4129403829574585, "eval_nq_pairs_runtime": 2.904, "eval_nq_pairs_samples_per_second": 44.077, "eval_nq_pairs_steps_per_second": 0.344, "step": 820 }, { "epoch": 0.8436213991769548, "eval_trivia_pairs_loss": 1.0265684127807617, "eval_trivia_pairs_runtime": 3.45, "eval_trivia_pairs_samples_per_second": 37.101, "eval_trivia_pairs_steps_per_second": 0.29, "step": 820 }, { "epoch": 0.8436213991769548, "eval_gooaq_pairs_loss": 0.6007567048072815, "eval_gooaq_pairs_runtime": 0.9464, "eval_gooaq_pairs_samples_per_second": 135.252, "eval_gooaq_pairs_steps_per_second": 1.057, "step": 820 }, { "epoch": 0.8436213991769548, "eval_paws-pos_loss": 0.03185836598277092, "eval_paws-pos_runtime": 0.6912, "eval_paws-pos_samples_per_second": 185.193, "eval_paws-pos_steps_per_second": 1.447, "step": 820 }, { "epoch": 0.8436213991769548, "eval_global_dataset_loss": 0.5914937257766724, "eval_global_dataset_runtime": 13.4037, "eval_global_dataset_samples_per_second": 31.036, "eval_global_dataset_steps_per_second": 0.298, "step": 820 }, { "epoch": 0.8446502057613169, "grad_norm": 10.527436256408691, "learning_rate": 2.973001038421599e-05, "loss": 1.3147, "step": 821 }, { "epoch": 0.845679012345679, "grad_norm": 13.003664016723633, "learning_rate": 2.9766355140186914e-05, "loss": 1.3354, "step": 822 }, { "epoch": 0.8467078189300411, "grad_norm": 2.857788324356079, "learning_rate": 2.9802699896157835e-05, "loss": 0.0733, "step": 823 }, { "epoch": 0.8477366255144033, "grad_norm": 11.483878135681152, "learning_rate": 2.9839044652128762e-05, "loss": 0.935, "step": 824 }, { "epoch": 0.8487654320987654, "grad_norm": 2.5351336002349854, "learning_rate": 2.9875389408099686e-05, "loss": 0.0684, "step": 825 }, { "epoch": 0.8497942386831275, "grad_norm": 8.322936058044434, "learning_rate": 2.9911734164070607e-05, "loss": 0.6753, "step": 826 }, { "epoch": 0.8508230452674898, "grad_norm": 1.9907835721969604, "learning_rate": 2.9948078920041534e-05, "loss": 0.0438, "step": 827 }, { "epoch": 0.8518518518518519, "grad_norm": 14.835284233093262, "learning_rate": 2.998442367601246e-05, "loss": 1.1541, "step": 828 }, { "epoch": 0.852880658436214, "grad_norm": 13.292768478393555, "learning_rate": 3.002076843198338e-05, "loss": 0.7087, "step": 829 }, { "epoch": 0.8539094650205762, "grad_norm": 9.636879920959473, "learning_rate": 3.0057113187954307e-05, "loss": 0.5391, "step": 830 }, { "epoch": 0.8549382716049383, "grad_norm": 8.648504257202148, "learning_rate": 3.009345794392523e-05, "loss": 0.3831, "step": 831 }, { "epoch": 0.8559670781893004, "grad_norm": 9.20128345489502, "learning_rate": 3.0129802699896155e-05, "loss": 0.4752, "step": 832 }, { "epoch": 0.8569958847736625, "grad_norm": 7.908294677734375, "learning_rate": 3.016614745586708e-05, "loss": 0.3662, "step": 833 }, { "epoch": 0.8580246913580247, "grad_norm": 18.368688583374023, "learning_rate": 3.0202492211838003e-05, "loss": 1.6192, "step": 834 }, { "epoch": 0.8590534979423868, "grad_norm": 2.4204726219177246, "learning_rate": 3.0238836967808927e-05, "loss": 0.0369, "step": 835 }, { "epoch": 0.8600823045267489, "grad_norm": 15.491935729980469, "learning_rate": 3.0275181723779854e-05, "loss": 1.3151, "step": 836 }, { "epoch": 0.8611111111111112, "grad_norm": 7.942100524902344, "learning_rate": 3.0311526479750775e-05, "loss": 0.4427, "step": 837 }, { "epoch": 0.8621399176954733, "grad_norm": 12.48727035522461, "learning_rate": 3.03478712357217e-05, "loss": 0.8185, "step": 838 }, { "epoch": 0.8631687242798354, "grad_norm": 9.763201713562012, "learning_rate": 3.0384215991692626e-05, "loss": 0.8389, "step": 839 }, { "epoch": 0.8641975308641975, "grad_norm": 1.3098586797714233, "learning_rate": 3.0420560747663547e-05, "loss": 0.0542, "step": 840 }, { "epoch": 0.8641975308641975, "eval_Qnli-dev_cosine_accuracy": 0.70703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7717105150222778, "eval_Qnli-dev_cosine_ap": 0.7440826784027825, "eval_Qnli-dev_cosine_f1": 0.7112676056338029, "eval_Qnli-dev_cosine_f1_threshold": 0.7245498895645142, "eval_Qnli-dev_cosine_precision": 0.608433734939759, "eval_Qnli-dev_cosine_recall": 0.8559322033898306, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 360.73333740234375, "eval_Qnli-dev_dot_ap": 0.6721991504226604, "eval_Qnli-dev_dot_f1": 0.6755852842809364, "eval_Qnli-dev_dot_f1_threshold": 324.03253173828125, "eval_Qnli-dev_dot_precision": 0.5580110497237569, "eval_Qnli-dev_dot_recall": 0.8559322033898306, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.42113208770752, "eval_Qnli-dev_euclidean_ap": 0.7499644370026034, "eval_Qnli-dev_euclidean_f1": 0.717391304347826, "eval_Qnli-dev_euclidean_f1_threshold": 15.633472442626953, "eval_Qnli-dev_euclidean_precision": 0.6265822784810127, "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 304.3686828613281, "eval_Qnli-dev_manhattan_ap": 0.7552844807907888, "eval_Qnli-dev_manhattan_f1": 0.7099236641221374, "eval_Qnli-dev_manhattan_f1_threshold": 313.06787109375, "eval_Qnli-dev_manhattan_precision": 0.6458333333333334, "eval_Qnli-dev_manhattan_recall": 0.788135593220339, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 360.73333740234375, "eval_Qnli-dev_max_ap": 0.7552844807907888, "eval_Qnli-dev_max_f1": 0.717391304347826, "eval_Qnli-dev_max_f1_threshold": 324.03253173828125, "eval_Qnli-dev_max_precision": 0.6458333333333334, "eval_Qnli-dev_max_recall": 0.8559322033898306, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8676639795303345, "eval_allNLI-dev_cosine_ap": 0.5974870153364504, "eval_allNLI-dev_cosine_f1": 0.5968819599109131, "eval_allNLI-dev_cosine_f1_threshold": 0.7826240062713623, "eval_allNLI-dev_cosine_precision": 0.4855072463768116, "eval_allNLI-dev_cosine_recall": 0.7745664739884393, "eval_allNLI-dev_dot_accuracy": 0.69140625, "eval_allNLI-dev_dot_accuracy_threshold": 390.4407653808594, "eval_allNLI-dev_dot_ap": 0.5132556641569763, "eval_allNLI-dev_dot_f1": 0.5889830508474576, "eval_allNLI-dev_dot_f1_threshold": 349.6282958984375, "eval_allNLI-dev_dot_precision": 0.46488294314381273, "eval_allNLI-dev_dot_recall": 0.8034682080924855, "eval_allNLI-dev_euclidean_accuracy": 0.7265625, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.85896110534668, "eval_allNLI-dev_euclidean_ap": 0.600945196021151, "eval_allNLI-dev_euclidean_f1": 0.6117136659436009, "eval_allNLI-dev_euclidean_f1_threshold": 14.226009368896484, "eval_allNLI-dev_euclidean_precision": 0.4895833333333333, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.724609375, "eval_allNLI-dev_manhattan_accuracy_threshold": 236.98345947265625, "eval_allNLI-dev_manhattan_ap": 0.595652875609926, "eval_allNLI-dev_manhattan_f1": 0.6052631578947368, "eval_allNLI-dev_manhattan_f1_threshold": 292.81805419921875, "eval_allNLI-dev_manhattan_precision": 0.4876325088339223, "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, "eval_allNLI-dev_max_accuracy": 0.73046875, "eval_allNLI-dev_max_accuracy_threshold": 390.4407653808594, "eval_allNLI-dev_max_ap": 0.600945196021151, "eval_allNLI-dev_max_f1": 0.6117136659436009, "eval_allNLI-dev_max_f1_threshold": 349.6282958984375, "eval_allNLI-dev_max_precision": 0.4895833333333333, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.7552844807907888, "eval_sts-test_pearson_cosine": 0.8197263747311968, "eval_sts-test_pearson_dot": 0.7930399784089159, "eval_sts-test_pearson_euclidean": 0.8451878041621638, "eval_sts-test_pearson_manhattan": 0.8428233677391169, "eval_sts-test_pearson_max": 0.8451878041621638, "eval_sts-test_spearman_cosine": 0.8404950102998648, "eval_sts-test_spearman_dot": 0.7717234646053703, "eval_sts-test_spearman_euclidean": 0.8380116285514719, "eval_sts-test_spearman_manhattan": 0.8359618417747002, "eval_sts-test_spearman_max": 0.8404950102998648, "eval_vitaminc-pairs_loss": 2.756269693374634, "eval_vitaminc-pairs_runtime": 3.1914, "eval_vitaminc-pairs_samples_per_second": 40.108, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 840 }, { "epoch": 0.8641975308641975, "eval_negation-triplets_loss": 1.07953941822052, "eval_negation-triplets_runtime": 0.7561, "eval_negation-triplets_samples_per_second": 169.298, "eval_negation-triplets_steps_per_second": 1.323, "step": 840 }, { "epoch": 0.8641975308641975, "eval_scitail-pairs-pos_loss": 0.15982350707054138, "eval_scitail-pairs-pos_runtime": 0.8403, "eval_scitail-pairs-pos_samples_per_second": 152.323, "eval_scitail-pairs-pos_steps_per_second": 1.19, "step": 840 }, { "epoch": 0.8641975308641975, "eval_scitail-pairs-qa_loss": 0.000591381685808301, "eval_scitail-pairs-qa_runtime": 0.59, "eval_scitail-pairs-qa_samples_per_second": 216.958, "eval_scitail-pairs-qa_steps_per_second": 1.695, "step": 840 }, { "epoch": 0.8641975308641975, "eval_xsum-pairs_loss": 0.620231568813324, "eval_xsum-pairs_runtime": 3.0356, "eval_xsum-pairs_samples_per_second": 42.166, "eval_xsum-pairs_steps_per_second": 0.329, "step": 840 }, { "epoch": 0.8641975308641975, "eval_sciq_pairs_loss": 0.1389157921075821, "eval_sciq_pairs_runtime": 3.4358, "eval_sciq_pairs_samples_per_second": 37.255, "eval_sciq_pairs_steps_per_second": 0.291, "step": 840 }, { "epoch": 0.8641975308641975, "eval_qasc_pairs_loss": 0.3473445475101471, "eval_qasc_pairs_runtime": 0.6187, "eval_qasc_pairs_samples_per_second": 206.89, "eval_qasc_pairs_steps_per_second": 1.616, "step": 840 }, { "epoch": 0.8641975308641975, "eval_openbookqa_pairs_loss": 1.0230737924575806, "eval_openbookqa_pairs_runtime": 0.5845, "eval_openbookqa_pairs_samples_per_second": 218.995, "eval_openbookqa_pairs_steps_per_second": 1.711, "step": 840 }, { "epoch": 0.8641975308641975, "eval_msmarco_pairs_loss": 1.2953341007232666, "eval_msmarco_pairs_runtime": 1.5198, "eval_msmarco_pairs_samples_per_second": 84.223, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 840 }, { "epoch": 0.8641975308641975, "eval_nq_pairs_loss": 1.5245081186294556, "eval_nq_pairs_runtime": 2.9025, "eval_nq_pairs_samples_per_second": 44.1, "eval_nq_pairs_steps_per_second": 0.345, "step": 840 }, { "epoch": 0.8641975308641975, "eval_trivia_pairs_loss": 1.1853358745574951, "eval_trivia_pairs_runtime": 3.4357, "eval_trivia_pairs_samples_per_second": 37.256, "eval_trivia_pairs_steps_per_second": 0.291, "step": 840 }, { "epoch": 0.8641975308641975, "eval_gooaq_pairs_loss": 0.6523827910423279, "eval_gooaq_pairs_runtime": 0.954, "eval_gooaq_pairs_samples_per_second": 134.174, "eval_gooaq_pairs_steps_per_second": 1.048, "step": 840 }, { "epoch": 0.8641975308641975, "eval_paws-pos_loss": 0.029700685292482376, "eval_paws-pos_runtime": 0.692, "eval_paws-pos_samples_per_second": 184.977, "eval_paws-pos_steps_per_second": 1.445, "step": 840 }, { "epoch": 0.8641975308641975, "eval_global_dataset_loss": 0.5667285919189453, "eval_global_dataset_runtime": 13.3955, "eval_global_dataset_samples_per_second": 31.055, "eval_global_dataset_steps_per_second": 0.299, "step": 840 }, { "epoch": 0.8652263374485597, "grad_norm": 11.04948902130127, "learning_rate": 3.045690550363447e-05, "loss": 0.6135, "step": 841 }, { "epoch": 0.8662551440329218, "grad_norm": 13.294988632202148, "learning_rate": 3.0493250259605398e-05, "loss": 1.4091, "step": 842 }, { "epoch": 0.8672839506172839, "grad_norm": 11.463438987731934, "learning_rate": 3.052959501557632e-05, "loss": 0.6724, "step": 843 }, { "epoch": 0.8683127572016461, "grad_norm": 2.22076678276062, "learning_rate": 3.0565939771547246e-05, "loss": 0.0353, "step": 844 }, { "epoch": 0.8693415637860082, "grad_norm": 9.272378921508789, "learning_rate": 3.0602284527518174e-05, "loss": 0.5297, "step": 845 }, { "epoch": 0.8703703703703703, "grad_norm": 10.213794708251953, "learning_rate": 3.0638629283489094e-05, "loss": 0.5211, "step": 846 }, { "epoch": 0.8713991769547325, "grad_norm": 12.306347846984863, "learning_rate": 3.0674974039460015e-05, "loss": 0.8431, "step": 847 }, { "epoch": 0.8724279835390947, "grad_norm": 10.407583236694336, "learning_rate": 3.071131879543094e-05, "loss": 0.7195, "step": 848 }, { "epoch": 0.8734567901234568, "grad_norm": 8.843184471130371, "learning_rate": 3.074766355140186e-05, "loss": 0.4965, "step": 849 }, { "epoch": 0.8744855967078189, "grad_norm": 10.975191116333008, "learning_rate": 3.078400830737279e-05, "loss": 0.7786, "step": 850 }, { "epoch": 0.8755144032921811, "grad_norm": 16.885013580322266, "learning_rate": 3.082035306334372e-05, "loss": 1.7078, "step": 851 }, { "epoch": 0.8765432098765432, "grad_norm": 10.905181884765625, "learning_rate": 3.085669781931464e-05, "loss": 0.6685, "step": 852 }, { "epoch": 0.8775720164609053, "grad_norm": 12.853326797485352, "learning_rate": 3.089304257528556e-05, "loss": 1.2114, "step": 853 }, { "epoch": 0.8786008230452675, "grad_norm": 9.456357955932617, "learning_rate": 3.0929387331256486e-05, "loss": 0.6199, "step": 854 }, { "epoch": 0.8796296296296297, "grad_norm": 15.603614807128906, "learning_rate": 3.0965732087227414e-05, "loss": 1.3311, "step": 855 }, { "epoch": 0.8806584362139918, "grad_norm": 10.03974437713623, "learning_rate": 3.1002076843198334e-05, "loss": 0.5718, "step": 856 }, { "epoch": 0.8816872427983539, "grad_norm": 8.548869132995605, "learning_rate": 3.103842159916926e-05, "loss": 0.4969, "step": 857 }, { "epoch": 0.8827160493827161, "grad_norm": 13.353643417358398, "learning_rate": 3.107476635514018e-05, "loss": 1.1514, "step": 858 }, { "epoch": 0.8837448559670782, "grad_norm": 11.166017532348633, "learning_rate": 3.111111111111111e-05, "loss": 1.361, "step": 859 }, { "epoch": 0.8847736625514403, "grad_norm": 10.166590690612793, "learning_rate": 3.114745586708203e-05, "loss": 0.801, "step": 860 }, { "epoch": 0.8847736625514403, "eval_Qnli-dev_cosine_accuracy": 0.720703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7913862466812134, "eval_Qnli-dev_cosine_ap": 0.7484120022003069, "eval_Qnli-dev_cosine_f1": 0.7155635062611807, "eval_Qnli-dev_cosine_f1_threshold": 0.7564002275466919, "eval_Qnli-dev_cosine_precision": 0.6191950464396285, "eval_Qnli-dev_cosine_recall": 0.847457627118644, "eval_Qnli-dev_dot_accuracy": 0.669921875, "eval_Qnli-dev_dot_accuracy_threshold": 381.15460205078125, "eval_Qnli-dev_dot_ap": 0.6554039139593089, "eval_Qnli-dev_dot_f1": 0.6929982046678635, "eval_Qnli-dev_dot_f1_threshold": 375.46405029296875, "eval_Qnli-dev_dot_precision": 0.6012461059190031, "eval_Qnli-dev_dot_recall": 0.8177966101694916, "eval_Qnli-dev_euclidean_accuracy": 0.724609375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.634359359741211, "eval_Qnli-dev_euclidean_ap": 0.7543039429243505, "eval_Qnli-dev_euclidean_f1": 0.7129798903107861, "eval_Qnli-dev_euclidean_f1_threshold": 15.349479675292969, "eval_Qnli-dev_euclidean_precision": 0.6270096463022508, "eval_Qnli-dev_euclidean_recall": 0.826271186440678, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 285.07244873046875, "eval_Qnli-dev_manhattan_ap": 0.7579405691223697, "eval_Qnli-dev_manhattan_f1": 0.7142857142857143, "eval_Qnli-dev_manhattan_f1_threshold": 311.107421875, "eval_Qnli-dev_manhattan_precision": 0.6560283687943262, "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, "eval_Qnli-dev_max_accuracy": 0.724609375, "eval_Qnli-dev_max_accuracy_threshold": 381.15460205078125, "eval_Qnli-dev_max_ap": 0.7579405691223697, "eval_Qnli-dev_max_f1": 0.7155635062611807, "eval_Qnli-dev_max_f1_threshold": 375.46405029296875, "eval_Qnli-dev_max_precision": 0.6560283687943262, "eval_Qnli-dev_max_recall": 0.847457627118644, "eval_allNLI-dev_cosine_accuracy": 0.736328125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8940784931182861, "eval_allNLI-dev_cosine_ap": 0.5955386793059732, "eval_allNLI-dev_cosine_f1": 0.6008064516129031, "eval_allNLI-dev_cosine_f1_threshold": 0.7910170555114746, "eval_allNLI-dev_cosine_precision": 0.4613003095975232, "eval_allNLI-dev_cosine_recall": 0.861271676300578, "eval_allNLI-dev_dot_accuracy": 0.69921875, "eval_allNLI-dev_dot_accuracy_threshold": 451.13623046875, "eval_allNLI-dev_dot_ap": 0.5115980330253447, "eval_allNLI-dev_dot_f1": 0.5823927765237021, "eval_allNLI-dev_dot_f1_threshold": 410.14447021484375, "eval_allNLI-dev_dot_precision": 0.4777777777777778, "eval_allNLI-dev_dot_recall": 0.7456647398843931, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.620901107788086, "eval_allNLI-dev_euclidean_ap": 0.5964863175832775, "eval_allNLI-dev_euclidean_f1": 0.6052104208416834, "eval_allNLI-dev_euclidean_f1_threshold": 14.621801376342773, "eval_allNLI-dev_euclidean_precision": 0.46319018404907975, "eval_allNLI-dev_euclidean_recall": 0.8728323699421965, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 226.17271423339844, "eval_allNLI-dev_manhattan_ap": 0.594460510313827, "eval_allNLI-dev_manhattan_f1": 0.6003976143141153, "eval_allNLI-dev_manhattan_f1_threshold": 303.3892517089844, "eval_allNLI-dev_manhattan_precision": 0.4575757575757576, "eval_allNLI-dev_manhattan_recall": 0.8728323699421965, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 451.13623046875, "eval_allNLI-dev_max_ap": 0.5964863175832775, "eval_allNLI-dev_max_f1": 0.6052104208416834, "eval_allNLI-dev_max_f1_threshold": 410.14447021484375, "eval_allNLI-dev_max_precision": 0.4777777777777778, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.7579405691223697, "eval_sts-test_pearson_cosine": 0.8116140949252031, "eval_sts-test_pearson_dot": 0.7799016605392657, "eval_sts-test_pearson_euclidean": 0.8407077538986545, "eval_sts-test_pearson_manhattan": 0.8400909131579789, "eval_sts-test_pearson_max": 0.8407077538986545, "eval_sts-test_spearman_cosine": 0.8376718769749885, "eval_sts-test_spearman_dot": 0.7616373358104539, "eval_sts-test_spearman_euclidean": 0.8340390590777574, "eval_sts-test_spearman_manhattan": 0.8319958059851489, "eval_sts-test_spearman_max": 0.8376718769749885, "eval_vitaminc-pairs_loss": 2.8492391109466553, "eval_vitaminc-pairs_runtime": 3.2108, "eval_vitaminc-pairs_samples_per_second": 39.866, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 860 }, { "epoch": 0.8847736625514403, "eval_negation-triplets_loss": 1.0788973569869995, "eval_negation-triplets_runtime": 0.7524, "eval_negation-triplets_samples_per_second": 170.133, "eval_negation-triplets_steps_per_second": 1.329, "step": 860 }, { "epoch": 0.8847736625514403, "eval_scitail-pairs-pos_loss": 0.19070731103420258, "eval_scitail-pairs-pos_runtime": 0.8535, "eval_scitail-pairs-pos_samples_per_second": 149.971, "eval_scitail-pairs-pos_steps_per_second": 1.172, "step": 860 }, { "epoch": 0.8847736625514403, "eval_scitail-pairs-qa_loss": 0.0008353625307790935, "eval_scitail-pairs-qa_runtime": 0.594, "eval_scitail-pairs-qa_samples_per_second": 215.481, "eval_scitail-pairs-qa_steps_per_second": 1.683, "step": 860 }, { "epoch": 0.8847736625514403, "eval_xsum-pairs_loss": 0.6224209666252136, "eval_xsum-pairs_runtime": 3.0265, "eval_xsum-pairs_samples_per_second": 42.293, "eval_xsum-pairs_steps_per_second": 0.33, "step": 860 }, { "epoch": 0.8847736625514403, "eval_sciq_pairs_loss": 0.12949666380882263, "eval_sciq_pairs_runtime": 3.4626, "eval_sciq_pairs_samples_per_second": 36.966, "eval_sciq_pairs_steps_per_second": 0.289, "step": 860 }, { "epoch": 0.8847736625514403, "eval_qasc_pairs_loss": 0.3247033953666687, "eval_qasc_pairs_runtime": 0.6062, "eval_qasc_pairs_samples_per_second": 211.165, "eval_qasc_pairs_steps_per_second": 1.65, "step": 860 }, { "epoch": 0.8847736625514403, "eval_openbookqa_pairs_loss": 1.1479803323745728, "eval_openbookqa_pairs_runtime": 0.6115, "eval_openbookqa_pairs_samples_per_second": 209.313, "eval_openbookqa_pairs_steps_per_second": 1.635, "step": 860 }, { "epoch": 0.8847736625514403, "eval_msmarco_pairs_loss": 1.1639130115509033, "eval_msmarco_pairs_runtime": 1.5482, "eval_msmarco_pairs_samples_per_second": 82.677, "eval_msmarco_pairs_steps_per_second": 0.646, "step": 860 }, { "epoch": 0.8847736625514403, "eval_nq_pairs_loss": 1.2727266550064087, "eval_nq_pairs_runtime": 2.8951, "eval_nq_pairs_samples_per_second": 44.212, "eval_nq_pairs_steps_per_second": 0.345, "step": 860 }, { "epoch": 0.8847736625514403, "eval_trivia_pairs_loss": 1.1261823177337646, "eval_trivia_pairs_runtime": 3.4344, "eval_trivia_pairs_samples_per_second": 37.27, "eval_trivia_pairs_steps_per_second": 0.291, "step": 860 }, { "epoch": 0.8847736625514403, "eval_gooaq_pairs_loss": 0.6438990831375122, "eval_gooaq_pairs_runtime": 0.949, "eval_gooaq_pairs_samples_per_second": 134.878, "eval_gooaq_pairs_steps_per_second": 1.054, "step": 860 }, { "epoch": 0.8847736625514403, "eval_paws-pos_loss": 0.02884558029472828, "eval_paws-pos_runtime": 0.6952, "eval_paws-pos_samples_per_second": 184.107, "eval_paws-pos_steps_per_second": 1.438, "step": 860 }, { "epoch": 0.8847736625514403, "eval_global_dataset_loss": 0.5700183510780334, "eval_global_dataset_runtime": 13.3817, "eval_global_dataset_samples_per_second": 31.087, "eval_global_dataset_steps_per_second": 0.299, "step": 860 }, { "epoch": 0.8858024691358025, "grad_norm": 14.423850059509277, "learning_rate": 3.118380062305296e-05, "loss": 1.5436, "step": 861 }, { "epoch": 0.8868312757201646, "grad_norm": 6.131687164306641, "learning_rate": 3.122014537902388e-05, "loss": 0.2666, "step": 862 }, { "epoch": 0.8878600823045267, "grad_norm": 8.292266845703125, "learning_rate": 3.1256490134994806e-05, "loss": 0.5436, "step": 863 }, { "epoch": 0.8888888888888888, "grad_norm": 15.915453910827637, "learning_rate": 3.1292834890965727e-05, "loss": 1.8489, "step": 864 }, { "epoch": 0.8899176954732511, "grad_norm": 15.952044486999512, "learning_rate": 3.1329179646936654e-05, "loss": 1.3624, "step": 865 }, { "epoch": 0.8909465020576132, "grad_norm": 0.0, "learning_rate": 3.136552440290758e-05, "loss": 0.0, "step": 866 }, { "epoch": 0.8919753086419753, "grad_norm": 8.352066993713379, "learning_rate": 3.14018691588785e-05, "loss": 0.5817, "step": 867 }, { "epoch": 0.8930041152263375, "grad_norm": 0.0, "learning_rate": 3.143821391484942e-05, "loss": 0.0, "step": 868 }, { "epoch": 0.8940329218106996, "grad_norm": 7.5998640060424805, "learning_rate": 3.147455867082035e-05, "loss": 0.4523, "step": 869 }, { "epoch": 0.8950617283950617, "grad_norm": 9.014819145202637, "learning_rate": 3.151090342679128e-05, "loss": 0.8566, "step": 870 }, { "epoch": 0.8960905349794238, "grad_norm": 9.435276985168457, "learning_rate": 3.15472481827622e-05, "loss": 0.5609, "step": 871 }, { "epoch": 0.897119341563786, "grad_norm": 6.9305219650268555, "learning_rate": 3.1583592938733126e-05, "loss": 0.4103, "step": 872 }, { "epoch": 0.8981481481481481, "grad_norm": 7.279191493988037, "learning_rate": 3.1619937694704046e-05, "loss": 0.4226, "step": 873 }, { "epoch": 0.8991769547325102, "grad_norm": 11.90969181060791, "learning_rate": 3.165628245067497e-05, "loss": 1.3344, "step": 874 }, { "epoch": 0.9002057613168725, "grad_norm": 1.5162785053253174, "learning_rate": 3.1692627206645894e-05, "loss": 0.0354, "step": 875 }, { "epoch": 0.9012345679012346, "grad_norm": 1.3723441362380981, "learning_rate": 3.172897196261682e-05, "loss": 0.0377, "step": 876 }, { "epoch": 0.9022633744855967, "grad_norm": 12.883326530456543, "learning_rate": 3.176531671858774e-05, "loss": 1.1204, "step": 877 }, { "epoch": 0.9032921810699589, "grad_norm": 0.0, "learning_rate": 3.180166147455867e-05, "loss": 0.0, "step": 878 }, { "epoch": 0.904320987654321, "grad_norm": 8.576708793640137, "learning_rate": 3.183800623052959e-05, "loss": 0.4222, "step": 879 }, { "epoch": 0.9053497942386831, "grad_norm": 9.18001651763916, "learning_rate": 3.187435098650052e-05, "loss": 0.5895, "step": 880 }, { "epoch": 0.9053497942386831, "eval_Qnli-dev_cosine_accuracy": 0.7109375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7849442958831787, "eval_Qnli-dev_cosine_ap": 0.7532800500199506, "eval_Qnli-dev_cosine_f1": 0.6976744186046511, "eval_Qnli-dev_cosine_f1_threshold": 0.7686007022857666, "eval_Qnli-dev_cosine_precision": 0.6428571428571429, "eval_Qnli-dev_cosine_recall": 0.7627118644067796, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 410.07305908203125, "eval_Qnli-dev_dot_ap": 0.6772857891299546, "eval_Qnli-dev_dot_f1": 0.6719745222929936, "eval_Qnli-dev_dot_f1_threshold": 344.32025146484375, "eval_Qnli-dev_dot_precision": 0.5382653061224489, "eval_Qnli-dev_dot_recall": 0.8940677966101694, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.577789306640625, "eval_Qnli-dev_euclidean_ap": 0.7582704343457749, "eval_Qnli-dev_euclidean_f1": 0.703125, "eval_Qnli-dev_euclidean_f1_threshold": 15.108866691589355, "eval_Qnli-dev_euclidean_precision": 0.6521739130434783, "eval_Qnli-dev_euclidean_recall": 0.7627118644067796, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 296.60125732421875, "eval_Qnli-dev_manhattan_ap": 0.7626076733761872, "eval_Qnli-dev_manhattan_f1": 0.7037701974865348, "eval_Qnli-dev_manhattan_f1_threshold": 324.86553955078125, "eval_Qnli-dev_manhattan_precision": 0.6105919003115264, "eval_Qnli-dev_manhattan_recall": 0.8305084745762712, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 410.07305908203125, "eval_Qnli-dev_max_ap": 0.7626076733761872, "eval_Qnli-dev_max_f1": 0.7037701974865348, "eval_Qnli-dev_max_f1_threshold": 344.32025146484375, "eval_Qnli-dev_max_precision": 0.6521739130434783, "eval_Qnli-dev_max_recall": 0.8940677966101694, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8640371561050415, "eval_allNLI-dev_cosine_ap": 0.5971175118697716, "eval_allNLI-dev_cosine_f1": 0.5973451327433628, "eval_allNLI-dev_cosine_f1_threshold": 0.7674254179000854, "eval_allNLI-dev_cosine_precision": 0.4838709677419355, "eval_allNLI-dev_cosine_recall": 0.7803468208092486, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 423.58746337890625, "eval_allNLI-dev_dot_ap": 0.5249909079017288, "eval_allNLI-dev_dot_f1": 0.5751879699248119, "eval_allNLI-dev_dot_f1_threshold": 344.40423583984375, "eval_allNLI-dev_dot_precision": 0.42618384401114207, "eval_allNLI-dev_dot_recall": 0.884393063583815, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.598902702331543, "eval_allNLI-dev_euclidean_ap": 0.6004950564369994, "eval_allNLI-dev_euclidean_f1": 0.5964125560538117, "eval_allNLI-dev_euclidean_f1_threshold": 14.896963119506836, "eval_allNLI-dev_euclidean_precision": 0.48717948717948717, "eval_allNLI-dev_euclidean_recall": 0.7687861271676301, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 225.4422149658203, "eval_allNLI-dev_manhattan_ap": 0.5975087335410215, "eval_allNLI-dev_manhattan_f1": 0.5961945031712473, "eval_allNLI-dev_manhattan_f1_threshold": 320.1939697265625, "eval_allNLI-dev_manhattan_precision": 0.47, "eval_allNLI-dev_manhattan_recall": 0.815028901734104, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 423.58746337890625, "eval_allNLI-dev_max_ap": 0.6004950564369994, "eval_allNLI-dev_max_f1": 0.5973451327433628, "eval_allNLI-dev_max_f1_threshold": 344.40423583984375, "eval_allNLI-dev_max_precision": 0.48717948717948717, "eval_allNLI-dev_max_recall": 0.884393063583815, "eval_sequential_score": 0.7626076733761872, "eval_sts-test_pearson_cosine": 0.8236499611459422, "eval_sts-test_pearson_dot": 0.7997972279606418, "eval_sts-test_pearson_euclidean": 0.8506072277669228, "eval_sts-test_pearson_manhattan": 0.8491673694905203, "eval_sts-test_pearson_max": 0.8506072277669228, "eval_sts-test_spearman_cosine": 0.8462692655571971, "eval_sts-test_spearman_dot": 0.7806738958886958, "eval_sts-test_spearman_euclidean": 0.8433683545400226, "eval_sts-test_spearman_manhattan": 0.8414020288657458, "eval_sts-test_spearman_max": 0.8462692655571971, "eval_vitaminc-pairs_loss": 2.9979610443115234, "eval_vitaminc-pairs_runtime": 3.229, "eval_vitaminc-pairs_samples_per_second": 39.641, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 880 }, { "epoch": 0.9053497942386831, "eval_negation-triplets_loss": 1.1333051919937134, "eval_negation-triplets_runtime": 0.7676, "eval_negation-triplets_samples_per_second": 166.759, "eval_negation-triplets_steps_per_second": 1.303, "step": 880 }, { "epoch": 0.9053497942386831, "eval_scitail-pairs-pos_loss": 0.1798580139875412, "eval_scitail-pairs-pos_runtime": 0.8519, "eval_scitail-pairs-pos_samples_per_second": 150.255, "eval_scitail-pairs-pos_steps_per_second": 1.174, "step": 880 }, { "epoch": 0.9053497942386831, "eval_scitail-pairs-qa_loss": 0.001077975844964385, "eval_scitail-pairs-qa_runtime": 0.5945, "eval_scitail-pairs-qa_samples_per_second": 215.312, "eval_scitail-pairs-qa_steps_per_second": 1.682, "step": 880 }, { "epoch": 0.9053497942386831, "eval_xsum-pairs_loss": 0.5809869170188904, "eval_xsum-pairs_runtime": 3.0246, "eval_xsum-pairs_samples_per_second": 42.32, "eval_xsum-pairs_steps_per_second": 0.331, "step": 880 }, { "epoch": 0.9053497942386831, "eval_sciq_pairs_loss": 0.12553882598876953, "eval_sciq_pairs_runtime": 3.4868, "eval_sciq_pairs_samples_per_second": 36.71, "eval_sciq_pairs_steps_per_second": 0.287, "step": 880 }, { "epoch": 0.9053497942386831, "eval_qasc_pairs_loss": 0.36939769983291626, "eval_qasc_pairs_runtime": 0.6232, "eval_qasc_pairs_samples_per_second": 205.388, "eval_qasc_pairs_steps_per_second": 1.605, "step": 880 }, { "epoch": 0.9053497942386831, "eval_openbookqa_pairs_loss": 1.1518361568450928, "eval_openbookqa_pairs_runtime": 0.5953, "eval_openbookqa_pairs_samples_per_second": 215.007, "eval_openbookqa_pairs_steps_per_second": 1.68, "step": 880 }, { "epoch": 0.9053497942386831, "eval_msmarco_pairs_loss": 1.1596477031707764, "eval_msmarco_pairs_runtime": 1.5245, "eval_msmarco_pairs_samples_per_second": 83.963, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 880 }, { "epoch": 0.9053497942386831, "eval_nq_pairs_loss": 1.362251877784729, "eval_nq_pairs_runtime": 2.9215, "eval_nq_pairs_samples_per_second": 43.813, "eval_nq_pairs_steps_per_second": 0.342, "step": 880 }, { "epoch": 0.9053497942386831, "eval_trivia_pairs_loss": 1.1808240413665771, "eval_trivia_pairs_runtime": 3.4414, "eval_trivia_pairs_samples_per_second": 37.194, "eval_trivia_pairs_steps_per_second": 0.291, "step": 880 }, { "epoch": 0.9053497942386831, "eval_gooaq_pairs_loss": 0.5690011382102966, "eval_gooaq_pairs_runtime": 0.9675, "eval_gooaq_pairs_samples_per_second": 132.304, "eval_gooaq_pairs_steps_per_second": 1.034, "step": 880 }, { "epoch": 0.9053497942386831, "eval_paws-pos_loss": 0.026389779523015022, "eval_paws-pos_runtime": 0.7184, "eval_paws-pos_samples_per_second": 178.184, "eval_paws-pos_steps_per_second": 1.392, "step": 880 }, { "epoch": 0.9053497942386831, "eval_global_dataset_loss": 0.6112414598464966, "eval_global_dataset_runtime": 13.4861, "eval_global_dataset_samples_per_second": 30.847, "eval_global_dataset_steps_per_second": 0.297, "step": 880 }, { "epoch": 0.9063786008230452, "grad_norm": 10.299396514892578, "learning_rate": 3.191069574247144e-05, "loss": 0.645, "step": 881 }, { "epoch": 0.9074074074074074, "grad_norm": 9.826093673706055, "learning_rate": 3.1947040498442366e-05, "loss": 0.593, "step": 882 }, { "epoch": 0.9084362139917695, "grad_norm": 8.907341003417969, "learning_rate": 3.1983385254413286e-05, "loss": 0.4322, "step": 883 }, { "epoch": 0.9094650205761317, "grad_norm": 2.4184072017669678, "learning_rate": 3.2019730010384214e-05, "loss": 0.0629, "step": 884 }, { "epoch": 0.9104938271604939, "grad_norm": 10.5604829788208, "learning_rate": 3.2056074766355134e-05, "loss": 1.0485, "step": 885 }, { "epoch": 0.911522633744856, "grad_norm": 7.115394592285156, "learning_rate": 3.209241952232606e-05, "loss": 0.3749, "step": 886 }, { "epoch": 0.9125514403292181, "grad_norm": 0.8468412756919861, "learning_rate": 3.212876427829699e-05, "loss": 0.0211, "step": 887 }, { "epoch": 0.9135802469135802, "grad_norm": 8.189038276672363, "learning_rate": 3.216510903426791e-05, "loss": 0.4037, "step": 888 }, { "epoch": 0.9146090534979424, "grad_norm": 13.44206428527832, "learning_rate": 3.220145379023883e-05, "loss": 1.3921, "step": 889 }, { "epoch": 0.9156378600823045, "grad_norm": 19.440120697021484, "learning_rate": 3.223779854620976e-05, "loss": 1.6863, "step": 890 }, { "epoch": 0.9166666666666666, "grad_norm": 10.532525062561035, "learning_rate": 3.2274143302180685e-05, "loss": 0.7386, "step": 891 }, { "epoch": 0.9176954732510288, "grad_norm": 17.706409454345703, "learning_rate": 3.2310488058151606e-05, "loss": 1.4284, "step": 892 }, { "epoch": 0.918724279835391, "grad_norm": 9.419487953186035, "learning_rate": 3.234683281412253e-05, "loss": 0.9042, "step": 893 }, { "epoch": 0.9197530864197531, "grad_norm": 8.609904289245605, "learning_rate": 3.2383177570093454e-05, "loss": 0.5269, "step": 894 }, { "epoch": 0.9207818930041153, "grad_norm": 1.3929104804992676, "learning_rate": 3.2419522326064375e-05, "loss": 0.0262, "step": 895 }, { "epoch": 0.9218106995884774, "grad_norm": 9.115885734558105, "learning_rate": 3.24558670820353e-05, "loss": 0.4829, "step": 896 }, { "epoch": 0.9228395061728395, "grad_norm": 2.3302252292633057, "learning_rate": 3.249221183800623e-05, "loss": 0.0402, "step": 897 }, { "epoch": 0.9238683127572016, "grad_norm": 16.796581268310547, "learning_rate": 3.252855659397715e-05, "loss": 1.7338, "step": 898 }, { "epoch": 0.9248971193415638, "grad_norm": 8.234830856323242, "learning_rate": 3.256490134994808e-05, "loss": 0.5067, "step": 899 }, { "epoch": 0.9259259259259259, "grad_norm": 0.0, "learning_rate": 3.2601246105919e-05, "loss": 0.0, "step": 900 }, { "epoch": 0.9259259259259259, "eval_Qnli-dev_cosine_accuracy": 0.720703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7808551788330078, "eval_Qnli-dev_cosine_ap": 0.7722935104666544, "eval_Qnli-dev_cosine_f1": 0.7180451127819548, "eval_Qnli-dev_cosine_f1_threshold": 0.7518417835235596, "eval_Qnli-dev_cosine_precision": 0.6452702702702703, "eval_Qnli-dev_cosine_recall": 0.809322033898305, "eval_Qnli-dev_dot_accuracy": 0.68359375, "eval_Qnli-dev_dot_accuracy_threshold": 375.266357421875, "eval_Qnli-dev_dot_ap": 0.705847017080601, "eval_Qnli-dev_dot_f1": 0.693103448275862, "eval_Qnli-dev_dot_f1_threshold": 338.9654541015625, "eval_Qnli-dev_dot_precision": 0.5843023255813954, "eval_Qnli-dev_dot_recall": 0.8516949152542372, "eval_Qnli-dev_euclidean_accuracy": 0.728515625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.228927612304688, "eval_Qnli-dev_euclidean_ap": 0.7773104444806713, "eval_Qnli-dev_euclidean_f1": 0.71875, "eval_Qnli-dev_euclidean_f1_threshold": 14.866127014160156, "eval_Qnli-dev_euclidean_precision": 0.6666666666666666, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.732421875, "eval_Qnli-dev_manhattan_accuracy_threshold": 300.7906494140625, "eval_Qnli-dev_manhattan_ap": 0.7788691432055757, "eval_Qnli-dev_manhattan_f1": 0.7186858316221767, "eval_Qnli-dev_manhattan_f1_threshold": 300.7906494140625, "eval_Qnli-dev_manhattan_precision": 0.6972111553784861, "eval_Qnli-dev_manhattan_recall": 0.7415254237288136, "eval_Qnli-dev_max_accuracy": 0.732421875, "eval_Qnli-dev_max_accuracy_threshold": 375.266357421875, "eval_Qnli-dev_max_ap": 0.7788691432055757, "eval_Qnli-dev_max_f1": 0.71875, "eval_Qnli-dev_max_f1_threshold": 338.9654541015625, "eval_Qnli-dev_max_precision": 0.6972111553784861, "eval_Qnli-dev_max_recall": 0.8516949152542372, "eval_allNLI-dev_cosine_accuracy": 0.736328125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8723114728927612, "eval_allNLI-dev_cosine_ap": 0.6103803128378515, "eval_allNLI-dev_cosine_f1": 0.6218487394957984, "eval_allNLI-dev_cosine_f1_threshold": 0.7703201770782471, "eval_allNLI-dev_cosine_precision": 0.4884488448844885, "eval_allNLI-dev_cosine_recall": 0.8554913294797688, "eval_allNLI-dev_dot_accuracy": 0.6875, "eval_allNLI-dev_dot_accuracy_threshold": 450.7716064453125, "eval_allNLI-dev_dot_ap": 0.5404695476141235, "eval_allNLI-dev_dot_f1": 0.581532416502947, "eval_allNLI-dev_dot_f1_threshold": 342.1371765136719, "eval_allNLI-dev_dot_precision": 0.44047619047619047, "eval_allNLI-dev_dot_recall": 0.8554913294797688, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.053747177124023, "eval_allNLI-dev_euclidean_ap": 0.6129537867372233, "eval_allNLI-dev_euclidean_f1": 0.6365591397849463, "eval_allNLI-dev_euclidean_f1_threshold": 14.339694023132324, "eval_allNLI-dev_euclidean_precision": 0.5068493150684932, "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 228.95936584472656, "eval_allNLI-dev_manhattan_ap": 0.6105307113025781, "eval_allNLI-dev_manhattan_f1": 0.6291666666666667, "eval_allNLI-dev_manhattan_f1_threshold": 303.91961669921875, "eval_allNLI-dev_manhattan_precision": 0.49185667752442996, "eval_allNLI-dev_manhattan_recall": 0.8728323699421965, "eval_allNLI-dev_max_accuracy": 0.7421875, "eval_allNLI-dev_max_accuracy_threshold": 450.7716064453125, "eval_allNLI-dev_max_ap": 0.6129537867372233, "eval_allNLI-dev_max_f1": 0.6365591397849463, "eval_allNLI-dev_max_f1_threshold": 342.1371765136719, "eval_allNLI-dev_max_precision": 0.5068493150684932, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.7788691432055757, "eval_sts-test_pearson_cosine": 0.821250155739432, "eval_sts-test_pearson_dot": 0.7884449998485221, "eval_sts-test_pearson_euclidean": 0.8476165058712835, "eval_sts-test_pearson_manhattan": 0.8428075499119236, "eval_sts-test_pearson_max": 0.8476165058712835, "eval_sts-test_spearman_cosine": 0.8461122781322361, "eval_sts-test_spearman_dot": 0.7783341209381078, "eval_sts-test_spearman_euclidean": 0.8401831835104896, "eval_sts-test_spearman_manhattan": 0.8381286083487489, "eval_sts-test_spearman_max": 0.8461122781322361, "eval_vitaminc-pairs_loss": 2.6282498836517334, "eval_vitaminc-pairs_runtime": 3.2006, "eval_vitaminc-pairs_samples_per_second": 39.992, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 900 }, { "epoch": 0.9259259259259259, "eval_negation-triplets_loss": 1.0272082090377808, "eval_negation-triplets_runtime": 0.7536, "eval_negation-triplets_samples_per_second": 169.85, "eval_negation-triplets_steps_per_second": 1.327, "step": 900 }, { "epoch": 0.9259259259259259, "eval_scitail-pairs-pos_loss": 0.1884053498506546, "eval_scitail-pairs-pos_runtime": 0.8451, "eval_scitail-pairs-pos_samples_per_second": 151.452, "eval_scitail-pairs-pos_steps_per_second": 1.183, "step": 900 }, { "epoch": 0.9259259259259259, "eval_scitail-pairs-qa_loss": 0.0004345515335444361, "eval_scitail-pairs-qa_runtime": 0.5987, "eval_scitail-pairs-qa_samples_per_second": 213.797, "eval_scitail-pairs-qa_steps_per_second": 1.67, "step": 900 }, { "epoch": 0.9259259259259259, "eval_xsum-pairs_loss": 0.5701841711997986, "eval_xsum-pairs_runtime": 3.0401, "eval_xsum-pairs_samples_per_second": 42.103, "eval_xsum-pairs_steps_per_second": 0.329, "step": 900 }, { "epoch": 0.9259259259259259, "eval_sciq_pairs_loss": 0.12735481560230255, "eval_sciq_pairs_runtime": 3.4518, "eval_sciq_pairs_samples_per_second": 37.082, "eval_sciq_pairs_steps_per_second": 0.29, "step": 900 }, { "epoch": 0.9259259259259259, "eval_qasc_pairs_loss": 0.33931973576545715, "eval_qasc_pairs_runtime": 0.6061, "eval_qasc_pairs_samples_per_second": 211.189, "eval_qasc_pairs_steps_per_second": 1.65, "step": 900 }, { "epoch": 0.9259259259259259, "eval_openbookqa_pairs_loss": 1.055425763130188, "eval_openbookqa_pairs_runtime": 0.5844, "eval_openbookqa_pairs_samples_per_second": 219.038, "eval_openbookqa_pairs_steps_per_second": 1.711, "step": 900 }, { "epoch": 0.9259259259259259, "eval_msmarco_pairs_loss": 1.1918123960494995, "eval_msmarco_pairs_runtime": 1.522, "eval_msmarco_pairs_samples_per_second": 84.102, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 900 }, { "epoch": 0.9259259259259259, "eval_nq_pairs_loss": 1.2023570537567139, "eval_nq_pairs_runtime": 2.8987, "eval_nq_pairs_samples_per_second": 44.157, "eval_nq_pairs_steps_per_second": 0.345, "step": 900 }, { "epoch": 0.9259259259259259, "eval_trivia_pairs_loss": 1.2772942781448364, "eval_trivia_pairs_runtime": 3.4497, "eval_trivia_pairs_samples_per_second": 37.104, "eval_trivia_pairs_steps_per_second": 0.29, "step": 900 }, { "epoch": 0.9259259259259259, "eval_gooaq_pairs_loss": 0.561891496181488, "eval_gooaq_pairs_runtime": 0.9472, "eval_gooaq_pairs_samples_per_second": 135.133, "eval_gooaq_pairs_steps_per_second": 1.056, "step": 900 }, { "epoch": 0.9259259259259259, "eval_paws-pos_loss": 0.028995605185627937, "eval_paws-pos_runtime": 0.6977, "eval_paws-pos_samples_per_second": 183.452, "eval_paws-pos_steps_per_second": 1.433, "step": 900 }, { "epoch": 0.9259259259259259, "eval_global_dataset_loss": 0.5420277118682861, "eval_global_dataset_runtime": 13.3831, "eval_global_dataset_samples_per_second": 31.084, "eval_global_dataset_steps_per_second": 0.299, "step": 900 }, { "epoch": 0.926954732510288, "grad_norm": 11.65727710723877, "learning_rate": 3.2637590861889925e-05, "loss": 1.1782, "step": 901 }, { "epoch": 0.9279835390946503, "grad_norm": 16.935638427734375, "learning_rate": 3.267393561786085e-05, "loss": 2.7273, "step": 902 }, { "epoch": 0.9290123456790124, "grad_norm": 9.754632949829102, "learning_rate": 3.2710280373831774e-05, "loss": 0.4795, "step": 903 }, { "epoch": 0.9300411522633745, "grad_norm": 8.847827911376953, "learning_rate": 3.2746625129802694e-05, "loss": 0.5948, "step": 904 }, { "epoch": 0.9310699588477366, "grad_norm": 18.149011611938477, "learning_rate": 3.278296988577362e-05, "loss": 1.8623, "step": 905 }, { "epoch": 0.9320987654320988, "grad_norm": 14.660048484802246, "learning_rate": 3.281931464174454e-05, "loss": 1.5336, "step": 906 }, { "epoch": 0.9331275720164609, "grad_norm": 7.183665752410889, "learning_rate": 3.285565939771547e-05, "loss": 0.3394, "step": 907 }, { "epoch": 0.934156378600823, "grad_norm": 2.1199798583984375, "learning_rate": 3.28920041536864e-05, "loss": 0.048, "step": 908 }, { "epoch": 0.9351851851851852, "grad_norm": 11.716431617736816, "learning_rate": 3.292834890965732e-05, "loss": 1.326, "step": 909 }, { "epoch": 0.9362139917695473, "grad_norm": 9.196380615234375, "learning_rate": 3.296469366562824e-05, "loss": 1.0024, "step": 910 }, { "epoch": 0.9372427983539094, "grad_norm": 8.285309791564941, "learning_rate": 3.3001038421599166e-05, "loss": 0.5757, "step": 911 }, { "epoch": 0.9382716049382716, "grad_norm": 13.583939552307129, "learning_rate": 3.303738317757009e-05, "loss": 1.3069, "step": 912 }, { "epoch": 0.9393004115226338, "grad_norm": 8.123488426208496, "learning_rate": 3.3073727933541014e-05, "loss": 0.5979, "step": 913 }, { "epoch": 0.9403292181069959, "grad_norm": 2.981621503829956, "learning_rate": 3.311007268951194e-05, "loss": 0.0798, "step": 914 }, { "epoch": 0.941358024691358, "grad_norm": 9.019003868103027, "learning_rate": 3.314641744548286e-05, "loss": 0.4823, "step": 915 }, { "epoch": 0.9423868312757202, "grad_norm": 6.8525261878967285, "learning_rate": 3.318276220145379e-05, "loss": 0.3875, "step": 916 }, { "epoch": 0.9434156378600823, "grad_norm": 12.951005935668945, "learning_rate": 3.321910695742471e-05, "loss": 1.4076, "step": 917 }, { "epoch": 0.9444444444444444, "grad_norm": 7.870869159698486, "learning_rate": 3.325545171339564e-05, "loss": 0.3932, "step": 918 }, { "epoch": 0.9454732510288066, "grad_norm": 0.0, "learning_rate": 3.329179646936656e-05, "loss": 0.0, "step": 919 }, { "epoch": 0.9465020576131687, "grad_norm": 14.922693252563477, "learning_rate": 3.3328141225337485e-05, "loss": 1.3184, "step": 920 }, { "epoch": 0.9465020576131687, "eval_Qnli-dev_cosine_accuracy": 0.7109375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7943984866142273, "eval_Qnli-dev_cosine_ap": 0.7615856123966769, "eval_Qnli-dev_cosine_f1": 0.7084078711985689, "eval_Qnli-dev_cosine_f1_threshold": 0.7604844570159912, "eval_Qnli-dev_cosine_precision": 0.6130030959752322, "eval_Qnli-dev_cosine_recall": 0.8389830508474576, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 397.829833984375, "eval_Qnli-dev_dot_ap": 0.704400855131843, "eval_Qnli-dev_dot_f1": 0.6764227642276421, "eval_Qnli-dev_dot_f1_threshold": 353.55364990234375, "eval_Qnli-dev_dot_precision": 0.5488126649076517, "eval_Qnli-dev_dot_recall": 0.8813559322033898, "eval_Qnli-dev_euclidean_accuracy": 0.720703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.625345230102539, "eval_Qnli-dev_euclidean_ap": 0.7640414687041563, "eval_Qnli-dev_euclidean_f1": 0.7021276595744681, "eval_Qnli-dev_euclidean_f1_threshold": 15.56657600402832, "eval_Qnli-dev_euclidean_precision": 0.6036585365853658, "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 268.2149353027344, "eval_Qnli-dev_manhattan_ap": 0.7651119124586969, "eval_Qnli-dev_manhattan_f1": 0.701492537313433, "eval_Qnli-dev_manhattan_f1_threshold": 313.13140869140625, "eval_Qnli-dev_manhattan_precision": 0.6266666666666667, "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, "eval_Qnli-dev_max_accuracy": 0.720703125, "eval_Qnli-dev_max_accuracy_threshold": 397.829833984375, "eval_Qnli-dev_max_ap": 0.7651119124586969, "eval_Qnli-dev_max_f1": 0.7084078711985689, "eval_Qnli-dev_max_f1_threshold": 353.55364990234375, "eval_Qnli-dev_max_precision": 0.6266666666666667, "eval_Qnli-dev_max_recall": 0.8813559322033898, "eval_allNLI-dev_cosine_accuracy": 0.734375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8742256164550781, "eval_allNLI-dev_cosine_ap": 0.6047802392072363, "eval_allNLI-dev_cosine_f1": 0.6266666666666667, "eval_allNLI-dev_cosine_f1_threshold": 0.8091484308242798, "eval_allNLI-dev_cosine_precision": 0.5090252707581228, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.6875, "eval_allNLI-dev_dot_accuracy_threshold": 447.2115478515625, "eval_allNLI-dev_dot_ap": 0.5177728257758492, "eval_allNLI-dev_dot_f1": 0.5864978902953586, "eval_allNLI-dev_dot_f1_threshold": 374.89508056640625, "eval_allNLI-dev_dot_precision": 0.46179401993355484, "eval_allNLI-dev_dot_recall": 0.8034682080924855, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.74878978729248, "eval_allNLI-dev_euclidean_ap": 0.6093991550091531, "eval_allNLI-dev_euclidean_f1": 0.6281755196304851, "eval_allNLI-dev_euclidean_f1_threshold": 13.34119987487793, "eval_allNLI-dev_euclidean_precision": 0.5230769230769231, "eval_allNLI-dev_euclidean_recall": 0.7861271676300579, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 218.954833984375, "eval_allNLI-dev_manhattan_ap": 0.6071347019543764, "eval_allNLI-dev_manhattan_f1": 0.6219512195121951, "eval_allNLI-dev_manhattan_f1_threshold": 300.4981994628906, "eval_allNLI-dev_manhattan_precision": 0.47962382445141066, "eval_allNLI-dev_manhattan_recall": 0.884393063583815, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 447.2115478515625, "eval_allNLI-dev_max_ap": 0.6093991550091531, "eval_allNLI-dev_max_f1": 0.6281755196304851, "eval_allNLI-dev_max_f1_threshold": 374.89508056640625, "eval_allNLI-dev_max_precision": 0.5230769230769231, "eval_allNLI-dev_max_recall": 0.884393063583815, "eval_sequential_score": 0.7651119124586969, "eval_sts-test_pearson_cosine": 0.810519241797423, "eval_sts-test_pearson_dot": 0.7728322842621618, "eval_sts-test_pearson_euclidean": 0.8405931437646742, "eval_sts-test_pearson_manhattan": 0.8368109506847808, "eval_sts-test_pearson_max": 0.8405931437646742, "eval_sts-test_spearman_cosine": 0.8397552719490651, "eval_sts-test_spearman_dot": 0.7535431751625915, "eval_sts-test_spearman_euclidean": 0.8350731712259899, "eval_sts-test_spearman_manhattan": 0.8328478700020412, "eval_sts-test_spearman_max": 0.8397552719490651, "eval_vitaminc-pairs_loss": 2.8251101970672607, "eval_vitaminc-pairs_runtime": 3.2059, "eval_vitaminc-pairs_samples_per_second": 39.927, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 920 }, { "epoch": 0.9465020576131687, "eval_negation-triplets_loss": 1.0231643915176392, "eval_negation-triplets_runtime": 0.7426, "eval_negation-triplets_samples_per_second": 172.378, "eval_negation-triplets_steps_per_second": 1.347, "step": 920 }, { "epoch": 0.9465020576131687, "eval_scitail-pairs-pos_loss": 0.1975163370370865, "eval_scitail-pairs-pos_runtime": 0.8442, "eval_scitail-pairs-pos_samples_per_second": 151.623, "eval_scitail-pairs-pos_steps_per_second": 1.185, "step": 920 }, { "epoch": 0.9465020576131687, "eval_scitail-pairs-qa_loss": 0.0008590650395490229, "eval_scitail-pairs-qa_runtime": 0.5803, "eval_scitail-pairs-qa_samples_per_second": 220.575, "eval_scitail-pairs-qa_steps_per_second": 1.723, "step": 920 }, { "epoch": 0.9465020576131687, "eval_xsum-pairs_loss": 0.602358877658844, "eval_xsum-pairs_runtime": 3.0201, "eval_xsum-pairs_samples_per_second": 42.382, "eval_xsum-pairs_steps_per_second": 0.331, "step": 920 }, { "epoch": 0.9465020576131687, "eval_sciq_pairs_loss": 0.10820051282644272, "eval_sciq_pairs_runtime": 3.4586, "eval_sciq_pairs_samples_per_second": 37.009, "eval_sciq_pairs_steps_per_second": 0.289, "step": 920 }, { "epoch": 0.9465020576131687, "eval_qasc_pairs_loss": 0.38339564204216003, "eval_qasc_pairs_runtime": 0.6142, "eval_qasc_pairs_samples_per_second": 208.397, "eval_qasc_pairs_steps_per_second": 1.628, "step": 920 }, { "epoch": 0.9465020576131687, "eval_openbookqa_pairs_loss": 1.1135048866271973, "eval_openbookqa_pairs_runtime": 0.5908, "eval_openbookqa_pairs_samples_per_second": 216.637, "eval_openbookqa_pairs_steps_per_second": 1.692, "step": 920 }, { "epoch": 0.9465020576131687, "eval_msmarco_pairs_loss": 1.1654598712921143, "eval_msmarco_pairs_runtime": 1.519, "eval_msmarco_pairs_samples_per_second": 84.267, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 920 }, { "epoch": 0.9465020576131687, "eval_nq_pairs_loss": 1.1468371152877808, "eval_nq_pairs_runtime": 2.8907, "eval_nq_pairs_samples_per_second": 44.279, "eval_nq_pairs_steps_per_second": 0.346, "step": 920 }, { "epoch": 0.9465020576131687, "eval_trivia_pairs_loss": 1.069029688835144, "eval_trivia_pairs_runtime": 3.4494, "eval_trivia_pairs_samples_per_second": 37.107, "eval_trivia_pairs_steps_per_second": 0.29, "step": 920 }, { "epoch": 0.9465020576131687, "eval_gooaq_pairs_loss": 0.5463513731956482, "eval_gooaq_pairs_runtime": 0.9528, "eval_gooaq_pairs_samples_per_second": 134.336, "eval_gooaq_pairs_steps_per_second": 1.049, "step": 920 }, { "epoch": 0.9465020576131687, "eval_paws-pos_loss": 0.02908269874751568, "eval_paws-pos_runtime": 0.6915, "eval_paws-pos_samples_per_second": 185.109, "eval_paws-pos_steps_per_second": 1.446, "step": 920 }, { "epoch": 0.9465020576131687, "eval_global_dataset_loss": 0.548460066318512, "eval_global_dataset_runtime": 13.4034, "eval_global_dataset_samples_per_second": 31.037, "eval_global_dataset_steps_per_second": 0.298, "step": 920 }, { "epoch": 0.9475308641975309, "grad_norm": 11.01314640045166, "learning_rate": 3.3364485981308406e-05, "loss": 0.7138, "step": 921 }, { "epoch": 0.948559670781893, "grad_norm": 11.776330947875977, "learning_rate": 3.340083073727933e-05, "loss": 1.3098, "step": 922 }, { "epoch": 0.9495884773662552, "grad_norm": 8.179085731506348, "learning_rate": 3.343717549325026e-05, "loss": 0.4221, "step": 923 }, { "epoch": 0.9506172839506173, "grad_norm": 9.425230979919434, "learning_rate": 3.347352024922118e-05, "loss": 0.6967, "step": 924 }, { "epoch": 0.9516460905349794, "grad_norm": 15.566692352294922, "learning_rate": 3.35098650051921e-05, "loss": 1.5207, "step": 925 }, { "epoch": 0.9526748971193416, "grad_norm": 9.330801010131836, "learning_rate": 3.354620976116303e-05, "loss": 0.5197, "step": 926 }, { "epoch": 0.9537037037037037, "grad_norm": 3.745374917984009, "learning_rate": 3.358255451713395e-05, "loss": 0.0581, "step": 927 }, { "epoch": 0.9547325102880658, "grad_norm": 8.182941436767578, "learning_rate": 3.361889927310488e-05, "loss": 0.4411, "step": 928 }, { "epoch": 0.9557613168724279, "grad_norm": 8.651951789855957, "learning_rate": 3.3655244029075805e-05, "loss": 0.4771, "step": 929 }, { "epoch": 0.9567901234567902, "grad_norm": 8.148431777954102, "learning_rate": 3.3691588785046725e-05, "loss": 0.366, "step": 930 }, { "epoch": 0.9578189300411523, "grad_norm": 12.771159172058105, "learning_rate": 3.3727933541017646e-05, "loss": 1.1728, "step": 931 }, { "epoch": 0.9588477366255144, "grad_norm": 7.892995834350586, "learning_rate": 3.3764278296988573e-05, "loss": 0.473, "step": 932 }, { "epoch": 0.9598765432098766, "grad_norm": 14.070477485656738, "learning_rate": 3.38006230529595e-05, "loss": 1.1542, "step": 933 }, { "epoch": 0.9609053497942387, "grad_norm": 12.673274993896484, "learning_rate": 3.383696780893042e-05, "loss": 1.3993, "step": 934 }, { "epoch": 0.9619341563786008, "grad_norm": 1.1816976070404053, "learning_rate": 3.387331256490135e-05, "loss": 0.0468, "step": 935 }, { "epoch": 0.9629629629629629, "grad_norm": 7.3651814460754395, "learning_rate": 3.390965732087227e-05, "loss": 0.4248, "step": 936 }, { "epoch": 0.9639917695473251, "grad_norm": 6.860713958740234, "learning_rate": 3.39460020768432e-05, "loss": 0.3551, "step": 937 }, { "epoch": 0.9650205761316872, "grad_norm": 14.23501968383789, "learning_rate": 3.398234683281412e-05, "loss": 1.3652, "step": 938 }, { "epoch": 0.9660493827160493, "grad_norm": 20.718782424926758, "learning_rate": 3.4018691588785045e-05, "loss": 0.7506, "step": 939 }, { "epoch": 0.9670781893004116, "grad_norm": 7.975811958312988, "learning_rate": 3.4055036344755966e-05, "loss": 0.3937, "step": 940 }, { "epoch": 0.9670781893004116, "eval_Qnli-dev_cosine_accuracy": 0.70703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7996010184288025, "eval_Qnli-dev_cosine_ap": 0.7552849329220187, "eval_Qnli-dev_cosine_f1": 0.6943396226415095, "eval_Qnli-dev_cosine_f1_threshold": 0.7739279270172119, "eval_Qnli-dev_cosine_precision": 0.6258503401360545, "eval_Qnli-dev_cosine_recall": 0.7796610169491526, "eval_Qnli-dev_dot_accuracy": 0.65625, "eval_Qnli-dev_dot_accuracy_threshold": 404.8746337890625, "eval_Qnli-dev_dot_ap": 0.6817556270978887, "eval_Qnli-dev_dot_f1": 0.6753670473083198, "eval_Qnli-dev_dot_f1_threshold": 349.9105224609375, "eval_Qnli-dev_dot_precision": 0.5490716180371353, "eval_Qnli-dev_dot_recall": 0.8771186440677966, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.779217720031738, "eval_Qnli-dev_euclidean_ap": 0.7613978748972194, "eval_Qnli-dev_euclidean_f1": 0.6984732824427481, "eval_Qnli-dev_euclidean_f1_threshold": 14.844427108764648, "eval_Qnli-dev_euclidean_precision": 0.6354166666666666, "eval_Qnli-dev_euclidean_recall": 0.7754237288135594, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 279.33502197265625, "eval_Qnli-dev_manhattan_ap": 0.7648285226426834, "eval_Qnli-dev_manhattan_f1": 0.6929982046678635, "eval_Qnli-dev_manhattan_f1_threshold": 318.5891418457031, "eval_Qnli-dev_manhattan_precision": 0.6012461059190031, "eval_Qnli-dev_manhattan_recall": 0.8177966101694916, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 404.8746337890625, "eval_Qnli-dev_max_ap": 0.7648285226426834, "eval_Qnli-dev_max_f1": 0.6984732824427481, "eval_Qnli-dev_max_f1_threshold": 349.9105224609375, "eval_Qnli-dev_max_precision": 0.6354166666666666, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.73828125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8795170187950134, "eval_allNLI-dev_cosine_ap": 0.6090048928829667, "eval_allNLI-dev_cosine_f1": 0.6204081632653061, "eval_allNLI-dev_cosine_f1_threshold": 0.7640015482902527, "eval_allNLI-dev_cosine_precision": 0.4794952681388013, "eval_allNLI-dev_cosine_recall": 0.8786127167630058, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 420.4771728515625, "eval_allNLI-dev_dot_ap": 0.5376162680949538, "eval_allNLI-dev_dot_f1": 0.6000000000000001, "eval_allNLI-dev_dot_f1_threshold": 349.6429748535156, "eval_allNLI-dev_dot_precision": 0.45871559633027525, "eval_allNLI-dev_dot_recall": 0.8670520231213873, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.426948547363281, "eval_allNLI-dev_euclidean_ap": 0.6118636764768349, "eval_allNLI-dev_euclidean_f1": 0.6172839506172839, "eval_allNLI-dev_euclidean_f1_threshold": 14.738828659057617, "eval_allNLI-dev_euclidean_precision": 0.4792332268370607, "eval_allNLI-dev_euclidean_recall": 0.8670520231213873, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 222.07568359375, "eval_allNLI-dev_manhattan_ap": 0.6057381363735815, "eval_allNLI-dev_manhattan_f1": 0.6147704590818364, "eval_allNLI-dev_manhattan_f1_threshold": 310.0055847167969, "eval_allNLI-dev_manhattan_precision": 0.4695121951219512, "eval_allNLI-dev_manhattan_recall": 0.8901734104046243, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 420.4771728515625, "eval_allNLI-dev_max_ap": 0.6118636764768349, "eval_allNLI-dev_max_f1": 0.6204081632653061, "eval_allNLI-dev_max_f1_threshold": 349.6429748535156, "eval_allNLI-dev_max_precision": 0.4794952681388013, "eval_allNLI-dev_max_recall": 0.8901734104046243, "eval_sequential_score": 0.7648285226426834, "eval_sts-test_pearson_cosine": 0.8105857732868115, "eval_sts-test_pearson_dot": 0.7844085069475198, "eval_sts-test_pearson_euclidean": 0.8365544588951073, "eval_sts-test_pearson_manhattan": 0.8307666084036771, "eval_sts-test_pearson_max": 0.8365544588951073, "eval_sts-test_spearman_cosine": 0.8347892244724613, "eval_sts-test_spearman_dot": 0.7719863335147834, "eval_sts-test_spearman_euclidean": 0.8302817431713355, "eval_sts-test_spearman_manhattan": 0.8254758157079903, "eval_sts-test_spearman_max": 0.8347892244724613, "eval_vitaminc-pairs_loss": 2.983093500137329, "eval_vitaminc-pairs_runtime": 3.1808, "eval_vitaminc-pairs_samples_per_second": 40.242, "eval_vitaminc-pairs_steps_per_second": 0.314, "step": 940 }, { "epoch": 0.9670781893004116, "eval_negation-triplets_loss": 1.0402394533157349, "eval_negation-triplets_runtime": 0.7383, "eval_negation-triplets_samples_per_second": 173.37, "eval_negation-triplets_steps_per_second": 1.354, "step": 940 }, { "epoch": 0.9670781893004116, "eval_scitail-pairs-pos_loss": 0.18768535554409027, "eval_scitail-pairs-pos_runtime": 0.8276, "eval_scitail-pairs-pos_samples_per_second": 154.661, "eval_scitail-pairs-pos_steps_per_second": 1.208, "step": 940 }, { "epoch": 0.9670781893004116, "eval_scitail-pairs-qa_loss": 0.0010753768729045987, "eval_scitail-pairs-qa_runtime": 0.5786, "eval_scitail-pairs-qa_samples_per_second": 221.212, "eval_scitail-pairs-qa_steps_per_second": 1.728, "step": 940 }, { "epoch": 0.9670781893004116, "eval_xsum-pairs_loss": 0.5536904335021973, "eval_xsum-pairs_runtime": 3.0201, "eval_xsum-pairs_samples_per_second": 42.383, "eval_xsum-pairs_steps_per_second": 0.331, "step": 940 }, { "epoch": 0.9670781893004116, "eval_sciq_pairs_loss": 0.1257360428571701, "eval_sciq_pairs_runtime": 3.4449, "eval_sciq_pairs_samples_per_second": 37.157, "eval_sciq_pairs_steps_per_second": 0.29, "step": 940 }, { "epoch": 0.9670781893004116, "eval_qasc_pairs_loss": 0.3721018135547638, "eval_qasc_pairs_runtime": 0.606, "eval_qasc_pairs_samples_per_second": 211.232, "eval_qasc_pairs_steps_per_second": 1.65, "step": 940 }, { "epoch": 0.9670781893004116, "eval_openbookqa_pairs_loss": 1.0556271076202393, "eval_openbookqa_pairs_runtime": 0.578, "eval_openbookqa_pairs_samples_per_second": 221.472, "eval_openbookqa_pairs_steps_per_second": 1.73, "step": 940 }, { "epoch": 0.9670781893004116, "eval_msmarco_pairs_loss": 1.051499366760254, "eval_msmarco_pairs_runtime": 1.5165, "eval_msmarco_pairs_samples_per_second": 84.405, "eval_msmarco_pairs_steps_per_second": 0.659, "step": 940 }, { "epoch": 0.9670781893004116, "eval_nq_pairs_loss": 1.165411114692688, "eval_nq_pairs_runtime": 2.8962, "eval_nq_pairs_samples_per_second": 44.196, "eval_nq_pairs_steps_per_second": 0.345, "step": 940 }, { "epoch": 0.9670781893004116, "eval_trivia_pairs_loss": 1.085224986076355, "eval_trivia_pairs_runtime": 3.4302, "eval_trivia_pairs_samples_per_second": 37.316, "eval_trivia_pairs_steps_per_second": 0.292, "step": 940 }, { "epoch": 0.9670781893004116, "eval_gooaq_pairs_loss": 0.5926018953323364, "eval_gooaq_pairs_runtime": 0.9428, "eval_gooaq_pairs_samples_per_second": 135.77, "eval_gooaq_pairs_steps_per_second": 1.061, "step": 940 }, { "epoch": 0.9670781893004116, "eval_paws-pos_loss": 0.027635158970952034, "eval_paws-pos_runtime": 0.691, "eval_paws-pos_samples_per_second": 185.233, "eval_paws-pos_steps_per_second": 1.447, "step": 940 }, { "epoch": 0.9670781893004116, "eval_global_dataset_loss": 0.5423777103424072, "eval_global_dataset_runtime": 13.3745, "eval_global_dataset_samples_per_second": 31.104, "eval_global_dataset_steps_per_second": 0.299, "step": 940 }, { "epoch": 0.9681069958847737, "grad_norm": 11.614127159118652, "learning_rate": 3.409138110072689e-05, "loss": 0.8143, "step": 941 }, { "epoch": 0.9691358024691358, "grad_norm": 13.92485523223877, "learning_rate": 3.4127725856697814e-05, "loss": 1.2339, "step": 942 }, { "epoch": 0.970164609053498, "grad_norm": 12.26136302947998, "learning_rate": 3.416407061266874e-05, "loss": 0.9252, "step": 943 }, { "epoch": 0.9711934156378601, "grad_norm": 7.102560997009277, "learning_rate": 3.420041536863967e-05, "loss": 0.3292, "step": 944 }, { "epoch": 0.9722222222222222, "grad_norm": 19.000288009643555, "learning_rate": 3.423676012461059e-05, "loss": 2.3715, "step": 945 }, { "epoch": 0.9732510288065843, "grad_norm": 11.65499496459961, "learning_rate": 3.427310488058151e-05, "loss": 1.2257, "step": 946 }, { "epoch": 0.9742798353909465, "grad_norm": 8.42990779876709, "learning_rate": 3.430944963655244e-05, "loss": 0.6175, "step": 947 }, { "epoch": 0.9753086419753086, "grad_norm": 16.443498611450195, "learning_rate": 3.4345794392523365e-05, "loss": 1.6621, "step": 948 }, { "epoch": 0.9763374485596708, "grad_norm": 7.4084696769714355, "learning_rate": 3.4382139148494285e-05, "loss": 0.428, "step": 949 }, { "epoch": 0.977366255144033, "grad_norm": 15.619688034057617, "learning_rate": 3.441848390446521e-05, "loss": 1.4729, "step": 950 }, { "epoch": 0.9783950617283951, "grad_norm": 11.312886238098145, "learning_rate": 3.445482866043613e-05, "loss": 1.3042, "step": 951 }, { "epoch": 0.9794238683127572, "grad_norm": 7.754435062408447, "learning_rate": 3.4491173416407054e-05, "loss": 0.6735, "step": 952 }, { "epoch": 0.9804526748971193, "grad_norm": 1.0208377838134766, "learning_rate": 3.452751817237798e-05, "loss": 0.0191, "step": 953 }, { "epoch": 0.9814814814814815, "grad_norm": 10.611540794372559, "learning_rate": 3.456386292834891e-05, "loss": 0.857, "step": 954 }, { "epoch": 0.9825102880658436, "grad_norm": 10.76138687133789, "learning_rate": 3.460020768431983e-05, "loss": 0.2335, "step": 955 }, { "epoch": 0.9835390946502057, "grad_norm": 8.067065238952637, "learning_rate": 3.463655244029076e-05, "loss": 0.5272, "step": 956 }, { "epoch": 0.9845679012345679, "grad_norm": 9.368846893310547, "learning_rate": 3.467289719626168e-05, "loss": 0.721, "step": 957 }, { "epoch": 0.98559670781893, "grad_norm": 17.550647735595703, "learning_rate": 3.4709241952232605e-05, "loss": 0.4356, "step": 958 }, { "epoch": 0.9866255144032922, "grad_norm": 8.449636459350586, "learning_rate": 3.474558670820353e-05, "loss": 0.5033, "step": 959 }, { "epoch": 0.9876543209876543, "grad_norm": 12.751513481140137, "learning_rate": 3.478193146417445e-05, "loss": 0.6354, "step": 960 }, { "epoch": 0.9876543209876543, "eval_Qnli-dev_cosine_accuracy": 0.7109375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8011420369148254, "eval_Qnli-dev_cosine_ap": 0.7562970990202866, "eval_Qnli-dev_cosine_f1": 0.6953125, "eval_Qnli-dev_cosine_f1_threshold": 0.7688385248184204, "eval_Qnli-dev_cosine_precision": 0.644927536231884, "eval_Qnli-dev_cosine_recall": 0.7542372881355932, "eval_Qnli-dev_dot_accuracy": 0.677734375, "eval_Qnli-dev_dot_accuracy_threshold": 373.8548889160156, "eval_Qnli-dev_dot_ap": 0.707742039511076, "eval_Qnli-dev_dot_f1": 0.683111954459203, "eval_Qnli-dev_dot_f1_threshold": 362.323974609375, "eval_Qnli-dev_dot_precision": 0.6185567010309279, "eval_Qnli-dev_dot_recall": 0.7627118644067796, "eval_Qnli-dev_euclidean_accuracy": 0.712890625, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.823720932006836, "eval_Qnli-dev_euclidean_ap": 0.7579671387457134, "eval_Qnli-dev_euclidean_f1": 0.7015503875968992, "eval_Qnli-dev_euclidean_f1_threshold": 14.87757396697998, "eval_Qnli-dev_euclidean_precision": 0.6464285714285715, "eval_Qnli-dev_euclidean_recall": 0.7669491525423728, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 285.1418762207031, "eval_Qnli-dev_manhattan_ap": 0.7610519271212217, "eval_Qnli-dev_manhattan_f1": 0.6953271028037383, "eval_Qnli-dev_manhattan_f1_threshold": 316.33892822265625, "eval_Qnli-dev_manhattan_precision": 0.6220735785953178, "eval_Qnli-dev_manhattan_recall": 0.788135593220339, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 373.8548889160156, "eval_Qnli-dev_max_ap": 0.7610519271212217, "eval_Qnli-dev_max_f1": 0.7015503875968992, "eval_Qnli-dev_max_f1_threshold": 362.323974609375, "eval_Qnli-dev_max_precision": 0.6464285714285715, "eval_Qnli-dev_max_recall": 0.788135593220339, "eval_allNLI-dev_cosine_accuracy": 0.73828125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8651937246322632, "eval_allNLI-dev_cosine_ap": 0.6113314809700446, "eval_allNLI-dev_cosine_f1": 0.6244541484716157, "eval_allNLI-dev_cosine_f1_threshold": 0.7694165110588074, "eval_allNLI-dev_cosine_precision": 0.5017543859649123, "eval_allNLI-dev_cosine_recall": 0.8265895953757225, "eval_allNLI-dev_dot_accuracy": 0.69140625, "eval_allNLI-dev_dot_accuracy_threshold": 429.3245849609375, "eval_allNLI-dev_dot_ap": 0.5481674603400284, "eval_allNLI-dev_dot_f1": 0.5954825462012321, "eval_allNLI-dev_dot_f1_threshold": 342.2791748046875, "eval_allNLI-dev_dot_precision": 0.46178343949044587, "eval_allNLI-dev_dot_recall": 0.838150289017341, "eval_allNLI-dev_euclidean_accuracy": 0.744140625, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.662067413330078, "eval_allNLI-dev_euclidean_ap": 0.6159942916219311, "eval_allNLI-dev_euclidean_f1": 0.6288416075650118, "eval_allNLI-dev_euclidean_f1_threshold": 13.927071571350098, "eval_allNLI-dev_euclidean_precision": 0.532, "eval_allNLI-dev_euclidean_recall": 0.7687861271676301, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 243.21441650390625, "eval_allNLI-dev_manhattan_ap": 0.6112452850944511, "eval_allNLI-dev_manhattan_f1": 0.6244131455399061, "eval_allNLI-dev_manhattan_f1_threshold": 291.2453308105469, "eval_allNLI-dev_manhattan_precision": 0.525691699604743, "eval_allNLI-dev_manhattan_recall": 0.7687861271676301, "eval_allNLI-dev_max_accuracy": 0.744140625, "eval_allNLI-dev_max_accuracy_threshold": 429.3245849609375, "eval_allNLI-dev_max_ap": 0.6159942916219311, "eval_allNLI-dev_max_f1": 0.6288416075650118, "eval_allNLI-dev_max_f1_threshold": 342.2791748046875, "eval_allNLI-dev_max_precision": 0.532, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7610519271212217, "eval_sts-test_pearson_cosine": 0.8206020544252598, "eval_sts-test_pearson_dot": 0.7998596455743565, "eval_sts-test_pearson_euclidean": 0.8493475930153472, "eval_sts-test_pearson_manhattan": 0.8452749220783444, "eval_sts-test_pearson_max": 0.8493475930153472, "eval_sts-test_spearman_cosine": 0.8453138708777522, "eval_sts-test_spearman_dot": 0.7850806868516911, "eval_sts-test_spearman_euclidean": 0.842515932513039, "eval_sts-test_spearman_manhattan": 0.8393217217287051, "eval_sts-test_spearman_max": 0.8453138708777522, "eval_vitaminc-pairs_loss": 3.093803644180298, "eval_vitaminc-pairs_runtime": 3.2017, "eval_vitaminc-pairs_samples_per_second": 39.979, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 960 }, { "epoch": 0.9876543209876543, "eval_negation-triplets_loss": 1.049690842628479, "eval_negation-triplets_runtime": 0.7405, "eval_negation-triplets_samples_per_second": 172.846, "eval_negation-triplets_steps_per_second": 1.35, "step": 960 }, { "epoch": 0.9876543209876543, "eval_scitail-pairs-pos_loss": 0.22198575735092163, "eval_scitail-pairs-pos_runtime": 0.874, "eval_scitail-pairs-pos_samples_per_second": 146.452, "eval_scitail-pairs-pos_steps_per_second": 1.144, "step": 960 }, { "epoch": 0.9876543209876543, "eval_scitail-pairs-qa_loss": 0.0007531806477345526, "eval_scitail-pairs-qa_runtime": 0.604, "eval_scitail-pairs-qa_samples_per_second": 211.93, "eval_scitail-pairs-qa_steps_per_second": 1.656, "step": 960 }, { "epoch": 0.9876543209876543, "eval_xsum-pairs_loss": 0.545045793056488, "eval_xsum-pairs_runtime": 3.0289, "eval_xsum-pairs_samples_per_second": 42.259, "eval_xsum-pairs_steps_per_second": 0.33, "step": 960 }, { "epoch": 0.9876543209876543, "eval_sciq_pairs_loss": 0.12160878628492355, "eval_sciq_pairs_runtime": 3.4833, "eval_sciq_pairs_samples_per_second": 36.746, "eval_sciq_pairs_steps_per_second": 0.287, "step": 960 }, { "epoch": 0.9876543209876543, "eval_qasc_pairs_loss": 0.3162378668785095, "eval_qasc_pairs_runtime": 0.6116, "eval_qasc_pairs_samples_per_second": 209.281, "eval_qasc_pairs_steps_per_second": 1.635, "step": 960 }, { "epoch": 0.9876543209876543, "eval_openbookqa_pairs_loss": 0.9658156633377075, "eval_openbookqa_pairs_runtime": 0.5939, "eval_openbookqa_pairs_samples_per_second": 215.528, "eval_openbookqa_pairs_steps_per_second": 1.684, "step": 960 }, { "epoch": 0.9876543209876543, "eval_msmarco_pairs_loss": 1.1362426280975342, "eval_msmarco_pairs_runtime": 1.5207, "eval_msmarco_pairs_samples_per_second": 84.172, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 960 }, { "epoch": 0.9876543209876543, "eval_nq_pairs_loss": 1.1873204708099365, "eval_nq_pairs_runtime": 2.9063, "eval_nq_pairs_samples_per_second": 44.043, "eval_nq_pairs_steps_per_second": 0.344, "step": 960 }, { "epoch": 0.9876543209876543, "eval_trivia_pairs_loss": 1.1470587253570557, "eval_trivia_pairs_runtime": 3.4649, "eval_trivia_pairs_samples_per_second": 36.942, "eval_trivia_pairs_steps_per_second": 0.289, "step": 960 }, { "epoch": 0.9876543209876543, "eval_gooaq_pairs_loss": 0.5810187458992004, "eval_gooaq_pairs_runtime": 0.9593, "eval_gooaq_pairs_samples_per_second": 133.431, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 960 }, { "epoch": 0.9876543209876543, "eval_paws-pos_loss": 0.0258675217628479, "eval_paws-pos_runtime": 0.6941, "eval_paws-pos_samples_per_second": 184.413, "eval_paws-pos_steps_per_second": 1.441, "step": 960 }, { "epoch": 0.9876543209876543, "eval_global_dataset_loss": 0.551192581653595, "eval_global_dataset_runtime": 13.394, "eval_global_dataset_samples_per_second": 31.059, "eval_global_dataset_steps_per_second": 0.299, "step": 960 }, { "epoch": 0.9886831275720165, "grad_norm": 8.129959106445312, "learning_rate": 3.4818276220145373e-05, "loss": 0.3919, "step": 961 }, { "epoch": 0.9897119341563786, "grad_norm": 11.156733512878418, "learning_rate": 3.48546209761163e-05, "loss": 0.6961, "step": 962 }, { "epoch": 0.9907407407407407, "grad_norm": 17.619508743286133, "learning_rate": 3.489096573208722e-05, "loss": 0.9385, "step": 963 }, { "epoch": 0.9917695473251029, "grad_norm": 13.581380844116211, "learning_rate": 3.492731048805815e-05, "loss": 0.2489, "step": 964 }, { "epoch": 0.992798353909465, "grad_norm": 13.018972396850586, "learning_rate": 3.4963655244029076e-05, "loss": 1.0582, "step": 965 }, { "epoch": 0.9938271604938271, "grad_norm": 1.7356064319610596, "learning_rate": 3.5e-05, "loss": 0.1094, "step": 966 }, { "epoch": 0.9948559670781894, "grad_norm": 16.483444213867188, "learning_rate": 3.49999903396699e-05, "loss": 0.4915, "step": 967 }, { "epoch": 0.9958847736625515, "grad_norm": 13.589509963989258, "learning_rate": 3.4999961358695594e-05, "loss": 1.1361, "step": 968 }, { "epoch": 0.9969135802469136, "grad_norm": 14.672646522521973, "learning_rate": 3.499991305712508e-05, "loss": 0.6884, "step": 969 }, { "epoch": 0.9979423868312757, "grad_norm": 2.262558698654175, "learning_rate": 3.499984543503835e-05, "loss": 0.0148, "step": 970 }, { "epoch": 0.9989711934156379, "grad_norm": 23.297290802001953, "learning_rate": 3.499975849254739e-05, "loss": 1.0498, "step": 971 }, { "epoch": 1.0, "grad_norm": 19.72472381591797, "learning_rate": 3.499965222979617e-05, "loss": 0.6437, "step": 972 }, { "epoch": 1.0010288065843622, "grad_norm": 5.863245964050293, "learning_rate": 3.499952664696068e-05, "loss": 0.2412, "step": 973 }, { "epoch": 1.0020576131687242, "grad_norm": 9.423555374145508, "learning_rate": 3.499938174424889e-05, "loss": 0.5081, "step": 974 }, { "epoch": 1.0030864197530864, "grad_norm": 11.968791007995605, "learning_rate": 3.499921752190077e-05, "loss": 1.2637, "step": 975 }, { "epoch": 1.0041152263374487, "grad_norm": 8.803627014160156, "learning_rate": 3.4999033980188266e-05, "loss": 0.5428, "step": 976 }, { "epoch": 1.0051440329218106, "grad_norm": 11.566201210021973, "learning_rate": 3.4998831119415355e-05, "loss": 1.1313, "step": 977 }, { "epoch": 1.0061728395061729, "grad_norm": 9.46624755859375, "learning_rate": 3.499860893991797e-05, "loss": 0.6193, "step": 978 }, { "epoch": 1.007201646090535, "grad_norm": 8.126818656921387, "learning_rate": 3.499836744206405e-05, "loss": 0.5096, "step": 979 }, { "epoch": 1.008230452674897, "grad_norm": 7.624522686004639, "learning_rate": 3.4998106626253555e-05, "loss": 0.4309, "step": 980 }, { "epoch": 1.008230452674897, "eval_Qnli-dev_cosine_accuracy": 0.71484375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8019129633903503, "eval_Qnli-dev_cosine_ap": 0.7610205901818679, "eval_Qnli-dev_cosine_f1": 0.7036395147313692, "eval_Qnli-dev_cosine_f1_threshold": 0.7520031929016113, "eval_Qnli-dev_cosine_precision": 0.5953079178885631, "eval_Qnli-dev_cosine_recall": 0.8601694915254238, "eval_Qnli-dev_dot_accuracy": 0.6640625, "eval_Qnli-dev_dot_accuracy_threshold": 426.893310546875, "eval_Qnli-dev_dot_ap": 0.6947234515128509, "eval_Qnli-dev_dot_f1": 0.6785714285714286, "eval_Qnli-dev_dot_f1_threshold": 373.33868408203125, "eval_Qnli-dev_dot_precision": 0.5864197530864198, "eval_Qnli-dev_dot_recall": 0.8050847457627118, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.8776273727417, "eval_Qnli-dev_euclidean_ap": 0.7647258098874301, "eval_Qnli-dev_euclidean_f1": 0.7109515260323159, "eval_Qnli-dev_euclidean_f1_threshold": 15.282470703125, "eval_Qnli-dev_euclidean_precision": 0.616822429906542, "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 271.2890319824219, "eval_Qnli-dev_manhattan_ap": 0.7681486389198227, "eval_Qnli-dev_manhattan_f1": 0.7112676056338029, "eval_Qnli-dev_manhattan_f1_threshold": 320.7895202636719, "eval_Qnli-dev_manhattan_precision": 0.608433734939759, "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 426.893310546875, "eval_Qnli-dev_max_ap": 0.7681486389198227, "eval_Qnli-dev_max_f1": 0.7112676056338029, "eval_Qnli-dev_max_f1_threshold": 373.33868408203125, "eval_Qnli-dev_max_precision": 0.616822429906542, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8773996233940125, "eval_allNLI-dev_cosine_ap": 0.6069091134704014, "eval_allNLI-dev_cosine_f1": 0.6197802197802197, "eval_allNLI-dev_cosine_f1_threshold": 0.7960855960845947, "eval_allNLI-dev_cosine_precision": 0.5, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.69140625, "eval_allNLI-dev_dot_accuracy_threshold": 444.69110107421875, "eval_allNLI-dev_dot_ap": 0.5273880047233465, "eval_allNLI-dev_dot_f1": 0.5879828326180258, "eval_allNLI-dev_dot_f1_threshold": 377.0397644042969, "eval_allNLI-dev_dot_precision": 0.46757679180887374, "eval_allNLI-dev_dot_recall": 0.791907514450867, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.407779693603516, "eval_allNLI-dev_euclidean_ap": 0.6097577491057798, "eval_allNLI-dev_euclidean_f1": 0.6265060240963856, "eval_allNLI-dev_euclidean_f1_threshold": 13.258623123168945, "eval_allNLI-dev_euclidean_precision": 0.5371900826446281, "eval_allNLI-dev_euclidean_recall": 0.7514450867052023, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 233.98886108398438, "eval_allNLI-dev_manhattan_ap": 0.6053119147310726, "eval_allNLI-dev_manhattan_f1": 0.6284403669724771, "eval_allNLI-dev_manhattan_f1_threshold": 283.61029052734375, "eval_allNLI-dev_manhattan_precision": 0.5209125475285171, "eval_allNLI-dev_manhattan_recall": 0.791907514450867, "eval_allNLI-dev_max_accuracy": 0.73046875, "eval_allNLI-dev_max_accuracy_threshold": 444.69110107421875, "eval_allNLI-dev_max_ap": 0.6097577491057798, "eval_allNLI-dev_max_f1": 0.6284403669724771, "eval_allNLI-dev_max_f1_threshold": 377.0397644042969, "eval_allNLI-dev_max_precision": 0.5371900826446281, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.7681486389198227, "eval_sts-test_pearson_cosine": 0.8029398971858455, "eval_sts-test_pearson_dot": 0.7674963822814747, "eval_sts-test_pearson_euclidean": 0.8352948242845171, "eval_sts-test_pearson_manhattan": 0.8333892654611063, "eval_sts-test_pearson_max": 0.8352948242845171, "eval_sts-test_spearman_cosine": 0.8309187773772422, "eval_sts-test_spearman_dot": 0.7509477362556745, "eval_sts-test_spearman_euclidean": 0.8291958090624117, "eval_sts-test_spearman_manhattan": 0.8273570692343619, "eval_sts-test_spearman_max": 0.8309187773772422, "eval_vitaminc-pairs_loss": 2.8231990337371826, "eval_vitaminc-pairs_runtime": 3.1885, "eval_vitaminc-pairs_samples_per_second": 40.144, "eval_vitaminc-pairs_steps_per_second": 0.314, "step": 980 }, { "epoch": 1.008230452674897, "eval_negation-triplets_loss": 1.0276439189910889, "eval_negation-triplets_runtime": 0.738, "eval_negation-triplets_samples_per_second": 173.431, "eval_negation-triplets_steps_per_second": 1.355, "step": 980 }, { "epoch": 1.008230452674897, "eval_scitail-pairs-pos_loss": 0.22538259625434875, "eval_scitail-pairs-pos_runtime": 0.882, "eval_scitail-pairs-pos_samples_per_second": 145.129, "eval_scitail-pairs-pos_steps_per_second": 1.134, "step": 980 }, { "epoch": 1.008230452674897, "eval_scitail-pairs-qa_loss": 0.0011585784377530217, "eval_scitail-pairs-qa_runtime": 0.587, "eval_scitail-pairs-qa_samples_per_second": 218.049, "eval_scitail-pairs-qa_steps_per_second": 1.704, "step": 980 }, { "epoch": 1.008230452674897, "eval_xsum-pairs_loss": 0.5889097452163696, "eval_xsum-pairs_runtime": 3.0279, "eval_xsum-pairs_samples_per_second": 42.274, "eval_xsum-pairs_steps_per_second": 0.33, "step": 980 }, { "epoch": 1.008230452674897, "eval_sciq_pairs_loss": 0.11867424100637436, "eval_sciq_pairs_runtime": 3.5072, "eval_sciq_pairs_samples_per_second": 36.496, "eval_sciq_pairs_steps_per_second": 0.285, "step": 980 }, { "epoch": 1.008230452674897, "eval_qasc_pairs_loss": 0.27829957008361816, "eval_qasc_pairs_runtime": 0.6017, "eval_qasc_pairs_samples_per_second": 212.72, "eval_qasc_pairs_steps_per_second": 1.662, "step": 980 }, { "epoch": 1.008230452674897, "eval_openbookqa_pairs_loss": 0.970773458480835, "eval_openbookqa_pairs_runtime": 0.6015, "eval_openbookqa_pairs_samples_per_second": 212.804, "eval_openbookqa_pairs_steps_per_second": 1.663, "step": 980 }, { "epoch": 1.008230452674897, "eval_msmarco_pairs_loss": 1.1431875228881836, "eval_msmarco_pairs_runtime": 1.5207, "eval_msmarco_pairs_samples_per_second": 84.17, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 980 }, { "epoch": 1.008230452674897, "eval_nq_pairs_loss": 1.1105154752731323, "eval_nq_pairs_runtime": 2.893, "eval_nq_pairs_samples_per_second": 44.244, "eval_nq_pairs_steps_per_second": 0.346, "step": 980 }, { "epoch": 1.008230452674897, "eval_trivia_pairs_loss": 1.1272141933441162, "eval_trivia_pairs_runtime": 3.4339, "eval_trivia_pairs_samples_per_second": 37.276, "eval_trivia_pairs_steps_per_second": 0.291, "step": 980 }, { "epoch": 1.008230452674897, "eval_gooaq_pairs_loss": 0.5513377785682678, "eval_gooaq_pairs_runtime": 0.9521, "eval_gooaq_pairs_samples_per_second": 134.441, "eval_gooaq_pairs_steps_per_second": 1.05, "step": 980 }, { "epoch": 1.008230452674897, "eval_paws-pos_loss": 0.025796374306082726, "eval_paws-pos_runtime": 0.7136, "eval_paws-pos_samples_per_second": 179.366, "eval_paws-pos_steps_per_second": 1.401, "step": 980 }, { "epoch": 1.008230452674897, "eval_global_dataset_loss": 0.5248242616653442, "eval_global_dataset_runtime": 13.4139, "eval_global_dataset_samples_per_second": 31.013, "eval_global_dataset_steps_per_second": 0.298, "step": 980 }, { "epoch": 1.0092592592592593, "grad_norm": 10.596141815185547, "learning_rate": 3.499782649291838e-05, "loss": 0.8267, "step": 981 }, { "epoch": 1.0102880658436213, "grad_norm": 9.392977714538574, "learning_rate": 3.499752704252246e-05, "loss": 0.6614, "step": 982 }, { "epoch": 1.0113168724279835, "grad_norm": 7.637691974639893, "learning_rate": 3.499720827556169e-05, "loss": 0.3601, "step": 983 }, { "epoch": 1.0123456790123457, "grad_norm": 6.8640947341918945, "learning_rate": 3.4996870192563984e-05, "loss": 0.33, "step": 984 }, { "epoch": 1.0133744855967077, "grad_norm": 8.076541900634766, "learning_rate": 3.499651279408921e-05, "loss": 0.3986, "step": 985 }, { "epoch": 1.01440329218107, "grad_norm": 10.192421913146973, "learning_rate": 3.499613608072924e-05, "loss": 0.6489, "step": 986 }, { "epoch": 1.0154320987654322, "grad_norm": 6.926242351531982, "learning_rate": 3.4995740053107946e-05, "loss": 0.2832, "step": 987 }, { "epoch": 1.0164609053497942, "grad_norm": 12.570526123046875, "learning_rate": 3.499532471188116e-05, "loss": 1.116, "step": 988 }, { "epoch": 1.0174897119341564, "grad_norm": 16.421586990356445, "learning_rate": 3.499489005773671e-05, "loss": 1.5305, "step": 989 }, { "epoch": 1.0185185185185186, "grad_norm": 8.618919372558594, "learning_rate": 3.4994436091394425e-05, "loss": 0.434, "step": 990 }, { "epoch": 1.0195473251028806, "grad_norm": 7.02255392074585, "learning_rate": 3.499396281360608e-05, "loss": 0.2973, "step": 991 }, { "epoch": 1.0205761316872428, "grad_norm": 10.480740547180176, "learning_rate": 3.499347022515545e-05, "loss": 0.5811, "step": 992 }, { "epoch": 1.021604938271605, "grad_norm": 12.898595809936523, "learning_rate": 3.4992958326858305e-05, "loss": 0.9475, "step": 993 }, { "epoch": 1.022633744855967, "grad_norm": 0.7765280604362488, "learning_rate": 3.499242711956236e-05, "loss": 0.0351, "step": 994 }, { "epoch": 1.0236625514403292, "grad_norm": 11.703328132629395, "learning_rate": 3.4991876604147334e-05, "loss": 1.0881, "step": 995 }, { "epoch": 1.0246913580246915, "grad_norm": 7.762533664703369, "learning_rate": 3.499130678152492e-05, "loss": 0.517, "step": 996 }, { "epoch": 1.0257201646090535, "grad_norm": 7.188144207000732, "learning_rate": 3.4990717652638754e-05, "loss": 0.521, "step": 997 }, { "epoch": 1.0267489711934157, "grad_norm": 7.4093122482299805, "learning_rate": 3.499010921846448e-05, "loss": 0.3103, "step": 998 }, { "epoch": 1.0277777777777777, "grad_norm": 7.3908562660217285, "learning_rate": 3.498948148000971e-05, "loss": 0.4539, "step": 999 }, { "epoch": 1.02880658436214, "grad_norm": 13.766965866088867, "learning_rate": 3.4988834438313996e-05, "loss": 1.0192, "step": 1000 }, { "epoch": 1.02880658436214, "eval_Qnli-dev_cosine_accuracy": 0.681640625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8225704431533813, "eval_Qnli-dev_cosine_ap": 0.7336489695537911, "eval_Qnli-dev_cosine_f1": 0.6912280701754385, "eval_Qnli-dev_cosine_f1_threshold": 0.7402236461639404, "eval_Qnli-dev_cosine_precision": 0.5898203592814372, "eval_Qnli-dev_cosine_recall": 0.8347457627118644, "eval_Qnli-dev_dot_accuracy": 0.646484375, "eval_Qnli-dev_dot_accuracy_threshold": 369.0775146484375, "eval_Qnli-dev_dot_ap": 0.660954834977831, "eval_Qnli-dev_dot_f1": 0.6806451612903225, "eval_Qnli-dev_dot_f1_threshold": 334.06298828125, "eval_Qnli-dev_dot_precision": 0.5494791666666666, "eval_Qnli-dev_dot_recall": 0.8940677966101694, "eval_Qnli-dev_euclidean_accuracy": 0.697265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.520076751708984, "eval_Qnli-dev_euclidean_ap": 0.74077949307834, "eval_Qnli-dev_euclidean_f1": 0.6872852233676976, "eval_Qnli-dev_euclidean_f1_threshold": 16.351831436157227, "eval_Qnli-dev_euclidean_precision": 0.5780346820809249, "eval_Qnli-dev_euclidean_recall": 0.847457627118644, "eval_Qnli-dev_manhattan_accuracy": 0.703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 281.56951904296875, "eval_Qnli-dev_manhattan_ap": 0.7456832832042077, "eval_Qnli-dev_manhattan_f1": 0.6857142857142857, "eval_Qnli-dev_manhattan_f1_threshold": 342.68011474609375, "eval_Qnli-dev_manhattan_precision": 0.5682451253481894, "eval_Qnli-dev_manhattan_recall": 0.864406779661017, "eval_Qnli-dev_max_accuracy": 0.703125, "eval_Qnli-dev_max_accuracy_threshold": 369.0775146484375, "eval_Qnli-dev_max_ap": 0.7456832832042077, "eval_Qnli-dev_max_f1": 0.6912280701754385, "eval_Qnli-dev_max_f1_threshold": 342.68011474609375, "eval_Qnli-dev_max_precision": 0.5898203592814372, "eval_Qnli-dev_max_recall": 0.8940677966101694, "eval_allNLI-dev_cosine_accuracy": 0.73828125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8777043223381042, "eval_allNLI-dev_cosine_ap": 0.6009867807750411, "eval_allNLI-dev_cosine_f1": 0.6055045871559632, "eval_allNLI-dev_cosine_f1_threshold": 0.7909231185913086, "eval_allNLI-dev_cosine_precision": 0.5019011406844106, "eval_allNLI-dev_cosine_recall": 0.7630057803468208, "eval_allNLI-dev_dot_accuracy": 0.689453125, "eval_allNLI-dev_dot_accuracy_threshold": 425.19970703125, "eval_allNLI-dev_dot_ap": 0.5083264191429433, "eval_allNLI-dev_dot_f1": 0.580046403712297, "eval_allNLI-dev_dot_f1_threshold": 373.0705261230469, "eval_allNLI-dev_dot_precision": 0.4844961240310077, "eval_allNLI-dev_dot_recall": 0.7225433526011561, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.884292602539062, "eval_allNLI-dev_euclidean_ap": 0.6025702809405632, "eval_allNLI-dev_euclidean_f1": 0.6088888888888888, "eval_allNLI-dev_euclidean_f1_threshold": 14.338363647460938, "eval_allNLI-dev_euclidean_precision": 0.49458483754512633, "eval_allNLI-dev_euclidean_recall": 0.791907514450867, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 229.49404907226562, "eval_allNLI-dev_manhattan_ap": 0.6012372908721144, "eval_allNLI-dev_manhattan_f1": 0.6054279749478079, "eval_allNLI-dev_manhattan_f1_threshold": 306.45465087890625, "eval_allNLI-dev_manhattan_precision": 0.4738562091503268, "eval_allNLI-dev_manhattan_recall": 0.838150289017341, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 425.19970703125, "eval_allNLI-dev_max_ap": 0.6025702809405632, "eval_allNLI-dev_max_f1": 0.6088888888888888, "eval_allNLI-dev_max_f1_threshold": 373.0705261230469, "eval_allNLI-dev_max_precision": 0.5019011406844106, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7456832832042077, "eval_sts-test_pearson_cosine": 0.8134514242079779, "eval_sts-test_pearson_dot": 0.7960002284292096, "eval_sts-test_pearson_euclidean": 0.8455332055791209, "eval_sts-test_pearson_manhattan": 0.8451588222378992, "eval_sts-test_pearson_max": 0.8455332055791209, "eval_sts-test_spearman_cosine": 0.8487256175437097, "eval_sts-test_spearman_dot": 0.7791035741264715, "eval_sts-test_spearman_euclidean": 0.8429671594816486, "eval_sts-test_spearman_manhattan": 0.8425387556563487, "eval_sts-test_spearman_max": 0.8487256175437097, "eval_vitaminc-pairs_loss": 3.199394941329956, "eval_vitaminc-pairs_runtime": 3.1982, "eval_vitaminc-pairs_samples_per_second": 40.023, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 1000 }, { "epoch": 1.02880658436214, "eval_negation-triplets_loss": 1.0285611152648926, "eval_negation-triplets_runtime": 0.7516, "eval_negation-triplets_samples_per_second": 170.306, "eval_negation-triplets_steps_per_second": 1.331, "step": 1000 }, { "epoch": 1.02880658436214, "eval_scitail-pairs-pos_loss": 0.2147119641304016, "eval_scitail-pairs-pos_runtime": 0.8507, "eval_scitail-pairs-pos_samples_per_second": 150.461, "eval_scitail-pairs-pos_steps_per_second": 1.175, "step": 1000 }, { "epoch": 1.02880658436214, "eval_scitail-pairs-qa_loss": 0.002570149954408407, "eval_scitail-pairs-qa_runtime": 0.5847, "eval_scitail-pairs-qa_samples_per_second": 218.899, "eval_scitail-pairs-qa_steps_per_second": 1.71, "step": 1000 }, { "epoch": 1.02880658436214, "eval_xsum-pairs_loss": 0.5069864988327026, "eval_xsum-pairs_runtime": 3.0253, "eval_xsum-pairs_samples_per_second": 42.31, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1000 }, { "epoch": 1.02880658436214, "eval_sciq_pairs_loss": 0.11535051465034485, "eval_sciq_pairs_runtime": 3.4646, "eval_sciq_pairs_samples_per_second": 36.945, "eval_sciq_pairs_steps_per_second": 0.289, "step": 1000 }, { "epoch": 1.02880658436214, "eval_qasc_pairs_loss": 0.40786802768707275, "eval_qasc_pairs_runtime": 0.6169, "eval_qasc_pairs_samples_per_second": 207.5, "eval_qasc_pairs_steps_per_second": 1.621, "step": 1000 }, { "epoch": 1.02880658436214, "eval_openbookqa_pairs_loss": 1.0345005989074707, "eval_openbookqa_pairs_runtime": 0.5841, "eval_openbookqa_pairs_samples_per_second": 219.127, "eval_openbookqa_pairs_steps_per_second": 1.712, "step": 1000 }, { "epoch": 1.02880658436214, "eval_msmarco_pairs_loss": 1.0275574922561646, "eval_msmarco_pairs_runtime": 1.52, "eval_msmarco_pairs_samples_per_second": 84.212, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 1000 }, { "epoch": 1.02880658436214, "eval_nq_pairs_loss": 1.2776100635528564, "eval_nq_pairs_runtime": 2.8968, "eval_nq_pairs_samples_per_second": 44.187, "eval_nq_pairs_steps_per_second": 0.345, "step": 1000 }, { "epoch": 1.02880658436214, "eval_trivia_pairs_loss": 1.0063875913619995, "eval_trivia_pairs_runtime": 3.4646, "eval_trivia_pairs_samples_per_second": 36.945, "eval_trivia_pairs_steps_per_second": 0.289, "step": 1000 }, { "epoch": 1.02880658436214, "eval_gooaq_pairs_loss": 0.5649043321609497, "eval_gooaq_pairs_runtime": 0.9541, "eval_gooaq_pairs_samples_per_second": 134.155, "eval_gooaq_pairs_steps_per_second": 1.048, "step": 1000 }, { "epoch": 1.02880658436214, "eval_paws-pos_loss": 0.025881968438625336, "eval_paws-pos_runtime": 0.6906, "eval_paws-pos_samples_per_second": 185.358, "eval_paws-pos_steps_per_second": 1.448, "step": 1000 }, { "epoch": 1.02880658436214, "eval_global_dataset_loss": 0.5549957752227783, "eval_global_dataset_runtime": 13.4023, "eval_global_dataset_samples_per_second": 31.039, "eval_global_dataset_steps_per_second": 0.298, "step": 1000 }, { "epoch": 1.0298353909465021, "grad_norm": 9.308526039123535, "learning_rate": 3.498816809444888e-05, "loss": 0.523, "step": 1001 }, { "epoch": 1.0308641975308641, "grad_norm": 13.073616981506348, "learning_rate": 3.498748244951788e-05, "loss": 0.9026, "step": 1002 }, { "epoch": 1.0318930041152263, "grad_norm": 11.270973205566406, "learning_rate": 3.498677750465644e-05, "loss": 0.7104, "step": 1003 }, { "epoch": 1.0329218106995885, "grad_norm": 12.925529479980469, "learning_rate": 3.4986053261032e-05, "loss": 1.1418, "step": 1004 }, { "epoch": 1.0339506172839505, "grad_norm": 14.090717315673828, "learning_rate": 3.498530971984395e-05, "loss": 1.2133, "step": 1005 }, { "epoch": 1.0349794238683128, "grad_norm": 12.810900688171387, "learning_rate": 3.498454688232363e-05, "loss": 1.1203, "step": 1006 }, { "epoch": 1.036008230452675, "grad_norm": 15.513686180114746, "learning_rate": 3.498376474973436e-05, "loss": 2.5254, "step": 1007 }, { "epoch": 1.037037037037037, "grad_norm": 6.373301982879639, "learning_rate": 3.498296332337137e-05, "loss": 0.3947, "step": 1008 }, { "epoch": 1.0380658436213992, "grad_norm": 12.136091232299805, "learning_rate": 3.498214260456188e-05, "loss": 1.0774, "step": 1009 }, { "epoch": 1.0390946502057614, "grad_norm": 12.024189949035645, "learning_rate": 3.4981302594665046e-05, "loss": 1.0608, "step": 1010 }, { "epoch": 1.0401234567901234, "grad_norm": 10.587035179138184, "learning_rate": 3.4980443295071976e-05, "loss": 0.9121, "step": 1011 }, { "epoch": 1.0411522633744856, "grad_norm": 5.42789888381958, "learning_rate": 3.497956470720571e-05, "loss": 0.2954, "step": 1012 }, { "epoch": 1.0421810699588478, "grad_norm": 11.198415756225586, "learning_rate": 3.497866683252125e-05, "loss": 1.2817, "step": 1013 }, { "epoch": 1.0432098765432098, "grad_norm": 9.719239234924316, "learning_rate": 3.497774967250552e-05, "loss": 0.8201, "step": 1014 }, { "epoch": 1.044238683127572, "grad_norm": 7.551983833312988, "learning_rate": 3.497681322867739e-05, "loss": 0.4308, "step": 1015 }, { "epoch": 1.045267489711934, "grad_norm": 12.508809089660645, "learning_rate": 3.497585750258767e-05, "loss": 1.2293, "step": 1016 }, { "epoch": 1.0462962962962963, "grad_norm": 6.985977649688721, "learning_rate": 3.4974882495819084e-05, "loss": 0.4804, "step": 1017 }, { "epoch": 1.0473251028806585, "grad_norm": 12.973464012145996, "learning_rate": 3.4973888209986306e-05, "loss": 1.3674, "step": 1018 }, { "epoch": 1.0483539094650205, "grad_norm": 11.896876335144043, "learning_rate": 3.497287464673593e-05, "loss": 0.9312, "step": 1019 }, { "epoch": 1.0493827160493827, "grad_norm": 8.766112327575684, "learning_rate": 3.497184180774647e-05, "loss": 0.7554, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_Qnli-dev_cosine_accuracy": 0.697265625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7943562269210815, "eval_Qnli-dev_cosine_ap": 0.7559444632593828, "eval_Qnli-dev_cosine_f1": 0.7058823529411764, "eval_Qnli-dev_cosine_f1_threshold": 0.7659621238708496, "eval_Qnli-dev_cosine_precision": 0.6233766233766234, "eval_Qnli-dev_cosine_recall": 0.8135593220338984, "eval_Qnli-dev_dot_accuracy": 0.662109375, "eval_Qnli-dev_dot_accuracy_threshold": 394.54266357421875, "eval_Qnli-dev_dot_ap": 0.6915752876288561, "eval_Qnli-dev_dot_f1": 0.6793760831889082, "eval_Qnli-dev_dot_f1_threshold": 365.2876281738281, "eval_Qnli-dev_dot_precision": 0.5747800586510264, "eval_Qnli-dev_dot_recall": 0.8305084745762712, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.502662658691406, "eval_Qnli-dev_euclidean_ap": 0.7624762126096433, "eval_Qnli-dev_euclidean_f1": 0.7111913357400722, "eval_Qnli-dev_euclidean_f1_threshold": 15.483503341674805, "eval_Qnli-dev_euclidean_precision": 0.6194968553459119, "eval_Qnli-dev_euclidean_recall": 0.8347457627118644, "eval_Qnli-dev_manhattan_accuracy": 0.70703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 280.207763671875, "eval_Qnli-dev_manhattan_ap": 0.7642042868629957, "eval_Qnli-dev_manhattan_f1": 0.6996466431095405, "eval_Qnli-dev_manhattan_f1_threshold": 327.005126953125, "eval_Qnli-dev_manhattan_precision": 0.6, "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 394.54266357421875, "eval_Qnli-dev_max_ap": 0.7642042868629957, "eval_Qnli-dev_max_f1": 0.7111913357400722, "eval_Qnli-dev_max_f1_threshold": 365.2876281738281, "eval_Qnli-dev_max_precision": 0.6233766233766234, "eval_Qnli-dev_max_recall": 0.8389830508474576, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8626978397369385, "eval_allNLI-dev_cosine_ap": 0.6056903051173465, "eval_allNLI-dev_cosine_f1": 0.6147186147186148, "eval_allNLI-dev_cosine_f1_threshold": 0.770087718963623, "eval_allNLI-dev_cosine_precision": 0.4913494809688581, "eval_allNLI-dev_cosine_recall": 0.8208092485549133, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 426.302490234375, "eval_allNLI-dev_dot_ap": 0.5246631021855179, "eval_allNLI-dev_dot_f1": 0.5949367088607596, "eval_allNLI-dev_dot_f1_threshold": 351.2099304199219, "eval_allNLI-dev_dot_precision": 0.4684385382059801, "eval_allNLI-dev_dot_recall": 0.815028901734104, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.542158126831055, "eval_allNLI-dev_euclidean_ap": 0.6113964262053992, "eval_allNLI-dev_euclidean_f1": 0.6205357142857143, "eval_allNLI-dev_euclidean_f1_threshold": 14.319332122802734, "eval_allNLI-dev_euclidean_precision": 0.5054545454545455, "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 234.59854125976562, "eval_allNLI-dev_manhattan_ap": 0.6096757095213863, "eval_allNLI-dev_manhattan_f1": 0.6198347107438017, "eval_allNLI-dev_manhattan_f1_threshold": 315.2457580566406, "eval_allNLI-dev_manhattan_precision": 0.48231511254019294, "eval_allNLI-dev_manhattan_recall": 0.8670520231213873, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 426.302490234375, "eval_allNLI-dev_max_ap": 0.6113964262053992, "eval_allNLI-dev_max_f1": 0.6205357142857143, "eval_allNLI-dev_max_f1_threshold": 351.2099304199219, "eval_allNLI-dev_max_precision": 0.5054545454545455, "eval_allNLI-dev_max_recall": 0.8670520231213873, "eval_sequential_score": 0.7642042868629957, "eval_sts-test_pearson_cosine": 0.8180272631352716, "eval_sts-test_pearson_dot": 0.800107028188555, "eval_sts-test_pearson_euclidean": 0.8465297896031135, "eval_sts-test_pearson_manhattan": 0.8446697206380651, "eval_sts-test_pearson_max": 0.8465297896031135, "eval_sts-test_spearman_cosine": 0.8460061728914771, "eval_sts-test_spearman_dot": 0.7791330652077285, "eval_sts-test_spearman_euclidean": 0.8418238095043598, "eval_sts-test_spearman_manhattan": 0.8409772498926187, "eval_sts-test_spearman_max": 0.8460061728914771, "eval_vitaminc-pairs_loss": 2.937673807144165, "eval_vitaminc-pairs_runtime": 3.1958, "eval_vitaminc-pairs_samples_per_second": 40.052, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_negation-triplets_loss": 1.0229724645614624, "eval_negation-triplets_runtime": 0.7702, "eval_negation-triplets_samples_per_second": 166.188, "eval_negation-triplets_steps_per_second": 1.298, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_scitail-pairs-pos_loss": 0.19694292545318604, "eval_scitail-pairs-pos_runtime": 0.8468, "eval_scitail-pairs-pos_samples_per_second": 151.149, "eval_scitail-pairs-pos_steps_per_second": 1.181, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_scitail-pairs-qa_loss": 0.001685372437350452, "eval_scitail-pairs-qa_runtime": 0.5889, "eval_scitail-pairs-qa_samples_per_second": 217.351, "eval_scitail-pairs-qa_steps_per_second": 1.698, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_xsum-pairs_loss": 0.44889509677886963, "eval_xsum-pairs_runtime": 3.0362, "eval_xsum-pairs_samples_per_second": 42.158, "eval_xsum-pairs_steps_per_second": 0.329, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_sciq_pairs_loss": 0.11442571133375168, "eval_sciq_pairs_runtime": 3.4567, "eval_sciq_pairs_samples_per_second": 37.03, "eval_sciq_pairs_steps_per_second": 0.289, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_qasc_pairs_loss": 0.3323410153388977, "eval_qasc_pairs_runtime": 0.612, "eval_qasc_pairs_samples_per_second": 209.163, "eval_qasc_pairs_steps_per_second": 1.634, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_openbookqa_pairs_loss": 0.9421266317367554, "eval_openbookqa_pairs_runtime": 0.581, "eval_openbookqa_pairs_samples_per_second": 220.316, "eval_openbookqa_pairs_steps_per_second": 1.721, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_msmarco_pairs_loss": 1.033428430557251, "eval_msmarco_pairs_runtime": 1.5117, "eval_msmarco_pairs_samples_per_second": 84.675, "eval_msmarco_pairs_steps_per_second": 0.662, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_nq_pairs_loss": 1.0520647764205933, "eval_nq_pairs_runtime": 2.8939, "eval_nq_pairs_samples_per_second": 44.231, "eval_nq_pairs_steps_per_second": 0.346, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_trivia_pairs_loss": 1.0933111906051636, "eval_trivia_pairs_runtime": 3.4373, "eval_trivia_pairs_samples_per_second": 37.238, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_gooaq_pairs_loss": 0.4880179166793823, "eval_gooaq_pairs_runtime": 0.9487, "eval_gooaq_pairs_samples_per_second": 134.922, "eval_gooaq_pairs_steps_per_second": 1.054, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_paws-pos_loss": 0.026863040402531624, "eval_paws-pos_runtime": 0.6905, "eval_paws-pos_samples_per_second": 185.373, "eval_paws-pos_steps_per_second": 1.448, "step": 1020 }, { "epoch": 1.0493827160493827, "eval_global_dataset_loss": 0.5200657844543457, "eval_global_dataset_runtime": 13.3653, "eval_global_dataset_samples_per_second": 31.125, "eval_global_dataset_steps_per_second": 0.299, "step": 1020 }, { "epoch": 1.050411522633745, "grad_norm": 0.5952563881874084, "learning_rate": 3.497078969472837e-05, "loss": 0.027, "step": 1021 }, { "epoch": 1.051440329218107, "grad_norm": 1.7833058834075928, "learning_rate": 3.496971830942398e-05, "loss": 0.0478, "step": 1022 }, { "epoch": 1.0524691358024691, "grad_norm": 10.134953498840332, "learning_rate": 3.4968627653607597e-05, "loss": 0.4828, "step": 1023 }, { "epoch": 1.0534979423868314, "grad_norm": 7.823026657104492, "learning_rate": 3.496751772908539e-05, "loss": 0.4244, "step": 1024 }, { "epoch": 1.0545267489711934, "grad_norm": 9.383576393127441, "learning_rate": 3.4966388537695456e-05, "loss": 0.4789, "step": 1025 }, { "epoch": 1.0555555555555556, "grad_norm": 8.64692211151123, "learning_rate": 3.496524008130781e-05, "loss": 0.4748, "step": 1026 }, { "epoch": 1.0565843621399178, "grad_norm": 10.123908996582031, "learning_rate": 3.496407236182434e-05, "loss": 0.9816, "step": 1027 }, { "epoch": 1.0576131687242798, "grad_norm": 8.501023292541504, "learning_rate": 3.4962885381178896e-05, "loss": 0.417, "step": 1028 }, { "epoch": 1.058641975308642, "grad_norm": 2.9080381393432617, "learning_rate": 3.496167914133714e-05, "loss": 0.0292, "step": 1029 }, { "epoch": 1.059670781893004, "grad_norm": 15.60783576965332, "learning_rate": 3.49604536442967e-05, "loss": 1.287, "step": 1030 }, { "epoch": 1.0606995884773662, "grad_norm": 1.720328450202942, "learning_rate": 3.495920889208707e-05, "loss": 0.0098, "step": 1031 }, { "epoch": 1.0617283950617284, "grad_norm": 15.75252628326416, "learning_rate": 3.495794488676961e-05, "loss": 1.3394, "step": 1032 }, { "epoch": 1.0627572016460904, "grad_norm": 2.560807704925537, "learning_rate": 3.49566616304376e-05, "loss": 0.0586, "step": 1033 }, { "epoch": 1.0637860082304527, "grad_norm": 1.728555679321289, "learning_rate": 3.495535912521618e-05, "loss": 0.0479, "step": 1034 }, { "epoch": 1.0648148148148149, "grad_norm": 13.202831268310547, "learning_rate": 3.4954037373262374e-05, "loss": 1.0693, "step": 1035 }, { "epoch": 1.0658436213991769, "grad_norm": 13.138431549072266, "learning_rate": 3.495269637676508e-05, "loss": 1.1871, "step": 1036 }, { "epoch": 1.066872427983539, "grad_norm": 10.88890552520752, "learning_rate": 3.4951336137945066e-05, "loss": 0.6046, "step": 1037 }, { "epoch": 1.0679012345679013, "grad_norm": 11.555718421936035, "learning_rate": 3.494995665905495e-05, "loss": 0.9365, "step": 1038 }, { "epoch": 1.0689300411522633, "grad_norm": 8.685961723327637, "learning_rate": 3.494855794237925e-05, "loss": 0.4727, "step": 1039 }, { "epoch": 1.0699588477366255, "grad_norm": 7.188544750213623, "learning_rate": 3.494713999023429e-05, "loss": 0.5346, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8146058320999146, "eval_Qnli-dev_cosine_ap": 0.76276523537112, "eval_Qnli-dev_cosine_f1": 0.6946107784431137, "eval_Qnli-dev_cosine_f1_threshold": 0.773271381855011, "eval_Qnli-dev_cosine_precision": 0.6566037735849056, "eval_Qnli-dev_cosine_recall": 0.7372881355932204, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 380.0313720703125, "eval_Qnli-dev_dot_ap": 0.6987894282612087, "eval_Qnli-dev_dot_f1": 0.6859205776173285, "eval_Qnli-dev_dot_f1_threshold": 350.40252685546875, "eval_Qnli-dev_dot_precision": 0.5974842767295597, "eval_Qnli-dev_dot_recall": 0.8050847457627118, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.222099304199219, "eval_Qnli-dev_euclidean_ap": 0.7667034334790043, "eval_Qnli-dev_euclidean_f1": 0.6943396226415095, "eval_Qnli-dev_euclidean_f1_threshold": 15.177824974060059, "eval_Qnli-dev_euclidean_precision": 0.6258503401360545, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 273.0506591796875, "eval_Qnli-dev_manhattan_ap": 0.7719711469934153, "eval_Qnli-dev_manhattan_f1": 0.6953125, "eval_Qnli-dev_manhattan_f1_threshold": 310.35589599609375, "eval_Qnli-dev_manhattan_precision": 0.644927536231884, "eval_Qnli-dev_manhattan_recall": 0.7542372881355932, "eval_Qnli-dev_max_accuracy": 0.72265625, "eval_Qnli-dev_max_accuracy_threshold": 380.0313720703125, "eval_Qnli-dev_max_ap": 0.7719711469934153, "eval_Qnli-dev_max_f1": 0.6953125, "eval_Qnli-dev_max_f1_threshold": 350.40252685546875, "eval_Qnli-dev_max_precision": 0.6566037735849056, "eval_Qnli-dev_max_recall": 0.8050847457627118, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.888862133026123, "eval_allNLI-dev_cosine_ap": 0.6052222488796517, "eval_allNLI-dev_cosine_f1": 0.6120481927710845, "eval_allNLI-dev_cosine_f1_threshold": 0.8043129444122314, "eval_allNLI-dev_cosine_precision": 0.5247933884297521, "eval_allNLI-dev_cosine_recall": 0.7341040462427746, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 416.451171875, "eval_allNLI-dev_dot_ap": 0.5275626318156367, "eval_allNLI-dev_dot_f1": 0.5814432989690721, "eval_allNLI-dev_dot_f1_threshold": 348.996337890625, "eval_allNLI-dev_dot_precision": 0.4519230769230769, "eval_allNLI-dev_dot_recall": 0.815028901734104, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.72227668762207, "eval_allNLI-dev_euclidean_ap": 0.6122355162858166, "eval_allNLI-dev_euclidean_f1": 0.6182669789227166, "eval_allNLI-dev_euclidean_f1_threshold": 13.71673583984375, "eval_allNLI-dev_euclidean_precision": 0.5196850393700787, "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 232.9837646484375, "eval_allNLI-dev_manhattan_ap": 0.61152275886359, "eval_allNLI-dev_manhattan_f1": 0.620253164556962, "eval_allNLI-dev_manhattan_f1_threshold": 302.649169921875, "eval_allNLI-dev_manhattan_precision": 0.4883720930232558, "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 416.451171875, "eval_allNLI-dev_max_ap": 0.6122355162858166, "eval_allNLI-dev_max_f1": 0.620253164556962, "eval_allNLI-dev_max_f1_threshold": 348.996337890625, "eval_allNLI-dev_max_precision": 0.5247933884297521, "eval_allNLI-dev_max_recall": 0.8497109826589595, "eval_sequential_score": 0.7719711469934153, "eval_sts-test_pearson_cosine": 0.8236897870440283, "eval_sts-test_pearson_dot": 0.7902900873190821, "eval_sts-test_pearson_euclidean": 0.8533041040673283, "eval_sts-test_pearson_manhattan": 0.851439890014496, "eval_sts-test_pearson_max": 0.8533041040673283, "eval_sts-test_spearman_cosine": 0.8503772523478821, "eval_sts-test_spearman_dot": 0.7656649493356799, "eval_sts-test_spearman_euclidean": 0.848739218522416, "eval_sts-test_spearman_manhattan": 0.8478907827624531, "eval_sts-test_spearman_max": 0.8503772523478821, "eval_vitaminc-pairs_loss": 3.045991897583008, "eval_vitaminc-pairs_runtime": 3.193, "eval_vitaminc-pairs_samples_per_second": 40.088, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_negation-triplets_loss": 1.0010513067245483, "eval_negation-triplets_runtime": 0.7632, "eval_negation-triplets_samples_per_second": 167.706, "eval_negation-triplets_steps_per_second": 1.31, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_scitail-pairs-pos_loss": 0.18198701739311218, "eval_scitail-pairs-pos_runtime": 0.8414, "eval_scitail-pairs-pos_samples_per_second": 152.126, "eval_scitail-pairs-pos_steps_per_second": 1.188, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_scitail-pairs-qa_loss": 0.0008363102679140866, "eval_scitail-pairs-qa_runtime": 0.5814, "eval_scitail-pairs-qa_samples_per_second": 220.176, "eval_scitail-pairs-qa_steps_per_second": 1.72, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_xsum-pairs_loss": 0.46501919627189636, "eval_xsum-pairs_runtime": 3.0324, "eval_xsum-pairs_samples_per_second": 42.21, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_sciq_pairs_loss": 0.11307297646999359, "eval_sciq_pairs_runtime": 3.466, "eval_sciq_pairs_samples_per_second": 36.93, "eval_sciq_pairs_steps_per_second": 0.289, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_qasc_pairs_loss": 0.29918473958969116, "eval_qasc_pairs_runtime": 0.6151, "eval_qasc_pairs_samples_per_second": 208.085, "eval_qasc_pairs_steps_per_second": 1.626, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_openbookqa_pairs_loss": 0.9877970814704895, "eval_openbookqa_pairs_runtime": 0.5936, "eval_openbookqa_pairs_samples_per_second": 215.639, "eval_openbookqa_pairs_steps_per_second": 1.685, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_msmarco_pairs_loss": 0.9953982830047607, "eval_msmarco_pairs_runtime": 1.5203, "eval_msmarco_pairs_samples_per_second": 84.194, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_nq_pairs_loss": 1.0526180267333984, "eval_nq_pairs_runtime": 2.8941, "eval_nq_pairs_samples_per_second": 44.227, "eval_nq_pairs_steps_per_second": 0.346, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_trivia_pairs_loss": 1.056014895439148, "eval_trivia_pairs_runtime": 3.458, "eval_trivia_pairs_samples_per_second": 37.016, "eval_trivia_pairs_steps_per_second": 0.289, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_gooaq_pairs_loss": 0.5340279936790466, "eval_gooaq_pairs_runtime": 0.9526, "eval_gooaq_pairs_samples_per_second": 134.374, "eval_gooaq_pairs_steps_per_second": 1.05, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_paws-pos_loss": 0.027126522734761238, "eval_paws-pos_runtime": 0.6936, "eval_paws-pos_samples_per_second": 184.538, "eval_paws-pos_steps_per_second": 1.442, "step": 1040 }, { "epoch": 1.0699588477366255, "eval_global_dataset_loss": 0.5354006290435791, "eval_global_dataset_runtime": 13.4232, "eval_global_dataset_samples_per_second": 30.991, "eval_global_dataset_steps_per_second": 0.298, "step": 1040 }, { "epoch": 1.0709876543209877, "grad_norm": 13.943881034851074, "learning_rate": 3.494570280496831e-05, "loss": 1.2332, "step": 1041 }, { "epoch": 1.0720164609053497, "grad_norm": 13.042203903198242, "learning_rate": 3.4944246388961354e-05, "loss": 1.0986, "step": 1042 }, { "epoch": 1.073045267489712, "grad_norm": 8.240731239318848, "learning_rate": 3.494277074462533e-05, "loss": 0.7022, "step": 1043 }, { "epoch": 1.074074074074074, "grad_norm": 7.854862689971924, "learning_rate": 3.494127587440399e-05, "loss": 0.3632, "step": 1044 }, { "epoch": 1.0751028806584362, "grad_norm": 12.290331840515137, "learning_rate": 3.493976178077293e-05, "loss": 1.0894, "step": 1045 }, { "epoch": 1.0761316872427984, "grad_norm": 6.150963306427002, "learning_rate": 3.493822846623956e-05, "loss": 0.238, "step": 1046 }, { "epoch": 1.0771604938271604, "grad_norm": 1.730427861213684, "learning_rate": 3.493667593334315e-05, "loss": 0.072, "step": 1047 }, { "epoch": 1.0781893004115226, "grad_norm": 14.845075607299805, "learning_rate": 3.4935104184654776e-05, "loss": 1.2267, "step": 1048 }, { "epoch": 1.0792181069958848, "grad_norm": 13.764507293701172, "learning_rate": 3.4933513222777345e-05, "loss": 1.456, "step": 1049 }, { "epoch": 1.0802469135802468, "grad_norm": 5.944631576538086, "learning_rate": 3.493190305034558e-05, "loss": 0.2923, "step": 1050 }, { "epoch": 1.081275720164609, "grad_norm": 8.024051666259766, "learning_rate": 3.4930273670026005e-05, "loss": 0.4242, "step": 1051 }, { "epoch": 1.0823045267489713, "grad_norm": 10.95007610321045, "learning_rate": 3.4928625084516986e-05, "loss": 1.0295, "step": 1052 }, { "epoch": 1.0833333333333333, "grad_norm": 11.2196683883667, "learning_rate": 3.492695729654866e-05, "loss": 0.8706, "step": 1053 }, { "epoch": 1.0843621399176955, "grad_norm": 6.774842739105225, "learning_rate": 3.4925270308882986e-05, "loss": 0.3193, "step": 1054 }, { "epoch": 1.0853909465020577, "grad_norm": 7.2438225746154785, "learning_rate": 3.492356412431371e-05, "loss": 0.2568, "step": 1055 }, { "epoch": 1.0864197530864197, "grad_norm": 8.959449768066406, "learning_rate": 3.4921838745666365e-05, "loss": 0.5477, "step": 1056 }, { "epoch": 1.087448559670782, "grad_norm": 6.107593536376953, "learning_rate": 3.492009417579829e-05, "loss": 0.3996, "step": 1057 }, { "epoch": 1.0884773662551441, "grad_norm": 9.177140235900879, "learning_rate": 3.491833041759857e-05, "loss": 0.4762, "step": 1058 }, { "epoch": 1.0895061728395061, "grad_norm": 6.854634761810303, "learning_rate": 3.49165474739881e-05, "loss": 0.3927, "step": 1059 }, { "epoch": 1.0905349794238683, "grad_norm": 13.550508499145508, "learning_rate": 3.491474534791954e-05, "loss": 1.3277, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_Qnli-dev_cosine_accuracy": 0.705078125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8141260147094727, "eval_Qnli-dev_cosine_ap": 0.7552781802571633, "eval_Qnli-dev_cosine_f1": 0.6927374301675978, "eval_Qnli-dev_cosine_f1_threshold": 0.7760788202285767, "eval_Qnli-dev_cosine_precision": 0.6179401993355482, "eval_Qnli-dev_cosine_recall": 0.788135593220339, "eval_Qnli-dev_dot_accuracy": 0.68359375, "eval_Qnli-dev_dot_accuracy_threshold": 388.2694091796875, "eval_Qnli-dev_dot_ap": 0.6992409939483639, "eval_Qnli-dev_dot_f1": 0.6821705426356589, "eval_Qnli-dev_dot_f1_threshold": 383.9870300292969, "eval_Qnli-dev_dot_precision": 0.6285714285714286, "eval_Qnli-dev_dot_recall": 0.7457627118644068, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.442824363708496, "eval_Qnli-dev_euclidean_ap": 0.7601416485652148, "eval_Qnli-dev_euclidean_f1": 0.6981818181818182, "eval_Qnli-dev_euclidean_f1_threshold": 15.075292587280273, "eval_Qnli-dev_euclidean_precision": 0.6114649681528662, "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, "eval_Qnli-dev_manhattan_accuracy": 0.70703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 276.9916687011719, "eval_Qnli-dev_manhattan_ap": 0.7611068790205133, "eval_Qnli-dev_manhattan_f1": 0.6919104991394148, "eval_Qnli-dev_manhattan_f1_threshold": 325.7022399902344, "eval_Qnli-dev_manhattan_precision": 0.5826086956521739, "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 388.2694091796875, "eval_Qnli-dev_max_ap": 0.7611068790205133, "eval_Qnli-dev_max_f1": 0.6981818181818182, "eval_Qnli-dev_max_f1_threshold": 383.9870300292969, "eval_Qnli-dev_max_precision": 0.6285714285714286, "eval_Qnli-dev_max_recall": 0.8516949152542372, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.874409556388855, "eval_allNLI-dev_cosine_ap": 0.6009933067542925, "eval_allNLI-dev_cosine_f1": 0.6184210526315789, "eval_allNLI-dev_cosine_f1_threshold": 0.7953381538391113, "eval_allNLI-dev_cosine_precision": 0.49823321554770317, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.69140625, "eval_allNLI-dev_dot_accuracy_threshold": 427.4722900390625, "eval_allNLI-dev_dot_ap": 0.5113560802806598, "eval_allNLI-dev_dot_f1": 0.5847665847665847, "eval_allNLI-dev_dot_f1_threshold": 381.553955078125, "eval_allNLI-dev_dot_precision": 0.5085470085470085, "eval_allNLI-dev_dot_recall": 0.6878612716763006, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.330347061157227, "eval_allNLI-dev_euclidean_ap": 0.6074562934337298, "eval_allNLI-dev_euclidean_f1": 0.6263498920086393, "eval_allNLI-dev_euclidean_f1_threshold": 13.941089630126953, "eval_allNLI-dev_euclidean_precision": 0.5, "eval_allNLI-dev_euclidean_recall": 0.838150289017341, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 220.7136688232422, "eval_allNLI-dev_manhattan_ap": 0.6048071745318456, "eval_allNLI-dev_manhattan_f1": 0.6197802197802197, "eval_allNLI-dev_manhattan_f1_threshold": 287.2364807128906, "eval_allNLI-dev_manhattan_precision": 0.5, "eval_allNLI-dev_manhattan_recall": 0.815028901734104, "eval_allNLI-dev_max_accuracy": 0.73046875, "eval_allNLI-dev_max_accuracy_threshold": 427.4722900390625, "eval_allNLI-dev_max_ap": 0.6074562934337298, "eval_allNLI-dev_max_f1": 0.6263498920086393, "eval_allNLI-dev_max_f1_threshold": 381.553955078125, "eval_allNLI-dev_max_precision": 0.5085470085470085, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7611068790205133, "eval_sts-test_pearson_cosine": 0.8075672126172166, "eval_sts-test_pearson_dot": 0.7866606214637348, "eval_sts-test_pearson_euclidean": 0.843653409349988, "eval_sts-test_pearson_manhattan": 0.8406546702228472, "eval_sts-test_pearson_max": 0.843653409349988, "eval_sts-test_spearman_cosine": 0.8475748836082914, "eval_sts-test_spearman_dot": 0.7727337690825425, "eval_sts-test_spearman_euclidean": 0.8438102732462625, "eval_sts-test_spearman_manhattan": 0.8412408795734754, "eval_sts-test_spearman_max": 0.8475748836082914, "eval_vitaminc-pairs_loss": 3.1600735187530518, "eval_vitaminc-pairs_runtime": 3.2213, "eval_vitaminc-pairs_samples_per_second": 39.736, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_negation-triplets_loss": 0.9576802253723145, "eval_negation-triplets_runtime": 0.7718, "eval_negation-triplets_samples_per_second": 165.854, "eval_negation-triplets_steps_per_second": 1.296, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_scitail-pairs-pos_loss": 0.17336298525333405, "eval_scitail-pairs-pos_runtime": 0.9032, "eval_scitail-pairs-pos_samples_per_second": 141.717, "eval_scitail-pairs-pos_steps_per_second": 1.107, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_scitail-pairs-qa_loss": 0.0017815841129049659, "eval_scitail-pairs-qa_runtime": 0.5931, "eval_scitail-pairs-qa_samples_per_second": 215.827, "eval_scitail-pairs-qa_steps_per_second": 1.686, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_xsum-pairs_loss": 0.4659762680530548, "eval_xsum-pairs_runtime": 3.0276, "eval_xsum-pairs_samples_per_second": 42.278, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_sciq_pairs_loss": 0.11056140810251236, "eval_sciq_pairs_runtime": 3.4934, "eval_sciq_pairs_samples_per_second": 36.641, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_qasc_pairs_loss": 0.31533095240592957, "eval_qasc_pairs_runtime": 0.621, "eval_qasc_pairs_samples_per_second": 206.114, "eval_qasc_pairs_steps_per_second": 1.61, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_openbookqa_pairs_loss": 0.9580376148223877, "eval_openbookqa_pairs_runtime": 0.5902, "eval_openbookqa_pairs_samples_per_second": 216.867, "eval_openbookqa_pairs_steps_per_second": 1.694, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_msmarco_pairs_loss": 0.9790497422218323, "eval_msmarco_pairs_runtime": 1.5363, "eval_msmarco_pairs_samples_per_second": 83.316, "eval_msmarco_pairs_steps_per_second": 0.651, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_nq_pairs_loss": 1.0773346424102783, "eval_nq_pairs_runtime": 2.9103, "eval_nq_pairs_samples_per_second": 43.982, "eval_nq_pairs_steps_per_second": 0.344, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_trivia_pairs_loss": 1.1339694261550903, "eval_trivia_pairs_runtime": 3.4476, "eval_trivia_pairs_samples_per_second": 37.128, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_gooaq_pairs_loss": 0.5519257187843323, "eval_gooaq_pairs_runtime": 0.9565, "eval_gooaq_pairs_samples_per_second": 133.816, "eval_gooaq_pairs_steps_per_second": 1.045, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_paws-pos_loss": 0.023419398814439774, "eval_paws-pos_runtime": 0.6933, "eval_paws-pos_samples_per_second": 184.623, "eval_paws-pos_steps_per_second": 1.442, "step": 1060 }, { "epoch": 1.0905349794238683, "eval_global_dataset_loss": 0.5671343207359314, "eval_global_dataset_runtime": 13.3988, "eval_global_dataset_samples_per_second": 31.047, "eval_global_dataset_steps_per_second": 0.299, "step": 1060 }, { "epoch": 1.0915637860082303, "grad_norm": 8.800882339477539, "learning_rate": 3.49129240423773e-05, "loss": 0.4485, "step": 1061 }, { "epoch": 1.0925925925925926, "grad_norm": 8.650297164916992, "learning_rate": 3.4911083560377576e-05, "loss": 0.7556, "step": 1062 }, { "epoch": 1.0936213991769548, "grad_norm": 1.9387682676315308, "learning_rate": 3.4909223904968304e-05, "loss": 0.0762, "step": 1063 }, { "epoch": 1.0946502057613168, "grad_norm": 13.118877410888672, "learning_rate": 3.490734507922918e-05, "loss": 1.2212, "step": 1064 }, { "epoch": 1.095679012345679, "grad_norm": 8.793366432189941, "learning_rate": 3.490544708627165e-05, "loss": 0.584, "step": 1065 }, { "epoch": 1.0967078189300412, "grad_norm": 15.72206974029541, "learning_rate": 3.490352992923889e-05, "loss": 1.3773, "step": 1066 }, { "epoch": 1.0977366255144032, "grad_norm": 8.144251823425293, "learning_rate": 3.490159361130583e-05, "loss": 0.4408, "step": 1067 }, { "epoch": 1.0987654320987654, "grad_norm": 1.2064863443374634, "learning_rate": 3.4899638135679124e-05, "loss": 0.0213, "step": 1068 }, { "epoch": 1.0997942386831276, "grad_norm": 7.808547496795654, "learning_rate": 3.489766350559714e-05, "loss": 0.3992, "step": 1069 }, { "epoch": 1.1008230452674896, "grad_norm": 14.440876960754395, "learning_rate": 3.489566972432997e-05, "loss": 1.0667, "step": 1070 }, { "epoch": 1.1018518518518519, "grad_norm": 1.1789801120758057, "learning_rate": 3.4893656795179454e-05, "loss": 0.0228, "step": 1071 }, { "epoch": 1.102880658436214, "grad_norm": 11.723593711853027, "learning_rate": 3.489162472147909e-05, "loss": 0.8587, "step": 1072 }, { "epoch": 1.103909465020576, "grad_norm": 10.564098358154297, "learning_rate": 3.488957350659412e-05, "loss": 0.7988, "step": 1073 }, { "epoch": 1.1049382716049383, "grad_norm": 15.705960273742676, "learning_rate": 3.488750315392148e-05, "loss": 1.4636, "step": 1074 }, { "epoch": 1.1059670781893005, "grad_norm": 14.437980651855469, "learning_rate": 3.488541366688978e-05, "loss": 1.381, "step": 1075 }, { "epoch": 1.1069958847736625, "grad_norm": 1.2517554759979248, "learning_rate": 3.488330504895932e-05, "loss": 0.0192, "step": 1076 }, { "epoch": 1.1080246913580247, "grad_norm": 9.632847785949707, "learning_rate": 3.4881177303622115e-05, "loss": 0.9076, "step": 1077 }, { "epoch": 1.1090534979423867, "grad_norm": 12.476523399353027, "learning_rate": 3.4879030434401814e-05, "loss": 1.2966, "step": 1078 }, { "epoch": 1.110082304526749, "grad_norm": 7.523418426513672, "learning_rate": 3.487686444485375e-05, "loss": 0.4105, "step": 1079 }, { "epoch": 1.1111111111111112, "grad_norm": 9.408167839050293, "learning_rate": 3.487467933856494e-05, "loss": 0.7761, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_Qnli-dev_cosine_accuracy": 0.728515625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8194748759269714, "eval_Qnli-dev_cosine_ap": 0.7717294481550442, "eval_Qnli-dev_cosine_f1": 0.7117988394584139, "eval_Qnli-dev_cosine_f1_threshold": 0.7828003764152527, "eval_Qnli-dev_cosine_precision": 0.6548042704626335, "eval_Qnli-dev_cosine_recall": 0.7796610169491526, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 402.6671142578125, "eval_Qnli-dev_dot_ap": 0.6996117845942682, "eval_Qnli-dev_dot_f1": 0.6843971631205673, "eval_Qnli-dev_dot_f1_threshold": 369.4736022949219, "eval_Qnli-dev_dot_precision": 0.5884146341463414, "eval_Qnli-dev_dot_recall": 0.8177966101694916, "eval_Qnli-dev_euclidean_accuracy": 0.736328125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.2765531539917, "eval_Qnli-dev_euclidean_ap": 0.7778804447820293, "eval_Qnli-dev_euclidean_f1": 0.7123287671232877, "eval_Qnli-dev_euclidean_f1_threshold": 14.59056282043457, "eval_Qnli-dev_euclidean_precision": 0.6618181818181819, "eval_Qnli-dev_euclidean_recall": 0.7711864406779662, "eval_Qnli-dev_manhattan_accuracy": 0.73046875, "eval_Qnli-dev_manhattan_accuracy_threshold": 274.97943115234375, "eval_Qnli-dev_manhattan_ap": 0.7807881360124016, "eval_Qnli-dev_manhattan_f1": 0.7098039215686275, "eval_Qnli-dev_manhattan_f1_threshold": 303.7630615234375, "eval_Qnli-dev_manhattan_precision": 0.6605839416058394, "eval_Qnli-dev_manhattan_recall": 0.7669491525423728, "eval_Qnli-dev_max_accuracy": 0.736328125, "eval_Qnli-dev_max_accuracy_threshold": 402.6671142578125, "eval_Qnli-dev_max_ap": 0.7807881360124016, "eval_Qnli-dev_max_f1": 0.7123287671232877, "eval_Qnli-dev_max_f1_threshold": 369.4736022949219, "eval_Qnli-dev_max_precision": 0.6618181818181819, "eval_Qnli-dev_max_recall": 0.8177966101694916, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8717066049575806, "eval_allNLI-dev_cosine_ap": 0.5998410683079696, "eval_allNLI-dev_cosine_f1": 0.6223175965665236, "eval_allNLI-dev_cosine_f1_threshold": 0.7907012104988098, "eval_allNLI-dev_cosine_precision": 0.4948805460750853, "eval_allNLI-dev_cosine_recall": 0.838150289017341, "eval_allNLI-dev_dot_accuracy": 0.68359375, "eval_allNLI-dev_dot_accuracy_threshold": 451.738525390625, "eval_allNLI-dev_dot_ap": 0.5121578786664749, "eval_allNLI-dev_dot_f1": 0.586433260393873, "eval_allNLI-dev_dot_f1_threshold": 380.15142822265625, "eval_allNLI-dev_dot_precision": 0.47183098591549294, "eval_allNLI-dev_dot_recall": 0.7745664739884393, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.108678817749023, "eval_allNLI-dev_euclidean_ap": 0.6051852582402737, "eval_allNLI-dev_euclidean_f1": 0.6255506607929515, "eval_allNLI-dev_euclidean_f1_threshold": 13.981431007385254, "eval_allNLI-dev_euclidean_precision": 0.505338078291815, "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 238.26559448242188, "eval_allNLI-dev_manhattan_ap": 0.6026811141388402, "eval_allNLI-dev_manhattan_f1": 0.6221198156682027, "eval_allNLI-dev_manhattan_f1_threshold": 281.940185546875, "eval_allNLI-dev_manhattan_precision": 0.5172413793103449, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 451.738525390625, "eval_allNLI-dev_max_ap": 0.6051852582402737, "eval_allNLI-dev_max_f1": 0.6255506607929515, "eval_allNLI-dev_max_f1_threshold": 380.15142822265625, "eval_allNLI-dev_max_precision": 0.5172413793103449, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7807881360124016, "eval_sts-test_pearson_cosine": 0.8147331088110416, "eval_sts-test_pearson_dot": 0.7837159177071007, "eval_sts-test_pearson_euclidean": 0.8484296507954121, "eval_sts-test_pearson_manhattan": 0.8460797493325178, "eval_sts-test_pearson_max": 0.8484296507954121, "eval_sts-test_spearman_cosine": 0.8464454065220337, "eval_sts-test_spearman_dot": 0.7560551082157521, "eval_sts-test_spearman_euclidean": 0.845518965908709, "eval_sts-test_spearman_manhattan": 0.842875732654626, "eval_sts-test_spearman_max": 0.8464454065220337, "eval_vitaminc-pairs_loss": 3.086657762527466, "eval_vitaminc-pairs_runtime": 3.224, "eval_vitaminc-pairs_samples_per_second": 39.702, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_negation-triplets_loss": 0.9766063690185547, "eval_negation-triplets_runtime": 0.7482, "eval_negation-triplets_samples_per_second": 171.078, "eval_negation-triplets_steps_per_second": 1.337, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_scitail-pairs-pos_loss": 0.1879463493824005, "eval_scitail-pairs-pos_runtime": 0.865, "eval_scitail-pairs-pos_samples_per_second": 147.976, "eval_scitail-pairs-pos_steps_per_second": 1.156, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_scitail-pairs-qa_loss": 0.0011153208324685693, "eval_scitail-pairs-qa_runtime": 0.5942, "eval_scitail-pairs-qa_samples_per_second": 215.424, "eval_scitail-pairs-qa_steps_per_second": 1.683, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_xsum-pairs_loss": 0.510924756526947, "eval_xsum-pairs_runtime": 3.0274, "eval_xsum-pairs_samples_per_second": 42.28, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_sciq_pairs_loss": 0.10803800821304321, "eval_sciq_pairs_runtime": 3.4741, "eval_sciq_pairs_samples_per_second": 36.844, "eval_sciq_pairs_steps_per_second": 0.288, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_qasc_pairs_loss": 0.3036348223686218, "eval_qasc_pairs_runtime": 0.611, "eval_qasc_pairs_samples_per_second": 209.496, "eval_qasc_pairs_steps_per_second": 1.637, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_openbookqa_pairs_loss": 0.962807297706604, "eval_openbookqa_pairs_runtime": 0.5915, "eval_openbookqa_pairs_samples_per_second": 216.396, "eval_openbookqa_pairs_steps_per_second": 1.691, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_msmarco_pairs_loss": 1.006622076034546, "eval_msmarco_pairs_runtime": 1.5255, "eval_msmarco_pairs_samples_per_second": 83.906, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_nq_pairs_loss": 1.0572720766067505, "eval_nq_pairs_runtime": 2.8974, "eval_nq_pairs_samples_per_second": 44.177, "eval_nq_pairs_steps_per_second": 0.345, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_trivia_pairs_loss": 1.1323765516281128, "eval_trivia_pairs_runtime": 3.4371, "eval_trivia_pairs_samples_per_second": 37.241, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_gooaq_pairs_loss": 0.5294322371482849, "eval_gooaq_pairs_runtime": 0.955, "eval_gooaq_pairs_samples_per_second": 134.034, "eval_gooaq_pairs_steps_per_second": 1.047, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_paws-pos_loss": 0.023268481716513634, "eval_paws-pos_runtime": 0.6979, "eval_paws-pos_samples_per_second": 183.41, "eval_paws-pos_steps_per_second": 1.433, "step": 1080 }, { "epoch": 1.1111111111111112, "eval_global_dataset_loss": 0.5384740233421326, "eval_global_dataset_runtime": 13.4027, "eval_global_dataset_samples_per_second": 31.039, "eval_global_dataset_steps_per_second": 0.298, "step": 1080 }, { "epoch": 1.1121399176954732, "grad_norm": 7.842824935913086, "learning_rate": 3.4872475119154036e-05, "loss": 0.5059, "step": 1081 }, { "epoch": 1.1131687242798354, "grad_norm": 6.858529090881348, "learning_rate": 3.487025179027135e-05, "loss": 0.3282, "step": 1082 }, { "epoch": 1.1141975308641976, "grad_norm": 13.405868530273438, "learning_rate": 3.4868009355598834e-05, "loss": 1.224, "step": 1083 }, { "epoch": 1.1152263374485596, "grad_norm": 7.0010175704956055, "learning_rate": 3.4865747818850104e-05, "loss": 0.4268, "step": 1084 }, { "epoch": 1.1162551440329218, "grad_norm": 6.69268798828125, "learning_rate": 3.4863467183770377e-05, "loss": 0.4377, "step": 1085 }, { "epoch": 1.117283950617284, "grad_norm": 8.589255332946777, "learning_rate": 3.486116745413652e-05, "loss": 0.4703, "step": 1086 }, { "epoch": 1.118312757201646, "grad_norm": 6.45457124710083, "learning_rate": 3.4858848633757014e-05, "loss": 0.3646, "step": 1087 }, { "epoch": 1.1193415637860082, "grad_norm": 7.386177062988281, "learning_rate": 3.485651072647195e-05, "loss": 0.4196, "step": 1088 }, { "epoch": 1.1203703703703705, "grad_norm": 10.973357200622559, "learning_rate": 3.485415373615305e-05, "loss": 0.835, "step": 1089 }, { "epoch": 1.1213991769547325, "grad_norm": 6.898292064666748, "learning_rate": 3.485177766670361e-05, "loss": 0.3469, "step": 1090 }, { "epoch": 1.1224279835390947, "grad_norm": 6.510031700134277, "learning_rate": 3.484938252205855e-05, "loss": 0.3437, "step": 1091 }, { "epoch": 1.123456790123457, "grad_norm": 8.573420524597168, "learning_rate": 3.4846968306184344e-05, "loss": 0.4888, "step": 1092 }, { "epoch": 1.124485596707819, "grad_norm": 8.766611099243164, "learning_rate": 3.484453502307909e-05, "loss": 0.8271, "step": 1093 }, { "epoch": 1.125514403292181, "grad_norm": 10.891227722167969, "learning_rate": 3.484208267677243e-05, "loss": 0.816, "step": 1094 }, { "epoch": 1.126543209876543, "grad_norm": 10.850860595703125, "learning_rate": 3.483961127132559e-05, "loss": 0.7494, "step": 1095 }, { "epoch": 1.1275720164609053, "grad_norm": 8.748696327209473, "learning_rate": 3.483712081083137e-05, "loss": 0.491, "step": 1096 }, { "epoch": 1.1286008230452675, "grad_norm": 8.8179292678833, "learning_rate": 3.4834611299414096e-05, "loss": 0.5567, "step": 1097 }, { "epoch": 1.1296296296296295, "grad_norm": 8.042120933532715, "learning_rate": 3.483208274122967e-05, "loss": 0.3713, "step": 1098 }, { "epoch": 1.1306584362139918, "grad_norm": 7.089449882507324, "learning_rate": 3.4829535140465525e-05, "loss": 0.2869, "step": 1099 }, { "epoch": 1.131687242798354, "grad_norm": 13.324177742004395, "learning_rate": 3.482696850134063e-05, "loss": 1.1908, "step": 1100 }, { "epoch": 1.131687242798354, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7852716445922852, "eval_Qnli-dev_cosine_ap": 0.7579178149830059, "eval_Qnli-dev_cosine_f1": 0.7042801556420233, "eval_Qnli-dev_cosine_f1_threshold": 0.7638626098632812, "eval_Qnli-dev_cosine_precision": 0.6510791366906474, "eval_Qnli-dev_cosine_recall": 0.7669491525423728, "eval_Qnli-dev_dot_accuracy": 0.66015625, "eval_Qnli-dev_dot_accuracy_threshold": 362.07611083984375, "eval_Qnli-dev_dot_ap": 0.6762055307597052, "eval_Qnli-dev_dot_f1": 0.6748091603053434, "eval_Qnli-dev_dot_f1_threshold": 307.3738098144531, "eval_Qnli-dev_dot_precision": 0.5274463007159904, "eval_Qnli-dev_dot_recall": 0.9364406779661016, "eval_Qnli-dev_euclidean_accuracy": 0.720703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.991537094116211, "eval_Qnli-dev_euclidean_ap": 0.7688638848850526, "eval_Qnli-dev_euclidean_f1": 0.7120622568093385, "eval_Qnli-dev_euclidean_f1_threshold": 15.058847427368164, "eval_Qnli-dev_euclidean_precision": 0.658273381294964, "eval_Qnli-dev_euclidean_recall": 0.7754237288135594, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 274.3743896484375, "eval_Qnli-dev_manhattan_ap": 0.7729072014366346, "eval_Qnli-dev_manhattan_f1": 0.7126865671641792, "eval_Qnli-dev_manhattan_f1_threshold": 321.27783203125, "eval_Qnli-dev_manhattan_precision": 0.6366666666666667, "eval_Qnli-dev_manhattan_recall": 0.809322033898305, "eval_Qnli-dev_max_accuracy": 0.720703125, "eval_Qnli-dev_max_accuracy_threshold": 362.07611083984375, "eval_Qnli-dev_max_ap": 0.7729072014366346, "eval_Qnli-dev_max_f1": 0.7126865671641792, "eval_Qnli-dev_max_f1_threshold": 321.27783203125, "eval_Qnli-dev_max_precision": 0.658273381294964, "eval_Qnli-dev_max_recall": 0.9364406779661016, "eval_allNLI-dev_cosine_accuracy": 0.73828125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8397592306137085, "eval_allNLI-dev_cosine_ap": 0.6164056434044329, "eval_allNLI-dev_cosine_f1": 0.6271604938271605, "eval_allNLI-dev_cosine_f1_threshold": 0.7978222370147705, "eval_allNLI-dev_cosine_precision": 0.5474137931034483, "eval_allNLI-dev_cosine_recall": 0.7341040462427746, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 391.6920471191406, "eval_allNLI-dev_dot_ap": 0.539862255241969, "eval_allNLI-dev_dot_f1": 0.6, "eval_allNLI-dev_dot_f1_threshold": 339.165771484375, "eval_allNLI-dev_dot_precision": 0.46905537459283386, "eval_allNLI-dev_dot_recall": 0.8323699421965318, "eval_allNLI-dev_euclidean_accuracy": 0.740234375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.107568740844727, "eval_allNLI-dev_euclidean_ap": 0.6213908286083798, "eval_allNLI-dev_euclidean_f1": 0.6368159203980099, "eval_allNLI-dev_euclidean_f1_threshold": 13.490726470947266, "eval_allNLI-dev_euclidean_precision": 0.5589519650655022, "eval_allNLI-dev_euclidean_recall": 0.7398843930635838, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 244.9213409423828, "eval_allNLI-dev_manhattan_ap": 0.6189091075630544, "eval_allNLI-dev_manhattan_f1": 0.6305882352941177, "eval_allNLI-dev_manhattan_f1_threshold": 288.1800231933594, "eval_allNLI-dev_manhattan_precision": 0.5317460317460317, "eval_allNLI-dev_manhattan_recall": 0.7745664739884393, "eval_allNLI-dev_max_accuracy": 0.740234375, "eval_allNLI-dev_max_accuracy_threshold": 391.6920471191406, "eval_allNLI-dev_max_ap": 0.6213908286083798, "eval_allNLI-dev_max_f1": 0.6368159203980099, "eval_allNLI-dev_max_f1_threshold": 339.165771484375, "eval_allNLI-dev_max_precision": 0.5589519650655022, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.7729072014366346, "eval_sts-test_pearson_cosine": 0.819789322785147, "eval_sts-test_pearson_dot": 0.8020751242857842, "eval_sts-test_pearson_euclidean": 0.850970032232464, "eval_sts-test_pearson_manhattan": 0.848322515154853, "eval_sts-test_pearson_max": 0.850970032232464, "eval_sts-test_spearman_cosine": 0.8521221935320554, "eval_sts-test_spearman_dot": 0.7862019297058526, "eval_sts-test_spearman_euclidean": 0.849302013790431, "eval_sts-test_spearman_manhattan": 0.8471720327532458, "eval_sts-test_spearman_max": 0.8521221935320554, "eval_vitaminc-pairs_loss": 3.150930404663086, "eval_vitaminc-pairs_runtime": 3.1937, "eval_vitaminc-pairs_samples_per_second": 40.078, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 1100 }, { "epoch": 1.131687242798354, "eval_negation-triplets_loss": 1.010820746421814, "eval_negation-triplets_runtime": 0.748, "eval_negation-triplets_samples_per_second": 171.12, "eval_negation-triplets_steps_per_second": 1.337, "step": 1100 }, { "epoch": 1.131687242798354, "eval_scitail-pairs-pos_loss": 0.1789112091064453, "eval_scitail-pairs-pos_runtime": 0.9574, "eval_scitail-pairs-pos_samples_per_second": 133.702, "eval_scitail-pairs-pos_steps_per_second": 1.045, "step": 1100 }, { "epoch": 1.131687242798354, "eval_scitail-pairs-qa_loss": 0.0007675637607462704, "eval_scitail-pairs-qa_runtime": 0.6066, "eval_scitail-pairs-qa_samples_per_second": 211.01, "eval_scitail-pairs-qa_steps_per_second": 1.649, "step": 1100 }, { "epoch": 1.131687242798354, "eval_xsum-pairs_loss": 0.4392455816268921, "eval_xsum-pairs_runtime": 3.0225, "eval_xsum-pairs_samples_per_second": 42.349, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1100 }, { "epoch": 1.131687242798354, "eval_sciq_pairs_loss": 0.10065167397260666, "eval_sciq_pairs_runtime": 3.4682, "eval_sciq_pairs_samples_per_second": 36.907, "eval_sciq_pairs_steps_per_second": 0.288, "step": 1100 }, { "epoch": 1.131687242798354, "eval_qasc_pairs_loss": 0.2483232170343399, "eval_qasc_pairs_runtime": 0.6184, "eval_qasc_pairs_samples_per_second": 206.995, "eval_qasc_pairs_steps_per_second": 1.617, "step": 1100 }, { "epoch": 1.131687242798354, "eval_openbookqa_pairs_loss": 0.9837387800216675, "eval_openbookqa_pairs_runtime": 0.5881, "eval_openbookqa_pairs_samples_per_second": 217.644, "eval_openbookqa_pairs_steps_per_second": 1.7, "step": 1100 }, { "epoch": 1.131687242798354, "eval_msmarco_pairs_loss": 0.9828016757965088, "eval_msmarco_pairs_runtime": 1.5194, "eval_msmarco_pairs_samples_per_second": 84.246, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 1100 }, { "epoch": 1.131687242798354, "eval_nq_pairs_loss": 0.9649257063865662, "eval_nq_pairs_runtime": 2.9079, "eval_nq_pairs_samples_per_second": 44.018, "eval_nq_pairs_steps_per_second": 0.344, "step": 1100 }, { "epoch": 1.131687242798354, "eval_trivia_pairs_loss": 1.0761849880218506, "eval_trivia_pairs_runtime": 3.4499, "eval_trivia_pairs_samples_per_second": 37.103, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1100 }, { "epoch": 1.131687242798354, "eval_gooaq_pairs_loss": 0.5363913178443909, "eval_gooaq_pairs_runtime": 0.9481, "eval_gooaq_pairs_samples_per_second": 135.008, "eval_gooaq_pairs_steps_per_second": 1.055, "step": 1100 }, { "epoch": 1.131687242798354, "eval_paws-pos_loss": 0.024601487442851067, "eval_paws-pos_runtime": 0.7005, "eval_paws-pos_samples_per_second": 182.735, "eval_paws-pos_steps_per_second": 1.428, "step": 1100 }, { "epoch": 1.131687242798354, "eval_global_dataset_loss": 0.5056115388870239, "eval_global_dataset_runtime": 13.4042, "eval_global_dataset_samples_per_second": 31.035, "eval_global_dataset_steps_per_second": 0.298, "step": 1100 }, { "epoch": 1.132716049382716, "grad_norm": 8.598079681396484, "learning_rate": 3.482438282810549e-05, "loss": 0.5443, "step": 1101 }, { "epoch": 1.1337448559670782, "grad_norm": 9.027626037597656, "learning_rate": 3.482177812504212e-05, "loss": 0.4682, "step": 1102 }, { "epoch": 1.1347736625514404, "grad_norm": 7.5870866775512695, "learning_rate": 3.4819154396464065e-05, "loss": 0.431, "step": 1103 }, { "epoch": 1.1358024691358024, "grad_norm": 13.918652534484863, "learning_rate": 3.4816511646716355e-05, "loss": 1.0068, "step": 1104 }, { "epoch": 1.1368312757201646, "grad_norm": 11.70114803314209, "learning_rate": 3.481384988017555e-05, "loss": 1.0226, "step": 1105 }, { "epoch": 1.1378600823045268, "grad_norm": 7.77272367477417, "learning_rate": 3.481116910124967e-05, "loss": 0.3054, "step": 1106 }, { "epoch": 1.1388888888888888, "grad_norm": 0.5209324955940247, "learning_rate": 3.480846931437824e-05, "loss": 0.0094, "step": 1107 }, { "epoch": 1.139917695473251, "grad_norm": 8.121362686157227, "learning_rate": 3.480575052403227e-05, "loss": 0.43, "step": 1108 }, { "epoch": 1.1409465020576133, "grad_norm": 13.162841796875, "learning_rate": 3.480301273471422e-05, "loss": 1.3944, "step": 1109 }, { "epoch": 1.1419753086419753, "grad_norm": 1.2725361585617065, "learning_rate": 3.480025595095803e-05, "loss": 0.064, "step": 1110 }, { "epoch": 1.1430041152263375, "grad_norm": 6.89387845993042, "learning_rate": 3.4797480177329084e-05, "loss": 0.3928, "step": 1111 }, { "epoch": 1.1440329218106995, "grad_norm": 6.913538455963135, "learning_rate": 3.4794685418424226e-05, "loss": 0.3311, "step": 1112 }, { "epoch": 1.1450617283950617, "grad_norm": 10.783838272094727, "learning_rate": 3.479187167887173e-05, "loss": 1.11, "step": 1113 }, { "epoch": 1.146090534979424, "grad_norm": 16.7984676361084, "learning_rate": 3.4789038963331315e-05, "loss": 1.585, "step": 1114 }, { "epoch": 1.147119341563786, "grad_norm": 9.383954048156738, "learning_rate": 3.478618727649411e-05, "loss": 0.684, "step": 1115 }, { "epoch": 1.1481481481481481, "grad_norm": 12.388830184936523, "learning_rate": 3.4783316623082674e-05, "loss": 1.2634, "step": 1116 }, { "epoch": 1.1491769547325104, "grad_norm": 10.167396545410156, "learning_rate": 3.478042700785097e-05, "loss": 0.5913, "step": 1117 }, { "epoch": 1.1502057613168724, "grad_norm": 6.721423149108887, "learning_rate": 3.477751843558437e-05, "loss": 0.3623, "step": 1118 }, { "epoch": 1.1512345679012346, "grad_norm": 6.884538173675537, "learning_rate": 3.477459091109962e-05, "loss": 0.4191, "step": 1119 }, { "epoch": 1.1522633744855968, "grad_norm": 6.610477924346924, "learning_rate": 3.4771644439244884e-05, "loss": 0.2817, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_Qnli-dev_cosine_accuracy": 0.697265625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8267828822135925, "eval_Qnli-dev_cosine_ap": 0.7544823712371981, "eval_Qnli-dev_cosine_f1": 0.7014388489208634, "eval_Qnli-dev_cosine_f1_threshold": 0.7697343230247498, "eval_Qnli-dev_cosine_precision": 0.609375, "eval_Qnli-dev_cosine_recall": 0.826271186440678, "eval_Qnli-dev_dot_accuracy": 0.658203125, "eval_Qnli-dev_dot_accuracy_threshold": 383.5416259765625, "eval_Qnli-dev_dot_ap": 0.684055735450432, "eval_Qnli-dev_dot_f1": 0.6751361161524502, "eval_Qnli-dev_dot_f1_threshold": 378.13201904296875, "eval_Qnli-dev_dot_precision": 0.5904761904761905, "eval_Qnli-dev_dot_recall": 0.788135593220339, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.577463150024414, "eval_Qnli-dev_euclidean_ap": 0.762148579885024, "eval_Qnli-dev_euclidean_f1": 0.6947368421052632, "eval_Qnli-dev_euclidean_f1_threshold": 15.448970794677734, "eval_Qnli-dev_euclidean_precision": 0.592814371257485, "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, "eval_Qnli-dev_manhattan_accuracy": 0.720703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 270.780517578125, "eval_Qnli-dev_manhattan_ap": 0.76574766189019, "eval_Qnli-dev_manhattan_f1": 0.7012987012987013, "eval_Qnli-dev_manhattan_f1_threshold": 308.033203125, "eval_Qnli-dev_manhattan_precision": 0.6237623762376238, "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, "eval_Qnli-dev_max_accuracy": 0.720703125, "eval_Qnli-dev_max_accuracy_threshold": 383.5416259765625, "eval_Qnli-dev_max_ap": 0.76574766189019, "eval_Qnli-dev_max_f1": 0.7014388489208634, "eval_Qnli-dev_max_f1_threshold": 378.13201904296875, "eval_Qnli-dev_max_precision": 0.6237623762376238, "eval_Qnli-dev_max_recall": 0.8389830508474576, "eval_allNLI-dev_cosine_accuracy": 0.720703125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.894988477230072, "eval_allNLI-dev_cosine_ap": 0.6092571598538613, "eval_allNLI-dev_cosine_f1": 0.6256157635467982, "eval_allNLI-dev_cosine_f1_threshold": 0.8311195373535156, "eval_allNLI-dev_cosine_precision": 0.5450643776824035, "eval_allNLI-dev_cosine_recall": 0.7341040462427746, "eval_allNLI-dev_dot_accuracy": 0.69140625, "eval_allNLI-dev_dot_accuracy_threshold": 468.3143310546875, "eval_allNLI-dev_dot_ap": 0.5304357149076337, "eval_allNLI-dev_dot_f1": 0.5925925925925927, "eval_allNLI-dev_dot_f1_threshold": 394.60333251953125, "eval_allNLI-dev_dot_precision": 0.4755244755244755, "eval_allNLI-dev_dot_recall": 0.7861271676300579, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.653585433959961, "eval_allNLI-dev_euclidean_ap": 0.6154891058640852, "eval_allNLI-dev_euclidean_f1": 0.6350000000000001, "eval_allNLI-dev_euclidean_f1_threshold": 12.820733070373535, "eval_allNLI-dev_euclidean_precision": 0.5594713656387665, "eval_allNLI-dev_euclidean_recall": 0.7341040462427746, "eval_allNLI-dev_manhattan_accuracy": 0.724609375, "eval_allNLI-dev_manhattan_accuracy_threshold": 224.76516723632812, "eval_allNLI-dev_manhattan_ap": 0.6117858125138088, "eval_allNLI-dev_manhattan_f1": 0.6413301662707839, "eval_allNLI-dev_manhattan_f1_threshold": 272.82696533203125, "eval_allNLI-dev_manhattan_precision": 0.5443548387096774, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 468.3143310546875, "eval_allNLI-dev_max_ap": 0.6154891058640852, "eval_allNLI-dev_max_f1": 0.6413301662707839, "eval_allNLI-dev_max_f1_threshold": 394.60333251953125, "eval_allNLI-dev_max_precision": 0.5594713656387665, "eval_allNLI-dev_max_recall": 0.7861271676300579, "eval_sequential_score": 0.76574766189019, "eval_sts-test_pearson_cosine": 0.8152326265483406, "eval_sts-test_pearson_dot": 0.7716218165990644, "eval_sts-test_pearson_euclidean": 0.8506633646999144, "eval_sts-test_pearson_manhattan": 0.8464190305111586, "eval_sts-test_pearson_max": 0.8506633646999144, "eval_sts-test_spearman_cosine": 0.8489454804909258, "eval_sts-test_spearman_dot": 0.7555045334468536, "eval_sts-test_spearman_euclidean": 0.8484793961035343, "eval_sts-test_spearman_manhattan": 0.8435001470048201, "eval_sts-test_spearman_max": 0.8489454804909258, "eval_vitaminc-pairs_loss": 2.7898471355438232, "eval_vitaminc-pairs_runtime": 3.2051, "eval_vitaminc-pairs_samples_per_second": 39.937, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_negation-triplets_loss": 0.9653336405754089, "eval_negation-triplets_runtime": 0.7819, "eval_negation-triplets_samples_per_second": 163.71, "eval_negation-triplets_steps_per_second": 1.279, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_scitail-pairs-pos_loss": 0.18886801600456238, "eval_scitail-pairs-pos_runtime": 0.8649, "eval_scitail-pairs-pos_samples_per_second": 147.987, "eval_scitail-pairs-pos_steps_per_second": 1.156, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_scitail-pairs-qa_loss": 0.0019331619841977954, "eval_scitail-pairs-qa_runtime": 0.5969, "eval_scitail-pairs-qa_samples_per_second": 214.449, "eval_scitail-pairs-qa_steps_per_second": 1.675, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_xsum-pairs_loss": 0.39326396584510803, "eval_xsum-pairs_runtime": 3.0216, "eval_xsum-pairs_samples_per_second": 42.362, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_sciq_pairs_loss": 0.09701427817344666, "eval_sciq_pairs_runtime": 3.4815, "eval_sciq_pairs_samples_per_second": 36.766, "eval_sciq_pairs_steps_per_second": 0.287, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_qasc_pairs_loss": 0.2738649249076843, "eval_qasc_pairs_runtime": 0.6237, "eval_qasc_pairs_samples_per_second": 205.243, "eval_qasc_pairs_steps_per_second": 1.603, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_openbookqa_pairs_loss": 1.004379391670227, "eval_openbookqa_pairs_runtime": 0.6019, "eval_openbookqa_pairs_samples_per_second": 212.652, "eval_openbookqa_pairs_steps_per_second": 1.661, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_msmarco_pairs_loss": 0.8981168866157532, "eval_msmarco_pairs_runtime": 1.5342, "eval_msmarco_pairs_samples_per_second": 83.432, "eval_msmarco_pairs_steps_per_second": 0.652, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_nq_pairs_loss": 0.8773314952850342, "eval_nq_pairs_runtime": 2.9229, "eval_nq_pairs_samples_per_second": 43.792, "eval_nq_pairs_steps_per_second": 0.342, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_trivia_pairs_loss": 0.9639384746551514, "eval_trivia_pairs_runtime": 3.4419, "eval_trivia_pairs_samples_per_second": 37.189, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_gooaq_pairs_loss": 0.46417081356048584, "eval_gooaq_pairs_runtime": 0.9594, "eval_gooaq_pairs_samples_per_second": 133.412, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_paws-pos_loss": 0.023940226063132286, "eval_paws-pos_runtime": 0.6937, "eval_paws-pos_samples_per_second": 184.508, "eval_paws-pos_steps_per_second": 1.441, "step": 1120 }, { "epoch": 1.1522633744855968, "eval_global_dataset_loss": 0.44758284091949463, "eval_global_dataset_runtime": 13.4266, "eval_global_dataset_samples_per_second": 30.983, "eval_global_dataset_steps_per_second": 0.298, "step": 1120 }, { "epoch": 1.1532921810699588, "grad_norm": 6.409517765045166, "learning_rate": 3.476867902489967e-05, "loss": 0.5464, "step": 1121 }, { "epoch": 1.154320987654321, "grad_norm": 0.6689707636833191, "learning_rate": 3.476569467297489e-05, "loss": 0.0391, "step": 1122 }, { "epoch": 1.155349794238683, "grad_norm": 8.022232055664062, "learning_rate": 3.4762691388412786e-05, "loss": 0.4436, "step": 1123 }, { "epoch": 1.1563786008230452, "grad_norm": 7.272008895874023, "learning_rate": 3.475966917618697e-05, "loss": 0.3815, "step": 1124 }, { "epoch": 1.1574074074074074, "grad_norm": 7.061352252960205, "learning_rate": 3.47566280413024e-05, "loss": 0.3715, "step": 1125 }, { "epoch": 1.1584362139917697, "grad_norm": 11.564949035644531, "learning_rate": 3.475356798879536e-05, "loss": 0.7445, "step": 1126 }, { "epoch": 1.1594650205761317, "grad_norm": 8.554880142211914, "learning_rate": 3.475048902373347e-05, "loss": 0.5149, "step": 1127 }, { "epoch": 1.1604938271604939, "grad_norm": 8.931382179260254, "learning_rate": 3.474739115121569e-05, "loss": 0.5129, "step": 1128 }, { "epoch": 1.1615226337448559, "grad_norm": 8.221881866455078, "learning_rate": 3.474427437637224e-05, "loss": 0.6473, "step": 1129 }, { "epoch": 1.162551440329218, "grad_norm": 7.380521297454834, "learning_rate": 3.4741138704364704e-05, "loss": 0.362, "step": 1130 }, { "epoch": 1.1635802469135803, "grad_norm": 13.867860794067383, "learning_rate": 3.4737984140385914e-05, "loss": 1.4168, "step": 1131 }, { "epoch": 1.1646090534979423, "grad_norm": 15.745050430297852, "learning_rate": 3.4734810689660025e-05, "loss": 2.5522, "step": 1132 }, { "epoch": 1.1656378600823045, "grad_norm": 8.687469482421875, "learning_rate": 3.473161835744243e-05, "loss": 0.8093, "step": 1133 }, { "epoch": 1.1666666666666667, "grad_norm": 10.833587646484375, "learning_rate": 3.4728407149019825e-05, "loss": 0.9214, "step": 1134 }, { "epoch": 1.1676954732510287, "grad_norm": 6.623847484588623, "learning_rate": 3.472517706971015e-05, "loss": 0.4376, "step": 1135 }, { "epoch": 1.168724279835391, "grad_norm": 5.630647659301758, "learning_rate": 3.47219281248626e-05, "loss": 0.2709, "step": 1136 }, { "epoch": 1.1697530864197532, "grad_norm": 10.486451148986816, "learning_rate": 3.47186603198576e-05, "loss": 0.9257, "step": 1137 }, { "epoch": 1.1707818930041152, "grad_norm": 6.501081943511963, "learning_rate": 3.471537366010684e-05, "loss": 0.4183, "step": 1138 }, { "epoch": 1.1718106995884774, "grad_norm": 9.043843269348145, "learning_rate": 3.4712068151053196e-05, "loss": 0.7341, "step": 1139 }, { "epoch": 1.1728395061728394, "grad_norm": 7.681128978729248, "learning_rate": 3.4708743798170794e-05, "loss": 0.9119, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_Qnli-dev_cosine_accuracy": 0.724609375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8039048910140991, "eval_Qnli-dev_cosine_ap": 0.7787459564025726, "eval_Qnli-dev_cosine_f1": 0.7116564417177914, "eval_Qnli-dev_cosine_f1_threshold": 0.8039048910140991, "eval_Qnli-dev_cosine_precision": 0.6877470355731226, "eval_Qnli-dev_cosine_recall": 0.7372881355932204, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 403.3175964355469, "eval_Qnli-dev_dot_ap": 0.7102945682759505, "eval_Qnli-dev_dot_f1": 0.6881720430107527, "eval_Qnli-dev_dot_f1_threshold": 373.3174743652344, "eval_Qnli-dev_dot_precision": 0.5962732919254659, "eval_Qnli-dev_dot_recall": 0.8135593220338984, "eval_Qnli-dev_euclidean_accuracy": 0.732421875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.80968189239502, "eval_Qnli-dev_euclidean_ap": 0.7834118400292112, "eval_Qnli-dev_euclidean_f1": 0.7209775967413441, "eval_Qnli-dev_euclidean_f1_threshold": 13.910100936889648, "eval_Qnli-dev_euclidean_precision": 0.6941176470588235, "eval_Qnli-dev_euclidean_recall": 0.75, "eval_Qnli-dev_manhattan_accuracy": 0.732421875, "eval_Qnli-dev_manhattan_accuracy_threshold": 271.7544250488281, "eval_Qnli-dev_manhattan_ap": 0.7864954913072755, "eval_Qnli-dev_manhattan_f1": 0.7164750957854407, "eval_Qnli-dev_manhattan_f1_threshold": 300.81494140625, "eval_Qnli-dev_manhattan_precision": 0.6538461538461539, "eval_Qnli-dev_manhattan_recall": 0.7923728813559322, "eval_Qnli-dev_max_accuracy": 0.732421875, "eval_Qnli-dev_max_accuracy_threshold": 403.3175964355469, "eval_Qnli-dev_max_ap": 0.7864954913072755, "eval_Qnli-dev_max_f1": 0.7209775967413441, "eval_Qnli-dev_max_f1_threshold": 373.3174743652344, "eval_Qnli-dev_max_precision": 0.6941176470588235, "eval_Qnli-dev_max_recall": 0.8135593220338984, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8706444501876831, "eval_allNLI-dev_cosine_ap": 0.6094726027413053, "eval_allNLI-dev_cosine_f1": 0.6515837104072397, "eval_allNLI-dev_cosine_f1_threshold": 0.8078594207763672, "eval_allNLI-dev_cosine_precision": 0.5353159851301115, "eval_allNLI-dev_cosine_recall": 0.8323699421965318, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 420.6922302246094, "eval_allNLI-dev_dot_ap": 0.543111680181283, "eval_allNLI-dev_dot_f1": 0.6101694915254238, "eval_allNLI-dev_dot_f1_threshold": 378.4337158203125, "eval_allNLI-dev_dot_precision": 0.4816053511705686, "eval_allNLI-dev_dot_recall": 0.8323699421965318, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.275704383850098, "eval_allNLI-dev_euclidean_ap": 0.6133755460229069, "eval_allNLI-dev_euclidean_f1": 0.6530612244897959, "eval_allNLI-dev_euclidean_f1_threshold": 13.450389862060547, "eval_allNLI-dev_euclidean_precision": 0.5373134328358209, "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, "eval_allNLI-dev_manhattan_accuracy": 0.724609375, "eval_allNLI-dev_manhattan_accuracy_threshold": 232.1541748046875, "eval_allNLI-dev_manhattan_ap": 0.6111577829536877, "eval_allNLI-dev_manhattan_f1": 0.6387665198237885, "eval_allNLI-dev_manhattan_f1_threshold": 284.5680847167969, "eval_allNLI-dev_manhattan_precision": 0.5160142348754448, "eval_allNLI-dev_manhattan_recall": 0.838150289017341, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 420.6922302246094, "eval_allNLI-dev_max_ap": 0.6133755460229069, "eval_allNLI-dev_max_f1": 0.6530612244897959, "eval_allNLI-dev_max_f1_threshold": 378.4337158203125, "eval_allNLI-dev_max_precision": 0.5373134328358209, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7864954913072755, "eval_sts-test_pearson_cosine": 0.8100150301098641, "eval_sts-test_pearson_dot": 0.7901249254403291, "eval_sts-test_pearson_euclidean": 0.8439240945658539, "eval_sts-test_pearson_manhattan": 0.8398801696817724, "eval_sts-test_pearson_max": 0.8439240945658539, "eval_sts-test_spearman_cosine": 0.8440087415800419, "eval_sts-test_spearman_dot": 0.7766431945264338, "eval_sts-test_spearman_euclidean": 0.8403773592336669, "eval_sts-test_spearman_manhattan": 0.8364035367246335, "eval_sts-test_spearman_max": 0.8440087415800419, "eval_vitaminc-pairs_loss": 2.794574737548828, "eval_vitaminc-pairs_runtime": 3.2146, "eval_vitaminc-pairs_samples_per_second": 39.819, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_negation-triplets_loss": 0.9698534607887268, "eval_negation-triplets_runtime": 0.7629, "eval_negation-triplets_samples_per_second": 167.777, "eval_negation-triplets_steps_per_second": 1.311, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_scitail-pairs-pos_loss": 0.1657916158437729, "eval_scitail-pairs-pos_runtime": 0.8646, "eval_scitail-pairs-pos_samples_per_second": 148.051, "eval_scitail-pairs-pos_steps_per_second": 1.157, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_scitail-pairs-qa_loss": 0.0017395657487213612, "eval_scitail-pairs-qa_runtime": 0.6005, "eval_scitail-pairs-qa_samples_per_second": 213.167, "eval_scitail-pairs-qa_steps_per_second": 1.665, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_xsum-pairs_loss": 0.43798020482063293, "eval_xsum-pairs_runtime": 3.0212, "eval_xsum-pairs_samples_per_second": 42.368, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_sciq_pairs_loss": 0.10038822144269943, "eval_sciq_pairs_runtime": 3.4671, "eval_sciq_pairs_samples_per_second": 36.918, "eval_sciq_pairs_steps_per_second": 0.288, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_qasc_pairs_loss": 0.3080596625804901, "eval_qasc_pairs_runtime": 0.6106, "eval_qasc_pairs_samples_per_second": 209.621, "eval_qasc_pairs_steps_per_second": 1.638, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_openbookqa_pairs_loss": 1.0763821601867676, "eval_openbookqa_pairs_runtime": 0.594, "eval_openbookqa_pairs_samples_per_second": 215.489, "eval_openbookqa_pairs_steps_per_second": 1.684, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_msmarco_pairs_loss": 1.0080692768096924, "eval_msmarco_pairs_runtime": 1.5236, "eval_msmarco_pairs_samples_per_second": 84.009, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_nq_pairs_loss": 0.9931175708770752, "eval_nq_pairs_runtime": 2.9136, "eval_nq_pairs_samples_per_second": 43.932, "eval_nq_pairs_steps_per_second": 0.343, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_trivia_pairs_loss": 0.9768161177635193, "eval_trivia_pairs_runtime": 3.4417, "eval_trivia_pairs_samples_per_second": 37.191, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_gooaq_pairs_loss": 0.49635979533195496, "eval_gooaq_pairs_runtime": 0.9526, "eval_gooaq_pairs_samples_per_second": 134.373, "eval_gooaq_pairs_steps_per_second": 1.05, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_paws-pos_loss": 0.024062106385827065, "eval_paws-pos_runtime": 0.6914, "eval_paws-pos_samples_per_second": 185.134, "eval_paws-pos_steps_per_second": 1.446, "step": 1140 }, { "epoch": 1.1728395061728394, "eval_global_dataset_loss": 0.4706956744194031, "eval_global_dataset_runtime": 13.395, "eval_global_dataset_samples_per_second": 31.056, "eval_global_dataset_steps_per_second": 0.299, "step": 1140 }, { "epoch": 1.1738683127572016, "grad_norm": 9.898369789123535, "learning_rate": 3.470540060696494e-05, "loss": 0.5853, "step": 1141 }, { "epoch": 1.1748971193415638, "grad_norm": 5.598046779632568, "learning_rate": 3.470203858297216e-05, "loss": 0.2389, "step": 1142 }, { "epoch": 1.175925925925926, "grad_norm": 4.493359565734863, "learning_rate": 3.4698657731760135e-05, "loss": 0.2277, "step": 1143 }, { "epoch": 1.176954732510288, "grad_norm": 10.61166000366211, "learning_rate": 3.469525805892776e-05, "loss": 0.7624, "step": 1144 }, { "epoch": 1.1779835390946503, "grad_norm": 12.680320739746094, "learning_rate": 3.469183957010508e-05, "loss": 1.1422, "step": 1145 }, { "epoch": 1.1790123456790123, "grad_norm": 7.758006572723389, "learning_rate": 3.4688402270953314e-05, "loss": 0.3251, "step": 1146 }, { "epoch": 1.1800411522633745, "grad_norm": 2.305454730987549, "learning_rate": 3.4684946167164815e-05, "loss": 0.0637, "step": 1147 }, { "epoch": 1.1810699588477367, "grad_norm": 9.96699047088623, "learning_rate": 3.46814712644631e-05, "loss": 0.5474, "step": 1148 }, { "epoch": 1.1820987654320987, "grad_norm": 9.514446258544922, "learning_rate": 3.467797756860279e-05, "loss": 0.5393, "step": 1149 }, { "epoch": 1.183127572016461, "grad_norm": 3.7625515460968018, "learning_rate": 3.4674465085369644e-05, "loss": 0.0976, "step": 1150 }, { "epoch": 1.1841563786008231, "grad_norm": 12.09719181060791, "learning_rate": 3.467093382058054e-05, "loss": 0.736, "step": 1151 }, { "epoch": 1.1851851851851851, "grad_norm": 19.571088790893555, "learning_rate": 3.466738378008345e-05, "loss": 3.0791, "step": 1152 }, { "epoch": 1.1862139917695473, "grad_norm": 10.0186767578125, "learning_rate": 3.466381496975744e-05, "loss": 0.6167, "step": 1153 }, { "epoch": 1.1872427983539096, "grad_norm": 8.159412384033203, "learning_rate": 3.466022739551267e-05, "loss": 0.4412, "step": 1154 }, { "epoch": 1.1882716049382716, "grad_norm": 9.59557056427002, "learning_rate": 3.465662106329035e-05, "loss": 0.5349, "step": 1155 }, { "epoch": 1.1893004115226338, "grad_norm": 8.530840873718262, "learning_rate": 3.4652995979062786e-05, "loss": 0.4062, "step": 1156 }, { "epoch": 1.1903292181069958, "grad_norm": 11.586019515991211, "learning_rate": 3.4649352148833314e-05, "loss": 0.8493, "step": 1157 }, { "epoch": 1.191358024691358, "grad_norm": 11.703004837036133, "learning_rate": 3.4645689578636324e-05, "loss": 0.9965, "step": 1158 }, { "epoch": 1.1923868312757202, "grad_norm": 6.959652423858643, "learning_rate": 3.464200827453724e-05, "loss": 0.3258, "step": 1159 }, { "epoch": 1.1934156378600824, "grad_norm": 13.127931594848633, "learning_rate": 3.463830824263251e-05, "loss": 1.2638, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7884823679924011, "eval_Qnli-dev_cosine_ap": 0.7781013870186712, "eval_Qnli-dev_cosine_f1": 0.7172675521821632, "eval_Qnli-dev_cosine_f1_threshold": 0.7717980146408081, "eval_Qnli-dev_cosine_precision": 0.6494845360824743, "eval_Qnli-dev_cosine_recall": 0.8008474576271186, "eval_Qnli-dev_dot_accuracy": 0.669921875, "eval_Qnli-dev_dot_accuracy_threshold": 396.5754089355469, "eval_Qnli-dev_dot_ap": 0.6988280377964569, "eval_Qnli-dev_dot_f1": 0.6963979416809606, "eval_Qnli-dev_dot_f1_threshold": 351.659912109375, "eval_Qnli-dev_dot_precision": 0.5850144092219021, "eval_Qnli-dev_dot_recall": 0.8601694915254238, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 12.646437644958496, "eval_Qnli-dev_euclidean_ap": 0.7848777973576782, "eval_Qnli-dev_euclidean_f1": 0.7099236641221374, "eval_Qnli-dev_euclidean_f1_threshold": 14.714933395385742, "eval_Qnli-dev_euclidean_precision": 0.6458333333333334, "eval_Qnli-dev_euclidean_recall": 0.788135593220339, "eval_Qnli-dev_manhattan_accuracy": 0.728515625, "eval_Qnli-dev_manhattan_accuracy_threshold": 277.93170166015625, "eval_Qnli-dev_manhattan_ap": 0.7871009974620432, "eval_Qnli-dev_manhattan_f1": 0.710172744721689, "eval_Qnli-dev_manhattan_f1_threshold": 305.610595703125, "eval_Qnli-dev_manhattan_precision": 0.6491228070175439, "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, "eval_Qnli-dev_max_accuracy": 0.728515625, "eval_Qnli-dev_max_accuracy_threshold": 396.5754089355469, "eval_Qnli-dev_max_ap": 0.7871009974620432, "eval_Qnli-dev_max_f1": 0.7172675521821632, "eval_Qnli-dev_max_f1_threshold": 351.659912109375, "eval_Qnli-dev_max_precision": 0.6494845360824743, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8916759490966797, "eval_allNLI-dev_cosine_ap": 0.6141079268345387, "eval_allNLI-dev_cosine_f1": 0.6361556064073227, "eval_allNLI-dev_cosine_f1_threshold": 0.8081791400909424, "eval_allNLI-dev_cosine_precision": 0.5265151515151515, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 442.17108154296875, "eval_allNLI-dev_dot_ap": 0.5355526369236585, "eval_allNLI-dev_dot_f1": 0.5909980430528375, "eval_allNLI-dev_dot_f1_threshold": 359.3589782714844, "eval_allNLI-dev_dot_precision": 0.4467455621301775, "eval_allNLI-dev_dot_recall": 0.8728323699421965, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.124547958374023, "eval_allNLI-dev_euclidean_ap": 0.6183305754610903, "eval_allNLI-dev_euclidean_f1": 0.6453089244851259, "eval_allNLI-dev_euclidean_f1_threshold": 13.58721923828125, "eval_allNLI-dev_euclidean_precision": 0.5340909090909091, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 228.35107421875, "eval_allNLI-dev_manhattan_ap": 0.6137978661021017, "eval_allNLI-dev_manhattan_f1": 0.6401766004415012, "eval_allNLI-dev_manhattan_f1_threshold": 287.4211730957031, "eval_allNLI-dev_manhattan_precision": 0.5178571428571429, "eval_allNLI-dev_manhattan_recall": 0.838150289017341, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 442.17108154296875, "eval_allNLI-dev_max_ap": 0.6183305754610903, "eval_allNLI-dev_max_f1": 0.6453089244851259, "eval_allNLI-dev_max_f1_threshold": 359.3589782714844, "eval_allNLI-dev_max_precision": 0.5340909090909091, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.7871009974620432, "eval_sts-test_pearson_cosine": 0.820146580927034, "eval_sts-test_pearson_dot": 0.7810240390511689, "eval_sts-test_pearson_euclidean": 0.8541656559615043, "eval_sts-test_pearson_manhattan": 0.8504960752425201, "eval_sts-test_pearson_max": 0.8541656559615043, "eval_sts-test_spearman_cosine": 0.8510599328404078, "eval_sts-test_spearman_dot": 0.7624339977657435, "eval_sts-test_spearman_euclidean": 0.8496104394839404, "eval_sts-test_spearman_manhattan": 0.846809592287237, "eval_sts-test_spearman_max": 0.8510599328404078, "eval_vitaminc-pairs_loss": 2.661559581756592, "eval_vitaminc-pairs_runtime": 3.1856, "eval_vitaminc-pairs_samples_per_second": 40.18, "eval_vitaminc-pairs_steps_per_second": 0.314, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_negation-triplets_loss": 0.9959814548492432, "eval_negation-triplets_runtime": 0.747, "eval_negation-triplets_samples_per_second": 171.355, "eval_negation-triplets_steps_per_second": 1.339, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_scitail-pairs-pos_loss": 0.15917012095451355, "eval_scitail-pairs-pos_runtime": 0.8767, "eval_scitail-pairs-pos_samples_per_second": 145.994, "eval_scitail-pairs-pos_steps_per_second": 1.141, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_scitail-pairs-qa_loss": 0.0012910376535728574, "eval_scitail-pairs-qa_runtime": 0.5892, "eval_scitail-pairs-qa_samples_per_second": 217.258, "eval_scitail-pairs-qa_steps_per_second": 1.697, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_xsum-pairs_loss": 0.4237976372241974, "eval_xsum-pairs_runtime": 3.0191, "eval_xsum-pairs_samples_per_second": 42.397, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_sciq_pairs_loss": 0.10055720806121826, "eval_sciq_pairs_runtime": 3.4827, "eval_sciq_pairs_samples_per_second": 36.753, "eval_sciq_pairs_steps_per_second": 0.287, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_qasc_pairs_loss": 0.2926960587501526, "eval_qasc_pairs_runtime": 0.6094, "eval_qasc_pairs_samples_per_second": 210.052, "eval_qasc_pairs_steps_per_second": 1.641, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_openbookqa_pairs_loss": 0.9382266998291016, "eval_openbookqa_pairs_runtime": 0.592, "eval_openbookqa_pairs_samples_per_second": 216.222, "eval_openbookqa_pairs_steps_per_second": 1.689, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_msmarco_pairs_loss": 0.9833506941795349, "eval_msmarco_pairs_runtime": 1.53, "eval_msmarco_pairs_samples_per_second": 83.658, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_nq_pairs_loss": 1.0057848691940308, "eval_nq_pairs_runtime": 2.8963, "eval_nq_pairs_samples_per_second": 44.195, "eval_nq_pairs_steps_per_second": 0.345, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_trivia_pairs_loss": 1.0242230892181396, "eval_trivia_pairs_runtime": 3.4471, "eval_trivia_pairs_samples_per_second": 37.133, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_gooaq_pairs_loss": 0.5412207841873169, "eval_gooaq_pairs_runtime": 0.9554, "eval_gooaq_pairs_samples_per_second": 133.977, "eval_gooaq_pairs_steps_per_second": 1.047, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_paws-pos_loss": 0.023840811103582382, "eval_paws-pos_runtime": 0.7016, "eval_paws-pos_samples_per_second": 182.442, "eval_paws-pos_steps_per_second": 1.425, "step": 1160 }, { "epoch": 1.1934156378600824, "eval_global_dataset_loss": 0.4532192051410675, "eval_global_dataset_runtime": 13.3951, "eval_global_dataset_samples_per_second": 31.056, "eval_global_dataset_steps_per_second": 0.299, "step": 1160 }, { "epoch": 1.1944444444444444, "grad_norm": 7.833865165710449, "learning_rate": 3.46345894890496e-05, "loss": 0.3684, "step": 1161 }, { "epoch": 1.1954732510288066, "grad_norm": 9.430367469787598, "learning_rate": 3.463085201994697e-05, "loss": 0.7415, "step": 1162 }, { "epoch": 1.1965020576131686, "grad_norm": 9.99718952178955, "learning_rate": 3.4627095841514086e-05, "loss": 0.8754, "step": 1163 }, { "epoch": 1.1975308641975309, "grad_norm": 8.691899299621582, "learning_rate": 3.4623320959971386e-05, "loss": 0.8829, "step": 1164 }, { "epoch": 1.198559670781893, "grad_norm": 10.81286907196045, "learning_rate": 3.46195273815703e-05, "loss": 1.0219, "step": 1165 }, { "epoch": 1.199588477366255, "grad_norm": 8.638839721679688, "learning_rate": 3.461571511259319e-05, "loss": 0.5988, "step": 1166 }, { "epoch": 1.2006172839506173, "grad_norm": 10.151771545410156, "learning_rate": 3.46118841593534e-05, "loss": 0.736, "step": 1167 }, { "epoch": 1.2016460905349795, "grad_norm": 11.333561897277832, "learning_rate": 3.460803452819521e-05, "loss": 0.8933, "step": 1168 }, { "epoch": 1.2026748971193415, "grad_norm": 6.7991557121276855, "learning_rate": 3.4604166225493815e-05, "loss": 0.297, "step": 1169 }, { "epoch": 1.2037037037037037, "grad_norm": 9.933700561523438, "learning_rate": 3.460027925765535e-05, "loss": 0.4777, "step": 1170 }, { "epoch": 1.204732510288066, "grad_norm": 10.830368041992188, "learning_rate": 3.4596373631116855e-05, "loss": 0.7994, "step": 1171 }, { "epoch": 1.205761316872428, "grad_norm": 7.643840789794922, "learning_rate": 3.459244935234627e-05, "loss": 0.7322, "step": 1172 }, { "epoch": 1.2067901234567902, "grad_norm": 10.263672828674316, "learning_rate": 3.458850642784241e-05, "loss": 0.6891, "step": 1173 }, { "epoch": 1.2078189300411522, "grad_norm": 8.42940902709961, "learning_rate": 3.4584544864135e-05, "loss": 0.4517, "step": 1174 }, { "epoch": 1.2088477366255144, "grad_norm": 8.477632522583008, "learning_rate": 3.4580564667784594e-05, "loss": 0.3482, "step": 1175 }, { "epoch": 1.2098765432098766, "grad_norm": 7.089756011962891, "learning_rate": 3.4576565845382644e-05, "loss": 0.3659, "step": 1176 }, { "epoch": 1.2109053497942388, "grad_norm": 8.470621109008789, "learning_rate": 3.4572548403551405e-05, "loss": 0.4379, "step": 1177 }, { "epoch": 1.2119341563786008, "grad_norm": 15.569647789001465, "learning_rate": 3.4568512348944e-05, "loss": 1.4326, "step": 1178 }, { "epoch": 1.212962962962963, "grad_norm": 18.194835662841797, "learning_rate": 3.456445768824436e-05, "loss": 1.7587, "step": 1179 }, { "epoch": 1.213991769547325, "grad_norm": 14.349352836608887, "learning_rate": 3.456038442816724e-05, "loss": 2.3312, "step": 1180 }, { "epoch": 1.213991769547325, "eval_Qnli-dev_cosine_accuracy": 0.693359375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8052411079406738, "eval_Qnli-dev_cosine_ap": 0.7487309503825428, "eval_Qnli-dev_cosine_f1": 0.687615526802218, "eval_Qnli-dev_cosine_f1_threshold": 0.7747142314910889, "eval_Qnli-dev_cosine_precision": 0.6098360655737705, "eval_Qnli-dev_cosine_recall": 0.788135593220339, "eval_Qnli-dev_dot_accuracy": 0.65234375, "eval_Qnli-dev_dot_accuracy_threshold": 426.8647766113281, "eval_Qnli-dev_dot_ap": 0.6712063042870943, "eval_Qnli-dev_dot_f1": 0.6743421052631579, "eval_Qnli-dev_dot_f1_threshold": 352.2708740234375, "eval_Qnli-dev_dot_precision": 0.5510752688172043, "eval_Qnli-dev_dot_recall": 0.8686440677966102, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.538395881652832, "eval_Qnli-dev_euclidean_ap": 0.7583458440082088, "eval_Qnli-dev_euclidean_f1": 0.6911764705882353, "eval_Qnli-dev_euclidean_f1_threshold": 14.996305465698242, "eval_Qnli-dev_euclidean_precision": 0.6103896103896104, "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, "eval_Qnli-dev_manhattan_accuracy": 0.705078125, "eval_Qnli-dev_manhattan_accuracy_threshold": 286.489501953125, "eval_Qnli-dev_manhattan_ap": 0.7583351034380817, "eval_Qnli-dev_manhattan_f1": 0.6907630522088354, "eval_Qnli-dev_manhattan_f1_threshold": 295.4446716308594, "eval_Qnli-dev_manhattan_precision": 0.6564885496183206, "eval_Qnli-dev_manhattan_recall": 0.7288135593220338, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 426.8647766113281, "eval_Qnli-dev_max_ap": 0.7583458440082088, "eval_Qnli-dev_max_f1": 0.6911764705882353, "eval_Qnli-dev_max_f1_threshold": 352.2708740234375, "eval_Qnli-dev_max_precision": 0.6564885496183206, "eval_Qnli-dev_max_recall": 0.8686440677966102, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8811556696891785, "eval_allNLI-dev_cosine_ap": 0.6206064432918149, "eval_allNLI-dev_cosine_f1": 0.6299559471365639, "eval_allNLI-dev_cosine_f1_threshold": 0.786278486251831, "eval_allNLI-dev_cosine_precision": 0.5088967971530249, "eval_allNLI-dev_cosine_recall": 0.8265895953757225, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 458.428955078125, "eval_allNLI-dev_dot_ap": 0.5528922522101817, "eval_allNLI-dev_dot_f1": 0.6000000000000001, "eval_allNLI-dev_dot_f1_threshold": 379.02679443359375, "eval_allNLI-dev_dot_precision": 0.5019455252918288, "eval_allNLI-dev_dot_recall": 0.7456647398843931, "eval_allNLI-dev_euclidean_accuracy": 0.740234375, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.912817001342773, "eval_allNLI-dev_euclidean_ap": 0.6263417077919412, "eval_allNLI-dev_euclidean_f1": 0.6437768240343347, "eval_allNLI-dev_euclidean_f1_threshold": 14.441347122192383, "eval_allNLI-dev_euclidean_precision": 0.5119453924914675, "eval_allNLI-dev_euclidean_recall": 0.8670520231213873, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 229.67034912109375, "eval_allNLI-dev_manhattan_ap": 0.6233519858399221, "eval_allNLI-dev_manhattan_f1": 0.6391304347826088, "eval_allNLI-dev_manhattan_f1_threshold": 297.35443115234375, "eval_allNLI-dev_manhattan_precision": 0.5121951219512195, "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, "eval_allNLI-dev_max_accuracy": 0.740234375, "eval_allNLI-dev_max_accuracy_threshold": 458.428955078125, "eval_allNLI-dev_max_ap": 0.6263417077919412, "eval_allNLI-dev_max_f1": 0.6437768240343347, "eval_allNLI-dev_max_f1_threshold": 379.02679443359375, "eval_allNLI-dev_max_precision": 0.5121951219512195, "eval_allNLI-dev_max_recall": 0.8670520231213873, "eval_sequential_score": 0.7583458440082088, "eval_sts-test_pearson_cosine": 0.8269349269759517, "eval_sts-test_pearson_dot": 0.8126874113505397, "eval_sts-test_pearson_euclidean": 0.8597033444697677, "eval_sts-test_pearson_manhattan": 0.8579813103799231, "eval_sts-test_pearson_max": 0.8597033444697677, "eval_sts-test_spearman_cosine": 0.8598110867567574, "eval_sts-test_spearman_dot": 0.7956357193634666, "eval_sts-test_spearman_euclidean": 0.8565224060596263, "eval_sts-test_spearman_manhattan": 0.8547605936499083, "eval_sts-test_spearman_max": 0.8598110867567574, "eval_vitaminc-pairs_loss": 2.8959200382232666, "eval_vitaminc-pairs_runtime": 3.2217, "eval_vitaminc-pairs_samples_per_second": 39.73, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1180 }, { "epoch": 1.213991769547325, "eval_negation-triplets_loss": 0.9950482249259949, "eval_negation-triplets_runtime": 0.7594, "eval_negation-triplets_samples_per_second": 168.55, "eval_negation-triplets_steps_per_second": 1.317, "step": 1180 }, { "epoch": 1.213991769547325, "eval_scitail-pairs-pos_loss": 0.1581113338470459, "eval_scitail-pairs-pos_runtime": 0.8461, "eval_scitail-pairs-pos_samples_per_second": 151.284, "eval_scitail-pairs-pos_steps_per_second": 1.182, "step": 1180 }, { "epoch": 1.213991769547325, "eval_scitail-pairs-qa_loss": 0.0018547942163422704, "eval_scitail-pairs-qa_runtime": 0.5864, "eval_scitail-pairs-qa_samples_per_second": 218.28, "eval_scitail-pairs-qa_steps_per_second": 1.705, "step": 1180 }, { "epoch": 1.213991769547325, "eval_xsum-pairs_loss": 0.37494468688964844, "eval_xsum-pairs_runtime": 3.0309, "eval_xsum-pairs_samples_per_second": 42.232, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1180 }, { "epoch": 1.213991769547325, "eval_sciq_pairs_loss": 0.10013505816459656, "eval_sciq_pairs_runtime": 3.4619, "eval_sciq_pairs_samples_per_second": 36.974, "eval_sciq_pairs_steps_per_second": 0.289, "step": 1180 }, { "epoch": 1.213991769547325, "eval_qasc_pairs_loss": 0.2670193314552307, "eval_qasc_pairs_runtime": 0.6174, "eval_qasc_pairs_samples_per_second": 207.326, "eval_qasc_pairs_steps_per_second": 1.62, "step": 1180 }, { "epoch": 1.213991769547325, "eval_openbookqa_pairs_loss": 0.9293997287750244, "eval_openbookqa_pairs_runtime": 0.6032, "eval_openbookqa_pairs_samples_per_second": 212.191, "eval_openbookqa_pairs_steps_per_second": 1.658, "step": 1180 }, { "epoch": 1.213991769547325, "eval_msmarco_pairs_loss": 1.0548136234283447, "eval_msmarco_pairs_runtime": 1.5248, "eval_msmarco_pairs_samples_per_second": 83.943, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1180 }, { "epoch": 1.213991769547325, "eval_nq_pairs_loss": 1.0275964736938477, "eval_nq_pairs_runtime": 2.9112, "eval_nq_pairs_samples_per_second": 43.968, "eval_nq_pairs_steps_per_second": 0.344, "step": 1180 }, { "epoch": 1.213991769547325, "eval_trivia_pairs_loss": 1.0199402570724487, "eval_trivia_pairs_runtime": 3.4612, "eval_trivia_pairs_samples_per_second": 36.981, "eval_trivia_pairs_steps_per_second": 0.289, "step": 1180 }, { "epoch": 1.213991769547325, "eval_gooaq_pairs_loss": 0.5339373350143433, "eval_gooaq_pairs_runtime": 0.9598, "eval_gooaq_pairs_samples_per_second": 133.364, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 1180 }, { "epoch": 1.213991769547325, "eval_paws-pos_loss": 0.023452265188097954, "eval_paws-pos_runtime": 0.706, "eval_paws-pos_samples_per_second": 181.293, "eval_paws-pos_steps_per_second": 1.416, "step": 1180 }, { "epoch": 1.213991769547325, "eval_global_dataset_loss": 0.49108394980430603, "eval_global_dataset_runtime": 13.391, "eval_global_dataset_samples_per_second": 31.066, "eval_global_dataset_steps_per_second": 0.299, "step": 1180 }, { "epoch": 1.2150205761316872, "grad_norm": 9.289645195007324, "learning_rate": 3.455629257545818e-05, "loss": 0.5342, "step": 1181 }, { "epoch": 1.2160493827160495, "grad_norm": 10.020589828491211, "learning_rate": 3.4552182136893516e-05, "loss": 0.8755, "step": 1182 }, { "epoch": 1.2170781893004115, "grad_norm": 6.009487152099609, "learning_rate": 3.4548053119280386e-05, "loss": 0.2909, "step": 1183 }, { "epoch": 1.2181069958847737, "grad_norm": 8.08018970489502, "learning_rate": 3.454390552945665e-05, "loss": 0.4853, "step": 1184 }, { "epoch": 1.2191358024691359, "grad_norm": 7.848354339599609, "learning_rate": 3.453973937429098e-05, "loss": 0.3582, "step": 1185 }, { "epoch": 1.2201646090534979, "grad_norm": 8.562787055969238, "learning_rate": 3.453555466068275e-05, "loss": 0.7855, "step": 1186 }, { "epoch": 1.22119341563786, "grad_norm": 7.858839511871338, "learning_rate": 3.4531351395562074e-05, "loss": 0.4932, "step": 1187 }, { "epoch": 1.2222222222222223, "grad_norm": 9.985002517700195, "learning_rate": 3.4527129585889806e-05, "loss": 0.7346, "step": 1188 }, { "epoch": 1.2232510288065843, "grad_norm": 8.69581127166748, "learning_rate": 3.452288923865751e-05, "loss": 0.853, "step": 1189 }, { "epoch": 1.2242798353909465, "grad_norm": 6.780130863189697, "learning_rate": 3.451863036088743e-05, "loss": 0.2163, "step": 1190 }, { "epoch": 1.2253086419753085, "grad_norm": 7.74264669418335, "learning_rate": 3.451435295963251e-05, "loss": 0.837, "step": 1191 }, { "epoch": 1.2263374485596708, "grad_norm": 7.056318283081055, "learning_rate": 3.451005704197637e-05, "loss": 0.3778, "step": 1192 }, { "epoch": 1.227366255144033, "grad_norm": 13.02022647857666, "learning_rate": 3.450574261503329e-05, "loss": 1.1152, "step": 1193 }, { "epoch": 1.228395061728395, "grad_norm": 1.0287100076675415, "learning_rate": 3.450140968594821e-05, "loss": 0.0215, "step": 1194 }, { "epoch": 1.2294238683127572, "grad_norm": 1.984660029411316, "learning_rate": 3.44970582618967e-05, "loss": 0.0653, "step": 1195 }, { "epoch": 1.2304526748971194, "grad_norm": 2.628512144088745, "learning_rate": 3.4492688350084975e-05, "loss": 0.0659, "step": 1196 }, { "epoch": 1.2314814814814814, "grad_norm": 12.873212814331055, "learning_rate": 3.448829995774985e-05, "loss": 0.9698, "step": 1197 }, { "epoch": 1.2325102880658436, "grad_norm": 0.1176617294549942, "learning_rate": 3.448389309215875e-05, "loss": 0.0016, "step": 1198 }, { "epoch": 1.2335390946502058, "grad_norm": 7.005835056304932, "learning_rate": 3.4479467760609685e-05, "loss": 0.327, "step": 1199 }, { "epoch": 1.2345679012345678, "grad_norm": 9.871505737304688, "learning_rate": 3.447502397043127e-05, "loss": 0.5392, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_Qnli-dev_cosine_accuracy": 0.73046875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7553156614303589, "eval_Qnli-dev_cosine_ap": 0.7644410125365041, "eval_Qnli-dev_cosine_f1": 0.7136929460580914, "eval_Qnli-dev_cosine_f1_threshold": 0.7551485300064087, "eval_Qnli-dev_cosine_precision": 0.6991869918699187, "eval_Qnli-dev_cosine_recall": 0.7288135593220338, "eval_Qnli-dev_dot_accuracy": 0.68359375, "eval_Qnli-dev_dot_accuracy_threshold": 336.83642578125, "eval_Qnli-dev_dot_ap": 0.7022244731417389, "eval_Qnli-dev_dot_f1": 0.6980802792321116, "eval_Qnli-dev_dot_f1_threshold": 313.0473937988281, "eval_Qnli-dev_dot_precision": 0.5934718100890207, "eval_Qnli-dev_dot_recall": 0.847457627118644, "eval_Qnli-dev_euclidean_accuracy": 0.724609375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.911373138427734, "eval_Qnli-dev_euclidean_ap": 0.7727905960549314, "eval_Qnli-dev_euclidean_f1": 0.7065026362038664, "eval_Qnli-dev_euclidean_f1_threshold": 16.72470474243164, "eval_Qnli-dev_euclidean_precision": 0.6036036036036037, "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, "eval_Qnli-dev_manhattan_accuracy": 0.724609375, "eval_Qnli-dev_manhattan_accuracy_threshold": 322.9418640136719, "eval_Qnli-dev_manhattan_ap": 0.7764063644438948, "eval_Qnli-dev_manhattan_f1": 0.7210626185958254, "eval_Qnli-dev_manhattan_f1_threshold": 331.51031494140625, "eval_Qnli-dev_manhattan_precision": 0.6529209621993127, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.73046875, "eval_Qnli-dev_max_accuracy_threshold": 336.83642578125, "eval_Qnli-dev_max_ap": 0.7764063644438948, "eval_Qnli-dev_max_f1": 0.7210626185958254, "eval_Qnli-dev_max_f1_threshold": 331.51031494140625, "eval_Qnli-dev_max_precision": 0.6991869918699187, "eval_Qnli-dev_max_recall": 0.8516949152542372, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8570448160171509, "eval_allNLI-dev_cosine_ap": 0.6091000724328542, "eval_allNLI-dev_cosine_f1": 0.6394557823129251, "eval_allNLI-dev_cosine_f1_threshold": 0.7650203704833984, "eval_allNLI-dev_cosine_precision": 0.5261194029850746, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 382.50775146484375, "eval_allNLI-dev_dot_ap": 0.5395734814507898, "eval_allNLI-dev_dot_f1": 0.5950782997762865, "eval_allNLI-dev_dot_f1_threshold": 326.72900390625, "eval_allNLI-dev_dot_precision": 0.4854014598540146, "eval_allNLI-dev_dot_recall": 0.7687861271676301, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.244440078735352, "eval_allNLI-dev_euclidean_ap": 0.6148857577952056, "eval_allNLI-dev_euclidean_f1": 0.6301369863013698, "eval_allNLI-dev_euclidean_f1_threshold": 14.23716926574707, "eval_allNLI-dev_euclidean_precision": 0.5207547169811321, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 243.26353454589844, "eval_allNLI-dev_manhattan_ap": 0.6103850120371437, "eval_allNLI-dev_manhattan_f1": 0.6351931330472103, "eval_allNLI-dev_manhattan_f1_threshold": 307.9858093261719, "eval_allNLI-dev_manhattan_precision": 0.5051194539249146, "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 382.50775146484375, "eval_allNLI-dev_max_ap": 0.6148857577952056, "eval_allNLI-dev_max_f1": 0.6394557823129251, "eval_allNLI-dev_max_f1_threshold": 326.72900390625, "eval_allNLI-dev_max_precision": 0.5261194029850746, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7764063644438948, "eval_sts-test_pearson_cosine": 0.8235604121641702, "eval_sts-test_pearson_dot": 0.80828423316969, "eval_sts-test_pearson_euclidean": 0.8522052534133291, "eval_sts-test_pearson_manhattan": 0.849902558974327, "eval_sts-test_pearson_max": 0.8522052534133291, "eval_sts-test_spearman_cosine": 0.8553053507548257, "eval_sts-test_spearman_dot": 0.7913984909298833, "eval_sts-test_spearman_euclidean": 0.8489471362875971, "eval_sts-test_spearman_manhattan": 0.8475455267808185, "eval_sts-test_spearman_max": 0.8553053507548257, "eval_vitaminc-pairs_loss": 2.840937852859497, "eval_vitaminc-pairs_runtime": 3.2226, "eval_vitaminc-pairs_samples_per_second": 39.719, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_negation-triplets_loss": 1.012229323387146, "eval_negation-triplets_runtime": 0.7568, "eval_negation-triplets_samples_per_second": 169.124, "eval_negation-triplets_steps_per_second": 1.321, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_scitail-pairs-pos_loss": 0.17076808214187622, "eval_scitail-pairs-pos_runtime": 0.8668, "eval_scitail-pairs-pos_samples_per_second": 147.676, "eval_scitail-pairs-pos_steps_per_second": 1.154, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_scitail-pairs-qa_loss": 0.00030888148467056453, "eval_scitail-pairs-qa_runtime": 0.5951, "eval_scitail-pairs-qa_samples_per_second": 215.078, "eval_scitail-pairs-qa_steps_per_second": 1.68, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_xsum-pairs_loss": 0.3719870150089264, "eval_xsum-pairs_runtime": 3.0306, "eval_xsum-pairs_samples_per_second": 42.236, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_sciq_pairs_loss": 0.11966367810964584, "eval_sciq_pairs_runtime": 3.492, "eval_sciq_pairs_samples_per_second": 36.655, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_qasc_pairs_loss": 0.23714803159236908, "eval_qasc_pairs_runtime": 0.618, "eval_qasc_pairs_samples_per_second": 207.136, "eval_qasc_pairs_steps_per_second": 1.618, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_openbookqa_pairs_loss": 0.9108031392097473, "eval_openbookqa_pairs_runtime": 0.5834, "eval_openbookqa_pairs_samples_per_second": 219.386, "eval_openbookqa_pairs_steps_per_second": 1.714, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_msmarco_pairs_loss": 1.1426514387130737, "eval_msmarco_pairs_runtime": 1.5148, "eval_msmarco_pairs_samples_per_second": 84.501, "eval_msmarco_pairs_steps_per_second": 0.66, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_nq_pairs_loss": 1.003651738166809, "eval_nq_pairs_runtime": 2.8978, "eval_nq_pairs_samples_per_second": 44.172, "eval_nq_pairs_steps_per_second": 0.345, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_trivia_pairs_loss": 1.1226824522018433, "eval_trivia_pairs_runtime": 3.4587, "eval_trivia_pairs_samples_per_second": 37.008, "eval_trivia_pairs_steps_per_second": 0.289, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_gooaq_pairs_loss": 0.5596855878829956, "eval_gooaq_pairs_runtime": 0.9703, "eval_gooaq_pairs_samples_per_second": 131.913, "eval_gooaq_pairs_steps_per_second": 1.031, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_paws-pos_loss": 0.0236971452832222, "eval_paws-pos_runtime": 0.6866, "eval_paws-pos_samples_per_second": 186.416, "eval_paws-pos_steps_per_second": 1.456, "step": 1200 }, { "epoch": 1.2345679012345678, "eval_global_dataset_loss": 0.4764837920665741, "eval_global_dataset_runtime": 13.3664, "eval_global_dataset_samples_per_second": 31.123, "eval_global_dataset_steps_per_second": 0.299, "step": 1200 }, { "epoch": 1.23559670781893, "grad_norm": 10.80827522277832, "learning_rate": 3.4470561728982665e-05, "loss": 0.8535, "step": 1201 }, { "epoch": 1.2366255144032923, "grad_norm": 0.27277591824531555, "learning_rate": 3.44660810436536e-05, "loss": 0.0045, "step": 1202 }, { "epoch": 1.2376543209876543, "grad_norm": 8.913864135742188, "learning_rate": 3.4461581921864334e-05, "loss": 0.7916, "step": 1203 }, { "epoch": 1.2386831275720165, "grad_norm": 13.311179161071777, "learning_rate": 3.4457064371065673e-05, "loss": 0.9205, "step": 1204 }, { "epoch": 1.2397119341563787, "grad_norm": 6.5711188316345215, "learning_rate": 3.445252839873894e-05, "loss": 0.242, "step": 1205 }, { "epoch": 1.2407407407407407, "grad_norm": 13.579578399658203, "learning_rate": 3.4447974012395956e-05, "loss": 1.0257, "step": 1206 }, { "epoch": 1.241769547325103, "grad_norm": 10.705375671386719, "learning_rate": 3.444340121957905e-05, "loss": 0.7247, "step": 1207 }, { "epoch": 1.242798353909465, "grad_norm": 8.617849349975586, "learning_rate": 3.4438810027861015e-05, "loss": 0.3461, "step": 1208 }, { "epoch": 1.2438271604938271, "grad_norm": 8.483906745910645, "learning_rate": 3.4434200444845126e-05, "loss": 0.303, "step": 1209 }, { "epoch": 1.2448559670781894, "grad_norm": 1.0907119512557983, "learning_rate": 3.442957247816513e-05, "loss": 0.0228, "step": 1210 }, { "epoch": 1.2458847736625513, "grad_norm": 15.170351028442383, "learning_rate": 3.442492613548518e-05, "loss": 1.2976, "step": 1211 }, { "epoch": 1.2469135802469136, "grad_norm": 13.963634490966797, "learning_rate": 3.4420261424499885e-05, "loss": 1.173, "step": 1212 }, { "epoch": 1.2479423868312758, "grad_norm": 14.074200630187988, "learning_rate": 3.4415578352934285e-05, "loss": 0.9466, "step": 1213 }, { "epoch": 1.2489711934156378, "grad_norm": 6.741833209991455, "learning_rate": 3.44108769285438e-05, "loss": 0.2666, "step": 1214 }, { "epoch": 1.25, "grad_norm": 13.059562683105469, "learning_rate": 3.440615715911426e-05, "loss": 0.9441, "step": 1215 }, { "epoch": 1.2510288065843622, "grad_norm": 6.839930534362793, "learning_rate": 3.440141905246187e-05, "loss": 0.2589, "step": 1216 }, { "epoch": 1.2520576131687242, "grad_norm": 1.1594938039779663, "learning_rate": 3.43966626164332e-05, "loss": 0.0168, "step": 1217 }, { "epoch": 1.2530864197530864, "grad_norm": 6.989406108856201, "learning_rate": 3.439188785890518e-05, "loss": 0.3212, "step": 1218 }, { "epoch": 1.2541152263374484, "grad_norm": 7.311746597290039, "learning_rate": 3.438709478778507e-05, "loss": 0.3108, "step": 1219 }, { "epoch": 1.2551440329218106, "grad_norm": 7.0762529373168945, "learning_rate": 3.438228341101048e-05, "loss": 0.3269, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_Qnli-dev_cosine_accuracy": 0.720703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7860354781150818, "eval_Qnli-dev_cosine_ap": 0.7633884571301464, "eval_Qnli-dev_cosine_f1": 0.7151051625239004, "eval_Qnli-dev_cosine_f1_threshold": 0.7650403380393982, "eval_Qnli-dev_cosine_precision": 0.6515679442508711, "eval_Qnli-dev_cosine_recall": 0.7923728813559322, "eval_Qnli-dev_dot_accuracy": 0.681640625, "eval_Qnli-dev_dot_accuracy_threshold": 369.2494812011719, "eval_Qnli-dev_dot_ap": 0.7005340525347556, "eval_Qnli-dev_dot_f1": 0.6983546617915904, "eval_Qnli-dev_dot_f1_threshold": 357.15740966796875, "eval_Qnli-dev_dot_precision": 0.6141479099678456, "eval_Qnli-dev_dot_recall": 0.809322033898305, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.266681671142578, "eval_Qnli-dev_euclidean_ap": 0.7715839499516464, "eval_Qnli-dev_euclidean_f1": 0.7142857142857143, "eval_Qnli-dev_euclidean_f1_threshold": 14.948558807373047, "eval_Qnli-dev_euclidean_precision": 0.6560283687943262, "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 295.34088134765625, "eval_Qnli-dev_manhattan_ap": 0.7719441944715049, "eval_Qnli-dev_manhattan_f1": 0.7161904761904763, "eval_Qnli-dev_manhattan_f1_threshold": 311.98638916015625, "eval_Qnli-dev_manhattan_precision": 0.6505190311418685, "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, "eval_Qnli-dev_max_accuracy": 0.720703125, "eval_Qnli-dev_max_accuracy_threshold": 369.2494812011719, "eval_Qnli-dev_max_ap": 0.7719441944715049, "eval_Qnli-dev_max_f1": 0.7161904761904763, "eval_Qnli-dev_max_f1_threshold": 357.15740966796875, "eval_Qnli-dev_max_precision": 0.6560283687943262, "eval_Qnli-dev_max_recall": 0.809322033898305, "eval_allNLI-dev_cosine_accuracy": 0.73828125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8683989644050598, "eval_allNLI-dev_cosine_ap": 0.6251892984393883, "eval_allNLI-dev_cosine_f1": 0.6313465783664459, "eval_allNLI-dev_cosine_f1_threshold": 0.7872051000595093, "eval_allNLI-dev_cosine_precision": 0.5107142857142857, "eval_allNLI-dev_cosine_recall": 0.8265895953757225, "eval_allNLI-dev_dot_accuracy": 0.712890625, "eval_allNLI-dev_dot_accuracy_threshold": 412.98309326171875, "eval_allNLI-dev_dot_ap": 0.5622529803433328, "eval_allNLI-dev_dot_f1": 0.610091743119266, "eval_allNLI-dev_dot_f1_threshold": 367.0179443359375, "eval_allNLI-dev_dot_precision": 0.5057034220532319, "eval_allNLI-dev_dot_recall": 0.7687861271676301, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.184566497802734, "eval_allNLI-dev_euclidean_ap": 0.6289609975782239, "eval_allNLI-dev_euclidean_f1": 0.6412556053811659, "eval_allNLI-dev_euclidean_f1_threshold": 14.102167129516602, "eval_allNLI-dev_euclidean_precision": 0.5238095238095238, "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 234.21710205078125, "eval_allNLI-dev_manhattan_ap": 0.6266468776315474, "eval_allNLI-dev_manhattan_f1": 0.6417582417582418, "eval_allNLI-dev_manhattan_f1_threshold": 295.9653015136719, "eval_allNLI-dev_manhattan_precision": 0.5177304964539007, "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 412.98309326171875, "eval_allNLI-dev_max_ap": 0.6289609975782239, "eval_allNLI-dev_max_f1": 0.6417582417582418, "eval_allNLI-dev_max_f1_threshold": 367.0179443359375, "eval_allNLI-dev_max_precision": 0.5238095238095238, "eval_allNLI-dev_max_recall": 0.8439306358381503, "eval_sequential_score": 0.7719441944715049, "eval_sts-test_pearson_cosine": 0.8290268288134766, "eval_sts-test_pearson_dot": 0.8175868265444567, "eval_sts-test_pearson_euclidean": 0.8543569326462799, "eval_sts-test_pearson_manhattan": 0.8525659243366586, "eval_sts-test_pearson_max": 0.8543569326462799, "eval_sts-test_spearman_cosine": 0.853531321250748, "eval_sts-test_spearman_dot": 0.8038217095997041, "eval_sts-test_spearman_euclidean": 0.8480709960920763, "eval_sts-test_spearman_manhattan": 0.8464784777042019, "eval_sts-test_spearman_max": 0.853531321250748, "eval_vitaminc-pairs_loss": 2.7786405086517334, "eval_vitaminc-pairs_runtime": 3.2222, "eval_vitaminc-pairs_samples_per_second": 39.724, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_negation-triplets_loss": 0.9489306211471558, "eval_negation-triplets_runtime": 0.7866, "eval_negation-triplets_samples_per_second": 162.732, "eval_negation-triplets_steps_per_second": 1.271, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_scitail-pairs-pos_loss": 0.15525153279304504, "eval_scitail-pairs-pos_runtime": 0.9089, "eval_scitail-pairs-pos_samples_per_second": 140.834, "eval_scitail-pairs-pos_steps_per_second": 1.1, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_scitail-pairs-qa_loss": 0.0006275809137150645, "eval_scitail-pairs-qa_runtime": 0.5938, "eval_scitail-pairs-qa_samples_per_second": 215.547, "eval_scitail-pairs-qa_steps_per_second": 1.684, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_xsum-pairs_loss": 0.37118861079216003, "eval_xsum-pairs_runtime": 3.0288, "eval_xsum-pairs_samples_per_second": 42.261, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_sciq_pairs_loss": 0.11241748929023743, "eval_sciq_pairs_runtime": 3.4875, "eval_sciq_pairs_samples_per_second": 36.703, "eval_sciq_pairs_steps_per_second": 0.287, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_qasc_pairs_loss": 0.22180773317813873, "eval_qasc_pairs_runtime": 0.6153, "eval_qasc_pairs_samples_per_second": 208.013, "eval_qasc_pairs_steps_per_second": 1.625, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_openbookqa_pairs_loss": 0.8669531941413879, "eval_openbookqa_pairs_runtime": 0.5998, "eval_openbookqa_pairs_samples_per_second": 213.395, "eval_openbookqa_pairs_steps_per_second": 1.667, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_msmarco_pairs_loss": 0.9564771056175232, "eval_msmarco_pairs_runtime": 1.5215, "eval_msmarco_pairs_samples_per_second": 84.127, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_nq_pairs_loss": 0.8097667098045349, "eval_nq_pairs_runtime": 2.9024, "eval_nq_pairs_samples_per_second": 44.102, "eval_nq_pairs_steps_per_second": 0.345, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_trivia_pairs_loss": 0.9328098893165588, "eval_trivia_pairs_runtime": 3.4398, "eval_trivia_pairs_samples_per_second": 37.211, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_gooaq_pairs_loss": 0.46518075466156006, "eval_gooaq_pairs_runtime": 0.9561, "eval_gooaq_pairs_samples_per_second": 133.874, "eval_gooaq_pairs_steps_per_second": 1.046, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_paws-pos_loss": 0.023140598088502884, "eval_paws-pos_runtime": 0.6996, "eval_paws-pos_samples_per_second": 182.966, "eval_paws-pos_steps_per_second": 1.429, "step": 1220 }, { "epoch": 1.2551440329218106, "eval_global_dataset_loss": 0.44948717951774597, "eval_global_dataset_runtime": 13.4315, "eval_global_dataset_samples_per_second": 30.972, "eval_global_dataset_steps_per_second": 0.298, "step": 1220 }, { "epoch": 1.2561728395061729, "grad_norm": 7.284200191497803, "learning_rate": 3.4377453736549316e-05, "loss": 0.341, "step": 1221 }, { "epoch": 1.257201646090535, "grad_norm": 0.6413462162017822, "learning_rate": 3.437260577239979e-05, "loss": 0.0162, "step": 1222 }, { "epoch": 1.258230452674897, "grad_norm": 7.572662353515625, "learning_rate": 3.436773952659041e-05, "loss": 0.3307, "step": 1223 }, { "epoch": 1.2592592592592593, "grad_norm": 6.2290215492248535, "learning_rate": 3.4362855007179945e-05, "loss": 0.2021, "step": 1224 }, { "epoch": 1.2602880658436213, "grad_norm": 17.566606521606445, "learning_rate": 3.435795222225745e-05, "loss": 2.8801, "step": 1225 }, { "epoch": 1.2613168724279835, "grad_norm": 11.829737663269043, "learning_rate": 3.43530311799422e-05, "loss": 1.1785, "step": 1226 }, { "epoch": 1.2623456790123457, "grad_norm": 8.333463668823242, "learning_rate": 3.4348091888383726e-05, "loss": 0.7136, "step": 1227 }, { "epoch": 1.263374485596708, "grad_norm": 8.75113582611084, "learning_rate": 3.434313435576178e-05, "loss": 0.6168, "step": 1228 }, { "epoch": 1.26440329218107, "grad_norm": 4.275041103363037, "learning_rate": 3.4338158590286305e-05, "loss": 0.2191, "step": 1229 }, { "epoch": 1.2654320987654322, "grad_norm": 9.158637046813965, "learning_rate": 3.4333164600197463e-05, "loss": 0.5133, "step": 1230 }, { "epoch": 1.2664609053497942, "grad_norm": 12.479742050170898, "learning_rate": 3.432815239376557e-05, "loss": 1.1907, "step": 1231 }, { "epoch": 1.2674897119341564, "grad_norm": 14.734970092773438, "learning_rate": 3.432312197929114e-05, "loss": 1.2727, "step": 1232 }, { "epoch": 1.2685185185185186, "grad_norm": 7.485629081726074, "learning_rate": 3.431807336510481e-05, "loss": 0.3664, "step": 1233 }, { "epoch": 1.2695473251028806, "grad_norm": 8.2068452835083, "learning_rate": 3.431300655956737e-05, "loss": 0.45, "step": 1234 }, { "epoch": 1.2705761316872428, "grad_norm": 7.955653667449951, "learning_rate": 3.430792157106975e-05, "loss": 0.4915, "step": 1235 }, { "epoch": 1.2716049382716048, "grad_norm": 10.168331146240234, "learning_rate": 3.430281840803297e-05, "loss": 0.8899, "step": 1236 }, { "epoch": 1.272633744855967, "grad_norm": 7.463403224945068, "learning_rate": 3.429769707890816e-05, "loss": 0.4613, "step": 1237 }, { "epoch": 1.2736625514403292, "grad_norm": 7.143692970275879, "learning_rate": 3.429255759217652e-05, "loss": 0.4084, "step": 1238 }, { "epoch": 1.2746913580246915, "grad_norm": 7.729737281799316, "learning_rate": 3.428739995634934e-05, "loss": 0.3376, "step": 1239 }, { "epoch": 1.2757201646090535, "grad_norm": 5.447694778442383, "learning_rate": 3.4282224179967966e-05, "loss": 0.2337, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_Qnli-dev_cosine_accuracy": 0.720703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7990207672119141, "eval_Qnli-dev_cosine_ap": 0.7557640056578084, "eval_Qnli-dev_cosine_f1": 0.7027027027027027, "eval_Qnli-dev_cosine_f1_threshold": 0.7990207672119141, "eval_Qnli-dev_cosine_precision": 0.689795918367347, "eval_Qnli-dev_cosine_recall": 0.7161016949152542, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 403.472412109375, "eval_Qnli-dev_dot_ap": 0.6544341606245616, "eval_Qnli-dev_dot_f1": 0.6677067082683307, "eval_Qnli-dev_dot_f1_threshold": 342.8936767578125, "eval_Qnli-dev_dot_precision": 0.528395061728395, "eval_Qnli-dev_dot_recall": 0.9067796610169492, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.223051071166992, "eval_Qnli-dev_euclidean_ap": 0.7661561981934673, "eval_Qnli-dev_euclidean_f1": 0.7115384615384616, "eval_Qnli-dev_euclidean_f1_threshold": 14.778949737548828, "eval_Qnli-dev_euclidean_precision": 0.6514084507042254, "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 282.96795654296875, "eval_Qnli-dev_manhattan_ap": 0.7694773667201545, "eval_Qnli-dev_manhattan_f1": 0.7050092764378479, "eval_Qnli-dev_manhattan_f1_threshold": 313.93878173828125, "eval_Qnli-dev_manhattan_precision": 0.6270627062706271, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.72265625, "eval_Qnli-dev_max_accuracy_threshold": 403.472412109375, "eval_Qnli-dev_max_ap": 0.7694773667201545, "eval_Qnli-dev_max_f1": 0.7115384615384616, "eval_Qnli-dev_max_f1_threshold": 342.8936767578125, "eval_Qnli-dev_max_precision": 0.689795918367347, "eval_Qnli-dev_max_recall": 0.9067796610169492, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8681868314743042, "eval_allNLI-dev_cosine_ap": 0.6185155955214422, "eval_allNLI-dev_cosine_f1": 0.6442953020134229, "eval_allNLI-dev_cosine_f1_threshold": 0.7863482236862183, "eval_allNLI-dev_cosine_precision": 0.5255474452554745, "eval_allNLI-dev_cosine_recall": 0.8323699421965318, "eval_allNLI-dev_dot_accuracy": 0.712890625, "eval_allNLI-dev_dot_accuracy_threshold": 406.2081298828125, "eval_allNLI-dev_dot_ap": 0.5474145940825146, "eval_allNLI-dev_dot_f1": 0.5979381443298969, "eval_allNLI-dev_dot_f1_threshold": 360.92095947265625, "eval_allNLI-dev_dot_precision": 0.46474358974358976, "eval_allNLI-dev_dot_recall": 0.838150289017341, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.027508735656738, "eval_allNLI-dev_euclidean_ap": 0.6248008682070734, "eval_allNLI-dev_euclidean_f1": 0.6482758620689655, "eval_allNLI-dev_euclidean_f1_threshold": 14.024650573730469, "eval_allNLI-dev_euclidean_precision": 0.5381679389312977, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 249.40318298339844, "eval_allNLI-dev_manhattan_ap": 0.6226611056171882, "eval_allNLI-dev_manhattan_f1": 0.6471910112359551, "eval_allNLI-dev_manhattan_f1_threshold": 294.674560546875, "eval_allNLI-dev_manhattan_precision": 0.5294117647058824, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 406.2081298828125, "eval_allNLI-dev_max_ap": 0.6248008682070734, "eval_allNLI-dev_max_f1": 0.6482758620689655, "eval_allNLI-dev_max_f1_threshold": 360.92095947265625, "eval_allNLI-dev_max_precision": 0.5381679389312977, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7694773667201545, "eval_sts-test_pearson_cosine": 0.8282825065385547, "eval_sts-test_pearson_dot": 0.8187070495760544, "eval_sts-test_pearson_euclidean": 0.857594156220547, "eval_sts-test_pearson_manhattan": 0.8547876350215236, "eval_sts-test_pearson_max": 0.857594156220547, "eval_sts-test_spearman_cosine": 0.8589356594045713, "eval_sts-test_spearman_dot": 0.8098749442205503, "eval_sts-test_spearman_euclidean": 0.8538316022146842, "eval_sts-test_spearman_manhattan": 0.8513935087428194, "eval_sts-test_spearman_max": 0.8589356594045713, "eval_vitaminc-pairs_loss": 2.63816499710083, "eval_vitaminc-pairs_runtime": 3.2121, "eval_vitaminc-pairs_samples_per_second": 39.849, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_negation-triplets_loss": 0.9544748663902283, "eval_negation-triplets_runtime": 0.7781, "eval_negation-triplets_samples_per_second": 164.496, "eval_negation-triplets_steps_per_second": 1.285, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_scitail-pairs-pos_loss": 0.16542193293571472, "eval_scitail-pairs-pos_runtime": 0.8693, "eval_scitail-pairs-pos_samples_per_second": 147.247, "eval_scitail-pairs-pos_steps_per_second": 1.15, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_scitail-pairs-qa_loss": 0.0013295909157022834, "eval_scitail-pairs-qa_runtime": 0.6064, "eval_scitail-pairs-qa_samples_per_second": 211.091, "eval_scitail-pairs-qa_steps_per_second": 1.649, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_xsum-pairs_loss": 0.35927343368530273, "eval_xsum-pairs_runtime": 3.0303, "eval_xsum-pairs_samples_per_second": 42.241, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_sciq_pairs_loss": 0.09439770132303238, "eval_sciq_pairs_runtime": 3.5147, "eval_sciq_pairs_samples_per_second": 36.419, "eval_sciq_pairs_steps_per_second": 0.285, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_qasc_pairs_loss": 0.23836590349674225, "eval_qasc_pairs_runtime": 0.6293, "eval_qasc_pairs_samples_per_second": 203.416, "eval_qasc_pairs_steps_per_second": 1.589, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_openbookqa_pairs_loss": 0.8297739624977112, "eval_openbookqa_pairs_runtime": 0.6132, "eval_openbookqa_pairs_samples_per_second": 208.746, "eval_openbookqa_pairs_steps_per_second": 1.631, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_msmarco_pairs_loss": 0.9185351729393005, "eval_msmarco_pairs_runtime": 1.531, "eval_msmarco_pairs_samples_per_second": 83.606, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_nq_pairs_loss": 0.94951993227005, "eval_nq_pairs_runtime": 2.9064, "eval_nq_pairs_samples_per_second": 44.041, "eval_nq_pairs_steps_per_second": 0.344, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_trivia_pairs_loss": 0.8465414643287659, "eval_trivia_pairs_runtime": 3.454, "eval_trivia_pairs_samples_per_second": 37.058, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_gooaq_pairs_loss": 0.5014381408691406, "eval_gooaq_pairs_runtime": 0.9553, "eval_gooaq_pairs_samples_per_second": 133.99, "eval_gooaq_pairs_steps_per_second": 1.047, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_paws-pos_loss": 0.023835916072130203, "eval_paws-pos_runtime": 0.7011, "eval_paws-pos_samples_per_second": 182.58, "eval_paws-pos_steps_per_second": 1.426, "step": 1240 }, { "epoch": 1.2757201646090535, "eval_global_dataset_loss": 0.4214680790901184, "eval_global_dataset_runtime": 13.4349, "eval_global_dataset_samples_per_second": 30.964, "eval_global_dataset_steps_per_second": 0.298, "step": 1240 }, { "epoch": 1.2767489711934157, "grad_norm": 10.583321571350098, "learning_rate": 3.4277030271603763e-05, "loss": 1.122, "step": 1241 }, { "epoch": 1.2777777777777777, "grad_norm": 11.95809268951416, "learning_rate": 3.427181823985814e-05, "loss": 1.1248, "step": 1242 }, { "epoch": 1.27880658436214, "grad_norm": 9.490169525146484, "learning_rate": 3.4266588093362515e-05, "loss": 0.5031, "step": 1243 }, { "epoch": 1.2798353909465021, "grad_norm": 5.851307392120361, "learning_rate": 3.426133984077831e-05, "loss": 0.211, "step": 1244 }, { "epoch": 1.2808641975308643, "grad_norm": 5.588681221008301, "learning_rate": 3.425607349079693e-05, "loss": 0.2251, "step": 1245 }, { "epoch": 1.2818930041152263, "grad_norm": 14.139016151428223, "learning_rate": 3.425078905213975e-05, "loss": 1.2428, "step": 1246 }, { "epoch": 1.2829218106995885, "grad_norm": 6.52968168258667, "learning_rate": 3.4245486533558086e-05, "loss": 0.4932, "step": 1247 }, { "epoch": 1.2839506172839505, "grad_norm": 9.744477272033691, "learning_rate": 3.424016594383322e-05, "loss": 0.6784, "step": 1248 }, { "epoch": 1.2849794238683128, "grad_norm": 6.064051628112793, "learning_rate": 3.4234827291776355e-05, "loss": 0.282, "step": 1249 }, { "epoch": 1.286008230452675, "grad_norm": 5.103082656860352, "learning_rate": 3.422947058622859e-05, "loss": 0.1852, "step": 1250 }, { "epoch": 1.287037037037037, "grad_norm": 1.28008234500885, "learning_rate": 3.422409583606094e-05, "loss": 0.0276, "step": 1251 }, { "epoch": 1.2880658436213992, "grad_norm": 10.74918270111084, "learning_rate": 3.421870305017428e-05, "loss": 0.9884, "step": 1252 }, { "epoch": 1.2890946502057612, "grad_norm": 5.395549297332764, "learning_rate": 3.421329223749939e-05, "loss": 0.1635, "step": 1253 }, { "epoch": 1.2901234567901234, "grad_norm": 0.8584123849868774, "learning_rate": 3.420786340699687e-05, "loss": 0.035, "step": 1254 }, { "epoch": 1.2911522633744856, "grad_norm": 8.618278503417969, "learning_rate": 3.4202416567657175e-05, "loss": 0.3808, "step": 1255 }, { "epoch": 1.2921810699588478, "grad_norm": 12.293098449707031, "learning_rate": 3.419695172850058e-05, "loss": 1.009, "step": 1256 }, { "epoch": 1.2932098765432098, "grad_norm": 5.01506233215332, "learning_rate": 3.419146889857715e-05, "loss": 0.135, "step": 1257 }, { "epoch": 1.294238683127572, "grad_norm": 13.123412132263184, "learning_rate": 3.418596808696679e-05, "loss": 0.8017, "step": 1258 }, { "epoch": 1.295267489711934, "grad_norm": 6.234143257141113, "learning_rate": 3.418044930277914e-05, "loss": 0.3615, "step": 1259 }, { "epoch": 1.2962962962962963, "grad_norm": 9.980935096740723, "learning_rate": 3.4174912555153616e-05, "loss": 0.7826, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_Qnli-dev_cosine_accuracy": 0.6875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7924839854240417, "eval_Qnli-dev_cosine_ap": 0.7332266491035113, "eval_Qnli-dev_cosine_f1": 0.6891891891891891, "eval_Qnli-dev_cosine_f1_threshold": 0.6866949796676636, "eval_Qnli-dev_cosine_precision": 0.5730337078651685, "eval_Qnli-dev_cosine_recall": 0.864406779661017, "eval_Qnli-dev_dot_accuracy": 0.662109375, "eval_Qnli-dev_dot_accuracy_threshold": 373.8640441894531, "eval_Qnli-dev_dot_ap": 0.6496724675564834, "eval_Qnli-dev_dot_f1": 0.684297520661157, "eval_Qnli-dev_dot_f1_threshold": 308.1207275390625, "eval_Qnli-dev_dot_precision": 0.5609756097560976, "eval_Qnli-dev_dot_recall": 0.8771186440677966, "eval_Qnli-dev_euclidean_accuracy": 0.703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.424274444580078, "eval_Qnli-dev_euclidean_ap": 0.7444622281460342, "eval_Qnli-dev_euclidean_f1": 0.6940170940170939, "eval_Qnli-dev_euclidean_f1_threshold": 16.856407165527344, "eval_Qnli-dev_euclidean_precision": 0.5816618911174785, "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, "eval_Qnli-dev_manhattan_accuracy": 0.69921875, "eval_Qnli-dev_manhattan_accuracy_threshold": 289.1227722167969, "eval_Qnli-dev_manhattan_ap": 0.7459088632355457, "eval_Qnli-dev_manhattan_f1": 0.6959459459459459, "eval_Qnli-dev_manhattan_f1_threshold": 353.713623046875, "eval_Qnli-dev_manhattan_precision": 0.5786516853932584, "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, "eval_Qnli-dev_max_accuracy": 0.703125, "eval_Qnli-dev_max_accuracy_threshold": 373.8640441894531, "eval_Qnli-dev_max_ap": 0.7459088632355457, "eval_Qnli-dev_max_f1": 0.6959459459459459, "eval_Qnli-dev_max_f1_threshold": 353.713623046875, "eval_Qnli-dev_max_precision": 0.5816618911174785, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.867729127407074, "eval_allNLI-dev_cosine_ap": 0.6125988322302354, "eval_allNLI-dev_cosine_f1": 0.6153846153846154, "eval_allNLI-dev_cosine_f1_threshold": 0.7971692681312561, "eval_allNLI-dev_cosine_precision": 0.5391304347826087, "eval_allNLI-dev_cosine_recall": 0.7167630057803468, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 387.8415222167969, "eval_allNLI-dev_dot_ap": 0.5292166386311624, "eval_allNLI-dev_dot_f1": 0.5884861407249466, "eval_allNLI-dev_dot_f1_threshold": 333.64898681640625, "eval_allNLI-dev_dot_precision": 0.46621621621621623, "eval_allNLI-dev_dot_recall": 0.7976878612716763, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.273818969726562, "eval_allNLI-dev_euclidean_ap": 0.6193958040838967, "eval_allNLI-dev_euclidean_f1": 0.6253101736972705, "eval_allNLI-dev_euclidean_f1_threshold": 13.630767822265625, "eval_allNLI-dev_euclidean_precision": 0.5478260869565217, "eval_allNLI-dev_euclidean_recall": 0.7283236994219653, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 227.3568572998047, "eval_allNLI-dev_manhattan_ap": 0.6143779984704311, "eval_allNLI-dev_manhattan_f1": 0.6343612334801761, "eval_allNLI-dev_manhattan_f1_threshold": 300.5953063964844, "eval_allNLI-dev_manhattan_precision": 0.5124555160142349, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 387.8415222167969, "eval_allNLI-dev_max_ap": 0.6193958040838967, "eval_allNLI-dev_max_f1": 0.6343612334801761, "eval_allNLI-dev_max_f1_threshold": 333.64898681640625, "eval_allNLI-dev_max_precision": 0.5478260869565217, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.7459088632355457, "eval_sts-test_pearson_cosine": 0.8256896728141465, "eval_sts-test_pearson_dot": 0.80848155082256, "eval_sts-test_pearson_euclidean": 0.8575939093156039, "eval_sts-test_pearson_manhattan": 0.8561308953474318, "eval_sts-test_pearson_max": 0.8575939093156039, "eval_sts-test_spearman_cosine": 0.8587494525827694, "eval_sts-test_spearman_dot": 0.794722346118468, "eval_sts-test_spearman_euclidean": 0.8545395671698962, "eval_sts-test_spearman_manhattan": 0.8535123019646566, "eval_sts-test_spearman_max": 0.8587494525827694, "eval_vitaminc-pairs_loss": 2.8190577030181885, "eval_vitaminc-pairs_runtime": 3.2233, "eval_vitaminc-pairs_samples_per_second": 39.711, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_negation-triplets_loss": 0.984671413898468, "eval_negation-triplets_runtime": 0.7677, "eval_negation-triplets_samples_per_second": 166.742, "eval_negation-triplets_steps_per_second": 1.303, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_scitail-pairs-pos_loss": 0.1757514476776123, "eval_scitail-pairs-pos_runtime": 0.913, "eval_scitail-pairs-pos_samples_per_second": 140.198, "eval_scitail-pairs-pos_steps_per_second": 1.095, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_scitail-pairs-qa_loss": 0.0008864524425007403, "eval_scitail-pairs-qa_runtime": 0.5941, "eval_scitail-pairs-qa_samples_per_second": 215.45, "eval_scitail-pairs-qa_steps_per_second": 1.683, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_xsum-pairs_loss": 0.3529047966003418, "eval_xsum-pairs_runtime": 3.0315, "eval_xsum-pairs_samples_per_second": 42.224, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_sciq_pairs_loss": 0.10334491729736328, "eval_sciq_pairs_runtime": 3.4941, "eval_sciq_pairs_samples_per_second": 36.634, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_qasc_pairs_loss": 0.2171602100133896, "eval_qasc_pairs_runtime": 0.6121, "eval_qasc_pairs_samples_per_second": 209.114, "eval_qasc_pairs_steps_per_second": 1.634, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_openbookqa_pairs_loss": 0.9481621980667114, "eval_openbookqa_pairs_runtime": 0.5963, "eval_openbookqa_pairs_samples_per_second": 214.661, "eval_openbookqa_pairs_steps_per_second": 1.677, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_msmarco_pairs_loss": 1.0396214723587036, "eval_msmarco_pairs_runtime": 1.5323, "eval_msmarco_pairs_samples_per_second": 83.534, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_nq_pairs_loss": 0.9104881882667542, "eval_nq_pairs_runtime": 2.9067, "eval_nq_pairs_samples_per_second": 44.037, "eval_nq_pairs_steps_per_second": 0.344, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_trivia_pairs_loss": 0.8889181613922119, "eval_trivia_pairs_runtime": 3.4447, "eval_trivia_pairs_samples_per_second": 37.159, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_gooaq_pairs_loss": 0.4568992853164673, "eval_gooaq_pairs_runtime": 0.9571, "eval_gooaq_pairs_samples_per_second": 133.741, "eval_gooaq_pairs_steps_per_second": 1.045, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_paws-pos_loss": 0.02366357110440731, "eval_paws-pos_runtime": 0.7027, "eval_paws-pos_samples_per_second": 182.156, "eval_paws-pos_steps_per_second": 1.423, "step": 1260 }, { "epoch": 1.2962962962962963, "eval_global_dataset_loss": 0.4424642324447632, "eval_global_dataset_runtime": 13.4124, "eval_global_dataset_samples_per_second": 31.016, "eval_global_dataset_steps_per_second": 0.298, "step": 1260 }, { "epoch": 1.2973251028806585, "grad_norm": 18.139724731445312, "learning_rate": 3.41693578532594e-05, "loss": 2.4823, "step": 1261 }, { "epoch": 1.2983539094650205, "grad_norm": 9.441933631896973, "learning_rate": 3.4163785206295374e-05, "loss": 0.388, "step": 1262 }, { "epoch": 1.2993827160493827, "grad_norm": 0.4781794250011444, "learning_rate": 3.415819462349017e-05, "loss": 0.0072, "step": 1263 }, { "epoch": 1.300411522633745, "grad_norm": 7.694046974182129, "learning_rate": 3.41525861141021e-05, "loss": 0.4752, "step": 1264 }, { "epoch": 1.301440329218107, "grad_norm": 6.207346439361572, "learning_rate": 3.414695968741918e-05, "loss": 0.2959, "step": 1265 }, { "epoch": 1.3024691358024691, "grad_norm": 11.718839645385742, "learning_rate": 3.4141315352759094e-05, "loss": 0.9776, "step": 1266 }, { "epoch": 1.3034979423868314, "grad_norm": 0.8530243039131165, "learning_rate": 3.413565311946917e-05, "loss": 0.0135, "step": 1267 }, { "epoch": 1.3045267489711934, "grad_norm": 8.410090446472168, "learning_rate": 3.4129972996926395e-05, "loss": 0.4569, "step": 1268 }, { "epoch": 1.3055555555555556, "grad_norm": 11.767143249511719, "learning_rate": 3.412427499453737e-05, "loss": 0.8287, "step": 1269 }, { "epoch": 1.3065843621399176, "grad_norm": 14.703503608703613, "learning_rate": 3.41185591217383e-05, "loss": 1.251, "step": 1270 }, { "epoch": 1.3076131687242798, "grad_norm": 15.810161590576172, "learning_rate": 3.411282538799501e-05, "loss": 1.2505, "step": 1271 }, { "epoch": 1.308641975308642, "grad_norm": 1.0557564496994019, "learning_rate": 3.410707380280288e-05, "loss": 0.0219, "step": 1272 }, { "epoch": 1.3096707818930042, "grad_norm": 14.226122856140137, "learning_rate": 3.4101304375686863e-05, "loss": 1.0106, "step": 1273 }, { "epoch": 1.3106995884773662, "grad_norm": 7.010471820831299, "learning_rate": 3.409551711620145e-05, "loss": 0.2392, "step": 1274 }, { "epoch": 1.3117283950617284, "grad_norm": 12.67631721496582, "learning_rate": 3.4089712033930676e-05, "loss": 0.9318, "step": 1275 }, { "epoch": 1.3127572016460904, "grad_norm": 13.45511531829834, "learning_rate": 3.408388913848808e-05, "loss": 0.9255, "step": 1276 }, { "epoch": 1.3137860082304527, "grad_norm": 5.24427604675293, "learning_rate": 3.407804843951672e-05, "loss": 0.2099, "step": 1277 }, { "epoch": 1.3148148148148149, "grad_norm": 6.305140495300293, "learning_rate": 3.4072189946689117e-05, "loss": 0.2442, "step": 1278 }, { "epoch": 1.3158436213991769, "grad_norm": 8.449353218078613, "learning_rate": 3.4066313669707255e-05, "loss": 0.363, "step": 1279 }, { "epoch": 1.316872427983539, "grad_norm": 9.684417724609375, "learning_rate": 3.40604196183026e-05, "loss": 0.6736, "step": 1280 }, { "epoch": 1.316872427983539, "eval_Qnli-dev_cosine_accuracy": 0.716796875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8024507761001587, "eval_Qnli-dev_cosine_ap": 0.758846186685613, "eval_Qnli-dev_cosine_f1": 0.7107750472589792, "eval_Qnli-dev_cosine_f1_threshold": 0.768567681312561, "eval_Qnli-dev_cosine_precision": 0.6416382252559727, "eval_Qnli-dev_cosine_recall": 0.7966101694915254, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 376.0484313964844, "eval_Qnli-dev_dot_ap": 0.6643802268647256, "eval_Qnli-dev_dot_f1": 0.680921052631579, "eval_Qnli-dev_dot_f1_threshold": 344.0978698730469, "eval_Qnli-dev_dot_precision": 0.5564516129032258, "eval_Qnli-dev_dot_recall": 0.8771186440677966, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.912522315979004, "eval_Qnli-dev_euclidean_ap": 0.7678408212467321, "eval_Qnli-dev_euclidean_f1": 0.7212475633528265, "eval_Qnli-dev_euclidean_f1_threshold": 14.79525375366211, "eval_Qnli-dev_euclidean_precision": 0.6678700361010831, "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, "eval_Qnli-dev_manhattan_accuracy": 0.73046875, "eval_Qnli-dev_manhattan_accuracy_threshold": 293.152587890625, "eval_Qnli-dev_manhattan_ap": 0.7687148416423243, "eval_Qnli-dev_manhattan_f1": 0.7190569744597249, "eval_Qnli-dev_manhattan_f1_threshold": 305.97088623046875, "eval_Qnli-dev_manhattan_precision": 0.6703296703296703, "eval_Qnli-dev_manhattan_recall": 0.7754237288135594, "eval_Qnli-dev_max_accuracy": 0.73046875, "eval_Qnli-dev_max_accuracy_threshold": 376.0484313964844, "eval_Qnli-dev_max_ap": 0.7687148416423243, "eval_Qnli-dev_max_f1": 0.7212475633528265, "eval_Qnli-dev_max_f1_threshold": 344.0978698730469, "eval_Qnli-dev_max_precision": 0.6703296703296703, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.734375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.886374831199646, "eval_allNLI-dev_cosine_ap": 0.613323944711514, "eval_allNLI-dev_cosine_f1": 0.6330275229357799, "eval_allNLI-dev_cosine_f1_threshold": 0.7920593619346619, "eval_allNLI-dev_cosine_precision": 0.5247148288973384, "eval_allNLI-dev_cosine_recall": 0.7976878612716763, "eval_allNLI-dev_dot_accuracy": 0.689453125, "eval_allNLI-dev_dot_accuracy_threshold": 406.52789306640625, "eval_allNLI-dev_dot_ap": 0.5132629205305964, "eval_allNLI-dev_dot_f1": 0.6038543897216274, "eval_allNLI-dev_dot_f1_threshold": 353.9873046875, "eval_allNLI-dev_dot_precision": 0.47959183673469385, "eval_allNLI-dev_dot_recall": 0.815028901734104, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.70979118347168, "eval_allNLI-dev_euclidean_ap": 0.6214679779821198, "eval_allNLI-dev_euclidean_f1": 0.636144578313253, "eval_allNLI-dev_euclidean_f1_threshold": 13.583101272583008, "eval_allNLI-dev_euclidean_precision": 0.5454545454545454, "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 215.23228454589844, "eval_allNLI-dev_manhattan_ap": 0.6179219629758274, "eval_allNLI-dev_manhattan_f1": 0.6359447004608295, "eval_allNLI-dev_manhattan_f1_threshold": 288.486083984375, "eval_allNLI-dev_manhattan_precision": 0.5287356321839081, "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 406.52789306640625, "eval_allNLI-dev_max_ap": 0.6214679779821198, "eval_allNLI-dev_max_f1": 0.636144578313253, "eval_allNLI-dev_max_f1_threshold": 353.9873046875, "eval_allNLI-dev_max_precision": 0.5454545454545454, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.7687148416423243, "eval_sts-test_pearson_cosine": 0.8344014092758152, "eval_sts-test_pearson_dot": 0.8153235464131754, "eval_sts-test_pearson_euclidean": 0.8611296224825529, "eval_sts-test_pearson_manhattan": 0.8606652463835092, "eval_sts-test_pearson_max": 0.8611296224825529, "eval_sts-test_spearman_cosine": 0.8615371429086238, "eval_sts-test_spearman_dot": 0.797847147696384, "eval_sts-test_spearman_euclidean": 0.8565606236368546, "eval_sts-test_spearman_manhattan": 0.8556613470391943, "eval_sts-test_spearman_max": 0.8615371429086238, "eval_vitaminc-pairs_loss": 2.8257858753204346, "eval_vitaminc-pairs_runtime": 3.2252, "eval_vitaminc-pairs_samples_per_second": 39.688, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1280 }, { "epoch": 1.316872427983539, "eval_negation-triplets_loss": 0.9536899924278259, "eval_negation-triplets_runtime": 0.7582, "eval_negation-triplets_samples_per_second": 168.813, "eval_negation-triplets_steps_per_second": 1.319, "step": 1280 }, { "epoch": 1.316872427983539, "eval_scitail-pairs-pos_loss": 0.18427607417106628, "eval_scitail-pairs-pos_runtime": 0.8724, "eval_scitail-pairs-pos_samples_per_second": 146.726, "eval_scitail-pairs-pos_steps_per_second": 1.146, "step": 1280 }, { "epoch": 1.316872427983539, "eval_scitail-pairs-qa_loss": 0.000654980365652591, "eval_scitail-pairs-qa_runtime": 0.591, "eval_scitail-pairs-qa_samples_per_second": 216.575, "eval_scitail-pairs-qa_steps_per_second": 1.692, "step": 1280 }, { "epoch": 1.316872427983539, "eval_xsum-pairs_loss": 0.36986735463142395, "eval_xsum-pairs_runtime": 3.0241, "eval_xsum-pairs_samples_per_second": 42.327, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1280 }, { "epoch": 1.316872427983539, "eval_sciq_pairs_loss": 0.09962441027164459, "eval_sciq_pairs_runtime": 3.4747, "eval_sciq_pairs_samples_per_second": 36.837, "eval_sciq_pairs_steps_per_second": 0.288, "step": 1280 }, { "epoch": 1.316872427983539, "eval_qasc_pairs_loss": 0.22889278829097748, "eval_qasc_pairs_runtime": 0.6143, "eval_qasc_pairs_samples_per_second": 208.36, "eval_qasc_pairs_steps_per_second": 1.628, "step": 1280 }, { "epoch": 1.316872427983539, "eval_openbookqa_pairs_loss": 0.8834983706474304, "eval_openbookqa_pairs_runtime": 0.599, "eval_openbookqa_pairs_samples_per_second": 213.688, "eval_openbookqa_pairs_steps_per_second": 1.669, "step": 1280 }, { "epoch": 1.316872427983539, "eval_msmarco_pairs_loss": 0.8783816695213318, "eval_msmarco_pairs_runtime": 1.5278, "eval_msmarco_pairs_samples_per_second": 83.781, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1280 }, { "epoch": 1.316872427983539, "eval_nq_pairs_loss": 0.7243404984474182, "eval_nq_pairs_runtime": 2.9076, "eval_nq_pairs_samples_per_second": 44.023, "eval_nq_pairs_steps_per_second": 0.344, "step": 1280 }, { "epoch": 1.316872427983539, "eval_trivia_pairs_loss": 0.8404144048690796, "eval_trivia_pairs_runtime": 3.449, "eval_trivia_pairs_samples_per_second": 37.112, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1280 }, { "epoch": 1.316872427983539, "eval_gooaq_pairs_loss": 0.4605383574962616, "eval_gooaq_pairs_runtime": 0.956, "eval_gooaq_pairs_samples_per_second": 133.896, "eval_gooaq_pairs_steps_per_second": 1.046, "step": 1280 }, { "epoch": 1.316872427983539, "eval_paws-pos_loss": 0.02303312160074711, "eval_paws-pos_runtime": 0.7005, "eval_paws-pos_samples_per_second": 182.726, "eval_paws-pos_steps_per_second": 1.428, "step": 1280 }, { "epoch": 1.316872427983539, "eval_global_dataset_loss": 0.45325952768325806, "eval_global_dataset_runtime": 13.4175, "eval_global_dataset_samples_per_second": 31.004, "eval_global_dataset_steps_per_second": 0.298, "step": 1280 }, { "epoch": 1.3179012345679013, "grad_norm": 5.580132007598877, "learning_rate": 3.405450780223602e-05, "loss": 0.1946, "step": 1281 }, { "epoch": 1.3189300411522633, "grad_norm": 5.326640605926514, "learning_rate": 3.404857823129783e-05, "loss": 0.2769, "step": 1282 }, { "epoch": 1.3199588477366255, "grad_norm": 11.557867050170898, "learning_rate": 3.404263091530771e-05, "loss": 1.1166, "step": 1283 }, { "epoch": 1.3209876543209877, "grad_norm": 10.3661527633667, "learning_rate": 3.4036665864114786e-05, "loss": 0.9978, "step": 1284 }, { "epoch": 1.3220164609053497, "grad_norm": 6.954195499420166, "learning_rate": 3.4030683087597504e-05, "loss": 0.4038, "step": 1285 }, { "epoch": 1.323045267489712, "grad_norm": 9.20566177368164, "learning_rate": 3.402468259566367e-05, "loss": 0.6581, "step": 1286 }, { "epoch": 1.324074074074074, "grad_norm": 12.167695999145508, "learning_rate": 3.401866439825045e-05, "loss": 1.1956, "step": 1287 }, { "epoch": 1.3251028806584362, "grad_norm": 11.835347175598145, "learning_rate": 3.401262850532433e-05, "loss": 1.0445, "step": 1288 }, { "epoch": 1.3261316872427984, "grad_norm": 9.587698936462402, "learning_rate": 3.4006574926881066e-05, "loss": 0.757, "step": 1289 }, { "epoch": 1.3271604938271606, "grad_norm": 6.588235378265381, "learning_rate": 3.4000503672945744e-05, "loss": 0.2547, "step": 1290 }, { "epoch": 1.3281893004115226, "grad_norm": 8.87192440032959, "learning_rate": 3.399441475357269e-05, "loss": 0.7288, "step": 1291 }, { "epoch": 1.3292181069958848, "grad_norm": 10.09223461151123, "learning_rate": 3.398830817884552e-05, "loss": 0.8613, "step": 1292 }, { "epoch": 1.3302469135802468, "grad_norm": 5.7491679191589355, "learning_rate": 3.398218395887705e-05, "loss": 0.2981, "step": 1293 }, { "epoch": 1.331275720164609, "grad_norm": 8.702662467956543, "learning_rate": 3.397604210380934e-05, "loss": 0.4096, "step": 1294 }, { "epoch": 1.3323045267489713, "grad_norm": 7.047978401184082, "learning_rate": 3.396988262381365e-05, "loss": 0.3078, "step": 1295 }, { "epoch": 1.3333333333333333, "grad_norm": 6.934315204620361, "learning_rate": 3.3963705529090425e-05, "loss": 0.3934, "step": 1296 }, { "epoch": 1.3343621399176955, "grad_norm": 5.967520713806152, "learning_rate": 3.3957510829869284e-05, "loss": 0.2141, "step": 1297 }, { "epoch": 1.3353909465020577, "grad_norm": 1.0262237787246704, "learning_rate": 3.395129853640901e-05, "loss": 0.0665, "step": 1298 }, { "epoch": 1.3364197530864197, "grad_norm": 11.270498275756836, "learning_rate": 3.3945068658997495e-05, "loss": 0.914, "step": 1299 }, { "epoch": 1.337448559670782, "grad_norm": 10.944971084594727, "learning_rate": 3.393882120795178e-05, "loss": 1.0459, "step": 1300 }, { "epoch": 1.337448559670782, "eval_Qnli-dev_cosine_accuracy": 0.71484375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.792615532875061, "eval_Qnli-dev_cosine_ap": 0.749828962990434, "eval_Qnli-dev_cosine_f1": 0.699815837937385, "eval_Qnli-dev_cosine_f1_threshold": 0.7434613704681396, "eval_Qnli-dev_cosine_precision": 0.6188925081433225, "eval_Qnli-dev_cosine_recall": 0.8050847457627118, "eval_Qnli-dev_dot_accuracy": 0.658203125, "eval_Qnli-dev_dot_accuracy_threshold": 376.4273376464844, "eval_Qnli-dev_dot_ap": 0.6644867064734452, "eval_Qnli-dev_dot_f1": 0.6786296900489396, "eval_Qnli-dev_dot_f1_threshold": 324.79473876953125, "eval_Qnli-dev_dot_precision": 0.5517241379310345, "eval_Qnli-dev_dot_recall": 0.8813559322033898, "eval_Qnli-dev_euclidean_accuracy": 0.720703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.647710800170898, "eval_Qnli-dev_euclidean_ap": 0.7604047314276812, "eval_Qnli-dev_euclidean_f1": 0.700374531835206, "eval_Qnli-dev_euclidean_f1_threshold": 15.481462478637695, "eval_Qnli-dev_euclidean_precision": 0.62751677852349, "eval_Qnli-dev_euclidean_recall": 0.7923728813559322, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 285.90618896484375, "eval_Qnli-dev_manhattan_ap": 0.7575847560194282, "eval_Qnli-dev_manhattan_f1": 0.7072243346007605, "eval_Qnli-dev_manhattan_f1_threshold": 319.30615234375, "eval_Qnli-dev_manhattan_precision": 0.6413793103448275, "eval_Qnli-dev_manhattan_recall": 0.788135593220339, "eval_Qnli-dev_max_accuracy": 0.720703125, "eval_Qnli-dev_max_accuracy_threshold": 376.4273376464844, "eval_Qnli-dev_max_ap": 0.7604047314276812, "eval_Qnli-dev_max_f1": 0.7072243346007605, "eval_Qnli-dev_max_f1_threshold": 324.79473876953125, "eval_Qnli-dev_max_precision": 0.6413793103448275, "eval_Qnli-dev_max_recall": 0.8813559322033898, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.882408082485199, "eval_allNLI-dev_cosine_ap": 0.6221112461174584, "eval_allNLI-dev_cosine_f1": 0.6326963906581741, "eval_allNLI-dev_cosine_f1_threshold": 0.7625564336776733, "eval_allNLI-dev_cosine_precision": 0.5, "eval_allNLI-dev_cosine_recall": 0.861271676300578, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 400.8938903808594, "eval_allNLI-dev_dot_ap": 0.5324965985830011, "eval_allNLI-dev_dot_f1": 0.5841784989858013, "eval_allNLI-dev_dot_f1_threshold": 343.65850830078125, "eval_allNLI-dev_dot_precision": 0.45, "eval_allNLI-dev_dot_recall": 0.8323699421965318, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.216476440429688, "eval_allNLI-dev_euclidean_ap": 0.6296916777262784, "eval_allNLI-dev_euclidean_f1": 0.6420824295010846, "eval_allNLI-dev_euclidean_f1_threshold": 14.571252822875977, "eval_allNLI-dev_euclidean_precision": 0.5138888888888888, "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 233.3595428466797, "eval_allNLI-dev_manhattan_ap": 0.6231183643819906, "eval_allNLI-dev_manhattan_f1": 0.638477801268499, "eval_allNLI-dev_manhattan_f1_threshold": 310.4957275390625, "eval_allNLI-dev_manhattan_precision": 0.5033333333333333, "eval_allNLI-dev_manhattan_recall": 0.8728323699421965, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 400.8938903808594, "eval_allNLI-dev_max_ap": 0.6296916777262784, "eval_allNLI-dev_max_f1": 0.6420824295010846, "eval_allNLI-dev_max_f1_threshold": 343.65850830078125, "eval_allNLI-dev_max_precision": 0.5138888888888888, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.7604047314276812, "eval_sts-test_pearson_cosine": 0.8349821001879352, "eval_sts-test_pearson_dot": 0.8001679116980415, "eval_sts-test_pearson_euclidean": 0.8630253118943207, "eval_sts-test_pearson_manhattan": 0.8625882800897832, "eval_sts-test_pearson_max": 0.8630253118943207, "eval_sts-test_spearman_cosine": 0.861910144672321, "eval_sts-test_spearman_dot": 0.77834190765759, "eval_sts-test_spearman_euclidean": 0.8576496455828143, "eval_sts-test_spearman_manhattan": 0.8560976718377604, "eval_sts-test_spearman_max": 0.861910144672321, "eval_vitaminc-pairs_loss": 2.950286626815796, "eval_vitaminc-pairs_runtime": 3.2157, "eval_vitaminc-pairs_samples_per_second": 39.805, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1300 }, { "epoch": 1.337448559670782, "eval_negation-triplets_loss": 1.0076651573181152, "eval_negation-triplets_runtime": 0.7627, "eval_negation-triplets_samples_per_second": 167.818, "eval_negation-triplets_steps_per_second": 1.311, "step": 1300 }, { "epoch": 1.337448559670782, "eval_scitail-pairs-pos_loss": 0.18644708395004272, "eval_scitail-pairs-pos_runtime": 0.9162, "eval_scitail-pairs-pos_samples_per_second": 139.706, "eval_scitail-pairs-pos_steps_per_second": 1.091, "step": 1300 }, { "epoch": 1.337448559670782, "eval_scitail-pairs-qa_loss": 0.0008939155377447605, "eval_scitail-pairs-qa_runtime": 0.5994, "eval_scitail-pairs-qa_samples_per_second": 213.553, "eval_scitail-pairs-qa_steps_per_second": 1.668, "step": 1300 }, { "epoch": 1.337448559670782, "eval_xsum-pairs_loss": 0.34629058837890625, "eval_xsum-pairs_runtime": 3.0298, "eval_xsum-pairs_samples_per_second": 42.247, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1300 }, { "epoch": 1.337448559670782, "eval_sciq_pairs_loss": 0.09945479035377502, "eval_sciq_pairs_runtime": 3.5024, "eval_sciq_pairs_samples_per_second": 36.546, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1300 }, { "epoch": 1.337448559670782, "eval_qasc_pairs_loss": 0.21978069841861725, "eval_qasc_pairs_runtime": 0.6157, "eval_qasc_pairs_samples_per_second": 207.889, "eval_qasc_pairs_steps_per_second": 1.624, "step": 1300 }, { "epoch": 1.337448559670782, "eval_openbookqa_pairs_loss": 0.8806225061416626, "eval_openbookqa_pairs_runtime": 0.5926, "eval_openbookqa_pairs_samples_per_second": 216.006, "eval_openbookqa_pairs_steps_per_second": 1.688, "step": 1300 }, { "epoch": 1.337448559670782, "eval_msmarco_pairs_loss": 1.0302841663360596, "eval_msmarco_pairs_runtime": 1.5274, "eval_msmarco_pairs_samples_per_second": 83.802, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1300 }, { "epoch": 1.337448559670782, "eval_nq_pairs_loss": 0.7833474278450012, "eval_nq_pairs_runtime": 2.9026, "eval_nq_pairs_samples_per_second": 44.098, "eval_nq_pairs_steps_per_second": 0.345, "step": 1300 }, { "epoch": 1.337448559670782, "eval_trivia_pairs_loss": 0.923805296421051, "eval_trivia_pairs_runtime": 3.4429, "eval_trivia_pairs_samples_per_second": 37.178, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1300 }, { "epoch": 1.337448559670782, "eval_gooaq_pairs_loss": 0.48123741149902344, "eval_gooaq_pairs_runtime": 0.9544, "eval_gooaq_pairs_samples_per_second": 134.112, "eval_gooaq_pairs_steps_per_second": 1.048, "step": 1300 }, { "epoch": 1.337448559670782, "eval_paws-pos_loss": 0.0238708034157753, "eval_paws-pos_runtime": 0.7062, "eval_paws-pos_samples_per_second": 181.245, "eval_paws-pos_steps_per_second": 1.416, "step": 1300 }, { "epoch": 1.337448559670782, "eval_global_dataset_loss": 0.4899948239326477, "eval_global_dataset_runtime": 13.4006, "eval_global_dataset_samples_per_second": 31.043, "eval_global_dataset_steps_per_second": 0.298, "step": 1300 }, { "epoch": 1.3384773662551441, "grad_norm": 5.659729480743408, "learning_rate": 3.393255619361798e-05, "loss": 0.422, "step": 1301 }, { "epoch": 1.3395061728395061, "grad_norm": 12.18124008178711, "learning_rate": 3.3926273626371344e-05, "loss": 0.982, "step": 1302 }, { "epoch": 1.3405349794238683, "grad_norm": 11.858753204345703, "learning_rate": 3.391997351661614e-05, "loss": 0.8423, "step": 1303 }, { "epoch": 1.3415637860082303, "grad_norm": 0.8034223318099976, "learning_rate": 3.391365587478571e-05, "loss": 0.0143, "step": 1304 }, { "epoch": 1.3425925925925926, "grad_norm": 10.877967834472656, "learning_rate": 3.3907320711342424e-05, "loss": 0.8905, "step": 1305 }, { "epoch": 1.3436213991769548, "grad_norm": 6.035357475280762, "learning_rate": 3.390096803677767e-05, "loss": 0.2113, "step": 1306 }, { "epoch": 1.344650205761317, "grad_norm": 11.848456382751465, "learning_rate": 3.389459786161184e-05, "loss": 1.1551, "step": 1307 }, { "epoch": 1.345679012345679, "grad_norm": 11.645739555358887, "learning_rate": 3.38882101963943e-05, "loss": 0.8173, "step": 1308 }, { "epoch": 1.3467078189300412, "grad_norm": 1.066010594367981, "learning_rate": 3.388180505170339e-05, "loss": 0.0205, "step": 1309 }, { "epoch": 1.3477366255144032, "grad_norm": 7.991639614105225, "learning_rate": 3.387538243814639e-05, "loss": 0.4653, "step": 1310 }, { "epoch": 1.3487654320987654, "grad_norm": 8.536792755126953, "learning_rate": 3.3868942366359516e-05, "loss": 0.5243, "step": 1311 }, { "epoch": 1.3497942386831276, "grad_norm": 12.100427627563477, "learning_rate": 3.386248484700788e-05, "loss": 0.9373, "step": 1312 }, { "epoch": 1.3508230452674896, "grad_norm": 8.525440216064453, "learning_rate": 3.385600989078551e-05, "loss": 0.9276, "step": 1313 }, { "epoch": 1.3518518518518519, "grad_norm": 7.700965404510498, "learning_rate": 3.3849517508415306e-05, "loss": 0.6381, "step": 1314 }, { "epoch": 1.3528806584362139, "grad_norm": 8.957015037536621, "learning_rate": 3.3843007710648995e-05, "loss": 0.9841, "step": 1315 }, { "epoch": 1.353909465020576, "grad_norm": 9.407490730285645, "learning_rate": 3.38364805082672e-05, "loss": 0.6191, "step": 1316 }, { "epoch": 1.3549382716049383, "grad_norm": 8.58808708190918, "learning_rate": 3.382993591207931e-05, "loss": 0.4349, "step": 1317 }, { "epoch": 1.3559670781893005, "grad_norm": 7.1622772216796875, "learning_rate": 3.382337393292358e-05, "loss": 0.3448, "step": 1318 }, { "epoch": 1.3569958847736625, "grad_norm": 8.696556091308594, "learning_rate": 3.3816794581666986e-05, "loss": 0.5121, "step": 1319 }, { "epoch": 1.3580246913580247, "grad_norm": 0.7031188607215881, "learning_rate": 3.3810197869205324e-05, "loss": 0.0379, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_Qnli-dev_cosine_accuracy": 0.71484375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7805460691452026, "eval_Qnli-dev_cosine_ap": 0.7576269833878866, "eval_Qnli-dev_cosine_f1": 0.7047244094488188, "eval_Qnli-dev_cosine_f1_threshold": 0.7520368099212646, "eval_Qnli-dev_cosine_precision": 0.6580882352941176, "eval_Qnli-dev_cosine_recall": 0.7584745762711864, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 369.0809326171875, "eval_Qnli-dev_dot_ap": 0.6915697235432918, "eval_Qnli-dev_dot_f1": 0.6821963394342762, "eval_Qnli-dev_dot_f1_threshold": 319.52972412109375, "eval_Qnli-dev_dot_precision": 0.5616438356164384, "eval_Qnli-dev_dot_recall": 0.8686440677966102, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.404098510742188, "eval_Qnli-dev_euclidean_ap": 0.7639315840425045, "eval_Qnli-dev_euclidean_f1": 0.700587084148728, "eval_Qnli-dev_euclidean_f1_threshold": 15.238872528076172, "eval_Qnli-dev_euclidean_precision": 0.6509090909090909, "eval_Qnli-dev_euclidean_recall": 0.7584745762711864, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 286.43316650390625, "eval_Qnli-dev_manhattan_ap": 0.7691725685119097, "eval_Qnli-dev_manhattan_f1": 0.7054409005628519, "eval_Qnli-dev_manhattan_f1_threshold": 325.53839111328125, "eval_Qnli-dev_manhattan_precision": 0.632996632996633, "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 369.0809326171875, "eval_Qnli-dev_max_ap": 0.7691725685119097, "eval_Qnli-dev_max_f1": 0.7054409005628519, "eval_Qnli-dev_max_f1_threshold": 325.53839111328125, "eval_Qnli-dev_max_precision": 0.6580882352941176, "eval_Qnli-dev_max_recall": 0.8686440677966102, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8764334321022034, "eval_allNLI-dev_cosine_ap": 0.6243270151502973, "eval_allNLI-dev_cosine_f1": 0.6389496717724288, "eval_allNLI-dev_cosine_f1_threshold": 0.7704949975013733, "eval_allNLI-dev_cosine_precision": 0.5140845070422535, "eval_allNLI-dev_cosine_recall": 0.8439306358381503, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 401.2423095703125, "eval_allNLI-dev_dot_ap": 0.546262957617912, "eval_allNLI-dev_dot_f1": 0.6046511627906976, "eval_allNLI-dev_dot_f1_threshold": 345.5875244140625, "eval_allNLI-dev_dot_precision": 0.4766666666666667, "eval_allNLI-dev_dot_recall": 0.8265895953757225, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.70079517364502, "eval_allNLI-dev_euclidean_ap": 0.6336567588303856, "eval_allNLI-dev_euclidean_f1": 0.6444906444906446, "eval_allNLI-dev_euclidean_f1_threshold": 15.083576202392578, "eval_allNLI-dev_euclidean_precision": 0.5032467532467533, "eval_allNLI-dev_euclidean_recall": 0.8959537572254336, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 226.74481201171875, "eval_allNLI-dev_manhattan_ap": 0.6259645089300835, "eval_allNLI-dev_manhattan_f1": 0.644880174291939, "eval_allNLI-dev_manhattan_f1_threshold": 305.73834228515625, "eval_allNLI-dev_manhattan_precision": 0.5174825174825175, "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 401.2423095703125, "eval_allNLI-dev_max_ap": 0.6336567588303856, "eval_allNLI-dev_max_f1": 0.644880174291939, "eval_allNLI-dev_max_f1_threshold": 345.5875244140625, "eval_allNLI-dev_max_precision": 0.5174825174825175, "eval_allNLI-dev_max_recall": 0.8959537572254336, "eval_sequential_score": 0.7691725685119097, "eval_sts-test_pearson_cosine": 0.8295056997784466, "eval_sts-test_pearson_dot": 0.8078381072629077, "eval_sts-test_pearson_euclidean": 0.8609211328044077, "eval_sts-test_pearson_manhattan": 0.8588787323326552, "eval_sts-test_pearson_max": 0.8609211328044077, "eval_sts-test_spearman_cosine": 0.8590697821118088, "eval_sts-test_spearman_dot": 0.7903332319037223, "eval_sts-test_spearman_euclidean": 0.8571045752190669, "eval_sts-test_spearman_manhattan": 0.8545855267153452, "eval_sts-test_spearman_max": 0.8590697821118088, "eval_vitaminc-pairs_loss": 3.052609443664551, "eval_vitaminc-pairs_runtime": 3.2081, "eval_vitaminc-pairs_samples_per_second": 39.899, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_negation-triplets_loss": 0.9799715280532837, "eval_negation-triplets_runtime": 0.7767, "eval_negation-triplets_samples_per_second": 164.795, "eval_negation-triplets_steps_per_second": 1.287, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_scitail-pairs-pos_loss": 0.17218124866485596, "eval_scitail-pairs-pos_runtime": 0.883, "eval_scitail-pairs-pos_samples_per_second": 144.954, "eval_scitail-pairs-pos_steps_per_second": 1.132, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_scitail-pairs-qa_loss": 0.0009102174080908298, "eval_scitail-pairs-qa_runtime": 0.6143, "eval_scitail-pairs-qa_samples_per_second": 208.372, "eval_scitail-pairs-qa_steps_per_second": 1.628, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_xsum-pairs_loss": 0.33992800116539, "eval_xsum-pairs_runtime": 3.0691, "eval_xsum-pairs_samples_per_second": 41.706, "eval_xsum-pairs_steps_per_second": 0.326, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_sciq_pairs_loss": 0.09722432494163513, "eval_sciq_pairs_runtime": 3.5028, "eval_sciq_pairs_samples_per_second": 36.542, "eval_sciq_pairs_steps_per_second": 0.285, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_qasc_pairs_loss": 0.18059255182743073, "eval_qasc_pairs_runtime": 0.6225, "eval_qasc_pairs_samples_per_second": 205.626, "eval_qasc_pairs_steps_per_second": 1.606, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_openbookqa_pairs_loss": 0.8701379895210266, "eval_openbookqa_pairs_runtime": 0.5939, "eval_openbookqa_pairs_samples_per_second": 215.541, "eval_openbookqa_pairs_steps_per_second": 1.684, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_msmarco_pairs_loss": 1.001753807067871, "eval_msmarco_pairs_runtime": 1.5244, "eval_msmarco_pairs_samples_per_second": 83.967, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_nq_pairs_loss": 0.7202290892601013, "eval_nq_pairs_runtime": 2.9028, "eval_nq_pairs_samples_per_second": 44.096, "eval_nq_pairs_steps_per_second": 0.344, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_trivia_pairs_loss": 0.9316710829734802, "eval_trivia_pairs_runtime": 3.4486, "eval_trivia_pairs_samples_per_second": 37.117, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_gooaq_pairs_loss": 0.4748282730579376, "eval_gooaq_pairs_runtime": 0.9538, "eval_gooaq_pairs_samples_per_second": 134.196, "eval_gooaq_pairs_steps_per_second": 1.048, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_paws-pos_loss": 0.022798927500844002, "eval_paws-pos_runtime": 0.7059, "eval_paws-pos_samples_per_second": 181.317, "eval_paws-pos_steps_per_second": 1.417, "step": 1320 }, { "epoch": 1.3580246913580247, "eval_global_dataset_loss": 0.5067123174667358, "eval_global_dataset_runtime": 13.3957, "eval_global_dataset_samples_per_second": 31.055, "eval_global_dataset_steps_per_second": 0.299, "step": 1320 }, { "epoch": 1.3590534979423867, "grad_norm": 7.470611095428467, "learning_rate": 3.380358380646312e-05, "loss": 0.3647, "step": 1321 }, { "epoch": 1.360082304526749, "grad_norm": 5.169586181640625, "learning_rate": 3.379695240439363e-05, "loss": 0.2223, "step": 1322 }, { "epoch": 1.3611111111111112, "grad_norm": 12.399897575378418, "learning_rate": 3.379030367397884e-05, "loss": 0.9171, "step": 1323 }, { "epoch": 1.3621399176954734, "grad_norm": 6.1529927253723145, "learning_rate": 3.378363762622942e-05, "loss": 0.2045, "step": 1324 }, { "epoch": 1.3631687242798354, "grad_norm": 10.532297134399414, "learning_rate": 3.3776954272184727e-05, "loss": 0.7837, "step": 1325 }, { "epoch": 1.3641975308641976, "grad_norm": 7.012182712554932, "learning_rate": 3.377025362291276e-05, "loss": 0.3129, "step": 1326 }, { "epoch": 1.3652263374485596, "grad_norm": 5.788620948791504, "learning_rate": 3.376353568951018e-05, "loss": 0.2301, "step": 1327 }, { "epoch": 1.3662551440329218, "grad_norm": 6.239854335784912, "learning_rate": 3.375680048310228e-05, "loss": 0.2602, "step": 1328 }, { "epoch": 1.367283950617284, "grad_norm": 5.890448093414307, "learning_rate": 3.375004801484292e-05, "loss": 0.275, "step": 1329 }, { "epoch": 1.368312757201646, "grad_norm": 5.364292621612549, "learning_rate": 3.3743278295914585e-05, "loss": 0.2825, "step": 1330 }, { "epoch": 1.3693415637860082, "grad_norm": 9.858009338378906, "learning_rate": 3.3736491337528304e-05, "loss": 0.8946, "step": 1331 }, { "epoch": 1.3703703703703702, "grad_norm": 5.880088806152344, "learning_rate": 3.372968715092367e-05, "loss": 0.2247, "step": 1332 }, { "epoch": 1.3713991769547325, "grad_norm": 1.4969556331634521, "learning_rate": 3.3722865747368794e-05, "loss": 0.0365, "step": 1333 }, { "epoch": 1.3724279835390947, "grad_norm": 6.889362812042236, "learning_rate": 3.371602713816031e-05, "loss": 0.3087, "step": 1334 }, { "epoch": 1.373456790123457, "grad_norm": 12.018694877624512, "learning_rate": 3.370917133462335e-05, "loss": 0.7406, "step": 1335 }, { "epoch": 1.374485596707819, "grad_norm": 8.535211563110352, "learning_rate": 3.37022983481115e-05, "loss": 0.6123, "step": 1336 }, { "epoch": 1.375514403292181, "grad_norm": 6.665390491485596, "learning_rate": 3.3695408190006815e-05, "loss": 0.2442, "step": 1337 }, { "epoch": 1.376543209876543, "grad_norm": 9.452113151550293, "learning_rate": 3.36885008717198e-05, "loss": 0.5433, "step": 1338 }, { "epoch": 1.3775720164609053, "grad_norm": 15.978325843811035, "learning_rate": 3.368157640468935e-05, "loss": 1.1899, "step": 1339 }, { "epoch": 1.3786008230452675, "grad_norm": 5.319306373596191, "learning_rate": 3.367463480038278e-05, "loss": 0.1734, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_Qnli-dev_cosine_accuracy": 0.705078125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7876126766204834, "eval_Qnli-dev_cosine_ap": 0.7400085812314071, "eval_Qnli-dev_cosine_f1": 0.6983050847457627, "eval_Qnli-dev_cosine_f1_threshold": 0.7217756509780884, "eval_Qnli-dev_cosine_precision": 0.5819209039548022, "eval_Qnli-dev_cosine_recall": 0.8728813559322034, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 369.82562255859375, "eval_Qnli-dev_dot_ap": 0.6517294833598514, "eval_Qnli-dev_dot_f1": 0.67008547008547, "eval_Qnli-dev_dot_f1_threshold": 337.6015930175781, "eval_Qnli-dev_dot_precision": 0.5616045845272206, "eval_Qnli-dev_dot_recall": 0.8305084745762712, "eval_Qnli-dev_euclidean_accuracy": 0.703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.102432250976562, "eval_Qnli-dev_euclidean_ap": 0.7523164499043167, "eval_Qnli-dev_euclidean_f1": 0.7030716723549488, "eval_Qnli-dev_euclidean_f1_threshold": 16.046783447265625, "eval_Qnli-dev_euclidean_precision": 0.5885714285714285, "eval_Qnli-dev_euclidean_recall": 0.8728813559322034, "eval_Qnli-dev_manhattan_accuracy": 0.705078125, "eval_Qnli-dev_manhattan_accuracy_threshold": 295.23388671875, "eval_Qnli-dev_manhattan_ap": 0.7556733058427012, "eval_Qnli-dev_manhattan_f1": 0.696095076400679, "eval_Qnli-dev_manhattan_f1_threshold": 334.82318115234375, "eval_Qnli-dev_manhattan_precision": 0.5807365439093485, "eval_Qnli-dev_manhattan_recall": 0.8686440677966102, "eval_Qnli-dev_max_accuracy": 0.705078125, "eval_Qnli-dev_max_accuracy_threshold": 369.82562255859375, "eval_Qnli-dev_max_ap": 0.7556733058427012, "eval_Qnli-dev_max_f1": 0.7030716723549488, "eval_Qnli-dev_max_f1_threshold": 337.6015930175781, "eval_Qnli-dev_max_precision": 0.5885714285714285, "eval_Qnli-dev_max_recall": 0.8728813559322034, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8959084749221802, "eval_allNLI-dev_cosine_ap": 0.6252038048871644, "eval_allNLI-dev_cosine_f1": 0.6382978723404256, "eval_allNLI-dev_cosine_f1_threshold": 0.7897143959999084, "eval_allNLI-dev_cosine_precision": 0.54, "eval_allNLI-dev_cosine_recall": 0.7803468208092486, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 449.27313232421875, "eval_allNLI-dev_dot_ap": 0.5613036858399412, "eval_allNLI-dev_dot_f1": 0.602020202020202, "eval_allNLI-dev_dot_f1_threshold": 347.0570068359375, "eval_allNLI-dev_dot_precision": 0.46273291925465837, "eval_allNLI-dev_dot_recall": 0.861271676300578, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.560067176818848, "eval_allNLI-dev_euclidean_ap": 0.6314617061154307, "eval_allNLI-dev_euclidean_f1": 0.6396396396396398, "eval_allNLI-dev_euclidean_f1_threshold": 14.363271713256836, "eval_allNLI-dev_euclidean_precision": 0.5239852398523985, "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 220.5458221435547, "eval_allNLI-dev_manhattan_ap": 0.6266918694412588, "eval_allNLI-dev_manhattan_f1": 0.6486486486486487, "eval_allNLI-dev_manhattan_f1_threshold": 300.31488037109375, "eval_allNLI-dev_manhattan_precision": 0.5313653136531366, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 449.27313232421875, "eval_allNLI-dev_max_ap": 0.6314617061154307, "eval_allNLI-dev_max_f1": 0.6486486486486487, "eval_allNLI-dev_max_f1_threshold": 347.0570068359375, "eval_allNLI-dev_max_precision": 0.54, "eval_allNLI-dev_max_recall": 0.861271676300578, "eval_sequential_score": 0.7556733058427012, "eval_sts-test_pearson_cosine": 0.8261417254823802, "eval_sts-test_pearson_dot": 0.8012305316887692, "eval_sts-test_pearson_euclidean": 0.8622507133070001, "eval_sts-test_pearson_manhattan": 0.8618004189910506, "eval_sts-test_pearson_max": 0.8622507133070001, "eval_sts-test_spearman_cosine": 0.862160304223499, "eval_sts-test_spearman_dot": 0.7936632627663793, "eval_sts-test_spearman_euclidean": 0.8611724648795452, "eval_sts-test_spearman_manhattan": 0.8590668285285571, "eval_sts-test_spearman_max": 0.862160304223499, "eval_vitaminc-pairs_loss": 3.161919593811035, "eval_vitaminc-pairs_runtime": 3.2189, "eval_vitaminc-pairs_samples_per_second": 39.765, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_negation-triplets_loss": 0.9676703214645386, "eval_negation-triplets_runtime": 0.7626, "eval_negation-triplets_samples_per_second": 167.839, "eval_negation-triplets_steps_per_second": 1.311, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_scitail-pairs-pos_loss": 0.1650688350200653, "eval_scitail-pairs-pos_runtime": 0.8778, "eval_scitail-pairs-pos_samples_per_second": 145.823, "eval_scitail-pairs-pos_steps_per_second": 1.139, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_scitail-pairs-qa_loss": 0.001209335750900209, "eval_scitail-pairs-qa_runtime": 0.6014, "eval_scitail-pairs-qa_samples_per_second": 212.819, "eval_scitail-pairs-qa_steps_per_second": 1.663, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_xsum-pairs_loss": 0.3014724850654602, "eval_xsum-pairs_runtime": 3.0317, "eval_xsum-pairs_samples_per_second": 42.221, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_sciq_pairs_loss": 0.09812074154615402, "eval_sciq_pairs_runtime": 3.52, "eval_sciq_pairs_samples_per_second": 36.364, "eval_sciq_pairs_steps_per_second": 0.284, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_qasc_pairs_loss": 0.20444463193416595, "eval_qasc_pairs_runtime": 0.6203, "eval_qasc_pairs_samples_per_second": 206.355, "eval_qasc_pairs_steps_per_second": 1.612, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_openbookqa_pairs_loss": 0.8844213485717773, "eval_openbookqa_pairs_runtime": 0.5908, "eval_openbookqa_pairs_samples_per_second": 216.639, "eval_openbookqa_pairs_steps_per_second": 1.692, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_msmarco_pairs_loss": 0.9858245849609375, "eval_msmarco_pairs_runtime": 1.5236, "eval_msmarco_pairs_samples_per_second": 84.014, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_nq_pairs_loss": 0.7741794586181641, "eval_nq_pairs_runtime": 2.9359, "eval_nq_pairs_samples_per_second": 43.598, "eval_nq_pairs_steps_per_second": 0.341, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_trivia_pairs_loss": 0.7956993579864502, "eval_trivia_pairs_runtime": 3.4425, "eval_trivia_pairs_samples_per_second": 37.183, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_gooaq_pairs_loss": 0.40307921171188354, "eval_gooaq_pairs_runtime": 0.9582, "eval_gooaq_pairs_samples_per_second": 133.578, "eval_gooaq_pairs_steps_per_second": 1.044, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_paws-pos_loss": 0.022803284227848053, "eval_paws-pos_runtime": 0.7023, "eval_paws-pos_samples_per_second": 182.25, "eval_paws-pos_steps_per_second": 1.424, "step": 1340 }, { "epoch": 1.3786008230452675, "eval_global_dataset_loss": 0.4967878758907318, "eval_global_dataset_runtime": 13.3959, "eval_global_dataset_samples_per_second": 31.054, "eval_global_dataset_steps_per_second": 0.299, "step": 1340 }, { "epoch": 1.3796296296296298, "grad_norm": 12.965760231018066, "learning_rate": 3.366767607029578e-05, "loss": 1.0524, "step": 1341 }, { "epoch": 1.3806584362139918, "grad_norm": 1.4375091791152954, "learning_rate": 3.366070022595241e-05, "loss": 0.0603, "step": 1342 }, { "epoch": 1.381687242798354, "grad_norm": 8.675809860229492, "learning_rate": 3.3653707278905044e-05, "loss": 0.4451, "step": 1343 }, { "epoch": 1.382716049382716, "grad_norm": 10.920755386352539, "learning_rate": 3.364669724073441e-05, "loss": 0.8211, "step": 1344 }, { "epoch": 1.3837448559670782, "grad_norm": 0.6817147731781006, "learning_rate": 3.3639670123049534e-05, "loss": 0.022, "step": 1345 }, { "epoch": 1.3847736625514404, "grad_norm": 8.8646240234375, "learning_rate": 3.36326259374877e-05, "loss": 0.4733, "step": 1346 }, { "epoch": 1.3858024691358024, "grad_norm": 5.144080638885498, "learning_rate": 3.3625564695714496e-05, "loss": 0.2417, "step": 1347 }, { "epoch": 1.3868312757201646, "grad_norm": 10.667274475097656, "learning_rate": 3.3618486409423734e-05, "loss": 0.7385, "step": 1348 }, { "epoch": 1.3878600823045266, "grad_norm": 10.963183403015137, "learning_rate": 3.3611391090337456e-05, "loss": 0.7823, "step": 1349 }, { "epoch": 1.3888888888888888, "grad_norm": 6.086643218994141, "learning_rate": 3.3604278750205907e-05, "loss": 0.2875, "step": 1350 }, { "epoch": 1.389917695473251, "grad_norm": 10.556036949157715, "learning_rate": 3.359714940080753e-05, "loss": 0.8247, "step": 1351 }, { "epoch": 1.3909465020576133, "grad_norm": 9.976085662841797, "learning_rate": 3.359000305394893e-05, "loss": 0.7735, "step": 1352 }, { "epoch": 1.3919753086419753, "grad_norm": 0.6431965231895447, "learning_rate": 3.3582839721464866e-05, "loss": 0.0128, "step": 1353 }, { "epoch": 1.3930041152263375, "grad_norm": 6.725648403167725, "learning_rate": 3.3575659415218206e-05, "loss": 0.2489, "step": 1354 }, { "epoch": 1.3940329218106995, "grad_norm": 9.695779800415039, "learning_rate": 3.356846214709997e-05, "loss": 0.5558, "step": 1355 }, { "epoch": 1.3950617283950617, "grad_norm": 15.12929916381836, "learning_rate": 3.3561247929029215e-05, "loss": 1.142, "step": 1356 }, { "epoch": 1.396090534979424, "grad_norm": 13.447492599487305, "learning_rate": 3.355401677295311e-05, "loss": 1.2416, "step": 1357 }, { "epoch": 1.3971193415637861, "grad_norm": 8.626240730285645, "learning_rate": 3.354676869084685e-05, "loss": 0.5202, "step": 1358 }, { "epoch": 1.3981481481481481, "grad_norm": 6.097577095031738, "learning_rate": 3.353950369471368e-05, "loss": 0.1756, "step": 1359 }, { "epoch": 1.3991769547325104, "grad_norm": 5.241819858551025, "learning_rate": 3.3532221796584825e-05, "loss": 0.2559, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_Qnli-dev_cosine_accuracy": 0.71484375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7863629460334778, "eval_Qnli-dev_cosine_ap": 0.7520452722161024, "eval_Qnli-dev_cosine_f1": 0.6998341625207297, "eval_Qnli-dev_cosine_f1_threshold": 0.7090976238250732, "eval_Qnli-dev_cosine_precision": 0.5749318801089919, "eval_Qnli-dev_cosine_recall": 0.8940677966101694, "eval_Qnli-dev_dot_accuracy": 0.658203125, "eval_Qnli-dev_dot_accuracy_threshold": 366.3503723144531, "eval_Qnli-dev_dot_ap": 0.6700128573840451, "eval_Qnli-dev_dot_f1": 0.6814580031695722, "eval_Qnli-dev_dot_f1_threshold": 316.33770751953125, "eval_Qnli-dev_dot_precision": 0.5443037974683544, "eval_Qnli-dev_dot_recall": 0.9110169491525424, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.179301261901855, "eval_Qnli-dev_euclidean_ap": 0.7627701173135915, "eval_Qnli-dev_euclidean_f1": 0.7054545454545453, "eval_Qnli-dev_euclidean_f1_threshold": 15.506027221679688, "eval_Qnli-dev_euclidean_precision": 0.6178343949044586, "eval_Qnli-dev_euclidean_recall": 0.8220338983050848, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 295.79803466796875, "eval_Qnli-dev_manhattan_ap": 0.7679690415434057, "eval_Qnli-dev_manhattan_f1": 0.7093235831809872, "eval_Qnli-dev_manhattan_f1_threshold": 323.1705322265625, "eval_Qnli-dev_manhattan_precision": 0.6237942122186495, "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 366.3503723144531, "eval_Qnli-dev_max_ap": 0.7679690415434057, "eval_Qnli-dev_max_f1": 0.7093235831809872, "eval_Qnli-dev_max_f1_threshold": 323.1705322265625, "eval_Qnli-dev_max_precision": 0.6237942122186495, "eval_Qnli-dev_max_recall": 0.9110169491525424, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8775778412818909, "eval_allNLI-dev_cosine_ap": 0.6233735118641757, "eval_allNLI-dev_cosine_f1": 0.6327433628318583, "eval_allNLI-dev_cosine_f1_threshold": 0.7827239036560059, "eval_allNLI-dev_cosine_precision": 0.5125448028673835, "eval_allNLI-dev_cosine_recall": 0.8265895953757225, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 436.905517578125, "eval_allNLI-dev_dot_ap": 0.5591141127494976, "eval_allNLI-dev_dot_f1": 0.6085192697768763, "eval_allNLI-dev_dot_f1_threshold": 348.33587646484375, "eval_allNLI-dev_dot_precision": 0.46875, "eval_allNLI-dev_dot_recall": 0.8670520231213873, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.58004379272461, "eval_allNLI-dev_euclidean_ap": 0.62762836308513, "eval_allNLI-dev_euclidean_f1": 0.6412556053811659, "eval_allNLI-dev_euclidean_f1_threshold": 14.265384674072266, "eval_allNLI-dev_euclidean_precision": 0.5238095238095238, "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 229.06759643554688, "eval_allNLI-dev_manhattan_ap": 0.6243122484228764, "eval_allNLI-dev_manhattan_f1": 0.6426966292134833, "eval_allNLI-dev_manhattan_f1_threshold": 297.4107666015625, "eval_allNLI-dev_manhattan_precision": 0.5257352941176471, "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 436.905517578125, "eval_allNLI-dev_max_ap": 0.62762836308513, "eval_allNLI-dev_max_f1": 0.6426966292134833, "eval_allNLI-dev_max_f1_threshold": 348.33587646484375, "eval_allNLI-dev_max_precision": 0.5257352941176471, "eval_allNLI-dev_max_recall": 0.8670520231213873, "eval_sequential_score": 0.7679690415434057, "eval_sts-test_pearson_cosine": 0.83738764698893, "eval_sts-test_pearson_dot": 0.8183567007456142, "eval_sts-test_pearson_euclidean": 0.866690575863861, "eval_sts-test_pearson_manhattan": 0.8654764568761573, "eval_sts-test_pearson_max": 0.866690575863861, "eval_sts-test_spearman_cosine": 0.8645791546529655, "eval_sts-test_spearman_dot": 0.8141177665618077, "eval_sts-test_spearman_euclidean": 0.8612966496299064, "eval_sts-test_spearman_manhattan": 0.8601330273312072, "eval_sts-test_spearman_max": 0.8645791546529655, "eval_vitaminc-pairs_loss": 3.062483549118042, "eval_vitaminc-pairs_runtime": 3.209, "eval_vitaminc-pairs_samples_per_second": 39.888, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_negation-triplets_loss": 0.9274396300315857, "eval_negation-triplets_runtime": 0.786, "eval_negation-triplets_samples_per_second": 162.845, "eval_negation-triplets_steps_per_second": 1.272, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_scitail-pairs-pos_loss": 0.16325777769088745, "eval_scitail-pairs-pos_runtime": 0.8967, "eval_scitail-pairs-pos_samples_per_second": 142.748, "eval_scitail-pairs-pos_steps_per_second": 1.115, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_scitail-pairs-qa_loss": 0.0012430674396455288, "eval_scitail-pairs-qa_runtime": 0.5982, "eval_scitail-pairs-qa_samples_per_second": 213.981, "eval_scitail-pairs-qa_steps_per_second": 1.672, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_xsum-pairs_loss": 0.30085036158561707, "eval_xsum-pairs_runtime": 3.0286, "eval_xsum-pairs_samples_per_second": 42.264, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_sciq_pairs_loss": 0.08908446878194809, "eval_sciq_pairs_runtime": 3.4784, "eval_sciq_pairs_samples_per_second": 36.798, "eval_sciq_pairs_steps_per_second": 0.287, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_qasc_pairs_loss": 0.21207481622695923, "eval_qasc_pairs_runtime": 0.6196, "eval_qasc_pairs_samples_per_second": 206.589, "eval_qasc_pairs_steps_per_second": 1.614, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_openbookqa_pairs_loss": 0.8421127200126648, "eval_openbookqa_pairs_runtime": 0.5888, "eval_openbookqa_pairs_samples_per_second": 217.392, "eval_openbookqa_pairs_steps_per_second": 1.698, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_msmarco_pairs_loss": 0.9122468829154968, "eval_msmarco_pairs_runtime": 1.526, "eval_msmarco_pairs_samples_per_second": 83.878, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_nq_pairs_loss": 0.7917957305908203, "eval_nq_pairs_runtime": 2.9019, "eval_nq_pairs_samples_per_second": 44.109, "eval_nq_pairs_steps_per_second": 0.345, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_trivia_pairs_loss": 0.8618249893188477, "eval_trivia_pairs_runtime": 3.4539, "eval_trivia_pairs_samples_per_second": 37.059, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_gooaq_pairs_loss": 0.42768269777297974, "eval_gooaq_pairs_runtime": 0.9557, "eval_gooaq_pairs_samples_per_second": 133.939, "eval_gooaq_pairs_steps_per_second": 1.046, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_paws-pos_loss": 0.02307211607694626, "eval_paws-pos_runtime": 0.699, "eval_paws-pos_samples_per_second": 183.109, "eval_paws-pos_steps_per_second": 1.431, "step": 1360 }, { "epoch": 1.3991769547325104, "eval_global_dataset_loss": 0.4745106101036072, "eval_global_dataset_runtime": 13.4196, "eval_global_dataset_samples_per_second": 30.999, "eval_global_dataset_steps_per_second": 0.298, "step": 1360 }, { "epoch": 1.4002057613168724, "grad_norm": 1.443122148513794, "learning_rate": 3.352492300851954e-05, "loss": 0.0722, "step": 1361 }, { "epoch": 1.4012345679012346, "grad_norm": 8.220876693725586, "learning_rate": 3.351760734260503e-05, "loss": 0.5022, "step": 1362 }, { "epoch": 1.4022633744855968, "grad_norm": 9.237991333007812, "learning_rate": 3.3510274810956444e-05, "loss": 0.7064, "step": 1363 }, { "epoch": 1.4032921810699588, "grad_norm": 6.430814266204834, "learning_rate": 3.350292542571687e-05, "loss": 0.3364, "step": 1364 }, { "epoch": 1.404320987654321, "grad_norm": 10.146993637084961, "learning_rate": 3.349555919905731e-05, "loss": 0.7776, "step": 1365 }, { "epoch": 1.405349794238683, "grad_norm": 0.9642751812934875, "learning_rate": 3.3488176143176646e-05, "loss": 0.0347, "step": 1366 }, { "epoch": 1.4063786008230452, "grad_norm": 8.7808256149292, "learning_rate": 3.3480776270301645e-05, "loss": 0.4706, "step": 1367 }, { "epoch": 1.4074074074074074, "grad_norm": 5.345498085021973, "learning_rate": 3.34733595926869e-05, "loss": 0.187, "step": 1368 }, { "epoch": 1.4084362139917697, "grad_norm": 10.110101699829102, "learning_rate": 3.346592612261487e-05, "loss": 0.5993, "step": 1369 }, { "epoch": 1.4094650205761317, "grad_norm": 8.079977035522461, "learning_rate": 3.345847587239579e-05, "loss": 0.6859, "step": 1370 }, { "epoch": 1.4104938271604939, "grad_norm": 13.22767162322998, "learning_rate": 3.345100885436769e-05, "loss": 0.9238, "step": 1371 }, { "epoch": 1.4115226337448559, "grad_norm": 5.358603000640869, "learning_rate": 3.3443525080896384e-05, "loss": 0.1511, "step": 1372 }, { "epoch": 1.412551440329218, "grad_norm": 8.831647872924805, "learning_rate": 3.343602456437542e-05, "loss": 0.6568, "step": 1373 }, { "epoch": 1.4135802469135803, "grad_norm": 6.89711332321167, "learning_rate": 3.342850731722608e-05, "loss": 0.3368, "step": 1374 }, { "epoch": 1.4146090534979423, "grad_norm": 8.110711097717285, "learning_rate": 3.3420973351897355e-05, "loss": 0.5127, "step": 1375 }, { "epoch": 1.4156378600823045, "grad_norm": 0.5851256251335144, "learning_rate": 3.3413422680865914e-05, "loss": 0.0235, "step": 1376 }, { "epoch": 1.4166666666666667, "grad_norm": 7.06640100479126, "learning_rate": 3.340585531663609e-05, "loss": 0.3398, "step": 1377 }, { "epoch": 1.4176954732510287, "grad_norm": 10.556662559509277, "learning_rate": 3.3398271271739875e-05, "loss": 0.7283, "step": 1378 }, { "epoch": 1.418724279835391, "grad_norm": 13.46735954284668, "learning_rate": 3.339067055873687e-05, "loss": 1.0423, "step": 1379 }, { "epoch": 1.4197530864197532, "grad_norm": 5.427450180053711, "learning_rate": 3.3383053190214286e-05, "loss": 0.2566, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_Qnli-dev_cosine_accuracy": 0.69921875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8063021302223206, "eval_Qnli-dev_cosine_ap": 0.74411759194727, "eval_Qnli-dev_cosine_f1": 0.69140625, "eval_Qnli-dev_cosine_f1_threshold": 0.7554892897605896, "eval_Qnli-dev_cosine_precision": 0.6413043478260869, "eval_Qnli-dev_cosine_recall": 0.75, "eval_Qnli-dev_dot_accuracy": 0.654296875, "eval_Qnli-dev_dot_accuracy_threshold": 381.40008544921875, "eval_Qnli-dev_dot_ap": 0.671022496389324, "eval_Qnli-dev_dot_f1": 0.6676970633693973, "eval_Qnli-dev_dot_f1_threshold": 306.57379150390625, "eval_Qnli-dev_dot_precision": 0.5255474452554745, "eval_Qnli-dev_dot_recall": 0.9152542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.69921875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.237188339233398, "eval_Qnli-dev_euclidean_ap": 0.7538181870941214, "eval_Qnli-dev_euclidean_f1": 0.6978557504873294, "eval_Qnli-dev_euclidean_f1_threshold": 15.234086990356445, "eval_Qnli-dev_euclidean_precision": 0.6462093862815884, "eval_Qnli-dev_euclidean_recall": 0.7584745762711864, "eval_Qnli-dev_manhattan_accuracy": 0.708984375, "eval_Qnli-dev_manhattan_accuracy_threshold": 286.4082336425781, "eval_Qnli-dev_manhattan_ap": 0.7616482458252489, "eval_Qnli-dev_manhattan_f1": 0.700374531835206, "eval_Qnli-dev_manhattan_f1_threshold": 324.758544921875, "eval_Qnli-dev_manhattan_precision": 0.62751677852349, "eval_Qnli-dev_manhattan_recall": 0.7923728813559322, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 381.40008544921875, "eval_Qnli-dev_max_ap": 0.7616482458252489, "eval_Qnli-dev_max_f1": 0.700374531835206, "eval_Qnli-dev_max_f1_threshold": 324.758544921875, "eval_Qnli-dev_max_precision": 0.6462093862815884, "eval_Qnli-dev_max_recall": 0.9152542372881356, "eval_allNLI-dev_cosine_accuracy": 0.734375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8630719184875488, "eval_allNLI-dev_cosine_ap": 0.6272275331563291, "eval_allNLI-dev_cosine_f1": 0.6347031963470319, "eval_allNLI-dev_cosine_f1_threshold": 0.773266077041626, "eval_allNLI-dev_cosine_precision": 0.5245283018867924, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 395.307861328125, "eval_allNLI-dev_dot_ap": 0.563263501967994, "eval_allNLI-dev_dot_f1": 0.6070686070686071, "eval_allNLI-dev_dot_f1_threshold": 346.6265869140625, "eval_allNLI-dev_dot_precision": 0.474025974025974, "eval_allNLI-dev_dot_recall": 0.8439306358381503, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.851537704467773, "eval_allNLI-dev_euclidean_ap": 0.6326968806302966, "eval_allNLI-dev_euclidean_f1": 0.6485260770975058, "eval_allNLI-dev_euclidean_f1_threshold": 14.771903991699219, "eval_allNLI-dev_euclidean_precision": 0.5335820895522388, "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 231.5478515625, "eval_allNLI-dev_manhattan_ap": 0.6294888875849509, "eval_allNLI-dev_manhattan_f1": 0.6382022471910113, "eval_allNLI-dev_manhattan_f1_threshold": 309.86102294921875, "eval_allNLI-dev_manhattan_precision": 0.5220588235294118, "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 395.307861328125, "eval_allNLI-dev_max_ap": 0.6326968806302966, "eval_allNLI-dev_max_f1": 0.6485260770975058, "eval_allNLI-dev_max_f1_threshold": 346.6265869140625, "eval_allNLI-dev_max_precision": 0.5335820895522388, "eval_allNLI-dev_max_recall": 0.8439306358381503, "eval_sequential_score": 0.7616482458252489, "eval_sts-test_pearson_cosine": 0.834879144813166, "eval_sts-test_pearson_dot": 0.8011799964530804, "eval_sts-test_pearson_euclidean": 0.8686352715928541, "eval_sts-test_pearson_manhattan": 0.8678638281425739, "eval_sts-test_pearson_max": 0.8686352715928541, "eval_sts-test_spearman_cosine": 0.8662917854264144, "eval_sts-test_spearman_dot": 0.7925740170641125, "eval_sts-test_spearman_euclidean": 0.8658375601232916, "eval_sts-test_spearman_manhattan": 0.8650538760338053, "eval_sts-test_spearman_max": 0.8662917854264144, "eval_vitaminc-pairs_loss": 3.0536015033721924, "eval_vitaminc-pairs_runtime": 3.2066, "eval_vitaminc-pairs_samples_per_second": 39.918, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_negation-triplets_loss": 0.9620550870895386, "eval_negation-triplets_runtime": 0.7623, "eval_negation-triplets_samples_per_second": 167.916, "eval_negation-triplets_steps_per_second": 1.312, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_scitail-pairs-pos_loss": 0.16859027743339539, "eval_scitail-pairs-pos_runtime": 0.8698, "eval_scitail-pairs-pos_samples_per_second": 147.167, "eval_scitail-pairs-pos_steps_per_second": 1.15, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_scitail-pairs-qa_loss": 0.001436021295376122, "eval_scitail-pairs-qa_runtime": 0.5959, "eval_scitail-pairs-qa_samples_per_second": 214.79, "eval_scitail-pairs-qa_steps_per_second": 1.678, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_xsum-pairs_loss": 0.3317752480506897, "eval_xsum-pairs_runtime": 3.0275, "eval_xsum-pairs_samples_per_second": 42.28, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_sciq_pairs_loss": 0.0831882655620575, "eval_sciq_pairs_runtime": 3.4867, "eval_sciq_pairs_samples_per_second": 36.711, "eval_sciq_pairs_steps_per_second": 0.287, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_qasc_pairs_loss": 0.19361478090286255, "eval_qasc_pairs_runtime": 0.6177, "eval_qasc_pairs_samples_per_second": 207.225, "eval_qasc_pairs_steps_per_second": 1.619, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_openbookqa_pairs_loss": 0.838322103023529, "eval_openbookqa_pairs_runtime": 0.5946, "eval_openbookqa_pairs_samples_per_second": 215.255, "eval_openbookqa_pairs_steps_per_second": 1.682, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_msmarco_pairs_loss": 0.9363899827003479, "eval_msmarco_pairs_runtime": 1.5285, "eval_msmarco_pairs_samples_per_second": 83.741, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_nq_pairs_loss": 0.8688430190086365, "eval_nq_pairs_runtime": 2.9031, "eval_nq_pairs_samples_per_second": 44.092, "eval_nq_pairs_steps_per_second": 0.344, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_trivia_pairs_loss": 0.993528425693512, "eval_trivia_pairs_runtime": 3.4413, "eval_trivia_pairs_samples_per_second": 37.195, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_gooaq_pairs_loss": 0.4363172650337219, "eval_gooaq_pairs_runtime": 0.9587, "eval_gooaq_pairs_samples_per_second": 133.51, "eval_gooaq_pairs_steps_per_second": 1.043, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_paws-pos_loss": 0.02160307578742504, "eval_paws-pos_runtime": 0.7183, "eval_paws-pos_samples_per_second": 178.209, "eval_paws-pos_steps_per_second": 1.392, "step": 1380 }, { "epoch": 1.4197530864197532, "eval_global_dataset_loss": 0.5103635191917419, "eval_global_dataset_runtime": 13.3994, "eval_global_dataset_samples_per_second": 31.046, "eval_global_dataset_steps_per_second": 0.299, "step": 1380 }, { "epoch": 1.4207818930041152, "grad_norm": 6.8490495681762695, "learning_rate": 3.337541917878692e-05, "loss": 0.2802, "step": 1381 }, { "epoch": 1.4218106995884774, "grad_norm": 7.549937725067139, "learning_rate": 3.3367768537097136e-05, "loss": 0.3093, "step": 1382 }, { "epoch": 1.4228395061728394, "grad_norm": 10.411901473999023, "learning_rate": 3.336010127781481e-05, "loss": 0.8066, "step": 1383 }, { "epoch": 1.4238683127572016, "grad_norm": 7.990019798278809, "learning_rate": 3.335241741363736e-05, "loss": 0.3886, "step": 1384 }, { "epoch": 1.4248971193415638, "grad_norm": 5.726963996887207, "learning_rate": 3.334471695728972e-05, "loss": 0.2353, "step": 1385 }, { "epoch": 1.425925925925926, "grad_norm": 7.145617485046387, "learning_rate": 3.333699992152426e-05, "loss": 0.4184, "step": 1386 }, { "epoch": 1.426954732510288, "grad_norm": 10.926759719848633, "learning_rate": 3.3329266319120835e-05, "loss": 0.7746, "step": 1387 }, { "epoch": 1.4279835390946503, "grad_norm": 12.381770133972168, "learning_rate": 3.332151616288673e-05, "loss": 1.0693, "step": 1388 }, { "epoch": 1.4290123456790123, "grad_norm": 1.119705080986023, "learning_rate": 3.331374946565665e-05, "loss": 0.02, "step": 1389 }, { "epoch": 1.4300411522633745, "grad_norm": 7.537491798400879, "learning_rate": 3.3305966240292666e-05, "loss": 0.2794, "step": 1390 }, { "epoch": 1.4310699588477367, "grad_norm": 0.732020914554596, "learning_rate": 3.329816649968425e-05, "loss": 0.0115, "step": 1391 }, { "epoch": 1.4320987654320987, "grad_norm": 7.852914333343506, "learning_rate": 3.329035025674822e-05, "loss": 0.3645, "step": 1392 }, { "epoch": 1.433127572016461, "grad_norm": 6.324718952178955, "learning_rate": 3.3282517524428704e-05, "loss": 0.2254, "step": 1393 }, { "epoch": 1.4341563786008231, "grad_norm": 7.247324466705322, "learning_rate": 3.327466831569716e-05, "loss": 0.2813, "step": 1394 }, { "epoch": 1.4351851851851851, "grad_norm": 12.112274169921875, "learning_rate": 3.3266802643552327e-05, "loss": 0.8248, "step": 1395 }, { "epoch": 1.4362139917695473, "grad_norm": 0.8138561844825745, "learning_rate": 3.325892052102018e-05, "loss": 0.0158, "step": 1396 }, { "epoch": 1.4372427983539096, "grad_norm": 7.126984596252441, "learning_rate": 3.325102196115398e-05, "loss": 0.285, "step": 1397 }, { "epoch": 1.4382716049382716, "grad_norm": 9.047616958618164, "learning_rate": 3.3243106977034185e-05, "loss": 0.5326, "step": 1398 }, { "epoch": 1.4393004115226338, "grad_norm": 11.624143600463867, "learning_rate": 3.323517558176846e-05, "loss": 0.781, "step": 1399 }, { "epoch": 1.4403292181069958, "grad_norm": 6.335608005523682, "learning_rate": 3.3227227788491634e-05, "loss": 0.261, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_Qnli-dev_cosine_accuracy": 0.7109375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7974846363067627, "eval_Qnli-dev_cosine_ap": 0.7523113514409753, "eval_Qnli-dev_cosine_f1": 0.6984126984126984, "eval_Qnli-dev_cosine_f1_threshold": 0.7209751605987549, "eval_Qnli-dev_cosine_precision": 0.5981873111782477, "eval_Qnli-dev_cosine_recall": 0.8389830508474576, "eval_Qnli-dev_dot_accuracy": 0.681640625, "eval_Qnli-dev_dot_accuracy_threshold": 381.9517517089844, "eval_Qnli-dev_dot_ap": 0.6812870817491152, "eval_Qnli-dev_dot_f1": 0.6745762711864407, "eval_Qnli-dev_dot_f1_threshold": 325.1109619140625, "eval_Qnli-dev_dot_precision": 0.5621468926553672, "eval_Qnli-dev_dot_recall": 0.8432203389830508, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.50356674194336, "eval_Qnli-dev_euclidean_ap": 0.7619644519510701, "eval_Qnli-dev_euclidean_f1": 0.6952054794520549, "eval_Qnli-dev_euclidean_f1_threshold": 16.413734436035156, "eval_Qnli-dev_euclidean_precision": 0.5833333333333334, "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, "eval_Qnli-dev_manhattan_accuracy": 0.720703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 291.779052734375, "eval_Qnli-dev_manhattan_ap": 0.767528548647967, "eval_Qnli-dev_manhattan_f1": 0.7100175746924429, "eval_Qnli-dev_manhattan_f1_threshold": 337.9719543457031, "eval_Qnli-dev_manhattan_precision": 0.6066066066066066, "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, "eval_Qnli-dev_max_accuracy": 0.720703125, "eval_Qnli-dev_max_accuracy_threshold": 381.9517517089844, "eval_Qnli-dev_max_ap": 0.767528548647967, "eval_Qnli-dev_max_f1": 0.7100175746924429, "eval_Qnli-dev_max_f1_threshold": 337.9719543457031, "eval_Qnli-dev_max_precision": 0.6066066066066066, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8541853427886963, "eval_allNLI-dev_cosine_ap": 0.6255349351155984, "eval_allNLI-dev_cosine_f1": 0.6448598130841121, "eval_allNLI-dev_cosine_f1_threshold": 0.7745788097381592, "eval_allNLI-dev_cosine_precision": 0.5411764705882353, "eval_allNLI-dev_cosine_recall": 0.7976878612716763, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 385.14898681640625, "eval_allNLI-dev_dot_ap": 0.553538131502168, "eval_allNLI-dev_dot_f1": 0.6029411764705882, "eval_allNLI-dev_dot_f1_threshold": 357.1886901855469, "eval_allNLI-dev_dot_precision": 0.5234042553191489, "eval_allNLI-dev_dot_recall": 0.7109826589595376, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.644266128540039, "eval_allNLI-dev_euclidean_ap": 0.632827860323889, "eval_allNLI-dev_euclidean_f1": 0.6467889908256881, "eval_allNLI-dev_euclidean_f1_threshold": 14.465250015258789, "eval_allNLI-dev_euclidean_precision": 0.5361216730038023, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.73828125, "eval_allNLI-dev_manhattan_accuracy_threshold": 246.18682861328125, "eval_allNLI-dev_manhattan_ap": 0.6280673121543834, "eval_allNLI-dev_manhattan_f1": 0.6416861826697893, "eval_allNLI-dev_manhattan_f1_threshold": 298.9549255371094, "eval_allNLI-dev_manhattan_precision": 0.5393700787401575, "eval_allNLI-dev_manhattan_recall": 0.791907514450867, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 385.14898681640625, "eval_allNLI-dev_max_ap": 0.632827860323889, "eval_allNLI-dev_max_f1": 0.6467889908256881, "eval_allNLI-dev_max_f1_threshold": 357.1886901855469, "eval_allNLI-dev_max_precision": 0.5411764705882353, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.767528548647967, "eval_sts-test_pearson_cosine": 0.8294398782719722, "eval_sts-test_pearson_dot": 0.806709569277571, "eval_sts-test_pearson_euclidean": 0.8635655114321077, "eval_sts-test_pearson_manhattan": 0.8619051829676366, "eval_sts-test_pearson_max": 0.8635655114321077, "eval_sts-test_spearman_cosine": 0.863560929202527, "eval_sts-test_spearman_dot": 0.8028960745090844, "eval_sts-test_spearman_euclidean": 0.8622703923265218, "eval_sts-test_spearman_manhattan": 0.860564160983452, "eval_sts-test_spearman_max": 0.863560929202527, "eval_vitaminc-pairs_loss": 3.102480173110962, "eval_vitaminc-pairs_runtime": 3.5959, "eval_vitaminc-pairs_samples_per_second": 35.596, "eval_vitaminc-pairs_steps_per_second": 0.278, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_negation-triplets_loss": 0.9550673365592957, "eval_negation-triplets_runtime": 0.7685, "eval_negation-triplets_samples_per_second": 166.547, "eval_negation-triplets_steps_per_second": 1.301, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_scitail-pairs-pos_loss": 0.18612991273403168, "eval_scitail-pairs-pos_runtime": 0.8883, "eval_scitail-pairs-pos_samples_per_second": 144.089, "eval_scitail-pairs-pos_steps_per_second": 1.126, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_scitail-pairs-qa_loss": 0.0008277587476186454, "eval_scitail-pairs-qa_runtime": 0.605, "eval_scitail-pairs-qa_samples_per_second": 211.554, "eval_scitail-pairs-qa_steps_per_second": 1.653, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_xsum-pairs_loss": 0.29447418451309204, "eval_xsum-pairs_runtime": 3.0431, "eval_xsum-pairs_samples_per_second": 42.062, "eval_xsum-pairs_steps_per_second": 0.329, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_sciq_pairs_loss": 0.0855700671672821, "eval_sciq_pairs_runtime": 3.4947, "eval_sciq_pairs_samples_per_second": 36.627, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_qasc_pairs_loss": 0.18778705596923828, "eval_qasc_pairs_runtime": 0.613, "eval_qasc_pairs_samples_per_second": 208.822, "eval_qasc_pairs_steps_per_second": 1.631, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_openbookqa_pairs_loss": 0.8724998235702515, "eval_openbookqa_pairs_runtime": 0.5965, "eval_openbookqa_pairs_samples_per_second": 214.582, "eval_openbookqa_pairs_steps_per_second": 1.676, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_msmarco_pairs_loss": 0.898486852645874, "eval_msmarco_pairs_runtime": 1.521, "eval_msmarco_pairs_samples_per_second": 84.158, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_nq_pairs_loss": 0.8151339888572693, "eval_nq_pairs_runtime": 2.9108, "eval_nq_pairs_samples_per_second": 43.975, "eval_nq_pairs_steps_per_second": 0.344, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_trivia_pairs_loss": 0.8818275928497314, "eval_trivia_pairs_runtime": 3.4441, "eval_trivia_pairs_samples_per_second": 37.165, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_gooaq_pairs_loss": 0.461891770362854, "eval_gooaq_pairs_runtime": 0.9488, "eval_gooaq_pairs_samples_per_second": 134.904, "eval_gooaq_pairs_steps_per_second": 1.054, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_paws-pos_loss": 0.02126290462911129, "eval_paws-pos_runtime": 0.7036, "eval_paws-pos_samples_per_second": 181.912, "eval_paws-pos_steps_per_second": 1.421, "step": 1400 }, { "epoch": 1.4403292181069958, "eval_global_dataset_loss": 0.4936361312866211, "eval_global_dataset_runtime": 13.4229, "eval_global_dataset_samples_per_second": 30.992, "eval_global_dataset_steps_per_second": 0.298, "step": 1400 }, { "epoch": 1.441358024691358, "grad_norm": 17.88776206970215, "learning_rate": 3.321926361036571e-05, "loss": 2.83, "step": 1401 }, { "epoch": 1.4423868312757202, "grad_norm": 9.830528259277344, "learning_rate": 3.3211283060579815e-05, "loss": 0.4911, "step": 1402 }, { "epoch": 1.4434156378600824, "grad_norm": 6.779402256011963, "learning_rate": 3.320328615235019e-05, "loss": 0.256, "step": 1403 }, { "epoch": 1.4444444444444444, "grad_norm": 0.3222440183162689, "learning_rate": 3.319527289892017e-05, "loss": 0.0046, "step": 1404 }, { "epoch": 1.4454732510288066, "grad_norm": 12.658515930175781, "learning_rate": 3.3187243313560156e-05, "loss": 0.9102, "step": 1405 }, { "epoch": 1.4465020576131686, "grad_norm": 8.560856819152832, "learning_rate": 3.317919740956759e-05, "loss": 0.3865, "step": 1406 }, { "epoch": 1.4475308641975309, "grad_norm": 12.2800874710083, "learning_rate": 3.3171135200266955e-05, "loss": 1.0253, "step": 1407 }, { "epoch": 1.448559670781893, "grad_norm": 5.864134788513184, "learning_rate": 3.316305669900972e-05, "loss": 0.259, "step": 1408 }, { "epoch": 1.449588477366255, "grad_norm": 14.230968475341797, "learning_rate": 3.315496191917434e-05, "loss": 2.4152, "step": 1409 }, { "epoch": 1.4506172839506173, "grad_norm": 10.343489646911621, "learning_rate": 3.3146850874166234e-05, "loss": 0.9387, "step": 1410 }, { "epoch": 1.4516460905349795, "grad_norm": 11.05865478515625, "learning_rate": 3.313872357741774e-05, "loss": 0.9209, "step": 1411 }, { "epoch": 1.4526748971193415, "grad_norm": 7.32178258895874, "learning_rate": 3.313058004238812e-05, "loss": 0.3593, "step": 1412 }, { "epoch": 1.4537037037037037, "grad_norm": 9.29926872253418, "learning_rate": 3.312242028256356e-05, "loss": 0.6408, "step": 1413 }, { "epoch": 1.454732510288066, "grad_norm": 10.301321029663086, "learning_rate": 3.311424431145704e-05, "loss": 0.6872, "step": 1414 }, { "epoch": 1.455761316872428, "grad_norm": 4.875487327575684, "learning_rate": 3.310605214260846e-05, "loss": 0.3091, "step": 1415 }, { "epoch": 1.4567901234567902, "grad_norm": 5.230945587158203, "learning_rate": 3.30978437895845e-05, "loss": 0.2862, "step": 1416 }, { "epoch": 1.4578189300411522, "grad_norm": 10.013091087341309, "learning_rate": 3.3089619265978674e-05, "loss": 0.8637, "step": 1417 }, { "epoch": 1.4588477366255144, "grad_norm": 7.067183017730713, "learning_rate": 3.308137858541124e-05, "loss": 0.3158, "step": 1418 }, { "epoch": 1.4598765432098766, "grad_norm": 8.037884712219238, "learning_rate": 3.3073121761529245e-05, "loss": 0.6922, "step": 1419 }, { "epoch": 1.4609053497942388, "grad_norm": 0.15969954431056976, "learning_rate": 3.3064848808006447e-05, "loss": 0.0024, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_Qnli-dev_cosine_accuracy": 0.68359375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8269785642623901, "eval_Qnli-dev_cosine_ap": 0.7237253656424805, "eval_Qnli-dev_cosine_f1": 0.6806282722513088, "eval_Qnli-dev_cosine_f1_threshold": 0.7489698529243469, "eval_Qnli-dev_cosine_precision": 0.5786350148367952, "eval_Qnli-dev_cosine_recall": 0.826271186440678, "eval_Qnli-dev_dot_accuracy": 0.634765625, "eval_Qnli-dev_dot_accuracy_threshold": 420.9667053222656, "eval_Qnli-dev_dot_ap": 0.6345480523803997, "eval_Qnli-dev_dot_f1": 0.6751592356687899, "eval_Qnli-dev_dot_f1_threshold": 345.52545166015625, "eval_Qnli-dev_dot_precision": 0.5408163265306123, "eval_Qnli-dev_dot_recall": 0.8983050847457628, "eval_Qnli-dev_euclidean_accuracy": 0.6875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.188535690307617, "eval_Qnli-dev_euclidean_ap": 0.7344008197681215, "eval_Qnli-dev_euclidean_f1": 0.6852886405959031, "eval_Qnli-dev_euclidean_f1_threshold": 15.030804634094238, "eval_Qnli-dev_euclidean_precision": 0.6112956810631229, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.689453125, "eval_Qnli-dev_manhattan_accuracy_threshold": 264.3589782714844, "eval_Qnli-dev_manhattan_ap": 0.7396413308815867, "eval_Qnli-dev_manhattan_f1": 0.6823956442831216, "eval_Qnli-dev_manhattan_f1_threshold": 318.8542785644531, "eval_Qnli-dev_manhattan_precision": 0.5968253968253968, "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, "eval_Qnli-dev_max_accuracy": 0.689453125, "eval_Qnli-dev_max_accuracy_threshold": 420.9667053222656, "eval_Qnli-dev_max_ap": 0.7396413308815867, "eval_Qnli-dev_max_f1": 0.6852886405959031, "eval_Qnli-dev_max_f1_threshold": 345.52545166015625, "eval_Qnli-dev_max_precision": 0.6112956810631229, "eval_Qnli-dev_max_recall": 0.8983050847457628, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8926049470901489, "eval_allNLI-dev_cosine_ap": 0.6183972231090226, "eval_allNLI-dev_cosine_f1": 0.6339066339066338, "eval_allNLI-dev_cosine_f1_threshold": 0.8191593885421753, "eval_allNLI-dev_cosine_precision": 0.5512820512820513, "eval_allNLI-dev_cosine_recall": 0.7456647398843931, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 425.713134765625, "eval_allNLI-dev_dot_ap": 0.5372618188733738, "eval_allNLI-dev_dot_f1": 0.6022988505747127, "eval_allNLI-dev_dot_f1_threshold": 392.1880187988281, "eval_allNLI-dev_dot_precision": 0.5, "eval_allNLI-dev_dot_recall": 0.7572254335260116, "eval_allNLI-dev_euclidean_accuracy": 0.728515625, "eval_allNLI-dev_euclidean_accuracy_threshold": 9.652290344238281, "eval_allNLI-dev_euclidean_ap": 0.6233815893708006, "eval_allNLI-dev_euclidean_f1": 0.639269406392694, "eval_allNLI-dev_euclidean_f1_threshold": 13.957777976989746, "eval_allNLI-dev_euclidean_precision": 0.5283018867924528, "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 243.81475830078125, "eval_allNLI-dev_manhattan_ap": 0.6188492062115317, "eval_allNLI-dev_manhattan_f1": 0.6338028169014085, "eval_allNLI-dev_manhattan_f1_threshold": 285.5887145996094, "eval_allNLI-dev_manhattan_precision": 0.5335968379446641, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 425.713134765625, "eval_allNLI-dev_max_ap": 0.6233815893708006, "eval_allNLI-dev_max_f1": 0.639269406392694, "eval_allNLI-dev_max_f1_threshold": 392.1880187988281, "eval_allNLI-dev_max_precision": 0.5512820512820513, "eval_allNLI-dev_max_recall": 0.8092485549132948, "eval_sequential_score": 0.7396413308815867, "eval_sts-test_pearson_cosine": 0.8326547769095449, "eval_sts-test_pearson_dot": 0.8089814814025779, "eval_sts-test_pearson_euclidean": 0.868581267221316, "eval_sts-test_pearson_manhattan": 0.8668280311976726, "eval_sts-test_pearson_max": 0.868581267221316, "eval_sts-test_spearman_cosine": 0.869660212871345, "eval_sts-test_spearman_dot": 0.8006806185626417, "eval_sts-test_spearman_euclidean": 0.867850740368245, "eval_sts-test_spearman_manhattan": 0.8662515092911621, "eval_sts-test_spearman_max": 0.869660212871345, "eval_vitaminc-pairs_loss": 3.123586416244507, "eval_vitaminc-pairs_runtime": 3.2162, "eval_vitaminc-pairs_samples_per_second": 39.799, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_negation-triplets_loss": 0.9562065601348877, "eval_negation-triplets_runtime": 0.7629, "eval_negation-triplets_samples_per_second": 167.789, "eval_negation-triplets_steps_per_second": 1.311, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_scitail-pairs-pos_loss": 0.15029510855674744, "eval_scitail-pairs-pos_runtime": 0.869, "eval_scitail-pairs-pos_samples_per_second": 147.29, "eval_scitail-pairs-pos_steps_per_second": 1.151, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_scitail-pairs-qa_loss": 0.0026416766922920942, "eval_scitail-pairs-qa_runtime": 0.5945, "eval_scitail-pairs-qa_samples_per_second": 215.29, "eval_scitail-pairs-qa_steps_per_second": 1.682, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_xsum-pairs_loss": 0.3665918707847595, "eval_xsum-pairs_runtime": 3.0261, "eval_xsum-pairs_samples_per_second": 42.299, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_sciq_pairs_loss": 0.08483776450157166, "eval_sciq_pairs_runtime": 3.5017, "eval_sciq_pairs_samples_per_second": 36.554, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_qasc_pairs_loss": 0.24360936880111694, "eval_qasc_pairs_runtime": 0.6209, "eval_qasc_pairs_samples_per_second": 206.168, "eval_qasc_pairs_steps_per_second": 1.611, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_openbookqa_pairs_loss": 0.8522987365722656, "eval_openbookqa_pairs_runtime": 0.5921, "eval_openbookqa_pairs_samples_per_second": 216.19, "eval_openbookqa_pairs_steps_per_second": 1.689, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_msmarco_pairs_loss": 0.8556464910507202, "eval_msmarco_pairs_runtime": 1.5245, "eval_msmarco_pairs_samples_per_second": 83.96, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_nq_pairs_loss": 0.9329224228858948, "eval_nq_pairs_runtime": 2.9268, "eval_nq_pairs_samples_per_second": 43.734, "eval_nq_pairs_steps_per_second": 0.342, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_trivia_pairs_loss": 0.6846005320549011, "eval_trivia_pairs_runtime": 3.442, "eval_trivia_pairs_samples_per_second": 37.188, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_gooaq_pairs_loss": 0.45954039692878723, "eval_gooaq_pairs_runtime": 0.9602, "eval_gooaq_pairs_samples_per_second": 133.311, "eval_gooaq_pairs_steps_per_second": 1.041, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_paws-pos_loss": 0.021524449810385704, "eval_paws-pos_runtime": 0.6947, "eval_paws-pos_samples_per_second": 184.253, "eval_paws-pos_steps_per_second": 1.439, "step": 1420 }, { "epoch": 1.4609053497942388, "eval_global_dataset_loss": 0.504304826259613, "eval_global_dataset_runtime": 13.3809, "eval_global_dataset_samples_per_second": 31.089, "eval_global_dataset_steps_per_second": 0.299, "step": 1420 }, { "epoch": 1.4619341563786008, "grad_norm": 6.770277976989746, "learning_rate": 3.305655973854333e-05, "loss": 0.3382, "step": 1421 }, { "epoch": 1.462962962962963, "grad_norm": 10.697244644165039, "learning_rate": 3.304825456686707e-05, "loss": 0.9051, "step": 1422 }, { "epoch": 1.463991769547325, "grad_norm": 12.099508285522461, "learning_rate": 3.3039933306731486e-05, "loss": 0.8357, "step": 1423 }, { "epoch": 1.4650205761316872, "grad_norm": 5.6456732749938965, "learning_rate": 3.303159597191706e-05, "loss": 0.2571, "step": 1424 }, { "epoch": 1.4660493827160495, "grad_norm": 5.662939071655273, "learning_rate": 3.30232425762309e-05, "loss": 0.2161, "step": 1425 }, { "epoch": 1.4670781893004115, "grad_norm": 6.24656867980957, "learning_rate": 3.3014873133506684e-05, "loss": 0.2699, "step": 1426 }, { "epoch": 1.4681069958847737, "grad_norm": 8.071493148803711, "learning_rate": 3.30064876576047e-05, "loss": 0.4325, "step": 1427 }, { "epoch": 1.4691358024691357, "grad_norm": 0.7010164260864258, "learning_rate": 3.299808616241177e-05, "loss": 0.0202, "step": 1428 }, { "epoch": 1.4701646090534979, "grad_norm": 7.959322452545166, "learning_rate": 3.2989668661841234e-05, "loss": 0.4367, "step": 1429 }, { "epoch": 1.47119341563786, "grad_norm": 7.032750606536865, "learning_rate": 3.298123516983295e-05, "loss": 0.2904, "step": 1430 }, { "epoch": 1.4722222222222223, "grad_norm": 7.717670917510986, "learning_rate": 3.297278570035327e-05, "loss": 0.3461, "step": 1431 }, { "epoch": 1.4732510288065843, "grad_norm": 12.34119701385498, "learning_rate": 3.2964320267394986e-05, "loss": 1.1376, "step": 1432 }, { "epoch": 1.4742798353909465, "grad_norm": 16.769927978515625, "learning_rate": 3.295583888497733e-05, "loss": 2.4901, "step": 1433 }, { "epoch": 1.4753086419753085, "grad_norm": 5.696785926818848, "learning_rate": 3.294734156714596e-05, "loss": 0.1807, "step": 1434 }, { "epoch": 1.4763374485596708, "grad_norm": 8.693737983703613, "learning_rate": 3.2938828327972906e-05, "loss": 0.7702, "step": 1435 }, { "epoch": 1.477366255144033, "grad_norm": 11.573921203613281, "learning_rate": 3.293029918155659e-05, "loss": 0.8059, "step": 1436 }, { "epoch": 1.4783950617283952, "grad_norm": 0.6408414840698242, "learning_rate": 3.292175414202174e-05, "loss": 0.0171, "step": 1437 }, { "epoch": 1.4794238683127572, "grad_norm": 0.8278383016586304, "learning_rate": 3.2913193223519434e-05, "loss": 0.0214, "step": 1438 }, { "epoch": 1.4804526748971194, "grad_norm": 7.157841205596924, "learning_rate": 3.290461644022704e-05, "loss": 0.4367, "step": 1439 }, { "epoch": 1.4814814814814814, "grad_norm": 9.087885856628418, "learning_rate": 3.2896023806348194e-05, "loss": 0.8071, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_Qnli-dev_cosine_accuracy": 0.70703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8073866367340088, "eval_Qnli-dev_cosine_ap": 0.7603012216062148, "eval_Qnli-dev_cosine_f1": 0.6948176583493282, "eval_Qnli-dev_cosine_f1_threshold": 0.756028413772583, "eval_Qnli-dev_cosine_precision": 0.6350877192982456, "eval_Qnli-dev_cosine_recall": 0.7669491525423728, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 383.9505615234375, "eval_Qnli-dev_dot_ap": 0.6946710918004182, "eval_Qnli-dev_dot_f1": 0.6813880126182966, "eval_Qnli-dev_dot_f1_threshold": 315.57696533203125, "eval_Qnli-dev_dot_precision": 0.542713567839196, "eval_Qnli-dev_dot_recall": 0.9152542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.200727462768555, "eval_Qnli-dev_euclidean_ap": 0.766160629059141, "eval_Qnli-dev_euclidean_f1": 0.6974169741697418, "eval_Qnli-dev_euclidean_f1_threshold": 15.637357711791992, "eval_Qnli-dev_euclidean_precision": 0.6176470588235294, "eval_Qnli-dev_euclidean_recall": 0.8008474576271186, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 270.85723876953125, "eval_Qnli-dev_manhattan_ap": 0.7701929827964322, "eval_Qnli-dev_manhattan_f1": 0.6984126984126984, "eval_Qnli-dev_manhattan_f1_threshold": 336.00274658203125, "eval_Qnli-dev_manhattan_precision": 0.5981873111782477, "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 383.9505615234375, "eval_Qnli-dev_max_ap": 0.7701929827964322, "eval_Qnli-dev_max_f1": 0.6984126984126984, "eval_Qnli-dev_max_f1_threshold": 336.00274658203125, "eval_Qnli-dev_max_precision": 0.6350877192982456, "eval_Qnli-dev_max_recall": 0.9152542372881356, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.871171236038208, "eval_allNLI-dev_cosine_ap": 0.6362103263897821, "eval_allNLI-dev_cosine_f1": 0.6357615894039735, "eval_allNLI-dev_cosine_f1_threshold": 0.7768651843070984, "eval_allNLI-dev_cosine_precision": 0.5142857142857142, "eval_allNLI-dev_cosine_recall": 0.8323699421965318, "eval_allNLI-dev_dot_accuracy": 0.705078125, "eval_allNLI-dev_dot_accuracy_threshold": 398.95013427734375, "eval_allNLI-dev_dot_ap": 0.5661285579566687, "eval_allNLI-dev_dot_f1": 0.6172248803827751, "eval_allNLI-dev_dot_f1_threshold": 371.18365478515625, "eval_allNLI-dev_dot_precision": 0.5265306122448979, "eval_allNLI-dev_dot_recall": 0.7456647398843931, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.6724853515625, "eval_allNLI-dev_euclidean_ap": 0.6400867765050245, "eval_allNLI-dev_euclidean_f1": 0.6431718061674009, "eval_allNLI-dev_euclidean_f1_threshold": 14.630485534667969, "eval_allNLI-dev_euclidean_precision": 0.5195729537366548, "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, "eval_allNLI-dev_manhattan_accuracy": 0.744140625, "eval_allNLI-dev_manhattan_accuracy_threshold": 225.78453063964844, "eval_allNLI-dev_manhattan_ap": 0.6350322702102289, "eval_allNLI-dev_manhattan_f1": 0.6329670329670329, "eval_allNLI-dev_manhattan_f1_threshold": 306.090087890625, "eval_allNLI-dev_manhattan_precision": 0.5106382978723404, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.744140625, "eval_allNLI-dev_max_accuracy_threshold": 398.95013427734375, "eval_allNLI-dev_max_ap": 0.6400867765050245, "eval_allNLI-dev_max_f1": 0.6431718061674009, "eval_allNLI-dev_max_f1_threshold": 371.18365478515625, "eval_allNLI-dev_max_precision": 0.5265306122448979, "eval_allNLI-dev_max_recall": 0.8439306358381503, "eval_sequential_score": 0.7701929827964322, "eval_sts-test_pearson_cosine": 0.8343564728648352, "eval_sts-test_pearson_dot": 0.8077933555380535, "eval_sts-test_pearson_euclidean": 0.8676567979365759, "eval_sts-test_pearson_manhattan": 0.8650322762247608, "eval_sts-test_pearson_max": 0.8676567979365759, "eval_sts-test_spearman_cosine": 0.866502921877659, "eval_sts-test_spearman_dot": 0.8002891792792511, "eval_sts-test_spearman_euclidean": 0.8654190463267591, "eval_sts-test_spearman_manhattan": 0.8613856151375525, "eval_sts-test_spearman_max": 0.866502921877659, "eval_vitaminc-pairs_loss": 2.8277482986450195, "eval_vitaminc-pairs_runtime": 3.2374, "eval_vitaminc-pairs_samples_per_second": 39.538, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_negation-triplets_loss": 0.9136925935745239, "eval_negation-triplets_runtime": 0.7555, "eval_negation-triplets_samples_per_second": 169.431, "eval_negation-triplets_steps_per_second": 1.324, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_scitail-pairs-pos_loss": 0.14386087656021118, "eval_scitail-pairs-pos_runtime": 0.8962, "eval_scitail-pairs-pos_samples_per_second": 142.826, "eval_scitail-pairs-pos_steps_per_second": 1.116, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_scitail-pairs-qa_loss": 0.0007967444253154099, "eval_scitail-pairs-qa_runtime": 0.6034, "eval_scitail-pairs-qa_samples_per_second": 212.122, "eval_scitail-pairs-qa_steps_per_second": 1.657, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_xsum-pairs_loss": 0.31424856185913086, "eval_xsum-pairs_runtime": 3.0346, "eval_xsum-pairs_samples_per_second": 42.18, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_sciq_pairs_loss": 0.10615328699350357, "eval_sciq_pairs_runtime": 3.4997, "eval_sciq_pairs_samples_per_second": 36.574, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_qasc_pairs_loss": 0.21712642908096313, "eval_qasc_pairs_runtime": 0.6188, "eval_qasc_pairs_samples_per_second": 206.853, "eval_qasc_pairs_steps_per_second": 1.616, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_openbookqa_pairs_loss": 0.964438796043396, "eval_openbookqa_pairs_runtime": 0.5921, "eval_openbookqa_pairs_samples_per_second": 216.181, "eval_openbookqa_pairs_steps_per_second": 1.689, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_msmarco_pairs_loss": 0.9308626651763916, "eval_msmarco_pairs_runtime": 1.5222, "eval_msmarco_pairs_samples_per_second": 84.088, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_nq_pairs_loss": 0.7551199793815613, "eval_nq_pairs_runtime": 2.8988, "eval_nq_pairs_samples_per_second": 44.157, "eval_nq_pairs_steps_per_second": 0.345, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_trivia_pairs_loss": 0.8499756455421448, "eval_trivia_pairs_runtime": 3.4377, "eval_trivia_pairs_samples_per_second": 37.234, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_gooaq_pairs_loss": 0.43490731716156006, "eval_gooaq_pairs_runtime": 0.9586, "eval_gooaq_pairs_samples_per_second": 133.527, "eval_gooaq_pairs_steps_per_second": 1.043, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_paws-pos_loss": 0.02190549112856388, "eval_paws-pos_runtime": 0.7061, "eval_paws-pos_samples_per_second": 181.282, "eval_paws-pos_steps_per_second": 1.416, "step": 1440 }, { "epoch": 1.4814814814814814, "eval_global_dataset_loss": 0.4621449112892151, "eval_global_dataset_runtime": 13.4004, "eval_global_dataset_samples_per_second": 31.044, "eval_global_dataset_steps_per_second": 0.298, "step": 1440 }, { "epoch": 1.4825102880658436, "grad_norm": 8.890467643737793, "learning_rate": 3.288741533611279e-05, "loss": 0.5508, "step": 1441 }, { "epoch": 1.4835390946502058, "grad_norm": 7.290862083435059, "learning_rate": 3.2878791043776936e-05, "loss": 0.3343, "step": 1442 }, { "epoch": 1.4845679012345678, "grad_norm": 10.71535873413086, "learning_rate": 3.2870150943622946e-05, "loss": 0.7613, "step": 1443 }, { "epoch": 1.48559670781893, "grad_norm": 7.571237564086914, "learning_rate": 3.2861495049959314e-05, "loss": 0.3076, "step": 1444 }, { "epoch": 1.486625514403292, "grad_norm": 6.289035797119141, "learning_rate": 3.2852823377120706e-05, "loss": 0.2865, "step": 1445 }, { "epoch": 1.4876543209876543, "grad_norm": 1.0428786277770996, "learning_rate": 3.284413593946788e-05, "loss": 0.0207, "step": 1446 }, { "epoch": 1.4886831275720165, "grad_norm": 7.875466823577881, "learning_rate": 3.283543275138774e-05, "loss": 0.6707, "step": 1447 }, { "epoch": 1.4897119341563787, "grad_norm": 6.708620548248291, "learning_rate": 3.282671382729324e-05, "loss": 0.244, "step": 1448 }, { "epoch": 1.4907407407407407, "grad_norm": 11.043272972106934, "learning_rate": 3.281797918162344e-05, "loss": 0.7385, "step": 1449 }, { "epoch": 1.491769547325103, "grad_norm": 8.62049674987793, "learning_rate": 3.280922882884338e-05, "loss": 0.4721, "step": 1450 }, { "epoch": 1.492798353909465, "grad_norm": 13.318764686584473, "learning_rate": 3.280046278344416e-05, "loss": 0.8804, "step": 1451 }, { "epoch": 1.4938271604938271, "grad_norm": 5.410976409912109, "learning_rate": 3.2791681059942836e-05, "loss": 0.1642, "step": 1452 }, { "epoch": 1.4948559670781894, "grad_norm": 9.992098808288574, "learning_rate": 3.278288367288244e-05, "loss": 0.5925, "step": 1453 }, { "epoch": 1.4958847736625516, "grad_norm": 8.653932571411133, "learning_rate": 3.277407063683194e-05, "loss": 0.6718, "step": 1454 }, { "epoch": 1.4969135802469136, "grad_norm": 7.988948345184326, "learning_rate": 3.2765241966386234e-05, "loss": 0.6227, "step": 1455 }, { "epoch": 1.4979423868312758, "grad_norm": 6.361024379730225, "learning_rate": 3.275639767616609e-05, "loss": 0.2413, "step": 1456 }, { "epoch": 1.4989711934156378, "grad_norm": 10.710992813110352, "learning_rate": 3.274753778081815e-05, "loss": 0.8372, "step": 1457 }, { "epoch": 1.5, "grad_norm": 13.870288848876953, "learning_rate": 3.2738662295014916e-05, "loss": 1.1328, "step": 1458 }, { "epoch": 1.5010288065843622, "grad_norm": 7.458224773406982, "learning_rate": 3.272977123345468e-05, "loss": 0.369, "step": 1459 }, { "epoch": 1.5020576131687244, "grad_norm": 12.56210708618164, "learning_rate": 3.272086461086156e-05, "loss": 0.8694, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_Qnli-dev_cosine_accuracy": 0.701171875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8138365149497986, "eval_Qnli-dev_cosine_ap": 0.7489338568179515, "eval_Qnli-dev_cosine_f1": 0.689407540394973, "eval_Qnli-dev_cosine_f1_threshold": 0.7289687395095825, "eval_Qnli-dev_cosine_precision": 0.5981308411214953, "eval_Qnli-dev_cosine_recall": 0.8135593220338984, "eval_Qnli-dev_dot_accuracy": 0.669921875, "eval_Qnli-dev_dot_accuracy_threshold": 378.68115234375, "eval_Qnli-dev_dot_ap": 0.6565833142132881, "eval_Qnli-dev_dot_f1": 0.6633165829145728, "eval_Qnli-dev_dot_f1_threshold": 325.2407531738281, "eval_Qnli-dev_dot_precision": 0.5484764542936288, "eval_Qnli-dev_dot_recall": 0.8389830508474576, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.784200668334961, "eval_Qnli-dev_euclidean_ap": 0.7579849955722361, "eval_Qnli-dev_euclidean_f1": 0.6926229508196722, "eval_Qnli-dev_euclidean_f1_threshold": 14.93661117553711, "eval_Qnli-dev_euclidean_precision": 0.6706349206349206, "eval_Qnli-dev_euclidean_recall": 0.7161016949152542, "eval_Qnli-dev_manhattan_accuracy": 0.7109375, "eval_Qnli-dev_manhattan_accuracy_threshold": 287.81903076171875, "eval_Qnli-dev_manhattan_ap": 0.7627266401816915, "eval_Qnli-dev_manhattan_f1": 0.6902985074626867, "eval_Qnli-dev_manhattan_f1_threshold": 326.8884582519531, "eval_Qnli-dev_manhattan_precision": 0.6166666666666667, "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, "eval_Qnli-dev_max_accuracy": 0.7109375, "eval_Qnli-dev_max_accuracy_threshold": 378.68115234375, "eval_Qnli-dev_max_ap": 0.7627266401816915, "eval_Qnli-dev_max_f1": 0.6926229508196722, "eval_Qnli-dev_max_f1_threshold": 326.8884582519531, "eval_Qnli-dev_max_precision": 0.6706349206349206, "eval_Qnli-dev_max_recall": 0.8389830508474576, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8671836853027344, "eval_allNLI-dev_cosine_ap": 0.6278391776727383, "eval_allNLI-dev_cosine_f1": 0.6343825665859564, "eval_allNLI-dev_cosine_f1_threshold": 0.7882962822914124, "eval_allNLI-dev_cosine_precision": 0.5458333333333333, "eval_allNLI-dev_cosine_recall": 0.7572254335260116, "eval_allNLI-dev_dot_accuracy": 0.708984375, "eval_allNLI-dev_dot_accuracy_threshold": 389.33245849609375, "eval_allNLI-dev_dot_ap": 0.5556601990139107, "eval_allNLI-dev_dot_f1": 0.5905511811023622, "eval_allNLI-dev_dot_f1_threshold": 326.88543701171875, "eval_allNLI-dev_dot_precision": 0.44776119402985076, "eval_allNLI-dev_dot_recall": 0.8670520231213873, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.829912185668945, "eval_allNLI-dev_euclidean_ap": 0.6374580549695508, "eval_allNLI-dev_euclidean_f1": 0.641860465116279, "eval_allNLI-dev_euclidean_f1_threshold": 14.349370002746582, "eval_allNLI-dev_euclidean_precision": 0.5369649805447471, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 266.812255859375, "eval_allNLI-dev_manhattan_ap": 0.6310337853107585, "eval_allNLI-dev_manhattan_f1": 0.6344827586206897, "eval_allNLI-dev_manhattan_f1_threshold": 303.2599182128906, "eval_allNLI-dev_manhattan_precision": 0.5267175572519084, "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 389.33245849609375, "eval_allNLI-dev_max_ap": 0.6374580549695508, "eval_allNLI-dev_max_f1": 0.641860465116279, "eval_allNLI-dev_max_f1_threshold": 326.88543701171875, "eval_allNLI-dev_max_precision": 0.5458333333333333, "eval_allNLI-dev_max_recall": 0.8670520231213873, "eval_sequential_score": 0.7627266401816915, "eval_sts-test_pearson_cosine": 0.8294091384025749, "eval_sts-test_pearson_dot": 0.8188583363337693, "eval_sts-test_pearson_euclidean": 0.8645124410252298, "eval_sts-test_pearson_manhattan": 0.8627753187710065, "eval_sts-test_pearson_max": 0.8645124410252298, "eval_sts-test_spearman_cosine": 0.8689237413626268, "eval_sts-test_spearman_dot": 0.8192856869785479, "eval_sts-test_spearman_euclidean": 0.866917408060666, "eval_sts-test_spearman_manhattan": 0.8651706320747757, "eval_sts-test_spearman_max": 0.8689237413626268, "eval_vitaminc-pairs_loss": 3.11321759223938, "eval_vitaminc-pairs_runtime": 3.2191, "eval_vitaminc-pairs_samples_per_second": 39.762, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_negation-triplets_loss": 0.9306308627128601, "eval_negation-triplets_runtime": 0.7656, "eval_negation-triplets_samples_per_second": 167.183, "eval_negation-triplets_steps_per_second": 1.306, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_scitail-pairs-pos_loss": 0.14148494601249695, "eval_scitail-pairs-pos_runtime": 0.8816, "eval_scitail-pairs-pos_samples_per_second": 145.193, "eval_scitail-pairs-pos_steps_per_second": 1.134, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_scitail-pairs-qa_loss": 0.0008370282012037933, "eval_scitail-pairs-qa_runtime": 0.5934, "eval_scitail-pairs-qa_samples_per_second": 215.717, "eval_scitail-pairs-qa_steps_per_second": 1.685, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_xsum-pairs_loss": 0.33326202630996704, "eval_xsum-pairs_runtime": 3.0331, "eval_xsum-pairs_samples_per_second": 42.201, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_sciq_pairs_loss": 0.09718819707632065, "eval_sciq_pairs_runtime": 3.5207, "eval_sciq_pairs_samples_per_second": 36.356, "eval_sciq_pairs_steps_per_second": 0.284, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_qasc_pairs_loss": 0.21921108663082123, "eval_qasc_pairs_runtime": 0.6377, "eval_qasc_pairs_samples_per_second": 200.709, "eval_qasc_pairs_steps_per_second": 1.568, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_openbookqa_pairs_loss": 0.9528454542160034, "eval_openbookqa_pairs_runtime": 0.6003, "eval_openbookqa_pairs_samples_per_second": 213.227, "eval_openbookqa_pairs_steps_per_second": 1.666, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_msmarco_pairs_loss": 0.8451707363128662, "eval_msmarco_pairs_runtime": 1.5256, "eval_msmarco_pairs_samples_per_second": 83.903, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_nq_pairs_loss": 0.9173424243927002, "eval_nq_pairs_runtime": 2.9158, "eval_nq_pairs_samples_per_second": 43.899, "eval_nq_pairs_steps_per_second": 0.343, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_trivia_pairs_loss": 0.7778195738792419, "eval_trivia_pairs_runtime": 3.4419, "eval_trivia_pairs_samples_per_second": 37.189, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_gooaq_pairs_loss": 0.4348945915699005, "eval_gooaq_pairs_runtime": 0.9544, "eval_gooaq_pairs_samples_per_second": 134.112, "eval_gooaq_pairs_steps_per_second": 1.048, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_paws-pos_loss": 0.020275350660085678, "eval_paws-pos_runtime": 0.7179, "eval_paws-pos_samples_per_second": 178.302, "eval_paws-pos_steps_per_second": 1.393, "step": 1460 }, { "epoch": 1.5020576131687244, "eval_global_dataset_loss": 0.49529680609703064, "eval_global_dataset_runtime": 13.3943, "eval_global_dataset_samples_per_second": 31.058, "eval_global_dataset_steps_per_second": 0.299, "step": 1460 }, { "epoch": 1.5030864197530864, "grad_norm": 11.088354110717773, "learning_rate": 3.271194244198541e-05, "loss": 0.875, "step": 1461 }, { "epoch": 1.5041152263374484, "grad_norm": 6.408971786499023, "learning_rate": 3.2703004741601854e-05, "loss": 0.2141, "step": 1462 }, { "epoch": 1.5051440329218106, "grad_norm": 6.966059684753418, "learning_rate": 3.2694051524512225e-05, "loss": 0.2514, "step": 1463 }, { "epoch": 1.5061728395061729, "grad_norm": 8.408098220825195, "learning_rate": 3.268508280554356e-05, "loss": 0.8124, "step": 1464 }, { "epoch": 1.507201646090535, "grad_norm": 8.215058326721191, "learning_rate": 3.267609859954857e-05, "loss": 0.4547, "step": 1465 }, { "epoch": 1.508230452674897, "grad_norm": 9.22248649597168, "learning_rate": 3.266709892140561e-05, "loss": 0.6529, "step": 1466 }, { "epoch": 1.5092592592592593, "grad_norm": 0.6787060499191284, "learning_rate": 3.265808378601864e-05, "loss": 0.0202, "step": 1467 }, { "epoch": 1.5102880658436213, "grad_norm": 5.863772392272949, "learning_rate": 3.2649053208317254e-05, "loss": 0.2071, "step": 1468 }, { "epoch": 1.5113168724279835, "grad_norm": 8.698347091674805, "learning_rate": 3.2640007203256586e-05, "loss": 0.7461, "step": 1469 }, { "epoch": 1.5123456790123457, "grad_norm": 10.517783164978027, "learning_rate": 3.263094578581734e-05, "loss": 0.908, "step": 1470 }, { "epoch": 1.513374485596708, "grad_norm": 6.508285999298096, "learning_rate": 3.262186897100573e-05, "loss": 0.2534, "step": 1471 }, { "epoch": 1.51440329218107, "grad_norm": 7.264826774597168, "learning_rate": 3.261277677385348e-05, "loss": 0.2891, "step": 1472 }, { "epoch": 1.515432098765432, "grad_norm": 15.387980461120605, "learning_rate": 3.260366920941778e-05, "loss": 2.4869, "step": 1473 }, { "epoch": 1.5164609053497942, "grad_norm": 0.7510685324668884, "learning_rate": 3.2594546292781275e-05, "loss": 0.0319, "step": 1474 }, { "epoch": 1.5174897119341564, "grad_norm": 7.2782182693481445, "learning_rate": 3.258540803905203e-05, "loss": 0.355, "step": 1475 }, { "epoch": 1.5185185185185186, "grad_norm": 9.58236026763916, "learning_rate": 3.257625446336351e-05, "loss": 0.7358, "step": 1476 }, { "epoch": 1.5195473251028808, "grad_norm": 10.79014778137207, "learning_rate": 3.256708558087455e-05, "loss": 0.8566, "step": 1477 }, { "epoch": 1.5205761316872428, "grad_norm": 10.390580177307129, "learning_rate": 3.255790140676934e-05, "loss": 0.8171, "step": 1478 }, { "epoch": 1.5216049382716048, "grad_norm": 7.168967247009277, "learning_rate": 3.254870195625741e-05, "loss": 0.3609, "step": 1479 }, { "epoch": 1.522633744855967, "grad_norm": 7.27597188949585, "learning_rate": 3.253948724457354e-05, "loss": 0.4223, "step": 1480 }, { "epoch": 1.522633744855967, "eval_Qnli-dev_cosine_accuracy": 0.69921875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7863754034042358, "eval_Qnli-dev_cosine_ap": 0.7399947565743645, "eval_Qnli-dev_cosine_f1": 0.6920289855072463, "eval_Qnli-dev_cosine_f1_threshold": 0.7507023811340332, "eval_Qnli-dev_cosine_precision": 0.6044303797468354, "eval_Qnli-dev_cosine_recall": 0.809322033898305, "eval_Qnli-dev_dot_accuracy": 0.654296875, "eval_Qnli-dev_dot_accuracy_threshold": 414.4156494140625, "eval_Qnli-dev_dot_ap": 0.6567444831077266, "eval_Qnli-dev_dot_f1": 0.6688102893890675, "eval_Qnli-dev_dot_f1_threshold": 343.76507568359375, "eval_Qnli-dev_dot_precision": 0.538860103626943, "eval_Qnli-dev_dot_recall": 0.8813559322033898, "eval_Qnli-dev_euclidean_accuracy": 0.70703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.514190673828125, "eval_Qnli-dev_euclidean_ap": 0.7486153186225739, "eval_Qnli-dev_euclidean_f1": 0.6978557504873294, "eval_Qnli-dev_euclidean_f1_threshold": 15.083457946777344, "eval_Qnli-dev_euclidean_precision": 0.6462093862815884, "eval_Qnli-dev_euclidean_recall": 0.7584745762711864, "eval_Qnli-dev_manhattan_accuracy": 0.708984375, "eval_Qnli-dev_manhattan_accuracy_threshold": 279.00152587890625, "eval_Qnli-dev_manhattan_ap": 0.7553705353817792, "eval_Qnli-dev_manhattan_f1": 0.7043795620437957, "eval_Qnli-dev_manhattan_f1_threshold": 324.96124267578125, "eval_Qnli-dev_manhattan_precision": 0.6185897435897436, "eval_Qnli-dev_manhattan_recall": 0.8177966101694916, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 414.4156494140625, "eval_Qnli-dev_max_ap": 0.7553705353817792, "eval_Qnli-dev_max_f1": 0.7043795620437957, "eval_Qnli-dev_max_f1_threshold": 343.76507568359375, "eval_Qnli-dev_max_precision": 0.6462093862815884, "eval_Qnli-dev_max_recall": 0.8813559322033898, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8469090461730957, "eval_allNLI-dev_cosine_ap": 0.6234053979616159, "eval_allNLI-dev_cosine_f1": 0.6244343891402715, "eval_allNLI-dev_cosine_f1_threshold": 0.770348072052002, "eval_allNLI-dev_cosine_precision": 0.5130111524163569, "eval_allNLI-dev_cosine_recall": 0.7976878612716763, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 401.94757080078125, "eval_allNLI-dev_dot_ap": 0.5462802865754883, "eval_allNLI-dev_dot_f1": 0.5874439461883407, "eval_allNLI-dev_dot_f1_threshold": 362.9594421386719, "eval_allNLI-dev_dot_precision": 0.47985347985347987, "eval_allNLI-dev_dot_recall": 0.7572254335260116, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.566485404968262, "eval_allNLI-dev_euclidean_ap": 0.6318132514300728, "eval_allNLI-dev_euclidean_f1": 0.6414253897550111, "eval_allNLI-dev_euclidean_f1_threshold": 14.94178581237793, "eval_allNLI-dev_euclidean_precision": 0.5217391304347826, "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 253.82769775390625, "eval_allNLI-dev_manhattan_ap": 0.6285449979338023, "eval_allNLI-dev_manhattan_f1": 0.6325167037861915, "eval_allNLI-dev_manhattan_f1_threshold": 313.50347900390625, "eval_allNLI-dev_manhattan_precision": 0.5144927536231884, "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 401.94757080078125, "eval_allNLI-dev_max_ap": 0.6318132514300728, "eval_allNLI-dev_max_f1": 0.6414253897550111, "eval_allNLI-dev_max_f1_threshold": 362.9594421386719, "eval_allNLI-dev_max_precision": 0.5217391304347826, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.7553705353817792, "eval_sts-test_pearson_cosine": 0.8425691617213031, "eval_sts-test_pearson_dot": 0.8181059364976482, "eval_sts-test_pearson_euclidean": 0.8746566787614325, "eval_sts-test_pearson_manhattan": 0.8722050605271279, "eval_sts-test_pearson_max": 0.8746566787614325, "eval_sts-test_spearman_cosine": 0.8730949616856577, "eval_sts-test_spearman_dot": 0.8034795414551055, "eval_sts-test_spearman_euclidean": 0.8725233733263987, "eval_sts-test_spearman_manhattan": 0.8695492744943556, "eval_sts-test_spearman_max": 0.8730949616856577, "eval_vitaminc-pairs_loss": 3.0648269653320312, "eval_vitaminc-pairs_runtime": 3.1997, "eval_vitaminc-pairs_samples_per_second": 40.004, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 1480 }, { "epoch": 1.522633744855967, "eval_negation-triplets_loss": 0.894192636013031, "eval_negation-triplets_runtime": 0.7516, "eval_negation-triplets_samples_per_second": 170.306, "eval_negation-triplets_steps_per_second": 1.331, "step": 1480 }, { "epoch": 1.522633744855967, "eval_scitail-pairs-pos_loss": 0.1328463852405548, "eval_scitail-pairs-pos_runtime": 0.8819, "eval_scitail-pairs-pos_samples_per_second": 145.138, "eval_scitail-pairs-pos_steps_per_second": 1.134, "step": 1480 }, { "epoch": 1.522633744855967, "eval_scitail-pairs-qa_loss": 0.0014575115637853742, "eval_scitail-pairs-qa_runtime": 0.6089, "eval_scitail-pairs-qa_samples_per_second": 210.199, "eval_scitail-pairs-qa_steps_per_second": 1.642, "step": 1480 }, { "epoch": 1.522633744855967, "eval_xsum-pairs_loss": 0.35409626364707947, "eval_xsum-pairs_runtime": 3.02, "eval_xsum-pairs_samples_per_second": 42.385, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1480 }, { "epoch": 1.522633744855967, "eval_sciq_pairs_loss": 0.09546427428722382, "eval_sciq_pairs_runtime": 3.4916, "eval_sciq_pairs_samples_per_second": 36.659, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1480 }, { "epoch": 1.522633744855967, "eval_qasc_pairs_loss": 0.2151322215795517, "eval_qasc_pairs_runtime": 0.6095, "eval_qasc_pairs_samples_per_second": 209.999, "eval_qasc_pairs_steps_per_second": 1.641, "step": 1480 }, { "epoch": 1.522633744855967, "eval_openbookqa_pairs_loss": 1.0014817714691162, "eval_openbookqa_pairs_runtime": 0.5892, "eval_openbookqa_pairs_samples_per_second": 217.251, "eval_openbookqa_pairs_steps_per_second": 1.697, "step": 1480 }, { "epoch": 1.522633744855967, "eval_msmarco_pairs_loss": 0.8051604628562927, "eval_msmarco_pairs_runtime": 1.5278, "eval_msmarco_pairs_samples_per_second": 83.782, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1480 }, { "epoch": 1.522633744855967, "eval_nq_pairs_loss": 0.8040265440940857, "eval_nq_pairs_runtime": 2.916, "eval_nq_pairs_samples_per_second": 43.896, "eval_nq_pairs_steps_per_second": 0.343, "step": 1480 }, { "epoch": 1.522633744855967, "eval_trivia_pairs_loss": 0.7348816990852356, "eval_trivia_pairs_runtime": 3.4443, "eval_trivia_pairs_samples_per_second": 37.163, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1480 }, { "epoch": 1.522633744855967, "eval_gooaq_pairs_loss": 0.4094505310058594, "eval_gooaq_pairs_runtime": 0.9516, "eval_gooaq_pairs_samples_per_second": 134.512, "eval_gooaq_pairs_steps_per_second": 1.051, "step": 1480 }, { "epoch": 1.522633744855967, "eval_paws-pos_loss": 0.020600441843271255, "eval_paws-pos_runtime": 0.6972, "eval_paws-pos_samples_per_second": 183.597, "eval_paws-pos_steps_per_second": 1.434, "step": 1480 }, { "epoch": 1.522633744855967, "eval_global_dataset_loss": 0.4791772663593292, "eval_global_dataset_runtime": 13.3943, "eval_global_dataset_samples_per_second": 31.058, "eval_global_dataset_steps_per_second": 0.299, "step": 1480 }, { "epoch": 1.5236625514403292, "grad_norm": 5.952062606811523, "learning_rate": 3.253025728697784e-05, "loss": 0.2155, "step": 1481 }, { "epoch": 1.5246913580246915, "grad_norm": 6.088840961456299, "learning_rate": 3.252101209875562e-05, "loss": 0.3097, "step": 1482 }, { "epoch": 1.5257201646090535, "grad_norm": 6.238129138946533, "learning_rate": 3.251175169521745e-05, "loss": 0.3738, "step": 1483 }, { "epoch": 1.5267489711934157, "grad_norm": 7.6868367195129395, "learning_rate": 3.250247609169908e-05, "loss": 0.5758, "step": 1484 }, { "epoch": 1.5277777777777777, "grad_norm": 11.03587818145752, "learning_rate": 3.249318530356143e-05, "loss": 0.7701, "step": 1485 }, { "epoch": 1.52880658436214, "grad_norm": 12.658851623535156, "learning_rate": 3.248387934619058e-05, "loss": 0.9936, "step": 1486 }, { "epoch": 1.5298353909465021, "grad_norm": 4.862097263336182, "learning_rate": 3.2474558234997705e-05, "loss": 0.1421, "step": 1487 }, { "epoch": 1.5308641975308643, "grad_norm": 6.787068843841553, "learning_rate": 3.246522198541911e-05, "loss": 0.2403, "step": 1488 }, { "epoch": 1.5318930041152263, "grad_norm": 6.545001029968262, "learning_rate": 3.245587061291615e-05, "loss": 0.261, "step": 1489 }, { "epoch": 1.5329218106995883, "grad_norm": 4.37895393371582, "learning_rate": 3.2446504132975214e-05, "loss": 0.1273, "step": 1490 }, { "epoch": 1.5339506172839505, "grad_norm": 7.094578266143799, "learning_rate": 3.2437122561107735e-05, "loss": 0.2913, "step": 1491 }, { "epoch": 1.5349794238683128, "grad_norm": 10.808570861816406, "learning_rate": 3.242772591285012e-05, "loss": 0.6979, "step": 1492 }, { "epoch": 1.536008230452675, "grad_norm": 5.151909828186035, "learning_rate": 3.241831420376376e-05, "loss": 0.1969, "step": 1493 }, { "epoch": 1.5370370370370372, "grad_norm": 6.880885601043701, "learning_rate": 3.240888744943497e-05, "loss": 0.3544, "step": 1494 }, { "epoch": 1.5380658436213992, "grad_norm": 9.509146690368652, "learning_rate": 3.239944566547499e-05, "loss": 0.6795, "step": 1495 }, { "epoch": 1.5390946502057612, "grad_norm": 13.863608360290527, "learning_rate": 3.2389988867519944e-05, "loss": 2.128, "step": 1496 }, { "epoch": 1.5401234567901234, "grad_norm": 6.650979042053223, "learning_rate": 3.238051707123084e-05, "loss": 0.3413, "step": 1497 }, { "epoch": 1.5411522633744856, "grad_norm": 5.316281318664551, "learning_rate": 3.2371030292293493e-05, "loss": 0.2705, "step": 1498 }, { "epoch": 1.5421810699588478, "grad_norm": 7.931695938110352, "learning_rate": 3.236152854641855e-05, "loss": 0.3392, "step": 1499 }, { "epoch": 1.5432098765432098, "grad_norm": 7.471953392028809, "learning_rate": 3.235201184934143e-05, "loss": 0.5524, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_Qnli-dev_cosine_accuracy": 0.701171875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8042779564857483, "eval_Qnli-dev_cosine_ap": 0.7534214412055271, "eval_Qnli-dev_cosine_f1": 0.7045454545454546, "eval_Qnli-dev_cosine_f1_threshold": 0.7383867502212524, "eval_Qnli-dev_cosine_precision": 0.636986301369863, "eval_Qnli-dev_cosine_recall": 0.788135593220339, "eval_Qnli-dev_dot_accuracy": 0.6875, "eval_Qnli-dev_dot_accuracy_threshold": 369.844482421875, "eval_Qnli-dev_dot_ap": 0.6895081600020698, "eval_Qnli-dev_dot_f1": 0.6854304635761589, "eval_Qnli-dev_dot_f1_threshold": 318.4272766113281, "eval_Qnli-dev_dot_precision": 0.5625, "eval_Qnli-dev_dot_recall": 0.8771186440677966, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.158506393432617, "eval_Qnli-dev_euclidean_ap": 0.7622707519701521, "eval_Qnli-dev_euclidean_f1": 0.7071823204419889, "eval_Qnli-dev_euclidean_f1_threshold": 15.887628555297852, "eval_Qnli-dev_euclidean_precision": 0.6254071661237784, "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, "eval_Qnli-dev_manhattan_accuracy": 0.7109375, "eval_Qnli-dev_manhattan_accuracy_threshold": 293.96429443359375, "eval_Qnli-dev_manhattan_ap": 0.7650563696239733, "eval_Qnli-dev_manhattan_f1": 0.707635009310987, "eval_Qnli-dev_manhattan_f1_threshold": 330.70098876953125, "eval_Qnli-dev_manhattan_precision": 0.6312292358803987, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.7109375, "eval_Qnli-dev_max_accuracy_threshold": 369.844482421875, "eval_Qnli-dev_max_ap": 0.7650563696239733, "eval_Qnli-dev_max_f1": 0.707635009310987, "eval_Qnli-dev_max_f1_threshold": 330.70098876953125, "eval_Qnli-dev_max_precision": 0.636986301369863, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8823579549789429, "eval_allNLI-dev_cosine_ap": 0.6215769808743192, "eval_allNLI-dev_cosine_f1": 0.6238532110091742, "eval_allNLI-dev_cosine_f1_threshold": 0.7631572484970093, "eval_allNLI-dev_cosine_precision": 0.5171102661596958, "eval_allNLI-dev_cosine_recall": 0.7861271676300579, "eval_allNLI-dev_dot_accuracy": 0.69921875, "eval_allNLI-dev_dot_accuracy_threshold": 381.78656005859375, "eval_allNLI-dev_dot_ap": 0.552089695804951, "eval_allNLI-dev_dot_f1": 0.5856832971800434, "eval_allNLI-dev_dot_f1_threshold": 337.59368896484375, "eval_allNLI-dev_dot_precision": 0.46875, "eval_allNLI-dev_dot_recall": 0.7803468208092486, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.53109073638916, "eval_allNLI-dev_euclidean_ap": 0.6288347771169971, "eval_allNLI-dev_euclidean_f1": 0.6330275229357799, "eval_allNLI-dev_euclidean_f1_threshold": 14.756261825561523, "eval_allNLI-dev_euclidean_precision": 0.5247148288973384, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 246.92401123046875, "eval_allNLI-dev_manhattan_ap": 0.6264506464143021, "eval_allNLI-dev_manhattan_f1": 0.6279069767441859, "eval_allNLI-dev_manhattan_f1_threshold": 307.99139404296875, "eval_allNLI-dev_manhattan_precision": 0.5252918287937743, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 381.78656005859375, "eval_allNLI-dev_max_ap": 0.6288347771169971, "eval_allNLI-dev_max_f1": 0.6330275229357799, "eval_allNLI-dev_max_f1_threshold": 337.59368896484375, "eval_allNLI-dev_max_precision": 0.5252918287937743, "eval_allNLI-dev_max_recall": 0.7976878612716763, "eval_sequential_score": 0.7650563696239733, "eval_sts-test_pearson_cosine": 0.8428199951652109, "eval_sts-test_pearson_dot": 0.8277261750061927, "eval_sts-test_pearson_euclidean": 0.8730646316048926, "eval_sts-test_pearson_manhattan": 0.8707927459120908, "eval_sts-test_pearson_max": 0.8730646316048926, "eval_sts-test_spearman_cosine": 0.8720606257811182, "eval_sts-test_spearman_dot": 0.8119718988268286, "eval_sts-test_spearman_euclidean": 0.8701647833435331, "eval_sts-test_spearman_manhattan": 0.8675766388922228, "eval_sts-test_spearman_max": 0.8720606257811182, "eval_vitaminc-pairs_loss": 2.891284704208374, "eval_vitaminc-pairs_runtime": 3.217, "eval_vitaminc-pairs_samples_per_second": 39.789, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_negation-triplets_loss": 0.9080420136451721, "eval_negation-triplets_runtime": 0.7506, "eval_negation-triplets_samples_per_second": 170.539, "eval_negation-triplets_steps_per_second": 1.332, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_scitail-pairs-pos_loss": 0.1330765038728714, "eval_scitail-pairs-pos_runtime": 0.867, "eval_scitail-pairs-pos_samples_per_second": 147.634, "eval_scitail-pairs-pos_steps_per_second": 1.153, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_scitail-pairs-qa_loss": 0.0014549298211932182, "eval_scitail-pairs-qa_runtime": 0.5915, "eval_scitail-pairs-qa_samples_per_second": 216.389, "eval_scitail-pairs-qa_steps_per_second": 1.691, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_xsum-pairs_loss": 0.2871630787849426, "eval_xsum-pairs_runtime": 3.021, "eval_xsum-pairs_samples_per_second": 42.37, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_sciq_pairs_loss": 0.10170305520296097, "eval_sciq_pairs_runtime": 3.5237, "eval_sciq_pairs_samples_per_second": 36.326, "eval_sciq_pairs_steps_per_second": 0.284, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_qasc_pairs_loss": 0.2138214260339737, "eval_qasc_pairs_runtime": 0.6188, "eval_qasc_pairs_samples_per_second": 206.842, "eval_qasc_pairs_steps_per_second": 1.616, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_openbookqa_pairs_loss": 0.9816868305206299, "eval_openbookqa_pairs_runtime": 0.5902, "eval_openbookqa_pairs_samples_per_second": 216.862, "eval_openbookqa_pairs_steps_per_second": 1.694, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_msmarco_pairs_loss": 0.8624980449676514, "eval_msmarco_pairs_runtime": 1.5196, "eval_msmarco_pairs_samples_per_second": 84.233, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_nq_pairs_loss": 0.8289986252784729, "eval_nq_pairs_runtime": 2.9042, "eval_nq_pairs_samples_per_second": 44.074, "eval_nq_pairs_steps_per_second": 0.344, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_trivia_pairs_loss": 0.7315549850463867, "eval_trivia_pairs_runtime": 3.4425, "eval_trivia_pairs_samples_per_second": 37.182, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_gooaq_pairs_loss": 0.41331881284713745, "eval_gooaq_pairs_runtime": 0.9526, "eval_gooaq_pairs_samples_per_second": 134.373, "eval_gooaq_pairs_steps_per_second": 1.05, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_paws-pos_loss": 0.021952003240585327, "eval_paws-pos_runtime": 0.725, "eval_paws-pos_samples_per_second": 176.558, "eval_paws-pos_steps_per_second": 1.379, "step": 1500 }, { "epoch": 1.5432098765432098, "eval_global_dataset_loss": 0.4588969647884369, "eval_global_dataset_runtime": 13.4076, "eval_global_dataset_samples_per_second": 31.027, "eval_global_dataset_steps_per_second": 0.298, "step": 1500 }, { "epoch": 1.544238683127572, "grad_norm": 8.104286193847656, "learning_rate": 3.234248021682235e-05, "loss": 0.3681, "step": 1501 }, { "epoch": 1.545267489711934, "grad_norm": 7.033088207244873, "learning_rate": 3.233293366464621e-05, "loss": 0.3433, "step": 1502 }, { "epoch": 1.5462962962962963, "grad_norm": 7.00164794921875, "learning_rate": 3.232337220862264e-05, "loss": 0.3047, "step": 1503 }, { "epoch": 1.5473251028806585, "grad_norm": 5.222539901733398, "learning_rate": 3.2313795864585965e-05, "loss": 0.2738, "step": 1504 }, { "epoch": 1.5483539094650207, "grad_norm": 5.596692085266113, "learning_rate": 3.230420464839515e-05, "loss": 0.2694, "step": 1505 }, { "epoch": 1.5493827160493827, "grad_norm": 1.1960200071334839, "learning_rate": 3.229459857593377e-05, "loss": 0.0264, "step": 1506 }, { "epoch": 1.5504115226337447, "grad_norm": 4.646385669708252, "learning_rate": 3.228497766311006e-05, "loss": 0.1286, "step": 1507 }, { "epoch": 1.551440329218107, "grad_norm": 12.591828346252441, "learning_rate": 3.227534192585677e-05, "loss": 1.102, "step": 1508 }, { "epoch": 1.5524691358024691, "grad_norm": 5.408351898193359, "learning_rate": 3.2265691380131236e-05, "loss": 0.1698, "step": 1509 }, { "epoch": 1.5534979423868314, "grad_norm": 7.685009956359863, "learning_rate": 3.2256026041915305e-05, "loss": 0.4219, "step": 1510 }, { "epoch": 1.5545267489711934, "grad_norm": 6.658691883087158, "learning_rate": 3.224634592721533e-05, "loss": 0.2761, "step": 1511 }, { "epoch": 1.5555555555555556, "grad_norm": 6.750312328338623, "learning_rate": 3.2236651052062116e-05, "loss": 0.2484, "step": 1512 }, { "epoch": 1.5565843621399176, "grad_norm": 6.82509708404541, "learning_rate": 3.222694143251094e-05, "loss": 0.3063, "step": 1513 }, { "epoch": 1.5576131687242798, "grad_norm": 0.0743848979473114, "learning_rate": 3.221721708464147e-05, "loss": 0.0011, "step": 1514 }, { "epoch": 1.558641975308642, "grad_norm": 5.526226997375488, "learning_rate": 3.220747802455778e-05, "loss": 0.1608, "step": 1515 }, { "epoch": 1.5596707818930042, "grad_norm": 4.90157413482666, "learning_rate": 3.219772426838831e-05, "loss": 0.1707, "step": 1516 }, { "epoch": 1.5606995884773662, "grad_norm": 9.953004837036133, "learning_rate": 3.218795583228583e-05, "loss": 0.6489, "step": 1517 }, { "epoch": 1.5617283950617284, "grad_norm": 0.38476648926734924, "learning_rate": 3.217817273242741e-05, "loss": 0.0064, "step": 1518 }, { "epoch": 1.5627572016460904, "grad_norm": 5.67715311050415, "learning_rate": 3.2168374985014436e-05, "loss": 0.1734, "step": 1519 }, { "epoch": 1.5637860082304527, "grad_norm": 4.921224594116211, "learning_rate": 3.215856260627252e-05, "loss": 0.1014, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_Qnli-dev_cosine_accuracy": 0.703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8030712604522705, "eval_Qnli-dev_cosine_ap": 0.7546592106407485, "eval_Qnli-dev_cosine_f1": 0.7061068702290078, "eval_Qnli-dev_cosine_f1_threshold": 0.7434631586074829, "eval_Qnli-dev_cosine_precision": 0.6423611111111112, "eval_Qnli-dev_cosine_recall": 0.7838983050847458, "eval_Qnli-dev_dot_accuracy": 0.6640625, "eval_Qnli-dev_dot_accuracy_threshold": 383.8763732910156, "eval_Qnli-dev_dot_ap": 0.6833576985556973, "eval_Qnli-dev_dot_f1": 0.6830870279146142, "eval_Qnli-dev_dot_f1_threshold": 318.1232604980469, "eval_Qnli-dev_dot_precision": 0.5576407506702413, "eval_Qnli-dev_dot_recall": 0.8813559322033898, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.525314331054688, "eval_Qnli-dev_euclidean_ap": 0.7628895341943671, "eval_Qnli-dev_euclidean_f1": 0.7097966728280962, "eval_Qnli-dev_euclidean_f1_threshold": 15.842670440673828, "eval_Qnli-dev_euclidean_precision": 0.6295081967213115, "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, "eval_Qnli-dev_manhattan_accuracy": 0.720703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 303.1654052734375, "eval_Qnli-dev_manhattan_ap": 0.7657337153714383, "eval_Qnli-dev_manhattan_f1": 0.7077464788732394, "eval_Qnli-dev_manhattan_f1_threshold": 343.8772277832031, "eval_Qnli-dev_manhattan_precision": 0.6054216867469879, "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, "eval_Qnli-dev_max_accuracy": 0.720703125, "eval_Qnli-dev_max_accuracy_threshold": 383.8763732910156, "eval_Qnli-dev_max_ap": 0.7657337153714383, "eval_Qnli-dev_max_f1": 0.7097966728280962, "eval_Qnli-dev_max_f1_threshold": 343.8772277832031, "eval_Qnli-dev_max_precision": 0.6423611111111112, "eval_Qnli-dev_max_recall": 0.8813559322033898, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8233051300048828, "eval_allNLI-dev_cosine_ap": 0.6182898470984927, "eval_allNLI-dev_cosine_f1": 0.6211764705882353, "eval_allNLI-dev_cosine_f1_threshold": 0.7539602518081665, "eval_allNLI-dev_cosine_precision": 0.5238095238095238, "eval_allNLI-dev_cosine_recall": 0.7630057803468208, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 390.0309753417969, "eval_allNLI-dev_dot_ap": 0.5474329580109125, "eval_allNLI-dev_dot_f1": 0.5889830508474576, "eval_allNLI-dev_dot_f1_threshold": 319.5199890136719, "eval_allNLI-dev_dot_precision": 0.46488294314381273, "eval_allNLI-dev_dot_recall": 0.8034682080924855, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.811637878417969, "eval_allNLI-dev_euclidean_ap": 0.629218327624065, "eval_allNLI-dev_euclidean_f1": 0.6280193236714975, "eval_allNLI-dev_euclidean_f1_threshold": 14.535062789916992, "eval_allNLI-dev_euclidean_precision": 0.5394190871369294, "eval_allNLI-dev_euclidean_recall": 0.7514450867052023, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 257.43560791015625, "eval_allNLI-dev_manhattan_ap": 0.6239453551794301, "eval_allNLI-dev_manhattan_f1": 0.625, "eval_allNLI-dev_manhattan_f1_threshold": 312.9381103515625, "eval_allNLI-dev_manhattan_precision": 0.5212355212355212, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 390.0309753417969, "eval_allNLI-dev_max_ap": 0.629218327624065, "eval_allNLI-dev_max_f1": 0.6280193236714975, "eval_allNLI-dev_max_f1_threshold": 319.5199890136719, "eval_allNLI-dev_max_precision": 0.5394190871369294, "eval_allNLI-dev_max_recall": 0.8034682080924855, "eval_sequential_score": 0.7657337153714383, "eval_sts-test_pearson_cosine": 0.8447395315874453, "eval_sts-test_pearson_dot": 0.8347651049046418, "eval_sts-test_pearson_euclidean": 0.8708582861671369, "eval_sts-test_pearson_manhattan": 0.8686665949434926, "eval_sts-test_pearson_max": 0.8708582861671369, "eval_sts-test_spearman_cosine": 0.8700124500497567, "eval_sts-test_spearman_dot": 0.8272195486081061, "eval_sts-test_spearman_euclidean": 0.8661788779939239, "eval_sts-test_spearman_manhattan": 0.8634727692175859, "eval_sts-test_spearman_max": 0.8700124500497567, "eval_vitaminc-pairs_loss": 3.005648612976074, "eval_vitaminc-pairs_runtime": 3.2014, "eval_vitaminc-pairs_samples_per_second": 39.982, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_negation-triplets_loss": 0.9295395612716675, "eval_negation-triplets_runtime": 0.7502, "eval_negation-triplets_samples_per_second": 170.611, "eval_negation-triplets_steps_per_second": 1.333, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_scitail-pairs-pos_loss": 0.13157592713832855, "eval_scitail-pairs-pos_runtime": 0.8736, "eval_scitail-pairs-pos_samples_per_second": 146.513, "eval_scitail-pairs-pos_steps_per_second": 1.145, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_scitail-pairs-qa_loss": 0.0016492550494149327, "eval_scitail-pairs-qa_runtime": 0.5929, "eval_scitail-pairs-qa_samples_per_second": 215.894, "eval_scitail-pairs-qa_steps_per_second": 1.687, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_xsum-pairs_loss": 0.2971457839012146, "eval_xsum-pairs_runtime": 3.0288, "eval_xsum-pairs_samples_per_second": 42.26, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_sciq_pairs_loss": 0.09775245934724808, "eval_sciq_pairs_runtime": 3.4879, "eval_sciq_pairs_samples_per_second": 36.699, "eval_sciq_pairs_steps_per_second": 0.287, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_qasc_pairs_loss": 0.20654577016830444, "eval_qasc_pairs_runtime": 0.6156, "eval_qasc_pairs_samples_per_second": 207.919, "eval_qasc_pairs_steps_per_second": 1.624, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_openbookqa_pairs_loss": 0.9547919034957886, "eval_openbookqa_pairs_runtime": 0.5918, "eval_openbookqa_pairs_samples_per_second": 216.287, "eval_openbookqa_pairs_steps_per_second": 1.69, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_msmarco_pairs_loss": 0.828711748123169, "eval_msmarco_pairs_runtime": 1.5226, "eval_msmarco_pairs_samples_per_second": 84.067, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_nq_pairs_loss": 0.8327388167381287, "eval_nq_pairs_runtime": 2.8978, "eval_nq_pairs_samples_per_second": 44.171, "eval_nq_pairs_steps_per_second": 0.345, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_trivia_pairs_loss": 0.7141972780227661, "eval_trivia_pairs_runtime": 3.4447, "eval_trivia_pairs_samples_per_second": 37.159, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_gooaq_pairs_loss": 0.38889452815055847, "eval_gooaq_pairs_runtime": 0.9614, "eval_gooaq_pairs_samples_per_second": 133.139, "eval_gooaq_pairs_steps_per_second": 1.04, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_paws-pos_loss": 0.021532831713557243, "eval_paws-pos_runtime": 0.706, "eval_paws-pos_samples_per_second": 181.303, "eval_paws-pos_steps_per_second": 1.416, "step": 1520 }, { "epoch": 1.5637860082304527, "eval_global_dataset_loss": 0.4648805856704712, "eval_global_dataset_runtime": 13.4317, "eval_global_dataset_samples_per_second": 30.972, "eval_global_dataset_steps_per_second": 0.298, "step": 1520 }, { "epoch": 1.5648148148148149, "grad_norm": 8.67354679107666, "learning_rate": 3.2148735612451525e-05, "loss": 0.4432, "step": 1521 }, { "epoch": 1.565843621399177, "grad_norm": 6.336428642272949, "learning_rate": 3.21388940198255e-05, "loss": 0.2337, "step": 1522 }, { "epoch": 1.566872427983539, "grad_norm": 0.8628045320510864, "learning_rate": 3.212903784469268e-05, "loss": 0.0261, "step": 1523 }, { "epoch": 1.567901234567901, "grad_norm": 7.230187892913818, "learning_rate": 3.2119167103375464e-05, "loss": 0.4077, "step": 1524 }, { "epoch": 1.5689300411522633, "grad_norm": 6.065439224243164, "learning_rate": 3.2109281812220336e-05, "loss": 0.2346, "step": 1525 }, { "epoch": 1.5699588477366255, "grad_norm": 8.417454719543457, "learning_rate": 3.20993819875979e-05, "loss": 0.5197, "step": 1526 }, { "epoch": 1.5709876543209877, "grad_norm": 8.803533554077148, "learning_rate": 3.208946764590285e-05, "loss": 0.4848, "step": 1527 }, { "epoch": 1.5720164609053497, "grad_norm": 8.672576904296875, "learning_rate": 3.207953880355387e-05, "loss": 0.4645, "step": 1528 }, { "epoch": 1.573045267489712, "grad_norm": 7.9989399909973145, "learning_rate": 3.2069595476993704e-05, "loss": 0.5898, "step": 1529 }, { "epoch": 1.574074074074074, "grad_norm": 7.975038528442383, "learning_rate": 3.205963768268905e-05, "loss": 0.4887, "step": 1530 }, { "epoch": 1.5751028806584362, "grad_norm": 6.674964427947998, "learning_rate": 3.204966543713058e-05, "loss": 0.2801, "step": 1531 }, { "epoch": 1.5761316872427984, "grad_norm": 11.67644214630127, "learning_rate": 3.20396787568329e-05, "loss": 0.8622, "step": 1532 }, { "epoch": 1.5771604938271606, "grad_norm": 3.8542089462280273, "learning_rate": 3.2029677658334525e-05, "loss": 0.1503, "step": 1533 }, { "epoch": 1.5781893004115226, "grad_norm": 10.990008354187012, "learning_rate": 3.2019662158197833e-05, "loss": 0.7369, "step": 1534 }, { "epoch": 1.5792181069958846, "grad_norm": 6.23648738861084, "learning_rate": 3.200963227300905e-05, "loss": 0.2353, "step": 1535 }, { "epoch": 1.5802469135802468, "grad_norm": 5.542776584625244, "learning_rate": 3.1999588019378255e-05, "loss": 0.2101, "step": 1536 }, { "epoch": 1.581275720164609, "grad_norm": 9.987077713012695, "learning_rate": 3.1989529413939284e-05, "loss": 0.7077, "step": 1537 }, { "epoch": 1.5823045267489713, "grad_norm": 3.745037794113159, "learning_rate": 3.197945647334976e-05, "loss": 0.2138, "step": 1538 }, { "epoch": 1.5833333333333335, "grad_norm": 5.842042922973633, "learning_rate": 3.1969369214291036e-05, "loss": 0.1992, "step": 1539 }, { "epoch": 1.5843621399176955, "grad_norm": 9.523786544799805, "learning_rate": 3.1959267653468206e-05, "loss": 0.64, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_Qnli-dev_cosine_accuracy": 0.703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7817473411560059, "eval_Qnli-dev_cosine_ap": 0.7410033870456724, "eval_Qnli-dev_cosine_f1": 0.7069271758436945, "eval_Qnli-dev_cosine_f1_threshold": 0.737807035446167, "eval_Qnli-dev_cosine_precision": 0.6085626911314985, "eval_Qnli-dev_cosine_recall": 0.8432203389830508, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 388.49591064453125, "eval_Qnli-dev_dot_ap": 0.6689207865792897, "eval_Qnli-dev_dot_f1": 0.6719242902208202, "eval_Qnli-dev_dot_f1_threshold": 318.5028076171875, "eval_Qnli-dev_dot_precision": 0.535175879396985, "eval_Qnli-dev_dot_recall": 0.902542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.337549209594727, "eval_Qnli-dev_euclidean_ap": 0.7524832422833609, "eval_Qnli-dev_euclidean_f1": 0.7078039927404719, "eval_Qnli-dev_euclidean_f1_threshold": 15.774192810058594, "eval_Qnli-dev_euclidean_precision": 0.6190476190476191, "eval_Qnli-dev_euclidean_recall": 0.826271186440678, "eval_Qnli-dev_manhattan_accuracy": 0.703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 297.94952392578125, "eval_Qnli-dev_manhattan_ap": 0.7542425288738073, "eval_Qnli-dev_manhattan_f1": 0.7071428571428572, "eval_Qnli-dev_manhattan_f1_threshold": 335.511474609375, "eval_Qnli-dev_manhattan_precision": 0.6111111111111112, "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 388.49591064453125, "eval_Qnli-dev_max_ap": 0.7542425288738073, "eval_Qnli-dev_max_f1": 0.7078039927404719, "eval_Qnli-dev_max_f1_threshold": 335.511474609375, "eval_Qnli-dev_max_precision": 0.6190476190476191, "eval_Qnli-dev_max_recall": 0.902542372881356, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8608343005180359, "eval_allNLI-dev_cosine_ap": 0.6267179476142488, "eval_allNLI-dev_cosine_f1": 0.6272727272727273, "eval_allNLI-dev_cosine_f1_threshold": 0.7647356986999512, "eval_allNLI-dev_cosine_precision": 0.5168539325842697, "eval_allNLI-dev_cosine_recall": 0.7976878612716763, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 421.30963134765625, "eval_allNLI-dev_dot_ap": 0.556836107395606, "eval_allNLI-dev_dot_f1": 0.5921325051759835, "eval_allNLI-dev_dot_f1_threshold": 335.15948486328125, "eval_allNLI-dev_dot_precision": 0.4612903225806452, "eval_allNLI-dev_dot_recall": 0.8265895953757225, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.202301025390625, "eval_allNLI-dev_euclidean_ap": 0.6344296636986556, "eval_allNLI-dev_euclidean_f1": 0.6367924528301887, "eval_allNLI-dev_euclidean_f1_threshold": 14.496380805969238, "eval_allNLI-dev_euclidean_precision": 0.5378486055776892, "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 229.4099578857422, "eval_allNLI-dev_manhattan_ap": 0.6308141349336275, "eval_allNLI-dev_manhattan_f1": 0.6301369863013698, "eval_allNLI-dev_manhattan_f1_threshold": 308.3590087890625, "eval_allNLI-dev_manhattan_precision": 0.5207547169811321, "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 421.30963134765625, "eval_allNLI-dev_max_ap": 0.6344296636986556, "eval_allNLI-dev_max_f1": 0.6367924528301887, "eval_allNLI-dev_max_f1_threshold": 335.15948486328125, "eval_allNLI-dev_max_precision": 0.5378486055776892, "eval_allNLI-dev_max_recall": 0.8265895953757225, "eval_sequential_score": 0.7542425288738073, "eval_sts-test_pearson_cosine": 0.8339125795484035, "eval_sts-test_pearson_dot": 0.8201875759121224, "eval_sts-test_pearson_euclidean": 0.8658559476640173, "eval_sts-test_pearson_manhattan": 0.8635546610585563, "eval_sts-test_pearson_max": 0.8658559476640173, "eval_sts-test_spearman_cosine": 0.8690907978214008, "eval_sts-test_spearman_dot": 0.8219585008164104, "eval_sts-test_spearman_euclidean": 0.8649441011896124, "eval_sts-test_spearman_manhattan": 0.8623342523765273, "eval_sts-test_spearman_max": 0.8690907978214008, "eval_vitaminc-pairs_loss": 3.196216344833374, "eval_vitaminc-pairs_runtime": 3.208, "eval_vitaminc-pairs_samples_per_second": 39.9, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_negation-triplets_loss": 0.9201185703277588, "eval_negation-triplets_runtime": 0.7623, "eval_negation-triplets_samples_per_second": 167.909, "eval_negation-triplets_steps_per_second": 1.312, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_scitail-pairs-pos_loss": 0.15473031997680664, "eval_scitail-pairs-pos_runtime": 0.8829, "eval_scitail-pairs-pos_samples_per_second": 144.974, "eval_scitail-pairs-pos_steps_per_second": 1.133, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_scitail-pairs-qa_loss": 0.0018154560821130872, "eval_scitail-pairs-qa_runtime": 0.5912, "eval_scitail-pairs-qa_samples_per_second": 216.494, "eval_scitail-pairs-qa_steps_per_second": 1.691, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_xsum-pairs_loss": 0.3135191798210144, "eval_xsum-pairs_runtime": 3.0208, "eval_xsum-pairs_samples_per_second": 42.373, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_sciq_pairs_loss": 0.09777497500181198, "eval_sciq_pairs_runtime": 3.491, "eval_sciq_pairs_samples_per_second": 36.666, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_qasc_pairs_loss": 0.22917062044143677, "eval_qasc_pairs_runtime": 0.614, "eval_qasc_pairs_samples_per_second": 208.459, "eval_qasc_pairs_steps_per_second": 1.629, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_openbookqa_pairs_loss": 0.9911245703697205, "eval_openbookqa_pairs_runtime": 0.5955, "eval_openbookqa_pairs_samples_per_second": 214.96, "eval_openbookqa_pairs_steps_per_second": 1.679, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_msmarco_pairs_loss": 0.7844669818878174, "eval_msmarco_pairs_runtime": 1.5258, "eval_msmarco_pairs_samples_per_second": 83.89, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_nq_pairs_loss": 0.827888548374176, "eval_nq_pairs_runtime": 2.8979, "eval_nq_pairs_samples_per_second": 44.17, "eval_nq_pairs_steps_per_second": 0.345, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_trivia_pairs_loss": 0.7074177861213684, "eval_trivia_pairs_runtime": 3.4401, "eval_trivia_pairs_samples_per_second": 37.208, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_gooaq_pairs_loss": 0.35452744364738464, "eval_gooaq_pairs_runtime": 0.958, "eval_gooaq_pairs_samples_per_second": 133.605, "eval_gooaq_pairs_steps_per_second": 1.044, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_paws-pos_loss": 0.02034500241279602, "eval_paws-pos_runtime": 0.7022, "eval_paws-pos_samples_per_second": 182.273, "eval_paws-pos_steps_per_second": 1.424, "step": 1540 }, { "epoch": 1.5843621399176955, "eval_global_dataset_loss": 0.4994642436504364, "eval_global_dataset_runtime": 13.4195, "eval_global_dataset_samples_per_second": 31.0, "eval_global_dataset_steps_per_second": 0.298, "step": 1540 }, { "epoch": 1.5853909465020575, "grad_norm": 5.8253326416015625, "learning_rate": 3.194915180761e-05, "loss": 0.2026, "step": 1541 }, { "epoch": 1.5864197530864197, "grad_norm": 10.959147453308105, "learning_rate": 3.1939021693468846e-05, "loss": 0.7612, "step": 1542 }, { "epoch": 1.587448559670782, "grad_norm": 5.3747711181640625, "learning_rate": 3.192887732782079e-05, "loss": 0.2014, "step": 1543 }, { "epoch": 1.5884773662551441, "grad_norm": 1.070946216583252, "learning_rate": 3.191871872746546e-05, "loss": 0.0185, "step": 1544 }, { "epoch": 1.5895061728395061, "grad_norm": 5.483205795288086, "learning_rate": 3.190854590922609e-05, "loss": 0.1747, "step": 1545 }, { "epoch": 1.5905349794238683, "grad_norm": 8.847293853759766, "learning_rate": 3.189835888994943e-05, "loss": 0.4167, "step": 1546 }, { "epoch": 1.5915637860082303, "grad_norm": 13.975677490234375, "learning_rate": 3.1888157686505757e-05, "loss": 1.1175, "step": 1547 }, { "epoch": 1.5925925925925926, "grad_norm": 11.167402267456055, "learning_rate": 3.1877942315788855e-05, "loss": 0.7267, "step": 1548 }, { "epoch": 1.5936213991769548, "grad_norm": 5.643833637237549, "learning_rate": 3.1867712794715957e-05, "loss": 0.1788, "step": 1549 }, { "epoch": 1.594650205761317, "grad_norm": 3.837049961090088, "learning_rate": 3.1857469140227714e-05, "loss": 0.1364, "step": 1550 }, { "epoch": 1.595679012345679, "grad_norm": 8.434807777404785, "learning_rate": 3.184721136928821e-05, "loss": 0.3264, "step": 1551 }, { "epoch": 1.596707818930041, "grad_norm": 11.318633079528809, "learning_rate": 3.183693949888489e-05, "loss": 0.7652, "step": 1552 }, { "epoch": 1.5977366255144032, "grad_norm": 9.858819961547852, "learning_rate": 3.1826653546028544e-05, "loss": 0.6605, "step": 1553 }, { "epoch": 1.5987654320987654, "grad_norm": 5.252237319946289, "learning_rate": 3.1816353527753304e-05, "loss": 0.1219, "step": 1554 }, { "epoch": 1.5997942386831276, "grad_norm": 5.182341575622559, "learning_rate": 3.1806039461116585e-05, "loss": 0.1417, "step": 1555 }, { "epoch": 1.6008230452674899, "grad_norm": 11.145753860473633, "learning_rate": 3.179571136319905e-05, "loss": 0.6634, "step": 1556 }, { "epoch": 1.6018518518518519, "grad_norm": 11.343064308166504, "learning_rate": 3.1785369251104636e-05, "loss": 0.8749, "step": 1557 }, { "epoch": 1.6028806584362139, "grad_norm": 0.5352690815925598, "learning_rate": 3.177501314196044e-05, "loss": 0.0083, "step": 1558 }, { "epoch": 1.603909465020576, "grad_norm": 12.607147216796875, "learning_rate": 3.1764643052916786e-05, "loss": 1.7723, "step": 1559 }, { "epoch": 1.6049382716049383, "grad_norm": 4.1937479972839355, "learning_rate": 3.1754259001147116e-05, "loss": 0.1408, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_Qnli-dev_cosine_accuracy": 0.697265625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7923214435577393, "eval_Qnli-dev_cosine_ap": 0.736397950522414, "eval_Qnli-dev_cosine_f1": 0.708029197080292, "eval_Qnli-dev_cosine_f1_threshold": 0.7372498512268066, "eval_Qnli-dev_cosine_precision": 0.6217948717948718, "eval_Qnli-dev_cosine_recall": 0.8220338983050848, "eval_Qnli-dev_dot_accuracy": 0.673828125, "eval_Qnli-dev_dot_accuracy_threshold": 394.2844543457031, "eval_Qnli-dev_dot_ap": 0.6647394095843582, "eval_Qnli-dev_dot_f1": 0.6762075134168157, "eval_Qnli-dev_dot_f1_threshold": 343.790771484375, "eval_Qnli-dev_dot_precision": 0.5851393188854489, "eval_Qnli-dev_dot_recall": 0.8008474576271186, "eval_Qnli-dev_euclidean_accuracy": 0.703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.500602722167969, "eval_Qnli-dev_euclidean_ap": 0.7436495742157391, "eval_Qnli-dev_euclidean_f1": 0.7047970479704797, "eval_Qnli-dev_euclidean_f1_threshold": 15.69774055480957, "eval_Qnli-dev_euclidean_precision": 0.6241830065359477, "eval_Qnli-dev_euclidean_recall": 0.809322033898305, "eval_Qnli-dev_manhattan_accuracy": 0.701171875, "eval_Qnli-dev_manhattan_accuracy_threshold": 282.2923889160156, "eval_Qnli-dev_manhattan_ap": 0.746412343943242, "eval_Qnli-dev_manhattan_f1": 0.70223752151463, "eval_Qnli-dev_manhattan_f1_threshold": 341.86407470703125, "eval_Qnli-dev_manhattan_precision": 0.591304347826087, "eval_Qnli-dev_manhattan_recall": 0.864406779661017, "eval_Qnli-dev_max_accuracy": 0.703125, "eval_Qnli-dev_max_accuracy_threshold": 394.2844543457031, "eval_Qnli-dev_max_ap": 0.746412343943242, "eval_Qnli-dev_max_f1": 0.708029197080292, "eval_Qnli-dev_max_f1_threshold": 343.790771484375, "eval_Qnli-dev_max_precision": 0.6241830065359477, "eval_Qnli-dev_max_recall": 0.864406779661017, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8752395510673523, "eval_allNLI-dev_cosine_ap": 0.6214439988011942, "eval_allNLI-dev_cosine_f1": 0.6264501160092808, "eval_allNLI-dev_cosine_f1_threshold": 0.7859889268875122, "eval_allNLI-dev_cosine_precision": 0.5232558139534884, "eval_allNLI-dev_cosine_recall": 0.7803468208092486, "eval_allNLI-dev_dot_accuracy": 0.689453125, "eval_allNLI-dev_dot_accuracy_threshold": 446.91632080078125, "eval_allNLI-dev_dot_ap": 0.5413136716943259, "eval_allNLI-dev_dot_f1": 0.5879732739420935, "eval_allNLI-dev_dot_f1_threshold": 359.6739501953125, "eval_allNLI-dev_dot_precision": 0.4782608695652174, "eval_allNLI-dev_dot_recall": 0.7630057803468208, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 10.595601081848145, "eval_allNLI-dev_euclidean_ap": 0.6293013048776566, "eval_allNLI-dev_euclidean_f1": 0.6336633663366337, "eval_allNLI-dev_euclidean_f1_threshold": 13.83390998840332, "eval_allNLI-dev_euclidean_precision": 0.5541125541125541, "eval_allNLI-dev_euclidean_recall": 0.7398843930635838, "eval_allNLI-dev_manhattan_accuracy": 0.73828125, "eval_allNLI-dev_manhattan_accuracy_threshold": 250.4329833984375, "eval_allNLI-dev_manhattan_ap": 0.625141827320122, "eval_allNLI-dev_manhattan_f1": 0.6330935251798562, "eval_allNLI-dev_manhattan_f1_threshold": 294.185546875, "eval_allNLI-dev_manhattan_precision": 0.5409836065573771, "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 446.91632080078125, "eval_allNLI-dev_max_ap": 0.6293013048776566, "eval_allNLI-dev_max_f1": 0.6336633663366337, "eval_allNLI-dev_max_f1_threshold": 359.6739501953125, "eval_allNLI-dev_max_precision": 0.5541125541125541, "eval_allNLI-dev_max_recall": 0.7803468208092486, "eval_sequential_score": 0.746412343943242, "eval_sts-test_pearson_cosine": 0.8302574308516089, "eval_sts-test_pearson_dot": 0.7999351461985135, "eval_sts-test_pearson_euclidean": 0.8662787058139827, "eval_sts-test_pearson_manhattan": 0.8640787481621535, "eval_sts-test_pearson_max": 0.8662787058139827, "eval_sts-test_spearman_cosine": 0.8668608872175287, "eval_sts-test_spearman_dot": 0.7830448177172121, "eval_sts-test_spearman_euclidean": 0.8657249211983695, "eval_sts-test_spearman_manhattan": 0.8622815801418696, "eval_sts-test_spearman_max": 0.8668608872175287, "eval_vitaminc-pairs_loss": 3.2677905559539795, "eval_vitaminc-pairs_runtime": 3.2153, "eval_vitaminc-pairs_samples_per_second": 39.809, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_negation-triplets_loss": 0.8980169892311096, "eval_negation-triplets_runtime": 0.7647, "eval_negation-triplets_samples_per_second": 167.388, "eval_negation-triplets_steps_per_second": 1.308, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_scitail-pairs-pos_loss": 0.15127724409103394, "eval_scitail-pairs-pos_runtime": 0.8723, "eval_scitail-pairs-pos_samples_per_second": 146.74, "eval_scitail-pairs-pos_steps_per_second": 1.146, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_scitail-pairs-qa_loss": 0.0014785886742174625, "eval_scitail-pairs-qa_runtime": 0.6058, "eval_scitail-pairs-qa_samples_per_second": 211.278, "eval_scitail-pairs-qa_steps_per_second": 1.651, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_xsum-pairs_loss": 0.36814171075820923, "eval_xsum-pairs_runtime": 3.0238, "eval_xsum-pairs_samples_per_second": 42.331, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_sciq_pairs_loss": 0.1039256900548935, "eval_sciq_pairs_runtime": 3.5155, "eval_sciq_pairs_samples_per_second": 36.41, "eval_sciq_pairs_steps_per_second": 0.284, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_qasc_pairs_loss": 0.20851899683475494, "eval_qasc_pairs_runtime": 0.6182, "eval_qasc_pairs_samples_per_second": 207.054, "eval_qasc_pairs_steps_per_second": 1.618, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_openbookqa_pairs_loss": 0.9419054985046387, "eval_openbookqa_pairs_runtime": 0.5937, "eval_openbookqa_pairs_samples_per_second": 215.582, "eval_openbookqa_pairs_steps_per_second": 1.684, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_msmarco_pairs_loss": 0.7457932829856873, "eval_msmarco_pairs_runtime": 1.5248, "eval_msmarco_pairs_samples_per_second": 83.945, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_nq_pairs_loss": 0.8226298689842224, "eval_nq_pairs_runtime": 2.902, "eval_nq_pairs_samples_per_second": 44.108, "eval_nq_pairs_steps_per_second": 0.345, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_trivia_pairs_loss": 0.6305390000343323, "eval_trivia_pairs_runtime": 3.4527, "eval_trivia_pairs_samples_per_second": 37.073, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_gooaq_pairs_loss": 0.39881452918052673, "eval_gooaq_pairs_runtime": 0.9524, "eval_gooaq_pairs_samples_per_second": 134.391, "eval_gooaq_pairs_steps_per_second": 1.05, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_paws-pos_loss": 0.02105657197535038, "eval_paws-pos_runtime": 0.6996, "eval_paws-pos_samples_per_second": 182.974, "eval_paws-pos_steps_per_second": 1.429, "step": 1560 }, { "epoch": 1.6049382716049383, "eval_global_dataset_loss": 0.5037676692008972, "eval_global_dataset_runtime": 13.4259, "eval_global_dataset_samples_per_second": 30.985, "eval_global_dataset_steps_per_second": 0.298, "step": 1560 }, { "epoch": 1.6059670781893005, "grad_norm": 6.21460485458374, "learning_rate": 3.174386100384801e-05, "loss": 0.2573, "step": 1561 }, { "epoch": 1.6069958847736625, "grad_norm": 5.64373779296875, "learning_rate": 3.1733449078239137e-05, "loss": 0.2668, "step": 1562 }, { "epoch": 1.6080246913580247, "grad_norm": 6.738086223602295, "learning_rate": 3.172302324156325e-05, "loss": 0.2826, "step": 1563 }, { "epoch": 1.6090534979423867, "grad_norm": 5.761399745941162, "learning_rate": 3.1712583511086106e-05, "loss": 0.2666, "step": 1564 }, { "epoch": 1.610082304526749, "grad_norm": 6.337840557098389, "learning_rate": 3.17021299040965e-05, "loss": 0.282, "step": 1565 }, { "epoch": 1.6111111111111112, "grad_norm": 0.7291662693023682, "learning_rate": 3.16916624379062e-05, "loss": 0.0133, "step": 1566 }, { "epoch": 1.6121399176954734, "grad_norm": 6.175798416137695, "learning_rate": 3.1681181129849906e-05, "loss": 0.4253, "step": 1567 }, { "epoch": 1.6131687242798354, "grad_norm": 6.257718563079834, "learning_rate": 3.167068599728526e-05, "loss": 0.298, "step": 1568 }, { "epoch": 1.6141975308641974, "grad_norm": 7.281583786010742, "learning_rate": 3.166017705759282e-05, "loss": 0.341, "step": 1569 }, { "epoch": 1.6152263374485596, "grad_norm": 4.747096061706543, "learning_rate": 3.164965432817596e-05, "loss": 0.1514, "step": 1570 }, { "epoch": 1.6162551440329218, "grad_norm": 8.15545654296875, "learning_rate": 3.163911782646093e-05, "loss": 0.3729, "step": 1571 }, { "epoch": 1.617283950617284, "grad_norm": 6.875436305999756, "learning_rate": 3.162856756989676e-05, "loss": 0.2414, "step": 1572 }, { "epoch": 1.6183127572016462, "grad_norm": 5.717591762542725, "learning_rate": 3.1618003575955275e-05, "loss": 0.1577, "step": 1573 }, { "epoch": 1.6193415637860082, "grad_norm": 13.189390182495117, "learning_rate": 3.160742586213105e-05, "loss": 0.9965, "step": 1574 }, { "epoch": 1.6203703703703702, "grad_norm": 6.093446731567383, "learning_rate": 3.159683444594139e-05, "loss": 0.2172, "step": 1575 }, { "epoch": 1.6213991769547325, "grad_norm": 3.353471279144287, "learning_rate": 3.1586229344926255e-05, "loss": 0.078, "step": 1576 }, { "epoch": 1.6224279835390947, "grad_norm": 9.408513069152832, "learning_rate": 3.1575610576648305e-05, "loss": 0.3766, "step": 1577 }, { "epoch": 1.623456790123457, "grad_norm": 8.742712020874023, "learning_rate": 3.156497815869283e-05, "loss": 0.6639, "step": 1578 }, { "epoch": 1.624485596707819, "grad_norm": 1.3122910261154175, "learning_rate": 3.15543321086677e-05, "loss": 0.0204, "step": 1579 }, { "epoch": 1.625514403292181, "grad_norm": 4.740899562835693, "learning_rate": 3.1543672444203374e-05, "loss": 0.1661, "step": 1580 }, { "epoch": 1.625514403292181, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7632798552513123, "eval_Qnli-dev_cosine_ap": 0.7595720144230074, "eval_Qnli-dev_cosine_f1": 0.7043189368770765, "eval_Qnli-dev_cosine_f1_threshold": 0.6868818998336792, "eval_Qnli-dev_cosine_precision": 0.5792349726775956, "eval_Qnli-dev_cosine_recall": 0.8983050847457628, "eval_Qnli-dev_dot_accuracy": 0.6875, "eval_Qnli-dev_dot_accuracy_threshold": 349.8937072753906, "eval_Qnli-dev_dot_ap": 0.6892873388235935, "eval_Qnli-dev_dot_f1": 0.6772655007949125, "eval_Qnli-dev_dot_f1_threshold": 292.2147216796875, "eval_Qnli-dev_dot_precision": 0.5419847328244275, "eval_Qnli-dev_dot_recall": 0.902542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.705078125, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.526966094970703, "eval_Qnli-dev_euclidean_ap": 0.7674293463107565, "eval_Qnli-dev_euclidean_f1": 0.7198515769944341, "eval_Qnli-dev_euclidean_f1_threshold": 15.816378593444824, "eval_Qnli-dev_euclidean_precision": 0.6402640264026402, "eval_Qnli-dev_euclidean_recall": 0.8220338983050848, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 304.14422607421875, "eval_Qnli-dev_manhattan_ap": 0.7708594172622463, "eval_Qnli-dev_manhattan_f1": 0.7132616487455196, "eval_Qnli-dev_manhattan_f1_threshold": 336.89105224609375, "eval_Qnli-dev_manhattan_precision": 0.6180124223602484, "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 349.8937072753906, "eval_Qnli-dev_max_ap": 0.7708594172622463, "eval_Qnli-dev_max_f1": 0.7198515769944341, "eval_Qnli-dev_max_f1_threshold": 336.89105224609375, "eval_Qnli-dev_max_precision": 0.6402640264026402, "eval_Qnli-dev_max_recall": 0.902542372881356, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8192890882492065, "eval_allNLI-dev_cosine_ap": 0.6178735143817917, "eval_allNLI-dev_cosine_f1": 0.6136865342163355, "eval_allNLI-dev_cosine_f1_threshold": 0.7249919772148132, "eval_allNLI-dev_cosine_precision": 0.49642857142857144, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 406.23291015625, "eval_allNLI-dev_dot_ap": 0.5581205075045584, "eval_allNLI-dev_dot_f1": 0.5874125874125874, "eval_allNLI-dev_dot_f1_threshold": 314.37115478515625, "eval_allNLI-dev_dot_precision": 0.4921875, "eval_allNLI-dev_dot_recall": 0.7283236994219653, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.021381378173828, "eval_allNLI-dev_euclidean_ap": 0.6240732173975734, "eval_allNLI-dev_euclidean_f1": 0.6216867469879518, "eval_allNLI-dev_euclidean_f1_threshold": 14.67414379119873, "eval_allNLI-dev_euclidean_precision": 0.5330578512396694, "eval_allNLI-dev_euclidean_recall": 0.7456647398843931, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 247.3629913330078, "eval_allNLI-dev_manhattan_ap": 0.6206113939017133, "eval_allNLI-dev_manhattan_f1": 0.6206896551724138, "eval_allNLI-dev_manhattan_f1_threshold": 316.04638671875, "eval_allNLI-dev_manhattan_precision": 0.5152671755725191, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 406.23291015625, "eval_allNLI-dev_max_ap": 0.6240732173975734, "eval_allNLI-dev_max_f1": 0.6216867469879518, "eval_allNLI-dev_max_f1_threshold": 316.04638671875, "eval_allNLI-dev_max_precision": 0.5330578512396694, "eval_allNLI-dev_max_recall": 0.8034682080924855, "eval_sequential_score": 0.7708594172622463, "eval_sts-test_pearson_cosine": 0.8400854742746162, "eval_sts-test_pearson_dot": 0.8281129949166816, "eval_sts-test_pearson_euclidean": 0.8643289450211398, "eval_sts-test_pearson_manhattan": 0.8620190577992299, "eval_sts-test_pearson_max": 0.8643289450211398, "eval_sts-test_spearman_cosine": 0.864160059090035, "eval_sts-test_spearman_dot": 0.8162842198770266, "eval_sts-test_spearman_euclidean": 0.8603101080725328, "eval_sts-test_spearman_manhattan": 0.8563359275534083, "eval_sts-test_spearman_max": 0.864160059090035, "eval_vitaminc-pairs_loss": 3.219008207321167, "eval_vitaminc-pairs_runtime": 3.2219, "eval_vitaminc-pairs_samples_per_second": 39.729, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1580 }, { "epoch": 1.625514403292181, "eval_negation-triplets_loss": 1.0143779516220093, "eval_negation-triplets_runtime": 0.7617, "eval_negation-triplets_samples_per_second": 168.041, "eval_negation-triplets_steps_per_second": 1.313, "step": 1580 }, { "epoch": 1.625514403292181, "eval_scitail-pairs-pos_loss": 0.15095233917236328, "eval_scitail-pairs-pos_runtime": 0.8861, "eval_scitail-pairs-pos_samples_per_second": 144.456, "eval_scitail-pairs-pos_steps_per_second": 1.129, "step": 1580 }, { "epoch": 1.625514403292181, "eval_scitail-pairs-qa_loss": 0.0023409768473356962, "eval_scitail-pairs-qa_runtime": 0.6047, "eval_scitail-pairs-qa_samples_per_second": 211.683, "eval_scitail-pairs-qa_steps_per_second": 1.654, "step": 1580 }, { "epoch": 1.625514403292181, "eval_xsum-pairs_loss": 0.29153984785079956, "eval_xsum-pairs_runtime": 3.0345, "eval_xsum-pairs_samples_per_second": 42.182, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1580 }, { "epoch": 1.625514403292181, "eval_sciq_pairs_loss": 0.09576187282800674, "eval_sciq_pairs_runtime": 3.5115, "eval_sciq_pairs_samples_per_second": 36.451, "eval_sciq_pairs_steps_per_second": 0.285, "step": 1580 }, { "epoch": 1.625514403292181, "eval_qasc_pairs_loss": 0.22589659690856934, "eval_qasc_pairs_runtime": 0.6491, "eval_qasc_pairs_samples_per_second": 197.201, "eval_qasc_pairs_steps_per_second": 1.541, "step": 1580 }, { "epoch": 1.625514403292181, "eval_openbookqa_pairs_loss": 0.8184758424758911, "eval_openbookqa_pairs_runtime": 0.5983, "eval_openbookqa_pairs_samples_per_second": 213.94, "eval_openbookqa_pairs_steps_per_second": 1.671, "step": 1580 }, { "epoch": 1.625514403292181, "eval_msmarco_pairs_loss": 0.817125678062439, "eval_msmarco_pairs_runtime": 1.5279, "eval_msmarco_pairs_samples_per_second": 83.773, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 1580 }, { "epoch": 1.625514403292181, "eval_nq_pairs_loss": 0.7911259531974792, "eval_nq_pairs_runtime": 2.9065, "eval_nq_pairs_samples_per_second": 44.039, "eval_nq_pairs_steps_per_second": 0.344, "step": 1580 }, { "epoch": 1.625514403292181, "eval_trivia_pairs_loss": 0.717950165271759, "eval_trivia_pairs_runtime": 3.4507, "eval_trivia_pairs_samples_per_second": 37.094, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1580 }, { "epoch": 1.625514403292181, "eval_gooaq_pairs_loss": 0.45841023325920105, "eval_gooaq_pairs_runtime": 0.9622, "eval_gooaq_pairs_samples_per_second": 133.029, "eval_gooaq_pairs_steps_per_second": 1.039, "step": 1580 }, { "epoch": 1.625514403292181, "eval_paws-pos_loss": 0.021705929189920425, "eval_paws-pos_runtime": 0.6991, "eval_paws-pos_samples_per_second": 183.088, "eval_paws-pos_steps_per_second": 1.43, "step": 1580 }, { "epoch": 1.625514403292181, "eval_global_dataset_loss": 0.48299312591552734, "eval_global_dataset_runtime": 13.3939, "eval_global_dataset_samples_per_second": 31.059, "eval_global_dataset_steps_per_second": 0.299, "step": 1580 }, { "epoch": 1.626543209876543, "grad_norm": 0.5153937935829163, "learning_rate": 3.153299918295288e-05, "loss": 0.0404, "step": 1581 }, { "epoch": 1.6275720164609053, "grad_norm": 6.195679664611816, "learning_rate": 3.1522312342591726e-05, "loss": 0.2553, "step": 1582 }, { "epoch": 1.6286008230452675, "grad_norm": 10.167468070983887, "learning_rate": 3.1511611940817934e-05, "loss": 0.6284, "step": 1583 }, { "epoch": 1.6296296296296298, "grad_norm": 15.11025619506836, "learning_rate": 3.150089799535197e-05, "loss": 2.1434, "step": 1584 }, { "epoch": 1.6306584362139918, "grad_norm": 12.672037124633789, "learning_rate": 3.1490170523936726e-05, "loss": 1.1293, "step": 1585 }, { "epoch": 1.6316872427983538, "grad_norm": 0.7210871577262878, "learning_rate": 3.147942954433751e-05, "loss": 0.0465, "step": 1586 }, { "epoch": 1.632716049382716, "grad_norm": 15.898063659667969, "learning_rate": 3.1468675074342006e-05, "loss": 1.235, "step": 1587 }, { "epoch": 1.6337448559670782, "grad_norm": 7.80556058883667, "learning_rate": 3.14579071317602e-05, "loss": 0.4631, "step": 1588 }, { "epoch": 1.6347736625514404, "grad_norm": 6.856632709503174, "learning_rate": 3.144712573442442e-05, "loss": 0.356, "step": 1589 }, { "epoch": 1.6358024691358026, "grad_norm": 4.719722747802734, "learning_rate": 3.1436330900189284e-05, "loss": 0.2033, "step": 1590 }, { "epoch": 1.6368312757201646, "grad_norm": 8.740306854248047, "learning_rate": 3.142552264693164e-05, "loss": 0.7071, "step": 1591 }, { "epoch": 1.6378600823045266, "grad_norm": 3.5525388717651367, "learning_rate": 3.141470099255056e-05, "loss": 0.1086, "step": 1592 }, { "epoch": 1.6388888888888888, "grad_norm": 8.827413558959961, "learning_rate": 3.140386595496733e-05, "loss": 0.6634, "step": 1593 }, { "epoch": 1.639917695473251, "grad_norm": 4.826879978179932, "learning_rate": 3.139301755212537e-05, "loss": 0.2143, "step": 1594 }, { "epoch": 1.6409465020576133, "grad_norm": 5.042708873748779, "learning_rate": 3.1382155801990265e-05, "loss": 0.2011, "step": 1595 }, { "epoch": 1.6419753086419753, "grad_norm": 0.6428842544555664, "learning_rate": 3.137128072254967e-05, "loss": 0.0116, "step": 1596 }, { "epoch": 1.6430041152263375, "grad_norm": 6.803867340087891, "learning_rate": 3.1360392331813356e-05, "loss": 0.3944, "step": 1597 }, { "epoch": 1.6440329218106995, "grad_norm": 4.369873046875, "learning_rate": 3.134949064781309e-05, "loss": 0.1438, "step": 1598 }, { "epoch": 1.6450617283950617, "grad_norm": 8.419076919555664, "learning_rate": 3.133857568860268e-05, "loss": 0.3185, "step": 1599 }, { "epoch": 1.646090534979424, "grad_norm": 7.200667858123779, "learning_rate": 3.132764747225794e-05, "loss": 0.4497, "step": 1600 }, { "epoch": 1.646090534979424, "eval_Qnli-dev_cosine_accuracy": 0.705078125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7726122140884399, "eval_Qnli-dev_cosine_ap": 0.7463055188938073, "eval_Qnli-dev_cosine_f1": 0.6965888689407541, "eval_Qnli-dev_cosine_f1_threshold": 0.7256693840026855, "eval_Qnli-dev_cosine_precision": 0.6043613707165109, "eval_Qnli-dev_cosine_recall": 0.8220338983050848, "eval_Qnli-dev_dot_accuracy": 0.666015625, "eval_Qnli-dev_dot_accuracy_threshold": 365.7900695800781, "eval_Qnli-dev_dot_ap": 0.6686481538000109, "eval_Qnli-dev_dot_f1": 0.6729857819905213, "eval_Qnli-dev_dot_f1_threshold": 310.37847900390625, "eval_Qnli-dev_dot_precision": 0.5365239294710328, "eval_Qnli-dev_dot_recall": 0.902542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.727681159973145, "eval_Qnli-dev_euclidean_ap": 0.7546221577715386, "eval_Qnli-dev_euclidean_f1": 0.7009174311926605, "eval_Qnli-dev_euclidean_f1_threshold": 15.86984634399414, "eval_Qnli-dev_euclidean_precision": 0.6181229773462783, "eval_Qnli-dev_euclidean_recall": 0.809322033898305, "eval_Qnli-dev_manhattan_accuracy": 0.705078125, "eval_Qnli-dev_manhattan_accuracy_threshold": 301.3721923828125, "eval_Qnli-dev_manhattan_ap": 0.7594406310315899, "eval_Qnli-dev_manhattan_f1": 0.704424778761062, "eval_Qnli-dev_manhattan_f1_threshold": 338.2559814453125, "eval_Qnli-dev_manhattan_precision": 0.6048632218844985, "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, "eval_Qnli-dev_max_accuracy": 0.705078125, "eval_Qnli-dev_max_accuracy_threshold": 365.7900695800781, "eval_Qnli-dev_max_ap": 0.7594406310315899, "eval_Qnli-dev_max_f1": 0.704424778761062, "eval_Qnli-dev_max_f1_threshold": 338.2559814453125, "eval_Qnli-dev_max_precision": 0.6181229773462783, "eval_Qnli-dev_max_recall": 0.902542372881356, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8390976190567017, "eval_allNLI-dev_cosine_ap": 0.6197766477419683, "eval_allNLI-dev_cosine_f1": 0.6213151927437641, "eval_allNLI-dev_cosine_f1_threshold": 0.7497798204421997, "eval_allNLI-dev_cosine_precision": 0.5111940298507462, "eval_allNLI-dev_cosine_recall": 0.791907514450867, "eval_allNLI-dev_dot_accuracy": 0.7109375, "eval_allNLI-dev_dot_accuracy_threshold": 391.1158447265625, "eval_allNLI-dev_dot_ap": 0.5640544532184968, "eval_allNLI-dev_dot_f1": 0.5887265135699373, "eval_allNLI-dev_dot_f1_threshold": 326.6453857421875, "eval_allNLI-dev_dot_precision": 0.46078431372549017, "eval_allNLI-dev_dot_recall": 0.815028901734104, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.69601821899414, "eval_allNLI-dev_euclidean_ap": 0.6237364426707476, "eval_allNLI-dev_euclidean_f1": 0.6247288503253796, "eval_allNLI-dev_euclidean_f1_threshold": 15.531798362731934, "eval_allNLI-dev_euclidean_precision": 0.5, "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 257.890380859375, "eval_allNLI-dev_manhattan_ap": 0.6215067171697528, "eval_allNLI-dev_manhattan_f1": 0.6202247191011235, "eval_allNLI-dev_manhattan_f1_threshold": 317.50958251953125, "eval_allNLI-dev_manhattan_precision": 0.5073529411764706, "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 391.1158447265625, "eval_allNLI-dev_max_ap": 0.6237364426707476, "eval_allNLI-dev_max_f1": 0.6247288503253796, "eval_allNLI-dev_max_f1_threshold": 326.6453857421875, "eval_allNLI-dev_max_precision": 0.5111940298507462, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.7594406310315899, "eval_sts-test_pearson_cosine": 0.8440868643946784, "eval_sts-test_pearson_dot": 0.8408286771702145, "eval_sts-test_pearson_euclidean": 0.8720004318239576, "eval_sts-test_pearson_manhattan": 0.8701934437728628, "eval_sts-test_pearson_max": 0.8720004318239576, "eval_sts-test_spearman_cosine": 0.873001834310702, "eval_sts-test_spearman_dot": 0.838884815163741, "eval_sts-test_spearman_euclidean": 0.8694954387269019, "eval_sts-test_spearman_manhattan": 0.8660044823282814, "eval_sts-test_spearman_max": 0.873001834310702, "eval_vitaminc-pairs_loss": 3.2943060398101807, "eval_vitaminc-pairs_runtime": 3.2024, "eval_vitaminc-pairs_samples_per_second": 39.971, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1600 }, { "epoch": 1.646090534979424, "eval_negation-triplets_loss": 0.9933223724365234, "eval_negation-triplets_runtime": 0.7492, "eval_negation-triplets_samples_per_second": 170.839, "eval_negation-triplets_steps_per_second": 1.335, "step": 1600 }, { "epoch": 1.646090534979424, "eval_scitail-pairs-pos_loss": 0.1557767689228058, "eval_scitail-pairs-pos_runtime": 0.867, "eval_scitail-pairs-pos_samples_per_second": 147.63, "eval_scitail-pairs-pos_steps_per_second": 1.153, "step": 1600 }, { "epoch": 1.646090534979424, "eval_scitail-pairs-qa_loss": 0.0011445347918197513, "eval_scitail-pairs-qa_runtime": 0.5991, "eval_scitail-pairs-qa_samples_per_second": 213.662, "eval_scitail-pairs-qa_steps_per_second": 1.669, "step": 1600 }, { "epoch": 1.646090534979424, "eval_xsum-pairs_loss": 0.3158724904060364, "eval_xsum-pairs_runtime": 3.0272, "eval_xsum-pairs_samples_per_second": 42.284, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1600 }, { "epoch": 1.646090534979424, "eval_sciq_pairs_loss": 0.10199625790119171, "eval_sciq_pairs_runtime": 3.5022, "eval_sciq_pairs_samples_per_second": 36.549, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1600 }, { "epoch": 1.646090534979424, "eval_qasc_pairs_loss": 0.19696098566055298, "eval_qasc_pairs_runtime": 0.6188, "eval_qasc_pairs_samples_per_second": 206.868, "eval_qasc_pairs_steps_per_second": 1.616, "step": 1600 }, { "epoch": 1.646090534979424, "eval_openbookqa_pairs_loss": 0.8150601387023926, "eval_openbookqa_pairs_runtime": 0.5882, "eval_openbookqa_pairs_samples_per_second": 217.608, "eval_openbookqa_pairs_steps_per_second": 1.7, "step": 1600 }, { "epoch": 1.646090534979424, "eval_msmarco_pairs_loss": 0.8213596343994141, "eval_msmarco_pairs_runtime": 1.5212, "eval_msmarco_pairs_samples_per_second": 84.147, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 1600 }, { "epoch": 1.646090534979424, "eval_nq_pairs_loss": 0.7698879241943359, "eval_nq_pairs_runtime": 2.9009, "eval_nq_pairs_samples_per_second": 44.124, "eval_nq_pairs_steps_per_second": 0.345, "step": 1600 }, { "epoch": 1.646090534979424, "eval_trivia_pairs_loss": 0.699388861656189, "eval_trivia_pairs_runtime": 3.4433, "eval_trivia_pairs_samples_per_second": 37.173, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1600 }, { "epoch": 1.646090534979424, "eval_gooaq_pairs_loss": 0.4309929609298706, "eval_gooaq_pairs_runtime": 0.9526, "eval_gooaq_pairs_samples_per_second": 134.368, "eval_gooaq_pairs_steps_per_second": 1.05, "step": 1600 }, { "epoch": 1.646090534979424, "eval_paws-pos_loss": 0.021949268877506256, "eval_paws-pos_runtime": 0.6983, "eval_paws-pos_samples_per_second": 183.301, "eval_paws-pos_steps_per_second": 1.432, "step": 1600 }, { "epoch": 1.646090534979424, "eval_global_dataset_loss": 0.4891248047351837, "eval_global_dataset_runtime": 13.4015, "eval_global_dataset_samples_per_second": 31.041, "eval_global_dataset_steps_per_second": 0.298, "step": 1600 }, { "epoch": 1.6471193415637861, "grad_norm": 5.759812831878662, "learning_rate": 3.131670601687659e-05, "loss": 0.3233, "step": 1601 }, { "epoch": 1.6481481481481481, "grad_norm": 7.317870140075684, "learning_rate": 3.130575134057831e-05, "loss": 0.257, "step": 1602 }, { "epoch": 1.6491769547325101, "grad_norm": 1.7312713861465454, "learning_rate": 3.129478346150466e-05, "loss": 0.0258, "step": 1603 }, { "epoch": 1.6502057613168724, "grad_norm": 10.138225555419922, "learning_rate": 3.128380239781907e-05, "loss": 0.4831, "step": 1604 }, { "epoch": 1.6512345679012346, "grad_norm": 0.5360116362571716, "learning_rate": 3.12728081677068e-05, "loss": 0.0047, "step": 1605 }, { "epoch": 1.6522633744855968, "grad_norm": 17.974763870239258, "learning_rate": 3.1261800789374926e-05, "loss": 2.6074, "step": 1606 }, { "epoch": 1.653292181069959, "grad_norm": 13.676968574523926, "learning_rate": 3.125078028105228e-05, "loss": 0.9363, "step": 1607 }, { "epoch": 1.654320987654321, "grad_norm": 5.995203018188477, "learning_rate": 3.1239746660989456e-05, "loss": 0.1652, "step": 1608 }, { "epoch": 1.655349794238683, "grad_norm": 12.427960395812988, "learning_rate": 3.1228699947458744e-05, "loss": 0.6762, "step": 1609 }, { "epoch": 1.6563786008230452, "grad_norm": 6.78856897354126, "learning_rate": 3.121764015875413e-05, "loss": 0.3767, "step": 1610 }, { "epoch": 1.6574074074074074, "grad_norm": 13.389119148254395, "learning_rate": 3.1206567313191256e-05, "loss": 0.9476, "step": 1611 }, { "epoch": 1.6584362139917697, "grad_norm": 1.0449633598327637, "learning_rate": 3.119548142910737e-05, "loss": 0.0285, "step": 1612 }, { "epoch": 1.6594650205761317, "grad_norm": 2.045689582824707, "learning_rate": 3.1184382524861326e-05, "loss": 0.1321, "step": 1613 }, { "epoch": 1.6604938271604939, "grad_norm": 5.213545322418213, "learning_rate": 3.117327061883354e-05, "loss": 0.1972, "step": 1614 }, { "epoch": 1.6615226337448559, "grad_norm": 12.71414566040039, "learning_rate": 3.116214572942597e-05, "loss": 0.8901, "step": 1615 }, { "epoch": 1.662551440329218, "grad_norm": 5.993893146514893, "learning_rate": 3.115100787506204e-05, "loss": 0.1987, "step": 1616 }, { "epoch": 1.6635802469135803, "grad_norm": 0.42078569531440735, "learning_rate": 3.1139857074186675e-05, "loss": 0.0073, "step": 1617 }, { "epoch": 1.6646090534979425, "grad_norm": 6.253912448883057, "learning_rate": 3.1128693345266235e-05, "loss": 0.2412, "step": 1618 }, { "epoch": 1.6656378600823045, "grad_norm": 13.241989135742188, "learning_rate": 3.1117516706788495e-05, "loss": 1.8227, "step": 1619 }, { "epoch": 1.6666666666666665, "grad_norm": 2.7509756088256836, "learning_rate": 3.110632717726259e-05, "loss": 0.0598, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_Qnli-dev_cosine_accuracy": 0.705078125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.8112232685089111, "eval_Qnli-dev_cosine_ap": 0.7530566628283017, "eval_Qnli-dev_cosine_f1": 0.702803738317757, "eval_Qnli-dev_cosine_f1_threshold": 0.7403519153594971, "eval_Qnli-dev_cosine_precision": 0.6287625418060201, "eval_Qnli-dev_cosine_recall": 0.7966101694915254, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 389.0986022949219, "eval_Qnli-dev_dot_ap": 0.6839751331019408, "eval_Qnli-dev_dot_f1": 0.6869712351945855, "eval_Qnli-dev_dot_f1_threshold": 326.48431396484375, "eval_Qnli-dev_dot_precision": 0.571830985915493, "eval_Qnli-dev_dot_recall": 0.8601694915254238, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.742420196533203, "eval_Qnli-dev_euclidean_ap": 0.7602323009588319, "eval_Qnli-dev_euclidean_f1": 0.7107750472589792, "eval_Qnli-dev_euclidean_f1_threshold": 15.595666885375977, "eval_Qnli-dev_euclidean_precision": 0.6416382252559727, "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, "eval_Qnli-dev_manhattan_accuracy": 0.708984375, "eval_Qnli-dev_manhattan_accuracy_threshold": 279.71630859375, "eval_Qnli-dev_manhattan_ap": 0.7636945225564578, "eval_Qnli-dev_manhattan_f1": 0.7156308851224106, "eval_Qnli-dev_manhattan_f1_threshold": 326.18878173828125, "eval_Qnli-dev_manhattan_precision": 0.6440677966101694, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.7109375, "eval_Qnli-dev_max_accuracy_threshold": 389.0986022949219, "eval_Qnli-dev_max_ap": 0.7636945225564578, "eval_Qnli-dev_max_f1": 0.7156308851224106, "eval_Qnli-dev_max_f1_threshold": 326.48431396484375, "eval_Qnli-dev_max_precision": 0.6440677966101694, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8469563722610474, "eval_allNLI-dev_cosine_ap": 0.6148003055167395, "eval_allNLI-dev_cosine_f1": 0.6222222222222222, "eval_allNLI-dev_cosine_f1_threshold": 0.7551340460777283, "eval_allNLI-dev_cosine_precision": 0.5054151624548736, "eval_allNLI-dev_cosine_recall": 0.8092485549132948, "eval_allNLI-dev_dot_accuracy": 0.705078125, "eval_allNLI-dev_dot_accuracy_threshold": 401.8390808105469, "eval_allNLI-dev_dot_ap": 0.5574330740455032, "eval_allNLI-dev_dot_f1": 0.5793991416309013, "eval_allNLI-dev_dot_f1_threshold": 336.57501220703125, "eval_allNLI-dev_dot_precision": 0.46075085324232085, "eval_allNLI-dev_dot_recall": 0.7803468208092486, "eval_allNLI-dev_euclidean_accuracy": 0.744140625, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.117332458496094, "eval_allNLI-dev_euclidean_ap": 0.6202020587591924, "eval_allNLI-dev_euclidean_f1": 0.6337078651685394, "eval_allNLI-dev_euclidean_f1_threshold": 14.898088455200195, "eval_allNLI-dev_euclidean_precision": 0.5183823529411765, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 248.040283203125, "eval_allNLI-dev_manhattan_ap": 0.6195824334566606, "eval_allNLI-dev_manhattan_f1": 0.6261261261261263, "eval_allNLI-dev_manhattan_f1_threshold": 310.2285461425781, "eval_allNLI-dev_manhattan_precision": 0.5129151291512916, "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, "eval_allNLI-dev_max_accuracy": 0.744140625, "eval_allNLI-dev_max_accuracy_threshold": 401.8390808105469, "eval_allNLI-dev_max_ap": 0.6202020587591924, "eval_allNLI-dev_max_f1": 0.6337078651685394, "eval_allNLI-dev_max_f1_threshold": 336.57501220703125, "eval_allNLI-dev_max_precision": 0.5183823529411765, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.7636945225564578, "eval_sts-test_pearson_cosine": 0.8442697659120451, "eval_sts-test_pearson_dot": 0.8332794530197303, "eval_sts-test_pearson_euclidean": 0.8751710961819176, "eval_sts-test_pearson_manhattan": 0.8743056704972211, "eval_sts-test_pearson_max": 0.8751710961819176, "eval_sts-test_spearman_cosine": 0.8770357577637844, "eval_sts-test_spearman_dot": 0.8219956443633655, "eval_sts-test_spearman_euclidean": 0.8735976526048674, "eval_sts-test_spearman_manhattan": 0.8719677221624696, "eval_sts-test_spearman_max": 0.8770357577637844, "eval_vitaminc-pairs_loss": 3.147581100463867, "eval_vitaminc-pairs_runtime": 3.1941, "eval_vitaminc-pairs_samples_per_second": 40.074, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_negation-triplets_loss": 0.9426246285438538, "eval_negation-triplets_runtime": 0.7561, "eval_negation-triplets_samples_per_second": 169.3, "eval_negation-triplets_steps_per_second": 1.323, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_scitail-pairs-pos_loss": 0.15858975052833557, "eval_scitail-pairs-pos_runtime": 0.8858, "eval_scitail-pairs-pos_samples_per_second": 144.509, "eval_scitail-pairs-pos_steps_per_second": 1.129, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_scitail-pairs-qa_loss": 0.0009499162551946938, "eval_scitail-pairs-qa_runtime": 0.6053, "eval_scitail-pairs-qa_samples_per_second": 211.474, "eval_scitail-pairs-qa_steps_per_second": 1.652, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_xsum-pairs_loss": 0.28190702199935913, "eval_xsum-pairs_runtime": 3.0377, "eval_xsum-pairs_samples_per_second": 42.137, "eval_xsum-pairs_steps_per_second": 0.329, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_sciq_pairs_loss": 0.09951034188270569, "eval_sciq_pairs_runtime": 3.5281, "eval_sciq_pairs_samples_per_second": 36.28, "eval_sciq_pairs_steps_per_second": 0.283, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_qasc_pairs_loss": 0.19479617476463318, "eval_qasc_pairs_runtime": 0.6298, "eval_qasc_pairs_samples_per_second": 203.23, "eval_qasc_pairs_steps_per_second": 1.588, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_openbookqa_pairs_loss": 0.8473735451698303, "eval_openbookqa_pairs_runtime": 0.5979, "eval_openbookqa_pairs_samples_per_second": 214.093, "eval_openbookqa_pairs_steps_per_second": 1.673, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_msmarco_pairs_loss": 0.7654114365577698, "eval_msmarco_pairs_runtime": 1.5282, "eval_msmarco_pairs_samples_per_second": 83.759, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_nq_pairs_loss": 0.7675896286964417, "eval_nq_pairs_runtime": 2.9053, "eval_nq_pairs_samples_per_second": 44.057, "eval_nq_pairs_steps_per_second": 0.344, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_trivia_pairs_loss": 0.6710144281387329, "eval_trivia_pairs_runtime": 3.4501, "eval_trivia_pairs_samples_per_second": 37.1, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_gooaq_pairs_loss": 0.5018545389175415, "eval_gooaq_pairs_runtime": 0.963, "eval_gooaq_pairs_samples_per_second": 132.918, "eval_gooaq_pairs_steps_per_second": 1.038, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_paws-pos_loss": 0.0214223675429821, "eval_paws-pos_runtime": 0.7097, "eval_paws-pos_samples_per_second": 180.351, "eval_paws-pos_steps_per_second": 1.409, "step": 1620 }, { "epoch": 1.6666666666666665, "eval_global_dataset_loss": 0.4568376839160919, "eval_global_dataset_runtime": 13.4081, "eval_global_dataset_samples_per_second": 31.026, "eval_global_dataset_steps_per_second": 0.298, "step": 1620 }, { "epoch": 1.6676954732510287, "grad_norm": 4.740821361541748, "learning_rate": 3.109512477521901e-05, "loss": 0.1709, "step": 1621 }, { "epoch": 1.668724279835391, "grad_norm": 8.163890838623047, "learning_rate": 3.1083909519209574e-05, "loss": 0.3081, "step": 1622 }, { "epoch": 1.6697530864197532, "grad_norm": 5.643056869506836, "learning_rate": 3.1072681427807386e-05, "loss": 0.2765, "step": 1623 }, { "epoch": 1.6707818930041154, "grad_norm": 8.1829833984375, "learning_rate": 3.106144051960679e-05, "loss": 0.3699, "step": 1624 }, { "epoch": 1.6718106995884774, "grad_norm": 7.014549732208252, "learning_rate": 3.105018681322337e-05, "loss": 0.2915, "step": 1625 }, { "epoch": 1.6728395061728394, "grad_norm": 5.737176895141602, "learning_rate": 3.10389203272939e-05, "loss": 0.2271, "step": 1626 }, { "epoch": 1.6738683127572016, "grad_norm": 6.97775936126709, "learning_rate": 3.1027641080476315e-05, "loss": 0.3229, "step": 1627 }, { "epoch": 1.6748971193415638, "grad_norm": 5.210075855255127, "learning_rate": 3.101634909144969e-05, "loss": 0.2214, "step": 1628 }, { "epoch": 1.675925925925926, "grad_norm": 11.457197189331055, "learning_rate": 3.100504437891421e-05, "loss": 0.9589, "step": 1629 }, { "epoch": 1.676954732510288, "grad_norm": 7.409816265106201, "learning_rate": 3.09937269615911e-05, "loss": 0.3692, "step": 1630 }, { "epoch": 1.6779835390946503, "grad_norm": 5.9740986824035645, "learning_rate": 3.098239685822265e-05, "loss": 0.2368, "step": 1631 }, { "epoch": 1.6790123456790123, "grad_norm": 0.5974377989768982, "learning_rate": 3.097105408757215e-05, "loss": 0.0143, "step": 1632 }, { "epoch": 1.6800411522633745, "grad_norm": 12.626338005065918, "learning_rate": 3.0959698668423876e-05, "loss": 0.8766, "step": 1633 }, { "epoch": 1.6810699588477367, "grad_norm": 4.880333423614502, "learning_rate": 3.094833061958304e-05, "loss": 0.2831, "step": 1634 }, { "epoch": 1.682098765432099, "grad_norm": 11.46767807006836, "learning_rate": 3.0936949959875773e-05, "loss": 0.7252, "step": 1635 }, { "epoch": 1.683127572016461, "grad_norm": 10.119542121887207, "learning_rate": 3.0925556708149096e-05, "loss": 0.6238, "step": 1636 }, { "epoch": 1.684156378600823, "grad_norm": 9.226078987121582, "learning_rate": 3.091415088327088e-05, "loss": 0.4887, "step": 1637 }, { "epoch": 1.6851851851851851, "grad_norm": 7.247396469116211, "learning_rate": 3.090273250412981e-05, "loss": 0.4855, "step": 1638 }, { "epoch": 1.6862139917695473, "grad_norm": 7.731893539428711, "learning_rate": 3.089130158963537e-05, "loss": 0.4773, "step": 1639 }, { "epoch": 1.6872427983539096, "grad_norm": 11.5791597366333, "learning_rate": 3.087985815871781e-05, "loss": 0.7261, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_Qnli-dev_cosine_accuracy": 0.7109375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7935091257095337, "eval_Qnli-dev_cosine_ap": 0.7644906675103209, "eval_Qnli-dev_cosine_f1": 0.7129798903107861, "eval_Qnli-dev_cosine_f1_threshold": 0.7131432890892029, "eval_Qnli-dev_cosine_precision": 0.6270096463022508, "eval_Qnli-dev_cosine_recall": 0.826271186440678, "eval_Qnli-dev_dot_accuracy": 0.68359375, "eval_Qnli-dev_dot_accuracy_threshold": 355.2696228027344, "eval_Qnli-dev_dot_ap": 0.7203838911649946, "eval_Qnli-dev_dot_f1": 0.6779089376053963, "eval_Qnli-dev_dot_f1_threshold": 305.30413818359375, "eval_Qnli-dev_dot_precision": 0.5630252100840336, "eval_Qnli-dev_dot_recall": 0.8516949152542372, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.860023498535156, "eval_Qnli-dev_euclidean_ap": 0.7704725006394498, "eval_Qnli-dev_euclidean_f1": 0.7111111111111111, "eval_Qnli-dev_euclidean_f1_threshold": 16.034788131713867, "eval_Qnli-dev_euclidean_precision": 0.631578947368421, "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 298.86627197265625, "eval_Qnli-dev_manhattan_ap": 0.7720556864200312, "eval_Qnli-dev_manhattan_f1": 0.7204502814258913, "eval_Qnli-dev_manhattan_f1_threshold": 334.255615234375, "eval_Qnli-dev_manhattan_precision": 0.6464646464646465, "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 355.2696228027344, "eval_Qnli-dev_max_ap": 0.7720556864200312, "eval_Qnli-dev_max_f1": 0.7204502814258913, "eval_Qnli-dev_max_f1_threshold": 334.255615234375, "eval_Qnli-dev_max_precision": 0.6464646464646465, "eval_Qnli-dev_max_recall": 0.8516949152542372, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8340206146240234, "eval_allNLI-dev_cosine_ap": 0.6154324502870555, "eval_allNLI-dev_cosine_f1": 0.6157303370786517, "eval_allNLI-dev_cosine_f1_threshold": 0.7429921627044678, "eval_allNLI-dev_cosine_precision": 0.5036764705882353, "eval_allNLI-dev_cosine_recall": 0.791907514450867, "eval_allNLI-dev_dot_accuracy": 0.69921875, "eval_allNLI-dev_dot_accuracy_threshold": 372.3382873535156, "eval_allNLI-dev_dot_ap": 0.5565432560970462, "eval_allNLI-dev_dot_f1": 0.5910165484633569, "eval_allNLI-dev_dot_f1_threshold": 336.52593994140625, "eval_allNLI-dev_dot_precision": 0.5, "eval_allNLI-dev_dot_recall": 0.7225433526011561, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.632896423339844, "eval_allNLI-dev_euclidean_ap": 0.6216372789356083, "eval_allNLI-dev_euclidean_f1": 0.6266666666666667, "eval_allNLI-dev_euclidean_f1_threshold": 15.324407577514648, "eval_allNLI-dev_euclidean_precision": 0.5090252707581228, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 260.36358642578125, "eval_allNLI-dev_manhattan_ap": 0.6201380486598296, "eval_allNLI-dev_manhattan_f1": 0.6284403669724771, "eval_allNLI-dev_manhattan_f1_threshold": 315.0744934082031, "eval_allNLI-dev_manhattan_precision": 0.5209125475285171, "eval_allNLI-dev_manhattan_recall": 0.791907514450867, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 372.3382873535156, "eval_allNLI-dev_max_ap": 0.6216372789356083, "eval_allNLI-dev_max_f1": 0.6284403669724771, "eval_allNLI-dev_max_f1_threshold": 336.52593994140625, "eval_allNLI-dev_max_precision": 0.5209125475285171, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.7720556864200312, "eval_sts-test_pearson_cosine": 0.8480708109327747, "eval_sts-test_pearson_dot": 0.8351161910233019, "eval_sts-test_pearson_euclidean": 0.8779361059678183, "eval_sts-test_pearson_manhattan": 0.8765214608457297, "eval_sts-test_pearson_max": 0.8779361059678183, "eval_sts-test_spearman_cosine": 0.8782235904948963, "eval_sts-test_spearman_dot": 0.83017832300626, "eval_sts-test_spearman_euclidean": 0.8748063394237875, "eval_sts-test_spearman_manhattan": 0.8733159434144085, "eval_sts-test_spearman_max": 0.8782235904948963, "eval_vitaminc-pairs_loss": 3.1627960205078125, "eval_vitaminc-pairs_runtime": 3.2, "eval_vitaminc-pairs_samples_per_second": 40.0, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_negation-triplets_loss": 0.9496148824691772, "eval_negation-triplets_runtime": 0.7615, "eval_negation-triplets_samples_per_second": 168.09, "eval_negation-triplets_steps_per_second": 1.313, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_scitail-pairs-pos_loss": 0.1690126657485962, "eval_scitail-pairs-pos_runtime": 0.8832, "eval_scitail-pairs-pos_samples_per_second": 144.927, "eval_scitail-pairs-pos_steps_per_second": 1.132, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_scitail-pairs-qa_loss": 0.0007716402760706842, "eval_scitail-pairs-qa_runtime": 0.6012, "eval_scitail-pairs-qa_samples_per_second": 212.92, "eval_scitail-pairs-qa_steps_per_second": 1.663, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_xsum-pairs_loss": 0.29522186517715454, "eval_xsum-pairs_runtime": 3.0305, "eval_xsum-pairs_samples_per_second": 42.238, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_sciq_pairs_loss": 0.09298048168420792, "eval_sciq_pairs_runtime": 3.5126, "eval_sciq_pairs_samples_per_second": 36.441, "eval_sciq_pairs_steps_per_second": 0.285, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_qasc_pairs_loss": 0.1856236308813095, "eval_qasc_pairs_runtime": 0.6208, "eval_qasc_pairs_samples_per_second": 206.172, "eval_qasc_pairs_steps_per_second": 1.611, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_openbookqa_pairs_loss": 0.8703394532203674, "eval_openbookqa_pairs_runtime": 0.5949, "eval_openbookqa_pairs_samples_per_second": 215.164, "eval_openbookqa_pairs_steps_per_second": 1.681, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_msmarco_pairs_loss": 0.8120965361595154, "eval_msmarco_pairs_runtime": 1.5191, "eval_msmarco_pairs_samples_per_second": 84.26, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_nq_pairs_loss": 0.7762519121170044, "eval_nq_pairs_runtime": 2.9038, "eval_nq_pairs_samples_per_second": 44.08, "eval_nq_pairs_steps_per_second": 0.344, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_trivia_pairs_loss": 0.6840327382087708, "eval_trivia_pairs_runtime": 3.4573, "eval_trivia_pairs_samples_per_second": 37.023, "eval_trivia_pairs_steps_per_second": 0.289, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_gooaq_pairs_loss": 0.489113450050354, "eval_gooaq_pairs_runtime": 0.9594, "eval_gooaq_pairs_samples_per_second": 133.414, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_paws-pos_loss": 0.021868284791707993, "eval_paws-pos_runtime": 0.7078, "eval_paws-pos_samples_per_second": 180.833, "eval_paws-pos_steps_per_second": 1.413, "step": 1640 }, { "epoch": 1.6872427983539096, "eval_global_dataset_loss": 0.457489550113678, "eval_global_dataset_runtime": 13.4378, "eval_global_dataset_samples_per_second": 30.958, "eval_global_dataset_steps_per_second": 0.298, "step": 1640 }, { "epoch": 1.6882716049382716, "grad_norm": 5.879720687866211, "learning_rate": 3.0868402230328075e-05, "loss": 0.205, "step": 1641 }, { "epoch": 1.6893004115226338, "grad_norm": 7.6843342781066895, "learning_rate": 3.085693382343786e-05, "loss": 0.3409, "step": 1642 }, { "epoch": 1.6903292181069958, "grad_norm": 6.418579578399658, "learning_rate": 3.084545295703948e-05, "loss": 0.247, "step": 1643 }, { "epoch": 1.691358024691358, "grad_norm": 6.323958873748779, "learning_rate": 3.08339596501459e-05, "loss": 0.2139, "step": 1644 }, { "epoch": 1.6923868312757202, "grad_norm": 7.00066614151001, "learning_rate": 3.0822453921790696e-05, "loss": 0.5987, "step": 1645 }, { "epoch": 1.6934156378600824, "grad_norm": 0.08560808002948761, "learning_rate": 3.081093579102799e-05, "loss": 0.0012, "step": 1646 }, { "epoch": 1.6944444444444444, "grad_norm": 6.805712699890137, "learning_rate": 3.079940527693247e-05, "loss": 0.3878, "step": 1647 }, { "epoch": 1.6954732510288066, "grad_norm": 5.735049247741699, "learning_rate": 3.078786239859931e-05, "loss": 0.1863, "step": 1648 }, { "epoch": 1.6965020576131686, "grad_norm": 7.262701511383057, "learning_rate": 3.0776307175144185e-05, "loss": 0.4896, "step": 1649 }, { "epoch": 1.6975308641975309, "grad_norm": 10.45705509185791, "learning_rate": 3.076473962570319e-05, "loss": 0.79, "step": 1650 }, { "epoch": 1.698559670781893, "grad_norm": 6.432678699493408, "learning_rate": 3.075315976943284e-05, "loss": 0.2959, "step": 1651 }, { "epoch": 1.6995884773662553, "grad_norm": 6.148375034332275, "learning_rate": 3.0741567625510034e-05, "loss": 0.2332, "step": 1652 }, { "epoch": 1.7006172839506173, "grad_norm": 1.0993452072143555, "learning_rate": 3.0729963213132013e-05, "loss": 0.057, "step": 1653 }, { "epoch": 1.7016460905349793, "grad_norm": 11.179821968078613, "learning_rate": 3.071834655151635e-05, "loss": 0.5936, "step": 1654 }, { "epoch": 1.7026748971193415, "grad_norm": 14.889330863952637, "learning_rate": 3.070671765990089e-05, "loss": 1.9773, "step": 1655 }, { "epoch": 1.7037037037037037, "grad_norm": 8.183863639831543, "learning_rate": 3.0695076557543735e-05, "loss": 0.4096, "step": 1656 }, { "epoch": 1.704732510288066, "grad_norm": 5.217211723327637, "learning_rate": 3.068342326372321e-05, "loss": 0.2046, "step": 1657 }, { "epoch": 1.705761316872428, "grad_norm": 14.158547401428223, "learning_rate": 3.067175779773783e-05, "loss": 1.8898, "step": 1658 }, { "epoch": 1.7067901234567902, "grad_norm": 11.467806816101074, "learning_rate": 3.066008017890626e-05, "loss": 0.8136, "step": 1659 }, { "epoch": 1.7078189300411522, "grad_norm": 4.205172538757324, "learning_rate": 3.0648390426567306e-05, "loss": 0.1098, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_Qnli-dev_cosine_accuracy": 0.69921875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7641236782073975, "eval_Qnli-dev_cosine_ap": 0.7543947094620718, "eval_Qnli-dev_cosine_f1": 0.6967509025270758, "eval_Qnli-dev_cosine_f1_threshold": 0.7194709777832031, "eval_Qnli-dev_cosine_precision": 0.6069182389937107, "eval_Qnli-dev_cosine_recall": 0.8177966101694916, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 346.1076965332031, "eval_Qnli-dev_dot_ap": 0.6915011008394673, "eval_Qnli-dev_dot_f1": 0.6761006289308176, "eval_Qnli-dev_dot_f1_threshold": 295.66900634765625, "eval_Qnli-dev_dot_precision": 0.5375, "eval_Qnli-dev_dot_recall": 0.9110169491525424, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.609542846679688, "eval_Qnli-dev_euclidean_ap": 0.7639767594998113, "eval_Qnli-dev_euclidean_f1": 0.7042253521126761, "eval_Qnli-dev_euclidean_f1_threshold": 16.083669662475586, "eval_Qnli-dev_euclidean_precision": 0.6024096385542169, "eval_Qnli-dev_euclidean_recall": 0.847457627118644, "eval_Qnli-dev_manhattan_accuracy": 0.70703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 275.1696472167969, "eval_Qnli-dev_manhattan_ap": 0.7667941208454679, "eval_Qnli-dev_manhattan_f1": 0.717391304347826, "eval_Qnli-dev_manhattan_f1_threshold": 331.35174560546875, "eval_Qnli-dev_manhattan_precision": 0.6265822784810127, "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 346.1076965332031, "eval_Qnli-dev_max_ap": 0.7667941208454679, "eval_Qnli-dev_max_f1": 0.717391304347826, "eval_Qnli-dev_max_f1_threshold": 331.35174560546875, "eval_Qnli-dev_max_precision": 0.6265822784810127, "eval_Qnli-dev_max_recall": 0.9110169491525424, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8820390701293945, "eval_allNLI-dev_cosine_ap": 0.6219945288269941, "eval_allNLI-dev_cosine_f1": 0.6194331983805668, "eval_allNLI-dev_cosine_f1_threshold": 0.7163412570953369, "eval_allNLI-dev_cosine_precision": 0.4766355140186916, "eval_allNLI-dev_cosine_recall": 0.884393063583815, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 375.21624755859375, "eval_allNLI-dev_dot_ap": 0.5617613183656214, "eval_allNLI-dev_dot_f1": 0.5970149253731343, "eval_allNLI-dev_dot_f1_threshold": 319.013671875, "eval_allNLI-dev_dot_precision": 0.47297297297297297, "eval_allNLI-dev_dot_recall": 0.8092485549132948, "eval_allNLI-dev_euclidean_accuracy": 0.740234375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.767152786254883, "eval_allNLI-dev_euclidean_ap": 0.6298058452930776, "eval_allNLI-dev_euclidean_f1": 0.6252676659528908, "eval_allNLI-dev_euclidean_f1_threshold": 15.217535018920898, "eval_allNLI-dev_euclidean_precision": 0.4965986394557823, "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 268.7918701171875, "eval_allNLI-dev_manhattan_ap": 0.6292564669901771, "eval_allNLI-dev_manhattan_f1": 0.6310904872389791, "eval_allNLI-dev_manhattan_f1_threshold": 309.2000732421875, "eval_allNLI-dev_manhattan_precision": 0.5271317829457365, "eval_allNLI-dev_manhattan_recall": 0.7861271676300579, "eval_allNLI-dev_max_accuracy": 0.740234375, "eval_allNLI-dev_max_accuracy_threshold": 375.21624755859375, "eval_allNLI-dev_max_ap": 0.6298058452930776, "eval_allNLI-dev_max_f1": 0.6310904872389791, "eval_allNLI-dev_max_f1_threshold": 319.013671875, "eval_allNLI-dev_max_precision": 0.5271317829457365, "eval_allNLI-dev_max_recall": 0.884393063583815, "eval_sequential_score": 0.7667941208454679, "eval_sts-test_pearson_cosine": 0.8441353490634314, "eval_sts-test_pearson_dot": 0.834811441618899, "eval_sts-test_pearson_euclidean": 0.8741484768562252, "eval_sts-test_pearson_manhattan": 0.8719965858186344, "eval_sts-test_pearson_max": 0.8741484768562252, "eval_sts-test_spearman_cosine": 0.8769982562067383, "eval_sts-test_spearman_dot": 0.8341864697327808, "eval_sts-test_spearman_euclidean": 0.873439143637019, "eval_sts-test_spearman_manhattan": 0.8708601284430326, "eval_sts-test_spearman_max": 0.8769982562067383, "eval_vitaminc-pairs_loss": 2.9152801036834717, "eval_vitaminc-pairs_runtime": 3.2102, "eval_vitaminc-pairs_samples_per_second": 39.873, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_negation-triplets_loss": 0.98664790391922, "eval_negation-triplets_runtime": 0.7545, "eval_negation-triplets_samples_per_second": 169.658, "eval_negation-triplets_steps_per_second": 1.325, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_scitail-pairs-pos_loss": 0.17049828171730042, "eval_scitail-pairs-pos_runtime": 0.8776, "eval_scitail-pairs-pos_samples_per_second": 145.859, "eval_scitail-pairs-pos_steps_per_second": 1.14, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_scitail-pairs-qa_loss": 0.0007929063285700977, "eval_scitail-pairs-qa_runtime": 0.5963, "eval_scitail-pairs-qa_samples_per_second": 214.658, "eval_scitail-pairs-qa_steps_per_second": 1.677, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_xsum-pairs_loss": 0.30998116731643677, "eval_xsum-pairs_runtime": 3.0434, "eval_xsum-pairs_samples_per_second": 42.058, "eval_xsum-pairs_steps_per_second": 0.329, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_sciq_pairs_loss": 0.088392473757267, "eval_sciq_pairs_runtime": 3.5172, "eval_sciq_pairs_samples_per_second": 36.392, "eval_sciq_pairs_steps_per_second": 0.284, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_qasc_pairs_loss": 0.18838748335838318, "eval_qasc_pairs_runtime": 0.6126, "eval_qasc_pairs_samples_per_second": 208.935, "eval_qasc_pairs_steps_per_second": 1.632, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_openbookqa_pairs_loss": 0.9197407960891724, "eval_openbookqa_pairs_runtime": 0.5926, "eval_openbookqa_pairs_samples_per_second": 215.986, "eval_openbookqa_pairs_steps_per_second": 1.687, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_msmarco_pairs_loss": 0.8759288191795349, "eval_msmarco_pairs_runtime": 1.5263, "eval_msmarco_pairs_samples_per_second": 83.861, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_nq_pairs_loss": 0.6125518083572388, "eval_nq_pairs_runtime": 2.9034, "eval_nq_pairs_samples_per_second": 44.087, "eval_nq_pairs_steps_per_second": 0.344, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_trivia_pairs_loss": 0.8040640354156494, "eval_trivia_pairs_runtime": 3.4397, "eval_trivia_pairs_samples_per_second": 37.212, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_gooaq_pairs_loss": 0.46305325627326965, "eval_gooaq_pairs_runtime": 0.9574, "eval_gooaq_pairs_samples_per_second": 133.691, "eval_gooaq_pairs_steps_per_second": 1.044, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_paws-pos_loss": 0.023760538548231125, "eval_paws-pos_runtime": 0.7042, "eval_paws-pos_samples_per_second": 181.772, "eval_paws-pos_steps_per_second": 1.42, "step": 1660 }, { "epoch": 1.7078189300411522, "eval_global_dataset_loss": 0.46334582567214966, "eval_global_dataset_runtime": 13.3948, "eval_global_dataset_samples_per_second": 31.057, "eval_global_dataset_steps_per_second": 0.299, "step": 1660 }, { "epoch": 1.7088477366255144, "grad_norm": 3.219351053237915, "learning_rate": 3.063668856007985e-05, "loss": 0.1057, "step": 1661 }, { "epoch": 1.7098765432098766, "grad_norm": 5.379256725311279, "learning_rate": 3.062497459882286e-05, "loss": 0.1945, "step": 1662 }, { "epoch": 1.7109053497942388, "grad_norm": 4.648935794830322, "learning_rate": 3.061324856219531e-05, "loss": 0.2146, "step": 1663 }, { "epoch": 1.7119341563786008, "grad_norm": 16.28590965270996, "learning_rate": 3.060151046961617e-05, "loss": 2.6085, "step": 1664 }, { "epoch": 1.7129629629629628, "grad_norm": 5.0320725440979, "learning_rate": 3.05897603405244e-05, "loss": 0.1174, "step": 1665 }, { "epoch": 1.713991769547325, "grad_norm": 11.108006477355957, "learning_rate": 3.057799819437889e-05, "loss": 0.7045, "step": 1666 }, { "epoch": 1.7150205761316872, "grad_norm": 5.2421650886535645, "learning_rate": 3.056622405065839e-05, "loss": 0.1569, "step": 1667 }, { "epoch": 1.7160493827160495, "grad_norm": 8.442304611206055, "learning_rate": 3.055443792886156e-05, "loss": 0.5327, "step": 1668 }, { "epoch": 1.7170781893004117, "grad_norm": 4.792802333831787, "learning_rate": 3.0542639848506894e-05, "loss": 0.1637, "step": 1669 }, { "epoch": 1.7181069958847737, "grad_norm": 0.7857884168624878, "learning_rate": 3.0530829829132676e-05, "loss": 0.0073, "step": 1670 }, { "epoch": 1.7191358024691357, "grad_norm": 1.1031395196914673, "learning_rate": 3.051900789029696e-05, "loss": 0.0214, "step": 1671 }, { "epoch": 1.7201646090534979, "grad_norm": 5.800541400909424, "learning_rate": 3.050717405157755e-05, "loss": 0.3875, "step": 1672 }, { "epoch": 1.72119341563786, "grad_norm": 5.825467586517334, "learning_rate": 3.049532833257195e-05, "loss": 0.2437, "step": 1673 }, { "epoch": 1.7222222222222223, "grad_norm": 8.15721321105957, "learning_rate": 3.048347075289735e-05, "loss": 0.5094, "step": 1674 }, { "epoch": 1.7232510288065843, "grad_norm": 8.726066589355469, "learning_rate": 3.047160133219056e-05, "loss": 0.5376, "step": 1675 }, { "epoch": 1.7242798353909465, "grad_norm": 5.205786228179932, "learning_rate": 3.0459720090108023e-05, "loss": 0.1742, "step": 1676 }, { "epoch": 1.7253086419753085, "grad_norm": 6.896229267120361, "learning_rate": 3.0447827046325737e-05, "loss": 0.2372, "step": 1677 }, { "epoch": 1.7263374485596708, "grad_norm": 4.733861923217773, "learning_rate": 3.0435922220539273e-05, "loss": 0.1288, "step": 1678 }, { "epoch": 1.727366255144033, "grad_norm": 7.039435863494873, "learning_rate": 3.042400563246368e-05, "loss": 0.2187, "step": 1679 }, { "epoch": 1.7283950617283952, "grad_norm": 0.8268962502479553, "learning_rate": 3.041207730183351e-05, "loss": 0.0119, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_Qnli-dev_cosine_accuracy": 0.73046875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7552091479301453, "eval_Qnli-dev_cosine_ap": 0.764224023273828, "eval_Qnli-dev_cosine_f1": 0.709278350515464, "eval_Qnli-dev_cosine_f1_threshold": 0.7447389364242554, "eval_Qnli-dev_cosine_precision": 0.6907630522088354, "eval_Qnli-dev_cosine_recall": 0.7288135593220338, "eval_Qnli-dev_dot_accuracy": 0.6875, "eval_Qnli-dev_dot_accuracy_threshold": 350.8480224609375, "eval_Qnli-dev_dot_ap": 0.7128966552619984, "eval_Qnli-dev_dot_f1": 0.6815834767641997, "eval_Qnli-dev_dot_f1_threshold": 300.3233947753906, "eval_Qnli-dev_dot_precision": 0.5739130434782609, "eval_Qnli-dev_dot_recall": 0.8389830508474576, "eval_Qnli-dev_euclidean_accuracy": 0.732421875, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.173933982849121, "eval_Qnli-dev_euclidean_ap": 0.7714429636703933, "eval_Qnli-dev_euclidean_f1": 0.719191919191919, "eval_Qnli-dev_euclidean_f1_threshold": 15.535265922546387, "eval_Qnli-dev_euclidean_precision": 0.6872586872586872, "eval_Qnli-dev_euclidean_recall": 0.7542372881355932, "eval_Qnli-dev_manhattan_accuracy": 0.73046875, "eval_Qnli-dev_manhattan_accuracy_threshold": 314.58941650390625, "eval_Qnli-dev_manhattan_ap": 0.7748141826693925, "eval_Qnli-dev_manhattan_f1": 0.7241379310344829, "eval_Qnli-dev_manhattan_f1_threshold": 334.8643493652344, "eval_Qnli-dev_manhattan_precision": 0.6608391608391608, "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, "eval_Qnli-dev_max_accuracy": 0.732421875, "eval_Qnli-dev_max_accuracy_threshold": 350.8480224609375, "eval_Qnli-dev_max_ap": 0.7748141826693925, "eval_Qnli-dev_max_f1": 0.7241379310344829, "eval_Qnli-dev_max_f1_threshold": 334.8643493652344, "eval_Qnli-dev_max_precision": 0.6907630522088354, "eval_Qnli-dev_max_recall": 0.8389830508474576, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8655421137809753, "eval_allNLI-dev_cosine_ap": 0.6199057502272656, "eval_allNLI-dev_cosine_f1": 0.6196581196581197, "eval_allNLI-dev_cosine_f1_threshold": 0.7334986329078674, "eval_allNLI-dev_cosine_precision": 0.4915254237288136, "eval_allNLI-dev_cosine_recall": 0.838150289017341, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 431.8148193359375, "eval_allNLI-dev_dot_ap": 0.5653917075856449, "eval_allNLI-dev_dot_f1": 0.5905511811023622, "eval_allNLI-dev_dot_f1_threshold": 312.14215087890625, "eval_allNLI-dev_dot_precision": 0.44776119402985076, "eval_allNLI-dev_dot_recall": 0.8670520231213873, "eval_allNLI-dev_euclidean_accuracy": 0.728515625, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.644176483154297, "eval_allNLI-dev_euclidean_ap": 0.6270448962719375, "eval_allNLI-dev_euclidean_f1": 0.6313559322033898, "eval_allNLI-dev_euclidean_f1_threshold": 15.444341659545898, "eval_allNLI-dev_euclidean_precision": 0.4983277591973244, "eval_allNLI-dev_euclidean_recall": 0.861271676300578, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 266.3878479003906, "eval_allNLI-dev_manhattan_ap": 0.6278081776272781, "eval_allNLI-dev_manhattan_f1": 0.6272912423625254, "eval_allNLI-dev_manhattan_f1_threshold": 334.9619140625, "eval_allNLI-dev_manhattan_precision": 0.48427672955974843, "eval_allNLI-dev_manhattan_recall": 0.8901734104046243, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 431.8148193359375, "eval_allNLI-dev_max_ap": 0.6278081776272781, "eval_allNLI-dev_max_f1": 0.6313559322033898, "eval_allNLI-dev_max_f1_threshold": 334.9619140625, "eval_allNLI-dev_max_precision": 0.4983277591973244, "eval_allNLI-dev_max_recall": 0.8901734104046243, "eval_sequential_score": 0.7748141826693925, "eval_sts-test_pearson_cosine": 0.8387821231954086, "eval_sts-test_pearson_dot": 0.8194059778660282, "eval_sts-test_pearson_euclidean": 0.8701197886154768, "eval_sts-test_pearson_manhattan": 0.8688087659481534, "eval_sts-test_pearson_max": 0.8701197886154768, "eval_sts-test_spearman_cosine": 0.8716132921722499, "eval_sts-test_spearman_dot": 0.8160590762809665, "eval_sts-test_spearman_euclidean": 0.8687709605562364, "eval_sts-test_spearman_manhattan": 0.8678245608803312, "eval_sts-test_spearman_max": 0.8716132921722499, "eval_vitaminc-pairs_loss": 3.0897140502929688, "eval_vitaminc-pairs_runtime": 3.1984, "eval_vitaminc-pairs_samples_per_second": 40.021, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_negation-triplets_loss": 0.9643632173538208, "eval_negation-triplets_runtime": 0.7595, "eval_negation-triplets_samples_per_second": 168.523, "eval_negation-triplets_steps_per_second": 1.317, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_scitail-pairs-pos_loss": 0.16198822855949402, "eval_scitail-pairs-pos_runtime": 0.8802, "eval_scitail-pairs-pos_samples_per_second": 145.415, "eval_scitail-pairs-pos_steps_per_second": 1.136, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_scitail-pairs-qa_loss": 0.000824337883386761, "eval_scitail-pairs-qa_runtime": 0.5936, "eval_scitail-pairs-qa_samples_per_second": 215.644, "eval_scitail-pairs-qa_steps_per_second": 1.685, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_xsum-pairs_loss": 0.2713284194469452, "eval_xsum-pairs_runtime": 3.0341, "eval_xsum-pairs_samples_per_second": 42.187, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_sciq_pairs_loss": 0.09531116485595703, "eval_sciq_pairs_runtime": 3.531, "eval_sciq_pairs_samples_per_second": 36.25, "eval_sciq_pairs_steps_per_second": 0.283, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_qasc_pairs_loss": 0.177913635969162, "eval_qasc_pairs_runtime": 0.6155, "eval_qasc_pairs_samples_per_second": 207.962, "eval_qasc_pairs_steps_per_second": 1.625, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_openbookqa_pairs_loss": 0.8163385987281799, "eval_openbookqa_pairs_runtime": 0.5932, "eval_openbookqa_pairs_samples_per_second": 215.767, "eval_openbookqa_pairs_steps_per_second": 1.686, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_msmarco_pairs_loss": 0.8340157866477966, "eval_msmarco_pairs_runtime": 1.5236, "eval_msmarco_pairs_samples_per_second": 84.012, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_nq_pairs_loss": 0.7388545274734497, "eval_nq_pairs_runtime": 2.9067, "eval_nq_pairs_samples_per_second": 44.037, "eval_nq_pairs_steps_per_second": 0.344, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_trivia_pairs_loss": 0.7573897838592529, "eval_trivia_pairs_runtime": 3.4382, "eval_trivia_pairs_samples_per_second": 37.228, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_gooaq_pairs_loss": 0.47093483805656433, "eval_gooaq_pairs_runtime": 0.9576, "eval_gooaq_pairs_samples_per_second": 133.674, "eval_gooaq_pairs_steps_per_second": 1.044, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_paws-pos_loss": 0.02307811565697193, "eval_paws-pos_runtime": 0.698, "eval_paws-pos_samples_per_second": 183.381, "eval_paws-pos_steps_per_second": 1.433, "step": 1680 }, { "epoch": 1.7283950617283952, "eval_global_dataset_loss": 0.490926593542099, "eval_global_dataset_runtime": 13.4017, "eval_global_dataset_samples_per_second": 31.041, "eval_global_dataset_steps_per_second": 0.298, "step": 1680 }, { "epoch": 1.7294238683127572, "grad_norm": 11.065298080444336, "learning_rate": 3.040013724840275e-05, "loss": 0.8887, "step": 1681 }, { "epoch": 1.7304526748971192, "grad_norm": 0.5682714581489563, "learning_rate": 3.0388185491944807e-05, "loss": 0.0087, "step": 1682 }, { "epoch": 1.7314814814814814, "grad_norm": 0.45284152030944824, "learning_rate": 3.0376222052252462e-05, "loss": 0.0037, "step": 1683 }, { "epoch": 1.7325102880658436, "grad_norm": 5.418968677520752, "learning_rate": 3.0364246949137852e-05, "loss": 0.1736, "step": 1684 }, { "epoch": 1.7335390946502058, "grad_norm": 5.002779006958008, "learning_rate": 3.035226020243243e-05, "loss": 0.1277, "step": 1685 }, { "epoch": 1.734567901234568, "grad_norm": 5.912565231323242, "learning_rate": 3.0340261831986916e-05, "loss": 0.1518, "step": 1686 }, { "epoch": 1.73559670781893, "grad_norm": 15.636818885803223, "learning_rate": 3.032825185767129e-05, "loss": 2.463, "step": 1687 }, { "epoch": 1.736625514403292, "grad_norm": 0.1352023035287857, "learning_rate": 3.0316230299374764e-05, "loss": 0.0016, "step": 1688 }, { "epoch": 1.7376543209876543, "grad_norm": 4.616379261016846, "learning_rate": 3.0304197177005704e-05, "loss": 0.0972, "step": 1689 }, { "epoch": 1.7386831275720165, "grad_norm": 10.943326950073242, "learning_rate": 3.029215251049165e-05, "loss": 0.7593, "step": 1690 }, { "epoch": 1.7397119341563787, "grad_norm": 5.806167125701904, "learning_rate": 3.0280096319779248e-05, "loss": 0.2645, "step": 1691 }, { "epoch": 1.7407407407407407, "grad_norm": 11.589315414428711, "learning_rate": 3.026802862483423e-05, "loss": 0.7249, "step": 1692 }, { "epoch": 1.741769547325103, "grad_norm": 6.475594520568848, "learning_rate": 3.0255949445641384e-05, "loss": 0.2953, "step": 1693 }, { "epoch": 1.742798353909465, "grad_norm": 0.2328501045703888, "learning_rate": 3.0243858802204513e-05, "loss": 0.0028, "step": 1694 }, { "epoch": 1.7438271604938271, "grad_norm": 9.56154727935791, "learning_rate": 3.0231756714546414e-05, "loss": 0.5131, "step": 1695 }, { "epoch": 1.7448559670781894, "grad_norm": 4.136692523956299, "learning_rate": 3.021964320270882e-05, "loss": 0.2149, "step": 1696 }, { "epoch": 1.7458847736625516, "grad_norm": 5.113592147827148, "learning_rate": 3.0207518286752394e-05, "loss": 0.1498, "step": 1697 }, { "epoch": 1.7469135802469136, "grad_norm": 8.567078590393066, "learning_rate": 3.019538198675669e-05, "loss": 0.5195, "step": 1698 }, { "epoch": 1.7479423868312756, "grad_norm": 8.085282325744629, "learning_rate": 3.0183234322820095e-05, "loss": 0.3056, "step": 1699 }, { "epoch": 1.7489711934156378, "grad_norm": 11.967960357666016, "learning_rate": 3.017107531505984e-05, "loss": 0.963, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7604169845581055, "eval_Qnli-dev_cosine_ap": 0.7527365909888275, "eval_Qnli-dev_cosine_f1": 0.7007299270072993, "eval_Qnli-dev_cosine_f1_threshold": 0.725959300994873, "eval_Qnli-dev_cosine_precision": 0.6153846153846154, "eval_Qnli-dev_cosine_recall": 0.8135593220338984, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 362.0553283691406, "eval_Qnli-dev_dot_ap": 0.6872407956813215, "eval_Qnli-dev_dot_f1": 0.6820428336079077, "eval_Qnli-dev_dot_f1_threshold": 311.60198974609375, "eval_Qnli-dev_dot_precision": 0.5579514824797843, "eval_Qnli-dev_dot_recall": 0.8771186440677966, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.010302543640137, "eval_Qnli-dev_euclidean_ap": 0.7629389626551923, "eval_Qnli-dev_euclidean_f1": 0.7172675521821632, "eval_Qnli-dev_euclidean_f1_threshold": 15.582939147949219, "eval_Qnli-dev_euclidean_precision": 0.6494845360824743, "eval_Qnli-dev_euclidean_recall": 0.8008474576271186, "eval_Qnli-dev_manhattan_accuracy": 0.7265625, "eval_Qnli-dev_manhattan_accuracy_threshold": 311.5965576171875, "eval_Qnli-dev_manhattan_ap": 0.7679652150220635, "eval_Qnli-dev_manhattan_f1": 0.7154471544715446, "eval_Qnli-dev_manhattan_f1_threshold": 314.4476318359375, "eval_Qnli-dev_manhattan_precision": 0.6875, "eval_Qnli-dev_manhattan_recall": 0.7457627118644068, "eval_Qnli-dev_max_accuracy": 0.7265625, "eval_Qnli-dev_max_accuracy_threshold": 362.0553283691406, "eval_Qnli-dev_max_ap": 0.7679652150220635, "eval_Qnli-dev_max_f1": 0.7172675521821632, "eval_Qnli-dev_max_f1_threshold": 314.4476318359375, "eval_Qnli-dev_max_precision": 0.6875, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8628346920013428, "eval_allNLI-dev_cosine_ap": 0.6221178281447802, "eval_allNLI-dev_cosine_f1": 0.6227848101265824, "eval_allNLI-dev_cosine_f1_threshold": 0.7958093881607056, "eval_allNLI-dev_cosine_precision": 0.5540540540540541, "eval_allNLI-dev_cosine_recall": 0.7109826589595376, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 435.14715576171875, "eval_allNLI-dev_dot_ap": 0.5705940910885097, "eval_allNLI-dev_dot_f1": 0.5936842105263158, "eval_allNLI-dev_dot_f1_threshold": 340.51177978515625, "eval_allNLI-dev_dot_precision": 0.46688741721854304, "eval_allNLI-dev_dot_recall": 0.815028901734104, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.794170379638672, "eval_allNLI-dev_euclidean_ap": 0.6288270076880096, "eval_allNLI-dev_euclidean_f1": 0.6223277909738717, "eval_allNLI-dev_euclidean_f1_threshold": 14.098909378051758, "eval_allNLI-dev_euclidean_precision": 0.5282258064516129, "eval_allNLI-dev_euclidean_recall": 0.7572254335260116, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 239.71444702148438, "eval_allNLI-dev_manhattan_ap": 0.6284234020719052, "eval_allNLI-dev_manhattan_f1": 0.6225165562913907, "eval_allNLI-dev_manhattan_f1_threshold": 308.7938537597656, "eval_allNLI-dev_manhattan_precision": 0.5035714285714286, "eval_allNLI-dev_manhattan_recall": 0.815028901734104, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 435.14715576171875, "eval_allNLI-dev_max_ap": 0.6288270076880096, "eval_allNLI-dev_max_f1": 0.6227848101265824, "eval_allNLI-dev_max_f1_threshold": 340.51177978515625, "eval_allNLI-dev_max_precision": 0.5540540540540541, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.7679652150220635, "eval_sts-test_pearson_cosine": 0.8360733316299009, "eval_sts-test_pearson_dot": 0.8174733173178559, "eval_sts-test_pearson_euclidean": 0.8659311063312447, "eval_sts-test_pearson_manhattan": 0.86362358195659, "eval_sts-test_pearson_max": 0.8659311063312447, "eval_sts-test_spearman_cosine": 0.8685504710913611, "eval_sts-test_spearman_dot": 0.8149406975075341, "eval_sts-test_spearman_euclidean": 0.864604483866913, "eval_sts-test_spearman_manhattan": 0.862906576258462, "eval_sts-test_spearman_max": 0.8685504710913611, "eval_vitaminc-pairs_loss": 2.857945442199707, "eval_vitaminc-pairs_runtime": 3.217, "eval_vitaminc-pairs_samples_per_second": 39.789, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_negation-triplets_loss": 0.948692262172699, "eval_negation-triplets_runtime": 0.7517, "eval_negation-triplets_samples_per_second": 170.288, "eval_negation-triplets_steps_per_second": 1.33, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_scitail-pairs-pos_loss": 0.1663469523191452, "eval_scitail-pairs-pos_runtime": 0.8813, "eval_scitail-pairs-pos_samples_per_second": 145.238, "eval_scitail-pairs-pos_steps_per_second": 1.135, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_scitail-pairs-qa_loss": 0.001054697553627193, "eval_scitail-pairs-qa_runtime": 0.622, "eval_scitail-pairs-qa_samples_per_second": 205.776, "eval_scitail-pairs-qa_steps_per_second": 1.608, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_xsum-pairs_loss": 0.33091622591018677, "eval_xsum-pairs_runtime": 3.0341, "eval_xsum-pairs_samples_per_second": 42.187, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_sciq_pairs_loss": 0.09529092162847519, "eval_sciq_pairs_runtime": 3.4966, "eval_sciq_pairs_samples_per_second": 36.607, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_qasc_pairs_loss": 0.18329016864299774, "eval_qasc_pairs_runtime": 0.616, "eval_qasc_pairs_samples_per_second": 207.803, "eval_qasc_pairs_steps_per_second": 1.623, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_openbookqa_pairs_loss": 0.8162494897842407, "eval_openbookqa_pairs_runtime": 0.5922, "eval_openbookqa_pairs_samples_per_second": 216.145, "eval_openbookqa_pairs_steps_per_second": 1.689, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_msmarco_pairs_loss": 0.8438387513160706, "eval_msmarco_pairs_runtime": 1.5245, "eval_msmarco_pairs_samples_per_second": 83.965, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_nq_pairs_loss": 0.742292046546936, "eval_nq_pairs_runtime": 2.9204, "eval_nq_pairs_samples_per_second": 43.83, "eval_nq_pairs_steps_per_second": 0.342, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_trivia_pairs_loss": 0.7448931336402893, "eval_trivia_pairs_runtime": 3.4403, "eval_trivia_pairs_samples_per_second": 37.206, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_gooaq_pairs_loss": 0.4432274103164673, "eval_gooaq_pairs_runtime": 0.9523, "eval_gooaq_pairs_samples_per_second": 134.404, "eval_gooaq_pairs_steps_per_second": 1.05, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_paws-pos_loss": 0.02417844533920288, "eval_paws-pos_runtime": 0.7045, "eval_paws-pos_samples_per_second": 181.702, "eval_paws-pos_steps_per_second": 1.42, "step": 1700 }, { "epoch": 1.7489711934156378, "eval_global_dataset_loss": 0.4446592926979065, "eval_global_dataset_runtime": 13.402, "eval_global_dataset_samples_per_second": 31.04, "eval_global_dataset_steps_per_second": 0.298, "step": 1700 }, { "epoch": 1.75, "grad_norm": 6.624517917633057, "learning_rate": 3.015890498361192e-05, "loss": 0.2111, "step": 1701 }, { "epoch": 1.7510288065843622, "grad_norm": 0.6696828007698059, "learning_rate": 3.0146723348631097e-05, "loss": 0.0115, "step": 1702 }, { "epoch": 1.7520576131687244, "grad_norm": 4.012869834899902, "learning_rate": 3.0134530430290853e-05, "loss": 0.2114, "step": 1703 }, { "epoch": 1.7530864197530864, "grad_norm": 8.354767799377441, "learning_rate": 3.0122326248783344e-05, "loss": 0.513, "step": 1704 }, { "epoch": 1.7541152263374484, "grad_norm": 0.6043940186500549, "learning_rate": 3.011011082431938e-05, "loss": 0.0274, "step": 1705 }, { "epoch": 1.7551440329218106, "grad_norm": 5.665579795837402, "learning_rate": 3.009788417712841e-05, "loss": 0.2203, "step": 1706 }, { "epoch": 1.7561728395061729, "grad_norm": 5.739221096038818, "learning_rate": 3.0085646327458443e-05, "loss": 0.2425, "step": 1707 }, { "epoch": 1.757201646090535, "grad_norm": 9.040894508361816, "learning_rate": 3.0073397295576058e-05, "loss": 0.5636, "step": 1708 }, { "epoch": 1.758230452674897, "grad_norm": 9.729278564453125, "learning_rate": 3.0061137101766337e-05, "loss": 0.5244, "step": 1709 }, { "epoch": 1.7592592592592593, "grad_norm": 8.09842300415039, "learning_rate": 3.0048865766332856e-05, "loss": 0.6237, "step": 1710 }, { "epoch": 1.7602880658436213, "grad_norm": 9.805746078491211, "learning_rate": 3.0036583309597647e-05, "loss": 0.7565, "step": 1711 }, { "epoch": 1.7613168724279835, "grad_norm": 7.657242298126221, "learning_rate": 3.0024289751901142e-05, "loss": 0.3421, "step": 1712 }, { "epoch": 1.7623456790123457, "grad_norm": 5.437000274658203, "learning_rate": 3.0011985113602184e-05, "loss": 0.2019, "step": 1713 }, { "epoch": 1.763374485596708, "grad_norm": 0.8471786379814148, "learning_rate": 2.999966941507794e-05, "loss": 0.0141, "step": 1714 }, { "epoch": 1.76440329218107, "grad_norm": 11.529598236083984, "learning_rate": 2.99873426767239e-05, "loss": 1.0066, "step": 1715 }, { "epoch": 1.765432098765432, "grad_norm": 7.448866367340088, "learning_rate": 2.997500491895385e-05, "loss": 0.375, "step": 1716 }, { "epoch": 1.7664609053497942, "grad_norm": 8.113354682922363, "learning_rate": 2.9962656162199813e-05, "loss": 0.7385, "step": 1717 }, { "epoch": 1.7674897119341564, "grad_norm": 13.676114082336426, "learning_rate": 2.995029642691202e-05, "loss": 1.9866, "step": 1718 }, { "epoch": 1.7685185185185186, "grad_norm": 11.367154121398926, "learning_rate": 2.9937925733558906e-05, "loss": 0.9198, "step": 1719 }, { "epoch": 1.7695473251028808, "grad_norm": 5.2121758460998535, "learning_rate": 2.992554410262703e-05, "loss": 0.2244, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7742512226104736, "eval_Qnli-dev_cosine_ap": 0.7511678896557296, "eval_Qnli-dev_cosine_f1": 0.7015706806282722, "eval_Qnli-dev_cosine_f1_threshold": 0.7171704769134521, "eval_Qnli-dev_cosine_precision": 0.5964391691394659, "eval_Qnli-dev_cosine_recall": 0.8516949152542372, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 367.8869934082031, "eval_Qnli-dev_dot_ap": 0.6853356083866112, "eval_Qnli-dev_dot_f1": 0.6743421052631579, "eval_Qnli-dev_dot_f1_threshold": 324.0341796875, "eval_Qnli-dev_dot_precision": 0.5510752688172043, "eval_Qnli-dev_dot_recall": 0.8686440677966102, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.813480377197266, "eval_Qnli-dev_euclidean_ap": 0.7629628392039234, "eval_Qnli-dev_euclidean_f1": 0.712871287128713, "eval_Qnli-dev_euclidean_f1_threshold": 15.134451866149902, "eval_Qnli-dev_euclidean_precision": 0.6691449814126395, "eval_Qnli-dev_euclidean_recall": 0.7627118644067796, "eval_Qnli-dev_manhattan_accuracy": 0.720703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 311.506103515625, "eval_Qnli-dev_manhattan_ap": 0.7642876785648607, "eval_Qnli-dev_manhattan_f1": 0.7161904761904763, "eval_Qnli-dev_manhattan_f1_threshold": 325.3240051269531, "eval_Qnli-dev_manhattan_precision": 0.6505190311418685, "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, "eval_Qnli-dev_max_accuracy": 0.72265625, "eval_Qnli-dev_max_accuracy_threshold": 367.8869934082031, "eval_Qnli-dev_max_ap": 0.7642876785648607, "eval_Qnli-dev_max_f1": 0.7161904761904763, "eval_Qnli-dev_max_f1_threshold": 325.3240051269531, "eval_Qnli-dev_max_precision": 0.6691449814126395, "eval_Qnli-dev_max_recall": 0.8686440677966102, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8379455804824829, "eval_allNLI-dev_cosine_ap": 0.6147320671005745, "eval_allNLI-dev_cosine_f1": 0.6237006237006237, "eval_allNLI-dev_cosine_f1_threshold": 0.7275093793869019, "eval_allNLI-dev_cosine_precision": 0.487012987012987, "eval_allNLI-dev_cosine_recall": 0.8670520231213873, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 425.1505432128906, "eval_allNLI-dev_dot_ap": 0.570508767236879, "eval_allNLI-dev_dot_f1": 0.5986078886310906, "eval_allNLI-dev_dot_f1_threshold": 344.5698547363281, "eval_allNLI-dev_dot_precision": 0.5, "eval_allNLI-dev_dot_recall": 0.7456647398843931, "eval_allNLI-dev_euclidean_accuracy": 0.7265625, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.189596176147461, "eval_allNLI-dev_euclidean_ap": 0.6212248418652493, "eval_allNLI-dev_euclidean_f1": 0.6286919831223629, "eval_allNLI-dev_euclidean_f1_threshold": 15.712522506713867, "eval_allNLI-dev_euclidean_precision": 0.4950166112956811, "eval_allNLI-dev_euclidean_recall": 0.861271676300578, "eval_allNLI-dev_manhattan_accuracy": 0.72265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 261.0384521484375, "eval_allNLI-dev_manhattan_ap": 0.6211968962942763, "eval_allNLI-dev_manhattan_f1": 0.634453781512605, "eval_allNLI-dev_manhattan_f1_threshold": 329.73175048828125, "eval_allNLI-dev_manhattan_precision": 0.49834983498349833, "eval_allNLI-dev_manhattan_recall": 0.8728323699421965, "eval_allNLI-dev_max_accuracy": 0.7265625, "eval_allNLI-dev_max_accuracy_threshold": 425.1505432128906, "eval_allNLI-dev_max_ap": 0.6212248418652493, "eval_allNLI-dev_max_f1": 0.634453781512605, "eval_allNLI-dev_max_f1_threshold": 344.5698547363281, "eval_allNLI-dev_max_precision": 0.5, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.7642876785648607, "eval_sts-test_pearson_cosine": 0.8467679944231753, "eval_sts-test_pearson_dot": 0.8348341822053502, "eval_sts-test_pearson_euclidean": 0.8741510716771601, "eval_sts-test_pearson_manhattan": 0.8712477302983147, "eval_sts-test_pearson_max": 0.8741510716771601, "eval_sts-test_spearman_cosine": 0.8743986106812506, "eval_sts-test_spearman_dot": 0.8325327316193228, "eval_sts-test_spearman_euclidean": 0.8708555638143707, "eval_sts-test_spearman_manhattan": 0.8677390412198123, "eval_sts-test_spearman_max": 0.8743986106812506, "eval_vitaminc-pairs_loss": 2.8903887271881104, "eval_vitaminc-pairs_runtime": 3.2232, "eval_vitaminc-pairs_samples_per_second": 39.712, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_negation-triplets_loss": 0.9841980338096619, "eval_negation-triplets_runtime": 0.7681, "eval_negation-triplets_samples_per_second": 166.649, "eval_negation-triplets_steps_per_second": 1.302, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_scitail-pairs-pos_loss": 0.15102441608905792, "eval_scitail-pairs-pos_runtime": 0.8832, "eval_scitail-pairs-pos_samples_per_second": 144.923, "eval_scitail-pairs-pos_steps_per_second": 1.132, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_scitail-pairs-qa_loss": 0.0011362830409780145, "eval_scitail-pairs-qa_runtime": 0.6051, "eval_scitail-pairs-qa_samples_per_second": 211.549, "eval_scitail-pairs-qa_steps_per_second": 1.653, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_xsum-pairs_loss": 0.29924851655960083, "eval_xsum-pairs_runtime": 3.0304, "eval_xsum-pairs_samples_per_second": 42.239, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_sciq_pairs_loss": 0.09244251996278763, "eval_sciq_pairs_runtime": 3.5097, "eval_sciq_pairs_samples_per_second": 36.471, "eval_sciq_pairs_steps_per_second": 0.285, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_qasc_pairs_loss": 0.19185201823711395, "eval_qasc_pairs_runtime": 0.6201, "eval_qasc_pairs_samples_per_second": 206.412, "eval_qasc_pairs_steps_per_second": 1.613, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_openbookqa_pairs_loss": 0.7592263221740723, "eval_openbookqa_pairs_runtime": 0.6012, "eval_openbookqa_pairs_samples_per_second": 212.916, "eval_openbookqa_pairs_steps_per_second": 1.663, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_msmarco_pairs_loss": 0.7812709212303162, "eval_msmarco_pairs_runtime": 1.5207, "eval_msmarco_pairs_samples_per_second": 84.169, "eval_msmarco_pairs_steps_per_second": 0.658, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_nq_pairs_loss": 0.691717803478241, "eval_nq_pairs_runtime": 2.914, "eval_nq_pairs_samples_per_second": 43.926, "eval_nq_pairs_steps_per_second": 0.343, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_trivia_pairs_loss": 0.7600052952766418, "eval_trivia_pairs_runtime": 3.4577, "eval_trivia_pairs_samples_per_second": 37.019, "eval_trivia_pairs_steps_per_second": 0.289, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_gooaq_pairs_loss": 0.44834327697753906, "eval_gooaq_pairs_runtime": 0.9546, "eval_gooaq_pairs_samples_per_second": 134.084, "eval_gooaq_pairs_steps_per_second": 1.048, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_paws-pos_loss": 0.022823384031653404, "eval_paws-pos_runtime": 0.7025, "eval_paws-pos_samples_per_second": 182.197, "eval_paws-pos_steps_per_second": 1.423, "step": 1720 }, { "epoch": 1.7695473251028808, "eval_global_dataset_loss": 0.4340953230857849, "eval_global_dataset_runtime": 13.3972, "eval_global_dataset_samples_per_second": 31.051, "eval_global_dataset_steps_per_second": 0.299, "step": 1720 }, { "epoch": 1.7705761316872428, "grad_norm": 8.054941177368164, "learning_rate": 2.9913151554621073e-05, "loss": 0.5082, "step": 1721 }, { "epoch": 1.7716049382716048, "grad_norm": 6.68974494934082, "learning_rate": 2.9900748110063805e-05, "loss": 0.4326, "step": 1722 }, { "epoch": 1.772633744855967, "grad_norm": 4.167799472808838, "learning_rate": 2.988833378949602e-05, "loss": 0.1983, "step": 1723 }, { "epoch": 1.7736625514403292, "grad_norm": 1.2141571044921875, "learning_rate": 2.987590861347654e-05, "loss": 0.0274, "step": 1724 }, { "epoch": 1.7746913580246915, "grad_norm": 4.751448631286621, "learning_rate": 2.9863472602582166e-05, "loss": 0.1137, "step": 1725 }, { "epoch": 1.7757201646090535, "grad_norm": 4.7939043045043945, "learning_rate": 2.9851025777407626e-05, "loss": 0.1116, "step": 1726 }, { "epoch": 1.7767489711934157, "grad_norm": 3.551082134246826, "learning_rate": 2.9838568158565572e-05, "loss": 0.092, "step": 1727 }, { "epoch": 1.7777777777777777, "grad_norm": 6.242792129516602, "learning_rate": 2.9826099766686522e-05, "loss": 0.1858, "step": 1728 }, { "epoch": 1.77880658436214, "grad_norm": 8.63688850402832, "learning_rate": 2.9813620622418844e-05, "loss": 0.6419, "step": 1729 }, { "epoch": 1.7798353909465021, "grad_norm": 4.686567783355713, "learning_rate": 2.9801130746428707e-05, "loss": 0.1703, "step": 1730 }, { "epoch": 1.7808641975308643, "grad_norm": 12.06086254119873, "learning_rate": 2.9788630159400047e-05, "loss": 0.7053, "step": 1731 }, { "epoch": 1.7818930041152263, "grad_norm": 15.538905143737793, "learning_rate": 2.9776118882034548e-05, "loss": 2.1567, "step": 1732 }, { "epoch": 1.7829218106995883, "grad_norm": 8.181844711303711, "learning_rate": 2.9763596935051593e-05, "loss": 0.4677, "step": 1733 }, { "epoch": 1.7839506172839505, "grad_norm": 6.751974582672119, "learning_rate": 2.975106433918823e-05, "loss": 0.4804, "step": 1734 }, { "epoch": 1.7849794238683128, "grad_norm": 8.886804580688477, "learning_rate": 2.9738521115199155e-05, "loss": 0.5776, "step": 1735 }, { "epoch": 1.786008230452675, "grad_norm": 10.950471878051758, "learning_rate": 2.9725967283856647e-05, "loss": 0.6962, "step": 1736 }, { "epoch": 1.7870370370370372, "grad_norm": 9.701720237731934, "learning_rate": 2.971340286595057e-05, "loss": 0.6357, "step": 1737 }, { "epoch": 1.7880658436213992, "grad_norm": 7.431054592132568, "learning_rate": 2.9700827882288304e-05, "loss": 0.3795, "step": 1738 }, { "epoch": 1.7890946502057612, "grad_norm": 6.998332500457764, "learning_rate": 2.968824235369474e-05, "loss": 0.3097, "step": 1739 }, { "epoch": 1.7901234567901234, "grad_norm": 5.758817672729492, "learning_rate": 2.9675646301012223e-05, "loss": 0.1951, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7681889533996582, "eval_Qnli-dev_cosine_ap": 0.7517988022702223, "eval_Qnli-dev_cosine_f1": 0.6947368421052632, "eval_Qnli-dev_cosine_f1_threshold": 0.7106826901435852, "eval_Qnli-dev_cosine_precision": 0.592814371257485, "eval_Qnli-dev_cosine_recall": 0.8389830508474576, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 350.132080078125, "eval_Qnli-dev_dot_ap": 0.6854254214315277, "eval_Qnli-dev_dot_f1": 0.6731078904991948, "eval_Qnli-dev_dot_f1_threshold": 299.95361328125, "eval_Qnli-dev_dot_precision": 0.5428571428571428, "eval_Qnli-dev_dot_recall": 0.885593220338983, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.489816665649414, "eval_Qnli-dev_euclidean_ap": 0.7624602971280614, "eval_Qnli-dev_euclidean_f1": 0.699619771863118, "eval_Qnli-dev_euclidean_f1_threshold": 15.463302612304688, "eval_Qnli-dev_euclidean_precision": 0.6344827586206897, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.724609375, "eval_Qnli-dev_manhattan_accuracy_threshold": 299.10162353515625, "eval_Qnli-dev_manhattan_ap": 0.7635226389705461, "eval_Qnli-dev_manhattan_f1": 0.7079646017699116, "eval_Qnli-dev_manhattan_f1_threshold": 339.8573913574219, "eval_Qnli-dev_manhattan_precision": 0.60790273556231, "eval_Qnli-dev_manhattan_recall": 0.847457627118644, "eval_Qnli-dev_max_accuracy": 0.724609375, "eval_Qnli-dev_max_accuracy_threshold": 350.132080078125, "eval_Qnli-dev_max_ap": 0.7635226389705461, "eval_Qnli-dev_max_f1": 0.7079646017699116, "eval_Qnli-dev_max_f1_threshold": 339.8573913574219, "eval_Qnli-dev_max_precision": 0.6344827586206897, "eval_Qnli-dev_max_recall": 0.885593220338983, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8514525890350342, "eval_allNLI-dev_cosine_ap": 0.6115432379651945, "eval_allNLI-dev_cosine_f1": 0.6218905472636818, "eval_allNLI-dev_cosine_f1_threshold": 0.7630959153175354, "eval_allNLI-dev_cosine_precision": 0.5458515283842795, "eval_allNLI-dev_cosine_recall": 0.7225433526011561, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 370.92681884765625, "eval_allNLI-dev_dot_ap": 0.5501280898628993, "eval_allNLI-dev_dot_f1": 0.5908096280087528, "eval_allNLI-dev_dot_f1_threshold": 313.216552734375, "eval_allNLI-dev_dot_precision": 0.4753521126760563, "eval_allNLI-dev_dot_recall": 0.7803468208092486, "eval_allNLI-dev_euclidean_accuracy": 0.7265625, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.800989151000977, "eval_allNLI-dev_euclidean_ap": 0.6186842072041568, "eval_allNLI-dev_euclidean_f1": 0.6338028169014085, "eval_allNLI-dev_euclidean_f1_threshold": 14.879294395446777, "eval_allNLI-dev_euclidean_precision": 0.5335968379446641, "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, "eval_allNLI-dev_manhattan_accuracy": 0.72265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 247.43714904785156, "eval_allNLI-dev_manhattan_ap": 0.6174363481742949, "eval_allNLI-dev_manhattan_f1": 0.6382978723404256, "eval_allNLI-dev_manhattan_f1_threshold": 305.5586853027344, "eval_allNLI-dev_manhattan_precision": 0.54, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.7265625, "eval_allNLI-dev_max_accuracy_threshold": 370.92681884765625, "eval_allNLI-dev_max_ap": 0.6186842072041568, "eval_allNLI-dev_max_f1": 0.6382978723404256, "eval_allNLI-dev_max_f1_threshold": 313.216552734375, "eval_allNLI-dev_max_precision": 0.5458515283842795, "eval_allNLI-dev_max_recall": 0.7803468208092486, "eval_sequential_score": 0.7635226389705461, "eval_sts-test_pearson_cosine": 0.8418497367623062, "eval_sts-test_pearson_dot": 0.8319914987981861, "eval_sts-test_pearson_euclidean": 0.870628075813072, "eval_sts-test_pearson_manhattan": 0.8687366912778405, "eval_sts-test_pearson_max": 0.870628075813072, "eval_sts-test_spearman_cosine": 0.8742475304227931, "eval_sts-test_spearman_dot": 0.8359519964971941, "eval_sts-test_spearman_euclidean": 0.8691112043965953, "eval_sts-test_spearman_manhattan": 0.866967350468379, "eval_sts-test_spearman_max": 0.8742475304227931, "eval_vitaminc-pairs_loss": 2.995783805847168, "eval_vitaminc-pairs_runtime": 3.2157, "eval_vitaminc-pairs_samples_per_second": 39.804, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_negation-triplets_loss": 0.9560091495513916, "eval_negation-triplets_runtime": 0.7561, "eval_negation-triplets_samples_per_second": 169.294, "eval_negation-triplets_steps_per_second": 1.323, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_scitail-pairs-pos_loss": 0.15315373241901398, "eval_scitail-pairs-pos_runtime": 0.9068, "eval_scitail-pairs-pos_samples_per_second": 141.158, "eval_scitail-pairs-pos_steps_per_second": 1.103, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_scitail-pairs-qa_loss": 0.0012944067129865289, "eval_scitail-pairs-qa_runtime": 0.6418, "eval_scitail-pairs-qa_samples_per_second": 199.45, "eval_scitail-pairs-qa_steps_per_second": 1.558, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_xsum-pairs_loss": 0.28352904319763184, "eval_xsum-pairs_runtime": 3.084, "eval_xsum-pairs_samples_per_second": 41.505, "eval_xsum-pairs_steps_per_second": 0.324, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_sciq_pairs_loss": 0.09403456002473831, "eval_sciq_pairs_runtime": 3.5226, "eval_sciq_pairs_samples_per_second": 36.336, "eval_sciq_pairs_steps_per_second": 0.284, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_qasc_pairs_loss": 0.17431268095970154, "eval_qasc_pairs_runtime": 0.6405, "eval_qasc_pairs_samples_per_second": 199.85, "eval_qasc_pairs_steps_per_second": 1.561, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_openbookqa_pairs_loss": 0.7557939887046814, "eval_openbookqa_pairs_runtime": 0.6024, "eval_openbookqa_pairs_samples_per_second": 212.501, "eval_openbookqa_pairs_steps_per_second": 1.66, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_msmarco_pairs_loss": 0.8533817529678345, "eval_msmarco_pairs_runtime": 1.5256, "eval_msmarco_pairs_samples_per_second": 83.902, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_nq_pairs_loss": 0.7863667607307434, "eval_nq_pairs_runtime": 2.9002, "eval_nq_pairs_samples_per_second": 44.135, "eval_nq_pairs_steps_per_second": 0.345, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_trivia_pairs_loss": 0.7835397124290466, "eval_trivia_pairs_runtime": 3.4388, "eval_trivia_pairs_samples_per_second": 37.222, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_gooaq_pairs_loss": 0.37319663166999817, "eval_gooaq_pairs_runtime": 0.9542, "eval_gooaq_pairs_samples_per_second": 134.141, "eval_gooaq_pairs_steps_per_second": 1.048, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_paws-pos_loss": 0.021576495841145515, "eval_paws-pos_runtime": 0.7028, "eval_paws-pos_samples_per_second": 182.126, "eval_paws-pos_steps_per_second": 1.423, "step": 1740 }, { "epoch": 1.7901234567901234, "eval_global_dataset_loss": 0.4613242745399475, "eval_global_dataset_runtime": 13.3972, "eval_global_dataset_samples_per_second": 31.051, "eval_global_dataset_steps_per_second": 0.299, "step": 1740 }, { "epoch": 1.7911522633744856, "grad_norm": 4.965134143829346, "learning_rate": 2.9663039745100525e-05, "loss": 0.186, "step": 1741 }, { "epoch": 1.7921810699588478, "grad_norm": 8.625872611999512, "learning_rate": 2.9650422706836824e-05, "loss": 0.4295, "step": 1742 }, { "epoch": 1.7932098765432098, "grad_norm": 7.308938026428223, "learning_rate": 2.9637795207115638e-05, "loss": 0.4061, "step": 1743 }, { "epoch": 1.794238683127572, "grad_norm": 6.121941089630127, "learning_rate": 2.962515726684883e-05, "loss": 0.2292, "step": 1744 }, { "epoch": 1.795267489711934, "grad_norm": 5.254240036010742, "learning_rate": 2.9612508906965546e-05, "loss": 0.1824, "step": 1745 }, { "epoch": 1.7962962962962963, "grad_norm": 8.089666366577148, "learning_rate": 2.9599850148412184e-05, "loss": 0.5721, "step": 1746 }, { "epoch": 1.7973251028806585, "grad_norm": 3.1263222694396973, "learning_rate": 2.958718101215236e-05, "loss": 0.0676, "step": 1747 }, { "epoch": 1.7983539094650207, "grad_norm": 11.244380950927734, "learning_rate": 2.957450151916688e-05, "loss": 0.6563, "step": 1748 }, { "epoch": 1.7993827160493827, "grad_norm": 6.730269432067871, "learning_rate": 2.956181169045371e-05, "loss": 0.5341, "step": 1749 }, { "epoch": 1.8004115226337447, "grad_norm": 9.72786808013916, "learning_rate": 2.9549111547027912e-05, "loss": 0.5455, "step": 1750 }, { "epoch": 1.801440329218107, "grad_norm": 4.4380598068237305, "learning_rate": 2.9536401109921654e-05, "loss": 0.1567, "step": 1751 }, { "epoch": 1.8024691358024691, "grad_norm": 10.577998161315918, "learning_rate": 2.952368040018413e-05, "loss": 0.7594, "step": 1752 }, { "epoch": 1.8034979423868314, "grad_norm": 8.436469078063965, "learning_rate": 2.951094943888157e-05, "loss": 0.525, "step": 1753 }, { "epoch": 1.8045267489711934, "grad_norm": 0.7109900116920471, "learning_rate": 2.9498208247097146e-05, "loss": 0.0113, "step": 1754 }, { "epoch": 1.8055555555555556, "grad_norm": 6.9339118003845215, "learning_rate": 2.948545684593101e-05, "loss": 0.2422, "step": 1755 }, { "epoch": 1.8065843621399176, "grad_norm": 4.169605255126953, "learning_rate": 2.947269525650019e-05, "loss": 0.1063, "step": 1756 }, { "epoch": 1.8076131687242798, "grad_norm": 12.122464179992676, "learning_rate": 2.9459923499938614e-05, "loss": 0.9026, "step": 1757 }, { "epoch": 1.808641975308642, "grad_norm": 9.732711791992188, "learning_rate": 2.9447141597397024e-05, "loss": 0.6694, "step": 1758 }, { "epoch": 1.8096707818930042, "grad_norm": 7.252587795257568, "learning_rate": 2.9434349570042973e-05, "loss": 0.3589, "step": 1759 }, { "epoch": 1.8106995884773662, "grad_norm": 7.175245761871338, "learning_rate": 2.942154743906079e-05, "loss": 0.6306, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7840002179145813, "eval_Qnli-dev_cosine_ap": 0.7449589017070668, "eval_Qnli-dev_cosine_f1": 0.6887417218543046, "eval_Qnli-dev_cosine_f1_threshold": 0.6837900876998901, "eval_Qnli-dev_cosine_precision": 0.5652173913043478, "eval_Qnli-dev_cosine_recall": 0.8813559322033898, "eval_Qnli-dev_dot_accuracy": 0.6640625, "eval_Qnli-dev_dot_accuracy_threshold": 364.4993896484375, "eval_Qnli-dev_dot_ap": 0.6794151451255108, "eval_Qnli-dev_dot_f1": 0.6666666666666667, "eval_Qnli-dev_dot_f1_threshold": 312.6728210449219, "eval_Qnli-dev_dot_precision": 0.5494505494505495, "eval_Qnli-dev_dot_recall": 0.847457627118644, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.930747985839844, "eval_Qnli-dev_euclidean_ap": 0.7542818639236084, "eval_Qnli-dev_euclidean_f1": 0.6925925925925925, "eval_Qnli-dev_euclidean_f1_threshold": 15.77761459350586, "eval_Qnli-dev_euclidean_precision": 0.6151315789473685, "eval_Qnli-dev_euclidean_recall": 0.7923728813559322, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 315.01434326171875, "eval_Qnli-dev_manhattan_ap": 0.7559928251905965, "eval_Qnli-dev_manhattan_f1": 0.6969696969696969, "eval_Qnli-dev_manhattan_f1_threshold": 328.4678649902344, "eval_Qnli-dev_manhattan_precision": 0.6301369863013698, "eval_Qnli-dev_manhattan_recall": 0.7796610169491526, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 364.4993896484375, "eval_Qnli-dev_max_ap": 0.7559928251905965, "eval_Qnli-dev_max_f1": 0.6969696969696969, "eval_Qnli-dev_max_f1_threshold": 328.4678649902344, "eval_Qnli-dev_max_precision": 0.6301369863013698, "eval_Qnli-dev_max_recall": 0.8813559322033898, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8826321363449097, "eval_allNLI-dev_cosine_ap": 0.6195459354789438, "eval_allNLI-dev_cosine_f1": 0.6300715990453462, "eval_allNLI-dev_cosine_f1_threshold": 0.7773683071136475, "eval_allNLI-dev_cosine_precision": 0.5365853658536586, "eval_allNLI-dev_cosine_recall": 0.7630057803468208, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 395.90087890625, "eval_allNLI-dev_dot_ap": 0.5531812655258643, "eval_allNLI-dev_dot_f1": 0.5896907216494846, "eval_allNLI-dev_dot_f1_threshold": 334.66156005859375, "eval_allNLI-dev_dot_precision": 0.4583333333333333, "eval_allNLI-dev_dot_recall": 0.8265895953757225, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.04653549194336, "eval_allNLI-dev_euclidean_ap": 0.6236278489279753, "eval_allNLI-dev_euclidean_f1": 0.636144578313253, "eval_allNLI-dev_euclidean_f1_threshold": 14.140835762023926, "eval_allNLI-dev_euclidean_precision": 0.5454545454545454, "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 237.88970947265625, "eval_allNLI-dev_manhattan_ap": 0.6230023335049633, "eval_allNLI-dev_manhattan_f1": 0.64, "eval_allNLI-dev_manhattan_f1_threshold": 287.5178527832031, "eval_allNLI-dev_manhattan_precision": 0.5638766519823789, "eval_allNLI-dev_manhattan_recall": 0.7398843930635838, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 395.90087890625, "eval_allNLI-dev_max_ap": 0.6236278489279753, "eval_allNLI-dev_max_f1": 0.64, "eval_allNLI-dev_max_f1_threshold": 334.66156005859375, "eval_allNLI-dev_max_precision": 0.5638766519823789, "eval_allNLI-dev_max_recall": 0.8265895953757225, "eval_sequential_score": 0.7559928251905965, "eval_sts-test_pearson_cosine": 0.8382935547706215, "eval_sts-test_pearson_dot": 0.8278419316801597, "eval_sts-test_pearson_euclidean": 0.8696684036856096, "eval_sts-test_pearson_manhattan": 0.8674603738859493, "eval_sts-test_pearson_max": 0.8696684036856096, "eval_sts-test_spearman_cosine": 0.8744328006449537, "eval_sts-test_spearman_dot": 0.8300299278234861, "eval_sts-test_spearman_euclidean": 0.8689570810523634, "eval_sts-test_spearman_manhattan": 0.8673386516841433, "eval_sts-test_spearman_max": 0.8744328006449537, "eval_vitaminc-pairs_loss": 3.0513458251953125, "eval_vitaminc-pairs_runtime": 3.2912, "eval_vitaminc-pairs_samples_per_second": 38.892, "eval_vitaminc-pairs_steps_per_second": 0.304, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_negation-triplets_loss": 0.9192151427268982, "eval_negation-triplets_runtime": 0.811, "eval_negation-triplets_samples_per_second": 157.83, "eval_negation-triplets_steps_per_second": 1.233, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_scitail-pairs-pos_loss": 0.1447685807943344, "eval_scitail-pairs-pos_runtime": 1.0028, "eval_scitail-pairs-pos_samples_per_second": 127.645, "eval_scitail-pairs-pos_steps_per_second": 0.997, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_scitail-pairs-qa_loss": 0.0013520271750167012, "eval_scitail-pairs-qa_runtime": 0.6095, "eval_scitail-pairs-qa_samples_per_second": 210.022, "eval_scitail-pairs-qa_steps_per_second": 1.641, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_xsum-pairs_loss": 0.3016371428966522, "eval_xsum-pairs_runtime": 3.0867, "eval_xsum-pairs_samples_per_second": 41.468, "eval_xsum-pairs_steps_per_second": 0.324, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_sciq_pairs_loss": 0.09836392104625702, "eval_sciq_pairs_runtime": 3.4914, "eval_sciq_pairs_samples_per_second": 36.662, "eval_sciq_pairs_steps_per_second": 0.286, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_qasc_pairs_loss": 0.1730758398771286, "eval_qasc_pairs_runtime": 0.6186, "eval_qasc_pairs_samples_per_second": 206.907, "eval_qasc_pairs_steps_per_second": 1.616, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_openbookqa_pairs_loss": 0.8175860047340393, "eval_openbookqa_pairs_runtime": 0.6064, "eval_openbookqa_pairs_samples_per_second": 211.072, "eval_openbookqa_pairs_steps_per_second": 1.649, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_msmarco_pairs_loss": 0.8001145720481873, "eval_msmarco_pairs_runtime": 1.5228, "eval_msmarco_pairs_samples_per_second": 84.057, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_nq_pairs_loss": 0.8027563691139221, "eval_nq_pairs_runtime": 2.8948, "eval_nq_pairs_samples_per_second": 44.218, "eval_nq_pairs_steps_per_second": 0.345, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_trivia_pairs_loss": 0.742946982383728, "eval_trivia_pairs_runtime": 3.442, "eval_trivia_pairs_samples_per_second": 37.187, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_gooaq_pairs_loss": 0.3562733232975006, "eval_gooaq_pairs_runtime": 0.9552, "eval_gooaq_pairs_samples_per_second": 134.005, "eval_gooaq_pairs_steps_per_second": 1.047, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_paws-pos_loss": 0.02172490954399109, "eval_paws-pos_runtime": 0.7012, "eval_paws-pos_samples_per_second": 182.533, "eval_paws-pos_steps_per_second": 1.426, "step": 1760 }, { "epoch": 1.8106995884773662, "eval_global_dataset_loss": 0.47637251019477844, "eval_global_dataset_runtime": 13.3916, "eval_global_dataset_samples_per_second": 31.064, "eval_global_dataset_steps_per_second": 0.299, "step": 1760 }, { "epoch": 1.8117283950617284, "grad_norm": 4.251852035522461, "learning_rate": 2.9408735225651523e-05, "loss": 0.1209, "step": 1761 }, { "epoch": 1.8127572016460904, "grad_norm": 4.266910552978516, "learning_rate": 2.939591295103292e-05, "loss": 0.1031, "step": 1762 }, { "epoch": 1.8137860082304527, "grad_norm": 7.430552959442139, "learning_rate": 2.93830806364394e-05, "loss": 0.3235, "step": 1763 }, { "epoch": 1.8148148148148149, "grad_norm": 0.7937636375427246, "learning_rate": 2.9370238303122e-05, "loss": 0.0158, "step": 1764 }, { "epoch": 1.815843621399177, "grad_norm": 8.257564544677734, "learning_rate": 2.9357385972348348e-05, "loss": 0.3182, "step": 1765 }, { "epoch": 1.816872427983539, "grad_norm": 5.81747579574585, "learning_rate": 2.9344523665402632e-05, "loss": 0.2515, "step": 1766 }, { "epoch": 1.817901234567901, "grad_norm": 0.7533502578735352, "learning_rate": 2.9331651403585563e-05, "loss": 0.009, "step": 1767 }, { "epoch": 1.8189300411522633, "grad_norm": 0.6928431391716003, "learning_rate": 2.9318769208214332e-05, "loss": 0.0098, "step": 1768 }, { "epoch": 1.8199588477366255, "grad_norm": 14.506959915161133, "learning_rate": 2.930587710062258e-05, "loss": 0.9151, "step": 1769 }, { "epoch": 1.8209876543209877, "grad_norm": 0.9877616763114929, "learning_rate": 2.929297510216038e-05, "loss": 0.0175, "step": 1770 }, { "epoch": 1.8220164609053497, "grad_norm": 4.854236125946045, "learning_rate": 2.9280063234194154e-05, "loss": 0.1808, "step": 1771 }, { "epoch": 1.823045267489712, "grad_norm": 3.8956329822540283, "learning_rate": 2.9267141518106698e-05, "loss": 0.0881, "step": 1772 }, { "epoch": 1.824074074074074, "grad_norm": 9.348139762878418, "learning_rate": 2.9254209975297103e-05, "loss": 0.4814, "step": 1773 }, { "epoch": 1.8251028806584362, "grad_norm": 4.507750034332275, "learning_rate": 2.9241268627180734e-05, "loss": 0.0891, "step": 1774 }, { "epoch": 1.8261316872427984, "grad_norm": 4.753886699676514, "learning_rate": 2.92283174951892e-05, "loss": 0.0898, "step": 1775 }, { "epoch": 1.8271604938271606, "grad_norm": 8.55908203125, "learning_rate": 2.92153566007703e-05, "loss": 0.5568, "step": 1776 }, { "epoch": 1.8281893004115226, "grad_norm": 17.864425659179688, "learning_rate": 2.9202385965388013e-05, "loss": 0.1711, "step": 1777 }, { "epoch": 1.8292181069958846, "grad_norm": 6.739587306976318, "learning_rate": 2.918940561052245e-05, "loss": 0.3617, "step": 1778 }, { "epoch": 1.8302469135802468, "grad_norm": 5.804736137390137, "learning_rate": 2.9176415557669798e-05, "loss": 0.2282, "step": 1779 }, { "epoch": 1.831275720164609, "grad_norm": 10.7015962600708, "learning_rate": 2.9163415828342342e-05, "loss": 0.5814, "step": 1780 }, { "epoch": 1.831275720164609, "eval_Qnli-dev_cosine_accuracy": 0.701171875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7543736100196838, "eval_Qnli-dev_cosine_ap": 0.7495294175428003, "eval_Qnli-dev_cosine_f1": 0.6951871657754012, "eval_Qnli-dev_cosine_f1_threshold": 0.6938208341598511, "eval_Qnli-dev_cosine_precision": 0.6, "eval_Qnli-dev_cosine_recall": 0.826271186440678, "eval_Qnli-dev_dot_accuracy": 0.65234375, "eval_Qnli-dev_dot_accuracy_threshold": 371.60345458984375, "eval_Qnli-dev_dot_ap": 0.6817688573950809, "eval_Qnli-dev_dot_f1": 0.678513731825525, "eval_Qnli-dev_dot_f1_threshold": 288.9908142089844, "eval_Qnli-dev_dot_precision": 0.5483028720626631, "eval_Qnli-dev_dot_recall": 0.8898305084745762, "eval_Qnli-dev_euclidean_accuracy": 0.712890625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.358012199401855, "eval_Qnli-dev_euclidean_ap": 0.7612491219860132, "eval_Qnli-dev_euclidean_f1": 0.6977611940298506, "eval_Qnli-dev_euclidean_f1_threshold": 16.292171478271484, "eval_Qnli-dev_euclidean_precision": 0.6233333333333333, "eval_Qnli-dev_euclidean_recall": 0.7923728813559322, "eval_Qnli-dev_manhattan_accuracy": 0.7109375, "eval_Qnli-dev_manhattan_accuracy_threshold": 302.44561767578125, "eval_Qnli-dev_manhattan_ap": 0.7617218865087008, "eval_Qnli-dev_manhattan_f1": 0.6994535519125683, "eval_Qnli-dev_manhattan_f1_threshold": 346.8835144042969, "eval_Qnli-dev_manhattan_precision": 0.6134185303514377, "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 371.60345458984375, "eval_Qnli-dev_max_ap": 0.7617218865087008, "eval_Qnli-dev_max_f1": 0.6994535519125683, "eval_Qnli-dev_max_f1_threshold": 346.8835144042969, "eval_Qnli-dev_max_precision": 0.6233333333333333, "eval_Qnli-dev_max_recall": 0.8898305084745762, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8212969899177551, "eval_allNLI-dev_cosine_ap": 0.6270104138292626, "eval_allNLI-dev_cosine_f1": 0.6271604938271605, "eval_allNLI-dev_cosine_f1_threshold": 0.7626806497573853, "eval_allNLI-dev_cosine_precision": 0.5474137931034483, "eval_allNLI-dev_cosine_recall": 0.7341040462427746, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 372.30584716796875, "eval_allNLI-dev_dot_ap": 0.5630877616252019, "eval_allNLI-dev_dot_f1": 0.5948275862068965, "eval_allNLI-dev_dot_f1_threshold": 307.044189453125, "eval_allNLI-dev_dot_precision": 0.4742268041237113, "eval_allNLI-dev_dot_recall": 0.7976878612716763, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.628206253051758, "eval_allNLI-dev_euclidean_ap": 0.6339028521728746, "eval_allNLI-dev_euclidean_f1": 0.6374695863746959, "eval_allNLI-dev_euclidean_f1_threshold": 14.277826309204102, "eval_allNLI-dev_euclidean_precision": 0.5504201680672269, "eval_allNLI-dev_euclidean_recall": 0.7572254335260116, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 274.76416015625, "eval_allNLI-dev_manhattan_ap": 0.6307982484285948, "eval_allNLI-dev_manhattan_f1": 0.6304909560723514, "eval_allNLI-dev_manhattan_f1_threshold": 288.71295166015625, "eval_allNLI-dev_manhattan_precision": 0.5700934579439252, "eval_allNLI-dev_manhattan_recall": 0.7052023121387283, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 372.30584716796875, "eval_allNLI-dev_max_ap": 0.6339028521728746, "eval_allNLI-dev_max_f1": 0.6374695863746959, "eval_allNLI-dev_max_f1_threshold": 307.044189453125, "eval_allNLI-dev_max_precision": 0.5700934579439252, "eval_allNLI-dev_max_recall": 0.7976878612716763, "eval_sequential_score": 0.7617218865087008, "eval_sts-test_pearson_cosine": 0.8392230551266312, "eval_sts-test_pearson_dot": 0.8331321270854175, "eval_sts-test_pearson_euclidean": 0.868004361861351, "eval_sts-test_pearson_manhattan": 0.8654231624267823, "eval_sts-test_pearson_max": 0.868004361861351, "eval_sts-test_spearman_cosine": 0.8720572246852526, "eval_sts-test_spearman_dot": 0.833290101967111, "eval_sts-test_spearman_euclidean": 0.8665680349629835, "eval_sts-test_spearman_manhattan": 0.8639320066619844, "eval_sts-test_spearman_max": 0.8720572246852526, "eval_vitaminc-pairs_loss": 2.969275712966919, "eval_vitaminc-pairs_runtime": 3.2173, "eval_vitaminc-pairs_samples_per_second": 39.785, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1780 }, { "epoch": 1.831275720164609, "eval_negation-triplets_loss": 0.9597027897834778, "eval_negation-triplets_runtime": 0.7662, "eval_negation-triplets_samples_per_second": 167.069, "eval_negation-triplets_steps_per_second": 1.305, "step": 1780 }, { "epoch": 1.831275720164609, "eval_scitail-pairs-pos_loss": 0.1277042031288147, "eval_scitail-pairs-pos_runtime": 0.8835, "eval_scitail-pairs-pos_samples_per_second": 144.883, "eval_scitail-pairs-pos_steps_per_second": 1.132, "step": 1780 }, { "epoch": 1.831275720164609, "eval_scitail-pairs-qa_loss": 0.000502650742419064, "eval_scitail-pairs-qa_runtime": 0.6056, "eval_scitail-pairs-qa_samples_per_second": 211.376, "eval_scitail-pairs-qa_steps_per_second": 1.651, "step": 1780 }, { "epoch": 1.831275720164609, "eval_xsum-pairs_loss": 0.2729324698448181, "eval_xsum-pairs_runtime": 3.0278, "eval_xsum-pairs_samples_per_second": 42.274, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1780 }, { "epoch": 1.831275720164609, "eval_sciq_pairs_loss": 0.10411171615123749, "eval_sciq_pairs_runtime": 3.5189, "eval_sciq_pairs_samples_per_second": 36.375, "eval_sciq_pairs_steps_per_second": 0.284, "step": 1780 }, { "epoch": 1.831275720164609, "eval_qasc_pairs_loss": 0.18848010897636414, "eval_qasc_pairs_runtime": 0.6245, "eval_qasc_pairs_samples_per_second": 204.968, "eval_qasc_pairs_steps_per_second": 1.601, "step": 1780 }, { "epoch": 1.831275720164609, "eval_openbookqa_pairs_loss": 0.7955866456031799, "eval_openbookqa_pairs_runtime": 0.5976, "eval_openbookqa_pairs_samples_per_second": 214.183, "eval_openbookqa_pairs_steps_per_second": 1.673, "step": 1780 }, { "epoch": 1.831275720164609, "eval_msmarco_pairs_loss": 0.8654565215110779, "eval_msmarco_pairs_runtime": 1.5268, "eval_msmarco_pairs_samples_per_second": 83.838, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1780 }, { "epoch": 1.831275720164609, "eval_nq_pairs_loss": 0.7890068888664246, "eval_nq_pairs_runtime": 2.9072, "eval_nq_pairs_samples_per_second": 44.028, "eval_nq_pairs_steps_per_second": 0.344, "step": 1780 }, { "epoch": 1.831275720164609, "eval_trivia_pairs_loss": 0.7905226349830627, "eval_trivia_pairs_runtime": 3.4505, "eval_trivia_pairs_samples_per_second": 37.096, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1780 }, { "epoch": 1.831275720164609, "eval_gooaq_pairs_loss": 0.4708109200000763, "eval_gooaq_pairs_runtime": 0.9715, "eval_gooaq_pairs_samples_per_second": 131.76, "eval_gooaq_pairs_steps_per_second": 1.029, "step": 1780 }, { "epoch": 1.831275720164609, "eval_paws-pos_loss": 0.022015145048499107, "eval_paws-pos_runtime": 0.7045, "eval_paws-pos_samples_per_second": 181.694, "eval_paws-pos_steps_per_second": 1.419, "step": 1780 }, { "epoch": 1.831275720164609, "eval_global_dataset_loss": 0.48972851037979126, "eval_global_dataset_runtime": 13.4186, "eval_global_dataset_samples_per_second": 31.002, "eval_global_dataset_steps_per_second": 0.298, "step": 1780 }, { "epoch": 1.8323045267489713, "grad_norm": 16.824098587036133, "learning_rate": 2.9150406444068354e-05, "loss": 2.3695, "step": 1781 }, { "epoch": 1.8333333333333335, "grad_norm": 7.3217387199401855, "learning_rate": 2.913738742639211e-05, "loss": 0.2563, "step": 1782 }, { "epoch": 1.8343621399176955, "grad_norm": 7.324367523193359, "learning_rate": 2.912435879687385e-05, "loss": 0.3371, "step": 1783 }, { "epoch": 1.8353909465020575, "grad_norm": 9.451821327209473, "learning_rate": 2.911132057708971e-05, "loss": 0.6057, "step": 1784 }, { "epoch": 1.8364197530864197, "grad_norm": 7.3754143714904785, "learning_rate": 2.9098272788631732e-05, "loss": 0.3104, "step": 1785 }, { "epoch": 1.837448559670782, "grad_norm": 4.6265411376953125, "learning_rate": 2.9085215453107785e-05, "loss": 0.1396, "step": 1786 }, { "epoch": 1.8384773662551441, "grad_norm": 4.895910263061523, "learning_rate": 2.9072148592141554e-05, "loss": 0.1254, "step": 1787 }, { "epoch": 1.8395061728395061, "grad_norm": 12.90682315826416, "learning_rate": 2.90590722273725e-05, "loss": 0.7074, "step": 1788 }, { "epoch": 1.8405349794238683, "grad_norm": 9.511048316955566, "learning_rate": 2.9045986380455827e-05, "loss": 0.602, "step": 1789 }, { "epoch": 1.8415637860082303, "grad_norm": 7.697326183319092, "learning_rate": 2.9032891073062443e-05, "loss": 0.3409, "step": 1790 }, { "epoch": 1.8425925925925926, "grad_norm": 7.475734233856201, "learning_rate": 2.9019786326878906e-05, "loss": 0.3956, "step": 1791 }, { "epoch": 1.8436213991769548, "grad_norm": 8.607638359069824, "learning_rate": 2.9006672163607424e-05, "loss": 0.4256, "step": 1792 }, { "epoch": 1.844650205761317, "grad_norm": 7.571669101715088, "learning_rate": 2.899354860496579e-05, "loss": 0.5065, "step": 1793 }, { "epoch": 1.845679012345679, "grad_norm": 10.287837028503418, "learning_rate": 2.898041567268737e-05, "loss": 0.6154, "step": 1794 }, { "epoch": 1.846707818930041, "grad_norm": 0.782716691493988, "learning_rate": 2.8967273388521022e-05, "loss": 0.0099, "step": 1795 }, { "epoch": 1.8477366255144032, "grad_norm": 6.425576210021973, "learning_rate": 2.8954121774231135e-05, "loss": 0.2757, "step": 1796 }, { "epoch": 1.8487654320987654, "grad_norm": 0.85555499792099, "learning_rate": 2.8940960851597516e-05, "loss": 0.01, "step": 1797 }, { "epoch": 1.8497942386831276, "grad_norm": 4.784443378448486, "learning_rate": 2.89277906424154e-05, "loss": 0.2952, "step": 1798 }, { "epoch": 1.8508230452674899, "grad_norm": 0.6040102243423462, "learning_rate": 2.8914611168495395e-05, "loss": 0.0057, "step": 1799 }, { "epoch": 1.8518518518518519, "grad_norm": 8.821825981140137, "learning_rate": 2.8901422451663457e-05, "loss": 0.4099, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.783872663974762, "eval_Qnli-dev_cosine_ap": 0.7466087603555676, "eval_Qnli-dev_cosine_f1": 0.693950177935943, "eval_Qnli-dev_cosine_f1_threshold": 0.712569534778595, "eval_Qnli-dev_cosine_precision": 0.598159509202454, "eval_Qnli-dev_cosine_recall": 0.826271186440678, "eval_Qnli-dev_dot_accuracy": 0.650390625, "eval_Qnli-dev_dot_accuracy_threshold": 377.8784484863281, "eval_Qnli-dev_dot_ap": 0.670055076912287, "eval_Qnli-dev_dot_f1": 0.6708860759493672, "eval_Qnli-dev_dot_f1_threshold": 295.422607421875, "eval_Qnli-dev_dot_precision": 0.5353535353535354, "eval_Qnli-dev_dot_recall": 0.8983050847457628, "eval_Qnli-dev_euclidean_accuracy": 0.70703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.184562683105469, "eval_Qnli-dev_euclidean_ap": 0.7566549840766721, "eval_Qnli-dev_euclidean_f1": 0.7007299270072993, "eval_Qnli-dev_euclidean_f1_threshold": 16.043079376220703, "eval_Qnli-dev_euclidean_precision": 0.6153846153846154, "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, "eval_Qnli-dev_manhattan_accuracy": 0.7109375, "eval_Qnli-dev_manhattan_accuracy_threshold": 299.1871337890625, "eval_Qnli-dev_manhattan_ap": 0.7581720001401686, "eval_Qnli-dev_manhattan_f1": 0.7041742286751361, "eval_Qnli-dev_manhattan_f1_threshold": 337.9171142578125, "eval_Qnli-dev_manhattan_precision": 0.6158730158730159, "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, "eval_Qnli-dev_max_accuracy": 0.7109375, "eval_Qnli-dev_max_accuracy_threshold": 377.8784484863281, "eval_Qnli-dev_max_ap": 0.7581720001401686, "eval_Qnli-dev_max_f1": 0.7041742286751361, "eval_Qnli-dev_max_f1_threshold": 337.9171142578125, "eval_Qnli-dev_max_precision": 0.6158730158730159, "eval_Qnli-dev_max_recall": 0.8983050847457628, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8557825088500977, "eval_allNLI-dev_cosine_ap": 0.6226734040257053, "eval_allNLI-dev_cosine_f1": 0.624078624078624, "eval_allNLI-dev_cosine_f1_threshold": 0.7762553095817566, "eval_allNLI-dev_cosine_precision": 0.5427350427350427, "eval_allNLI-dev_cosine_recall": 0.7341040462427746, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 368.6101379394531, "eval_allNLI-dev_dot_ap": 0.5572318941843482, "eval_allNLI-dev_dot_f1": 0.6106194690265486, "eval_allNLI-dev_dot_f1_threshold": 321.041748046875, "eval_allNLI-dev_dot_precision": 0.4946236559139785, "eval_allNLI-dev_dot_recall": 0.7976878612716763, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.30082893371582, "eval_allNLI-dev_euclidean_ap": 0.630235554734977, "eval_allNLI-dev_euclidean_f1": 0.6346153846153846, "eval_allNLI-dev_euclidean_f1_threshold": 14.17033576965332, "eval_allNLI-dev_euclidean_precision": 0.5432098765432098, "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 237.4763641357422, "eval_allNLI-dev_manhattan_ap": 0.6276697585643951, "eval_allNLI-dev_manhattan_f1": 0.6357308584686775, "eval_allNLI-dev_manhattan_f1_threshold": 300.2784423828125, "eval_allNLI-dev_manhattan_precision": 0.5310077519379846, "eval_allNLI-dev_manhattan_recall": 0.791907514450867, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 368.6101379394531, "eval_allNLI-dev_max_ap": 0.630235554734977, "eval_allNLI-dev_max_f1": 0.6357308584686775, "eval_allNLI-dev_max_f1_threshold": 321.041748046875, "eval_allNLI-dev_max_precision": 0.5432098765432098, "eval_allNLI-dev_max_recall": 0.7976878612716763, "eval_sequential_score": 0.7581720001401686, "eval_sts-test_pearson_cosine": 0.8405514680248984, "eval_sts-test_pearson_dot": 0.826561548746697, "eval_sts-test_pearson_euclidean": 0.8713469017531787, "eval_sts-test_pearson_manhattan": 0.8678385992177855, "eval_sts-test_pearson_max": 0.8713469017531787, "eval_sts-test_spearman_cosine": 0.874277469016664, "eval_sts-test_spearman_dot": 0.826397333682562, "eval_sts-test_spearman_euclidean": 0.8706232600164878, "eval_sts-test_spearman_manhattan": 0.8674953258502748, "eval_sts-test_spearman_max": 0.874277469016664, "eval_vitaminc-pairs_loss": 3.052903890609741, "eval_vitaminc-pairs_runtime": 3.2066, "eval_vitaminc-pairs_samples_per_second": 39.918, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_negation-triplets_loss": 0.9271054863929749, "eval_negation-triplets_runtime": 0.7603, "eval_negation-triplets_samples_per_second": 168.349, "eval_negation-triplets_steps_per_second": 1.315, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_scitail-pairs-pos_loss": 0.1117212250828743, "eval_scitail-pairs-pos_runtime": 0.9101, "eval_scitail-pairs-pos_samples_per_second": 140.646, "eval_scitail-pairs-pos_steps_per_second": 1.099, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_scitail-pairs-qa_loss": 0.0008321039495058358, "eval_scitail-pairs-qa_runtime": 0.5988, "eval_scitail-pairs-qa_samples_per_second": 213.773, "eval_scitail-pairs-qa_steps_per_second": 1.67, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_xsum-pairs_loss": 0.2601509392261505, "eval_xsum-pairs_runtime": 3.0239, "eval_xsum-pairs_samples_per_second": 42.329, "eval_xsum-pairs_steps_per_second": 0.331, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_sciq_pairs_loss": 0.09544568508863449, "eval_sciq_pairs_runtime": 3.5424, "eval_sciq_pairs_samples_per_second": 36.133, "eval_sciq_pairs_steps_per_second": 0.282, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_qasc_pairs_loss": 0.16598990559577942, "eval_qasc_pairs_runtime": 0.6288, "eval_qasc_pairs_samples_per_second": 203.558, "eval_qasc_pairs_steps_per_second": 1.59, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_openbookqa_pairs_loss": 0.7461561560630798, "eval_openbookqa_pairs_runtime": 0.6066, "eval_openbookqa_pairs_samples_per_second": 211.02, "eval_openbookqa_pairs_steps_per_second": 1.649, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_msmarco_pairs_loss": 0.8211266994476318, "eval_msmarco_pairs_runtime": 1.5318, "eval_msmarco_pairs_samples_per_second": 83.562, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_nq_pairs_loss": 0.7967262864112854, "eval_nq_pairs_runtime": 2.9105, "eval_nq_pairs_samples_per_second": 43.979, "eval_nq_pairs_steps_per_second": 0.344, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_trivia_pairs_loss": 0.7438980937004089, "eval_trivia_pairs_runtime": 3.4511, "eval_trivia_pairs_samples_per_second": 37.09, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_gooaq_pairs_loss": 0.4350385367870331, "eval_gooaq_pairs_runtime": 0.9556, "eval_gooaq_pairs_samples_per_second": 133.945, "eval_gooaq_pairs_steps_per_second": 1.046, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_paws-pos_loss": 0.022378094494342804, "eval_paws-pos_runtime": 0.7027, "eval_paws-pos_samples_per_second": 182.149, "eval_paws-pos_steps_per_second": 1.423, "step": 1800 }, { "epoch": 1.8518518518518519, "eval_global_dataset_loss": 0.4930493235588074, "eval_global_dataset_runtime": 13.4096, "eval_global_dataset_samples_per_second": 31.023, "eval_global_dataset_steps_per_second": 0.298, "step": 1800 }, { "epoch": 1.8528806584362139, "grad_norm": 6.235814094543457, "learning_rate": 2.888822451376085e-05, "loss": 0.178, "step": 1801 }, { "epoch": 1.853909465020576, "grad_norm": 5.626006603240967, "learning_rate": 2.8875017376644103e-05, "loss": 0.176, "step": 1802 }, { "epoch": 1.8549382716049383, "grad_norm": 4.305861949920654, "learning_rate": 2.8861801062184983e-05, "loss": 0.0919, "step": 1803 }, { "epoch": 1.8559670781893005, "grad_norm": 5.0377912521362305, "learning_rate": 2.8848575592270457e-05, "loss": 0.2261, "step": 1804 }, { "epoch": 1.8569958847736625, "grad_norm": 3.85387921333313, "learning_rate": 2.8835340988802652e-05, "loss": 0.1215, "step": 1805 }, { "epoch": 1.8580246913580247, "grad_norm": 12.48168659210205, "learning_rate": 2.8822097273698814e-05, "loss": 0.6661, "step": 1806 }, { "epoch": 1.8590534979423867, "grad_norm": 0.6439504027366638, "learning_rate": 2.880884446889129e-05, "loss": 0.0122, "step": 1807 }, { "epoch": 1.860082304526749, "grad_norm": 11.227083206176758, "learning_rate": 2.8795582596327478e-05, "loss": 0.6757, "step": 1808 }, { "epoch": 1.8611111111111112, "grad_norm": 6.3678460121154785, "learning_rate": 2.8782311677969783e-05, "loss": 0.1978, "step": 1809 }, { "epoch": 1.8621399176954734, "grad_norm": 4.8387041091918945, "learning_rate": 2.8769031735795593e-05, "loss": 0.1788, "step": 1810 }, { "epoch": 1.8631687242798354, "grad_norm": 7.737652778625488, "learning_rate": 2.875574279179726e-05, "loss": 0.4695, "step": 1811 }, { "epoch": 1.8641975308641974, "grad_norm": 0.1465805470943451, "learning_rate": 2.8742444867982005e-05, "loss": 0.0018, "step": 1812 }, { "epoch": 1.8652263374485596, "grad_norm": 5.01085090637207, "learning_rate": 2.872913798637196e-05, "loss": 0.1199, "step": 1813 }, { "epoch": 1.8662551440329218, "grad_norm": 10.929647445678711, "learning_rate": 2.871582216900407e-05, "loss": 0.6889, "step": 1814 }, { "epoch": 1.867283950617284, "grad_norm": 5.263504981994629, "learning_rate": 2.870249743793008e-05, "loss": 0.1593, "step": 1815 }, { "epoch": 1.8683127572016462, "grad_norm": 0.27705076336860657, "learning_rate": 2.8689163815216498e-05, "loss": 0.0051, "step": 1816 }, { "epoch": 1.8693415637860082, "grad_norm": 6.253952503204346, "learning_rate": 2.867582132294456e-05, "loss": 0.162, "step": 1817 }, { "epoch": 1.8703703703703702, "grad_norm": 6.236087799072266, "learning_rate": 2.8662469983210184e-05, "loss": 0.1779, "step": 1818 }, { "epoch": 1.8713991769547325, "grad_norm": 6.740966796875, "learning_rate": 2.8649109818123948e-05, "loss": 0.225, "step": 1819 }, { "epoch": 1.8724279835390947, "grad_norm": 5.584411144256592, "learning_rate": 2.8635740849811043e-05, "loss": 0.1736, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7693237662315369, "eval_Qnli-dev_cosine_ap": 0.7516011618078376, "eval_Qnli-dev_cosine_f1": 0.6892857142857143, "eval_Qnli-dev_cosine_f1_threshold": 0.68800950050354, "eval_Qnli-dev_cosine_precision": 0.595679012345679, "eval_Qnli-dev_cosine_recall": 0.8177966101694916, "eval_Qnli-dev_dot_accuracy": 0.6640625, "eval_Qnli-dev_dot_accuracy_threshold": 342.3182373046875, "eval_Qnli-dev_dot_ap": 0.6960339016150074, "eval_Qnli-dev_dot_f1": 0.6697965571205008, "eval_Qnli-dev_dot_f1_threshold": 267.3514404296875, "eval_Qnli-dev_dot_precision": 0.5310173697270472, "eval_Qnli-dev_dot_recall": 0.9067796610169492, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.13847541809082, "eval_Qnli-dev_euclidean_ap": 0.7593799713146617, "eval_Qnli-dev_euclidean_f1": 0.6990654205607476, "eval_Qnli-dev_euclidean_f1_threshold": 16.104825973510742, "eval_Qnli-dev_euclidean_precision": 0.6254180602006689, "eval_Qnli-dev_euclidean_recall": 0.7923728813559322, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 298.31005859375, "eval_Qnli-dev_manhattan_ap": 0.7636995928661362, "eval_Qnli-dev_manhattan_f1": 0.7071823204419889, "eval_Qnli-dev_manhattan_f1_threshold": 340.46624755859375, "eval_Qnli-dev_manhattan_precision": 0.6254071661237784, "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 342.3182373046875, "eval_Qnli-dev_max_ap": 0.7636995928661362, "eval_Qnli-dev_max_f1": 0.7071823204419889, "eval_Qnli-dev_max_f1_threshold": 340.46624755859375, "eval_Qnli-dev_max_precision": 0.6254180602006689, "eval_Qnli-dev_max_recall": 0.9067796610169492, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8468435406684875, "eval_allNLI-dev_cosine_ap": 0.6198779739889414, "eval_allNLI-dev_cosine_f1": 0.6308068459657702, "eval_allNLI-dev_cosine_f1_threshold": 0.7570561766624451, "eval_allNLI-dev_cosine_precision": 0.5466101694915254, "eval_allNLI-dev_cosine_recall": 0.7456647398843931, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 346.5953674316406, "eval_allNLI-dev_dot_ap": 0.5522378285672351, "eval_allNLI-dev_dot_f1": 0.6, "eval_allNLI-dev_dot_f1_threshold": 292.556640625, "eval_allNLI-dev_dot_precision": 0.48736462093862815, "eval_allNLI-dev_dot_recall": 0.7803468208092486, "eval_allNLI-dev_euclidean_accuracy": 0.728515625, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.238094329833984, "eval_allNLI-dev_euclidean_ap": 0.6264571105036736, "eval_allNLI-dev_euclidean_f1": 0.6344827586206897, "eval_allNLI-dev_euclidean_f1_threshold": 14.68545150756836, "eval_allNLI-dev_euclidean_precision": 0.5267175572519084, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 248.9263916015625, "eval_allNLI-dev_manhattan_ap": 0.6257422625629346, "eval_allNLI-dev_manhattan_f1": 0.638095238095238, "eval_allNLI-dev_manhattan_f1_threshold": 302.66668701171875, "eval_allNLI-dev_manhattan_precision": 0.5425101214574899, "eval_allNLI-dev_manhattan_recall": 0.7745664739884393, "eval_allNLI-dev_max_accuracy": 0.728515625, "eval_allNLI-dev_max_accuracy_threshold": 346.5953674316406, "eval_allNLI-dev_max_ap": 0.6264571105036736, "eval_allNLI-dev_max_f1": 0.638095238095238, "eval_allNLI-dev_max_f1_threshold": 302.66668701171875, "eval_allNLI-dev_max_precision": 0.5466101694915254, "eval_allNLI-dev_max_recall": 0.7976878612716763, "eval_sequential_score": 0.7636995928661362, "eval_sts-test_pearson_cosine": 0.8424731952945942, "eval_sts-test_pearson_dot": 0.8333461802951254, "eval_sts-test_pearson_euclidean": 0.8674274376987042, "eval_sts-test_pearson_manhattan": 0.8628614026838428, "eval_sts-test_pearson_max": 0.8674274376987042, "eval_sts-test_spearman_cosine": 0.8718507423651927, "eval_sts-test_spearman_dot": 0.827070661161457, "eval_sts-test_spearman_euclidean": 0.8662244795292817, "eval_sts-test_spearman_manhattan": 0.8620014372455594, "eval_sts-test_spearman_max": 0.8718507423651927, "eval_vitaminc-pairs_loss": 3.304171323776245, "eval_vitaminc-pairs_runtime": 3.2079, "eval_vitaminc-pairs_samples_per_second": 39.902, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_negation-triplets_loss": 0.9493024945259094, "eval_negation-triplets_runtime": 0.7585, "eval_negation-triplets_samples_per_second": 168.759, "eval_negation-triplets_steps_per_second": 1.318, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_scitail-pairs-pos_loss": 0.12272996455430984, "eval_scitail-pairs-pos_runtime": 0.9056, "eval_scitail-pairs-pos_samples_per_second": 141.349, "eval_scitail-pairs-pos_steps_per_second": 1.104, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_scitail-pairs-qa_loss": 0.0004829070239793509, "eval_scitail-pairs-qa_runtime": 0.6048, "eval_scitail-pairs-qa_samples_per_second": 211.628, "eval_scitail-pairs-qa_steps_per_second": 1.653, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_xsum-pairs_loss": 0.23644520342350006, "eval_xsum-pairs_runtime": 3.0348, "eval_xsum-pairs_samples_per_second": 42.177, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_sciq_pairs_loss": 0.09980232268571854, "eval_sciq_pairs_runtime": 3.4869, "eval_sciq_pairs_samples_per_second": 36.709, "eval_sciq_pairs_steps_per_second": 0.287, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_qasc_pairs_loss": 0.1501757949590683, "eval_qasc_pairs_runtime": 0.6196, "eval_qasc_pairs_samples_per_second": 206.579, "eval_qasc_pairs_steps_per_second": 1.614, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_openbookqa_pairs_loss": 0.7412326335906982, "eval_openbookqa_pairs_runtime": 0.6034, "eval_openbookqa_pairs_samples_per_second": 212.118, "eval_openbookqa_pairs_steps_per_second": 1.657, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_msmarco_pairs_loss": 0.9512736797332764, "eval_msmarco_pairs_runtime": 1.5248, "eval_msmarco_pairs_samples_per_second": 83.945, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_nq_pairs_loss": 0.7984183430671692, "eval_nq_pairs_runtime": 2.9011, "eval_nq_pairs_samples_per_second": 44.121, "eval_nq_pairs_steps_per_second": 0.345, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_trivia_pairs_loss": 0.7930619120597839, "eval_trivia_pairs_runtime": 3.4561, "eval_trivia_pairs_samples_per_second": 37.036, "eval_trivia_pairs_steps_per_second": 0.289, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_gooaq_pairs_loss": 0.38777878880500793, "eval_gooaq_pairs_runtime": 0.9603, "eval_gooaq_pairs_samples_per_second": 133.297, "eval_gooaq_pairs_steps_per_second": 1.041, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_paws-pos_loss": 0.022048471495509148, "eval_paws-pos_runtime": 0.7038, "eval_paws-pos_samples_per_second": 181.879, "eval_paws-pos_steps_per_second": 1.421, "step": 1820 }, { "epoch": 1.8724279835390947, "eval_global_dataset_loss": 0.5353642106056213, "eval_global_dataset_runtime": 13.4031, "eval_global_dataset_samples_per_second": 31.038, "eval_global_dataset_steps_per_second": 0.298, "step": 1820 }, { "epoch": 1.873456790123457, "grad_norm": 4.877135276794434, "learning_rate": 2.862236310041123e-05, "loss": 0.1192, "step": 1821 }, { "epoch": 1.874485596707819, "grad_norm": 5.794093608856201, "learning_rate": 2.8608976592078826e-05, "loss": 0.2173, "step": 1822 }, { "epoch": 1.875514403292181, "grad_norm": 11.300149917602539, "learning_rate": 2.8595581346982648e-05, "loss": 0.6745, "step": 1823 }, { "epoch": 1.876543209876543, "grad_norm": 6.0869364738464355, "learning_rate": 2.858217738730597e-05, "loss": 0.1692, "step": 1824 }, { "epoch": 1.8775720164609053, "grad_norm": 9.171350479125977, "learning_rate": 2.8568764735246514e-05, "loss": 0.4567, "step": 1825 }, { "epoch": 1.8786008230452675, "grad_norm": 6.638286113739014, "learning_rate": 2.855534341301639e-05, "loss": 0.2002, "step": 1826 }, { "epoch": 1.8796296296296298, "grad_norm": 10.658404350280762, "learning_rate": 2.8541913442842073e-05, "loss": 0.4919, "step": 1827 }, { "epoch": 1.8806584362139918, "grad_norm": 6.511582851409912, "learning_rate": 2.8528474846964346e-05, "loss": 0.1809, "step": 1828 }, { "epoch": 1.8816872427983538, "grad_norm": 3.9900856018066406, "learning_rate": 2.8515027647638286e-05, "loss": 0.1848, "step": 1829 }, { "epoch": 1.882716049382716, "grad_norm": 9.124105453491211, "learning_rate": 2.850157186713321e-05, "loss": 0.4554, "step": 1830 }, { "epoch": 1.8837448559670782, "grad_norm": 9.636565208435059, "learning_rate": 2.8488107527732665e-05, "loss": 0.6297, "step": 1831 }, { "epoch": 1.8847736625514404, "grad_norm": 8.252805709838867, "learning_rate": 2.8474634651734356e-05, "loss": 0.3771, "step": 1832 }, { "epoch": 1.8858024691358026, "grad_norm": 12.576844215393066, "learning_rate": 2.8461153261450115e-05, "loss": 0.8774, "step": 1833 }, { "epoch": 1.8868312757201646, "grad_norm": 4.328588962554932, "learning_rate": 2.84476633792059e-05, "loss": 0.1061, "step": 1834 }, { "epoch": 1.8878600823045266, "grad_norm": 6.682473182678223, "learning_rate": 2.8434165027341716e-05, "loss": 0.2193, "step": 1835 }, { "epoch": 1.8888888888888888, "grad_norm": 11.43628978729248, "learning_rate": 2.84206582282116e-05, "loss": 0.7368, "step": 1836 }, { "epoch": 1.889917695473251, "grad_norm": 10.52833080291748, "learning_rate": 2.8407143004183572e-05, "loss": 0.4926, "step": 1837 }, { "epoch": 1.8909465020576133, "grad_norm": 0.0, "learning_rate": 2.839361937763961e-05, "loss": 0.0, "step": 1838 }, { "epoch": 1.8919753086419753, "grad_norm": 6.224323272705078, "learning_rate": 2.8380087370975603e-05, "loss": 0.2516, "step": 1839 }, { "epoch": 1.8930041152263375, "grad_norm": 0.0, "learning_rate": 2.8366547006601316e-05, "loss": 0.0, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_Qnli-dev_cosine_accuracy": 0.71484375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7687250971794128, "eval_Qnli-dev_cosine_ap": 0.7531877592950056, "eval_Qnli-dev_cosine_f1": 0.6921739130434782, "eval_Qnli-dev_cosine_f1_threshold": 0.684884786605835, "eval_Qnli-dev_cosine_precision": 0.5870206489675516, "eval_Qnli-dev_cosine_recall": 0.8432203389830508, "eval_Qnli-dev_dot_accuracy": 0.662109375, "eval_Qnli-dev_dot_accuracy_threshold": 363.7455749511719, "eval_Qnli-dev_dot_ap": 0.692932385125726, "eval_Qnli-dev_dot_f1": 0.6666666666666667, "eval_Qnli-dev_dot_f1_threshold": 283.91925048828125, "eval_Qnli-dev_dot_precision": 0.5285359801488834, "eval_Qnli-dev_dot_recall": 0.902542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.425592422485352, "eval_Qnli-dev_euclidean_ap": 0.7613180383995255, "eval_Qnli-dev_euclidean_f1": 0.6973180076628352, "eval_Qnli-dev_euclidean_f1_threshold": 16.067230224609375, "eval_Qnli-dev_euclidean_precision": 0.6363636363636364, "eval_Qnli-dev_euclidean_recall": 0.7711864406779662, "eval_Qnli-dev_manhattan_accuracy": 0.720703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 300.03668212890625, "eval_Qnli-dev_manhattan_ap": 0.7670340428396949, "eval_Qnli-dev_manhattan_f1": 0.7027972027972028, "eval_Qnli-dev_manhattan_f1_threshold": 353.2535400390625, "eval_Qnli-dev_manhattan_precision": 0.5982142857142857, "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, "eval_Qnli-dev_max_accuracy": 0.72265625, "eval_Qnli-dev_max_accuracy_threshold": 363.7455749511719, "eval_Qnli-dev_max_ap": 0.7670340428396949, "eval_Qnli-dev_max_f1": 0.7027972027972028, "eval_Qnli-dev_max_f1_threshold": 353.2535400390625, "eval_Qnli-dev_max_precision": 0.6363636363636364, "eval_Qnli-dev_max_recall": 0.902542372881356, "eval_allNLI-dev_cosine_accuracy": 0.720703125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8587692975997925, "eval_allNLI-dev_cosine_ap": 0.6100745668059124, "eval_allNLI-dev_cosine_f1": 0.6218097447795824, "eval_allNLI-dev_cosine_f1_threshold": 0.767665684223175, "eval_allNLI-dev_cosine_precision": 0.5193798449612403, "eval_allNLI-dev_cosine_recall": 0.7745664739884393, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 371.281494140625, "eval_allNLI-dev_dot_ap": 0.5377230025038826, "eval_allNLI-dev_dot_f1": 0.5897435897435898, "eval_allNLI-dev_dot_f1_threshold": 319.8837890625, "eval_allNLI-dev_dot_precision": 0.46779661016949153, "eval_allNLI-dev_dot_recall": 0.7976878612716763, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.756385803222656, "eval_allNLI-dev_euclidean_ap": 0.6185203781662596, "eval_allNLI-dev_euclidean_f1": 0.6376146788990825, "eval_allNLI-dev_euclidean_f1_threshold": 14.343099594116211, "eval_allNLI-dev_euclidean_precision": 0.5285171102661597, "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 245.87252807617188, "eval_allNLI-dev_manhattan_ap": 0.6186309105779424, "eval_allNLI-dev_manhattan_f1": 0.6303317535545023, "eval_allNLI-dev_manhattan_f1_threshold": 294.0959777832031, "eval_allNLI-dev_manhattan_precision": 0.5341365461847389, "eval_allNLI-dev_manhattan_recall": 0.7687861271676301, "eval_allNLI-dev_max_accuracy": 0.73046875, "eval_allNLI-dev_max_accuracy_threshold": 371.281494140625, "eval_allNLI-dev_max_ap": 0.6186309105779424, "eval_allNLI-dev_max_f1": 0.6376146788990825, "eval_allNLI-dev_max_f1_threshold": 319.8837890625, "eval_allNLI-dev_max_precision": 0.5341365461847389, "eval_allNLI-dev_max_recall": 0.8034682080924855, "eval_sequential_score": 0.7670340428396949, "eval_sts-test_pearson_cosine": 0.8331144980812912, "eval_sts-test_pearson_dot": 0.8017763371072992, "eval_sts-test_pearson_euclidean": 0.8636560345923409, "eval_sts-test_pearson_manhattan": 0.8596943370640907, "eval_sts-test_pearson_max": 0.8636560345923409, "eval_sts-test_spearman_cosine": 0.8641814054417187, "eval_sts-test_spearman_dot": 0.7944243474688627, "eval_sts-test_spearman_euclidean": 0.8618423017600516, "eval_sts-test_spearman_manhattan": 0.857660922900685, "eval_sts-test_spearman_max": 0.8641814054417187, "eval_vitaminc-pairs_loss": 3.11974835395813, "eval_vitaminc-pairs_runtime": 3.204, "eval_vitaminc-pairs_samples_per_second": 39.95, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_negation-triplets_loss": 0.9159468412399292, "eval_negation-triplets_runtime": 0.7607, "eval_negation-triplets_samples_per_second": 168.268, "eval_negation-triplets_steps_per_second": 1.315, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_scitail-pairs-pos_loss": 0.1481998711824417, "eval_scitail-pairs-pos_runtime": 0.9014, "eval_scitail-pairs-pos_samples_per_second": 141.999, "eval_scitail-pairs-pos_steps_per_second": 1.109, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_scitail-pairs-qa_loss": 0.0006982347113080323, "eval_scitail-pairs-qa_runtime": 0.5961, "eval_scitail-pairs-qa_samples_per_second": 214.721, "eval_scitail-pairs-qa_steps_per_second": 1.678, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_xsum-pairs_loss": 0.27408263087272644, "eval_xsum-pairs_runtime": 3.0322, "eval_xsum-pairs_samples_per_second": 42.213, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_sciq_pairs_loss": 0.09754869341850281, "eval_sciq_pairs_runtime": 3.5107, "eval_sciq_pairs_samples_per_second": 36.46, "eval_sciq_pairs_steps_per_second": 0.285, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_qasc_pairs_loss": 0.1729738712310791, "eval_qasc_pairs_runtime": 0.6227, "eval_qasc_pairs_samples_per_second": 205.565, "eval_qasc_pairs_steps_per_second": 1.606, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_openbookqa_pairs_loss": 0.7929932475090027, "eval_openbookqa_pairs_runtime": 0.5978, "eval_openbookqa_pairs_samples_per_second": 214.131, "eval_openbookqa_pairs_steps_per_second": 1.673, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_msmarco_pairs_loss": 0.9313375949859619, "eval_msmarco_pairs_runtime": 1.5238, "eval_msmarco_pairs_samples_per_second": 84.001, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_nq_pairs_loss": 0.6981325745582581, "eval_nq_pairs_runtime": 2.9057, "eval_nq_pairs_samples_per_second": 44.052, "eval_nq_pairs_steps_per_second": 0.344, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_trivia_pairs_loss": 0.7952219843864441, "eval_trivia_pairs_runtime": 3.4478, "eval_trivia_pairs_samples_per_second": 37.125, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_gooaq_pairs_loss": 0.4432588815689087, "eval_gooaq_pairs_runtime": 0.9511, "eval_gooaq_pairs_samples_per_second": 134.582, "eval_gooaq_pairs_steps_per_second": 1.051, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_paws-pos_loss": 0.021908223628997803, "eval_paws-pos_runtime": 0.701, "eval_paws-pos_samples_per_second": 182.601, "eval_paws-pos_steps_per_second": 1.427, "step": 1840 }, { "epoch": 1.8930041152263375, "eval_global_dataset_loss": 0.48257753252983093, "eval_global_dataset_runtime": 13.4317, "eval_global_dataset_samples_per_second": 30.971, "eval_global_dataset_steps_per_second": 0.298, "step": 1840 }, { "epoch": 1.8940329218106995, "grad_norm": 6.437840938568115, "learning_rate": 2.8352998306940368e-05, "loss": 0.1917, "step": 1841 }, { "epoch": 1.8950617283950617, "grad_norm": 6.392594337463379, "learning_rate": 2.8339441294430168e-05, "loss": 0.4027, "step": 1842 }, { "epoch": 1.896090534979424, "grad_norm": 5.803440570831299, "learning_rate": 2.8325875991521895e-05, "loss": 0.1881, "step": 1843 }, { "epoch": 1.8971193415637861, "grad_norm": 4.933880805969238, "learning_rate": 2.831230242068046e-05, "loss": 0.1529, "step": 1844 }, { "epoch": 1.8981481481481481, "grad_norm": 5.8727827072143555, "learning_rate": 2.8298720604384458e-05, "loss": 0.172, "step": 1845 }, { "epoch": 1.8991769547325101, "grad_norm": 10.907864570617676, "learning_rate": 2.8285130565126156e-05, "loss": 0.845, "step": 1846 }, { "epoch": 1.9002057613168724, "grad_norm": 0.1463947594165802, "learning_rate": 2.827153232541142e-05, "loss": 0.0012, "step": 1847 }, { "epoch": 1.9012345679012346, "grad_norm": 0.3656369149684906, "learning_rate": 2.8257925907759705e-05, "loss": 0.0182, "step": 1848 }, { "epoch": 1.9022633744855968, "grad_norm": 9.290519714355469, "learning_rate": 2.8244311334704012e-05, "loss": 0.4674, "step": 1849 }, { "epoch": 1.903292181069959, "grad_norm": 0.0, "learning_rate": 2.823068862879084e-05, "loss": 0.0, "step": 1850 }, { "epoch": 1.904320987654321, "grad_norm": 4.353809356689453, "learning_rate": 2.821705781258017e-05, "loss": 0.1461, "step": 1851 }, { "epoch": 1.905349794238683, "grad_norm": 5.968635559082031, "learning_rate": 2.8203418908645396e-05, "loss": 0.2166, "step": 1852 }, { "epoch": 1.9063786008230452, "grad_norm": 4.745438575744629, "learning_rate": 2.8189771939573323e-05, "loss": 0.2023, "step": 1853 }, { "epoch": 1.9074074074074074, "grad_norm": 5.100026607513428, "learning_rate": 2.8176116927964092e-05, "loss": 0.1569, "step": 1854 }, { "epoch": 1.9084362139917697, "grad_norm": 5.529203414916992, "learning_rate": 2.8162453896431182e-05, "loss": 0.1806, "step": 1855 }, { "epoch": 1.9094650205761317, "grad_norm": 0.36942559480667114, "learning_rate": 2.8148782867601348e-05, "loss": 0.0058, "step": 1856 }, { "epoch": 1.9104938271604939, "grad_norm": 7.7156829833984375, "learning_rate": 2.8135103864114582e-05, "loss": 0.5055, "step": 1857 }, { "epoch": 1.9115226337448559, "grad_norm": 4.606091499328613, "learning_rate": 2.8121416908624103e-05, "loss": 0.1331, "step": 1858 }, { "epoch": 1.912551440329218, "grad_norm": 0.5592970252037048, "learning_rate": 2.810772202379626e-05, "loss": 0.0108, "step": 1859 }, { "epoch": 1.9135802469135803, "grad_norm": 4.4713945388793945, "learning_rate": 2.8094019232310574e-05, "loss": 0.1008, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_Qnli-dev_cosine_accuracy": 0.701171875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7632311582565308, "eval_Qnli-dev_cosine_ap": 0.7481703261239094, "eval_Qnli-dev_cosine_f1": 0.6878504672897197, "eval_Qnli-dev_cosine_f1_threshold": 0.7151565551757812, "eval_Qnli-dev_cosine_precision": 0.6153846153846154, "eval_Qnli-dev_cosine_recall": 0.7796610169491526, "eval_Qnli-dev_dot_accuracy": 0.66015625, "eval_Qnli-dev_dot_accuracy_threshold": 352.1583557128906, "eval_Qnli-dev_dot_ap": 0.682747256285489, "eval_Qnli-dev_dot_f1": 0.6666666666666666, "eval_Qnli-dev_dot_f1_threshold": 275.40057373046875, "eval_Qnli-dev_dot_precision": 0.5215311004784688, "eval_Qnli-dev_dot_recall": 0.923728813559322, "eval_Qnli-dev_euclidean_accuracy": 0.712890625, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.104459762573242, "eval_Qnli-dev_euclidean_ap": 0.7591388978562147, "eval_Qnli-dev_euclidean_f1": 0.6881287726358148, "eval_Qnli-dev_euclidean_f1_threshold": 15.602420806884766, "eval_Qnli-dev_euclidean_precision": 0.6551724137931034, "eval_Qnli-dev_euclidean_recall": 0.7245762711864406, "eval_Qnli-dev_manhattan_accuracy": 0.708984375, "eval_Qnli-dev_manhattan_accuracy_threshold": 294.6631774902344, "eval_Qnli-dev_manhattan_ap": 0.7638811662062783, "eval_Qnli-dev_manhattan_f1": 0.7037037037037036, "eval_Qnli-dev_manhattan_f1_threshold": 340.6040344238281, "eval_Qnli-dev_manhattan_precision": 0.625, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 352.1583557128906, "eval_Qnli-dev_max_ap": 0.7638811662062783, "eval_Qnli-dev_max_f1": 0.7037037037037036, "eval_Qnli-dev_max_f1_threshold": 340.6040344238281, "eval_Qnli-dev_max_precision": 0.6551724137931034, "eval_Qnli-dev_max_recall": 0.923728813559322, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8235200643539429, "eval_allNLI-dev_cosine_ap": 0.6227597756748285, "eval_allNLI-dev_cosine_f1": 0.6270783847980997, "eval_allNLI-dev_cosine_f1_threshold": 0.759276807308197, "eval_allNLI-dev_cosine_precision": 0.532258064516129, "eval_allNLI-dev_cosine_recall": 0.7630057803468208, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 358.42010498046875, "eval_allNLI-dev_dot_ap": 0.5605397042220421, "eval_allNLI-dev_dot_f1": 0.5943775100401606, "eval_allNLI-dev_dot_f1_threshold": 290.23834228515625, "eval_allNLI-dev_dot_precision": 0.4553846153846154, "eval_allNLI-dev_dot_recall": 0.8554913294797688, "eval_allNLI-dev_euclidean_accuracy": 0.740234375, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.865135192871094, "eval_allNLI-dev_euclidean_ap": 0.6305429650760616, "eval_allNLI-dev_euclidean_f1": 0.6425339366515838, "eval_allNLI-dev_euclidean_f1_threshold": 14.834894180297852, "eval_allNLI-dev_euclidean_precision": 0.5278810408921933, "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 244.87319946289062, "eval_allNLI-dev_manhattan_ap": 0.6243970525375466, "eval_allNLI-dev_manhattan_f1": 0.6372093023255814, "eval_allNLI-dev_manhattan_f1_threshold": 308.1102294921875, "eval_allNLI-dev_manhattan_precision": 0.5330739299610895, "eval_allNLI-dev_manhattan_recall": 0.791907514450867, "eval_allNLI-dev_max_accuracy": 0.740234375, "eval_allNLI-dev_max_accuracy_threshold": 358.42010498046875, "eval_allNLI-dev_max_ap": 0.6305429650760616, "eval_allNLI-dev_max_f1": 0.6425339366515838, "eval_allNLI-dev_max_f1_threshold": 308.1102294921875, "eval_allNLI-dev_max_precision": 0.5330739299610895, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7638811662062783, "eval_sts-test_pearson_cosine": 0.8365179097730663, "eval_sts-test_pearson_dot": 0.8110790198540068, "eval_sts-test_pearson_euclidean": 0.8667904044490078, "eval_sts-test_pearson_manhattan": 0.8637204920322059, "eval_sts-test_pearson_max": 0.8667904044490078, "eval_sts-test_spearman_cosine": 0.8700784134090477, "eval_sts-test_spearman_dot": 0.8067599431688792, "eval_sts-test_spearman_euclidean": 0.8667105677305149, "eval_sts-test_spearman_manhattan": 0.8637659794822224, "eval_sts-test_spearman_max": 0.8700784134090477, "eval_vitaminc-pairs_loss": 3.0288238525390625, "eval_vitaminc-pairs_runtime": 3.2392, "eval_vitaminc-pairs_samples_per_second": 39.516, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_negation-triplets_loss": 0.9826973676681519, "eval_negation-triplets_runtime": 0.7565, "eval_negation-triplets_samples_per_second": 169.2, "eval_negation-triplets_steps_per_second": 1.322, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_scitail-pairs-pos_loss": 0.14852353930473328, "eval_scitail-pairs-pos_runtime": 0.8986, "eval_scitail-pairs-pos_samples_per_second": 142.44, "eval_scitail-pairs-pos_steps_per_second": 1.113, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_scitail-pairs-qa_loss": 0.0008748102118261158, "eval_scitail-pairs-qa_runtime": 0.6218, "eval_scitail-pairs-qa_samples_per_second": 205.852, "eval_scitail-pairs-qa_steps_per_second": 1.608, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_xsum-pairs_loss": 0.28240200877189636, "eval_xsum-pairs_runtime": 3.0291, "eval_xsum-pairs_samples_per_second": 42.256, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_sciq_pairs_loss": 0.10066033899784088, "eval_sciq_pairs_runtime": 3.5255, "eval_sciq_pairs_samples_per_second": 36.307, "eval_sciq_pairs_steps_per_second": 0.284, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_qasc_pairs_loss": 0.17016766965389252, "eval_qasc_pairs_runtime": 0.6154, "eval_qasc_pairs_samples_per_second": 207.998, "eval_qasc_pairs_steps_per_second": 1.625, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_openbookqa_pairs_loss": 0.7975099086761475, "eval_openbookqa_pairs_runtime": 0.5928, "eval_openbookqa_pairs_samples_per_second": 215.92, "eval_openbookqa_pairs_steps_per_second": 1.687, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_msmarco_pairs_loss": 0.9013682007789612, "eval_msmarco_pairs_runtime": 1.5261, "eval_msmarco_pairs_samples_per_second": 83.872, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_nq_pairs_loss": 0.7462138533592224, "eval_nq_pairs_runtime": 2.9053, "eval_nq_pairs_samples_per_second": 44.057, "eval_nq_pairs_steps_per_second": 0.344, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_trivia_pairs_loss": 0.8016729354858398, "eval_trivia_pairs_runtime": 3.4494, "eval_trivia_pairs_samples_per_second": 37.108, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_gooaq_pairs_loss": 0.41342735290527344, "eval_gooaq_pairs_runtime": 0.9541, "eval_gooaq_pairs_samples_per_second": 134.158, "eval_gooaq_pairs_steps_per_second": 1.048, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_paws-pos_loss": 0.02217279002070427, "eval_paws-pos_runtime": 0.7057, "eval_paws-pos_samples_per_second": 181.377, "eval_paws-pos_steps_per_second": 1.417, "step": 1860 }, { "epoch": 1.9135802469135803, "eval_global_dataset_loss": 0.46486756205558777, "eval_global_dataset_runtime": 13.4252, "eval_global_dataset_samples_per_second": 30.986, "eval_global_dataset_steps_per_second": 0.298, "step": 1860 }, { "epoch": 1.9146090534979425, "grad_norm": 8.730395317077637, "learning_rate": 2.8080308556859634e-05, "loss": 0.5098, "step": 1861 }, { "epoch": 1.9156378600823045, "grad_norm": 10.401448249816895, "learning_rate": 2.8066590020149104e-05, "loss": 0.5998, "step": 1862 }, { "epoch": 1.9166666666666665, "grad_norm": 6.233475685119629, "learning_rate": 2.805286364489765e-05, "loss": 0.4014, "step": 1863 }, { "epoch": 1.9176954732510287, "grad_norm": 11.021480560302734, "learning_rate": 2.803912945383693e-05, "loss": 0.4707, "step": 1864 }, { "epoch": 1.918724279835391, "grad_norm": 7.44813871383667, "learning_rate": 2.8025387469711535e-05, "loss": 0.6546, "step": 1865 }, { "epoch": 1.9197530864197532, "grad_norm": 5.901599884033203, "learning_rate": 2.8011637715278977e-05, "loss": 0.2037, "step": 1866 }, { "epoch": 1.9207818930041154, "grad_norm": 0.18091580271720886, "learning_rate": 2.7997880213309625e-05, "loss": 0.003, "step": 1867 }, { "epoch": 1.9218106995884774, "grad_norm": 5.083710193634033, "learning_rate": 2.798411498658667e-05, "loss": 0.1487, "step": 1868 }, { "epoch": 1.9228395061728394, "grad_norm": 0.20561951398849487, "learning_rate": 2.7970342057906127e-05, "loss": 0.0033, "step": 1869 }, { "epoch": 1.9238683127572016, "grad_norm": 11.193281173706055, "learning_rate": 2.795656145007673e-05, "loss": 0.6399, "step": 1870 }, { "epoch": 1.9248971193415638, "grad_norm": 5.531721115112305, "learning_rate": 2.794277318591995e-05, "loss": 0.1786, "step": 1871 }, { "epoch": 1.925925925925926, "grad_norm": 0.0, "learning_rate": 2.792897728826993e-05, "loss": 0.0, "step": 1872 }, { "epoch": 1.926954732510288, "grad_norm": 9.208216667175293, "learning_rate": 2.791517377997346e-05, "loss": 0.4316, "step": 1873 }, { "epoch": 1.9279835390946503, "grad_norm": 13.096261024475098, "learning_rate": 2.790136268388993e-05, "loss": 1.7473, "step": 1874 }, { "epoch": 1.9290123456790123, "grad_norm": 7.10617208480835, "learning_rate": 2.78875440228913e-05, "loss": 0.2205, "step": 1875 }, { "epoch": 1.9300411522633745, "grad_norm": 4.60221004486084, "learning_rate": 2.7873717819862048e-05, "loss": 0.1184, "step": 1876 }, { "epoch": 1.9310699588477367, "grad_norm": 13.202059745788574, "learning_rate": 2.7859884097699152e-05, "loss": 0.9313, "step": 1877 }, { "epoch": 1.932098765432099, "grad_norm": 10.612934112548828, "learning_rate": 2.784604287931204e-05, "loss": 0.6615, "step": 1878 }, { "epoch": 1.933127572016461, "grad_norm": 2.505542755126953, "learning_rate": 2.783219418762255e-05, "loss": 0.0461, "step": 1879 }, { "epoch": 1.934156378600823, "grad_norm": 0.5168958902359009, "learning_rate": 2.7818338045564902e-05, "loss": 0.0076, "step": 1880 }, { "epoch": 1.934156378600823, "eval_Qnli-dev_cosine_accuracy": 0.697265625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7700129747390747, "eval_Qnli-dev_cosine_ap": 0.750412858057858, "eval_Qnli-dev_cosine_f1": 0.6950092421441774, "eval_Qnli-dev_cosine_f1_threshold": 0.7146259546279907, "eval_Qnli-dev_cosine_precision": 0.6163934426229508, "eval_Qnli-dev_cosine_recall": 0.7966101694915254, "eval_Qnli-dev_dot_accuracy": 0.654296875, "eval_Qnli-dev_dot_accuracy_threshold": 349.13232421875, "eval_Qnli-dev_dot_ap": 0.690508783570581, "eval_Qnli-dev_dot_f1": 0.6677115987460814, "eval_Qnli-dev_dot_f1_threshold": 288.740478515625, "eval_Qnli-dev_dot_precision": 0.5298507462686567, "eval_Qnli-dev_dot_recall": 0.902542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.701171875, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.20905876159668, "eval_Qnli-dev_euclidean_ap": 0.7609109769968383, "eval_Qnli-dev_euclidean_f1": 0.6998087954110899, "eval_Qnli-dev_euclidean_f1_threshold": 16.04629898071289, "eval_Qnli-dev_euclidean_precision": 0.6376306620209059, "eval_Qnli-dev_euclidean_recall": 0.7754237288135594, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 327.05865478515625, "eval_Qnli-dev_manhattan_ap": 0.7647373667172418, "eval_Qnli-dev_manhattan_f1": 0.7091633466135459, "eval_Qnli-dev_manhattan_f1_threshold": 328.59735107421875, "eval_Qnli-dev_manhattan_precision": 0.6691729323308271, "eval_Qnli-dev_manhattan_recall": 0.7542372881355932, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 349.13232421875, "eval_Qnli-dev_max_ap": 0.7647373667172418, "eval_Qnli-dev_max_f1": 0.7091633466135459, "eval_Qnli-dev_max_f1_threshold": 328.59735107421875, "eval_Qnli-dev_max_precision": 0.6691729323308271, "eval_Qnli-dev_max_recall": 0.902542372881356, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8236054182052612, "eval_allNLI-dev_cosine_ap": 0.629894923701272, "eval_allNLI-dev_cosine_f1": 0.6376146788990825, "eval_allNLI-dev_cosine_f1_threshold": 0.7714892625808716, "eval_allNLI-dev_cosine_precision": 0.5285171102661597, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 393.950927734375, "eval_allNLI-dev_dot_ap": 0.5528430810804673, "eval_allNLI-dev_dot_f1": 0.6018099547511312, "eval_allNLI-dev_dot_f1_threshold": 328.9024353027344, "eval_allNLI-dev_dot_precision": 0.4944237918215613, "eval_allNLI-dev_dot_recall": 0.7687861271676301, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.846963882446289, "eval_allNLI-dev_euclidean_ap": 0.6363626822602223, "eval_allNLI-dev_euclidean_f1": 0.6433260393873084, "eval_allNLI-dev_euclidean_f1_threshold": 14.617688179016113, "eval_allNLI-dev_euclidean_precision": 0.5176056338028169, "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 254.07041931152344, "eval_allNLI-dev_manhattan_ap": 0.6308216681329502, "eval_allNLI-dev_manhattan_f1": 0.6460176991150444, "eval_allNLI-dev_manhattan_f1_threshold": 304.54638671875, "eval_allNLI-dev_manhattan_precision": 0.5232974910394266, "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 393.950927734375, "eval_allNLI-dev_max_ap": 0.6363626822602223, "eval_allNLI-dev_max_f1": 0.6460176991150444, "eval_allNLI-dev_max_f1_threshold": 328.9024353027344, "eval_allNLI-dev_max_precision": 0.5285171102661597, "eval_allNLI-dev_max_recall": 0.8497109826589595, "eval_sequential_score": 0.7647373667172418, "eval_sts-test_pearson_cosine": 0.8373667820194939, "eval_sts-test_pearson_dot": 0.8226566862254409, "eval_sts-test_pearson_euclidean": 0.863935550264844, "eval_sts-test_pearson_manhattan": 0.8615154988093832, "eval_sts-test_pearson_max": 0.863935550264844, "eval_sts-test_spearman_cosine": 0.8706524825901765, "eval_sts-test_spearman_dot": 0.8226900496863757, "eval_sts-test_spearman_euclidean": 0.8633828639334505, "eval_sts-test_spearman_manhattan": 0.8609186804761946, "eval_sts-test_spearman_max": 0.8706524825901765, "eval_vitaminc-pairs_loss": 2.9923086166381836, "eval_vitaminc-pairs_runtime": 3.1959, "eval_vitaminc-pairs_samples_per_second": 40.051, "eval_vitaminc-pairs_steps_per_second": 0.313, "step": 1880 }, { "epoch": 1.934156378600823, "eval_negation-triplets_loss": 0.9319506287574768, "eval_negation-triplets_runtime": 0.7545, "eval_negation-triplets_samples_per_second": 169.643, "eval_negation-triplets_steps_per_second": 1.325, "step": 1880 }, { "epoch": 1.934156378600823, "eval_scitail-pairs-pos_loss": 0.13219445943832397, "eval_scitail-pairs-pos_runtime": 0.8936, "eval_scitail-pairs-pos_samples_per_second": 143.233, "eval_scitail-pairs-pos_steps_per_second": 1.119, "step": 1880 }, { "epoch": 1.934156378600823, "eval_scitail-pairs-qa_loss": 0.0009004413150250912, "eval_scitail-pairs-qa_runtime": 0.6049, "eval_scitail-pairs-qa_samples_per_second": 211.609, "eval_scitail-pairs-qa_steps_per_second": 1.653, "step": 1880 }, { "epoch": 1.934156378600823, "eval_xsum-pairs_loss": 0.21593664586544037, "eval_xsum-pairs_runtime": 3.0339, "eval_xsum-pairs_samples_per_second": 42.19, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1880 }, { "epoch": 1.934156378600823, "eval_sciq_pairs_loss": 0.10573841631412506, "eval_sciq_pairs_runtime": 3.5286, "eval_sciq_pairs_samples_per_second": 36.275, "eval_sciq_pairs_steps_per_second": 0.283, "step": 1880 }, { "epoch": 1.934156378600823, "eval_qasc_pairs_loss": 0.1666252315044403, "eval_qasc_pairs_runtime": 0.6395, "eval_qasc_pairs_samples_per_second": 200.154, "eval_qasc_pairs_steps_per_second": 1.564, "step": 1880 }, { "epoch": 1.934156378600823, "eval_openbookqa_pairs_loss": 0.7786993980407715, "eval_openbookqa_pairs_runtime": 0.6074, "eval_openbookqa_pairs_samples_per_second": 210.725, "eval_openbookqa_pairs_steps_per_second": 1.646, "step": 1880 }, { "epoch": 1.934156378600823, "eval_msmarco_pairs_loss": 0.8463943004608154, "eval_msmarco_pairs_runtime": 1.5321, "eval_msmarco_pairs_samples_per_second": 83.544, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 1880 }, { "epoch": 1.934156378600823, "eval_nq_pairs_loss": 0.6969786286354065, "eval_nq_pairs_runtime": 2.9146, "eval_nq_pairs_samples_per_second": 43.918, "eval_nq_pairs_steps_per_second": 0.343, "step": 1880 }, { "epoch": 1.934156378600823, "eval_trivia_pairs_loss": 0.8204184174537659, "eval_trivia_pairs_runtime": 3.4385, "eval_trivia_pairs_samples_per_second": 37.225, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1880 }, { "epoch": 1.934156378600823, "eval_gooaq_pairs_loss": 0.39143991470336914, "eval_gooaq_pairs_runtime": 0.9502, "eval_gooaq_pairs_samples_per_second": 134.711, "eval_gooaq_pairs_steps_per_second": 1.052, "step": 1880 }, { "epoch": 1.934156378600823, "eval_paws-pos_loss": 0.02165652997791767, "eval_paws-pos_runtime": 0.7103, "eval_paws-pos_samples_per_second": 180.204, "eval_paws-pos_steps_per_second": 1.408, "step": 1880 }, { "epoch": 1.934156378600823, "eval_global_dataset_loss": 0.44243207573890686, "eval_global_dataset_runtime": 13.4194, "eval_global_dataset_samples_per_second": 31.0, "eval_global_dataset_steps_per_second": 0.298, "step": 1880 }, { "epoch": 1.9351851851851851, "grad_norm": 11.172481536865234, "learning_rate": 2.7804474476085652e-05, "loss": 0.8086, "step": 1881 }, { "epoch": 1.9362139917695473, "grad_norm": 6.315141677856445, "learning_rate": 2.7790603502143648e-05, "loss": 0.4009, "step": 1882 }, { "epoch": 1.9372427983539096, "grad_norm": 5.0940117835998535, "learning_rate": 2.777672514671002e-05, "loss": 0.1506, "step": 1883 }, { "epoch": 1.9382716049382716, "grad_norm": 10.881324768066406, "learning_rate": 2.77628394327681e-05, "loss": 0.6098, "step": 1884 }, { "epoch": 1.9393004115226338, "grad_norm": 7.277545928955078, "learning_rate": 2.774894638331342e-05, "loss": 0.3716, "step": 1885 }, { "epoch": 1.9403292181069958, "grad_norm": 0.8290622234344482, "learning_rate": 2.7735046021353654e-05, "loss": 0.0106, "step": 1886 }, { "epoch": 1.941358024691358, "grad_norm": 4.880166053771973, "learning_rate": 2.772113836990859e-05, "loss": 0.1585, "step": 1887 }, { "epoch": 1.9423868312757202, "grad_norm": 2.724102020263672, "learning_rate": 2.7707223452010087e-05, "loss": 0.0522, "step": 1888 }, { "epoch": 1.9434156378600824, "grad_norm": 9.703840255737305, "learning_rate": 2.769330129070204e-05, "loss": 0.6374, "step": 1889 }, { "epoch": 1.9444444444444444, "grad_norm": 3.389514446258545, "learning_rate": 2.7679371909040325e-05, "loss": 0.0609, "step": 1890 }, { "epoch": 1.9454732510288066, "grad_norm": 0.0, "learning_rate": 2.766543533009279e-05, "loss": 0.0, "step": 1891 }, { "epoch": 1.9465020576131686, "grad_norm": 11.60602855682373, "learning_rate": 2.7651491576939206e-05, "loss": 0.6847, "step": 1892 }, { "epoch": 1.9475308641975309, "grad_norm": 4.8318281173706055, "learning_rate": 2.7637540672671205e-05, "loss": 0.1891, "step": 1893 }, { "epoch": 1.948559670781893, "grad_norm": 8.430917739868164, "learning_rate": 2.7623582640392285e-05, "loss": 0.4884, "step": 1894 }, { "epoch": 1.9495884773662553, "grad_norm": 5.366695880889893, "learning_rate": 2.760961750321773e-05, "loss": 0.1411, "step": 1895 }, { "epoch": 1.9506172839506173, "grad_norm": 5.996824264526367, "learning_rate": 2.7595645284274608e-05, "loss": 0.1993, "step": 1896 }, { "epoch": 1.9516460905349793, "grad_norm": 11.138838768005371, "learning_rate": 2.758166600670169e-05, "loss": 0.6724, "step": 1897 }, { "epoch": 1.9526748971193415, "grad_norm": 4.469819068908691, "learning_rate": 2.756767969364946e-05, "loss": 0.1213, "step": 1898 }, { "epoch": 1.9537037037037037, "grad_norm": 0.2501906156539917, "learning_rate": 2.7553686368280037e-05, "loss": 0.0021, "step": 1899 }, { "epoch": 1.954732510288066, "grad_norm": 5.7854390144348145, "learning_rate": 2.7539686053767176e-05, "loss": 0.1918, "step": 1900 }, { "epoch": 1.954732510288066, "eval_Qnli-dev_cosine_accuracy": 0.705078125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7991540431976318, "eval_Qnli-dev_cosine_ap": 0.7486645945894437, "eval_Qnli-dev_cosine_f1": 0.6927592954990216, "eval_Qnli-dev_cosine_f1_threshold": 0.739192008972168, "eval_Qnli-dev_cosine_precision": 0.6436363636363637, "eval_Qnli-dev_cosine_recall": 0.75, "eval_Qnli-dev_dot_accuracy": 0.681640625, "eval_Qnli-dev_dot_accuracy_threshold": 352.77490234375, "eval_Qnli-dev_dot_ap": 0.701354132763163, "eval_Qnli-dev_dot_f1": 0.6666666666666667, "eval_Qnli-dev_dot_f1_threshold": 294.2525634765625, "eval_Qnli-dev_dot_precision": 0.5329949238578681, "eval_Qnli-dev_dot_recall": 0.8898305084745762, "eval_Qnli-dev_euclidean_accuracy": 0.720703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.000884056091309, "eval_Qnli-dev_euclidean_ap": 0.7575757639794356, "eval_Qnli-dev_euclidean_f1": 0.6961538461538461, "eval_Qnli-dev_euclidean_f1_threshold": 15.680222511291504, "eval_Qnli-dev_euclidean_precision": 0.6373239436619719, "eval_Qnli-dev_euclidean_recall": 0.7669491525423728, "eval_Qnli-dev_manhattan_accuracy": 0.72265625, "eval_Qnli-dev_manhattan_accuracy_threshold": 291.60565185546875, "eval_Qnli-dev_manhattan_ap": 0.7610718359836485, "eval_Qnli-dev_manhattan_f1": 0.6956521739130435, "eval_Qnli-dev_manhattan_f1_threshold": 331.0019836425781, "eval_Qnli-dev_manhattan_precision": 0.6279863481228669, "eval_Qnli-dev_manhattan_recall": 0.7796610169491526, "eval_Qnli-dev_max_accuracy": 0.72265625, "eval_Qnli-dev_max_accuracy_threshold": 352.77490234375, "eval_Qnli-dev_max_ap": 0.7610718359836485, "eval_Qnli-dev_max_f1": 0.6961538461538461, "eval_Qnli-dev_max_f1_threshold": 331.0019836425781, "eval_Qnli-dev_max_precision": 0.6436363636363637, "eval_Qnli-dev_max_recall": 0.8898305084745762, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8532309532165527, "eval_allNLI-dev_cosine_ap": 0.6302025367525289, "eval_allNLI-dev_cosine_f1": 0.6386946386946387, "eval_allNLI-dev_cosine_f1_threshold": 0.7679711580276489, "eval_allNLI-dev_cosine_precision": 0.53515625, "eval_allNLI-dev_cosine_recall": 0.791907514450867, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 372.23406982421875, "eval_allNLI-dev_dot_ap": 0.5637045533903416, "eval_allNLI-dev_dot_f1": 0.6143497757847535, "eval_allNLI-dev_dot_f1_threshold": 324.8187561035156, "eval_allNLI-dev_dot_precision": 0.5018315018315018, "eval_allNLI-dev_dot_recall": 0.791907514450867, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.496063232421875, "eval_allNLI-dev_euclidean_ap": 0.6364019233983443, "eval_allNLI-dev_euclidean_f1": 0.6445916114790288, "eval_allNLI-dev_euclidean_f1_threshold": 14.737340927124023, "eval_allNLI-dev_euclidean_precision": 0.5214285714285715, "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 238.5453643798828, "eval_allNLI-dev_manhattan_ap": 0.6331464170000961, "eval_allNLI-dev_manhattan_f1": 0.6462882096069869, "eval_allNLI-dev_manhattan_f1_threshold": 312.4643249511719, "eval_allNLI-dev_manhattan_precision": 0.519298245614035, "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 372.23406982421875, "eval_allNLI-dev_max_ap": 0.6364019233983443, "eval_allNLI-dev_max_f1": 0.6462882096069869, "eval_allNLI-dev_max_f1_threshold": 324.8187561035156, "eval_allNLI-dev_max_precision": 0.53515625, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7610718359836485, "eval_sts-test_pearson_cosine": 0.8331628946659952, "eval_sts-test_pearson_dot": 0.8164911059638393, "eval_sts-test_pearson_euclidean": 0.8645888643935662, "eval_sts-test_pearson_manhattan": 0.861337964752764, "eval_sts-test_pearson_max": 0.8645888643935662, "eval_sts-test_spearman_cosine": 0.8703052128017793, "eval_sts-test_spearman_dot": 0.819658330732154, "eval_sts-test_spearman_euclidean": 0.8649476812905237, "eval_sts-test_spearman_manhattan": 0.8619144407934146, "eval_sts-test_spearman_max": 0.8703052128017793, "eval_vitaminc-pairs_loss": 3.2663421630859375, "eval_vitaminc-pairs_runtime": 3.208, "eval_vitaminc-pairs_samples_per_second": 39.9, "eval_vitaminc-pairs_steps_per_second": 0.312, "step": 1900 }, { "epoch": 1.954732510288066, "eval_negation-triplets_loss": 0.9119235277175903, "eval_negation-triplets_runtime": 0.7555, "eval_negation-triplets_samples_per_second": 169.435, "eval_negation-triplets_steps_per_second": 1.324, "step": 1900 }, { "epoch": 1.954732510288066, "eval_scitail-pairs-pos_loss": 0.15329499542713165, "eval_scitail-pairs-pos_runtime": 0.9008, "eval_scitail-pairs-pos_samples_per_second": 142.102, "eval_scitail-pairs-pos_steps_per_second": 1.11, "step": 1900 }, { "epoch": 1.954732510288066, "eval_scitail-pairs-qa_loss": 0.0005385727272368968, "eval_scitail-pairs-qa_runtime": 0.5971, "eval_scitail-pairs-qa_samples_per_second": 214.356, "eval_scitail-pairs-qa_steps_per_second": 1.675, "step": 1900 }, { "epoch": 1.954732510288066, "eval_xsum-pairs_loss": 0.3071066439151764, "eval_xsum-pairs_runtime": 3.031, "eval_xsum-pairs_samples_per_second": 42.231, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1900 }, { "epoch": 1.954732510288066, "eval_sciq_pairs_loss": 0.09913761168718338, "eval_sciq_pairs_runtime": 3.4851, "eval_sciq_pairs_samples_per_second": 36.728, "eval_sciq_pairs_steps_per_second": 0.287, "step": 1900 }, { "epoch": 1.954732510288066, "eval_qasc_pairs_loss": 0.1846480518579483, "eval_qasc_pairs_runtime": 0.6217, "eval_qasc_pairs_samples_per_second": 205.895, "eval_qasc_pairs_steps_per_second": 1.609, "step": 1900 }, { "epoch": 1.954732510288066, "eval_openbookqa_pairs_loss": 0.7798338532447815, "eval_openbookqa_pairs_runtime": 0.6011, "eval_openbookqa_pairs_samples_per_second": 212.945, "eval_openbookqa_pairs_steps_per_second": 1.664, "step": 1900 }, { "epoch": 1.954732510288066, "eval_msmarco_pairs_loss": 0.9161882996559143, "eval_msmarco_pairs_runtime": 1.5239, "eval_msmarco_pairs_samples_per_second": 83.996, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1900 }, { "epoch": 1.954732510288066, "eval_nq_pairs_loss": 0.7218447327613831, "eval_nq_pairs_runtime": 2.8999, "eval_nq_pairs_samples_per_second": 44.139, "eval_nq_pairs_steps_per_second": 0.345, "step": 1900 }, { "epoch": 1.954732510288066, "eval_trivia_pairs_loss": 0.8396673798561096, "eval_trivia_pairs_runtime": 3.449, "eval_trivia_pairs_samples_per_second": 37.112, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1900 }, { "epoch": 1.954732510288066, "eval_gooaq_pairs_loss": 0.3774771988391876, "eval_gooaq_pairs_runtime": 0.952, "eval_gooaq_pairs_samples_per_second": 134.454, "eval_gooaq_pairs_steps_per_second": 1.05, "step": 1900 }, { "epoch": 1.954732510288066, "eval_paws-pos_loss": 0.021937111392617226, "eval_paws-pos_runtime": 0.701, "eval_paws-pos_samples_per_second": 182.593, "eval_paws-pos_steps_per_second": 1.427, "step": 1900 }, { "epoch": 1.954732510288066, "eval_global_dataset_loss": 0.4879857301712036, "eval_global_dataset_runtime": 13.4188, "eval_global_dataset_samples_per_second": 31.001, "eval_global_dataset_steps_per_second": 0.298, "step": 1900 }, { "epoch": 1.955761316872428, "grad_norm": 3.453291893005371, "learning_rate": 2.7525678773296164e-05, "loss": 0.1054, "step": 1901 }, { "epoch": 1.9567901234567902, "grad_norm": 5.290757656097412, "learning_rate": 2.7511664550063875e-05, "loss": 0.1073, "step": 1902 }, { "epoch": 1.9578189300411522, "grad_norm": 9.06633472442627, "learning_rate": 2.749764340727864e-05, "loss": 0.4918, "step": 1903 }, { "epoch": 1.9588477366255144, "grad_norm": 5.419406414031982, "learning_rate": 2.7483615368160265e-05, "loss": 0.1429, "step": 1904 }, { "epoch": 1.9598765432098766, "grad_norm": 9.918625831604004, "learning_rate": 2.7469580455939988e-05, "loss": 0.4735, "step": 1905 }, { "epoch": 1.9609053497942388, "grad_norm": 9.553006172180176, "learning_rate": 2.745553869386041e-05, "loss": 0.5734, "step": 1906 }, { "epoch": 1.9619341563786008, "grad_norm": 0.6265835762023926, "learning_rate": 2.7441490105175482e-05, "loss": 0.0327, "step": 1907 }, { "epoch": 1.9629629629629628, "grad_norm": 6.599977493286133, "learning_rate": 2.7427434713150466e-05, "loss": 0.2095, "step": 1908 }, { "epoch": 1.963991769547325, "grad_norm": 3.6899020671844482, "learning_rate": 2.741337254106189e-05, "loss": 0.1096, "step": 1909 }, { "epoch": 1.9650205761316872, "grad_norm": 9.440287590026855, "learning_rate": 2.7399303612197496e-05, "loss": 0.5209, "step": 1910 }, { "epoch": 1.9660493827160495, "grad_norm": 10.141464233398438, "learning_rate": 2.7385227949856236e-05, "loss": 0.1833, "step": 1911 }, { "epoch": 1.9670781893004117, "grad_norm": 4.105213165283203, "learning_rate": 2.73711455773482e-05, "loss": 0.1049, "step": 1912 }, { "epoch": 1.9681069958847737, "grad_norm": 6.282651424407959, "learning_rate": 2.7357056517994592e-05, "loss": 0.2033, "step": 1913 }, { "epoch": 1.9691358024691357, "grad_norm": 11.242276191711426, "learning_rate": 2.734296079512769e-05, "loss": 0.6956, "step": 1914 }, { "epoch": 1.9701646090534979, "grad_norm": 6.709212779998779, "learning_rate": 2.7328858432090816e-05, "loss": 0.3216, "step": 1915 }, { "epoch": 1.97119341563786, "grad_norm": 4.293277740478516, "learning_rate": 2.7314749452238275e-05, "loss": 0.095, "step": 1916 }, { "epoch": 1.9722222222222223, "grad_norm": 16.324626922607422, "learning_rate": 2.7300633878935343e-05, "loss": 1.6029, "step": 1917 }, { "epoch": 1.9732510288065843, "grad_norm": 8.729584693908691, "learning_rate": 2.7286511735558198e-05, "loss": 0.5031, "step": 1918 }, { "epoch": 1.9742798353909465, "grad_norm": 5.903535842895508, "learning_rate": 2.7272383045493913e-05, "loss": 0.1559, "step": 1919 }, { "epoch": 1.9753086419753085, "grad_norm": 11.0974760055542, "learning_rate": 2.7258247832140398e-05, "loss": 0.689, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7802547812461853, "eval_Qnli-dev_cosine_ap": 0.7589678827271121, "eval_Qnli-dev_cosine_f1": 0.7020872865275143, "eval_Qnli-dev_cosine_f1_threshold": 0.7303462028503418, "eval_Qnli-dev_cosine_precision": 0.6357388316151202, "eval_Qnli-dev_cosine_recall": 0.7838983050847458, "eval_Qnli-dev_dot_accuracy": 0.669921875, "eval_Qnli-dev_dot_accuracy_threshold": 373.420166015625, "eval_Qnli-dev_dot_ap": 0.6996726870091786, "eval_Qnli-dev_dot_f1": 0.6718750000000001, "eval_Qnli-dev_dot_f1_threshold": 297.3533630371094, "eval_Qnli-dev_dot_precision": 0.5321782178217822, "eval_Qnli-dev_dot_recall": 0.9110169491525424, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.057470321655273, "eval_Qnli-dev_euclidean_ap": 0.7673911581951351, "eval_Qnli-dev_euclidean_f1": 0.7084870848708488, "eval_Qnli-dev_euclidean_f1_threshold": 16.23279571533203, "eval_Qnli-dev_euclidean_precision": 0.6274509803921569, "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 283.3794250488281, "eval_Qnli-dev_manhattan_ap": 0.7693146584936291, "eval_Qnli-dev_manhattan_f1": 0.7095516569200779, "eval_Qnli-dev_manhattan_f1_threshold": 327.70379638671875, "eval_Qnli-dev_manhattan_precision": 0.6570397111913358, "eval_Qnli-dev_manhattan_recall": 0.7711864406779662, "eval_Qnli-dev_max_accuracy": 0.72265625, "eval_Qnli-dev_max_accuracy_threshold": 373.420166015625, "eval_Qnli-dev_max_ap": 0.7693146584936291, "eval_Qnli-dev_max_f1": 0.7095516569200779, "eval_Qnli-dev_max_f1_threshold": 327.70379638671875, "eval_Qnli-dev_max_precision": 0.6570397111913358, "eval_Qnli-dev_max_recall": 0.9110169491525424, "eval_allNLI-dev_cosine_accuracy": 0.73828125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8414372205734253, "eval_allNLI-dev_cosine_ap": 0.6362556022858551, "eval_allNLI-dev_cosine_f1": 0.6515837104072397, "eval_allNLI-dev_cosine_f1_threshold": 0.7504405975341797, "eval_allNLI-dev_cosine_precision": 0.5353159851301115, "eval_allNLI-dev_cosine_recall": 0.8323699421965318, "eval_allNLI-dev_dot_accuracy": 0.705078125, "eval_allNLI-dev_dot_accuracy_threshold": 370.8945007324219, "eval_allNLI-dev_dot_ap": 0.5842482557708913, "eval_allNLI-dev_dot_f1": 0.6205357142857143, "eval_allNLI-dev_dot_f1_threshold": 322.5114440917969, "eval_allNLI-dev_dot_precision": 0.5054545454545455, "eval_allNLI-dev_dot_recall": 0.8034682080924855, "eval_allNLI-dev_euclidean_accuracy": 0.740234375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.304725646972656, "eval_allNLI-dev_euclidean_ap": 0.6444108155589591, "eval_allNLI-dev_euclidean_f1": 0.648068669527897, "eval_allNLI-dev_euclidean_f1_threshold": 15.276135444641113, "eval_allNLI-dev_euclidean_precision": 0.515358361774744, "eval_allNLI-dev_euclidean_recall": 0.8728323699421965, "eval_allNLI-dev_manhattan_accuracy": 0.73828125, "eval_allNLI-dev_manhattan_accuracy_threshold": 257.411376953125, "eval_allNLI-dev_manhattan_ap": 0.6378845688852381, "eval_allNLI-dev_manhattan_f1": 0.6479481641468683, "eval_allNLI-dev_manhattan_f1_threshold": 318.2986755371094, "eval_allNLI-dev_manhattan_precision": 0.5172413793103449, "eval_allNLI-dev_manhattan_recall": 0.8670520231213873, "eval_allNLI-dev_max_accuracy": 0.740234375, "eval_allNLI-dev_max_accuracy_threshold": 370.8945007324219, "eval_allNLI-dev_max_ap": 0.6444108155589591, "eval_allNLI-dev_max_f1": 0.6515837104072397, "eval_allNLI-dev_max_f1_threshold": 322.5114440917969, "eval_allNLI-dev_max_precision": 0.5353159851301115, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.7693146584936291, "eval_sts-test_pearson_cosine": 0.8356711247591408, "eval_sts-test_pearson_dot": 0.8162478084316604, "eval_sts-test_pearson_euclidean": 0.8653258137671263, "eval_sts-test_pearson_manhattan": 0.8629686568902456, "eval_sts-test_pearson_max": 0.8653258137671263, "eval_sts-test_spearman_cosine": 0.8672133481522474, "eval_sts-test_spearman_dot": 0.8157154760960033, "eval_sts-test_spearman_euclidean": 0.8632872752391186, "eval_sts-test_spearman_manhattan": 0.8611170180666812, "eval_sts-test_spearman_max": 0.8672133481522474, "eval_vitaminc-pairs_loss": 3.207522392272949, "eval_vitaminc-pairs_runtime": 3.2104, "eval_vitaminc-pairs_samples_per_second": 39.871, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_negation-triplets_loss": 0.9503350853919983, "eval_negation-triplets_runtime": 0.7605, "eval_negation-triplets_samples_per_second": 168.312, "eval_negation-triplets_steps_per_second": 1.315, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_scitail-pairs-pos_loss": 0.15005101263523102, "eval_scitail-pairs-pos_runtime": 0.897, "eval_scitail-pairs-pos_samples_per_second": 142.698, "eval_scitail-pairs-pos_steps_per_second": 1.115, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_scitail-pairs-qa_loss": 0.0005707141826860607, "eval_scitail-pairs-qa_runtime": 0.6051, "eval_scitail-pairs-qa_samples_per_second": 211.551, "eval_scitail-pairs-qa_steps_per_second": 1.653, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_xsum-pairs_loss": 0.24305416643619537, "eval_xsum-pairs_runtime": 3.0297, "eval_xsum-pairs_samples_per_second": 42.249, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_sciq_pairs_loss": 0.0905257910490036, "eval_sciq_pairs_runtime": 3.512, "eval_sciq_pairs_samples_per_second": 36.446, "eval_sciq_pairs_steps_per_second": 0.285, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_qasc_pairs_loss": 0.1924685686826706, "eval_qasc_pairs_runtime": 0.6209, "eval_qasc_pairs_samples_per_second": 206.156, "eval_qasc_pairs_steps_per_second": 1.611, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_openbookqa_pairs_loss": 0.8229547739028931, "eval_openbookqa_pairs_runtime": 0.598, "eval_openbookqa_pairs_samples_per_second": 214.034, "eval_openbookqa_pairs_steps_per_second": 1.672, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_msmarco_pairs_loss": 0.8960761427879333, "eval_msmarco_pairs_runtime": 1.5297, "eval_msmarco_pairs_samples_per_second": 83.678, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_nq_pairs_loss": 0.7282431721687317, "eval_nq_pairs_runtime": 2.9033, "eval_nq_pairs_samples_per_second": 44.088, "eval_nq_pairs_steps_per_second": 0.344, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_trivia_pairs_loss": 0.846880316734314, "eval_trivia_pairs_runtime": 3.4453, "eval_trivia_pairs_samples_per_second": 37.152, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_gooaq_pairs_loss": 0.4108849763870239, "eval_gooaq_pairs_runtime": 0.9576, "eval_gooaq_pairs_samples_per_second": 133.664, "eval_gooaq_pairs_steps_per_second": 1.044, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_paws-pos_loss": 0.02292362041771412, "eval_paws-pos_runtime": 0.7402, "eval_paws-pos_samples_per_second": 172.929, "eval_paws-pos_steps_per_second": 1.351, "step": 1920 }, { "epoch": 1.9753086419753085, "eval_global_dataset_loss": 0.4752640426158905, "eval_global_dataset_runtime": 13.4207, "eval_global_dataset_samples_per_second": 30.997, "eval_global_dataset_steps_per_second": 0.298, "step": 1920 }, { "epoch": 1.9763374485596708, "grad_norm": 5.3949713706970215, "learning_rate": 2.7244106118906372e-05, "loss": 0.1694, "step": 1921 }, { "epoch": 1.977366255144033, "grad_norm": 11.391617774963379, "learning_rate": 2.722995792921131e-05, "loss": 0.6042, "step": 1922 }, { "epoch": 1.9783950617283952, "grad_norm": 8.24395751953125, "learning_rate": 2.721580328648541e-05, "loss": 0.4886, "step": 1923 }, { "epoch": 1.9794238683127572, "grad_norm": 4.720622539520264, "learning_rate": 2.7201642214169555e-05, "loss": 0.2256, "step": 1924 }, { "epoch": 1.9804526748971192, "grad_norm": 0.0937778577208519, "learning_rate": 2.71874747357153e-05, "loss": 0.0011, "step": 1925 }, { "epoch": 1.9814814814814814, "grad_norm": 5.925360202789307, "learning_rate": 2.7173300874584784e-05, "loss": 0.2649, "step": 1926 }, { "epoch": 1.9825102880658436, "grad_norm": 0.691866397857666, "learning_rate": 2.715912065425072e-05, "loss": 0.0076, "step": 1927 }, { "epoch": 1.9835390946502058, "grad_norm": 4.325530529022217, "learning_rate": 2.714493409819635e-05, "loss": 0.1577, "step": 1928 }, { "epoch": 1.984567901234568, "grad_norm": 6.408902168273926, "learning_rate": 2.7130741229915425e-05, "loss": 0.2949, "step": 1929 }, { "epoch": 1.98559670781893, "grad_norm": 8.462160110473633, "learning_rate": 2.711654207291213e-05, "loss": 0.0956, "step": 1930 }, { "epoch": 1.986625514403292, "grad_norm": 5.3168559074401855, "learning_rate": 2.710233665070108e-05, "loss": 0.1822, "step": 1931 }, { "epoch": 1.9876543209876543, "grad_norm": 6.17379093170166, "learning_rate": 2.7088124986807253e-05, "loss": 0.1155, "step": 1932 }, { "epoch": 1.9886831275720165, "grad_norm": 4.081679821014404, "learning_rate": 2.7073907104765966e-05, "loss": 0.0821, "step": 1933 }, { "epoch": 1.9897119341563787, "grad_norm": 6.978852272033691, "learning_rate": 2.705968302812284e-05, "loss": 0.2296, "step": 1934 }, { "epoch": 1.9907407407407407, "grad_norm": 11.4292573928833, "learning_rate": 2.704545278043375e-05, "loss": 0.2635, "step": 1935 }, { "epoch": 1.991769547325103, "grad_norm": 5.254778861999512, "learning_rate": 2.703121638526479e-05, "loss": 0.0338, "step": 1936 }, { "epoch": 1.992798353909465, "grad_norm": 10.508069038391113, "learning_rate": 2.701697386619224e-05, "loss": 0.5136, "step": 1937 }, { "epoch": 1.9938271604938271, "grad_norm": 1.189597487449646, "learning_rate": 2.700272524680252e-05, "loss": 0.0701, "step": 1938 }, { "epoch": 1.9948559670781894, "grad_norm": 9.364729881286621, "learning_rate": 2.6988470550692144e-05, "loss": 0.1657, "step": 1939 }, { "epoch": 1.9958847736625516, "grad_norm": 11.865019798278809, "learning_rate": 2.6974209801467692e-05, "loss": 0.6497, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_Qnli-dev_cosine_accuracy": 0.720703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7849160432815552, "eval_Qnli-dev_cosine_ap": 0.7548562645268696, "eval_Qnli-dev_cosine_f1": 0.6881720430107527, "eval_Qnli-dev_cosine_f1_threshold": 0.6856629848480225, "eval_Qnli-dev_cosine_precision": 0.5962732919254659, "eval_Qnli-dev_cosine_recall": 0.8135593220338984, "eval_Qnli-dev_dot_accuracy": 0.6796875, "eval_Qnli-dev_dot_accuracy_threshold": 366.61492919921875, "eval_Qnli-dev_dot_ap": 0.7083100362161063, "eval_Qnli-dev_dot_f1": 0.6656626506024097, "eval_Qnli-dev_dot_f1_threshold": 257.82672119140625, "eval_Qnli-dev_dot_precision": 0.5163551401869159, "eval_Qnli-dev_dot_recall": 0.9364406779661016, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.144988059997559, "eval_Qnli-dev_euclidean_ap": 0.7614130013133746, "eval_Qnli-dev_euclidean_f1": 0.6964285714285714, "eval_Qnli-dev_euclidean_f1_threshold": 16.77385139465332, "eval_Qnli-dev_euclidean_precision": 0.6018518518518519, "eval_Qnli-dev_euclidean_recall": 0.826271186440678, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 298.89862060546875, "eval_Qnli-dev_manhattan_ap": 0.7656973656104571, "eval_Qnli-dev_manhattan_f1": 0.6992481203007519, "eval_Qnli-dev_manhattan_f1_threshold": 342.295166015625, "eval_Qnli-dev_manhattan_precision": 0.6283783783783784, "eval_Qnli-dev_manhattan_recall": 0.788135593220339, "eval_Qnli-dev_max_accuracy": 0.720703125, "eval_Qnli-dev_max_accuracy_threshold": 366.61492919921875, "eval_Qnli-dev_max_ap": 0.7656973656104571, "eval_Qnli-dev_max_f1": 0.6992481203007519, "eval_Qnli-dev_max_f1_threshold": 342.295166015625, "eval_Qnli-dev_max_precision": 0.6283783783783784, "eval_Qnli-dev_max_recall": 0.9364406779661016, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8133357167243958, "eval_allNLI-dev_cosine_ap": 0.6262180918993713, "eval_allNLI-dev_cosine_f1": 0.6378896882494005, "eval_allNLI-dev_cosine_f1_threshold": 0.7387667298316956, "eval_allNLI-dev_cosine_precision": 0.5450819672131147, "eval_allNLI-dev_cosine_recall": 0.7687861271676301, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 373.570556640625, "eval_allNLI-dev_dot_ap": 0.5578521452421703, "eval_allNLI-dev_dot_f1": 0.5994962216624685, "eval_allNLI-dev_dot_f1_threshold": 317.295654296875, "eval_allNLI-dev_dot_precision": 0.53125, "eval_allNLI-dev_dot_recall": 0.6878612716763006, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.49420166015625, "eval_allNLI-dev_euclidean_ap": 0.6344157899094915, "eval_allNLI-dev_euclidean_f1": 0.6439024390243901, "eval_allNLI-dev_euclidean_f1_threshold": 14.732034683227539, "eval_allNLI-dev_euclidean_precision": 0.5569620253164557, "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 263.07177734375, "eval_allNLI-dev_manhattan_ap": 0.6281999925675901, "eval_allNLI-dev_manhattan_f1": 0.6425339366515838, "eval_allNLI-dev_manhattan_f1_threshold": 325.59112548828125, "eval_allNLI-dev_manhattan_precision": 0.5278810408921933, "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 373.570556640625, "eval_allNLI-dev_max_ap": 0.6344157899094915, "eval_allNLI-dev_max_f1": 0.6439024390243901, "eval_allNLI-dev_max_f1_threshold": 325.59112548828125, "eval_allNLI-dev_max_precision": 0.5569620253164557, "eval_allNLI-dev_max_recall": 0.8208092485549133, "eval_sequential_score": 0.7656973656104571, "eval_sts-test_pearson_cosine": 0.8350136239443786, "eval_sts-test_pearson_dot": 0.8107646355376409, "eval_sts-test_pearson_euclidean": 0.8645395213219016, "eval_sts-test_pearson_manhattan": 0.8626709824722729, "eval_sts-test_pearson_max": 0.8645395213219016, "eval_sts-test_spearman_cosine": 0.8660674921043207, "eval_sts-test_spearman_dot": 0.8101366943484286, "eval_sts-test_spearman_euclidean": 0.8627660125464315, "eval_sts-test_spearman_manhattan": 0.860602244306896, "eval_sts-test_spearman_max": 0.8660674921043207, "eval_vitaminc-pairs_loss": 3.455519914627075, "eval_vitaminc-pairs_runtime": 3.2189, "eval_vitaminc-pairs_samples_per_second": 39.765, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_negation-triplets_loss": 1.0325738191604614, "eval_negation-triplets_runtime": 0.7543, "eval_negation-triplets_samples_per_second": 169.684, "eval_negation-triplets_steps_per_second": 1.326, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_scitail-pairs-pos_loss": 0.16138723492622375, "eval_scitail-pairs-pos_runtime": 0.9059, "eval_scitail-pairs-pos_samples_per_second": 141.3, "eval_scitail-pairs-pos_steps_per_second": 1.104, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_scitail-pairs-qa_loss": 0.00048324489034712315, "eval_scitail-pairs-qa_runtime": 0.6237, "eval_scitail-pairs-qa_samples_per_second": 205.235, "eval_scitail-pairs-qa_steps_per_second": 1.603, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_xsum-pairs_loss": 0.31258535385131836, "eval_xsum-pairs_runtime": 3.027, "eval_xsum-pairs_samples_per_second": 42.286, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_sciq_pairs_loss": 0.09079165756702423, "eval_sciq_pairs_runtime": 3.9197, "eval_sciq_pairs_samples_per_second": 32.655, "eval_sciq_pairs_steps_per_second": 0.255, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_qasc_pairs_loss": 0.16633209586143494, "eval_qasc_pairs_runtime": 0.6231, "eval_qasc_pairs_samples_per_second": 205.409, "eval_qasc_pairs_steps_per_second": 1.605, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_openbookqa_pairs_loss": 0.8373302221298218, "eval_openbookqa_pairs_runtime": 0.5958, "eval_openbookqa_pairs_samples_per_second": 214.826, "eval_openbookqa_pairs_steps_per_second": 1.678, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_msmarco_pairs_loss": 0.9623515605926514, "eval_msmarco_pairs_runtime": 1.5238, "eval_msmarco_pairs_samples_per_second": 83.999, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_nq_pairs_loss": 0.7237948775291443, "eval_nq_pairs_runtime": 2.9153, "eval_nq_pairs_samples_per_second": 43.906, "eval_nq_pairs_steps_per_second": 0.343, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_trivia_pairs_loss": 0.8588597774505615, "eval_trivia_pairs_runtime": 3.454, "eval_trivia_pairs_samples_per_second": 37.058, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_gooaq_pairs_loss": 0.38329169154167175, "eval_gooaq_pairs_runtime": 0.962, "eval_gooaq_pairs_samples_per_second": 133.049, "eval_gooaq_pairs_steps_per_second": 1.039, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_paws-pos_loss": 0.022732464596629143, "eval_paws-pos_runtime": 0.7187, "eval_paws-pos_samples_per_second": 178.109, "eval_paws-pos_steps_per_second": 1.391, "step": 1940 }, { "epoch": 1.9958847736625516, "eval_global_dataset_loss": 0.5049639344215393, "eval_global_dataset_runtime": 13.4356, "eval_global_dataset_samples_per_second": 30.962, "eval_global_dataset_steps_per_second": 0.298, "step": 1940 }, { "epoch": 1.9969135802469136, "grad_norm": 10.780223846435547, "learning_rate": 2.6959943022745776e-05, "loss": 0.3913, "step": 1941 }, { "epoch": 1.9979423868312756, "grad_norm": 0.003871053922921419, "learning_rate": 2.6945670238152986e-05, "loss": 0.0, "step": 1942 }, { "epoch": 1.9989711934156378, "grad_norm": 9.154616355895996, "learning_rate": 2.6931391471325872e-05, "loss": 0.1129, "step": 1943 }, { "epoch": 2.0, "grad_norm": 12.245766639709473, "learning_rate": 2.691710674591087e-05, "loss": 0.1786, "step": 1944 }, { "epoch": 2.001028806584362, "grad_norm": 3.350267171859741, "learning_rate": 2.6902816085564297e-05, "loss": 0.0721, "step": 1945 }, { "epoch": 2.0020576131687244, "grad_norm": 5.552727699279785, "learning_rate": 2.6888519513952295e-05, "loss": 0.124, "step": 1946 }, { "epoch": 2.003086419753086, "grad_norm": 9.989136695861816, "learning_rate": 2.687421705475079e-05, "loss": 0.4688, "step": 1947 }, { "epoch": 2.0041152263374484, "grad_norm": 5.448371410369873, "learning_rate": 2.685990873164547e-05, "loss": 0.1731, "step": 1948 }, { "epoch": 2.0051440329218106, "grad_norm": 10.226446151733398, "learning_rate": 2.684559456833173e-05, "loss": 0.5941, "step": 1949 }, { "epoch": 2.006172839506173, "grad_norm": 5.5601654052734375, "learning_rate": 2.6831274588514627e-05, "loss": 0.1637, "step": 1950 }, { "epoch": 2.007201646090535, "grad_norm": 4.019494533538818, "learning_rate": 2.681694881590886e-05, "loss": 0.1451, "step": 1951 }, { "epoch": 2.0082304526748973, "grad_norm": 3.698535442352295, "learning_rate": 2.6802617274238724e-05, "loss": 0.1332, "step": 1952 }, { "epoch": 2.009259259259259, "grad_norm": 7.371061325073242, "learning_rate": 2.678827998723806e-05, "loss": 0.2481, "step": 1953 }, { "epoch": 2.0102880658436213, "grad_norm": 7.150874137878418, "learning_rate": 2.6773936978650225e-05, "loss": 0.21, "step": 1954 }, { "epoch": 2.0113168724279835, "grad_norm": 6.00358772277832, "learning_rate": 2.6759588272228055e-05, "loss": 0.149, "step": 1955 }, { "epoch": 2.0123456790123457, "grad_norm": 4.2081475257873535, "learning_rate": 2.674523389173382e-05, "loss": 0.1146, "step": 1956 }, { "epoch": 2.013374485596708, "grad_norm": 4.812724590301514, "learning_rate": 2.673087386093918e-05, "loss": 0.0926, "step": 1957 }, { "epoch": 2.01440329218107, "grad_norm": 6.93581485748291, "learning_rate": 2.671650820362517e-05, "loss": 0.234, "step": 1958 }, { "epoch": 2.015432098765432, "grad_norm": 3.500269651412964, "learning_rate": 2.6702136943582128e-05, "loss": 0.0647, "step": 1959 }, { "epoch": 2.016460905349794, "grad_norm": 8.786886215209961, "learning_rate": 2.6687760104609663e-05, "loss": 0.5429, "step": 1960 }, { "epoch": 2.016460905349794, "eval_Qnli-dev_cosine_accuracy": 0.71875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7799655199050903, "eval_Qnli-dev_cosine_ap": 0.7668841453694514, "eval_Qnli-dev_cosine_f1": 0.694589877835951, "eval_Qnli-dev_cosine_f1_threshold": 0.682135283946991, "eval_Qnli-dev_cosine_precision": 0.5905044510385756, "eval_Qnli-dev_cosine_recall": 0.8432203389830508, "eval_Qnli-dev_dot_accuracy": 0.673828125, "eval_Qnli-dev_dot_accuracy_threshold": 348.41790771484375, "eval_Qnli-dev_dot_ap": 0.712974117783976, "eval_Qnli-dev_dot_f1": 0.670807453416149, "eval_Qnli-dev_dot_f1_threshold": 270.2975158691406, "eval_Qnli-dev_dot_precision": 0.5294117647058824, "eval_Qnli-dev_dot_recall": 0.9152542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.860166549682617, "eval_Qnli-dev_euclidean_ap": 0.7750223238293773, "eval_Qnli-dev_euclidean_f1": 0.6953405017921147, "eval_Qnli-dev_euclidean_f1_threshold": 16.256149291992188, "eval_Qnli-dev_euclidean_precision": 0.6024844720496895, "eval_Qnli-dev_euclidean_recall": 0.8220338983050848, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 298.0458984375, "eval_Qnli-dev_manhattan_ap": 0.7789472745282582, "eval_Qnli-dev_manhattan_f1": 0.699410609037328, "eval_Qnli-dev_manhattan_f1_threshold": 327.1111145019531, "eval_Qnli-dev_manhattan_precision": 0.652014652014652, "eval_Qnli-dev_manhattan_recall": 0.7542372881355932, "eval_Qnli-dev_max_accuracy": 0.72265625, "eval_Qnli-dev_max_accuracy_threshold": 348.41790771484375, "eval_Qnli-dev_max_ap": 0.7789472745282582, "eval_Qnli-dev_max_f1": 0.699410609037328, "eval_Qnli-dev_max_f1_threshold": 327.1111145019531, "eval_Qnli-dev_max_precision": 0.652014652014652, "eval_Qnli-dev_max_recall": 0.9152542372881356, "eval_allNLI-dev_cosine_accuracy": 0.736328125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8098629117012024, "eval_allNLI-dev_cosine_ap": 0.6210919851298508, "eval_allNLI-dev_cosine_f1": 0.6349892008639308, "eval_allNLI-dev_cosine_f1_threshold": 0.721535325050354, "eval_allNLI-dev_cosine_precision": 0.506896551724138, "eval_allNLI-dev_cosine_recall": 0.8497109826589595, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 333.63330078125, "eval_allNLI-dev_dot_ap": 0.5569305748772116, "eval_allNLI-dev_dot_f1": 0.6032388663967612, "eval_allNLI-dev_dot_f1_threshold": 287.0180969238281, "eval_allNLI-dev_dot_precision": 0.46417445482866043, "eval_allNLI-dev_dot_recall": 0.861271676300578, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.557496070861816, "eval_allNLI-dev_euclidean_ap": 0.6281064411386492, "eval_allNLI-dev_euclidean_f1": 0.6514806378132119, "eval_allNLI-dev_euclidean_f1_threshold": 14.880706787109375, "eval_allNLI-dev_euclidean_precision": 0.5375939849624061, "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 268.589111328125, "eval_allNLI-dev_manhattan_ap": 0.6243826773237957, "eval_allNLI-dev_manhattan_f1": 0.6447058823529411, "eval_allNLI-dev_manhattan_f1_threshold": 305.6214599609375, "eval_allNLI-dev_manhattan_precision": 0.5436507936507936, "eval_allNLI-dev_manhattan_recall": 0.791907514450867, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 333.63330078125, "eval_allNLI-dev_max_ap": 0.6281064411386492, "eval_allNLI-dev_max_f1": 0.6514806378132119, "eval_allNLI-dev_max_f1_threshold": 305.6214599609375, "eval_allNLI-dev_max_precision": 0.5436507936507936, "eval_allNLI-dev_max_recall": 0.861271676300578, "eval_sequential_score": 0.7789472745282582, "eval_sts-test_pearson_cosine": 0.8331541754108045, "eval_sts-test_pearson_dot": 0.820280291990983, "eval_sts-test_pearson_euclidean": 0.8614289265584285, "eval_sts-test_pearson_manhattan": 0.8597853920648006, "eval_sts-test_pearson_max": 0.8614289265584285, "eval_sts-test_spearman_cosine": 0.8643414359524544, "eval_sts-test_spearman_dot": 0.8199940546951122, "eval_sts-test_spearman_euclidean": 0.8593159588007231, "eval_sts-test_spearman_manhattan": 0.8576773466136155, "eval_sts-test_spearman_max": 0.8643414359524544, "eval_vitaminc-pairs_loss": 3.2571401596069336, "eval_vitaminc-pairs_runtime": 3.2348, "eval_vitaminc-pairs_samples_per_second": 39.569, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 1960 }, { "epoch": 2.016460905349794, "eval_negation-triplets_loss": 0.9964045286178589, "eval_negation-triplets_runtime": 0.7567, "eval_negation-triplets_samples_per_second": 169.152, "eval_negation-triplets_steps_per_second": 1.322, "step": 1960 }, { "epoch": 2.016460905349794, "eval_scitail-pairs-pos_loss": 0.15746238827705383, "eval_scitail-pairs-pos_runtime": 0.8951, "eval_scitail-pairs-pos_samples_per_second": 142.998, "eval_scitail-pairs-pos_steps_per_second": 1.117, "step": 1960 }, { "epoch": 2.016460905349794, "eval_scitail-pairs-qa_loss": 0.00042386981658637524, "eval_scitail-pairs-qa_runtime": 0.6122, "eval_scitail-pairs-qa_samples_per_second": 209.071, "eval_scitail-pairs-qa_steps_per_second": 1.633, "step": 1960 }, { "epoch": 2.016460905349794, "eval_xsum-pairs_loss": 0.2893158793449402, "eval_xsum-pairs_runtime": 3.0332, "eval_xsum-pairs_samples_per_second": 42.2, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1960 }, { "epoch": 2.016460905349794, "eval_sciq_pairs_loss": 0.08887187391519547, "eval_sciq_pairs_runtime": 3.5456, "eval_sciq_pairs_samples_per_second": 36.101, "eval_sciq_pairs_steps_per_second": 0.282, "step": 1960 }, { "epoch": 2.016460905349794, "eval_qasc_pairs_loss": 0.16652485728263855, "eval_qasc_pairs_runtime": 0.6255, "eval_qasc_pairs_samples_per_second": 204.633, "eval_qasc_pairs_steps_per_second": 1.599, "step": 1960 }, { "epoch": 2.016460905349794, "eval_openbookqa_pairs_loss": 0.8275821208953857, "eval_openbookqa_pairs_runtime": 0.5982, "eval_openbookqa_pairs_samples_per_second": 213.966, "eval_openbookqa_pairs_steps_per_second": 1.672, "step": 1960 }, { "epoch": 2.016460905349794, "eval_msmarco_pairs_loss": 0.8606109619140625, "eval_msmarco_pairs_runtime": 1.5254, "eval_msmarco_pairs_samples_per_second": 83.911, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 1960 }, { "epoch": 2.016460905349794, "eval_nq_pairs_loss": 0.7386643290519714, "eval_nq_pairs_runtime": 2.9044, "eval_nq_pairs_samples_per_second": 44.071, "eval_nq_pairs_steps_per_second": 0.344, "step": 1960 }, { "epoch": 2.016460905349794, "eval_trivia_pairs_loss": 0.8336133360862732, "eval_trivia_pairs_runtime": 3.4507, "eval_trivia_pairs_samples_per_second": 37.094, "eval_trivia_pairs_steps_per_second": 0.29, "step": 1960 }, { "epoch": 2.016460905349794, "eval_gooaq_pairs_loss": 0.40120747685432434, "eval_gooaq_pairs_runtime": 0.9551, "eval_gooaq_pairs_samples_per_second": 134.016, "eval_gooaq_pairs_steps_per_second": 1.047, "step": 1960 }, { "epoch": 2.016460905349794, "eval_paws-pos_loss": 0.02367236278951168, "eval_paws-pos_runtime": 0.7086, "eval_paws-pos_samples_per_second": 180.644, "eval_paws-pos_steps_per_second": 1.411, "step": 1960 }, { "epoch": 2.016460905349794, "eval_global_dataset_loss": 0.4686444401741028, "eval_global_dataset_runtime": 13.4368, "eval_global_dataset_samples_per_second": 30.96, "eval_global_dataset_steps_per_second": 0.298, "step": 1960 }, { "epoch": 2.0174897119341564, "grad_norm": 10.542884826660156, "learning_rate": 2.667337771051665e-05, "loss": 0.5538, "step": 1961 }, { "epoch": 2.0185185185185186, "grad_norm": 5.9770917892456055, "learning_rate": 2.665898978512113e-05, "loss": 0.1512, "step": 1962 }, { "epoch": 2.019547325102881, "grad_norm": 4.33903169631958, "learning_rate": 2.664459635225034e-05, "loss": 0.1231, "step": 1963 }, { "epoch": 2.0205761316872426, "grad_norm": 5.161752700805664, "learning_rate": 2.6630197435740606e-05, "loss": 0.1405, "step": 1964 }, { "epoch": 2.021604938271605, "grad_norm": 9.55565071105957, "learning_rate": 2.6615793059437357e-05, "loss": 0.4344, "step": 1965 }, { "epoch": 2.022633744855967, "grad_norm": 0.4918361008167267, "learning_rate": 2.660138324719505e-05, "loss": 0.0216, "step": 1966 }, { "epoch": 2.0236625514403292, "grad_norm": 7.198734760284424, "learning_rate": 2.658696802287715e-05, "loss": 0.3757, "step": 1967 }, { "epoch": 2.0246913580246915, "grad_norm": 5.002417087554932, "learning_rate": 2.657254741035608e-05, "loss": 0.176, "step": 1968 }, { "epoch": 2.0257201646090537, "grad_norm": 4.149772644042969, "learning_rate": 2.6558121433513207e-05, "loss": 0.1509, "step": 1969 }, { "epoch": 2.0267489711934155, "grad_norm": 4.617299556732178, "learning_rate": 2.654369011623875e-05, "loss": 0.0743, "step": 1970 }, { "epoch": 2.0277777777777777, "grad_norm": 4.817657470703125, "learning_rate": 2.6529253482431788e-05, "loss": 0.2057, "step": 1971 }, { "epoch": 2.02880658436214, "grad_norm": 9.108501434326172, "learning_rate": 2.6514811556000205e-05, "loss": 0.421, "step": 1972 }, { "epoch": 2.029835390946502, "grad_norm": 6.209652423858643, "learning_rate": 2.6500364360860653e-05, "loss": 0.1599, "step": 1973 }, { "epoch": 2.0308641975308643, "grad_norm": 6.913119792938232, "learning_rate": 2.64859119209385e-05, "loss": 0.1982, "step": 1974 }, { "epoch": 2.0318930041152266, "grad_norm": 8.143872261047363, "learning_rate": 2.6471454260167806e-05, "loss": 0.3077, "step": 1975 }, { "epoch": 2.0329218106995883, "grad_norm": 10.935625076293945, "learning_rate": 2.6456991402491276e-05, "loss": 0.8843, "step": 1976 }, { "epoch": 2.0339506172839505, "grad_norm": 11.155512809753418, "learning_rate": 2.6442523371860223e-05, "loss": 0.5713, "step": 1977 }, { "epoch": 2.0349794238683128, "grad_norm": 11.313560485839844, "learning_rate": 2.6428050192234512e-05, "loss": 0.5019, "step": 1978 }, { "epoch": 2.036008230452675, "grad_norm": 14.535584449768066, "learning_rate": 2.6413571887582567e-05, "loss": 1.8065, "step": 1979 }, { "epoch": 2.037037037037037, "grad_norm": 3.9616973400115967, "learning_rate": 2.639908848188126e-05, "loss": 0.1463, "step": 1980 }, { "epoch": 2.037037037037037, "eval_Qnli-dev_cosine_accuracy": 0.6953125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7812117338180542, "eval_Qnli-dev_cosine_ap": 0.7464080080794311, "eval_Qnli-dev_cosine_f1": 0.6757679180887372, "eval_Qnli-dev_cosine_f1_threshold": 0.6842661499977112, "eval_Qnli-dev_cosine_precision": 0.5657142857142857, "eval_Qnli-dev_cosine_recall": 0.8389830508474576, "eval_Qnli-dev_dot_accuracy": 0.662109375, "eval_Qnli-dev_dot_accuracy_threshold": 367.6085510253906, "eval_Qnli-dev_dot_ap": 0.6854949842070595, "eval_Qnli-dev_dot_f1": 0.667741935483871, "eval_Qnli-dev_dot_f1_threshold": 289.64093017578125, "eval_Qnli-dev_dot_precision": 0.5390625, "eval_Qnli-dev_dot_recall": 0.8771186440677966, "eval_Qnli-dev_euclidean_accuracy": 0.705078125, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.41029167175293, "eval_Qnli-dev_euclidean_ap": 0.7550512346104723, "eval_Qnli-dev_euclidean_f1": 0.6812080536912752, "eval_Qnli-dev_euclidean_f1_threshold": 16.79446792602539, "eval_Qnli-dev_euclidean_precision": 0.5638888888888889, "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, "eval_Qnli-dev_manhattan_accuracy": 0.705078125, "eval_Qnli-dev_manhattan_accuracy_threshold": 294.1890563964844, "eval_Qnli-dev_manhattan_ap": 0.7580931618042414, "eval_Qnli-dev_manhattan_f1": 0.6812080536912752, "eval_Qnli-dev_manhattan_f1_threshold": 354.9899597167969, "eval_Qnli-dev_manhattan_precision": 0.5638888888888889, "eval_Qnli-dev_manhattan_recall": 0.8601694915254238, "eval_Qnli-dev_max_accuracy": 0.705078125, "eval_Qnli-dev_max_accuracy_threshold": 367.6085510253906, "eval_Qnli-dev_max_ap": 0.7580931618042414, "eval_Qnli-dev_max_f1": 0.6812080536912752, "eval_Qnli-dev_max_f1_threshold": 354.9899597167969, "eval_Qnli-dev_max_precision": 0.5657142857142857, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8383222818374634, "eval_allNLI-dev_cosine_ap": 0.6090807801044571, "eval_allNLI-dev_cosine_f1": 0.623608017817372, "eval_allNLI-dev_cosine_f1_threshold": 0.7408950924873352, "eval_allNLI-dev_cosine_precision": 0.5072463768115942, "eval_allNLI-dev_cosine_recall": 0.8092485549132948, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 372.4272155761719, "eval_allNLI-dev_dot_ap": 0.5367891645145532, "eval_allNLI-dev_dot_f1": 0.5803757828810021, "eval_allNLI-dev_dot_f1_threshold": 300.437744140625, "eval_allNLI-dev_dot_precision": 0.4542483660130719, "eval_allNLI-dev_dot_recall": 0.8034682080924855, "eval_allNLI-dev_euclidean_accuracy": 0.724609375, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.517840385437012, "eval_allNLI-dev_euclidean_ap": 0.6183869555871211, "eval_allNLI-dev_euclidean_f1": 0.6450116009280741, "eval_allNLI-dev_euclidean_f1_threshold": 14.532339096069336, "eval_allNLI-dev_euclidean_precision": 0.5387596899224806, "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, "eval_allNLI-dev_manhattan_accuracy": 0.72265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 263.429443359375, "eval_allNLI-dev_manhattan_ap": 0.6133818291746855, "eval_allNLI-dev_manhattan_f1": 0.6403508771929824, "eval_allNLI-dev_manhattan_f1_threshold": 316.3074951171875, "eval_allNLI-dev_manhattan_precision": 0.5159010600706714, "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, "eval_allNLI-dev_max_accuracy": 0.724609375, "eval_allNLI-dev_max_accuracy_threshold": 372.4272155761719, "eval_allNLI-dev_max_ap": 0.6183869555871211, "eval_allNLI-dev_max_f1": 0.6450116009280741, "eval_allNLI-dev_max_f1_threshold": 316.3074951171875, "eval_allNLI-dev_max_precision": 0.5387596899224806, "eval_allNLI-dev_max_recall": 0.8439306358381503, "eval_sequential_score": 0.7580931618042414, "eval_sts-test_pearson_cosine": 0.8345099662016966, "eval_sts-test_pearson_dot": 0.8217163360084048, "eval_sts-test_pearson_euclidean": 0.8646307702719731, "eval_sts-test_pearson_manhattan": 0.8638614995339742, "eval_sts-test_pearson_max": 0.8646307702719731, "eval_sts-test_spearman_cosine": 0.8682611989377271, "eval_sts-test_spearman_dot": 0.8181228254512923, "eval_sts-test_spearman_euclidean": 0.8633254480650854, "eval_sts-test_spearman_manhattan": 0.8626637111628908, "eval_sts-test_spearman_max": 0.8682611989377271, "eval_vitaminc-pairs_loss": 3.4514224529266357, "eval_vitaminc-pairs_runtime": 3.2245, "eval_vitaminc-pairs_samples_per_second": 39.696, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 1980 }, { "epoch": 2.037037037037037, "eval_negation-triplets_loss": 0.9722065329551697, "eval_negation-triplets_runtime": 0.7656, "eval_negation-triplets_samples_per_second": 167.181, "eval_negation-triplets_steps_per_second": 1.306, "step": 1980 }, { "epoch": 2.037037037037037, "eval_scitail-pairs-pos_loss": 0.15476830303668976, "eval_scitail-pairs-pos_runtime": 0.9054, "eval_scitail-pairs-pos_samples_per_second": 141.373, "eval_scitail-pairs-pos_steps_per_second": 1.104, "step": 1980 }, { "epoch": 2.037037037037037, "eval_scitail-pairs-qa_loss": 0.0009460457949899137, "eval_scitail-pairs-qa_runtime": 0.6019, "eval_scitail-pairs-qa_samples_per_second": 212.665, "eval_scitail-pairs-qa_steps_per_second": 1.661, "step": 1980 }, { "epoch": 2.037037037037037, "eval_xsum-pairs_loss": 0.2883686423301697, "eval_xsum-pairs_runtime": 3.0286, "eval_xsum-pairs_samples_per_second": 42.263, "eval_xsum-pairs_steps_per_second": 0.33, "step": 1980 }, { "epoch": 2.037037037037037, "eval_sciq_pairs_loss": 0.09009577333927155, "eval_sciq_pairs_runtime": 3.5292, "eval_sciq_pairs_samples_per_second": 36.269, "eval_sciq_pairs_steps_per_second": 0.283, "step": 1980 }, { "epoch": 2.037037037037037, "eval_qasc_pairs_loss": 0.16658733785152435, "eval_qasc_pairs_runtime": 0.6249, "eval_qasc_pairs_samples_per_second": 204.848, "eval_qasc_pairs_steps_per_second": 1.6, "step": 1980 }, { "epoch": 2.037037037037037, "eval_openbookqa_pairs_loss": 0.8728303909301758, "eval_openbookqa_pairs_runtime": 0.6024, "eval_openbookqa_pairs_samples_per_second": 212.494, "eval_openbookqa_pairs_steps_per_second": 1.66, "step": 1980 }, { "epoch": 2.037037037037037, "eval_msmarco_pairs_loss": 0.8322451114654541, "eval_msmarco_pairs_runtime": 1.5386, "eval_msmarco_pairs_samples_per_second": 83.195, "eval_msmarco_pairs_steps_per_second": 0.65, "step": 1980 }, { "epoch": 2.037037037037037, "eval_nq_pairs_loss": 0.8093205094337463, "eval_nq_pairs_runtime": 2.9248, "eval_nq_pairs_samples_per_second": 43.764, "eval_nq_pairs_steps_per_second": 0.342, "step": 1980 }, { "epoch": 2.037037037037037, "eval_trivia_pairs_loss": 0.7549027800559998, "eval_trivia_pairs_runtime": 3.4405, "eval_trivia_pairs_samples_per_second": 37.204, "eval_trivia_pairs_steps_per_second": 0.291, "step": 1980 }, { "epoch": 2.037037037037037, "eval_gooaq_pairs_loss": 0.4132753610610962, "eval_gooaq_pairs_runtime": 0.9589, "eval_gooaq_pairs_samples_per_second": 133.492, "eval_gooaq_pairs_steps_per_second": 1.043, "step": 1980 }, { "epoch": 2.037037037037037, "eval_paws-pos_loss": 0.02331428974866867, "eval_paws-pos_runtime": 0.7077, "eval_paws-pos_samples_per_second": 180.861, "eval_paws-pos_steps_per_second": 1.413, "step": 1980 }, { "epoch": 2.037037037037037, "eval_global_dataset_loss": 0.49280592799186707, "eval_global_dataset_runtime": 13.3958, "eval_global_dataset_samples_per_second": 31.054, "eval_global_dataset_steps_per_second": 0.299, "step": 1980 }, { "epoch": 2.038065843621399, "grad_norm": 8.284627914428711, "learning_rate": 2.6384599999115946e-05, "loss": 0.3849, "step": 1981 }, { "epoch": 2.039094650205761, "grad_norm": 9.021122932434082, "learning_rate": 2.6370106463280364e-05, "loss": 0.4461, "step": 1982 }, { "epoch": 2.0401234567901234, "grad_norm": 7.323623180389404, "learning_rate": 2.6355607898376633e-05, "loss": 0.2719, "step": 1983 }, { "epoch": 2.0411522633744856, "grad_norm": 4.637526035308838, "learning_rate": 2.6341104328415194e-05, "loss": 0.1207, "step": 1984 }, { "epoch": 2.042181069958848, "grad_norm": 9.165292739868164, "learning_rate": 2.6326595777414787e-05, "loss": 0.5871, "step": 1985 }, { "epoch": 2.04320987654321, "grad_norm": 7.3292717933654785, "learning_rate": 2.631208226940238e-05, "loss": 0.4427, "step": 1986 }, { "epoch": 2.044238683127572, "grad_norm": 4.765210151672363, "learning_rate": 2.6297563828413177e-05, "loss": 0.1111, "step": 1987 }, { "epoch": 2.045267489711934, "grad_norm": 10.265494346618652, "learning_rate": 2.628304047849053e-05, "loss": 0.5194, "step": 1988 }, { "epoch": 2.0462962962962963, "grad_norm": 4.825836658477783, "learning_rate": 2.626851224368593e-05, "loss": 0.2192, "step": 1989 }, { "epoch": 2.0473251028806585, "grad_norm": 9.529881477355957, "learning_rate": 2.625397914805896e-05, "loss": 0.6813, "step": 1990 }, { "epoch": 2.0483539094650207, "grad_norm": 8.750849723815918, "learning_rate": 2.6239441215677242e-05, "loss": 0.3654, "step": 1991 }, { "epoch": 2.049382716049383, "grad_norm": 7.3342390060424805, "learning_rate": 2.622489847061642e-05, "loss": 0.4521, "step": 1992 }, { "epoch": 2.0504115226337447, "grad_norm": 0.5652264356613159, "learning_rate": 2.62103509369601e-05, "loss": 0.025, "step": 1993 }, { "epoch": 2.051440329218107, "grad_norm": 0.5458277463912964, "learning_rate": 2.6195798638799823e-05, "loss": 0.0074, "step": 1994 }, { "epoch": 2.052469135802469, "grad_norm": 5.055678367614746, "learning_rate": 2.6181241600235016e-05, "loss": 0.1088, "step": 1995 }, { "epoch": 2.0534979423868314, "grad_norm": 4.4560723304748535, "learning_rate": 2.616667984537296e-05, "loss": 0.1488, "step": 1996 }, { "epoch": 2.0545267489711936, "grad_norm": 6.173320293426514, "learning_rate": 2.6152113398328745e-05, "loss": 0.2028, "step": 1997 }, { "epoch": 2.0555555555555554, "grad_norm": 6.1648688316345215, "learning_rate": 2.6137542283225232e-05, "loss": 0.2184, "step": 1998 }, { "epoch": 2.0565843621399176, "grad_norm": 7.91170072555542, "learning_rate": 2.612296652419301e-05, "loss": 0.5343, "step": 1999 }, { "epoch": 2.05761316872428, "grad_norm": 4.491048812866211, "learning_rate": 2.6108386145370363e-05, "loss": 0.1262, "step": 2000 }, { "epoch": 2.05761316872428, "eval_Qnli-dev_cosine_accuracy": 0.70703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7682234048843384, "eval_Qnli-dev_cosine_ap": 0.7510537802067643, "eval_Qnli-dev_cosine_f1": 0.6835443037974683, "eval_Qnli-dev_cosine_f1_threshold": 0.6483661532402039, "eval_Qnli-dev_cosine_precision": 0.5454545454545454, "eval_Qnli-dev_cosine_recall": 0.9152542372881356, "eval_Qnli-dev_dot_accuracy": 0.66015625, "eval_Qnli-dev_dot_accuracy_threshold": 359.16650390625, "eval_Qnli-dev_dot_ap": 0.6893133438021438, "eval_Qnli-dev_dot_f1": 0.6696696696696697, "eval_Qnli-dev_dot_f1_threshold": 266.4525146484375, "eval_Qnli-dev_dot_precision": 0.5186046511627908, "eval_Qnli-dev_dot_recall": 0.9449152542372882, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.601415634155273, "eval_Qnli-dev_euclidean_ap": 0.7605293151101415, "eval_Qnli-dev_euclidean_f1": 0.6851549755301795, "eval_Qnli-dev_euclidean_f1_threshold": 17.26052474975586, "eval_Qnli-dev_euclidean_precision": 0.5570291777188329, "eval_Qnli-dev_euclidean_recall": 0.8898305084745762, "eval_Qnli-dev_manhattan_accuracy": 0.7109375, "eval_Qnli-dev_manhattan_accuracy_threshold": 290.5079040527344, "eval_Qnli-dev_manhattan_ap": 0.7626097117636834, "eval_Qnli-dev_manhattan_f1": 0.6870503597122302, "eval_Qnli-dev_manhattan_f1_threshold": 342.42266845703125, "eval_Qnli-dev_manhattan_precision": 0.596875, "eval_Qnli-dev_manhattan_recall": 0.809322033898305, "eval_Qnli-dev_max_accuracy": 0.7109375, "eval_Qnli-dev_max_accuracy_threshold": 359.16650390625, "eval_Qnli-dev_max_ap": 0.7626097117636834, "eval_Qnli-dev_max_f1": 0.6870503597122302, "eval_Qnli-dev_max_f1_threshold": 342.42266845703125, "eval_Qnli-dev_max_precision": 0.596875, "eval_Qnli-dev_max_recall": 0.9449152542372882, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8330150842666626, "eval_allNLI-dev_cosine_ap": 0.6144608435852125, "eval_allNLI-dev_cosine_f1": 0.6232558139534884, "eval_allNLI-dev_cosine_f1_threshold": 0.7408770322799683, "eval_allNLI-dev_cosine_precision": 0.5214007782101168, "eval_allNLI-dev_cosine_recall": 0.7745664739884393, "eval_allNLI-dev_dot_accuracy": 0.708984375, "eval_allNLI-dev_dot_accuracy_threshold": 372.2314758300781, "eval_allNLI-dev_dot_ap": 0.5396368385808891, "eval_allNLI-dev_dot_f1": 0.5879732739420935, "eval_allNLI-dev_dot_f1_threshold": 303.21478271484375, "eval_allNLI-dev_dot_precision": 0.4782608695652174, "eval_allNLI-dev_dot_recall": 0.7630057803468208, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.515340805053711, "eval_allNLI-dev_euclidean_ap": 0.6229221967651163, "eval_allNLI-dev_euclidean_f1": 0.6367713004484306, "eval_allNLI-dev_euclidean_f1_threshold": 15.163028717041016, "eval_allNLI-dev_euclidean_precision": 0.5201465201465202, "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 262.06451416015625, "eval_allNLI-dev_manhattan_ap": 0.6200187620678886, "eval_allNLI-dev_manhattan_f1": 0.6374133949191686, "eval_allNLI-dev_manhattan_f1_threshold": 313.9089050292969, "eval_allNLI-dev_manhattan_precision": 0.5307692307692308, "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 372.2314758300781, "eval_allNLI-dev_max_ap": 0.6229221967651163, "eval_allNLI-dev_max_f1": 0.6374133949191686, "eval_allNLI-dev_max_f1_threshold": 313.9089050292969, "eval_allNLI-dev_max_precision": 0.5307692307692308, "eval_allNLI-dev_max_recall": 0.8208092485549133, "eval_sequential_score": 0.7626097117636834, "eval_sts-test_pearson_cosine": 0.8439968343279236, "eval_sts-test_pearson_dot": 0.8315464594535094, "eval_sts-test_pearson_euclidean": 0.8690583950822408, "eval_sts-test_pearson_manhattan": 0.8680001653553668, "eval_sts-test_pearson_max": 0.8690583950822408, "eval_sts-test_spearman_cosine": 0.8698704066136644, "eval_sts-test_spearman_dot": 0.8205329493847875, "eval_sts-test_spearman_euclidean": 0.8651157670283098, "eval_sts-test_spearman_manhattan": 0.8637660242334838, "eval_sts-test_spearman_max": 0.8698704066136644, "eval_vitaminc-pairs_loss": 3.2383663654327393, "eval_vitaminc-pairs_runtime": 3.226, "eval_vitaminc-pairs_samples_per_second": 39.678, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2000 }, { "epoch": 2.05761316872428, "eval_negation-triplets_loss": 1.002803087234497, "eval_negation-triplets_runtime": 0.7586, "eval_negation-triplets_samples_per_second": 168.721, "eval_negation-triplets_steps_per_second": 1.318, "step": 2000 }, { "epoch": 2.05761316872428, "eval_scitail-pairs-pos_loss": 0.13571438193321228, "eval_scitail-pairs-pos_runtime": 0.9128, "eval_scitail-pairs-pos_samples_per_second": 140.234, "eval_scitail-pairs-pos_steps_per_second": 1.096, "step": 2000 }, { "epoch": 2.05761316872428, "eval_scitail-pairs-qa_loss": 0.000615874829236418, "eval_scitail-pairs-qa_runtime": 0.6036, "eval_scitail-pairs-qa_samples_per_second": 212.044, "eval_scitail-pairs-qa_steps_per_second": 1.657, "step": 2000 }, { "epoch": 2.05761316872428, "eval_xsum-pairs_loss": 0.25147178769111633, "eval_xsum-pairs_runtime": 3.025, "eval_xsum-pairs_samples_per_second": 42.314, "eval_xsum-pairs_steps_per_second": 0.331, "step": 2000 }, { "epoch": 2.05761316872428, "eval_sciq_pairs_loss": 0.09387277066707611, "eval_sciq_pairs_runtime": 3.5238, "eval_sciq_pairs_samples_per_second": 36.325, "eval_sciq_pairs_steps_per_second": 0.284, "step": 2000 }, { "epoch": 2.05761316872428, "eval_qasc_pairs_loss": 0.16152480244636536, "eval_qasc_pairs_runtime": 0.6221, "eval_qasc_pairs_samples_per_second": 205.763, "eval_qasc_pairs_steps_per_second": 1.608, "step": 2000 }, { "epoch": 2.05761316872428, "eval_openbookqa_pairs_loss": 0.8188515901565552, "eval_openbookqa_pairs_runtime": 0.597, "eval_openbookqa_pairs_samples_per_second": 214.408, "eval_openbookqa_pairs_steps_per_second": 1.675, "step": 2000 }, { "epoch": 2.05761316872428, "eval_msmarco_pairs_loss": 0.8767502307891846, "eval_msmarco_pairs_runtime": 1.5218, "eval_msmarco_pairs_samples_per_second": 84.109, "eval_msmarco_pairs_steps_per_second": 0.657, "step": 2000 }, { "epoch": 2.05761316872428, "eval_nq_pairs_loss": 0.6645520925521851, "eval_nq_pairs_runtime": 2.9058, "eval_nq_pairs_samples_per_second": 44.049, "eval_nq_pairs_steps_per_second": 0.344, "step": 2000 }, { "epoch": 2.05761316872428, "eval_trivia_pairs_loss": 0.8132636547088623, "eval_trivia_pairs_runtime": 3.4612, "eval_trivia_pairs_samples_per_second": 36.982, "eval_trivia_pairs_steps_per_second": 0.289, "step": 2000 }, { "epoch": 2.05761316872428, "eval_gooaq_pairs_loss": 0.41634610295295715, "eval_gooaq_pairs_runtime": 0.9655, "eval_gooaq_pairs_samples_per_second": 132.57, "eval_gooaq_pairs_steps_per_second": 1.036, "step": 2000 }, { "epoch": 2.05761316872428, "eval_paws-pos_loss": 0.023877838626503944, "eval_paws-pos_runtime": 0.7028, "eval_paws-pos_samples_per_second": 182.128, "eval_paws-pos_steps_per_second": 1.423, "step": 2000 }, { "epoch": 2.05761316872428, "eval_global_dataset_loss": 0.46081268787384033, "eval_global_dataset_runtime": 13.403, "eval_global_dataset_samples_per_second": 31.038, "eval_global_dataset_steps_per_second": 0.298, "step": 2000 }, { "epoch": 2.058641975308642, "grad_norm": 0.42414379119873047, "learning_rate": 2.609380117090322e-05, "loss": 0.0128, "step": 2001 }, { "epoch": 2.0596707818930042, "grad_norm": 11.059983253479004, "learning_rate": 2.6079211624945133e-05, "loss": 0.7624, "step": 2002 }, { "epoch": 2.0606995884773665, "grad_norm": 0.11559353023767471, "learning_rate": 2.6064617531657205e-05, "loss": 0.0013, "step": 2003 }, { "epoch": 2.0617283950617282, "grad_norm": 9.283576965332031, "learning_rate": 2.6050018915208087e-05, "loss": 0.4478, "step": 2004 }, { "epoch": 2.0627572016460904, "grad_norm": 2.7311601638793945, "learning_rate": 2.603541579977391e-05, "loss": 0.0241, "step": 2005 }, { "epoch": 2.0637860082304527, "grad_norm": 0.4573628902435303, "learning_rate": 2.602080820953826e-05, "loss": 0.0063, "step": 2006 }, { "epoch": 2.064814814814815, "grad_norm": 10.511451721191406, "learning_rate": 2.600619616869214e-05, "loss": 0.6027, "step": 2007 }, { "epoch": 2.065843621399177, "grad_norm": 11.383633613586426, "learning_rate": 2.5991579701433906e-05, "loss": 0.6883, "step": 2008 }, { "epoch": 2.0668724279835393, "grad_norm": 6.3434648513793945, "learning_rate": 2.597695883196926e-05, "loss": 0.1842, "step": 2009 }, { "epoch": 2.067901234567901, "grad_norm": 6.79525899887085, "learning_rate": 2.596233358451119e-05, "loss": 0.2991, "step": 2010 }, { "epoch": 2.0689300411522633, "grad_norm": 3.5601322650909424, "learning_rate": 2.5947703983279935e-05, "loss": 0.096, "step": 2011 }, { "epoch": 2.0699588477366255, "grad_norm": 4.522330284118652, "learning_rate": 2.5933070052502936e-05, "loss": 0.2244, "step": 2012 }, { "epoch": 2.0709876543209877, "grad_norm": 9.466999053955078, "learning_rate": 2.5918431816414814e-05, "loss": 0.5941, "step": 2013 }, { "epoch": 2.07201646090535, "grad_norm": 8.346899032592773, "learning_rate": 2.5903789299257317e-05, "loss": 0.4248, "step": 2014 }, { "epoch": 2.0730452674897117, "grad_norm": 5.5073370933532715, "learning_rate": 2.5889142525279284e-05, "loss": 0.3148, "step": 2015 }, { "epoch": 2.074074074074074, "grad_norm": 5.379796981811523, "learning_rate": 2.5874491518736604e-05, "loss": 0.1327, "step": 2016 }, { "epoch": 2.075102880658436, "grad_norm": 9.268583297729492, "learning_rate": 2.5859836303892156e-05, "loss": 0.447, "step": 2017 }, { "epoch": 2.0761316872427984, "grad_norm": 2.687713146209717, "learning_rate": 2.584517690501583e-05, "loss": 0.0471, "step": 2018 }, { "epoch": 2.0771604938271606, "grad_norm": 0.9793359041213989, "learning_rate": 2.5830513346384398e-05, "loss": 0.0168, "step": 2019 }, { "epoch": 2.078189300411523, "grad_norm": 9.080259323120117, "learning_rate": 2.581584565228156e-05, "loss": 0.4154, "step": 2020 }, { "epoch": 2.078189300411523, "eval_Qnli-dev_cosine_accuracy": 0.70703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7904222011566162, "eval_Qnli-dev_cosine_ap": 0.7518479061341453, "eval_Qnli-dev_cosine_f1": 0.6824324324324325, "eval_Qnli-dev_cosine_f1_threshold": 0.6756579279899597, "eval_Qnli-dev_cosine_precision": 0.5674157303370787, "eval_Qnli-dev_cosine_recall": 0.8559322033898306, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 370.0274658203125, "eval_Qnli-dev_dot_ap": 0.6955155895020637, "eval_Qnli-dev_dot_f1": 0.6616084977238239, "eval_Qnli-dev_dot_f1_threshold": 267.6722717285156, "eval_Qnli-dev_dot_precision": 0.5153664302600472, "eval_Qnli-dev_dot_recall": 0.923728813559322, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.895347595214844, "eval_Qnli-dev_euclidean_ap": 0.7601878991676787, "eval_Qnli-dev_euclidean_f1": 0.6854130052724077, "eval_Qnli-dev_euclidean_f1_threshold": 16.323875427246094, "eval_Qnli-dev_euclidean_precision": 0.5855855855855856, "eval_Qnli-dev_euclidean_recall": 0.826271186440678, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 295.86700439453125, "eval_Qnli-dev_manhattan_ap": 0.7636402467773312, "eval_Qnli-dev_manhattan_f1": 0.6891651865008882, "eval_Qnli-dev_manhattan_f1_threshold": 342.58648681640625, "eval_Qnli-dev_manhattan_precision": 0.5932721712538226, "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 370.0274658203125, "eval_Qnli-dev_max_ap": 0.7636402467773312, "eval_Qnli-dev_max_f1": 0.6891651865008882, "eval_Qnli-dev_max_f1_threshold": 342.58648681640625, "eval_Qnli-dev_max_precision": 0.5932721712538226, "eval_Qnli-dev_max_recall": 0.923728813559322, "eval_allNLI-dev_cosine_accuracy": 0.736328125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8497622013092041, "eval_allNLI-dev_cosine_ap": 0.6279407506111134, "eval_allNLI-dev_cosine_f1": 0.6337078651685394, "eval_allNLI-dev_cosine_f1_threshold": 0.7338807582855225, "eval_allNLI-dev_cosine_precision": 0.5183823529411765, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.712890625, "eval_allNLI-dev_dot_accuracy_threshold": 371.5069580078125, "eval_allNLI-dev_dot_ap": 0.5554045589310075, "eval_allNLI-dev_dot_f1": 0.5982142857142857, "eval_allNLI-dev_dot_f1_threshold": 308.48822021484375, "eval_allNLI-dev_dot_precision": 0.48727272727272725, "eval_allNLI-dev_dot_recall": 0.7745664739884393, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.008502960205078, "eval_allNLI-dev_euclidean_ap": 0.6391160730245441, "eval_allNLI-dev_euclidean_f1": 0.6438356164383561, "eval_allNLI-dev_euclidean_f1_threshold": 15.02491569519043, "eval_allNLI-dev_euclidean_precision": 0.5320754716981132, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 238.62452697753906, "eval_allNLI-dev_manhattan_ap": 0.634583204732944, "eval_allNLI-dev_manhattan_f1": 0.6514806378132119, "eval_allNLI-dev_manhattan_f1_threshold": 316.03973388671875, "eval_allNLI-dev_manhattan_precision": 0.5375939849624061, "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, "eval_allNLI-dev_max_accuracy": 0.7421875, "eval_allNLI-dev_max_accuracy_threshold": 371.5069580078125, "eval_allNLI-dev_max_ap": 0.6391160730245441, "eval_allNLI-dev_max_f1": 0.6514806378132119, "eval_allNLI-dev_max_f1_threshold": 316.03973388671875, "eval_allNLI-dev_max_precision": 0.5375939849624061, "eval_allNLI-dev_max_recall": 0.8265895953757225, "eval_sequential_score": 0.7636402467773312, "eval_sts-test_pearson_cosine": 0.8422361279891084, "eval_sts-test_pearson_dot": 0.8296134956353065, "eval_sts-test_pearson_euclidean": 0.8714590085163401, "eval_sts-test_pearson_manhattan": 0.8698635898461213, "eval_sts-test_pearson_max": 0.8714590085163401, "eval_sts-test_spearman_cosine": 0.8724577037234441, "eval_sts-test_spearman_dot": 0.8229498307587528, "eval_sts-test_spearman_euclidean": 0.868813340000774, "eval_sts-test_spearman_manhattan": 0.8667433256538535, "eval_sts-test_spearman_max": 0.8724577037234441, "eval_vitaminc-pairs_loss": 3.5048067569732666, "eval_vitaminc-pairs_runtime": 3.2149, "eval_vitaminc-pairs_samples_per_second": 39.815, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 2020 }, { "epoch": 2.078189300411523, "eval_negation-triplets_loss": 0.99112468957901, "eval_negation-triplets_runtime": 0.7681, "eval_negation-triplets_samples_per_second": 166.635, "eval_negation-triplets_steps_per_second": 1.302, "step": 2020 }, { "epoch": 2.078189300411523, "eval_scitail-pairs-pos_loss": 0.14013606309890747, "eval_scitail-pairs-pos_runtime": 0.9281, "eval_scitail-pairs-pos_samples_per_second": 137.913, "eval_scitail-pairs-pos_steps_per_second": 1.077, "step": 2020 }, { "epoch": 2.078189300411523, "eval_scitail-pairs-qa_loss": 0.00047457279288209975, "eval_scitail-pairs-qa_runtime": 0.6071, "eval_scitail-pairs-qa_samples_per_second": 210.837, "eval_scitail-pairs-qa_steps_per_second": 1.647, "step": 2020 }, { "epoch": 2.078189300411523, "eval_xsum-pairs_loss": 0.26533740758895874, "eval_xsum-pairs_runtime": 3.0384, "eval_xsum-pairs_samples_per_second": 42.127, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2020 }, { "epoch": 2.078189300411523, "eval_sciq_pairs_loss": 0.09102991223335266, "eval_sciq_pairs_runtime": 3.517, "eval_sciq_pairs_samples_per_second": 36.395, "eval_sciq_pairs_steps_per_second": 0.284, "step": 2020 }, { "epoch": 2.078189300411523, "eval_qasc_pairs_loss": 0.16256316006183624, "eval_qasc_pairs_runtime": 0.6336, "eval_qasc_pairs_samples_per_second": 202.012, "eval_qasc_pairs_steps_per_second": 1.578, "step": 2020 }, { "epoch": 2.078189300411523, "eval_openbookqa_pairs_loss": 0.8943702578544617, "eval_openbookqa_pairs_runtime": 0.6033, "eval_openbookqa_pairs_samples_per_second": 212.157, "eval_openbookqa_pairs_steps_per_second": 1.657, "step": 2020 }, { "epoch": 2.078189300411523, "eval_msmarco_pairs_loss": 0.8894786834716797, "eval_msmarco_pairs_runtime": 1.5394, "eval_msmarco_pairs_samples_per_second": 83.152, "eval_msmarco_pairs_steps_per_second": 0.65, "step": 2020 }, { "epoch": 2.078189300411523, "eval_nq_pairs_loss": 0.667719841003418, "eval_nq_pairs_runtime": 2.906, "eval_nq_pairs_samples_per_second": 44.047, "eval_nq_pairs_steps_per_second": 0.344, "step": 2020 }, { "epoch": 2.078189300411523, "eval_trivia_pairs_loss": 0.7819646000862122, "eval_trivia_pairs_runtime": 3.4554, "eval_trivia_pairs_samples_per_second": 37.044, "eval_trivia_pairs_steps_per_second": 0.289, "step": 2020 }, { "epoch": 2.078189300411523, "eval_gooaq_pairs_loss": 0.4292497932910919, "eval_gooaq_pairs_runtime": 0.9612, "eval_gooaq_pairs_samples_per_second": 133.169, "eval_gooaq_pairs_steps_per_second": 1.04, "step": 2020 }, { "epoch": 2.078189300411523, "eval_paws-pos_loss": 0.02286423370242119, "eval_paws-pos_runtime": 0.7141, "eval_paws-pos_samples_per_second": 179.242, "eval_paws-pos_steps_per_second": 1.4, "step": 2020 }, { "epoch": 2.078189300411523, "eval_global_dataset_loss": 0.5070220232009888, "eval_global_dataset_runtime": 13.4139, "eval_global_dataset_samples_per_second": 31.013, "eval_global_dataset_steps_per_second": 0.298, "step": 2020 }, { "epoch": 2.0792181069958846, "grad_norm": 10.09373664855957, "learning_rate": 2.580117384699783e-05, "loss": 0.5803, "step": 2021 }, { "epoch": 2.080246913580247, "grad_norm": 3.2723073959350586, "learning_rate": 2.5786497954830568e-05, "loss": 0.0786, "step": 2022 }, { "epoch": 2.081275720164609, "grad_norm": 4.15331506729126, "learning_rate": 2.5771818000083865e-05, "loss": 0.0877, "step": 2023 }, { "epoch": 2.0823045267489713, "grad_norm": 7.957801342010498, "learning_rate": 2.575713400706857e-05, "loss": 0.3655, "step": 2024 }, { "epoch": 2.0833333333333335, "grad_norm": 8.468852043151855, "learning_rate": 2.574244600010219e-05, "loss": 0.2755, "step": 2025 }, { "epoch": 2.0843621399176957, "grad_norm": 4.392659664154053, "learning_rate": 2.572775400350891e-05, "loss": 0.0915, "step": 2026 }, { "epoch": 2.0853909465020575, "grad_norm": 2.75890851020813, "learning_rate": 2.5713058041619504e-05, "loss": 0.0344, "step": 2027 }, { "epoch": 2.0864197530864197, "grad_norm": 5.2819437980651855, "learning_rate": 2.5698358138771318e-05, "loss": 0.1478, "step": 2028 }, { "epoch": 2.087448559670782, "grad_norm": 4.025913238525391, "learning_rate": 2.568365431930822e-05, "loss": 0.1627, "step": 2029 }, { "epoch": 2.088477366255144, "grad_norm": 4.903158187866211, "learning_rate": 2.566894660758056e-05, "loss": 0.1453, "step": 2030 }, { "epoch": 2.0895061728395063, "grad_norm": 4.8980393409729, "learning_rate": 2.5654235027945153e-05, "loss": 0.1726, "step": 2031 }, { "epoch": 2.090534979423868, "grad_norm": 10.14993953704834, "learning_rate": 2.5639519604765206e-05, "loss": 0.6505, "step": 2032 }, { "epoch": 2.0915637860082303, "grad_norm": 5.390047550201416, "learning_rate": 2.5624800362410293e-05, "loss": 0.1241, "step": 2033 }, { "epoch": 2.0925925925925926, "grad_norm": 8.93951416015625, "learning_rate": 2.5610077325256296e-05, "loss": 0.4726, "step": 2034 }, { "epoch": 2.093621399176955, "grad_norm": 1.5915225744247437, "learning_rate": 2.559535051768542e-05, "loss": 0.0298, "step": 2035 }, { "epoch": 2.094650205761317, "grad_norm": 9.25654125213623, "learning_rate": 2.558061996408608e-05, "loss": 0.4278, "step": 2036 }, { "epoch": 2.095679012345679, "grad_norm": 5.480092525482178, "learning_rate": 2.556588568885291e-05, "loss": 0.1557, "step": 2037 }, { "epoch": 2.096707818930041, "grad_norm": 10.635049819946289, "learning_rate": 2.5551147716386696e-05, "loss": 0.499, "step": 2038 }, { "epoch": 2.097736625514403, "grad_norm": 4.839813709259033, "learning_rate": 2.553640607109437e-05, "loss": 0.1144, "step": 2039 }, { "epoch": 2.0987654320987654, "grad_norm": 0.09921301901340485, "learning_rate": 2.5521660777388916e-05, "loss": 0.0013, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_Qnli-dev_cosine_accuracy": 0.71875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7668793201446533, "eval_Qnli-dev_cosine_ap": 0.7622251014689494, "eval_Qnli-dev_cosine_f1": 0.6905537459283388, "eval_Qnli-dev_cosine_f1_threshold": 0.6393879652023315, "eval_Qnli-dev_cosine_precision": 0.5608465608465608, "eval_Qnli-dev_cosine_recall": 0.8983050847457628, "eval_Qnli-dev_dot_accuracy": 0.673828125, "eval_Qnli-dev_dot_accuracy_threshold": 346.4505615234375, "eval_Qnli-dev_dot_ap": 0.7133157603299303, "eval_Qnli-dev_dot_f1": 0.6750788643533123, "eval_Qnli-dev_dot_f1_threshold": 265.27191162109375, "eval_Qnli-dev_dot_precision": 0.5376884422110553, "eval_Qnli-dev_dot_recall": 0.9067796610169492, "eval_Qnli-dev_euclidean_accuracy": 0.728515625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.158382415771484, "eval_Qnli-dev_euclidean_ap": 0.7683574103231411, "eval_Qnli-dev_euclidean_f1": 0.6934865900383143, "eval_Qnli-dev_euclidean_f1_threshold": 16.00537872314453, "eval_Qnli-dev_euclidean_precision": 0.6328671328671329, "eval_Qnli-dev_euclidean_recall": 0.7669491525423728, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 297.2159423828125, "eval_Qnli-dev_manhattan_ap": 0.7687026270805394, "eval_Qnli-dev_manhattan_f1": 0.6946564885496184, "eval_Qnli-dev_manhattan_f1_threshold": 338.4189453125, "eval_Qnli-dev_manhattan_precision": 0.6319444444444444, "eval_Qnli-dev_manhattan_recall": 0.7711864406779662, "eval_Qnli-dev_max_accuracy": 0.728515625, "eval_Qnli-dev_max_accuracy_threshold": 346.4505615234375, "eval_Qnli-dev_max_ap": 0.7687026270805394, "eval_Qnli-dev_max_f1": 0.6946564885496184, "eval_Qnli-dev_max_f1_threshold": 338.4189453125, "eval_Qnli-dev_max_precision": 0.6328671328671329, "eval_Qnli-dev_max_recall": 0.9067796610169492, "eval_allNLI-dev_cosine_accuracy": 0.736328125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8452146053314209, "eval_allNLI-dev_cosine_ap": 0.6275018164212229, "eval_allNLI-dev_cosine_f1": 0.639618138424821, "eval_allNLI-dev_cosine_f1_threshold": 0.7643710970878601, "eval_allNLI-dev_cosine_precision": 0.5447154471544715, "eval_allNLI-dev_cosine_recall": 0.7745664739884393, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 361.28192138671875, "eval_allNLI-dev_dot_ap": 0.5517615704791942, "eval_allNLI-dev_dot_f1": 0.592039800995025, "eval_allNLI-dev_dot_f1_threshold": 328.10443115234375, "eval_allNLI-dev_dot_precision": 0.519650655021834, "eval_allNLI-dev_dot_recall": 0.6878612716763006, "eval_allNLI-dev_euclidean_accuracy": 0.744140625, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.57768726348877, "eval_allNLI-dev_euclidean_ap": 0.6387425565934844, "eval_allNLI-dev_euclidean_f1": 0.6481012658227848, "eval_allNLI-dev_euclidean_f1_threshold": 13.804973602294922, "eval_allNLI-dev_euclidean_precision": 0.5765765765765766, "eval_allNLI-dev_euclidean_recall": 0.7398843930635838, "eval_allNLI-dev_manhattan_accuracy": 0.740234375, "eval_allNLI-dev_manhattan_accuracy_threshold": 256.33929443359375, "eval_allNLI-dev_manhattan_ap": 0.634668026581342, "eval_allNLI-dev_manhattan_f1": 0.6467661691542288, "eval_allNLI-dev_manhattan_f1_threshold": 295.89422607421875, "eval_allNLI-dev_manhattan_precision": 0.5676855895196506, "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, "eval_allNLI-dev_max_accuracy": 0.744140625, "eval_allNLI-dev_max_accuracy_threshold": 361.28192138671875, "eval_allNLI-dev_max_ap": 0.6387425565934844, "eval_allNLI-dev_max_f1": 0.6481012658227848, "eval_allNLI-dev_max_f1_threshold": 328.10443115234375, "eval_allNLI-dev_max_precision": 0.5765765765765766, "eval_allNLI-dev_max_recall": 0.7745664739884393, "eval_sequential_score": 0.7687026270805394, "eval_sts-test_pearson_cosine": 0.8345019444404698, "eval_sts-test_pearson_dot": 0.8151075704022561, "eval_sts-test_pearson_euclidean": 0.8657053003604382, "eval_sts-test_pearson_manhattan": 0.8633038097057222, "eval_sts-test_pearson_max": 0.8657053003604382, "eval_sts-test_spearman_cosine": 0.8688792138575424, "eval_sts-test_spearman_dot": 0.8095249893564697, "eval_sts-test_spearman_euclidean": 0.8656618219198077, "eval_sts-test_spearman_manhattan": 0.8632414051961924, "eval_sts-test_spearman_max": 0.8688792138575424, "eval_vitaminc-pairs_loss": 3.372237205505371, "eval_vitaminc-pairs_runtime": 3.2264, "eval_vitaminc-pairs_samples_per_second": 39.673, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_negation-triplets_loss": 0.9596391320228577, "eval_negation-triplets_runtime": 0.7685, "eval_negation-triplets_samples_per_second": 166.552, "eval_negation-triplets_steps_per_second": 1.301, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_scitail-pairs-pos_loss": 0.14105528593063354, "eval_scitail-pairs-pos_runtime": 0.9, "eval_scitail-pairs-pos_samples_per_second": 142.217, "eval_scitail-pairs-pos_steps_per_second": 1.111, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_scitail-pairs-qa_loss": 0.0004064729146193713, "eval_scitail-pairs-qa_runtime": 0.6026, "eval_scitail-pairs-qa_samples_per_second": 212.415, "eval_scitail-pairs-qa_steps_per_second": 1.659, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_xsum-pairs_loss": 0.25447919964790344, "eval_xsum-pairs_runtime": 3.0356, "eval_xsum-pairs_samples_per_second": 42.166, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_sciq_pairs_loss": 0.09601989388465881, "eval_sciq_pairs_runtime": 3.5482, "eval_sciq_pairs_samples_per_second": 36.074, "eval_sciq_pairs_steps_per_second": 0.282, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_qasc_pairs_loss": 0.16032877564430237, "eval_qasc_pairs_runtime": 0.628, "eval_qasc_pairs_samples_per_second": 203.829, "eval_qasc_pairs_steps_per_second": 1.592, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_openbookqa_pairs_loss": 0.8722372055053711, "eval_openbookqa_pairs_runtime": 0.5946, "eval_openbookqa_pairs_samples_per_second": 215.28, "eval_openbookqa_pairs_steps_per_second": 1.682, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_msmarco_pairs_loss": 0.9411458969116211, "eval_msmarco_pairs_runtime": 1.5246, "eval_msmarco_pairs_samples_per_second": 83.956, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_nq_pairs_loss": 0.6812416911125183, "eval_nq_pairs_runtime": 2.9197, "eval_nq_pairs_samples_per_second": 43.84, "eval_nq_pairs_steps_per_second": 0.342, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_trivia_pairs_loss": 0.8062442541122437, "eval_trivia_pairs_runtime": 3.4404, "eval_trivia_pairs_samples_per_second": 37.205, "eval_trivia_pairs_steps_per_second": 0.291, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_gooaq_pairs_loss": 0.4616175889968872, "eval_gooaq_pairs_runtime": 0.9621, "eval_gooaq_pairs_samples_per_second": 133.039, "eval_gooaq_pairs_steps_per_second": 1.039, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_paws-pos_loss": 0.02278071828186512, "eval_paws-pos_runtime": 0.7094, "eval_paws-pos_samples_per_second": 180.443, "eval_paws-pos_steps_per_second": 1.41, "step": 2040 }, { "epoch": 2.0987654320987654, "eval_global_dataset_loss": 0.4955148696899414, "eval_global_dataset_runtime": 13.4048, "eval_global_dataset_samples_per_second": 31.034, "eval_global_dataset_steps_per_second": 0.298, "step": 2040 }, { "epoch": 2.0997942386831276, "grad_norm": 3.425891876220703, "learning_rate": 2.5506911859689383e-05, "loss": 0.0587, "step": 2041 }, { "epoch": 2.10082304526749, "grad_norm": 9.27571964263916, "learning_rate": 2.5492159342420798e-05, "loss": 0.3979, "step": 2042 }, { "epoch": 2.1018518518518516, "grad_norm": 0.14763645827770233, "learning_rate": 2.547740325001419e-05, "loss": 0.0009, "step": 2043 }, { "epoch": 2.102880658436214, "grad_norm": 7.345943450927734, "learning_rate": 2.5462643606906473e-05, "loss": 0.308, "step": 2044 }, { "epoch": 2.103909465020576, "grad_norm": 8.69579792022705, "learning_rate": 2.544788043754044e-05, "loss": 0.4668, "step": 2045 }, { "epoch": 2.1049382716049383, "grad_norm": 12.675374031066895, "learning_rate": 2.5433113766364742e-05, "loss": 0.6717, "step": 2046 }, { "epoch": 2.1059670781893005, "grad_norm": 13.579185485839844, "learning_rate": 2.5418343617833826e-05, "loss": 0.9457, "step": 2047 }, { "epoch": 2.1069958847736627, "grad_norm": 0.1838522106409073, "learning_rate": 2.5403570016407892e-05, "loss": 0.0023, "step": 2048 }, { "epoch": 2.1080246913580245, "grad_norm": 6.007643222808838, "learning_rate": 2.5388792986552858e-05, "loss": 0.3713, "step": 2049 }, { "epoch": 2.1090534979423867, "grad_norm": 9.373748779296875, "learning_rate": 2.537401255274032e-05, "loss": 0.5438, "step": 2050 }, { "epoch": 2.110082304526749, "grad_norm": 5.532814025878906, "learning_rate": 2.535922873944752e-05, "loss": 0.0994, "step": 2051 }, { "epoch": 2.111111111111111, "grad_norm": 5.089415550231934, "learning_rate": 2.5344441571157284e-05, "loss": 0.1818, "step": 2052 }, { "epoch": 2.1121399176954734, "grad_norm": 3.955972909927368, "learning_rate": 2.5329651072357998e-05, "loss": 0.1319, "step": 2053 }, { "epoch": 2.1131687242798356, "grad_norm": 3.837581157684326, "learning_rate": 2.5314857267543565e-05, "loss": 0.0868, "step": 2054 }, { "epoch": 2.1141975308641974, "grad_norm": 10.354877471923828, "learning_rate": 2.530006018121337e-05, "loss": 0.5951, "step": 2055 }, { "epoch": 2.1152263374485596, "grad_norm": 5.332611560821533, "learning_rate": 2.528525983787222e-05, "loss": 0.2214, "step": 2056 }, { "epoch": 2.116255144032922, "grad_norm": 5.781221389770508, "learning_rate": 2.527045626203032e-05, "loss": 0.1633, "step": 2057 }, { "epoch": 2.117283950617284, "grad_norm": 4.2456865310668945, "learning_rate": 2.5255649478203235e-05, "loss": 0.128, "step": 2058 }, { "epoch": 2.1183127572016462, "grad_norm": 2.930485963821411, "learning_rate": 2.5240839510911826e-05, "loss": 0.0727, "step": 2059 }, { "epoch": 2.119341563786008, "grad_norm": 3.6109559535980225, "learning_rate": 2.5226026384682246e-05, "loss": 0.1023, "step": 2060 }, { "epoch": 2.119341563786008, "eval_Qnli-dev_cosine_accuracy": 0.720703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7480126023292542, "eval_Qnli-dev_cosine_ap": 0.767977183157839, "eval_Qnli-dev_cosine_f1": 0.7039337474120082, "eval_Qnli-dev_cosine_f1_threshold": 0.7480126023292542, "eval_Qnli-dev_cosine_precision": 0.6882591093117408, "eval_Qnli-dev_cosine_recall": 0.7203389830508474, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 341.8458251953125, "eval_Qnli-dev_dot_ap": 0.6935774131125908, "eval_Qnli-dev_dot_f1": 0.6687898089171974, "eval_Qnli-dev_dot_f1_threshold": 286.7041015625, "eval_Qnli-dev_dot_precision": 0.5357142857142857, "eval_Qnli-dev_dot_recall": 0.8898305084745762, "eval_Qnli-dev_euclidean_accuracy": 0.728515625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.393925666809082, "eval_Qnli-dev_euclidean_ap": 0.7771121353577738, "eval_Qnli-dev_euclidean_f1": 0.7071428571428572, "eval_Qnli-dev_euclidean_f1_threshold": 16.4425048828125, "eval_Qnli-dev_euclidean_precision": 0.6111111111111112, "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, "eval_Qnli-dev_manhattan_accuracy": 0.72265625, "eval_Qnli-dev_manhattan_accuracy_threshold": 305.10614013671875, "eval_Qnli-dev_manhattan_ap": 0.7774678086751587, "eval_Qnli-dev_manhattan_f1": 0.7065217391304347, "eval_Qnli-dev_manhattan_f1_threshold": 344.903564453125, "eval_Qnli-dev_manhattan_precision": 0.6170886075949367, "eval_Qnli-dev_manhattan_recall": 0.826271186440678, "eval_Qnli-dev_max_accuracy": 0.728515625, "eval_Qnli-dev_max_accuracy_threshold": 341.8458251953125, "eval_Qnli-dev_max_ap": 0.7774678086751587, "eval_Qnli-dev_max_f1": 0.7071428571428572, "eval_Qnli-dev_max_f1_threshold": 344.903564453125, "eval_Qnli-dev_max_precision": 0.6882591093117408, "eval_Qnli-dev_max_recall": 0.8898305084745762, "eval_allNLI-dev_cosine_accuracy": 0.736328125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8361479640007019, "eval_allNLI-dev_cosine_ap": 0.6224493771747687, "eval_allNLI-dev_cosine_f1": 0.6384976525821597, "eval_allNLI-dev_cosine_f1_threshold": 0.7641709446907043, "eval_allNLI-dev_cosine_precision": 0.5375494071146245, "eval_allNLI-dev_cosine_recall": 0.7861271676300579, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 356.0317687988281, "eval_allNLI-dev_dot_ap": 0.5510419909533901, "eval_allNLI-dev_dot_f1": 0.5950782997762865, "eval_allNLI-dev_dot_f1_threshold": 327.3216552734375, "eval_allNLI-dev_dot_precision": 0.4854014598540146, "eval_allNLI-dev_dot_recall": 0.7687861271676301, "eval_allNLI-dev_euclidean_accuracy": 0.74609375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.004873275756836, "eval_allNLI-dev_euclidean_ap": 0.6341950766090391, "eval_allNLI-dev_euclidean_f1": 0.657074340527578, "eval_allNLI-dev_euclidean_f1_threshold": 14.403692245483398, "eval_allNLI-dev_euclidean_precision": 0.5614754098360656, "eval_allNLI-dev_euclidean_recall": 0.791907514450867, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 249.1562957763672, "eval_allNLI-dev_manhattan_ap": 0.6308801066932301, "eval_allNLI-dev_manhattan_f1": 0.6538461538461539, "eval_allNLI-dev_manhattan_f1_threshold": 302.3179626464844, "eval_allNLI-dev_manhattan_precision": 0.5596707818930041, "eval_allNLI-dev_manhattan_recall": 0.7861271676300579, "eval_allNLI-dev_max_accuracy": 0.74609375, "eval_allNLI-dev_max_accuracy_threshold": 356.0317687988281, "eval_allNLI-dev_max_ap": 0.6341950766090391, "eval_allNLI-dev_max_f1": 0.657074340527578, "eval_allNLI-dev_max_f1_threshold": 327.3216552734375, "eval_allNLI-dev_max_precision": 0.5614754098360656, "eval_allNLI-dev_max_recall": 0.791907514450867, "eval_sequential_score": 0.7774678086751587, "eval_sts-test_pearson_cosine": 0.8399665361475932, "eval_sts-test_pearson_dot": 0.816581123696129, "eval_sts-test_pearson_euclidean": 0.8717879622779746, "eval_sts-test_pearson_manhattan": 0.8697766668566801, "eval_sts-test_pearson_max": 0.8717879622779746, "eval_sts-test_spearman_cosine": 0.8709748259259787, "eval_sts-test_spearman_dot": 0.8050431952793824, "eval_sts-test_spearman_euclidean": 0.8700087356950612, "eval_sts-test_spearman_manhattan": 0.8682787414321924, "eval_sts-test_spearman_max": 0.8709748259259787, "eval_vitaminc-pairs_loss": 3.4252164363861084, "eval_vitaminc-pairs_runtime": 3.2375, "eval_vitaminc-pairs_samples_per_second": 39.537, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 2060 }, { "epoch": 2.119341563786008, "eval_negation-triplets_loss": 0.9267296195030212, "eval_negation-triplets_runtime": 0.7748, "eval_negation-triplets_samples_per_second": 165.196, "eval_negation-triplets_steps_per_second": 1.291, "step": 2060 }, { "epoch": 2.119341563786008, "eval_scitail-pairs-pos_loss": 0.13756400346755981, "eval_scitail-pairs-pos_runtime": 0.897, "eval_scitail-pairs-pos_samples_per_second": 142.704, "eval_scitail-pairs-pos_steps_per_second": 1.115, "step": 2060 }, { "epoch": 2.119341563786008, "eval_scitail-pairs-qa_loss": 0.0004476790491025895, "eval_scitail-pairs-qa_runtime": 0.6013, "eval_scitail-pairs-qa_samples_per_second": 212.858, "eval_scitail-pairs-qa_steps_per_second": 1.663, "step": 2060 }, { "epoch": 2.119341563786008, "eval_xsum-pairs_loss": 0.23516154289245605, "eval_xsum-pairs_runtime": 3.0375, "eval_xsum-pairs_samples_per_second": 42.14, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2060 }, { "epoch": 2.119341563786008, "eval_sciq_pairs_loss": 0.09569169580936432, "eval_sciq_pairs_runtime": 3.5174, "eval_sciq_pairs_samples_per_second": 36.39, "eval_sciq_pairs_steps_per_second": 0.284, "step": 2060 }, { "epoch": 2.119341563786008, "eval_qasc_pairs_loss": 0.17149178683757782, "eval_qasc_pairs_runtime": 0.6246, "eval_qasc_pairs_samples_per_second": 204.946, "eval_qasc_pairs_steps_per_second": 1.601, "step": 2060 }, { "epoch": 2.119341563786008, "eval_openbookqa_pairs_loss": 0.9316143989562988, "eval_openbookqa_pairs_runtime": 0.5994, "eval_openbookqa_pairs_samples_per_second": 213.551, "eval_openbookqa_pairs_steps_per_second": 1.668, "step": 2060 }, { "epoch": 2.119341563786008, "eval_msmarco_pairs_loss": 0.9574117660522461, "eval_msmarco_pairs_runtime": 1.5268, "eval_msmarco_pairs_samples_per_second": 83.837, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 2060 }, { "epoch": 2.119341563786008, "eval_nq_pairs_loss": 0.7034321427345276, "eval_nq_pairs_runtime": 2.9095, "eval_nq_pairs_samples_per_second": 43.993, "eval_nq_pairs_steps_per_second": 0.344, "step": 2060 }, { "epoch": 2.119341563786008, "eval_trivia_pairs_loss": 0.8535366058349609, "eval_trivia_pairs_runtime": 3.4538, "eval_trivia_pairs_samples_per_second": 37.061, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2060 }, { "epoch": 2.119341563786008, "eval_gooaq_pairs_loss": 0.44533970952033997, "eval_gooaq_pairs_runtime": 0.9547, "eval_gooaq_pairs_samples_per_second": 134.069, "eval_gooaq_pairs_steps_per_second": 1.047, "step": 2060 }, { "epoch": 2.119341563786008, "eval_paws-pos_loss": 0.02282855100929737, "eval_paws-pos_runtime": 0.7252, "eval_paws-pos_samples_per_second": 176.511, "eval_paws-pos_steps_per_second": 1.379, "step": 2060 }, { "epoch": 2.119341563786008, "eval_global_dataset_loss": 0.5080205202102661, "eval_global_dataset_runtime": 13.4116, "eval_global_dataset_samples_per_second": 31.018, "eval_global_dataset_steps_per_second": 0.298, "step": 2060 }, { "epoch": 2.1203703703703702, "grad_norm": 6.7308454513549805, "learning_rate": 2.521121012404587e-05, "loss": 0.2912, "step": 2061 }, { "epoch": 2.1213991769547325, "grad_norm": 3.2159008979797363, "learning_rate": 2.519639075353926e-05, "loss": 0.0734, "step": 2062 }, { "epoch": 2.1224279835390947, "grad_norm": 4.35980224609375, "learning_rate": 2.5181568297704134e-05, "loss": 0.0851, "step": 2063 }, { "epoch": 2.123456790123457, "grad_norm": 7.116025924682617, "learning_rate": 2.5166742781087318e-05, "loss": 0.2077, "step": 2064 }, { "epoch": 2.124485596707819, "grad_norm": 6.992701530456543, "learning_rate": 2.515191422824071e-05, "loss": 0.3857, "step": 2065 }, { "epoch": 2.125514403292181, "grad_norm": 9.226235389709473, "learning_rate": 2.5137082663721225e-05, "loss": 0.4809, "step": 2066 }, { "epoch": 2.126543209876543, "grad_norm": 6.9023542404174805, "learning_rate": 2.512224811209078e-05, "loss": 0.2717, "step": 2067 }, { "epoch": 2.1275720164609053, "grad_norm": 5.9378252029418945, "learning_rate": 2.5107410597916224e-05, "loss": 0.14, "step": 2068 }, { "epoch": 2.1286008230452675, "grad_norm": 4.982329845428467, "learning_rate": 2.5092570145769328e-05, "loss": 0.2054, "step": 2069 }, { "epoch": 2.1296296296296298, "grad_norm": 4.590819835662842, "learning_rate": 2.5077726780226723e-05, "loss": 0.1258, "step": 2070 }, { "epoch": 2.1306584362139915, "grad_norm": 4.748352527618408, "learning_rate": 2.506288052586985e-05, "loss": 0.1127, "step": 2071 }, { "epoch": 2.1316872427983538, "grad_norm": 9.490756034851074, "learning_rate": 2.5048031407284954e-05, "loss": 0.5839, "step": 2072 }, { "epoch": 2.132716049382716, "grad_norm": 6.421467304229736, "learning_rate": 2.5033179449063016e-05, "loss": 0.2173, "step": 2073 }, { "epoch": 2.133744855967078, "grad_norm": 4.624624252319336, "learning_rate": 2.501832467579972e-05, "loss": 0.1179, "step": 2074 }, { "epoch": 2.1347736625514404, "grad_norm": 4.480711460113525, "learning_rate": 2.500346711209541e-05, "loss": 0.1996, "step": 2075 }, { "epoch": 2.1358024691358026, "grad_norm": 10.61433219909668, "learning_rate": 2.4988606782555047e-05, "loss": 0.4491, "step": 2076 }, { "epoch": 2.1368312757201644, "grad_norm": 9.619646072387695, "learning_rate": 2.4973743711788185e-05, "loss": 0.5571, "step": 2077 }, { "epoch": 2.1378600823045266, "grad_norm": 4.616343975067139, "learning_rate": 2.4958877924408912e-05, "loss": 0.0762, "step": 2078 }, { "epoch": 2.138888888888889, "grad_norm": 0.04099346324801445, "learning_rate": 2.49440094450358e-05, "loss": 0.0004, "step": 2079 }, { "epoch": 2.139917695473251, "grad_norm": 6.589290142059326, "learning_rate": 2.4929138298291908e-05, "loss": 0.1611, "step": 2080 }, { "epoch": 2.139917695473251, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7704996466636658, "eval_Qnli-dev_cosine_ap": 0.7682777720813325, "eval_Qnli-dev_cosine_f1": 0.7024221453287196, "eval_Qnli-dev_cosine_f1_threshold": 0.674142599105835, "eval_Qnli-dev_cosine_precision": 0.5935672514619883, "eval_Qnli-dev_cosine_recall": 0.8601694915254238, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 355.11505126953125, "eval_Qnli-dev_dot_ap": 0.7107837950507052, "eval_Qnli-dev_dot_f1": 0.6722129783693842, "eval_Qnli-dev_dot_f1_threshold": 287.2233581542969, "eval_Qnli-dev_dot_precision": 0.5534246575342465, "eval_Qnli-dev_dot_recall": 0.8559322033898306, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.478130340576172, "eval_Qnli-dev_euclidean_ap": 0.7758602239414594, "eval_Qnli-dev_euclidean_f1": 0.7012522361359571, "eval_Qnli-dev_euclidean_f1_threshold": 16.683025360107422, "eval_Qnli-dev_euclidean_precision": 0.6068111455108359, "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 312.4605407714844, "eval_Qnli-dev_manhattan_ap": 0.7728610432273579, "eval_Qnli-dev_manhattan_f1": 0.7058823529411764, "eval_Qnli-dev_manhattan_f1_threshold": 345.48614501953125, "eval_Qnli-dev_manhattan_precision": 0.6233766233766234, "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 355.11505126953125, "eval_Qnli-dev_max_ap": 0.7758602239414594, "eval_Qnli-dev_max_f1": 0.7058823529411764, "eval_Qnli-dev_max_f1_threshold": 345.48614501953125, "eval_Qnli-dev_max_precision": 0.6233766233766234, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.7421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8157534003257751, "eval_allNLI-dev_cosine_ap": 0.6364302454210037, "eval_allNLI-dev_cosine_f1": 0.6417582417582418, "eval_allNLI-dev_cosine_f1_threshold": 0.7176768779754639, "eval_allNLI-dev_cosine_precision": 0.5177304964539007, "eval_allNLI-dev_cosine_recall": 0.8439306358381503, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 350.124267578125, "eval_allNLI-dev_dot_ap": 0.5715311234068332, "eval_allNLI-dev_dot_f1": 0.6215644820295984, "eval_allNLI-dev_dot_f1_threshold": 292.3736572265625, "eval_allNLI-dev_dot_precision": 0.49, "eval_allNLI-dev_dot_recall": 0.8497109826589595, "eval_allNLI-dev_euclidean_accuracy": 0.74609375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.996118545532227, "eval_allNLI-dev_euclidean_ap": 0.6427037302950565, "eval_allNLI-dev_euclidean_f1": 0.6490384615384616, "eval_allNLI-dev_euclidean_f1_threshold": 14.670398712158203, "eval_allNLI-dev_euclidean_precision": 0.5555555555555556, "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, "eval_allNLI-dev_manhattan_accuracy": 0.74609375, "eval_allNLI-dev_manhattan_accuracy_threshold": 280.2452392578125, "eval_allNLI-dev_manhattan_ap": 0.6417070299829928, "eval_allNLI-dev_manhattan_f1": 0.6459330143540669, "eval_allNLI-dev_manhattan_f1_threshold": 309.9227294921875, "eval_allNLI-dev_manhattan_precision": 0.5510204081632653, "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, "eval_allNLI-dev_max_accuracy": 0.74609375, "eval_allNLI-dev_max_accuracy_threshold": 350.124267578125, "eval_allNLI-dev_max_ap": 0.6427037302950565, "eval_allNLI-dev_max_f1": 0.6490384615384616, "eval_allNLI-dev_max_f1_threshold": 309.9227294921875, "eval_allNLI-dev_max_precision": 0.5555555555555556, "eval_allNLI-dev_max_recall": 0.8497109826589595, "eval_sequential_score": 0.7758602239414594, "eval_sts-test_pearson_cosine": 0.8442033196393799, "eval_sts-test_pearson_dot": 0.8366727645093097, "eval_sts-test_pearson_euclidean": 0.8716408035418403, "eval_sts-test_pearson_manhattan": 0.8701963414499901, "eval_sts-test_pearson_max": 0.8716408035418403, "eval_sts-test_spearman_cosine": 0.8748100090272218, "eval_sts-test_spearman_dot": 0.8311957876852548, "eval_sts-test_spearman_euclidean": 0.8698756901949446, "eval_sts-test_spearman_manhattan": 0.8686798022367821, "eval_sts-test_spearman_max": 0.8748100090272218, "eval_vitaminc-pairs_loss": 3.404799222946167, "eval_vitaminc-pairs_runtime": 3.2177, "eval_vitaminc-pairs_samples_per_second": 39.779, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 2080 }, { "epoch": 2.139917695473251, "eval_negation-triplets_loss": 0.9570626020431519, "eval_negation-triplets_runtime": 0.7789, "eval_negation-triplets_samples_per_second": 164.331, "eval_negation-triplets_steps_per_second": 1.284, "step": 2080 }, { "epoch": 2.139917695473251, "eval_scitail-pairs-pos_loss": 0.14190398156642914, "eval_scitail-pairs-pos_runtime": 0.9178, "eval_scitail-pairs-pos_samples_per_second": 139.46, "eval_scitail-pairs-pos_steps_per_second": 1.09, "step": 2080 }, { "epoch": 2.139917695473251, "eval_scitail-pairs-qa_loss": 0.00019659155805129558, "eval_scitail-pairs-qa_runtime": 0.608, "eval_scitail-pairs-qa_samples_per_second": 210.535, "eval_scitail-pairs-qa_steps_per_second": 1.645, "step": 2080 }, { "epoch": 2.139917695473251, "eval_xsum-pairs_loss": 0.2618999183177948, "eval_xsum-pairs_runtime": 3.0347, "eval_xsum-pairs_samples_per_second": 42.179, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2080 }, { "epoch": 2.139917695473251, "eval_sciq_pairs_loss": 0.10014694184064865, "eval_sciq_pairs_runtime": 3.5167, "eval_sciq_pairs_samples_per_second": 36.398, "eval_sciq_pairs_steps_per_second": 0.284, "step": 2080 }, { "epoch": 2.139917695473251, "eval_qasc_pairs_loss": 0.1779145896434784, "eval_qasc_pairs_runtime": 0.6251, "eval_qasc_pairs_samples_per_second": 204.758, "eval_qasc_pairs_steps_per_second": 1.6, "step": 2080 }, { "epoch": 2.139917695473251, "eval_openbookqa_pairs_loss": 0.91819167137146, "eval_openbookqa_pairs_runtime": 0.6012, "eval_openbookqa_pairs_samples_per_second": 212.918, "eval_openbookqa_pairs_steps_per_second": 1.663, "step": 2080 }, { "epoch": 2.139917695473251, "eval_msmarco_pairs_loss": 0.8954432010650635, "eval_msmarco_pairs_runtime": 1.5269, "eval_msmarco_pairs_samples_per_second": 83.832, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 2080 }, { "epoch": 2.139917695473251, "eval_nq_pairs_loss": 0.6187785267829895, "eval_nq_pairs_runtime": 2.9105, "eval_nq_pairs_samples_per_second": 43.978, "eval_nq_pairs_steps_per_second": 0.344, "step": 2080 }, { "epoch": 2.139917695473251, "eval_trivia_pairs_loss": 0.9466494917869568, "eval_trivia_pairs_runtime": 3.4508, "eval_trivia_pairs_samples_per_second": 37.092, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2080 }, { "epoch": 2.139917695473251, "eval_gooaq_pairs_loss": 0.41242125630378723, "eval_gooaq_pairs_runtime": 0.9631, "eval_gooaq_pairs_samples_per_second": 132.908, "eval_gooaq_pairs_steps_per_second": 1.038, "step": 2080 }, { "epoch": 2.139917695473251, "eval_paws-pos_loss": 0.0224080178886652, "eval_paws-pos_runtime": 0.7079, "eval_paws-pos_samples_per_second": 180.81, "eval_paws-pos_steps_per_second": 1.413, "step": 2080 }, { "epoch": 2.139917695473251, "eval_global_dataset_loss": 0.4992813766002655, "eval_global_dataset_runtime": 13.4133, "eval_global_dataset_samples_per_second": 31.014, "eval_global_dataset_steps_per_second": 0.298, "step": 2080 }, { "epoch": 2.1409465020576133, "grad_norm": 10.416133880615234, "learning_rate": 2.491426450880469e-05, "loss": 0.7257, "step": 2081 }, { "epoch": 2.1419753086419755, "grad_norm": 0.7645072340965271, "learning_rate": 2.489938810120598e-05, "loss": 0.052, "step": 2082 }, { "epoch": 2.1430041152263373, "grad_norm": 4.174610614776611, "learning_rate": 2.4884509100131952e-05, "loss": 0.1194, "step": 2083 }, { "epoch": 2.1440329218106995, "grad_norm": 3.7576026916503906, "learning_rate": 2.486962753022308e-05, "loss": 0.102, "step": 2084 }, { "epoch": 2.1450617283950617, "grad_norm": 8.263005256652832, "learning_rate": 2.485474341612408e-05, "loss": 0.4642, "step": 2085 }, { "epoch": 2.146090534979424, "grad_norm": 12.31833267211914, "learning_rate": 2.4839856782483885e-05, "loss": 0.7639, "step": 2086 }, { "epoch": 2.147119341563786, "grad_norm": 6.041854381561279, "learning_rate": 2.4824967653955605e-05, "loss": 0.1744, "step": 2087 }, { "epoch": 2.148148148148148, "grad_norm": 11.772113800048828, "learning_rate": 2.4810076055196484e-05, "loss": 0.7136, "step": 2088 }, { "epoch": 2.14917695473251, "grad_norm": 5.519857406616211, "learning_rate": 2.4795182010867846e-05, "loss": 0.1451, "step": 2089 }, { "epoch": 2.1502057613168724, "grad_norm": 4.091213703155518, "learning_rate": 2.478028554563508e-05, "loss": 0.0667, "step": 2090 }, { "epoch": 2.1512345679012346, "grad_norm": 4.884109020233154, "learning_rate": 2.4765386684167563e-05, "loss": 0.1793, "step": 2091 }, { "epoch": 2.152263374485597, "grad_norm": 1.743223786354065, "learning_rate": 2.475048545113866e-05, "loss": 0.0274, "step": 2092 }, { "epoch": 2.153292181069959, "grad_norm": 3.984705686569214, "learning_rate": 2.4735581871225657e-05, "loss": 0.2496, "step": 2093 }, { "epoch": 2.154320987654321, "grad_norm": 0.780319094657898, "learning_rate": 2.472067596910972e-05, "loss": 0.0399, "step": 2094 }, { "epoch": 2.155349794238683, "grad_norm": 5.758412837982178, "learning_rate": 2.470576776947588e-05, "loss": 0.1306, "step": 2095 }, { "epoch": 2.156378600823045, "grad_norm": 4.503692150115967, "learning_rate": 2.4690857297012933e-05, "loss": 0.1086, "step": 2096 }, { "epoch": 2.1574074074074074, "grad_norm": 4.799394130706787, "learning_rate": 2.46759445764135e-05, "loss": 0.1679, "step": 2097 }, { "epoch": 2.1584362139917697, "grad_norm": 7.399900436401367, "learning_rate": 2.4661029632373862e-05, "loss": 0.2773, "step": 2098 }, { "epoch": 2.159465020576132, "grad_norm": 7.369089603424072, "learning_rate": 2.464611248959402e-05, "loss": 0.3006, "step": 2099 }, { "epoch": 2.1604938271604937, "grad_norm": 5.1560869216918945, "learning_rate": 2.4631193172777604e-05, "loss": 0.1716, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_Qnli-dev_cosine_accuracy": 0.7109375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7782326340675354, "eval_Qnli-dev_cosine_ap": 0.757763107379561, "eval_Qnli-dev_cosine_f1": 0.687813021702838, "eval_Qnli-dev_cosine_f1_threshold": 0.675483226776123, "eval_Qnli-dev_cosine_precision": 0.5674931129476584, "eval_Qnli-dev_cosine_recall": 0.8728813559322034, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 356.8968505859375, "eval_Qnli-dev_dot_ap": 0.6873747505236181, "eval_Qnli-dev_dot_f1": 0.6698113207547169, "eval_Qnli-dev_dot_f1_threshold": 282.644775390625, "eval_Qnli-dev_dot_precision": 0.5325, "eval_Qnli-dev_dot_recall": 0.902542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.423986434936523, "eval_Qnli-dev_euclidean_ap": 0.7648791936584969, "eval_Qnli-dev_euclidean_f1": 0.7001733102253034, "eval_Qnli-dev_euclidean_f1_threshold": 16.778839111328125, "eval_Qnli-dev_euclidean_precision": 0.592375366568915, "eval_Qnli-dev_euclidean_recall": 0.8559322033898306, "eval_Qnli-dev_manhattan_accuracy": 0.7109375, "eval_Qnli-dev_manhattan_accuracy_threshold": 301.74945068359375, "eval_Qnli-dev_manhattan_ap": 0.7650566601023531, "eval_Qnli-dev_manhattan_f1": 0.693200663349917, "eval_Qnli-dev_manhattan_f1_threshold": 361.003173828125, "eval_Qnli-dev_manhattan_precision": 0.5694822888283378, "eval_Qnli-dev_manhattan_recall": 0.885593220338983, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 356.8968505859375, "eval_Qnli-dev_max_ap": 0.7650566601023531, "eval_Qnli-dev_max_f1": 0.7001733102253034, "eval_Qnli-dev_max_f1_threshold": 361.003173828125, "eval_Qnli-dev_max_precision": 0.592375366568915, "eval_Qnli-dev_max_recall": 0.902542372881356, "eval_allNLI-dev_cosine_accuracy": 0.736328125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8163294792175293, "eval_allNLI-dev_cosine_ap": 0.6310447182631718, "eval_allNLI-dev_cosine_f1": 0.6367924528301887, "eval_allNLI-dev_cosine_f1_threshold": 0.7563125491142273, "eval_allNLI-dev_cosine_precision": 0.5378486055776892, "eval_allNLI-dev_cosine_recall": 0.7803468208092486, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 370.98956298828125, "eval_allNLI-dev_dot_ap": 0.5647638928000094, "eval_allNLI-dev_dot_f1": 0.6167400881057269, "eval_allNLI-dev_dot_f1_threshold": 315.9678649902344, "eval_allNLI-dev_dot_precision": 0.498220640569395, "eval_allNLI-dev_dot_recall": 0.8092485549132948, "eval_allNLI-dev_euclidean_accuracy": 0.74609375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.188844680786133, "eval_allNLI-dev_euclidean_ap": 0.6389873418454206, "eval_allNLI-dev_euclidean_f1": 0.6511627906976744, "eval_allNLI-dev_euclidean_f1_threshold": 14.704500198364258, "eval_allNLI-dev_euclidean_precision": 0.5447470817120622, "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, "eval_allNLI-dev_manhattan_accuracy": 0.73828125, "eval_allNLI-dev_manhattan_accuracy_threshold": 274.9945068359375, "eval_allNLI-dev_manhattan_ap": 0.6360244459755536, "eval_allNLI-dev_manhattan_f1": 0.6529680365296804, "eval_allNLI-dev_manhattan_f1_threshold": 312.77740478515625, "eval_allNLI-dev_manhattan_precision": 0.539622641509434, "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, "eval_allNLI-dev_max_accuracy": 0.74609375, "eval_allNLI-dev_max_accuracy_threshold": 370.98956298828125, "eval_allNLI-dev_max_ap": 0.6389873418454206, "eval_allNLI-dev_max_f1": 0.6529680365296804, "eval_allNLI-dev_max_f1_threshold": 315.9678649902344, "eval_allNLI-dev_max_precision": 0.5447470817120622, "eval_allNLI-dev_max_recall": 0.8265895953757225, "eval_sequential_score": 0.7650566601023531, "eval_sts-test_pearson_cosine": 0.8466836188121143, "eval_sts-test_pearson_dot": 0.8355830896354994, "eval_sts-test_pearson_euclidean": 0.875485343008167, "eval_sts-test_pearson_manhattan": 0.8748475934516446, "eval_sts-test_pearson_max": 0.875485343008167, "eval_sts-test_spearman_cosine": 0.8781566426078548, "eval_sts-test_spearman_dot": 0.8303461402364777, "eval_sts-test_spearman_euclidean": 0.8742917894203092, "eval_sts-test_spearman_manhattan": 0.8729370344842075, "eval_sts-test_spearman_max": 0.8781566426078548, "eval_vitaminc-pairs_loss": 3.370380163192749, "eval_vitaminc-pairs_runtime": 3.228, "eval_vitaminc-pairs_samples_per_second": 39.653, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_negation-triplets_loss": 0.8976505994796753, "eval_negation-triplets_runtime": 0.7647, "eval_negation-triplets_samples_per_second": 167.379, "eval_negation-triplets_steps_per_second": 1.308, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_scitail-pairs-pos_loss": 0.13327747583389282, "eval_scitail-pairs-pos_runtime": 0.9109, "eval_scitail-pairs-pos_samples_per_second": 140.516, "eval_scitail-pairs-pos_steps_per_second": 1.098, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_scitail-pairs-qa_loss": 0.0003133401623927057, "eval_scitail-pairs-qa_runtime": 0.6093, "eval_scitail-pairs-qa_samples_per_second": 210.065, "eval_scitail-pairs-qa_steps_per_second": 1.641, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_xsum-pairs_loss": 0.2953195869922638, "eval_xsum-pairs_runtime": 3.0347, "eval_xsum-pairs_samples_per_second": 42.178, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_sciq_pairs_loss": 0.096987284719944, "eval_sciq_pairs_runtime": 3.5139, "eval_sciq_pairs_samples_per_second": 36.427, "eval_sciq_pairs_steps_per_second": 0.285, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_qasc_pairs_loss": 0.17670966684818268, "eval_qasc_pairs_runtime": 0.6263, "eval_qasc_pairs_samples_per_second": 204.37, "eval_qasc_pairs_steps_per_second": 1.597, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_openbookqa_pairs_loss": 0.9312919974327087, "eval_openbookqa_pairs_runtime": 0.6039, "eval_openbookqa_pairs_samples_per_second": 211.964, "eval_openbookqa_pairs_steps_per_second": 1.656, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_msmarco_pairs_loss": 0.8172768950462341, "eval_msmarco_pairs_runtime": 1.5305, "eval_msmarco_pairs_samples_per_second": 83.635, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_nq_pairs_loss": 0.7029488682746887, "eval_nq_pairs_runtime": 2.9032, "eval_nq_pairs_samples_per_second": 44.089, "eval_nq_pairs_steps_per_second": 0.344, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_trivia_pairs_loss": 0.8234426975250244, "eval_trivia_pairs_runtime": 3.4781, "eval_trivia_pairs_samples_per_second": 36.802, "eval_trivia_pairs_steps_per_second": 0.288, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_gooaq_pairs_loss": 0.38398078083992004, "eval_gooaq_pairs_runtime": 0.961, "eval_gooaq_pairs_samples_per_second": 133.192, "eval_gooaq_pairs_steps_per_second": 1.041, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_paws-pos_loss": 0.022808760404586792, "eval_paws-pos_runtime": 0.7084, "eval_paws-pos_samples_per_second": 180.688, "eval_paws-pos_steps_per_second": 1.412, "step": 2100 }, { "epoch": 2.1604938271604937, "eval_global_dataset_loss": 0.48940902948379517, "eval_global_dataset_runtime": 13.4199, "eval_global_dataset_samples_per_second": 30.999, "eval_global_dataset_steps_per_second": 0.298, "step": 2100 }, { "epoch": 2.161522633744856, "grad_norm": 6.568051338195801, "learning_rate": 2.461627170663186e-05, "loss": 0.3582, "step": 2101 }, { "epoch": 2.162551440329218, "grad_norm": 5.944247722625732, "learning_rate": 2.4601348115867564e-05, "loss": 0.1507, "step": 2102 }, { "epoch": 2.1635802469135803, "grad_norm": 10.45062255859375, "learning_rate": 2.4586422425199033e-05, "loss": 0.7578, "step": 2103 }, { "epoch": 2.1646090534979425, "grad_norm": 14.21491813659668, "learning_rate": 2.4571494659344057e-05, "loss": 1.832, "step": 2104 }, { "epoch": 2.1656378600823043, "grad_norm": 7.499361991882324, "learning_rate": 2.4556564843023855e-05, "loss": 0.5014, "step": 2105 }, { "epoch": 2.1666666666666665, "grad_norm": 9.022387504577637, "learning_rate": 2.4541633000963055e-05, "loss": 0.4058, "step": 2106 }, { "epoch": 2.1676954732510287, "grad_norm": 5.3525166511535645, "learning_rate": 2.452669915788963e-05, "loss": 0.1921, "step": 2107 }, { "epoch": 2.168724279835391, "grad_norm": 4.118523120880127, "learning_rate": 2.4511763338534864e-05, "loss": 0.1086, "step": 2108 }, { "epoch": 2.169753086419753, "grad_norm": 8.975444793701172, "learning_rate": 2.449682556763333e-05, "loss": 0.5926, "step": 2109 }, { "epoch": 2.1707818930041154, "grad_norm": 4.746514320373535, "learning_rate": 2.4481885869922812e-05, "loss": 0.2029, "step": 2110 }, { "epoch": 2.171810699588477, "grad_norm": 8.580132484436035, "learning_rate": 2.44669442701443e-05, "loss": 0.3947, "step": 2111 }, { "epoch": 2.1728395061728394, "grad_norm": 6.105350017547607, "learning_rate": 2.445200079304192e-05, "loss": 0.4369, "step": 2112 }, { "epoch": 2.1738683127572016, "grad_norm": 7.118346691131592, "learning_rate": 2.4437055463362933e-05, "loss": 0.3152, "step": 2113 }, { "epoch": 2.174897119341564, "grad_norm": 2.816272735595703, "learning_rate": 2.4422108305857637e-05, "loss": 0.0426, "step": 2114 }, { "epoch": 2.175925925925926, "grad_norm": 2.252847194671631, "learning_rate": 2.4407159345279374e-05, "loss": 0.0696, "step": 2115 }, { "epoch": 2.1769547325102883, "grad_norm": 7.112878322601318, "learning_rate": 2.439220860638446e-05, "loss": 0.2596, "step": 2116 }, { "epoch": 2.17798353909465, "grad_norm": 8.24142837524414, "learning_rate": 2.437725611393219e-05, "loss": 0.4628, "step": 2117 }, { "epoch": 2.1790123456790123, "grad_norm": 3.664426803588867, "learning_rate": 2.436230189268471e-05, "loss": 0.0567, "step": 2118 }, { "epoch": 2.1800411522633745, "grad_norm": 1.27444589138031, "learning_rate": 2.4347345967407072e-05, "loss": 0.0208, "step": 2119 }, { "epoch": 2.1810699588477367, "grad_norm": 4.611666679382324, "learning_rate": 2.433238836286713e-05, "loss": 0.1303, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_Qnli-dev_cosine_accuracy": 0.705078125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7831888198852539, "eval_Qnli-dev_cosine_ap": 0.7659577175540198, "eval_Qnli-dev_cosine_f1": 0.7093235831809872, "eval_Qnli-dev_cosine_f1_threshold": 0.706408679485321, "eval_Qnli-dev_cosine_precision": 0.6237942122186495, "eval_Qnli-dev_cosine_recall": 0.8220338983050848, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 354.57574462890625, "eval_Qnli-dev_dot_ap": 0.7142960906071153, "eval_Qnli-dev_dot_f1": 0.6812816188870151, "eval_Qnli-dev_dot_f1_threshold": 297.7771911621094, "eval_Qnli-dev_dot_precision": 0.5658263305322129, "eval_Qnli-dev_dot_recall": 0.8559322033898306, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.078405380249023, "eval_Qnli-dev_euclidean_ap": 0.7712189177124892, "eval_Qnli-dev_euclidean_f1": 0.6994727592267135, "eval_Qnli-dev_euclidean_f1_threshold": 16.541831970214844, "eval_Qnli-dev_euclidean_precision": 0.5975975975975976, "eval_Qnli-dev_euclidean_recall": 0.8432203389830508, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 297.5526123046875, "eval_Qnli-dev_manhattan_ap": 0.7743633598322663, "eval_Qnli-dev_manhattan_f1": 0.70061099796334, "eval_Qnli-dev_manhattan_f1_threshold": 320.9577331542969, "eval_Qnli-dev_manhattan_precision": 0.6745098039215687, "eval_Qnli-dev_manhattan_recall": 0.7288135593220338, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 354.57574462890625, "eval_Qnli-dev_max_ap": 0.7743633598322663, "eval_Qnli-dev_max_f1": 0.7093235831809872, "eval_Qnli-dev_max_f1_threshold": 320.9577331542969, "eval_Qnli-dev_max_precision": 0.6745098039215687, "eval_Qnli-dev_max_recall": 0.8559322033898306, "eval_allNLI-dev_cosine_accuracy": 0.736328125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8165189027786255, "eval_allNLI-dev_cosine_ap": 0.6288426913268541, "eval_allNLI-dev_cosine_f1": 0.6465116279069768, "eval_allNLI-dev_cosine_f1_threshold": 0.7471227645874023, "eval_allNLI-dev_cosine_precision": 0.5408560311284046, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.708984375, "eval_allNLI-dev_dot_accuracy_threshold": 355.15179443359375, "eval_allNLI-dev_dot_ap": 0.5568715482689243, "eval_allNLI-dev_dot_f1": 0.5954545454545453, "eval_allNLI-dev_dot_f1_threshold": 319.06378173828125, "eval_allNLI-dev_dot_precision": 0.49063670411985016, "eval_allNLI-dev_dot_recall": 0.7572254335260116, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.634376525878906, "eval_allNLI-dev_euclidean_ap": 0.6383365591990686, "eval_allNLI-dev_euclidean_f1": 0.6508313539192399, "eval_allNLI-dev_euclidean_f1_threshold": 14.622352600097656, "eval_allNLI-dev_euclidean_precision": 0.5524193548387096, "eval_allNLI-dev_euclidean_recall": 0.791907514450867, "eval_allNLI-dev_manhattan_accuracy": 0.748046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 279.4615173339844, "eval_allNLI-dev_manhattan_ap": 0.6353048860025735, "eval_allNLI-dev_manhattan_f1": 0.6515837104072397, "eval_allNLI-dev_manhattan_f1_threshold": 313.66204833984375, "eval_allNLI-dev_manhattan_precision": 0.5353159851301115, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.748046875, "eval_allNLI-dev_max_accuracy_threshold": 355.15179443359375, "eval_allNLI-dev_max_ap": 0.6383365591990686, "eval_allNLI-dev_max_f1": 0.6515837104072397, "eval_allNLI-dev_max_f1_threshold": 319.06378173828125, "eval_allNLI-dev_max_precision": 0.5524193548387096, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.7743633598322663, "eval_sts-test_pearson_cosine": 0.8413452215643851, "eval_sts-test_pearson_dot": 0.8227410976773657, "eval_sts-test_pearson_euclidean": 0.8730961658540073, "eval_sts-test_pearson_manhattan": 0.8706832766511694, "eval_sts-test_pearson_max": 0.8730961658540073, "eval_sts-test_spearman_cosine": 0.8747658395322285, "eval_sts-test_spearman_dot": 0.8170051179467807, "eval_sts-test_spearman_euclidean": 0.8716662776657373, "eval_sts-test_spearman_manhattan": 0.8683805505518579, "eval_sts-test_spearman_max": 0.8747658395322285, "eval_vitaminc-pairs_loss": 3.3158156871795654, "eval_vitaminc-pairs_runtime": 3.2432, "eval_vitaminc-pairs_samples_per_second": 39.467, "eval_vitaminc-pairs_steps_per_second": 0.308, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_negation-triplets_loss": 0.8905675411224365, "eval_negation-triplets_runtime": 0.776, "eval_negation-triplets_samples_per_second": 164.945, "eval_negation-triplets_steps_per_second": 1.289, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_scitail-pairs-pos_loss": 0.11103859543800354, "eval_scitail-pairs-pos_runtime": 0.9444, "eval_scitail-pairs-pos_samples_per_second": 135.532, "eval_scitail-pairs-pos_steps_per_second": 1.059, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_scitail-pairs-qa_loss": 0.00043057429138571024, "eval_scitail-pairs-qa_runtime": 0.6124, "eval_scitail-pairs-qa_samples_per_second": 209.017, "eval_scitail-pairs-qa_steps_per_second": 1.633, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_xsum-pairs_loss": 0.26308897137641907, "eval_xsum-pairs_runtime": 3.03, "eval_xsum-pairs_samples_per_second": 42.245, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_sciq_pairs_loss": 0.09947659075260162, "eval_sciq_pairs_runtime": 3.5279, "eval_sciq_pairs_samples_per_second": 36.282, "eval_sciq_pairs_steps_per_second": 0.283, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_qasc_pairs_loss": 0.16730889678001404, "eval_qasc_pairs_runtime": 0.6409, "eval_qasc_pairs_samples_per_second": 199.734, "eval_qasc_pairs_steps_per_second": 1.56, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_openbookqa_pairs_loss": 0.8517054319381714, "eval_openbookqa_pairs_runtime": 0.6078, "eval_openbookqa_pairs_samples_per_second": 210.596, "eval_openbookqa_pairs_steps_per_second": 1.645, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_msmarco_pairs_loss": 0.8803500533103943, "eval_msmarco_pairs_runtime": 1.5294, "eval_msmarco_pairs_samples_per_second": 83.691, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_nq_pairs_loss": 0.7211570143699646, "eval_nq_pairs_runtime": 2.9044, "eval_nq_pairs_samples_per_second": 44.072, "eval_nq_pairs_steps_per_second": 0.344, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_trivia_pairs_loss": 0.8099327087402344, "eval_trivia_pairs_runtime": 3.4499, "eval_trivia_pairs_samples_per_second": 37.102, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_gooaq_pairs_loss": 0.4508455991744995, "eval_gooaq_pairs_runtime": 0.9607, "eval_gooaq_pairs_samples_per_second": 133.237, "eval_gooaq_pairs_steps_per_second": 1.041, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_paws-pos_loss": 0.02280852012336254, "eval_paws-pos_runtime": 0.7113, "eval_paws-pos_samples_per_second": 179.951, "eval_paws-pos_steps_per_second": 1.406, "step": 2120 }, { "epoch": 2.1810699588477367, "eval_global_dataset_loss": 0.5049570202827454, "eval_global_dataset_runtime": 13.4082, "eval_global_dataset_samples_per_second": 31.026, "eval_global_dataset_steps_per_second": 0.298, "step": 2120 }, { "epoch": 2.182098765432099, "grad_norm": 7.965096473693848, "learning_rate": 2.431742910383552e-05, "loss": 0.3001, "step": 2121 }, { "epoch": 2.1831275720164607, "grad_norm": 1.3078244924545288, "learning_rate": 2.4302468215085635e-05, "loss": 0.013, "step": 2122 }, { "epoch": 2.184156378600823, "grad_norm": 8.0444974899292, "learning_rate": 2.4287505721393537e-05, "loss": 0.3598, "step": 2123 }, { "epoch": 2.185185185185185, "grad_norm": 15.775069236755371, "learning_rate": 2.4272541647537966e-05, "loss": 2.2029, "step": 2124 }, { "epoch": 2.1862139917695473, "grad_norm": 5.384196758270264, "learning_rate": 2.4257576018300283e-05, "loss": 0.1542, "step": 2125 }, { "epoch": 2.1872427983539096, "grad_norm": 5.319241046905518, "learning_rate": 2.4242608858464408e-05, "loss": 0.1127, "step": 2126 }, { "epoch": 2.1882716049382718, "grad_norm": 5.162769794464111, "learning_rate": 2.4227640192816813e-05, "loss": 0.1667, "step": 2127 }, { "epoch": 2.1893004115226335, "grad_norm": 4.0983357429504395, "learning_rate": 2.421267004614645e-05, "loss": 0.0737, "step": 2128 }, { "epoch": 2.1903292181069958, "grad_norm": 8.134471893310547, "learning_rate": 2.4197698443244726e-05, "loss": 0.3904, "step": 2129 }, { "epoch": 2.191358024691358, "grad_norm": 7.660701274871826, "learning_rate": 2.418272540890547e-05, "loss": 0.3314, "step": 2130 }, { "epoch": 2.19238683127572, "grad_norm": 5.454843997955322, "learning_rate": 2.4167750967924873e-05, "loss": 0.139, "step": 2131 }, { "epoch": 2.1934156378600824, "grad_norm": 12.22930908203125, "learning_rate": 2.4152775145101455e-05, "loss": 0.6952, "step": 2132 }, { "epoch": 2.1944444444444446, "grad_norm": 4.859829902648926, "learning_rate": 2.413779796523602e-05, "loss": 0.1082, "step": 2133 }, { "epoch": 2.1954732510288064, "grad_norm": 7.867696285247803, "learning_rate": 2.4122819453131633e-05, "loss": 0.3075, "step": 2134 }, { "epoch": 2.1965020576131686, "grad_norm": 7.93525505065918, "learning_rate": 2.4107839633593557e-05, "loss": 0.3398, "step": 2135 }, { "epoch": 2.197530864197531, "grad_norm": 7.2044806480407715, "learning_rate": 2.409285853142922e-05, "loss": 0.3804, "step": 2136 }, { "epoch": 2.198559670781893, "grad_norm": 9.675668716430664, "learning_rate": 2.4077876171448172e-05, "loss": 0.4977, "step": 2137 }, { "epoch": 2.1995884773662553, "grad_norm": 4.7285261154174805, "learning_rate": 2.4062892578462054e-05, "loss": 0.2119, "step": 2138 }, { "epoch": 2.200617283950617, "grad_norm": 8.28393268585205, "learning_rate": 2.4047907777284544e-05, "loss": 0.3307, "step": 2139 }, { "epoch": 2.2016460905349793, "grad_norm": 9.960400581359863, "learning_rate": 2.403292179273131e-05, "loss": 0.4782, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_Qnli-dev_cosine_accuracy": 0.701171875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.765581488609314, "eval_Qnli-dev_cosine_ap": 0.7580039059833005, "eval_Qnli-dev_cosine_f1": 0.6927374301675978, "eval_Qnli-dev_cosine_f1_threshold": 0.7120383381843567, "eval_Qnli-dev_cosine_precision": 0.6179401993355482, "eval_Qnli-dev_cosine_recall": 0.788135593220339, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 349.69696044921875, "eval_Qnli-dev_dot_ap": 0.7077644564177754, "eval_Qnli-dev_dot_f1": 0.673434856175973, "eval_Qnli-dev_dot_f1_threshold": 297.18817138671875, "eval_Qnli-dev_dot_precision": 0.5605633802816902, "eval_Qnli-dev_dot_recall": 0.8432203389830508, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 13.917366027832031, "eval_Qnli-dev_euclidean_ap": 0.7640492109932211, "eval_Qnli-dev_euclidean_f1": 0.7022900763358778, "eval_Qnli-dev_euclidean_f1_threshold": 15.718003273010254, "eval_Qnli-dev_euclidean_precision": 0.6388888888888888, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 290.0631103515625, "eval_Qnli-dev_manhattan_ap": 0.7682290439889975, "eval_Qnli-dev_manhattan_f1": 0.7018867924528301, "eval_Qnli-dev_manhattan_f1_threshold": 334.3270263671875, "eval_Qnli-dev_manhattan_precision": 0.6326530612244898, "eval_Qnli-dev_manhattan_recall": 0.788135593220339, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 349.69696044921875, "eval_Qnli-dev_max_ap": 0.7682290439889975, "eval_Qnli-dev_max_f1": 0.7022900763358778, "eval_Qnli-dev_max_f1_threshold": 334.3270263671875, "eval_Qnli-dev_max_precision": 0.6388888888888888, "eval_Qnli-dev_max_recall": 0.8432203389830508, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8141307830810547, "eval_allNLI-dev_cosine_ap": 0.6345853397101863, "eval_allNLI-dev_cosine_f1": 0.6462264150943396, "eval_allNLI-dev_cosine_f1_threshold": 0.7495874166488647, "eval_allNLI-dev_cosine_precision": 0.545816733067729, "eval_allNLI-dev_cosine_recall": 0.791907514450867, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 346.0972595214844, "eval_allNLI-dev_dot_ap": 0.5619172672788605, "eval_allNLI-dev_dot_f1": 0.6056644880174291, "eval_allNLI-dev_dot_f1_threshold": 307.35089111328125, "eval_allNLI-dev_dot_precision": 0.486013986013986, "eval_allNLI-dev_dot_recall": 0.8034682080924855, "eval_allNLI-dev_euclidean_accuracy": 0.740234375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.474712371826172, "eval_allNLI-dev_euclidean_ap": 0.6442614108351035, "eval_allNLI-dev_euclidean_f1": 0.6512702078521939, "eval_allNLI-dev_euclidean_f1_threshold": 14.748228073120117, "eval_allNLI-dev_euclidean_precision": 0.5423076923076923, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.740234375, "eval_allNLI-dev_manhattan_accuracy_threshold": 278.2958984375, "eval_allNLI-dev_manhattan_ap": 0.6424357248372661, "eval_allNLI-dev_manhattan_f1": 0.6523809523809525, "eval_allNLI-dev_manhattan_f1_threshold": 303.55712890625, "eval_allNLI-dev_manhattan_precision": 0.5546558704453441, "eval_allNLI-dev_manhattan_recall": 0.791907514450867, "eval_allNLI-dev_max_accuracy": 0.740234375, "eval_allNLI-dev_max_accuracy_threshold": 346.0972595214844, "eval_allNLI-dev_max_ap": 0.6442614108351035, "eval_allNLI-dev_max_f1": 0.6523809523809525, "eval_allNLI-dev_max_f1_threshold": 307.35089111328125, "eval_allNLI-dev_max_precision": 0.5546558704453441, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.7682290439889975, "eval_sts-test_pearson_cosine": 0.8459220593950094, "eval_sts-test_pearson_dot": 0.8327689789009902, "eval_sts-test_pearson_euclidean": 0.8765604391215667, "eval_sts-test_pearson_manhattan": 0.8740908290341837, "eval_sts-test_pearson_max": 0.8765604391215667, "eval_sts-test_spearman_cosine": 0.877046811325348, "eval_sts-test_spearman_dot": 0.8285615046834501, "eval_sts-test_spearman_euclidean": 0.8735896868803397, "eval_sts-test_spearman_manhattan": 0.8719228813985556, "eval_sts-test_spearman_max": 0.877046811325348, "eval_vitaminc-pairs_loss": 3.2018566131591797, "eval_vitaminc-pairs_runtime": 3.2479, "eval_vitaminc-pairs_samples_per_second": 39.41, "eval_vitaminc-pairs_steps_per_second": 0.308, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_negation-triplets_loss": 0.9213140606880188, "eval_negation-triplets_runtime": 0.7717, "eval_negation-triplets_samples_per_second": 165.875, "eval_negation-triplets_steps_per_second": 1.296, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_scitail-pairs-pos_loss": 0.1060742661356926, "eval_scitail-pairs-pos_runtime": 0.9175, "eval_scitail-pairs-pos_samples_per_second": 139.511, "eval_scitail-pairs-pos_steps_per_second": 1.09, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_scitail-pairs-qa_loss": 0.0005529926856979728, "eval_scitail-pairs-qa_runtime": 0.6123, "eval_scitail-pairs-qa_samples_per_second": 209.05, "eval_scitail-pairs-qa_steps_per_second": 1.633, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_xsum-pairs_loss": 0.2825109660625458, "eval_xsum-pairs_runtime": 3.0356, "eval_xsum-pairs_samples_per_second": 42.166, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_sciq_pairs_loss": 0.09794219583272934, "eval_sciq_pairs_runtime": 3.5238, "eval_sciq_pairs_samples_per_second": 36.325, "eval_sciq_pairs_steps_per_second": 0.284, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_qasc_pairs_loss": 0.1473490297794342, "eval_qasc_pairs_runtime": 0.6205, "eval_qasc_pairs_samples_per_second": 206.29, "eval_qasc_pairs_steps_per_second": 1.612, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_openbookqa_pairs_loss": 0.8643597364425659, "eval_openbookqa_pairs_runtime": 0.6001, "eval_openbookqa_pairs_samples_per_second": 213.286, "eval_openbookqa_pairs_steps_per_second": 1.666, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_msmarco_pairs_loss": 0.8728999495506287, "eval_msmarco_pairs_runtime": 1.5248, "eval_msmarco_pairs_samples_per_second": 83.947, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_nq_pairs_loss": 0.7015603184700012, "eval_nq_pairs_runtime": 2.9064, "eval_nq_pairs_samples_per_second": 44.04, "eval_nq_pairs_steps_per_second": 0.344, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_trivia_pairs_loss": 0.8600196838378906, "eval_trivia_pairs_runtime": 3.4526, "eval_trivia_pairs_samples_per_second": 37.073, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_gooaq_pairs_loss": 0.41348978877067566, "eval_gooaq_pairs_runtime": 0.9592, "eval_gooaq_pairs_samples_per_second": 133.451, "eval_gooaq_pairs_steps_per_second": 1.043, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_paws-pos_loss": 0.02336895279586315, "eval_paws-pos_runtime": 0.7157, "eval_paws-pos_samples_per_second": 178.857, "eval_paws-pos_steps_per_second": 1.397, "step": 2140 }, { "epoch": 2.2016460905349793, "eval_global_dataset_loss": 0.48977920413017273, "eval_global_dataset_runtime": 13.4172, "eval_global_dataset_samples_per_second": 31.005, "eval_global_dataset_steps_per_second": 0.298, "step": 2140 }, { "epoch": 2.2026748971193415, "grad_norm": 4.7281365394592285, "learning_rate": 2.4017934649620005e-05, "loss": 0.0815, "step": 2141 }, { "epoch": 2.2037037037037037, "grad_norm": 5.841175556182861, "learning_rate": 2.4002946372770173e-05, "loss": 0.1414, "step": 2142 }, { "epoch": 2.204732510288066, "grad_norm": 10.14210033416748, "learning_rate": 2.3987956987003262e-05, "loss": 0.4976, "step": 2143 }, { "epoch": 2.205761316872428, "grad_norm": 6.116671562194824, "learning_rate": 2.3972966517142535e-05, "loss": 0.3755, "step": 2144 }, { "epoch": 2.20679012345679, "grad_norm": 7.912659168243408, "learning_rate": 2.3957974988013056e-05, "loss": 0.276, "step": 2145 }, { "epoch": 2.207818930041152, "grad_norm": 4.809524059295654, "learning_rate": 2.3942982424441652e-05, "loss": 0.1198, "step": 2146 }, { "epoch": 2.2088477366255144, "grad_norm": 3.9036455154418945, "learning_rate": 2.3927988851256852e-05, "loss": 0.0837, "step": 2147 }, { "epoch": 2.2098765432098766, "grad_norm": 5.67017936706543, "learning_rate": 2.3912994293288868e-05, "loss": 0.2129, "step": 2148 }, { "epoch": 2.210905349794239, "grad_norm": 3.9855170249938965, "learning_rate": 2.3897998775369527e-05, "loss": 0.0752, "step": 2149 }, { "epoch": 2.211934156378601, "grad_norm": 9.995519638061523, "learning_rate": 2.3883002322332263e-05, "loss": 0.5484, "step": 2150 }, { "epoch": 2.212962962962963, "grad_norm": 12.262630462646484, "learning_rate": 2.3868004959012048e-05, "loss": 0.7996, "step": 2151 }, { "epoch": 2.213991769547325, "grad_norm": 13.158291816711426, "learning_rate": 2.3853006710245366e-05, "loss": 1.6459, "step": 2152 }, { "epoch": 2.2150205761316872, "grad_norm": 4.3596367835998535, "learning_rate": 2.383800760087016e-05, "loss": 0.0923, "step": 2153 }, { "epoch": 2.2160493827160495, "grad_norm": 7.671905994415283, "learning_rate": 2.382300765572581e-05, "loss": 0.4755, "step": 2154 }, { "epoch": 2.2170781893004117, "grad_norm": 3.321316719055176, "learning_rate": 2.3808006899653076e-05, "loss": 0.0699, "step": 2155 }, { "epoch": 2.2181069958847734, "grad_norm": 5.187155723571777, "learning_rate": 2.3793005357494044e-05, "loss": 0.1471, "step": 2156 }, { "epoch": 2.2191358024691357, "grad_norm": 3.165531873703003, "learning_rate": 2.3778003054092144e-05, "loss": 0.0703, "step": 2157 }, { "epoch": 2.220164609053498, "grad_norm": 5.704407691955566, "learning_rate": 2.376300001429201e-05, "loss": 0.2989, "step": 2158 }, { "epoch": 2.22119341563786, "grad_norm": 4.120200157165527, "learning_rate": 2.3747996262939545e-05, "loss": 0.0834, "step": 2159 }, { "epoch": 2.2222222222222223, "grad_norm": 6.008984088897705, "learning_rate": 2.3732991824881802e-05, "loss": 0.2205, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_Qnli-dev_cosine_accuracy": 0.71875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7424961924552917, "eval_Qnli-dev_cosine_ap": 0.7690305002007699, "eval_Qnli-dev_cosine_f1": 0.6987447698744769, "eval_Qnli-dev_cosine_f1_threshold": 0.7424961924552917, "eval_Qnli-dev_cosine_precision": 0.6900826446280992, "eval_Qnli-dev_cosine_recall": 0.7076271186440678, "eval_Qnli-dev_dot_accuracy": 0.6796875, "eval_Qnli-dev_dot_accuracy_threshold": 345.74554443359375, "eval_Qnli-dev_dot_ap": 0.7234396167600803, "eval_Qnli-dev_dot_f1": 0.6786355475763016, "eval_Qnli-dev_dot_f1_threshold": 308.107177734375, "eval_Qnli-dev_dot_precision": 0.5887850467289719, "eval_Qnli-dev_dot_recall": 0.8008474576271186, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.977933883666992, "eval_Qnli-dev_euclidean_ap": 0.775756805101809, "eval_Qnli-dev_euclidean_f1": 0.7104247104247103, "eval_Qnli-dev_euclidean_f1_threshold": 15.89659595489502, "eval_Qnli-dev_euclidean_precision": 0.6524822695035462, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 308.61138916015625, "eval_Qnli-dev_manhattan_ap": 0.7777692408499661, "eval_Qnli-dev_manhattan_f1": 0.710172744721689, "eval_Qnli-dev_manhattan_f1_threshold": 335.52056884765625, "eval_Qnli-dev_manhattan_precision": 0.6491228070175439, "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, "eval_Qnli-dev_max_accuracy": 0.72265625, "eval_Qnli-dev_max_accuracy_threshold": 345.74554443359375, "eval_Qnli-dev_max_ap": 0.7777692408499661, "eval_Qnli-dev_max_f1": 0.7104247104247103, "eval_Qnli-dev_max_f1_threshold": 335.52056884765625, "eval_Qnli-dev_max_precision": 0.6900826446280992, "eval_Qnli-dev_max_recall": 0.8008474576271186, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.816895604133606, "eval_allNLI-dev_cosine_ap": 0.6317297508629345, "eval_allNLI-dev_cosine_f1": 0.6492374727668845, "eval_allNLI-dev_cosine_f1_threshold": 0.7320500612258911, "eval_allNLI-dev_cosine_precision": 0.5209790209790209, "eval_allNLI-dev_cosine_recall": 0.861271676300578, "eval_allNLI-dev_dot_accuracy": 0.705078125, "eval_allNLI-dev_dot_accuracy_threshold": 345.56048583984375, "eval_allNLI-dev_dot_ap": 0.5710281774303337, "eval_allNLI-dev_dot_f1": 0.6117647058823529, "eval_allNLI-dev_dot_f1_threshold": 325.12750244140625, "eval_allNLI-dev_dot_precision": 0.5158730158730159, "eval_allNLI-dev_dot_recall": 0.7514450867052023, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.781333923339844, "eval_allNLI-dev_euclidean_ap": 0.6402188977328761, "eval_allNLI-dev_euclidean_f1": 0.6510538641686183, "eval_allNLI-dev_euclidean_f1_threshold": 14.694303512573242, "eval_allNLI-dev_euclidean_precision": 0.547244094488189, "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, "eval_allNLI-dev_manhattan_accuracy": 0.740234375, "eval_allNLI-dev_manhattan_accuracy_threshold": 253.4004364013672, "eval_allNLI-dev_manhattan_ap": 0.6386852941077191, "eval_allNLI-dev_manhattan_f1": 0.6507592190889371, "eval_allNLI-dev_manhattan_f1_threshold": 321.07574462890625, "eval_allNLI-dev_manhattan_precision": 0.5208333333333334, "eval_allNLI-dev_manhattan_recall": 0.8670520231213873, "eval_allNLI-dev_max_accuracy": 0.7421875, "eval_allNLI-dev_max_accuracy_threshold": 345.56048583984375, "eval_allNLI-dev_max_ap": 0.6402188977328761, "eval_allNLI-dev_max_f1": 0.6510538641686183, "eval_allNLI-dev_max_f1_threshold": 325.12750244140625, "eval_allNLI-dev_max_precision": 0.547244094488189, "eval_allNLI-dev_max_recall": 0.8670520231213873, "eval_sequential_score": 0.7777692408499661, "eval_sts-test_pearson_cosine": 0.8503418350196617, "eval_sts-test_pearson_dot": 0.8399142019186396, "eval_sts-test_pearson_euclidean": 0.8776232485258656, "eval_sts-test_pearson_manhattan": 0.8749053666923767, "eval_sts-test_pearson_max": 0.8776232485258656, "eval_sts-test_spearman_cosine": 0.8801646317064868, "eval_sts-test_spearman_dot": 0.8413177174892843, "eval_sts-test_spearman_euclidean": 0.8759278955367787, "eval_sts-test_spearman_manhattan": 0.8726772534118303, "eval_sts-test_spearman_max": 0.8801646317064868, "eval_vitaminc-pairs_loss": 3.0154216289520264, "eval_vitaminc-pairs_runtime": 3.2312, "eval_vitaminc-pairs_samples_per_second": 39.614, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_negation-triplets_loss": 0.9069598317146301, "eval_negation-triplets_runtime": 0.7616, "eval_negation-triplets_samples_per_second": 168.07, "eval_negation-triplets_steps_per_second": 1.313, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_scitail-pairs-pos_loss": 0.11139774322509766, "eval_scitail-pairs-pos_runtime": 0.911, "eval_scitail-pairs-pos_samples_per_second": 140.499, "eval_scitail-pairs-pos_steps_per_second": 1.098, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_scitail-pairs-qa_loss": 0.00041776278521865606, "eval_scitail-pairs-qa_runtime": 0.6074, "eval_scitail-pairs-qa_samples_per_second": 210.737, "eval_scitail-pairs-qa_steps_per_second": 1.646, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_xsum-pairs_loss": 0.28882691264152527, "eval_xsum-pairs_runtime": 3.0328, "eval_xsum-pairs_samples_per_second": 42.205, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_sciq_pairs_loss": 0.09604327380657196, "eval_sciq_pairs_runtime": 3.561, "eval_sciq_pairs_samples_per_second": 35.945, "eval_sciq_pairs_steps_per_second": 0.281, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_qasc_pairs_loss": 0.14951874315738678, "eval_qasc_pairs_runtime": 0.6248, "eval_qasc_pairs_samples_per_second": 204.865, "eval_qasc_pairs_steps_per_second": 1.601, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_openbookqa_pairs_loss": 0.7421671152114868, "eval_openbookqa_pairs_runtime": 0.597, "eval_openbookqa_pairs_samples_per_second": 214.403, "eval_openbookqa_pairs_steps_per_second": 1.675, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_msmarco_pairs_loss": 0.8618593811988831, "eval_msmarco_pairs_runtime": 1.5259, "eval_msmarco_pairs_samples_per_second": 83.884, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_nq_pairs_loss": 0.7470789551734924, "eval_nq_pairs_runtime": 2.9046, "eval_nq_pairs_samples_per_second": 44.068, "eval_nq_pairs_steps_per_second": 0.344, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_trivia_pairs_loss": 0.9290419220924377, "eval_trivia_pairs_runtime": 3.449, "eval_trivia_pairs_samples_per_second": 37.112, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_gooaq_pairs_loss": 0.3941831886768341, "eval_gooaq_pairs_runtime": 0.9612, "eval_gooaq_pairs_samples_per_second": 133.174, "eval_gooaq_pairs_steps_per_second": 1.04, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_paws-pos_loss": 0.022844700142741203, "eval_paws-pos_runtime": 0.7045, "eval_paws-pos_samples_per_second": 181.684, "eval_paws-pos_steps_per_second": 1.419, "step": 2160 }, { "epoch": 2.2222222222222223, "eval_global_dataset_loss": 0.4469379186630249, "eval_global_dataset_runtime": 13.4019, "eval_global_dataset_samples_per_second": 31.04, "eval_global_dataset_steps_per_second": 0.298, "step": 2160 }, { "epoch": 2.2232510288065845, "grad_norm": 8.738439559936523, "learning_rate": 2.3717986724966976e-05, "loss": 0.5686, "step": 2161 }, { "epoch": 2.2242798353909463, "grad_norm": 4.119027137756348, "learning_rate": 2.3702980988044372e-05, "loss": 0.1064, "step": 2162 }, { "epoch": 2.2253086419753085, "grad_norm": 5.774399280548096, "learning_rate": 2.3687974638964328e-05, "loss": 0.3596, "step": 2163 }, { "epoch": 2.2263374485596708, "grad_norm": 3.8991827964782715, "learning_rate": 2.3672967702578222e-05, "loss": 0.1035, "step": 2164 }, { "epoch": 2.227366255144033, "grad_norm": 9.624704360961914, "learning_rate": 2.3657960203738376e-05, "loss": 0.4331, "step": 2165 }, { "epoch": 2.228395061728395, "grad_norm": 0.12300071120262146, "learning_rate": 2.364295216729806e-05, "loss": 0.0016, "step": 2166 }, { "epoch": 2.2294238683127574, "grad_norm": 0.8719635605812073, "learning_rate": 2.362794361811144e-05, "loss": 0.0151, "step": 2167 }, { "epoch": 2.230452674897119, "grad_norm": 0.6506179571151733, "learning_rate": 2.3612934581033514e-05, "loss": 0.0137, "step": 2168 }, { "epoch": 2.2314814814814814, "grad_norm": 9.637247085571289, "learning_rate": 2.3597925080920098e-05, "loss": 0.4622, "step": 2169 }, { "epoch": 2.2325102880658436, "grad_norm": 0.01775330677628517, "learning_rate": 2.3582915142627773e-05, "loss": 0.0002, "step": 2170 }, { "epoch": 2.233539094650206, "grad_norm": 5.711460590362549, "learning_rate": 2.3567904791013857e-05, "loss": 0.2227, "step": 2171 }, { "epoch": 2.234567901234568, "grad_norm": 7.3310346603393555, "learning_rate": 2.3552894050936323e-05, "loss": 0.1894, "step": 2172 }, { "epoch": 2.23559670781893, "grad_norm": 7.858341693878174, "learning_rate": 2.3537882947253826e-05, "loss": 0.3815, "step": 2173 }, { "epoch": 2.236625514403292, "grad_norm": 0.04247892647981644, "learning_rate": 2.3522871504825584e-05, "loss": 0.0005, "step": 2174 }, { "epoch": 2.2376543209876543, "grad_norm": 5.890376567840576, "learning_rate": 2.3507859748511408e-05, "loss": 0.3953, "step": 2175 }, { "epoch": 2.2386831275720165, "grad_norm": 9.880730628967285, "learning_rate": 2.3492847703171613e-05, "loss": 0.3849, "step": 2176 }, { "epoch": 2.2397119341563787, "grad_norm": 4.678164958953857, "learning_rate": 2.3477835393666995e-05, "loss": 0.1336, "step": 2177 }, { "epoch": 2.240740740740741, "grad_norm": 8.650569915771484, "learning_rate": 2.346282284485879e-05, "loss": 0.3762, "step": 2178 }, { "epoch": 2.2417695473251027, "grad_norm": 6.9241204261779785, "learning_rate": 2.344781008160862e-05, "loss": 0.2863, "step": 2179 }, { "epoch": 2.242798353909465, "grad_norm": 4.224590301513672, "learning_rate": 2.343279712877848e-05, "loss": 0.0982, "step": 2180 }, { "epoch": 2.242798353909465, "eval_Qnli-dev_cosine_accuracy": 0.724609375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7531611919403076, "eval_Qnli-dev_cosine_ap": 0.7725679080287509, "eval_Qnli-dev_cosine_f1": 0.7007299270072993, "eval_Qnli-dev_cosine_f1_threshold": 0.6888394355773926, "eval_Qnli-dev_cosine_precision": 0.6153846153846154, "eval_Qnli-dev_cosine_recall": 0.8135593220338984, "eval_Qnli-dev_dot_accuracy": 0.689453125, "eval_Qnli-dev_dot_accuracy_threshold": 354.9976806640625, "eval_Qnli-dev_dot_ap": 0.7329775964784278, "eval_Qnli-dev_dot_f1": 0.6797153024911032, "eval_Qnli-dev_dot_f1_threshold": 292.02471923828125, "eval_Qnli-dev_dot_precision": 0.5858895705521472, "eval_Qnli-dev_dot_recall": 0.809322033898305, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.06117057800293, "eval_Qnli-dev_euclidean_ap": 0.7781723429549365, "eval_Qnli-dev_euclidean_f1": 0.7039337474120082, "eval_Qnli-dev_euclidean_f1_threshold": 15.460293769836426, "eval_Qnli-dev_euclidean_precision": 0.6882591093117408, "eval_Qnli-dev_euclidean_recall": 0.7203389830508474, "eval_Qnli-dev_manhattan_accuracy": 0.72265625, "eval_Qnli-dev_manhattan_accuracy_threshold": 308.5833740234375, "eval_Qnli-dev_manhattan_ap": 0.7795451684351757, "eval_Qnli-dev_manhattan_f1": 0.7035175879396984, "eval_Qnli-dev_manhattan_f1_threshold": 364.977783203125, "eval_Qnli-dev_manhattan_precision": 0.5817174515235457, "eval_Qnli-dev_manhattan_recall": 0.8898305084745762, "eval_Qnli-dev_max_accuracy": 0.724609375, "eval_Qnli-dev_max_accuracy_threshold": 354.9976806640625, "eval_Qnli-dev_max_ap": 0.7795451684351757, "eval_Qnli-dev_max_f1": 0.7039337474120082, "eval_Qnli-dev_max_f1_threshold": 364.977783203125, "eval_Qnli-dev_max_precision": 0.6882591093117408, "eval_Qnli-dev_max_recall": 0.8898305084745762, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8510603308677673, "eval_allNLI-dev_cosine_ap": 0.6330996470858502, "eval_allNLI-dev_cosine_f1": 0.6413301662707839, "eval_allNLI-dev_cosine_f1_threshold": 0.7421815395355225, "eval_allNLI-dev_cosine_precision": 0.5443548387096774, "eval_allNLI-dev_cosine_recall": 0.7803468208092486, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 333.49041748046875, "eval_allNLI-dev_dot_ap": 0.5698646244511296, "eval_allNLI-dev_dot_f1": 0.6167800453514739, "eval_allNLI-dev_dot_f1_threshold": 306.33770751953125, "eval_allNLI-dev_dot_precision": 0.5074626865671642, "eval_allNLI-dev_dot_recall": 0.7861271676300579, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.0964994430542, "eval_allNLI-dev_euclidean_ap": 0.6408181010160761, "eval_allNLI-dev_euclidean_f1": 0.6411483253588517, "eval_allNLI-dev_euclidean_f1_threshold": 14.643377304077148, "eval_allNLI-dev_euclidean_precision": 0.5469387755102041, "eval_allNLI-dev_euclidean_recall": 0.7745664739884393, "eval_allNLI-dev_manhattan_accuracy": 0.7421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 270.64501953125, "eval_allNLI-dev_manhattan_ap": 0.6407437219853288, "eval_allNLI-dev_manhattan_f1": 0.6438356164383561, "eval_allNLI-dev_manhattan_f1_threshold": 314.3695068359375, "eval_allNLI-dev_manhattan_precision": 0.5320754716981132, "eval_allNLI-dev_manhattan_recall": 0.815028901734104, "eval_allNLI-dev_max_accuracy": 0.7421875, "eval_allNLI-dev_max_accuracy_threshold": 333.49041748046875, "eval_allNLI-dev_max_ap": 0.6408181010160761, "eval_allNLI-dev_max_f1": 0.6438356164383561, "eval_allNLI-dev_max_f1_threshold": 314.3695068359375, "eval_allNLI-dev_max_precision": 0.5469387755102041, "eval_allNLI-dev_max_recall": 0.815028901734104, "eval_sequential_score": 0.7795451684351757, "eval_sts-test_pearson_cosine": 0.8488464435945837, "eval_sts-test_pearson_dot": 0.8320500172324081, "eval_sts-test_pearson_euclidean": 0.8751944024128597, "eval_sts-test_pearson_manhattan": 0.8731738889750866, "eval_sts-test_pearson_max": 0.8751944024128597, "eval_sts-test_spearman_cosine": 0.8770244804459136, "eval_sts-test_spearman_dot": 0.8288539989279042, "eval_sts-test_spearman_euclidean": 0.8723124858802291, "eval_sts-test_spearman_manhattan": 0.8698531803104647, "eval_sts-test_spearman_max": 0.8770244804459136, "eval_vitaminc-pairs_loss": 2.9559402465820312, "eval_vitaminc-pairs_runtime": 3.2153, "eval_vitaminc-pairs_samples_per_second": 39.809, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 2180 }, { "epoch": 2.242798353909465, "eval_negation-triplets_loss": 0.9456331729888916, "eval_negation-triplets_runtime": 0.7645, "eval_negation-triplets_samples_per_second": 167.42, "eval_negation-triplets_steps_per_second": 1.308, "step": 2180 }, { "epoch": 2.242798353909465, "eval_scitail-pairs-pos_loss": 0.14271265268325806, "eval_scitail-pairs-pos_runtime": 0.8996, "eval_scitail-pairs-pos_samples_per_second": 142.286, "eval_scitail-pairs-pos_steps_per_second": 1.112, "step": 2180 }, { "epoch": 2.242798353909465, "eval_scitail-pairs-qa_loss": 0.00024114079133141786, "eval_scitail-pairs-qa_runtime": 0.6074, "eval_scitail-pairs-qa_samples_per_second": 210.732, "eval_scitail-pairs-qa_steps_per_second": 1.646, "step": 2180 }, { "epoch": 2.242798353909465, "eval_xsum-pairs_loss": 0.2774715721607208, "eval_xsum-pairs_runtime": 3.0374, "eval_xsum-pairs_samples_per_second": 42.141, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2180 }, { "epoch": 2.242798353909465, "eval_sciq_pairs_loss": 0.10312435775995255, "eval_sciq_pairs_runtime": 3.5401, "eval_sciq_pairs_samples_per_second": 36.157, "eval_sciq_pairs_steps_per_second": 0.282, "step": 2180 }, { "epoch": 2.242798353909465, "eval_qasc_pairs_loss": 0.15675711631774902, "eval_qasc_pairs_runtime": 0.6209, "eval_qasc_pairs_samples_per_second": 206.166, "eval_qasc_pairs_steps_per_second": 1.611, "step": 2180 }, { "epoch": 2.242798353909465, "eval_openbookqa_pairs_loss": 0.726242184638977, "eval_openbookqa_pairs_runtime": 0.5976, "eval_openbookqa_pairs_samples_per_second": 214.199, "eval_openbookqa_pairs_steps_per_second": 1.673, "step": 2180 }, { "epoch": 2.242798353909465, "eval_msmarco_pairs_loss": 0.947030782699585, "eval_msmarco_pairs_runtime": 1.5253, "eval_msmarco_pairs_samples_per_second": 83.919, "eval_msmarco_pairs_steps_per_second": 0.656, "step": 2180 }, { "epoch": 2.242798353909465, "eval_nq_pairs_loss": 0.6687220931053162, "eval_nq_pairs_runtime": 2.9002, "eval_nq_pairs_samples_per_second": 44.135, "eval_nq_pairs_steps_per_second": 0.345, "step": 2180 }, { "epoch": 2.242798353909465, "eval_trivia_pairs_loss": 0.9305880069732666, "eval_trivia_pairs_runtime": 3.4438, "eval_trivia_pairs_samples_per_second": 37.169, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2180 }, { "epoch": 2.242798353909465, "eval_gooaq_pairs_loss": 0.35884031653404236, "eval_gooaq_pairs_runtime": 0.9628, "eval_gooaq_pairs_samples_per_second": 132.949, "eval_gooaq_pairs_steps_per_second": 1.039, "step": 2180 }, { "epoch": 2.242798353909465, "eval_paws-pos_loss": 0.023016272112727165, "eval_paws-pos_runtime": 0.7091, "eval_paws-pos_samples_per_second": 180.513, "eval_paws-pos_steps_per_second": 1.41, "step": 2180 }, { "epoch": 2.242798353909465, "eval_global_dataset_loss": 0.4351368546485901, "eval_global_dataset_runtime": 13.3994, "eval_global_dataset_samples_per_second": 31.046, "eval_global_dataset_steps_per_second": 0.299, "step": 2180 }, { "epoch": 2.243827160493827, "grad_norm": 3.153961658477783, "learning_rate": 2.3417784011230672e-05, "loss": 0.054, "step": 2181 }, { "epoch": 2.2448559670781894, "grad_norm": 0.6234809160232544, "learning_rate": 2.340277075382775e-05, "loss": 0.0188, "step": 2182 }, { "epoch": 2.2458847736625516, "grad_norm": 11.650721549987793, "learning_rate": 2.3387757381432535e-05, "loss": 0.624, "step": 2183 }, { "epoch": 2.246913580246914, "grad_norm": 10.369563102722168, "learning_rate": 2.337274391890802e-05, "loss": 0.6085, "step": 2184 }, { "epoch": 2.2479423868312756, "grad_norm": 8.880125045776367, "learning_rate": 2.3357730391117345e-05, "loss": 0.4456, "step": 2185 }, { "epoch": 2.248971193415638, "grad_norm": 3.8903770446777344, "learning_rate": 2.3342716822923764e-05, "loss": 0.0507, "step": 2186 }, { "epoch": 2.25, "grad_norm": 8.858869552612305, "learning_rate": 2.3327703239190587e-05, "loss": 0.3086, "step": 2187 }, { "epoch": 2.251028806584362, "grad_norm": 3.737985372543335, "learning_rate": 2.331268966478117e-05, "loss": 0.0655, "step": 2188 }, { "epoch": 2.2520576131687244, "grad_norm": 0.21452650427818298, "learning_rate": 2.329767612455883e-05, "loss": 0.0018, "step": 2189 }, { "epoch": 2.253086419753086, "grad_norm": 4.297518253326416, "learning_rate": 2.3282662643386853e-05, "loss": 0.0847, "step": 2190 }, { "epoch": 2.2541152263374484, "grad_norm": 5.4159770011901855, "learning_rate": 2.3267649246128396e-05, "loss": 0.1242, "step": 2191 }, { "epoch": 2.2551440329218106, "grad_norm": 4.10284948348999, "learning_rate": 2.3252635957646513e-05, "loss": 0.0872, "step": 2192 }, { "epoch": 2.256172839506173, "grad_norm": 3.423576831817627, "learning_rate": 2.3237622802804044e-05, "loss": 0.1335, "step": 2193 }, { "epoch": 2.257201646090535, "grad_norm": 0.31105852127075195, "learning_rate": 2.3222609806463642e-05, "loss": 0.0086, "step": 2194 }, { "epoch": 2.2582304526748973, "grad_norm": 3.8018033504486084, "learning_rate": 2.3207596993487648e-05, "loss": 0.1193, "step": 2195 }, { "epoch": 2.259259259259259, "grad_norm": 3.9367120265960693, "learning_rate": 2.3192584388738163e-05, "loss": 0.0772, "step": 2196 }, { "epoch": 2.2602880658436213, "grad_norm": 16.381790161132812, "learning_rate": 2.3177572017076892e-05, "loss": 2.2076, "step": 2197 }, { "epoch": 2.2613168724279835, "grad_norm": 11.494051933288574, "learning_rate": 2.316255990336517e-05, "loss": 0.6439, "step": 2198 }, { "epoch": 2.2623456790123457, "grad_norm": 7.255645751953125, "learning_rate": 2.314754807246392e-05, "loss": 0.5676, "step": 2199 }, { "epoch": 2.263374485596708, "grad_norm": 7.189925193786621, "learning_rate": 2.3132536549233565e-05, "loss": 0.3284, "step": 2200 }, { "epoch": 2.263374485596708, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7566676139831543, "eval_Qnli-dev_cosine_ap": 0.7640013905505368, "eval_Qnli-dev_cosine_f1": 0.7085714285714286, "eval_Qnli-dev_cosine_f1_threshold": 0.7158597707748413, "eval_Qnli-dev_cosine_precision": 0.643598615916955, "eval_Qnli-dev_cosine_recall": 0.788135593220339, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 362.87335205078125, "eval_Qnli-dev_dot_ap": 0.7091137345101766, "eval_Qnli-dev_dot_f1": 0.6744186046511628, "eval_Qnli-dev_dot_f1_threshold": 289.0480041503906, "eval_Qnli-dev_dot_precision": 0.5546448087431693, "eval_Qnli-dev_dot_recall": 0.8601694915254238, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.758613586425781, "eval_Qnli-dev_euclidean_ap": 0.7709525163079047, "eval_Qnli-dev_euclidean_f1": 0.7072243346007605, "eval_Qnli-dev_euclidean_f1_threshold": 15.905702590942383, "eval_Qnli-dev_euclidean_precision": 0.6413793103448275, "eval_Qnli-dev_euclidean_recall": 0.788135593220339, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 297.7829284667969, "eval_Qnli-dev_manhattan_ap": 0.7734867279761796, "eval_Qnli-dev_manhattan_f1": 0.7137681159420289, "eval_Qnli-dev_manhattan_f1_threshold": 344.4150695800781, "eval_Qnli-dev_manhattan_precision": 0.6234177215189873, "eval_Qnli-dev_manhattan_recall": 0.8347457627118644, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 362.87335205078125, "eval_Qnli-dev_max_ap": 0.7734867279761796, "eval_Qnli-dev_max_f1": 0.7137681159420289, "eval_Qnli-dev_max_f1_threshold": 344.4150695800781, "eval_Qnli-dev_max_precision": 0.643598615916955, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8046222925186157, "eval_allNLI-dev_cosine_ap": 0.6355167479239126, "eval_allNLI-dev_cosine_f1": 0.6329670329670329, "eval_allNLI-dev_cosine_f1_threshold": 0.7259559631347656, "eval_allNLI-dev_cosine_precision": 0.5106382978723404, "eval_allNLI-dev_cosine_recall": 0.8323699421965318, "eval_allNLI-dev_dot_accuracy": 0.705078125, "eval_allNLI-dev_dot_accuracy_threshold": 392.41937255859375, "eval_allNLI-dev_dot_ap": 0.5725432880092478, "eval_allNLI-dev_dot_f1": 0.6116504854368932, "eval_allNLI-dev_dot_f1_threshold": 312.41143798828125, "eval_allNLI-dev_dot_precision": 0.5271966527196653, "eval_allNLI-dev_dot_recall": 0.7283236994219653, "eval_allNLI-dev_euclidean_accuracy": 0.748046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.786861419677734, "eval_allNLI-dev_euclidean_ap": 0.6435132670771473, "eval_allNLI-dev_euclidean_f1": 0.6423982869379016, "eval_allNLI-dev_euclidean_f1_threshold": 15.413522720336914, "eval_allNLI-dev_euclidean_precision": 0.5102040816326531, "eval_allNLI-dev_euclidean_recall": 0.8670520231213873, "eval_allNLI-dev_manhattan_accuracy": 0.740234375, "eval_allNLI-dev_manhattan_accuracy_threshold": 270.7494812011719, "eval_allNLI-dev_manhattan_ap": 0.640275133606661, "eval_allNLI-dev_manhattan_f1": 0.6422413793103448, "eval_allNLI-dev_manhattan_f1_threshold": 320.50738525390625, "eval_allNLI-dev_manhattan_precision": 0.5120274914089347, "eval_allNLI-dev_manhattan_recall": 0.861271676300578, "eval_allNLI-dev_max_accuracy": 0.748046875, "eval_allNLI-dev_max_accuracy_threshold": 392.41937255859375, "eval_allNLI-dev_max_ap": 0.6435132670771473, "eval_allNLI-dev_max_f1": 0.6423982869379016, "eval_allNLI-dev_max_f1_threshold": 320.50738525390625, "eval_allNLI-dev_max_precision": 0.5271966527196653, "eval_allNLI-dev_max_recall": 0.8670520231213873, "eval_sequential_score": 0.7734867279761796, "eval_sts-test_pearson_cosine": 0.8508751459497785, "eval_sts-test_pearson_dot": 0.8346471700814747, "eval_sts-test_pearson_euclidean": 0.8763811495016889, "eval_sts-test_pearson_manhattan": 0.874745695018937, "eval_sts-test_pearson_max": 0.8763811495016889, "eval_sts-test_spearman_cosine": 0.8763064464568886, "eval_sts-test_spearman_dot": 0.8267857746314393, "eval_sts-test_spearman_euclidean": 0.8713593287638537, "eval_sts-test_spearman_manhattan": 0.8700427914049802, "eval_sts-test_spearman_max": 0.8763064464568886, "eval_vitaminc-pairs_loss": 3.0157594680786133, "eval_vitaminc-pairs_runtime": 3.2341, "eval_vitaminc-pairs_samples_per_second": 39.579, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 2200 }, { "epoch": 2.263374485596708, "eval_negation-triplets_loss": 0.9241347312927246, "eval_negation-triplets_runtime": 0.7772, "eval_negation-triplets_samples_per_second": 164.684, "eval_negation-triplets_steps_per_second": 1.287, "step": 2200 }, { "epoch": 2.263374485596708, "eval_scitail-pairs-pos_loss": 0.13951894640922546, "eval_scitail-pairs-pos_runtime": 0.9917, "eval_scitail-pairs-pos_samples_per_second": 129.078, "eval_scitail-pairs-pos_steps_per_second": 1.008, "step": 2200 }, { "epoch": 2.263374485596708, "eval_scitail-pairs-qa_loss": 0.0003016136179212481, "eval_scitail-pairs-qa_runtime": 0.6291, "eval_scitail-pairs-qa_samples_per_second": 203.454, "eval_scitail-pairs-qa_steps_per_second": 1.589, "step": 2200 }, { "epoch": 2.263374485596708, "eval_xsum-pairs_loss": 0.2963438034057617, "eval_xsum-pairs_runtime": 3.0494, "eval_xsum-pairs_samples_per_second": 41.976, "eval_xsum-pairs_steps_per_second": 0.328, "step": 2200 }, { "epoch": 2.263374485596708, "eval_sciq_pairs_loss": 0.103203684091568, "eval_sciq_pairs_runtime": 3.5416, "eval_sciq_pairs_samples_per_second": 36.142, "eval_sciq_pairs_steps_per_second": 0.282, "step": 2200 }, { "epoch": 2.263374485596708, "eval_qasc_pairs_loss": 0.16400018334388733, "eval_qasc_pairs_runtime": 0.6278, "eval_qasc_pairs_samples_per_second": 203.89, "eval_qasc_pairs_steps_per_second": 1.593, "step": 2200 }, { "epoch": 2.263374485596708, "eval_openbookqa_pairs_loss": 0.7553422451019287, "eval_openbookqa_pairs_runtime": 0.6066, "eval_openbookqa_pairs_samples_per_second": 211.012, "eval_openbookqa_pairs_steps_per_second": 1.649, "step": 2200 }, { "epoch": 2.263374485596708, "eval_msmarco_pairs_loss": 0.8942117094993591, "eval_msmarco_pairs_runtime": 1.5404, "eval_msmarco_pairs_samples_per_second": 83.096, "eval_msmarco_pairs_steps_per_second": 0.649, "step": 2200 }, { "epoch": 2.263374485596708, "eval_nq_pairs_loss": 0.6588362455368042, "eval_nq_pairs_runtime": 2.9136, "eval_nq_pairs_samples_per_second": 43.932, "eval_nq_pairs_steps_per_second": 0.343, "step": 2200 }, { "epoch": 2.263374485596708, "eval_trivia_pairs_loss": 0.8141089081764221, "eval_trivia_pairs_runtime": 3.4421, "eval_trivia_pairs_samples_per_second": 37.187, "eval_trivia_pairs_steps_per_second": 0.291, "step": 2200 }, { "epoch": 2.263374485596708, "eval_gooaq_pairs_loss": 0.34560713171958923, "eval_gooaq_pairs_runtime": 0.9596, "eval_gooaq_pairs_samples_per_second": 133.39, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 2200 }, { "epoch": 2.263374485596708, "eval_paws-pos_loss": 0.023279238492250443, "eval_paws-pos_runtime": 0.7179, "eval_paws-pos_samples_per_second": 178.304, "eval_paws-pos_steps_per_second": 1.393, "step": 2200 }, { "epoch": 2.263374485596708, "eval_global_dataset_loss": 0.43096092343330383, "eval_global_dataset_runtime": 13.4413, "eval_global_dataset_samples_per_second": 30.949, "eval_global_dataset_steps_per_second": 0.298, "step": 2200 }, { "epoch": 2.26440329218107, "grad_norm": 2.2469570636749268, "learning_rate": 2.3117525358534053e-05, "loss": 0.0701, "step": 2201 }, { "epoch": 2.265432098765432, "grad_norm": 5.729778289794922, "learning_rate": 2.3102514525224764e-05, "loss": 0.1448, "step": 2202 }, { "epoch": 2.266460905349794, "grad_norm": 11.240240097045898, "learning_rate": 2.3087504074164473e-05, "loss": 0.8315, "step": 2203 }, { "epoch": 2.2674897119341564, "grad_norm": 11.073486328125, "learning_rate": 2.307249403021135e-05, "loss": 0.6518, "step": 2204 }, { "epoch": 2.2685185185185186, "grad_norm": 5.503248691558838, "learning_rate": 2.3057484418222877e-05, "loss": 0.1358, "step": 2205 }, { "epoch": 2.269547325102881, "grad_norm": 2.9899778366088867, "learning_rate": 2.3042475263055816e-05, "loss": 0.0593, "step": 2206 }, { "epoch": 2.2705761316872426, "grad_norm": 4.398398399353027, "learning_rate": 2.3027466589566173e-05, "loss": 0.1171, "step": 2207 }, { "epoch": 2.271604938271605, "grad_norm": 8.233288764953613, "learning_rate": 2.3012458422609178e-05, "loss": 0.3938, "step": 2208 }, { "epoch": 2.272633744855967, "grad_norm": 5.72066068649292, "learning_rate": 2.2997450787039184e-05, "loss": 0.1561, "step": 2209 }, { "epoch": 2.2736625514403292, "grad_norm": 4.385392189025879, "learning_rate": 2.2982443707709698e-05, "loss": 0.1494, "step": 2210 }, { "epoch": 2.2746913580246915, "grad_norm": 4.462737083435059, "learning_rate": 2.296743720947328e-05, "loss": 0.1013, "step": 2211 }, { "epoch": 2.2757201646090537, "grad_norm": 3.73474383354187, "learning_rate": 2.295243131718156e-05, "loss": 0.1059, "step": 2212 }, { "epoch": 2.2767489711934155, "grad_norm": 9.668663024902344, "learning_rate": 2.293742605568513e-05, "loss": 0.652, "step": 2213 }, { "epoch": 2.2777777777777777, "grad_norm": 7.706223964691162, "learning_rate": 2.2922421449833554e-05, "loss": 0.4134, "step": 2214 }, { "epoch": 2.27880658436214, "grad_norm": 6.752259254455566, "learning_rate": 2.290741752447531e-05, "loss": 0.2024, "step": 2215 }, { "epoch": 2.279835390946502, "grad_norm": 1.5901849269866943, "learning_rate": 2.2892414304457746e-05, "loss": 0.0232, "step": 2216 }, { "epoch": 2.2808641975308643, "grad_norm": 4.390174865722656, "learning_rate": 2.2877411814627048e-05, "loss": 0.1113, "step": 2217 }, { "epoch": 2.2818930041152266, "grad_norm": 9.297547340393066, "learning_rate": 2.2862410079828187e-05, "loss": 0.4733, "step": 2218 }, { "epoch": 2.2829218106995883, "grad_norm": 5.782696723937988, "learning_rate": 2.2847409124904876e-05, "loss": 0.2747, "step": 2219 }, { "epoch": 2.2839506172839505, "grad_norm": 7.616298675537109, "learning_rate": 2.2832408974699553e-05, "loss": 0.3177, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7431902885437012, "eval_Qnli-dev_cosine_ap": 0.7633843829363348, "eval_Qnli-dev_cosine_f1": 0.7148148148148147, "eval_Qnli-dev_cosine_f1_threshold": 0.7029442191123962, "eval_Qnli-dev_cosine_precision": 0.6348684210526315, "eval_Qnli-dev_cosine_recall": 0.8177966101694916, "eval_Qnli-dev_dot_accuracy": 0.6796875, "eval_Qnli-dev_dot_accuracy_threshold": 345.458251953125, "eval_Qnli-dev_dot_ap": 0.7175678328566067, "eval_Qnli-dev_dot_f1": 0.6727272727272727, "eval_Qnli-dev_dot_f1_threshold": 265.48504638671875, "eval_Qnli-dev_dot_precision": 0.5235849056603774, "eval_Qnli-dev_dot_recall": 0.940677966101695, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.796134948730469, "eval_Qnli-dev_euclidean_ap": 0.7693916423310228, "eval_Qnli-dev_euclidean_f1": 0.7173489278752436, "eval_Qnli-dev_euclidean_f1_threshold": 15.845483779907227, "eval_Qnli-dev_euclidean_precision": 0.6642599277978339, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 333.9942321777344, "eval_Qnli-dev_manhattan_ap": 0.7722873766535558, "eval_Qnli-dev_manhattan_f1": 0.7195357833655707, "eval_Qnli-dev_manhattan_f1_threshold": 334.17718505859375, "eval_Qnli-dev_manhattan_precision": 0.6619217081850534, "eval_Qnli-dev_manhattan_recall": 0.788135593220339, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 345.458251953125, "eval_Qnli-dev_max_ap": 0.7722873766535558, "eval_Qnli-dev_max_f1": 0.7195357833655707, "eval_Qnli-dev_max_f1_threshold": 334.17718505859375, "eval_Qnli-dev_max_precision": 0.6642599277978339, "eval_Qnli-dev_max_recall": 0.940677966101695, "eval_allNLI-dev_cosine_accuracy": 0.736328125, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8010204434394836, "eval_allNLI-dev_cosine_ap": 0.6357351561568579, "eval_allNLI-dev_cosine_f1": 0.6347826086956523, "eval_allNLI-dev_cosine_f1_threshold": 0.721085786819458, "eval_allNLI-dev_cosine_precision": 0.5087108013937283, "eval_allNLI-dev_cosine_recall": 0.8439306358381503, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 389.8740539550781, "eval_allNLI-dev_dot_ap": 0.5720718350764233, "eval_allNLI-dev_dot_f1": 0.6068965517241378, "eval_allNLI-dev_dot_f1_threshold": 307.6630859375, "eval_allNLI-dev_dot_precision": 0.5038167938931297, "eval_allNLI-dev_dot_recall": 0.7630057803468208, "eval_allNLI-dev_euclidean_accuracy": 0.74609375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.75674819946289, "eval_allNLI-dev_euclidean_ap": 0.6437870159604936, "eval_allNLI-dev_euclidean_f1": 0.6318082788671023, "eval_allNLI-dev_euclidean_f1_threshold": 15.444945335388184, "eval_allNLI-dev_euclidean_precision": 0.506993006993007, "eval_allNLI-dev_euclidean_recall": 0.838150289017341, "eval_allNLI-dev_manhattan_accuracy": 0.744140625, "eval_allNLI-dev_manhattan_accuracy_threshold": 274.901611328125, "eval_allNLI-dev_manhattan_ap": 0.6431584559956899, "eval_allNLI-dev_manhattan_f1": 0.6379310344827586, "eval_allNLI-dev_manhattan_f1_threshold": 325.1876220703125, "eval_allNLI-dev_manhattan_precision": 0.5085910652920962, "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, "eval_allNLI-dev_max_accuracy": 0.74609375, "eval_allNLI-dev_max_accuracy_threshold": 389.8740539550781, "eval_allNLI-dev_max_ap": 0.6437870159604936, "eval_allNLI-dev_max_f1": 0.6379310344827586, "eval_allNLI-dev_max_f1_threshold": 325.1876220703125, "eval_allNLI-dev_max_precision": 0.5087108013937283, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7722873766535558, "eval_sts-test_pearson_cosine": 0.8490522214967452, "eval_sts-test_pearson_dot": 0.8303687950150636, "eval_sts-test_pearson_euclidean": 0.8776849450029162, "eval_sts-test_pearson_manhattan": 0.8753737515387936, "eval_sts-test_pearson_max": 0.8776849450029162, "eval_sts-test_spearman_cosine": 0.8763409496794213, "eval_sts-test_spearman_dot": 0.8277738824829615, "eval_sts-test_spearman_euclidean": 0.8735485157198595, "eval_sts-test_spearman_manhattan": 0.8715688989209495, "eval_sts-test_spearman_max": 0.8763409496794213, "eval_vitaminc-pairs_loss": 2.9399707317352295, "eval_vitaminc-pairs_runtime": 3.227, "eval_vitaminc-pairs_samples_per_second": 39.665, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_negation-triplets_loss": 0.9159117341041565, "eval_negation-triplets_runtime": 0.7877, "eval_negation-triplets_samples_per_second": 162.497, "eval_negation-triplets_steps_per_second": 1.27, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_scitail-pairs-pos_loss": 0.1581612527370453, "eval_scitail-pairs-pos_runtime": 0.9105, "eval_scitail-pairs-pos_samples_per_second": 140.58, "eval_scitail-pairs-pos_steps_per_second": 1.098, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_scitail-pairs-qa_loss": 0.0003244028484914452, "eval_scitail-pairs-qa_runtime": 0.6082, "eval_scitail-pairs-qa_samples_per_second": 210.447, "eval_scitail-pairs-qa_steps_per_second": 1.644, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_xsum-pairs_loss": 0.22244523465633392, "eval_xsum-pairs_runtime": 3.0427, "eval_xsum-pairs_samples_per_second": 42.068, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_sciq_pairs_loss": 0.09049579501152039, "eval_sciq_pairs_runtime": 3.5159, "eval_sciq_pairs_samples_per_second": 36.406, "eval_sciq_pairs_steps_per_second": 0.284, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_qasc_pairs_loss": 0.1519252359867096, "eval_qasc_pairs_runtime": 0.627, "eval_qasc_pairs_samples_per_second": 204.131, "eval_qasc_pairs_steps_per_second": 1.595, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_openbookqa_pairs_loss": 0.6787049174308777, "eval_openbookqa_pairs_runtime": 0.6152, "eval_openbookqa_pairs_samples_per_second": 208.067, "eval_openbookqa_pairs_steps_per_second": 1.626, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_msmarco_pairs_loss": 0.841157853603363, "eval_msmarco_pairs_runtime": 1.5315, "eval_msmarco_pairs_samples_per_second": 83.578, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_nq_pairs_loss": 0.6983832716941833, "eval_nq_pairs_runtime": 2.9231, "eval_nq_pairs_samples_per_second": 43.788, "eval_nq_pairs_steps_per_second": 0.342, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_trivia_pairs_loss": 0.7856079339981079, "eval_trivia_pairs_runtime": 3.4531, "eval_trivia_pairs_samples_per_second": 37.069, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_gooaq_pairs_loss": 0.3422422707080841, "eval_gooaq_pairs_runtime": 0.9637, "eval_gooaq_pairs_samples_per_second": 132.816, "eval_gooaq_pairs_steps_per_second": 1.038, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_paws-pos_loss": 0.022888919338583946, "eval_paws-pos_runtime": 0.7095, "eval_paws-pos_samples_per_second": 180.407, "eval_paws-pos_steps_per_second": 1.409, "step": 2220 }, { "epoch": 2.2839506172839505, "eval_global_dataset_loss": 0.4147057831287384, "eval_global_dataset_runtime": 13.4279, "eval_global_dataset_samples_per_second": 30.98, "eval_global_dataset_steps_per_second": 0.298, "step": 2220 }, { "epoch": 2.2849794238683128, "grad_norm": 4.396790027618408, "learning_rate": 2.2817409654053316e-05, "loss": 0.1144, "step": 2221 }, { "epoch": 2.286008230452675, "grad_norm": 3.684896230697632, "learning_rate": 2.280241118780589e-05, "loss": 0.0587, "step": 2222 }, { "epoch": 2.287037037037037, "grad_norm": 0.7990652322769165, "learning_rate": 2.278741360079558e-05, "loss": 0.0078, "step": 2223 }, { "epoch": 2.288065843621399, "grad_norm": 8.240120887756348, "learning_rate": 2.277241691785924e-05, "loss": 0.4262, "step": 2224 }, { "epoch": 2.289094650205761, "grad_norm": 1.9743107557296753, "learning_rate": 2.2757421163832226e-05, "loss": 0.0242, "step": 2225 }, { "epoch": 2.2901234567901234, "grad_norm": 0.4200769066810608, "learning_rate": 2.274242636354835e-05, "loss": 0.0203, "step": 2226 }, { "epoch": 2.2911522633744856, "grad_norm": 4.178246021270752, "learning_rate": 2.272743254183986e-05, "loss": 0.1026, "step": 2227 }, { "epoch": 2.292181069958848, "grad_norm": 10.072691917419434, "learning_rate": 2.2712439723537363e-05, "loss": 0.464, "step": 2228 }, { "epoch": 2.29320987654321, "grad_norm": 4.121541500091553, "learning_rate": 2.2697447933469823e-05, "loss": 0.0832, "step": 2229 }, { "epoch": 2.294238683127572, "grad_norm": 8.78761100769043, "learning_rate": 2.2682457196464492e-05, "loss": 0.3638, "step": 2230 }, { "epoch": 2.295267489711934, "grad_norm": 4.9234299659729, "learning_rate": 2.2667467537346872e-05, "loss": 0.2132, "step": 2231 }, { "epoch": 2.2962962962962963, "grad_norm": 7.4635701179504395, "learning_rate": 2.2652478980940688e-05, "loss": 0.3787, "step": 2232 }, { "epoch": 2.2973251028806585, "grad_norm": 15.064518928527832, "learning_rate": 2.2637491552067842e-05, "loss": 1.8201, "step": 2233 }, { "epoch": 2.2983539094650207, "grad_norm": 5.404539108276367, "learning_rate": 2.262250527554837e-05, "loss": 0.1351, "step": 2234 }, { "epoch": 2.299382716049383, "grad_norm": 0.02893410064280033, "learning_rate": 2.2607520176200378e-05, "loss": 0.0003, "step": 2235 }, { "epoch": 2.3004115226337447, "grad_norm": 6.287380695343018, "learning_rate": 2.2592536278840047e-05, "loss": 0.2763, "step": 2236 }, { "epoch": 2.301440329218107, "grad_norm": 4.5157389640808105, "learning_rate": 2.257755360828156e-05, "loss": 0.1501, "step": 2237 }, { "epoch": 2.302469135802469, "grad_norm": 7.763871669769287, "learning_rate": 2.2562572189337068e-05, "loss": 0.3872, "step": 2238 }, { "epoch": 2.3034979423868314, "grad_norm": 0.24251669645309448, "learning_rate": 2.2547592046816648e-05, "loss": 0.0036, "step": 2239 }, { "epoch": 2.3045267489711936, "grad_norm": 4.772953033447266, "learning_rate": 2.253261320552826e-05, "loss": 0.0906, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_Qnli-dev_cosine_accuracy": 0.70703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7303512096405029, "eval_Qnli-dev_cosine_ap": 0.7515026165638969, "eval_Qnli-dev_cosine_f1": 0.7020872865275143, "eval_Qnli-dev_cosine_f1_threshold": 0.6963032484054565, "eval_Qnli-dev_cosine_precision": 0.6357388316151202, "eval_Qnli-dev_cosine_recall": 0.7838983050847458, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 339.2628479003906, "eval_Qnli-dev_dot_ap": 0.7090629129430073, "eval_Qnli-dev_dot_f1": 0.6728624535315985, "eval_Qnli-dev_dot_f1_threshold": 305.44061279296875, "eval_Qnli-dev_dot_precision": 0.5993377483443708, "eval_Qnli-dev_dot_recall": 0.7669491525423728, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.274980545043945, "eval_Qnli-dev_euclidean_ap": 0.7594778709350474, "eval_Qnli-dev_euclidean_f1": 0.7017543859649122, "eval_Qnli-dev_euclidean_f1_threshold": 16.277881622314453, "eval_Qnli-dev_euclidean_precision": 0.6498194945848376, "eval_Qnli-dev_euclidean_recall": 0.7627118644067796, "eval_Qnli-dev_manhattan_accuracy": 0.705078125, "eval_Qnli-dev_manhattan_accuracy_threshold": 330.57635498046875, "eval_Qnli-dev_manhattan_ap": 0.7623445246610333, "eval_Qnli-dev_manhattan_f1": 0.7102803738317757, "eval_Qnli-dev_manhattan_f1_threshold": 347.587890625, "eval_Qnli-dev_manhattan_precision": 0.6354515050167224, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 339.2628479003906, "eval_Qnli-dev_max_ap": 0.7623445246610333, "eval_Qnli-dev_max_f1": 0.7102803738317757, "eval_Qnli-dev_max_f1_threshold": 347.587890625, "eval_Qnli-dev_max_precision": 0.6498194945848376, "eval_Qnli-dev_max_recall": 0.8050847457627118, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.800418496131897, "eval_allNLI-dev_cosine_ap": 0.6273045426657029, "eval_allNLI-dev_cosine_f1": 0.6303854875283447, "eval_allNLI-dev_cosine_f1_threshold": 0.721612811088562, "eval_allNLI-dev_cosine_precision": 0.5186567164179104, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.705078125, "eval_allNLI-dev_dot_accuracy_threshold": 350.8966064453125, "eval_allNLI-dev_dot_ap": 0.5661295928549741, "eval_allNLI-dev_dot_f1": 0.5956521739130435, "eval_allNLI-dev_dot_f1_threshold": 291.92437744140625, "eval_allNLI-dev_dot_precision": 0.47735191637630664, "eval_allNLI-dev_dot_recall": 0.791907514450867, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.805671691894531, "eval_allNLI-dev_euclidean_ap": 0.6372099149848509, "eval_allNLI-dev_euclidean_f1": 0.6306695464362851, "eval_allNLI-dev_euclidean_f1_threshold": 15.717982292175293, "eval_allNLI-dev_euclidean_precision": 0.503448275862069, "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, "eval_allNLI-dev_manhattan_accuracy": 0.7421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 286.83154296875, "eval_allNLI-dev_manhattan_ap": 0.6350522019686492, "eval_allNLI-dev_manhattan_f1": 0.6338797814207651, "eval_allNLI-dev_manhattan_f1_threshold": 288.48553466796875, "eval_allNLI-dev_manhattan_precision": 0.6010362694300518, "eval_allNLI-dev_manhattan_recall": 0.6705202312138728, "eval_allNLI-dev_max_accuracy": 0.7421875, "eval_allNLI-dev_max_accuracy_threshold": 350.8966064453125, "eval_allNLI-dev_max_ap": 0.6372099149848509, "eval_allNLI-dev_max_f1": 0.6338797814207651, "eval_allNLI-dev_max_f1_threshold": 291.92437744140625, "eval_allNLI-dev_max_precision": 0.6010362694300518, "eval_allNLI-dev_max_recall": 0.8439306358381503, "eval_sequential_score": 0.7623445246610333, "eval_sts-test_pearson_cosine": 0.8444884977431116, "eval_sts-test_pearson_dot": 0.825329221666323, "eval_sts-test_pearson_euclidean": 0.8754671018631355, "eval_sts-test_pearson_manhattan": 0.8726714683911371, "eval_sts-test_pearson_max": 0.8754671018631355, "eval_sts-test_spearman_cosine": 0.875980388766391, "eval_sts-test_spearman_dot": 0.8276308574515545, "eval_sts-test_spearman_euclidean": 0.873646789489875, "eval_sts-test_spearman_manhattan": 0.871694157701584, "eval_sts-test_spearman_max": 0.875980388766391, "eval_vitaminc-pairs_loss": 3.1677722930908203, "eval_vitaminc-pairs_runtime": 3.2259, "eval_vitaminc-pairs_samples_per_second": 39.679, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_negation-triplets_loss": 0.9451866149902344, "eval_negation-triplets_runtime": 0.7626, "eval_negation-triplets_samples_per_second": 167.847, "eval_negation-triplets_steps_per_second": 1.311, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_scitail-pairs-pos_loss": 0.15744654834270477, "eval_scitail-pairs-pos_runtime": 0.929, "eval_scitail-pairs-pos_samples_per_second": 137.776, "eval_scitail-pairs-pos_steps_per_second": 1.076, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_scitail-pairs-qa_loss": 0.000507019751239568, "eval_scitail-pairs-qa_runtime": 0.6089, "eval_scitail-pairs-qa_samples_per_second": 210.213, "eval_scitail-pairs-qa_steps_per_second": 1.642, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_xsum-pairs_loss": 0.2686212360858917, "eval_xsum-pairs_runtime": 3.0452, "eval_xsum-pairs_samples_per_second": 42.034, "eval_xsum-pairs_steps_per_second": 0.328, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_sciq_pairs_loss": 0.09164952486753464, "eval_sciq_pairs_runtime": 3.5398, "eval_sciq_pairs_samples_per_second": 36.16, "eval_sciq_pairs_steps_per_second": 0.283, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_qasc_pairs_loss": 0.16437920928001404, "eval_qasc_pairs_runtime": 0.621, "eval_qasc_pairs_samples_per_second": 206.104, "eval_qasc_pairs_steps_per_second": 1.61, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_openbookqa_pairs_loss": 0.7067421674728394, "eval_openbookqa_pairs_runtime": 0.6055, "eval_openbookqa_pairs_samples_per_second": 211.388, "eval_openbookqa_pairs_steps_per_second": 1.651, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_msmarco_pairs_loss": 0.8379898071289062, "eval_msmarco_pairs_runtime": 1.53, "eval_msmarco_pairs_samples_per_second": 83.662, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_nq_pairs_loss": 0.7834599614143372, "eval_nq_pairs_runtime": 2.9163, "eval_nq_pairs_samples_per_second": 43.892, "eval_nq_pairs_steps_per_second": 0.343, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_trivia_pairs_loss": 0.8072291016578674, "eval_trivia_pairs_runtime": 3.4746, "eval_trivia_pairs_samples_per_second": 36.838, "eval_trivia_pairs_steps_per_second": 0.288, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_gooaq_pairs_loss": 0.34852299094200134, "eval_gooaq_pairs_runtime": 0.9582, "eval_gooaq_pairs_samples_per_second": 133.59, "eval_gooaq_pairs_steps_per_second": 1.044, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_paws-pos_loss": 0.021990040317177773, "eval_paws-pos_runtime": 0.7094, "eval_paws-pos_samples_per_second": 180.435, "eval_paws-pos_steps_per_second": 1.41, "step": 2240 }, { "epoch": 2.3045267489711936, "eval_global_dataset_loss": 0.4435296654701233, "eval_global_dataset_runtime": 13.4215, "eval_global_dataset_samples_per_second": 30.995, "eval_global_dataset_steps_per_second": 0.298, "step": 2240 }, { "epoch": 2.3055555555555554, "grad_norm": 8.643778800964355, "learning_rate": 2.2517635690277715e-05, "loss": 0.2783, "step": 2241 }, { "epoch": 2.3065843621399176, "grad_norm": 9.34395694732666, "learning_rate": 2.2502659525868627e-05, "loss": 0.4265, "step": 2242 }, { "epoch": 2.30761316872428, "grad_norm": 10.448383331298828, "learning_rate": 2.2487684737102373e-05, "loss": 0.5171, "step": 2243 }, { "epoch": 2.308641975308642, "grad_norm": 0.3076767027378082, "learning_rate": 2.2472711348778043e-05, "loss": 0.0038, "step": 2244 }, { "epoch": 2.3096707818930042, "grad_norm": 10.618779182434082, "learning_rate": 2.2457739385692424e-05, "loss": 0.3861, "step": 2245 }, { "epoch": 2.310699588477366, "grad_norm": 4.680063247680664, "learning_rate": 2.2442768872639933e-05, "loss": 0.0761, "step": 2246 }, { "epoch": 2.3117283950617282, "grad_norm": 10.028347969055176, "learning_rate": 2.2427799834412586e-05, "loss": 0.4338, "step": 2247 }, { "epoch": 2.3127572016460904, "grad_norm": 9.804010391235352, "learning_rate": 2.2412832295799952e-05, "loss": 0.4161, "step": 2248 }, { "epoch": 2.3137860082304527, "grad_norm": 2.3687469959259033, "learning_rate": 2.239786628158913e-05, "loss": 0.0251, "step": 2249 }, { "epoch": 2.314814814814815, "grad_norm": 3.091796636581421, "learning_rate": 2.2382901816564676e-05, "loss": 0.0371, "step": 2250 }, { "epoch": 2.315843621399177, "grad_norm": 4.977518081665039, "learning_rate": 2.2367938925508602e-05, "loss": 0.0975, "step": 2251 }, { "epoch": 2.3168724279835393, "grad_norm": 8.08765983581543, "learning_rate": 2.2352977633200298e-05, "loss": 0.282, "step": 2252 }, { "epoch": 2.317901234567901, "grad_norm": 2.7775073051452637, "learning_rate": 2.2338017964416506e-05, "loss": 0.0343, "step": 2253 }, { "epoch": 2.3189300411522633, "grad_norm": 3.464078664779663, "learning_rate": 2.2323059943931295e-05, "loss": 0.0756, "step": 2254 }, { "epoch": 2.3199588477366255, "grad_norm": 9.049628257751465, "learning_rate": 2.2308103596515983e-05, "loss": 0.5555, "step": 2255 }, { "epoch": 2.3209876543209877, "grad_norm": 11.142632484436035, "learning_rate": 2.229314894693913e-05, "loss": 0.6313, "step": 2256 }, { "epoch": 2.32201646090535, "grad_norm": 4.431145668029785, "learning_rate": 2.227819601996649e-05, "loss": 0.1288, "step": 2257 }, { "epoch": 2.3230452674897117, "grad_norm": 8.381731986999512, "learning_rate": 2.2263244840360944e-05, "loss": 0.3806, "step": 2258 }, { "epoch": 2.324074074074074, "grad_norm": 12.163583755493164, "learning_rate": 2.2248295432882498e-05, "loss": 0.8261, "step": 2259 }, { "epoch": 2.325102880658436, "grad_norm": 9.38312816619873, "learning_rate": 2.223334782228822e-05, "loss": 0.4223, "step": 2260 }, { "epoch": 2.325102880658436, "eval_Qnli-dev_cosine_accuracy": 0.701171875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7691091895103455, "eval_Qnli-dev_cosine_ap": 0.75401176894518, "eval_Qnli-dev_cosine_f1": 0.7029126213592233, "eval_Qnli-dev_cosine_f1_threshold": 0.7210485339164734, "eval_Qnli-dev_cosine_precision": 0.6487455197132617, "eval_Qnli-dev_cosine_recall": 0.7669491525423728, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 352.1729736328125, "eval_Qnli-dev_dot_ap": 0.7022030363786389, "eval_Qnli-dev_dot_f1": 0.6810631229235881, "eval_Qnli-dev_dot_f1_threshold": 304.30010986328125, "eval_Qnli-dev_dot_precision": 0.5601092896174863, "eval_Qnli-dev_dot_recall": 0.8686440677966102, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.070708274841309, "eval_Qnli-dev_euclidean_ap": 0.7613860714428011, "eval_Qnli-dev_euclidean_f1": 0.7050359712230215, "eval_Qnli-dev_euclidean_f1_threshold": 16.56011962890625, "eval_Qnli-dev_euclidean_precision": 0.6125, "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, "eval_Qnli-dev_manhattan_accuracy": 0.708984375, "eval_Qnli-dev_manhattan_accuracy_threshold": 333.4063415527344, "eval_Qnli-dev_manhattan_ap": 0.7651250959093233, "eval_Qnli-dev_manhattan_f1": 0.7086330935251799, "eval_Qnli-dev_manhattan_f1_threshold": 349.4019470214844, "eval_Qnli-dev_manhattan_precision": 0.615625, "eval_Qnli-dev_manhattan_recall": 0.8347457627118644, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 352.1729736328125, "eval_Qnli-dev_max_ap": 0.7651250959093233, "eval_Qnli-dev_max_f1": 0.7086330935251799, "eval_Qnli-dev_max_f1_threshold": 349.4019470214844, "eval_Qnli-dev_max_precision": 0.6487455197132617, "eval_Qnli-dev_max_recall": 0.8686440677966102, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.7986735105514526, "eval_allNLI-dev_cosine_ap": 0.6265153528207018, "eval_allNLI-dev_cosine_f1": 0.6255924170616114, "eval_allNLI-dev_cosine_f1_threshold": 0.7433013916015625, "eval_allNLI-dev_cosine_precision": 0.5301204819277109, "eval_allNLI-dev_cosine_recall": 0.7630057803468208, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 356.342041015625, "eval_allNLI-dev_dot_ap": 0.5577487198695564, "eval_allNLI-dev_dot_f1": 0.5900900900900902, "eval_allNLI-dev_dot_f1_threshold": 304.54986572265625, "eval_allNLI-dev_dot_precision": 0.4833948339483395, "eval_allNLI-dev_dot_recall": 0.7572254335260116, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.965154647827148, "eval_allNLI-dev_euclidean_ap": 0.6362386582588571, "eval_allNLI-dev_euclidean_f1": 0.6318537859007832, "eval_allNLI-dev_euclidean_f1_threshold": 14.021746635437012, "eval_allNLI-dev_euclidean_precision": 0.5761904761904761, "eval_allNLI-dev_euclidean_recall": 0.6994219653179191, "eval_allNLI-dev_manhattan_accuracy": 0.73828125, "eval_allNLI-dev_manhattan_accuracy_threshold": 270.27947998046875, "eval_allNLI-dev_manhattan_ap": 0.6339213685799745, "eval_allNLI-dev_manhattan_f1": 0.6341463414634145, "eval_allNLI-dev_manhattan_f1_threshold": 319.86932373046875, "eval_allNLI-dev_manhattan_precision": 0.5143884892086331, "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 356.342041015625, "eval_allNLI-dev_max_ap": 0.6362386582588571, "eval_allNLI-dev_max_f1": 0.6341463414634145, "eval_allNLI-dev_max_f1_threshold": 319.86932373046875, "eval_allNLI-dev_max_precision": 0.5761904761904761, "eval_allNLI-dev_max_recall": 0.8265895953757225, "eval_sequential_score": 0.7651250959093233, "eval_sts-test_pearson_cosine": 0.8526757483775252, "eval_sts-test_pearson_dot": 0.8421971908216404, "eval_sts-test_pearson_euclidean": 0.8793390424737427, "eval_sts-test_pearson_manhattan": 0.8761631845902708, "eval_sts-test_pearson_max": 0.8793390424737427, "eval_sts-test_spearman_cosine": 0.8804492944801976, "eval_sts-test_spearman_dot": 0.8426240963118221, "eval_sts-test_spearman_euclidean": 0.8757499197702253, "eval_sts-test_spearman_manhattan": 0.8738397121777335, "eval_sts-test_spearman_max": 0.8804492944801976, "eval_vitaminc-pairs_loss": 3.0241594314575195, "eval_vitaminc-pairs_runtime": 3.2167, "eval_vitaminc-pairs_samples_per_second": 39.792, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 2260 }, { "epoch": 2.325102880658436, "eval_negation-triplets_loss": 0.9448370933532715, "eval_negation-triplets_runtime": 0.7746, "eval_negation-triplets_samples_per_second": 165.253, "eval_negation-triplets_steps_per_second": 1.291, "step": 2260 }, { "epoch": 2.325102880658436, "eval_scitail-pairs-pos_loss": 0.16884900629520416, "eval_scitail-pairs-pos_runtime": 0.9384, "eval_scitail-pairs-pos_samples_per_second": 136.4, "eval_scitail-pairs-pos_steps_per_second": 1.066, "step": 2260 }, { "epoch": 2.325102880658436, "eval_scitail-pairs-qa_loss": 0.00046156090684235096, "eval_scitail-pairs-qa_runtime": 0.6172, "eval_scitail-pairs-qa_samples_per_second": 207.373, "eval_scitail-pairs-qa_steps_per_second": 1.62, "step": 2260 }, { "epoch": 2.325102880658436, "eval_xsum-pairs_loss": 0.29921481013298035, "eval_xsum-pairs_runtime": 3.0371, "eval_xsum-pairs_samples_per_second": 42.146, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2260 }, { "epoch": 2.325102880658436, "eval_sciq_pairs_loss": 0.09571722894906998, "eval_sciq_pairs_runtime": 3.5243, "eval_sciq_pairs_samples_per_second": 36.32, "eval_sciq_pairs_steps_per_second": 0.284, "step": 2260 }, { "epoch": 2.325102880658436, "eval_qasc_pairs_loss": 0.16877269744873047, "eval_qasc_pairs_runtime": 0.6226, "eval_qasc_pairs_samples_per_second": 205.603, "eval_qasc_pairs_steps_per_second": 1.606, "step": 2260 }, { "epoch": 2.325102880658436, "eval_openbookqa_pairs_loss": 0.6946222186088562, "eval_openbookqa_pairs_runtime": 0.6114, "eval_openbookqa_pairs_samples_per_second": 209.355, "eval_openbookqa_pairs_steps_per_second": 1.636, "step": 2260 }, { "epoch": 2.325102880658436, "eval_msmarco_pairs_loss": 0.7874183058738708, "eval_msmarco_pairs_runtime": 1.5315, "eval_msmarco_pairs_samples_per_second": 83.577, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 2260 }, { "epoch": 2.325102880658436, "eval_nq_pairs_loss": 0.6316083669662476, "eval_nq_pairs_runtime": 2.9025, "eval_nq_pairs_samples_per_second": 44.1, "eval_nq_pairs_steps_per_second": 0.345, "step": 2260 }, { "epoch": 2.325102880658436, "eval_trivia_pairs_loss": 0.8147802948951721, "eval_trivia_pairs_runtime": 3.4458, "eval_trivia_pairs_samples_per_second": 37.146, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2260 }, { "epoch": 2.325102880658436, "eval_gooaq_pairs_loss": 0.35202357172966003, "eval_gooaq_pairs_runtime": 0.9651, "eval_gooaq_pairs_samples_per_second": 132.633, "eval_gooaq_pairs_steps_per_second": 1.036, "step": 2260 }, { "epoch": 2.325102880658436, "eval_paws-pos_loss": 0.022588880732655525, "eval_paws-pos_runtime": 0.7109, "eval_paws-pos_samples_per_second": 180.061, "eval_paws-pos_steps_per_second": 1.407, "step": 2260 }, { "epoch": 2.325102880658436, "eval_global_dataset_loss": 0.45734867453575134, "eval_global_dataset_runtime": 13.4411, "eval_global_dataset_samples_per_second": 30.95, "eval_global_dataset_steps_per_second": 0.298, "step": 2260 }, { "epoch": 2.3261316872427984, "grad_norm": 5.772022247314453, "learning_rate": 2.221840203333219e-05, "loss": 0.2516, "step": 2261 }, { "epoch": 2.3271604938271606, "grad_norm": 3.9961254596710205, "learning_rate": 2.2203458090765486e-05, "loss": 0.0595, "step": 2262 }, { "epoch": 2.3281893004115224, "grad_norm": 6.449882984161377, "learning_rate": 2.2188516019336126e-05, "loss": 0.2802, "step": 2263 }, { "epoch": 2.3292181069958846, "grad_norm": 8.825777053833008, "learning_rate": 2.2173575843789013e-05, "loss": 0.4514, "step": 2264 }, { "epoch": 2.330246913580247, "grad_norm": 4.293075084686279, "learning_rate": 2.2158637588865937e-05, "loss": 0.1173, "step": 2265 }, { "epoch": 2.331275720164609, "grad_norm": 4.642062187194824, "learning_rate": 2.214370127930549e-05, "loss": 0.111, "step": 2266 }, { "epoch": 2.3323045267489713, "grad_norm": 4.957849025726318, "learning_rate": 2.2128766939843043e-05, "loss": 0.1221, "step": 2267 }, { "epoch": 2.3333333333333335, "grad_norm": 6.430418968200684, "learning_rate": 2.211383459521071e-05, "loss": 0.2215, "step": 2268 }, { "epoch": 2.3343621399176957, "grad_norm": 3.42794132232666, "learning_rate": 2.209890427013729e-05, "loss": 0.076, "step": 2269 }, { "epoch": 2.3353909465020575, "grad_norm": 1.1023659706115723, "learning_rate": 2.208397598934826e-05, "loss": 0.0639, "step": 2270 }, { "epoch": 2.3364197530864197, "grad_norm": 8.584929466247559, "learning_rate": 2.2069049777565694e-05, "loss": 0.4891, "step": 2271 }, { "epoch": 2.337448559670782, "grad_norm": 7.326855182647705, "learning_rate": 2.2054125659508233e-05, "loss": 0.3682, "step": 2272 }, { "epoch": 2.338477366255144, "grad_norm": 4.641539573669434, "learning_rate": 2.2039203659891072e-05, "loss": 0.2303, "step": 2273 }, { "epoch": 2.3395061728395063, "grad_norm": 9.670384407043457, "learning_rate": 2.2024283803425884e-05, "loss": 0.6301, "step": 2274 }, { "epoch": 2.340534979423868, "grad_norm": 7.244576454162598, "learning_rate": 2.2009366114820795e-05, "loss": 0.3037, "step": 2275 }, { "epoch": 2.3415637860082303, "grad_norm": 0.10709922760725021, "learning_rate": 2.1994450618780344e-05, "loss": 0.0013, "step": 2276 }, { "epoch": 2.3425925925925926, "grad_norm": 8.981964111328125, "learning_rate": 2.1979537340005425e-05, "loss": 0.5925, "step": 2277 }, { "epoch": 2.343621399176955, "grad_norm": 3.5664937496185303, "learning_rate": 2.196462630319329e-05, "loss": 0.0863, "step": 2278 }, { "epoch": 2.344650205761317, "grad_norm": 9.481403350830078, "learning_rate": 2.194971753303744e-05, "loss": 0.5741, "step": 2279 }, { "epoch": 2.3456790123456788, "grad_norm": 7.456191062927246, "learning_rate": 2.193481105422766e-05, "loss": 0.2837, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7566351294517517, "eval_Qnli-dev_cosine_ap": 0.7609971289365627, "eval_Qnli-dev_cosine_f1": 0.7145631067961165, "eval_Qnli-dev_cosine_f1_threshold": 0.7312784194946289, "eval_Qnli-dev_cosine_precision": 0.6594982078853047, "eval_Qnli-dev_cosine_recall": 0.7796610169491526, "eval_Qnli-dev_dot_accuracy": 0.6875, "eval_Qnli-dev_dot_accuracy_threshold": 365.13922119140625, "eval_Qnli-dev_dot_ap": 0.7117488467964865, "eval_Qnli-dev_dot_f1": 0.6746575342465754, "eval_Qnli-dev_dot_f1_threshold": 312.92230224609375, "eval_Qnli-dev_dot_precision": 0.5660919540229885, "eval_Qnli-dev_dot_recall": 0.8347457627118644, "eval_Qnli-dev_euclidean_accuracy": 0.72265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.736825942993164, "eval_Qnli-dev_euclidean_ap": 0.7670844970871598, "eval_Qnli-dev_euclidean_f1": 0.7237354085603113, "eval_Qnli-dev_euclidean_f1_threshold": 15.739640235900879, "eval_Qnli-dev_euclidean_precision": 0.6690647482014388, "eval_Qnli-dev_euclidean_recall": 0.788135593220339, "eval_Qnli-dev_manhattan_accuracy": 0.724609375, "eval_Qnli-dev_manhattan_accuracy_threshold": 322.1856689453125, "eval_Qnli-dev_manhattan_ap": 0.7692971927519606, "eval_Qnli-dev_manhattan_f1": 0.7169811320754716, "eval_Qnli-dev_manhattan_f1_threshold": 337.4688415527344, "eval_Qnli-dev_manhattan_precision": 0.6462585034013606, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.724609375, "eval_Qnli-dev_max_accuracy_threshold": 365.13922119140625, "eval_Qnli-dev_max_ap": 0.7692971927519606, "eval_Qnli-dev_max_f1": 0.7237354085603113, "eval_Qnli-dev_max_f1_threshold": 337.4688415527344, "eval_Qnli-dev_max_precision": 0.6690647482014388, "eval_Qnli-dev_max_recall": 0.8347457627118644, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8959578275680542, "eval_allNLI-dev_cosine_ap": 0.6288210825785365, "eval_allNLI-dev_cosine_f1": 0.641860465116279, "eval_allNLI-dev_cosine_f1_threshold": 0.7660266160964966, "eval_allNLI-dev_cosine_precision": 0.5369649805447471, "eval_allNLI-dev_cosine_recall": 0.7976878612716763, "eval_allNLI-dev_dot_accuracy": 0.705078125, "eval_allNLI-dev_dot_accuracy_threshold": 360.33917236328125, "eval_allNLI-dev_dot_ap": 0.5581058350375936, "eval_allNLI-dev_dot_f1": 0.591375770020534, "eval_allNLI-dev_dot_f1_threshold": 311.787109375, "eval_allNLI-dev_dot_precision": 0.4585987261146497, "eval_allNLI-dev_dot_recall": 0.8323699421965318, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.02200698852539, "eval_allNLI-dev_euclidean_ap": 0.6385020312240187, "eval_allNLI-dev_euclidean_f1": 0.6510538641686183, "eval_allNLI-dev_euclidean_f1_threshold": 14.25075626373291, "eval_allNLI-dev_euclidean_precision": 0.547244094488189, "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 257.76812744140625, "eval_allNLI-dev_manhattan_ap": 0.6353966723714252, "eval_allNLI-dev_manhattan_f1": 0.6453089244851259, "eval_allNLI-dev_manhattan_f1_threshold": 302.9503173828125, "eval_allNLI-dev_manhattan_precision": 0.5340909090909091, "eval_allNLI-dev_manhattan_recall": 0.815028901734104, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 360.33917236328125, "eval_allNLI-dev_max_ap": 0.6385020312240187, "eval_allNLI-dev_max_f1": 0.6510538641686183, "eval_allNLI-dev_max_f1_threshold": 311.787109375, "eval_allNLI-dev_max_precision": 0.547244094488189, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.7692971927519606, "eval_sts-test_pearson_cosine": 0.8514642254937852, "eval_sts-test_pearson_dot": 0.8334266195184966, "eval_sts-test_pearson_euclidean": 0.8774851170069259, "eval_sts-test_pearson_manhattan": 0.874385112208525, "eval_sts-test_pearson_max": 0.8774851170069259, "eval_sts-test_spearman_cosine": 0.8771915816559491, "eval_sts-test_spearman_dot": 0.8273027859542943, "eval_sts-test_spearman_euclidean": 0.8732949998240772, "eval_sts-test_spearman_manhattan": 0.8717807961436378, "eval_sts-test_spearman_max": 0.8771915816559491, "eval_vitaminc-pairs_loss": 3.007596015930176, "eval_vitaminc-pairs_runtime": 3.2197, "eval_vitaminc-pairs_samples_per_second": 39.755, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_negation-triplets_loss": 0.8995927572250366, "eval_negation-triplets_runtime": 0.7637, "eval_negation-triplets_samples_per_second": 167.606, "eval_negation-triplets_steps_per_second": 1.309, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_scitail-pairs-pos_loss": 0.17473338544368744, "eval_scitail-pairs-pos_runtime": 0.9416, "eval_scitail-pairs-pos_samples_per_second": 135.939, "eval_scitail-pairs-pos_steps_per_second": 1.062, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_scitail-pairs-qa_loss": 0.0005349889979697764, "eval_scitail-pairs-qa_runtime": 0.6285, "eval_scitail-pairs-qa_samples_per_second": 203.675, "eval_scitail-pairs-qa_steps_per_second": 1.591, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_xsum-pairs_loss": 0.2932433784008026, "eval_xsum-pairs_runtime": 3.0444, "eval_xsum-pairs_samples_per_second": 42.044, "eval_xsum-pairs_steps_per_second": 0.328, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_sciq_pairs_loss": 0.09884592145681381, "eval_sciq_pairs_runtime": 3.5665, "eval_sciq_pairs_samples_per_second": 35.889, "eval_sciq_pairs_steps_per_second": 0.28, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_qasc_pairs_loss": 0.1590500771999359, "eval_qasc_pairs_runtime": 0.6249, "eval_qasc_pairs_samples_per_second": 204.834, "eval_qasc_pairs_steps_per_second": 1.6, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_openbookqa_pairs_loss": 0.6874719262123108, "eval_openbookqa_pairs_runtime": 0.602, "eval_openbookqa_pairs_samples_per_second": 212.628, "eval_openbookqa_pairs_steps_per_second": 1.661, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_msmarco_pairs_loss": 0.8092954158782959, "eval_msmarco_pairs_runtime": 1.5273, "eval_msmarco_pairs_samples_per_second": 83.81, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_nq_pairs_loss": 0.6119550466537476, "eval_nq_pairs_runtime": 2.9044, "eval_nq_pairs_samples_per_second": 44.07, "eval_nq_pairs_steps_per_second": 0.344, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_trivia_pairs_loss": 0.8267652988433838, "eval_trivia_pairs_runtime": 3.443, "eval_trivia_pairs_samples_per_second": 37.177, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_gooaq_pairs_loss": 0.31177884340286255, "eval_gooaq_pairs_runtime": 0.9587, "eval_gooaq_pairs_samples_per_second": 133.519, "eval_gooaq_pairs_steps_per_second": 1.043, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_paws-pos_loss": 0.023131083697080612, "eval_paws-pos_runtime": 0.7115, "eval_paws-pos_samples_per_second": 179.914, "eval_paws-pos_steps_per_second": 1.406, "step": 2280 }, { "epoch": 2.3456790123456788, "eval_global_dataset_loss": 0.4496609568595886, "eval_global_dataset_runtime": 13.4253, "eval_global_dataset_samples_per_second": 30.986, "eval_global_dataset_steps_per_second": 0.298, "step": 2280 }, { "epoch": 2.346707818930041, "grad_norm": 0.18925702571868896, "learning_rate": 2.1919906891449918e-05, "loss": 0.0027, "step": 2281 }, { "epoch": 2.347736625514403, "grad_norm": 3.967648506164551, "learning_rate": 2.1905005069386335e-05, "loss": 0.1548, "step": 2282 }, { "epoch": 2.3487654320987654, "grad_norm": 5.869375228881836, "learning_rate": 2.1890105612715197e-05, "loss": 0.2339, "step": 2283 }, { "epoch": 2.3497942386831276, "grad_norm": 10.786419868469238, "learning_rate": 2.1875208546110827e-05, "loss": 0.5421, "step": 2284 }, { "epoch": 2.35082304526749, "grad_norm": 7.9114251136779785, "learning_rate": 2.1860313894243623e-05, "loss": 0.6218, "step": 2285 }, { "epoch": 2.351851851851852, "grad_norm": 7.046651363372803, "learning_rate": 2.184542168177996e-05, "loss": 0.3833, "step": 2286 }, { "epoch": 2.352880658436214, "grad_norm": 7.676875114440918, "learning_rate": 2.18305319333822e-05, "loss": 0.6528, "step": 2287 }, { "epoch": 2.353909465020576, "grad_norm": 6.6297712326049805, "learning_rate": 2.1815644673708592e-05, "loss": 0.2831, "step": 2288 }, { "epoch": 2.3549382716049383, "grad_norm": 3.635673999786377, "learning_rate": 2.180075992741329e-05, "loss": 0.0959, "step": 2289 }, { "epoch": 2.3559670781893005, "grad_norm": 4.281613826751709, "learning_rate": 2.1785877719146274e-05, "loss": 0.128, "step": 2290 }, { "epoch": 2.3569958847736627, "grad_norm": 6.85472297668457, "learning_rate": 2.1770998073553326e-05, "loss": 0.1727, "step": 2291 }, { "epoch": 2.3580246913580245, "grad_norm": 1.0257395505905151, "learning_rate": 2.1756121015275976e-05, "loss": 0.0418, "step": 2292 }, { "epoch": 2.3590534979423867, "grad_norm": 4.922633171081543, "learning_rate": 2.1741246568951482e-05, "loss": 0.1231, "step": 2293 }, { "epoch": 2.360082304526749, "grad_norm": 3.1864991188049316, "learning_rate": 2.172637475921276e-05, "loss": 0.0568, "step": 2294 }, { "epoch": 2.361111111111111, "grad_norm": 10.265893936157227, "learning_rate": 2.171150561068837e-05, "loss": 0.4443, "step": 2295 }, { "epoch": 2.3621399176954734, "grad_norm": 2.11238956451416, "learning_rate": 2.1696639148002472e-05, "loss": 0.0415, "step": 2296 }, { "epoch": 2.363168724279835, "grad_norm": 9.106158256530762, "learning_rate": 2.1681775395774764e-05, "loss": 0.4245, "step": 2297 }, { "epoch": 2.3641975308641974, "grad_norm": 5.098234176635742, "learning_rate": 2.166691437862045e-05, "loss": 0.1398, "step": 2298 }, { "epoch": 2.3652263374485596, "grad_norm": 2.0659897327423096, "learning_rate": 2.1652056121150233e-05, "loss": 0.0312, "step": 2299 }, { "epoch": 2.366255144032922, "grad_norm": 4.4549078941345215, "learning_rate": 2.1637200647970217e-05, "loss": 0.088, "step": 2300 }, { "epoch": 2.366255144032922, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.711749792098999, "eval_Qnli-dev_cosine_ap": 0.7581653072806273, "eval_Qnli-dev_cosine_f1": 0.7095588235294118, "eval_Qnli-dev_cosine_f1_threshold": 0.6768925189971924, "eval_Qnli-dev_cosine_precision": 0.6266233766233766, "eval_Qnli-dev_cosine_recall": 0.8177966101694916, "eval_Qnli-dev_dot_accuracy": 0.677734375, "eval_Qnli-dev_dot_accuracy_threshold": 330.6556091308594, "eval_Qnli-dev_dot_ap": 0.7246830432229524, "eval_Qnli-dev_dot_f1": 0.6814310051107325, "eval_Qnli-dev_dot_f1_threshold": 277.5843811035156, "eval_Qnli-dev_dot_precision": 0.5698005698005698, "eval_Qnli-dev_dot_recall": 0.847457627118644, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 16.400651931762695, "eval_Qnli-dev_euclidean_ap": 0.7628671779699692, "eval_Qnli-dev_euclidean_f1": 0.7238095238095237, "eval_Qnli-dev_euclidean_f1_threshold": 16.400651931762695, "eval_Qnli-dev_euclidean_precision": 0.657439446366782, "eval_Qnli-dev_euclidean_recall": 0.8050847457627118, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 335.8609313964844, "eval_Qnli-dev_manhattan_ap": 0.7667709215373246, "eval_Qnli-dev_manhattan_f1": 0.7140319715808171, "eval_Qnli-dev_manhattan_f1_threshold": 358.5963439941406, "eval_Qnli-dev_manhattan_precision": 0.6146788990825688, "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 335.8609313964844, "eval_Qnli-dev_max_ap": 0.7667709215373246, "eval_Qnli-dev_max_f1": 0.7238095238095237, "eval_Qnli-dev_max_f1_threshold": 358.5963439941406, "eval_Qnli-dev_max_precision": 0.657439446366782, "eval_Qnli-dev_max_recall": 0.8516949152542372, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8382487297058105, "eval_allNLI-dev_cosine_ap": 0.6355044057508394, "eval_allNLI-dev_cosine_f1": 0.639618138424821, "eval_allNLI-dev_cosine_f1_threshold": 0.7509103417396545, "eval_allNLI-dev_cosine_precision": 0.5447154471544715, "eval_allNLI-dev_cosine_recall": 0.7745664739884393, "eval_allNLI-dev_dot_accuracy": 0.7109375, "eval_allNLI-dev_dot_accuracy_threshold": 362.3534851074219, "eval_allNLI-dev_dot_ap": 0.5746903583642451, "eval_allNLI-dev_dot_f1": 0.5949367088607596, "eval_allNLI-dev_dot_f1_threshold": 298.28326416015625, "eval_allNLI-dev_dot_precision": 0.4684385382059801, "eval_allNLI-dev_dot_recall": 0.815028901734104, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.083795547485352, "eval_allNLI-dev_euclidean_ap": 0.6471876064039366, "eval_allNLI-dev_euclidean_f1": 0.6481481481481481, "eval_allNLI-dev_euclidean_f1_threshold": 14.829469680786133, "eval_allNLI-dev_euclidean_precision": 0.5405405405405406, "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, "eval_allNLI-dev_manhattan_accuracy": 0.744140625, "eval_allNLI-dev_manhattan_accuracy_threshold": 261.477783203125, "eval_allNLI-dev_manhattan_ap": 0.641664744292025, "eval_allNLI-dev_manhattan_f1": 0.6415929203539823, "eval_allNLI-dev_manhattan_f1_threshold": 319.8987731933594, "eval_allNLI-dev_manhattan_precision": 0.5197132616487455, "eval_allNLI-dev_manhattan_recall": 0.838150289017341, "eval_allNLI-dev_max_accuracy": 0.744140625, "eval_allNLI-dev_max_accuracy_threshold": 362.3534851074219, "eval_allNLI-dev_max_ap": 0.6471876064039366, "eval_allNLI-dev_max_f1": 0.6481481481481481, "eval_allNLI-dev_max_f1_threshold": 319.8987731933594, "eval_allNLI-dev_max_precision": 0.5447154471544715, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7667709215373246, "eval_sts-test_pearson_cosine": 0.8482468972393741, "eval_sts-test_pearson_dot": 0.8224592183870103, "eval_sts-test_pearson_euclidean": 0.8758260852544988, "eval_sts-test_pearson_manhattan": 0.8721834882062033, "eval_sts-test_pearson_max": 0.8758260852544988, "eval_sts-test_spearman_cosine": 0.8752404714105455, "eval_sts-test_spearman_dot": 0.8155248804737375, "eval_sts-test_spearman_euclidean": 0.8718644810024397, "eval_sts-test_spearman_manhattan": 0.8689638832440949, "eval_sts-test_spearman_max": 0.8752404714105455, "eval_vitaminc-pairs_loss": 3.0794012546539307, "eval_vitaminc-pairs_runtime": 3.2449, "eval_vitaminc-pairs_samples_per_second": 39.447, "eval_vitaminc-pairs_steps_per_second": 0.308, "step": 2300 }, { "epoch": 2.366255144032922, "eval_negation-triplets_loss": 0.9212685823440552, "eval_negation-triplets_runtime": 0.7636, "eval_negation-triplets_samples_per_second": 167.622, "eval_negation-triplets_steps_per_second": 1.31, "step": 2300 }, { "epoch": 2.366255144032922, "eval_scitail-pairs-pos_loss": 0.16639116406440735, "eval_scitail-pairs-pos_runtime": 0.9211, "eval_scitail-pairs-pos_samples_per_second": 138.972, "eval_scitail-pairs-pos_steps_per_second": 1.086, "step": 2300 }, { "epoch": 2.366255144032922, "eval_scitail-pairs-qa_loss": 0.0005323602235876024, "eval_scitail-pairs-qa_runtime": 0.6159, "eval_scitail-pairs-qa_samples_per_second": 207.827, "eval_scitail-pairs-qa_steps_per_second": 1.624, "step": 2300 }, { "epoch": 2.366255144032922, "eval_xsum-pairs_loss": 0.24832576513290405, "eval_xsum-pairs_runtime": 3.0282, "eval_xsum-pairs_samples_per_second": 42.269, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2300 }, { "epoch": 2.366255144032922, "eval_sciq_pairs_loss": 0.09831339120864868, "eval_sciq_pairs_runtime": 3.5302, "eval_sciq_pairs_samples_per_second": 36.258, "eval_sciq_pairs_steps_per_second": 0.283, "step": 2300 }, { "epoch": 2.366255144032922, "eval_qasc_pairs_loss": 0.1492229402065277, "eval_qasc_pairs_runtime": 0.6295, "eval_qasc_pairs_samples_per_second": 203.341, "eval_qasc_pairs_steps_per_second": 1.589, "step": 2300 }, { "epoch": 2.366255144032922, "eval_openbookqa_pairs_loss": 0.6935264468193054, "eval_openbookqa_pairs_runtime": 0.607, "eval_openbookqa_pairs_samples_per_second": 210.86, "eval_openbookqa_pairs_steps_per_second": 1.647, "step": 2300 }, { "epoch": 2.366255144032922, "eval_msmarco_pairs_loss": 0.8593921661376953, "eval_msmarco_pairs_runtime": 1.5351, "eval_msmarco_pairs_samples_per_second": 83.381, "eval_msmarco_pairs_steps_per_second": 0.651, "step": 2300 }, { "epoch": 2.366255144032922, "eval_nq_pairs_loss": 0.6413222551345825, "eval_nq_pairs_runtime": 2.9143, "eval_nq_pairs_samples_per_second": 43.921, "eval_nq_pairs_steps_per_second": 0.343, "step": 2300 }, { "epoch": 2.366255144032922, "eval_trivia_pairs_loss": 0.875969648361206, "eval_trivia_pairs_runtime": 3.4479, "eval_trivia_pairs_samples_per_second": 37.124, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2300 }, { "epoch": 2.366255144032922, "eval_gooaq_pairs_loss": 0.33981260657310486, "eval_gooaq_pairs_runtime": 0.9586, "eval_gooaq_pairs_samples_per_second": 133.522, "eval_gooaq_pairs_steps_per_second": 1.043, "step": 2300 }, { "epoch": 2.366255144032922, "eval_paws-pos_loss": 0.023348888382315636, "eval_paws-pos_runtime": 0.7074, "eval_paws-pos_samples_per_second": 180.944, "eval_paws-pos_steps_per_second": 1.414, "step": 2300 }, { "epoch": 2.366255144032922, "eval_global_dataset_loss": 0.4635227918624878, "eval_global_dataset_runtime": 13.4057, "eval_global_dataset_samples_per_second": 31.032, "eval_global_dataset_steps_per_second": 0.298, "step": 2300 }, { "epoch": 2.367283950617284, "grad_norm": 4.1970977783203125, "learning_rate": 2.1622347983681913e-05, "loss": 0.1321, "step": 2301 }, { "epoch": 2.3683127572016462, "grad_norm": 2.755897045135498, "learning_rate": 2.1607498152882166e-05, "loss": 0.1181, "step": 2302 }, { "epoch": 2.3693415637860085, "grad_norm": 8.307868003845215, "learning_rate": 2.1592651180163142e-05, "loss": 0.392, "step": 2303 }, { "epoch": 2.3703703703703702, "grad_norm": 5.575843334197998, "learning_rate": 2.1577807090112272e-05, "loss": 0.1, "step": 2304 }, { "epoch": 2.3713991769547325, "grad_norm": 0.1779100000858307, "learning_rate": 2.1562965907312197e-05, "loss": 0.0029, "step": 2305 }, { "epoch": 2.3724279835390947, "grad_norm": 3.030696392059326, "learning_rate": 2.1548127656340767e-05, "loss": 0.0665, "step": 2306 }, { "epoch": 2.373456790123457, "grad_norm": 6.725527286529541, "learning_rate": 2.153329236177096e-05, "loss": 0.1854, "step": 2307 }, { "epoch": 2.374485596707819, "grad_norm": 7.689750671386719, "learning_rate": 2.151846004817086e-05, "loss": 0.2986, "step": 2308 }, { "epoch": 2.375514403292181, "grad_norm": 3.769759178161621, "learning_rate": 2.1503630740103627e-05, "loss": 0.0676, "step": 2309 }, { "epoch": 2.376543209876543, "grad_norm": 6.60612678527832, "learning_rate": 2.1488804462127425e-05, "loss": 0.1793, "step": 2310 }, { "epoch": 2.3775720164609053, "grad_norm": 11.391586303710938, "learning_rate": 2.147398123879541e-05, "loss": 0.5551, "step": 2311 }, { "epoch": 2.3786008230452675, "grad_norm": 1.5389795303344727, "learning_rate": 2.1459161094655687e-05, "loss": 0.0187, "step": 2312 }, { "epoch": 2.3796296296296298, "grad_norm": 9.632418632507324, "learning_rate": 2.1444344054251252e-05, "loss": 0.4346, "step": 2313 }, { "epoch": 2.3806584362139915, "grad_norm": 0.7773526310920715, "learning_rate": 2.1429530142119952e-05, "loss": 0.0341, "step": 2314 }, { "epoch": 2.3816872427983538, "grad_norm": 5.237499237060547, "learning_rate": 2.141471938279447e-05, "loss": 0.1108, "step": 2315 }, { "epoch": 2.382716049382716, "grad_norm": 8.371172904968262, "learning_rate": 2.1399911800802263e-05, "loss": 0.3826, "step": 2316 }, { "epoch": 2.383744855967078, "grad_norm": 0.7036436200141907, "learning_rate": 2.1385107420665525e-05, "loss": 0.0205, "step": 2317 }, { "epoch": 2.3847736625514404, "grad_norm": 7.287191867828369, "learning_rate": 2.1370306266901146e-05, "loss": 0.2393, "step": 2318 }, { "epoch": 2.3858024691358026, "grad_norm": 4.022860527038574, "learning_rate": 2.1355508364020674e-05, "loss": 0.1191, "step": 2319 }, { "epoch": 2.386831275720165, "grad_norm": 8.647807121276855, "learning_rate": 2.1340713736530272e-05, "loss": 0.4757, "step": 2320 }, { "epoch": 2.386831275720165, "eval_Qnli-dev_cosine_accuracy": 0.7109375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7119765281677246, "eval_Qnli-dev_cosine_ap": 0.7602990911520537, "eval_Qnli-dev_cosine_f1": 0.7132075471698114, "eval_Qnli-dev_cosine_f1_threshold": 0.6880638003349304, "eval_Qnli-dev_cosine_precision": 0.6428571428571429, "eval_Qnli-dev_cosine_recall": 0.8008474576271186, "eval_Qnli-dev_dot_accuracy": 0.689453125, "eval_Qnli-dev_dot_accuracy_threshold": 325.6799011230469, "eval_Qnli-dev_dot_ap": 0.7217468530564131, "eval_Qnli-dev_dot_f1": 0.6856187290969901, "eval_Qnli-dev_dot_f1_threshold": 274.34625244140625, "eval_Qnli-dev_dot_precision": 0.5662983425414365, "eval_Qnli-dev_dot_recall": 0.8686440677966102, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.682184219360352, "eval_Qnli-dev_euclidean_ap": 0.7659527366732183, "eval_Qnli-dev_euclidean_f1": 0.7175572519083969, "eval_Qnli-dev_euclidean_f1_threshold": 16.30190658569336, "eval_Qnli-dev_euclidean_precision": 0.6527777777777778, "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 308.93658447265625, "eval_Qnli-dev_manhattan_ap": 0.7675819061657483, "eval_Qnli-dev_manhattan_f1": 0.717391304347826, "eval_Qnli-dev_manhattan_f1_threshold": 351.19854736328125, "eval_Qnli-dev_manhattan_precision": 0.6265822784810127, "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 325.6799011230469, "eval_Qnli-dev_max_ap": 0.7675819061657483, "eval_Qnli-dev_max_f1": 0.7175572519083969, "eval_Qnli-dev_max_f1_threshold": 351.19854736328125, "eval_Qnli-dev_max_precision": 0.6527777777777778, "eval_Qnli-dev_max_recall": 0.8686440677966102, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8598533868789673, "eval_allNLI-dev_cosine_ap": 0.6381914221277092, "eval_allNLI-dev_cosine_f1": 0.6361556064073227, "eval_allNLI-dev_cosine_f1_threshold": 0.7326873540878296, "eval_allNLI-dev_cosine_precision": 0.5265151515151515, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 359.8466796875, "eval_allNLI-dev_dot_ap": 0.584074636820295, "eval_allNLI-dev_dot_f1": 0.608695652173913, "eval_allNLI-dev_dot_f1_threshold": 308.1065368652344, "eval_allNLI-dev_dot_precision": 0.5037878787878788, "eval_allNLI-dev_dot_recall": 0.7687861271676301, "eval_allNLI-dev_euclidean_accuracy": 0.744140625, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.918830871582031, "eval_allNLI-dev_euclidean_ap": 0.6483938210018565, "eval_allNLI-dev_euclidean_f1": 0.6467889908256881, "eval_allNLI-dev_euclidean_f1_threshold": 15.164717674255371, "eval_allNLI-dev_euclidean_precision": 0.5361216730038023, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.7421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 265.463623046875, "eval_allNLI-dev_manhattan_ap": 0.6450488160783302, "eval_allNLI-dev_manhattan_f1": 0.6393088552915768, "eval_allNLI-dev_manhattan_f1_threshold": 328.457763671875, "eval_allNLI-dev_manhattan_precision": 0.5103448275862069, "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, "eval_allNLI-dev_max_accuracy": 0.744140625, "eval_allNLI-dev_max_accuracy_threshold": 359.8466796875, "eval_allNLI-dev_max_ap": 0.6483938210018565, "eval_allNLI-dev_max_f1": 0.6467889908256881, "eval_allNLI-dev_max_f1_threshold": 328.457763671875, "eval_allNLI-dev_max_precision": 0.5361216730038023, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7675819061657483, "eval_sts-test_pearson_cosine": 0.8508923448809838, "eval_sts-test_pearson_dot": 0.8321847179139741, "eval_sts-test_pearson_euclidean": 0.8767776571778518, "eval_sts-test_pearson_manhattan": 0.8726214447799912, "eval_sts-test_pearson_max": 0.8767776571778518, "eval_sts-test_spearman_cosine": 0.8750843790108124, "eval_sts-test_spearman_dot": 0.8275167864862678, "eval_sts-test_spearman_euclidean": 0.8715298758210163, "eval_sts-test_spearman_manhattan": 0.8675941366354268, "eval_sts-test_spearman_max": 0.8750843790108124, "eval_vitaminc-pairs_loss": 3.3888425827026367, "eval_vitaminc-pairs_runtime": 3.2188, "eval_vitaminc-pairs_samples_per_second": 39.766, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 2320 }, { "epoch": 2.386831275720165, "eval_negation-triplets_loss": 0.9331700205802917, "eval_negation-triplets_runtime": 0.7626, "eval_negation-triplets_samples_per_second": 167.851, "eval_negation-triplets_steps_per_second": 1.311, "step": 2320 }, { "epoch": 2.386831275720165, "eval_scitail-pairs-pos_loss": 0.1767732799053192, "eval_scitail-pairs-pos_runtime": 0.9371, "eval_scitail-pairs-pos_samples_per_second": 136.589, "eval_scitail-pairs-pos_steps_per_second": 1.067, "step": 2320 }, { "epoch": 2.386831275720165, "eval_scitail-pairs-qa_loss": 0.0004786076897289604, "eval_scitail-pairs-qa_runtime": 0.6259, "eval_scitail-pairs-qa_samples_per_second": 204.52, "eval_scitail-pairs-qa_steps_per_second": 1.598, "step": 2320 }, { "epoch": 2.386831275720165, "eval_xsum-pairs_loss": 0.24477504193782806, "eval_xsum-pairs_runtime": 3.0559, "eval_xsum-pairs_samples_per_second": 41.886, "eval_xsum-pairs_steps_per_second": 0.327, "step": 2320 }, { "epoch": 2.386831275720165, "eval_sciq_pairs_loss": 0.09893258661031723, "eval_sciq_pairs_runtime": 3.5335, "eval_sciq_pairs_samples_per_second": 36.225, "eval_sciq_pairs_steps_per_second": 0.283, "step": 2320 }, { "epoch": 2.386831275720165, "eval_qasc_pairs_loss": 0.14177586138248444, "eval_qasc_pairs_runtime": 0.6247, "eval_qasc_pairs_samples_per_second": 204.908, "eval_qasc_pairs_steps_per_second": 1.601, "step": 2320 }, { "epoch": 2.386831275720165, "eval_openbookqa_pairs_loss": 0.6997162103652954, "eval_openbookqa_pairs_runtime": 0.6036, "eval_openbookqa_pairs_samples_per_second": 212.059, "eval_openbookqa_pairs_steps_per_second": 1.657, "step": 2320 }, { "epoch": 2.386831275720165, "eval_msmarco_pairs_loss": 0.8556872010231018, "eval_msmarco_pairs_runtime": 1.5328, "eval_msmarco_pairs_samples_per_second": 83.509, "eval_msmarco_pairs_steps_per_second": 0.652, "step": 2320 }, { "epoch": 2.386831275720165, "eval_nq_pairs_loss": 0.6516047120094299, "eval_nq_pairs_runtime": 2.9184, "eval_nq_pairs_samples_per_second": 43.859, "eval_nq_pairs_steps_per_second": 0.343, "step": 2320 }, { "epoch": 2.386831275720165, "eval_trivia_pairs_loss": 0.7871389389038086, "eval_trivia_pairs_runtime": 3.4438, "eval_trivia_pairs_samples_per_second": 37.168, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2320 }, { "epoch": 2.386831275720165, "eval_gooaq_pairs_loss": 0.3521522879600525, "eval_gooaq_pairs_runtime": 0.9594, "eval_gooaq_pairs_samples_per_second": 133.416, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 2320 }, { "epoch": 2.386831275720165, "eval_paws-pos_loss": 0.022935805842280388, "eval_paws-pos_runtime": 0.7123, "eval_paws-pos_samples_per_second": 179.693, "eval_paws-pos_steps_per_second": 1.404, "step": 2320 }, { "epoch": 2.386831275720165, "eval_global_dataset_loss": 0.5043801665306091, "eval_global_dataset_runtime": 13.4067, "eval_global_dataset_samples_per_second": 31.029, "eval_global_dataset_steps_per_second": 0.298, "step": 2320 }, { "epoch": 2.3878600823045266, "grad_norm": 8.141152381896973, "learning_rate": 2.132592240893068e-05, "loss": 0.2414, "step": 2321 }, { "epoch": 2.388888888888889, "grad_norm": 5.903191566467285, "learning_rate": 2.131113440571718e-05, "loss": 0.2045, "step": 2322 }, { "epoch": 2.389917695473251, "grad_norm": 7.989975452423096, "learning_rate": 2.1296349751379535e-05, "loss": 0.3283, "step": 2323 }, { "epoch": 2.3909465020576133, "grad_norm": 9.426039695739746, "learning_rate": 2.1281568470401975e-05, "loss": 0.3372, "step": 2324 }, { "epoch": 2.3919753086419755, "grad_norm": 0.242423877120018, "learning_rate": 2.126679058726313e-05, "loss": 0.0025, "step": 2325 }, { "epoch": 2.3930041152263373, "grad_norm": 4.283985614776611, "learning_rate": 2.1252016126436018e-05, "loss": 0.092, "step": 2326 }, { "epoch": 2.3940329218106995, "grad_norm": 6.385652542114258, "learning_rate": 2.1237245112387977e-05, "loss": 0.2207, "step": 2327 }, { "epoch": 2.3950617283950617, "grad_norm": 10.515926361083984, "learning_rate": 2.122247756958065e-05, "loss": 0.4517, "step": 2328 }, { "epoch": 2.396090534979424, "grad_norm": 10.573301315307617, "learning_rate": 2.1207713522469914e-05, "loss": 0.6023, "step": 2329 }, { "epoch": 2.397119341563786, "grad_norm": 4.354704856872559, "learning_rate": 2.119295299550587e-05, "loss": 0.1475, "step": 2330 }, { "epoch": 2.398148148148148, "grad_norm": 3.8229165077209473, "learning_rate": 2.117819601313279e-05, "loss": 0.0387, "step": 2331 }, { "epoch": 2.39917695473251, "grad_norm": 3.688175916671753, "learning_rate": 2.1163442599789068e-05, "loss": 0.0988, "step": 2332 }, { "epoch": 2.4002057613168724, "grad_norm": 0.6866854429244995, "learning_rate": 2.114869277990719e-05, "loss": 0.0434, "step": 2333 }, { "epoch": 2.4012345679012346, "grad_norm": 6.986307621002197, "learning_rate": 2.1133946577913697e-05, "loss": 0.2121, "step": 2334 }, { "epoch": 2.402263374485597, "grad_norm": 7.146950721740723, "learning_rate": 2.1119204018229125e-05, "loss": 0.2746, "step": 2335 }, { "epoch": 2.403292181069959, "grad_norm": 2.9970691204071045, "learning_rate": 2.1104465125268006e-05, "loss": 0.0735, "step": 2336 }, { "epoch": 2.4043209876543212, "grad_norm": 5.8535966873168945, "learning_rate": 2.108972992343876e-05, "loss": 0.2131, "step": 2337 }, { "epoch": 2.405349794238683, "grad_norm": 0.47091808915138245, "learning_rate": 2.1074998437143725e-05, "loss": 0.0251, "step": 2338 }, { "epoch": 2.406378600823045, "grad_norm": 5.7281599044799805, "learning_rate": 2.1060270690779072e-05, "loss": 0.1822, "step": 2339 }, { "epoch": 2.4074074074074074, "grad_norm": 2.2652881145477295, "learning_rate": 2.1045546708734784e-05, "loss": 0.0657, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.747207522392273, "eval_Qnli-dev_cosine_ap": 0.7550750129926996, "eval_Qnli-dev_cosine_f1": 0.7150837988826815, "eval_Qnli-dev_cosine_f1_threshold": 0.6943259835243225, "eval_Qnli-dev_cosine_precision": 0.6378737541528239, "eval_Qnli-dev_cosine_recall": 0.8135593220338984, "eval_Qnli-dev_dot_accuracy": 0.693359375, "eval_Qnli-dev_dot_accuracy_threshold": 330.9173583984375, "eval_Qnli-dev_dot_ap": 0.7157403439240755, "eval_Qnli-dev_dot_f1": 0.6846543001686342, "eval_Qnli-dev_dot_f1_threshold": 279.15374755859375, "eval_Qnli-dev_dot_precision": 0.5686274509803921, "eval_Qnli-dev_dot_recall": 0.8601694915254238, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.921449661254883, "eval_Qnli-dev_euclidean_ap": 0.7598621951557832, "eval_Qnli-dev_euclidean_f1": 0.71875, "eval_Qnli-dev_euclidean_f1_threshold": 15.921449661254883, "eval_Qnli-dev_euclidean_precision": 0.6666666666666666, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.720703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 304.71258544921875, "eval_Qnli-dev_manhattan_ap": 0.76479265959043, "eval_Qnli-dev_manhattan_f1": 0.7161904761904763, "eval_Qnli-dev_manhattan_f1_threshold": 337.3750915527344, "eval_Qnli-dev_manhattan_precision": 0.6505190311418685, "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, "eval_Qnli-dev_max_accuracy": 0.720703125, "eval_Qnli-dev_max_accuracy_threshold": 330.9173583984375, "eval_Qnli-dev_max_ap": 0.76479265959043, "eval_Qnli-dev_max_f1": 0.71875, "eval_Qnli-dev_max_f1_threshold": 337.3750915527344, "eval_Qnli-dev_max_precision": 0.6666666666666666, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8150783777236938, "eval_allNLI-dev_cosine_ap": 0.6403760642877065, "eval_allNLI-dev_cosine_f1": 0.6407766990291262, "eval_allNLI-dev_cosine_f1_threshold": 0.7527122497558594, "eval_allNLI-dev_cosine_precision": 0.5523012552301255, "eval_allNLI-dev_cosine_recall": 0.7630057803468208, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 378.8970947265625, "eval_allNLI-dev_dot_ap": 0.5821239761612493, "eval_allNLI-dev_dot_f1": 0.6058091286307054, "eval_allNLI-dev_dot_f1_threshold": 294.75982666015625, "eval_allNLI-dev_dot_precision": 0.47249190938511326, "eval_allNLI-dev_dot_recall": 0.8439306358381503, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.004294395446777, "eval_allNLI-dev_euclidean_ap": 0.6506164162252022, "eval_allNLI-dev_euclidean_f1": 0.6469248291571754, "eval_allNLI-dev_euclidean_f1_threshold": 15.118417739868164, "eval_allNLI-dev_euclidean_precision": 0.5338345864661654, "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, "eval_allNLI-dev_manhattan_accuracy": 0.740234375, "eval_allNLI-dev_manhattan_accuracy_threshold": 263.478515625, "eval_allNLI-dev_manhattan_ap": 0.6493825145479308, "eval_allNLI-dev_manhattan_f1": 0.6433260393873084, "eval_allNLI-dev_manhattan_f1_threshold": 325.490234375, "eval_allNLI-dev_manhattan_precision": 0.5176056338028169, "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, "eval_allNLI-dev_max_accuracy": 0.7421875, "eval_allNLI-dev_max_accuracy_threshold": 378.8970947265625, "eval_allNLI-dev_max_ap": 0.6506164162252022, "eval_allNLI-dev_max_f1": 0.6469248291571754, "eval_allNLI-dev_max_f1_threshold": 325.490234375, "eval_allNLI-dev_max_precision": 0.5523012552301255, "eval_allNLI-dev_max_recall": 0.8497109826589595, "eval_sequential_score": 0.76479265959043, "eval_sts-test_pearson_cosine": 0.8505505641571589, "eval_sts-test_pearson_dot": 0.8319891412078086, "eval_sts-test_pearson_euclidean": 0.8756369096783705, "eval_sts-test_pearson_manhattan": 0.8719660063964456, "eval_sts-test_pearson_max": 0.8756369096783705, "eval_sts-test_spearman_cosine": 0.8744222393472654, "eval_sts-test_spearman_dot": 0.8277676620576281, "eval_sts-test_spearman_euclidean": 0.8700285157525961, "eval_sts-test_spearman_manhattan": 0.8667942078380557, "eval_sts-test_spearman_max": 0.8744222393472654, "eval_vitaminc-pairs_loss": 3.2281875610351562, "eval_vitaminc-pairs_runtime": 3.3027, "eval_vitaminc-pairs_samples_per_second": 38.756, "eval_vitaminc-pairs_steps_per_second": 0.303, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_negation-triplets_loss": 0.9330846667289734, "eval_negation-triplets_runtime": 0.8036, "eval_negation-triplets_samples_per_second": 159.289, "eval_negation-triplets_steps_per_second": 1.244, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_scitail-pairs-pos_loss": 0.15138240158557892, "eval_scitail-pairs-pos_runtime": 1.0164, "eval_scitail-pairs-pos_samples_per_second": 125.938, "eval_scitail-pairs-pos_steps_per_second": 0.984, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_scitail-pairs-qa_loss": 0.0004746115591842681, "eval_scitail-pairs-qa_runtime": 0.682, "eval_scitail-pairs-qa_samples_per_second": 187.679, "eval_scitail-pairs-qa_steps_per_second": 1.466, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_xsum-pairs_loss": 0.24052901566028595, "eval_xsum-pairs_runtime": 3.058, "eval_xsum-pairs_samples_per_second": 41.857, "eval_xsum-pairs_steps_per_second": 0.327, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_sciq_pairs_loss": 0.09549580514431, "eval_sciq_pairs_runtime": 3.5902, "eval_sciq_pairs_samples_per_second": 35.653, "eval_sciq_pairs_steps_per_second": 0.279, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_qasc_pairs_loss": 0.13644863665103912, "eval_qasc_pairs_runtime": 0.6343, "eval_qasc_pairs_samples_per_second": 201.796, "eval_qasc_pairs_steps_per_second": 1.577, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_openbookqa_pairs_loss": 0.7199987769126892, "eval_openbookqa_pairs_runtime": 0.6065, "eval_openbookqa_pairs_samples_per_second": 211.057, "eval_openbookqa_pairs_steps_per_second": 1.649, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_msmarco_pairs_loss": 0.8501226902008057, "eval_msmarco_pairs_runtime": 1.54, "eval_msmarco_pairs_samples_per_second": 83.115, "eval_msmarco_pairs_steps_per_second": 0.649, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_nq_pairs_loss": 0.6119047999382019, "eval_nq_pairs_runtime": 2.9156, "eval_nq_pairs_samples_per_second": 43.902, "eval_nq_pairs_steps_per_second": 0.343, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_trivia_pairs_loss": 0.8336032629013062, "eval_trivia_pairs_runtime": 3.4539, "eval_trivia_pairs_samples_per_second": 37.06, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_gooaq_pairs_loss": 0.3352070450782776, "eval_gooaq_pairs_runtime": 0.9839, "eval_gooaq_pairs_samples_per_second": 130.091, "eval_gooaq_pairs_steps_per_second": 1.016, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_paws-pos_loss": 0.022952904924750328, "eval_paws-pos_runtime": 0.7263, "eval_paws-pos_samples_per_second": 176.236, "eval_paws-pos_steps_per_second": 1.377, "step": 2340 }, { "epoch": 2.4074074074074074, "eval_global_dataset_loss": 0.4841943085193634, "eval_global_dataset_runtime": 13.5471, "eval_global_dataset_samples_per_second": 30.708, "eval_global_dataset_steps_per_second": 0.295, "step": 2340 }, { "epoch": 2.4084362139917697, "grad_norm": 8.437246322631836, "learning_rate": 2.103082651539461e-05, "loss": 0.3478, "step": 2341 }, { "epoch": 2.409465020576132, "grad_norm": 6.477084159851074, "learning_rate": 2.1016110135136024e-05, "loss": 0.4349, "step": 2342 }, { "epoch": 2.4104938271604937, "grad_norm": 10.612602233886719, "learning_rate": 2.1001397592330188e-05, "loss": 0.527, "step": 2343 }, { "epoch": 2.411522633744856, "grad_norm": 2.255913019180298, "learning_rate": 2.0986688911341888e-05, "loss": 0.0234, "step": 2344 }, { "epoch": 2.412551440329218, "grad_norm": 6.696039199829102, "learning_rate": 2.0971984116529553e-05, "loss": 0.3944, "step": 2345 }, { "epoch": 2.4135802469135803, "grad_norm": 3.830223798751831, "learning_rate": 2.0957283232245142e-05, "loss": 0.167, "step": 2346 }, { "epoch": 2.4146090534979425, "grad_norm": 5.635642051696777, "learning_rate": 2.094258628283416e-05, "loss": 0.2363, "step": 2347 }, { "epoch": 2.4156378600823043, "grad_norm": 0.8433836102485657, "learning_rate": 2.092789329263558e-05, "loss": 0.0189, "step": 2348 }, { "epoch": 2.4166666666666665, "grad_norm": 5.297120094299316, "learning_rate": 2.091320428598183e-05, "loss": 0.1442, "step": 2349 }, { "epoch": 2.4176954732510287, "grad_norm": 8.437000274658203, "learning_rate": 2.0898519287198733e-05, "loss": 0.2991, "step": 2350 }, { "epoch": 2.418724279835391, "grad_norm": 11.086387634277344, "learning_rate": 2.088383832060548e-05, "loss": 0.596, "step": 2351 }, { "epoch": 2.419753086419753, "grad_norm": 3.563920497894287, "learning_rate": 2.086916141051458e-05, "loss": 0.0736, "step": 2352 }, { "epoch": 2.4207818930041154, "grad_norm": 4.009937286376953, "learning_rate": 2.0854488581231832e-05, "loss": 0.072, "step": 2353 }, { "epoch": 2.4218106995884776, "grad_norm": 3.8591458797454834, "learning_rate": 2.0839819857056263e-05, "loss": 0.0782, "step": 2354 }, { "epoch": 2.4228395061728394, "grad_norm": 8.588478088378906, "learning_rate": 2.082515526228012e-05, "loss": 0.4359, "step": 2355 }, { "epoch": 2.4238683127572016, "grad_norm": 5.57858943939209, "learning_rate": 2.0810494821188793e-05, "loss": 0.153, "step": 2356 }, { "epoch": 2.424897119341564, "grad_norm": 6.5189690589904785, "learning_rate": 2.0795838558060812e-05, "loss": 0.1654, "step": 2357 }, { "epoch": 2.425925925925926, "grad_norm": 5.958938121795654, "learning_rate": 2.0781186497167762e-05, "loss": 0.1721, "step": 2358 }, { "epoch": 2.4269547325102883, "grad_norm": 8.767863273620605, "learning_rate": 2.0766538662774305e-05, "loss": 0.4326, "step": 2359 }, { "epoch": 2.42798353909465, "grad_norm": 8.790294647216797, "learning_rate": 2.075189507913807e-05, "loss": 0.424, "step": 2360 }, { "epoch": 2.42798353909465, "eval_Qnli-dev_cosine_accuracy": 0.71875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7381449937820435, "eval_Qnli-dev_cosine_ap": 0.7455820885957152, "eval_Qnli-dev_cosine_f1": 0.7046632124352332, "eval_Qnli-dev_cosine_f1_threshold": 0.6711353063583374, "eval_Qnli-dev_cosine_precision": 0.5947521865889213, "eval_Qnli-dev_cosine_recall": 0.864406779661017, "eval_Qnli-dev_dot_accuracy": 0.673828125, "eval_Qnli-dev_dot_accuracy_threshold": 344.9980163574219, "eval_Qnli-dev_dot_ap": 0.6941399390575902, "eval_Qnli-dev_dot_f1": 0.6766169154228855, "eval_Qnli-dev_dot_f1_threshold": 286.1156005859375, "eval_Qnli-dev_dot_precision": 0.555858310626703, "eval_Qnli-dev_dot_recall": 0.864406779661017, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.076042175292969, "eval_Qnli-dev_euclidean_ap": 0.7541186023300442, "eval_Qnli-dev_euclidean_f1": 0.7079303675048356, "eval_Qnli-dev_euclidean_f1_threshold": 15.928156852722168, "eval_Qnli-dev_euclidean_precision": 0.6512455516014235, "eval_Qnli-dev_euclidean_recall": 0.7754237288135594, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 306.5092468261719, "eval_Qnli-dev_manhattan_ap": 0.759303239951657, "eval_Qnli-dev_manhattan_f1": 0.7153284671532847, "eval_Qnli-dev_manhattan_f1_threshold": 344.607421875, "eval_Qnli-dev_manhattan_precision": 0.6282051282051282, "eval_Qnli-dev_manhattan_recall": 0.8305084745762712, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 344.9980163574219, "eval_Qnli-dev_max_ap": 0.759303239951657, "eval_Qnli-dev_max_f1": 0.7153284671532847, "eval_Qnli-dev_max_f1_threshold": 344.607421875, "eval_Qnli-dev_max_precision": 0.6512455516014235, "eval_Qnli-dev_max_recall": 0.864406779661017, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.7885736227035522, "eval_allNLI-dev_cosine_ap": 0.6295434410984027, "eval_allNLI-dev_cosine_f1": 0.6414253897550111, "eval_allNLI-dev_cosine_f1_threshold": 0.7364753484725952, "eval_allNLI-dev_cosine_precision": 0.5217391304347826, "eval_allNLI-dev_cosine_recall": 0.8323699421965318, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 366.090576171875, "eval_allNLI-dev_dot_ap": 0.5672938930377167, "eval_allNLI-dev_dot_f1": 0.6017699115044248, "eval_allNLI-dev_dot_f1_threshold": 315.785888671875, "eval_allNLI-dev_dot_precision": 0.4874551971326165, "eval_allNLI-dev_dot_recall": 0.7861271676300579, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.185325622558594, "eval_allNLI-dev_euclidean_ap": 0.6386797664225503, "eval_allNLI-dev_euclidean_f1": 0.6442307692307693, "eval_allNLI-dev_euclidean_f1_threshold": 14.415718078613281, "eval_allNLI-dev_euclidean_precision": 0.551440329218107, "eval_allNLI-dev_euclidean_recall": 0.7745664739884393, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 266.48516845703125, "eval_allNLI-dev_manhattan_ap": 0.6373586973705275, "eval_allNLI-dev_manhattan_f1": 0.6425120772946861, "eval_allNLI-dev_manhattan_f1_threshold": 305.5716247558594, "eval_allNLI-dev_manhattan_precision": 0.5518672199170125, "eval_allNLI-dev_manhattan_recall": 0.7687861271676301, "eval_allNLI-dev_max_accuracy": 0.7421875, "eval_allNLI-dev_max_accuracy_threshold": 366.090576171875, "eval_allNLI-dev_max_ap": 0.6386797664225503, "eval_allNLI-dev_max_f1": 0.6442307692307693, "eval_allNLI-dev_max_f1_threshold": 315.785888671875, "eval_allNLI-dev_max_precision": 0.5518672199170125, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.759303239951657, "eval_sts-test_pearson_cosine": 0.8490502945589666, "eval_sts-test_pearson_dot": 0.8297688891502646, "eval_sts-test_pearson_euclidean": 0.8768496189760543, "eval_sts-test_pearson_manhattan": 0.8733387541973802, "eval_sts-test_pearson_max": 0.8768496189760543, "eval_sts-test_spearman_cosine": 0.8766096810040769, "eval_sts-test_spearman_dot": 0.8247449828582074, "eval_sts-test_spearman_euclidean": 0.8736062448470544, "eval_sts-test_spearman_manhattan": 0.8709237647367312, "eval_sts-test_spearman_max": 0.8766096810040769, "eval_vitaminc-pairs_loss": 3.183577537536621, "eval_vitaminc-pairs_runtime": 3.2485, "eval_vitaminc-pairs_samples_per_second": 39.403, "eval_vitaminc-pairs_steps_per_second": 0.308, "step": 2360 }, { "epoch": 2.42798353909465, "eval_negation-triplets_loss": 0.9375492334365845, "eval_negation-triplets_runtime": 0.7707, "eval_negation-triplets_samples_per_second": 166.088, "eval_negation-triplets_steps_per_second": 1.298, "step": 2360 }, { "epoch": 2.42798353909465, "eval_scitail-pairs-pos_loss": 0.1525314599275589, "eval_scitail-pairs-pos_runtime": 0.9182, "eval_scitail-pairs-pos_samples_per_second": 139.397, "eval_scitail-pairs-pos_steps_per_second": 1.089, "step": 2360 }, { "epoch": 2.42798353909465, "eval_scitail-pairs-qa_loss": 0.0005260963807813823, "eval_scitail-pairs-qa_runtime": 0.6088, "eval_scitail-pairs-qa_samples_per_second": 210.247, "eval_scitail-pairs-qa_steps_per_second": 1.643, "step": 2360 }, { "epoch": 2.42798353909465, "eval_xsum-pairs_loss": 0.27072906494140625, "eval_xsum-pairs_runtime": 3.0331, "eval_xsum-pairs_samples_per_second": 42.201, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2360 }, { "epoch": 2.42798353909465, "eval_sciq_pairs_loss": 0.08968310803174973, "eval_sciq_pairs_runtime": 3.5294, "eval_sciq_pairs_samples_per_second": 36.267, "eval_sciq_pairs_steps_per_second": 0.283, "step": 2360 }, { "epoch": 2.42798353909465, "eval_qasc_pairs_loss": 0.14335228502750397, "eval_qasc_pairs_runtime": 0.6387, "eval_qasc_pairs_samples_per_second": 200.422, "eval_qasc_pairs_steps_per_second": 1.566, "step": 2360 }, { "epoch": 2.42798353909465, "eval_openbookqa_pairs_loss": 0.7194057106971741, "eval_openbookqa_pairs_runtime": 0.6049, "eval_openbookqa_pairs_samples_per_second": 211.617, "eval_openbookqa_pairs_steps_per_second": 1.653, "step": 2360 }, { "epoch": 2.42798353909465, "eval_msmarco_pairs_loss": 0.7818937301635742, "eval_msmarco_pairs_runtime": 1.533, "eval_msmarco_pairs_samples_per_second": 83.498, "eval_msmarco_pairs_steps_per_second": 0.652, "step": 2360 }, { "epoch": 2.42798353909465, "eval_nq_pairs_loss": 0.6466493606567383, "eval_nq_pairs_runtime": 2.9087, "eval_nq_pairs_samples_per_second": 44.006, "eval_nq_pairs_steps_per_second": 0.344, "step": 2360 }, { "epoch": 2.42798353909465, "eval_trivia_pairs_loss": 0.8176050782203674, "eval_trivia_pairs_runtime": 3.4468, "eval_trivia_pairs_samples_per_second": 37.136, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2360 }, { "epoch": 2.42798353909465, "eval_gooaq_pairs_loss": 0.33769553899765015, "eval_gooaq_pairs_runtime": 0.9622, "eval_gooaq_pairs_samples_per_second": 133.033, "eval_gooaq_pairs_steps_per_second": 1.039, "step": 2360 }, { "epoch": 2.42798353909465, "eval_paws-pos_loss": 0.022663207724690437, "eval_paws-pos_runtime": 0.7105, "eval_paws-pos_samples_per_second": 180.154, "eval_paws-pos_steps_per_second": 1.407, "step": 2360 }, { "epoch": 2.42798353909465, "eval_global_dataset_loss": 0.4742085337638855, "eval_global_dataset_runtime": 13.4203, "eval_global_dataset_samples_per_second": 30.998, "eval_global_dataset_steps_per_second": 0.298, "step": 2360 }, { "epoch": 2.4290123456790123, "grad_norm": 0.2542645335197449, "learning_rate": 2.0737255770509666e-05, "loss": 0.0039, "step": 2361 }, { "epoch": 2.4300411522633745, "grad_norm": 4.680812835693359, "learning_rate": 2.0722620761132614e-05, "loss": 0.1155, "step": 2362 }, { "epoch": 2.4310699588477367, "grad_norm": 0.015261419117450714, "learning_rate": 2.070799007524332e-05, "loss": 0.0002, "step": 2363 }, { "epoch": 2.432098765432099, "grad_norm": 6.4781174659729, "learning_rate": 2.0693363737071018e-05, "loss": 0.1702, "step": 2364 }, { "epoch": 2.4331275720164607, "grad_norm": 3.860341787338257, "learning_rate": 2.0678741770837764e-05, "loss": 0.0731, "step": 2365 }, { "epoch": 2.434156378600823, "grad_norm": 5.25972318649292, "learning_rate": 2.0664124200758362e-05, "loss": 0.1017, "step": 2366 }, { "epoch": 2.435185185185185, "grad_norm": 8.531905174255371, "learning_rate": 2.0649511051040328e-05, "loss": 0.3434, "step": 2367 }, { "epoch": 2.4362139917695473, "grad_norm": 0.18288291990756989, "learning_rate": 2.0634902345883864e-05, "loss": 0.0026, "step": 2368 }, { "epoch": 2.4372427983539096, "grad_norm": 4.627861976623535, "learning_rate": 2.0620298109481818e-05, "loss": 0.1196, "step": 2369 }, { "epoch": 2.4382716049382718, "grad_norm": 5.738020420074463, "learning_rate": 2.060569836601963e-05, "loss": 0.2487, "step": 2370 }, { "epoch": 2.439300411522634, "grad_norm": 9.200518608093262, "learning_rate": 2.0591103139675304e-05, "loss": 0.4018, "step": 2371 }, { "epoch": 2.4403292181069958, "grad_norm": 4.573552131652832, "learning_rate": 2.0576512454619352e-05, "loss": 0.09, "step": 2372 }, { "epoch": 2.441358024691358, "grad_norm": 15.333046913146973, "learning_rate": 2.0561926335014787e-05, "loss": 1.9889, "step": 2373 }, { "epoch": 2.44238683127572, "grad_norm": 4.621583461761475, "learning_rate": 2.0547344805017036e-05, "loss": 0.1348, "step": 2374 }, { "epoch": 2.4434156378600824, "grad_norm": 4.141235828399658, "learning_rate": 2.0532767888773946e-05, "loss": 0.0876, "step": 2375 }, { "epoch": 2.4444444444444446, "grad_norm": 0.08058172464370728, "learning_rate": 2.0518195610425714e-05, "loss": 0.0009, "step": 2376 }, { "epoch": 2.4454732510288064, "grad_norm": 11.431347846984863, "learning_rate": 2.050362799410485e-05, "loss": 0.4829, "step": 2377 }, { "epoch": 2.4465020576131686, "grad_norm": 6.493156909942627, "learning_rate": 2.048906506393616e-05, "loss": 0.1801, "step": 2378 }, { "epoch": 2.447530864197531, "grad_norm": 11.291666030883789, "learning_rate": 2.0474506844036678e-05, "loss": 0.5785, "step": 2379 }, { "epoch": 2.448559670781893, "grad_norm": 4.573147773742676, "learning_rate": 2.045995335851564e-05, "loss": 0.0832, "step": 2380 }, { "epoch": 2.448559670781893, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7453248500823975, "eval_Qnli-dev_cosine_ap": 0.7515482666783928, "eval_Qnli-dev_cosine_f1": 0.7071428571428572, "eval_Qnli-dev_cosine_f1_threshold": 0.6855937242507935, "eval_Qnli-dev_cosine_precision": 0.6111111111111112, "eval_Qnli-dev_cosine_recall": 0.8389830508474576, "eval_Qnli-dev_dot_accuracy": 0.685546875, "eval_Qnli-dev_dot_accuracy_threshold": 351.0618591308594, "eval_Qnli-dev_dot_ap": 0.6940425266526606, "eval_Qnli-dev_dot_f1": 0.6785714285714286, "eval_Qnli-dev_dot_f1_threshold": 282.13201904296875, "eval_Qnli-dev_dot_precision": 0.55, "eval_Qnli-dev_dot_recall": 0.885593220338983, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.241870880126953, "eval_Qnli-dev_euclidean_ap": 0.7581399600026446, "eval_Qnli-dev_euclidean_f1": 0.7093235831809872, "eval_Qnli-dev_euclidean_f1_threshold": 16.369251251220703, "eval_Qnli-dev_euclidean_precision": 0.6237942122186495, "eval_Qnli-dev_euclidean_recall": 0.8220338983050848, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 309.08245849609375, "eval_Qnli-dev_manhattan_ap": 0.7642020259418071, "eval_Qnli-dev_manhattan_f1": 0.7108655616942909, "eval_Qnli-dev_manhattan_f1_threshold": 342.5469970703125, "eval_Qnli-dev_manhattan_precision": 0.6286644951140065, "eval_Qnli-dev_manhattan_recall": 0.8177966101694916, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 351.0618591308594, "eval_Qnli-dev_max_ap": 0.7642020259418071, "eval_Qnli-dev_max_f1": 0.7108655616942909, "eval_Qnli-dev_max_f1_threshold": 342.5469970703125, "eval_Qnli-dev_max_precision": 0.6286644951140065, "eval_Qnli-dev_max_recall": 0.885593220338983, "eval_allNLI-dev_cosine_accuracy": 0.732421875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.788677990436554, "eval_allNLI-dev_cosine_ap": 0.6268711298115903, "eval_allNLI-dev_cosine_f1": 0.6441441441441441, "eval_allNLI-dev_cosine_f1_threshold": 0.7389147281646729, "eval_allNLI-dev_cosine_precision": 0.5276752767527675, "eval_allNLI-dev_cosine_recall": 0.8265895953757225, "eval_allNLI-dev_dot_accuracy": 0.69921875, "eval_allNLI-dev_dot_accuracy_threshold": 370.53497314453125, "eval_allNLI-dev_dot_ap": 0.5657282206617683, "eval_allNLI-dev_dot_f1": 0.6053811659192826, "eval_allNLI-dev_dot_f1_threshold": 311.272705078125, "eval_allNLI-dev_dot_precision": 0.4945054945054945, "eval_allNLI-dev_dot_recall": 0.7803468208092486, "eval_allNLI-dev_euclidean_accuracy": 0.740234375, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.105224609375, "eval_allNLI-dev_euclidean_ap": 0.635176479184964, "eval_allNLI-dev_euclidean_f1": 0.6452991452991454, "eval_allNLI-dev_euclidean_f1_threshold": 15.597915649414062, "eval_allNLI-dev_euclidean_precision": 0.511864406779661, "eval_allNLI-dev_euclidean_recall": 0.8728323699421965, "eval_allNLI-dev_manhattan_accuracy": 0.73828125, "eval_allNLI-dev_manhattan_accuracy_threshold": 266.0863037109375, "eval_allNLI-dev_manhattan_ap": 0.6328374451791714, "eval_allNLI-dev_manhattan_f1": 0.6428571428571429, "eval_allNLI-dev_manhattan_f1_threshold": 317.72406005859375, "eval_allNLI-dev_manhattan_precision": 0.5236363636363637, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.740234375, "eval_allNLI-dev_max_accuracy_threshold": 370.53497314453125, "eval_allNLI-dev_max_ap": 0.635176479184964, "eval_allNLI-dev_max_f1": 0.6452991452991454, "eval_allNLI-dev_max_f1_threshold": 317.72406005859375, "eval_allNLI-dev_max_precision": 0.5276752767527675, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.7642020259418071, "eval_sts-test_pearson_cosine": 0.8453833948407055, "eval_sts-test_pearson_dot": 0.8237173239473772, "eval_sts-test_pearson_euclidean": 0.8742859955629088, "eval_sts-test_pearson_manhattan": 0.8708241682671891, "eval_sts-test_pearson_max": 0.8742859955629088, "eval_sts-test_spearman_cosine": 0.8745370263327341, "eval_sts-test_spearman_dot": 0.8180517156969415, "eval_sts-test_spearman_euclidean": 0.8719163924656539, "eval_sts-test_spearman_manhattan": 0.8684305377108321, "eval_sts-test_spearman_max": 0.8745370263327341, "eval_vitaminc-pairs_loss": 3.2834718227386475, "eval_vitaminc-pairs_runtime": 3.2558, "eval_vitaminc-pairs_samples_per_second": 39.315, "eval_vitaminc-pairs_steps_per_second": 0.307, "step": 2380 }, { "epoch": 2.448559670781893, "eval_negation-triplets_loss": 0.9011225700378418, "eval_negation-triplets_runtime": 0.7828, "eval_negation-triplets_samples_per_second": 163.52, "eval_negation-triplets_steps_per_second": 1.278, "step": 2380 }, { "epoch": 2.448559670781893, "eval_scitail-pairs-pos_loss": 0.14622709155082703, "eval_scitail-pairs-pos_runtime": 0.954, "eval_scitail-pairs-pos_samples_per_second": 134.173, "eval_scitail-pairs-pos_steps_per_second": 1.048, "step": 2380 }, { "epoch": 2.448559670781893, "eval_scitail-pairs-qa_loss": 0.0003964989446103573, "eval_scitail-pairs-qa_runtime": 0.6281, "eval_scitail-pairs-qa_samples_per_second": 203.779, "eval_scitail-pairs-qa_steps_per_second": 1.592, "step": 2380 }, { "epoch": 2.448559670781893, "eval_xsum-pairs_loss": 0.25987619161605835, "eval_xsum-pairs_runtime": 3.0394, "eval_xsum-pairs_samples_per_second": 42.114, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2380 }, { "epoch": 2.448559670781893, "eval_sciq_pairs_loss": 0.09321531653404236, "eval_sciq_pairs_runtime": 3.5312, "eval_sciq_pairs_samples_per_second": 36.249, "eval_sciq_pairs_steps_per_second": 0.283, "step": 2380 }, { "epoch": 2.448559670781893, "eval_qasc_pairs_loss": 0.14463794231414795, "eval_qasc_pairs_runtime": 0.627, "eval_qasc_pairs_samples_per_second": 204.134, "eval_qasc_pairs_steps_per_second": 1.595, "step": 2380 }, { "epoch": 2.448559670781893, "eval_openbookqa_pairs_loss": 0.6942688822746277, "eval_openbookqa_pairs_runtime": 0.6113, "eval_openbookqa_pairs_samples_per_second": 209.385, "eval_openbookqa_pairs_steps_per_second": 1.636, "step": 2380 }, { "epoch": 2.448559670781893, "eval_msmarco_pairs_loss": 0.7531794905662537, "eval_msmarco_pairs_runtime": 1.5318, "eval_msmarco_pairs_samples_per_second": 83.561, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 2380 }, { "epoch": 2.448559670781893, "eval_nq_pairs_loss": 0.6968003511428833, "eval_nq_pairs_runtime": 2.9055, "eval_nq_pairs_samples_per_second": 44.055, "eval_nq_pairs_steps_per_second": 0.344, "step": 2380 }, { "epoch": 2.448559670781893, "eval_trivia_pairs_loss": 0.7705097198486328, "eval_trivia_pairs_runtime": 3.4531, "eval_trivia_pairs_samples_per_second": 37.068, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2380 }, { "epoch": 2.448559670781893, "eval_gooaq_pairs_loss": 0.3276014029979706, "eval_gooaq_pairs_runtime": 0.9593, "eval_gooaq_pairs_samples_per_second": 133.432, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 2380 }, { "epoch": 2.448559670781893, "eval_paws-pos_loss": 0.02253994718194008, "eval_paws-pos_runtime": 0.7171, "eval_paws-pos_samples_per_second": 178.501, "eval_paws-pos_steps_per_second": 1.395, "step": 2380 }, { "epoch": 2.448559670781893, "eval_global_dataset_loss": 0.47114360332489014, "eval_global_dataset_runtime": 13.4412, "eval_global_dataset_samples_per_second": 30.95, "eval_global_dataset_steps_per_second": 0.298, "step": 2380 }, { "epoch": 2.4495884773662553, "grad_norm": 14.15799331665039, "learning_rate": 2.0445404631474436e-05, "loss": 1.8056, "step": 2381 }, { "epoch": 2.450617283950617, "grad_norm": 7.68344783782959, "learning_rate": 2.0430860687006592e-05, "loss": 0.3692, "step": 2382 }, { "epoch": 2.4516460905349793, "grad_norm": 9.818589210510254, "learning_rate": 2.0416321549197695e-05, "loss": 0.5529, "step": 2383 }, { "epoch": 2.4526748971193415, "grad_norm": 3.900679588317871, "learning_rate": 2.0401787242125384e-05, "loss": 0.0598, "step": 2384 }, { "epoch": 2.4537037037037037, "grad_norm": 6.681536674499512, "learning_rate": 2.0387257789859287e-05, "loss": 0.2639, "step": 2385 }, { "epoch": 2.454732510288066, "grad_norm": 8.679218292236328, "learning_rate": 2.0372733216461014e-05, "loss": 0.2728, "step": 2386 }, { "epoch": 2.455761316872428, "grad_norm": 4.3105149269104, "learning_rate": 2.0358213545984065e-05, "loss": 0.1657, "step": 2387 }, { "epoch": 2.45679012345679, "grad_norm": 2.6358437538146973, "learning_rate": 2.034369880247384e-05, "loss": 0.1206, "step": 2388 }, { "epoch": 2.457818930041152, "grad_norm": 8.414362907409668, "learning_rate": 2.032918900996758e-05, "loss": 0.5121, "step": 2389 }, { "epoch": 2.4588477366255144, "grad_norm": 5.507661819458008, "learning_rate": 2.0314684192494322e-05, "loss": 0.1428, "step": 2390 }, { "epoch": 2.4598765432098766, "grad_norm": 8.246047973632812, "learning_rate": 2.030018437407486e-05, "loss": 0.4525, "step": 2391 }, { "epoch": 2.460905349794239, "grad_norm": 0.056204650551080704, "learning_rate": 2.028568957872172e-05, "loss": 0.0008, "step": 2392 }, { "epoch": 2.461934156378601, "grad_norm": 3.879879951477051, "learning_rate": 2.0271199830439098e-05, "loss": 0.0755, "step": 2393 }, { "epoch": 2.462962962962963, "grad_norm": 8.25973129272461, "learning_rate": 2.025671515322284e-05, "loss": 0.4055, "step": 2394 }, { "epoch": 2.463991769547325, "grad_norm": 8.683910369873047, "learning_rate": 2.0242235571060384e-05, "loss": 0.3136, "step": 2395 }, { "epoch": 2.4650205761316872, "grad_norm": 3.877725124359131, "learning_rate": 2.0227761107930747e-05, "loss": 0.051, "step": 2396 }, { "epoch": 2.4660493827160495, "grad_norm": 3.031614303588867, "learning_rate": 2.0213291787804453e-05, "loss": 0.0551, "step": 2397 }, { "epoch": 2.4670781893004117, "grad_norm": 3.2729036808013916, "learning_rate": 2.0198827634643523e-05, "loss": 0.0787, "step": 2398 }, { "epoch": 2.4681069958847734, "grad_norm": 5.738383769989014, "learning_rate": 2.01843686724014e-05, "loss": 0.1634, "step": 2399 }, { "epoch": 2.4691358024691357, "grad_norm": 0.38787469267845154, "learning_rate": 2.016991492502296e-05, "loss": 0.0065, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.750226616859436, "eval_Qnli-dev_cosine_ap": 0.7494456144921429, "eval_Qnli-dev_cosine_f1": 0.6999999999999998, "eval_Qnli-dev_cosine_f1_threshold": 0.6726129055023193, "eval_Qnli-dev_cosine_precision": 0.5769230769230769, "eval_Qnli-dev_cosine_recall": 0.8898305084745762, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 351.0986328125, "eval_Qnli-dev_dot_ap": 0.6983219066086047, "eval_Qnli-dev_dot_f1": 0.6773162939297124, "eval_Qnli-dev_dot_f1_threshold": 288.8251037597656, "eval_Qnli-dev_dot_precision": 0.5435897435897435, "eval_Qnli-dev_dot_recall": 0.8983050847457628, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.695568084716797, "eval_Qnli-dev_euclidean_ap": 0.7543898549379728, "eval_Qnli-dev_euclidean_f1": 0.6984126984126984, "eval_Qnli-dev_euclidean_f1_threshold": 16.431072235107422, "eval_Qnli-dev_euclidean_precision": 0.5981873111782477, "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, "eval_Qnli-dev_manhattan_accuracy": 0.708984375, "eval_Qnli-dev_manhattan_accuracy_threshold": 300.5904235839844, "eval_Qnli-dev_manhattan_ap": 0.7590128670443661, "eval_Qnli-dev_manhattan_f1": 0.7042253521126761, "eval_Qnli-dev_manhattan_f1_threshold": 346.5229187011719, "eval_Qnli-dev_manhattan_precision": 0.6024096385542169, "eval_Qnli-dev_manhattan_recall": 0.847457627118644, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 351.0986328125, "eval_Qnli-dev_max_ap": 0.7590128670443661, "eval_Qnli-dev_max_f1": 0.7042253521126761, "eval_Qnli-dev_max_f1_threshold": 346.5229187011719, "eval_Qnli-dev_max_precision": 0.6024096385542169, "eval_Qnli-dev_max_recall": 0.8983050847457628, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.797395646572113, "eval_allNLI-dev_cosine_ap": 0.6266402980803186, "eval_allNLI-dev_cosine_f1": 0.6436285097192225, "eval_allNLI-dev_cosine_f1_threshold": 0.7329103946685791, "eval_allNLI-dev_cosine_precision": 0.5137931034482759, "eval_allNLI-dev_cosine_recall": 0.861271676300578, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 398.2210388183594, "eval_allNLI-dev_dot_ap": 0.5662089458229917, "eval_allNLI-dev_dot_f1": 0.5990783410138248, "eval_allNLI-dev_dot_f1_threshold": 319.97662353515625, "eval_allNLI-dev_dot_precision": 0.49808429118773945, "eval_allNLI-dev_dot_recall": 0.7514450867052023, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.369623184204102, "eval_allNLI-dev_euclidean_ap": 0.6343492968170963, "eval_allNLI-dev_euclidean_f1": 0.6460176991150444, "eval_allNLI-dev_euclidean_f1_threshold": 15.137269973754883, "eval_allNLI-dev_euclidean_precision": 0.5232974910394266, "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, "eval_allNLI-dev_manhattan_accuracy": 0.744140625, "eval_allNLI-dev_manhattan_accuracy_threshold": 259.2945556640625, "eval_allNLI-dev_manhattan_ap": 0.6320388560985828, "eval_allNLI-dev_manhattan_f1": 0.6363636363636364, "eval_allNLI-dev_manhattan_f1_threshold": 321.5367736816406, "eval_allNLI-dev_manhattan_precision": 0.5086505190311419, "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, "eval_allNLI-dev_max_accuracy": 0.744140625, "eval_allNLI-dev_max_accuracy_threshold": 398.2210388183594, "eval_allNLI-dev_max_ap": 0.6343492968170963, "eval_allNLI-dev_max_f1": 0.6460176991150444, "eval_allNLI-dev_max_f1_threshold": 321.5367736816406, "eval_allNLI-dev_max_precision": 0.5232974910394266, "eval_allNLI-dev_max_recall": 0.861271676300578, "eval_sequential_score": 0.7590128670443661, "eval_sts-test_pearson_cosine": 0.8456032466147674, "eval_sts-test_pearson_dot": 0.8275772102194185, "eval_sts-test_pearson_euclidean": 0.8745086682855159, "eval_sts-test_pearson_manhattan": 0.871147628347342, "eval_sts-test_pearson_max": 0.8745086682855159, "eval_sts-test_spearman_cosine": 0.8759779721982759, "eval_sts-test_spearman_dot": 0.8257146084387749, "eval_sts-test_spearman_euclidean": 0.8725826939965103, "eval_sts-test_spearman_manhattan": 0.8695588512642933, "eval_sts-test_spearman_max": 0.8759779721982759, "eval_vitaminc-pairs_loss": 3.0925190448760986, "eval_vitaminc-pairs_runtime": 3.2183, "eval_vitaminc-pairs_samples_per_second": 39.773, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_negation-triplets_loss": 0.8976351618766785, "eval_negation-triplets_runtime": 0.7633, "eval_negation-triplets_samples_per_second": 167.704, "eval_negation-triplets_steps_per_second": 1.31, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_scitail-pairs-pos_loss": 0.1458752602338791, "eval_scitail-pairs-pos_runtime": 0.939, "eval_scitail-pairs-pos_samples_per_second": 136.31, "eval_scitail-pairs-pos_steps_per_second": 1.065, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_scitail-pairs-qa_loss": 0.0004099968646187335, "eval_scitail-pairs-qa_runtime": 0.6149, "eval_scitail-pairs-qa_samples_per_second": 208.162, "eval_scitail-pairs-qa_steps_per_second": 1.626, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_xsum-pairs_loss": 0.2671279311180115, "eval_xsum-pairs_runtime": 3.0252, "eval_xsum-pairs_samples_per_second": 42.312, "eval_xsum-pairs_steps_per_second": 0.331, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_sciq_pairs_loss": 0.08947267383337021, "eval_sciq_pairs_runtime": 3.519, "eval_sciq_pairs_samples_per_second": 36.374, "eval_sciq_pairs_steps_per_second": 0.284, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_qasc_pairs_loss": 0.15361525118350983, "eval_qasc_pairs_runtime": 0.6252, "eval_qasc_pairs_samples_per_second": 204.739, "eval_qasc_pairs_steps_per_second": 1.6, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_openbookqa_pairs_loss": 0.6690425872802734, "eval_openbookqa_pairs_runtime": 0.6103, "eval_openbookqa_pairs_samples_per_second": 209.74, "eval_openbookqa_pairs_steps_per_second": 1.639, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_msmarco_pairs_loss": 0.7587112188339233, "eval_msmarco_pairs_runtime": 1.529, "eval_msmarco_pairs_samples_per_second": 83.713, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_nq_pairs_loss": 0.6752411723136902, "eval_nq_pairs_runtime": 2.9092, "eval_nq_pairs_samples_per_second": 43.998, "eval_nq_pairs_steps_per_second": 0.344, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_trivia_pairs_loss": 0.7570594549179077, "eval_trivia_pairs_runtime": 3.4514, "eval_trivia_pairs_samples_per_second": 37.087, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_gooaq_pairs_loss": 0.29824191331863403, "eval_gooaq_pairs_runtime": 0.9694, "eval_gooaq_pairs_samples_per_second": 132.035, "eval_gooaq_pairs_steps_per_second": 1.032, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_paws-pos_loss": 0.02212175540626049, "eval_paws-pos_runtime": 0.7106, "eval_paws-pos_samples_per_second": 180.14, "eval_paws-pos_steps_per_second": 1.407, "step": 2400 }, { "epoch": 2.4691358024691357, "eval_global_dataset_loss": 0.4253956079483032, "eval_global_dataset_runtime": 13.4477, "eval_global_dataset_samples_per_second": 30.935, "eval_global_dataset_steps_per_second": 0.297, "step": 2400 }, { "epoch": 2.470164609053498, "grad_norm": 5.478442668914795, "learning_rate": 2.015546641644441e-05, "loss": 0.1903, "step": 2401 }, { "epoch": 2.47119341563786, "grad_norm": 4.002430438995361, "learning_rate": 2.014102317059331e-05, "loss": 0.0899, "step": 2402 }, { "epoch": 2.4722222222222223, "grad_norm": 4.9087724685668945, "learning_rate": 2.012658521138849e-05, "loss": 0.1488, "step": 2403 }, { "epoch": 2.4732510288065845, "grad_norm": 10.662351608276367, "learning_rate": 2.0112152562740022e-05, "loss": 0.5978, "step": 2404 }, { "epoch": 2.4742798353909463, "grad_norm": 13.157684326171875, "learning_rate": 2.009772524854919e-05, "loss": 1.6215, "step": 2405 }, { "epoch": 2.4753086419753085, "grad_norm": 2.8923072814941406, "learning_rate": 2.008330329270845e-05, "loss": 0.045, "step": 2406 }, { "epoch": 2.4763374485596708, "grad_norm": 5.9885663986206055, "learning_rate": 2.0068886719101372e-05, "loss": 0.2916, "step": 2407 }, { "epoch": 2.477366255144033, "grad_norm": 8.352883338928223, "learning_rate": 2.0054475551602615e-05, "loss": 0.4255, "step": 2408 }, { "epoch": 2.478395061728395, "grad_norm": 0.45715031027793884, "learning_rate": 2.0040069814077894e-05, "loss": 0.0082, "step": 2409 }, { "epoch": 2.4794238683127574, "grad_norm": 0.21182315051555634, "learning_rate": 2.002566953038392e-05, "loss": 0.0036, "step": 2410 }, { "epoch": 2.480452674897119, "grad_norm": 4.614677906036377, "learning_rate": 2.0011274724368375e-05, "loss": 0.1344, "step": 2411 }, { "epoch": 2.4814814814814814, "grad_norm": 5.564375877380371, "learning_rate": 1.9996885419869886e-05, "loss": 0.3181, "step": 2412 }, { "epoch": 2.4825102880658436, "grad_norm": 6.444988250732422, "learning_rate": 1.9982501640717944e-05, "loss": 0.2268, "step": 2413 }, { "epoch": 2.483539094650206, "grad_norm": 3.7375905513763428, "learning_rate": 1.99681234107329e-05, "loss": 0.063, "step": 2414 }, { "epoch": 2.484567901234568, "grad_norm": 7.726154804229736, "learning_rate": 1.9953750753725924e-05, "loss": 0.2874, "step": 2415 }, { "epoch": 2.48559670781893, "grad_norm": 4.854942798614502, "learning_rate": 1.9939383693498947e-05, "loss": 0.1126, "step": 2416 }, { "epoch": 2.486625514403292, "grad_norm": 4.493672847747803, "learning_rate": 1.992502225384463e-05, "loss": 0.1051, "step": 2417 }, { "epoch": 2.4876543209876543, "grad_norm": 0.311063289642334, "learning_rate": 1.9910666458546334e-05, "loss": 0.0042, "step": 2418 }, { "epoch": 2.4886831275720165, "grad_norm": 6.245817184448242, "learning_rate": 1.9896316331378068e-05, "loss": 0.3168, "step": 2419 }, { "epoch": 2.4897119341563787, "grad_norm": 2.906459331512451, "learning_rate": 1.988197189610445e-05, "loss": 0.0494, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_Qnli-dev_cosine_accuracy": 0.716796875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7279399037361145, "eval_Qnli-dev_cosine_ap": 0.7563310893572132, "eval_Qnli-dev_cosine_f1": 0.7014681892332789, "eval_Qnli-dev_cosine_f1_threshold": 0.6324313879013062, "eval_Qnli-dev_cosine_precision": 0.5702917771883289, "eval_Qnli-dev_cosine_recall": 0.9110169491525424, "eval_Qnli-dev_dot_accuracy": 0.68359375, "eval_Qnli-dev_dot_accuracy_threshold": 339.02484130859375, "eval_Qnli-dev_dot_ap": 0.7113151516571359, "eval_Qnli-dev_dot_f1": 0.6840390879478827, "eval_Qnli-dev_dot_f1_threshold": 269.8001708984375, "eval_Qnli-dev_dot_precision": 0.5555555555555556, "eval_Qnli-dev_dot_recall": 0.8898305084745762, "eval_Qnli-dev_euclidean_accuracy": 0.712890625, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.123961448669434, "eval_Qnli-dev_euclidean_ap": 0.7617899957515049, "eval_Qnli-dev_euclidean_f1": 0.7028862478777589, "eval_Qnli-dev_euclidean_f1_threshold": 17.275789260864258, "eval_Qnli-dev_euclidean_precision": 0.5864022662889519, "eval_Qnli-dev_euclidean_recall": 0.8771186440677966, "eval_Qnli-dev_manhattan_accuracy": 0.71875, "eval_Qnli-dev_manhattan_accuracy_threshold": 315.0229797363281, "eval_Qnli-dev_manhattan_ap": 0.7656838953165184, "eval_Qnli-dev_manhattan_f1": 0.7097902097902098, "eval_Qnli-dev_manhattan_f1_threshold": 357.1176452636719, "eval_Qnli-dev_manhattan_precision": 0.6041666666666666, "eval_Qnli-dev_manhattan_recall": 0.8601694915254238, "eval_Qnli-dev_max_accuracy": 0.71875, "eval_Qnli-dev_max_accuracy_threshold": 339.02484130859375, "eval_Qnli-dev_max_ap": 0.7656838953165184, "eval_Qnli-dev_max_f1": 0.7097902097902098, "eval_Qnli-dev_max_f1_threshold": 357.1176452636719, "eval_Qnli-dev_max_precision": 0.6041666666666666, "eval_Qnli-dev_max_recall": 0.9110169491525424, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8329659700393677, "eval_allNLI-dev_cosine_ap": 0.6336579252185592, "eval_allNLI-dev_cosine_f1": 0.642369020501139, "eval_allNLI-dev_cosine_f1_threshold": 0.729081928730011, "eval_allNLI-dev_cosine_precision": 0.5300751879699248, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.69921875, "eval_allNLI-dev_dot_accuracy_threshold": 378.456787109375, "eval_allNLI-dev_dot_ap": 0.5712809714617196, "eval_allNLI-dev_dot_f1": 0.6015936254980079, "eval_allNLI-dev_dot_f1_threshold": 282.9576416015625, "eval_allNLI-dev_dot_precision": 0.45896656534954405, "eval_allNLI-dev_dot_recall": 0.8728323699421965, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.69471263885498, "eval_allNLI-dev_euclidean_ap": 0.6410698746813956, "eval_allNLI-dev_euclidean_f1": 0.6417582417582418, "eval_allNLI-dev_euclidean_f1_threshold": 15.446972846984863, "eval_allNLI-dev_euclidean_precision": 0.5177304964539007, "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, "eval_allNLI-dev_manhattan_accuracy": 0.74609375, "eval_allNLI-dev_manhattan_accuracy_threshold": 265.9760437011719, "eval_allNLI-dev_manhattan_ap": 0.6378198371784047, "eval_allNLI-dev_manhattan_f1": 0.6394849785407726, "eval_allNLI-dev_manhattan_f1_threshold": 329.0739440917969, "eval_allNLI-dev_manhattan_precision": 0.5085324232081911, "eval_allNLI-dev_manhattan_recall": 0.861271676300578, "eval_allNLI-dev_max_accuracy": 0.74609375, "eval_allNLI-dev_max_accuracy_threshold": 378.456787109375, "eval_allNLI-dev_max_ap": 0.6410698746813956, "eval_allNLI-dev_max_f1": 0.642369020501139, "eval_allNLI-dev_max_f1_threshold": 329.0739440917969, "eval_allNLI-dev_max_precision": 0.5300751879699248, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.7656838953165184, "eval_sts-test_pearson_cosine": 0.8484670353258724, "eval_sts-test_pearson_dot": 0.8294021934370874, "eval_sts-test_pearson_euclidean": 0.874417885747803, "eval_sts-test_pearson_manhattan": 0.8714963913215741, "eval_sts-test_pearson_max": 0.874417885747803, "eval_sts-test_spearman_cosine": 0.8739520378438258, "eval_sts-test_spearman_dot": 0.8246635355624751, "eval_sts-test_spearman_euclidean": 0.869761395473351, "eval_sts-test_spearman_manhattan": 0.867299494330426, "eval_sts-test_spearman_max": 0.8739520378438258, "eval_vitaminc-pairs_loss": 3.1515254974365234, "eval_vitaminc-pairs_runtime": 3.2604, "eval_vitaminc-pairs_samples_per_second": 39.258, "eval_vitaminc-pairs_steps_per_second": 0.307, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_negation-triplets_loss": 0.9106173515319824, "eval_negation-triplets_runtime": 0.7727, "eval_negation-triplets_samples_per_second": 165.663, "eval_negation-triplets_steps_per_second": 1.294, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_scitail-pairs-pos_loss": 0.14981313049793243, "eval_scitail-pairs-pos_runtime": 0.9357, "eval_scitail-pairs-pos_samples_per_second": 136.79, "eval_scitail-pairs-pos_steps_per_second": 1.069, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_scitail-pairs-qa_loss": 0.0003851282817777246, "eval_scitail-pairs-qa_runtime": 0.6167, "eval_scitail-pairs-qa_samples_per_second": 207.562, "eval_scitail-pairs-qa_steps_per_second": 1.622, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_xsum-pairs_loss": 0.2592019736766815, "eval_xsum-pairs_runtime": 3.0356, "eval_xsum-pairs_samples_per_second": 42.167, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_sciq_pairs_loss": 0.10065734386444092, "eval_sciq_pairs_runtime": 3.5374, "eval_sciq_pairs_samples_per_second": 36.185, "eval_sciq_pairs_steps_per_second": 0.283, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_qasc_pairs_loss": 0.1522054374217987, "eval_qasc_pairs_runtime": 0.6254, "eval_qasc_pairs_samples_per_second": 204.678, "eval_qasc_pairs_steps_per_second": 1.599, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_openbookqa_pairs_loss": 0.6953917741775513, "eval_openbookqa_pairs_runtime": 0.6118, "eval_openbookqa_pairs_samples_per_second": 209.22, "eval_openbookqa_pairs_steps_per_second": 1.635, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_msmarco_pairs_loss": 0.7634124755859375, "eval_msmarco_pairs_runtime": 1.5293, "eval_msmarco_pairs_samples_per_second": 83.697, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_nq_pairs_loss": 0.5921059250831604, "eval_nq_pairs_runtime": 2.9026, "eval_nq_pairs_samples_per_second": 44.098, "eval_nq_pairs_steps_per_second": 0.345, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_trivia_pairs_loss": 0.8200302720069885, "eval_trivia_pairs_runtime": 3.4533, "eval_trivia_pairs_samples_per_second": 37.066, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_gooaq_pairs_loss": 0.3450007438659668, "eval_gooaq_pairs_runtime": 0.9531, "eval_gooaq_pairs_samples_per_second": 134.292, "eval_gooaq_pairs_steps_per_second": 1.049, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_paws-pos_loss": 0.02186736650764942, "eval_paws-pos_runtime": 0.7101, "eval_paws-pos_samples_per_second": 180.246, "eval_paws-pos_steps_per_second": 1.408, "step": 2420 }, { "epoch": 2.4897119341563787, "eval_global_dataset_loss": 0.433958500623703, "eval_global_dataset_runtime": 13.4125, "eval_global_dataset_samples_per_second": 31.016, "eval_global_dataset_steps_per_second": 0.298, "step": 2420 }, { "epoch": 2.490740740740741, "grad_norm": 7.837283611297607, "learning_rate": 1.986763317648068e-05, "loss": 0.2357, "step": 2421 }, { "epoch": 2.4917695473251027, "grad_norm": 6.507171630859375, "learning_rate": 1.985330019625249e-05, "loss": 0.1965, "step": 2422 }, { "epoch": 2.492798353909465, "grad_norm": 9.107889175415039, "learning_rate": 1.983897297915611e-05, "loss": 0.338, "step": 2423 }, { "epoch": 2.493827160493827, "grad_norm": 1.823559045791626, "learning_rate": 1.9824651548918213e-05, "loss": 0.0235, "step": 2424 }, { "epoch": 2.4948559670781894, "grad_norm": 6.848126411437988, "learning_rate": 1.9810335929255904e-05, "loss": 0.1863, "step": 2425 }, { "epoch": 2.4958847736625516, "grad_norm": 6.037849426269531, "learning_rate": 1.9796026143876657e-05, "loss": 0.3568, "step": 2426 }, { "epoch": 2.496913580246914, "grad_norm": 6.2176408767700195, "learning_rate": 1.9781722216478288e-05, "loss": 0.358, "step": 2427 }, { "epoch": 2.4979423868312756, "grad_norm": 4.008078098297119, "learning_rate": 1.976742417074891e-05, "loss": 0.1074, "step": 2428 }, { "epoch": 2.498971193415638, "grad_norm": 9.871790885925293, "learning_rate": 1.9753132030366893e-05, "loss": 0.4596, "step": 2429 }, { "epoch": 2.5, "grad_norm": 11.414341926574707, "learning_rate": 1.973884581900083e-05, "loss": 0.6031, "step": 2430 }, { "epoch": 2.501028806584362, "grad_norm": 5.77439546585083, "learning_rate": 1.9724565560309505e-05, "loss": 0.1052, "step": 2431 }, { "epoch": 2.5020576131687244, "grad_norm": 9.882329940795898, "learning_rate": 1.971029127794183e-05, "loss": 0.4088, "step": 2432 }, { "epoch": 2.503086419753086, "grad_norm": 8.163681030273438, "learning_rate": 1.9696022995536813e-05, "loss": 0.3387, "step": 2433 }, { "epoch": 2.5041152263374484, "grad_norm": 4.288134574890137, "learning_rate": 1.9681760736723547e-05, "loss": 0.0472, "step": 2434 }, { "epoch": 2.5051440329218106, "grad_norm": 4.319838047027588, "learning_rate": 1.966750452512114e-05, "loss": 0.0673, "step": 2435 }, { "epoch": 2.506172839506173, "grad_norm": 6.5712690353393555, "learning_rate": 1.9653254384338684e-05, "loss": 0.4204, "step": 2436 }, { "epoch": 2.507201646090535, "grad_norm": 5.69049596786499, "learning_rate": 1.9639010337975223e-05, "loss": 0.1546, "step": 2437 }, { "epoch": 2.508230452674897, "grad_norm": 8.044054985046387, "learning_rate": 1.962477240961969e-05, "loss": 0.2912, "step": 2438 }, { "epoch": 2.5092592592592595, "grad_norm": 0.8142794966697693, "learning_rate": 1.9610540622850916e-05, "loss": 0.0179, "step": 2439 }, { "epoch": 2.5102880658436213, "grad_norm": 3.2492103576660156, "learning_rate": 1.959631500123754e-05, "loss": 0.0495, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_Qnli-dev_cosine_accuracy": 0.705078125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7196286916732788, "eval_Qnli-dev_cosine_ap": 0.7484318389982814, "eval_Qnli-dev_cosine_f1": 0.7011070110701106, "eval_Qnli-dev_cosine_f1_threshold": 0.6782007217407227, "eval_Qnli-dev_cosine_precision": 0.6209150326797386, "eval_Qnli-dev_cosine_recall": 0.8050847457627118, "eval_Qnli-dev_dot_accuracy": 0.677734375, "eval_Qnli-dev_dot_accuracy_threshold": 338.0035705566406, "eval_Qnli-dev_dot_ap": 0.6990954266604491, "eval_Qnli-dev_dot_f1": 0.672566371681416, "eval_Qnli-dev_dot_f1_threshold": 285.77838134765625, "eval_Qnli-dev_dot_precision": 0.5775075987841946, "eval_Qnli-dev_dot_recall": 0.8050847457627118, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.771583557128906, "eval_Qnli-dev_euclidean_ap": 0.7580153060009305, "eval_Qnli-dev_euclidean_f1": 0.7067669172932329, "eval_Qnli-dev_euclidean_f1_threshold": 16.642635345458984, "eval_Qnli-dev_euclidean_precision": 0.6351351351351351, "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, "eval_Qnli-dev_manhattan_accuracy": 0.708984375, "eval_Qnli-dev_manhattan_accuracy_threshold": 327.4893798828125, "eval_Qnli-dev_manhattan_ap": 0.7587013184358515, "eval_Qnli-dev_manhattan_f1": 0.7032590051457975, "eval_Qnli-dev_manhattan_f1_threshold": 367.247314453125, "eval_Qnli-dev_manhattan_precision": 0.590778097982709, "eval_Qnli-dev_manhattan_recall": 0.8686440677966102, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 338.0035705566406, "eval_Qnli-dev_max_ap": 0.7587013184358515, "eval_Qnli-dev_max_f1": 0.7067669172932329, "eval_Qnli-dev_max_f1_threshold": 367.247314453125, "eval_Qnli-dev_max_precision": 0.6351351351351351, "eval_Qnli-dev_max_recall": 0.8686440677966102, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8781421184539795, "eval_allNLI-dev_cosine_ap": 0.6316663997422921, "eval_allNLI-dev_cosine_f1": 0.6376146788990825, "eval_allNLI-dev_cosine_f1_threshold": 0.7252265810966492, "eval_allNLI-dev_cosine_precision": 0.5285171102661597, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.703125, "eval_allNLI-dev_dot_accuracy_threshold": 350.6553039550781, "eval_allNLI-dev_dot_ap": 0.5659037596127712, "eval_allNLI-dev_dot_f1": 0.5963302752293578, "eval_allNLI-dev_dot_f1_threshold": 300.537109375, "eval_allNLI-dev_dot_precision": 0.49429657794676807, "eval_allNLI-dev_dot_recall": 0.7514450867052023, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.204728126525879, "eval_allNLI-dev_euclidean_ap": 0.6398362687375942, "eval_allNLI-dev_euclidean_f1": 0.641860465116279, "eval_allNLI-dev_euclidean_f1_threshold": 15.186336517333984, "eval_allNLI-dev_euclidean_precision": 0.5369649805447471, "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, "eval_allNLI-dev_manhattan_accuracy": 0.744140625, "eval_allNLI-dev_manhattan_accuracy_threshold": 284.4888000488281, "eval_allNLI-dev_manhattan_ap": 0.6342854799220193, "eval_allNLI-dev_manhattan_f1": 0.6349206349206349, "eval_allNLI-dev_manhattan_f1_threshold": 321.4658508300781, "eval_allNLI-dev_manhattan_precision": 0.5223880597014925, "eval_allNLI-dev_manhattan_recall": 0.8092485549132948, "eval_allNLI-dev_max_accuracy": 0.744140625, "eval_allNLI-dev_max_accuracy_threshold": 350.6553039550781, "eval_allNLI-dev_max_ap": 0.6398362687375942, "eval_allNLI-dev_max_f1": 0.641860465116279, "eval_allNLI-dev_max_f1_threshold": 321.4658508300781, "eval_allNLI-dev_max_precision": 0.5369649805447471, "eval_allNLI-dev_max_recall": 0.8092485549132948, "eval_sequential_score": 0.7587013184358515, "eval_sts-test_pearson_cosine": 0.8523364191500853, "eval_sts-test_pearson_dot": 0.8391425584258833, "eval_sts-test_pearson_euclidean": 0.8775257393178382, "eval_sts-test_pearson_manhattan": 0.875433900765537, "eval_sts-test_pearson_max": 0.8775257393178382, "eval_sts-test_spearman_cosine": 0.8772542334218971, "eval_sts-test_spearman_dot": 0.8356396326926823, "eval_sts-test_spearman_euclidean": 0.873932750050166, "eval_sts-test_spearman_manhattan": 0.8717948480397149, "eval_sts-test_spearman_max": 0.8772542334218971, "eval_vitaminc-pairs_loss": 3.189671754837036, "eval_vitaminc-pairs_runtime": 3.6225, "eval_vitaminc-pairs_samples_per_second": 35.334, "eval_vitaminc-pairs_steps_per_second": 0.276, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_negation-triplets_loss": 0.9069141149520874, "eval_negation-triplets_runtime": 0.7788, "eval_negation-triplets_samples_per_second": 164.365, "eval_negation-triplets_steps_per_second": 1.284, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_scitail-pairs-pos_loss": 0.14732320606708527, "eval_scitail-pairs-pos_runtime": 0.9241, "eval_scitail-pairs-pos_samples_per_second": 138.513, "eval_scitail-pairs-pos_steps_per_second": 1.082, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_scitail-pairs-qa_loss": 0.0005962368450127542, "eval_scitail-pairs-qa_runtime": 0.6459, "eval_scitail-pairs-qa_samples_per_second": 198.171, "eval_scitail-pairs-qa_steps_per_second": 1.548, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_xsum-pairs_loss": 0.3051843047142029, "eval_xsum-pairs_runtime": 3.0561, "eval_xsum-pairs_samples_per_second": 41.883, "eval_xsum-pairs_steps_per_second": 0.327, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_sciq_pairs_loss": 0.09776122123003006, "eval_sciq_pairs_runtime": 3.5714, "eval_sciq_pairs_samples_per_second": 35.84, "eval_sciq_pairs_steps_per_second": 0.28, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_qasc_pairs_loss": 0.15243121981620789, "eval_qasc_pairs_runtime": 0.6261, "eval_qasc_pairs_samples_per_second": 204.437, "eval_qasc_pairs_steps_per_second": 1.597, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_openbookqa_pairs_loss": 0.7259861826896667, "eval_openbookqa_pairs_runtime": 0.6061, "eval_openbookqa_pairs_samples_per_second": 211.175, "eval_openbookqa_pairs_steps_per_second": 1.65, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_msmarco_pairs_loss": 0.7631006836891174, "eval_msmarco_pairs_runtime": 1.5348, "eval_msmarco_pairs_samples_per_second": 83.399, "eval_msmarco_pairs_steps_per_second": 0.652, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_nq_pairs_loss": 0.6975298523902893, "eval_nq_pairs_runtime": 2.9019, "eval_nq_pairs_samples_per_second": 44.109, "eval_nq_pairs_steps_per_second": 0.345, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_trivia_pairs_loss": 0.8489959239959717, "eval_trivia_pairs_runtime": 3.4531, "eval_trivia_pairs_samples_per_second": 37.068, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_gooaq_pairs_loss": 0.35373830795288086, "eval_gooaq_pairs_runtime": 0.9612, "eval_gooaq_pairs_samples_per_second": 133.168, "eval_gooaq_pairs_steps_per_second": 1.04, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_paws-pos_loss": 0.021134065464138985, "eval_paws-pos_runtime": 0.7168, "eval_paws-pos_samples_per_second": 178.571, "eval_paws-pos_steps_per_second": 1.395, "step": 2440 }, { "epoch": 2.5102880658436213, "eval_global_dataset_loss": 0.4480046033859253, "eval_global_dataset_runtime": 13.4322, "eval_global_dataset_samples_per_second": 30.97, "eval_global_dataset_steps_per_second": 0.298, "step": 2440 }, { "epoch": 2.5113168724279835, "grad_norm": 7.582404613494873, "learning_rate": 1.958209556833799e-05, "loss": 0.479, "step": 2441 }, { "epoch": 2.5123456790123457, "grad_norm": 9.112093925476074, "learning_rate": 1.956788234770046e-05, "loss": 0.533, "step": 2442 }, { "epoch": 2.513374485596708, "grad_norm": 4.532588481903076, "learning_rate": 1.9553675362862837e-05, "loss": 0.0853, "step": 2443 }, { "epoch": 2.51440329218107, "grad_norm": 5.598708152770996, "learning_rate": 1.9539474637352706e-05, "loss": 0.0872, "step": 2444 }, { "epoch": 2.515432098765432, "grad_norm": 16.361116409301758, "learning_rate": 1.952528019468726e-05, "loss": 2.0415, "step": 2445 }, { "epoch": 2.516460905349794, "grad_norm": 1.0714730024337769, "learning_rate": 1.9511092058373308e-05, "loss": 0.0219, "step": 2446 }, { "epoch": 2.5174897119341564, "grad_norm": 5.480446815490723, "learning_rate": 1.94969102519072e-05, "loss": 0.125, "step": 2447 }, { "epoch": 2.5185185185185186, "grad_norm": 7.554908275604248, "learning_rate": 1.9482734798774816e-05, "loss": 0.3067, "step": 2448 }, { "epoch": 2.519547325102881, "grad_norm": 8.397703170776367, "learning_rate": 1.9468565722451504e-05, "loss": 0.3694, "step": 2449 }, { "epoch": 2.5205761316872426, "grad_norm": 10.139104843139648, "learning_rate": 1.9454403046402057e-05, "loss": 0.4156, "step": 2450 }, { "epoch": 2.521604938271605, "grad_norm": 4.592851161956787, "learning_rate": 1.944024679408067e-05, "loss": 0.1045, "step": 2451 }, { "epoch": 2.522633744855967, "grad_norm": 5.958555221557617, "learning_rate": 1.9426096988930898e-05, "loss": 0.2124, "step": 2452 }, { "epoch": 2.5236625514403292, "grad_norm": 3.141430377960205, "learning_rate": 1.941195365438561e-05, "loss": 0.0836, "step": 2453 }, { "epoch": 2.5246913580246915, "grad_norm": 3.026766538619995, "learning_rate": 1.939781681386699e-05, "loss": 0.0625, "step": 2454 }, { "epoch": 2.5257201646090532, "grad_norm": 4.351663589477539, "learning_rate": 1.9383686490786415e-05, "loss": 0.173, "step": 2455 }, { "epoch": 2.526748971193416, "grad_norm": 5.852506160736084, "learning_rate": 1.9369562708544525e-05, "loss": 0.2642, "step": 2456 }, { "epoch": 2.5277777777777777, "grad_norm": 8.36409854888916, "learning_rate": 1.9355445490531092e-05, "loss": 0.3243, "step": 2457 }, { "epoch": 2.52880658436214, "grad_norm": 10.044306755065918, "learning_rate": 1.9341334860125038e-05, "loss": 0.4475, "step": 2458 }, { "epoch": 2.529835390946502, "grad_norm": 1.544262170791626, "learning_rate": 1.9327230840694345e-05, "loss": 0.0241, "step": 2459 }, { "epoch": 2.5308641975308643, "grad_norm": 3.9103894233703613, "learning_rate": 1.9313133455596088e-05, "loss": 0.0623, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7407504320144653, "eval_Qnli-dev_cosine_ap": 0.752596488399803, "eval_Qnli-dev_cosine_f1": 0.7054263565891473, "eval_Qnli-dev_cosine_f1_threshold": 0.7087960243225098, "eval_Qnli-dev_cosine_precision": 0.65, "eval_Qnli-dev_cosine_recall": 0.7711864406779662, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 346.5068664550781, "eval_Qnli-dev_dot_ap": 0.6927262375072062, "eval_Qnli-dev_dot_f1": 0.6785714285714286, "eval_Qnli-dev_dot_f1_threshold": 276.58709716796875, "eval_Qnli-dev_dot_precision": 0.55, "eval_Qnli-dev_dot_recall": 0.885593220338983, "eval_Qnli-dev_euclidean_accuracy": 0.71875, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.385201454162598, "eval_Qnli-dev_euclidean_ap": 0.7633146911155302, "eval_Qnli-dev_euclidean_f1": 0.7082568807339449, "eval_Qnli-dev_euclidean_f1_threshold": 16.51651382446289, "eval_Qnli-dev_euclidean_precision": 0.6245954692556634, "eval_Qnli-dev_euclidean_recall": 0.8177966101694916, "eval_Qnli-dev_manhattan_accuracy": 0.73046875, "eval_Qnli-dev_manhattan_accuracy_threshold": 328.64874267578125, "eval_Qnli-dev_manhattan_ap": 0.7656719075878926, "eval_Qnli-dev_manhattan_f1": 0.7160493827160493, "eval_Qnli-dev_manhattan_f1_threshold": 328.64874267578125, "eval_Qnli-dev_manhattan_precision": 0.696, "eval_Qnli-dev_manhattan_recall": 0.7372881355932204, "eval_Qnli-dev_max_accuracy": 0.73046875, "eval_Qnli-dev_max_accuracy_threshold": 346.5068664550781, "eval_Qnli-dev_max_ap": 0.7656719075878926, "eval_Qnli-dev_max_f1": 0.7160493827160493, "eval_Qnli-dev_max_f1_threshold": 328.64874267578125, "eval_Qnli-dev_max_precision": 0.696, "eval_Qnli-dev_max_recall": 0.885593220338983, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8690991401672363, "eval_allNLI-dev_cosine_ap": 0.6297108739162276, "eval_allNLI-dev_cosine_f1": 0.6351351351351351, "eval_allNLI-dev_cosine_f1_threshold": 0.7144136428833008, "eval_allNLI-dev_cosine_precision": 0.5202952029520295, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.705078125, "eval_allNLI-dev_dot_accuracy_threshold": 356.1709899902344, "eval_allNLI-dev_dot_ap": 0.5676168691745578, "eval_allNLI-dev_dot_f1": 0.5977011494252873, "eval_allNLI-dev_dot_f1_threshold": 300.47137451171875, "eval_allNLI-dev_dot_precision": 0.4961832061068702, "eval_allNLI-dev_dot_recall": 0.7514450867052023, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.693002700805664, "eval_allNLI-dev_euclidean_ap": 0.6393368882621303, "eval_allNLI-dev_euclidean_f1": 0.6331877729257641, "eval_allNLI-dev_euclidean_f1_threshold": 15.953073501586914, "eval_allNLI-dev_euclidean_precision": 0.5087719298245614, "eval_allNLI-dev_euclidean_recall": 0.838150289017341, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 280.0594482421875, "eval_allNLI-dev_manhattan_ap": 0.6374081391015416, "eval_allNLI-dev_manhattan_f1": 0.6320541760722348, "eval_allNLI-dev_manhattan_f1_threshold": 326.39691162109375, "eval_allNLI-dev_manhattan_precision": 0.5185185185185185, "eval_allNLI-dev_manhattan_recall": 0.8092485549132948, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 356.1709899902344, "eval_allNLI-dev_max_ap": 0.6393368882621303, "eval_allNLI-dev_max_f1": 0.6351351351351351, "eval_allNLI-dev_max_f1_threshold": 326.39691162109375, "eval_allNLI-dev_max_precision": 0.5202952029520295, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7656719075878926, "eval_sts-test_pearson_cosine": 0.8530913606098072, "eval_sts-test_pearson_dot": 0.8401484364504492, "eval_sts-test_pearson_euclidean": 0.877896036519132, "eval_sts-test_pearson_manhattan": 0.8764060505738172, "eval_sts-test_pearson_max": 0.877896036519132, "eval_sts-test_spearman_cosine": 0.8769788789105557, "eval_sts-test_spearman_dot": 0.8340232171312247, "eval_sts-test_spearman_euclidean": 0.8724001536012949, "eval_sts-test_spearman_manhattan": 0.8710444141374423, "eval_sts-test_spearman_max": 0.8769788789105557, "eval_vitaminc-pairs_loss": 3.105875015258789, "eval_vitaminc-pairs_runtime": 3.257, "eval_vitaminc-pairs_samples_per_second": 39.3, "eval_vitaminc-pairs_steps_per_second": 0.307, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_negation-triplets_loss": 0.9383314847946167, "eval_negation-triplets_runtime": 0.7818, "eval_negation-triplets_samples_per_second": 163.73, "eval_negation-triplets_steps_per_second": 1.279, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_scitail-pairs-pos_loss": 0.14859730005264282, "eval_scitail-pairs-pos_runtime": 0.9769, "eval_scitail-pairs-pos_samples_per_second": 131.032, "eval_scitail-pairs-pos_steps_per_second": 1.024, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_scitail-pairs-qa_loss": 0.00045218339073471725, "eval_scitail-pairs-qa_runtime": 0.6264, "eval_scitail-pairs-qa_samples_per_second": 204.357, "eval_scitail-pairs-qa_steps_per_second": 1.597, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_xsum-pairs_loss": 0.2749998867511749, "eval_xsum-pairs_runtime": 3.0382, "eval_xsum-pairs_samples_per_second": 42.131, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_sciq_pairs_loss": 0.09291612356901169, "eval_sciq_pairs_runtime": 3.5694, "eval_sciq_pairs_samples_per_second": 35.861, "eval_sciq_pairs_steps_per_second": 0.28, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_qasc_pairs_loss": 0.15513256192207336, "eval_qasc_pairs_runtime": 0.6358, "eval_qasc_pairs_samples_per_second": 201.332, "eval_qasc_pairs_steps_per_second": 1.573, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_openbookqa_pairs_loss": 0.7656955718994141, "eval_openbookqa_pairs_runtime": 0.6138, "eval_openbookqa_pairs_samples_per_second": 208.537, "eval_openbookqa_pairs_steps_per_second": 1.629, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_msmarco_pairs_loss": 0.6963688731193542, "eval_msmarco_pairs_runtime": 1.5379, "eval_msmarco_pairs_samples_per_second": 83.229, "eval_msmarco_pairs_steps_per_second": 0.65, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_nq_pairs_loss": 0.5959857702255249, "eval_nq_pairs_runtime": 2.9087, "eval_nq_pairs_samples_per_second": 44.006, "eval_nq_pairs_steps_per_second": 0.344, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_trivia_pairs_loss": 0.778878927230835, "eval_trivia_pairs_runtime": 3.458, "eval_trivia_pairs_samples_per_second": 37.015, "eval_trivia_pairs_steps_per_second": 0.289, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_gooaq_pairs_loss": 0.3291349411010742, "eval_gooaq_pairs_runtime": 0.9585, "eval_gooaq_pairs_samples_per_second": 133.543, "eval_gooaq_pairs_steps_per_second": 1.043, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_paws-pos_loss": 0.02157442830502987, "eval_paws-pos_runtime": 0.7148, "eval_paws-pos_samples_per_second": 179.068, "eval_paws-pos_steps_per_second": 1.399, "step": 2460 }, { "epoch": 2.5308641975308643, "eval_global_dataset_loss": 0.42489147186279297, "eval_global_dataset_runtime": 13.4607, "eval_global_dataset_samples_per_second": 30.905, "eval_global_dataset_steps_per_second": 0.297, "step": 2460 }, { "epoch": 2.5318930041152266, "grad_norm": 4.6252593994140625, "learning_rate": 1.9299042728176326e-05, "loss": 0.0896, "step": 2461 }, { "epoch": 2.5329218106995883, "grad_norm": 2.53317928314209, "learning_rate": 1.9284958681770098e-05, "loss": 0.051, "step": 2462 }, { "epoch": 2.5339506172839505, "grad_norm": 3.673436403274536, "learning_rate": 1.9270881339701388e-05, "loss": 0.0903, "step": 2463 }, { "epoch": 2.5349794238683128, "grad_norm": 8.360755920410156, "learning_rate": 1.9256810725283066e-05, "loss": 0.3172, "step": 2464 }, { "epoch": 2.536008230452675, "grad_norm": 3.940634250640869, "learning_rate": 1.9242746861816868e-05, "loss": 0.0968, "step": 2465 }, { "epoch": 2.537037037037037, "grad_norm": 4.461543560028076, "learning_rate": 1.922868977259335e-05, "loss": 0.1176, "step": 2466 }, { "epoch": 2.538065843621399, "grad_norm": 7.496928691864014, "learning_rate": 1.921463948089184e-05, "loss": 0.2366, "step": 2467 }, { "epoch": 2.539094650205761, "grad_norm": 13.089897155761719, "learning_rate": 1.9200596009980426e-05, "loss": 1.5581, "step": 2468 }, { "epoch": 2.5401234567901234, "grad_norm": 3.959763288497925, "learning_rate": 1.9186559383115883e-05, "loss": 0.1791, "step": 2469 }, { "epoch": 2.5411522633744856, "grad_norm": 4.033388137817383, "learning_rate": 1.9172529623543666e-05, "loss": 0.1678, "step": 2470 }, { "epoch": 2.542181069958848, "grad_norm": 4.85703706741333, "learning_rate": 1.9158506754497846e-05, "loss": 0.098, "step": 2471 }, { "epoch": 2.5432098765432096, "grad_norm": 5.970437526702881, "learning_rate": 1.9144490799201084e-05, "loss": 0.2545, "step": 2472 }, { "epoch": 2.5442386831275723, "grad_norm": 4.862588405609131, "learning_rate": 1.91304817808646e-05, "loss": 0.1202, "step": 2473 }, { "epoch": 2.545267489711934, "grad_norm": 5.714596271514893, "learning_rate": 1.9116479722688123e-05, "loss": 0.1414, "step": 2474 }, { "epoch": 2.5462962962962963, "grad_norm": 4.372839450836182, "learning_rate": 1.9102484647859853e-05, "loss": 0.0874, "step": 2475 }, { "epoch": 2.5473251028806585, "grad_norm": 3.4210047721862793, "learning_rate": 1.9088496579556417e-05, "loss": 0.1012, "step": 2476 }, { "epoch": 2.5483539094650207, "grad_norm": 5.6102824211120605, "learning_rate": 1.907451554094286e-05, "loss": 0.117, "step": 2477 }, { "epoch": 2.549382716049383, "grad_norm": 0.21065284311771393, "learning_rate": 1.9060541555172567e-05, "loss": 0.002, "step": 2478 }, { "epoch": 2.5504115226337447, "grad_norm": 2.502822160720825, "learning_rate": 1.904657464538725e-05, "loss": 0.0382, "step": 2479 }, { "epoch": 2.551440329218107, "grad_norm": 11.489274978637695, "learning_rate": 1.90326148347169e-05, "loss": 0.6951, "step": 2480 }, { "epoch": 2.551440329218107, "eval_Qnli-dev_cosine_accuracy": 0.70703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7235764265060425, "eval_Qnli-dev_cosine_ap": 0.7540424832102679, "eval_Qnli-dev_cosine_f1": 0.7028985507246377, "eval_Qnli-dev_cosine_f1_threshold": 0.6627092361450195, "eval_Qnli-dev_cosine_precision": 0.6139240506329114, "eval_Qnli-dev_cosine_recall": 0.8220338983050848, "eval_Qnli-dev_dot_accuracy": 0.677734375, "eval_Qnli-dev_dot_accuracy_threshold": 327.3599853515625, "eval_Qnli-dev_dot_ap": 0.7065210985613086, "eval_Qnli-dev_dot_f1": 0.6797385620915033, "eval_Qnli-dev_dot_f1_threshold": 267.470458984375, "eval_Qnli-dev_dot_precision": 0.5531914893617021, "eval_Qnli-dev_dot_recall": 0.8813559322033898, "eval_Qnli-dev_euclidean_accuracy": 0.70703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.622426986694336, "eval_Qnli-dev_euclidean_ap": 0.7608195910206452, "eval_Qnli-dev_euclidean_f1": 0.7029126213592233, "eval_Qnli-dev_euclidean_f1_threshold": 16.548377990722656, "eval_Qnli-dev_euclidean_precision": 0.6487455197132617, "eval_Qnli-dev_euclidean_recall": 0.7669491525423728, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 323.97406005859375, "eval_Qnli-dev_manhattan_ap": 0.764546497634706, "eval_Qnli-dev_manhattan_f1": 0.7022900763358778, "eval_Qnli-dev_manhattan_f1_threshold": 349.9832458496094, "eval_Qnli-dev_manhattan_precision": 0.6388888888888888, "eval_Qnli-dev_manhattan_recall": 0.7796610169491526, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 327.3599853515625, "eval_Qnli-dev_max_ap": 0.764546497634706, "eval_Qnli-dev_max_f1": 0.7029126213592233, "eval_Qnli-dev_max_f1_threshold": 349.9832458496094, "eval_Qnli-dev_max_precision": 0.6487455197132617, "eval_Qnli-dev_max_recall": 0.8813559322033898, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8546959161758423, "eval_allNLI-dev_cosine_ap": 0.6281747307519334, "eval_allNLI-dev_cosine_f1": 0.6283185840707965, "eval_allNLI-dev_cosine_f1_threshold": 0.6967825889587402, "eval_allNLI-dev_cosine_precision": 0.5089605734767025, "eval_allNLI-dev_cosine_recall": 0.8208092485549133, "eval_allNLI-dev_dot_accuracy": 0.708984375, "eval_allNLI-dev_dot_accuracy_threshold": 349.8631591796875, "eval_allNLI-dev_dot_ap": 0.5628056401151449, "eval_allNLI-dev_dot_f1": 0.5871121718377088, "eval_allNLI-dev_dot_f1_threshold": 293.33551025390625, "eval_allNLI-dev_dot_precision": 0.5, "eval_allNLI-dev_dot_recall": 0.7109826589595376, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.06941032409668, "eval_allNLI-dev_euclidean_ap": 0.6384589662241201, "eval_allNLI-dev_euclidean_f1": 0.6483050847457628, "eval_allNLI-dev_euclidean_f1_threshold": 16.427034378051758, "eval_allNLI-dev_euclidean_precision": 0.5117056856187291, "eval_allNLI-dev_euclidean_recall": 0.884393063583815, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 249.3073272705078, "eval_allNLI-dev_manhattan_ap": 0.6353373798330103, "eval_allNLI-dev_manhattan_f1": 0.6422018348623854, "eval_allNLI-dev_manhattan_f1_threshold": 327.3839416503906, "eval_allNLI-dev_manhattan_precision": 0.532319391634981, "eval_allNLI-dev_manhattan_recall": 0.8092485549132948, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 349.8631591796875, "eval_allNLI-dev_max_ap": 0.6384589662241201, "eval_allNLI-dev_max_f1": 0.6483050847457628, "eval_allNLI-dev_max_f1_threshold": 327.3839416503906, "eval_allNLI-dev_max_precision": 0.532319391634981, "eval_allNLI-dev_max_recall": 0.884393063583815, "eval_sequential_score": 0.764546497634706, "eval_sts-test_pearson_cosine": 0.8510650247778009, "eval_sts-test_pearson_dot": 0.8371268204952556, "eval_sts-test_pearson_euclidean": 0.8754890924438443, "eval_sts-test_pearson_manhattan": 0.87360841059011, "eval_sts-test_pearson_max": 0.8754890924438443, "eval_sts-test_spearman_cosine": 0.876451932807672, "eval_sts-test_spearman_dot": 0.831523143162333, "eval_sts-test_spearman_euclidean": 0.8712764941790182, "eval_sts-test_spearman_manhattan": 0.8695444861093868, "eval_sts-test_spearman_max": 0.876451932807672, "eval_vitaminc-pairs_loss": 3.1325862407684326, "eval_vitaminc-pairs_runtime": 3.2327, "eval_vitaminc-pairs_samples_per_second": 39.595, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 2480 }, { "epoch": 2.551440329218107, "eval_negation-triplets_loss": 0.9421901702880859, "eval_negation-triplets_runtime": 0.7662, "eval_negation-triplets_samples_per_second": 167.056, "eval_negation-triplets_steps_per_second": 1.305, "step": 2480 }, { "epoch": 2.551440329218107, "eval_scitail-pairs-pos_loss": 0.14855390787124634, "eval_scitail-pairs-pos_runtime": 0.918, "eval_scitail-pairs-pos_samples_per_second": 139.434, "eval_scitail-pairs-pos_steps_per_second": 1.089, "step": 2480 }, { "epoch": 2.551440329218107, "eval_scitail-pairs-qa_loss": 0.0005054974462836981, "eval_scitail-pairs-qa_runtime": 0.6202, "eval_scitail-pairs-qa_samples_per_second": 206.379, "eval_scitail-pairs-qa_steps_per_second": 1.612, "step": 2480 }, { "epoch": 2.551440329218107, "eval_xsum-pairs_loss": 0.29268449544906616, "eval_xsum-pairs_runtime": 3.038, "eval_xsum-pairs_samples_per_second": 42.134, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2480 }, { "epoch": 2.551440329218107, "eval_sciq_pairs_loss": 0.10132085531949997, "eval_sciq_pairs_runtime": 3.5482, "eval_sciq_pairs_samples_per_second": 36.074, "eval_sciq_pairs_steps_per_second": 0.282, "step": 2480 }, { "epoch": 2.551440329218107, "eval_qasc_pairs_loss": 0.1551382690668106, "eval_qasc_pairs_runtime": 0.6228, "eval_qasc_pairs_samples_per_second": 205.524, "eval_qasc_pairs_steps_per_second": 1.606, "step": 2480 }, { "epoch": 2.551440329218107, "eval_openbookqa_pairs_loss": 0.7559497952461243, "eval_openbookqa_pairs_runtime": 0.6147, "eval_openbookqa_pairs_samples_per_second": 208.226, "eval_openbookqa_pairs_steps_per_second": 1.627, "step": 2480 }, { "epoch": 2.551440329218107, "eval_msmarco_pairs_loss": 0.7051388621330261, "eval_msmarco_pairs_runtime": 1.5364, "eval_msmarco_pairs_samples_per_second": 83.312, "eval_msmarco_pairs_steps_per_second": 0.651, "step": 2480 }, { "epoch": 2.551440329218107, "eval_nq_pairs_loss": 0.667251467704773, "eval_nq_pairs_runtime": 2.9014, "eval_nq_pairs_samples_per_second": 44.116, "eval_nq_pairs_steps_per_second": 0.345, "step": 2480 }, { "epoch": 2.551440329218107, "eval_trivia_pairs_loss": 0.7416086792945862, "eval_trivia_pairs_runtime": 3.4386, "eval_trivia_pairs_samples_per_second": 37.224, "eval_trivia_pairs_steps_per_second": 0.291, "step": 2480 }, { "epoch": 2.551440329218107, "eval_gooaq_pairs_loss": 0.3381101191043854, "eval_gooaq_pairs_runtime": 0.9602, "eval_gooaq_pairs_samples_per_second": 133.306, "eval_gooaq_pairs_steps_per_second": 1.041, "step": 2480 }, { "epoch": 2.551440329218107, "eval_paws-pos_loss": 0.021783526986837387, "eval_paws-pos_runtime": 0.706, "eval_paws-pos_samples_per_second": 181.293, "eval_paws-pos_steps_per_second": 1.416, "step": 2480 }, { "epoch": 2.551440329218107, "eval_global_dataset_loss": 0.4171276092529297, "eval_global_dataset_runtime": 13.4191, "eval_global_dataset_samples_per_second": 31.001, "eval_global_dataset_steps_per_second": 0.298, "step": 2480 }, { "epoch": 2.552469135802469, "grad_norm": 3.4090147018432617, "learning_rate": 1.901866214627976e-05, "loss": 0.046, "step": 2481 }, { "epoch": 2.5534979423868314, "grad_norm": 4.396731853485107, "learning_rate": 1.900471660318227e-05, "loss": 0.1056, "step": 2482 }, { "epoch": 2.5545267489711936, "grad_norm": 5.821942329406738, "learning_rate": 1.899077822851903e-05, "loss": 0.1582, "step": 2483 }, { "epoch": 2.5555555555555554, "grad_norm": 2.747316360473633, "learning_rate": 1.8976847045372786e-05, "loss": 0.041, "step": 2484 }, { "epoch": 2.5565843621399176, "grad_norm": 3.0699405670166016, "learning_rate": 1.896292307681436e-05, "loss": 0.0631, "step": 2485 }, { "epoch": 2.55761316872428, "grad_norm": 0.045280568301677704, "learning_rate": 1.8949006345902635e-05, "loss": 0.0004, "step": 2486 }, { "epoch": 2.558641975308642, "grad_norm": 2.42556095123291, "learning_rate": 1.8935096875684504e-05, "loss": 0.0251, "step": 2487 }, { "epoch": 2.5596707818930042, "grad_norm": 3.1204159259796143, "learning_rate": 1.892119468919484e-05, "loss": 0.0449, "step": 2488 }, { "epoch": 2.560699588477366, "grad_norm": 6.2450761795043945, "learning_rate": 1.8907299809456446e-05, "loss": 0.2126, "step": 2489 }, { "epoch": 2.5617283950617287, "grad_norm": 0.7859638929367065, "learning_rate": 1.889341225948003e-05, "loss": 0.0108, "step": 2490 }, { "epoch": 2.5627572016460904, "grad_norm": 2.73036527633667, "learning_rate": 1.8879532062264164e-05, "loss": 0.0314, "step": 2491 }, { "epoch": 2.5637860082304527, "grad_norm": 1.6652979850769043, "learning_rate": 1.886565924079523e-05, "loss": 0.0164, "step": 2492 }, { "epoch": 2.564814814814815, "grad_norm": 7.437589168548584, "learning_rate": 1.885179381804742e-05, "loss": 0.2823, "step": 2493 }, { "epoch": 2.565843621399177, "grad_norm": 3.439244508743286, "learning_rate": 1.883793581698265e-05, "loss": 0.0704, "step": 2494 }, { "epoch": 2.5668724279835393, "grad_norm": 0.3689349889755249, "learning_rate": 1.882408526055056e-05, "loss": 0.0027, "step": 2495 }, { "epoch": 2.567901234567901, "grad_norm": 6.352327346801758, "learning_rate": 1.8810242171688445e-05, "loss": 0.1596, "step": 2496 }, { "epoch": 2.5689300411522633, "grad_norm": 2.9392714500427246, "learning_rate": 1.879640657332125e-05, "loss": 0.0535, "step": 2497 }, { "epoch": 2.5699588477366255, "grad_norm": 6.382355213165283, "learning_rate": 1.878257848836151e-05, "loss": 0.2505, "step": 2498 }, { "epoch": 2.5709876543209877, "grad_norm": 5.4166646003723145, "learning_rate": 1.8768757939709314e-05, "loss": 0.1589, "step": 2499 }, { "epoch": 2.57201646090535, "grad_norm": 6.669981479644775, "learning_rate": 1.8754944950252273e-05, "loss": 0.2284, "step": 2500 }, { "epoch": 2.57201646090535, "eval_Qnli-dev_cosine_accuracy": 0.703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7508093118667603, "eval_Qnli-dev_cosine_ap": 0.75389562409459, "eval_Qnli-dev_cosine_f1": 0.7065217391304347, "eval_Qnli-dev_cosine_f1_threshold": 0.6699330806732178, "eval_Qnli-dev_cosine_precision": 0.6170886075949367, "eval_Qnli-dev_cosine_recall": 0.826271186440678, "eval_Qnli-dev_dot_accuracy": 0.685546875, "eval_Qnli-dev_dot_accuracy_threshold": 329.80792236328125, "eval_Qnli-dev_dot_ap": 0.7032755465478735, "eval_Qnli-dev_dot_f1": 0.6814814814814815, "eval_Qnli-dev_dot_f1_threshold": 299.5719909667969, "eval_Qnli-dev_dot_precision": 0.6052631578947368, "eval_Qnli-dev_dot_recall": 0.7796610169491526, "eval_Qnli-dev_euclidean_accuracy": 0.712890625, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.958727836608887, "eval_Qnli-dev_euclidean_ap": 0.7606991166855714, "eval_Qnli-dev_euclidean_f1": 0.707635009310987, "eval_Qnli-dev_euclidean_f1_threshold": 16.814437866210938, "eval_Qnli-dev_euclidean_precision": 0.6312292358803987, "eval_Qnli-dev_euclidean_recall": 0.8050847457627118, "eval_Qnli-dev_manhattan_accuracy": 0.70703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 330.868896484375, "eval_Qnli-dev_manhattan_ap": 0.7647529526432109, "eval_Qnli-dev_manhattan_f1": 0.7080979284369116, "eval_Qnli-dev_manhattan_f1_threshold": 351.5562744140625, "eval_Qnli-dev_manhattan_precision": 0.6372881355932203, "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 330.868896484375, "eval_Qnli-dev_max_ap": 0.7647529526432109, "eval_Qnli-dev_max_f1": 0.7080979284369116, "eval_Qnli-dev_max_f1_threshold": 351.5562744140625, "eval_Qnli-dev_max_precision": 0.6372881355932203, "eval_Qnli-dev_max_recall": 0.826271186440678, "eval_allNLI-dev_cosine_accuracy": 0.73046875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8467543125152588, "eval_allNLI-dev_cosine_ap": 0.6302371293598495, "eval_allNLI-dev_cosine_f1": 0.6396396396396398, "eval_allNLI-dev_cosine_f1_threshold": 0.7136609554290771, "eval_allNLI-dev_cosine_precision": 0.5239852398523985, "eval_allNLI-dev_cosine_recall": 0.8208092485549133, "eval_allNLI-dev_dot_accuracy": 0.708984375, "eval_allNLI-dev_dot_accuracy_threshold": 344.5650939941406, "eval_allNLI-dev_dot_ap": 0.5712397273103638, "eval_allNLI-dev_dot_f1": 0.5934959349593496, "eval_allNLI-dev_dot_f1_threshold": 274.1793212890625, "eval_allNLI-dev_dot_precision": 0.45768025078369906, "eval_allNLI-dev_dot_recall": 0.8439306358381503, "eval_allNLI-dev_euclidean_accuracy": 0.728515625, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.898115158081055, "eval_allNLI-dev_euclidean_ap": 0.6395675204903274, "eval_allNLI-dev_euclidean_f1": 0.6491228070175439, "eval_allNLI-dev_euclidean_f1_threshold": 15.7131986618042, "eval_allNLI-dev_euclidean_precision": 0.5229681978798587, "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 248.5677032470703, "eval_allNLI-dev_manhattan_ap": 0.6393176330798684, "eval_allNLI-dev_manhattan_f1": 0.6471910112359551, "eval_allNLI-dev_manhattan_f1_threshold": 323.1290283203125, "eval_allNLI-dev_manhattan_precision": 0.5294117647058824, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 344.5650939941406, "eval_allNLI-dev_max_ap": 0.6395675204903274, "eval_allNLI-dev_max_f1": 0.6491228070175439, "eval_allNLI-dev_max_f1_threshold": 323.1290283203125, "eval_allNLI-dev_max_precision": 0.5294117647058824, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7647529526432109, "eval_sts-test_pearson_cosine": 0.8505961008679468, "eval_sts-test_pearson_dot": 0.8358389714977501, "eval_sts-test_pearson_euclidean": 0.8751416725717206, "eval_sts-test_pearson_manhattan": 0.8729980849539773, "eval_sts-test_pearson_max": 0.8751416725717206, "eval_sts-test_spearman_cosine": 0.8766837443416795, "eval_sts-test_spearman_dot": 0.8315599734504581, "eval_sts-test_spearman_euclidean": 0.8714117772422043, "eval_sts-test_spearman_manhattan": 0.8695461866573196, "eval_sts-test_spearman_max": 0.8766837443416795, "eval_vitaminc-pairs_loss": 3.2234039306640625, "eval_vitaminc-pairs_runtime": 3.2109, "eval_vitaminc-pairs_samples_per_second": 39.864, "eval_vitaminc-pairs_steps_per_second": 0.311, "step": 2500 }, { "epoch": 2.57201646090535, "eval_negation-triplets_loss": 0.9287065863609314, "eval_negation-triplets_runtime": 0.7814, "eval_negation-triplets_samples_per_second": 163.818, "eval_negation-triplets_steps_per_second": 1.28, "step": 2500 }, { "epoch": 2.57201646090535, "eval_scitail-pairs-pos_loss": 0.14092357456684113, "eval_scitail-pairs-pos_runtime": 0.9253, "eval_scitail-pairs-pos_samples_per_second": 138.329, "eval_scitail-pairs-pos_steps_per_second": 1.081, "step": 2500 }, { "epoch": 2.57201646090535, "eval_scitail-pairs-qa_loss": 0.0007257835823111236, "eval_scitail-pairs-qa_runtime": 0.6388, "eval_scitail-pairs-qa_samples_per_second": 200.389, "eval_scitail-pairs-qa_steps_per_second": 1.566, "step": 2500 }, { "epoch": 2.57201646090535, "eval_xsum-pairs_loss": 0.28815510869026184, "eval_xsum-pairs_runtime": 3.0438, "eval_xsum-pairs_samples_per_second": 42.053, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2500 }, { "epoch": 2.57201646090535, "eval_sciq_pairs_loss": 0.09281651675701141, "eval_sciq_pairs_runtime": 3.5332, "eval_sciq_pairs_samples_per_second": 36.228, "eval_sciq_pairs_steps_per_second": 0.283, "step": 2500 }, { "epoch": 2.57201646090535, "eval_qasc_pairs_loss": 0.14021874964237213, "eval_qasc_pairs_runtime": 0.6203, "eval_qasc_pairs_samples_per_second": 206.341, "eval_qasc_pairs_steps_per_second": 1.612, "step": 2500 }, { "epoch": 2.57201646090535, "eval_openbookqa_pairs_loss": 0.7466042637825012, "eval_openbookqa_pairs_runtime": 0.5976, "eval_openbookqa_pairs_samples_per_second": 214.181, "eval_openbookqa_pairs_steps_per_second": 1.673, "step": 2500 }, { "epoch": 2.57201646090535, "eval_msmarco_pairs_loss": 0.7611977458000183, "eval_msmarco_pairs_runtime": 1.5286, "eval_msmarco_pairs_samples_per_second": 83.739, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 2500 }, { "epoch": 2.57201646090535, "eval_nq_pairs_loss": 0.6677074432373047, "eval_nq_pairs_runtime": 2.9071, "eval_nq_pairs_samples_per_second": 44.031, "eval_nq_pairs_steps_per_second": 0.344, "step": 2500 }, { "epoch": 2.57201646090535, "eval_trivia_pairs_loss": 0.8220326900482178, "eval_trivia_pairs_runtime": 3.4497, "eval_trivia_pairs_samples_per_second": 37.105, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2500 }, { "epoch": 2.57201646090535, "eval_gooaq_pairs_loss": 0.34412798285484314, "eval_gooaq_pairs_runtime": 0.9551, "eval_gooaq_pairs_samples_per_second": 134.023, "eval_gooaq_pairs_steps_per_second": 1.047, "step": 2500 }, { "epoch": 2.57201646090535, "eval_paws-pos_loss": 0.02183370850980282, "eval_paws-pos_runtime": 0.7089, "eval_paws-pos_samples_per_second": 180.556, "eval_paws-pos_steps_per_second": 1.411, "step": 2500 }, { "epoch": 2.57201646090535, "eval_global_dataset_loss": 0.42913469672203064, "eval_global_dataset_runtime": 13.4248, "eval_global_dataset_samples_per_second": 30.987, "eval_global_dataset_steps_per_second": 0.298, "step": 2500 }, { "epoch": 2.5730452674897117, "grad_norm": 6.166476726531982, "learning_rate": 1.8741139542865475e-05, "loss": 0.3454, "step": 2501 }, { "epoch": 2.574074074074074, "grad_norm": 5.319652080535889, "learning_rate": 1.8727341740411467e-05, "loss": 0.1877, "step": 2502 }, { "epoch": 2.575102880658436, "grad_norm": 4.261229991912842, "learning_rate": 1.8713551565740183e-05, "loss": 0.0757, "step": 2503 }, { "epoch": 2.5761316872427984, "grad_norm": 9.842738151550293, "learning_rate": 1.869976904168893e-05, "loss": 0.5875, "step": 2504 }, { "epoch": 2.5771604938271606, "grad_norm": 2.6120381355285645, "learning_rate": 1.8685994191082353e-05, "loss": 0.0567, "step": 2505 }, { "epoch": 2.5781893004115224, "grad_norm": 10.804230690002441, "learning_rate": 1.867222703673238e-05, "loss": 0.4306, "step": 2506 }, { "epoch": 2.5792181069958846, "grad_norm": 3.8133013248443604, "learning_rate": 1.86584676014382e-05, "loss": 0.0697, "step": 2507 }, { "epoch": 2.580246913580247, "grad_norm": 3.132995367050171, "learning_rate": 1.8644715907986223e-05, "loss": 0.0485, "step": 2508 }, { "epoch": 2.581275720164609, "grad_norm": 6.798681735992432, "learning_rate": 1.8630971979150018e-05, "loss": 0.2556, "step": 2509 }, { "epoch": 2.5823045267489713, "grad_norm": 2.599490165710449, "learning_rate": 1.8617235837690317e-05, "loss": 0.126, "step": 2510 }, { "epoch": 2.5833333333333335, "grad_norm": 3.5127954483032227, "learning_rate": 1.860350750635495e-05, "loss": 0.0554, "step": 2511 }, { "epoch": 2.5843621399176957, "grad_norm": 8.277996063232422, "learning_rate": 1.8589787007878803e-05, "loss": 0.3234, "step": 2512 }, { "epoch": 2.5853909465020575, "grad_norm": 2.4647581577301025, "learning_rate": 1.8576074364983802e-05, "loss": 0.0504, "step": 2513 }, { "epoch": 2.5864197530864197, "grad_norm": 8.635161399841309, "learning_rate": 1.856236960037886e-05, "loss": 0.318, "step": 2514 }, { "epoch": 2.587448559670782, "grad_norm": 4.386387348175049, "learning_rate": 1.8548672736759843e-05, "loss": 0.0953, "step": 2515 }, { "epoch": 2.588477366255144, "grad_norm": 0.3948823809623718, "learning_rate": 1.8534983796809533e-05, "loss": 0.0039, "step": 2516 }, { "epoch": 2.5895061728395063, "grad_norm": 3.7030575275421143, "learning_rate": 1.8521302803197583e-05, "loss": 0.071, "step": 2517 }, { "epoch": 2.590534979423868, "grad_norm": 5.527285575866699, "learning_rate": 1.8507629778580503e-05, "loss": 0.1703, "step": 2518 }, { "epoch": 2.5915637860082303, "grad_norm": 13.0263090133667, "learning_rate": 1.8493964745601586e-05, "loss": 0.7024, "step": 2519 }, { "epoch": 2.5925925925925926, "grad_norm": 9.349004745483398, "learning_rate": 1.8480307726890904e-05, "loss": 0.435, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7251886129379272, "eval_Qnli-dev_cosine_ap": 0.748327681996432, "eval_Qnli-dev_cosine_f1": 0.7020109689213895, "eval_Qnli-dev_cosine_f1_threshold": 0.688901424407959, "eval_Qnli-dev_cosine_precision": 0.617363344051447, "eval_Qnli-dev_cosine_recall": 0.8135593220338984, "eval_Qnli-dev_dot_accuracy": 0.6796875, "eval_Qnli-dev_dot_accuracy_threshold": 340.33660888671875, "eval_Qnli-dev_dot_ap": 0.6793311070321438, "eval_Qnli-dev_dot_f1": 0.6759098786828422, "eval_Qnli-dev_dot_f1_threshold": 289.5785217285156, "eval_Qnli-dev_dot_precision": 0.5718475073313783, "eval_Qnli-dev_dot_recall": 0.826271186440678, "eval_Qnli-dev_euclidean_accuracy": 0.703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.005366325378418, "eval_Qnli-dev_euclidean_ap": 0.7567749567491601, "eval_Qnli-dev_euclidean_f1": 0.7054263565891473, "eval_Qnli-dev_euclidean_f1_threshold": 16.299209594726562, "eval_Qnli-dev_euclidean_precision": 0.65, "eval_Qnli-dev_euclidean_recall": 0.7711864406779662, "eval_Qnli-dev_manhattan_accuracy": 0.708984375, "eval_Qnli-dev_manhattan_accuracy_threshold": 323.7666320800781, "eval_Qnli-dev_manhattan_ap": 0.7609706736263989, "eval_Qnli-dev_manhattan_f1": 0.7060998151571164, "eval_Qnli-dev_manhattan_f1_threshold": 351.2233581542969, "eval_Qnli-dev_manhattan_precision": 0.6262295081967213, "eval_Qnli-dev_manhattan_recall": 0.809322033898305, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 340.33660888671875, "eval_Qnli-dev_max_ap": 0.7609706736263989, "eval_Qnli-dev_max_f1": 0.7060998151571164, "eval_Qnli-dev_max_f1_threshold": 351.2233581542969, "eval_Qnli-dev_max_precision": 0.65, "eval_Qnli-dev_max_recall": 0.826271186440678, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8538202047348022, "eval_allNLI-dev_cosine_ap": 0.6214566976645772, "eval_allNLI-dev_cosine_f1": 0.625, "eval_allNLI-dev_cosine_f1_threshold": 0.7159340381622314, "eval_allNLI-dev_cosine_precision": 0.509090909090909, "eval_allNLI-dev_cosine_recall": 0.8092485549132948, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 353.05670166015625, "eval_allNLI-dev_dot_ap": 0.5597025212427812, "eval_allNLI-dev_dot_f1": 0.5868263473053892, "eval_allNLI-dev_dot_f1_threshold": 270.795654296875, "eval_allNLI-dev_dot_precision": 0.4481707317073171, "eval_allNLI-dev_dot_recall": 0.8497109826589595, "eval_allNLI-dev_euclidean_accuracy": 0.728515625, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.772331237792969, "eval_allNLI-dev_euclidean_ap": 0.6341426110785574, "eval_allNLI-dev_euclidean_f1": 0.6391304347826088, "eval_allNLI-dev_euclidean_f1_threshold": 15.549590110778809, "eval_allNLI-dev_euclidean_precision": 0.5121951219512195, "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 244.70553588867188, "eval_allNLI-dev_manhattan_ap": 0.6314637809247962, "eval_allNLI-dev_manhattan_f1": 0.6369710467706012, "eval_allNLI-dev_manhattan_f1_threshold": 323.2559509277344, "eval_allNLI-dev_manhattan_precision": 0.5181159420289855, "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 353.05670166015625, "eval_allNLI-dev_max_ap": 0.6341426110785574, "eval_allNLI-dev_max_f1": 0.6391304347826088, "eval_allNLI-dev_max_f1_threshold": 323.2559509277344, "eval_allNLI-dev_max_precision": 0.5181159420289855, "eval_allNLI-dev_max_recall": 0.8497109826589595, "eval_sequential_score": 0.7609706736263989, "eval_sts-test_pearson_cosine": 0.8494417270705765, "eval_sts-test_pearson_dot": 0.8306741764067529, "eval_sts-test_pearson_euclidean": 0.8768724691331442, "eval_sts-test_pearson_manhattan": 0.874886252119832, "eval_sts-test_pearson_max": 0.8768724691331442, "eval_sts-test_spearman_cosine": 0.8789788575333988, "eval_sts-test_spearman_dot": 0.8285574770699249, "eval_sts-test_spearman_euclidean": 0.8751109612600788, "eval_sts-test_spearman_manhattan": 0.8725621531675317, "eval_sts-test_spearman_max": 0.8789788575333988, "eval_vitaminc-pairs_loss": 3.379549264907837, "eval_vitaminc-pairs_runtime": 3.2572, "eval_vitaminc-pairs_samples_per_second": 39.297, "eval_vitaminc-pairs_steps_per_second": 0.307, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_negation-triplets_loss": 0.9225123524665833, "eval_negation-triplets_runtime": 0.7796, "eval_negation-triplets_samples_per_second": 164.176, "eval_negation-triplets_steps_per_second": 1.283, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_scitail-pairs-pos_loss": 0.14036637544631958, "eval_scitail-pairs-pos_runtime": 0.9591, "eval_scitail-pairs-pos_samples_per_second": 133.464, "eval_scitail-pairs-pos_steps_per_second": 1.043, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_scitail-pairs-qa_loss": 0.0007249915506690741, "eval_scitail-pairs-qa_runtime": 0.6179, "eval_scitail-pairs-qa_samples_per_second": 207.158, "eval_scitail-pairs-qa_steps_per_second": 1.618, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_xsum-pairs_loss": 0.2940075397491455, "eval_xsum-pairs_runtime": 3.0367, "eval_xsum-pairs_samples_per_second": 42.151, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_sciq_pairs_loss": 0.08835644274950027, "eval_sciq_pairs_runtime": 3.5629, "eval_sciq_pairs_samples_per_second": 35.926, "eval_sciq_pairs_steps_per_second": 0.281, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_qasc_pairs_loss": 0.13494905829429626, "eval_qasc_pairs_runtime": 0.6263, "eval_qasc_pairs_samples_per_second": 204.363, "eval_qasc_pairs_steps_per_second": 1.597, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_openbookqa_pairs_loss": 0.8005498051643372, "eval_openbookqa_pairs_runtime": 0.5979, "eval_openbookqa_pairs_samples_per_second": 214.082, "eval_openbookqa_pairs_steps_per_second": 1.673, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_msmarco_pairs_loss": 0.697590172290802, "eval_msmarco_pairs_runtime": 1.5285, "eval_msmarco_pairs_samples_per_second": 83.741, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_nq_pairs_loss": 0.6648739576339722, "eval_nq_pairs_runtime": 2.8993, "eval_nq_pairs_samples_per_second": 44.148, "eval_nq_pairs_steps_per_second": 0.345, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_trivia_pairs_loss": 0.8040751814842224, "eval_trivia_pairs_runtime": 3.4418, "eval_trivia_pairs_samples_per_second": 37.19, "eval_trivia_pairs_steps_per_second": 0.291, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_gooaq_pairs_loss": 0.3208771347999573, "eval_gooaq_pairs_runtime": 0.9599, "eval_gooaq_pairs_samples_per_second": 133.345, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_paws-pos_loss": 0.021342573687434196, "eval_paws-pos_runtime": 0.7082, "eval_paws-pos_samples_per_second": 180.736, "eval_paws-pos_steps_per_second": 1.412, "step": 2520 }, { "epoch": 2.5925925925925926, "eval_global_dataset_loss": 0.4530204236507416, "eval_global_dataset_runtime": 13.4367, "eval_global_dataset_samples_per_second": 30.96, "eval_global_dataset_steps_per_second": 0.298, "step": 2520 }, { "epoch": 2.593621399176955, "grad_norm": 1.7578164339065552, "learning_rate": 1.8466658745065253e-05, "loss": 0.0269, "step": 2521 }, { "epoch": 2.594650205761317, "grad_norm": 2.2386653423309326, "learning_rate": 1.845301782272812e-05, "loss": 0.0366, "step": 2522 }, { "epoch": 2.5956790123456788, "grad_norm": 4.242228984832764, "learning_rate": 1.843938498246964e-05, "loss": 0.0975, "step": 2523 }, { "epoch": 2.596707818930041, "grad_norm": 8.557851791381836, "learning_rate": 1.8425760246866573e-05, "loss": 0.3245, "step": 2524 }, { "epoch": 2.597736625514403, "grad_norm": 9.70186996459961, "learning_rate": 1.8412143638482252e-05, "loss": 0.4127, "step": 2525 }, { "epoch": 2.5987654320987654, "grad_norm": 2.1967456340789795, "learning_rate": 1.8398535179866544e-05, "loss": 0.0285, "step": 2526 }, { "epoch": 2.5997942386831276, "grad_norm": 4.0927815437316895, "learning_rate": 1.8384934893555843e-05, "loss": 0.0737, "step": 2527 }, { "epoch": 2.60082304526749, "grad_norm": 9.168094635009766, "learning_rate": 1.837134280207297e-05, "loss": 0.3748, "step": 2528 }, { "epoch": 2.601851851851852, "grad_norm": 11.000916481018066, "learning_rate": 1.835775892792721e-05, "loss": 0.4902, "step": 2529 }, { "epoch": 2.602880658436214, "grad_norm": 0.0416768416762352, "learning_rate": 1.8344183293614233e-05, "loss": 0.0005, "step": 2530 }, { "epoch": 2.603909465020576, "grad_norm": 11.488398551940918, "learning_rate": 1.833061592161605e-05, "loss": 1.1583, "step": 2531 }, { "epoch": 2.6049382716049383, "grad_norm": 2.7752737998962402, "learning_rate": 1.8317056834400997e-05, "loss": 0.0334, "step": 2532 }, { "epoch": 2.6059670781893005, "grad_norm": 2.4390814304351807, "learning_rate": 1.8303506054423688e-05, "loss": 0.053, "step": 2533 }, { "epoch": 2.6069958847736627, "grad_norm": 3.8222033977508545, "learning_rate": 1.8289963604124984e-05, "loss": 0.1288, "step": 2534 }, { "epoch": 2.6080246913580245, "grad_norm": 5.402822971343994, "learning_rate": 1.8276429505931945e-05, "loss": 0.1425, "step": 2535 }, { "epoch": 2.6090534979423867, "grad_norm": 4.8009562492370605, "learning_rate": 1.8262903782257816e-05, "loss": 0.1451, "step": 2536 }, { "epoch": 2.610082304526749, "grad_norm": 5.965047359466553, "learning_rate": 1.8249386455501952e-05, "loss": 0.1908, "step": 2537 }, { "epoch": 2.611111111111111, "grad_norm": 0.5453029274940491, "learning_rate": 1.8235877548049805e-05, "loss": 0.0044, "step": 2538 }, { "epoch": 2.6121399176954734, "grad_norm": 5.0793538093566895, "learning_rate": 1.8222377082272904e-05, "loss": 0.2378, "step": 2539 }, { "epoch": 2.613168724279835, "grad_norm": 4.566463470458984, "learning_rate": 1.8208885080528774e-05, "loss": 0.1066, "step": 2540 }, { "epoch": 2.613168724279835, "eval_Qnli-dev_cosine_accuracy": 0.697265625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7347972989082336, "eval_Qnli-dev_cosine_ap": 0.7416243803529952, "eval_Qnli-dev_cosine_f1": 0.7039711191335739, "eval_Qnli-dev_cosine_f1_threshold": 0.6872456669807434, "eval_Qnli-dev_cosine_precision": 0.6132075471698113, "eval_Qnli-dev_cosine_recall": 0.826271186440678, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 334.18353271484375, "eval_Qnli-dev_dot_ap": 0.6809839038409365, "eval_Qnli-dev_dot_f1": 0.6714031971580817, "eval_Qnli-dev_dot_f1_threshold": 295.66534423828125, "eval_Qnli-dev_dot_precision": 0.5779816513761468, "eval_Qnli-dev_dot_recall": 0.8008474576271186, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.0936279296875, "eval_Qnli-dev_euclidean_ap": 0.7511411683064413, "eval_Qnli-dev_euclidean_f1": 0.6996336996336996, "eval_Qnli-dev_euclidean_f1_threshold": 16.44924545288086, "eval_Qnli-dev_euclidean_precision": 0.6161290322580645, "eval_Qnli-dev_euclidean_recall": 0.809322033898305, "eval_Qnli-dev_manhattan_accuracy": 0.70703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 308.08575439453125, "eval_Qnli-dev_manhattan_ap": 0.7550849365274038, "eval_Qnli-dev_manhattan_f1": 0.7110266159695817, "eval_Qnli-dev_manhattan_f1_threshold": 341.57763671875, "eval_Qnli-dev_manhattan_precision": 0.6448275862068965, "eval_Qnli-dev_manhattan_recall": 0.7923728813559322, "eval_Qnli-dev_max_accuracy": 0.708984375, "eval_Qnli-dev_max_accuracy_threshold": 334.18353271484375, "eval_Qnli-dev_max_ap": 0.7550849365274038, "eval_Qnli-dev_max_f1": 0.7110266159695817, "eval_Qnli-dev_max_f1_threshold": 341.57763671875, "eval_Qnli-dev_max_precision": 0.6448275862068965, "eval_Qnli-dev_max_recall": 0.826271186440678, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8740963935852051, "eval_allNLI-dev_cosine_ap": 0.6207954004070142, "eval_allNLI-dev_cosine_f1": 0.6305882352941177, "eval_allNLI-dev_cosine_f1_threshold": 0.7428080439567566, "eval_allNLI-dev_cosine_precision": 0.5317460317460317, "eval_allNLI-dev_cosine_recall": 0.7745664739884393, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 369.31964111328125, "eval_allNLI-dev_dot_ap": 0.5515733361000523, "eval_allNLI-dev_dot_f1": 0.5858585858585859, "eval_allNLI-dev_dot_f1_threshold": 283.7347717285156, "eval_allNLI-dev_dot_precision": 0.4503105590062112, "eval_allNLI-dev_dot_recall": 0.838150289017341, "eval_allNLI-dev_euclidean_accuracy": 0.744140625, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.991886138916016, "eval_allNLI-dev_euclidean_ap": 0.6339687991970019, "eval_allNLI-dev_euclidean_f1": 0.6458797327394209, "eval_allNLI-dev_euclidean_f1_threshold": 15.262733459472656, "eval_allNLI-dev_euclidean_precision": 0.5253623188405797, "eval_allNLI-dev_euclidean_recall": 0.838150289017341, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 280.542236328125, "eval_allNLI-dev_manhattan_ap": 0.6282077789783579, "eval_allNLI-dev_manhattan_f1": 0.6431718061674009, "eval_allNLI-dev_manhattan_f1_threshold": 323.2728576660156, "eval_allNLI-dev_manhattan_precision": 0.5195729537366548, "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, "eval_allNLI-dev_max_accuracy": 0.744140625, "eval_allNLI-dev_max_accuracy_threshold": 369.31964111328125, "eval_allNLI-dev_max_ap": 0.6339687991970019, "eval_allNLI-dev_max_f1": 0.6458797327394209, "eval_allNLI-dev_max_f1_threshold": 323.2728576660156, "eval_allNLI-dev_max_precision": 0.5317460317460317, "eval_allNLI-dev_max_recall": 0.8439306358381503, "eval_sequential_score": 0.7550849365274038, "eval_sts-test_pearson_cosine": 0.8436892173450835, "eval_sts-test_pearson_dot": 0.8161614056600369, "eval_sts-test_pearson_euclidean": 0.8747850736377845, "eval_sts-test_pearson_manhattan": 0.8725689127354326, "eval_sts-test_pearson_max": 0.8747850736377845, "eval_sts-test_spearman_cosine": 0.8754703138890516, "eval_sts-test_spearman_dot": 0.8092124465469122, "eval_sts-test_spearman_euclidean": 0.8734285823393305, "eval_sts-test_spearman_manhattan": 0.8706312257410156, "eval_sts-test_spearman_max": 0.8754703138890516, "eval_vitaminc-pairs_loss": 3.3029744625091553, "eval_vitaminc-pairs_runtime": 3.2343, "eval_vitaminc-pairs_samples_per_second": 39.576, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 2540 }, { "epoch": 2.613168724279835, "eval_negation-triplets_loss": 0.9069310426712036, "eval_negation-triplets_runtime": 0.7774, "eval_negation-triplets_samples_per_second": 164.659, "eval_negation-triplets_steps_per_second": 1.286, "step": 2540 }, { "epoch": 2.613168724279835, "eval_scitail-pairs-pos_loss": 0.1403876394033432, "eval_scitail-pairs-pos_runtime": 0.9445, "eval_scitail-pairs-pos_samples_per_second": 135.525, "eval_scitail-pairs-pos_steps_per_second": 1.059, "step": 2540 }, { "epoch": 2.613168724279835, "eval_scitail-pairs-qa_loss": 0.0007205409347079694, "eval_scitail-pairs-qa_runtime": 0.6132, "eval_scitail-pairs-qa_samples_per_second": 208.74, "eval_scitail-pairs-qa_steps_per_second": 1.631, "step": 2540 }, { "epoch": 2.613168724279835, "eval_xsum-pairs_loss": 0.33914807438850403, "eval_xsum-pairs_runtime": 3.0378, "eval_xsum-pairs_samples_per_second": 42.135, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2540 }, { "epoch": 2.613168724279835, "eval_sciq_pairs_loss": 0.09370269626379013, "eval_sciq_pairs_runtime": 3.5523, "eval_sciq_pairs_samples_per_second": 36.033, "eval_sciq_pairs_steps_per_second": 0.282, "step": 2540 }, { "epoch": 2.613168724279835, "eval_qasc_pairs_loss": 0.14020417630672455, "eval_qasc_pairs_runtime": 0.6277, "eval_qasc_pairs_samples_per_second": 203.912, "eval_qasc_pairs_steps_per_second": 1.593, "step": 2540 }, { "epoch": 2.613168724279835, "eval_openbookqa_pairs_loss": 0.7885816097259521, "eval_openbookqa_pairs_runtime": 0.6152, "eval_openbookqa_pairs_samples_per_second": 208.077, "eval_openbookqa_pairs_steps_per_second": 1.626, "step": 2540 }, { "epoch": 2.613168724279835, "eval_msmarco_pairs_loss": 0.69005286693573, "eval_msmarco_pairs_runtime": 1.5287, "eval_msmarco_pairs_samples_per_second": 83.731, "eval_msmarco_pairs_steps_per_second": 0.654, "step": 2540 }, { "epoch": 2.613168724279835, "eval_nq_pairs_loss": 0.644152045249939, "eval_nq_pairs_runtime": 2.9048, "eval_nq_pairs_samples_per_second": 44.065, "eval_nq_pairs_steps_per_second": 0.344, "step": 2540 }, { "epoch": 2.613168724279835, "eval_trivia_pairs_loss": 0.7462302446365356, "eval_trivia_pairs_runtime": 3.4523, "eval_trivia_pairs_samples_per_second": 37.077, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2540 }, { "epoch": 2.613168724279835, "eval_gooaq_pairs_loss": 0.2984876334667206, "eval_gooaq_pairs_runtime": 0.9676, "eval_gooaq_pairs_samples_per_second": 132.281, "eval_gooaq_pairs_steps_per_second": 1.033, "step": 2540 }, { "epoch": 2.613168724279835, "eval_paws-pos_loss": 0.02198866941034794, "eval_paws-pos_runtime": 0.7284, "eval_paws-pos_samples_per_second": 175.727, "eval_paws-pos_steps_per_second": 1.373, "step": 2540 }, { "epoch": 2.613168724279835, "eval_global_dataset_loss": 0.4452175796031952, "eval_global_dataset_runtime": 13.4391, "eval_global_dataset_samples_per_second": 30.954, "eval_global_dataset_steps_per_second": 0.298, "step": 2540 }, { "epoch": 2.6141975308641974, "grad_norm": 4.7599639892578125, "learning_rate": 1.8195401565160936e-05, "loss": 0.1052, "step": 2541 }, { "epoch": 2.6152263374485596, "grad_norm": 2.892470121383667, "learning_rate": 1.8181926558498852e-05, "loss": 0.0689, "step": 2542 }, { "epoch": 2.616255144032922, "grad_norm": 5.532009601593018, "learning_rate": 1.8168460082857903e-05, "loss": 0.1281, "step": 2543 }, { "epoch": 2.617283950617284, "grad_norm": 3.4185428619384766, "learning_rate": 1.8155002160539324e-05, "loss": 0.0802, "step": 2544 }, { "epoch": 2.6183127572016462, "grad_norm": 2.5799851417541504, "learning_rate": 1.814155281383021e-05, "loss": 0.0336, "step": 2545 }, { "epoch": 2.6193415637860085, "grad_norm": 9.6151123046875, "learning_rate": 1.8128112065003422e-05, "loss": 0.3981, "step": 2546 }, { "epoch": 2.6203703703703702, "grad_norm": 3.292311429977417, "learning_rate": 1.8114679936317617e-05, "loss": 0.0528, "step": 2547 }, { "epoch": 2.6213991769547325, "grad_norm": 2.3397133350372314, "learning_rate": 1.810125645001716e-05, "loss": 0.019, "step": 2548 }, { "epoch": 2.6224279835390947, "grad_norm": 5.602199554443359, "learning_rate": 1.808784162833209e-05, "loss": 0.1287, "step": 2549 }, { "epoch": 2.623456790123457, "grad_norm": 8.078383445739746, "learning_rate": 1.807443549347812e-05, "loss": 0.3798, "step": 2550 }, { "epoch": 2.624485596707819, "grad_norm": 0.13280944526195526, "learning_rate": 1.8061038067656566e-05, "loss": 0.002, "step": 2551 }, { "epoch": 2.625514403292181, "grad_norm": 3.115669012069702, "learning_rate": 1.804764937305433e-05, "loss": 0.0509, "step": 2552 }, { "epoch": 2.626543209876543, "grad_norm": 0.5160894393920898, "learning_rate": 1.8034269431843837e-05, "loss": 0.0403, "step": 2553 }, { "epoch": 2.6275720164609053, "grad_norm": 2.8853940963745117, "learning_rate": 1.8020898266183028e-05, "loss": 0.0691, "step": 2554 }, { "epoch": 2.6286008230452675, "grad_norm": 6.755553245544434, "learning_rate": 1.8007535898215322e-05, "loss": 0.2631, "step": 2555 }, { "epoch": 2.6296296296296298, "grad_norm": 13.587359428405762, "learning_rate": 1.7994182350069544e-05, "loss": 1.5158, "step": 2556 }, { "epoch": 2.6306584362139915, "grad_norm": 10.73571491241455, "learning_rate": 1.798083764385993e-05, "loss": 0.7129, "step": 2557 }, { "epoch": 2.6316872427983538, "grad_norm": 0.8754851818084717, "learning_rate": 1.7967501801686066e-05, "loss": 0.0526, "step": 2558 }, { "epoch": 2.632716049382716, "grad_norm": 12.748086929321289, "learning_rate": 1.7954174845632863e-05, "loss": 0.574, "step": 2559 }, { "epoch": 2.633744855967078, "grad_norm": 6.738278865814209, "learning_rate": 1.794085679777052e-05, "loss": 0.2269, "step": 2560 }, { "epoch": 2.633744855967078, "eval_Qnli-dev_cosine_accuracy": 0.69921875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7472108602523804, "eval_Qnli-dev_cosine_ap": 0.7419581770426393, "eval_Qnli-dev_cosine_f1": 0.700348432055749, "eval_Qnli-dev_cosine_f1_threshold": 0.668160080909729, "eval_Qnli-dev_cosine_precision": 0.5946745562130178, "eval_Qnli-dev_cosine_recall": 0.8516949152542372, "eval_Qnli-dev_dot_accuracy": 0.677734375, "eval_Qnli-dev_dot_accuracy_threshold": 330.5736389160156, "eval_Qnli-dev_dot_ap": 0.689383636845089, "eval_Qnli-dev_dot_f1": 0.672661870503597, "eval_Qnli-dev_dot_f1_threshold": 303.59796142578125, "eval_Qnli-dev_dot_precision": 0.584375, "eval_Qnli-dev_dot_recall": 0.7923728813559322, "eval_Qnli-dev_euclidean_accuracy": 0.705078125, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.200478553771973, "eval_Qnli-dev_euclidean_ap": 0.752752505015462, "eval_Qnli-dev_euclidean_f1": 0.7082568807339449, "eval_Qnli-dev_euclidean_f1_threshold": 16.415273666381836, "eval_Qnli-dev_euclidean_precision": 0.6245954692556634, "eval_Qnli-dev_euclidean_recall": 0.8177966101694916, "eval_Qnli-dev_manhattan_accuracy": 0.69921875, "eval_Qnli-dev_manhattan_accuracy_threshold": 308.05975341796875, "eval_Qnli-dev_manhattan_ap": 0.7543040216671292, "eval_Qnli-dev_manhattan_f1": 0.7080979284369116, "eval_Qnli-dev_manhattan_f1_threshold": 342.9434814453125, "eval_Qnli-dev_manhattan_precision": 0.6372881355932203, "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, "eval_Qnli-dev_max_accuracy": 0.705078125, "eval_Qnli-dev_max_accuracy_threshold": 330.5736389160156, "eval_Qnli-dev_max_ap": 0.7543040216671292, "eval_Qnli-dev_max_f1": 0.7082568807339449, "eval_Qnli-dev_max_f1_threshold": 342.9434814453125, "eval_Qnli-dev_max_precision": 0.6372881355932203, "eval_Qnli-dev_max_recall": 0.8516949152542372, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8039998412132263, "eval_allNLI-dev_cosine_ap": 0.6220996013993169, "eval_allNLI-dev_cosine_f1": 0.6305882352941177, "eval_allNLI-dev_cosine_f1_threshold": 0.7354094982147217, "eval_allNLI-dev_cosine_precision": 0.5317460317460317, "eval_allNLI-dev_cosine_recall": 0.7745664739884393, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 346.41259765625, "eval_allNLI-dev_dot_ap": 0.5565229013900539, "eval_allNLI-dev_dot_f1": 0.5831702544031311, "eval_allNLI-dev_dot_f1_threshold": 274.1090087890625, "eval_allNLI-dev_dot_precision": 0.4408284023668639, "eval_allNLI-dev_dot_recall": 0.861271676300578, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.22194766998291, "eval_allNLI-dev_euclidean_ap": 0.6346136337315481, "eval_allNLI-dev_euclidean_f1": 0.6438356164383561, "eval_allNLI-dev_euclidean_f1_threshold": 15.188009262084961, "eval_allNLI-dev_euclidean_precision": 0.5320754716981132, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.740234375, "eval_allNLI-dev_manhattan_accuracy_threshold": 284.653564453125, "eval_allNLI-dev_manhattan_ap": 0.6298799583559361, "eval_allNLI-dev_manhattan_f1": 0.64, "eval_allNLI-dev_manhattan_f1_threshold": 323.64794921875, "eval_allNLI-dev_manhattan_precision": 0.51985559566787, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.740234375, "eval_allNLI-dev_max_accuracy_threshold": 346.41259765625, "eval_allNLI-dev_max_ap": 0.6346136337315481, "eval_allNLI-dev_max_f1": 0.6438356164383561, "eval_allNLI-dev_max_f1_threshold": 323.64794921875, "eval_allNLI-dev_max_precision": 0.5320754716981132, "eval_allNLI-dev_max_recall": 0.861271676300578, "eval_sequential_score": 0.7543040216671292, "eval_sts-test_pearson_cosine": 0.8459506097334918, "eval_sts-test_pearson_dot": 0.8208837992692455, "eval_sts-test_pearson_euclidean": 0.8731667167526915, "eval_sts-test_pearson_manhattan": 0.8710894756324609, "eval_sts-test_pearson_max": 0.8731667167526915, "eval_sts-test_spearman_cosine": 0.874299978901144, "eval_sts-test_spearman_dot": 0.8121811109738333, "eval_sts-test_spearman_euclidean": 0.8705162149992397, "eval_sts-test_spearman_manhattan": 0.8681815864437118, "eval_sts-test_spearman_max": 0.874299978901144, "eval_vitaminc-pairs_loss": 3.281205415725708, "eval_vitaminc-pairs_runtime": 3.2246, "eval_vitaminc-pairs_samples_per_second": 39.695, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2560 }, { "epoch": 2.633744855967078, "eval_negation-triplets_loss": 0.9458452463150024, "eval_negation-triplets_runtime": 0.7729, "eval_negation-triplets_samples_per_second": 165.612, "eval_negation-triplets_steps_per_second": 1.294, "step": 2560 }, { "epoch": 2.633744855967078, "eval_scitail-pairs-pos_loss": 0.13827168941497803, "eval_scitail-pairs-pos_runtime": 0.9569, "eval_scitail-pairs-pos_samples_per_second": 133.771, "eval_scitail-pairs-pos_steps_per_second": 1.045, "step": 2560 }, { "epoch": 2.633744855967078, "eval_scitail-pairs-qa_loss": 0.0007812771946191788, "eval_scitail-pairs-qa_runtime": 0.6123, "eval_scitail-pairs-qa_samples_per_second": 209.046, "eval_scitail-pairs-qa_steps_per_second": 1.633, "step": 2560 }, { "epoch": 2.633744855967078, "eval_xsum-pairs_loss": 0.3036934435367584, "eval_xsum-pairs_runtime": 3.038, "eval_xsum-pairs_samples_per_second": 42.133, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2560 }, { "epoch": 2.633744855967078, "eval_sciq_pairs_loss": 0.08964813500642776, "eval_sciq_pairs_runtime": 3.5442, "eval_sciq_pairs_samples_per_second": 36.115, "eval_sciq_pairs_steps_per_second": 0.282, "step": 2560 }, { "epoch": 2.633744855967078, "eval_qasc_pairs_loss": 0.15069030225276947, "eval_qasc_pairs_runtime": 0.6563, "eval_qasc_pairs_samples_per_second": 195.04, "eval_qasc_pairs_steps_per_second": 1.524, "step": 2560 }, { "epoch": 2.633744855967078, "eval_openbookqa_pairs_loss": 0.7651960849761963, "eval_openbookqa_pairs_runtime": 0.6179, "eval_openbookqa_pairs_samples_per_second": 207.165, "eval_openbookqa_pairs_steps_per_second": 1.618, "step": 2560 }, { "epoch": 2.633744855967078, "eval_msmarco_pairs_loss": 0.7059141993522644, "eval_msmarco_pairs_runtime": 1.5273, "eval_msmarco_pairs_samples_per_second": 83.81, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 2560 }, { "epoch": 2.633744855967078, "eval_nq_pairs_loss": 0.6533631086349487, "eval_nq_pairs_runtime": 2.9088, "eval_nq_pairs_samples_per_second": 44.004, "eval_nq_pairs_steps_per_second": 0.344, "step": 2560 }, { "epoch": 2.633744855967078, "eval_trivia_pairs_loss": 0.7508307695388794, "eval_trivia_pairs_runtime": 3.4492, "eval_trivia_pairs_samples_per_second": 37.11, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2560 }, { "epoch": 2.633744855967078, "eval_gooaq_pairs_loss": 0.3268025517463684, "eval_gooaq_pairs_runtime": 0.9595, "eval_gooaq_pairs_samples_per_second": 133.4, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 2560 }, { "epoch": 2.633744855967078, "eval_paws-pos_loss": 0.021816374734044075, "eval_paws-pos_runtime": 0.7168, "eval_paws-pos_samples_per_second": 178.561, "eval_paws-pos_steps_per_second": 1.395, "step": 2560 }, { "epoch": 2.633744855967078, "eval_global_dataset_loss": 0.4567856788635254, "eval_global_dataset_runtime": 13.4388, "eval_global_dataset_samples_per_second": 30.955, "eval_global_dataset_steps_per_second": 0.298, "step": 2560 }, { "epoch": 2.6347736625514404, "grad_norm": 4.527674674987793, "learning_rate": 1.7927547680154473e-05, "loss": 0.1645, "step": 2561 }, { "epoch": 2.6358024691358026, "grad_norm": 4.207751750946045, "learning_rate": 1.7914247514825372e-05, "loss": 0.079, "step": 2562 }, { "epoch": 2.636831275720165, "grad_norm": 8.055123329162598, "learning_rate": 1.7900956323809044e-05, "loss": 0.3544, "step": 2563 }, { "epoch": 2.6378600823045266, "grad_norm": 1.727632761001587, "learning_rate": 1.7887674129116458e-05, "loss": 0.0274, "step": 2564 }, { "epoch": 2.638888888888889, "grad_norm": 6.485104560852051, "learning_rate": 1.787440095274367e-05, "loss": 0.2277, "step": 2565 }, { "epoch": 2.639917695473251, "grad_norm": 2.8004956245422363, "learning_rate": 1.7861136816671826e-05, "loss": 0.0422, "step": 2566 }, { "epoch": 2.6409465020576133, "grad_norm": 3.481257915496826, "learning_rate": 1.7847881742867072e-05, "loss": 0.0641, "step": 2567 }, { "epoch": 2.6419753086419755, "grad_norm": 0.19010227918624878, "learning_rate": 1.7834635753280572e-05, "loss": 0.0019, "step": 2568 }, { "epoch": 2.6430041152263373, "grad_norm": 4.518033027648926, "learning_rate": 1.7821398869848427e-05, "loss": 0.1717, "step": 2569 }, { "epoch": 2.6440329218106995, "grad_norm": 3.3492650985717773, "learning_rate": 1.7808171114491665e-05, "loss": 0.0414, "step": 2570 }, { "epoch": 2.6450617283950617, "grad_norm": 6.0391411781311035, "learning_rate": 1.7794952509116194e-05, "loss": 0.1508, "step": 2571 }, { "epoch": 2.646090534979424, "grad_norm": 6.24597692489624, "learning_rate": 1.7781743075612785e-05, "loss": 0.287, "step": 2572 }, { "epoch": 2.647119341563786, "grad_norm": 4.371918678283691, "learning_rate": 1.7768542835856997e-05, "loss": 0.1636, "step": 2573 }, { "epoch": 2.648148148148148, "grad_norm": 5.617849826812744, "learning_rate": 1.775535181170918e-05, "loss": 0.1417, "step": 2574 }, { "epoch": 2.64917695473251, "grad_norm": 0.7585577368736267, "learning_rate": 1.7742170025014406e-05, "loss": 0.0057, "step": 2575 }, { "epoch": 2.6502057613168724, "grad_norm": 5.705314636230469, "learning_rate": 1.7728997497602476e-05, "loss": 0.1221, "step": 2576 }, { "epoch": 2.6512345679012346, "grad_norm": 0.08371909707784653, "learning_rate": 1.771583425128782e-05, "loss": 0.0007, "step": 2577 }, { "epoch": 2.652263374485597, "grad_norm": 15.912981033325195, "learning_rate": 1.7702680307869542e-05, "loss": 2.0316, "step": 2578 }, { "epoch": 2.653292181069959, "grad_norm": 8.79644775390625, "learning_rate": 1.7689535689131294e-05, "loss": 0.3543, "step": 2579 }, { "epoch": 2.6543209876543212, "grad_norm": 3.05710506439209, "learning_rate": 1.767640041684133e-05, "loss": 0.0657, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_Qnli-dev_cosine_accuracy": 0.69921875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7484172582626343, "eval_Qnli-dev_cosine_ap": 0.7444016607108496, "eval_Qnli-dev_cosine_f1": 0.7052631578947368, "eval_Qnli-dev_cosine_f1_threshold": 0.6621572375297546, "eval_Qnli-dev_cosine_precision": 0.6017964071856288, "eval_Qnli-dev_cosine_recall": 0.8516949152542372, "eval_Qnli-dev_dot_accuracy": 0.673828125, "eval_Qnli-dev_dot_accuracy_threshold": 324.5037841796875, "eval_Qnli-dev_dot_ap": 0.6846581759479151, "eval_Qnli-dev_dot_f1": 0.6753731343283581, "eval_Qnli-dev_dot_f1_threshold": 301.7471923828125, "eval_Qnli-dev_dot_precision": 0.6033333333333334, "eval_Qnli-dev_dot_recall": 0.7669491525423728, "eval_Qnli-dev_euclidean_accuracy": 0.701171875, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.733160018920898, "eval_Qnli-dev_euclidean_ap": 0.7541699764293507, "eval_Qnli-dev_euclidean_f1": 0.7097902097902098, "eval_Qnli-dev_euclidean_f1_threshold": 17.237119674682617, "eval_Qnli-dev_euclidean_precision": 0.6041666666666666, "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, "eval_Qnli-dev_manhattan_accuracy": 0.701171875, "eval_Qnli-dev_manhattan_accuracy_threshold": 310.09771728515625, "eval_Qnli-dev_manhattan_ap": 0.757899803996903, "eval_Qnli-dev_manhattan_f1": 0.7077464788732394, "eval_Qnli-dev_manhattan_f1_threshold": 360.0821533203125, "eval_Qnli-dev_manhattan_precision": 0.6054216867469879, "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, "eval_Qnli-dev_max_accuracy": 0.701171875, "eval_Qnli-dev_max_accuracy_threshold": 324.5037841796875, "eval_Qnli-dev_max_ap": 0.757899803996903, "eval_Qnli-dev_max_f1": 0.7097902097902098, "eval_Qnli-dev_max_f1_threshold": 360.0821533203125, "eval_Qnli-dev_max_precision": 0.6054216867469879, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8518314361572266, "eval_allNLI-dev_cosine_ap": 0.6251283386043572, "eval_allNLI-dev_cosine_f1": 0.6275395033860045, "eval_allNLI-dev_cosine_f1_threshold": 0.700609028339386, "eval_allNLI-dev_cosine_precision": 0.5148148148148148, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 335.95489501953125, "eval_allNLI-dev_dot_ap": 0.5695989977515594, "eval_allNLI-dev_dot_f1": 0.594704684317719, "eval_allNLI-dev_dot_f1_threshold": 267.34747314453125, "eval_allNLI-dev_dot_precision": 0.4591194968553459, "eval_allNLI-dev_dot_recall": 0.8439306358381503, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.570003509521484, "eval_allNLI-dev_euclidean_ap": 0.63537324994232, "eval_allNLI-dev_euclidean_f1": 0.6434782608695653, "eval_allNLI-dev_euclidean_f1_threshold": 15.92835807800293, "eval_allNLI-dev_euclidean_precision": 0.5156794425087108, "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 286.9229736328125, "eval_allNLI-dev_manhattan_ap": 0.6304260331979171, "eval_allNLI-dev_manhattan_f1": 0.6433260393873084, "eval_allNLI-dev_manhattan_f1_threshold": 334.8063659667969, "eval_allNLI-dev_manhattan_precision": 0.5176056338028169, "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 335.95489501953125, "eval_allNLI-dev_max_ap": 0.63537324994232, "eval_allNLI-dev_max_f1": 0.6434782608695653, "eval_allNLI-dev_max_f1_threshold": 334.8063659667969, "eval_allNLI-dev_max_precision": 0.5176056338028169, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.757899803996903, "eval_sts-test_pearson_cosine": 0.8515681442014782, "eval_sts-test_pearson_dot": 0.8421992816407995, "eval_sts-test_pearson_euclidean": 0.8739837908620688, "eval_sts-test_pearson_manhattan": 0.871596824982543, "eval_sts-test_pearson_max": 0.8739837908620688, "eval_sts-test_spearman_cosine": 0.8758156146219479, "eval_sts-test_spearman_dot": 0.8362005002517007, "eval_sts-test_spearman_euclidean": 0.8699935202661881, "eval_sts-test_spearman_manhattan": 0.867855304996907, "eval_sts-test_spearman_max": 0.8758156146219479, "eval_vitaminc-pairs_loss": 3.213340997695923, "eval_vitaminc-pairs_runtime": 3.2431, "eval_vitaminc-pairs_samples_per_second": 39.469, "eval_vitaminc-pairs_steps_per_second": 0.308, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_negation-triplets_loss": 0.9520102143287659, "eval_negation-triplets_runtime": 0.7768, "eval_negation-triplets_samples_per_second": 164.78, "eval_negation-triplets_steps_per_second": 1.287, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_scitail-pairs-pos_loss": 0.13387437164783478, "eval_scitail-pairs-pos_runtime": 0.967, "eval_scitail-pairs-pos_samples_per_second": 132.363, "eval_scitail-pairs-pos_steps_per_second": 1.034, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_scitail-pairs-qa_loss": 0.0005432313773781061, "eval_scitail-pairs-qa_runtime": 0.6286, "eval_scitail-pairs-qa_samples_per_second": 203.62, "eval_scitail-pairs-qa_steps_per_second": 1.591, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_xsum-pairs_loss": 0.300295889377594, "eval_xsum-pairs_runtime": 3.0364, "eval_xsum-pairs_samples_per_second": 42.155, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_sciq_pairs_loss": 0.0935114249587059, "eval_sciq_pairs_runtime": 3.5687, "eval_sciq_pairs_samples_per_second": 35.868, "eval_sciq_pairs_steps_per_second": 0.28, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_qasc_pairs_loss": 0.148627370595932, "eval_qasc_pairs_runtime": 0.6366, "eval_qasc_pairs_samples_per_second": 201.054, "eval_qasc_pairs_steps_per_second": 1.571, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_openbookqa_pairs_loss": 0.7378137707710266, "eval_openbookqa_pairs_runtime": 0.616, "eval_openbookqa_pairs_samples_per_second": 207.795, "eval_openbookqa_pairs_steps_per_second": 1.623, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_msmarco_pairs_loss": 0.7179670929908752, "eval_msmarco_pairs_runtime": 1.5425, "eval_msmarco_pairs_samples_per_second": 82.983, "eval_msmarco_pairs_steps_per_second": 0.648, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_nq_pairs_loss": 0.583605170249939, "eval_nq_pairs_runtime": 2.9252, "eval_nq_pairs_samples_per_second": 43.757, "eval_nq_pairs_steps_per_second": 0.342, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_trivia_pairs_loss": 0.7238264679908752, "eval_trivia_pairs_runtime": 3.4828, "eval_trivia_pairs_samples_per_second": 36.752, "eval_trivia_pairs_steps_per_second": 0.287, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_gooaq_pairs_loss": 0.3407573401927948, "eval_gooaq_pairs_runtime": 0.9661, "eval_gooaq_pairs_samples_per_second": 132.493, "eval_gooaq_pairs_steps_per_second": 1.035, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_paws-pos_loss": 0.021885672584176064, "eval_paws-pos_runtime": 0.7226, "eval_paws-pos_samples_per_second": 177.133, "eval_paws-pos_steps_per_second": 1.384, "step": 2580 }, { "epoch": 2.6543209876543212, "eval_global_dataset_loss": 0.43512460589408875, "eval_global_dataset_runtime": 13.4367, "eval_global_dataset_samples_per_second": 30.96, "eval_global_dataset_steps_per_second": 0.298, "step": 2580 }, { "epoch": 2.655349794238683, "grad_norm": 9.852272987365723, "learning_rate": 1.7663274512752394e-05, "loss": 0.4626, "step": 2581 }, { "epoch": 2.656378600823045, "grad_norm": 6.879485130310059, "learning_rate": 1.7650157998601722e-05, "loss": 0.2376, "step": 2582 }, { "epoch": 2.6574074074074074, "grad_norm": 9.315069198608398, "learning_rate": 1.7637050896111006e-05, "loss": 0.3867, "step": 2583 }, { "epoch": 2.6584362139917697, "grad_norm": 1.0761109590530396, "learning_rate": 1.7623953226986355e-05, "loss": 0.0226, "step": 2584 }, { "epoch": 2.659465020576132, "grad_norm": 1.5962140560150146, "learning_rate": 1.7610865012918247e-05, "loss": 0.1158, "step": 2585 }, { "epoch": 2.6604938271604937, "grad_norm": 3.737316131591797, "learning_rate": 1.7597786275581496e-05, "loss": 0.1177, "step": 2586 }, { "epoch": 2.661522633744856, "grad_norm": 9.39920711517334, "learning_rate": 1.758471703663525e-05, "loss": 0.4314, "step": 2587 }, { "epoch": 2.662551440329218, "grad_norm": 3.40377140045166, "learning_rate": 1.757165731772289e-05, "loss": 0.041, "step": 2588 }, { "epoch": 2.6635802469135803, "grad_norm": 0.13125638663768768, "learning_rate": 1.755860714047206e-05, "loss": 0.0015, "step": 2589 }, { "epoch": 2.6646090534979425, "grad_norm": 3.656142473220825, "learning_rate": 1.7545566526494593e-05, "loss": 0.0685, "step": 2590 }, { "epoch": 2.6656378600823043, "grad_norm": 12.456727027893066, "learning_rate": 1.7532535497386475e-05, "loss": 1.3724, "step": 2591 }, { "epoch": 2.6666666666666665, "grad_norm": 2.0424954891204834, "learning_rate": 1.7519514074727837e-05, "loss": 0.0311, "step": 2592 }, { "epoch": 2.6676954732510287, "grad_norm": 2.686506748199463, "learning_rate": 1.7506502280082887e-05, "loss": 0.026, "step": 2593 }, { "epoch": 2.668724279835391, "grad_norm": 4.565975666046143, "learning_rate": 1.7493500134999892e-05, "loss": 0.0979, "step": 2594 }, { "epoch": 2.669753086419753, "grad_norm": 3.2041702270507812, "learning_rate": 1.7480507661011138e-05, "loss": 0.0593, "step": 2595 }, { "epoch": 2.6707818930041154, "grad_norm": 7.0108442306518555, "learning_rate": 1.7467524879632908e-05, "loss": 0.2319, "step": 2596 }, { "epoch": 2.6718106995884776, "grad_norm": 4.191745758056641, "learning_rate": 1.745455181236541e-05, "loss": 0.095, "step": 2597 }, { "epoch": 2.6728395061728394, "grad_norm": 3.530073881149292, "learning_rate": 1.7441588480692786e-05, "loss": 0.0763, "step": 2598 }, { "epoch": 2.6738683127572016, "grad_norm": 4.993064880371094, "learning_rate": 1.7428634906083047e-05, "loss": 0.1286, "step": 2599 }, { "epoch": 2.674897119341564, "grad_norm": 3.2204020023345947, "learning_rate": 1.7415691109988037e-05, "loss": 0.0851, "step": 2600 }, { "epoch": 2.674897119341564, "eval_Qnli-dev_cosine_accuracy": 0.6953125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7362703084945679, "eval_Qnli-dev_cosine_ap": 0.7478703569504768, "eval_Qnli-dev_cosine_f1": 0.7046632124352332, "eval_Qnli-dev_cosine_f1_threshold": 0.6684526205062866, "eval_Qnli-dev_cosine_precision": 0.5947521865889213, "eval_Qnli-dev_cosine_recall": 0.864406779661017, "eval_Qnli-dev_dot_accuracy": 0.669921875, "eval_Qnli-dev_dot_accuracy_threshold": 336.400634765625, "eval_Qnli-dev_dot_ap": 0.6824974445362095, "eval_Qnli-dev_dot_f1": 0.6837881219903691, "eval_Qnli-dev_dot_f1_threshold": 271.37933349609375, "eval_Qnli-dev_dot_precision": 0.5503875968992248, "eval_Qnli-dev_dot_recall": 0.902542372881356, "eval_Qnli-dev_euclidean_accuracy": 0.701171875, "eval_Qnli-dev_euclidean_accuracy_threshold": 16.319154739379883, "eval_Qnli-dev_euclidean_ap": 0.7570243909931207, "eval_Qnli-dev_euclidean_f1": 0.7207207207207207, "eval_Qnli-dev_euclidean_f1_threshold": 16.61956214904785, "eval_Qnli-dev_euclidean_precision": 0.6269592476489029, "eval_Qnli-dev_euclidean_recall": 0.847457627118644, "eval_Qnli-dev_manhattan_accuracy": 0.703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 325.358642578125, "eval_Qnli-dev_manhattan_ap": 0.7623669439249948, "eval_Qnli-dev_manhattan_f1": 0.7175843694493783, "eval_Qnli-dev_manhattan_f1_threshold": 353.44708251953125, "eval_Qnli-dev_manhattan_precision": 0.617737003058104, "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, "eval_Qnli-dev_max_accuracy": 0.703125, "eval_Qnli-dev_max_accuracy_threshold": 336.400634765625, "eval_Qnli-dev_max_ap": 0.7623669439249948, "eval_Qnli-dev_max_f1": 0.7207207207207207, "eval_Qnli-dev_max_f1_threshold": 353.44708251953125, "eval_Qnli-dev_max_precision": 0.6269592476489029, "eval_Qnli-dev_max_recall": 0.902542372881356, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8365390300750732, "eval_allNLI-dev_cosine_ap": 0.6243747006523906, "eval_allNLI-dev_cosine_f1": 0.6308068459657702, "eval_allNLI-dev_cosine_f1_threshold": 0.7326910495758057, "eval_allNLI-dev_cosine_precision": 0.5466101694915254, "eval_allNLI-dev_cosine_recall": 0.7456647398843931, "eval_allNLI-dev_dot_accuracy": 0.70703125, "eval_allNLI-dev_dot_accuracy_threshold": 349.9114990234375, "eval_allNLI-dev_dot_ap": 0.5636032934145848, "eval_allNLI-dev_dot_f1": 0.591715976331361, "eval_allNLI-dev_dot_f1_threshold": 268.2897033691406, "eval_allNLI-dev_dot_precision": 0.4491017964071856, "eval_allNLI-dev_dot_recall": 0.8670520231213873, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.68728256225586, "eval_allNLI-dev_euclidean_ap": 0.6367778394387558, "eval_allNLI-dev_euclidean_f1": 0.6469248291571754, "eval_allNLI-dev_euclidean_f1_threshold": 15.481573104858398, "eval_allNLI-dev_euclidean_precision": 0.5338345864661654, "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, "eval_allNLI-dev_manhattan_accuracy": 0.736328125, "eval_allNLI-dev_manhattan_accuracy_threshold": 276.6816101074219, "eval_allNLI-dev_manhattan_ap": 0.633052882633361, "eval_allNLI-dev_manhattan_f1": 0.647450110864745, "eval_allNLI-dev_manhattan_f1_threshold": 330.48126220703125, "eval_allNLI-dev_manhattan_precision": 0.5251798561151079, "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 349.9114990234375, "eval_allNLI-dev_max_ap": 0.6367778394387558, "eval_allNLI-dev_max_f1": 0.647450110864745, "eval_allNLI-dev_max_f1_threshold": 330.48126220703125, "eval_allNLI-dev_max_precision": 0.5466101694915254, "eval_allNLI-dev_max_recall": 0.8670520231213873, "eval_sequential_score": 0.7623669439249948, "eval_sts-test_pearson_cosine": 0.852024318882, "eval_sts-test_pearson_dot": 0.8435240743260703, "eval_sts-test_pearson_euclidean": 0.8766620274603868, "eval_sts-test_pearson_manhattan": 0.8746535751378708, "eval_sts-test_pearson_max": 0.8766620274603868, "eval_sts-test_spearman_cosine": 0.8783670630389171, "eval_sts-test_spearman_dot": 0.8399603222287605, "eval_sts-test_spearman_euclidean": 0.8732234873083737, "eval_sts-test_spearman_manhattan": 0.871147386789904, "eval_sts-test_spearman_max": 0.8783670630389171, "eval_vitaminc-pairs_loss": 3.1473426818847656, "eval_vitaminc-pairs_runtime": 3.2208, "eval_vitaminc-pairs_samples_per_second": 39.741, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2600 }, { "epoch": 2.674897119341564, "eval_negation-triplets_loss": 0.901240885257721, "eval_negation-triplets_runtime": 0.7721, "eval_negation-triplets_samples_per_second": 165.781, "eval_negation-triplets_steps_per_second": 1.295, "step": 2600 }, { "epoch": 2.674897119341564, "eval_scitail-pairs-pos_loss": 0.12968897819519043, "eval_scitail-pairs-pos_runtime": 0.9408, "eval_scitail-pairs-pos_samples_per_second": 136.053, "eval_scitail-pairs-pos_steps_per_second": 1.063, "step": 2600 }, { "epoch": 2.674897119341564, "eval_scitail-pairs-qa_loss": 0.0007262742146849632, "eval_scitail-pairs-qa_runtime": 0.6206, "eval_scitail-pairs-qa_samples_per_second": 206.251, "eval_scitail-pairs-qa_steps_per_second": 1.611, "step": 2600 }, { "epoch": 2.674897119341564, "eval_xsum-pairs_loss": 0.31425875425338745, "eval_xsum-pairs_runtime": 3.0277, "eval_xsum-pairs_samples_per_second": 42.276, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2600 }, { "epoch": 2.674897119341564, "eval_sciq_pairs_loss": 0.09135416150093079, "eval_sciq_pairs_runtime": 3.5898, "eval_sciq_pairs_samples_per_second": 35.657, "eval_sciq_pairs_steps_per_second": 0.279, "step": 2600 }, { "epoch": 2.674897119341564, "eval_qasc_pairs_loss": 0.12798336148262024, "eval_qasc_pairs_runtime": 0.6364, "eval_qasc_pairs_samples_per_second": 201.12, "eval_qasc_pairs_steps_per_second": 1.571, "step": 2600 }, { "epoch": 2.674897119341564, "eval_openbookqa_pairs_loss": 0.7262670397758484, "eval_openbookqa_pairs_runtime": 0.6041, "eval_openbookqa_pairs_samples_per_second": 211.877, "eval_openbookqa_pairs_steps_per_second": 1.655, "step": 2600 }, { "epoch": 2.674897119341564, "eval_msmarco_pairs_loss": 0.6486848592758179, "eval_msmarco_pairs_runtime": 1.5339, "eval_msmarco_pairs_samples_per_second": 83.448, "eval_msmarco_pairs_steps_per_second": 0.652, "step": 2600 }, { "epoch": 2.674897119341564, "eval_nq_pairs_loss": 0.559202253818512, "eval_nq_pairs_runtime": 2.9068, "eval_nq_pairs_samples_per_second": 44.034, "eval_nq_pairs_steps_per_second": 0.344, "step": 2600 }, { "epoch": 2.674897119341564, "eval_trivia_pairs_loss": 0.710675835609436, "eval_trivia_pairs_runtime": 3.4625, "eval_trivia_pairs_samples_per_second": 36.968, "eval_trivia_pairs_steps_per_second": 0.289, "step": 2600 }, { "epoch": 2.674897119341564, "eval_gooaq_pairs_loss": 0.33728501200675964, "eval_gooaq_pairs_runtime": 0.9584, "eval_gooaq_pairs_samples_per_second": 133.561, "eval_gooaq_pairs_steps_per_second": 1.043, "step": 2600 }, { "epoch": 2.674897119341564, "eval_paws-pos_loss": 0.021707231178879738, "eval_paws-pos_runtime": 0.7152, "eval_paws-pos_samples_per_second": 178.962, "eval_paws-pos_steps_per_second": 1.398, "step": 2600 }, { "epoch": 2.674897119341564, "eval_global_dataset_loss": 0.4227386713027954, "eval_global_dataset_runtime": 13.427, "eval_global_dataset_samples_per_second": 30.982, "eval_global_dataset_steps_per_second": 0.298, "step": 2600 }, { "epoch": 2.675925925925926, "grad_norm": 8.135689735412598, "learning_rate": 1.740275711384342e-05, "loss": 0.3919, "step": 2601 }, { "epoch": 2.6769547325102883, "grad_norm": 3.8438615798950195, "learning_rate": 1.7389832939068632e-05, "loss": 0.1138, "step": 2602 }, { "epoch": 2.67798353909465, "grad_norm": 3.447300910949707, "learning_rate": 1.7376918607066828e-05, "loss": 0.091, "step": 2603 }, { "epoch": 2.6790123456790123, "grad_norm": 0.17035308480262756, "learning_rate": 1.7364014139224874e-05, "loss": 0.0028, "step": 2604 }, { "epoch": 2.6800411522633745, "grad_norm": 9.796527862548828, "learning_rate": 1.7351119556913306e-05, "loss": 0.3596, "step": 2605 }, { "epoch": 2.6810699588477367, "grad_norm": 4.091612339019775, "learning_rate": 1.7338234881486276e-05, "loss": 0.156, "step": 2606 }, { "epoch": 2.682098765432099, "grad_norm": 8.733772277832031, "learning_rate": 1.732536013428153e-05, "loss": 0.3413, "step": 2607 }, { "epoch": 2.6831275720164607, "grad_norm": 7.2233099937438965, "learning_rate": 1.7312495336620394e-05, "loss": 0.2312, "step": 2608 }, { "epoch": 2.684156378600823, "grad_norm": 6.816574573516846, "learning_rate": 1.7299640509807683e-05, "loss": 0.213, "step": 2609 }, { "epoch": 2.685185185185185, "grad_norm": 5.4541144371032715, "learning_rate": 1.7286795675131732e-05, "loss": 0.2369, "step": 2610 }, { "epoch": 2.6862139917695473, "grad_norm": 4.839052677154541, "learning_rate": 1.727396085386431e-05, "loss": 0.1865, "step": 2611 }, { "epoch": 2.6872427983539096, "grad_norm": 8.429651260375977, "learning_rate": 1.72611360672606e-05, "loss": 0.3433, "step": 2612 }, { "epoch": 2.6882716049382713, "grad_norm": 4.5881218910217285, "learning_rate": 1.7248321336559187e-05, "loss": 0.1045, "step": 2613 }, { "epoch": 2.689300411522634, "grad_norm": 4.84063720703125, "learning_rate": 1.7235516682981983e-05, "loss": 0.1291, "step": 2614 }, { "epoch": 2.6903292181069958, "grad_norm": 2.6606414318084717, "learning_rate": 1.7222722127734216e-05, "loss": 0.0334, "step": 2615 }, { "epoch": 2.691358024691358, "grad_norm": 5.476185321807861, "learning_rate": 1.7209937692004394e-05, "loss": 0.1277, "step": 2616 }, { "epoch": 2.69238683127572, "grad_norm": 5.779826641082764, "learning_rate": 1.7197163396964275e-05, "loss": 0.2863, "step": 2617 }, { "epoch": 2.6934156378600824, "grad_norm": 0.04902912676334381, "learning_rate": 1.7184399263768802e-05, "loss": 0.0004, "step": 2618 }, { "epoch": 2.6944444444444446, "grad_norm": 4.969849586486816, "learning_rate": 1.717164531355611e-05, "loss": 0.1686, "step": 2619 }, { "epoch": 2.6954732510288064, "grad_norm": 5.742938995361328, "learning_rate": 1.715890156744746e-05, "loss": 0.1179, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_Qnli-dev_cosine_accuracy": 0.6953125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7264567613601685, "eval_Qnli-dev_cosine_ap": 0.7465763928871585, "eval_Qnli-dev_cosine_f1": 0.7020109689213895, "eval_Qnli-dev_cosine_f1_threshold": 0.6638573408126831, "eval_Qnli-dev_cosine_precision": 0.617363344051447, "eval_Qnli-dev_cosine_recall": 0.8135593220338984, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 312.00946044921875, "eval_Qnli-dev_dot_ap": 0.6902863737048937, "eval_Qnli-dev_dot_f1": 0.6763754045307444, "eval_Qnli-dev_dot_f1_threshold": 252.58135986328125, "eval_Qnli-dev_dot_precision": 0.5471204188481675, "eval_Qnli-dev_dot_recall": 0.885593220338983, "eval_Qnli-dev_euclidean_accuracy": 0.701171875, "eval_Qnli-dev_euclidean_accuracy_threshold": 16.147762298583984, "eval_Qnli-dev_euclidean_ap": 0.7578725873698218, "eval_Qnli-dev_euclidean_f1": 0.7084870848708488, "eval_Qnli-dev_euclidean_f1_threshold": 16.909732818603516, "eval_Qnli-dev_euclidean_precision": 0.6274509803921569, "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, "eval_Qnli-dev_manhattan_accuracy": 0.705078125, "eval_Qnli-dev_manhattan_accuracy_threshold": 325.0968017578125, "eval_Qnli-dev_manhattan_ap": 0.7598659302283028, "eval_Qnli-dev_manhattan_f1": 0.7084078711985689, "eval_Qnli-dev_manhattan_f1_threshold": 360.3451843261719, "eval_Qnli-dev_manhattan_precision": 0.6130030959752322, "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, "eval_Qnli-dev_max_accuracy": 0.705078125, "eval_Qnli-dev_max_accuracy_threshold": 325.0968017578125, "eval_Qnli-dev_max_ap": 0.7598659302283028, "eval_Qnli-dev_max_f1": 0.7084870848708488, "eval_Qnli-dev_max_f1_threshold": 360.3451843261719, "eval_Qnli-dev_max_precision": 0.6274509803921569, "eval_Qnli-dev_max_recall": 0.885593220338983, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8582905530929565, "eval_allNLI-dev_cosine_ap": 0.6247010454525644, "eval_allNLI-dev_cosine_f1": 0.6297229219143576, "eval_allNLI-dev_cosine_f1_threshold": 0.7234146595001221, "eval_allNLI-dev_cosine_precision": 0.5580357142857143, "eval_allNLI-dev_cosine_recall": 0.7225433526011561, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 354.59814453125, "eval_allNLI-dev_dot_ap": 0.56744815106673, "eval_allNLI-dev_dot_f1": 0.5860113421550095, "eval_allNLI-dev_dot_f1_threshold": 243.18655395507812, "eval_allNLI-dev_dot_precision": 0.4353932584269663, "eval_allNLI-dev_dot_recall": 0.8959537572254336, "eval_allNLI-dev_euclidean_accuracy": 0.7421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.53614616394043, "eval_allNLI-dev_euclidean_ap": 0.6376923089630202, "eval_allNLI-dev_euclidean_f1": 0.6381156316916489, "eval_allNLI-dev_euclidean_f1_threshold": 16.240928649902344, "eval_allNLI-dev_euclidean_precision": 0.5068027210884354, "eval_allNLI-dev_euclidean_recall": 0.861271676300578, "eval_allNLI-dev_manhattan_accuracy": 0.740234375, "eval_allNLI-dev_manhattan_accuracy_threshold": 278.1414489746094, "eval_allNLI-dev_manhattan_ap": 0.6341257200825056, "eval_allNLI-dev_manhattan_f1": 0.6445916114790288, "eval_allNLI-dev_manhattan_f1_threshold": 338.42498779296875, "eval_allNLI-dev_manhattan_precision": 0.5214285714285715, "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, "eval_allNLI-dev_max_accuracy": 0.7421875, "eval_allNLI-dev_max_accuracy_threshold": 354.59814453125, "eval_allNLI-dev_max_ap": 0.6376923089630202, "eval_allNLI-dev_max_f1": 0.6445916114790288, "eval_allNLI-dev_max_f1_threshold": 338.42498779296875, "eval_allNLI-dev_max_precision": 0.5580357142857143, "eval_allNLI-dev_max_recall": 0.8959537572254336, "eval_sequential_score": 0.7598659302283028, "eval_sts-test_pearson_cosine": 0.8542773846250993, "eval_sts-test_pearson_dot": 0.8440504553035415, "eval_sts-test_pearson_euclidean": 0.8794116876646535, "eval_sts-test_pearson_manhattan": 0.87794635022087, "eval_sts-test_pearson_max": 0.8794116876646535, "eval_sts-test_spearman_cosine": 0.8821762009060301, "eval_sts-test_spearman_dot": 0.8404598805596726, "eval_sts-test_spearman_euclidean": 0.8769202995093944, "eval_sts-test_spearman_manhattan": 0.8752871469761767, "eval_sts-test_spearman_max": 0.8821762009060301, "eval_vitaminc-pairs_loss": 3.269437551498413, "eval_vitaminc-pairs_runtime": 3.2254, "eval_vitaminc-pairs_samples_per_second": 39.685, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_negation-triplets_loss": 0.9486592411994934, "eval_negation-triplets_runtime": 0.7633, "eval_negation-triplets_samples_per_second": 167.686, "eval_negation-triplets_steps_per_second": 1.31, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_scitail-pairs-pos_loss": 0.14502786099910736, "eval_scitail-pairs-pos_runtime": 0.9436, "eval_scitail-pairs-pos_samples_per_second": 135.656, "eval_scitail-pairs-pos_steps_per_second": 1.06, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_scitail-pairs-qa_loss": 0.000535947794560343, "eval_scitail-pairs-qa_runtime": 0.6202, "eval_scitail-pairs-qa_samples_per_second": 206.389, "eval_scitail-pairs-qa_steps_per_second": 1.612, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_xsum-pairs_loss": 0.3298634886741638, "eval_xsum-pairs_runtime": 3.0314, "eval_xsum-pairs_samples_per_second": 42.225, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_sciq_pairs_loss": 0.09292829036712646, "eval_sciq_pairs_runtime": 3.5539, "eval_sciq_pairs_samples_per_second": 36.017, "eval_sciq_pairs_steps_per_second": 0.281, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_qasc_pairs_loss": 0.1371685266494751, "eval_qasc_pairs_runtime": 0.6331, "eval_qasc_pairs_samples_per_second": 202.18, "eval_qasc_pairs_steps_per_second": 1.58, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_openbookqa_pairs_loss": 0.744242787361145, "eval_openbookqa_pairs_runtime": 0.6086, "eval_openbookqa_pairs_samples_per_second": 210.325, "eval_openbookqa_pairs_steps_per_second": 1.643, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_msmarco_pairs_loss": 0.7878044843673706, "eval_msmarco_pairs_runtime": 1.5273, "eval_msmarco_pairs_samples_per_second": 83.811, "eval_msmarco_pairs_steps_per_second": 0.655, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_nq_pairs_loss": 0.5722874999046326, "eval_nq_pairs_runtime": 2.9128, "eval_nq_pairs_samples_per_second": 43.944, "eval_nq_pairs_steps_per_second": 0.343, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_trivia_pairs_loss": 0.7739209532737732, "eval_trivia_pairs_runtime": 3.4641, "eval_trivia_pairs_samples_per_second": 36.951, "eval_trivia_pairs_steps_per_second": 0.289, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_gooaq_pairs_loss": 0.34145432710647583, "eval_gooaq_pairs_runtime": 0.9903, "eval_gooaq_pairs_samples_per_second": 129.248, "eval_gooaq_pairs_steps_per_second": 1.01, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_paws-pos_loss": 0.02111244387924671, "eval_paws-pos_runtime": 0.7177, "eval_paws-pos_samples_per_second": 178.338, "eval_paws-pos_steps_per_second": 1.393, "step": 2620 }, { "epoch": 2.6954732510288064, "eval_global_dataset_loss": 0.44551166892051697, "eval_global_dataset_runtime": 13.4327, "eval_global_dataset_samples_per_second": 30.969, "eval_global_dataset_steps_per_second": 0.298, "step": 2620 }, { "epoch": 2.6965020576131686, "grad_norm": 5.093355178833008, "learning_rate": 1.714616804654722e-05, "loss": 0.215, "step": 2621 }, { "epoch": 2.697530864197531, "grad_norm": 9.597935676574707, "learning_rate": 1.7133444771942817e-05, "loss": 0.5451, "step": 2622 }, { "epoch": 2.698559670781893, "grad_norm": 5.977519989013672, "learning_rate": 1.7120731764704718e-05, "loss": 0.1519, "step": 2623 }, { "epoch": 2.6995884773662553, "grad_norm": 4.844002723693848, "learning_rate": 1.7108029045886376e-05, "loss": 0.1012, "step": 2624 }, { "epoch": 2.700617283950617, "grad_norm": 1.202161431312561, "learning_rate": 1.7095336636524217e-05, "loss": 0.0676, "step": 2625 }, { "epoch": 2.7016460905349793, "grad_norm": 10.282391548156738, "learning_rate": 1.7082654557637587e-05, "loss": 0.3419, "step": 2626 }, { "epoch": 2.7026748971193415, "grad_norm": 14.298833847045898, "learning_rate": 1.706998283022873e-05, "loss": 1.3958, "step": 2627 }, { "epoch": 2.7037037037037037, "grad_norm": 6.158807754516602, "learning_rate": 1.7057321475282737e-05, "loss": 0.1862, "step": 2628 }, { "epoch": 2.704732510288066, "grad_norm": 3.5302419662475586, "learning_rate": 1.7044670513767534e-05, "loss": 0.0612, "step": 2629 }, { "epoch": 2.7057613168724277, "grad_norm": 14.628544807434082, "learning_rate": 1.703202996663382e-05, "loss": 1.6331, "step": 2630 }, { "epoch": 2.7067901234567904, "grad_norm": 10.040212631225586, "learning_rate": 1.7019399854815075e-05, "loss": 0.448, "step": 2631 }, { "epoch": 2.707818930041152, "grad_norm": 2.6035237312316895, "learning_rate": 1.700678019922746e-05, "loss": 0.0283, "step": 2632 }, { "epoch": 2.7088477366255144, "grad_norm": 3.1965601444244385, "learning_rate": 1.699417102076985e-05, "loss": 0.0444, "step": 2633 }, { "epoch": 2.7098765432098766, "grad_norm": 3.0109784603118896, "learning_rate": 1.6981572340323754e-05, "loss": 0.065, "step": 2634 }, { "epoch": 2.710905349794239, "grad_norm": 4.103814601898193, "learning_rate": 1.69689841787533e-05, "loss": 0.1107, "step": 2635 }, { "epoch": 2.711934156378601, "grad_norm": 15.227310180664062, "learning_rate": 1.6956406556905195e-05, "loss": 1.9622, "step": 2636 }, { "epoch": 2.712962962962963, "grad_norm": 2.7210817337036133, "learning_rate": 1.6943839495608693e-05, "loss": 0.0402, "step": 2637 }, { "epoch": 2.713991769547325, "grad_norm": 10.53106689453125, "learning_rate": 1.693128301567556e-05, "loss": 0.461, "step": 2638 }, { "epoch": 2.7150205761316872, "grad_norm": 2.2448031902313232, "learning_rate": 1.6918737137900032e-05, "loss": 0.0318, "step": 2639 }, { "epoch": 2.7160493827160495, "grad_norm": 7.479017734527588, "learning_rate": 1.6906201883058804e-05, "loss": 0.2836, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_Qnli-dev_cosine_accuracy": 0.69921875, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7608738541603088, "eval_Qnli-dev_cosine_ap": 0.7504648480135697, "eval_Qnli-dev_cosine_f1": 0.6958041958041957, "eval_Qnli-dev_cosine_f1_threshold": 0.6541914939880371, "eval_Qnli-dev_cosine_precision": 0.5922619047619048, "eval_Qnli-dev_cosine_recall": 0.8432203389830508, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 320.27880859375, "eval_Qnli-dev_dot_ap": 0.6977582700768947, "eval_Qnli-dev_dot_f1": 0.6773618538324421, "eval_Qnli-dev_dot_f1_threshold": 278.5557861328125, "eval_Qnli-dev_dot_precision": 0.5846153846153846, "eval_Qnli-dev_dot_recall": 0.8050847457627118, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.542463302612305, "eval_Qnli-dev_euclidean_ap": 0.7593045753840203, "eval_Qnli-dev_euclidean_f1": 0.7058823529411765, "eval_Qnli-dev_euclidean_f1_threshold": 16.979202270507812, "eval_Qnli-dev_euclidean_precision": 0.6092307692307692, "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 309.9955749511719, "eval_Qnli-dev_manhattan_ap": 0.7642166286754024, "eval_Qnli-dev_manhattan_f1": 0.7145421903052065, "eval_Qnli-dev_manhattan_f1_threshold": 355.369140625, "eval_Qnli-dev_manhattan_precision": 0.6199376947040498, "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 320.27880859375, "eval_Qnli-dev_max_ap": 0.7642166286754024, "eval_Qnli-dev_max_f1": 0.7145421903052065, "eval_Qnli-dev_max_f1_threshold": 355.369140625, "eval_Qnli-dev_max_precision": 0.6199376947040498, "eval_Qnli-dev_max_recall": 0.8432203389830508, "eval_allNLI-dev_cosine_accuracy": 0.724609375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8462996482849121, "eval_allNLI-dev_cosine_ap": 0.6217246413185425, "eval_allNLI-dev_cosine_f1": 0.6253229974160207, "eval_allNLI-dev_cosine_f1_threshold": 0.7429014444351196, "eval_allNLI-dev_cosine_precision": 0.5654205607476636, "eval_allNLI-dev_cosine_recall": 0.6994219653179191, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 327.000244140625, "eval_allNLI-dev_dot_ap": 0.5672222769156174, "eval_allNLI-dev_dot_f1": 0.5930735930735931, "eval_allNLI-dev_dot_f1_threshold": 274.185546875, "eval_allNLI-dev_dot_precision": 0.4740484429065744, "eval_allNLI-dev_dot_recall": 0.791907514450867, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.945903778076172, "eval_allNLI-dev_euclidean_ap": 0.6344760902348712, "eval_allNLI-dev_euclidean_f1": 0.6385809312638581, "eval_allNLI-dev_euclidean_f1_threshold": 15.824264526367188, "eval_allNLI-dev_euclidean_precision": 0.5179856115107914, "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, "eval_allNLI-dev_manhattan_accuracy": 0.740234375, "eval_allNLI-dev_manhattan_accuracy_threshold": 280.1436462402344, "eval_allNLI-dev_manhattan_ap": 0.6317219848787154, "eval_allNLI-dev_manhattan_f1": 0.6387665198237885, "eval_allNLI-dev_manhattan_f1_threshold": 332.62432861328125, "eval_allNLI-dev_manhattan_precision": 0.5160142348754448, "eval_allNLI-dev_manhattan_recall": 0.838150289017341, "eval_allNLI-dev_max_accuracy": 0.740234375, "eval_allNLI-dev_max_accuracy_threshold": 327.000244140625, "eval_allNLI-dev_max_ap": 0.6344760902348712, "eval_allNLI-dev_max_f1": 0.6387665198237885, "eval_allNLI-dev_max_f1_threshold": 332.62432861328125, "eval_allNLI-dev_max_precision": 0.5654205607476636, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7642166286754024, "eval_sts-test_pearson_cosine": 0.8531249568563335, "eval_sts-test_pearson_dot": 0.8410599401603429, "eval_sts-test_pearson_euclidean": 0.8792229546512194, "eval_sts-test_pearson_manhattan": 0.8772237708895925, "eval_sts-test_pearson_max": 0.8792229546512194, "eval_sts-test_spearman_cosine": 0.8805264009035749, "eval_sts-test_spearman_dot": 0.8390134750402413, "eval_sts-test_spearman_euclidean": 0.876211126270125, "eval_sts-test_spearman_manhattan": 0.8747582318177919, "eval_sts-test_spearman_max": 0.8805264009035749, "eval_vitaminc-pairs_loss": 3.1040732860565186, "eval_vitaminc-pairs_runtime": 3.2282, "eval_vitaminc-pairs_samples_per_second": 39.65, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_negation-triplets_loss": 0.9482282400131226, "eval_negation-triplets_runtime": 0.7828, "eval_negation-triplets_samples_per_second": 163.505, "eval_negation-triplets_steps_per_second": 1.277, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_scitail-pairs-pos_loss": 0.15201443433761597, "eval_scitail-pairs-pos_runtime": 0.9809, "eval_scitail-pairs-pos_samples_per_second": 130.493, "eval_scitail-pairs-pos_steps_per_second": 1.019, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_scitail-pairs-qa_loss": 0.0005068474565632641, "eval_scitail-pairs-qa_runtime": 0.6194, "eval_scitail-pairs-qa_samples_per_second": 206.649, "eval_scitail-pairs-qa_steps_per_second": 1.614, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_xsum-pairs_loss": 0.33086055517196655, "eval_xsum-pairs_runtime": 3.0354, "eval_xsum-pairs_samples_per_second": 42.169, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_sciq_pairs_loss": 0.09085152298212051, "eval_sciq_pairs_runtime": 3.5531, "eval_sciq_pairs_samples_per_second": 36.025, "eval_sciq_pairs_steps_per_second": 0.281, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_qasc_pairs_loss": 0.13306795060634613, "eval_qasc_pairs_runtime": 0.6301, "eval_qasc_pairs_samples_per_second": 203.143, "eval_qasc_pairs_steps_per_second": 1.587, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_openbookqa_pairs_loss": 0.7930919528007507, "eval_openbookqa_pairs_runtime": 0.6168, "eval_openbookqa_pairs_samples_per_second": 207.526, "eval_openbookqa_pairs_steps_per_second": 1.621, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_msmarco_pairs_loss": 0.7768175601959229, "eval_msmarco_pairs_runtime": 1.5311, "eval_msmarco_pairs_samples_per_second": 83.599, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_nq_pairs_loss": 0.5487918257713318, "eval_nq_pairs_runtime": 2.9193, "eval_nq_pairs_samples_per_second": 43.847, "eval_nq_pairs_steps_per_second": 0.343, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_trivia_pairs_loss": 0.720573365688324, "eval_trivia_pairs_runtime": 3.4466, "eval_trivia_pairs_samples_per_second": 37.138, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_gooaq_pairs_loss": 0.3493151366710663, "eval_gooaq_pairs_runtime": 0.9651, "eval_gooaq_pairs_samples_per_second": 132.625, "eval_gooaq_pairs_steps_per_second": 1.036, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_paws-pos_loss": 0.021203767508268356, "eval_paws-pos_runtime": 0.7138, "eval_paws-pos_samples_per_second": 179.31, "eval_paws-pos_steps_per_second": 1.401, "step": 2640 }, { "epoch": 2.7160493827160495, "eval_global_dataset_loss": 0.434063583612442, "eval_global_dataset_runtime": 13.4165, "eval_global_dataset_samples_per_second": 31.007, "eval_global_dataset_steps_per_second": 0.298, "step": 2640 }, { "epoch": 2.7170781893004117, "grad_norm": 4.200658321380615, "learning_rate": 1.6893677271910957e-05, "loss": 0.0786, "step": 2641 }, { "epoch": 2.7181069958847734, "grad_norm": 0.03509858250617981, "learning_rate": 1.688116332519796e-05, "loss": 0.0004, "step": 2642 }, { "epoch": 2.7191358024691357, "grad_norm": 0.29538682103157043, "learning_rate": 1.6868660063643617e-05, "loss": 0.0033, "step": 2643 }, { "epoch": 2.720164609053498, "grad_norm": 3.6793017387390137, "learning_rate": 1.6856167507954037e-05, "loss": 0.1469, "step": 2644 }, { "epoch": 2.72119341563786, "grad_norm": 1.8203033208847046, "learning_rate": 1.6843685678817593e-05, "loss": 0.0252, "step": 2645 }, { "epoch": 2.7222222222222223, "grad_norm": 7.600049018859863, "learning_rate": 1.6831214596904908e-05, "loss": 0.3796, "step": 2646 }, { "epoch": 2.723251028806584, "grad_norm": 6.077390193939209, "learning_rate": 1.6818754282868797e-05, "loss": 0.2078, "step": 2647 }, { "epoch": 2.7242798353909468, "grad_norm": 2.5528903007507324, "learning_rate": 1.680630475734425e-05, "loss": 0.0381, "step": 2648 }, { "epoch": 2.7253086419753085, "grad_norm": 5.169680595397949, "learning_rate": 1.679386604094837e-05, "loss": 0.1156, "step": 2649 }, { "epoch": 2.7263374485596708, "grad_norm": 3.173422336578369, "learning_rate": 1.6781438154280396e-05, "loss": 0.0556, "step": 2650 }, { "epoch": 2.727366255144033, "grad_norm": 2.6182334423065186, "learning_rate": 1.676902111792159e-05, "loss": 0.035, "step": 2651 }, { "epoch": 2.728395061728395, "grad_norm": 0.15302547812461853, "learning_rate": 1.6756614952435288e-05, "loss": 0.0018, "step": 2652 }, { "epoch": 2.7294238683127574, "grad_norm": 8.696645736694336, "learning_rate": 1.6744219678366787e-05, "loss": 0.5049, "step": 2653 }, { "epoch": 2.730452674897119, "grad_norm": 0.4114903211593628, "learning_rate": 1.6731835316243373e-05, "loss": 0.0029, "step": 2654 }, { "epoch": 2.7314814814814814, "grad_norm": 0.19095362722873688, "learning_rate": 1.6719461886574242e-05, "loss": 0.0017, "step": 2655 }, { "epoch": 2.7325102880658436, "grad_norm": 3.784839630126953, "learning_rate": 1.67070994098505e-05, "loss": 0.0654, "step": 2656 }, { "epoch": 2.733539094650206, "grad_norm": 4.237402439117432, "learning_rate": 1.66947479065451e-05, "loss": 0.088, "step": 2657 }, { "epoch": 2.734567901234568, "grad_norm": 1.9196566343307495, "learning_rate": 1.6682407397112838e-05, "loss": 0.0496, "step": 2658 }, { "epoch": 2.73559670781893, "grad_norm": 13.894206047058105, "learning_rate": 1.6670077901990297e-05, "loss": 1.6792, "step": 2659 }, { "epoch": 2.736625514403292, "grad_norm": 0.026885371655225754, "learning_rate": 1.665775944159581e-05, "loss": 0.0003, "step": 2660 }, { "epoch": 2.736625514403292, "eval_Qnli-dev_cosine_accuracy": 0.71484375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7080373764038086, "eval_Qnli-dev_cosine_ap": 0.7579441540222451, "eval_Qnli-dev_cosine_f1": 0.7058823529411764, "eval_Qnli-dev_cosine_f1_threshold": 0.6827559471130371, "eval_Qnli-dev_cosine_precision": 0.6391752577319587, "eval_Qnli-dev_cosine_recall": 0.788135593220339, "eval_Qnli-dev_dot_accuracy": 0.6875, "eval_Qnli-dev_dot_accuracy_threshold": 304.76373291015625, "eval_Qnli-dev_dot_ap": 0.7141193663938065, "eval_Qnli-dev_dot_f1": 0.6869712351945855, "eval_Qnli-dev_dot_f1_threshold": 262.9793395996094, "eval_Qnli-dev_dot_precision": 0.571830985915493, "eval_Qnli-dev_dot_recall": 0.8601694915254238, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 16.5640869140625, "eval_Qnli-dev_euclidean_ap": 0.7651917406401838, "eval_Qnli-dev_euclidean_f1": 0.7238805970149254, "eval_Qnli-dev_euclidean_f1_threshold": 16.59751319885254, "eval_Qnli-dev_euclidean_precision": 0.6466666666666666, "eval_Qnli-dev_euclidean_recall": 0.8220338983050848, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 307.7052001953125, "eval_Qnli-dev_manhattan_ap": 0.7683952998129552, "eval_Qnli-dev_manhattan_f1": 0.7223230490018148, "eval_Qnli-dev_manhattan_f1_threshold": 352.3563232421875, "eval_Qnli-dev_manhattan_precision": 0.6317460317460317, "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 307.7052001953125, "eval_Qnli-dev_max_ap": 0.7683952998129552, "eval_Qnli-dev_max_f1": 0.7238805970149254, "eval_Qnli-dev_max_f1_threshold": 352.3563232421875, "eval_Qnli-dev_max_precision": 0.6466666666666666, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.828792154788971, "eval_allNLI-dev_cosine_ap": 0.619929643793518, "eval_allNLI-dev_cosine_f1": 0.6217391304347827, "eval_allNLI-dev_cosine_f1_threshold": 0.6918565630912781, "eval_allNLI-dev_cosine_precision": 0.49825783972125437, "eval_allNLI-dev_cosine_recall": 0.8265895953757225, "eval_allNLI-dev_dot_accuracy": 0.697265625, "eval_allNLI-dev_dot_accuracy_threshold": 324.3567810058594, "eval_allNLI-dev_dot_ap": 0.5635273837147275, "eval_allNLI-dev_dot_f1": 0.5959367945823928, "eval_allNLI-dev_dot_f1_threshold": 279.8157958984375, "eval_allNLI-dev_dot_precision": 0.4888888888888889, "eval_allNLI-dev_dot_recall": 0.7630057803468208, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 11.771275520324707, "eval_allNLI-dev_euclidean_ap": 0.6333535014147081, "eval_allNLI-dev_euclidean_f1": 0.6369710467706012, "eval_allNLI-dev_euclidean_f1_threshold": 15.625327110290527, "eval_allNLI-dev_euclidean_precision": 0.5181159420289855, "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 253.17062377929688, "eval_allNLI-dev_manhattan_ap": 0.6287360415195495, "eval_allNLI-dev_manhattan_f1": 0.6387665198237885, "eval_allNLI-dev_manhattan_f1_threshold": 329.28631591796875, "eval_allNLI-dev_manhattan_precision": 0.5160142348754448, "eval_allNLI-dev_manhattan_recall": 0.838150289017341, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 324.3567810058594, "eval_allNLI-dev_max_ap": 0.6333535014147081, "eval_allNLI-dev_max_f1": 0.6387665198237885, "eval_allNLI-dev_max_f1_threshold": 329.28631591796875, "eval_allNLI-dev_max_precision": 0.5181159420289855, "eval_allNLI-dev_max_recall": 0.838150289017341, "eval_sequential_score": 0.7683952998129552, "eval_sts-test_pearson_cosine": 0.8494285211763248, "eval_sts-test_pearson_dot": 0.8340288033061978, "eval_sts-test_pearson_euclidean": 0.8772214900096512, "eval_sts-test_pearson_manhattan": 0.8750134150403021, "eval_sts-test_pearson_max": 0.8772214900096512, "eval_sts-test_spearman_cosine": 0.8775768005140064, "eval_sts-test_spearman_dot": 0.8309296071824989, "eval_sts-test_spearman_euclidean": 0.8736704181558897, "eval_sts-test_spearman_manhattan": 0.871827561211792, "eval_sts-test_spearman_max": 0.8775768005140064, "eval_vitaminc-pairs_loss": 3.114762544631958, "eval_vitaminc-pairs_runtime": 3.2281, "eval_vitaminc-pairs_samples_per_second": 39.651, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2660 }, { "epoch": 2.736625514403292, "eval_negation-triplets_loss": 0.954352855682373, "eval_negation-triplets_runtime": 0.7738, "eval_negation-triplets_samples_per_second": 165.408, "eval_negation-triplets_steps_per_second": 1.292, "step": 2660 }, { "epoch": 2.736625514403292, "eval_scitail-pairs-pos_loss": 0.14429861307144165, "eval_scitail-pairs-pos_runtime": 0.9537, "eval_scitail-pairs-pos_samples_per_second": 134.219, "eval_scitail-pairs-pos_steps_per_second": 1.049, "step": 2660 }, { "epoch": 2.736625514403292, "eval_scitail-pairs-qa_loss": 0.0005340906209312379, "eval_scitail-pairs-qa_runtime": 0.6212, "eval_scitail-pairs-qa_samples_per_second": 206.059, "eval_scitail-pairs-qa_steps_per_second": 1.61, "step": 2660 }, { "epoch": 2.736625514403292, "eval_xsum-pairs_loss": 0.276018887758255, "eval_xsum-pairs_runtime": 3.0349, "eval_xsum-pairs_samples_per_second": 42.176, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2660 }, { "epoch": 2.736625514403292, "eval_sciq_pairs_loss": 0.09169301390647888, "eval_sciq_pairs_runtime": 3.5621, "eval_sciq_pairs_samples_per_second": 35.934, "eval_sciq_pairs_steps_per_second": 0.281, "step": 2660 }, { "epoch": 2.736625514403292, "eval_qasc_pairs_loss": 0.13745173811912537, "eval_qasc_pairs_runtime": 0.6311, "eval_qasc_pairs_samples_per_second": 202.817, "eval_qasc_pairs_steps_per_second": 1.585, "step": 2660 }, { "epoch": 2.736625514403292, "eval_openbookqa_pairs_loss": 0.7925641536712646, "eval_openbookqa_pairs_runtime": 0.6056, "eval_openbookqa_pairs_samples_per_second": 211.359, "eval_openbookqa_pairs_steps_per_second": 1.651, "step": 2660 }, { "epoch": 2.736625514403292, "eval_msmarco_pairs_loss": 0.7752300500869751, "eval_msmarco_pairs_runtime": 1.5354, "eval_msmarco_pairs_samples_per_second": 83.366, "eval_msmarco_pairs_steps_per_second": 0.651, "step": 2660 }, { "epoch": 2.736625514403292, "eval_nq_pairs_loss": 0.5743083357810974, "eval_nq_pairs_runtime": 2.9253, "eval_nq_pairs_samples_per_second": 43.757, "eval_nq_pairs_steps_per_second": 0.342, "step": 2660 }, { "epoch": 2.736625514403292, "eval_trivia_pairs_loss": 0.766108512878418, "eval_trivia_pairs_runtime": 3.4915, "eval_trivia_pairs_samples_per_second": 36.661, "eval_trivia_pairs_steps_per_second": 0.286, "step": 2660 }, { "epoch": 2.736625514403292, "eval_gooaq_pairs_loss": 0.3886713981628418, "eval_gooaq_pairs_runtime": 0.9614, "eval_gooaq_pairs_samples_per_second": 133.143, "eval_gooaq_pairs_steps_per_second": 1.04, "step": 2660 }, { "epoch": 2.736625514403292, "eval_paws-pos_loss": 0.020917167887091637, "eval_paws-pos_runtime": 0.7162, "eval_paws-pos_samples_per_second": 178.711, "eval_paws-pos_steps_per_second": 1.396, "step": 2660 }, { "epoch": 2.736625514403292, "eval_global_dataset_loss": 0.43585139513015747, "eval_global_dataset_runtime": 13.4401, "eval_global_dataset_samples_per_second": 30.952, "eval_global_dataset_steps_per_second": 0.298, "step": 2660 }, { "epoch": 2.7376543209876543, "grad_norm": 3.678781747817993, "learning_rate": 1.664545203632945e-05, "loss": 0.0415, "step": 2661 }, { "epoch": 2.7386831275720165, "grad_norm": 7.833728313446045, "learning_rate": 1.6633155706572976e-05, "loss": 0.2631, "step": 2662 }, { "epoch": 2.7397119341563787, "grad_norm": 3.6902894973754883, "learning_rate": 1.662087047268981e-05, "loss": 0.1259, "step": 2663 }, { "epoch": 2.7407407407407405, "grad_norm": 9.759456634521484, "learning_rate": 1.6608596355024988e-05, "loss": 0.3776, "step": 2664 }, { "epoch": 2.741769547325103, "grad_norm": 3.246983051300049, "learning_rate": 1.6596333373905147e-05, "loss": 0.1, "step": 2665 }, { "epoch": 2.742798353909465, "grad_norm": 0.031980000436306, "learning_rate": 1.658408154963847e-05, "loss": 0.0003, "step": 2666 }, { "epoch": 2.743827160493827, "grad_norm": 8.92718505859375, "learning_rate": 1.6571840902514685e-05, "loss": 0.2497, "step": 2667 }, { "epoch": 2.7448559670781894, "grad_norm": 3.099560499191284, "learning_rate": 1.655961145280499e-05, "loss": 0.1167, "step": 2668 }, { "epoch": 2.7458847736625516, "grad_norm": 1.9758539199829102, "learning_rate": 1.6547393220762042e-05, "loss": 0.0215, "step": 2669 }, { "epoch": 2.746913580246914, "grad_norm": 6.694230556488037, "learning_rate": 1.6535186226619927e-05, "loss": 0.2423, "step": 2670 }, { "epoch": 2.7479423868312756, "grad_norm": 5.057843208312988, "learning_rate": 1.652299049059412e-05, "loss": 0.093, "step": 2671 }, { "epoch": 2.748971193415638, "grad_norm": 8.066476821899414, "learning_rate": 1.6510806032881444e-05, "loss": 0.3505, "step": 2672 }, { "epoch": 2.75, "grad_norm": 5.387277603149414, "learning_rate": 1.6498632873660064e-05, "loss": 0.1175, "step": 2673 }, { "epoch": 2.751028806584362, "grad_norm": 0.711836040019989, "learning_rate": 1.6486471033089403e-05, "loss": 0.0073, "step": 2674 }, { "epoch": 2.7520576131687244, "grad_norm": 2.7773194313049316, "learning_rate": 1.647432053131017e-05, "loss": 0.1032, "step": 2675 }, { "epoch": 2.753086419753086, "grad_norm": 6.849653244018555, "learning_rate": 1.6462181388444283e-05, "loss": 0.251, "step": 2676 }, { "epoch": 2.7541152263374484, "grad_norm": 0.7139111757278442, "learning_rate": 1.6450053624594846e-05, "loss": 0.0269, "step": 2677 }, { "epoch": 2.7551440329218106, "grad_norm": 4.462314605712891, "learning_rate": 1.6437937259846115e-05, "loss": 0.0832, "step": 2678 }, { "epoch": 2.756172839506173, "grad_norm": 4.129017353057861, "learning_rate": 1.642583231426349e-05, "loss": 0.1454, "step": 2679 }, { "epoch": 2.757201646090535, "grad_norm": 7.224521160125732, "learning_rate": 1.6413738807893438e-05, "loss": 0.2274, "step": 2680 }, { "epoch": 2.757201646090535, "eval_Qnli-dev_cosine_accuracy": 0.697265625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7046054601669312, "eval_Qnli-dev_cosine_ap": 0.7537362088662101, "eval_Qnli-dev_cosine_f1": 0.7037037037037036, "eval_Qnli-dev_cosine_f1_threshold": 0.6772632598876953, "eval_Qnli-dev_cosine_precision": 0.625, "eval_Qnli-dev_cosine_recall": 0.8050847457627118, "eval_Qnli-dev_dot_accuracy": 0.677734375, "eval_Qnli-dev_dot_accuracy_threshold": 320.1844482421875, "eval_Qnli-dev_dot_ap": 0.7035198813997825, "eval_Qnli-dev_dot_f1": 0.6823529411764706, "eval_Qnli-dev_dot_f1_threshold": 263.5550842285156, "eval_Qnli-dev_dot_precision": 0.5654596100278552, "eval_Qnli-dev_dot_recall": 0.8601694915254238, "eval_Qnli-dev_euclidean_accuracy": 0.705078125, "eval_Qnli-dev_euclidean_accuracy_threshold": 16.209491729736328, "eval_Qnli-dev_euclidean_ap": 0.7622031724115181, "eval_Qnli-dev_euclidean_f1": 0.7126865671641792, "eval_Qnli-dev_euclidean_f1_threshold": 16.525169372558594, "eval_Qnli-dev_euclidean_precision": 0.6366666666666667, "eval_Qnli-dev_euclidean_recall": 0.809322033898305, "eval_Qnli-dev_manhattan_accuracy": 0.703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 300.4136657714844, "eval_Qnli-dev_manhattan_ap": 0.7645123847173098, "eval_Qnli-dev_manhattan_f1": 0.7090909090909091, "eval_Qnli-dev_manhattan_f1_threshold": 352.8323059082031, "eval_Qnli-dev_manhattan_precision": 0.6210191082802548, "eval_Qnli-dev_manhattan_recall": 0.826271186440678, "eval_Qnli-dev_max_accuracy": 0.705078125, "eval_Qnli-dev_max_accuracy_threshold": 320.1844482421875, "eval_Qnli-dev_max_ap": 0.7645123847173098, "eval_Qnli-dev_max_f1": 0.7126865671641792, "eval_Qnli-dev_max_f1_threshold": 352.8323059082031, "eval_Qnli-dev_max_precision": 0.6366666666666667, "eval_Qnli-dev_max_recall": 0.8601694915254238, "eval_allNLI-dev_cosine_accuracy": 0.71875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8609461188316345, "eval_allNLI-dev_cosine_ap": 0.6170580449180622, "eval_allNLI-dev_cosine_f1": 0.631578947368421, "eval_allNLI-dev_cosine_f1_threshold": 0.7266778945922852, "eval_allNLI-dev_cosine_precision": 0.5575221238938053, "eval_allNLI-dev_cosine_recall": 0.7283236994219653, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 324.1557312011719, "eval_allNLI-dev_dot_ap": 0.565273836406907, "eval_allNLI-dev_dot_f1": 0.6004319654427647, "eval_allNLI-dev_dot_f1_threshold": 266.6668701171875, "eval_allNLI-dev_dot_precision": 0.4793103448275862, "eval_allNLI-dev_dot_recall": 0.8034682080924855, "eval_allNLI-dev_euclidean_accuracy": 0.728515625, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.061691284179688, "eval_allNLI-dev_euclidean_ap": 0.6274731706435148, "eval_allNLI-dev_euclidean_f1": 0.6310904872389791, "eval_allNLI-dev_euclidean_f1_threshold": 15.339818954467773, "eval_allNLI-dev_euclidean_precision": 0.5271317829457365, "eval_allNLI-dev_euclidean_recall": 0.7861271676300579, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 286.12884521484375, "eval_allNLI-dev_manhattan_ap": 0.623966417497906, "eval_allNLI-dev_manhattan_f1": 0.6301969365426696, "eval_allNLI-dev_manhattan_f1_threshold": 335.5853271484375, "eval_allNLI-dev_manhattan_precision": 0.5070422535211268, "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, "eval_allNLI-dev_max_accuracy": 0.728515625, "eval_allNLI-dev_max_accuracy_threshold": 324.1557312011719, "eval_allNLI-dev_max_ap": 0.6274731706435148, "eval_allNLI-dev_max_f1": 0.631578947368421, "eval_allNLI-dev_max_f1_threshold": 335.5853271484375, "eval_allNLI-dev_max_precision": 0.5575221238938053, "eval_allNLI-dev_max_recall": 0.8323699421965318, "eval_sequential_score": 0.7645123847173098, "eval_sts-test_pearson_cosine": 0.8530015875024386, "eval_sts-test_pearson_dot": 0.8419479833262773, "eval_sts-test_pearson_euclidean": 0.8777796073110009, "eval_sts-test_pearson_manhattan": 0.8753230909839307, "eval_sts-test_pearson_max": 0.8777796073110009, "eval_sts-test_spearman_cosine": 0.8790284419310207, "eval_sts-test_spearman_dot": 0.8402478043319388, "eval_sts-test_spearman_euclidean": 0.8736524729000716, "eval_sts-test_spearman_manhattan": 0.8721407752902702, "eval_sts-test_spearman_max": 0.8790284419310207, "eval_vitaminc-pairs_loss": 3.1029434204101562, "eval_vitaminc-pairs_runtime": 3.3077, "eval_vitaminc-pairs_samples_per_second": 38.698, "eval_vitaminc-pairs_steps_per_second": 0.302, "step": 2680 }, { "epoch": 2.757201646090535, "eval_negation-triplets_loss": 0.9448590278625488, "eval_negation-triplets_runtime": 0.7908, "eval_negation-triplets_samples_per_second": 161.864, "eval_negation-triplets_steps_per_second": 1.265, "step": 2680 }, { "epoch": 2.757201646090535, "eval_scitail-pairs-pos_loss": 0.14555397629737854, "eval_scitail-pairs-pos_runtime": 0.9768, "eval_scitail-pairs-pos_samples_per_second": 131.034, "eval_scitail-pairs-pos_steps_per_second": 1.024, "step": 2680 }, { "epoch": 2.757201646090535, "eval_scitail-pairs-qa_loss": 0.0005990744684822857, "eval_scitail-pairs-qa_runtime": 0.6319, "eval_scitail-pairs-qa_samples_per_second": 202.571, "eval_scitail-pairs-qa_steps_per_second": 1.583, "step": 2680 }, { "epoch": 2.757201646090535, "eval_xsum-pairs_loss": 0.30878543853759766, "eval_xsum-pairs_runtime": 3.0435, "eval_xsum-pairs_samples_per_second": 42.057, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2680 }, { "epoch": 2.757201646090535, "eval_sciq_pairs_loss": 0.09530141204595566, "eval_sciq_pairs_runtime": 3.6034, "eval_sciq_pairs_samples_per_second": 35.522, "eval_sciq_pairs_steps_per_second": 0.278, "step": 2680 }, { "epoch": 2.757201646090535, "eval_qasc_pairs_loss": 0.14009356498718262, "eval_qasc_pairs_runtime": 0.6374, "eval_qasc_pairs_samples_per_second": 200.82, "eval_qasc_pairs_steps_per_second": 1.569, "step": 2680 }, { "epoch": 2.757201646090535, "eval_openbookqa_pairs_loss": 0.7762793898582458, "eval_openbookqa_pairs_runtime": 0.6159, "eval_openbookqa_pairs_samples_per_second": 207.839, "eval_openbookqa_pairs_steps_per_second": 1.624, "step": 2680 }, { "epoch": 2.757201646090535, "eval_msmarco_pairs_loss": 0.7799912095069885, "eval_msmarco_pairs_runtime": 1.5349, "eval_msmarco_pairs_samples_per_second": 83.393, "eval_msmarco_pairs_steps_per_second": 0.652, "step": 2680 }, { "epoch": 2.757201646090535, "eval_nq_pairs_loss": 0.5424676537513733, "eval_nq_pairs_runtime": 2.9244, "eval_nq_pairs_samples_per_second": 43.769, "eval_nq_pairs_steps_per_second": 0.342, "step": 2680 }, { "epoch": 2.757201646090535, "eval_trivia_pairs_loss": 0.7372913956642151, "eval_trivia_pairs_runtime": 3.4683, "eval_trivia_pairs_samples_per_second": 36.905, "eval_trivia_pairs_steps_per_second": 0.288, "step": 2680 }, { "epoch": 2.757201646090535, "eval_gooaq_pairs_loss": 0.3634032607078552, "eval_gooaq_pairs_runtime": 0.9653, "eval_gooaq_pairs_samples_per_second": 132.599, "eval_gooaq_pairs_steps_per_second": 1.036, "step": 2680 }, { "epoch": 2.757201646090535, "eval_paws-pos_loss": 0.021029140800237656, "eval_paws-pos_runtime": 0.7165, "eval_paws-pos_samples_per_second": 178.635, "eval_paws-pos_steps_per_second": 1.396, "step": 2680 }, { "epoch": 2.757201646090535, "eval_global_dataset_loss": 0.440264493227005, "eval_global_dataset_runtime": 13.4557, "eval_global_dataset_samples_per_second": 30.916, "eval_global_dataset_steps_per_second": 0.297, "step": 2680 }, { "epoch": 2.758230452674897, "grad_norm": 8.498957633972168, "learning_rate": 1.6401656760763484e-05, "loss": 0.2619, "step": 2681 }, { "epoch": 2.7592592592592595, "grad_norm": 6.170947074890137, "learning_rate": 1.6389586192882197e-05, "loss": 0.3302, "step": 2682 }, { "epoch": 2.7602880658436213, "grad_norm": 10.186309814453125, "learning_rate": 1.6377527124239104e-05, "loss": 0.435, "step": 2683 }, { "epoch": 2.7613168724279835, "grad_norm": 5.844569683074951, "learning_rate": 1.636547957480472e-05, "loss": 0.1459, "step": 2684 }, { "epoch": 2.7623456790123457, "grad_norm": 3.951009750366211, "learning_rate": 1.635344356453046e-05, "loss": 0.0832, "step": 2685 }, { "epoch": 2.763374485596708, "grad_norm": 0.7449941635131836, "learning_rate": 1.6341419113348636e-05, "loss": 0.0123, "step": 2686 }, { "epoch": 2.76440329218107, "grad_norm": 9.954071998596191, "learning_rate": 1.6329406241172428e-05, "loss": 0.5258, "step": 2687 }, { "epoch": 2.765432098765432, "grad_norm": 4.382876873016357, "learning_rate": 1.6317404967895826e-05, "loss": 0.1112, "step": 2688 }, { "epoch": 2.766460905349794, "grad_norm": 8.176844596862793, "learning_rate": 1.630541531339362e-05, "loss": 0.3373, "step": 2689 }, { "epoch": 2.7674897119341564, "grad_norm": 14.57414722442627, "learning_rate": 1.6293437297521353e-05, "loss": 1.669, "step": 2690 }, { "epoch": 2.7685185185185186, "grad_norm": 10.49014663696289, "learning_rate": 1.6281470940115303e-05, "loss": 0.4753, "step": 2691 }, { "epoch": 2.769547325102881, "grad_norm": 3.6068954467773438, "learning_rate": 1.6269516260992424e-05, "loss": 0.0656, "step": 2692 }, { "epoch": 2.7705761316872426, "grad_norm": 7.1533684730529785, "learning_rate": 1.6257573279950353e-05, "loss": 0.255, "step": 2693 }, { "epoch": 2.771604938271605, "grad_norm": 5.761252403259277, "learning_rate": 1.624564201676734e-05, "loss": 0.2164, "step": 2694 }, { "epoch": 2.772633744855967, "grad_norm": 3.0845518112182617, "learning_rate": 1.623372249120223e-05, "loss": 0.0969, "step": 2695 }, { "epoch": 2.7736625514403292, "grad_norm": 0.36267200112342834, "learning_rate": 1.6221814722994435e-05, "loss": 0.0076, "step": 2696 }, { "epoch": 2.7746913580246915, "grad_norm": 2.4578866958618164, "learning_rate": 1.6209918731863888e-05, "loss": 0.0247, "step": 2697 }, { "epoch": 2.7757201646090532, "grad_norm": 0.8951087594032288, "learning_rate": 1.6198034537511024e-05, "loss": 0.0092, "step": 2698 }, { "epoch": 2.776748971193416, "grad_norm": 1.2517441511154175, "learning_rate": 1.6186162159616742e-05, "loss": 0.0135, "step": 2699 }, { "epoch": 2.7777777777777777, "grad_norm": 2.9929397106170654, "learning_rate": 1.6174301617842374e-05, "loss": 0.0417, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_Qnli-dev_cosine_accuracy": 0.697265625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7066538333892822, "eval_Qnli-dev_cosine_ap": 0.7474109560058457, "eval_Qnli-dev_cosine_f1": 0.7026022304832713, "eval_Qnli-dev_cosine_f1_threshold": 0.6845518946647644, "eval_Qnli-dev_cosine_precision": 0.6258278145695364, "eval_Qnli-dev_cosine_recall": 0.8008474576271186, "eval_Qnli-dev_dot_accuracy": 0.6640625, "eval_Qnli-dev_dot_accuracy_threshold": 331.36737060546875, "eval_Qnli-dev_dot_ap": 0.6951621490967169, "eval_Qnli-dev_dot_f1": 0.6794425087108014, "eval_Qnli-dev_dot_f1_threshold": 273.2421875, "eval_Qnli-dev_dot_precision": 0.5769230769230769, "eval_Qnli-dev_dot_recall": 0.826271186440678, "eval_Qnli-dev_euclidean_accuracy": 0.70703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.760058403015137, "eval_Qnli-dev_euclidean_ap": 0.7560836241081209, "eval_Qnli-dev_euclidean_f1": 0.7163120567375886, "eval_Qnli-dev_euclidean_f1_threshold": 16.950374603271484, "eval_Qnli-dev_euclidean_precision": 0.6158536585365854, "eval_Qnli-dev_euclidean_recall": 0.8559322033898306, "eval_Qnli-dev_manhattan_accuracy": 0.705078125, "eval_Qnli-dev_manhattan_accuracy_threshold": 333.1535949707031, "eval_Qnli-dev_manhattan_ap": 0.757741537774554, "eval_Qnli-dev_manhattan_f1": 0.7065420560747663, "eval_Qnli-dev_manhattan_f1_threshold": 346.87506103515625, "eval_Qnli-dev_manhattan_precision": 0.6321070234113713, "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, "eval_Qnli-dev_max_accuracy": 0.70703125, "eval_Qnli-dev_max_accuracy_threshold": 333.1535949707031, "eval_Qnli-dev_max_ap": 0.757741537774554, "eval_Qnli-dev_max_f1": 0.7163120567375886, "eval_Qnli-dev_max_f1_threshold": 346.87506103515625, "eval_Qnli-dev_max_precision": 0.6321070234113713, "eval_Qnli-dev_max_recall": 0.8559322033898306, "eval_allNLI-dev_cosine_accuracy": 0.716796875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8345000147819519, "eval_allNLI-dev_cosine_ap": 0.6113199764503221, "eval_allNLI-dev_cosine_f1": 0.6167023554603853, "eval_allNLI-dev_cosine_f1_threshold": 0.6633158326148987, "eval_allNLI-dev_cosine_precision": 0.4897959183673469, "eval_allNLI-dev_cosine_recall": 0.8323699421965318, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 360.4277648925781, "eval_allNLI-dev_dot_ap": 0.5582963711025538, "eval_allNLI-dev_dot_f1": 0.5882352941176471, "eval_allNLI-dev_dot_f1_threshold": 239.11224365234375, "eval_allNLI-dev_dot_precision": 0.44510385756676557, "eval_allNLI-dev_dot_recall": 0.8670520231213873, "eval_allNLI-dev_euclidean_accuracy": 0.720703125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.189748764038086, "eval_allNLI-dev_euclidean_ap": 0.62282092072183, "eval_allNLI-dev_euclidean_f1": 0.638477801268499, "eval_allNLI-dev_euclidean_f1_threshold": 16.350440979003906, "eval_allNLI-dev_euclidean_precision": 0.5033333333333333, "eval_allNLI-dev_euclidean_recall": 0.8728323699421965, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 274.83148193359375, "eval_allNLI-dev_manhattan_ap": 0.6195434179324866, "eval_allNLI-dev_manhattan_f1": 0.6324786324786326, "eval_allNLI-dev_manhattan_f1_threshold": 342.87457275390625, "eval_allNLI-dev_manhattan_precision": 0.5016949152542373, "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, "eval_allNLI-dev_max_accuracy": 0.7265625, "eval_allNLI-dev_max_accuracy_threshold": 360.4277648925781, "eval_allNLI-dev_max_ap": 0.62282092072183, "eval_allNLI-dev_max_f1": 0.638477801268499, "eval_allNLI-dev_max_f1_threshold": 342.87457275390625, "eval_allNLI-dev_max_precision": 0.5033333333333333, "eval_allNLI-dev_max_recall": 0.8728323699421965, "eval_sequential_score": 0.757741537774554, "eval_sts-test_pearson_cosine": 0.8570775024704771, "eval_sts-test_pearson_dot": 0.8471548700075804, "eval_sts-test_pearson_euclidean": 0.879640942151199, "eval_sts-test_pearson_manhattan": 0.8765446720048727, "eval_sts-test_pearson_max": 0.879640942151199, "eval_sts-test_spearman_cosine": 0.8821037933650988, "eval_sts-test_spearman_dot": 0.8457290730809416, "eval_sts-test_spearman_euclidean": 0.8765245193536491, "eval_sts-test_spearman_manhattan": 0.8735012136365687, "eval_sts-test_spearman_max": 0.8821037933650988, "eval_vitaminc-pairs_loss": 3.143115997314453, "eval_vitaminc-pairs_runtime": 3.2395, "eval_vitaminc-pairs_samples_per_second": 39.512, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_negation-triplets_loss": 0.9546037316322327, "eval_negation-triplets_runtime": 0.7759, "eval_negation-triplets_samples_per_second": 164.962, "eval_negation-triplets_steps_per_second": 1.289, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_scitail-pairs-pos_loss": 0.15170079469680786, "eval_scitail-pairs-pos_runtime": 0.9601, "eval_scitail-pairs-pos_samples_per_second": 133.325, "eval_scitail-pairs-pos_steps_per_second": 1.042, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_scitail-pairs-qa_loss": 0.0005720890476368368, "eval_scitail-pairs-qa_runtime": 0.6368, "eval_scitail-pairs-qa_samples_per_second": 201.011, "eval_scitail-pairs-qa_steps_per_second": 1.57, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_xsum-pairs_loss": 0.2862165868282318, "eval_xsum-pairs_runtime": 3.048, "eval_xsum-pairs_samples_per_second": 41.994, "eval_xsum-pairs_steps_per_second": 0.328, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_sciq_pairs_loss": 0.09642098098993301, "eval_sciq_pairs_runtime": 3.6411, "eval_sciq_pairs_samples_per_second": 35.154, "eval_sciq_pairs_steps_per_second": 0.275, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_qasc_pairs_loss": 0.14277957379817963, "eval_qasc_pairs_runtime": 0.6446, "eval_qasc_pairs_samples_per_second": 198.56, "eval_qasc_pairs_steps_per_second": 1.551, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_openbookqa_pairs_loss": 0.7724245190620422, "eval_openbookqa_pairs_runtime": 0.6344, "eval_openbookqa_pairs_samples_per_second": 201.75, "eval_openbookqa_pairs_steps_per_second": 1.576, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_msmarco_pairs_loss": 0.7664374709129333, "eval_msmarco_pairs_runtime": 1.5498, "eval_msmarco_pairs_samples_per_second": 82.593, "eval_msmarco_pairs_steps_per_second": 0.645, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_nq_pairs_loss": 0.5656165480613708, "eval_nq_pairs_runtime": 2.9291, "eval_nq_pairs_samples_per_second": 43.699, "eval_nq_pairs_steps_per_second": 0.341, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_trivia_pairs_loss": 0.7302601933479309, "eval_trivia_pairs_runtime": 3.4845, "eval_trivia_pairs_samples_per_second": 36.734, "eval_trivia_pairs_steps_per_second": 0.287, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_gooaq_pairs_loss": 0.3398068845272064, "eval_gooaq_pairs_runtime": 0.9716, "eval_gooaq_pairs_samples_per_second": 131.747, "eval_gooaq_pairs_steps_per_second": 1.029, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_paws-pos_loss": 0.02093500830233097, "eval_paws-pos_runtime": 0.7422, "eval_paws-pos_samples_per_second": 172.466, "eval_paws-pos_steps_per_second": 1.347, "step": 2700 }, { "epoch": 2.7777777777777777, "eval_global_dataset_loss": 0.4515623450279236, "eval_global_dataset_runtime": 13.4927, "eval_global_dataset_samples_per_second": 30.831, "eval_global_dataset_steps_per_second": 0.296, "step": 2700 }, { "epoch": 2.77880658436214, "grad_norm": 8.108509063720703, "learning_rate": 1.6162452931829648e-05, "loss": 0.3879, "step": 2701 }, { "epoch": 2.779835390946502, "grad_norm": 2.898448944091797, "learning_rate": 1.615061612120065e-05, "loss": 0.0628, "step": 2702 }, { "epoch": 2.7808641975308643, "grad_norm": 10.097766876220703, "learning_rate": 1.6138791205557824e-05, "loss": 0.3868, "step": 2703 }, { "epoch": 2.7818930041152266, "grad_norm": 14.745611190795898, "learning_rate": 1.6126978204483887e-05, "loss": 1.6837, "step": 2704 }, { "epoch": 2.7829218106995883, "grad_norm": 6.330893516540527, "learning_rate": 1.6115177137541845e-05, "loss": 0.2385, "step": 2705 }, { "epoch": 2.7839506172839505, "grad_norm": 6.994366645812988, "learning_rate": 1.610338802427493e-05, "loss": 0.4075, "step": 2706 }, { "epoch": 2.7849794238683128, "grad_norm": 5.6483259201049805, "learning_rate": 1.6091610884206575e-05, "loss": 0.1842, "step": 2707 }, { "epoch": 2.786008230452675, "grad_norm": 8.341057777404785, "learning_rate": 1.6079845736840396e-05, "loss": 0.2927, "step": 2708 }, { "epoch": 2.787037037037037, "grad_norm": 8.771723747253418, "learning_rate": 1.6068092601660145e-05, "loss": 0.3293, "step": 2709 }, { "epoch": 2.788065843621399, "grad_norm": 6.1769256591796875, "learning_rate": 1.6056351498129675e-05, "loss": 0.191, "step": 2710 }, { "epoch": 2.789094650205761, "grad_norm": 4.579040050506592, "learning_rate": 1.6044622445692917e-05, "loss": 0.1099, "step": 2711 }, { "epoch": 2.7901234567901234, "grad_norm": 5.16159200668335, "learning_rate": 1.6032905463773846e-05, "loss": 0.1346, "step": 2712 }, { "epoch": 2.7911522633744856, "grad_norm": 3.8335490226745605, "learning_rate": 1.602120057177645e-05, "loss": 0.0671, "step": 2713 }, { "epoch": 2.792181069958848, "grad_norm": 5.815287113189697, "learning_rate": 1.600950778908469e-05, "loss": 0.1514, "step": 2714 }, { "epoch": 2.7932098765432096, "grad_norm": 5.692095756530762, "learning_rate": 1.5997827135062475e-05, "loss": 0.1491, "step": 2715 }, { "epoch": 2.7942386831275723, "grad_norm": 3.4166159629821777, "learning_rate": 1.5986158629053638e-05, "loss": 0.0804, "step": 2716 }, { "epoch": 2.795267489711934, "grad_norm": 3.5850372314453125, "learning_rate": 1.5974502290381874e-05, "loss": 0.0634, "step": 2717 }, { "epoch": 2.7962962962962963, "grad_norm": 7.487916469573975, "learning_rate": 1.5962858138350744e-05, "loss": 0.3546, "step": 2718 }, { "epoch": 2.7973251028806585, "grad_norm": 3.306466817855835, "learning_rate": 1.595122619224362e-05, "loss": 0.0575, "step": 2719 }, { "epoch": 2.7983539094650207, "grad_norm": 9.476130485534668, "learning_rate": 1.593960647132367e-05, "loss": 0.4007, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7189085483551025, "eval_Qnli-dev_cosine_ap": 0.7516205361227193, "eval_Qnli-dev_cosine_f1": 0.70703125, "eval_Qnli-dev_cosine_f1_threshold": 0.7023769617080688, "eval_Qnli-dev_cosine_precision": 0.6557971014492754, "eval_Qnli-dev_cosine_recall": 0.7669491525423728, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 326.70703125, "eval_Qnli-dev_dot_ap": 0.6971201712167774, "eval_Qnli-dev_dot_f1": 0.6745762711864407, "eval_Qnli-dev_dot_f1_threshold": 277.0602111816406, "eval_Qnli-dev_dot_precision": 0.5621468926553672, "eval_Qnli-dev_dot_recall": 0.8432203389830508, "eval_Qnli-dev_euclidean_accuracy": 0.712890625, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.637823104858398, "eval_Qnli-dev_euclidean_ap": 0.7610873786880517, "eval_Qnli-dev_euclidean_f1": 0.7113594040968343, "eval_Qnli-dev_euclidean_f1_threshold": 16.481548309326172, "eval_Qnli-dev_euclidean_precision": 0.6345514950166113, "eval_Qnli-dev_euclidean_recall": 0.809322033898305, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 328.9098205566406, "eval_Qnli-dev_manhattan_ap": 0.7625359213945974, "eval_Qnli-dev_manhattan_f1": 0.7093235831809872, "eval_Qnli-dev_manhattan_f1_threshold": 348.7232971191406, "eval_Qnli-dev_manhattan_precision": 0.6237942122186495, "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 328.9098205566406, "eval_Qnli-dev_max_ap": 0.7625359213945974, "eval_Qnli-dev_max_f1": 0.7113594040968343, "eval_Qnli-dev_max_f1_threshold": 348.7232971191406, "eval_Qnli-dev_max_precision": 0.6557971014492754, "eval_Qnli-dev_max_recall": 0.8432203389830508, "eval_allNLI-dev_cosine_accuracy": 0.71484375, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8465907573699951, "eval_allNLI-dev_cosine_ap": 0.6049884885576247, "eval_allNLI-dev_cosine_f1": 0.6177777777777778, "eval_allNLI-dev_cosine_f1_threshold": 0.6971791386604309, "eval_allNLI-dev_cosine_precision": 0.5018050541516246, "eval_allNLI-dev_cosine_recall": 0.8034682080924855, "eval_allNLI-dev_dot_accuracy": 0.69140625, "eval_allNLI-dev_dot_accuracy_threshold": 385.4921875, "eval_allNLI-dev_dot_ap": 0.548330202309652, "eval_allNLI-dev_dot_f1": 0.592, "eval_allNLI-dev_dot_f1_threshold": 259.08551025390625, "eval_allNLI-dev_dot_precision": 0.4525993883792049, "eval_allNLI-dev_dot_recall": 0.8554913294797688, "eval_allNLI-dev_euclidean_accuracy": 0.71875, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.63183879852295, "eval_allNLI-dev_euclidean_ap": 0.6159870425233637, "eval_allNLI-dev_euclidean_f1": 0.6340425531914894, "eval_allNLI-dev_euclidean_f1_threshold": 16.068784713745117, "eval_allNLI-dev_euclidean_precision": 0.5016835016835017, "eval_allNLI-dev_euclidean_recall": 0.861271676300578, "eval_allNLI-dev_manhattan_accuracy": 0.71875, "eval_allNLI-dev_manhattan_accuracy_threshold": 276.88348388671875, "eval_allNLI-dev_manhattan_ap": 0.6170778409389708, "eval_allNLI-dev_manhattan_f1": 0.6341463414634145, "eval_allNLI-dev_manhattan_f1_threshold": 346.664794921875, "eval_allNLI-dev_manhattan_precision": 0.4890282131661442, "eval_allNLI-dev_manhattan_recall": 0.9017341040462428, "eval_allNLI-dev_max_accuracy": 0.71875, "eval_allNLI-dev_max_accuracy_threshold": 385.4921875, "eval_allNLI-dev_max_ap": 0.6170778409389708, "eval_allNLI-dev_max_f1": 0.6341463414634145, "eval_allNLI-dev_max_f1_threshold": 346.664794921875, "eval_allNLI-dev_max_precision": 0.5018050541516246, "eval_allNLI-dev_max_recall": 0.9017341040462428, "eval_sequential_score": 0.7625359213945974, "eval_sts-test_pearson_cosine": 0.8555026370500123, "eval_sts-test_pearson_dot": 0.8409608218632749, "eval_sts-test_pearson_euclidean": 0.8796599797207689, "eval_sts-test_pearson_manhattan": 0.8764989279214906, "eval_sts-test_pearson_max": 0.8796599797207689, "eval_sts-test_spearman_cosine": 0.881412744386693, "eval_sts-test_spearman_dot": 0.8379413690710875, "eval_sts-test_spearman_euclidean": 0.8766233301388011, "eval_sts-test_spearman_manhattan": 0.8747592610968039, "eval_sts-test_spearman_max": 0.881412744386693, "eval_vitaminc-pairs_loss": 3.038001298904419, "eval_vitaminc-pairs_runtime": 3.2757, "eval_vitaminc-pairs_samples_per_second": 39.075, "eval_vitaminc-pairs_steps_per_second": 0.305, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_negation-triplets_loss": 0.9369493126869202, "eval_negation-triplets_runtime": 0.7782, "eval_negation-triplets_samples_per_second": 164.473, "eval_negation-triplets_steps_per_second": 1.285, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_scitail-pairs-pos_loss": 0.14069011807441711, "eval_scitail-pairs-pos_runtime": 0.9623, "eval_scitail-pairs-pos_samples_per_second": 133.021, "eval_scitail-pairs-pos_steps_per_second": 1.039, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_scitail-pairs-qa_loss": 0.0005728623946197331, "eval_scitail-pairs-qa_runtime": 0.6374, "eval_scitail-pairs-qa_samples_per_second": 200.82, "eval_scitail-pairs-qa_steps_per_second": 1.569, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_xsum-pairs_loss": 0.2676815390586853, "eval_xsum-pairs_runtime": 3.0547, "eval_xsum-pairs_samples_per_second": 41.903, "eval_xsum-pairs_steps_per_second": 0.327, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_sciq_pairs_loss": 0.09498130530118942, "eval_sciq_pairs_runtime": 3.6503, "eval_sciq_pairs_samples_per_second": 35.066, "eval_sciq_pairs_steps_per_second": 0.274, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_qasc_pairs_loss": 0.15024949610233307, "eval_qasc_pairs_runtime": 0.6455, "eval_qasc_pairs_samples_per_second": 198.302, "eval_qasc_pairs_steps_per_second": 1.549, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_openbookqa_pairs_loss": 0.7193230390548706, "eval_openbookqa_pairs_runtime": 0.6143, "eval_openbookqa_pairs_samples_per_second": 208.355, "eval_openbookqa_pairs_steps_per_second": 1.628, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_msmarco_pairs_loss": 0.7443469166755676, "eval_msmarco_pairs_runtime": 1.5324, "eval_msmarco_pairs_samples_per_second": 83.53, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_nq_pairs_loss": 0.606505274772644, "eval_nq_pairs_runtime": 2.9157, "eval_nq_pairs_samples_per_second": 43.9, "eval_nq_pairs_steps_per_second": 0.343, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_trivia_pairs_loss": 0.7206546068191528, "eval_trivia_pairs_runtime": 3.4659, "eval_trivia_pairs_samples_per_second": 36.931, "eval_trivia_pairs_steps_per_second": 0.289, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_gooaq_pairs_loss": 0.29458290338516235, "eval_gooaq_pairs_runtime": 0.9668, "eval_gooaq_pairs_samples_per_second": 132.4, "eval_gooaq_pairs_steps_per_second": 1.034, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_paws-pos_loss": 0.02097736857831478, "eval_paws-pos_runtime": 0.7305, "eval_paws-pos_samples_per_second": 175.226, "eval_paws-pos_steps_per_second": 1.369, "step": 2720 }, { "epoch": 2.7983539094650207, "eval_global_dataset_loss": 0.4219074249267578, "eval_global_dataset_runtime": 13.4448, "eval_global_dataset_samples_per_second": 30.941, "eval_global_dataset_steps_per_second": 0.298, "step": 2720 }, { "epoch": 2.799382716049383, "grad_norm": 6.275634765625, "learning_rate": 1.59279989948338e-05, "loss": 0.3801, "step": 2721 }, { "epoch": 2.8004115226337447, "grad_norm": 7.3199052810668945, "learning_rate": 1.5916403781996657e-05, "loss": 0.2157, "step": 2722 }, { "epoch": 2.801440329218107, "grad_norm": 2.804945945739746, "learning_rate": 1.590482085201456e-05, "loss": 0.0392, "step": 2723 }, { "epoch": 2.802469135802469, "grad_norm": 8.218881607055664, "learning_rate": 1.5893250224069504e-05, "loss": 0.2946, "step": 2724 }, { "epoch": 2.8034979423868314, "grad_norm": 5.83034086227417, "learning_rate": 1.58816919173231e-05, "loss": 0.2639, "step": 2725 }, { "epoch": 2.8045267489711936, "grad_norm": 0.6256617307662964, "learning_rate": 1.587014595091656e-05, "loss": 0.0076, "step": 2726 }, { "epoch": 2.8055555555555554, "grad_norm": 3.435020685195923, "learning_rate": 1.5858612343970658e-05, "loss": 0.0757, "step": 2727 }, { "epoch": 2.8065843621399176, "grad_norm": 2.691457509994507, "learning_rate": 1.5847091115585697e-05, "loss": 0.0314, "step": 2728 }, { "epoch": 2.80761316872428, "grad_norm": 8.473223686218262, "learning_rate": 1.5835582284841485e-05, "loss": 0.3507, "step": 2729 }, { "epoch": 2.808641975308642, "grad_norm": 7.120917320251465, "learning_rate": 1.5824085870797302e-05, "loss": 0.3587, "step": 2730 }, { "epoch": 2.8096707818930042, "grad_norm": 4.546665191650391, "learning_rate": 1.5812601892491855e-05, "loss": 0.1349, "step": 2731 }, { "epoch": 2.810699588477366, "grad_norm": 5.792424201965332, "learning_rate": 1.5801130368943254e-05, "loss": 0.2941, "step": 2732 }, { "epoch": 2.8117283950617287, "grad_norm": 2.48039174079895, "learning_rate": 1.5789671319149004e-05, "loss": 0.0324, "step": 2733 }, { "epoch": 2.8127572016460904, "grad_norm": 3.857983350753784, "learning_rate": 1.5778224762085934e-05, "loss": 0.0668, "step": 2734 }, { "epoch": 2.8137860082304527, "grad_norm": 5.270884037017822, "learning_rate": 1.5766790716710195e-05, "loss": 0.2135, "step": 2735 }, { "epoch": 2.814814814814815, "grad_norm": 0.6275924444198608, "learning_rate": 1.5755369201957202e-05, "loss": 0.0085, "step": 2736 }, { "epoch": 2.815843621399177, "grad_norm": 6.545689582824707, "learning_rate": 1.5743960236741647e-05, "loss": 0.1285, "step": 2737 }, { "epoch": 2.8168724279835393, "grad_norm": 4.781361103057861, "learning_rate": 1.5732563839957408e-05, "loss": 0.1586, "step": 2738 }, { "epoch": 2.817901234567901, "grad_norm": 0.475481241941452, "learning_rate": 1.572118003047757e-05, "loss": 0.0061, "step": 2739 }, { "epoch": 2.8189300411522633, "grad_norm": 0.20973369479179382, "learning_rate": 1.5709808827154356e-05, "loss": 0.0024, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_Qnli-dev_cosine_accuracy": 0.70703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7085399031639099, "eval_Qnli-dev_cosine_ap": 0.7453523982548529, "eval_Qnli-dev_cosine_f1": 0.704331450094162, "eval_Qnli-dev_cosine_f1_threshold": 0.6817607879638672, "eval_Qnli-dev_cosine_precision": 0.6338983050847458, "eval_Qnli-dev_cosine_recall": 0.7923728813559322, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 327.62469482421875, "eval_Qnli-dev_dot_ap": 0.6965056697956273, "eval_Qnli-dev_dot_f1": 0.6770833333333334, "eval_Qnli-dev_dot_f1_threshold": 275.64495849609375, "eval_Qnli-dev_dot_precision": 0.5735294117647058, "eval_Qnli-dev_dot_recall": 0.826271186440678, "eval_Qnli-dev_euclidean_accuracy": 0.70703125, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.68637466430664, "eval_Qnli-dev_euclidean_ap": 0.7540728524000946, "eval_Qnli-dev_euclidean_f1": 0.7129798903107861, "eval_Qnli-dev_euclidean_f1_threshold": 16.86321258544922, "eval_Qnli-dev_euclidean_precision": 0.6270096463022508, "eval_Qnli-dev_euclidean_recall": 0.826271186440678, "eval_Qnli-dev_manhattan_accuracy": 0.7109375, "eval_Qnli-dev_manhattan_accuracy_threshold": 317.72479248046875, "eval_Qnli-dev_manhattan_ap": 0.756280736914761, "eval_Qnli-dev_manhattan_f1": 0.709433962264151, "eval_Qnli-dev_manhattan_f1_threshold": 349.44464111328125, "eval_Qnli-dev_manhattan_precision": 0.6394557823129252, "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, "eval_Qnli-dev_max_accuracy": 0.7109375, "eval_Qnli-dev_max_accuracy_threshold": 327.62469482421875, "eval_Qnli-dev_max_ap": 0.756280736914761, "eval_Qnli-dev_max_f1": 0.7129798903107861, "eval_Qnli-dev_max_f1_threshold": 349.44464111328125, "eval_Qnli-dev_max_precision": 0.6394557823129252, "eval_Qnli-dev_max_recall": 0.826271186440678, "eval_allNLI-dev_cosine_accuracy": 0.71875, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8615297079086304, "eval_allNLI-dev_cosine_ap": 0.616330916698455, "eval_allNLI-dev_cosine_f1": 0.6262626262626262, "eval_allNLI-dev_cosine_f1_threshold": 0.7314289808273315, "eval_allNLI-dev_cosine_precision": 0.5560538116591929, "eval_allNLI-dev_cosine_recall": 0.7167630057803468, "eval_allNLI-dev_dot_accuracy": 0.6953125, "eval_allNLI-dev_dot_accuracy_threshold": 322.00518798828125, "eval_allNLI-dev_dot_ap": 0.5637340192149372, "eval_allNLI-dev_dot_f1": 0.5924276169265033, "eval_allNLI-dev_dot_f1_threshold": 276.2833251953125, "eval_allNLI-dev_dot_precision": 0.48188405797101447, "eval_allNLI-dev_dot_recall": 0.7687861271676301, "eval_allNLI-dev_euclidean_accuracy": 0.724609375, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.3758544921875, "eval_allNLI-dev_euclidean_ap": 0.626603754735309, "eval_allNLI-dev_euclidean_f1": 0.6378132118451024, "eval_allNLI-dev_euclidean_f1_threshold": 15.599414825439453, "eval_allNLI-dev_euclidean_precision": 0.5263157894736842, "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, "eval_allNLI-dev_manhattan_accuracy": 0.7265625, "eval_allNLI-dev_manhattan_accuracy_threshold": 294.27972412109375, "eval_allNLI-dev_manhattan_ap": 0.6242142361109848, "eval_allNLI-dev_manhattan_f1": 0.6391304347826088, "eval_allNLI-dev_manhattan_f1_threshold": 337.76708984375, "eval_allNLI-dev_manhattan_precision": 0.5121951219512195, "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, "eval_allNLI-dev_max_accuracy": 0.7265625, "eval_allNLI-dev_max_accuracy_threshold": 322.00518798828125, "eval_allNLI-dev_max_ap": 0.626603754735309, "eval_allNLI-dev_max_f1": 0.6391304347826088, "eval_allNLI-dev_max_f1_threshold": 337.76708984375, "eval_allNLI-dev_max_precision": 0.5560538116591929, "eval_allNLI-dev_max_recall": 0.8497109826589595, "eval_sequential_score": 0.756280736914761, "eval_sts-test_pearson_cosine": 0.8535671019846407, "eval_sts-test_pearson_dot": 0.8409586612706103, "eval_sts-test_pearson_euclidean": 0.8803702271468186, "eval_sts-test_pearson_manhattan": 0.8778816502186445, "eval_sts-test_pearson_max": 0.8803702271468186, "eval_sts-test_spearman_cosine": 0.88250521217978, "eval_sts-test_spearman_dot": 0.8377973147606687, "eval_sts-test_spearman_euclidean": 0.8778226191928293, "eval_sts-test_spearman_manhattan": 0.8764324212577054, "eval_sts-test_spearman_max": 0.88250521217978, "eval_vitaminc-pairs_loss": 3.053738594055176, "eval_vitaminc-pairs_runtime": 3.2317, "eval_vitaminc-pairs_samples_per_second": 39.608, "eval_vitaminc-pairs_steps_per_second": 0.309, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_negation-triplets_loss": 0.9466304183006287, "eval_negation-triplets_runtime": 0.7692, "eval_negation-triplets_samples_per_second": 166.415, "eval_negation-triplets_steps_per_second": 1.3, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_scitail-pairs-pos_loss": 0.143385648727417, "eval_scitail-pairs-pos_runtime": 0.9519, "eval_scitail-pairs-pos_samples_per_second": 134.463, "eval_scitail-pairs-pos_steps_per_second": 1.05, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_scitail-pairs-qa_loss": 0.0005614217952825129, "eval_scitail-pairs-qa_runtime": 0.628, "eval_scitail-pairs-qa_samples_per_second": 203.827, "eval_scitail-pairs-qa_steps_per_second": 1.592, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_xsum-pairs_loss": 0.2813880145549774, "eval_xsum-pairs_runtime": 3.0351, "eval_xsum-pairs_samples_per_second": 42.174, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_sciq_pairs_loss": 0.09257730096578598, "eval_sciq_pairs_runtime": 3.5536, "eval_sciq_pairs_samples_per_second": 36.02, "eval_sciq_pairs_steps_per_second": 0.281, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_qasc_pairs_loss": 0.15334747731685638, "eval_qasc_pairs_runtime": 0.6299, "eval_qasc_pairs_samples_per_second": 203.198, "eval_qasc_pairs_steps_per_second": 1.587, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_openbookqa_pairs_loss": 0.7420552968978882, "eval_openbookqa_pairs_runtime": 0.6065, "eval_openbookqa_pairs_samples_per_second": 211.053, "eval_openbookqa_pairs_steps_per_second": 1.649, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_msmarco_pairs_loss": 0.8207499980926514, "eval_msmarco_pairs_runtime": 1.533, "eval_msmarco_pairs_samples_per_second": 83.498, "eval_msmarco_pairs_steps_per_second": 0.652, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_nq_pairs_loss": 0.6011037230491638, "eval_nq_pairs_runtime": 2.9213, "eval_nq_pairs_samples_per_second": 43.817, "eval_nq_pairs_steps_per_second": 0.342, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_trivia_pairs_loss": 0.7101057767868042, "eval_trivia_pairs_runtime": 3.4485, "eval_trivia_pairs_samples_per_second": 37.118, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_gooaq_pairs_loss": 0.2900901138782501, "eval_gooaq_pairs_runtime": 0.9601, "eval_gooaq_pairs_samples_per_second": 133.315, "eval_gooaq_pairs_steps_per_second": 1.042, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_paws-pos_loss": 0.02033262513577938, "eval_paws-pos_runtime": 0.7158, "eval_paws-pos_samples_per_second": 178.819, "eval_paws-pos_steps_per_second": 1.397, "step": 2740 }, { "epoch": 2.8189300411522633, "eval_global_dataset_loss": 0.4326111674308777, "eval_global_dataset_runtime": 13.4564, "eval_global_dataset_samples_per_second": 30.915, "eval_global_dataset_steps_per_second": 0.297, "step": 2740 }, { "epoch": 2.8199588477366255, "grad_norm": 11.34538745880127, "learning_rate": 1.5698450248819136e-05, "loss": 0.5073, "step": 2741 }, { "epoch": 2.8209876543209877, "grad_norm": 0.17202626168727875, "learning_rate": 1.5687104314282355e-05, "loss": 0.0027, "step": 2742 }, { "epoch": 2.82201646090535, "grad_norm": 4.1724748611450195, "learning_rate": 1.567577104233351e-05, "loss": 0.0433, "step": 2743 }, { "epoch": 2.8230452674897117, "grad_norm": 3.6806344985961914, "learning_rate": 1.5664450451741155e-05, "loss": 0.0523, "step": 2744 }, { "epoch": 2.824074074074074, "grad_norm": 6.95517635345459, "learning_rate": 1.5653142561252822e-05, "loss": 0.2392, "step": 2745 }, { "epoch": 2.825102880658436, "grad_norm": 2.2580952644348145, "learning_rate": 1.564184738959502e-05, "loss": 0.0291, "step": 2746 }, { "epoch": 2.8261316872427984, "grad_norm": 1.407421588897705, "learning_rate": 1.5630564955473192e-05, "loss": 0.0155, "step": 2747 }, { "epoch": 2.8271604938271606, "grad_norm": 6.2319488525390625, "learning_rate": 1.5619295277571685e-05, "loss": 0.2612, "step": 2748 }, { "epoch": 2.8281893004115224, "grad_norm": 1.0354793071746826, "learning_rate": 1.5608038374553728e-05, "loss": 0.0054, "step": 2749 }, { "epoch": 2.8292181069958846, "grad_norm": 5.098178863525391, "learning_rate": 1.559679426506139e-05, "loss": 0.1838, "step": 2750 }, { "epoch": 2.830246913580247, "grad_norm": 5.5478129386901855, "learning_rate": 1.5585562967715547e-05, "loss": 0.1766, "step": 2751 }, { "epoch": 2.831275720164609, "grad_norm": 9.083112716674805, "learning_rate": 1.557434450111586e-05, "loss": 0.3047, "step": 2752 }, { "epoch": 2.8323045267489713, "grad_norm": 15.356025695800781, "learning_rate": 1.5563138883840755e-05, "loss": 1.7843, "step": 2753 }, { "epoch": 2.8333333333333335, "grad_norm": 3.988752841949463, "learning_rate": 1.5551946134447366e-05, "loss": 0.0582, "step": 2754 }, { "epoch": 2.8343621399176957, "grad_norm": 4.775399684906006, "learning_rate": 1.554076627147151e-05, "loss": 0.1396, "step": 2755 }, { "epoch": 2.8353909465020575, "grad_norm": 7.570713520050049, "learning_rate": 1.5529599313427685e-05, "loss": 0.2896, "step": 2756 }, { "epoch": 2.8364197530864197, "grad_norm": 5.494324207305908, "learning_rate": 1.5518445278808992e-05, "loss": 0.1183, "step": 2757 }, { "epoch": 2.837448559670782, "grad_norm": 4.127956390380859, "learning_rate": 1.5507304186087163e-05, "loss": 0.056, "step": 2758 }, { "epoch": 2.838477366255144, "grad_norm": 2.8361735343933105, "learning_rate": 1.549617605371246e-05, "loss": 0.0327, "step": 2759 }, { "epoch": 2.8395061728395063, "grad_norm": 11.014533996582031, "learning_rate": 1.5485060900113706e-05, "loss": 0.3523, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7046625018119812, "eval_Qnli-dev_cosine_ap": 0.7507042570127316, "eval_Qnli-dev_cosine_f1": 0.7090558766859345, "eval_Qnli-dev_cosine_f1_threshold": 0.6831667423248291, "eval_Qnli-dev_cosine_precision": 0.6501766784452296, "eval_Qnli-dev_cosine_recall": 0.7796610169491526, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 324.8873291015625, "eval_Qnli-dev_dot_ap": 0.7063006801430822, "eval_Qnli-dev_dot_f1": 0.6758147512864494, "eval_Qnli-dev_dot_f1_threshold": 266.49249267578125, "eval_Qnli-dev_dot_precision": 0.5677233429394812, "eval_Qnli-dev_dot_recall": 0.8347457627118644, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 16.19821548461914, "eval_Qnli-dev_euclidean_ap": 0.759952293297675, "eval_Qnli-dev_euclidean_f1": 0.7101449275362318, "eval_Qnli-dev_euclidean_f1_threshold": 17.107688903808594, "eval_Qnli-dev_euclidean_precision": 0.620253164556962, "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 324.5292663574219, "eval_Qnli-dev_manhattan_ap": 0.7611700653136995, "eval_Qnli-dev_manhattan_f1": 0.711864406779661, "eval_Qnli-dev_manhattan_f1_threshold": 352.1539611816406, "eval_Qnli-dev_manhattan_precision": 0.6406779661016949, "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 324.8873291015625, "eval_Qnli-dev_max_ap": 0.7611700653136995, "eval_Qnli-dev_max_f1": 0.711864406779661, "eval_Qnli-dev_max_f1_threshold": 352.1539611816406, "eval_Qnli-dev_max_precision": 0.6501766784452296, "eval_Qnli-dev_max_recall": 0.8347457627118644, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.7721467614173889, "eval_allNLI-dev_cosine_ap": 0.6228748282437173, "eval_allNLI-dev_cosine_f1": 0.6247086247086246, "eval_allNLI-dev_cosine_f1_threshold": 0.6959929466247559, "eval_allNLI-dev_cosine_precision": 0.5234375, "eval_allNLI-dev_cosine_recall": 0.7745664739884393, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 358.69061279296875, "eval_allNLI-dev_dot_ap": 0.5675046751587932, "eval_allNLI-dev_dot_f1": 0.5958254269449715, "eval_allNLI-dev_dot_f1_threshold": 238.50958251953125, "eval_allNLI-dev_dot_precision": 0.4435028248587571, "eval_allNLI-dev_dot_recall": 0.9075144508670521, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.964503288269043, "eval_allNLI-dev_euclidean_ap": 0.6348006449228631, "eval_allNLI-dev_euclidean_f1": 0.6451612903225807, "eval_allNLI-dev_euclidean_f1_threshold": 16.178855895996094, "eval_allNLI-dev_euclidean_precision": 0.5136986301369864, "eval_allNLI-dev_euclidean_recall": 0.8670520231213873, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 283.2148132324219, "eval_allNLI-dev_manhattan_ap": 0.6326260484349147, "eval_allNLI-dev_manhattan_f1": 0.6423982869379016, "eval_allNLI-dev_manhattan_f1_threshold": 343.67523193359375, "eval_allNLI-dev_manhattan_precision": 0.5102040816326531, "eval_allNLI-dev_manhattan_recall": 0.8670520231213873, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 358.69061279296875, "eval_allNLI-dev_max_ap": 0.6348006449228631, "eval_allNLI-dev_max_f1": 0.6451612903225807, "eval_allNLI-dev_max_f1_threshold": 343.67523193359375, "eval_allNLI-dev_max_precision": 0.5234375, "eval_allNLI-dev_max_recall": 0.9075144508670521, "eval_sequential_score": 0.7611700653136995, "eval_sts-test_pearson_cosine": 0.8533118819421557, "eval_sts-test_pearson_dot": 0.8429773657595403, "eval_sts-test_pearson_euclidean": 0.8794638615021286, "eval_sts-test_pearson_manhattan": 0.876522154347345, "eval_sts-test_pearson_max": 0.8794638615021286, "eval_sts-test_spearman_cosine": 0.8805929460292639, "eval_sts-test_spearman_dot": 0.8398663445798384, "eval_sts-test_spearman_euclidean": 0.8762706006965142, "eval_sts-test_spearman_manhattan": 0.8738430685223377, "eval_sts-test_spearman_max": 0.8805929460292639, "eval_vitaminc-pairs_loss": 3.01678466796875, "eval_vitaminc-pairs_runtime": 3.2433, "eval_vitaminc-pairs_samples_per_second": 39.466, "eval_vitaminc-pairs_steps_per_second": 0.308, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_negation-triplets_loss": 0.9780347943305969, "eval_negation-triplets_runtime": 0.7693, "eval_negation-triplets_samples_per_second": 166.392, "eval_negation-triplets_steps_per_second": 1.3, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_scitail-pairs-pos_loss": 0.14913403987884521, "eval_scitail-pairs-pos_runtime": 0.9513, "eval_scitail-pairs-pos_samples_per_second": 134.559, "eval_scitail-pairs-pos_steps_per_second": 1.051, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_scitail-pairs-qa_loss": 0.0005038591916672885, "eval_scitail-pairs-qa_runtime": 0.619, "eval_scitail-pairs-qa_samples_per_second": 206.786, "eval_scitail-pairs-qa_steps_per_second": 1.616, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_xsum-pairs_loss": 0.22989708185195923, "eval_xsum-pairs_runtime": 3.0336, "eval_xsum-pairs_samples_per_second": 42.194, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_sciq_pairs_loss": 0.09686174988746643, "eval_sciq_pairs_runtime": 3.5918, "eval_sciq_pairs_samples_per_second": 35.637, "eval_sciq_pairs_steps_per_second": 0.278, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_qasc_pairs_loss": 0.15604230761528015, "eval_qasc_pairs_runtime": 0.6392, "eval_qasc_pairs_samples_per_second": 200.235, "eval_qasc_pairs_steps_per_second": 1.564, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_openbookqa_pairs_loss": 0.723438560962677, "eval_openbookqa_pairs_runtime": 0.6212, "eval_openbookqa_pairs_samples_per_second": 206.061, "eval_openbookqa_pairs_steps_per_second": 1.61, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_msmarco_pairs_loss": 0.8274163603782654, "eval_msmarco_pairs_runtime": 1.5401, "eval_msmarco_pairs_samples_per_second": 83.113, "eval_msmarco_pairs_steps_per_second": 0.649, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_nq_pairs_loss": 0.5706248879432678, "eval_nq_pairs_runtime": 2.9281, "eval_nq_pairs_samples_per_second": 43.714, "eval_nq_pairs_steps_per_second": 0.342, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_trivia_pairs_loss": 0.7182236909866333, "eval_trivia_pairs_runtime": 3.4746, "eval_trivia_pairs_samples_per_second": 36.839, "eval_trivia_pairs_steps_per_second": 0.288, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_gooaq_pairs_loss": 0.3059069514274597, "eval_gooaq_pairs_runtime": 0.9722, "eval_gooaq_pairs_samples_per_second": 131.663, "eval_gooaq_pairs_steps_per_second": 1.029, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_paws-pos_loss": 0.020632443949580193, "eval_paws-pos_runtime": 0.728, "eval_paws-pos_samples_per_second": 175.822, "eval_paws-pos_steps_per_second": 1.374, "step": 2760 }, { "epoch": 2.8395061728395063, "eval_global_dataset_loss": 0.4452262222766876, "eval_global_dataset_runtime": 13.5093, "eval_global_dataset_samples_per_second": 30.794, "eval_global_dataset_steps_per_second": 0.296, "step": 2760 }, { "epoch": 2.840534979423868, "grad_norm": 8.413227081298828, "learning_rate": 1.547395874369822e-05, "loss": 0.3041, "step": 2761 }, { "epoch": 2.8415637860082303, "grad_norm": 5.359739780426025, "learning_rate": 1.5462869602851813e-05, "loss": 0.1672, "step": 2762 }, { "epoch": 2.8425925925925926, "grad_norm": 6.931215286254883, "learning_rate": 1.5451793495938723e-05, "loss": 0.224, "step": 2763 }, { "epoch": 2.843621399176955, "grad_norm": 5.827603340148926, "learning_rate": 1.544073044130161e-05, "loss": 0.1863, "step": 2764 }, { "epoch": 2.844650205761317, "grad_norm": 5.684159278869629, "learning_rate": 1.542968045726151e-05, "loss": 0.2144, "step": 2765 }, { "epoch": 2.8456790123456788, "grad_norm": 7.933666706085205, "learning_rate": 1.541864356211784e-05, "loss": 0.2654, "step": 2766 }, { "epoch": 2.846707818930041, "grad_norm": 0.039714861661195755, "learning_rate": 1.5407619774148303e-05, "loss": 0.0005, "step": 2767 }, { "epoch": 2.847736625514403, "grad_norm": 4.054157257080078, "learning_rate": 1.5396609111608933e-05, "loss": 0.0922, "step": 2768 }, { "epoch": 2.8487654320987654, "grad_norm": 0.10455355048179626, "learning_rate": 1.5385611592733996e-05, "loss": 0.0016, "step": 2769 }, { "epoch": 2.8497942386831276, "grad_norm": 4.049312114715576, "learning_rate": 1.537462723573602e-05, "loss": 0.2332, "step": 2770 }, { "epoch": 2.85082304526749, "grad_norm": 0.2220650017261505, "learning_rate": 1.536365605880571e-05, "loss": 0.0021, "step": 2771 }, { "epoch": 2.851851851851852, "grad_norm": 4.803959369659424, "learning_rate": 1.5352698080111964e-05, "loss": 0.1657, "step": 2772 }, { "epoch": 2.852880658436214, "grad_norm": 3.475996971130371, "learning_rate": 1.5341753317801806e-05, "loss": 0.0445, "step": 2773 }, { "epoch": 2.853909465020576, "grad_norm": 3.820665121078491, "learning_rate": 1.533082179000039e-05, "loss": 0.0926, "step": 2774 }, { "epoch": 2.8549382716049383, "grad_norm": 1.5347743034362793, "learning_rate": 1.531990351481094e-05, "loss": 0.0146, "step": 2775 }, { "epoch": 2.8559670781893005, "grad_norm": 2.5668106079101562, "learning_rate": 1.5308998510314735e-05, "loss": 0.0707, "step": 2776 }, { "epoch": 2.8569958847736627, "grad_norm": 3.510948419570923, "learning_rate": 1.529810679457109e-05, "loss": 0.051, "step": 2777 }, { "epoch": 2.8580246913580245, "grad_norm": 10.56995964050293, "learning_rate": 1.5287228385617287e-05, "loss": 0.3408, "step": 2778 }, { "epoch": 2.8590534979423867, "grad_norm": 0.16016097366809845, "learning_rate": 1.52763633014686e-05, "loss": 0.0026, "step": 2779 }, { "epoch": 2.860082304526749, "grad_norm": 8.470218658447266, "learning_rate": 1.5265511560118224e-05, "loss": 0.2904, "step": 2780 }, { "epoch": 2.860082304526749, "eval_Qnli-dev_cosine_accuracy": 0.70703125, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7249701619148254, "eval_Qnli-dev_cosine_ap": 0.753005216772905, "eval_Qnli-dev_cosine_f1": 0.7087198515769945, "eval_Qnli-dev_cosine_f1_threshold": 0.6696175336837769, "eval_Qnli-dev_cosine_precision": 0.6303630363036303, "eval_Qnli-dev_cosine_recall": 0.809322033898305, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 318.8723449707031, "eval_Qnli-dev_dot_ap": 0.7062885357695161, "eval_Qnli-dev_dot_f1": 0.6732026143790849, "eval_Qnli-dev_dot_f1_threshold": 256.91363525390625, "eval_Qnli-dev_dot_precision": 0.5478723404255319, "eval_Qnli-dev_dot_recall": 0.8728813559322034, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.319984436035156, "eval_Qnli-dev_euclidean_ap": 0.7624737004717766, "eval_Qnli-dev_euclidean_f1": 0.7153846153846154, "eval_Qnli-dev_euclidean_f1_threshold": 16.373828887939453, "eval_Qnli-dev_euclidean_precision": 0.6549295774647887, "eval_Qnli-dev_euclidean_recall": 0.788135593220339, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 344.95257568359375, "eval_Qnli-dev_manhattan_ap": 0.7653891447634631, "eval_Qnli-dev_manhattan_f1": 0.7210626185958254, "eval_Qnli-dev_manhattan_f1_threshold": 346.61578369140625, "eval_Qnli-dev_manhattan_precision": 0.6529209621993127, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 344.95257568359375, "eval_Qnli-dev_max_ap": 0.7653891447634631, "eval_Qnli-dev_max_f1": 0.7210626185958254, "eval_Qnli-dev_max_f1_threshold": 346.61578369140625, "eval_Qnli-dev_max_precision": 0.6549295774647887, "eval_Qnli-dev_max_recall": 0.8728813559322034, "eval_allNLI-dev_cosine_accuracy": 0.7265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8414279222488403, "eval_allNLI-dev_cosine_ap": 0.6246765146154852, "eval_allNLI-dev_cosine_f1": 0.625531914893617, "eval_allNLI-dev_cosine_f1_threshold": 0.6659203767776489, "eval_allNLI-dev_cosine_precision": 0.494949494949495, "eval_allNLI-dev_cosine_recall": 0.8497109826589595, "eval_allNLI-dev_dot_accuracy": 0.69921875, "eval_allNLI-dev_dot_accuracy_threshold": 348.42572021484375, "eval_allNLI-dev_dot_ap": 0.5653530792930468, "eval_allNLI-dev_dot_f1": 0.5908096280087528, "eval_allNLI-dev_dot_f1_threshold": 258.6340026855469, "eval_allNLI-dev_dot_precision": 0.4753521126760563, "eval_allNLI-dev_dot_recall": 0.7803468208092486, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.908769607543945, "eval_allNLI-dev_euclidean_ap": 0.6342216357516873, "eval_allNLI-dev_euclidean_f1": 0.6423982869379016, "eval_allNLI-dev_euclidean_f1_threshold": 16.078041076660156, "eval_allNLI-dev_euclidean_precision": 0.5102040816326531, "eval_allNLI-dev_euclidean_recall": 0.8670520231213873, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 279.65191650390625, "eval_allNLI-dev_manhattan_ap": 0.6316056597363421, "eval_allNLI-dev_manhattan_f1": 0.6431718061674009, "eval_allNLI-dev_manhattan_f1_threshold": 335.85015869140625, "eval_allNLI-dev_manhattan_precision": 0.5195729537366548, "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 348.42572021484375, "eval_allNLI-dev_max_ap": 0.6342216357516873, "eval_allNLI-dev_max_f1": 0.6431718061674009, "eval_allNLI-dev_max_f1_threshold": 335.85015869140625, "eval_allNLI-dev_max_precision": 0.5195729537366548, "eval_allNLI-dev_max_recall": 0.8670520231213873, "eval_sequential_score": 0.7653891447634631, "eval_sts-test_pearson_cosine": 0.8535313159853823, "eval_sts-test_pearson_dot": 0.8424756159060989, "eval_sts-test_pearson_euclidean": 0.8804248671851974, "eval_sts-test_pearson_manhattan": 0.8776988910774568, "eval_sts-test_pearson_max": 0.8804248671851974, "eval_sts-test_spearman_cosine": 0.8813343401767353, "eval_sts-test_spearman_dot": 0.8386432478547502, "eval_sts-test_spearman_euclidean": 0.8770134716356113, "eval_sts-test_spearman_manhattan": 0.874476075114719, "eval_sts-test_spearman_max": 0.8813343401767353, "eval_vitaminc-pairs_loss": 2.947045087814331, "eval_vitaminc-pairs_runtime": 3.3131, "eval_vitaminc-pairs_samples_per_second": 38.634, "eval_vitaminc-pairs_steps_per_second": 0.302, "step": 2780 }, { "epoch": 2.860082304526749, "eval_negation-triplets_loss": 0.9763628840446472, "eval_negation-triplets_runtime": 0.8063, "eval_negation-triplets_samples_per_second": 158.746, "eval_negation-triplets_steps_per_second": 1.24, "step": 2780 }, { "epoch": 2.860082304526749, "eval_scitail-pairs-pos_loss": 0.1335550993680954, "eval_scitail-pairs-pos_runtime": 1.034, "eval_scitail-pairs-pos_samples_per_second": 123.786, "eval_scitail-pairs-pos_steps_per_second": 0.967, "step": 2780 }, { "epoch": 2.860082304526749, "eval_scitail-pairs-qa_loss": 0.00048267480451613665, "eval_scitail-pairs-qa_runtime": 0.658, "eval_scitail-pairs-qa_samples_per_second": 194.533, "eval_scitail-pairs-qa_steps_per_second": 1.52, "step": 2780 }, { "epoch": 2.860082304526749, "eval_xsum-pairs_loss": 0.21704894304275513, "eval_xsum-pairs_runtime": 3.0585, "eval_xsum-pairs_samples_per_second": 41.851, "eval_xsum-pairs_steps_per_second": 0.327, "step": 2780 }, { "epoch": 2.860082304526749, "eval_sciq_pairs_loss": 0.09612545371055603, "eval_sciq_pairs_runtime": 3.6995, "eval_sciq_pairs_samples_per_second": 34.599, "eval_sciq_pairs_steps_per_second": 0.27, "step": 2780 }, { "epoch": 2.860082304526749, "eval_qasc_pairs_loss": 0.13636448979377747, "eval_qasc_pairs_runtime": 0.6599, "eval_qasc_pairs_samples_per_second": 193.956, "eval_qasc_pairs_steps_per_second": 1.515, "step": 2780 }, { "epoch": 2.860082304526749, "eval_openbookqa_pairs_loss": 0.681692361831665, "eval_openbookqa_pairs_runtime": 0.6356, "eval_openbookqa_pairs_samples_per_second": 201.394, "eval_openbookqa_pairs_steps_per_second": 1.573, "step": 2780 }, { "epoch": 2.860082304526749, "eval_msmarco_pairs_loss": 0.8047864437103271, "eval_msmarco_pairs_runtime": 1.5538, "eval_msmarco_pairs_samples_per_second": 82.38, "eval_msmarco_pairs_steps_per_second": 0.644, "step": 2780 }, { "epoch": 2.860082304526749, "eval_nq_pairs_loss": 0.5886882543563843, "eval_nq_pairs_runtime": 2.9353, "eval_nq_pairs_samples_per_second": 43.607, "eval_nq_pairs_steps_per_second": 0.341, "step": 2780 }, { "epoch": 2.860082304526749, "eval_trivia_pairs_loss": 0.7312610149383545, "eval_trivia_pairs_runtime": 3.4925, "eval_trivia_pairs_samples_per_second": 36.65, "eval_trivia_pairs_steps_per_second": 0.286, "step": 2780 }, { "epoch": 2.860082304526749, "eval_gooaq_pairs_loss": 0.3003983497619629, "eval_gooaq_pairs_runtime": 0.9771, "eval_gooaq_pairs_samples_per_second": 130.994, "eval_gooaq_pairs_steps_per_second": 1.023, "step": 2780 }, { "epoch": 2.860082304526749, "eval_paws-pos_loss": 0.021690567955374718, "eval_paws-pos_runtime": 0.7529, "eval_paws-pos_samples_per_second": 170.015, "eval_paws-pos_steps_per_second": 1.328, "step": 2780 }, { "epoch": 2.860082304526749, "eval_global_dataset_loss": 0.43853503465652466, "eval_global_dataset_runtime": 13.5535, "eval_global_dataset_samples_per_second": 30.693, "eval_global_dataset_steps_per_second": 0.295, "step": 2780 }, { "epoch": 2.861111111111111, "grad_norm": 3.4452385902404785, "learning_rate": 1.5254673179537245e-05, "loss": 0.0445, "step": 2781 }, { "epoch": 2.8621399176954734, "grad_norm": 3.121638536453247, "learning_rate": 1.5243848177674643e-05, "loss": 0.0747, "step": 2782 }, { "epoch": 2.863168724279835, "grad_norm": 6.5182414054870605, "learning_rate": 1.5233036572457236e-05, "loss": 0.3198, "step": 2783 }, { "epoch": 2.8641975308641974, "grad_norm": 0.09131138771772385, "learning_rate": 1.5222238381789647e-05, "loss": 0.0007, "step": 2784 }, { "epoch": 2.8652263374485596, "grad_norm": 2.7931461334228516, "learning_rate": 1.5211453623554292e-05, "loss": 0.0375, "step": 2785 }, { "epoch": 2.866255144032922, "grad_norm": 6.989501476287842, "learning_rate": 1.5200682315611346e-05, "loss": 0.216, "step": 2786 }, { "epoch": 2.867283950617284, "grad_norm": 3.951084852218628, "learning_rate": 1.5189924475798695e-05, "loss": 0.1062, "step": 2787 }, { "epoch": 2.8683127572016462, "grad_norm": 0.30321571230888367, "learning_rate": 1.5179180121931943e-05, "loss": 0.0033, "step": 2788 }, { "epoch": 2.8693415637860085, "grad_norm": 3.421691656112671, "learning_rate": 1.5168449271804337e-05, "loss": 0.0516, "step": 2789 }, { "epoch": 2.8703703703703702, "grad_norm": 5.104531288146973, "learning_rate": 1.5157731943186781e-05, "loss": 0.0978, "step": 2790 }, { "epoch": 2.8713991769547325, "grad_norm": 3.4732930660247803, "learning_rate": 1.5147028153827774e-05, "loss": 0.0613, "step": 2791 }, { "epoch": 2.8724279835390947, "grad_norm": 2.909090042114258, "learning_rate": 1.5136337921453395e-05, "loss": 0.0458, "step": 2792 }, { "epoch": 2.873456790123457, "grad_norm": 3.3270890712738037, "learning_rate": 1.5125661263767274e-05, "loss": 0.0473, "step": 2793 }, { "epoch": 2.874485596707819, "grad_norm": 4.291104316711426, "learning_rate": 1.5114998198450561e-05, "loss": 0.1053, "step": 2794 }, { "epoch": 2.875514403292181, "grad_norm": 9.61398983001709, "learning_rate": 1.5104348743161897e-05, "loss": 0.3085, "step": 2795 }, { "epoch": 2.876543209876543, "grad_norm": 1.8854296207427979, "learning_rate": 1.5093712915537383e-05, "loss": 0.0306, "step": 2796 }, { "epoch": 2.8775720164609053, "grad_norm": 5.396960258483887, "learning_rate": 1.5083090733190546e-05, "loss": 0.1675, "step": 2797 }, { "epoch": 2.8786008230452675, "grad_norm": 4.63054895401001, "learning_rate": 1.5072482213712327e-05, "loss": 0.0873, "step": 2798 }, { "epoch": 2.8796296296296298, "grad_norm": 7.028068542480469, "learning_rate": 1.5061887374671033e-05, "loss": 0.1715, "step": 2799 }, { "epoch": 2.8806584362139915, "grad_norm": 3.4828648567199707, "learning_rate": 1.5051306233612318e-05, "loss": 0.0644, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.707133412361145, "eval_Qnli-dev_cosine_ap": 0.7467230485117031, "eval_Qnli-dev_cosine_f1": 0.6956521739130435, "eval_Qnli-dev_cosine_f1_threshold": 0.6531994938850403, "eval_Qnli-dev_cosine_precision": 0.6075949367088608, "eval_Qnli-dev_cosine_recall": 0.8135593220338984, "eval_Qnli-dev_dot_accuracy": 0.67578125, "eval_Qnli-dev_dot_accuracy_threshold": 312.34259033203125, "eval_Qnli-dev_dot_ap": 0.7030283977256135, "eval_Qnli-dev_dot_f1": 0.6699346405228759, "eval_Qnli-dev_dot_f1_threshold": 243.75765991210938, "eval_Qnli-dev_dot_precision": 0.5452127659574468, "eval_Qnli-dev_dot_recall": 0.8686440677966102, "eval_Qnli-dev_euclidean_accuracy": 0.712890625, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.162054061889648, "eval_Qnli-dev_euclidean_ap": 0.7545333049305465, "eval_Qnli-dev_euclidean_f1": 0.7047619047619047, "eval_Qnli-dev_euclidean_f1_threshold": 16.501787185668945, "eval_Qnli-dev_euclidean_precision": 0.6401384083044983, "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 321.15911865234375, "eval_Qnli-dev_manhattan_ap": 0.7588811102917644, "eval_Qnli-dev_manhattan_f1": 0.708955223880597, "eval_Qnli-dev_manhattan_f1_threshold": 352.2951965332031, "eval_Qnli-dev_manhattan_precision": 0.6333333333333333, "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, "eval_Qnli-dev_max_accuracy": 0.712890625, "eval_Qnli-dev_max_accuracy_threshold": 321.15911865234375, "eval_Qnli-dev_max_ap": 0.7588811102917644, "eval_Qnli-dev_max_f1": 0.708955223880597, "eval_Qnli-dev_max_f1_threshold": 352.2951965332031, "eval_Qnli-dev_max_precision": 0.6401384083044983, "eval_Qnli-dev_max_recall": 0.8686440677966102, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8479229211807251, "eval_allNLI-dev_cosine_ap": 0.618992574967938, "eval_allNLI-dev_cosine_f1": 0.6252771618625276, "eval_allNLI-dev_cosine_f1_threshold": 0.6774418354034424, "eval_allNLI-dev_cosine_precision": 0.5071942446043165, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 340.0029296875, "eval_allNLI-dev_dot_ap": 0.55901460469066, "eval_allNLI-dev_dot_f1": 0.5914221218961625, "eval_allNLI-dev_dot_f1_threshold": 259.9163818359375, "eval_allNLI-dev_dot_precision": 0.48518518518518516, "eval_allNLI-dev_dot_recall": 0.7572254335260116, "eval_allNLI-dev_euclidean_accuracy": 0.7265625, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.150836944580078, "eval_allNLI-dev_euclidean_ap": 0.6315040846740883, "eval_allNLI-dev_euclidean_f1": 0.644880174291939, "eval_allNLI-dev_euclidean_f1_threshold": 16.05705451965332, "eval_allNLI-dev_euclidean_precision": 0.5174825174825175, "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, "eval_allNLI-dev_manhattan_accuracy": 0.724609375, "eval_allNLI-dev_manhattan_accuracy_threshold": 264.17425537109375, "eval_allNLI-dev_manhattan_ap": 0.6270021252492941, "eval_allNLI-dev_manhattan_f1": 0.6355555555555555, "eval_allNLI-dev_manhattan_f1_threshold": 332.84814453125, "eval_allNLI-dev_manhattan_precision": 0.516245487364621, "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, "eval_allNLI-dev_max_accuracy": 0.7265625, "eval_allNLI-dev_max_accuracy_threshold": 340.0029296875, "eval_allNLI-dev_max_ap": 0.6315040846740883, "eval_allNLI-dev_max_f1": 0.644880174291939, "eval_allNLI-dev_max_f1_threshold": 332.84814453125, "eval_allNLI-dev_max_precision": 0.5174825174825175, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7588811102917644, "eval_sts-test_pearson_cosine": 0.8553072192611906, "eval_sts-test_pearson_dot": 0.8418706878624547, "eval_sts-test_pearson_euclidean": 0.8804664709342795, "eval_sts-test_pearson_manhattan": 0.8775895837827928, "eval_sts-test_pearson_max": 0.8804664709342795, "eval_sts-test_spearman_cosine": 0.8816786116306192, "eval_sts-test_spearman_dot": 0.8357844477745447, "eval_sts-test_spearman_euclidean": 0.877386607653093, "eval_sts-test_spearman_manhattan": 0.8738525110384914, "eval_sts-test_spearman_max": 0.8816786116306192, "eval_vitaminc-pairs_loss": 3.1068437099456787, "eval_vitaminc-pairs_runtime": 3.3079, "eval_vitaminc-pairs_samples_per_second": 38.696, "eval_vitaminc-pairs_steps_per_second": 0.302, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_negation-triplets_loss": 0.9766592383384705, "eval_negation-triplets_runtime": 0.8051, "eval_negation-triplets_samples_per_second": 158.977, "eval_negation-triplets_steps_per_second": 1.242, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_scitail-pairs-pos_loss": 0.13415968418121338, "eval_scitail-pairs-pos_runtime": 1.0252, "eval_scitail-pairs-pos_samples_per_second": 124.85, "eval_scitail-pairs-pos_steps_per_second": 0.975, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_scitail-pairs-qa_loss": 0.000566856237128377, "eval_scitail-pairs-qa_runtime": 0.6795, "eval_scitail-pairs-qa_samples_per_second": 188.371, "eval_scitail-pairs-qa_steps_per_second": 1.472, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_xsum-pairs_loss": 0.23061877489089966, "eval_xsum-pairs_runtime": 3.0771, "eval_xsum-pairs_samples_per_second": 41.597, "eval_xsum-pairs_steps_per_second": 0.325, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_sciq_pairs_loss": 0.09913340210914612, "eval_sciq_pairs_runtime": 3.6873, "eval_sciq_pairs_samples_per_second": 34.714, "eval_sciq_pairs_steps_per_second": 0.271, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_qasc_pairs_loss": 0.14059878885746002, "eval_qasc_pairs_runtime": 0.6647, "eval_qasc_pairs_samples_per_second": 192.555, "eval_qasc_pairs_steps_per_second": 1.504, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_openbookqa_pairs_loss": 0.6660757660865784, "eval_openbookqa_pairs_runtime": 0.631, "eval_openbookqa_pairs_samples_per_second": 202.853, "eval_openbookqa_pairs_steps_per_second": 1.585, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_msmarco_pairs_loss": 0.8374690413475037, "eval_msmarco_pairs_runtime": 1.552, "eval_msmarco_pairs_samples_per_second": 82.472, "eval_msmarco_pairs_steps_per_second": 0.644, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_nq_pairs_loss": 0.5945377945899963, "eval_nq_pairs_runtime": 2.9405, "eval_nq_pairs_samples_per_second": 43.531, "eval_nq_pairs_steps_per_second": 0.34, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_trivia_pairs_loss": 0.7081261277198792, "eval_trivia_pairs_runtime": 3.4663, "eval_trivia_pairs_samples_per_second": 36.927, "eval_trivia_pairs_steps_per_second": 0.288, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_gooaq_pairs_loss": 0.3039236068725586, "eval_gooaq_pairs_runtime": 0.9635, "eval_gooaq_pairs_samples_per_second": 132.853, "eval_gooaq_pairs_steps_per_second": 1.038, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_paws-pos_loss": 0.021994709968566895, "eval_paws-pos_runtime": 0.7199, "eval_paws-pos_samples_per_second": 177.814, "eval_paws-pos_steps_per_second": 1.389, "step": 2800 }, { "epoch": 2.8806584362139915, "eval_global_dataset_loss": 0.4557924270629883, "eval_global_dataset_runtime": 13.4332, "eval_global_dataset_samples_per_second": 30.968, "eval_global_dataset_steps_per_second": 0.298, "step": 2800 }, { "epoch": 2.8816872427983538, "grad_norm": 2.778085947036743, "learning_rate": 1.5040738808059146e-05, "loss": 0.0882, "step": 2801 }, { "epoch": 2.882716049382716, "grad_norm": 7.227503776550293, "learning_rate": 1.5030185115511775e-05, "loss": 0.241, "step": 2802 }, { "epoch": 2.883744855967078, "grad_norm": 7.289071083068848, "learning_rate": 1.5019645173447718e-05, "loss": 0.2773, "step": 2803 }, { "epoch": 2.8847736625514404, "grad_norm": 6.984453201293945, "learning_rate": 1.5009118999321717e-05, "loss": 0.2181, "step": 2804 }, { "epoch": 2.8858024691358026, "grad_norm": 9.313874244689941, "learning_rate": 1.4998606610565706e-05, "loss": 0.4059, "step": 2805 }, { "epoch": 2.886831275720165, "grad_norm": 2.0022683143615723, "learning_rate": 1.4988108024588799e-05, "loss": 0.0205, "step": 2806 }, { "epoch": 2.8878600823045266, "grad_norm": 4.580897808074951, "learning_rate": 1.4977623258777249e-05, "loss": 0.1, "step": 2807 }, { "epoch": 2.888888888888889, "grad_norm": 9.304757118225098, "learning_rate": 1.496715233049442e-05, "loss": 0.3347, "step": 2808 }, { "epoch": 2.889917695473251, "grad_norm": 6.6457929611206055, "learning_rate": 1.4956695257080762e-05, "loss": 0.1546, "step": 2809 }, { "epoch": 2.8909465020576133, "grad_norm": 0.0, "learning_rate": 1.4946252055853781e-05, "loss": 0.0, "step": 2810 }, { "epoch": 2.8919753086419755, "grad_norm": 4.20051908493042, "learning_rate": 1.493582274410801e-05, "loss": 0.0928, "step": 2811 }, { "epoch": 2.8930041152263373, "grad_norm": 0.0, "learning_rate": 1.4925407339114982e-05, "loss": 0.0, "step": 2812 }, { "epoch": 2.8940329218106995, "grad_norm": 1.402176856994629, "learning_rate": 1.4915005858123191e-05, "loss": 0.0152, "step": 2813 }, { "epoch": 2.8950617283950617, "grad_norm": 4.665887355804443, "learning_rate": 1.4904618318358079e-05, "loss": 0.1831, "step": 2814 }, { "epoch": 2.896090534979424, "grad_norm": 3.1025822162628174, "learning_rate": 1.4894244737022005e-05, "loss": 0.0478, "step": 2815 }, { "epoch": 2.897119341563786, "grad_norm": 3.428171157836914, "learning_rate": 1.4883885131294202e-05, "loss": 0.074, "step": 2816 }, { "epoch": 2.898148148148148, "grad_norm": 4.187236785888672, "learning_rate": 1.4873539518330766e-05, "loss": 0.0548, "step": 2817 }, { "epoch": 2.89917695473251, "grad_norm": 9.12755012512207, "learning_rate": 1.486320791526461e-05, "loss": 0.3734, "step": 2818 }, { "epoch": 2.9002057613168724, "grad_norm": 0.04635777324438095, "learning_rate": 1.4852890339205466e-05, "loss": 0.0004, "step": 2819 }, { "epoch": 2.9012345679012346, "grad_norm": 0.42167264223098755, "learning_rate": 1.4842586807239813e-05, "loss": 0.016, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7221112251281738, "eval_Qnli-dev_cosine_ap": 0.7438538096882514, "eval_Qnli-dev_cosine_f1": 0.6994328922495274, "eval_Qnli-dev_cosine_f1_threshold": 0.6667539477348328, "eval_Qnli-dev_cosine_precision": 0.6313993174061433, "eval_Qnli-dev_cosine_recall": 0.7838983050847458, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 317.252197265625, "eval_Qnli-dev_dot_ap": 0.6984118059482834, "eval_Qnli-dev_dot_f1": 0.6689075630252102, "eval_Qnli-dev_dot_f1_threshold": 254.1169891357422, "eval_Qnli-dev_dot_precision": 0.5543175487465181, "eval_Qnli-dev_dot_recall": 0.8432203389830508, "eval_Qnli-dev_euclidean_accuracy": 0.716796875, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.042196273803711, "eval_Qnli-dev_euclidean_ap": 0.7531958823206504, "eval_Qnli-dev_euclidean_f1": 0.6969696969696969, "eval_Qnli-dev_euclidean_f1_threshold": 16.798561096191406, "eval_Qnli-dev_euclidean_precision": 0.6301369863013698, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.712890625, "eval_Qnli-dev_manhattan_accuracy_threshold": 327.0810241699219, "eval_Qnli-dev_manhattan_ap": 0.7570772554186656, "eval_Qnli-dev_manhattan_f1": 0.6981818181818182, "eval_Qnli-dev_manhattan_f1_threshold": 361.4710998535156, "eval_Qnli-dev_manhattan_precision": 0.6114649681528662, "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 327.0810241699219, "eval_Qnli-dev_max_ap": 0.7570772554186656, "eval_Qnli-dev_max_f1": 0.6994328922495274, "eval_Qnli-dev_max_f1_threshold": 361.4710998535156, "eval_Qnli-dev_max_precision": 0.6313993174061433, "eval_Qnli-dev_max_recall": 0.8432203389830508, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8403334617614746, "eval_allNLI-dev_cosine_ap": 0.6185711074915496, "eval_allNLI-dev_cosine_f1": 0.6187363834422658, "eval_allNLI-dev_cosine_f1_threshold": 0.6757407784461975, "eval_allNLI-dev_cosine_precision": 0.4965034965034965, "eval_allNLI-dev_cosine_recall": 0.8208092485549133, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 355.16400146484375, "eval_allNLI-dev_dot_ap": 0.5598003212827662, "eval_allNLI-dev_dot_f1": 0.594810379241517, "eval_allNLI-dev_dot_f1_threshold": 244.2494354248047, "eval_allNLI-dev_dot_precision": 0.45426829268292684, "eval_allNLI-dev_dot_recall": 0.861271676300578, "eval_allNLI-dev_euclidean_accuracy": 0.73046875, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.610702514648438, "eval_allNLI-dev_euclidean_ap": 0.6301645562616878, "eval_allNLI-dev_euclidean_f1": 0.6345733041575492, "eval_allNLI-dev_euclidean_f1_threshold": 15.977076530456543, "eval_allNLI-dev_euclidean_precision": 0.5105633802816901, "eval_allNLI-dev_euclidean_recall": 0.838150289017341, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 272.03045654296875, "eval_allNLI-dev_manhattan_ap": 0.6257716209322925, "eval_allNLI-dev_manhattan_f1": 0.6222222222222222, "eval_allNLI-dev_manhattan_f1_threshold": 332.40606689453125, "eval_allNLI-dev_manhattan_precision": 0.5054151624548736, "eval_allNLI-dev_manhattan_recall": 0.8092485549132948, "eval_allNLI-dev_max_accuracy": 0.73046875, "eval_allNLI-dev_max_accuracy_threshold": 355.16400146484375, "eval_allNLI-dev_max_ap": 0.6301645562616878, "eval_allNLI-dev_max_f1": 0.6345733041575492, "eval_allNLI-dev_max_f1_threshold": 332.40606689453125, "eval_allNLI-dev_max_precision": 0.5105633802816901, "eval_allNLI-dev_max_recall": 0.861271676300578, "eval_sequential_score": 0.7570772554186656, "eval_sts-test_pearson_cosine": 0.8521806988836129, "eval_sts-test_pearson_dot": 0.8374705808072017, "eval_sts-test_pearson_euclidean": 0.8774845652428795, "eval_sts-test_pearson_manhattan": 0.874412569499323, "eval_sts-test_pearson_max": 0.8774845652428795, "eval_sts-test_spearman_cosine": 0.8789184433305205, "eval_sts-test_spearman_dot": 0.8302908724286594, "eval_sts-test_spearman_euclidean": 0.874971382075799, "eval_sts-test_spearman_manhattan": 0.8708639323002508, "eval_sts-test_spearman_max": 0.8789184433305205, "eval_vitaminc-pairs_loss": 3.1181490421295166, "eval_vitaminc-pairs_runtime": 3.2291, "eval_vitaminc-pairs_samples_per_second": 39.639, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_negation-triplets_loss": 0.9533400535583496, "eval_negation-triplets_runtime": 0.7865, "eval_negation-triplets_samples_per_second": 162.746, "eval_negation-triplets_steps_per_second": 1.271, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_scitail-pairs-pos_loss": 0.15093332529067993, "eval_scitail-pairs-pos_runtime": 0.9744, "eval_scitail-pairs-pos_samples_per_second": 131.359, "eval_scitail-pairs-pos_steps_per_second": 1.026, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_scitail-pairs-qa_loss": 0.000580314954277128, "eval_scitail-pairs-qa_runtime": 0.6321, "eval_scitail-pairs-qa_samples_per_second": 202.5, "eval_scitail-pairs-qa_steps_per_second": 1.582, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_xsum-pairs_loss": 0.2606850266456604, "eval_xsum-pairs_runtime": 3.0289, "eval_xsum-pairs_samples_per_second": 42.26, "eval_xsum-pairs_steps_per_second": 0.33, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_sciq_pairs_loss": 0.09818581491708755, "eval_sciq_pairs_runtime": 3.5566, "eval_sciq_pairs_samples_per_second": 35.989, "eval_sciq_pairs_steps_per_second": 0.281, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_qasc_pairs_loss": 0.14023363590240479, "eval_qasc_pairs_runtime": 0.6316, "eval_qasc_pairs_samples_per_second": 202.644, "eval_qasc_pairs_steps_per_second": 1.583, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_openbookqa_pairs_loss": 0.6993199586868286, "eval_openbookqa_pairs_runtime": 0.6092, "eval_openbookqa_pairs_samples_per_second": 210.099, "eval_openbookqa_pairs_steps_per_second": 1.641, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_msmarco_pairs_loss": 0.8902102112770081, "eval_msmarco_pairs_runtime": 1.5354, "eval_msmarco_pairs_samples_per_second": 83.365, "eval_msmarco_pairs_steps_per_second": 0.651, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_nq_pairs_loss": 0.6062837839126587, "eval_nq_pairs_runtime": 2.9159, "eval_nq_pairs_samples_per_second": 43.897, "eval_nq_pairs_steps_per_second": 0.343, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_trivia_pairs_loss": 0.7439590692520142, "eval_trivia_pairs_runtime": 3.4504, "eval_trivia_pairs_samples_per_second": 37.097, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_gooaq_pairs_loss": 0.3295494616031647, "eval_gooaq_pairs_runtime": 0.9605, "eval_gooaq_pairs_samples_per_second": 133.27, "eval_gooaq_pairs_steps_per_second": 1.041, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_paws-pos_loss": 0.021959390491247177, "eval_paws-pos_runtime": 0.7161, "eval_paws-pos_samples_per_second": 178.748, "eval_paws-pos_steps_per_second": 1.396, "step": 2820 }, { "epoch": 2.9012345679012346, "eval_global_dataset_loss": 0.4646758437156677, "eval_global_dataset_runtime": 13.4683, "eval_global_dataset_samples_per_second": 30.887, "eval_global_dataset_steps_per_second": 0.297, "step": 2820 }, { "epoch": 2.902263374485597, "grad_norm": 8.362189292907715, "learning_rate": 1.4832297336430889e-05, "loss": 0.2273, "step": 2821 }, { "epoch": 2.903292181069959, "grad_norm": 0.0, "learning_rate": 1.4822021943818636e-05, "loss": 0.0, "step": 2822 }, { "epoch": 2.9043209876543212, "grad_norm": 4.826416492462158, "learning_rate": 1.4811760646419692e-05, "loss": 0.0861, "step": 2823 }, { "epoch": 2.905349794238683, "grad_norm": 5.3283185958862305, "learning_rate": 1.480151346122734e-05, "loss": 0.0833, "step": 2824 }, { "epoch": 2.906378600823045, "grad_norm": 3.127690553665161, "learning_rate": 1.4791280405211503e-05, "loss": 0.1294, "step": 2825 }, { "epoch": 2.9074074074074074, "grad_norm": 3.8759260177612305, "learning_rate": 1.4781061495318694e-05, "loss": 0.0651, "step": 2826 }, { "epoch": 2.9084362139917697, "grad_norm": 4.43839168548584, "learning_rate": 1.4770856748472011e-05, "loss": 0.0856, "step": 2827 }, { "epoch": 2.909465020576132, "grad_norm": 0.28235915303230286, "learning_rate": 1.4760666181571092e-05, "loss": 0.0038, "step": 2828 }, { "epoch": 2.9104938271604937, "grad_norm": 5.451260566711426, "learning_rate": 1.475048981149209e-05, "loss": 0.2477, "step": 2829 }, { "epoch": 2.911522633744856, "grad_norm": 2.619663953781128, "learning_rate": 1.4740327655087657e-05, "loss": 0.0415, "step": 2830 }, { "epoch": 2.912551440329218, "grad_norm": 0.2536655068397522, "learning_rate": 1.4730179729186889e-05, "loss": 0.0029, "step": 2831 }, { "epoch": 2.9135802469135803, "grad_norm": 1.8212436437606812, "learning_rate": 1.4720046050595333e-05, "loss": 0.0169, "step": 2832 }, { "epoch": 2.9146090534979425, "grad_norm": 6.6340837478637695, "learning_rate": 1.4709926636094934e-05, "loss": 0.2376, "step": 2833 }, { "epoch": 2.9156378600823043, "grad_norm": 7.484673976898193, "learning_rate": 1.4699821502444012e-05, "loss": 0.2344, "step": 2834 }, { "epoch": 2.9166666666666665, "grad_norm": 4.752827167510986, "learning_rate": 1.4689730666377235e-05, "loss": 0.2089, "step": 2835 }, { "epoch": 2.9176954732510287, "grad_norm": 6.245153903961182, "learning_rate": 1.467965414460561e-05, "loss": 0.1756, "step": 2836 }, { "epoch": 2.918724279835391, "grad_norm": 6.7615065574646, "learning_rate": 1.4669591953816422e-05, "loss": 0.4231, "step": 2837 }, { "epoch": 2.919753086419753, "grad_norm": 3.019848346710205, "learning_rate": 1.4659544110673226e-05, "loss": 0.0709, "step": 2838 }, { "epoch": 2.9207818930041154, "grad_norm": 0.6952615976333618, "learning_rate": 1.4649510631815817e-05, "loss": 0.0042, "step": 2839 }, { "epoch": 2.9218106995884776, "grad_norm": 2.6654038429260254, "learning_rate": 1.4639491533860213e-05, "loss": 0.0598, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_Qnli-dev_cosine_accuracy": 0.7265625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.72481769323349, "eval_Qnli-dev_cosine_ap": 0.7573652742069512, "eval_Qnli-dev_cosine_f1": 0.7080979284369116, "eval_Qnli-dev_cosine_f1_threshold": 0.6733008623123169, "eval_Qnli-dev_cosine_precision": 0.6372881355932203, "eval_Qnli-dev_cosine_recall": 0.7966101694915254, "eval_Qnli-dev_dot_accuracy": 0.6796875, "eval_Qnli-dev_dot_accuracy_threshold": 307.27618408203125, "eval_Qnli-dev_dot_ap": 0.7037895850652937, "eval_Qnli-dev_dot_f1": 0.6731707317073171, "eval_Qnli-dev_dot_f1_threshold": 253.0263671875, "eval_Qnli-dev_dot_precision": 0.5461741424802111, "eval_Qnli-dev_dot_recall": 0.8771186440677966, "eval_Qnli-dev_euclidean_accuracy": 0.7265625, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.4810209274292, "eval_Qnli-dev_euclidean_ap": 0.7653873312993269, "eval_Qnli-dev_euclidean_f1": 0.7131782945736433, "eval_Qnli-dev_euclidean_f1_threshold": 16.469762802124023, "eval_Qnli-dev_euclidean_precision": 0.6571428571428571, "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, "eval_Qnli-dev_manhattan_accuracy": 0.720703125, "eval_Qnli-dev_manhattan_accuracy_threshold": 318.63385009765625, "eval_Qnli-dev_manhattan_ap": 0.7697271377002631, "eval_Qnli-dev_manhattan_f1": 0.7172675521821632, "eval_Qnli-dev_manhattan_f1_threshold": 352.7674255371094, "eval_Qnli-dev_manhattan_precision": 0.6494845360824743, "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, "eval_Qnli-dev_max_accuracy": 0.7265625, "eval_Qnli-dev_max_accuracy_threshold": 318.63385009765625, "eval_Qnli-dev_max_ap": 0.7697271377002631, "eval_Qnli-dev_max_f1": 0.7172675521821632, "eval_Qnli-dev_max_f1_threshold": 352.7674255371094, "eval_Qnli-dev_max_precision": 0.6571428571428571, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.7948463559150696, "eval_allNLI-dev_cosine_ap": 0.6269859618531588, "eval_allNLI-dev_cosine_f1": 0.6294642857142856, "eval_allNLI-dev_cosine_f1_threshold": 0.6982643604278564, "eval_allNLI-dev_cosine_precision": 0.5127272727272727, "eval_allNLI-dev_cosine_recall": 0.815028901734104, "eval_allNLI-dev_dot_accuracy": 0.69921875, "eval_allNLI-dev_dot_accuracy_threshold": 346.297607421875, "eval_allNLI-dev_dot_ap": 0.5666067657475051, "eval_allNLI-dev_dot_f1": 0.5930232558139534, "eval_allNLI-dev_dot_f1_threshold": 247.86825561523438, "eval_allNLI-dev_dot_precision": 0.446064139941691, "eval_allNLI-dev_dot_recall": 0.884393063583815, "eval_allNLI-dev_euclidean_accuracy": 0.73828125, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.141094207763672, "eval_allNLI-dev_euclidean_ap": 0.6372672441681139, "eval_allNLI-dev_euclidean_f1": 0.6479481641468683, "eval_allNLI-dev_euclidean_f1_threshold": 15.871328353881836, "eval_allNLI-dev_euclidean_precision": 0.5172413793103449, "eval_allNLI-dev_euclidean_recall": 0.8670520231213873, "eval_allNLI-dev_manhattan_accuracy": 0.732421875, "eval_allNLI-dev_manhattan_accuracy_threshold": 292.9789733886719, "eval_allNLI-dev_manhattan_ap": 0.6337184836807339, "eval_allNLI-dev_manhattan_f1": 0.6373626373626373, "eval_allNLI-dev_manhattan_f1_threshold": 329.0118408203125, "eval_allNLI-dev_manhattan_precision": 0.5141843971631206, "eval_allNLI-dev_manhattan_recall": 0.838150289017341, "eval_allNLI-dev_max_accuracy": 0.73828125, "eval_allNLI-dev_max_accuracy_threshold": 346.297607421875, "eval_allNLI-dev_max_ap": 0.6372672441681139, "eval_allNLI-dev_max_f1": 0.6479481641468683, "eval_allNLI-dev_max_f1_threshold": 329.0118408203125, "eval_allNLI-dev_max_precision": 0.5172413793103449, "eval_allNLI-dev_max_recall": 0.884393063583815, "eval_sequential_score": 0.7697271377002631, "eval_sts-test_pearson_cosine": 0.8519785783992986, "eval_sts-test_pearson_dot": 0.8386586693162577, "eval_sts-test_pearson_euclidean": 0.8757232830713848, "eval_sts-test_pearson_manhattan": 0.8728913735173637, "eval_sts-test_pearson_max": 0.8757232830713848, "eval_sts-test_spearman_cosine": 0.879991310071118, "eval_sts-test_spearman_dot": 0.8338387971830308, "eval_sts-test_spearman_euclidean": 0.8739731604392024, "eval_sts-test_spearman_manhattan": 0.8705086967873259, "eval_sts-test_spearman_max": 0.879991310071118, "eval_vitaminc-pairs_loss": 2.9960062503814697, "eval_vitaminc-pairs_runtime": 3.2307, "eval_vitaminc-pairs_samples_per_second": 39.62, "eval_vitaminc-pairs_steps_per_second": 0.31, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_negation-triplets_loss": 0.9421368837356567, "eval_negation-triplets_runtime": 0.8178, "eval_negation-triplets_samples_per_second": 156.51, "eval_negation-triplets_steps_per_second": 1.223, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_scitail-pairs-pos_loss": 0.16442981362342834, "eval_scitail-pairs-pos_runtime": 0.9613, "eval_scitail-pairs-pos_samples_per_second": 133.157, "eval_scitail-pairs-pos_steps_per_second": 1.04, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_scitail-pairs-qa_loss": 0.00035181219573132694, "eval_scitail-pairs-qa_runtime": 0.629, "eval_scitail-pairs-qa_samples_per_second": 203.506, "eval_scitail-pairs-qa_steps_per_second": 1.59, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_xsum-pairs_loss": 0.2470112442970276, "eval_xsum-pairs_runtime": 3.0401, "eval_xsum-pairs_samples_per_second": 42.105, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_sciq_pairs_loss": 0.09907594323158264, "eval_sciq_pairs_runtime": 3.5954, "eval_sciq_pairs_samples_per_second": 35.601, "eval_sciq_pairs_steps_per_second": 0.278, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_qasc_pairs_loss": 0.14264091849327087, "eval_qasc_pairs_runtime": 0.6362, "eval_qasc_pairs_samples_per_second": 201.182, "eval_qasc_pairs_steps_per_second": 1.572, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_openbookqa_pairs_loss": 0.7669063806533813, "eval_openbookqa_pairs_runtime": 0.6128, "eval_openbookqa_pairs_samples_per_second": 208.871, "eval_openbookqa_pairs_steps_per_second": 1.632, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_msmarco_pairs_loss": 0.923215925693512, "eval_msmarco_pairs_runtime": 1.5316, "eval_msmarco_pairs_samples_per_second": 83.572, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_nq_pairs_loss": 0.6398614645004272, "eval_nq_pairs_runtime": 2.9016, "eval_nq_pairs_samples_per_second": 44.114, "eval_nq_pairs_steps_per_second": 0.345, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_trivia_pairs_loss": 0.7547330856323242, "eval_trivia_pairs_runtime": 3.4486, "eval_trivia_pairs_samples_per_second": 37.117, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_gooaq_pairs_loss": 0.3374624252319336, "eval_gooaq_pairs_runtime": 0.9753, "eval_gooaq_pairs_samples_per_second": 131.244, "eval_gooaq_pairs_steps_per_second": 1.025, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_paws-pos_loss": 0.02189790830016136, "eval_paws-pos_runtime": 0.7266, "eval_paws-pos_samples_per_second": 176.155, "eval_paws-pos_steps_per_second": 1.376, "step": 2840 }, { "epoch": 2.9218106995884776, "eval_global_dataset_loss": 0.43860962986946106, "eval_global_dataset_runtime": 13.4508, "eval_global_dataset_samples_per_second": 30.928, "eval_global_dataset_steps_per_second": 0.297, "step": 2840 }, { "epoch": 2.9228395061728394, "grad_norm": 0.0740504264831543, "learning_rate": 1.4629486833398598e-05, "loss": 0.0011, "step": 2841 }, { "epoch": 2.9238683127572016, "grad_norm": 9.339200973510742, "learning_rate": 1.4619496546999329e-05, "loss": 0.3033, "step": 2842 }, { "epoch": 2.924897119341564, "grad_norm": 3.228916645050049, "learning_rate": 1.4609520691206879e-05, "loss": 0.0467, "step": 2843 }, { "epoch": 2.925925925925926, "grad_norm": 0.0, "learning_rate": 1.4599559282541839e-05, "loss": 0.0, "step": 2844 }, { "epoch": 2.9269547325102883, "grad_norm": 6.2286505699157715, "learning_rate": 1.4589612337500855e-05, "loss": 0.1768, "step": 2845 }, { "epoch": 2.92798353909465, "grad_norm": 13.214132308959961, "learning_rate": 1.457967987255664e-05, "loss": 1.2735, "step": 2846 }, { "epoch": 2.9290123456790123, "grad_norm": 3.428947925567627, "learning_rate": 1.4569761904157909e-05, "loss": 0.0482, "step": 2847 }, { "epoch": 2.9300411522633745, "grad_norm": 2.467708110809326, "learning_rate": 1.4559858448729386e-05, "loss": 0.0529, "step": 2848 }, { "epoch": 2.9310699588477367, "grad_norm": 11.081552505493164, "learning_rate": 1.4549969522671751e-05, "loss": 0.5014, "step": 2849 }, { "epoch": 2.932098765432099, "grad_norm": 9.443807601928711, "learning_rate": 1.4540095142361623e-05, "loss": 0.3181, "step": 2850 }, { "epoch": 2.9331275720164607, "grad_norm": 2.813779592514038, "learning_rate": 1.4530235324151538e-05, "loss": 0.0308, "step": 2851 }, { "epoch": 2.934156378600823, "grad_norm": 0.12526783347129822, "learning_rate": 1.452039008436991e-05, "loss": 0.0012, "step": 2852 }, { "epoch": 2.935185185185185, "grad_norm": 9.631052017211914, "learning_rate": 1.4510559439321018e-05, "loss": 0.4408, "step": 2853 }, { "epoch": 2.9362139917695473, "grad_norm": 6.089962959289551, "learning_rate": 1.4500743405284963e-05, "loss": 0.227, "step": 2854 }, { "epoch": 2.9372427983539096, "grad_norm": 2.5051560401916504, "learning_rate": 1.4490941998517654e-05, "loss": 0.035, "step": 2855 }, { "epoch": 2.9382716049382713, "grad_norm": 9.44104290008545, "learning_rate": 1.4481155235250772e-05, "loss": 0.3961, "step": 2856 }, { "epoch": 2.939300411522634, "grad_norm": 5.807121753692627, "learning_rate": 1.4471383131691757e-05, "loss": 0.1849, "step": 2857 }, { "epoch": 2.9403292181069958, "grad_norm": 0.17494520545005798, "learning_rate": 1.4461625704023759e-05, "loss": 0.0022, "step": 2858 }, { "epoch": 2.941358024691358, "grad_norm": 2.6973705291748047, "learning_rate": 1.4451882968405635e-05, "loss": 0.0302, "step": 2859 }, { "epoch": 2.94238683127572, "grad_norm": 1.9522150754928589, "learning_rate": 1.4442154940971901e-05, "loss": 0.0296, "step": 2860 }, { "epoch": 2.94238683127572, "eval_Qnli-dev_cosine_accuracy": 0.712890625, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7184366583824158, "eval_Qnli-dev_cosine_ap": 0.7429929526999689, "eval_Qnli-dev_cosine_f1": 0.6967509025270758, "eval_Qnli-dev_cosine_f1_threshold": 0.65997713804245, "eval_Qnli-dev_cosine_precision": 0.6069182389937107, "eval_Qnli-dev_cosine_recall": 0.8177966101694916, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 328.22698974609375, "eval_Qnli-dev_dot_ap": 0.6907976265087441, "eval_Qnli-dev_dot_f1": 0.6697965571205008, "eval_Qnli-dev_dot_f1_threshold": 245.0462646484375, "eval_Qnli-dev_dot_precision": 0.5310173697270472, "eval_Qnli-dev_dot_recall": 0.9067796610169492, "eval_Qnli-dev_euclidean_accuracy": 0.7109375, "eval_Qnli-dev_euclidean_accuracy_threshold": 14.632600784301758, "eval_Qnli-dev_euclidean_ap": 0.7534677454401899, "eval_Qnli-dev_euclidean_f1": 0.7040816326530612, "eval_Qnli-dev_euclidean_f1_threshold": 17.613479614257812, "eval_Qnli-dev_euclidean_precision": 0.5880681818181818, "eval_Qnli-dev_euclidean_recall": 0.8771186440677966, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 320.0233154296875, "eval_Qnli-dev_manhattan_ap": 0.7577669707012404, "eval_Qnli-dev_manhattan_f1": 0.7090909090909091, "eval_Qnli-dev_manhattan_f1_threshold": 356.7293701171875, "eval_Qnli-dev_manhattan_precision": 0.6210191082802548, "eval_Qnli-dev_manhattan_recall": 0.826271186440678, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 328.22698974609375, "eval_Qnli-dev_max_ap": 0.7577669707012404, "eval_Qnli-dev_max_f1": 0.7090909090909091, "eval_Qnli-dev_max_f1_threshold": 356.7293701171875, "eval_Qnli-dev_max_precision": 0.6210191082802548, "eval_Qnli-dev_max_recall": 0.9067796610169492, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.7885868549346924, "eval_allNLI-dev_cosine_ap": 0.6237921800998874, "eval_allNLI-dev_cosine_f1": 0.6282973621103117, "eval_allNLI-dev_cosine_f1_threshold": 0.7194356918334961, "eval_allNLI-dev_cosine_precision": 0.5368852459016393, "eval_allNLI-dev_cosine_recall": 0.7572254335260116, "eval_allNLI-dev_dot_accuracy": 0.701171875, "eval_allNLI-dev_dot_accuracy_threshold": 341.58795166015625, "eval_allNLI-dev_dot_ap": 0.5609537284925898, "eval_allNLI-dev_dot_f1": 0.592901878914405, "eval_allNLI-dev_dot_f1_threshold": 260.64361572265625, "eval_allNLI-dev_dot_precision": 0.46405228758169936, "eval_allNLI-dev_dot_recall": 0.8208092485549133, "eval_allNLI-dev_euclidean_accuracy": 0.734375, "eval_allNLI-dev_euclidean_accuracy_threshold": 13.216349601745605, "eval_allNLI-dev_euclidean_ap": 0.6344705686594263, "eval_allNLI-dev_euclidean_f1": 0.642369020501139, "eval_allNLI-dev_euclidean_f1_threshold": 15.330223083496094, "eval_allNLI-dev_euclidean_precision": 0.5300751879699248, "eval_allNLI-dev_euclidean_recall": 0.815028901734104, "eval_allNLI-dev_manhattan_accuracy": 0.728515625, "eval_allNLI-dev_manhattan_accuracy_threshold": 284.2862243652344, "eval_allNLI-dev_manhattan_ap": 0.6308030475661037, "eval_allNLI-dev_manhattan_f1": 0.6379310344827586, "eval_allNLI-dev_manhattan_f1_threshold": 331.8832702636719, "eval_allNLI-dev_manhattan_precision": 0.5085910652920962, "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, "eval_allNLI-dev_max_accuracy": 0.734375, "eval_allNLI-dev_max_accuracy_threshold": 341.58795166015625, "eval_allNLI-dev_max_ap": 0.6344705686594263, "eval_allNLI-dev_max_f1": 0.642369020501139, "eval_allNLI-dev_max_f1_threshold": 331.8832702636719, "eval_allNLI-dev_max_precision": 0.5368852459016393, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7577669707012404, "eval_sts-test_pearson_cosine": 0.8499488143164071, "eval_sts-test_pearson_dot": 0.8380176864651012, "eval_sts-test_pearson_euclidean": 0.8756363210475218, "eval_sts-test_pearson_manhattan": 0.873406967543155, "eval_sts-test_pearson_max": 0.8756363210475218, "eval_sts-test_spearman_cosine": 0.8806137106145495, "eval_sts-test_spearman_dot": 0.8368226770388252, "eval_sts-test_spearman_euclidean": 0.8744245216615962, "eval_sts-test_spearman_manhattan": 0.8718300225311684, "eval_sts-test_spearman_max": 0.8806137106145495, "eval_vitaminc-pairs_loss": 3.0377848148345947, "eval_vitaminc-pairs_runtime": 3.2422, "eval_vitaminc-pairs_samples_per_second": 39.48, "eval_vitaminc-pairs_steps_per_second": 0.308, "step": 2860 }, { "epoch": 2.94238683127572, "eval_negation-triplets_loss": 0.9155681729316711, "eval_negation-triplets_runtime": 0.7873, "eval_negation-triplets_samples_per_second": 162.587, "eval_negation-triplets_steps_per_second": 1.27, "step": 2860 }, { "epoch": 2.94238683127572, "eval_scitail-pairs-pos_loss": 0.16575367748737335, "eval_scitail-pairs-pos_runtime": 0.9702, "eval_scitail-pairs-pos_samples_per_second": 131.928, "eval_scitail-pairs-pos_steps_per_second": 1.031, "step": 2860 }, { "epoch": 2.94238683127572, "eval_scitail-pairs-qa_loss": 0.0004558742220979184, "eval_scitail-pairs-qa_runtime": 0.6252, "eval_scitail-pairs-qa_samples_per_second": 204.744, "eval_scitail-pairs-qa_steps_per_second": 1.6, "step": 2860 }, { "epoch": 2.94238683127572, "eval_xsum-pairs_loss": 0.24804741144180298, "eval_xsum-pairs_runtime": 3.0514, "eval_xsum-pairs_samples_per_second": 41.948, "eval_xsum-pairs_steps_per_second": 0.328, "step": 2860 }, { "epoch": 2.94238683127572, "eval_sciq_pairs_loss": 0.09952737390995026, "eval_sciq_pairs_runtime": 3.6161, "eval_sciq_pairs_samples_per_second": 35.397, "eval_sciq_pairs_steps_per_second": 0.277, "step": 2860 }, { "epoch": 2.94238683127572, "eval_qasc_pairs_loss": 0.14225469529628754, "eval_qasc_pairs_runtime": 0.6394, "eval_qasc_pairs_samples_per_second": 200.2, "eval_qasc_pairs_steps_per_second": 1.564, "step": 2860 }, { "epoch": 2.94238683127572, "eval_openbookqa_pairs_loss": 0.7218084931373596, "eval_openbookqa_pairs_runtime": 0.6108, "eval_openbookqa_pairs_samples_per_second": 209.563, "eval_openbookqa_pairs_steps_per_second": 1.637, "step": 2860 }, { "epoch": 2.94238683127572, "eval_msmarco_pairs_loss": 0.8812248706817627, "eval_msmarco_pairs_runtime": 1.5323, "eval_msmarco_pairs_samples_per_second": 83.535, "eval_msmarco_pairs_steps_per_second": 0.653, "step": 2860 }, { "epoch": 2.94238683127572, "eval_nq_pairs_loss": 0.6806061863899231, "eval_nq_pairs_runtime": 2.9102, "eval_nq_pairs_samples_per_second": 43.983, "eval_nq_pairs_steps_per_second": 0.344, "step": 2860 }, { "epoch": 2.94238683127572, "eval_trivia_pairs_loss": 0.7633498907089233, "eval_trivia_pairs_runtime": 3.4751, "eval_trivia_pairs_samples_per_second": 36.834, "eval_trivia_pairs_steps_per_second": 0.288, "step": 2860 }, { "epoch": 2.94238683127572, "eval_gooaq_pairs_loss": 0.31598424911499023, "eval_gooaq_pairs_runtime": 0.9581, "eval_gooaq_pairs_samples_per_second": 133.598, "eval_gooaq_pairs_steps_per_second": 1.044, "step": 2860 }, { "epoch": 2.94238683127572, "eval_paws-pos_loss": 0.021135499700903893, "eval_paws-pos_runtime": 0.7184, "eval_paws-pos_samples_per_second": 178.178, "eval_paws-pos_steps_per_second": 1.392, "step": 2860 }, { "epoch": 2.94238683127572, "eval_global_dataset_loss": 0.43569043278694153, "eval_global_dataset_runtime": 13.4451, "eval_global_dataset_samples_per_second": 30.941, "eval_global_dataset_steps_per_second": 0.298, "step": 2860 }, { "epoch": 2.9434156378600824, "grad_norm": 8.649219512939453, "learning_rate": 1.4432441637832728e-05, "loss": 0.362, "step": 2861 }, { "epoch": 2.9444444444444446, "grad_norm": 3.982344150543213, "learning_rate": 1.4422743075073891e-05, "loss": 0.0574, "step": 2862 }, { "epoch": 2.9454732510288064, "grad_norm": 0.0, "learning_rate": 1.4413059268756761e-05, "loss": 0.0, "step": 2863 }, { "epoch": 2.9465020576131686, "grad_norm": 8.45999813079834, "learning_rate": 1.4403390234918265e-05, "loss": 0.2814, "step": 2864 }, { "epoch": 2.947530864197531, "grad_norm": 1.6499093770980835, "learning_rate": 1.4393735989570879e-05, "loss": 0.042, "step": 2865 }, { "epoch": 2.948559670781893, "grad_norm": 5.839423656463623, "learning_rate": 1.4384096548702572e-05, "loss": 0.1911, "step": 2866 }, { "epoch": 2.9495884773662553, "grad_norm": 2.027810573577881, "learning_rate": 1.4374471928276807e-05, "loss": 0.0297, "step": 2867 }, { "epoch": 2.950617283950617, "grad_norm": 5.306059837341309, "learning_rate": 1.4364862144232492e-05, "loss": 0.1216, "step": 2868 }, { "epoch": 2.9516460905349793, "grad_norm": 7.637890338897705, "learning_rate": 1.4355267212483988e-05, "loss": 0.2677, "step": 2869 }, { "epoch": 2.9526748971193415, "grad_norm": 1.3979178667068481, "learning_rate": 1.4345687148921033e-05, "loss": 0.0177, "step": 2870 }, { "epoch": 2.9537037037037037, "grad_norm": 0.5386278033256531, "learning_rate": 1.4336121969408756e-05, "loss": 0.0028, "step": 2871 }, { "epoch": 2.954732510288066, "grad_norm": 3.8933804035186768, "learning_rate": 1.4326571689787642e-05, "loss": 0.0537, "step": 2872 }, { "epoch": 2.9557613168724277, "grad_norm": 1.9044185876846313, "learning_rate": 1.4317036325873487e-05, "loss": 0.0445, "step": 2873 }, { "epoch": 2.9567901234567904, "grad_norm": 1.4395440816879272, "learning_rate": 1.4307515893457401e-05, "loss": 0.0135, "step": 2874 }, { "epoch": 2.957818930041152, "grad_norm": 10.126184463500977, "learning_rate": 1.4298010408305752e-05, "loss": 0.4396, "step": 2875 }, { "epoch": 2.9588477366255144, "grad_norm": 2.7239129543304443, "learning_rate": 1.4288519886160163e-05, "loss": 0.0289, "step": 2876 }, { "epoch": 2.9598765432098766, "grad_norm": 6.64143705368042, "learning_rate": 1.4279044342737476e-05, "loss": 0.1832, "step": 2877 }, { "epoch": 2.960905349794239, "grad_norm": 8.81223201751709, "learning_rate": 1.426958379372973e-05, "loss": 0.2477, "step": 2878 }, { "epoch": 2.961934156378601, "grad_norm": 0.6399573087692261, "learning_rate": 1.4260138254804126e-05, "loss": 0.029, "step": 2879 }, { "epoch": 2.962962962962963, "grad_norm": 3.247241497039795, "learning_rate": 1.425070774160301e-05, "loss": 0.0397, "step": 2880 }, { "epoch": 2.962962962962963, "eval_Qnli-dev_cosine_accuracy": 0.7109375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7368021011352539, "eval_Qnli-dev_cosine_ap": 0.744202656690273, "eval_Qnli-dev_cosine_f1": 0.691358024691358, "eval_Qnli-dev_cosine_f1_threshold": 0.6519253253936768, "eval_Qnli-dev_cosine_precision": 0.5921450151057401, "eval_Qnli-dev_cosine_recall": 0.8305084745762712, "eval_Qnli-dev_dot_accuracy": 0.671875, "eval_Qnli-dev_dot_accuracy_threshold": 314.99029541015625, "eval_Qnli-dev_dot_ap": 0.6987296052903826, "eval_Qnli-dev_dot_f1": 0.6666666666666667, "eval_Qnli-dev_dot_f1_threshold": 262.0609130859375, "eval_Qnli-dev_dot_precision": 0.5549295774647888, "eval_Qnli-dev_dot_recall": 0.8347457627118644, "eval_Qnli-dev_euclidean_accuracy": 0.71484375, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.15121841430664, "eval_Qnli-dev_euclidean_ap": 0.7522084025905192, "eval_Qnli-dev_euclidean_f1": 0.6994328922495274, "eval_Qnli-dev_euclidean_f1_threshold": 16.430572509765625, "eval_Qnli-dev_euclidean_precision": 0.6313993174061433, "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, "eval_Qnli-dev_manhattan_accuracy": 0.716796875, "eval_Qnli-dev_manhattan_accuracy_threshold": 316.09954833984375, "eval_Qnli-dev_manhattan_ap": 0.7555046711913105, "eval_Qnli-dev_manhattan_f1": 0.7078039927404719, "eval_Qnli-dev_manhattan_f1_threshold": 353.5134582519531, "eval_Qnli-dev_manhattan_precision": 0.6190476190476191, "eval_Qnli-dev_manhattan_recall": 0.826271186440678, "eval_Qnli-dev_max_accuracy": 0.716796875, "eval_Qnli-dev_max_accuracy_threshold": 316.09954833984375, "eval_Qnli-dev_max_ap": 0.7555046711913105, "eval_Qnli-dev_max_f1": 0.7078039927404719, "eval_Qnli-dev_max_f1_threshold": 353.5134582519531, "eval_Qnli-dev_max_precision": 0.6313993174061433, "eval_Qnli-dev_max_recall": 0.8347457627118644, "eval_allNLI-dev_cosine_accuracy": 0.72265625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.8335912823677063, "eval_allNLI-dev_cosine_ap": 0.6206263072830802, "eval_allNLI-dev_cosine_f1": 0.6281755196304851, "eval_allNLI-dev_cosine_f1_threshold": 0.6995470523834229, "eval_allNLI-dev_cosine_precision": 0.5230769230769231, "eval_allNLI-dev_cosine_recall": 0.7861271676300579, "eval_allNLI-dev_dot_accuracy": 0.693359375, "eval_allNLI-dev_dot_accuracy_threshold": 334.1104736328125, "eval_allNLI-dev_dot_ap": 0.5611137607864107, "eval_allNLI-dev_dot_f1": 0.5986696230598669, "eval_allNLI-dev_dot_f1_threshold": 259.05865478515625, "eval_allNLI-dev_dot_precision": 0.4856115107913669, "eval_allNLI-dev_dot_recall": 0.7803468208092486, "eval_allNLI-dev_euclidean_accuracy": 0.732421875, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.999246597290039, "eval_allNLI-dev_euclidean_ap": 0.6332640088123168, "eval_allNLI-dev_euclidean_f1": 0.6417582417582418, "eval_allNLI-dev_euclidean_f1_threshold": 15.786087989807129, "eval_allNLI-dev_euclidean_precision": 0.5177304964539007, "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, "eval_allNLI-dev_manhattan_accuracy": 0.73046875, "eval_allNLI-dev_manhattan_accuracy_threshold": 256.450927734375, "eval_allNLI-dev_manhattan_ap": 0.6318744223041122, "eval_allNLI-dev_manhattan_f1": 0.6430155210643015, "eval_allNLI-dev_manhattan_f1_threshold": 328.28265380859375, "eval_allNLI-dev_manhattan_precision": 0.5215827338129496, "eval_allNLI-dev_manhattan_recall": 0.838150289017341, "eval_allNLI-dev_max_accuracy": 0.732421875, "eval_allNLI-dev_max_accuracy_threshold": 334.1104736328125, "eval_allNLI-dev_max_ap": 0.6332640088123168, "eval_allNLI-dev_max_f1": 0.6430155210643015, "eval_allNLI-dev_max_f1_threshold": 328.28265380859375, "eval_allNLI-dev_max_precision": 0.5230769230769231, "eval_allNLI-dev_max_recall": 0.8439306358381503, "eval_sequential_score": 0.7555046711913105, "eval_sts-test_pearson_cosine": 0.8480014788677841, "eval_sts-test_pearson_dot": 0.8368646668654693, "eval_sts-test_pearson_euclidean": 0.872414719120367, "eval_sts-test_pearson_manhattan": 0.8704315991049113, "eval_sts-test_pearson_max": 0.872414719120367, "eval_sts-test_spearman_cosine": 0.8759407839000595, "eval_sts-test_spearman_dot": 0.8348948374493452, "eval_sts-test_spearman_euclidean": 0.8711618414473334, "eval_sts-test_spearman_manhattan": 0.8681829737328148, "eval_sts-test_spearman_max": 0.8759407839000595, "eval_vitaminc-pairs_loss": 3.100550889968872, "eval_vitaminc-pairs_runtime": 3.276, "eval_vitaminc-pairs_samples_per_second": 39.072, "eval_vitaminc-pairs_steps_per_second": 0.305, "step": 2880 }, { "epoch": 2.962962962962963, "eval_negation-triplets_loss": 0.9214716553688049, "eval_negation-triplets_runtime": 0.7827, "eval_negation-triplets_samples_per_second": 163.539, "eval_negation-triplets_steps_per_second": 1.278, "step": 2880 }, { "epoch": 2.962962962962963, "eval_scitail-pairs-pos_loss": 0.16351692378520966, "eval_scitail-pairs-pos_runtime": 0.9862, "eval_scitail-pairs-pos_samples_per_second": 129.793, "eval_scitail-pairs-pos_steps_per_second": 1.014, "step": 2880 }, { "epoch": 2.962962962962963, "eval_scitail-pairs-qa_loss": 0.00035399440093897283, "eval_scitail-pairs-qa_runtime": 0.6471, "eval_scitail-pairs-qa_samples_per_second": 197.818, "eval_scitail-pairs-qa_steps_per_second": 1.545, "step": 2880 }, { "epoch": 2.962962962962963, "eval_xsum-pairs_loss": 0.2608776390552521, "eval_xsum-pairs_runtime": 3.0439, "eval_xsum-pairs_samples_per_second": 42.051, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2880 }, { "epoch": 2.962962962962963, "eval_sciq_pairs_loss": 0.10060150176286697, "eval_sciq_pairs_runtime": 3.5694, "eval_sciq_pairs_samples_per_second": 35.86, "eval_sciq_pairs_steps_per_second": 0.28, "step": 2880 }, { "epoch": 2.962962962962963, "eval_qasc_pairs_loss": 0.15300729870796204, "eval_qasc_pairs_runtime": 0.6556, "eval_qasc_pairs_samples_per_second": 195.241, "eval_qasc_pairs_steps_per_second": 1.525, "step": 2880 }, { "epoch": 2.962962962962963, "eval_openbookqa_pairs_loss": 0.7465758323669434, "eval_openbookqa_pairs_runtime": 0.6293, "eval_openbookqa_pairs_samples_per_second": 203.405, "eval_openbookqa_pairs_steps_per_second": 1.589, "step": 2880 }, { "epoch": 2.962962962962963, "eval_msmarco_pairs_loss": 0.8570266962051392, "eval_msmarco_pairs_runtime": 1.5355, "eval_msmarco_pairs_samples_per_second": 83.363, "eval_msmarco_pairs_steps_per_second": 0.651, "step": 2880 }, { "epoch": 2.962962962962963, "eval_nq_pairs_loss": 0.6320933103561401, "eval_nq_pairs_runtime": 2.9181, "eval_nq_pairs_samples_per_second": 43.865, "eval_nq_pairs_steps_per_second": 0.343, "step": 2880 }, { "epoch": 2.962962962962963, "eval_trivia_pairs_loss": 0.819422721862793, "eval_trivia_pairs_runtime": 3.452, "eval_trivia_pairs_samples_per_second": 37.08, "eval_trivia_pairs_steps_per_second": 0.29, "step": 2880 }, { "epoch": 2.962962962962963, "eval_gooaq_pairs_loss": 0.2903611660003662, "eval_gooaq_pairs_runtime": 0.9645, "eval_gooaq_pairs_samples_per_second": 132.709, "eval_gooaq_pairs_steps_per_second": 1.037, "step": 2880 }, { "epoch": 2.962962962962963, "eval_paws-pos_loss": 0.02105851098895073, "eval_paws-pos_runtime": 0.7238, "eval_paws-pos_samples_per_second": 176.85, "eval_paws-pos_steps_per_second": 1.382, "step": 2880 }, { "epoch": 2.962962962962963, "eval_global_dataset_loss": 0.4413754343986511, "eval_global_dataset_runtime": 13.4762, "eval_global_dataset_samples_per_second": 30.869, "eval_global_dataset_steps_per_second": 0.297, "step": 2880 }, { "epoch": 2.963991769547325, "grad_norm": 1.8559857606887817, "learning_rate": 1.4241292269743853e-05, "loss": 0.0319, "step": 2881 }, { "epoch": 2.9650205761316872, "grad_norm": 9.35877799987793, "learning_rate": 1.4231891854819196e-05, "loss": 0.2628, "step": 2882 }, { "epoch": 2.9660493827160495, "grad_norm": 7.47057580947876, "learning_rate": 1.4222506512396669e-05, "loss": 0.0654, "step": 2883 }, { "epoch": 2.9670781893004117, "grad_norm": 1.6799421310424805, "learning_rate": 1.4213136258018927e-05, "loss": 0.0143, "step": 2884 }, { "epoch": 2.9681069958847734, "grad_norm": 3.2614622116088867, "learning_rate": 1.4203781107203643e-05, "loss": 0.0348, "step": 2885 }, { "epoch": 2.9691358024691357, "grad_norm": 9.124346733093262, "learning_rate": 1.4194441075443473e-05, "loss": 0.3294, "step": 2886 }, { "epoch": 2.970164609053498, "grad_norm": 3.7280704975128174, "learning_rate": 1.4185116178206042e-05, "loss": 0.1118, "step": 2887 }, { "epoch": 2.97119341563786, "grad_norm": 2.497082471847534, "learning_rate": 1.4175806430933906e-05, "loss": 0.0266, "step": 2888 }, { "epoch": 2.9722222222222223, "grad_norm": 15.666872024536133, "learning_rate": 1.4166511849044528e-05, "loss": 1.1315, "step": 2889 }, { "epoch": 2.973251028806584, "grad_norm": 6.291482448577881, "learning_rate": 1.415723244793027e-05, "loss": 0.2302, "step": 2890 }, { "epoch": 2.9742798353909468, "grad_norm": 4.02936315536499, "learning_rate": 1.414796824295834e-05, "loss": 0.0576, "step": 2891 }, { "epoch": 2.9753086419753085, "grad_norm": 8.624472618103027, "learning_rate": 1.4138719249470786e-05, "loss": 0.2746, "step": 2892 }, { "epoch": 2.9763374485596708, "grad_norm": 3.1753811836242676, "learning_rate": 1.4129485482784468e-05, "loss": 0.053, "step": 2893 }, { "epoch": 2.977366255144033, "grad_norm": 9.290059089660645, "learning_rate": 1.412026695819102e-05, "loss": 0.2791, "step": 2894 }, { "epoch": 2.978395061728395, "grad_norm": 5.828028202056885, "learning_rate": 1.4111063690956849e-05, "loss": 0.2019, "step": 2895 }, { "epoch": 2.9794238683127574, "grad_norm": 4.854930877685547, "learning_rate": 1.410187569632308e-05, "loss": 0.1596, "step": 2896 }, { "epoch": 2.980452674897119, "grad_norm": 0.021844957023859024, "learning_rate": 1.4092702989505553e-05, "loss": 0.0002, "step": 2897 }, { "epoch": 2.9814814814814814, "grad_norm": 4.01804256439209, "learning_rate": 1.408354558569479e-05, "loss": 0.1104, "step": 2898 }, { "epoch": 2.9825102880658436, "grad_norm": 1.7121597528457642, "learning_rate": 1.4074403500055972e-05, "loss": 0.0063, "step": 2899 }, { "epoch": 2.983539094650206, "grad_norm": 2.552294969558716, "learning_rate": 1.406527674772891e-05, "loss": 0.0628, "step": 2900 }, { "epoch": 2.983539094650206, "eval_Qnli-dev_cosine_accuracy": 0.708984375, "eval_Qnli-dev_cosine_accuracy_threshold": 0.7327213287353516, "eval_Qnli-dev_cosine_ap": 0.7469727456326335, "eval_Qnli-dev_cosine_f1": 0.6946564885496184, "eval_Qnli-dev_cosine_f1_threshold": 0.693007230758667, "eval_Qnli-dev_cosine_precision": 0.6319444444444444, "eval_Qnli-dev_cosine_recall": 0.7711864406779662, "eval_Qnli-dev_dot_accuracy": 0.66796875, "eval_Qnli-dev_dot_accuracy_threshold": 324.18719482421875, "eval_Qnli-dev_dot_ap": 0.7019974567756222, "eval_Qnli-dev_dot_f1": 0.667741935483871, "eval_Qnli-dev_dot_f1_threshold": 253.47186279296875, "eval_Qnli-dev_dot_precision": 0.5390625, "eval_Qnli-dev_dot_recall": 0.8771186440677966, "eval_Qnli-dev_euclidean_accuracy": 0.708984375, "eval_Qnli-dev_euclidean_accuracy_threshold": 15.688040733337402, "eval_Qnli-dev_euclidean_ap": 0.7556721346513768, "eval_Qnli-dev_euclidean_f1": 0.70703125, "eval_Qnli-dev_euclidean_f1_threshold": 16.195533752441406, "eval_Qnli-dev_euclidean_precision": 0.6557971014492754, "eval_Qnli-dev_euclidean_recall": 0.7669491525423728, "eval_Qnli-dev_manhattan_accuracy": 0.71484375, "eval_Qnli-dev_manhattan_accuracy_threshold": 327.8430480957031, "eval_Qnli-dev_manhattan_ap": 0.7585944269198737, "eval_Qnli-dev_manhattan_f1": 0.7030965391621129, "eval_Qnli-dev_manhattan_f1_threshold": 352.80926513671875, "eval_Qnli-dev_manhattan_precision": 0.6166134185303515, "eval_Qnli-dev_manhattan_recall": 0.8177966101694916, "eval_Qnli-dev_max_accuracy": 0.71484375, "eval_Qnli-dev_max_accuracy_threshold": 327.8430480957031, "eval_Qnli-dev_max_ap": 0.7585944269198737, "eval_Qnli-dev_max_f1": 0.70703125, "eval_Qnli-dev_max_f1_threshold": 352.80926513671875, "eval_Qnli-dev_max_precision": 0.6557971014492754, "eval_Qnli-dev_max_recall": 0.8771186440677966, "eval_allNLI-dev_cosine_accuracy": 0.728515625, "eval_allNLI-dev_cosine_accuracy_threshold": 0.7969900369644165, "eval_allNLI-dev_cosine_ap": 0.6248909740744295, "eval_allNLI-dev_cosine_f1": 0.6313253012048192, "eval_allNLI-dev_cosine_f1_threshold": 0.7039787769317627, "eval_allNLI-dev_cosine_precision": 0.5413223140495868, "eval_allNLI-dev_cosine_recall": 0.7572254335260116, "eval_allNLI-dev_dot_accuracy": 0.69921875, "eval_allNLI-dev_dot_accuracy_threshold": 328.640380859375, "eval_allNLI-dev_dot_ap": 0.5605197424721456, "eval_allNLI-dev_dot_f1": 0.593186372745491, "eval_allNLI-dev_dot_f1_threshold": 243.58334350585938, "eval_allNLI-dev_dot_precision": 0.4539877300613497, "eval_allNLI-dev_dot_recall": 0.8554913294797688, "eval_allNLI-dev_euclidean_accuracy": 0.736328125, "eval_allNLI-dev_euclidean_accuracy_threshold": 12.87012767791748, "eval_allNLI-dev_euclidean_ap": 0.6377365457614158, "eval_allNLI-dev_euclidean_f1": 0.6491228070175439, "eval_allNLI-dev_euclidean_f1_threshold": 15.911476135253906, "eval_allNLI-dev_euclidean_precision": 0.5229681978798587, "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, "eval_allNLI-dev_manhattan_accuracy": 0.734375, "eval_allNLI-dev_manhattan_accuracy_threshold": 296.2361755371094, "eval_allNLI-dev_manhattan_ap": 0.634572927514979, "eval_allNLI-dev_manhattan_f1": 0.65, "eval_allNLI-dev_manhattan_f1_threshold": 328.3108215332031, "eval_allNLI-dev_manhattan_precision": 0.5355805243445693, "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, "eval_allNLI-dev_max_accuracy": 0.736328125, "eval_allNLI-dev_max_accuracy_threshold": 328.640380859375, "eval_allNLI-dev_max_ap": 0.6377365457614158, "eval_allNLI-dev_max_f1": 0.65, "eval_allNLI-dev_max_f1_threshold": 328.3108215332031, "eval_allNLI-dev_max_precision": 0.5413223140495868, "eval_allNLI-dev_max_recall": 0.8554913294797688, "eval_sequential_score": 0.7585944269198737, "eval_sts-test_pearson_cosine": 0.8531514218099535, "eval_sts-test_pearson_dot": 0.8410736547581009, "eval_sts-test_pearson_euclidean": 0.8753085976770247, "eval_sts-test_pearson_manhattan": 0.8732663029873282, "eval_sts-test_pearson_max": 0.8753085976770247, "eval_sts-test_spearman_cosine": 0.8782629716049206, "eval_sts-test_spearman_dot": 0.8396353833197975, "eval_sts-test_spearman_euclidean": 0.8730297590978103, "eval_sts-test_spearman_manhattan": 0.8700326328686442, "eval_sts-test_spearman_max": 0.8782629716049206, "eval_vitaminc-pairs_loss": 3.1885459423065186, "eval_vitaminc-pairs_runtime": 3.249, "eval_vitaminc-pairs_samples_per_second": 39.396, "eval_vitaminc-pairs_steps_per_second": 0.308, "step": 2900 }, { "epoch": 2.983539094650206, "eval_negation-triplets_loss": 0.9424842000007629, "eval_negation-triplets_runtime": 0.7754, "eval_negation-triplets_samples_per_second": 165.074, "eval_negation-triplets_steps_per_second": 1.29, "step": 2900 }, { "epoch": 2.983539094650206, "eval_scitail-pairs-pos_loss": 0.16403542459011078, "eval_scitail-pairs-pos_runtime": 0.984, "eval_scitail-pairs-pos_samples_per_second": 130.082, "eval_scitail-pairs-pos_steps_per_second": 1.016, "step": 2900 }, { "epoch": 2.983539094650206, "eval_scitail-pairs-qa_loss": 0.0003662670496851206, "eval_scitail-pairs-qa_runtime": 0.6296, "eval_scitail-pairs-qa_samples_per_second": 203.297, "eval_scitail-pairs-qa_steps_per_second": 1.588, "step": 2900 }, { "epoch": 2.983539094650206, "eval_xsum-pairs_loss": 0.2721264660358429, "eval_xsum-pairs_runtime": 3.0415, "eval_xsum-pairs_samples_per_second": 42.084, "eval_xsum-pairs_steps_per_second": 0.329, "step": 2900 }, { "epoch": 2.983539094650206, "eval_sciq_pairs_loss": 0.09958149492740631, "eval_sciq_pairs_runtime": 3.5979, "eval_sciq_pairs_samples_per_second": 35.576, "eval_sciq_pairs_steps_per_second": 0.278, "step": 2900 }, { "epoch": 2.983539094650206, "eval_qasc_pairs_loss": 0.15240588784217834, "eval_qasc_pairs_runtime": 0.6359, "eval_qasc_pairs_samples_per_second": 201.3, "eval_qasc_pairs_steps_per_second": 1.573, "step": 2900 }, { "epoch": 2.983539094650206, "eval_openbookqa_pairs_loss": 0.7252082824707031, "eval_openbookqa_pairs_runtime": 0.6105, "eval_openbookqa_pairs_samples_per_second": 209.653, "eval_openbookqa_pairs_steps_per_second": 1.638, "step": 2900 }, { "epoch": 2.983539094650206, "eval_msmarco_pairs_loss": 0.8146727085113525, "eval_msmarco_pairs_runtime": 1.5362, "eval_msmarco_pairs_samples_per_second": 83.32, "eval_msmarco_pairs_steps_per_second": 0.651, "step": 2900 }, { "epoch": 2.983539094650206, "eval_nq_pairs_loss": 0.6266657114028931, "eval_nq_pairs_runtime": 2.9208, "eval_nq_pairs_samples_per_second": 43.824, "eval_nq_pairs_steps_per_second": 0.342, "step": 2900 }, { "epoch": 2.983539094650206, "eval_trivia_pairs_loss": 0.76347815990448, "eval_trivia_pairs_runtime": 3.4717, "eval_trivia_pairs_samples_per_second": 36.87, "eval_trivia_pairs_steps_per_second": 0.288, "step": 2900 }, { "epoch": 2.983539094650206, "eval_gooaq_pairs_loss": 0.30829522013664246, "eval_gooaq_pairs_runtime": 0.9943, "eval_gooaq_pairs_samples_per_second": 128.734, "eval_gooaq_pairs_steps_per_second": 1.006, "step": 2900 }, { "epoch": 2.983539094650206, "eval_paws-pos_loss": 0.02102976106107235, "eval_paws-pos_runtime": 0.7203, "eval_paws-pos_samples_per_second": 177.695, "eval_paws-pos_steps_per_second": 1.388, "step": 2900 }, { "epoch": 2.983539094650206, "eval_global_dataset_loss": 0.4458039700984955, "eval_global_dataset_runtime": 13.4537, "eval_global_dataset_samples_per_second": 30.921, "eval_global_dataset_steps_per_second": 0.297, "step": 2900 }, { "epoch": 2.984567901234568, "grad_norm": 3.5001864433288574, "learning_rate": 1.4056165343828016e-05, "loss": 0.0889, "step": 2901 }, { "epoch": 2.98559670781893, "grad_norm": 0.2163437455892563, "learning_rate": 1.40470693034423e-05, "loss": 0.001, "step": 2902 }, { "epoch": 2.986625514403292, "grad_norm": 5.974316120147705, "learning_rate": 1.4037988641635308e-05, "loss": 0.1345, "step": 2903 }, { "epoch": 2.9876543209876543, "grad_norm": 4.87230920791626, "learning_rate": 1.4028923373445143e-05, "loss": 0.0546, "step": 2904 }, { "epoch": 2.9886831275720165, "grad_norm": 1.8750593662261963, "learning_rate": 1.4019873513884392e-05, "loss": 0.0224, "step": 2905 }, { "epoch": 2.9897119341563787, "grad_norm": 6.106016159057617, "learning_rate": 1.4010839077940138e-05, "loss": 0.1088, "step": 2906 }, { "epoch": 2.9907407407407405, "grad_norm": 6.1090006828308105, "learning_rate": 1.4001820080573921e-05, "loss": 0.0894, "step": 2907 }, { "epoch": 2.991769547325103, "grad_norm": 0.13261619210243225, "learning_rate": 1.3992816536721708e-05, "loss": 0.0007, "step": 2908 }, { "epoch": 2.992798353909465, "grad_norm": 8.47977066040039, "learning_rate": 1.3983828461293875e-05, "loss": 0.3362, "step": 2909 }, { "epoch": 2.993827160493827, "grad_norm": 1.2591878175735474, "learning_rate": 1.397485586917519e-05, "loss": 0.0845, "step": 2910 }, { "epoch": 2.9948559670781894, "grad_norm": 4.556859016418457, "learning_rate": 1.396589877522477e-05, "loss": 0.0372, "step": 2911 }, { "epoch": 2.9958847736625516, "grad_norm": 8.75589370727539, "learning_rate": 1.3956957194276064e-05, "loss": 0.2896, "step": 2912 }, { "epoch": 2.996913580246914, "grad_norm": 10.133983612060547, "learning_rate": 1.3948031141136842e-05, "loss": 0.1929, "step": 2913 }, { "epoch": 2.9979423868312756, "grad_norm": 0.00930853933095932, "learning_rate": 1.3939120630589154e-05, "loss": 0.0, "step": 2914 }, { "epoch": 2.998971193415638, "grad_norm": 10.040975570678711, "learning_rate": 1.3930225677389305e-05, "loss": 0.0814, "step": 2915 }, { "epoch": 3.0, "grad_norm": 11.688448905944824, "learning_rate": 1.3921346296267846e-05, "loss": 0.138, "step": 2916 } ], "logging_steps": 1, "max_steps": 2916, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 292, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }