diff --git "a/checkpoint-2628/trainer_state.json" "b/checkpoint-2628/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-2628/trainer_state.json" @@ -0,0 +1,43712 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.7037037037037037, + "eval_steps": 20, + "global_step": 2628, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00102880658436214, + "grad_norm": NaN, + "learning_rate": 0.0, + "loss": 6.6792, + "step": 1 + }, + { + "epoch": 0.00205761316872428, + "grad_norm": NaN, + "learning_rate": 0.0, + "loss": 7.1528, + "step": 2 + }, + { + "epoch": 0.0030864197530864196, + "grad_norm": 56.1685905456543, + "learning_rate": 3.634475597092419e-08, + "loss": 7.5608, + "step": 3 + }, + { + "epoch": 0.00411522633744856, + "grad_norm": 47.3843994140625, + "learning_rate": 7.268951194184838e-08, + "loss": 7.9785, + "step": 4 + }, + { + "epoch": 0.0051440329218107, + "grad_norm": Infinity, + "learning_rate": 7.268951194184838e-08, + "loss": 8.3006, + "step": 5 + }, + { + "epoch": 0.006172839506172839, + "grad_norm": 30.015766143798828, + "learning_rate": 1.0903426791277258e-07, + "loss": 7.2339, + "step": 6 + }, + { + "epoch": 0.00720164609053498, + "grad_norm": 38.63535690307617, + "learning_rate": 1.4537902388369677e-07, + "loss": 6.9116, + "step": 7 + }, + { + "epoch": 0.00823045267489712, + "grad_norm": 43.006290435791016, + "learning_rate": 1.8172377985462097e-07, + "loss": 7.5568, + "step": 8 + }, + { + "epoch": 0.009259259259259259, + "grad_norm": 32.70055389404297, + "learning_rate": 2.1806853582554515e-07, + "loss": 7.3119, + "step": 9 + }, + { + "epoch": 0.0102880658436214, + "grad_norm": 34.09101486206055, + "learning_rate": 2.5441329179646936e-07, + "loss": 7.3456, + "step": 10 + }, + { + "epoch": 0.01131687242798354, + "grad_norm": 46.04302978515625, + "learning_rate": 2.9075804776739353e-07, + "loss": 7.3899, + "step": 11 + }, + { + "epoch": 0.012345679012345678, + "grad_norm": 39.30464172363281, + "learning_rate": 3.271028037383177e-07, + "loss": 7.1603, + "step": 12 + }, + { + "epoch": 0.013374485596707819, + "grad_norm": 45.96063995361328, + "learning_rate": 3.6344755970924194e-07, + "loss": 7.5501, + "step": 13 + }, + { + "epoch": 0.01440329218106996, + "grad_norm": 31.248769760131836, + "learning_rate": 3.997923156801661e-07, + "loss": 7.1211, + "step": 14 + }, + { + "epoch": 0.015432098765432098, + "grad_norm": 37.31939697265625, + "learning_rate": 4.361370716510903e-07, + "loss": 6.6898, + "step": 15 + }, + { + "epoch": 0.01646090534979424, + "grad_norm": 57.4151725769043, + "learning_rate": 4.724818276220145e-07, + "loss": 7.9275, + "step": 16 + }, + { + "epoch": 0.01748971193415638, + "grad_norm": 60.12082290649414, + "learning_rate": 5.088265835929387e-07, + "loss": 8.8934, + "step": 17 + }, + { + "epoch": 0.018518518518518517, + "grad_norm": 46.84602355957031, + "learning_rate": 5.451713395638628e-07, + "loss": 7.7481, + "step": 18 + }, + { + "epoch": 0.01954732510288066, + "grad_norm": 49.21991729736328, + "learning_rate": 5.815160955347871e-07, + "loss": 7.9482, + "step": 19 + }, + { + "epoch": 0.0205761316872428, + "grad_norm": 28.904695510864258, + "learning_rate": 6.178608515057113e-07, + "loss": 7.2578, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_Qnli-dev_cosine_accuracy": 0.599609375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9141477346420288, + "eval_Qnli-dev_cosine_ap": 0.5547693808475234, + "eval_Qnli-dev_cosine_f1": 0.6315789473684211, + "eval_Qnli-dev_cosine_f1_threshold": 0.6694607138633728, + "eval_Qnli-dev_cosine_precision": 0.4633663366336634, + "eval_Qnli-dev_cosine_recall": 0.9915254237288136, + "eval_Qnli-dev_dot_accuracy": 0.576171875, + "eval_Qnli-dev_dot_accuracy_threshold": 375.9344177246094, + "eval_Qnli-dev_dot_ap": 0.4951635671727113, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 237.4730682373047, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.603515625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 8.203678131103516, + "eval_Qnli-dev_euclidean_ap": 0.5621167645444726, + "eval_Qnli-dev_euclidean_f1": 0.6307277628032345, + "eval_Qnli-dev_euclidean_f1_threshold": 17.37430763244629, + "eval_Qnli-dev_euclidean_precision": 0.4624505928853755, + "eval_Qnli-dev_euclidean_recall": 0.9915254237288136, + "eval_Qnli-dev_manhattan_accuracy": 0.615234375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 168.66110229492188, + "eval_Qnli-dev_manhattan_ap": 0.6068938574265019, + "eval_Qnli-dev_manhattan_f1": 0.629878869448183, + "eval_Qnli-dev_manhattan_f1_threshold": 250.46356201171875, + "eval_Qnli-dev_manhattan_precision": 0.46153846153846156, + "eval_Qnli-dev_manhattan_recall": 0.9915254237288136, + "eval_Qnli-dev_max_accuracy": 0.615234375, + "eval_Qnli-dev_max_accuracy_threshold": 375.9344177246094, + "eval_Qnli-dev_max_ap": 0.6068938574265019, + "eval_Qnli-dev_max_f1": 0.6315789473684211, + "eval_Qnli-dev_max_f1_threshold": 250.46356201171875, + "eval_Qnli-dev_max_precision": 0.4633663366336634, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.6640625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9848551154136658, + "eval_allNLI-dev_cosine_ap": 0.34665869156342183, + "eval_allNLI-dev_cosine_f1": 0.5105105105105106, + "eval_allNLI-dev_cosine_f1_threshold": 0.726775050163269, + "eval_allNLI-dev_cosine_precision": 0.3448275862068966, + "eval_allNLI-dev_cosine_recall": 0.9826589595375722, + "eval_allNLI-dev_dot_accuracy": 0.66015625, + "eval_allNLI-dev_dot_accuracy_threshold": 510.3038330078125, + "eval_allNLI-dev_dot_ap": 0.3325722102020561, + "eval_allNLI-dev_dot_f1": 0.5081240768094535, + "eval_allNLI-dev_dot_f1_threshold": 321.1283264160156, + "eval_allNLI-dev_dot_precision": 0.3412698412698413, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.6640625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 3.5479774475097656, + "eval_allNLI-dev_euclidean_ap": 0.35150722956160885, + "eval_allNLI-dev_euclidean_f1": 0.5120481927710844, + "eval_allNLI-dev_euclidean_f1_threshold": 16.336387634277344, + "eval_allNLI-dev_euclidean_precision": 0.34623217922606925, + "eval_allNLI-dev_euclidean_recall": 0.9826589595375722, + "eval_allNLI-dev_manhattan_accuracy": 0.6640625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 59.594974517822266, + "eval_allNLI-dev_manhattan_ap": 0.3707715964225075, + "eval_allNLI-dev_manhattan_f1": 0.5096870342771982, + "eval_allNLI-dev_manhattan_f1_threshold": 246.92552185058594, + "eval_allNLI-dev_manhattan_precision": 0.3433734939759036, + "eval_allNLI-dev_manhattan_recall": 0.9884393063583815, + "eval_allNLI-dev_max_accuracy": 0.6640625, + "eval_allNLI-dev_max_accuracy_threshold": 510.3038330078125, + "eval_allNLI-dev_max_ap": 0.3707715964225075, + "eval_allNLI-dev_max_f1": 0.5120481927710844, + "eval_allNLI-dev_max_f1_threshold": 321.1283264160156, + "eval_allNLI-dev_max_precision": 0.34623217922606925, + "eval_allNLI-dev_max_recall": 0.9942196531791907, + "eval_sequential_score": 0.6068938574265019, + "eval_sts-test_pearson_cosine": 0.15158128737060533, + "eval_sts-test_pearson_dot": 0.28519318322703113, + "eval_sts-test_pearson_euclidean": 0.14101979920513222, + "eval_sts-test_pearson_manhattan": 0.18765507958122332, + "eval_sts-test_pearson_max": 0.28519318322703113, + "eval_sts-test_spearman_cosine": 0.19495891500289336, + "eval_sts-test_spearman_dot": 0.2996743605881303, + "eval_sts-test_spearman_euclidean": 0.16263986728485438, + "eval_sts-test_spearman_manhattan": 0.20827944121487316, + "eval_sts-test_spearman_max": 0.2996743605881303, + "eval_vitaminc-pairs_loss": 3.0276453495025635, + "eval_vitaminc-pairs_runtime": 3.2256, + "eval_vitaminc-pairs_samples_per_second": 39.683, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_negation-triplets_loss": 4.588223457336426, + "eval_negation-triplets_runtime": 0.7341, + "eval_negation-triplets_samples_per_second": 174.361, + "eval_negation-triplets_steps_per_second": 1.362, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_scitail-pairs-pos_loss": 2.112419366836548, + "eval_scitail-pairs-pos_runtime": 0.8038, + "eval_scitail-pairs-pos_samples_per_second": 159.242, + "eval_scitail-pairs-pos_steps_per_second": 1.244, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_scitail-pairs-qa_loss": 2.4077870845794678, + "eval_scitail-pairs-qa_runtime": 0.5735, + "eval_scitail-pairs-qa_samples_per_second": 223.199, + "eval_scitail-pairs-qa_steps_per_second": 1.744, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_xsum-pairs_loss": 7.2197651863098145, + "eval_xsum-pairs_runtime": 3.0069, + "eval_xsum-pairs_samples_per_second": 42.568, + "eval_xsum-pairs_steps_per_second": 0.333, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_sciq_pairs_loss": 0.8614505529403687, + "eval_sciq_pairs_runtime": 3.4174, + "eval_sciq_pairs_samples_per_second": 37.455, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_qasc_pairs_loss": 5.234526634216309, + "eval_qasc_pairs_runtime": 0.5917, + "eval_qasc_pairs_samples_per_second": 216.327, + "eval_qasc_pairs_steps_per_second": 1.69, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_openbookqa_pairs_loss": 5.655325889587402, + "eval_openbookqa_pairs_runtime": 0.5683, + "eval_openbookqa_pairs_samples_per_second": 225.252, + "eval_openbookqa_pairs_steps_per_second": 1.76, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_msmarco_pairs_loss": 15.688506126403809, + "eval_msmarco_pairs_runtime": 1.5377, + "eval_msmarco_pairs_samples_per_second": 83.243, + "eval_msmarco_pairs_steps_per_second": 0.65, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_nq_pairs_loss": 14.804839134216309, + "eval_nq_pairs_runtime": 2.884, + "eval_nq_pairs_samples_per_second": 44.382, + "eval_nq_pairs_steps_per_second": 0.347, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_trivia_pairs_loss": 9.255401611328125, + "eval_trivia_pairs_runtime": 3.4138, + "eval_trivia_pairs_samples_per_second": 37.495, + "eval_trivia_pairs_steps_per_second": 0.293, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_gooaq_pairs_loss": 10.233977317810059, + "eval_gooaq_pairs_runtime": 0.951, + "eval_gooaq_pairs_samples_per_second": 134.592, + "eval_gooaq_pairs_steps_per_second": 1.051, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_paws-pos_loss": 1.2437409162521362, + "eval_paws-pos_runtime": 0.6909, + "eval_paws-pos_samples_per_second": 185.275, + "eval_paws-pos_steps_per_second": 1.447, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_global_dataset_loss": 6.9472880363464355, + "eval_global_dataset_runtime": 13.3358, + "eval_global_dataset_samples_per_second": 31.194, + "eval_global_dataset_steps_per_second": 0.3, + "step": 20 + }, + { + "epoch": 0.021604938271604937, + "grad_norm": 129.84130859375, + "learning_rate": 6.542056074766354e-07, + "loss": 14.6214, + "step": 21 + }, + { + "epoch": 0.02263374485596708, + "grad_norm": 25.52641487121582, + "learning_rate": 6.905503634475597e-07, + "loss": 2.7071, + "step": 22 + }, + { + "epoch": 0.023662551440329218, + "grad_norm": 52.22842025756836, + "learning_rate": 7.268951194184839e-07, + "loss": 7.3533, + "step": 23 + }, + { + "epoch": 0.024691358024691357, + "grad_norm": 18.928892135620117, + "learning_rate": 7.63239875389408e-07, + "loss": 5.8828, + "step": 24 + }, + { + "epoch": 0.0257201646090535, + "grad_norm": 18.747142791748047, + "learning_rate": 7.995846313603322e-07, + "loss": 5.549, + "step": 25 + }, + { + "epoch": 0.026748971193415638, + "grad_norm": 39.345096588134766, + "learning_rate": 8.359293873312565e-07, + "loss": 7.0614, + "step": 26 + }, + { + "epoch": 0.027777777777777776, + "grad_norm": 16.357666015625, + "learning_rate": 8.722741433021806e-07, + "loss": 5.4115, + "step": 27 + }, + { + "epoch": 0.02880658436213992, + "grad_norm": 143.72604370117188, + "learning_rate": 9.086188992731048e-07, + "loss": 14.986, + "step": 28 + }, + { + "epoch": 0.029835390946502057, + "grad_norm": 29.933956146240234, + "learning_rate": 9.44963655244029e-07, + "loss": 6.5017, + "step": 29 + }, + { + "epoch": 0.030864197530864196, + "grad_norm": 24.71169662475586, + "learning_rate": 9.813084112149532e-07, + "loss": 6.8621, + "step": 30 + }, + { + "epoch": 0.03189300411522634, + "grad_norm": 48.559242248535156, + "learning_rate": 1.0176531671858774e-06, + "loss": 7.6911, + "step": 31 + }, + { + "epoch": 0.03292181069958848, + "grad_norm": 43.564395904541016, + "learning_rate": 1.0539979231568014e-06, + "loss": 7.3478, + "step": 32 + }, + { + "epoch": 0.033950617283950615, + "grad_norm": 71.6847152709961, + "learning_rate": 1.0903426791277257e-06, + "loss": 9.8953, + "step": 33 + }, + { + "epoch": 0.03497942386831276, + "grad_norm": 130.1976776123047, + "learning_rate": 1.12668743509865e-06, + "loss": 14.7971, + "step": 34 + }, + { + "epoch": 0.0360082304526749, + "grad_norm": 25.184886932373047, + "learning_rate": 1.1630321910695741e-06, + "loss": 6.6194, + "step": 35 + }, + { + "epoch": 0.037037037037037035, + "grad_norm": 15.403931617736816, + "learning_rate": 1.1993769470404982e-06, + "loss": 5.397, + "step": 36 + }, + { + "epoch": 0.03806584362139918, + "grad_norm": 97.28205871582031, + "learning_rate": 1.2357217030114226e-06, + "loss": 9.3816, + "step": 37 + }, + { + "epoch": 0.03909465020576132, + "grad_norm": 113.59951782226562, + "learning_rate": 1.2720664589823466e-06, + "loss": 13.3627, + "step": 38 + }, + { + "epoch": 0.040123456790123455, + "grad_norm": 91.30632781982422, + "learning_rate": 1.3084112149532708e-06, + "loss": 9.0198, + "step": 39 + }, + { + "epoch": 0.0411522633744856, + "grad_norm": 34.121768951416016, + "learning_rate": 1.344755970924195e-06, + "loss": 6.3785, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_Qnli-dev_cosine_accuracy": 0.599609375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9141815304756165, + "eval_Qnli-dev_cosine_ap": 0.560800067413789, + "eval_Qnli-dev_cosine_f1": 0.6315789473684211, + "eval_Qnli-dev_cosine_f1_threshold": 0.71217280626297, + "eval_Qnli-dev_cosine_precision": 0.4633663366336634, + "eval_Qnli-dev_cosine_recall": 0.9915254237288136, + "eval_Qnli-dev_dot_accuracy": 0.580078125, + "eval_Qnli-dev_dot_accuracy_threshold": 383.35107421875, + "eval_Qnli-dev_dot_ap": 0.4975321617530368, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 237.07284545898438, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.59375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 8.007088661193848, + "eval_Qnli-dev_euclidean_ap": 0.5669399990959784, + "eval_Qnli-dev_euclidean_f1": 0.6307277628032345, + "eval_Qnli-dev_euclidean_f1_threshold": 16.091142654418945, + "eval_Qnli-dev_euclidean_precision": 0.4624505928853755, + "eval_Qnli-dev_euclidean_recall": 0.9915254237288136, + "eval_Qnli-dev_manhattan_accuracy": 0.6171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 165.9488067626953, + "eval_Qnli-dev_manhattan_ap": 0.6135192533670535, + "eval_Qnli-dev_manhattan_f1": 0.629878869448183, + "eval_Qnli-dev_manhattan_f1_threshold": 239.3395233154297, + "eval_Qnli-dev_manhattan_precision": 0.46153846153846156, + "eval_Qnli-dev_manhattan_recall": 0.9915254237288136, + "eval_Qnli-dev_max_accuracy": 0.6171875, + "eval_Qnli-dev_max_accuracy_threshold": 383.35107421875, + "eval_Qnli-dev_max_ap": 0.6135192533670535, + "eval_Qnli-dev_max_f1": 0.6315789473684211, + "eval_Qnli-dev_max_f1_threshold": 239.3395233154297, + "eval_Qnli-dev_max_precision": 0.4633663366336634, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.6640625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9857255220413208, + "eval_allNLI-dev_cosine_ap": 0.35080477217886274, + "eval_allNLI-dev_cosine_f1": 0.5090361445783133, + "eval_allNLI-dev_cosine_f1_threshold": 0.7498464584350586, + "eval_allNLI-dev_cosine_precision": 0.34419551934826886, + "eval_allNLI-dev_cosine_recall": 0.976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.66015625, + "eval_allNLI-dev_dot_accuracy_threshold": 511.11175537109375, + "eval_allNLI-dev_dot_ap": 0.33267543574243635, + "eval_allNLI-dev_dot_f1": 0.5066273932253312, + "eval_allNLI-dev_dot_f1_threshold": 327.406494140625, + "eval_allNLI-dev_dot_precision": 0.33992094861660077, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.666015625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.716782569885254, + "eval_allNLI-dev_euclidean_ap": 0.3570960285605865, + "eval_allNLI-dev_euclidean_f1": 0.5113464447806354, + "eval_allNLI-dev_euclidean_f1_threshold": 15.28095817565918, + "eval_allNLI-dev_euclidean_precision": 0.3463114754098361, + "eval_allNLI-dev_euclidean_recall": 0.976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.6640625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 58.618408203125, + "eval_allNLI-dev_manhattan_ap": 0.3738907800968901, + "eval_allNLI-dev_manhattan_f1": 0.5096296296296297, + "eval_allNLI-dev_manhattan_f1_threshold": 251.27957153320312, + "eval_allNLI-dev_manhattan_precision": 0.3426294820717131, + "eval_allNLI-dev_manhattan_recall": 0.9942196531791907, + "eval_allNLI-dev_max_accuracy": 0.666015625, + "eval_allNLI-dev_max_accuracy_threshold": 511.11175537109375, + "eval_allNLI-dev_max_ap": 0.3738907800968901, + "eval_allNLI-dev_max_f1": 0.5113464447806354, + "eval_allNLI-dev_max_f1_threshold": 327.406494140625, + "eval_allNLI-dev_max_precision": 0.3463114754098361, + "eval_allNLI-dev_max_recall": 0.9942196531791907, + "eval_sequential_score": 0.6135192533670535, + "eval_sts-test_pearson_cosine": 0.15947289948306198, + "eval_sts-test_pearson_dot": 0.30037019316788005, + "eval_sts-test_pearson_euclidean": 0.14704475799070915, + "eval_sts-test_pearson_manhattan": 0.1919977257434266, + "eval_sts-test_pearson_max": 0.30037019316788005, + "eval_sts-test_spearman_cosine": 0.2043480876529001, + "eval_sts-test_spearman_dot": 0.312789299505278, + "eval_sts-test_spearman_euclidean": 0.16989717934469764, + "eval_sts-test_spearman_manhattan": 0.21343563680112884, + "eval_sts-test_spearman_max": 0.312789299505278, + "eval_vitaminc-pairs_loss": 2.966029167175293, + "eval_vitaminc-pairs_runtime": 3.1856, + "eval_vitaminc-pairs_samples_per_second": 40.18, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_negation-triplets_loss": 4.4332098960876465, + "eval_negation-triplets_runtime": 0.7499, + "eval_negation-triplets_samples_per_second": 170.686, + "eval_negation-triplets_steps_per_second": 1.333, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_scitail-pairs-pos_loss": 2.064517021179199, + "eval_scitail-pairs-pos_runtime": 0.7727, + "eval_scitail-pairs-pos_samples_per_second": 165.651, + "eval_scitail-pairs-pos_steps_per_second": 1.294, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_scitail-pairs-qa_loss": 2.3564093112945557, + "eval_scitail-pairs-qa_runtime": 0.561, + "eval_scitail-pairs-qa_samples_per_second": 228.169, + "eval_scitail-pairs-qa_steps_per_second": 1.783, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_xsum-pairs_loss": 6.930158615112305, + "eval_xsum-pairs_runtime": 3.0064, + "eval_xsum-pairs_samples_per_second": 42.575, + "eval_xsum-pairs_steps_per_second": 0.333, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_sciq_pairs_loss": 0.781018078327179, + "eval_sciq_pairs_runtime": 3.3616, + "eval_sciq_pairs_samples_per_second": 38.077, + "eval_sciq_pairs_steps_per_second": 0.297, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_qasc_pairs_loss": 4.685440540313721, + "eval_qasc_pairs_runtime": 0.5973, + "eval_qasc_pairs_samples_per_second": 214.304, + "eval_qasc_pairs_steps_per_second": 1.674, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_openbookqa_pairs_loss": 5.424518585205078, + "eval_openbookqa_pairs_runtime": 0.5716, + "eval_openbookqa_pairs_samples_per_second": 223.932, + "eval_openbookqa_pairs_steps_per_second": 1.749, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_msmarco_pairs_loss": 13.714217185974121, + "eval_msmarco_pairs_runtime": 1.5089, + "eval_msmarco_pairs_samples_per_second": 84.831, + "eval_msmarco_pairs_steps_per_second": 0.663, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_nq_pairs_loss": 12.863033294677734, + "eval_nq_pairs_runtime": 2.8862, + "eval_nq_pairs_samples_per_second": 44.35, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_trivia_pairs_loss": 8.43865966796875, + "eval_trivia_pairs_runtime": 3.4314, + "eval_trivia_pairs_samples_per_second": 37.303, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_gooaq_pairs_loss": 9.148645401000977, + "eval_gooaq_pairs_runtime": 0.9461, + "eval_gooaq_pairs_samples_per_second": 135.299, + "eval_gooaq_pairs_steps_per_second": 1.057, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_paws-pos_loss": 1.288989543914795, + "eval_paws-pos_runtime": 0.6728, + "eval_paws-pos_samples_per_second": 190.262, + "eval_paws-pos_steps_per_second": 1.486, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_global_dataset_loss": 6.3770952224731445, + "eval_global_dataset_runtime": 13.329, + "eval_global_dataset_samples_per_second": 31.21, + "eval_global_dataset_steps_per_second": 0.3, + "step": 40 + }, + { + "epoch": 0.04218106995884774, + "grad_norm": 58.63786315917969, + "learning_rate": 1.3811007268951193e-06, + "loss": 8.8223, + "step": 41 + }, + { + "epoch": 0.043209876543209874, + "grad_norm": 19.849580764770508, + "learning_rate": 1.4174454828660433e-06, + "loss": 5.7515, + "step": 42 + }, + { + "epoch": 0.044238683127572016, + "grad_norm": 32.95113754272461, + "learning_rate": 1.4537902388369678e-06, + "loss": 6.6943, + "step": 43 + }, + { + "epoch": 0.04526748971193416, + "grad_norm": 115.43840026855469, + "learning_rate": 1.4901349948078918e-06, + "loss": 12.7157, + "step": 44 + }, + { + "epoch": 0.046296296296296294, + "grad_norm": 16.027889251708984, + "learning_rate": 1.526479750778816e-06, + "loss": 5.729, + "step": 45 + }, + { + "epoch": 0.047325102880658436, + "grad_norm": 55.49090576171875, + "learning_rate": 1.5628245067497403e-06, + "loss": 8.843, + "step": 46 + }, + { + "epoch": 0.04835390946502058, + "grad_norm": 20.623491287231445, + "learning_rate": 1.5991692627206645e-06, + "loss": 6.8743, + "step": 47 + }, + { + "epoch": 0.04938271604938271, + "grad_norm": 19.15467643737793, + "learning_rate": 1.6355140186915885e-06, + "loss": 5.5015, + "step": 48 + }, + { + "epoch": 0.050411522633744855, + "grad_norm": 24.568716049194336, + "learning_rate": 1.671858774662513e-06, + "loss": 2.7761, + "step": 49 + }, + { + "epoch": 0.051440329218107, + "grad_norm": 17.216365814208984, + "learning_rate": 1.708203530633437e-06, + "loss": 4.6785, + "step": 50 + }, + { + "epoch": 0.05246913580246913, + "grad_norm": 27.83530616760254, + "learning_rate": 1.7445482866043612e-06, + "loss": 6.6596, + "step": 51 + }, + { + "epoch": 0.053497942386831275, + "grad_norm": 14.741978645324707, + "learning_rate": 1.7808930425752854e-06, + "loss": 5.4409, + "step": 52 + }, + { + "epoch": 0.05452674897119342, + "grad_norm": 27.180707931518555, + "learning_rate": 1.8172377985462097e-06, + "loss": 6.3967, + "step": 53 + }, + { + "epoch": 0.05555555555555555, + "grad_norm": 26.400497436523438, + "learning_rate": 1.8535825545171337e-06, + "loss": 6.3174, + "step": 54 + }, + { + "epoch": 0.056584362139917695, + "grad_norm": 19.098752975463867, + "learning_rate": 1.889927310488058e-06, + "loss": 5.5442, + "step": 55 + }, + { + "epoch": 0.05761316872427984, + "grad_norm": 21.40766716003418, + "learning_rate": 1.9262720664589824e-06, + "loss": 5.9004, + "step": 56 + }, + { + "epoch": 0.05864197530864197, + "grad_norm": 25.238555908203125, + "learning_rate": 1.9626168224299064e-06, + "loss": 2.9543, + "step": 57 + }, + { + "epoch": 0.059670781893004114, + "grad_norm": 21.333162307739258, + "learning_rate": 1.9989615784008304e-06, + "loss": 6.4092, + "step": 58 + }, + { + "epoch": 0.060699588477366256, + "grad_norm": 24.3674373626709, + "learning_rate": 2.035306334371755e-06, + "loss": 3.083, + "step": 59 + }, + { + "epoch": 0.06172839506172839, + "grad_norm": 77.95449829101562, + "learning_rate": 2.071651090342679e-06, + "loss": 10.6811, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_Qnli-dev_cosine_accuracy": 0.591796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9277275800704956, + "eval_Qnli-dev_cosine_ap": 0.5663256424153785, + "eval_Qnli-dev_cosine_f1": 0.6302864938608458, + "eval_Qnli-dev_cosine_f1_threshold": 0.7895882725715637, + "eval_Qnli-dev_cosine_precision": 0.4647887323943662, + "eval_Qnli-dev_cosine_recall": 0.9788135593220338, + "eval_Qnli-dev_dot_accuracy": 0.58203125, + "eval_Qnli-dev_dot_accuracy_threshold": 386.7135009765625, + "eval_Qnli-dev_dot_ap": 0.5015283426358628, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 233.70668029785156, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.591796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 7.363377571105957, + "eval_Qnli-dev_euclidean_ap": 0.5754970319906212, + "eval_Qnli-dev_euclidean_f1": 0.6322930800542741, + "eval_Qnli-dev_euclidean_f1_threshold": 13.553762435913086, + "eval_Qnli-dev_euclidean_precision": 0.46506986027944114, + "eval_Qnli-dev_euclidean_recall": 0.9872881355932204, + "eval_Qnli-dev_manhattan_accuracy": 0.62890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 166.58721923828125, + "eval_Qnli-dev_manhattan_ap": 0.6222630621246192, + "eval_Qnli-dev_manhattan_f1": 0.6346483704974271, + "eval_Qnli-dev_manhattan_f1_threshold": 178.5355224609375, + "eval_Qnli-dev_manhattan_precision": 0.5331412103746398, + "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, + "eval_Qnli-dev_max_accuracy": 0.62890625, + "eval_Qnli-dev_max_accuracy_threshold": 386.7135009765625, + "eval_Qnli-dev_max_ap": 0.6222630621246192, + "eval_Qnli-dev_max_f1": 0.6346483704974271, + "eval_Qnli-dev_max_f1_threshold": 233.70668029785156, + "eval_Qnli-dev_max_precision": 0.5331412103746398, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.6640625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9866780042648315, + "eval_allNLI-dev_cosine_ap": 0.3639636732129889, + "eval_allNLI-dev_cosine_f1": 0.5089285714285714, + "eval_allNLI-dev_cosine_f1_threshold": 0.7668333053588867, + "eval_allNLI-dev_cosine_precision": 0.342685370741483, + "eval_allNLI-dev_cosine_recall": 0.9884393063583815, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 494.3717956542969, + "eval_allNLI-dev_dot_ap": 0.3315337132453944, + "eval_allNLI-dev_dot_f1": 0.5065885797950219, + "eval_allNLI-dev_dot_f1_threshold": 322.2677001953125, + "eval_allNLI-dev_dot_precision": 0.3392156862745098, + "eval_allNLI-dev_dot_recall": 1.0, + "eval_allNLI-dev_euclidean_accuracy": 0.66796875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.485382080078125, + "eval_allNLI-dev_euclidean_ap": 0.36871559335230386, + "eval_allNLI-dev_euclidean_f1": 0.5096870342771982, + "eval_allNLI-dev_euclidean_f1_threshold": 14.86199951171875, + "eval_allNLI-dev_euclidean_precision": 0.3433734939759036, + "eval_allNLI-dev_euclidean_recall": 0.9884393063583815, + "eval_allNLI-dev_manhattan_accuracy": 0.6640625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 57.50782012939453, + "eval_allNLI-dev_manhattan_ap": 0.3800340904251822, + "eval_allNLI-dev_manhattan_f1": 0.5074183976261127, + "eval_allNLI-dev_manhattan_f1_threshold": 237.91455078125, + "eval_allNLI-dev_manhattan_precision": 0.3413173652694611, + "eval_allNLI-dev_manhattan_recall": 0.9884393063583815, + "eval_allNLI-dev_max_accuracy": 0.66796875, + "eval_allNLI-dev_max_accuracy_threshold": 494.3717956542969, + "eval_allNLI-dev_max_ap": 0.3800340904251822, + "eval_allNLI-dev_max_f1": 0.5096870342771982, + "eval_allNLI-dev_max_f1_threshold": 322.2677001953125, + "eval_allNLI-dev_max_precision": 0.3433734939759036, + "eval_allNLI-dev_max_recall": 1.0, + "eval_sequential_score": 0.6222630621246192, + "eval_sts-test_pearson_cosine": 0.18291082738946113, + "eval_sts-test_pearson_dot": 0.316123119088567, + "eval_sts-test_pearson_euclidean": 0.16506396318167735, + "eval_sts-test_pearson_manhattan": 0.20347659235425056, + "eval_sts-test_pearson_max": 0.316123119088567, + "eval_sts-test_spearman_cosine": 0.22625803672256098, + "eval_sts-test_spearman_dot": 0.32449976483491805, + "eval_sts-test_spearman_euclidean": 0.18659512800514774, + "eval_sts-test_spearman_manhattan": 0.2238469730125765, + "eval_sts-test_spearman_max": 0.32449976483491805, + "eval_vitaminc-pairs_loss": 2.9115335941314697, + "eval_vitaminc-pairs_runtime": 3.1703, + "eval_vitaminc-pairs_samples_per_second": 40.374, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_negation-triplets_loss": 4.1277852058410645, + "eval_negation-triplets_runtime": 0.7484, + "eval_negation-triplets_samples_per_second": 171.033, + "eval_negation-triplets_steps_per_second": 1.336, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_scitail-pairs-pos_loss": 1.9703718423843384, + "eval_scitail-pairs-pos_runtime": 0.8012, + "eval_scitail-pairs-pos_samples_per_second": 159.764, + "eval_scitail-pairs-pos_steps_per_second": 1.248, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_scitail-pairs-qa_loss": 2.240999221801758, + "eval_scitail-pairs-qa_runtime": 0.5627, + "eval_scitail-pairs-qa_samples_per_second": 227.467, + "eval_scitail-pairs-qa_steps_per_second": 1.777, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_xsum-pairs_loss": 6.690690994262695, + "eval_xsum-pairs_runtime": 3.0003, + "eval_xsum-pairs_samples_per_second": 42.663, + "eval_xsum-pairs_steps_per_second": 0.333, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_sciq_pairs_loss": 0.703199565410614, + "eval_sciq_pairs_runtime": 3.4121, + "eval_sciq_pairs_samples_per_second": 37.513, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_qasc_pairs_loss": 4.066890239715576, + "eval_qasc_pairs_runtime": 0.6223, + "eval_qasc_pairs_samples_per_second": 205.675, + "eval_qasc_pairs_steps_per_second": 1.607, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_openbookqa_pairs_loss": 5.092636585235596, + "eval_openbookqa_pairs_runtime": 0.5896, + "eval_openbookqa_pairs_samples_per_second": 217.085, + "eval_openbookqa_pairs_steps_per_second": 1.696, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_msmarco_pairs_loss": 11.276179313659668, + "eval_msmarco_pairs_runtime": 1.5132, + "eval_msmarco_pairs_samples_per_second": 84.591, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_nq_pairs_loss": 10.514344215393066, + "eval_nq_pairs_runtime": 2.9064, + "eval_nq_pairs_samples_per_second": 44.041, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_trivia_pairs_loss": 7.659719467163086, + "eval_trivia_pairs_runtime": 3.436, + "eval_trivia_pairs_samples_per_second": 37.253, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_gooaq_pairs_loss": 7.905792236328125, + "eval_gooaq_pairs_runtime": 0.9586, + "eval_gooaq_pairs_samples_per_second": 133.534, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_paws-pos_loss": 1.2882633209228516, + "eval_paws-pos_runtime": 0.6775, + "eval_paws-pos_samples_per_second": 188.929, + "eval_paws-pos_steps_per_second": 1.476, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_global_dataset_loss": 5.7225518226623535, + "eval_global_dataset_runtime": 13.3827, + "eval_global_dataset_samples_per_second": 31.085, + "eval_global_dataset_steps_per_second": 0.299, + "step": 60 + }, + { + "epoch": 0.06275720164609054, + "grad_norm": 18.45367431640625, + "learning_rate": 2.107995846313603e-06, + "loss": 4.6024, + "step": 61 + }, + { + "epoch": 0.06378600823045268, + "grad_norm": 17.7939510345459, + "learning_rate": 2.1443406022845273e-06, + "loss": 4.4378, + "step": 62 + }, + { + "epoch": 0.06481481481481481, + "grad_norm": 19.061763763427734, + "learning_rate": 2.1806853582554513e-06, + "loss": 6.4802, + "step": 63 + }, + { + "epoch": 0.06584362139917696, + "grad_norm": 77.05914306640625, + "learning_rate": 2.2170301142263758e-06, + "loss": 10.9004, + "step": 64 + }, + { + "epoch": 0.0668724279835391, + "grad_norm": 20.099227905273438, + "learning_rate": 2.2533748701973e-06, + "loss": 6.7516, + "step": 65 + }, + { + "epoch": 0.06790123456790123, + "grad_norm": 53.35956573486328, + "learning_rate": 2.289719626168224e-06, + "loss": 7.7821, + "step": 66 + }, + { + "epoch": 0.06893004115226338, + "grad_norm": 23.51174545288086, + "learning_rate": 2.3260643821391483e-06, + "loss": 6.1714, + "step": 67 + }, + { + "epoch": 0.06995884773662552, + "grad_norm": 11.979568481445312, + "learning_rate": 2.3624091381100727e-06, + "loss": 5.3013, + "step": 68 + }, + { + "epoch": 0.07098765432098765, + "grad_norm": 50.14888381958008, + "learning_rate": 2.3987538940809963e-06, + "loss": 9.0397, + "step": 69 + }, + { + "epoch": 0.0720164609053498, + "grad_norm": 52.993473052978516, + "learning_rate": 2.4350986500519208e-06, + "loss": 9.3361, + "step": 70 + }, + { + "epoch": 0.07304526748971193, + "grad_norm": 16.7055721282959, + "learning_rate": 2.471443406022845e-06, + "loss": 5.1927, + "step": 71 + }, + { + "epoch": 0.07407407407407407, + "grad_norm": 17.894912719726562, + "learning_rate": 2.5077881619937692e-06, + "loss": 5.6994, + "step": 72 + }, + { + "epoch": 0.07510288065843622, + "grad_norm": 29.04665184020996, + "learning_rate": 2.5441329179646932e-06, + "loss": 7.5132, + "step": 73 + }, + { + "epoch": 0.07613168724279835, + "grad_norm": 14.857793807983398, + "learning_rate": 2.5804776739356177e-06, + "loss": 5.4796, + "step": 74 + }, + { + "epoch": 0.07716049382716049, + "grad_norm": 24.775344848632812, + "learning_rate": 2.6168224299065417e-06, + "loss": 2.7714, + "step": 75 + }, + { + "epoch": 0.07818930041152264, + "grad_norm": 49.390663146972656, + "learning_rate": 2.653167185877466e-06, + "loss": 8.9842, + "step": 76 + }, + { + "epoch": 0.07921810699588477, + "grad_norm": 65.65110778808594, + "learning_rate": 2.68951194184839e-06, + "loss": 10.1764, + "step": 77 + }, + { + "epoch": 0.08024691358024691, + "grad_norm": 13.745916366577148, + "learning_rate": 2.725856697819314e-06, + "loss": 5.0512, + "step": 78 + }, + { + "epoch": 0.08127572016460906, + "grad_norm": 14.591425895690918, + "learning_rate": 2.7622014537902386e-06, + "loss": 5.5013, + "step": 79 + }, + { + "epoch": 0.0823045267489712, + "grad_norm": 14.892078399658203, + "learning_rate": 2.798546209761163e-06, + "loss": 5.4496, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_Qnli-dev_cosine_accuracy": 0.591796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9341771602630615, + "eval_Qnli-dev_cosine_ap": 0.576947319869376, + "eval_Qnli-dev_cosine_f1": 0.6346153846153846, + "eval_Qnli-dev_cosine_f1_threshold": 0.8134556412696838, + "eval_Qnli-dev_cosine_precision": 0.4695121951219512, + "eval_Qnli-dev_cosine_recall": 0.9788135593220338, + "eval_Qnli-dev_dot_accuracy": 0.580078125, + "eval_Qnli-dev_dot_accuracy_threshold": 388.09979248046875, + "eval_Qnli-dev_dot_ap": 0.5032087471570361, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 230.6592254638672, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.6015625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 7.462021827697754, + "eval_Qnli-dev_euclidean_ap": 0.5838970485633856, + "eval_Qnli-dev_euclidean_f1": 0.6344827586206897, + "eval_Qnli-dev_euclidean_f1_threshold": 12.409799575805664, + "eval_Qnli-dev_euclidean_precision": 0.4703476482617587, + "eval_Qnli-dev_euclidean_recall": 0.9745762711864406, + "eval_Qnli-dev_manhattan_accuracy": 0.62890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 163.1259002685547, + "eval_Qnli-dev_manhattan_ap": 0.630845221732911, + "eval_Qnli-dev_manhattan_f1": 0.6355421686746988, + "eval_Qnli-dev_manhattan_f1_threshold": 187.08981323242188, + "eval_Qnli-dev_manhattan_precision": 0.4929906542056075, + "eval_Qnli-dev_manhattan_recall": 0.8940677966101694, + "eval_Qnli-dev_max_accuracy": 0.62890625, + "eval_Qnli-dev_max_accuracy_threshold": 388.09979248046875, + "eval_Qnli-dev_max_ap": 0.630845221732911, + "eval_Qnli-dev_max_f1": 0.6355421686746988, + "eval_Qnli-dev_max_f1_threshold": 230.6592254638672, + "eval_Qnli-dev_max_precision": 0.4929906542056075, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.666015625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.97718346118927, + "eval_allNLI-dev_cosine_ap": 0.3756015628227595, + "eval_allNLI-dev_cosine_f1": 0.5068702290076336, + "eval_allNLI-dev_cosine_f1_threshold": 0.8239856958389282, + "eval_allNLI-dev_cosine_precision": 0.34439834024896265, + "eval_allNLI-dev_cosine_recall": 0.9595375722543352, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 495.46832275390625, + "eval_allNLI-dev_dot_ap": 0.332020946884521, + "eval_allNLI-dev_dot_f1": 0.5036603221083455, + "eval_allNLI-dev_dot_f1_threshold": 312.1241760253906, + "eval_allNLI-dev_dot_precision": 0.33725490196078434, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.66796875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.271183013916016, + "eval_allNLI-dev_euclidean_ap": 0.3777199146320434, + "eval_allNLI-dev_euclidean_f1": 0.5091463414634146, + "eval_allNLI-dev_euclidean_f1_threshold": 12.89515209197998, + "eval_allNLI-dev_euclidean_precision": 0.34575569358178054, + "eval_allNLI-dev_euclidean_recall": 0.9653179190751445, + "eval_allNLI-dev_manhattan_accuracy": 0.666015625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 88.01801300048828, + "eval_allNLI-dev_manhattan_ap": 0.38882171851359393, + "eval_allNLI-dev_manhattan_f1": 0.5068285280728376, + "eval_allNLI-dev_manhattan_f1_threshold": 208.61183166503906, + "eval_allNLI-dev_manhattan_precision": 0.3436213991769547, + "eval_allNLI-dev_manhattan_recall": 0.9653179190751445, + "eval_allNLI-dev_max_accuracy": 0.66796875, + "eval_allNLI-dev_max_accuracy_threshold": 495.46832275390625, + "eval_allNLI-dev_max_ap": 0.38882171851359393, + "eval_allNLI-dev_max_f1": 0.5091463414634146, + "eval_allNLI-dev_max_f1_threshold": 312.1241760253906, + "eval_allNLI-dev_max_precision": 0.34575569358178054, + "eval_allNLI-dev_max_recall": 0.9942196531791907, + "eval_sequential_score": 0.630845221732911, + "eval_sts-test_pearson_cosine": 0.2082090854077526, + "eval_sts-test_pearson_dot": 0.31968251773175477, + "eval_sts-test_pearson_euclidean": 0.18565762314607082, + "eval_sts-test_pearson_manhattan": 0.21750467365326087, + "eval_sts-test_pearson_max": 0.31968251773175477, + "eval_sts-test_spearman_cosine": 0.2475509554001572, + "eval_sts-test_spearman_dot": 0.32583854357070313, + "eval_sts-test_spearman_euclidean": 0.20592825469263046, + "eval_sts-test_spearman_manhattan": 0.23787152606876585, + "eval_sts-test_spearman_max": 0.32583854357070313, + "eval_vitaminc-pairs_loss": 2.887739896774292, + "eval_vitaminc-pairs_runtime": 3.1934, + "eval_vitaminc-pairs_samples_per_second": 40.083, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_negation-triplets_loss": 3.878300666809082, + "eval_negation-triplets_runtime": 0.7531, + "eval_negation-triplets_samples_per_second": 169.96, + "eval_negation-triplets_steps_per_second": 1.328, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_scitail-pairs-pos_loss": 1.8523993492126465, + "eval_scitail-pairs-pos_runtime": 0.7885, + "eval_scitail-pairs-pos_samples_per_second": 162.341, + "eval_scitail-pairs-pos_steps_per_second": 1.268, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_scitail-pairs-qa_loss": 2.0656681060791016, + "eval_scitail-pairs-qa_runtime": 0.5722, + "eval_scitail-pairs-qa_samples_per_second": 223.714, + "eval_scitail-pairs-qa_steps_per_second": 1.748, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_xsum-pairs_loss": 6.511655807495117, + "eval_xsum-pairs_runtime": 3.0191, + "eval_xsum-pairs_samples_per_second": 42.397, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_sciq_pairs_loss": 0.6626698970794678, + "eval_sciq_pairs_runtime": 3.445, + "eval_sciq_pairs_samples_per_second": 37.156, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_qasc_pairs_loss": 3.637084722518921, + "eval_qasc_pairs_runtime": 0.6139, + "eval_qasc_pairs_samples_per_second": 208.501, + "eval_qasc_pairs_steps_per_second": 1.629, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_openbookqa_pairs_loss": 4.819972038269043, + "eval_openbookqa_pairs_runtime": 0.5735, + "eval_openbookqa_pairs_samples_per_second": 223.194, + "eval_openbookqa_pairs_steps_per_second": 1.744, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_msmarco_pairs_loss": 9.547957420349121, + "eval_msmarco_pairs_runtime": 1.5165, + "eval_msmarco_pairs_samples_per_second": 84.404, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_nq_pairs_loss": 8.830368995666504, + "eval_nq_pairs_runtime": 2.8979, + "eval_nq_pairs_samples_per_second": 44.17, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_trivia_pairs_loss": 7.168319225311279, + "eval_trivia_pairs_runtime": 3.4425, + "eval_trivia_pairs_samples_per_second": 37.182, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_gooaq_pairs_loss": 7.121779918670654, + "eval_gooaq_pairs_runtime": 0.9493, + "eval_gooaq_pairs_samples_per_second": 134.835, + "eval_gooaq_pairs_steps_per_second": 1.053, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_paws-pos_loss": 1.2316638231277466, + "eval_paws-pos_runtime": 0.6943, + "eval_paws-pos_samples_per_second": 184.348, + "eval_paws-pos_steps_per_second": 1.44, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_global_dataset_loss": 5.2695698738098145, + "eval_global_dataset_runtime": 13.3665, + "eval_global_dataset_samples_per_second": 31.123, + "eval_global_dataset_steps_per_second": 0.299, + "step": 80 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 48.79065704345703, + "learning_rate": 2.8348909657320867e-06, + "loss": 9.1986, + "step": 81 + }, + { + "epoch": 0.08436213991769548, + "grad_norm": 13.215801239013672, + "learning_rate": 2.871235721703011e-06, + "loss": 5.5779, + "step": 82 + }, + { + "epoch": 0.08539094650205761, + "grad_norm": 14.010391235351562, + "learning_rate": 2.9075804776739355e-06, + "loss": 5.446, + "step": 83 + }, + { + "epoch": 0.08641975308641975, + "grad_norm": 14.401032447814941, + "learning_rate": 2.9439252336448596e-06, + "loss": 5.5707, + "step": 84 + }, + { + "epoch": 0.0874485596707819, + "grad_norm": 11.690423011779785, + "learning_rate": 2.9802699896157836e-06, + "loss": 5.064, + "step": 85 + }, + { + "epoch": 0.08847736625514403, + "grad_norm": 14.510086059570312, + "learning_rate": 3.016614745586708e-06, + "loss": 5.1192, + "step": 86 + }, + { + "epoch": 0.08950617283950617, + "grad_norm": 11.915549278259277, + "learning_rate": 3.052959501557632e-06, + "loss": 5.0992, + "step": 87 + }, + { + "epoch": 0.09053497942386832, + "grad_norm": 11.183893203735352, + "learning_rate": 3.0893042575285565e-06, + "loss": 5.1639, + "step": 88 + }, + { + "epoch": 0.09156378600823045, + "grad_norm": 23.76273536682129, + "learning_rate": 3.1256490134994805e-06, + "loss": 6.4692, + "step": 89 + }, + { + "epoch": 0.09259259259259259, + "grad_norm": 13.50161075592041, + "learning_rate": 3.1619937694704045e-06, + "loss": 5.1285, + "step": 90 + }, + { + "epoch": 0.09362139917695474, + "grad_norm": 25.397741317749023, + "learning_rate": 3.198338525441329e-06, + "loss": 2.8464, + "step": 91 + }, + { + "epoch": 0.09465020576131687, + "grad_norm": 12.421465873718262, + "learning_rate": 3.2346832814122534e-06, + "loss": 4.9592, + "step": 92 + }, + { + "epoch": 0.09567901234567901, + "grad_norm": 12.573847770690918, + "learning_rate": 3.271028037383177e-06, + "loss": 5.1014, + "step": 93 + }, + { + "epoch": 0.09670781893004116, + "grad_norm": 34.48383331298828, + "learning_rate": 3.3073727933541015e-06, + "loss": 8.0528, + "step": 94 + }, + { + "epoch": 0.09773662551440329, + "grad_norm": 22.98038673400879, + "learning_rate": 3.343717549325026e-06, + "loss": 6.5803, + "step": 95 + }, + { + "epoch": 0.09876543209876543, + "grad_norm": 18.991193771362305, + "learning_rate": 3.38006230529595e-06, + "loss": 3.9696, + "step": 96 + }, + { + "epoch": 0.09979423868312758, + "grad_norm": 14.325688362121582, + "learning_rate": 3.416407061266874e-06, + "loss": 5.2509, + "step": 97 + }, + { + "epoch": 0.10082304526748971, + "grad_norm": 32.09270095825195, + "learning_rate": 3.4527518172377984e-06, + "loss": 7.8866, + "step": 98 + }, + { + "epoch": 0.10185185185185185, + "grad_norm": 28.032167434692383, + "learning_rate": 3.4890965732087224e-06, + "loss": 2.4669, + "step": 99 + }, + { + "epoch": 0.102880658436214, + "grad_norm": 19.722026824951172, + "learning_rate": 3.525441329179647e-06, + "loss": 6.8252, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_Qnli-dev_cosine_accuracy": 0.60546875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9359708428382874, + "eval_Qnli-dev_cosine_ap": 0.5859495580627428, + "eval_Qnli-dev_cosine_f1": 0.6305278174037089, + "eval_Qnli-dev_cosine_f1_threshold": 0.8434731960296631, + "eval_Qnli-dev_cosine_precision": 0.4752688172043011, + "eval_Qnli-dev_cosine_recall": 0.9364406779661016, + "eval_Qnli-dev_dot_accuracy": 0.58203125, + "eval_Qnli-dev_dot_accuracy_threshold": 392.71923828125, + "eval_Qnli-dev_dot_ap": 0.5087577253973941, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 236.47132873535156, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.603515625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 7.414036750793457, + "eval_Qnli-dev_euclidean_ap": 0.59330387039405, + "eval_Qnli-dev_euclidean_f1": 0.6291834002677376, + "eval_Qnli-dev_euclidean_f1_threshold": 18.49761962890625, + "eval_Qnli-dev_euclidean_precision": 0.4598825831702544, + "eval_Qnli-dev_euclidean_recall": 0.9957627118644068, + "eval_Qnli-dev_manhattan_accuracy": 0.6328125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 158.1238555908203, + "eval_Qnli-dev_manhattan_ap": 0.636242439203504, + "eval_Qnli-dev_manhattan_f1": 0.640746500777605, + "eval_Qnli-dev_manhattan_f1_threshold": 185.45480346679688, + "eval_Qnli-dev_manhattan_precision": 0.5061425061425061, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.6328125, + "eval_Qnli-dev_max_accuracy_threshold": 392.71923828125, + "eval_Qnli-dev_max_ap": 0.636242439203504, + "eval_Qnli-dev_max_f1": 0.640746500777605, + "eval_Qnli-dev_max_f1_threshold": 236.47132873535156, + "eval_Qnli-dev_max_precision": 0.5061425061425061, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.671875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9702135324478149, + "eval_allNLI-dev_cosine_ap": 0.38635245174664545, + "eval_allNLI-dev_cosine_f1": 0.5051698670605613, + "eval_allNLI-dev_cosine_f1_threshold": 0.7689170837402344, + "eval_allNLI-dev_cosine_precision": 0.3392857142857143, + "eval_allNLI-dev_cosine_recall": 0.9884393063583815, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 499.2386779785156, + "eval_allNLI-dev_dot_ap": 0.33354410040006655, + "eval_allNLI-dev_dot_f1": 0.5036603221083455, + "eval_allNLI-dev_dot_f1_threshold": 310.7790222167969, + "eval_allNLI-dev_dot_precision": 0.33725490196078434, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.669921875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 5.469601154327393, + "eval_allNLI-dev_euclidean_ap": 0.38522819959781573, + "eval_allNLI-dev_euclidean_f1": 0.5059171597633136, + "eval_allNLI-dev_euclidean_f1_threshold": 14.400506019592285, + "eval_allNLI-dev_euclidean_precision": 0.3399602385685885, + "eval_allNLI-dev_euclidean_recall": 0.9884393063583815, + "eval_allNLI-dev_manhattan_accuracy": 0.66796875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 83.476806640625, + "eval_allNLI-dev_manhattan_ap": 0.398882755775317, + "eval_allNLI-dev_manhattan_f1": 0.5051395007342143, + "eval_allNLI-dev_manhattan_f1_threshold": 256.5009765625, + "eval_allNLI-dev_manhattan_precision": 0.33858267716535434, + "eval_allNLI-dev_manhattan_recall": 0.9942196531791907, + "eval_allNLI-dev_max_accuracy": 0.671875, + "eval_allNLI-dev_max_accuracy_threshold": 499.2386779785156, + "eval_allNLI-dev_max_ap": 0.398882755775317, + "eval_allNLI-dev_max_f1": 0.5059171597633136, + "eval_allNLI-dev_max_f1_threshold": 310.7790222167969, + "eval_allNLI-dev_max_precision": 0.3399602385685885, + "eval_allNLI-dev_max_recall": 0.9942196531791907, + "eval_sequential_score": 0.636242439203504, + "eval_sts-test_pearson_cosine": 0.233326009931931, + "eval_sts-test_pearson_dot": 0.3167806500856212, + "eval_sts-test_pearson_euclidean": 0.20945664323942717, + "eval_sts-test_pearson_manhattan": 0.23559165515257938, + "eval_sts-test_pearson_max": 0.3167806500856212, + "eval_sts-test_spearman_cosine": 0.2687911570918344, + "eval_sts-test_spearman_dot": 0.32229956906860985, + "eval_sts-test_spearman_euclidean": 0.22929892968536797, + "eval_sts-test_spearman_manhattan": 0.25574708751351516, + "eval_sts-test_spearman_max": 0.32229956906860985, + "eval_vitaminc-pairs_loss": 2.8645708560943604, + "eval_vitaminc-pairs_runtime": 3.1781, + "eval_vitaminc-pairs_samples_per_second": 40.275, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_negation-triplets_loss": 3.715083599090576, + "eval_negation-triplets_runtime": 0.7412, + "eval_negation-triplets_samples_per_second": 172.701, + "eval_negation-triplets_steps_per_second": 1.349, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_scitail-pairs-pos_loss": 1.6467901468276978, + "eval_scitail-pairs-pos_runtime": 0.828, + "eval_scitail-pairs-pos_samples_per_second": 154.583, + "eval_scitail-pairs-pos_steps_per_second": 1.208, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_scitail-pairs-qa_loss": 1.8661956787109375, + "eval_scitail-pairs-qa_runtime": 0.5663, + "eval_scitail-pairs-qa_samples_per_second": 226.026, + "eval_scitail-pairs-qa_steps_per_second": 1.766, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_xsum-pairs_loss": 6.297423839569092, + "eval_xsum-pairs_runtime": 3.0214, + "eval_xsum-pairs_samples_per_second": 42.364, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_sciq_pairs_loss": 0.6386430263519287, + "eval_sciq_pairs_runtime": 3.404, + "eval_sciq_pairs_samples_per_second": 37.603, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_qasc_pairs_loss": 3.3296892642974854, + "eval_qasc_pairs_runtime": 0.5903, + "eval_qasc_pairs_samples_per_second": 216.831, + "eval_qasc_pairs_steps_per_second": 1.694, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_openbookqa_pairs_loss": 4.621798038482666, + "eval_openbookqa_pairs_runtime": 0.5726, + "eval_openbookqa_pairs_samples_per_second": 223.561, + "eval_openbookqa_pairs_steps_per_second": 1.747, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_msmarco_pairs_loss": 8.393180847167969, + "eval_msmarco_pairs_runtime": 1.5114, + "eval_msmarco_pairs_samples_per_second": 84.687, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_nq_pairs_loss": 7.865816116333008, + "eval_nq_pairs_runtime": 2.888, + "eval_nq_pairs_samples_per_second": 44.321, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_trivia_pairs_loss": 6.768343925476074, + "eval_trivia_pairs_runtime": 3.4313, + "eval_trivia_pairs_samples_per_second": 37.303, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_gooaq_pairs_loss": 6.616071701049805, + "eval_gooaq_pairs_runtime": 0.9398, + "eval_gooaq_pairs_samples_per_second": 136.205, + "eval_gooaq_pairs_steps_per_second": 1.064, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_paws-pos_loss": 1.1048204898834229, + "eval_paws-pos_runtime": 0.6813, + "eval_paws-pos_samples_per_second": 187.888, + "eval_paws-pos_steps_per_second": 1.468, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_global_dataset_loss": 4.85481595993042, + "eval_global_dataset_runtime": 13.3418, + "eval_global_dataset_samples_per_second": 31.18, + "eval_global_dataset_steps_per_second": 0.3, + "step": 100 + }, + { + "epoch": 0.10390946502057613, + "grad_norm": 16.656429290771484, + "learning_rate": 3.561786085150571e-06, + "loss": 5.3891, + "step": 101 + }, + { + "epoch": 0.10493827160493827, + "grad_norm": 35.718448638916016, + "learning_rate": 3.598130841121495e-06, + "loss": 8.2981, + "step": 102 + }, + { + "epoch": 0.10596707818930041, + "grad_norm": 33.725162506103516, + "learning_rate": 3.6344755970924193e-06, + "loss": 8.0844, + "step": 103 + }, + { + "epoch": 0.10699588477366255, + "grad_norm": 19.359039306640625, + "learning_rate": 3.6708203530633433e-06, + "loss": 4.0626, + "step": 104 + }, + { + "epoch": 0.10802469135802469, + "grad_norm": 14.194345474243164, + "learning_rate": 3.7071651090342674e-06, + "loss": 4.8851, + "step": 105 + }, + { + "epoch": 0.10905349794238683, + "grad_norm": 14.311044692993164, + "learning_rate": 3.743509865005192e-06, + "loss": 5.1174, + "step": 106 + }, + { + "epoch": 0.11008230452674897, + "grad_norm": 13.10085678100586, + "learning_rate": 3.779854620976116e-06, + "loss": 4.973, + "step": 107 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 14.114293098449707, + "learning_rate": 3.81619937694704e-06, + "loss": 5.2879, + "step": 108 + }, + { + "epoch": 0.11213991769547325, + "grad_norm": 13.561037063598633, + "learning_rate": 3.852544132917965e-06, + "loss": 4.9833, + "step": 109 + }, + { + "epoch": 0.11316872427983539, + "grad_norm": 14.047689437866211, + "learning_rate": 3.888888888888889e-06, + "loss": 5.1619, + "step": 110 + }, + { + "epoch": 0.11419753086419752, + "grad_norm": 37.19677734375, + "learning_rate": 3.925233644859813e-06, + "loss": 8.2926, + "step": 111 + }, + { + "epoch": 0.11522633744855967, + "grad_norm": 12.727770805358887, + "learning_rate": 3.961578400830737e-06, + "loss": 4.7478, + "step": 112 + }, + { + "epoch": 0.11625514403292181, + "grad_norm": 12.889444351196289, + "learning_rate": 3.997923156801661e-06, + "loss": 4.7644, + "step": 113 + }, + { + "epoch": 0.11728395061728394, + "grad_norm": 20.46539878845215, + "learning_rate": 4.034267912772586e-06, + "loss": 6.379, + "step": 114 + }, + { + "epoch": 0.1183127572016461, + "grad_norm": 14.000577926635742, + "learning_rate": 4.07061266874351e-06, + "loss": 4.8567, + "step": 115 + }, + { + "epoch": 0.11934156378600823, + "grad_norm": 14.417937278747559, + "learning_rate": 4.106957424714434e-06, + "loss": 4.939, + "step": 116 + }, + { + "epoch": 0.12037037037037036, + "grad_norm": 36.433433532714844, + "learning_rate": 4.143302180685358e-06, + "loss": 6.4653, + "step": 117 + }, + { + "epoch": 0.12139917695473251, + "grad_norm": 15.122117042541504, + "learning_rate": 4.179646936656283e-06, + "loss": 5.0902, + "step": 118 + }, + { + "epoch": 0.12242798353909465, + "grad_norm": 15.600722312927246, + "learning_rate": 4.215991692627206e-06, + "loss": 4.4886, + "step": 119 + }, + { + "epoch": 0.12345679012345678, + "grad_norm": 18.391870498657227, + "learning_rate": 4.252336448598131e-06, + "loss": 6.2223, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_Qnli-dev_cosine_accuracy": 0.619140625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9281325340270996, + "eval_Qnli-dev_cosine_ap": 0.6014574414783835, + "eval_Qnli-dev_cosine_f1": 0.6309341500765697, + "eval_Qnli-dev_cosine_f1_threshold": 0.8621190786361694, + "eval_Qnli-dev_cosine_precision": 0.4940047961630695, + "eval_Qnli-dev_cosine_recall": 0.8728813559322034, + "eval_Qnli-dev_dot_accuracy": 0.58984375, + "eval_Qnli-dev_dot_accuracy_threshold": 388.7757568359375, + "eval_Qnli-dev_dot_ap": 0.5127748615151599, + "eval_Qnli-dev_dot_f1": 0.6304044630404463, + "eval_Qnli-dev_dot_f1_threshold": 322.849853515625, + "eval_Qnli-dev_dot_precision": 0.4698544698544699, + "eval_Qnli-dev_dot_recall": 0.9576271186440678, + "eval_Qnli-dev_euclidean_accuracy": 0.6171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 8.507330894470215, + "eval_Qnli-dev_euclidean_ap": 0.6089590025180598, + "eval_Qnli-dev_euclidean_f1": 0.6291834002677376, + "eval_Qnli-dev_euclidean_f1_threshold": 18.0284423828125, + "eval_Qnli-dev_euclidean_precision": 0.4598825831702544, + "eval_Qnli-dev_euclidean_recall": 0.9957627118644068, + "eval_Qnli-dev_manhattan_accuracy": 0.642578125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 169.31954956054688, + "eval_Qnli-dev_manhattan_ap": 0.6439314246828807, + "eval_Qnli-dev_manhattan_f1": 0.6509433962264151, + "eval_Qnli-dev_manhattan_f1_threshold": 195.28048706054688, + "eval_Qnli-dev_manhattan_precision": 0.5175, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.642578125, + "eval_Qnli-dev_max_accuracy_threshold": 388.7757568359375, + "eval_Qnli-dev_max_ap": 0.6439314246828807, + "eval_Qnli-dev_max_f1": 0.6509433962264151, + "eval_Qnli-dev_max_f1_threshold": 322.849853515625, + "eval_Qnli-dev_max_precision": 0.5175, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.66796875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9790990948677063, + "eval_allNLI-dev_cosine_ap": 0.3955241297150008, + "eval_allNLI-dev_cosine_f1": 0.5052005943536404, + "eval_allNLI-dev_cosine_f1_threshold": 0.7795530557632446, + "eval_allNLI-dev_cosine_precision": 0.34, + "eval_allNLI-dev_cosine_recall": 0.9826589595375722, + "eval_allNLI-dev_dot_accuracy": 0.6640625, + "eval_allNLI-dev_dot_accuracy_threshold": 514.1408081054688, + "eval_allNLI-dev_dot_ap": 0.3428427300114505, + "eval_allNLI-dev_dot_f1": 0.5043988269794721, + "eval_allNLI-dev_dot_f1_threshold": 316.1231994628906, + "eval_allNLI-dev_dot_precision": 0.3379174852652259, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.669921875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.674912452697754, + "eval_allNLI-dev_euclidean_ap": 0.394931606062899, + "eval_allNLI-dev_euclidean_f1": 0.5067873303167421, + "eval_allNLI-dev_euclidean_f1_threshold": 13.242253303527832, + "eval_allNLI-dev_euclidean_precision": 0.34285714285714286, + "eval_allNLI-dev_euclidean_recall": 0.9710982658959537, + "eval_allNLI-dev_manhattan_accuracy": 0.669921875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 84.45820617675781, + "eval_allNLI-dev_manhattan_ap": 0.40742769361596887, + "eval_allNLI-dev_manhattan_f1": 0.5058479532163743, + "eval_allNLI-dev_manhattan_f1_threshold": 293.953369140625, + "eval_allNLI-dev_manhattan_precision": 0.3385518590998043, + "eval_allNLI-dev_manhattan_recall": 1.0, + "eval_allNLI-dev_max_accuracy": 0.669921875, + "eval_allNLI-dev_max_accuracy_threshold": 514.1408081054688, + "eval_allNLI-dev_max_ap": 0.40742769361596887, + "eval_allNLI-dev_max_f1": 0.5067873303167421, + "eval_allNLI-dev_max_f1_threshold": 316.1231994628906, + "eval_allNLI-dev_max_precision": 0.34285714285714286, + "eval_allNLI-dev_max_recall": 1.0, + "eval_sequential_score": 0.6439314246828807, + "eval_sts-test_pearson_cosine": 0.25252985635600256, + "eval_sts-test_pearson_dot": 0.3099351189652281, + "eval_sts-test_pearson_euclidean": 0.23142843084411574, + "eval_sts-test_pearson_manhattan": 0.2502258002878053, + "eval_sts-test_pearson_max": 0.3099351189652281, + "eval_sts-test_spearman_cosine": 0.28591643554731094, + "eval_sts-test_spearman_dot": 0.3177811684597045, + "eval_sts-test_spearman_euclidean": 0.24943896636699894, + "eval_sts-test_spearman_manhattan": 0.2700833945157724, + "eval_sts-test_spearman_max": 0.3177811684597045, + "eval_vitaminc-pairs_loss": 2.8456013202667236, + "eval_vitaminc-pairs_runtime": 3.1683, + "eval_vitaminc-pairs_samples_per_second": 40.4, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_negation-triplets_loss": 3.5346930027008057, + "eval_negation-triplets_runtime": 0.7273, + "eval_negation-triplets_samples_per_second": 175.983, + "eval_negation-triplets_steps_per_second": 1.375, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_scitail-pairs-pos_loss": 1.3952267169952393, + "eval_scitail-pairs-pos_runtime": 0.7901, + "eval_scitail-pairs-pos_samples_per_second": 162.002, + "eval_scitail-pairs-pos_steps_per_second": 1.266, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_scitail-pairs-qa_loss": 1.6222929954528809, + "eval_scitail-pairs-qa_runtime": 0.5623, + "eval_scitail-pairs-qa_samples_per_second": 227.629, + "eval_scitail-pairs-qa_steps_per_second": 1.778, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_xsum-pairs_loss": 6.103888988494873, + "eval_xsum-pairs_runtime": 3.0165, + "eval_xsum-pairs_samples_per_second": 42.433, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_sciq_pairs_loss": 0.6113746762275696, + "eval_sciq_pairs_runtime": 3.3757, + "eval_sciq_pairs_samples_per_second": 37.918, + "eval_sciq_pairs_steps_per_second": 0.296, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_qasc_pairs_loss": 3.058934450149536, + "eval_qasc_pairs_runtime": 0.59, + "eval_qasc_pairs_samples_per_second": 216.943, + "eval_qasc_pairs_steps_per_second": 1.695, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_openbookqa_pairs_loss": 4.499716758728027, + "eval_openbookqa_pairs_runtime": 0.5708, + "eval_openbookqa_pairs_samples_per_second": 224.263, + "eval_openbookqa_pairs_steps_per_second": 1.752, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_msmarco_pairs_loss": 7.5348734855651855, + "eval_msmarco_pairs_runtime": 1.514, + "eval_msmarco_pairs_samples_per_second": 84.546, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_nq_pairs_loss": 7.202226638793945, + "eval_nq_pairs_runtime": 2.8915, + "eval_nq_pairs_samples_per_second": 44.268, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_trivia_pairs_loss": 6.395583152770996, + "eval_trivia_pairs_runtime": 3.4281, + "eval_trivia_pairs_samples_per_second": 37.338, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_gooaq_pairs_loss": 6.247038841247559, + "eval_gooaq_pairs_runtime": 0.9411, + "eval_gooaq_pairs_samples_per_second": 136.018, + "eval_gooaq_pairs_steps_per_second": 1.063, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_paws-pos_loss": 0.8818368911743164, + "eval_paws-pos_runtime": 0.7042, + "eval_paws-pos_samples_per_second": 181.755, + "eval_paws-pos_steps_per_second": 1.42, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_global_dataset_loss": 4.362409591674805, + "eval_global_dataset_runtime": 13.3877, + "eval_global_dataset_samples_per_second": 31.073, + "eval_global_dataset_steps_per_second": 0.299, + "step": 120 + }, + { + "epoch": 0.12448559670781893, + "grad_norm": 14.784011840820312, + "learning_rate": 4.288681204569055e-06, + "loss": 4.5958, + "step": 121 + }, + { + "epoch": 0.12551440329218108, + "grad_norm": 16.07524871826172, + "learning_rate": 4.325025960539979e-06, + "loss": 6.2355, + "step": 122 + }, + { + "epoch": 0.12654320987654322, + "grad_norm": 25.21320915222168, + "learning_rate": 4.361370716510903e-06, + "loss": 6.0763, + "step": 123 + }, + { + "epoch": 0.12757201646090535, + "grad_norm": 13.882258415222168, + "learning_rate": 4.3977154724818276e-06, + "loss": 4.719, + "step": 124 + }, + { + "epoch": 0.1286008230452675, + "grad_norm": 25.57428741455078, + "learning_rate": 4.4340602284527516e-06, + "loss": 6.0796, + "step": 125 + }, + { + "epoch": 0.12962962962962962, + "grad_norm": 14.860637664794922, + "learning_rate": 4.470404984423675e-06, + "loss": 4.284, + "step": 126 + }, + { + "epoch": 0.13065843621399176, + "grad_norm": 14.258697509765625, + "learning_rate": 4.5067497403946e-06, + "loss": 4.4948, + "step": 127 + }, + { + "epoch": 0.13168724279835392, + "grad_norm": 12.680214881896973, + "learning_rate": 4.543094496365524e-06, + "loss": 5.5456, + "step": 128 + }, + { + "epoch": 0.13271604938271606, + "grad_norm": 18.65047836303711, + "learning_rate": 4.579439252336448e-06, + "loss": 5.8322, + "step": 129 + }, + { + "epoch": 0.1337448559670782, + "grad_norm": 14.29658031463623, + "learning_rate": 4.6157840083073725e-06, + "loss": 4.0772, + "step": 130 + }, + { + "epoch": 0.13477366255144033, + "grad_norm": 14.54943561553955, + "learning_rate": 4.6521287642782965e-06, + "loss": 4.3008, + "step": 131 + }, + { + "epoch": 0.13580246913580246, + "grad_norm": 25.34575080871582, + "learning_rate": 4.6884735202492206e-06, + "loss": 6.878, + "step": 132 + }, + { + "epoch": 0.1368312757201646, + "grad_norm": 20.41341781616211, + "learning_rate": 4.724818276220145e-06, + "loss": 6.1897, + "step": 133 + }, + { + "epoch": 0.13786008230452676, + "grad_norm": 12.791062355041504, + "learning_rate": 4.7611630321910694e-06, + "loss": 4.142, + "step": 134 + }, + { + "epoch": 0.1388888888888889, + "grad_norm": 32.09108352661133, + "learning_rate": 4.797507788161993e-06, + "loss": 1.7782, + "step": 135 + }, + { + "epoch": 0.13991769547325103, + "grad_norm": 15.483809471130371, + "learning_rate": 4.8338525441329175e-06, + "loss": 3.9578, + "step": 136 + }, + { + "epoch": 0.14094650205761317, + "grad_norm": 17.372329711914062, + "learning_rate": 4.8701973001038415e-06, + "loss": 5.8774, + "step": 137 + }, + { + "epoch": 0.1419753086419753, + "grad_norm": 31.082347869873047, + "learning_rate": 4.9065420560747655e-06, + "loss": 1.6068, + "step": 138 + }, + { + "epoch": 0.14300411522633744, + "grad_norm": 13.522706985473633, + "learning_rate": 4.94288681204569e-06, + "loss": 4.164, + "step": 139 + }, + { + "epoch": 0.1440329218106996, + "grad_norm": 12.907632827758789, + "learning_rate": 4.979231568016614e-06, + "loss": 3.8015, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_Qnli-dev_cosine_accuracy": 0.6328125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9141117334365845, + "eval_Qnli-dev_cosine_ap": 0.6198480681016185, + "eval_Qnli-dev_cosine_f1": 0.6397608370702541, + "eval_Qnli-dev_cosine_f1_threshold": 0.8335354328155518, + "eval_Qnli-dev_cosine_precision": 0.4942263279445728, + "eval_Qnli-dev_cosine_recall": 0.9067796610169492, + "eval_Qnli-dev_dot_accuracy": 0.599609375, + "eval_Qnli-dev_dot_accuracy_threshold": 405.96319580078125, + "eval_Qnli-dev_dot_ap": 0.5257457885237911, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 256.84857177734375, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.63671875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 9.261069297790527, + "eval_Qnli-dev_euclidean_ap": 0.6306496803563475, + "eval_Qnli-dev_euclidean_f1": 0.636094674556213, + "eval_Qnli-dev_euclidean_f1_threshold": 12.323160171508789, + "eval_Qnli-dev_euclidean_precision": 0.48863636363636365, + "eval_Qnli-dev_euclidean_recall": 0.9110169491525424, + "eval_Qnli-dev_manhattan_accuracy": 0.646484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 171.06039428710938, + "eval_Qnli-dev_manhattan_ap": 0.6564076451753581, + "eval_Qnli-dev_manhattan_f1": 0.6487341772151899, + "eval_Qnli-dev_manhattan_f1_threshold": 217.7759552001953, + "eval_Qnli-dev_manhattan_precision": 0.5176767676767676, + "eval_Qnli-dev_manhattan_recall": 0.8686440677966102, + "eval_Qnli-dev_max_accuracy": 0.646484375, + "eval_Qnli-dev_max_accuracy_threshold": 405.96319580078125, + "eval_Qnli-dev_max_ap": 0.6564076451753581, + "eval_Qnli-dev_max_f1": 0.6487341772151899, + "eval_Qnli-dev_max_f1_threshold": 256.84857177734375, + "eval_Qnli-dev_max_precision": 0.5176767676767676, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.66796875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9803125262260437, + "eval_allNLI-dev_cosine_ap": 0.40304954675643245, + "eval_allNLI-dev_cosine_f1": 0.5073313782991202, + "eval_allNLI-dev_cosine_f1_threshold": 0.7168662548065186, + "eval_allNLI-dev_cosine_precision": 0.33988212180746563, + "eval_allNLI-dev_cosine_recall": 1.0, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 522.0433959960938, + "eval_allNLI-dev_dot_ap": 0.3516359548665584, + "eval_allNLI-dev_dot_f1": 0.5065885797950219, + "eval_allNLI-dev_dot_f1_threshold": 309.74951171875, + "eval_allNLI-dev_dot_precision": 0.3392156862745098, + "eval_allNLI-dev_dot_recall": 1.0, + "eval_allNLI-dev_euclidean_accuracy": 0.669921875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 6.128700256347656, + "eval_allNLI-dev_euclidean_ap": 0.4017412298983858, + "eval_allNLI-dev_euclidean_f1": 0.5088235294117648, + "eval_allNLI-dev_euclidean_f1_threshold": 16.340839385986328, + "eval_allNLI-dev_euclidean_precision": 0.34122287968441817, + "eval_allNLI-dev_euclidean_recall": 1.0, + "eval_allNLI-dev_manhattan_accuracy": 0.66796875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 83.17814636230469, + "eval_allNLI-dev_manhattan_ap": 0.41319055063996046, + "eval_allNLI-dev_manhattan_f1": 0.5186567164179104, + "eval_allNLI-dev_manhattan_f1_threshold": 201.40753173828125, + "eval_allNLI-dev_manhattan_precision": 0.38292011019283745, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.669921875, + "eval_allNLI-dev_max_accuracy_threshold": 522.0433959960938, + "eval_allNLI-dev_max_ap": 0.41319055063996046, + "eval_allNLI-dev_max_f1": 0.5186567164179104, + "eval_allNLI-dev_max_f1_threshold": 309.74951171875, + "eval_allNLI-dev_max_precision": 0.38292011019283745, + "eval_allNLI-dev_max_recall": 1.0, + "eval_sequential_score": 0.6564076451753581, + "eval_sts-test_pearson_cosine": 0.2550498328876235, + "eval_sts-test_pearson_dot": 0.29981841169421564, + "eval_sts-test_pearson_euclidean": 0.24239763397446795, + "eval_sts-test_pearson_manhattan": 0.25344327521082516, + "eval_sts-test_pearson_max": 0.29981841169421564, + "eval_sts-test_spearman_cosine": 0.29254387360307027, + "eval_sts-test_spearman_dot": 0.3123169499412918, + "eval_sts-test_spearman_euclidean": 0.26282456091304185, + "eval_sts-test_spearman_manhattan": 0.27282288773310837, + "eval_sts-test_spearman_max": 0.3123169499412918, + "eval_vitaminc-pairs_loss": 2.815100908279419, + "eval_vitaminc-pairs_runtime": 3.1739, + "eval_vitaminc-pairs_samples_per_second": 40.329, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_negation-triplets_loss": 3.359119176864624, + "eval_negation-triplets_runtime": 0.7309, + "eval_negation-triplets_samples_per_second": 175.129, + "eval_negation-triplets_steps_per_second": 1.368, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_scitail-pairs-pos_loss": 1.217439889907837, + "eval_scitail-pairs-pos_runtime": 0.8032, + "eval_scitail-pairs-pos_samples_per_second": 159.359, + "eval_scitail-pairs-pos_steps_per_second": 1.245, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_scitail-pairs-qa_loss": 1.2241069078445435, + "eval_scitail-pairs-qa_runtime": 0.5639, + "eval_scitail-pairs-qa_samples_per_second": 226.986, + "eval_scitail-pairs-qa_steps_per_second": 1.773, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_xsum-pairs_loss": 5.798659801483154, + "eval_xsum-pairs_runtime": 3.0162, + "eval_xsum-pairs_samples_per_second": 42.437, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_sciq_pairs_loss": 0.5781325697898865, + "eval_sciq_pairs_runtime": 3.3872, + "eval_sciq_pairs_samples_per_second": 37.789, + "eval_sciq_pairs_steps_per_second": 0.295, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_qasc_pairs_loss": 2.687833070755005, + "eval_qasc_pairs_runtime": 0.5882, + "eval_qasc_pairs_samples_per_second": 217.599, + "eval_qasc_pairs_steps_per_second": 1.7, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_openbookqa_pairs_loss": 4.54829216003418, + "eval_openbookqa_pairs_runtime": 0.5719, + "eval_openbookqa_pairs_samples_per_second": 223.814, + "eval_openbookqa_pairs_steps_per_second": 1.749, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_msmarco_pairs_loss": 6.890722274780273, + "eval_msmarco_pairs_runtime": 1.5121, + "eval_msmarco_pairs_samples_per_second": 84.65, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_nq_pairs_loss": 6.53361701965332, + "eval_nq_pairs_runtime": 2.8908, + "eval_nq_pairs_samples_per_second": 44.278, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_trivia_pairs_loss": 6.02173376083374, + "eval_trivia_pairs_runtime": 3.4272, + "eval_trivia_pairs_samples_per_second": 37.348, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_gooaq_pairs_loss": 5.6636834144592285, + "eval_gooaq_pairs_runtime": 0.9412, + "eval_gooaq_pairs_samples_per_second": 135.991, + "eval_gooaq_pairs_steps_per_second": 1.062, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_paws-pos_loss": 0.4957215487957001, + "eval_paws-pos_runtime": 0.6753, + "eval_paws-pos_samples_per_second": 189.545, + "eval_paws-pos_steps_per_second": 1.481, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_global_dataset_loss": 3.7551324367523193, + "eval_global_dataset_runtime": 13.3267, + "eval_global_dataset_samples_per_second": 31.215, + "eval_global_dataset_steps_per_second": 0.3, + "step": 140 + }, + { + "epoch": 0.14506172839506173, + "grad_norm": 15.316786766052246, + "learning_rate": 5.0155763239875384e-06, + "loss": 4.1039, + "step": 141 + }, + { + "epoch": 0.14609053497942387, + "grad_norm": 40.62092971801758, + "learning_rate": 5.051921079958463e-06, + "loss": 6.598, + "step": 142 + }, + { + "epoch": 0.147119341563786, + "grad_norm": 15.075995445251465, + "learning_rate": 5.0882658359293865e-06, + "loss": 3.8367, + "step": 143 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 35.782997131347656, + "learning_rate": 5.1246105919003105e-06, + "loss": 6.798, + "step": 144 + }, + { + "epoch": 0.14917695473251028, + "grad_norm": 18.818031311035156, + "learning_rate": 5.160955347871235e-06, + "loss": 5.9084, + "step": 145 + }, + { + "epoch": 0.15020576131687244, + "grad_norm": 13.719802856445312, + "learning_rate": 5.197300103842159e-06, + "loss": 3.8655, + "step": 146 + }, + { + "epoch": 0.15123456790123457, + "grad_norm": 14.322517395019531, + "learning_rate": 5.233644859813083e-06, + "loss": 3.493, + "step": 147 + }, + { + "epoch": 0.1522633744855967, + "grad_norm": 13.363450050354004, + "learning_rate": 5.269989615784008e-06, + "loss": 3.4505, + "step": 148 + }, + { + "epoch": 0.15329218106995884, + "grad_norm": 15.056668281555176, + "learning_rate": 5.306334371754932e-06, + "loss": 3.9404, + "step": 149 + }, + { + "epoch": 0.15432098765432098, + "grad_norm": 22.7237491607666, + "learning_rate": 5.3426791277258555e-06, + "loss": 0.977, + "step": 150 + }, + { + "epoch": 0.15534979423868311, + "grad_norm": 12.547760009765625, + "learning_rate": 5.37902388369678e-06, + "loss": 3.5493, + "step": 151 + }, + { + "epoch": 0.15637860082304528, + "grad_norm": 13.1975679397583, + "learning_rate": 5.415368639667704e-06, + "loss": 3.2979, + "step": 152 + }, + { + "epoch": 0.1574074074074074, + "grad_norm": 13.909899711608887, + "learning_rate": 5.451713395638628e-06, + "loss": 3.375, + "step": 153 + }, + { + "epoch": 0.15843621399176955, + "grad_norm": 28.727937698364258, + "learning_rate": 5.488058151609553e-06, + "loss": 6.1056, + "step": 154 + }, + { + "epoch": 0.15946502057613168, + "grad_norm": 14.287629127502441, + "learning_rate": 5.524402907580477e-06, + "loss": 3.4521, + "step": 155 + }, + { + "epoch": 0.16049382716049382, + "grad_norm": 13.95241641998291, + "learning_rate": 5.560747663551401e-06, + "loss": 3.3153, + "step": 156 + }, + { + "epoch": 0.16152263374485595, + "grad_norm": 16.688383102416992, + "learning_rate": 5.597092419522326e-06, + "loss": 3.4398, + "step": 157 + }, + { + "epoch": 0.16255144032921812, + "grad_norm": 14.100769996643066, + "learning_rate": 5.63343717549325e-06, + "loss": 3.393, + "step": 158 + }, + { + "epoch": 0.16358024691358025, + "grad_norm": 36.206336975097656, + "learning_rate": 5.669781931464173e-06, + "loss": 6.2424, + "step": 159 + }, + { + "epoch": 0.1646090534979424, + "grad_norm": 25.714920043945312, + "learning_rate": 5.706126687435098e-06, + "loss": 5.7107, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_Qnli-dev_cosine_accuracy": 0.650390625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.881838858127594, + "eval_Qnli-dev_cosine_ap": 0.6388216177805902, + "eval_Qnli-dev_cosine_f1": 0.6594090202177294, + "eval_Qnli-dev_cosine_f1_threshold": 0.7928681969642639, + "eval_Qnli-dev_cosine_precision": 0.5208845208845209, + "eval_Qnli-dev_cosine_recall": 0.8983050847457628, + "eval_Qnli-dev_dot_accuracy": 0.599609375, + "eval_Qnli-dev_dot_accuracy_threshold": 383.6549072265625, + "eval_Qnli-dev_dot_ap": 0.530471167859375, + "eval_Qnli-dev_dot_f1": 0.6400000000000001, + "eval_Qnli-dev_dot_f1_threshold": 328.07598876953125, + "eval_Qnli-dev_dot_precision": 0.5024154589371981, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.6640625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 10.708932876586914, + "eval_Qnli-dev_euclidean_ap": 0.6617842095463597, + "eval_Qnli-dev_euclidean_f1": 0.65086887835703, + "eval_Qnli-dev_euclidean_f1_threshold": 13.247828483581543, + "eval_Qnli-dev_euclidean_precision": 0.5188916876574308, + "eval_Qnli-dev_euclidean_recall": 0.8728813559322034, + "eval_Qnli-dev_manhattan_accuracy": 0.671875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 203.75650024414062, + "eval_Qnli-dev_manhattan_ap": 0.6718309029852861, + "eval_Qnli-dev_manhattan_f1": 0.6581875993640699, + "eval_Qnli-dev_manhattan_f1_threshold": 251.2660675048828, + "eval_Qnli-dev_manhattan_precision": 0.5267175572519084, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.671875, + "eval_Qnli-dev_max_accuracy_threshold": 383.6549072265625, + "eval_Qnli-dev_max_ap": 0.6718309029852861, + "eval_Qnli-dev_max_f1": 0.6594090202177294, + "eval_Qnli-dev_max_f1_threshold": 328.07598876953125, + "eval_Qnli-dev_max_precision": 0.5267175572519084, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.669921875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.97170090675354, + "eval_allNLI-dev_cosine_ap": 0.42392325835068695, + "eval_allNLI-dev_cosine_f1": 0.532319391634981, + "eval_allNLI-dev_cosine_f1_threshold": 0.8698199987411499, + "eval_allNLI-dev_cosine_precision": 0.39660056657223797, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.666015625, + "eval_allNLI-dev_dot_accuracy_threshold": 489.699951171875, + "eval_allNLI-dev_dot_ap": 0.37488661345316393, + "eval_allNLI-dev_dot_f1": 0.5079365079365079, + "eval_allNLI-dev_dot_f1_threshold": 370.46728515625, + "eval_allNLI-dev_dot_precision": 0.350109409190372, + "eval_allNLI-dev_dot_recall": 0.9248554913294798, + "eval_allNLI-dev_euclidean_accuracy": 0.669921875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 5.109055995941162, + "eval_allNLI-dev_euclidean_ap": 0.42414702832207185, + "eval_allNLI-dev_euclidean_f1": 0.5325670498084292, + "eval_allNLI-dev_euclidean_f1_threshold": 11.284603118896484, + "eval_allNLI-dev_euclidean_precision": 0.3982808022922636, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.671875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 118.02589416503906, + "eval_allNLI-dev_manhattan_ap": 0.42975205717472725, + "eval_allNLI-dev_manhattan_f1": 0.5330948121645797, + "eval_allNLI-dev_manhattan_f1_threshold": 225.3105010986328, + "eval_allNLI-dev_manhattan_precision": 0.3860103626943005, + "eval_allNLI-dev_manhattan_recall": 0.861271676300578, + "eval_allNLI-dev_max_accuracy": 0.671875, + "eval_allNLI-dev_max_accuracy_threshold": 489.699951171875, + "eval_allNLI-dev_max_ap": 0.42975205717472725, + "eval_allNLI-dev_max_f1": 0.5330948121645797, + "eval_allNLI-dev_max_f1_threshold": 370.46728515625, + "eval_allNLI-dev_max_precision": 0.3982808022922636, + "eval_allNLI-dev_max_recall": 0.9248554913294798, + "eval_sequential_score": 0.6718309029852861, + "eval_sts-test_pearson_cosine": 0.2593214673103316, + "eval_sts-test_pearson_dot": 0.2856451479091534, + "eval_sts-test_pearson_euclidean": 0.2633545673906765, + "eval_sts-test_pearson_manhattan": 0.27094196150257477, + "eval_sts-test_pearson_max": 0.2856451479091534, + "eval_sts-test_spearman_cosine": 0.3239060946012997, + "eval_sts-test_spearman_dot": 0.3020791143702586, + "eval_sts-test_spearman_euclidean": 0.29537649419536166, + "eval_sts-test_spearman_manhattan": 0.30477367732115745, + "eval_sts-test_spearman_max": 0.3239060946012997, + "eval_vitaminc-pairs_loss": 2.7820005416870117, + "eval_vitaminc-pairs_runtime": 3.1613, + "eval_vitaminc-pairs_samples_per_second": 40.489, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_negation-triplets_loss": 3.2458996772766113, + "eval_negation-triplets_runtime": 0.7284, + "eval_negation-triplets_samples_per_second": 175.738, + "eval_negation-triplets_steps_per_second": 1.373, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_scitail-pairs-pos_loss": 0.9951260685920715, + "eval_scitail-pairs-pos_runtime": 0.7927, + "eval_scitail-pairs-pos_samples_per_second": 161.465, + "eval_scitail-pairs-pos_steps_per_second": 1.261, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_scitail-pairs-qa_loss": 0.8330278396606445, + "eval_scitail-pairs-qa_runtime": 0.5647, + "eval_scitail-pairs-qa_samples_per_second": 226.68, + "eval_scitail-pairs-qa_steps_per_second": 1.771, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_xsum-pairs_loss": 5.1889238357543945, + "eval_xsum-pairs_runtime": 3.014, + "eval_xsum-pairs_samples_per_second": 42.469, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_sciq_pairs_loss": 0.5267525315284729, + "eval_sciq_pairs_runtime": 3.3969, + "eval_sciq_pairs_samples_per_second": 37.682, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_qasc_pairs_loss": 2.3833937644958496, + "eval_qasc_pairs_runtime": 0.5986, + "eval_qasc_pairs_samples_per_second": 213.816, + "eval_qasc_pairs_steps_per_second": 1.67, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_openbookqa_pairs_loss": 4.070493221282959, + "eval_openbookqa_pairs_runtime": 0.5688, + "eval_openbookqa_pairs_samples_per_second": 225.041, + "eval_openbookqa_pairs_steps_per_second": 1.758, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_msmarco_pairs_loss": 5.934054374694824, + "eval_msmarco_pairs_runtime": 1.5142, + "eval_msmarco_pairs_samples_per_second": 84.535, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_nq_pairs_loss": 5.961860656738281, + "eval_nq_pairs_runtime": 2.8919, + "eval_nq_pairs_samples_per_second": 44.261, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_trivia_pairs_loss": 5.741409778594971, + "eval_trivia_pairs_runtime": 3.4249, + "eval_trivia_pairs_samples_per_second": 37.373, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_gooaq_pairs_loss": 4.762550354003906, + "eval_gooaq_pairs_runtime": 0.9413, + "eval_gooaq_pairs_samples_per_second": 135.983, + "eval_gooaq_pairs_steps_per_second": 1.062, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_paws-pos_loss": 0.17168374359607697, + "eval_paws-pos_runtime": 0.6912, + "eval_paws-pos_samples_per_second": 185.177, + "eval_paws-pos_steps_per_second": 1.447, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_global_dataset_loss": 3.0964090824127197, + "eval_global_dataset_runtime": 13.333, + "eval_global_dataset_samples_per_second": 31.201, + "eval_global_dataset_steps_per_second": 0.3, + "step": 160 + }, + { + "epoch": 0.16563786008230452, + "grad_norm": 16.593652725219727, + "learning_rate": 5.742471443406022e-06, + "loss": 4.6423, + "step": 161 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 26.777013778686523, + "learning_rate": 5.778816199376946e-06, + "loss": 5.7346, + "step": 162 + }, + { + "epoch": 0.16769547325102882, + "grad_norm": 15.474895477294922, + "learning_rate": 5.815160955347871e-06, + "loss": 3.3701, + "step": 163 + }, + { + "epoch": 0.16872427983539096, + "grad_norm": 17.254573822021484, + "learning_rate": 5.851505711318795e-06, + "loss": 3.2528, + "step": 164 + }, + { + "epoch": 0.1697530864197531, + "grad_norm": 17.283357620239258, + "learning_rate": 5.887850467289719e-06, + "loss": 4.5692, + "step": 165 + }, + { + "epoch": 0.17078189300411523, + "grad_norm": 18.08893585205078, + "learning_rate": 5.924195223260644e-06, + "loss": 3.4966, + "step": 166 + }, + { + "epoch": 0.17181069958847736, + "grad_norm": 17.073596954345703, + "learning_rate": 5.960539979231567e-06, + "loss": 4.5332, + "step": 167 + }, + { + "epoch": 0.1728395061728395, + "grad_norm": 14.176384925842285, + "learning_rate": 5.996884735202491e-06, + "loss": 3.7013, + "step": 168 + }, + { + "epoch": 0.17386831275720166, + "grad_norm": 21.731842041015625, + "learning_rate": 6.033229491173416e-06, + "loss": 5.3538, + "step": 169 + }, + { + "epoch": 0.1748971193415638, + "grad_norm": 13.646337509155273, + "learning_rate": 6.06957424714434e-06, + "loss": 2.8866, + "step": 170 + }, + { + "epoch": 0.17592592592592593, + "grad_norm": 17.945281982421875, + "learning_rate": 6.105919003115264e-06, + "loss": 2.9547, + "step": 171 + }, + { + "epoch": 0.17695473251028807, + "grad_norm": 22.94412612915039, + "learning_rate": 6.142263759086189e-06, + "loss": 5.2232, + "step": 172 + }, + { + "epoch": 0.1779835390946502, + "grad_norm": 33.40188980102539, + "learning_rate": 6.178608515057113e-06, + "loss": 5.6116, + "step": 173 + }, + { + "epoch": 0.17901234567901234, + "grad_norm": 13.978586196899414, + "learning_rate": 6.214953271028036e-06, + "loss": 2.8644, + "step": 174 + }, + { + "epoch": 0.1800411522633745, + "grad_norm": 22.077098846435547, + "learning_rate": 6.251298026998961e-06, + "loss": 1.9015, + "step": 175 + }, + { + "epoch": 0.18106995884773663, + "grad_norm": 20.8638858795166, + "learning_rate": 6.287642782969885e-06, + "loss": 4.6564, + "step": 176 + }, + { + "epoch": 0.18209876543209877, + "grad_norm": 14.965616226196289, + "learning_rate": 6.323987538940809e-06, + "loss": 3.0869, + "step": 177 + }, + { + "epoch": 0.1831275720164609, + "grad_norm": 20.76239013671875, + "learning_rate": 6.360332294911734e-06, + "loss": 1.7242, + "step": 178 + }, + { + "epoch": 0.18415637860082304, + "grad_norm": 16.349937438964844, + "learning_rate": 6.396677050882658e-06, + "loss": 4.275, + "step": 179 + }, + { + "epoch": 0.18518518518518517, + "grad_norm": 23.983245849609375, + "learning_rate": 6.433021806853582e-06, + "loss": 5.6429, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_Qnli-dev_cosine_accuracy": 0.6484375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8662997484207153, + "eval_Qnli-dev_cosine_ap": 0.6524497730088078, + "eval_Qnli-dev_cosine_f1": 0.6678023850085179, + "eval_Qnli-dev_cosine_f1_threshold": 0.8081307411193848, + "eval_Qnli-dev_cosine_precision": 0.5584045584045584, + "eval_Qnli-dev_cosine_recall": 0.8305084745762712, + "eval_Qnli-dev_dot_accuracy": 0.623046875, + "eval_Qnli-dev_dot_accuracy_threshold": 385.58721923828125, + "eval_Qnli-dev_dot_ap": 0.5552533197510849, + "eval_Qnli-dev_dot_f1": 0.6540145985401459, + "eval_Qnli-dev_dot_f1_threshold": 316.07781982421875, + "eval_Qnli-dev_dot_precision": 0.49888641425389757, + "eval_Qnli-dev_dot_recall": 0.9491525423728814, + "eval_Qnli-dev_euclidean_accuracy": 0.65625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 10.853160858154297, + "eval_Qnli-dev_euclidean_ap": 0.6669108151611487, + "eval_Qnli-dev_euclidean_f1": 0.6678200692041524, + "eval_Qnli-dev_euclidean_f1_threshold": 12.968579292297363, + "eval_Qnli-dev_euclidean_precision": 0.564327485380117, + "eval_Qnli-dev_euclidean_recall": 0.8177966101694916, + "eval_Qnli-dev_manhattan_accuracy": 0.6640625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 223.08535766601562, + "eval_Qnli-dev_manhattan_ap": 0.6760180782489211, + "eval_Qnli-dev_manhattan_f1": 0.6678082191780822, + "eval_Qnli-dev_manhattan_f1_threshold": 253.64254760742188, + "eval_Qnli-dev_manhattan_precision": 0.5603448275862069, + "eval_Qnli-dev_manhattan_recall": 0.826271186440678, + "eval_Qnli-dev_max_accuracy": 0.6640625, + "eval_Qnli-dev_max_accuracy_threshold": 385.58721923828125, + "eval_Qnli-dev_max_ap": 0.6760180782489211, + "eval_Qnli-dev_max_f1": 0.6678200692041524, + "eval_Qnli-dev_max_f1_threshold": 316.07781982421875, + "eval_Qnli-dev_max_precision": 0.564327485380117, + "eval_Qnli-dev_max_recall": 0.9491525423728814, + "eval_allNLI-dev_cosine_accuracy": 0.671875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9789707064628601, + "eval_allNLI-dev_cosine_ap": 0.4524477715499502, + "eval_allNLI-dev_cosine_f1": 0.5444444444444444, + "eval_allNLI-dev_cosine_f1_threshold": 0.8516685962677002, + "eval_allNLI-dev_cosine_precision": 0.40054495912806537, + "eval_allNLI-dev_cosine_recall": 0.8497109826589595, + "eval_allNLI-dev_dot_accuracy": 0.66796875, + "eval_allNLI-dev_dot_accuracy_threshold": 524.80029296875, + "eval_allNLI-dev_dot_ap": 0.39641106298067524, + "eval_allNLI-dev_dot_f1": 0.5137614678899083, + "eval_allNLI-dev_dot_f1_threshold": 341.9022216796875, + "eval_allNLI-dev_dot_precision": 0.3492723492723493, + "eval_allNLI-dev_dot_recall": 0.9710982658959537, + "eval_allNLI-dev_euclidean_accuracy": 0.671875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.58498477935791, + "eval_allNLI-dev_euclidean_ap": 0.4516531171082357, + "eval_allNLI-dev_euclidean_f1": 0.552, + "eval_allNLI-dev_euclidean_f1_threshold": 11.532356262207031, + "eval_allNLI-dev_euclidean_precision": 0.42201834862385323, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.671875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 107.1644058227539, + "eval_allNLI-dev_manhattan_ap": 0.45547703653651306, + "eval_allNLI-dev_manhattan_f1": 0.5475285171102661, + "eval_allNLI-dev_manhattan_f1_threshold": 226.54490661621094, + "eval_allNLI-dev_manhattan_precision": 0.40793201133144474, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.671875, + "eval_allNLI-dev_max_accuracy_threshold": 524.80029296875, + "eval_allNLI-dev_max_ap": 0.45547703653651306, + "eval_allNLI-dev_max_f1": 0.552, + "eval_allNLI-dev_max_f1_threshold": 341.9022216796875, + "eval_allNLI-dev_max_precision": 0.42201834862385323, + "eval_allNLI-dev_max_recall": 0.9710982658959537, + "eval_sequential_score": 0.6760180782489211, + "eval_sts-test_pearson_cosine": 0.3078712504181006, + "eval_sts-test_pearson_dot": 0.30687493336995575, + "eval_sts-test_pearson_euclidean": 0.3149216629485325, + "eval_sts-test_pearson_manhattan": 0.32260490964216404, + "eval_sts-test_pearson_max": 0.32260490964216404, + "eval_sts-test_spearman_cosine": 0.3756490245649231, + "eval_sts-test_spearman_dot": 0.32074983288662573, + "eval_sts-test_spearman_euclidean": 0.34933389557767713, + "eval_sts-test_spearman_manhattan": 0.35486430121168766, + "eval_sts-test_spearman_max": 0.3756490245649231, + "eval_vitaminc-pairs_loss": 2.734696388244629, + "eval_vitaminc-pairs_runtime": 3.1523, + "eval_vitaminc-pairs_samples_per_second": 40.606, + "eval_vitaminc-pairs_steps_per_second": 0.317, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_negation-triplets_loss": 3.099722146987915, + "eval_negation-triplets_runtime": 0.7375, + "eval_negation-triplets_samples_per_second": 173.567, + "eval_negation-triplets_steps_per_second": 1.356, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_scitail-pairs-pos_loss": 0.6654092073440552, + "eval_scitail-pairs-pos_runtime": 0.7978, + "eval_scitail-pairs-pos_samples_per_second": 160.446, + "eval_scitail-pairs-pos_steps_per_second": 1.253, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_scitail-pairs-qa_loss": 0.4432713985443115, + "eval_scitail-pairs-qa_runtime": 0.5613, + "eval_scitail-pairs-qa_samples_per_second": 228.051, + "eval_scitail-pairs-qa_steps_per_second": 1.782, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_xsum-pairs_loss": 4.390190601348877, + "eval_xsum-pairs_runtime": 3.0147, + "eval_xsum-pairs_samples_per_second": 42.459, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_sciq_pairs_loss": 0.43316569924354553, + "eval_sciq_pairs_runtime": 3.4401, + "eval_sciq_pairs_samples_per_second": 37.208, + "eval_sciq_pairs_steps_per_second": 0.291, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_qasc_pairs_loss": 2.034595251083374, + "eval_qasc_pairs_runtime": 0.5991, + "eval_qasc_pairs_samples_per_second": 213.665, + "eval_qasc_pairs_steps_per_second": 1.669, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_openbookqa_pairs_loss": 3.2861831188201904, + "eval_openbookqa_pairs_runtime": 0.5738, + "eval_openbookqa_pairs_samples_per_second": 223.062, + "eval_openbookqa_pairs_steps_per_second": 1.743, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_msmarco_pairs_loss": 5.0437421798706055, + "eval_msmarco_pairs_runtime": 1.529, + "eval_msmarco_pairs_samples_per_second": 83.713, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_nq_pairs_loss": 5.238871097564697, + "eval_nq_pairs_runtime": 2.9133, + "eval_nq_pairs_samples_per_second": 43.936, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_trivia_pairs_loss": 5.040083885192871, + "eval_trivia_pairs_runtime": 3.4307, + "eval_trivia_pairs_samples_per_second": 37.311, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_gooaq_pairs_loss": 4.043346881866455, + "eval_gooaq_pairs_runtime": 0.947, + "eval_gooaq_pairs_samples_per_second": 135.164, + "eval_gooaq_pairs_steps_per_second": 1.056, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_paws-pos_loss": 0.15505897998809814, + "eval_paws-pos_runtime": 0.6791, + "eval_paws-pos_samples_per_second": 188.472, + "eval_paws-pos_steps_per_second": 1.472, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_global_dataset_loss": 2.481849193572998, + "eval_global_dataset_runtime": 13.3357, + "eval_global_dataset_samples_per_second": 31.194, + "eval_global_dataset_steps_per_second": 0.3, + "step": 180 + }, + { + "epoch": 0.18621399176954734, + "grad_norm": 23.770732879638672, + "learning_rate": 6.469366562824507e-06, + "loss": 4.1785, + "step": 181 + }, + { + "epoch": 0.18724279835390947, + "grad_norm": 15.494194984436035, + "learning_rate": 6.505711318795431e-06, + "loss": 2.8654, + "step": 182 + }, + { + "epoch": 0.1882716049382716, + "grad_norm": 15.12114429473877, + "learning_rate": 6.542056074766354e-06, + "loss": 2.9405, + "step": 183 + }, + { + "epoch": 0.18930041152263374, + "grad_norm": 14.066164016723633, + "learning_rate": 6.578400830737279e-06, + "loss": 2.6342, + "step": 184 + }, + { + "epoch": 0.19032921810699588, + "grad_norm": 15.717785835266113, + "learning_rate": 6.614745586708203e-06, + "loss": 3.8401, + "step": 185 + }, + { + "epoch": 0.19135802469135801, + "grad_norm": 13.966800689697266, + "learning_rate": 6.651090342679127e-06, + "loss": 3.6136, + "step": 186 + }, + { + "epoch": 0.19238683127572018, + "grad_norm": 15.663467407226562, + "learning_rate": 6.687435098650052e-06, + "loss": 3.0736, + "step": 187 + }, + { + "epoch": 0.1934156378600823, + "grad_norm": 26.5308837890625, + "learning_rate": 6.723779854620976e-06, + "loss": 5.4694, + "step": 188 + }, + { + "epoch": 0.19444444444444445, + "grad_norm": 15.630946159362793, + "learning_rate": 6.7601246105919e-06, + "loss": 2.7903, + "step": 189 + }, + { + "epoch": 0.19547325102880658, + "grad_norm": 20.857738494873047, + "learning_rate": 6.796469366562825e-06, + "loss": 3.8552, + "step": 190 + }, + { + "epoch": 0.19650205761316872, + "grad_norm": 21.9176082611084, + "learning_rate": 6.832814122533748e-06, + "loss": 4.133, + "step": 191 + }, + { + "epoch": 0.19753086419753085, + "grad_norm": 13.483908653259277, + "learning_rate": 6.869158878504672e-06, + "loss": 3.3172, + "step": 192 + }, + { + "epoch": 0.19855967078189302, + "grad_norm": 22.11282730102539, + "learning_rate": 6.905503634475597e-06, + "loss": 4.5717, + "step": 193 + }, + { + "epoch": 0.19958847736625515, + "grad_norm": 14.112103462219238, + "learning_rate": 6.941848390446521e-06, + "loss": 2.6895, + "step": 194 + }, + { + "epoch": 0.2006172839506173, + "grad_norm": 26.939516067504883, + "learning_rate": 6.978193146417445e-06, + "loss": 4.8326, + "step": 195 + }, + { + "epoch": 0.20164609053497942, + "grad_norm": 21.34830665588379, + "learning_rate": 7.01453790238837e-06, + "loss": 4.6939, + "step": 196 + }, + { + "epoch": 0.20267489711934156, + "grad_norm": 15.555133819580078, + "learning_rate": 7.050882658359294e-06, + "loss": 2.6114, + "step": 197 + }, + { + "epoch": 0.2037037037037037, + "grad_norm": 18.566102981567383, + "learning_rate": 7.087227414330217e-06, + "loss": 4.0035, + "step": 198 + }, + { + "epoch": 0.20473251028806586, + "grad_norm": 20.652629852294922, + "learning_rate": 7.123572170301142e-06, + "loss": 4.6457, + "step": 199 + }, + { + "epoch": 0.205761316872428, + "grad_norm": 13.168004989624023, + "learning_rate": 7.159916926272066e-06, + "loss": 2.8982, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_Qnli-dev_cosine_accuracy": 0.6640625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8493491411209106, + "eval_Qnli-dev_cosine_ap": 0.6558066754883848, + "eval_Qnli-dev_cosine_f1": 0.6813559322033899, + "eval_Qnli-dev_cosine_f1_threshold": 0.8149238228797913, + "eval_Qnli-dev_cosine_precision": 0.5677966101694916, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.625, + "eval_Qnli-dev_dot_accuracy_threshold": 415.6668395996094, + "eval_Qnli-dev_dot_ap": 0.5566403929041909, + "eval_Qnli-dev_dot_f1": 0.6607929515418502, + "eval_Qnli-dev_dot_f1_threshold": 356.9716491699219, + "eval_Qnli-dev_dot_precision": 0.5056179775280899, + "eval_Qnli-dev_dot_recall": 0.9533898305084746, + "eval_Qnli-dev_euclidean_accuracy": 0.666015625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 11.828110694885254, + "eval_Qnli-dev_euclidean_ap": 0.6719522122361475, + "eval_Qnli-dev_euclidean_f1": 0.6812080536912752, + "eval_Qnli-dev_euclidean_f1_threshold": 13.571544647216797, + "eval_Qnli-dev_euclidean_precision": 0.5638888888888889, + "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, + "eval_Qnli-dev_manhattan_accuracy": 0.666015625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 251.22454833984375, + "eval_Qnli-dev_manhattan_ap": 0.6748374333603533, + "eval_Qnli-dev_manhattan_f1": 0.687813021702838, + "eval_Qnli-dev_manhattan_f1_threshold": 265.57940673828125, + "eval_Qnli-dev_manhattan_precision": 0.5674931129476584, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.666015625, + "eval_Qnli-dev_max_accuracy_threshold": 415.6668395996094, + "eval_Qnli-dev_max_ap": 0.6748374333603533, + "eval_Qnli-dev_max_f1": 0.687813021702838, + "eval_Qnli-dev_max_f1_threshold": 356.9716491699219, + "eval_Qnli-dev_max_precision": 0.5677966101694916, + "eval_Qnli-dev_max_recall": 0.9533898305084746, + "eval_allNLI-dev_cosine_accuracy": 0.673828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9595370292663574, + "eval_allNLI-dev_cosine_ap": 0.4708419415503453, + "eval_allNLI-dev_cosine_f1": 0.5435684647302905, + "eval_allNLI-dev_cosine_f1_threshold": 0.8678731918334961, + "eval_allNLI-dev_cosine_precision": 0.42394822006472493, + "eval_allNLI-dev_cosine_recall": 0.7572254335260116, + "eval_allNLI-dev_dot_accuracy": 0.669921875, + "eval_allNLI-dev_dot_accuracy_threshold": 541.4039306640625, + "eval_allNLI-dev_dot_ap": 0.4068383335197935, + "eval_allNLI-dev_dot_f1": 0.521875, + "eval_allNLI-dev_dot_f1_threshold": 372.6112060546875, + "eval_allNLI-dev_dot_precision": 0.3576017130620985, + "eval_allNLI-dev_dot_recall": 0.9653179190751445, + "eval_allNLI-dev_euclidean_accuracy": 0.673828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 6.293747901916504, + "eval_allNLI-dev_euclidean_ap": 0.47115509667092914, + "eval_allNLI-dev_euclidean_f1": 0.544, + "eval_allNLI-dev_euclidean_f1_threshold": 11.993677139282227, + "eval_allNLI-dev_euclidean_precision": 0.41590214067278286, + "eval_allNLI-dev_euclidean_recall": 0.7861271676300579, + "eval_allNLI-dev_manhattan_accuracy": 0.67578125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 141.41348266601562, + "eval_allNLI-dev_manhattan_ap": 0.4773047143908946, + "eval_allNLI-dev_manhattan_f1": 0.5521235521235521, + "eval_allNLI-dev_manhattan_f1_threshold": 238.16036987304688, + "eval_allNLI-dev_manhattan_precision": 0.4144927536231884, + "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, + "eval_allNLI-dev_max_accuracy": 0.67578125, + "eval_allNLI-dev_max_accuracy_threshold": 541.4039306640625, + "eval_allNLI-dev_max_ap": 0.4773047143908946, + "eval_allNLI-dev_max_f1": 0.5521235521235521, + "eval_allNLI-dev_max_f1_threshold": 372.6112060546875, + "eval_allNLI-dev_max_precision": 0.42394822006472493, + "eval_allNLI-dev_max_recall": 0.9653179190751445, + "eval_sequential_score": 0.6748374333603533, + "eval_sts-test_pearson_cosine": 0.37650448121246105, + "eval_sts-test_pearson_dot": 0.34722884377459334, + "eval_sts-test_pearson_euclidean": 0.3832908339538646, + "eval_sts-test_pearson_manhattan": 0.3852968520690805, + "eval_sts-test_pearson_max": 0.3852968520690805, + "eval_sts-test_spearman_cosine": 0.4334008406493539, + "eval_sts-test_spearman_dot": 0.35710334107288355, + "eval_sts-test_spearman_euclidean": 0.408594276683612, + "eval_sts-test_spearman_manhattan": 0.4087942700707702, + "eval_sts-test_spearman_max": 0.4334008406493539, + "eval_vitaminc-pairs_loss": 2.7392194271087646, + "eval_vitaminc-pairs_runtime": 3.1693, + "eval_vitaminc-pairs_samples_per_second": 40.387, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_negation-triplets_loss": 2.8204259872436523, + "eval_negation-triplets_runtime": 0.7414, + "eval_negation-triplets_samples_per_second": 172.651, + "eval_negation-triplets_steps_per_second": 1.349, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_scitail-pairs-pos_loss": 0.5787031054496765, + "eval_scitail-pairs-pos_runtime": 0.8028, + "eval_scitail-pairs-pos_samples_per_second": 159.448, + "eval_scitail-pairs-pos_steps_per_second": 1.246, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_scitail-pairs-qa_loss": 0.3260263204574585, + "eval_scitail-pairs-qa_runtime": 0.5665, + "eval_scitail-pairs-qa_samples_per_second": 225.934, + "eval_scitail-pairs-qa_steps_per_second": 1.765, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_xsum-pairs_loss": 3.6776349544525146, + "eval_xsum-pairs_runtime": 3.0162, + "eval_xsum-pairs_samples_per_second": 42.437, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_sciq_pairs_loss": 0.3696608543395996, + "eval_sciq_pairs_runtime": 3.4228, + "eval_sciq_pairs_samples_per_second": 37.396, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_qasc_pairs_loss": 1.7308318614959717, + "eval_qasc_pairs_runtime": 0.5951, + "eval_qasc_pairs_samples_per_second": 215.086, + "eval_qasc_pairs_steps_per_second": 1.68, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_openbookqa_pairs_loss": 2.8745713233947754, + "eval_openbookqa_pairs_runtime": 0.5756, + "eval_openbookqa_pairs_samples_per_second": 222.393, + "eval_openbookqa_pairs_steps_per_second": 1.737, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_msmarco_pairs_loss": 4.048874378204346, + "eval_msmarco_pairs_runtime": 1.5159, + "eval_msmarco_pairs_samples_per_second": 84.439, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_nq_pairs_loss": 4.402989387512207, + "eval_nq_pairs_runtime": 2.8983, + "eval_nq_pairs_samples_per_second": 44.163, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_trivia_pairs_loss": 4.454685688018799, + "eval_trivia_pairs_runtime": 3.4474, + "eval_trivia_pairs_samples_per_second": 37.129, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_gooaq_pairs_loss": 3.200054168701172, + "eval_gooaq_pairs_runtime": 0.956, + "eval_gooaq_pairs_samples_per_second": 133.894, + "eval_gooaq_pairs_steps_per_second": 1.046, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_paws-pos_loss": 0.17940819263458252, + "eval_paws-pos_runtime": 0.6752, + "eval_paws-pos_samples_per_second": 189.577, + "eval_paws-pos_steps_per_second": 1.481, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_global_dataset_loss": 2.0389692783355713, + "eval_global_dataset_runtime": 13.3595, + "eval_global_dataset_samples_per_second": 31.139, + "eval_global_dataset_steps_per_second": 0.299, + "step": 200 + }, + { + "epoch": 0.20679012345679013, + "grad_norm": 20.6440372467041, + "learning_rate": 7.19626168224299e-06, + "loss": 4.1212, + "step": 201 + }, + { + "epoch": 0.20781893004115226, + "grad_norm": 18.542131423950195, + "learning_rate": 7.232606438213915e-06, + "loss": 3.712, + "step": 202 + }, + { + "epoch": 0.2088477366255144, + "grad_norm": 13.649810791015625, + "learning_rate": 7.268951194184839e-06, + "loss": 2.4475, + "step": 203 + }, + { + "epoch": 0.20987654320987653, + "grad_norm": 14.254504203796387, + "learning_rate": 7.305295950155763e-06, + "loss": 2.0375, + "step": 204 + }, + { + "epoch": 0.2109053497942387, + "grad_norm": 16.941804885864258, + "learning_rate": 7.341640706126687e-06, + "loss": 3.6423, + "step": 205 + }, + { + "epoch": 0.21193415637860083, + "grad_norm": 24.76467514038086, + "learning_rate": 7.3779854620976116e-06, + "loss": 5.0227, + "step": 206 + }, + { + "epoch": 0.21296296296296297, + "grad_norm": 27.70640754699707, + "learning_rate": 7.414330218068535e-06, + "loss": 4.743, + "step": 207 + }, + { + "epoch": 0.2139917695473251, + "grad_norm": 19.96710777282715, + "learning_rate": 7.450674974039459e-06, + "loss": 4.502, + "step": 208 + }, + { + "epoch": 0.21502057613168724, + "grad_norm": 13.25556468963623, + "learning_rate": 7.487019730010384e-06, + "loss": 2.2948, + "step": 209 + }, + { + "epoch": 0.21604938271604937, + "grad_norm": 14.281882286071777, + "learning_rate": 7.523364485981308e-06, + "loss": 3.3056, + "step": 210 + }, + { + "epoch": 0.21707818930041153, + "grad_norm": 12.938163757324219, + "learning_rate": 7.559709241952232e-06, + "loss": 2.1324, + "step": 211 + }, + { + "epoch": 0.21810699588477367, + "grad_norm": 13.252862930297852, + "learning_rate": 7.5960539979231565e-06, + "loss": 2.2595, + "step": 212 + }, + { + "epoch": 0.2191358024691358, + "grad_norm": 13.162984848022461, + "learning_rate": 7.63239875389408e-06, + "loss": 2.3108, + "step": 213 + }, + { + "epoch": 0.22016460905349794, + "grad_norm": 12.221834182739258, + "learning_rate": 7.668743509865004e-06, + "loss": 2.7378, + "step": 214 + }, + { + "epoch": 0.22119341563786007, + "grad_norm": 12.927008628845215, + "learning_rate": 7.70508826583593e-06, + "loss": 2.3095, + "step": 215 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 18.692045211791992, + "learning_rate": 7.741433021806853e-06, + "loss": 3.4556, + "step": 216 + }, + { + "epoch": 0.22325102880658437, + "grad_norm": 15.472359657287598, + "learning_rate": 7.777777777777777e-06, + "loss": 2.9786, + "step": 217 + }, + { + "epoch": 0.2242798353909465, + "grad_norm": 12.372124671936035, + "learning_rate": 7.814122533748701e-06, + "loss": 1.9408, + "step": 218 + }, + { + "epoch": 0.22530864197530864, + "grad_norm": 13.370574951171875, + "learning_rate": 7.850467289719626e-06, + "loss": 2.9416, + "step": 219 + }, + { + "epoch": 0.22633744855967078, + "grad_norm": 12.76834487915039, + "learning_rate": 7.88681204569055e-06, + "loss": 2.0087, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_Qnli-dev_cosine_accuracy": 0.666015625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8367502689361572, + "eval_Qnli-dev_cosine_ap": 0.657497384714659, + "eval_Qnli-dev_cosine_f1": 0.6850828729281769, + "eval_Qnli-dev_cosine_f1_threshold": 0.8328432440757751, + "eval_Qnli-dev_cosine_precision": 0.6058631921824105, + "eval_Qnli-dev_cosine_recall": 0.788135593220339, + "eval_Qnli-dev_dot_accuracy": 0.62109375, + "eval_Qnli-dev_dot_accuracy_threshold": 443.7711181640625, + "eval_Qnli-dev_dot_ap": 0.561375704126675, + "eval_Qnli-dev_dot_f1": 0.6607407407407409, + "eval_Qnli-dev_dot_f1_threshold": 383.77728271484375, + "eval_Qnli-dev_dot_precision": 0.5079726651480638, + "eval_Qnli-dev_dot_recall": 0.9449152542372882, + "eval_Qnli-dev_euclidean_accuracy": 0.666015625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.143888473510742, + "eval_Qnli-dev_euclidean_ap": 0.669642308468768, + "eval_Qnli-dev_euclidean_f1": 0.6845637583892616, + "eval_Qnli-dev_euclidean_f1_threshold": 14.205205917358398, + "eval_Qnli-dev_euclidean_precision": 0.5666666666666667, + "eval_Qnli-dev_euclidean_recall": 0.864406779661017, + "eval_Qnli-dev_manhattan_accuracy": 0.658203125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 212.78713989257812, + "eval_Qnli-dev_manhattan_ap": 0.6716707737018695, + "eval_Qnli-dev_manhattan_f1": 0.6907894736842105, + "eval_Qnli-dev_manhattan_f1_threshold": 283.0830383300781, + "eval_Qnli-dev_manhattan_precision": 0.5645161290322581, + "eval_Qnli-dev_manhattan_recall": 0.8898305084745762, + "eval_Qnli-dev_max_accuracy": 0.666015625, + "eval_Qnli-dev_max_accuracy_threshold": 443.7711181640625, + "eval_Qnli-dev_max_ap": 0.6716707737018695, + "eval_Qnli-dev_max_f1": 0.6907894736842105, + "eval_Qnli-dev_max_f1_threshold": 383.77728271484375, + "eval_Qnli-dev_max_precision": 0.6058631921824105, + "eval_Qnli-dev_max_recall": 0.9449152542372882, + "eval_allNLI-dev_cosine_accuracy": 0.677734375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9583283066749573, + "eval_allNLI-dev_cosine_ap": 0.4885246068143708, + "eval_allNLI-dev_cosine_f1": 0.5432937181663838, + "eval_allNLI-dev_cosine_f1_threshold": 0.8263977766036987, + "eval_allNLI-dev_cosine_precision": 0.38461538461538464, + "eval_allNLI-dev_cosine_recall": 0.9248554913294798, + "eval_allNLI-dev_dot_accuracy": 0.66796875, + "eval_allNLI-dev_dot_accuracy_threshold": 568.2660522460938, + "eval_allNLI-dev_dot_ap": 0.4122244378970726, + "eval_allNLI-dev_dot_f1": 0.5243328100470958, + "eval_allNLI-dev_dot_f1_threshold": 404.83544921875, + "eval_allNLI-dev_dot_precision": 0.3599137931034483, + "eval_allNLI-dev_dot_recall": 0.9653179190751445, + "eval_allNLI-dev_euclidean_accuracy": 0.673828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 5.9007158279418945, + "eval_allNLI-dev_euclidean_ap": 0.4870900115925687, + "eval_allNLI-dev_euclidean_f1": 0.5494880546075086, + "eval_allNLI-dev_euclidean_f1_threshold": 13.606775283813477, + "eval_allNLI-dev_euclidean_precision": 0.3898305084745763, + "eval_allNLI-dev_euclidean_recall": 0.930635838150289, + "eval_allNLI-dev_manhattan_accuracy": 0.6796875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 149.96470642089844, + "eval_allNLI-dev_manhattan_ap": 0.48775989657604024, + "eval_allNLI-dev_manhattan_f1": 0.5509433962264151, + "eval_allNLI-dev_manhattan_f1_threshold": 245.671875, + "eval_allNLI-dev_manhattan_precision": 0.40896358543417366, + "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, + "eval_allNLI-dev_max_accuracy": 0.6796875, + "eval_allNLI-dev_max_accuracy_threshold": 568.2660522460938, + "eval_allNLI-dev_max_ap": 0.4885246068143708, + "eval_allNLI-dev_max_f1": 0.5509433962264151, + "eval_allNLI-dev_max_f1_threshold": 404.83544921875, + "eval_allNLI-dev_max_precision": 0.40896358543417366, + "eval_allNLI-dev_max_recall": 0.9653179190751445, + "eval_sequential_score": 0.6716707737018695, + "eval_sts-test_pearson_cosine": 0.47370132819582667, + "eval_sts-test_pearson_dot": 0.4090770475954118, + "eval_sts-test_pearson_euclidean": 0.47821395607635725, + "eval_sts-test_pearson_manhattan": 0.4805462866477066, + "eval_sts-test_pearson_max": 0.4805462866477066, + "eval_sts-test_spearman_cosine": 0.5169709124658022, + "eval_sts-test_spearman_dot": 0.4142341886542473, + "eval_sts-test_spearman_euclidean": 0.4963594659966741, + "eval_sts-test_spearman_manhattan": 0.49648047340747653, + "eval_sts-test_spearman_max": 0.5169709124658022, + "eval_vitaminc-pairs_loss": 2.7133967876434326, + "eval_vitaminc-pairs_runtime": 3.1964, + "eval_vitaminc-pairs_samples_per_second": 40.045, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_negation-triplets_loss": 2.680220603942871, + "eval_negation-triplets_runtime": 0.7348, + "eval_negation-triplets_samples_per_second": 174.208, + "eval_negation-triplets_steps_per_second": 1.361, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_scitail-pairs-pos_loss": 0.5108461380004883, + "eval_scitail-pairs-pos_runtime": 0.8205, + "eval_scitail-pairs-pos_samples_per_second": 155.993, + "eval_scitail-pairs-pos_steps_per_second": 1.219, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_scitail-pairs-qa_loss": 0.27794376015663147, + "eval_scitail-pairs-qa_runtime": 0.5655, + "eval_scitail-pairs-qa_samples_per_second": 226.329, + "eval_scitail-pairs-qa_steps_per_second": 1.768, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_xsum-pairs_loss": 2.8043901920318604, + "eval_xsum-pairs_runtime": 3.0149, + "eval_xsum-pairs_samples_per_second": 42.456, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_sciq_pairs_loss": 0.32811373472213745, + "eval_sciq_pairs_runtime": 3.3848, + "eval_sciq_pairs_samples_per_second": 37.816, + "eval_sciq_pairs_steps_per_second": 0.295, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_qasc_pairs_loss": 1.6458420753479004, + "eval_qasc_pairs_runtime": 0.5968, + "eval_qasc_pairs_samples_per_second": 214.461, + "eval_qasc_pairs_steps_per_second": 1.675, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_openbookqa_pairs_loss": 2.5592660903930664, + "eval_openbookqa_pairs_runtime": 0.5683, + "eval_openbookqa_pairs_samples_per_second": 225.246, + "eval_openbookqa_pairs_steps_per_second": 1.76, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_msmarco_pairs_loss": 3.5379371643066406, + "eval_msmarco_pairs_runtime": 1.5102, + "eval_msmarco_pairs_samples_per_second": 84.756, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_nq_pairs_loss": 3.847370147705078, + "eval_nq_pairs_runtime": 2.8908, + "eval_nq_pairs_samples_per_second": 44.278, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_trivia_pairs_loss": 3.6181681156158447, + "eval_trivia_pairs_runtime": 3.4242, + "eval_trivia_pairs_samples_per_second": 37.381, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_gooaq_pairs_loss": 2.7982828617095947, + "eval_gooaq_pairs_runtime": 0.9365, + "eval_gooaq_pairs_samples_per_second": 136.676, + "eval_gooaq_pairs_steps_per_second": 1.068, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_paws-pos_loss": 0.1660136878490448, + "eval_paws-pos_runtime": 0.6838, + "eval_paws-pos_samples_per_second": 187.193, + "eval_paws-pos_steps_per_second": 1.462, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_global_dataset_loss": 1.749915361404419, + "eval_global_dataset_runtime": 13.3423, + "eval_global_dataset_samples_per_second": 31.179, + "eval_global_dataset_steps_per_second": 0.3, + "step": 220 + }, + { + "epoch": 0.2273662551440329, + "grad_norm": 20.361539840698242, + "learning_rate": 7.923156801661474e-06, + "loss": 3.9528, + "step": 221 + }, + { + "epoch": 0.22839506172839505, + "grad_norm": 15.792684555053711, + "learning_rate": 7.959501557632398e-06, + "loss": 1.1222, + "step": 222 + }, + { + "epoch": 0.2294238683127572, + "grad_norm": 15.900016784667969, + "learning_rate": 7.995846313603322e-06, + "loss": 1.2289, + "step": 223 + }, + { + "epoch": 0.23045267489711935, + "grad_norm": 14.649103164672852, + "learning_rate": 8.032191069574247e-06, + "loss": 1.193, + "step": 224 + }, + { + "epoch": 0.23148148148148148, + "grad_norm": 24.6876277923584, + "learning_rate": 8.068535825545171e-06, + "loss": 3.8686, + "step": 225 + }, + { + "epoch": 0.23251028806584362, + "grad_norm": 7.946255683898926, + "learning_rate": 8.104880581516094e-06, + "loss": 0.3667, + "step": 226 + }, + { + "epoch": 0.23353909465020575, + "grad_norm": 14.419116020202637, + "learning_rate": 8.14122533748702e-06, + "loss": 2.1571, + "step": 227 + }, + { + "epoch": 0.2345679012345679, + "grad_norm": 13.824968338012695, + "learning_rate": 8.177570093457943e-06, + "loss": 2.1233, + "step": 228 + }, + { + "epoch": 0.23559670781893005, + "grad_norm": 15.247499465942383, + "learning_rate": 8.213914849428867e-06, + "loss": 3.0183, + "step": 229 + }, + { + "epoch": 0.2366255144032922, + "grad_norm": 7.50793981552124, + "learning_rate": 8.250259605399791e-06, + "loss": 0.4344, + "step": 230 + }, + { + "epoch": 0.23765432098765432, + "grad_norm": 13.505939483642578, + "learning_rate": 8.286604361370715e-06, + "loss": 2.6828, + "step": 231 + }, + { + "epoch": 0.23868312757201646, + "grad_norm": 26.35114288330078, + "learning_rate": 8.32294911734164e-06, + "loss": 4.1836, + "step": 232 + }, + { + "epoch": 0.2397119341563786, + "grad_norm": 15.033428192138672, + "learning_rate": 8.359293873312565e-06, + "loss": 1.8063, + "step": 233 + }, + { + "epoch": 0.24074074074074073, + "grad_norm": 15.326811790466309, + "learning_rate": 8.395638629283488e-06, + "loss": 3.4165, + "step": 234 + }, + { + "epoch": 0.2417695473251029, + "grad_norm": 17.31609344482422, + "learning_rate": 8.431983385254412e-06, + "loss": 3.1603, + "step": 235 + }, + { + "epoch": 0.24279835390946503, + "grad_norm": 23.062973022460938, + "learning_rate": 8.468328141225337e-06, + "loss": 2.9251, + "step": 236 + }, + { + "epoch": 0.24382716049382716, + "grad_norm": 15.594389915466309, + "learning_rate": 8.504672897196261e-06, + "loss": 1.726, + "step": 237 + }, + { + "epoch": 0.2448559670781893, + "grad_norm": 10.86409854888916, + "learning_rate": 8.541017653167185e-06, + "loss": 0.7677, + "step": 238 + }, + { + "epoch": 0.24588477366255143, + "grad_norm": 24.200529098510742, + "learning_rate": 8.57736240913811e-06, + "loss": 3.7962, + "step": 239 + }, + { + "epoch": 0.24691358024691357, + "grad_norm": 23.083824157714844, + "learning_rate": 8.613707165109033e-06, + "loss": 3.6562, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_Qnli-dev_cosine_accuracy": 0.677734375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8187533020973206, + "eval_Qnli-dev_cosine_ap": 0.6835720202375161, + "eval_Qnli-dev_cosine_f1": 0.6900958466453675, + "eval_Qnli-dev_cosine_f1_threshold": 0.7529827356338501, + "eval_Qnli-dev_cosine_precision": 0.5538461538461539, + "eval_Qnli-dev_cosine_recall": 0.9152542372881356, + "eval_Qnli-dev_dot_accuracy": 0.62890625, + "eval_Qnli-dev_dot_accuracy_threshold": 405.27545166015625, + "eval_Qnli-dev_dot_ap": 0.5877863096211339, + "eval_Qnli-dev_dot_f1": 0.6722408026755853, + "eval_Qnli-dev_dot_f1_threshold": 367.1414794921875, + "eval_Qnli-dev_dot_precision": 0.5552486187845304, + "eval_Qnli-dev_dot_recall": 0.8516949152542372, + "eval_Qnli-dev_euclidean_accuracy": 0.6796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.904159545898438, + "eval_Qnli-dev_euclidean_ap": 0.6913218676999153, + "eval_Qnli-dev_euclidean_f1": 0.6865148861646235, + "eval_Qnli-dev_euclidean_f1_threshold": 14.621212005615234, + "eval_Qnli-dev_euclidean_precision": 0.5850746268656717, + "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, + "eval_Qnli-dev_manhattan_accuracy": 0.677734375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 254.7897491455078, + "eval_Qnli-dev_manhattan_ap": 0.698230594032758, + "eval_Qnli-dev_manhattan_f1": 0.6965517241379311, + "eval_Qnli-dev_manhattan_f1_threshold": 293.552734375, + "eval_Qnli-dev_manhattan_precision": 0.5872093023255814, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.6796875, + "eval_Qnli-dev_max_accuracy_threshold": 405.27545166015625, + "eval_Qnli-dev_max_ap": 0.698230594032758, + "eval_Qnli-dev_max_f1": 0.6965517241379311, + "eval_Qnli-dev_max_f1_threshold": 367.1414794921875, + "eval_Qnli-dev_max_precision": 0.5872093023255814, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.6796875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9396699666976929, + "eval_allNLI-dev_cosine_ap": 0.49305382035680395, + "eval_allNLI-dev_cosine_f1": 0.5477477477477477, + "eval_allNLI-dev_cosine_f1_threshold": 0.8165856003761292, + "eval_allNLI-dev_cosine_precision": 0.39790575916230364, + "eval_allNLI-dev_cosine_recall": 0.8786127167630058, + "eval_allNLI-dev_dot_accuracy": 0.66796875, + "eval_allNLI-dev_dot_accuracy_threshold": 519.2574462890625, + "eval_allNLI-dev_dot_ap": 0.41169754525200597, + "eval_allNLI-dev_dot_f1": 0.5198098256735342, + "eval_allNLI-dev_dot_f1_threshold": 369.37896728515625, + "eval_allNLI-dev_dot_precision": 0.35807860262008734, + "eval_allNLI-dev_dot_recall": 0.9479768786127167, + "eval_allNLI-dev_euclidean_accuracy": 0.681640625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 8.951443672180176, + "eval_allNLI-dev_euclidean_ap": 0.4972712047553571, + "eval_allNLI-dev_euclidean_f1": 0.5478424015009381, + "eval_allNLI-dev_euclidean_f1_threshold": 13.30331802368164, + "eval_allNLI-dev_euclidean_precision": 0.40555555555555556, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.681640625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 168.29837036132812, + "eval_allNLI-dev_manhattan_ap": 0.49849435311963386, + "eval_allNLI-dev_manhattan_f1": 0.555765595463138, + "eval_allNLI-dev_manhattan_f1_threshold": 261.11334228515625, + "eval_allNLI-dev_manhattan_precision": 0.41292134831460675, + "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, + "eval_allNLI-dev_max_accuracy": 0.681640625, + "eval_allNLI-dev_max_accuracy_threshold": 519.2574462890625, + "eval_allNLI-dev_max_ap": 0.49849435311963386, + "eval_allNLI-dev_max_f1": 0.555765595463138, + "eval_allNLI-dev_max_f1_threshold": 369.37896728515625, + "eval_allNLI-dev_max_precision": 0.41292134831460675, + "eval_allNLI-dev_max_recall": 0.9479768786127167, + "eval_sequential_score": 0.698230594032758, + "eval_sts-test_pearson_cosine": 0.5422399822302852, + "eval_sts-test_pearson_dot": 0.47800101935982187, + "eval_sts-test_pearson_euclidean": 0.5410879554786593, + "eval_sts-test_pearson_manhattan": 0.5428179293731825, + "eval_sts-test_pearson_max": 0.5428179293731825, + "eval_sts-test_spearman_cosine": 0.5703833329868931, + "eval_sts-test_spearman_dot": 0.48296505545213714, + "eval_sts-test_spearman_euclidean": 0.5529472570210532, + "eval_sts-test_spearman_manhattan": 0.5558088764307263, + "eval_sts-test_spearman_max": 0.5703833329868931, + "eval_vitaminc-pairs_loss": 2.7294111251831055, + "eval_vitaminc-pairs_runtime": 3.173, + "eval_vitaminc-pairs_samples_per_second": 40.34, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_negation-triplets_loss": 2.5460636615753174, + "eval_negation-triplets_runtime": 0.7253, + "eval_negation-triplets_samples_per_second": 176.49, + "eval_negation-triplets_steps_per_second": 1.379, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_scitail-pairs-pos_loss": 0.45181718468666077, + "eval_scitail-pairs-pos_runtime": 0.7815, + "eval_scitail-pairs-pos_samples_per_second": 163.781, + "eval_scitail-pairs-pos_steps_per_second": 1.28, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_scitail-pairs-qa_loss": 0.11772796511650085, + "eval_scitail-pairs-qa_runtime": 0.5646, + "eval_scitail-pairs-qa_samples_per_second": 226.703, + "eval_scitail-pairs-qa_steps_per_second": 1.771, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_xsum-pairs_loss": 2.541783571243286, + "eval_xsum-pairs_runtime": 3.0187, + "eval_xsum-pairs_samples_per_second": 42.402, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_sciq_pairs_loss": 0.2976074516773224, + "eval_sciq_pairs_runtime": 3.4151, + "eval_sciq_pairs_samples_per_second": 37.48, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_qasc_pairs_loss": 1.538482427597046, + "eval_qasc_pairs_runtime": 0.5897, + "eval_qasc_pairs_samples_per_second": 217.058, + "eval_qasc_pairs_steps_per_second": 1.696, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_openbookqa_pairs_loss": 2.2948145866394043, + "eval_openbookqa_pairs_runtime": 0.5656, + "eval_openbookqa_pairs_samples_per_second": 226.301, + "eval_openbookqa_pairs_steps_per_second": 1.768, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_msmarco_pairs_loss": 3.3644864559173584, + "eval_msmarco_pairs_runtime": 1.5156, + "eval_msmarco_pairs_samples_per_second": 84.457, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_nq_pairs_loss": 3.7770235538482666, + "eval_nq_pairs_runtime": 2.9074, + "eval_nq_pairs_samples_per_second": 44.025, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_trivia_pairs_loss": 3.4960672855377197, + "eval_trivia_pairs_runtime": 3.4254, + "eval_trivia_pairs_samples_per_second": 37.368, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_gooaq_pairs_loss": 2.5963170528411865, + "eval_gooaq_pairs_runtime": 0.9352, + "eval_gooaq_pairs_samples_per_second": 136.874, + "eval_gooaq_pairs_steps_per_second": 1.069, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_paws-pos_loss": 0.09364856779575348, + "eval_paws-pos_runtime": 0.6826, + "eval_paws-pos_samples_per_second": 187.531, + "eval_paws-pos_steps_per_second": 1.465, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_global_dataset_loss": 1.6046679019927979, + "eval_global_dataset_runtime": 13.3573, + "eval_global_dataset_samples_per_second": 31.144, + "eval_global_dataset_steps_per_second": 0.299, + "step": 240 + }, + { + "epoch": 0.24794238683127573, + "grad_norm": 18.60301971435547, + "learning_rate": 8.650051921079957e-06, + "loss": 2.8804, + "step": 241 + }, + { + "epoch": 0.24897119341563786, + "grad_norm": 15.278526306152344, + "learning_rate": 8.686396677050883e-06, + "loss": 1.7607, + "step": 242 + }, + { + "epoch": 0.25, + "grad_norm": 23.43361473083496, + "learning_rate": 8.722741433021805e-06, + "loss": 3.877, + "step": 243 + }, + { + "epoch": 0.25102880658436216, + "grad_norm": 13.10021686553955, + "learning_rate": 8.75908618899273e-06, + "loss": 1.6768, + "step": 244 + }, + { + "epoch": 0.25205761316872427, + "grad_norm": 11.166913032531738, + "learning_rate": 8.795430944963655e-06, + "loss": 0.8368, + "step": 245 + }, + { + "epoch": 0.25308641975308643, + "grad_norm": 12.863570213317871, + "learning_rate": 8.831775700934577e-06, + "loss": 1.6864, + "step": 246 + }, + { + "epoch": 0.25411522633744854, + "grad_norm": 12.436037063598633, + "learning_rate": 8.868120456905503e-06, + "loss": 1.3478, + "step": 247 + }, + { + "epoch": 0.2551440329218107, + "grad_norm": 13.372901916503906, + "learning_rate": 8.904465212876427e-06, + "loss": 1.7764, + "step": 248 + }, + { + "epoch": 0.25617283950617287, + "grad_norm": 14.600456237792969, + "learning_rate": 8.94080996884735e-06, + "loss": 1.8495, + "step": 249 + }, + { + "epoch": 0.257201646090535, + "grad_norm": 5.363873481750488, + "learning_rate": 8.977154724818275e-06, + "loss": 0.285, + "step": 250 + }, + { + "epoch": 0.25823045267489714, + "grad_norm": 12.498610496520996, + "learning_rate": 9.0134994807892e-06, + "loss": 1.6516, + "step": 251 + }, + { + "epoch": 0.25925925925925924, + "grad_norm": 13.058953285217285, + "learning_rate": 9.049844236760123e-06, + "loss": 1.5781, + "step": 252 + }, + { + "epoch": 0.2602880658436214, + "grad_norm": 27.41628074645996, + "learning_rate": 9.086188992731047e-06, + "loss": 4.791, + "step": 253 + }, + { + "epoch": 0.2613168724279835, + "grad_norm": 21.457780838012695, + "learning_rate": 9.122533748701973e-06, + "loss": 3.7502, + "step": 254 + }, + { + "epoch": 0.2623456790123457, + "grad_norm": 13.806361198425293, + "learning_rate": 9.158878504672895e-06, + "loss": 2.6088, + "step": 255 + }, + { + "epoch": 0.26337448559670784, + "grad_norm": 20.073028564453125, + "learning_rate": 9.195223260643821e-06, + "loss": 3.2271, + "step": 256 + }, + { + "epoch": 0.26440329218106995, + "grad_norm": 10.656987190246582, + "learning_rate": 9.231568016614745e-06, + "loss": 1.3969, + "step": 257 + }, + { + "epoch": 0.2654320987654321, + "grad_norm": 15.233261108398438, + "learning_rate": 9.267912772585667e-06, + "loss": 2.5281, + "step": 258 + }, + { + "epoch": 0.2664609053497942, + "grad_norm": 17.79701805114746, + "learning_rate": 9.304257528556593e-06, + "loss": 2.8561, + "step": 259 + }, + { + "epoch": 0.2674897119341564, + "grad_norm": 21.97925567626953, + "learning_rate": 9.340602284527517e-06, + "loss": 3.9495, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_Qnli-dev_cosine_accuracy": 0.689453125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8466764092445374, + "eval_Qnli-dev_cosine_ap": 0.6871140391818324, + "eval_Qnli-dev_cosine_f1": 0.6923076923076923, + "eval_Qnli-dev_cosine_f1_threshold": 0.7720080018043518, + "eval_Qnli-dev_cosine_precision": 0.5567010309278351, + "eval_Qnli-dev_cosine_recall": 0.9152542372881356, + "eval_Qnli-dev_dot_accuracy": 0.62890625, + "eval_Qnli-dev_dot_accuracy_threshold": 437.21807861328125, + "eval_Qnli-dev_dot_ap": 0.5853275156115014, + "eval_Qnli-dev_dot_f1": 0.6721581548599671, + "eval_Qnli-dev_dot_f1_threshold": 389.718017578125, + "eval_Qnli-dev_dot_precision": 0.5498652291105122, + "eval_Qnli-dev_dot_recall": 0.864406779661017, + "eval_Qnli-dev_euclidean_accuracy": 0.681640625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.65598201751709, + "eval_Qnli-dev_euclidean_ap": 0.6939343189181921, + "eval_Qnli-dev_euclidean_f1": 0.692436974789916, + "eval_Qnli-dev_euclidean_f1_threshold": 14.59019660949707, + "eval_Qnli-dev_euclidean_precision": 0.5738161559888579, + "eval_Qnli-dev_euclidean_recall": 0.8728813559322034, + "eval_Qnli-dev_manhattan_accuracy": 0.6796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 252.4490966796875, + "eval_Qnli-dev_manhattan_ap": 0.6988409881438541, + "eval_Qnli-dev_manhattan_f1": 0.6989966555183945, + "eval_Qnli-dev_manhattan_f1_threshold": 291.64801025390625, + "eval_Qnli-dev_manhattan_precision": 0.5773480662983426, + "eval_Qnli-dev_manhattan_recall": 0.885593220338983, + "eval_Qnli-dev_max_accuracy": 0.689453125, + "eval_Qnli-dev_max_accuracy_threshold": 437.21807861328125, + "eval_Qnli-dev_max_ap": 0.6988409881438541, + "eval_Qnli-dev_max_f1": 0.6989966555183945, + "eval_Qnli-dev_max_f1_threshold": 389.718017578125, + "eval_Qnli-dev_max_precision": 0.5773480662983426, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.6875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9255372285842896, + "eval_allNLI-dev_cosine_ap": 0.5019030389232606, + "eval_allNLI-dev_cosine_f1": 0.5451263537906137, + "eval_allNLI-dev_cosine_f1_threshold": 0.8344321250915527, + "eval_allNLI-dev_cosine_precision": 0.3963254593175853, + "eval_allNLI-dev_cosine_recall": 0.8728323699421965, + "eval_allNLI-dev_dot_accuracy": 0.666015625, + "eval_allNLI-dev_dot_accuracy_threshold": 537.950439453125, + "eval_allNLI-dev_dot_ap": 0.4104683717008714, + "eval_allNLI-dev_dot_f1": 0.5224111282843895, + "eval_allNLI-dev_dot_f1_threshold": 388.04473876953125, + "eval_allNLI-dev_dot_precision": 0.35654008438818563, + "eval_allNLI-dev_dot_recall": 0.976878612716763, + "eval_allNLI-dev_euclidean_accuracy": 0.68359375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 8.841395378112793, + "eval_allNLI-dev_euclidean_ap": 0.5047465503781696, + "eval_allNLI-dev_euclidean_f1": 0.5511811023622047, + "eval_allNLI-dev_euclidean_f1_threshold": 12.596972465515137, + "eval_allNLI-dev_euclidean_precision": 0.417910447761194, + "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, + "eval_allNLI-dev_manhattan_accuracy": 0.689453125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 179.2438201904297, + "eval_allNLI-dev_manhattan_ap": 0.5057091077225248, + "eval_allNLI-dev_manhattan_f1": 0.5523012552301255, + "eval_allNLI-dev_manhattan_f1_threshold": 238.46624755859375, + "eval_allNLI-dev_manhattan_precision": 0.43278688524590164, + "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, + "eval_allNLI-dev_max_accuracy": 0.689453125, + "eval_allNLI-dev_max_accuracy_threshold": 537.950439453125, + "eval_allNLI-dev_max_ap": 0.5057091077225248, + "eval_allNLI-dev_max_f1": 0.5523012552301255, + "eval_allNLI-dev_max_f1_threshold": 388.04473876953125, + "eval_allNLI-dev_max_precision": 0.43278688524590164, + "eval_allNLI-dev_max_recall": 0.976878612716763, + "eval_sequential_score": 0.6988409881438541, + "eval_sts-test_pearson_cosine": 0.6101007482373539, + "eval_sts-test_pearson_dot": 0.5171078610309542, + "eval_sts-test_pearson_euclidean": 0.6100203378514052, + "eval_sts-test_pearson_manhattan": 0.6071628090659706, + "eval_sts-test_pearson_max": 0.6101007482373539, + "eval_sts-test_spearman_cosine": 0.628839936686977, + "eval_sts-test_spearman_dot": 0.5099761113052808, + "eval_sts-test_spearman_euclidean": 0.6134216055947527, + "eval_sts-test_spearman_manhattan": 0.6119040008184474, + "eval_sts-test_spearman_max": 0.628839936686977, + "eval_vitaminc-pairs_loss": 2.755634069442749, + "eval_vitaminc-pairs_runtime": 3.1645, + "eval_vitaminc-pairs_samples_per_second": 40.448, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_negation-triplets_loss": 2.3755366802215576, + "eval_negation-triplets_runtime": 0.7383, + "eval_negation-triplets_samples_per_second": 173.38, + "eval_negation-triplets_steps_per_second": 1.355, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_scitail-pairs-pos_loss": 0.38322263956069946, + "eval_scitail-pairs-pos_runtime": 0.7914, + "eval_scitail-pairs-pos_samples_per_second": 161.747, + "eval_scitail-pairs-pos_steps_per_second": 1.264, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_scitail-pairs-qa_loss": 0.10294085741043091, + "eval_scitail-pairs-qa_runtime": 0.5772, + "eval_scitail-pairs-qa_samples_per_second": 221.746, + "eval_scitail-pairs-qa_steps_per_second": 1.732, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_xsum-pairs_loss": 2.2755026817321777, + "eval_xsum-pairs_runtime": 3.0186, + "eval_xsum-pairs_samples_per_second": 42.403, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_sciq_pairs_loss": 0.24845057725906372, + "eval_sciq_pairs_runtime": 3.4052, + "eval_sciq_pairs_samples_per_second": 37.59, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_qasc_pairs_loss": 1.4006080627441406, + "eval_qasc_pairs_runtime": 0.5915, + "eval_qasc_pairs_samples_per_second": 216.401, + "eval_qasc_pairs_steps_per_second": 1.691, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_openbookqa_pairs_loss": 2.0268588066101074, + "eval_openbookqa_pairs_runtime": 0.5688, + "eval_openbookqa_pairs_samples_per_second": 225.017, + "eval_openbookqa_pairs_steps_per_second": 1.758, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_msmarco_pairs_loss": 2.9229013919830322, + "eval_msmarco_pairs_runtime": 1.5117, + "eval_msmarco_pairs_samples_per_second": 84.673, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_nq_pairs_loss": 3.4232370853424072, + "eval_nq_pairs_runtime": 2.8888, + "eval_nq_pairs_samples_per_second": 44.309, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_trivia_pairs_loss": 3.135023832321167, + "eval_trivia_pairs_runtime": 3.4305, + "eval_trivia_pairs_samples_per_second": 37.312, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_gooaq_pairs_loss": 2.3922266960144043, + "eval_gooaq_pairs_runtime": 0.9351, + "eval_gooaq_pairs_samples_per_second": 136.888, + "eval_gooaq_pairs_steps_per_second": 1.069, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_paws-pos_loss": 0.08843281120061874, + "eval_paws-pos_runtime": 0.6715, + "eval_paws-pos_samples_per_second": 190.608, + "eval_paws-pos_steps_per_second": 1.489, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_global_dataset_loss": 1.4206469058990479, + "eval_global_dataset_runtime": 13.3286, + "eval_global_dataset_samples_per_second": 31.211, + "eval_global_dataset_steps_per_second": 0.3, + "step": 260 + }, + { + "epoch": 0.26851851851851855, + "grad_norm": 12.555780410766602, + "learning_rate": 9.376947040498441e-06, + "loss": 1.7349, + "step": 261 + }, + { + "epoch": 0.26954732510288065, + "grad_norm": 13.038395881652832, + "learning_rate": 9.413291796469365e-06, + "loss": 1.7189, + "step": 262 + }, + { + "epoch": 0.2705761316872428, + "grad_norm": 13.202376365661621, + "learning_rate": 9.44963655244029e-06, + "loss": 1.7282, + "step": 263 + }, + { + "epoch": 0.2716049382716049, + "grad_norm": 17.815078735351562, + "learning_rate": 9.485981308411213e-06, + "loss": 2.9146, + "step": 264 + }, + { + "epoch": 0.2726337448559671, + "grad_norm": 13.262603759765625, + "learning_rate": 9.522326064382139e-06, + "loss": 1.7603, + "step": 265 + }, + { + "epoch": 0.2736625514403292, + "grad_norm": 12.508451461791992, + "learning_rate": 9.558670820353063e-06, + "loss": 1.823, + "step": 266 + }, + { + "epoch": 0.27469135802469136, + "grad_norm": 12.313492774963379, + "learning_rate": 9.595015576323985e-06, + "loss": 1.5984, + "step": 267 + }, + { + "epoch": 0.2757201646090535, + "grad_norm": 12.14000415802002, + "learning_rate": 9.631360332294911e-06, + "loss": 1.651, + "step": 268 + }, + { + "epoch": 0.2767489711934156, + "grad_norm": 14.698229789733887, + "learning_rate": 9.667705088265835e-06, + "loss": 2.615, + "step": 269 + }, + { + "epoch": 0.2777777777777778, + "grad_norm": 12.209722518920898, + "learning_rate": 9.704049844236759e-06, + "loss": 2.6608, + "step": 270 + }, + { + "epoch": 0.2788065843621399, + "grad_norm": 13.545384407043457, + "learning_rate": 9.740394600207683e-06, + "loss": 2.2558, + "step": 271 + }, + { + "epoch": 0.27983539094650206, + "grad_norm": 11.335700988769531, + "learning_rate": 9.776739356178609e-06, + "loss": 1.2155, + "step": 272 + }, + { + "epoch": 0.2808641975308642, + "grad_norm": 11.750332832336426, + "learning_rate": 9.813084112149531e-06, + "loss": 1.4727, + "step": 273 + }, + { + "epoch": 0.28189300411522633, + "grad_norm": 20.200624465942383, + "learning_rate": 9.849428868120455e-06, + "loss": 3.6394, + "step": 274 + }, + { + "epoch": 0.2829218106995885, + "grad_norm": 11.674026489257812, + "learning_rate": 9.88577362409138e-06, + "loss": 2.1385, + "step": 275 + }, + { + "epoch": 0.2839506172839506, + "grad_norm": 16.86899185180664, + "learning_rate": 9.922118380062303e-06, + "loss": 2.3953, + "step": 276 + }, + { + "epoch": 0.28497942386831276, + "grad_norm": 11.407617568969727, + "learning_rate": 9.958463136033229e-06, + "loss": 1.488, + "step": 277 + }, + { + "epoch": 0.28600823045267487, + "grad_norm": 11.607297897338867, + "learning_rate": 9.994807892004153e-06, + "loss": 1.2893, + "step": 278 + }, + { + "epoch": 0.28703703703703703, + "grad_norm": 11.757554054260254, + "learning_rate": 1.0031152647975077e-05, + "loss": 0.7678, + "step": 279 + }, + { + "epoch": 0.2880658436213992, + "grad_norm": 12.077320098876953, + "learning_rate": 1.0067497403946001e-05, + "loss": 2.6801, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_Qnli-dev_cosine_accuracy": 0.67578125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8415871262550354, + "eval_Qnli-dev_cosine_ap": 0.6912671058237406, + "eval_Qnli-dev_cosine_f1": 0.6869983948635633, + "eval_Qnli-dev_cosine_f1_threshold": 0.7735732197761536, + "eval_Qnli-dev_cosine_precision": 0.5529715762273901, + "eval_Qnli-dev_cosine_recall": 0.9067796610169492, + "eval_Qnli-dev_dot_accuracy": 0.62109375, + "eval_Qnli-dev_dot_accuracy_threshold": 432.6428527832031, + "eval_Qnli-dev_dot_ap": 0.5797796012757845, + "eval_Qnli-dev_dot_f1": 0.6719492868462758, + "eval_Qnli-dev_dot_f1_threshold": 393.2371826171875, + "eval_Qnli-dev_dot_precision": 0.5367088607594936, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.67578125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 11.884414672851562, + "eval_Qnli-dev_euclidean_ap": 0.6992665036179804, + "eval_Qnli-dev_euclidean_f1": 0.6915584415584415, + "eval_Qnli-dev_euclidean_f1_threshold": 15.279256820678711, + "eval_Qnli-dev_euclidean_precision": 0.5605263157894737, + "eval_Qnli-dev_euclidean_recall": 0.902542372881356, + "eval_Qnli-dev_manhattan_accuracy": 0.67578125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 254.77352905273438, + "eval_Qnli-dev_manhattan_ap": 0.7020052347678023, + "eval_Qnli-dev_manhattan_f1": 0.6893039049235994, + "eval_Qnli-dev_manhattan_f1_threshold": 293.3916931152344, + "eval_Qnli-dev_manhattan_precision": 0.5750708215297451, + "eval_Qnli-dev_manhattan_recall": 0.8601694915254238, + "eval_Qnli-dev_max_accuracy": 0.67578125, + "eval_Qnli-dev_max_accuracy_threshold": 432.6428527832031, + "eval_Qnli-dev_max_ap": 0.7020052347678023, + "eval_Qnli-dev_max_f1": 0.6915584415584415, + "eval_Qnli-dev_max_f1_threshold": 393.2371826171875, + "eval_Qnli-dev_max_precision": 0.5750708215297451, + "eval_Qnli-dev_max_recall": 0.9067796610169492, + "eval_allNLI-dev_cosine_accuracy": 0.703125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.923446536064148, + "eval_allNLI-dev_cosine_ap": 0.5118589589722005, + "eval_allNLI-dev_cosine_f1": 0.5532786885245901, + "eval_allNLI-dev_cosine_f1_threshold": 0.849584698677063, + "eval_allNLI-dev_cosine_precision": 0.42857142857142855, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.6640625, + "eval_allNLI-dev_dot_accuracy_threshold": 588.6370239257812, + "eval_allNLI-dev_dot_ap": 0.4159723261021614, + "eval_allNLI-dev_dot_f1": 0.5263157894736842, + "eval_allNLI-dev_dot_f1_threshold": 431.5047607421875, + "eval_allNLI-dev_dot_precision": 0.3835978835978836, + "eval_allNLI-dev_dot_recall": 0.838150289017341, + "eval_allNLI-dev_euclidean_accuracy": 0.705078125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.087307929992676, + "eval_allNLI-dev_euclidean_ap": 0.5172051717681727, + "eval_allNLI-dev_euclidean_f1": 0.5603112840466926, + "eval_allNLI-dev_euclidean_f1_threshold": 13.293811798095703, + "eval_allNLI-dev_euclidean_precision": 0.4222873900293255, + "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, + "eval_allNLI-dev_manhattan_accuracy": 0.701171875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 185.59756469726562, + "eval_allNLI-dev_manhattan_ap": 0.5124717600689392, + "eval_allNLI-dev_manhattan_f1": 0.5559999999999999, + "eval_allNLI-dev_manhattan_f1_threshold": 259.2859802246094, + "eval_allNLI-dev_manhattan_precision": 0.42507645259938837, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.705078125, + "eval_allNLI-dev_max_accuracy_threshold": 588.6370239257812, + "eval_allNLI-dev_max_ap": 0.5172051717681727, + "eval_allNLI-dev_max_f1": 0.5603112840466926, + "eval_allNLI-dev_max_f1_threshold": 431.5047607421875, + "eval_allNLI-dev_max_precision": 0.42857142857142855, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7020052347678023, + "eval_sts-test_pearson_cosine": 0.6687245612921815, + "eval_sts-test_pearson_dot": 0.5734739396427984, + "eval_sts-test_pearson_euclidean": 0.6718719757622864, + "eval_sts-test_pearson_manhattan": 0.6670343168111552, + "eval_sts-test_pearson_max": 0.6718719757622864, + "eval_sts-test_spearman_cosine": 0.6819400829060788, + "eval_sts-test_spearman_dot": 0.556021400171074, + "eval_sts-test_spearman_euclidean": 0.6684485621243225, + "eval_sts-test_spearman_manhattan": 0.665065859027008, + "eval_sts-test_spearman_max": 0.6819400829060788, + "eval_vitaminc-pairs_loss": 2.8099753856658936, + "eval_vitaminc-pairs_runtime": 3.1872, + "eval_vitaminc-pairs_samples_per_second": 40.16, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_negation-triplets_loss": 2.1310224533081055, + "eval_negation-triplets_runtime": 0.735, + "eval_negation-triplets_samples_per_second": 174.146, + "eval_negation-triplets_steps_per_second": 1.361, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_scitail-pairs-pos_loss": 0.33672308921813965, + "eval_scitail-pairs-pos_runtime": 0.7788, + "eval_scitail-pairs-pos_samples_per_second": 164.351, + "eval_scitail-pairs-pos_steps_per_second": 1.284, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_scitail-pairs-qa_loss": 0.10085483640432358, + "eval_scitail-pairs-qa_runtime": 0.5632, + "eval_scitail-pairs-qa_samples_per_second": 227.27, + "eval_scitail-pairs-qa_steps_per_second": 1.776, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_xsum-pairs_loss": 1.8792424201965332, + "eval_xsum-pairs_runtime": 3.0158, + "eval_xsum-pairs_samples_per_second": 42.443, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_sciq_pairs_loss": 0.21897011995315552, + "eval_sciq_pairs_runtime": 3.4085, + "eval_sciq_pairs_samples_per_second": 37.553, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_qasc_pairs_loss": 1.3423388004302979, + "eval_qasc_pairs_runtime": 0.5957, + "eval_qasc_pairs_samples_per_second": 214.885, + "eval_qasc_pairs_steps_per_second": 1.679, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_openbookqa_pairs_loss": 1.8888919353485107, + "eval_openbookqa_pairs_runtime": 0.5744, + "eval_openbookqa_pairs_samples_per_second": 222.832, + "eval_openbookqa_pairs_steps_per_second": 1.741, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_msmarco_pairs_loss": 2.50892972946167, + "eval_msmarco_pairs_runtime": 1.5144, + "eval_msmarco_pairs_samples_per_second": 84.524, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_nq_pairs_loss": 3.0089173316955566, + "eval_nq_pairs_runtime": 2.9076, + "eval_nq_pairs_samples_per_second": 44.022, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_trivia_pairs_loss": 2.672011137008667, + "eval_trivia_pairs_runtime": 3.4362, + "eval_trivia_pairs_samples_per_second": 37.25, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_gooaq_pairs_loss": 2.007201671600342, + "eval_gooaq_pairs_runtime": 0.9479, + "eval_gooaq_pairs_samples_per_second": 135.03, + "eval_gooaq_pairs_steps_per_second": 1.055, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_paws-pos_loss": 0.0831906795501709, + "eval_paws-pos_runtime": 0.6861, + "eval_paws-pos_samples_per_second": 186.568, + "eval_paws-pos_steps_per_second": 1.458, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_global_dataset_loss": 1.250847578048706, + "eval_global_dataset_runtime": 13.3637, + "eval_global_dataset_samples_per_second": 31.129, + "eval_global_dataset_steps_per_second": 0.299, + "step": 280 + }, + { + "epoch": 0.2890946502057613, + "grad_norm": 11.982275009155273, + "learning_rate": 1.0103842159916927e-05, + "loss": 1.4147, + "step": 281 + }, + { + "epoch": 0.29012345679012347, + "grad_norm": 4.2981367111206055, + "learning_rate": 1.0140186915887849e-05, + "loss": 0.2152, + "step": 282 + }, + { + "epoch": 0.2911522633744856, + "grad_norm": 11.808545112609863, + "learning_rate": 1.0176531671858773e-05, + "loss": 1.3908, + "step": 283 + }, + { + "epoch": 0.29218106995884774, + "grad_norm": 17.394630432128906, + "learning_rate": 1.0212876427829699e-05, + "loss": 2.6877, + "step": 284 + }, + { + "epoch": 0.2932098765432099, + "grad_norm": 12.021160125732422, + "learning_rate": 1.0249221183800621e-05, + "loss": 1.1107, + "step": 285 + }, + { + "epoch": 0.294238683127572, + "grad_norm": 21.536834716796875, + "learning_rate": 1.0285565939771547e-05, + "loss": 3.3415, + "step": 286 + }, + { + "epoch": 0.2952674897119342, + "grad_norm": 13.677733421325684, + "learning_rate": 1.032191069574247e-05, + "loss": 1.6332, + "step": 287 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 12.351741790771484, + "learning_rate": 1.0358255451713395e-05, + "loss": 2.5232, + "step": 288 + }, + { + "epoch": 0.29732510288065844, + "grad_norm": 23.78185272216797, + "learning_rate": 1.0394600207684319e-05, + "loss": 4.1329, + "step": 289 + }, + { + "epoch": 0.29835390946502055, + "grad_norm": 13.199095726013184, + "learning_rate": 1.0430944963655244e-05, + "loss": 1.3592, + "step": 290 + }, + { + "epoch": 0.2993827160493827, + "grad_norm": 3.1243560314178467, + "learning_rate": 1.0467289719626167e-05, + "loss": 0.1327, + "step": 291 + }, + { + "epoch": 0.3004115226337449, + "grad_norm": 14.93930721282959, + "learning_rate": 1.050363447559709e-05, + "loss": 2.2552, + "step": 292 + }, + { + "epoch": 0.301440329218107, + "grad_norm": 13.743828773498535, + "learning_rate": 1.0539979231568017e-05, + "loss": 1.5245, + "step": 293 + }, + { + "epoch": 0.30246913580246915, + "grad_norm": 14.517548561096191, + "learning_rate": 1.0576323987538939e-05, + "loss": 2.9299, + "step": 294 + }, + { + "epoch": 0.30349794238683125, + "grad_norm": 12.055514335632324, + "learning_rate": 1.0612668743509865e-05, + "loss": 0.6583, + "step": 295 + }, + { + "epoch": 0.3045267489711934, + "grad_norm": 15.698110580444336, + "learning_rate": 1.0649013499480789e-05, + "loss": 2.0637, + "step": 296 + }, + { + "epoch": 0.3055555555555556, + "grad_norm": 19.17127227783203, + "learning_rate": 1.0685358255451711e-05, + "loss": 2.6771, + "step": 297 + }, + { + "epoch": 0.3065843621399177, + "grad_norm": 13.811963081359863, + "learning_rate": 1.0721703011422637e-05, + "loss": 2.755, + "step": 298 + }, + { + "epoch": 0.30761316872427985, + "grad_norm": 24.679113388061523, + "learning_rate": 1.075804776739356e-05, + "loss": 3.3638, + "step": 299 + }, + { + "epoch": 0.30864197530864196, + "grad_norm": 10.830105781555176, + "learning_rate": 1.0794392523364485e-05, + "loss": 0.7053, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_Qnli-dev_cosine_accuracy": 0.66796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8199188113212585, + "eval_Qnli-dev_cosine_ap": 0.6995325123375109, + "eval_Qnli-dev_cosine_f1": 0.6897689768976898, + "eval_Qnli-dev_cosine_f1_threshold": 0.7571749091148376, + "eval_Qnli-dev_cosine_precision": 0.5648648648648649, + "eval_Qnli-dev_cosine_recall": 0.885593220338983, + "eval_Qnli-dev_dot_accuracy": 0.62890625, + "eval_Qnli-dev_dot_accuracy_threshold": 423.8924255371094, + "eval_Qnli-dev_dot_ap": 0.5832842248431986, + "eval_Qnli-dev_dot_f1": 0.6645865834633385, + "eval_Qnli-dev_dot_f1_threshold": 364.68280029296875, + "eval_Qnli-dev_dot_precision": 0.5259259259259259, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.689453125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.101522445678711, + "eval_Qnli-dev_euclidean_ap": 0.7073708901870908, + "eval_Qnli-dev_euclidean_f1": 0.6832061068702291, + "eval_Qnli-dev_euclidean_f1_threshold": 14.215328216552734, + "eval_Qnli-dev_euclidean_precision": 0.6215277777777778, + "eval_Qnli-dev_euclidean_recall": 0.7584745762711864, + "eval_Qnli-dev_manhattan_accuracy": 0.689453125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 253.76065063476562, + "eval_Qnli-dev_manhattan_ap": 0.709921595260574, + "eval_Qnli-dev_manhattan_f1": 0.6918032786885246, + "eval_Qnli-dev_manhattan_f1_threshold": 316.2789306640625, + "eval_Qnli-dev_manhattan_precision": 0.5641711229946524, + "eval_Qnli-dev_manhattan_recall": 0.8940677966101694, + "eval_Qnli-dev_max_accuracy": 0.689453125, + "eval_Qnli-dev_max_accuracy_threshold": 423.8924255371094, + "eval_Qnli-dev_max_ap": 0.709921595260574, + "eval_Qnli-dev_max_f1": 0.6918032786885246, + "eval_Qnli-dev_max_f1_threshold": 364.68280029296875, + "eval_Qnli-dev_max_precision": 0.6215277777777778, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.701171875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9037463068962097, + "eval_allNLI-dev_cosine_ap": 0.5128913377360289, + "eval_allNLI-dev_cosine_f1": 0.5613305613305613, + "eval_allNLI-dev_cosine_f1_threshold": 0.8335003852844238, + "eval_allNLI-dev_cosine_precision": 0.4383116883116883, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.6640625, + "eval_allNLI-dev_dot_accuracy_threshold": 571.2269897460938, + "eval_allNLI-dev_dot_ap": 0.41426010345468745, + "eval_allNLI-dev_dot_f1": 0.5328330206378986, + "eval_allNLI-dev_dot_f1_threshold": 416.7803955078125, + "eval_allNLI-dev_dot_precision": 0.39444444444444443, + "eval_allNLI-dev_dot_recall": 0.8208092485549133, + "eval_allNLI-dev_euclidean_accuracy": 0.70703125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.502985954284668, + "eval_allNLI-dev_euclidean_ap": 0.5173963035988031, + "eval_allNLI-dev_euclidean_f1": 0.563600782778865, + "eval_allNLI-dev_euclidean_f1_threshold": 13.729564666748047, + "eval_allNLI-dev_euclidean_precision": 0.4260355029585799, + "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, + "eval_allNLI-dev_manhattan_accuracy": 0.703125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 195.28097534179688, + "eval_allNLI-dev_manhattan_ap": 0.5158767394143106, + "eval_allNLI-dev_manhattan_f1": 0.5584158415841585, + "eval_allNLI-dev_manhattan_f1_threshold": 274.1793212890625, + "eval_allNLI-dev_manhattan_precision": 0.4246987951807229, + "eval_allNLI-dev_manhattan_recall": 0.815028901734104, + "eval_allNLI-dev_max_accuracy": 0.70703125, + "eval_allNLI-dev_max_accuracy_threshold": 571.2269897460938, + "eval_allNLI-dev_max_ap": 0.5173963035988031, + "eval_allNLI-dev_max_f1": 0.563600782778865, + "eval_allNLI-dev_max_f1_threshold": 416.7803955078125, + "eval_allNLI-dev_max_precision": 0.4383116883116883, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.709921595260574, + "eval_sts-test_pearson_cosine": 0.6984052089046398, + "eval_sts-test_pearson_dot": 0.6135443185288071, + "eval_sts-test_pearson_euclidean": 0.7020042331376551, + "eval_sts-test_pearson_manhattan": 0.6988284227220897, + "eval_sts-test_pearson_max": 0.7020042331376551, + "eval_sts-test_spearman_cosine": 0.7071684275104347, + "eval_sts-test_spearman_dot": 0.5928461838910447, + "eval_sts-test_spearman_euclidean": 0.6956956808813058, + "eval_sts-test_spearman_manhattan": 0.6929552031362194, + "eval_sts-test_spearman_max": 0.7071684275104347, + "eval_vitaminc-pairs_loss": 2.8123340606689453, + "eval_vitaminc-pairs_runtime": 3.2467, + "eval_vitaminc-pairs_samples_per_second": 39.424, + "eval_vitaminc-pairs_steps_per_second": 0.308, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_negation-triplets_loss": 1.9351751804351807, + "eval_negation-triplets_runtime": 0.7607, + "eval_negation-triplets_samples_per_second": 168.261, + "eval_negation-triplets_steps_per_second": 1.315, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_scitail-pairs-pos_loss": 0.314525842666626, + "eval_scitail-pairs-pos_runtime": 0.834, + "eval_scitail-pairs-pos_samples_per_second": 153.478, + "eval_scitail-pairs-pos_steps_per_second": 1.199, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_scitail-pairs-qa_loss": 0.07327353954315186, + "eval_scitail-pairs-qa_runtime": 0.645, + "eval_scitail-pairs-qa_samples_per_second": 198.438, + "eval_scitail-pairs-qa_steps_per_second": 1.55, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_xsum-pairs_loss": 1.707858920097351, + "eval_xsum-pairs_runtime": 3.0454, + "eval_xsum-pairs_samples_per_second": 42.031, + "eval_xsum-pairs_steps_per_second": 0.328, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_sciq_pairs_loss": 0.19993656873703003, + "eval_sciq_pairs_runtime": 3.4559, + "eval_sciq_pairs_samples_per_second": 37.038, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_qasc_pairs_loss": 1.3199025392532349, + "eval_qasc_pairs_runtime": 0.5957, + "eval_qasc_pairs_samples_per_second": 214.886, + "eval_qasc_pairs_steps_per_second": 1.679, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_openbookqa_pairs_loss": 1.777836799621582, + "eval_openbookqa_pairs_runtime": 0.5685, + "eval_openbookqa_pairs_samples_per_second": 225.137, + "eval_openbookqa_pairs_steps_per_second": 1.759, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_msmarco_pairs_loss": 2.3514328002929688, + "eval_msmarco_pairs_runtime": 1.5104, + "eval_msmarco_pairs_samples_per_second": 84.746, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_nq_pairs_loss": 2.9204494953155518, + "eval_nq_pairs_runtime": 2.8927, + "eval_nq_pairs_samples_per_second": 44.249, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_trivia_pairs_loss": 2.5231525897979736, + "eval_trivia_pairs_runtime": 3.4302, + "eval_trivia_pairs_samples_per_second": 37.315, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_gooaq_pairs_loss": 1.7379323244094849, + "eval_gooaq_pairs_runtime": 0.956, + "eval_gooaq_pairs_samples_per_second": 133.895, + "eval_gooaq_pairs_steps_per_second": 1.046, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_paws-pos_loss": 0.06269291788339615, + "eval_paws-pos_runtime": 0.6964, + "eval_paws-pos_samples_per_second": 183.815, + "eval_paws-pos_steps_per_second": 1.436, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_global_dataset_loss": 1.1462713479995728, + "eval_global_dataset_runtime": 13.4482, + "eval_global_dataset_samples_per_second": 30.933, + "eval_global_dataset_steps_per_second": 0.297, + "step": 300 + }, + { + "epoch": 0.3096707818930041, + "grad_norm": 22.287065505981445, + "learning_rate": 1.0830737279335409e-05, + "loss": 3.1007, + "step": 301 + }, + { + "epoch": 0.31069958847736623, + "grad_norm": 11.660481452941895, + "learning_rate": 1.0867082035306334e-05, + "loss": 1.0158, + "step": 302 + }, + { + "epoch": 0.3117283950617284, + "grad_norm": 21.95073699951172, + "learning_rate": 1.0903426791277257e-05, + "loss": 2.8023, + "step": 303 + }, + { + "epoch": 0.31275720164609055, + "grad_norm": 22.914106369018555, + "learning_rate": 1.0939771547248182e-05, + "loss": 3.0464, + "step": 304 + }, + { + "epoch": 0.31378600823045266, + "grad_norm": 11.899238586425781, + "learning_rate": 1.0976116303219106e-05, + "loss": 1.1718, + "step": 305 + }, + { + "epoch": 0.3148148148148148, + "grad_norm": 11.318571090698242, + "learning_rate": 1.1012461059190029e-05, + "loss": 1.0429, + "step": 306 + }, + { + "epoch": 0.31584362139917693, + "grad_norm": 13.055448532104492, + "learning_rate": 1.1048805815160954e-05, + "loss": 1.3403, + "step": 307 + }, + { + "epoch": 0.3168724279835391, + "grad_norm": 17.78560447692871, + "learning_rate": 1.1085150571131879e-05, + "loss": 2.5188, + "step": 308 + }, + { + "epoch": 0.31790123456790126, + "grad_norm": 13.747932434082031, + "learning_rate": 1.1121495327102803e-05, + "loss": 1.3855, + "step": 309 + }, + { + "epoch": 0.31893004115226337, + "grad_norm": 12.256226539611816, + "learning_rate": 1.1157840083073727e-05, + "loss": 1.1986, + "step": 310 + }, + { + "epoch": 0.31995884773662553, + "grad_norm": 16.054819107055664, + "learning_rate": 1.1194184839044652e-05, + "loss": 2.2651, + "step": 311 + }, + { + "epoch": 0.32098765432098764, + "grad_norm": 16.100807189941406, + "learning_rate": 1.1230529595015575e-05, + "loss": 2.5339, + "step": 312 + }, + { + "epoch": 0.3220164609053498, + "grad_norm": 11.739322662353516, + "learning_rate": 1.12668743509865e-05, + "loss": 1.3687, + "step": 313 + }, + { + "epoch": 0.3230452674897119, + "grad_norm": 13.98962116241455, + "learning_rate": 1.1303219106957424e-05, + "loss": 2.1529, + "step": 314 + }, + { + "epoch": 0.32407407407407407, + "grad_norm": 16.564695358276367, + "learning_rate": 1.1339563862928347e-05, + "loss": 2.3677, + "step": 315 + }, + { + "epoch": 0.32510288065843623, + "grad_norm": 18.29457664489746, + "learning_rate": 1.1375908618899272e-05, + "loss": 2.6956, + "step": 316 + }, + { + "epoch": 0.32613168724279834, + "grad_norm": 17.147747039794922, + "learning_rate": 1.1412253374870196e-05, + "loss": 2.4821, + "step": 317 + }, + { + "epoch": 0.3271604938271605, + "grad_norm": 11.078995704650879, + "learning_rate": 1.144859813084112e-05, + "loss": 1.2667, + "step": 318 + }, + { + "epoch": 0.3281893004115226, + "grad_norm": 16.17939567565918, + "learning_rate": 1.1484942886812044e-05, + "loss": 2.4697, + "step": 319 + }, + { + "epoch": 0.3292181069958848, + "grad_norm": 17.299062728881836, + "learning_rate": 1.152128764278297e-05, + "loss": 2.488, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_Qnli-dev_cosine_accuracy": 0.671875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8567001223564148, + "eval_Qnli-dev_cosine_ap": 0.7064956495603609, + "eval_Qnli-dev_cosine_f1": 0.6941580756013745, + "eval_Qnli-dev_cosine_f1_threshold": 0.8039928674697876, + "eval_Qnli-dev_cosine_precision": 0.5838150289017341, + "eval_Qnli-dev_cosine_recall": 0.8559322033898306, + "eval_Qnli-dev_dot_accuracy": 0.638671875, + "eval_Qnli-dev_dot_accuracy_threshold": 439.119384765625, + "eval_Qnli-dev_dot_ap": 0.5828187078602152, + "eval_Qnli-dev_dot_f1": 0.6656534954407294, + "eval_Qnli-dev_dot_f1_threshold": 394.38739013671875, + "eval_Qnli-dev_dot_precision": 0.518957345971564, + "eval_Qnli-dev_dot_recall": 0.9279661016949152, + "eval_Qnli-dev_euclidean_accuracy": 0.689453125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.089064598083496, + "eval_Qnli-dev_euclidean_ap": 0.7175927858077272, + "eval_Qnli-dev_euclidean_f1": 0.6872964169381107, + "eval_Qnli-dev_euclidean_f1_threshold": 15.139961242675781, + "eval_Qnli-dev_euclidean_precision": 0.5582010582010583, + "eval_Qnli-dev_euclidean_recall": 0.8940677966101694, + "eval_Qnli-dev_manhattan_accuracy": 0.685546875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 247.93148803710938, + "eval_Qnli-dev_manhattan_ap": 0.7165821170066472, + "eval_Qnli-dev_manhattan_f1": 0.6962457337883959, + "eval_Qnli-dev_manhattan_f1_threshold": 293.9665222167969, + "eval_Qnli-dev_manhattan_precision": 0.5828571428571429, + "eval_Qnli-dev_manhattan_recall": 0.864406779661017, + "eval_Qnli-dev_max_accuracy": 0.689453125, + "eval_Qnli-dev_max_accuracy_threshold": 439.119384765625, + "eval_Qnli-dev_max_ap": 0.7175927858077272, + "eval_Qnli-dev_max_f1": 0.6962457337883959, + "eval_Qnli-dev_max_f1_threshold": 394.38739013671875, + "eval_Qnli-dev_max_precision": 0.5838150289017341, + "eval_Qnli-dev_max_recall": 0.9279661016949152, + "eval_allNLI-dev_cosine_accuracy": 0.705078125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9175689816474915, + "eval_allNLI-dev_cosine_ap": 0.5186646269211405, + "eval_allNLI-dev_cosine_f1": 0.5641025641025641, + "eval_allNLI-dev_cosine_f1_threshold": 0.8398832082748413, + "eval_allNLI-dev_cosine_precision": 0.4281437125748503, + "eval_allNLI-dev_cosine_recall": 0.8265895953757225, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 579.238037109375, + "eval_allNLI-dev_dot_ap": 0.4102009424801914, + "eval_allNLI-dev_dot_f1": 0.5353159851301116, + "eval_allNLI-dev_dot_f1_threshold": 434.8841552734375, + "eval_allNLI-dev_dot_precision": 0.39452054794520547, + "eval_allNLI-dev_dot_recall": 0.8323699421965318, + "eval_allNLI-dev_euclidean_accuracy": 0.705078125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 8.86550521850586, + "eval_allNLI-dev_euclidean_ap": 0.521605930352391, + "eval_allNLI-dev_euclidean_f1": 0.5725971370143149, + "eval_allNLI-dev_euclidean_f1_threshold": 12.825733184814453, + "eval_allNLI-dev_euclidean_precision": 0.4430379746835443, + "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, + "eval_allNLI-dev_manhattan_accuracy": 0.697265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 182.48983764648438, + "eval_allNLI-dev_manhattan_ap": 0.5205552000244451, + "eval_allNLI-dev_manhattan_f1": 0.5609756097560975, + "eval_allNLI-dev_manhattan_f1_threshold": 258.8172302246094, + "eval_allNLI-dev_manhattan_precision": 0.43260188087774293, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.705078125, + "eval_allNLI-dev_max_accuracy_threshold": 579.238037109375, + "eval_allNLI-dev_max_ap": 0.521605930352391, + "eval_allNLI-dev_max_f1": 0.5725971370143149, + "eval_allNLI-dev_max_f1_threshold": 434.8841552734375, + "eval_allNLI-dev_max_precision": 0.4430379746835443, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7175927858077272, + "eval_sts-test_pearson_cosine": 0.7136972276507711, + "eval_sts-test_pearson_dot": 0.6211895317774989, + "eval_sts-test_pearson_euclidean": 0.7206975023993004, + "eval_sts-test_pearson_manhattan": 0.7173500334495002, + "eval_sts-test_pearson_max": 0.7206975023993004, + "eval_sts-test_spearman_cosine": 0.7248006482549532, + "eval_sts-test_spearman_dot": 0.6004732756237651, + "eval_sts-test_spearman_euclidean": 0.7142591753753792, + "eval_sts-test_spearman_manhattan": 0.709346784661184, + "eval_sts-test_spearman_max": 0.7248006482549532, + "eval_vitaminc-pairs_loss": 2.848787546157837, + "eval_vitaminc-pairs_runtime": 3.1955, + "eval_vitaminc-pairs_samples_per_second": 40.057, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_negation-triplets_loss": 1.9014121294021606, + "eval_negation-triplets_runtime": 0.7402, + "eval_negation-triplets_samples_per_second": 172.919, + "eval_negation-triplets_steps_per_second": 1.351, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_scitail-pairs-pos_loss": 0.2929946184158325, + "eval_scitail-pairs-pos_runtime": 0.8038, + "eval_scitail-pairs-pos_samples_per_second": 159.243, + "eval_scitail-pairs-pos_steps_per_second": 1.244, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_scitail-pairs-qa_loss": 0.05852370336651802, + "eval_scitail-pairs-qa_runtime": 0.5794, + "eval_scitail-pairs-qa_samples_per_second": 220.912, + "eval_scitail-pairs-qa_steps_per_second": 1.726, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_xsum-pairs_loss": 1.6594665050506592, + "eval_xsum-pairs_runtime": 3.0252, + "eval_xsum-pairs_samples_per_second": 42.312, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_sciq_pairs_loss": 0.18286681175231934, + "eval_sciq_pairs_runtime": 3.4208, + "eval_sciq_pairs_samples_per_second": 37.419, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_qasc_pairs_loss": 1.2028858661651611, + "eval_qasc_pairs_runtime": 0.5972, + "eval_qasc_pairs_samples_per_second": 214.327, + "eval_qasc_pairs_steps_per_second": 1.674, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_openbookqa_pairs_loss": 1.7855560779571533, + "eval_openbookqa_pairs_runtime": 0.584, + "eval_openbookqa_pairs_samples_per_second": 219.164, + "eval_openbookqa_pairs_steps_per_second": 1.712, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_msmarco_pairs_loss": 2.167186975479126, + "eval_msmarco_pairs_runtime": 1.5188, + "eval_msmarco_pairs_samples_per_second": 84.278, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_nq_pairs_loss": 2.60081148147583, + "eval_nq_pairs_runtime": 2.8945, + "eval_nq_pairs_samples_per_second": 44.222, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_trivia_pairs_loss": 2.2225193977355957, + "eval_trivia_pairs_runtime": 3.4352, + "eval_trivia_pairs_samples_per_second": 37.262, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_gooaq_pairs_loss": 1.6083563566207886, + "eval_gooaq_pairs_runtime": 0.9378, + "eval_gooaq_pairs_samples_per_second": 136.489, + "eval_gooaq_pairs_steps_per_second": 1.066, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_paws-pos_loss": 0.06881627440452576, + "eval_paws-pos_runtime": 0.6797, + "eval_paws-pos_samples_per_second": 188.32, + "eval_paws-pos_steps_per_second": 1.471, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_global_dataset_loss": 1.0821770429611206, + "eval_global_dataset_runtime": 13.336, + "eval_global_dataset_samples_per_second": 31.194, + "eval_global_dataset_steps_per_second": 0.3, + "step": 320 + }, + { + "epoch": 0.33024691358024694, + "grad_norm": 11.460700035095215, + "learning_rate": 1.1557632398753892e-05, + "loss": 1.1381, + "step": 321 + }, + { + "epoch": 0.33127572016460904, + "grad_norm": 17.357484817504883, + "learning_rate": 1.1593977154724816e-05, + "loss": 1.74, + "step": 322 + }, + { + "epoch": 0.3323045267489712, + "grad_norm": 11.96126937866211, + "learning_rate": 1.1630321910695742e-05, + "loss": 1.2228, + "step": 323 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 11.488797187805176, + "learning_rate": 1.1666666666666665e-05, + "loss": 1.1824, + "step": 324 + }, + { + "epoch": 0.3343621399176955, + "grad_norm": 9.767298698425293, + "learning_rate": 1.170301142263759e-05, + "loss": 0.9803, + "step": 325 + }, + { + "epoch": 0.33539094650205764, + "grad_norm": 3.6967291831970215, + "learning_rate": 1.1739356178608514e-05, + "loss": 0.2092, + "step": 326 + }, + { + "epoch": 0.33641975308641975, + "grad_norm": 15.058168411254883, + "learning_rate": 1.1775700934579438e-05, + "loss": 1.9639, + "step": 327 + }, + { + "epoch": 0.3374485596707819, + "grad_norm": 13.334756851196289, + "learning_rate": 1.1812045690550362e-05, + "loss": 2.6183, + "step": 328 + }, + { + "epoch": 0.338477366255144, + "grad_norm": 11.534461975097656, + "learning_rate": 1.1848390446521288e-05, + "loss": 1.2629, + "step": 329 + }, + { + "epoch": 0.3395061728395062, + "grad_norm": 17.404090881347656, + "learning_rate": 1.188473520249221e-05, + "loss": 2.3154, + "step": 330 + }, + { + "epoch": 0.3405349794238683, + "grad_norm": 21.002731323242188, + "learning_rate": 1.1921079958463134e-05, + "loss": 2.9437, + "step": 331 + }, + { + "epoch": 0.34156378600823045, + "grad_norm": 3.4560208320617676, + "learning_rate": 1.195742471443406e-05, + "loss": 0.1481, + "step": 332 + }, + { + "epoch": 0.3425925925925926, + "grad_norm": 16.013757705688477, + "learning_rate": 1.1993769470404982e-05, + "loss": 2.0438, + "step": 333 + }, + { + "epoch": 0.3436213991769547, + "grad_norm": 12.2532377243042, + "learning_rate": 1.2030114226375908e-05, + "loss": 1.1147, + "step": 334 + }, + { + "epoch": 0.3446502057613169, + "grad_norm": 18.853534698486328, + "learning_rate": 1.2066458982346832e-05, + "loss": 2.4102, + "step": 335 + }, + { + "epoch": 0.345679012345679, + "grad_norm": 22.490856170654297, + "learning_rate": 1.2102803738317756e-05, + "loss": 3.1869, + "step": 336 + }, + { + "epoch": 0.34670781893004116, + "grad_norm": 8.61929702758789, + "learning_rate": 1.213914849428868e-05, + "loss": 0.464, + "step": 337 + }, + { + "epoch": 0.3477366255144033, + "grad_norm": 18.195655822753906, + "learning_rate": 1.2175493250259606e-05, + "loss": 2.3863, + "step": 338 + }, + { + "epoch": 0.3487654320987654, + "grad_norm": 17.072919845581055, + "learning_rate": 1.2211838006230528e-05, + "loss": 2.1807, + "step": 339 + }, + { + "epoch": 0.3497942386831276, + "grad_norm": 24.929763793945312, + "learning_rate": 1.2248182762201452e-05, + "loss": 2.6664, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_Qnli-dev_cosine_accuracy": 0.677734375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8400751352310181, + "eval_Qnli-dev_cosine_ap": 0.7066658175282814, + "eval_Qnli-dev_cosine_f1": 0.6932409012131717, + "eval_Qnli-dev_cosine_f1_threshold": 0.7699183225631714, + "eval_Qnli-dev_cosine_precision": 0.5865102639296188, + "eval_Qnli-dev_cosine_recall": 0.847457627118644, + "eval_Qnli-dev_dot_accuracy": 0.650390625, + "eval_Qnli-dev_dot_accuracy_threshold": 403.3339538574219, + "eval_Qnli-dev_dot_ap": 0.6108280098987123, + "eval_Qnli-dev_dot_f1": 0.6724738675958188, + "eval_Qnli-dev_dot_f1_threshold": 391.1240234375, + "eval_Qnli-dev_dot_precision": 0.5710059171597633, + "eval_Qnli-dev_dot_recall": 0.8177966101694916, + "eval_Qnli-dev_euclidean_accuracy": 0.69140625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.067573547363281, + "eval_Qnli-dev_euclidean_ap": 0.7152312759485101, + "eval_Qnli-dev_euclidean_f1": 0.6883561643835616, + "eval_Qnli-dev_euclidean_f1_threshold": 15.466879844665527, + "eval_Qnli-dev_euclidean_precision": 0.5775862068965517, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.689453125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 263.4106140136719, + "eval_Qnli-dev_manhattan_ap": 0.7125157018628507, + "eval_Qnli-dev_manhattan_f1": 0.6950596252129473, + "eval_Qnli-dev_manhattan_f1_threshold": 310.44476318359375, + "eval_Qnli-dev_manhattan_precision": 0.5811965811965812, + "eval_Qnli-dev_manhattan_recall": 0.864406779661017, + "eval_Qnli-dev_max_accuracy": 0.69140625, + "eval_Qnli-dev_max_accuracy_threshold": 403.3339538574219, + "eval_Qnli-dev_max_ap": 0.7152312759485101, + "eval_Qnli-dev_max_f1": 0.6950596252129473, + "eval_Qnli-dev_max_f1_threshold": 391.1240234375, + "eval_Qnli-dev_max_precision": 0.5865102639296188, + "eval_Qnli-dev_max_recall": 0.864406779661017, + "eval_allNLI-dev_cosine_accuracy": 0.697265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9136956930160522, + "eval_allNLI-dev_cosine_ap": 0.5195967953345066, + "eval_allNLI-dev_cosine_f1": 0.5657370517928287, + "eval_allNLI-dev_cosine_f1_threshold": 0.8129154443740845, + "eval_allNLI-dev_cosine_precision": 0.4316109422492401, + "eval_allNLI-dev_cosine_recall": 0.8208092485549133, + "eval_allNLI-dev_dot_accuracy": 0.673828125, + "eval_allNLI-dev_dot_accuracy_threshold": 493.11651611328125, + "eval_allNLI-dev_dot_ap": 0.43280102596891956, + "eval_allNLI-dev_dot_f1": 0.5358361774744028, + "eval_allNLI-dev_dot_f1_threshold": 382.6061096191406, + "eval_allNLI-dev_dot_precision": 0.3801452784503632, + "eval_allNLI-dev_dot_recall": 0.9075144508670521, + "eval_allNLI-dev_euclidean_accuracy": 0.701171875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.347522735595703, + "eval_allNLI-dev_euclidean_ap": 0.5242211625716244, + "eval_allNLI-dev_euclidean_f1": 0.5731958762886599, + "eval_allNLI-dev_euclidean_f1_threshold": 13.492986679077148, + "eval_allNLI-dev_euclidean_precision": 0.44551282051282054, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.6953125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 197.63035583496094, + "eval_allNLI-dev_manhattan_ap": 0.5226190870456224, + "eval_allNLI-dev_manhattan_f1": 0.570281124497992, + "eval_allNLI-dev_manhattan_f1_threshold": 274.56903076171875, + "eval_allNLI-dev_manhattan_precision": 0.4369230769230769, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.701171875, + "eval_allNLI-dev_max_accuracy_threshold": 493.11651611328125, + "eval_allNLI-dev_max_ap": 0.5242211625716244, + "eval_allNLI-dev_max_f1": 0.5731958762886599, + "eval_allNLI-dev_max_f1_threshold": 382.6061096191406, + "eval_allNLI-dev_max_precision": 0.44551282051282054, + "eval_allNLI-dev_max_recall": 0.9075144508670521, + "eval_sequential_score": 0.7152312759485101, + "eval_sts-test_pearson_cosine": 0.7271689674461207, + "eval_sts-test_pearson_dot": 0.6547412928047769, + "eval_sts-test_pearson_euclidean": 0.7347019450426402, + "eval_sts-test_pearson_manhattan": 0.7265236751697546, + "eval_sts-test_pearson_max": 0.7347019450426402, + "eval_sts-test_spearman_cosine": 0.7360126740352158, + "eval_sts-test_spearman_dot": 0.6379495482070717, + "eval_sts-test_spearman_euclidean": 0.7249498937116934, + "eval_sts-test_spearman_manhattan": 0.7182097719797506, + "eval_sts-test_spearman_max": 0.7360126740352158, + "eval_vitaminc-pairs_loss": 2.921300172805786, + "eval_vitaminc-pairs_runtime": 3.1577, + "eval_vitaminc-pairs_samples_per_second": 40.536, + "eval_vitaminc-pairs_steps_per_second": 0.317, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_negation-triplets_loss": 1.800155758857727, + "eval_negation-triplets_runtime": 0.73, + "eval_negation-triplets_samples_per_second": 175.338, + "eval_negation-triplets_steps_per_second": 1.37, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_scitail-pairs-pos_loss": 0.24910371005535126, + "eval_scitail-pairs-pos_runtime": 0.7849, + "eval_scitail-pairs-pos_samples_per_second": 163.07, + "eval_scitail-pairs-pos_steps_per_second": 1.274, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_scitail-pairs-qa_loss": 0.03286855295300484, + "eval_scitail-pairs-qa_runtime": 0.5749, + "eval_scitail-pairs-qa_samples_per_second": 222.634, + "eval_scitail-pairs-qa_steps_per_second": 1.739, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_xsum-pairs_loss": 1.4957503080368042, + "eval_xsum-pairs_runtime": 3.0086, + "eval_xsum-pairs_samples_per_second": 42.545, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_sciq_pairs_loss": 0.17354349792003632, + "eval_sciq_pairs_runtime": 3.4138, + "eval_sciq_pairs_samples_per_second": 37.494, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_qasc_pairs_loss": 1.0957040786743164, + "eval_qasc_pairs_runtime": 0.5968, + "eval_qasc_pairs_samples_per_second": 214.493, + "eval_qasc_pairs_steps_per_second": 1.676, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_openbookqa_pairs_loss": 1.6939177513122559, + "eval_openbookqa_pairs_runtime": 0.5747, + "eval_openbookqa_pairs_samples_per_second": 222.741, + "eval_openbookqa_pairs_steps_per_second": 1.74, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_msmarco_pairs_loss": 2.079050302505493, + "eval_msmarco_pairs_runtime": 1.5095, + "eval_msmarco_pairs_samples_per_second": 84.798, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_nq_pairs_loss": 2.6143176555633545, + "eval_nq_pairs_runtime": 2.8884, + "eval_nq_pairs_samples_per_second": 44.314, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_trivia_pairs_loss": 2.1714344024658203, + "eval_trivia_pairs_runtime": 3.4339, + "eval_trivia_pairs_samples_per_second": 37.275, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_gooaq_pairs_loss": 1.5302671194076538, + "eval_gooaq_pairs_runtime": 0.9419, + "eval_gooaq_pairs_samples_per_second": 135.901, + "eval_gooaq_pairs_steps_per_second": 1.062, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_paws-pos_loss": 0.05603673681616783, + "eval_paws-pos_runtime": 0.6803, + "eval_paws-pos_samples_per_second": 188.158, + "eval_paws-pos_steps_per_second": 1.47, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_global_dataset_loss": 1.029583215713501, + "eval_global_dataset_runtime": 13.3497, + "eval_global_dataset_samples_per_second": 31.162, + "eval_global_dataset_steps_per_second": 0.3, + "step": 340 + }, + { + "epoch": 0.3508230452674897, + "grad_norm": 13.51473331451416, + "learning_rate": 1.2284527518172378e-05, + "loss": 2.0315, + "step": 341 + }, + { + "epoch": 0.35185185185185186, + "grad_norm": 13.707486152648926, + "learning_rate": 1.23208722741433e-05, + "loss": 2.025, + "step": 342 + }, + { + "epoch": 0.35288065843621397, + "grad_norm": 14.628217697143555, + "learning_rate": 1.2357217030114226e-05, + "loss": 2.7561, + "step": 343 + }, + { + "epoch": 0.35390946502057613, + "grad_norm": 16.766042709350586, + "learning_rate": 1.239356178608515e-05, + "loss": 2.2758, + "step": 344 + }, + { + "epoch": 0.3549382716049383, + "grad_norm": 12.628337860107422, + "learning_rate": 1.2429906542056072e-05, + "loss": 1.2086, + "step": 345 + }, + { + "epoch": 0.3559670781893004, + "grad_norm": 12.519665718078613, + "learning_rate": 1.2466251298026998e-05, + "loss": 1.1789, + "step": 346 + }, + { + "epoch": 0.35699588477366256, + "grad_norm": 15.279417037963867, + "learning_rate": 1.2502596053997922e-05, + "loss": 2.0743, + "step": 347 + }, + { + "epoch": 0.35802469135802467, + "grad_norm": 2.5973308086395264, + "learning_rate": 1.2538940809968846e-05, + "loss": 0.1388, + "step": 348 + }, + { + "epoch": 0.35905349794238683, + "grad_norm": 11.533571243286133, + "learning_rate": 1.257528556593977e-05, + "loss": 1.007, + "step": 349 + }, + { + "epoch": 0.360082304526749, + "grad_norm": 12.749200820922852, + "learning_rate": 1.2611630321910696e-05, + "loss": 1.0298, + "step": 350 + }, + { + "epoch": 0.3611111111111111, + "grad_norm": 19.385435104370117, + "learning_rate": 1.2647975077881618e-05, + "loss": 2.3117, + "step": 351 + }, + { + "epoch": 0.36213991769547327, + "grad_norm": 13.895768165588379, + "learning_rate": 1.2684319833852544e-05, + "loss": 1.1729, + "step": 352 + }, + { + "epoch": 0.3631687242798354, + "grad_norm": 14.044111251831055, + "learning_rate": 1.2720664589823468e-05, + "loss": 1.8233, + "step": 353 + }, + { + "epoch": 0.36419753086419754, + "grad_norm": 13.1631441116333, + "learning_rate": 1.275700934579439e-05, + "loss": 1.2639, + "step": 354 + }, + { + "epoch": 0.36522633744855965, + "grad_norm": 12.106592178344727, + "learning_rate": 1.2793354101765316e-05, + "loss": 1.1652, + "step": 355 + }, + { + "epoch": 0.3662551440329218, + "grad_norm": 12.02451229095459, + "learning_rate": 1.282969885773624e-05, + "loss": 1.1368, + "step": 356 + }, + { + "epoch": 0.36728395061728397, + "grad_norm": 11.719282150268555, + "learning_rate": 1.2866043613707164e-05, + "loss": 1.122, + "step": 357 + }, + { + "epoch": 0.3683127572016461, + "grad_norm": 12.295735359191895, + "learning_rate": 1.2902388369678088e-05, + "loss": 1.2503, + "step": 358 + }, + { + "epoch": 0.36934156378600824, + "grad_norm": 12.906529426574707, + "learning_rate": 1.2938733125649014e-05, + "loss": 2.2025, + "step": 359 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 11.613821983337402, + "learning_rate": 1.2975077881619936e-05, + "loss": 0.9197, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_Qnli-dev_cosine_accuracy": 0.67578125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8274970650672913, + "eval_Qnli-dev_cosine_ap": 0.7107856399181414, + "eval_Qnli-dev_cosine_f1": 0.6886446886446885, + "eval_Qnli-dev_cosine_f1_threshold": 0.7976377010345459, + "eval_Qnli-dev_cosine_precision": 0.6064516129032258, + "eval_Qnli-dev_cosine_recall": 0.7966101694915254, + "eval_Qnli-dev_dot_accuracy": 0.638671875, + "eval_Qnli-dev_dot_accuracy_threshold": 421.1475830078125, + "eval_Qnli-dev_dot_ap": 0.6026157605644281, + "eval_Qnli-dev_dot_f1": 0.6688, + "eval_Qnli-dev_dot_f1_threshold": 383.72686767578125, + "eval_Qnli-dev_dot_precision": 0.5372750642673522, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.69140625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.509709358215332, + "eval_Qnli-dev_euclidean_ap": 0.7205377940978139, + "eval_Qnli-dev_euclidean_f1": 0.6884681583476765, + "eval_Qnli-dev_euclidean_f1_threshold": 15.236268997192383, + "eval_Qnli-dev_euclidean_precision": 0.5797101449275363, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.6875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 255.33294677734375, + "eval_Qnli-dev_manhattan_ap": 0.7196447998198872, + "eval_Qnli-dev_manhattan_f1": 0.6936026936026936, + "eval_Qnli-dev_manhattan_f1_threshold": 311.228271484375, + "eval_Qnli-dev_manhattan_precision": 0.5754189944134078, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.69140625, + "eval_Qnli-dev_max_accuracy_threshold": 421.1475830078125, + "eval_Qnli-dev_max_ap": 0.7205377940978139, + "eval_Qnli-dev_max_f1": 0.6936026936026936, + "eval_Qnli-dev_max_f1_threshold": 383.72686767578125, + "eval_Qnli-dev_max_precision": 0.6064516129032258, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.71484375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9123337864875793, + "eval_allNLI-dev_cosine_ap": 0.5454586491658389, + "eval_allNLI-dev_cosine_f1": 0.5625, + "eval_allNLI-dev_cosine_f1_threshold": 0.8062101006507874, + "eval_allNLI-dev_cosine_precision": 0.4247787610619469, + "eval_allNLI-dev_cosine_recall": 0.8323699421965318, + "eval_allNLI-dev_dot_accuracy": 0.673828125, + "eval_allNLI-dev_dot_accuracy_threshold": 486.6114807128906, + "eval_allNLI-dev_dot_ap": 0.45389873900128597, + "eval_allNLI-dev_dot_f1": 0.5345132743362832, + "eval_allNLI-dev_dot_f1_threshold": 395.1532897949219, + "eval_allNLI-dev_dot_precision": 0.3852040816326531, + "eval_allNLI-dev_dot_recall": 0.8728323699421965, + "eval_allNLI-dev_euclidean_accuracy": 0.71484375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.256704330444336, + "eval_allNLI-dev_euclidean_ap": 0.5468473795344413, + "eval_allNLI-dev_euclidean_f1": 0.5690721649484537, + "eval_allNLI-dev_euclidean_f1_threshold": 13.748187065124512, + "eval_allNLI-dev_euclidean_precision": 0.4423076923076923, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.7109375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 186.86061096191406, + "eval_allNLI-dev_manhattan_ap": 0.5447298734564391, + "eval_allNLI-dev_manhattan_f1": 0.5643153526970954, + "eval_allNLI-dev_manhattan_f1_threshold": 275.2322082519531, + "eval_allNLI-dev_manhattan_precision": 0.4401294498381877, + "eval_allNLI-dev_manhattan_recall": 0.7861271676300579, + "eval_allNLI-dev_max_accuracy": 0.71484375, + "eval_allNLI-dev_max_accuracy_threshold": 486.6114807128906, + "eval_allNLI-dev_max_ap": 0.5468473795344413, + "eval_allNLI-dev_max_f1": 0.5690721649484537, + "eval_allNLI-dev_max_f1_threshold": 395.1532897949219, + "eval_allNLI-dev_max_precision": 0.4423076923076923, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7205377940978139, + "eval_sts-test_pearson_cosine": 0.7499734901337567, + "eval_sts-test_pearson_dot": 0.6944181989389289, + "eval_sts-test_pearson_euclidean": 0.7581460883701887, + "eval_sts-test_pearson_manhattan": 0.7494485428076179, + "eval_sts-test_pearson_max": 0.7581460883701887, + "eval_sts-test_spearman_cosine": 0.7613686939883212, + "eval_sts-test_spearman_dot": 0.6777020937010519, + "eval_sts-test_spearman_euclidean": 0.748221668416756, + "eval_sts-test_spearman_manhattan": 0.7396471477291182, + "eval_sts-test_spearman_max": 0.7613686939883212, + "eval_vitaminc-pairs_loss": 2.936203718185425, + "eval_vitaminc-pairs_runtime": 3.1741, + "eval_vitaminc-pairs_samples_per_second": 40.326, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_negation-triplets_loss": 1.7166328430175781, + "eval_negation-triplets_runtime": 0.7398, + "eval_negation-triplets_samples_per_second": 173.027, + "eval_negation-triplets_steps_per_second": 1.352, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_scitail-pairs-pos_loss": 0.24884574115276337, + "eval_scitail-pairs-pos_runtime": 0.7997, + "eval_scitail-pairs-pos_samples_per_second": 160.064, + "eval_scitail-pairs-pos_steps_per_second": 1.25, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_scitail-pairs-qa_loss": 0.03187813237309456, + "eval_scitail-pairs-qa_runtime": 0.5642, + "eval_scitail-pairs-qa_samples_per_second": 226.873, + "eval_scitail-pairs-qa_steps_per_second": 1.772, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_xsum-pairs_loss": 1.3931405544281006, + "eval_xsum-pairs_runtime": 3.0204, + "eval_xsum-pairs_samples_per_second": 42.378, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_sciq_pairs_loss": 0.16648775339126587, + "eval_sciq_pairs_runtime": 3.3956, + "eval_sciq_pairs_samples_per_second": 37.695, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_qasc_pairs_loss": 0.9755259156227112, + "eval_qasc_pairs_runtime": 0.5919, + "eval_qasc_pairs_samples_per_second": 216.262, + "eval_qasc_pairs_steps_per_second": 1.69, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_openbookqa_pairs_loss": 1.6076797246932983, + "eval_openbookqa_pairs_runtime": 0.5876, + "eval_openbookqa_pairs_samples_per_second": 217.828, + "eval_openbookqa_pairs_steps_per_second": 1.702, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_msmarco_pairs_loss": 2.0155866146087646, + "eval_msmarco_pairs_runtime": 1.5287, + "eval_msmarco_pairs_samples_per_second": 83.731, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_nq_pairs_loss": 2.5515902042388916, + "eval_nq_pairs_runtime": 2.8881, + "eval_nq_pairs_samples_per_second": 44.319, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_trivia_pairs_loss": 1.8931869268417358, + "eval_trivia_pairs_runtime": 3.4335, + "eval_trivia_pairs_samples_per_second": 37.28, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_gooaq_pairs_loss": 1.4498214721679688, + "eval_gooaq_pairs_runtime": 0.9445, + "eval_gooaq_pairs_samples_per_second": 135.517, + "eval_gooaq_pairs_steps_per_second": 1.059, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_paws-pos_loss": 0.05163538083434105, + "eval_paws-pos_runtime": 0.6805, + "eval_paws-pos_samples_per_second": 188.1, + "eval_paws-pos_steps_per_second": 1.47, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_global_dataset_loss": 0.9714978933334351, + "eval_global_dataset_runtime": 13.3457, + "eval_global_dataset_samples_per_second": 31.171, + "eval_global_dataset_steps_per_second": 0.3, + "step": 360 + }, + { + "epoch": 0.3713991769547325, + "grad_norm": 8.986026763916016, + "learning_rate": 1.3011422637590862e-05, + "loss": 0.4565, + "step": 361 + }, + { + "epoch": 0.3724279835390947, + "grad_norm": 10.55431079864502, + "learning_rate": 1.3047767393561786e-05, + "loss": 0.9519, + "step": 362 + }, + { + "epoch": 0.3734567901234568, + "grad_norm": 22.92361831665039, + "learning_rate": 1.3084112149532708e-05, + "loss": 2.876, + "step": 363 + }, + { + "epoch": 0.37448559670781895, + "grad_norm": 13.740486145019531, + "learning_rate": 1.3120456905503634e-05, + "loss": 1.5415, + "step": 364 + }, + { + "epoch": 0.37551440329218105, + "grad_norm": 12.367791175842285, + "learning_rate": 1.3156801661474558e-05, + "loss": 1.017, + "step": 365 + }, + { + "epoch": 0.3765432098765432, + "grad_norm": 13.127511978149414, + "learning_rate": 1.3193146417445482e-05, + "loss": 1.4156, + "step": 366 + }, + { + "epoch": 0.3775720164609053, + "grad_norm": 24.860748291015625, + "learning_rate": 1.3229491173416406e-05, + "loss": 3.272, + "step": 367 + }, + { + "epoch": 0.3786008230452675, + "grad_norm": 10.243440628051758, + "learning_rate": 1.3265835929387332e-05, + "loss": 0.8205, + "step": 368 + }, + { + "epoch": 0.37962962962962965, + "grad_norm": 19.33579444885254, + "learning_rate": 1.3302180685358254e-05, + "loss": 2.7472, + "step": 369 + }, + { + "epoch": 0.38065843621399176, + "grad_norm": 2.6139795780181885, + "learning_rate": 1.3338525441329178e-05, + "loss": 0.1251, + "step": 370 + }, + { + "epoch": 0.3816872427983539, + "grad_norm": 13.30015754699707, + "learning_rate": 1.3374870197300104e-05, + "loss": 1.7165, + "step": 371 + }, + { + "epoch": 0.38271604938271603, + "grad_norm": 15.629612922668457, + "learning_rate": 1.3411214953271026e-05, + "loss": 2.0682, + "step": 372 + }, + { + "epoch": 0.3837448559670782, + "grad_norm": 2.9140241146087646, + "learning_rate": 1.3447559709241952e-05, + "loss": 0.1312, + "step": 373 + }, + { + "epoch": 0.38477366255144035, + "grad_norm": 14.68766975402832, + "learning_rate": 1.3483904465212876e-05, + "loss": 1.473, + "step": 374 + }, + { + "epoch": 0.38580246913580246, + "grad_norm": 11.001675605773926, + "learning_rate": 1.35202492211838e-05, + "loss": 0.9278, + "step": 375 + }, + { + "epoch": 0.3868312757201646, + "grad_norm": 18.111684799194336, + "learning_rate": 1.3556593977154724e-05, + "loss": 1.9585, + "step": 376 + }, + { + "epoch": 0.38786008230452673, + "grad_norm": 19.245637893676758, + "learning_rate": 1.359293873312565e-05, + "loss": 2.3601, + "step": 377 + }, + { + "epoch": 0.3888888888888889, + "grad_norm": 11.811524391174316, + "learning_rate": 1.3629283489096572e-05, + "loss": 1.0816, + "step": 378 + }, + { + "epoch": 0.389917695473251, + "grad_norm": 15.023236274719238, + "learning_rate": 1.3665628245067496e-05, + "loss": 2.3182, + "step": 379 + }, + { + "epoch": 0.39094650205761317, + "grad_norm": 16.875747680664062, + "learning_rate": 1.3701973001038421e-05, + "loss": 2.0826, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_Qnli-dev_cosine_accuracy": 0.669921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8552969098091125, + "eval_Qnli-dev_cosine_ap": 0.7113127062240453, + "eval_Qnli-dev_cosine_f1": 0.6916221033868093, + "eval_Qnli-dev_cosine_f1_threshold": 0.781796932220459, + "eval_Qnli-dev_cosine_precision": 0.5969230769230769, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.638671875, + "eval_Qnli-dev_dot_accuracy_threshold": 427.3189697265625, + "eval_Qnli-dev_dot_ap": 0.5994490036489998, + "eval_Qnli-dev_dot_f1": 0.6635367762128326, + "eval_Qnli-dev_dot_f1_threshold": 378.4907531738281, + "eval_Qnli-dev_dot_precision": 0.5260545905707196, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.677734375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.223026275634766, + "eval_Qnli-dev_euclidean_ap": 0.7220014624049731, + "eval_Qnli-dev_euclidean_f1": 0.6837606837606838, + "eval_Qnli-dev_euclidean_f1_threshold": 15.54850959777832, + "eval_Qnli-dev_euclidean_precision": 0.5730659025787965, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.685546875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 273.4452209472656, + "eval_Qnli-dev_manhattan_ap": 0.7213585266821472, + "eval_Qnli-dev_manhattan_f1": 0.6920415224913494, + "eval_Qnli-dev_manhattan_f1_threshold": 312.36419677734375, + "eval_Qnli-dev_manhattan_precision": 0.5847953216374269, + "eval_Qnli-dev_manhattan_recall": 0.847457627118644, + "eval_Qnli-dev_max_accuracy": 0.685546875, + "eval_Qnli-dev_max_accuracy_threshold": 427.3189697265625, + "eval_Qnli-dev_max_ap": 0.7220014624049731, + "eval_Qnli-dev_max_f1": 0.6920415224913494, + "eval_Qnli-dev_max_f1_threshold": 378.4907531738281, + "eval_Qnli-dev_max_precision": 0.5969230769230769, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.712890625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9154093861579895, + "eval_allNLI-dev_cosine_ap": 0.53447469371308, + "eval_allNLI-dev_cosine_f1": 0.5582329317269077, + "eval_allNLI-dev_cosine_f1_threshold": 0.8176555633544922, + "eval_allNLI-dev_cosine_precision": 0.4276923076923077, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.6640625, + "eval_allNLI-dev_dot_accuracy_threshold": 565.86181640625, + "eval_allNLI-dev_dot_ap": 0.4315444199034263, + "eval_allNLI-dev_dot_f1": 0.5361552028218695, + "eval_allNLI-dev_dot_f1_threshold": 401.46759033203125, + "eval_allNLI-dev_dot_precision": 0.38578680203045684, + "eval_allNLI-dev_dot_recall": 0.8786127167630058, + "eval_allNLI-dev_euclidean_accuracy": 0.708984375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.178808212280273, + "eval_allNLI-dev_euclidean_ap": 0.5387071754068331, + "eval_allNLI-dev_euclidean_f1": 0.5644171779141105, + "eval_allNLI-dev_euclidean_f1_threshold": 13.749273300170898, + "eval_allNLI-dev_euclidean_precision": 0.43670886075949367, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.708984375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 188.05450439453125, + "eval_allNLI-dev_manhattan_ap": 0.536761796295294, + "eval_allNLI-dev_manhattan_f1": 0.5641025641025641, + "eval_allNLI-dev_manhattan_f1_threshold": 269.14678955078125, + "eval_allNLI-dev_manhattan_precision": 0.44745762711864406, + "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, + "eval_allNLI-dev_max_accuracy": 0.712890625, + "eval_allNLI-dev_max_accuracy_threshold": 565.86181640625, + "eval_allNLI-dev_max_ap": 0.5387071754068331, + "eval_allNLI-dev_max_f1": 0.5644171779141105, + "eval_allNLI-dev_max_f1_threshold": 401.46759033203125, + "eval_allNLI-dev_max_precision": 0.44745762711864406, + "eval_allNLI-dev_max_recall": 0.8786127167630058, + "eval_sequential_score": 0.7220014624049731, + "eval_sts-test_pearson_cosine": 0.751982795791559, + "eval_sts-test_pearson_dot": 0.6746803146097003, + "eval_sts-test_pearson_euclidean": 0.7662220636606787, + "eval_sts-test_pearson_manhattan": 0.7605555708771703, + "eval_sts-test_pearson_max": 0.7662220636606787, + "eval_sts-test_spearman_cosine": 0.7683159693092451, + "eval_sts-test_spearman_dot": 0.6569558116813516, + "eval_sts-test_spearman_euclidean": 0.7569410489375177, + "eval_sts-test_spearman_manhattan": 0.7506770806267793, + "eval_sts-test_spearman_max": 0.7683159693092451, + "eval_vitaminc-pairs_loss": 3.076357841491699, + "eval_vitaminc-pairs_runtime": 3.1717, + "eval_vitaminc-pairs_samples_per_second": 40.357, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_negation-triplets_loss": 1.6979268789291382, + "eval_negation-triplets_runtime": 0.7352, + "eval_negation-triplets_samples_per_second": 174.1, + "eval_negation-triplets_steps_per_second": 1.36, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_scitail-pairs-pos_loss": 0.24082684516906738, + "eval_scitail-pairs-pos_runtime": 0.7875, + "eval_scitail-pairs-pos_samples_per_second": 162.532, + "eval_scitail-pairs-pos_steps_per_second": 1.27, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_scitail-pairs-qa_loss": 0.027928592637181282, + "eval_scitail-pairs-qa_runtime": 0.5645, + "eval_scitail-pairs-qa_samples_per_second": 226.73, + "eval_scitail-pairs-qa_steps_per_second": 1.771, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_xsum-pairs_loss": 1.2622292041778564, + "eval_xsum-pairs_runtime": 3.0159, + "eval_xsum-pairs_samples_per_second": 42.441, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_sciq_pairs_loss": 0.15188440680503845, + "eval_sciq_pairs_runtime": 3.4598, + "eval_sciq_pairs_samples_per_second": 36.996, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_qasc_pairs_loss": 1.0029363632202148, + "eval_qasc_pairs_runtime": 0.6147, + "eval_qasc_pairs_samples_per_second": 208.233, + "eval_qasc_pairs_steps_per_second": 1.627, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_openbookqa_pairs_loss": 1.6912751197814941, + "eval_openbookqa_pairs_runtime": 0.574, + "eval_openbookqa_pairs_samples_per_second": 222.989, + "eval_openbookqa_pairs_steps_per_second": 1.742, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_msmarco_pairs_loss": 1.9394227266311646, + "eval_msmarco_pairs_runtime": 1.5149, + "eval_msmarco_pairs_samples_per_second": 84.493, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_nq_pairs_loss": 2.323462963104248, + "eval_nq_pairs_runtime": 2.8969, + "eval_nq_pairs_samples_per_second": 44.185, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_trivia_pairs_loss": 1.7298884391784668, + "eval_trivia_pairs_runtime": 3.4558, + "eval_trivia_pairs_samples_per_second": 37.039, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_gooaq_pairs_loss": 1.3238028287887573, + "eval_gooaq_pairs_runtime": 0.9462, + "eval_gooaq_pairs_samples_per_second": 135.282, + "eval_gooaq_pairs_steps_per_second": 1.057, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_paws-pos_loss": 0.04966222867369652, + "eval_paws-pos_runtime": 0.68, + "eval_paws-pos_samples_per_second": 188.239, + "eval_paws-pos_steps_per_second": 1.471, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_global_dataset_loss": 0.941063642501831, + "eval_global_dataset_runtime": 13.3724, + "eval_global_dataset_samples_per_second": 31.109, + "eval_global_dataset_steps_per_second": 0.299, + "step": 380 + }, + { + "epoch": 0.39197530864197533, + "grad_norm": 7.882116317749023, + "learning_rate": 1.3738317757009344e-05, + "loss": 0.4021, + "step": 381 + }, + { + "epoch": 0.39300411522633744, + "grad_norm": 11.462610244750977, + "learning_rate": 1.377466251298027e-05, + "loss": 1.0139, + "step": 382 + }, + { + "epoch": 0.3940329218106996, + "grad_norm": 14.762428283691406, + "learning_rate": 1.3811007268951194e-05, + "loss": 1.4995, + "step": 383 + }, + { + "epoch": 0.3950617283950617, + "grad_norm": 22.418067932128906, + "learning_rate": 1.3847352024922118e-05, + "loss": 2.4814, + "step": 384 + }, + { + "epoch": 0.39609053497942387, + "grad_norm": 22.518835067749023, + "learning_rate": 1.3883696780893042e-05, + "loss": 2.715, + "step": 385 + }, + { + "epoch": 0.39711934156378603, + "grad_norm": 16.43521499633789, + "learning_rate": 1.3920041536863967e-05, + "loss": 2.1024, + "step": 386 + }, + { + "epoch": 0.39814814814814814, + "grad_norm": 11.84013557434082, + "learning_rate": 1.395638629283489e-05, + "loss": 0.9607, + "step": 387 + }, + { + "epoch": 0.3991769547325103, + "grad_norm": 11.819180488586426, + "learning_rate": 1.3992731048805814e-05, + "loss": 0.9005, + "step": 388 + }, + { + "epoch": 0.4002057613168724, + "grad_norm": 2.0303947925567627, + "learning_rate": 1.402907580477674e-05, + "loss": 0.0972, + "step": 389 + }, + { + "epoch": 0.4012345679012346, + "grad_norm": 14.67570686340332, + "learning_rate": 1.4065420560747662e-05, + "loss": 1.7057, + "step": 390 + }, + { + "epoch": 0.4022633744855967, + "grad_norm": 13.796504974365234, + "learning_rate": 1.4101765316718587e-05, + "loss": 2.1665, + "step": 391 + }, + { + "epoch": 0.40329218106995884, + "grad_norm": 11.881211280822754, + "learning_rate": 1.4138110072689511e-05, + "loss": 1.2397, + "step": 392 + }, + { + "epoch": 0.404320987654321, + "grad_norm": 14.59404182434082, + "learning_rate": 1.4174454828660434e-05, + "loss": 2.2046, + "step": 393 + }, + { + "epoch": 0.4053497942386831, + "grad_norm": 2.643915891647339, + "learning_rate": 1.421079958463136e-05, + "loss": 0.1259, + "step": 394 + }, + { + "epoch": 0.4063786008230453, + "grad_norm": 14.334051132202148, + "learning_rate": 1.4247144340602283e-05, + "loss": 1.5374, + "step": 395 + }, + { + "epoch": 0.4074074074074074, + "grad_norm": 13.22938060760498, + "learning_rate": 1.4283489096573207e-05, + "loss": 1.0678, + "step": 396 + }, + { + "epoch": 0.40843621399176955, + "grad_norm": 14.21168041229248, + "learning_rate": 1.4319833852544132e-05, + "loss": 1.6494, + "step": 397 + }, + { + "epoch": 0.4094650205761317, + "grad_norm": 12.661002159118652, + "learning_rate": 1.4356178608515057e-05, + "loss": 1.8241, + "step": 398 + }, + { + "epoch": 0.4104938271604938, + "grad_norm": 20.38874053955078, + "learning_rate": 1.439252336448598e-05, + "loss": 2.5338, + "step": 399 + }, + { + "epoch": 0.411522633744856, + "grad_norm": 10.472373962402344, + "learning_rate": 1.4428868120456905e-05, + "loss": 0.728, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_Qnli-dev_cosine_accuracy": 0.6640625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8612580299377441, + "eval_Qnli-dev_cosine_ap": 0.7008833240002428, + "eval_Qnli-dev_cosine_f1": 0.690391459074733, + "eval_Qnli-dev_cosine_f1_threshold": 0.7775630950927734, + "eval_Qnli-dev_cosine_precision": 0.5950920245398773, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.65625, + "eval_Qnli-dev_dot_accuracy_threshold": 427.90985107421875, + "eval_Qnli-dev_dot_ap": 0.5927778156562893, + "eval_Qnli-dev_dot_f1": 0.662379421221865, + "eval_Qnli-dev_dot_f1_threshold": 380.0942077636719, + "eval_Qnli-dev_dot_precision": 0.533678756476684, + "eval_Qnli-dev_dot_recall": 0.8728813559322034, + "eval_Qnli-dev_euclidean_accuracy": 0.673828125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.01819133758545, + "eval_Qnli-dev_euclidean_ap": 0.7113131862333142, + "eval_Qnli-dev_euclidean_f1": 0.6801470588235294, + "eval_Qnli-dev_euclidean_f1_threshold": 14.90008544921875, + "eval_Qnli-dev_euclidean_precision": 0.6006493506493507, + "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, + "eval_Qnli-dev_manhattan_accuracy": 0.673828125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 239.12254333496094, + "eval_Qnli-dev_manhattan_ap": 0.7095040002502047, + "eval_Qnli-dev_manhattan_f1": 0.6803418803418803, + "eval_Qnli-dev_manhattan_f1_threshold": 318.11749267578125, + "eval_Qnli-dev_manhattan_precision": 0.5702005730659025, + "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, + "eval_Qnli-dev_max_accuracy": 0.673828125, + "eval_Qnli-dev_max_accuracy_threshold": 427.90985107421875, + "eval_Qnli-dev_max_ap": 0.7113131862333142, + "eval_Qnli-dev_max_f1": 0.690391459074733, + "eval_Qnli-dev_max_f1_threshold": 380.0942077636719, + "eval_Qnli-dev_max_precision": 0.6006493506493507, + "eval_Qnli-dev_max_recall": 0.8728813559322034, + "eval_allNLI-dev_cosine_accuracy": 0.720703125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9071913957595825, + "eval_allNLI-dev_cosine_ap": 0.5455260839546557, + "eval_allNLI-dev_cosine_f1": 0.5627376425855514, + "eval_allNLI-dev_cosine_f1_threshold": 0.7851958274841309, + "eval_allNLI-dev_cosine_precision": 0.4192634560906516, + "eval_allNLI-dev_cosine_recall": 0.8554913294797688, + "eval_allNLI-dev_dot_accuracy": 0.673828125, + "eval_allNLI-dev_dot_accuracy_threshold": 495.9515380859375, + "eval_allNLI-dev_dot_ap": 0.4440619711184598, + "eval_allNLI-dev_dot_f1": 0.5368248772504091, + "eval_allNLI-dev_dot_f1_threshold": 365.49859619140625, + "eval_allNLI-dev_dot_precision": 0.3744292237442922, + "eval_allNLI-dev_dot_recall": 0.9479768786127167, + "eval_allNLI-dev_euclidean_accuracy": 0.716796875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.831633567810059, + "eval_allNLI-dev_euclidean_ap": 0.5498277088824723, + "eval_allNLI-dev_euclidean_f1": 0.5628997867803838, + "eval_allNLI-dev_euclidean_f1_threshold": 13.911539077758789, + "eval_allNLI-dev_euclidean_precision": 0.44594594594594594, + "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, + "eval_allNLI-dev_manhattan_accuracy": 0.716796875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 193.09060668945312, + "eval_allNLI-dev_manhattan_ap": 0.5499837592661042, + "eval_allNLI-dev_manhattan_f1": 0.5671641791044776, + "eval_allNLI-dev_manhattan_f1_threshold": 302.8914489746094, + "eval_allNLI-dev_manhattan_precision": 0.418732782369146, + "eval_allNLI-dev_manhattan_recall": 0.8786127167630058, + "eval_allNLI-dev_max_accuracy": 0.720703125, + "eval_allNLI-dev_max_accuracy_threshold": 495.9515380859375, + "eval_allNLI-dev_max_ap": 0.5499837592661042, + "eval_allNLI-dev_max_f1": 0.5671641791044776, + "eval_allNLI-dev_max_f1_threshold": 365.49859619140625, + "eval_allNLI-dev_max_precision": 0.44594594594594594, + "eval_allNLI-dev_max_recall": 0.9479768786127167, + "eval_sequential_score": 0.7113131862333142, + "eval_sts-test_pearson_cosine": 0.7693717840654692, + "eval_sts-test_pearson_dot": 0.7198031376100279, + "eval_sts-test_pearson_euclidean": 0.783845420495406, + "eval_sts-test_pearson_manhattan": 0.7789636540660673, + "eval_sts-test_pearson_max": 0.783845420495406, + "eval_sts-test_spearman_cosine": 0.7856816515409163, + "eval_sts-test_spearman_dot": 0.6995584755108273, + "eval_sts-test_spearman_euclidean": 0.7742835947670496, + "eval_sts-test_spearman_manhattan": 0.766819666133362, + "eval_sts-test_spearman_max": 0.7856816515409163, + "eval_vitaminc-pairs_loss": 3.1967031955718994, + "eval_vitaminc-pairs_runtime": 3.1694, + "eval_vitaminc-pairs_samples_per_second": 40.387, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_negation-triplets_loss": 1.6347670555114746, + "eval_negation-triplets_runtime": 0.7366, + "eval_negation-triplets_samples_per_second": 173.76, + "eval_negation-triplets_steps_per_second": 1.358, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_scitail-pairs-pos_loss": 0.2562161684036255, + "eval_scitail-pairs-pos_runtime": 0.7943, + "eval_scitail-pairs-pos_samples_per_second": 161.157, + "eval_scitail-pairs-pos_steps_per_second": 1.259, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_scitail-pairs-qa_loss": 0.026608988642692566, + "eval_scitail-pairs-qa_runtime": 0.5671, + "eval_scitail-pairs-qa_samples_per_second": 225.717, + "eval_scitail-pairs-qa_steps_per_second": 1.763, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_xsum-pairs_loss": 1.1463801860809326, + "eval_xsum-pairs_runtime": 3.0137, + "eval_xsum-pairs_samples_per_second": 42.472, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_sciq_pairs_loss": 0.14410454034805298, + "eval_sciq_pairs_runtime": 3.4298, + "eval_sciq_pairs_samples_per_second": 37.319, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_qasc_pairs_loss": 0.9308192729949951, + "eval_qasc_pairs_runtime": 0.6033, + "eval_qasc_pairs_samples_per_second": 212.183, + "eval_qasc_pairs_steps_per_second": 1.658, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_openbookqa_pairs_loss": 1.5551338195800781, + "eval_openbookqa_pairs_runtime": 0.5715, + "eval_openbookqa_pairs_samples_per_second": 223.986, + "eval_openbookqa_pairs_steps_per_second": 1.75, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_msmarco_pairs_loss": 1.754774570465088, + "eval_msmarco_pairs_runtime": 1.5121, + "eval_msmarco_pairs_samples_per_second": 84.651, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_nq_pairs_loss": 2.163573980331421, + "eval_nq_pairs_runtime": 2.8927, + "eval_nq_pairs_samples_per_second": 44.249, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_trivia_pairs_loss": 1.6104655265808105, + "eval_trivia_pairs_runtime": 3.4302, + "eval_trivia_pairs_samples_per_second": 37.316, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_gooaq_pairs_loss": 1.2485657930374146, + "eval_gooaq_pairs_runtime": 0.9371, + "eval_gooaq_pairs_samples_per_second": 136.585, + "eval_gooaq_pairs_steps_per_second": 1.067, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_paws-pos_loss": 0.04668813571333885, + "eval_paws-pos_runtime": 0.6799, + "eval_paws-pos_samples_per_second": 188.263, + "eval_paws-pos_steps_per_second": 1.471, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_global_dataset_loss": 0.9041730165481567, + "eval_global_dataset_runtime": 13.3404, + "eval_global_dataset_samples_per_second": 31.183, + "eval_global_dataset_steps_per_second": 0.3, + "step": 400 + }, + { + "epoch": 0.4125514403292181, + "grad_norm": 12.87179183959961, + "learning_rate": 1.446521287642783e-05, + "loss": 1.6954, + "step": 401 + }, + { + "epoch": 0.41358024691358025, + "grad_norm": 11.029813766479492, + "learning_rate": 1.4501557632398752e-05, + "loss": 0.8749, + "step": 402 + }, + { + "epoch": 0.41460905349794236, + "grad_norm": 12.643345832824707, + "learning_rate": 1.4537902388369677e-05, + "loss": 1.6856, + "step": 403 + }, + { + "epoch": 0.4156378600823045, + "grad_norm": 3.3279924392700195, + "learning_rate": 1.4574247144340601e-05, + "loss": 0.1254, + "step": 404 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 10.091893196105957, + "learning_rate": 1.4610591900311525e-05, + "loss": 0.6986, + "step": 405 + }, + { + "epoch": 0.4176954732510288, + "grad_norm": 18.337682723999023, + "learning_rate": 1.464693665628245e-05, + "loss": 2.179, + "step": 406 + }, + { + "epoch": 0.41872427983539096, + "grad_norm": 19.924663543701172, + "learning_rate": 1.4683281412253373e-05, + "loss": 2.7119, + "step": 407 + }, + { + "epoch": 0.41975308641975306, + "grad_norm": 11.52767562866211, + "learning_rate": 1.4719626168224297e-05, + "loss": 1.0071, + "step": 408 + }, + { + "epoch": 0.4207818930041152, + "grad_norm": 13.5806884765625, + "learning_rate": 1.4755970924195223e-05, + "loss": 1.1775, + "step": 409 + }, + { + "epoch": 0.4218106995884774, + "grad_norm": 12.70963191986084, + "learning_rate": 1.4792315680166145e-05, + "loss": 1.1221, + "step": 410 + }, + { + "epoch": 0.4228395061728395, + "grad_norm": 15.391998291015625, + "learning_rate": 1.482866043613707e-05, + "loss": 2.146, + "step": 411 + }, + { + "epoch": 0.42386831275720166, + "grad_norm": 13.149142265319824, + "learning_rate": 1.4865005192107995e-05, + "loss": 1.1117, + "step": 412 + }, + { + "epoch": 0.42489711934156377, + "grad_norm": 12.455025672912598, + "learning_rate": 1.4901349948078918e-05, + "loss": 1.1356, + "step": 413 + }, + { + "epoch": 0.42592592592592593, + "grad_norm": 11.366116523742676, + "learning_rate": 1.4937694704049843e-05, + "loss": 1.0309, + "step": 414 + }, + { + "epoch": 0.4269547325102881, + "grad_norm": 18.122159957885742, + "learning_rate": 1.4974039460020767e-05, + "loss": 2.0714, + "step": 415 + }, + { + "epoch": 0.4279835390946502, + "grad_norm": 15.322531700134277, + "learning_rate": 1.501038421599169e-05, + "loss": 2.4046, + "step": 416 + }, + { + "epoch": 0.42901234567901236, + "grad_norm": 8.748584747314453, + "learning_rate": 1.5046728971962615e-05, + "loss": 0.3751, + "step": 417 + }, + { + "epoch": 0.43004115226337447, + "grad_norm": 11.135249137878418, + "learning_rate": 1.508307372793354e-05, + "loss": 0.9194, + "step": 418 + }, + { + "epoch": 0.43106995884773663, + "grad_norm": 2.534362554550171, + "learning_rate": 1.5119418483904463e-05, + "loss": 0.0876, + "step": 419 + }, + { + "epoch": 0.43209876543209874, + "grad_norm": 11.441560745239258, + "learning_rate": 1.5155763239875387e-05, + "loss": 1.0256, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_Qnli-dev_cosine_accuracy": 0.669921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.82877516746521, + "eval_Qnli-dev_cosine_ap": 0.7039769311253984, + "eval_Qnli-dev_cosine_f1": 0.6916524701873935, + "eval_Qnli-dev_cosine_f1_threshold": 0.760260820388794, + "eval_Qnli-dev_cosine_precision": 0.5783475783475783, + "eval_Qnli-dev_cosine_recall": 0.8601694915254238, + "eval_Qnli-dev_dot_accuracy": 0.65234375, + "eval_Qnli-dev_dot_accuracy_threshold": 410.722412109375, + "eval_Qnli-dev_dot_ap": 0.5998569073160949, + "eval_Qnli-dev_dot_f1": 0.6757679180887372, + "eval_Qnli-dev_dot_f1_threshold": 395.8144226074219, + "eval_Qnli-dev_dot_precision": 0.5657142857142857, + "eval_Qnli-dev_dot_recall": 0.8389830508474576, + "eval_Qnli-dev_euclidean_accuracy": 0.67578125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.777875900268555, + "eval_Qnli-dev_euclidean_ap": 0.7165015785234907, + "eval_Qnli-dev_euclidean_f1": 0.6878306878306878, + "eval_Qnli-dev_euclidean_f1_threshold": 15.445184707641602, + "eval_Qnli-dev_euclidean_precision": 0.5891238670694864, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.6796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 282.639892578125, + "eval_Qnli-dev_manhattan_ap": 0.7163039161971565, + "eval_Qnli-dev_manhattan_f1": 0.6854304635761589, + "eval_Qnli-dev_manhattan_f1_threshold": 328.2414855957031, + "eval_Qnli-dev_manhattan_precision": 0.5625, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.6796875, + "eval_Qnli-dev_max_accuracy_threshold": 410.722412109375, + "eval_Qnli-dev_max_ap": 0.7165015785234907, + "eval_Qnli-dev_max_f1": 0.6916524701873935, + "eval_Qnli-dev_max_f1_threshold": 395.8144226074219, + "eval_Qnli-dev_max_precision": 0.5891238670694864, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.712890625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9146419167518616, + "eval_allNLI-dev_cosine_ap": 0.5483284911998406, + "eval_allNLI-dev_cosine_f1": 0.5627376425855514, + "eval_allNLI-dev_cosine_f1_threshold": 0.8004182577133179, + "eval_allNLI-dev_cosine_precision": 0.4192634560906516, + "eval_allNLI-dev_cosine_recall": 0.8554913294797688, + "eval_allNLI-dev_dot_accuracy": 0.669921875, + "eval_allNLI-dev_dot_accuracy_threshold": 488.09686279296875, + "eval_allNLI-dev_dot_ap": 0.4365952351888237, + "eval_allNLI-dev_dot_f1": 0.5369774919614149, + "eval_allNLI-dev_dot_f1_threshold": 376.2415466308594, + "eval_allNLI-dev_dot_precision": 0.37193763919821826, + "eval_allNLI-dev_dot_recall": 0.9653179190751445, + "eval_allNLI-dev_euclidean_accuracy": 0.716796875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.584402084350586, + "eval_allNLI-dev_euclidean_ap": 0.552898886980593, + "eval_allNLI-dev_euclidean_f1": 0.5714285714285714, + "eval_allNLI-dev_euclidean_f1_threshold": 12.487444877624512, + "eval_allNLI-dev_euclidean_precision": 0.4978540772532189, + "eval_allNLI-dev_euclidean_recall": 0.6705202312138728, + "eval_allNLI-dev_manhattan_accuracy": 0.71484375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 189.19187927246094, + "eval_allNLI-dev_manhattan_ap": 0.5494702966109603, + "eval_allNLI-dev_manhattan_f1": 0.5689655172413792, + "eval_allNLI-dev_manhattan_f1_threshold": 273.392578125, + "eval_allNLI-dev_manhattan_precision": 0.4536082474226804, + "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, + "eval_allNLI-dev_max_accuracy": 0.716796875, + "eval_allNLI-dev_max_accuracy_threshold": 488.09686279296875, + "eval_allNLI-dev_max_ap": 0.552898886980593, + "eval_allNLI-dev_max_f1": 0.5714285714285714, + "eval_allNLI-dev_max_f1_threshold": 376.2415466308594, + "eval_allNLI-dev_max_precision": 0.4978540772532189, + "eval_allNLI-dev_max_recall": 0.9653179190751445, + "eval_sequential_score": 0.7165015785234907, + "eval_sts-test_pearson_cosine": 0.7669433117508329, + "eval_sts-test_pearson_dot": 0.7202911200663573, + "eval_sts-test_pearson_euclidean": 0.7808069960273615, + "eval_sts-test_pearson_manhattan": 0.7749037661158105, + "eval_sts-test_pearson_max": 0.7808069960273615, + "eval_sts-test_spearman_cosine": 0.7837364484620162, + "eval_sts-test_spearman_dot": 0.6980170630634619, + "eval_sts-test_spearman_euclidean": 0.7726895995875491, + "eval_sts-test_spearman_manhattan": 0.7648159284045623, + "eval_sts-test_spearman_max": 0.7837364484620162, + "eval_vitaminc-pairs_loss": 3.1363868713378906, + "eval_vitaminc-pairs_runtime": 3.1813, + "eval_vitaminc-pairs_samples_per_second": 40.235, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_negation-triplets_loss": 1.596004843711853, + "eval_negation-triplets_runtime": 0.7373, + "eval_negation-triplets_samples_per_second": 173.6, + "eval_negation-triplets_steps_per_second": 1.356, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_scitail-pairs-pos_loss": 0.25697287917137146, + "eval_scitail-pairs-pos_runtime": 0.8101, + "eval_scitail-pairs-pos_samples_per_second": 158.012, + "eval_scitail-pairs-pos_steps_per_second": 1.234, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_scitail-pairs-qa_loss": 0.025879494845867157, + "eval_scitail-pairs-qa_runtime": 0.5751, + "eval_scitail-pairs-qa_samples_per_second": 222.558, + "eval_scitail-pairs-qa_steps_per_second": 1.739, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_xsum-pairs_loss": 1.168808102607727, + "eval_xsum-pairs_runtime": 3.0187, + "eval_xsum-pairs_samples_per_second": 42.402, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_sciq_pairs_loss": 0.1468716710805893, + "eval_sciq_pairs_runtime": 3.3982, + "eval_sciq_pairs_samples_per_second": 37.667, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_qasc_pairs_loss": 0.9004628658294678, + "eval_qasc_pairs_runtime": 0.6001, + "eval_qasc_pairs_samples_per_second": 213.312, + "eval_qasc_pairs_steps_per_second": 1.666, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_openbookqa_pairs_loss": 1.666839361190796, + "eval_openbookqa_pairs_runtime": 0.572, + "eval_openbookqa_pairs_samples_per_second": 223.786, + "eval_openbookqa_pairs_steps_per_second": 1.748, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_msmarco_pairs_loss": 1.7236199378967285, + "eval_msmarco_pairs_runtime": 1.524, + "eval_msmarco_pairs_samples_per_second": 83.991, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_nq_pairs_loss": 2.0736727714538574, + "eval_nq_pairs_runtime": 2.901, + "eval_nq_pairs_samples_per_second": 44.123, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_trivia_pairs_loss": 1.4644691944122314, + "eval_trivia_pairs_runtime": 3.4467, + "eval_trivia_pairs_samples_per_second": 37.137, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_gooaq_pairs_loss": 1.197310209274292, + "eval_gooaq_pairs_runtime": 0.9409, + "eval_gooaq_pairs_samples_per_second": 136.041, + "eval_gooaq_pairs_steps_per_second": 1.063, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_paws-pos_loss": 0.04723614454269409, + "eval_paws-pos_runtime": 0.6946, + "eval_paws-pos_samples_per_second": 184.277, + "eval_paws-pos_steps_per_second": 1.44, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_global_dataset_loss": 0.8761696219444275, + "eval_global_dataset_runtime": 13.3489, + "eval_global_dataset_samples_per_second": 31.164, + "eval_global_dataset_steps_per_second": 0.3, + "step": 420 + }, + { + "epoch": 0.4331275720164609, + "grad_norm": 11.209942817687988, + "learning_rate": 1.5192107995846313e-05, + "loss": 0.75, + "step": 421 + }, + { + "epoch": 0.43415637860082307, + "grad_norm": 12.36668872833252, + "learning_rate": 1.5228452751817235e-05, + "loss": 1.0858, + "step": 422 + }, + { + "epoch": 0.4351851851851852, + "grad_norm": 20.955825805664062, + "learning_rate": 1.526479750778816e-05, + "loss": 2.2561, + "step": 423 + }, + { + "epoch": 0.43621399176954734, + "grad_norm": 6.835966110229492, + "learning_rate": 1.5301142263759087e-05, + "loss": 0.2822, + "step": 424 + }, + { + "epoch": 0.43724279835390945, + "grad_norm": 14.905786514282227, + "learning_rate": 1.5337487019730007e-05, + "loss": 1.286, + "step": 425 + }, + { + "epoch": 0.4382716049382716, + "grad_norm": 16.917980194091797, + "learning_rate": 1.537383177570093e-05, + "loss": 1.6777, + "step": 426 + }, + { + "epoch": 0.43930041152263377, + "grad_norm": 16.51511573791504, + "learning_rate": 1.541017653167186e-05, + "loss": 1.609, + "step": 427 + }, + { + "epoch": 0.4403292181069959, + "grad_norm": 12.164166450500488, + "learning_rate": 1.544652128764278e-05, + "loss": 0.8033, + "step": 428 + }, + { + "epoch": 0.44135802469135804, + "grad_norm": 23.55919647216797, + "learning_rate": 1.5482866043613707e-05, + "loss": 3.845, + "step": 429 + }, + { + "epoch": 0.44238683127572015, + "grad_norm": 12.458250999450684, + "learning_rate": 1.551921079958463e-05, + "loss": 1.0592, + "step": 430 + }, + { + "epoch": 0.4434156378600823, + "grad_norm": 11.092578887939453, + "learning_rate": 1.5555555555555555e-05, + "loss": 0.7032, + "step": 431 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 1.7349342107772827, + "learning_rate": 1.559190031152648e-05, + "loss": 0.0603, + "step": 432 + }, + { + "epoch": 0.4454732510288066, + "grad_norm": 18.856273651123047, + "learning_rate": 1.5628245067497403e-05, + "loss": 2.1237, + "step": 433 + }, + { + "epoch": 0.44650205761316875, + "grad_norm": 14.356558799743652, + "learning_rate": 1.5664589823468327e-05, + "loss": 1.3715, + "step": 434 + }, + { + "epoch": 0.44753086419753085, + "grad_norm": 17.297388076782227, + "learning_rate": 1.570093457943925e-05, + "loss": 2.1316, + "step": 435 + }, + { + "epoch": 0.448559670781893, + "grad_norm": 14.070610046386719, + "learning_rate": 1.5737279335410175e-05, + "loss": 1.2195, + "step": 436 + }, + { + "epoch": 0.4495884773662551, + "grad_norm": 19.30897331237793, + "learning_rate": 1.57736240913811e-05, + "loss": 3.4846, + "step": 437 + }, + { + "epoch": 0.4506172839506173, + "grad_norm": 14.064212799072266, + "learning_rate": 1.5809968847352023e-05, + "loss": 2.1621, + "step": 438 + }, + { + "epoch": 0.45164609053497945, + "grad_norm": 21.56043815612793, + "learning_rate": 1.5846313603322947e-05, + "loss": 2.6488, + "step": 439 + }, + { + "epoch": 0.45267489711934156, + "grad_norm": 13.354293823242188, + "learning_rate": 1.588265835929387e-05, + "loss": 1.1381, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_Qnli-dev_cosine_accuracy": 0.6796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8328311443328857, + "eval_Qnli-dev_cosine_ap": 0.7086015466946073, + "eval_Qnli-dev_cosine_f1": 0.684297520661157, + "eval_Qnli-dev_cosine_f1_threshold": 0.7668030858039856, + "eval_Qnli-dev_cosine_precision": 0.5609756097560976, + "eval_Qnli-dev_cosine_recall": 0.8771186440677966, + "eval_Qnli-dev_dot_accuracy": 0.638671875, + "eval_Qnli-dev_dot_accuracy_threshold": 458.50982666015625, + "eval_Qnli-dev_dot_ap": 0.5955959473763655, + "eval_Qnli-dev_dot_f1": 0.6643598615916955, + "eval_Qnli-dev_dot_f1_threshold": 421.7713623046875, + "eval_Qnli-dev_dot_precision": 0.5614035087719298, + "eval_Qnli-dev_dot_recall": 0.8135593220338984, + "eval_Qnli-dev_euclidean_accuracy": 0.681640625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.273839950561523, + "eval_Qnli-dev_euclidean_ap": 0.7198905948186887, + "eval_Qnli-dev_euclidean_f1": 0.686106346483705, + "eval_Qnli-dev_euclidean_f1_threshold": 15.542667388916016, + "eval_Qnli-dev_euclidean_precision": 0.5763688760806917, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.67578125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 271.1224060058594, + "eval_Qnli-dev_manhattan_ap": 0.7163103084602304, + "eval_Qnli-dev_manhattan_f1": 0.689655172413793, + "eval_Qnli-dev_manhattan_f1_threshold": 314.0755310058594, + "eval_Qnli-dev_manhattan_precision": 0.5813953488372093, + "eval_Qnli-dev_manhattan_recall": 0.847457627118644, + "eval_Qnli-dev_max_accuracy": 0.681640625, + "eval_Qnli-dev_max_accuracy_threshold": 458.50982666015625, + "eval_Qnli-dev_max_ap": 0.7198905948186887, + "eval_Qnli-dev_max_f1": 0.689655172413793, + "eval_Qnli-dev_max_f1_threshold": 421.7713623046875, + "eval_Qnli-dev_max_precision": 0.5813953488372093, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.712890625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9271770715713501, + "eval_allNLI-dev_cosine_ap": 0.5491311356331465, + "eval_allNLI-dev_cosine_f1": 0.5708245243128964, + "eval_allNLI-dev_cosine_f1_threshold": 0.8302508592605591, + "eval_allNLI-dev_cosine_precision": 0.45, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.669921875, + "eval_allNLI-dev_dot_accuracy_threshold": 526.2755126953125, + "eval_allNLI-dev_dot_ap": 0.4493470756775462, + "eval_allNLI-dev_dot_f1": 0.5302782324058919, + "eval_allNLI-dev_dot_f1_threshold": 409.7859802246094, + "eval_allNLI-dev_dot_precision": 0.3698630136986301, + "eval_allNLI-dev_dot_recall": 0.9364161849710982, + "eval_allNLI-dev_euclidean_accuracy": 0.712890625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 8.942924499511719, + "eval_allNLI-dev_euclidean_ap": 0.5529455148329905, + "eval_allNLI-dev_euclidean_f1": 0.5751633986928105, + "eval_allNLI-dev_euclidean_f1_threshold": 13.387319564819336, + "eval_allNLI-dev_euclidean_precision": 0.46153846153846156, + "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 201.3372039794922, + "eval_allNLI-dev_manhattan_ap": 0.5535429305922366, + "eval_allNLI-dev_manhattan_f1": 0.5675057208237987, + "eval_allNLI-dev_manhattan_f1_threshold": 263.4776611328125, + "eval_allNLI-dev_manhattan_precision": 0.4696969696969697, + "eval_allNLI-dev_manhattan_recall": 0.7167630057803468, + "eval_allNLI-dev_max_accuracy": 0.71875, + "eval_allNLI-dev_max_accuracy_threshold": 526.2755126953125, + "eval_allNLI-dev_max_ap": 0.5535429305922366, + "eval_allNLI-dev_max_f1": 0.5751633986928105, + "eval_allNLI-dev_max_f1_threshold": 409.7859802246094, + "eval_allNLI-dev_max_precision": 0.4696969696969697, + "eval_allNLI-dev_max_recall": 0.9364161849710982, + "eval_sequential_score": 0.7198905948186887, + "eval_sts-test_pearson_cosine": 0.7695750855183039, + "eval_sts-test_pearson_dot": 0.7238414788807679, + "eval_sts-test_pearson_euclidean": 0.7903775285225014, + "eval_sts-test_pearson_manhattan": 0.7850145963227658, + "eval_sts-test_pearson_max": 0.7903775285225014, + "eval_sts-test_spearman_cosine": 0.7907903212875741, + "eval_sts-test_spearman_dot": 0.7040900777418432, + "eval_sts-test_spearman_euclidean": 0.7829523168599161, + "eval_sts-test_spearman_manhattan": 0.7755189085864977, + "eval_sts-test_spearman_max": 0.7907903212875741, + "eval_vitaminc-pairs_loss": 3.1597630977630615, + "eval_vitaminc-pairs_runtime": 3.1844, + "eval_vitaminc-pairs_samples_per_second": 40.196, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_negation-triplets_loss": 1.5735217332839966, + "eval_negation-triplets_runtime": 0.7333, + "eval_negation-triplets_samples_per_second": 174.547, + "eval_negation-triplets_steps_per_second": 1.364, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_scitail-pairs-pos_loss": 0.23437997698783875, + "eval_scitail-pairs-pos_runtime": 0.8055, + "eval_scitail-pairs-pos_samples_per_second": 158.909, + "eval_scitail-pairs-pos_steps_per_second": 1.241, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_scitail-pairs-qa_loss": 0.026873519644141197, + "eval_scitail-pairs-qa_runtime": 0.5709, + "eval_scitail-pairs-qa_samples_per_second": 224.215, + "eval_scitail-pairs-qa_steps_per_second": 1.752, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_xsum-pairs_loss": 1.0596333742141724, + "eval_xsum-pairs_runtime": 3.0101, + "eval_xsum-pairs_samples_per_second": 42.523, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_sciq_pairs_loss": 0.14231224358081818, + "eval_sciq_pairs_runtime": 3.4147, + "eval_sciq_pairs_samples_per_second": 37.485, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_qasc_pairs_loss": 0.8660905361175537, + "eval_qasc_pairs_runtime": 0.5984, + "eval_qasc_pairs_samples_per_second": 213.886, + "eval_qasc_pairs_steps_per_second": 1.671, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_openbookqa_pairs_loss": 1.5507510900497437, + "eval_openbookqa_pairs_runtime": 0.576, + "eval_openbookqa_pairs_samples_per_second": 222.233, + "eval_openbookqa_pairs_steps_per_second": 1.736, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_msmarco_pairs_loss": 1.6068974733352661, + "eval_msmarco_pairs_runtime": 1.5129, + "eval_msmarco_pairs_samples_per_second": 84.608, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_nq_pairs_loss": 2.067472457885742, + "eval_nq_pairs_runtime": 2.8922, + "eval_nq_pairs_samples_per_second": 44.258, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_trivia_pairs_loss": 1.4165655374526978, + "eval_trivia_pairs_runtime": 3.4314, + "eval_trivia_pairs_samples_per_second": 37.303, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_gooaq_pairs_loss": 1.204696536064148, + "eval_gooaq_pairs_runtime": 0.9383, + "eval_gooaq_pairs_samples_per_second": 136.423, + "eval_gooaq_pairs_steps_per_second": 1.066, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_paws-pos_loss": 0.04588289558887482, + "eval_paws-pos_runtime": 0.6831, + "eval_paws-pos_samples_per_second": 187.395, + "eval_paws-pos_steps_per_second": 1.464, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_global_dataset_loss": 0.8645310997962952, + "eval_global_dataset_runtime": 13.3758, + "eval_global_dataset_samples_per_second": 31.101, + "eval_global_dataset_steps_per_second": 0.299, + "step": 440 + }, + { + "epoch": 0.4537037037037037, + "grad_norm": 14.464020729064941, + "learning_rate": 1.5919003115264795e-05, + "loss": 1.7843, + "step": 441 + }, + { + "epoch": 0.4547325102880658, + "grad_norm": 15.444217681884766, + "learning_rate": 1.595534787123572e-05, + "loss": 1.6954, + "step": 442 + }, + { + "epoch": 0.455761316872428, + "grad_norm": 10.515376091003418, + "learning_rate": 1.5991692627206643e-05, + "loss": 0.8673, + "step": 443 + }, + { + "epoch": 0.4567901234567901, + "grad_norm": 10.527128219604492, + "learning_rate": 1.6028037383177567e-05, + "loss": 0.8696, + "step": 444 + }, + { + "epoch": 0.45781893004115226, + "grad_norm": 13.480452537536621, + "learning_rate": 1.6064382139148495e-05, + "loss": 1.5461, + "step": 445 + }, + { + "epoch": 0.4588477366255144, + "grad_norm": 11.253717422485352, + "learning_rate": 1.6100726895119415e-05, + "loss": 0.9683, + "step": 446 + }, + { + "epoch": 0.45987654320987653, + "grad_norm": 12.138679504394531, + "learning_rate": 1.6137071651090343e-05, + "loss": 1.2983, + "step": 447 + }, + { + "epoch": 0.4609053497942387, + "grad_norm": 2.5345211029052734, + "learning_rate": 1.6173416407061267e-05, + "loss": 0.0942, + "step": 448 + }, + { + "epoch": 0.4619341563786008, + "grad_norm": 10.980514526367188, + "learning_rate": 1.6209761163032187e-05, + "loss": 0.8264, + "step": 449 + }, + { + "epoch": 0.46296296296296297, + "grad_norm": 16.59669303894043, + "learning_rate": 1.6246105919003115e-05, + "loss": 2.1522, + "step": 450 + }, + { + "epoch": 0.46399176954732513, + "grad_norm": 21.501604080200195, + "learning_rate": 1.628245067497404e-05, + "loss": 2.6668, + "step": 451 + }, + { + "epoch": 0.46502057613168724, + "grad_norm": 11.803515434265137, + "learning_rate": 1.6318795430944963e-05, + "loss": 0.9999, + "step": 452 + }, + { + "epoch": 0.4660493827160494, + "grad_norm": 13.230558395385742, + "learning_rate": 1.6355140186915887e-05, + "loss": 0.9551, + "step": 453 + }, + { + "epoch": 0.4670781893004115, + "grad_norm": 11.019618034362793, + "learning_rate": 1.639148494288681e-05, + "loss": 0.8174, + "step": 454 + }, + { + "epoch": 0.46810699588477367, + "grad_norm": 14.335307121276855, + "learning_rate": 1.6427829698857735e-05, + "loss": 1.6169, + "step": 455 + }, + { + "epoch": 0.4691358024691358, + "grad_norm": 5.958987236022949, + "learning_rate": 1.646417445482866e-05, + "loss": 0.2584, + "step": 456 + }, + { + "epoch": 0.47016460905349794, + "grad_norm": 14.919219970703125, + "learning_rate": 1.6500519210799583e-05, + "loss": 1.2947, + "step": 457 + }, + { + "epoch": 0.4711934156378601, + "grad_norm": 12.892438888549805, + "learning_rate": 1.6536863966770507e-05, + "loss": 1.0283, + "step": 458 + }, + { + "epoch": 0.4722222222222222, + "grad_norm": 12.579314231872559, + "learning_rate": 1.657320872274143e-05, + "loss": 1.0379, + "step": 459 + }, + { + "epoch": 0.4732510288065844, + "grad_norm": 17.32071876525879, + "learning_rate": 1.6609553478712355e-05, + "loss": 2.4063, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_Qnli-dev_cosine_accuracy": 0.66015625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8584603071212769, + "eval_Qnli-dev_cosine_ap": 0.6987965309483122, + "eval_Qnli-dev_cosine_f1": 0.6860068259385665, + "eval_Qnli-dev_cosine_f1_threshold": 0.767835259437561, + "eval_Qnli-dev_cosine_precision": 0.5742857142857143, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.64453125, + "eval_Qnli-dev_dot_accuracy_threshold": 446.875, + "eval_Qnli-dev_dot_ap": 0.5901482043145834, + "eval_Qnli-dev_dot_f1": 0.6643109540636043, + "eval_Qnli-dev_dot_f1_threshold": 406.9656982421875, + "eval_Qnli-dev_dot_precision": 0.5696969696969697, + "eval_Qnli-dev_dot_recall": 0.7966101694915254, + "eval_Qnli-dev_euclidean_accuracy": 0.66796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 11.938894271850586, + "eval_Qnli-dev_euclidean_ap": 0.7074711630770054, + "eval_Qnli-dev_euclidean_f1": 0.6917808219178082, + "eval_Qnli-dev_euclidean_f1_threshold": 15.646432876586914, + "eval_Qnli-dev_euclidean_precision": 0.5804597701149425, + "eval_Qnli-dev_euclidean_recall": 0.8559322033898306, + "eval_Qnli-dev_manhattan_accuracy": 0.6640625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 258.43310546875, + "eval_Qnli-dev_manhattan_ap": 0.7074831376971712, + "eval_Qnli-dev_manhattan_f1": 0.6837606837606838, + "eval_Qnli-dev_manhattan_f1_threshold": 317.3417053222656, + "eval_Qnli-dev_manhattan_precision": 0.5730659025787965, + "eval_Qnli-dev_manhattan_recall": 0.847457627118644, + "eval_Qnli-dev_max_accuracy": 0.66796875, + "eval_Qnli-dev_max_accuracy_threshold": 446.875, + "eval_Qnli-dev_max_ap": 0.7074831376971712, + "eval_Qnli-dev_max_f1": 0.6917808219178082, + "eval_Qnli-dev_max_f1_threshold": 406.9656982421875, + "eval_Qnli-dev_max_precision": 0.5804597701149425, + "eval_Qnli-dev_max_recall": 0.8559322033898306, + "eval_allNLI-dev_cosine_accuracy": 0.71484375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9075762033462524, + "eval_allNLI-dev_cosine_ap": 0.5556209722685957, + "eval_allNLI-dev_cosine_f1": 0.5688487584650113, + "eval_allNLI-dev_cosine_f1_threshold": 0.822675347328186, + "eval_allNLI-dev_cosine_precision": 0.4666666666666667, + "eval_allNLI-dev_cosine_recall": 0.7283236994219653, + "eval_allNLI-dev_dot_accuracy": 0.666015625, + "eval_allNLI-dev_dot_accuracy_threshold": 479.59765625, + "eval_allNLI-dev_dot_ap": 0.4395722804668881, + "eval_allNLI-dev_dot_f1": 0.5365853658536586, + "eval_allNLI-dev_dot_f1_threshold": 378.3732604980469, + "eval_allNLI-dev_dot_precision": 0.38403990024937656, + "eval_allNLI-dev_dot_recall": 0.8901734104046243, + "eval_allNLI-dev_euclidean_accuracy": 0.7265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.884578704833984, + "eval_allNLI-dev_euclidean_ap": 0.5607678101966321, + "eval_allNLI-dev_euclidean_f1": 0.5841121495327103, + "eval_allNLI-dev_euclidean_f1_threshold": 13.222391128540039, + "eval_allNLI-dev_euclidean_precision": 0.49019607843137253, + "eval_allNLI-dev_euclidean_recall": 0.7225433526011561, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 207.45559692382812, + "eval_allNLI-dev_manhattan_ap": 0.5594530995989421, + "eval_allNLI-dev_manhattan_f1": 0.5664062499999999, + "eval_allNLI-dev_manhattan_f1_threshold": 298.21136474609375, + "eval_allNLI-dev_manhattan_precision": 0.4277286135693215, + "eval_allNLI-dev_manhattan_recall": 0.838150289017341, + "eval_allNLI-dev_max_accuracy": 0.7265625, + "eval_allNLI-dev_max_accuracy_threshold": 479.59765625, + "eval_allNLI-dev_max_ap": 0.5607678101966321, + "eval_allNLI-dev_max_f1": 0.5841121495327103, + "eval_allNLI-dev_max_f1_threshold": 378.3732604980469, + "eval_allNLI-dev_max_precision": 0.49019607843137253, + "eval_allNLI-dev_max_recall": 0.8901734104046243, + "eval_sequential_score": 0.7074831376971712, + "eval_sts-test_pearson_cosine": 0.7825801667596759, + "eval_sts-test_pearson_dot": 0.7405682776064579, + "eval_sts-test_pearson_euclidean": 0.8045260928771718, + "eval_sts-test_pearson_manhattan": 0.8000832846763656, + "eval_sts-test_pearson_max": 0.8045260928771718, + "eval_sts-test_spearman_cosine": 0.8065376636535482, + "eval_sts-test_spearman_dot": 0.7210651262128288, + "eval_sts-test_spearman_euclidean": 0.7970397901896217, + "eval_sts-test_spearman_manhattan": 0.790139056180545, + "eval_sts-test_spearman_max": 0.8065376636535482, + "eval_vitaminc-pairs_loss": 3.132262945175171, + "eval_vitaminc-pairs_runtime": 3.1567, + "eval_vitaminc-pairs_samples_per_second": 40.548, + "eval_vitaminc-pairs_steps_per_second": 0.317, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_negation-triplets_loss": 1.4925687313079834, + "eval_negation-triplets_runtime": 0.7314, + "eval_negation-triplets_samples_per_second": 175.004, + "eval_negation-triplets_steps_per_second": 1.367, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_scitail-pairs-pos_loss": 0.20003551244735718, + "eval_scitail-pairs-pos_runtime": 0.7903, + "eval_scitail-pairs-pos_samples_per_second": 161.967, + "eval_scitail-pairs-pos_steps_per_second": 1.265, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_scitail-pairs-qa_loss": 0.019925443455576897, + "eval_scitail-pairs-qa_runtime": 0.5973, + "eval_scitail-pairs-qa_samples_per_second": 214.291, + "eval_scitail-pairs-qa_steps_per_second": 1.674, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_xsum-pairs_loss": 1.011654019355774, + "eval_xsum-pairs_runtime": 3.0219, + "eval_xsum-pairs_samples_per_second": 42.358, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_sciq_pairs_loss": 0.1401093304157257, + "eval_sciq_pairs_runtime": 3.4024, + "eval_sciq_pairs_samples_per_second": 37.621, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_qasc_pairs_loss": 0.8895432949066162, + "eval_qasc_pairs_runtime": 0.5956, + "eval_qasc_pairs_samples_per_second": 214.909, + "eval_qasc_pairs_steps_per_second": 1.679, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_openbookqa_pairs_loss": 1.5750139951705933, + "eval_openbookqa_pairs_runtime": 0.5749, + "eval_openbookqa_pairs_samples_per_second": 222.663, + "eval_openbookqa_pairs_steps_per_second": 1.74, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_msmarco_pairs_loss": 1.5957564115524292, + "eval_msmarco_pairs_runtime": 1.5166, + "eval_msmarco_pairs_samples_per_second": 84.401, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_nq_pairs_loss": 1.8501969575881958, + "eval_nq_pairs_runtime": 2.9017, + "eval_nq_pairs_samples_per_second": 44.113, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_trivia_pairs_loss": 1.3718889951705933, + "eval_trivia_pairs_runtime": 3.4316, + "eval_trivia_pairs_samples_per_second": 37.3, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_gooaq_pairs_loss": 1.0226097106933594, + "eval_gooaq_pairs_runtime": 0.9449, + "eval_gooaq_pairs_samples_per_second": 135.461, + "eval_gooaq_pairs_steps_per_second": 1.058, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_paws-pos_loss": 0.04436105117201805, + "eval_paws-pos_runtime": 0.6816, + "eval_paws-pos_samples_per_second": 187.781, + "eval_paws-pos_steps_per_second": 1.467, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_global_dataset_loss": 0.8342341184616089, + "eval_global_dataset_runtime": 13.3662, + "eval_global_dataset_samples_per_second": 31.123, + "eval_global_dataset_steps_per_second": 0.299, + "step": 460 + }, + { + "epoch": 0.4742798353909465, + "grad_norm": 20.145654678344727, + "learning_rate": 1.664589823468328e-05, + "loss": 3.1972, + "step": 461 + }, + { + "epoch": 0.47530864197530864, + "grad_norm": 10.836761474609375, + "learning_rate": 1.6682242990654203e-05, + "loss": 0.6914, + "step": 462 + }, + { + "epoch": 0.4763374485596708, + "grad_norm": 13.806187629699707, + "learning_rate": 1.671858774662513e-05, + "loss": 2.1495, + "step": 463 + }, + { + "epoch": 0.4773662551440329, + "grad_norm": 15.314764022827148, + "learning_rate": 1.675493250259605e-05, + "loss": 1.9195, + "step": 464 + }, + { + "epoch": 0.4783950617283951, + "grad_norm": 2.3654873371124268, + "learning_rate": 1.6791277258566975e-05, + "loss": 0.0819, + "step": 465 + }, + { + "epoch": 0.4794238683127572, + "grad_norm": 6.35114860534668, + "learning_rate": 1.6827622014537902e-05, + "loss": 0.2882, + "step": 466 + }, + { + "epoch": 0.48045267489711935, + "grad_norm": 13.575540542602539, + "learning_rate": 1.6863966770508823e-05, + "loss": 1.3187, + "step": 467 + }, + { + "epoch": 0.48148148148148145, + "grad_norm": 13.726608276367188, + "learning_rate": 1.690031152647975e-05, + "loss": 2.0175, + "step": 468 + }, + { + "epoch": 0.4825102880658436, + "grad_norm": 12.422574996948242, + "learning_rate": 1.6936656282450674e-05, + "loss": 1.1298, + "step": 469 + }, + { + "epoch": 0.4835390946502058, + "grad_norm": 10.693941116333008, + "learning_rate": 1.69730010384216e-05, + "loss": 0.751, + "step": 470 + }, + { + "epoch": 0.4845679012345679, + "grad_norm": 17.281755447387695, + "learning_rate": 1.7009345794392523e-05, + "loss": 1.7641, + "step": 471 + }, + { + "epoch": 0.48559670781893005, + "grad_norm": 13.825311660766602, + "learning_rate": 1.7045690550363447e-05, + "loss": 1.2676, + "step": 472 + }, + { + "epoch": 0.48662551440329216, + "grad_norm": 13.023504257202148, + "learning_rate": 1.708203530633437e-05, + "loss": 1.2802, + "step": 473 + }, + { + "epoch": 0.4876543209876543, + "grad_norm": 6.976680755615234, + "learning_rate": 1.7118380062305295e-05, + "loss": 0.2798, + "step": 474 + }, + { + "epoch": 0.4886831275720165, + "grad_norm": 12.474639892578125, + "learning_rate": 1.715472481827622e-05, + "loss": 1.786, + "step": 475 + }, + { + "epoch": 0.4897119341563786, + "grad_norm": 11.611064910888672, + "learning_rate": 1.7191069574247143e-05, + "loss": 0.9421, + "step": 476 + }, + { + "epoch": 0.49074074074074076, + "grad_norm": 17.27467155456543, + "learning_rate": 1.7227414330218067e-05, + "loss": 1.8988, + "step": 477 + }, + { + "epoch": 0.49176954732510286, + "grad_norm": 11.986361503601074, + "learning_rate": 1.726375908618899e-05, + "loss": 1.0397, + "step": 478 + }, + { + "epoch": 0.492798353909465, + "grad_norm": 19.697477340698242, + "learning_rate": 1.7300103842159915e-05, + "loss": 2.2289, + "step": 479 + }, + { + "epoch": 0.49382716049382713, + "grad_norm": 11.965368270874023, + "learning_rate": 1.733644859813084e-05, + "loss": 0.8923, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_Qnli-dev_cosine_accuracy": 0.677734375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8423784971237183, + "eval_Qnli-dev_cosine_ap": 0.7145917918948612, + "eval_Qnli-dev_cosine_f1": 0.6890459363957597, + "eval_Qnli-dev_cosine_f1_threshold": 0.7728449106216431, + "eval_Qnli-dev_cosine_precision": 0.5909090909090909, + "eval_Qnli-dev_cosine_recall": 0.826271186440678, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 412.736083984375, + "eval_Qnli-dev_dot_ap": 0.6183902376998758, + "eval_Qnli-dev_dot_f1": 0.673040152963671, + "eval_Qnli-dev_dot_f1_threshold": 410.0682373046875, + "eval_Qnli-dev_dot_precision": 0.6132404181184669, + "eval_Qnli-dev_dot_recall": 0.7457627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.6796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.48289966583252, + "eval_Qnli-dev_euclidean_ap": 0.7218061519598871, + "eval_Qnli-dev_euclidean_f1": 0.6889279437609841, + "eval_Qnli-dev_euclidean_f1_threshold": 15.510814666748047, + "eval_Qnli-dev_euclidean_precision": 0.5885885885885885, + "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, + "eval_Qnli-dev_manhattan_accuracy": 0.6796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 277.7557678222656, + "eval_Qnli-dev_manhattan_ap": 0.7243976667802744, + "eval_Qnli-dev_manhattan_f1": 0.6917808219178082, + "eval_Qnli-dev_manhattan_f1_threshold": 320.653564453125, + "eval_Qnli-dev_manhattan_precision": 0.5804597701149425, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.6796875, + "eval_Qnli-dev_max_accuracy_threshold": 412.736083984375, + "eval_Qnli-dev_max_ap": 0.7243976667802744, + "eval_Qnli-dev_max_f1": 0.6917808219178082, + "eval_Qnli-dev_max_f1_threshold": 410.0682373046875, + "eval_Qnli-dev_max_precision": 0.6132404181184669, + "eval_Qnli-dev_max_recall": 0.8559322033898306, + "eval_allNLI-dev_cosine_accuracy": 0.712890625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9155895113945007, + "eval_allNLI-dev_cosine_ap": 0.5587323061807457, + "eval_allNLI-dev_cosine_f1": 0.569377990430622, + "eval_allNLI-dev_cosine_f1_threshold": 0.8306180238723755, + "eval_allNLI-dev_cosine_precision": 0.4857142857142857, + "eval_allNLI-dev_cosine_recall": 0.6878612716763006, + "eval_allNLI-dev_dot_accuracy": 0.669921875, + "eval_allNLI-dev_dot_accuracy_threshold": 486.188232421875, + "eval_allNLI-dev_dot_ap": 0.44895440516126245, + "eval_allNLI-dev_dot_f1": 0.5326633165829145, + "eval_allNLI-dev_dot_f1_threshold": 373.0961608886719, + "eval_allNLI-dev_dot_precision": 0.375, + "eval_allNLI-dev_dot_recall": 0.9190751445086706, + "eval_allNLI-dev_euclidean_accuracy": 0.720703125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.15213680267334, + "eval_allNLI-dev_euclidean_ap": 0.5670459225360986, + "eval_allNLI-dev_euclidean_f1": 0.5797101449275361, + "eval_allNLI-dev_euclidean_f1_threshold": 13.239068984985352, + "eval_allNLI-dev_euclidean_precision": 0.4979253112033195, + "eval_allNLI-dev_euclidean_recall": 0.6936416184971098, + "eval_allNLI-dev_manhattan_accuracy": 0.720703125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 198.6392822265625, + "eval_allNLI-dev_manhattan_ap": 0.5637178226555747, + "eval_allNLI-dev_manhattan_f1": 0.569620253164557, + "eval_allNLI-dev_manhattan_f1_threshold": 287.952392578125, + "eval_allNLI-dev_manhattan_precision": 0.4485049833887043, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.720703125, + "eval_allNLI-dev_max_accuracy_threshold": 486.188232421875, + "eval_allNLI-dev_max_ap": 0.5670459225360986, + "eval_allNLI-dev_max_f1": 0.5797101449275361, + "eval_allNLI-dev_max_f1_threshold": 373.0961608886719, + "eval_allNLI-dev_max_precision": 0.4979253112033195, + "eval_allNLI-dev_max_recall": 0.9190751445086706, + "eval_sequential_score": 0.7243976667802744, + "eval_sts-test_pearson_cosine": 0.7891034120839744, + "eval_sts-test_pearson_dot": 0.7520122002590104, + "eval_sts-test_pearson_euclidean": 0.8084749326758871, + "eval_sts-test_pearson_manhattan": 0.8035797835971765, + "eval_sts-test_pearson_max": 0.8084749326758871, + "eval_sts-test_spearman_cosine": 0.8092891054576755, + "eval_sts-test_spearman_dot": 0.729727493626578, + "eval_sts-test_spearman_euclidean": 0.7991726353075358, + "eval_sts-test_spearman_manhattan": 0.7930649384015762, + "eval_sts-test_spearman_max": 0.8092891054576755, + "eval_vitaminc-pairs_loss": 2.9393234252929688, + "eval_vitaminc-pairs_runtime": 3.181, + "eval_vitaminc-pairs_samples_per_second": 40.239, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_negation-triplets_loss": 1.4414068460464478, + "eval_negation-triplets_runtime": 0.752, + "eval_negation-triplets_samples_per_second": 170.205, + "eval_negation-triplets_steps_per_second": 1.33, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_scitail-pairs-pos_loss": 0.19124868512153625, + "eval_scitail-pairs-pos_runtime": 0.801, + "eval_scitail-pairs-pos_samples_per_second": 159.801, + "eval_scitail-pairs-pos_steps_per_second": 1.248, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_scitail-pairs-qa_loss": 0.015640273690223694, + "eval_scitail-pairs-qa_runtime": 0.5674, + "eval_scitail-pairs-qa_samples_per_second": 225.595, + "eval_scitail-pairs-qa_steps_per_second": 1.762, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_xsum-pairs_loss": 0.9755306839942932, + "eval_xsum-pairs_runtime": 3.0208, + "eval_xsum-pairs_samples_per_second": 42.373, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_sciq_pairs_loss": 0.14197379350662231, + "eval_sciq_pairs_runtime": 3.4128, + "eval_sciq_pairs_samples_per_second": 37.506, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_qasc_pairs_loss": 0.8245877623558044, + "eval_qasc_pairs_runtime": 0.6116, + "eval_qasc_pairs_samples_per_second": 209.289, + "eval_qasc_pairs_steps_per_second": 1.635, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_openbookqa_pairs_loss": 1.38233482837677, + "eval_openbookqa_pairs_runtime": 0.5798, + "eval_openbookqa_pairs_samples_per_second": 220.762, + "eval_openbookqa_pairs_steps_per_second": 1.725, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_msmarco_pairs_loss": 1.583013653755188, + "eval_msmarco_pairs_runtime": 1.5116, + "eval_msmarco_pairs_samples_per_second": 84.681, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_nq_pairs_loss": 1.8946471214294434, + "eval_nq_pairs_runtime": 2.889, + "eval_nq_pairs_samples_per_second": 44.307, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_trivia_pairs_loss": 1.2537095546722412, + "eval_trivia_pairs_runtime": 3.4426, + "eval_trivia_pairs_samples_per_second": 37.181, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_gooaq_pairs_loss": 0.978269636631012, + "eval_gooaq_pairs_runtime": 0.946, + "eval_gooaq_pairs_samples_per_second": 135.311, + "eval_gooaq_pairs_steps_per_second": 1.057, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_paws-pos_loss": 0.04379463195800781, + "eval_paws-pos_runtime": 0.6992, + "eval_paws-pos_samples_per_second": 183.062, + "eval_paws-pos_steps_per_second": 1.43, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_global_dataset_loss": 0.784004807472229, + "eval_global_dataset_runtime": 13.3627, + "eval_global_dataset_samples_per_second": 31.131, + "eval_global_dataset_steps_per_second": 0.299, + "step": 480 + }, + { + "epoch": 0.4948559670781893, + "grad_norm": 15.596723556518555, + "learning_rate": 1.7372793354101766e-05, + "loss": 1.5281, + "step": 481 + }, + { + "epoch": 0.49588477366255146, + "grad_norm": 11.775968551635742, + "learning_rate": 1.7409138110072687e-05, + "loss": 1.4874, + "step": 482 + }, + { + "epoch": 0.49691358024691357, + "grad_norm": 10.956304550170898, + "learning_rate": 1.744548286604361e-05, + "loss": 1.3973, + "step": 483 + }, + { + "epoch": 0.49794238683127573, + "grad_norm": 9.66591739654541, + "learning_rate": 1.7481827622014538e-05, + "loss": 0.6967, + "step": 484 + }, + { + "epoch": 0.49897119341563784, + "grad_norm": 15.71474838256836, + "learning_rate": 1.751817237798546e-05, + "loss": 1.8954, + "step": 485 + }, + { + "epoch": 0.5, + "grad_norm": 16.29734992980957, + "learning_rate": 1.7554517133956383e-05, + "loss": 2.1666, + "step": 486 + }, + { + "epoch": 0.5010288065843621, + "grad_norm": 11.766134262084961, + "learning_rate": 1.759086188992731e-05, + "loss": 0.9414, + "step": 487 + }, + { + "epoch": 0.5020576131687243, + "grad_norm": 19.231468200683594, + "learning_rate": 1.762720664589823e-05, + "loss": 2.1697, + "step": 488 + }, + { + "epoch": 0.5030864197530864, + "grad_norm": 14.636868476867676, + "learning_rate": 1.7663551401869155e-05, + "loss": 2.2224, + "step": 489 + }, + { + "epoch": 0.5041152263374485, + "grad_norm": 9.892867088317871, + "learning_rate": 1.7699896157840082e-05, + "loss": 0.7158, + "step": 490 + }, + { + "epoch": 0.5051440329218106, + "grad_norm": 10.343125343322754, + "learning_rate": 1.7736240913811006e-05, + "loss": 0.8864, + "step": 491 + }, + { + "epoch": 0.5061728395061729, + "grad_norm": 11.846784591674805, + "learning_rate": 1.7772585669781927e-05, + "loss": 1.7706, + "step": 492 + }, + { + "epoch": 0.507201646090535, + "grad_norm": 11.437203407287598, + "learning_rate": 1.7808930425752854e-05, + "loss": 1.0602, + "step": 493 + }, + { + "epoch": 0.5082304526748971, + "grad_norm": 12.174988746643066, + "learning_rate": 1.784527518172378e-05, + "loss": 1.6377, + "step": 494 + }, + { + "epoch": 0.5092592592592593, + "grad_norm": 2.9324963092803955, + "learning_rate": 1.78816199376947e-05, + "loss": 0.1079, + "step": 495 + }, + { + "epoch": 0.5102880658436214, + "grad_norm": 9.480378150939941, + "learning_rate": 1.7917964693665626e-05, + "loss": 0.7662, + "step": 496 + }, + { + "epoch": 0.5113168724279835, + "grad_norm": 11.27574348449707, + "learning_rate": 1.795430944963655e-05, + "loss": 1.662, + "step": 497 + }, + { + "epoch": 0.5123456790123457, + "grad_norm": 11.860407829284668, + "learning_rate": 1.7990654205607474e-05, + "loss": 2.0872, + "step": 498 + }, + { + "epoch": 0.5133744855967078, + "grad_norm": 9.084991455078125, + "learning_rate": 1.80269989615784e-05, + "loss": 0.6517, + "step": 499 + }, + { + "epoch": 0.51440329218107, + "grad_norm": 10.730901718139648, + "learning_rate": 1.8063343717549322e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_Qnli-dev_cosine_accuracy": 0.69140625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8430161476135254, + "eval_Qnli-dev_cosine_ap": 0.7175942339872103, + "eval_Qnli-dev_cosine_f1": 0.6925795053003534, + "eval_Qnli-dev_cosine_f1_threshold": 0.7885958552360535, + "eval_Qnli-dev_cosine_precision": 0.593939393939394, + "eval_Qnli-dev_cosine_recall": 0.8305084745762712, + "eval_Qnli-dev_dot_accuracy": 0.654296875, + "eval_Qnli-dev_dot_accuracy_threshold": 444.99591064453125, + "eval_Qnli-dev_dot_ap": 0.6038099250184231, + "eval_Qnli-dev_dot_f1": 0.6687797147385103, + "eval_Qnli-dev_dot_f1_threshold": 398.5889892578125, + "eval_Qnli-dev_dot_precision": 0.5341772151898734, + "eval_Qnli-dev_dot_recall": 0.8940677966101694, + "eval_Qnli-dev_euclidean_accuracy": 0.6953125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.040979385375977, + "eval_Qnli-dev_euclidean_ap": 0.7271286610454261, + "eval_Qnli-dev_euclidean_f1": 0.6927175843694494, + "eval_Qnli-dev_euclidean_f1_threshold": 15.024581909179688, + "eval_Qnli-dev_euclidean_precision": 0.5963302752293578, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.697265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 281.1022033691406, + "eval_Qnli-dev_manhattan_ap": 0.728019969713725, + "eval_Qnli-dev_manhattan_f1": 0.6934306569343065, + "eval_Qnli-dev_manhattan_f1_threshold": 299.29119873046875, + "eval_Qnli-dev_manhattan_precision": 0.6089743589743589, + "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, + "eval_Qnli-dev_max_accuracy": 0.697265625, + "eval_Qnli-dev_max_accuracy_threshold": 444.99591064453125, + "eval_Qnli-dev_max_ap": 0.728019969713725, + "eval_Qnli-dev_max_f1": 0.6934306569343065, + "eval_Qnli-dev_max_f1_threshold": 398.5889892578125, + "eval_Qnli-dev_max_precision": 0.6089743589743589, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8899899125099182, + "eval_allNLI-dev_cosine_ap": 0.5657808168208326, + "eval_allNLI-dev_cosine_f1": 0.5817409766454352, + "eval_allNLI-dev_cosine_f1_threshold": 0.8096699714660645, + "eval_allNLI-dev_cosine_precision": 0.4597315436241611, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.67578125, + "eval_allNLI-dev_dot_accuracy_threshold": 478.03387451171875, + "eval_allNLI-dev_dot_ap": 0.4579389209157686, + "eval_allNLI-dev_dot_f1": 0.5588822355289421, + "eval_allNLI-dev_dot_f1_threshold": 411.92333984375, + "eval_allNLI-dev_dot_precision": 0.4268292682926829, + "eval_allNLI-dev_dot_recall": 0.8092485549132948, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.441316604614258, + "eval_allNLI-dev_euclidean_ap": 0.5691945577806491, + "eval_allNLI-dev_euclidean_f1": 0.5872340425531914, + "eval_allNLI-dev_euclidean_f1_threshold": 14.106014251708984, + "eval_allNLI-dev_euclidean_precision": 0.46464646464646464, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 219.55010986328125, + "eval_allNLI-dev_manhattan_ap": 0.5696159330415428, + "eval_allNLI-dev_manhattan_f1": 0.5752808988764044, + "eval_allNLI-dev_manhattan_f1_threshold": 278.9423828125, + "eval_allNLI-dev_manhattan_precision": 0.47058823529411764, + "eval_allNLI-dev_manhattan_recall": 0.7398843930635838, + "eval_allNLI-dev_max_accuracy": 0.73046875, + "eval_allNLI-dev_max_accuracy_threshold": 478.03387451171875, + "eval_allNLI-dev_max_ap": 0.5696159330415428, + "eval_allNLI-dev_max_f1": 0.5872340425531914, + "eval_allNLI-dev_max_f1_threshold": 411.92333984375, + "eval_allNLI-dev_max_precision": 0.47058823529411764, + "eval_allNLI-dev_max_recall": 0.8092485549132948, + "eval_sequential_score": 0.728019969713725, + "eval_sts-test_pearson_cosine": 0.7967354149956867, + "eval_sts-test_pearson_dot": 0.7587343105275375, + "eval_sts-test_pearson_euclidean": 0.8180154478758743, + "eval_sts-test_pearson_manhattan": 0.8161849279054585, + "eval_sts-test_pearson_max": 0.8180154478758743, + "eval_sts-test_spearman_cosine": 0.8158280702696641, + "eval_sts-test_spearman_dot": 0.7368859501500076, + "eval_sts-test_spearman_euclidean": 0.8091461699287915, + "eval_sts-test_spearman_manhattan": 0.8057763999460191, + "eval_sts-test_spearman_max": 0.8158280702696641, + "eval_vitaminc-pairs_loss": 2.9438083171844482, + "eval_vitaminc-pairs_runtime": 3.189, + "eval_vitaminc-pairs_samples_per_second": 40.138, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_negation-triplets_loss": 1.3407632112503052, + "eval_negation-triplets_runtime": 0.7388, + "eval_negation-triplets_samples_per_second": 173.246, + "eval_negation-triplets_steps_per_second": 1.353, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_scitail-pairs-pos_loss": 0.2115849405527115, + "eval_scitail-pairs-pos_runtime": 0.7971, + "eval_scitail-pairs-pos_samples_per_second": 160.573, + "eval_scitail-pairs-pos_steps_per_second": 1.254, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_scitail-pairs-qa_loss": 0.018660105764865875, + "eval_scitail-pairs-qa_runtime": 0.5693, + "eval_scitail-pairs-qa_samples_per_second": 224.852, + "eval_scitail-pairs-qa_steps_per_second": 1.757, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_xsum-pairs_loss": 0.9552733898162842, + "eval_xsum-pairs_runtime": 3.0194, + "eval_xsum-pairs_samples_per_second": 42.392, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_sciq_pairs_loss": 0.13849374651908875, + "eval_sciq_pairs_runtime": 3.458, + "eval_sciq_pairs_samples_per_second": 37.015, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_qasc_pairs_loss": 0.8119698166847229, + "eval_qasc_pairs_runtime": 0.5998, + "eval_qasc_pairs_samples_per_second": 213.389, + "eval_qasc_pairs_steps_per_second": 1.667, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_openbookqa_pairs_loss": 1.512932538986206, + "eval_openbookqa_pairs_runtime": 0.5734, + "eval_openbookqa_pairs_samples_per_second": 223.22, + "eval_openbookqa_pairs_steps_per_second": 1.744, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_msmarco_pairs_loss": 1.4880919456481934, + "eval_msmarco_pairs_runtime": 1.5132, + "eval_msmarco_pairs_samples_per_second": 84.588, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_nq_pairs_loss": 1.750890851020813, + "eval_nq_pairs_runtime": 2.8955, + "eval_nq_pairs_samples_per_second": 44.206, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_trivia_pairs_loss": 1.3733922243118286, + "eval_trivia_pairs_runtime": 3.4378, + "eval_trivia_pairs_samples_per_second": 37.233, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_gooaq_pairs_loss": 0.938717782497406, + "eval_gooaq_pairs_runtime": 0.95, + "eval_gooaq_pairs_samples_per_second": 134.741, + "eval_gooaq_pairs_steps_per_second": 1.053, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_paws-pos_loss": 0.04237303510308266, + "eval_paws-pos_runtime": 0.6799, + "eval_paws-pos_samples_per_second": 188.26, + "eval_paws-pos_steps_per_second": 1.471, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_global_dataset_loss": 0.7602720856666565, + "eval_global_dataset_runtime": 13.3515, + "eval_global_dataset_samples_per_second": 31.157, + "eval_global_dataset_steps_per_second": 0.3, + "step": 500 + }, + { + "epoch": 0.5154320987654321, + "grad_norm": 20.222156524658203, + "learning_rate": 1.8099688473520246e-05, + "loss": 3.6159, + "step": 501 + }, + { + "epoch": 0.5164609053497943, + "grad_norm": 6.827728271484375, + "learning_rate": 1.8136033229491174e-05, + "loss": 0.2539, + "step": 502 + }, + { + "epoch": 0.5174897119341564, + "grad_norm": 11.333172798156738, + "learning_rate": 1.8172377985462095e-05, + "loss": 0.8589, + "step": 503 + }, + { + "epoch": 0.5185185185185185, + "grad_norm": 12.576927185058594, + "learning_rate": 1.820872274143302e-05, + "loss": 1.7416, + "step": 504 + }, + { + "epoch": 0.5195473251028807, + "grad_norm": 15.945344924926758, + "learning_rate": 1.8245067497403946e-05, + "loss": 1.7693, + "step": 505 + }, + { + "epoch": 0.5205761316872428, + "grad_norm": 17.440074920654297, + "learning_rate": 1.8281412253374867e-05, + "loss": 1.5639, + "step": 506 + }, + { + "epoch": 0.5216049382716049, + "grad_norm": 11.141048431396484, + "learning_rate": 1.831775700934579e-05, + "loss": 0.8746, + "step": 507 + }, + { + "epoch": 0.522633744855967, + "grad_norm": 15.599634170532227, + "learning_rate": 1.8354101765316718e-05, + "loss": 1.5769, + "step": 508 + }, + { + "epoch": 0.5236625514403292, + "grad_norm": 10.608887672424316, + "learning_rate": 1.8390446521287642e-05, + "loss": 0.6175, + "step": 509 + }, + { + "epoch": 0.5246913580246914, + "grad_norm": 11.312731742858887, + "learning_rate": 1.8426791277258563e-05, + "loss": 0.8312, + "step": 510 + }, + { + "epoch": 0.5257201646090535, + "grad_norm": 9.91249942779541, + "learning_rate": 1.846313603322949e-05, + "loss": 0.867, + "step": 511 + }, + { + "epoch": 0.5267489711934157, + "grad_norm": 11.7357816696167, + "learning_rate": 1.8499480789200414e-05, + "loss": 1.2859, + "step": 512 + }, + { + "epoch": 0.5277777777777778, + "grad_norm": 21.4658203125, + "learning_rate": 1.8535825545171335e-05, + "loss": 2.2659, + "step": 513 + }, + { + "epoch": 0.5288065843621399, + "grad_norm": 18.00661849975586, + "learning_rate": 1.8572170301142262e-05, + "loss": 1.7138, + "step": 514 + }, + { + "epoch": 0.529835390946502, + "grad_norm": 7.337871074676514, + "learning_rate": 1.8608515057113186e-05, + "loss": 0.3393, + "step": 515 + }, + { + "epoch": 0.5308641975308642, + "grad_norm": 12.568946838378906, + "learning_rate": 1.864485981308411e-05, + "loss": 0.9776, + "step": 516 + }, + { + "epoch": 0.5318930041152263, + "grad_norm": 10.954802513122559, + "learning_rate": 1.8681204569055034e-05, + "loss": 0.6971, + "step": 517 + }, + { + "epoch": 0.5329218106995884, + "grad_norm": 10.687813758850098, + "learning_rate": 1.8717549325025958e-05, + "loss": 0.6725, + "step": 518 + }, + { + "epoch": 0.5339506172839507, + "grad_norm": 11.719423294067383, + "learning_rate": 1.8753894080996882e-05, + "loss": 0.6854, + "step": 519 + }, + { + "epoch": 0.5349794238683128, + "grad_norm": 16.232799530029297, + "learning_rate": 1.879023883696781e-05, + "loss": 1.7726, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_Qnli-dev_cosine_accuracy": 0.6875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8248189687728882, + "eval_Qnli-dev_cosine_ap": 0.7196731202506679, + "eval_Qnli-dev_cosine_f1": 0.6947368421052632, + "eval_Qnli-dev_cosine_f1_threshold": 0.7689546346664429, + "eval_Qnli-dev_cosine_precision": 0.592814371257485, + "eval_Qnli-dev_cosine_recall": 0.8389830508474576, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 419.0325927734375, + "eval_Qnli-dev_dot_ap": 0.616348530166337, + "eval_Qnli-dev_dot_f1": 0.6724436741767765, + "eval_Qnli-dev_dot_f1_threshold": 393.9245300292969, + "eval_Qnli-dev_dot_precision": 0.5689149560117303, + "eval_Qnli-dev_dot_recall": 0.8220338983050848, + "eval_Qnli-dev_euclidean_accuracy": 0.693359375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.748929977416992, + "eval_Qnli-dev_euclidean_ap": 0.7309618868427656, + "eval_Qnli-dev_euclidean_f1": 0.6943942133815552, + "eval_Qnli-dev_euclidean_f1_threshold": 15.1475830078125, + "eval_Qnli-dev_euclidean_precision": 0.6056782334384858, + "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, + "eval_Qnli-dev_manhattan_accuracy": 0.685546875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 265.4633483886719, + "eval_Qnli-dev_manhattan_ap": 0.730577397962383, + "eval_Qnli-dev_manhattan_f1": 0.6940298507462687, + "eval_Qnli-dev_manhattan_f1_threshold": 303.4216613769531, + "eval_Qnli-dev_manhattan_precision": 0.62, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.693359375, + "eval_Qnli-dev_max_accuracy_threshold": 419.0325927734375, + "eval_Qnli-dev_max_ap": 0.7309618868427656, + "eval_Qnli-dev_max_f1": 0.6947368421052632, + "eval_Qnli-dev_max_f1_threshold": 393.9245300292969, + "eval_Qnli-dev_max_precision": 0.62, + "eval_Qnli-dev_max_recall": 0.8389830508474576, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8910384178161621, + "eval_allNLI-dev_cosine_ap": 0.5627746050790838, + "eval_allNLI-dev_cosine_f1": 0.5838509316770187, + "eval_allNLI-dev_cosine_f1_threshold": 0.8071809411048889, + "eval_allNLI-dev_cosine_precision": 0.45483870967741935, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.67578125, + "eval_allNLI-dev_dot_accuracy_threshold": 508.07659912109375, + "eval_allNLI-dev_dot_ap": 0.4588019812939956, + "eval_allNLI-dev_dot_f1": 0.5421686746987953, + "eval_allNLI-dev_dot_f1_threshold": 413.8941650390625, + "eval_allNLI-dev_dot_precision": 0.4153846153846154, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.289400100708008, + "eval_allNLI-dev_euclidean_ap": 0.5694426258019529, + "eval_allNLI-dev_euclidean_f1": 0.5922746781115881, + "eval_allNLI-dev_euclidean_f1_threshold": 14.015277862548828, + "eval_allNLI-dev_euclidean_precision": 0.4709897610921502, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 220.96963500976562, + "eval_allNLI-dev_manhattan_ap": 0.5723061411584658, + "eval_allNLI-dev_manhattan_f1": 0.5785876993166287, + "eval_allNLI-dev_manhattan_f1_threshold": 278.1605224609375, + "eval_allNLI-dev_manhattan_precision": 0.4774436090225564, + "eval_allNLI-dev_manhattan_recall": 0.7341040462427746, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 508.07659912109375, + "eval_allNLI-dev_max_ap": 0.5723061411584658, + "eval_allNLI-dev_max_f1": 0.5922746781115881, + "eval_allNLI-dev_max_f1_threshold": 413.8941650390625, + "eval_allNLI-dev_max_precision": 0.4774436090225564, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7309618868427656, + "eval_sts-test_pearson_cosine": 0.7944016690558295, + "eval_sts-test_pearson_dot": 0.7340676184460866, + "eval_sts-test_pearson_euclidean": 0.8206810004337891, + "eval_sts-test_pearson_manhattan": 0.8198751359187904, + "eval_sts-test_pearson_max": 0.8206810004337891, + "eval_sts-test_spearman_cosine": 0.8158374232832949, + "eval_sts-test_spearman_dot": 0.712276783998263, + "eval_sts-test_spearman_euclidean": 0.8117007509340581, + "eval_sts-test_spearman_manhattan": 0.8093512202084868, + "eval_sts-test_spearman_max": 0.8158374232832949, + "eval_vitaminc-pairs_loss": 2.9273321628570557, + "eval_vitaminc-pairs_runtime": 3.1718, + "eval_vitaminc-pairs_samples_per_second": 40.356, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_negation-triplets_loss": 1.3328778743743896, + "eval_negation-triplets_runtime": 0.7347, + "eval_negation-triplets_samples_per_second": 174.211, + "eval_negation-triplets_steps_per_second": 1.361, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_scitail-pairs-pos_loss": 0.18687528371810913, + "eval_scitail-pairs-pos_runtime": 0.8151, + "eval_scitail-pairs-pos_samples_per_second": 157.044, + "eval_scitail-pairs-pos_steps_per_second": 1.227, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_scitail-pairs-qa_loss": 0.014874367974698544, + "eval_scitail-pairs-qa_runtime": 0.5765, + "eval_scitail-pairs-qa_samples_per_second": 222.025, + "eval_scitail-pairs-qa_steps_per_second": 1.735, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_xsum-pairs_loss": 0.86911940574646, + "eval_xsum-pairs_runtime": 3.017, + "eval_xsum-pairs_samples_per_second": 42.427, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_sciq_pairs_loss": 0.14434820413589478, + "eval_sciq_pairs_runtime": 3.4284, + "eval_sciq_pairs_samples_per_second": 37.335, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_qasc_pairs_loss": 0.7873150110244751, + "eval_qasc_pairs_runtime": 0.6008, + "eval_qasc_pairs_samples_per_second": 213.056, + "eval_qasc_pairs_steps_per_second": 1.665, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_openbookqa_pairs_loss": 1.5795769691467285, + "eval_openbookqa_pairs_runtime": 0.5771, + "eval_openbookqa_pairs_samples_per_second": 221.803, + "eval_openbookqa_pairs_steps_per_second": 1.733, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_msmarco_pairs_loss": 1.4810850620269775, + "eval_msmarco_pairs_runtime": 1.525, + "eval_msmarco_pairs_samples_per_second": 83.934, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_nq_pairs_loss": 1.7317595481872559, + "eval_nq_pairs_runtime": 2.8997, + "eval_nq_pairs_samples_per_second": 44.143, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_trivia_pairs_loss": 1.2999101877212524, + "eval_trivia_pairs_runtime": 3.4365, + "eval_trivia_pairs_samples_per_second": 37.247, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_gooaq_pairs_loss": 0.903529167175293, + "eval_gooaq_pairs_runtime": 0.9492, + "eval_gooaq_pairs_samples_per_second": 134.844, + "eval_gooaq_pairs_steps_per_second": 1.053, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_paws-pos_loss": 0.04194509983062744, + "eval_paws-pos_runtime": 0.705, + "eval_paws-pos_samples_per_second": 181.572, + "eval_paws-pos_steps_per_second": 1.419, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_global_dataset_loss": 0.7329986095428467, + "eval_global_dataset_runtime": 13.3667, + "eval_global_dataset_samples_per_second": 31.122, + "eval_global_dataset_steps_per_second": 0.299, + "step": 520 + }, + { + "epoch": 0.5360082304526749, + "grad_norm": 9.531018257141113, + "learning_rate": 1.882658359293873e-05, + "loss": 0.6841, + "step": 521 + }, + { + "epoch": 0.5370370370370371, + "grad_norm": 14.136958122253418, + "learning_rate": 1.8862928348909654e-05, + "loss": 1.4999, + "step": 522 + }, + { + "epoch": 0.5380658436213992, + "grad_norm": 16.56440544128418, + "learning_rate": 1.889927310488058e-05, + "loss": 1.8423, + "step": 523 + }, + { + "epoch": 0.5390946502057613, + "grad_norm": 18.816726684570312, + "learning_rate": 1.8935617860851502e-05, + "loss": 3.2063, + "step": 524 + }, + { + "epoch": 0.5401234567901234, + "grad_norm": 9.336271286010742, + "learning_rate": 1.8971962616822426e-05, + "loss": 0.7876, + "step": 525 + }, + { + "epoch": 0.5411522633744856, + "grad_norm": 9.695099830627441, + "learning_rate": 1.9008307372793354e-05, + "loss": 0.7463, + "step": 526 + }, + { + "epoch": 0.5421810699588477, + "grad_norm": 16.809635162353516, + "learning_rate": 1.9044652128764278e-05, + "loss": 1.317, + "step": 527 + }, + { + "epoch": 0.5432098765432098, + "grad_norm": 11.21884536743164, + "learning_rate": 1.90809968847352e-05, + "loss": 1.533, + "step": 528 + }, + { + "epoch": 0.5442386831275721, + "grad_norm": 11.746585845947266, + "learning_rate": 1.9117341640706126e-05, + "loss": 0.9414, + "step": 529 + }, + { + "epoch": 0.5452674897119342, + "grad_norm": 11.7705078125, + "learning_rate": 1.915368639667705e-05, + "loss": 0.8405, + "step": 530 + }, + { + "epoch": 0.5462962962962963, + "grad_norm": 11.811210632324219, + "learning_rate": 1.919003115264797e-05, + "loss": 1.1217, + "step": 531 + }, + { + "epoch": 0.5473251028806584, + "grad_norm": 8.906420707702637, + "learning_rate": 1.9226375908618898e-05, + "loss": 0.6404, + "step": 532 + }, + { + "epoch": 0.5483539094650206, + "grad_norm": 8.888873100280762, + "learning_rate": 1.9262720664589822e-05, + "loss": 0.6283, + "step": 533 + }, + { + "epoch": 0.5493827160493827, + "grad_norm": 2.18764591217041, + "learning_rate": 1.9299065420560746e-05, + "loss": 0.0678, + "step": 534 + }, + { + "epoch": 0.5504115226337448, + "grad_norm": 8.759835243225098, + "learning_rate": 1.933541017653167e-05, + "loss": 0.5242, + "step": 535 + }, + { + "epoch": 0.551440329218107, + "grad_norm": 18.4666748046875, + "learning_rate": 1.9371754932502594e-05, + "loss": 1.9928, + "step": 536 + }, + { + "epoch": 0.5524691358024691, + "grad_norm": 11.737098693847656, + "learning_rate": 1.9408099688473518e-05, + "loss": 0.8622, + "step": 537 + }, + { + "epoch": 0.5534979423868313, + "grad_norm": 14.750716209411621, + "learning_rate": 1.9444444444444442e-05, + "loss": 1.2746, + "step": 538 + }, + { + "epoch": 0.5545267489711934, + "grad_norm": 11.672311782836914, + "learning_rate": 1.9480789200415366e-05, + "loss": 0.7844, + "step": 539 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 12.69827651977539, + "learning_rate": 1.951713395638629e-05, + "loss": 1.041, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_Qnli-dev_cosine_accuracy": 0.677734375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7994829416275024, + "eval_Qnli-dev_cosine_ap": 0.717657619195893, + "eval_Qnli-dev_cosine_f1": 0.6919275123558485, + "eval_Qnli-dev_cosine_f1_threshold": 0.7339121103286743, + "eval_Qnli-dev_cosine_precision": 0.5660377358490566, + "eval_Qnli-dev_cosine_recall": 0.8898305084745762, + "eval_Qnli-dev_dot_accuracy": 0.68359375, + "eval_Qnli-dev_dot_accuracy_threshold": 409.82696533203125, + "eval_Qnli-dev_dot_ap": 0.6260001258234368, + "eval_Qnli-dev_dot_f1": 0.6723549488054607, + "eval_Qnli-dev_dot_f1_threshold": 380.0247802734375, + "eval_Qnli-dev_dot_precision": 0.5628571428571428, + "eval_Qnli-dev_dot_recall": 0.8347457627118644, + "eval_Qnli-dev_euclidean_accuracy": 0.6796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.7105712890625, + "eval_Qnli-dev_euclidean_ap": 0.7249308269630148, + "eval_Qnli-dev_euclidean_f1": 0.6906710310965629, + "eval_Qnli-dev_euclidean_f1_threshold": 16.837154388427734, + "eval_Qnli-dev_euclidean_precision": 0.5626666666666666, + "eval_Qnli-dev_euclidean_recall": 0.8940677966101694, + "eval_Qnli-dev_manhattan_accuracy": 0.685546875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 283.3619384765625, + "eval_Qnli-dev_manhattan_ap": 0.7235444857124764, + "eval_Qnli-dev_manhattan_f1": 0.6901172529313233, + "eval_Qnli-dev_manhattan_f1_threshold": 334.96246337890625, + "eval_Qnli-dev_manhattan_precision": 0.5706371191135734, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.685546875, + "eval_Qnli-dev_max_accuracy_threshold": 409.82696533203125, + "eval_Qnli-dev_max_ap": 0.7249308269630148, + "eval_Qnli-dev_max_f1": 0.6919275123558485, + "eval_Qnli-dev_max_f1_threshold": 380.0247802734375, + "eval_Qnli-dev_max_precision": 0.5706371191135734, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.720703125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.897883415222168, + "eval_allNLI-dev_cosine_ap": 0.559606374648369, + "eval_allNLI-dev_cosine_f1": 0.5806451612903226, + "eval_allNLI-dev_cosine_f1_threshold": 0.7978842854499817, + "eval_allNLI-dev_cosine_precision": 0.4623287671232877, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.6796875, + "eval_allNLI-dev_dot_accuracy_threshold": 470.4619140625, + "eval_allNLI-dev_dot_ap": 0.45733111663306314, + "eval_allNLI-dev_dot_f1": 0.5478841870824054, + "eval_allNLI-dev_dot_f1_threshold": 410.201171875, + "eval_allNLI-dev_dot_precision": 0.44565217391304346, + "eval_allNLI-dev_dot_recall": 0.7109826589595376, + "eval_allNLI-dev_euclidean_accuracy": 0.720703125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.147970199584961, + "eval_allNLI-dev_euclidean_ap": 0.5660004356159096, + "eval_allNLI-dev_euclidean_f1": 0.591792656587473, + "eval_allNLI-dev_euclidean_f1_threshold": 14.38115119934082, + "eval_allNLI-dev_euclidean_precision": 0.4724137931034483, + "eval_allNLI-dev_euclidean_recall": 0.791907514450867, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 207.6907958984375, + "eval_allNLI-dev_manhattan_ap": 0.56719407577034, + "eval_allNLI-dev_manhattan_f1": 0.587737843551797, + "eval_allNLI-dev_manhattan_f1_threshold": 296.9386901855469, + "eval_allNLI-dev_manhattan_precision": 0.4633333333333333, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.720703125, + "eval_allNLI-dev_max_accuracy_threshold": 470.4619140625, + "eval_allNLI-dev_max_ap": 0.56719407577034, + "eval_allNLI-dev_max_f1": 0.591792656587473, + "eval_allNLI-dev_max_f1_threshold": 410.201171875, + "eval_allNLI-dev_max_precision": 0.4724137931034483, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.7249308269630148, + "eval_sts-test_pearson_cosine": 0.7981570472860724, + "eval_sts-test_pearson_dot": 0.7528095037431898, + "eval_sts-test_pearson_euclidean": 0.8221585052591076, + "eval_sts-test_pearson_manhattan": 0.8186301303511336, + "eval_sts-test_pearson_max": 0.8221585052591076, + "eval_sts-test_spearman_cosine": 0.820562977481181, + "eval_sts-test_spearman_dot": 0.7361068754404446, + "eval_sts-test_spearman_euclidean": 0.8129253244507724, + "eval_sts-test_spearman_manhattan": 0.8097035916406826, + "eval_sts-test_spearman_max": 0.820562977481181, + "eval_vitaminc-pairs_loss": 2.9952337741851807, + "eval_vitaminc-pairs_runtime": 3.166, + "eval_vitaminc-pairs_samples_per_second": 40.43, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_negation-triplets_loss": 1.2927732467651367, + "eval_negation-triplets_runtime": 0.7377, + "eval_negation-triplets_samples_per_second": 173.504, + "eval_negation-triplets_steps_per_second": 1.355, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_scitail-pairs-pos_loss": 0.1593194603919983, + "eval_scitail-pairs-pos_runtime": 0.8171, + "eval_scitail-pairs-pos_samples_per_second": 156.657, + "eval_scitail-pairs-pos_steps_per_second": 1.224, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_scitail-pairs-qa_loss": 0.016190586611628532, + "eval_scitail-pairs-qa_runtime": 0.5737, + "eval_scitail-pairs-qa_samples_per_second": 223.102, + "eval_scitail-pairs-qa_steps_per_second": 1.743, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_xsum-pairs_loss": 0.7690907120704651, + "eval_xsum-pairs_runtime": 3.0195, + "eval_xsum-pairs_samples_per_second": 42.392, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_sciq_pairs_loss": 0.14176045358181, + "eval_sciq_pairs_runtime": 3.4232, + "eval_sciq_pairs_samples_per_second": 37.392, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_qasc_pairs_loss": 0.754072904586792, + "eval_qasc_pairs_runtime": 0.599, + "eval_qasc_pairs_samples_per_second": 213.697, + "eval_qasc_pairs_steps_per_second": 1.67, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_openbookqa_pairs_loss": 1.468189001083374, + "eval_openbookqa_pairs_runtime": 0.5764, + "eval_openbookqa_pairs_samples_per_second": 222.08, + "eval_openbookqa_pairs_steps_per_second": 1.735, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_msmarco_pairs_loss": 1.443937063217163, + "eval_msmarco_pairs_runtime": 1.5215, + "eval_msmarco_pairs_samples_per_second": 84.128, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_nq_pairs_loss": 1.7499854564666748, + "eval_nq_pairs_runtime": 2.9123, + "eval_nq_pairs_samples_per_second": 43.951, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_trivia_pairs_loss": 1.284538984298706, + "eval_trivia_pairs_runtime": 3.4581, + "eval_trivia_pairs_samples_per_second": 37.015, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_gooaq_pairs_loss": 0.8851069808006287, + "eval_gooaq_pairs_runtime": 0.9412, + "eval_gooaq_pairs_samples_per_second": 135.997, + "eval_gooaq_pairs_steps_per_second": 1.062, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_paws-pos_loss": 0.04284976050257683, + "eval_paws-pos_runtime": 0.678, + "eval_paws-pos_samples_per_second": 188.793, + "eval_paws-pos_steps_per_second": 1.475, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_global_dataset_loss": 0.7442251443862915, + "eval_global_dataset_runtime": 13.3627, + "eval_global_dataset_samples_per_second": 31.132, + "eval_global_dataset_steps_per_second": 0.299, + "step": 540 + }, + { + "epoch": 0.5565843621399177, + "grad_norm": 12.537612915039062, + "learning_rate": 1.9553478712357217e-05, + "loss": 0.9339, + "step": 541 + }, + { + "epoch": 0.5576131687242798, + "grad_norm": 1.0051987171173096, + "learning_rate": 1.9589823468328138e-05, + "loss": 0.0237, + "step": 542 + }, + { + "epoch": 0.558641975308642, + "grad_norm": 9.488045692443848, + "learning_rate": 1.9626168224299062e-05, + "loss": 0.4569, + "step": 543 + }, + { + "epoch": 0.5596707818930041, + "grad_norm": 11.0010986328125, + "learning_rate": 1.966251298026999e-05, + "loss": 0.6537, + "step": 544 + }, + { + "epoch": 0.5606995884773662, + "grad_norm": 16.367504119873047, + "learning_rate": 1.969885773624091e-05, + "loss": 1.5957, + "step": 545 + }, + { + "epoch": 0.5617283950617284, + "grad_norm": 0.978878378868103, + "learning_rate": 1.9735202492211834e-05, + "loss": 0.0269, + "step": 546 + }, + { + "epoch": 0.5627572016460906, + "grad_norm": 12.36868667602539, + "learning_rate": 1.977154724818276e-05, + "loss": 0.7591, + "step": 547 + }, + { + "epoch": 0.5637860082304527, + "grad_norm": 11.471710205078125, + "learning_rate": 1.9807892004153686e-05, + "loss": 0.7064, + "step": 548 + }, + { + "epoch": 0.5648148148148148, + "grad_norm": 15.039127349853516, + "learning_rate": 1.9844236760124606e-05, + "loss": 1.201, + "step": 549 + }, + { + "epoch": 0.565843621399177, + "grad_norm": 11.709723472595215, + "learning_rate": 1.9880581516095534e-05, + "loss": 0.7516, + "step": 550 + }, + { + "epoch": 0.5668724279835391, + "grad_norm": 2.1083853244781494, + "learning_rate": 1.9916926272066458e-05, + "loss": 0.0917, + "step": 551 + }, + { + "epoch": 0.5679012345679012, + "grad_norm": 12.638484954833984, + "learning_rate": 1.9953271028037378e-05, + "loss": 0.9826, + "step": 552 + }, + { + "epoch": 0.5689300411522634, + "grad_norm": 11.251784324645996, + "learning_rate": 1.9989615784008306e-05, + "loss": 0.8362, + "step": 553 + }, + { + "epoch": 0.5699588477366255, + "grad_norm": 13.69099235534668, + "learning_rate": 2.002596053997923e-05, + "loss": 1.5957, + "step": 554 + }, + { + "epoch": 0.5709876543209876, + "grad_norm": 15.196340560913086, + "learning_rate": 2.0062305295950154e-05, + "loss": 1.2807, + "step": 555 + }, + { + "epoch": 0.5720164609053497, + "grad_norm": 14.767230987548828, + "learning_rate": 2.0098650051921078e-05, + "loss": 1.6863, + "step": 556 + }, + { + "epoch": 0.573045267489712, + "grad_norm": 11.55445671081543, + "learning_rate": 2.0134994807892002e-05, + "loss": 1.5643, + "step": 557 + }, + { + "epoch": 0.5740740740740741, + "grad_norm": 13.466323852539062, + "learning_rate": 2.0171339563862926e-05, + "loss": 1.2279, + "step": 558 + }, + { + "epoch": 0.5751028806584362, + "grad_norm": 10.434534072875977, + "learning_rate": 2.0207684319833853e-05, + "loss": 0.7398, + "step": 559 + }, + { + "epoch": 0.5761316872427984, + "grad_norm": 16.75852394104004, + "learning_rate": 2.0244029075804774e-05, + "loss": 1.7229, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_Qnli-dev_cosine_accuracy": 0.6875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7987607717514038, + "eval_Qnli-dev_cosine_ap": 0.7245772032010487, + "eval_Qnli-dev_cosine_f1": 0.7073608617594255, + "eval_Qnli-dev_cosine_f1_threshold": 0.7755422592163086, + "eval_Qnli-dev_cosine_precision": 0.6137071651090342, + "eval_Qnli-dev_cosine_recall": 0.8347457627118644, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 429.08099365234375, + "eval_Qnli-dev_dot_ap": 0.618896987535733, + "eval_Qnli-dev_dot_f1": 0.6784565916398714, + "eval_Qnli-dev_dot_f1_threshold": 389.2666015625, + "eval_Qnli-dev_dot_precision": 0.5466321243523317, + "eval_Qnli-dev_dot_recall": 0.8940677966101694, + "eval_Qnli-dev_euclidean_accuracy": 0.6953125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.390548706054688, + "eval_Qnli-dev_euclidean_ap": 0.7347399680383467, + "eval_Qnli-dev_euclidean_f1": 0.6974169741697418, + "eval_Qnli-dev_euclidean_f1_threshold": 15.172780990600586, + "eval_Qnli-dev_euclidean_precision": 0.6176470588235294, + "eval_Qnli-dev_euclidean_recall": 0.8008474576271186, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 299.5706787109375, + "eval_Qnli-dev_manhattan_ap": 0.7368729396225034, + "eval_Qnli-dev_manhattan_f1": 0.7120622568093385, + "eval_Qnli-dev_manhattan_f1_threshold": 299.5706787109375, + "eval_Qnli-dev_manhattan_precision": 0.658273381294964, + "eval_Qnli-dev_manhattan_recall": 0.7754237288135594, + "eval_Qnli-dev_max_accuracy": 0.7109375, + "eval_Qnli-dev_max_accuracy_threshold": 429.08099365234375, + "eval_Qnli-dev_max_ap": 0.7368729396225034, + "eval_Qnli-dev_max_f1": 0.7120622568093385, + "eval_Qnli-dev_max_f1_threshold": 389.2666015625, + "eval_Qnli-dev_max_precision": 0.658273381294964, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.71875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8877571821212769, + "eval_allNLI-dev_cosine_ap": 0.5702315132181276, + "eval_allNLI-dev_cosine_f1": 0.5930735930735931, + "eval_allNLI-dev_cosine_f1_threshold": 0.8116433620452881, + "eval_allNLI-dev_cosine_precision": 0.4740484429065744, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.67578125, + "eval_allNLI-dev_dot_accuracy_threshold": 478.5546875, + "eval_allNLI-dev_dot_ap": 0.4739609661272707, + "eval_allNLI-dev_dot_f1": 0.5494949494949496, + "eval_allNLI-dev_dot_f1_threshold": 413.8797912597656, + "eval_allNLI-dev_dot_precision": 0.422360248447205, + "eval_allNLI-dev_dot_recall": 0.7861271676300579, + "eval_allNLI-dev_euclidean_accuracy": 0.72265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.04772663116455, + "eval_allNLI-dev_euclidean_ap": 0.57668991696855, + "eval_allNLI-dev_euclidean_f1": 0.5995525727069352, + "eval_allNLI-dev_euclidean_f1_threshold": 13.817825317382812, + "eval_allNLI-dev_euclidean_precision": 0.48905109489051096, + "eval_allNLI-dev_euclidean_recall": 0.7745664739884393, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 211.58740234375, + "eval_allNLI-dev_manhattan_ap": 0.578607497519579, + "eval_allNLI-dev_manhattan_f1": 0.5882352941176471, + "eval_allNLI-dev_manhattan_f1_threshold": 281.086181640625, + "eval_allNLI-dev_manhattan_precision": 0.483271375464684, + "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, + "eval_allNLI-dev_max_accuracy": 0.72265625, + "eval_allNLI-dev_max_accuracy_threshold": 478.5546875, + "eval_allNLI-dev_max_ap": 0.578607497519579, + "eval_allNLI-dev_max_f1": 0.5995525727069352, + "eval_allNLI-dev_max_f1_threshold": 413.8797912597656, + "eval_allNLI-dev_max_precision": 0.48905109489051096, + "eval_allNLI-dev_max_recall": 0.791907514450867, + "eval_sequential_score": 0.7368729396225034, + "eval_sts-test_pearson_cosine": 0.7951957837142611, + "eval_sts-test_pearson_dot": 0.7487270214140551, + "eval_sts-test_pearson_euclidean": 0.8178215451497555, + "eval_sts-test_pearson_manhattan": 0.8154922571151692, + "eval_sts-test_pearson_max": 0.8178215451497555, + "eval_sts-test_spearman_cosine": 0.8174810476116783, + "eval_sts-test_spearman_dot": 0.7310933468755048, + "eval_sts-test_spearman_euclidean": 0.8105849677337864, + "eval_sts-test_spearman_manhattan": 0.8080193779182173, + "eval_sts-test_spearman_max": 0.8174810476116783, + "eval_vitaminc-pairs_loss": 2.8546268939971924, + "eval_vitaminc-pairs_runtime": 3.164, + "eval_vitaminc-pairs_samples_per_second": 40.455, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_negation-triplets_loss": 1.2844172716140747, + "eval_negation-triplets_runtime": 0.7354, + "eval_negation-triplets_samples_per_second": 174.063, + "eval_negation-triplets_steps_per_second": 1.36, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_scitail-pairs-pos_loss": 0.17617923021316528, + "eval_scitail-pairs-pos_runtime": 0.804, + "eval_scitail-pairs-pos_samples_per_second": 159.198, + "eval_scitail-pairs-pos_steps_per_second": 1.244, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_scitail-pairs-qa_loss": 0.013183332979679108, + "eval_scitail-pairs-qa_runtime": 0.5639, + "eval_scitail-pairs-qa_samples_per_second": 226.973, + "eval_scitail-pairs-qa_steps_per_second": 1.773, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_xsum-pairs_loss": 0.8270187973976135, + "eval_xsum-pairs_runtime": 3.0144, + "eval_xsum-pairs_samples_per_second": 42.463, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_sciq_pairs_loss": 0.1439501792192459, + "eval_sciq_pairs_runtime": 3.4768, + "eval_sciq_pairs_samples_per_second": 36.816, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_qasc_pairs_loss": 0.6848240494728088, + "eval_qasc_pairs_runtime": 0.6196, + "eval_qasc_pairs_samples_per_second": 206.597, + "eval_qasc_pairs_steps_per_second": 1.614, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_openbookqa_pairs_loss": 1.4732991456985474, + "eval_openbookqa_pairs_runtime": 0.5734, + "eval_openbookqa_pairs_samples_per_second": 223.235, + "eval_openbookqa_pairs_steps_per_second": 1.744, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_msmarco_pairs_loss": 1.4930459260940552, + "eval_msmarco_pairs_runtime": 1.5133, + "eval_msmarco_pairs_samples_per_second": 84.581, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_nq_pairs_loss": 1.7120836973190308, + "eval_nq_pairs_runtime": 2.8949, + "eval_nq_pairs_samples_per_second": 44.216, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_trivia_pairs_loss": 1.3425896167755127, + "eval_trivia_pairs_runtime": 3.4363, + "eval_trivia_pairs_samples_per_second": 37.249, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_gooaq_pairs_loss": 0.828025758266449, + "eval_gooaq_pairs_runtime": 0.9422, + "eval_gooaq_pairs_samples_per_second": 135.847, + "eval_gooaq_pairs_steps_per_second": 1.061, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_paws-pos_loss": 0.039411623030900955, + "eval_paws-pos_runtime": 0.6819, + "eval_paws-pos_samples_per_second": 187.706, + "eval_paws-pos_steps_per_second": 1.466, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_global_dataset_loss": 0.7242797613143921, + "eval_global_dataset_runtime": 13.3545, + "eval_global_dataset_samples_per_second": 31.151, + "eval_global_dataset_steps_per_second": 0.3, + "step": 560 + }, + { + "epoch": 0.5771604938271605, + "grad_norm": 8.372831344604492, + "learning_rate": 2.0280373831775698e-05, + "loss": 0.593, + "step": 561 + }, + { + "epoch": 0.5781893004115226, + "grad_norm": 19.26259422302246, + "learning_rate": 2.0316718587746625e-05, + "loss": 1.8963, + "step": 562 + }, + { + "epoch": 0.5792181069958847, + "grad_norm": 11.283585548400879, + "learning_rate": 2.0353063343717546e-05, + "loss": 0.743, + "step": 563 + }, + { + "epoch": 0.5802469135802469, + "grad_norm": 8.997882843017578, + "learning_rate": 2.038940809968847e-05, + "loss": 0.5824, + "step": 564 + }, + { + "epoch": 0.581275720164609, + "grad_norm": 13.550999641418457, + "learning_rate": 2.0425752855659397e-05, + "loss": 1.7532, + "step": 565 + }, + { + "epoch": 0.5823045267489712, + "grad_norm": 8.910313606262207, + "learning_rate": 2.046209761163032e-05, + "loss": 0.6509, + "step": 566 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 10.5217866897583, + "learning_rate": 2.0498442367601242e-05, + "loss": 0.7318, + "step": 567 + }, + { + "epoch": 0.5843621399176955, + "grad_norm": 13.271885871887207, + "learning_rate": 2.053478712357217e-05, + "loss": 1.3168, + "step": 568 + }, + { + "epoch": 0.5853909465020576, + "grad_norm": 9.908731460571289, + "learning_rate": 2.0571131879543093e-05, + "loss": 0.599, + "step": 569 + }, + { + "epoch": 0.5864197530864198, + "grad_norm": 14.152383804321289, + "learning_rate": 2.0607476635514014e-05, + "loss": 1.672, + "step": 570 + }, + { + "epoch": 0.5874485596707819, + "grad_norm": 9.812310218811035, + "learning_rate": 2.064382139148494e-05, + "loss": 0.7583, + "step": 571 + }, + { + "epoch": 0.588477366255144, + "grad_norm": 5.6503825187683105, + "learning_rate": 2.0680166147455865e-05, + "loss": 0.1891, + "step": 572 + }, + { + "epoch": 0.5895061728395061, + "grad_norm": 10.130154609680176, + "learning_rate": 2.071651090342679e-05, + "loss": 0.6344, + "step": 573 + }, + { + "epoch": 0.5905349794238683, + "grad_norm": 15.343293190002441, + "learning_rate": 2.0752855659397713e-05, + "loss": 1.303, + "step": 574 + }, + { + "epoch": 0.5915637860082305, + "grad_norm": 21.49701499938965, + "learning_rate": 2.0789200415368637e-05, + "loss": 2.2275, + "step": 575 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 14.206128120422363, + "learning_rate": 2.082554517133956e-05, + "loss": 1.3081, + "step": 576 + }, + { + "epoch": 0.5936213991769548, + "grad_norm": 9.159503936767578, + "learning_rate": 2.086188992731049e-05, + "loss": 0.5681, + "step": 577 + }, + { + "epoch": 0.5946502057613169, + "grad_norm": 10.146199226379395, + "learning_rate": 2.089823468328141e-05, + "loss": 0.6258, + "step": 578 + }, + { + "epoch": 0.595679012345679, + "grad_norm": 12.96678638458252, + "learning_rate": 2.0934579439252334e-05, + "loss": 1.1454, + "step": 579 + }, + { + "epoch": 0.5967078189300411, + "grad_norm": 14.751097679138184, + "learning_rate": 2.097092419522326e-05, + "loss": 1.3416, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_Qnli-dev_cosine_accuracy": 0.66796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8469637632369995, + "eval_Qnli-dev_cosine_ap": 0.7090284432654561, + "eval_Qnli-dev_cosine_f1": 0.6897689768976898, + "eval_Qnli-dev_cosine_f1_threshold": 0.7387524843215942, + "eval_Qnli-dev_cosine_precision": 0.5648648648648649, + "eval_Qnli-dev_cosine_recall": 0.885593220338983, + "eval_Qnli-dev_dot_accuracy": 0.6640625, + "eval_Qnli-dev_dot_accuracy_threshold": 408.5235595703125, + "eval_Qnli-dev_dot_ap": 0.6097543105824177, + "eval_Qnli-dev_dot_f1": 0.6701754385964912, + "eval_Qnli-dev_dot_f1_threshold": 390.4075012207031, + "eval_Qnli-dev_dot_precision": 0.5718562874251497, + "eval_Qnli-dev_dot_recall": 0.809322033898305, + "eval_Qnli-dev_euclidean_accuracy": 0.677734375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.064620971679688, + "eval_Qnli-dev_euclidean_ap": 0.7199645621423693, + "eval_Qnli-dev_euclidean_f1": 0.6836734693877551, + "eval_Qnli-dev_euclidean_f1_threshold": 16.033926010131836, + "eval_Qnli-dev_euclidean_precision": 0.5710227272727273, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.681640625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 294.104248046875, + "eval_Qnli-dev_manhattan_ap": 0.721231392124396, + "eval_Qnli-dev_manhattan_f1": 0.6897810218978102, + "eval_Qnli-dev_manhattan_f1_threshold": 310.521728515625, + "eval_Qnli-dev_manhattan_precision": 0.6057692307692307, + "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, + "eval_Qnli-dev_max_accuracy": 0.681640625, + "eval_Qnli-dev_max_accuracy_threshold": 408.5235595703125, + "eval_Qnli-dev_max_ap": 0.721231392124396, + "eval_Qnli-dev_max_f1": 0.6897810218978102, + "eval_Qnli-dev_max_f1_threshold": 390.4075012207031, + "eval_Qnli-dev_max_precision": 0.6057692307692307, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8930063247680664, + "eval_allNLI-dev_cosine_ap": 0.580516831193243, + "eval_allNLI-dev_cosine_f1": 0.5932203389830509, + "eval_allNLI-dev_cosine_f1_threshold": 0.792042076587677, + "eval_allNLI-dev_cosine_precision": 0.4682274247491639, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.681640625, + "eval_allNLI-dev_dot_accuracy_threshold": 479.3341064453125, + "eval_allNLI-dev_dot_ap": 0.48669798557045457, + "eval_allNLI-dev_dot_f1": 0.560919540229885, + "eval_allNLI-dev_dot_f1_threshold": 413.0164794921875, + "eval_allNLI-dev_dot_precision": 0.46564885496183206, + "eval_allNLI-dev_dot_recall": 0.7052023121387283, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.375900268554688, + "eval_allNLI-dev_euclidean_ap": 0.586159821151409, + "eval_allNLI-dev_euclidean_f1": 0.5925925925925926, + "eval_allNLI-dev_euclidean_f1_threshold": 13.825302124023438, + "eval_allNLI-dev_euclidean_precision": 0.4942084942084942, + "eval_allNLI-dev_euclidean_recall": 0.7398843930635838, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 213.74179077148438, + "eval_allNLI-dev_manhattan_ap": 0.5867922982953583, + "eval_allNLI-dev_manhattan_f1": 0.5903890160183066, + "eval_allNLI-dev_manhattan_f1_threshold": 286.81524658203125, + "eval_allNLI-dev_manhattan_precision": 0.48863636363636365, + "eval_allNLI-dev_manhattan_recall": 0.7456647398843931, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 479.3341064453125, + "eval_allNLI-dev_max_ap": 0.5867922982953583, + "eval_allNLI-dev_max_f1": 0.5932203389830509, + "eval_allNLI-dev_max_f1_threshold": 413.0164794921875, + "eval_allNLI-dev_max_precision": 0.4942084942084942, + "eval_allNLI-dev_max_recall": 0.8092485549132948, + "eval_sequential_score": 0.721231392124396, + "eval_sts-test_pearson_cosine": 0.8031708345006614, + "eval_sts-test_pearson_dot": 0.7716469990772233, + "eval_sts-test_pearson_euclidean": 0.8293403363982195, + "eval_sts-test_pearson_manhattan": 0.8269704942343952, + "eval_sts-test_pearson_max": 0.8293403363982195, + "eval_sts-test_spearman_cosine": 0.8293793339853779, + "eval_sts-test_spearman_dot": 0.7565175229997094, + "eval_sts-test_spearman_euclidean": 0.8224314768980562, + "eval_sts-test_spearman_manhattan": 0.81979553809958, + "eval_sts-test_spearman_max": 0.8293793339853779, + "eval_vitaminc-pairs_loss": 2.9443347454071045, + "eval_vitaminc-pairs_runtime": 3.1898, + "eval_vitaminc-pairs_samples_per_second": 40.127, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_negation-triplets_loss": 1.221449851989746, + "eval_negation-triplets_runtime": 0.7486, + "eval_negation-triplets_samples_per_second": 170.975, + "eval_negation-triplets_steps_per_second": 1.336, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_scitail-pairs-pos_loss": 0.1803685873746872, + "eval_scitail-pairs-pos_runtime": 0.829, + "eval_scitail-pairs-pos_samples_per_second": 154.409, + "eval_scitail-pairs-pos_steps_per_second": 1.206, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_scitail-pairs-qa_loss": 0.015901347622275352, + "eval_scitail-pairs-qa_runtime": 0.5704, + "eval_scitail-pairs-qa_samples_per_second": 224.404, + "eval_scitail-pairs-qa_steps_per_second": 1.753, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_xsum-pairs_loss": 0.7095991969108582, + "eval_xsum-pairs_runtime": 3.0163, + "eval_xsum-pairs_samples_per_second": 42.436, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_sciq_pairs_loss": 0.13398276269435883, + "eval_sciq_pairs_runtime": 3.4459, + "eval_sciq_pairs_samples_per_second": 37.145, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_qasc_pairs_loss": 0.681054413318634, + "eval_qasc_pairs_runtime": 0.6052, + "eval_qasc_pairs_samples_per_second": 211.516, + "eval_qasc_pairs_steps_per_second": 1.652, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_openbookqa_pairs_loss": 1.32936692237854, + "eval_openbookqa_pairs_runtime": 0.578, + "eval_openbookqa_pairs_samples_per_second": 221.445, + "eval_openbookqa_pairs_steps_per_second": 1.73, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_msmarco_pairs_loss": 1.3513559103012085, + "eval_msmarco_pairs_runtime": 1.5095, + "eval_msmarco_pairs_samples_per_second": 84.796, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_nq_pairs_loss": 1.6727423667907715, + "eval_nq_pairs_runtime": 2.8997, + "eval_nq_pairs_samples_per_second": 44.143, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_trivia_pairs_loss": 1.1192874908447266, + "eval_trivia_pairs_runtime": 3.4386, + "eval_trivia_pairs_samples_per_second": 37.225, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_gooaq_pairs_loss": 0.8172786235809326, + "eval_gooaq_pairs_runtime": 0.9533, + "eval_gooaq_pairs_samples_per_second": 134.272, + "eval_gooaq_pairs_steps_per_second": 1.049, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_paws-pos_loss": 0.03949186950922012, + "eval_paws-pos_runtime": 0.6806, + "eval_paws-pos_samples_per_second": 188.056, + "eval_paws-pos_steps_per_second": 1.469, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_global_dataset_loss": 0.6970628499984741, + "eval_global_dataset_runtime": 13.3615, + "eval_global_dataset_samples_per_second": 31.134, + "eval_global_dataset_steps_per_second": 0.299, + "step": 580 + }, + { + "epoch": 0.5977366255144033, + "grad_norm": 14.620248794555664, + "learning_rate": 2.100726895119418e-05, + "loss": 1.6136, + "step": 581 + }, + { + "epoch": 0.5987654320987654, + "grad_norm": 9.990836143493652, + "learning_rate": 2.1043613707165106e-05, + "loss": 0.5856, + "step": 582 + }, + { + "epoch": 0.5997942386831275, + "grad_norm": 11.57479190826416, + "learning_rate": 2.1079958463136033e-05, + "loss": 0.7762, + "step": 583 + }, + { + "epoch": 0.6008230452674898, + "grad_norm": 16.514976501464844, + "learning_rate": 2.1116303219106954e-05, + "loss": 2.0577, + "step": 584 + }, + { + "epoch": 0.6018518518518519, + "grad_norm": 19.117877960205078, + "learning_rate": 2.1152647975077878e-05, + "loss": 1.8893, + "step": 585 + }, + { + "epoch": 0.602880658436214, + "grad_norm": 1.2878212928771973, + "learning_rate": 2.1188992731048805e-05, + "loss": 0.0455, + "step": 586 + }, + { + "epoch": 0.6039094650205762, + "grad_norm": 15.874303817749023, + "learning_rate": 2.122533748701973e-05, + "loss": 2.5615, + "step": 587 + }, + { + "epoch": 0.6049382716049383, + "grad_norm": 9.337711334228516, + "learning_rate": 2.126168224299065e-05, + "loss": 0.593, + "step": 588 + }, + { + "epoch": 0.6059670781893004, + "grad_norm": 10.22465991973877, + "learning_rate": 2.1298026998961577e-05, + "loss": 0.8033, + "step": 589 + }, + { + "epoch": 0.6069958847736625, + "grad_norm": 9.863337516784668, + "learning_rate": 2.13343717549325e-05, + "loss": 0.694, + "step": 590 + }, + { + "epoch": 0.6080246913580247, + "grad_norm": 12.331180572509766, + "learning_rate": 2.1370716510903422e-05, + "loss": 1.0183, + "step": 591 + }, + { + "epoch": 0.6090534979423868, + "grad_norm": 9.044501304626465, + "learning_rate": 2.140706126687435e-05, + "loss": 0.6388, + "step": 592 + }, + { + "epoch": 0.6100823045267489, + "grad_norm": 9.711915969848633, + "learning_rate": 2.1443406022845273e-05, + "loss": 0.7858, + "step": 593 + }, + { + "epoch": 0.6111111111111112, + "grad_norm": 5.571502208709717, + "learning_rate": 2.1479750778816197e-05, + "loss": 0.1627, + "step": 594 + }, + { + "epoch": 0.6121399176954733, + "grad_norm": 10.834738731384277, + "learning_rate": 2.151609553478712e-05, + "loss": 1.2084, + "step": 595 + }, + { + "epoch": 0.6131687242798354, + "grad_norm": 11.250519752502441, + "learning_rate": 2.1552440290758045e-05, + "loss": 0.8371, + "step": 596 + }, + { + "epoch": 0.6141975308641975, + "grad_norm": 12.769804000854492, + "learning_rate": 2.158878504672897e-05, + "loss": 1.0759, + "step": 597 + }, + { + "epoch": 0.6152263374485597, + "grad_norm": 9.822973251342773, + "learning_rate": 2.1625129802699897e-05, + "loss": 0.6237, + "step": 598 + }, + { + "epoch": 0.6162551440329218, + "grad_norm": 12.792522430419922, + "learning_rate": 2.1661474558670817e-05, + "loss": 0.9396, + "step": 599 + }, + { + "epoch": 0.6172839506172839, + "grad_norm": 11.624062538146973, + "learning_rate": 2.169781931464174e-05, + "loss": 0.7352, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_Qnli-dev_cosine_accuracy": 0.685546875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7914708852767944, + "eval_Qnli-dev_cosine_ap": 0.7227066968429299, + "eval_Qnli-dev_cosine_f1": 0.6948529411764706, + "eval_Qnli-dev_cosine_f1_threshold": 0.766169548034668, + "eval_Qnli-dev_cosine_precision": 0.6136363636363636, + "eval_Qnli-dev_cosine_recall": 0.8008474576271186, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 405.1741943359375, + "eval_Qnli-dev_dot_ap": 0.6291761267009413, + "eval_Qnli-dev_dot_f1": 0.6897810218978102, + "eval_Qnli-dev_dot_f1_threshold": 382.8020935058594, + "eval_Qnli-dev_dot_precision": 0.6057692307692307, + "eval_Qnli-dev_dot_recall": 0.8008474576271186, + "eval_Qnli-dev_euclidean_accuracy": 0.69140625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.424887657165527, + "eval_Qnli-dev_euclidean_ap": 0.7307017217323966, + "eval_Qnli-dev_euclidean_f1": 0.6906710310965629, + "eval_Qnli-dev_euclidean_f1_threshold": 17.00006675720215, + "eval_Qnli-dev_euclidean_precision": 0.5626666666666666, + "eval_Qnli-dev_euclidean_recall": 0.8940677966101694, + "eval_Qnli-dev_manhattan_accuracy": 0.689453125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 309.230712890625, + "eval_Qnli-dev_manhattan_ap": 0.7325013115093475, + "eval_Qnli-dev_manhattan_f1": 0.6953528399311533, + "eval_Qnli-dev_manhattan_f1_threshold": 332.23504638671875, + "eval_Qnli-dev_manhattan_precision": 0.5855072463768116, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.69140625, + "eval_Qnli-dev_max_accuracy_threshold": 405.1741943359375, + "eval_Qnli-dev_max_ap": 0.7325013115093475, + "eval_Qnli-dev_max_f1": 0.6953528399311533, + "eval_Qnli-dev_max_f1_threshold": 382.8020935058594, + "eval_Qnli-dev_max_precision": 0.6136363636363636, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8877395391464233, + "eval_allNLI-dev_cosine_ap": 0.5814109945041677, + "eval_allNLI-dev_cosine_f1": 0.5892116182572614, + "eval_allNLI-dev_cosine_f1_threshold": 0.7833628058433533, + "eval_allNLI-dev_cosine_precision": 0.459546925566343, + "eval_allNLI-dev_cosine_recall": 0.8208092485549133, + "eval_allNLI-dev_dot_accuracy": 0.68359375, + "eval_allNLI-dev_dot_accuracy_threshold": 498.7593994140625, + "eval_allNLI-dev_dot_ap": 0.49817236088425526, + "eval_allNLI-dev_dot_f1": 0.5469728601252609, + "eval_allNLI-dev_dot_f1_threshold": 396.20513916015625, + "eval_allNLI-dev_dot_precision": 0.42810457516339867, + "eval_allNLI-dev_dot_recall": 0.7572254335260116, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.987224578857422, + "eval_allNLI-dev_euclidean_ap": 0.5868737853118521, + "eval_allNLI-dev_euclidean_f1": 0.5991735537190083, + "eval_allNLI-dev_euclidean_f1_threshold": 14.847602844238281, + "eval_allNLI-dev_euclidean_precision": 0.4662379421221865, + "eval_allNLI-dev_euclidean_recall": 0.838150289017341, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 217.56982421875, + "eval_allNLI-dev_manhattan_ap": 0.5854235635053637, + "eval_allNLI-dev_manhattan_f1": 0.5908096280087528, + "eval_allNLI-dev_manhattan_f1_threshold": 296.2995300292969, + "eval_allNLI-dev_manhattan_precision": 0.4753521126760563, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 498.7593994140625, + "eval_allNLI-dev_max_ap": 0.5868737853118521, + "eval_allNLI-dev_max_f1": 0.5991735537190083, + "eval_allNLI-dev_max_f1_threshold": 396.20513916015625, + "eval_allNLI-dev_max_precision": 0.4753521126760563, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7325013115093475, + "eval_sts-test_pearson_cosine": 0.8070716873912918, + "eval_sts-test_pearson_dot": 0.7619397369954762, + "eval_sts-test_pearson_euclidean": 0.8282272675602773, + "eval_sts-test_pearson_manhattan": 0.8241390313463588, + "eval_sts-test_pearson_max": 0.8282272675602773, + "eval_sts-test_spearman_cosine": 0.8247862882724717, + "eval_sts-test_spearman_dot": 0.7450420017923742, + "eval_sts-test_spearman_euclidean": 0.819151701701942, + "eval_sts-test_spearman_manhattan": 0.8149713968728485, + "eval_sts-test_spearman_max": 0.8247862882724717, + "eval_vitaminc-pairs_loss": 2.7805817127227783, + "eval_vitaminc-pairs_runtime": 3.1769, + "eval_vitaminc-pairs_samples_per_second": 40.291, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_negation-triplets_loss": 1.2412256002426147, + "eval_negation-triplets_runtime": 0.7403, + "eval_negation-triplets_samples_per_second": 172.908, + "eval_negation-triplets_steps_per_second": 1.351, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_scitail-pairs-pos_loss": 0.19108502566814423, + "eval_scitail-pairs-pos_runtime": 0.8102, + "eval_scitail-pairs-pos_samples_per_second": 157.986, + "eval_scitail-pairs-pos_steps_per_second": 1.234, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_scitail-pairs-qa_loss": 0.011316634714603424, + "eval_scitail-pairs-qa_runtime": 0.5692, + "eval_scitail-pairs-qa_samples_per_second": 224.889, + "eval_scitail-pairs-qa_steps_per_second": 1.757, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_xsum-pairs_loss": 0.6977664232254028, + "eval_xsum-pairs_runtime": 3.0198, + "eval_xsum-pairs_samples_per_second": 42.387, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_sciq_pairs_loss": 0.13763564825057983, + "eval_sciq_pairs_runtime": 3.413, + "eval_sciq_pairs_samples_per_second": 37.503, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_qasc_pairs_loss": 0.6264404058456421, + "eval_qasc_pairs_runtime": 0.5999, + "eval_qasc_pairs_samples_per_second": 213.376, + "eval_qasc_pairs_steps_per_second": 1.667, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_openbookqa_pairs_loss": 1.2759621143341064, + "eval_openbookqa_pairs_runtime": 0.5867, + "eval_openbookqa_pairs_samples_per_second": 218.169, + "eval_openbookqa_pairs_steps_per_second": 1.704, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_msmarco_pairs_loss": 1.4110215902328491, + "eval_msmarco_pairs_runtime": 1.5228, + "eval_msmarco_pairs_samples_per_second": 84.054, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_nq_pairs_loss": 1.654952883720398, + "eval_nq_pairs_runtime": 2.9213, + "eval_nq_pairs_samples_per_second": 43.816, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_trivia_pairs_loss": 1.11814284324646, + "eval_trivia_pairs_runtime": 3.4571, + "eval_trivia_pairs_samples_per_second": 37.025, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_gooaq_pairs_loss": 0.8059184551239014, + "eval_gooaq_pairs_runtime": 0.9451, + "eval_gooaq_pairs_samples_per_second": 135.437, + "eval_gooaq_pairs_steps_per_second": 1.058, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_paws-pos_loss": 0.038612980395555496, + "eval_paws-pos_runtime": 0.6792, + "eval_paws-pos_samples_per_second": 188.462, + "eval_paws-pos_steps_per_second": 1.472, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_global_dataset_loss": 0.6731630563735962, + "eval_global_dataset_runtime": 13.3755, + "eval_global_dataset_samples_per_second": 31.102, + "eval_global_dataset_steps_per_second": 0.299, + "step": 600 + }, + { + "epoch": 0.6183127572016461, + "grad_norm": 9.29102611541748, + "learning_rate": 2.173416407061267e-05, + "loss": 0.5273, + "step": 601 + }, + { + "epoch": 0.6193415637860082, + "grad_norm": 18.072662353515625, + "learning_rate": 2.177050882658359e-05, + "loss": 1.925, + "step": 602 + }, + { + "epoch": 0.6203703703703703, + "grad_norm": 11.696316719055176, + "learning_rate": 2.1806853582554513e-05, + "loss": 0.8177, + "step": 603 + }, + { + "epoch": 0.6213991769547325, + "grad_norm": 8.543580055236816, + "learning_rate": 2.184319833852544e-05, + "loss": 0.4747, + "step": 604 + }, + { + "epoch": 0.6224279835390947, + "grad_norm": 11.905756950378418, + "learning_rate": 2.1879543094496365e-05, + "loss": 0.9485, + "step": 605 + }, + { + "epoch": 0.6234567901234568, + "grad_norm": 13.481616020202637, + "learning_rate": 2.1915887850467285e-05, + "loss": 1.7983, + "step": 606 + }, + { + "epoch": 0.6244855967078189, + "grad_norm": 4.5081787109375, + "learning_rate": 2.1952232606438213e-05, + "loss": 0.1446, + "step": 607 + }, + { + "epoch": 0.6255144032921811, + "grad_norm": 10.28495979309082, + "learning_rate": 2.1988577362409137e-05, + "loss": 0.6929, + "step": 608 + }, + { + "epoch": 0.6265432098765432, + "grad_norm": 0.8422635197639465, + "learning_rate": 2.2024922118380058e-05, + "loss": 0.056, + "step": 609 + }, + { + "epoch": 0.6275720164609053, + "grad_norm": 10.7501220703125, + "learning_rate": 2.2061266874350985e-05, + "loss": 0.6738, + "step": 610 + }, + { + "epoch": 0.6286008230452675, + "grad_norm": 13.118562698364258, + "learning_rate": 2.209761163032191e-05, + "loss": 1.4398, + "step": 611 + }, + { + "epoch": 0.6296296296296297, + "grad_norm": 19.016132354736328, + "learning_rate": 2.2133956386292833e-05, + "loss": 3.152, + "step": 612 + }, + { + "epoch": 0.6306584362139918, + "grad_norm": 16.179283142089844, + "learning_rate": 2.2170301142263757e-05, + "loss": 1.8703, + "step": 613 + }, + { + "epoch": 0.6316872427983539, + "grad_norm": 1.413341999053955, + "learning_rate": 2.220664589823468e-05, + "loss": 0.0766, + "step": 614 + }, + { + "epoch": 0.6327160493827161, + "grad_norm": 19.418697357177734, + "learning_rate": 2.2242990654205605e-05, + "loss": 2.4434, + "step": 615 + }, + { + "epoch": 0.6337448559670782, + "grad_norm": 13.95297622680664, + "learning_rate": 2.2279335410176532e-05, + "loss": 1.4074, + "step": 616 + }, + { + "epoch": 0.6347736625514403, + "grad_norm": 9.78261947631836, + "learning_rate": 2.2315680166147453e-05, + "loss": 0.7425, + "step": 617 + }, + { + "epoch": 0.6358024691358025, + "grad_norm": 7.618975639343262, + "learning_rate": 2.2352024922118377e-05, + "loss": 0.466, + "step": 618 + }, + { + "epoch": 0.6368312757201646, + "grad_norm": 11.607491493225098, + "learning_rate": 2.2388369678089305e-05, + "loss": 1.6586, + "step": 619 + }, + { + "epoch": 0.6378600823045267, + "grad_norm": 7.107526779174805, + "learning_rate": 2.2424714434060225e-05, + "loss": 0.3817, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_Qnli-dev_cosine_accuracy": 0.693359375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8104921579360962, + "eval_Qnli-dev_cosine_ap": 0.7372700421671432, + "eval_Qnli-dev_cosine_f1": 0.7011070110701106, + "eval_Qnli-dev_cosine_f1_threshold": 0.7957046627998352, + "eval_Qnli-dev_cosine_precision": 0.6209150326797386, + "eval_Qnli-dev_cosine_recall": 0.8050847457627118, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 438.48602294921875, + "eval_Qnli-dev_dot_ap": 0.6254364606240859, + "eval_Qnli-dev_dot_f1": 0.6798561151079136, + "eval_Qnli-dev_dot_f1_threshold": 417.19720458984375, + "eval_Qnli-dev_dot_precision": 0.590625, + "eval_Qnli-dev_dot_recall": 0.8008474576271186, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.722761154174805, + "eval_Qnli-dev_euclidean_ap": 0.7476820851309197, + "eval_Qnli-dev_euclidean_f1": 0.6962457337883959, + "eval_Qnli-dev_euclidean_f1_threshold": 15.658858299255371, + "eval_Qnli-dev_euclidean_precision": 0.5828571428571429, + "eval_Qnli-dev_euclidean_recall": 0.864406779661017, + "eval_Qnli-dev_manhattan_accuracy": 0.693359375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 277.64154052734375, + "eval_Qnli-dev_manhattan_ap": 0.747576429030092, + "eval_Qnli-dev_manhattan_f1": 0.6969147005444646, + "eval_Qnli-dev_manhattan_f1_threshold": 306.7862548828125, + "eval_Qnli-dev_manhattan_precision": 0.6095238095238096, + "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, + "eval_Qnli-dev_max_accuracy": 0.701171875, + "eval_Qnli-dev_max_accuracy_threshold": 438.48602294921875, + "eval_Qnli-dev_max_ap": 0.7476820851309197, + "eval_Qnli-dev_max_f1": 0.7011070110701106, + "eval_Qnli-dev_max_f1_threshold": 417.19720458984375, + "eval_Qnli-dev_max_precision": 0.6209150326797386, + "eval_Qnli-dev_max_recall": 0.864406779661017, + "eval_allNLI-dev_cosine_accuracy": 0.734375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9023975133895874, + "eval_allNLI-dev_cosine_ap": 0.5865878816400992, + "eval_allNLI-dev_cosine_f1": 0.5961123110151189, + "eval_allNLI-dev_cosine_f1_threshold": 0.815485954284668, + "eval_allNLI-dev_cosine_precision": 0.47586206896551725, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.6796875, + "eval_allNLI-dev_dot_accuracy_threshold": 520.5687255859375, + "eval_allNLI-dev_dot_ap": 0.50417908457673, + "eval_allNLI-dev_dot_f1": 0.5764705882352941, + "eval_allNLI-dev_dot_f1_threshold": 419.378662109375, + "eval_allNLI-dev_dot_precision": 0.4362017804154303, + "eval_allNLI-dev_dot_recall": 0.8497109826589595, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.461543083190918, + "eval_allNLI-dev_euclidean_ap": 0.5891739143142342, + "eval_allNLI-dev_euclidean_f1": 0.6018099547511312, + "eval_allNLI-dev_euclidean_f1_threshold": 13.633740425109863, + "eval_allNLI-dev_euclidean_precision": 0.4944237918215613, + "eval_allNLI-dev_euclidean_recall": 0.7687861271676301, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 205.95645141601562, + "eval_allNLI-dev_manhattan_ap": 0.5909121718301882, + "eval_allNLI-dev_manhattan_f1": 0.5978947368421053, + "eval_allNLI-dev_manhattan_f1_threshold": 292.0635681152344, + "eval_allNLI-dev_manhattan_precision": 0.47019867549668876, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 520.5687255859375, + "eval_allNLI-dev_max_ap": 0.5909121718301882, + "eval_allNLI-dev_max_f1": 0.6018099547511312, + "eval_allNLI-dev_max_f1_threshold": 419.378662109375, + "eval_allNLI-dev_max_precision": 0.4944237918215613, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7476820851309197, + "eval_sts-test_pearson_cosine": 0.811803599688079, + "eval_sts-test_pearson_dot": 0.7763025780752795, + "eval_sts-test_pearson_euclidean": 0.834182762862252, + "eval_sts-test_pearson_manhattan": 0.8306831599881925, + "eval_sts-test_pearson_max": 0.834182762862252, + "eval_sts-test_spearman_cosine": 0.8279280953297161, + "eval_sts-test_spearman_dot": 0.7618572435089312, + "eval_sts-test_spearman_euclidean": 0.8235176795145484, + "eval_sts-test_spearman_manhattan": 0.8203718448437786, + "eval_sts-test_spearman_max": 0.8279280953297161, + "eval_vitaminc-pairs_loss": 2.7285807132720947, + "eval_vitaminc-pairs_runtime": 3.1675, + "eval_vitaminc-pairs_samples_per_second": 40.41, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_negation-triplets_loss": 1.2768163681030273, + "eval_negation-triplets_runtime": 0.7451, + "eval_negation-triplets_samples_per_second": 171.791, + "eval_negation-triplets_steps_per_second": 1.342, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_scitail-pairs-pos_loss": 0.221151664853096, + "eval_scitail-pairs-pos_runtime": 0.8023, + "eval_scitail-pairs-pos_samples_per_second": 159.546, + "eval_scitail-pairs-pos_steps_per_second": 1.246, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_scitail-pairs-qa_loss": 0.011276349425315857, + "eval_scitail-pairs-qa_runtime": 0.5728, + "eval_scitail-pairs-qa_samples_per_second": 223.455, + "eval_scitail-pairs-qa_steps_per_second": 1.746, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_xsum-pairs_loss": 0.6888625025749207, + "eval_xsum-pairs_runtime": 3.022, + "eval_xsum-pairs_samples_per_second": 42.356, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_sciq_pairs_loss": 0.12679386138916016, + "eval_sciq_pairs_runtime": 3.4396, + "eval_sciq_pairs_samples_per_second": 37.213, + "eval_sciq_pairs_steps_per_second": 0.291, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_qasc_pairs_loss": 0.6138037443161011, + "eval_qasc_pairs_runtime": 0.6116, + "eval_qasc_pairs_samples_per_second": 209.28, + "eval_qasc_pairs_steps_per_second": 1.635, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_openbookqa_pairs_loss": 1.2520498037338257, + "eval_openbookqa_pairs_runtime": 0.575, + "eval_openbookqa_pairs_samples_per_second": 222.626, + "eval_openbookqa_pairs_steps_per_second": 1.739, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_msmarco_pairs_loss": 1.2622545957565308, + "eval_msmarco_pairs_runtime": 1.5106, + "eval_msmarco_pairs_samples_per_second": 84.736, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_nq_pairs_loss": 1.5863006114959717, + "eval_nq_pairs_runtime": 2.9147, + "eval_nq_pairs_samples_per_second": 43.915, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_trivia_pairs_loss": 1.1821491718292236, + "eval_trivia_pairs_runtime": 3.4369, + "eval_trivia_pairs_samples_per_second": 37.243, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_gooaq_pairs_loss": 0.7643461227416992, + "eval_gooaq_pairs_runtime": 0.9406, + "eval_gooaq_pairs_samples_per_second": 136.089, + "eval_gooaq_pairs_steps_per_second": 1.063, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_paws-pos_loss": 0.03508833795785904, + "eval_paws-pos_runtime": 0.6812, + "eval_paws-pos_samples_per_second": 187.907, + "eval_paws-pos_steps_per_second": 1.468, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_global_dataset_loss": 0.6339895725250244, + "eval_global_dataset_runtime": 13.3641, + "eval_global_dataset_samples_per_second": 31.128, + "eval_global_dataset_steps_per_second": 0.299, + "step": 620 + }, + { + "epoch": 0.6388888888888888, + "grad_norm": 13.532258033752441, + "learning_rate": 2.246105919003115e-05, + "loss": 1.4414, + "step": 621 + }, + { + "epoch": 0.6399176954732511, + "grad_norm": 9.563913345336914, + "learning_rate": 2.2497403946002077e-05, + "loss": 0.7481, + "step": 622 + }, + { + "epoch": 0.6409465020576132, + "grad_norm": 10.86938762664795, + "learning_rate": 2.2533748701973e-05, + "loss": 0.8256, + "step": 623 + }, + { + "epoch": 0.6419753086419753, + "grad_norm": 4.665733814239502, + "learning_rate": 2.257009345794392e-05, + "loss": 0.1559, + "step": 624 + }, + { + "epoch": 0.6430041152263375, + "grad_norm": 10.261479377746582, + "learning_rate": 2.260643821391485e-05, + "loss": 0.8878, + "step": 625 + }, + { + "epoch": 0.6440329218106996, + "grad_norm": 9.72616958618164, + "learning_rate": 2.2642782969885773e-05, + "loss": 0.5888, + "step": 626 + }, + { + "epoch": 0.6450617283950617, + "grad_norm": 11.944307327270508, + "learning_rate": 2.2679127725856693e-05, + "loss": 1.0332, + "step": 627 + }, + { + "epoch": 0.6460905349794238, + "grad_norm": 10.020615577697754, + "learning_rate": 2.271547248182762e-05, + "loss": 1.0121, + "step": 628 + }, + { + "epoch": 0.647119341563786, + "grad_norm": 8.791054725646973, + "learning_rate": 2.2751817237798545e-05, + "loss": 0.6393, + "step": 629 + }, + { + "epoch": 0.6481481481481481, + "grad_norm": 12.706099510192871, + "learning_rate": 2.278816199376947e-05, + "loss": 0.7494, + "step": 630 + }, + { + "epoch": 0.6491769547325102, + "grad_norm": 3.587538480758667, + "learning_rate": 2.2824506749740393e-05, + "loss": 0.1088, + "step": 631 + }, + { + "epoch": 0.6502057613168725, + "grad_norm": 16.609806060791016, + "learning_rate": 2.2860851505711317e-05, + "loss": 1.3588, + "step": 632 + }, + { + "epoch": 0.6512345679012346, + "grad_norm": 1.4342639446258545, + "learning_rate": 2.289719626168224e-05, + "loss": 0.0403, + "step": 633 + }, + { + "epoch": 0.6522633744855967, + "grad_norm": 25.457242965698242, + "learning_rate": 2.2933541017653165e-05, + "loss": 3.6884, + "step": 634 + }, + { + "epoch": 0.6532921810699589, + "grad_norm": 19.651193618774414, + "learning_rate": 2.296988577362409e-05, + "loss": 1.6915, + "step": 635 + }, + { + "epoch": 0.654320987654321, + "grad_norm": 10.904431343078613, + "learning_rate": 2.3006230529595013e-05, + "loss": 0.5166, + "step": 636 + }, + { + "epoch": 0.6553497942386831, + "grad_norm": 20.06137466430664, + "learning_rate": 2.304257528556594e-05, + "loss": 1.8266, + "step": 637 + }, + { + "epoch": 0.6563786008230452, + "grad_norm": 17.062715530395508, + "learning_rate": 2.307892004153686e-05, + "loss": 1.3875, + "step": 638 + }, + { + "epoch": 0.6574074074074074, + "grad_norm": 21.51274299621582, + "learning_rate": 2.3115264797507785e-05, + "loss": 1.8874, + "step": 639 + }, + { + "epoch": 0.6584362139917695, + "grad_norm": 1.2121050357818604, + "learning_rate": 2.3151609553478712e-05, + "loss": 0.0379, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7842894792556763, + "eval_Qnli-dev_cosine_ap": 0.7349992877103873, + "eval_Qnli-dev_cosine_f1": 0.7003610108303249, + "eval_Qnli-dev_cosine_f1_threshold": 0.7629624605178833, + "eval_Qnli-dev_cosine_precision": 0.610062893081761, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.662109375, + "eval_Qnli-dev_dot_accuracy_threshold": 385.76885986328125, + "eval_Qnli-dev_dot_ap": 0.6359639073801129, + "eval_Qnli-dev_dot_f1": 0.6838709677419356, + "eval_Qnli-dev_dot_f1_threshold": 354.2484436035156, + "eval_Qnli-dev_dot_precision": 0.5520833333333334, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.231593132019043, + "eval_Qnli-dev_euclidean_ap": 0.7462767063117786, + "eval_Qnli-dev_euclidean_f1": 0.7047970479704797, + "eval_Qnli-dev_euclidean_f1_threshold": 15.258886337280273, + "eval_Qnli-dev_euclidean_precision": 0.6241830065359477, + "eval_Qnli-dev_euclidean_recall": 0.809322033898305, + "eval_Qnli-dev_manhattan_accuracy": 0.69921875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 265.3671875, + "eval_Qnli-dev_manhattan_ap": 0.744418854148787, + "eval_Qnli-dev_manhattan_f1": 0.708029197080292, + "eval_Qnli-dev_manhattan_f1_threshold": 314.21258544921875, + "eval_Qnli-dev_manhattan_precision": 0.6217948717948718, + "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, + "eval_Qnli-dev_max_accuracy": 0.701171875, + "eval_Qnli-dev_max_accuracy_threshold": 385.76885986328125, + "eval_Qnli-dev_max_ap": 0.7462767063117786, + "eval_Qnli-dev_max_f1": 0.708029197080292, + "eval_Qnli-dev_max_f1_threshold": 354.2484436035156, + "eval_Qnli-dev_max_precision": 0.6241830065359477, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8901729583740234, + "eval_allNLI-dev_cosine_ap": 0.5909042367020411, + "eval_allNLI-dev_cosine_f1": 0.6091127098321343, + "eval_allNLI-dev_cosine_f1_threshold": 0.8110285401344299, + "eval_allNLI-dev_cosine_precision": 0.5204918032786885, + "eval_allNLI-dev_cosine_recall": 0.7341040462427746, + "eval_allNLI-dev_dot_accuracy": 0.6796875, + "eval_allNLI-dev_dot_accuracy_threshold": 481.55474853515625, + "eval_allNLI-dev_dot_ap": 0.4948903950504878, + "eval_allNLI-dev_dot_f1": 0.569672131147541, + "eval_allNLI-dev_dot_f1_threshold": 379.9951171875, + "eval_allNLI-dev_dot_precision": 0.44126984126984126, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.585214614868164, + "eval_allNLI-dev_euclidean_ap": 0.5947416283923239, + "eval_allNLI-dev_euclidean_f1": 0.6009852216748768, + "eval_allNLI-dev_euclidean_f1_threshold": 13.550745010375977, + "eval_allNLI-dev_euclidean_precision": 0.5236051502145923, + "eval_allNLI-dev_euclidean_recall": 0.7052023121387283, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 228.28366088867188, + "eval_allNLI-dev_manhattan_ap": 0.5918176918420521, + "eval_allNLI-dev_manhattan_f1": 0.5991379310344827, + "eval_allNLI-dev_manhattan_f1_threshold": 301.08868408203125, + "eval_allNLI-dev_manhattan_precision": 0.47766323024054985, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 481.55474853515625, + "eval_allNLI-dev_max_ap": 0.5947416283923239, + "eval_allNLI-dev_max_f1": 0.6091127098321343, + "eval_allNLI-dev_max_f1_threshold": 379.9951171875, + "eval_allNLI-dev_max_precision": 0.5236051502145923, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.7462767063117786, + "eval_sts-test_pearson_cosine": 0.8118059789516554, + "eval_sts-test_pearson_dot": 0.7734818279888613, + "eval_sts-test_pearson_euclidean": 0.8386920311953987, + "eval_sts-test_pearson_manhattan": 0.8356441135209492, + "eval_sts-test_pearson_max": 0.8386920311953987, + "eval_sts-test_spearman_cosine": 0.8328721251857153, + "eval_sts-test_spearman_dot": 0.7551982558138911, + "eval_sts-test_spearman_euclidean": 0.8285452152243036, + "eval_sts-test_spearman_manhattan": 0.8259300410111131, + "eval_sts-test_spearman_max": 0.8328721251857153, + "eval_vitaminc-pairs_loss": 2.8136911392211914, + "eval_vitaminc-pairs_runtime": 3.1765, + "eval_vitaminc-pairs_samples_per_second": 40.296, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_negation-triplets_loss": 1.244739055633545, + "eval_negation-triplets_runtime": 0.7519, + "eval_negation-triplets_samples_per_second": 170.238, + "eval_negation-triplets_steps_per_second": 1.33, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_scitail-pairs-pos_loss": 0.22537671029567719, + "eval_scitail-pairs-pos_runtime": 0.8268, + "eval_scitail-pairs-pos_samples_per_second": 154.805, + "eval_scitail-pairs-pos_steps_per_second": 1.209, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_scitail-pairs-qa_loss": 0.014203112572431564, + "eval_scitail-pairs-qa_runtime": 0.5719, + "eval_scitail-pairs-qa_samples_per_second": 223.816, + "eval_scitail-pairs-qa_steps_per_second": 1.749, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_xsum-pairs_loss": 0.6345345377922058, + "eval_xsum-pairs_runtime": 3.0155, + "eval_xsum-pairs_samples_per_second": 42.447, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_sciq_pairs_loss": 0.1278018057346344, + "eval_sciq_pairs_runtime": 3.4439, + "eval_sciq_pairs_samples_per_second": 37.167, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_qasc_pairs_loss": 0.5951372385025024, + "eval_qasc_pairs_runtime": 0.6218, + "eval_qasc_pairs_samples_per_second": 205.857, + "eval_qasc_pairs_steps_per_second": 1.608, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_openbookqa_pairs_loss": 1.232675552368164, + "eval_openbookqa_pairs_runtime": 0.582, + "eval_openbookqa_pairs_samples_per_second": 219.948, + "eval_openbookqa_pairs_steps_per_second": 1.718, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_msmarco_pairs_loss": 1.3142263889312744, + "eval_msmarco_pairs_runtime": 1.5099, + "eval_msmarco_pairs_samples_per_second": 84.773, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_nq_pairs_loss": 1.6414275169372559, + "eval_nq_pairs_runtime": 2.9022, + "eval_nq_pairs_samples_per_second": 44.104, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_trivia_pairs_loss": 1.201471209526062, + "eval_trivia_pairs_runtime": 3.4361, + "eval_trivia_pairs_samples_per_second": 37.252, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_gooaq_pairs_loss": 0.7273324131965637, + "eval_gooaq_pairs_runtime": 0.9436, + "eval_gooaq_pairs_samples_per_second": 135.656, + "eval_gooaq_pairs_steps_per_second": 1.06, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_paws-pos_loss": 0.033227745443582535, + "eval_paws-pos_runtime": 0.6799, + "eval_paws-pos_samples_per_second": 188.253, + "eval_paws-pos_steps_per_second": 1.471, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_global_dataset_loss": 0.644037663936615, + "eval_global_dataset_runtime": 13.3785, + "eval_global_dataset_samples_per_second": 31.095, + "eval_global_dataset_steps_per_second": 0.299, + "step": 640 + }, + { + "epoch": 0.6594650205761317, + "grad_norm": 2.2254586219787598, + "learning_rate": 2.3187954309449633e-05, + "loss": 0.2144, + "step": 641 + }, + { + "epoch": 0.6604938271604939, + "grad_norm": 8.457268714904785, + "learning_rate": 2.3224299065420557e-05, + "loss": 0.5899, + "step": 642 + }, + { + "epoch": 0.661522633744856, + "grad_norm": 16.62227439880371, + "learning_rate": 2.3260643821391484e-05, + "loss": 1.7055, + "step": 643 + }, + { + "epoch": 0.6625514403292181, + "grad_norm": 9.388711929321289, + "learning_rate": 2.329698857736241e-05, + "loss": 0.5673, + "step": 644 + }, + { + "epoch": 0.6635802469135802, + "grad_norm": 3.408893346786499, + "learning_rate": 2.333333333333333e-05, + "loss": 0.0845, + "step": 645 + }, + { + "epoch": 0.6646090534979424, + "grad_norm": 11.298724174499512, + "learning_rate": 2.3369678089304256e-05, + "loss": 0.7168, + "step": 646 + }, + { + "epoch": 0.6656378600823045, + "grad_norm": 16.72682762145996, + "learning_rate": 2.340602284527518e-05, + "loss": 2.6358, + "step": 647 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 7.872361660003662, + "learning_rate": 2.34423676012461e-05, + "loss": 0.3951, + "step": 648 + }, + { + "epoch": 0.6676954732510288, + "grad_norm": 9.12248420715332, + "learning_rate": 2.347871235721703e-05, + "loss": 0.584, + "step": 649 + }, + { + "epoch": 0.668724279835391, + "grad_norm": 11.847990036010742, + "learning_rate": 2.3515057113187953e-05, + "loss": 0.9239, + "step": 650 + }, + { + "epoch": 0.6697530864197531, + "grad_norm": 8.815132141113281, + "learning_rate": 2.3551401869158877e-05, + "loss": 0.576, + "step": 651 + }, + { + "epoch": 0.6707818930041153, + "grad_norm": 13.088105201721191, + "learning_rate": 2.35877466251298e-05, + "loss": 1.2842, + "step": 652 + }, + { + "epoch": 0.6718106995884774, + "grad_norm": 9.663747787475586, + "learning_rate": 2.3624091381100725e-05, + "loss": 0.7108, + "step": 653 + }, + { + "epoch": 0.6728395061728395, + "grad_norm": 10.207884788513184, + "learning_rate": 2.366043613707165e-05, + "loss": 0.6935, + "step": 654 + }, + { + "epoch": 0.6738683127572016, + "grad_norm": 10.963897705078125, + "learning_rate": 2.3696780893042576e-05, + "loss": 0.8278, + "step": 655 + }, + { + "epoch": 0.6748971193415638, + "grad_norm": 9.319234848022461, + "learning_rate": 2.3733125649013497e-05, + "loss": 0.6456, + "step": 656 + }, + { + "epoch": 0.6759259259259259, + "grad_norm": 14.43174934387207, + "learning_rate": 2.376947040498442e-05, + "loss": 1.8842, + "step": 657 + }, + { + "epoch": 0.676954732510288, + "grad_norm": 13.448914527893066, + "learning_rate": 2.3805815160955348e-05, + "loss": 1.2572, + "step": 658 + }, + { + "epoch": 0.6779835390946503, + "grad_norm": 8.692782402038574, + "learning_rate": 2.384215991692627e-05, + "loss": 0.6718, + "step": 659 + }, + { + "epoch": 0.6790123456790124, + "grad_norm": 4.224426746368408, + "learning_rate": 2.3878504672897193e-05, + "loss": 0.1434, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.819502055644989, + "eval_Qnli-dev_cosine_ap": 0.7429310249805731, + "eval_Qnli-dev_cosine_f1": 0.7023411371237458, + "eval_Qnli-dev_cosine_f1_threshold": 0.7529304623603821, + "eval_Qnli-dev_cosine_precision": 0.580110497237569, + "eval_Qnli-dev_cosine_recall": 0.8898305084745762, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 392.81878662109375, + "eval_Qnli-dev_dot_ap": 0.6658795733353435, + "eval_Qnli-dev_dot_f1": 0.684981684981685, + "eval_Qnli-dev_dot_f1_threshold": 388.4842529296875, + "eval_Qnli-dev_dot_precision": 0.603225806451613, + "eval_Qnli-dev_dot_recall": 0.7923728813559322, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.631841659545898, + "eval_Qnli-dev_euclidean_ap": 0.7487350788106928, + "eval_Qnli-dev_euclidean_f1": 0.7015706806282722, + "eval_Qnli-dev_euclidean_f1_threshold": 15.337552070617676, + "eval_Qnli-dev_euclidean_precision": 0.5964391691394659, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.701171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 295.4067077636719, + "eval_Qnli-dev_manhattan_ap": 0.7482376569453756, + "eval_Qnli-dev_manhattan_f1": 0.7113594040968343, + "eval_Qnli-dev_manhattan_f1_threshold": 299.4460754394531, + "eval_Qnli-dev_manhattan_precision": 0.6345514950166113, + "eval_Qnli-dev_manhattan_recall": 0.809322033898305, + "eval_Qnli-dev_max_accuracy": 0.70703125, + "eval_Qnli-dev_max_accuracy_threshold": 392.81878662109375, + "eval_Qnli-dev_max_ap": 0.7487350788106928, + "eval_Qnli-dev_max_f1": 0.7113594040968343, + "eval_Qnli-dev_max_f1_threshold": 388.4842529296875, + "eval_Qnli-dev_max_precision": 0.6345514950166113, + "eval_Qnli-dev_max_recall": 0.8898305084745762, + "eval_allNLI-dev_cosine_accuracy": 0.734375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8959517478942871, + "eval_allNLI-dev_cosine_ap": 0.5960858156370117, + "eval_allNLI-dev_cosine_f1": 0.5995717344753748, + "eval_allNLI-dev_cosine_f1_threshold": 0.7982358932495117, + "eval_allNLI-dev_cosine_precision": 0.47619047619047616, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.689453125, + "eval_allNLI-dev_dot_accuracy_threshold": 450.1946716308594, + "eval_allNLI-dev_dot_ap": 0.5096208353059024, + "eval_allNLI-dev_dot_f1": 0.5690021231422505, + "eval_allNLI-dev_dot_f1_threshold": 398.77850341796875, + "eval_allNLI-dev_dot_precision": 0.44966442953020136, + "eval_allNLI-dev_dot_recall": 0.7745664739884393, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.649042129516602, + "eval_allNLI-dev_euclidean_ap": 0.5979924892509634, + "eval_allNLI-dev_euclidean_f1": 0.6090534979423868, + "eval_allNLI-dev_euclidean_f1_threshold": 14.710177421569824, + "eval_allNLI-dev_euclidean_precision": 0.4728434504792332, + "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 218.83389282226562, + "eval_allNLI-dev_manhattan_ap": 0.5954291033762709, + "eval_allNLI-dev_manhattan_f1": 0.5973451327433628, + "eval_allNLI-dev_manhattan_f1_threshold": 288.9541015625, + "eval_allNLI-dev_manhattan_precision": 0.4838709677419355, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 450.1946716308594, + "eval_allNLI-dev_max_ap": 0.5979924892509634, + "eval_allNLI-dev_max_f1": 0.6090534979423868, + "eval_allNLI-dev_max_f1_threshold": 398.77850341796875, + "eval_allNLI-dev_max_precision": 0.4838709677419355, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7487350788106928, + "eval_sts-test_pearson_cosine": 0.809748141776852, + "eval_sts-test_pearson_dot": 0.7852622986479767, + "eval_sts-test_pearson_euclidean": 0.8383482677548499, + "eval_sts-test_pearson_manhattan": 0.8356178836101067, + "eval_sts-test_pearson_max": 0.8383482677548499, + "eval_sts-test_spearman_cosine": 0.8342041017297689, + "eval_sts-test_spearman_dot": 0.7727315762707344, + "eval_sts-test_spearman_euclidean": 0.8310839542830377, + "eval_sts-test_spearman_manhattan": 0.8265729823835233, + "eval_sts-test_spearman_max": 0.8342041017297689, + "eval_vitaminc-pairs_loss": 2.8169939517974854, + "eval_vitaminc-pairs_runtime": 3.1955, + "eval_vitaminc-pairs_samples_per_second": 40.056, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_negation-triplets_loss": 1.216970443725586, + "eval_negation-triplets_runtime": 0.7501, + "eval_negation-triplets_samples_per_second": 170.642, + "eval_negation-triplets_steps_per_second": 1.333, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_scitail-pairs-pos_loss": 0.2154267579317093, + "eval_scitail-pairs-pos_runtime": 0.8251, + "eval_scitail-pairs-pos_samples_per_second": 155.127, + "eval_scitail-pairs-pos_steps_per_second": 1.212, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_scitail-pairs-qa_loss": 0.008771178312599659, + "eval_scitail-pairs-qa_runtime": 0.5793, + "eval_scitail-pairs-qa_samples_per_second": 220.954, + "eval_scitail-pairs-qa_steps_per_second": 1.726, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_xsum-pairs_loss": 0.6624985933303833, + "eval_xsum-pairs_runtime": 3.0194, + "eval_xsum-pairs_samples_per_second": 42.393, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_sciq_pairs_loss": 0.12456458061933517, + "eval_sciq_pairs_runtime": 3.4544, + "eval_sciq_pairs_samples_per_second": 37.055, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_qasc_pairs_loss": 0.5933777093887329, + "eval_qasc_pairs_runtime": 0.6095, + "eval_qasc_pairs_samples_per_second": 209.991, + "eval_qasc_pairs_steps_per_second": 1.641, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_openbookqa_pairs_loss": 1.2264533042907715, + "eval_openbookqa_pairs_runtime": 0.5907, + "eval_openbookqa_pairs_samples_per_second": 216.708, + "eval_openbookqa_pairs_steps_per_second": 1.693, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_msmarco_pairs_loss": 1.2734606266021729, + "eval_msmarco_pairs_runtime": 1.5181, + "eval_msmarco_pairs_samples_per_second": 84.315, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_nq_pairs_loss": 1.6421589851379395, + "eval_nq_pairs_runtime": 2.8912, + "eval_nq_pairs_samples_per_second": 44.272, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_trivia_pairs_loss": 1.1045206785202026, + "eval_trivia_pairs_runtime": 3.4335, + "eval_trivia_pairs_samples_per_second": 37.28, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_gooaq_pairs_loss": 0.7241554856300354, + "eval_gooaq_pairs_runtime": 0.9492, + "eval_gooaq_pairs_samples_per_second": 134.856, + "eval_gooaq_pairs_steps_per_second": 1.054, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_paws-pos_loss": 0.03431744873523712, + "eval_paws-pos_runtime": 0.6884, + "eval_paws-pos_samples_per_second": 185.934, + "eval_paws-pos_steps_per_second": 1.453, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_global_dataset_loss": 0.6402216553688049, + "eval_global_dataset_runtime": 13.4067, + "eval_global_dataset_samples_per_second": 31.029, + "eval_global_dataset_steps_per_second": 0.298, + "step": 660 + }, + { + "epoch": 0.6800411522633745, + "grad_norm": 20.211734771728516, + "learning_rate": 2.391484942886812e-05, + "loss": 2.1395, + "step": 661 + }, + { + "epoch": 0.6810699588477366, + "grad_norm": 7.7893218994140625, + "learning_rate": 2.3951194184839044e-05, + "loss": 0.6218, + "step": 662 + }, + { + "epoch": 0.6820987654320988, + "grad_norm": 16.382932662963867, + "learning_rate": 2.3987538940809965e-05, + "loss": 1.691, + "step": 663 + }, + { + "epoch": 0.6831275720164609, + "grad_norm": 13.506409645080566, + "learning_rate": 2.4023883696780892e-05, + "loss": 1.3362, + "step": 664 + }, + { + "epoch": 0.684156378600823, + "grad_norm": 13.324780464172363, + "learning_rate": 2.4060228452751816e-05, + "loss": 1.1382, + "step": 665 + }, + { + "epoch": 0.6851851851851852, + "grad_norm": 10.345579147338867, + "learning_rate": 2.4096573208722737e-05, + "loss": 1.0932, + "step": 666 + }, + { + "epoch": 0.6862139917695473, + "grad_norm": 10.737591743469238, + "learning_rate": 2.4132917964693664e-05, + "loss": 0.9572, + "step": 667 + }, + { + "epoch": 0.6872427983539094, + "grad_norm": 17.071697235107422, + "learning_rate": 2.4169262720664588e-05, + "loss": 1.9663, + "step": 668 + }, + { + "epoch": 0.6882716049382716, + "grad_norm": 11.74267292022705, + "learning_rate": 2.4205607476635512e-05, + "loss": 0.8968, + "step": 669 + }, + { + "epoch": 0.6893004115226338, + "grad_norm": 11.056696891784668, + "learning_rate": 2.4241952232606436e-05, + "loss": 0.7906, + "step": 670 + }, + { + "epoch": 0.6903292181069959, + "grad_norm": 10.595043182373047, + "learning_rate": 2.427829698857736e-05, + "loss": 0.7443, + "step": 671 + }, + { + "epoch": 0.691358024691358, + "grad_norm": 9.793761253356934, + "learning_rate": 2.4314641744548284e-05, + "loss": 0.6939, + "step": 672 + }, + { + "epoch": 0.6923868312757202, + "grad_norm": 10.305285453796387, + "learning_rate": 2.4350986500519212e-05, + "loss": 1.202, + "step": 673 + }, + { + "epoch": 0.6934156378600823, + "grad_norm": 1.1254714727401733, + "learning_rate": 2.4387331256490132e-05, + "loss": 0.0276, + "step": 674 + }, + { + "epoch": 0.6944444444444444, + "grad_norm": 10.750346183776855, + "learning_rate": 2.4423676012461056e-05, + "loss": 1.121, + "step": 675 + }, + { + "epoch": 0.6954732510288066, + "grad_norm": 9.77961254119873, + "learning_rate": 2.4460020768431984e-05, + "loss": 0.721, + "step": 676 + }, + { + "epoch": 0.6965020576131687, + "grad_norm": 10.97049331665039, + "learning_rate": 2.4496365524402904e-05, + "loss": 1.0949, + "step": 677 + }, + { + "epoch": 0.6975308641975309, + "grad_norm": 13.591765403747559, + "learning_rate": 2.453271028037383e-05, + "loss": 1.3044, + "step": 678 + }, + { + "epoch": 0.698559670781893, + "grad_norm": 10.30559253692627, + "learning_rate": 2.4569055036344756e-05, + "loss": 0.6867, + "step": 679 + }, + { + "epoch": 0.6995884773662552, + "grad_norm": 9.589376449584961, + "learning_rate": 2.4605399792315676e-05, + "loss": 0.6253, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_Qnli-dev_cosine_accuracy": 0.689453125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8195374011993408, + "eval_Qnli-dev_cosine_ap": 0.7393646924153436, + "eval_Qnli-dev_cosine_f1": 0.702054794520548, + "eval_Qnli-dev_cosine_f1_threshold": 0.7384560704231262, + "eval_Qnli-dev_cosine_precision": 0.5890804597701149, + "eval_Qnli-dev_cosine_recall": 0.8686440677966102, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 395.6339111328125, + "eval_Qnli-dev_dot_ap": 0.6696734110834349, + "eval_Qnli-dev_dot_f1": 0.6894308943089431, + "eval_Qnli-dev_dot_f1_threshold": 355.788330078125, + "eval_Qnli-dev_dot_precision": 0.5593667546174143, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.906505584716797, + "eval_Qnli-dev_euclidean_ap": 0.7444803762790224, + "eval_Qnli-dev_euclidean_f1": 0.7016949152542373, + "eval_Qnli-dev_euclidean_f1_threshold": 16.160581588745117, + "eval_Qnli-dev_euclidean_precision": 0.5847457627118644, + "eval_Qnli-dev_euclidean_recall": 0.8771186440677966, + "eval_Qnli-dev_manhattan_accuracy": 0.703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 292.0366516113281, + "eval_Qnli-dev_manhattan_ap": 0.7455488536354595, + "eval_Qnli-dev_manhattan_f1": 0.7037037037037036, + "eval_Qnli-dev_manhattan_f1_threshold": 331.2184753417969, + "eval_Qnli-dev_manhattan_precision": 0.5837988826815642, + "eval_Qnli-dev_manhattan_recall": 0.885593220338983, + "eval_Qnli-dev_max_accuracy": 0.703125, + "eval_Qnli-dev_max_accuracy_threshold": 395.6339111328125, + "eval_Qnli-dev_max_ap": 0.7455488536354595, + "eval_Qnli-dev_max_f1": 0.7037037037037036, + "eval_Qnli-dev_max_f1_threshold": 355.788330078125, + "eval_Qnli-dev_max_precision": 0.5890804597701149, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8807967901229858, + "eval_allNLI-dev_cosine_ap": 0.5923755189013276, + "eval_allNLI-dev_cosine_f1": 0.5885286783042394, + "eval_allNLI-dev_cosine_f1_threshold": 0.8102627992630005, + "eval_allNLI-dev_cosine_precision": 0.5175438596491229, + "eval_allNLI-dev_cosine_recall": 0.6820809248554913, + "eval_allNLI-dev_dot_accuracy": 0.689453125, + "eval_allNLI-dev_dot_accuracy_threshold": 468.3880615234375, + "eval_allNLI-dev_dot_ap": 0.5099487314518958, + "eval_allNLI-dev_dot_f1": 0.5726872246696035, + "eval_allNLI-dev_dot_f1_threshold": 388.5802001953125, + "eval_allNLI-dev_dot_precision": 0.4626334519572954, + "eval_allNLI-dev_dot_recall": 0.7514450867052023, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.58234691619873, + "eval_allNLI-dev_euclidean_ap": 0.5960351196666029, + "eval_allNLI-dev_euclidean_f1": 0.5934065934065934, + "eval_allNLI-dev_euclidean_f1_threshold": 14.820318222045898, + "eval_allNLI-dev_euclidean_precision": 0.4787234042553192, + "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 219.4961700439453, + "eval_allNLI-dev_manhattan_ap": 0.5953606180151316, + "eval_allNLI-dev_manhattan_f1": 0.5929411764705882, + "eval_allNLI-dev_manhattan_f1_threshold": 293.4901428222656, + "eval_allNLI-dev_manhattan_precision": 0.5, + "eval_allNLI-dev_manhattan_recall": 0.7283236994219653, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 468.3880615234375, + "eval_allNLI-dev_max_ap": 0.5960351196666029, + "eval_allNLI-dev_max_f1": 0.5934065934065934, + "eval_allNLI-dev_max_f1_threshold": 388.5802001953125, + "eval_allNLI-dev_max_precision": 0.5175438596491229, + "eval_allNLI-dev_max_recall": 0.7803468208092486, + "eval_sequential_score": 0.7455488536354595, + "eval_sts-test_pearson_cosine": 0.8159881240293081, + "eval_sts-test_pearson_dot": 0.7825955488055716, + "eval_sts-test_pearson_euclidean": 0.8454112920840406, + "eval_sts-test_pearson_manhattan": 0.8444832657606673, + "eval_sts-test_pearson_max": 0.8454112920840406, + "eval_sts-test_spearman_cosine": 0.8368029417325517, + "eval_sts-test_spearman_dot": 0.7614820041821643, + "eval_sts-test_spearman_euclidean": 0.8350227813056632, + "eval_sts-test_spearman_manhattan": 0.8336858821228565, + "eval_sts-test_spearman_max": 0.8368029417325517, + "eval_vitaminc-pairs_loss": 2.8485310077667236, + "eval_vitaminc-pairs_runtime": 3.1999, + "eval_vitaminc-pairs_samples_per_second": 40.001, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_negation-triplets_loss": 1.1648355722427368, + "eval_negation-triplets_runtime": 0.7448, + "eval_negation-triplets_samples_per_second": 171.851, + "eval_negation-triplets_steps_per_second": 1.343, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_scitail-pairs-pos_loss": 0.21600204706192017, + "eval_scitail-pairs-pos_runtime": 0.8346, + "eval_scitail-pairs-pos_samples_per_second": 153.362, + "eval_scitail-pairs-pos_steps_per_second": 1.198, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_scitail-pairs-qa_loss": 0.00846769753843546, + "eval_scitail-pairs-qa_runtime": 0.5928, + "eval_scitail-pairs-qa_samples_per_second": 215.932, + "eval_scitail-pairs-qa_steps_per_second": 1.687, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_xsum-pairs_loss": 0.6605619192123413, + "eval_xsum-pairs_runtime": 3.025, + "eval_xsum-pairs_samples_per_second": 42.314, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_sciq_pairs_loss": 0.12335162609815598, + "eval_sciq_pairs_runtime": 3.4321, + "eval_sciq_pairs_samples_per_second": 37.295, + "eval_sciq_pairs_steps_per_second": 0.291, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_qasc_pairs_loss": 0.5843737721443176, + "eval_qasc_pairs_runtime": 0.6047, + "eval_qasc_pairs_samples_per_second": 211.678, + "eval_qasc_pairs_steps_per_second": 1.654, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_openbookqa_pairs_loss": 1.2838267087936401, + "eval_openbookqa_pairs_runtime": 0.5755, + "eval_openbookqa_pairs_samples_per_second": 222.41, + "eval_openbookqa_pairs_steps_per_second": 1.738, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_msmarco_pairs_loss": 1.3720968961715698, + "eval_msmarco_pairs_runtime": 1.518, + "eval_msmarco_pairs_samples_per_second": 84.323, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_nq_pairs_loss": 1.5162333250045776, + "eval_nq_pairs_runtime": 2.9004, + "eval_nq_pairs_samples_per_second": 44.131, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_trivia_pairs_loss": 1.12861168384552, + "eval_trivia_pairs_runtime": 3.4369, + "eval_trivia_pairs_samples_per_second": 37.243, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_gooaq_pairs_loss": 0.6519899368286133, + "eval_gooaq_pairs_runtime": 0.9806, + "eval_gooaq_pairs_samples_per_second": 130.53, + "eval_gooaq_pairs_steps_per_second": 1.02, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_paws-pos_loss": 0.03412044420838356, + "eval_paws-pos_runtime": 0.6854, + "eval_paws-pos_samples_per_second": 186.764, + "eval_paws-pos_steps_per_second": 1.459, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_global_dataset_loss": 0.6541453003883362, + "eval_global_dataset_runtime": 13.4093, + "eval_global_dataset_samples_per_second": 31.023, + "eval_global_dataset_steps_per_second": 0.298, + "step": 680 + }, + { + "epoch": 0.7006172839506173, + "grad_norm": 1.0433952808380127, + "learning_rate": 2.46417445482866e-05, + "loss": 0.063, + "step": 681 + }, + { + "epoch": 0.7016460905349794, + "grad_norm": 16.610177993774414, + "learning_rate": 2.4678089304257528e-05, + "loss": 1.4254, + "step": 682 + }, + { + "epoch": 0.7026748971193416, + "grad_norm": 19.412683486938477, + "learning_rate": 2.4714434060228452e-05, + "loss": 3.1631, + "step": 683 + }, + { + "epoch": 0.7037037037037037, + "grad_norm": 13.261174201965332, + "learning_rate": 2.4750778816199373e-05, + "loss": 1.2375, + "step": 684 + }, + { + "epoch": 0.7047325102880658, + "grad_norm": 9.231230735778809, + "learning_rate": 2.47871235721703e-05, + "loss": 0.5716, + "step": 685 + }, + { + "epoch": 0.7057613168724279, + "grad_norm": 16.746212005615234, + "learning_rate": 2.4823468328141224e-05, + "loss": 2.939, + "step": 686 + }, + { + "epoch": 0.7067901234567902, + "grad_norm": 16.490650177001953, + "learning_rate": 2.4859813084112145e-05, + "loss": 1.7054, + "step": 687 + }, + { + "epoch": 0.7078189300411523, + "grad_norm": 8.707398414611816, + "learning_rate": 2.4896157840083072e-05, + "loss": 0.4784, + "step": 688 + }, + { + "epoch": 0.7088477366255144, + "grad_norm": 9.790912628173828, + "learning_rate": 2.4932502596053996e-05, + "loss": 0.7157, + "step": 689 + }, + { + "epoch": 0.7098765432098766, + "grad_norm": 8.632383346557617, + "learning_rate": 2.496884735202492e-05, + "loss": 0.6421, + "step": 690 + }, + { + "epoch": 0.7109053497942387, + "grad_norm": 8.732678413391113, + "learning_rate": 2.5005192107995844e-05, + "loss": 0.6502, + "step": 691 + }, + { + "epoch": 0.7119341563786008, + "grad_norm": 16.7855281829834, + "learning_rate": 2.5041536863966768e-05, + "loss": 3.4679, + "step": 692 + }, + { + "epoch": 0.7129629629629629, + "grad_norm": 8.66584587097168, + "learning_rate": 2.5077881619937692e-05, + "loss": 0.5872, + "step": 693 + }, + { + "epoch": 0.7139917695473251, + "grad_norm": 14.179039001464844, + "learning_rate": 2.511422637590862e-05, + "loss": 1.5769, + "step": 694 + }, + { + "epoch": 0.7150205761316872, + "grad_norm": 8.276007652282715, + "learning_rate": 2.515057113187954e-05, + "loss": 0.5454, + "step": 695 + }, + { + "epoch": 0.7160493827160493, + "grad_norm": 12.96976375579834, + "learning_rate": 2.5186915887850464e-05, + "loss": 1.4251, + "step": 696 + }, + { + "epoch": 0.7170781893004116, + "grad_norm": 8.970144271850586, + "learning_rate": 2.522326064382139e-05, + "loss": 0.6667, + "step": 697 + }, + { + "epoch": 0.7181069958847737, + "grad_norm": 1.4171106815338135, + "learning_rate": 2.5259605399792312e-05, + "loss": 0.0382, + "step": 698 + }, + { + "epoch": 0.7191358024691358, + "grad_norm": 4.66494607925415, + "learning_rate": 2.5295950155763236e-05, + "loss": 0.1808, + "step": 699 + }, + { + "epoch": 0.720164609053498, + "grad_norm": 9.647722244262695, + "learning_rate": 2.5332294911734164e-05, + "loss": 0.8819, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_Qnli-dev_cosine_accuracy": 0.6875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8055030107498169, + "eval_Qnli-dev_cosine_ap": 0.7346523188251083, + "eval_Qnli-dev_cosine_f1": 0.7008849557522123, + "eval_Qnli-dev_cosine_f1_threshold": 0.7691887021064758, + "eval_Qnli-dev_cosine_precision": 0.601823708206687, + "eval_Qnli-dev_cosine_recall": 0.8389830508474576, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 403.0814208984375, + "eval_Qnli-dev_dot_ap": 0.6423809971933063, + "eval_Qnli-dev_dot_f1": 0.6771929824561403, + "eval_Qnli-dev_dot_f1_threshold": 380.7566833496094, + "eval_Qnli-dev_dot_precision": 0.5778443113772455, + "eval_Qnli-dev_dot_recall": 0.8177966101694916, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.891797065734863, + "eval_Qnli-dev_euclidean_ap": 0.7419509834416282, + "eval_Qnli-dev_euclidean_f1": 0.7024221453287196, + "eval_Qnli-dev_euclidean_f1_threshold": 15.521956443786621, + "eval_Qnli-dev_euclidean_precision": 0.5935672514619883, + "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 284.7353515625, + "eval_Qnli-dev_manhattan_ap": 0.7404308497091309, + "eval_Qnli-dev_manhattan_f1": 0.6989619377162629, + "eval_Qnli-dev_manhattan_f1_threshold": 318.97943115234375, + "eval_Qnli-dev_manhattan_precision": 0.5906432748538012, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.70703125, + "eval_Qnli-dev_max_accuracy_threshold": 403.0814208984375, + "eval_Qnli-dev_max_ap": 0.7419509834416282, + "eval_Qnli-dev_max_f1": 0.7024221453287196, + "eval_Qnli-dev_max_f1_threshold": 380.7566833496094, + "eval_Qnli-dev_max_precision": 0.601823708206687, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8836915493011475, + "eval_allNLI-dev_cosine_ap": 0.5879168509874412, + "eval_allNLI-dev_cosine_f1": 0.5914893617021276, + "eval_allNLI-dev_cosine_f1_threshold": 0.7854909896850586, + "eval_allNLI-dev_cosine_precision": 0.468013468013468, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.689453125, + "eval_allNLI-dev_dot_accuracy_threshold": 468.509765625, + "eval_allNLI-dev_dot_ap": 0.5290950515284383, + "eval_allNLI-dev_dot_f1": 0.576923076923077, + "eval_allNLI-dev_dot_f1_threshold": 394.4248352050781, + "eval_allNLI-dev_dot_precision": 0.4576271186440678, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.812106132507324, + "eval_allNLI-dev_euclidean_ap": 0.5902832492425357, + "eval_allNLI-dev_euclidean_f1": 0.5927505330490405, + "eval_allNLI-dev_euclidean_f1_threshold": 14.791348457336426, + "eval_allNLI-dev_euclidean_precision": 0.46959459459459457, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 228.82778930664062, + "eval_allNLI-dev_manhattan_ap": 0.5867522032562185, + "eval_allNLI-dev_manhattan_f1": 0.592255125284738, + "eval_allNLI-dev_manhattan_f1_threshold": 288.2390441894531, + "eval_allNLI-dev_manhattan_precision": 0.48872180451127817, + "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 468.509765625, + "eval_allNLI-dev_max_ap": 0.5902832492425357, + "eval_allNLI-dev_max_f1": 0.5927505330490405, + "eval_allNLI-dev_max_f1_threshold": 394.4248352050781, + "eval_allNLI-dev_max_precision": 0.48872180451127817, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.7419509834416282, + "eval_sts-test_pearson_cosine": 0.814122807787653, + "eval_sts-test_pearson_dot": 0.7849759177486642, + "eval_sts-test_pearson_euclidean": 0.8421714998904108, + "eval_sts-test_pearson_manhattan": 0.8394866389200708, + "eval_sts-test_pearson_max": 0.8421714998904108, + "eval_sts-test_spearman_cosine": 0.837628602505223, + "eval_sts-test_spearman_dot": 0.7737345862922999, + "eval_sts-test_spearman_euclidean": 0.8339600731014016, + "eval_sts-test_spearman_manhattan": 0.831537105555887, + "eval_sts-test_spearman_max": 0.837628602505223, + "eval_vitaminc-pairs_loss": 2.8523178100585938, + "eval_vitaminc-pairs_runtime": 3.1968, + "eval_vitaminc-pairs_samples_per_second": 40.039, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_negation-triplets_loss": 1.1572741270065308, + "eval_negation-triplets_runtime": 0.7545, + "eval_negation-triplets_samples_per_second": 169.646, + "eval_negation-triplets_steps_per_second": 1.325, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_scitail-pairs-pos_loss": 0.21792583167552948, + "eval_scitail-pairs-pos_runtime": 0.8512, + "eval_scitail-pairs-pos_samples_per_second": 150.374, + "eval_scitail-pairs-pos_steps_per_second": 1.175, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_scitail-pairs-qa_loss": 0.014304843731224537, + "eval_scitail-pairs-qa_runtime": 0.5821, + "eval_scitail-pairs-qa_samples_per_second": 219.895, + "eval_scitail-pairs-qa_steps_per_second": 1.718, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_xsum-pairs_loss": 0.688365638256073, + "eval_xsum-pairs_runtime": 3.0302, + "eval_xsum-pairs_samples_per_second": 42.242, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_sciq_pairs_loss": 0.12412170320749283, + "eval_sciq_pairs_runtime": 3.4839, + "eval_sciq_pairs_samples_per_second": 36.741, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_qasc_pairs_loss": 0.5808658003807068, + "eval_qasc_pairs_runtime": 0.6151, + "eval_qasc_pairs_samples_per_second": 208.103, + "eval_qasc_pairs_steps_per_second": 1.626, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_openbookqa_pairs_loss": 1.136744499206543, + "eval_openbookqa_pairs_runtime": 0.5932, + "eval_openbookqa_pairs_samples_per_second": 215.777, + "eval_openbookqa_pairs_steps_per_second": 1.686, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_msmarco_pairs_loss": 1.2205469608306885, + "eval_msmarco_pairs_runtime": 1.5248, + "eval_msmarco_pairs_samples_per_second": 83.947, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_nq_pairs_loss": 1.6330437660217285, + "eval_nq_pairs_runtime": 2.9004, + "eval_nq_pairs_samples_per_second": 44.131, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_trivia_pairs_loss": 1.195753812789917, + "eval_trivia_pairs_runtime": 3.4466, + "eval_trivia_pairs_samples_per_second": 37.138, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_gooaq_pairs_loss": 0.7037076950073242, + "eval_gooaq_pairs_runtime": 0.9565, + "eval_gooaq_pairs_samples_per_second": 133.823, + "eval_gooaq_pairs_steps_per_second": 1.045, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_paws-pos_loss": 0.03305948153138161, + "eval_paws-pos_runtime": 0.6963, + "eval_paws-pos_samples_per_second": 183.824, + "eval_paws-pos_steps_per_second": 1.436, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_global_dataset_loss": 0.6367413401603699, + "eval_global_dataset_runtime": 13.4188, + "eval_global_dataset_samples_per_second": 31.001, + "eval_global_dataset_steps_per_second": 0.298, + "step": 700 + }, + { + "epoch": 0.7211934156378601, + "grad_norm": 9.350117683410645, + "learning_rate": 2.5368639667705088e-05, + "loss": 0.6013, + "step": 701 + }, + { + "epoch": 0.7222222222222222, + "grad_norm": 11.008674621582031, + "learning_rate": 2.540498442367601e-05, + "loss": 1.181, + "step": 702 + }, + { + "epoch": 0.7232510288065843, + "grad_norm": 12.882591247558594, + "learning_rate": 2.5441329179646936e-05, + "loss": 1.1574, + "step": 703 + }, + { + "epoch": 0.7242798353909465, + "grad_norm": 10.404853820800781, + "learning_rate": 2.547767393561786e-05, + "loss": 0.6094, + "step": 704 + }, + { + "epoch": 0.7253086419753086, + "grad_norm": 10.375190734863281, + "learning_rate": 2.551401869158878e-05, + "loss": 0.6303, + "step": 705 + }, + { + "epoch": 0.7263374485596708, + "grad_norm": 11.026881217956543, + "learning_rate": 2.5550363447559708e-05, + "loss": 0.626, + "step": 706 + }, + { + "epoch": 0.727366255144033, + "grad_norm": 9.781618118286133, + "learning_rate": 2.5586708203530632e-05, + "loss": 0.5284, + "step": 707 + }, + { + "epoch": 0.7283950617283951, + "grad_norm": 2.4945054054260254, + "learning_rate": 2.5623052959501556e-05, + "loss": 0.0619, + "step": 708 + }, + { + "epoch": 0.7294238683127572, + "grad_norm": 14.84467887878418, + "learning_rate": 2.565939771547248e-05, + "loss": 1.3394, + "step": 709 + }, + { + "epoch": 0.7304526748971193, + "grad_norm": 3.6432929039001465, + "learning_rate": 2.5695742471443404e-05, + "loss": 0.0922, + "step": 710 + }, + { + "epoch": 0.7314814814814815, + "grad_norm": 3.2191617488861084, + "learning_rate": 2.5732087227414328e-05, + "loss": 0.068, + "step": 711 + }, + { + "epoch": 0.7325102880658436, + "grad_norm": 10.091761589050293, + "learning_rate": 2.5768431983385255e-05, + "loss": 0.5414, + "step": 712 + }, + { + "epoch": 0.7335390946502057, + "grad_norm": 9.839192390441895, + "learning_rate": 2.5804776739356176e-05, + "loss": 0.5332, + "step": 713 + }, + { + "epoch": 0.7345679012345679, + "grad_norm": 9.548250198364258, + "learning_rate": 2.58411214953271e-05, + "loss": 0.5112, + "step": 714 + }, + { + "epoch": 0.73559670781893, + "grad_norm": 23.554458618164062, + "learning_rate": 2.5877466251298027e-05, + "loss": 3.5468, + "step": 715 + }, + { + "epoch": 0.7366255144032922, + "grad_norm": 1.0547456741333008, + "learning_rate": 2.5913811007268948e-05, + "loss": 0.0244, + "step": 716 + }, + { + "epoch": 0.7376543209876543, + "grad_norm": 10.332133293151855, + "learning_rate": 2.5950155763239872e-05, + "loss": 0.528, + "step": 717 + }, + { + "epoch": 0.7386831275720165, + "grad_norm": 16.862545013427734, + "learning_rate": 2.59865005192108e-05, + "loss": 1.7134, + "step": 718 + }, + { + "epoch": 0.7397119341563786, + "grad_norm": 9.824862480163574, + "learning_rate": 2.6022845275181723e-05, + "loss": 0.6181, + "step": 719 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 17.680917739868164, + "learning_rate": 2.6059190031152644e-05, + "loss": 1.7897, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8089187741279602, + "eval_Qnli-dev_cosine_ap": 0.7445288564241996, + "eval_Qnli-dev_cosine_f1": 0.7088607594936709, + "eval_Qnli-dev_cosine_f1_threshold": 0.7645823955535889, + "eval_Qnli-dev_cosine_precision": 0.6182965299684543, + "eval_Qnli-dev_cosine_recall": 0.8305084745762712, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 374.24700927734375, + "eval_Qnli-dev_dot_ap": 0.6643934387620949, + "eval_Qnli-dev_dot_f1": 0.6875, + "eval_Qnli-dev_dot_f1_threshold": 374.24700927734375, + "eval_Qnli-dev_dot_precision": 0.6071428571428571, + "eval_Qnli-dev_dot_recall": 0.7923728813559322, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.01733684539795, + "eval_Qnli-dev_euclidean_ap": 0.7511376116503252, + "eval_Qnli-dev_euclidean_f1": 0.7107750472589792, + "eval_Qnli-dev_euclidean_f1_threshold": 14.925470352172852, + "eval_Qnli-dev_euclidean_precision": 0.6416382252559727, + "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 279.2970886230469, + "eval_Qnli-dev_manhattan_ap": 0.7531882826368892, + "eval_Qnli-dev_manhattan_f1": 0.7052810902896082, + "eval_Qnli-dev_manhattan_f1_threshold": 327.6318359375, + "eval_Qnli-dev_manhattan_precision": 0.5897435897435898, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 374.24700927734375, + "eval_Qnli-dev_max_ap": 0.7531882826368892, + "eval_Qnli-dev_max_f1": 0.7107750472589792, + "eval_Qnli-dev_max_f1_threshold": 374.24700927734375, + "eval_Qnli-dev_max_precision": 0.6416382252559727, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.740234375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8845050930976868, + "eval_allNLI-dev_cosine_ap": 0.6029211833635529, + "eval_allNLI-dev_cosine_f1": 0.6096033402922756, + "eval_allNLI-dev_cosine_f1_threshold": 0.7970777750015259, + "eval_allNLI-dev_cosine_precision": 0.477124183006536, + "eval_allNLI-dev_cosine_recall": 0.8439306358381503, + "eval_allNLI-dev_dot_accuracy": 0.693359375, + "eval_allNLI-dev_dot_accuracy_threshold": 465.4620361328125, + "eval_allNLI-dev_dot_ap": 0.512993085572406, + "eval_allNLI-dev_dot_f1": 0.5753968253968255, + "eval_allNLI-dev_dot_f1_threshold": 391.34271240234375, + "eval_allNLI-dev_dot_precision": 0.4380664652567976, + "eval_allNLI-dev_dot_recall": 0.838150289017341, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.847936630249023, + "eval_allNLI-dev_euclidean_ap": 0.6067823005817112, + "eval_allNLI-dev_euclidean_f1": 0.60813704496788, + "eval_allNLI-dev_euclidean_f1_threshold": 14.172441482543945, + "eval_allNLI-dev_euclidean_precision": 0.48299319727891155, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.73828125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 217.6175537109375, + "eval_allNLI-dev_manhattan_ap": 0.5978323891873064, + "eval_allNLI-dev_manhattan_f1": 0.5991902834008097, + "eval_allNLI-dev_manhattan_f1_threshold": 298.9595031738281, + "eval_allNLI-dev_manhattan_precision": 0.46105919003115264, + "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 465.4620361328125, + "eval_allNLI-dev_max_ap": 0.6067823005817112, + "eval_allNLI-dev_max_f1": 0.6096033402922756, + "eval_allNLI-dev_max_f1_threshold": 391.34271240234375, + "eval_allNLI-dev_max_precision": 0.48299319727891155, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7531882826368892, + "eval_sts-test_pearson_cosine": 0.7943928851510986, + "eval_sts-test_pearson_dot": 0.7406480169219867, + "eval_sts-test_pearson_euclidean": 0.8200699159277771, + "eval_sts-test_pearson_manhattan": 0.8153052752015822, + "eval_sts-test_pearson_max": 0.8200699159277771, + "eval_sts-test_spearman_cosine": 0.814777534408501, + "eval_sts-test_spearman_dot": 0.7252969844950452, + "eval_sts-test_spearman_euclidean": 0.8124804521612804, + "eval_sts-test_spearman_manhattan": 0.8084946543855285, + "eval_sts-test_spearman_max": 0.814777534408501, + "eval_vitaminc-pairs_loss": 2.5636518001556396, + "eval_vitaminc-pairs_runtime": 3.2076, + "eval_vitaminc-pairs_samples_per_second": 39.905, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_negation-triplets_loss": 1.1352839469909668, + "eval_negation-triplets_runtime": 0.749, + "eval_negation-triplets_samples_per_second": 170.903, + "eval_negation-triplets_steps_per_second": 1.335, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_scitail-pairs-pos_loss": 0.2415001094341278, + "eval_scitail-pairs-pos_runtime": 0.8417, + "eval_scitail-pairs-pos_samples_per_second": 152.073, + "eval_scitail-pairs-pos_steps_per_second": 1.188, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_scitail-pairs-qa_loss": 0.0037513382267206907, + "eval_scitail-pairs-qa_runtime": 0.5837, + "eval_scitail-pairs-qa_samples_per_second": 219.305, + "eval_scitail-pairs-qa_steps_per_second": 1.713, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_xsum-pairs_loss": 0.7015084624290466, + "eval_xsum-pairs_runtime": 3.0329, + "eval_xsum-pairs_samples_per_second": 42.204, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_sciq_pairs_loss": 0.13029436767101288, + "eval_sciq_pairs_runtime": 3.454, + "eval_sciq_pairs_samples_per_second": 37.059, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_qasc_pairs_loss": 0.5081034302711487, + "eval_qasc_pairs_runtime": 0.6041, + "eval_qasc_pairs_samples_per_second": 211.882, + "eval_qasc_pairs_steps_per_second": 1.655, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_openbookqa_pairs_loss": 1.2555147409439087, + "eval_openbookqa_pairs_runtime": 0.5953, + "eval_openbookqa_pairs_samples_per_second": 215.03, + "eval_openbookqa_pairs_steps_per_second": 1.68, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_msmarco_pairs_loss": 1.305182695388794, + "eval_msmarco_pairs_runtime": 1.5199, + "eval_msmarco_pairs_samples_per_second": 84.214, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_nq_pairs_loss": 1.5818196535110474, + "eval_nq_pairs_runtime": 2.8983, + "eval_nq_pairs_samples_per_second": 44.163, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_trivia_pairs_loss": 1.2283203601837158, + "eval_trivia_pairs_runtime": 3.4398, + "eval_trivia_pairs_samples_per_second": 37.212, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_gooaq_pairs_loss": 0.7275317907333374, + "eval_gooaq_pairs_runtime": 0.948, + "eval_gooaq_pairs_samples_per_second": 135.023, + "eval_gooaq_pairs_steps_per_second": 1.055, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_paws-pos_loss": 0.03339088708162308, + "eval_paws-pos_runtime": 0.6932, + "eval_paws-pos_samples_per_second": 184.64, + "eval_paws-pos_steps_per_second": 1.442, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_global_dataset_loss": 0.5928239226341248, + "eval_global_dataset_runtime": 13.3878, + "eval_global_dataset_samples_per_second": 31.073, + "eval_global_dataset_steps_per_second": 0.299, + "step": 720 + }, + { + "epoch": 0.7417695473251029, + "grad_norm": 10.516244888305664, + "learning_rate": 2.609553478712357e-05, + "loss": 0.7104, + "step": 721 + }, + { + "epoch": 0.742798353909465, + "grad_norm": 0.8260862827301025, + "learning_rate": 2.6131879543094495e-05, + "loss": 0.0219, + "step": 722 + }, + { + "epoch": 0.7438271604938271, + "grad_norm": 14.152036666870117, + "learning_rate": 2.6168224299065416e-05, + "loss": 1.3516, + "step": 723 + }, + { + "epoch": 0.7448559670781894, + "grad_norm": 8.1348237991333, + "learning_rate": 2.6204569055036344e-05, + "loss": 0.5472, + "step": 724 + }, + { + "epoch": 0.7458847736625515, + "grad_norm": 8.534761428833008, + "learning_rate": 2.6240913811007268e-05, + "loss": 0.5357, + "step": 725 + }, + { + "epoch": 0.7469135802469136, + "grad_norm": 11.620552062988281, + "learning_rate": 2.627725856697819e-05, + "loss": 1.0346, + "step": 726 + }, + { + "epoch": 0.7479423868312757, + "grad_norm": 10.823874473571777, + "learning_rate": 2.6313603322949116e-05, + "loss": 0.8461, + "step": 727 + }, + { + "epoch": 0.7489711934156379, + "grad_norm": 14.860071182250977, + "learning_rate": 2.634994807892004e-05, + "loss": 1.7762, + "step": 728 + }, + { + "epoch": 0.75, + "grad_norm": 9.170268058776855, + "learning_rate": 2.6386292834890964e-05, + "loss": 0.6121, + "step": 729 + }, + { + "epoch": 0.7510288065843621, + "grad_norm": 3.6571240425109863, + "learning_rate": 2.6422637590861888e-05, + "loss": 0.1051, + "step": 730 + }, + { + "epoch": 0.7520576131687243, + "grad_norm": 7.615705966949463, + "learning_rate": 2.645898234683281e-05, + "loss": 0.5804, + "step": 731 + }, + { + "epoch": 0.7530864197530864, + "grad_norm": 11.42629337310791, + "learning_rate": 2.6495327102803736e-05, + "loss": 1.0625, + "step": 732 + }, + { + "epoch": 0.7541152263374485, + "grad_norm": 1.1732608079910278, + "learning_rate": 2.6531671858774663e-05, + "loss": 0.0471, + "step": 733 + }, + { + "epoch": 0.7551440329218106, + "grad_norm": 10.805855751037598, + "learning_rate": 2.6568016614745584e-05, + "loss": 0.767, + "step": 734 + }, + { + "epoch": 0.7561728395061729, + "grad_norm": 7.8192009925842285, + "learning_rate": 2.6604361370716508e-05, + "loss": 0.4262, + "step": 735 + }, + { + "epoch": 0.757201646090535, + "grad_norm": 14.414314270019531, + "learning_rate": 2.6640706126687435e-05, + "loss": 1.4077, + "step": 736 + }, + { + "epoch": 0.7582304526748971, + "grad_norm": 18.263036727905273, + "learning_rate": 2.6677050882658356e-05, + "loss": 1.5963, + "step": 737 + }, + { + "epoch": 0.7592592592592593, + "grad_norm": 11.086414337158203, + "learning_rate": 2.671339563862928e-05, + "loss": 1.2141, + "step": 738 + }, + { + "epoch": 0.7602880658436214, + "grad_norm": 13.789649963378906, + "learning_rate": 2.6749740394600207e-05, + "loss": 1.454, + "step": 739 + }, + { + "epoch": 0.7613168724279835, + "grad_norm": 9.959060668945312, + "learning_rate": 2.678608515057113e-05, + "loss": 0.696, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7968876361846924, + "eval_Qnli-dev_cosine_ap": 0.727602546794372, + "eval_Qnli-dev_cosine_f1": 0.6979166666666667, + "eval_Qnli-dev_cosine_f1_threshold": 0.7464833855628967, + "eval_Qnli-dev_cosine_precision": 0.5911764705882353, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 399.69769287109375, + "eval_Qnli-dev_dot_ap": 0.645670123752458, + "eval_Qnli-dev_dot_f1": 0.6833631484794276, + "eval_Qnli-dev_dot_f1_threshold": 367.0381774902344, + "eval_Qnli-dev_dot_precision": 0.5913312693498453, + "eval_Qnli-dev_dot_recall": 0.809322033898305, + "eval_Qnli-dev_euclidean_accuracy": 0.69921875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.117036819458008, + "eval_Qnli-dev_euclidean_ap": 0.7337639674568743, + "eval_Qnli-dev_euclidean_f1": 0.7001675041876045, + "eval_Qnli-dev_euclidean_f1_threshold": 16.333152770996094, + "eval_Qnli-dev_euclidean_precision": 0.5789473684210527, + "eval_Qnli-dev_euclidean_recall": 0.885593220338983, + "eval_Qnli-dev_manhattan_accuracy": 0.703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 294.5778503417969, + "eval_Qnli-dev_manhattan_ap": 0.7366181110833769, + "eval_Qnli-dev_manhattan_f1": 0.6973180076628352, + "eval_Qnli-dev_manhattan_f1_threshold": 302.7152099609375, + "eval_Qnli-dev_manhattan_precision": 0.6363636363636364, + "eval_Qnli-dev_manhattan_recall": 0.7711864406779662, + "eval_Qnli-dev_max_accuracy": 0.703125, + "eval_Qnli-dev_max_accuracy_threshold": 399.69769287109375, + "eval_Qnli-dev_max_ap": 0.7366181110833769, + "eval_Qnli-dev_max_f1": 0.7001675041876045, + "eval_Qnli-dev_max_f1_threshold": 367.0381774902344, + "eval_Qnli-dev_max_precision": 0.6363636363636364, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8710236549377441, + "eval_allNLI-dev_cosine_ap": 0.5945373648797287, + "eval_allNLI-dev_cosine_f1": 0.5975609756097561, + "eval_allNLI-dev_cosine_f1_threshold": 0.7772917747497559, + "eval_allNLI-dev_cosine_precision": 0.4608150470219436, + "eval_allNLI-dev_cosine_recall": 0.8497109826589595, + "eval_allNLI-dev_dot_accuracy": 0.6953125, + "eval_allNLI-dev_dot_accuracy_threshold": 456.685546875, + "eval_allNLI-dev_dot_ap": 0.516668964052817, + "eval_allNLI-dev_dot_f1": 0.5790554414784395, + "eval_allNLI-dev_dot_f1_threshold": 387.36737060546875, + "eval_allNLI-dev_dot_precision": 0.44904458598726116, + "eval_allNLI-dev_dot_recall": 0.815028901734104, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.068269729614258, + "eval_allNLI-dev_euclidean_ap": 0.5966992226114267, + "eval_allNLI-dev_euclidean_f1": 0.5970772442588727, + "eval_allNLI-dev_euclidean_f1_threshold": 14.66142463684082, + "eval_allNLI-dev_euclidean_precision": 0.4673202614379085, + "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 237.41098022460938, + "eval_allNLI-dev_manhattan_ap": 0.5880205832464749, + "eval_allNLI-dev_manhattan_f1": 0.5914893617021276, + "eval_allNLI-dev_manhattan_f1_threshold": 297.3165283203125, + "eval_allNLI-dev_manhattan_precision": 0.468013468013468, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 456.685546875, + "eval_allNLI-dev_max_ap": 0.5966992226114267, + "eval_allNLI-dev_max_f1": 0.5975609756097561, + "eval_allNLI-dev_max_f1_threshold": 387.36737060546875, + "eval_allNLI-dev_max_precision": 0.468013468013468, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7366181110833769, + "eval_sts-test_pearson_cosine": 0.8088324955753331, + "eval_sts-test_pearson_dot": 0.7775578039423507, + "eval_sts-test_pearson_euclidean": 0.8322159624410153, + "eval_sts-test_pearson_manhattan": 0.8289905701496498, + "eval_sts-test_pearson_max": 0.8322159624410153, + "eval_sts-test_spearman_cosine": 0.829002036100587, + "eval_sts-test_spearman_dot": 0.7651558142348298, + "eval_sts-test_spearman_euclidean": 0.8238636515163652, + "eval_sts-test_spearman_manhattan": 0.8193701326087933, + "eval_sts-test_spearman_max": 0.829002036100587, + "eval_vitaminc-pairs_loss": 2.652156114578247, + "eval_vitaminc-pairs_runtime": 3.195, + "eval_vitaminc-pairs_samples_per_second": 40.062, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_negation-triplets_loss": 1.1374459266662598, + "eval_negation-triplets_runtime": 0.7568, + "eval_negation-triplets_samples_per_second": 169.13, + "eval_negation-triplets_steps_per_second": 1.321, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_scitail-pairs-pos_loss": 0.18683280050754547, + "eval_scitail-pairs-pos_runtime": 0.8273, + "eval_scitail-pairs-pos_samples_per_second": 154.717, + "eval_scitail-pairs-pos_steps_per_second": 1.209, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_scitail-pairs-qa_loss": 0.004593902267515659, + "eval_scitail-pairs-qa_runtime": 0.5777, + "eval_scitail-pairs-qa_samples_per_second": 221.553, + "eval_scitail-pairs-qa_steps_per_second": 1.731, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_xsum-pairs_loss": 0.7033074498176575, + "eval_xsum-pairs_runtime": 3.0213, + "eval_xsum-pairs_samples_per_second": 42.366, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_sciq_pairs_loss": 0.12240559607744217, + "eval_sciq_pairs_runtime": 3.4526, + "eval_sciq_pairs_samples_per_second": 37.074, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_qasc_pairs_loss": 0.5442161560058594, + "eval_qasc_pairs_runtime": 0.609, + "eval_qasc_pairs_samples_per_second": 210.184, + "eval_qasc_pairs_steps_per_second": 1.642, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_openbookqa_pairs_loss": 1.1632599830627441, + "eval_openbookqa_pairs_runtime": 0.5933, + "eval_openbookqa_pairs_samples_per_second": 215.749, + "eval_openbookqa_pairs_steps_per_second": 1.686, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_msmarco_pairs_loss": 1.1908891201019287, + "eval_msmarco_pairs_runtime": 1.5422, + "eval_msmarco_pairs_samples_per_second": 83.0, + "eval_msmarco_pairs_steps_per_second": 0.648, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_nq_pairs_loss": 1.4470250606536865, + "eval_nq_pairs_runtime": 2.8983, + "eval_nq_pairs_samples_per_second": 44.165, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_trivia_pairs_loss": 1.1257771253585815, + "eval_trivia_pairs_runtime": 3.4458, + "eval_trivia_pairs_samples_per_second": 37.147, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_gooaq_pairs_loss": 0.6294673085212708, + "eval_gooaq_pairs_runtime": 0.9529, + "eval_gooaq_pairs_samples_per_second": 134.331, + "eval_gooaq_pairs_steps_per_second": 1.049, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_paws-pos_loss": 0.029657872393727303, + "eval_paws-pos_runtime": 0.6916, + "eval_paws-pos_samples_per_second": 185.091, + "eval_paws-pos_steps_per_second": 1.446, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_global_dataset_loss": 0.574967622756958, + "eval_global_dataset_runtime": 13.3853, + "eval_global_dataset_samples_per_second": 31.079, + "eval_global_dataset_steps_per_second": 0.299, + "step": 740 + }, + { + "epoch": 0.7623456790123457, + "grad_norm": 8.358269691467285, + "learning_rate": 2.6822429906542052e-05, + "loss": 0.5052, + "step": 741 + }, + { + "epoch": 0.7633744855967078, + "grad_norm": 3.5208804607391357, + "learning_rate": 2.685877466251298e-05, + "loss": 0.101, + "step": 742 + }, + { + "epoch": 0.76440329218107, + "grad_norm": 14.886555671691895, + "learning_rate": 2.6895119418483903e-05, + "loss": 1.6467, + "step": 743 + }, + { + "epoch": 0.7654320987654321, + "grad_norm": 10.37888240814209, + "learning_rate": 2.6931464174454824e-05, + "loss": 0.7924, + "step": 744 + }, + { + "epoch": 0.7664609053497943, + "grad_norm": 14.076517105102539, + "learning_rate": 2.696780893042575e-05, + "loss": 1.6842, + "step": 745 + }, + { + "epoch": 0.7674897119341564, + "grad_norm": 16.620922088623047, + "learning_rate": 2.7004153686396675e-05, + "loss": 2.809, + "step": 746 + }, + { + "epoch": 0.7685185185185185, + "grad_norm": 22.974336624145508, + "learning_rate": 2.70404984423676e-05, + "loss": 1.9317, + "step": 747 + }, + { + "epoch": 0.7695473251028807, + "grad_norm": 7.3669657707214355, + "learning_rate": 2.7076843198338523e-05, + "loss": 0.4177, + "step": 748 + }, + { + "epoch": 0.7705761316872428, + "grad_norm": 10.947649002075195, + "learning_rate": 2.7113187954309447e-05, + "loss": 0.9269, + "step": 749 + }, + { + "epoch": 0.7716049382716049, + "grad_norm": 9.538216590881348, + "learning_rate": 2.714953271028037e-05, + "loss": 0.9832, + "step": 750 + }, + { + "epoch": 0.772633744855967, + "grad_norm": 7.307182312011719, + "learning_rate": 2.71858774662513e-05, + "loss": 0.4875, + "step": 751 + }, + { + "epoch": 0.7736625514403292, + "grad_norm": 3.3512260913848877, + "learning_rate": 2.722222222222222e-05, + "loss": 0.1066, + "step": 752 + }, + { + "epoch": 0.7746913580246914, + "grad_norm": 8.798376083374023, + "learning_rate": 2.7258566978193143e-05, + "loss": 0.4801, + "step": 753 + }, + { + "epoch": 0.7757201646090535, + "grad_norm": 9.195924758911133, + "learning_rate": 2.729491173416407e-05, + "loss": 0.4494, + "step": 754 + }, + { + "epoch": 0.7767489711934157, + "grad_norm": 6.361667156219482, + "learning_rate": 2.733125649013499e-05, + "loss": 0.254, + "step": 755 + }, + { + "epoch": 0.7777777777777778, + "grad_norm": 11.094511985778809, + "learning_rate": 2.7367601246105916e-05, + "loss": 0.5735, + "step": 756 + }, + { + "epoch": 0.7788065843621399, + "grad_norm": 13.668522834777832, + "learning_rate": 2.7403946002076843e-05, + "loss": 1.109, + "step": 757 + }, + { + "epoch": 0.779835390946502, + "grad_norm": 9.678313255310059, + "learning_rate": 2.7440290758047767e-05, + "loss": 0.5538, + "step": 758 + }, + { + "epoch": 0.7808641975308642, + "grad_norm": 18.492931365966797, + "learning_rate": 2.7476635514018688e-05, + "loss": 1.6073, + "step": 759 + }, + { + "epoch": 0.7818930041152263, + "grad_norm": 20.688257217407227, + "learning_rate": 2.7512980269989615e-05, + "loss": 3.0436, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_Qnli-dev_cosine_accuracy": 0.703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7550844550132751, + "eval_Qnli-dev_cosine_ap": 0.7364566550443425, + "eval_Qnli-dev_cosine_f1": 0.7132075471698114, + "eval_Qnli-dev_cosine_f1_threshold": 0.7550844550132751, + "eval_Qnli-dev_cosine_precision": 0.6428571428571429, + "eval_Qnli-dev_cosine_recall": 0.8008474576271186, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 369.6612854003906, + "eval_Qnli-dev_dot_ap": 0.6549287118943474, + "eval_Qnli-dev_dot_f1": 0.6805555555555555, + "eval_Qnli-dev_dot_f1_threshold": 328.00164794921875, + "eval_Qnli-dev_dot_precision": 0.5764705882352941, + "eval_Qnli-dev_dot_recall": 0.8305084745762712, + "eval_Qnli-dev_euclidean_accuracy": 0.705078125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.8548583984375, + "eval_Qnli-dev_euclidean_ap": 0.7442510510869947, + "eval_Qnli-dev_euclidean_f1": 0.7076923076923077, + "eval_Qnli-dev_euclidean_f1_threshold": 16.119770050048828, + "eval_Qnli-dev_euclidean_precision": 0.5931232091690545, + "eval_Qnli-dev_euclidean_recall": 0.8771186440677966, + "eval_Qnli-dev_manhattan_accuracy": 0.701171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 297.2847595214844, + "eval_Qnli-dev_manhattan_ap": 0.7470615407792083, + "eval_Qnli-dev_manhattan_f1": 0.7087198515769945, + "eval_Qnli-dev_manhattan_f1_threshold": 312.7979431152344, + "eval_Qnli-dev_manhattan_precision": 0.6303630363036303, + "eval_Qnli-dev_manhattan_recall": 0.809322033898305, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 369.6612854003906, + "eval_Qnli-dev_max_ap": 0.7470615407792083, + "eval_Qnli-dev_max_f1": 0.7132075471698114, + "eval_Qnli-dev_max_f1_threshold": 328.00164794921875, + "eval_Qnli-dev_max_precision": 0.6428571428571429, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8610843420028687, + "eval_allNLI-dev_cosine_ap": 0.5881774055043343, + "eval_allNLI-dev_cosine_f1": 0.5910064239828695, + "eval_allNLI-dev_cosine_f1_threshold": 0.7733876705169678, + "eval_allNLI-dev_cosine_precision": 0.46938775510204084, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.681640625, + "eval_allNLI-dev_dot_accuracy_threshold": 442.53680419921875, + "eval_allNLI-dev_dot_ap": 0.5120414811620706, + "eval_allNLI-dev_dot_f1": 0.5700934579439252, + "eval_allNLI-dev_dot_f1_threshold": 351.6019592285156, + "eval_allNLI-dev_dot_precision": 0.47843137254901963, + "eval_allNLI-dev_dot_recall": 0.7052023121387283, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.514341354370117, + "eval_allNLI-dev_euclidean_ap": 0.5926528852791054, + "eval_allNLI-dev_euclidean_f1": 0.596, + "eval_allNLI-dev_euclidean_f1_threshold": 14.976218223571777, + "eval_allNLI-dev_euclidean_precision": 0.45565749235474007, + "eval_allNLI-dev_euclidean_recall": 0.861271676300578, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 222.68905639648438, + "eval_allNLI-dev_manhattan_ap": 0.5863936149481368, + "eval_allNLI-dev_manhattan_f1": 0.5978947368421053, + "eval_allNLI-dev_manhattan_f1_threshold": 297.7838134765625, + "eval_allNLI-dev_manhattan_precision": 0.47019867549668876, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 442.53680419921875, + "eval_allNLI-dev_max_ap": 0.5926528852791054, + "eval_allNLI-dev_max_f1": 0.5978947368421053, + "eval_allNLI-dev_max_f1_threshold": 351.6019592285156, + "eval_allNLI-dev_max_precision": 0.47843137254901963, + "eval_allNLI-dev_max_recall": 0.861271676300578, + "eval_sequential_score": 0.7470615407792083, + "eval_sts-test_pearson_cosine": 0.8068314455509153, + "eval_sts-test_pearson_dot": 0.772425389013349, + "eval_sts-test_pearson_euclidean": 0.8289777456195899, + "eval_sts-test_pearson_manhattan": 0.8263159059644403, + "eval_sts-test_pearson_max": 0.8289777456195899, + "eval_sts-test_spearman_cosine": 0.8266359474083009, + "eval_sts-test_spearman_dot": 0.7547315896601016, + "eval_sts-test_spearman_euclidean": 0.8200646274343266, + "eval_sts-test_spearman_manhattan": 0.8175935970340776, + "eval_sts-test_spearman_max": 0.8266359474083009, + "eval_vitaminc-pairs_loss": 2.7475264072418213, + "eval_vitaminc-pairs_runtime": 3.1935, + "eval_vitaminc-pairs_samples_per_second": 40.081, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_negation-triplets_loss": 1.0812993049621582, + "eval_negation-triplets_runtime": 0.744, + "eval_negation-triplets_samples_per_second": 172.045, + "eval_negation-triplets_steps_per_second": 1.344, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_scitail-pairs-pos_loss": 0.1583121418952942, + "eval_scitail-pairs-pos_runtime": 0.8387, + "eval_scitail-pairs-pos_samples_per_second": 152.624, + "eval_scitail-pairs-pos_steps_per_second": 1.192, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_scitail-pairs-qa_loss": 0.0030275785829871893, + "eval_scitail-pairs-qa_runtime": 0.5811, + "eval_scitail-pairs-qa_samples_per_second": 220.288, + "eval_scitail-pairs-qa_steps_per_second": 1.721, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_xsum-pairs_loss": 0.6426714658737183, + "eval_xsum-pairs_runtime": 3.0216, + "eval_xsum-pairs_samples_per_second": 42.361, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_sciq_pairs_loss": 0.12087687849998474, + "eval_sciq_pairs_runtime": 3.4733, + "eval_sciq_pairs_samples_per_second": 36.852, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_qasc_pairs_loss": 0.5539246201515198, + "eval_qasc_pairs_runtime": 0.6065, + "eval_qasc_pairs_samples_per_second": 211.043, + "eval_qasc_pairs_steps_per_second": 1.649, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_openbookqa_pairs_loss": 1.1023366451263428, + "eval_openbookqa_pairs_runtime": 0.5847, + "eval_openbookqa_pairs_samples_per_second": 218.917, + "eval_openbookqa_pairs_steps_per_second": 1.71, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_msmarco_pairs_loss": 1.2618669271469116, + "eval_msmarco_pairs_runtime": 1.5194, + "eval_msmarco_pairs_samples_per_second": 84.242, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_nq_pairs_loss": 1.4234434366226196, + "eval_nq_pairs_runtime": 2.9033, + "eval_nq_pairs_samples_per_second": 44.088, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_trivia_pairs_loss": 1.1620062589645386, + "eval_trivia_pairs_runtime": 3.4422, + "eval_trivia_pairs_samples_per_second": 37.185, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_gooaq_pairs_loss": 0.622553825378418, + "eval_gooaq_pairs_runtime": 0.9454, + "eval_gooaq_pairs_samples_per_second": 135.393, + "eval_gooaq_pairs_steps_per_second": 1.058, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_paws-pos_loss": 0.029666246846318245, + "eval_paws-pos_runtime": 0.6886, + "eval_paws-pos_samples_per_second": 185.877, + "eval_paws-pos_steps_per_second": 1.452, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_global_dataset_loss": 0.599385678768158, + "eval_global_dataset_runtime": 13.3915, + "eval_global_dataset_samples_per_second": 31.064, + "eval_global_dataset_steps_per_second": 0.299, + "step": 760 + }, + { + "epoch": 0.7829218106995884, + "grad_norm": 13.270237922668457, + "learning_rate": 2.754932502596054e-05, + "loss": 0.9703, + "step": 761 + }, + { + "epoch": 0.7839506172839507, + "grad_norm": 10.215360641479492, + "learning_rate": 2.758566978193146e-05, + "loss": 1.0167, + "step": 762 + }, + { + "epoch": 0.7849794238683128, + "grad_norm": 14.256012916564941, + "learning_rate": 2.7622014537902387e-05, + "loss": 1.8575, + "step": 763 + }, + { + "epoch": 0.7860082304526749, + "grad_norm": 14.234450340270996, + "learning_rate": 2.765835929387331e-05, + "loss": 1.48, + "step": 764 + }, + { + "epoch": 0.7870370370370371, + "grad_norm": 15.287798881530762, + "learning_rate": 2.7694704049844235e-05, + "loss": 1.5257, + "step": 765 + }, + { + "epoch": 0.7880658436213992, + "grad_norm": 12.686257362365723, + "learning_rate": 2.773104880581516e-05, + "loss": 1.2119, + "step": 766 + }, + { + "epoch": 0.7890946502057613, + "grad_norm": 11.21288013458252, + "learning_rate": 2.7767393561786083e-05, + "loss": 1.0656, + "step": 767 + }, + { + "epoch": 0.7901234567901234, + "grad_norm": 9.147239685058594, + "learning_rate": 2.7803738317757007e-05, + "loss": 0.5485, + "step": 768 + }, + { + "epoch": 0.7911522633744856, + "grad_norm": 8.927838325500488, + "learning_rate": 2.7840083073727935e-05, + "loss": 0.6264, + "step": 769 + }, + { + "epoch": 0.7921810699588477, + "grad_norm": 12.626420974731445, + "learning_rate": 2.7876427829698855e-05, + "loss": 1.0876, + "step": 770 + }, + { + "epoch": 0.7932098765432098, + "grad_norm": 8.545890808105469, + "learning_rate": 2.791277258566978e-05, + "loss": 0.5902, + "step": 771 + }, + { + "epoch": 0.7942386831275721, + "grad_norm": 12.124262809753418, + "learning_rate": 2.7949117341640707e-05, + "loss": 0.9689, + "step": 772 + }, + { + "epoch": 0.7952674897119342, + "grad_norm": 8.3804292678833, + "learning_rate": 2.7985462097611627e-05, + "loss": 0.5276, + "step": 773 + }, + { + "epoch": 0.7962962962962963, + "grad_norm": 12.29673957824707, + "learning_rate": 2.802180685358255e-05, + "loss": 1.2571, + "step": 774 + }, + { + "epoch": 0.7973251028806584, + "grad_norm": 6.740438938140869, + "learning_rate": 2.805815160955348e-05, + "loss": 0.3492, + "step": 775 + }, + { + "epoch": 0.7983539094650206, + "grad_norm": 13.983535766601562, + "learning_rate": 2.80944963655244e-05, + "loss": 1.4877, + "step": 776 + }, + { + "epoch": 0.7993827160493827, + "grad_norm": 10.374014854431152, + "learning_rate": 2.8130841121495323e-05, + "loss": 1.2044, + "step": 777 + }, + { + "epoch": 0.8004115226337448, + "grad_norm": 14.681657791137695, + "learning_rate": 2.816718587746625e-05, + "loss": 1.2838, + "step": 778 + }, + { + "epoch": 0.801440329218107, + "grad_norm": 8.073484420776367, + "learning_rate": 2.8203530633437175e-05, + "loss": 0.4491, + "step": 779 + }, + { + "epoch": 0.8024691358024691, + "grad_norm": 14.766283988952637, + "learning_rate": 2.8239875389408095e-05, + "loss": 1.5724, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8231204152107239, + "eval_Qnli-dev_cosine_ap": 0.7391395822952389, + "eval_Qnli-dev_cosine_f1": 0.7054545454545453, + "eval_Qnli-dev_cosine_f1_threshold": 0.7827090620994568, + "eval_Qnli-dev_cosine_precision": 0.6178343949044586, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.6640625, + "eval_Qnli-dev_dot_accuracy_threshold": 410.32037353515625, + "eval_Qnli-dev_dot_ap": 0.6504819630539224, + "eval_Qnli-dev_dot_f1": 0.6780238500851788, + "eval_Qnli-dev_dot_f1_threshold": 381.0080871582031, + "eval_Qnli-dev_dot_precision": 0.5669515669515669, + "eval_Qnli-dev_dot_recall": 0.8432203389830508, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.04183578491211, + "eval_Qnli-dev_euclidean_ap": 0.7438731947506383, + "eval_Qnli-dev_euclidean_f1": 0.7050847457627119, + "eval_Qnli-dev_euclidean_f1_threshold": 15.714797019958496, + "eval_Qnli-dev_euclidean_precision": 0.5875706214689266, + "eval_Qnli-dev_euclidean_recall": 0.8813559322033898, + "eval_Qnli-dev_manhattan_accuracy": 0.701171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 290.35009765625, + "eval_Qnli-dev_manhattan_ap": 0.7446632934882194, + "eval_Qnli-dev_manhattan_f1": 0.7015503875968992, + "eval_Qnli-dev_manhattan_f1_threshold": 293.35986328125, + "eval_Qnli-dev_manhattan_precision": 0.6464285714285715, + "eval_Qnli-dev_manhattan_recall": 0.7669491525423728, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 410.32037353515625, + "eval_Qnli-dev_max_ap": 0.7446632934882194, + "eval_Qnli-dev_max_f1": 0.7054545454545453, + "eval_Qnli-dev_max_f1_threshold": 381.0080871582031, + "eval_Qnli-dev_max_precision": 0.6464285714285715, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8960142135620117, + "eval_allNLI-dev_cosine_ap": 0.5873660686997946, + "eval_allNLI-dev_cosine_f1": 0.6017316017316017, + "eval_allNLI-dev_cosine_f1_threshold": 0.8052390813827515, + "eval_allNLI-dev_cosine_precision": 0.4809688581314879, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 469.1497497558594, + "eval_allNLI-dev_dot_ap": 0.5278322808998677, + "eval_allNLI-dev_dot_f1": 0.5864978902953586, + "eval_allNLI-dev_dot_f1_threshold": 398.7422790527344, + "eval_allNLI-dev_dot_precision": 0.46179401993355484, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.724609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.34986400604248, + "eval_allNLI-dev_euclidean_ap": 0.5891503709712753, + "eval_allNLI-dev_euclidean_f1": 0.6061855670103092, + "eval_allNLI-dev_euclidean_f1_threshold": 14.381561279296875, + "eval_allNLI-dev_euclidean_precision": 0.47115384615384615, + "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, + "eval_allNLI-dev_manhattan_accuracy": 0.72265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 202.1298828125, + "eval_allNLI-dev_manhattan_ap": 0.5845629157897831, + "eval_allNLI-dev_manhattan_f1": 0.6017316017316017, + "eval_allNLI-dev_manhattan_f1_threshold": 290.66619873046875, + "eval_allNLI-dev_manhattan_precision": 0.4809688581314879, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.7265625, + "eval_allNLI-dev_max_accuracy_threshold": 469.1497497558594, + "eval_allNLI-dev_max_ap": 0.5891503709712753, + "eval_allNLI-dev_max_f1": 0.6061855670103092, + "eval_allNLI-dev_max_f1_threshold": 398.7422790527344, + "eval_allNLI-dev_max_precision": 0.4809688581314879, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7446632934882194, + "eval_sts-test_pearson_cosine": 0.8117461676559997, + "eval_sts-test_pearson_dot": 0.7886477825036372, + "eval_sts-test_pearson_euclidean": 0.8403273274655056, + "eval_sts-test_pearson_manhattan": 0.8379940892338228, + "eval_sts-test_pearson_max": 0.8403273274655056, + "eval_sts-test_spearman_cosine": 0.8390338816154358, + "eval_sts-test_spearman_dot": 0.7805059891559553, + "eval_sts-test_spearman_euclidean": 0.8335163643447059, + "eval_sts-test_spearman_manhattan": 0.8315699082304869, + "eval_sts-test_spearman_max": 0.8390338816154358, + "eval_vitaminc-pairs_loss": 2.659418821334839, + "eval_vitaminc-pairs_runtime": 3.2064, + "eval_vitaminc-pairs_samples_per_second": 39.92, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_negation-triplets_loss": 1.0590914487838745, + "eval_negation-triplets_runtime": 0.7504, + "eval_negation-triplets_samples_per_second": 170.58, + "eval_negation-triplets_steps_per_second": 1.333, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_scitail-pairs-pos_loss": 0.1532289683818817, + "eval_scitail-pairs-pos_runtime": 0.8417, + "eval_scitail-pairs-pos_samples_per_second": 152.081, + "eval_scitail-pairs-pos_steps_per_second": 1.188, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_scitail-pairs-qa_loss": 0.0047495742328464985, + "eval_scitail-pairs-qa_runtime": 0.6048, + "eval_scitail-pairs-qa_samples_per_second": 211.629, + "eval_scitail-pairs-qa_steps_per_second": 1.653, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_xsum-pairs_loss": 0.6486428380012512, + "eval_xsum-pairs_runtime": 3.0654, + "eval_xsum-pairs_samples_per_second": 41.757, + "eval_xsum-pairs_steps_per_second": 0.326, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_sciq_pairs_loss": 0.11410364508628845, + "eval_sciq_pairs_runtime": 3.4977, + "eval_sciq_pairs_samples_per_second": 36.596, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_qasc_pairs_loss": 0.5198172330856323, + "eval_qasc_pairs_runtime": 0.6108, + "eval_qasc_pairs_samples_per_second": 209.558, + "eval_qasc_pairs_steps_per_second": 1.637, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_openbookqa_pairs_loss": 1.029971957206726, + "eval_openbookqa_pairs_runtime": 0.5904, + "eval_openbookqa_pairs_samples_per_second": 216.82, + "eval_openbookqa_pairs_steps_per_second": 1.694, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_msmarco_pairs_loss": 1.185034990310669, + "eval_msmarco_pairs_runtime": 1.5164, + "eval_msmarco_pairs_samples_per_second": 84.41, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_nq_pairs_loss": 1.3883589506149292, + "eval_nq_pairs_runtime": 2.9107, + "eval_nq_pairs_samples_per_second": 43.976, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_trivia_pairs_loss": 1.0724446773529053, + "eval_trivia_pairs_runtime": 3.4605, + "eval_trivia_pairs_samples_per_second": 36.989, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_gooaq_pairs_loss": 0.5710882544517517, + "eval_gooaq_pairs_runtime": 0.9518, + "eval_gooaq_pairs_samples_per_second": 134.481, + "eval_gooaq_pairs_steps_per_second": 1.051, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_paws-pos_loss": 0.031913165003061295, + "eval_paws-pos_runtime": 0.6967, + "eval_paws-pos_samples_per_second": 183.728, + "eval_paws-pos_steps_per_second": 1.435, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_global_dataset_loss": 0.5888291597366333, + "eval_global_dataset_runtime": 13.3816, + "eval_global_dataset_samples_per_second": 31.088, + "eval_global_dataset_steps_per_second": 0.299, + "step": 780 + }, + { + "epoch": 0.8034979423868313, + "grad_norm": 12.456541061401367, + "learning_rate": 2.8276220145379023e-05, + "loss": 1.4439, + "step": 781 + }, + { + "epoch": 0.8045267489711934, + "grad_norm": 1.0020017623901367, + "learning_rate": 2.8312564901349947e-05, + "loss": 0.0386, + "step": 782 + }, + { + "epoch": 0.8055555555555556, + "grad_norm": 9.883146286010742, + "learning_rate": 2.8348909657320867e-05, + "loss": 0.5761, + "step": 783 + }, + { + "epoch": 0.8065843621399177, + "grad_norm": 6.427492141723633, + "learning_rate": 2.8385254413291795e-05, + "loss": 0.2763, + "step": 784 + }, + { + "epoch": 0.8076131687242798, + "grad_norm": 18.895719528198242, + "learning_rate": 2.842159916926272e-05, + "loss": 1.7799, + "step": 785 + }, + { + "epoch": 0.808641975308642, + "grad_norm": 13.417122840881348, + "learning_rate": 2.8457943925233643e-05, + "loss": 1.6238, + "step": 786 + }, + { + "epoch": 0.8096707818930041, + "grad_norm": 14.21626091003418, + "learning_rate": 2.8494288681204567e-05, + "loss": 1.2406, + "step": 787 + }, + { + "epoch": 0.8106995884773662, + "grad_norm": 10.437925338745117, + "learning_rate": 2.853063343717549e-05, + "loss": 1.2021, + "step": 788 + }, + { + "epoch": 0.8117283950617284, + "grad_norm": 9.257889747619629, + "learning_rate": 2.8566978193146415e-05, + "loss": 0.5357, + "step": 789 + }, + { + "epoch": 0.8127572016460906, + "grad_norm": 7.7459940910339355, + "learning_rate": 2.8603322949117342e-05, + "loss": 0.3974, + "step": 790 + }, + { + "epoch": 0.8137860082304527, + "grad_norm": 9.664865493774414, + "learning_rate": 2.8639667705088263e-05, + "loss": 0.681, + "step": 791 + }, + { + "epoch": 0.8148148148148148, + "grad_norm": 1.9434237480163574, + "learning_rate": 2.8676012461059187e-05, + "loss": 0.0454, + "step": 792 + }, + { + "epoch": 0.815843621399177, + "grad_norm": 11.688820838928223, + "learning_rate": 2.8712357217030114e-05, + "loss": 0.8601, + "step": 793 + }, + { + "epoch": 0.8168724279835391, + "grad_norm": 8.6393461227417, + "learning_rate": 2.8748701973001035e-05, + "loss": 0.5149, + "step": 794 + }, + { + "epoch": 0.8179012345679012, + "grad_norm": 3.408317804336548, + "learning_rate": 2.878504672897196e-05, + "loss": 0.1049, + "step": 795 + }, + { + "epoch": 0.8189300411522634, + "grad_norm": 2.3510513305664062, + "learning_rate": 2.8821391484942886e-05, + "loss": 0.0591, + "step": 796 + }, + { + "epoch": 0.8199588477366255, + "grad_norm": 19.143835067749023, + "learning_rate": 2.885773624091381e-05, + "loss": 1.7556, + "step": 797 + }, + { + "epoch": 0.8209876543209876, + "grad_norm": 2.234999895095825, + "learning_rate": 2.889408099688473e-05, + "loss": 0.0651, + "step": 798 + }, + { + "epoch": 0.8220164609053497, + "grad_norm": 7.49348783493042, + "learning_rate": 2.893042575285566e-05, + "loss": 0.3813, + "step": 799 + }, + { + "epoch": 0.823045267489712, + "grad_norm": 8.669596672058105, + "learning_rate": 2.8966770508826583e-05, + "loss": 0.4154, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7969362139701843, + "eval_Qnli-dev_cosine_ap": 0.7404462233066612, + "eval_Qnli-dev_cosine_f1": 0.6980802792321116, + "eval_Qnli-dev_cosine_f1_threshold": 0.7222884893417358, + "eval_Qnli-dev_cosine_precision": 0.5934718100890207, + "eval_Qnli-dev_cosine_recall": 0.847457627118644, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 375.7672119140625, + "eval_Qnli-dev_dot_ap": 0.6479436940211677, + "eval_Qnli-dev_dot_f1": 0.6717325227963526, + "eval_Qnli-dev_dot_f1_threshold": 306.73577880859375, + "eval_Qnli-dev_dot_precision": 0.523696682464455, + "eval_Qnli-dev_dot_recall": 0.9364406779661016, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.082931518554688, + "eval_Qnli-dev_euclidean_ap": 0.7512343790385024, + "eval_Qnli-dev_euclidean_f1": 0.7015706806282722, + "eval_Qnli-dev_euclidean_f1_threshold": 16.305587768554688, + "eval_Qnli-dev_euclidean_precision": 0.5964391691394659, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.70703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 294.2919006347656, + "eval_Qnli-dev_manhattan_ap": 0.7497633403333601, + "eval_Qnli-dev_manhattan_f1": 0.7025089605734768, + "eval_Qnli-dev_manhattan_f1_threshold": 333.9628601074219, + "eval_Qnli-dev_manhattan_precision": 0.6086956521739131, + "eval_Qnli-dev_manhattan_recall": 0.8305084745762712, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 375.7672119140625, + "eval_Qnli-dev_max_ap": 0.7512343790385024, + "eval_Qnli-dev_max_f1": 0.7025089605734768, + "eval_Qnli-dev_max_f1_threshold": 333.9628601074219, + "eval_Qnli-dev_max_precision": 0.6086956521739131, + "eval_Qnli-dev_max_recall": 0.9364406779661016, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8626278638839722, + "eval_allNLI-dev_cosine_ap": 0.5970787093998088, + "eval_allNLI-dev_cosine_f1": 0.6073752711496746, + "eval_allNLI-dev_cosine_f1_threshold": 0.7643657326698303, + "eval_allNLI-dev_cosine_precision": 0.4861111111111111, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.69921875, + "eval_allNLI-dev_dot_accuracy_threshold": 427.77294921875, + "eval_allNLI-dev_dot_ap": 0.5307372759630803, + "eval_allNLI-dev_dot_f1": 0.5900900900900902, + "eval_allNLI-dev_dot_f1_threshold": 365.810302734375, + "eval_allNLI-dev_dot_precision": 0.4833948339483395, + "eval_allNLI-dev_dot_recall": 0.7572254335260116, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.048674583435059, + "eval_allNLI-dev_euclidean_ap": 0.6004011776083211, + "eval_allNLI-dev_euclidean_f1": 0.6140724946695097, + "eval_allNLI-dev_euclidean_f1_threshold": 15.054520606994629, + "eval_allNLI-dev_euclidean_precision": 0.4864864864864865, + "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 219.22201538085938, + "eval_allNLI-dev_manhattan_ap": 0.5983608065576813, + "eval_allNLI-dev_manhattan_f1": 0.6170678336980306, + "eval_allNLI-dev_manhattan_f1_threshold": 307.1187744140625, + "eval_allNLI-dev_manhattan_precision": 0.4964788732394366, + "eval_allNLI-dev_manhattan_recall": 0.815028901734104, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 427.77294921875, + "eval_allNLI-dev_max_ap": 0.6004011776083211, + "eval_allNLI-dev_max_f1": 0.6170678336980306, + "eval_allNLI-dev_max_f1_threshold": 365.810302734375, + "eval_allNLI-dev_max_precision": 0.4964788732394366, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7512343790385024, + "eval_sts-test_pearson_cosine": 0.8198131802310901, + "eval_sts-test_pearson_dot": 0.8060682717309235, + "eval_sts-test_pearson_euclidean": 0.8431733382027362, + "eval_sts-test_pearson_manhattan": 0.8415106486610984, + "eval_sts-test_pearson_max": 0.8431733382027362, + "eval_sts-test_spearman_cosine": 0.8405863476243647, + "eval_sts-test_spearman_dot": 0.7927496657650738, + "eval_sts-test_spearman_euclidean": 0.8335884138755459, + "eval_sts-test_spearman_manhattan": 0.8322680279081929, + "eval_sts-test_spearman_max": 0.8405863476243647, + "eval_vitaminc-pairs_loss": 2.8224048614501953, + "eval_vitaminc-pairs_runtime": 3.191, + "eval_vitaminc-pairs_samples_per_second": 40.112, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_negation-triplets_loss": 1.0728732347488403, + "eval_negation-triplets_runtime": 0.7461, + "eval_negation-triplets_samples_per_second": 171.567, + "eval_negation-triplets_steps_per_second": 1.34, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_scitail-pairs-pos_loss": 0.16487395763397217, + "eval_scitail-pairs-pos_runtime": 0.8425, + "eval_scitail-pairs-pos_samples_per_second": 151.933, + "eval_scitail-pairs-pos_steps_per_second": 1.187, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_scitail-pairs-qa_loss": 0.002279088133946061, + "eval_scitail-pairs-qa_runtime": 0.5921, + "eval_scitail-pairs-qa_samples_per_second": 216.187, + "eval_scitail-pairs-qa_steps_per_second": 1.689, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_xsum-pairs_loss": 0.6205843687057495, + "eval_xsum-pairs_runtime": 3.0236, + "eval_xsum-pairs_samples_per_second": 42.333, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_sciq_pairs_loss": 0.13013440370559692, + "eval_sciq_pairs_runtime": 3.4722, + "eval_sciq_pairs_samples_per_second": 36.864, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_qasc_pairs_loss": 0.45994842052459717, + "eval_qasc_pairs_runtime": 0.6086, + "eval_qasc_pairs_samples_per_second": 210.312, + "eval_qasc_pairs_steps_per_second": 1.643, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_openbookqa_pairs_loss": 1.1286119222640991, + "eval_openbookqa_pairs_runtime": 0.5867, + "eval_openbookqa_pairs_samples_per_second": 218.188, + "eval_openbookqa_pairs_steps_per_second": 1.705, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_msmarco_pairs_loss": 1.2176563739776611, + "eval_msmarco_pairs_runtime": 1.5198, + "eval_msmarco_pairs_samples_per_second": 84.221, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_nq_pairs_loss": 1.3381102085113525, + "eval_nq_pairs_runtime": 2.9052, + "eval_nq_pairs_samples_per_second": 44.059, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_trivia_pairs_loss": 1.1009663343429565, + "eval_trivia_pairs_runtime": 3.4646, + "eval_trivia_pairs_samples_per_second": 36.946, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_gooaq_pairs_loss": 0.6627429723739624, + "eval_gooaq_pairs_runtime": 0.9514, + "eval_gooaq_pairs_samples_per_second": 134.542, + "eval_gooaq_pairs_steps_per_second": 1.051, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_paws-pos_loss": 0.03013201802968979, + "eval_paws-pos_runtime": 0.6964, + "eval_paws-pos_samples_per_second": 183.798, + "eval_paws-pos_steps_per_second": 1.436, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_global_dataset_loss": 0.5941927433013916, + "eval_global_dataset_runtime": 13.3686, + "eval_global_dataset_samples_per_second": 31.118, + "eval_global_dataset_steps_per_second": 0.299, + "step": 800 + }, + { + "epoch": 0.8240740740740741, + "grad_norm": 14.879446029663086, + "learning_rate": 2.9003115264797503e-05, + "loss": 1.0372, + "step": 801 + }, + { + "epoch": 0.8251028806584362, + "grad_norm": 9.06614875793457, + "learning_rate": 2.903946002076843e-05, + "loss": 0.4029, + "step": 802 + }, + { + "epoch": 0.8261316872427984, + "grad_norm": 8.139265060424805, + "learning_rate": 2.9075804776739355e-05, + "loss": 0.376, + "step": 803 + }, + { + "epoch": 0.8271604938271605, + "grad_norm": 13.07675838470459, + "learning_rate": 2.911214953271028e-05, + "loss": 1.4204, + "step": 804 + }, + { + "epoch": 0.8281893004115226, + "grad_norm": 13.634737968444824, + "learning_rate": 2.9148494288681203e-05, + "loss": 0.1015, + "step": 805 + }, + { + "epoch": 0.8292181069958847, + "grad_norm": 9.257582664489746, + "learning_rate": 2.9184839044652127e-05, + "loss": 0.7088, + "step": 806 + }, + { + "epoch": 0.8302469135802469, + "grad_norm": 11.305009841918945, + "learning_rate": 2.922118380062305e-05, + "loss": 0.8444, + "step": 807 + }, + { + "epoch": 0.831275720164609, + "grad_norm": 17.285337448120117, + "learning_rate": 2.925752855659397e-05, + "loss": 1.4104, + "step": 808 + }, + { + "epoch": 0.8323045267489712, + "grad_norm": 21.70269012451172, + "learning_rate": 2.92938733125649e-05, + "loss": 3.4062, + "step": 809 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 10.347410202026367, + "learning_rate": 2.9330218068535823e-05, + "loss": 0.632, + "step": 810 + }, + { + "epoch": 0.8343621399176955, + "grad_norm": 12.212241172790527, + "learning_rate": 2.9366562824506747e-05, + "loss": 0.9835, + "step": 811 + }, + { + "epoch": 0.8353909465020576, + "grad_norm": 12.607038497924805, + "learning_rate": 2.940290758047767e-05, + "loss": 1.6676, + "step": 812 + }, + { + "epoch": 0.8364197530864198, + "grad_norm": 13.61136531829834, + "learning_rate": 2.9439252336448595e-05, + "loss": 1.1451, + "step": 813 + }, + { + "epoch": 0.8374485596707819, + "grad_norm": 9.604448318481445, + "learning_rate": 2.947559709241952e-05, + "loss": 0.6491, + "step": 814 + }, + { + "epoch": 0.838477366255144, + "grad_norm": 8.173309326171875, + "learning_rate": 2.9511941848390446e-05, + "loss": 0.4395, + "step": 815 + }, + { + "epoch": 0.8395061728395061, + "grad_norm": 16.461246490478516, + "learning_rate": 2.9548286604361367e-05, + "loss": 1.5228, + "step": 816 + }, + { + "epoch": 0.8405349794238683, + "grad_norm": 12.087141036987305, + "learning_rate": 2.958463136033229e-05, + "loss": 1.1335, + "step": 817 + }, + { + "epoch": 0.8415637860082305, + "grad_norm": 10.887614250183105, + "learning_rate": 2.962097611630322e-05, + "loss": 1.034, + "step": 818 + }, + { + "epoch": 0.8425925925925926, + "grad_norm": 9.939887046813965, + "learning_rate": 2.965732087227414e-05, + "loss": 0.8548, + "step": 819 + }, + { + "epoch": 0.8436213991769548, + "grad_norm": 11.749360084533691, + "learning_rate": 2.9693665628245063e-05, + "loss": 1.0941, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8176131248474121, + "eval_Qnli-dev_cosine_ap": 0.7388220297886944, + "eval_Qnli-dev_cosine_f1": 0.7024029574861368, + "eval_Qnli-dev_cosine_f1_threshold": 0.7822612524032593, + "eval_Qnli-dev_cosine_precision": 0.6229508196721312, + "eval_Qnli-dev_cosine_recall": 0.8050847457627118, + "eval_Qnli-dev_dot_accuracy": 0.658203125, + "eval_Qnli-dev_dot_accuracy_threshold": 418.55694580078125, + "eval_Qnli-dev_dot_ap": 0.6532207993640291, + "eval_Qnli-dev_dot_f1": 0.6723842195540308, + "eval_Qnli-dev_dot_f1_threshold": 375.86895751953125, + "eval_Qnli-dev_dot_precision": 0.5648414985590778, + "eval_Qnli-dev_dot_recall": 0.8305084745762712, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.418533325195312, + "eval_Qnli-dev_euclidean_ap": 0.7439997782812441, + "eval_Qnli-dev_euclidean_f1": 0.7044673539518899, + "eval_Qnli-dev_euclidean_f1_threshold": 15.451794624328613, + "eval_Qnli-dev_euclidean_precision": 0.5924855491329479, + "eval_Qnli-dev_euclidean_recall": 0.8686440677966102, + "eval_Qnli-dev_manhattan_accuracy": 0.69921875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 281.7642517089844, + "eval_Qnli-dev_manhattan_ap": 0.745379018581688, + "eval_Qnli-dev_manhattan_f1": 0.70223752151463, + "eval_Qnli-dev_manhattan_f1_threshold": 318.57647705078125, + "eval_Qnli-dev_manhattan_precision": 0.591304347826087, + "eval_Qnli-dev_manhattan_recall": 0.864406779661017, + "eval_Qnli-dev_max_accuracy": 0.70703125, + "eval_Qnli-dev_max_accuracy_threshold": 418.55694580078125, + "eval_Qnli-dev_max_ap": 0.745379018581688, + "eval_Qnli-dev_max_f1": 0.7044673539518899, + "eval_Qnli-dev_max_f1_threshold": 375.86895751953125, + "eval_Qnli-dev_max_precision": 0.6229508196721312, + "eval_Qnli-dev_max_recall": 0.8686440677966102, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8881274461746216, + "eval_allNLI-dev_cosine_ap": 0.6029662836602825, + "eval_allNLI-dev_cosine_f1": 0.6047619047619047, + "eval_allNLI-dev_cosine_f1_threshold": 0.8256221413612366, + "eval_allNLI-dev_cosine_precision": 0.5141700404858299, + "eval_allNLI-dev_cosine_recall": 0.7341040462427746, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 443.00537109375, + "eval_allNLI-dev_dot_ap": 0.5307385620455893, + "eval_allNLI-dev_dot_f1": 0.5751633986928105, + "eval_allNLI-dev_dot_f1_threshold": 395.2938232421875, + "eval_allNLI-dev_dot_precision": 0.46153846153846156, + "eval_allNLI-dev_dot_recall": 0.7630057803468208, + "eval_allNLI-dev_euclidean_accuracy": 0.7265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.200397491455078, + "eval_allNLI-dev_euclidean_ap": 0.6054028078040996, + "eval_allNLI-dev_euclidean_f1": 0.6042154566744731, + "eval_allNLI-dev_euclidean_f1_threshold": 13.174400329589844, + "eval_allNLI-dev_euclidean_precision": 0.5078740157480315, + "eval_allNLI-dev_euclidean_recall": 0.7456647398843931, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 204.298828125, + "eval_allNLI-dev_manhattan_ap": 0.6039169640925666, + "eval_allNLI-dev_manhattan_f1": 0.5994694960212201, + "eval_allNLI-dev_manhattan_f1_threshold": 254.79234313964844, + "eval_allNLI-dev_manhattan_precision": 0.553921568627451, + "eval_allNLI-dev_manhattan_recall": 0.653179190751445, + "eval_allNLI-dev_max_accuracy": 0.73046875, + "eval_allNLI-dev_max_accuracy_threshold": 443.00537109375, + "eval_allNLI-dev_max_ap": 0.6054028078040996, + "eval_allNLI-dev_max_f1": 0.6047619047619047, + "eval_allNLI-dev_max_f1_threshold": 395.2938232421875, + "eval_allNLI-dev_max_precision": 0.553921568627451, + "eval_allNLI-dev_max_recall": 0.7630057803468208, + "eval_sequential_score": 0.745379018581688, + "eval_sts-test_pearson_cosine": 0.8219060292244447, + "eval_sts-test_pearson_dot": 0.7914174536034212, + "eval_sts-test_pearson_euclidean": 0.8498595001040936, + "eval_sts-test_pearson_manhattan": 0.8479607961602269, + "eval_sts-test_pearson_max": 0.8498595001040936, + "eval_sts-test_spearman_cosine": 0.8433385949511971, + "eval_sts-test_spearman_dot": 0.7770873060444821, + "eval_sts-test_spearman_euclidean": 0.8418306116960912, + "eval_sts-test_spearman_manhattan": 0.8404455601560273, + "eval_sts-test_spearman_max": 0.8433385949511971, + "eval_vitaminc-pairs_loss": 2.736114501953125, + "eval_vitaminc-pairs_runtime": 3.2033, + "eval_vitaminc-pairs_samples_per_second": 39.958, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_negation-triplets_loss": 1.0854538679122925, + "eval_negation-triplets_runtime": 0.7435, + "eval_negation-triplets_samples_per_second": 172.17, + "eval_negation-triplets_steps_per_second": 1.345, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_scitail-pairs-pos_loss": 0.1739039570093155, + "eval_scitail-pairs-pos_runtime": 0.8433, + "eval_scitail-pairs-pos_samples_per_second": 151.783, + "eval_scitail-pairs-pos_steps_per_second": 1.186, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_scitail-pairs-qa_loss": 0.003931767772883177, + "eval_scitail-pairs-qa_runtime": 0.5977, + "eval_scitail-pairs-qa_samples_per_second": 214.159, + "eval_scitail-pairs-qa_steps_per_second": 1.673, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_xsum-pairs_loss": 0.6559375524520874, + "eval_xsum-pairs_runtime": 3.0363, + "eval_xsum-pairs_samples_per_second": 42.157, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_sciq_pairs_loss": 0.11851135641336441, + "eval_sciq_pairs_runtime": 3.4675, + "eval_sciq_pairs_samples_per_second": 36.914, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_qasc_pairs_loss": 0.3914608359336853, + "eval_qasc_pairs_runtime": 0.6158, + "eval_qasc_pairs_samples_per_second": 207.872, + "eval_qasc_pairs_steps_per_second": 1.624, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_openbookqa_pairs_loss": 1.059507131576538, + "eval_openbookqa_pairs_runtime": 0.5868, + "eval_openbookqa_pairs_samples_per_second": 218.116, + "eval_openbookqa_pairs_steps_per_second": 1.704, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_msmarco_pairs_loss": 1.0388420820236206, + "eval_msmarco_pairs_runtime": 1.5195, + "eval_msmarco_pairs_samples_per_second": 84.238, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_nq_pairs_loss": 1.4129403829574585, + "eval_nq_pairs_runtime": 2.904, + "eval_nq_pairs_samples_per_second": 44.077, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_trivia_pairs_loss": 1.0265684127807617, + "eval_trivia_pairs_runtime": 3.45, + "eval_trivia_pairs_samples_per_second": 37.101, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_gooaq_pairs_loss": 0.6007567048072815, + "eval_gooaq_pairs_runtime": 0.9464, + "eval_gooaq_pairs_samples_per_second": 135.252, + "eval_gooaq_pairs_steps_per_second": 1.057, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_paws-pos_loss": 0.03185836598277092, + "eval_paws-pos_runtime": 0.6912, + "eval_paws-pos_samples_per_second": 185.193, + "eval_paws-pos_steps_per_second": 1.447, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_global_dataset_loss": 0.5914937257766724, + "eval_global_dataset_runtime": 13.4037, + "eval_global_dataset_samples_per_second": 31.036, + "eval_global_dataset_steps_per_second": 0.298, + "step": 820 + }, + { + "epoch": 0.8446502057613169, + "grad_norm": 10.527436256408691, + "learning_rate": 2.973001038421599e-05, + "loss": 1.3147, + "step": 821 + }, + { + "epoch": 0.845679012345679, + "grad_norm": 13.003664016723633, + "learning_rate": 2.9766355140186914e-05, + "loss": 1.3354, + "step": 822 + }, + { + "epoch": 0.8467078189300411, + "grad_norm": 2.857788324356079, + "learning_rate": 2.9802699896157835e-05, + "loss": 0.0733, + "step": 823 + }, + { + "epoch": 0.8477366255144033, + "grad_norm": 11.483878135681152, + "learning_rate": 2.9839044652128762e-05, + "loss": 0.935, + "step": 824 + }, + { + "epoch": 0.8487654320987654, + "grad_norm": 2.5351336002349854, + "learning_rate": 2.9875389408099686e-05, + "loss": 0.0684, + "step": 825 + }, + { + "epoch": 0.8497942386831275, + "grad_norm": 8.322936058044434, + "learning_rate": 2.9911734164070607e-05, + "loss": 0.6753, + "step": 826 + }, + { + "epoch": 0.8508230452674898, + "grad_norm": 1.9907835721969604, + "learning_rate": 2.9948078920041534e-05, + "loss": 0.0438, + "step": 827 + }, + { + "epoch": 0.8518518518518519, + "grad_norm": 14.835284233093262, + "learning_rate": 2.998442367601246e-05, + "loss": 1.1541, + "step": 828 + }, + { + "epoch": 0.852880658436214, + "grad_norm": 13.292768478393555, + "learning_rate": 3.002076843198338e-05, + "loss": 0.7087, + "step": 829 + }, + { + "epoch": 0.8539094650205762, + "grad_norm": 9.636879920959473, + "learning_rate": 3.0057113187954307e-05, + "loss": 0.5391, + "step": 830 + }, + { + "epoch": 0.8549382716049383, + "grad_norm": 8.648504257202148, + "learning_rate": 3.009345794392523e-05, + "loss": 0.3831, + "step": 831 + }, + { + "epoch": 0.8559670781893004, + "grad_norm": 9.20128345489502, + "learning_rate": 3.0129802699896155e-05, + "loss": 0.4752, + "step": 832 + }, + { + "epoch": 0.8569958847736625, + "grad_norm": 7.908294677734375, + "learning_rate": 3.016614745586708e-05, + "loss": 0.3662, + "step": 833 + }, + { + "epoch": 0.8580246913580247, + "grad_norm": 18.368688583374023, + "learning_rate": 3.0202492211838003e-05, + "loss": 1.6192, + "step": 834 + }, + { + "epoch": 0.8590534979423868, + "grad_norm": 2.4204726219177246, + "learning_rate": 3.0238836967808927e-05, + "loss": 0.0369, + "step": 835 + }, + { + "epoch": 0.8600823045267489, + "grad_norm": 15.491935729980469, + "learning_rate": 3.0275181723779854e-05, + "loss": 1.3151, + "step": 836 + }, + { + "epoch": 0.8611111111111112, + "grad_norm": 7.942100524902344, + "learning_rate": 3.0311526479750775e-05, + "loss": 0.4427, + "step": 837 + }, + { + "epoch": 0.8621399176954733, + "grad_norm": 12.48727035522461, + "learning_rate": 3.03478712357217e-05, + "loss": 0.8185, + "step": 838 + }, + { + "epoch": 0.8631687242798354, + "grad_norm": 9.763201713562012, + "learning_rate": 3.0384215991692626e-05, + "loss": 0.8389, + "step": 839 + }, + { + "epoch": 0.8641975308641975, + "grad_norm": 1.3098586797714233, + "learning_rate": 3.0420560747663547e-05, + "loss": 0.0542, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7717105150222778, + "eval_Qnli-dev_cosine_ap": 0.7440826784027825, + "eval_Qnli-dev_cosine_f1": 0.7112676056338029, + "eval_Qnli-dev_cosine_f1_threshold": 0.7245498895645142, + "eval_Qnli-dev_cosine_precision": 0.608433734939759, + "eval_Qnli-dev_cosine_recall": 0.8559322033898306, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 360.73333740234375, + "eval_Qnli-dev_dot_ap": 0.6721991504226604, + "eval_Qnli-dev_dot_f1": 0.6755852842809364, + "eval_Qnli-dev_dot_f1_threshold": 324.03253173828125, + "eval_Qnli-dev_dot_precision": 0.5580110497237569, + "eval_Qnli-dev_dot_recall": 0.8559322033898306, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.42113208770752, + "eval_Qnli-dev_euclidean_ap": 0.7499644370026034, + "eval_Qnli-dev_euclidean_f1": 0.717391304347826, + "eval_Qnli-dev_euclidean_f1_threshold": 15.633472442626953, + "eval_Qnli-dev_euclidean_precision": 0.6265822784810127, + "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 304.3686828613281, + "eval_Qnli-dev_manhattan_ap": 0.7552844807907888, + "eval_Qnli-dev_manhattan_f1": 0.7099236641221374, + "eval_Qnli-dev_manhattan_f1_threshold": 313.06787109375, + "eval_Qnli-dev_manhattan_precision": 0.6458333333333334, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 360.73333740234375, + "eval_Qnli-dev_max_ap": 0.7552844807907888, + "eval_Qnli-dev_max_f1": 0.717391304347826, + "eval_Qnli-dev_max_f1_threshold": 324.03253173828125, + "eval_Qnli-dev_max_precision": 0.6458333333333334, + "eval_Qnli-dev_max_recall": 0.8559322033898306, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8676639795303345, + "eval_allNLI-dev_cosine_ap": 0.5974870153364504, + "eval_allNLI-dev_cosine_f1": 0.5968819599109131, + "eval_allNLI-dev_cosine_f1_threshold": 0.7826240062713623, + "eval_allNLI-dev_cosine_precision": 0.4855072463768116, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.69140625, + "eval_allNLI-dev_dot_accuracy_threshold": 390.4407653808594, + "eval_allNLI-dev_dot_ap": 0.5132556641569763, + "eval_allNLI-dev_dot_f1": 0.5889830508474576, + "eval_allNLI-dev_dot_f1_threshold": 349.6282958984375, + "eval_allNLI-dev_dot_precision": 0.46488294314381273, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.7265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.85896110534668, + "eval_allNLI-dev_euclidean_ap": 0.600945196021151, + "eval_allNLI-dev_euclidean_f1": 0.6117136659436009, + "eval_allNLI-dev_euclidean_f1_threshold": 14.226009368896484, + "eval_allNLI-dev_euclidean_precision": 0.4895833333333333, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.724609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 236.98345947265625, + "eval_allNLI-dev_manhattan_ap": 0.595652875609926, + "eval_allNLI-dev_manhattan_f1": 0.6052631578947368, + "eval_allNLI-dev_manhattan_f1_threshold": 292.81805419921875, + "eval_allNLI-dev_manhattan_precision": 0.4876325088339223, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.73046875, + "eval_allNLI-dev_max_accuracy_threshold": 390.4407653808594, + "eval_allNLI-dev_max_ap": 0.600945196021151, + "eval_allNLI-dev_max_f1": 0.6117136659436009, + "eval_allNLI-dev_max_f1_threshold": 349.6282958984375, + "eval_allNLI-dev_max_precision": 0.4895833333333333, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7552844807907888, + "eval_sts-test_pearson_cosine": 0.8197263747311968, + "eval_sts-test_pearson_dot": 0.7930399784089159, + "eval_sts-test_pearson_euclidean": 0.8451878041621638, + "eval_sts-test_pearson_manhattan": 0.8428233677391169, + "eval_sts-test_pearson_max": 0.8451878041621638, + "eval_sts-test_spearman_cosine": 0.8404950102998648, + "eval_sts-test_spearman_dot": 0.7717234646053703, + "eval_sts-test_spearman_euclidean": 0.8380116285514719, + "eval_sts-test_spearman_manhattan": 0.8359618417747002, + "eval_sts-test_spearman_max": 0.8404950102998648, + "eval_vitaminc-pairs_loss": 2.756269693374634, + "eval_vitaminc-pairs_runtime": 3.1914, + "eval_vitaminc-pairs_samples_per_second": 40.108, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_negation-triplets_loss": 1.07953941822052, + "eval_negation-triplets_runtime": 0.7561, + "eval_negation-triplets_samples_per_second": 169.298, + "eval_negation-triplets_steps_per_second": 1.323, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_scitail-pairs-pos_loss": 0.15982350707054138, + "eval_scitail-pairs-pos_runtime": 0.8403, + "eval_scitail-pairs-pos_samples_per_second": 152.323, + "eval_scitail-pairs-pos_steps_per_second": 1.19, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_scitail-pairs-qa_loss": 0.000591381685808301, + "eval_scitail-pairs-qa_runtime": 0.59, + "eval_scitail-pairs-qa_samples_per_second": 216.958, + "eval_scitail-pairs-qa_steps_per_second": 1.695, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_xsum-pairs_loss": 0.620231568813324, + "eval_xsum-pairs_runtime": 3.0356, + "eval_xsum-pairs_samples_per_second": 42.166, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_sciq_pairs_loss": 0.1389157921075821, + "eval_sciq_pairs_runtime": 3.4358, + "eval_sciq_pairs_samples_per_second": 37.255, + "eval_sciq_pairs_steps_per_second": 0.291, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_qasc_pairs_loss": 0.3473445475101471, + "eval_qasc_pairs_runtime": 0.6187, + "eval_qasc_pairs_samples_per_second": 206.89, + "eval_qasc_pairs_steps_per_second": 1.616, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_openbookqa_pairs_loss": 1.0230737924575806, + "eval_openbookqa_pairs_runtime": 0.5845, + "eval_openbookqa_pairs_samples_per_second": 218.995, + "eval_openbookqa_pairs_steps_per_second": 1.711, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_msmarco_pairs_loss": 1.2953341007232666, + "eval_msmarco_pairs_runtime": 1.5198, + "eval_msmarco_pairs_samples_per_second": 84.223, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_nq_pairs_loss": 1.5245081186294556, + "eval_nq_pairs_runtime": 2.9025, + "eval_nq_pairs_samples_per_second": 44.1, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_trivia_pairs_loss": 1.1853358745574951, + "eval_trivia_pairs_runtime": 3.4357, + "eval_trivia_pairs_samples_per_second": 37.256, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_gooaq_pairs_loss": 0.6523827910423279, + "eval_gooaq_pairs_runtime": 0.954, + "eval_gooaq_pairs_samples_per_second": 134.174, + "eval_gooaq_pairs_steps_per_second": 1.048, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_paws-pos_loss": 0.029700685292482376, + "eval_paws-pos_runtime": 0.692, + "eval_paws-pos_samples_per_second": 184.977, + "eval_paws-pos_steps_per_second": 1.445, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_global_dataset_loss": 0.5667285919189453, + "eval_global_dataset_runtime": 13.3955, + "eval_global_dataset_samples_per_second": 31.055, + "eval_global_dataset_steps_per_second": 0.299, + "step": 840 + }, + { + "epoch": 0.8652263374485597, + "grad_norm": 11.04948902130127, + "learning_rate": 3.045690550363447e-05, + "loss": 0.6135, + "step": 841 + }, + { + "epoch": 0.8662551440329218, + "grad_norm": 13.294988632202148, + "learning_rate": 3.0493250259605398e-05, + "loss": 1.4091, + "step": 842 + }, + { + "epoch": 0.8672839506172839, + "grad_norm": 11.463438987731934, + "learning_rate": 3.052959501557632e-05, + "loss": 0.6724, + "step": 843 + }, + { + "epoch": 0.8683127572016461, + "grad_norm": 2.22076678276062, + "learning_rate": 3.0565939771547246e-05, + "loss": 0.0353, + "step": 844 + }, + { + "epoch": 0.8693415637860082, + "grad_norm": 9.272378921508789, + "learning_rate": 3.0602284527518174e-05, + "loss": 0.5297, + "step": 845 + }, + { + "epoch": 0.8703703703703703, + "grad_norm": 10.213794708251953, + "learning_rate": 3.0638629283489094e-05, + "loss": 0.5211, + "step": 846 + }, + { + "epoch": 0.8713991769547325, + "grad_norm": 12.306347846984863, + "learning_rate": 3.0674974039460015e-05, + "loss": 0.8431, + "step": 847 + }, + { + "epoch": 0.8724279835390947, + "grad_norm": 10.407583236694336, + "learning_rate": 3.071131879543094e-05, + "loss": 0.7195, + "step": 848 + }, + { + "epoch": 0.8734567901234568, + "grad_norm": 8.843184471130371, + "learning_rate": 3.074766355140186e-05, + "loss": 0.4965, + "step": 849 + }, + { + "epoch": 0.8744855967078189, + "grad_norm": 10.975191116333008, + "learning_rate": 3.078400830737279e-05, + "loss": 0.7786, + "step": 850 + }, + { + "epoch": 0.8755144032921811, + "grad_norm": 16.885013580322266, + "learning_rate": 3.082035306334372e-05, + "loss": 1.7078, + "step": 851 + }, + { + "epoch": 0.8765432098765432, + "grad_norm": 10.905181884765625, + "learning_rate": 3.085669781931464e-05, + "loss": 0.6685, + "step": 852 + }, + { + "epoch": 0.8775720164609053, + "grad_norm": 12.853326797485352, + "learning_rate": 3.089304257528556e-05, + "loss": 1.2114, + "step": 853 + }, + { + "epoch": 0.8786008230452675, + "grad_norm": 9.456357955932617, + "learning_rate": 3.0929387331256486e-05, + "loss": 0.6199, + "step": 854 + }, + { + "epoch": 0.8796296296296297, + "grad_norm": 15.603614807128906, + "learning_rate": 3.0965732087227414e-05, + "loss": 1.3311, + "step": 855 + }, + { + "epoch": 0.8806584362139918, + "grad_norm": 10.03974437713623, + "learning_rate": 3.1002076843198334e-05, + "loss": 0.5718, + "step": 856 + }, + { + "epoch": 0.8816872427983539, + "grad_norm": 8.548869132995605, + "learning_rate": 3.103842159916926e-05, + "loss": 0.4969, + "step": 857 + }, + { + "epoch": 0.8827160493827161, + "grad_norm": 13.353643417358398, + "learning_rate": 3.107476635514018e-05, + "loss": 1.1514, + "step": 858 + }, + { + "epoch": 0.8837448559670782, + "grad_norm": 11.166017532348633, + "learning_rate": 3.111111111111111e-05, + "loss": 1.361, + "step": 859 + }, + { + "epoch": 0.8847736625514403, + "grad_norm": 10.166590690612793, + "learning_rate": 3.114745586708203e-05, + "loss": 0.801, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_Qnli-dev_cosine_accuracy": 0.720703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7913862466812134, + "eval_Qnli-dev_cosine_ap": 0.7484120022003069, + "eval_Qnli-dev_cosine_f1": 0.7155635062611807, + "eval_Qnli-dev_cosine_f1_threshold": 0.7564002275466919, + "eval_Qnli-dev_cosine_precision": 0.6191950464396285, + "eval_Qnli-dev_cosine_recall": 0.847457627118644, + "eval_Qnli-dev_dot_accuracy": 0.669921875, + "eval_Qnli-dev_dot_accuracy_threshold": 381.15460205078125, + "eval_Qnli-dev_dot_ap": 0.6554039139593089, + "eval_Qnli-dev_dot_f1": 0.6929982046678635, + "eval_Qnli-dev_dot_f1_threshold": 375.46405029296875, + "eval_Qnli-dev_dot_precision": 0.6012461059190031, + "eval_Qnli-dev_dot_recall": 0.8177966101694916, + "eval_Qnli-dev_euclidean_accuracy": 0.724609375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.634359359741211, + "eval_Qnli-dev_euclidean_ap": 0.7543039429243505, + "eval_Qnli-dev_euclidean_f1": 0.7129798903107861, + "eval_Qnli-dev_euclidean_f1_threshold": 15.349479675292969, + "eval_Qnli-dev_euclidean_precision": 0.6270096463022508, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 285.07244873046875, + "eval_Qnli-dev_manhattan_ap": 0.7579405691223697, + "eval_Qnli-dev_manhattan_f1": 0.7142857142857143, + "eval_Qnli-dev_manhattan_f1_threshold": 311.107421875, + "eval_Qnli-dev_manhattan_precision": 0.6560283687943262, + "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, + "eval_Qnli-dev_max_accuracy": 0.724609375, + "eval_Qnli-dev_max_accuracy_threshold": 381.15460205078125, + "eval_Qnli-dev_max_ap": 0.7579405691223697, + "eval_Qnli-dev_max_f1": 0.7155635062611807, + "eval_Qnli-dev_max_f1_threshold": 375.46405029296875, + "eval_Qnli-dev_max_precision": 0.6560283687943262, + "eval_Qnli-dev_max_recall": 0.847457627118644, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8940784931182861, + "eval_allNLI-dev_cosine_ap": 0.5955386793059732, + "eval_allNLI-dev_cosine_f1": 0.6008064516129031, + "eval_allNLI-dev_cosine_f1_threshold": 0.7910170555114746, + "eval_allNLI-dev_cosine_precision": 0.4613003095975232, + "eval_allNLI-dev_cosine_recall": 0.861271676300578, + "eval_allNLI-dev_dot_accuracy": 0.69921875, + "eval_allNLI-dev_dot_accuracy_threshold": 451.13623046875, + "eval_allNLI-dev_dot_ap": 0.5115980330253447, + "eval_allNLI-dev_dot_f1": 0.5823927765237021, + "eval_allNLI-dev_dot_f1_threshold": 410.14447021484375, + "eval_allNLI-dev_dot_precision": 0.4777777777777778, + "eval_allNLI-dev_dot_recall": 0.7456647398843931, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.620901107788086, + "eval_allNLI-dev_euclidean_ap": 0.5964863175832775, + "eval_allNLI-dev_euclidean_f1": 0.6052104208416834, + "eval_allNLI-dev_euclidean_f1_threshold": 14.621801376342773, + "eval_allNLI-dev_euclidean_precision": 0.46319018404907975, + "eval_allNLI-dev_euclidean_recall": 0.8728323699421965, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 226.17271423339844, + "eval_allNLI-dev_manhattan_ap": 0.594460510313827, + "eval_allNLI-dev_manhattan_f1": 0.6003976143141153, + "eval_allNLI-dev_manhattan_f1_threshold": 303.3892517089844, + "eval_allNLI-dev_manhattan_precision": 0.4575757575757576, + "eval_allNLI-dev_manhattan_recall": 0.8728323699421965, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 451.13623046875, + "eval_allNLI-dev_max_ap": 0.5964863175832775, + "eval_allNLI-dev_max_f1": 0.6052104208416834, + "eval_allNLI-dev_max_f1_threshold": 410.14447021484375, + "eval_allNLI-dev_max_precision": 0.4777777777777778, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7579405691223697, + "eval_sts-test_pearson_cosine": 0.8116140949252031, + "eval_sts-test_pearson_dot": 0.7799016605392657, + "eval_sts-test_pearson_euclidean": 0.8407077538986545, + "eval_sts-test_pearson_manhattan": 0.8400909131579789, + "eval_sts-test_pearson_max": 0.8407077538986545, + "eval_sts-test_spearman_cosine": 0.8376718769749885, + "eval_sts-test_spearman_dot": 0.7616373358104539, + "eval_sts-test_spearman_euclidean": 0.8340390590777574, + "eval_sts-test_spearman_manhattan": 0.8319958059851489, + "eval_sts-test_spearman_max": 0.8376718769749885, + "eval_vitaminc-pairs_loss": 2.8492391109466553, + "eval_vitaminc-pairs_runtime": 3.2108, + "eval_vitaminc-pairs_samples_per_second": 39.866, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_negation-triplets_loss": 1.0788973569869995, + "eval_negation-triplets_runtime": 0.7524, + "eval_negation-triplets_samples_per_second": 170.133, + "eval_negation-triplets_steps_per_second": 1.329, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_scitail-pairs-pos_loss": 0.19070731103420258, + "eval_scitail-pairs-pos_runtime": 0.8535, + "eval_scitail-pairs-pos_samples_per_second": 149.971, + "eval_scitail-pairs-pos_steps_per_second": 1.172, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_scitail-pairs-qa_loss": 0.0008353625307790935, + "eval_scitail-pairs-qa_runtime": 0.594, + "eval_scitail-pairs-qa_samples_per_second": 215.481, + "eval_scitail-pairs-qa_steps_per_second": 1.683, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_xsum-pairs_loss": 0.6224209666252136, + "eval_xsum-pairs_runtime": 3.0265, + "eval_xsum-pairs_samples_per_second": 42.293, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_sciq_pairs_loss": 0.12949666380882263, + "eval_sciq_pairs_runtime": 3.4626, + "eval_sciq_pairs_samples_per_second": 36.966, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_qasc_pairs_loss": 0.3247033953666687, + "eval_qasc_pairs_runtime": 0.6062, + "eval_qasc_pairs_samples_per_second": 211.165, + "eval_qasc_pairs_steps_per_second": 1.65, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_openbookqa_pairs_loss": 1.1479803323745728, + "eval_openbookqa_pairs_runtime": 0.6115, + "eval_openbookqa_pairs_samples_per_second": 209.313, + "eval_openbookqa_pairs_steps_per_second": 1.635, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_msmarco_pairs_loss": 1.1639130115509033, + "eval_msmarco_pairs_runtime": 1.5482, + "eval_msmarco_pairs_samples_per_second": 82.677, + "eval_msmarco_pairs_steps_per_second": 0.646, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_nq_pairs_loss": 1.2727266550064087, + "eval_nq_pairs_runtime": 2.8951, + "eval_nq_pairs_samples_per_second": 44.212, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_trivia_pairs_loss": 1.1261823177337646, + "eval_trivia_pairs_runtime": 3.4344, + "eval_trivia_pairs_samples_per_second": 37.27, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_gooaq_pairs_loss": 0.6438990831375122, + "eval_gooaq_pairs_runtime": 0.949, + "eval_gooaq_pairs_samples_per_second": 134.878, + "eval_gooaq_pairs_steps_per_second": 1.054, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_paws-pos_loss": 0.02884558029472828, + "eval_paws-pos_runtime": 0.6952, + "eval_paws-pos_samples_per_second": 184.107, + "eval_paws-pos_steps_per_second": 1.438, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_global_dataset_loss": 0.5700183510780334, + "eval_global_dataset_runtime": 13.3817, + "eval_global_dataset_samples_per_second": 31.087, + "eval_global_dataset_steps_per_second": 0.299, + "step": 860 + }, + { + "epoch": 0.8858024691358025, + "grad_norm": 14.423850059509277, + "learning_rate": 3.118380062305296e-05, + "loss": 1.5436, + "step": 861 + }, + { + "epoch": 0.8868312757201646, + "grad_norm": 6.131687164306641, + "learning_rate": 3.122014537902388e-05, + "loss": 0.2666, + "step": 862 + }, + { + "epoch": 0.8878600823045267, + "grad_norm": 8.292266845703125, + "learning_rate": 3.1256490134994806e-05, + "loss": 0.5436, + "step": 863 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 15.915453910827637, + "learning_rate": 3.1292834890965727e-05, + "loss": 1.8489, + "step": 864 + }, + { + "epoch": 0.8899176954732511, + "grad_norm": 15.952044486999512, + "learning_rate": 3.1329179646936654e-05, + "loss": 1.3624, + "step": 865 + }, + { + "epoch": 0.8909465020576132, + "grad_norm": 0.0, + "learning_rate": 3.136552440290758e-05, + "loss": 0.0, + "step": 866 + }, + { + "epoch": 0.8919753086419753, + "grad_norm": 8.352066993713379, + "learning_rate": 3.14018691588785e-05, + "loss": 0.5817, + "step": 867 + }, + { + "epoch": 0.8930041152263375, + "grad_norm": 0.0, + "learning_rate": 3.143821391484942e-05, + "loss": 0.0, + "step": 868 + }, + { + "epoch": 0.8940329218106996, + "grad_norm": 7.5998640060424805, + "learning_rate": 3.147455867082035e-05, + "loss": 0.4523, + "step": 869 + }, + { + "epoch": 0.8950617283950617, + "grad_norm": 9.014819145202637, + "learning_rate": 3.151090342679128e-05, + "loss": 0.8566, + "step": 870 + }, + { + "epoch": 0.8960905349794238, + "grad_norm": 9.435276985168457, + "learning_rate": 3.15472481827622e-05, + "loss": 0.5609, + "step": 871 + }, + { + "epoch": 0.897119341563786, + "grad_norm": 6.9305219650268555, + "learning_rate": 3.1583592938733126e-05, + "loss": 0.4103, + "step": 872 + }, + { + "epoch": 0.8981481481481481, + "grad_norm": 7.279191493988037, + "learning_rate": 3.1619937694704046e-05, + "loss": 0.4226, + "step": 873 + }, + { + "epoch": 0.8991769547325102, + "grad_norm": 11.90969181060791, + "learning_rate": 3.165628245067497e-05, + "loss": 1.3344, + "step": 874 + }, + { + "epoch": 0.9002057613168725, + "grad_norm": 1.5162785053253174, + "learning_rate": 3.1692627206645894e-05, + "loss": 0.0354, + "step": 875 + }, + { + "epoch": 0.9012345679012346, + "grad_norm": 1.3723441362380981, + "learning_rate": 3.172897196261682e-05, + "loss": 0.0377, + "step": 876 + }, + { + "epoch": 0.9022633744855967, + "grad_norm": 12.883326530456543, + "learning_rate": 3.176531671858774e-05, + "loss": 1.1204, + "step": 877 + }, + { + "epoch": 0.9032921810699589, + "grad_norm": 0.0, + "learning_rate": 3.180166147455867e-05, + "loss": 0.0, + "step": 878 + }, + { + "epoch": 0.904320987654321, + "grad_norm": 8.576708793640137, + "learning_rate": 3.183800623052959e-05, + "loss": 0.4222, + "step": 879 + }, + { + "epoch": 0.9053497942386831, + "grad_norm": 9.18001651763916, + "learning_rate": 3.187435098650052e-05, + "loss": 0.5895, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_Qnli-dev_cosine_accuracy": 0.7109375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7849442958831787, + "eval_Qnli-dev_cosine_ap": 0.7532800500199506, + "eval_Qnli-dev_cosine_f1": 0.6976744186046511, + "eval_Qnli-dev_cosine_f1_threshold": 0.7686007022857666, + "eval_Qnli-dev_cosine_precision": 0.6428571428571429, + "eval_Qnli-dev_cosine_recall": 0.7627118644067796, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 410.07305908203125, + "eval_Qnli-dev_dot_ap": 0.6772857891299546, + "eval_Qnli-dev_dot_f1": 0.6719745222929936, + "eval_Qnli-dev_dot_f1_threshold": 344.32025146484375, + "eval_Qnli-dev_dot_precision": 0.5382653061224489, + "eval_Qnli-dev_dot_recall": 0.8940677966101694, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.577789306640625, + "eval_Qnli-dev_euclidean_ap": 0.7582704343457749, + "eval_Qnli-dev_euclidean_f1": 0.703125, + "eval_Qnli-dev_euclidean_f1_threshold": 15.108866691589355, + "eval_Qnli-dev_euclidean_precision": 0.6521739130434783, + "eval_Qnli-dev_euclidean_recall": 0.7627118644067796, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 296.60125732421875, + "eval_Qnli-dev_manhattan_ap": 0.7626076733761872, + "eval_Qnli-dev_manhattan_f1": 0.7037701974865348, + "eval_Qnli-dev_manhattan_f1_threshold": 324.86553955078125, + "eval_Qnli-dev_manhattan_precision": 0.6105919003115264, + "eval_Qnli-dev_manhattan_recall": 0.8305084745762712, + "eval_Qnli-dev_max_accuracy": 0.71484375, + "eval_Qnli-dev_max_accuracy_threshold": 410.07305908203125, + "eval_Qnli-dev_max_ap": 0.7626076733761872, + "eval_Qnli-dev_max_f1": 0.7037701974865348, + "eval_Qnli-dev_max_f1_threshold": 344.32025146484375, + "eval_Qnli-dev_max_precision": 0.6521739130434783, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8640371561050415, + "eval_allNLI-dev_cosine_ap": 0.5971175118697716, + "eval_allNLI-dev_cosine_f1": 0.5973451327433628, + "eval_allNLI-dev_cosine_f1_threshold": 0.7674254179000854, + "eval_allNLI-dev_cosine_precision": 0.4838709677419355, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.6953125, + "eval_allNLI-dev_dot_accuracy_threshold": 423.58746337890625, + "eval_allNLI-dev_dot_ap": 0.5249909079017288, + "eval_allNLI-dev_dot_f1": 0.5751879699248119, + "eval_allNLI-dev_dot_f1_threshold": 344.40423583984375, + "eval_allNLI-dev_dot_precision": 0.42618384401114207, + "eval_allNLI-dev_dot_recall": 0.884393063583815, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.598902702331543, + "eval_allNLI-dev_euclidean_ap": 0.6004950564369994, + "eval_allNLI-dev_euclidean_f1": 0.5964125560538117, + "eval_allNLI-dev_euclidean_f1_threshold": 14.896963119506836, + "eval_allNLI-dev_euclidean_precision": 0.48717948717948717, + "eval_allNLI-dev_euclidean_recall": 0.7687861271676301, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 225.4422149658203, + "eval_allNLI-dev_manhattan_ap": 0.5975087335410215, + "eval_allNLI-dev_manhattan_f1": 0.5961945031712473, + "eval_allNLI-dev_manhattan_f1_threshold": 320.1939697265625, + "eval_allNLI-dev_manhattan_precision": 0.47, + "eval_allNLI-dev_manhattan_recall": 0.815028901734104, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 423.58746337890625, + "eval_allNLI-dev_max_ap": 0.6004950564369994, + "eval_allNLI-dev_max_f1": 0.5973451327433628, + "eval_allNLI-dev_max_f1_threshold": 344.40423583984375, + "eval_allNLI-dev_max_precision": 0.48717948717948717, + "eval_allNLI-dev_max_recall": 0.884393063583815, + "eval_sequential_score": 0.7626076733761872, + "eval_sts-test_pearson_cosine": 0.8236499611459422, + "eval_sts-test_pearson_dot": 0.7997972279606418, + "eval_sts-test_pearson_euclidean": 0.8506072277669228, + "eval_sts-test_pearson_manhattan": 0.8491673694905203, + "eval_sts-test_pearson_max": 0.8506072277669228, + "eval_sts-test_spearman_cosine": 0.8462692655571971, + "eval_sts-test_spearman_dot": 0.7806738958886958, + "eval_sts-test_spearman_euclidean": 0.8433683545400226, + "eval_sts-test_spearman_manhattan": 0.8414020288657458, + "eval_sts-test_spearman_max": 0.8462692655571971, + "eval_vitaminc-pairs_loss": 2.9979610443115234, + "eval_vitaminc-pairs_runtime": 3.229, + "eval_vitaminc-pairs_samples_per_second": 39.641, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_negation-triplets_loss": 1.1333051919937134, + "eval_negation-triplets_runtime": 0.7676, + "eval_negation-triplets_samples_per_second": 166.759, + "eval_negation-triplets_steps_per_second": 1.303, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_scitail-pairs-pos_loss": 0.1798580139875412, + "eval_scitail-pairs-pos_runtime": 0.8519, + "eval_scitail-pairs-pos_samples_per_second": 150.255, + "eval_scitail-pairs-pos_steps_per_second": 1.174, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_scitail-pairs-qa_loss": 0.001077975844964385, + "eval_scitail-pairs-qa_runtime": 0.5945, + "eval_scitail-pairs-qa_samples_per_second": 215.312, + "eval_scitail-pairs-qa_steps_per_second": 1.682, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_xsum-pairs_loss": 0.5809869170188904, + "eval_xsum-pairs_runtime": 3.0246, + "eval_xsum-pairs_samples_per_second": 42.32, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_sciq_pairs_loss": 0.12553882598876953, + "eval_sciq_pairs_runtime": 3.4868, + "eval_sciq_pairs_samples_per_second": 36.71, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_qasc_pairs_loss": 0.36939769983291626, + "eval_qasc_pairs_runtime": 0.6232, + "eval_qasc_pairs_samples_per_second": 205.388, + "eval_qasc_pairs_steps_per_second": 1.605, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_openbookqa_pairs_loss": 1.1518361568450928, + "eval_openbookqa_pairs_runtime": 0.5953, + "eval_openbookqa_pairs_samples_per_second": 215.007, + "eval_openbookqa_pairs_steps_per_second": 1.68, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_msmarco_pairs_loss": 1.1596477031707764, + "eval_msmarco_pairs_runtime": 1.5245, + "eval_msmarco_pairs_samples_per_second": 83.963, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_nq_pairs_loss": 1.362251877784729, + "eval_nq_pairs_runtime": 2.9215, + "eval_nq_pairs_samples_per_second": 43.813, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_trivia_pairs_loss": 1.1808240413665771, + "eval_trivia_pairs_runtime": 3.4414, + "eval_trivia_pairs_samples_per_second": 37.194, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_gooaq_pairs_loss": 0.5690011382102966, + "eval_gooaq_pairs_runtime": 0.9675, + "eval_gooaq_pairs_samples_per_second": 132.304, + "eval_gooaq_pairs_steps_per_second": 1.034, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_paws-pos_loss": 0.026389779523015022, + "eval_paws-pos_runtime": 0.7184, + "eval_paws-pos_samples_per_second": 178.184, + "eval_paws-pos_steps_per_second": 1.392, + "step": 880 + }, + { + "epoch": 0.9053497942386831, + "eval_global_dataset_loss": 0.6112414598464966, + "eval_global_dataset_runtime": 13.4861, + "eval_global_dataset_samples_per_second": 30.847, + "eval_global_dataset_steps_per_second": 0.297, + "step": 880 + }, + { + "epoch": 0.9063786008230452, + "grad_norm": 10.299396514892578, + "learning_rate": 3.191069574247144e-05, + "loss": 0.645, + "step": 881 + }, + { + "epoch": 0.9074074074074074, + "grad_norm": 9.826093673706055, + "learning_rate": 3.1947040498442366e-05, + "loss": 0.593, + "step": 882 + }, + { + "epoch": 0.9084362139917695, + "grad_norm": 8.907341003417969, + "learning_rate": 3.1983385254413286e-05, + "loss": 0.4322, + "step": 883 + }, + { + "epoch": 0.9094650205761317, + "grad_norm": 2.4184072017669678, + "learning_rate": 3.2019730010384214e-05, + "loss": 0.0629, + "step": 884 + }, + { + "epoch": 0.9104938271604939, + "grad_norm": 10.5604829788208, + "learning_rate": 3.2056074766355134e-05, + "loss": 1.0485, + "step": 885 + }, + { + "epoch": 0.911522633744856, + "grad_norm": 7.115394592285156, + "learning_rate": 3.209241952232606e-05, + "loss": 0.3749, + "step": 886 + }, + { + "epoch": 0.9125514403292181, + "grad_norm": 0.8468412756919861, + "learning_rate": 3.212876427829699e-05, + "loss": 0.0211, + "step": 887 + }, + { + "epoch": 0.9135802469135802, + "grad_norm": 8.189038276672363, + "learning_rate": 3.216510903426791e-05, + "loss": 0.4037, + "step": 888 + }, + { + "epoch": 0.9146090534979424, + "grad_norm": 13.44206428527832, + "learning_rate": 3.220145379023883e-05, + "loss": 1.3921, + "step": 889 + }, + { + "epoch": 0.9156378600823045, + "grad_norm": 19.440120697021484, + "learning_rate": 3.223779854620976e-05, + "loss": 1.6863, + "step": 890 + }, + { + "epoch": 0.9166666666666666, + "grad_norm": 10.532525062561035, + "learning_rate": 3.2274143302180685e-05, + "loss": 0.7386, + "step": 891 + }, + { + "epoch": 0.9176954732510288, + "grad_norm": 17.706409454345703, + "learning_rate": 3.2310488058151606e-05, + "loss": 1.4284, + "step": 892 + }, + { + "epoch": 0.918724279835391, + "grad_norm": 9.419487953186035, + "learning_rate": 3.234683281412253e-05, + "loss": 0.9042, + "step": 893 + }, + { + "epoch": 0.9197530864197531, + "grad_norm": 8.609904289245605, + "learning_rate": 3.2383177570093454e-05, + "loss": 0.5269, + "step": 894 + }, + { + "epoch": 0.9207818930041153, + "grad_norm": 1.3929104804992676, + "learning_rate": 3.2419522326064375e-05, + "loss": 0.0262, + "step": 895 + }, + { + "epoch": 0.9218106995884774, + "grad_norm": 9.115885734558105, + "learning_rate": 3.24558670820353e-05, + "loss": 0.4829, + "step": 896 + }, + { + "epoch": 0.9228395061728395, + "grad_norm": 2.3302252292633057, + "learning_rate": 3.249221183800623e-05, + "loss": 0.0402, + "step": 897 + }, + { + "epoch": 0.9238683127572016, + "grad_norm": 16.796581268310547, + "learning_rate": 3.252855659397715e-05, + "loss": 1.7338, + "step": 898 + }, + { + "epoch": 0.9248971193415638, + "grad_norm": 8.234830856323242, + "learning_rate": 3.256490134994808e-05, + "loss": 0.5067, + "step": 899 + }, + { + "epoch": 0.9259259259259259, + "grad_norm": 0.0, + "learning_rate": 3.2601246105919e-05, + "loss": 0.0, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_Qnli-dev_cosine_accuracy": 0.720703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7808551788330078, + "eval_Qnli-dev_cosine_ap": 0.7722935104666544, + "eval_Qnli-dev_cosine_f1": 0.7180451127819548, + "eval_Qnli-dev_cosine_f1_threshold": 0.7518417835235596, + "eval_Qnli-dev_cosine_precision": 0.6452702702702703, + "eval_Qnli-dev_cosine_recall": 0.809322033898305, + "eval_Qnli-dev_dot_accuracy": 0.68359375, + "eval_Qnli-dev_dot_accuracy_threshold": 375.266357421875, + "eval_Qnli-dev_dot_ap": 0.705847017080601, + "eval_Qnli-dev_dot_f1": 0.693103448275862, + "eval_Qnli-dev_dot_f1_threshold": 338.9654541015625, + "eval_Qnli-dev_dot_precision": 0.5843023255813954, + "eval_Qnli-dev_dot_recall": 0.8516949152542372, + "eval_Qnli-dev_euclidean_accuracy": 0.728515625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.228927612304688, + "eval_Qnli-dev_euclidean_ap": 0.7773104444806713, + "eval_Qnli-dev_euclidean_f1": 0.71875, + "eval_Qnli-dev_euclidean_f1_threshold": 14.866127014160156, + "eval_Qnli-dev_euclidean_precision": 0.6666666666666666, + "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, + "eval_Qnli-dev_manhattan_accuracy": 0.732421875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 300.7906494140625, + "eval_Qnli-dev_manhattan_ap": 0.7788691432055757, + "eval_Qnli-dev_manhattan_f1": 0.7186858316221767, + "eval_Qnli-dev_manhattan_f1_threshold": 300.7906494140625, + "eval_Qnli-dev_manhattan_precision": 0.6972111553784861, + "eval_Qnli-dev_manhattan_recall": 0.7415254237288136, + "eval_Qnli-dev_max_accuracy": 0.732421875, + "eval_Qnli-dev_max_accuracy_threshold": 375.266357421875, + "eval_Qnli-dev_max_ap": 0.7788691432055757, + "eval_Qnli-dev_max_f1": 0.71875, + "eval_Qnli-dev_max_f1_threshold": 338.9654541015625, + "eval_Qnli-dev_max_precision": 0.6972111553784861, + "eval_Qnli-dev_max_recall": 0.8516949152542372, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8723114728927612, + "eval_allNLI-dev_cosine_ap": 0.6103803128378515, + "eval_allNLI-dev_cosine_f1": 0.6218487394957984, + "eval_allNLI-dev_cosine_f1_threshold": 0.7703201770782471, + "eval_allNLI-dev_cosine_precision": 0.4884488448844885, + "eval_allNLI-dev_cosine_recall": 0.8554913294797688, + "eval_allNLI-dev_dot_accuracy": 0.6875, + "eval_allNLI-dev_dot_accuracy_threshold": 450.7716064453125, + "eval_allNLI-dev_dot_ap": 0.5404695476141235, + "eval_allNLI-dev_dot_f1": 0.581532416502947, + "eval_allNLI-dev_dot_f1_threshold": 342.1371765136719, + "eval_allNLI-dev_dot_precision": 0.44047619047619047, + "eval_allNLI-dev_dot_recall": 0.8554913294797688, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.053747177124023, + "eval_allNLI-dev_euclidean_ap": 0.6129537867372233, + "eval_allNLI-dev_euclidean_f1": 0.6365591397849463, + "eval_allNLI-dev_euclidean_f1_threshold": 14.339694023132324, + "eval_allNLI-dev_euclidean_precision": 0.5068493150684932, + "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 228.95936584472656, + "eval_allNLI-dev_manhattan_ap": 0.6105307113025781, + "eval_allNLI-dev_manhattan_f1": 0.6291666666666667, + "eval_allNLI-dev_manhattan_f1_threshold": 303.91961669921875, + "eval_allNLI-dev_manhattan_precision": 0.49185667752442996, + "eval_allNLI-dev_manhattan_recall": 0.8728323699421965, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 450.7716064453125, + "eval_allNLI-dev_max_ap": 0.6129537867372233, + "eval_allNLI-dev_max_f1": 0.6365591397849463, + "eval_allNLI-dev_max_f1_threshold": 342.1371765136719, + "eval_allNLI-dev_max_precision": 0.5068493150684932, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7788691432055757, + "eval_sts-test_pearson_cosine": 0.821250155739432, + "eval_sts-test_pearson_dot": 0.7884449998485221, + "eval_sts-test_pearson_euclidean": 0.8476165058712835, + "eval_sts-test_pearson_manhattan": 0.8428075499119236, + "eval_sts-test_pearson_max": 0.8476165058712835, + "eval_sts-test_spearman_cosine": 0.8461122781322361, + "eval_sts-test_spearman_dot": 0.7783341209381078, + "eval_sts-test_spearman_euclidean": 0.8401831835104896, + "eval_sts-test_spearman_manhattan": 0.8381286083487489, + "eval_sts-test_spearman_max": 0.8461122781322361, + "eval_vitaminc-pairs_loss": 2.6282498836517334, + "eval_vitaminc-pairs_runtime": 3.2006, + "eval_vitaminc-pairs_samples_per_second": 39.992, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_negation-triplets_loss": 1.0272082090377808, + "eval_negation-triplets_runtime": 0.7536, + "eval_negation-triplets_samples_per_second": 169.85, + "eval_negation-triplets_steps_per_second": 1.327, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_scitail-pairs-pos_loss": 0.1884053498506546, + "eval_scitail-pairs-pos_runtime": 0.8451, + "eval_scitail-pairs-pos_samples_per_second": 151.452, + "eval_scitail-pairs-pos_steps_per_second": 1.183, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_scitail-pairs-qa_loss": 0.0004345515335444361, + "eval_scitail-pairs-qa_runtime": 0.5987, + "eval_scitail-pairs-qa_samples_per_second": 213.797, + "eval_scitail-pairs-qa_steps_per_second": 1.67, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_xsum-pairs_loss": 0.5701841711997986, + "eval_xsum-pairs_runtime": 3.0401, + "eval_xsum-pairs_samples_per_second": 42.103, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_sciq_pairs_loss": 0.12735481560230255, + "eval_sciq_pairs_runtime": 3.4518, + "eval_sciq_pairs_samples_per_second": 37.082, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_qasc_pairs_loss": 0.33931973576545715, + "eval_qasc_pairs_runtime": 0.6061, + "eval_qasc_pairs_samples_per_second": 211.189, + "eval_qasc_pairs_steps_per_second": 1.65, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_openbookqa_pairs_loss": 1.055425763130188, + "eval_openbookqa_pairs_runtime": 0.5844, + "eval_openbookqa_pairs_samples_per_second": 219.038, + "eval_openbookqa_pairs_steps_per_second": 1.711, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_msmarco_pairs_loss": 1.1918123960494995, + "eval_msmarco_pairs_runtime": 1.522, + "eval_msmarco_pairs_samples_per_second": 84.102, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_nq_pairs_loss": 1.2023570537567139, + "eval_nq_pairs_runtime": 2.8987, + "eval_nq_pairs_samples_per_second": 44.157, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_trivia_pairs_loss": 1.2772942781448364, + "eval_trivia_pairs_runtime": 3.4497, + "eval_trivia_pairs_samples_per_second": 37.104, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_gooaq_pairs_loss": 0.561891496181488, + "eval_gooaq_pairs_runtime": 0.9472, + "eval_gooaq_pairs_samples_per_second": 135.133, + "eval_gooaq_pairs_steps_per_second": 1.056, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_paws-pos_loss": 0.028995605185627937, + "eval_paws-pos_runtime": 0.6977, + "eval_paws-pos_samples_per_second": 183.452, + "eval_paws-pos_steps_per_second": 1.433, + "step": 900 + }, + { + "epoch": 0.9259259259259259, + "eval_global_dataset_loss": 0.5420277118682861, + "eval_global_dataset_runtime": 13.3831, + "eval_global_dataset_samples_per_second": 31.084, + "eval_global_dataset_steps_per_second": 0.299, + "step": 900 + }, + { + "epoch": 0.926954732510288, + "grad_norm": 11.65727710723877, + "learning_rate": 3.2637590861889925e-05, + "loss": 1.1782, + "step": 901 + }, + { + "epoch": 0.9279835390946503, + "grad_norm": 16.935638427734375, + "learning_rate": 3.267393561786085e-05, + "loss": 2.7273, + "step": 902 + }, + { + "epoch": 0.9290123456790124, + "grad_norm": 9.754632949829102, + "learning_rate": 3.2710280373831774e-05, + "loss": 0.4795, + "step": 903 + }, + { + "epoch": 0.9300411522633745, + "grad_norm": 8.847827911376953, + "learning_rate": 3.2746625129802694e-05, + "loss": 0.5948, + "step": 904 + }, + { + "epoch": 0.9310699588477366, + "grad_norm": 18.149011611938477, + "learning_rate": 3.278296988577362e-05, + "loss": 1.8623, + "step": 905 + }, + { + "epoch": 0.9320987654320988, + "grad_norm": 14.660048484802246, + "learning_rate": 3.281931464174454e-05, + "loss": 1.5336, + "step": 906 + }, + { + "epoch": 0.9331275720164609, + "grad_norm": 7.183665752410889, + "learning_rate": 3.285565939771547e-05, + "loss": 0.3394, + "step": 907 + }, + { + "epoch": 0.934156378600823, + "grad_norm": 2.1199798583984375, + "learning_rate": 3.28920041536864e-05, + "loss": 0.048, + "step": 908 + }, + { + "epoch": 0.9351851851851852, + "grad_norm": 11.716431617736816, + "learning_rate": 3.292834890965732e-05, + "loss": 1.326, + "step": 909 + }, + { + "epoch": 0.9362139917695473, + "grad_norm": 9.196380615234375, + "learning_rate": 3.296469366562824e-05, + "loss": 1.0024, + "step": 910 + }, + { + "epoch": 0.9372427983539094, + "grad_norm": 8.285309791564941, + "learning_rate": 3.3001038421599166e-05, + "loss": 0.5757, + "step": 911 + }, + { + "epoch": 0.9382716049382716, + "grad_norm": 13.583939552307129, + "learning_rate": 3.303738317757009e-05, + "loss": 1.3069, + "step": 912 + }, + { + "epoch": 0.9393004115226338, + "grad_norm": 8.123488426208496, + "learning_rate": 3.3073727933541014e-05, + "loss": 0.5979, + "step": 913 + }, + { + "epoch": 0.9403292181069959, + "grad_norm": 2.981621503829956, + "learning_rate": 3.311007268951194e-05, + "loss": 0.0798, + "step": 914 + }, + { + "epoch": 0.941358024691358, + "grad_norm": 9.019003868103027, + "learning_rate": 3.314641744548286e-05, + "loss": 0.4823, + "step": 915 + }, + { + "epoch": 0.9423868312757202, + "grad_norm": 6.8525261878967285, + "learning_rate": 3.318276220145379e-05, + "loss": 0.3875, + "step": 916 + }, + { + "epoch": 0.9434156378600823, + "grad_norm": 12.951005935668945, + "learning_rate": 3.321910695742471e-05, + "loss": 1.4076, + "step": 917 + }, + { + "epoch": 0.9444444444444444, + "grad_norm": 7.870869159698486, + "learning_rate": 3.325545171339564e-05, + "loss": 0.3932, + "step": 918 + }, + { + "epoch": 0.9454732510288066, + "grad_norm": 0.0, + "learning_rate": 3.329179646936656e-05, + "loss": 0.0, + "step": 919 + }, + { + "epoch": 0.9465020576131687, + "grad_norm": 14.922693252563477, + "learning_rate": 3.3328141225337485e-05, + "loss": 1.3184, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_Qnli-dev_cosine_accuracy": 0.7109375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7943984866142273, + "eval_Qnli-dev_cosine_ap": 0.7615856123966769, + "eval_Qnli-dev_cosine_f1": 0.7084078711985689, + "eval_Qnli-dev_cosine_f1_threshold": 0.7604844570159912, + "eval_Qnli-dev_cosine_precision": 0.6130030959752322, + "eval_Qnli-dev_cosine_recall": 0.8389830508474576, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 397.829833984375, + "eval_Qnli-dev_dot_ap": 0.704400855131843, + "eval_Qnli-dev_dot_f1": 0.6764227642276421, + "eval_Qnli-dev_dot_f1_threshold": 353.55364990234375, + "eval_Qnli-dev_dot_precision": 0.5488126649076517, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.720703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.625345230102539, + "eval_Qnli-dev_euclidean_ap": 0.7640414687041563, + "eval_Qnli-dev_euclidean_f1": 0.7021276595744681, + "eval_Qnli-dev_euclidean_f1_threshold": 15.56657600402832, + "eval_Qnli-dev_euclidean_precision": 0.6036585365853658, + "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 268.2149353027344, + "eval_Qnli-dev_manhattan_ap": 0.7651119124586969, + "eval_Qnli-dev_manhattan_f1": 0.701492537313433, + "eval_Qnli-dev_manhattan_f1_threshold": 313.13140869140625, + "eval_Qnli-dev_manhattan_precision": 0.6266666666666667, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 397.829833984375, + "eval_Qnli-dev_max_ap": 0.7651119124586969, + "eval_Qnli-dev_max_f1": 0.7084078711985689, + "eval_Qnli-dev_max_f1_threshold": 353.55364990234375, + "eval_Qnli-dev_max_precision": 0.6266666666666667, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.734375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8742256164550781, + "eval_allNLI-dev_cosine_ap": 0.6047802392072363, + "eval_allNLI-dev_cosine_f1": 0.6266666666666667, + "eval_allNLI-dev_cosine_f1_threshold": 0.8091484308242798, + "eval_allNLI-dev_cosine_precision": 0.5090252707581228, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.6875, + "eval_allNLI-dev_dot_accuracy_threshold": 447.2115478515625, + "eval_allNLI-dev_dot_ap": 0.5177728257758492, + "eval_allNLI-dev_dot_f1": 0.5864978902953586, + "eval_allNLI-dev_dot_f1_threshold": 374.89508056640625, + "eval_allNLI-dev_dot_precision": 0.46179401993355484, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.74878978729248, + "eval_allNLI-dev_euclidean_ap": 0.6093991550091531, + "eval_allNLI-dev_euclidean_f1": 0.6281755196304851, + "eval_allNLI-dev_euclidean_f1_threshold": 13.34119987487793, + "eval_allNLI-dev_euclidean_precision": 0.5230769230769231, + "eval_allNLI-dev_euclidean_recall": 0.7861271676300579, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 218.954833984375, + "eval_allNLI-dev_manhattan_ap": 0.6071347019543764, + "eval_allNLI-dev_manhattan_f1": 0.6219512195121951, + "eval_allNLI-dev_manhattan_f1_threshold": 300.4981994628906, + "eval_allNLI-dev_manhattan_precision": 0.47962382445141066, + "eval_allNLI-dev_manhattan_recall": 0.884393063583815, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 447.2115478515625, + "eval_allNLI-dev_max_ap": 0.6093991550091531, + "eval_allNLI-dev_max_f1": 0.6281755196304851, + "eval_allNLI-dev_max_f1_threshold": 374.89508056640625, + "eval_allNLI-dev_max_precision": 0.5230769230769231, + "eval_allNLI-dev_max_recall": 0.884393063583815, + "eval_sequential_score": 0.7651119124586969, + "eval_sts-test_pearson_cosine": 0.810519241797423, + "eval_sts-test_pearson_dot": 0.7728322842621618, + "eval_sts-test_pearson_euclidean": 0.8405931437646742, + "eval_sts-test_pearson_manhattan": 0.8368109506847808, + "eval_sts-test_pearson_max": 0.8405931437646742, + "eval_sts-test_spearman_cosine": 0.8397552719490651, + "eval_sts-test_spearman_dot": 0.7535431751625915, + "eval_sts-test_spearman_euclidean": 0.8350731712259899, + "eval_sts-test_spearman_manhattan": 0.8328478700020412, + "eval_sts-test_spearman_max": 0.8397552719490651, + "eval_vitaminc-pairs_loss": 2.8251101970672607, + "eval_vitaminc-pairs_runtime": 3.2059, + "eval_vitaminc-pairs_samples_per_second": 39.927, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_negation-triplets_loss": 1.0231643915176392, + "eval_negation-triplets_runtime": 0.7426, + "eval_negation-triplets_samples_per_second": 172.378, + "eval_negation-triplets_steps_per_second": 1.347, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_scitail-pairs-pos_loss": 0.1975163370370865, + "eval_scitail-pairs-pos_runtime": 0.8442, + "eval_scitail-pairs-pos_samples_per_second": 151.623, + "eval_scitail-pairs-pos_steps_per_second": 1.185, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_scitail-pairs-qa_loss": 0.0008590650395490229, + "eval_scitail-pairs-qa_runtime": 0.5803, + "eval_scitail-pairs-qa_samples_per_second": 220.575, + "eval_scitail-pairs-qa_steps_per_second": 1.723, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_xsum-pairs_loss": 0.602358877658844, + "eval_xsum-pairs_runtime": 3.0201, + "eval_xsum-pairs_samples_per_second": 42.382, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_sciq_pairs_loss": 0.10820051282644272, + "eval_sciq_pairs_runtime": 3.4586, + "eval_sciq_pairs_samples_per_second": 37.009, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_qasc_pairs_loss": 0.38339564204216003, + "eval_qasc_pairs_runtime": 0.6142, + "eval_qasc_pairs_samples_per_second": 208.397, + "eval_qasc_pairs_steps_per_second": 1.628, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_openbookqa_pairs_loss": 1.1135048866271973, + "eval_openbookqa_pairs_runtime": 0.5908, + "eval_openbookqa_pairs_samples_per_second": 216.637, + "eval_openbookqa_pairs_steps_per_second": 1.692, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_msmarco_pairs_loss": 1.1654598712921143, + "eval_msmarco_pairs_runtime": 1.519, + "eval_msmarco_pairs_samples_per_second": 84.267, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_nq_pairs_loss": 1.1468371152877808, + "eval_nq_pairs_runtime": 2.8907, + "eval_nq_pairs_samples_per_second": 44.279, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_trivia_pairs_loss": 1.069029688835144, + "eval_trivia_pairs_runtime": 3.4494, + "eval_trivia_pairs_samples_per_second": 37.107, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_gooaq_pairs_loss": 0.5463513731956482, + "eval_gooaq_pairs_runtime": 0.9528, + "eval_gooaq_pairs_samples_per_second": 134.336, + "eval_gooaq_pairs_steps_per_second": 1.049, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_paws-pos_loss": 0.02908269874751568, + "eval_paws-pos_runtime": 0.6915, + "eval_paws-pos_samples_per_second": 185.109, + "eval_paws-pos_steps_per_second": 1.446, + "step": 920 + }, + { + "epoch": 0.9465020576131687, + "eval_global_dataset_loss": 0.548460066318512, + "eval_global_dataset_runtime": 13.4034, + "eval_global_dataset_samples_per_second": 31.037, + "eval_global_dataset_steps_per_second": 0.298, + "step": 920 + }, + { + "epoch": 0.9475308641975309, + "grad_norm": 11.01314640045166, + "learning_rate": 3.3364485981308406e-05, + "loss": 0.7138, + "step": 921 + }, + { + "epoch": 0.948559670781893, + "grad_norm": 11.776330947875977, + "learning_rate": 3.340083073727933e-05, + "loss": 1.3098, + "step": 922 + }, + { + "epoch": 0.9495884773662552, + "grad_norm": 8.179085731506348, + "learning_rate": 3.343717549325026e-05, + "loss": 0.4221, + "step": 923 + }, + { + "epoch": 0.9506172839506173, + "grad_norm": 9.425230979919434, + "learning_rate": 3.347352024922118e-05, + "loss": 0.6967, + "step": 924 + }, + { + "epoch": 0.9516460905349794, + "grad_norm": 15.566692352294922, + "learning_rate": 3.35098650051921e-05, + "loss": 1.5207, + "step": 925 + }, + { + "epoch": 0.9526748971193416, + "grad_norm": 9.330801010131836, + "learning_rate": 3.354620976116303e-05, + "loss": 0.5197, + "step": 926 + }, + { + "epoch": 0.9537037037037037, + "grad_norm": 3.745374917984009, + "learning_rate": 3.358255451713395e-05, + "loss": 0.0581, + "step": 927 + }, + { + "epoch": 0.9547325102880658, + "grad_norm": 8.182941436767578, + "learning_rate": 3.361889927310488e-05, + "loss": 0.4411, + "step": 928 + }, + { + "epoch": 0.9557613168724279, + "grad_norm": 8.651951789855957, + "learning_rate": 3.3655244029075805e-05, + "loss": 0.4771, + "step": 929 + }, + { + "epoch": 0.9567901234567902, + "grad_norm": 8.148431777954102, + "learning_rate": 3.3691588785046725e-05, + "loss": 0.366, + "step": 930 + }, + { + "epoch": 0.9578189300411523, + "grad_norm": 12.771159172058105, + "learning_rate": 3.3727933541017646e-05, + "loss": 1.1728, + "step": 931 + }, + { + "epoch": 0.9588477366255144, + "grad_norm": 7.892995834350586, + "learning_rate": 3.3764278296988573e-05, + "loss": 0.473, + "step": 932 + }, + { + "epoch": 0.9598765432098766, + "grad_norm": 14.070477485656738, + "learning_rate": 3.38006230529595e-05, + "loss": 1.1542, + "step": 933 + }, + { + "epoch": 0.9609053497942387, + "grad_norm": 12.673274993896484, + "learning_rate": 3.383696780893042e-05, + "loss": 1.3993, + "step": 934 + }, + { + "epoch": 0.9619341563786008, + "grad_norm": 1.1816976070404053, + "learning_rate": 3.387331256490135e-05, + "loss": 0.0468, + "step": 935 + }, + { + "epoch": 0.9629629629629629, + "grad_norm": 7.3651814460754395, + "learning_rate": 3.390965732087227e-05, + "loss": 0.4248, + "step": 936 + }, + { + "epoch": 0.9639917695473251, + "grad_norm": 6.860713958740234, + "learning_rate": 3.39460020768432e-05, + "loss": 0.3551, + "step": 937 + }, + { + "epoch": 0.9650205761316872, + "grad_norm": 14.23501968383789, + "learning_rate": 3.398234683281412e-05, + "loss": 1.3652, + "step": 938 + }, + { + "epoch": 0.9660493827160493, + "grad_norm": 20.718782424926758, + "learning_rate": 3.4018691588785045e-05, + "loss": 0.7506, + "step": 939 + }, + { + "epoch": 0.9670781893004116, + "grad_norm": 7.975811958312988, + "learning_rate": 3.4055036344755966e-05, + "loss": 0.3937, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7996010184288025, + "eval_Qnli-dev_cosine_ap": 0.7552849329220187, + "eval_Qnli-dev_cosine_f1": 0.6943396226415095, + "eval_Qnli-dev_cosine_f1_threshold": 0.7739279270172119, + "eval_Qnli-dev_cosine_precision": 0.6258503401360545, + "eval_Qnli-dev_cosine_recall": 0.7796610169491526, + "eval_Qnli-dev_dot_accuracy": 0.65625, + "eval_Qnli-dev_dot_accuracy_threshold": 404.8746337890625, + "eval_Qnli-dev_dot_ap": 0.6817556270978887, + "eval_Qnli-dev_dot_f1": 0.6753670473083198, + "eval_Qnli-dev_dot_f1_threshold": 349.9105224609375, + "eval_Qnli-dev_dot_precision": 0.5490716180371353, + "eval_Qnli-dev_dot_recall": 0.8771186440677966, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.779217720031738, + "eval_Qnli-dev_euclidean_ap": 0.7613978748972194, + "eval_Qnli-dev_euclidean_f1": 0.6984732824427481, + "eval_Qnli-dev_euclidean_f1_threshold": 14.844427108764648, + "eval_Qnli-dev_euclidean_precision": 0.6354166666666666, + "eval_Qnli-dev_euclidean_recall": 0.7754237288135594, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 279.33502197265625, + "eval_Qnli-dev_manhattan_ap": 0.7648285226426834, + "eval_Qnli-dev_manhattan_f1": 0.6929982046678635, + "eval_Qnli-dev_manhattan_f1_threshold": 318.5891418457031, + "eval_Qnli-dev_manhattan_precision": 0.6012461059190031, + "eval_Qnli-dev_manhattan_recall": 0.8177966101694916, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 404.8746337890625, + "eval_Qnli-dev_max_ap": 0.7648285226426834, + "eval_Qnli-dev_max_f1": 0.6984732824427481, + "eval_Qnli-dev_max_f1_threshold": 349.9105224609375, + "eval_Qnli-dev_max_precision": 0.6354166666666666, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8795170187950134, + "eval_allNLI-dev_cosine_ap": 0.6090048928829667, + "eval_allNLI-dev_cosine_f1": 0.6204081632653061, + "eval_allNLI-dev_cosine_f1_threshold": 0.7640015482902527, + "eval_allNLI-dev_cosine_precision": 0.4794952681388013, + "eval_allNLI-dev_cosine_recall": 0.8786127167630058, + "eval_allNLI-dev_dot_accuracy": 0.6953125, + "eval_allNLI-dev_dot_accuracy_threshold": 420.4771728515625, + "eval_allNLI-dev_dot_ap": 0.5376162680949538, + "eval_allNLI-dev_dot_f1": 0.6000000000000001, + "eval_allNLI-dev_dot_f1_threshold": 349.6429748535156, + "eval_allNLI-dev_dot_precision": 0.45871559633027525, + "eval_allNLI-dev_dot_recall": 0.8670520231213873, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.426948547363281, + "eval_allNLI-dev_euclidean_ap": 0.6118636764768349, + "eval_allNLI-dev_euclidean_f1": 0.6172839506172839, + "eval_allNLI-dev_euclidean_f1_threshold": 14.738828659057617, + "eval_allNLI-dev_euclidean_precision": 0.4792332268370607, + "eval_allNLI-dev_euclidean_recall": 0.8670520231213873, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 222.07568359375, + "eval_allNLI-dev_manhattan_ap": 0.6057381363735815, + "eval_allNLI-dev_manhattan_f1": 0.6147704590818364, + "eval_allNLI-dev_manhattan_f1_threshold": 310.0055847167969, + "eval_allNLI-dev_manhattan_precision": 0.4695121951219512, + "eval_allNLI-dev_manhattan_recall": 0.8901734104046243, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 420.4771728515625, + "eval_allNLI-dev_max_ap": 0.6118636764768349, + "eval_allNLI-dev_max_f1": 0.6204081632653061, + "eval_allNLI-dev_max_f1_threshold": 349.6429748535156, + "eval_allNLI-dev_max_precision": 0.4794952681388013, + "eval_allNLI-dev_max_recall": 0.8901734104046243, + "eval_sequential_score": 0.7648285226426834, + "eval_sts-test_pearson_cosine": 0.8105857732868115, + "eval_sts-test_pearson_dot": 0.7844085069475198, + "eval_sts-test_pearson_euclidean": 0.8365544588951073, + "eval_sts-test_pearson_manhattan": 0.8307666084036771, + "eval_sts-test_pearson_max": 0.8365544588951073, + "eval_sts-test_spearman_cosine": 0.8347892244724613, + "eval_sts-test_spearman_dot": 0.7719863335147834, + "eval_sts-test_spearman_euclidean": 0.8302817431713355, + "eval_sts-test_spearman_manhattan": 0.8254758157079903, + "eval_sts-test_spearman_max": 0.8347892244724613, + "eval_vitaminc-pairs_loss": 2.983093500137329, + "eval_vitaminc-pairs_runtime": 3.1808, + "eval_vitaminc-pairs_samples_per_second": 40.242, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_negation-triplets_loss": 1.0402394533157349, + "eval_negation-triplets_runtime": 0.7383, + "eval_negation-triplets_samples_per_second": 173.37, + "eval_negation-triplets_steps_per_second": 1.354, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_scitail-pairs-pos_loss": 0.18768535554409027, + "eval_scitail-pairs-pos_runtime": 0.8276, + "eval_scitail-pairs-pos_samples_per_second": 154.661, + "eval_scitail-pairs-pos_steps_per_second": 1.208, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_scitail-pairs-qa_loss": 0.0010753768729045987, + "eval_scitail-pairs-qa_runtime": 0.5786, + "eval_scitail-pairs-qa_samples_per_second": 221.212, + "eval_scitail-pairs-qa_steps_per_second": 1.728, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_xsum-pairs_loss": 0.5536904335021973, + "eval_xsum-pairs_runtime": 3.0201, + "eval_xsum-pairs_samples_per_second": 42.383, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_sciq_pairs_loss": 0.1257360428571701, + "eval_sciq_pairs_runtime": 3.4449, + "eval_sciq_pairs_samples_per_second": 37.157, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_qasc_pairs_loss": 0.3721018135547638, + "eval_qasc_pairs_runtime": 0.606, + "eval_qasc_pairs_samples_per_second": 211.232, + "eval_qasc_pairs_steps_per_second": 1.65, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_openbookqa_pairs_loss": 1.0556271076202393, + "eval_openbookqa_pairs_runtime": 0.578, + "eval_openbookqa_pairs_samples_per_second": 221.472, + "eval_openbookqa_pairs_steps_per_second": 1.73, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_msmarco_pairs_loss": 1.051499366760254, + "eval_msmarco_pairs_runtime": 1.5165, + "eval_msmarco_pairs_samples_per_second": 84.405, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_nq_pairs_loss": 1.165411114692688, + "eval_nq_pairs_runtime": 2.8962, + "eval_nq_pairs_samples_per_second": 44.196, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_trivia_pairs_loss": 1.085224986076355, + "eval_trivia_pairs_runtime": 3.4302, + "eval_trivia_pairs_samples_per_second": 37.316, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_gooaq_pairs_loss": 0.5926018953323364, + "eval_gooaq_pairs_runtime": 0.9428, + "eval_gooaq_pairs_samples_per_second": 135.77, + "eval_gooaq_pairs_steps_per_second": 1.061, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_paws-pos_loss": 0.027635158970952034, + "eval_paws-pos_runtime": 0.691, + "eval_paws-pos_samples_per_second": 185.233, + "eval_paws-pos_steps_per_second": 1.447, + "step": 940 + }, + { + "epoch": 0.9670781893004116, + "eval_global_dataset_loss": 0.5423777103424072, + "eval_global_dataset_runtime": 13.3745, + "eval_global_dataset_samples_per_second": 31.104, + "eval_global_dataset_steps_per_second": 0.299, + "step": 940 + }, + { + "epoch": 0.9681069958847737, + "grad_norm": 11.614127159118652, + "learning_rate": 3.409138110072689e-05, + "loss": 0.8143, + "step": 941 + }, + { + "epoch": 0.9691358024691358, + "grad_norm": 13.92485523223877, + "learning_rate": 3.4127725856697814e-05, + "loss": 1.2339, + "step": 942 + }, + { + "epoch": 0.970164609053498, + "grad_norm": 12.26136302947998, + "learning_rate": 3.416407061266874e-05, + "loss": 0.9252, + "step": 943 + }, + { + "epoch": 0.9711934156378601, + "grad_norm": 7.102560997009277, + "learning_rate": 3.420041536863967e-05, + "loss": 0.3292, + "step": 944 + }, + { + "epoch": 0.9722222222222222, + "grad_norm": 19.000288009643555, + "learning_rate": 3.423676012461059e-05, + "loss": 2.3715, + "step": 945 + }, + { + "epoch": 0.9732510288065843, + "grad_norm": 11.65499496459961, + "learning_rate": 3.427310488058151e-05, + "loss": 1.2257, + "step": 946 + }, + { + "epoch": 0.9742798353909465, + "grad_norm": 8.42990779876709, + "learning_rate": 3.430944963655244e-05, + "loss": 0.6175, + "step": 947 + }, + { + "epoch": 0.9753086419753086, + "grad_norm": 16.443498611450195, + "learning_rate": 3.4345794392523365e-05, + "loss": 1.6621, + "step": 948 + }, + { + "epoch": 0.9763374485596708, + "grad_norm": 7.4084696769714355, + "learning_rate": 3.4382139148494285e-05, + "loss": 0.428, + "step": 949 + }, + { + "epoch": 0.977366255144033, + "grad_norm": 15.619688034057617, + "learning_rate": 3.441848390446521e-05, + "loss": 1.4729, + "step": 950 + }, + { + "epoch": 0.9783950617283951, + "grad_norm": 11.312886238098145, + "learning_rate": 3.445482866043613e-05, + "loss": 1.3042, + "step": 951 + }, + { + "epoch": 0.9794238683127572, + "grad_norm": 7.754435062408447, + "learning_rate": 3.4491173416407054e-05, + "loss": 0.6735, + "step": 952 + }, + { + "epoch": 0.9804526748971193, + "grad_norm": 1.0208377838134766, + "learning_rate": 3.452751817237798e-05, + "loss": 0.0191, + "step": 953 + }, + { + "epoch": 0.9814814814814815, + "grad_norm": 10.611540794372559, + "learning_rate": 3.456386292834891e-05, + "loss": 0.857, + "step": 954 + }, + { + "epoch": 0.9825102880658436, + "grad_norm": 10.76138687133789, + "learning_rate": 3.460020768431983e-05, + "loss": 0.2335, + "step": 955 + }, + { + "epoch": 0.9835390946502057, + "grad_norm": 8.067065238952637, + "learning_rate": 3.463655244029076e-05, + "loss": 0.5272, + "step": 956 + }, + { + "epoch": 0.9845679012345679, + "grad_norm": 9.368846893310547, + "learning_rate": 3.467289719626168e-05, + "loss": 0.721, + "step": 957 + }, + { + "epoch": 0.98559670781893, + "grad_norm": 17.550647735595703, + "learning_rate": 3.4709241952232605e-05, + "loss": 0.4356, + "step": 958 + }, + { + "epoch": 0.9866255144032922, + "grad_norm": 8.449636459350586, + "learning_rate": 3.474558670820353e-05, + "loss": 0.5033, + "step": 959 + }, + { + "epoch": 0.9876543209876543, + "grad_norm": 12.751513481140137, + "learning_rate": 3.478193146417445e-05, + "loss": 0.6354, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_Qnli-dev_cosine_accuracy": 0.7109375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8011420369148254, + "eval_Qnli-dev_cosine_ap": 0.7562970990202866, + "eval_Qnli-dev_cosine_f1": 0.6953125, + "eval_Qnli-dev_cosine_f1_threshold": 0.7688385248184204, + "eval_Qnli-dev_cosine_precision": 0.644927536231884, + "eval_Qnli-dev_cosine_recall": 0.7542372881355932, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 373.8548889160156, + "eval_Qnli-dev_dot_ap": 0.707742039511076, + "eval_Qnli-dev_dot_f1": 0.683111954459203, + "eval_Qnli-dev_dot_f1_threshold": 362.323974609375, + "eval_Qnli-dev_dot_precision": 0.6185567010309279, + "eval_Qnli-dev_dot_recall": 0.7627118644067796, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.823720932006836, + "eval_Qnli-dev_euclidean_ap": 0.7579671387457134, + "eval_Qnli-dev_euclidean_f1": 0.7015503875968992, + "eval_Qnli-dev_euclidean_f1_threshold": 14.87757396697998, + "eval_Qnli-dev_euclidean_precision": 0.6464285714285715, + "eval_Qnli-dev_euclidean_recall": 0.7669491525423728, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 285.1418762207031, + "eval_Qnli-dev_manhattan_ap": 0.7610519271212217, + "eval_Qnli-dev_manhattan_f1": 0.6953271028037383, + "eval_Qnli-dev_manhattan_f1_threshold": 316.33892822265625, + "eval_Qnli-dev_manhattan_precision": 0.6220735785953178, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 373.8548889160156, + "eval_Qnli-dev_max_ap": 0.7610519271212217, + "eval_Qnli-dev_max_f1": 0.7015503875968992, + "eval_Qnli-dev_max_f1_threshold": 362.323974609375, + "eval_Qnli-dev_max_precision": 0.6464285714285715, + "eval_Qnli-dev_max_recall": 0.788135593220339, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8651937246322632, + "eval_allNLI-dev_cosine_ap": 0.6113314809700446, + "eval_allNLI-dev_cosine_f1": 0.6244541484716157, + "eval_allNLI-dev_cosine_f1_threshold": 0.7694165110588074, + "eval_allNLI-dev_cosine_precision": 0.5017543859649123, + "eval_allNLI-dev_cosine_recall": 0.8265895953757225, + "eval_allNLI-dev_dot_accuracy": 0.69140625, + "eval_allNLI-dev_dot_accuracy_threshold": 429.3245849609375, + "eval_allNLI-dev_dot_ap": 0.5481674603400284, + "eval_allNLI-dev_dot_f1": 0.5954825462012321, + "eval_allNLI-dev_dot_f1_threshold": 342.2791748046875, + "eval_allNLI-dev_dot_precision": 0.46178343949044587, + "eval_allNLI-dev_dot_recall": 0.838150289017341, + "eval_allNLI-dev_euclidean_accuracy": 0.744140625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.662067413330078, + "eval_allNLI-dev_euclidean_ap": 0.6159942916219311, + "eval_allNLI-dev_euclidean_f1": 0.6288416075650118, + "eval_allNLI-dev_euclidean_f1_threshold": 13.927071571350098, + "eval_allNLI-dev_euclidean_precision": 0.532, + "eval_allNLI-dev_euclidean_recall": 0.7687861271676301, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 243.21441650390625, + "eval_allNLI-dev_manhattan_ap": 0.6112452850944511, + "eval_allNLI-dev_manhattan_f1": 0.6244131455399061, + "eval_allNLI-dev_manhattan_f1_threshold": 291.2453308105469, + "eval_allNLI-dev_manhattan_precision": 0.525691699604743, + "eval_allNLI-dev_manhattan_recall": 0.7687861271676301, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 429.3245849609375, + "eval_allNLI-dev_max_ap": 0.6159942916219311, + "eval_allNLI-dev_max_f1": 0.6288416075650118, + "eval_allNLI-dev_max_f1_threshold": 342.2791748046875, + "eval_allNLI-dev_max_precision": 0.532, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7610519271212217, + "eval_sts-test_pearson_cosine": 0.8206020544252598, + "eval_sts-test_pearson_dot": 0.7998596455743565, + "eval_sts-test_pearson_euclidean": 0.8493475930153472, + "eval_sts-test_pearson_manhattan": 0.8452749220783444, + "eval_sts-test_pearson_max": 0.8493475930153472, + "eval_sts-test_spearman_cosine": 0.8453138708777522, + "eval_sts-test_spearman_dot": 0.7850806868516911, + "eval_sts-test_spearman_euclidean": 0.842515932513039, + "eval_sts-test_spearman_manhattan": 0.8393217217287051, + "eval_sts-test_spearman_max": 0.8453138708777522, + "eval_vitaminc-pairs_loss": 3.093803644180298, + "eval_vitaminc-pairs_runtime": 3.2017, + "eval_vitaminc-pairs_samples_per_second": 39.979, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_negation-triplets_loss": 1.049690842628479, + "eval_negation-triplets_runtime": 0.7405, + "eval_negation-triplets_samples_per_second": 172.846, + "eval_negation-triplets_steps_per_second": 1.35, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_scitail-pairs-pos_loss": 0.22198575735092163, + "eval_scitail-pairs-pos_runtime": 0.874, + "eval_scitail-pairs-pos_samples_per_second": 146.452, + "eval_scitail-pairs-pos_steps_per_second": 1.144, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_scitail-pairs-qa_loss": 0.0007531806477345526, + "eval_scitail-pairs-qa_runtime": 0.604, + "eval_scitail-pairs-qa_samples_per_second": 211.93, + "eval_scitail-pairs-qa_steps_per_second": 1.656, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_xsum-pairs_loss": 0.545045793056488, + "eval_xsum-pairs_runtime": 3.0289, + "eval_xsum-pairs_samples_per_second": 42.259, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_sciq_pairs_loss": 0.12160878628492355, + "eval_sciq_pairs_runtime": 3.4833, + "eval_sciq_pairs_samples_per_second": 36.746, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_qasc_pairs_loss": 0.3162378668785095, + "eval_qasc_pairs_runtime": 0.6116, + "eval_qasc_pairs_samples_per_second": 209.281, + "eval_qasc_pairs_steps_per_second": 1.635, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_openbookqa_pairs_loss": 0.9658156633377075, + "eval_openbookqa_pairs_runtime": 0.5939, + "eval_openbookqa_pairs_samples_per_second": 215.528, + "eval_openbookqa_pairs_steps_per_second": 1.684, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_msmarco_pairs_loss": 1.1362426280975342, + "eval_msmarco_pairs_runtime": 1.5207, + "eval_msmarco_pairs_samples_per_second": 84.172, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_nq_pairs_loss": 1.1873204708099365, + "eval_nq_pairs_runtime": 2.9063, + "eval_nq_pairs_samples_per_second": 44.043, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_trivia_pairs_loss": 1.1470587253570557, + "eval_trivia_pairs_runtime": 3.4649, + "eval_trivia_pairs_samples_per_second": 36.942, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_gooaq_pairs_loss": 0.5810187458992004, + "eval_gooaq_pairs_runtime": 0.9593, + "eval_gooaq_pairs_samples_per_second": 133.431, + "eval_gooaq_pairs_steps_per_second": 1.042, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_paws-pos_loss": 0.0258675217628479, + "eval_paws-pos_runtime": 0.6941, + "eval_paws-pos_samples_per_second": 184.413, + "eval_paws-pos_steps_per_second": 1.441, + "step": 960 + }, + { + "epoch": 0.9876543209876543, + "eval_global_dataset_loss": 0.551192581653595, + "eval_global_dataset_runtime": 13.394, + "eval_global_dataset_samples_per_second": 31.059, + "eval_global_dataset_steps_per_second": 0.299, + "step": 960 + }, + { + "epoch": 0.9886831275720165, + "grad_norm": 8.129959106445312, + "learning_rate": 3.4818276220145373e-05, + "loss": 0.3919, + "step": 961 + }, + { + "epoch": 0.9897119341563786, + "grad_norm": 11.156733512878418, + "learning_rate": 3.48546209761163e-05, + "loss": 0.6961, + "step": 962 + }, + { + "epoch": 0.9907407407407407, + "grad_norm": 17.619508743286133, + "learning_rate": 3.489096573208722e-05, + "loss": 0.9385, + "step": 963 + }, + { + "epoch": 0.9917695473251029, + "grad_norm": 13.581380844116211, + "learning_rate": 3.492731048805815e-05, + "loss": 0.2489, + "step": 964 + }, + { + "epoch": 0.992798353909465, + "grad_norm": 13.018972396850586, + "learning_rate": 3.4963655244029076e-05, + "loss": 1.0582, + "step": 965 + }, + { + "epoch": 0.9938271604938271, + "grad_norm": 1.7356064319610596, + "learning_rate": 3.5e-05, + "loss": 0.1094, + "step": 966 + }, + { + "epoch": 0.9948559670781894, + "grad_norm": 16.483444213867188, + "learning_rate": 3.49999903396699e-05, + "loss": 0.4915, + "step": 967 + }, + { + "epoch": 0.9958847736625515, + "grad_norm": 13.589509963989258, + "learning_rate": 3.4999961358695594e-05, + "loss": 1.1361, + "step": 968 + }, + { + "epoch": 0.9969135802469136, + "grad_norm": 14.672646522521973, + "learning_rate": 3.499991305712508e-05, + "loss": 0.6884, + "step": 969 + }, + { + "epoch": 0.9979423868312757, + "grad_norm": 2.262558698654175, + "learning_rate": 3.499984543503835e-05, + "loss": 0.0148, + "step": 970 + }, + { + "epoch": 0.9989711934156379, + "grad_norm": 23.297290802001953, + "learning_rate": 3.499975849254739e-05, + "loss": 1.0498, + "step": 971 + }, + { + "epoch": 1.0, + "grad_norm": 19.72472381591797, + "learning_rate": 3.499965222979617e-05, + "loss": 0.6437, + "step": 972 + }, + { + "epoch": 1.0010288065843622, + "grad_norm": 5.863245964050293, + "learning_rate": 3.499952664696068e-05, + "loss": 0.2412, + "step": 973 + }, + { + "epoch": 1.0020576131687242, + "grad_norm": 9.423555374145508, + "learning_rate": 3.499938174424889e-05, + "loss": 0.5081, + "step": 974 + }, + { + "epoch": 1.0030864197530864, + "grad_norm": 11.968791007995605, + "learning_rate": 3.499921752190077e-05, + "loss": 1.2637, + "step": 975 + }, + { + "epoch": 1.0041152263374487, + "grad_norm": 8.803627014160156, + "learning_rate": 3.4999033980188266e-05, + "loss": 0.5428, + "step": 976 + }, + { + "epoch": 1.0051440329218106, + "grad_norm": 11.566201210021973, + "learning_rate": 3.4998831119415355e-05, + "loss": 1.1313, + "step": 977 + }, + { + "epoch": 1.0061728395061729, + "grad_norm": 9.46624755859375, + "learning_rate": 3.499860893991797e-05, + "loss": 0.6193, + "step": 978 + }, + { + "epoch": 1.007201646090535, + "grad_norm": 8.126818656921387, + "learning_rate": 3.499836744206405e-05, + "loss": 0.5096, + "step": 979 + }, + { + "epoch": 1.008230452674897, + "grad_norm": 7.624522686004639, + "learning_rate": 3.4998106626253555e-05, + "loss": 0.4309, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_Qnli-dev_cosine_accuracy": 0.71484375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8019129633903503, + "eval_Qnli-dev_cosine_ap": 0.7610205901818679, + "eval_Qnli-dev_cosine_f1": 0.7036395147313692, + "eval_Qnli-dev_cosine_f1_threshold": 0.7520031929016113, + "eval_Qnli-dev_cosine_precision": 0.5953079178885631, + "eval_Qnli-dev_cosine_recall": 0.8601694915254238, + "eval_Qnli-dev_dot_accuracy": 0.6640625, + "eval_Qnli-dev_dot_accuracy_threshold": 426.893310546875, + "eval_Qnli-dev_dot_ap": 0.6947234515128509, + "eval_Qnli-dev_dot_f1": 0.6785714285714286, + "eval_Qnli-dev_dot_f1_threshold": 373.33868408203125, + "eval_Qnli-dev_dot_precision": 0.5864197530864198, + "eval_Qnli-dev_dot_recall": 0.8050847457627118, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.8776273727417, + "eval_Qnli-dev_euclidean_ap": 0.7647258098874301, + "eval_Qnli-dev_euclidean_f1": 0.7109515260323159, + "eval_Qnli-dev_euclidean_f1_threshold": 15.282470703125, + "eval_Qnli-dev_euclidean_precision": 0.616822429906542, + "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 271.2890319824219, + "eval_Qnli-dev_manhattan_ap": 0.7681486389198227, + "eval_Qnli-dev_manhattan_f1": 0.7112676056338029, + "eval_Qnli-dev_manhattan_f1_threshold": 320.7895202636719, + "eval_Qnli-dev_manhattan_precision": 0.608433734939759, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 426.893310546875, + "eval_Qnli-dev_max_ap": 0.7681486389198227, + "eval_Qnli-dev_max_f1": 0.7112676056338029, + "eval_Qnli-dev_max_f1_threshold": 373.33868408203125, + "eval_Qnli-dev_max_precision": 0.616822429906542, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8773996233940125, + "eval_allNLI-dev_cosine_ap": 0.6069091134704014, + "eval_allNLI-dev_cosine_f1": 0.6197802197802197, + "eval_allNLI-dev_cosine_f1_threshold": 0.7960855960845947, + "eval_allNLI-dev_cosine_precision": 0.5, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.69140625, + "eval_allNLI-dev_dot_accuracy_threshold": 444.69110107421875, + "eval_allNLI-dev_dot_ap": 0.5273880047233465, + "eval_allNLI-dev_dot_f1": 0.5879828326180258, + "eval_allNLI-dev_dot_f1_threshold": 377.0397644042969, + "eval_allNLI-dev_dot_precision": 0.46757679180887374, + "eval_allNLI-dev_dot_recall": 0.791907514450867, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.407779693603516, + "eval_allNLI-dev_euclidean_ap": 0.6097577491057798, + "eval_allNLI-dev_euclidean_f1": 0.6265060240963856, + "eval_allNLI-dev_euclidean_f1_threshold": 13.258623123168945, + "eval_allNLI-dev_euclidean_precision": 0.5371900826446281, + "eval_allNLI-dev_euclidean_recall": 0.7514450867052023, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 233.98886108398438, + "eval_allNLI-dev_manhattan_ap": 0.6053119147310726, + "eval_allNLI-dev_manhattan_f1": 0.6284403669724771, + "eval_allNLI-dev_manhattan_f1_threshold": 283.61029052734375, + "eval_allNLI-dev_manhattan_precision": 0.5209125475285171, + "eval_allNLI-dev_manhattan_recall": 0.791907514450867, + "eval_allNLI-dev_max_accuracy": 0.73046875, + "eval_allNLI-dev_max_accuracy_threshold": 444.69110107421875, + "eval_allNLI-dev_max_ap": 0.6097577491057798, + "eval_allNLI-dev_max_f1": 0.6284403669724771, + "eval_allNLI-dev_max_f1_threshold": 377.0397644042969, + "eval_allNLI-dev_max_precision": 0.5371900826446281, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7681486389198227, + "eval_sts-test_pearson_cosine": 0.8029398971858455, + "eval_sts-test_pearson_dot": 0.7674963822814747, + "eval_sts-test_pearson_euclidean": 0.8352948242845171, + "eval_sts-test_pearson_manhattan": 0.8333892654611063, + "eval_sts-test_pearson_max": 0.8352948242845171, + "eval_sts-test_spearman_cosine": 0.8309187773772422, + "eval_sts-test_spearman_dot": 0.7509477362556745, + "eval_sts-test_spearman_euclidean": 0.8291958090624117, + "eval_sts-test_spearman_manhattan": 0.8273570692343619, + "eval_sts-test_spearman_max": 0.8309187773772422, + "eval_vitaminc-pairs_loss": 2.8231990337371826, + "eval_vitaminc-pairs_runtime": 3.1885, + "eval_vitaminc-pairs_samples_per_second": 40.144, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_negation-triplets_loss": 1.0276439189910889, + "eval_negation-triplets_runtime": 0.738, + "eval_negation-triplets_samples_per_second": 173.431, + "eval_negation-triplets_steps_per_second": 1.355, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_scitail-pairs-pos_loss": 0.22538259625434875, + "eval_scitail-pairs-pos_runtime": 0.882, + "eval_scitail-pairs-pos_samples_per_second": 145.129, + "eval_scitail-pairs-pos_steps_per_second": 1.134, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_scitail-pairs-qa_loss": 0.0011585784377530217, + "eval_scitail-pairs-qa_runtime": 0.587, + "eval_scitail-pairs-qa_samples_per_second": 218.049, + "eval_scitail-pairs-qa_steps_per_second": 1.704, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_xsum-pairs_loss": 0.5889097452163696, + "eval_xsum-pairs_runtime": 3.0279, + "eval_xsum-pairs_samples_per_second": 42.274, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_sciq_pairs_loss": 0.11867424100637436, + "eval_sciq_pairs_runtime": 3.5072, + "eval_sciq_pairs_samples_per_second": 36.496, + "eval_sciq_pairs_steps_per_second": 0.285, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_qasc_pairs_loss": 0.27829957008361816, + "eval_qasc_pairs_runtime": 0.6017, + "eval_qasc_pairs_samples_per_second": 212.72, + "eval_qasc_pairs_steps_per_second": 1.662, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_openbookqa_pairs_loss": 0.970773458480835, + "eval_openbookqa_pairs_runtime": 0.6015, + "eval_openbookqa_pairs_samples_per_second": 212.804, + "eval_openbookqa_pairs_steps_per_second": 1.663, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_msmarco_pairs_loss": 1.1431875228881836, + "eval_msmarco_pairs_runtime": 1.5207, + "eval_msmarco_pairs_samples_per_second": 84.17, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_nq_pairs_loss": 1.1105154752731323, + "eval_nq_pairs_runtime": 2.893, + "eval_nq_pairs_samples_per_second": 44.244, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_trivia_pairs_loss": 1.1272141933441162, + "eval_trivia_pairs_runtime": 3.4339, + "eval_trivia_pairs_samples_per_second": 37.276, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_gooaq_pairs_loss": 0.5513377785682678, + "eval_gooaq_pairs_runtime": 0.9521, + "eval_gooaq_pairs_samples_per_second": 134.441, + "eval_gooaq_pairs_steps_per_second": 1.05, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_paws-pos_loss": 0.025796374306082726, + "eval_paws-pos_runtime": 0.7136, + "eval_paws-pos_samples_per_second": 179.366, + "eval_paws-pos_steps_per_second": 1.401, + "step": 980 + }, + { + "epoch": 1.008230452674897, + "eval_global_dataset_loss": 0.5248242616653442, + "eval_global_dataset_runtime": 13.4139, + "eval_global_dataset_samples_per_second": 31.013, + "eval_global_dataset_steps_per_second": 0.298, + "step": 980 + }, + { + "epoch": 1.0092592592592593, + "grad_norm": 10.596141815185547, + "learning_rate": 3.499782649291838e-05, + "loss": 0.8267, + "step": 981 + }, + { + "epoch": 1.0102880658436213, + "grad_norm": 9.392977714538574, + "learning_rate": 3.499752704252246e-05, + "loss": 0.6614, + "step": 982 + }, + { + "epoch": 1.0113168724279835, + "grad_norm": 7.637691974639893, + "learning_rate": 3.499720827556169e-05, + "loss": 0.3601, + "step": 983 + }, + { + "epoch": 1.0123456790123457, + "grad_norm": 6.8640947341918945, + "learning_rate": 3.4996870192563984e-05, + "loss": 0.33, + "step": 984 + }, + { + "epoch": 1.0133744855967077, + "grad_norm": 8.076541900634766, + "learning_rate": 3.499651279408921e-05, + "loss": 0.3986, + "step": 985 + }, + { + "epoch": 1.01440329218107, + "grad_norm": 10.192421913146973, + "learning_rate": 3.499613608072924e-05, + "loss": 0.6489, + "step": 986 + }, + { + "epoch": 1.0154320987654322, + "grad_norm": 6.926242351531982, + "learning_rate": 3.4995740053107946e-05, + "loss": 0.2832, + "step": 987 + }, + { + "epoch": 1.0164609053497942, + "grad_norm": 12.570526123046875, + "learning_rate": 3.499532471188116e-05, + "loss": 1.116, + "step": 988 + }, + { + "epoch": 1.0174897119341564, + "grad_norm": 16.421586990356445, + "learning_rate": 3.499489005773671e-05, + "loss": 1.5305, + "step": 989 + }, + { + "epoch": 1.0185185185185186, + "grad_norm": 8.618919372558594, + "learning_rate": 3.4994436091394425e-05, + "loss": 0.434, + "step": 990 + }, + { + "epoch": 1.0195473251028806, + "grad_norm": 7.02255392074585, + "learning_rate": 3.499396281360608e-05, + "loss": 0.2973, + "step": 991 + }, + { + "epoch": 1.0205761316872428, + "grad_norm": 10.480740547180176, + "learning_rate": 3.499347022515545e-05, + "loss": 0.5811, + "step": 992 + }, + { + "epoch": 1.021604938271605, + "grad_norm": 12.898595809936523, + "learning_rate": 3.4992958326858305e-05, + "loss": 0.9475, + "step": 993 + }, + { + "epoch": 1.022633744855967, + "grad_norm": 0.7765280604362488, + "learning_rate": 3.499242711956236e-05, + "loss": 0.0351, + "step": 994 + }, + { + "epoch": 1.0236625514403292, + "grad_norm": 11.703328132629395, + "learning_rate": 3.4991876604147334e-05, + "loss": 1.0881, + "step": 995 + }, + { + "epoch": 1.0246913580246915, + "grad_norm": 7.762533664703369, + "learning_rate": 3.499130678152492e-05, + "loss": 0.517, + "step": 996 + }, + { + "epoch": 1.0257201646090535, + "grad_norm": 7.188144207000732, + "learning_rate": 3.4990717652638754e-05, + "loss": 0.521, + "step": 997 + }, + { + "epoch": 1.0267489711934157, + "grad_norm": 7.4093122482299805, + "learning_rate": 3.499010921846448e-05, + "loss": 0.3103, + "step": 998 + }, + { + "epoch": 1.0277777777777777, + "grad_norm": 7.3908562660217285, + "learning_rate": 3.498948148000971e-05, + "loss": 0.4539, + "step": 999 + }, + { + "epoch": 1.02880658436214, + "grad_norm": 13.766965866088867, + "learning_rate": 3.4988834438313996e-05, + "loss": 1.0192, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_Qnli-dev_cosine_accuracy": 0.681640625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8225704431533813, + "eval_Qnli-dev_cosine_ap": 0.7336489695537911, + "eval_Qnli-dev_cosine_f1": 0.6912280701754385, + "eval_Qnli-dev_cosine_f1_threshold": 0.7402236461639404, + "eval_Qnli-dev_cosine_precision": 0.5898203592814372, + "eval_Qnli-dev_cosine_recall": 0.8347457627118644, + "eval_Qnli-dev_dot_accuracy": 0.646484375, + "eval_Qnli-dev_dot_accuracy_threshold": 369.0775146484375, + "eval_Qnli-dev_dot_ap": 0.660954834977831, + "eval_Qnli-dev_dot_f1": 0.6806451612903225, + "eval_Qnli-dev_dot_f1_threshold": 334.06298828125, + "eval_Qnli-dev_dot_precision": 0.5494791666666666, + "eval_Qnli-dev_dot_recall": 0.8940677966101694, + "eval_Qnli-dev_euclidean_accuracy": 0.697265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.520076751708984, + "eval_Qnli-dev_euclidean_ap": 0.74077949307834, + "eval_Qnli-dev_euclidean_f1": 0.6872852233676976, + "eval_Qnli-dev_euclidean_f1_threshold": 16.351831436157227, + "eval_Qnli-dev_euclidean_precision": 0.5780346820809249, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 281.56951904296875, + "eval_Qnli-dev_manhattan_ap": 0.7456832832042077, + "eval_Qnli-dev_manhattan_f1": 0.6857142857142857, + "eval_Qnli-dev_manhattan_f1_threshold": 342.68011474609375, + "eval_Qnli-dev_manhattan_precision": 0.5682451253481894, + "eval_Qnli-dev_manhattan_recall": 0.864406779661017, + "eval_Qnli-dev_max_accuracy": 0.703125, + "eval_Qnli-dev_max_accuracy_threshold": 369.0775146484375, + "eval_Qnli-dev_max_ap": 0.7456832832042077, + "eval_Qnli-dev_max_f1": 0.6912280701754385, + "eval_Qnli-dev_max_f1_threshold": 342.68011474609375, + "eval_Qnli-dev_max_precision": 0.5898203592814372, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8777043223381042, + "eval_allNLI-dev_cosine_ap": 0.6009867807750411, + "eval_allNLI-dev_cosine_f1": 0.6055045871559632, + "eval_allNLI-dev_cosine_f1_threshold": 0.7909231185913086, + "eval_allNLI-dev_cosine_precision": 0.5019011406844106, + "eval_allNLI-dev_cosine_recall": 0.7630057803468208, + "eval_allNLI-dev_dot_accuracy": 0.689453125, + "eval_allNLI-dev_dot_accuracy_threshold": 425.19970703125, + "eval_allNLI-dev_dot_ap": 0.5083264191429433, + "eval_allNLI-dev_dot_f1": 0.580046403712297, + "eval_allNLI-dev_dot_f1_threshold": 373.0705261230469, + "eval_allNLI-dev_dot_precision": 0.4844961240310077, + "eval_allNLI-dev_dot_recall": 0.7225433526011561, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.884292602539062, + "eval_allNLI-dev_euclidean_ap": 0.6025702809405632, + "eval_allNLI-dev_euclidean_f1": 0.6088888888888888, + "eval_allNLI-dev_euclidean_f1_threshold": 14.338363647460938, + "eval_allNLI-dev_euclidean_precision": 0.49458483754512633, + "eval_allNLI-dev_euclidean_recall": 0.791907514450867, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 229.49404907226562, + "eval_allNLI-dev_manhattan_ap": 0.6012372908721144, + "eval_allNLI-dev_manhattan_f1": 0.6054279749478079, + "eval_allNLI-dev_manhattan_f1_threshold": 306.45465087890625, + "eval_allNLI-dev_manhattan_precision": 0.4738562091503268, + "eval_allNLI-dev_manhattan_recall": 0.838150289017341, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 425.19970703125, + "eval_allNLI-dev_max_ap": 0.6025702809405632, + "eval_allNLI-dev_max_f1": 0.6088888888888888, + "eval_allNLI-dev_max_f1_threshold": 373.0705261230469, + "eval_allNLI-dev_max_precision": 0.5019011406844106, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7456832832042077, + "eval_sts-test_pearson_cosine": 0.8134514242079779, + "eval_sts-test_pearson_dot": 0.7960002284292096, + "eval_sts-test_pearson_euclidean": 0.8455332055791209, + "eval_sts-test_pearson_manhattan": 0.8451588222378992, + "eval_sts-test_pearson_max": 0.8455332055791209, + "eval_sts-test_spearman_cosine": 0.8487256175437097, + "eval_sts-test_spearman_dot": 0.7791035741264715, + "eval_sts-test_spearman_euclidean": 0.8429671594816486, + "eval_sts-test_spearman_manhattan": 0.8425387556563487, + "eval_sts-test_spearman_max": 0.8487256175437097, + "eval_vitaminc-pairs_loss": 3.199394941329956, + "eval_vitaminc-pairs_runtime": 3.1982, + "eval_vitaminc-pairs_samples_per_second": 40.023, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_negation-triplets_loss": 1.0285611152648926, + "eval_negation-triplets_runtime": 0.7516, + "eval_negation-triplets_samples_per_second": 170.306, + "eval_negation-triplets_steps_per_second": 1.331, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_scitail-pairs-pos_loss": 0.2147119641304016, + "eval_scitail-pairs-pos_runtime": 0.8507, + "eval_scitail-pairs-pos_samples_per_second": 150.461, + "eval_scitail-pairs-pos_steps_per_second": 1.175, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_scitail-pairs-qa_loss": 0.002570149954408407, + "eval_scitail-pairs-qa_runtime": 0.5847, + "eval_scitail-pairs-qa_samples_per_second": 218.899, + "eval_scitail-pairs-qa_steps_per_second": 1.71, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_xsum-pairs_loss": 0.5069864988327026, + "eval_xsum-pairs_runtime": 3.0253, + "eval_xsum-pairs_samples_per_second": 42.31, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_sciq_pairs_loss": 0.11535051465034485, + "eval_sciq_pairs_runtime": 3.4646, + "eval_sciq_pairs_samples_per_second": 36.945, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_qasc_pairs_loss": 0.40786802768707275, + "eval_qasc_pairs_runtime": 0.6169, + "eval_qasc_pairs_samples_per_second": 207.5, + "eval_qasc_pairs_steps_per_second": 1.621, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_openbookqa_pairs_loss": 1.0345005989074707, + "eval_openbookqa_pairs_runtime": 0.5841, + "eval_openbookqa_pairs_samples_per_second": 219.127, + "eval_openbookqa_pairs_steps_per_second": 1.712, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_msmarco_pairs_loss": 1.0275574922561646, + "eval_msmarco_pairs_runtime": 1.52, + "eval_msmarco_pairs_samples_per_second": 84.212, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_nq_pairs_loss": 1.2776100635528564, + "eval_nq_pairs_runtime": 2.8968, + "eval_nq_pairs_samples_per_second": 44.187, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_trivia_pairs_loss": 1.0063875913619995, + "eval_trivia_pairs_runtime": 3.4646, + "eval_trivia_pairs_samples_per_second": 36.945, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_gooaq_pairs_loss": 0.5649043321609497, + "eval_gooaq_pairs_runtime": 0.9541, + "eval_gooaq_pairs_samples_per_second": 134.155, + "eval_gooaq_pairs_steps_per_second": 1.048, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_paws-pos_loss": 0.025881968438625336, + "eval_paws-pos_runtime": 0.6906, + "eval_paws-pos_samples_per_second": 185.358, + "eval_paws-pos_steps_per_second": 1.448, + "step": 1000 + }, + { + "epoch": 1.02880658436214, + "eval_global_dataset_loss": 0.5549957752227783, + "eval_global_dataset_runtime": 13.4023, + "eval_global_dataset_samples_per_second": 31.039, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1000 + }, + { + "epoch": 1.0298353909465021, + "grad_norm": 9.308526039123535, + "learning_rate": 3.498816809444888e-05, + "loss": 0.523, + "step": 1001 + }, + { + "epoch": 1.0308641975308641, + "grad_norm": 13.073616981506348, + "learning_rate": 3.498748244951788e-05, + "loss": 0.9026, + "step": 1002 + }, + { + "epoch": 1.0318930041152263, + "grad_norm": 11.270973205566406, + "learning_rate": 3.498677750465644e-05, + "loss": 0.7104, + "step": 1003 + }, + { + "epoch": 1.0329218106995885, + "grad_norm": 12.925529479980469, + "learning_rate": 3.4986053261032e-05, + "loss": 1.1418, + "step": 1004 + }, + { + "epoch": 1.0339506172839505, + "grad_norm": 14.090717315673828, + "learning_rate": 3.498530971984395e-05, + "loss": 1.2133, + "step": 1005 + }, + { + "epoch": 1.0349794238683128, + "grad_norm": 12.810900688171387, + "learning_rate": 3.498454688232363e-05, + "loss": 1.1203, + "step": 1006 + }, + { + "epoch": 1.036008230452675, + "grad_norm": 15.513686180114746, + "learning_rate": 3.498376474973436e-05, + "loss": 2.5254, + "step": 1007 + }, + { + "epoch": 1.037037037037037, + "grad_norm": 6.373301982879639, + "learning_rate": 3.498296332337137e-05, + "loss": 0.3947, + "step": 1008 + }, + { + "epoch": 1.0380658436213992, + "grad_norm": 12.136091232299805, + "learning_rate": 3.498214260456188e-05, + "loss": 1.0774, + "step": 1009 + }, + { + "epoch": 1.0390946502057614, + "grad_norm": 12.024189949035645, + "learning_rate": 3.4981302594665046e-05, + "loss": 1.0608, + "step": 1010 + }, + { + "epoch": 1.0401234567901234, + "grad_norm": 10.587035179138184, + "learning_rate": 3.4980443295071976e-05, + "loss": 0.9121, + "step": 1011 + }, + { + "epoch": 1.0411522633744856, + "grad_norm": 5.42789888381958, + "learning_rate": 3.497956470720571e-05, + "loss": 0.2954, + "step": 1012 + }, + { + "epoch": 1.0421810699588478, + "grad_norm": 11.198415756225586, + "learning_rate": 3.497866683252125e-05, + "loss": 1.2817, + "step": 1013 + }, + { + "epoch": 1.0432098765432098, + "grad_norm": 9.719239234924316, + "learning_rate": 3.497774967250552e-05, + "loss": 0.8201, + "step": 1014 + }, + { + "epoch": 1.044238683127572, + "grad_norm": 7.551983833312988, + "learning_rate": 3.497681322867739e-05, + "loss": 0.4308, + "step": 1015 + }, + { + "epoch": 1.045267489711934, + "grad_norm": 12.508809089660645, + "learning_rate": 3.497585750258767e-05, + "loss": 1.2293, + "step": 1016 + }, + { + "epoch": 1.0462962962962963, + "grad_norm": 6.985977649688721, + "learning_rate": 3.4974882495819084e-05, + "loss": 0.4804, + "step": 1017 + }, + { + "epoch": 1.0473251028806585, + "grad_norm": 12.973464012145996, + "learning_rate": 3.4973888209986306e-05, + "loss": 1.3674, + "step": 1018 + }, + { + "epoch": 1.0483539094650205, + "grad_norm": 11.896876335144043, + "learning_rate": 3.497287464673593e-05, + "loss": 0.9312, + "step": 1019 + }, + { + "epoch": 1.0493827160493827, + "grad_norm": 8.766112327575684, + "learning_rate": 3.497184180774647e-05, + "loss": 0.7554, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7943562269210815, + "eval_Qnli-dev_cosine_ap": 0.7559444632593828, + "eval_Qnli-dev_cosine_f1": 0.7058823529411764, + "eval_Qnli-dev_cosine_f1_threshold": 0.7659621238708496, + "eval_Qnli-dev_cosine_precision": 0.6233766233766234, + "eval_Qnli-dev_cosine_recall": 0.8135593220338984, + "eval_Qnli-dev_dot_accuracy": 0.662109375, + "eval_Qnli-dev_dot_accuracy_threshold": 394.54266357421875, + "eval_Qnli-dev_dot_ap": 0.6915752876288561, + "eval_Qnli-dev_dot_f1": 0.6793760831889082, + "eval_Qnli-dev_dot_f1_threshold": 365.2876281738281, + "eval_Qnli-dev_dot_precision": 0.5747800586510264, + "eval_Qnli-dev_dot_recall": 0.8305084745762712, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.502662658691406, + "eval_Qnli-dev_euclidean_ap": 0.7624762126096433, + "eval_Qnli-dev_euclidean_f1": 0.7111913357400722, + "eval_Qnli-dev_euclidean_f1_threshold": 15.483503341674805, + "eval_Qnli-dev_euclidean_precision": 0.6194968553459119, + "eval_Qnli-dev_euclidean_recall": 0.8347457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.70703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 280.207763671875, + "eval_Qnli-dev_manhattan_ap": 0.7642042868629957, + "eval_Qnli-dev_manhattan_f1": 0.6996466431095405, + "eval_Qnli-dev_manhattan_f1_threshold": 327.005126953125, + "eval_Qnli-dev_manhattan_precision": 0.6, + "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 394.54266357421875, + "eval_Qnli-dev_max_ap": 0.7642042868629957, + "eval_Qnli-dev_max_f1": 0.7111913357400722, + "eval_Qnli-dev_max_f1_threshold": 365.2876281738281, + "eval_Qnli-dev_max_precision": 0.6233766233766234, + "eval_Qnli-dev_max_recall": 0.8389830508474576, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8626978397369385, + "eval_allNLI-dev_cosine_ap": 0.6056903051173465, + "eval_allNLI-dev_cosine_f1": 0.6147186147186148, + "eval_allNLI-dev_cosine_f1_threshold": 0.770087718963623, + "eval_allNLI-dev_cosine_precision": 0.4913494809688581, + "eval_allNLI-dev_cosine_recall": 0.8208092485549133, + "eval_allNLI-dev_dot_accuracy": 0.693359375, + "eval_allNLI-dev_dot_accuracy_threshold": 426.302490234375, + "eval_allNLI-dev_dot_ap": 0.5246631021855179, + "eval_allNLI-dev_dot_f1": 0.5949367088607596, + "eval_allNLI-dev_dot_f1_threshold": 351.2099304199219, + "eval_allNLI-dev_dot_precision": 0.4684385382059801, + "eval_allNLI-dev_dot_recall": 0.815028901734104, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.542158126831055, + "eval_allNLI-dev_euclidean_ap": 0.6113964262053992, + "eval_allNLI-dev_euclidean_f1": 0.6205357142857143, + "eval_allNLI-dev_euclidean_f1_threshold": 14.319332122802734, + "eval_allNLI-dev_euclidean_precision": 0.5054545454545455, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 234.59854125976562, + "eval_allNLI-dev_manhattan_ap": 0.6096757095213863, + "eval_allNLI-dev_manhattan_f1": 0.6198347107438017, + "eval_allNLI-dev_manhattan_f1_threshold": 315.2457580566406, + "eval_allNLI-dev_manhattan_precision": 0.48231511254019294, + "eval_allNLI-dev_manhattan_recall": 0.8670520231213873, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 426.302490234375, + "eval_allNLI-dev_max_ap": 0.6113964262053992, + "eval_allNLI-dev_max_f1": 0.6205357142857143, + "eval_allNLI-dev_max_f1_threshold": 351.2099304199219, + "eval_allNLI-dev_max_precision": 0.5054545454545455, + "eval_allNLI-dev_max_recall": 0.8670520231213873, + "eval_sequential_score": 0.7642042868629957, + "eval_sts-test_pearson_cosine": 0.8180272631352716, + "eval_sts-test_pearson_dot": 0.800107028188555, + "eval_sts-test_pearson_euclidean": 0.8465297896031135, + "eval_sts-test_pearson_manhattan": 0.8446697206380651, + "eval_sts-test_pearson_max": 0.8465297896031135, + "eval_sts-test_spearman_cosine": 0.8460061728914771, + "eval_sts-test_spearman_dot": 0.7791330652077285, + "eval_sts-test_spearman_euclidean": 0.8418238095043598, + "eval_sts-test_spearman_manhattan": 0.8409772498926187, + "eval_sts-test_spearman_max": 0.8460061728914771, + "eval_vitaminc-pairs_loss": 2.937673807144165, + "eval_vitaminc-pairs_runtime": 3.1958, + "eval_vitaminc-pairs_samples_per_second": 40.052, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_negation-triplets_loss": 1.0229724645614624, + "eval_negation-triplets_runtime": 0.7702, + "eval_negation-triplets_samples_per_second": 166.188, + "eval_negation-triplets_steps_per_second": 1.298, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_scitail-pairs-pos_loss": 0.19694292545318604, + "eval_scitail-pairs-pos_runtime": 0.8468, + "eval_scitail-pairs-pos_samples_per_second": 151.149, + "eval_scitail-pairs-pos_steps_per_second": 1.181, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_scitail-pairs-qa_loss": 0.001685372437350452, + "eval_scitail-pairs-qa_runtime": 0.5889, + "eval_scitail-pairs-qa_samples_per_second": 217.351, + "eval_scitail-pairs-qa_steps_per_second": 1.698, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_xsum-pairs_loss": 0.44889509677886963, + "eval_xsum-pairs_runtime": 3.0362, + "eval_xsum-pairs_samples_per_second": 42.158, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_sciq_pairs_loss": 0.11442571133375168, + "eval_sciq_pairs_runtime": 3.4567, + "eval_sciq_pairs_samples_per_second": 37.03, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_qasc_pairs_loss": 0.3323410153388977, + "eval_qasc_pairs_runtime": 0.612, + "eval_qasc_pairs_samples_per_second": 209.163, + "eval_qasc_pairs_steps_per_second": 1.634, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_openbookqa_pairs_loss": 0.9421266317367554, + "eval_openbookqa_pairs_runtime": 0.581, + "eval_openbookqa_pairs_samples_per_second": 220.316, + "eval_openbookqa_pairs_steps_per_second": 1.721, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_msmarco_pairs_loss": 1.033428430557251, + "eval_msmarco_pairs_runtime": 1.5117, + "eval_msmarco_pairs_samples_per_second": 84.675, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_nq_pairs_loss": 1.0520647764205933, + "eval_nq_pairs_runtime": 2.8939, + "eval_nq_pairs_samples_per_second": 44.231, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_trivia_pairs_loss": 1.0933111906051636, + "eval_trivia_pairs_runtime": 3.4373, + "eval_trivia_pairs_samples_per_second": 37.238, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_gooaq_pairs_loss": 0.4880179166793823, + "eval_gooaq_pairs_runtime": 0.9487, + "eval_gooaq_pairs_samples_per_second": 134.922, + "eval_gooaq_pairs_steps_per_second": 1.054, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_paws-pos_loss": 0.026863040402531624, + "eval_paws-pos_runtime": 0.6905, + "eval_paws-pos_samples_per_second": 185.373, + "eval_paws-pos_steps_per_second": 1.448, + "step": 1020 + }, + { + "epoch": 1.0493827160493827, + "eval_global_dataset_loss": 0.5200657844543457, + "eval_global_dataset_runtime": 13.3653, + "eval_global_dataset_samples_per_second": 31.125, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1020 + }, + { + "epoch": 1.050411522633745, + "grad_norm": 0.5952563881874084, + "learning_rate": 3.497078969472837e-05, + "loss": 0.027, + "step": 1021 + }, + { + "epoch": 1.051440329218107, + "grad_norm": 1.7833058834075928, + "learning_rate": 3.496971830942398e-05, + "loss": 0.0478, + "step": 1022 + }, + { + "epoch": 1.0524691358024691, + "grad_norm": 10.134953498840332, + "learning_rate": 3.4968627653607597e-05, + "loss": 0.4828, + "step": 1023 + }, + { + "epoch": 1.0534979423868314, + "grad_norm": 7.823026657104492, + "learning_rate": 3.496751772908539e-05, + "loss": 0.4244, + "step": 1024 + }, + { + "epoch": 1.0545267489711934, + "grad_norm": 9.383576393127441, + "learning_rate": 3.4966388537695456e-05, + "loss": 0.4789, + "step": 1025 + }, + { + "epoch": 1.0555555555555556, + "grad_norm": 8.64692211151123, + "learning_rate": 3.496524008130781e-05, + "loss": 0.4748, + "step": 1026 + }, + { + "epoch": 1.0565843621399178, + "grad_norm": 10.123908996582031, + "learning_rate": 3.496407236182434e-05, + "loss": 0.9816, + "step": 1027 + }, + { + "epoch": 1.0576131687242798, + "grad_norm": 8.501023292541504, + "learning_rate": 3.4962885381178896e-05, + "loss": 0.417, + "step": 1028 + }, + { + "epoch": 1.058641975308642, + "grad_norm": 2.9080381393432617, + "learning_rate": 3.496167914133714e-05, + "loss": 0.0292, + "step": 1029 + }, + { + "epoch": 1.059670781893004, + "grad_norm": 15.60783576965332, + "learning_rate": 3.49604536442967e-05, + "loss": 1.287, + "step": 1030 + }, + { + "epoch": 1.0606995884773662, + "grad_norm": 1.720328450202942, + "learning_rate": 3.495920889208707e-05, + "loss": 0.0098, + "step": 1031 + }, + { + "epoch": 1.0617283950617284, + "grad_norm": 15.75252628326416, + "learning_rate": 3.495794488676961e-05, + "loss": 1.3394, + "step": 1032 + }, + { + "epoch": 1.0627572016460904, + "grad_norm": 2.560807704925537, + "learning_rate": 3.49566616304376e-05, + "loss": 0.0586, + "step": 1033 + }, + { + "epoch": 1.0637860082304527, + "grad_norm": 1.728555679321289, + "learning_rate": 3.495535912521618e-05, + "loss": 0.0479, + "step": 1034 + }, + { + "epoch": 1.0648148148148149, + "grad_norm": 13.202831268310547, + "learning_rate": 3.4954037373262374e-05, + "loss": 1.0693, + "step": 1035 + }, + { + "epoch": 1.0658436213991769, + "grad_norm": 13.138431549072266, + "learning_rate": 3.495269637676508e-05, + "loss": 1.1871, + "step": 1036 + }, + { + "epoch": 1.066872427983539, + "grad_norm": 10.88890552520752, + "learning_rate": 3.4951336137945066e-05, + "loss": 0.6046, + "step": 1037 + }, + { + "epoch": 1.0679012345679013, + "grad_norm": 11.555718421936035, + "learning_rate": 3.494995665905495e-05, + "loss": 0.9365, + "step": 1038 + }, + { + "epoch": 1.0689300411522633, + "grad_norm": 8.685961723327637, + "learning_rate": 3.494855794237925e-05, + "loss": 0.4727, + "step": 1039 + }, + { + "epoch": 1.0699588477366255, + "grad_norm": 7.188544750213623, + "learning_rate": 3.494713999023429e-05, + "loss": 0.5346, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8146058320999146, + "eval_Qnli-dev_cosine_ap": 0.76276523537112, + "eval_Qnli-dev_cosine_f1": 0.6946107784431137, + "eval_Qnli-dev_cosine_f1_threshold": 0.773271381855011, + "eval_Qnli-dev_cosine_precision": 0.6566037735849056, + "eval_Qnli-dev_cosine_recall": 0.7372881355932204, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 380.0313720703125, + "eval_Qnli-dev_dot_ap": 0.6987894282612087, + "eval_Qnli-dev_dot_f1": 0.6859205776173285, + "eval_Qnli-dev_dot_f1_threshold": 350.40252685546875, + "eval_Qnli-dev_dot_precision": 0.5974842767295597, + "eval_Qnli-dev_dot_recall": 0.8050847457627118, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.222099304199219, + "eval_Qnli-dev_euclidean_ap": 0.7667034334790043, + "eval_Qnli-dev_euclidean_f1": 0.6943396226415095, + "eval_Qnli-dev_euclidean_f1_threshold": 15.177824974060059, + "eval_Qnli-dev_euclidean_precision": 0.6258503401360545, + "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 273.0506591796875, + "eval_Qnli-dev_manhattan_ap": 0.7719711469934153, + "eval_Qnli-dev_manhattan_f1": 0.6953125, + "eval_Qnli-dev_manhattan_f1_threshold": 310.35589599609375, + "eval_Qnli-dev_manhattan_precision": 0.644927536231884, + "eval_Qnli-dev_manhattan_recall": 0.7542372881355932, + "eval_Qnli-dev_max_accuracy": 0.72265625, + "eval_Qnli-dev_max_accuracy_threshold": 380.0313720703125, + "eval_Qnli-dev_max_ap": 0.7719711469934153, + "eval_Qnli-dev_max_f1": 0.6953125, + "eval_Qnli-dev_max_f1_threshold": 350.40252685546875, + "eval_Qnli-dev_max_precision": 0.6566037735849056, + "eval_Qnli-dev_max_recall": 0.8050847457627118, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.888862133026123, + "eval_allNLI-dev_cosine_ap": 0.6052222488796517, + "eval_allNLI-dev_cosine_f1": 0.6120481927710845, + "eval_allNLI-dev_cosine_f1_threshold": 0.8043129444122314, + "eval_allNLI-dev_cosine_precision": 0.5247933884297521, + "eval_allNLI-dev_cosine_recall": 0.7341040462427746, + "eval_allNLI-dev_dot_accuracy": 0.6953125, + "eval_allNLI-dev_dot_accuracy_threshold": 416.451171875, + "eval_allNLI-dev_dot_ap": 0.5275626318156367, + "eval_allNLI-dev_dot_f1": 0.5814432989690721, + "eval_allNLI-dev_dot_f1_threshold": 348.996337890625, + "eval_allNLI-dev_dot_precision": 0.4519230769230769, + "eval_allNLI-dev_dot_recall": 0.815028901734104, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.72227668762207, + "eval_allNLI-dev_euclidean_ap": 0.6122355162858166, + "eval_allNLI-dev_euclidean_f1": 0.6182669789227166, + "eval_allNLI-dev_euclidean_f1_threshold": 13.71673583984375, + "eval_allNLI-dev_euclidean_precision": 0.5196850393700787, + "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 232.9837646484375, + "eval_allNLI-dev_manhattan_ap": 0.61152275886359, + "eval_allNLI-dev_manhattan_f1": 0.620253164556962, + "eval_allNLI-dev_manhattan_f1_threshold": 302.649169921875, + "eval_allNLI-dev_manhattan_precision": 0.4883720930232558, + "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 416.451171875, + "eval_allNLI-dev_max_ap": 0.6122355162858166, + "eval_allNLI-dev_max_f1": 0.620253164556962, + "eval_allNLI-dev_max_f1_threshold": 348.996337890625, + "eval_allNLI-dev_max_precision": 0.5247933884297521, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7719711469934153, + "eval_sts-test_pearson_cosine": 0.8236897870440283, + "eval_sts-test_pearson_dot": 0.7902900873190821, + "eval_sts-test_pearson_euclidean": 0.8533041040673283, + "eval_sts-test_pearson_manhattan": 0.851439890014496, + "eval_sts-test_pearson_max": 0.8533041040673283, + "eval_sts-test_spearman_cosine": 0.8503772523478821, + "eval_sts-test_spearman_dot": 0.7656649493356799, + "eval_sts-test_spearman_euclidean": 0.848739218522416, + "eval_sts-test_spearman_manhattan": 0.8478907827624531, + "eval_sts-test_spearman_max": 0.8503772523478821, + "eval_vitaminc-pairs_loss": 3.045991897583008, + "eval_vitaminc-pairs_runtime": 3.193, + "eval_vitaminc-pairs_samples_per_second": 40.088, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_negation-triplets_loss": 1.0010513067245483, + "eval_negation-triplets_runtime": 0.7632, + "eval_negation-triplets_samples_per_second": 167.706, + "eval_negation-triplets_steps_per_second": 1.31, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_scitail-pairs-pos_loss": 0.18198701739311218, + "eval_scitail-pairs-pos_runtime": 0.8414, + "eval_scitail-pairs-pos_samples_per_second": 152.126, + "eval_scitail-pairs-pos_steps_per_second": 1.188, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_scitail-pairs-qa_loss": 0.0008363102679140866, + "eval_scitail-pairs-qa_runtime": 0.5814, + "eval_scitail-pairs-qa_samples_per_second": 220.176, + "eval_scitail-pairs-qa_steps_per_second": 1.72, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_xsum-pairs_loss": 0.46501919627189636, + "eval_xsum-pairs_runtime": 3.0324, + "eval_xsum-pairs_samples_per_second": 42.21, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_sciq_pairs_loss": 0.11307297646999359, + "eval_sciq_pairs_runtime": 3.466, + "eval_sciq_pairs_samples_per_second": 36.93, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_qasc_pairs_loss": 0.29918473958969116, + "eval_qasc_pairs_runtime": 0.6151, + "eval_qasc_pairs_samples_per_second": 208.085, + "eval_qasc_pairs_steps_per_second": 1.626, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_openbookqa_pairs_loss": 0.9877970814704895, + "eval_openbookqa_pairs_runtime": 0.5936, + "eval_openbookqa_pairs_samples_per_second": 215.639, + "eval_openbookqa_pairs_steps_per_second": 1.685, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_msmarco_pairs_loss": 0.9953982830047607, + "eval_msmarco_pairs_runtime": 1.5203, + "eval_msmarco_pairs_samples_per_second": 84.194, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_nq_pairs_loss": 1.0526180267333984, + "eval_nq_pairs_runtime": 2.8941, + "eval_nq_pairs_samples_per_second": 44.227, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_trivia_pairs_loss": 1.056014895439148, + "eval_trivia_pairs_runtime": 3.458, + "eval_trivia_pairs_samples_per_second": 37.016, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_gooaq_pairs_loss": 0.5340279936790466, + "eval_gooaq_pairs_runtime": 0.9526, + "eval_gooaq_pairs_samples_per_second": 134.374, + "eval_gooaq_pairs_steps_per_second": 1.05, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_paws-pos_loss": 0.027126522734761238, + "eval_paws-pos_runtime": 0.6936, + "eval_paws-pos_samples_per_second": 184.538, + "eval_paws-pos_steps_per_second": 1.442, + "step": 1040 + }, + { + "epoch": 1.0699588477366255, + "eval_global_dataset_loss": 0.5354006290435791, + "eval_global_dataset_runtime": 13.4232, + "eval_global_dataset_samples_per_second": 30.991, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1040 + }, + { + "epoch": 1.0709876543209877, + "grad_norm": 13.943881034851074, + "learning_rate": 3.494570280496831e-05, + "loss": 1.2332, + "step": 1041 + }, + { + "epoch": 1.0720164609053497, + "grad_norm": 13.042203903198242, + "learning_rate": 3.4944246388961354e-05, + "loss": 1.0986, + "step": 1042 + }, + { + "epoch": 1.073045267489712, + "grad_norm": 8.240731239318848, + "learning_rate": 3.494277074462533e-05, + "loss": 0.7022, + "step": 1043 + }, + { + "epoch": 1.074074074074074, + "grad_norm": 7.854862689971924, + "learning_rate": 3.494127587440399e-05, + "loss": 0.3632, + "step": 1044 + }, + { + "epoch": 1.0751028806584362, + "grad_norm": 12.290331840515137, + "learning_rate": 3.493976178077293e-05, + "loss": 1.0894, + "step": 1045 + }, + { + "epoch": 1.0761316872427984, + "grad_norm": 6.150963306427002, + "learning_rate": 3.493822846623956e-05, + "loss": 0.238, + "step": 1046 + }, + { + "epoch": 1.0771604938271604, + "grad_norm": 1.730427861213684, + "learning_rate": 3.493667593334315e-05, + "loss": 0.072, + "step": 1047 + }, + { + "epoch": 1.0781893004115226, + "grad_norm": 14.845075607299805, + "learning_rate": 3.4935104184654776e-05, + "loss": 1.2267, + "step": 1048 + }, + { + "epoch": 1.0792181069958848, + "grad_norm": 13.764507293701172, + "learning_rate": 3.4933513222777345e-05, + "loss": 1.456, + "step": 1049 + }, + { + "epoch": 1.0802469135802468, + "grad_norm": 5.944631576538086, + "learning_rate": 3.493190305034558e-05, + "loss": 0.2923, + "step": 1050 + }, + { + "epoch": 1.081275720164609, + "grad_norm": 8.024051666259766, + "learning_rate": 3.4930273670026005e-05, + "loss": 0.4242, + "step": 1051 + }, + { + "epoch": 1.0823045267489713, + "grad_norm": 10.95007610321045, + "learning_rate": 3.4928625084516986e-05, + "loss": 1.0295, + "step": 1052 + }, + { + "epoch": 1.0833333333333333, + "grad_norm": 11.2196683883667, + "learning_rate": 3.492695729654866e-05, + "loss": 0.8706, + "step": 1053 + }, + { + "epoch": 1.0843621399176955, + "grad_norm": 6.774842739105225, + "learning_rate": 3.4925270308882986e-05, + "loss": 0.3193, + "step": 1054 + }, + { + "epoch": 1.0853909465020577, + "grad_norm": 7.2438225746154785, + "learning_rate": 3.492356412431371e-05, + "loss": 0.2568, + "step": 1055 + }, + { + "epoch": 1.0864197530864197, + "grad_norm": 8.959449768066406, + "learning_rate": 3.4921838745666365e-05, + "loss": 0.5477, + "step": 1056 + }, + { + "epoch": 1.087448559670782, + "grad_norm": 6.107593536376953, + "learning_rate": 3.492009417579829e-05, + "loss": 0.3996, + "step": 1057 + }, + { + "epoch": 1.0884773662551441, + "grad_norm": 9.177140235900879, + "learning_rate": 3.491833041759857e-05, + "loss": 0.4762, + "step": 1058 + }, + { + "epoch": 1.0895061728395061, + "grad_norm": 6.854634761810303, + "learning_rate": 3.49165474739881e-05, + "loss": 0.3927, + "step": 1059 + }, + { + "epoch": 1.0905349794238683, + "grad_norm": 13.550508499145508, + "learning_rate": 3.491474534791954e-05, + "loss": 1.3277, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8141260147094727, + "eval_Qnli-dev_cosine_ap": 0.7552781802571633, + "eval_Qnli-dev_cosine_f1": 0.6927374301675978, + "eval_Qnli-dev_cosine_f1_threshold": 0.7760788202285767, + "eval_Qnli-dev_cosine_precision": 0.6179401993355482, + "eval_Qnli-dev_cosine_recall": 0.788135593220339, + "eval_Qnli-dev_dot_accuracy": 0.68359375, + "eval_Qnli-dev_dot_accuracy_threshold": 388.2694091796875, + "eval_Qnli-dev_dot_ap": 0.6992409939483639, + "eval_Qnli-dev_dot_f1": 0.6821705426356589, + "eval_Qnli-dev_dot_f1_threshold": 383.9870300292969, + "eval_Qnli-dev_dot_precision": 0.6285714285714286, + "eval_Qnli-dev_dot_recall": 0.7457627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.442824363708496, + "eval_Qnli-dev_euclidean_ap": 0.7601416485652148, + "eval_Qnli-dev_euclidean_f1": 0.6981818181818182, + "eval_Qnli-dev_euclidean_f1_threshold": 15.075292587280273, + "eval_Qnli-dev_euclidean_precision": 0.6114649681528662, + "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, + "eval_Qnli-dev_manhattan_accuracy": 0.70703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 276.9916687011719, + "eval_Qnli-dev_manhattan_ap": 0.7611068790205133, + "eval_Qnli-dev_manhattan_f1": 0.6919104991394148, + "eval_Qnli-dev_manhattan_f1_threshold": 325.7022399902344, + "eval_Qnli-dev_manhattan_precision": 0.5826086956521739, + "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 388.2694091796875, + "eval_Qnli-dev_max_ap": 0.7611068790205133, + "eval_Qnli-dev_max_f1": 0.6981818181818182, + "eval_Qnli-dev_max_f1_threshold": 383.9870300292969, + "eval_Qnli-dev_max_precision": 0.6285714285714286, + "eval_Qnli-dev_max_recall": 0.8516949152542372, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.874409556388855, + "eval_allNLI-dev_cosine_ap": 0.6009933067542925, + "eval_allNLI-dev_cosine_f1": 0.6184210526315789, + "eval_allNLI-dev_cosine_f1_threshold": 0.7953381538391113, + "eval_allNLI-dev_cosine_precision": 0.49823321554770317, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.69140625, + "eval_allNLI-dev_dot_accuracy_threshold": 427.4722900390625, + "eval_allNLI-dev_dot_ap": 0.5113560802806598, + "eval_allNLI-dev_dot_f1": 0.5847665847665847, + "eval_allNLI-dev_dot_f1_threshold": 381.553955078125, + "eval_allNLI-dev_dot_precision": 0.5085470085470085, + "eval_allNLI-dev_dot_recall": 0.6878612716763006, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.330347061157227, + "eval_allNLI-dev_euclidean_ap": 0.6074562934337298, + "eval_allNLI-dev_euclidean_f1": 0.6263498920086393, + "eval_allNLI-dev_euclidean_f1_threshold": 13.941089630126953, + "eval_allNLI-dev_euclidean_precision": 0.5, + "eval_allNLI-dev_euclidean_recall": 0.838150289017341, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 220.7136688232422, + "eval_allNLI-dev_manhattan_ap": 0.6048071745318456, + "eval_allNLI-dev_manhattan_f1": 0.6197802197802197, + "eval_allNLI-dev_manhattan_f1_threshold": 287.2364807128906, + "eval_allNLI-dev_manhattan_precision": 0.5, + "eval_allNLI-dev_manhattan_recall": 0.815028901734104, + "eval_allNLI-dev_max_accuracy": 0.73046875, + "eval_allNLI-dev_max_accuracy_threshold": 427.4722900390625, + "eval_allNLI-dev_max_ap": 0.6074562934337298, + "eval_allNLI-dev_max_f1": 0.6263498920086393, + "eval_allNLI-dev_max_f1_threshold": 381.553955078125, + "eval_allNLI-dev_max_precision": 0.5085470085470085, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7611068790205133, + "eval_sts-test_pearson_cosine": 0.8075672126172166, + "eval_sts-test_pearson_dot": 0.7866606214637348, + "eval_sts-test_pearson_euclidean": 0.843653409349988, + "eval_sts-test_pearson_manhattan": 0.8406546702228472, + "eval_sts-test_pearson_max": 0.843653409349988, + "eval_sts-test_spearman_cosine": 0.8475748836082914, + "eval_sts-test_spearman_dot": 0.7727337690825425, + "eval_sts-test_spearman_euclidean": 0.8438102732462625, + "eval_sts-test_spearman_manhattan": 0.8412408795734754, + "eval_sts-test_spearman_max": 0.8475748836082914, + "eval_vitaminc-pairs_loss": 3.1600735187530518, + "eval_vitaminc-pairs_runtime": 3.2213, + "eval_vitaminc-pairs_samples_per_second": 39.736, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_negation-triplets_loss": 0.9576802253723145, + "eval_negation-triplets_runtime": 0.7718, + "eval_negation-triplets_samples_per_second": 165.854, + "eval_negation-triplets_steps_per_second": 1.296, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_scitail-pairs-pos_loss": 0.17336298525333405, + "eval_scitail-pairs-pos_runtime": 0.9032, + "eval_scitail-pairs-pos_samples_per_second": 141.717, + "eval_scitail-pairs-pos_steps_per_second": 1.107, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_scitail-pairs-qa_loss": 0.0017815841129049659, + "eval_scitail-pairs-qa_runtime": 0.5931, + "eval_scitail-pairs-qa_samples_per_second": 215.827, + "eval_scitail-pairs-qa_steps_per_second": 1.686, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_xsum-pairs_loss": 0.4659762680530548, + "eval_xsum-pairs_runtime": 3.0276, + "eval_xsum-pairs_samples_per_second": 42.278, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_sciq_pairs_loss": 0.11056140810251236, + "eval_sciq_pairs_runtime": 3.4934, + "eval_sciq_pairs_samples_per_second": 36.641, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_qasc_pairs_loss": 0.31533095240592957, + "eval_qasc_pairs_runtime": 0.621, + "eval_qasc_pairs_samples_per_second": 206.114, + "eval_qasc_pairs_steps_per_second": 1.61, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_openbookqa_pairs_loss": 0.9580376148223877, + "eval_openbookqa_pairs_runtime": 0.5902, + "eval_openbookqa_pairs_samples_per_second": 216.867, + "eval_openbookqa_pairs_steps_per_second": 1.694, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_msmarco_pairs_loss": 0.9790497422218323, + "eval_msmarco_pairs_runtime": 1.5363, + "eval_msmarco_pairs_samples_per_second": 83.316, + "eval_msmarco_pairs_steps_per_second": 0.651, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_nq_pairs_loss": 1.0773346424102783, + "eval_nq_pairs_runtime": 2.9103, + "eval_nq_pairs_samples_per_second": 43.982, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_trivia_pairs_loss": 1.1339694261550903, + "eval_trivia_pairs_runtime": 3.4476, + "eval_trivia_pairs_samples_per_second": 37.128, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_gooaq_pairs_loss": 0.5519257187843323, + "eval_gooaq_pairs_runtime": 0.9565, + "eval_gooaq_pairs_samples_per_second": 133.816, + "eval_gooaq_pairs_steps_per_second": 1.045, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_paws-pos_loss": 0.023419398814439774, + "eval_paws-pos_runtime": 0.6933, + "eval_paws-pos_samples_per_second": 184.623, + "eval_paws-pos_steps_per_second": 1.442, + "step": 1060 + }, + { + "epoch": 1.0905349794238683, + "eval_global_dataset_loss": 0.5671343207359314, + "eval_global_dataset_runtime": 13.3988, + "eval_global_dataset_samples_per_second": 31.047, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1060 + }, + { + "epoch": 1.0915637860082303, + "grad_norm": 8.800882339477539, + "learning_rate": 3.49129240423773e-05, + "loss": 0.4485, + "step": 1061 + }, + { + "epoch": 1.0925925925925926, + "grad_norm": 8.650297164916992, + "learning_rate": 3.4911083560377576e-05, + "loss": 0.7556, + "step": 1062 + }, + { + "epoch": 1.0936213991769548, + "grad_norm": 1.9387682676315308, + "learning_rate": 3.4909223904968304e-05, + "loss": 0.0762, + "step": 1063 + }, + { + "epoch": 1.0946502057613168, + "grad_norm": 13.118877410888672, + "learning_rate": 3.490734507922918e-05, + "loss": 1.2212, + "step": 1064 + }, + { + "epoch": 1.095679012345679, + "grad_norm": 8.793366432189941, + "learning_rate": 3.490544708627165e-05, + "loss": 0.584, + "step": 1065 + }, + { + "epoch": 1.0967078189300412, + "grad_norm": 15.72206974029541, + "learning_rate": 3.490352992923889e-05, + "loss": 1.3773, + "step": 1066 + }, + { + "epoch": 1.0977366255144032, + "grad_norm": 8.144251823425293, + "learning_rate": 3.490159361130583e-05, + "loss": 0.4408, + "step": 1067 + }, + { + "epoch": 1.0987654320987654, + "grad_norm": 1.2064863443374634, + "learning_rate": 3.4899638135679124e-05, + "loss": 0.0213, + "step": 1068 + }, + { + "epoch": 1.0997942386831276, + "grad_norm": 7.808547496795654, + "learning_rate": 3.489766350559714e-05, + "loss": 0.3992, + "step": 1069 + }, + { + "epoch": 1.1008230452674896, + "grad_norm": 14.440876960754395, + "learning_rate": 3.489566972432997e-05, + "loss": 1.0667, + "step": 1070 + }, + { + "epoch": 1.1018518518518519, + "grad_norm": 1.1789801120758057, + "learning_rate": 3.4893656795179454e-05, + "loss": 0.0228, + "step": 1071 + }, + { + "epoch": 1.102880658436214, + "grad_norm": 11.723593711853027, + "learning_rate": 3.489162472147909e-05, + "loss": 0.8587, + "step": 1072 + }, + { + "epoch": 1.103909465020576, + "grad_norm": 10.564098358154297, + "learning_rate": 3.488957350659412e-05, + "loss": 0.7988, + "step": 1073 + }, + { + "epoch": 1.1049382716049383, + "grad_norm": 15.705960273742676, + "learning_rate": 3.488750315392148e-05, + "loss": 1.4636, + "step": 1074 + }, + { + "epoch": 1.1059670781893005, + "grad_norm": 14.437980651855469, + "learning_rate": 3.488541366688978e-05, + "loss": 1.381, + "step": 1075 + }, + { + "epoch": 1.1069958847736625, + "grad_norm": 1.2517554759979248, + "learning_rate": 3.488330504895932e-05, + "loss": 0.0192, + "step": 1076 + }, + { + "epoch": 1.1080246913580247, + "grad_norm": 9.632847785949707, + "learning_rate": 3.4881177303622115e-05, + "loss": 0.9076, + "step": 1077 + }, + { + "epoch": 1.1090534979423867, + "grad_norm": 12.476523399353027, + "learning_rate": 3.4879030434401814e-05, + "loss": 1.2966, + "step": 1078 + }, + { + "epoch": 1.110082304526749, + "grad_norm": 7.523418426513672, + "learning_rate": 3.487686444485375e-05, + "loss": 0.4105, + "step": 1079 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 9.408167839050293, + "learning_rate": 3.487467933856494e-05, + "loss": 0.7761, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_Qnli-dev_cosine_accuracy": 0.728515625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8194748759269714, + "eval_Qnli-dev_cosine_ap": 0.7717294481550442, + "eval_Qnli-dev_cosine_f1": 0.7117988394584139, + "eval_Qnli-dev_cosine_f1_threshold": 0.7828003764152527, + "eval_Qnli-dev_cosine_precision": 0.6548042704626335, + "eval_Qnli-dev_cosine_recall": 0.7796610169491526, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 402.6671142578125, + "eval_Qnli-dev_dot_ap": 0.6996117845942682, + "eval_Qnli-dev_dot_f1": 0.6843971631205673, + "eval_Qnli-dev_dot_f1_threshold": 369.4736022949219, + "eval_Qnli-dev_dot_precision": 0.5884146341463414, + "eval_Qnli-dev_dot_recall": 0.8177966101694916, + "eval_Qnli-dev_euclidean_accuracy": 0.736328125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.2765531539917, + "eval_Qnli-dev_euclidean_ap": 0.7778804447820293, + "eval_Qnli-dev_euclidean_f1": 0.7123287671232877, + "eval_Qnli-dev_euclidean_f1_threshold": 14.59056282043457, + "eval_Qnli-dev_euclidean_precision": 0.6618181818181819, + "eval_Qnli-dev_euclidean_recall": 0.7711864406779662, + "eval_Qnli-dev_manhattan_accuracy": 0.73046875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 274.97943115234375, + "eval_Qnli-dev_manhattan_ap": 0.7807881360124016, + "eval_Qnli-dev_manhattan_f1": 0.7098039215686275, + "eval_Qnli-dev_manhattan_f1_threshold": 303.7630615234375, + "eval_Qnli-dev_manhattan_precision": 0.6605839416058394, + "eval_Qnli-dev_manhattan_recall": 0.7669491525423728, + "eval_Qnli-dev_max_accuracy": 0.736328125, + "eval_Qnli-dev_max_accuracy_threshold": 402.6671142578125, + "eval_Qnli-dev_max_ap": 0.7807881360124016, + "eval_Qnli-dev_max_f1": 0.7123287671232877, + "eval_Qnli-dev_max_f1_threshold": 369.4736022949219, + "eval_Qnli-dev_max_precision": 0.6618181818181819, + "eval_Qnli-dev_max_recall": 0.8177966101694916, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8717066049575806, + "eval_allNLI-dev_cosine_ap": 0.5998410683079696, + "eval_allNLI-dev_cosine_f1": 0.6223175965665236, + "eval_allNLI-dev_cosine_f1_threshold": 0.7907012104988098, + "eval_allNLI-dev_cosine_precision": 0.4948805460750853, + "eval_allNLI-dev_cosine_recall": 0.838150289017341, + "eval_allNLI-dev_dot_accuracy": 0.68359375, + "eval_allNLI-dev_dot_accuracy_threshold": 451.738525390625, + "eval_allNLI-dev_dot_ap": 0.5121578786664749, + "eval_allNLI-dev_dot_f1": 0.586433260393873, + "eval_allNLI-dev_dot_f1_threshold": 380.15142822265625, + "eval_allNLI-dev_dot_precision": 0.47183098591549294, + "eval_allNLI-dev_dot_recall": 0.7745664739884393, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.108678817749023, + "eval_allNLI-dev_euclidean_ap": 0.6051852582402737, + "eval_allNLI-dev_euclidean_f1": 0.6255506607929515, + "eval_allNLI-dev_euclidean_f1_threshold": 13.981431007385254, + "eval_allNLI-dev_euclidean_precision": 0.505338078291815, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 238.26559448242188, + "eval_allNLI-dev_manhattan_ap": 0.6026811141388402, + "eval_allNLI-dev_manhattan_f1": 0.6221198156682027, + "eval_allNLI-dev_manhattan_f1_threshold": 281.940185546875, + "eval_allNLI-dev_manhattan_precision": 0.5172413793103449, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 451.738525390625, + "eval_allNLI-dev_max_ap": 0.6051852582402737, + "eval_allNLI-dev_max_f1": 0.6255506607929515, + "eval_allNLI-dev_max_f1_threshold": 380.15142822265625, + "eval_allNLI-dev_max_precision": 0.5172413793103449, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7807881360124016, + "eval_sts-test_pearson_cosine": 0.8147331088110416, + "eval_sts-test_pearson_dot": 0.7837159177071007, + "eval_sts-test_pearson_euclidean": 0.8484296507954121, + "eval_sts-test_pearson_manhattan": 0.8460797493325178, + "eval_sts-test_pearson_max": 0.8484296507954121, + "eval_sts-test_spearman_cosine": 0.8464454065220337, + "eval_sts-test_spearman_dot": 0.7560551082157521, + "eval_sts-test_spearman_euclidean": 0.845518965908709, + "eval_sts-test_spearman_manhattan": 0.842875732654626, + "eval_sts-test_spearman_max": 0.8464454065220337, + "eval_vitaminc-pairs_loss": 3.086657762527466, + "eval_vitaminc-pairs_runtime": 3.224, + "eval_vitaminc-pairs_samples_per_second": 39.702, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_negation-triplets_loss": 0.9766063690185547, + "eval_negation-triplets_runtime": 0.7482, + "eval_negation-triplets_samples_per_second": 171.078, + "eval_negation-triplets_steps_per_second": 1.337, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_scitail-pairs-pos_loss": 0.1879463493824005, + "eval_scitail-pairs-pos_runtime": 0.865, + "eval_scitail-pairs-pos_samples_per_second": 147.976, + "eval_scitail-pairs-pos_steps_per_second": 1.156, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_scitail-pairs-qa_loss": 0.0011153208324685693, + "eval_scitail-pairs-qa_runtime": 0.5942, + "eval_scitail-pairs-qa_samples_per_second": 215.424, + "eval_scitail-pairs-qa_steps_per_second": 1.683, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_xsum-pairs_loss": 0.510924756526947, + "eval_xsum-pairs_runtime": 3.0274, + "eval_xsum-pairs_samples_per_second": 42.28, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_sciq_pairs_loss": 0.10803800821304321, + "eval_sciq_pairs_runtime": 3.4741, + "eval_sciq_pairs_samples_per_second": 36.844, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_qasc_pairs_loss": 0.3036348223686218, + "eval_qasc_pairs_runtime": 0.611, + "eval_qasc_pairs_samples_per_second": 209.496, + "eval_qasc_pairs_steps_per_second": 1.637, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_openbookqa_pairs_loss": 0.962807297706604, + "eval_openbookqa_pairs_runtime": 0.5915, + "eval_openbookqa_pairs_samples_per_second": 216.396, + "eval_openbookqa_pairs_steps_per_second": 1.691, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_msmarco_pairs_loss": 1.006622076034546, + "eval_msmarco_pairs_runtime": 1.5255, + "eval_msmarco_pairs_samples_per_second": 83.906, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_nq_pairs_loss": 1.0572720766067505, + "eval_nq_pairs_runtime": 2.8974, + "eval_nq_pairs_samples_per_second": 44.177, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_trivia_pairs_loss": 1.1323765516281128, + "eval_trivia_pairs_runtime": 3.4371, + "eval_trivia_pairs_samples_per_second": 37.241, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_gooaq_pairs_loss": 0.5294322371482849, + "eval_gooaq_pairs_runtime": 0.955, + "eval_gooaq_pairs_samples_per_second": 134.034, + "eval_gooaq_pairs_steps_per_second": 1.047, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_paws-pos_loss": 0.023268481716513634, + "eval_paws-pos_runtime": 0.6979, + "eval_paws-pos_samples_per_second": 183.41, + "eval_paws-pos_steps_per_second": 1.433, + "step": 1080 + }, + { + "epoch": 1.1111111111111112, + "eval_global_dataset_loss": 0.5384740233421326, + "eval_global_dataset_runtime": 13.4027, + "eval_global_dataset_samples_per_second": 31.039, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1080 + }, + { + "epoch": 1.1121399176954732, + "grad_norm": 7.842824935913086, + "learning_rate": 3.4872475119154036e-05, + "loss": 0.5059, + "step": 1081 + }, + { + "epoch": 1.1131687242798354, + "grad_norm": 6.858529090881348, + "learning_rate": 3.487025179027135e-05, + "loss": 0.3282, + "step": 1082 + }, + { + "epoch": 1.1141975308641976, + "grad_norm": 13.405868530273438, + "learning_rate": 3.4868009355598834e-05, + "loss": 1.224, + "step": 1083 + }, + { + "epoch": 1.1152263374485596, + "grad_norm": 7.0010175704956055, + "learning_rate": 3.4865747818850104e-05, + "loss": 0.4268, + "step": 1084 + }, + { + "epoch": 1.1162551440329218, + "grad_norm": 6.69268798828125, + "learning_rate": 3.4863467183770377e-05, + "loss": 0.4377, + "step": 1085 + }, + { + "epoch": 1.117283950617284, + "grad_norm": 8.589255332946777, + "learning_rate": 3.486116745413652e-05, + "loss": 0.4703, + "step": 1086 + }, + { + "epoch": 1.118312757201646, + "grad_norm": 6.45457124710083, + "learning_rate": 3.4858848633757014e-05, + "loss": 0.3646, + "step": 1087 + }, + { + "epoch": 1.1193415637860082, + "grad_norm": 7.386177062988281, + "learning_rate": 3.485651072647195e-05, + "loss": 0.4196, + "step": 1088 + }, + { + "epoch": 1.1203703703703705, + "grad_norm": 10.973357200622559, + "learning_rate": 3.485415373615305e-05, + "loss": 0.835, + "step": 1089 + }, + { + "epoch": 1.1213991769547325, + "grad_norm": 6.898292064666748, + "learning_rate": 3.485177766670361e-05, + "loss": 0.3469, + "step": 1090 + }, + { + "epoch": 1.1224279835390947, + "grad_norm": 6.510031700134277, + "learning_rate": 3.484938252205855e-05, + "loss": 0.3437, + "step": 1091 + }, + { + "epoch": 1.123456790123457, + "grad_norm": 8.573420524597168, + "learning_rate": 3.4846968306184344e-05, + "loss": 0.4888, + "step": 1092 + }, + { + "epoch": 1.124485596707819, + "grad_norm": 8.766611099243164, + "learning_rate": 3.484453502307909e-05, + "loss": 0.8271, + "step": 1093 + }, + { + "epoch": 1.125514403292181, + "grad_norm": 10.891227722167969, + "learning_rate": 3.484208267677243e-05, + "loss": 0.816, + "step": 1094 + }, + { + "epoch": 1.126543209876543, + "grad_norm": 10.850860595703125, + "learning_rate": 3.483961127132559e-05, + "loss": 0.7494, + "step": 1095 + }, + { + "epoch": 1.1275720164609053, + "grad_norm": 8.748696327209473, + "learning_rate": 3.483712081083137e-05, + "loss": 0.491, + "step": 1096 + }, + { + "epoch": 1.1286008230452675, + "grad_norm": 8.8179292678833, + "learning_rate": 3.4834611299414096e-05, + "loss": 0.5567, + "step": 1097 + }, + { + "epoch": 1.1296296296296295, + "grad_norm": 8.042120933532715, + "learning_rate": 3.483208274122967e-05, + "loss": 0.3713, + "step": 1098 + }, + { + "epoch": 1.1306584362139918, + "grad_norm": 7.089449882507324, + "learning_rate": 3.4829535140465525e-05, + "loss": 0.2869, + "step": 1099 + }, + { + "epoch": 1.131687242798354, + "grad_norm": 13.324177742004395, + "learning_rate": 3.482696850134063e-05, + "loss": 1.1908, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7852716445922852, + "eval_Qnli-dev_cosine_ap": 0.7579178149830059, + "eval_Qnli-dev_cosine_f1": 0.7042801556420233, + "eval_Qnli-dev_cosine_f1_threshold": 0.7638626098632812, + "eval_Qnli-dev_cosine_precision": 0.6510791366906474, + "eval_Qnli-dev_cosine_recall": 0.7669491525423728, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 362.07611083984375, + "eval_Qnli-dev_dot_ap": 0.6762055307597052, + "eval_Qnli-dev_dot_f1": 0.6748091603053434, + "eval_Qnli-dev_dot_f1_threshold": 307.3738098144531, + "eval_Qnli-dev_dot_precision": 0.5274463007159904, + "eval_Qnli-dev_dot_recall": 0.9364406779661016, + "eval_Qnli-dev_euclidean_accuracy": 0.720703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.991537094116211, + "eval_Qnli-dev_euclidean_ap": 0.7688638848850526, + "eval_Qnli-dev_euclidean_f1": 0.7120622568093385, + "eval_Qnli-dev_euclidean_f1_threshold": 15.058847427368164, + "eval_Qnli-dev_euclidean_precision": 0.658273381294964, + "eval_Qnli-dev_euclidean_recall": 0.7754237288135594, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 274.3743896484375, + "eval_Qnli-dev_manhattan_ap": 0.7729072014366346, + "eval_Qnli-dev_manhattan_f1": 0.7126865671641792, + "eval_Qnli-dev_manhattan_f1_threshold": 321.27783203125, + "eval_Qnli-dev_manhattan_precision": 0.6366666666666667, + "eval_Qnli-dev_manhattan_recall": 0.809322033898305, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 362.07611083984375, + "eval_Qnli-dev_max_ap": 0.7729072014366346, + "eval_Qnli-dev_max_f1": 0.7126865671641792, + "eval_Qnli-dev_max_f1_threshold": 321.27783203125, + "eval_Qnli-dev_max_precision": 0.658273381294964, + "eval_Qnli-dev_max_recall": 0.9364406779661016, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8397592306137085, + "eval_allNLI-dev_cosine_ap": 0.6164056434044329, + "eval_allNLI-dev_cosine_f1": 0.6271604938271605, + "eval_allNLI-dev_cosine_f1_threshold": 0.7978222370147705, + "eval_allNLI-dev_cosine_precision": 0.5474137931034483, + "eval_allNLI-dev_cosine_recall": 0.7341040462427746, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 391.6920471191406, + "eval_allNLI-dev_dot_ap": 0.539862255241969, + "eval_allNLI-dev_dot_f1": 0.6, + "eval_allNLI-dev_dot_f1_threshold": 339.165771484375, + "eval_allNLI-dev_dot_precision": 0.46905537459283386, + "eval_allNLI-dev_dot_recall": 0.8323699421965318, + "eval_allNLI-dev_euclidean_accuracy": 0.740234375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.107568740844727, + "eval_allNLI-dev_euclidean_ap": 0.6213908286083798, + "eval_allNLI-dev_euclidean_f1": 0.6368159203980099, + "eval_allNLI-dev_euclidean_f1_threshold": 13.490726470947266, + "eval_allNLI-dev_euclidean_precision": 0.5589519650655022, + "eval_allNLI-dev_euclidean_recall": 0.7398843930635838, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 244.9213409423828, + "eval_allNLI-dev_manhattan_ap": 0.6189091075630544, + "eval_allNLI-dev_manhattan_f1": 0.6305882352941177, + "eval_allNLI-dev_manhattan_f1_threshold": 288.1800231933594, + "eval_allNLI-dev_manhattan_precision": 0.5317460317460317, + "eval_allNLI-dev_manhattan_recall": 0.7745664739884393, + "eval_allNLI-dev_max_accuracy": 0.740234375, + "eval_allNLI-dev_max_accuracy_threshold": 391.6920471191406, + "eval_allNLI-dev_max_ap": 0.6213908286083798, + "eval_allNLI-dev_max_f1": 0.6368159203980099, + "eval_allNLI-dev_max_f1_threshold": 339.165771484375, + "eval_allNLI-dev_max_precision": 0.5589519650655022, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7729072014366346, + "eval_sts-test_pearson_cosine": 0.819789322785147, + "eval_sts-test_pearson_dot": 0.8020751242857842, + "eval_sts-test_pearson_euclidean": 0.850970032232464, + "eval_sts-test_pearson_manhattan": 0.848322515154853, + "eval_sts-test_pearson_max": 0.850970032232464, + "eval_sts-test_spearman_cosine": 0.8521221935320554, + "eval_sts-test_spearman_dot": 0.7862019297058526, + "eval_sts-test_spearman_euclidean": 0.849302013790431, + "eval_sts-test_spearman_manhattan": 0.8471720327532458, + "eval_sts-test_spearman_max": 0.8521221935320554, + "eval_vitaminc-pairs_loss": 3.150930404663086, + "eval_vitaminc-pairs_runtime": 3.1937, + "eval_vitaminc-pairs_samples_per_second": 40.078, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_negation-triplets_loss": 1.010820746421814, + "eval_negation-triplets_runtime": 0.748, + "eval_negation-triplets_samples_per_second": 171.12, + "eval_negation-triplets_steps_per_second": 1.337, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_scitail-pairs-pos_loss": 0.1789112091064453, + "eval_scitail-pairs-pos_runtime": 0.9574, + "eval_scitail-pairs-pos_samples_per_second": 133.702, + "eval_scitail-pairs-pos_steps_per_second": 1.045, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_scitail-pairs-qa_loss": 0.0007675637607462704, + "eval_scitail-pairs-qa_runtime": 0.6066, + "eval_scitail-pairs-qa_samples_per_second": 211.01, + "eval_scitail-pairs-qa_steps_per_second": 1.649, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_xsum-pairs_loss": 0.4392455816268921, + "eval_xsum-pairs_runtime": 3.0225, + "eval_xsum-pairs_samples_per_second": 42.349, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_sciq_pairs_loss": 0.10065167397260666, + "eval_sciq_pairs_runtime": 3.4682, + "eval_sciq_pairs_samples_per_second": 36.907, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_qasc_pairs_loss": 0.2483232170343399, + "eval_qasc_pairs_runtime": 0.6184, + "eval_qasc_pairs_samples_per_second": 206.995, + "eval_qasc_pairs_steps_per_second": 1.617, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_openbookqa_pairs_loss": 0.9837387800216675, + "eval_openbookqa_pairs_runtime": 0.5881, + "eval_openbookqa_pairs_samples_per_second": 217.644, + "eval_openbookqa_pairs_steps_per_second": 1.7, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_msmarco_pairs_loss": 0.9828016757965088, + "eval_msmarco_pairs_runtime": 1.5194, + "eval_msmarco_pairs_samples_per_second": 84.246, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_nq_pairs_loss": 0.9649257063865662, + "eval_nq_pairs_runtime": 2.9079, + "eval_nq_pairs_samples_per_second": 44.018, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_trivia_pairs_loss": 1.0761849880218506, + "eval_trivia_pairs_runtime": 3.4499, + "eval_trivia_pairs_samples_per_second": 37.103, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_gooaq_pairs_loss": 0.5363913178443909, + "eval_gooaq_pairs_runtime": 0.9481, + "eval_gooaq_pairs_samples_per_second": 135.008, + "eval_gooaq_pairs_steps_per_second": 1.055, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_paws-pos_loss": 0.024601487442851067, + "eval_paws-pos_runtime": 0.7005, + "eval_paws-pos_samples_per_second": 182.735, + "eval_paws-pos_steps_per_second": 1.428, + "step": 1100 + }, + { + "epoch": 1.131687242798354, + "eval_global_dataset_loss": 0.5056115388870239, + "eval_global_dataset_runtime": 13.4042, + "eval_global_dataset_samples_per_second": 31.035, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1100 + }, + { + "epoch": 1.132716049382716, + "grad_norm": 8.598079681396484, + "learning_rate": 3.482438282810549e-05, + "loss": 0.5443, + "step": 1101 + }, + { + "epoch": 1.1337448559670782, + "grad_norm": 9.027626037597656, + "learning_rate": 3.482177812504212e-05, + "loss": 0.4682, + "step": 1102 + }, + { + "epoch": 1.1347736625514404, + "grad_norm": 7.5870866775512695, + "learning_rate": 3.4819154396464065e-05, + "loss": 0.431, + "step": 1103 + }, + { + "epoch": 1.1358024691358024, + "grad_norm": 13.918652534484863, + "learning_rate": 3.4816511646716355e-05, + "loss": 1.0068, + "step": 1104 + }, + { + "epoch": 1.1368312757201646, + "grad_norm": 11.70114803314209, + "learning_rate": 3.481384988017555e-05, + "loss": 1.0226, + "step": 1105 + }, + { + "epoch": 1.1378600823045268, + "grad_norm": 7.77272367477417, + "learning_rate": 3.481116910124967e-05, + "loss": 0.3054, + "step": 1106 + }, + { + "epoch": 1.1388888888888888, + "grad_norm": 0.5209324955940247, + "learning_rate": 3.480846931437824e-05, + "loss": 0.0094, + "step": 1107 + }, + { + "epoch": 1.139917695473251, + "grad_norm": 8.121362686157227, + "learning_rate": 3.480575052403227e-05, + "loss": 0.43, + "step": 1108 + }, + { + "epoch": 1.1409465020576133, + "grad_norm": 13.162841796875, + "learning_rate": 3.480301273471422e-05, + "loss": 1.3944, + "step": 1109 + }, + { + "epoch": 1.1419753086419753, + "grad_norm": 1.2725361585617065, + "learning_rate": 3.480025595095803e-05, + "loss": 0.064, + "step": 1110 + }, + { + "epoch": 1.1430041152263375, + "grad_norm": 6.89387845993042, + "learning_rate": 3.4797480177329084e-05, + "loss": 0.3928, + "step": 1111 + }, + { + "epoch": 1.1440329218106995, + "grad_norm": 6.913538455963135, + "learning_rate": 3.4794685418424226e-05, + "loss": 0.3311, + "step": 1112 + }, + { + "epoch": 1.1450617283950617, + "grad_norm": 10.783838272094727, + "learning_rate": 3.479187167887173e-05, + "loss": 1.11, + "step": 1113 + }, + { + "epoch": 1.146090534979424, + "grad_norm": 16.7984676361084, + "learning_rate": 3.4789038963331315e-05, + "loss": 1.585, + "step": 1114 + }, + { + "epoch": 1.147119341563786, + "grad_norm": 9.383954048156738, + "learning_rate": 3.478618727649411e-05, + "loss": 0.684, + "step": 1115 + }, + { + "epoch": 1.1481481481481481, + "grad_norm": 12.388830184936523, + "learning_rate": 3.4783316623082674e-05, + "loss": 1.2634, + "step": 1116 + }, + { + "epoch": 1.1491769547325104, + "grad_norm": 10.167396545410156, + "learning_rate": 3.478042700785097e-05, + "loss": 0.5913, + "step": 1117 + }, + { + "epoch": 1.1502057613168724, + "grad_norm": 6.721423149108887, + "learning_rate": 3.477751843558437e-05, + "loss": 0.3623, + "step": 1118 + }, + { + "epoch": 1.1512345679012346, + "grad_norm": 6.884538173675537, + "learning_rate": 3.477459091109962e-05, + "loss": 0.4191, + "step": 1119 + }, + { + "epoch": 1.1522633744855968, + "grad_norm": 6.610477924346924, + "learning_rate": 3.4771644439244884e-05, + "loss": 0.2817, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8267828822135925, + "eval_Qnli-dev_cosine_ap": 0.7544823712371981, + "eval_Qnli-dev_cosine_f1": 0.7014388489208634, + "eval_Qnli-dev_cosine_f1_threshold": 0.7697343230247498, + "eval_Qnli-dev_cosine_precision": 0.609375, + "eval_Qnli-dev_cosine_recall": 0.826271186440678, + "eval_Qnli-dev_dot_accuracy": 0.658203125, + "eval_Qnli-dev_dot_accuracy_threshold": 383.5416259765625, + "eval_Qnli-dev_dot_ap": 0.684055735450432, + "eval_Qnli-dev_dot_f1": 0.6751361161524502, + "eval_Qnli-dev_dot_f1_threshold": 378.13201904296875, + "eval_Qnli-dev_dot_precision": 0.5904761904761905, + "eval_Qnli-dev_dot_recall": 0.788135593220339, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.577463150024414, + "eval_Qnli-dev_euclidean_ap": 0.762148579885024, + "eval_Qnli-dev_euclidean_f1": 0.6947368421052632, + "eval_Qnli-dev_euclidean_f1_threshold": 15.448970794677734, + "eval_Qnli-dev_euclidean_precision": 0.592814371257485, + "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, + "eval_Qnli-dev_manhattan_accuracy": 0.720703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 270.780517578125, + "eval_Qnli-dev_manhattan_ap": 0.76574766189019, + "eval_Qnli-dev_manhattan_f1": 0.7012987012987013, + "eval_Qnli-dev_manhattan_f1_threshold": 308.033203125, + "eval_Qnli-dev_manhattan_precision": 0.6237623762376238, + "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 383.5416259765625, + "eval_Qnli-dev_max_ap": 0.76574766189019, + "eval_Qnli-dev_max_f1": 0.7014388489208634, + "eval_Qnli-dev_max_f1_threshold": 378.13201904296875, + "eval_Qnli-dev_max_precision": 0.6237623762376238, + "eval_Qnli-dev_max_recall": 0.8389830508474576, + "eval_allNLI-dev_cosine_accuracy": 0.720703125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.894988477230072, + "eval_allNLI-dev_cosine_ap": 0.6092571598538613, + "eval_allNLI-dev_cosine_f1": 0.6256157635467982, + "eval_allNLI-dev_cosine_f1_threshold": 0.8311195373535156, + "eval_allNLI-dev_cosine_precision": 0.5450643776824035, + "eval_allNLI-dev_cosine_recall": 0.7341040462427746, + "eval_allNLI-dev_dot_accuracy": 0.69140625, + "eval_allNLI-dev_dot_accuracy_threshold": 468.3143310546875, + "eval_allNLI-dev_dot_ap": 0.5304357149076337, + "eval_allNLI-dev_dot_f1": 0.5925925925925927, + "eval_allNLI-dev_dot_f1_threshold": 394.60333251953125, + "eval_allNLI-dev_dot_precision": 0.4755244755244755, + "eval_allNLI-dev_dot_recall": 0.7861271676300579, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.653585433959961, + "eval_allNLI-dev_euclidean_ap": 0.6154891058640852, + "eval_allNLI-dev_euclidean_f1": 0.6350000000000001, + "eval_allNLI-dev_euclidean_f1_threshold": 12.820733070373535, + "eval_allNLI-dev_euclidean_precision": 0.5594713656387665, + "eval_allNLI-dev_euclidean_recall": 0.7341040462427746, + "eval_allNLI-dev_manhattan_accuracy": 0.724609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 224.76516723632812, + "eval_allNLI-dev_manhattan_ap": 0.6117858125138088, + "eval_allNLI-dev_manhattan_f1": 0.6413301662707839, + "eval_allNLI-dev_manhattan_f1_threshold": 272.82696533203125, + "eval_allNLI-dev_manhattan_precision": 0.5443548387096774, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 468.3143310546875, + "eval_allNLI-dev_max_ap": 0.6154891058640852, + "eval_allNLI-dev_max_f1": 0.6413301662707839, + "eval_allNLI-dev_max_f1_threshold": 394.60333251953125, + "eval_allNLI-dev_max_precision": 0.5594713656387665, + "eval_allNLI-dev_max_recall": 0.7861271676300579, + "eval_sequential_score": 0.76574766189019, + "eval_sts-test_pearson_cosine": 0.8152326265483406, + "eval_sts-test_pearson_dot": 0.7716218165990644, + "eval_sts-test_pearson_euclidean": 0.8506633646999144, + "eval_sts-test_pearson_manhattan": 0.8464190305111586, + "eval_sts-test_pearson_max": 0.8506633646999144, + "eval_sts-test_spearman_cosine": 0.8489454804909258, + "eval_sts-test_spearman_dot": 0.7555045334468536, + "eval_sts-test_spearman_euclidean": 0.8484793961035343, + "eval_sts-test_spearman_manhattan": 0.8435001470048201, + "eval_sts-test_spearman_max": 0.8489454804909258, + "eval_vitaminc-pairs_loss": 2.7898471355438232, + "eval_vitaminc-pairs_runtime": 3.2051, + "eval_vitaminc-pairs_samples_per_second": 39.937, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_negation-triplets_loss": 0.9653336405754089, + "eval_negation-triplets_runtime": 0.7819, + "eval_negation-triplets_samples_per_second": 163.71, + "eval_negation-triplets_steps_per_second": 1.279, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_scitail-pairs-pos_loss": 0.18886801600456238, + "eval_scitail-pairs-pos_runtime": 0.8649, + "eval_scitail-pairs-pos_samples_per_second": 147.987, + "eval_scitail-pairs-pos_steps_per_second": 1.156, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_scitail-pairs-qa_loss": 0.0019331619841977954, + "eval_scitail-pairs-qa_runtime": 0.5969, + "eval_scitail-pairs-qa_samples_per_second": 214.449, + "eval_scitail-pairs-qa_steps_per_second": 1.675, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_xsum-pairs_loss": 0.39326396584510803, + "eval_xsum-pairs_runtime": 3.0216, + "eval_xsum-pairs_samples_per_second": 42.362, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_sciq_pairs_loss": 0.09701427817344666, + "eval_sciq_pairs_runtime": 3.4815, + "eval_sciq_pairs_samples_per_second": 36.766, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_qasc_pairs_loss": 0.2738649249076843, + "eval_qasc_pairs_runtime": 0.6237, + "eval_qasc_pairs_samples_per_second": 205.243, + "eval_qasc_pairs_steps_per_second": 1.603, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_openbookqa_pairs_loss": 1.004379391670227, + "eval_openbookqa_pairs_runtime": 0.6019, + "eval_openbookqa_pairs_samples_per_second": 212.652, + "eval_openbookqa_pairs_steps_per_second": 1.661, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_msmarco_pairs_loss": 0.8981168866157532, + "eval_msmarco_pairs_runtime": 1.5342, + "eval_msmarco_pairs_samples_per_second": 83.432, + "eval_msmarco_pairs_steps_per_second": 0.652, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_nq_pairs_loss": 0.8773314952850342, + "eval_nq_pairs_runtime": 2.9229, + "eval_nq_pairs_samples_per_second": 43.792, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_trivia_pairs_loss": 0.9639384746551514, + "eval_trivia_pairs_runtime": 3.4419, + "eval_trivia_pairs_samples_per_second": 37.189, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_gooaq_pairs_loss": 0.46417081356048584, + "eval_gooaq_pairs_runtime": 0.9594, + "eval_gooaq_pairs_samples_per_second": 133.412, + "eval_gooaq_pairs_steps_per_second": 1.042, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_paws-pos_loss": 0.023940226063132286, + "eval_paws-pos_runtime": 0.6937, + "eval_paws-pos_samples_per_second": 184.508, + "eval_paws-pos_steps_per_second": 1.441, + "step": 1120 + }, + { + "epoch": 1.1522633744855968, + "eval_global_dataset_loss": 0.44758284091949463, + "eval_global_dataset_runtime": 13.4266, + "eval_global_dataset_samples_per_second": 30.983, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1120 + }, + { + "epoch": 1.1532921810699588, + "grad_norm": 6.409517765045166, + "learning_rate": 3.476867902489967e-05, + "loss": 0.5464, + "step": 1121 + }, + { + "epoch": 1.154320987654321, + "grad_norm": 0.6689707636833191, + "learning_rate": 3.476569467297489e-05, + "loss": 0.0391, + "step": 1122 + }, + { + "epoch": 1.155349794238683, + "grad_norm": 8.022232055664062, + "learning_rate": 3.4762691388412786e-05, + "loss": 0.4436, + "step": 1123 + }, + { + "epoch": 1.1563786008230452, + "grad_norm": 7.272008895874023, + "learning_rate": 3.475966917618697e-05, + "loss": 0.3815, + "step": 1124 + }, + { + "epoch": 1.1574074074074074, + "grad_norm": 7.061352252960205, + "learning_rate": 3.47566280413024e-05, + "loss": 0.3715, + "step": 1125 + }, + { + "epoch": 1.1584362139917697, + "grad_norm": 11.564949035644531, + "learning_rate": 3.475356798879536e-05, + "loss": 0.7445, + "step": 1126 + }, + { + "epoch": 1.1594650205761317, + "grad_norm": 8.554880142211914, + "learning_rate": 3.475048902373347e-05, + "loss": 0.5149, + "step": 1127 + }, + { + "epoch": 1.1604938271604939, + "grad_norm": 8.931382179260254, + "learning_rate": 3.474739115121569e-05, + "loss": 0.5129, + "step": 1128 + }, + { + "epoch": 1.1615226337448559, + "grad_norm": 8.221881866455078, + "learning_rate": 3.474427437637224e-05, + "loss": 0.6473, + "step": 1129 + }, + { + "epoch": 1.162551440329218, + "grad_norm": 7.380521297454834, + "learning_rate": 3.4741138704364704e-05, + "loss": 0.362, + "step": 1130 + }, + { + "epoch": 1.1635802469135803, + "grad_norm": 13.867860794067383, + "learning_rate": 3.4737984140385914e-05, + "loss": 1.4168, + "step": 1131 + }, + { + "epoch": 1.1646090534979423, + "grad_norm": 15.745050430297852, + "learning_rate": 3.4734810689660025e-05, + "loss": 2.5522, + "step": 1132 + }, + { + "epoch": 1.1656378600823045, + "grad_norm": 8.687469482421875, + "learning_rate": 3.473161835744243e-05, + "loss": 0.8093, + "step": 1133 + }, + { + "epoch": 1.1666666666666667, + "grad_norm": 10.833587646484375, + "learning_rate": 3.4728407149019825e-05, + "loss": 0.9214, + "step": 1134 + }, + { + "epoch": 1.1676954732510287, + "grad_norm": 6.623847484588623, + "learning_rate": 3.472517706971015e-05, + "loss": 0.4376, + "step": 1135 + }, + { + "epoch": 1.168724279835391, + "grad_norm": 5.630647659301758, + "learning_rate": 3.47219281248626e-05, + "loss": 0.2709, + "step": 1136 + }, + { + "epoch": 1.1697530864197532, + "grad_norm": 10.486451148986816, + "learning_rate": 3.47186603198576e-05, + "loss": 0.9257, + "step": 1137 + }, + { + "epoch": 1.1707818930041152, + "grad_norm": 6.501081943511963, + "learning_rate": 3.471537366010684e-05, + "loss": 0.4183, + "step": 1138 + }, + { + "epoch": 1.1718106995884774, + "grad_norm": 9.043843269348145, + "learning_rate": 3.4712068151053196e-05, + "loss": 0.7341, + "step": 1139 + }, + { + "epoch": 1.1728395061728394, + "grad_norm": 7.681128978729248, + "learning_rate": 3.4708743798170794e-05, + "loss": 0.9119, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_Qnli-dev_cosine_accuracy": 0.724609375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8039048910140991, + "eval_Qnli-dev_cosine_ap": 0.7787459564025726, + "eval_Qnli-dev_cosine_f1": 0.7116564417177914, + "eval_Qnli-dev_cosine_f1_threshold": 0.8039048910140991, + "eval_Qnli-dev_cosine_precision": 0.6877470355731226, + "eval_Qnli-dev_cosine_recall": 0.7372881355932204, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 403.3175964355469, + "eval_Qnli-dev_dot_ap": 0.7102945682759505, + "eval_Qnli-dev_dot_f1": 0.6881720430107527, + "eval_Qnli-dev_dot_f1_threshold": 373.3174743652344, + "eval_Qnli-dev_dot_precision": 0.5962732919254659, + "eval_Qnli-dev_dot_recall": 0.8135593220338984, + "eval_Qnli-dev_euclidean_accuracy": 0.732421875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.80968189239502, + "eval_Qnli-dev_euclidean_ap": 0.7834118400292112, + "eval_Qnli-dev_euclidean_f1": 0.7209775967413441, + "eval_Qnli-dev_euclidean_f1_threshold": 13.910100936889648, + "eval_Qnli-dev_euclidean_precision": 0.6941176470588235, + "eval_Qnli-dev_euclidean_recall": 0.75, + "eval_Qnli-dev_manhattan_accuracy": 0.732421875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 271.7544250488281, + "eval_Qnli-dev_manhattan_ap": 0.7864954913072755, + "eval_Qnli-dev_manhattan_f1": 0.7164750957854407, + "eval_Qnli-dev_manhattan_f1_threshold": 300.81494140625, + "eval_Qnli-dev_manhattan_precision": 0.6538461538461539, + "eval_Qnli-dev_manhattan_recall": 0.7923728813559322, + "eval_Qnli-dev_max_accuracy": 0.732421875, + "eval_Qnli-dev_max_accuracy_threshold": 403.3175964355469, + "eval_Qnli-dev_max_ap": 0.7864954913072755, + "eval_Qnli-dev_max_f1": 0.7209775967413441, + "eval_Qnli-dev_max_f1_threshold": 373.3174743652344, + "eval_Qnli-dev_max_precision": 0.6941176470588235, + "eval_Qnli-dev_max_recall": 0.8135593220338984, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8706444501876831, + "eval_allNLI-dev_cosine_ap": 0.6094726027413053, + "eval_allNLI-dev_cosine_f1": 0.6515837104072397, + "eval_allNLI-dev_cosine_f1_threshold": 0.8078594207763672, + "eval_allNLI-dev_cosine_precision": 0.5353159851301115, + "eval_allNLI-dev_cosine_recall": 0.8323699421965318, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 420.6922302246094, + "eval_allNLI-dev_dot_ap": 0.543111680181283, + "eval_allNLI-dev_dot_f1": 0.6101694915254238, + "eval_allNLI-dev_dot_f1_threshold": 378.4337158203125, + "eval_allNLI-dev_dot_precision": 0.4816053511705686, + "eval_allNLI-dev_dot_recall": 0.8323699421965318, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.275704383850098, + "eval_allNLI-dev_euclidean_ap": 0.6133755460229069, + "eval_allNLI-dev_euclidean_f1": 0.6530612244897959, + "eval_allNLI-dev_euclidean_f1_threshold": 13.450389862060547, + "eval_allNLI-dev_euclidean_precision": 0.5373134328358209, + "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, + "eval_allNLI-dev_manhattan_accuracy": 0.724609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 232.1541748046875, + "eval_allNLI-dev_manhattan_ap": 0.6111577829536877, + "eval_allNLI-dev_manhattan_f1": 0.6387665198237885, + "eval_allNLI-dev_manhattan_f1_threshold": 284.5680847167969, + "eval_allNLI-dev_manhattan_precision": 0.5160142348754448, + "eval_allNLI-dev_manhattan_recall": 0.838150289017341, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 420.6922302246094, + "eval_allNLI-dev_max_ap": 0.6133755460229069, + "eval_allNLI-dev_max_f1": 0.6530612244897959, + "eval_allNLI-dev_max_f1_threshold": 378.4337158203125, + "eval_allNLI-dev_max_precision": 0.5373134328358209, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7864954913072755, + "eval_sts-test_pearson_cosine": 0.8100150301098641, + "eval_sts-test_pearson_dot": 0.7901249254403291, + "eval_sts-test_pearson_euclidean": 0.8439240945658539, + "eval_sts-test_pearson_manhattan": 0.8398801696817724, + "eval_sts-test_pearson_max": 0.8439240945658539, + "eval_sts-test_spearman_cosine": 0.8440087415800419, + "eval_sts-test_spearman_dot": 0.7766431945264338, + "eval_sts-test_spearman_euclidean": 0.8403773592336669, + "eval_sts-test_spearman_manhattan": 0.8364035367246335, + "eval_sts-test_spearman_max": 0.8440087415800419, + "eval_vitaminc-pairs_loss": 2.794574737548828, + "eval_vitaminc-pairs_runtime": 3.2146, + "eval_vitaminc-pairs_samples_per_second": 39.819, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_negation-triplets_loss": 0.9698534607887268, + "eval_negation-triplets_runtime": 0.7629, + "eval_negation-triplets_samples_per_second": 167.777, + "eval_negation-triplets_steps_per_second": 1.311, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_scitail-pairs-pos_loss": 0.1657916158437729, + "eval_scitail-pairs-pos_runtime": 0.8646, + "eval_scitail-pairs-pos_samples_per_second": 148.051, + "eval_scitail-pairs-pos_steps_per_second": 1.157, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_scitail-pairs-qa_loss": 0.0017395657487213612, + "eval_scitail-pairs-qa_runtime": 0.6005, + "eval_scitail-pairs-qa_samples_per_second": 213.167, + "eval_scitail-pairs-qa_steps_per_second": 1.665, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_xsum-pairs_loss": 0.43798020482063293, + "eval_xsum-pairs_runtime": 3.0212, + "eval_xsum-pairs_samples_per_second": 42.368, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_sciq_pairs_loss": 0.10038822144269943, + "eval_sciq_pairs_runtime": 3.4671, + "eval_sciq_pairs_samples_per_second": 36.918, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_qasc_pairs_loss": 0.3080596625804901, + "eval_qasc_pairs_runtime": 0.6106, + "eval_qasc_pairs_samples_per_second": 209.621, + "eval_qasc_pairs_steps_per_second": 1.638, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_openbookqa_pairs_loss": 1.0763821601867676, + "eval_openbookqa_pairs_runtime": 0.594, + "eval_openbookqa_pairs_samples_per_second": 215.489, + "eval_openbookqa_pairs_steps_per_second": 1.684, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_msmarco_pairs_loss": 1.0080692768096924, + "eval_msmarco_pairs_runtime": 1.5236, + "eval_msmarco_pairs_samples_per_second": 84.009, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_nq_pairs_loss": 0.9931175708770752, + "eval_nq_pairs_runtime": 2.9136, + "eval_nq_pairs_samples_per_second": 43.932, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_trivia_pairs_loss": 0.9768161177635193, + "eval_trivia_pairs_runtime": 3.4417, + "eval_trivia_pairs_samples_per_second": 37.191, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_gooaq_pairs_loss": 0.49635979533195496, + "eval_gooaq_pairs_runtime": 0.9526, + "eval_gooaq_pairs_samples_per_second": 134.373, + "eval_gooaq_pairs_steps_per_second": 1.05, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_paws-pos_loss": 0.024062106385827065, + "eval_paws-pos_runtime": 0.6914, + "eval_paws-pos_samples_per_second": 185.134, + "eval_paws-pos_steps_per_second": 1.446, + "step": 1140 + }, + { + "epoch": 1.1728395061728394, + "eval_global_dataset_loss": 0.4706956744194031, + "eval_global_dataset_runtime": 13.395, + "eval_global_dataset_samples_per_second": 31.056, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1140 + }, + { + "epoch": 1.1738683127572016, + "grad_norm": 9.898369789123535, + "learning_rate": 3.470540060696494e-05, + "loss": 0.5853, + "step": 1141 + }, + { + "epoch": 1.1748971193415638, + "grad_norm": 5.598046779632568, + "learning_rate": 3.470203858297216e-05, + "loss": 0.2389, + "step": 1142 + }, + { + "epoch": 1.175925925925926, + "grad_norm": 4.493359565734863, + "learning_rate": 3.4698657731760135e-05, + "loss": 0.2277, + "step": 1143 + }, + { + "epoch": 1.176954732510288, + "grad_norm": 10.61166000366211, + "learning_rate": 3.469525805892776e-05, + "loss": 0.7624, + "step": 1144 + }, + { + "epoch": 1.1779835390946503, + "grad_norm": 12.680320739746094, + "learning_rate": 3.469183957010508e-05, + "loss": 1.1422, + "step": 1145 + }, + { + "epoch": 1.1790123456790123, + "grad_norm": 7.758006572723389, + "learning_rate": 3.4688402270953314e-05, + "loss": 0.3251, + "step": 1146 + }, + { + "epoch": 1.1800411522633745, + "grad_norm": 2.305454730987549, + "learning_rate": 3.4684946167164815e-05, + "loss": 0.0637, + "step": 1147 + }, + { + "epoch": 1.1810699588477367, + "grad_norm": 9.96699047088623, + "learning_rate": 3.46814712644631e-05, + "loss": 0.5474, + "step": 1148 + }, + { + "epoch": 1.1820987654320987, + "grad_norm": 9.514446258544922, + "learning_rate": 3.467797756860279e-05, + "loss": 0.5393, + "step": 1149 + }, + { + "epoch": 1.183127572016461, + "grad_norm": 3.7625515460968018, + "learning_rate": 3.4674465085369644e-05, + "loss": 0.0976, + "step": 1150 + }, + { + "epoch": 1.1841563786008231, + "grad_norm": 12.09719181060791, + "learning_rate": 3.467093382058054e-05, + "loss": 0.736, + "step": 1151 + }, + { + "epoch": 1.1851851851851851, + "grad_norm": 19.571088790893555, + "learning_rate": 3.466738378008345e-05, + "loss": 3.0791, + "step": 1152 + }, + { + "epoch": 1.1862139917695473, + "grad_norm": 10.0186767578125, + "learning_rate": 3.466381496975744e-05, + "loss": 0.6167, + "step": 1153 + }, + { + "epoch": 1.1872427983539096, + "grad_norm": 8.159412384033203, + "learning_rate": 3.466022739551267e-05, + "loss": 0.4412, + "step": 1154 + }, + { + "epoch": 1.1882716049382716, + "grad_norm": 9.59557056427002, + "learning_rate": 3.465662106329035e-05, + "loss": 0.5349, + "step": 1155 + }, + { + "epoch": 1.1893004115226338, + "grad_norm": 8.530840873718262, + "learning_rate": 3.4652995979062786e-05, + "loss": 0.4062, + "step": 1156 + }, + { + "epoch": 1.1903292181069958, + "grad_norm": 11.586019515991211, + "learning_rate": 3.4649352148833314e-05, + "loss": 0.8493, + "step": 1157 + }, + { + "epoch": 1.191358024691358, + "grad_norm": 11.703004837036133, + "learning_rate": 3.4645689578636324e-05, + "loss": 0.9965, + "step": 1158 + }, + { + "epoch": 1.1923868312757202, + "grad_norm": 6.959652423858643, + "learning_rate": 3.464200827453724e-05, + "loss": 0.3258, + "step": 1159 + }, + { + "epoch": 1.1934156378600824, + "grad_norm": 13.127931594848633, + "learning_rate": 3.463830824263251e-05, + "loss": 1.2638, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7884823679924011, + "eval_Qnli-dev_cosine_ap": 0.7781013870186712, + "eval_Qnli-dev_cosine_f1": 0.7172675521821632, + "eval_Qnli-dev_cosine_f1_threshold": 0.7717980146408081, + "eval_Qnli-dev_cosine_precision": 0.6494845360824743, + "eval_Qnli-dev_cosine_recall": 0.8008474576271186, + "eval_Qnli-dev_dot_accuracy": 0.669921875, + "eval_Qnli-dev_dot_accuracy_threshold": 396.5754089355469, + "eval_Qnli-dev_dot_ap": 0.6988280377964569, + "eval_Qnli-dev_dot_f1": 0.6963979416809606, + "eval_Qnli-dev_dot_f1_threshold": 351.659912109375, + "eval_Qnli-dev_dot_precision": 0.5850144092219021, + "eval_Qnli-dev_dot_recall": 0.8601694915254238, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.646437644958496, + "eval_Qnli-dev_euclidean_ap": 0.7848777973576782, + "eval_Qnli-dev_euclidean_f1": 0.7099236641221374, + "eval_Qnli-dev_euclidean_f1_threshold": 14.714933395385742, + "eval_Qnli-dev_euclidean_precision": 0.6458333333333334, + "eval_Qnli-dev_euclidean_recall": 0.788135593220339, + "eval_Qnli-dev_manhattan_accuracy": 0.728515625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 277.93170166015625, + "eval_Qnli-dev_manhattan_ap": 0.7871009974620432, + "eval_Qnli-dev_manhattan_f1": 0.710172744721689, + "eval_Qnli-dev_manhattan_f1_threshold": 305.610595703125, + "eval_Qnli-dev_manhattan_precision": 0.6491228070175439, + "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, + "eval_Qnli-dev_max_accuracy": 0.728515625, + "eval_Qnli-dev_max_accuracy_threshold": 396.5754089355469, + "eval_Qnli-dev_max_ap": 0.7871009974620432, + "eval_Qnli-dev_max_f1": 0.7172675521821632, + "eval_Qnli-dev_max_f1_threshold": 351.659912109375, + "eval_Qnli-dev_max_precision": 0.6494845360824743, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8916759490966797, + "eval_allNLI-dev_cosine_ap": 0.6141079268345387, + "eval_allNLI-dev_cosine_f1": 0.6361556064073227, + "eval_allNLI-dev_cosine_f1_threshold": 0.8081791400909424, + "eval_allNLI-dev_cosine_precision": 0.5265151515151515, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 442.17108154296875, + "eval_allNLI-dev_dot_ap": 0.5355526369236585, + "eval_allNLI-dev_dot_f1": 0.5909980430528375, + "eval_allNLI-dev_dot_f1_threshold": 359.3589782714844, + "eval_allNLI-dev_dot_precision": 0.4467455621301775, + "eval_allNLI-dev_dot_recall": 0.8728323699421965, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.124547958374023, + "eval_allNLI-dev_euclidean_ap": 0.6183305754610903, + "eval_allNLI-dev_euclidean_f1": 0.6453089244851259, + "eval_allNLI-dev_euclidean_f1_threshold": 13.58721923828125, + "eval_allNLI-dev_euclidean_precision": 0.5340909090909091, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 228.35107421875, + "eval_allNLI-dev_manhattan_ap": 0.6137978661021017, + "eval_allNLI-dev_manhattan_f1": 0.6401766004415012, + "eval_allNLI-dev_manhattan_f1_threshold": 287.4211730957031, + "eval_allNLI-dev_manhattan_precision": 0.5178571428571429, + "eval_allNLI-dev_manhattan_recall": 0.838150289017341, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 442.17108154296875, + "eval_allNLI-dev_max_ap": 0.6183305754610903, + "eval_allNLI-dev_max_f1": 0.6453089244851259, + "eval_allNLI-dev_max_f1_threshold": 359.3589782714844, + "eval_allNLI-dev_max_precision": 0.5340909090909091, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7871009974620432, + "eval_sts-test_pearson_cosine": 0.820146580927034, + "eval_sts-test_pearson_dot": 0.7810240390511689, + "eval_sts-test_pearson_euclidean": 0.8541656559615043, + "eval_sts-test_pearson_manhattan": 0.8504960752425201, + "eval_sts-test_pearson_max": 0.8541656559615043, + "eval_sts-test_spearman_cosine": 0.8510599328404078, + "eval_sts-test_spearman_dot": 0.7624339977657435, + "eval_sts-test_spearman_euclidean": 0.8496104394839404, + "eval_sts-test_spearman_manhattan": 0.846809592287237, + "eval_sts-test_spearman_max": 0.8510599328404078, + "eval_vitaminc-pairs_loss": 2.661559581756592, + "eval_vitaminc-pairs_runtime": 3.1856, + "eval_vitaminc-pairs_samples_per_second": 40.18, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_negation-triplets_loss": 0.9959814548492432, + "eval_negation-triplets_runtime": 0.747, + "eval_negation-triplets_samples_per_second": 171.355, + "eval_negation-triplets_steps_per_second": 1.339, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_scitail-pairs-pos_loss": 0.15917012095451355, + "eval_scitail-pairs-pos_runtime": 0.8767, + "eval_scitail-pairs-pos_samples_per_second": 145.994, + "eval_scitail-pairs-pos_steps_per_second": 1.141, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_scitail-pairs-qa_loss": 0.0012910376535728574, + "eval_scitail-pairs-qa_runtime": 0.5892, + "eval_scitail-pairs-qa_samples_per_second": 217.258, + "eval_scitail-pairs-qa_steps_per_second": 1.697, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_xsum-pairs_loss": 0.4237976372241974, + "eval_xsum-pairs_runtime": 3.0191, + "eval_xsum-pairs_samples_per_second": 42.397, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_sciq_pairs_loss": 0.10055720806121826, + "eval_sciq_pairs_runtime": 3.4827, + "eval_sciq_pairs_samples_per_second": 36.753, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_qasc_pairs_loss": 0.2926960587501526, + "eval_qasc_pairs_runtime": 0.6094, + "eval_qasc_pairs_samples_per_second": 210.052, + "eval_qasc_pairs_steps_per_second": 1.641, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_openbookqa_pairs_loss": 0.9382266998291016, + "eval_openbookqa_pairs_runtime": 0.592, + "eval_openbookqa_pairs_samples_per_second": 216.222, + "eval_openbookqa_pairs_steps_per_second": 1.689, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_msmarco_pairs_loss": 0.9833506941795349, + "eval_msmarco_pairs_runtime": 1.53, + "eval_msmarco_pairs_samples_per_second": 83.658, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_nq_pairs_loss": 1.0057848691940308, + "eval_nq_pairs_runtime": 2.8963, + "eval_nq_pairs_samples_per_second": 44.195, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_trivia_pairs_loss": 1.0242230892181396, + "eval_trivia_pairs_runtime": 3.4471, + "eval_trivia_pairs_samples_per_second": 37.133, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_gooaq_pairs_loss": 0.5412207841873169, + "eval_gooaq_pairs_runtime": 0.9554, + "eval_gooaq_pairs_samples_per_second": 133.977, + "eval_gooaq_pairs_steps_per_second": 1.047, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_paws-pos_loss": 0.023840811103582382, + "eval_paws-pos_runtime": 0.7016, + "eval_paws-pos_samples_per_second": 182.442, + "eval_paws-pos_steps_per_second": 1.425, + "step": 1160 + }, + { + "epoch": 1.1934156378600824, + "eval_global_dataset_loss": 0.4532192051410675, + "eval_global_dataset_runtime": 13.3951, + "eval_global_dataset_samples_per_second": 31.056, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1160 + }, + { + "epoch": 1.1944444444444444, + "grad_norm": 7.833865165710449, + "learning_rate": 3.46345894890496e-05, + "loss": 0.3684, + "step": 1161 + }, + { + "epoch": 1.1954732510288066, + "grad_norm": 9.430367469787598, + "learning_rate": 3.463085201994697e-05, + "loss": 0.7415, + "step": 1162 + }, + { + "epoch": 1.1965020576131686, + "grad_norm": 9.99718952178955, + "learning_rate": 3.4627095841514086e-05, + "loss": 0.8754, + "step": 1163 + }, + { + "epoch": 1.1975308641975309, + "grad_norm": 8.691899299621582, + "learning_rate": 3.4623320959971386e-05, + "loss": 0.8829, + "step": 1164 + }, + { + "epoch": 1.198559670781893, + "grad_norm": 10.81286907196045, + "learning_rate": 3.46195273815703e-05, + "loss": 1.0219, + "step": 1165 + }, + { + "epoch": 1.199588477366255, + "grad_norm": 8.638839721679688, + "learning_rate": 3.461571511259319e-05, + "loss": 0.5988, + "step": 1166 + }, + { + "epoch": 1.2006172839506173, + "grad_norm": 10.151771545410156, + "learning_rate": 3.46118841593534e-05, + "loss": 0.736, + "step": 1167 + }, + { + "epoch": 1.2016460905349795, + "grad_norm": 11.333561897277832, + "learning_rate": 3.460803452819521e-05, + "loss": 0.8933, + "step": 1168 + }, + { + "epoch": 1.2026748971193415, + "grad_norm": 6.7991557121276855, + "learning_rate": 3.4604166225493815e-05, + "loss": 0.297, + "step": 1169 + }, + { + "epoch": 1.2037037037037037, + "grad_norm": 9.933700561523438, + "learning_rate": 3.460027925765535e-05, + "loss": 0.4777, + "step": 1170 + }, + { + "epoch": 1.204732510288066, + "grad_norm": 10.830368041992188, + "learning_rate": 3.4596373631116855e-05, + "loss": 0.7994, + "step": 1171 + }, + { + "epoch": 1.205761316872428, + "grad_norm": 7.643840789794922, + "learning_rate": 3.459244935234627e-05, + "loss": 0.7322, + "step": 1172 + }, + { + "epoch": 1.2067901234567902, + "grad_norm": 10.263672828674316, + "learning_rate": 3.458850642784241e-05, + "loss": 0.6891, + "step": 1173 + }, + { + "epoch": 1.2078189300411522, + "grad_norm": 8.42940902709961, + "learning_rate": 3.4584544864135e-05, + "loss": 0.4517, + "step": 1174 + }, + { + "epoch": 1.2088477366255144, + "grad_norm": 8.477632522583008, + "learning_rate": 3.4580564667784594e-05, + "loss": 0.3482, + "step": 1175 + }, + { + "epoch": 1.2098765432098766, + "grad_norm": 7.089756011962891, + "learning_rate": 3.4576565845382644e-05, + "loss": 0.3659, + "step": 1176 + }, + { + "epoch": 1.2109053497942388, + "grad_norm": 8.470621109008789, + "learning_rate": 3.4572548403551405e-05, + "loss": 0.4379, + "step": 1177 + }, + { + "epoch": 1.2119341563786008, + "grad_norm": 15.569647789001465, + "learning_rate": 3.4568512348944e-05, + "loss": 1.4326, + "step": 1178 + }, + { + "epoch": 1.212962962962963, + "grad_norm": 18.194835662841797, + "learning_rate": 3.456445768824436e-05, + "loss": 1.7587, + "step": 1179 + }, + { + "epoch": 1.213991769547325, + "grad_norm": 14.349352836608887, + "learning_rate": 3.456038442816724e-05, + "loss": 2.3312, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_Qnli-dev_cosine_accuracy": 0.693359375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8052411079406738, + "eval_Qnli-dev_cosine_ap": 0.7487309503825428, + "eval_Qnli-dev_cosine_f1": 0.687615526802218, + "eval_Qnli-dev_cosine_f1_threshold": 0.7747142314910889, + "eval_Qnli-dev_cosine_precision": 0.6098360655737705, + "eval_Qnli-dev_cosine_recall": 0.788135593220339, + "eval_Qnli-dev_dot_accuracy": 0.65234375, + "eval_Qnli-dev_dot_accuracy_threshold": 426.8647766113281, + "eval_Qnli-dev_dot_ap": 0.6712063042870943, + "eval_Qnli-dev_dot_f1": 0.6743421052631579, + "eval_Qnli-dev_dot_f1_threshold": 352.2708740234375, + "eval_Qnli-dev_dot_precision": 0.5510752688172043, + "eval_Qnli-dev_dot_recall": 0.8686440677966102, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.538395881652832, + "eval_Qnli-dev_euclidean_ap": 0.7583458440082088, + "eval_Qnli-dev_euclidean_f1": 0.6911764705882353, + "eval_Qnli-dev_euclidean_f1_threshold": 14.996305465698242, + "eval_Qnli-dev_euclidean_precision": 0.6103896103896104, + "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 286.489501953125, + "eval_Qnli-dev_manhattan_ap": 0.7583351034380817, + "eval_Qnli-dev_manhattan_f1": 0.6907630522088354, + "eval_Qnli-dev_manhattan_f1_threshold": 295.4446716308594, + "eval_Qnli-dev_manhattan_precision": 0.6564885496183206, + "eval_Qnli-dev_manhattan_recall": 0.7288135593220338, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 426.8647766113281, + "eval_Qnli-dev_max_ap": 0.7583458440082088, + "eval_Qnli-dev_max_f1": 0.6911764705882353, + "eval_Qnli-dev_max_f1_threshold": 352.2708740234375, + "eval_Qnli-dev_max_precision": 0.6564885496183206, + "eval_Qnli-dev_max_recall": 0.8686440677966102, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8811556696891785, + "eval_allNLI-dev_cosine_ap": 0.6206064432918149, + "eval_allNLI-dev_cosine_f1": 0.6299559471365639, + "eval_allNLI-dev_cosine_f1_threshold": 0.786278486251831, + "eval_allNLI-dev_cosine_precision": 0.5088967971530249, + "eval_allNLI-dev_cosine_recall": 0.8265895953757225, + "eval_allNLI-dev_dot_accuracy": 0.6953125, + "eval_allNLI-dev_dot_accuracy_threshold": 458.428955078125, + "eval_allNLI-dev_dot_ap": 0.5528922522101817, + "eval_allNLI-dev_dot_f1": 0.6000000000000001, + "eval_allNLI-dev_dot_f1_threshold": 379.02679443359375, + "eval_allNLI-dev_dot_precision": 0.5019455252918288, + "eval_allNLI-dev_dot_recall": 0.7456647398843931, + "eval_allNLI-dev_euclidean_accuracy": 0.740234375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.912817001342773, + "eval_allNLI-dev_euclidean_ap": 0.6263417077919412, + "eval_allNLI-dev_euclidean_f1": 0.6437768240343347, + "eval_allNLI-dev_euclidean_f1_threshold": 14.441347122192383, + "eval_allNLI-dev_euclidean_precision": 0.5119453924914675, + "eval_allNLI-dev_euclidean_recall": 0.8670520231213873, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 229.67034912109375, + "eval_allNLI-dev_manhattan_ap": 0.6233519858399221, + "eval_allNLI-dev_manhattan_f1": 0.6391304347826088, + "eval_allNLI-dev_manhattan_f1_threshold": 297.35443115234375, + "eval_allNLI-dev_manhattan_precision": 0.5121951219512195, + "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, + "eval_allNLI-dev_max_accuracy": 0.740234375, + "eval_allNLI-dev_max_accuracy_threshold": 458.428955078125, + "eval_allNLI-dev_max_ap": 0.6263417077919412, + "eval_allNLI-dev_max_f1": 0.6437768240343347, + "eval_allNLI-dev_max_f1_threshold": 379.02679443359375, + "eval_allNLI-dev_max_precision": 0.5121951219512195, + "eval_allNLI-dev_max_recall": 0.8670520231213873, + "eval_sequential_score": 0.7583458440082088, + "eval_sts-test_pearson_cosine": 0.8269349269759517, + "eval_sts-test_pearson_dot": 0.8126874113505397, + "eval_sts-test_pearson_euclidean": 0.8597033444697677, + "eval_sts-test_pearson_manhattan": 0.8579813103799231, + "eval_sts-test_pearson_max": 0.8597033444697677, + "eval_sts-test_spearman_cosine": 0.8598110867567574, + "eval_sts-test_spearman_dot": 0.7956357193634666, + "eval_sts-test_spearman_euclidean": 0.8565224060596263, + "eval_sts-test_spearman_manhattan": 0.8547605936499083, + "eval_sts-test_spearman_max": 0.8598110867567574, + "eval_vitaminc-pairs_loss": 2.8959200382232666, + "eval_vitaminc-pairs_runtime": 3.2217, + "eval_vitaminc-pairs_samples_per_second": 39.73, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_negation-triplets_loss": 0.9950482249259949, + "eval_negation-triplets_runtime": 0.7594, + "eval_negation-triplets_samples_per_second": 168.55, + "eval_negation-triplets_steps_per_second": 1.317, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_scitail-pairs-pos_loss": 0.1581113338470459, + "eval_scitail-pairs-pos_runtime": 0.8461, + "eval_scitail-pairs-pos_samples_per_second": 151.284, + "eval_scitail-pairs-pos_steps_per_second": 1.182, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_scitail-pairs-qa_loss": 0.0018547942163422704, + "eval_scitail-pairs-qa_runtime": 0.5864, + "eval_scitail-pairs-qa_samples_per_second": 218.28, + "eval_scitail-pairs-qa_steps_per_second": 1.705, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_xsum-pairs_loss": 0.37494468688964844, + "eval_xsum-pairs_runtime": 3.0309, + "eval_xsum-pairs_samples_per_second": 42.232, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_sciq_pairs_loss": 0.10013505816459656, + "eval_sciq_pairs_runtime": 3.4619, + "eval_sciq_pairs_samples_per_second": 36.974, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_qasc_pairs_loss": 0.2670193314552307, + "eval_qasc_pairs_runtime": 0.6174, + "eval_qasc_pairs_samples_per_second": 207.326, + "eval_qasc_pairs_steps_per_second": 1.62, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_openbookqa_pairs_loss": 0.9293997287750244, + "eval_openbookqa_pairs_runtime": 0.6032, + "eval_openbookqa_pairs_samples_per_second": 212.191, + "eval_openbookqa_pairs_steps_per_second": 1.658, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_msmarco_pairs_loss": 1.0548136234283447, + "eval_msmarco_pairs_runtime": 1.5248, + "eval_msmarco_pairs_samples_per_second": 83.943, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_nq_pairs_loss": 1.0275964736938477, + "eval_nq_pairs_runtime": 2.9112, + "eval_nq_pairs_samples_per_second": 43.968, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_trivia_pairs_loss": 1.0199402570724487, + "eval_trivia_pairs_runtime": 3.4612, + "eval_trivia_pairs_samples_per_second": 36.981, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_gooaq_pairs_loss": 0.5339373350143433, + "eval_gooaq_pairs_runtime": 0.9598, + "eval_gooaq_pairs_samples_per_second": 133.364, + "eval_gooaq_pairs_steps_per_second": 1.042, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_paws-pos_loss": 0.023452265188097954, + "eval_paws-pos_runtime": 0.706, + "eval_paws-pos_samples_per_second": 181.293, + "eval_paws-pos_steps_per_second": 1.416, + "step": 1180 + }, + { + "epoch": 1.213991769547325, + "eval_global_dataset_loss": 0.49108394980430603, + "eval_global_dataset_runtime": 13.391, + "eval_global_dataset_samples_per_second": 31.066, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1180 + }, + { + "epoch": 1.2150205761316872, + "grad_norm": 9.289645195007324, + "learning_rate": 3.455629257545818e-05, + "loss": 0.5342, + "step": 1181 + }, + { + "epoch": 1.2160493827160495, + "grad_norm": 10.020589828491211, + "learning_rate": 3.4552182136893516e-05, + "loss": 0.8755, + "step": 1182 + }, + { + "epoch": 1.2170781893004115, + "grad_norm": 6.009487152099609, + "learning_rate": 3.4548053119280386e-05, + "loss": 0.2909, + "step": 1183 + }, + { + "epoch": 1.2181069958847737, + "grad_norm": 8.08018970489502, + "learning_rate": 3.454390552945665e-05, + "loss": 0.4853, + "step": 1184 + }, + { + "epoch": 1.2191358024691359, + "grad_norm": 7.848354339599609, + "learning_rate": 3.453973937429098e-05, + "loss": 0.3582, + "step": 1185 + }, + { + "epoch": 1.2201646090534979, + "grad_norm": 8.562787055969238, + "learning_rate": 3.453555466068275e-05, + "loss": 0.7855, + "step": 1186 + }, + { + "epoch": 1.22119341563786, + "grad_norm": 7.858839511871338, + "learning_rate": 3.4531351395562074e-05, + "loss": 0.4932, + "step": 1187 + }, + { + "epoch": 1.2222222222222223, + "grad_norm": 9.985002517700195, + "learning_rate": 3.4527129585889806e-05, + "loss": 0.7346, + "step": 1188 + }, + { + "epoch": 1.2232510288065843, + "grad_norm": 8.69581127166748, + "learning_rate": 3.452288923865751e-05, + "loss": 0.853, + "step": 1189 + }, + { + "epoch": 1.2242798353909465, + "grad_norm": 6.780130863189697, + "learning_rate": 3.451863036088743e-05, + "loss": 0.2163, + "step": 1190 + }, + { + "epoch": 1.2253086419753085, + "grad_norm": 7.74264669418335, + "learning_rate": 3.451435295963251e-05, + "loss": 0.837, + "step": 1191 + }, + { + "epoch": 1.2263374485596708, + "grad_norm": 7.056318283081055, + "learning_rate": 3.451005704197637e-05, + "loss": 0.3778, + "step": 1192 + }, + { + "epoch": 1.227366255144033, + "grad_norm": 13.02022647857666, + "learning_rate": 3.450574261503329e-05, + "loss": 1.1152, + "step": 1193 + }, + { + "epoch": 1.228395061728395, + "grad_norm": 1.0287100076675415, + "learning_rate": 3.450140968594821e-05, + "loss": 0.0215, + "step": 1194 + }, + { + "epoch": 1.2294238683127572, + "grad_norm": 1.984660029411316, + "learning_rate": 3.44970582618967e-05, + "loss": 0.0653, + "step": 1195 + }, + { + "epoch": 1.2304526748971194, + "grad_norm": 2.628512144088745, + "learning_rate": 3.4492688350084975e-05, + "loss": 0.0659, + "step": 1196 + }, + { + "epoch": 1.2314814814814814, + "grad_norm": 12.873212814331055, + "learning_rate": 3.448829995774985e-05, + "loss": 0.9698, + "step": 1197 + }, + { + "epoch": 1.2325102880658436, + "grad_norm": 0.1176617294549942, + "learning_rate": 3.448389309215875e-05, + "loss": 0.0016, + "step": 1198 + }, + { + "epoch": 1.2335390946502058, + "grad_norm": 7.005835056304932, + "learning_rate": 3.4479467760609685e-05, + "loss": 0.327, + "step": 1199 + }, + { + "epoch": 1.2345679012345678, + "grad_norm": 9.871505737304688, + "learning_rate": 3.447502397043127e-05, + "loss": 0.5392, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_Qnli-dev_cosine_accuracy": 0.73046875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7553156614303589, + "eval_Qnli-dev_cosine_ap": 0.7644410125365041, + "eval_Qnli-dev_cosine_f1": 0.7136929460580914, + "eval_Qnli-dev_cosine_f1_threshold": 0.7551485300064087, + "eval_Qnli-dev_cosine_precision": 0.6991869918699187, + "eval_Qnli-dev_cosine_recall": 0.7288135593220338, + "eval_Qnli-dev_dot_accuracy": 0.68359375, + "eval_Qnli-dev_dot_accuracy_threshold": 336.83642578125, + "eval_Qnli-dev_dot_ap": 0.7022244731417389, + "eval_Qnli-dev_dot_f1": 0.6980802792321116, + "eval_Qnli-dev_dot_f1_threshold": 313.0473937988281, + "eval_Qnli-dev_dot_precision": 0.5934718100890207, + "eval_Qnli-dev_dot_recall": 0.847457627118644, + "eval_Qnli-dev_euclidean_accuracy": 0.724609375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.911373138427734, + "eval_Qnli-dev_euclidean_ap": 0.7727905960549314, + "eval_Qnli-dev_euclidean_f1": 0.7065026362038664, + "eval_Qnli-dev_euclidean_f1_threshold": 16.72470474243164, + "eval_Qnli-dev_euclidean_precision": 0.6036036036036037, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.724609375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 322.9418640136719, + "eval_Qnli-dev_manhattan_ap": 0.7764063644438948, + "eval_Qnli-dev_manhattan_f1": 0.7210626185958254, + "eval_Qnli-dev_manhattan_f1_threshold": 331.51031494140625, + "eval_Qnli-dev_manhattan_precision": 0.6529209621993127, + "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, + "eval_Qnli-dev_max_accuracy": 0.73046875, + "eval_Qnli-dev_max_accuracy_threshold": 336.83642578125, + "eval_Qnli-dev_max_ap": 0.7764063644438948, + "eval_Qnli-dev_max_f1": 0.7210626185958254, + "eval_Qnli-dev_max_f1_threshold": 331.51031494140625, + "eval_Qnli-dev_max_precision": 0.6991869918699187, + "eval_Qnli-dev_max_recall": 0.8516949152542372, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8570448160171509, + "eval_allNLI-dev_cosine_ap": 0.6091000724328542, + "eval_allNLI-dev_cosine_f1": 0.6394557823129251, + "eval_allNLI-dev_cosine_f1_threshold": 0.7650203704833984, + "eval_allNLI-dev_cosine_precision": 0.5261194029850746, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.701171875, + "eval_allNLI-dev_dot_accuracy_threshold": 382.50775146484375, + "eval_allNLI-dev_dot_ap": 0.5395734814507898, + "eval_allNLI-dev_dot_f1": 0.5950782997762865, + "eval_allNLI-dev_dot_f1_threshold": 326.72900390625, + "eval_allNLI-dev_dot_precision": 0.4854014598540146, + "eval_allNLI-dev_dot_recall": 0.7687861271676301, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.244440078735352, + "eval_allNLI-dev_euclidean_ap": 0.6148857577952056, + "eval_allNLI-dev_euclidean_f1": 0.6301369863013698, + "eval_allNLI-dev_euclidean_f1_threshold": 14.23716926574707, + "eval_allNLI-dev_euclidean_precision": 0.5207547169811321, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 243.26353454589844, + "eval_allNLI-dev_manhattan_ap": 0.6103850120371437, + "eval_allNLI-dev_manhattan_f1": 0.6351931330472103, + "eval_allNLI-dev_manhattan_f1_threshold": 307.9858093261719, + "eval_allNLI-dev_manhattan_precision": 0.5051194539249146, + "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 382.50775146484375, + "eval_allNLI-dev_max_ap": 0.6148857577952056, + "eval_allNLI-dev_max_f1": 0.6394557823129251, + "eval_allNLI-dev_max_f1_threshold": 326.72900390625, + "eval_allNLI-dev_max_precision": 0.5261194029850746, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7764063644438948, + "eval_sts-test_pearson_cosine": 0.8235604121641702, + "eval_sts-test_pearson_dot": 0.80828423316969, + "eval_sts-test_pearson_euclidean": 0.8522052534133291, + "eval_sts-test_pearson_manhattan": 0.849902558974327, + "eval_sts-test_pearson_max": 0.8522052534133291, + "eval_sts-test_spearman_cosine": 0.8553053507548257, + "eval_sts-test_spearman_dot": 0.7913984909298833, + "eval_sts-test_spearman_euclidean": 0.8489471362875971, + "eval_sts-test_spearman_manhattan": 0.8475455267808185, + "eval_sts-test_spearman_max": 0.8553053507548257, + "eval_vitaminc-pairs_loss": 2.840937852859497, + "eval_vitaminc-pairs_runtime": 3.2226, + "eval_vitaminc-pairs_samples_per_second": 39.719, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_negation-triplets_loss": 1.012229323387146, + "eval_negation-triplets_runtime": 0.7568, + "eval_negation-triplets_samples_per_second": 169.124, + "eval_negation-triplets_steps_per_second": 1.321, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_scitail-pairs-pos_loss": 0.17076808214187622, + "eval_scitail-pairs-pos_runtime": 0.8668, + "eval_scitail-pairs-pos_samples_per_second": 147.676, + "eval_scitail-pairs-pos_steps_per_second": 1.154, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_scitail-pairs-qa_loss": 0.00030888148467056453, + "eval_scitail-pairs-qa_runtime": 0.5951, + "eval_scitail-pairs-qa_samples_per_second": 215.078, + "eval_scitail-pairs-qa_steps_per_second": 1.68, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_xsum-pairs_loss": 0.3719870150089264, + "eval_xsum-pairs_runtime": 3.0306, + "eval_xsum-pairs_samples_per_second": 42.236, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_sciq_pairs_loss": 0.11966367810964584, + "eval_sciq_pairs_runtime": 3.492, + "eval_sciq_pairs_samples_per_second": 36.655, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_qasc_pairs_loss": 0.23714803159236908, + "eval_qasc_pairs_runtime": 0.618, + "eval_qasc_pairs_samples_per_second": 207.136, + "eval_qasc_pairs_steps_per_second": 1.618, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_openbookqa_pairs_loss": 0.9108031392097473, + "eval_openbookqa_pairs_runtime": 0.5834, + "eval_openbookqa_pairs_samples_per_second": 219.386, + "eval_openbookqa_pairs_steps_per_second": 1.714, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_msmarco_pairs_loss": 1.1426514387130737, + "eval_msmarco_pairs_runtime": 1.5148, + "eval_msmarco_pairs_samples_per_second": 84.501, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_nq_pairs_loss": 1.003651738166809, + "eval_nq_pairs_runtime": 2.8978, + "eval_nq_pairs_samples_per_second": 44.172, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_trivia_pairs_loss": 1.1226824522018433, + "eval_trivia_pairs_runtime": 3.4587, + "eval_trivia_pairs_samples_per_second": 37.008, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_gooaq_pairs_loss": 0.5596855878829956, + "eval_gooaq_pairs_runtime": 0.9703, + "eval_gooaq_pairs_samples_per_second": 131.913, + "eval_gooaq_pairs_steps_per_second": 1.031, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_paws-pos_loss": 0.0236971452832222, + "eval_paws-pos_runtime": 0.6866, + "eval_paws-pos_samples_per_second": 186.416, + "eval_paws-pos_steps_per_second": 1.456, + "step": 1200 + }, + { + "epoch": 1.2345679012345678, + "eval_global_dataset_loss": 0.4764837920665741, + "eval_global_dataset_runtime": 13.3664, + "eval_global_dataset_samples_per_second": 31.123, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1200 + }, + { + "epoch": 1.23559670781893, + "grad_norm": 10.80827522277832, + "learning_rate": 3.4470561728982665e-05, + "loss": 0.8535, + "step": 1201 + }, + { + "epoch": 1.2366255144032923, + "grad_norm": 0.27277591824531555, + "learning_rate": 3.44660810436536e-05, + "loss": 0.0045, + "step": 1202 + }, + { + "epoch": 1.2376543209876543, + "grad_norm": 8.913864135742188, + "learning_rate": 3.4461581921864334e-05, + "loss": 0.7916, + "step": 1203 + }, + { + "epoch": 1.2386831275720165, + "grad_norm": 13.311179161071777, + "learning_rate": 3.4457064371065673e-05, + "loss": 0.9205, + "step": 1204 + }, + { + "epoch": 1.2397119341563787, + "grad_norm": 6.5711188316345215, + "learning_rate": 3.445252839873894e-05, + "loss": 0.242, + "step": 1205 + }, + { + "epoch": 1.2407407407407407, + "grad_norm": 13.579578399658203, + "learning_rate": 3.4447974012395956e-05, + "loss": 1.0257, + "step": 1206 + }, + { + "epoch": 1.241769547325103, + "grad_norm": 10.705375671386719, + "learning_rate": 3.444340121957905e-05, + "loss": 0.7247, + "step": 1207 + }, + { + "epoch": 1.242798353909465, + "grad_norm": 8.617849349975586, + "learning_rate": 3.4438810027861015e-05, + "loss": 0.3461, + "step": 1208 + }, + { + "epoch": 1.2438271604938271, + "grad_norm": 8.483906745910645, + "learning_rate": 3.4434200444845126e-05, + "loss": 0.303, + "step": 1209 + }, + { + "epoch": 1.2448559670781894, + "grad_norm": 1.0907119512557983, + "learning_rate": 3.442957247816513e-05, + "loss": 0.0228, + "step": 1210 + }, + { + "epoch": 1.2458847736625513, + "grad_norm": 15.170351028442383, + "learning_rate": 3.442492613548518e-05, + "loss": 1.2976, + "step": 1211 + }, + { + "epoch": 1.2469135802469136, + "grad_norm": 13.963634490966797, + "learning_rate": 3.4420261424499885e-05, + "loss": 1.173, + "step": 1212 + }, + { + "epoch": 1.2479423868312758, + "grad_norm": 14.074200630187988, + "learning_rate": 3.4415578352934285e-05, + "loss": 0.9466, + "step": 1213 + }, + { + "epoch": 1.2489711934156378, + "grad_norm": 6.741833209991455, + "learning_rate": 3.44108769285438e-05, + "loss": 0.2666, + "step": 1214 + }, + { + "epoch": 1.25, + "grad_norm": 13.059562683105469, + "learning_rate": 3.440615715911426e-05, + "loss": 0.9441, + "step": 1215 + }, + { + "epoch": 1.2510288065843622, + "grad_norm": 6.839930534362793, + "learning_rate": 3.440141905246187e-05, + "loss": 0.2589, + "step": 1216 + }, + { + "epoch": 1.2520576131687242, + "grad_norm": 1.1594938039779663, + "learning_rate": 3.43966626164332e-05, + "loss": 0.0168, + "step": 1217 + }, + { + "epoch": 1.2530864197530864, + "grad_norm": 6.989406108856201, + "learning_rate": 3.439188785890518e-05, + "loss": 0.3212, + "step": 1218 + }, + { + "epoch": 1.2541152263374484, + "grad_norm": 7.311746597290039, + "learning_rate": 3.438709478778507e-05, + "loss": 0.3108, + "step": 1219 + }, + { + "epoch": 1.2551440329218106, + "grad_norm": 7.0762529373168945, + "learning_rate": 3.438228341101048e-05, + "loss": 0.3269, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_Qnli-dev_cosine_accuracy": 0.720703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7860354781150818, + "eval_Qnli-dev_cosine_ap": 0.7633884571301464, + "eval_Qnli-dev_cosine_f1": 0.7151051625239004, + "eval_Qnli-dev_cosine_f1_threshold": 0.7650403380393982, + "eval_Qnli-dev_cosine_precision": 0.6515679442508711, + "eval_Qnli-dev_cosine_recall": 0.7923728813559322, + "eval_Qnli-dev_dot_accuracy": 0.681640625, + "eval_Qnli-dev_dot_accuracy_threshold": 369.2494812011719, + "eval_Qnli-dev_dot_ap": 0.7005340525347556, + "eval_Qnli-dev_dot_f1": 0.6983546617915904, + "eval_Qnli-dev_dot_f1_threshold": 357.15740966796875, + "eval_Qnli-dev_dot_precision": 0.6141479099678456, + "eval_Qnli-dev_dot_recall": 0.809322033898305, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.266681671142578, + "eval_Qnli-dev_euclidean_ap": 0.7715839499516464, + "eval_Qnli-dev_euclidean_f1": 0.7142857142857143, + "eval_Qnli-dev_euclidean_f1_threshold": 14.948558807373047, + "eval_Qnli-dev_euclidean_precision": 0.6560283687943262, + "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 295.34088134765625, + "eval_Qnli-dev_manhattan_ap": 0.7719441944715049, + "eval_Qnli-dev_manhattan_f1": 0.7161904761904763, + "eval_Qnli-dev_manhattan_f1_threshold": 311.98638916015625, + "eval_Qnli-dev_manhattan_precision": 0.6505190311418685, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 369.2494812011719, + "eval_Qnli-dev_max_ap": 0.7719441944715049, + "eval_Qnli-dev_max_f1": 0.7161904761904763, + "eval_Qnli-dev_max_f1_threshold": 357.15740966796875, + "eval_Qnli-dev_max_precision": 0.6560283687943262, + "eval_Qnli-dev_max_recall": 0.809322033898305, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8683989644050598, + "eval_allNLI-dev_cosine_ap": 0.6251892984393883, + "eval_allNLI-dev_cosine_f1": 0.6313465783664459, + "eval_allNLI-dev_cosine_f1_threshold": 0.7872051000595093, + "eval_allNLI-dev_cosine_precision": 0.5107142857142857, + "eval_allNLI-dev_cosine_recall": 0.8265895953757225, + "eval_allNLI-dev_dot_accuracy": 0.712890625, + "eval_allNLI-dev_dot_accuracy_threshold": 412.98309326171875, + "eval_allNLI-dev_dot_ap": 0.5622529803433328, + "eval_allNLI-dev_dot_f1": 0.610091743119266, + "eval_allNLI-dev_dot_f1_threshold": 367.0179443359375, + "eval_allNLI-dev_dot_precision": 0.5057034220532319, + "eval_allNLI-dev_dot_recall": 0.7687861271676301, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.184566497802734, + "eval_allNLI-dev_euclidean_ap": 0.6289609975782239, + "eval_allNLI-dev_euclidean_f1": 0.6412556053811659, + "eval_allNLI-dev_euclidean_f1_threshold": 14.102167129516602, + "eval_allNLI-dev_euclidean_precision": 0.5238095238095238, + "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 234.21710205078125, + "eval_allNLI-dev_manhattan_ap": 0.6266468776315474, + "eval_allNLI-dev_manhattan_f1": 0.6417582417582418, + "eval_allNLI-dev_manhattan_f1_threshold": 295.9653015136719, + "eval_allNLI-dev_manhattan_precision": 0.5177304964539007, + "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 412.98309326171875, + "eval_allNLI-dev_max_ap": 0.6289609975782239, + "eval_allNLI-dev_max_f1": 0.6417582417582418, + "eval_allNLI-dev_max_f1_threshold": 367.0179443359375, + "eval_allNLI-dev_max_precision": 0.5238095238095238, + "eval_allNLI-dev_max_recall": 0.8439306358381503, + "eval_sequential_score": 0.7719441944715049, + "eval_sts-test_pearson_cosine": 0.8290268288134766, + "eval_sts-test_pearson_dot": 0.8175868265444567, + "eval_sts-test_pearson_euclidean": 0.8543569326462799, + "eval_sts-test_pearson_manhattan": 0.8525659243366586, + "eval_sts-test_pearson_max": 0.8543569326462799, + "eval_sts-test_spearman_cosine": 0.853531321250748, + "eval_sts-test_spearman_dot": 0.8038217095997041, + "eval_sts-test_spearman_euclidean": 0.8480709960920763, + "eval_sts-test_spearman_manhattan": 0.8464784777042019, + "eval_sts-test_spearman_max": 0.853531321250748, + "eval_vitaminc-pairs_loss": 2.7786405086517334, + "eval_vitaminc-pairs_runtime": 3.2222, + "eval_vitaminc-pairs_samples_per_second": 39.724, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_negation-triplets_loss": 0.9489306211471558, + "eval_negation-triplets_runtime": 0.7866, + "eval_negation-triplets_samples_per_second": 162.732, + "eval_negation-triplets_steps_per_second": 1.271, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_scitail-pairs-pos_loss": 0.15525153279304504, + "eval_scitail-pairs-pos_runtime": 0.9089, + "eval_scitail-pairs-pos_samples_per_second": 140.834, + "eval_scitail-pairs-pos_steps_per_second": 1.1, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_scitail-pairs-qa_loss": 0.0006275809137150645, + "eval_scitail-pairs-qa_runtime": 0.5938, + "eval_scitail-pairs-qa_samples_per_second": 215.547, + "eval_scitail-pairs-qa_steps_per_second": 1.684, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_xsum-pairs_loss": 0.37118861079216003, + "eval_xsum-pairs_runtime": 3.0288, + "eval_xsum-pairs_samples_per_second": 42.261, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_sciq_pairs_loss": 0.11241748929023743, + "eval_sciq_pairs_runtime": 3.4875, + "eval_sciq_pairs_samples_per_second": 36.703, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_qasc_pairs_loss": 0.22180773317813873, + "eval_qasc_pairs_runtime": 0.6153, + "eval_qasc_pairs_samples_per_second": 208.013, + "eval_qasc_pairs_steps_per_second": 1.625, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_openbookqa_pairs_loss": 0.8669531941413879, + "eval_openbookqa_pairs_runtime": 0.5998, + "eval_openbookqa_pairs_samples_per_second": 213.395, + "eval_openbookqa_pairs_steps_per_second": 1.667, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_msmarco_pairs_loss": 0.9564771056175232, + "eval_msmarco_pairs_runtime": 1.5215, + "eval_msmarco_pairs_samples_per_second": 84.127, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_nq_pairs_loss": 0.8097667098045349, + "eval_nq_pairs_runtime": 2.9024, + "eval_nq_pairs_samples_per_second": 44.102, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_trivia_pairs_loss": 0.9328098893165588, + "eval_trivia_pairs_runtime": 3.4398, + "eval_trivia_pairs_samples_per_second": 37.211, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_gooaq_pairs_loss": 0.46518075466156006, + "eval_gooaq_pairs_runtime": 0.9561, + "eval_gooaq_pairs_samples_per_second": 133.874, + "eval_gooaq_pairs_steps_per_second": 1.046, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_paws-pos_loss": 0.023140598088502884, + "eval_paws-pos_runtime": 0.6996, + "eval_paws-pos_samples_per_second": 182.966, + "eval_paws-pos_steps_per_second": 1.429, + "step": 1220 + }, + { + "epoch": 1.2551440329218106, + "eval_global_dataset_loss": 0.44948717951774597, + "eval_global_dataset_runtime": 13.4315, + "eval_global_dataset_samples_per_second": 30.972, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1220 + }, + { + "epoch": 1.2561728395061729, + "grad_norm": 7.284200191497803, + "learning_rate": 3.4377453736549316e-05, + "loss": 0.341, + "step": 1221 + }, + { + "epoch": 1.257201646090535, + "grad_norm": 0.6413462162017822, + "learning_rate": 3.437260577239979e-05, + "loss": 0.0162, + "step": 1222 + }, + { + "epoch": 1.258230452674897, + "grad_norm": 7.572662353515625, + "learning_rate": 3.436773952659041e-05, + "loss": 0.3307, + "step": 1223 + }, + { + "epoch": 1.2592592592592593, + "grad_norm": 6.2290215492248535, + "learning_rate": 3.4362855007179945e-05, + "loss": 0.2021, + "step": 1224 + }, + { + "epoch": 1.2602880658436213, + "grad_norm": 17.566606521606445, + "learning_rate": 3.435795222225745e-05, + "loss": 2.8801, + "step": 1225 + }, + { + "epoch": 1.2613168724279835, + "grad_norm": 11.829737663269043, + "learning_rate": 3.43530311799422e-05, + "loss": 1.1785, + "step": 1226 + }, + { + "epoch": 1.2623456790123457, + "grad_norm": 8.333463668823242, + "learning_rate": 3.4348091888383726e-05, + "loss": 0.7136, + "step": 1227 + }, + { + "epoch": 1.263374485596708, + "grad_norm": 8.75113582611084, + "learning_rate": 3.434313435576178e-05, + "loss": 0.6168, + "step": 1228 + }, + { + "epoch": 1.26440329218107, + "grad_norm": 4.275041103363037, + "learning_rate": 3.4338158590286305e-05, + "loss": 0.2191, + "step": 1229 + }, + { + "epoch": 1.2654320987654322, + "grad_norm": 9.158637046813965, + "learning_rate": 3.4333164600197463e-05, + "loss": 0.5133, + "step": 1230 + }, + { + "epoch": 1.2664609053497942, + "grad_norm": 12.479742050170898, + "learning_rate": 3.432815239376557e-05, + "loss": 1.1907, + "step": 1231 + }, + { + "epoch": 1.2674897119341564, + "grad_norm": 14.734970092773438, + "learning_rate": 3.432312197929114e-05, + "loss": 1.2727, + "step": 1232 + }, + { + "epoch": 1.2685185185185186, + "grad_norm": 7.485629081726074, + "learning_rate": 3.431807336510481e-05, + "loss": 0.3664, + "step": 1233 + }, + { + "epoch": 1.2695473251028806, + "grad_norm": 8.2068452835083, + "learning_rate": 3.431300655956737e-05, + "loss": 0.45, + "step": 1234 + }, + { + "epoch": 1.2705761316872428, + "grad_norm": 7.955653667449951, + "learning_rate": 3.430792157106975e-05, + "loss": 0.4915, + "step": 1235 + }, + { + "epoch": 1.2716049382716048, + "grad_norm": 10.168331146240234, + "learning_rate": 3.430281840803297e-05, + "loss": 0.8899, + "step": 1236 + }, + { + "epoch": 1.272633744855967, + "grad_norm": 7.463403224945068, + "learning_rate": 3.429769707890816e-05, + "loss": 0.4613, + "step": 1237 + }, + { + "epoch": 1.2736625514403292, + "grad_norm": 7.143692970275879, + "learning_rate": 3.429255759217652e-05, + "loss": 0.4084, + "step": 1238 + }, + { + "epoch": 1.2746913580246915, + "grad_norm": 7.729737281799316, + "learning_rate": 3.428739995634934e-05, + "loss": 0.3376, + "step": 1239 + }, + { + "epoch": 1.2757201646090535, + "grad_norm": 5.447694778442383, + "learning_rate": 3.4282224179967966e-05, + "loss": 0.2337, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_Qnli-dev_cosine_accuracy": 0.720703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7990207672119141, + "eval_Qnli-dev_cosine_ap": 0.7557640056578084, + "eval_Qnli-dev_cosine_f1": 0.7027027027027027, + "eval_Qnli-dev_cosine_f1_threshold": 0.7990207672119141, + "eval_Qnli-dev_cosine_precision": 0.689795918367347, + "eval_Qnli-dev_cosine_recall": 0.7161016949152542, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 403.472412109375, + "eval_Qnli-dev_dot_ap": 0.6544341606245616, + "eval_Qnli-dev_dot_f1": 0.6677067082683307, + "eval_Qnli-dev_dot_f1_threshold": 342.8936767578125, + "eval_Qnli-dev_dot_precision": 0.528395061728395, + "eval_Qnli-dev_dot_recall": 0.9067796610169492, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.223051071166992, + "eval_Qnli-dev_euclidean_ap": 0.7661561981934673, + "eval_Qnli-dev_euclidean_f1": 0.7115384615384616, + "eval_Qnli-dev_euclidean_f1_threshold": 14.778949737548828, + "eval_Qnli-dev_euclidean_precision": 0.6514084507042254, + "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 282.96795654296875, + "eval_Qnli-dev_manhattan_ap": 0.7694773667201545, + "eval_Qnli-dev_manhattan_f1": 0.7050092764378479, + "eval_Qnli-dev_manhattan_f1_threshold": 313.93878173828125, + "eval_Qnli-dev_manhattan_precision": 0.6270627062706271, + "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, + "eval_Qnli-dev_max_accuracy": 0.72265625, + "eval_Qnli-dev_max_accuracy_threshold": 403.472412109375, + "eval_Qnli-dev_max_ap": 0.7694773667201545, + "eval_Qnli-dev_max_f1": 0.7115384615384616, + "eval_Qnli-dev_max_f1_threshold": 342.8936767578125, + "eval_Qnli-dev_max_precision": 0.689795918367347, + "eval_Qnli-dev_max_recall": 0.9067796610169492, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8681868314743042, + "eval_allNLI-dev_cosine_ap": 0.6185155955214422, + "eval_allNLI-dev_cosine_f1": 0.6442953020134229, + "eval_allNLI-dev_cosine_f1_threshold": 0.7863482236862183, + "eval_allNLI-dev_cosine_precision": 0.5255474452554745, + "eval_allNLI-dev_cosine_recall": 0.8323699421965318, + "eval_allNLI-dev_dot_accuracy": 0.712890625, + "eval_allNLI-dev_dot_accuracy_threshold": 406.2081298828125, + "eval_allNLI-dev_dot_ap": 0.5474145940825146, + "eval_allNLI-dev_dot_f1": 0.5979381443298969, + "eval_allNLI-dev_dot_f1_threshold": 360.92095947265625, + "eval_allNLI-dev_dot_precision": 0.46474358974358976, + "eval_allNLI-dev_dot_recall": 0.838150289017341, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.027508735656738, + "eval_allNLI-dev_euclidean_ap": 0.6248008682070734, + "eval_allNLI-dev_euclidean_f1": 0.6482758620689655, + "eval_allNLI-dev_euclidean_f1_threshold": 14.024650573730469, + "eval_allNLI-dev_euclidean_precision": 0.5381679389312977, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 249.40318298339844, + "eval_allNLI-dev_manhattan_ap": 0.6226611056171882, + "eval_allNLI-dev_manhattan_f1": 0.6471910112359551, + "eval_allNLI-dev_manhattan_f1_threshold": 294.674560546875, + "eval_allNLI-dev_manhattan_precision": 0.5294117647058824, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 406.2081298828125, + "eval_allNLI-dev_max_ap": 0.6248008682070734, + "eval_allNLI-dev_max_f1": 0.6482758620689655, + "eval_allNLI-dev_max_f1_threshold": 360.92095947265625, + "eval_allNLI-dev_max_precision": 0.5381679389312977, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7694773667201545, + "eval_sts-test_pearson_cosine": 0.8282825065385547, + "eval_sts-test_pearson_dot": 0.8187070495760544, + "eval_sts-test_pearson_euclidean": 0.857594156220547, + "eval_sts-test_pearson_manhattan": 0.8547876350215236, + "eval_sts-test_pearson_max": 0.857594156220547, + "eval_sts-test_spearman_cosine": 0.8589356594045713, + "eval_sts-test_spearman_dot": 0.8098749442205503, + "eval_sts-test_spearman_euclidean": 0.8538316022146842, + "eval_sts-test_spearman_manhattan": 0.8513935087428194, + "eval_sts-test_spearman_max": 0.8589356594045713, + "eval_vitaminc-pairs_loss": 2.63816499710083, + "eval_vitaminc-pairs_runtime": 3.2121, + "eval_vitaminc-pairs_samples_per_second": 39.849, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_negation-triplets_loss": 0.9544748663902283, + "eval_negation-triplets_runtime": 0.7781, + "eval_negation-triplets_samples_per_second": 164.496, + "eval_negation-triplets_steps_per_second": 1.285, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_scitail-pairs-pos_loss": 0.16542193293571472, + "eval_scitail-pairs-pos_runtime": 0.8693, + "eval_scitail-pairs-pos_samples_per_second": 147.247, + "eval_scitail-pairs-pos_steps_per_second": 1.15, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_scitail-pairs-qa_loss": 0.0013295909157022834, + "eval_scitail-pairs-qa_runtime": 0.6064, + "eval_scitail-pairs-qa_samples_per_second": 211.091, + "eval_scitail-pairs-qa_steps_per_second": 1.649, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_xsum-pairs_loss": 0.35927343368530273, + "eval_xsum-pairs_runtime": 3.0303, + "eval_xsum-pairs_samples_per_second": 42.241, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_sciq_pairs_loss": 0.09439770132303238, + "eval_sciq_pairs_runtime": 3.5147, + "eval_sciq_pairs_samples_per_second": 36.419, + "eval_sciq_pairs_steps_per_second": 0.285, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_qasc_pairs_loss": 0.23836590349674225, + "eval_qasc_pairs_runtime": 0.6293, + "eval_qasc_pairs_samples_per_second": 203.416, + "eval_qasc_pairs_steps_per_second": 1.589, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_openbookqa_pairs_loss": 0.8297739624977112, + "eval_openbookqa_pairs_runtime": 0.6132, + "eval_openbookqa_pairs_samples_per_second": 208.746, + "eval_openbookqa_pairs_steps_per_second": 1.631, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_msmarco_pairs_loss": 0.9185351729393005, + "eval_msmarco_pairs_runtime": 1.531, + "eval_msmarco_pairs_samples_per_second": 83.606, + "eval_msmarco_pairs_steps_per_second": 0.653, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_nq_pairs_loss": 0.94951993227005, + "eval_nq_pairs_runtime": 2.9064, + "eval_nq_pairs_samples_per_second": 44.041, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_trivia_pairs_loss": 0.8465414643287659, + "eval_trivia_pairs_runtime": 3.454, + "eval_trivia_pairs_samples_per_second": 37.058, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_gooaq_pairs_loss": 0.5014381408691406, + "eval_gooaq_pairs_runtime": 0.9553, + "eval_gooaq_pairs_samples_per_second": 133.99, + "eval_gooaq_pairs_steps_per_second": 1.047, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_paws-pos_loss": 0.023835916072130203, + "eval_paws-pos_runtime": 0.7011, + "eval_paws-pos_samples_per_second": 182.58, + "eval_paws-pos_steps_per_second": 1.426, + "step": 1240 + }, + { + "epoch": 1.2757201646090535, + "eval_global_dataset_loss": 0.4214680790901184, + "eval_global_dataset_runtime": 13.4349, + "eval_global_dataset_samples_per_second": 30.964, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1240 + }, + { + "epoch": 1.2767489711934157, + "grad_norm": 10.583321571350098, + "learning_rate": 3.4277030271603763e-05, + "loss": 1.122, + "step": 1241 + }, + { + "epoch": 1.2777777777777777, + "grad_norm": 11.95809268951416, + "learning_rate": 3.427181823985814e-05, + "loss": 1.1248, + "step": 1242 + }, + { + "epoch": 1.27880658436214, + "grad_norm": 9.490169525146484, + "learning_rate": 3.4266588093362515e-05, + "loss": 0.5031, + "step": 1243 + }, + { + "epoch": 1.2798353909465021, + "grad_norm": 5.851307392120361, + "learning_rate": 3.426133984077831e-05, + "loss": 0.211, + "step": 1244 + }, + { + "epoch": 1.2808641975308643, + "grad_norm": 5.588681221008301, + "learning_rate": 3.425607349079693e-05, + "loss": 0.2251, + "step": 1245 + }, + { + "epoch": 1.2818930041152263, + "grad_norm": 14.139016151428223, + "learning_rate": 3.425078905213975e-05, + "loss": 1.2428, + "step": 1246 + }, + { + "epoch": 1.2829218106995885, + "grad_norm": 6.52968168258667, + "learning_rate": 3.4245486533558086e-05, + "loss": 0.4932, + "step": 1247 + }, + { + "epoch": 1.2839506172839505, + "grad_norm": 9.744477272033691, + "learning_rate": 3.424016594383322e-05, + "loss": 0.6784, + "step": 1248 + }, + { + "epoch": 1.2849794238683128, + "grad_norm": 6.064051628112793, + "learning_rate": 3.4234827291776355e-05, + "loss": 0.282, + "step": 1249 + }, + { + "epoch": 1.286008230452675, + "grad_norm": 5.103082656860352, + "learning_rate": 3.422947058622859e-05, + "loss": 0.1852, + "step": 1250 + }, + { + "epoch": 1.287037037037037, + "grad_norm": 1.28008234500885, + "learning_rate": 3.422409583606094e-05, + "loss": 0.0276, + "step": 1251 + }, + { + "epoch": 1.2880658436213992, + "grad_norm": 10.74918270111084, + "learning_rate": 3.421870305017428e-05, + "loss": 0.9884, + "step": 1252 + }, + { + "epoch": 1.2890946502057612, + "grad_norm": 5.395549297332764, + "learning_rate": 3.421329223749939e-05, + "loss": 0.1635, + "step": 1253 + }, + { + "epoch": 1.2901234567901234, + "grad_norm": 0.8584123849868774, + "learning_rate": 3.420786340699687e-05, + "loss": 0.035, + "step": 1254 + }, + { + "epoch": 1.2911522633744856, + "grad_norm": 8.618278503417969, + "learning_rate": 3.4202416567657175e-05, + "loss": 0.3808, + "step": 1255 + }, + { + "epoch": 1.2921810699588478, + "grad_norm": 12.293098449707031, + "learning_rate": 3.419695172850058e-05, + "loss": 1.009, + "step": 1256 + }, + { + "epoch": 1.2932098765432098, + "grad_norm": 5.01506233215332, + "learning_rate": 3.419146889857715e-05, + "loss": 0.135, + "step": 1257 + }, + { + "epoch": 1.294238683127572, + "grad_norm": 13.123412132263184, + "learning_rate": 3.418596808696679e-05, + "loss": 0.8017, + "step": 1258 + }, + { + "epoch": 1.295267489711934, + "grad_norm": 6.234143257141113, + "learning_rate": 3.418044930277914e-05, + "loss": 0.3615, + "step": 1259 + }, + { + "epoch": 1.2962962962962963, + "grad_norm": 9.980935096740723, + "learning_rate": 3.4174912555153616e-05, + "loss": 0.7826, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_Qnli-dev_cosine_accuracy": 0.6875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7924839854240417, + "eval_Qnli-dev_cosine_ap": 0.7332266491035113, + "eval_Qnli-dev_cosine_f1": 0.6891891891891891, + "eval_Qnli-dev_cosine_f1_threshold": 0.6866949796676636, + "eval_Qnli-dev_cosine_precision": 0.5730337078651685, + "eval_Qnli-dev_cosine_recall": 0.864406779661017, + "eval_Qnli-dev_dot_accuracy": 0.662109375, + "eval_Qnli-dev_dot_accuracy_threshold": 373.8640441894531, + "eval_Qnli-dev_dot_ap": 0.6496724675564834, + "eval_Qnli-dev_dot_f1": 0.684297520661157, + "eval_Qnli-dev_dot_f1_threshold": 308.1207275390625, + "eval_Qnli-dev_dot_precision": 0.5609756097560976, + "eval_Qnli-dev_dot_recall": 0.8771186440677966, + "eval_Qnli-dev_euclidean_accuracy": 0.703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.424274444580078, + "eval_Qnli-dev_euclidean_ap": 0.7444622281460342, + "eval_Qnli-dev_euclidean_f1": 0.6940170940170939, + "eval_Qnli-dev_euclidean_f1_threshold": 16.856407165527344, + "eval_Qnli-dev_euclidean_precision": 0.5816618911174785, + "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, + "eval_Qnli-dev_manhattan_accuracy": 0.69921875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 289.1227722167969, + "eval_Qnli-dev_manhattan_ap": 0.7459088632355457, + "eval_Qnli-dev_manhattan_f1": 0.6959459459459459, + "eval_Qnli-dev_manhattan_f1_threshold": 353.713623046875, + "eval_Qnli-dev_manhattan_precision": 0.5786516853932584, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.703125, + "eval_Qnli-dev_max_accuracy_threshold": 373.8640441894531, + "eval_Qnli-dev_max_ap": 0.7459088632355457, + "eval_Qnli-dev_max_f1": 0.6959459459459459, + "eval_Qnli-dev_max_f1_threshold": 353.713623046875, + "eval_Qnli-dev_max_precision": 0.5816618911174785, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.867729127407074, + "eval_allNLI-dev_cosine_ap": 0.6125988322302354, + "eval_allNLI-dev_cosine_f1": 0.6153846153846154, + "eval_allNLI-dev_cosine_f1_threshold": 0.7971692681312561, + "eval_allNLI-dev_cosine_precision": 0.5391304347826087, + "eval_allNLI-dev_cosine_recall": 0.7167630057803468, + "eval_allNLI-dev_dot_accuracy": 0.693359375, + "eval_allNLI-dev_dot_accuracy_threshold": 387.8415222167969, + "eval_allNLI-dev_dot_ap": 0.5292166386311624, + "eval_allNLI-dev_dot_f1": 0.5884861407249466, + "eval_allNLI-dev_dot_f1_threshold": 333.64898681640625, + "eval_allNLI-dev_dot_precision": 0.46621621621621623, + "eval_allNLI-dev_dot_recall": 0.7976878612716763, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.273818969726562, + "eval_allNLI-dev_euclidean_ap": 0.6193958040838967, + "eval_allNLI-dev_euclidean_f1": 0.6253101736972705, + "eval_allNLI-dev_euclidean_f1_threshold": 13.630767822265625, + "eval_allNLI-dev_euclidean_precision": 0.5478260869565217, + "eval_allNLI-dev_euclidean_recall": 0.7283236994219653, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 227.3568572998047, + "eval_allNLI-dev_manhattan_ap": 0.6143779984704311, + "eval_allNLI-dev_manhattan_f1": 0.6343612334801761, + "eval_allNLI-dev_manhattan_f1_threshold": 300.5953063964844, + "eval_allNLI-dev_manhattan_precision": 0.5124555160142349, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 387.8415222167969, + "eval_allNLI-dev_max_ap": 0.6193958040838967, + "eval_allNLI-dev_max_f1": 0.6343612334801761, + "eval_allNLI-dev_max_f1_threshold": 333.64898681640625, + "eval_allNLI-dev_max_precision": 0.5478260869565217, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7459088632355457, + "eval_sts-test_pearson_cosine": 0.8256896728141465, + "eval_sts-test_pearson_dot": 0.80848155082256, + "eval_sts-test_pearson_euclidean": 0.8575939093156039, + "eval_sts-test_pearson_manhattan": 0.8561308953474318, + "eval_sts-test_pearson_max": 0.8575939093156039, + "eval_sts-test_spearman_cosine": 0.8587494525827694, + "eval_sts-test_spearman_dot": 0.794722346118468, + "eval_sts-test_spearman_euclidean": 0.8545395671698962, + "eval_sts-test_spearman_manhattan": 0.8535123019646566, + "eval_sts-test_spearman_max": 0.8587494525827694, + "eval_vitaminc-pairs_loss": 2.8190577030181885, + "eval_vitaminc-pairs_runtime": 3.2233, + "eval_vitaminc-pairs_samples_per_second": 39.711, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_negation-triplets_loss": 0.984671413898468, + "eval_negation-triplets_runtime": 0.7677, + "eval_negation-triplets_samples_per_second": 166.742, + "eval_negation-triplets_steps_per_second": 1.303, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_scitail-pairs-pos_loss": 0.1757514476776123, + "eval_scitail-pairs-pos_runtime": 0.913, + "eval_scitail-pairs-pos_samples_per_second": 140.198, + "eval_scitail-pairs-pos_steps_per_second": 1.095, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_scitail-pairs-qa_loss": 0.0008864524425007403, + "eval_scitail-pairs-qa_runtime": 0.5941, + "eval_scitail-pairs-qa_samples_per_second": 215.45, + "eval_scitail-pairs-qa_steps_per_second": 1.683, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_xsum-pairs_loss": 0.3529047966003418, + "eval_xsum-pairs_runtime": 3.0315, + "eval_xsum-pairs_samples_per_second": 42.224, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_sciq_pairs_loss": 0.10334491729736328, + "eval_sciq_pairs_runtime": 3.4941, + "eval_sciq_pairs_samples_per_second": 36.634, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_qasc_pairs_loss": 0.2171602100133896, + "eval_qasc_pairs_runtime": 0.6121, + "eval_qasc_pairs_samples_per_second": 209.114, + "eval_qasc_pairs_steps_per_second": 1.634, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_openbookqa_pairs_loss": 0.9481621980667114, + "eval_openbookqa_pairs_runtime": 0.5963, + "eval_openbookqa_pairs_samples_per_second": 214.661, + "eval_openbookqa_pairs_steps_per_second": 1.677, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_msmarco_pairs_loss": 1.0396214723587036, + "eval_msmarco_pairs_runtime": 1.5323, + "eval_msmarco_pairs_samples_per_second": 83.534, + "eval_msmarco_pairs_steps_per_second": 0.653, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_nq_pairs_loss": 0.9104881882667542, + "eval_nq_pairs_runtime": 2.9067, + "eval_nq_pairs_samples_per_second": 44.037, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_trivia_pairs_loss": 0.8889181613922119, + "eval_trivia_pairs_runtime": 3.4447, + "eval_trivia_pairs_samples_per_second": 37.159, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_gooaq_pairs_loss": 0.4568992853164673, + "eval_gooaq_pairs_runtime": 0.9571, + "eval_gooaq_pairs_samples_per_second": 133.741, + "eval_gooaq_pairs_steps_per_second": 1.045, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_paws-pos_loss": 0.02366357110440731, + "eval_paws-pos_runtime": 0.7027, + "eval_paws-pos_samples_per_second": 182.156, + "eval_paws-pos_steps_per_second": 1.423, + "step": 1260 + }, + { + "epoch": 1.2962962962962963, + "eval_global_dataset_loss": 0.4424642324447632, + "eval_global_dataset_runtime": 13.4124, + "eval_global_dataset_samples_per_second": 31.016, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1260 + }, + { + "epoch": 1.2973251028806585, + "grad_norm": 18.139724731445312, + "learning_rate": 3.41693578532594e-05, + "loss": 2.4823, + "step": 1261 + }, + { + "epoch": 1.2983539094650205, + "grad_norm": 9.441933631896973, + "learning_rate": 3.4163785206295374e-05, + "loss": 0.388, + "step": 1262 + }, + { + "epoch": 1.2993827160493827, + "grad_norm": 0.4781794250011444, + "learning_rate": 3.415819462349017e-05, + "loss": 0.0072, + "step": 1263 + }, + { + "epoch": 1.300411522633745, + "grad_norm": 7.694046974182129, + "learning_rate": 3.41525861141021e-05, + "loss": 0.4752, + "step": 1264 + }, + { + "epoch": 1.301440329218107, + "grad_norm": 6.207346439361572, + "learning_rate": 3.414695968741918e-05, + "loss": 0.2959, + "step": 1265 + }, + { + "epoch": 1.3024691358024691, + "grad_norm": 11.718839645385742, + "learning_rate": 3.4141315352759094e-05, + "loss": 0.9776, + "step": 1266 + }, + { + "epoch": 1.3034979423868314, + "grad_norm": 0.8530243039131165, + "learning_rate": 3.413565311946917e-05, + "loss": 0.0135, + "step": 1267 + }, + { + "epoch": 1.3045267489711934, + "grad_norm": 8.410090446472168, + "learning_rate": 3.4129972996926395e-05, + "loss": 0.4569, + "step": 1268 + }, + { + "epoch": 1.3055555555555556, + "grad_norm": 11.767143249511719, + "learning_rate": 3.412427499453737e-05, + "loss": 0.8287, + "step": 1269 + }, + { + "epoch": 1.3065843621399176, + "grad_norm": 14.703503608703613, + "learning_rate": 3.41185591217383e-05, + "loss": 1.251, + "step": 1270 + }, + { + "epoch": 1.3076131687242798, + "grad_norm": 15.810161590576172, + "learning_rate": 3.411282538799501e-05, + "loss": 1.2505, + "step": 1271 + }, + { + "epoch": 1.308641975308642, + "grad_norm": 1.0557564496994019, + "learning_rate": 3.410707380280288e-05, + "loss": 0.0219, + "step": 1272 + }, + { + "epoch": 1.3096707818930042, + "grad_norm": 14.226122856140137, + "learning_rate": 3.4101304375686863e-05, + "loss": 1.0106, + "step": 1273 + }, + { + "epoch": 1.3106995884773662, + "grad_norm": 7.010471820831299, + "learning_rate": 3.409551711620145e-05, + "loss": 0.2392, + "step": 1274 + }, + { + "epoch": 1.3117283950617284, + "grad_norm": 12.67631721496582, + "learning_rate": 3.4089712033930676e-05, + "loss": 0.9318, + "step": 1275 + }, + { + "epoch": 1.3127572016460904, + "grad_norm": 13.45511531829834, + "learning_rate": 3.408388913848808e-05, + "loss": 0.9255, + "step": 1276 + }, + { + "epoch": 1.3137860082304527, + "grad_norm": 5.24427604675293, + "learning_rate": 3.407804843951672e-05, + "loss": 0.2099, + "step": 1277 + }, + { + "epoch": 1.3148148148148149, + "grad_norm": 6.305140495300293, + "learning_rate": 3.4072189946689117e-05, + "loss": 0.2442, + "step": 1278 + }, + { + "epoch": 1.3158436213991769, + "grad_norm": 8.449353218078613, + "learning_rate": 3.4066313669707255e-05, + "loss": 0.363, + "step": 1279 + }, + { + "epoch": 1.316872427983539, + "grad_norm": 9.684417724609375, + "learning_rate": 3.40604196183026e-05, + "loss": 0.6736, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_Qnli-dev_cosine_accuracy": 0.716796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8024507761001587, + "eval_Qnli-dev_cosine_ap": 0.758846186685613, + "eval_Qnli-dev_cosine_f1": 0.7107750472589792, + "eval_Qnli-dev_cosine_f1_threshold": 0.768567681312561, + "eval_Qnli-dev_cosine_precision": 0.6416382252559727, + "eval_Qnli-dev_cosine_recall": 0.7966101694915254, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 376.0484313964844, + "eval_Qnli-dev_dot_ap": 0.6643802268647256, + "eval_Qnli-dev_dot_f1": 0.680921052631579, + "eval_Qnli-dev_dot_f1_threshold": 344.0978698730469, + "eval_Qnli-dev_dot_precision": 0.5564516129032258, + "eval_Qnli-dev_dot_recall": 0.8771186440677966, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.912522315979004, + "eval_Qnli-dev_euclidean_ap": 0.7678408212467321, + "eval_Qnli-dev_euclidean_f1": 0.7212475633528265, + "eval_Qnli-dev_euclidean_f1_threshold": 14.79525375366211, + "eval_Qnli-dev_euclidean_precision": 0.6678700361010831, + "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, + "eval_Qnli-dev_manhattan_accuracy": 0.73046875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 293.152587890625, + "eval_Qnli-dev_manhattan_ap": 0.7687148416423243, + "eval_Qnli-dev_manhattan_f1": 0.7190569744597249, + "eval_Qnli-dev_manhattan_f1_threshold": 305.97088623046875, + "eval_Qnli-dev_manhattan_precision": 0.6703296703296703, + "eval_Qnli-dev_manhattan_recall": 0.7754237288135594, + "eval_Qnli-dev_max_accuracy": 0.73046875, + "eval_Qnli-dev_max_accuracy_threshold": 376.0484313964844, + "eval_Qnli-dev_max_ap": 0.7687148416423243, + "eval_Qnli-dev_max_f1": 0.7212475633528265, + "eval_Qnli-dev_max_f1_threshold": 344.0978698730469, + "eval_Qnli-dev_max_precision": 0.6703296703296703, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.734375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.886374831199646, + "eval_allNLI-dev_cosine_ap": 0.613323944711514, + "eval_allNLI-dev_cosine_f1": 0.6330275229357799, + "eval_allNLI-dev_cosine_f1_threshold": 0.7920593619346619, + "eval_allNLI-dev_cosine_precision": 0.5247148288973384, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.689453125, + "eval_allNLI-dev_dot_accuracy_threshold": 406.52789306640625, + "eval_allNLI-dev_dot_ap": 0.5132629205305964, + "eval_allNLI-dev_dot_f1": 0.6038543897216274, + "eval_allNLI-dev_dot_f1_threshold": 353.9873046875, + "eval_allNLI-dev_dot_precision": 0.47959183673469385, + "eval_allNLI-dev_dot_recall": 0.815028901734104, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.70979118347168, + "eval_allNLI-dev_euclidean_ap": 0.6214679779821198, + "eval_allNLI-dev_euclidean_f1": 0.636144578313253, + "eval_allNLI-dev_euclidean_f1_threshold": 13.583101272583008, + "eval_allNLI-dev_euclidean_precision": 0.5454545454545454, + "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 215.23228454589844, + "eval_allNLI-dev_manhattan_ap": 0.6179219629758274, + "eval_allNLI-dev_manhattan_f1": 0.6359447004608295, + "eval_allNLI-dev_manhattan_f1_threshold": 288.486083984375, + "eval_allNLI-dev_manhattan_precision": 0.5287356321839081, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 406.52789306640625, + "eval_allNLI-dev_max_ap": 0.6214679779821198, + "eval_allNLI-dev_max_f1": 0.636144578313253, + "eval_allNLI-dev_max_f1_threshold": 353.9873046875, + "eval_allNLI-dev_max_precision": 0.5454545454545454, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7687148416423243, + "eval_sts-test_pearson_cosine": 0.8344014092758152, + "eval_sts-test_pearson_dot": 0.8153235464131754, + "eval_sts-test_pearson_euclidean": 0.8611296224825529, + "eval_sts-test_pearson_manhattan": 0.8606652463835092, + "eval_sts-test_pearson_max": 0.8611296224825529, + "eval_sts-test_spearman_cosine": 0.8615371429086238, + "eval_sts-test_spearman_dot": 0.797847147696384, + "eval_sts-test_spearman_euclidean": 0.8565606236368546, + "eval_sts-test_spearman_manhattan": 0.8556613470391943, + "eval_sts-test_spearman_max": 0.8615371429086238, + "eval_vitaminc-pairs_loss": 2.8257858753204346, + "eval_vitaminc-pairs_runtime": 3.2252, + "eval_vitaminc-pairs_samples_per_second": 39.688, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_negation-triplets_loss": 0.9536899924278259, + "eval_negation-triplets_runtime": 0.7582, + "eval_negation-triplets_samples_per_second": 168.813, + "eval_negation-triplets_steps_per_second": 1.319, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_scitail-pairs-pos_loss": 0.18427607417106628, + "eval_scitail-pairs-pos_runtime": 0.8724, + "eval_scitail-pairs-pos_samples_per_second": 146.726, + "eval_scitail-pairs-pos_steps_per_second": 1.146, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_scitail-pairs-qa_loss": 0.000654980365652591, + "eval_scitail-pairs-qa_runtime": 0.591, + "eval_scitail-pairs-qa_samples_per_second": 216.575, + "eval_scitail-pairs-qa_steps_per_second": 1.692, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_xsum-pairs_loss": 0.36986735463142395, + "eval_xsum-pairs_runtime": 3.0241, + "eval_xsum-pairs_samples_per_second": 42.327, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_sciq_pairs_loss": 0.09962441027164459, + "eval_sciq_pairs_runtime": 3.4747, + "eval_sciq_pairs_samples_per_second": 36.837, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_qasc_pairs_loss": 0.22889278829097748, + "eval_qasc_pairs_runtime": 0.6143, + "eval_qasc_pairs_samples_per_second": 208.36, + "eval_qasc_pairs_steps_per_second": 1.628, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_openbookqa_pairs_loss": 0.8834983706474304, + "eval_openbookqa_pairs_runtime": 0.599, + "eval_openbookqa_pairs_samples_per_second": 213.688, + "eval_openbookqa_pairs_steps_per_second": 1.669, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_msmarco_pairs_loss": 0.8783816695213318, + "eval_msmarco_pairs_runtime": 1.5278, + "eval_msmarco_pairs_samples_per_second": 83.781, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_nq_pairs_loss": 0.7243404984474182, + "eval_nq_pairs_runtime": 2.9076, + "eval_nq_pairs_samples_per_second": 44.023, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_trivia_pairs_loss": 0.8404144048690796, + "eval_trivia_pairs_runtime": 3.449, + "eval_trivia_pairs_samples_per_second": 37.112, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_gooaq_pairs_loss": 0.4605383574962616, + "eval_gooaq_pairs_runtime": 0.956, + "eval_gooaq_pairs_samples_per_second": 133.896, + "eval_gooaq_pairs_steps_per_second": 1.046, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_paws-pos_loss": 0.02303312160074711, + "eval_paws-pos_runtime": 0.7005, + "eval_paws-pos_samples_per_second": 182.726, + "eval_paws-pos_steps_per_second": 1.428, + "step": 1280 + }, + { + "epoch": 1.316872427983539, + "eval_global_dataset_loss": 0.45325952768325806, + "eval_global_dataset_runtime": 13.4175, + "eval_global_dataset_samples_per_second": 31.004, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1280 + }, + { + "epoch": 1.3179012345679013, + "grad_norm": 5.580132007598877, + "learning_rate": 3.405450780223602e-05, + "loss": 0.1946, + "step": 1281 + }, + { + "epoch": 1.3189300411522633, + "grad_norm": 5.326640605926514, + "learning_rate": 3.404857823129783e-05, + "loss": 0.2769, + "step": 1282 + }, + { + "epoch": 1.3199588477366255, + "grad_norm": 11.557867050170898, + "learning_rate": 3.404263091530771e-05, + "loss": 1.1166, + "step": 1283 + }, + { + "epoch": 1.3209876543209877, + "grad_norm": 10.3661527633667, + "learning_rate": 3.4036665864114786e-05, + "loss": 0.9978, + "step": 1284 + }, + { + "epoch": 1.3220164609053497, + "grad_norm": 6.954195499420166, + "learning_rate": 3.4030683087597504e-05, + "loss": 0.4038, + "step": 1285 + }, + { + "epoch": 1.323045267489712, + "grad_norm": 9.20566177368164, + "learning_rate": 3.402468259566367e-05, + "loss": 0.6581, + "step": 1286 + }, + { + "epoch": 1.324074074074074, + "grad_norm": 12.167695999145508, + "learning_rate": 3.401866439825045e-05, + "loss": 1.1956, + "step": 1287 + }, + { + "epoch": 1.3251028806584362, + "grad_norm": 11.835347175598145, + "learning_rate": 3.401262850532433e-05, + "loss": 1.0445, + "step": 1288 + }, + { + "epoch": 1.3261316872427984, + "grad_norm": 9.587698936462402, + "learning_rate": 3.4006574926881066e-05, + "loss": 0.757, + "step": 1289 + }, + { + "epoch": 1.3271604938271606, + "grad_norm": 6.588235378265381, + "learning_rate": 3.4000503672945744e-05, + "loss": 0.2547, + "step": 1290 + }, + { + "epoch": 1.3281893004115226, + "grad_norm": 8.87192440032959, + "learning_rate": 3.399441475357269e-05, + "loss": 0.7288, + "step": 1291 + }, + { + "epoch": 1.3292181069958848, + "grad_norm": 10.09223461151123, + "learning_rate": 3.398830817884552e-05, + "loss": 0.8613, + "step": 1292 + }, + { + "epoch": 1.3302469135802468, + "grad_norm": 5.7491679191589355, + "learning_rate": 3.398218395887705e-05, + "loss": 0.2981, + "step": 1293 + }, + { + "epoch": 1.331275720164609, + "grad_norm": 8.702662467956543, + "learning_rate": 3.397604210380934e-05, + "loss": 0.4096, + "step": 1294 + }, + { + "epoch": 1.3323045267489713, + "grad_norm": 7.047978401184082, + "learning_rate": 3.396988262381365e-05, + "loss": 0.3078, + "step": 1295 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 6.934315204620361, + "learning_rate": 3.3963705529090425e-05, + "loss": 0.3934, + "step": 1296 + }, + { + "epoch": 1.3343621399176955, + "grad_norm": 5.967520713806152, + "learning_rate": 3.3957510829869284e-05, + "loss": 0.2141, + "step": 1297 + }, + { + "epoch": 1.3353909465020577, + "grad_norm": 1.0262237787246704, + "learning_rate": 3.395129853640901e-05, + "loss": 0.0665, + "step": 1298 + }, + { + "epoch": 1.3364197530864197, + "grad_norm": 11.270498275756836, + "learning_rate": 3.3945068658997495e-05, + "loss": 0.914, + "step": 1299 + }, + { + "epoch": 1.337448559670782, + "grad_norm": 10.944971084594727, + "learning_rate": 3.393882120795178e-05, + "loss": 1.0459, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_Qnli-dev_cosine_accuracy": 0.71484375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.792615532875061, + "eval_Qnli-dev_cosine_ap": 0.749828962990434, + "eval_Qnli-dev_cosine_f1": 0.699815837937385, + "eval_Qnli-dev_cosine_f1_threshold": 0.7434613704681396, + "eval_Qnli-dev_cosine_precision": 0.6188925081433225, + "eval_Qnli-dev_cosine_recall": 0.8050847457627118, + "eval_Qnli-dev_dot_accuracy": 0.658203125, + "eval_Qnli-dev_dot_accuracy_threshold": 376.4273376464844, + "eval_Qnli-dev_dot_ap": 0.6644867064734452, + "eval_Qnli-dev_dot_f1": 0.6786296900489396, + "eval_Qnli-dev_dot_f1_threshold": 324.79473876953125, + "eval_Qnli-dev_dot_precision": 0.5517241379310345, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.720703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.647710800170898, + "eval_Qnli-dev_euclidean_ap": 0.7604047314276812, + "eval_Qnli-dev_euclidean_f1": 0.700374531835206, + "eval_Qnli-dev_euclidean_f1_threshold": 15.481462478637695, + "eval_Qnli-dev_euclidean_precision": 0.62751677852349, + "eval_Qnli-dev_euclidean_recall": 0.7923728813559322, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 285.90618896484375, + "eval_Qnli-dev_manhattan_ap": 0.7575847560194282, + "eval_Qnli-dev_manhattan_f1": 0.7072243346007605, + "eval_Qnli-dev_manhattan_f1_threshold": 319.30615234375, + "eval_Qnli-dev_manhattan_precision": 0.6413793103448275, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 376.4273376464844, + "eval_Qnli-dev_max_ap": 0.7604047314276812, + "eval_Qnli-dev_max_f1": 0.7072243346007605, + "eval_Qnli-dev_max_f1_threshold": 324.79473876953125, + "eval_Qnli-dev_max_precision": 0.6413793103448275, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.882408082485199, + "eval_allNLI-dev_cosine_ap": 0.6221112461174584, + "eval_allNLI-dev_cosine_f1": 0.6326963906581741, + "eval_allNLI-dev_cosine_f1_threshold": 0.7625564336776733, + "eval_allNLI-dev_cosine_precision": 0.5, + "eval_allNLI-dev_cosine_recall": 0.861271676300578, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 400.8938903808594, + "eval_allNLI-dev_dot_ap": 0.5324965985830011, + "eval_allNLI-dev_dot_f1": 0.5841784989858013, + "eval_allNLI-dev_dot_f1_threshold": 343.65850830078125, + "eval_allNLI-dev_dot_precision": 0.45, + "eval_allNLI-dev_dot_recall": 0.8323699421965318, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.216476440429688, + "eval_allNLI-dev_euclidean_ap": 0.6296916777262784, + "eval_allNLI-dev_euclidean_f1": 0.6420824295010846, + "eval_allNLI-dev_euclidean_f1_threshold": 14.571252822875977, + "eval_allNLI-dev_euclidean_precision": 0.5138888888888888, + "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 233.3595428466797, + "eval_allNLI-dev_manhattan_ap": 0.6231183643819906, + "eval_allNLI-dev_manhattan_f1": 0.638477801268499, + "eval_allNLI-dev_manhattan_f1_threshold": 310.4957275390625, + "eval_allNLI-dev_manhattan_precision": 0.5033333333333333, + "eval_allNLI-dev_manhattan_recall": 0.8728323699421965, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 400.8938903808594, + "eval_allNLI-dev_max_ap": 0.6296916777262784, + "eval_allNLI-dev_max_f1": 0.6420824295010846, + "eval_allNLI-dev_max_f1_threshold": 343.65850830078125, + "eval_allNLI-dev_max_precision": 0.5138888888888888, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7604047314276812, + "eval_sts-test_pearson_cosine": 0.8349821001879352, + "eval_sts-test_pearson_dot": 0.8001679116980415, + "eval_sts-test_pearson_euclidean": 0.8630253118943207, + "eval_sts-test_pearson_manhattan": 0.8625882800897832, + "eval_sts-test_pearson_max": 0.8630253118943207, + "eval_sts-test_spearman_cosine": 0.861910144672321, + "eval_sts-test_spearman_dot": 0.77834190765759, + "eval_sts-test_spearman_euclidean": 0.8576496455828143, + "eval_sts-test_spearman_manhattan": 0.8560976718377604, + "eval_sts-test_spearman_max": 0.861910144672321, + "eval_vitaminc-pairs_loss": 2.950286626815796, + "eval_vitaminc-pairs_runtime": 3.2157, + "eval_vitaminc-pairs_samples_per_second": 39.805, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_negation-triplets_loss": 1.0076651573181152, + "eval_negation-triplets_runtime": 0.7627, + "eval_negation-triplets_samples_per_second": 167.818, + "eval_negation-triplets_steps_per_second": 1.311, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_scitail-pairs-pos_loss": 0.18644708395004272, + "eval_scitail-pairs-pos_runtime": 0.9162, + "eval_scitail-pairs-pos_samples_per_second": 139.706, + "eval_scitail-pairs-pos_steps_per_second": 1.091, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_scitail-pairs-qa_loss": 0.0008939155377447605, + "eval_scitail-pairs-qa_runtime": 0.5994, + "eval_scitail-pairs-qa_samples_per_second": 213.553, + "eval_scitail-pairs-qa_steps_per_second": 1.668, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_xsum-pairs_loss": 0.34629058837890625, + "eval_xsum-pairs_runtime": 3.0298, + "eval_xsum-pairs_samples_per_second": 42.247, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_sciq_pairs_loss": 0.09945479035377502, + "eval_sciq_pairs_runtime": 3.5024, + "eval_sciq_pairs_samples_per_second": 36.546, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_qasc_pairs_loss": 0.21978069841861725, + "eval_qasc_pairs_runtime": 0.6157, + "eval_qasc_pairs_samples_per_second": 207.889, + "eval_qasc_pairs_steps_per_second": 1.624, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_openbookqa_pairs_loss": 0.8806225061416626, + "eval_openbookqa_pairs_runtime": 0.5926, + "eval_openbookqa_pairs_samples_per_second": 216.006, + "eval_openbookqa_pairs_steps_per_second": 1.688, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_msmarco_pairs_loss": 1.0302841663360596, + "eval_msmarco_pairs_runtime": 1.5274, + "eval_msmarco_pairs_samples_per_second": 83.802, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_nq_pairs_loss": 0.7833474278450012, + "eval_nq_pairs_runtime": 2.9026, + "eval_nq_pairs_samples_per_second": 44.098, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_trivia_pairs_loss": 0.923805296421051, + "eval_trivia_pairs_runtime": 3.4429, + "eval_trivia_pairs_samples_per_second": 37.178, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_gooaq_pairs_loss": 0.48123741149902344, + "eval_gooaq_pairs_runtime": 0.9544, + "eval_gooaq_pairs_samples_per_second": 134.112, + "eval_gooaq_pairs_steps_per_second": 1.048, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_paws-pos_loss": 0.0238708034157753, + "eval_paws-pos_runtime": 0.7062, + "eval_paws-pos_samples_per_second": 181.245, + "eval_paws-pos_steps_per_second": 1.416, + "step": 1300 + }, + { + "epoch": 1.337448559670782, + "eval_global_dataset_loss": 0.4899948239326477, + "eval_global_dataset_runtime": 13.4006, + "eval_global_dataset_samples_per_second": 31.043, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1300 + }, + { + "epoch": 1.3384773662551441, + "grad_norm": 5.659729480743408, + "learning_rate": 3.393255619361798e-05, + "loss": 0.422, + "step": 1301 + }, + { + "epoch": 1.3395061728395061, + "grad_norm": 12.18124008178711, + "learning_rate": 3.3926273626371344e-05, + "loss": 0.982, + "step": 1302 + }, + { + "epoch": 1.3405349794238683, + "grad_norm": 11.858753204345703, + "learning_rate": 3.391997351661614e-05, + "loss": 0.8423, + "step": 1303 + }, + { + "epoch": 1.3415637860082303, + "grad_norm": 0.8034223318099976, + "learning_rate": 3.391365587478571e-05, + "loss": 0.0143, + "step": 1304 + }, + { + "epoch": 1.3425925925925926, + "grad_norm": 10.877967834472656, + "learning_rate": 3.3907320711342424e-05, + "loss": 0.8905, + "step": 1305 + }, + { + "epoch": 1.3436213991769548, + "grad_norm": 6.035357475280762, + "learning_rate": 3.390096803677767e-05, + "loss": 0.2113, + "step": 1306 + }, + { + "epoch": 1.344650205761317, + "grad_norm": 11.848456382751465, + "learning_rate": 3.389459786161184e-05, + "loss": 1.1551, + "step": 1307 + }, + { + "epoch": 1.345679012345679, + "grad_norm": 11.645739555358887, + "learning_rate": 3.38882101963943e-05, + "loss": 0.8173, + "step": 1308 + }, + { + "epoch": 1.3467078189300412, + "grad_norm": 1.066010594367981, + "learning_rate": 3.388180505170339e-05, + "loss": 0.0205, + "step": 1309 + }, + { + "epoch": 1.3477366255144032, + "grad_norm": 7.991639614105225, + "learning_rate": 3.387538243814639e-05, + "loss": 0.4653, + "step": 1310 + }, + { + "epoch": 1.3487654320987654, + "grad_norm": 8.536792755126953, + "learning_rate": 3.3868942366359516e-05, + "loss": 0.5243, + "step": 1311 + }, + { + "epoch": 1.3497942386831276, + "grad_norm": 12.100427627563477, + "learning_rate": 3.386248484700788e-05, + "loss": 0.9373, + "step": 1312 + }, + { + "epoch": 1.3508230452674896, + "grad_norm": 8.525440216064453, + "learning_rate": 3.385600989078551e-05, + "loss": 0.9276, + "step": 1313 + }, + { + "epoch": 1.3518518518518519, + "grad_norm": 7.700965404510498, + "learning_rate": 3.3849517508415306e-05, + "loss": 0.6381, + "step": 1314 + }, + { + "epoch": 1.3528806584362139, + "grad_norm": 8.957015037536621, + "learning_rate": 3.3843007710648995e-05, + "loss": 0.9841, + "step": 1315 + }, + { + "epoch": 1.353909465020576, + "grad_norm": 9.407490730285645, + "learning_rate": 3.38364805082672e-05, + "loss": 0.6191, + "step": 1316 + }, + { + "epoch": 1.3549382716049383, + "grad_norm": 8.58808708190918, + "learning_rate": 3.382993591207931e-05, + "loss": 0.4349, + "step": 1317 + }, + { + "epoch": 1.3559670781893005, + "grad_norm": 7.1622772216796875, + "learning_rate": 3.382337393292358e-05, + "loss": 0.3448, + "step": 1318 + }, + { + "epoch": 1.3569958847736625, + "grad_norm": 8.696556091308594, + "learning_rate": 3.3816794581666986e-05, + "loss": 0.5121, + "step": 1319 + }, + { + "epoch": 1.3580246913580247, + "grad_norm": 0.7031188607215881, + "learning_rate": 3.3810197869205324e-05, + "loss": 0.0379, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_Qnli-dev_cosine_accuracy": 0.71484375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7805460691452026, + "eval_Qnli-dev_cosine_ap": 0.7576269833878866, + "eval_Qnli-dev_cosine_f1": 0.7047244094488188, + "eval_Qnli-dev_cosine_f1_threshold": 0.7520368099212646, + "eval_Qnli-dev_cosine_precision": 0.6580882352941176, + "eval_Qnli-dev_cosine_recall": 0.7584745762711864, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 369.0809326171875, + "eval_Qnli-dev_dot_ap": 0.6915697235432918, + "eval_Qnli-dev_dot_f1": 0.6821963394342762, + "eval_Qnli-dev_dot_f1_threshold": 319.52972412109375, + "eval_Qnli-dev_dot_precision": 0.5616438356164384, + "eval_Qnli-dev_dot_recall": 0.8686440677966102, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.404098510742188, + "eval_Qnli-dev_euclidean_ap": 0.7639315840425045, + "eval_Qnli-dev_euclidean_f1": 0.700587084148728, + "eval_Qnli-dev_euclidean_f1_threshold": 15.238872528076172, + "eval_Qnli-dev_euclidean_precision": 0.6509090909090909, + "eval_Qnli-dev_euclidean_recall": 0.7584745762711864, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 286.43316650390625, + "eval_Qnli-dev_manhattan_ap": 0.7691725685119097, + "eval_Qnli-dev_manhattan_f1": 0.7054409005628519, + "eval_Qnli-dev_manhattan_f1_threshold": 325.53839111328125, + "eval_Qnli-dev_manhattan_precision": 0.632996632996633, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 369.0809326171875, + "eval_Qnli-dev_max_ap": 0.7691725685119097, + "eval_Qnli-dev_max_f1": 0.7054409005628519, + "eval_Qnli-dev_max_f1_threshold": 325.53839111328125, + "eval_Qnli-dev_max_precision": 0.6580882352941176, + "eval_Qnli-dev_max_recall": 0.8686440677966102, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8764334321022034, + "eval_allNLI-dev_cosine_ap": 0.6243270151502973, + "eval_allNLI-dev_cosine_f1": 0.6389496717724288, + "eval_allNLI-dev_cosine_f1_threshold": 0.7704949975013733, + "eval_allNLI-dev_cosine_precision": 0.5140845070422535, + "eval_allNLI-dev_cosine_recall": 0.8439306358381503, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 401.2423095703125, + "eval_allNLI-dev_dot_ap": 0.546262957617912, + "eval_allNLI-dev_dot_f1": 0.6046511627906976, + "eval_allNLI-dev_dot_f1_threshold": 345.5875244140625, + "eval_allNLI-dev_dot_precision": 0.4766666666666667, + "eval_allNLI-dev_dot_recall": 0.8265895953757225, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.70079517364502, + "eval_allNLI-dev_euclidean_ap": 0.6336567588303856, + "eval_allNLI-dev_euclidean_f1": 0.6444906444906446, + "eval_allNLI-dev_euclidean_f1_threshold": 15.083576202392578, + "eval_allNLI-dev_euclidean_precision": 0.5032467532467533, + "eval_allNLI-dev_euclidean_recall": 0.8959537572254336, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 226.74481201171875, + "eval_allNLI-dev_manhattan_ap": 0.6259645089300835, + "eval_allNLI-dev_manhattan_f1": 0.644880174291939, + "eval_allNLI-dev_manhattan_f1_threshold": 305.73834228515625, + "eval_allNLI-dev_manhattan_precision": 0.5174825174825175, + "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 401.2423095703125, + "eval_allNLI-dev_max_ap": 0.6336567588303856, + "eval_allNLI-dev_max_f1": 0.644880174291939, + "eval_allNLI-dev_max_f1_threshold": 345.5875244140625, + "eval_allNLI-dev_max_precision": 0.5174825174825175, + "eval_allNLI-dev_max_recall": 0.8959537572254336, + "eval_sequential_score": 0.7691725685119097, + "eval_sts-test_pearson_cosine": 0.8295056997784466, + "eval_sts-test_pearson_dot": 0.8078381072629077, + "eval_sts-test_pearson_euclidean": 0.8609211328044077, + "eval_sts-test_pearson_manhattan": 0.8588787323326552, + "eval_sts-test_pearson_max": 0.8609211328044077, + "eval_sts-test_spearman_cosine": 0.8590697821118088, + "eval_sts-test_spearman_dot": 0.7903332319037223, + "eval_sts-test_spearman_euclidean": 0.8571045752190669, + "eval_sts-test_spearman_manhattan": 0.8545855267153452, + "eval_sts-test_spearman_max": 0.8590697821118088, + "eval_vitaminc-pairs_loss": 3.052609443664551, + "eval_vitaminc-pairs_runtime": 3.2081, + "eval_vitaminc-pairs_samples_per_second": 39.899, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_negation-triplets_loss": 0.9799715280532837, + "eval_negation-triplets_runtime": 0.7767, + "eval_negation-triplets_samples_per_second": 164.795, + "eval_negation-triplets_steps_per_second": 1.287, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_scitail-pairs-pos_loss": 0.17218124866485596, + "eval_scitail-pairs-pos_runtime": 0.883, + "eval_scitail-pairs-pos_samples_per_second": 144.954, + "eval_scitail-pairs-pos_steps_per_second": 1.132, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_scitail-pairs-qa_loss": 0.0009102174080908298, + "eval_scitail-pairs-qa_runtime": 0.6143, + "eval_scitail-pairs-qa_samples_per_second": 208.372, + "eval_scitail-pairs-qa_steps_per_second": 1.628, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_xsum-pairs_loss": 0.33992800116539, + "eval_xsum-pairs_runtime": 3.0691, + "eval_xsum-pairs_samples_per_second": 41.706, + "eval_xsum-pairs_steps_per_second": 0.326, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_sciq_pairs_loss": 0.09722432494163513, + "eval_sciq_pairs_runtime": 3.5028, + "eval_sciq_pairs_samples_per_second": 36.542, + "eval_sciq_pairs_steps_per_second": 0.285, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_qasc_pairs_loss": 0.18059255182743073, + "eval_qasc_pairs_runtime": 0.6225, + "eval_qasc_pairs_samples_per_second": 205.626, + "eval_qasc_pairs_steps_per_second": 1.606, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_openbookqa_pairs_loss": 0.8701379895210266, + "eval_openbookqa_pairs_runtime": 0.5939, + "eval_openbookqa_pairs_samples_per_second": 215.541, + "eval_openbookqa_pairs_steps_per_second": 1.684, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_msmarco_pairs_loss": 1.001753807067871, + "eval_msmarco_pairs_runtime": 1.5244, + "eval_msmarco_pairs_samples_per_second": 83.967, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_nq_pairs_loss": 0.7202290892601013, + "eval_nq_pairs_runtime": 2.9028, + "eval_nq_pairs_samples_per_second": 44.096, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_trivia_pairs_loss": 0.9316710829734802, + "eval_trivia_pairs_runtime": 3.4486, + "eval_trivia_pairs_samples_per_second": 37.117, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_gooaq_pairs_loss": 0.4748282730579376, + "eval_gooaq_pairs_runtime": 0.9538, + "eval_gooaq_pairs_samples_per_second": 134.196, + "eval_gooaq_pairs_steps_per_second": 1.048, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_paws-pos_loss": 0.022798927500844002, + "eval_paws-pos_runtime": 0.7059, + "eval_paws-pos_samples_per_second": 181.317, + "eval_paws-pos_steps_per_second": 1.417, + "step": 1320 + }, + { + "epoch": 1.3580246913580247, + "eval_global_dataset_loss": 0.5067123174667358, + "eval_global_dataset_runtime": 13.3957, + "eval_global_dataset_samples_per_second": 31.055, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1320 + }, + { + "epoch": 1.3590534979423867, + "grad_norm": 7.470611095428467, + "learning_rate": 3.380358380646312e-05, + "loss": 0.3647, + "step": 1321 + }, + { + "epoch": 1.360082304526749, + "grad_norm": 5.169586181640625, + "learning_rate": 3.379695240439363e-05, + "loss": 0.2223, + "step": 1322 + }, + { + "epoch": 1.3611111111111112, + "grad_norm": 12.399897575378418, + "learning_rate": 3.379030367397884e-05, + "loss": 0.9171, + "step": 1323 + }, + { + "epoch": 1.3621399176954734, + "grad_norm": 6.1529927253723145, + "learning_rate": 3.378363762622942e-05, + "loss": 0.2045, + "step": 1324 + }, + { + "epoch": 1.3631687242798354, + "grad_norm": 10.532297134399414, + "learning_rate": 3.3776954272184727e-05, + "loss": 0.7837, + "step": 1325 + }, + { + "epoch": 1.3641975308641976, + "grad_norm": 7.012182712554932, + "learning_rate": 3.377025362291276e-05, + "loss": 0.3129, + "step": 1326 + }, + { + "epoch": 1.3652263374485596, + "grad_norm": 5.788620948791504, + "learning_rate": 3.376353568951018e-05, + "loss": 0.2301, + "step": 1327 + }, + { + "epoch": 1.3662551440329218, + "grad_norm": 6.239854335784912, + "learning_rate": 3.375680048310228e-05, + "loss": 0.2602, + "step": 1328 + }, + { + "epoch": 1.367283950617284, + "grad_norm": 5.890448093414307, + "learning_rate": 3.375004801484292e-05, + "loss": 0.275, + "step": 1329 + }, + { + "epoch": 1.368312757201646, + "grad_norm": 5.364292621612549, + "learning_rate": 3.3743278295914585e-05, + "loss": 0.2825, + "step": 1330 + }, + { + "epoch": 1.3693415637860082, + "grad_norm": 9.858009338378906, + "learning_rate": 3.3736491337528304e-05, + "loss": 0.8946, + "step": 1331 + }, + { + "epoch": 1.3703703703703702, + "grad_norm": 5.880088806152344, + "learning_rate": 3.372968715092367e-05, + "loss": 0.2247, + "step": 1332 + }, + { + "epoch": 1.3713991769547325, + "grad_norm": 1.4969556331634521, + "learning_rate": 3.3722865747368794e-05, + "loss": 0.0365, + "step": 1333 + }, + { + "epoch": 1.3724279835390947, + "grad_norm": 6.889362812042236, + "learning_rate": 3.371602713816031e-05, + "loss": 0.3087, + "step": 1334 + }, + { + "epoch": 1.373456790123457, + "grad_norm": 12.018694877624512, + "learning_rate": 3.370917133462335e-05, + "loss": 0.7406, + "step": 1335 + }, + { + "epoch": 1.374485596707819, + "grad_norm": 8.535211563110352, + "learning_rate": 3.37022983481115e-05, + "loss": 0.6123, + "step": 1336 + }, + { + "epoch": 1.375514403292181, + "grad_norm": 6.665390491485596, + "learning_rate": 3.3695408190006815e-05, + "loss": 0.2442, + "step": 1337 + }, + { + "epoch": 1.376543209876543, + "grad_norm": 9.452113151550293, + "learning_rate": 3.36885008717198e-05, + "loss": 0.5433, + "step": 1338 + }, + { + "epoch": 1.3775720164609053, + "grad_norm": 15.978325843811035, + "learning_rate": 3.368157640468935e-05, + "loss": 1.1899, + "step": 1339 + }, + { + "epoch": 1.3786008230452675, + "grad_norm": 5.319306373596191, + "learning_rate": 3.367463480038278e-05, + "loss": 0.1734, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7876126766204834, + "eval_Qnli-dev_cosine_ap": 0.7400085812314071, + "eval_Qnli-dev_cosine_f1": 0.6983050847457627, + "eval_Qnli-dev_cosine_f1_threshold": 0.7217756509780884, + "eval_Qnli-dev_cosine_precision": 0.5819209039548022, + "eval_Qnli-dev_cosine_recall": 0.8728813559322034, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 369.82562255859375, + "eval_Qnli-dev_dot_ap": 0.6517294833598514, + "eval_Qnli-dev_dot_f1": 0.67008547008547, + "eval_Qnli-dev_dot_f1_threshold": 337.6015930175781, + "eval_Qnli-dev_dot_precision": 0.5616045845272206, + "eval_Qnli-dev_dot_recall": 0.8305084745762712, + "eval_Qnli-dev_euclidean_accuracy": 0.703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.102432250976562, + "eval_Qnli-dev_euclidean_ap": 0.7523164499043167, + "eval_Qnli-dev_euclidean_f1": 0.7030716723549488, + "eval_Qnli-dev_euclidean_f1_threshold": 16.046783447265625, + "eval_Qnli-dev_euclidean_precision": 0.5885714285714285, + "eval_Qnli-dev_euclidean_recall": 0.8728813559322034, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 295.23388671875, + "eval_Qnli-dev_manhattan_ap": 0.7556733058427012, + "eval_Qnli-dev_manhattan_f1": 0.696095076400679, + "eval_Qnli-dev_manhattan_f1_threshold": 334.82318115234375, + "eval_Qnli-dev_manhattan_precision": 0.5807365439093485, + "eval_Qnli-dev_manhattan_recall": 0.8686440677966102, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 369.82562255859375, + "eval_Qnli-dev_max_ap": 0.7556733058427012, + "eval_Qnli-dev_max_f1": 0.7030716723549488, + "eval_Qnli-dev_max_f1_threshold": 337.6015930175781, + "eval_Qnli-dev_max_precision": 0.5885714285714285, + "eval_Qnli-dev_max_recall": 0.8728813559322034, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8959084749221802, + "eval_allNLI-dev_cosine_ap": 0.6252038048871644, + "eval_allNLI-dev_cosine_f1": 0.6382978723404256, + "eval_allNLI-dev_cosine_f1_threshold": 0.7897143959999084, + "eval_allNLI-dev_cosine_precision": 0.54, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.701171875, + "eval_allNLI-dev_dot_accuracy_threshold": 449.27313232421875, + "eval_allNLI-dev_dot_ap": 0.5613036858399412, + "eval_allNLI-dev_dot_f1": 0.602020202020202, + "eval_allNLI-dev_dot_f1_threshold": 347.0570068359375, + "eval_allNLI-dev_dot_precision": 0.46273291925465837, + "eval_allNLI-dev_dot_recall": 0.861271676300578, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.560067176818848, + "eval_allNLI-dev_euclidean_ap": 0.6314617061154307, + "eval_allNLI-dev_euclidean_f1": 0.6396396396396398, + "eval_allNLI-dev_euclidean_f1_threshold": 14.363271713256836, + "eval_allNLI-dev_euclidean_precision": 0.5239852398523985, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 220.5458221435547, + "eval_allNLI-dev_manhattan_ap": 0.6266918694412588, + "eval_allNLI-dev_manhattan_f1": 0.6486486486486487, + "eval_allNLI-dev_manhattan_f1_threshold": 300.31488037109375, + "eval_allNLI-dev_manhattan_precision": 0.5313653136531366, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 449.27313232421875, + "eval_allNLI-dev_max_ap": 0.6314617061154307, + "eval_allNLI-dev_max_f1": 0.6486486486486487, + "eval_allNLI-dev_max_f1_threshold": 347.0570068359375, + "eval_allNLI-dev_max_precision": 0.54, + "eval_allNLI-dev_max_recall": 0.861271676300578, + "eval_sequential_score": 0.7556733058427012, + "eval_sts-test_pearson_cosine": 0.8261417254823802, + "eval_sts-test_pearson_dot": 0.8012305316887692, + "eval_sts-test_pearson_euclidean": 0.8622507133070001, + "eval_sts-test_pearson_manhattan": 0.8618004189910506, + "eval_sts-test_pearson_max": 0.8622507133070001, + "eval_sts-test_spearman_cosine": 0.862160304223499, + "eval_sts-test_spearman_dot": 0.7936632627663793, + "eval_sts-test_spearman_euclidean": 0.8611724648795452, + "eval_sts-test_spearman_manhattan": 0.8590668285285571, + "eval_sts-test_spearman_max": 0.862160304223499, + "eval_vitaminc-pairs_loss": 3.161919593811035, + "eval_vitaminc-pairs_runtime": 3.2189, + "eval_vitaminc-pairs_samples_per_second": 39.765, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_negation-triplets_loss": 0.9676703214645386, + "eval_negation-triplets_runtime": 0.7626, + "eval_negation-triplets_samples_per_second": 167.839, + "eval_negation-triplets_steps_per_second": 1.311, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_scitail-pairs-pos_loss": 0.1650688350200653, + "eval_scitail-pairs-pos_runtime": 0.8778, + "eval_scitail-pairs-pos_samples_per_second": 145.823, + "eval_scitail-pairs-pos_steps_per_second": 1.139, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_scitail-pairs-qa_loss": 0.001209335750900209, + "eval_scitail-pairs-qa_runtime": 0.6014, + "eval_scitail-pairs-qa_samples_per_second": 212.819, + "eval_scitail-pairs-qa_steps_per_second": 1.663, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_xsum-pairs_loss": 0.3014724850654602, + "eval_xsum-pairs_runtime": 3.0317, + "eval_xsum-pairs_samples_per_second": 42.221, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_sciq_pairs_loss": 0.09812074154615402, + "eval_sciq_pairs_runtime": 3.52, + "eval_sciq_pairs_samples_per_second": 36.364, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_qasc_pairs_loss": 0.20444463193416595, + "eval_qasc_pairs_runtime": 0.6203, + "eval_qasc_pairs_samples_per_second": 206.355, + "eval_qasc_pairs_steps_per_second": 1.612, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_openbookqa_pairs_loss": 0.8844213485717773, + "eval_openbookqa_pairs_runtime": 0.5908, + "eval_openbookqa_pairs_samples_per_second": 216.639, + "eval_openbookqa_pairs_steps_per_second": 1.692, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_msmarco_pairs_loss": 0.9858245849609375, + "eval_msmarco_pairs_runtime": 1.5236, + "eval_msmarco_pairs_samples_per_second": 84.014, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_nq_pairs_loss": 0.7741794586181641, + "eval_nq_pairs_runtime": 2.9359, + "eval_nq_pairs_samples_per_second": 43.598, + "eval_nq_pairs_steps_per_second": 0.341, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_trivia_pairs_loss": 0.7956993579864502, + "eval_trivia_pairs_runtime": 3.4425, + "eval_trivia_pairs_samples_per_second": 37.183, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_gooaq_pairs_loss": 0.40307921171188354, + "eval_gooaq_pairs_runtime": 0.9582, + "eval_gooaq_pairs_samples_per_second": 133.578, + "eval_gooaq_pairs_steps_per_second": 1.044, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_paws-pos_loss": 0.022803284227848053, + "eval_paws-pos_runtime": 0.7023, + "eval_paws-pos_samples_per_second": 182.25, + "eval_paws-pos_steps_per_second": 1.424, + "step": 1340 + }, + { + "epoch": 1.3786008230452675, + "eval_global_dataset_loss": 0.4967878758907318, + "eval_global_dataset_runtime": 13.3959, + "eval_global_dataset_samples_per_second": 31.054, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1340 + }, + { + "epoch": 1.3796296296296298, + "grad_norm": 12.965760231018066, + "learning_rate": 3.366767607029578e-05, + "loss": 1.0524, + "step": 1341 + }, + { + "epoch": 1.3806584362139918, + "grad_norm": 1.4375091791152954, + "learning_rate": 3.366070022595241e-05, + "loss": 0.0603, + "step": 1342 + }, + { + "epoch": 1.381687242798354, + "grad_norm": 8.675809860229492, + "learning_rate": 3.3653707278905044e-05, + "loss": 0.4451, + "step": 1343 + }, + { + "epoch": 1.382716049382716, + "grad_norm": 10.920755386352539, + "learning_rate": 3.364669724073441e-05, + "loss": 0.8211, + "step": 1344 + }, + { + "epoch": 1.3837448559670782, + "grad_norm": 0.6817147731781006, + "learning_rate": 3.3639670123049534e-05, + "loss": 0.022, + "step": 1345 + }, + { + "epoch": 1.3847736625514404, + "grad_norm": 8.8646240234375, + "learning_rate": 3.36326259374877e-05, + "loss": 0.4733, + "step": 1346 + }, + { + "epoch": 1.3858024691358024, + "grad_norm": 5.144080638885498, + "learning_rate": 3.3625564695714496e-05, + "loss": 0.2417, + "step": 1347 + }, + { + "epoch": 1.3868312757201646, + "grad_norm": 10.667274475097656, + "learning_rate": 3.3618486409423734e-05, + "loss": 0.7385, + "step": 1348 + }, + { + "epoch": 1.3878600823045266, + "grad_norm": 10.963183403015137, + "learning_rate": 3.3611391090337456e-05, + "loss": 0.7823, + "step": 1349 + }, + { + "epoch": 1.3888888888888888, + "grad_norm": 6.086643218994141, + "learning_rate": 3.3604278750205907e-05, + "loss": 0.2875, + "step": 1350 + }, + { + "epoch": 1.389917695473251, + "grad_norm": 10.556036949157715, + "learning_rate": 3.359714940080753e-05, + "loss": 0.8247, + "step": 1351 + }, + { + "epoch": 1.3909465020576133, + "grad_norm": 9.976085662841797, + "learning_rate": 3.359000305394893e-05, + "loss": 0.7735, + "step": 1352 + }, + { + "epoch": 1.3919753086419753, + "grad_norm": 0.6431965231895447, + "learning_rate": 3.3582839721464866e-05, + "loss": 0.0128, + "step": 1353 + }, + { + "epoch": 1.3930041152263375, + "grad_norm": 6.725648403167725, + "learning_rate": 3.3575659415218206e-05, + "loss": 0.2489, + "step": 1354 + }, + { + "epoch": 1.3940329218106995, + "grad_norm": 9.695779800415039, + "learning_rate": 3.356846214709997e-05, + "loss": 0.5558, + "step": 1355 + }, + { + "epoch": 1.3950617283950617, + "grad_norm": 15.12929916381836, + "learning_rate": 3.3561247929029215e-05, + "loss": 1.142, + "step": 1356 + }, + { + "epoch": 1.396090534979424, + "grad_norm": 13.447492599487305, + "learning_rate": 3.355401677295311e-05, + "loss": 1.2416, + "step": 1357 + }, + { + "epoch": 1.3971193415637861, + "grad_norm": 8.626240730285645, + "learning_rate": 3.354676869084685e-05, + "loss": 0.5202, + "step": 1358 + }, + { + "epoch": 1.3981481481481481, + "grad_norm": 6.097577095031738, + "learning_rate": 3.353950369471368e-05, + "loss": 0.1756, + "step": 1359 + }, + { + "epoch": 1.3991769547325104, + "grad_norm": 5.241819858551025, + "learning_rate": 3.3532221796584825e-05, + "loss": 0.2559, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_Qnli-dev_cosine_accuracy": 0.71484375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7863629460334778, + "eval_Qnli-dev_cosine_ap": 0.7520452722161024, + "eval_Qnli-dev_cosine_f1": 0.6998341625207297, + "eval_Qnli-dev_cosine_f1_threshold": 0.7090976238250732, + "eval_Qnli-dev_cosine_precision": 0.5749318801089919, + "eval_Qnli-dev_cosine_recall": 0.8940677966101694, + "eval_Qnli-dev_dot_accuracy": 0.658203125, + "eval_Qnli-dev_dot_accuracy_threshold": 366.3503723144531, + "eval_Qnli-dev_dot_ap": 0.6700128573840451, + "eval_Qnli-dev_dot_f1": 0.6814580031695722, + "eval_Qnli-dev_dot_f1_threshold": 316.33770751953125, + "eval_Qnli-dev_dot_precision": 0.5443037974683544, + "eval_Qnli-dev_dot_recall": 0.9110169491525424, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.179301261901855, + "eval_Qnli-dev_euclidean_ap": 0.7627701173135915, + "eval_Qnli-dev_euclidean_f1": 0.7054545454545453, + "eval_Qnli-dev_euclidean_f1_threshold": 15.506027221679688, + "eval_Qnli-dev_euclidean_precision": 0.6178343949044586, + "eval_Qnli-dev_euclidean_recall": 0.8220338983050848, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 295.79803466796875, + "eval_Qnli-dev_manhattan_ap": 0.7679690415434057, + "eval_Qnli-dev_manhattan_f1": 0.7093235831809872, + "eval_Qnli-dev_manhattan_f1_threshold": 323.1705322265625, + "eval_Qnli-dev_manhattan_precision": 0.6237942122186495, + "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 366.3503723144531, + "eval_Qnli-dev_max_ap": 0.7679690415434057, + "eval_Qnli-dev_max_f1": 0.7093235831809872, + "eval_Qnli-dev_max_f1_threshold": 323.1705322265625, + "eval_Qnli-dev_max_precision": 0.6237942122186495, + "eval_Qnli-dev_max_recall": 0.9110169491525424, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8775778412818909, + "eval_allNLI-dev_cosine_ap": 0.6233735118641757, + "eval_allNLI-dev_cosine_f1": 0.6327433628318583, + "eval_allNLI-dev_cosine_f1_threshold": 0.7827239036560059, + "eval_allNLI-dev_cosine_precision": 0.5125448028673835, + "eval_allNLI-dev_cosine_recall": 0.8265895953757225, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 436.905517578125, + "eval_allNLI-dev_dot_ap": 0.5591141127494976, + "eval_allNLI-dev_dot_f1": 0.6085192697768763, + "eval_allNLI-dev_dot_f1_threshold": 348.33587646484375, + "eval_allNLI-dev_dot_precision": 0.46875, + "eval_allNLI-dev_dot_recall": 0.8670520231213873, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.58004379272461, + "eval_allNLI-dev_euclidean_ap": 0.62762836308513, + "eval_allNLI-dev_euclidean_f1": 0.6412556053811659, + "eval_allNLI-dev_euclidean_f1_threshold": 14.265384674072266, + "eval_allNLI-dev_euclidean_precision": 0.5238095238095238, + "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 229.06759643554688, + "eval_allNLI-dev_manhattan_ap": 0.6243122484228764, + "eval_allNLI-dev_manhattan_f1": 0.6426966292134833, + "eval_allNLI-dev_manhattan_f1_threshold": 297.4107666015625, + "eval_allNLI-dev_manhattan_precision": 0.5257352941176471, + "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 436.905517578125, + "eval_allNLI-dev_max_ap": 0.62762836308513, + "eval_allNLI-dev_max_f1": 0.6426966292134833, + "eval_allNLI-dev_max_f1_threshold": 348.33587646484375, + "eval_allNLI-dev_max_precision": 0.5257352941176471, + "eval_allNLI-dev_max_recall": 0.8670520231213873, + "eval_sequential_score": 0.7679690415434057, + "eval_sts-test_pearson_cosine": 0.83738764698893, + "eval_sts-test_pearson_dot": 0.8183567007456142, + "eval_sts-test_pearson_euclidean": 0.866690575863861, + "eval_sts-test_pearson_manhattan": 0.8654764568761573, + "eval_sts-test_pearson_max": 0.866690575863861, + "eval_sts-test_spearman_cosine": 0.8645791546529655, + "eval_sts-test_spearman_dot": 0.8141177665618077, + "eval_sts-test_spearman_euclidean": 0.8612966496299064, + "eval_sts-test_spearman_manhattan": 0.8601330273312072, + "eval_sts-test_spearman_max": 0.8645791546529655, + "eval_vitaminc-pairs_loss": 3.062483549118042, + "eval_vitaminc-pairs_runtime": 3.209, + "eval_vitaminc-pairs_samples_per_second": 39.888, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_negation-triplets_loss": 0.9274396300315857, + "eval_negation-triplets_runtime": 0.786, + "eval_negation-triplets_samples_per_second": 162.845, + "eval_negation-triplets_steps_per_second": 1.272, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_scitail-pairs-pos_loss": 0.16325777769088745, + "eval_scitail-pairs-pos_runtime": 0.8967, + "eval_scitail-pairs-pos_samples_per_second": 142.748, + "eval_scitail-pairs-pos_steps_per_second": 1.115, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_scitail-pairs-qa_loss": 0.0012430674396455288, + "eval_scitail-pairs-qa_runtime": 0.5982, + "eval_scitail-pairs-qa_samples_per_second": 213.981, + "eval_scitail-pairs-qa_steps_per_second": 1.672, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_xsum-pairs_loss": 0.30085036158561707, + "eval_xsum-pairs_runtime": 3.0286, + "eval_xsum-pairs_samples_per_second": 42.264, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_sciq_pairs_loss": 0.08908446878194809, + "eval_sciq_pairs_runtime": 3.4784, + "eval_sciq_pairs_samples_per_second": 36.798, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_qasc_pairs_loss": 0.21207481622695923, + "eval_qasc_pairs_runtime": 0.6196, + "eval_qasc_pairs_samples_per_second": 206.589, + "eval_qasc_pairs_steps_per_second": 1.614, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_openbookqa_pairs_loss": 0.8421127200126648, + "eval_openbookqa_pairs_runtime": 0.5888, + "eval_openbookqa_pairs_samples_per_second": 217.392, + "eval_openbookqa_pairs_steps_per_second": 1.698, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_msmarco_pairs_loss": 0.9122468829154968, + "eval_msmarco_pairs_runtime": 1.526, + "eval_msmarco_pairs_samples_per_second": 83.878, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_nq_pairs_loss": 0.7917957305908203, + "eval_nq_pairs_runtime": 2.9019, + "eval_nq_pairs_samples_per_second": 44.109, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_trivia_pairs_loss": 0.8618249893188477, + "eval_trivia_pairs_runtime": 3.4539, + "eval_trivia_pairs_samples_per_second": 37.059, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_gooaq_pairs_loss": 0.42768269777297974, + "eval_gooaq_pairs_runtime": 0.9557, + "eval_gooaq_pairs_samples_per_second": 133.939, + "eval_gooaq_pairs_steps_per_second": 1.046, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_paws-pos_loss": 0.02307211607694626, + "eval_paws-pos_runtime": 0.699, + "eval_paws-pos_samples_per_second": 183.109, + "eval_paws-pos_steps_per_second": 1.431, + "step": 1360 + }, + { + "epoch": 1.3991769547325104, + "eval_global_dataset_loss": 0.4745106101036072, + "eval_global_dataset_runtime": 13.4196, + "eval_global_dataset_samples_per_second": 30.999, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1360 + }, + { + "epoch": 1.4002057613168724, + "grad_norm": 1.443122148513794, + "learning_rate": 3.352492300851954e-05, + "loss": 0.0722, + "step": 1361 + }, + { + "epoch": 1.4012345679012346, + "grad_norm": 8.220876693725586, + "learning_rate": 3.351760734260503e-05, + "loss": 0.5022, + "step": 1362 + }, + { + "epoch": 1.4022633744855968, + "grad_norm": 9.237991333007812, + "learning_rate": 3.3510274810956444e-05, + "loss": 0.7064, + "step": 1363 + }, + { + "epoch": 1.4032921810699588, + "grad_norm": 6.430814266204834, + "learning_rate": 3.350292542571687e-05, + "loss": 0.3364, + "step": 1364 + }, + { + "epoch": 1.404320987654321, + "grad_norm": 10.146993637084961, + "learning_rate": 3.349555919905731e-05, + "loss": 0.7776, + "step": 1365 + }, + { + "epoch": 1.405349794238683, + "grad_norm": 0.9642751812934875, + "learning_rate": 3.3488176143176646e-05, + "loss": 0.0347, + "step": 1366 + }, + { + "epoch": 1.4063786008230452, + "grad_norm": 8.7808256149292, + "learning_rate": 3.3480776270301645e-05, + "loss": 0.4706, + "step": 1367 + }, + { + "epoch": 1.4074074074074074, + "grad_norm": 5.345498085021973, + "learning_rate": 3.34733595926869e-05, + "loss": 0.187, + "step": 1368 + }, + { + "epoch": 1.4084362139917697, + "grad_norm": 10.110101699829102, + "learning_rate": 3.346592612261487e-05, + "loss": 0.5993, + "step": 1369 + }, + { + "epoch": 1.4094650205761317, + "grad_norm": 8.079977035522461, + "learning_rate": 3.345847587239579e-05, + "loss": 0.6859, + "step": 1370 + }, + { + "epoch": 1.4104938271604939, + "grad_norm": 13.22767162322998, + "learning_rate": 3.345100885436769e-05, + "loss": 0.9238, + "step": 1371 + }, + { + "epoch": 1.4115226337448559, + "grad_norm": 5.358603000640869, + "learning_rate": 3.3443525080896384e-05, + "loss": 0.1511, + "step": 1372 + }, + { + "epoch": 1.412551440329218, + "grad_norm": 8.831647872924805, + "learning_rate": 3.343602456437542e-05, + "loss": 0.6568, + "step": 1373 + }, + { + "epoch": 1.4135802469135803, + "grad_norm": 6.89711332321167, + "learning_rate": 3.342850731722608e-05, + "loss": 0.3368, + "step": 1374 + }, + { + "epoch": 1.4146090534979423, + "grad_norm": 8.110711097717285, + "learning_rate": 3.3420973351897355e-05, + "loss": 0.5127, + "step": 1375 + }, + { + "epoch": 1.4156378600823045, + "grad_norm": 0.5851256251335144, + "learning_rate": 3.3413422680865914e-05, + "loss": 0.0235, + "step": 1376 + }, + { + "epoch": 1.4166666666666667, + "grad_norm": 7.06640100479126, + "learning_rate": 3.340585531663609e-05, + "loss": 0.3398, + "step": 1377 + }, + { + "epoch": 1.4176954732510287, + "grad_norm": 10.556662559509277, + "learning_rate": 3.3398271271739875e-05, + "loss": 0.7283, + "step": 1378 + }, + { + "epoch": 1.418724279835391, + "grad_norm": 13.46735954284668, + "learning_rate": 3.339067055873687e-05, + "loss": 1.0423, + "step": 1379 + }, + { + "epoch": 1.4197530864197532, + "grad_norm": 5.427450180053711, + "learning_rate": 3.3383053190214286e-05, + "loss": 0.2566, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8063021302223206, + "eval_Qnli-dev_cosine_ap": 0.74411759194727, + "eval_Qnli-dev_cosine_f1": 0.69140625, + "eval_Qnli-dev_cosine_f1_threshold": 0.7554892897605896, + "eval_Qnli-dev_cosine_precision": 0.6413043478260869, + "eval_Qnli-dev_cosine_recall": 0.75, + "eval_Qnli-dev_dot_accuracy": 0.654296875, + "eval_Qnli-dev_dot_accuracy_threshold": 381.40008544921875, + "eval_Qnli-dev_dot_ap": 0.671022496389324, + "eval_Qnli-dev_dot_f1": 0.6676970633693973, + "eval_Qnli-dev_dot_f1_threshold": 306.57379150390625, + "eval_Qnli-dev_dot_precision": 0.5255474452554745, + "eval_Qnli-dev_dot_recall": 0.9152542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.69921875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.237188339233398, + "eval_Qnli-dev_euclidean_ap": 0.7538181870941214, + "eval_Qnli-dev_euclidean_f1": 0.6978557504873294, + "eval_Qnli-dev_euclidean_f1_threshold": 15.234086990356445, + "eval_Qnli-dev_euclidean_precision": 0.6462093862815884, + "eval_Qnli-dev_euclidean_recall": 0.7584745762711864, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 286.4082336425781, + "eval_Qnli-dev_manhattan_ap": 0.7616482458252489, + "eval_Qnli-dev_manhattan_f1": 0.700374531835206, + "eval_Qnli-dev_manhattan_f1_threshold": 324.758544921875, + "eval_Qnli-dev_manhattan_precision": 0.62751677852349, + "eval_Qnli-dev_manhattan_recall": 0.7923728813559322, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 381.40008544921875, + "eval_Qnli-dev_max_ap": 0.7616482458252489, + "eval_Qnli-dev_max_f1": 0.700374531835206, + "eval_Qnli-dev_max_f1_threshold": 324.758544921875, + "eval_Qnli-dev_max_precision": 0.6462093862815884, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.734375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8630719184875488, + "eval_allNLI-dev_cosine_ap": 0.6272275331563291, + "eval_allNLI-dev_cosine_f1": 0.6347031963470319, + "eval_allNLI-dev_cosine_f1_threshold": 0.773266077041626, + "eval_allNLI-dev_cosine_precision": 0.5245283018867924, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 395.307861328125, + "eval_allNLI-dev_dot_ap": 0.563263501967994, + "eval_allNLI-dev_dot_f1": 0.6070686070686071, + "eval_allNLI-dev_dot_f1_threshold": 346.6265869140625, + "eval_allNLI-dev_dot_precision": 0.474025974025974, + "eval_allNLI-dev_dot_recall": 0.8439306358381503, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.851537704467773, + "eval_allNLI-dev_euclidean_ap": 0.6326968806302966, + "eval_allNLI-dev_euclidean_f1": 0.6485260770975058, + "eval_allNLI-dev_euclidean_f1_threshold": 14.771903991699219, + "eval_allNLI-dev_euclidean_precision": 0.5335820895522388, + "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 231.5478515625, + "eval_allNLI-dev_manhattan_ap": 0.6294888875849509, + "eval_allNLI-dev_manhattan_f1": 0.6382022471910113, + "eval_allNLI-dev_manhattan_f1_threshold": 309.86102294921875, + "eval_allNLI-dev_manhattan_precision": 0.5220588235294118, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 395.307861328125, + "eval_allNLI-dev_max_ap": 0.6326968806302966, + "eval_allNLI-dev_max_f1": 0.6485260770975058, + "eval_allNLI-dev_max_f1_threshold": 346.6265869140625, + "eval_allNLI-dev_max_precision": 0.5335820895522388, + "eval_allNLI-dev_max_recall": 0.8439306358381503, + "eval_sequential_score": 0.7616482458252489, + "eval_sts-test_pearson_cosine": 0.834879144813166, + "eval_sts-test_pearson_dot": 0.8011799964530804, + "eval_sts-test_pearson_euclidean": 0.8686352715928541, + "eval_sts-test_pearson_manhattan": 0.8678638281425739, + "eval_sts-test_pearson_max": 0.8686352715928541, + "eval_sts-test_spearman_cosine": 0.8662917854264144, + "eval_sts-test_spearman_dot": 0.7925740170641125, + "eval_sts-test_spearman_euclidean": 0.8658375601232916, + "eval_sts-test_spearman_manhattan": 0.8650538760338053, + "eval_sts-test_spearman_max": 0.8662917854264144, + "eval_vitaminc-pairs_loss": 3.0536015033721924, + "eval_vitaminc-pairs_runtime": 3.2066, + "eval_vitaminc-pairs_samples_per_second": 39.918, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_negation-triplets_loss": 0.9620550870895386, + "eval_negation-triplets_runtime": 0.7623, + "eval_negation-triplets_samples_per_second": 167.916, + "eval_negation-triplets_steps_per_second": 1.312, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_scitail-pairs-pos_loss": 0.16859027743339539, + "eval_scitail-pairs-pos_runtime": 0.8698, + "eval_scitail-pairs-pos_samples_per_second": 147.167, + "eval_scitail-pairs-pos_steps_per_second": 1.15, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_scitail-pairs-qa_loss": 0.001436021295376122, + "eval_scitail-pairs-qa_runtime": 0.5959, + "eval_scitail-pairs-qa_samples_per_second": 214.79, + "eval_scitail-pairs-qa_steps_per_second": 1.678, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_xsum-pairs_loss": 0.3317752480506897, + "eval_xsum-pairs_runtime": 3.0275, + "eval_xsum-pairs_samples_per_second": 42.28, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_sciq_pairs_loss": 0.0831882655620575, + "eval_sciq_pairs_runtime": 3.4867, + "eval_sciq_pairs_samples_per_second": 36.711, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_qasc_pairs_loss": 0.19361478090286255, + "eval_qasc_pairs_runtime": 0.6177, + "eval_qasc_pairs_samples_per_second": 207.225, + "eval_qasc_pairs_steps_per_second": 1.619, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_openbookqa_pairs_loss": 0.838322103023529, + "eval_openbookqa_pairs_runtime": 0.5946, + "eval_openbookqa_pairs_samples_per_second": 215.255, + "eval_openbookqa_pairs_steps_per_second": 1.682, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_msmarco_pairs_loss": 0.9363899827003479, + "eval_msmarco_pairs_runtime": 1.5285, + "eval_msmarco_pairs_samples_per_second": 83.741, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_nq_pairs_loss": 0.8688430190086365, + "eval_nq_pairs_runtime": 2.9031, + "eval_nq_pairs_samples_per_second": 44.092, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_trivia_pairs_loss": 0.993528425693512, + "eval_trivia_pairs_runtime": 3.4413, + "eval_trivia_pairs_samples_per_second": 37.195, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_gooaq_pairs_loss": 0.4363172650337219, + "eval_gooaq_pairs_runtime": 0.9587, + "eval_gooaq_pairs_samples_per_second": 133.51, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_paws-pos_loss": 0.02160307578742504, + "eval_paws-pos_runtime": 0.7183, + "eval_paws-pos_samples_per_second": 178.209, + "eval_paws-pos_steps_per_second": 1.392, + "step": 1380 + }, + { + "epoch": 1.4197530864197532, + "eval_global_dataset_loss": 0.5103635191917419, + "eval_global_dataset_runtime": 13.3994, + "eval_global_dataset_samples_per_second": 31.046, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1380 + }, + { + "epoch": 1.4207818930041152, + "grad_norm": 6.8490495681762695, + "learning_rate": 3.337541917878692e-05, + "loss": 0.2802, + "step": 1381 + }, + { + "epoch": 1.4218106995884774, + "grad_norm": 7.549937725067139, + "learning_rate": 3.3367768537097136e-05, + "loss": 0.3093, + "step": 1382 + }, + { + "epoch": 1.4228395061728394, + "grad_norm": 10.411901473999023, + "learning_rate": 3.336010127781481e-05, + "loss": 0.8066, + "step": 1383 + }, + { + "epoch": 1.4238683127572016, + "grad_norm": 7.990019798278809, + "learning_rate": 3.335241741363736e-05, + "loss": 0.3886, + "step": 1384 + }, + { + "epoch": 1.4248971193415638, + "grad_norm": 5.726963996887207, + "learning_rate": 3.334471695728972e-05, + "loss": 0.2353, + "step": 1385 + }, + { + "epoch": 1.425925925925926, + "grad_norm": 7.145617485046387, + "learning_rate": 3.333699992152426e-05, + "loss": 0.4184, + "step": 1386 + }, + { + "epoch": 1.426954732510288, + "grad_norm": 10.926759719848633, + "learning_rate": 3.3329266319120835e-05, + "loss": 0.7746, + "step": 1387 + }, + { + "epoch": 1.4279835390946503, + "grad_norm": 12.381770133972168, + "learning_rate": 3.332151616288673e-05, + "loss": 1.0693, + "step": 1388 + }, + { + "epoch": 1.4290123456790123, + "grad_norm": 1.119705080986023, + "learning_rate": 3.331374946565665e-05, + "loss": 0.02, + "step": 1389 + }, + { + "epoch": 1.4300411522633745, + "grad_norm": 7.537491798400879, + "learning_rate": 3.3305966240292666e-05, + "loss": 0.2794, + "step": 1390 + }, + { + "epoch": 1.4310699588477367, + "grad_norm": 0.732020914554596, + "learning_rate": 3.329816649968425e-05, + "loss": 0.0115, + "step": 1391 + }, + { + "epoch": 1.4320987654320987, + "grad_norm": 7.852914333343506, + "learning_rate": 3.329035025674822e-05, + "loss": 0.3645, + "step": 1392 + }, + { + "epoch": 1.433127572016461, + "grad_norm": 6.324718952178955, + "learning_rate": 3.3282517524428704e-05, + "loss": 0.2254, + "step": 1393 + }, + { + "epoch": 1.4341563786008231, + "grad_norm": 7.247324466705322, + "learning_rate": 3.327466831569716e-05, + "loss": 0.2813, + "step": 1394 + }, + { + "epoch": 1.4351851851851851, + "grad_norm": 12.112274169921875, + "learning_rate": 3.3266802643552327e-05, + "loss": 0.8248, + "step": 1395 + }, + { + "epoch": 1.4362139917695473, + "grad_norm": 0.8138561844825745, + "learning_rate": 3.325892052102018e-05, + "loss": 0.0158, + "step": 1396 + }, + { + "epoch": 1.4372427983539096, + "grad_norm": 7.126984596252441, + "learning_rate": 3.325102196115398e-05, + "loss": 0.285, + "step": 1397 + }, + { + "epoch": 1.4382716049382716, + "grad_norm": 9.047616958618164, + "learning_rate": 3.3243106977034185e-05, + "loss": 0.5326, + "step": 1398 + }, + { + "epoch": 1.4393004115226338, + "grad_norm": 11.624143600463867, + "learning_rate": 3.323517558176846e-05, + "loss": 0.781, + "step": 1399 + }, + { + "epoch": 1.4403292181069958, + "grad_norm": 6.335608005523682, + "learning_rate": 3.3227227788491634e-05, + "loss": 0.261, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_Qnli-dev_cosine_accuracy": 0.7109375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7974846363067627, + "eval_Qnli-dev_cosine_ap": 0.7523113514409753, + "eval_Qnli-dev_cosine_f1": 0.6984126984126984, + "eval_Qnli-dev_cosine_f1_threshold": 0.7209751605987549, + "eval_Qnli-dev_cosine_precision": 0.5981873111782477, + "eval_Qnli-dev_cosine_recall": 0.8389830508474576, + "eval_Qnli-dev_dot_accuracy": 0.681640625, + "eval_Qnli-dev_dot_accuracy_threshold": 381.9517517089844, + "eval_Qnli-dev_dot_ap": 0.6812870817491152, + "eval_Qnli-dev_dot_f1": 0.6745762711864407, + "eval_Qnli-dev_dot_f1_threshold": 325.1109619140625, + "eval_Qnli-dev_dot_precision": 0.5621468926553672, + "eval_Qnli-dev_dot_recall": 0.8432203389830508, + "eval_Qnli-dev_euclidean_accuracy": 0.7109375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.50356674194336, + "eval_Qnli-dev_euclidean_ap": 0.7619644519510701, + "eval_Qnli-dev_euclidean_f1": 0.6952054794520549, + "eval_Qnli-dev_euclidean_f1_threshold": 16.413734436035156, + "eval_Qnli-dev_euclidean_precision": 0.5833333333333334, + "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, + "eval_Qnli-dev_manhattan_accuracy": 0.720703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 291.779052734375, + "eval_Qnli-dev_manhattan_ap": 0.767528548647967, + "eval_Qnli-dev_manhattan_f1": 0.7100175746924429, + "eval_Qnli-dev_manhattan_f1_threshold": 337.9719543457031, + "eval_Qnli-dev_manhattan_precision": 0.6066066066066066, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 381.9517517089844, + "eval_Qnli-dev_max_ap": 0.767528548647967, + "eval_Qnli-dev_max_f1": 0.7100175746924429, + "eval_Qnli-dev_max_f1_threshold": 337.9719543457031, + "eval_Qnli-dev_max_precision": 0.6066066066066066, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8541853427886963, + "eval_allNLI-dev_cosine_ap": 0.6255349351155984, + "eval_allNLI-dev_cosine_f1": 0.6448598130841121, + "eval_allNLI-dev_cosine_f1_threshold": 0.7745788097381592, + "eval_allNLI-dev_cosine_precision": 0.5411764705882353, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 385.14898681640625, + "eval_allNLI-dev_dot_ap": 0.553538131502168, + "eval_allNLI-dev_dot_f1": 0.6029411764705882, + "eval_allNLI-dev_dot_f1_threshold": 357.1886901855469, + "eval_allNLI-dev_dot_precision": 0.5234042553191489, + "eval_allNLI-dev_dot_recall": 0.7109826589595376, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.644266128540039, + "eval_allNLI-dev_euclidean_ap": 0.632827860323889, + "eval_allNLI-dev_euclidean_f1": 0.6467889908256881, + "eval_allNLI-dev_euclidean_f1_threshold": 14.465250015258789, + "eval_allNLI-dev_euclidean_precision": 0.5361216730038023, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.73828125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 246.18682861328125, + "eval_allNLI-dev_manhattan_ap": 0.6280673121543834, + "eval_allNLI-dev_manhattan_f1": 0.6416861826697893, + "eval_allNLI-dev_manhattan_f1_threshold": 298.9549255371094, + "eval_allNLI-dev_manhattan_precision": 0.5393700787401575, + "eval_allNLI-dev_manhattan_recall": 0.791907514450867, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 385.14898681640625, + "eval_allNLI-dev_max_ap": 0.632827860323889, + "eval_allNLI-dev_max_f1": 0.6467889908256881, + "eval_allNLI-dev_max_f1_threshold": 357.1886901855469, + "eval_allNLI-dev_max_precision": 0.5411764705882353, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.767528548647967, + "eval_sts-test_pearson_cosine": 0.8294398782719722, + "eval_sts-test_pearson_dot": 0.806709569277571, + "eval_sts-test_pearson_euclidean": 0.8635655114321077, + "eval_sts-test_pearson_manhattan": 0.8619051829676366, + "eval_sts-test_pearson_max": 0.8635655114321077, + "eval_sts-test_spearman_cosine": 0.863560929202527, + "eval_sts-test_spearman_dot": 0.8028960745090844, + "eval_sts-test_spearman_euclidean": 0.8622703923265218, + "eval_sts-test_spearman_manhattan": 0.860564160983452, + "eval_sts-test_spearman_max": 0.863560929202527, + "eval_vitaminc-pairs_loss": 3.102480173110962, + "eval_vitaminc-pairs_runtime": 3.5959, + "eval_vitaminc-pairs_samples_per_second": 35.596, + "eval_vitaminc-pairs_steps_per_second": 0.278, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_negation-triplets_loss": 0.9550673365592957, + "eval_negation-triplets_runtime": 0.7685, + "eval_negation-triplets_samples_per_second": 166.547, + "eval_negation-triplets_steps_per_second": 1.301, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_scitail-pairs-pos_loss": 0.18612991273403168, + "eval_scitail-pairs-pos_runtime": 0.8883, + "eval_scitail-pairs-pos_samples_per_second": 144.089, + "eval_scitail-pairs-pos_steps_per_second": 1.126, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_scitail-pairs-qa_loss": 0.0008277587476186454, + "eval_scitail-pairs-qa_runtime": 0.605, + "eval_scitail-pairs-qa_samples_per_second": 211.554, + "eval_scitail-pairs-qa_steps_per_second": 1.653, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_xsum-pairs_loss": 0.29447418451309204, + "eval_xsum-pairs_runtime": 3.0431, + "eval_xsum-pairs_samples_per_second": 42.062, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_sciq_pairs_loss": 0.0855700671672821, + "eval_sciq_pairs_runtime": 3.4947, + "eval_sciq_pairs_samples_per_second": 36.627, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_qasc_pairs_loss": 0.18778705596923828, + "eval_qasc_pairs_runtime": 0.613, + "eval_qasc_pairs_samples_per_second": 208.822, + "eval_qasc_pairs_steps_per_second": 1.631, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_openbookqa_pairs_loss": 0.8724998235702515, + "eval_openbookqa_pairs_runtime": 0.5965, + "eval_openbookqa_pairs_samples_per_second": 214.582, + "eval_openbookqa_pairs_steps_per_second": 1.676, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_msmarco_pairs_loss": 0.898486852645874, + "eval_msmarco_pairs_runtime": 1.521, + "eval_msmarco_pairs_samples_per_second": 84.158, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_nq_pairs_loss": 0.8151339888572693, + "eval_nq_pairs_runtime": 2.9108, + "eval_nq_pairs_samples_per_second": 43.975, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_trivia_pairs_loss": 0.8818275928497314, + "eval_trivia_pairs_runtime": 3.4441, + "eval_trivia_pairs_samples_per_second": 37.165, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_gooaq_pairs_loss": 0.461891770362854, + "eval_gooaq_pairs_runtime": 0.9488, + "eval_gooaq_pairs_samples_per_second": 134.904, + "eval_gooaq_pairs_steps_per_second": 1.054, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_paws-pos_loss": 0.02126290462911129, + "eval_paws-pos_runtime": 0.7036, + "eval_paws-pos_samples_per_second": 181.912, + "eval_paws-pos_steps_per_second": 1.421, + "step": 1400 + }, + { + "epoch": 1.4403292181069958, + "eval_global_dataset_loss": 0.4936361312866211, + "eval_global_dataset_runtime": 13.4229, + "eval_global_dataset_samples_per_second": 30.992, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1400 + }, + { + "epoch": 1.441358024691358, + "grad_norm": 17.88776206970215, + "learning_rate": 3.321926361036571e-05, + "loss": 2.83, + "step": 1401 + }, + { + "epoch": 1.4423868312757202, + "grad_norm": 9.830528259277344, + "learning_rate": 3.3211283060579815e-05, + "loss": 0.4911, + "step": 1402 + }, + { + "epoch": 1.4434156378600824, + "grad_norm": 6.779402256011963, + "learning_rate": 3.320328615235019e-05, + "loss": 0.256, + "step": 1403 + }, + { + "epoch": 1.4444444444444444, + "grad_norm": 0.3222440183162689, + "learning_rate": 3.319527289892017e-05, + "loss": 0.0046, + "step": 1404 + }, + { + "epoch": 1.4454732510288066, + "grad_norm": 12.658515930175781, + "learning_rate": 3.3187243313560156e-05, + "loss": 0.9102, + "step": 1405 + }, + { + "epoch": 1.4465020576131686, + "grad_norm": 8.560856819152832, + "learning_rate": 3.317919740956759e-05, + "loss": 0.3865, + "step": 1406 + }, + { + "epoch": 1.4475308641975309, + "grad_norm": 12.2800874710083, + "learning_rate": 3.3171135200266955e-05, + "loss": 1.0253, + "step": 1407 + }, + { + "epoch": 1.448559670781893, + "grad_norm": 5.864134788513184, + "learning_rate": 3.316305669900972e-05, + "loss": 0.259, + "step": 1408 + }, + { + "epoch": 1.449588477366255, + "grad_norm": 14.230968475341797, + "learning_rate": 3.315496191917434e-05, + "loss": 2.4152, + "step": 1409 + }, + { + "epoch": 1.4506172839506173, + "grad_norm": 10.343489646911621, + "learning_rate": 3.3146850874166234e-05, + "loss": 0.9387, + "step": 1410 + }, + { + "epoch": 1.4516460905349795, + "grad_norm": 11.05865478515625, + "learning_rate": 3.313872357741774e-05, + "loss": 0.9209, + "step": 1411 + }, + { + "epoch": 1.4526748971193415, + "grad_norm": 7.32178258895874, + "learning_rate": 3.313058004238812e-05, + "loss": 0.3593, + "step": 1412 + }, + { + "epoch": 1.4537037037037037, + "grad_norm": 9.29926872253418, + "learning_rate": 3.312242028256356e-05, + "loss": 0.6408, + "step": 1413 + }, + { + "epoch": 1.454732510288066, + "grad_norm": 10.301321029663086, + "learning_rate": 3.311424431145704e-05, + "loss": 0.6872, + "step": 1414 + }, + { + "epoch": 1.455761316872428, + "grad_norm": 4.875487327575684, + "learning_rate": 3.310605214260846e-05, + "loss": 0.3091, + "step": 1415 + }, + { + "epoch": 1.4567901234567902, + "grad_norm": 5.230945587158203, + "learning_rate": 3.30978437895845e-05, + "loss": 0.2862, + "step": 1416 + }, + { + "epoch": 1.4578189300411522, + "grad_norm": 10.013091087341309, + "learning_rate": 3.3089619265978674e-05, + "loss": 0.8637, + "step": 1417 + }, + { + "epoch": 1.4588477366255144, + "grad_norm": 7.067183017730713, + "learning_rate": 3.308137858541124e-05, + "loss": 0.3158, + "step": 1418 + }, + { + "epoch": 1.4598765432098766, + "grad_norm": 8.037884712219238, + "learning_rate": 3.3073121761529245e-05, + "loss": 0.6922, + "step": 1419 + }, + { + "epoch": 1.4609053497942388, + "grad_norm": 0.15969954431056976, + "learning_rate": 3.3064848808006447e-05, + "loss": 0.0024, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_Qnli-dev_cosine_accuracy": 0.68359375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8269785642623901, + "eval_Qnli-dev_cosine_ap": 0.7237253656424805, + "eval_Qnli-dev_cosine_f1": 0.6806282722513088, + "eval_Qnli-dev_cosine_f1_threshold": 0.7489698529243469, + "eval_Qnli-dev_cosine_precision": 0.5786350148367952, + "eval_Qnli-dev_cosine_recall": 0.826271186440678, + "eval_Qnli-dev_dot_accuracy": 0.634765625, + "eval_Qnli-dev_dot_accuracy_threshold": 420.9667053222656, + "eval_Qnli-dev_dot_ap": 0.6345480523803997, + "eval_Qnli-dev_dot_f1": 0.6751592356687899, + "eval_Qnli-dev_dot_f1_threshold": 345.52545166015625, + "eval_Qnli-dev_dot_precision": 0.5408163265306123, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.6875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.188535690307617, + "eval_Qnli-dev_euclidean_ap": 0.7344008197681215, + "eval_Qnli-dev_euclidean_f1": 0.6852886405959031, + "eval_Qnli-dev_euclidean_f1_threshold": 15.030804634094238, + "eval_Qnli-dev_euclidean_precision": 0.6112956810631229, + "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, + "eval_Qnli-dev_manhattan_accuracy": 0.689453125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 264.3589782714844, + "eval_Qnli-dev_manhattan_ap": 0.7396413308815867, + "eval_Qnli-dev_manhattan_f1": 0.6823956442831216, + "eval_Qnli-dev_manhattan_f1_threshold": 318.8542785644531, + "eval_Qnli-dev_manhattan_precision": 0.5968253968253968, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.689453125, + "eval_Qnli-dev_max_accuracy_threshold": 420.9667053222656, + "eval_Qnli-dev_max_ap": 0.7396413308815867, + "eval_Qnli-dev_max_f1": 0.6852886405959031, + "eval_Qnli-dev_max_f1_threshold": 345.52545166015625, + "eval_Qnli-dev_max_precision": 0.6112956810631229, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8926049470901489, + "eval_allNLI-dev_cosine_ap": 0.6183972231090226, + "eval_allNLI-dev_cosine_f1": 0.6339066339066338, + "eval_allNLI-dev_cosine_f1_threshold": 0.8191593885421753, + "eval_allNLI-dev_cosine_precision": 0.5512820512820513, + "eval_allNLI-dev_cosine_recall": 0.7456647398843931, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 425.713134765625, + "eval_allNLI-dev_dot_ap": 0.5372618188733738, + "eval_allNLI-dev_dot_f1": 0.6022988505747127, + "eval_allNLI-dev_dot_f1_threshold": 392.1880187988281, + "eval_allNLI-dev_dot_precision": 0.5, + "eval_allNLI-dev_dot_recall": 0.7572254335260116, + "eval_allNLI-dev_euclidean_accuracy": 0.728515625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.652290344238281, + "eval_allNLI-dev_euclidean_ap": 0.6233815893708006, + "eval_allNLI-dev_euclidean_f1": 0.639269406392694, + "eval_allNLI-dev_euclidean_f1_threshold": 13.957777976989746, + "eval_allNLI-dev_euclidean_precision": 0.5283018867924528, + "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 243.81475830078125, + "eval_allNLI-dev_manhattan_ap": 0.6188492062115317, + "eval_allNLI-dev_manhattan_f1": 0.6338028169014085, + "eval_allNLI-dev_manhattan_f1_threshold": 285.5887145996094, + "eval_allNLI-dev_manhattan_precision": 0.5335968379446641, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 425.713134765625, + "eval_allNLI-dev_max_ap": 0.6233815893708006, + "eval_allNLI-dev_max_f1": 0.639269406392694, + "eval_allNLI-dev_max_f1_threshold": 392.1880187988281, + "eval_allNLI-dev_max_precision": 0.5512820512820513, + "eval_allNLI-dev_max_recall": 0.8092485549132948, + "eval_sequential_score": 0.7396413308815867, + "eval_sts-test_pearson_cosine": 0.8326547769095449, + "eval_sts-test_pearson_dot": 0.8089814814025779, + "eval_sts-test_pearson_euclidean": 0.868581267221316, + "eval_sts-test_pearson_manhattan": 0.8668280311976726, + "eval_sts-test_pearson_max": 0.868581267221316, + "eval_sts-test_spearman_cosine": 0.869660212871345, + "eval_sts-test_spearman_dot": 0.8006806185626417, + "eval_sts-test_spearman_euclidean": 0.867850740368245, + "eval_sts-test_spearman_manhattan": 0.8662515092911621, + "eval_sts-test_spearman_max": 0.869660212871345, + "eval_vitaminc-pairs_loss": 3.123586416244507, + "eval_vitaminc-pairs_runtime": 3.2162, + "eval_vitaminc-pairs_samples_per_second": 39.799, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_negation-triplets_loss": 0.9562065601348877, + "eval_negation-triplets_runtime": 0.7629, + "eval_negation-triplets_samples_per_second": 167.789, + "eval_negation-triplets_steps_per_second": 1.311, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_scitail-pairs-pos_loss": 0.15029510855674744, + "eval_scitail-pairs-pos_runtime": 0.869, + "eval_scitail-pairs-pos_samples_per_second": 147.29, + "eval_scitail-pairs-pos_steps_per_second": 1.151, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_scitail-pairs-qa_loss": 0.0026416766922920942, + "eval_scitail-pairs-qa_runtime": 0.5945, + "eval_scitail-pairs-qa_samples_per_second": 215.29, + "eval_scitail-pairs-qa_steps_per_second": 1.682, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_xsum-pairs_loss": 0.3665918707847595, + "eval_xsum-pairs_runtime": 3.0261, + "eval_xsum-pairs_samples_per_second": 42.299, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_sciq_pairs_loss": 0.08483776450157166, + "eval_sciq_pairs_runtime": 3.5017, + "eval_sciq_pairs_samples_per_second": 36.554, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_qasc_pairs_loss": 0.24360936880111694, + "eval_qasc_pairs_runtime": 0.6209, + "eval_qasc_pairs_samples_per_second": 206.168, + "eval_qasc_pairs_steps_per_second": 1.611, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_openbookqa_pairs_loss": 0.8522987365722656, + "eval_openbookqa_pairs_runtime": 0.5921, + "eval_openbookqa_pairs_samples_per_second": 216.19, + "eval_openbookqa_pairs_steps_per_second": 1.689, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_msmarco_pairs_loss": 0.8556464910507202, + "eval_msmarco_pairs_runtime": 1.5245, + "eval_msmarco_pairs_samples_per_second": 83.96, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_nq_pairs_loss": 0.9329224228858948, + "eval_nq_pairs_runtime": 2.9268, + "eval_nq_pairs_samples_per_second": 43.734, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_trivia_pairs_loss": 0.6846005320549011, + "eval_trivia_pairs_runtime": 3.442, + "eval_trivia_pairs_samples_per_second": 37.188, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_gooaq_pairs_loss": 0.45954039692878723, + "eval_gooaq_pairs_runtime": 0.9602, + "eval_gooaq_pairs_samples_per_second": 133.311, + "eval_gooaq_pairs_steps_per_second": 1.041, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_paws-pos_loss": 0.021524449810385704, + "eval_paws-pos_runtime": 0.6947, + "eval_paws-pos_samples_per_second": 184.253, + "eval_paws-pos_steps_per_second": 1.439, + "step": 1420 + }, + { + "epoch": 1.4609053497942388, + "eval_global_dataset_loss": 0.504304826259613, + "eval_global_dataset_runtime": 13.3809, + "eval_global_dataset_samples_per_second": 31.089, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1420 + }, + { + "epoch": 1.4619341563786008, + "grad_norm": 6.770277976989746, + "learning_rate": 3.305655973854333e-05, + "loss": 0.3382, + "step": 1421 + }, + { + "epoch": 1.462962962962963, + "grad_norm": 10.697244644165039, + "learning_rate": 3.304825456686707e-05, + "loss": 0.9051, + "step": 1422 + }, + { + "epoch": 1.463991769547325, + "grad_norm": 12.099508285522461, + "learning_rate": 3.3039933306731486e-05, + "loss": 0.8357, + "step": 1423 + }, + { + "epoch": 1.4650205761316872, + "grad_norm": 5.6456732749938965, + "learning_rate": 3.303159597191706e-05, + "loss": 0.2571, + "step": 1424 + }, + { + "epoch": 1.4660493827160495, + "grad_norm": 5.662939071655273, + "learning_rate": 3.30232425762309e-05, + "loss": 0.2161, + "step": 1425 + }, + { + "epoch": 1.4670781893004115, + "grad_norm": 6.24656867980957, + "learning_rate": 3.3014873133506684e-05, + "loss": 0.2699, + "step": 1426 + }, + { + "epoch": 1.4681069958847737, + "grad_norm": 8.071493148803711, + "learning_rate": 3.30064876576047e-05, + "loss": 0.4325, + "step": 1427 + }, + { + "epoch": 1.4691358024691357, + "grad_norm": 0.7010164260864258, + "learning_rate": 3.299808616241177e-05, + "loss": 0.0202, + "step": 1428 + }, + { + "epoch": 1.4701646090534979, + "grad_norm": 7.959322452545166, + "learning_rate": 3.2989668661841234e-05, + "loss": 0.4367, + "step": 1429 + }, + { + "epoch": 1.47119341563786, + "grad_norm": 7.032750606536865, + "learning_rate": 3.298123516983295e-05, + "loss": 0.2904, + "step": 1430 + }, + { + "epoch": 1.4722222222222223, + "grad_norm": 7.717670917510986, + "learning_rate": 3.297278570035327e-05, + "loss": 0.3461, + "step": 1431 + }, + { + "epoch": 1.4732510288065843, + "grad_norm": 12.34119701385498, + "learning_rate": 3.2964320267394986e-05, + "loss": 1.1376, + "step": 1432 + }, + { + "epoch": 1.4742798353909465, + "grad_norm": 16.769927978515625, + "learning_rate": 3.295583888497733e-05, + "loss": 2.4901, + "step": 1433 + }, + { + "epoch": 1.4753086419753085, + "grad_norm": 5.696785926818848, + "learning_rate": 3.294734156714596e-05, + "loss": 0.1807, + "step": 1434 + }, + { + "epoch": 1.4763374485596708, + "grad_norm": 8.693737983703613, + "learning_rate": 3.2938828327972906e-05, + "loss": 0.7702, + "step": 1435 + }, + { + "epoch": 1.477366255144033, + "grad_norm": 11.573921203613281, + "learning_rate": 3.293029918155659e-05, + "loss": 0.8059, + "step": 1436 + }, + { + "epoch": 1.4783950617283952, + "grad_norm": 0.6408414840698242, + "learning_rate": 3.292175414202174e-05, + "loss": 0.0171, + "step": 1437 + }, + { + "epoch": 1.4794238683127572, + "grad_norm": 0.8278383016586304, + "learning_rate": 3.2913193223519434e-05, + "loss": 0.0214, + "step": 1438 + }, + { + "epoch": 1.4804526748971194, + "grad_norm": 7.157841205596924, + "learning_rate": 3.290461644022704e-05, + "loss": 0.4367, + "step": 1439 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 9.087885856628418, + "learning_rate": 3.2896023806348194e-05, + "loss": 0.8071, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8073866367340088, + "eval_Qnli-dev_cosine_ap": 0.7603012216062148, + "eval_Qnli-dev_cosine_f1": 0.6948176583493282, + "eval_Qnli-dev_cosine_f1_threshold": 0.756028413772583, + "eval_Qnli-dev_cosine_precision": 0.6350877192982456, + "eval_Qnli-dev_cosine_recall": 0.7669491525423728, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 383.9505615234375, + "eval_Qnli-dev_dot_ap": 0.6946710918004182, + "eval_Qnli-dev_dot_f1": 0.6813880126182966, + "eval_Qnli-dev_dot_f1_threshold": 315.57696533203125, + "eval_Qnli-dev_dot_precision": 0.542713567839196, + "eval_Qnli-dev_dot_recall": 0.9152542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.7109375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.200727462768555, + "eval_Qnli-dev_euclidean_ap": 0.766160629059141, + "eval_Qnli-dev_euclidean_f1": 0.6974169741697418, + "eval_Qnli-dev_euclidean_f1_threshold": 15.637357711791992, + "eval_Qnli-dev_euclidean_precision": 0.6176470588235294, + "eval_Qnli-dev_euclidean_recall": 0.8008474576271186, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 270.85723876953125, + "eval_Qnli-dev_manhattan_ap": 0.7701929827964322, + "eval_Qnli-dev_manhattan_f1": 0.6984126984126984, + "eval_Qnli-dev_manhattan_f1_threshold": 336.00274658203125, + "eval_Qnli-dev_manhattan_precision": 0.5981873111782477, + "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 383.9505615234375, + "eval_Qnli-dev_max_ap": 0.7701929827964322, + "eval_Qnli-dev_max_f1": 0.6984126984126984, + "eval_Qnli-dev_max_f1_threshold": 336.00274658203125, + "eval_Qnli-dev_max_precision": 0.6350877192982456, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.871171236038208, + "eval_allNLI-dev_cosine_ap": 0.6362103263897821, + "eval_allNLI-dev_cosine_f1": 0.6357615894039735, + "eval_allNLI-dev_cosine_f1_threshold": 0.7768651843070984, + "eval_allNLI-dev_cosine_precision": 0.5142857142857142, + "eval_allNLI-dev_cosine_recall": 0.8323699421965318, + "eval_allNLI-dev_dot_accuracy": 0.705078125, + "eval_allNLI-dev_dot_accuracy_threshold": 398.95013427734375, + "eval_allNLI-dev_dot_ap": 0.5661285579566687, + "eval_allNLI-dev_dot_f1": 0.6172248803827751, + "eval_allNLI-dev_dot_f1_threshold": 371.18365478515625, + "eval_allNLI-dev_dot_precision": 0.5265306122448979, + "eval_allNLI-dev_dot_recall": 0.7456647398843931, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.6724853515625, + "eval_allNLI-dev_euclidean_ap": 0.6400867765050245, + "eval_allNLI-dev_euclidean_f1": 0.6431718061674009, + "eval_allNLI-dev_euclidean_f1_threshold": 14.630485534667969, + "eval_allNLI-dev_euclidean_precision": 0.5195729537366548, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.744140625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 225.78453063964844, + "eval_allNLI-dev_manhattan_ap": 0.6350322702102289, + "eval_allNLI-dev_manhattan_f1": 0.6329670329670329, + "eval_allNLI-dev_manhattan_f1_threshold": 306.090087890625, + "eval_allNLI-dev_manhattan_precision": 0.5106382978723404, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 398.95013427734375, + "eval_allNLI-dev_max_ap": 0.6400867765050245, + "eval_allNLI-dev_max_f1": 0.6431718061674009, + "eval_allNLI-dev_max_f1_threshold": 371.18365478515625, + "eval_allNLI-dev_max_precision": 0.5265306122448979, + "eval_allNLI-dev_max_recall": 0.8439306358381503, + "eval_sequential_score": 0.7701929827964322, + "eval_sts-test_pearson_cosine": 0.8343564728648352, + "eval_sts-test_pearson_dot": 0.8077933555380535, + "eval_sts-test_pearson_euclidean": 0.8676567979365759, + "eval_sts-test_pearson_manhattan": 0.8650322762247608, + "eval_sts-test_pearson_max": 0.8676567979365759, + "eval_sts-test_spearman_cosine": 0.866502921877659, + "eval_sts-test_spearman_dot": 0.8002891792792511, + "eval_sts-test_spearman_euclidean": 0.8654190463267591, + "eval_sts-test_spearman_manhattan": 0.8613856151375525, + "eval_sts-test_spearman_max": 0.866502921877659, + "eval_vitaminc-pairs_loss": 2.8277482986450195, + "eval_vitaminc-pairs_runtime": 3.2374, + "eval_vitaminc-pairs_samples_per_second": 39.538, + "eval_vitaminc-pairs_steps_per_second": 0.309, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_negation-triplets_loss": 0.9136925935745239, + "eval_negation-triplets_runtime": 0.7555, + "eval_negation-triplets_samples_per_second": 169.431, + "eval_negation-triplets_steps_per_second": 1.324, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_scitail-pairs-pos_loss": 0.14386087656021118, + "eval_scitail-pairs-pos_runtime": 0.8962, + "eval_scitail-pairs-pos_samples_per_second": 142.826, + "eval_scitail-pairs-pos_steps_per_second": 1.116, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_scitail-pairs-qa_loss": 0.0007967444253154099, + "eval_scitail-pairs-qa_runtime": 0.6034, + "eval_scitail-pairs-qa_samples_per_second": 212.122, + "eval_scitail-pairs-qa_steps_per_second": 1.657, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_xsum-pairs_loss": 0.31424856185913086, + "eval_xsum-pairs_runtime": 3.0346, + "eval_xsum-pairs_samples_per_second": 42.18, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_sciq_pairs_loss": 0.10615328699350357, + "eval_sciq_pairs_runtime": 3.4997, + "eval_sciq_pairs_samples_per_second": 36.574, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_qasc_pairs_loss": 0.21712642908096313, + "eval_qasc_pairs_runtime": 0.6188, + "eval_qasc_pairs_samples_per_second": 206.853, + "eval_qasc_pairs_steps_per_second": 1.616, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_openbookqa_pairs_loss": 0.964438796043396, + "eval_openbookqa_pairs_runtime": 0.5921, + "eval_openbookqa_pairs_samples_per_second": 216.181, + "eval_openbookqa_pairs_steps_per_second": 1.689, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_msmarco_pairs_loss": 0.9308626651763916, + "eval_msmarco_pairs_runtime": 1.5222, + "eval_msmarco_pairs_samples_per_second": 84.088, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_nq_pairs_loss": 0.7551199793815613, + "eval_nq_pairs_runtime": 2.8988, + "eval_nq_pairs_samples_per_second": 44.157, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_trivia_pairs_loss": 0.8499756455421448, + "eval_trivia_pairs_runtime": 3.4377, + "eval_trivia_pairs_samples_per_second": 37.234, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_gooaq_pairs_loss": 0.43490731716156006, + "eval_gooaq_pairs_runtime": 0.9586, + "eval_gooaq_pairs_samples_per_second": 133.527, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_paws-pos_loss": 0.02190549112856388, + "eval_paws-pos_runtime": 0.7061, + "eval_paws-pos_samples_per_second": 181.282, + "eval_paws-pos_steps_per_second": 1.416, + "step": 1440 + }, + { + "epoch": 1.4814814814814814, + "eval_global_dataset_loss": 0.4621449112892151, + "eval_global_dataset_runtime": 13.4004, + "eval_global_dataset_samples_per_second": 31.044, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1440 + }, + { + "epoch": 1.4825102880658436, + "grad_norm": 8.890467643737793, + "learning_rate": 3.288741533611279e-05, + "loss": 0.5508, + "step": 1441 + }, + { + "epoch": 1.4835390946502058, + "grad_norm": 7.290862083435059, + "learning_rate": 3.2878791043776936e-05, + "loss": 0.3343, + "step": 1442 + }, + { + "epoch": 1.4845679012345678, + "grad_norm": 10.71535873413086, + "learning_rate": 3.2870150943622946e-05, + "loss": 0.7613, + "step": 1443 + }, + { + "epoch": 1.48559670781893, + "grad_norm": 7.571237564086914, + "learning_rate": 3.2861495049959314e-05, + "loss": 0.3076, + "step": 1444 + }, + { + "epoch": 1.486625514403292, + "grad_norm": 6.289035797119141, + "learning_rate": 3.2852823377120706e-05, + "loss": 0.2865, + "step": 1445 + }, + { + "epoch": 1.4876543209876543, + "grad_norm": 1.0428786277770996, + "learning_rate": 3.284413593946788e-05, + "loss": 0.0207, + "step": 1446 + }, + { + "epoch": 1.4886831275720165, + "grad_norm": 7.875466823577881, + "learning_rate": 3.283543275138774e-05, + "loss": 0.6707, + "step": 1447 + }, + { + "epoch": 1.4897119341563787, + "grad_norm": 6.708620548248291, + "learning_rate": 3.282671382729324e-05, + "loss": 0.244, + "step": 1448 + }, + { + "epoch": 1.4907407407407407, + "grad_norm": 11.043272972106934, + "learning_rate": 3.281797918162344e-05, + "loss": 0.7385, + "step": 1449 + }, + { + "epoch": 1.491769547325103, + "grad_norm": 8.62049674987793, + "learning_rate": 3.280922882884338e-05, + "loss": 0.4721, + "step": 1450 + }, + { + "epoch": 1.492798353909465, + "grad_norm": 13.318764686584473, + "learning_rate": 3.280046278344416e-05, + "loss": 0.8804, + "step": 1451 + }, + { + "epoch": 1.4938271604938271, + "grad_norm": 5.410976409912109, + "learning_rate": 3.2791681059942836e-05, + "loss": 0.1642, + "step": 1452 + }, + { + "epoch": 1.4948559670781894, + "grad_norm": 9.992098808288574, + "learning_rate": 3.278288367288244e-05, + "loss": 0.5925, + "step": 1453 + }, + { + "epoch": 1.4958847736625516, + "grad_norm": 8.653932571411133, + "learning_rate": 3.277407063683194e-05, + "loss": 0.6718, + "step": 1454 + }, + { + "epoch": 1.4969135802469136, + "grad_norm": 7.988948345184326, + "learning_rate": 3.2765241966386234e-05, + "loss": 0.6227, + "step": 1455 + }, + { + "epoch": 1.4979423868312758, + "grad_norm": 6.361024379730225, + "learning_rate": 3.275639767616609e-05, + "loss": 0.2413, + "step": 1456 + }, + { + "epoch": 1.4989711934156378, + "grad_norm": 10.710992813110352, + "learning_rate": 3.274753778081815e-05, + "loss": 0.8372, + "step": 1457 + }, + { + "epoch": 1.5, + "grad_norm": 13.870288848876953, + "learning_rate": 3.2738662295014916e-05, + "loss": 1.1328, + "step": 1458 + }, + { + "epoch": 1.5010288065843622, + "grad_norm": 7.458224773406982, + "learning_rate": 3.272977123345468e-05, + "loss": 0.369, + "step": 1459 + }, + { + "epoch": 1.5020576131687244, + "grad_norm": 12.56210708618164, + "learning_rate": 3.272086461086156e-05, + "loss": 0.8694, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8138365149497986, + "eval_Qnli-dev_cosine_ap": 0.7489338568179515, + "eval_Qnli-dev_cosine_f1": 0.689407540394973, + "eval_Qnli-dev_cosine_f1_threshold": 0.7289687395095825, + "eval_Qnli-dev_cosine_precision": 0.5981308411214953, + "eval_Qnli-dev_cosine_recall": 0.8135593220338984, + "eval_Qnli-dev_dot_accuracy": 0.669921875, + "eval_Qnli-dev_dot_accuracy_threshold": 378.68115234375, + "eval_Qnli-dev_dot_ap": 0.6565833142132881, + "eval_Qnli-dev_dot_f1": 0.6633165829145728, + "eval_Qnli-dev_dot_f1_threshold": 325.2407531738281, + "eval_Qnli-dev_dot_precision": 0.5484764542936288, + "eval_Qnli-dev_dot_recall": 0.8389830508474576, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.784200668334961, + "eval_Qnli-dev_euclidean_ap": 0.7579849955722361, + "eval_Qnli-dev_euclidean_f1": 0.6926229508196722, + "eval_Qnli-dev_euclidean_f1_threshold": 14.93661117553711, + "eval_Qnli-dev_euclidean_precision": 0.6706349206349206, + "eval_Qnli-dev_euclidean_recall": 0.7161016949152542, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 287.81903076171875, + "eval_Qnli-dev_manhattan_ap": 0.7627266401816915, + "eval_Qnli-dev_manhattan_f1": 0.6902985074626867, + "eval_Qnli-dev_manhattan_f1_threshold": 326.8884582519531, + "eval_Qnli-dev_manhattan_precision": 0.6166666666666667, + "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, + "eval_Qnli-dev_max_accuracy": 0.7109375, + "eval_Qnli-dev_max_accuracy_threshold": 378.68115234375, + "eval_Qnli-dev_max_ap": 0.7627266401816915, + "eval_Qnli-dev_max_f1": 0.6926229508196722, + "eval_Qnli-dev_max_f1_threshold": 326.8884582519531, + "eval_Qnli-dev_max_precision": 0.6706349206349206, + "eval_Qnli-dev_max_recall": 0.8389830508474576, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8671836853027344, + "eval_allNLI-dev_cosine_ap": 0.6278391776727383, + "eval_allNLI-dev_cosine_f1": 0.6343825665859564, + "eval_allNLI-dev_cosine_f1_threshold": 0.7882962822914124, + "eval_allNLI-dev_cosine_precision": 0.5458333333333333, + "eval_allNLI-dev_cosine_recall": 0.7572254335260116, + "eval_allNLI-dev_dot_accuracy": 0.708984375, + "eval_allNLI-dev_dot_accuracy_threshold": 389.33245849609375, + "eval_allNLI-dev_dot_ap": 0.5556601990139107, + "eval_allNLI-dev_dot_f1": 0.5905511811023622, + "eval_allNLI-dev_dot_f1_threshold": 326.88543701171875, + "eval_allNLI-dev_dot_precision": 0.44776119402985076, + "eval_allNLI-dev_dot_recall": 0.8670520231213873, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.829912185668945, + "eval_allNLI-dev_euclidean_ap": 0.6374580549695508, + "eval_allNLI-dev_euclidean_f1": 0.641860465116279, + "eval_allNLI-dev_euclidean_f1_threshold": 14.349370002746582, + "eval_allNLI-dev_euclidean_precision": 0.5369649805447471, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 266.812255859375, + "eval_allNLI-dev_manhattan_ap": 0.6310337853107585, + "eval_allNLI-dev_manhattan_f1": 0.6344827586206897, + "eval_allNLI-dev_manhattan_f1_threshold": 303.2599182128906, + "eval_allNLI-dev_manhattan_precision": 0.5267175572519084, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 389.33245849609375, + "eval_allNLI-dev_max_ap": 0.6374580549695508, + "eval_allNLI-dev_max_f1": 0.641860465116279, + "eval_allNLI-dev_max_f1_threshold": 326.88543701171875, + "eval_allNLI-dev_max_precision": 0.5458333333333333, + "eval_allNLI-dev_max_recall": 0.8670520231213873, + "eval_sequential_score": 0.7627266401816915, + "eval_sts-test_pearson_cosine": 0.8294091384025749, + "eval_sts-test_pearson_dot": 0.8188583363337693, + "eval_sts-test_pearson_euclidean": 0.8645124410252298, + "eval_sts-test_pearson_manhattan": 0.8627753187710065, + "eval_sts-test_pearson_max": 0.8645124410252298, + "eval_sts-test_spearman_cosine": 0.8689237413626268, + "eval_sts-test_spearman_dot": 0.8192856869785479, + "eval_sts-test_spearman_euclidean": 0.866917408060666, + "eval_sts-test_spearman_manhattan": 0.8651706320747757, + "eval_sts-test_spearman_max": 0.8689237413626268, + "eval_vitaminc-pairs_loss": 3.11321759223938, + "eval_vitaminc-pairs_runtime": 3.2191, + "eval_vitaminc-pairs_samples_per_second": 39.762, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_negation-triplets_loss": 0.9306308627128601, + "eval_negation-triplets_runtime": 0.7656, + "eval_negation-triplets_samples_per_second": 167.183, + "eval_negation-triplets_steps_per_second": 1.306, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_scitail-pairs-pos_loss": 0.14148494601249695, + "eval_scitail-pairs-pos_runtime": 0.8816, + "eval_scitail-pairs-pos_samples_per_second": 145.193, + "eval_scitail-pairs-pos_steps_per_second": 1.134, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_scitail-pairs-qa_loss": 0.0008370282012037933, + "eval_scitail-pairs-qa_runtime": 0.5934, + "eval_scitail-pairs-qa_samples_per_second": 215.717, + "eval_scitail-pairs-qa_steps_per_second": 1.685, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_xsum-pairs_loss": 0.33326202630996704, + "eval_xsum-pairs_runtime": 3.0331, + "eval_xsum-pairs_samples_per_second": 42.201, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_sciq_pairs_loss": 0.09718819707632065, + "eval_sciq_pairs_runtime": 3.5207, + "eval_sciq_pairs_samples_per_second": 36.356, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_qasc_pairs_loss": 0.21921108663082123, + "eval_qasc_pairs_runtime": 0.6377, + "eval_qasc_pairs_samples_per_second": 200.709, + "eval_qasc_pairs_steps_per_second": 1.568, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_openbookqa_pairs_loss": 0.9528454542160034, + "eval_openbookqa_pairs_runtime": 0.6003, + "eval_openbookqa_pairs_samples_per_second": 213.227, + "eval_openbookqa_pairs_steps_per_second": 1.666, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_msmarco_pairs_loss": 0.8451707363128662, + "eval_msmarco_pairs_runtime": 1.5256, + "eval_msmarco_pairs_samples_per_second": 83.903, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_nq_pairs_loss": 0.9173424243927002, + "eval_nq_pairs_runtime": 2.9158, + "eval_nq_pairs_samples_per_second": 43.899, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_trivia_pairs_loss": 0.7778195738792419, + "eval_trivia_pairs_runtime": 3.4419, + "eval_trivia_pairs_samples_per_second": 37.189, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_gooaq_pairs_loss": 0.4348945915699005, + "eval_gooaq_pairs_runtime": 0.9544, + "eval_gooaq_pairs_samples_per_second": 134.112, + "eval_gooaq_pairs_steps_per_second": 1.048, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_paws-pos_loss": 0.020275350660085678, + "eval_paws-pos_runtime": 0.7179, + "eval_paws-pos_samples_per_second": 178.302, + "eval_paws-pos_steps_per_second": 1.393, + "step": 1460 + }, + { + "epoch": 1.5020576131687244, + "eval_global_dataset_loss": 0.49529680609703064, + "eval_global_dataset_runtime": 13.3943, + "eval_global_dataset_samples_per_second": 31.058, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1460 + }, + { + "epoch": 1.5030864197530864, + "grad_norm": 11.088354110717773, + "learning_rate": 3.271194244198541e-05, + "loss": 0.875, + "step": 1461 + }, + { + "epoch": 1.5041152263374484, + "grad_norm": 6.408971786499023, + "learning_rate": 3.2703004741601854e-05, + "loss": 0.2141, + "step": 1462 + }, + { + "epoch": 1.5051440329218106, + "grad_norm": 6.966059684753418, + "learning_rate": 3.2694051524512225e-05, + "loss": 0.2514, + "step": 1463 + }, + { + "epoch": 1.5061728395061729, + "grad_norm": 8.408098220825195, + "learning_rate": 3.268508280554356e-05, + "loss": 0.8124, + "step": 1464 + }, + { + "epoch": 1.507201646090535, + "grad_norm": 8.215058326721191, + "learning_rate": 3.267609859954857e-05, + "loss": 0.4547, + "step": 1465 + }, + { + "epoch": 1.508230452674897, + "grad_norm": 9.22248649597168, + "learning_rate": 3.266709892140561e-05, + "loss": 0.6529, + "step": 1466 + }, + { + "epoch": 1.5092592592592593, + "grad_norm": 0.6787060499191284, + "learning_rate": 3.265808378601864e-05, + "loss": 0.0202, + "step": 1467 + }, + { + "epoch": 1.5102880658436213, + "grad_norm": 5.863772392272949, + "learning_rate": 3.2649053208317254e-05, + "loss": 0.2071, + "step": 1468 + }, + { + "epoch": 1.5113168724279835, + "grad_norm": 8.698347091674805, + "learning_rate": 3.2640007203256586e-05, + "loss": 0.7461, + "step": 1469 + }, + { + "epoch": 1.5123456790123457, + "grad_norm": 10.517783164978027, + "learning_rate": 3.263094578581734e-05, + "loss": 0.908, + "step": 1470 + }, + { + "epoch": 1.513374485596708, + "grad_norm": 6.508285999298096, + "learning_rate": 3.262186897100573e-05, + "loss": 0.2534, + "step": 1471 + }, + { + "epoch": 1.51440329218107, + "grad_norm": 7.264826774597168, + "learning_rate": 3.261277677385348e-05, + "loss": 0.2891, + "step": 1472 + }, + { + "epoch": 1.515432098765432, + "grad_norm": 15.387980461120605, + "learning_rate": 3.260366920941778e-05, + "loss": 2.4869, + "step": 1473 + }, + { + "epoch": 1.5164609053497942, + "grad_norm": 0.7510685324668884, + "learning_rate": 3.2594546292781275e-05, + "loss": 0.0319, + "step": 1474 + }, + { + "epoch": 1.5174897119341564, + "grad_norm": 7.2782182693481445, + "learning_rate": 3.258540803905203e-05, + "loss": 0.355, + "step": 1475 + }, + { + "epoch": 1.5185185185185186, + "grad_norm": 9.58236026763916, + "learning_rate": 3.257625446336351e-05, + "loss": 0.7358, + "step": 1476 + }, + { + "epoch": 1.5195473251028808, + "grad_norm": 10.79014778137207, + "learning_rate": 3.256708558087455e-05, + "loss": 0.8566, + "step": 1477 + }, + { + "epoch": 1.5205761316872428, + "grad_norm": 10.390580177307129, + "learning_rate": 3.255790140676934e-05, + "loss": 0.8171, + "step": 1478 + }, + { + "epoch": 1.5216049382716048, + "grad_norm": 7.168967247009277, + "learning_rate": 3.254870195625741e-05, + "loss": 0.3609, + "step": 1479 + }, + { + "epoch": 1.522633744855967, + "grad_norm": 7.27597188949585, + "learning_rate": 3.253948724457354e-05, + "loss": 0.4223, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7863754034042358, + "eval_Qnli-dev_cosine_ap": 0.7399947565743645, + "eval_Qnli-dev_cosine_f1": 0.6920289855072463, + "eval_Qnli-dev_cosine_f1_threshold": 0.7507023811340332, + "eval_Qnli-dev_cosine_precision": 0.6044303797468354, + "eval_Qnli-dev_cosine_recall": 0.809322033898305, + "eval_Qnli-dev_dot_accuracy": 0.654296875, + "eval_Qnli-dev_dot_accuracy_threshold": 414.4156494140625, + "eval_Qnli-dev_dot_ap": 0.6567444831077266, + "eval_Qnli-dev_dot_f1": 0.6688102893890675, + "eval_Qnli-dev_dot_f1_threshold": 343.76507568359375, + "eval_Qnli-dev_dot_precision": 0.538860103626943, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.514190673828125, + "eval_Qnli-dev_euclidean_ap": 0.7486153186225739, + "eval_Qnli-dev_euclidean_f1": 0.6978557504873294, + "eval_Qnli-dev_euclidean_f1_threshold": 15.083457946777344, + "eval_Qnli-dev_euclidean_precision": 0.6462093862815884, + "eval_Qnli-dev_euclidean_recall": 0.7584745762711864, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 279.00152587890625, + "eval_Qnli-dev_manhattan_ap": 0.7553705353817792, + "eval_Qnli-dev_manhattan_f1": 0.7043795620437957, + "eval_Qnli-dev_manhattan_f1_threshold": 324.96124267578125, + "eval_Qnli-dev_manhattan_precision": 0.6185897435897436, + "eval_Qnli-dev_manhattan_recall": 0.8177966101694916, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 414.4156494140625, + "eval_Qnli-dev_max_ap": 0.7553705353817792, + "eval_Qnli-dev_max_f1": 0.7043795620437957, + "eval_Qnli-dev_max_f1_threshold": 343.76507568359375, + "eval_Qnli-dev_max_precision": 0.6462093862815884, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8469090461730957, + "eval_allNLI-dev_cosine_ap": 0.6234053979616159, + "eval_allNLI-dev_cosine_f1": 0.6244343891402715, + "eval_allNLI-dev_cosine_f1_threshold": 0.770348072052002, + "eval_allNLI-dev_cosine_precision": 0.5130111524163569, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 401.94757080078125, + "eval_allNLI-dev_dot_ap": 0.5462802865754883, + "eval_allNLI-dev_dot_f1": 0.5874439461883407, + "eval_allNLI-dev_dot_f1_threshold": 362.9594421386719, + "eval_allNLI-dev_dot_precision": 0.47985347985347987, + "eval_allNLI-dev_dot_recall": 0.7572254335260116, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.566485404968262, + "eval_allNLI-dev_euclidean_ap": 0.6318132514300728, + "eval_allNLI-dev_euclidean_f1": 0.6414253897550111, + "eval_allNLI-dev_euclidean_f1_threshold": 14.94178581237793, + "eval_allNLI-dev_euclidean_precision": 0.5217391304347826, + "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 253.82769775390625, + "eval_allNLI-dev_manhattan_ap": 0.6285449979338023, + "eval_allNLI-dev_manhattan_f1": 0.6325167037861915, + "eval_allNLI-dev_manhattan_f1_threshold": 313.50347900390625, + "eval_allNLI-dev_manhattan_precision": 0.5144927536231884, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 401.94757080078125, + "eval_allNLI-dev_max_ap": 0.6318132514300728, + "eval_allNLI-dev_max_f1": 0.6414253897550111, + "eval_allNLI-dev_max_f1_threshold": 362.9594421386719, + "eval_allNLI-dev_max_precision": 0.5217391304347826, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7553705353817792, + "eval_sts-test_pearson_cosine": 0.8425691617213031, + "eval_sts-test_pearson_dot": 0.8181059364976482, + "eval_sts-test_pearson_euclidean": 0.8746566787614325, + "eval_sts-test_pearson_manhattan": 0.8722050605271279, + "eval_sts-test_pearson_max": 0.8746566787614325, + "eval_sts-test_spearman_cosine": 0.8730949616856577, + "eval_sts-test_spearman_dot": 0.8034795414551055, + "eval_sts-test_spearman_euclidean": 0.8725233733263987, + "eval_sts-test_spearman_manhattan": 0.8695492744943556, + "eval_sts-test_spearman_max": 0.8730949616856577, + "eval_vitaminc-pairs_loss": 3.0648269653320312, + "eval_vitaminc-pairs_runtime": 3.1997, + "eval_vitaminc-pairs_samples_per_second": 40.004, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_negation-triplets_loss": 0.894192636013031, + "eval_negation-triplets_runtime": 0.7516, + "eval_negation-triplets_samples_per_second": 170.306, + "eval_negation-triplets_steps_per_second": 1.331, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_scitail-pairs-pos_loss": 0.1328463852405548, + "eval_scitail-pairs-pos_runtime": 0.8819, + "eval_scitail-pairs-pos_samples_per_second": 145.138, + "eval_scitail-pairs-pos_steps_per_second": 1.134, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_scitail-pairs-qa_loss": 0.0014575115637853742, + "eval_scitail-pairs-qa_runtime": 0.6089, + "eval_scitail-pairs-qa_samples_per_second": 210.199, + "eval_scitail-pairs-qa_steps_per_second": 1.642, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_xsum-pairs_loss": 0.35409626364707947, + "eval_xsum-pairs_runtime": 3.02, + "eval_xsum-pairs_samples_per_second": 42.385, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_sciq_pairs_loss": 0.09546427428722382, + "eval_sciq_pairs_runtime": 3.4916, + "eval_sciq_pairs_samples_per_second": 36.659, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_qasc_pairs_loss": 0.2151322215795517, + "eval_qasc_pairs_runtime": 0.6095, + "eval_qasc_pairs_samples_per_second": 209.999, + "eval_qasc_pairs_steps_per_second": 1.641, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_openbookqa_pairs_loss": 1.0014817714691162, + "eval_openbookqa_pairs_runtime": 0.5892, + "eval_openbookqa_pairs_samples_per_second": 217.251, + "eval_openbookqa_pairs_steps_per_second": 1.697, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_msmarco_pairs_loss": 0.8051604628562927, + "eval_msmarco_pairs_runtime": 1.5278, + "eval_msmarco_pairs_samples_per_second": 83.782, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_nq_pairs_loss": 0.8040265440940857, + "eval_nq_pairs_runtime": 2.916, + "eval_nq_pairs_samples_per_second": 43.896, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_trivia_pairs_loss": 0.7348816990852356, + "eval_trivia_pairs_runtime": 3.4443, + "eval_trivia_pairs_samples_per_second": 37.163, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_gooaq_pairs_loss": 0.4094505310058594, + "eval_gooaq_pairs_runtime": 0.9516, + "eval_gooaq_pairs_samples_per_second": 134.512, + "eval_gooaq_pairs_steps_per_second": 1.051, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_paws-pos_loss": 0.020600441843271255, + "eval_paws-pos_runtime": 0.6972, + "eval_paws-pos_samples_per_second": 183.597, + "eval_paws-pos_steps_per_second": 1.434, + "step": 1480 + }, + { + "epoch": 1.522633744855967, + "eval_global_dataset_loss": 0.4791772663593292, + "eval_global_dataset_runtime": 13.3943, + "eval_global_dataset_samples_per_second": 31.058, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1480 + }, + { + "epoch": 1.5236625514403292, + "grad_norm": 5.952062606811523, + "learning_rate": 3.253025728697784e-05, + "loss": 0.2155, + "step": 1481 + }, + { + "epoch": 1.5246913580246915, + "grad_norm": 6.088840961456299, + "learning_rate": 3.252101209875562e-05, + "loss": 0.3097, + "step": 1482 + }, + { + "epoch": 1.5257201646090535, + "grad_norm": 6.238129138946533, + "learning_rate": 3.251175169521745e-05, + "loss": 0.3738, + "step": 1483 + }, + { + "epoch": 1.5267489711934157, + "grad_norm": 7.6868367195129395, + "learning_rate": 3.250247609169908e-05, + "loss": 0.5758, + "step": 1484 + }, + { + "epoch": 1.5277777777777777, + "grad_norm": 11.03587818145752, + "learning_rate": 3.249318530356143e-05, + "loss": 0.7701, + "step": 1485 + }, + { + "epoch": 1.52880658436214, + "grad_norm": 12.658851623535156, + "learning_rate": 3.248387934619058e-05, + "loss": 0.9936, + "step": 1486 + }, + { + "epoch": 1.5298353909465021, + "grad_norm": 4.862097263336182, + "learning_rate": 3.2474558234997705e-05, + "loss": 0.1421, + "step": 1487 + }, + { + "epoch": 1.5308641975308643, + "grad_norm": 6.787068843841553, + "learning_rate": 3.246522198541911e-05, + "loss": 0.2403, + "step": 1488 + }, + { + "epoch": 1.5318930041152263, + "grad_norm": 6.545001029968262, + "learning_rate": 3.245587061291615e-05, + "loss": 0.261, + "step": 1489 + }, + { + "epoch": 1.5329218106995883, + "grad_norm": 4.37895393371582, + "learning_rate": 3.2446504132975214e-05, + "loss": 0.1273, + "step": 1490 + }, + { + "epoch": 1.5339506172839505, + "grad_norm": 7.094578266143799, + "learning_rate": 3.2437122561107735e-05, + "loss": 0.2913, + "step": 1491 + }, + { + "epoch": 1.5349794238683128, + "grad_norm": 10.808570861816406, + "learning_rate": 3.242772591285012e-05, + "loss": 0.6979, + "step": 1492 + }, + { + "epoch": 1.536008230452675, + "grad_norm": 5.151909828186035, + "learning_rate": 3.241831420376376e-05, + "loss": 0.1969, + "step": 1493 + }, + { + "epoch": 1.5370370370370372, + "grad_norm": 6.880885601043701, + "learning_rate": 3.240888744943497e-05, + "loss": 0.3544, + "step": 1494 + }, + { + "epoch": 1.5380658436213992, + "grad_norm": 9.509146690368652, + "learning_rate": 3.239944566547499e-05, + "loss": 0.6795, + "step": 1495 + }, + { + "epoch": 1.5390946502057612, + "grad_norm": 13.863608360290527, + "learning_rate": 3.2389988867519944e-05, + "loss": 2.128, + "step": 1496 + }, + { + "epoch": 1.5401234567901234, + "grad_norm": 6.650979042053223, + "learning_rate": 3.238051707123084e-05, + "loss": 0.3413, + "step": 1497 + }, + { + "epoch": 1.5411522633744856, + "grad_norm": 5.316281318664551, + "learning_rate": 3.2371030292293493e-05, + "loss": 0.2705, + "step": 1498 + }, + { + "epoch": 1.5421810699588478, + "grad_norm": 7.931695938110352, + "learning_rate": 3.236152854641855e-05, + "loss": 0.3392, + "step": 1499 + }, + { + "epoch": 1.5432098765432098, + "grad_norm": 7.471953392028809, + "learning_rate": 3.235201184934143e-05, + "loss": 0.5524, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8042779564857483, + "eval_Qnli-dev_cosine_ap": 0.7534214412055271, + "eval_Qnli-dev_cosine_f1": 0.7045454545454546, + "eval_Qnli-dev_cosine_f1_threshold": 0.7383867502212524, + "eval_Qnli-dev_cosine_precision": 0.636986301369863, + "eval_Qnli-dev_cosine_recall": 0.788135593220339, + "eval_Qnli-dev_dot_accuracy": 0.6875, + "eval_Qnli-dev_dot_accuracy_threshold": 369.844482421875, + "eval_Qnli-dev_dot_ap": 0.6895081600020698, + "eval_Qnli-dev_dot_f1": 0.6854304635761589, + "eval_Qnli-dev_dot_f1_threshold": 318.4272766113281, + "eval_Qnli-dev_dot_precision": 0.5625, + "eval_Qnli-dev_dot_recall": 0.8771186440677966, + "eval_Qnli-dev_euclidean_accuracy": 0.7109375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.158506393432617, + "eval_Qnli-dev_euclidean_ap": 0.7622707519701521, + "eval_Qnli-dev_euclidean_f1": 0.7071823204419889, + "eval_Qnli-dev_euclidean_f1_threshold": 15.887628555297852, + "eval_Qnli-dev_euclidean_precision": 0.6254071661237784, + "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 293.96429443359375, + "eval_Qnli-dev_manhattan_ap": 0.7650563696239733, + "eval_Qnli-dev_manhattan_f1": 0.707635009310987, + "eval_Qnli-dev_manhattan_f1_threshold": 330.70098876953125, + "eval_Qnli-dev_manhattan_precision": 0.6312292358803987, + "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, + "eval_Qnli-dev_max_accuracy": 0.7109375, + "eval_Qnli-dev_max_accuracy_threshold": 369.844482421875, + "eval_Qnli-dev_max_ap": 0.7650563696239733, + "eval_Qnli-dev_max_f1": 0.707635009310987, + "eval_Qnli-dev_max_f1_threshold": 330.70098876953125, + "eval_Qnli-dev_max_precision": 0.636986301369863, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8823579549789429, + "eval_allNLI-dev_cosine_ap": 0.6215769808743192, + "eval_allNLI-dev_cosine_f1": 0.6238532110091742, + "eval_allNLI-dev_cosine_f1_threshold": 0.7631572484970093, + "eval_allNLI-dev_cosine_precision": 0.5171102661596958, + "eval_allNLI-dev_cosine_recall": 0.7861271676300579, + "eval_allNLI-dev_dot_accuracy": 0.69921875, + "eval_allNLI-dev_dot_accuracy_threshold": 381.78656005859375, + "eval_allNLI-dev_dot_ap": 0.552089695804951, + "eval_allNLI-dev_dot_f1": 0.5856832971800434, + "eval_allNLI-dev_dot_f1_threshold": 337.59368896484375, + "eval_allNLI-dev_dot_precision": 0.46875, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.53109073638916, + "eval_allNLI-dev_euclidean_ap": 0.6288347771169971, + "eval_allNLI-dev_euclidean_f1": 0.6330275229357799, + "eval_allNLI-dev_euclidean_f1_threshold": 14.756261825561523, + "eval_allNLI-dev_euclidean_precision": 0.5247148288973384, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 246.92401123046875, + "eval_allNLI-dev_manhattan_ap": 0.6264506464143021, + "eval_allNLI-dev_manhattan_f1": 0.6279069767441859, + "eval_allNLI-dev_manhattan_f1_threshold": 307.99139404296875, + "eval_allNLI-dev_manhattan_precision": 0.5252918287937743, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 381.78656005859375, + "eval_allNLI-dev_max_ap": 0.6288347771169971, + "eval_allNLI-dev_max_f1": 0.6330275229357799, + "eval_allNLI-dev_max_f1_threshold": 337.59368896484375, + "eval_allNLI-dev_max_precision": 0.5252918287937743, + "eval_allNLI-dev_max_recall": 0.7976878612716763, + "eval_sequential_score": 0.7650563696239733, + "eval_sts-test_pearson_cosine": 0.8428199951652109, + "eval_sts-test_pearson_dot": 0.8277261750061927, + "eval_sts-test_pearson_euclidean": 0.8730646316048926, + "eval_sts-test_pearson_manhattan": 0.8707927459120908, + "eval_sts-test_pearson_max": 0.8730646316048926, + "eval_sts-test_spearman_cosine": 0.8720606257811182, + "eval_sts-test_spearman_dot": 0.8119718988268286, + "eval_sts-test_spearman_euclidean": 0.8701647833435331, + "eval_sts-test_spearman_manhattan": 0.8675766388922228, + "eval_sts-test_spearman_max": 0.8720606257811182, + "eval_vitaminc-pairs_loss": 2.891284704208374, + "eval_vitaminc-pairs_runtime": 3.217, + "eval_vitaminc-pairs_samples_per_second": 39.789, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_negation-triplets_loss": 0.9080420136451721, + "eval_negation-triplets_runtime": 0.7506, + "eval_negation-triplets_samples_per_second": 170.539, + "eval_negation-triplets_steps_per_second": 1.332, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_scitail-pairs-pos_loss": 0.1330765038728714, + "eval_scitail-pairs-pos_runtime": 0.867, + "eval_scitail-pairs-pos_samples_per_second": 147.634, + "eval_scitail-pairs-pos_steps_per_second": 1.153, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_scitail-pairs-qa_loss": 0.0014549298211932182, + "eval_scitail-pairs-qa_runtime": 0.5915, + "eval_scitail-pairs-qa_samples_per_second": 216.389, + "eval_scitail-pairs-qa_steps_per_second": 1.691, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_xsum-pairs_loss": 0.2871630787849426, + "eval_xsum-pairs_runtime": 3.021, + "eval_xsum-pairs_samples_per_second": 42.37, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_sciq_pairs_loss": 0.10170305520296097, + "eval_sciq_pairs_runtime": 3.5237, + "eval_sciq_pairs_samples_per_second": 36.326, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_qasc_pairs_loss": 0.2138214260339737, + "eval_qasc_pairs_runtime": 0.6188, + "eval_qasc_pairs_samples_per_second": 206.842, + "eval_qasc_pairs_steps_per_second": 1.616, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_openbookqa_pairs_loss": 0.9816868305206299, + "eval_openbookqa_pairs_runtime": 0.5902, + "eval_openbookqa_pairs_samples_per_second": 216.862, + "eval_openbookqa_pairs_steps_per_second": 1.694, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_msmarco_pairs_loss": 0.8624980449676514, + "eval_msmarco_pairs_runtime": 1.5196, + "eval_msmarco_pairs_samples_per_second": 84.233, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_nq_pairs_loss": 0.8289986252784729, + "eval_nq_pairs_runtime": 2.9042, + "eval_nq_pairs_samples_per_second": 44.074, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_trivia_pairs_loss": 0.7315549850463867, + "eval_trivia_pairs_runtime": 3.4425, + "eval_trivia_pairs_samples_per_second": 37.182, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_gooaq_pairs_loss": 0.41331881284713745, + "eval_gooaq_pairs_runtime": 0.9526, + "eval_gooaq_pairs_samples_per_second": 134.373, + "eval_gooaq_pairs_steps_per_second": 1.05, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_paws-pos_loss": 0.021952003240585327, + "eval_paws-pos_runtime": 0.725, + "eval_paws-pos_samples_per_second": 176.558, + "eval_paws-pos_steps_per_second": 1.379, + "step": 1500 + }, + { + "epoch": 1.5432098765432098, + "eval_global_dataset_loss": 0.4588969647884369, + "eval_global_dataset_runtime": 13.4076, + "eval_global_dataset_samples_per_second": 31.027, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1500 + }, + { + "epoch": 1.544238683127572, + "grad_norm": 8.104286193847656, + "learning_rate": 3.234248021682235e-05, + "loss": 0.3681, + "step": 1501 + }, + { + "epoch": 1.545267489711934, + "grad_norm": 7.033088207244873, + "learning_rate": 3.233293366464621e-05, + "loss": 0.3433, + "step": 1502 + }, + { + "epoch": 1.5462962962962963, + "grad_norm": 7.00164794921875, + "learning_rate": 3.232337220862264e-05, + "loss": 0.3047, + "step": 1503 + }, + { + "epoch": 1.5473251028806585, + "grad_norm": 5.222539901733398, + "learning_rate": 3.2313795864585965e-05, + "loss": 0.2738, + "step": 1504 + }, + { + "epoch": 1.5483539094650207, + "grad_norm": 5.596692085266113, + "learning_rate": 3.230420464839515e-05, + "loss": 0.2694, + "step": 1505 + }, + { + "epoch": 1.5493827160493827, + "grad_norm": 1.1960200071334839, + "learning_rate": 3.229459857593377e-05, + "loss": 0.0264, + "step": 1506 + }, + { + "epoch": 1.5504115226337447, + "grad_norm": 4.646385669708252, + "learning_rate": 3.228497766311006e-05, + "loss": 0.1286, + "step": 1507 + }, + { + "epoch": 1.551440329218107, + "grad_norm": 12.591828346252441, + "learning_rate": 3.227534192585677e-05, + "loss": 1.102, + "step": 1508 + }, + { + "epoch": 1.5524691358024691, + "grad_norm": 5.408351898193359, + "learning_rate": 3.2265691380131236e-05, + "loss": 0.1698, + "step": 1509 + }, + { + "epoch": 1.5534979423868314, + "grad_norm": 7.685009956359863, + "learning_rate": 3.2256026041915305e-05, + "loss": 0.4219, + "step": 1510 + }, + { + "epoch": 1.5545267489711934, + "grad_norm": 6.658691883087158, + "learning_rate": 3.224634592721533e-05, + "loss": 0.2761, + "step": 1511 + }, + { + "epoch": 1.5555555555555556, + "grad_norm": 6.750312328338623, + "learning_rate": 3.2236651052062116e-05, + "loss": 0.2484, + "step": 1512 + }, + { + "epoch": 1.5565843621399176, + "grad_norm": 6.82509708404541, + "learning_rate": 3.222694143251094e-05, + "loss": 0.3063, + "step": 1513 + }, + { + "epoch": 1.5576131687242798, + "grad_norm": 0.0743848979473114, + "learning_rate": 3.221721708464147e-05, + "loss": 0.0011, + "step": 1514 + }, + { + "epoch": 1.558641975308642, + "grad_norm": 5.526226997375488, + "learning_rate": 3.220747802455778e-05, + "loss": 0.1608, + "step": 1515 + }, + { + "epoch": 1.5596707818930042, + "grad_norm": 4.90157413482666, + "learning_rate": 3.219772426838831e-05, + "loss": 0.1707, + "step": 1516 + }, + { + "epoch": 1.5606995884773662, + "grad_norm": 9.953004837036133, + "learning_rate": 3.218795583228583e-05, + "loss": 0.6489, + "step": 1517 + }, + { + "epoch": 1.5617283950617284, + "grad_norm": 0.38476648926734924, + "learning_rate": 3.217817273242741e-05, + "loss": 0.0064, + "step": 1518 + }, + { + "epoch": 1.5627572016460904, + "grad_norm": 5.67715311050415, + "learning_rate": 3.2168374985014436e-05, + "loss": 0.1734, + "step": 1519 + }, + { + "epoch": 1.5637860082304527, + "grad_norm": 4.921224594116211, + "learning_rate": 3.215856260627252e-05, + "loss": 0.1014, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_Qnli-dev_cosine_accuracy": 0.703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8030712604522705, + "eval_Qnli-dev_cosine_ap": 0.7546592106407485, + "eval_Qnli-dev_cosine_f1": 0.7061068702290078, + "eval_Qnli-dev_cosine_f1_threshold": 0.7434631586074829, + "eval_Qnli-dev_cosine_precision": 0.6423611111111112, + "eval_Qnli-dev_cosine_recall": 0.7838983050847458, + "eval_Qnli-dev_dot_accuracy": 0.6640625, + "eval_Qnli-dev_dot_accuracy_threshold": 383.8763732910156, + "eval_Qnli-dev_dot_ap": 0.6833576985556973, + "eval_Qnli-dev_dot_f1": 0.6830870279146142, + "eval_Qnli-dev_dot_f1_threshold": 318.1232604980469, + "eval_Qnli-dev_dot_precision": 0.5576407506702413, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.525314331054688, + "eval_Qnli-dev_euclidean_ap": 0.7628895341943671, + "eval_Qnli-dev_euclidean_f1": 0.7097966728280962, + "eval_Qnli-dev_euclidean_f1_threshold": 15.842670440673828, + "eval_Qnli-dev_euclidean_precision": 0.6295081967213115, + "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, + "eval_Qnli-dev_manhattan_accuracy": 0.720703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 303.1654052734375, + "eval_Qnli-dev_manhattan_ap": 0.7657337153714383, + "eval_Qnli-dev_manhattan_f1": 0.7077464788732394, + "eval_Qnli-dev_manhattan_f1_threshold": 343.8772277832031, + "eval_Qnli-dev_manhattan_precision": 0.6054216867469879, + "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 383.8763732910156, + "eval_Qnli-dev_max_ap": 0.7657337153714383, + "eval_Qnli-dev_max_f1": 0.7097966728280962, + "eval_Qnli-dev_max_f1_threshold": 343.8772277832031, + "eval_Qnli-dev_max_precision": 0.6423611111111112, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8233051300048828, + "eval_allNLI-dev_cosine_ap": 0.6182898470984927, + "eval_allNLI-dev_cosine_f1": 0.6211764705882353, + "eval_allNLI-dev_cosine_f1_threshold": 0.7539602518081665, + "eval_allNLI-dev_cosine_precision": 0.5238095238095238, + "eval_allNLI-dev_cosine_recall": 0.7630057803468208, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 390.0309753417969, + "eval_allNLI-dev_dot_ap": 0.5474329580109125, + "eval_allNLI-dev_dot_f1": 0.5889830508474576, + "eval_allNLI-dev_dot_f1_threshold": 319.5199890136719, + "eval_allNLI-dev_dot_precision": 0.46488294314381273, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.811637878417969, + "eval_allNLI-dev_euclidean_ap": 0.629218327624065, + "eval_allNLI-dev_euclidean_f1": 0.6280193236714975, + "eval_allNLI-dev_euclidean_f1_threshold": 14.535062789916992, + "eval_allNLI-dev_euclidean_precision": 0.5394190871369294, + "eval_allNLI-dev_euclidean_recall": 0.7514450867052023, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 257.43560791015625, + "eval_allNLI-dev_manhattan_ap": 0.6239453551794301, + "eval_allNLI-dev_manhattan_f1": 0.625, + "eval_allNLI-dev_manhattan_f1_threshold": 312.9381103515625, + "eval_allNLI-dev_manhattan_precision": 0.5212355212355212, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 390.0309753417969, + "eval_allNLI-dev_max_ap": 0.629218327624065, + "eval_allNLI-dev_max_f1": 0.6280193236714975, + "eval_allNLI-dev_max_f1_threshold": 319.5199890136719, + "eval_allNLI-dev_max_precision": 0.5394190871369294, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.7657337153714383, + "eval_sts-test_pearson_cosine": 0.8447395315874453, + "eval_sts-test_pearson_dot": 0.8347651049046418, + "eval_sts-test_pearson_euclidean": 0.8708582861671369, + "eval_sts-test_pearson_manhattan": 0.8686665949434926, + "eval_sts-test_pearson_max": 0.8708582861671369, + "eval_sts-test_spearman_cosine": 0.8700124500497567, + "eval_sts-test_spearman_dot": 0.8272195486081061, + "eval_sts-test_spearman_euclidean": 0.8661788779939239, + "eval_sts-test_spearman_manhattan": 0.8634727692175859, + "eval_sts-test_spearman_max": 0.8700124500497567, + "eval_vitaminc-pairs_loss": 3.005648612976074, + "eval_vitaminc-pairs_runtime": 3.2014, + "eval_vitaminc-pairs_samples_per_second": 39.982, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_negation-triplets_loss": 0.9295395612716675, + "eval_negation-triplets_runtime": 0.7502, + "eval_negation-triplets_samples_per_second": 170.611, + "eval_negation-triplets_steps_per_second": 1.333, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_scitail-pairs-pos_loss": 0.13157592713832855, + "eval_scitail-pairs-pos_runtime": 0.8736, + "eval_scitail-pairs-pos_samples_per_second": 146.513, + "eval_scitail-pairs-pos_steps_per_second": 1.145, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_scitail-pairs-qa_loss": 0.0016492550494149327, + "eval_scitail-pairs-qa_runtime": 0.5929, + "eval_scitail-pairs-qa_samples_per_second": 215.894, + "eval_scitail-pairs-qa_steps_per_second": 1.687, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_xsum-pairs_loss": 0.2971457839012146, + "eval_xsum-pairs_runtime": 3.0288, + "eval_xsum-pairs_samples_per_second": 42.26, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_sciq_pairs_loss": 0.09775245934724808, + "eval_sciq_pairs_runtime": 3.4879, + "eval_sciq_pairs_samples_per_second": 36.699, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_qasc_pairs_loss": 0.20654577016830444, + "eval_qasc_pairs_runtime": 0.6156, + "eval_qasc_pairs_samples_per_second": 207.919, + "eval_qasc_pairs_steps_per_second": 1.624, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_openbookqa_pairs_loss": 0.9547919034957886, + "eval_openbookqa_pairs_runtime": 0.5918, + "eval_openbookqa_pairs_samples_per_second": 216.287, + "eval_openbookqa_pairs_steps_per_second": 1.69, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_msmarco_pairs_loss": 0.828711748123169, + "eval_msmarco_pairs_runtime": 1.5226, + "eval_msmarco_pairs_samples_per_second": 84.067, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_nq_pairs_loss": 0.8327388167381287, + "eval_nq_pairs_runtime": 2.8978, + "eval_nq_pairs_samples_per_second": 44.171, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_trivia_pairs_loss": 0.7141972780227661, + "eval_trivia_pairs_runtime": 3.4447, + "eval_trivia_pairs_samples_per_second": 37.159, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_gooaq_pairs_loss": 0.38889452815055847, + "eval_gooaq_pairs_runtime": 0.9614, + "eval_gooaq_pairs_samples_per_second": 133.139, + "eval_gooaq_pairs_steps_per_second": 1.04, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_paws-pos_loss": 0.021532831713557243, + "eval_paws-pos_runtime": 0.706, + "eval_paws-pos_samples_per_second": 181.303, + "eval_paws-pos_steps_per_second": 1.416, + "step": 1520 + }, + { + "epoch": 1.5637860082304527, + "eval_global_dataset_loss": 0.4648805856704712, + "eval_global_dataset_runtime": 13.4317, + "eval_global_dataset_samples_per_second": 30.972, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1520 + }, + { + "epoch": 1.5648148148148149, + "grad_norm": 8.67354679107666, + "learning_rate": 3.2148735612451525e-05, + "loss": 0.4432, + "step": 1521 + }, + { + "epoch": 1.565843621399177, + "grad_norm": 6.336428642272949, + "learning_rate": 3.21388940198255e-05, + "loss": 0.2337, + "step": 1522 + }, + { + "epoch": 1.566872427983539, + "grad_norm": 0.8628045320510864, + "learning_rate": 3.212903784469268e-05, + "loss": 0.0261, + "step": 1523 + }, + { + "epoch": 1.567901234567901, + "grad_norm": 7.230187892913818, + "learning_rate": 3.2119167103375464e-05, + "loss": 0.4077, + "step": 1524 + }, + { + "epoch": 1.5689300411522633, + "grad_norm": 6.065439224243164, + "learning_rate": 3.2109281812220336e-05, + "loss": 0.2346, + "step": 1525 + }, + { + "epoch": 1.5699588477366255, + "grad_norm": 8.417454719543457, + "learning_rate": 3.20993819875979e-05, + "loss": 0.5197, + "step": 1526 + }, + { + "epoch": 1.5709876543209877, + "grad_norm": 8.803533554077148, + "learning_rate": 3.208946764590285e-05, + "loss": 0.4848, + "step": 1527 + }, + { + "epoch": 1.5720164609053497, + "grad_norm": 8.672576904296875, + "learning_rate": 3.207953880355387e-05, + "loss": 0.4645, + "step": 1528 + }, + { + "epoch": 1.573045267489712, + "grad_norm": 7.9989399909973145, + "learning_rate": 3.2069595476993704e-05, + "loss": 0.5898, + "step": 1529 + }, + { + "epoch": 1.574074074074074, + "grad_norm": 7.975038528442383, + "learning_rate": 3.205963768268905e-05, + "loss": 0.4887, + "step": 1530 + }, + { + "epoch": 1.5751028806584362, + "grad_norm": 6.674964427947998, + "learning_rate": 3.204966543713058e-05, + "loss": 0.2801, + "step": 1531 + }, + { + "epoch": 1.5761316872427984, + "grad_norm": 11.67644214630127, + "learning_rate": 3.20396787568329e-05, + "loss": 0.8622, + "step": 1532 + }, + { + "epoch": 1.5771604938271606, + "grad_norm": 3.8542089462280273, + "learning_rate": 3.2029677658334525e-05, + "loss": 0.1503, + "step": 1533 + }, + { + "epoch": 1.5781893004115226, + "grad_norm": 10.990008354187012, + "learning_rate": 3.2019662158197833e-05, + "loss": 0.7369, + "step": 1534 + }, + { + "epoch": 1.5792181069958846, + "grad_norm": 6.23648738861084, + "learning_rate": 3.200963227300905e-05, + "loss": 0.2353, + "step": 1535 + }, + { + "epoch": 1.5802469135802468, + "grad_norm": 5.542776584625244, + "learning_rate": 3.1999588019378255e-05, + "loss": 0.2101, + "step": 1536 + }, + { + "epoch": 1.581275720164609, + "grad_norm": 9.987077713012695, + "learning_rate": 3.1989529413939284e-05, + "loss": 0.7077, + "step": 1537 + }, + { + "epoch": 1.5823045267489713, + "grad_norm": 3.745037794113159, + "learning_rate": 3.197945647334976e-05, + "loss": 0.2138, + "step": 1538 + }, + { + "epoch": 1.5833333333333335, + "grad_norm": 5.842042922973633, + "learning_rate": 3.1969369214291036e-05, + "loss": 0.1992, + "step": 1539 + }, + { + "epoch": 1.5843621399176955, + "grad_norm": 9.523786544799805, + "learning_rate": 3.1959267653468206e-05, + "loss": 0.64, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_Qnli-dev_cosine_accuracy": 0.703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7817473411560059, + "eval_Qnli-dev_cosine_ap": 0.7410033870456724, + "eval_Qnli-dev_cosine_f1": 0.7069271758436945, + "eval_Qnli-dev_cosine_f1_threshold": 0.737807035446167, + "eval_Qnli-dev_cosine_precision": 0.6085626911314985, + "eval_Qnli-dev_cosine_recall": 0.8432203389830508, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 388.49591064453125, + "eval_Qnli-dev_dot_ap": 0.6689207865792897, + "eval_Qnli-dev_dot_f1": 0.6719242902208202, + "eval_Qnli-dev_dot_f1_threshold": 318.5028076171875, + "eval_Qnli-dev_dot_precision": 0.535175879396985, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.337549209594727, + "eval_Qnli-dev_euclidean_ap": 0.7524832422833609, + "eval_Qnli-dev_euclidean_f1": 0.7078039927404719, + "eval_Qnli-dev_euclidean_f1_threshold": 15.774192810058594, + "eval_Qnli-dev_euclidean_precision": 0.6190476190476191, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 297.94952392578125, + "eval_Qnli-dev_manhattan_ap": 0.7542425288738073, + "eval_Qnli-dev_manhattan_f1": 0.7071428571428572, + "eval_Qnli-dev_manhattan_f1_threshold": 335.511474609375, + "eval_Qnli-dev_manhattan_precision": 0.6111111111111112, + "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 388.49591064453125, + "eval_Qnli-dev_max_ap": 0.7542425288738073, + "eval_Qnli-dev_max_f1": 0.7078039927404719, + "eval_Qnli-dev_max_f1_threshold": 335.511474609375, + "eval_Qnli-dev_max_precision": 0.6190476190476191, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8608343005180359, + "eval_allNLI-dev_cosine_ap": 0.6267179476142488, + "eval_allNLI-dev_cosine_f1": 0.6272727272727273, + "eval_allNLI-dev_cosine_f1_threshold": 0.7647356986999512, + "eval_allNLI-dev_cosine_precision": 0.5168539325842697, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.701171875, + "eval_allNLI-dev_dot_accuracy_threshold": 421.30963134765625, + "eval_allNLI-dev_dot_ap": 0.556836107395606, + "eval_allNLI-dev_dot_f1": 0.5921325051759835, + "eval_allNLI-dev_dot_f1_threshold": 335.15948486328125, + "eval_allNLI-dev_dot_precision": 0.4612903225806452, + "eval_allNLI-dev_dot_recall": 0.8265895953757225, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.202301025390625, + "eval_allNLI-dev_euclidean_ap": 0.6344296636986556, + "eval_allNLI-dev_euclidean_f1": 0.6367924528301887, + "eval_allNLI-dev_euclidean_f1_threshold": 14.496380805969238, + "eval_allNLI-dev_euclidean_precision": 0.5378486055776892, + "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 229.4099578857422, + "eval_allNLI-dev_manhattan_ap": 0.6308141349336275, + "eval_allNLI-dev_manhattan_f1": 0.6301369863013698, + "eval_allNLI-dev_manhattan_f1_threshold": 308.3590087890625, + "eval_allNLI-dev_manhattan_precision": 0.5207547169811321, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 421.30963134765625, + "eval_allNLI-dev_max_ap": 0.6344296636986556, + "eval_allNLI-dev_max_f1": 0.6367924528301887, + "eval_allNLI-dev_max_f1_threshold": 335.15948486328125, + "eval_allNLI-dev_max_precision": 0.5378486055776892, + "eval_allNLI-dev_max_recall": 0.8265895953757225, + "eval_sequential_score": 0.7542425288738073, + "eval_sts-test_pearson_cosine": 0.8339125795484035, + "eval_sts-test_pearson_dot": 0.8201875759121224, + "eval_sts-test_pearson_euclidean": 0.8658559476640173, + "eval_sts-test_pearson_manhattan": 0.8635546610585563, + "eval_sts-test_pearson_max": 0.8658559476640173, + "eval_sts-test_spearman_cosine": 0.8690907978214008, + "eval_sts-test_spearman_dot": 0.8219585008164104, + "eval_sts-test_spearman_euclidean": 0.8649441011896124, + "eval_sts-test_spearman_manhattan": 0.8623342523765273, + "eval_sts-test_spearman_max": 0.8690907978214008, + "eval_vitaminc-pairs_loss": 3.196216344833374, + "eval_vitaminc-pairs_runtime": 3.208, + "eval_vitaminc-pairs_samples_per_second": 39.9, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_negation-triplets_loss": 0.9201185703277588, + "eval_negation-triplets_runtime": 0.7623, + "eval_negation-triplets_samples_per_second": 167.909, + "eval_negation-triplets_steps_per_second": 1.312, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_scitail-pairs-pos_loss": 0.15473031997680664, + "eval_scitail-pairs-pos_runtime": 0.8829, + "eval_scitail-pairs-pos_samples_per_second": 144.974, + "eval_scitail-pairs-pos_steps_per_second": 1.133, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_scitail-pairs-qa_loss": 0.0018154560821130872, + "eval_scitail-pairs-qa_runtime": 0.5912, + "eval_scitail-pairs-qa_samples_per_second": 216.494, + "eval_scitail-pairs-qa_steps_per_second": 1.691, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_xsum-pairs_loss": 0.3135191798210144, + "eval_xsum-pairs_runtime": 3.0208, + "eval_xsum-pairs_samples_per_second": 42.373, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_sciq_pairs_loss": 0.09777497500181198, + "eval_sciq_pairs_runtime": 3.491, + "eval_sciq_pairs_samples_per_second": 36.666, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_qasc_pairs_loss": 0.22917062044143677, + "eval_qasc_pairs_runtime": 0.614, + "eval_qasc_pairs_samples_per_second": 208.459, + "eval_qasc_pairs_steps_per_second": 1.629, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_openbookqa_pairs_loss": 0.9911245703697205, + "eval_openbookqa_pairs_runtime": 0.5955, + "eval_openbookqa_pairs_samples_per_second": 214.96, + "eval_openbookqa_pairs_steps_per_second": 1.679, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_msmarco_pairs_loss": 0.7844669818878174, + "eval_msmarco_pairs_runtime": 1.5258, + "eval_msmarco_pairs_samples_per_second": 83.89, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_nq_pairs_loss": 0.827888548374176, + "eval_nq_pairs_runtime": 2.8979, + "eval_nq_pairs_samples_per_second": 44.17, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_trivia_pairs_loss": 0.7074177861213684, + "eval_trivia_pairs_runtime": 3.4401, + "eval_trivia_pairs_samples_per_second": 37.208, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_gooaq_pairs_loss": 0.35452744364738464, + "eval_gooaq_pairs_runtime": 0.958, + "eval_gooaq_pairs_samples_per_second": 133.605, + "eval_gooaq_pairs_steps_per_second": 1.044, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_paws-pos_loss": 0.02034500241279602, + "eval_paws-pos_runtime": 0.7022, + "eval_paws-pos_samples_per_second": 182.273, + "eval_paws-pos_steps_per_second": 1.424, + "step": 1540 + }, + { + "epoch": 1.5843621399176955, + "eval_global_dataset_loss": 0.4994642436504364, + "eval_global_dataset_runtime": 13.4195, + "eval_global_dataset_samples_per_second": 31.0, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1540 + }, + { + "epoch": 1.5853909465020575, + "grad_norm": 5.8253326416015625, + "learning_rate": 3.194915180761e-05, + "loss": 0.2026, + "step": 1541 + }, + { + "epoch": 1.5864197530864197, + "grad_norm": 10.959147453308105, + "learning_rate": 3.1939021693468846e-05, + "loss": 0.7612, + "step": 1542 + }, + { + "epoch": 1.587448559670782, + "grad_norm": 5.3747711181640625, + "learning_rate": 3.192887732782079e-05, + "loss": 0.2014, + "step": 1543 + }, + { + "epoch": 1.5884773662551441, + "grad_norm": 1.070946216583252, + "learning_rate": 3.191871872746546e-05, + "loss": 0.0185, + "step": 1544 + }, + { + "epoch": 1.5895061728395061, + "grad_norm": 5.483205795288086, + "learning_rate": 3.190854590922609e-05, + "loss": 0.1747, + "step": 1545 + }, + { + "epoch": 1.5905349794238683, + "grad_norm": 8.847293853759766, + "learning_rate": 3.189835888994943e-05, + "loss": 0.4167, + "step": 1546 + }, + { + "epoch": 1.5915637860082303, + "grad_norm": 13.975677490234375, + "learning_rate": 3.1888157686505757e-05, + "loss": 1.1175, + "step": 1547 + }, + { + "epoch": 1.5925925925925926, + "grad_norm": 11.167402267456055, + "learning_rate": 3.1877942315788855e-05, + "loss": 0.7267, + "step": 1548 + }, + { + "epoch": 1.5936213991769548, + "grad_norm": 5.643833637237549, + "learning_rate": 3.1867712794715957e-05, + "loss": 0.1788, + "step": 1549 + }, + { + "epoch": 1.594650205761317, + "grad_norm": 3.837049961090088, + "learning_rate": 3.1857469140227714e-05, + "loss": 0.1364, + "step": 1550 + }, + { + "epoch": 1.595679012345679, + "grad_norm": 8.434807777404785, + "learning_rate": 3.184721136928821e-05, + "loss": 0.3264, + "step": 1551 + }, + { + "epoch": 1.596707818930041, + "grad_norm": 11.318633079528809, + "learning_rate": 3.183693949888489e-05, + "loss": 0.7652, + "step": 1552 + }, + { + "epoch": 1.5977366255144032, + "grad_norm": 9.858819961547852, + "learning_rate": 3.1826653546028544e-05, + "loss": 0.6605, + "step": 1553 + }, + { + "epoch": 1.5987654320987654, + "grad_norm": 5.252237319946289, + "learning_rate": 3.1816353527753304e-05, + "loss": 0.1219, + "step": 1554 + }, + { + "epoch": 1.5997942386831276, + "grad_norm": 5.182341575622559, + "learning_rate": 3.1806039461116585e-05, + "loss": 0.1417, + "step": 1555 + }, + { + "epoch": 1.6008230452674899, + "grad_norm": 11.145753860473633, + "learning_rate": 3.179571136319905e-05, + "loss": 0.6634, + "step": 1556 + }, + { + "epoch": 1.6018518518518519, + "grad_norm": 11.343064308166504, + "learning_rate": 3.1785369251104636e-05, + "loss": 0.8749, + "step": 1557 + }, + { + "epoch": 1.6028806584362139, + "grad_norm": 0.5352690815925598, + "learning_rate": 3.177501314196044e-05, + "loss": 0.0083, + "step": 1558 + }, + { + "epoch": 1.603909465020576, + "grad_norm": 12.607147216796875, + "learning_rate": 3.1764643052916786e-05, + "loss": 1.7723, + "step": 1559 + }, + { + "epoch": 1.6049382716049383, + "grad_norm": 4.1937479972839355, + "learning_rate": 3.1754259001147116e-05, + "loss": 0.1408, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7923214435577393, + "eval_Qnli-dev_cosine_ap": 0.736397950522414, + "eval_Qnli-dev_cosine_f1": 0.708029197080292, + "eval_Qnli-dev_cosine_f1_threshold": 0.7372498512268066, + "eval_Qnli-dev_cosine_precision": 0.6217948717948718, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.673828125, + "eval_Qnli-dev_dot_accuracy_threshold": 394.2844543457031, + "eval_Qnli-dev_dot_ap": 0.6647394095843582, + "eval_Qnli-dev_dot_f1": 0.6762075134168157, + "eval_Qnli-dev_dot_f1_threshold": 343.790771484375, + "eval_Qnli-dev_dot_precision": 0.5851393188854489, + "eval_Qnli-dev_dot_recall": 0.8008474576271186, + "eval_Qnli-dev_euclidean_accuracy": 0.703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.500602722167969, + "eval_Qnli-dev_euclidean_ap": 0.7436495742157391, + "eval_Qnli-dev_euclidean_f1": 0.7047970479704797, + "eval_Qnli-dev_euclidean_f1_threshold": 15.69774055480957, + "eval_Qnli-dev_euclidean_precision": 0.6241830065359477, + "eval_Qnli-dev_euclidean_recall": 0.809322033898305, + "eval_Qnli-dev_manhattan_accuracy": 0.701171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 282.2923889160156, + "eval_Qnli-dev_manhattan_ap": 0.746412343943242, + "eval_Qnli-dev_manhattan_f1": 0.70223752151463, + "eval_Qnli-dev_manhattan_f1_threshold": 341.86407470703125, + "eval_Qnli-dev_manhattan_precision": 0.591304347826087, + "eval_Qnli-dev_manhattan_recall": 0.864406779661017, + "eval_Qnli-dev_max_accuracy": 0.703125, + "eval_Qnli-dev_max_accuracy_threshold": 394.2844543457031, + "eval_Qnli-dev_max_ap": 0.746412343943242, + "eval_Qnli-dev_max_f1": 0.708029197080292, + "eval_Qnli-dev_max_f1_threshold": 343.790771484375, + "eval_Qnli-dev_max_precision": 0.6241830065359477, + "eval_Qnli-dev_max_recall": 0.864406779661017, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8752395510673523, + "eval_allNLI-dev_cosine_ap": 0.6214439988011942, + "eval_allNLI-dev_cosine_f1": 0.6264501160092808, + "eval_allNLI-dev_cosine_f1_threshold": 0.7859889268875122, + "eval_allNLI-dev_cosine_precision": 0.5232558139534884, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.689453125, + "eval_allNLI-dev_dot_accuracy_threshold": 446.91632080078125, + "eval_allNLI-dev_dot_ap": 0.5413136716943259, + "eval_allNLI-dev_dot_f1": 0.5879732739420935, + "eval_allNLI-dev_dot_f1_threshold": 359.6739501953125, + "eval_allNLI-dev_dot_precision": 0.4782608695652174, + "eval_allNLI-dev_dot_recall": 0.7630057803468208, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.595601081848145, + "eval_allNLI-dev_euclidean_ap": 0.6293013048776566, + "eval_allNLI-dev_euclidean_f1": 0.6336633663366337, + "eval_allNLI-dev_euclidean_f1_threshold": 13.83390998840332, + "eval_allNLI-dev_euclidean_precision": 0.5541125541125541, + "eval_allNLI-dev_euclidean_recall": 0.7398843930635838, + "eval_allNLI-dev_manhattan_accuracy": 0.73828125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 250.4329833984375, + "eval_allNLI-dev_manhattan_ap": 0.625141827320122, + "eval_allNLI-dev_manhattan_f1": 0.6330935251798562, + "eval_allNLI-dev_manhattan_f1_threshold": 294.185546875, + "eval_allNLI-dev_manhattan_precision": 0.5409836065573771, + "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 446.91632080078125, + "eval_allNLI-dev_max_ap": 0.6293013048776566, + "eval_allNLI-dev_max_f1": 0.6336633663366337, + "eval_allNLI-dev_max_f1_threshold": 359.6739501953125, + "eval_allNLI-dev_max_precision": 0.5541125541125541, + "eval_allNLI-dev_max_recall": 0.7803468208092486, + "eval_sequential_score": 0.746412343943242, + "eval_sts-test_pearson_cosine": 0.8302574308516089, + "eval_sts-test_pearson_dot": 0.7999351461985135, + "eval_sts-test_pearson_euclidean": 0.8662787058139827, + "eval_sts-test_pearson_manhattan": 0.8640787481621535, + "eval_sts-test_pearson_max": 0.8662787058139827, + "eval_sts-test_spearman_cosine": 0.8668608872175287, + "eval_sts-test_spearman_dot": 0.7830448177172121, + "eval_sts-test_spearman_euclidean": 0.8657249211983695, + "eval_sts-test_spearman_manhattan": 0.8622815801418696, + "eval_sts-test_spearman_max": 0.8668608872175287, + "eval_vitaminc-pairs_loss": 3.2677905559539795, + "eval_vitaminc-pairs_runtime": 3.2153, + "eval_vitaminc-pairs_samples_per_second": 39.809, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_negation-triplets_loss": 0.8980169892311096, + "eval_negation-triplets_runtime": 0.7647, + "eval_negation-triplets_samples_per_second": 167.388, + "eval_negation-triplets_steps_per_second": 1.308, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_scitail-pairs-pos_loss": 0.15127724409103394, + "eval_scitail-pairs-pos_runtime": 0.8723, + "eval_scitail-pairs-pos_samples_per_second": 146.74, + "eval_scitail-pairs-pos_steps_per_second": 1.146, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_scitail-pairs-qa_loss": 0.0014785886742174625, + "eval_scitail-pairs-qa_runtime": 0.6058, + "eval_scitail-pairs-qa_samples_per_second": 211.278, + "eval_scitail-pairs-qa_steps_per_second": 1.651, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_xsum-pairs_loss": 0.36814171075820923, + "eval_xsum-pairs_runtime": 3.0238, + "eval_xsum-pairs_samples_per_second": 42.331, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_sciq_pairs_loss": 0.1039256900548935, + "eval_sciq_pairs_runtime": 3.5155, + "eval_sciq_pairs_samples_per_second": 36.41, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_qasc_pairs_loss": 0.20851899683475494, + "eval_qasc_pairs_runtime": 0.6182, + "eval_qasc_pairs_samples_per_second": 207.054, + "eval_qasc_pairs_steps_per_second": 1.618, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_openbookqa_pairs_loss": 0.9419054985046387, + "eval_openbookqa_pairs_runtime": 0.5937, + "eval_openbookqa_pairs_samples_per_second": 215.582, + "eval_openbookqa_pairs_steps_per_second": 1.684, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_msmarco_pairs_loss": 0.7457932829856873, + "eval_msmarco_pairs_runtime": 1.5248, + "eval_msmarco_pairs_samples_per_second": 83.945, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_nq_pairs_loss": 0.8226298689842224, + "eval_nq_pairs_runtime": 2.902, + "eval_nq_pairs_samples_per_second": 44.108, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_trivia_pairs_loss": 0.6305390000343323, + "eval_trivia_pairs_runtime": 3.4527, + "eval_trivia_pairs_samples_per_second": 37.073, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_gooaq_pairs_loss": 0.39881452918052673, + "eval_gooaq_pairs_runtime": 0.9524, + "eval_gooaq_pairs_samples_per_second": 134.391, + "eval_gooaq_pairs_steps_per_second": 1.05, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_paws-pos_loss": 0.02105657197535038, + "eval_paws-pos_runtime": 0.6996, + "eval_paws-pos_samples_per_second": 182.974, + "eval_paws-pos_steps_per_second": 1.429, + "step": 1560 + }, + { + "epoch": 1.6049382716049383, + "eval_global_dataset_loss": 0.5037676692008972, + "eval_global_dataset_runtime": 13.4259, + "eval_global_dataset_samples_per_second": 30.985, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1560 + }, + { + "epoch": 1.6059670781893005, + "grad_norm": 6.21460485458374, + "learning_rate": 3.174386100384801e-05, + "loss": 0.2573, + "step": 1561 + }, + { + "epoch": 1.6069958847736625, + "grad_norm": 5.64373779296875, + "learning_rate": 3.1733449078239137e-05, + "loss": 0.2668, + "step": 1562 + }, + { + "epoch": 1.6080246913580247, + "grad_norm": 6.738086223602295, + "learning_rate": 3.172302324156325e-05, + "loss": 0.2826, + "step": 1563 + }, + { + "epoch": 1.6090534979423867, + "grad_norm": 5.761399745941162, + "learning_rate": 3.1712583511086106e-05, + "loss": 0.2666, + "step": 1564 + }, + { + "epoch": 1.610082304526749, + "grad_norm": 6.337840557098389, + "learning_rate": 3.17021299040965e-05, + "loss": 0.282, + "step": 1565 + }, + { + "epoch": 1.6111111111111112, + "grad_norm": 0.7291662693023682, + "learning_rate": 3.16916624379062e-05, + "loss": 0.0133, + "step": 1566 + }, + { + "epoch": 1.6121399176954734, + "grad_norm": 6.175798416137695, + "learning_rate": 3.1681181129849906e-05, + "loss": 0.4253, + "step": 1567 + }, + { + "epoch": 1.6131687242798354, + "grad_norm": 6.257718563079834, + "learning_rate": 3.167068599728526e-05, + "loss": 0.298, + "step": 1568 + }, + { + "epoch": 1.6141975308641974, + "grad_norm": 7.281583786010742, + "learning_rate": 3.166017705759282e-05, + "loss": 0.341, + "step": 1569 + }, + { + "epoch": 1.6152263374485596, + "grad_norm": 4.747096061706543, + "learning_rate": 3.164965432817596e-05, + "loss": 0.1514, + "step": 1570 + }, + { + "epoch": 1.6162551440329218, + "grad_norm": 8.15545654296875, + "learning_rate": 3.163911782646093e-05, + "loss": 0.3729, + "step": 1571 + }, + { + "epoch": 1.617283950617284, + "grad_norm": 6.875436305999756, + "learning_rate": 3.162856756989676e-05, + "loss": 0.2414, + "step": 1572 + }, + { + "epoch": 1.6183127572016462, + "grad_norm": 5.717591762542725, + "learning_rate": 3.1618003575955275e-05, + "loss": 0.1577, + "step": 1573 + }, + { + "epoch": 1.6193415637860082, + "grad_norm": 13.189390182495117, + "learning_rate": 3.160742586213105e-05, + "loss": 0.9965, + "step": 1574 + }, + { + "epoch": 1.6203703703703702, + "grad_norm": 6.093446731567383, + "learning_rate": 3.159683444594139e-05, + "loss": 0.2172, + "step": 1575 + }, + { + "epoch": 1.6213991769547325, + "grad_norm": 3.353471279144287, + "learning_rate": 3.1586229344926255e-05, + "loss": 0.078, + "step": 1576 + }, + { + "epoch": 1.6224279835390947, + "grad_norm": 9.408513069152832, + "learning_rate": 3.1575610576648305e-05, + "loss": 0.3766, + "step": 1577 + }, + { + "epoch": 1.623456790123457, + "grad_norm": 8.742712020874023, + "learning_rate": 3.156497815869283e-05, + "loss": 0.6639, + "step": 1578 + }, + { + "epoch": 1.624485596707819, + "grad_norm": 1.3122910261154175, + "learning_rate": 3.15543321086677e-05, + "loss": 0.0204, + "step": 1579 + }, + { + "epoch": 1.625514403292181, + "grad_norm": 4.740899562835693, + "learning_rate": 3.1543672444203374e-05, + "loss": 0.1661, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7632798552513123, + "eval_Qnli-dev_cosine_ap": 0.7595720144230074, + "eval_Qnli-dev_cosine_f1": 0.7043189368770765, + "eval_Qnli-dev_cosine_f1_threshold": 0.6868818998336792, + "eval_Qnli-dev_cosine_precision": 0.5792349726775956, + "eval_Qnli-dev_cosine_recall": 0.8983050847457628, + "eval_Qnli-dev_dot_accuracy": 0.6875, + "eval_Qnli-dev_dot_accuracy_threshold": 349.8937072753906, + "eval_Qnli-dev_dot_ap": 0.6892873388235935, + "eval_Qnli-dev_dot_f1": 0.6772655007949125, + "eval_Qnli-dev_dot_f1_threshold": 292.2147216796875, + "eval_Qnli-dev_dot_precision": 0.5419847328244275, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.705078125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.526966094970703, + "eval_Qnli-dev_euclidean_ap": 0.7674293463107565, + "eval_Qnli-dev_euclidean_f1": 0.7198515769944341, + "eval_Qnli-dev_euclidean_f1_threshold": 15.816378593444824, + "eval_Qnli-dev_euclidean_precision": 0.6402640264026402, + "eval_Qnli-dev_euclidean_recall": 0.8220338983050848, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 304.14422607421875, + "eval_Qnli-dev_manhattan_ap": 0.7708594172622463, + "eval_Qnli-dev_manhattan_f1": 0.7132616487455196, + "eval_Qnli-dev_manhattan_f1_threshold": 336.89105224609375, + "eval_Qnli-dev_manhattan_precision": 0.6180124223602484, + "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, + "eval_Qnli-dev_max_accuracy": 0.71484375, + "eval_Qnli-dev_max_accuracy_threshold": 349.8937072753906, + "eval_Qnli-dev_max_ap": 0.7708594172622463, + "eval_Qnli-dev_max_f1": 0.7198515769944341, + "eval_Qnli-dev_max_f1_threshold": 336.89105224609375, + "eval_Qnli-dev_max_precision": 0.6402640264026402, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8192890882492065, + "eval_allNLI-dev_cosine_ap": 0.6178735143817917, + "eval_allNLI-dev_cosine_f1": 0.6136865342163355, + "eval_allNLI-dev_cosine_f1_threshold": 0.7249919772148132, + "eval_allNLI-dev_cosine_precision": 0.49642857142857144, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.693359375, + "eval_allNLI-dev_dot_accuracy_threshold": 406.23291015625, + "eval_allNLI-dev_dot_ap": 0.5581205075045584, + "eval_allNLI-dev_dot_f1": 0.5874125874125874, + "eval_allNLI-dev_dot_f1_threshold": 314.37115478515625, + "eval_allNLI-dev_dot_precision": 0.4921875, + "eval_allNLI-dev_dot_recall": 0.7283236994219653, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.021381378173828, + "eval_allNLI-dev_euclidean_ap": 0.6240732173975734, + "eval_allNLI-dev_euclidean_f1": 0.6216867469879518, + "eval_allNLI-dev_euclidean_f1_threshold": 14.67414379119873, + "eval_allNLI-dev_euclidean_precision": 0.5330578512396694, + "eval_allNLI-dev_euclidean_recall": 0.7456647398843931, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 247.3629913330078, + "eval_allNLI-dev_manhattan_ap": 0.6206113939017133, + "eval_allNLI-dev_manhattan_f1": 0.6206896551724138, + "eval_allNLI-dev_manhattan_f1_threshold": 316.04638671875, + "eval_allNLI-dev_manhattan_precision": 0.5152671755725191, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 406.23291015625, + "eval_allNLI-dev_max_ap": 0.6240732173975734, + "eval_allNLI-dev_max_f1": 0.6216867469879518, + "eval_allNLI-dev_max_f1_threshold": 316.04638671875, + "eval_allNLI-dev_max_precision": 0.5330578512396694, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.7708594172622463, + "eval_sts-test_pearson_cosine": 0.8400854742746162, + "eval_sts-test_pearson_dot": 0.8281129949166816, + "eval_sts-test_pearson_euclidean": 0.8643289450211398, + "eval_sts-test_pearson_manhattan": 0.8620190577992299, + "eval_sts-test_pearson_max": 0.8643289450211398, + "eval_sts-test_spearman_cosine": 0.864160059090035, + "eval_sts-test_spearman_dot": 0.8162842198770266, + "eval_sts-test_spearman_euclidean": 0.8603101080725328, + "eval_sts-test_spearman_manhattan": 0.8563359275534083, + "eval_sts-test_spearman_max": 0.864160059090035, + "eval_vitaminc-pairs_loss": 3.219008207321167, + "eval_vitaminc-pairs_runtime": 3.2219, + "eval_vitaminc-pairs_samples_per_second": 39.729, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_negation-triplets_loss": 1.0143779516220093, + "eval_negation-triplets_runtime": 0.7617, + "eval_negation-triplets_samples_per_second": 168.041, + "eval_negation-triplets_steps_per_second": 1.313, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_scitail-pairs-pos_loss": 0.15095233917236328, + "eval_scitail-pairs-pos_runtime": 0.8861, + "eval_scitail-pairs-pos_samples_per_second": 144.456, + "eval_scitail-pairs-pos_steps_per_second": 1.129, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_scitail-pairs-qa_loss": 0.0023409768473356962, + "eval_scitail-pairs-qa_runtime": 0.6047, + "eval_scitail-pairs-qa_samples_per_second": 211.683, + "eval_scitail-pairs-qa_steps_per_second": 1.654, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_xsum-pairs_loss": 0.29153984785079956, + "eval_xsum-pairs_runtime": 3.0345, + "eval_xsum-pairs_samples_per_second": 42.182, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_sciq_pairs_loss": 0.09576187282800674, + "eval_sciq_pairs_runtime": 3.5115, + "eval_sciq_pairs_samples_per_second": 36.451, + "eval_sciq_pairs_steps_per_second": 0.285, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_qasc_pairs_loss": 0.22589659690856934, + "eval_qasc_pairs_runtime": 0.6491, + "eval_qasc_pairs_samples_per_second": 197.201, + "eval_qasc_pairs_steps_per_second": 1.541, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_openbookqa_pairs_loss": 0.8184758424758911, + "eval_openbookqa_pairs_runtime": 0.5983, + "eval_openbookqa_pairs_samples_per_second": 213.94, + "eval_openbookqa_pairs_steps_per_second": 1.671, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_msmarco_pairs_loss": 0.817125678062439, + "eval_msmarco_pairs_runtime": 1.5279, + "eval_msmarco_pairs_samples_per_second": 83.773, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_nq_pairs_loss": 0.7911259531974792, + "eval_nq_pairs_runtime": 2.9065, + "eval_nq_pairs_samples_per_second": 44.039, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_trivia_pairs_loss": 0.717950165271759, + "eval_trivia_pairs_runtime": 3.4507, + "eval_trivia_pairs_samples_per_second": 37.094, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_gooaq_pairs_loss": 0.45841023325920105, + "eval_gooaq_pairs_runtime": 0.9622, + "eval_gooaq_pairs_samples_per_second": 133.029, + "eval_gooaq_pairs_steps_per_second": 1.039, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_paws-pos_loss": 0.021705929189920425, + "eval_paws-pos_runtime": 0.6991, + "eval_paws-pos_samples_per_second": 183.088, + "eval_paws-pos_steps_per_second": 1.43, + "step": 1580 + }, + { + "epoch": 1.625514403292181, + "eval_global_dataset_loss": 0.48299312591552734, + "eval_global_dataset_runtime": 13.3939, + "eval_global_dataset_samples_per_second": 31.059, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1580 + }, + { + "epoch": 1.626543209876543, + "grad_norm": 0.5153937935829163, + "learning_rate": 3.153299918295288e-05, + "loss": 0.0404, + "step": 1581 + }, + { + "epoch": 1.6275720164609053, + "grad_norm": 6.195679664611816, + "learning_rate": 3.1522312342591726e-05, + "loss": 0.2553, + "step": 1582 + }, + { + "epoch": 1.6286008230452675, + "grad_norm": 10.167468070983887, + "learning_rate": 3.1511611940817934e-05, + "loss": 0.6284, + "step": 1583 + }, + { + "epoch": 1.6296296296296298, + "grad_norm": 15.11025619506836, + "learning_rate": 3.150089799535197e-05, + "loss": 2.1434, + "step": 1584 + }, + { + "epoch": 1.6306584362139918, + "grad_norm": 12.672037124633789, + "learning_rate": 3.1490170523936726e-05, + "loss": 1.1293, + "step": 1585 + }, + { + "epoch": 1.6316872427983538, + "grad_norm": 0.7210871577262878, + "learning_rate": 3.147942954433751e-05, + "loss": 0.0465, + "step": 1586 + }, + { + "epoch": 1.632716049382716, + "grad_norm": 15.898063659667969, + "learning_rate": 3.1468675074342006e-05, + "loss": 1.235, + "step": 1587 + }, + { + "epoch": 1.6337448559670782, + "grad_norm": 7.80556058883667, + "learning_rate": 3.14579071317602e-05, + "loss": 0.4631, + "step": 1588 + }, + { + "epoch": 1.6347736625514404, + "grad_norm": 6.856632709503174, + "learning_rate": 3.144712573442442e-05, + "loss": 0.356, + "step": 1589 + }, + { + "epoch": 1.6358024691358026, + "grad_norm": 4.719722747802734, + "learning_rate": 3.1436330900189284e-05, + "loss": 0.2033, + "step": 1590 + }, + { + "epoch": 1.6368312757201646, + "grad_norm": 8.740306854248047, + "learning_rate": 3.142552264693164e-05, + "loss": 0.7071, + "step": 1591 + }, + { + "epoch": 1.6378600823045266, + "grad_norm": 3.5525388717651367, + "learning_rate": 3.141470099255056e-05, + "loss": 0.1086, + "step": 1592 + }, + { + "epoch": 1.6388888888888888, + "grad_norm": 8.827413558959961, + "learning_rate": 3.140386595496733e-05, + "loss": 0.6634, + "step": 1593 + }, + { + "epoch": 1.639917695473251, + "grad_norm": 4.826879978179932, + "learning_rate": 3.139301755212537e-05, + "loss": 0.2143, + "step": 1594 + }, + { + "epoch": 1.6409465020576133, + "grad_norm": 5.042708873748779, + "learning_rate": 3.1382155801990265e-05, + "loss": 0.2011, + "step": 1595 + }, + { + "epoch": 1.6419753086419753, + "grad_norm": 0.6428842544555664, + "learning_rate": 3.137128072254967e-05, + "loss": 0.0116, + "step": 1596 + }, + { + "epoch": 1.6430041152263375, + "grad_norm": 6.803867340087891, + "learning_rate": 3.1360392331813356e-05, + "loss": 0.3944, + "step": 1597 + }, + { + "epoch": 1.6440329218106995, + "grad_norm": 4.369873046875, + "learning_rate": 3.134949064781309e-05, + "loss": 0.1438, + "step": 1598 + }, + { + "epoch": 1.6450617283950617, + "grad_norm": 8.419076919555664, + "learning_rate": 3.133857568860268e-05, + "loss": 0.3185, + "step": 1599 + }, + { + "epoch": 1.646090534979424, + "grad_norm": 7.200667858123779, + "learning_rate": 3.132764747225794e-05, + "loss": 0.4497, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7726122140884399, + "eval_Qnli-dev_cosine_ap": 0.7463055188938073, + "eval_Qnli-dev_cosine_f1": 0.6965888689407541, + "eval_Qnli-dev_cosine_f1_threshold": 0.7256693840026855, + "eval_Qnli-dev_cosine_precision": 0.6043613707165109, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 365.7900695800781, + "eval_Qnli-dev_dot_ap": 0.6686481538000109, + "eval_Qnli-dev_dot_f1": 0.6729857819905213, + "eval_Qnli-dev_dot_f1_threshold": 310.37847900390625, + "eval_Qnli-dev_dot_precision": 0.5365239294710328, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.727681159973145, + "eval_Qnli-dev_euclidean_ap": 0.7546221577715386, + "eval_Qnli-dev_euclidean_f1": 0.7009174311926605, + "eval_Qnli-dev_euclidean_f1_threshold": 15.86984634399414, + "eval_Qnli-dev_euclidean_precision": 0.6181229773462783, + "eval_Qnli-dev_euclidean_recall": 0.809322033898305, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 301.3721923828125, + "eval_Qnli-dev_manhattan_ap": 0.7594406310315899, + "eval_Qnli-dev_manhattan_f1": 0.704424778761062, + "eval_Qnli-dev_manhattan_f1_threshold": 338.2559814453125, + "eval_Qnli-dev_manhattan_precision": 0.6048632218844985, + "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 365.7900695800781, + "eval_Qnli-dev_max_ap": 0.7594406310315899, + "eval_Qnli-dev_max_f1": 0.704424778761062, + "eval_Qnli-dev_max_f1_threshold": 338.2559814453125, + "eval_Qnli-dev_max_precision": 0.6181229773462783, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8390976190567017, + "eval_allNLI-dev_cosine_ap": 0.6197766477419683, + "eval_allNLI-dev_cosine_f1": 0.6213151927437641, + "eval_allNLI-dev_cosine_f1_threshold": 0.7497798204421997, + "eval_allNLI-dev_cosine_precision": 0.5111940298507462, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.7109375, + "eval_allNLI-dev_dot_accuracy_threshold": 391.1158447265625, + "eval_allNLI-dev_dot_ap": 0.5640544532184968, + "eval_allNLI-dev_dot_f1": 0.5887265135699373, + "eval_allNLI-dev_dot_f1_threshold": 326.6453857421875, + "eval_allNLI-dev_dot_precision": 0.46078431372549017, + "eval_allNLI-dev_dot_recall": 0.815028901734104, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.69601821899414, + "eval_allNLI-dev_euclidean_ap": 0.6237364426707476, + "eval_allNLI-dev_euclidean_f1": 0.6247288503253796, + "eval_allNLI-dev_euclidean_f1_threshold": 15.531798362731934, + "eval_allNLI-dev_euclidean_precision": 0.5, + "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 257.890380859375, + "eval_allNLI-dev_manhattan_ap": 0.6215067171697528, + "eval_allNLI-dev_manhattan_f1": 0.6202247191011235, + "eval_allNLI-dev_manhattan_f1_threshold": 317.50958251953125, + "eval_allNLI-dev_manhattan_precision": 0.5073529411764706, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 391.1158447265625, + "eval_allNLI-dev_max_ap": 0.6237364426707476, + "eval_allNLI-dev_max_f1": 0.6247288503253796, + "eval_allNLI-dev_max_f1_threshold": 326.6453857421875, + "eval_allNLI-dev_max_precision": 0.5111940298507462, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7594406310315899, + "eval_sts-test_pearson_cosine": 0.8440868643946784, + "eval_sts-test_pearson_dot": 0.8408286771702145, + "eval_sts-test_pearson_euclidean": 0.8720004318239576, + "eval_sts-test_pearson_manhattan": 0.8701934437728628, + "eval_sts-test_pearson_max": 0.8720004318239576, + "eval_sts-test_spearman_cosine": 0.873001834310702, + "eval_sts-test_spearman_dot": 0.838884815163741, + "eval_sts-test_spearman_euclidean": 0.8694954387269019, + "eval_sts-test_spearman_manhattan": 0.8660044823282814, + "eval_sts-test_spearman_max": 0.873001834310702, + "eval_vitaminc-pairs_loss": 3.2943060398101807, + "eval_vitaminc-pairs_runtime": 3.2024, + "eval_vitaminc-pairs_samples_per_second": 39.971, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_negation-triplets_loss": 0.9933223724365234, + "eval_negation-triplets_runtime": 0.7492, + "eval_negation-triplets_samples_per_second": 170.839, + "eval_negation-triplets_steps_per_second": 1.335, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_scitail-pairs-pos_loss": 0.1557767689228058, + "eval_scitail-pairs-pos_runtime": 0.867, + "eval_scitail-pairs-pos_samples_per_second": 147.63, + "eval_scitail-pairs-pos_steps_per_second": 1.153, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_scitail-pairs-qa_loss": 0.0011445347918197513, + "eval_scitail-pairs-qa_runtime": 0.5991, + "eval_scitail-pairs-qa_samples_per_second": 213.662, + "eval_scitail-pairs-qa_steps_per_second": 1.669, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_xsum-pairs_loss": 0.3158724904060364, + "eval_xsum-pairs_runtime": 3.0272, + "eval_xsum-pairs_samples_per_second": 42.284, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_sciq_pairs_loss": 0.10199625790119171, + "eval_sciq_pairs_runtime": 3.5022, + "eval_sciq_pairs_samples_per_second": 36.549, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_qasc_pairs_loss": 0.19696098566055298, + "eval_qasc_pairs_runtime": 0.6188, + "eval_qasc_pairs_samples_per_second": 206.868, + "eval_qasc_pairs_steps_per_second": 1.616, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_openbookqa_pairs_loss": 0.8150601387023926, + "eval_openbookqa_pairs_runtime": 0.5882, + "eval_openbookqa_pairs_samples_per_second": 217.608, + "eval_openbookqa_pairs_steps_per_second": 1.7, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_msmarco_pairs_loss": 0.8213596343994141, + "eval_msmarco_pairs_runtime": 1.5212, + "eval_msmarco_pairs_samples_per_second": 84.147, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_nq_pairs_loss": 0.7698879241943359, + "eval_nq_pairs_runtime": 2.9009, + "eval_nq_pairs_samples_per_second": 44.124, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_trivia_pairs_loss": 0.699388861656189, + "eval_trivia_pairs_runtime": 3.4433, + "eval_trivia_pairs_samples_per_second": 37.173, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_gooaq_pairs_loss": 0.4309929609298706, + "eval_gooaq_pairs_runtime": 0.9526, + "eval_gooaq_pairs_samples_per_second": 134.368, + "eval_gooaq_pairs_steps_per_second": 1.05, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_paws-pos_loss": 0.021949268877506256, + "eval_paws-pos_runtime": 0.6983, + "eval_paws-pos_samples_per_second": 183.301, + "eval_paws-pos_steps_per_second": 1.432, + "step": 1600 + }, + { + "epoch": 1.646090534979424, + "eval_global_dataset_loss": 0.4891248047351837, + "eval_global_dataset_runtime": 13.4015, + "eval_global_dataset_samples_per_second": 31.041, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1600 + }, + { + "epoch": 1.6471193415637861, + "grad_norm": 5.759812831878662, + "learning_rate": 3.131670601687659e-05, + "loss": 0.3233, + "step": 1601 + }, + { + "epoch": 1.6481481481481481, + "grad_norm": 7.317870140075684, + "learning_rate": 3.130575134057831e-05, + "loss": 0.257, + "step": 1602 + }, + { + "epoch": 1.6491769547325101, + "grad_norm": 1.7312713861465454, + "learning_rate": 3.129478346150466e-05, + "loss": 0.0258, + "step": 1603 + }, + { + "epoch": 1.6502057613168724, + "grad_norm": 10.138225555419922, + "learning_rate": 3.128380239781907e-05, + "loss": 0.4831, + "step": 1604 + }, + { + "epoch": 1.6512345679012346, + "grad_norm": 0.5360116362571716, + "learning_rate": 3.12728081677068e-05, + "loss": 0.0047, + "step": 1605 + }, + { + "epoch": 1.6522633744855968, + "grad_norm": 17.974763870239258, + "learning_rate": 3.1261800789374926e-05, + "loss": 2.6074, + "step": 1606 + }, + { + "epoch": 1.653292181069959, + "grad_norm": 13.676968574523926, + "learning_rate": 3.125078028105228e-05, + "loss": 0.9363, + "step": 1607 + }, + { + "epoch": 1.654320987654321, + "grad_norm": 5.995203018188477, + "learning_rate": 3.1239746660989456e-05, + "loss": 0.1652, + "step": 1608 + }, + { + "epoch": 1.655349794238683, + "grad_norm": 12.427960395812988, + "learning_rate": 3.1228699947458744e-05, + "loss": 0.6762, + "step": 1609 + }, + { + "epoch": 1.6563786008230452, + "grad_norm": 6.78856897354126, + "learning_rate": 3.121764015875413e-05, + "loss": 0.3767, + "step": 1610 + }, + { + "epoch": 1.6574074074074074, + "grad_norm": 13.389119148254395, + "learning_rate": 3.1206567313191256e-05, + "loss": 0.9476, + "step": 1611 + }, + { + "epoch": 1.6584362139917697, + "grad_norm": 1.0449633598327637, + "learning_rate": 3.119548142910737e-05, + "loss": 0.0285, + "step": 1612 + }, + { + "epoch": 1.6594650205761317, + "grad_norm": 2.045689582824707, + "learning_rate": 3.1184382524861326e-05, + "loss": 0.1321, + "step": 1613 + }, + { + "epoch": 1.6604938271604939, + "grad_norm": 5.213545322418213, + "learning_rate": 3.117327061883354e-05, + "loss": 0.1972, + "step": 1614 + }, + { + "epoch": 1.6615226337448559, + "grad_norm": 12.71414566040039, + "learning_rate": 3.116214572942597e-05, + "loss": 0.8901, + "step": 1615 + }, + { + "epoch": 1.662551440329218, + "grad_norm": 5.993893146514893, + "learning_rate": 3.115100787506204e-05, + "loss": 0.1987, + "step": 1616 + }, + { + "epoch": 1.6635802469135803, + "grad_norm": 0.42078569531440735, + "learning_rate": 3.1139857074186675e-05, + "loss": 0.0073, + "step": 1617 + }, + { + "epoch": 1.6646090534979425, + "grad_norm": 6.253912448883057, + "learning_rate": 3.1128693345266235e-05, + "loss": 0.2412, + "step": 1618 + }, + { + "epoch": 1.6656378600823045, + "grad_norm": 13.241989135742188, + "learning_rate": 3.1117516706788495e-05, + "loss": 1.8227, + "step": 1619 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 2.7509756088256836, + "learning_rate": 3.110632717726259e-05, + "loss": 0.0598, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8112232685089111, + "eval_Qnli-dev_cosine_ap": 0.7530566628283017, + "eval_Qnli-dev_cosine_f1": 0.702803738317757, + "eval_Qnli-dev_cosine_f1_threshold": 0.7403519153594971, + "eval_Qnli-dev_cosine_precision": 0.6287625418060201, + "eval_Qnli-dev_cosine_recall": 0.7966101694915254, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 389.0986022949219, + "eval_Qnli-dev_dot_ap": 0.6839751331019408, + "eval_Qnli-dev_dot_f1": 0.6869712351945855, + "eval_Qnli-dev_dot_f1_threshold": 326.48431396484375, + "eval_Qnli-dev_dot_precision": 0.571830985915493, + "eval_Qnli-dev_dot_recall": 0.8601694915254238, + "eval_Qnli-dev_euclidean_accuracy": 0.7109375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.742420196533203, + "eval_Qnli-dev_euclidean_ap": 0.7602323009588319, + "eval_Qnli-dev_euclidean_f1": 0.7107750472589792, + "eval_Qnli-dev_euclidean_f1_threshold": 15.595666885375977, + "eval_Qnli-dev_euclidean_precision": 0.6416382252559727, + "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 279.71630859375, + "eval_Qnli-dev_manhattan_ap": 0.7636945225564578, + "eval_Qnli-dev_manhattan_f1": 0.7156308851224106, + "eval_Qnli-dev_manhattan_f1_threshold": 326.18878173828125, + "eval_Qnli-dev_manhattan_precision": 0.6440677966101694, + "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, + "eval_Qnli-dev_max_accuracy": 0.7109375, + "eval_Qnli-dev_max_accuracy_threshold": 389.0986022949219, + "eval_Qnli-dev_max_ap": 0.7636945225564578, + "eval_Qnli-dev_max_f1": 0.7156308851224106, + "eval_Qnli-dev_max_f1_threshold": 326.48431396484375, + "eval_Qnli-dev_max_precision": 0.6440677966101694, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8469563722610474, + "eval_allNLI-dev_cosine_ap": 0.6148003055167395, + "eval_allNLI-dev_cosine_f1": 0.6222222222222222, + "eval_allNLI-dev_cosine_f1_threshold": 0.7551340460777283, + "eval_allNLI-dev_cosine_precision": 0.5054151624548736, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.705078125, + "eval_allNLI-dev_dot_accuracy_threshold": 401.8390808105469, + "eval_allNLI-dev_dot_ap": 0.5574330740455032, + "eval_allNLI-dev_dot_f1": 0.5793991416309013, + "eval_allNLI-dev_dot_f1_threshold": 336.57501220703125, + "eval_allNLI-dev_dot_precision": 0.46075085324232085, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.744140625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.117332458496094, + "eval_allNLI-dev_euclidean_ap": 0.6202020587591924, + "eval_allNLI-dev_euclidean_f1": 0.6337078651685394, + "eval_allNLI-dev_euclidean_f1_threshold": 14.898088455200195, + "eval_allNLI-dev_euclidean_precision": 0.5183823529411765, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 248.040283203125, + "eval_allNLI-dev_manhattan_ap": 0.6195824334566606, + "eval_allNLI-dev_manhattan_f1": 0.6261261261261263, + "eval_allNLI-dev_manhattan_f1_threshold": 310.2285461425781, + "eval_allNLI-dev_manhattan_precision": 0.5129151291512916, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 401.8390808105469, + "eval_allNLI-dev_max_ap": 0.6202020587591924, + "eval_allNLI-dev_max_f1": 0.6337078651685394, + "eval_allNLI-dev_max_f1_threshold": 336.57501220703125, + "eval_allNLI-dev_max_precision": 0.5183823529411765, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7636945225564578, + "eval_sts-test_pearson_cosine": 0.8442697659120451, + "eval_sts-test_pearson_dot": 0.8332794530197303, + "eval_sts-test_pearson_euclidean": 0.8751710961819176, + "eval_sts-test_pearson_manhattan": 0.8743056704972211, + "eval_sts-test_pearson_max": 0.8751710961819176, + "eval_sts-test_spearman_cosine": 0.8770357577637844, + "eval_sts-test_spearman_dot": 0.8219956443633655, + "eval_sts-test_spearman_euclidean": 0.8735976526048674, + "eval_sts-test_spearman_manhattan": 0.8719677221624696, + "eval_sts-test_spearman_max": 0.8770357577637844, + "eval_vitaminc-pairs_loss": 3.147581100463867, + "eval_vitaminc-pairs_runtime": 3.1941, + "eval_vitaminc-pairs_samples_per_second": 40.074, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_negation-triplets_loss": 0.9426246285438538, + "eval_negation-triplets_runtime": 0.7561, + "eval_negation-triplets_samples_per_second": 169.3, + "eval_negation-triplets_steps_per_second": 1.323, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_scitail-pairs-pos_loss": 0.15858975052833557, + "eval_scitail-pairs-pos_runtime": 0.8858, + "eval_scitail-pairs-pos_samples_per_second": 144.509, + "eval_scitail-pairs-pos_steps_per_second": 1.129, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_scitail-pairs-qa_loss": 0.0009499162551946938, + "eval_scitail-pairs-qa_runtime": 0.6053, + "eval_scitail-pairs-qa_samples_per_second": 211.474, + "eval_scitail-pairs-qa_steps_per_second": 1.652, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_xsum-pairs_loss": 0.28190702199935913, + "eval_xsum-pairs_runtime": 3.0377, + "eval_xsum-pairs_samples_per_second": 42.137, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_sciq_pairs_loss": 0.09951034188270569, + "eval_sciq_pairs_runtime": 3.5281, + "eval_sciq_pairs_samples_per_second": 36.28, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_qasc_pairs_loss": 0.19479617476463318, + "eval_qasc_pairs_runtime": 0.6298, + "eval_qasc_pairs_samples_per_second": 203.23, + "eval_qasc_pairs_steps_per_second": 1.588, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_openbookqa_pairs_loss": 0.8473735451698303, + "eval_openbookqa_pairs_runtime": 0.5979, + "eval_openbookqa_pairs_samples_per_second": 214.093, + "eval_openbookqa_pairs_steps_per_second": 1.673, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_msmarco_pairs_loss": 0.7654114365577698, + "eval_msmarco_pairs_runtime": 1.5282, + "eval_msmarco_pairs_samples_per_second": 83.759, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_nq_pairs_loss": 0.7675896286964417, + "eval_nq_pairs_runtime": 2.9053, + "eval_nq_pairs_samples_per_second": 44.057, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_trivia_pairs_loss": 0.6710144281387329, + "eval_trivia_pairs_runtime": 3.4501, + "eval_trivia_pairs_samples_per_second": 37.1, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_gooaq_pairs_loss": 0.5018545389175415, + "eval_gooaq_pairs_runtime": 0.963, + "eval_gooaq_pairs_samples_per_second": 132.918, + "eval_gooaq_pairs_steps_per_second": 1.038, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_paws-pos_loss": 0.0214223675429821, + "eval_paws-pos_runtime": 0.7097, + "eval_paws-pos_samples_per_second": 180.351, + "eval_paws-pos_steps_per_second": 1.409, + "step": 1620 + }, + { + "epoch": 1.6666666666666665, + "eval_global_dataset_loss": 0.4568376839160919, + "eval_global_dataset_runtime": 13.4081, + "eval_global_dataset_samples_per_second": 31.026, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1620 + }, + { + "epoch": 1.6676954732510287, + "grad_norm": 4.740821361541748, + "learning_rate": 3.109512477521901e-05, + "loss": 0.1709, + "step": 1621 + }, + { + "epoch": 1.668724279835391, + "grad_norm": 8.163890838623047, + "learning_rate": 3.1083909519209574e-05, + "loss": 0.3081, + "step": 1622 + }, + { + "epoch": 1.6697530864197532, + "grad_norm": 5.643056869506836, + "learning_rate": 3.1072681427807386e-05, + "loss": 0.2765, + "step": 1623 + }, + { + "epoch": 1.6707818930041154, + "grad_norm": 8.1829833984375, + "learning_rate": 3.106144051960679e-05, + "loss": 0.3699, + "step": 1624 + }, + { + "epoch": 1.6718106995884774, + "grad_norm": 7.014549732208252, + "learning_rate": 3.105018681322337e-05, + "loss": 0.2915, + "step": 1625 + }, + { + "epoch": 1.6728395061728394, + "grad_norm": 5.737176895141602, + "learning_rate": 3.10389203272939e-05, + "loss": 0.2271, + "step": 1626 + }, + { + "epoch": 1.6738683127572016, + "grad_norm": 6.97775936126709, + "learning_rate": 3.1027641080476315e-05, + "loss": 0.3229, + "step": 1627 + }, + { + "epoch": 1.6748971193415638, + "grad_norm": 5.210075855255127, + "learning_rate": 3.101634909144969e-05, + "loss": 0.2214, + "step": 1628 + }, + { + "epoch": 1.675925925925926, + "grad_norm": 11.457197189331055, + "learning_rate": 3.100504437891421e-05, + "loss": 0.9589, + "step": 1629 + }, + { + "epoch": 1.676954732510288, + "grad_norm": 7.409816265106201, + "learning_rate": 3.09937269615911e-05, + "loss": 0.3692, + "step": 1630 + }, + { + "epoch": 1.6779835390946503, + "grad_norm": 5.9740986824035645, + "learning_rate": 3.098239685822265e-05, + "loss": 0.2368, + "step": 1631 + }, + { + "epoch": 1.6790123456790123, + "grad_norm": 0.5974377989768982, + "learning_rate": 3.097105408757215e-05, + "loss": 0.0143, + "step": 1632 + }, + { + "epoch": 1.6800411522633745, + "grad_norm": 12.626338005065918, + "learning_rate": 3.0959698668423876e-05, + "loss": 0.8766, + "step": 1633 + }, + { + "epoch": 1.6810699588477367, + "grad_norm": 4.880333423614502, + "learning_rate": 3.094833061958304e-05, + "loss": 0.2831, + "step": 1634 + }, + { + "epoch": 1.682098765432099, + "grad_norm": 11.46767807006836, + "learning_rate": 3.0936949959875773e-05, + "loss": 0.7252, + "step": 1635 + }, + { + "epoch": 1.683127572016461, + "grad_norm": 10.119542121887207, + "learning_rate": 3.0925556708149096e-05, + "loss": 0.6238, + "step": 1636 + }, + { + "epoch": 1.684156378600823, + "grad_norm": 9.226078987121582, + "learning_rate": 3.091415088327088e-05, + "loss": 0.4887, + "step": 1637 + }, + { + "epoch": 1.6851851851851851, + "grad_norm": 7.247396469116211, + "learning_rate": 3.090273250412981e-05, + "loss": 0.4855, + "step": 1638 + }, + { + "epoch": 1.6862139917695473, + "grad_norm": 7.731893539428711, + "learning_rate": 3.089130158963537e-05, + "loss": 0.4773, + "step": 1639 + }, + { + "epoch": 1.6872427983539096, + "grad_norm": 11.5791597366333, + "learning_rate": 3.087985815871781e-05, + "loss": 0.7261, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_Qnli-dev_cosine_accuracy": 0.7109375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7935091257095337, + "eval_Qnli-dev_cosine_ap": 0.7644906675103209, + "eval_Qnli-dev_cosine_f1": 0.7129798903107861, + "eval_Qnli-dev_cosine_f1_threshold": 0.7131432890892029, + "eval_Qnli-dev_cosine_precision": 0.6270096463022508, + "eval_Qnli-dev_cosine_recall": 0.826271186440678, + "eval_Qnli-dev_dot_accuracy": 0.68359375, + "eval_Qnli-dev_dot_accuracy_threshold": 355.2696228027344, + "eval_Qnli-dev_dot_ap": 0.7203838911649946, + "eval_Qnli-dev_dot_f1": 0.6779089376053963, + "eval_Qnli-dev_dot_f1_threshold": 305.30413818359375, + "eval_Qnli-dev_dot_precision": 0.5630252100840336, + "eval_Qnli-dev_dot_recall": 0.8516949152542372, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.860023498535156, + "eval_Qnli-dev_euclidean_ap": 0.7704725006394498, + "eval_Qnli-dev_euclidean_f1": 0.7111111111111111, + "eval_Qnli-dev_euclidean_f1_threshold": 16.034788131713867, + "eval_Qnli-dev_euclidean_precision": 0.631578947368421, + "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 298.86627197265625, + "eval_Qnli-dev_manhattan_ap": 0.7720556864200312, + "eval_Qnli-dev_manhattan_f1": 0.7204502814258913, + "eval_Qnli-dev_manhattan_f1_threshold": 334.255615234375, + "eval_Qnli-dev_manhattan_precision": 0.6464646464646465, + "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 355.2696228027344, + "eval_Qnli-dev_max_ap": 0.7720556864200312, + "eval_Qnli-dev_max_f1": 0.7204502814258913, + "eval_Qnli-dev_max_f1_threshold": 334.255615234375, + "eval_Qnli-dev_max_precision": 0.6464646464646465, + "eval_Qnli-dev_max_recall": 0.8516949152542372, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8340206146240234, + "eval_allNLI-dev_cosine_ap": 0.6154324502870555, + "eval_allNLI-dev_cosine_f1": 0.6157303370786517, + "eval_allNLI-dev_cosine_f1_threshold": 0.7429921627044678, + "eval_allNLI-dev_cosine_precision": 0.5036764705882353, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.69921875, + "eval_allNLI-dev_dot_accuracy_threshold": 372.3382873535156, + "eval_allNLI-dev_dot_ap": 0.5565432560970462, + "eval_allNLI-dev_dot_f1": 0.5910165484633569, + "eval_allNLI-dev_dot_f1_threshold": 336.52593994140625, + "eval_allNLI-dev_dot_precision": 0.5, + "eval_allNLI-dev_dot_recall": 0.7225433526011561, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.632896423339844, + "eval_allNLI-dev_euclidean_ap": 0.6216372789356083, + "eval_allNLI-dev_euclidean_f1": 0.6266666666666667, + "eval_allNLI-dev_euclidean_f1_threshold": 15.324407577514648, + "eval_allNLI-dev_euclidean_precision": 0.5090252707581228, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 260.36358642578125, + "eval_allNLI-dev_manhattan_ap": 0.6201380486598296, + "eval_allNLI-dev_manhattan_f1": 0.6284403669724771, + "eval_allNLI-dev_manhattan_f1_threshold": 315.0744934082031, + "eval_allNLI-dev_manhattan_precision": 0.5209125475285171, + "eval_allNLI-dev_manhattan_recall": 0.791907514450867, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 372.3382873535156, + "eval_allNLI-dev_max_ap": 0.6216372789356083, + "eval_allNLI-dev_max_f1": 0.6284403669724771, + "eval_allNLI-dev_max_f1_threshold": 336.52593994140625, + "eval_allNLI-dev_max_precision": 0.5209125475285171, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7720556864200312, + "eval_sts-test_pearson_cosine": 0.8480708109327747, + "eval_sts-test_pearson_dot": 0.8351161910233019, + "eval_sts-test_pearson_euclidean": 0.8779361059678183, + "eval_sts-test_pearson_manhattan": 0.8765214608457297, + "eval_sts-test_pearson_max": 0.8779361059678183, + "eval_sts-test_spearman_cosine": 0.8782235904948963, + "eval_sts-test_spearman_dot": 0.83017832300626, + "eval_sts-test_spearman_euclidean": 0.8748063394237875, + "eval_sts-test_spearman_manhattan": 0.8733159434144085, + "eval_sts-test_spearman_max": 0.8782235904948963, + "eval_vitaminc-pairs_loss": 3.1627960205078125, + "eval_vitaminc-pairs_runtime": 3.2, + "eval_vitaminc-pairs_samples_per_second": 40.0, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_negation-triplets_loss": 0.9496148824691772, + "eval_negation-triplets_runtime": 0.7615, + "eval_negation-triplets_samples_per_second": 168.09, + "eval_negation-triplets_steps_per_second": 1.313, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_scitail-pairs-pos_loss": 0.1690126657485962, + "eval_scitail-pairs-pos_runtime": 0.8832, + "eval_scitail-pairs-pos_samples_per_second": 144.927, + "eval_scitail-pairs-pos_steps_per_second": 1.132, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_scitail-pairs-qa_loss": 0.0007716402760706842, + "eval_scitail-pairs-qa_runtime": 0.6012, + "eval_scitail-pairs-qa_samples_per_second": 212.92, + "eval_scitail-pairs-qa_steps_per_second": 1.663, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_xsum-pairs_loss": 0.29522186517715454, + "eval_xsum-pairs_runtime": 3.0305, + "eval_xsum-pairs_samples_per_second": 42.238, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_sciq_pairs_loss": 0.09298048168420792, + "eval_sciq_pairs_runtime": 3.5126, + "eval_sciq_pairs_samples_per_second": 36.441, + "eval_sciq_pairs_steps_per_second": 0.285, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_qasc_pairs_loss": 0.1856236308813095, + "eval_qasc_pairs_runtime": 0.6208, + "eval_qasc_pairs_samples_per_second": 206.172, + "eval_qasc_pairs_steps_per_second": 1.611, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_openbookqa_pairs_loss": 0.8703394532203674, + "eval_openbookqa_pairs_runtime": 0.5949, + "eval_openbookqa_pairs_samples_per_second": 215.164, + "eval_openbookqa_pairs_steps_per_second": 1.681, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_msmarco_pairs_loss": 0.8120965361595154, + "eval_msmarco_pairs_runtime": 1.5191, + "eval_msmarco_pairs_samples_per_second": 84.26, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_nq_pairs_loss": 0.7762519121170044, + "eval_nq_pairs_runtime": 2.9038, + "eval_nq_pairs_samples_per_second": 44.08, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_trivia_pairs_loss": 0.6840327382087708, + "eval_trivia_pairs_runtime": 3.4573, + "eval_trivia_pairs_samples_per_second": 37.023, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_gooaq_pairs_loss": 0.489113450050354, + "eval_gooaq_pairs_runtime": 0.9594, + "eval_gooaq_pairs_samples_per_second": 133.414, + "eval_gooaq_pairs_steps_per_second": 1.042, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_paws-pos_loss": 0.021868284791707993, + "eval_paws-pos_runtime": 0.7078, + "eval_paws-pos_samples_per_second": 180.833, + "eval_paws-pos_steps_per_second": 1.413, + "step": 1640 + }, + { + "epoch": 1.6872427983539096, + "eval_global_dataset_loss": 0.457489550113678, + "eval_global_dataset_runtime": 13.4378, + "eval_global_dataset_samples_per_second": 30.958, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1640 + }, + { + "epoch": 1.6882716049382716, + "grad_norm": 5.879720687866211, + "learning_rate": 3.0868402230328075e-05, + "loss": 0.205, + "step": 1641 + }, + { + "epoch": 1.6893004115226338, + "grad_norm": 7.6843342781066895, + "learning_rate": 3.085693382343786e-05, + "loss": 0.3409, + "step": 1642 + }, + { + "epoch": 1.6903292181069958, + "grad_norm": 6.418579578399658, + "learning_rate": 3.084545295703948e-05, + "loss": 0.247, + "step": 1643 + }, + { + "epoch": 1.691358024691358, + "grad_norm": 6.323958873748779, + "learning_rate": 3.08339596501459e-05, + "loss": 0.2139, + "step": 1644 + }, + { + "epoch": 1.6923868312757202, + "grad_norm": 7.00066614151001, + "learning_rate": 3.0822453921790696e-05, + "loss": 0.5987, + "step": 1645 + }, + { + "epoch": 1.6934156378600824, + "grad_norm": 0.08560808002948761, + "learning_rate": 3.081093579102799e-05, + "loss": 0.0012, + "step": 1646 + }, + { + "epoch": 1.6944444444444444, + "grad_norm": 6.805712699890137, + "learning_rate": 3.079940527693247e-05, + "loss": 0.3878, + "step": 1647 + }, + { + "epoch": 1.6954732510288066, + "grad_norm": 5.735049247741699, + "learning_rate": 3.078786239859931e-05, + "loss": 0.1863, + "step": 1648 + }, + { + "epoch": 1.6965020576131686, + "grad_norm": 7.262701511383057, + "learning_rate": 3.0776307175144185e-05, + "loss": 0.4896, + "step": 1649 + }, + { + "epoch": 1.6975308641975309, + "grad_norm": 10.45705509185791, + "learning_rate": 3.076473962570319e-05, + "loss": 0.79, + "step": 1650 + }, + { + "epoch": 1.698559670781893, + "grad_norm": 6.432678699493408, + "learning_rate": 3.075315976943284e-05, + "loss": 0.2959, + "step": 1651 + }, + { + "epoch": 1.6995884773662553, + "grad_norm": 6.148375034332275, + "learning_rate": 3.0741567625510034e-05, + "loss": 0.2332, + "step": 1652 + }, + { + "epoch": 1.7006172839506173, + "grad_norm": 1.0993452072143555, + "learning_rate": 3.0729963213132013e-05, + "loss": 0.057, + "step": 1653 + }, + { + "epoch": 1.7016460905349793, + "grad_norm": 11.179821968078613, + "learning_rate": 3.071834655151635e-05, + "loss": 0.5936, + "step": 1654 + }, + { + "epoch": 1.7026748971193415, + "grad_norm": 14.889330863952637, + "learning_rate": 3.070671765990089e-05, + "loss": 1.9773, + "step": 1655 + }, + { + "epoch": 1.7037037037037037, + "grad_norm": 8.183863639831543, + "learning_rate": 3.0695076557543735e-05, + "loss": 0.4096, + "step": 1656 + }, + { + "epoch": 1.704732510288066, + "grad_norm": 5.217211723327637, + "learning_rate": 3.068342326372321e-05, + "loss": 0.2046, + "step": 1657 + }, + { + "epoch": 1.705761316872428, + "grad_norm": 14.158547401428223, + "learning_rate": 3.067175779773783e-05, + "loss": 1.8898, + "step": 1658 + }, + { + "epoch": 1.7067901234567902, + "grad_norm": 11.467806816101074, + "learning_rate": 3.066008017890626e-05, + "loss": 0.8136, + "step": 1659 + }, + { + "epoch": 1.7078189300411522, + "grad_norm": 4.205172538757324, + "learning_rate": 3.0648390426567306e-05, + "loss": 0.1098, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7641236782073975, + "eval_Qnli-dev_cosine_ap": 0.7543947094620718, + "eval_Qnli-dev_cosine_f1": 0.6967509025270758, + "eval_Qnli-dev_cosine_f1_threshold": 0.7194709777832031, + "eval_Qnli-dev_cosine_precision": 0.6069182389937107, + "eval_Qnli-dev_cosine_recall": 0.8177966101694916, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 346.1076965332031, + "eval_Qnli-dev_dot_ap": 0.6915011008394673, + "eval_Qnli-dev_dot_f1": 0.6761006289308176, + "eval_Qnli-dev_dot_f1_threshold": 295.66900634765625, + "eval_Qnli-dev_dot_precision": 0.5375, + "eval_Qnli-dev_dot_recall": 0.9110169491525424, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.609542846679688, + "eval_Qnli-dev_euclidean_ap": 0.7639767594998113, + "eval_Qnli-dev_euclidean_f1": 0.7042253521126761, + "eval_Qnli-dev_euclidean_f1_threshold": 16.083669662475586, + "eval_Qnli-dev_euclidean_precision": 0.6024096385542169, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.70703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 275.1696472167969, + "eval_Qnli-dev_manhattan_ap": 0.7667941208454679, + "eval_Qnli-dev_manhattan_f1": 0.717391304347826, + "eval_Qnli-dev_manhattan_f1_threshold": 331.35174560546875, + "eval_Qnli-dev_manhattan_precision": 0.6265822784810127, + "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, + "eval_Qnli-dev_max_accuracy": 0.71484375, + "eval_Qnli-dev_max_accuracy_threshold": 346.1076965332031, + "eval_Qnli-dev_max_ap": 0.7667941208454679, + "eval_Qnli-dev_max_f1": 0.717391304347826, + "eval_Qnli-dev_max_f1_threshold": 331.35174560546875, + "eval_Qnli-dev_max_precision": 0.6265822784810127, + "eval_Qnli-dev_max_recall": 0.9110169491525424, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8820390701293945, + "eval_allNLI-dev_cosine_ap": 0.6219945288269941, + "eval_allNLI-dev_cosine_f1": 0.6194331983805668, + "eval_allNLI-dev_cosine_f1_threshold": 0.7163412570953369, + "eval_allNLI-dev_cosine_precision": 0.4766355140186916, + "eval_allNLI-dev_cosine_recall": 0.884393063583815, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 375.21624755859375, + "eval_allNLI-dev_dot_ap": 0.5617613183656214, + "eval_allNLI-dev_dot_f1": 0.5970149253731343, + "eval_allNLI-dev_dot_f1_threshold": 319.013671875, + "eval_allNLI-dev_dot_precision": 0.47297297297297297, + "eval_allNLI-dev_dot_recall": 0.8092485549132948, + "eval_allNLI-dev_euclidean_accuracy": 0.740234375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.767152786254883, + "eval_allNLI-dev_euclidean_ap": 0.6298058452930776, + "eval_allNLI-dev_euclidean_f1": 0.6252676659528908, + "eval_allNLI-dev_euclidean_f1_threshold": 15.217535018920898, + "eval_allNLI-dev_euclidean_precision": 0.4965986394557823, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 268.7918701171875, + "eval_allNLI-dev_manhattan_ap": 0.6292564669901771, + "eval_allNLI-dev_manhattan_f1": 0.6310904872389791, + "eval_allNLI-dev_manhattan_f1_threshold": 309.2000732421875, + "eval_allNLI-dev_manhattan_precision": 0.5271317829457365, + "eval_allNLI-dev_manhattan_recall": 0.7861271676300579, + "eval_allNLI-dev_max_accuracy": 0.740234375, + "eval_allNLI-dev_max_accuracy_threshold": 375.21624755859375, + "eval_allNLI-dev_max_ap": 0.6298058452930776, + "eval_allNLI-dev_max_f1": 0.6310904872389791, + "eval_allNLI-dev_max_f1_threshold": 319.013671875, + "eval_allNLI-dev_max_precision": 0.5271317829457365, + "eval_allNLI-dev_max_recall": 0.884393063583815, + "eval_sequential_score": 0.7667941208454679, + "eval_sts-test_pearson_cosine": 0.8441353490634314, + "eval_sts-test_pearson_dot": 0.834811441618899, + "eval_sts-test_pearson_euclidean": 0.8741484768562252, + "eval_sts-test_pearson_manhattan": 0.8719965858186344, + "eval_sts-test_pearson_max": 0.8741484768562252, + "eval_sts-test_spearman_cosine": 0.8769982562067383, + "eval_sts-test_spearman_dot": 0.8341864697327808, + "eval_sts-test_spearman_euclidean": 0.873439143637019, + "eval_sts-test_spearman_manhattan": 0.8708601284430326, + "eval_sts-test_spearman_max": 0.8769982562067383, + "eval_vitaminc-pairs_loss": 2.9152801036834717, + "eval_vitaminc-pairs_runtime": 3.2102, + "eval_vitaminc-pairs_samples_per_second": 39.873, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_negation-triplets_loss": 0.98664790391922, + "eval_negation-triplets_runtime": 0.7545, + "eval_negation-triplets_samples_per_second": 169.658, + "eval_negation-triplets_steps_per_second": 1.325, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_scitail-pairs-pos_loss": 0.17049828171730042, + "eval_scitail-pairs-pos_runtime": 0.8776, + "eval_scitail-pairs-pos_samples_per_second": 145.859, + "eval_scitail-pairs-pos_steps_per_second": 1.14, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_scitail-pairs-qa_loss": 0.0007929063285700977, + "eval_scitail-pairs-qa_runtime": 0.5963, + "eval_scitail-pairs-qa_samples_per_second": 214.658, + "eval_scitail-pairs-qa_steps_per_second": 1.677, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_xsum-pairs_loss": 0.30998116731643677, + "eval_xsum-pairs_runtime": 3.0434, + "eval_xsum-pairs_samples_per_second": 42.058, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_sciq_pairs_loss": 0.088392473757267, + "eval_sciq_pairs_runtime": 3.5172, + "eval_sciq_pairs_samples_per_second": 36.392, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_qasc_pairs_loss": 0.18838748335838318, + "eval_qasc_pairs_runtime": 0.6126, + "eval_qasc_pairs_samples_per_second": 208.935, + "eval_qasc_pairs_steps_per_second": 1.632, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_openbookqa_pairs_loss": 0.9197407960891724, + "eval_openbookqa_pairs_runtime": 0.5926, + "eval_openbookqa_pairs_samples_per_second": 215.986, + "eval_openbookqa_pairs_steps_per_second": 1.687, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_msmarco_pairs_loss": 0.8759288191795349, + "eval_msmarco_pairs_runtime": 1.5263, + "eval_msmarco_pairs_samples_per_second": 83.861, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_nq_pairs_loss": 0.6125518083572388, + "eval_nq_pairs_runtime": 2.9034, + "eval_nq_pairs_samples_per_second": 44.087, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_trivia_pairs_loss": 0.8040640354156494, + "eval_trivia_pairs_runtime": 3.4397, + "eval_trivia_pairs_samples_per_second": 37.212, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_gooaq_pairs_loss": 0.46305325627326965, + "eval_gooaq_pairs_runtime": 0.9574, + "eval_gooaq_pairs_samples_per_second": 133.691, + "eval_gooaq_pairs_steps_per_second": 1.044, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_paws-pos_loss": 0.023760538548231125, + "eval_paws-pos_runtime": 0.7042, + "eval_paws-pos_samples_per_second": 181.772, + "eval_paws-pos_steps_per_second": 1.42, + "step": 1660 + }, + { + "epoch": 1.7078189300411522, + "eval_global_dataset_loss": 0.46334582567214966, + "eval_global_dataset_runtime": 13.3948, + "eval_global_dataset_samples_per_second": 31.057, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1660 + }, + { + "epoch": 1.7088477366255144, + "grad_norm": 3.219351053237915, + "learning_rate": 3.063668856007985e-05, + "loss": 0.1057, + "step": 1661 + }, + { + "epoch": 1.7098765432098766, + "grad_norm": 5.379256725311279, + "learning_rate": 3.062497459882286e-05, + "loss": 0.1945, + "step": 1662 + }, + { + "epoch": 1.7109053497942388, + "grad_norm": 4.648935794830322, + "learning_rate": 3.061324856219531e-05, + "loss": 0.2146, + "step": 1663 + }, + { + "epoch": 1.7119341563786008, + "grad_norm": 16.28590965270996, + "learning_rate": 3.060151046961617e-05, + "loss": 2.6085, + "step": 1664 + }, + { + "epoch": 1.7129629629629628, + "grad_norm": 5.0320725440979, + "learning_rate": 3.05897603405244e-05, + "loss": 0.1174, + "step": 1665 + }, + { + "epoch": 1.713991769547325, + "grad_norm": 11.108006477355957, + "learning_rate": 3.057799819437889e-05, + "loss": 0.7045, + "step": 1666 + }, + { + "epoch": 1.7150205761316872, + "grad_norm": 5.2421650886535645, + "learning_rate": 3.056622405065839e-05, + "loss": 0.1569, + "step": 1667 + }, + { + "epoch": 1.7160493827160495, + "grad_norm": 8.442304611206055, + "learning_rate": 3.055443792886156e-05, + "loss": 0.5327, + "step": 1668 + }, + { + "epoch": 1.7170781893004117, + "grad_norm": 4.792802333831787, + "learning_rate": 3.0542639848506894e-05, + "loss": 0.1637, + "step": 1669 + }, + { + "epoch": 1.7181069958847737, + "grad_norm": 0.7857884168624878, + "learning_rate": 3.0530829829132676e-05, + "loss": 0.0073, + "step": 1670 + }, + { + "epoch": 1.7191358024691357, + "grad_norm": 1.1031395196914673, + "learning_rate": 3.051900789029696e-05, + "loss": 0.0214, + "step": 1671 + }, + { + "epoch": 1.7201646090534979, + "grad_norm": 5.800541400909424, + "learning_rate": 3.050717405157755e-05, + "loss": 0.3875, + "step": 1672 + }, + { + "epoch": 1.72119341563786, + "grad_norm": 5.825467586517334, + "learning_rate": 3.049532833257195e-05, + "loss": 0.2437, + "step": 1673 + }, + { + "epoch": 1.7222222222222223, + "grad_norm": 8.15721321105957, + "learning_rate": 3.048347075289735e-05, + "loss": 0.5094, + "step": 1674 + }, + { + "epoch": 1.7232510288065843, + "grad_norm": 8.726066589355469, + "learning_rate": 3.047160133219056e-05, + "loss": 0.5376, + "step": 1675 + }, + { + "epoch": 1.7242798353909465, + "grad_norm": 5.205786228179932, + "learning_rate": 3.0459720090108023e-05, + "loss": 0.1742, + "step": 1676 + }, + { + "epoch": 1.7253086419753085, + "grad_norm": 6.896229267120361, + "learning_rate": 3.0447827046325737e-05, + "loss": 0.2372, + "step": 1677 + }, + { + "epoch": 1.7263374485596708, + "grad_norm": 4.733861923217773, + "learning_rate": 3.0435922220539273e-05, + "loss": 0.1288, + "step": 1678 + }, + { + "epoch": 1.727366255144033, + "grad_norm": 7.039435863494873, + "learning_rate": 3.042400563246368e-05, + "loss": 0.2187, + "step": 1679 + }, + { + "epoch": 1.7283950617283952, + "grad_norm": 0.8268962502479553, + "learning_rate": 3.041207730183351e-05, + "loss": 0.0119, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_Qnli-dev_cosine_accuracy": 0.73046875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7552091479301453, + "eval_Qnli-dev_cosine_ap": 0.764224023273828, + "eval_Qnli-dev_cosine_f1": 0.709278350515464, + "eval_Qnli-dev_cosine_f1_threshold": 0.7447389364242554, + "eval_Qnli-dev_cosine_precision": 0.6907630522088354, + "eval_Qnli-dev_cosine_recall": 0.7288135593220338, + "eval_Qnli-dev_dot_accuracy": 0.6875, + "eval_Qnli-dev_dot_accuracy_threshold": 350.8480224609375, + "eval_Qnli-dev_dot_ap": 0.7128966552619984, + "eval_Qnli-dev_dot_f1": 0.6815834767641997, + "eval_Qnli-dev_dot_f1_threshold": 300.3233947753906, + "eval_Qnli-dev_dot_precision": 0.5739130434782609, + "eval_Qnli-dev_dot_recall": 0.8389830508474576, + "eval_Qnli-dev_euclidean_accuracy": 0.732421875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.173933982849121, + "eval_Qnli-dev_euclidean_ap": 0.7714429636703933, + "eval_Qnli-dev_euclidean_f1": 0.719191919191919, + "eval_Qnli-dev_euclidean_f1_threshold": 15.535265922546387, + "eval_Qnli-dev_euclidean_precision": 0.6872586872586872, + "eval_Qnli-dev_euclidean_recall": 0.7542372881355932, + "eval_Qnli-dev_manhattan_accuracy": 0.73046875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 314.58941650390625, + "eval_Qnli-dev_manhattan_ap": 0.7748141826693925, + "eval_Qnli-dev_manhattan_f1": 0.7241379310344829, + "eval_Qnli-dev_manhattan_f1_threshold": 334.8643493652344, + "eval_Qnli-dev_manhattan_precision": 0.6608391608391608, + "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, + "eval_Qnli-dev_max_accuracy": 0.732421875, + "eval_Qnli-dev_max_accuracy_threshold": 350.8480224609375, + "eval_Qnli-dev_max_ap": 0.7748141826693925, + "eval_Qnli-dev_max_f1": 0.7241379310344829, + "eval_Qnli-dev_max_f1_threshold": 334.8643493652344, + "eval_Qnli-dev_max_precision": 0.6907630522088354, + "eval_Qnli-dev_max_recall": 0.8389830508474576, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8655421137809753, + "eval_allNLI-dev_cosine_ap": 0.6199057502272656, + "eval_allNLI-dev_cosine_f1": 0.6196581196581197, + "eval_allNLI-dev_cosine_f1_threshold": 0.7334986329078674, + "eval_allNLI-dev_cosine_precision": 0.4915254237288136, + "eval_allNLI-dev_cosine_recall": 0.838150289017341, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 431.8148193359375, + "eval_allNLI-dev_dot_ap": 0.5653917075856449, + "eval_allNLI-dev_dot_f1": 0.5905511811023622, + "eval_allNLI-dev_dot_f1_threshold": 312.14215087890625, + "eval_allNLI-dev_dot_precision": 0.44776119402985076, + "eval_allNLI-dev_dot_recall": 0.8670520231213873, + "eval_allNLI-dev_euclidean_accuracy": 0.728515625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.644176483154297, + "eval_allNLI-dev_euclidean_ap": 0.6270448962719375, + "eval_allNLI-dev_euclidean_f1": 0.6313559322033898, + "eval_allNLI-dev_euclidean_f1_threshold": 15.444341659545898, + "eval_allNLI-dev_euclidean_precision": 0.4983277591973244, + "eval_allNLI-dev_euclidean_recall": 0.861271676300578, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 266.3878479003906, + "eval_allNLI-dev_manhattan_ap": 0.6278081776272781, + "eval_allNLI-dev_manhattan_f1": 0.6272912423625254, + "eval_allNLI-dev_manhattan_f1_threshold": 334.9619140625, + "eval_allNLI-dev_manhattan_precision": 0.48427672955974843, + "eval_allNLI-dev_manhattan_recall": 0.8901734104046243, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 431.8148193359375, + "eval_allNLI-dev_max_ap": 0.6278081776272781, + "eval_allNLI-dev_max_f1": 0.6313559322033898, + "eval_allNLI-dev_max_f1_threshold": 334.9619140625, + "eval_allNLI-dev_max_precision": 0.4983277591973244, + "eval_allNLI-dev_max_recall": 0.8901734104046243, + "eval_sequential_score": 0.7748141826693925, + "eval_sts-test_pearson_cosine": 0.8387821231954086, + "eval_sts-test_pearson_dot": 0.8194059778660282, + "eval_sts-test_pearson_euclidean": 0.8701197886154768, + "eval_sts-test_pearson_manhattan": 0.8688087659481534, + "eval_sts-test_pearson_max": 0.8701197886154768, + "eval_sts-test_spearman_cosine": 0.8716132921722499, + "eval_sts-test_spearman_dot": 0.8160590762809665, + "eval_sts-test_spearman_euclidean": 0.8687709605562364, + "eval_sts-test_spearman_manhattan": 0.8678245608803312, + "eval_sts-test_spearman_max": 0.8716132921722499, + "eval_vitaminc-pairs_loss": 3.0897140502929688, + "eval_vitaminc-pairs_runtime": 3.1984, + "eval_vitaminc-pairs_samples_per_second": 40.021, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_negation-triplets_loss": 0.9643632173538208, + "eval_negation-triplets_runtime": 0.7595, + "eval_negation-triplets_samples_per_second": 168.523, + "eval_negation-triplets_steps_per_second": 1.317, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_scitail-pairs-pos_loss": 0.16198822855949402, + "eval_scitail-pairs-pos_runtime": 0.8802, + "eval_scitail-pairs-pos_samples_per_second": 145.415, + "eval_scitail-pairs-pos_steps_per_second": 1.136, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_scitail-pairs-qa_loss": 0.000824337883386761, + "eval_scitail-pairs-qa_runtime": 0.5936, + "eval_scitail-pairs-qa_samples_per_second": 215.644, + "eval_scitail-pairs-qa_steps_per_second": 1.685, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_xsum-pairs_loss": 0.2713284194469452, + "eval_xsum-pairs_runtime": 3.0341, + "eval_xsum-pairs_samples_per_second": 42.187, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_sciq_pairs_loss": 0.09531116485595703, + "eval_sciq_pairs_runtime": 3.531, + "eval_sciq_pairs_samples_per_second": 36.25, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_qasc_pairs_loss": 0.177913635969162, + "eval_qasc_pairs_runtime": 0.6155, + "eval_qasc_pairs_samples_per_second": 207.962, + "eval_qasc_pairs_steps_per_second": 1.625, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_openbookqa_pairs_loss": 0.8163385987281799, + "eval_openbookqa_pairs_runtime": 0.5932, + "eval_openbookqa_pairs_samples_per_second": 215.767, + "eval_openbookqa_pairs_steps_per_second": 1.686, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_msmarco_pairs_loss": 0.8340157866477966, + "eval_msmarco_pairs_runtime": 1.5236, + "eval_msmarco_pairs_samples_per_second": 84.012, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_nq_pairs_loss": 0.7388545274734497, + "eval_nq_pairs_runtime": 2.9067, + "eval_nq_pairs_samples_per_second": 44.037, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_trivia_pairs_loss": 0.7573897838592529, + "eval_trivia_pairs_runtime": 3.4382, + "eval_trivia_pairs_samples_per_second": 37.228, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_gooaq_pairs_loss": 0.47093483805656433, + "eval_gooaq_pairs_runtime": 0.9576, + "eval_gooaq_pairs_samples_per_second": 133.674, + "eval_gooaq_pairs_steps_per_second": 1.044, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_paws-pos_loss": 0.02307811565697193, + "eval_paws-pos_runtime": 0.698, + "eval_paws-pos_samples_per_second": 183.381, + "eval_paws-pos_steps_per_second": 1.433, + "step": 1680 + }, + { + "epoch": 1.7283950617283952, + "eval_global_dataset_loss": 0.490926593542099, + "eval_global_dataset_runtime": 13.4017, + "eval_global_dataset_samples_per_second": 31.041, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1680 + }, + { + "epoch": 1.7294238683127572, + "grad_norm": 11.065298080444336, + "learning_rate": 3.040013724840275e-05, + "loss": 0.8887, + "step": 1681 + }, + { + "epoch": 1.7304526748971192, + "grad_norm": 0.5682714581489563, + "learning_rate": 3.0388185491944807e-05, + "loss": 0.0087, + "step": 1682 + }, + { + "epoch": 1.7314814814814814, + "grad_norm": 0.45284152030944824, + "learning_rate": 3.0376222052252462e-05, + "loss": 0.0037, + "step": 1683 + }, + { + "epoch": 1.7325102880658436, + "grad_norm": 5.418968677520752, + "learning_rate": 3.0364246949137852e-05, + "loss": 0.1736, + "step": 1684 + }, + { + "epoch": 1.7335390946502058, + "grad_norm": 5.002779006958008, + "learning_rate": 3.035226020243243e-05, + "loss": 0.1277, + "step": 1685 + }, + { + "epoch": 1.734567901234568, + "grad_norm": 5.912565231323242, + "learning_rate": 3.0340261831986916e-05, + "loss": 0.1518, + "step": 1686 + }, + { + "epoch": 1.73559670781893, + "grad_norm": 15.636818885803223, + "learning_rate": 3.032825185767129e-05, + "loss": 2.463, + "step": 1687 + }, + { + "epoch": 1.736625514403292, + "grad_norm": 0.1352023035287857, + "learning_rate": 3.0316230299374764e-05, + "loss": 0.0016, + "step": 1688 + }, + { + "epoch": 1.7376543209876543, + "grad_norm": 4.616379261016846, + "learning_rate": 3.0304197177005704e-05, + "loss": 0.0972, + "step": 1689 + }, + { + "epoch": 1.7386831275720165, + "grad_norm": 10.943326950073242, + "learning_rate": 3.029215251049165e-05, + "loss": 0.7593, + "step": 1690 + }, + { + "epoch": 1.7397119341563787, + "grad_norm": 5.806167125701904, + "learning_rate": 3.0280096319779248e-05, + "loss": 0.2645, + "step": 1691 + }, + { + "epoch": 1.7407407407407407, + "grad_norm": 11.589315414428711, + "learning_rate": 3.026802862483423e-05, + "loss": 0.7249, + "step": 1692 + }, + { + "epoch": 1.741769547325103, + "grad_norm": 6.475594520568848, + "learning_rate": 3.0255949445641384e-05, + "loss": 0.2953, + "step": 1693 + }, + { + "epoch": 1.742798353909465, + "grad_norm": 0.2328501045703888, + "learning_rate": 3.0243858802204513e-05, + "loss": 0.0028, + "step": 1694 + }, + { + "epoch": 1.7438271604938271, + "grad_norm": 9.56154727935791, + "learning_rate": 3.0231756714546414e-05, + "loss": 0.5131, + "step": 1695 + }, + { + "epoch": 1.7448559670781894, + "grad_norm": 4.136692523956299, + "learning_rate": 3.021964320270882e-05, + "loss": 0.2149, + "step": 1696 + }, + { + "epoch": 1.7458847736625516, + "grad_norm": 5.113592147827148, + "learning_rate": 3.0207518286752394e-05, + "loss": 0.1498, + "step": 1697 + }, + { + "epoch": 1.7469135802469136, + "grad_norm": 8.567078590393066, + "learning_rate": 3.019538198675669e-05, + "loss": 0.5195, + "step": 1698 + }, + { + "epoch": 1.7479423868312756, + "grad_norm": 8.085282325744629, + "learning_rate": 3.0183234322820095e-05, + "loss": 0.3056, + "step": 1699 + }, + { + "epoch": 1.7489711934156378, + "grad_norm": 11.967960357666016, + "learning_rate": 3.017107531505984e-05, + "loss": 0.963, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7604169845581055, + "eval_Qnli-dev_cosine_ap": 0.7527365909888275, + "eval_Qnli-dev_cosine_f1": 0.7007299270072993, + "eval_Qnli-dev_cosine_f1_threshold": 0.725959300994873, + "eval_Qnli-dev_cosine_precision": 0.6153846153846154, + "eval_Qnli-dev_cosine_recall": 0.8135593220338984, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 362.0553283691406, + "eval_Qnli-dev_dot_ap": 0.6872407956813215, + "eval_Qnli-dev_dot_f1": 0.6820428336079077, + "eval_Qnli-dev_dot_f1_threshold": 311.60198974609375, + "eval_Qnli-dev_dot_precision": 0.5579514824797843, + "eval_Qnli-dev_dot_recall": 0.8771186440677966, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.010302543640137, + "eval_Qnli-dev_euclidean_ap": 0.7629389626551923, + "eval_Qnli-dev_euclidean_f1": 0.7172675521821632, + "eval_Qnli-dev_euclidean_f1_threshold": 15.582939147949219, + "eval_Qnli-dev_euclidean_precision": 0.6494845360824743, + "eval_Qnli-dev_euclidean_recall": 0.8008474576271186, + "eval_Qnli-dev_manhattan_accuracy": 0.7265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 311.5965576171875, + "eval_Qnli-dev_manhattan_ap": 0.7679652150220635, + "eval_Qnli-dev_manhattan_f1": 0.7154471544715446, + "eval_Qnli-dev_manhattan_f1_threshold": 314.4476318359375, + "eval_Qnli-dev_manhattan_precision": 0.6875, + "eval_Qnli-dev_manhattan_recall": 0.7457627118644068, + "eval_Qnli-dev_max_accuracy": 0.7265625, + "eval_Qnli-dev_max_accuracy_threshold": 362.0553283691406, + "eval_Qnli-dev_max_ap": 0.7679652150220635, + "eval_Qnli-dev_max_f1": 0.7172675521821632, + "eval_Qnli-dev_max_f1_threshold": 314.4476318359375, + "eval_Qnli-dev_max_precision": 0.6875, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8628346920013428, + "eval_allNLI-dev_cosine_ap": 0.6221178281447802, + "eval_allNLI-dev_cosine_f1": 0.6227848101265824, + "eval_allNLI-dev_cosine_f1_threshold": 0.7958093881607056, + "eval_allNLI-dev_cosine_precision": 0.5540540540540541, + "eval_allNLI-dev_cosine_recall": 0.7109826589595376, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 435.14715576171875, + "eval_allNLI-dev_dot_ap": 0.5705940910885097, + "eval_allNLI-dev_dot_f1": 0.5936842105263158, + "eval_allNLI-dev_dot_f1_threshold": 340.51177978515625, + "eval_allNLI-dev_dot_precision": 0.46688741721854304, + "eval_allNLI-dev_dot_recall": 0.815028901734104, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.794170379638672, + "eval_allNLI-dev_euclidean_ap": 0.6288270076880096, + "eval_allNLI-dev_euclidean_f1": 0.6223277909738717, + "eval_allNLI-dev_euclidean_f1_threshold": 14.098909378051758, + "eval_allNLI-dev_euclidean_precision": 0.5282258064516129, + "eval_allNLI-dev_euclidean_recall": 0.7572254335260116, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 239.71444702148438, + "eval_allNLI-dev_manhattan_ap": 0.6284234020719052, + "eval_allNLI-dev_manhattan_f1": 0.6225165562913907, + "eval_allNLI-dev_manhattan_f1_threshold": 308.7938537597656, + "eval_allNLI-dev_manhattan_precision": 0.5035714285714286, + "eval_allNLI-dev_manhattan_recall": 0.815028901734104, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 435.14715576171875, + "eval_allNLI-dev_max_ap": 0.6288270076880096, + "eval_allNLI-dev_max_f1": 0.6227848101265824, + "eval_allNLI-dev_max_f1_threshold": 340.51177978515625, + "eval_allNLI-dev_max_precision": 0.5540540540540541, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7679652150220635, + "eval_sts-test_pearson_cosine": 0.8360733316299009, + "eval_sts-test_pearson_dot": 0.8174733173178559, + "eval_sts-test_pearson_euclidean": 0.8659311063312447, + "eval_sts-test_pearson_manhattan": 0.86362358195659, + "eval_sts-test_pearson_max": 0.8659311063312447, + "eval_sts-test_spearman_cosine": 0.8685504710913611, + "eval_sts-test_spearman_dot": 0.8149406975075341, + "eval_sts-test_spearman_euclidean": 0.864604483866913, + "eval_sts-test_spearman_manhattan": 0.862906576258462, + "eval_sts-test_spearman_max": 0.8685504710913611, + "eval_vitaminc-pairs_loss": 2.857945442199707, + "eval_vitaminc-pairs_runtime": 3.217, + "eval_vitaminc-pairs_samples_per_second": 39.789, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_negation-triplets_loss": 0.948692262172699, + "eval_negation-triplets_runtime": 0.7517, + "eval_negation-triplets_samples_per_second": 170.288, + "eval_negation-triplets_steps_per_second": 1.33, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_scitail-pairs-pos_loss": 0.1663469523191452, + "eval_scitail-pairs-pos_runtime": 0.8813, + "eval_scitail-pairs-pos_samples_per_second": 145.238, + "eval_scitail-pairs-pos_steps_per_second": 1.135, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_scitail-pairs-qa_loss": 0.001054697553627193, + "eval_scitail-pairs-qa_runtime": 0.622, + "eval_scitail-pairs-qa_samples_per_second": 205.776, + "eval_scitail-pairs-qa_steps_per_second": 1.608, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_xsum-pairs_loss": 0.33091622591018677, + "eval_xsum-pairs_runtime": 3.0341, + "eval_xsum-pairs_samples_per_second": 42.187, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_sciq_pairs_loss": 0.09529092162847519, + "eval_sciq_pairs_runtime": 3.4966, + "eval_sciq_pairs_samples_per_second": 36.607, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_qasc_pairs_loss": 0.18329016864299774, + "eval_qasc_pairs_runtime": 0.616, + "eval_qasc_pairs_samples_per_second": 207.803, + "eval_qasc_pairs_steps_per_second": 1.623, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_openbookqa_pairs_loss": 0.8162494897842407, + "eval_openbookqa_pairs_runtime": 0.5922, + "eval_openbookqa_pairs_samples_per_second": 216.145, + "eval_openbookqa_pairs_steps_per_second": 1.689, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_msmarco_pairs_loss": 0.8438387513160706, + "eval_msmarco_pairs_runtime": 1.5245, + "eval_msmarco_pairs_samples_per_second": 83.965, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_nq_pairs_loss": 0.742292046546936, + "eval_nq_pairs_runtime": 2.9204, + "eval_nq_pairs_samples_per_second": 43.83, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_trivia_pairs_loss": 0.7448931336402893, + "eval_trivia_pairs_runtime": 3.4403, + "eval_trivia_pairs_samples_per_second": 37.206, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_gooaq_pairs_loss": 0.4432274103164673, + "eval_gooaq_pairs_runtime": 0.9523, + "eval_gooaq_pairs_samples_per_second": 134.404, + "eval_gooaq_pairs_steps_per_second": 1.05, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_paws-pos_loss": 0.02417844533920288, + "eval_paws-pos_runtime": 0.7045, + "eval_paws-pos_samples_per_second": 181.702, + "eval_paws-pos_steps_per_second": 1.42, + "step": 1700 + }, + { + "epoch": 1.7489711934156378, + "eval_global_dataset_loss": 0.4446592926979065, + "eval_global_dataset_runtime": 13.402, + "eval_global_dataset_samples_per_second": 31.04, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1700 + }, + { + "epoch": 1.75, + "grad_norm": 6.624517917633057, + "learning_rate": 3.015890498361192e-05, + "loss": 0.2111, + "step": 1701 + }, + { + "epoch": 1.7510288065843622, + "grad_norm": 0.6696828007698059, + "learning_rate": 3.0146723348631097e-05, + "loss": 0.0115, + "step": 1702 + }, + { + "epoch": 1.7520576131687244, + "grad_norm": 4.012869834899902, + "learning_rate": 3.0134530430290853e-05, + "loss": 0.2114, + "step": 1703 + }, + { + "epoch": 1.7530864197530864, + "grad_norm": 8.354767799377441, + "learning_rate": 3.0122326248783344e-05, + "loss": 0.513, + "step": 1704 + }, + { + "epoch": 1.7541152263374484, + "grad_norm": 0.6043940186500549, + "learning_rate": 3.011011082431938e-05, + "loss": 0.0274, + "step": 1705 + }, + { + "epoch": 1.7551440329218106, + "grad_norm": 5.665579795837402, + "learning_rate": 3.009788417712841e-05, + "loss": 0.2203, + "step": 1706 + }, + { + "epoch": 1.7561728395061729, + "grad_norm": 5.739221096038818, + "learning_rate": 3.0085646327458443e-05, + "loss": 0.2425, + "step": 1707 + }, + { + "epoch": 1.757201646090535, + "grad_norm": 9.040894508361816, + "learning_rate": 3.0073397295576058e-05, + "loss": 0.5636, + "step": 1708 + }, + { + "epoch": 1.758230452674897, + "grad_norm": 9.729278564453125, + "learning_rate": 3.0061137101766337e-05, + "loss": 0.5244, + "step": 1709 + }, + { + "epoch": 1.7592592592592593, + "grad_norm": 8.09842300415039, + "learning_rate": 3.0048865766332856e-05, + "loss": 0.6237, + "step": 1710 + }, + { + "epoch": 1.7602880658436213, + "grad_norm": 9.805746078491211, + "learning_rate": 3.0036583309597647e-05, + "loss": 0.7565, + "step": 1711 + }, + { + "epoch": 1.7613168724279835, + "grad_norm": 7.657242298126221, + "learning_rate": 3.0024289751901142e-05, + "loss": 0.3421, + "step": 1712 + }, + { + "epoch": 1.7623456790123457, + "grad_norm": 5.437000274658203, + "learning_rate": 3.0011985113602184e-05, + "loss": 0.2019, + "step": 1713 + }, + { + "epoch": 1.763374485596708, + "grad_norm": 0.8471786379814148, + "learning_rate": 2.999966941507794e-05, + "loss": 0.0141, + "step": 1714 + }, + { + "epoch": 1.76440329218107, + "grad_norm": 11.529598236083984, + "learning_rate": 2.99873426767239e-05, + "loss": 1.0066, + "step": 1715 + }, + { + "epoch": 1.765432098765432, + "grad_norm": 7.448866367340088, + "learning_rate": 2.997500491895385e-05, + "loss": 0.375, + "step": 1716 + }, + { + "epoch": 1.7664609053497942, + "grad_norm": 8.113354682922363, + "learning_rate": 2.9962656162199813e-05, + "loss": 0.7385, + "step": 1717 + }, + { + "epoch": 1.7674897119341564, + "grad_norm": 13.676114082336426, + "learning_rate": 2.995029642691202e-05, + "loss": 1.9866, + "step": 1718 + }, + { + "epoch": 1.7685185185185186, + "grad_norm": 11.367154121398926, + "learning_rate": 2.9937925733558906e-05, + "loss": 0.9198, + "step": 1719 + }, + { + "epoch": 1.7695473251028808, + "grad_norm": 5.2121758460998535, + "learning_rate": 2.992554410262703e-05, + "loss": 0.2244, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7742512226104736, + "eval_Qnli-dev_cosine_ap": 0.7511678896557296, + "eval_Qnli-dev_cosine_f1": 0.7015706806282722, + "eval_Qnli-dev_cosine_f1_threshold": 0.7171704769134521, + "eval_Qnli-dev_cosine_precision": 0.5964391691394659, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 367.8869934082031, + "eval_Qnli-dev_dot_ap": 0.6853356083866112, + "eval_Qnli-dev_dot_f1": 0.6743421052631579, + "eval_Qnli-dev_dot_f1_threshold": 324.0341796875, + "eval_Qnli-dev_dot_precision": 0.5510752688172043, + "eval_Qnli-dev_dot_recall": 0.8686440677966102, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.813480377197266, + "eval_Qnli-dev_euclidean_ap": 0.7629628392039234, + "eval_Qnli-dev_euclidean_f1": 0.712871287128713, + "eval_Qnli-dev_euclidean_f1_threshold": 15.134451866149902, + "eval_Qnli-dev_euclidean_precision": 0.6691449814126395, + "eval_Qnli-dev_euclidean_recall": 0.7627118644067796, + "eval_Qnli-dev_manhattan_accuracy": 0.720703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 311.506103515625, + "eval_Qnli-dev_manhattan_ap": 0.7642876785648607, + "eval_Qnli-dev_manhattan_f1": 0.7161904761904763, + "eval_Qnli-dev_manhattan_f1_threshold": 325.3240051269531, + "eval_Qnli-dev_manhattan_precision": 0.6505190311418685, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.72265625, + "eval_Qnli-dev_max_accuracy_threshold": 367.8869934082031, + "eval_Qnli-dev_max_ap": 0.7642876785648607, + "eval_Qnli-dev_max_f1": 0.7161904761904763, + "eval_Qnli-dev_max_f1_threshold": 325.3240051269531, + "eval_Qnli-dev_max_precision": 0.6691449814126395, + "eval_Qnli-dev_max_recall": 0.8686440677966102, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8379455804824829, + "eval_allNLI-dev_cosine_ap": 0.6147320671005745, + "eval_allNLI-dev_cosine_f1": 0.6237006237006237, + "eval_allNLI-dev_cosine_f1_threshold": 0.7275093793869019, + "eval_allNLI-dev_cosine_precision": 0.487012987012987, + "eval_allNLI-dev_cosine_recall": 0.8670520231213873, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 425.1505432128906, + "eval_allNLI-dev_dot_ap": 0.570508767236879, + "eval_allNLI-dev_dot_f1": 0.5986078886310906, + "eval_allNLI-dev_dot_f1_threshold": 344.5698547363281, + "eval_allNLI-dev_dot_precision": 0.5, + "eval_allNLI-dev_dot_recall": 0.7456647398843931, + "eval_allNLI-dev_euclidean_accuracy": 0.7265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.189596176147461, + "eval_allNLI-dev_euclidean_ap": 0.6212248418652493, + "eval_allNLI-dev_euclidean_f1": 0.6286919831223629, + "eval_allNLI-dev_euclidean_f1_threshold": 15.712522506713867, + "eval_allNLI-dev_euclidean_precision": 0.4950166112956811, + "eval_allNLI-dev_euclidean_recall": 0.861271676300578, + "eval_allNLI-dev_manhattan_accuracy": 0.72265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 261.0384521484375, + "eval_allNLI-dev_manhattan_ap": 0.6211968962942763, + "eval_allNLI-dev_manhattan_f1": 0.634453781512605, + "eval_allNLI-dev_manhattan_f1_threshold": 329.73175048828125, + "eval_allNLI-dev_manhattan_precision": 0.49834983498349833, + "eval_allNLI-dev_manhattan_recall": 0.8728323699421965, + "eval_allNLI-dev_max_accuracy": 0.7265625, + "eval_allNLI-dev_max_accuracy_threshold": 425.1505432128906, + "eval_allNLI-dev_max_ap": 0.6212248418652493, + "eval_allNLI-dev_max_f1": 0.634453781512605, + "eval_allNLI-dev_max_f1_threshold": 344.5698547363281, + "eval_allNLI-dev_max_precision": 0.5, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7642876785648607, + "eval_sts-test_pearson_cosine": 0.8467679944231753, + "eval_sts-test_pearson_dot": 0.8348341822053502, + "eval_sts-test_pearson_euclidean": 0.8741510716771601, + "eval_sts-test_pearson_manhattan": 0.8712477302983147, + "eval_sts-test_pearson_max": 0.8741510716771601, + "eval_sts-test_spearman_cosine": 0.8743986106812506, + "eval_sts-test_spearman_dot": 0.8325327316193228, + "eval_sts-test_spearman_euclidean": 0.8708555638143707, + "eval_sts-test_spearman_manhattan": 0.8677390412198123, + "eval_sts-test_spearman_max": 0.8743986106812506, + "eval_vitaminc-pairs_loss": 2.8903887271881104, + "eval_vitaminc-pairs_runtime": 3.2232, + "eval_vitaminc-pairs_samples_per_second": 39.712, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_negation-triplets_loss": 0.9841980338096619, + "eval_negation-triplets_runtime": 0.7681, + "eval_negation-triplets_samples_per_second": 166.649, + "eval_negation-triplets_steps_per_second": 1.302, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_scitail-pairs-pos_loss": 0.15102441608905792, + "eval_scitail-pairs-pos_runtime": 0.8832, + "eval_scitail-pairs-pos_samples_per_second": 144.923, + "eval_scitail-pairs-pos_steps_per_second": 1.132, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_scitail-pairs-qa_loss": 0.0011362830409780145, + "eval_scitail-pairs-qa_runtime": 0.6051, + "eval_scitail-pairs-qa_samples_per_second": 211.549, + "eval_scitail-pairs-qa_steps_per_second": 1.653, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_xsum-pairs_loss": 0.29924851655960083, + "eval_xsum-pairs_runtime": 3.0304, + "eval_xsum-pairs_samples_per_second": 42.239, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_sciq_pairs_loss": 0.09244251996278763, + "eval_sciq_pairs_runtime": 3.5097, + "eval_sciq_pairs_samples_per_second": 36.471, + "eval_sciq_pairs_steps_per_second": 0.285, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_qasc_pairs_loss": 0.19185201823711395, + "eval_qasc_pairs_runtime": 0.6201, + "eval_qasc_pairs_samples_per_second": 206.412, + "eval_qasc_pairs_steps_per_second": 1.613, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_openbookqa_pairs_loss": 0.7592263221740723, + "eval_openbookqa_pairs_runtime": 0.6012, + "eval_openbookqa_pairs_samples_per_second": 212.916, + "eval_openbookqa_pairs_steps_per_second": 1.663, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_msmarco_pairs_loss": 0.7812709212303162, + "eval_msmarco_pairs_runtime": 1.5207, + "eval_msmarco_pairs_samples_per_second": 84.169, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_nq_pairs_loss": 0.691717803478241, + "eval_nq_pairs_runtime": 2.914, + "eval_nq_pairs_samples_per_second": 43.926, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_trivia_pairs_loss": 0.7600052952766418, + "eval_trivia_pairs_runtime": 3.4577, + "eval_trivia_pairs_samples_per_second": 37.019, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_gooaq_pairs_loss": 0.44834327697753906, + "eval_gooaq_pairs_runtime": 0.9546, + "eval_gooaq_pairs_samples_per_second": 134.084, + "eval_gooaq_pairs_steps_per_second": 1.048, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_paws-pos_loss": 0.022823384031653404, + "eval_paws-pos_runtime": 0.7025, + "eval_paws-pos_samples_per_second": 182.197, + "eval_paws-pos_steps_per_second": 1.423, + "step": 1720 + }, + { + "epoch": 1.7695473251028808, + "eval_global_dataset_loss": 0.4340953230857849, + "eval_global_dataset_runtime": 13.3972, + "eval_global_dataset_samples_per_second": 31.051, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1720 + }, + { + "epoch": 1.7705761316872428, + "grad_norm": 8.054941177368164, + "learning_rate": 2.9913151554621073e-05, + "loss": 0.5082, + "step": 1721 + }, + { + "epoch": 1.7716049382716048, + "grad_norm": 6.68974494934082, + "learning_rate": 2.9900748110063805e-05, + "loss": 0.4326, + "step": 1722 + }, + { + "epoch": 1.772633744855967, + "grad_norm": 4.167799472808838, + "learning_rate": 2.988833378949602e-05, + "loss": 0.1983, + "step": 1723 + }, + { + "epoch": 1.7736625514403292, + "grad_norm": 1.2141571044921875, + "learning_rate": 2.987590861347654e-05, + "loss": 0.0274, + "step": 1724 + }, + { + "epoch": 1.7746913580246915, + "grad_norm": 4.751448631286621, + "learning_rate": 2.9863472602582166e-05, + "loss": 0.1137, + "step": 1725 + }, + { + "epoch": 1.7757201646090535, + "grad_norm": 4.7939043045043945, + "learning_rate": 2.9851025777407626e-05, + "loss": 0.1116, + "step": 1726 + }, + { + "epoch": 1.7767489711934157, + "grad_norm": 3.551082134246826, + "learning_rate": 2.9838568158565572e-05, + "loss": 0.092, + "step": 1727 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 6.242792129516602, + "learning_rate": 2.9826099766686522e-05, + "loss": 0.1858, + "step": 1728 + }, + { + "epoch": 1.77880658436214, + "grad_norm": 8.63688850402832, + "learning_rate": 2.9813620622418844e-05, + "loss": 0.6419, + "step": 1729 + }, + { + "epoch": 1.7798353909465021, + "grad_norm": 4.686567783355713, + "learning_rate": 2.9801130746428707e-05, + "loss": 0.1703, + "step": 1730 + }, + { + "epoch": 1.7808641975308643, + "grad_norm": 12.06086254119873, + "learning_rate": 2.9788630159400047e-05, + "loss": 0.7053, + "step": 1731 + }, + { + "epoch": 1.7818930041152263, + "grad_norm": 15.538905143737793, + "learning_rate": 2.9776118882034548e-05, + "loss": 2.1567, + "step": 1732 + }, + { + "epoch": 1.7829218106995883, + "grad_norm": 8.181844711303711, + "learning_rate": 2.9763596935051593e-05, + "loss": 0.4677, + "step": 1733 + }, + { + "epoch": 1.7839506172839505, + "grad_norm": 6.751974582672119, + "learning_rate": 2.975106433918823e-05, + "loss": 0.4804, + "step": 1734 + }, + { + "epoch": 1.7849794238683128, + "grad_norm": 8.886804580688477, + "learning_rate": 2.9738521115199155e-05, + "loss": 0.5776, + "step": 1735 + }, + { + "epoch": 1.786008230452675, + "grad_norm": 10.950471878051758, + "learning_rate": 2.9725967283856647e-05, + "loss": 0.6962, + "step": 1736 + }, + { + "epoch": 1.7870370370370372, + "grad_norm": 9.701720237731934, + "learning_rate": 2.971340286595057e-05, + "loss": 0.6357, + "step": 1737 + }, + { + "epoch": 1.7880658436213992, + "grad_norm": 7.431054592132568, + "learning_rate": 2.9700827882288304e-05, + "loss": 0.3795, + "step": 1738 + }, + { + "epoch": 1.7890946502057612, + "grad_norm": 6.998332500457764, + "learning_rate": 2.968824235369474e-05, + "loss": 0.3097, + "step": 1739 + }, + { + "epoch": 1.7901234567901234, + "grad_norm": 5.758817672729492, + "learning_rate": 2.9675646301012223e-05, + "loss": 0.1951, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7681889533996582, + "eval_Qnli-dev_cosine_ap": 0.7517988022702223, + "eval_Qnli-dev_cosine_f1": 0.6947368421052632, + "eval_Qnli-dev_cosine_f1_threshold": 0.7106826901435852, + "eval_Qnli-dev_cosine_precision": 0.592814371257485, + "eval_Qnli-dev_cosine_recall": 0.8389830508474576, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 350.132080078125, + "eval_Qnli-dev_dot_ap": 0.6854254214315277, + "eval_Qnli-dev_dot_f1": 0.6731078904991948, + "eval_Qnli-dev_dot_f1_threshold": 299.95361328125, + "eval_Qnli-dev_dot_precision": 0.5428571428571428, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.489816665649414, + "eval_Qnli-dev_euclidean_ap": 0.7624602971280614, + "eval_Qnli-dev_euclidean_f1": 0.699619771863118, + "eval_Qnli-dev_euclidean_f1_threshold": 15.463302612304688, + "eval_Qnli-dev_euclidean_precision": 0.6344827586206897, + "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, + "eval_Qnli-dev_manhattan_accuracy": 0.724609375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 299.10162353515625, + "eval_Qnli-dev_manhattan_ap": 0.7635226389705461, + "eval_Qnli-dev_manhattan_f1": 0.7079646017699116, + "eval_Qnli-dev_manhattan_f1_threshold": 339.8573913574219, + "eval_Qnli-dev_manhattan_precision": 0.60790273556231, + "eval_Qnli-dev_manhattan_recall": 0.847457627118644, + "eval_Qnli-dev_max_accuracy": 0.724609375, + "eval_Qnli-dev_max_accuracy_threshold": 350.132080078125, + "eval_Qnli-dev_max_ap": 0.7635226389705461, + "eval_Qnli-dev_max_f1": 0.7079646017699116, + "eval_Qnli-dev_max_f1_threshold": 339.8573913574219, + "eval_Qnli-dev_max_precision": 0.6344827586206897, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8514525890350342, + "eval_allNLI-dev_cosine_ap": 0.6115432379651945, + "eval_allNLI-dev_cosine_f1": 0.6218905472636818, + "eval_allNLI-dev_cosine_f1_threshold": 0.7630959153175354, + "eval_allNLI-dev_cosine_precision": 0.5458515283842795, + "eval_allNLI-dev_cosine_recall": 0.7225433526011561, + "eval_allNLI-dev_dot_accuracy": 0.6953125, + "eval_allNLI-dev_dot_accuracy_threshold": 370.92681884765625, + "eval_allNLI-dev_dot_ap": 0.5501280898628993, + "eval_allNLI-dev_dot_f1": 0.5908096280087528, + "eval_allNLI-dev_dot_f1_threshold": 313.216552734375, + "eval_allNLI-dev_dot_precision": 0.4753521126760563, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.7265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.800989151000977, + "eval_allNLI-dev_euclidean_ap": 0.6186842072041568, + "eval_allNLI-dev_euclidean_f1": 0.6338028169014085, + "eval_allNLI-dev_euclidean_f1_threshold": 14.879294395446777, + "eval_allNLI-dev_euclidean_precision": 0.5335968379446641, + "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, + "eval_allNLI-dev_manhattan_accuracy": 0.72265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 247.43714904785156, + "eval_allNLI-dev_manhattan_ap": 0.6174363481742949, + "eval_allNLI-dev_manhattan_f1": 0.6382978723404256, + "eval_allNLI-dev_manhattan_f1_threshold": 305.5586853027344, + "eval_allNLI-dev_manhattan_precision": 0.54, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.7265625, + "eval_allNLI-dev_max_accuracy_threshold": 370.92681884765625, + "eval_allNLI-dev_max_ap": 0.6186842072041568, + "eval_allNLI-dev_max_f1": 0.6382978723404256, + "eval_allNLI-dev_max_f1_threshold": 313.216552734375, + "eval_allNLI-dev_max_precision": 0.5458515283842795, + "eval_allNLI-dev_max_recall": 0.7803468208092486, + "eval_sequential_score": 0.7635226389705461, + "eval_sts-test_pearson_cosine": 0.8418497367623062, + "eval_sts-test_pearson_dot": 0.8319914987981861, + "eval_sts-test_pearson_euclidean": 0.870628075813072, + "eval_sts-test_pearson_manhattan": 0.8687366912778405, + "eval_sts-test_pearson_max": 0.870628075813072, + "eval_sts-test_spearman_cosine": 0.8742475304227931, + "eval_sts-test_spearman_dot": 0.8359519964971941, + "eval_sts-test_spearman_euclidean": 0.8691112043965953, + "eval_sts-test_spearman_manhattan": 0.866967350468379, + "eval_sts-test_spearman_max": 0.8742475304227931, + "eval_vitaminc-pairs_loss": 2.995783805847168, + "eval_vitaminc-pairs_runtime": 3.2157, + "eval_vitaminc-pairs_samples_per_second": 39.804, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_negation-triplets_loss": 0.9560091495513916, + "eval_negation-triplets_runtime": 0.7561, + "eval_negation-triplets_samples_per_second": 169.294, + "eval_negation-triplets_steps_per_second": 1.323, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_scitail-pairs-pos_loss": 0.15315373241901398, + "eval_scitail-pairs-pos_runtime": 0.9068, + "eval_scitail-pairs-pos_samples_per_second": 141.158, + "eval_scitail-pairs-pos_steps_per_second": 1.103, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_scitail-pairs-qa_loss": 0.0012944067129865289, + "eval_scitail-pairs-qa_runtime": 0.6418, + "eval_scitail-pairs-qa_samples_per_second": 199.45, + "eval_scitail-pairs-qa_steps_per_second": 1.558, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_xsum-pairs_loss": 0.28352904319763184, + "eval_xsum-pairs_runtime": 3.084, + "eval_xsum-pairs_samples_per_second": 41.505, + "eval_xsum-pairs_steps_per_second": 0.324, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_sciq_pairs_loss": 0.09403456002473831, + "eval_sciq_pairs_runtime": 3.5226, + "eval_sciq_pairs_samples_per_second": 36.336, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_qasc_pairs_loss": 0.17431268095970154, + "eval_qasc_pairs_runtime": 0.6405, + "eval_qasc_pairs_samples_per_second": 199.85, + "eval_qasc_pairs_steps_per_second": 1.561, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_openbookqa_pairs_loss": 0.7557939887046814, + "eval_openbookqa_pairs_runtime": 0.6024, + "eval_openbookqa_pairs_samples_per_second": 212.501, + "eval_openbookqa_pairs_steps_per_second": 1.66, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_msmarco_pairs_loss": 0.8533817529678345, + "eval_msmarco_pairs_runtime": 1.5256, + "eval_msmarco_pairs_samples_per_second": 83.902, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_nq_pairs_loss": 0.7863667607307434, + "eval_nq_pairs_runtime": 2.9002, + "eval_nq_pairs_samples_per_second": 44.135, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_trivia_pairs_loss": 0.7835397124290466, + "eval_trivia_pairs_runtime": 3.4388, + "eval_trivia_pairs_samples_per_second": 37.222, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_gooaq_pairs_loss": 0.37319663166999817, + "eval_gooaq_pairs_runtime": 0.9542, + "eval_gooaq_pairs_samples_per_second": 134.141, + "eval_gooaq_pairs_steps_per_second": 1.048, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_paws-pos_loss": 0.021576495841145515, + "eval_paws-pos_runtime": 0.7028, + "eval_paws-pos_samples_per_second": 182.126, + "eval_paws-pos_steps_per_second": 1.423, + "step": 1740 + }, + { + "epoch": 1.7901234567901234, + "eval_global_dataset_loss": 0.4613242745399475, + "eval_global_dataset_runtime": 13.3972, + "eval_global_dataset_samples_per_second": 31.051, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1740 + }, + { + "epoch": 1.7911522633744856, + "grad_norm": 4.965134143829346, + "learning_rate": 2.9663039745100525e-05, + "loss": 0.186, + "step": 1741 + }, + { + "epoch": 1.7921810699588478, + "grad_norm": 8.625872611999512, + "learning_rate": 2.9650422706836824e-05, + "loss": 0.4295, + "step": 1742 + }, + { + "epoch": 1.7932098765432098, + "grad_norm": 7.308938026428223, + "learning_rate": 2.9637795207115638e-05, + "loss": 0.4061, + "step": 1743 + }, + { + "epoch": 1.794238683127572, + "grad_norm": 6.121941089630127, + "learning_rate": 2.962515726684883e-05, + "loss": 0.2292, + "step": 1744 + }, + { + "epoch": 1.795267489711934, + "grad_norm": 5.254240036010742, + "learning_rate": 2.9612508906965546e-05, + "loss": 0.1824, + "step": 1745 + }, + { + "epoch": 1.7962962962962963, + "grad_norm": 8.089666366577148, + "learning_rate": 2.9599850148412184e-05, + "loss": 0.5721, + "step": 1746 + }, + { + "epoch": 1.7973251028806585, + "grad_norm": 3.1263222694396973, + "learning_rate": 2.958718101215236e-05, + "loss": 0.0676, + "step": 1747 + }, + { + "epoch": 1.7983539094650207, + "grad_norm": 11.244380950927734, + "learning_rate": 2.957450151916688e-05, + "loss": 0.6563, + "step": 1748 + }, + { + "epoch": 1.7993827160493827, + "grad_norm": 6.730269432067871, + "learning_rate": 2.956181169045371e-05, + "loss": 0.5341, + "step": 1749 + }, + { + "epoch": 1.8004115226337447, + "grad_norm": 9.72786808013916, + "learning_rate": 2.9549111547027912e-05, + "loss": 0.5455, + "step": 1750 + }, + { + "epoch": 1.801440329218107, + "grad_norm": 4.4380598068237305, + "learning_rate": 2.9536401109921654e-05, + "loss": 0.1567, + "step": 1751 + }, + { + "epoch": 1.8024691358024691, + "grad_norm": 10.577998161315918, + "learning_rate": 2.952368040018413e-05, + "loss": 0.7594, + "step": 1752 + }, + { + "epoch": 1.8034979423868314, + "grad_norm": 8.436469078063965, + "learning_rate": 2.951094943888157e-05, + "loss": 0.525, + "step": 1753 + }, + { + "epoch": 1.8045267489711934, + "grad_norm": 0.7109900116920471, + "learning_rate": 2.9498208247097146e-05, + "loss": 0.0113, + "step": 1754 + }, + { + "epoch": 1.8055555555555556, + "grad_norm": 6.9339118003845215, + "learning_rate": 2.948545684593101e-05, + "loss": 0.2422, + "step": 1755 + }, + { + "epoch": 1.8065843621399176, + "grad_norm": 4.169605255126953, + "learning_rate": 2.947269525650019e-05, + "loss": 0.1063, + "step": 1756 + }, + { + "epoch": 1.8076131687242798, + "grad_norm": 12.122464179992676, + "learning_rate": 2.9459923499938614e-05, + "loss": 0.9026, + "step": 1757 + }, + { + "epoch": 1.808641975308642, + "grad_norm": 9.732711791992188, + "learning_rate": 2.9447141597397024e-05, + "loss": 0.6694, + "step": 1758 + }, + { + "epoch": 1.8096707818930042, + "grad_norm": 7.252587795257568, + "learning_rate": 2.9434349570042973e-05, + "loss": 0.3589, + "step": 1759 + }, + { + "epoch": 1.8106995884773662, + "grad_norm": 7.175245761871338, + "learning_rate": 2.942154743906079e-05, + "loss": 0.6306, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7840002179145813, + "eval_Qnli-dev_cosine_ap": 0.7449589017070668, + "eval_Qnli-dev_cosine_f1": 0.6887417218543046, + "eval_Qnli-dev_cosine_f1_threshold": 0.6837900876998901, + "eval_Qnli-dev_cosine_precision": 0.5652173913043478, + "eval_Qnli-dev_cosine_recall": 0.8813559322033898, + "eval_Qnli-dev_dot_accuracy": 0.6640625, + "eval_Qnli-dev_dot_accuracy_threshold": 364.4993896484375, + "eval_Qnli-dev_dot_ap": 0.6794151451255108, + "eval_Qnli-dev_dot_f1": 0.6666666666666667, + "eval_Qnli-dev_dot_f1_threshold": 312.6728210449219, + "eval_Qnli-dev_dot_precision": 0.5494505494505495, + "eval_Qnli-dev_dot_recall": 0.847457627118644, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.930747985839844, + "eval_Qnli-dev_euclidean_ap": 0.7542818639236084, + "eval_Qnli-dev_euclidean_f1": 0.6925925925925925, + "eval_Qnli-dev_euclidean_f1_threshold": 15.77761459350586, + "eval_Qnli-dev_euclidean_precision": 0.6151315789473685, + "eval_Qnli-dev_euclidean_recall": 0.7923728813559322, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 315.01434326171875, + "eval_Qnli-dev_manhattan_ap": 0.7559928251905965, + "eval_Qnli-dev_manhattan_f1": 0.6969696969696969, + "eval_Qnli-dev_manhattan_f1_threshold": 328.4678649902344, + "eval_Qnli-dev_manhattan_precision": 0.6301369863013698, + "eval_Qnli-dev_manhattan_recall": 0.7796610169491526, + "eval_Qnli-dev_max_accuracy": 0.71484375, + "eval_Qnli-dev_max_accuracy_threshold": 364.4993896484375, + "eval_Qnli-dev_max_ap": 0.7559928251905965, + "eval_Qnli-dev_max_f1": 0.6969696969696969, + "eval_Qnli-dev_max_f1_threshold": 328.4678649902344, + "eval_Qnli-dev_max_precision": 0.6301369863013698, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8826321363449097, + "eval_allNLI-dev_cosine_ap": 0.6195459354789438, + "eval_allNLI-dev_cosine_f1": 0.6300715990453462, + "eval_allNLI-dev_cosine_f1_threshold": 0.7773683071136475, + "eval_allNLI-dev_cosine_precision": 0.5365853658536586, + "eval_allNLI-dev_cosine_recall": 0.7630057803468208, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 395.90087890625, + "eval_allNLI-dev_dot_ap": 0.5531812655258643, + "eval_allNLI-dev_dot_f1": 0.5896907216494846, + "eval_allNLI-dev_dot_f1_threshold": 334.66156005859375, + "eval_allNLI-dev_dot_precision": 0.4583333333333333, + "eval_allNLI-dev_dot_recall": 0.8265895953757225, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.04653549194336, + "eval_allNLI-dev_euclidean_ap": 0.6236278489279753, + "eval_allNLI-dev_euclidean_f1": 0.636144578313253, + "eval_allNLI-dev_euclidean_f1_threshold": 14.140835762023926, + "eval_allNLI-dev_euclidean_precision": 0.5454545454545454, + "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 237.88970947265625, + "eval_allNLI-dev_manhattan_ap": 0.6230023335049633, + "eval_allNLI-dev_manhattan_f1": 0.64, + "eval_allNLI-dev_manhattan_f1_threshold": 287.5178527832031, + "eval_allNLI-dev_manhattan_precision": 0.5638766519823789, + "eval_allNLI-dev_manhattan_recall": 0.7398843930635838, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 395.90087890625, + "eval_allNLI-dev_max_ap": 0.6236278489279753, + "eval_allNLI-dev_max_f1": 0.64, + "eval_allNLI-dev_max_f1_threshold": 334.66156005859375, + "eval_allNLI-dev_max_precision": 0.5638766519823789, + "eval_allNLI-dev_max_recall": 0.8265895953757225, + "eval_sequential_score": 0.7559928251905965, + "eval_sts-test_pearson_cosine": 0.8382935547706215, + "eval_sts-test_pearson_dot": 0.8278419316801597, + "eval_sts-test_pearson_euclidean": 0.8696684036856096, + "eval_sts-test_pearson_manhattan": 0.8674603738859493, + "eval_sts-test_pearson_max": 0.8696684036856096, + "eval_sts-test_spearman_cosine": 0.8744328006449537, + "eval_sts-test_spearman_dot": 0.8300299278234861, + "eval_sts-test_spearman_euclidean": 0.8689570810523634, + "eval_sts-test_spearman_manhattan": 0.8673386516841433, + "eval_sts-test_spearman_max": 0.8744328006449537, + "eval_vitaminc-pairs_loss": 3.0513458251953125, + "eval_vitaminc-pairs_runtime": 3.2912, + "eval_vitaminc-pairs_samples_per_second": 38.892, + "eval_vitaminc-pairs_steps_per_second": 0.304, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_negation-triplets_loss": 0.9192151427268982, + "eval_negation-triplets_runtime": 0.811, + "eval_negation-triplets_samples_per_second": 157.83, + "eval_negation-triplets_steps_per_second": 1.233, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_scitail-pairs-pos_loss": 0.1447685807943344, + "eval_scitail-pairs-pos_runtime": 1.0028, + "eval_scitail-pairs-pos_samples_per_second": 127.645, + "eval_scitail-pairs-pos_steps_per_second": 0.997, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_scitail-pairs-qa_loss": 0.0013520271750167012, + "eval_scitail-pairs-qa_runtime": 0.6095, + "eval_scitail-pairs-qa_samples_per_second": 210.022, + "eval_scitail-pairs-qa_steps_per_second": 1.641, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_xsum-pairs_loss": 0.3016371428966522, + "eval_xsum-pairs_runtime": 3.0867, + "eval_xsum-pairs_samples_per_second": 41.468, + "eval_xsum-pairs_steps_per_second": 0.324, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_sciq_pairs_loss": 0.09836392104625702, + "eval_sciq_pairs_runtime": 3.4914, + "eval_sciq_pairs_samples_per_second": 36.662, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_qasc_pairs_loss": 0.1730758398771286, + "eval_qasc_pairs_runtime": 0.6186, + "eval_qasc_pairs_samples_per_second": 206.907, + "eval_qasc_pairs_steps_per_second": 1.616, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_openbookqa_pairs_loss": 0.8175860047340393, + "eval_openbookqa_pairs_runtime": 0.6064, + "eval_openbookqa_pairs_samples_per_second": 211.072, + "eval_openbookqa_pairs_steps_per_second": 1.649, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_msmarco_pairs_loss": 0.8001145720481873, + "eval_msmarco_pairs_runtime": 1.5228, + "eval_msmarco_pairs_samples_per_second": 84.057, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_nq_pairs_loss": 0.8027563691139221, + "eval_nq_pairs_runtime": 2.8948, + "eval_nq_pairs_samples_per_second": 44.218, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_trivia_pairs_loss": 0.742946982383728, + "eval_trivia_pairs_runtime": 3.442, + "eval_trivia_pairs_samples_per_second": 37.187, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_gooaq_pairs_loss": 0.3562733232975006, + "eval_gooaq_pairs_runtime": 0.9552, + "eval_gooaq_pairs_samples_per_second": 134.005, + "eval_gooaq_pairs_steps_per_second": 1.047, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_paws-pos_loss": 0.02172490954399109, + "eval_paws-pos_runtime": 0.7012, + "eval_paws-pos_samples_per_second": 182.533, + "eval_paws-pos_steps_per_second": 1.426, + "step": 1760 + }, + { + "epoch": 1.8106995884773662, + "eval_global_dataset_loss": 0.47637251019477844, + "eval_global_dataset_runtime": 13.3916, + "eval_global_dataset_samples_per_second": 31.064, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1760 + }, + { + "epoch": 1.8117283950617284, + "grad_norm": 4.251852035522461, + "learning_rate": 2.9408735225651523e-05, + "loss": 0.1209, + "step": 1761 + }, + { + "epoch": 1.8127572016460904, + "grad_norm": 4.266910552978516, + "learning_rate": 2.939591295103292e-05, + "loss": 0.1031, + "step": 1762 + }, + { + "epoch": 1.8137860082304527, + "grad_norm": 7.430552959442139, + "learning_rate": 2.93830806364394e-05, + "loss": 0.3235, + "step": 1763 + }, + { + "epoch": 1.8148148148148149, + "grad_norm": 0.7937636375427246, + "learning_rate": 2.9370238303122e-05, + "loss": 0.0158, + "step": 1764 + }, + { + "epoch": 1.815843621399177, + "grad_norm": 8.257564544677734, + "learning_rate": 2.9357385972348348e-05, + "loss": 0.3182, + "step": 1765 + }, + { + "epoch": 1.816872427983539, + "grad_norm": 5.81747579574585, + "learning_rate": 2.9344523665402632e-05, + "loss": 0.2515, + "step": 1766 + }, + { + "epoch": 1.817901234567901, + "grad_norm": 0.7533502578735352, + "learning_rate": 2.9331651403585563e-05, + "loss": 0.009, + "step": 1767 + }, + { + "epoch": 1.8189300411522633, + "grad_norm": 0.6928431391716003, + "learning_rate": 2.9318769208214332e-05, + "loss": 0.0098, + "step": 1768 + }, + { + "epoch": 1.8199588477366255, + "grad_norm": 14.506959915161133, + "learning_rate": 2.930587710062258e-05, + "loss": 0.9151, + "step": 1769 + }, + { + "epoch": 1.8209876543209877, + "grad_norm": 0.9877616763114929, + "learning_rate": 2.929297510216038e-05, + "loss": 0.0175, + "step": 1770 + }, + { + "epoch": 1.8220164609053497, + "grad_norm": 4.854236125946045, + "learning_rate": 2.9280063234194154e-05, + "loss": 0.1808, + "step": 1771 + }, + { + "epoch": 1.823045267489712, + "grad_norm": 3.8956329822540283, + "learning_rate": 2.9267141518106698e-05, + "loss": 0.0881, + "step": 1772 + }, + { + "epoch": 1.824074074074074, + "grad_norm": 9.348139762878418, + "learning_rate": 2.9254209975297103e-05, + "loss": 0.4814, + "step": 1773 + }, + { + "epoch": 1.8251028806584362, + "grad_norm": 4.507750034332275, + "learning_rate": 2.9241268627180734e-05, + "loss": 0.0891, + "step": 1774 + }, + { + "epoch": 1.8261316872427984, + "grad_norm": 4.753886699676514, + "learning_rate": 2.92283174951892e-05, + "loss": 0.0898, + "step": 1775 + }, + { + "epoch": 1.8271604938271606, + "grad_norm": 8.55908203125, + "learning_rate": 2.92153566007703e-05, + "loss": 0.5568, + "step": 1776 + }, + { + "epoch": 1.8281893004115226, + "grad_norm": 17.864425659179688, + "learning_rate": 2.9202385965388013e-05, + "loss": 0.1711, + "step": 1777 + }, + { + "epoch": 1.8292181069958846, + "grad_norm": 6.739587306976318, + "learning_rate": 2.918940561052245e-05, + "loss": 0.3617, + "step": 1778 + }, + { + "epoch": 1.8302469135802468, + "grad_norm": 5.804736137390137, + "learning_rate": 2.9176415557669798e-05, + "loss": 0.2282, + "step": 1779 + }, + { + "epoch": 1.831275720164609, + "grad_norm": 10.7015962600708, + "learning_rate": 2.9163415828342342e-05, + "loss": 0.5814, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7543736100196838, + "eval_Qnli-dev_cosine_ap": 0.7495294175428003, + "eval_Qnli-dev_cosine_f1": 0.6951871657754012, + "eval_Qnli-dev_cosine_f1_threshold": 0.6938208341598511, + "eval_Qnli-dev_cosine_precision": 0.6, + "eval_Qnli-dev_cosine_recall": 0.826271186440678, + "eval_Qnli-dev_dot_accuracy": 0.65234375, + "eval_Qnli-dev_dot_accuracy_threshold": 371.60345458984375, + "eval_Qnli-dev_dot_ap": 0.6817688573950809, + "eval_Qnli-dev_dot_f1": 0.678513731825525, + "eval_Qnli-dev_dot_f1_threshold": 288.9908142089844, + "eval_Qnli-dev_dot_precision": 0.5483028720626631, + "eval_Qnli-dev_dot_recall": 0.8898305084745762, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.358012199401855, + "eval_Qnli-dev_euclidean_ap": 0.7612491219860132, + "eval_Qnli-dev_euclidean_f1": 0.6977611940298506, + "eval_Qnli-dev_euclidean_f1_threshold": 16.292171478271484, + "eval_Qnli-dev_euclidean_precision": 0.6233333333333333, + "eval_Qnli-dev_euclidean_recall": 0.7923728813559322, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 302.44561767578125, + "eval_Qnli-dev_manhattan_ap": 0.7617218865087008, + "eval_Qnli-dev_manhattan_f1": 0.6994535519125683, + "eval_Qnli-dev_manhattan_f1_threshold": 346.8835144042969, + "eval_Qnli-dev_manhattan_precision": 0.6134185303514377, + "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 371.60345458984375, + "eval_Qnli-dev_max_ap": 0.7617218865087008, + "eval_Qnli-dev_max_f1": 0.6994535519125683, + "eval_Qnli-dev_max_f1_threshold": 346.8835144042969, + "eval_Qnli-dev_max_precision": 0.6233333333333333, + "eval_Qnli-dev_max_recall": 0.8898305084745762, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8212969899177551, + "eval_allNLI-dev_cosine_ap": 0.6270104138292626, + "eval_allNLI-dev_cosine_f1": 0.6271604938271605, + "eval_allNLI-dev_cosine_f1_threshold": 0.7626806497573853, + "eval_allNLI-dev_cosine_precision": 0.5474137931034483, + "eval_allNLI-dev_cosine_recall": 0.7341040462427746, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 372.30584716796875, + "eval_allNLI-dev_dot_ap": 0.5630877616252019, + "eval_allNLI-dev_dot_f1": 0.5948275862068965, + "eval_allNLI-dev_dot_f1_threshold": 307.044189453125, + "eval_allNLI-dev_dot_precision": 0.4742268041237113, + "eval_allNLI-dev_dot_recall": 0.7976878612716763, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.628206253051758, + "eval_allNLI-dev_euclidean_ap": 0.6339028521728746, + "eval_allNLI-dev_euclidean_f1": 0.6374695863746959, + "eval_allNLI-dev_euclidean_f1_threshold": 14.277826309204102, + "eval_allNLI-dev_euclidean_precision": 0.5504201680672269, + "eval_allNLI-dev_euclidean_recall": 0.7572254335260116, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 274.76416015625, + "eval_allNLI-dev_manhattan_ap": 0.6307982484285948, + "eval_allNLI-dev_manhattan_f1": 0.6304909560723514, + "eval_allNLI-dev_manhattan_f1_threshold": 288.71295166015625, + "eval_allNLI-dev_manhattan_precision": 0.5700934579439252, + "eval_allNLI-dev_manhattan_recall": 0.7052023121387283, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 372.30584716796875, + "eval_allNLI-dev_max_ap": 0.6339028521728746, + "eval_allNLI-dev_max_f1": 0.6374695863746959, + "eval_allNLI-dev_max_f1_threshold": 307.044189453125, + "eval_allNLI-dev_max_precision": 0.5700934579439252, + "eval_allNLI-dev_max_recall": 0.7976878612716763, + "eval_sequential_score": 0.7617218865087008, + "eval_sts-test_pearson_cosine": 0.8392230551266312, + "eval_sts-test_pearson_dot": 0.8331321270854175, + "eval_sts-test_pearson_euclidean": 0.868004361861351, + "eval_sts-test_pearson_manhattan": 0.8654231624267823, + "eval_sts-test_pearson_max": 0.868004361861351, + "eval_sts-test_spearman_cosine": 0.8720572246852526, + "eval_sts-test_spearman_dot": 0.833290101967111, + "eval_sts-test_spearman_euclidean": 0.8665680349629835, + "eval_sts-test_spearman_manhattan": 0.8639320066619844, + "eval_sts-test_spearman_max": 0.8720572246852526, + "eval_vitaminc-pairs_loss": 2.969275712966919, + "eval_vitaminc-pairs_runtime": 3.2173, + "eval_vitaminc-pairs_samples_per_second": 39.785, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_negation-triplets_loss": 0.9597027897834778, + "eval_negation-triplets_runtime": 0.7662, + "eval_negation-triplets_samples_per_second": 167.069, + "eval_negation-triplets_steps_per_second": 1.305, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_scitail-pairs-pos_loss": 0.1277042031288147, + "eval_scitail-pairs-pos_runtime": 0.8835, + "eval_scitail-pairs-pos_samples_per_second": 144.883, + "eval_scitail-pairs-pos_steps_per_second": 1.132, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_scitail-pairs-qa_loss": 0.000502650742419064, + "eval_scitail-pairs-qa_runtime": 0.6056, + "eval_scitail-pairs-qa_samples_per_second": 211.376, + "eval_scitail-pairs-qa_steps_per_second": 1.651, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_xsum-pairs_loss": 0.2729324698448181, + "eval_xsum-pairs_runtime": 3.0278, + "eval_xsum-pairs_samples_per_second": 42.274, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_sciq_pairs_loss": 0.10411171615123749, + "eval_sciq_pairs_runtime": 3.5189, + "eval_sciq_pairs_samples_per_second": 36.375, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_qasc_pairs_loss": 0.18848010897636414, + "eval_qasc_pairs_runtime": 0.6245, + "eval_qasc_pairs_samples_per_second": 204.968, + "eval_qasc_pairs_steps_per_second": 1.601, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_openbookqa_pairs_loss": 0.7955866456031799, + "eval_openbookqa_pairs_runtime": 0.5976, + "eval_openbookqa_pairs_samples_per_second": 214.183, + "eval_openbookqa_pairs_steps_per_second": 1.673, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_msmarco_pairs_loss": 0.8654565215110779, + "eval_msmarco_pairs_runtime": 1.5268, + "eval_msmarco_pairs_samples_per_second": 83.838, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_nq_pairs_loss": 0.7890068888664246, + "eval_nq_pairs_runtime": 2.9072, + "eval_nq_pairs_samples_per_second": 44.028, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_trivia_pairs_loss": 0.7905226349830627, + "eval_trivia_pairs_runtime": 3.4505, + "eval_trivia_pairs_samples_per_second": 37.096, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_gooaq_pairs_loss": 0.4708109200000763, + "eval_gooaq_pairs_runtime": 0.9715, + "eval_gooaq_pairs_samples_per_second": 131.76, + "eval_gooaq_pairs_steps_per_second": 1.029, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_paws-pos_loss": 0.022015145048499107, + "eval_paws-pos_runtime": 0.7045, + "eval_paws-pos_samples_per_second": 181.694, + "eval_paws-pos_steps_per_second": 1.419, + "step": 1780 + }, + { + "epoch": 1.831275720164609, + "eval_global_dataset_loss": 0.48972851037979126, + "eval_global_dataset_runtime": 13.4186, + "eval_global_dataset_samples_per_second": 31.002, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1780 + }, + { + "epoch": 1.8323045267489713, + "grad_norm": 16.824098587036133, + "learning_rate": 2.9150406444068354e-05, + "loss": 2.3695, + "step": 1781 + }, + { + "epoch": 1.8333333333333335, + "grad_norm": 7.3217387199401855, + "learning_rate": 2.913738742639211e-05, + "loss": 0.2563, + "step": 1782 + }, + { + "epoch": 1.8343621399176955, + "grad_norm": 7.324367523193359, + "learning_rate": 2.912435879687385e-05, + "loss": 0.3371, + "step": 1783 + }, + { + "epoch": 1.8353909465020575, + "grad_norm": 9.451821327209473, + "learning_rate": 2.911132057708971e-05, + "loss": 0.6057, + "step": 1784 + }, + { + "epoch": 1.8364197530864197, + "grad_norm": 7.3754143714904785, + "learning_rate": 2.9098272788631732e-05, + "loss": 0.3104, + "step": 1785 + }, + { + "epoch": 1.837448559670782, + "grad_norm": 4.6265411376953125, + "learning_rate": 2.9085215453107785e-05, + "loss": 0.1396, + "step": 1786 + }, + { + "epoch": 1.8384773662551441, + "grad_norm": 4.895910263061523, + "learning_rate": 2.9072148592141554e-05, + "loss": 0.1254, + "step": 1787 + }, + { + "epoch": 1.8395061728395061, + "grad_norm": 12.90682315826416, + "learning_rate": 2.90590722273725e-05, + "loss": 0.7074, + "step": 1788 + }, + { + "epoch": 1.8405349794238683, + "grad_norm": 9.511048316955566, + "learning_rate": 2.9045986380455827e-05, + "loss": 0.602, + "step": 1789 + }, + { + "epoch": 1.8415637860082303, + "grad_norm": 7.697326183319092, + "learning_rate": 2.9032891073062443e-05, + "loss": 0.3409, + "step": 1790 + }, + { + "epoch": 1.8425925925925926, + "grad_norm": 7.475734233856201, + "learning_rate": 2.9019786326878906e-05, + "loss": 0.3956, + "step": 1791 + }, + { + "epoch": 1.8436213991769548, + "grad_norm": 8.607638359069824, + "learning_rate": 2.9006672163607424e-05, + "loss": 0.4256, + "step": 1792 + }, + { + "epoch": 1.844650205761317, + "grad_norm": 7.571669101715088, + "learning_rate": 2.899354860496579e-05, + "loss": 0.5065, + "step": 1793 + }, + { + "epoch": 1.845679012345679, + "grad_norm": 10.287837028503418, + "learning_rate": 2.898041567268737e-05, + "loss": 0.6154, + "step": 1794 + }, + { + "epoch": 1.846707818930041, + "grad_norm": 0.782716691493988, + "learning_rate": 2.8967273388521022e-05, + "loss": 0.0099, + "step": 1795 + }, + { + "epoch": 1.8477366255144032, + "grad_norm": 6.425576210021973, + "learning_rate": 2.8954121774231135e-05, + "loss": 0.2757, + "step": 1796 + }, + { + "epoch": 1.8487654320987654, + "grad_norm": 0.85555499792099, + "learning_rate": 2.8940960851597516e-05, + "loss": 0.01, + "step": 1797 + }, + { + "epoch": 1.8497942386831276, + "grad_norm": 4.784443378448486, + "learning_rate": 2.89277906424154e-05, + "loss": 0.2952, + "step": 1798 + }, + { + "epoch": 1.8508230452674899, + "grad_norm": 0.6040102243423462, + "learning_rate": 2.8914611168495395e-05, + "loss": 0.0057, + "step": 1799 + }, + { + "epoch": 1.8518518518518519, + "grad_norm": 8.821825981140137, + "learning_rate": 2.8901422451663457e-05, + "loss": 0.4099, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.783872663974762, + "eval_Qnli-dev_cosine_ap": 0.7466087603555676, + "eval_Qnli-dev_cosine_f1": 0.693950177935943, + "eval_Qnli-dev_cosine_f1_threshold": 0.712569534778595, + "eval_Qnli-dev_cosine_precision": 0.598159509202454, + "eval_Qnli-dev_cosine_recall": 0.826271186440678, + "eval_Qnli-dev_dot_accuracy": 0.650390625, + "eval_Qnli-dev_dot_accuracy_threshold": 377.8784484863281, + "eval_Qnli-dev_dot_ap": 0.670055076912287, + "eval_Qnli-dev_dot_f1": 0.6708860759493672, + "eval_Qnli-dev_dot_f1_threshold": 295.422607421875, + "eval_Qnli-dev_dot_precision": 0.5353535353535354, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.184562683105469, + "eval_Qnli-dev_euclidean_ap": 0.7566549840766721, + "eval_Qnli-dev_euclidean_f1": 0.7007299270072993, + "eval_Qnli-dev_euclidean_f1_threshold": 16.043079376220703, + "eval_Qnli-dev_euclidean_precision": 0.6153846153846154, + "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 299.1871337890625, + "eval_Qnli-dev_manhattan_ap": 0.7581720001401686, + "eval_Qnli-dev_manhattan_f1": 0.7041742286751361, + "eval_Qnli-dev_manhattan_f1_threshold": 337.9171142578125, + "eval_Qnli-dev_manhattan_precision": 0.6158730158730159, + "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, + "eval_Qnli-dev_max_accuracy": 0.7109375, + "eval_Qnli-dev_max_accuracy_threshold": 377.8784484863281, + "eval_Qnli-dev_max_ap": 0.7581720001401686, + "eval_Qnli-dev_max_f1": 0.7041742286751361, + "eval_Qnli-dev_max_f1_threshold": 337.9171142578125, + "eval_Qnli-dev_max_precision": 0.6158730158730159, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8557825088500977, + "eval_allNLI-dev_cosine_ap": 0.6226734040257053, + "eval_allNLI-dev_cosine_f1": 0.624078624078624, + "eval_allNLI-dev_cosine_f1_threshold": 0.7762553095817566, + "eval_allNLI-dev_cosine_precision": 0.5427350427350427, + "eval_allNLI-dev_cosine_recall": 0.7341040462427746, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 368.6101379394531, + "eval_allNLI-dev_dot_ap": 0.5572318941843482, + "eval_allNLI-dev_dot_f1": 0.6106194690265486, + "eval_allNLI-dev_dot_f1_threshold": 321.041748046875, + "eval_allNLI-dev_dot_precision": 0.4946236559139785, + "eval_allNLI-dev_dot_recall": 0.7976878612716763, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.30082893371582, + "eval_allNLI-dev_euclidean_ap": 0.630235554734977, + "eval_allNLI-dev_euclidean_f1": 0.6346153846153846, + "eval_allNLI-dev_euclidean_f1_threshold": 14.17033576965332, + "eval_allNLI-dev_euclidean_precision": 0.5432098765432098, + "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 237.4763641357422, + "eval_allNLI-dev_manhattan_ap": 0.6276697585643951, + "eval_allNLI-dev_manhattan_f1": 0.6357308584686775, + "eval_allNLI-dev_manhattan_f1_threshold": 300.2784423828125, + "eval_allNLI-dev_manhattan_precision": 0.5310077519379846, + "eval_allNLI-dev_manhattan_recall": 0.791907514450867, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 368.6101379394531, + "eval_allNLI-dev_max_ap": 0.630235554734977, + "eval_allNLI-dev_max_f1": 0.6357308584686775, + "eval_allNLI-dev_max_f1_threshold": 321.041748046875, + "eval_allNLI-dev_max_precision": 0.5432098765432098, + "eval_allNLI-dev_max_recall": 0.7976878612716763, + "eval_sequential_score": 0.7581720001401686, + "eval_sts-test_pearson_cosine": 0.8405514680248984, + "eval_sts-test_pearson_dot": 0.826561548746697, + "eval_sts-test_pearson_euclidean": 0.8713469017531787, + "eval_sts-test_pearson_manhattan": 0.8678385992177855, + "eval_sts-test_pearson_max": 0.8713469017531787, + "eval_sts-test_spearman_cosine": 0.874277469016664, + "eval_sts-test_spearman_dot": 0.826397333682562, + "eval_sts-test_spearman_euclidean": 0.8706232600164878, + "eval_sts-test_spearman_manhattan": 0.8674953258502748, + "eval_sts-test_spearman_max": 0.874277469016664, + "eval_vitaminc-pairs_loss": 3.052903890609741, + "eval_vitaminc-pairs_runtime": 3.2066, + "eval_vitaminc-pairs_samples_per_second": 39.918, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_negation-triplets_loss": 0.9271054863929749, + "eval_negation-triplets_runtime": 0.7603, + "eval_negation-triplets_samples_per_second": 168.349, + "eval_negation-triplets_steps_per_second": 1.315, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_scitail-pairs-pos_loss": 0.1117212250828743, + "eval_scitail-pairs-pos_runtime": 0.9101, + "eval_scitail-pairs-pos_samples_per_second": 140.646, + "eval_scitail-pairs-pos_steps_per_second": 1.099, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_scitail-pairs-qa_loss": 0.0008321039495058358, + "eval_scitail-pairs-qa_runtime": 0.5988, + "eval_scitail-pairs-qa_samples_per_second": 213.773, + "eval_scitail-pairs-qa_steps_per_second": 1.67, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_xsum-pairs_loss": 0.2601509392261505, + "eval_xsum-pairs_runtime": 3.0239, + "eval_xsum-pairs_samples_per_second": 42.329, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_sciq_pairs_loss": 0.09544568508863449, + "eval_sciq_pairs_runtime": 3.5424, + "eval_sciq_pairs_samples_per_second": 36.133, + "eval_sciq_pairs_steps_per_second": 0.282, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_qasc_pairs_loss": 0.16598990559577942, + "eval_qasc_pairs_runtime": 0.6288, + "eval_qasc_pairs_samples_per_second": 203.558, + "eval_qasc_pairs_steps_per_second": 1.59, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_openbookqa_pairs_loss": 0.7461561560630798, + "eval_openbookqa_pairs_runtime": 0.6066, + "eval_openbookqa_pairs_samples_per_second": 211.02, + "eval_openbookqa_pairs_steps_per_second": 1.649, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_msmarco_pairs_loss": 0.8211266994476318, + "eval_msmarco_pairs_runtime": 1.5318, + "eval_msmarco_pairs_samples_per_second": 83.562, + "eval_msmarco_pairs_steps_per_second": 0.653, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_nq_pairs_loss": 0.7967262864112854, + "eval_nq_pairs_runtime": 2.9105, + "eval_nq_pairs_samples_per_second": 43.979, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_trivia_pairs_loss": 0.7438980937004089, + "eval_trivia_pairs_runtime": 3.4511, + "eval_trivia_pairs_samples_per_second": 37.09, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_gooaq_pairs_loss": 0.4350385367870331, + "eval_gooaq_pairs_runtime": 0.9556, + "eval_gooaq_pairs_samples_per_second": 133.945, + "eval_gooaq_pairs_steps_per_second": 1.046, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_paws-pos_loss": 0.022378094494342804, + "eval_paws-pos_runtime": 0.7027, + "eval_paws-pos_samples_per_second": 182.149, + "eval_paws-pos_steps_per_second": 1.423, + "step": 1800 + }, + { + "epoch": 1.8518518518518519, + "eval_global_dataset_loss": 0.4930493235588074, + "eval_global_dataset_runtime": 13.4096, + "eval_global_dataset_samples_per_second": 31.023, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1800 + }, + { + "epoch": 1.8528806584362139, + "grad_norm": 6.235814094543457, + "learning_rate": 2.888822451376085e-05, + "loss": 0.178, + "step": 1801 + }, + { + "epoch": 1.853909465020576, + "grad_norm": 5.626006603240967, + "learning_rate": 2.8875017376644103e-05, + "loss": 0.176, + "step": 1802 + }, + { + "epoch": 1.8549382716049383, + "grad_norm": 4.305861949920654, + "learning_rate": 2.8861801062184983e-05, + "loss": 0.0919, + "step": 1803 + }, + { + "epoch": 1.8559670781893005, + "grad_norm": 5.0377912521362305, + "learning_rate": 2.8848575592270457e-05, + "loss": 0.2261, + "step": 1804 + }, + { + "epoch": 1.8569958847736625, + "grad_norm": 3.85387921333313, + "learning_rate": 2.8835340988802652e-05, + "loss": 0.1215, + "step": 1805 + }, + { + "epoch": 1.8580246913580247, + "grad_norm": 12.48168659210205, + "learning_rate": 2.8822097273698814e-05, + "loss": 0.6661, + "step": 1806 + }, + { + "epoch": 1.8590534979423867, + "grad_norm": 0.6439504027366638, + "learning_rate": 2.880884446889129e-05, + "loss": 0.0122, + "step": 1807 + }, + { + "epoch": 1.860082304526749, + "grad_norm": 11.227083206176758, + "learning_rate": 2.8795582596327478e-05, + "loss": 0.6757, + "step": 1808 + }, + { + "epoch": 1.8611111111111112, + "grad_norm": 6.3678460121154785, + "learning_rate": 2.8782311677969783e-05, + "loss": 0.1978, + "step": 1809 + }, + { + "epoch": 1.8621399176954734, + "grad_norm": 4.8387041091918945, + "learning_rate": 2.8769031735795593e-05, + "loss": 0.1788, + "step": 1810 + }, + { + "epoch": 1.8631687242798354, + "grad_norm": 7.737652778625488, + "learning_rate": 2.875574279179726e-05, + "loss": 0.4695, + "step": 1811 + }, + { + "epoch": 1.8641975308641974, + "grad_norm": 0.1465805470943451, + "learning_rate": 2.8742444867982005e-05, + "loss": 0.0018, + "step": 1812 + }, + { + "epoch": 1.8652263374485596, + "grad_norm": 5.01085090637207, + "learning_rate": 2.872913798637196e-05, + "loss": 0.1199, + "step": 1813 + }, + { + "epoch": 1.8662551440329218, + "grad_norm": 10.929647445678711, + "learning_rate": 2.871582216900407e-05, + "loss": 0.6889, + "step": 1814 + }, + { + "epoch": 1.867283950617284, + "grad_norm": 5.263504981994629, + "learning_rate": 2.870249743793008e-05, + "loss": 0.1593, + "step": 1815 + }, + { + "epoch": 1.8683127572016462, + "grad_norm": 0.27705076336860657, + "learning_rate": 2.8689163815216498e-05, + "loss": 0.0051, + "step": 1816 + }, + { + "epoch": 1.8693415637860082, + "grad_norm": 6.253952503204346, + "learning_rate": 2.867582132294456e-05, + "loss": 0.162, + "step": 1817 + }, + { + "epoch": 1.8703703703703702, + "grad_norm": 6.236087799072266, + "learning_rate": 2.8662469983210184e-05, + "loss": 0.1779, + "step": 1818 + }, + { + "epoch": 1.8713991769547325, + "grad_norm": 6.740966796875, + "learning_rate": 2.8649109818123948e-05, + "loss": 0.225, + "step": 1819 + }, + { + "epoch": 1.8724279835390947, + "grad_norm": 5.584411144256592, + "learning_rate": 2.8635740849811043e-05, + "loss": 0.1736, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7693237662315369, + "eval_Qnli-dev_cosine_ap": 0.7516011618078376, + "eval_Qnli-dev_cosine_f1": 0.6892857142857143, + "eval_Qnli-dev_cosine_f1_threshold": 0.68800950050354, + "eval_Qnli-dev_cosine_precision": 0.595679012345679, + "eval_Qnli-dev_cosine_recall": 0.8177966101694916, + "eval_Qnli-dev_dot_accuracy": 0.6640625, + "eval_Qnli-dev_dot_accuracy_threshold": 342.3182373046875, + "eval_Qnli-dev_dot_ap": 0.6960339016150074, + "eval_Qnli-dev_dot_f1": 0.6697965571205008, + "eval_Qnli-dev_dot_f1_threshold": 267.3514404296875, + "eval_Qnli-dev_dot_precision": 0.5310173697270472, + "eval_Qnli-dev_dot_recall": 0.9067796610169492, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.13847541809082, + "eval_Qnli-dev_euclidean_ap": 0.7593799713146617, + "eval_Qnli-dev_euclidean_f1": 0.6990654205607476, + "eval_Qnli-dev_euclidean_f1_threshold": 16.104825973510742, + "eval_Qnli-dev_euclidean_precision": 0.6254180602006689, + "eval_Qnli-dev_euclidean_recall": 0.7923728813559322, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 298.31005859375, + "eval_Qnli-dev_manhattan_ap": 0.7636995928661362, + "eval_Qnli-dev_manhattan_f1": 0.7071823204419889, + "eval_Qnli-dev_manhattan_f1_threshold": 340.46624755859375, + "eval_Qnli-dev_manhattan_precision": 0.6254071661237784, + "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 342.3182373046875, + "eval_Qnli-dev_max_ap": 0.7636995928661362, + "eval_Qnli-dev_max_f1": 0.7071823204419889, + "eval_Qnli-dev_max_f1_threshold": 340.46624755859375, + "eval_Qnli-dev_max_precision": 0.6254180602006689, + "eval_Qnli-dev_max_recall": 0.9067796610169492, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8468435406684875, + "eval_allNLI-dev_cosine_ap": 0.6198779739889414, + "eval_allNLI-dev_cosine_f1": 0.6308068459657702, + "eval_allNLI-dev_cosine_f1_threshold": 0.7570561766624451, + "eval_allNLI-dev_cosine_precision": 0.5466101694915254, + "eval_allNLI-dev_cosine_recall": 0.7456647398843931, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 346.5953674316406, + "eval_allNLI-dev_dot_ap": 0.5522378285672351, + "eval_allNLI-dev_dot_f1": 0.6, + "eval_allNLI-dev_dot_f1_threshold": 292.556640625, + "eval_allNLI-dev_dot_precision": 0.48736462093862815, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.728515625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.238094329833984, + "eval_allNLI-dev_euclidean_ap": 0.6264571105036736, + "eval_allNLI-dev_euclidean_f1": 0.6344827586206897, + "eval_allNLI-dev_euclidean_f1_threshold": 14.68545150756836, + "eval_allNLI-dev_euclidean_precision": 0.5267175572519084, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 248.9263916015625, + "eval_allNLI-dev_manhattan_ap": 0.6257422625629346, + "eval_allNLI-dev_manhattan_f1": 0.638095238095238, + "eval_allNLI-dev_manhattan_f1_threshold": 302.66668701171875, + "eval_allNLI-dev_manhattan_precision": 0.5425101214574899, + "eval_allNLI-dev_manhattan_recall": 0.7745664739884393, + "eval_allNLI-dev_max_accuracy": 0.728515625, + "eval_allNLI-dev_max_accuracy_threshold": 346.5953674316406, + "eval_allNLI-dev_max_ap": 0.6264571105036736, + "eval_allNLI-dev_max_f1": 0.638095238095238, + "eval_allNLI-dev_max_f1_threshold": 302.66668701171875, + "eval_allNLI-dev_max_precision": 0.5466101694915254, + "eval_allNLI-dev_max_recall": 0.7976878612716763, + "eval_sequential_score": 0.7636995928661362, + "eval_sts-test_pearson_cosine": 0.8424731952945942, + "eval_sts-test_pearson_dot": 0.8333461802951254, + "eval_sts-test_pearson_euclidean": 0.8674274376987042, + "eval_sts-test_pearson_manhattan": 0.8628614026838428, + "eval_sts-test_pearson_max": 0.8674274376987042, + "eval_sts-test_spearman_cosine": 0.8718507423651927, + "eval_sts-test_spearman_dot": 0.827070661161457, + "eval_sts-test_spearman_euclidean": 0.8662244795292817, + "eval_sts-test_spearman_manhattan": 0.8620014372455594, + "eval_sts-test_spearman_max": 0.8718507423651927, + "eval_vitaminc-pairs_loss": 3.304171323776245, + "eval_vitaminc-pairs_runtime": 3.2079, + "eval_vitaminc-pairs_samples_per_second": 39.902, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_negation-triplets_loss": 0.9493024945259094, + "eval_negation-triplets_runtime": 0.7585, + "eval_negation-triplets_samples_per_second": 168.759, + "eval_negation-triplets_steps_per_second": 1.318, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_scitail-pairs-pos_loss": 0.12272996455430984, + "eval_scitail-pairs-pos_runtime": 0.9056, + "eval_scitail-pairs-pos_samples_per_second": 141.349, + "eval_scitail-pairs-pos_steps_per_second": 1.104, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_scitail-pairs-qa_loss": 0.0004829070239793509, + "eval_scitail-pairs-qa_runtime": 0.6048, + "eval_scitail-pairs-qa_samples_per_second": 211.628, + "eval_scitail-pairs-qa_steps_per_second": 1.653, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_xsum-pairs_loss": 0.23644520342350006, + "eval_xsum-pairs_runtime": 3.0348, + "eval_xsum-pairs_samples_per_second": 42.177, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_sciq_pairs_loss": 0.09980232268571854, + "eval_sciq_pairs_runtime": 3.4869, + "eval_sciq_pairs_samples_per_second": 36.709, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_qasc_pairs_loss": 0.1501757949590683, + "eval_qasc_pairs_runtime": 0.6196, + "eval_qasc_pairs_samples_per_second": 206.579, + "eval_qasc_pairs_steps_per_second": 1.614, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_openbookqa_pairs_loss": 0.7412326335906982, + "eval_openbookqa_pairs_runtime": 0.6034, + "eval_openbookqa_pairs_samples_per_second": 212.118, + "eval_openbookqa_pairs_steps_per_second": 1.657, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_msmarco_pairs_loss": 0.9512736797332764, + "eval_msmarco_pairs_runtime": 1.5248, + "eval_msmarco_pairs_samples_per_second": 83.945, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_nq_pairs_loss": 0.7984183430671692, + "eval_nq_pairs_runtime": 2.9011, + "eval_nq_pairs_samples_per_second": 44.121, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_trivia_pairs_loss": 0.7930619120597839, + "eval_trivia_pairs_runtime": 3.4561, + "eval_trivia_pairs_samples_per_second": 37.036, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_gooaq_pairs_loss": 0.38777878880500793, + "eval_gooaq_pairs_runtime": 0.9603, + "eval_gooaq_pairs_samples_per_second": 133.297, + "eval_gooaq_pairs_steps_per_second": 1.041, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_paws-pos_loss": 0.022048471495509148, + "eval_paws-pos_runtime": 0.7038, + "eval_paws-pos_samples_per_second": 181.879, + "eval_paws-pos_steps_per_second": 1.421, + "step": 1820 + }, + { + "epoch": 1.8724279835390947, + "eval_global_dataset_loss": 0.5353642106056213, + "eval_global_dataset_runtime": 13.4031, + "eval_global_dataset_samples_per_second": 31.038, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1820 + }, + { + "epoch": 1.873456790123457, + "grad_norm": 4.877135276794434, + "learning_rate": 2.862236310041123e-05, + "loss": 0.1192, + "step": 1821 + }, + { + "epoch": 1.874485596707819, + "grad_norm": 5.794093608856201, + "learning_rate": 2.8608976592078826e-05, + "loss": 0.2173, + "step": 1822 + }, + { + "epoch": 1.875514403292181, + "grad_norm": 11.300149917602539, + "learning_rate": 2.8595581346982648e-05, + "loss": 0.6745, + "step": 1823 + }, + { + "epoch": 1.876543209876543, + "grad_norm": 6.0869364738464355, + "learning_rate": 2.858217738730597e-05, + "loss": 0.1692, + "step": 1824 + }, + { + "epoch": 1.8775720164609053, + "grad_norm": 9.171350479125977, + "learning_rate": 2.8568764735246514e-05, + "loss": 0.4567, + "step": 1825 + }, + { + "epoch": 1.8786008230452675, + "grad_norm": 6.638286113739014, + "learning_rate": 2.855534341301639e-05, + "loss": 0.2002, + "step": 1826 + }, + { + "epoch": 1.8796296296296298, + "grad_norm": 10.658404350280762, + "learning_rate": 2.8541913442842073e-05, + "loss": 0.4919, + "step": 1827 + }, + { + "epoch": 1.8806584362139918, + "grad_norm": 6.511582851409912, + "learning_rate": 2.8528474846964346e-05, + "loss": 0.1809, + "step": 1828 + }, + { + "epoch": 1.8816872427983538, + "grad_norm": 3.9900856018066406, + "learning_rate": 2.8515027647638286e-05, + "loss": 0.1848, + "step": 1829 + }, + { + "epoch": 1.882716049382716, + "grad_norm": 9.124105453491211, + "learning_rate": 2.850157186713321e-05, + "loss": 0.4554, + "step": 1830 + }, + { + "epoch": 1.8837448559670782, + "grad_norm": 9.636565208435059, + "learning_rate": 2.8488107527732665e-05, + "loss": 0.6297, + "step": 1831 + }, + { + "epoch": 1.8847736625514404, + "grad_norm": 8.252805709838867, + "learning_rate": 2.8474634651734356e-05, + "loss": 0.3771, + "step": 1832 + }, + { + "epoch": 1.8858024691358026, + "grad_norm": 12.576844215393066, + "learning_rate": 2.8461153261450115e-05, + "loss": 0.8774, + "step": 1833 + }, + { + "epoch": 1.8868312757201646, + "grad_norm": 4.328588962554932, + "learning_rate": 2.84476633792059e-05, + "loss": 0.1061, + "step": 1834 + }, + { + "epoch": 1.8878600823045266, + "grad_norm": 6.682473182678223, + "learning_rate": 2.8434165027341716e-05, + "loss": 0.2193, + "step": 1835 + }, + { + "epoch": 1.8888888888888888, + "grad_norm": 11.43628978729248, + "learning_rate": 2.84206582282116e-05, + "loss": 0.7368, + "step": 1836 + }, + { + "epoch": 1.889917695473251, + "grad_norm": 10.52833080291748, + "learning_rate": 2.8407143004183572e-05, + "loss": 0.4926, + "step": 1837 + }, + { + "epoch": 1.8909465020576133, + "grad_norm": 0.0, + "learning_rate": 2.839361937763961e-05, + "loss": 0.0, + "step": 1838 + }, + { + "epoch": 1.8919753086419753, + "grad_norm": 6.224323272705078, + "learning_rate": 2.8380087370975603e-05, + "loss": 0.2516, + "step": 1839 + }, + { + "epoch": 1.8930041152263375, + "grad_norm": 0.0, + "learning_rate": 2.8366547006601316e-05, + "loss": 0.0, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_Qnli-dev_cosine_accuracy": 0.71484375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7687250971794128, + "eval_Qnli-dev_cosine_ap": 0.7531877592950056, + "eval_Qnli-dev_cosine_f1": 0.6921739130434782, + "eval_Qnli-dev_cosine_f1_threshold": 0.684884786605835, + "eval_Qnli-dev_cosine_precision": 0.5870206489675516, + "eval_Qnli-dev_cosine_recall": 0.8432203389830508, + "eval_Qnli-dev_dot_accuracy": 0.662109375, + "eval_Qnli-dev_dot_accuracy_threshold": 363.7455749511719, + "eval_Qnli-dev_dot_ap": 0.692932385125726, + "eval_Qnli-dev_dot_f1": 0.6666666666666667, + "eval_Qnli-dev_dot_f1_threshold": 283.91925048828125, + "eval_Qnli-dev_dot_precision": 0.5285359801488834, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.425592422485352, + "eval_Qnli-dev_euclidean_ap": 0.7613180383995255, + "eval_Qnli-dev_euclidean_f1": 0.6973180076628352, + "eval_Qnli-dev_euclidean_f1_threshold": 16.067230224609375, + "eval_Qnli-dev_euclidean_precision": 0.6363636363636364, + "eval_Qnli-dev_euclidean_recall": 0.7711864406779662, + "eval_Qnli-dev_manhattan_accuracy": 0.720703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 300.03668212890625, + "eval_Qnli-dev_manhattan_ap": 0.7670340428396949, + "eval_Qnli-dev_manhattan_f1": 0.7027972027972028, + "eval_Qnli-dev_manhattan_f1_threshold": 353.2535400390625, + "eval_Qnli-dev_manhattan_precision": 0.5982142857142857, + "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, + "eval_Qnli-dev_max_accuracy": 0.72265625, + "eval_Qnli-dev_max_accuracy_threshold": 363.7455749511719, + "eval_Qnli-dev_max_ap": 0.7670340428396949, + "eval_Qnli-dev_max_f1": 0.7027972027972028, + "eval_Qnli-dev_max_f1_threshold": 353.2535400390625, + "eval_Qnli-dev_max_precision": 0.6363636363636364, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.720703125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8587692975997925, + "eval_allNLI-dev_cosine_ap": 0.6100745668059124, + "eval_allNLI-dev_cosine_f1": 0.6218097447795824, + "eval_allNLI-dev_cosine_f1_threshold": 0.767665684223175, + "eval_allNLI-dev_cosine_precision": 0.5193798449612403, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.701171875, + "eval_allNLI-dev_dot_accuracy_threshold": 371.281494140625, + "eval_allNLI-dev_dot_ap": 0.5377230025038826, + "eval_allNLI-dev_dot_f1": 0.5897435897435898, + "eval_allNLI-dev_dot_f1_threshold": 319.8837890625, + "eval_allNLI-dev_dot_precision": 0.46779661016949153, + "eval_allNLI-dev_dot_recall": 0.7976878612716763, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.756385803222656, + "eval_allNLI-dev_euclidean_ap": 0.6185203781662596, + "eval_allNLI-dev_euclidean_f1": 0.6376146788990825, + "eval_allNLI-dev_euclidean_f1_threshold": 14.343099594116211, + "eval_allNLI-dev_euclidean_precision": 0.5285171102661597, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 245.87252807617188, + "eval_allNLI-dev_manhattan_ap": 0.6186309105779424, + "eval_allNLI-dev_manhattan_f1": 0.6303317535545023, + "eval_allNLI-dev_manhattan_f1_threshold": 294.0959777832031, + "eval_allNLI-dev_manhattan_precision": 0.5341365461847389, + "eval_allNLI-dev_manhattan_recall": 0.7687861271676301, + "eval_allNLI-dev_max_accuracy": 0.73046875, + "eval_allNLI-dev_max_accuracy_threshold": 371.281494140625, + "eval_allNLI-dev_max_ap": 0.6186309105779424, + "eval_allNLI-dev_max_f1": 0.6376146788990825, + "eval_allNLI-dev_max_f1_threshold": 319.8837890625, + "eval_allNLI-dev_max_precision": 0.5341365461847389, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.7670340428396949, + "eval_sts-test_pearson_cosine": 0.8331144980812912, + "eval_sts-test_pearson_dot": 0.8017763371072992, + "eval_sts-test_pearson_euclidean": 0.8636560345923409, + "eval_sts-test_pearson_manhattan": 0.8596943370640907, + "eval_sts-test_pearson_max": 0.8636560345923409, + "eval_sts-test_spearman_cosine": 0.8641814054417187, + "eval_sts-test_spearman_dot": 0.7944243474688627, + "eval_sts-test_spearman_euclidean": 0.8618423017600516, + "eval_sts-test_spearman_manhattan": 0.857660922900685, + "eval_sts-test_spearman_max": 0.8641814054417187, + "eval_vitaminc-pairs_loss": 3.11974835395813, + "eval_vitaminc-pairs_runtime": 3.204, + "eval_vitaminc-pairs_samples_per_second": 39.95, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_negation-triplets_loss": 0.9159468412399292, + "eval_negation-triplets_runtime": 0.7607, + "eval_negation-triplets_samples_per_second": 168.268, + "eval_negation-triplets_steps_per_second": 1.315, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_scitail-pairs-pos_loss": 0.1481998711824417, + "eval_scitail-pairs-pos_runtime": 0.9014, + "eval_scitail-pairs-pos_samples_per_second": 141.999, + "eval_scitail-pairs-pos_steps_per_second": 1.109, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_scitail-pairs-qa_loss": 0.0006982347113080323, + "eval_scitail-pairs-qa_runtime": 0.5961, + "eval_scitail-pairs-qa_samples_per_second": 214.721, + "eval_scitail-pairs-qa_steps_per_second": 1.678, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_xsum-pairs_loss": 0.27408263087272644, + "eval_xsum-pairs_runtime": 3.0322, + "eval_xsum-pairs_samples_per_second": 42.213, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_sciq_pairs_loss": 0.09754869341850281, + "eval_sciq_pairs_runtime": 3.5107, + "eval_sciq_pairs_samples_per_second": 36.46, + "eval_sciq_pairs_steps_per_second": 0.285, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_qasc_pairs_loss": 0.1729738712310791, + "eval_qasc_pairs_runtime": 0.6227, + "eval_qasc_pairs_samples_per_second": 205.565, + "eval_qasc_pairs_steps_per_second": 1.606, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_openbookqa_pairs_loss": 0.7929932475090027, + "eval_openbookqa_pairs_runtime": 0.5978, + "eval_openbookqa_pairs_samples_per_second": 214.131, + "eval_openbookqa_pairs_steps_per_second": 1.673, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_msmarco_pairs_loss": 0.9313375949859619, + "eval_msmarco_pairs_runtime": 1.5238, + "eval_msmarco_pairs_samples_per_second": 84.001, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_nq_pairs_loss": 0.6981325745582581, + "eval_nq_pairs_runtime": 2.9057, + "eval_nq_pairs_samples_per_second": 44.052, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_trivia_pairs_loss": 0.7952219843864441, + "eval_trivia_pairs_runtime": 3.4478, + "eval_trivia_pairs_samples_per_second": 37.125, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_gooaq_pairs_loss": 0.4432588815689087, + "eval_gooaq_pairs_runtime": 0.9511, + "eval_gooaq_pairs_samples_per_second": 134.582, + "eval_gooaq_pairs_steps_per_second": 1.051, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_paws-pos_loss": 0.021908223628997803, + "eval_paws-pos_runtime": 0.701, + "eval_paws-pos_samples_per_second": 182.601, + "eval_paws-pos_steps_per_second": 1.427, + "step": 1840 + }, + { + "epoch": 1.8930041152263375, + "eval_global_dataset_loss": 0.48257753252983093, + "eval_global_dataset_runtime": 13.4317, + "eval_global_dataset_samples_per_second": 30.971, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1840 + }, + { + "epoch": 1.8940329218106995, + "grad_norm": 6.437840938568115, + "learning_rate": 2.8352998306940368e-05, + "loss": 0.1917, + "step": 1841 + }, + { + "epoch": 1.8950617283950617, + "grad_norm": 6.392594337463379, + "learning_rate": 2.8339441294430168e-05, + "loss": 0.4027, + "step": 1842 + }, + { + "epoch": 1.896090534979424, + "grad_norm": 5.803440570831299, + "learning_rate": 2.8325875991521895e-05, + "loss": 0.1881, + "step": 1843 + }, + { + "epoch": 1.8971193415637861, + "grad_norm": 4.933880805969238, + "learning_rate": 2.831230242068046e-05, + "loss": 0.1529, + "step": 1844 + }, + { + "epoch": 1.8981481481481481, + "grad_norm": 5.8727827072143555, + "learning_rate": 2.8298720604384458e-05, + "loss": 0.172, + "step": 1845 + }, + { + "epoch": 1.8991769547325101, + "grad_norm": 10.907864570617676, + "learning_rate": 2.8285130565126156e-05, + "loss": 0.845, + "step": 1846 + }, + { + "epoch": 1.9002057613168724, + "grad_norm": 0.1463947594165802, + "learning_rate": 2.827153232541142e-05, + "loss": 0.0012, + "step": 1847 + }, + { + "epoch": 1.9012345679012346, + "grad_norm": 0.3656369149684906, + "learning_rate": 2.8257925907759705e-05, + "loss": 0.0182, + "step": 1848 + }, + { + "epoch": 1.9022633744855968, + "grad_norm": 9.290519714355469, + "learning_rate": 2.8244311334704012e-05, + "loss": 0.4674, + "step": 1849 + }, + { + "epoch": 1.903292181069959, + "grad_norm": 0.0, + "learning_rate": 2.823068862879084e-05, + "loss": 0.0, + "step": 1850 + }, + { + "epoch": 1.904320987654321, + "grad_norm": 4.353809356689453, + "learning_rate": 2.821705781258017e-05, + "loss": 0.1461, + "step": 1851 + }, + { + "epoch": 1.905349794238683, + "grad_norm": 5.968635559082031, + "learning_rate": 2.8203418908645396e-05, + "loss": 0.2166, + "step": 1852 + }, + { + "epoch": 1.9063786008230452, + "grad_norm": 4.745438575744629, + "learning_rate": 2.8189771939573323e-05, + "loss": 0.2023, + "step": 1853 + }, + { + "epoch": 1.9074074074074074, + "grad_norm": 5.100026607513428, + "learning_rate": 2.8176116927964092e-05, + "loss": 0.1569, + "step": 1854 + }, + { + "epoch": 1.9084362139917697, + "grad_norm": 5.529203414916992, + "learning_rate": 2.8162453896431182e-05, + "loss": 0.1806, + "step": 1855 + }, + { + "epoch": 1.9094650205761317, + "grad_norm": 0.36942559480667114, + "learning_rate": 2.8148782867601348e-05, + "loss": 0.0058, + "step": 1856 + }, + { + "epoch": 1.9104938271604939, + "grad_norm": 7.7156829833984375, + "learning_rate": 2.8135103864114582e-05, + "loss": 0.5055, + "step": 1857 + }, + { + "epoch": 1.9115226337448559, + "grad_norm": 4.606091499328613, + "learning_rate": 2.8121416908624103e-05, + "loss": 0.1331, + "step": 1858 + }, + { + "epoch": 1.912551440329218, + "grad_norm": 0.5592970252037048, + "learning_rate": 2.810772202379626e-05, + "loss": 0.0108, + "step": 1859 + }, + { + "epoch": 1.9135802469135803, + "grad_norm": 4.4713945388793945, + "learning_rate": 2.8094019232310574e-05, + "loss": 0.1008, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7632311582565308, + "eval_Qnli-dev_cosine_ap": 0.7481703261239094, + "eval_Qnli-dev_cosine_f1": 0.6878504672897197, + "eval_Qnli-dev_cosine_f1_threshold": 0.7151565551757812, + "eval_Qnli-dev_cosine_precision": 0.6153846153846154, + "eval_Qnli-dev_cosine_recall": 0.7796610169491526, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 352.1583557128906, + "eval_Qnli-dev_dot_ap": 0.682747256285489, + "eval_Qnli-dev_dot_f1": 0.6666666666666666, + "eval_Qnli-dev_dot_f1_threshold": 275.40057373046875, + "eval_Qnli-dev_dot_precision": 0.5215311004784688, + "eval_Qnli-dev_dot_recall": 0.923728813559322, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.104459762573242, + "eval_Qnli-dev_euclidean_ap": 0.7591388978562147, + "eval_Qnli-dev_euclidean_f1": 0.6881287726358148, + "eval_Qnli-dev_euclidean_f1_threshold": 15.602420806884766, + "eval_Qnli-dev_euclidean_precision": 0.6551724137931034, + "eval_Qnli-dev_euclidean_recall": 0.7245762711864406, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 294.6631774902344, + "eval_Qnli-dev_manhattan_ap": 0.7638811662062783, + "eval_Qnli-dev_manhattan_f1": 0.7037037037037036, + "eval_Qnli-dev_manhattan_f1_threshold": 340.6040344238281, + "eval_Qnli-dev_manhattan_precision": 0.625, + "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 352.1583557128906, + "eval_Qnli-dev_max_ap": 0.7638811662062783, + "eval_Qnli-dev_max_f1": 0.7037037037037036, + "eval_Qnli-dev_max_f1_threshold": 340.6040344238281, + "eval_Qnli-dev_max_precision": 0.6551724137931034, + "eval_Qnli-dev_max_recall": 0.923728813559322, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8235200643539429, + "eval_allNLI-dev_cosine_ap": 0.6227597756748285, + "eval_allNLI-dev_cosine_f1": 0.6270783847980997, + "eval_allNLI-dev_cosine_f1_threshold": 0.759276807308197, + "eval_allNLI-dev_cosine_precision": 0.532258064516129, + "eval_allNLI-dev_cosine_recall": 0.7630057803468208, + "eval_allNLI-dev_dot_accuracy": 0.693359375, + "eval_allNLI-dev_dot_accuracy_threshold": 358.42010498046875, + "eval_allNLI-dev_dot_ap": 0.5605397042220421, + "eval_allNLI-dev_dot_f1": 0.5943775100401606, + "eval_allNLI-dev_dot_f1_threshold": 290.23834228515625, + "eval_allNLI-dev_dot_precision": 0.4553846153846154, + "eval_allNLI-dev_dot_recall": 0.8554913294797688, + "eval_allNLI-dev_euclidean_accuracy": 0.740234375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.865135192871094, + "eval_allNLI-dev_euclidean_ap": 0.6305429650760616, + "eval_allNLI-dev_euclidean_f1": 0.6425339366515838, + "eval_allNLI-dev_euclidean_f1_threshold": 14.834894180297852, + "eval_allNLI-dev_euclidean_precision": 0.5278810408921933, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 244.87319946289062, + "eval_allNLI-dev_manhattan_ap": 0.6243970525375466, + "eval_allNLI-dev_manhattan_f1": 0.6372093023255814, + "eval_allNLI-dev_manhattan_f1_threshold": 308.1102294921875, + "eval_allNLI-dev_manhattan_precision": 0.5330739299610895, + "eval_allNLI-dev_manhattan_recall": 0.791907514450867, + "eval_allNLI-dev_max_accuracy": 0.740234375, + "eval_allNLI-dev_max_accuracy_threshold": 358.42010498046875, + "eval_allNLI-dev_max_ap": 0.6305429650760616, + "eval_allNLI-dev_max_f1": 0.6425339366515838, + "eval_allNLI-dev_max_f1_threshold": 308.1102294921875, + "eval_allNLI-dev_max_precision": 0.5330739299610895, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7638811662062783, + "eval_sts-test_pearson_cosine": 0.8365179097730663, + "eval_sts-test_pearson_dot": 0.8110790198540068, + "eval_sts-test_pearson_euclidean": 0.8667904044490078, + "eval_sts-test_pearson_manhattan": 0.8637204920322059, + "eval_sts-test_pearson_max": 0.8667904044490078, + "eval_sts-test_spearman_cosine": 0.8700784134090477, + "eval_sts-test_spearman_dot": 0.8067599431688792, + "eval_sts-test_spearman_euclidean": 0.8667105677305149, + "eval_sts-test_spearman_manhattan": 0.8637659794822224, + "eval_sts-test_spearman_max": 0.8700784134090477, + "eval_vitaminc-pairs_loss": 3.0288238525390625, + "eval_vitaminc-pairs_runtime": 3.2392, + "eval_vitaminc-pairs_samples_per_second": 39.516, + "eval_vitaminc-pairs_steps_per_second": 0.309, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_negation-triplets_loss": 0.9826973676681519, + "eval_negation-triplets_runtime": 0.7565, + "eval_negation-triplets_samples_per_second": 169.2, + "eval_negation-triplets_steps_per_second": 1.322, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_scitail-pairs-pos_loss": 0.14852353930473328, + "eval_scitail-pairs-pos_runtime": 0.8986, + "eval_scitail-pairs-pos_samples_per_second": 142.44, + "eval_scitail-pairs-pos_steps_per_second": 1.113, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_scitail-pairs-qa_loss": 0.0008748102118261158, + "eval_scitail-pairs-qa_runtime": 0.6218, + "eval_scitail-pairs-qa_samples_per_second": 205.852, + "eval_scitail-pairs-qa_steps_per_second": 1.608, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_xsum-pairs_loss": 0.28240200877189636, + "eval_xsum-pairs_runtime": 3.0291, + "eval_xsum-pairs_samples_per_second": 42.256, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_sciq_pairs_loss": 0.10066033899784088, + "eval_sciq_pairs_runtime": 3.5255, + "eval_sciq_pairs_samples_per_second": 36.307, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_qasc_pairs_loss": 0.17016766965389252, + "eval_qasc_pairs_runtime": 0.6154, + "eval_qasc_pairs_samples_per_second": 207.998, + "eval_qasc_pairs_steps_per_second": 1.625, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_openbookqa_pairs_loss": 0.7975099086761475, + "eval_openbookqa_pairs_runtime": 0.5928, + "eval_openbookqa_pairs_samples_per_second": 215.92, + "eval_openbookqa_pairs_steps_per_second": 1.687, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_msmarco_pairs_loss": 0.9013682007789612, + "eval_msmarco_pairs_runtime": 1.5261, + "eval_msmarco_pairs_samples_per_second": 83.872, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_nq_pairs_loss": 0.7462138533592224, + "eval_nq_pairs_runtime": 2.9053, + "eval_nq_pairs_samples_per_second": 44.057, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_trivia_pairs_loss": 0.8016729354858398, + "eval_trivia_pairs_runtime": 3.4494, + "eval_trivia_pairs_samples_per_second": 37.108, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_gooaq_pairs_loss": 0.41342735290527344, + "eval_gooaq_pairs_runtime": 0.9541, + "eval_gooaq_pairs_samples_per_second": 134.158, + "eval_gooaq_pairs_steps_per_second": 1.048, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_paws-pos_loss": 0.02217279002070427, + "eval_paws-pos_runtime": 0.7057, + "eval_paws-pos_samples_per_second": 181.377, + "eval_paws-pos_steps_per_second": 1.417, + "step": 1860 + }, + { + "epoch": 1.9135802469135803, + "eval_global_dataset_loss": 0.46486756205558777, + "eval_global_dataset_runtime": 13.4252, + "eval_global_dataset_samples_per_second": 30.986, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1860 + }, + { + "epoch": 1.9146090534979425, + "grad_norm": 8.730395317077637, + "learning_rate": 2.8080308556859634e-05, + "loss": 0.5098, + "step": 1861 + }, + { + "epoch": 1.9156378600823045, + "grad_norm": 10.401448249816895, + "learning_rate": 2.8066590020149104e-05, + "loss": 0.5998, + "step": 1862 + }, + { + "epoch": 1.9166666666666665, + "grad_norm": 6.233475685119629, + "learning_rate": 2.805286364489765e-05, + "loss": 0.4014, + "step": 1863 + }, + { + "epoch": 1.9176954732510287, + "grad_norm": 11.021480560302734, + "learning_rate": 2.803912945383693e-05, + "loss": 0.4707, + "step": 1864 + }, + { + "epoch": 1.918724279835391, + "grad_norm": 7.44813871383667, + "learning_rate": 2.8025387469711535e-05, + "loss": 0.6546, + "step": 1865 + }, + { + "epoch": 1.9197530864197532, + "grad_norm": 5.901599884033203, + "learning_rate": 2.8011637715278977e-05, + "loss": 0.2037, + "step": 1866 + }, + { + "epoch": 1.9207818930041154, + "grad_norm": 0.18091580271720886, + "learning_rate": 2.7997880213309625e-05, + "loss": 0.003, + "step": 1867 + }, + { + "epoch": 1.9218106995884774, + "grad_norm": 5.083710193634033, + "learning_rate": 2.798411498658667e-05, + "loss": 0.1487, + "step": 1868 + }, + { + "epoch": 1.9228395061728394, + "grad_norm": 0.20561951398849487, + "learning_rate": 2.7970342057906127e-05, + "loss": 0.0033, + "step": 1869 + }, + { + "epoch": 1.9238683127572016, + "grad_norm": 11.193281173706055, + "learning_rate": 2.795656145007673e-05, + "loss": 0.6399, + "step": 1870 + }, + { + "epoch": 1.9248971193415638, + "grad_norm": 5.531721115112305, + "learning_rate": 2.794277318591995e-05, + "loss": 0.1786, + "step": 1871 + }, + { + "epoch": 1.925925925925926, + "grad_norm": 0.0, + "learning_rate": 2.792897728826993e-05, + "loss": 0.0, + "step": 1872 + }, + { + "epoch": 1.926954732510288, + "grad_norm": 9.208216667175293, + "learning_rate": 2.791517377997346e-05, + "loss": 0.4316, + "step": 1873 + }, + { + "epoch": 1.9279835390946503, + "grad_norm": 13.096261024475098, + "learning_rate": 2.790136268388993e-05, + "loss": 1.7473, + "step": 1874 + }, + { + "epoch": 1.9290123456790123, + "grad_norm": 7.10617208480835, + "learning_rate": 2.78875440228913e-05, + "loss": 0.2205, + "step": 1875 + }, + { + "epoch": 1.9300411522633745, + "grad_norm": 4.60221004486084, + "learning_rate": 2.7873717819862048e-05, + "loss": 0.1184, + "step": 1876 + }, + { + "epoch": 1.9310699588477367, + "grad_norm": 13.202059745788574, + "learning_rate": 2.7859884097699152e-05, + "loss": 0.9313, + "step": 1877 + }, + { + "epoch": 1.932098765432099, + "grad_norm": 10.612934112548828, + "learning_rate": 2.784604287931204e-05, + "loss": 0.6615, + "step": 1878 + }, + { + "epoch": 1.933127572016461, + "grad_norm": 2.505542755126953, + "learning_rate": 2.783219418762255e-05, + "loss": 0.0461, + "step": 1879 + }, + { + "epoch": 1.934156378600823, + "grad_norm": 0.5168958902359009, + "learning_rate": 2.7818338045564902e-05, + "loss": 0.0076, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7700129747390747, + "eval_Qnli-dev_cosine_ap": 0.750412858057858, + "eval_Qnli-dev_cosine_f1": 0.6950092421441774, + "eval_Qnli-dev_cosine_f1_threshold": 0.7146259546279907, + "eval_Qnli-dev_cosine_precision": 0.6163934426229508, + "eval_Qnli-dev_cosine_recall": 0.7966101694915254, + "eval_Qnli-dev_dot_accuracy": 0.654296875, + "eval_Qnli-dev_dot_accuracy_threshold": 349.13232421875, + "eval_Qnli-dev_dot_ap": 0.690508783570581, + "eval_Qnli-dev_dot_f1": 0.6677115987460814, + "eval_Qnli-dev_dot_f1_threshold": 288.740478515625, + "eval_Qnli-dev_dot_precision": 0.5298507462686567, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.20905876159668, + "eval_Qnli-dev_euclidean_ap": 0.7609109769968383, + "eval_Qnli-dev_euclidean_f1": 0.6998087954110899, + "eval_Qnli-dev_euclidean_f1_threshold": 16.04629898071289, + "eval_Qnli-dev_euclidean_precision": 0.6376306620209059, + "eval_Qnli-dev_euclidean_recall": 0.7754237288135594, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 327.05865478515625, + "eval_Qnli-dev_manhattan_ap": 0.7647373667172418, + "eval_Qnli-dev_manhattan_f1": 0.7091633466135459, + "eval_Qnli-dev_manhattan_f1_threshold": 328.59735107421875, + "eval_Qnli-dev_manhattan_precision": 0.6691729323308271, + "eval_Qnli-dev_manhattan_recall": 0.7542372881355932, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 349.13232421875, + "eval_Qnli-dev_max_ap": 0.7647373667172418, + "eval_Qnli-dev_max_f1": 0.7091633466135459, + "eval_Qnli-dev_max_f1_threshold": 328.59735107421875, + "eval_Qnli-dev_max_precision": 0.6691729323308271, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8236054182052612, + "eval_allNLI-dev_cosine_ap": 0.629894923701272, + "eval_allNLI-dev_cosine_f1": 0.6376146788990825, + "eval_allNLI-dev_cosine_f1_threshold": 0.7714892625808716, + "eval_allNLI-dev_cosine_precision": 0.5285171102661597, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 393.950927734375, + "eval_allNLI-dev_dot_ap": 0.5528430810804673, + "eval_allNLI-dev_dot_f1": 0.6018099547511312, + "eval_allNLI-dev_dot_f1_threshold": 328.9024353027344, + "eval_allNLI-dev_dot_precision": 0.4944237918215613, + "eval_allNLI-dev_dot_recall": 0.7687861271676301, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.846963882446289, + "eval_allNLI-dev_euclidean_ap": 0.6363626822602223, + "eval_allNLI-dev_euclidean_f1": 0.6433260393873084, + "eval_allNLI-dev_euclidean_f1_threshold": 14.617688179016113, + "eval_allNLI-dev_euclidean_precision": 0.5176056338028169, + "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 254.07041931152344, + "eval_allNLI-dev_manhattan_ap": 0.6308216681329502, + "eval_allNLI-dev_manhattan_f1": 0.6460176991150444, + "eval_allNLI-dev_manhattan_f1_threshold": 304.54638671875, + "eval_allNLI-dev_manhattan_precision": 0.5232974910394266, + "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 393.950927734375, + "eval_allNLI-dev_max_ap": 0.6363626822602223, + "eval_allNLI-dev_max_f1": 0.6460176991150444, + "eval_allNLI-dev_max_f1_threshold": 328.9024353027344, + "eval_allNLI-dev_max_precision": 0.5285171102661597, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7647373667172418, + "eval_sts-test_pearson_cosine": 0.8373667820194939, + "eval_sts-test_pearson_dot": 0.8226566862254409, + "eval_sts-test_pearson_euclidean": 0.863935550264844, + "eval_sts-test_pearson_manhattan": 0.8615154988093832, + "eval_sts-test_pearson_max": 0.863935550264844, + "eval_sts-test_spearman_cosine": 0.8706524825901765, + "eval_sts-test_spearman_dot": 0.8226900496863757, + "eval_sts-test_spearman_euclidean": 0.8633828639334505, + "eval_sts-test_spearman_manhattan": 0.8609186804761946, + "eval_sts-test_spearman_max": 0.8706524825901765, + "eval_vitaminc-pairs_loss": 2.9923086166381836, + "eval_vitaminc-pairs_runtime": 3.1959, + "eval_vitaminc-pairs_samples_per_second": 40.051, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_negation-triplets_loss": 0.9319506287574768, + "eval_negation-triplets_runtime": 0.7545, + "eval_negation-triplets_samples_per_second": 169.643, + "eval_negation-triplets_steps_per_second": 1.325, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_scitail-pairs-pos_loss": 0.13219445943832397, + "eval_scitail-pairs-pos_runtime": 0.8936, + "eval_scitail-pairs-pos_samples_per_second": 143.233, + "eval_scitail-pairs-pos_steps_per_second": 1.119, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_scitail-pairs-qa_loss": 0.0009004413150250912, + "eval_scitail-pairs-qa_runtime": 0.6049, + "eval_scitail-pairs-qa_samples_per_second": 211.609, + "eval_scitail-pairs-qa_steps_per_second": 1.653, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_xsum-pairs_loss": 0.21593664586544037, + "eval_xsum-pairs_runtime": 3.0339, + "eval_xsum-pairs_samples_per_second": 42.19, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_sciq_pairs_loss": 0.10573841631412506, + "eval_sciq_pairs_runtime": 3.5286, + "eval_sciq_pairs_samples_per_second": 36.275, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_qasc_pairs_loss": 0.1666252315044403, + "eval_qasc_pairs_runtime": 0.6395, + "eval_qasc_pairs_samples_per_second": 200.154, + "eval_qasc_pairs_steps_per_second": 1.564, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_openbookqa_pairs_loss": 0.7786993980407715, + "eval_openbookqa_pairs_runtime": 0.6074, + "eval_openbookqa_pairs_samples_per_second": 210.725, + "eval_openbookqa_pairs_steps_per_second": 1.646, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_msmarco_pairs_loss": 0.8463943004608154, + "eval_msmarco_pairs_runtime": 1.5321, + "eval_msmarco_pairs_samples_per_second": 83.544, + "eval_msmarco_pairs_steps_per_second": 0.653, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_nq_pairs_loss": 0.6969786286354065, + "eval_nq_pairs_runtime": 2.9146, + "eval_nq_pairs_samples_per_second": 43.918, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_trivia_pairs_loss": 0.8204184174537659, + "eval_trivia_pairs_runtime": 3.4385, + "eval_trivia_pairs_samples_per_second": 37.225, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_gooaq_pairs_loss": 0.39143991470336914, + "eval_gooaq_pairs_runtime": 0.9502, + "eval_gooaq_pairs_samples_per_second": 134.711, + "eval_gooaq_pairs_steps_per_second": 1.052, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_paws-pos_loss": 0.02165652997791767, + "eval_paws-pos_runtime": 0.7103, + "eval_paws-pos_samples_per_second": 180.204, + "eval_paws-pos_steps_per_second": 1.408, + "step": 1880 + }, + { + "epoch": 1.934156378600823, + "eval_global_dataset_loss": 0.44243207573890686, + "eval_global_dataset_runtime": 13.4194, + "eval_global_dataset_samples_per_second": 31.0, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1880 + }, + { + "epoch": 1.9351851851851851, + "grad_norm": 11.172481536865234, + "learning_rate": 2.7804474476085652e-05, + "loss": 0.8086, + "step": 1881 + }, + { + "epoch": 1.9362139917695473, + "grad_norm": 6.315141677856445, + "learning_rate": 2.7790603502143648e-05, + "loss": 0.4009, + "step": 1882 + }, + { + "epoch": 1.9372427983539096, + "grad_norm": 5.0940117835998535, + "learning_rate": 2.777672514671002e-05, + "loss": 0.1506, + "step": 1883 + }, + { + "epoch": 1.9382716049382716, + "grad_norm": 10.881324768066406, + "learning_rate": 2.77628394327681e-05, + "loss": 0.6098, + "step": 1884 + }, + { + "epoch": 1.9393004115226338, + "grad_norm": 7.277545928955078, + "learning_rate": 2.774894638331342e-05, + "loss": 0.3716, + "step": 1885 + }, + { + "epoch": 1.9403292181069958, + "grad_norm": 0.8290622234344482, + "learning_rate": 2.7735046021353654e-05, + "loss": 0.0106, + "step": 1886 + }, + { + "epoch": 1.941358024691358, + "grad_norm": 4.880166053771973, + "learning_rate": 2.772113836990859e-05, + "loss": 0.1585, + "step": 1887 + }, + { + "epoch": 1.9423868312757202, + "grad_norm": 2.724102020263672, + "learning_rate": 2.7707223452010087e-05, + "loss": 0.0522, + "step": 1888 + }, + { + "epoch": 1.9434156378600824, + "grad_norm": 9.703840255737305, + "learning_rate": 2.769330129070204e-05, + "loss": 0.6374, + "step": 1889 + }, + { + "epoch": 1.9444444444444444, + "grad_norm": 3.389514446258545, + "learning_rate": 2.7679371909040325e-05, + "loss": 0.0609, + "step": 1890 + }, + { + "epoch": 1.9454732510288066, + "grad_norm": 0.0, + "learning_rate": 2.766543533009279e-05, + "loss": 0.0, + "step": 1891 + }, + { + "epoch": 1.9465020576131686, + "grad_norm": 11.60602855682373, + "learning_rate": 2.7651491576939206e-05, + "loss": 0.6847, + "step": 1892 + }, + { + "epoch": 1.9475308641975309, + "grad_norm": 4.8318281173706055, + "learning_rate": 2.7637540672671205e-05, + "loss": 0.1891, + "step": 1893 + }, + { + "epoch": 1.948559670781893, + "grad_norm": 8.430917739868164, + "learning_rate": 2.7623582640392285e-05, + "loss": 0.4884, + "step": 1894 + }, + { + "epoch": 1.9495884773662553, + "grad_norm": 5.366695880889893, + "learning_rate": 2.760961750321773e-05, + "loss": 0.1411, + "step": 1895 + }, + { + "epoch": 1.9506172839506173, + "grad_norm": 5.996824264526367, + "learning_rate": 2.7595645284274608e-05, + "loss": 0.1993, + "step": 1896 + }, + { + "epoch": 1.9516460905349793, + "grad_norm": 11.138838768005371, + "learning_rate": 2.758166600670169e-05, + "loss": 0.6724, + "step": 1897 + }, + { + "epoch": 1.9526748971193415, + "grad_norm": 4.469819068908691, + "learning_rate": 2.756767969364946e-05, + "loss": 0.1213, + "step": 1898 + }, + { + "epoch": 1.9537037037037037, + "grad_norm": 0.2501906156539917, + "learning_rate": 2.7553686368280037e-05, + "loss": 0.0021, + "step": 1899 + }, + { + "epoch": 1.954732510288066, + "grad_norm": 5.7854390144348145, + "learning_rate": 2.7539686053767176e-05, + "loss": 0.1918, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7991540431976318, + "eval_Qnli-dev_cosine_ap": 0.7486645945894437, + "eval_Qnli-dev_cosine_f1": 0.6927592954990216, + "eval_Qnli-dev_cosine_f1_threshold": 0.739192008972168, + "eval_Qnli-dev_cosine_precision": 0.6436363636363637, + "eval_Qnli-dev_cosine_recall": 0.75, + "eval_Qnli-dev_dot_accuracy": 0.681640625, + "eval_Qnli-dev_dot_accuracy_threshold": 352.77490234375, + "eval_Qnli-dev_dot_ap": 0.701354132763163, + "eval_Qnli-dev_dot_f1": 0.6666666666666667, + "eval_Qnli-dev_dot_f1_threshold": 294.2525634765625, + "eval_Qnli-dev_dot_precision": 0.5329949238578681, + "eval_Qnli-dev_dot_recall": 0.8898305084745762, + "eval_Qnli-dev_euclidean_accuracy": 0.720703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.000884056091309, + "eval_Qnli-dev_euclidean_ap": 0.7575757639794356, + "eval_Qnli-dev_euclidean_f1": 0.6961538461538461, + "eval_Qnli-dev_euclidean_f1_threshold": 15.680222511291504, + "eval_Qnli-dev_euclidean_precision": 0.6373239436619719, + "eval_Qnli-dev_euclidean_recall": 0.7669491525423728, + "eval_Qnli-dev_manhattan_accuracy": 0.72265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 291.60565185546875, + "eval_Qnli-dev_manhattan_ap": 0.7610718359836485, + "eval_Qnli-dev_manhattan_f1": 0.6956521739130435, + "eval_Qnli-dev_manhattan_f1_threshold": 331.0019836425781, + "eval_Qnli-dev_manhattan_precision": 0.6279863481228669, + "eval_Qnli-dev_manhattan_recall": 0.7796610169491526, + "eval_Qnli-dev_max_accuracy": 0.72265625, + "eval_Qnli-dev_max_accuracy_threshold": 352.77490234375, + "eval_Qnli-dev_max_ap": 0.7610718359836485, + "eval_Qnli-dev_max_f1": 0.6961538461538461, + "eval_Qnli-dev_max_f1_threshold": 331.0019836425781, + "eval_Qnli-dev_max_precision": 0.6436363636363637, + "eval_Qnli-dev_max_recall": 0.8898305084745762, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8532309532165527, + "eval_allNLI-dev_cosine_ap": 0.6302025367525289, + "eval_allNLI-dev_cosine_f1": 0.6386946386946387, + "eval_allNLI-dev_cosine_f1_threshold": 0.7679711580276489, + "eval_allNLI-dev_cosine_precision": 0.53515625, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.701171875, + "eval_allNLI-dev_dot_accuracy_threshold": 372.23406982421875, + "eval_allNLI-dev_dot_ap": 0.5637045533903416, + "eval_allNLI-dev_dot_f1": 0.6143497757847535, + "eval_allNLI-dev_dot_f1_threshold": 324.8187561035156, + "eval_allNLI-dev_dot_precision": 0.5018315018315018, + "eval_allNLI-dev_dot_recall": 0.791907514450867, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.496063232421875, + "eval_allNLI-dev_euclidean_ap": 0.6364019233983443, + "eval_allNLI-dev_euclidean_f1": 0.6445916114790288, + "eval_allNLI-dev_euclidean_f1_threshold": 14.737340927124023, + "eval_allNLI-dev_euclidean_precision": 0.5214285714285715, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 238.5453643798828, + "eval_allNLI-dev_manhattan_ap": 0.6331464170000961, + "eval_allNLI-dev_manhattan_f1": 0.6462882096069869, + "eval_allNLI-dev_manhattan_f1_threshold": 312.4643249511719, + "eval_allNLI-dev_manhattan_precision": 0.519298245614035, + "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 372.23406982421875, + "eval_allNLI-dev_max_ap": 0.6364019233983443, + "eval_allNLI-dev_max_f1": 0.6462882096069869, + "eval_allNLI-dev_max_f1_threshold": 324.8187561035156, + "eval_allNLI-dev_max_precision": 0.53515625, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7610718359836485, + "eval_sts-test_pearson_cosine": 0.8331628946659952, + "eval_sts-test_pearson_dot": 0.8164911059638393, + "eval_sts-test_pearson_euclidean": 0.8645888643935662, + "eval_sts-test_pearson_manhattan": 0.861337964752764, + "eval_sts-test_pearson_max": 0.8645888643935662, + "eval_sts-test_spearman_cosine": 0.8703052128017793, + "eval_sts-test_spearman_dot": 0.819658330732154, + "eval_sts-test_spearman_euclidean": 0.8649476812905237, + "eval_sts-test_spearman_manhattan": 0.8619144407934146, + "eval_sts-test_spearman_max": 0.8703052128017793, + "eval_vitaminc-pairs_loss": 3.2663421630859375, + "eval_vitaminc-pairs_runtime": 3.208, + "eval_vitaminc-pairs_samples_per_second": 39.9, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_negation-triplets_loss": 0.9119235277175903, + "eval_negation-triplets_runtime": 0.7555, + "eval_negation-triplets_samples_per_second": 169.435, + "eval_negation-triplets_steps_per_second": 1.324, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_scitail-pairs-pos_loss": 0.15329499542713165, + "eval_scitail-pairs-pos_runtime": 0.9008, + "eval_scitail-pairs-pos_samples_per_second": 142.102, + "eval_scitail-pairs-pos_steps_per_second": 1.11, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_scitail-pairs-qa_loss": 0.0005385727272368968, + "eval_scitail-pairs-qa_runtime": 0.5971, + "eval_scitail-pairs-qa_samples_per_second": 214.356, + "eval_scitail-pairs-qa_steps_per_second": 1.675, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_xsum-pairs_loss": 0.3071066439151764, + "eval_xsum-pairs_runtime": 3.031, + "eval_xsum-pairs_samples_per_second": 42.231, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_sciq_pairs_loss": 0.09913761168718338, + "eval_sciq_pairs_runtime": 3.4851, + "eval_sciq_pairs_samples_per_second": 36.728, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_qasc_pairs_loss": 0.1846480518579483, + "eval_qasc_pairs_runtime": 0.6217, + "eval_qasc_pairs_samples_per_second": 205.895, + "eval_qasc_pairs_steps_per_second": 1.609, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_openbookqa_pairs_loss": 0.7798338532447815, + "eval_openbookqa_pairs_runtime": 0.6011, + "eval_openbookqa_pairs_samples_per_second": 212.945, + "eval_openbookqa_pairs_steps_per_second": 1.664, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_msmarco_pairs_loss": 0.9161882996559143, + "eval_msmarco_pairs_runtime": 1.5239, + "eval_msmarco_pairs_samples_per_second": 83.996, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_nq_pairs_loss": 0.7218447327613831, + "eval_nq_pairs_runtime": 2.8999, + "eval_nq_pairs_samples_per_second": 44.139, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_trivia_pairs_loss": 0.8396673798561096, + "eval_trivia_pairs_runtime": 3.449, + "eval_trivia_pairs_samples_per_second": 37.112, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_gooaq_pairs_loss": 0.3774771988391876, + "eval_gooaq_pairs_runtime": 0.952, + "eval_gooaq_pairs_samples_per_second": 134.454, + "eval_gooaq_pairs_steps_per_second": 1.05, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_paws-pos_loss": 0.021937111392617226, + "eval_paws-pos_runtime": 0.701, + "eval_paws-pos_samples_per_second": 182.593, + "eval_paws-pos_steps_per_second": 1.427, + "step": 1900 + }, + { + "epoch": 1.954732510288066, + "eval_global_dataset_loss": 0.4879857301712036, + "eval_global_dataset_runtime": 13.4188, + "eval_global_dataset_samples_per_second": 31.001, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1900 + }, + { + "epoch": 1.955761316872428, + "grad_norm": 3.453291893005371, + "learning_rate": 2.7525678773296164e-05, + "loss": 0.1054, + "step": 1901 + }, + { + "epoch": 1.9567901234567902, + "grad_norm": 5.290757656097412, + "learning_rate": 2.7511664550063875e-05, + "loss": 0.1073, + "step": 1902 + }, + { + "epoch": 1.9578189300411522, + "grad_norm": 9.06633472442627, + "learning_rate": 2.749764340727864e-05, + "loss": 0.4918, + "step": 1903 + }, + { + "epoch": 1.9588477366255144, + "grad_norm": 5.419406414031982, + "learning_rate": 2.7483615368160265e-05, + "loss": 0.1429, + "step": 1904 + }, + { + "epoch": 1.9598765432098766, + "grad_norm": 9.918625831604004, + "learning_rate": 2.7469580455939988e-05, + "loss": 0.4735, + "step": 1905 + }, + { + "epoch": 1.9609053497942388, + "grad_norm": 9.553006172180176, + "learning_rate": 2.745553869386041e-05, + "loss": 0.5734, + "step": 1906 + }, + { + "epoch": 1.9619341563786008, + "grad_norm": 0.6265835762023926, + "learning_rate": 2.7441490105175482e-05, + "loss": 0.0327, + "step": 1907 + }, + { + "epoch": 1.9629629629629628, + "grad_norm": 6.599977493286133, + "learning_rate": 2.7427434713150466e-05, + "loss": 0.2095, + "step": 1908 + }, + { + "epoch": 1.963991769547325, + "grad_norm": 3.6899020671844482, + "learning_rate": 2.741337254106189e-05, + "loss": 0.1096, + "step": 1909 + }, + { + "epoch": 1.9650205761316872, + "grad_norm": 9.440287590026855, + "learning_rate": 2.7399303612197496e-05, + "loss": 0.5209, + "step": 1910 + }, + { + "epoch": 1.9660493827160495, + "grad_norm": 10.141464233398438, + "learning_rate": 2.7385227949856236e-05, + "loss": 0.1833, + "step": 1911 + }, + { + "epoch": 1.9670781893004117, + "grad_norm": 4.105213165283203, + "learning_rate": 2.73711455773482e-05, + "loss": 0.1049, + "step": 1912 + }, + { + "epoch": 1.9681069958847737, + "grad_norm": 6.282651424407959, + "learning_rate": 2.7357056517994592e-05, + "loss": 0.2033, + "step": 1913 + }, + { + "epoch": 1.9691358024691357, + "grad_norm": 11.242276191711426, + "learning_rate": 2.734296079512769e-05, + "loss": 0.6956, + "step": 1914 + }, + { + "epoch": 1.9701646090534979, + "grad_norm": 6.709212779998779, + "learning_rate": 2.7328858432090816e-05, + "loss": 0.3216, + "step": 1915 + }, + { + "epoch": 1.97119341563786, + "grad_norm": 4.293277740478516, + "learning_rate": 2.7314749452238275e-05, + "loss": 0.095, + "step": 1916 + }, + { + "epoch": 1.9722222222222223, + "grad_norm": 16.324626922607422, + "learning_rate": 2.7300633878935343e-05, + "loss": 1.6029, + "step": 1917 + }, + { + "epoch": 1.9732510288065843, + "grad_norm": 8.729584693908691, + "learning_rate": 2.7286511735558198e-05, + "loss": 0.5031, + "step": 1918 + }, + { + "epoch": 1.9742798353909465, + "grad_norm": 5.903535842895508, + "learning_rate": 2.7272383045493913e-05, + "loss": 0.1559, + "step": 1919 + }, + { + "epoch": 1.9753086419753085, + "grad_norm": 11.0974760055542, + "learning_rate": 2.7258247832140398e-05, + "loss": 0.689, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7802547812461853, + "eval_Qnli-dev_cosine_ap": 0.7589678827271121, + "eval_Qnli-dev_cosine_f1": 0.7020872865275143, + "eval_Qnli-dev_cosine_f1_threshold": 0.7303462028503418, + "eval_Qnli-dev_cosine_precision": 0.6357388316151202, + "eval_Qnli-dev_cosine_recall": 0.7838983050847458, + "eval_Qnli-dev_dot_accuracy": 0.669921875, + "eval_Qnli-dev_dot_accuracy_threshold": 373.420166015625, + "eval_Qnli-dev_dot_ap": 0.6996726870091786, + "eval_Qnli-dev_dot_f1": 0.6718750000000001, + "eval_Qnli-dev_dot_f1_threshold": 297.3533630371094, + "eval_Qnli-dev_dot_precision": 0.5321782178217822, + "eval_Qnli-dev_dot_recall": 0.9110169491525424, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.057470321655273, + "eval_Qnli-dev_euclidean_ap": 0.7673911581951351, + "eval_Qnli-dev_euclidean_f1": 0.7084870848708488, + "eval_Qnli-dev_euclidean_f1_threshold": 16.23279571533203, + "eval_Qnli-dev_euclidean_precision": 0.6274509803921569, + "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 283.3794250488281, + "eval_Qnli-dev_manhattan_ap": 0.7693146584936291, + "eval_Qnli-dev_manhattan_f1": 0.7095516569200779, + "eval_Qnli-dev_manhattan_f1_threshold": 327.70379638671875, + "eval_Qnli-dev_manhattan_precision": 0.6570397111913358, + "eval_Qnli-dev_manhattan_recall": 0.7711864406779662, + "eval_Qnli-dev_max_accuracy": 0.72265625, + "eval_Qnli-dev_max_accuracy_threshold": 373.420166015625, + "eval_Qnli-dev_max_ap": 0.7693146584936291, + "eval_Qnli-dev_max_f1": 0.7095516569200779, + "eval_Qnli-dev_max_f1_threshold": 327.70379638671875, + "eval_Qnli-dev_max_precision": 0.6570397111913358, + "eval_Qnli-dev_max_recall": 0.9110169491525424, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8414372205734253, + "eval_allNLI-dev_cosine_ap": 0.6362556022858551, + "eval_allNLI-dev_cosine_f1": 0.6515837104072397, + "eval_allNLI-dev_cosine_f1_threshold": 0.7504405975341797, + "eval_allNLI-dev_cosine_precision": 0.5353159851301115, + "eval_allNLI-dev_cosine_recall": 0.8323699421965318, + "eval_allNLI-dev_dot_accuracy": 0.705078125, + "eval_allNLI-dev_dot_accuracy_threshold": 370.8945007324219, + "eval_allNLI-dev_dot_ap": 0.5842482557708913, + "eval_allNLI-dev_dot_f1": 0.6205357142857143, + "eval_allNLI-dev_dot_f1_threshold": 322.5114440917969, + "eval_allNLI-dev_dot_precision": 0.5054545454545455, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.740234375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.304725646972656, + "eval_allNLI-dev_euclidean_ap": 0.6444108155589591, + "eval_allNLI-dev_euclidean_f1": 0.648068669527897, + "eval_allNLI-dev_euclidean_f1_threshold": 15.276135444641113, + "eval_allNLI-dev_euclidean_precision": 0.515358361774744, + "eval_allNLI-dev_euclidean_recall": 0.8728323699421965, + "eval_allNLI-dev_manhattan_accuracy": 0.73828125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 257.411376953125, + "eval_allNLI-dev_manhattan_ap": 0.6378845688852381, + "eval_allNLI-dev_manhattan_f1": 0.6479481641468683, + "eval_allNLI-dev_manhattan_f1_threshold": 318.2986755371094, + "eval_allNLI-dev_manhattan_precision": 0.5172413793103449, + "eval_allNLI-dev_manhattan_recall": 0.8670520231213873, + "eval_allNLI-dev_max_accuracy": 0.740234375, + "eval_allNLI-dev_max_accuracy_threshold": 370.8945007324219, + "eval_allNLI-dev_max_ap": 0.6444108155589591, + "eval_allNLI-dev_max_f1": 0.6515837104072397, + "eval_allNLI-dev_max_f1_threshold": 322.5114440917969, + "eval_allNLI-dev_max_precision": 0.5353159851301115, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7693146584936291, + "eval_sts-test_pearson_cosine": 0.8356711247591408, + "eval_sts-test_pearson_dot": 0.8162478084316604, + "eval_sts-test_pearson_euclidean": 0.8653258137671263, + "eval_sts-test_pearson_manhattan": 0.8629686568902456, + "eval_sts-test_pearson_max": 0.8653258137671263, + "eval_sts-test_spearman_cosine": 0.8672133481522474, + "eval_sts-test_spearman_dot": 0.8157154760960033, + "eval_sts-test_spearman_euclidean": 0.8632872752391186, + "eval_sts-test_spearman_manhattan": 0.8611170180666812, + "eval_sts-test_spearman_max": 0.8672133481522474, + "eval_vitaminc-pairs_loss": 3.207522392272949, + "eval_vitaminc-pairs_runtime": 3.2104, + "eval_vitaminc-pairs_samples_per_second": 39.871, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_negation-triplets_loss": 0.9503350853919983, + "eval_negation-triplets_runtime": 0.7605, + "eval_negation-triplets_samples_per_second": 168.312, + "eval_negation-triplets_steps_per_second": 1.315, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_scitail-pairs-pos_loss": 0.15005101263523102, + "eval_scitail-pairs-pos_runtime": 0.897, + "eval_scitail-pairs-pos_samples_per_second": 142.698, + "eval_scitail-pairs-pos_steps_per_second": 1.115, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_scitail-pairs-qa_loss": 0.0005707141826860607, + "eval_scitail-pairs-qa_runtime": 0.6051, + "eval_scitail-pairs-qa_samples_per_second": 211.551, + "eval_scitail-pairs-qa_steps_per_second": 1.653, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_xsum-pairs_loss": 0.24305416643619537, + "eval_xsum-pairs_runtime": 3.0297, + "eval_xsum-pairs_samples_per_second": 42.249, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_sciq_pairs_loss": 0.0905257910490036, + "eval_sciq_pairs_runtime": 3.512, + "eval_sciq_pairs_samples_per_second": 36.446, + "eval_sciq_pairs_steps_per_second": 0.285, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_qasc_pairs_loss": 0.1924685686826706, + "eval_qasc_pairs_runtime": 0.6209, + "eval_qasc_pairs_samples_per_second": 206.156, + "eval_qasc_pairs_steps_per_second": 1.611, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_openbookqa_pairs_loss": 0.8229547739028931, + "eval_openbookqa_pairs_runtime": 0.598, + "eval_openbookqa_pairs_samples_per_second": 214.034, + "eval_openbookqa_pairs_steps_per_second": 1.672, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_msmarco_pairs_loss": 0.8960761427879333, + "eval_msmarco_pairs_runtime": 1.5297, + "eval_msmarco_pairs_samples_per_second": 83.678, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_nq_pairs_loss": 0.7282431721687317, + "eval_nq_pairs_runtime": 2.9033, + "eval_nq_pairs_samples_per_second": 44.088, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_trivia_pairs_loss": 0.846880316734314, + "eval_trivia_pairs_runtime": 3.4453, + "eval_trivia_pairs_samples_per_second": 37.152, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_gooaq_pairs_loss": 0.4108849763870239, + "eval_gooaq_pairs_runtime": 0.9576, + "eval_gooaq_pairs_samples_per_second": 133.664, + "eval_gooaq_pairs_steps_per_second": 1.044, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_paws-pos_loss": 0.02292362041771412, + "eval_paws-pos_runtime": 0.7402, + "eval_paws-pos_samples_per_second": 172.929, + "eval_paws-pos_steps_per_second": 1.351, + "step": 1920 + }, + { + "epoch": 1.9753086419753085, + "eval_global_dataset_loss": 0.4752640426158905, + "eval_global_dataset_runtime": 13.4207, + "eval_global_dataset_samples_per_second": 30.997, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1920 + }, + { + "epoch": 1.9763374485596708, + "grad_norm": 5.3949713706970215, + "learning_rate": 2.7244106118906372e-05, + "loss": 0.1694, + "step": 1921 + }, + { + "epoch": 1.977366255144033, + "grad_norm": 11.391617774963379, + "learning_rate": 2.722995792921131e-05, + "loss": 0.6042, + "step": 1922 + }, + { + "epoch": 1.9783950617283952, + "grad_norm": 8.24395751953125, + "learning_rate": 2.721580328648541e-05, + "loss": 0.4886, + "step": 1923 + }, + { + "epoch": 1.9794238683127572, + "grad_norm": 4.720622539520264, + "learning_rate": 2.7201642214169555e-05, + "loss": 0.2256, + "step": 1924 + }, + { + "epoch": 1.9804526748971192, + "grad_norm": 0.0937778577208519, + "learning_rate": 2.71874747357153e-05, + "loss": 0.0011, + "step": 1925 + }, + { + "epoch": 1.9814814814814814, + "grad_norm": 5.925360202789307, + "learning_rate": 2.7173300874584784e-05, + "loss": 0.2649, + "step": 1926 + }, + { + "epoch": 1.9825102880658436, + "grad_norm": 0.691866397857666, + "learning_rate": 2.715912065425072e-05, + "loss": 0.0076, + "step": 1927 + }, + { + "epoch": 1.9835390946502058, + "grad_norm": 4.325530529022217, + "learning_rate": 2.714493409819635e-05, + "loss": 0.1577, + "step": 1928 + }, + { + "epoch": 1.984567901234568, + "grad_norm": 6.408902168273926, + "learning_rate": 2.7130741229915425e-05, + "loss": 0.2949, + "step": 1929 + }, + { + "epoch": 1.98559670781893, + "grad_norm": 8.462160110473633, + "learning_rate": 2.711654207291213e-05, + "loss": 0.0956, + "step": 1930 + }, + { + "epoch": 1.986625514403292, + "grad_norm": 5.3168559074401855, + "learning_rate": 2.710233665070108e-05, + "loss": 0.1822, + "step": 1931 + }, + { + "epoch": 1.9876543209876543, + "grad_norm": 6.17379093170166, + "learning_rate": 2.7088124986807253e-05, + "loss": 0.1155, + "step": 1932 + }, + { + "epoch": 1.9886831275720165, + "grad_norm": 4.081679821014404, + "learning_rate": 2.7073907104765966e-05, + "loss": 0.0821, + "step": 1933 + }, + { + "epoch": 1.9897119341563787, + "grad_norm": 6.978852272033691, + "learning_rate": 2.705968302812284e-05, + "loss": 0.2296, + "step": 1934 + }, + { + "epoch": 1.9907407407407407, + "grad_norm": 11.4292573928833, + "learning_rate": 2.704545278043375e-05, + "loss": 0.2635, + "step": 1935 + }, + { + "epoch": 1.991769547325103, + "grad_norm": 5.254778861999512, + "learning_rate": 2.703121638526479e-05, + "loss": 0.0338, + "step": 1936 + }, + { + "epoch": 1.992798353909465, + "grad_norm": 10.508069038391113, + "learning_rate": 2.701697386619224e-05, + "loss": 0.5136, + "step": 1937 + }, + { + "epoch": 1.9938271604938271, + "grad_norm": 1.189597487449646, + "learning_rate": 2.700272524680252e-05, + "loss": 0.0701, + "step": 1938 + }, + { + "epoch": 1.9948559670781894, + "grad_norm": 9.364729881286621, + "learning_rate": 2.6988470550692144e-05, + "loss": 0.1657, + "step": 1939 + }, + { + "epoch": 1.9958847736625516, + "grad_norm": 11.865019798278809, + "learning_rate": 2.6974209801467692e-05, + "loss": 0.6497, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_Qnli-dev_cosine_accuracy": 0.720703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7849160432815552, + "eval_Qnli-dev_cosine_ap": 0.7548562645268696, + "eval_Qnli-dev_cosine_f1": 0.6881720430107527, + "eval_Qnli-dev_cosine_f1_threshold": 0.6856629848480225, + "eval_Qnli-dev_cosine_precision": 0.5962732919254659, + "eval_Qnli-dev_cosine_recall": 0.8135593220338984, + "eval_Qnli-dev_dot_accuracy": 0.6796875, + "eval_Qnli-dev_dot_accuracy_threshold": 366.61492919921875, + "eval_Qnli-dev_dot_ap": 0.7083100362161063, + "eval_Qnli-dev_dot_f1": 0.6656626506024097, + "eval_Qnli-dev_dot_f1_threshold": 257.82672119140625, + "eval_Qnli-dev_dot_precision": 0.5163551401869159, + "eval_Qnli-dev_dot_recall": 0.9364406779661016, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.144988059997559, + "eval_Qnli-dev_euclidean_ap": 0.7614130013133746, + "eval_Qnli-dev_euclidean_f1": 0.6964285714285714, + "eval_Qnli-dev_euclidean_f1_threshold": 16.77385139465332, + "eval_Qnli-dev_euclidean_precision": 0.6018518518518519, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 298.89862060546875, + "eval_Qnli-dev_manhattan_ap": 0.7656973656104571, + "eval_Qnli-dev_manhattan_f1": 0.6992481203007519, + "eval_Qnli-dev_manhattan_f1_threshold": 342.295166015625, + "eval_Qnli-dev_manhattan_precision": 0.6283783783783784, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 366.61492919921875, + "eval_Qnli-dev_max_ap": 0.7656973656104571, + "eval_Qnli-dev_max_f1": 0.6992481203007519, + "eval_Qnli-dev_max_f1_threshold": 342.295166015625, + "eval_Qnli-dev_max_precision": 0.6283783783783784, + "eval_Qnli-dev_max_recall": 0.9364406779661016, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8133357167243958, + "eval_allNLI-dev_cosine_ap": 0.6262180918993713, + "eval_allNLI-dev_cosine_f1": 0.6378896882494005, + "eval_allNLI-dev_cosine_f1_threshold": 0.7387667298316956, + "eval_allNLI-dev_cosine_precision": 0.5450819672131147, + "eval_allNLI-dev_cosine_recall": 0.7687861271676301, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 373.570556640625, + "eval_allNLI-dev_dot_ap": 0.5578521452421703, + "eval_allNLI-dev_dot_f1": 0.5994962216624685, + "eval_allNLI-dev_dot_f1_threshold": 317.295654296875, + "eval_allNLI-dev_dot_precision": 0.53125, + "eval_allNLI-dev_dot_recall": 0.6878612716763006, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.49420166015625, + "eval_allNLI-dev_euclidean_ap": 0.6344157899094915, + "eval_allNLI-dev_euclidean_f1": 0.6439024390243901, + "eval_allNLI-dev_euclidean_f1_threshold": 14.732034683227539, + "eval_allNLI-dev_euclidean_precision": 0.5569620253164557, + "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 263.07177734375, + "eval_allNLI-dev_manhattan_ap": 0.6281999925675901, + "eval_allNLI-dev_manhattan_f1": 0.6425339366515838, + "eval_allNLI-dev_manhattan_f1_threshold": 325.59112548828125, + "eval_allNLI-dev_manhattan_precision": 0.5278810408921933, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 373.570556640625, + "eval_allNLI-dev_max_ap": 0.6344157899094915, + "eval_allNLI-dev_max_f1": 0.6439024390243901, + "eval_allNLI-dev_max_f1_threshold": 325.59112548828125, + "eval_allNLI-dev_max_precision": 0.5569620253164557, + "eval_allNLI-dev_max_recall": 0.8208092485549133, + "eval_sequential_score": 0.7656973656104571, + "eval_sts-test_pearson_cosine": 0.8350136239443786, + "eval_sts-test_pearson_dot": 0.8107646355376409, + "eval_sts-test_pearson_euclidean": 0.8645395213219016, + "eval_sts-test_pearson_manhattan": 0.8626709824722729, + "eval_sts-test_pearson_max": 0.8645395213219016, + "eval_sts-test_spearman_cosine": 0.8660674921043207, + "eval_sts-test_spearman_dot": 0.8101366943484286, + "eval_sts-test_spearman_euclidean": 0.8627660125464315, + "eval_sts-test_spearman_manhattan": 0.860602244306896, + "eval_sts-test_spearman_max": 0.8660674921043207, + "eval_vitaminc-pairs_loss": 3.455519914627075, + "eval_vitaminc-pairs_runtime": 3.2189, + "eval_vitaminc-pairs_samples_per_second": 39.765, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_negation-triplets_loss": 1.0325738191604614, + "eval_negation-triplets_runtime": 0.7543, + "eval_negation-triplets_samples_per_second": 169.684, + "eval_negation-triplets_steps_per_second": 1.326, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_scitail-pairs-pos_loss": 0.16138723492622375, + "eval_scitail-pairs-pos_runtime": 0.9059, + "eval_scitail-pairs-pos_samples_per_second": 141.3, + "eval_scitail-pairs-pos_steps_per_second": 1.104, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_scitail-pairs-qa_loss": 0.00048324489034712315, + "eval_scitail-pairs-qa_runtime": 0.6237, + "eval_scitail-pairs-qa_samples_per_second": 205.235, + "eval_scitail-pairs-qa_steps_per_second": 1.603, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_xsum-pairs_loss": 0.31258535385131836, + "eval_xsum-pairs_runtime": 3.027, + "eval_xsum-pairs_samples_per_second": 42.286, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_sciq_pairs_loss": 0.09079165756702423, + "eval_sciq_pairs_runtime": 3.9197, + "eval_sciq_pairs_samples_per_second": 32.655, + "eval_sciq_pairs_steps_per_second": 0.255, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_qasc_pairs_loss": 0.16633209586143494, + "eval_qasc_pairs_runtime": 0.6231, + "eval_qasc_pairs_samples_per_second": 205.409, + "eval_qasc_pairs_steps_per_second": 1.605, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_openbookqa_pairs_loss": 0.8373302221298218, + "eval_openbookqa_pairs_runtime": 0.5958, + "eval_openbookqa_pairs_samples_per_second": 214.826, + "eval_openbookqa_pairs_steps_per_second": 1.678, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_msmarco_pairs_loss": 0.9623515605926514, + "eval_msmarco_pairs_runtime": 1.5238, + "eval_msmarco_pairs_samples_per_second": 83.999, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_nq_pairs_loss": 0.7237948775291443, + "eval_nq_pairs_runtime": 2.9153, + "eval_nq_pairs_samples_per_second": 43.906, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_trivia_pairs_loss": 0.8588597774505615, + "eval_trivia_pairs_runtime": 3.454, + "eval_trivia_pairs_samples_per_second": 37.058, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_gooaq_pairs_loss": 0.38329169154167175, + "eval_gooaq_pairs_runtime": 0.962, + "eval_gooaq_pairs_samples_per_second": 133.049, + "eval_gooaq_pairs_steps_per_second": 1.039, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_paws-pos_loss": 0.022732464596629143, + "eval_paws-pos_runtime": 0.7187, + "eval_paws-pos_samples_per_second": 178.109, + "eval_paws-pos_steps_per_second": 1.391, + "step": 1940 + }, + { + "epoch": 1.9958847736625516, + "eval_global_dataset_loss": 0.5049639344215393, + "eval_global_dataset_runtime": 13.4356, + "eval_global_dataset_samples_per_second": 30.962, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1940 + }, + { + "epoch": 1.9969135802469136, + "grad_norm": 10.780223846435547, + "learning_rate": 2.6959943022745776e-05, + "loss": 0.3913, + "step": 1941 + }, + { + "epoch": 1.9979423868312756, + "grad_norm": 0.003871053922921419, + "learning_rate": 2.6945670238152986e-05, + "loss": 0.0, + "step": 1942 + }, + { + "epoch": 1.9989711934156378, + "grad_norm": 9.154616355895996, + "learning_rate": 2.6931391471325872e-05, + "loss": 0.1129, + "step": 1943 + }, + { + "epoch": 2.0, + "grad_norm": 12.245766639709473, + "learning_rate": 2.691710674591087e-05, + "loss": 0.1786, + "step": 1944 + }, + { + "epoch": 2.001028806584362, + "grad_norm": 3.350267171859741, + "learning_rate": 2.6902816085564297e-05, + "loss": 0.0721, + "step": 1945 + }, + { + "epoch": 2.0020576131687244, + "grad_norm": 5.552727699279785, + "learning_rate": 2.6888519513952295e-05, + "loss": 0.124, + "step": 1946 + }, + { + "epoch": 2.003086419753086, + "grad_norm": 9.989136695861816, + "learning_rate": 2.687421705475079e-05, + "loss": 0.4688, + "step": 1947 + }, + { + "epoch": 2.0041152263374484, + "grad_norm": 5.448371410369873, + "learning_rate": 2.685990873164547e-05, + "loss": 0.1731, + "step": 1948 + }, + { + "epoch": 2.0051440329218106, + "grad_norm": 10.226446151733398, + "learning_rate": 2.684559456833173e-05, + "loss": 0.5941, + "step": 1949 + }, + { + "epoch": 2.006172839506173, + "grad_norm": 5.5601654052734375, + "learning_rate": 2.6831274588514627e-05, + "loss": 0.1637, + "step": 1950 + }, + { + "epoch": 2.007201646090535, + "grad_norm": 4.019494533538818, + "learning_rate": 2.681694881590886e-05, + "loss": 0.1451, + "step": 1951 + }, + { + "epoch": 2.0082304526748973, + "grad_norm": 3.698535442352295, + "learning_rate": 2.6802617274238724e-05, + "loss": 0.1332, + "step": 1952 + }, + { + "epoch": 2.009259259259259, + "grad_norm": 7.371061325073242, + "learning_rate": 2.678827998723806e-05, + "loss": 0.2481, + "step": 1953 + }, + { + "epoch": 2.0102880658436213, + "grad_norm": 7.150874137878418, + "learning_rate": 2.6773936978650225e-05, + "loss": 0.21, + "step": 1954 + }, + { + "epoch": 2.0113168724279835, + "grad_norm": 6.00358772277832, + "learning_rate": 2.6759588272228055e-05, + "loss": 0.149, + "step": 1955 + }, + { + "epoch": 2.0123456790123457, + "grad_norm": 4.2081475257873535, + "learning_rate": 2.674523389173382e-05, + "loss": 0.1146, + "step": 1956 + }, + { + "epoch": 2.013374485596708, + "grad_norm": 4.812724590301514, + "learning_rate": 2.673087386093918e-05, + "loss": 0.0926, + "step": 1957 + }, + { + "epoch": 2.01440329218107, + "grad_norm": 6.93581485748291, + "learning_rate": 2.671650820362517e-05, + "loss": 0.234, + "step": 1958 + }, + { + "epoch": 2.015432098765432, + "grad_norm": 3.500269651412964, + "learning_rate": 2.6702136943582128e-05, + "loss": 0.0647, + "step": 1959 + }, + { + "epoch": 2.016460905349794, + "grad_norm": 8.786886215209961, + "learning_rate": 2.6687760104609663e-05, + "loss": 0.5429, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_Qnli-dev_cosine_accuracy": 0.71875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7799655199050903, + "eval_Qnli-dev_cosine_ap": 0.7668841453694514, + "eval_Qnli-dev_cosine_f1": 0.694589877835951, + "eval_Qnli-dev_cosine_f1_threshold": 0.682135283946991, + "eval_Qnli-dev_cosine_precision": 0.5905044510385756, + "eval_Qnli-dev_cosine_recall": 0.8432203389830508, + "eval_Qnli-dev_dot_accuracy": 0.673828125, + "eval_Qnli-dev_dot_accuracy_threshold": 348.41790771484375, + "eval_Qnli-dev_dot_ap": 0.712974117783976, + "eval_Qnli-dev_dot_f1": 0.670807453416149, + "eval_Qnli-dev_dot_f1_threshold": 270.2975158691406, + "eval_Qnli-dev_dot_precision": 0.5294117647058824, + "eval_Qnli-dev_dot_recall": 0.9152542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.860166549682617, + "eval_Qnli-dev_euclidean_ap": 0.7750223238293773, + "eval_Qnli-dev_euclidean_f1": 0.6953405017921147, + "eval_Qnli-dev_euclidean_f1_threshold": 16.256149291992188, + "eval_Qnli-dev_euclidean_precision": 0.6024844720496895, + "eval_Qnli-dev_euclidean_recall": 0.8220338983050848, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 298.0458984375, + "eval_Qnli-dev_manhattan_ap": 0.7789472745282582, + "eval_Qnli-dev_manhattan_f1": 0.699410609037328, + "eval_Qnli-dev_manhattan_f1_threshold": 327.1111145019531, + "eval_Qnli-dev_manhattan_precision": 0.652014652014652, + "eval_Qnli-dev_manhattan_recall": 0.7542372881355932, + "eval_Qnli-dev_max_accuracy": 0.72265625, + "eval_Qnli-dev_max_accuracy_threshold": 348.41790771484375, + "eval_Qnli-dev_max_ap": 0.7789472745282582, + "eval_Qnli-dev_max_f1": 0.699410609037328, + "eval_Qnli-dev_max_f1_threshold": 327.1111145019531, + "eval_Qnli-dev_max_precision": 0.652014652014652, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8098629117012024, + "eval_allNLI-dev_cosine_ap": 0.6210919851298508, + "eval_allNLI-dev_cosine_f1": 0.6349892008639308, + "eval_allNLI-dev_cosine_f1_threshold": 0.721535325050354, + "eval_allNLI-dev_cosine_precision": 0.506896551724138, + "eval_allNLI-dev_cosine_recall": 0.8497109826589595, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 333.63330078125, + "eval_allNLI-dev_dot_ap": 0.5569305748772116, + "eval_allNLI-dev_dot_f1": 0.6032388663967612, + "eval_allNLI-dev_dot_f1_threshold": 287.0180969238281, + "eval_allNLI-dev_dot_precision": 0.46417445482866043, + "eval_allNLI-dev_dot_recall": 0.861271676300578, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.557496070861816, + "eval_allNLI-dev_euclidean_ap": 0.6281064411386492, + "eval_allNLI-dev_euclidean_f1": 0.6514806378132119, + "eval_allNLI-dev_euclidean_f1_threshold": 14.880706787109375, + "eval_allNLI-dev_euclidean_precision": 0.5375939849624061, + "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 268.589111328125, + "eval_allNLI-dev_manhattan_ap": 0.6243826773237957, + "eval_allNLI-dev_manhattan_f1": 0.6447058823529411, + "eval_allNLI-dev_manhattan_f1_threshold": 305.6214599609375, + "eval_allNLI-dev_manhattan_precision": 0.5436507936507936, + "eval_allNLI-dev_manhattan_recall": 0.791907514450867, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 333.63330078125, + "eval_allNLI-dev_max_ap": 0.6281064411386492, + "eval_allNLI-dev_max_f1": 0.6514806378132119, + "eval_allNLI-dev_max_f1_threshold": 305.6214599609375, + "eval_allNLI-dev_max_precision": 0.5436507936507936, + "eval_allNLI-dev_max_recall": 0.861271676300578, + "eval_sequential_score": 0.7789472745282582, + "eval_sts-test_pearson_cosine": 0.8331541754108045, + "eval_sts-test_pearson_dot": 0.820280291990983, + "eval_sts-test_pearson_euclidean": 0.8614289265584285, + "eval_sts-test_pearson_manhattan": 0.8597853920648006, + "eval_sts-test_pearson_max": 0.8614289265584285, + "eval_sts-test_spearman_cosine": 0.8643414359524544, + "eval_sts-test_spearman_dot": 0.8199940546951122, + "eval_sts-test_spearman_euclidean": 0.8593159588007231, + "eval_sts-test_spearman_manhattan": 0.8576773466136155, + "eval_sts-test_spearman_max": 0.8643414359524544, + "eval_vitaminc-pairs_loss": 3.2571401596069336, + "eval_vitaminc-pairs_runtime": 3.2348, + "eval_vitaminc-pairs_samples_per_second": 39.569, + "eval_vitaminc-pairs_steps_per_second": 0.309, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_negation-triplets_loss": 0.9964045286178589, + "eval_negation-triplets_runtime": 0.7567, + "eval_negation-triplets_samples_per_second": 169.152, + "eval_negation-triplets_steps_per_second": 1.322, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_scitail-pairs-pos_loss": 0.15746238827705383, + "eval_scitail-pairs-pos_runtime": 0.8951, + "eval_scitail-pairs-pos_samples_per_second": 142.998, + "eval_scitail-pairs-pos_steps_per_second": 1.117, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_scitail-pairs-qa_loss": 0.00042386981658637524, + "eval_scitail-pairs-qa_runtime": 0.6122, + "eval_scitail-pairs-qa_samples_per_second": 209.071, + "eval_scitail-pairs-qa_steps_per_second": 1.633, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_xsum-pairs_loss": 0.2893158793449402, + "eval_xsum-pairs_runtime": 3.0332, + "eval_xsum-pairs_samples_per_second": 42.2, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_sciq_pairs_loss": 0.08887187391519547, + "eval_sciq_pairs_runtime": 3.5456, + "eval_sciq_pairs_samples_per_second": 36.101, + "eval_sciq_pairs_steps_per_second": 0.282, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_qasc_pairs_loss": 0.16652485728263855, + "eval_qasc_pairs_runtime": 0.6255, + "eval_qasc_pairs_samples_per_second": 204.633, + "eval_qasc_pairs_steps_per_second": 1.599, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_openbookqa_pairs_loss": 0.8275821208953857, + "eval_openbookqa_pairs_runtime": 0.5982, + "eval_openbookqa_pairs_samples_per_second": 213.966, + "eval_openbookqa_pairs_steps_per_second": 1.672, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_msmarco_pairs_loss": 0.8606109619140625, + "eval_msmarco_pairs_runtime": 1.5254, + "eval_msmarco_pairs_samples_per_second": 83.911, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_nq_pairs_loss": 0.7386643290519714, + "eval_nq_pairs_runtime": 2.9044, + "eval_nq_pairs_samples_per_second": 44.071, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_trivia_pairs_loss": 0.8336133360862732, + "eval_trivia_pairs_runtime": 3.4507, + "eval_trivia_pairs_samples_per_second": 37.094, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_gooaq_pairs_loss": 0.40120747685432434, + "eval_gooaq_pairs_runtime": 0.9551, + "eval_gooaq_pairs_samples_per_second": 134.016, + "eval_gooaq_pairs_steps_per_second": 1.047, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_paws-pos_loss": 0.02367236278951168, + "eval_paws-pos_runtime": 0.7086, + "eval_paws-pos_samples_per_second": 180.644, + "eval_paws-pos_steps_per_second": 1.411, + "step": 1960 + }, + { + "epoch": 2.016460905349794, + "eval_global_dataset_loss": 0.4686444401741028, + "eval_global_dataset_runtime": 13.4368, + "eval_global_dataset_samples_per_second": 30.96, + "eval_global_dataset_steps_per_second": 0.298, + "step": 1960 + }, + { + "epoch": 2.0174897119341564, + "grad_norm": 10.542884826660156, + "learning_rate": 2.667337771051665e-05, + "loss": 0.5538, + "step": 1961 + }, + { + "epoch": 2.0185185185185186, + "grad_norm": 5.9770917892456055, + "learning_rate": 2.665898978512113e-05, + "loss": 0.1512, + "step": 1962 + }, + { + "epoch": 2.019547325102881, + "grad_norm": 4.33903169631958, + "learning_rate": 2.664459635225034e-05, + "loss": 0.1231, + "step": 1963 + }, + { + "epoch": 2.0205761316872426, + "grad_norm": 5.161752700805664, + "learning_rate": 2.6630197435740606e-05, + "loss": 0.1405, + "step": 1964 + }, + { + "epoch": 2.021604938271605, + "grad_norm": 9.55565071105957, + "learning_rate": 2.6615793059437357e-05, + "loss": 0.4344, + "step": 1965 + }, + { + "epoch": 2.022633744855967, + "grad_norm": 0.4918361008167267, + "learning_rate": 2.660138324719505e-05, + "loss": 0.0216, + "step": 1966 + }, + { + "epoch": 2.0236625514403292, + "grad_norm": 7.198734760284424, + "learning_rate": 2.658696802287715e-05, + "loss": 0.3757, + "step": 1967 + }, + { + "epoch": 2.0246913580246915, + "grad_norm": 5.002417087554932, + "learning_rate": 2.657254741035608e-05, + "loss": 0.176, + "step": 1968 + }, + { + "epoch": 2.0257201646090537, + "grad_norm": 4.149772644042969, + "learning_rate": 2.6558121433513207e-05, + "loss": 0.1509, + "step": 1969 + }, + { + "epoch": 2.0267489711934155, + "grad_norm": 4.617299556732178, + "learning_rate": 2.654369011623875e-05, + "loss": 0.0743, + "step": 1970 + }, + { + "epoch": 2.0277777777777777, + "grad_norm": 4.817657470703125, + "learning_rate": 2.6529253482431788e-05, + "loss": 0.2057, + "step": 1971 + }, + { + "epoch": 2.02880658436214, + "grad_norm": 9.108501434326172, + "learning_rate": 2.6514811556000205e-05, + "loss": 0.421, + "step": 1972 + }, + { + "epoch": 2.029835390946502, + "grad_norm": 6.209652423858643, + "learning_rate": 2.6500364360860653e-05, + "loss": 0.1599, + "step": 1973 + }, + { + "epoch": 2.0308641975308643, + "grad_norm": 6.913119792938232, + "learning_rate": 2.64859119209385e-05, + "loss": 0.1982, + "step": 1974 + }, + { + "epoch": 2.0318930041152266, + "grad_norm": 8.143872261047363, + "learning_rate": 2.6471454260167806e-05, + "loss": 0.3077, + "step": 1975 + }, + { + "epoch": 2.0329218106995883, + "grad_norm": 10.935625076293945, + "learning_rate": 2.6456991402491276e-05, + "loss": 0.8843, + "step": 1976 + }, + { + "epoch": 2.0339506172839505, + "grad_norm": 11.155512809753418, + "learning_rate": 2.6442523371860223e-05, + "loss": 0.5713, + "step": 1977 + }, + { + "epoch": 2.0349794238683128, + "grad_norm": 11.313560485839844, + "learning_rate": 2.6428050192234512e-05, + "loss": 0.5019, + "step": 1978 + }, + { + "epoch": 2.036008230452675, + "grad_norm": 14.535584449768066, + "learning_rate": 2.6413571887582567e-05, + "loss": 1.8065, + "step": 1979 + }, + { + "epoch": 2.037037037037037, + "grad_norm": 3.9616973400115967, + "learning_rate": 2.639908848188126e-05, + "loss": 0.1463, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_Qnli-dev_cosine_accuracy": 0.6953125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7812117338180542, + "eval_Qnli-dev_cosine_ap": 0.7464080080794311, + "eval_Qnli-dev_cosine_f1": 0.6757679180887372, + "eval_Qnli-dev_cosine_f1_threshold": 0.6842661499977112, + "eval_Qnli-dev_cosine_precision": 0.5657142857142857, + "eval_Qnli-dev_cosine_recall": 0.8389830508474576, + "eval_Qnli-dev_dot_accuracy": 0.662109375, + "eval_Qnli-dev_dot_accuracy_threshold": 367.6085510253906, + "eval_Qnli-dev_dot_ap": 0.6854949842070595, + "eval_Qnli-dev_dot_f1": 0.667741935483871, + "eval_Qnli-dev_dot_f1_threshold": 289.64093017578125, + "eval_Qnli-dev_dot_precision": 0.5390625, + "eval_Qnli-dev_dot_recall": 0.8771186440677966, + "eval_Qnli-dev_euclidean_accuracy": 0.705078125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.41029167175293, + "eval_Qnli-dev_euclidean_ap": 0.7550512346104723, + "eval_Qnli-dev_euclidean_f1": 0.6812080536912752, + "eval_Qnli-dev_euclidean_f1_threshold": 16.79446792602539, + "eval_Qnli-dev_euclidean_precision": 0.5638888888888889, + "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 294.1890563964844, + "eval_Qnli-dev_manhattan_ap": 0.7580931618042414, + "eval_Qnli-dev_manhattan_f1": 0.6812080536912752, + "eval_Qnli-dev_manhattan_f1_threshold": 354.9899597167969, + "eval_Qnli-dev_manhattan_precision": 0.5638888888888889, + "eval_Qnli-dev_manhattan_recall": 0.8601694915254238, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 367.6085510253906, + "eval_Qnli-dev_max_ap": 0.7580931618042414, + "eval_Qnli-dev_max_f1": 0.6812080536912752, + "eval_Qnli-dev_max_f1_threshold": 354.9899597167969, + "eval_Qnli-dev_max_precision": 0.5657142857142857, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8383222818374634, + "eval_allNLI-dev_cosine_ap": 0.6090807801044571, + "eval_allNLI-dev_cosine_f1": 0.623608017817372, + "eval_allNLI-dev_cosine_f1_threshold": 0.7408950924873352, + "eval_allNLI-dev_cosine_precision": 0.5072463768115942, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.693359375, + "eval_allNLI-dev_dot_accuracy_threshold": 372.4272155761719, + "eval_allNLI-dev_dot_ap": 0.5367891645145532, + "eval_allNLI-dev_dot_f1": 0.5803757828810021, + "eval_allNLI-dev_dot_f1_threshold": 300.437744140625, + "eval_allNLI-dev_dot_precision": 0.4542483660130719, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.724609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.517840385437012, + "eval_allNLI-dev_euclidean_ap": 0.6183869555871211, + "eval_allNLI-dev_euclidean_f1": 0.6450116009280741, + "eval_allNLI-dev_euclidean_f1_threshold": 14.532339096069336, + "eval_allNLI-dev_euclidean_precision": 0.5387596899224806, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.72265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 263.429443359375, + "eval_allNLI-dev_manhattan_ap": 0.6133818291746855, + "eval_allNLI-dev_manhattan_f1": 0.6403508771929824, + "eval_allNLI-dev_manhattan_f1_threshold": 316.3074951171875, + "eval_allNLI-dev_manhattan_precision": 0.5159010600706714, + "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, + "eval_allNLI-dev_max_accuracy": 0.724609375, + "eval_allNLI-dev_max_accuracy_threshold": 372.4272155761719, + "eval_allNLI-dev_max_ap": 0.6183869555871211, + "eval_allNLI-dev_max_f1": 0.6450116009280741, + "eval_allNLI-dev_max_f1_threshold": 316.3074951171875, + "eval_allNLI-dev_max_precision": 0.5387596899224806, + "eval_allNLI-dev_max_recall": 0.8439306358381503, + "eval_sequential_score": 0.7580931618042414, + "eval_sts-test_pearson_cosine": 0.8345099662016966, + "eval_sts-test_pearson_dot": 0.8217163360084048, + "eval_sts-test_pearson_euclidean": 0.8646307702719731, + "eval_sts-test_pearson_manhattan": 0.8638614995339742, + "eval_sts-test_pearson_max": 0.8646307702719731, + "eval_sts-test_spearman_cosine": 0.8682611989377271, + "eval_sts-test_spearman_dot": 0.8181228254512923, + "eval_sts-test_spearman_euclidean": 0.8633254480650854, + "eval_sts-test_spearman_manhattan": 0.8626637111628908, + "eval_sts-test_spearman_max": 0.8682611989377271, + "eval_vitaminc-pairs_loss": 3.4514224529266357, + "eval_vitaminc-pairs_runtime": 3.2245, + "eval_vitaminc-pairs_samples_per_second": 39.696, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_negation-triplets_loss": 0.9722065329551697, + "eval_negation-triplets_runtime": 0.7656, + "eval_negation-triplets_samples_per_second": 167.181, + "eval_negation-triplets_steps_per_second": 1.306, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_scitail-pairs-pos_loss": 0.15476830303668976, + "eval_scitail-pairs-pos_runtime": 0.9054, + "eval_scitail-pairs-pos_samples_per_second": 141.373, + "eval_scitail-pairs-pos_steps_per_second": 1.104, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_scitail-pairs-qa_loss": 0.0009460457949899137, + "eval_scitail-pairs-qa_runtime": 0.6019, + "eval_scitail-pairs-qa_samples_per_second": 212.665, + "eval_scitail-pairs-qa_steps_per_second": 1.661, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_xsum-pairs_loss": 0.2883686423301697, + "eval_xsum-pairs_runtime": 3.0286, + "eval_xsum-pairs_samples_per_second": 42.263, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_sciq_pairs_loss": 0.09009577333927155, + "eval_sciq_pairs_runtime": 3.5292, + "eval_sciq_pairs_samples_per_second": 36.269, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_qasc_pairs_loss": 0.16658733785152435, + "eval_qasc_pairs_runtime": 0.6249, + "eval_qasc_pairs_samples_per_second": 204.848, + "eval_qasc_pairs_steps_per_second": 1.6, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_openbookqa_pairs_loss": 0.8728303909301758, + "eval_openbookqa_pairs_runtime": 0.6024, + "eval_openbookqa_pairs_samples_per_second": 212.494, + "eval_openbookqa_pairs_steps_per_second": 1.66, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_msmarco_pairs_loss": 0.8322451114654541, + "eval_msmarco_pairs_runtime": 1.5386, + "eval_msmarco_pairs_samples_per_second": 83.195, + "eval_msmarco_pairs_steps_per_second": 0.65, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_nq_pairs_loss": 0.8093205094337463, + "eval_nq_pairs_runtime": 2.9248, + "eval_nq_pairs_samples_per_second": 43.764, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_trivia_pairs_loss": 0.7549027800559998, + "eval_trivia_pairs_runtime": 3.4405, + "eval_trivia_pairs_samples_per_second": 37.204, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_gooaq_pairs_loss": 0.4132753610610962, + "eval_gooaq_pairs_runtime": 0.9589, + "eval_gooaq_pairs_samples_per_second": 133.492, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_paws-pos_loss": 0.02331428974866867, + "eval_paws-pos_runtime": 0.7077, + "eval_paws-pos_samples_per_second": 180.861, + "eval_paws-pos_steps_per_second": 1.413, + "step": 1980 + }, + { + "epoch": 2.037037037037037, + "eval_global_dataset_loss": 0.49280592799186707, + "eval_global_dataset_runtime": 13.3958, + "eval_global_dataset_samples_per_second": 31.054, + "eval_global_dataset_steps_per_second": 0.299, + "step": 1980 + }, + { + "epoch": 2.038065843621399, + "grad_norm": 8.284627914428711, + "learning_rate": 2.6384599999115946e-05, + "loss": 0.3849, + "step": 1981 + }, + { + "epoch": 2.039094650205761, + "grad_norm": 9.021122932434082, + "learning_rate": 2.6370106463280364e-05, + "loss": 0.4461, + "step": 1982 + }, + { + "epoch": 2.0401234567901234, + "grad_norm": 7.323623180389404, + "learning_rate": 2.6355607898376633e-05, + "loss": 0.2719, + "step": 1983 + }, + { + "epoch": 2.0411522633744856, + "grad_norm": 4.637526035308838, + "learning_rate": 2.6341104328415194e-05, + "loss": 0.1207, + "step": 1984 + }, + { + "epoch": 2.042181069958848, + "grad_norm": 9.165292739868164, + "learning_rate": 2.6326595777414787e-05, + "loss": 0.5871, + "step": 1985 + }, + { + "epoch": 2.04320987654321, + "grad_norm": 7.3292717933654785, + "learning_rate": 2.631208226940238e-05, + "loss": 0.4427, + "step": 1986 + }, + { + "epoch": 2.044238683127572, + "grad_norm": 4.765210151672363, + "learning_rate": 2.6297563828413177e-05, + "loss": 0.1111, + "step": 1987 + }, + { + "epoch": 2.045267489711934, + "grad_norm": 10.265494346618652, + "learning_rate": 2.628304047849053e-05, + "loss": 0.5194, + "step": 1988 + }, + { + "epoch": 2.0462962962962963, + "grad_norm": 4.825836658477783, + "learning_rate": 2.626851224368593e-05, + "loss": 0.2192, + "step": 1989 + }, + { + "epoch": 2.0473251028806585, + "grad_norm": 9.529881477355957, + "learning_rate": 2.625397914805896e-05, + "loss": 0.6813, + "step": 1990 + }, + { + "epoch": 2.0483539094650207, + "grad_norm": 8.750849723815918, + "learning_rate": 2.6239441215677242e-05, + "loss": 0.3654, + "step": 1991 + }, + { + "epoch": 2.049382716049383, + "grad_norm": 7.3342390060424805, + "learning_rate": 2.622489847061642e-05, + "loss": 0.4521, + "step": 1992 + }, + { + "epoch": 2.0504115226337447, + "grad_norm": 0.5652264356613159, + "learning_rate": 2.62103509369601e-05, + "loss": 0.025, + "step": 1993 + }, + { + "epoch": 2.051440329218107, + "grad_norm": 0.5458277463912964, + "learning_rate": 2.6195798638799823e-05, + "loss": 0.0074, + "step": 1994 + }, + { + "epoch": 2.052469135802469, + "grad_norm": 5.055678367614746, + "learning_rate": 2.6181241600235016e-05, + "loss": 0.1088, + "step": 1995 + }, + { + "epoch": 2.0534979423868314, + "grad_norm": 4.4560723304748535, + "learning_rate": 2.616667984537296e-05, + "loss": 0.1488, + "step": 1996 + }, + { + "epoch": 2.0545267489711936, + "grad_norm": 6.173320293426514, + "learning_rate": 2.6152113398328745e-05, + "loss": 0.2028, + "step": 1997 + }, + { + "epoch": 2.0555555555555554, + "grad_norm": 6.1648688316345215, + "learning_rate": 2.6137542283225232e-05, + "loss": 0.2184, + "step": 1998 + }, + { + "epoch": 2.0565843621399176, + "grad_norm": 7.91170072555542, + "learning_rate": 2.612296652419301e-05, + "loss": 0.5343, + "step": 1999 + }, + { + "epoch": 2.05761316872428, + "grad_norm": 4.491048812866211, + "learning_rate": 2.6108386145370363e-05, + "loss": 0.1262, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7682234048843384, + "eval_Qnli-dev_cosine_ap": 0.7510537802067643, + "eval_Qnli-dev_cosine_f1": 0.6835443037974683, + "eval_Qnli-dev_cosine_f1_threshold": 0.6483661532402039, + "eval_Qnli-dev_cosine_precision": 0.5454545454545454, + "eval_Qnli-dev_cosine_recall": 0.9152542372881356, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 359.16650390625, + "eval_Qnli-dev_dot_ap": 0.6893133438021438, + "eval_Qnli-dev_dot_f1": 0.6696696696696697, + "eval_Qnli-dev_dot_f1_threshold": 266.4525146484375, + "eval_Qnli-dev_dot_precision": 0.5186046511627908, + "eval_Qnli-dev_dot_recall": 0.9449152542372882, + "eval_Qnli-dev_euclidean_accuracy": 0.7109375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.601415634155273, + "eval_Qnli-dev_euclidean_ap": 0.7605293151101415, + "eval_Qnli-dev_euclidean_f1": 0.6851549755301795, + "eval_Qnli-dev_euclidean_f1_threshold": 17.26052474975586, + "eval_Qnli-dev_euclidean_precision": 0.5570291777188329, + "eval_Qnli-dev_euclidean_recall": 0.8898305084745762, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 290.5079040527344, + "eval_Qnli-dev_manhattan_ap": 0.7626097117636834, + "eval_Qnli-dev_manhattan_f1": 0.6870503597122302, + "eval_Qnli-dev_manhattan_f1_threshold": 342.42266845703125, + "eval_Qnli-dev_manhattan_precision": 0.596875, + "eval_Qnli-dev_manhattan_recall": 0.809322033898305, + "eval_Qnli-dev_max_accuracy": 0.7109375, + "eval_Qnli-dev_max_accuracy_threshold": 359.16650390625, + "eval_Qnli-dev_max_ap": 0.7626097117636834, + "eval_Qnli-dev_max_f1": 0.6870503597122302, + "eval_Qnli-dev_max_f1_threshold": 342.42266845703125, + "eval_Qnli-dev_max_precision": 0.596875, + "eval_Qnli-dev_max_recall": 0.9449152542372882, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8330150842666626, + "eval_allNLI-dev_cosine_ap": 0.6144608435852125, + "eval_allNLI-dev_cosine_f1": 0.6232558139534884, + "eval_allNLI-dev_cosine_f1_threshold": 0.7408770322799683, + "eval_allNLI-dev_cosine_precision": 0.5214007782101168, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.708984375, + "eval_allNLI-dev_dot_accuracy_threshold": 372.2314758300781, + "eval_allNLI-dev_dot_ap": 0.5396368385808891, + "eval_allNLI-dev_dot_f1": 0.5879732739420935, + "eval_allNLI-dev_dot_f1_threshold": 303.21478271484375, + "eval_allNLI-dev_dot_precision": 0.4782608695652174, + "eval_allNLI-dev_dot_recall": 0.7630057803468208, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.515340805053711, + "eval_allNLI-dev_euclidean_ap": 0.6229221967651163, + "eval_allNLI-dev_euclidean_f1": 0.6367713004484306, + "eval_allNLI-dev_euclidean_f1_threshold": 15.163028717041016, + "eval_allNLI-dev_euclidean_precision": 0.5201465201465202, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.728515625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 262.06451416015625, + "eval_allNLI-dev_manhattan_ap": 0.6200187620678886, + "eval_allNLI-dev_manhattan_f1": 0.6374133949191686, + "eval_allNLI-dev_manhattan_f1_threshold": 313.9089050292969, + "eval_allNLI-dev_manhattan_precision": 0.5307692307692308, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 372.2314758300781, + "eval_allNLI-dev_max_ap": 0.6229221967651163, + "eval_allNLI-dev_max_f1": 0.6374133949191686, + "eval_allNLI-dev_max_f1_threshold": 313.9089050292969, + "eval_allNLI-dev_max_precision": 0.5307692307692308, + "eval_allNLI-dev_max_recall": 0.8208092485549133, + "eval_sequential_score": 0.7626097117636834, + "eval_sts-test_pearson_cosine": 0.8439968343279236, + "eval_sts-test_pearson_dot": 0.8315464594535094, + "eval_sts-test_pearson_euclidean": 0.8690583950822408, + "eval_sts-test_pearson_manhattan": 0.8680001653553668, + "eval_sts-test_pearson_max": 0.8690583950822408, + "eval_sts-test_spearman_cosine": 0.8698704066136644, + "eval_sts-test_spearman_dot": 0.8205329493847875, + "eval_sts-test_spearman_euclidean": 0.8651157670283098, + "eval_sts-test_spearman_manhattan": 0.8637660242334838, + "eval_sts-test_spearman_max": 0.8698704066136644, + "eval_vitaminc-pairs_loss": 3.2383663654327393, + "eval_vitaminc-pairs_runtime": 3.226, + "eval_vitaminc-pairs_samples_per_second": 39.678, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_negation-triplets_loss": 1.002803087234497, + "eval_negation-triplets_runtime": 0.7586, + "eval_negation-triplets_samples_per_second": 168.721, + "eval_negation-triplets_steps_per_second": 1.318, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_scitail-pairs-pos_loss": 0.13571438193321228, + "eval_scitail-pairs-pos_runtime": 0.9128, + "eval_scitail-pairs-pos_samples_per_second": 140.234, + "eval_scitail-pairs-pos_steps_per_second": 1.096, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_scitail-pairs-qa_loss": 0.000615874829236418, + "eval_scitail-pairs-qa_runtime": 0.6036, + "eval_scitail-pairs-qa_samples_per_second": 212.044, + "eval_scitail-pairs-qa_steps_per_second": 1.657, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_xsum-pairs_loss": 0.25147178769111633, + "eval_xsum-pairs_runtime": 3.025, + "eval_xsum-pairs_samples_per_second": 42.314, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_sciq_pairs_loss": 0.09387277066707611, + "eval_sciq_pairs_runtime": 3.5238, + "eval_sciq_pairs_samples_per_second": 36.325, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_qasc_pairs_loss": 0.16152480244636536, + "eval_qasc_pairs_runtime": 0.6221, + "eval_qasc_pairs_samples_per_second": 205.763, + "eval_qasc_pairs_steps_per_second": 1.608, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_openbookqa_pairs_loss": 0.8188515901565552, + "eval_openbookqa_pairs_runtime": 0.597, + "eval_openbookqa_pairs_samples_per_second": 214.408, + "eval_openbookqa_pairs_steps_per_second": 1.675, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_msmarco_pairs_loss": 0.8767502307891846, + "eval_msmarco_pairs_runtime": 1.5218, + "eval_msmarco_pairs_samples_per_second": 84.109, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_nq_pairs_loss": 0.6645520925521851, + "eval_nq_pairs_runtime": 2.9058, + "eval_nq_pairs_samples_per_second": 44.049, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_trivia_pairs_loss": 0.8132636547088623, + "eval_trivia_pairs_runtime": 3.4612, + "eval_trivia_pairs_samples_per_second": 36.982, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_gooaq_pairs_loss": 0.41634610295295715, + "eval_gooaq_pairs_runtime": 0.9655, + "eval_gooaq_pairs_samples_per_second": 132.57, + "eval_gooaq_pairs_steps_per_second": 1.036, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_paws-pos_loss": 0.023877838626503944, + "eval_paws-pos_runtime": 0.7028, + "eval_paws-pos_samples_per_second": 182.128, + "eval_paws-pos_steps_per_second": 1.423, + "step": 2000 + }, + { + "epoch": 2.05761316872428, + "eval_global_dataset_loss": 0.46081268787384033, + "eval_global_dataset_runtime": 13.403, + "eval_global_dataset_samples_per_second": 31.038, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2000 + }, + { + "epoch": 2.058641975308642, + "grad_norm": 0.42414379119873047, + "learning_rate": 2.609380117090322e-05, + "loss": 0.0128, + "step": 2001 + }, + { + "epoch": 2.0596707818930042, + "grad_norm": 11.059983253479004, + "learning_rate": 2.6079211624945133e-05, + "loss": 0.7624, + "step": 2002 + }, + { + "epoch": 2.0606995884773665, + "grad_norm": 0.11559353023767471, + "learning_rate": 2.6064617531657205e-05, + "loss": 0.0013, + "step": 2003 + }, + { + "epoch": 2.0617283950617282, + "grad_norm": 9.283576965332031, + "learning_rate": 2.6050018915208087e-05, + "loss": 0.4478, + "step": 2004 + }, + { + "epoch": 2.0627572016460904, + "grad_norm": 2.7311601638793945, + "learning_rate": 2.603541579977391e-05, + "loss": 0.0241, + "step": 2005 + }, + { + "epoch": 2.0637860082304527, + "grad_norm": 0.4573628902435303, + "learning_rate": 2.602080820953826e-05, + "loss": 0.0063, + "step": 2006 + }, + { + "epoch": 2.064814814814815, + "grad_norm": 10.511451721191406, + "learning_rate": 2.600619616869214e-05, + "loss": 0.6027, + "step": 2007 + }, + { + "epoch": 2.065843621399177, + "grad_norm": 11.383633613586426, + "learning_rate": 2.5991579701433906e-05, + "loss": 0.6883, + "step": 2008 + }, + { + "epoch": 2.0668724279835393, + "grad_norm": 6.3434648513793945, + "learning_rate": 2.597695883196926e-05, + "loss": 0.1842, + "step": 2009 + }, + { + "epoch": 2.067901234567901, + "grad_norm": 6.79525899887085, + "learning_rate": 2.596233358451119e-05, + "loss": 0.2991, + "step": 2010 + }, + { + "epoch": 2.0689300411522633, + "grad_norm": 3.5601322650909424, + "learning_rate": 2.5947703983279935e-05, + "loss": 0.096, + "step": 2011 + }, + { + "epoch": 2.0699588477366255, + "grad_norm": 4.522330284118652, + "learning_rate": 2.5933070052502936e-05, + "loss": 0.2244, + "step": 2012 + }, + { + "epoch": 2.0709876543209877, + "grad_norm": 9.466999053955078, + "learning_rate": 2.5918431816414814e-05, + "loss": 0.5941, + "step": 2013 + }, + { + "epoch": 2.07201646090535, + "grad_norm": 8.346899032592773, + "learning_rate": 2.5903789299257317e-05, + "loss": 0.4248, + "step": 2014 + }, + { + "epoch": 2.0730452674897117, + "grad_norm": 5.5073370933532715, + "learning_rate": 2.5889142525279284e-05, + "loss": 0.3148, + "step": 2015 + }, + { + "epoch": 2.074074074074074, + "grad_norm": 5.379796981811523, + "learning_rate": 2.5874491518736604e-05, + "loss": 0.1327, + "step": 2016 + }, + { + "epoch": 2.075102880658436, + "grad_norm": 9.268583297729492, + "learning_rate": 2.5859836303892156e-05, + "loss": 0.447, + "step": 2017 + }, + { + "epoch": 2.0761316872427984, + "grad_norm": 2.687713146209717, + "learning_rate": 2.584517690501583e-05, + "loss": 0.0471, + "step": 2018 + }, + { + "epoch": 2.0771604938271606, + "grad_norm": 0.9793359041213989, + "learning_rate": 2.5830513346384398e-05, + "loss": 0.0168, + "step": 2019 + }, + { + "epoch": 2.078189300411523, + "grad_norm": 9.080259323120117, + "learning_rate": 2.581584565228156e-05, + "loss": 0.4154, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7904222011566162, + "eval_Qnli-dev_cosine_ap": 0.7518479061341453, + "eval_Qnli-dev_cosine_f1": 0.6824324324324325, + "eval_Qnli-dev_cosine_f1_threshold": 0.6756579279899597, + "eval_Qnli-dev_cosine_precision": 0.5674157303370787, + "eval_Qnli-dev_cosine_recall": 0.8559322033898306, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 370.0274658203125, + "eval_Qnli-dev_dot_ap": 0.6955155895020637, + "eval_Qnli-dev_dot_f1": 0.6616084977238239, + "eval_Qnli-dev_dot_f1_threshold": 267.6722717285156, + "eval_Qnli-dev_dot_precision": 0.5153664302600472, + "eval_Qnli-dev_dot_recall": 0.923728813559322, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.895347595214844, + "eval_Qnli-dev_euclidean_ap": 0.7601878991676787, + "eval_Qnli-dev_euclidean_f1": 0.6854130052724077, + "eval_Qnli-dev_euclidean_f1_threshold": 16.323875427246094, + "eval_Qnli-dev_euclidean_precision": 0.5855855855855856, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 295.86700439453125, + "eval_Qnli-dev_manhattan_ap": 0.7636402467773312, + "eval_Qnli-dev_manhattan_f1": 0.6891651865008882, + "eval_Qnli-dev_manhattan_f1_threshold": 342.58648681640625, + "eval_Qnli-dev_manhattan_precision": 0.5932721712538226, + "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, + "eval_Qnli-dev_max_accuracy": 0.71484375, + "eval_Qnli-dev_max_accuracy_threshold": 370.0274658203125, + "eval_Qnli-dev_max_ap": 0.7636402467773312, + "eval_Qnli-dev_max_f1": 0.6891651865008882, + "eval_Qnli-dev_max_f1_threshold": 342.58648681640625, + "eval_Qnli-dev_max_precision": 0.5932721712538226, + "eval_Qnli-dev_max_recall": 0.923728813559322, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8497622013092041, + "eval_allNLI-dev_cosine_ap": 0.6279407506111134, + "eval_allNLI-dev_cosine_f1": 0.6337078651685394, + "eval_allNLI-dev_cosine_f1_threshold": 0.7338807582855225, + "eval_allNLI-dev_cosine_precision": 0.5183823529411765, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.712890625, + "eval_allNLI-dev_dot_accuracy_threshold": 371.5069580078125, + "eval_allNLI-dev_dot_ap": 0.5554045589310075, + "eval_allNLI-dev_dot_f1": 0.5982142857142857, + "eval_allNLI-dev_dot_f1_threshold": 308.48822021484375, + "eval_allNLI-dev_dot_precision": 0.48727272727272725, + "eval_allNLI-dev_dot_recall": 0.7745664739884393, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.008502960205078, + "eval_allNLI-dev_euclidean_ap": 0.6391160730245441, + "eval_allNLI-dev_euclidean_f1": 0.6438356164383561, + "eval_allNLI-dev_euclidean_f1_threshold": 15.02491569519043, + "eval_allNLI-dev_euclidean_precision": 0.5320754716981132, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 238.62452697753906, + "eval_allNLI-dev_manhattan_ap": 0.634583204732944, + "eval_allNLI-dev_manhattan_f1": 0.6514806378132119, + "eval_allNLI-dev_manhattan_f1_threshold": 316.03973388671875, + "eval_allNLI-dev_manhattan_precision": 0.5375939849624061, + "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 371.5069580078125, + "eval_allNLI-dev_max_ap": 0.6391160730245441, + "eval_allNLI-dev_max_f1": 0.6514806378132119, + "eval_allNLI-dev_max_f1_threshold": 316.03973388671875, + "eval_allNLI-dev_max_precision": 0.5375939849624061, + "eval_allNLI-dev_max_recall": 0.8265895953757225, + "eval_sequential_score": 0.7636402467773312, + "eval_sts-test_pearson_cosine": 0.8422361279891084, + "eval_sts-test_pearson_dot": 0.8296134956353065, + "eval_sts-test_pearson_euclidean": 0.8714590085163401, + "eval_sts-test_pearson_manhattan": 0.8698635898461213, + "eval_sts-test_pearson_max": 0.8714590085163401, + "eval_sts-test_spearman_cosine": 0.8724577037234441, + "eval_sts-test_spearman_dot": 0.8229498307587528, + "eval_sts-test_spearman_euclidean": 0.868813340000774, + "eval_sts-test_spearman_manhattan": 0.8667433256538535, + "eval_sts-test_spearman_max": 0.8724577037234441, + "eval_vitaminc-pairs_loss": 3.5048067569732666, + "eval_vitaminc-pairs_runtime": 3.2149, + "eval_vitaminc-pairs_samples_per_second": 39.815, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_negation-triplets_loss": 0.99112468957901, + "eval_negation-triplets_runtime": 0.7681, + "eval_negation-triplets_samples_per_second": 166.635, + "eval_negation-triplets_steps_per_second": 1.302, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_scitail-pairs-pos_loss": 0.14013606309890747, + "eval_scitail-pairs-pos_runtime": 0.9281, + "eval_scitail-pairs-pos_samples_per_second": 137.913, + "eval_scitail-pairs-pos_steps_per_second": 1.077, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_scitail-pairs-qa_loss": 0.00047457279288209975, + "eval_scitail-pairs-qa_runtime": 0.6071, + "eval_scitail-pairs-qa_samples_per_second": 210.837, + "eval_scitail-pairs-qa_steps_per_second": 1.647, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_xsum-pairs_loss": 0.26533740758895874, + "eval_xsum-pairs_runtime": 3.0384, + "eval_xsum-pairs_samples_per_second": 42.127, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_sciq_pairs_loss": 0.09102991223335266, + "eval_sciq_pairs_runtime": 3.517, + "eval_sciq_pairs_samples_per_second": 36.395, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_qasc_pairs_loss": 0.16256316006183624, + "eval_qasc_pairs_runtime": 0.6336, + "eval_qasc_pairs_samples_per_second": 202.012, + "eval_qasc_pairs_steps_per_second": 1.578, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_openbookqa_pairs_loss": 0.8943702578544617, + "eval_openbookqa_pairs_runtime": 0.6033, + "eval_openbookqa_pairs_samples_per_second": 212.157, + "eval_openbookqa_pairs_steps_per_second": 1.657, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_msmarco_pairs_loss": 0.8894786834716797, + "eval_msmarco_pairs_runtime": 1.5394, + "eval_msmarco_pairs_samples_per_second": 83.152, + "eval_msmarco_pairs_steps_per_second": 0.65, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_nq_pairs_loss": 0.667719841003418, + "eval_nq_pairs_runtime": 2.906, + "eval_nq_pairs_samples_per_second": 44.047, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_trivia_pairs_loss": 0.7819646000862122, + "eval_trivia_pairs_runtime": 3.4554, + "eval_trivia_pairs_samples_per_second": 37.044, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_gooaq_pairs_loss": 0.4292497932910919, + "eval_gooaq_pairs_runtime": 0.9612, + "eval_gooaq_pairs_samples_per_second": 133.169, + "eval_gooaq_pairs_steps_per_second": 1.04, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_paws-pos_loss": 0.02286423370242119, + "eval_paws-pos_runtime": 0.7141, + "eval_paws-pos_samples_per_second": 179.242, + "eval_paws-pos_steps_per_second": 1.4, + "step": 2020 + }, + { + "epoch": 2.078189300411523, + "eval_global_dataset_loss": 0.5070220232009888, + "eval_global_dataset_runtime": 13.4139, + "eval_global_dataset_samples_per_second": 31.013, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2020 + }, + { + "epoch": 2.0792181069958846, + "grad_norm": 10.09373664855957, + "learning_rate": 2.580117384699783e-05, + "loss": 0.5803, + "step": 2021 + }, + { + "epoch": 2.080246913580247, + "grad_norm": 3.2723073959350586, + "learning_rate": 2.5786497954830568e-05, + "loss": 0.0786, + "step": 2022 + }, + { + "epoch": 2.081275720164609, + "grad_norm": 4.15331506729126, + "learning_rate": 2.5771818000083865e-05, + "loss": 0.0877, + "step": 2023 + }, + { + "epoch": 2.0823045267489713, + "grad_norm": 7.957801342010498, + "learning_rate": 2.575713400706857e-05, + "loss": 0.3655, + "step": 2024 + }, + { + "epoch": 2.0833333333333335, + "grad_norm": 8.468852043151855, + "learning_rate": 2.574244600010219e-05, + "loss": 0.2755, + "step": 2025 + }, + { + "epoch": 2.0843621399176957, + "grad_norm": 4.392659664154053, + "learning_rate": 2.572775400350891e-05, + "loss": 0.0915, + "step": 2026 + }, + { + "epoch": 2.0853909465020575, + "grad_norm": 2.75890851020813, + "learning_rate": 2.5713058041619504e-05, + "loss": 0.0344, + "step": 2027 + }, + { + "epoch": 2.0864197530864197, + "grad_norm": 5.2819437980651855, + "learning_rate": 2.5698358138771318e-05, + "loss": 0.1478, + "step": 2028 + }, + { + "epoch": 2.087448559670782, + "grad_norm": 4.025913238525391, + "learning_rate": 2.568365431930822e-05, + "loss": 0.1627, + "step": 2029 + }, + { + "epoch": 2.088477366255144, + "grad_norm": 4.903158187866211, + "learning_rate": 2.566894660758056e-05, + "loss": 0.1453, + "step": 2030 + }, + { + "epoch": 2.0895061728395063, + "grad_norm": 4.8980393409729, + "learning_rate": 2.5654235027945153e-05, + "loss": 0.1726, + "step": 2031 + }, + { + "epoch": 2.090534979423868, + "grad_norm": 10.14993953704834, + "learning_rate": 2.5639519604765206e-05, + "loss": 0.6505, + "step": 2032 + }, + { + "epoch": 2.0915637860082303, + "grad_norm": 5.390047550201416, + "learning_rate": 2.5624800362410293e-05, + "loss": 0.1241, + "step": 2033 + }, + { + "epoch": 2.0925925925925926, + "grad_norm": 8.93951416015625, + "learning_rate": 2.5610077325256296e-05, + "loss": 0.4726, + "step": 2034 + }, + { + "epoch": 2.093621399176955, + "grad_norm": 1.5915225744247437, + "learning_rate": 2.559535051768542e-05, + "loss": 0.0298, + "step": 2035 + }, + { + "epoch": 2.094650205761317, + "grad_norm": 9.25654125213623, + "learning_rate": 2.558061996408608e-05, + "loss": 0.4278, + "step": 2036 + }, + { + "epoch": 2.095679012345679, + "grad_norm": 5.480092525482178, + "learning_rate": 2.556588568885291e-05, + "loss": 0.1557, + "step": 2037 + }, + { + "epoch": 2.096707818930041, + "grad_norm": 10.635049819946289, + "learning_rate": 2.5551147716386696e-05, + "loss": 0.499, + "step": 2038 + }, + { + "epoch": 2.097736625514403, + "grad_norm": 4.839813709259033, + "learning_rate": 2.553640607109437e-05, + "loss": 0.1144, + "step": 2039 + }, + { + "epoch": 2.0987654320987654, + "grad_norm": 0.09921301901340485, + "learning_rate": 2.5521660777388916e-05, + "loss": 0.0013, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_Qnli-dev_cosine_accuracy": 0.71875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7668793201446533, + "eval_Qnli-dev_cosine_ap": 0.7622251014689494, + "eval_Qnli-dev_cosine_f1": 0.6905537459283388, + "eval_Qnli-dev_cosine_f1_threshold": 0.6393879652023315, + "eval_Qnli-dev_cosine_precision": 0.5608465608465608, + "eval_Qnli-dev_cosine_recall": 0.8983050847457628, + "eval_Qnli-dev_dot_accuracy": 0.673828125, + "eval_Qnli-dev_dot_accuracy_threshold": 346.4505615234375, + "eval_Qnli-dev_dot_ap": 0.7133157603299303, + "eval_Qnli-dev_dot_f1": 0.6750788643533123, + "eval_Qnli-dev_dot_f1_threshold": 265.27191162109375, + "eval_Qnli-dev_dot_precision": 0.5376884422110553, + "eval_Qnli-dev_dot_recall": 0.9067796610169492, + "eval_Qnli-dev_euclidean_accuracy": 0.728515625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.158382415771484, + "eval_Qnli-dev_euclidean_ap": 0.7683574103231411, + "eval_Qnli-dev_euclidean_f1": 0.6934865900383143, + "eval_Qnli-dev_euclidean_f1_threshold": 16.00537872314453, + "eval_Qnli-dev_euclidean_precision": 0.6328671328671329, + "eval_Qnli-dev_euclidean_recall": 0.7669491525423728, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 297.2159423828125, + "eval_Qnli-dev_manhattan_ap": 0.7687026270805394, + "eval_Qnli-dev_manhattan_f1": 0.6946564885496184, + "eval_Qnli-dev_manhattan_f1_threshold": 338.4189453125, + "eval_Qnli-dev_manhattan_precision": 0.6319444444444444, + "eval_Qnli-dev_manhattan_recall": 0.7711864406779662, + "eval_Qnli-dev_max_accuracy": 0.728515625, + "eval_Qnli-dev_max_accuracy_threshold": 346.4505615234375, + "eval_Qnli-dev_max_ap": 0.7687026270805394, + "eval_Qnli-dev_max_f1": 0.6946564885496184, + "eval_Qnli-dev_max_f1_threshold": 338.4189453125, + "eval_Qnli-dev_max_precision": 0.6328671328671329, + "eval_Qnli-dev_max_recall": 0.9067796610169492, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8452146053314209, + "eval_allNLI-dev_cosine_ap": 0.6275018164212229, + "eval_allNLI-dev_cosine_f1": 0.639618138424821, + "eval_allNLI-dev_cosine_f1_threshold": 0.7643710970878601, + "eval_allNLI-dev_cosine_precision": 0.5447154471544715, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.701171875, + "eval_allNLI-dev_dot_accuracy_threshold": 361.28192138671875, + "eval_allNLI-dev_dot_ap": 0.5517615704791942, + "eval_allNLI-dev_dot_f1": 0.592039800995025, + "eval_allNLI-dev_dot_f1_threshold": 328.10443115234375, + "eval_allNLI-dev_dot_precision": 0.519650655021834, + "eval_allNLI-dev_dot_recall": 0.6878612716763006, + "eval_allNLI-dev_euclidean_accuracy": 0.744140625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.57768726348877, + "eval_allNLI-dev_euclidean_ap": 0.6387425565934844, + "eval_allNLI-dev_euclidean_f1": 0.6481012658227848, + "eval_allNLI-dev_euclidean_f1_threshold": 13.804973602294922, + "eval_allNLI-dev_euclidean_precision": 0.5765765765765766, + "eval_allNLI-dev_euclidean_recall": 0.7398843930635838, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 256.33929443359375, + "eval_allNLI-dev_manhattan_ap": 0.634668026581342, + "eval_allNLI-dev_manhattan_f1": 0.6467661691542288, + "eval_allNLI-dev_manhattan_f1_threshold": 295.89422607421875, + "eval_allNLI-dev_manhattan_precision": 0.5676855895196506, + "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 361.28192138671875, + "eval_allNLI-dev_max_ap": 0.6387425565934844, + "eval_allNLI-dev_max_f1": 0.6481012658227848, + "eval_allNLI-dev_max_f1_threshold": 328.10443115234375, + "eval_allNLI-dev_max_precision": 0.5765765765765766, + "eval_allNLI-dev_max_recall": 0.7745664739884393, + "eval_sequential_score": 0.7687026270805394, + "eval_sts-test_pearson_cosine": 0.8345019444404698, + "eval_sts-test_pearson_dot": 0.8151075704022561, + "eval_sts-test_pearson_euclidean": 0.8657053003604382, + "eval_sts-test_pearson_manhattan": 0.8633038097057222, + "eval_sts-test_pearson_max": 0.8657053003604382, + "eval_sts-test_spearman_cosine": 0.8688792138575424, + "eval_sts-test_spearman_dot": 0.8095249893564697, + "eval_sts-test_spearman_euclidean": 0.8656618219198077, + "eval_sts-test_spearman_manhattan": 0.8632414051961924, + "eval_sts-test_spearman_max": 0.8688792138575424, + "eval_vitaminc-pairs_loss": 3.372237205505371, + "eval_vitaminc-pairs_runtime": 3.2264, + "eval_vitaminc-pairs_samples_per_second": 39.673, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_negation-triplets_loss": 0.9596391320228577, + "eval_negation-triplets_runtime": 0.7685, + "eval_negation-triplets_samples_per_second": 166.552, + "eval_negation-triplets_steps_per_second": 1.301, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_scitail-pairs-pos_loss": 0.14105528593063354, + "eval_scitail-pairs-pos_runtime": 0.9, + "eval_scitail-pairs-pos_samples_per_second": 142.217, + "eval_scitail-pairs-pos_steps_per_second": 1.111, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_scitail-pairs-qa_loss": 0.0004064729146193713, + "eval_scitail-pairs-qa_runtime": 0.6026, + "eval_scitail-pairs-qa_samples_per_second": 212.415, + "eval_scitail-pairs-qa_steps_per_second": 1.659, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_xsum-pairs_loss": 0.25447919964790344, + "eval_xsum-pairs_runtime": 3.0356, + "eval_xsum-pairs_samples_per_second": 42.166, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_sciq_pairs_loss": 0.09601989388465881, + "eval_sciq_pairs_runtime": 3.5482, + "eval_sciq_pairs_samples_per_second": 36.074, + "eval_sciq_pairs_steps_per_second": 0.282, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_qasc_pairs_loss": 0.16032877564430237, + "eval_qasc_pairs_runtime": 0.628, + "eval_qasc_pairs_samples_per_second": 203.829, + "eval_qasc_pairs_steps_per_second": 1.592, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_openbookqa_pairs_loss": 0.8722372055053711, + "eval_openbookqa_pairs_runtime": 0.5946, + "eval_openbookqa_pairs_samples_per_second": 215.28, + "eval_openbookqa_pairs_steps_per_second": 1.682, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_msmarco_pairs_loss": 0.9411458969116211, + "eval_msmarco_pairs_runtime": 1.5246, + "eval_msmarco_pairs_samples_per_second": 83.956, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_nq_pairs_loss": 0.6812416911125183, + "eval_nq_pairs_runtime": 2.9197, + "eval_nq_pairs_samples_per_second": 43.84, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_trivia_pairs_loss": 0.8062442541122437, + "eval_trivia_pairs_runtime": 3.4404, + "eval_trivia_pairs_samples_per_second": 37.205, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_gooaq_pairs_loss": 0.4616175889968872, + "eval_gooaq_pairs_runtime": 0.9621, + "eval_gooaq_pairs_samples_per_second": 133.039, + "eval_gooaq_pairs_steps_per_second": 1.039, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_paws-pos_loss": 0.02278071828186512, + "eval_paws-pos_runtime": 0.7094, + "eval_paws-pos_samples_per_second": 180.443, + "eval_paws-pos_steps_per_second": 1.41, + "step": 2040 + }, + { + "epoch": 2.0987654320987654, + "eval_global_dataset_loss": 0.4955148696899414, + "eval_global_dataset_runtime": 13.4048, + "eval_global_dataset_samples_per_second": 31.034, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2040 + }, + { + "epoch": 2.0997942386831276, + "grad_norm": 3.425891876220703, + "learning_rate": 2.5506911859689383e-05, + "loss": 0.0587, + "step": 2041 + }, + { + "epoch": 2.10082304526749, + "grad_norm": 9.27571964263916, + "learning_rate": 2.5492159342420798e-05, + "loss": 0.3979, + "step": 2042 + }, + { + "epoch": 2.1018518518518516, + "grad_norm": 0.14763645827770233, + "learning_rate": 2.547740325001419e-05, + "loss": 0.0009, + "step": 2043 + }, + { + "epoch": 2.102880658436214, + "grad_norm": 7.345943450927734, + "learning_rate": 2.5462643606906473e-05, + "loss": 0.308, + "step": 2044 + }, + { + "epoch": 2.103909465020576, + "grad_norm": 8.69579792022705, + "learning_rate": 2.544788043754044e-05, + "loss": 0.4668, + "step": 2045 + }, + { + "epoch": 2.1049382716049383, + "grad_norm": 12.675374031066895, + "learning_rate": 2.5433113766364742e-05, + "loss": 0.6717, + "step": 2046 + }, + { + "epoch": 2.1059670781893005, + "grad_norm": 13.579185485839844, + "learning_rate": 2.5418343617833826e-05, + "loss": 0.9457, + "step": 2047 + }, + { + "epoch": 2.1069958847736627, + "grad_norm": 0.1838522106409073, + "learning_rate": 2.5403570016407892e-05, + "loss": 0.0023, + "step": 2048 + }, + { + "epoch": 2.1080246913580245, + "grad_norm": 6.007643222808838, + "learning_rate": 2.5388792986552858e-05, + "loss": 0.3713, + "step": 2049 + }, + { + "epoch": 2.1090534979423867, + "grad_norm": 9.373748779296875, + "learning_rate": 2.537401255274032e-05, + "loss": 0.5438, + "step": 2050 + }, + { + "epoch": 2.110082304526749, + "grad_norm": 5.532814025878906, + "learning_rate": 2.535922873944752e-05, + "loss": 0.0994, + "step": 2051 + }, + { + "epoch": 2.111111111111111, + "grad_norm": 5.089415550231934, + "learning_rate": 2.5344441571157284e-05, + "loss": 0.1818, + "step": 2052 + }, + { + "epoch": 2.1121399176954734, + "grad_norm": 3.955972909927368, + "learning_rate": 2.5329651072357998e-05, + "loss": 0.1319, + "step": 2053 + }, + { + "epoch": 2.1131687242798356, + "grad_norm": 3.837581157684326, + "learning_rate": 2.5314857267543565e-05, + "loss": 0.0868, + "step": 2054 + }, + { + "epoch": 2.1141975308641974, + "grad_norm": 10.354877471923828, + "learning_rate": 2.530006018121337e-05, + "loss": 0.5951, + "step": 2055 + }, + { + "epoch": 2.1152263374485596, + "grad_norm": 5.332611560821533, + "learning_rate": 2.528525983787222e-05, + "loss": 0.2214, + "step": 2056 + }, + { + "epoch": 2.116255144032922, + "grad_norm": 5.781221389770508, + "learning_rate": 2.527045626203032e-05, + "loss": 0.1633, + "step": 2057 + }, + { + "epoch": 2.117283950617284, + "grad_norm": 4.2456865310668945, + "learning_rate": 2.5255649478203235e-05, + "loss": 0.128, + "step": 2058 + }, + { + "epoch": 2.1183127572016462, + "grad_norm": 2.930485963821411, + "learning_rate": 2.5240839510911826e-05, + "loss": 0.0727, + "step": 2059 + }, + { + "epoch": 2.119341563786008, + "grad_norm": 3.6109559535980225, + "learning_rate": 2.5226026384682246e-05, + "loss": 0.1023, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_Qnli-dev_cosine_accuracy": 0.720703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7480126023292542, + "eval_Qnli-dev_cosine_ap": 0.767977183157839, + "eval_Qnli-dev_cosine_f1": 0.7039337474120082, + "eval_Qnli-dev_cosine_f1_threshold": 0.7480126023292542, + "eval_Qnli-dev_cosine_precision": 0.6882591093117408, + "eval_Qnli-dev_cosine_recall": 0.7203389830508474, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 341.8458251953125, + "eval_Qnli-dev_dot_ap": 0.6935774131125908, + "eval_Qnli-dev_dot_f1": 0.6687898089171974, + "eval_Qnli-dev_dot_f1_threshold": 286.7041015625, + "eval_Qnli-dev_dot_precision": 0.5357142857142857, + "eval_Qnli-dev_dot_recall": 0.8898305084745762, + "eval_Qnli-dev_euclidean_accuracy": 0.728515625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.393925666809082, + "eval_Qnli-dev_euclidean_ap": 0.7771121353577738, + "eval_Qnli-dev_euclidean_f1": 0.7071428571428572, + "eval_Qnli-dev_euclidean_f1_threshold": 16.4425048828125, + "eval_Qnli-dev_euclidean_precision": 0.6111111111111112, + "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, + "eval_Qnli-dev_manhattan_accuracy": 0.72265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 305.10614013671875, + "eval_Qnli-dev_manhattan_ap": 0.7774678086751587, + "eval_Qnli-dev_manhattan_f1": 0.7065217391304347, + "eval_Qnli-dev_manhattan_f1_threshold": 344.903564453125, + "eval_Qnli-dev_manhattan_precision": 0.6170886075949367, + "eval_Qnli-dev_manhattan_recall": 0.826271186440678, + "eval_Qnli-dev_max_accuracy": 0.728515625, + "eval_Qnli-dev_max_accuracy_threshold": 341.8458251953125, + "eval_Qnli-dev_max_ap": 0.7774678086751587, + "eval_Qnli-dev_max_f1": 0.7071428571428572, + "eval_Qnli-dev_max_f1_threshold": 344.903564453125, + "eval_Qnli-dev_max_precision": 0.6882591093117408, + "eval_Qnli-dev_max_recall": 0.8898305084745762, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8361479640007019, + "eval_allNLI-dev_cosine_ap": 0.6224493771747687, + "eval_allNLI-dev_cosine_f1": 0.6384976525821597, + "eval_allNLI-dev_cosine_f1_threshold": 0.7641709446907043, + "eval_allNLI-dev_cosine_precision": 0.5375494071146245, + "eval_allNLI-dev_cosine_recall": 0.7861271676300579, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 356.0317687988281, + "eval_allNLI-dev_dot_ap": 0.5510419909533901, + "eval_allNLI-dev_dot_f1": 0.5950782997762865, + "eval_allNLI-dev_dot_f1_threshold": 327.3216552734375, + "eval_allNLI-dev_dot_precision": 0.4854014598540146, + "eval_allNLI-dev_dot_recall": 0.7687861271676301, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.004873275756836, + "eval_allNLI-dev_euclidean_ap": 0.6341950766090391, + "eval_allNLI-dev_euclidean_f1": 0.657074340527578, + "eval_allNLI-dev_euclidean_f1_threshold": 14.403692245483398, + "eval_allNLI-dev_euclidean_precision": 0.5614754098360656, + "eval_allNLI-dev_euclidean_recall": 0.791907514450867, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 249.1562957763672, + "eval_allNLI-dev_manhattan_ap": 0.6308801066932301, + "eval_allNLI-dev_manhattan_f1": 0.6538461538461539, + "eval_allNLI-dev_manhattan_f1_threshold": 302.3179626464844, + "eval_allNLI-dev_manhattan_precision": 0.5596707818930041, + "eval_allNLI-dev_manhattan_recall": 0.7861271676300579, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 356.0317687988281, + "eval_allNLI-dev_max_ap": 0.6341950766090391, + "eval_allNLI-dev_max_f1": 0.657074340527578, + "eval_allNLI-dev_max_f1_threshold": 327.3216552734375, + "eval_allNLI-dev_max_precision": 0.5614754098360656, + "eval_allNLI-dev_max_recall": 0.791907514450867, + "eval_sequential_score": 0.7774678086751587, + "eval_sts-test_pearson_cosine": 0.8399665361475932, + "eval_sts-test_pearson_dot": 0.816581123696129, + "eval_sts-test_pearson_euclidean": 0.8717879622779746, + "eval_sts-test_pearson_manhattan": 0.8697766668566801, + "eval_sts-test_pearson_max": 0.8717879622779746, + "eval_sts-test_spearman_cosine": 0.8709748259259787, + "eval_sts-test_spearman_dot": 0.8050431952793824, + "eval_sts-test_spearman_euclidean": 0.8700087356950612, + "eval_sts-test_spearman_manhattan": 0.8682787414321924, + "eval_sts-test_spearman_max": 0.8709748259259787, + "eval_vitaminc-pairs_loss": 3.4252164363861084, + "eval_vitaminc-pairs_runtime": 3.2375, + "eval_vitaminc-pairs_samples_per_second": 39.537, + "eval_vitaminc-pairs_steps_per_second": 0.309, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_negation-triplets_loss": 0.9267296195030212, + "eval_negation-triplets_runtime": 0.7748, + "eval_negation-triplets_samples_per_second": 165.196, + "eval_negation-triplets_steps_per_second": 1.291, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_scitail-pairs-pos_loss": 0.13756400346755981, + "eval_scitail-pairs-pos_runtime": 0.897, + "eval_scitail-pairs-pos_samples_per_second": 142.704, + "eval_scitail-pairs-pos_steps_per_second": 1.115, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_scitail-pairs-qa_loss": 0.0004476790491025895, + "eval_scitail-pairs-qa_runtime": 0.6013, + "eval_scitail-pairs-qa_samples_per_second": 212.858, + "eval_scitail-pairs-qa_steps_per_second": 1.663, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_xsum-pairs_loss": 0.23516154289245605, + "eval_xsum-pairs_runtime": 3.0375, + "eval_xsum-pairs_samples_per_second": 42.14, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_sciq_pairs_loss": 0.09569169580936432, + "eval_sciq_pairs_runtime": 3.5174, + "eval_sciq_pairs_samples_per_second": 36.39, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_qasc_pairs_loss": 0.17149178683757782, + "eval_qasc_pairs_runtime": 0.6246, + "eval_qasc_pairs_samples_per_second": 204.946, + "eval_qasc_pairs_steps_per_second": 1.601, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_openbookqa_pairs_loss": 0.9316143989562988, + "eval_openbookqa_pairs_runtime": 0.5994, + "eval_openbookqa_pairs_samples_per_second": 213.551, + "eval_openbookqa_pairs_steps_per_second": 1.668, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_msmarco_pairs_loss": 0.9574117660522461, + "eval_msmarco_pairs_runtime": 1.5268, + "eval_msmarco_pairs_samples_per_second": 83.837, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_nq_pairs_loss": 0.7034321427345276, + "eval_nq_pairs_runtime": 2.9095, + "eval_nq_pairs_samples_per_second": 43.993, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_trivia_pairs_loss": 0.8535366058349609, + "eval_trivia_pairs_runtime": 3.4538, + "eval_trivia_pairs_samples_per_second": 37.061, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_gooaq_pairs_loss": 0.44533970952033997, + "eval_gooaq_pairs_runtime": 0.9547, + "eval_gooaq_pairs_samples_per_second": 134.069, + "eval_gooaq_pairs_steps_per_second": 1.047, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_paws-pos_loss": 0.02282855100929737, + "eval_paws-pos_runtime": 0.7252, + "eval_paws-pos_samples_per_second": 176.511, + "eval_paws-pos_steps_per_second": 1.379, + "step": 2060 + }, + { + "epoch": 2.119341563786008, + "eval_global_dataset_loss": 0.5080205202102661, + "eval_global_dataset_runtime": 13.4116, + "eval_global_dataset_samples_per_second": 31.018, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2060 + }, + { + "epoch": 2.1203703703703702, + "grad_norm": 6.7308454513549805, + "learning_rate": 2.521121012404587e-05, + "loss": 0.2912, + "step": 2061 + }, + { + "epoch": 2.1213991769547325, + "grad_norm": 3.2159008979797363, + "learning_rate": 2.519639075353926e-05, + "loss": 0.0734, + "step": 2062 + }, + { + "epoch": 2.1224279835390947, + "grad_norm": 4.35980224609375, + "learning_rate": 2.5181568297704134e-05, + "loss": 0.0851, + "step": 2063 + }, + { + "epoch": 2.123456790123457, + "grad_norm": 7.116025924682617, + "learning_rate": 2.5166742781087318e-05, + "loss": 0.2077, + "step": 2064 + }, + { + "epoch": 2.124485596707819, + "grad_norm": 6.992701530456543, + "learning_rate": 2.515191422824071e-05, + "loss": 0.3857, + "step": 2065 + }, + { + "epoch": 2.125514403292181, + "grad_norm": 9.226235389709473, + "learning_rate": 2.5137082663721225e-05, + "loss": 0.4809, + "step": 2066 + }, + { + "epoch": 2.126543209876543, + "grad_norm": 6.9023542404174805, + "learning_rate": 2.512224811209078e-05, + "loss": 0.2717, + "step": 2067 + }, + { + "epoch": 2.1275720164609053, + "grad_norm": 5.9378252029418945, + "learning_rate": 2.5107410597916224e-05, + "loss": 0.14, + "step": 2068 + }, + { + "epoch": 2.1286008230452675, + "grad_norm": 4.982329845428467, + "learning_rate": 2.5092570145769328e-05, + "loss": 0.2054, + "step": 2069 + }, + { + "epoch": 2.1296296296296298, + "grad_norm": 4.590819835662842, + "learning_rate": 2.5077726780226723e-05, + "loss": 0.1258, + "step": 2070 + }, + { + "epoch": 2.1306584362139915, + "grad_norm": 4.748352527618408, + "learning_rate": 2.506288052586985e-05, + "loss": 0.1127, + "step": 2071 + }, + { + "epoch": 2.1316872427983538, + "grad_norm": 9.490756034851074, + "learning_rate": 2.5048031407284954e-05, + "loss": 0.5839, + "step": 2072 + }, + { + "epoch": 2.132716049382716, + "grad_norm": 6.421467304229736, + "learning_rate": 2.5033179449063016e-05, + "loss": 0.2173, + "step": 2073 + }, + { + "epoch": 2.133744855967078, + "grad_norm": 4.624624252319336, + "learning_rate": 2.501832467579972e-05, + "loss": 0.1179, + "step": 2074 + }, + { + "epoch": 2.1347736625514404, + "grad_norm": 4.480711460113525, + "learning_rate": 2.500346711209541e-05, + "loss": 0.1996, + "step": 2075 + }, + { + "epoch": 2.1358024691358026, + "grad_norm": 10.61433219909668, + "learning_rate": 2.4988606782555047e-05, + "loss": 0.4491, + "step": 2076 + }, + { + "epoch": 2.1368312757201644, + "grad_norm": 9.619646072387695, + "learning_rate": 2.4973743711788185e-05, + "loss": 0.5571, + "step": 2077 + }, + { + "epoch": 2.1378600823045266, + "grad_norm": 4.616343975067139, + "learning_rate": 2.4958877924408912e-05, + "loss": 0.0762, + "step": 2078 + }, + { + "epoch": 2.138888888888889, + "grad_norm": 0.04099346324801445, + "learning_rate": 2.49440094450358e-05, + "loss": 0.0004, + "step": 2079 + }, + { + "epoch": 2.139917695473251, + "grad_norm": 6.589290142059326, + "learning_rate": 2.4929138298291908e-05, + "loss": 0.1611, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7704996466636658, + "eval_Qnli-dev_cosine_ap": 0.7682777720813325, + "eval_Qnli-dev_cosine_f1": 0.7024221453287196, + "eval_Qnli-dev_cosine_f1_threshold": 0.674142599105835, + "eval_Qnli-dev_cosine_precision": 0.5935672514619883, + "eval_Qnli-dev_cosine_recall": 0.8601694915254238, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 355.11505126953125, + "eval_Qnli-dev_dot_ap": 0.7107837950507052, + "eval_Qnli-dev_dot_f1": 0.6722129783693842, + "eval_Qnli-dev_dot_f1_threshold": 287.2233581542969, + "eval_Qnli-dev_dot_precision": 0.5534246575342465, + "eval_Qnli-dev_dot_recall": 0.8559322033898306, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.478130340576172, + "eval_Qnli-dev_euclidean_ap": 0.7758602239414594, + "eval_Qnli-dev_euclidean_f1": 0.7012522361359571, + "eval_Qnli-dev_euclidean_f1_threshold": 16.683025360107422, + "eval_Qnli-dev_euclidean_precision": 0.6068111455108359, + "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 312.4605407714844, + "eval_Qnli-dev_manhattan_ap": 0.7728610432273579, + "eval_Qnli-dev_manhattan_f1": 0.7058823529411764, + "eval_Qnli-dev_manhattan_f1_threshold": 345.48614501953125, + "eval_Qnli-dev_manhattan_precision": 0.6233766233766234, + "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 355.11505126953125, + "eval_Qnli-dev_max_ap": 0.7758602239414594, + "eval_Qnli-dev_max_f1": 0.7058823529411764, + "eval_Qnli-dev_max_f1_threshold": 345.48614501953125, + "eval_Qnli-dev_max_precision": 0.6233766233766234, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.7421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8157534003257751, + "eval_allNLI-dev_cosine_ap": 0.6364302454210037, + "eval_allNLI-dev_cosine_f1": 0.6417582417582418, + "eval_allNLI-dev_cosine_f1_threshold": 0.7176768779754639, + "eval_allNLI-dev_cosine_precision": 0.5177304964539007, + "eval_allNLI-dev_cosine_recall": 0.8439306358381503, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 350.124267578125, + "eval_allNLI-dev_dot_ap": 0.5715311234068332, + "eval_allNLI-dev_dot_f1": 0.6215644820295984, + "eval_allNLI-dev_dot_f1_threshold": 292.3736572265625, + "eval_allNLI-dev_dot_precision": 0.49, + "eval_allNLI-dev_dot_recall": 0.8497109826589595, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.996118545532227, + "eval_allNLI-dev_euclidean_ap": 0.6427037302950565, + "eval_allNLI-dev_euclidean_f1": 0.6490384615384616, + "eval_allNLI-dev_euclidean_f1_threshold": 14.670398712158203, + "eval_allNLI-dev_euclidean_precision": 0.5555555555555556, + "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, + "eval_allNLI-dev_manhattan_accuracy": 0.74609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 280.2452392578125, + "eval_allNLI-dev_manhattan_ap": 0.6417070299829928, + "eval_allNLI-dev_manhattan_f1": 0.6459330143540669, + "eval_allNLI-dev_manhattan_f1_threshold": 309.9227294921875, + "eval_allNLI-dev_manhattan_precision": 0.5510204081632653, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 350.124267578125, + "eval_allNLI-dev_max_ap": 0.6427037302950565, + "eval_allNLI-dev_max_f1": 0.6490384615384616, + "eval_allNLI-dev_max_f1_threshold": 309.9227294921875, + "eval_allNLI-dev_max_precision": 0.5555555555555556, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7758602239414594, + "eval_sts-test_pearson_cosine": 0.8442033196393799, + "eval_sts-test_pearson_dot": 0.8366727645093097, + "eval_sts-test_pearson_euclidean": 0.8716408035418403, + "eval_sts-test_pearson_manhattan": 0.8701963414499901, + "eval_sts-test_pearson_max": 0.8716408035418403, + "eval_sts-test_spearman_cosine": 0.8748100090272218, + "eval_sts-test_spearman_dot": 0.8311957876852548, + "eval_sts-test_spearman_euclidean": 0.8698756901949446, + "eval_sts-test_spearman_manhattan": 0.8686798022367821, + "eval_sts-test_spearman_max": 0.8748100090272218, + "eval_vitaminc-pairs_loss": 3.404799222946167, + "eval_vitaminc-pairs_runtime": 3.2177, + "eval_vitaminc-pairs_samples_per_second": 39.779, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_negation-triplets_loss": 0.9570626020431519, + "eval_negation-triplets_runtime": 0.7789, + "eval_negation-triplets_samples_per_second": 164.331, + "eval_negation-triplets_steps_per_second": 1.284, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_scitail-pairs-pos_loss": 0.14190398156642914, + "eval_scitail-pairs-pos_runtime": 0.9178, + "eval_scitail-pairs-pos_samples_per_second": 139.46, + "eval_scitail-pairs-pos_steps_per_second": 1.09, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_scitail-pairs-qa_loss": 0.00019659155805129558, + "eval_scitail-pairs-qa_runtime": 0.608, + "eval_scitail-pairs-qa_samples_per_second": 210.535, + "eval_scitail-pairs-qa_steps_per_second": 1.645, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_xsum-pairs_loss": 0.2618999183177948, + "eval_xsum-pairs_runtime": 3.0347, + "eval_xsum-pairs_samples_per_second": 42.179, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_sciq_pairs_loss": 0.10014694184064865, + "eval_sciq_pairs_runtime": 3.5167, + "eval_sciq_pairs_samples_per_second": 36.398, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_qasc_pairs_loss": 0.1779145896434784, + "eval_qasc_pairs_runtime": 0.6251, + "eval_qasc_pairs_samples_per_second": 204.758, + "eval_qasc_pairs_steps_per_second": 1.6, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_openbookqa_pairs_loss": 0.91819167137146, + "eval_openbookqa_pairs_runtime": 0.6012, + "eval_openbookqa_pairs_samples_per_second": 212.918, + "eval_openbookqa_pairs_steps_per_second": 1.663, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_msmarco_pairs_loss": 0.8954432010650635, + "eval_msmarco_pairs_runtime": 1.5269, + "eval_msmarco_pairs_samples_per_second": 83.832, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_nq_pairs_loss": 0.6187785267829895, + "eval_nq_pairs_runtime": 2.9105, + "eval_nq_pairs_samples_per_second": 43.978, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_trivia_pairs_loss": 0.9466494917869568, + "eval_trivia_pairs_runtime": 3.4508, + "eval_trivia_pairs_samples_per_second": 37.092, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_gooaq_pairs_loss": 0.41242125630378723, + "eval_gooaq_pairs_runtime": 0.9631, + "eval_gooaq_pairs_samples_per_second": 132.908, + "eval_gooaq_pairs_steps_per_second": 1.038, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_paws-pos_loss": 0.0224080178886652, + "eval_paws-pos_runtime": 0.7079, + "eval_paws-pos_samples_per_second": 180.81, + "eval_paws-pos_steps_per_second": 1.413, + "step": 2080 + }, + { + "epoch": 2.139917695473251, + "eval_global_dataset_loss": 0.4992813766002655, + "eval_global_dataset_runtime": 13.4133, + "eval_global_dataset_samples_per_second": 31.014, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2080 + }, + { + "epoch": 2.1409465020576133, + "grad_norm": 10.416133880615234, + "learning_rate": 2.491426450880469e-05, + "loss": 0.7257, + "step": 2081 + }, + { + "epoch": 2.1419753086419755, + "grad_norm": 0.7645072340965271, + "learning_rate": 2.489938810120598e-05, + "loss": 0.052, + "step": 2082 + }, + { + "epoch": 2.1430041152263373, + "grad_norm": 4.174610614776611, + "learning_rate": 2.4884509100131952e-05, + "loss": 0.1194, + "step": 2083 + }, + { + "epoch": 2.1440329218106995, + "grad_norm": 3.7576026916503906, + "learning_rate": 2.486962753022308e-05, + "loss": 0.102, + "step": 2084 + }, + { + "epoch": 2.1450617283950617, + "grad_norm": 8.263005256652832, + "learning_rate": 2.485474341612408e-05, + "loss": 0.4642, + "step": 2085 + }, + { + "epoch": 2.146090534979424, + "grad_norm": 12.31833267211914, + "learning_rate": 2.4839856782483885e-05, + "loss": 0.7639, + "step": 2086 + }, + { + "epoch": 2.147119341563786, + "grad_norm": 6.041854381561279, + "learning_rate": 2.4824967653955605e-05, + "loss": 0.1744, + "step": 2087 + }, + { + "epoch": 2.148148148148148, + "grad_norm": 11.772113800048828, + "learning_rate": 2.4810076055196484e-05, + "loss": 0.7136, + "step": 2088 + }, + { + "epoch": 2.14917695473251, + "grad_norm": 5.519857406616211, + "learning_rate": 2.4795182010867846e-05, + "loss": 0.1451, + "step": 2089 + }, + { + "epoch": 2.1502057613168724, + "grad_norm": 4.091213703155518, + "learning_rate": 2.478028554563508e-05, + "loss": 0.0667, + "step": 2090 + }, + { + "epoch": 2.1512345679012346, + "grad_norm": 4.884109020233154, + "learning_rate": 2.4765386684167563e-05, + "loss": 0.1793, + "step": 2091 + }, + { + "epoch": 2.152263374485597, + "grad_norm": 1.743223786354065, + "learning_rate": 2.475048545113866e-05, + "loss": 0.0274, + "step": 2092 + }, + { + "epoch": 2.153292181069959, + "grad_norm": 3.984705686569214, + "learning_rate": 2.4735581871225657e-05, + "loss": 0.2496, + "step": 2093 + }, + { + "epoch": 2.154320987654321, + "grad_norm": 0.780319094657898, + "learning_rate": 2.472067596910972e-05, + "loss": 0.0399, + "step": 2094 + }, + { + "epoch": 2.155349794238683, + "grad_norm": 5.758412837982178, + "learning_rate": 2.470576776947588e-05, + "loss": 0.1306, + "step": 2095 + }, + { + "epoch": 2.156378600823045, + "grad_norm": 4.503692150115967, + "learning_rate": 2.4690857297012933e-05, + "loss": 0.1086, + "step": 2096 + }, + { + "epoch": 2.1574074074074074, + "grad_norm": 4.799394130706787, + "learning_rate": 2.46759445764135e-05, + "loss": 0.1679, + "step": 2097 + }, + { + "epoch": 2.1584362139917697, + "grad_norm": 7.399900436401367, + "learning_rate": 2.4661029632373862e-05, + "loss": 0.2773, + "step": 2098 + }, + { + "epoch": 2.159465020576132, + "grad_norm": 7.369089603424072, + "learning_rate": 2.464611248959402e-05, + "loss": 0.3006, + "step": 2099 + }, + { + "epoch": 2.1604938271604937, + "grad_norm": 5.1560869216918945, + "learning_rate": 2.4631193172777604e-05, + "loss": 0.1716, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_Qnli-dev_cosine_accuracy": 0.7109375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7782326340675354, + "eval_Qnli-dev_cosine_ap": 0.757763107379561, + "eval_Qnli-dev_cosine_f1": 0.687813021702838, + "eval_Qnli-dev_cosine_f1_threshold": 0.675483226776123, + "eval_Qnli-dev_cosine_precision": 0.5674931129476584, + "eval_Qnli-dev_cosine_recall": 0.8728813559322034, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 356.8968505859375, + "eval_Qnli-dev_dot_ap": 0.6873747505236181, + "eval_Qnli-dev_dot_f1": 0.6698113207547169, + "eval_Qnli-dev_dot_f1_threshold": 282.644775390625, + "eval_Qnli-dev_dot_precision": 0.5325, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.423986434936523, + "eval_Qnli-dev_euclidean_ap": 0.7648791936584969, + "eval_Qnli-dev_euclidean_f1": 0.7001733102253034, + "eval_Qnli-dev_euclidean_f1_threshold": 16.778839111328125, + "eval_Qnli-dev_euclidean_precision": 0.592375366568915, + "eval_Qnli-dev_euclidean_recall": 0.8559322033898306, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 301.74945068359375, + "eval_Qnli-dev_manhattan_ap": 0.7650566601023531, + "eval_Qnli-dev_manhattan_f1": 0.693200663349917, + "eval_Qnli-dev_manhattan_f1_threshold": 361.003173828125, + "eval_Qnli-dev_manhattan_precision": 0.5694822888283378, + "eval_Qnli-dev_manhattan_recall": 0.885593220338983, + "eval_Qnli-dev_max_accuracy": 0.71484375, + "eval_Qnli-dev_max_accuracy_threshold": 356.8968505859375, + "eval_Qnli-dev_max_ap": 0.7650566601023531, + "eval_Qnli-dev_max_f1": 0.7001733102253034, + "eval_Qnli-dev_max_f1_threshold": 361.003173828125, + "eval_Qnli-dev_max_precision": 0.592375366568915, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8163294792175293, + "eval_allNLI-dev_cosine_ap": 0.6310447182631718, + "eval_allNLI-dev_cosine_f1": 0.6367924528301887, + "eval_allNLI-dev_cosine_f1_threshold": 0.7563125491142273, + "eval_allNLI-dev_cosine_precision": 0.5378486055776892, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 370.98956298828125, + "eval_allNLI-dev_dot_ap": 0.5647638928000094, + "eval_allNLI-dev_dot_f1": 0.6167400881057269, + "eval_allNLI-dev_dot_f1_threshold": 315.9678649902344, + "eval_allNLI-dev_dot_precision": 0.498220640569395, + "eval_allNLI-dev_dot_recall": 0.8092485549132948, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.188844680786133, + "eval_allNLI-dev_euclidean_ap": 0.6389873418454206, + "eval_allNLI-dev_euclidean_f1": 0.6511627906976744, + "eval_allNLI-dev_euclidean_f1_threshold": 14.704500198364258, + "eval_allNLI-dev_euclidean_precision": 0.5447470817120622, + "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, + "eval_allNLI-dev_manhattan_accuracy": 0.73828125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 274.9945068359375, + "eval_allNLI-dev_manhattan_ap": 0.6360244459755536, + "eval_allNLI-dev_manhattan_f1": 0.6529680365296804, + "eval_allNLI-dev_manhattan_f1_threshold": 312.77740478515625, + "eval_allNLI-dev_manhattan_precision": 0.539622641509434, + "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 370.98956298828125, + "eval_allNLI-dev_max_ap": 0.6389873418454206, + "eval_allNLI-dev_max_f1": 0.6529680365296804, + "eval_allNLI-dev_max_f1_threshold": 315.9678649902344, + "eval_allNLI-dev_max_precision": 0.5447470817120622, + "eval_allNLI-dev_max_recall": 0.8265895953757225, + "eval_sequential_score": 0.7650566601023531, + "eval_sts-test_pearson_cosine": 0.8466836188121143, + "eval_sts-test_pearson_dot": 0.8355830896354994, + "eval_sts-test_pearson_euclidean": 0.875485343008167, + "eval_sts-test_pearson_manhattan": 0.8748475934516446, + "eval_sts-test_pearson_max": 0.875485343008167, + "eval_sts-test_spearman_cosine": 0.8781566426078548, + "eval_sts-test_spearman_dot": 0.8303461402364777, + "eval_sts-test_spearman_euclidean": 0.8742917894203092, + "eval_sts-test_spearman_manhattan": 0.8729370344842075, + "eval_sts-test_spearman_max": 0.8781566426078548, + "eval_vitaminc-pairs_loss": 3.370380163192749, + "eval_vitaminc-pairs_runtime": 3.228, + "eval_vitaminc-pairs_samples_per_second": 39.653, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_negation-triplets_loss": 0.8976505994796753, + "eval_negation-triplets_runtime": 0.7647, + "eval_negation-triplets_samples_per_second": 167.379, + "eval_negation-triplets_steps_per_second": 1.308, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_scitail-pairs-pos_loss": 0.13327747583389282, + "eval_scitail-pairs-pos_runtime": 0.9109, + "eval_scitail-pairs-pos_samples_per_second": 140.516, + "eval_scitail-pairs-pos_steps_per_second": 1.098, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_scitail-pairs-qa_loss": 0.0003133401623927057, + "eval_scitail-pairs-qa_runtime": 0.6093, + "eval_scitail-pairs-qa_samples_per_second": 210.065, + "eval_scitail-pairs-qa_steps_per_second": 1.641, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_xsum-pairs_loss": 0.2953195869922638, + "eval_xsum-pairs_runtime": 3.0347, + "eval_xsum-pairs_samples_per_second": 42.178, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_sciq_pairs_loss": 0.096987284719944, + "eval_sciq_pairs_runtime": 3.5139, + "eval_sciq_pairs_samples_per_second": 36.427, + "eval_sciq_pairs_steps_per_second": 0.285, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_qasc_pairs_loss": 0.17670966684818268, + "eval_qasc_pairs_runtime": 0.6263, + "eval_qasc_pairs_samples_per_second": 204.37, + "eval_qasc_pairs_steps_per_second": 1.597, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_openbookqa_pairs_loss": 0.9312919974327087, + "eval_openbookqa_pairs_runtime": 0.6039, + "eval_openbookqa_pairs_samples_per_second": 211.964, + "eval_openbookqa_pairs_steps_per_second": 1.656, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_msmarco_pairs_loss": 0.8172768950462341, + "eval_msmarco_pairs_runtime": 1.5305, + "eval_msmarco_pairs_samples_per_second": 83.635, + "eval_msmarco_pairs_steps_per_second": 0.653, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_nq_pairs_loss": 0.7029488682746887, + "eval_nq_pairs_runtime": 2.9032, + "eval_nq_pairs_samples_per_second": 44.089, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_trivia_pairs_loss": 0.8234426975250244, + "eval_trivia_pairs_runtime": 3.4781, + "eval_trivia_pairs_samples_per_second": 36.802, + "eval_trivia_pairs_steps_per_second": 0.288, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_gooaq_pairs_loss": 0.38398078083992004, + "eval_gooaq_pairs_runtime": 0.961, + "eval_gooaq_pairs_samples_per_second": 133.192, + "eval_gooaq_pairs_steps_per_second": 1.041, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_paws-pos_loss": 0.022808760404586792, + "eval_paws-pos_runtime": 0.7084, + "eval_paws-pos_samples_per_second": 180.688, + "eval_paws-pos_steps_per_second": 1.412, + "step": 2100 + }, + { + "epoch": 2.1604938271604937, + "eval_global_dataset_loss": 0.48940902948379517, + "eval_global_dataset_runtime": 13.4199, + "eval_global_dataset_samples_per_second": 30.999, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2100 + }, + { + "epoch": 2.161522633744856, + "grad_norm": 6.568051338195801, + "learning_rate": 2.461627170663186e-05, + "loss": 0.3582, + "step": 2101 + }, + { + "epoch": 2.162551440329218, + "grad_norm": 5.944247722625732, + "learning_rate": 2.4601348115867564e-05, + "loss": 0.1507, + "step": 2102 + }, + { + "epoch": 2.1635802469135803, + "grad_norm": 10.45062255859375, + "learning_rate": 2.4586422425199033e-05, + "loss": 0.7578, + "step": 2103 + }, + { + "epoch": 2.1646090534979425, + "grad_norm": 14.21491813659668, + "learning_rate": 2.4571494659344057e-05, + "loss": 1.832, + "step": 2104 + }, + { + "epoch": 2.1656378600823043, + "grad_norm": 7.499361991882324, + "learning_rate": 2.4556564843023855e-05, + "loss": 0.5014, + "step": 2105 + }, + { + "epoch": 2.1666666666666665, + "grad_norm": 9.022387504577637, + "learning_rate": 2.4541633000963055e-05, + "loss": 0.4058, + "step": 2106 + }, + { + "epoch": 2.1676954732510287, + "grad_norm": 5.3525166511535645, + "learning_rate": 2.452669915788963e-05, + "loss": 0.1921, + "step": 2107 + }, + { + "epoch": 2.168724279835391, + "grad_norm": 4.118523120880127, + "learning_rate": 2.4511763338534864e-05, + "loss": 0.1086, + "step": 2108 + }, + { + "epoch": 2.169753086419753, + "grad_norm": 8.975444793701172, + "learning_rate": 2.449682556763333e-05, + "loss": 0.5926, + "step": 2109 + }, + { + "epoch": 2.1707818930041154, + "grad_norm": 4.746514320373535, + "learning_rate": 2.4481885869922812e-05, + "loss": 0.2029, + "step": 2110 + }, + { + "epoch": 2.171810699588477, + "grad_norm": 8.580132484436035, + "learning_rate": 2.44669442701443e-05, + "loss": 0.3947, + "step": 2111 + }, + { + "epoch": 2.1728395061728394, + "grad_norm": 6.105350017547607, + "learning_rate": 2.445200079304192e-05, + "loss": 0.4369, + "step": 2112 + }, + { + "epoch": 2.1738683127572016, + "grad_norm": 7.118346691131592, + "learning_rate": 2.4437055463362933e-05, + "loss": 0.3152, + "step": 2113 + }, + { + "epoch": 2.174897119341564, + "grad_norm": 2.816272735595703, + "learning_rate": 2.4422108305857637e-05, + "loss": 0.0426, + "step": 2114 + }, + { + "epoch": 2.175925925925926, + "grad_norm": 2.252847194671631, + "learning_rate": 2.4407159345279374e-05, + "loss": 0.0696, + "step": 2115 + }, + { + "epoch": 2.1769547325102883, + "grad_norm": 7.112878322601318, + "learning_rate": 2.439220860638446e-05, + "loss": 0.2596, + "step": 2116 + }, + { + "epoch": 2.17798353909465, + "grad_norm": 8.24142837524414, + "learning_rate": 2.437725611393219e-05, + "loss": 0.4628, + "step": 2117 + }, + { + "epoch": 2.1790123456790123, + "grad_norm": 3.664426803588867, + "learning_rate": 2.436230189268471e-05, + "loss": 0.0567, + "step": 2118 + }, + { + "epoch": 2.1800411522633745, + "grad_norm": 1.27444589138031, + "learning_rate": 2.4347345967407072e-05, + "loss": 0.0208, + "step": 2119 + }, + { + "epoch": 2.1810699588477367, + "grad_norm": 4.611666679382324, + "learning_rate": 2.433238836286713e-05, + "loss": 0.1303, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7831888198852539, + "eval_Qnli-dev_cosine_ap": 0.7659577175540198, + "eval_Qnli-dev_cosine_f1": 0.7093235831809872, + "eval_Qnli-dev_cosine_f1_threshold": 0.706408679485321, + "eval_Qnli-dev_cosine_precision": 0.6237942122186495, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 354.57574462890625, + "eval_Qnli-dev_dot_ap": 0.7142960906071153, + "eval_Qnli-dev_dot_f1": 0.6812816188870151, + "eval_Qnli-dev_dot_f1_threshold": 297.7771911621094, + "eval_Qnli-dev_dot_precision": 0.5658263305322129, + "eval_Qnli-dev_dot_recall": 0.8559322033898306, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.078405380249023, + "eval_Qnli-dev_euclidean_ap": 0.7712189177124892, + "eval_Qnli-dev_euclidean_f1": 0.6994727592267135, + "eval_Qnli-dev_euclidean_f1_threshold": 16.541831970214844, + "eval_Qnli-dev_euclidean_precision": 0.5975975975975976, + "eval_Qnli-dev_euclidean_recall": 0.8432203389830508, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 297.5526123046875, + "eval_Qnli-dev_manhattan_ap": 0.7743633598322663, + "eval_Qnli-dev_manhattan_f1": 0.70061099796334, + "eval_Qnli-dev_manhattan_f1_threshold": 320.9577331542969, + "eval_Qnli-dev_manhattan_precision": 0.6745098039215687, + "eval_Qnli-dev_manhattan_recall": 0.7288135593220338, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 354.57574462890625, + "eval_Qnli-dev_max_ap": 0.7743633598322663, + "eval_Qnli-dev_max_f1": 0.7093235831809872, + "eval_Qnli-dev_max_f1_threshold": 320.9577331542969, + "eval_Qnli-dev_max_precision": 0.6745098039215687, + "eval_Qnli-dev_max_recall": 0.8559322033898306, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8165189027786255, + "eval_allNLI-dev_cosine_ap": 0.6288426913268541, + "eval_allNLI-dev_cosine_f1": 0.6465116279069768, + "eval_allNLI-dev_cosine_f1_threshold": 0.7471227645874023, + "eval_allNLI-dev_cosine_precision": 0.5408560311284046, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.708984375, + "eval_allNLI-dev_dot_accuracy_threshold": 355.15179443359375, + "eval_allNLI-dev_dot_ap": 0.5568715482689243, + "eval_allNLI-dev_dot_f1": 0.5954545454545453, + "eval_allNLI-dev_dot_f1_threshold": 319.06378173828125, + "eval_allNLI-dev_dot_precision": 0.49063670411985016, + "eval_allNLI-dev_dot_recall": 0.7572254335260116, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.634376525878906, + "eval_allNLI-dev_euclidean_ap": 0.6383365591990686, + "eval_allNLI-dev_euclidean_f1": 0.6508313539192399, + "eval_allNLI-dev_euclidean_f1_threshold": 14.622352600097656, + "eval_allNLI-dev_euclidean_precision": 0.5524193548387096, + "eval_allNLI-dev_euclidean_recall": 0.791907514450867, + "eval_allNLI-dev_manhattan_accuracy": 0.748046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 279.4615173339844, + "eval_allNLI-dev_manhattan_ap": 0.6353048860025735, + "eval_allNLI-dev_manhattan_f1": 0.6515837104072397, + "eval_allNLI-dev_manhattan_f1_threshold": 313.66204833984375, + "eval_allNLI-dev_manhattan_precision": 0.5353159851301115, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 355.15179443359375, + "eval_allNLI-dev_max_ap": 0.6383365591990686, + "eval_allNLI-dev_max_f1": 0.6515837104072397, + "eval_allNLI-dev_max_f1_threshold": 319.06378173828125, + "eval_allNLI-dev_max_precision": 0.5524193548387096, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7743633598322663, + "eval_sts-test_pearson_cosine": 0.8413452215643851, + "eval_sts-test_pearson_dot": 0.8227410976773657, + "eval_sts-test_pearson_euclidean": 0.8730961658540073, + "eval_sts-test_pearson_manhattan": 0.8706832766511694, + "eval_sts-test_pearson_max": 0.8730961658540073, + "eval_sts-test_spearman_cosine": 0.8747658395322285, + "eval_sts-test_spearman_dot": 0.8170051179467807, + "eval_sts-test_spearman_euclidean": 0.8716662776657373, + "eval_sts-test_spearman_manhattan": 0.8683805505518579, + "eval_sts-test_spearman_max": 0.8747658395322285, + "eval_vitaminc-pairs_loss": 3.3158156871795654, + "eval_vitaminc-pairs_runtime": 3.2432, + "eval_vitaminc-pairs_samples_per_second": 39.467, + "eval_vitaminc-pairs_steps_per_second": 0.308, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_negation-triplets_loss": 0.8905675411224365, + "eval_negation-triplets_runtime": 0.776, + "eval_negation-triplets_samples_per_second": 164.945, + "eval_negation-triplets_steps_per_second": 1.289, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_scitail-pairs-pos_loss": 0.11103859543800354, + "eval_scitail-pairs-pos_runtime": 0.9444, + "eval_scitail-pairs-pos_samples_per_second": 135.532, + "eval_scitail-pairs-pos_steps_per_second": 1.059, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_scitail-pairs-qa_loss": 0.00043057429138571024, + "eval_scitail-pairs-qa_runtime": 0.6124, + "eval_scitail-pairs-qa_samples_per_second": 209.017, + "eval_scitail-pairs-qa_steps_per_second": 1.633, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_xsum-pairs_loss": 0.26308897137641907, + "eval_xsum-pairs_runtime": 3.03, + "eval_xsum-pairs_samples_per_second": 42.245, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_sciq_pairs_loss": 0.09947659075260162, + "eval_sciq_pairs_runtime": 3.5279, + "eval_sciq_pairs_samples_per_second": 36.282, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_qasc_pairs_loss": 0.16730889678001404, + "eval_qasc_pairs_runtime": 0.6409, + "eval_qasc_pairs_samples_per_second": 199.734, + "eval_qasc_pairs_steps_per_second": 1.56, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_openbookqa_pairs_loss": 0.8517054319381714, + "eval_openbookqa_pairs_runtime": 0.6078, + "eval_openbookqa_pairs_samples_per_second": 210.596, + "eval_openbookqa_pairs_steps_per_second": 1.645, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_msmarco_pairs_loss": 0.8803500533103943, + "eval_msmarco_pairs_runtime": 1.5294, + "eval_msmarco_pairs_samples_per_second": 83.691, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_nq_pairs_loss": 0.7211570143699646, + "eval_nq_pairs_runtime": 2.9044, + "eval_nq_pairs_samples_per_second": 44.072, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_trivia_pairs_loss": 0.8099327087402344, + "eval_trivia_pairs_runtime": 3.4499, + "eval_trivia_pairs_samples_per_second": 37.102, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_gooaq_pairs_loss": 0.4508455991744995, + "eval_gooaq_pairs_runtime": 0.9607, + "eval_gooaq_pairs_samples_per_second": 133.237, + "eval_gooaq_pairs_steps_per_second": 1.041, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_paws-pos_loss": 0.02280852012336254, + "eval_paws-pos_runtime": 0.7113, + "eval_paws-pos_samples_per_second": 179.951, + "eval_paws-pos_steps_per_second": 1.406, + "step": 2120 + }, + { + "epoch": 2.1810699588477367, + "eval_global_dataset_loss": 0.5049570202827454, + "eval_global_dataset_runtime": 13.4082, + "eval_global_dataset_samples_per_second": 31.026, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2120 + }, + { + "epoch": 2.182098765432099, + "grad_norm": 7.965096473693848, + "learning_rate": 2.431742910383552e-05, + "loss": 0.3001, + "step": 2121 + }, + { + "epoch": 2.1831275720164607, + "grad_norm": 1.3078244924545288, + "learning_rate": 2.4302468215085635e-05, + "loss": 0.013, + "step": 2122 + }, + { + "epoch": 2.184156378600823, + "grad_norm": 8.0444974899292, + "learning_rate": 2.4287505721393537e-05, + "loss": 0.3598, + "step": 2123 + }, + { + "epoch": 2.185185185185185, + "grad_norm": 15.775069236755371, + "learning_rate": 2.4272541647537966e-05, + "loss": 2.2029, + "step": 2124 + }, + { + "epoch": 2.1862139917695473, + "grad_norm": 5.384196758270264, + "learning_rate": 2.4257576018300283e-05, + "loss": 0.1542, + "step": 2125 + }, + { + "epoch": 2.1872427983539096, + "grad_norm": 5.319241046905518, + "learning_rate": 2.4242608858464408e-05, + "loss": 0.1127, + "step": 2126 + }, + { + "epoch": 2.1882716049382718, + "grad_norm": 5.162769794464111, + "learning_rate": 2.4227640192816813e-05, + "loss": 0.1667, + "step": 2127 + }, + { + "epoch": 2.1893004115226335, + "grad_norm": 4.0983357429504395, + "learning_rate": 2.421267004614645e-05, + "loss": 0.0737, + "step": 2128 + }, + { + "epoch": 2.1903292181069958, + "grad_norm": 8.134471893310547, + "learning_rate": 2.4197698443244726e-05, + "loss": 0.3904, + "step": 2129 + }, + { + "epoch": 2.191358024691358, + "grad_norm": 7.660701274871826, + "learning_rate": 2.418272540890547e-05, + "loss": 0.3314, + "step": 2130 + }, + { + "epoch": 2.19238683127572, + "grad_norm": 5.454843997955322, + "learning_rate": 2.4167750967924873e-05, + "loss": 0.139, + "step": 2131 + }, + { + "epoch": 2.1934156378600824, + "grad_norm": 12.22930908203125, + "learning_rate": 2.4152775145101455e-05, + "loss": 0.6952, + "step": 2132 + }, + { + "epoch": 2.1944444444444446, + "grad_norm": 4.859829902648926, + "learning_rate": 2.413779796523602e-05, + "loss": 0.1082, + "step": 2133 + }, + { + "epoch": 2.1954732510288064, + "grad_norm": 7.867696285247803, + "learning_rate": 2.4122819453131633e-05, + "loss": 0.3075, + "step": 2134 + }, + { + "epoch": 2.1965020576131686, + "grad_norm": 7.93525505065918, + "learning_rate": 2.4107839633593557e-05, + "loss": 0.3398, + "step": 2135 + }, + { + "epoch": 2.197530864197531, + "grad_norm": 7.2044806480407715, + "learning_rate": 2.409285853142922e-05, + "loss": 0.3804, + "step": 2136 + }, + { + "epoch": 2.198559670781893, + "grad_norm": 9.675668716430664, + "learning_rate": 2.4077876171448172e-05, + "loss": 0.4977, + "step": 2137 + }, + { + "epoch": 2.1995884773662553, + "grad_norm": 4.7285261154174805, + "learning_rate": 2.4062892578462054e-05, + "loss": 0.2119, + "step": 2138 + }, + { + "epoch": 2.200617283950617, + "grad_norm": 8.28393268585205, + "learning_rate": 2.4047907777284544e-05, + "loss": 0.3307, + "step": 2139 + }, + { + "epoch": 2.2016460905349793, + "grad_norm": 9.960400581359863, + "learning_rate": 2.403292179273131e-05, + "loss": 0.4782, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.765581488609314, + "eval_Qnli-dev_cosine_ap": 0.7580039059833005, + "eval_Qnli-dev_cosine_f1": 0.6927374301675978, + "eval_Qnli-dev_cosine_f1_threshold": 0.7120383381843567, + "eval_Qnli-dev_cosine_precision": 0.6179401993355482, + "eval_Qnli-dev_cosine_recall": 0.788135593220339, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 349.69696044921875, + "eval_Qnli-dev_dot_ap": 0.7077644564177754, + "eval_Qnli-dev_dot_f1": 0.673434856175973, + "eval_Qnli-dev_dot_f1_threshold": 297.18817138671875, + "eval_Qnli-dev_dot_precision": 0.5605633802816902, + "eval_Qnli-dev_dot_recall": 0.8432203389830508, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.917366027832031, + "eval_Qnli-dev_euclidean_ap": 0.7640492109932211, + "eval_Qnli-dev_euclidean_f1": 0.7022900763358778, + "eval_Qnli-dev_euclidean_f1_threshold": 15.718003273010254, + "eval_Qnli-dev_euclidean_precision": 0.6388888888888888, + "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 290.0631103515625, + "eval_Qnli-dev_manhattan_ap": 0.7682290439889975, + "eval_Qnli-dev_manhattan_f1": 0.7018867924528301, + "eval_Qnli-dev_manhattan_f1_threshold": 334.3270263671875, + "eval_Qnli-dev_manhattan_precision": 0.6326530612244898, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 349.69696044921875, + "eval_Qnli-dev_max_ap": 0.7682290439889975, + "eval_Qnli-dev_max_f1": 0.7022900763358778, + "eval_Qnli-dev_max_f1_threshold": 334.3270263671875, + "eval_Qnli-dev_max_precision": 0.6388888888888888, + "eval_Qnli-dev_max_recall": 0.8432203389830508, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8141307830810547, + "eval_allNLI-dev_cosine_ap": 0.6345853397101863, + "eval_allNLI-dev_cosine_f1": 0.6462264150943396, + "eval_allNLI-dev_cosine_f1_threshold": 0.7495874166488647, + "eval_allNLI-dev_cosine_precision": 0.545816733067729, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 346.0972595214844, + "eval_allNLI-dev_dot_ap": 0.5619172672788605, + "eval_allNLI-dev_dot_f1": 0.6056644880174291, + "eval_allNLI-dev_dot_f1_threshold": 307.35089111328125, + "eval_allNLI-dev_dot_precision": 0.486013986013986, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.740234375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.474712371826172, + "eval_allNLI-dev_euclidean_ap": 0.6442614108351035, + "eval_allNLI-dev_euclidean_f1": 0.6512702078521939, + "eval_allNLI-dev_euclidean_f1_threshold": 14.748228073120117, + "eval_allNLI-dev_euclidean_precision": 0.5423076923076923, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 278.2958984375, + "eval_allNLI-dev_manhattan_ap": 0.6424357248372661, + "eval_allNLI-dev_manhattan_f1": 0.6523809523809525, + "eval_allNLI-dev_manhattan_f1_threshold": 303.55712890625, + "eval_allNLI-dev_manhattan_precision": 0.5546558704453441, + "eval_allNLI-dev_manhattan_recall": 0.791907514450867, + "eval_allNLI-dev_max_accuracy": 0.740234375, + "eval_allNLI-dev_max_accuracy_threshold": 346.0972595214844, + "eval_allNLI-dev_max_ap": 0.6442614108351035, + "eval_allNLI-dev_max_f1": 0.6523809523809525, + "eval_allNLI-dev_max_f1_threshold": 307.35089111328125, + "eval_allNLI-dev_max_precision": 0.5546558704453441, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7682290439889975, + "eval_sts-test_pearson_cosine": 0.8459220593950094, + "eval_sts-test_pearson_dot": 0.8327689789009902, + "eval_sts-test_pearson_euclidean": 0.8765604391215667, + "eval_sts-test_pearson_manhattan": 0.8740908290341837, + "eval_sts-test_pearson_max": 0.8765604391215667, + "eval_sts-test_spearman_cosine": 0.877046811325348, + "eval_sts-test_spearman_dot": 0.8285615046834501, + "eval_sts-test_spearman_euclidean": 0.8735896868803397, + "eval_sts-test_spearman_manhattan": 0.8719228813985556, + "eval_sts-test_spearman_max": 0.877046811325348, + "eval_vitaminc-pairs_loss": 3.2018566131591797, + "eval_vitaminc-pairs_runtime": 3.2479, + "eval_vitaminc-pairs_samples_per_second": 39.41, + "eval_vitaminc-pairs_steps_per_second": 0.308, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_negation-triplets_loss": 0.9213140606880188, + "eval_negation-triplets_runtime": 0.7717, + "eval_negation-triplets_samples_per_second": 165.875, + "eval_negation-triplets_steps_per_second": 1.296, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_scitail-pairs-pos_loss": 0.1060742661356926, + "eval_scitail-pairs-pos_runtime": 0.9175, + "eval_scitail-pairs-pos_samples_per_second": 139.511, + "eval_scitail-pairs-pos_steps_per_second": 1.09, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_scitail-pairs-qa_loss": 0.0005529926856979728, + "eval_scitail-pairs-qa_runtime": 0.6123, + "eval_scitail-pairs-qa_samples_per_second": 209.05, + "eval_scitail-pairs-qa_steps_per_second": 1.633, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_xsum-pairs_loss": 0.2825109660625458, + "eval_xsum-pairs_runtime": 3.0356, + "eval_xsum-pairs_samples_per_second": 42.166, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_sciq_pairs_loss": 0.09794219583272934, + "eval_sciq_pairs_runtime": 3.5238, + "eval_sciq_pairs_samples_per_second": 36.325, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_qasc_pairs_loss": 0.1473490297794342, + "eval_qasc_pairs_runtime": 0.6205, + "eval_qasc_pairs_samples_per_second": 206.29, + "eval_qasc_pairs_steps_per_second": 1.612, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_openbookqa_pairs_loss": 0.8643597364425659, + "eval_openbookqa_pairs_runtime": 0.6001, + "eval_openbookqa_pairs_samples_per_second": 213.286, + "eval_openbookqa_pairs_steps_per_second": 1.666, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_msmarco_pairs_loss": 0.8728999495506287, + "eval_msmarco_pairs_runtime": 1.5248, + "eval_msmarco_pairs_samples_per_second": 83.947, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_nq_pairs_loss": 0.7015603184700012, + "eval_nq_pairs_runtime": 2.9064, + "eval_nq_pairs_samples_per_second": 44.04, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_trivia_pairs_loss": 0.8600196838378906, + "eval_trivia_pairs_runtime": 3.4526, + "eval_trivia_pairs_samples_per_second": 37.073, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_gooaq_pairs_loss": 0.41348978877067566, + "eval_gooaq_pairs_runtime": 0.9592, + "eval_gooaq_pairs_samples_per_second": 133.451, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_paws-pos_loss": 0.02336895279586315, + "eval_paws-pos_runtime": 0.7157, + "eval_paws-pos_samples_per_second": 178.857, + "eval_paws-pos_steps_per_second": 1.397, + "step": 2140 + }, + { + "epoch": 2.2016460905349793, + "eval_global_dataset_loss": 0.48977920413017273, + "eval_global_dataset_runtime": 13.4172, + "eval_global_dataset_samples_per_second": 31.005, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2140 + }, + { + "epoch": 2.2026748971193415, + "grad_norm": 4.7281365394592285, + "learning_rate": 2.4017934649620005e-05, + "loss": 0.0815, + "step": 2141 + }, + { + "epoch": 2.2037037037037037, + "grad_norm": 5.841175556182861, + "learning_rate": 2.4002946372770173e-05, + "loss": 0.1414, + "step": 2142 + }, + { + "epoch": 2.204732510288066, + "grad_norm": 10.14210033416748, + "learning_rate": 2.3987956987003262e-05, + "loss": 0.4976, + "step": 2143 + }, + { + "epoch": 2.205761316872428, + "grad_norm": 6.116671562194824, + "learning_rate": 2.3972966517142535e-05, + "loss": 0.3755, + "step": 2144 + }, + { + "epoch": 2.20679012345679, + "grad_norm": 7.912659168243408, + "learning_rate": 2.3957974988013056e-05, + "loss": 0.276, + "step": 2145 + }, + { + "epoch": 2.207818930041152, + "grad_norm": 4.809524059295654, + "learning_rate": 2.3942982424441652e-05, + "loss": 0.1198, + "step": 2146 + }, + { + "epoch": 2.2088477366255144, + "grad_norm": 3.9036455154418945, + "learning_rate": 2.3927988851256852e-05, + "loss": 0.0837, + "step": 2147 + }, + { + "epoch": 2.2098765432098766, + "grad_norm": 5.67017936706543, + "learning_rate": 2.3912994293288868e-05, + "loss": 0.2129, + "step": 2148 + }, + { + "epoch": 2.210905349794239, + "grad_norm": 3.9855170249938965, + "learning_rate": 2.3897998775369527e-05, + "loss": 0.0752, + "step": 2149 + }, + { + "epoch": 2.211934156378601, + "grad_norm": 9.995519638061523, + "learning_rate": 2.3883002322332263e-05, + "loss": 0.5484, + "step": 2150 + }, + { + "epoch": 2.212962962962963, + "grad_norm": 12.262630462646484, + "learning_rate": 2.3868004959012048e-05, + "loss": 0.7996, + "step": 2151 + }, + { + "epoch": 2.213991769547325, + "grad_norm": 13.158291816711426, + "learning_rate": 2.3853006710245366e-05, + "loss": 1.6459, + "step": 2152 + }, + { + "epoch": 2.2150205761316872, + "grad_norm": 4.3596367835998535, + "learning_rate": 2.383800760087016e-05, + "loss": 0.0923, + "step": 2153 + }, + { + "epoch": 2.2160493827160495, + "grad_norm": 7.671905994415283, + "learning_rate": 2.382300765572581e-05, + "loss": 0.4755, + "step": 2154 + }, + { + "epoch": 2.2170781893004117, + "grad_norm": 3.321316719055176, + "learning_rate": 2.3808006899653076e-05, + "loss": 0.0699, + "step": 2155 + }, + { + "epoch": 2.2181069958847734, + "grad_norm": 5.187155723571777, + "learning_rate": 2.3793005357494044e-05, + "loss": 0.1471, + "step": 2156 + }, + { + "epoch": 2.2191358024691357, + "grad_norm": 3.165531873703003, + "learning_rate": 2.3778003054092144e-05, + "loss": 0.0703, + "step": 2157 + }, + { + "epoch": 2.220164609053498, + "grad_norm": 5.704407691955566, + "learning_rate": 2.376300001429201e-05, + "loss": 0.2989, + "step": 2158 + }, + { + "epoch": 2.22119341563786, + "grad_norm": 4.120200157165527, + "learning_rate": 2.3747996262939545e-05, + "loss": 0.0834, + "step": 2159 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 6.008984088897705, + "learning_rate": 2.3732991824881802e-05, + "loss": 0.2205, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_Qnli-dev_cosine_accuracy": 0.71875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7424961924552917, + "eval_Qnli-dev_cosine_ap": 0.7690305002007699, + "eval_Qnli-dev_cosine_f1": 0.6987447698744769, + "eval_Qnli-dev_cosine_f1_threshold": 0.7424961924552917, + "eval_Qnli-dev_cosine_precision": 0.6900826446280992, + "eval_Qnli-dev_cosine_recall": 0.7076271186440678, + "eval_Qnli-dev_dot_accuracy": 0.6796875, + "eval_Qnli-dev_dot_accuracy_threshold": 345.74554443359375, + "eval_Qnli-dev_dot_ap": 0.7234396167600803, + "eval_Qnli-dev_dot_f1": 0.6786355475763016, + "eval_Qnli-dev_dot_f1_threshold": 308.107177734375, + "eval_Qnli-dev_dot_precision": 0.5887850467289719, + "eval_Qnli-dev_dot_recall": 0.8008474576271186, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.977933883666992, + "eval_Qnli-dev_euclidean_ap": 0.775756805101809, + "eval_Qnli-dev_euclidean_f1": 0.7104247104247103, + "eval_Qnli-dev_euclidean_f1_threshold": 15.89659595489502, + "eval_Qnli-dev_euclidean_precision": 0.6524822695035462, + "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 308.61138916015625, + "eval_Qnli-dev_manhattan_ap": 0.7777692408499661, + "eval_Qnli-dev_manhattan_f1": 0.710172744721689, + "eval_Qnli-dev_manhattan_f1_threshold": 335.52056884765625, + "eval_Qnli-dev_manhattan_precision": 0.6491228070175439, + "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, + "eval_Qnli-dev_max_accuracy": 0.72265625, + "eval_Qnli-dev_max_accuracy_threshold": 345.74554443359375, + "eval_Qnli-dev_max_ap": 0.7777692408499661, + "eval_Qnli-dev_max_f1": 0.7104247104247103, + "eval_Qnli-dev_max_f1_threshold": 335.52056884765625, + "eval_Qnli-dev_max_precision": 0.6900826446280992, + "eval_Qnli-dev_max_recall": 0.8008474576271186, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.816895604133606, + "eval_allNLI-dev_cosine_ap": 0.6317297508629345, + "eval_allNLI-dev_cosine_f1": 0.6492374727668845, + "eval_allNLI-dev_cosine_f1_threshold": 0.7320500612258911, + "eval_allNLI-dev_cosine_precision": 0.5209790209790209, + "eval_allNLI-dev_cosine_recall": 0.861271676300578, + "eval_allNLI-dev_dot_accuracy": 0.705078125, + "eval_allNLI-dev_dot_accuracy_threshold": 345.56048583984375, + "eval_allNLI-dev_dot_ap": 0.5710281774303337, + "eval_allNLI-dev_dot_f1": 0.6117647058823529, + "eval_allNLI-dev_dot_f1_threshold": 325.12750244140625, + "eval_allNLI-dev_dot_precision": 0.5158730158730159, + "eval_allNLI-dev_dot_recall": 0.7514450867052023, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.781333923339844, + "eval_allNLI-dev_euclidean_ap": 0.6402188977328761, + "eval_allNLI-dev_euclidean_f1": 0.6510538641686183, + "eval_allNLI-dev_euclidean_f1_threshold": 14.694303512573242, + "eval_allNLI-dev_euclidean_precision": 0.547244094488189, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 253.4004364013672, + "eval_allNLI-dev_manhattan_ap": 0.6386852941077191, + "eval_allNLI-dev_manhattan_f1": 0.6507592190889371, + "eval_allNLI-dev_manhattan_f1_threshold": 321.07574462890625, + "eval_allNLI-dev_manhattan_precision": 0.5208333333333334, + "eval_allNLI-dev_manhattan_recall": 0.8670520231213873, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 345.56048583984375, + "eval_allNLI-dev_max_ap": 0.6402188977328761, + "eval_allNLI-dev_max_f1": 0.6510538641686183, + "eval_allNLI-dev_max_f1_threshold": 325.12750244140625, + "eval_allNLI-dev_max_precision": 0.547244094488189, + "eval_allNLI-dev_max_recall": 0.8670520231213873, + "eval_sequential_score": 0.7777692408499661, + "eval_sts-test_pearson_cosine": 0.8503418350196617, + "eval_sts-test_pearson_dot": 0.8399142019186396, + "eval_sts-test_pearson_euclidean": 0.8776232485258656, + "eval_sts-test_pearson_manhattan": 0.8749053666923767, + "eval_sts-test_pearson_max": 0.8776232485258656, + "eval_sts-test_spearman_cosine": 0.8801646317064868, + "eval_sts-test_spearman_dot": 0.8413177174892843, + "eval_sts-test_spearman_euclidean": 0.8759278955367787, + "eval_sts-test_spearman_manhattan": 0.8726772534118303, + "eval_sts-test_spearman_max": 0.8801646317064868, + "eval_vitaminc-pairs_loss": 3.0154216289520264, + "eval_vitaminc-pairs_runtime": 3.2312, + "eval_vitaminc-pairs_samples_per_second": 39.614, + "eval_vitaminc-pairs_steps_per_second": 0.309, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_negation-triplets_loss": 0.9069598317146301, + "eval_negation-triplets_runtime": 0.7616, + "eval_negation-triplets_samples_per_second": 168.07, + "eval_negation-triplets_steps_per_second": 1.313, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_scitail-pairs-pos_loss": 0.11139774322509766, + "eval_scitail-pairs-pos_runtime": 0.911, + "eval_scitail-pairs-pos_samples_per_second": 140.499, + "eval_scitail-pairs-pos_steps_per_second": 1.098, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_scitail-pairs-qa_loss": 0.00041776278521865606, + "eval_scitail-pairs-qa_runtime": 0.6074, + "eval_scitail-pairs-qa_samples_per_second": 210.737, + "eval_scitail-pairs-qa_steps_per_second": 1.646, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_xsum-pairs_loss": 0.28882691264152527, + "eval_xsum-pairs_runtime": 3.0328, + "eval_xsum-pairs_samples_per_second": 42.205, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_sciq_pairs_loss": 0.09604327380657196, + "eval_sciq_pairs_runtime": 3.561, + "eval_sciq_pairs_samples_per_second": 35.945, + "eval_sciq_pairs_steps_per_second": 0.281, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_qasc_pairs_loss": 0.14951874315738678, + "eval_qasc_pairs_runtime": 0.6248, + "eval_qasc_pairs_samples_per_second": 204.865, + "eval_qasc_pairs_steps_per_second": 1.601, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_openbookqa_pairs_loss": 0.7421671152114868, + "eval_openbookqa_pairs_runtime": 0.597, + "eval_openbookqa_pairs_samples_per_second": 214.403, + "eval_openbookqa_pairs_steps_per_second": 1.675, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_msmarco_pairs_loss": 0.8618593811988831, + "eval_msmarco_pairs_runtime": 1.5259, + "eval_msmarco_pairs_samples_per_second": 83.884, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_nq_pairs_loss": 0.7470789551734924, + "eval_nq_pairs_runtime": 2.9046, + "eval_nq_pairs_samples_per_second": 44.068, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_trivia_pairs_loss": 0.9290419220924377, + "eval_trivia_pairs_runtime": 3.449, + "eval_trivia_pairs_samples_per_second": 37.112, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_gooaq_pairs_loss": 0.3941831886768341, + "eval_gooaq_pairs_runtime": 0.9612, + "eval_gooaq_pairs_samples_per_second": 133.174, + "eval_gooaq_pairs_steps_per_second": 1.04, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_paws-pos_loss": 0.022844700142741203, + "eval_paws-pos_runtime": 0.7045, + "eval_paws-pos_samples_per_second": 181.684, + "eval_paws-pos_steps_per_second": 1.419, + "step": 2160 + }, + { + "epoch": 2.2222222222222223, + "eval_global_dataset_loss": 0.4469379186630249, + "eval_global_dataset_runtime": 13.4019, + "eval_global_dataset_samples_per_second": 31.04, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2160 + }, + { + "epoch": 2.2232510288065845, + "grad_norm": 8.738439559936523, + "learning_rate": 2.3717986724966976e-05, + "loss": 0.5686, + "step": 2161 + }, + { + "epoch": 2.2242798353909463, + "grad_norm": 4.119027137756348, + "learning_rate": 2.3702980988044372e-05, + "loss": 0.1064, + "step": 2162 + }, + { + "epoch": 2.2253086419753085, + "grad_norm": 5.774399280548096, + "learning_rate": 2.3687974638964328e-05, + "loss": 0.3596, + "step": 2163 + }, + { + "epoch": 2.2263374485596708, + "grad_norm": 3.8991827964782715, + "learning_rate": 2.3672967702578222e-05, + "loss": 0.1035, + "step": 2164 + }, + { + "epoch": 2.227366255144033, + "grad_norm": 9.624704360961914, + "learning_rate": 2.3657960203738376e-05, + "loss": 0.4331, + "step": 2165 + }, + { + "epoch": 2.228395061728395, + "grad_norm": 0.12300071120262146, + "learning_rate": 2.364295216729806e-05, + "loss": 0.0016, + "step": 2166 + }, + { + "epoch": 2.2294238683127574, + "grad_norm": 0.8719635605812073, + "learning_rate": 2.362794361811144e-05, + "loss": 0.0151, + "step": 2167 + }, + { + "epoch": 2.230452674897119, + "grad_norm": 0.6506179571151733, + "learning_rate": 2.3612934581033514e-05, + "loss": 0.0137, + "step": 2168 + }, + { + "epoch": 2.2314814814814814, + "grad_norm": 9.637247085571289, + "learning_rate": 2.3597925080920098e-05, + "loss": 0.4622, + "step": 2169 + }, + { + "epoch": 2.2325102880658436, + "grad_norm": 0.01775330677628517, + "learning_rate": 2.3582915142627773e-05, + "loss": 0.0002, + "step": 2170 + }, + { + "epoch": 2.233539094650206, + "grad_norm": 5.711460590362549, + "learning_rate": 2.3567904791013857e-05, + "loss": 0.2227, + "step": 2171 + }, + { + "epoch": 2.234567901234568, + "grad_norm": 7.3310346603393555, + "learning_rate": 2.3552894050936323e-05, + "loss": 0.1894, + "step": 2172 + }, + { + "epoch": 2.23559670781893, + "grad_norm": 7.858341693878174, + "learning_rate": 2.3537882947253826e-05, + "loss": 0.3815, + "step": 2173 + }, + { + "epoch": 2.236625514403292, + "grad_norm": 0.04247892647981644, + "learning_rate": 2.3522871504825584e-05, + "loss": 0.0005, + "step": 2174 + }, + { + "epoch": 2.2376543209876543, + "grad_norm": 5.890376567840576, + "learning_rate": 2.3507859748511408e-05, + "loss": 0.3953, + "step": 2175 + }, + { + "epoch": 2.2386831275720165, + "grad_norm": 9.880730628967285, + "learning_rate": 2.3492847703171613e-05, + "loss": 0.3849, + "step": 2176 + }, + { + "epoch": 2.2397119341563787, + "grad_norm": 4.678164958953857, + "learning_rate": 2.3477835393666995e-05, + "loss": 0.1336, + "step": 2177 + }, + { + "epoch": 2.240740740740741, + "grad_norm": 8.650569915771484, + "learning_rate": 2.346282284485879e-05, + "loss": 0.3762, + "step": 2178 + }, + { + "epoch": 2.2417695473251027, + "grad_norm": 6.9241204261779785, + "learning_rate": 2.344781008160862e-05, + "loss": 0.2863, + "step": 2179 + }, + { + "epoch": 2.242798353909465, + "grad_norm": 4.224590301513672, + "learning_rate": 2.343279712877848e-05, + "loss": 0.0982, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_Qnli-dev_cosine_accuracy": 0.724609375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7531611919403076, + "eval_Qnli-dev_cosine_ap": 0.7725679080287509, + "eval_Qnli-dev_cosine_f1": 0.7007299270072993, + "eval_Qnli-dev_cosine_f1_threshold": 0.6888394355773926, + "eval_Qnli-dev_cosine_precision": 0.6153846153846154, + "eval_Qnli-dev_cosine_recall": 0.8135593220338984, + "eval_Qnli-dev_dot_accuracy": 0.689453125, + "eval_Qnli-dev_dot_accuracy_threshold": 354.9976806640625, + "eval_Qnli-dev_dot_ap": 0.7329775964784278, + "eval_Qnli-dev_dot_f1": 0.6797153024911032, + "eval_Qnli-dev_dot_f1_threshold": 292.02471923828125, + "eval_Qnli-dev_dot_precision": 0.5858895705521472, + "eval_Qnli-dev_dot_recall": 0.809322033898305, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.06117057800293, + "eval_Qnli-dev_euclidean_ap": 0.7781723429549365, + "eval_Qnli-dev_euclidean_f1": 0.7039337474120082, + "eval_Qnli-dev_euclidean_f1_threshold": 15.460293769836426, + "eval_Qnli-dev_euclidean_precision": 0.6882591093117408, + "eval_Qnli-dev_euclidean_recall": 0.7203389830508474, + "eval_Qnli-dev_manhattan_accuracy": 0.72265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 308.5833740234375, + "eval_Qnli-dev_manhattan_ap": 0.7795451684351757, + "eval_Qnli-dev_manhattan_f1": 0.7035175879396984, + "eval_Qnli-dev_manhattan_f1_threshold": 364.977783203125, + "eval_Qnli-dev_manhattan_precision": 0.5817174515235457, + "eval_Qnli-dev_manhattan_recall": 0.8898305084745762, + "eval_Qnli-dev_max_accuracy": 0.724609375, + "eval_Qnli-dev_max_accuracy_threshold": 354.9976806640625, + "eval_Qnli-dev_max_ap": 0.7795451684351757, + "eval_Qnli-dev_max_f1": 0.7039337474120082, + "eval_Qnli-dev_max_f1_threshold": 364.977783203125, + "eval_Qnli-dev_max_precision": 0.6882591093117408, + "eval_Qnli-dev_max_recall": 0.8898305084745762, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8510603308677673, + "eval_allNLI-dev_cosine_ap": 0.6330996470858502, + "eval_allNLI-dev_cosine_f1": 0.6413301662707839, + "eval_allNLI-dev_cosine_f1_threshold": 0.7421815395355225, + "eval_allNLI-dev_cosine_precision": 0.5443548387096774, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 333.49041748046875, + "eval_allNLI-dev_dot_ap": 0.5698646244511296, + "eval_allNLI-dev_dot_f1": 0.6167800453514739, + "eval_allNLI-dev_dot_f1_threshold": 306.33770751953125, + "eval_allNLI-dev_dot_precision": 0.5074626865671642, + "eval_allNLI-dev_dot_recall": 0.7861271676300579, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.0964994430542, + "eval_allNLI-dev_euclidean_ap": 0.6408181010160761, + "eval_allNLI-dev_euclidean_f1": 0.6411483253588517, + "eval_allNLI-dev_euclidean_f1_threshold": 14.643377304077148, + "eval_allNLI-dev_euclidean_precision": 0.5469387755102041, + "eval_allNLI-dev_euclidean_recall": 0.7745664739884393, + "eval_allNLI-dev_manhattan_accuracy": 0.7421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 270.64501953125, + "eval_allNLI-dev_manhattan_ap": 0.6407437219853288, + "eval_allNLI-dev_manhattan_f1": 0.6438356164383561, + "eval_allNLI-dev_manhattan_f1_threshold": 314.3695068359375, + "eval_allNLI-dev_manhattan_precision": 0.5320754716981132, + "eval_allNLI-dev_manhattan_recall": 0.815028901734104, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 333.49041748046875, + "eval_allNLI-dev_max_ap": 0.6408181010160761, + "eval_allNLI-dev_max_f1": 0.6438356164383561, + "eval_allNLI-dev_max_f1_threshold": 314.3695068359375, + "eval_allNLI-dev_max_precision": 0.5469387755102041, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7795451684351757, + "eval_sts-test_pearson_cosine": 0.8488464435945837, + "eval_sts-test_pearson_dot": 0.8320500172324081, + "eval_sts-test_pearson_euclidean": 0.8751944024128597, + "eval_sts-test_pearson_manhattan": 0.8731738889750866, + "eval_sts-test_pearson_max": 0.8751944024128597, + "eval_sts-test_spearman_cosine": 0.8770244804459136, + "eval_sts-test_spearman_dot": 0.8288539989279042, + "eval_sts-test_spearman_euclidean": 0.8723124858802291, + "eval_sts-test_spearman_manhattan": 0.8698531803104647, + "eval_sts-test_spearman_max": 0.8770244804459136, + "eval_vitaminc-pairs_loss": 2.9559402465820312, + "eval_vitaminc-pairs_runtime": 3.2153, + "eval_vitaminc-pairs_samples_per_second": 39.809, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_negation-triplets_loss": 0.9456331729888916, + "eval_negation-triplets_runtime": 0.7645, + "eval_negation-triplets_samples_per_second": 167.42, + "eval_negation-triplets_steps_per_second": 1.308, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_scitail-pairs-pos_loss": 0.14271265268325806, + "eval_scitail-pairs-pos_runtime": 0.8996, + "eval_scitail-pairs-pos_samples_per_second": 142.286, + "eval_scitail-pairs-pos_steps_per_second": 1.112, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_scitail-pairs-qa_loss": 0.00024114079133141786, + "eval_scitail-pairs-qa_runtime": 0.6074, + "eval_scitail-pairs-qa_samples_per_second": 210.732, + "eval_scitail-pairs-qa_steps_per_second": 1.646, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_xsum-pairs_loss": 0.2774715721607208, + "eval_xsum-pairs_runtime": 3.0374, + "eval_xsum-pairs_samples_per_second": 42.141, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_sciq_pairs_loss": 0.10312435775995255, + "eval_sciq_pairs_runtime": 3.5401, + "eval_sciq_pairs_samples_per_second": 36.157, + "eval_sciq_pairs_steps_per_second": 0.282, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_qasc_pairs_loss": 0.15675711631774902, + "eval_qasc_pairs_runtime": 0.6209, + "eval_qasc_pairs_samples_per_second": 206.166, + "eval_qasc_pairs_steps_per_second": 1.611, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_openbookqa_pairs_loss": 0.726242184638977, + "eval_openbookqa_pairs_runtime": 0.5976, + "eval_openbookqa_pairs_samples_per_second": 214.199, + "eval_openbookqa_pairs_steps_per_second": 1.673, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_msmarco_pairs_loss": 0.947030782699585, + "eval_msmarco_pairs_runtime": 1.5253, + "eval_msmarco_pairs_samples_per_second": 83.919, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_nq_pairs_loss": 0.6687220931053162, + "eval_nq_pairs_runtime": 2.9002, + "eval_nq_pairs_samples_per_second": 44.135, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_trivia_pairs_loss": 0.9305880069732666, + "eval_trivia_pairs_runtime": 3.4438, + "eval_trivia_pairs_samples_per_second": 37.169, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_gooaq_pairs_loss": 0.35884031653404236, + "eval_gooaq_pairs_runtime": 0.9628, + "eval_gooaq_pairs_samples_per_second": 132.949, + "eval_gooaq_pairs_steps_per_second": 1.039, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_paws-pos_loss": 0.023016272112727165, + "eval_paws-pos_runtime": 0.7091, + "eval_paws-pos_samples_per_second": 180.513, + "eval_paws-pos_steps_per_second": 1.41, + "step": 2180 + }, + { + "epoch": 2.242798353909465, + "eval_global_dataset_loss": 0.4351368546485901, + "eval_global_dataset_runtime": 13.3994, + "eval_global_dataset_samples_per_second": 31.046, + "eval_global_dataset_steps_per_second": 0.299, + "step": 2180 + }, + { + "epoch": 2.243827160493827, + "grad_norm": 3.153961658477783, + "learning_rate": 2.3417784011230672e-05, + "loss": 0.054, + "step": 2181 + }, + { + "epoch": 2.2448559670781894, + "grad_norm": 0.6234809160232544, + "learning_rate": 2.340277075382775e-05, + "loss": 0.0188, + "step": 2182 + }, + { + "epoch": 2.2458847736625516, + "grad_norm": 11.650721549987793, + "learning_rate": 2.3387757381432535e-05, + "loss": 0.624, + "step": 2183 + }, + { + "epoch": 2.246913580246914, + "grad_norm": 10.369563102722168, + "learning_rate": 2.337274391890802e-05, + "loss": 0.6085, + "step": 2184 + }, + { + "epoch": 2.2479423868312756, + "grad_norm": 8.880125045776367, + "learning_rate": 2.3357730391117345e-05, + "loss": 0.4456, + "step": 2185 + }, + { + "epoch": 2.248971193415638, + "grad_norm": 3.8903770446777344, + "learning_rate": 2.3342716822923764e-05, + "loss": 0.0507, + "step": 2186 + }, + { + "epoch": 2.25, + "grad_norm": 8.858869552612305, + "learning_rate": 2.3327703239190587e-05, + "loss": 0.3086, + "step": 2187 + }, + { + "epoch": 2.251028806584362, + "grad_norm": 3.737985372543335, + "learning_rate": 2.331268966478117e-05, + "loss": 0.0655, + "step": 2188 + }, + { + "epoch": 2.2520576131687244, + "grad_norm": 0.21452650427818298, + "learning_rate": 2.329767612455883e-05, + "loss": 0.0018, + "step": 2189 + }, + { + "epoch": 2.253086419753086, + "grad_norm": 4.297518253326416, + "learning_rate": 2.3282662643386853e-05, + "loss": 0.0847, + "step": 2190 + }, + { + "epoch": 2.2541152263374484, + "grad_norm": 5.4159770011901855, + "learning_rate": 2.3267649246128396e-05, + "loss": 0.1242, + "step": 2191 + }, + { + "epoch": 2.2551440329218106, + "grad_norm": 4.10284948348999, + "learning_rate": 2.3252635957646513e-05, + "loss": 0.0872, + "step": 2192 + }, + { + "epoch": 2.256172839506173, + "grad_norm": 3.423576831817627, + "learning_rate": 2.3237622802804044e-05, + "loss": 0.1335, + "step": 2193 + }, + { + "epoch": 2.257201646090535, + "grad_norm": 0.31105852127075195, + "learning_rate": 2.3222609806463642e-05, + "loss": 0.0086, + "step": 2194 + }, + { + "epoch": 2.2582304526748973, + "grad_norm": 3.8018033504486084, + "learning_rate": 2.3207596993487648e-05, + "loss": 0.1193, + "step": 2195 + }, + { + "epoch": 2.259259259259259, + "grad_norm": 3.9367120265960693, + "learning_rate": 2.3192584388738163e-05, + "loss": 0.0772, + "step": 2196 + }, + { + "epoch": 2.2602880658436213, + "grad_norm": 16.381790161132812, + "learning_rate": 2.3177572017076892e-05, + "loss": 2.2076, + "step": 2197 + }, + { + "epoch": 2.2613168724279835, + "grad_norm": 11.494051933288574, + "learning_rate": 2.316255990336517e-05, + "loss": 0.6439, + "step": 2198 + }, + { + "epoch": 2.2623456790123457, + "grad_norm": 7.255645751953125, + "learning_rate": 2.314754807246392e-05, + "loss": 0.5676, + "step": 2199 + }, + { + "epoch": 2.263374485596708, + "grad_norm": 7.189925193786621, + "learning_rate": 2.3132536549233565e-05, + "loss": 0.3284, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7566676139831543, + "eval_Qnli-dev_cosine_ap": 0.7640013905505368, + "eval_Qnli-dev_cosine_f1": 0.7085714285714286, + "eval_Qnli-dev_cosine_f1_threshold": 0.7158597707748413, + "eval_Qnli-dev_cosine_precision": 0.643598615916955, + "eval_Qnli-dev_cosine_recall": 0.788135593220339, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 362.87335205078125, + "eval_Qnli-dev_dot_ap": 0.7091137345101766, + "eval_Qnli-dev_dot_f1": 0.6744186046511628, + "eval_Qnli-dev_dot_f1_threshold": 289.0480041503906, + "eval_Qnli-dev_dot_precision": 0.5546448087431693, + "eval_Qnli-dev_dot_recall": 0.8601694915254238, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.758613586425781, + "eval_Qnli-dev_euclidean_ap": 0.7709525163079047, + "eval_Qnli-dev_euclidean_f1": 0.7072243346007605, + "eval_Qnli-dev_euclidean_f1_threshold": 15.905702590942383, + "eval_Qnli-dev_euclidean_precision": 0.6413793103448275, + "eval_Qnli-dev_euclidean_recall": 0.788135593220339, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 297.7829284667969, + "eval_Qnli-dev_manhattan_ap": 0.7734867279761796, + "eval_Qnli-dev_manhattan_f1": 0.7137681159420289, + "eval_Qnli-dev_manhattan_f1_threshold": 344.4150695800781, + "eval_Qnli-dev_manhattan_precision": 0.6234177215189873, + "eval_Qnli-dev_manhattan_recall": 0.8347457627118644, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 362.87335205078125, + "eval_Qnli-dev_max_ap": 0.7734867279761796, + "eval_Qnli-dev_max_f1": 0.7137681159420289, + "eval_Qnli-dev_max_f1_threshold": 344.4150695800781, + "eval_Qnli-dev_max_precision": 0.643598615916955, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8046222925186157, + "eval_allNLI-dev_cosine_ap": 0.6355167479239126, + "eval_allNLI-dev_cosine_f1": 0.6329670329670329, + "eval_allNLI-dev_cosine_f1_threshold": 0.7259559631347656, + "eval_allNLI-dev_cosine_precision": 0.5106382978723404, + "eval_allNLI-dev_cosine_recall": 0.8323699421965318, + "eval_allNLI-dev_dot_accuracy": 0.705078125, + "eval_allNLI-dev_dot_accuracy_threshold": 392.41937255859375, + "eval_allNLI-dev_dot_ap": 0.5725432880092478, + "eval_allNLI-dev_dot_f1": 0.6116504854368932, + "eval_allNLI-dev_dot_f1_threshold": 312.41143798828125, + "eval_allNLI-dev_dot_precision": 0.5271966527196653, + "eval_allNLI-dev_dot_recall": 0.7283236994219653, + "eval_allNLI-dev_euclidean_accuracy": 0.748046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.786861419677734, + "eval_allNLI-dev_euclidean_ap": 0.6435132670771473, + "eval_allNLI-dev_euclidean_f1": 0.6423982869379016, + "eval_allNLI-dev_euclidean_f1_threshold": 15.413522720336914, + "eval_allNLI-dev_euclidean_precision": 0.5102040816326531, + "eval_allNLI-dev_euclidean_recall": 0.8670520231213873, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 270.7494812011719, + "eval_allNLI-dev_manhattan_ap": 0.640275133606661, + "eval_allNLI-dev_manhattan_f1": 0.6422413793103448, + "eval_allNLI-dev_manhattan_f1_threshold": 320.50738525390625, + "eval_allNLI-dev_manhattan_precision": 0.5120274914089347, + "eval_allNLI-dev_manhattan_recall": 0.861271676300578, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 392.41937255859375, + "eval_allNLI-dev_max_ap": 0.6435132670771473, + "eval_allNLI-dev_max_f1": 0.6423982869379016, + "eval_allNLI-dev_max_f1_threshold": 320.50738525390625, + "eval_allNLI-dev_max_precision": 0.5271966527196653, + "eval_allNLI-dev_max_recall": 0.8670520231213873, + "eval_sequential_score": 0.7734867279761796, + "eval_sts-test_pearson_cosine": 0.8508751459497785, + "eval_sts-test_pearson_dot": 0.8346471700814747, + "eval_sts-test_pearson_euclidean": 0.8763811495016889, + "eval_sts-test_pearson_manhattan": 0.874745695018937, + "eval_sts-test_pearson_max": 0.8763811495016889, + "eval_sts-test_spearman_cosine": 0.8763064464568886, + "eval_sts-test_spearman_dot": 0.8267857746314393, + "eval_sts-test_spearman_euclidean": 0.8713593287638537, + "eval_sts-test_spearman_manhattan": 0.8700427914049802, + "eval_sts-test_spearman_max": 0.8763064464568886, + "eval_vitaminc-pairs_loss": 3.0157594680786133, + "eval_vitaminc-pairs_runtime": 3.2341, + "eval_vitaminc-pairs_samples_per_second": 39.579, + "eval_vitaminc-pairs_steps_per_second": 0.309, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_negation-triplets_loss": 0.9241347312927246, + "eval_negation-triplets_runtime": 0.7772, + "eval_negation-triplets_samples_per_second": 164.684, + "eval_negation-triplets_steps_per_second": 1.287, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_scitail-pairs-pos_loss": 0.13951894640922546, + "eval_scitail-pairs-pos_runtime": 0.9917, + "eval_scitail-pairs-pos_samples_per_second": 129.078, + "eval_scitail-pairs-pos_steps_per_second": 1.008, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_scitail-pairs-qa_loss": 0.0003016136179212481, + "eval_scitail-pairs-qa_runtime": 0.6291, + "eval_scitail-pairs-qa_samples_per_second": 203.454, + "eval_scitail-pairs-qa_steps_per_second": 1.589, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_xsum-pairs_loss": 0.2963438034057617, + "eval_xsum-pairs_runtime": 3.0494, + "eval_xsum-pairs_samples_per_second": 41.976, + "eval_xsum-pairs_steps_per_second": 0.328, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_sciq_pairs_loss": 0.103203684091568, + "eval_sciq_pairs_runtime": 3.5416, + "eval_sciq_pairs_samples_per_second": 36.142, + "eval_sciq_pairs_steps_per_second": 0.282, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_qasc_pairs_loss": 0.16400018334388733, + "eval_qasc_pairs_runtime": 0.6278, + "eval_qasc_pairs_samples_per_second": 203.89, + "eval_qasc_pairs_steps_per_second": 1.593, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_openbookqa_pairs_loss": 0.7553422451019287, + "eval_openbookqa_pairs_runtime": 0.6066, + "eval_openbookqa_pairs_samples_per_second": 211.012, + "eval_openbookqa_pairs_steps_per_second": 1.649, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_msmarco_pairs_loss": 0.8942117094993591, + "eval_msmarco_pairs_runtime": 1.5404, + "eval_msmarco_pairs_samples_per_second": 83.096, + "eval_msmarco_pairs_steps_per_second": 0.649, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_nq_pairs_loss": 0.6588362455368042, + "eval_nq_pairs_runtime": 2.9136, + "eval_nq_pairs_samples_per_second": 43.932, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_trivia_pairs_loss": 0.8141089081764221, + "eval_trivia_pairs_runtime": 3.4421, + "eval_trivia_pairs_samples_per_second": 37.187, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_gooaq_pairs_loss": 0.34560713171958923, + "eval_gooaq_pairs_runtime": 0.9596, + "eval_gooaq_pairs_samples_per_second": 133.39, + "eval_gooaq_pairs_steps_per_second": 1.042, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_paws-pos_loss": 0.023279238492250443, + "eval_paws-pos_runtime": 0.7179, + "eval_paws-pos_samples_per_second": 178.304, + "eval_paws-pos_steps_per_second": 1.393, + "step": 2200 + }, + { + "epoch": 2.263374485596708, + "eval_global_dataset_loss": 0.43096092343330383, + "eval_global_dataset_runtime": 13.4413, + "eval_global_dataset_samples_per_second": 30.949, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2200 + }, + { + "epoch": 2.26440329218107, + "grad_norm": 2.2469570636749268, + "learning_rate": 2.3117525358534053e-05, + "loss": 0.0701, + "step": 2201 + }, + { + "epoch": 2.265432098765432, + "grad_norm": 5.729778289794922, + "learning_rate": 2.3102514525224764e-05, + "loss": 0.1448, + "step": 2202 + }, + { + "epoch": 2.266460905349794, + "grad_norm": 11.240240097045898, + "learning_rate": 2.3087504074164473e-05, + "loss": 0.8315, + "step": 2203 + }, + { + "epoch": 2.2674897119341564, + "grad_norm": 11.073486328125, + "learning_rate": 2.307249403021135e-05, + "loss": 0.6518, + "step": 2204 + }, + { + "epoch": 2.2685185185185186, + "grad_norm": 5.503248691558838, + "learning_rate": 2.3057484418222877e-05, + "loss": 0.1358, + "step": 2205 + }, + { + "epoch": 2.269547325102881, + "grad_norm": 2.9899778366088867, + "learning_rate": 2.3042475263055816e-05, + "loss": 0.0593, + "step": 2206 + }, + { + "epoch": 2.2705761316872426, + "grad_norm": 4.398398399353027, + "learning_rate": 2.3027466589566173e-05, + "loss": 0.1171, + "step": 2207 + }, + { + "epoch": 2.271604938271605, + "grad_norm": 8.233288764953613, + "learning_rate": 2.3012458422609178e-05, + "loss": 0.3938, + "step": 2208 + }, + { + "epoch": 2.272633744855967, + "grad_norm": 5.72066068649292, + "learning_rate": 2.2997450787039184e-05, + "loss": 0.1561, + "step": 2209 + }, + { + "epoch": 2.2736625514403292, + "grad_norm": 4.385392189025879, + "learning_rate": 2.2982443707709698e-05, + "loss": 0.1494, + "step": 2210 + }, + { + "epoch": 2.2746913580246915, + "grad_norm": 4.462737083435059, + "learning_rate": 2.296743720947328e-05, + "loss": 0.1013, + "step": 2211 + }, + { + "epoch": 2.2757201646090537, + "grad_norm": 3.73474383354187, + "learning_rate": 2.295243131718156e-05, + "loss": 0.1059, + "step": 2212 + }, + { + "epoch": 2.2767489711934155, + "grad_norm": 9.668663024902344, + "learning_rate": 2.293742605568513e-05, + "loss": 0.652, + "step": 2213 + }, + { + "epoch": 2.2777777777777777, + "grad_norm": 7.706223964691162, + "learning_rate": 2.2922421449833554e-05, + "loss": 0.4134, + "step": 2214 + }, + { + "epoch": 2.27880658436214, + "grad_norm": 6.752259254455566, + "learning_rate": 2.290741752447531e-05, + "loss": 0.2024, + "step": 2215 + }, + { + "epoch": 2.279835390946502, + "grad_norm": 1.5901849269866943, + "learning_rate": 2.2892414304457746e-05, + "loss": 0.0232, + "step": 2216 + }, + { + "epoch": 2.2808641975308643, + "grad_norm": 4.390174865722656, + "learning_rate": 2.2877411814627048e-05, + "loss": 0.1113, + "step": 2217 + }, + { + "epoch": 2.2818930041152266, + "grad_norm": 9.297547340393066, + "learning_rate": 2.2862410079828187e-05, + "loss": 0.4733, + "step": 2218 + }, + { + "epoch": 2.2829218106995883, + "grad_norm": 5.782696723937988, + "learning_rate": 2.2847409124904876e-05, + "loss": 0.2747, + "step": 2219 + }, + { + "epoch": 2.2839506172839505, + "grad_norm": 7.616298675537109, + "learning_rate": 2.2832408974699553e-05, + "loss": 0.3177, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7431902885437012, + "eval_Qnli-dev_cosine_ap": 0.7633843829363348, + "eval_Qnli-dev_cosine_f1": 0.7148148148148147, + "eval_Qnli-dev_cosine_f1_threshold": 0.7029442191123962, + "eval_Qnli-dev_cosine_precision": 0.6348684210526315, + "eval_Qnli-dev_cosine_recall": 0.8177966101694916, + "eval_Qnli-dev_dot_accuracy": 0.6796875, + "eval_Qnli-dev_dot_accuracy_threshold": 345.458251953125, + "eval_Qnli-dev_dot_ap": 0.7175678328566067, + "eval_Qnli-dev_dot_f1": 0.6727272727272727, + "eval_Qnli-dev_dot_f1_threshold": 265.48504638671875, + "eval_Qnli-dev_dot_precision": 0.5235849056603774, + "eval_Qnli-dev_dot_recall": 0.940677966101695, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.796134948730469, + "eval_Qnli-dev_euclidean_ap": 0.7693916423310228, + "eval_Qnli-dev_euclidean_f1": 0.7173489278752436, + "eval_Qnli-dev_euclidean_f1_threshold": 15.845483779907227, + "eval_Qnli-dev_euclidean_precision": 0.6642599277978339, + "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 333.9942321777344, + "eval_Qnli-dev_manhattan_ap": 0.7722873766535558, + "eval_Qnli-dev_manhattan_f1": 0.7195357833655707, + "eval_Qnli-dev_manhattan_f1_threshold": 334.17718505859375, + "eval_Qnli-dev_manhattan_precision": 0.6619217081850534, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 345.458251953125, + "eval_Qnli-dev_max_ap": 0.7722873766535558, + "eval_Qnli-dev_max_f1": 0.7195357833655707, + "eval_Qnli-dev_max_f1_threshold": 334.17718505859375, + "eval_Qnli-dev_max_precision": 0.6642599277978339, + "eval_Qnli-dev_max_recall": 0.940677966101695, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8010204434394836, + "eval_allNLI-dev_cosine_ap": 0.6357351561568579, + "eval_allNLI-dev_cosine_f1": 0.6347826086956523, + "eval_allNLI-dev_cosine_f1_threshold": 0.721085786819458, + "eval_allNLI-dev_cosine_precision": 0.5087108013937283, + "eval_allNLI-dev_cosine_recall": 0.8439306358381503, + "eval_allNLI-dev_dot_accuracy": 0.701171875, + "eval_allNLI-dev_dot_accuracy_threshold": 389.8740539550781, + "eval_allNLI-dev_dot_ap": 0.5720718350764233, + "eval_allNLI-dev_dot_f1": 0.6068965517241378, + "eval_allNLI-dev_dot_f1_threshold": 307.6630859375, + "eval_allNLI-dev_dot_precision": 0.5038167938931297, + "eval_allNLI-dev_dot_recall": 0.7630057803468208, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.75674819946289, + "eval_allNLI-dev_euclidean_ap": 0.6437870159604936, + "eval_allNLI-dev_euclidean_f1": 0.6318082788671023, + "eval_allNLI-dev_euclidean_f1_threshold": 15.444945335388184, + "eval_allNLI-dev_euclidean_precision": 0.506993006993007, + "eval_allNLI-dev_euclidean_recall": 0.838150289017341, + "eval_allNLI-dev_manhattan_accuracy": 0.744140625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 274.901611328125, + "eval_allNLI-dev_manhattan_ap": 0.6431584559956899, + "eval_allNLI-dev_manhattan_f1": 0.6379310344827586, + "eval_allNLI-dev_manhattan_f1_threshold": 325.1876220703125, + "eval_allNLI-dev_manhattan_precision": 0.5085910652920962, + "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 389.8740539550781, + "eval_allNLI-dev_max_ap": 0.6437870159604936, + "eval_allNLI-dev_max_f1": 0.6379310344827586, + "eval_allNLI-dev_max_f1_threshold": 325.1876220703125, + "eval_allNLI-dev_max_precision": 0.5087108013937283, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7722873766535558, + "eval_sts-test_pearson_cosine": 0.8490522214967452, + "eval_sts-test_pearson_dot": 0.8303687950150636, + "eval_sts-test_pearson_euclidean": 0.8776849450029162, + "eval_sts-test_pearson_manhattan": 0.8753737515387936, + "eval_sts-test_pearson_max": 0.8776849450029162, + "eval_sts-test_spearman_cosine": 0.8763409496794213, + "eval_sts-test_spearman_dot": 0.8277738824829615, + "eval_sts-test_spearman_euclidean": 0.8735485157198595, + "eval_sts-test_spearman_manhattan": 0.8715688989209495, + "eval_sts-test_spearman_max": 0.8763409496794213, + "eval_vitaminc-pairs_loss": 2.9399707317352295, + "eval_vitaminc-pairs_runtime": 3.227, + "eval_vitaminc-pairs_samples_per_second": 39.665, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_negation-triplets_loss": 0.9159117341041565, + "eval_negation-triplets_runtime": 0.7877, + "eval_negation-triplets_samples_per_second": 162.497, + "eval_negation-triplets_steps_per_second": 1.27, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_scitail-pairs-pos_loss": 0.1581612527370453, + "eval_scitail-pairs-pos_runtime": 0.9105, + "eval_scitail-pairs-pos_samples_per_second": 140.58, + "eval_scitail-pairs-pos_steps_per_second": 1.098, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_scitail-pairs-qa_loss": 0.0003244028484914452, + "eval_scitail-pairs-qa_runtime": 0.6082, + "eval_scitail-pairs-qa_samples_per_second": 210.447, + "eval_scitail-pairs-qa_steps_per_second": 1.644, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_xsum-pairs_loss": 0.22244523465633392, + "eval_xsum-pairs_runtime": 3.0427, + "eval_xsum-pairs_samples_per_second": 42.068, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_sciq_pairs_loss": 0.09049579501152039, + "eval_sciq_pairs_runtime": 3.5159, + "eval_sciq_pairs_samples_per_second": 36.406, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_qasc_pairs_loss": 0.1519252359867096, + "eval_qasc_pairs_runtime": 0.627, + "eval_qasc_pairs_samples_per_second": 204.131, + "eval_qasc_pairs_steps_per_second": 1.595, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_openbookqa_pairs_loss": 0.6787049174308777, + "eval_openbookqa_pairs_runtime": 0.6152, + "eval_openbookqa_pairs_samples_per_second": 208.067, + "eval_openbookqa_pairs_steps_per_second": 1.626, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_msmarco_pairs_loss": 0.841157853603363, + "eval_msmarco_pairs_runtime": 1.5315, + "eval_msmarco_pairs_samples_per_second": 83.578, + "eval_msmarco_pairs_steps_per_second": 0.653, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_nq_pairs_loss": 0.6983832716941833, + "eval_nq_pairs_runtime": 2.9231, + "eval_nq_pairs_samples_per_second": 43.788, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_trivia_pairs_loss": 0.7856079339981079, + "eval_trivia_pairs_runtime": 3.4531, + "eval_trivia_pairs_samples_per_second": 37.069, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_gooaq_pairs_loss": 0.3422422707080841, + "eval_gooaq_pairs_runtime": 0.9637, + "eval_gooaq_pairs_samples_per_second": 132.816, + "eval_gooaq_pairs_steps_per_second": 1.038, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_paws-pos_loss": 0.022888919338583946, + "eval_paws-pos_runtime": 0.7095, + "eval_paws-pos_samples_per_second": 180.407, + "eval_paws-pos_steps_per_second": 1.409, + "step": 2220 + }, + { + "epoch": 2.2839506172839505, + "eval_global_dataset_loss": 0.4147057831287384, + "eval_global_dataset_runtime": 13.4279, + "eval_global_dataset_samples_per_second": 30.98, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2220 + }, + { + "epoch": 2.2849794238683128, + "grad_norm": 4.396790027618408, + "learning_rate": 2.2817409654053316e-05, + "loss": 0.1144, + "step": 2221 + }, + { + "epoch": 2.286008230452675, + "grad_norm": 3.684896230697632, + "learning_rate": 2.280241118780589e-05, + "loss": 0.0587, + "step": 2222 + }, + { + "epoch": 2.287037037037037, + "grad_norm": 0.7990652322769165, + "learning_rate": 2.278741360079558e-05, + "loss": 0.0078, + "step": 2223 + }, + { + "epoch": 2.288065843621399, + "grad_norm": 8.240120887756348, + "learning_rate": 2.277241691785924e-05, + "loss": 0.4262, + "step": 2224 + }, + { + "epoch": 2.289094650205761, + "grad_norm": 1.9743107557296753, + "learning_rate": 2.2757421163832226e-05, + "loss": 0.0242, + "step": 2225 + }, + { + "epoch": 2.2901234567901234, + "grad_norm": 0.4200769066810608, + "learning_rate": 2.274242636354835e-05, + "loss": 0.0203, + "step": 2226 + }, + { + "epoch": 2.2911522633744856, + "grad_norm": 4.178246021270752, + "learning_rate": 2.272743254183986e-05, + "loss": 0.1026, + "step": 2227 + }, + { + "epoch": 2.292181069958848, + "grad_norm": 10.072691917419434, + "learning_rate": 2.2712439723537363e-05, + "loss": 0.464, + "step": 2228 + }, + { + "epoch": 2.29320987654321, + "grad_norm": 4.121541500091553, + "learning_rate": 2.2697447933469823e-05, + "loss": 0.0832, + "step": 2229 + }, + { + "epoch": 2.294238683127572, + "grad_norm": 8.78761100769043, + "learning_rate": 2.2682457196464492e-05, + "loss": 0.3638, + "step": 2230 + }, + { + "epoch": 2.295267489711934, + "grad_norm": 4.9234299659729, + "learning_rate": 2.2667467537346872e-05, + "loss": 0.2132, + "step": 2231 + }, + { + "epoch": 2.2962962962962963, + "grad_norm": 7.4635701179504395, + "learning_rate": 2.2652478980940688e-05, + "loss": 0.3787, + "step": 2232 + }, + { + "epoch": 2.2973251028806585, + "grad_norm": 15.064518928527832, + "learning_rate": 2.2637491552067842e-05, + "loss": 1.8201, + "step": 2233 + }, + { + "epoch": 2.2983539094650207, + "grad_norm": 5.404539108276367, + "learning_rate": 2.262250527554837e-05, + "loss": 0.1351, + "step": 2234 + }, + { + "epoch": 2.299382716049383, + "grad_norm": 0.02893410064280033, + "learning_rate": 2.2607520176200378e-05, + "loss": 0.0003, + "step": 2235 + }, + { + "epoch": 2.3004115226337447, + "grad_norm": 6.287380695343018, + "learning_rate": 2.2592536278840047e-05, + "loss": 0.2763, + "step": 2236 + }, + { + "epoch": 2.301440329218107, + "grad_norm": 4.5157389640808105, + "learning_rate": 2.257755360828156e-05, + "loss": 0.1501, + "step": 2237 + }, + { + "epoch": 2.302469135802469, + "grad_norm": 7.763871669769287, + "learning_rate": 2.2562572189337068e-05, + "loss": 0.3872, + "step": 2238 + }, + { + "epoch": 2.3034979423868314, + "grad_norm": 0.24251669645309448, + "learning_rate": 2.2547592046816648e-05, + "loss": 0.0036, + "step": 2239 + }, + { + "epoch": 2.3045267489711936, + "grad_norm": 4.772953033447266, + "learning_rate": 2.253261320552826e-05, + "loss": 0.0906, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7303512096405029, + "eval_Qnli-dev_cosine_ap": 0.7515026165638969, + "eval_Qnli-dev_cosine_f1": 0.7020872865275143, + "eval_Qnli-dev_cosine_f1_threshold": 0.6963032484054565, + "eval_Qnli-dev_cosine_precision": 0.6357388316151202, + "eval_Qnli-dev_cosine_recall": 0.7838983050847458, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 339.2628479003906, + "eval_Qnli-dev_dot_ap": 0.7090629129430073, + "eval_Qnli-dev_dot_f1": 0.6728624535315985, + "eval_Qnli-dev_dot_f1_threshold": 305.44061279296875, + "eval_Qnli-dev_dot_precision": 0.5993377483443708, + "eval_Qnli-dev_dot_recall": 0.7669491525423728, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.274980545043945, + "eval_Qnli-dev_euclidean_ap": 0.7594778709350474, + "eval_Qnli-dev_euclidean_f1": 0.7017543859649122, + "eval_Qnli-dev_euclidean_f1_threshold": 16.277881622314453, + "eval_Qnli-dev_euclidean_precision": 0.6498194945848376, + "eval_Qnli-dev_euclidean_recall": 0.7627118644067796, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 330.57635498046875, + "eval_Qnli-dev_manhattan_ap": 0.7623445246610333, + "eval_Qnli-dev_manhattan_f1": 0.7102803738317757, + "eval_Qnli-dev_manhattan_f1_threshold": 347.587890625, + "eval_Qnli-dev_manhattan_precision": 0.6354515050167224, + "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 339.2628479003906, + "eval_Qnli-dev_max_ap": 0.7623445246610333, + "eval_Qnli-dev_max_f1": 0.7102803738317757, + "eval_Qnli-dev_max_f1_threshold": 347.587890625, + "eval_Qnli-dev_max_precision": 0.6498194945848376, + "eval_Qnli-dev_max_recall": 0.8050847457627118, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.800418496131897, + "eval_allNLI-dev_cosine_ap": 0.6273045426657029, + "eval_allNLI-dev_cosine_f1": 0.6303854875283447, + "eval_allNLI-dev_cosine_f1_threshold": 0.721612811088562, + "eval_allNLI-dev_cosine_precision": 0.5186567164179104, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.705078125, + "eval_allNLI-dev_dot_accuracy_threshold": 350.8966064453125, + "eval_allNLI-dev_dot_ap": 0.5661295928549741, + "eval_allNLI-dev_dot_f1": 0.5956521739130435, + "eval_allNLI-dev_dot_f1_threshold": 291.92437744140625, + "eval_allNLI-dev_dot_precision": 0.47735191637630664, + "eval_allNLI-dev_dot_recall": 0.791907514450867, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.805671691894531, + "eval_allNLI-dev_euclidean_ap": 0.6372099149848509, + "eval_allNLI-dev_euclidean_f1": 0.6306695464362851, + "eval_allNLI-dev_euclidean_f1_threshold": 15.717982292175293, + "eval_allNLI-dev_euclidean_precision": 0.503448275862069, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.7421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 286.83154296875, + "eval_allNLI-dev_manhattan_ap": 0.6350522019686492, + "eval_allNLI-dev_manhattan_f1": 0.6338797814207651, + "eval_allNLI-dev_manhattan_f1_threshold": 288.48553466796875, + "eval_allNLI-dev_manhattan_precision": 0.6010362694300518, + "eval_allNLI-dev_manhattan_recall": 0.6705202312138728, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 350.8966064453125, + "eval_allNLI-dev_max_ap": 0.6372099149848509, + "eval_allNLI-dev_max_f1": 0.6338797814207651, + "eval_allNLI-dev_max_f1_threshold": 291.92437744140625, + "eval_allNLI-dev_max_precision": 0.6010362694300518, + "eval_allNLI-dev_max_recall": 0.8439306358381503, + "eval_sequential_score": 0.7623445246610333, + "eval_sts-test_pearson_cosine": 0.8444884977431116, + "eval_sts-test_pearson_dot": 0.825329221666323, + "eval_sts-test_pearson_euclidean": 0.8754671018631355, + "eval_sts-test_pearson_manhattan": 0.8726714683911371, + "eval_sts-test_pearson_max": 0.8754671018631355, + "eval_sts-test_spearman_cosine": 0.875980388766391, + "eval_sts-test_spearman_dot": 0.8276308574515545, + "eval_sts-test_spearman_euclidean": 0.873646789489875, + "eval_sts-test_spearman_manhattan": 0.871694157701584, + "eval_sts-test_spearman_max": 0.875980388766391, + "eval_vitaminc-pairs_loss": 3.1677722930908203, + "eval_vitaminc-pairs_runtime": 3.2259, + "eval_vitaminc-pairs_samples_per_second": 39.679, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_negation-triplets_loss": 0.9451866149902344, + "eval_negation-triplets_runtime": 0.7626, + "eval_negation-triplets_samples_per_second": 167.847, + "eval_negation-triplets_steps_per_second": 1.311, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_scitail-pairs-pos_loss": 0.15744654834270477, + "eval_scitail-pairs-pos_runtime": 0.929, + "eval_scitail-pairs-pos_samples_per_second": 137.776, + "eval_scitail-pairs-pos_steps_per_second": 1.076, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_scitail-pairs-qa_loss": 0.000507019751239568, + "eval_scitail-pairs-qa_runtime": 0.6089, + "eval_scitail-pairs-qa_samples_per_second": 210.213, + "eval_scitail-pairs-qa_steps_per_second": 1.642, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_xsum-pairs_loss": 0.2686212360858917, + "eval_xsum-pairs_runtime": 3.0452, + "eval_xsum-pairs_samples_per_second": 42.034, + "eval_xsum-pairs_steps_per_second": 0.328, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_sciq_pairs_loss": 0.09164952486753464, + "eval_sciq_pairs_runtime": 3.5398, + "eval_sciq_pairs_samples_per_second": 36.16, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_qasc_pairs_loss": 0.16437920928001404, + "eval_qasc_pairs_runtime": 0.621, + "eval_qasc_pairs_samples_per_second": 206.104, + "eval_qasc_pairs_steps_per_second": 1.61, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_openbookqa_pairs_loss": 0.7067421674728394, + "eval_openbookqa_pairs_runtime": 0.6055, + "eval_openbookqa_pairs_samples_per_second": 211.388, + "eval_openbookqa_pairs_steps_per_second": 1.651, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_msmarco_pairs_loss": 0.8379898071289062, + "eval_msmarco_pairs_runtime": 1.53, + "eval_msmarco_pairs_samples_per_second": 83.662, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_nq_pairs_loss": 0.7834599614143372, + "eval_nq_pairs_runtime": 2.9163, + "eval_nq_pairs_samples_per_second": 43.892, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_trivia_pairs_loss": 0.8072291016578674, + "eval_trivia_pairs_runtime": 3.4746, + "eval_trivia_pairs_samples_per_second": 36.838, + "eval_trivia_pairs_steps_per_second": 0.288, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_gooaq_pairs_loss": 0.34852299094200134, + "eval_gooaq_pairs_runtime": 0.9582, + "eval_gooaq_pairs_samples_per_second": 133.59, + "eval_gooaq_pairs_steps_per_second": 1.044, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_paws-pos_loss": 0.021990040317177773, + "eval_paws-pos_runtime": 0.7094, + "eval_paws-pos_samples_per_second": 180.435, + "eval_paws-pos_steps_per_second": 1.41, + "step": 2240 + }, + { + "epoch": 2.3045267489711936, + "eval_global_dataset_loss": 0.4435296654701233, + "eval_global_dataset_runtime": 13.4215, + "eval_global_dataset_samples_per_second": 30.995, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2240 + }, + { + "epoch": 2.3055555555555554, + "grad_norm": 8.643778800964355, + "learning_rate": 2.2517635690277715e-05, + "loss": 0.2783, + "step": 2241 + }, + { + "epoch": 2.3065843621399176, + "grad_norm": 9.34395694732666, + "learning_rate": 2.2502659525868627e-05, + "loss": 0.4265, + "step": 2242 + }, + { + "epoch": 2.30761316872428, + "grad_norm": 10.448383331298828, + "learning_rate": 2.2487684737102373e-05, + "loss": 0.5171, + "step": 2243 + }, + { + "epoch": 2.308641975308642, + "grad_norm": 0.3076767027378082, + "learning_rate": 2.2472711348778043e-05, + "loss": 0.0038, + "step": 2244 + }, + { + "epoch": 2.3096707818930042, + "grad_norm": 10.618779182434082, + "learning_rate": 2.2457739385692424e-05, + "loss": 0.3861, + "step": 2245 + }, + { + "epoch": 2.310699588477366, + "grad_norm": 4.680063247680664, + "learning_rate": 2.2442768872639933e-05, + "loss": 0.0761, + "step": 2246 + }, + { + "epoch": 2.3117283950617282, + "grad_norm": 10.028347969055176, + "learning_rate": 2.2427799834412586e-05, + "loss": 0.4338, + "step": 2247 + }, + { + "epoch": 2.3127572016460904, + "grad_norm": 9.804010391235352, + "learning_rate": 2.2412832295799952e-05, + "loss": 0.4161, + "step": 2248 + }, + { + "epoch": 2.3137860082304527, + "grad_norm": 2.3687469959259033, + "learning_rate": 2.239786628158913e-05, + "loss": 0.0251, + "step": 2249 + }, + { + "epoch": 2.314814814814815, + "grad_norm": 3.091796636581421, + "learning_rate": 2.2382901816564676e-05, + "loss": 0.0371, + "step": 2250 + }, + { + "epoch": 2.315843621399177, + "grad_norm": 4.977518081665039, + "learning_rate": 2.2367938925508602e-05, + "loss": 0.0975, + "step": 2251 + }, + { + "epoch": 2.3168724279835393, + "grad_norm": 8.08765983581543, + "learning_rate": 2.2352977633200298e-05, + "loss": 0.282, + "step": 2252 + }, + { + "epoch": 2.317901234567901, + "grad_norm": 2.7775073051452637, + "learning_rate": 2.2338017964416506e-05, + "loss": 0.0343, + "step": 2253 + }, + { + "epoch": 2.3189300411522633, + "grad_norm": 3.464078664779663, + "learning_rate": 2.2323059943931295e-05, + "loss": 0.0756, + "step": 2254 + }, + { + "epoch": 2.3199588477366255, + "grad_norm": 9.049628257751465, + "learning_rate": 2.2308103596515983e-05, + "loss": 0.5555, + "step": 2255 + }, + { + "epoch": 2.3209876543209877, + "grad_norm": 11.142632484436035, + "learning_rate": 2.229314894693913e-05, + "loss": 0.6313, + "step": 2256 + }, + { + "epoch": 2.32201646090535, + "grad_norm": 4.431145668029785, + "learning_rate": 2.227819601996649e-05, + "loss": 0.1288, + "step": 2257 + }, + { + "epoch": 2.3230452674897117, + "grad_norm": 8.381731986999512, + "learning_rate": 2.2263244840360944e-05, + "loss": 0.3806, + "step": 2258 + }, + { + "epoch": 2.324074074074074, + "grad_norm": 12.163583755493164, + "learning_rate": 2.2248295432882498e-05, + "loss": 0.8261, + "step": 2259 + }, + { + "epoch": 2.325102880658436, + "grad_norm": 9.38312816619873, + "learning_rate": 2.223334782228822e-05, + "loss": 0.4223, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7691091895103455, + "eval_Qnli-dev_cosine_ap": 0.75401176894518, + "eval_Qnli-dev_cosine_f1": 0.7029126213592233, + "eval_Qnli-dev_cosine_f1_threshold": 0.7210485339164734, + "eval_Qnli-dev_cosine_precision": 0.6487455197132617, + "eval_Qnli-dev_cosine_recall": 0.7669491525423728, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 352.1729736328125, + "eval_Qnli-dev_dot_ap": 0.7022030363786389, + "eval_Qnli-dev_dot_f1": 0.6810631229235881, + "eval_Qnli-dev_dot_f1_threshold": 304.30010986328125, + "eval_Qnli-dev_dot_precision": 0.5601092896174863, + "eval_Qnli-dev_dot_recall": 0.8686440677966102, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.070708274841309, + "eval_Qnli-dev_euclidean_ap": 0.7613860714428011, + "eval_Qnli-dev_euclidean_f1": 0.7050359712230215, + "eval_Qnli-dev_euclidean_f1_threshold": 16.56011962890625, + "eval_Qnli-dev_euclidean_precision": 0.6125, + "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 333.4063415527344, + "eval_Qnli-dev_manhattan_ap": 0.7651250959093233, + "eval_Qnli-dev_manhattan_f1": 0.7086330935251799, + "eval_Qnli-dev_manhattan_f1_threshold": 349.4019470214844, + "eval_Qnli-dev_manhattan_precision": 0.615625, + "eval_Qnli-dev_manhattan_recall": 0.8347457627118644, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 352.1729736328125, + "eval_Qnli-dev_max_ap": 0.7651250959093233, + "eval_Qnli-dev_max_f1": 0.7086330935251799, + "eval_Qnli-dev_max_f1_threshold": 349.4019470214844, + "eval_Qnli-dev_max_precision": 0.6487455197132617, + "eval_Qnli-dev_max_recall": 0.8686440677966102, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7986735105514526, + "eval_allNLI-dev_cosine_ap": 0.6265153528207018, + "eval_allNLI-dev_cosine_f1": 0.6255924170616114, + "eval_allNLI-dev_cosine_f1_threshold": 0.7433013916015625, + "eval_allNLI-dev_cosine_precision": 0.5301204819277109, + "eval_allNLI-dev_cosine_recall": 0.7630057803468208, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 356.342041015625, + "eval_allNLI-dev_dot_ap": 0.5577487198695564, + "eval_allNLI-dev_dot_f1": 0.5900900900900902, + "eval_allNLI-dev_dot_f1_threshold": 304.54986572265625, + "eval_allNLI-dev_dot_precision": 0.4833948339483395, + "eval_allNLI-dev_dot_recall": 0.7572254335260116, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.965154647827148, + "eval_allNLI-dev_euclidean_ap": 0.6362386582588571, + "eval_allNLI-dev_euclidean_f1": 0.6318537859007832, + "eval_allNLI-dev_euclidean_f1_threshold": 14.021746635437012, + "eval_allNLI-dev_euclidean_precision": 0.5761904761904761, + "eval_allNLI-dev_euclidean_recall": 0.6994219653179191, + "eval_allNLI-dev_manhattan_accuracy": 0.73828125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 270.27947998046875, + "eval_allNLI-dev_manhattan_ap": 0.6339213685799745, + "eval_allNLI-dev_manhattan_f1": 0.6341463414634145, + "eval_allNLI-dev_manhattan_f1_threshold": 319.86932373046875, + "eval_allNLI-dev_manhattan_precision": 0.5143884892086331, + "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 356.342041015625, + "eval_allNLI-dev_max_ap": 0.6362386582588571, + "eval_allNLI-dev_max_f1": 0.6341463414634145, + "eval_allNLI-dev_max_f1_threshold": 319.86932373046875, + "eval_allNLI-dev_max_precision": 0.5761904761904761, + "eval_allNLI-dev_max_recall": 0.8265895953757225, + "eval_sequential_score": 0.7651250959093233, + "eval_sts-test_pearson_cosine": 0.8526757483775252, + "eval_sts-test_pearson_dot": 0.8421971908216404, + "eval_sts-test_pearson_euclidean": 0.8793390424737427, + "eval_sts-test_pearson_manhattan": 0.8761631845902708, + "eval_sts-test_pearson_max": 0.8793390424737427, + "eval_sts-test_spearman_cosine": 0.8804492944801976, + "eval_sts-test_spearman_dot": 0.8426240963118221, + "eval_sts-test_spearman_euclidean": 0.8757499197702253, + "eval_sts-test_spearman_manhattan": 0.8738397121777335, + "eval_sts-test_spearman_max": 0.8804492944801976, + "eval_vitaminc-pairs_loss": 3.0241594314575195, + "eval_vitaminc-pairs_runtime": 3.2167, + "eval_vitaminc-pairs_samples_per_second": 39.792, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_negation-triplets_loss": 0.9448370933532715, + "eval_negation-triplets_runtime": 0.7746, + "eval_negation-triplets_samples_per_second": 165.253, + "eval_negation-triplets_steps_per_second": 1.291, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_scitail-pairs-pos_loss": 0.16884900629520416, + "eval_scitail-pairs-pos_runtime": 0.9384, + "eval_scitail-pairs-pos_samples_per_second": 136.4, + "eval_scitail-pairs-pos_steps_per_second": 1.066, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_scitail-pairs-qa_loss": 0.00046156090684235096, + "eval_scitail-pairs-qa_runtime": 0.6172, + "eval_scitail-pairs-qa_samples_per_second": 207.373, + "eval_scitail-pairs-qa_steps_per_second": 1.62, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_xsum-pairs_loss": 0.29921481013298035, + "eval_xsum-pairs_runtime": 3.0371, + "eval_xsum-pairs_samples_per_second": 42.146, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_sciq_pairs_loss": 0.09571722894906998, + "eval_sciq_pairs_runtime": 3.5243, + "eval_sciq_pairs_samples_per_second": 36.32, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_qasc_pairs_loss": 0.16877269744873047, + "eval_qasc_pairs_runtime": 0.6226, + "eval_qasc_pairs_samples_per_second": 205.603, + "eval_qasc_pairs_steps_per_second": 1.606, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_openbookqa_pairs_loss": 0.6946222186088562, + "eval_openbookqa_pairs_runtime": 0.6114, + "eval_openbookqa_pairs_samples_per_second": 209.355, + "eval_openbookqa_pairs_steps_per_second": 1.636, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_msmarco_pairs_loss": 0.7874183058738708, + "eval_msmarco_pairs_runtime": 1.5315, + "eval_msmarco_pairs_samples_per_second": 83.577, + "eval_msmarco_pairs_steps_per_second": 0.653, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_nq_pairs_loss": 0.6316083669662476, + "eval_nq_pairs_runtime": 2.9025, + "eval_nq_pairs_samples_per_second": 44.1, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_trivia_pairs_loss": 0.8147802948951721, + "eval_trivia_pairs_runtime": 3.4458, + "eval_trivia_pairs_samples_per_second": 37.146, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_gooaq_pairs_loss": 0.35202357172966003, + "eval_gooaq_pairs_runtime": 0.9651, + "eval_gooaq_pairs_samples_per_second": 132.633, + "eval_gooaq_pairs_steps_per_second": 1.036, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_paws-pos_loss": 0.022588880732655525, + "eval_paws-pos_runtime": 0.7109, + "eval_paws-pos_samples_per_second": 180.061, + "eval_paws-pos_steps_per_second": 1.407, + "step": 2260 + }, + { + "epoch": 2.325102880658436, + "eval_global_dataset_loss": 0.45734867453575134, + "eval_global_dataset_runtime": 13.4411, + "eval_global_dataset_samples_per_second": 30.95, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2260 + }, + { + "epoch": 2.3261316872427984, + "grad_norm": 5.772022247314453, + "learning_rate": 2.221840203333219e-05, + "loss": 0.2516, + "step": 2261 + }, + { + "epoch": 2.3271604938271606, + "grad_norm": 3.9961254596710205, + "learning_rate": 2.2203458090765486e-05, + "loss": 0.0595, + "step": 2262 + }, + { + "epoch": 2.3281893004115224, + "grad_norm": 6.449882984161377, + "learning_rate": 2.2188516019336126e-05, + "loss": 0.2802, + "step": 2263 + }, + { + "epoch": 2.3292181069958846, + "grad_norm": 8.825777053833008, + "learning_rate": 2.2173575843789013e-05, + "loss": 0.4514, + "step": 2264 + }, + { + "epoch": 2.330246913580247, + "grad_norm": 4.293075084686279, + "learning_rate": 2.2158637588865937e-05, + "loss": 0.1173, + "step": 2265 + }, + { + "epoch": 2.331275720164609, + "grad_norm": 4.642062187194824, + "learning_rate": 2.214370127930549e-05, + "loss": 0.111, + "step": 2266 + }, + { + "epoch": 2.3323045267489713, + "grad_norm": 4.957849025726318, + "learning_rate": 2.2128766939843043e-05, + "loss": 0.1221, + "step": 2267 + }, + { + "epoch": 2.3333333333333335, + "grad_norm": 6.430418968200684, + "learning_rate": 2.211383459521071e-05, + "loss": 0.2215, + "step": 2268 + }, + { + "epoch": 2.3343621399176957, + "grad_norm": 3.42794132232666, + "learning_rate": 2.209890427013729e-05, + "loss": 0.076, + "step": 2269 + }, + { + "epoch": 2.3353909465020575, + "grad_norm": 1.1023659706115723, + "learning_rate": 2.208397598934826e-05, + "loss": 0.0639, + "step": 2270 + }, + { + "epoch": 2.3364197530864197, + "grad_norm": 8.584929466247559, + "learning_rate": 2.2069049777565694e-05, + "loss": 0.4891, + "step": 2271 + }, + { + "epoch": 2.337448559670782, + "grad_norm": 7.326855182647705, + "learning_rate": 2.2054125659508233e-05, + "loss": 0.3682, + "step": 2272 + }, + { + "epoch": 2.338477366255144, + "grad_norm": 4.641539573669434, + "learning_rate": 2.2039203659891072e-05, + "loss": 0.2303, + "step": 2273 + }, + { + "epoch": 2.3395061728395063, + "grad_norm": 9.670384407043457, + "learning_rate": 2.2024283803425884e-05, + "loss": 0.6301, + "step": 2274 + }, + { + "epoch": 2.340534979423868, + "grad_norm": 7.244576454162598, + "learning_rate": 2.2009366114820795e-05, + "loss": 0.3037, + "step": 2275 + }, + { + "epoch": 2.3415637860082303, + "grad_norm": 0.10709922760725021, + "learning_rate": 2.1994450618780344e-05, + "loss": 0.0013, + "step": 2276 + }, + { + "epoch": 2.3425925925925926, + "grad_norm": 8.981964111328125, + "learning_rate": 2.1979537340005425e-05, + "loss": 0.5925, + "step": 2277 + }, + { + "epoch": 2.343621399176955, + "grad_norm": 3.5664937496185303, + "learning_rate": 2.196462630319329e-05, + "loss": 0.0863, + "step": 2278 + }, + { + "epoch": 2.344650205761317, + "grad_norm": 9.481403350830078, + "learning_rate": 2.194971753303744e-05, + "loss": 0.5741, + "step": 2279 + }, + { + "epoch": 2.3456790123456788, + "grad_norm": 7.456191062927246, + "learning_rate": 2.193481105422766e-05, + "loss": 0.2837, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7566351294517517, + "eval_Qnli-dev_cosine_ap": 0.7609971289365627, + "eval_Qnli-dev_cosine_f1": 0.7145631067961165, + "eval_Qnli-dev_cosine_f1_threshold": 0.7312784194946289, + "eval_Qnli-dev_cosine_precision": 0.6594982078853047, + "eval_Qnli-dev_cosine_recall": 0.7796610169491526, + "eval_Qnli-dev_dot_accuracy": 0.6875, + "eval_Qnli-dev_dot_accuracy_threshold": 365.13922119140625, + "eval_Qnli-dev_dot_ap": 0.7117488467964865, + "eval_Qnli-dev_dot_f1": 0.6746575342465754, + "eval_Qnli-dev_dot_f1_threshold": 312.92230224609375, + "eval_Qnli-dev_dot_precision": 0.5660919540229885, + "eval_Qnli-dev_dot_recall": 0.8347457627118644, + "eval_Qnli-dev_euclidean_accuracy": 0.72265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.736825942993164, + "eval_Qnli-dev_euclidean_ap": 0.7670844970871598, + "eval_Qnli-dev_euclidean_f1": 0.7237354085603113, + "eval_Qnli-dev_euclidean_f1_threshold": 15.739640235900879, + "eval_Qnli-dev_euclidean_precision": 0.6690647482014388, + "eval_Qnli-dev_euclidean_recall": 0.788135593220339, + "eval_Qnli-dev_manhattan_accuracy": 0.724609375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 322.1856689453125, + "eval_Qnli-dev_manhattan_ap": 0.7692971927519606, + "eval_Qnli-dev_manhattan_f1": 0.7169811320754716, + "eval_Qnli-dev_manhattan_f1_threshold": 337.4688415527344, + "eval_Qnli-dev_manhattan_precision": 0.6462585034013606, + "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, + "eval_Qnli-dev_max_accuracy": 0.724609375, + "eval_Qnli-dev_max_accuracy_threshold": 365.13922119140625, + "eval_Qnli-dev_max_ap": 0.7692971927519606, + "eval_Qnli-dev_max_f1": 0.7237354085603113, + "eval_Qnli-dev_max_f1_threshold": 337.4688415527344, + "eval_Qnli-dev_max_precision": 0.6690647482014388, + "eval_Qnli-dev_max_recall": 0.8347457627118644, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8959578275680542, + "eval_allNLI-dev_cosine_ap": 0.6288210825785365, + "eval_allNLI-dev_cosine_f1": 0.641860465116279, + "eval_allNLI-dev_cosine_f1_threshold": 0.7660266160964966, + "eval_allNLI-dev_cosine_precision": 0.5369649805447471, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.705078125, + "eval_allNLI-dev_dot_accuracy_threshold": 360.33917236328125, + "eval_allNLI-dev_dot_ap": 0.5581058350375936, + "eval_allNLI-dev_dot_f1": 0.591375770020534, + "eval_allNLI-dev_dot_f1_threshold": 311.787109375, + "eval_allNLI-dev_dot_precision": 0.4585987261146497, + "eval_allNLI-dev_dot_recall": 0.8323699421965318, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.02200698852539, + "eval_allNLI-dev_euclidean_ap": 0.6385020312240187, + "eval_allNLI-dev_euclidean_f1": 0.6510538641686183, + "eval_allNLI-dev_euclidean_f1_threshold": 14.25075626373291, + "eval_allNLI-dev_euclidean_precision": 0.547244094488189, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 257.76812744140625, + "eval_allNLI-dev_manhattan_ap": 0.6353966723714252, + "eval_allNLI-dev_manhattan_f1": 0.6453089244851259, + "eval_allNLI-dev_manhattan_f1_threshold": 302.9503173828125, + "eval_allNLI-dev_manhattan_precision": 0.5340909090909091, + "eval_allNLI-dev_manhattan_recall": 0.815028901734104, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 360.33917236328125, + "eval_allNLI-dev_max_ap": 0.6385020312240187, + "eval_allNLI-dev_max_f1": 0.6510538641686183, + "eval_allNLI-dev_max_f1_threshold": 311.787109375, + "eval_allNLI-dev_max_precision": 0.547244094488189, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7692971927519606, + "eval_sts-test_pearson_cosine": 0.8514642254937852, + "eval_sts-test_pearson_dot": 0.8334266195184966, + "eval_sts-test_pearson_euclidean": 0.8774851170069259, + "eval_sts-test_pearson_manhattan": 0.874385112208525, + "eval_sts-test_pearson_max": 0.8774851170069259, + "eval_sts-test_spearman_cosine": 0.8771915816559491, + "eval_sts-test_spearman_dot": 0.8273027859542943, + "eval_sts-test_spearman_euclidean": 0.8732949998240772, + "eval_sts-test_spearman_manhattan": 0.8717807961436378, + "eval_sts-test_spearman_max": 0.8771915816559491, + "eval_vitaminc-pairs_loss": 3.007596015930176, + "eval_vitaminc-pairs_runtime": 3.2197, + "eval_vitaminc-pairs_samples_per_second": 39.755, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_negation-triplets_loss": 0.8995927572250366, + "eval_negation-triplets_runtime": 0.7637, + "eval_negation-triplets_samples_per_second": 167.606, + "eval_negation-triplets_steps_per_second": 1.309, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_scitail-pairs-pos_loss": 0.17473338544368744, + "eval_scitail-pairs-pos_runtime": 0.9416, + "eval_scitail-pairs-pos_samples_per_second": 135.939, + "eval_scitail-pairs-pos_steps_per_second": 1.062, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_scitail-pairs-qa_loss": 0.0005349889979697764, + "eval_scitail-pairs-qa_runtime": 0.6285, + "eval_scitail-pairs-qa_samples_per_second": 203.675, + "eval_scitail-pairs-qa_steps_per_second": 1.591, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_xsum-pairs_loss": 0.2932433784008026, + "eval_xsum-pairs_runtime": 3.0444, + "eval_xsum-pairs_samples_per_second": 42.044, + "eval_xsum-pairs_steps_per_second": 0.328, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_sciq_pairs_loss": 0.09884592145681381, + "eval_sciq_pairs_runtime": 3.5665, + "eval_sciq_pairs_samples_per_second": 35.889, + "eval_sciq_pairs_steps_per_second": 0.28, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_qasc_pairs_loss": 0.1590500771999359, + "eval_qasc_pairs_runtime": 0.6249, + "eval_qasc_pairs_samples_per_second": 204.834, + "eval_qasc_pairs_steps_per_second": 1.6, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_openbookqa_pairs_loss": 0.6874719262123108, + "eval_openbookqa_pairs_runtime": 0.602, + "eval_openbookqa_pairs_samples_per_second": 212.628, + "eval_openbookqa_pairs_steps_per_second": 1.661, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_msmarco_pairs_loss": 0.8092954158782959, + "eval_msmarco_pairs_runtime": 1.5273, + "eval_msmarco_pairs_samples_per_second": 83.81, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_nq_pairs_loss": 0.6119550466537476, + "eval_nq_pairs_runtime": 2.9044, + "eval_nq_pairs_samples_per_second": 44.07, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_trivia_pairs_loss": 0.8267652988433838, + "eval_trivia_pairs_runtime": 3.443, + "eval_trivia_pairs_samples_per_second": 37.177, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_gooaq_pairs_loss": 0.31177884340286255, + "eval_gooaq_pairs_runtime": 0.9587, + "eval_gooaq_pairs_samples_per_second": 133.519, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_paws-pos_loss": 0.023131083697080612, + "eval_paws-pos_runtime": 0.7115, + "eval_paws-pos_samples_per_second": 179.914, + "eval_paws-pos_steps_per_second": 1.406, + "step": 2280 + }, + { + "epoch": 2.3456790123456788, + "eval_global_dataset_loss": 0.4496609568595886, + "eval_global_dataset_runtime": 13.4253, + "eval_global_dataset_samples_per_second": 30.986, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2280 + }, + { + "epoch": 2.346707818930041, + "grad_norm": 0.18925702571868896, + "learning_rate": 2.1919906891449918e-05, + "loss": 0.0027, + "step": 2281 + }, + { + "epoch": 2.347736625514403, + "grad_norm": 3.967648506164551, + "learning_rate": 2.1905005069386335e-05, + "loss": 0.1548, + "step": 2282 + }, + { + "epoch": 2.3487654320987654, + "grad_norm": 5.869375228881836, + "learning_rate": 2.1890105612715197e-05, + "loss": 0.2339, + "step": 2283 + }, + { + "epoch": 2.3497942386831276, + "grad_norm": 10.786419868469238, + "learning_rate": 2.1875208546110827e-05, + "loss": 0.5421, + "step": 2284 + }, + { + "epoch": 2.35082304526749, + "grad_norm": 7.9114251136779785, + "learning_rate": 2.1860313894243623e-05, + "loss": 0.6218, + "step": 2285 + }, + { + "epoch": 2.351851851851852, + "grad_norm": 7.046651363372803, + "learning_rate": 2.184542168177996e-05, + "loss": 0.3833, + "step": 2286 + }, + { + "epoch": 2.352880658436214, + "grad_norm": 7.676875114440918, + "learning_rate": 2.18305319333822e-05, + "loss": 0.6528, + "step": 2287 + }, + { + "epoch": 2.353909465020576, + "grad_norm": 6.6297712326049805, + "learning_rate": 2.1815644673708592e-05, + "loss": 0.2831, + "step": 2288 + }, + { + "epoch": 2.3549382716049383, + "grad_norm": 3.635673999786377, + "learning_rate": 2.180075992741329e-05, + "loss": 0.0959, + "step": 2289 + }, + { + "epoch": 2.3559670781893005, + "grad_norm": 4.281613826751709, + "learning_rate": 2.1785877719146274e-05, + "loss": 0.128, + "step": 2290 + }, + { + "epoch": 2.3569958847736627, + "grad_norm": 6.85472297668457, + "learning_rate": 2.1770998073553326e-05, + "loss": 0.1727, + "step": 2291 + }, + { + "epoch": 2.3580246913580245, + "grad_norm": 1.0257395505905151, + "learning_rate": 2.1756121015275976e-05, + "loss": 0.0418, + "step": 2292 + }, + { + "epoch": 2.3590534979423867, + "grad_norm": 4.922633171081543, + "learning_rate": 2.1741246568951482e-05, + "loss": 0.1231, + "step": 2293 + }, + { + "epoch": 2.360082304526749, + "grad_norm": 3.1864991188049316, + "learning_rate": 2.172637475921276e-05, + "loss": 0.0568, + "step": 2294 + }, + { + "epoch": 2.361111111111111, + "grad_norm": 10.265893936157227, + "learning_rate": 2.171150561068837e-05, + "loss": 0.4443, + "step": 2295 + }, + { + "epoch": 2.3621399176954734, + "grad_norm": 2.11238956451416, + "learning_rate": 2.1696639148002472e-05, + "loss": 0.0415, + "step": 2296 + }, + { + "epoch": 2.363168724279835, + "grad_norm": 9.106158256530762, + "learning_rate": 2.1681775395774764e-05, + "loss": 0.4245, + "step": 2297 + }, + { + "epoch": 2.3641975308641974, + "grad_norm": 5.098234176635742, + "learning_rate": 2.166691437862045e-05, + "loss": 0.1398, + "step": 2298 + }, + { + "epoch": 2.3652263374485596, + "grad_norm": 2.0659897327423096, + "learning_rate": 2.1652056121150233e-05, + "loss": 0.0312, + "step": 2299 + }, + { + "epoch": 2.366255144032922, + "grad_norm": 4.4549078941345215, + "learning_rate": 2.1637200647970217e-05, + "loss": 0.088, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.711749792098999, + "eval_Qnli-dev_cosine_ap": 0.7581653072806273, + "eval_Qnli-dev_cosine_f1": 0.7095588235294118, + "eval_Qnli-dev_cosine_f1_threshold": 0.6768925189971924, + "eval_Qnli-dev_cosine_precision": 0.6266233766233766, + "eval_Qnli-dev_cosine_recall": 0.8177966101694916, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 330.6556091308594, + "eval_Qnli-dev_dot_ap": 0.7246830432229524, + "eval_Qnli-dev_dot_f1": 0.6814310051107325, + "eval_Qnli-dev_dot_f1_threshold": 277.5843811035156, + "eval_Qnli-dev_dot_precision": 0.5698005698005698, + "eval_Qnli-dev_dot_recall": 0.847457627118644, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.400651931762695, + "eval_Qnli-dev_euclidean_ap": 0.7628671779699692, + "eval_Qnli-dev_euclidean_f1": 0.7238095238095237, + "eval_Qnli-dev_euclidean_f1_threshold": 16.400651931762695, + "eval_Qnli-dev_euclidean_precision": 0.657439446366782, + "eval_Qnli-dev_euclidean_recall": 0.8050847457627118, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 335.8609313964844, + "eval_Qnli-dev_manhattan_ap": 0.7667709215373246, + "eval_Qnli-dev_manhattan_f1": 0.7140319715808171, + "eval_Qnli-dev_manhattan_f1_threshold": 358.5963439941406, + "eval_Qnli-dev_manhattan_precision": 0.6146788990825688, + "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 335.8609313964844, + "eval_Qnli-dev_max_ap": 0.7667709215373246, + "eval_Qnli-dev_max_f1": 0.7238095238095237, + "eval_Qnli-dev_max_f1_threshold": 358.5963439941406, + "eval_Qnli-dev_max_precision": 0.657439446366782, + "eval_Qnli-dev_max_recall": 0.8516949152542372, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8382487297058105, + "eval_allNLI-dev_cosine_ap": 0.6355044057508394, + "eval_allNLI-dev_cosine_f1": 0.639618138424821, + "eval_allNLI-dev_cosine_f1_threshold": 0.7509103417396545, + "eval_allNLI-dev_cosine_precision": 0.5447154471544715, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.7109375, + "eval_allNLI-dev_dot_accuracy_threshold": 362.3534851074219, + "eval_allNLI-dev_dot_ap": 0.5746903583642451, + "eval_allNLI-dev_dot_f1": 0.5949367088607596, + "eval_allNLI-dev_dot_f1_threshold": 298.28326416015625, + "eval_allNLI-dev_dot_precision": 0.4684385382059801, + "eval_allNLI-dev_dot_recall": 0.815028901734104, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.083795547485352, + "eval_allNLI-dev_euclidean_ap": 0.6471876064039366, + "eval_allNLI-dev_euclidean_f1": 0.6481481481481481, + "eval_allNLI-dev_euclidean_f1_threshold": 14.829469680786133, + "eval_allNLI-dev_euclidean_precision": 0.5405405405405406, + "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, + "eval_allNLI-dev_manhattan_accuracy": 0.744140625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 261.477783203125, + "eval_allNLI-dev_manhattan_ap": 0.641664744292025, + "eval_allNLI-dev_manhattan_f1": 0.6415929203539823, + "eval_allNLI-dev_manhattan_f1_threshold": 319.8987731933594, + "eval_allNLI-dev_manhattan_precision": 0.5197132616487455, + "eval_allNLI-dev_manhattan_recall": 0.838150289017341, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 362.3534851074219, + "eval_allNLI-dev_max_ap": 0.6471876064039366, + "eval_allNLI-dev_max_f1": 0.6481481481481481, + "eval_allNLI-dev_max_f1_threshold": 319.8987731933594, + "eval_allNLI-dev_max_precision": 0.5447154471544715, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7667709215373246, + "eval_sts-test_pearson_cosine": 0.8482468972393741, + "eval_sts-test_pearson_dot": 0.8224592183870103, + "eval_sts-test_pearson_euclidean": 0.8758260852544988, + "eval_sts-test_pearson_manhattan": 0.8721834882062033, + "eval_sts-test_pearson_max": 0.8758260852544988, + "eval_sts-test_spearman_cosine": 0.8752404714105455, + "eval_sts-test_spearman_dot": 0.8155248804737375, + "eval_sts-test_spearman_euclidean": 0.8718644810024397, + "eval_sts-test_spearman_manhattan": 0.8689638832440949, + "eval_sts-test_spearman_max": 0.8752404714105455, + "eval_vitaminc-pairs_loss": 3.0794012546539307, + "eval_vitaminc-pairs_runtime": 3.2449, + "eval_vitaminc-pairs_samples_per_second": 39.447, + "eval_vitaminc-pairs_steps_per_second": 0.308, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_negation-triplets_loss": 0.9212685823440552, + "eval_negation-triplets_runtime": 0.7636, + "eval_negation-triplets_samples_per_second": 167.622, + "eval_negation-triplets_steps_per_second": 1.31, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_scitail-pairs-pos_loss": 0.16639116406440735, + "eval_scitail-pairs-pos_runtime": 0.9211, + "eval_scitail-pairs-pos_samples_per_second": 138.972, + "eval_scitail-pairs-pos_steps_per_second": 1.086, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_scitail-pairs-qa_loss": 0.0005323602235876024, + "eval_scitail-pairs-qa_runtime": 0.6159, + "eval_scitail-pairs-qa_samples_per_second": 207.827, + "eval_scitail-pairs-qa_steps_per_second": 1.624, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_xsum-pairs_loss": 0.24832576513290405, + "eval_xsum-pairs_runtime": 3.0282, + "eval_xsum-pairs_samples_per_second": 42.269, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_sciq_pairs_loss": 0.09831339120864868, + "eval_sciq_pairs_runtime": 3.5302, + "eval_sciq_pairs_samples_per_second": 36.258, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_qasc_pairs_loss": 0.1492229402065277, + "eval_qasc_pairs_runtime": 0.6295, + "eval_qasc_pairs_samples_per_second": 203.341, + "eval_qasc_pairs_steps_per_second": 1.589, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_openbookqa_pairs_loss": 0.6935264468193054, + "eval_openbookqa_pairs_runtime": 0.607, + "eval_openbookqa_pairs_samples_per_second": 210.86, + "eval_openbookqa_pairs_steps_per_second": 1.647, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_msmarco_pairs_loss": 0.8593921661376953, + "eval_msmarco_pairs_runtime": 1.5351, + "eval_msmarco_pairs_samples_per_second": 83.381, + "eval_msmarco_pairs_steps_per_second": 0.651, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_nq_pairs_loss": 0.6413222551345825, + "eval_nq_pairs_runtime": 2.9143, + "eval_nq_pairs_samples_per_second": 43.921, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_trivia_pairs_loss": 0.875969648361206, + "eval_trivia_pairs_runtime": 3.4479, + "eval_trivia_pairs_samples_per_second": 37.124, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_gooaq_pairs_loss": 0.33981260657310486, + "eval_gooaq_pairs_runtime": 0.9586, + "eval_gooaq_pairs_samples_per_second": 133.522, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_paws-pos_loss": 0.023348888382315636, + "eval_paws-pos_runtime": 0.7074, + "eval_paws-pos_samples_per_second": 180.944, + "eval_paws-pos_steps_per_second": 1.414, + "step": 2300 + }, + { + "epoch": 2.366255144032922, + "eval_global_dataset_loss": 0.4635227918624878, + "eval_global_dataset_runtime": 13.4057, + "eval_global_dataset_samples_per_second": 31.032, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2300 + }, + { + "epoch": 2.367283950617284, + "grad_norm": 4.1970977783203125, + "learning_rate": 2.1622347983681913e-05, + "loss": 0.1321, + "step": 2301 + }, + { + "epoch": 2.3683127572016462, + "grad_norm": 2.755897045135498, + "learning_rate": 2.1607498152882166e-05, + "loss": 0.1181, + "step": 2302 + }, + { + "epoch": 2.3693415637860085, + "grad_norm": 8.307868003845215, + "learning_rate": 2.1592651180163142e-05, + "loss": 0.392, + "step": 2303 + }, + { + "epoch": 2.3703703703703702, + "grad_norm": 5.575843334197998, + "learning_rate": 2.1577807090112272e-05, + "loss": 0.1, + "step": 2304 + }, + { + "epoch": 2.3713991769547325, + "grad_norm": 0.1779100000858307, + "learning_rate": 2.1562965907312197e-05, + "loss": 0.0029, + "step": 2305 + }, + { + "epoch": 2.3724279835390947, + "grad_norm": 3.030696392059326, + "learning_rate": 2.1548127656340767e-05, + "loss": 0.0665, + "step": 2306 + }, + { + "epoch": 2.373456790123457, + "grad_norm": 6.725527286529541, + "learning_rate": 2.153329236177096e-05, + "loss": 0.1854, + "step": 2307 + }, + { + "epoch": 2.374485596707819, + "grad_norm": 7.689750671386719, + "learning_rate": 2.151846004817086e-05, + "loss": 0.2986, + "step": 2308 + }, + { + "epoch": 2.375514403292181, + "grad_norm": 3.769759178161621, + "learning_rate": 2.1503630740103627e-05, + "loss": 0.0676, + "step": 2309 + }, + { + "epoch": 2.376543209876543, + "grad_norm": 6.60612678527832, + "learning_rate": 2.1488804462127425e-05, + "loss": 0.1793, + "step": 2310 + }, + { + "epoch": 2.3775720164609053, + "grad_norm": 11.391586303710938, + "learning_rate": 2.147398123879541e-05, + "loss": 0.5551, + "step": 2311 + }, + { + "epoch": 2.3786008230452675, + "grad_norm": 1.5389795303344727, + "learning_rate": 2.1459161094655687e-05, + "loss": 0.0187, + "step": 2312 + }, + { + "epoch": 2.3796296296296298, + "grad_norm": 9.632418632507324, + "learning_rate": 2.1444344054251252e-05, + "loss": 0.4346, + "step": 2313 + }, + { + "epoch": 2.3806584362139915, + "grad_norm": 0.7773526310920715, + "learning_rate": 2.1429530142119952e-05, + "loss": 0.0341, + "step": 2314 + }, + { + "epoch": 2.3816872427983538, + "grad_norm": 5.237499237060547, + "learning_rate": 2.141471938279447e-05, + "loss": 0.1108, + "step": 2315 + }, + { + "epoch": 2.382716049382716, + "grad_norm": 8.371172904968262, + "learning_rate": 2.1399911800802263e-05, + "loss": 0.3826, + "step": 2316 + }, + { + "epoch": 2.383744855967078, + "grad_norm": 0.7036436200141907, + "learning_rate": 2.1385107420665525e-05, + "loss": 0.0205, + "step": 2317 + }, + { + "epoch": 2.3847736625514404, + "grad_norm": 7.287191867828369, + "learning_rate": 2.1370306266901146e-05, + "loss": 0.2393, + "step": 2318 + }, + { + "epoch": 2.3858024691358026, + "grad_norm": 4.022860527038574, + "learning_rate": 2.1355508364020674e-05, + "loss": 0.1191, + "step": 2319 + }, + { + "epoch": 2.386831275720165, + "grad_norm": 8.647807121276855, + "learning_rate": 2.1340713736530272e-05, + "loss": 0.4757, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_Qnli-dev_cosine_accuracy": 0.7109375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7119765281677246, + "eval_Qnli-dev_cosine_ap": 0.7602990911520537, + "eval_Qnli-dev_cosine_f1": 0.7132075471698114, + "eval_Qnli-dev_cosine_f1_threshold": 0.6880638003349304, + "eval_Qnli-dev_cosine_precision": 0.6428571428571429, + "eval_Qnli-dev_cosine_recall": 0.8008474576271186, + "eval_Qnli-dev_dot_accuracy": 0.689453125, + "eval_Qnli-dev_dot_accuracy_threshold": 325.6799011230469, + "eval_Qnli-dev_dot_ap": 0.7217468530564131, + "eval_Qnli-dev_dot_f1": 0.6856187290969901, + "eval_Qnli-dev_dot_f1_threshold": 274.34625244140625, + "eval_Qnli-dev_dot_precision": 0.5662983425414365, + "eval_Qnli-dev_dot_recall": 0.8686440677966102, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.682184219360352, + "eval_Qnli-dev_euclidean_ap": 0.7659527366732183, + "eval_Qnli-dev_euclidean_f1": 0.7175572519083969, + "eval_Qnli-dev_euclidean_f1_threshold": 16.30190658569336, + "eval_Qnli-dev_euclidean_precision": 0.6527777777777778, + "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 308.93658447265625, + "eval_Qnli-dev_manhattan_ap": 0.7675819061657483, + "eval_Qnli-dev_manhattan_f1": 0.717391304347826, + "eval_Qnli-dev_manhattan_f1_threshold": 351.19854736328125, + "eval_Qnli-dev_manhattan_precision": 0.6265822784810127, + "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 325.6799011230469, + "eval_Qnli-dev_max_ap": 0.7675819061657483, + "eval_Qnli-dev_max_f1": 0.7175572519083969, + "eval_Qnli-dev_max_f1_threshold": 351.19854736328125, + "eval_Qnli-dev_max_precision": 0.6527777777777778, + "eval_Qnli-dev_max_recall": 0.8686440677966102, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8598533868789673, + "eval_allNLI-dev_cosine_ap": 0.6381914221277092, + "eval_allNLI-dev_cosine_f1": 0.6361556064073227, + "eval_allNLI-dev_cosine_f1_threshold": 0.7326873540878296, + "eval_allNLI-dev_cosine_precision": 0.5265151515151515, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 359.8466796875, + "eval_allNLI-dev_dot_ap": 0.584074636820295, + "eval_allNLI-dev_dot_f1": 0.608695652173913, + "eval_allNLI-dev_dot_f1_threshold": 308.1065368652344, + "eval_allNLI-dev_dot_precision": 0.5037878787878788, + "eval_allNLI-dev_dot_recall": 0.7687861271676301, + "eval_allNLI-dev_euclidean_accuracy": 0.744140625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.918830871582031, + "eval_allNLI-dev_euclidean_ap": 0.6483938210018565, + "eval_allNLI-dev_euclidean_f1": 0.6467889908256881, + "eval_allNLI-dev_euclidean_f1_threshold": 15.164717674255371, + "eval_allNLI-dev_euclidean_precision": 0.5361216730038023, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.7421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 265.463623046875, + "eval_allNLI-dev_manhattan_ap": 0.6450488160783302, + "eval_allNLI-dev_manhattan_f1": 0.6393088552915768, + "eval_allNLI-dev_manhattan_f1_threshold": 328.457763671875, + "eval_allNLI-dev_manhattan_precision": 0.5103448275862069, + "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 359.8466796875, + "eval_allNLI-dev_max_ap": 0.6483938210018565, + "eval_allNLI-dev_max_f1": 0.6467889908256881, + "eval_allNLI-dev_max_f1_threshold": 328.457763671875, + "eval_allNLI-dev_max_precision": 0.5361216730038023, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7675819061657483, + "eval_sts-test_pearson_cosine": 0.8508923448809838, + "eval_sts-test_pearson_dot": 0.8321847179139741, + "eval_sts-test_pearson_euclidean": 0.8767776571778518, + "eval_sts-test_pearson_manhattan": 0.8726214447799912, + "eval_sts-test_pearson_max": 0.8767776571778518, + "eval_sts-test_spearman_cosine": 0.8750843790108124, + "eval_sts-test_spearman_dot": 0.8275167864862678, + "eval_sts-test_spearman_euclidean": 0.8715298758210163, + "eval_sts-test_spearman_manhattan": 0.8675941366354268, + "eval_sts-test_spearman_max": 0.8750843790108124, + "eval_vitaminc-pairs_loss": 3.3888425827026367, + "eval_vitaminc-pairs_runtime": 3.2188, + "eval_vitaminc-pairs_samples_per_second": 39.766, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_negation-triplets_loss": 0.9331700205802917, + "eval_negation-triplets_runtime": 0.7626, + "eval_negation-triplets_samples_per_second": 167.851, + "eval_negation-triplets_steps_per_second": 1.311, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_scitail-pairs-pos_loss": 0.1767732799053192, + "eval_scitail-pairs-pos_runtime": 0.9371, + "eval_scitail-pairs-pos_samples_per_second": 136.589, + "eval_scitail-pairs-pos_steps_per_second": 1.067, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_scitail-pairs-qa_loss": 0.0004786076897289604, + "eval_scitail-pairs-qa_runtime": 0.6259, + "eval_scitail-pairs-qa_samples_per_second": 204.52, + "eval_scitail-pairs-qa_steps_per_second": 1.598, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_xsum-pairs_loss": 0.24477504193782806, + "eval_xsum-pairs_runtime": 3.0559, + "eval_xsum-pairs_samples_per_second": 41.886, + "eval_xsum-pairs_steps_per_second": 0.327, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_sciq_pairs_loss": 0.09893258661031723, + "eval_sciq_pairs_runtime": 3.5335, + "eval_sciq_pairs_samples_per_second": 36.225, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_qasc_pairs_loss": 0.14177586138248444, + "eval_qasc_pairs_runtime": 0.6247, + "eval_qasc_pairs_samples_per_second": 204.908, + "eval_qasc_pairs_steps_per_second": 1.601, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_openbookqa_pairs_loss": 0.6997162103652954, + "eval_openbookqa_pairs_runtime": 0.6036, + "eval_openbookqa_pairs_samples_per_second": 212.059, + "eval_openbookqa_pairs_steps_per_second": 1.657, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_msmarco_pairs_loss": 0.8556872010231018, + "eval_msmarco_pairs_runtime": 1.5328, + "eval_msmarco_pairs_samples_per_second": 83.509, + "eval_msmarco_pairs_steps_per_second": 0.652, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_nq_pairs_loss": 0.6516047120094299, + "eval_nq_pairs_runtime": 2.9184, + "eval_nq_pairs_samples_per_second": 43.859, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_trivia_pairs_loss": 0.7871389389038086, + "eval_trivia_pairs_runtime": 3.4438, + "eval_trivia_pairs_samples_per_second": 37.168, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_gooaq_pairs_loss": 0.3521522879600525, + "eval_gooaq_pairs_runtime": 0.9594, + "eval_gooaq_pairs_samples_per_second": 133.416, + "eval_gooaq_pairs_steps_per_second": 1.042, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_paws-pos_loss": 0.022935805842280388, + "eval_paws-pos_runtime": 0.7123, + "eval_paws-pos_samples_per_second": 179.693, + "eval_paws-pos_steps_per_second": 1.404, + "step": 2320 + }, + { + "epoch": 2.386831275720165, + "eval_global_dataset_loss": 0.5043801665306091, + "eval_global_dataset_runtime": 13.4067, + "eval_global_dataset_samples_per_second": 31.029, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2320 + }, + { + "epoch": 2.3878600823045266, + "grad_norm": 8.141152381896973, + "learning_rate": 2.132592240893068e-05, + "loss": 0.2414, + "step": 2321 + }, + { + "epoch": 2.388888888888889, + "grad_norm": 5.903191566467285, + "learning_rate": 2.131113440571718e-05, + "loss": 0.2045, + "step": 2322 + }, + { + "epoch": 2.389917695473251, + "grad_norm": 7.989975452423096, + "learning_rate": 2.1296349751379535e-05, + "loss": 0.3283, + "step": 2323 + }, + { + "epoch": 2.3909465020576133, + "grad_norm": 9.426039695739746, + "learning_rate": 2.1281568470401975e-05, + "loss": 0.3372, + "step": 2324 + }, + { + "epoch": 2.3919753086419755, + "grad_norm": 0.242423877120018, + "learning_rate": 2.126679058726313e-05, + "loss": 0.0025, + "step": 2325 + }, + { + "epoch": 2.3930041152263373, + "grad_norm": 4.283985614776611, + "learning_rate": 2.1252016126436018e-05, + "loss": 0.092, + "step": 2326 + }, + { + "epoch": 2.3940329218106995, + "grad_norm": 6.385652542114258, + "learning_rate": 2.1237245112387977e-05, + "loss": 0.2207, + "step": 2327 + }, + { + "epoch": 2.3950617283950617, + "grad_norm": 10.515926361083984, + "learning_rate": 2.122247756958065e-05, + "loss": 0.4517, + "step": 2328 + }, + { + "epoch": 2.396090534979424, + "grad_norm": 10.573301315307617, + "learning_rate": 2.1207713522469914e-05, + "loss": 0.6023, + "step": 2329 + }, + { + "epoch": 2.397119341563786, + "grad_norm": 4.354704856872559, + "learning_rate": 2.119295299550587e-05, + "loss": 0.1475, + "step": 2330 + }, + { + "epoch": 2.398148148148148, + "grad_norm": 3.8229165077209473, + "learning_rate": 2.117819601313279e-05, + "loss": 0.0387, + "step": 2331 + }, + { + "epoch": 2.39917695473251, + "grad_norm": 3.688175916671753, + "learning_rate": 2.1163442599789068e-05, + "loss": 0.0988, + "step": 2332 + }, + { + "epoch": 2.4002057613168724, + "grad_norm": 0.6866854429244995, + "learning_rate": 2.114869277990719e-05, + "loss": 0.0434, + "step": 2333 + }, + { + "epoch": 2.4012345679012346, + "grad_norm": 6.986307621002197, + "learning_rate": 2.1133946577913697e-05, + "loss": 0.2121, + "step": 2334 + }, + { + "epoch": 2.402263374485597, + "grad_norm": 7.146950721740723, + "learning_rate": 2.1119204018229125e-05, + "loss": 0.2746, + "step": 2335 + }, + { + "epoch": 2.403292181069959, + "grad_norm": 2.9970691204071045, + "learning_rate": 2.1104465125268006e-05, + "loss": 0.0735, + "step": 2336 + }, + { + "epoch": 2.4043209876543212, + "grad_norm": 5.8535966873168945, + "learning_rate": 2.108972992343876e-05, + "loss": 0.2131, + "step": 2337 + }, + { + "epoch": 2.405349794238683, + "grad_norm": 0.47091808915138245, + "learning_rate": 2.1074998437143725e-05, + "loss": 0.0251, + "step": 2338 + }, + { + "epoch": 2.406378600823045, + "grad_norm": 5.7281599044799805, + "learning_rate": 2.1060270690779072e-05, + "loss": 0.1822, + "step": 2339 + }, + { + "epoch": 2.4074074074074074, + "grad_norm": 2.2652881145477295, + "learning_rate": 2.1045546708734784e-05, + "loss": 0.0657, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.747207522392273, + "eval_Qnli-dev_cosine_ap": 0.7550750129926996, + "eval_Qnli-dev_cosine_f1": 0.7150837988826815, + "eval_Qnli-dev_cosine_f1_threshold": 0.6943259835243225, + "eval_Qnli-dev_cosine_precision": 0.6378737541528239, + "eval_Qnli-dev_cosine_recall": 0.8135593220338984, + "eval_Qnli-dev_dot_accuracy": 0.693359375, + "eval_Qnli-dev_dot_accuracy_threshold": 330.9173583984375, + "eval_Qnli-dev_dot_ap": 0.7157403439240755, + "eval_Qnli-dev_dot_f1": 0.6846543001686342, + "eval_Qnli-dev_dot_f1_threshold": 279.15374755859375, + "eval_Qnli-dev_dot_precision": 0.5686274509803921, + "eval_Qnli-dev_dot_recall": 0.8601694915254238, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.921449661254883, + "eval_Qnli-dev_euclidean_ap": 0.7598621951557832, + "eval_Qnli-dev_euclidean_f1": 0.71875, + "eval_Qnli-dev_euclidean_f1_threshold": 15.921449661254883, + "eval_Qnli-dev_euclidean_precision": 0.6666666666666666, + "eval_Qnli-dev_euclidean_recall": 0.7796610169491526, + "eval_Qnli-dev_manhattan_accuracy": 0.720703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 304.71258544921875, + "eval_Qnli-dev_manhattan_ap": 0.76479265959043, + "eval_Qnli-dev_manhattan_f1": 0.7161904761904763, + "eval_Qnli-dev_manhattan_f1_threshold": 337.3750915527344, + "eval_Qnli-dev_manhattan_precision": 0.6505190311418685, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 330.9173583984375, + "eval_Qnli-dev_max_ap": 0.76479265959043, + "eval_Qnli-dev_max_f1": 0.71875, + "eval_Qnli-dev_max_f1_threshold": 337.3750915527344, + "eval_Qnli-dev_max_precision": 0.6666666666666666, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8150783777236938, + "eval_allNLI-dev_cosine_ap": 0.6403760642877065, + "eval_allNLI-dev_cosine_f1": 0.6407766990291262, + "eval_allNLI-dev_cosine_f1_threshold": 0.7527122497558594, + "eval_allNLI-dev_cosine_precision": 0.5523012552301255, + "eval_allNLI-dev_cosine_recall": 0.7630057803468208, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 378.8970947265625, + "eval_allNLI-dev_dot_ap": 0.5821239761612493, + "eval_allNLI-dev_dot_f1": 0.6058091286307054, + "eval_allNLI-dev_dot_f1_threshold": 294.75982666015625, + "eval_allNLI-dev_dot_precision": 0.47249190938511326, + "eval_allNLI-dev_dot_recall": 0.8439306358381503, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.004294395446777, + "eval_allNLI-dev_euclidean_ap": 0.6506164162252022, + "eval_allNLI-dev_euclidean_f1": 0.6469248291571754, + "eval_allNLI-dev_euclidean_f1_threshold": 15.118417739868164, + "eval_allNLI-dev_euclidean_precision": 0.5338345864661654, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 263.478515625, + "eval_allNLI-dev_manhattan_ap": 0.6493825145479308, + "eval_allNLI-dev_manhattan_f1": 0.6433260393873084, + "eval_allNLI-dev_manhattan_f1_threshold": 325.490234375, + "eval_allNLI-dev_manhattan_precision": 0.5176056338028169, + "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 378.8970947265625, + "eval_allNLI-dev_max_ap": 0.6506164162252022, + "eval_allNLI-dev_max_f1": 0.6469248291571754, + "eval_allNLI-dev_max_f1_threshold": 325.490234375, + "eval_allNLI-dev_max_precision": 0.5523012552301255, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.76479265959043, + "eval_sts-test_pearson_cosine": 0.8505505641571589, + "eval_sts-test_pearson_dot": 0.8319891412078086, + "eval_sts-test_pearson_euclidean": 0.8756369096783705, + "eval_sts-test_pearson_manhattan": 0.8719660063964456, + "eval_sts-test_pearson_max": 0.8756369096783705, + "eval_sts-test_spearman_cosine": 0.8744222393472654, + "eval_sts-test_spearman_dot": 0.8277676620576281, + "eval_sts-test_spearman_euclidean": 0.8700285157525961, + "eval_sts-test_spearman_manhattan": 0.8667942078380557, + "eval_sts-test_spearman_max": 0.8744222393472654, + "eval_vitaminc-pairs_loss": 3.2281875610351562, + "eval_vitaminc-pairs_runtime": 3.3027, + "eval_vitaminc-pairs_samples_per_second": 38.756, + "eval_vitaminc-pairs_steps_per_second": 0.303, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_negation-triplets_loss": 0.9330846667289734, + "eval_negation-triplets_runtime": 0.8036, + "eval_negation-triplets_samples_per_second": 159.289, + "eval_negation-triplets_steps_per_second": 1.244, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_scitail-pairs-pos_loss": 0.15138240158557892, + "eval_scitail-pairs-pos_runtime": 1.0164, + "eval_scitail-pairs-pos_samples_per_second": 125.938, + "eval_scitail-pairs-pos_steps_per_second": 0.984, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_scitail-pairs-qa_loss": 0.0004746115591842681, + "eval_scitail-pairs-qa_runtime": 0.682, + "eval_scitail-pairs-qa_samples_per_second": 187.679, + "eval_scitail-pairs-qa_steps_per_second": 1.466, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_xsum-pairs_loss": 0.24052901566028595, + "eval_xsum-pairs_runtime": 3.058, + "eval_xsum-pairs_samples_per_second": 41.857, + "eval_xsum-pairs_steps_per_second": 0.327, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_sciq_pairs_loss": 0.09549580514431, + "eval_sciq_pairs_runtime": 3.5902, + "eval_sciq_pairs_samples_per_second": 35.653, + "eval_sciq_pairs_steps_per_second": 0.279, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_qasc_pairs_loss": 0.13644863665103912, + "eval_qasc_pairs_runtime": 0.6343, + "eval_qasc_pairs_samples_per_second": 201.796, + "eval_qasc_pairs_steps_per_second": 1.577, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_openbookqa_pairs_loss": 0.7199987769126892, + "eval_openbookqa_pairs_runtime": 0.6065, + "eval_openbookqa_pairs_samples_per_second": 211.057, + "eval_openbookqa_pairs_steps_per_second": 1.649, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_msmarco_pairs_loss": 0.8501226902008057, + "eval_msmarco_pairs_runtime": 1.54, + "eval_msmarco_pairs_samples_per_second": 83.115, + "eval_msmarco_pairs_steps_per_second": 0.649, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_nq_pairs_loss": 0.6119047999382019, + "eval_nq_pairs_runtime": 2.9156, + "eval_nq_pairs_samples_per_second": 43.902, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_trivia_pairs_loss": 0.8336032629013062, + "eval_trivia_pairs_runtime": 3.4539, + "eval_trivia_pairs_samples_per_second": 37.06, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_gooaq_pairs_loss": 0.3352070450782776, + "eval_gooaq_pairs_runtime": 0.9839, + "eval_gooaq_pairs_samples_per_second": 130.091, + "eval_gooaq_pairs_steps_per_second": 1.016, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_paws-pos_loss": 0.022952904924750328, + "eval_paws-pos_runtime": 0.7263, + "eval_paws-pos_samples_per_second": 176.236, + "eval_paws-pos_steps_per_second": 1.377, + "step": 2340 + }, + { + "epoch": 2.4074074074074074, + "eval_global_dataset_loss": 0.4841943085193634, + "eval_global_dataset_runtime": 13.5471, + "eval_global_dataset_samples_per_second": 30.708, + "eval_global_dataset_steps_per_second": 0.295, + "step": 2340 + }, + { + "epoch": 2.4084362139917697, + "grad_norm": 8.437246322631836, + "learning_rate": 2.103082651539461e-05, + "loss": 0.3478, + "step": 2341 + }, + { + "epoch": 2.409465020576132, + "grad_norm": 6.477084159851074, + "learning_rate": 2.1016110135136024e-05, + "loss": 0.4349, + "step": 2342 + }, + { + "epoch": 2.4104938271604937, + "grad_norm": 10.612602233886719, + "learning_rate": 2.1001397592330188e-05, + "loss": 0.527, + "step": 2343 + }, + { + "epoch": 2.411522633744856, + "grad_norm": 2.255913019180298, + "learning_rate": 2.0986688911341888e-05, + "loss": 0.0234, + "step": 2344 + }, + { + "epoch": 2.412551440329218, + "grad_norm": 6.696039199829102, + "learning_rate": 2.0971984116529553e-05, + "loss": 0.3944, + "step": 2345 + }, + { + "epoch": 2.4135802469135803, + "grad_norm": 3.830223798751831, + "learning_rate": 2.0957283232245142e-05, + "loss": 0.167, + "step": 2346 + }, + { + "epoch": 2.4146090534979425, + "grad_norm": 5.635642051696777, + "learning_rate": 2.094258628283416e-05, + "loss": 0.2363, + "step": 2347 + }, + { + "epoch": 2.4156378600823043, + "grad_norm": 0.8433836102485657, + "learning_rate": 2.092789329263558e-05, + "loss": 0.0189, + "step": 2348 + }, + { + "epoch": 2.4166666666666665, + "grad_norm": 5.297120094299316, + "learning_rate": 2.091320428598183e-05, + "loss": 0.1442, + "step": 2349 + }, + { + "epoch": 2.4176954732510287, + "grad_norm": 8.437000274658203, + "learning_rate": 2.0898519287198733e-05, + "loss": 0.2991, + "step": 2350 + }, + { + "epoch": 2.418724279835391, + "grad_norm": 11.086387634277344, + "learning_rate": 2.088383832060548e-05, + "loss": 0.596, + "step": 2351 + }, + { + "epoch": 2.419753086419753, + "grad_norm": 3.563920497894287, + "learning_rate": 2.086916141051458e-05, + "loss": 0.0736, + "step": 2352 + }, + { + "epoch": 2.4207818930041154, + "grad_norm": 4.009937286376953, + "learning_rate": 2.0854488581231832e-05, + "loss": 0.072, + "step": 2353 + }, + { + "epoch": 2.4218106995884776, + "grad_norm": 3.8591458797454834, + "learning_rate": 2.0839819857056263e-05, + "loss": 0.0782, + "step": 2354 + }, + { + "epoch": 2.4228395061728394, + "grad_norm": 8.588478088378906, + "learning_rate": 2.082515526228012e-05, + "loss": 0.4359, + "step": 2355 + }, + { + "epoch": 2.4238683127572016, + "grad_norm": 5.57858943939209, + "learning_rate": 2.0810494821188793e-05, + "loss": 0.153, + "step": 2356 + }, + { + "epoch": 2.424897119341564, + "grad_norm": 6.5189690589904785, + "learning_rate": 2.0795838558060812e-05, + "loss": 0.1654, + "step": 2357 + }, + { + "epoch": 2.425925925925926, + "grad_norm": 5.958938121795654, + "learning_rate": 2.0781186497167762e-05, + "loss": 0.1721, + "step": 2358 + }, + { + "epoch": 2.4269547325102883, + "grad_norm": 8.767863273620605, + "learning_rate": 2.0766538662774305e-05, + "loss": 0.4326, + "step": 2359 + }, + { + "epoch": 2.42798353909465, + "grad_norm": 8.790294647216797, + "learning_rate": 2.075189507913807e-05, + "loss": 0.424, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_Qnli-dev_cosine_accuracy": 0.71875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7381449937820435, + "eval_Qnli-dev_cosine_ap": 0.7455820885957152, + "eval_Qnli-dev_cosine_f1": 0.7046632124352332, + "eval_Qnli-dev_cosine_f1_threshold": 0.6711353063583374, + "eval_Qnli-dev_cosine_precision": 0.5947521865889213, + "eval_Qnli-dev_cosine_recall": 0.864406779661017, + "eval_Qnli-dev_dot_accuracy": 0.673828125, + "eval_Qnli-dev_dot_accuracy_threshold": 344.9980163574219, + "eval_Qnli-dev_dot_ap": 0.6941399390575902, + "eval_Qnli-dev_dot_f1": 0.6766169154228855, + "eval_Qnli-dev_dot_f1_threshold": 286.1156005859375, + "eval_Qnli-dev_dot_precision": 0.555858310626703, + "eval_Qnli-dev_dot_recall": 0.864406779661017, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.076042175292969, + "eval_Qnli-dev_euclidean_ap": 0.7541186023300442, + "eval_Qnli-dev_euclidean_f1": 0.7079303675048356, + "eval_Qnli-dev_euclidean_f1_threshold": 15.928156852722168, + "eval_Qnli-dev_euclidean_precision": 0.6512455516014235, + "eval_Qnli-dev_euclidean_recall": 0.7754237288135594, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 306.5092468261719, + "eval_Qnli-dev_manhattan_ap": 0.759303239951657, + "eval_Qnli-dev_manhattan_f1": 0.7153284671532847, + "eval_Qnli-dev_manhattan_f1_threshold": 344.607421875, + "eval_Qnli-dev_manhattan_precision": 0.6282051282051282, + "eval_Qnli-dev_manhattan_recall": 0.8305084745762712, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 344.9980163574219, + "eval_Qnli-dev_max_ap": 0.759303239951657, + "eval_Qnli-dev_max_f1": 0.7153284671532847, + "eval_Qnli-dev_max_f1_threshold": 344.607421875, + "eval_Qnli-dev_max_precision": 0.6512455516014235, + "eval_Qnli-dev_max_recall": 0.864406779661017, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7885736227035522, + "eval_allNLI-dev_cosine_ap": 0.6295434410984027, + "eval_allNLI-dev_cosine_f1": 0.6414253897550111, + "eval_allNLI-dev_cosine_f1_threshold": 0.7364753484725952, + "eval_allNLI-dev_cosine_precision": 0.5217391304347826, + "eval_allNLI-dev_cosine_recall": 0.8323699421965318, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 366.090576171875, + "eval_allNLI-dev_dot_ap": 0.5672938930377167, + "eval_allNLI-dev_dot_f1": 0.6017699115044248, + "eval_allNLI-dev_dot_f1_threshold": 315.785888671875, + "eval_allNLI-dev_dot_precision": 0.4874551971326165, + "eval_allNLI-dev_dot_recall": 0.7861271676300579, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.185325622558594, + "eval_allNLI-dev_euclidean_ap": 0.6386797664225503, + "eval_allNLI-dev_euclidean_f1": 0.6442307692307693, + "eval_allNLI-dev_euclidean_f1_threshold": 14.415718078613281, + "eval_allNLI-dev_euclidean_precision": 0.551440329218107, + "eval_allNLI-dev_euclidean_recall": 0.7745664739884393, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 266.48516845703125, + "eval_allNLI-dev_manhattan_ap": 0.6373586973705275, + "eval_allNLI-dev_manhattan_f1": 0.6425120772946861, + "eval_allNLI-dev_manhattan_f1_threshold": 305.5716247558594, + "eval_allNLI-dev_manhattan_precision": 0.5518672199170125, + "eval_allNLI-dev_manhattan_recall": 0.7687861271676301, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 366.090576171875, + "eval_allNLI-dev_max_ap": 0.6386797664225503, + "eval_allNLI-dev_max_f1": 0.6442307692307693, + "eval_allNLI-dev_max_f1_threshold": 315.785888671875, + "eval_allNLI-dev_max_precision": 0.5518672199170125, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.759303239951657, + "eval_sts-test_pearson_cosine": 0.8490502945589666, + "eval_sts-test_pearson_dot": 0.8297688891502646, + "eval_sts-test_pearson_euclidean": 0.8768496189760543, + "eval_sts-test_pearson_manhattan": 0.8733387541973802, + "eval_sts-test_pearson_max": 0.8768496189760543, + "eval_sts-test_spearman_cosine": 0.8766096810040769, + "eval_sts-test_spearman_dot": 0.8247449828582074, + "eval_sts-test_spearman_euclidean": 0.8736062448470544, + "eval_sts-test_spearman_manhattan": 0.8709237647367312, + "eval_sts-test_spearman_max": 0.8766096810040769, + "eval_vitaminc-pairs_loss": 3.183577537536621, + "eval_vitaminc-pairs_runtime": 3.2485, + "eval_vitaminc-pairs_samples_per_second": 39.403, + "eval_vitaminc-pairs_steps_per_second": 0.308, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_negation-triplets_loss": 0.9375492334365845, + "eval_negation-triplets_runtime": 0.7707, + "eval_negation-triplets_samples_per_second": 166.088, + "eval_negation-triplets_steps_per_second": 1.298, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_scitail-pairs-pos_loss": 0.1525314599275589, + "eval_scitail-pairs-pos_runtime": 0.9182, + "eval_scitail-pairs-pos_samples_per_second": 139.397, + "eval_scitail-pairs-pos_steps_per_second": 1.089, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_scitail-pairs-qa_loss": 0.0005260963807813823, + "eval_scitail-pairs-qa_runtime": 0.6088, + "eval_scitail-pairs-qa_samples_per_second": 210.247, + "eval_scitail-pairs-qa_steps_per_second": 1.643, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_xsum-pairs_loss": 0.27072906494140625, + "eval_xsum-pairs_runtime": 3.0331, + "eval_xsum-pairs_samples_per_second": 42.201, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_sciq_pairs_loss": 0.08968310803174973, + "eval_sciq_pairs_runtime": 3.5294, + "eval_sciq_pairs_samples_per_second": 36.267, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_qasc_pairs_loss": 0.14335228502750397, + "eval_qasc_pairs_runtime": 0.6387, + "eval_qasc_pairs_samples_per_second": 200.422, + "eval_qasc_pairs_steps_per_second": 1.566, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_openbookqa_pairs_loss": 0.7194057106971741, + "eval_openbookqa_pairs_runtime": 0.6049, + "eval_openbookqa_pairs_samples_per_second": 211.617, + "eval_openbookqa_pairs_steps_per_second": 1.653, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_msmarco_pairs_loss": 0.7818937301635742, + "eval_msmarco_pairs_runtime": 1.533, + "eval_msmarco_pairs_samples_per_second": 83.498, + "eval_msmarco_pairs_steps_per_second": 0.652, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_nq_pairs_loss": 0.6466493606567383, + "eval_nq_pairs_runtime": 2.9087, + "eval_nq_pairs_samples_per_second": 44.006, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_trivia_pairs_loss": 0.8176050782203674, + "eval_trivia_pairs_runtime": 3.4468, + "eval_trivia_pairs_samples_per_second": 37.136, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_gooaq_pairs_loss": 0.33769553899765015, + "eval_gooaq_pairs_runtime": 0.9622, + "eval_gooaq_pairs_samples_per_second": 133.033, + "eval_gooaq_pairs_steps_per_second": 1.039, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_paws-pos_loss": 0.022663207724690437, + "eval_paws-pos_runtime": 0.7105, + "eval_paws-pos_samples_per_second": 180.154, + "eval_paws-pos_steps_per_second": 1.407, + "step": 2360 + }, + { + "epoch": 2.42798353909465, + "eval_global_dataset_loss": 0.4742085337638855, + "eval_global_dataset_runtime": 13.4203, + "eval_global_dataset_samples_per_second": 30.998, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2360 + }, + { + "epoch": 2.4290123456790123, + "grad_norm": 0.2542645335197449, + "learning_rate": 2.0737255770509666e-05, + "loss": 0.0039, + "step": 2361 + }, + { + "epoch": 2.4300411522633745, + "grad_norm": 4.680812835693359, + "learning_rate": 2.0722620761132614e-05, + "loss": 0.1155, + "step": 2362 + }, + { + "epoch": 2.4310699588477367, + "grad_norm": 0.015261419117450714, + "learning_rate": 2.070799007524332e-05, + "loss": 0.0002, + "step": 2363 + }, + { + "epoch": 2.432098765432099, + "grad_norm": 6.4781174659729, + "learning_rate": 2.0693363737071018e-05, + "loss": 0.1702, + "step": 2364 + }, + { + "epoch": 2.4331275720164607, + "grad_norm": 3.860341787338257, + "learning_rate": 2.0678741770837764e-05, + "loss": 0.0731, + "step": 2365 + }, + { + "epoch": 2.434156378600823, + "grad_norm": 5.25972318649292, + "learning_rate": 2.0664124200758362e-05, + "loss": 0.1017, + "step": 2366 + }, + { + "epoch": 2.435185185185185, + "grad_norm": 8.531905174255371, + "learning_rate": 2.0649511051040328e-05, + "loss": 0.3434, + "step": 2367 + }, + { + "epoch": 2.4362139917695473, + "grad_norm": 0.18288291990756989, + "learning_rate": 2.0634902345883864e-05, + "loss": 0.0026, + "step": 2368 + }, + { + "epoch": 2.4372427983539096, + "grad_norm": 4.627861976623535, + "learning_rate": 2.0620298109481818e-05, + "loss": 0.1196, + "step": 2369 + }, + { + "epoch": 2.4382716049382718, + "grad_norm": 5.738020420074463, + "learning_rate": 2.060569836601963e-05, + "loss": 0.2487, + "step": 2370 + }, + { + "epoch": 2.439300411522634, + "grad_norm": 9.200518608093262, + "learning_rate": 2.0591103139675304e-05, + "loss": 0.4018, + "step": 2371 + }, + { + "epoch": 2.4403292181069958, + "grad_norm": 4.573552131652832, + "learning_rate": 2.0576512454619352e-05, + "loss": 0.09, + "step": 2372 + }, + { + "epoch": 2.441358024691358, + "grad_norm": 15.333046913146973, + "learning_rate": 2.0561926335014787e-05, + "loss": 1.9889, + "step": 2373 + }, + { + "epoch": 2.44238683127572, + "grad_norm": 4.621583461761475, + "learning_rate": 2.0547344805017036e-05, + "loss": 0.1348, + "step": 2374 + }, + { + "epoch": 2.4434156378600824, + "grad_norm": 4.141235828399658, + "learning_rate": 2.0532767888773946e-05, + "loss": 0.0876, + "step": 2375 + }, + { + "epoch": 2.4444444444444446, + "grad_norm": 0.08058172464370728, + "learning_rate": 2.0518195610425714e-05, + "loss": 0.0009, + "step": 2376 + }, + { + "epoch": 2.4454732510288064, + "grad_norm": 11.431347846984863, + "learning_rate": 2.050362799410485e-05, + "loss": 0.4829, + "step": 2377 + }, + { + "epoch": 2.4465020576131686, + "grad_norm": 6.493156909942627, + "learning_rate": 2.048906506393616e-05, + "loss": 0.1801, + "step": 2378 + }, + { + "epoch": 2.447530864197531, + "grad_norm": 11.291666030883789, + "learning_rate": 2.0474506844036678e-05, + "loss": 0.5785, + "step": 2379 + }, + { + "epoch": 2.448559670781893, + "grad_norm": 4.573147773742676, + "learning_rate": 2.045995335851564e-05, + "loss": 0.0832, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7453248500823975, + "eval_Qnli-dev_cosine_ap": 0.7515482666783928, + "eval_Qnli-dev_cosine_f1": 0.7071428571428572, + "eval_Qnli-dev_cosine_f1_threshold": 0.6855937242507935, + "eval_Qnli-dev_cosine_precision": 0.6111111111111112, + "eval_Qnli-dev_cosine_recall": 0.8389830508474576, + "eval_Qnli-dev_dot_accuracy": 0.685546875, + "eval_Qnli-dev_dot_accuracy_threshold": 351.0618591308594, + "eval_Qnli-dev_dot_ap": 0.6940425266526606, + "eval_Qnli-dev_dot_f1": 0.6785714285714286, + "eval_Qnli-dev_dot_f1_threshold": 282.13201904296875, + "eval_Qnli-dev_dot_precision": 0.55, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.7109375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.241870880126953, + "eval_Qnli-dev_euclidean_ap": 0.7581399600026446, + "eval_Qnli-dev_euclidean_f1": 0.7093235831809872, + "eval_Qnli-dev_euclidean_f1_threshold": 16.369251251220703, + "eval_Qnli-dev_euclidean_precision": 0.6237942122186495, + "eval_Qnli-dev_euclidean_recall": 0.8220338983050848, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 309.08245849609375, + "eval_Qnli-dev_manhattan_ap": 0.7642020259418071, + "eval_Qnli-dev_manhattan_f1": 0.7108655616942909, + "eval_Qnli-dev_manhattan_f1_threshold": 342.5469970703125, + "eval_Qnli-dev_manhattan_precision": 0.6286644951140065, + "eval_Qnli-dev_manhattan_recall": 0.8177966101694916, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 351.0618591308594, + "eval_Qnli-dev_max_ap": 0.7642020259418071, + "eval_Qnli-dev_max_f1": 0.7108655616942909, + "eval_Qnli-dev_max_f1_threshold": 342.5469970703125, + "eval_Qnli-dev_max_precision": 0.6286644951140065, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.788677990436554, + "eval_allNLI-dev_cosine_ap": 0.6268711298115903, + "eval_allNLI-dev_cosine_f1": 0.6441441441441441, + "eval_allNLI-dev_cosine_f1_threshold": 0.7389147281646729, + "eval_allNLI-dev_cosine_precision": 0.5276752767527675, + "eval_allNLI-dev_cosine_recall": 0.8265895953757225, + "eval_allNLI-dev_dot_accuracy": 0.69921875, + "eval_allNLI-dev_dot_accuracy_threshold": 370.53497314453125, + "eval_allNLI-dev_dot_ap": 0.5657282206617683, + "eval_allNLI-dev_dot_f1": 0.6053811659192826, + "eval_allNLI-dev_dot_f1_threshold": 311.272705078125, + "eval_allNLI-dev_dot_precision": 0.4945054945054945, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.740234375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.105224609375, + "eval_allNLI-dev_euclidean_ap": 0.635176479184964, + "eval_allNLI-dev_euclidean_f1": 0.6452991452991454, + "eval_allNLI-dev_euclidean_f1_threshold": 15.597915649414062, + "eval_allNLI-dev_euclidean_precision": 0.511864406779661, + "eval_allNLI-dev_euclidean_recall": 0.8728323699421965, + "eval_allNLI-dev_manhattan_accuracy": 0.73828125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 266.0863037109375, + "eval_allNLI-dev_manhattan_ap": 0.6328374451791714, + "eval_allNLI-dev_manhattan_f1": 0.6428571428571429, + "eval_allNLI-dev_manhattan_f1_threshold": 317.72406005859375, + "eval_allNLI-dev_manhattan_precision": 0.5236363636363637, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.740234375, + "eval_allNLI-dev_max_accuracy_threshold": 370.53497314453125, + "eval_allNLI-dev_max_ap": 0.635176479184964, + "eval_allNLI-dev_max_f1": 0.6452991452991454, + "eval_allNLI-dev_max_f1_threshold": 317.72406005859375, + "eval_allNLI-dev_max_precision": 0.5276752767527675, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7642020259418071, + "eval_sts-test_pearson_cosine": 0.8453833948407055, + "eval_sts-test_pearson_dot": 0.8237173239473772, + "eval_sts-test_pearson_euclidean": 0.8742859955629088, + "eval_sts-test_pearson_manhattan": 0.8708241682671891, + "eval_sts-test_pearson_max": 0.8742859955629088, + "eval_sts-test_spearman_cosine": 0.8745370263327341, + "eval_sts-test_spearman_dot": 0.8180517156969415, + "eval_sts-test_spearman_euclidean": 0.8719163924656539, + "eval_sts-test_spearman_manhattan": 0.8684305377108321, + "eval_sts-test_spearman_max": 0.8745370263327341, + "eval_vitaminc-pairs_loss": 3.2834718227386475, + "eval_vitaminc-pairs_runtime": 3.2558, + "eval_vitaminc-pairs_samples_per_second": 39.315, + "eval_vitaminc-pairs_steps_per_second": 0.307, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_negation-triplets_loss": 0.9011225700378418, + "eval_negation-triplets_runtime": 0.7828, + "eval_negation-triplets_samples_per_second": 163.52, + "eval_negation-triplets_steps_per_second": 1.278, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_scitail-pairs-pos_loss": 0.14622709155082703, + "eval_scitail-pairs-pos_runtime": 0.954, + "eval_scitail-pairs-pos_samples_per_second": 134.173, + "eval_scitail-pairs-pos_steps_per_second": 1.048, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_scitail-pairs-qa_loss": 0.0003964989446103573, + "eval_scitail-pairs-qa_runtime": 0.6281, + "eval_scitail-pairs-qa_samples_per_second": 203.779, + "eval_scitail-pairs-qa_steps_per_second": 1.592, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_xsum-pairs_loss": 0.25987619161605835, + "eval_xsum-pairs_runtime": 3.0394, + "eval_xsum-pairs_samples_per_second": 42.114, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_sciq_pairs_loss": 0.09321531653404236, + "eval_sciq_pairs_runtime": 3.5312, + "eval_sciq_pairs_samples_per_second": 36.249, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_qasc_pairs_loss": 0.14463794231414795, + "eval_qasc_pairs_runtime": 0.627, + "eval_qasc_pairs_samples_per_second": 204.134, + "eval_qasc_pairs_steps_per_second": 1.595, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_openbookqa_pairs_loss": 0.6942688822746277, + "eval_openbookqa_pairs_runtime": 0.6113, + "eval_openbookqa_pairs_samples_per_second": 209.385, + "eval_openbookqa_pairs_steps_per_second": 1.636, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_msmarco_pairs_loss": 0.7531794905662537, + "eval_msmarco_pairs_runtime": 1.5318, + "eval_msmarco_pairs_samples_per_second": 83.561, + "eval_msmarco_pairs_steps_per_second": 0.653, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_nq_pairs_loss": 0.6968003511428833, + "eval_nq_pairs_runtime": 2.9055, + "eval_nq_pairs_samples_per_second": 44.055, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_trivia_pairs_loss": 0.7705097198486328, + "eval_trivia_pairs_runtime": 3.4531, + "eval_trivia_pairs_samples_per_second": 37.068, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_gooaq_pairs_loss": 0.3276014029979706, + "eval_gooaq_pairs_runtime": 0.9593, + "eval_gooaq_pairs_samples_per_second": 133.432, + "eval_gooaq_pairs_steps_per_second": 1.042, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_paws-pos_loss": 0.02253994718194008, + "eval_paws-pos_runtime": 0.7171, + "eval_paws-pos_samples_per_second": 178.501, + "eval_paws-pos_steps_per_second": 1.395, + "step": 2380 + }, + { + "epoch": 2.448559670781893, + "eval_global_dataset_loss": 0.47114360332489014, + "eval_global_dataset_runtime": 13.4412, + "eval_global_dataset_samples_per_second": 30.95, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2380 + }, + { + "epoch": 2.4495884773662553, + "grad_norm": 14.15799331665039, + "learning_rate": 2.0445404631474436e-05, + "loss": 1.8056, + "step": 2381 + }, + { + "epoch": 2.450617283950617, + "grad_norm": 7.68344783782959, + "learning_rate": 2.0430860687006592e-05, + "loss": 0.3692, + "step": 2382 + }, + { + "epoch": 2.4516460905349793, + "grad_norm": 9.818589210510254, + "learning_rate": 2.0416321549197695e-05, + "loss": 0.5529, + "step": 2383 + }, + { + "epoch": 2.4526748971193415, + "grad_norm": 3.900679588317871, + "learning_rate": 2.0401787242125384e-05, + "loss": 0.0598, + "step": 2384 + }, + { + "epoch": 2.4537037037037037, + "grad_norm": 6.681536674499512, + "learning_rate": 2.0387257789859287e-05, + "loss": 0.2639, + "step": 2385 + }, + { + "epoch": 2.454732510288066, + "grad_norm": 8.679218292236328, + "learning_rate": 2.0372733216461014e-05, + "loss": 0.2728, + "step": 2386 + }, + { + "epoch": 2.455761316872428, + "grad_norm": 4.3105149269104, + "learning_rate": 2.0358213545984065e-05, + "loss": 0.1657, + "step": 2387 + }, + { + "epoch": 2.45679012345679, + "grad_norm": 2.6358437538146973, + "learning_rate": 2.034369880247384e-05, + "loss": 0.1206, + "step": 2388 + }, + { + "epoch": 2.457818930041152, + "grad_norm": 8.414362907409668, + "learning_rate": 2.032918900996758e-05, + "loss": 0.5121, + "step": 2389 + }, + { + "epoch": 2.4588477366255144, + "grad_norm": 5.507661819458008, + "learning_rate": 2.0314684192494322e-05, + "loss": 0.1428, + "step": 2390 + }, + { + "epoch": 2.4598765432098766, + "grad_norm": 8.246047973632812, + "learning_rate": 2.030018437407486e-05, + "loss": 0.4525, + "step": 2391 + }, + { + "epoch": 2.460905349794239, + "grad_norm": 0.056204650551080704, + "learning_rate": 2.028568957872172e-05, + "loss": 0.0008, + "step": 2392 + }, + { + "epoch": 2.461934156378601, + "grad_norm": 3.879879951477051, + "learning_rate": 2.0271199830439098e-05, + "loss": 0.0755, + "step": 2393 + }, + { + "epoch": 2.462962962962963, + "grad_norm": 8.25973129272461, + "learning_rate": 2.025671515322284e-05, + "loss": 0.4055, + "step": 2394 + }, + { + "epoch": 2.463991769547325, + "grad_norm": 8.683910369873047, + "learning_rate": 2.0242235571060384e-05, + "loss": 0.3136, + "step": 2395 + }, + { + "epoch": 2.4650205761316872, + "grad_norm": 3.877725124359131, + "learning_rate": 2.0227761107930747e-05, + "loss": 0.051, + "step": 2396 + }, + { + "epoch": 2.4660493827160495, + "grad_norm": 3.031614303588867, + "learning_rate": 2.0213291787804453e-05, + "loss": 0.0551, + "step": 2397 + }, + { + "epoch": 2.4670781893004117, + "grad_norm": 3.2729036808013916, + "learning_rate": 2.0198827634643523e-05, + "loss": 0.0787, + "step": 2398 + }, + { + "epoch": 2.4681069958847734, + "grad_norm": 5.738383769989014, + "learning_rate": 2.01843686724014e-05, + "loss": 0.1634, + "step": 2399 + }, + { + "epoch": 2.4691358024691357, + "grad_norm": 0.38787469267845154, + "learning_rate": 2.016991492502296e-05, + "loss": 0.0065, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.750226616859436, + "eval_Qnli-dev_cosine_ap": 0.7494456144921429, + "eval_Qnli-dev_cosine_f1": 0.6999999999999998, + "eval_Qnli-dev_cosine_f1_threshold": 0.6726129055023193, + "eval_Qnli-dev_cosine_precision": 0.5769230769230769, + "eval_Qnli-dev_cosine_recall": 0.8898305084745762, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 351.0986328125, + "eval_Qnli-dev_dot_ap": 0.6983219066086047, + "eval_Qnli-dev_dot_f1": 0.6773162939297124, + "eval_Qnli-dev_dot_f1_threshold": 288.8251037597656, + "eval_Qnli-dev_dot_precision": 0.5435897435897435, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.695568084716797, + "eval_Qnli-dev_euclidean_ap": 0.7543898549379728, + "eval_Qnli-dev_euclidean_f1": 0.6984126984126984, + "eval_Qnli-dev_euclidean_f1_threshold": 16.431072235107422, + "eval_Qnli-dev_euclidean_precision": 0.5981873111782477, + "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 300.5904235839844, + "eval_Qnli-dev_manhattan_ap": 0.7590128670443661, + "eval_Qnli-dev_manhattan_f1": 0.7042253521126761, + "eval_Qnli-dev_manhattan_f1_threshold": 346.5229187011719, + "eval_Qnli-dev_manhattan_precision": 0.6024096385542169, + "eval_Qnli-dev_manhattan_recall": 0.847457627118644, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 351.0986328125, + "eval_Qnli-dev_max_ap": 0.7590128670443661, + "eval_Qnli-dev_max_f1": 0.7042253521126761, + "eval_Qnli-dev_max_f1_threshold": 346.5229187011719, + "eval_Qnli-dev_max_precision": 0.6024096385542169, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.797395646572113, + "eval_allNLI-dev_cosine_ap": 0.6266402980803186, + "eval_allNLI-dev_cosine_f1": 0.6436285097192225, + "eval_allNLI-dev_cosine_f1_threshold": 0.7329103946685791, + "eval_allNLI-dev_cosine_precision": 0.5137931034482759, + "eval_allNLI-dev_cosine_recall": 0.861271676300578, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 398.2210388183594, + "eval_allNLI-dev_dot_ap": 0.5662089458229917, + "eval_allNLI-dev_dot_f1": 0.5990783410138248, + "eval_allNLI-dev_dot_f1_threshold": 319.97662353515625, + "eval_allNLI-dev_dot_precision": 0.49808429118773945, + "eval_allNLI-dev_dot_recall": 0.7514450867052023, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.369623184204102, + "eval_allNLI-dev_euclidean_ap": 0.6343492968170963, + "eval_allNLI-dev_euclidean_f1": 0.6460176991150444, + "eval_allNLI-dev_euclidean_f1_threshold": 15.137269973754883, + "eval_allNLI-dev_euclidean_precision": 0.5232974910394266, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.744140625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 259.2945556640625, + "eval_allNLI-dev_manhattan_ap": 0.6320388560985828, + "eval_allNLI-dev_manhattan_f1": 0.6363636363636364, + "eval_allNLI-dev_manhattan_f1_threshold": 321.5367736816406, + "eval_allNLI-dev_manhattan_precision": 0.5086505190311419, + "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 398.2210388183594, + "eval_allNLI-dev_max_ap": 0.6343492968170963, + "eval_allNLI-dev_max_f1": 0.6460176991150444, + "eval_allNLI-dev_max_f1_threshold": 321.5367736816406, + "eval_allNLI-dev_max_precision": 0.5232974910394266, + "eval_allNLI-dev_max_recall": 0.861271676300578, + "eval_sequential_score": 0.7590128670443661, + "eval_sts-test_pearson_cosine": 0.8456032466147674, + "eval_sts-test_pearson_dot": 0.8275772102194185, + "eval_sts-test_pearson_euclidean": 0.8745086682855159, + "eval_sts-test_pearson_manhattan": 0.871147628347342, + "eval_sts-test_pearson_max": 0.8745086682855159, + "eval_sts-test_spearman_cosine": 0.8759779721982759, + "eval_sts-test_spearman_dot": 0.8257146084387749, + "eval_sts-test_spearman_euclidean": 0.8725826939965103, + "eval_sts-test_spearman_manhattan": 0.8695588512642933, + "eval_sts-test_spearman_max": 0.8759779721982759, + "eval_vitaminc-pairs_loss": 3.0925190448760986, + "eval_vitaminc-pairs_runtime": 3.2183, + "eval_vitaminc-pairs_samples_per_second": 39.773, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_negation-triplets_loss": 0.8976351618766785, + "eval_negation-triplets_runtime": 0.7633, + "eval_negation-triplets_samples_per_second": 167.704, + "eval_negation-triplets_steps_per_second": 1.31, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_scitail-pairs-pos_loss": 0.1458752602338791, + "eval_scitail-pairs-pos_runtime": 0.939, + "eval_scitail-pairs-pos_samples_per_second": 136.31, + "eval_scitail-pairs-pos_steps_per_second": 1.065, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_scitail-pairs-qa_loss": 0.0004099968646187335, + "eval_scitail-pairs-qa_runtime": 0.6149, + "eval_scitail-pairs-qa_samples_per_second": 208.162, + "eval_scitail-pairs-qa_steps_per_second": 1.626, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_xsum-pairs_loss": 0.2671279311180115, + "eval_xsum-pairs_runtime": 3.0252, + "eval_xsum-pairs_samples_per_second": 42.312, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_sciq_pairs_loss": 0.08947267383337021, + "eval_sciq_pairs_runtime": 3.519, + "eval_sciq_pairs_samples_per_second": 36.374, + "eval_sciq_pairs_steps_per_second": 0.284, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_qasc_pairs_loss": 0.15361525118350983, + "eval_qasc_pairs_runtime": 0.6252, + "eval_qasc_pairs_samples_per_second": 204.739, + "eval_qasc_pairs_steps_per_second": 1.6, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_openbookqa_pairs_loss": 0.6690425872802734, + "eval_openbookqa_pairs_runtime": 0.6103, + "eval_openbookqa_pairs_samples_per_second": 209.74, + "eval_openbookqa_pairs_steps_per_second": 1.639, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_msmarco_pairs_loss": 0.7587112188339233, + "eval_msmarco_pairs_runtime": 1.529, + "eval_msmarco_pairs_samples_per_second": 83.713, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_nq_pairs_loss": 0.6752411723136902, + "eval_nq_pairs_runtime": 2.9092, + "eval_nq_pairs_samples_per_second": 43.998, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_trivia_pairs_loss": 0.7570594549179077, + "eval_trivia_pairs_runtime": 3.4514, + "eval_trivia_pairs_samples_per_second": 37.087, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_gooaq_pairs_loss": 0.29824191331863403, + "eval_gooaq_pairs_runtime": 0.9694, + "eval_gooaq_pairs_samples_per_second": 132.035, + "eval_gooaq_pairs_steps_per_second": 1.032, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_paws-pos_loss": 0.02212175540626049, + "eval_paws-pos_runtime": 0.7106, + "eval_paws-pos_samples_per_second": 180.14, + "eval_paws-pos_steps_per_second": 1.407, + "step": 2400 + }, + { + "epoch": 2.4691358024691357, + "eval_global_dataset_loss": 0.4253956079483032, + "eval_global_dataset_runtime": 13.4477, + "eval_global_dataset_samples_per_second": 30.935, + "eval_global_dataset_steps_per_second": 0.297, + "step": 2400 + }, + { + "epoch": 2.470164609053498, + "grad_norm": 5.478442668914795, + "learning_rate": 2.015546641644441e-05, + "loss": 0.1903, + "step": 2401 + }, + { + "epoch": 2.47119341563786, + "grad_norm": 4.002430438995361, + "learning_rate": 2.014102317059331e-05, + "loss": 0.0899, + "step": 2402 + }, + { + "epoch": 2.4722222222222223, + "grad_norm": 4.9087724685668945, + "learning_rate": 2.012658521138849e-05, + "loss": 0.1488, + "step": 2403 + }, + { + "epoch": 2.4732510288065845, + "grad_norm": 10.662351608276367, + "learning_rate": 2.0112152562740022e-05, + "loss": 0.5978, + "step": 2404 + }, + { + "epoch": 2.4742798353909463, + "grad_norm": 13.157684326171875, + "learning_rate": 2.009772524854919e-05, + "loss": 1.6215, + "step": 2405 + }, + { + "epoch": 2.4753086419753085, + "grad_norm": 2.8923072814941406, + "learning_rate": 2.008330329270845e-05, + "loss": 0.045, + "step": 2406 + }, + { + "epoch": 2.4763374485596708, + "grad_norm": 5.9885663986206055, + "learning_rate": 2.0068886719101372e-05, + "loss": 0.2916, + "step": 2407 + }, + { + "epoch": 2.477366255144033, + "grad_norm": 8.352883338928223, + "learning_rate": 2.0054475551602615e-05, + "loss": 0.4255, + "step": 2408 + }, + { + "epoch": 2.478395061728395, + "grad_norm": 0.45715031027793884, + "learning_rate": 2.0040069814077894e-05, + "loss": 0.0082, + "step": 2409 + }, + { + "epoch": 2.4794238683127574, + "grad_norm": 0.21182315051555634, + "learning_rate": 2.002566953038392e-05, + "loss": 0.0036, + "step": 2410 + }, + { + "epoch": 2.480452674897119, + "grad_norm": 4.614677906036377, + "learning_rate": 2.0011274724368375e-05, + "loss": 0.1344, + "step": 2411 + }, + { + "epoch": 2.4814814814814814, + "grad_norm": 5.564375877380371, + "learning_rate": 1.9996885419869886e-05, + "loss": 0.3181, + "step": 2412 + }, + { + "epoch": 2.4825102880658436, + "grad_norm": 6.444988250732422, + "learning_rate": 1.9982501640717944e-05, + "loss": 0.2268, + "step": 2413 + }, + { + "epoch": 2.483539094650206, + "grad_norm": 3.7375905513763428, + "learning_rate": 1.99681234107329e-05, + "loss": 0.063, + "step": 2414 + }, + { + "epoch": 2.484567901234568, + "grad_norm": 7.726154804229736, + "learning_rate": 1.9953750753725924e-05, + "loss": 0.2874, + "step": 2415 + }, + { + "epoch": 2.48559670781893, + "grad_norm": 4.854942798614502, + "learning_rate": 1.9939383693498947e-05, + "loss": 0.1126, + "step": 2416 + }, + { + "epoch": 2.486625514403292, + "grad_norm": 4.493672847747803, + "learning_rate": 1.992502225384463e-05, + "loss": 0.1051, + "step": 2417 + }, + { + "epoch": 2.4876543209876543, + "grad_norm": 0.311063289642334, + "learning_rate": 1.9910666458546334e-05, + "loss": 0.0042, + "step": 2418 + }, + { + "epoch": 2.4886831275720165, + "grad_norm": 6.245817184448242, + "learning_rate": 1.9896316331378068e-05, + "loss": 0.3168, + "step": 2419 + }, + { + "epoch": 2.4897119341563787, + "grad_norm": 2.906459331512451, + "learning_rate": 1.988197189610445e-05, + "loss": 0.0494, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_Qnli-dev_cosine_accuracy": 0.716796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7279399037361145, + "eval_Qnli-dev_cosine_ap": 0.7563310893572132, + "eval_Qnli-dev_cosine_f1": 0.7014681892332789, + "eval_Qnli-dev_cosine_f1_threshold": 0.6324313879013062, + "eval_Qnli-dev_cosine_precision": 0.5702917771883289, + "eval_Qnli-dev_cosine_recall": 0.9110169491525424, + "eval_Qnli-dev_dot_accuracy": 0.68359375, + "eval_Qnli-dev_dot_accuracy_threshold": 339.02484130859375, + "eval_Qnli-dev_dot_ap": 0.7113151516571359, + "eval_Qnli-dev_dot_f1": 0.6840390879478827, + "eval_Qnli-dev_dot_f1_threshold": 269.8001708984375, + "eval_Qnli-dev_dot_precision": 0.5555555555555556, + "eval_Qnli-dev_dot_recall": 0.8898305084745762, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.123961448669434, + "eval_Qnli-dev_euclidean_ap": 0.7617899957515049, + "eval_Qnli-dev_euclidean_f1": 0.7028862478777589, + "eval_Qnli-dev_euclidean_f1_threshold": 17.275789260864258, + "eval_Qnli-dev_euclidean_precision": 0.5864022662889519, + "eval_Qnli-dev_euclidean_recall": 0.8771186440677966, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 315.0229797363281, + "eval_Qnli-dev_manhattan_ap": 0.7656838953165184, + "eval_Qnli-dev_manhattan_f1": 0.7097902097902098, + "eval_Qnli-dev_manhattan_f1_threshold": 357.1176452636719, + "eval_Qnli-dev_manhattan_precision": 0.6041666666666666, + "eval_Qnli-dev_manhattan_recall": 0.8601694915254238, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 339.02484130859375, + "eval_Qnli-dev_max_ap": 0.7656838953165184, + "eval_Qnli-dev_max_f1": 0.7097902097902098, + "eval_Qnli-dev_max_f1_threshold": 357.1176452636719, + "eval_Qnli-dev_max_precision": 0.6041666666666666, + "eval_Qnli-dev_max_recall": 0.9110169491525424, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8329659700393677, + "eval_allNLI-dev_cosine_ap": 0.6336579252185592, + "eval_allNLI-dev_cosine_f1": 0.642369020501139, + "eval_allNLI-dev_cosine_f1_threshold": 0.729081928730011, + "eval_allNLI-dev_cosine_precision": 0.5300751879699248, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.69921875, + "eval_allNLI-dev_dot_accuracy_threshold": 378.456787109375, + "eval_allNLI-dev_dot_ap": 0.5712809714617196, + "eval_allNLI-dev_dot_f1": 0.6015936254980079, + "eval_allNLI-dev_dot_f1_threshold": 282.9576416015625, + "eval_allNLI-dev_dot_precision": 0.45896656534954405, + "eval_allNLI-dev_dot_recall": 0.8728323699421965, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.69471263885498, + "eval_allNLI-dev_euclidean_ap": 0.6410698746813956, + "eval_allNLI-dev_euclidean_f1": 0.6417582417582418, + "eval_allNLI-dev_euclidean_f1_threshold": 15.446972846984863, + "eval_allNLI-dev_euclidean_precision": 0.5177304964539007, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.74609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 265.9760437011719, + "eval_allNLI-dev_manhattan_ap": 0.6378198371784047, + "eval_allNLI-dev_manhattan_f1": 0.6394849785407726, + "eval_allNLI-dev_manhattan_f1_threshold": 329.0739440917969, + "eval_allNLI-dev_manhattan_precision": 0.5085324232081911, + "eval_allNLI-dev_manhattan_recall": 0.861271676300578, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 378.456787109375, + "eval_allNLI-dev_max_ap": 0.6410698746813956, + "eval_allNLI-dev_max_f1": 0.642369020501139, + "eval_allNLI-dev_max_f1_threshold": 329.0739440917969, + "eval_allNLI-dev_max_precision": 0.5300751879699248, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7656838953165184, + "eval_sts-test_pearson_cosine": 0.8484670353258724, + "eval_sts-test_pearson_dot": 0.8294021934370874, + "eval_sts-test_pearson_euclidean": 0.874417885747803, + "eval_sts-test_pearson_manhattan": 0.8714963913215741, + "eval_sts-test_pearson_max": 0.874417885747803, + "eval_sts-test_spearman_cosine": 0.8739520378438258, + "eval_sts-test_spearman_dot": 0.8246635355624751, + "eval_sts-test_spearman_euclidean": 0.869761395473351, + "eval_sts-test_spearman_manhattan": 0.867299494330426, + "eval_sts-test_spearman_max": 0.8739520378438258, + "eval_vitaminc-pairs_loss": 3.1515254974365234, + "eval_vitaminc-pairs_runtime": 3.2604, + "eval_vitaminc-pairs_samples_per_second": 39.258, + "eval_vitaminc-pairs_steps_per_second": 0.307, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_negation-triplets_loss": 0.9106173515319824, + "eval_negation-triplets_runtime": 0.7727, + "eval_negation-triplets_samples_per_second": 165.663, + "eval_negation-triplets_steps_per_second": 1.294, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_scitail-pairs-pos_loss": 0.14981313049793243, + "eval_scitail-pairs-pos_runtime": 0.9357, + "eval_scitail-pairs-pos_samples_per_second": 136.79, + "eval_scitail-pairs-pos_steps_per_second": 1.069, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_scitail-pairs-qa_loss": 0.0003851282817777246, + "eval_scitail-pairs-qa_runtime": 0.6167, + "eval_scitail-pairs-qa_samples_per_second": 207.562, + "eval_scitail-pairs-qa_steps_per_second": 1.622, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_xsum-pairs_loss": 0.2592019736766815, + "eval_xsum-pairs_runtime": 3.0356, + "eval_xsum-pairs_samples_per_second": 42.167, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_sciq_pairs_loss": 0.10065734386444092, + "eval_sciq_pairs_runtime": 3.5374, + "eval_sciq_pairs_samples_per_second": 36.185, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_qasc_pairs_loss": 0.1522054374217987, + "eval_qasc_pairs_runtime": 0.6254, + "eval_qasc_pairs_samples_per_second": 204.678, + "eval_qasc_pairs_steps_per_second": 1.599, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_openbookqa_pairs_loss": 0.6953917741775513, + "eval_openbookqa_pairs_runtime": 0.6118, + "eval_openbookqa_pairs_samples_per_second": 209.22, + "eval_openbookqa_pairs_steps_per_second": 1.635, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_msmarco_pairs_loss": 0.7634124755859375, + "eval_msmarco_pairs_runtime": 1.5293, + "eval_msmarco_pairs_samples_per_second": 83.697, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_nq_pairs_loss": 0.5921059250831604, + "eval_nq_pairs_runtime": 2.9026, + "eval_nq_pairs_samples_per_second": 44.098, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_trivia_pairs_loss": 0.8200302720069885, + "eval_trivia_pairs_runtime": 3.4533, + "eval_trivia_pairs_samples_per_second": 37.066, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_gooaq_pairs_loss": 0.3450007438659668, + "eval_gooaq_pairs_runtime": 0.9531, + "eval_gooaq_pairs_samples_per_second": 134.292, + "eval_gooaq_pairs_steps_per_second": 1.049, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_paws-pos_loss": 0.02186736650764942, + "eval_paws-pos_runtime": 0.7101, + "eval_paws-pos_samples_per_second": 180.246, + "eval_paws-pos_steps_per_second": 1.408, + "step": 2420 + }, + { + "epoch": 2.4897119341563787, + "eval_global_dataset_loss": 0.433958500623703, + "eval_global_dataset_runtime": 13.4125, + "eval_global_dataset_samples_per_second": 31.016, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2420 + }, + { + "epoch": 2.490740740740741, + "grad_norm": 7.837283611297607, + "learning_rate": 1.986763317648068e-05, + "loss": 0.2357, + "step": 2421 + }, + { + "epoch": 2.4917695473251027, + "grad_norm": 6.507171630859375, + "learning_rate": 1.985330019625249e-05, + "loss": 0.1965, + "step": 2422 + }, + { + "epoch": 2.492798353909465, + "grad_norm": 9.107889175415039, + "learning_rate": 1.983897297915611e-05, + "loss": 0.338, + "step": 2423 + }, + { + "epoch": 2.493827160493827, + "grad_norm": 1.823559045791626, + "learning_rate": 1.9824651548918213e-05, + "loss": 0.0235, + "step": 2424 + }, + { + "epoch": 2.4948559670781894, + "grad_norm": 6.848126411437988, + "learning_rate": 1.9810335929255904e-05, + "loss": 0.1863, + "step": 2425 + }, + { + "epoch": 2.4958847736625516, + "grad_norm": 6.037849426269531, + "learning_rate": 1.9796026143876657e-05, + "loss": 0.3568, + "step": 2426 + }, + { + "epoch": 2.496913580246914, + "grad_norm": 6.2176408767700195, + "learning_rate": 1.9781722216478288e-05, + "loss": 0.358, + "step": 2427 + }, + { + "epoch": 2.4979423868312756, + "grad_norm": 4.008078098297119, + "learning_rate": 1.976742417074891e-05, + "loss": 0.1074, + "step": 2428 + }, + { + "epoch": 2.498971193415638, + "grad_norm": 9.871790885925293, + "learning_rate": 1.9753132030366893e-05, + "loss": 0.4596, + "step": 2429 + }, + { + "epoch": 2.5, + "grad_norm": 11.414341926574707, + "learning_rate": 1.973884581900083e-05, + "loss": 0.6031, + "step": 2430 + }, + { + "epoch": 2.501028806584362, + "grad_norm": 5.77439546585083, + "learning_rate": 1.9724565560309505e-05, + "loss": 0.1052, + "step": 2431 + }, + { + "epoch": 2.5020576131687244, + "grad_norm": 9.882329940795898, + "learning_rate": 1.971029127794183e-05, + "loss": 0.4088, + "step": 2432 + }, + { + "epoch": 2.503086419753086, + "grad_norm": 8.163681030273438, + "learning_rate": 1.9696022995536813e-05, + "loss": 0.3387, + "step": 2433 + }, + { + "epoch": 2.5041152263374484, + "grad_norm": 4.288134574890137, + "learning_rate": 1.9681760736723547e-05, + "loss": 0.0472, + "step": 2434 + }, + { + "epoch": 2.5051440329218106, + "grad_norm": 4.319838047027588, + "learning_rate": 1.966750452512114e-05, + "loss": 0.0673, + "step": 2435 + }, + { + "epoch": 2.506172839506173, + "grad_norm": 6.5712690353393555, + "learning_rate": 1.9653254384338684e-05, + "loss": 0.4204, + "step": 2436 + }, + { + "epoch": 2.507201646090535, + "grad_norm": 5.69049596786499, + "learning_rate": 1.9639010337975223e-05, + "loss": 0.1546, + "step": 2437 + }, + { + "epoch": 2.508230452674897, + "grad_norm": 8.044054985046387, + "learning_rate": 1.962477240961969e-05, + "loss": 0.2912, + "step": 2438 + }, + { + "epoch": 2.5092592592592595, + "grad_norm": 0.8142794966697693, + "learning_rate": 1.9610540622850916e-05, + "loss": 0.0179, + "step": 2439 + }, + { + "epoch": 2.5102880658436213, + "grad_norm": 3.2492103576660156, + "learning_rate": 1.959631500123754e-05, + "loss": 0.0495, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7196286916732788, + "eval_Qnli-dev_cosine_ap": 0.7484318389982814, + "eval_Qnli-dev_cosine_f1": 0.7011070110701106, + "eval_Qnli-dev_cosine_f1_threshold": 0.6782007217407227, + "eval_Qnli-dev_cosine_precision": 0.6209150326797386, + "eval_Qnli-dev_cosine_recall": 0.8050847457627118, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 338.0035705566406, + "eval_Qnli-dev_dot_ap": 0.6990954266604491, + "eval_Qnli-dev_dot_f1": 0.672566371681416, + "eval_Qnli-dev_dot_f1_threshold": 285.77838134765625, + "eval_Qnli-dev_dot_precision": 0.5775075987841946, + "eval_Qnli-dev_dot_recall": 0.8050847457627118, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.771583557128906, + "eval_Qnli-dev_euclidean_ap": 0.7580153060009305, + "eval_Qnli-dev_euclidean_f1": 0.7067669172932329, + "eval_Qnli-dev_euclidean_f1_threshold": 16.642635345458984, + "eval_Qnli-dev_euclidean_precision": 0.6351351351351351, + "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 327.4893798828125, + "eval_Qnli-dev_manhattan_ap": 0.7587013184358515, + "eval_Qnli-dev_manhattan_f1": 0.7032590051457975, + "eval_Qnli-dev_manhattan_f1_threshold": 367.247314453125, + "eval_Qnli-dev_manhattan_precision": 0.590778097982709, + "eval_Qnli-dev_manhattan_recall": 0.8686440677966102, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 338.0035705566406, + "eval_Qnli-dev_max_ap": 0.7587013184358515, + "eval_Qnli-dev_max_f1": 0.7067669172932329, + "eval_Qnli-dev_max_f1_threshold": 367.247314453125, + "eval_Qnli-dev_max_precision": 0.6351351351351351, + "eval_Qnli-dev_max_recall": 0.8686440677966102, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8781421184539795, + "eval_allNLI-dev_cosine_ap": 0.6316663997422921, + "eval_allNLI-dev_cosine_f1": 0.6376146788990825, + "eval_allNLI-dev_cosine_f1_threshold": 0.7252265810966492, + "eval_allNLI-dev_cosine_precision": 0.5285171102661597, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 350.6553039550781, + "eval_allNLI-dev_dot_ap": 0.5659037596127712, + "eval_allNLI-dev_dot_f1": 0.5963302752293578, + "eval_allNLI-dev_dot_f1_threshold": 300.537109375, + "eval_allNLI-dev_dot_precision": 0.49429657794676807, + "eval_allNLI-dev_dot_recall": 0.7514450867052023, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.204728126525879, + "eval_allNLI-dev_euclidean_ap": 0.6398362687375942, + "eval_allNLI-dev_euclidean_f1": 0.641860465116279, + "eval_allNLI-dev_euclidean_f1_threshold": 15.186336517333984, + "eval_allNLI-dev_euclidean_precision": 0.5369649805447471, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.744140625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 284.4888000488281, + "eval_allNLI-dev_manhattan_ap": 0.6342854799220193, + "eval_allNLI-dev_manhattan_f1": 0.6349206349206349, + "eval_allNLI-dev_manhattan_f1_threshold": 321.4658508300781, + "eval_allNLI-dev_manhattan_precision": 0.5223880597014925, + "eval_allNLI-dev_manhattan_recall": 0.8092485549132948, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 350.6553039550781, + "eval_allNLI-dev_max_ap": 0.6398362687375942, + "eval_allNLI-dev_max_f1": 0.641860465116279, + "eval_allNLI-dev_max_f1_threshold": 321.4658508300781, + "eval_allNLI-dev_max_precision": 0.5369649805447471, + "eval_allNLI-dev_max_recall": 0.8092485549132948, + "eval_sequential_score": 0.7587013184358515, + "eval_sts-test_pearson_cosine": 0.8523364191500853, + "eval_sts-test_pearson_dot": 0.8391425584258833, + "eval_sts-test_pearson_euclidean": 0.8775257393178382, + "eval_sts-test_pearson_manhattan": 0.875433900765537, + "eval_sts-test_pearson_max": 0.8775257393178382, + "eval_sts-test_spearman_cosine": 0.8772542334218971, + "eval_sts-test_spearman_dot": 0.8356396326926823, + "eval_sts-test_spearman_euclidean": 0.873932750050166, + "eval_sts-test_spearman_manhattan": 0.8717948480397149, + "eval_sts-test_spearman_max": 0.8772542334218971, + "eval_vitaminc-pairs_loss": 3.189671754837036, + "eval_vitaminc-pairs_runtime": 3.6225, + "eval_vitaminc-pairs_samples_per_second": 35.334, + "eval_vitaminc-pairs_steps_per_second": 0.276, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_negation-triplets_loss": 0.9069141149520874, + "eval_negation-triplets_runtime": 0.7788, + "eval_negation-triplets_samples_per_second": 164.365, + "eval_negation-triplets_steps_per_second": 1.284, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_scitail-pairs-pos_loss": 0.14732320606708527, + "eval_scitail-pairs-pos_runtime": 0.9241, + "eval_scitail-pairs-pos_samples_per_second": 138.513, + "eval_scitail-pairs-pos_steps_per_second": 1.082, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_scitail-pairs-qa_loss": 0.0005962368450127542, + "eval_scitail-pairs-qa_runtime": 0.6459, + "eval_scitail-pairs-qa_samples_per_second": 198.171, + "eval_scitail-pairs-qa_steps_per_second": 1.548, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_xsum-pairs_loss": 0.3051843047142029, + "eval_xsum-pairs_runtime": 3.0561, + "eval_xsum-pairs_samples_per_second": 41.883, + "eval_xsum-pairs_steps_per_second": 0.327, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_sciq_pairs_loss": 0.09776122123003006, + "eval_sciq_pairs_runtime": 3.5714, + "eval_sciq_pairs_samples_per_second": 35.84, + "eval_sciq_pairs_steps_per_second": 0.28, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_qasc_pairs_loss": 0.15243121981620789, + "eval_qasc_pairs_runtime": 0.6261, + "eval_qasc_pairs_samples_per_second": 204.437, + "eval_qasc_pairs_steps_per_second": 1.597, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_openbookqa_pairs_loss": 0.7259861826896667, + "eval_openbookqa_pairs_runtime": 0.6061, + "eval_openbookqa_pairs_samples_per_second": 211.175, + "eval_openbookqa_pairs_steps_per_second": 1.65, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_msmarco_pairs_loss": 0.7631006836891174, + "eval_msmarco_pairs_runtime": 1.5348, + "eval_msmarco_pairs_samples_per_second": 83.399, + "eval_msmarco_pairs_steps_per_second": 0.652, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_nq_pairs_loss": 0.6975298523902893, + "eval_nq_pairs_runtime": 2.9019, + "eval_nq_pairs_samples_per_second": 44.109, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_trivia_pairs_loss": 0.8489959239959717, + "eval_trivia_pairs_runtime": 3.4531, + "eval_trivia_pairs_samples_per_second": 37.068, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_gooaq_pairs_loss": 0.35373830795288086, + "eval_gooaq_pairs_runtime": 0.9612, + "eval_gooaq_pairs_samples_per_second": 133.168, + "eval_gooaq_pairs_steps_per_second": 1.04, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_paws-pos_loss": 0.021134065464138985, + "eval_paws-pos_runtime": 0.7168, + "eval_paws-pos_samples_per_second": 178.571, + "eval_paws-pos_steps_per_second": 1.395, + "step": 2440 + }, + { + "epoch": 2.5102880658436213, + "eval_global_dataset_loss": 0.4480046033859253, + "eval_global_dataset_runtime": 13.4322, + "eval_global_dataset_samples_per_second": 30.97, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2440 + }, + { + "epoch": 2.5113168724279835, + "grad_norm": 7.582404613494873, + "learning_rate": 1.958209556833799e-05, + "loss": 0.479, + "step": 2441 + }, + { + "epoch": 2.5123456790123457, + "grad_norm": 9.112093925476074, + "learning_rate": 1.956788234770046e-05, + "loss": 0.533, + "step": 2442 + }, + { + "epoch": 2.513374485596708, + "grad_norm": 4.532588481903076, + "learning_rate": 1.9553675362862837e-05, + "loss": 0.0853, + "step": 2443 + }, + { + "epoch": 2.51440329218107, + "grad_norm": 5.598708152770996, + "learning_rate": 1.9539474637352706e-05, + "loss": 0.0872, + "step": 2444 + }, + { + "epoch": 2.515432098765432, + "grad_norm": 16.361116409301758, + "learning_rate": 1.952528019468726e-05, + "loss": 2.0415, + "step": 2445 + }, + { + "epoch": 2.516460905349794, + "grad_norm": 1.0714730024337769, + "learning_rate": 1.9511092058373308e-05, + "loss": 0.0219, + "step": 2446 + }, + { + "epoch": 2.5174897119341564, + "grad_norm": 5.480446815490723, + "learning_rate": 1.94969102519072e-05, + "loss": 0.125, + "step": 2447 + }, + { + "epoch": 2.5185185185185186, + "grad_norm": 7.554908275604248, + "learning_rate": 1.9482734798774816e-05, + "loss": 0.3067, + "step": 2448 + }, + { + "epoch": 2.519547325102881, + "grad_norm": 8.397703170776367, + "learning_rate": 1.9468565722451504e-05, + "loss": 0.3694, + "step": 2449 + }, + { + "epoch": 2.5205761316872426, + "grad_norm": 10.139104843139648, + "learning_rate": 1.9454403046402057e-05, + "loss": 0.4156, + "step": 2450 + }, + { + "epoch": 2.521604938271605, + "grad_norm": 4.592851161956787, + "learning_rate": 1.944024679408067e-05, + "loss": 0.1045, + "step": 2451 + }, + { + "epoch": 2.522633744855967, + "grad_norm": 5.958555221557617, + "learning_rate": 1.9426096988930898e-05, + "loss": 0.2124, + "step": 2452 + }, + { + "epoch": 2.5236625514403292, + "grad_norm": 3.141430377960205, + "learning_rate": 1.941195365438561e-05, + "loss": 0.0836, + "step": 2453 + }, + { + "epoch": 2.5246913580246915, + "grad_norm": 3.026766538619995, + "learning_rate": 1.939781681386699e-05, + "loss": 0.0625, + "step": 2454 + }, + { + "epoch": 2.5257201646090532, + "grad_norm": 4.351663589477539, + "learning_rate": 1.9383686490786415e-05, + "loss": 0.173, + "step": 2455 + }, + { + "epoch": 2.526748971193416, + "grad_norm": 5.852506160736084, + "learning_rate": 1.9369562708544525e-05, + "loss": 0.2642, + "step": 2456 + }, + { + "epoch": 2.5277777777777777, + "grad_norm": 8.36409854888916, + "learning_rate": 1.9355445490531092e-05, + "loss": 0.3243, + "step": 2457 + }, + { + "epoch": 2.52880658436214, + "grad_norm": 10.044306755065918, + "learning_rate": 1.9341334860125038e-05, + "loss": 0.4475, + "step": 2458 + }, + { + "epoch": 2.529835390946502, + "grad_norm": 1.544262170791626, + "learning_rate": 1.9327230840694345e-05, + "loss": 0.0241, + "step": 2459 + }, + { + "epoch": 2.5308641975308643, + "grad_norm": 3.9103894233703613, + "learning_rate": 1.9313133455596088e-05, + "loss": 0.0623, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7407504320144653, + "eval_Qnli-dev_cosine_ap": 0.752596488399803, + "eval_Qnli-dev_cosine_f1": 0.7054263565891473, + "eval_Qnli-dev_cosine_f1_threshold": 0.7087960243225098, + "eval_Qnli-dev_cosine_precision": 0.65, + "eval_Qnli-dev_cosine_recall": 0.7711864406779662, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 346.5068664550781, + "eval_Qnli-dev_dot_ap": 0.6927262375072062, + "eval_Qnli-dev_dot_f1": 0.6785714285714286, + "eval_Qnli-dev_dot_f1_threshold": 276.58709716796875, + "eval_Qnli-dev_dot_precision": 0.55, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.385201454162598, + "eval_Qnli-dev_euclidean_ap": 0.7633146911155302, + "eval_Qnli-dev_euclidean_f1": 0.7082568807339449, + "eval_Qnli-dev_euclidean_f1_threshold": 16.51651382446289, + "eval_Qnli-dev_euclidean_precision": 0.6245954692556634, + "eval_Qnli-dev_euclidean_recall": 0.8177966101694916, + "eval_Qnli-dev_manhattan_accuracy": 0.73046875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 328.64874267578125, + "eval_Qnli-dev_manhattan_ap": 0.7656719075878926, + "eval_Qnli-dev_manhattan_f1": 0.7160493827160493, + "eval_Qnli-dev_manhattan_f1_threshold": 328.64874267578125, + "eval_Qnli-dev_manhattan_precision": 0.696, + "eval_Qnli-dev_manhattan_recall": 0.7372881355932204, + "eval_Qnli-dev_max_accuracy": 0.73046875, + "eval_Qnli-dev_max_accuracy_threshold": 346.5068664550781, + "eval_Qnli-dev_max_ap": 0.7656719075878926, + "eval_Qnli-dev_max_f1": 0.7160493827160493, + "eval_Qnli-dev_max_f1_threshold": 328.64874267578125, + "eval_Qnli-dev_max_precision": 0.696, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8690991401672363, + "eval_allNLI-dev_cosine_ap": 0.6297108739162276, + "eval_allNLI-dev_cosine_f1": 0.6351351351351351, + "eval_allNLI-dev_cosine_f1_threshold": 0.7144136428833008, + "eval_allNLI-dev_cosine_precision": 0.5202952029520295, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.705078125, + "eval_allNLI-dev_dot_accuracy_threshold": 356.1709899902344, + "eval_allNLI-dev_dot_ap": 0.5676168691745578, + "eval_allNLI-dev_dot_f1": 0.5977011494252873, + "eval_allNLI-dev_dot_f1_threshold": 300.47137451171875, + "eval_allNLI-dev_dot_precision": 0.4961832061068702, + "eval_allNLI-dev_dot_recall": 0.7514450867052023, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.693002700805664, + "eval_allNLI-dev_euclidean_ap": 0.6393368882621303, + "eval_allNLI-dev_euclidean_f1": 0.6331877729257641, + "eval_allNLI-dev_euclidean_f1_threshold": 15.953073501586914, + "eval_allNLI-dev_euclidean_precision": 0.5087719298245614, + "eval_allNLI-dev_euclidean_recall": 0.838150289017341, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 280.0594482421875, + "eval_allNLI-dev_manhattan_ap": 0.6374081391015416, + "eval_allNLI-dev_manhattan_f1": 0.6320541760722348, + "eval_allNLI-dev_manhattan_f1_threshold": 326.39691162109375, + "eval_allNLI-dev_manhattan_precision": 0.5185185185185185, + "eval_allNLI-dev_manhattan_recall": 0.8092485549132948, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 356.1709899902344, + "eval_allNLI-dev_max_ap": 0.6393368882621303, + "eval_allNLI-dev_max_f1": 0.6351351351351351, + "eval_allNLI-dev_max_f1_threshold": 326.39691162109375, + "eval_allNLI-dev_max_precision": 0.5202952029520295, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7656719075878926, + "eval_sts-test_pearson_cosine": 0.8530913606098072, + "eval_sts-test_pearson_dot": 0.8401484364504492, + "eval_sts-test_pearson_euclidean": 0.877896036519132, + "eval_sts-test_pearson_manhattan": 0.8764060505738172, + "eval_sts-test_pearson_max": 0.877896036519132, + "eval_sts-test_spearman_cosine": 0.8769788789105557, + "eval_sts-test_spearman_dot": 0.8340232171312247, + "eval_sts-test_spearman_euclidean": 0.8724001536012949, + "eval_sts-test_spearman_manhattan": 0.8710444141374423, + "eval_sts-test_spearman_max": 0.8769788789105557, + "eval_vitaminc-pairs_loss": 3.105875015258789, + "eval_vitaminc-pairs_runtime": 3.257, + "eval_vitaminc-pairs_samples_per_second": 39.3, + "eval_vitaminc-pairs_steps_per_second": 0.307, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_negation-triplets_loss": 0.9383314847946167, + "eval_negation-triplets_runtime": 0.7818, + "eval_negation-triplets_samples_per_second": 163.73, + "eval_negation-triplets_steps_per_second": 1.279, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_scitail-pairs-pos_loss": 0.14859730005264282, + "eval_scitail-pairs-pos_runtime": 0.9769, + "eval_scitail-pairs-pos_samples_per_second": 131.032, + "eval_scitail-pairs-pos_steps_per_second": 1.024, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_scitail-pairs-qa_loss": 0.00045218339073471725, + "eval_scitail-pairs-qa_runtime": 0.6264, + "eval_scitail-pairs-qa_samples_per_second": 204.357, + "eval_scitail-pairs-qa_steps_per_second": 1.597, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_xsum-pairs_loss": 0.2749998867511749, + "eval_xsum-pairs_runtime": 3.0382, + "eval_xsum-pairs_samples_per_second": 42.131, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_sciq_pairs_loss": 0.09291612356901169, + "eval_sciq_pairs_runtime": 3.5694, + "eval_sciq_pairs_samples_per_second": 35.861, + "eval_sciq_pairs_steps_per_second": 0.28, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_qasc_pairs_loss": 0.15513256192207336, + "eval_qasc_pairs_runtime": 0.6358, + "eval_qasc_pairs_samples_per_second": 201.332, + "eval_qasc_pairs_steps_per_second": 1.573, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_openbookqa_pairs_loss": 0.7656955718994141, + "eval_openbookqa_pairs_runtime": 0.6138, + "eval_openbookqa_pairs_samples_per_second": 208.537, + "eval_openbookqa_pairs_steps_per_second": 1.629, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_msmarco_pairs_loss": 0.6963688731193542, + "eval_msmarco_pairs_runtime": 1.5379, + "eval_msmarco_pairs_samples_per_second": 83.229, + "eval_msmarco_pairs_steps_per_second": 0.65, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_nq_pairs_loss": 0.5959857702255249, + "eval_nq_pairs_runtime": 2.9087, + "eval_nq_pairs_samples_per_second": 44.006, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_trivia_pairs_loss": 0.778878927230835, + "eval_trivia_pairs_runtime": 3.458, + "eval_trivia_pairs_samples_per_second": 37.015, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_gooaq_pairs_loss": 0.3291349411010742, + "eval_gooaq_pairs_runtime": 0.9585, + "eval_gooaq_pairs_samples_per_second": 133.543, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_paws-pos_loss": 0.02157442830502987, + "eval_paws-pos_runtime": 0.7148, + "eval_paws-pos_samples_per_second": 179.068, + "eval_paws-pos_steps_per_second": 1.399, + "step": 2460 + }, + { + "epoch": 2.5308641975308643, + "eval_global_dataset_loss": 0.42489147186279297, + "eval_global_dataset_runtime": 13.4607, + "eval_global_dataset_samples_per_second": 30.905, + "eval_global_dataset_steps_per_second": 0.297, + "step": 2460 + }, + { + "epoch": 2.5318930041152266, + "grad_norm": 4.6252593994140625, + "learning_rate": 1.9299042728176326e-05, + "loss": 0.0896, + "step": 2461 + }, + { + "epoch": 2.5329218106995883, + "grad_norm": 2.53317928314209, + "learning_rate": 1.9284958681770098e-05, + "loss": 0.051, + "step": 2462 + }, + { + "epoch": 2.5339506172839505, + "grad_norm": 3.673436403274536, + "learning_rate": 1.9270881339701388e-05, + "loss": 0.0903, + "step": 2463 + }, + { + "epoch": 2.5349794238683128, + "grad_norm": 8.360755920410156, + "learning_rate": 1.9256810725283066e-05, + "loss": 0.3172, + "step": 2464 + }, + { + "epoch": 2.536008230452675, + "grad_norm": 3.940634250640869, + "learning_rate": 1.9242746861816868e-05, + "loss": 0.0968, + "step": 2465 + }, + { + "epoch": 2.537037037037037, + "grad_norm": 4.461543560028076, + "learning_rate": 1.922868977259335e-05, + "loss": 0.1176, + "step": 2466 + }, + { + "epoch": 2.538065843621399, + "grad_norm": 7.496928691864014, + "learning_rate": 1.921463948089184e-05, + "loss": 0.2366, + "step": 2467 + }, + { + "epoch": 2.539094650205761, + "grad_norm": 13.089897155761719, + "learning_rate": 1.9200596009980426e-05, + "loss": 1.5581, + "step": 2468 + }, + { + "epoch": 2.5401234567901234, + "grad_norm": 3.959763288497925, + "learning_rate": 1.9186559383115883e-05, + "loss": 0.1791, + "step": 2469 + }, + { + "epoch": 2.5411522633744856, + "grad_norm": 4.033388137817383, + "learning_rate": 1.9172529623543666e-05, + "loss": 0.1678, + "step": 2470 + }, + { + "epoch": 2.542181069958848, + "grad_norm": 4.85703706741333, + "learning_rate": 1.9158506754497846e-05, + "loss": 0.098, + "step": 2471 + }, + { + "epoch": 2.5432098765432096, + "grad_norm": 5.970437526702881, + "learning_rate": 1.9144490799201084e-05, + "loss": 0.2545, + "step": 2472 + }, + { + "epoch": 2.5442386831275723, + "grad_norm": 4.862588405609131, + "learning_rate": 1.91304817808646e-05, + "loss": 0.1202, + "step": 2473 + }, + { + "epoch": 2.545267489711934, + "grad_norm": 5.714596271514893, + "learning_rate": 1.9116479722688123e-05, + "loss": 0.1414, + "step": 2474 + }, + { + "epoch": 2.5462962962962963, + "grad_norm": 4.372839450836182, + "learning_rate": 1.9102484647859853e-05, + "loss": 0.0874, + "step": 2475 + }, + { + "epoch": 2.5473251028806585, + "grad_norm": 3.4210047721862793, + "learning_rate": 1.9088496579556417e-05, + "loss": 0.1012, + "step": 2476 + }, + { + "epoch": 2.5483539094650207, + "grad_norm": 5.6102824211120605, + "learning_rate": 1.907451554094286e-05, + "loss": 0.117, + "step": 2477 + }, + { + "epoch": 2.549382716049383, + "grad_norm": 0.21065284311771393, + "learning_rate": 1.9060541555172567e-05, + "loss": 0.002, + "step": 2478 + }, + { + "epoch": 2.5504115226337447, + "grad_norm": 2.502822160720825, + "learning_rate": 1.904657464538725e-05, + "loss": 0.0382, + "step": 2479 + }, + { + "epoch": 2.551440329218107, + "grad_norm": 11.489274978637695, + "learning_rate": 1.90326148347169e-05, + "loss": 0.6951, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7235764265060425, + "eval_Qnli-dev_cosine_ap": 0.7540424832102679, + "eval_Qnli-dev_cosine_f1": 0.7028985507246377, + "eval_Qnli-dev_cosine_f1_threshold": 0.6627092361450195, + "eval_Qnli-dev_cosine_precision": 0.6139240506329114, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 327.3599853515625, + "eval_Qnli-dev_dot_ap": 0.7065210985613086, + "eval_Qnli-dev_dot_f1": 0.6797385620915033, + "eval_Qnli-dev_dot_f1_threshold": 267.470458984375, + "eval_Qnli-dev_dot_precision": 0.5531914893617021, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.622426986694336, + "eval_Qnli-dev_euclidean_ap": 0.7608195910206452, + "eval_Qnli-dev_euclidean_f1": 0.7029126213592233, + "eval_Qnli-dev_euclidean_f1_threshold": 16.548377990722656, + "eval_Qnli-dev_euclidean_precision": 0.6487455197132617, + "eval_Qnli-dev_euclidean_recall": 0.7669491525423728, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 323.97406005859375, + "eval_Qnli-dev_manhattan_ap": 0.764546497634706, + "eval_Qnli-dev_manhattan_f1": 0.7022900763358778, + "eval_Qnli-dev_manhattan_f1_threshold": 349.9832458496094, + "eval_Qnli-dev_manhattan_precision": 0.6388888888888888, + "eval_Qnli-dev_manhattan_recall": 0.7796610169491526, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 327.3599853515625, + "eval_Qnli-dev_max_ap": 0.764546497634706, + "eval_Qnli-dev_max_f1": 0.7029126213592233, + "eval_Qnli-dev_max_f1_threshold": 349.9832458496094, + "eval_Qnli-dev_max_precision": 0.6487455197132617, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8546959161758423, + "eval_allNLI-dev_cosine_ap": 0.6281747307519334, + "eval_allNLI-dev_cosine_f1": 0.6283185840707965, + "eval_allNLI-dev_cosine_f1_threshold": 0.6967825889587402, + "eval_allNLI-dev_cosine_precision": 0.5089605734767025, + "eval_allNLI-dev_cosine_recall": 0.8208092485549133, + "eval_allNLI-dev_dot_accuracy": 0.708984375, + "eval_allNLI-dev_dot_accuracy_threshold": 349.8631591796875, + "eval_allNLI-dev_dot_ap": 0.5628056401151449, + "eval_allNLI-dev_dot_f1": 0.5871121718377088, + "eval_allNLI-dev_dot_f1_threshold": 293.33551025390625, + "eval_allNLI-dev_dot_precision": 0.5, + "eval_allNLI-dev_dot_recall": 0.7109826589595376, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.06941032409668, + "eval_allNLI-dev_euclidean_ap": 0.6384589662241201, + "eval_allNLI-dev_euclidean_f1": 0.6483050847457628, + "eval_allNLI-dev_euclidean_f1_threshold": 16.427034378051758, + "eval_allNLI-dev_euclidean_precision": 0.5117056856187291, + "eval_allNLI-dev_euclidean_recall": 0.884393063583815, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 249.3073272705078, + "eval_allNLI-dev_manhattan_ap": 0.6353373798330103, + "eval_allNLI-dev_manhattan_f1": 0.6422018348623854, + "eval_allNLI-dev_manhattan_f1_threshold": 327.3839416503906, + "eval_allNLI-dev_manhattan_precision": 0.532319391634981, + "eval_allNLI-dev_manhattan_recall": 0.8092485549132948, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 349.8631591796875, + "eval_allNLI-dev_max_ap": 0.6384589662241201, + "eval_allNLI-dev_max_f1": 0.6483050847457628, + "eval_allNLI-dev_max_f1_threshold": 327.3839416503906, + "eval_allNLI-dev_max_precision": 0.532319391634981, + "eval_allNLI-dev_max_recall": 0.884393063583815, + "eval_sequential_score": 0.764546497634706, + "eval_sts-test_pearson_cosine": 0.8510650247778009, + "eval_sts-test_pearson_dot": 0.8371268204952556, + "eval_sts-test_pearson_euclidean": 0.8754890924438443, + "eval_sts-test_pearson_manhattan": 0.87360841059011, + "eval_sts-test_pearson_max": 0.8754890924438443, + "eval_sts-test_spearman_cosine": 0.876451932807672, + "eval_sts-test_spearman_dot": 0.831523143162333, + "eval_sts-test_spearman_euclidean": 0.8712764941790182, + "eval_sts-test_spearman_manhattan": 0.8695444861093868, + "eval_sts-test_spearman_max": 0.876451932807672, + "eval_vitaminc-pairs_loss": 3.1325862407684326, + "eval_vitaminc-pairs_runtime": 3.2327, + "eval_vitaminc-pairs_samples_per_second": 39.595, + "eval_vitaminc-pairs_steps_per_second": 0.309, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_negation-triplets_loss": 0.9421901702880859, + "eval_negation-triplets_runtime": 0.7662, + "eval_negation-triplets_samples_per_second": 167.056, + "eval_negation-triplets_steps_per_second": 1.305, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_scitail-pairs-pos_loss": 0.14855390787124634, + "eval_scitail-pairs-pos_runtime": 0.918, + "eval_scitail-pairs-pos_samples_per_second": 139.434, + "eval_scitail-pairs-pos_steps_per_second": 1.089, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_scitail-pairs-qa_loss": 0.0005054974462836981, + "eval_scitail-pairs-qa_runtime": 0.6202, + "eval_scitail-pairs-qa_samples_per_second": 206.379, + "eval_scitail-pairs-qa_steps_per_second": 1.612, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_xsum-pairs_loss": 0.29268449544906616, + "eval_xsum-pairs_runtime": 3.038, + "eval_xsum-pairs_samples_per_second": 42.134, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_sciq_pairs_loss": 0.10132085531949997, + "eval_sciq_pairs_runtime": 3.5482, + "eval_sciq_pairs_samples_per_second": 36.074, + "eval_sciq_pairs_steps_per_second": 0.282, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_qasc_pairs_loss": 0.1551382690668106, + "eval_qasc_pairs_runtime": 0.6228, + "eval_qasc_pairs_samples_per_second": 205.524, + "eval_qasc_pairs_steps_per_second": 1.606, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_openbookqa_pairs_loss": 0.7559497952461243, + "eval_openbookqa_pairs_runtime": 0.6147, + "eval_openbookqa_pairs_samples_per_second": 208.226, + "eval_openbookqa_pairs_steps_per_second": 1.627, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_msmarco_pairs_loss": 0.7051388621330261, + "eval_msmarco_pairs_runtime": 1.5364, + "eval_msmarco_pairs_samples_per_second": 83.312, + "eval_msmarco_pairs_steps_per_second": 0.651, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_nq_pairs_loss": 0.667251467704773, + "eval_nq_pairs_runtime": 2.9014, + "eval_nq_pairs_samples_per_second": 44.116, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_trivia_pairs_loss": 0.7416086792945862, + "eval_trivia_pairs_runtime": 3.4386, + "eval_trivia_pairs_samples_per_second": 37.224, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_gooaq_pairs_loss": 0.3381101191043854, + "eval_gooaq_pairs_runtime": 0.9602, + "eval_gooaq_pairs_samples_per_second": 133.306, + "eval_gooaq_pairs_steps_per_second": 1.041, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_paws-pos_loss": 0.021783526986837387, + "eval_paws-pos_runtime": 0.706, + "eval_paws-pos_samples_per_second": 181.293, + "eval_paws-pos_steps_per_second": 1.416, + "step": 2480 + }, + { + "epoch": 2.551440329218107, + "eval_global_dataset_loss": 0.4171276092529297, + "eval_global_dataset_runtime": 13.4191, + "eval_global_dataset_samples_per_second": 31.001, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2480 + }, + { + "epoch": 2.552469135802469, + "grad_norm": 3.4090147018432617, + "learning_rate": 1.901866214627976e-05, + "loss": 0.046, + "step": 2481 + }, + { + "epoch": 2.5534979423868314, + "grad_norm": 4.396731853485107, + "learning_rate": 1.900471660318227e-05, + "loss": 0.1056, + "step": 2482 + }, + { + "epoch": 2.5545267489711936, + "grad_norm": 5.821942329406738, + "learning_rate": 1.899077822851903e-05, + "loss": 0.1582, + "step": 2483 + }, + { + "epoch": 2.5555555555555554, + "grad_norm": 2.747316360473633, + "learning_rate": 1.8976847045372786e-05, + "loss": 0.041, + "step": 2484 + }, + { + "epoch": 2.5565843621399176, + "grad_norm": 3.0699405670166016, + "learning_rate": 1.896292307681436e-05, + "loss": 0.0631, + "step": 2485 + }, + { + "epoch": 2.55761316872428, + "grad_norm": 0.045280568301677704, + "learning_rate": 1.8949006345902635e-05, + "loss": 0.0004, + "step": 2486 + }, + { + "epoch": 2.558641975308642, + "grad_norm": 2.42556095123291, + "learning_rate": 1.8935096875684504e-05, + "loss": 0.0251, + "step": 2487 + }, + { + "epoch": 2.5596707818930042, + "grad_norm": 3.1204159259796143, + "learning_rate": 1.892119468919484e-05, + "loss": 0.0449, + "step": 2488 + }, + { + "epoch": 2.560699588477366, + "grad_norm": 6.2450761795043945, + "learning_rate": 1.8907299809456446e-05, + "loss": 0.2126, + "step": 2489 + }, + { + "epoch": 2.5617283950617287, + "grad_norm": 0.7859638929367065, + "learning_rate": 1.889341225948003e-05, + "loss": 0.0108, + "step": 2490 + }, + { + "epoch": 2.5627572016460904, + "grad_norm": 2.73036527633667, + "learning_rate": 1.8879532062264164e-05, + "loss": 0.0314, + "step": 2491 + }, + { + "epoch": 2.5637860082304527, + "grad_norm": 1.6652979850769043, + "learning_rate": 1.886565924079523e-05, + "loss": 0.0164, + "step": 2492 + }, + { + "epoch": 2.564814814814815, + "grad_norm": 7.437589168548584, + "learning_rate": 1.885179381804742e-05, + "loss": 0.2823, + "step": 2493 + }, + { + "epoch": 2.565843621399177, + "grad_norm": 3.439244508743286, + "learning_rate": 1.883793581698265e-05, + "loss": 0.0704, + "step": 2494 + }, + { + "epoch": 2.5668724279835393, + "grad_norm": 0.3689349889755249, + "learning_rate": 1.882408526055056e-05, + "loss": 0.0027, + "step": 2495 + }, + { + "epoch": 2.567901234567901, + "grad_norm": 6.352327346801758, + "learning_rate": 1.8810242171688445e-05, + "loss": 0.1596, + "step": 2496 + }, + { + "epoch": 2.5689300411522633, + "grad_norm": 2.9392714500427246, + "learning_rate": 1.879640657332125e-05, + "loss": 0.0535, + "step": 2497 + }, + { + "epoch": 2.5699588477366255, + "grad_norm": 6.382355213165283, + "learning_rate": 1.878257848836151e-05, + "loss": 0.2505, + "step": 2498 + }, + { + "epoch": 2.5709876543209877, + "grad_norm": 5.4166646003723145, + "learning_rate": 1.8768757939709314e-05, + "loss": 0.1589, + "step": 2499 + }, + { + "epoch": 2.57201646090535, + "grad_norm": 6.669981479644775, + "learning_rate": 1.8754944950252273e-05, + "loss": 0.2284, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_Qnli-dev_cosine_accuracy": 0.703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7508093118667603, + "eval_Qnli-dev_cosine_ap": 0.75389562409459, + "eval_Qnli-dev_cosine_f1": 0.7065217391304347, + "eval_Qnli-dev_cosine_f1_threshold": 0.6699330806732178, + "eval_Qnli-dev_cosine_precision": 0.6170886075949367, + "eval_Qnli-dev_cosine_recall": 0.826271186440678, + "eval_Qnli-dev_dot_accuracy": 0.685546875, + "eval_Qnli-dev_dot_accuracy_threshold": 329.80792236328125, + "eval_Qnli-dev_dot_ap": 0.7032755465478735, + "eval_Qnli-dev_dot_f1": 0.6814814814814815, + "eval_Qnli-dev_dot_f1_threshold": 299.5719909667969, + "eval_Qnli-dev_dot_precision": 0.6052631578947368, + "eval_Qnli-dev_dot_recall": 0.7796610169491526, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.958727836608887, + "eval_Qnli-dev_euclidean_ap": 0.7606991166855714, + "eval_Qnli-dev_euclidean_f1": 0.707635009310987, + "eval_Qnli-dev_euclidean_f1_threshold": 16.814437866210938, + "eval_Qnli-dev_euclidean_precision": 0.6312292358803987, + "eval_Qnli-dev_euclidean_recall": 0.8050847457627118, + "eval_Qnli-dev_manhattan_accuracy": 0.70703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 330.868896484375, + "eval_Qnli-dev_manhattan_ap": 0.7647529526432109, + "eval_Qnli-dev_manhattan_f1": 0.7080979284369116, + "eval_Qnli-dev_manhattan_f1_threshold": 351.5562744140625, + "eval_Qnli-dev_manhattan_precision": 0.6372881355932203, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 330.868896484375, + "eval_Qnli-dev_max_ap": 0.7647529526432109, + "eval_Qnli-dev_max_f1": 0.7080979284369116, + "eval_Qnli-dev_max_f1_threshold": 351.5562744140625, + "eval_Qnli-dev_max_precision": 0.6372881355932203, + "eval_Qnli-dev_max_recall": 0.826271186440678, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8467543125152588, + "eval_allNLI-dev_cosine_ap": 0.6302371293598495, + "eval_allNLI-dev_cosine_f1": 0.6396396396396398, + "eval_allNLI-dev_cosine_f1_threshold": 0.7136609554290771, + "eval_allNLI-dev_cosine_precision": 0.5239852398523985, + "eval_allNLI-dev_cosine_recall": 0.8208092485549133, + "eval_allNLI-dev_dot_accuracy": 0.708984375, + "eval_allNLI-dev_dot_accuracy_threshold": 344.5650939941406, + "eval_allNLI-dev_dot_ap": 0.5712397273103638, + "eval_allNLI-dev_dot_f1": 0.5934959349593496, + "eval_allNLI-dev_dot_f1_threshold": 274.1793212890625, + "eval_allNLI-dev_dot_precision": 0.45768025078369906, + "eval_allNLI-dev_dot_recall": 0.8439306358381503, + "eval_allNLI-dev_euclidean_accuracy": 0.728515625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.898115158081055, + "eval_allNLI-dev_euclidean_ap": 0.6395675204903274, + "eval_allNLI-dev_euclidean_f1": 0.6491228070175439, + "eval_allNLI-dev_euclidean_f1_threshold": 15.7131986618042, + "eval_allNLI-dev_euclidean_precision": 0.5229681978798587, + "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 248.5677032470703, + "eval_allNLI-dev_manhattan_ap": 0.6393176330798684, + "eval_allNLI-dev_manhattan_f1": 0.6471910112359551, + "eval_allNLI-dev_manhattan_f1_threshold": 323.1290283203125, + "eval_allNLI-dev_manhattan_precision": 0.5294117647058824, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 344.5650939941406, + "eval_allNLI-dev_max_ap": 0.6395675204903274, + "eval_allNLI-dev_max_f1": 0.6491228070175439, + "eval_allNLI-dev_max_f1_threshold": 323.1290283203125, + "eval_allNLI-dev_max_precision": 0.5294117647058824, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7647529526432109, + "eval_sts-test_pearson_cosine": 0.8505961008679468, + "eval_sts-test_pearson_dot": 0.8358389714977501, + "eval_sts-test_pearson_euclidean": 0.8751416725717206, + "eval_sts-test_pearson_manhattan": 0.8729980849539773, + "eval_sts-test_pearson_max": 0.8751416725717206, + "eval_sts-test_spearman_cosine": 0.8766837443416795, + "eval_sts-test_spearman_dot": 0.8315599734504581, + "eval_sts-test_spearman_euclidean": 0.8714117772422043, + "eval_sts-test_spearman_manhattan": 0.8695461866573196, + "eval_sts-test_spearman_max": 0.8766837443416795, + "eval_vitaminc-pairs_loss": 3.2234039306640625, + "eval_vitaminc-pairs_runtime": 3.2109, + "eval_vitaminc-pairs_samples_per_second": 39.864, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_negation-triplets_loss": 0.9287065863609314, + "eval_negation-triplets_runtime": 0.7814, + "eval_negation-triplets_samples_per_second": 163.818, + "eval_negation-triplets_steps_per_second": 1.28, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_scitail-pairs-pos_loss": 0.14092357456684113, + "eval_scitail-pairs-pos_runtime": 0.9253, + "eval_scitail-pairs-pos_samples_per_second": 138.329, + "eval_scitail-pairs-pos_steps_per_second": 1.081, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_scitail-pairs-qa_loss": 0.0007257835823111236, + "eval_scitail-pairs-qa_runtime": 0.6388, + "eval_scitail-pairs-qa_samples_per_second": 200.389, + "eval_scitail-pairs-qa_steps_per_second": 1.566, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_xsum-pairs_loss": 0.28815510869026184, + "eval_xsum-pairs_runtime": 3.0438, + "eval_xsum-pairs_samples_per_second": 42.053, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_sciq_pairs_loss": 0.09281651675701141, + "eval_sciq_pairs_runtime": 3.5332, + "eval_sciq_pairs_samples_per_second": 36.228, + "eval_sciq_pairs_steps_per_second": 0.283, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_qasc_pairs_loss": 0.14021874964237213, + "eval_qasc_pairs_runtime": 0.6203, + "eval_qasc_pairs_samples_per_second": 206.341, + "eval_qasc_pairs_steps_per_second": 1.612, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_openbookqa_pairs_loss": 0.7466042637825012, + "eval_openbookqa_pairs_runtime": 0.5976, + "eval_openbookqa_pairs_samples_per_second": 214.181, + "eval_openbookqa_pairs_steps_per_second": 1.673, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_msmarco_pairs_loss": 0.7611977458000183, + "eval_msmarco_pairs_runtime": 1.5286, + "eval_msmarco_pairs_samples_per_second": 83.739, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_nq_pairs_loss": 0.6677074432373047, + "eval_nq_pairs_runtime": 2.9071, + "eval_nq_pairs_samples_per_second": 44.031, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_trivia_pairs_loss": 0.8220326900482178, + "eval_trivia_pairs_runtime": 3.4497, + "eval_trivia_pairs_samples_per_second": 37.105, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_gooaq_pairs_loss": 0.34412798285484314, + "eval_gooaq_pairs_runtime": 0.9551, + "eval_gooaq_pairs_samples_per_second": 134.023, + "eval_gooaq_pairs_steps_per_second": 1.047, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_paws-pos_loss": 0.02183370850980282, + "eval_paws-pos_runtime": 0.7089, + "eval_paws-pos_samples_per_second": 180.556, + "eval_paws-pos_steps_per_second": 1.411, + "step": 2500 + }, + { + "epoch": 2.57201646090535, + "eval_global_dataset_loss": 0.42913469672203064, + "eval_global_dataset_runtime": 13.4248, + "eval_global_dataset_samples_per_second": 30.987, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2500 + }, + { + "epoch": 2.5730452674897117, + "grad_norm": 6.166476726531982, + "learning_rate": 1.8741139542865475e-05, + "loss": 0.3454, + "step": 2501 + }, + { + "epoch": 2.574074074074074, + "grad_norm": 5.319652080535889, + "learning_rate": 1.8727341740411467e-05, + "loss": 0.1877, + "step": 2502 + }, + { + "epoch": 2.575102880658436, + "grad_norm": 4.261229991912842, + "learning_rate": 1.8713551565740183e-05, + "loss": 0.0757, + "step": 2503 + }, + { + "epoch": 2.5761316872427984, + "grad_norm": 9.842738151550293, + "learning_rate": 1.869976904168893e-05, + "loss": 0.5875, + "step": 2504 + }, + { + "epoch": 2.5771604938271606, + "grad_norm": 2.6120381355285645, + "learning_rate": 1.8685994191082353e-05, + "loss": 0.0567, + "step": 2505 + }, + { + "epoch": 2.5781893004115224, + "grad_norm": 10.804230690002441, + "learning_rate": 1.867222703673238e-05, + "loss": 0.4306, + "step": 2506 + }, + { + "epoch": 2.5792181069958846, + "grad_norm": 3.8133013248443604, + "learning_rate": 1.86584676014382e-05, + "loss": 0.0697, + "step": 2507 + }, + { + "epoch": 2.580246913580247, + "grad_norm": 3.132995367050171, + "learning_rate": 1.8644715907986223e-05, + "loss": 0.0485, + "step": 2508 + }, + { + "epoch": 2.581275720164609, + "grad_norm": 6.798681735992432, + "learning_rate": 1.8630971979150018e-05, + "loss": 0.2556, + "step": 2509 + }, + { + "epoch": 2.5823045267489713, + "grad_norm": 2.599490165710449, + "learning_rate": 1.8617235837690317e-05, + "loss": 0.126, + "step": 2510 + }, + { + "epoch": 2.5833333333333335, + "grad_norm": 3.5127954483032227, + "learning_rate": 1.860350750635495e-05, + "loss": 0.0554, + "step": 2511 + }, + { + "epoch": 2.5843621399176957, + "grad_norm": 8.277996063232422, + "learning_rate": 1.8589787007878803e-05, + "loss": 0.3234, + "step": 2512 + }, + { + "epoch": 2.5853909465020575, + "grad_norm": 2.4647581577301025, + "learning_rate": 1.8576074364983802e-05, + "loss": 0.0504, + "step": 2513 + }, + { + "epoch": 2.5864197530864197, + "grad_norm": 8.635161399841309, + "learning_rate": 1.856236960037886e-05, + "loss": 0.318, + "step": 2514 + }, + { + "epoch": 2.587448559670782, + "grad_norm": 4.386387348175049, + "learning_rate": 1.8548672736759843e-05, + "loss": 0.0953, + "step": 2515 + }, + { + "epoch": 2.588477366255144, + "grad_norm": 0.3948823809623718, + "learning_rate": 1.8534983796809533e-05, + "loss": 0.0039, + "step": 2516 + }, + { + "epoch": 2.5895061728395063, + "grad_norm": 3.7030575275421143, + "learning_rate": 1.8521302803197583e-05, + "loss": 0.071, + "step": 2517 + }, + { + "epoch": 2.590534979423868, + "grad_norm": 5.527285575866699, + "learning_rate": 1.8507629778580503e-05, + "loss": 0.1703, + "step": 2518 + }, + { + "epoch": 2.5915637860082303, + "grad_norm": 13.0263090133667, + "learning_rate": 1.8493964745601586e-05, + "loss": 0.7024, + "step": 2519 + }, + { + "epoch": 2.5925925925925926, + "grad_norm": 9.349004745483398, + "learning_rate": 1.8480307726890904e-05, + "loss": 0.435, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7251886129379272, + "eval_Qnli-dev_cosine_ap": 0.748327681996432, + "eval_Qnli-dev_cosine_f1": 0.7020109689213895, + "eval_Qnli-dev_cosine_f1_threshold": 0.688901424407959, + "eval_Qnli-dev_cosine_precision": 0.617363344051447, + "eval_Qnli-dev_cosine_recall": 0.8135593220338984, + "eval_Qnli-dev_dot_accuracy": 0.6796875, + "eval_Qnli-dev_dot_accuracy_threshold": 340.33660888671875, + "eval_Qnli-dev_dot_ap": 0.6793311070321438, + "eval_Qnli-dev_dot_f1": 0.6759098786828422, + "eval_Qnli-dev_dot_f1_threshold": 289.5785217285156, + "eval_Qnli-dev_dot_precision": 0.5718475073313783, + "eval_Qnli-dev_dot_recall": 0.826271186440678, + "eval_Qnli-dev_euclidean_accuracy": 0.703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.005366325378418, + "eval_Qnli-dev_euclidean_ap": 0.7567749567491601, + "eval_Qnli-dev_euclidean_f1": 0.7054263565891473, + "eval_Qnli-dev_euclidean_f1_threshold": 16.299209594726562, + "eval_Qnli-dev_euclidean_precision": 0.65, + "eval_Qnli-dev_euclidean_recall": 0.7711864406779662, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 323.7666320800781, + "eval_Qnli-dev_manhattan_ap": 0.7609706736263989, + "eval_Qnli-dev_manhattan_f1": 0.7060998151571164, + "eval_Qnli-dev_manhattan_f1_threshold": 351.2233581542969, + "eval_Qnli-dev_manhattan_precision": 0.6262295081967213, + "eval_Qnli-dev_manhattan_recall": 0.809322033898305, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 340.33660888671875, + "eval_Qnli-dev_max_ap": 0.7609706736263989, + "eval_Qnli-dev_max_f1": 0.7060998151571164, + "eval_Qnli-dev_max_f1_threshold": 351.2233581542969, + "eval_Qnli-dev_max_precision": 0.65, + "eval_Qnli-dev_max_recall": 0.826271186440678, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8538202047348022, + "eval_allNLI-dev_cosine_ap": 0.6214566976645772, + "eval_allNLI-dev_cosine_f1": 0.625, + "eval_allNLI-dev_cosine_f1_threshold": 0.7159340381622314, + "eval_allNLI-dev_cosine_precision": 0.509090909090909, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.701171875, + "eval_allNLI-dev_dot_accuracy_threshold": 353.05670166015625, + "eval_allNLI-dev_dot_ap": 0.5597025212427812, + "eval_allNLI-dev_dot_f1": 0.5868263473053892, + "eval_allNLI-dev_dot_f1_threshold": 270.795654296875, + "eval_allNLI-dev_dot_precision": 0.4481707317073171, + "eval_allNLI-dev_dot_recall": 0.8497109826589595, + "eval_allNLI-dev_euclidean_accuracy": 0.728515625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.772331237792969, + "eval_allNLI-dev_euclidean_ap": 0.6341426110785574, + "eval_allNLI-dev_euclidean_f1": 0.6391304347826088, + "eval_allNLI-dev_euclidean_f1_threshold": 15.549590110778809, + "eval_allNLI-dev_euclidean_precision": 0.5121951219512195, + "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 244.70553588867188, + "eval_allNLI-dev_manhattan_ap": 0.6314637809247962, + "eval_allNLI-dev_manhattan_f1": 0.6369710467706012, + "eval_allNLI-dev_manhattan_f1_threshold": 323.2559509277344, + "eval_allNLI-dev_manhattan_precision": 0.5181159420289855, + "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 353.05670166015625, + "eval_allNLI-dev_max_ap": 0.6341426110785574, + "eval_allNLI-dev_max_f1": 0.6391304347826088, + "eval_allNLI-dev_max_f1_threshold": 323.2559509277344, + "eval_allNLI-dev_max_precision": 0.5181159420289855, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7609706736263989, + "eval_sts-test_pearson_cosine": 0.8494417270705765, + "eval_sts-test_pearson_dot": 0.8306741764067529, + "eval_sts-test_pearson_euclidean": 0.8768724691331442, + "eval_sts-test_pearson_manhattan": 0.874886252119832, + "eval_sts-test_pearson_max": 0.8768724691331442, + "eval_sts-test_spearman_cosine": 0.8789788575333988, + "eval_sts-test_spearman_dot": 0.8285574770699249, + "eval_sts-test_spearman_euclidean": 0.8751109612600788, + "eval_sts-test_spearman_manhattan": 0.8725621531675317, + "eval_sts-test_spearman_max": 0.8789788575333988, + "eval_vitaminc-pairs_loss": 3.379549264907837, + "eval_vitaminc-pairs_runtime": 3.2572, + "eval_vitaminc-pairs_samples_per_second": 39.297, + "eval_vitaminc-pairs_steps_per_second": 0.307, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_negation-triplets_loss": 0.9225123524665833, + "eval_negation-triplets_runtime": 0.7796, + "eval_negation-triplets_samples_per_second": 164.176, + "eval_negation-triplets_steps_per_second": 1.283, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_scitail-pairs-pos_loss": 0.14036637544631958, + "eval_scitail-pairs-pos_runtime": 0.9591, + "eval_scitail-pairs-pos_samples_per_second": 133.464, + "eval_scitail-pairs-pos_steps_per_second": 1.043, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_scitail-pairs-qa_loss": 0.0007249915506690741, + "eval_scitail-pairs-qa_runtime": 0.6179, + "eval_scitail-pairs-qa_samples_per_second": 207.158, + "eval_scitail-pairs-qa_steps_per_second": 1.618, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_xsum-pairs_loss": 0.2940075397491455, + "eval_xsum-pairs_runtime": 3.0367, + "eval_xsum-pairs_samples_per_second": 42.151, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_sciq_pairs_loss": 0.08835644274950027, + "eval_sciq_pairs_runtime": 3.5629, + "eval_sciq_pairs_samples_per_second": 35.926, + "eval_sciq_pairs_steps_per_second": 0.281, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_qasc_pairs_loss": 0.13494905829429626, + "eval_qasc_pairs_runtime": 0.6263, + "eval_qasc_pairs_samples_per_second": 204.363, + "eval_qasc_pairs_steps_per_second": 1.597, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_openbookqa_pairs_loss": 0.8005498051643372, + "eval_openbookqa_pairs_runtime": 0.5979, + "eval_openbookqa_pairs_samples_per_second": 214.082, + "eval_openbookqa_pairs_steps_per_second": 1.673, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_msmarco_pairs_loss": 0.697590172290802, + "eval_msmarco_pairs_runtime": 1.5285, + "eval_msmarco_pairs_samples_per_second": 83.741, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_nq_pairs_loss": 0.6648739576339722, + "eval_nq_pairs_runtime": 2.8993, + "eval_nq_pairs_samples_per_second": 44.148, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_trivia_pairs_loss": 0.8040751814842224, + "eval_trivia_pairs_runtime": 3.4418, + "eval_trivia_pairs_samples_per_second": 37.19, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_gooaq_pairs_loss": 0.3208771347999573, + "eval_gooaq_pairs_runtime": 0.9599, + "eval_gooaq_pairs_samples_per_second": 133.345, + "eval_gooaq_pairs_steps_per_second": 1.042, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_paws-pos_loss": 0.021342573687434196, + "eval_paws-pos_runtime": 0.7082, + "eval_paws-pos_samples_per_second": 180.736, + "eval_paws-pos_steps_per_second": 1.412, + "step": 2520 + }, + { + "epoch": 2.5925925925925926, + "eval_global_dataset_loss": 0.4530204236507416, + "eval_global_dataset_runtime": 13.4367, + "eval_global_dataset_samples_per_second": 30.96, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2520 + }, + { + "epoch": 2.593621399176955, + "grad_norm": 1.7578164339065552, + "learning_rate": 1.8466658745065253e-05, + "loss": 0.0269, + "step": 2521 + }, + { + "epoch": 2.594650205761317, + "grad_norm": 2.2386653423309326, + "learning_rate": 1.845301782272812e-05, + "loss": 0.0366, + "step": 2522 + }, + { + "epoch": 2.5956790123456788, + "grad_norm": 4.242228984832764, + "learning_rate": 1.843938498246964e-05, + "loss": 0.0975, + "step": 2523 + }, + { + "epoch": 2.596707818930041, + "grad_norm": 8.557851791381836, + "learning_rate": 1.8425760246866573e-05, + "loss": 0.3245, + "step": 2524 + }, + { + "epoch": 2.597736625514403, + "grad_norm": 9.70186996459961, + "learning_rate": 1.8412143638482252e-05, + "loss": 0.4127, + "step": 2525 + }, + { + "epoch": 2.5987654320987654, + "grad_norm": 2.1967456340789795, + "learning_rate": 1.8398535179866544e-05, + "loss": 0.0285, + "step": 2526 + }, + { + "epoch": 2.5997942386831276, + "grad_norm": 4.0927815437316895, + "learning_rate": 1.8384934893555843e-05, + "loss": 0.0737, + "step": 2527 + }, + { + "epoch": 2.60082304526749, + "grad_norm": 9.168094635009766, + "learning_rate": 1.837134280207297e-05, + "loss": 0.3748, + "step": 2528 + }, + { + "epoch": 2.601851851851852, + "grad_norm": 11.000916481018066, + "learning_rate": 1.835775892792721e-05, + "loss": 0.4902, + "step": 2529 + }, + { + "epoch": 2.602880658436214, + "grad_norm": 0.0416768416762352, + "learning_rate": 1.8344183293614233e-05, + "loss": 0.0005, + "step": 2530 + }, + { + "epoch": 2.603909465020576, + "grad_norm": 11.488398551940918, + "learning_rate": 1.833061592161605e-05, + "loss": 1.1583, + "step": 2531 + }, + { + "epoch": 2.6049382716049383, + "grad_norm": 2.7752737998962402, + "learning_rate": 1.8317056834400997e-05, + "loss": 0.0334, + "step": 2532 + }, + { + "epoch": 2.6059670781893005, + "grad_norm": 2.4390814304351807, + "learning_rate": 1.8303506054423688e-05, + "loss": 0.053, + "step": 2533 + }, + { + "epoch": 2.6069958847736627, + "grad_norm": 3.8222033977508545, + "learning_rate": 1.8289963604124984e-05, + "loss": 0.1288, + "step": 2534 + }, + { + "epoch": 2.6080246913580245, + "grad_norm": 5.402822971343994, + "learning_rate": 1.8276429505931945e-05, + "loss": 0.1425, + "step": 2535 + }, + { + "epoch": 2.6090534979423867, + "grad_norm": 4.8009562492370605, + "learning_rate": 1.8262903782257816e-05, + "loss": 0.1451, + "step": 2536 + }, + { + "epoch": 2.610082304526749, + "grad_norm": 5.965047359466553, + "learning_rate": 1.8249386455501952e-05, + "loss": 0.1908, + "step": 2537 + }, + { + "epoch": 2.611111111111111, + "grad_norm": 0.5453029274940491, + "learning_rate": 1.8235877548049805e-05, + "loss": 0.0044, + "step": 2538 + }, + { + "epoch": 2.6121399176954734, + "grad_norm": 5.0793538093566895, + "learning_rate": 1.8222377082272904e-05, + "loss": 0.2378, + "step": 2539 + }, + { + "epoch": 2.613168724279835, + "grad_norm": 4.566463470458984, + "learning_rate": 1.8208885080528774e-05, + "loss": 0.1066, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7347972989082336, + "eval_Qnli-dev_cosine_ap": 0.7416243803529952, + "eval_Qnli-dev_cosine_f1": 0.7039711191335739, + "eval_Qnli-dev_cosine_f1_threshold": 0.6872456669807434, + "eval_Qnli-dev_cosine_precision": 0.6132075471698113, + "eval_Qnli-dev_cosine_recall": 0.826271186440678, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 334.18353271484375, + "eval_Qnli-dev_dot_ap": 0.6809839038409365, + "eval_Qnli-dev_dot_f1": 0.6714031971580817, + "eval_Qnli-dev_dot_f1_threshold": 295.66534423828125, + "eval_Qnli-dev_dot_precision": 0.5779816513761468, + "eval_Qnli-dev_dot_recall": 0.8008474576271186, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.0936279296875, + "eval_Qnli-dev_euclidean_ap": 0.7511411683064413, + "eval_Qnli-dev_euclidean_f1": 0.6996336996336996, + "eval_Qnli-dev_euclidean_f1_threshold": 16.44924545288086, + "eval_Qnli-dev_euclidean_precision": 0.6161290322580645, + "eval_Qnli-dev_euclidean_recall": 0.809322033898305, + "eval_Qnli-dev_manhattan_accuracy": 0.70703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 308.08575439453125, + "eval_Qnli-dev_manhattan_ap": 0.7550849365274038, + "eval_Qnli-dev_manhattan_f1": 0.7110266159695817, + "eval_Qnli-dev_manhattan_f1_threshold": 341.57763671875, + "eval_Qnli-dev_manhattan_precision": 0.6448275862068965, + "eval_Qnli-dev_manhattan_recall": 0.7923728813559322, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 334.18353271484375, + "eval_Qnli-dev_max_ap": 0.7550849365274038, + "eval_Qnli-dev_max_f1": 0.7110266159695817, + "eval_Qnli-dev_max_f1_threshold": 341.57763671875, + "eval_Qnli-dev_max_precision": 0.6448275862068965, + "eval_Qnli-dev_max_recall": 0.826271186440678, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8740963935852051, + "eval_allNLI-dev_cosine_ap": 0.6207954004070142, + "eval_allNLI-dev_cosine_f1": 0.6305882352941177, + "eval_allNLI-dev_cosine_f1_threshold": 0.7428080439567566, + "eval_allNLI-dev_cosine_precision": 0.5317460317460317, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.693359375, + "eval_allNLI-dev_dot_accuracy_threshold": 369.31964111328125, + "eval_allNLI-dev_dot_ap": 0.5515733361000523, + "eval_allNLI-dev_dot_f1": 0.5858585858585859, + "eval_allNLI-dev_dot_f1_threshold": 283.7347717285156, + "eval_allNLI-dev_dot_precision": 0.4503105590062112, + "eval_allNLI-dev_dot_recall": 0.838150289017341, + "eval_allNLI-dev_euclidean_accuracy": 0.744140625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.991886138916016, + "eval_allNLI-dev_euclidean_ap": 0.6339687991970019, + "eval_allNLI-dev_euclidean_f1": 0.6458797327394209, + "eval_allNLI-dev_euclidean_f1_threshold": 15.262733459472656, + "eval_allNLI-dev_euclidean_precision": 0.5253623188405797, + "eval_allNLI-dev_euclidean_recall": 0.838150289017341, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 280.542236328125, + "eval_allNLI-dev_manhattan_ap": 0.6282077789783579, + "eval_allNLI-dev_manhattan_f1": 0.6431718061674009, + "eval_allNLI-dev_manhattan_f1_threshold": 323.2728576660156, + "eval_allNLI-dev_manhattan_precision": 0.5195729537366548, + "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 369.31964111328125, + "eval_allNLI-dev_max_ap": 0.6339687991970019, + "eval_allNLI-dev_max_f1": 0.6458797327394209, + "eval_allNLI-dev_max_f1_threshold": 323.2728576660156, + "eval_allNLI-dev_max_precision": 0.5317460317460317, + "eval_allNLI-dev_max_recall": 0.8439306358381503, + "eval_sequential_score": 0.7550849365274038, + "eval_sts-test_pearson_cosine": 0.8436892173450835, + "eval_sts-test_pearson_dot": 0.8161614056600369, + "eval_sts-test_pearson_euclidean": 0.8747850736377845, + "eval_sts-test_pearson_manhattan": 0.8725689127354326, + "eval_sts-test_pearson_max": 0.8747850736377845, + "eval_sts-test_spearman_cosine": 0.8754703138890516, + "eval_sts-test_spearman_dot": 0.8092124465469122, + "eval_sts-test_spearman_euclidean": 0.8734285823393305, + "eval_sts-test_spearman_manhattan": 0.8706312257410156, + "eval_sts-test_spearman_max": 0.8754703138890516, + "eval_vitaminc-pairs_loss": 3.3029744625091553, + "eval_vitaminc-pairs_runtime": 3.2343, + "eval_vitaminc-pairs_samples_per_second": 39.576, + "eval_vitaminc-pairs_steps_per_second": 0.309, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_negation-triplets_loss": 0.9069310426712036, + "eval_negation-triplets_runtime": 0.7774, + "eval_negation-triplets_samples_per_second": 164.659, + "eval_negation-triplets_steps_per_second": 1.286, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_scitail-pairs-pos_loss": 0.1403876394033432, + "eval_scitail-pairs-pos_runtime": 0.9445, + "eval_scitail-pairs-pos_samples_per_second": 135.525, + "eval_scitail-pairs-pos_steps_per_second": 1.059, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_scitail-pairs-qa_loss": 0.0007205409347079694, + "eval_scitail-pairs-qa_runtime": 0.6132, + "eval_scitail-pairs-qa_samples_per_second": 208.74, + "eval_scitail-pairs-qa_steps_per_second": 1.631, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_xsum-pairs_loss": 0.33914807438850403, + "eval_xsum-pairs_runtime": 3.0378, + "eval_xsum-pairs_samples_per_second": 42.135, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_sciq_pairs_loss": 0.09370269626379013, + "eval_sciq_pairs_runtime": 3.5523, + "eval_sciq_pairs_samples_per_second": 36.033, + "eval_sciq_pairs_steps_per_second": 0.282, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_qasc_pairs_loss": 0.14020417630672455, + "eval_qasc_pairs_runtime": 0.6277, + "eval_qasc_pairs_samples_per_second": 203.912, + "eval_qasc_pairs_steps_per_second": 1.593, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_openbookqa_pairs_loss": 0.7885816097259521, + "eval_openbookqa_pairs_runtime": 0.6152, + "eval_openbookqa_pairs_samples_per_second": 208.077, + "eval_openbookqa_pairs_steps_per_second": 1.626, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_msmarco_pairs_loss": 0.69005286693573, + "eval_msmarco_pairs_runtime": 1.5287, + "eval_msmarco_pairs_samples_per_second": 83.731, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_nq_pairs_loss": 0.644152045249939, + "eval_nq_pairs_runtime": 2.9048, + "eval_nq_pairs_samples_per_second": 44.065, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_trivia_pairs_loss": 0.7462302446365356, + "eval_trivia_pairs_runtime": 3.4523, + "eval_trivia_pairs_samples_per_second": 37.077, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_gooaq_pairs_loss": 0.2984876334667206, + "eval_gooaq_pairs_runtime": 0.9676, + "eval_gooaq_pairs_samples_per_second": 132.281, + "eval_gooaq_pairs_steps_per_second": 1.033, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_paws-pos_loss": 0.02198866941034794, + "eval_paws-pos_runtime": 0.7284, + "eval_paws-pos_samples_per_second": 175.727, + "eval_paws-pos_steps_per_second": 1.373, + "step": 2540 + }, + { + "epoch": 2.613168724279835, + "eval_global_dataset_loss": 0.4452175796031952, + "eval_global_dataset_runtime": 13.4391, + "eval_global_dataset_samples_per_second": 30.954, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2540 + }, + { + "epoch": 2.6141975308641974, + "grad_norm": 4.7599639892578125, + "learning_rate": 1.8195401565160936e-05, + "loss": 0.1052, + "step": 2541 + }, + { + "epoch": 2.6152263374485596, + "grad_norm": 2.892470121383667, + "learning_rate": 1.8181926558498852e-05, + "loss": 0.0689, + "step": 2542 + }, + { + "epoch": 2.616255144032922, + "grad_norm": 5.532009601593018, + "learning_rate": 1.8168460082857903e-05, + "loss": 0.1281, + "step": 2543 + }, + { + "epoch": 2.617283950617284, + "grad_norm": 3.4185428619384766, + "learning_rate": 1.8155002160539324e-05, + "loss": 0.0802, + "step": 2544 + }, + { + "epoch": 2.6183127572016462, + "grad_norm": 2.5799851417541504, + "learning_rate": 1.814155281383021e-05, + "loss": 0.0336, + "step": 2545 + }, + { + "epoch": 2.6193415637860085, + "grad_norm": 9.6151123046875, + "learning_rate": 1.8128112065003422e-05, + "loss": 0.3981, + "step": 2546 + }, + { + "epoch": 2.6203703703703702, + "grad_norm": 3.292311429977417, + "learning_rate": 1.8114679936317617e-05, + "loss": 0.0528, + "step": 2547 + }, + { + "epoch": 2.6213991769547325, + "grad_norm": 2.3397133350372314, + "learning_rate": 1.810125645001716e-05, + "loss": 0.019, + "step": 2548 + }, + { + "epoch": 2.6224279835390947, + "grad_norm": 5.602199554443359, + "learning_rate": 1.808784162833209e-05, + "loss": 0.1287, + "step": 2549 + }, + { + "epoch": 2.623456790123457, + "grad_norm": 8.078383445739746, + "learning_rate": 1.807443549347812e-05, + "loss": 0.3798, + "step": 2550 + }, + { + "epoch": 2.624485596707819, + "grad_norm": 0.13280944526195526, + "learning_rate": 1.8061038067656566e-05, + "loss": 0.002, + "step": 2551 + }, + { + "epoch": 2.625514403292181, + "grad_norm": 3.115669012069702, + "learning_rate": 1.804764937305433e-05, + "loss": 0.0509, + "step": 2552 + }, + { + "epoch": 2.626543209876543, + "grad_norm": 0.5160894393920898, + "learning_rate": 1.8034269431843837e-05, + "loss": 0.0403, + "step": 2553 + }, + { + "epoch": 2.6275720164609053, + "grad_norm": 2.8853940963745117, + "learning_rate": 1.8020898266183028e-05, + "loss": 0.0691, + "step": 2554 + }, + { + "epoch": 2.6286008230452675, + "grad_norm": 6.755553245544434, + "learning_rate": 1.8007535898215322e-05, + "loss": 0.2631, + "step": 2555 + }, + { + "epoch": 2.6296296296296298, + "grad_norm": 13.587359428405762, + "learning_rate": 1.7994182350069544e-05, + "loss": 1.5158, + "step": 2556 + }, + { + "epoch": 2.6306584362139915, + "grad_norm": 10.73571491241455, + "learning_rate": 1.798083764385993e-05, + "loss": 0.7129, + "step": 2557 + }, + { + "epoch": 2.6316872427983538, + "grad_norm": 0.8754851818084717, + "learning_rate": 1.7967501801686066e-05, + "loss": 0.0526, + "step": 2558 + }, + { + "epoch": 2.632716049382716, + "grad_norm": 12.748086929321289, + "learning_rate": 1.7954174845632863e-05, + "loss": 0.574, + "step": 2559 + }, + { + "epoch": 2.633744855967078, + "grad_norm": 6.738278865814209, + "learning_rate": 1.794085679777052e-05, + "loss": 0.2269, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7472108602523804, + "eval_Qnli-dev_cosine_ap": 0.7419581770426393, + "eval_Qnli-dev_cosine_f1": 0.700348432055749, + "eval_Qnli-dev_cosine_f1_threshold": 0.668160080909729, + "eval_Qnli-dev_cosine_precision": 0.5946745562130178, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 330.5736389160156, + "eval_Qnli-dev_dot_ap": 0.689383636845089, + "eval_Qnli-dev_dot_f1": 0.672661870503597, + "eval_Qnli-dev_dot_f1_threshold": 303.59796142578125, + "eval_Qnli-dev_dot_precision": 0.584375, + "eval_Qnli-dev_dot_recall": 0.7923728813559322, + "eval_Qnli-dev_euclidean_accuracy": 0.705078125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.200478553771973, + "eval_Qnli-dev_euclidean_ap": 0.752752505015462, + "eval_Qnli-dev_euclidean_f1": 0.7082568807339449, + "eval_Qnli-dev_euclidean_f1_threshold": 16.415273666381836, + "eval_Qnli-dev_euclidean_precision": 0.6245954692556634, + "eval_Qnli-dev_euclidean_recall": 0.8177966101694916, + "eval_Qnli-dev_manhattan_accuracy": 0.69921875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 308.05975341796875, + "eval_Qnli-dev_manhattan_ap": 0.7543040216671292, + "eval_Qnli-dev_manhattan_f1": 0.7080979284369116, + "eval_Qnli-dev_manhattan_f1_threshold": 342.9434814453125, + "eval_Qnli-dev_manhattan_precision": 0.6372881355932203, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 330.5736389160156, + "eval_Qnli-dev_max_ap": 0.7543040216671292, + "eval_Qnli-dev_max_f1": 0.7082568807339449, + "eval_Qnli-dev_max_f1_threshold": 342.9434814453125, + "eval_Qnli-dev_max_precision": 0.6372881355932203, + "eval_Qnli-dev_max_recall": 0.8516949152542372, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8039998412132263, + "eval_allNLI-dev_cosine_ap": 0.6220996013993169, + "eval_allNLI-dev_cosine_f1": 0.6305882352941177, + "eval_allNLI-dev_cosine_f1_threshold": 0.7354094982147217, + "eval_allNLI-dev_cosine_precision": 0.5317460317460317, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 346.41259765625, + "eval_allNLI-dev_dot_ap": 0.5565229013900539, + "eval_allNLI-dev_dot_f1": 0.5831702544031311, + "eval_allNLI-dev_dot_f1_threshold": 274.1090087890625, + "eval_allNLI-dev_dot_precision": 0.4408284023668639, + "eval_allNLI-dev_dot_recall": 0.861271676300578, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.22194766998291, + "eval_allNLI-dev_euclidean_ap": 0.6346136337315481, + "eval_allNLI-dev_euclidean_f1": 0.6438356164383561, + "eval_allNLI-dev_euclidean_f1_threshold": 15.188009262084961, + "eval_allNLI-dev_euclidean_precision": 0.5320754716981132, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 284.653564453125, + "eval_allNLI-dev_manhattan_ap": 0.6298799583559361, + "eval_allNLI-dev_manhattan_f1": 0.64, + "eval_allNLI-dev_manhattan_f1_threshold": 323.64794921875, + "eval_allNLI-dev_manhattan_precision": 0.51985559566787, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.740234375, + "eval_allNLI-dev_max_accuracy_threshold": 346.41259765625, + "eval_allNLI-dev_max_ap": 0.6346136337315481, + "eval_allNLI-dev_max_f1": 0.6438356164383561, + "eval_allNLI-dev_max_f1_threshold": 323.64794921875, + "eval_allNLI-dev_max_precision": 0.5320754716981132, + "eval_allNLI-dev_max_recall": 0.861271676300578, + "eval_sequential_score": 0.7543040216671292, + "eval_sts-test_pearson_cosine": 0.8459506097334918, + "eval_sts-test_pearson_dot": 0.8208837992692455, + "eval_sts-test_pearson_euclidean": 0.8731667167526915, + "eval_sts-test_pearson_manhattan": 0.8710894756324609, + "eval_sts-test_pearson_max": 0.8731667167526915, + "eval_sts-test_spearman_cosine": 0.874299978901144, + "eval_sts-test_spearman_dot": 0.8121811109738333, + "eval_sts-test_spearman_euclidean": 0.8705162149992397, + "eval_sts-test_spearman_manhattan": 0.8681815864437118, + "eval_sts-test_spearman_max": 0.874299978901144, + "eval_vitaminc-pairs_loss": 3.281205415725708, + "eval_vitaminc-pairs_runtime": 3.2246, + "eval_vitaminc-pairs_samples_per_second": 39.695, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_negation-triplets_loss": 0.9458452463150024, + "eval_negation-triplets_runtime": 0.7729, + "eval_negation-triplets_samples_per_second": 165.612, + "eval_negation-triplets_steps_per_second": 1.294, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_scitail-pairs-pos_loss": 0.13827168941497803, + "eval_scitail-pairs-pos_runtime": 0.9569, + "eval_scitail-pairs-pos_samples_per_second": 133.771, + "eval_scitail-pairs-pos_steps_per_second": 1.045, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_scitail-pairs-qa_loss": 0.0007812771946191788, + "eval_scitail-pairs-qa_runtime": 0.6123, + "eval_scitail-pairs-qa_samples_per_second": 209.046, + "eval_scitail-pairs-qa_steps_per_second": 1.633, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_xsum-pairs_loss": 0.3036934435367584, + "eval_xsum-pairs_runtime": 3.038, + "eval_xsum-pairs_samples_per_second": 42.133, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_sciq_pairs_loss": 0.08964813500642776, + "eval_sciq_pairs_runtime": 3.5442, + "eval_sciq_pairs_samples_per_second": 36.115, + "eval_sciq_pairs_steps_per_second": 0.282, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_qasc_pairs_loss": 0.15069030225276947, + "eval_qasc_pairs_runtime": 0.6563, + "eval_qasc_pairs_samples_per_second": 195.04, + "eval_qasc_pairs_steps_per_second": 1.524, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_openbookqa_pairs_loss": 0.7651960849761963, + "eval_openbookqa_pairs_runtime": 0.6179, + "eval_openbookqa_pairs_samples_per_second": 207.165, + "eval_openbookqa_pairs_steps_per_second": 1.618, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_msmarco_pairs_loss": 0.7059141993522644, + "eval_msmarco_pairs_runtime": 1.5273, + "eval_msmarco_pairs_samples_per_second": 83.81, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_nq_pairs_loss": 0.6533631086349487, + "eval_nq_pairs_runtime": 2.9088, + "eval_nq_pairs_samples_per_second": 44.004, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_trivia_pairs_loss": 0.7508307695388794, + "eval_trivia_pairs_runtime": 3.4492, + "eval_trivia_pairs_samples_per_second": 37.11, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_gooaq_pairs_loss": 0.3268025517463684, + "eval_gooaq_pairs_runtime": 0.9595, + "eval_gooaq_pairs_samples_per_second": 133.4, + "eval_gooaq_pairs_steps_per_second": 1.042, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_paws-pos_loss": 0.021816374734044075, + "eval_paws-pos_runtime": 0.7168, + "eval_paws-pos_samples_per_second": 178.561, + "eval_paws-pos_steps_per_second": 1.395, + "step": 2560 + }, + { + "epoch": 2.633744855967078, + "eval_global_dataset_loss": 0.4567856788635254, + "eval_global_dataset_runtime": 13.4388, + "eval_global_dataset_samples_per_second": 30.955, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2560 + }, + { + "epoch": 2.6347736625514404, + "grad_norm": 4.527674674987793, + "learning_rate": 1.7927547680154473e-05, + "loss": 0.1645, + "step": 2561 + }, + { + "epoch": 2.6358024691358026, + "grad_norm": 4.207751750946045, + "learning_rate": 1.7914247514825372e-05, + "loss": 0.079, + "step": 2562 + }, + { + "epoch": 2.636831275720165, + "grad_norm": 8.055123329162598, + "learning_rate": 1.7900956323809044e-05, + "loss": 0.3544, + "step": 2563 + }, + { + "epoch": 2.6378600823045266, + "grad_norm": 1.727632761001587, + "learning_rate": 1.7887674129116458e-05, + "loss": 0.0274, + "step": 2564 + }, + { + "epoch": 2.638888888888889, + "grad_norm": 6.485104560852051, + "learning_rate": 1.787440095274367e-05, + "loss": 0.2277, + "step": 2565 + }, + { + "epoch": 2.639917695473251, + "grad_norm": 2.8004956245422363, + "learning_rate": 1.7861136816671826e-05, + "loss": 0.0422, + "step": 2566 + }, + { + "epoch": 2.6409465020576133, + "grad_norm": 3.481257915496826, + "learning_rate": 1.7847881742867072e-05, + "loss": 0.0641, + "step": 2567 + }, + { + "epoch": 2.6419753086419755, + "grad_norm": 0.19010227918624878, + "learning_rate": 1.7834635753280572e-05, + "loss": 0.0019, + "step": 2568 + }, + { + "epoch": 2.6430041152263373, + "grad_norm": 4.518033027648926, + "learning_rate": 1.7821398869848427e-05, + "loss": 0.1717, + "step": 2569 + }, + { + "epoch": 2.6440329218106995, + "grad_norm": 3.3492650985717773, + "learning_rate": 1.7808171114491665e-05, + "loss": 0.0414, + "step": 2570 + }, + { + "epoch": 2.6450617283950617, + "grad_norm": 6.0391411781311035, + "learning_rate": 1.7794952509116194e-05, + "loss": 0.1508, + "step": 2571 + }, + { + "epoch": 2.646090534979424, + "grad_norm": 6.24597692489624, + "learning_rate": 1.7781743075612785e-05, + "loss": 0.287, + "step": 2572 + }, + { + "epoch": 2.647119341563786, + "grad_norm": 4.371918678283691, + "learning_rate": 1.7768542835856997e-05, + "loss": 0.1636, + "step": 2573 + }, + { + "epoch": 2.648148148148148, + "grad_norm": 5.617849826812744, + "learning_rate": 1.775535181170918e-05, + "loss": 0.1417, + "step": 2574 + }, + { + "epoch": 2.64917695473251, + "grad_norm": 0.7585577368736267, + "learning_rate": 1.7742170025014406e-05, + "loss": 0.0057, + "step": 2575 + }, + { + "epoch": 2.6502057613168724, + "grad_norm": 5.705314636230469, + "learning_rate": 1.7728997497602476e-05, + "loss": 0.1221, + "step": 2576 + }, + { + "epoch": 2.6512345679012346, + "grad_norm": 0.08371909707784653, + "learning_rate": 1.771583425128782e-05, + "loss": 0.0007, + "step": 2577 + }, + { + "epoch": 2.652263374485597, + "grad_norm": 15.912981033325195, + "learning_rate": 1.7702680307869542e-05, + "loss": 2.0316, + "step": 2578 + }, + { + "epoch": 2.653292181069959, + "grad_norm": 8.79644775390625, + "learning_rate": 1.7689535689131294e-05, + "loss": 0.3543, + "step": 2579 + }, + { + "epoch": 2.6543209876543212, + "grad_norm": 3.05710506439209, + "learning_rate": 1.767640041684133e-05, + "loss": 0.0657, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7484172582626343, + "eval_Qnli-dev_cosine_ap": 0.7444016607108496, + "eval_Qnli-dev_cosine_f1": 0.7052631578947368, + "eval_Qnli-dev_cosine_f1_threshold": 0.6621572375297546, + "eval_Qnli-dev_cosine_precision": 0.6017964071856288, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.673828125, + "eval_Qnli-dev_dot_accuracy_threshold": 324.5037841796875, + "eval_Qnli-dev_dot_ap": 0.6846581759479151, + "eval_Qnli-dev_dot_f1": 0.6753731343283581, + "eval_Qnli-dev_dot_f1_threshold": 301.7471923828125, + "eval_Qnli-dev_dot_precision": 0.6033333333333334, + "eval_Qnli-dev_dot_recall": 0.7669491525423728, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.733160018920898, + "eval_Qnli-dev_euclidean_ap": 0.7541699764293507, + "eval_Qnli-dev_euclidean_f1": 0.7097902097902098, + "eval_Qnli-dev_euclidean_f1_threshold": 17.237119674682617, + "eval_Qnli-dev_euclidean_precision": 0.6041666666666666, + "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, + "eval_Qnli-dev_manhattan_accuracy": 0.701171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 310.09771728515625, + "eval_Qnli-dev_manhattan_ap": 0.757899803996903, + "eval_Qnli-dev_manhattan_f1": 0.7077464788732394, + "eval_Qnli-dev_manhattan_f1_threshold": 360.0821533203125, + "eval_Qnli-dev_manhattan_precision": 0.6054216867469879, + "eval_Qnli-dev_manhattan_recall": 0.8516949152542372, + "eval_Qnli-dev_max_accuracy": 0.701171875, + "eval_Qnli-dev_max_accuracy_threshold": 324.5037841796875, + "eval_Qnli-dev_max_ap": 0.757899803996903, + "eval_Qnli-dev_max_f1": 0.7097902097902098, + "eval_Qnli-dev_max_f1_threshold": 360.0821533203125, + "eval_Qnli-dev_max_precision": 0.6054216867469879, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8518314361572266, + "eval_allNLI-dev_cosine_ap": 0.6251283386043572, + "eval_allNLI-dev_cosine_f1": 0.6275395033860045, + "eval_allNLI-dev_cosine_f1_threshold": 0.700609028339386, + "eval_allNLI-dev_cosine_precision": 0.5148148148148148, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 335.95489501953125, + "eval_allNLI-dev_dot_ap": 0.5695989977515594, + "eval_allNLI-dev_dot_f1": 0.594704684317719, + "eval_allNLI-dev_dot_f1_threshold": 267.34747314453125, + "eval_allNLI-dev_dot_precision": 0.4591194968553459, + "eval_allNLI-dev_dot_recall": 0.8439306358381503, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.570003509521484, + "eval_allNLI-dev_euclidean_ap": 0.63537324994232, + "eval_allNLI-dev_euclidean_f1": 0.6434782608695653, + "eval_allNLI-dev_euclidean_f1_threshold": 15.92835807800293, + "eval_allNLI-dev_euclidean_precision": 0.5156794425087108, + "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 286.9229736328125, + "eval_allNLI-dev_manhattan_ap": 0.6304260331979171, + "eval_allNLI-dev_manhattan_f1": 0.6433260393873084, + "eval_allNLI-dev_manhattan_f1_threshold": 334.8063659667969, + "eval_allNLI-dev_manhattan_precision": 0.5176056338028169, + "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 335.95489501953125, + "eval_allNLI-dev_max_ap": 0.63537324994232, + "eval_allNLI-dev_max_f1": 0.6434782608695653, + "eval_allNLI-dev_max_f1_threshold": 334.8063659667969, + "eval_allNLI-dev_max_precision": 0.5176056338028169, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.757899803996903, + "eval_sts-test_pearson_cosine": 0.8515681442014782, + "eval_sts-test_pearson_dot": 0.8421992816407995, + "eval_sts-test_pearson_euclidean": 0.8739837908620688, + "eval_sts-test_pearson_manhattan": 0.871596824982543, + "eval_sts-test_pearson_max": 0.8739837908620688, + "eval_sts-test_spearman_cosine": 0.8758156146219479, + "eval_sts-test_spearman_dot": 0.8362005002517007, + "eval_sts-test_spearman_euclidean": 0.8699935202661881, + "eval_sts-test_spearman_manhattan": 0.867855304996907, + "eval_sts-test_spearman_max": 0.8758156146219479, + "eval_vitaminc-pairs_loss": 3.213340997695923, + "eval_vitaminc-pairs_runtime": 3.2431, + "eval_vitaminc-pairs_samples_per_second": 39.469, + "eval_vitaminc-pairs_steps_per_second": 0.308, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_negation-triplets_loss": 0.9520102143287659, + "eval_negation-triplets_runtime": 0.7768, + "eval_negation-triplets_samples_per_second": 164.78, + "eval_negation-triplets_steps_per_second": 1.287, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_scitail-pairs-pos_loss": 0.13387437164783478, + "eval_scitail-pairs-pos_runtime": 0.967, + "eval_scitail-pairs-pos_samples_per_second": 132.363, + "eval_scitail-pairs-pos_steps_per_second": 1.034, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_scitail-pairs-qa_loss": 0.0005432313773781061, + "eval_scitail-pairs-qa_runtime": 0.6286, + "eval_scitail-pairs-qa_samples_per_second": 203.62, + "eval_scitail-pairs-qa_steps_per_second": 1.591, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_xsum-pairs_loss": 0.300295889377594, + "eval_xsum-pairs_runtime": 3.0364, + "eval_xsum-pairs_samples_per_second": 42.155, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_sciq_pairs_loss": 0.0935114249587059, + "eval_sciq_pairs_runtime": 3.5687, + "eval_sciq_pairs_samples_per_second": 35.868, + "eval_sciq_pairs_steps_per_second": 0.28, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_qasc_pairs_loss": 0.148627370595932, + "eval_qasc_pairs_runtime": 0.6366, + "eval_qasc_pairs_samples_per_second": 201.054, + "eval_qasc_pairs_steps_per_second": 1.571, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_openbookqa_pairs_loss": 0.7378137707710266, + "eval_openbookqa_pairs_runtime": 0.616, + "eval_openbookqa_pairs_samples_per_second": 207.795, + "eval_openbookqa_pairs_steps_per_second": 1.623, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_msmarco_pairs_loss": 0.7179670929908752, + "eval_msmarco_pairs_runtime": 1.5425, + "eval_msmarco_pairs_samples_per_second": 82.983, + "eval_msmarco_pairs_steps_per_second": 0.648, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_nq_pairs_loss": 0.583605170249939, + "eval_nq_pairs_runtime": 2.9252, + "eval_nq_pairs_samples_per_second": 43.757, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_trivia_pairs_loss": 0.7238264679908752, + "eval_trivia_pairs_runtime": 3.4828, + "eval_trivia_pairs_samples_per_second": 36.752, + "eval_trivia_pairs_steps_per_second": 0.287, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_gooaq_pairs_loss": 0.3407573401927948, + "eval_gooaq_pairs_runtime": 0.9661, + "eval_gooaq_pairs_samples_per_second": 132.493, + "eval_gooaq_pairs_steps_per_second": 1.035, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_paws-pos_loss": 0.021885672584176064, + "eval_paws-pos_runtime": 0.7226, + "eval_paws-pos_samples_per_second": 177.133, + "eval_paws-pos_steps_per_second": 1.384, + "step": 2580 + }, + { + "epoch": 2.6543209876543212, + "eval_global_dataset_loss": 0.43512460589408875, + "eval_global_dataset_runtime": 13.4367, + "eval_global_dataset_samples_per_second": 30.96, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2580 + }, + { + "epoch": 2.655349794238683, + "grad_norm": 9.852272987365723, + "learning_rate": 1.7663274512752394e-05, + "loss": 0.4626, + "step": 2581 + }, + { + "epoch": 2.656378600823045, + "grad_norm": 6.879485130310059, + "learning_rate": 1.7650157998601722e-05, + "loss": 0.2376, + "step": 2582 + }, + { + "epoch": 2.6574074074074074, + "grad_norm": 9.315069198608398, + "learning_rate": 1.7637050896111006e-05, + "loss": 0.3867, + "step": 2583 + }, + { + "epoch": 2.6584362139917697, + "grad_norm": 1.0761109590530396, + "learning_rate": 1.7623953226986355e-05, + "loss": 0.0226, + "step": 2584 + }, + { + "epoch": 2.659465020576132, + "grad_norm": 1.5962140560150146, + "learning_rate": 1.7610865012918247e-05, + "loss": 0.1158, + "step": 2585 + }, + { + "epoch": 2.6604938271604937, + "grad_norm": 3.737316131591797, + "learning_rate": 1.7597786275581496e-05, + "loss": 0.1177, + "step": 2586 + }, + { + "epoch": 2.661522633744856, + "grad_norm": 9.39920711517334, + "learning_rate": 1.758471703663525e-05, + "loss": 0.4314, + "step": 2587 + }, + { + "epoch": 2.662551440329218, + "grad_norm": 3.40377140045166, + "learning_rate": 1.757165731772289e-05, + "loss": 0.041, + "step": 2588 + }, + { + "epoch": 2.6635802469135803, + "grad_norm": 0.13125638663768768, + "learning_rate": 1.755860714047206e-05, + "loss": 0.0015, + "step": 2589 + }, + { + "epoch": 2.6646090534979425, + "grad_norm": 3.656142473220825, + "learning_rate": 1.7545566526494593e-05, + "loss": 0.0685, + "step": 2590 + }, + { + "epoch": 2.6656378600823043, + "grad_norm": 12.456727027893066, + "learning_rate": 1.7532535497386475e-05, + "loss": 1.3724, + "step": 2591 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 2.0424954891204834, + "learning_rate": 1.7519514074727837e-05, + "loss": 0.0311, + "step": 2592 + }, + { + "epoch": 2.6676954732510287, + "grad_norm": 2.686506748199463, + "learning_rate": 1.7506502280082887e-05, + "loss": 0.026, + "step": 2593 + }, + { + "epoch": 2.668724279835391, + "grad_norm": 4.565975666046143, + "learning_rate": 1.7493500134999892e-05, + "loss": 0.0979, + "step": 2594 + }, + { + "epoch": 2.669753086419753, + "grad_norm": 3.2041702270507812, + "learning_rate": 1.7480507661011138e-05, + "loss": 0.0593, + "step": 2595 + }, + { + "epoch": 2.6707818930041154, + "grad_norm": 7.0108442306518555, + "learning_rate": 1.7467524879632908e-05, + "loss": 0.2319, + "step": 2596 + }, + { + "epoch": 2.6718106995884776, + "grad_norm": 4.191745758056641, + "learning_rate": 1.745455181236541e-05, + "loss": 0.095, + "step": 2597 + }, + { + "epoch": 2.6728395061728394, + "grad_norm": 3.530073881149292, + "learning_rate": 1.7441588480692786e-05, + "loss": 0.0763, + "step": 2598 + }, + { + "epoch": 2.6738683127572016, + "grad_norm": 4.993064880371094, + "learning_rate": 1.7428634906083047e-05, + "loss": 0.1286, + "step": 2599 + }, + { + "epoch": 2.674897119341564, + "grad_norm": 3.2204020023345947, + "learning_rate": 1.7415691109988037e-05, + "loss": 0.0851, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_Qnli-dev_cosine_accuracy": 0.6953125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7362703084945679, + "eval_Qnli-dev_cosine_ap": 0.7478703569504768, + "eval_Qnli-dev_cosine_f1": 0.7046632124352332, + "eval_Qnli-dev_cosine_f1_threshold": 0.6684526205062866, + "eval_Qnli-dev_cosine_precision": 0.5947521865889213, + "eval_Qnli-dev_cosine_recall": 0.864406779661017, + "eval_Qnli-dev_dot_accuracy": 0.669921875, + "eval_Qnli-dev_dot_accuracy_threshold": 336.400634765625, + "eval_Qnli-dev_dot_ap": 0.6824974445362095, + "eval_Qnli-dev_dot_f1": 0.6837881219903691, + "eval_Qnli-dev_dot_f1_threshold": 271.37933349609375, + "eval_Qnli-dev_dot_precision": 0.5503875968992248, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.319154739379883, + "eval_Qnli-dev_euclidean_ap": 0.7570243909931207, + "eval_Qnli-dev_euclidean_f1": 0.7207207207207207, + "eval_Qnli-dev_euclidean_f1_threshold": 16.61956214904785, + "eval_Qnli-dev_euclidean_precision": 0.6269592476489029, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 325.358642578125, + "eval_Qnli-dev_manhattan_ap": 0.7623669439249948, + "eval_Qnli-dev_manhattan_f1": 0.7175843694493783, + "eval_Qnli-dev_manhattan_f1_threshold": 353.44708251953125, + "eval_Qnli-dev_manhattan_precision": 0.617737003058104, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.703125, + "eval_Qnli-dev_max_accuracy_threshold": 336.400634765625, + "eval_Qnli-dev_max_ap": 0.7623669439249948, + "eval_Qnli-dev_max_f1": 0.7207207207207207, + "eval_Qnli-dev_max_f1_threshold": 353.44708251953125, + "eval_Qnli-dev_max_precision": 0.6269592476489029, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8365390300750732, + "eval_allNLI-dev_cosine_ap": 0.6243747006523906, + "eval_allNLI-dev_cosine_f1": 0.6308068459657702, + "eval_allNLI-dev_cosine_f1_threshold": 0.7326910495758057, + "eval_allNLI-dev_cosine_precision": 0.5466101694915254, + "eval_allNLI-dev_cosine_recall": 0.7456647398843931, + "eval_allNLI-dev_dot_accuracy": 0.70703125, + "eval_allNLI-dev_dot_accuracy_threshold": 349.9114990234375, + "eval_allNLI-dev_dot_ap": 0.5636032934145848, + "eval_allNLI-dev_dot_f1": 0.591715976331361, + "eval_allNLI-dev_dot_f1_threshold": 268.2897033691406, + "eval_allNLI-dev_dot_precision": 0.4491017964071856, + "eval_allNLI-dev_dot_recall": 0.8670520231213873, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.68728256225586, + "eval_allNLI-dev_euclidean_ap": 0.6367778394387558, + "eval_allNLI-dev_euclidean_f1": 0.6469248291571754, + "eval_allNLI-dev_euclidean_f1_threshold": 15.481573104858398, + "eval_allNLI-dev_euclidean_precision": 0.5338345864661654, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.736328125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 276.6816101074219, + "eval_allNLI-dev_manhattan_ap": 0.633052882633361, + "eval_allNLI-dev_manhattan_f1": 0.647450110864745, + "eval_allNLI-dev_manhattan_f1_threshold": 330.48126220703125, + "eval_allNLI-dev_manhattan_precision": 0.5251798561151079, + "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 349.9114990234375, + "eval_allNLI-dev_max_ap": 0.6367778394387558, + "eval_allNLI-dev_max_f1": 0.647450110864745, + "eval_allNLI-dev_max_f1_threshold": 330.48126220703125, + "eval_allNLI-dev_max_precision": 0.5466101694915254, + "eval_allNLI-dev_max_recall": 0.8670520231213873, + "eval_sequential_score": 0.7623669439249948, + "eval_sts-test_pearson_cosine": 0.852024318882, + "eval_sts-test_pearson_dot": 0.8435240743260703, + "eval_sts-test_pearson_euclidean": 0.8766620274603868, + "eval_sts-test_pearson_manhattan": 0.8746535751378708, + "eval_sts-test_pearson_max": 0.8766620274603868, + "eval_sts-test_spearman_cosine": 0.8783670630389171, + "eval_sts-test_spearman_dot": 0.8399603222287605, + "eval_sts-test_spearman_euclidean": 0.8732234873083737, + "eval_sts-test_spearman_manhattan": 0.871147386789904, + "eval_sts-test_spearman_max": 0.8783670630389171, + "eval_vitaminc-pairs_loss": 3.1473426818847656, + "eval_vitaminc-pairs_runtime": 3.2208, + "eval_vitaminc-pairs_samples_per_second": 39.741, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_negation-triplets_loss": 0.901240885257721, + "eval_negation-triplets_runtime": 0.7721, + "eval_negation-triplets_samples_per_second": 165.781, + "eval_negation-triplets_steps_per_second": 1.295, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_scitail-pairs-pos_loss": 0.12968897819519043, + "eval_scitail-pairs-pos_runtime": 0.9408, + "eval_scitail-pairs-pos_samples_per_second": 136.053, + "eval_scitail-pairs-pos_steps_per_second": 1.063, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_scitail-pairs-qa_loss": 0.0007262742146849632, + "eval_scitail-pairs-qa_runtime": 0.6206, + "eval_scitail-pairs-qa_samples_per_second": 206.251, + "eval_scitail-pairs-qa_steps_per_second": 1.611, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_xsum-pairs_loss": 0.31425875425338745, + "eval_xsum-pairs_runtime": 3.0277, + "eval_xsum-pairs_samples_per_second": 42.276, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_sciq_pairs_loss": 0.09135416150093079, + "eval_sciq_pairs_runtime": 3.5898, + "eval_sciq_pairs_samples_per_second": 35.657, + "eval_sciq_pairs_steps_per_second": 0.279, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_qasc_pairs_loss": 0.12798336148262024, + "eval_qasc_pairs_runtime": 0.6364, + "eval_qasc_pairs_samples_per_second": 201.12, + "eval_qasc_pairs_steps_per_second": 1.571, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_openbookqa_pairs_loss": 0.7262670397758484, + "eval_openbookqa_pairs_runtime": 0.6041, + "eval_openbookqa_pairs_samples_per_second": 211.877, + "eval_openbookqa_pairs_steps_per_second": 1.655, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_msmarco_pairs_loss": 0.6486848592758179, + "eval_msmarco_pairs_runtime": 1.5339, + "eval_msmarco_pairs_samples_per_second": 83.448, + "eval_msmarco_pairs_steps_per_second": 0.652, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_nq_pairs_loss": 0.559202253818512, + "eval_nq_pairs_runtime": 2.9068, + "eval_nq_pairs_samples_per_second": 44.034, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_trivia_pairs_loss": 0.710675835609436, + "eval_trivia_pairs_runtime": 3.4625, + "eval_trivia_pairs_samples_per_second": 36.968, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_gooaq_pairs_loss": 0.33728501200675964, + "eval_gooaq_pairs_runtime": 0.9584, + "eval_gooaq_pairs_samples_per_second": 133.561, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_paws-pos_loss": 0.021707231178879738, + "eval_paws-pos_runtime": 0.7152, + "eval_paws-pos_samples_per_second": 178.962, + "eval_paws-pos_steps_per_second": 1.398, + "step": 2600 + }, + { + "epoch": 2.674897119341564, + "eval_global_dataset_loss": 0.4227386713027954, + "eval_global_dataset_runtime": 13.427, + "eval_global_dataset_samples_per_second": 30.982, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2600 + }, + { + "epoch": 2.675925925925926, + "grad_norm": 8.135689735412598, + "learning_rate": 1.740275711384342e-05, + "loss": 0.3919, + "step": 2601 + }, + { + "epoch": 2.6769547325102883, + "grad_norm": 3.8438615798950195, + "learning_rate": 1.7389832939068632e-05, + "loss": 0.1138, + "step": 2602 + }, + { + "epoch": 2.67798353909465, + "grad_norm": 3.447300910949707, + "learning_rate": 1.7376918607066828e-05, + "loss": 0.091, + "step": 2603 + }, + { + "epoch": 2.6790123456790123, + "grad_norm": 0.17035308480262756, + "learning_rate": 1.7364014139224874e-05, + "loss": 0.0028, + "step": 2604 + }, + { + "epoch": 2.6800411522633745, + "grad_norm": 9.796527862548828, + "learning_rate": 1.7351119556913306e-05, + "loss": 0.3596, + "step": 2605 + }, + { + "epoch": 2.6810699588477367, + "grad_norm": 4.091612339019775, + "learning_rate": 1.7338234881486276e-05, + "loss": 0.156, + "step": 2606 + }, + { + "epoch": 2.682098765432099, + "grad_norm": 8.733772277832031, + "learning_rate": 1.732536013428153e-05, + "loss": 0.3413, + "step": 2607 + }, + { + "epoch": 2.6831275720164607, + "grad_norm": 7.2233099937438965, + "learning_rate": 1.7312495336620394e-05, + "loss": 0.2312, + "step": 2608 + }, + { + "epoch": 2.684156378600823, + "grad_norm": 6.816574573516846, + "learning_rate": 1.7299640509807683e-05, + "loss": 0.213, + "step": 2609 + }, + { + "epoch": 2.685185185185185, + "grad_norm": 5.4541144371032715, + "learning_rate": 1.7286795675131732e-05, + "loss": 0.2369, + "step": 2610 + }, + { + "epoch": 2.6862139917695473, + "grad_norm": 4.839052677154541, + "learning_rate": 1.727396085386431e-05, + "loss": 0.1865, + "step": 2611 + }, + { + "epoch": 2.6872427983539096, + "grad_norm": 8.429651260375977, + "learning_rate": 1.72611360672606e-05, + "loss": 0.3433, + "step": 2612 + }, + { + "epoch": 2.6882716049382713, + "grad_norm": 4.5881218910217285, + "learning_rate": 1.7248321336559187e-05, + "loss": 0.1045, + "step": 2613 + }, + { + "epoch": 2.689300411522634, + "grad_norm": 4.84063720703125, + "learning_rate": 1.7235516682981983e-05, + "loss": 0.1291, + "step": 2614 + }, + { + "epoch": 2.6903292181069958, + "grad_norm": 2.6606414318084717, + "learning_rate": 1.7222722127734216e-05, + "loss": 0.0334, + "step": 2615 + }, + { + "epoch": 2.691358024691358, + "grad_norm": 5.476185321807861, + "learning_rate": 1.7209937692004394e-05, + "loss": 0.1277, + "step": 2616 + }, + { + "epoch": 2.69238683127572, + "grad_norm": 5.779826641082764, + "learning_rate": 1.7197163396964275e-05, + "loss": 0.2863, + "step": 2617 + }, + { + "epoch": 2.6934156378600824, + "grad_norm": 0.04902912676334381, + "learning_rate": 1.7184399263768802e-05, + "loss": 0.0004, + "step": 2618 + }, + { + "epoch": 2.6944444444444446, + "grad_norm": 4.969849586486816, + "learning_rate": 1.717164531355611e-05, + "loss": 0.1686, + "step": 2619 + }, + { + "epoch": 2.6954732510288064, + "grad_norm": 5.742938995361328, + "learning_rate": 1.715890156744746e-05, + "loss": 0.1179, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_Qnli-dev_cosine_accuracy": 0.6953125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7264567613601685, + "eval_Qnli-dev_cosine_ap": 0.7465763928871585, + "eval_Qnli-dev_cosine_f1": 0.7020109689213895, + "eval_Qnli-dev_cosine_f1_threshold": 0.6638573408126831, + "eval_Qnli-dev_cosine_precision": 0.617363344051447, + "eval_Qnli-dev_cosine_recall": 0.8135593220338984, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 312.00946044921875, + "eval_Qnli-dev_dot_ap": 0.6902863737048937, + "eval_Qnli-dev_dot_f1": 0.6763754045307444, + "eval_Qnli-dev_dot_f1_threshold": 252.58135986328125, + "eval_Qnli-dev_dot_precision": 0.5471204188481675, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.147762298583984, + "eval_Qnli-dev_euclidean_ap": 0.7578725873698218, + "eval_Qnli-dev_euclidean_f1": 0.7084870848708488, + "eval_Qnli-dev_euclidean_f1_threshold": 16.909732818603516, + "eval_Qnli-dev_euclidean_precision": 0.6274509803921569, + "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 325.0968017578125, + "eval_Qnli-dev_manhattan_ap": 0.7598659302283028, + "eval_Qnli-dev_manhattan_f1": 0.7084078711985689, + "eval_Qnli-dev_manhattan_f1_threshold": 360.3451843261719, + "eval_Qnli-dev_manhattan_precision": 0.6130030959752322, + "eval_Qnli-dev_manhattan_recall": 0.8389830508474576, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 325.0968017578125, + "eval_Qnli-dev_max_ap": 0.7598659302283028, + "eval_Qnli-dev_max_f1": 0.7084870848708488, + "eval_Qnli-dev_max_f1_threshold": 360.3451843261719, + "eval_Qnli-dev_max_precision": 0.6274509803921569, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.724609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8582905530929565, + "eval_allNLI-dev_cosine_ap": 0.6247010454525644, + "eval_allNLI-dev_cosine_f1": 0.6297229219143576, + "eval_allNLI-dev_cosine_f1_threshold": 0.7234146595001221, + "eval_allNLI-dev_cosine_precision": 0.5580357142857143, + "eval_allNLI-dev_cosine_recall": 0.7225433526011561, + "eval_allNLI-dev_dot_accuracy": 0.6953125, + "eval_allNLI-dev_dot_accuracy_threshold": 354.59814453125, + "eval_allNLI-dev_dot_ap": 0.56744815106673, + "eval_allNLI-dev_dot_f1": 0.5860113421550095, + "eval_allNLI-dev_dot_f1_threshold": 243.18655395507812, + "eval_allNLI-dev_dot_precision": 0.4353932584269663, + "eval_allNLI-dev_dot_recall": 0.8959537572254336, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.53614616394043, + "eval_allNLI-dev_euclidean_ap": 0.6376923089630202, + "eval_allNLI-dev_euclidean_f1": 0.6381156316916489, + "eval_allNLI-dev_euclidean_f1_threshold": 16.240928649902344, + "eval_allNLI-dev_euclidean_precision": 0.5068027210884354, + "eval_allNLI-dev_euclidean_recall": 0.861271676300578, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 278.1414489746094, + "eval_allNLI-dev_manhattan_ap": 0.6341257200825056, + "eval_allNLI-dev_manhattan_f1": 0.6445916114790288, + "eval_allNLI-dev_manhattan_f1_threshold": 338.42498779296875, + "eval_allNLI-dev_manhattan_precision": 0.5214285714285715, + "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 354.59814453125, + "eval_allNLI-dev_max_ap": 0.6376923089630202, + "eval_allNLI-dev_max_f1": 0.6445916114790288, + "eval_allNLI-dev_max_f1_threshold": 338.42498779296875, + "eval_allNLI-dev_max_precision": 0.5580357142857143, + "eval_allNLI-dev_max_recall": 0.8959537572254336, + "eval_sequential_score": 0.7598659302283028, + "eval_sts-test_pearson_cosine": 0.8542773846250993, + "eval_sts-test_pearson_dot": 0.8440504553035415, + "eval_sts-test_pearson_euclidean": 0.8794116876646535, + "eval_sts-test_pearson_manhattan": 0.87794635022087, + "eval_sts-test_pearson_max": 0.8794116876646535, + "eval_sts-test_spearman_cosine": 0.8821762009060301, + "eval_sts-test_spearman_dot": 0.8404598805596726, + "eval_sts-test_spearman_euclidean": 0.8769202995093944, + "eval_sts-test_spearman_manhattan": 0.8752871469761767, + "eval_sts-test_spearman_max": 0.8821762009060301, + "eval_vitaminc-pairs_loss": 3.269437551498413, + "eval_vitaminc-pairs_runtime": 3.2254, + "eval_vitaminc-pairs_samples_per_second": 39.685, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_negation-triplets_loss": 0.9486592411994934, + "eval_negation-triplets_runtime": 0.7633, + "eval_negation-triplets_samples_per_second": 167.686, + "eval_negation-triplets_steps_per_second": 1.31, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_scitail-pairs-pos_loss": 0.14502786099910736, + "eval_scitail-pairs-pos_runtime": 0.9436, + "eval_scitail-pairs-pos_samples_per_second": 135.656, + "eval_scitail-pairs-pos_steps_per_second": 1.06, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_scitail-pairs-qa_loss": 0.000535947794560343, + "eval_scitail-pairs-qa_runtime": 0.6202, + "eval_scitail-pairs-qa_samples_per_second": 206.389, + "eval_scitail-pairs-qa_steps_per_second": 1.612, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_xsum-pairs_loss": 0.3298634886741638, + "eval_xsum-pairs_runtime": 3.0314, + "eval_xsum-pairs_samples_per_second": 42.225, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_sciq_pairs_loss": 0.09292829036712646, + "eval_sciq_pairs_runtime": 3.5539, + "eval_sciq_pairs_samples_per_second": 36.017, + "eval_sciq_pairs_steps_per_second": 0.281, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_qasc_pairs_loss": 0.1371685266494751, + "eval_qasc_pairs_runtime": 0.6331, + "eval_qasc_pairs_samples_per_second": 202.18, + "eval_qasc_pairs_steps_per_second": 1.58, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_openbookqa_pairs_loss": 0.744242787361145, + "eval_openbookqa_pairs_runtime": 0.6086, + "eval_openbookqa_pairs_samples_per_second": 210.325, + "eval_openbookqa_pairs_steps_per_second": 1.643, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_msmarco_pairs_loss": 0.7878044843673706, + "eval_msmarco_pairs_runtime": 1.5273, + "eval_msmarco_pairs_samples_per_second": 83.811, + "eval_msmarco_pairs_steps_per_second": 0.655, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_nq_pairs_loss": 0.5722874999046326, + "eval_nq_pairs_runtime": 2.9128, + "eval_nq_pairs_samples_per_second": 43.944, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_trivia_pairs_loss": 0.7739209532737732, + "eval_trivia_pairs_runtime": 3.4641, + "eval_trivia_pairs_samples_per_second": 36.951, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_gooaq_pairs_loss": 0.34145432710647583, + "eval_gooaq_pairs_runtime": 0.9903, + "eval_gooaq_pairs_samples_per_second": 129.248, + "eval_gooaq_pairs_steps_per_second": 1.01, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_paws-pos_loss": 0.02111244387924671, + "eval_paws-pos_runtime": 0.7177, + "eval_paws-pos_samples_per_second": 178.338, + "eval_paws-pos_steps_per_second": 1.393, + "step": 2620 + }, + { + "epoch": 2.6954732510288064, + "eval_global_dataset_loss": 0.44551166892051697, + "eval_global_dataset_runtime": 13.4327, + "eval_global_dataset_samples_per_second": 30.969, + "eval_global_dataset_steps_per_second": 0.298, + "step": 2620 + }, + { + "epoch": 2.6965020576131686, + "grad_norm": 5.093355178833008, + "learning_rate": 1.714616804654722e-05, + "loss": 0.215, + "step": 2621 + }, + { + "epoch": 2.697530864197531, + "grad_norm": 9.597935676574707, + "learning_rate": 1.7133444771942817e-05, + "loss": 0.5451, + "step": 2622 + }, + { + "epoch": 2.698559670781893, + "grad_norm": 5.977519989013672, + "learning_rate": 1.7120731764704718e-05, + "loss": 0.1519, + "step": 2623 + }, + { + "epoch": 2.6995884773662553, + "grad_norm": 4.844002723693848, + "learning_rate": 1.7108029045886376e-05, + "loss": 0.1012, + "step": 2624 + }, + { + "epoch": 2.700617283950617, + "grad_norm": 1.202161431312561, + "learning_rate": 1.7095336636524217e-05, + "loss": 0.0676, + "step": 2625 + }, + { + "epoch": 2.7016460905349793, + "grad_norm": 10.282391548156738, + "learning_rate": 1.7082654557637587e-05, + "loss": 0.3419, + "step": 2626 + }, + { + "epoch": 2.7026748971193415, + "grad_norm": 14.298833847045898, + "learning_rate": 1.706998283022873e-05, + "loss": 1.3958, + "step": 2627 + }, + { + "epoch": 2.7037037037037037, + "grad_norm": 6.158807754516602, + "learning_rate": 1.7057321475282737e-05, + "loss": 0.1862, + "step": 2628 + } + ], + "logging_steps": 1, + "max_steps": 2916, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 292, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": null +}