diff --git "a/checkpoint-876/trainer_state.json" "b/checkpoint-876/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-876/trainer_state.json" @@ -0,0 +1,14464 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9012345679012346, + "eval_steps": 20, + "global_step": 876, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00102880658436214, + "grad_norm": NaN, + "learning_rate": 0.0, + "loss": 6.6792, + "step": 1 + }, + { + "epoch": 0.00205761316872428, + "grad_norm": NaN, + "learning_rate": 0.0, + "loss": 7.1528, + "step": 2 + }, + { + "epoch": 0.0030864197530864196, + "grad_norm": 56.1685905456543, + "learning_rate": 3.634475597092419e-08, + "loss": 7.5608, + "step": 3 + }, + { + "epoch": 0.00411522633744856, + "grad_norm": 47.3843994140625, + "learning_rate": 7.268951194184838e-08, + "loss": 7.9785, + "step": 4 + }, + { + "epoch": 0.0051440329218107, + "grad_norm": Infinity, + "learning_rate": 7.268951194184838e-08, + "loss": 8.3006, + "step": 5 + }, + { + "epoch": 0.006172839506172839, + "grad_norm": 30.015766143798828, + "learning_rate": 1.0903426791277258e-07, + "loss": 7.2339, + "step": 6 + }, + { + "epoch": 0.00720164609053498, + "grad_norm": 38.63535690307617, + "learning_rate": 1.4537902388369677e-07, + "loss": 6.9116, + "step": 7 + }, + { + "epoch": 0.00823045267489712, + "grad_norm": 43.006290435791016, + "learning_rate": 1.8172377985462097e-07, + "loss": 7.5568, + "step": 8 + }, + { + "epoch": 0.009259259259259259, + "grad_norm": 32.70055389404297, + "learning_rate": 2.1806853582554515e-07, + "loss": 7.3119, + "step": 9 + }, + { + "epoch": 0.0102880658436214, + "grad_norm": 34.09101486206055, + "learning_rate": 2.5441329179646936e-07, + "loss": 7.3456, + "step": 10 + }, + { + "epoch": 0.01131687242798354, + "grad_norm": 46.04302978515625, + "learning_rate": 2.9075804776739353e-07, + "loss": 7.3899, + "step": 11 + }, + { + "epoch": 0.012345679012345678, + "grad_norm": 39.30464172363281, + "learning_rate": 3.271028037383177e-07, + "loss": 7.1603, + "step": 12 + }, + { + "epoch": 0.013374485596707819, + "grad_norm": 45.96063995361328, + "learning_rate": 3.6344755970924194e-07, + "loss": 7.5501, + "step": 13 + }, + { + "epoch": 0.01440329218106996, + "grad_norm": 31.248769760131836, + "learning_rate": 3.997923156801661e-07, + "loss": 7.1211, + "step": 14 + }, + { + "epoch": 0.015432098765432098, + "grad_norm": 37.31939697265625, + "learning_rate": 4.361370716510903e-07, + "loss": 6.6898, + "step": 15 + }, + { + "epoch": 0.01646090534979424, + "grad_norm": 57.4151725769043, + "learning_rate": 4.724818276220145e-07, + "loss": 7.9275, + "step": 16 + }, + { + "epoch": 0.01748971193415638, + "grad_norm": 60.12082290649414, + "learning_rate": 5.088265835929387e-07, + "loss": 8.8934, + "step": 17 + }, + { + "epoch": 0.018518518518518517, + "grad_norm": 46.84602355957031, + "learning_rate": 5.451713395638628e-07, + "loss": 7.7481, + "step": 18 + }, + { + "epoch": 0.01954732510288066, + "grad_norm": 49.21991729736328, + "learning_rate": 5.815160955347871e-07, + "loss": 7.9482, + "step": 19 + }, + { + "epoch": 0.0205761316872428, + "grad_norm": 28.904695510864258, + "learning_rate": 6.178608515057113e-07, + "loss": 7.2578, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_Qnli-dev_cosine_accuracy": 0.599609375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9141477346420288, + "eval_Qnli-dev_cosine_ap": 0.5547693808475234, + "eval_Qnli-dev_cosine_f1": 0.6315789473684211, + "eval_Qnli-dev_cosine_f1_threshold": 0.6694607138633728, + "eval_Qnli-dev_cosine_precision": 0.4633663366336634, + "eval_Qnli-dev_cosine_recall": 0.9915254237288136, + "eval_Qnli-dev_dot_accuracy": 0.576171875, + "eval_Qnli-dev_dot_accuracy_threshold": 375.9344177246094, + "eval_Qnli-dev_dot_ap": 0.4951635671727113, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 237.4730682373047, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.603515625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 8.203678131103516, + "eval_Qnli-dev_euclidean_ap": 0.5621167645444726, + "eval_Qnli-dev_euclidean_f1": 0.6307277628032345, + "eval_Qnli-dev_euclidean_f1_threshold": 17.37430763244629, + "eval_Qnli-dev_euclidean_precision": 0.4624505928853755, + "eval_Qnli-dev_euclidean_recall": 0.9915254237288136, + "eval_Qnli-dev_manhattan_accuracy": 0.615234375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 168.66110229492188, + "eval_Qnli-dev_manhattan_ap": 0.6068938574265019, + "eval_Qnli-dev_manhattan_f1": 0.629878869448183, + "eval_Qnli-dev_manhattan_f1_threshold": 250.46356201171875, + "eval_Qnli-dev_manhattan_precision": 0.46153846153846156, + "eval_Qnli-dev_manhattan_recall": 0.9915254237288136, + "eval_Qnli-dev_max_accuracy": 0.615234375, + "eval_Qnli-dev_max_accuracy_threshold": 375.9344177246094, + "eval_Qnli-dev_max_ap": 0.6068938574265019, + "eval_Qnli-dev_max_f1": 0.6315789473684211, + "eval_Qnli-dev_max_f1_threshold": 250.46356201171875, + "eval_Qnli-dev_max_precision": 0.4633663366336634, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.6640625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9848551154136658, + "eval_allNLI-dev_cosine_ap": 0.34665869156342183, + "eval_allNLI-dev_cosine_f1": 0.5105105105105106, + "eval_allNLI-dev_cosine_f1_threshold": 0.726775050163269, + "eval_allNLI-dev_cosine_precision": 0.3448275862068966, + "eval_allNLI-dev_cosine_recall": 0.9826589595375722, + "eval_allNLI-dev_dot_accuracy": 0.66015625, + "eval_allNLI-dev_dot_accuracy_threshold": 510.3038330078125, + "eval_allNLI-dev_dot_ap": 0.3325722102020561, + "eval_allNLI-dev_dot_f1": 0.5081240768094535, + "eval_allNLI-dev_dot_f1_threshold": 321.1283264160156, + "eval_allNLI-dev_dot_precision": 0.3412698412698413, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.6640625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 3.5479774475097656, + "eval_allNLI-dev_euclidean_ap": 0.35150722956160885, + "eval_allNLI-dev_euclidean_f1": 0.5120481927710844, + "eval_allNLI-dev_euclidean_f1_threshold": 16.336387634277344, + "eval_allNLI-dev_euclidean_precision": 0.34623217922606925, + "eval_allNLI-dev_euclidean_recall": 0.9826589595375722, + "eval_allNLI-dev_manhattan_accuracy": 0.6640625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 59.594974517822266, + "eval_allNLI-dev_manhattan_ap": 0.3707715964225075, + "eval_allNLI-dev_manhattan_f1": 0.5096870342771982, + "eval_allNLI-dev_manhattan_f1_threshold": 246.92552185058594, + "eval_allNLI-dev_manhattan_precision": 0.3433734939759036, + "eval_allNLI-dev_manhattan_recall": 0.9884393063583815, + "eval_allNLI-dev_max_accuracy": 0.6640625, + "eval_allNLI-dev_max_accuracy_threshold": 510.3038330078125, + "eval_allNLI-dev_max_ap": 0.3707715964225075, + "eval_allNLI-dev_max_f1": 0.5120481927710844, + "eval_allNLI-dev_max_f1_threshold": 321.1283264160156, + "eval_allNLI-dev_max_precision": 0.34623217922606925, + "eval_allNLI-dev_max_recall": 0.9942196531791907, + "eval_sequential_score": 0.6068938574265019, + "eval_sts-test_pearson_cosine": 0.15158128737060533, + "eval_sts-test_pearson_dot": 0.28519318322703113, + "eval_sts-test_pearson_euclidean": 0.14101979920513222, + "eval_sts-test_pearson_manhattan": 0.18765507958122332, + "eval_sts-test_pearson_max": 0.28519318322703113, + "eval_sts-test_spearman_cosine": 0.19495891500289336, + "eval_sts-test_spearman_dot": 0.2996743605881303, + "eval_sts-test_spearman_euclidean": 0.16263986728485438, + "eval_sts-test_spearman_manhattan": 0.20827944121487316, + "eval_sts-test_spearman_max": 0.2996743605881303, + "eval_vitaminc-pairs_loss": 3.0276453495025635, + "eval_vitaminc-pairs_runtime": 3.2256, + "eval_vitaminc-pairs_samples_per_second": 39.683, + "eval_vitaminc-pairs_steps_per_second": 0.31, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_negation-triplets_loss": 4.588223457336426, + "eval_negation-triplets_runtime": 0.7341, + "eval_negation-triplets_samples_per_second": 174.361, + "eval_negation-triplets_steps_per_second": 1.362, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_scitail-pairs-pos_loss": 2.112419366836548, + "eval_scitail-pairs-pos_runtime": 0.8038, + "eval_scitail-pairs-pos_samples_per_second": 159.242, + "eval_scitail-pairs-pos_steps_per_second": 1.244, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_scitail-pairs-qa_loss": 2.4077870845794678, + "eval_scitail-pairs-qa_runtime": 0.5735, + "eval_scitail-pairs-qa_samples_per_second": 223.199, + "eval_scitail-pairs-qa_steps_per_second": 1.744, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_xsum-pairs_loss": 7.2197651863098145, + "eval_xsum-pairs_runtime": 3.0069, + "eval_xsum-pairs_samples_per_second": 42.568, + "eval_xsum-pairs_steps_per_second": 0.333, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_sciq_pairs_loss": 0.8614505529403687, + "eval_sciq_pairs_runtime": 3.4174, + "eval_sciq_pairs_samples_per_second": 37.455, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_qasc_pairs_loss": 5.234526634216309, + "eval_qasc_pairs_runtime": 0.5917, + "eval_qasc_pairs_samples_per_second": 216.327, + "eval_qasc_pairs_steps_per_second": 1.69, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_openbookqa_pairs_loss": 5.655325889587402, + "eval_openbookqa_pairs_runtime": 0.5683, + "eval_openbookqa_pairs_samples_per_second": 225.252, + "eval_openbookqa_pairs_steps_per_second": 1.76, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_msmarco_pairs_loss": 15.688506126403809, + "eval_msmarco_pairs_runtime": 1.5377, + "eval_msmarco_pairs_samples_per_second": 83.243, + "eval_msmarco_pairs_steps_per_second": 0.65, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_nq_pairs_loss": 14.804839134216309, + "eval_nq_pairs_runtime": 2.884, + "eval_nq_pairs_samples_per_second": 44.382, + "eval_nq_pairs_steps_per_second": 0.347, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_trivia_pairs_loss": 9.255401611328125, + "eval_trivia_pairs_runtime": 3.4138, + "eval_trivia_pairs_samples_per_second": 37.495, + "eval_trivia_pairs_steps_per_second": 0.293, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_gooaq_pairs_loss": 10.233977317810059, + "eval_gooaq_pairs_runtime": 0.951, + "eval_gooaq_pairs_samples_per_second": 134.592, + "eval_gooaq_pairs_steps_per_second": 1.051, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_paws-pos_loss": 1.2437409162521362, + "eval_paws-pos_runtime": 0.6909, + "eval_paws-pos_samples_per_second": 185.275, + "eval_paws-pos_steps_per_second": 1.447, + "step": 20 + }, + { + "epoch": 0.0205761316872428, + "eval_global_dataset_loss": 6.9472880363464355, + "eval_global_dataset_runtime": 13.3358, + "eval_global_dataset_samples_per_second": 31.194, + "eval_global_dataset_steps_per_second": 0.3, + "step": 20 + }, + { + "epoch": 0.021604938271604937, + "grad_norm": 129.84130859375, + "learning_rate": 6.542056074766354e-07, + "loss": 14.6214, + "step": 21 + }, + { + "epoch": 0.02263374485596708, + "grad_norm": 25.52641487121582, + "learning_rate": 6.905503634475597e-07, + "loss": 2.7071, + "step": 22 + }, + { + "epoch": 0.023662551440329218, + "grad_norm": 52.22842025756836, + "learning_rate": 7.268951194184839e-07, + "loss": 7.3533, + "step": 23 + }, + { + "epoch": 0.024691358024691357, + "grad_norm": 18.928892135620117, + "learning_rate": 7.63239875389408e-07, + "loss": 5.8828, + "step": 24 + }, + { + "epoch": 0.0257201646090535, + "grad_norm": 18.747142791748047, + "learning_rate": 7.995846313603322e-07, + "loss": 5.549, + "step": 25 + }, + { + "epoch": 0.026748971193415638, + "grad_norm": 39.345096588134766, + "learning_rate": 8.359293873312565e-07, + "loss": 7.0614, + "step": 26 + }, + { + "epoch": 0.027777777777777776, + "grad_norm": 16.357666015625, + "learning_rate": 8.722741433021806e-07, + "loss": 5.4115, + "step": 27 + }, + { + "epoch": 0.02880658436213992, + "grad_norm": 143.72604370117188, + "learning_rate": 9.086188992731048e-07, + "loss": 14.986, + "step": 28 + }, + { + "epoch": 0.029835390946502057, + "grad_norm": 29.933956146240234, + "learning_rate": 9.44963655244029e-07, + "loss": 6.5017, + "step": 29 + }, + { + "epoch": 0.030864197530864196, + "grad_norm": 24.71169662475586, + "learning_rate": 9.813084112149532e-07, + "loss": 6.8621, + "step": 30 + }, + { + "epoch": 0.03189300411522634, + "grad_norm": 48.559242248535156, + "learning_rate": 1.0176531671858774e-06, + "loss": 7.6911, + "step": 31 + }, + { + "epoch": 0.03292181069958848, + "grad_norm": 43.564395904541016, + "learning_rate": 1.0539979231568014e-06, + "loss": 7.3478, + "step": 32 + }, + { + "epoch": 0.033950617283950615, + "grad_norm": 71.6847152709961, + "learning_rate": 1.0903426791277257e-06, + "loss": 9.8953, + "step": 33 + }, + { + "epoch": 0.03497942386831276, + "grad_norm": 130.1976776123047, + "learning_rate": 1.12668743509865e-06, + "loss": 14.7971, + "step": 34 + }, + { + "epoch": 0.0360082304526749, + "grad_norm": 25.184886932373047, + "learning_rate": 1.1630321910695741e-06, + "loss": 6.6194, + "step": 35 + }, + { + "epoch": 0.037037037037037035, + "grad_norm": 15.403931617736816, + "learning_rate": 1.1993769470404982e-06, + "loss": 5.397, + "step": 36 + }, + { + "epoch": 0.03806584362139918, + "grad_norm": 97.28205871582031, + "learning_rate": 1.2357217030114226e-06, + "loss": 9.3816, + "step": 37 + }, + { + "epoch": 0.03909465020576132, + "grad_norm": 113.59951782226562, + "learning_rate": 1.2720664589823466e-06, + "loss": 13.3627, + "step": 38 + }, + { + "epoch": 0.040123456790123455, + "grad_norm": 91.30632781982422, + "learning_rate": 1.3084112149532708e-06, + "loss": 9.0198, + "step": 39 + }, + { + "epoch": 0.0411522633744856, + "grad_norm": 34.121768951416016, + "learning_rate": 1.344755970924195e-06, + "loss": 6.3785, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_Qnli-dev_cosine_accuracy": 0.599609375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9141815304756165, + "eval_Qnli-dev_cosine_ap": 0.560800067413789, + "eval_Qnli-dev_cosine_f1": 0.6315789473684211, + "eval_Qnli-dev_cosine_f1_threshold": 0.71217280626297, + "eval_Qnli-dev_cosine_precision": 0.4633663366336634, + "eval_Qnli-dev_cosine_recall": 0.9915254237288136, + "eval_Qnli-dev_dot_accuracy": 0.580078125, + "eval_Qnli-dev_dot_accuracy_threshold": 383.35107421875, + "eval_Qnli-dev_dot_ap": 0.4975321617530368, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 237.07284545898438, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.59375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 8.007088661193848, + "eval_Qnli-dev_euclidean_ap": 0.5669399990959784, + "eval_Qnli-dev_euclidean_f1": 0.6307277628032345, + "eval_Qnli-dev_euclidean_f1_threshold": 16.091142654418945, + "eval_Qnli-dev_euclidean_precision": 0.4624505928853755, + "eval_Qnli-dev_euclidean_recall": 0.9915254237288136, + "eval_Qnli-dev_manhattan_accuracy": 0.6171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 165.9488067626953, + "eval_Qnli-dev_manhattan_ap": 0.6135192533670535, + "eval_Qnli-dev_manhattan_f1": 0.629878869448183, + "eval_Qnli-dev_manhattan_f1_threshold": 239.3395233154297, + "eval_Qnli-dev_manhattan_precision": 0.46153846153846156, + "eval_Qnli-dev_manhattan_recall": 0.9915254237288136, + "eval_Qnli-dev_max_accuracy": 0.6171875, + "eval_Qnli-dev_max_accuracy_threshold": 383.35107421875, + "eval_Qnli-dev_max_ap": 0.6135192533670535, + "eval_Qnli-dev_max_f1": 0.6315789473684211, + "eval_Qnli-dev_max_f1_threshold": 239.3395233154297, + "eval_Qnli-dev_max_precision": 0.4633663366336634, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.6640625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9857255220413208, + "eval_allNLI-dev_cosine_ap": 0.35080477217886274, + "eval_allNLI-dev_cosine_f1": 0.5090361445783133, + "eval_allNLI-dev_cosine_f1_threshold": 0.7498464584350586, + "eval_allNLI-dev_cosine_precision": 0.34419551934826886, + "eval_allNLI-dev_cosine_recall": 0.976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.66015625, + "eval_allNLI-dev_dot_accuracy_threshold": 511.11175537109375, + "eval_allNLI-dev_dot_ap": 0.33267543574243635, + "eval_allNLI-dev_dot_f1": 0.5066273932253312, + "eval_allNLI-dev_dot_f1_threshold": 327.406494140625, + "eval_allNLI-dev_dot_precision": 0.33992094861660077, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.666015625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.716782569885254, + "eval_allNLI-dev_euclidean_ap": 0.3570960285605865, + "eval_allNLI-dev_euclidean_f1": 0.5113464447806354, + "eval_allNLI-dev_euclidean_f1_threshold": 15.28095817565918, + "eval_allNLI-dev_euclidean_precision": 0.3463114754098361, + "eval_allNLI-dev_euclidean_recall": 0.976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.6640625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 58.618408203125, + "eval_allNLI-dev_manhattan_ap": 0.3738907800968901, + "eval_allNLI-dev_manhattan_f1": 0.5096296296296297, + "eval_allNLI-dev_manhattan_f1_threshold": 251.27957153320312, + "eval_allNLI-dev_manhattan_precision": 0.3426294820717131, + "eval_allNLI-dev_manhattan_recall": 0.9942196531791907, + "eval_allNLI-dev_max_accuracy": 0.666015625, + "eval_allNLI-dev_max_accuracy_threshold": 511.11175537109375, + "eval_allNLI-dev_max_ap": 0.3738907800968901, + "eval_allNLI-dev_max_f1": 0.5113464447806354, + "eval_allNLI-dev_max_f1_threshold": 327.406494140625, + "eval_allNLI-dev_max_precision": 0.3463114754098361, + "eval_allNLI-dev_max_recall": 0.9942196531791907, + "eval_sequential_score": 0.6135192533670535, + "eval_sts-test_pearson_cosine": 0.15947289948306198, + "eval_sts-test_pearson_dot": 0.30037019316788005, + "eval_sts-test_pearson_euclidean": 0.14704475799070915, + "eval_sts-test_pearson_manhattan": 0.1919977257434266, + "eval_sts-test_pearson_max": 0.30037019316788005, + "eval_sts-test_spearman_cosine": 0.2043480876529001, + "eval_sts-test_spearman_dot": 0.312789299505278, + "eval_sts-test_spearman_euclidean": 0.16989717934469764, + "eval_sts-test_spearman_manhattan": 0.21343563680112884, + "eval_sts-test_spearman_max": 0.312789299505278, + "eval_vitaminc-pairs_loss": 2.966029167175293, + "eval_vitaminc-pairs_runtime": 3.1856, + "eval_vitaminc-pairs_samples_per_second": 40.18, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_negation-triplets_loss": 4.4332098960876465, + "eval_negation-triplets_runtime": 0.7499, + "eval_negation-triplets_samples_per_second": 170.686, + "eval_negation-triplets_steps_per_second": 1.333, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_scitail-pairs-pos_loss": 2.064517021179199, + "eval_scitail-pairs-pos_runtime": 0.7727, + "eval_scitail-pairs-pos_samples_per_second": 165.651, + "eval_scitail-pairs-pos_steps_per_second": 1.294, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_scitail-pairs-qa_loss": 2.3564093112945557, + "eval_scitail-pairs-qa_runtime": 0.561, + "eval_scitail-pairs-qa_samples_per_second": 228.169, + "eval_scitail-pairs-qa_steps_per_second": 1.783, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_xsum-pairs_loss": 6.930158615112305, + "eval_xsum-pairs_runtime": 3.0064, + "eval_xsum-pairs_samples_per_second": 42.575, + "eval_xsum-pairs_steps_per_second": 0.333, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_sciq_pairs_loss": 0.781018078327179, + "eval_sciq_pairs_runtime": 3.3616, + "eval_sciq_pairs_samples_per_second": 38.077, + "eval_sciq_pairs_steps_per_second": 0.297, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_qasc_pairs_loss": 4.685440540313721, + "eval_qasc_pairs_runtime": 0.5973, + "eval_qasc_pairs_samples_per_second": 214.304, + "eval_qasc_pairs_steps_per_second": 1.674, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_openbookqa_pairs_loss": 5.424518585205078, + "eval_openbookqa_pairs_runtime": 0.5716, + "eval_openbookqa_pairs_samples_per_second": 223.932, + "eval_openbookqa_pairs_steps_per_second": 1.749, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_msmarco_pairs_loss": 13.714217185974121, + "eval_msmarco_pairs_runtime": 1.5089, + "eval_msmarco_pairs_samples_per_second": 84.831, + "eval_msmarco_pairs_steps_per_second": 0.663, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_nq_pairs_loss": 12.863033294677734, + "eval_nq_pairs_runtime": 2.8862, + "eval_nq_pairs_samples_per_second": 44.35, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_trivia_pairs_loss": 8.43865966796875, + "eval_trivia_pairs_runtime": 3.4314, + "eval_trivia_pairs_samples_per_second": 37.303, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_gooaq_pairs_loss": 9.148645401000977, + "eval_gooaq_pairs_runtime": 0.9461, + "eval_gooaq_pairs_samples_per_second": 135.299, + "eval_gooaq_pairs_steps_per_second": 1.057, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_paws-pos_loss": 1.288989543914795, + "eval_paws-pos_runtime": 0.6728, + "eval_paws-pos_samples_per_second": 190.262, + "eval_paws-pos_steps_per_second": 1.486, + "step": 40 + }, + { + "epoch": 0.0411522633744856, + "eval_global_dataset_loss": 6.3770952224731445, + "eval_global_dataset_runtime": 13.329, + "eval_global_dataset_samples_per_second": 31.21, + "eval_global_dataset_steps_per_second": 0.3, + "step": 40 + }, + { + "epoch": 0.04218106995884774, + "grad_norm": 58.63786315917969, + "learning_rate": 1.3811007268951193e-06, + "loss": 8.8223, + "step": 41 + }, + { + "epoch": 0.043209876543209874, + "grad_norm": 19.849580764770508, + "learning_rate": 1.4174454828660433e-06, + "loss": 5.7515, + "step": 42 + }, + { + "epoch": 0.044238683127572016, + "grad_norm": 32.95113754272461, + "learning_rate": 1.4537902388369678e-06, + "loss": 6.6943, + "step": 43 + }, + { + "epoch": 0.04526748971193416, + "grad_norm": 115.43840026855469, + "learning_rate": 1.4901349948078918e-06, + "loss": 12.7157, + "step": 44 + }, + { + "epoch": 0.046296296296296294, + "grad_norm": 16.027889251708984, + "learning_rate": 1.526479750778816e-06, + "loss": 5.729, + "step": 45 + }, + { + "epoch": 0.047325102880658436, + "grad_norm": 55.49090576171875, + "learning_rate": 1.5628245067497403e-06, + "loss": 8.843, + "step": 46 + }, + { + "epoch": 0.04835390946502058, + "grad_norm": 20.623491287231445, + "learning_rate": 1.5991692627206645e-06, + "loss": 6.8743, + "step": 47 + }, + { + "epoch": 0.04938271604938271, + "grad_norm": 19.15467643737793, + "learning_rate": 1.6355140186915885e-06, + "loss": 5.5015, + "step": 48 + }, + { + "epoch": 0.050411522633744855, + "grad_norm": 24.568716049194336, + "learning_rate": 1.671858774662513e-06, + "loss": 2.7761, + "step": 49 + }, + { + "epoch": 0.051440329218107, + "grad_norm": 17.216365814208984, + "learning_rate": 1.708203530633437e-06, + "loss": 4.6785, + "step": 50 + }, + { + "epoch": 0.05246913580246913, + "grad_norm": 27.83530616760254, + "learning_rate": 1.7445482866043612e-06, + "loss": 6.6596, + "step": 51 + }, + { + "epoch": 0.053497942386831275, + "grad_norm": 14.741978645324707, + "learning_rate": 1.7808930425752854e-06, + "loss": 5.4409, + "step": 52 + }, + { + "epoch": 0.05452674897119342, + "grad_norm": 27.180707931518555, + "learning_rate": 1.8172377985462097e-06, + "loss": 6.3967, + "step": 53 + }, + { + "epoch": 0.05555555555555555, + "grad_norm": 26.400497436523438, + "learning_rate": 1.8535825545171337e-06, + "loss": 6.3174, + "step": 54 + }, + { + "epoch": 0.056584362139917695, + "grad_norm": 19.098752975463867, + "learning_rate": 1.889927310488058e-06, + "loss": 5.5442, + "step": 55 + }, + { + "epoch": 0.05761316872427984, + "grad_norm": 21.40766716003418, + "learning_rate": 1.9262720664589824e-06, + "loss": 5.9004, + "step": 56 + }, + { + "epoch": 0.05864197530864197, + "grad_norm": 25.238555908203125, + "learning_rate": 1.9626168224299064e-06, + "loss": 2.9543, + "step": 57 + }, + { + "epoch": 0.059670781893004114, + "grad_norm": 21.333162307739258, + "learning_rate": 1.9989615784008304e-06, + "loss": 6.4092, + "step": 58 + }, + { + "epoch": 0.060699588477366256, + "grad_norm": 24.3674373626709, + "learning_rate": 2.035306334371755e-06, + "loss": 3.083, + "step": 59 + }, + { + "epoch": 0.06172839506172839, + "grad_norm": 77.95449829101562, + "learning_rate": 2.071651090342679e-06, + "loss": 10.6811, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_Qnli-dev_cosine_accuracy": 0.591796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9277275800704956, + "eval_Qnli-dev_cosine_ap": 0.5663256424153785, + "eval_Qnli-dev_cosine_f1": 0.6302864938608458, + "eval_Qnli-dev_cosine_f1_threshold": 0.7895882725715637, + "eval_Qnli-dev_cosine_precision": 0.4647887323943662, + "eval_Qnli-dev_cosine_recall": 0.9788135593220338, + "eval_Qnli-dev_dot_accuracy": 0.58203125, + "eval_Qnli-dev_dot_accuracy_threshold": 386.7135009765625, + "eval_Qnli-dev_dot_ap": 0.5015283426358628, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 233.70668029785156, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.591796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 7.363377571105957, + "eval_Qnli-dev_euclidean_ap": 0.5754970319906212, + "eval_Qnli-dev_euclidean_f1": 0.6322930800542741, + "eval_Qnli-dev_euclidean_f1_threshold": 13.553762435913086, + "eval_Qnli-dev_euclidean_precision": 0.46506986027944114, + "eval_Qnli-dev_euclidean_recall": 0.9872881355932204, + "eval_Qnli-dev_manhattan_accuracy": 0.62890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 166.58721923828125, + "eval_Qnli-dev_manhattan_ap": 0.6222630621246192, + "eval_Qnli-dev_manhattan_f1": 0.6346483704974271, + "eval_Qnli-dev_manhattan_f1_threshold": 178.5355224609375, + "eval_Qnli-dev_manhattan_precision": 0.5331412103746398, + "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, + "eval_Qnli-dev_max_accuracy": 0.62890625, + "eval_Qnli-dev_max_accuracy_threshold": 386.7135009765625, + "eval_Qnli-dev_max_ap": 0.6222630621246192, + "eval_Qnli-dev_max_f1": 0.6346483704974271, + "eval_Qnli-dev_max_f1_threshold": 233.70668029785156, + "eval_Qnli-dev_max_precision": 0.5331412103746398, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.6640625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9866780042648315, + "eval_allNLI-dev_cosine_ap": 0.3639636732129889, + "eval_allNLI-dev_cosine_f1": 0.5089285714285714, + "eval_allNLI-dev_cosine_f1_threshold": 0.7668333053588867, + "eval_allNLI-dev_cosine_precision": 0.342685370741483, + "eval_allNLI-dev_cosine_recall": 0.9884393063583815, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 494.3717956542969, + "eval_allNLI-dev_dot_ap": 0.3315337132453944, + "eval_allNLI-dev_dot_f1": 0.5065885797950219, + "eval_allNLI-dev_dot_f1_threshold": 322.2677001953125, + "eval_allNLI-dev_dot_precision": 0.3392156862745098, + "eval_allNLI-dev_dot_recall": 1.0, + "eval_allNLI-dev_euclidean_accuracy": 0.66796875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.485382080078125, + "eval_allNLI-dev_euclidean_ap": 0.36871559335230386, + "eval_allNLI-dev_euclidean_f1": 0.5096870342771982, + "eval_allNLI-dev_euclidean_f1_threshold": 14.86199951171875, + "eval_allNLI-dev_euclidean_precision": 0.3433734939759036, + "eval_allNLI-dev_euclidean_recall": 0.9884393063583815, + "eval_allNLI-dev_manhattan_accuracy": 0.6640625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 57.50782012939453, + "eval_allNLI-dev_manhattan_ap": 0.3800340904251822, + "eval_allNLI-dev_manhattan_f1": 0.5074183976261127, + "eval_allNLI-dev_manhattan_f1_threshold": 237.91455078125, + "eval_allNLI-dev_manhattan_precision": 0.3413173652694611, + "eval_allNLI-dev_manhattan_recall": 0.9884393063583815, + "eval_allNLI-dev_max_accuracy": 0.66796875, + "eval_allNLI-dev_max_accuracy_threshold": 494.3717956542969, + "eval_allNLI-dev_max_ap": 0.3800340904251822, + "eval_allNLI-dev_max_f1": 0.5096870342771982, + "eval_allNLI-dev_max_f1_threshold": 322.2677001953125, + "eval_allNLI-dev_max_precision": 0.3433734939759036, + "eval_allNLI-dev_max_recall": 1.0, + "eval_sequential_score": 0.6222630621246192, + "eval_sts-test_pearson_cosine": 0.18291082738946113, + "eval_sts-test_pearson_dot": 0.316123119088567, + "eval_sts-test_pearson_euclidean": 0.16506396318167735, + "eval_sts-test_pearson_manhattan": 0.20347659235425056, + "eval_sts-test_pearson_max": 0.316123119088567, + "eval_sts-test_spearman_cosine": 0.22625803672256098, + "eval_sts-test_spearman_dot": 0.32449976483491805, + "eval_sts-test_spearman_euclidean": 0.18659512800514774, + "eval_sts-test_spearman_manhattan": 0.2238469730125765, + "eval_sts-test_spearman_max": 0.32449976483491805, + "eval_vitaminc-pairs_loss": 2.9115335941314697, + "eval_vitaminc-pairs_runtime": 3.1703, + "eval_vitaminc-pairs_samples_per_second": 40.374, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_negation-triplets_loss": 4.1277852058410645, + "eval_negation-triplets_runtime": 0.7484, + "eval_negation-triplets_samples_per_second": 171.033, + "eval_negation-triplets_steps_per_second": 1.336, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_scitail-pairs-pos_loss": 1.9703718423843384, + "eval_scitail-pairs-pos_runtime": 0.8012, + "eval_scitail-pairs-pos_samples_per_second": 159.764, + "eval_scitail-pairs-pos_steps_per_second": 1.248, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_scitail-pairs-qa_loss": 2.240999221801758, + "eval_scitail-pairs-qa_runtime": 0.5627, + "eval_scitail-pairs-qa_samples_per_second": 227.467, + "eval_scitail-pairs-qa_steps_per_second": 1.777, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_xsum-pairs_loss": 6.690690994262695, + "eval_xsum-pairs_runtime": 3.0003, + "eval_xsum-pairs_samples_per_second": 42.663, + "eval_xsum-pairs_steps_per_second": 0.333, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_sciq_pairs_loss": 0.703199565410614, + "eval_sciq_pairs_runtime": 3.4121, + "eval_sciq_pairs_samples_per_second": 37.513, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_qasc_pairs_loss": 4.066890239715576, + "eval_qasc_pairs_runtime": 0.6223, + "eval_qasc_pairs_samples_per_second": 205.675, + "eval_qasc_pairs_steps_per_second": 1.607, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_openbookqa_pairs_loss": 5.092636585235596, + "eval_openbookqa_pairs_runtime": 0.5896, + "eval_openbookqa_pairs_samples_per_second": 217.085, + "eval_openbookqa_pairs_steps_per_second": 1.696, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_msmarco_pairs_loss": 11.276179313659668, + "eval_msmarco_pairs_runtime": 1.5132, + "eval_msmarco_pairs_samples_per_second": 84.591, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_nq_pairs_loss": 10.514344215393066, + "eval_nq_pairs_runtime": 2.9064, + "eval_nq_pairs_samples_per_second": 44.041, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_trivia_pairs_loss": 7.659719467163086, + "eval_trivia_pairs_runtime": 3.436, + "eval_trivia_pairs_samples_per_second": 37.253, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_gooaq_pairs_loss": 7.905792236328125, + "eval_gooaq_pairs_runtime": 0.9586, + "eval_gooaq_pairs_samples_per_second": 133.534, + "eval_gooaq_pairs_steps_per_second": 1.043, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_paws-pos_loss": 1.2882633209228516, + "eval_paws-pos_runtime": 0.6775, + "eval_paws-pos_samples_per_second": 188.929, + "eval_paws-pos_steps_per_second": 1.476, + "step": 60 + }, + { + "epoch": 0.06172839506172839, + "eval_global_dataset_loss": 5.7225518226623535, + "eval_global_dataset_runtime": 13.3827, + "eval_global_dataset_samples_per_second": 31.085, + "eval_global_dataset_steps_per_second": 0.299, + "step": 60 + }, + { + "epoch": 0.06275720164609054, + "grad_norm": 18.45367431640625, + "learning_rate": 2.107995846313603e-06, + "loss": 4.6024, + "step": 61 + }, + { + "epoch": 0.06378600823045268, + "grad_norm": 17.7939510345459, + "learning_rate": 2.1443406022845273e-06, + "loss": 4.4378, + "step": 62 + }, + { + "epoch": 0.06481481481481481, + "grad_norm": 19.061763763427734, + "learning_rate": 2.1806853582554513e-06, + "loss": 6.4802, + "step": 63 + }, + { + "epoch": 0.06584362139917696, + "grad_norm": 77.05914306640625, + "learning_rate": 2.2170301142263758e-06, + "loss": 10.9004, + "step": 64 + }, + { + "epoch": 0.0668724279835391, + "grad_norm": 20.099227905273438, + "learning_rate": 2.2533748701973e-06, + "loss": 6.7516, + "step": 65 + }, + { + "epoch": 0.06790123456790123, + "grad_norm": 53.35956573486328, + "learning_rate": 2.289719626168224e-06, + "loss": 7.7821, + "step": 66 + }, + { + "epoch": 0.06893004115226338, + "grad_norm": 23.51174545288086, + "learning_rate": 2.3260643821391483e-06, + "loss": 6.1714, + "step": 67 + }, + { + "epoch": 0.06995884773662552, + "grad_norm": 11.979568481445312, + "learning_rate": 2.3624091381100727e-06, + "loss": 5.3013, + "step": 68 + }, + { + "epoch": 0.07098765432098765, + "grad_norm": 50.14888381958008, + "learning_rate": 2.3987538940809963e-06, + "loss": 9.0397, + "step": 69 + }, + { + "epoch": 0.0720164609053498, + "grad_norm": 52.993473052978516, + "learning_rate": 2.4350986500519208e-06, + "loss": 9.3361, + "step": 70 + }, + { + "epoch": 0.07304526748971193, + "grad_norm": 16.7055721282959, + "learning_rate": 2.471443406022845e-06, + "loss": 5.1927, + "step": 71 + }, + { + "epoch": 0.07407407407407407, + "grad_norm": 17.894912719726562, + "learning_rate": 2.5077881619937692e-06, + "loss": 5.6994, + "step": 72 + }, + { + "epoch": 0.07510288065843622, + "grad_norm": 29.04665184020996, + "learning_rate": 2.5441329179646932e-06, + "loss": 7.5132, + "step": 73 + }, + { + "epoch": 0.07613168724279835, + "grad_norm": 14.857793807983398, + "learning_rate": 2.5804776739356177e-06, + "loss": 5.4796, + "step": 74 + }, + { + "epoch": 0.07716049382716049, + "grad_norm": 24.775344848632812, + "learning_rate": 2.6168224299065417e-06, + "loss": 2.7714, + "step": 75 + }, + { + "epoch": 0.07818930041152264, + "grad_norm": 49.390663146972656, + "learning_rate": 2.653167185877466e-06, + "loss": 8.9842, + "step": 76 + }, + { + "epoch": 0.07921810699588477, + "grad_norm": 65.65110778808594, + "learning_rate": 2.68951194184839e-06, + "loss": 10.1764, + "step": 77 + }, + { + "epoch": 0.08024691358024691, + "grad_norm": 13.745916366577148, + "learning_rate": 2.725856697819314e-06, + "loss": 5.0512, + "step": 78 + }, + { + "epoch": 0.08127572016460906, + "grad_norm": 14.591425895690918, + "learning_rate": 2.7622014537902386e-06, + "loss": 5.5013, + "step": 79 + }, + { + "epoch": 0.0823045267489712, + "grad_norm": 14.892078399658203, + "learning_rate": 2.798546209761163e-06, + "loss": 5.4496, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_Qnli-dev_cosine_accuracy": 0.591796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9341771602630615, + "eval_Qnli-dev_cosine_ap": 0.576947319869376, + "eval_Qnli-dev_cosine_f1": 0.6346153846153846, + "eval_Qnli-dev_cosine_f1_threshold": 0.8134556412696838, + "eval_Qnli-dev_cosine_precision": 0.4695121951219512, + "eval_Qnli-dev_cosine_recall": 0.9788135593220338, + "eval_Qnli-dev_dot_accuracy": 0.580078125, + "eval_Qnli-dev_dot_accuracy_threshold": 388.09979248046875, + "eval_Qnli-dev_dot_ap": 0.5032087471570361, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 230.6592254638672, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.6015625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 7.462021827697754, + "eval_Qnli-dev_euclidean_ap": 0.5838970485633856, + "eval_Qnli-dev_euclidean_f1": 0.6344827586206897, + "eval_Qnli-dev_euclidean_f1_threshold": 12.409799575805664, + "eval_Qnli-dev_euclidean_precision": 0.4703476482617587, + "eval_Qnli-dev_euclidean_recall": 0.9745762711864406, + "eval_Qnli-dev_manhattan_accuracy": 0.62890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 163.1259002685547, + "eval_Qnli-dev_manhattan_ap": 0.630845221732911, + "eval_Qnli-dev_manhattan_f1": 0.6355421686746988, + "eval_Qnli-dev_manhattan_f1_threshold": 187.08981323242188, + "eval_Qnli-dev_manhattan_precision": 0.4929906542056075, + "eval_Qnli-dev_manhattan_recall": 0.8940677966101694, + "eval_Qnli-dev_max_accuracy": 0.62890625, + "eval_Qnli-dev_max_accuracy_threshold": 388.09979248046875, + "eval_Qnli-dev_max_ap": 0.630845221732911, + "eval_Qnli-dev_max_f1": 0.6355421686746988, + "eval_Qnli-dev_max_f1_threshold": 230.6592254638672, + "eval_Qnli-dev_max_precision": 0.4929906542056075, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.666015625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.97718346118927, + "eval_allNLI-dev_cosine_ap": 0.3756015628227595, + "eval_allNLI-dev_cosine_f1": 0.5068702290076336, + "eval_allNLI-dev_cosine_f1_threshold": 0.8239856958389282, + "eval_allNLI-dev_cosine_precision": 0.34439834024896265, + "eval_allNLI-dev_cosine_recall": 0.9595375722543352, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 495.46832275390625, + "eval_allNLI-dev_dot_ap": 0.332020946884521, + "eval_allNLI-dev_dot_f1": 0.5036603221083455, + "eval_allNLI-dev_dot_f1_threshold": 312.1241760253906, + "eval_allNLI-dev_dot_precision": 0.33725490196078434, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.66796875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.271183013916016, + "eval_allNLI-dev_euclidean_ap": 0.3777199146320434, + "eval_allNLI-dev_euclidean_f1": 0.5091463414634146, + "eval_allNLI-dev_euclidean_f1_threshold": 12.89515209197998, + "eval_allNLI-dev_euclidean_precision": 0.34575569358178054, + "eval_allNLI-dev_euclidean_recall": 0.9653179190751445, + "eval_allNLI-dev_manhattan_accuracy": 0.666015625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 88.01801300048828, + "eval_allNLI-dev_manhattan_ap": 0.38882171851359393, + "eval_allNLI-dev_manhattan_f1": 0.5068285280728376, + "eval_allNLI-dev_manhattan_f1_threshold": 208.61183166503906, + "eval_allNLI-dev_manhattan_precision": 0.3436213991769547, + "eval_allNLI-dev_manhattan_recall": 0.9653179190751445, + "eval_allNLI-dev_max_accuracy": 0.66796875, + "eval_allNLI-dev_max_accuracy_threshold": 495.46832275390625, + "eval_allNLI-dev_max_ap": 0.38882171851359393, + "eval_allNLI-dev_max_f1": 0.5091463414634146, + "eval_allNLI-dev_max_f1_threshold": 312.1241760253906, + "eval_allNLI-dev_max_precision": 0.34575569358178054, + "eval_allNLI-dev_max_recall": 0.9942196531791907, + "eval_sequential_score": 0.630845221732911, + "eval_sts-test_pearson_cosine": 0.2082090854077526, + "eval_sts-test_pearson_dot": 0.31968251773175477, + "eval_sts-test_pearson_euclidean": 0.18565762314607082, + "eval_sts-test_pearson_manhattan": 0.21750467365326087, + "eval_sts-test_pearson_max": 0.31968251773175477, + "eval_sts-test_spearman_cosine": 0.2475509554001572, + "eval_sts-test_spearman_dot": 0.32583854357070313, + "eval_sts-test_spearman_euclidean": 0.20592825469263046, + "eval_sts-test_spearman_manhattan": 0.23787152606876585, + "eval_sts-test_spearman_max": 0.32583854357070313, + "eval_vitaminc-pairs_loss": 2.887739896774292, + "eval_vitaminc-pairs_runtime": 3.1934, + "eval_vitaminc-pairs_samples_per_second": 40.083, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_negation-triplets_loss": 3.878300666809082, + "eval_negation-triplets_runtime": 0.7531, + "eval_negation-triplets_samples_per_second": 169.96, + "eval_negation-triplets_steps_per_second": 1.328, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_scitail-pairs-pos_loss": 1.8523993492126465, + "eval_scitail-pairs-pos_runtime": 0.7885, + "eval_scitail-pairs-pos_samples_per_second": 162.341, + "eval_scitail-pairs-pos_steps_per_second": 1.268, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_scitail-pairs-qa_loss": 2.0656681060791016, + "eval_scitail-pairs-qa_runtime": 0.5722, + "eval_scitail-pairs-qa_samples_per_second": 223.714, + "eval_scitail-pairs-qa_steps_per_second": 1.748, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_xsum-pairs_loss": 6.511655807495117, + "eval_xsum-pairs_runtime": 3.0191, + "eval_xsum-pairs_samples_per_second": 42.397, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_sciq_pairs_loss": 0.6626698970794678, + "eval_sciq_pairs_runtime": 3.445, + "eval_sciq_pairs_samples_per_second": 37.156, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_qasc_pairs_loss": 3.637084722518921, + "eval_qasc_pairs_runtime": 0.6139, + "eval_qasc_pairs_samples_per_second": 208.501, + "eval_qasc_pairs_steps_per_second": 1.629, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_openbookqa_pairs_loss": 4.819972038269043, + "eval_openbookqa_pairs_runtime": 0.5735, + "eval_openbookqa_pairs_samples_per_second": 223.194, + "eval_openbookqa_pairs_steps_per_second": 1.744, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_msmarco_pairs_loss": 9.547957420349121, + "eval_msmarco_pairs_runtime": 1.5165, + "eval_msmarco_pairs_samples_per_second": 84.404, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_nq_pairs_loss": 8.830368995666504, + "eval_nq_pairs_runtime": 2.8979, + "eval_nq_pairs_samples_per_second": 44.17, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_trivia_pairs_loss": 7.168319225311279, + "eval_trivia_pairs_runtime": 3.4425, + "eval_trivia_pairs_samples_per_second": 37.182, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_gooaq_pairs_loss": 7.121779918670654, + "eval_gooaq_pairs_runtime": 0.9493, + "eval_gooaq_pairs_samples_per_second": 134.835, + "eval_gooaq_pairs_steps_per_second": 1.053, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_paws-pos_loss": 1.2316638231277466, + "eval_paws-pos_runtime": 0.6943, + "eval_paws-pos_samples_per_second": 184.348, + "eval_paws-pos_steps_per_second": 1.44, + "step": 80 + }, + { + "epoch": 0.0823045267489712, + "eval_global_dataset_loss": 5.2695698738098145, + "eval_global_dataset_runtime": 13.3665, + "eval_global_dataset_samples_per_second": 31.123, + "eval_global_dataset_steps_per_second": 0.299, + "step": 80 + }, + { + "epoch": 0.08333333333333333, + "grad_norm": 48.79065704345703, + "learning_rate": 2.8348909657320867e-06, + "loss": 9.1986, + "step": 81 + }, + { + "epoch": 0.08436213991769548, + "grad_norm": 13.215801239013672, + "learning_rate": 2.871235721703011e-06, + "loss": 5.5779, + "step": 82 + }, + { + "epoch": 0.08539094650205761, + "grad_norm": 14.010391235351562, + "learning_rate": 2.9075804776739355e-06, + "loss": 5.446, + "step": 83 + }, + { + "epoch": 0.08641975308641975, + "grad_norm": 14.401032447814941, + "learning_rate": 2.9439252336448596e-06, + "loss": 5.5707, + "step": 84 + }, + { + "epoch": 0.0874485596707819, + "grad_norm": 11.690423011779785, + "learning_rate": 2.9802699896157836e-06, + "loss": 5.064, + "step": 85 + }, + { + "epoch": 0.08847736625514403, + "grad_norm": 14.510086059570312, + "learning_rate": 3.016614745586708e-06, + "loss": 5.1192, + "step": 86 + }, + { + "epoch": 0.08950617283950617, + "grad_norm": 11.915549278259277, + "learning_rate": 3.052959501557632e-06, + "loss": 5.0992, + "step": 87 + }, + { + "epoch": 0.09053497942386832, + "grad_norm": 11.183893203735352, + "learning_rate": 3.0893042575285565e-06, + "loss": 5.1639, + "step": 88 + }, + { + "epoch": 0.09156378600823045, + "grad_norm": 23.76273536682129, + "learning_rate": 3.1256490134994805e-06, + "loss": 6.4692, + "step": 89 + }, + { + "epoch": 0.09259259259259259, + "grad_norm": 13.50161075592041, + "learning_rate": 3.1619937694704045e-06, + "loss": 5.1285, + "step": 90 + }, + { + "epoch": 0.09362139917695474, + "grad_norm": 25.397741317749023, + "learning_rate": 3.198338525441329e-06, + "loss": 2.8464, + "step": 91 + }, + { + "epoch": 0.09465020576131687, + "grad_norm": 12.421465873718262, + "learning_rate": 3.2346832814122534e-06, + "loss": 4.9592, + "step": 92 + }, + { + "epoch": 0.09567901234567901, + "grad_norm": 12.573847770690918, + "learning_rate": 3.271028037383177e-06, + "loss": 5.1014, + "step": 93 + }, + { + "epoch": 0.09670781893004116, + "grad_norm": 34.48383331298828, + "learning_rate": 3.3073727933541015e-06, + "loss": 8.0528, + "step": 94 + }, + { + "epoch": 0.09773662551440329, + "grad_norm": 22.98038673400879, + "learning_rate": 3.343717549325026e-06, + "loss": 6.5803, + "step": 95 + }, + { + "epoch": 0.09876543209876543, + "grad_norm": 18.991193771362305, + "learning_rate": 3.38006230529595e-06, + "loss": 3.9696, + "step": 96 + }, + { + "epoch": 0.09979423868312758, + "grad_norm": 14.325688362121582, + "learning_rate": 3.416407061266874e-06, + "loss": 5.2509, + "step": 97 + }, + { + "epoch": 0.10082304526748971, + "grad_norm": 32.09270095825195, + "learning_rate": 3.4527518172377984e-06, + "loss": 7.8866, + "step": 98 + }, + { + "epoch": 0.10185185185185185, + "grad_norm": 28.032167434692383, + "learning_rate": 3.4890965732087224e-06, + "loss": 2.4669, + "step": 99 + }, + { + "epoch": 0.102880658436214, + "grad_norm": 19.722026824951172, + "learning_rate": 3.525441329179647e-06, + "loss": 6.8252, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_Qnli-dev_cosine_accuracy": 0.60546875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9359708428382874, + "eval_Qnli-dev_cosine_ap": 0.5859495580627428, + "eval_Qnli-dev_cosine_f1": 0.6305278174037089, + "eval_Qnli-dev_cosine_f1_threshold": 0.8434731960296631, + "eval_Qnli-dev_cosine_precision": 0.4752688172043011, + "eval_Qnli-dev_cosine_recall": 0.9364406779661016, + "eval_Qnli-dev_dot_accuracy": 0.58203125, + "eval_Qnli-dev_dot_accuracy_threshold": 392.71923828125, + "eval_Qnli-dev_dot_ap": 0.5087577253973941, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 236.47132873535156, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.603515625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 7.414036750793457, + "eval_Qnli-dev_euclidean_ap": 0.59330387039405, + "eval_Qnli-dev_euclidean_f1": 0.6291834002677376, + "eval_Qnli-dev_euclidean_f1_threshold": 18.49761962890625, + "eval_Qnli-dev_euclidean_precision": 0.4598825831702544, + "eval_Qnli-dev_euclidean_recall": 0.9957627118644068, + "eval_Qnli-dev_manhattan_accuracy": 0.6328125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 158.1238555908203, + "eval_Qnli-dev_manhattan_ap": 0.636242439203504, + "eval_Qnli-dev_manhattan_f1": 0.640746500777605, + "eval_Qnli-dev_manhattan_f1_threshold": 185.45480346679688, + "eval_Qnli-dev_manhattan_precision": 0.5061425061425061, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.6328125, + "eval_Qnli-dev_max_accuracy_threshold": 392.71923828125, + "eval_Qnli-dev_max_ap": 0.636242439203504, + "eval_Qnli-dev_max_f1": 0.640746500777605, + "eval_Qnli-dev_max_f1_threshold": 236.47132873535156, + "eval_Qnli-dev_max_precision": 0.5061425061425061, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.671875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9702135324478149, + "eval_allNLI-dev_cosine_ap": 0.38635245174664545, + "eval_allNLI-dev_cosine_f1": 0.5051698670605613, + "eval_allNLI-dev_cosine_f1_threshold": 0.7689170837402344, + "eval_allNLI-dev_cosine_precision": 0.3392857142857143, + "eval_allNLI-dev_cosine_recall": 0.9884393063583815, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 499.2386779785156, + "eval_allNLI-dev_dot_ap": 0.33354410040006655, + "eval_allNLI-dev_dot_f1": 0.5036603221083455, + "eval_allNLI-dev_dot_f1_threshold": 310.7790222167969, + "eval_allNLI-dev_dot_precision": 0.33725490196078434, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.669921875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 5.469601154327393, + "eval_allNLI-dev_euclidean_ap": 0.38522819959781573, + "eval_allNLI-dev_euclidean_f1": 0.5059171597633136, + "eval_allNLI-dev_euclidean_f1_threshold": 14.400506019592285, + "eval_allNLI-dev_euclidean_precision": 0.3399602385685885, + "eval_allNLI-dev_euclidean_recall": 0.9884393063583815, + "eval_allNLI-dev_manhattan_accuracy": 0.66796875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 83.476806640625, + "eval_allNLI-dev_manhattan_ap": 0.398882755775317, + "eval_allNLI-dev_manhattan_f1": 0.5051395007342143, + "eval_allNLI-dev_manhattan_f1_threshold": 256.5009765625, + "eval_allNLI-dev_manhattan_precision": 0.33858267716535434, + "eval_allNLI-dev_manhattan_recall": 0.9942196531791907, + "eval_allNLI-dev_max_accuracy": 0.671875, + "eval_allNLI-dev_max_accuracy_threshold": 499.2386779785156, + "eval_allNLI-dev_max_ap": 0.398882755775317, + "eval_allNLI-dev_max_f1": 0.5059171597633136, + "eval_allNLI-dev_max_f1_threshold": 310.7790222167969, + "eval_allNLI-dev_max_precision": 0.3399602385685885, + "eval_allNLI-dev_max_recall": 0.9942196531791907, + "eval_sequential_score": 0.636242439203504, + "eval_sts-test_pearson_cosine": 0.233326009931931, + "eval_sts-test_pearson_dot": 0.3167806500856212, + "eval_sts-test_pearson_euclidean": 0.20945664323942717, + "eval_sts-test_pearson_manhattan": 0.23559165515257938, + "eval_sts-test_pearson_max": 0.3167806500856212, + "eval_sts-test_spearman_cosine": 0.2687911570918344, + "eval_sts-test_spearman_dot": 0.32229956906860985, + "eval_sts-test_spearman_euclidean": 0.22929892968536797, + "eval_sts-test_spearman_manhattan": 0.25574708751351516, + "eval_sts-test_spearman_max": 0.32229956906860985, + "eval_vitaminc-pairs_loss": 2.8645708560943604, + "eval_vitaminc-pairs_runtime": 3.1781, + "eval_vitaminc-pairs_samples_per_second": 40.275, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_negation-triplets_loss": 3.715083599090576, + "eval_negation-triplets_runtime": 0.7412, + "eval_negation-triplets_samples_per_second": 172.701, + "eval_negation-triplets_steps_per_second": 1.349, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_scitail-pairs-pos_loss": 1.6467901468276978, + "eval_scitail-pairs-pos_runtime": 0.828, + "eval_scitail-pairs-pos_samples_per_second": 154.583, + "eval_scitail-pairs-pos_steps_per_second": 1.208, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_scitail-pairs-qa_loss": 1.8661956787109375, + "eval_scitail-pairs-qa_runtime": 0.5663, + "eval_scitail-pairs-qa_samples_per_second": 226.026, + "eval_scitail-pairs-qa_steps_per_second": 1.766, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_xsum-pairs_loss": 6.297423839569092, + "eval_xsum-pairs_runtime": 3.0214, + "eval_xsum-pairs_samples_per_second": 42.364, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_sciq_pairs_loss": 0.6386430263519287, + "eval_sciq_pairs_runtime": 3.404, + "eval_sciq_pairs_samples_per_second": 37.603, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_qasc_pairs_loss": 3.3296892642974854, + "eval_qasc_pairs_runtime": 0.5903, + "eval_qasc_pairs_samples_per_second": 216.831, + "eval_qasc_pairs_steps_per_second": 1.694, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_openbookqa_pairs_loss": 4.621798038482666, + "eval_openbookqa_pairs_runtime": 0.5726, + "eval_openbookqa_pairs_samples_per_second": 223.561, + "eval_openbookqa_pairs_steps_per_second": 1.747, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_msmarco_pairs_loss": 8.393180847167969, + "eval_msmarco_pairs_runtime": 1.5114, + "eval_msmarco_pairs_samples_per_second": 84.687, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_nq_pairs_loss": 7.865816116333008, + "eval_nq_pairs_runtime": 2.888, + "eval_nq_pairs_samples_per_second": 44.321, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_trivia_pairs_loss": 6.768343925476074, + "eval_trivia_pairs_runtime": 3.4313, + "eval_trivia_pairs_samples_per_second": 37.303, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_gooaq_pairs_loss": 6.616071701049805, + "eval_gooaq_pairs_runtime": 0.9398, + "eval_gooaq_pairs_samples_per_second": 136.205, + "eval_gooaq_pairs_steps_per_second": 1.064, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_paws-pos_loss": 1.1048204898834229, + "eval_paws-pos_runtime": 0.6813, + "eval_paws-pos_samples_per_second": 187.888, + "eval_paws-pos_steps_per_second": 1.468, + "step": 100 + }, + { + "epoch": 0.102880658436214, + "eval_global_dataset_loss": 4.85481595993042, + "eval_global_dataset_runtime": 13.3418, + "eval_global_dataset_samples_per_second": 31.18, + "eval_global_dataset_steps_per_second": 0.3, + "step": 100 + }, + { + "epoch": 0.10390946502057613, + "grad_norm": 16.656429290771484, + "learning_rate": 3.561786085150571e-06, + "loss": 5.3891, + "step": 101 + }, + { + "epoch": 0.10493827160493827, + "grad_norm": 35.718448638916016, + "learning_rate": 3.598130841121495e-06, + "loss": 8.2981, + "step": 102 + }, + { + "epoch": 0.10596707818930041, + "grad_norm": 33.725162506103516, + "learning_rate": 3.6344755970924193e-06, + "loss": 8.0844, + "step": 103 + }, + { + "epoch": 0.10699588477366255, + "grad_norm": 19.359039306640625, + "learning_rate": 3.6708203530633433e-06, + "loss": 4.0626, + "step": 104 + }, + { + "epoch": 0.10802469135802469, + "grad_norm": 14.194345474243164, + "learning_rate": 3.7071651090342674e-06, + "loss": 4.8851, + "step": 105 + }, + { + "epoch": 0.10905349794238683, + "grad_norm": 14.311044692993164, + "learning_rate": 3.743509865005192e-06, + "loss": 5.1174, + "step": 106 + }, + { + "epoch": 0.11008230452674897, + "grad_norm": 13.10085678100586, + "learning_rate": 3.779854620976116e-06, + "loss": 4.973, + "step": 107 + }, + { + "epoch": 0.1111111111111111, + "grad_norm": 14.114293098449707, + "learning_rate": 3.81619937694704e-06, + "loss": 5.2879, + "step": 108 + }, + { + "epoch": 0.11213991769547325, + "grad_norm": 13.561037063598633, + "learning_rate": 3.852544132917965e-06, + "loss": 4.9833, + "step": 109 + }, + { + "epoch": 0.11316872427983539, + "grad_norm": 14.047689437866211, + "learning_rate": 3.888888888888889e-06, + "loss": 5.1619, + "step": 110 + }, + { + "epoch": 0.11419753086419752, + "grad_norm": 37.19677734375, + "learning_rate": 3.925233644859813e-06, + "loss": 8.2926, + "step": 111 + }, + { + "epoch": 0.11522633744855967, + "grad_norm": 12.727770805358887, + "learning_rate": 3.961578400830737e-06, + "loss": 4.7478, + "step": 112 + }, + { + "epoch": 0.11625514403292181, + "grad_norm": 12.889444351196289, + "learning_rate": 3.997923156801661e-06, + "loss": 4.7644, + "step": 113 + }, + { + "epoch": 0.11728395061728394, + "grad_norm": 20.46539878845215, + "learning_rate": 4.034267912772586e-06, + "loss": 6.379, + "step": 114 + }, + { + "epoch": 0.1183127572016461, + "grad_norm": 14.000577926635742, + "learning_rate": 4.07061266874351e-06, + "loss": 4.8567, + "step": 115 + }, + { + "epoch": 0.11934156378600823, + "grad_norm": 14.417937278747559, + "learning_rate": 4.106957424714434e-06, + "loss": 4.939, + "step": 116 + }, + { + "epoch": 0.12037037037037036, + "grad_norm": 36.433433532714844, + "learning_rate": 4.143302180685358e-06, + "loss": 6.4653, + "step": 117 + }, + { + "epoch": 0.12139917695473251, + "grad_norm": 15.122117042541504, + "learning_rate": 4.179646936656283e-06, + "loss": 5.0902, + "step": 118 + }, + { + "epoch": 0.12242798353909465, + "grad_norm": 15.600722312927246, + "learning_rate": 4.215991692627206e-06, + "loss": 4.4886, + "step": 119 + }, + { + "epoch": 0.12345679012345678, + "grad_norm": 18.391870498657227, + "learning_rate": 4.252336448598131e-06, + "loss": 6.2223, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_Qnli-dev_cosine_accuracy": 0.619140625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9281325340270996, + "eval_Qnli-dev_cosine_ap": 0.6014574414783835, + "eval_Qnli-dev_cosine_f1": 0.6309341500765697, + "eval_Qnli-dev_cosine_f1_threshold": 0.8621190786361694, + "eval_Qnli-dev_cosine_precision": 0.4940047961630695, + "eval_Qnli-dev_cosine_recall": 0.8728813559322034, + "eval_Qnli-dev_dot_accuracy": 0.58984375, + "eval_Qnli-dev_dot_accuracy_threshold": 388.7757568359375, + "eval_Qnli-dev_dot_ap": 0.5127748615151599, + "eval_Qnli-dev_dot_f1": 0.6304044630404463, + "eval_Qnli-dev_dot_f1_threshold": 322.849853515625, + "eval_Qnli-dev_dot_precision": 0.4698544698544699, + "eval_Qnli-dev_dot_recall": 0.9576271186440678, + "eval_Qnli-dev_euclidean_accuracy": 0.6171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 8.507330894470215, + "eval_Qnli-dev_euclidean_ap": 0.6089590025180598, + "eval_Qnli-dev_euclidean_f1": 0.6291834002677376, + "eval_Qnli-dev_euclidean_f1_threshold": 18.0284423828125, + "eval_Qnli-dev_euclidean_precision": 0.4598825831702544, + "eval_Qnli-dev_euclidean_recall": 0.9957627118644068, + "eval_Qnli-dev_manhattan_accuracy": 0.642578125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 169.31954956054688, + "eval_Qnli-dev_manhattan_ap": 0.6439314246828807, + "eval_Qnli-dev_manhattan_f1": 0.6509433962264151, + "eval_Qnli-dev_manhattan_f1_threshold": 195.28048706054688, + "eval_Qnli-dev_manhattan_precision": 0.5175, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.642578125, + "eval_Qnli-dev_max_accuracy_threshold": 388.7757568359375, + "eval_Qnli-dev_max_ap": 0.6439314246828807, + "eval_Qnli-dev_max_f1": 0.6509433962264151, + "eval_Qnli-dev_max_f1_threshold": 322.849853515625, + "eval_Qnli-dev_max_precision": 0.5175, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.66796875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9790990948677063, + "eval_allNLI-dev_cosine_ap": 0.3955241297150008, + "eval_allNLI-dev_cosine_f1": 0.5052005943536404, + "eval_allNLI-dev_cosine_f1_threshold": 0.7795530557632446, + "eval_allNLI-dev_cosine_precision": 0.34, + "eval_allNLI-dev_cosine_recall": 0.9826589595375722, + "eval_allNLI-dev_dot_accuracy": 0.6640625, + "eval_allNLI-dev_dot_accuracy_threshold": 514.1408081054688, + "eval_allNLI-dev_dot_ap": 0.3428427300114505, + "eval_allNLI-dev_dot_f1": 0.5043988269794721, + "eval_allNLI-dev_dot_f1_threshold": 316.1231994628906, + "eval_allNLI-dev_dot_precision": 0.3379174852652259, + "eval_allNLI-dev_dot_recall": 0.9942196531791907, + "eval_allNLI-dev_euclidean_accuracy": 0.669921875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.674912452697754, + "eval_allNLI-dev_euclidean_ap": 0.394931606062899, + "eval_allNLI-dev_euclidean_f1": 0.5067873303167421, + "eval_allNLI-dev_euclidean_f1_threshold": 13.242253303527832, + "eval_allNLI-dev_euclidean_precision": 0.34285714285714286, + "eval_allNLI-dev_euclidean_recall": 0.9710982658959537, + "eval_allNLI-dev_manhattan_accuracy": 0.669921875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 84.45820617675781, + "eval_allNLI-dev_manhattan_ap": 0.40742769361596887, + "eval_allNLI-dev_manhattan_f1": 0.5058479532163743, + "eval_allNLI-dev_manhattan_f1_threshold": 293.953369140625, + "eval_allNLI-dev_manhattan_precision": 0.3385518590998043, + "eval_allNLI-dev_manhattan_recall": 1.0, + "eval_allNLI-dev_max_accuracy": 0.669921875, + "eval_allNLI-dev_max_accuracy_threshold": 514.1408081054688, + "eval_allNLI-dev_max_ap": 0.40742769361596887, + "eval_allNLI-dev_max_f1": 0.5067873303167421, + "eval_allNLI-dev_max_f1_threshold": 316.1231994628906, + "eval_allNLI-dev_max_precision": 0.34285714285714286, + "eval_allNLI-dev_max_recall": 1.0, + "eval_sequential_score": 0.6439314246828807, + "eval_sts-test_pearson_cosine": 0.25252985635600256, + "eval_sts-test_pearson_dot": 0.3099351189652281, + "eval_sts-test_pearson_euclidean": 0.23142843084411574, + "eval_sts-test_pearson_manhattan": 0.2502258002878053, + "eval_sts-test_pearson_max": 0.3099351189652281, + "eval_sts-test_spearman_cosine": 0.28591643554731094, + "eval_sts-test_spearman_dot": 0.3177811684597045, + "eval_sts-test_spearman_euclidean": 0.24943896636699894, + "eval_sts-test_spearman_manhattan": 0.2700833945157724, + "eval_sts-test_spearman_max": 0.3177811684597045, + "eval_vitaminc-pairs_loss": 2.8456013202667236, + "eval_vitaminc-pairs_runtime": 3.1683, + "eval_vitaminc-pairs_samples_per_second": 40.4, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_negation-triplets_loss": 3.5346930027008057, + "eval_negation-triplets_runtime": 0.7273, + "eval_negation-triplets_samples_per_second": 175.983, + "eval_negation-triplets_steps_per_second": 1.375, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_scitail-pairs-pos_loss": 1.3952267169952393, + "eval_scitail-pairs-pos_runtime": 0.7901, + "eval_scitail-pairs-pos_samples_per_second": 162.002, + "eval_scitail-pairs-pos_steps_per_second": 1.266, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_scitail-pairs-qa_loss": 1.6222929954528809, + "eval_scitail-pairs-qa_runtime": 0.5623, + "eval_scitail-pairs-qa_samples_per_second": 227.629, + "eval_scitail-pairs-qa_steps_per_second": 1.778, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_xsum-pairs_loss": 6.103888988494873, + "eval_xsum-pairs_runtime": 3.0165, + "eval_xsum-pairs_samples_per_second": 42.433, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_sciq_pairs_loss": 0.6113746762275696, + "eval_sciq_pairs_runtime": 3.3757, + "eval_sciq_pairs_samples_per_second": 37.918, + "eval_sciq_pairs_steps_per_second": 0.296, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_qasc_pairs_loss": 3.058934450149536, + "eval_qasc_pairs_runtime": 0.59, + "eval_qasc_pairs_samples_per_second": 216.943, + "eval_qasc_pairs_steps_per_second": 1.695, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_openbookqa_pairs_loss": 4.499716758728027, + "eval_openbookqa_pairs_runtime": 0.5708, + "eval_openbookqa_pairs_samples_per_second": 224.263, + "eval_openbookqa_pairs_steps_per_second": 1.752, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_msmarco_pairs_loss": 7.5348734855651855, + "eval_msmarco_pairs_runtime": 1.514, + "eval_msmarco_pairs_samples_per_second": 84.546, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_nq_pairs_loss": 7.202226638793945, + "eval_nq_pairs_runtime": 2.8915, + "eval_nq_pairs_samples_per_second": 44.268, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_trivia_pairs_loss": 6.395583152770996, + "eval_trivia_pairs_runtime": 3.4281, + "eval_trivia_pairs_samples_per_second": 37.338, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_gooaq_pairs_loss": 6.247038841247559, + "eval_gooaq_pairs_runtime": 0.9411, + "eval_gooaq_pairs_samples_per_second": 136.018, + "eval_gooaq_pairs_steps_per_second": 1.063, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_paws-pos_loss": 0.8818368911743164, + "eval_paws-pos_runtime": 0.7042, + "eval_paws-pos_samples_per_second": 181.755, + "eval_paws-pos_steps_per_second": 1.42, + "step": 120 + }, + { + "epoch": 0.12345679012345678, + "eval_global_dataset_loss": 4.362409591674805, + "eval_global_dataset_runtime": 13.3877, + "eval_global_dataset_samples_per_second": 31.073, + "eval_global_dataset_steps_per_second": 0.299, + "step": 120 + }, + { + "epoch": 0.12448559670781893, + "grad_norm": 14.784011840820312, + "learning_rate": 4.288681204569055e-06, + "loss": 4.5958, + "step": 121 + }, + { + "epoch": 0.12551440329218108, + "grad_norm": 16.07524871826172, + "learning_rate": 4.325025960539979e-06, + "loss": 6.2355, + "step": 122 + }, + { + "epoch": 0.12654320987654322, + "grad_norm": 25.21320915222168, + "learning_rate": 4.361370716510903e-06, + "loss": 6.0763, + "step": 123 + }, + { + "epoch": 0.12757201646090535, + "grad_norm": 13.882258415222168, + "learning_rate": 4.3977154724818276e-06, + "loss": 4.719, + "step": 124 + }, + { + "epoch": 0.1286008230452675, + "grad_norm": 25.57428741455078, + "learning_rate": 4.4340602284527516e-06, + "loss": 6.0796, + "step": 125 + }, + { + "epoch": 0.12962962962962962, + "grad_norm": 14.860637664794922, + "learning_rate": 4.470404984423675e-06, + "loss": 4.284, + "step": 126 + }, + { + "epoch": 0.13065843621399176, + "grad_norm": 14.258697509765625, + "learning_rate": 4.5067497403946e-06, + "loss": 4.4948, + "step": 127 + }, + { + "epoch": 0.13168724279835392, + "grad_norm": 12.680214881896973, + "learning_rate": 4.543094496365524e-06, + "loss": 5.5456, + "step": 128 + }, + { + "epoch": 0.13271604938271606, + "grad_norm": 18.65047836303711, + "learning_rate": 4.579439252336448e-06, + "loss": 5.8322, + "step": 129 + }, + { + "epoch": 0.1337448559670782, + "grad_norm": 14.29658031463623, + "learning_rate": 4.6157840083073725e-06, + "loss": 4.0772, + "step": 130 + }, + { + "epoch": 0.13477366255144033, + "grad_norm": 14.54943561553955, + "learning_rate": 4.6521287642782965e-06, + "loss": 4.3008, + "step": 131 + }, + { + "epoch": 0.13580246913580246, + "grad_norm": 25.34575080871582, + "learning_rate": 4.6884735202492206e-06, + "loss": 6.878, + "step": 132 + }, + { + "epoch": 0.1368312757201646, + "grad_norm": 20.41341781616211, + "learning_rate": 4.724818276220145e-06, + "loss": 6.1897, + "step": 133 + }, + { + "epoch": 0.13786008230452676, + "grad_norm": 12.791062355041504, + "learning_rate": 4.7611630321910694e-06, + "loss": 4.142, + "step": 134 + }, + { + "epoch": 0.1388888888888889, + "grad_norm": 32.09108352661133, + "learning_rate": 4.797507788161993e-06, + "loss": 1.7782, + "step": 135 + }, + { + "epoch": 0.13991769547325103, + "grad_norm": 15.483809471130371, + "learning_rate": 4.8338525441329175e-06, + "loss": 3.9578, + "step": 136 + }, + { + "epoch": 0.14094650205761317, + "grad_norm": 17.372329711914062, + "learning_rate": 4.8701973001038415e-06, + "loss": 5.8774, + "step": 137 + }, + { + "epoch": 0.1419753086419753, + "grad_norm": 31.082347869873047, + "learning_rate": 4.9065420560747655e-06, + "loss": 1.6068, + "step": 138 + }, + { + "epoch": 0.14300411522633744, + "grad_norm": 13.522706985473633, + "learning_rate": 4.94288681204569e-06, + "loss": 4.164, + "step": 139 + }, + { + "epoch": 0.1440329218106996, + "grad_norm": 12.907632827758789, + "learning_rate": 4.979231568016614e-06, + "loss": 3.8015, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_Qnli-dev_cosine_accuracy": 0.6328125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.9141117334365845, + "eval_Qnli-dev_cosine_ap": 0.6198480681016185, + "eval_Qnli-dev_cosine_f1": 0.6397608370702541, + "eval_Qnli-dev_cosine_f1_threshold": 0.8335354328155518, + "eval_Qnli-dev_cosine_precision": 0.4942263279445728, + "eval_Qnli-dev_cosine_recall": 0.9067796610169492, + "eval_Qnli-dev_dot_accuracy": 0.599609375, + "eval_Qnli-dev_dot_accuracy_threshold": 405.96319580078125, + "eval_Qnli-dev_dot_ap": 0.5257457885237911, + "eval_Qnli-dev_dot_f1": 0.6291834002677376, + "eval_Qnli-dev_dot_f1_threshold": 256.84857177734375, + "eval_Qnli-dev_dot_precision": 0.4598825831702544, + "eval_Qnli-dev_dot_recall": 0.9957627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.63671875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 9.261069297790527, + "eval_Qnli-dev_euclidean_ap": 0.6306496803563475, + "eval_Qnli-dev_euclidean_f1": 0.636094674556213, + "eval_Qnli-dev_euclidean_f1_threshold": 12.323160171508789, + "eval_Qnli-dev_euclidean_precision": 0.48863636363636365, + "eval_Qnli-dev_euclidean_recall": 0.9110169491525424, + "eval_Qnli-dev_manhattan_accuracy": 0.646484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 171.06039428710938, + "eval_Qnli-dev_manhattan_ap": 0.6564076451753581, + "eval_Qnli-dev_manhattan_f1": 0.6487341772151899, + "eval_Qnli-dev_manhattan_f1_threshold": 217.7759552001953, + "eval_Qnli-dev_manhattan_precision": 0.5176767676767676, + "eval_Qnli-dev_manhattan_recall": 0.8686440677966102, + "eval_Qnli-dev_max_accuracy": 0.646484375, + "eval_Qnli-dev_max_accuracy_threshold": 405.96319580078125, + "eval_Qnli-dev_max_ap": 0.6564076451753581, + "eval_Qnli-dev_max_f1": 0.6487341772151899, + "eval_Qnli-dev_max_f1_threshold": 256.84857177734375, + "eval_Qnli-dev_max_precision": 0.5176767676767676, + "eval_Qnli-dev_max_recall": 0.9957627118644068, + "eval_allNLI-dev_cosine_accuracy": 0.66796875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9803125262260437, + "eval_allNLI-dev_cosine_ap": 0.40304954675643245, + "eval_allNLI-dev_cosine_f1": 0.5073313782991202, + "eval_allNLI-dev_cosine_f1_threshold": 0.7168662548065186, + "eval_allNLI-dev_cosine_precision": 0.33988212180746563, + "eval_allNLI-dev_cosine_recall": 1.0, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 522.0433959960938, + "eval_allNLI-dev_dot_ap": 0.3516359548665584, + "eval_allNLI-dev_dot_f1": 0.5065885797950219, + "eval_allNLI-dev_dot_f1_threshold": 309.74951171875, + "eval_allNLI-dev_dot_precision": 0.3392156862745098, + "eval_allNLI-dev_dot_recall": 1.0, + "eval_allNLI-dev_euclidean_accuracy": 0.669921875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 6.128700256347656, + "eval_allNLI-dev_euclidean_ap": 0.4017412298983858, + "eval_allNLI-dev_euclidean_f1": 0.5088235294117648, + "eval_allNLI-dev_euclidean_f1_threshold": 16.340839385986328, + "eval_allNLI-dev_euclidean_precision": 0.34122287968441817, + "eval_allNLI-dev_euclidean_recall": 1.0, + "eval_allNLI-dev_manhattan_accuracy": 0.66796875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 83.17814636230469, + "eval_allNLI-dev_manhattan_ap": 0.41319055063996046, + "eval_allNLI-dev_manhattan_f1": 0.5186567164179104, + "eval_allNLI-dev_manhattan_f1_threshold": 201.40753173828125, + "eval_allNLI-dev_manhattan_precision": 0.38292011019283745, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.669921875, + "eval_allNLI-dev_max_accuracy_threshold": 522.0433959960938, + "eval_allNLI-dev_max_ap": 0.41319055063996046, + "eval_allNLI-dev_max_f1": 0.5186567164179104, + "eval_allNLI-dev_max_f1_threshold": 309.74951171875, + "eval_allNLI-dev_max_precision": 0.38292011019283745, + "eval_allNLI-dev_max_recall": 1.0, + "eval_sequential_score": 0.6564076451753581, + "eval_sts-test_pearson_cosine": 0.2550498328876235, + "eval_sts-test_pearson_dot": 0.29981841169421564, + "eval_sts-test_pearson_euclidean": 0.24239763397446795, + "eval_sts-test_pearson_manhattan": 0.25344327521082516, + "eval_sts-test_pearson_max": 0.29981841169421564, + "eval_sts-test_spearman_cosine": 0.29254387360307027, + "eval_sts-test_spearman_dot": 0.3123169499412918, + "eval_sts-test_spearman_euclidean": 0.26282456091304185, + "eval_sts-test_spearman_manhattan": 0.27282288773310837, + "eval_sts-test_spearman_max": 0.3123169499412918, + "eval_vitaminc-pairs_loss": 2.815100908279419, + "eval_vitaminc-pairs_runtime": 3.1739, + "eval_vitaminc-pairs_samples_per_second": 40.329, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_negation-triplets_loss": 3.359119176864624, + "eval_negation-triplets_runtime": 0.7309, + "eval_negation-triplets_samples_per_second": 175.129, + "eval_negation-triplets_steps_per_second": 1.368, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_scitail-pairs-pos_loss": 1.217439889907837, + "eval_scitail-pairs-pos_runtime": 0.8032, + "eval_scitail-pairs-pos_samples_per_second": 159.359, + "eval_scitail-pairs-pos_steps_per_second": 1.245, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_scitail-pairs-qa_loss": 1.2241069078445435, + "eval_scitail-pairs-qa_runtime": 0.5639, + "eval_scitail-pairs-qa_samples_per_second": 226.986, + "eval_scitail-pairs-qa_steps_per_second": 1.773, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_xsum-pairs_loss": 5.798659801483154, + "eval_xsum-pairs_runtime": 3.0162, + "eval_xsum-pairs_samples_per_second": 42.437, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_sciq_pairs_loss": 0.5781325697898865, + "eval_sciq_pairs_runtime": 3.3872, + "eval_sciq_pairs_samples_per_second": 37.789, + "eval_sciq_pairs_steps_per_second": 0.295, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_qasc_pairs_loss": 2.687833070755005, + "eval_qasc_pairs_runtime": 0.5882, + "eval_qasc_pairs_samples_per_second": 217.599, + "eval_qasc_pairs_steps_per_second": 1.7, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_openbookqa_pairs_loss": 4.54829216003418, + "eval_openbookqa_pairs_runtime": 0.5719, + "eval_openbookqa_pairs_samples_per_second": 223.814, + "eval_openbookqa_pairs_steps_per_second": 1.749, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_msmarco_pairs_loss": 6.890722274780273, + "eval_msmarco_pairs_runtime": 1.5121, + "eval_msmarco_pairs_samples_per_second": 84.65, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_nq_pairs_loss": 6.53361701965332, + "eval_nq_pairs_runtime": 2.8908, + "eval_nq_pairs_samples_per_second": 44.278, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_trivia_pairs_loss": 6.02173376083374, + "eval_trivia_pairs_runtime": 3.4272, + "eval_trivia_pairs_samples_per_second": 37.348, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_gooaq_pairs_loss": 5.6636834144592285, + "eval_gooaq_pairs_runtime": 0.9412, + "eval_gooaq_pairs_samples_per_second": 135.991, + "eval_gooaq_pairs_steps_per_second": 1.062, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_paws-pos_loss": 0.4957215487957001, + "eval_paws-pos_runtime": 0.6753, + "eval_paws-pos_samples_per_second": 189.545, + "eval_paws-pos_steps_per_second": 1.481, + "step": 140 + }, + { + "epoch": 0.1440329218106996, + "eval_global_dataset_loss": 3.7551324367523193, + "eval_global_dataset_runtime": 13.3267, + "eval_global_dataset_samples_per_second": 31.215, + "eval_global_dataset_steps_per_second": 0.3, + "step": 140 + }, + { + "epoch": 0.14506172839506173, + "grad_norm": 15.316786766052246, + "learning_rate": 5.0155763239875384e-06, + "loss": 4.1039, + "step": 141 + }, + { + "epoch": 0.14609053497942387, + "grad_norm": 40.62092971801758, + "learning_rate": 5.051921079958463e-06, + "loss": 6.598, + "step": 142 + }, + { + "epoch": 0.147119341563786, + "grad_norm": 15.075995445251465, + "learning_rate": 5.0882658359293865e-06, + "loss": 3.8367, + "step": 143 + }, + { + "epoch": 0.14814814814814814, + "grad_norm": 35.782997131347656, + "learning_rate": 5.1246105919003105e-06, + "loss": 6.798, + "step": 144 + }, + { + "epoch": 0.14917695473251028, + "grad_norm": 18.818031311035156, + "learning_rate": 5.160955347871235e-06, + "loss": 5.9084, + "step": 145 + }, + { + "epoch": 0.15020576131687244, + "grad_norm": 13.719802856445312, + "learning_rate": 5.197300103842159e-06, + "loss": 3.8655, + "step": 146 + }, + { + "epoch": 0.15123456790123457, + "grad_norm": 14.322517395019531, + "learning_rate": 5.233644859813083e-06, + "loss": 3.493, + "step": 147 + }, + { + "epoch": 0.1522633744855967, + "grad_norm": 13.363450050354004, + "learning_rate": 5.269989615784008e-06, + "loss": 3.4505, + "step": 148 + }, + { + "epoch": 0.15329218106995884, + "grad_norm": 15.056668281555176, + "learning_rate": 5.306334371754932e-06, + "loss": 3.9404, + "step": 149 + }, + { + "epoch": 0.15432098765432098, + "grad_norm": 22.7237491607666, + "learning_rate": 5.3426791277258555e-06, + "loss": 0.977, + "step": 150 + }, + { + "epoch": 0.15534979423868311, + "grad_norm": 12.547760009765625, + "learning_rate": 5.37902388369678e-06, + "loss": 3.5493, + "step": 151 + }, + { + "epoch": 0.15637860082304528, + "grad_norm": 13.1975679397583, + "learning_rate": 5.415368639667704e-06, + "loss": 3.2979, + "step": 152 + }, + { + "epoch": 0.1574074074074074, + "grad_norm": 13.909899711608887, + "learning_rate": 5.451713395638628e-06, + "loss": 3.375, + "step": 153 + }, + { + "epoch": 0.15843621399176955, + "grad_norm": 28.727937698364258, + "learning_rate": 5.488058151609553e-06, + "loss": 6.1056, + "step": 154 + }, + { + "epoch": 0.15946502057613168, + "grad_norm": 14.287629127502441, + "learning_rate": 5.524402907580477e-06, + "loss": 3.4521, + "step": 155 + }, + { + "epoch": 0.16049382716049382, + "grad_norm": 13.95241641998291, + "learning_rate": 5.560747663551401e-06, + "loss": 3.3153, + "step": 156 + }, + { + "epoch": 0.16152263374485595, + "grad_norm": 16.688383102416992, + "learning_rate": 5.597092419522326e-06, + "loss": 3.4398, + "step": 157 + }, + { + "epoch": 0.16255144032921812, + "grad_norm": 14.100769996643066, + "learning_rate": 5.63343717549325e-06, + "loss": 3.393, + "step": 158 + }, + { + "epoch": 0.16358024691358025, + "grad_norm": 36.206336975097656, + "learning_rate": 5.669781931464173e-06, + "loss": 6.2424, + "step": 159 + }, + { + "epoch": 0.1646090534979424, + "grad_norm": 25.714920043945312, + "learning_rate": 5.706126687435098e-06, + "loss": 5.7107, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_Qnli-dev_cosine_accuracy": 0.650390625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.881838858127594, + "eval_Qnli-dev_cosine_ap": 0.6388216177805902, + "eval_Qnli-dev_cosine_f1": 0.6594090202177294, + "eval_Qnli-dev_cosine_f1_threshold": 0.7928681969642639, + "eval_Qnli-dev_cosine_precision": 0.5208845208845209, + "eval_Qnli-dev_cosine_recall": 0.8983050847457628, + "eval_Qnli-dev_dot_accuracy": 0.599609375, + "eval_Qnli-dev_dot_accuracy_threshold": 383.6549072265625, + "eval_Qnli-dev_dot_ap": 0.530471167859375, + "eval_Qnli-dev_dot_f1": 0.6400000000000001, + "eval_Qnli-dev_dot_f1_threshold": 328.07598876953125, + "eval_Qnli-dev_dot_precision": 0.5024154589371981, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.6640625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 10.708932876586914, + "eval_Qnli-dev_euclidean_ap": 0.6617842095463597, + "eval_Qnli-dev_euclidean_f1": 0.65086887835703, + "eval_Qnli-dev_euclidean_f1_threshold": 13.247828483581543, + "eval_Qnli-dev_euclidean_precision": 0.5188916876574308, + "eval_Qnli-dev_euclidean_recall": 0.8728813559322034, + "eval_Qnli-dev_manhattan_accuracy": 0.671875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 203.75650024414062, + "eval_Qnli-dev_manhattan_ap": 0.6718309029852861, + "eval_Qnli-dev_manhattan_f1": 0.6581875993640699, + "eval_Qnli-dev_manhattan_f1_threshold": 251.2660675048828, + "eval_Qnli-dev_manhattan_precision": 0.5267175572519084, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.671875, + "eval_Qnli-dev_max_accuracy_threshold": 383.6549072265625, + "eval_Qnli-dev_max_ap": 0.6718309029852861, + "eval_Qnli-dev_max_f1": 0.6594090202177294, + "eval_Qnli-dev_max_f1_threshold": 328.07598876953125, + "eval_Qnli-dev_max_precision": 0.5267175572519084, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.669921875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.97170090675354, + "eval_allNLI-dev_cosine_ap": 0.42392325835068695, + "eval_allNLI-dev_cosine_f1": 0.532319391634981, + "eval_allNLI-dev_cosine_f1_threshold": 0.8698199987411499, + "eval_allNLI-dev_cosine_precision": 0.39660056657223797, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.666015625, + "eval_allNLI-dev_dot_accuracy_threshold": 489.699951171875, + "eval_allNLI-dev_dot_ap": 0.37488661345316393, + "eval_allNLI-dev_dot_f1": 0.5079365079365079, + "eval_allNLI-dev_dot_f1_threshold": 370.46728515625, + "eval_allNLI-dev_dot_precision": 0.350109409190372, + "eval_allNLI-dev_dot_recall": 0.9248554913294798, + "eval_allNLI-dev_euclidean_accuracy": 0.669921875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 5.109055995941162, + "eval_allNLI-dev_euclidean_ap": 0.42414702832207185, + "eval_allNLI-dev_euclidean_f1": 0.5325670498084292, + "eval_allNLI-dev_euclidean_f1_threshold": 11.284603118896484, + "eval_allNLI-dev_euclidean_precision": 0.3982808022922636, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.671875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 118.02589416503906, + "eval_allNLI-dev_manhattan_ap": 0.42975205717472725, + "eval_allNLI-dev_manhattan_f1": 0.5330948121645797, + "eval_allNLI-dev_manhattan_f1_threshold": 225.3105010986328, + "eval_allNLI-dev_manhattan_precision": 0.3860103626943005, + "eval_allNLI-dev_manhattan_recall": 0.861271676300578, + "eval_allNLI-dev_max_accuracy": 0.671875, + "eval_allNLI-dev_max_accuracy_threshold": 489.699951171875, + "eval_allNLI-dev_max_ap": 0.42975205717472725, + "eval_allNLI-dev_max_f1": 0.5330948121645797, + "eval_allNLI-dev_max_f1_threshold": 370.46728515625, + "eval_allNLI-dev_max_precision": 0.3982808022922636, + "eval_allNLI-dev_max_recall": 0.9248554913294798, + "eval_sequential_score": 0.6718309029852861, + "eval_sts-test_pearson_cosine": 0.2593214673103316, + "eval_sts-test_pearson_dot": 0.2856451479091534, + "eval_sts-test_pearson_euclidean": 0.2633545673906765, + "eval_sts-test_pearson_manhattan": 0.27094196150257477, + "eval_sts-test_pearson_max": 0.2856451479091534, + "eval_sts-test_spearman_cosine": 0.3239060946012997, + "eval_sts-test_spearman_dot": 0.3020791143702586, + "eval_sts-test_spearman_euclidean": 0.29537649419536166, + "eval_sts-test_spearman_manhattan": 0.30477367732115745, + "eval_sts-test_spearman_max": 0.3239060946012997, + "eval_vitaminc-pairs_loss": 2.7820005416870117, + "eval_vitaminc-pairs_runtime": 3.1613, + "eval_vitaminc-pairs_samples_per_second": 40.489, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_negation-triplets_loss": 3.2458996772766113, + "eval_negation-triplets_runtime": 0.7284, + "eval_negation-triplets_samples_per_second": 175.738, + "eval_negation-triplets_steps_per_second": 1.373, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_scitail-pairs-pos_loss": 0.9951260685920715, + "eval_scitail-pairs-pos_runtime": 0.7927, + "eval_scitail-pairs-pos_samples_per_second": 161.465, + "eval_scitail-pairs-pos_steps_per_second": 1.261, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_scitail-pairs-qa_loss": 0.8330278396606445, + "eval_scitail-pairs-qa_runtime": 0.5647, + "eval_scitail-pairs-qa_samples_per_second": 226.68, + "eval_scitail-pairs-qa_steps_per_second": 1.771, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_xsum-pairs_loss": 5.1889238357543945, + "eval_xsum-pairs_runtime": 3.014, + "eval_xsum-pairs_samples_per_second": 42.469, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_sciq_pairs_loss": 0.5267525315284729, + "eval_sciq_pairs_runtime": 3.3969, + "eval_sciq_pairs_samples_per_second": 37.682, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_qasc_pairs_loss": 2.3833937644958496, + "eval_qasc_pairs_runtime": 0.5986, + "eval_qasc_pairs_samples_per_second": 213.816, + "eval_qasc_pairs_steps_per_second": 1.67, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_openbookqa_pairs_loss": 4.070493221282959, + "eval_openbookqa_pairs_runtime": 0.5688, + "eval_openbookqa_pairs_samples_per_second": 225.041, + "eval_openbookqa_pairs_steps_per_second": 1.758, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_msmarco_pairs_loss": 5.934054374694824, + "eval_msmarco_pairs_runtime": 1.5142, + "eval_msmarco_pairs_samples_per_second": 84.535, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_nq_pairs_loss": 5.961860656738281, + "eval_nq_pairs_runtime": 2.8919, + "eval_nq_pairs_samples_per_second": 44.261, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_trivia_pairs_loss": 5.741409778594971, + "eval_trivia_pairs_runtime": 3.4249, + "eval_trivia_pairs_samples_per_second": 37.373, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_gooaq_pairs_loss": 4.762550354003906, + "eval_gooaq_pairs_runtime": 0.9413, + "eval_gooaq_pairs_samples_per_second": 135.983, + "eval_gooaq_pairs_steps_per_second": 1.062, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_paws-pos_loss": 0.17168374359607697, + "eval_paws-pos_runtime": 0.6912, + "eval_paws-pos_samples_per_second": 185.177, + "eval_paws-pos_steps_per_second": 1.447, + "step": 160 + }, + { + "epoch": 0.1646090534979424, + "eval_global_dataset_loss": 3.0964090824127197, + "eval_global_dataset_runtime": 13.333, + "eval_global_dataset_samples_per_second": 31.201, + "eval_global_dataset_steps_per_second": 0.3, + "step": 160 + }, + { + "epoch": 0.16563786008230452, + "grad_norm": 16.593652725219727, + "learning_rate": 5.742471443406022e-06, + "loss": 4.6423, + "step": 161 + }, + { + "epoch": 0.16666666666666666, + "grad_norm": 26.777013778686523, + "learning_rate": 5.778816199376946e-06, + "loss": 5.7346, + "step": 162 + }, + { + "epoch": 0.16769547325102882, + "grad_norm": 15.474895477294922, + "learning_rate": 5.815160955347871e-06, + "loss": 3.3701, + "step": 163 + }, + { + "epoch": 0.16872427983539096, + "grad_norm": 17.254573822021484, + "learning_rate": 5.851505711318795e-06, + "loss": 3.2528, + "step": 164 + }, + { + "epoch": 0.1697530864197531, + "grad_norm": 17.283357620239258, + "learning_rate": 5.887850467289719e-06, + "loss": 4.5692, + "step": 165 + }, + { + "epoch": 0.17078189300411523, + "grad_norm": 18.08893585205078, + "learning_rate": 5.924195223260644e-06, + "loss": 3.4966, + "step": 166 + }, + { + "epoch": 0.17181069958847736, + "grad_norm": 17.073596954345703, + "learning_rate": 5.960539979231567e-06, + "loss": 4.5332, + "step": 167 + }, + { + "epoch": 0.1728395061728395, + "grad_norm": 14.176384925842285, + "learning_rate": 5.996884735202491e-06, + "loss": 3.7013, + "step": 168 + }, + { + "epoch": 0.17386831275720166, + "grad_norm": 21.731842041015625, + "learning_rate": 6.033229491173416e-06, + "loss": 5.3538, + "step": 169 + }, + { + "epoch": 0.1748971193415638, + "grad_norm": 13.646337509155273, + "learning_rate": 6.06957424714434e-06, + "loss": 2.8866, + "step": 170 + }, + { + "epoch": 0.17592592592592593, + "grad_norm": 17.945281982421875, + "learning_rate": 6.105919003115264e-06, + "loss": 2.9547, + "step": 171 + }, + { + "epoch": 0.17695473251028807, + "grad_norm": 22.94412612915039, + "learning_rate": 6.142263759086189e-06, + "loss": 5.2232, + "step": 172 + }, + { + "epoch": 0.1779835390946502, + "grad_norm": 33.40188980102539, + "learning_rate": 6.178608515057113e-06, + "loss": 5.6116, + "step": 173 + }, + { + "epoch": 0.17901234567901234, + "grad_norm": 13.978586196899414, + "learning_rate": 6.214953271028036e-06, + "loss": 2.8644, + "step": 174 + }, + { + "epoch": 0.1800411522633745, + "grad_norm": 22.077098846435547, + "learning_rate": 6.251298026998961e-06, + "loss": 1.9015, + "step": 175 + }, + { + "epoch": 0.18106995884773663, + "grad_norm": 20.8638858795166, + "learning_rate": 6.287642782969885e-06, + "loss": 4.6564, + "step": 176 + }, + { + "epoch": 0.18209876543209877, + "grad_norm": 14.965616226196289, + "learning_rate": 6.323987538940809e-06, + "loss": 3.0869, + "step": 177 + }, + { + "epoch": 0.1831275720164609, + "grad_norm": 20.76239013671875, + "learning_rate": 6.360332294911734e-06, + "loss": 1.7242, + "step": 178 + }, + { + "epoch": 0.18415637860082304, + "grad_norm": 16.349937438964844, + "learning_rate": 6.396677050882658e-06, + "loss": 4.275, + "step": 179 + }, + { + "epoch": 0.18518518518518517, + "grad_norm": 23.983245849609375, + "learning_rate": 6.433021806853582e-06, + "loss": 5.6429, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_Qnli-dev_cosine_accuracy": 0.6484375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8662997484207153, + "eval_Qnli-dev_cosine_ap": 0.6524497730088078, + "eval_Qnli-dev_cosine_f1": 0.6678023850085179, + "eval_Qnli-dev_cosine_f1_threshold": 0.8081307411193848, + "eval_Qnli-dev_cosine_precision": 0.5584045584045584, + "eval_Qnli-dev_cosine_recall": 0.8305084745762712, + "eval_Qnli-dev_dot_accuracy": 0.623046875, + "eval_Qnli-dev_dot_accuracy_threshold": 385.58721923828125, + "eval_Qnli-dev_dot_ap": 0.5552533197510849, + "eval_Qnli-dev_dot_f1": 0.6540145985401459, + "eval_Qnli-dev_dot_f1_threshold": 316.07781982421875, + "eval_Qnli-dev_dot_precision": 0.49888641425389757, + "eval_Qnli-dev_dot_recall": 0.9491525423728814, + "eval_Qnli-dev_euclidean_accuracy": 0.65625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 10.853160858154297, + "eval_Qnli-dev_euclidean_ap": 0.6669108151611487, + "eval_Qnli-dev_euclidean_f1": 0.6678200692041524, + "eval_Qnli-dev_euclidean_f1_threshold": 12.968579292297363, + "eval_Qnli-dev_euclidean_precision": 0.564327485380117, + "eval_Qnli-dev_euclidean_recall": 0.8177966101694916, + "eval_Qnli-dev_manhattan_accuracy": 0.6640625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 223.08535766601562, + "eval_Qnli-dev_manhattan_ap": 0.6760180782489211, + "eval_Qnli-dev_manhattan_f1": 0.6678082191780822, + "eval_Qnli-dev_manhattan_f1_threshold": 253.64254760742188, + "eval_Qnli-dev_manhattan_precision": 0.5603448275862069, + "eval_Qnli-dev_manhattan_recall": 0.826271186440678, + "eval_Qnli-dev_max_accuracy": 0.6640625, + "eval_Qnli-dev_max_accuracy_threshold": 385.58721923828125, + "eval_Qnli-dev_max_ap": 0.6760180782489211, + "eval_Qnli-dev_max_f1": 0.6678200692041524, + "eval_Qnli-dev_max_f1_threshold": 316.07781982421875, + "eval_Qnli-dev_max_precision": 0.564327485380117, + "eval_Qnli-dev_max_recall": 0.9491525423728814, + "eval_allNLI-dev_cosine_accuracy": 0.671875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9789707064628601, + "eval_allNLI-dev_cosine_ap": 0.4524477715499502, + "eval_allNLI-dev_cosine_f1": 0.5444444444444444, + "eval_allNLI-dev_cosine_f1_threshold": 0.8516685962677002, + "eval_allNLI-dev_cosine_precision": 0.40054495912806537, + "eval_allNLI-dev_cosine_recall": 0.8497109826589595, + "eval_allNLI-dev_dot_accuracy": 0.66796875, + "eval_allNLI-dev_dot_accuracy_threshold": 524.80029296875, + "eval_allNLI-dev_dot_ap": 0.39641106298067524, + "eval_allNLI-dev_dot_f1": 0.5137614678899083, + "eval_allNLI-dev_dot_f1_threshold": 341.9022216796875, + "eval_allNLI-dev_dot_precision": 0.3492723492723493, + "eval_allNLI-dev_dot_recall": 0.9710982658959537, + "eval_allNLI-dev_euclidean_accuracy": 0.671875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 4.58498477935791, + "eval_allNLI-dev_euclidean_ap": 0.4516531171082357, + "eval_allNLI-dev_euclidean_f1": 0.552, + "eval_allNLI-dev_euclidean_f1_threshold": 11.532356262207031, + "eval_allNLI-dev_euclidean_precision": 0.42201834862385323, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.671875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 107.1644058227539, + "eval_allNLI-dev_manhattan_ap": 0.45547703653651306, + "eval_allNLI-dev_manhattan_f1": 0.5475285171102661, + "eval_allNLI-dev_manhattan_f1_threshold": 226.54490661621094, + "eval_allNLI-dev_manhattan_precision": 0.40793201133144474, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.671875, + "eval_allNLI-dev_max_accuracy_threshold": 524.80029296875, + "eval_allNLI-dev_max_ap": 0.45547703653651306, + "eval_allNLI-dev_max_f1": 0.552, + "eval_allNLI-dev_max_f1_threshold": 341.9022216796875, + "eval_allNLI-dev_max_precision": 0.42201834862385323, + "eval_allNLI-dev_max_recall": 0.9710982658959537, + "eval_sequential_score": 0.6760180782489211, + "eval_sts-test_pearson_cosine": 0.3078712504181006, + "eval_sts-test_pearson_dot": 0.30687493336995575, + "eval_sts-test_pearson_euclidean": 0.3149216629485325, + "eval_sts-test_pearson_manhattan": 0.32260490964216404, + "eval_sts-test_pearson_max": 0.32260490964216404, + "eval_sts-test_spearman_cosine": 0.3756490245649231, + "eval_sts-test_spearman_dot": 0.32074983288662573, + "eval_sts-test_spearman_euclidean": 0.34933389557767713, + "eval_sts-test_spearman_manhattan": 0.35486430121168766, + "eval_sts-test_spearman_max": 0.3756490245649231, + "eval_vitaminc-pairs_loss": 2.734696388244629, + "eval_vitaminc-pairs_runtime": 3.1523, + "eval_vitaminc-pairs_samples_per_second": 40.606, + "eval_vitaminc-pairs_steps_per_second": 0.317, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_negation-triplets_loss": 3.099722146987915, + "eval_negation-triplets_runtime": 0.7375, + "eval_negation-triplets_samples_per_second": 173.567, + "eval_negation-triplets_steps_per_second": 1.356, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_scitail-pairs-pos_loss": 0.6654092073440552, + "eval_scitail-pairs-pos_runtime": 0.7978, + "eval_scitail-pairs-pos_samples_per_second": 160.446, + "eval_scitail-pairs-pos_steps_per_second": 1.253, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_scitail-pairs-qa_loss": 0.4432713985443115, + "eval_scitail-pairs-qa_runtime": 0.5613, + "eval_scitail-pairs-qa_samples_per_second": 228.051, + "eval_scitail-pairs-qa_steps_per_second": 1.782, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_xsum-pairs_loss": 4.390190601348877, + "eval_xsum-pairs_runtime": 3.0147, + "eval_xsum-pairs_samples_per_second": 42.459, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_sciq_pairs_loss": 0.43316569924354553, + "eval_sciq_pairs_runtime": 3.4401, + "eval_sciq_pairs_samples_per_second": 37.208, + "eval_sciq_pairs_steps_per_second": 0.291, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_qasc_pairs_loss": 2.034595251083374, + "eval_qasc_pairs_runtime": 0.5991, + "eval_qasc_pairs_samples_per_second": 213.665, + "eval_qasc_pairs_steps_per_second": 1.669, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_openbookqa_pairs_loss": 3.2861831188201904, + "eval_openbookqa_pairs_runtime": 0.5738, + "eval_openbookqa_pairs_samples_per_second": 223.062, + "eval_openbookqa_pairs_steps_per_second": 1.743, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_msmarco_pairs_loss": 5.0437421798706055, + "eval_msmarco_pairs_runtime": 1.529, + "eval_msmarco_pairs_samples_per_second": 83.713, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_nq_pairs_loss": 5.238871097564697, + "eval_nq_pairs_runtime": 2.9133, + "eval_nq_pairs_samples_per_second": 43.936, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_trivia_pairs_loss": 5.040083885192871, + "eval_trivia_pairs_runtime": 3.4307, + "eval_trivia_pairs_samples_per_second": 37.311, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_gooaq_pairs_loss": 4.043346881866455, + "eval_gooaq_pairs_runtime": 0.947, + "eval_gooaq_pairs_samples_per_second": 135.164, + "eval_gooaq_pairs_steps_per_second": 1.056, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_paws-pos_loss": 0.15505897998809814, + "eval_paws-pos_runtime": 0.6791, + "eval_paws-pos_samples_per_second": 188.472, + "eval_paws-pos_steps_per_second": 1.472, + "step": 180 + }, + { + "epoch": 0.18518518518518517, + "eval_global_dataset_loss": 2.481849193572998, + "eval_global_dataset_runtime": 13.3357, + "eval_global_dataset_samples_per_second": 31.194, + "eval_global_dataset_steps_per_second": 0.3, + "step": 180 + }, + { + "epoch": 0.18621399176954734, + "grad_norm": 23.770732879638672, + "learning_rate": 6.469366562824507e-06, + "loss": 4.1785, + "step": 181 + }, + { + "epoch": 0.18724279835390947, + "grad_norm": 15.494194984436035, + "learning_rate": 6.505711318795431e-06, + "loss": 2.8654, + "step": 182 + }, + { + "epoch": 0.1882716049382716, + "grad_norm": 15.12114429473877, + "learning_rate": 6.542056074766354e-06, + "loss": 2.9405, + "step": 183 + }, + { + "epoch": 0.18930041152263374, + "grad_norm": 14.066164016723633, + "learning_rate": 6.578400830737279e-06, + "loss": 2.6342, + "step": 184 + }, + { + "epoch": 0.19032921810699588, + "grad_norm": 15.717785835266113, + "learning_rate": 6.614745586708203e-06, + "loss": 3.8401, + "step": 185 + }, + { + "epoch": 0.19135802469135801, + "grad_norm": 13.966800689697266, + "learning_rate": 6.651090342679127e-06, + "loss": 3.6136, + "step": 186 + }, + { + "epoch": 0.19238683127572018, + "grad_norm": 15.663467407226562, + "learning_rate": 6.687435098650052e-06, + "loss": 3.0736, + "step": 187 + }, + { + "epoch": 0.1934156378600823, + "grad_norm": 26.5308837890625, + "learning_rate": 6.723779854620976e-06, + "loss": 5.4694, + "step": 188 + }, + { + "epoch": 0.19444444444444445, + "grad_norm": 15.630946159362793, + "learning_rate": 6.7601246105919e-06, + "loss": 2.7903, + "step": 189 + }, + { + "epoch": 0.19547325102880658, + "grad_norm": 20.857738494873047, + "learning_rate": 6.796469366562825e-06, + "loss": 3.8552, + "step": 190 + }, + { + "epoch": 0.19650205761316872, + "grad_norm": 21.9176082611084, + "learning_rate": 6.832814122533748e-06, + "loss": 4.133, + "step": 191 + }, + { + "epoch": 0.19753086419753085, + "grad_norm": 13.483908653259277, + "learning_rate": 6.869158878504672e-06, + "loss": 3.3172, + "step": 192 + }, + { + "epoch": 0.19855967078189302, + "grad_norm": 22.11282730102539, + "learning_rate": 6.905503634475597e-06, + "loss": 4.5717, + "step": 193 + }, + { + "epoch": 0.19958847736625515, + "grad_norm": 14.112103462219238, + "learning_rate": 6.941848390446521e-06, + "loss": 2.6895, + "step": 194 + }, + { + "epoch": 0.2006172839506173, + "grad_norm": 26.939516067504883, + "learning_rate": 6.978193146417445e-06, + "loss": 4.8326, + "step": 195 + }, + { + "epoch": 0.20164609053497942, + "grad_norm": 21.34830665588379, + "learning_rate": 7.01453790238837e-06, + "loss": 4.6939, + "step": 196 + }, + { + "epoch": 0.20267489711934156, + "grad_norm": 15.555133819580078, + "learning_rate": 7.050882658359294e-06, + "loss": 2.6114, + "step": 197 + }, + { + "epoch": 0.2037037037037037, + "grad_norm": 18.566102981567383, + "learning_rate": 7.087227414330217e-06, + "loss": 4.0035, + "step": 198 + }, + { + "epoch": 0.20473251028806586, + "grad_norm": 20.652629852294922, + "learning_rate": 7.123572170301142e-06, + "loss": 4.6457, + "step": 199 + }, + { + "epoch": 0.205761316872428, + "grad_norm": 13.168004989624023, + "learning_rate": 7.159916926272066e-06, + "loss": 2.8982, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_Qnli-dev_cosine_accuracy": 0.6640625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8493491411209106, + "eval_Qnli-dev_cosine_ap": 0.6558066754883848, + "eval_Qnli-dev_cosine_f1": 0.6813559322033899, + "eval_Qnli-dev_cosine_f1_threshold": 0.8149238228797913, + "eval_Qnli-dev_cosine_precision": 0.5677966101694916, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.625, + "eval_Qnli-dev_dot_accuracy_threshold": 415.6668395996094, + "eval_Qnli-dev_dot_ap": 0.5566403929041909, + "eval_Qnli-dev_dot_f1": 0.6607929515418502, + "eval_Qnli-dev_dot_f1_threshold": 356.9716491699219, + "eval_Qnli-dev_dot_precision": 0.5056179775280899, + "eval_Qnli-dev_dot_recall": 0.9533898305084746, + "eval_Qnli-dev_euclidean_accuracy": 0.666015625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 11.828110694885254, + "eval_Qnli-dev_euclidean_ap": 0.6719522122361475, + "eval_Qnli-dev_euclidean_f1": 0.6812080536912752, + "eval_Qnli-dev_euclidean_f1_threshold": 13.571544647216797, + "eval_Qnli-dev_euclidean_precision": 0.5638888888888889, + "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, + "eval_Qnli-dev_manhattan_accuracy": 0.666015625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 251.22454833984375, + "eval_Qnli-dev_manhattan_ap": 0.6748374333603533, + "eval_Qnli-dev_manhattan_f1": 0.687813021702838, + "eval_Qnli-dev_manhattan_f1_threshold": 265.57940673828125, + "eval_Qnli-dev_manhattan_precision": 0.5674931129476584, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.666015625, + "eval_Qnli-dev_max_accuracy_threshold": 415.6668395996094, + "eval_Qnli-dev_max_ap": 0.6748374333603533, + "eval_Qnli-dev_max_f1": 0.687813021702838, + "eval_Qnli-dev_max_f1_threshold": 356.9716491699219, + "eval_Qnli-dev_max_precision": 0.5677966101694916, + "eval_Qnli-dev_max_recall": 0.9533898305084746, + "eval_allNLI-dev_cosine_accuracy": 0.673828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9595370292663574, + "eval_allNLI-dev_cosine_ap": 0.4708419415503453, + "eval_allNLI-dev_cosine_f1": 0.5435684647302905, + "eval_allNLI-dev_cosine_f1_threshold": 0.8678731918334961, + "eval_allNLI-dev_cosine_precision": 0.42394822006472493, + "eval_allNLI-dev_cosine_recall": 0.7572254335260116, + "eval_allNLI-dev_dot_accuracy": 0.669921875, + "eval_allNLI-dev_dot_accuracy_threshold": 541.4039306640625, + "eval_allNLI-dev_dot_ap": 0.4068383335197935, + "eval_allNLI-dev_dot_f1": 0.521875, + "eval_allNLI-dev_dot_f1_threshold": 372.6112060546875, + "eval_allNLI-dev_dot_precision": 0.3576017130620985, + "eval_allNLI-dev_dot_recall": 0.9653179190751445, + "eval_allNLI-dev_euclidean_accuracy": 0.673828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 6.293747901916504, + "eval_allNLI-dev_euclidean_ap": 0.47115509667092914, + "eval_allNLI-dev_euclidean_f1": 0.544, + "eval_allNLI-dev_euclidean_f1_threshold": 11.993677139282227, + "eval_allNLI-dev_euclidean_precision": 0.41590214067278286, + "eval_allNLI-dev_euclidean_recall": 0.7861271676300579, + "eval_allNLI-dev_manhattan_accuracy": 0.67578125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 141.41348266601562, + "eval_allNLI-dev_manhattan_ap": 0.4773047143908946, + "eval_allNLI-dev_manhattan_f1": 0.5521235521235521, + "eval_allNLI-dev_manhattan_f1_threshold": 238.16036987304688, + "eval_allNLI-dev_manhattan_precision": 0.4144927536231884, + "eval_allNLI-dev_manhattan_recall": 0.8265895953757225, + "eval_allNLI-dev_max_accuracy": 0.67578125, + "eval_allNLI-dev_max_accuracy_threshold": 541.4039306640625, + "eval_allNLI-dev_max_ap": 0.4773047143908946, + "eval_allNLI-dev_max_f1": 0.5521235521235521, + "eval_allNLI-dev_max_f1_threshold": 372.6112060546875, + "eval_allNLI-dev_max_precision": 0.42394822006472493, + "eval_allNLI-dev_max_recall": 0.9653179190751445, + "eval_sequential_score": 0.6748374333603533, + "eval_sts-test_pearson_cosine": 0.37650448121246105, + "eval_sts-test_pearson_dot": 0.34722884377459334, + "eval_sts-test_pearson_euclidean": 0.3832908339538646, + "eval_sts-test_pearson_manhattan": 0.3852968520690805, + "eval_sts-test_pearson_max": 0.3852968520690805, + "eval_sts-test_spearman_cosine": 0.4334008406493539, + "eval_sts-test_spearman_dot": 0.35710334107288355, + "eval_sts-test_spearman_euclidean": 0.408594276683612, + "eval_sts-test_spearman_manhattan": 0.4087942700707702, + "eval_sts-test_spearman_max": 0.4334008406493539, + "eval_vitaminc-pairs_loss": 2.7392194271087646, + "eval_vitaminc-pairs_runtime": 3.1693, + "eval_vitaminc-pairs_samples_per_second": 40.387, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_negation-triplets_loss": 2.8204259872436523, + "eval_negation-triplets_runtime": 0.7414, + "eval_negation-triplets_samples_per_second": 172.651, + "eval_negation-triplets_steps_per_second": 1.349, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_scitail-pairs-pos_loss": 0.5787031054496765, + "eval_scitail-pairs-pos_runtime": 0.8028, + "eval_scitail-pairs-pos_samples_per_second": 159.448, + "eval_scitail-pairs-pos_steps_per_second": 1.246, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_scitail-pairs-qa_loss": 0.3260263204574585, + "eval_scitail-pairs-qa_runtime": 0.5665, + "eval_scitail-pairs-qa_samples_per_second": 225.934, + "eval_scitail-pairs-qa_steps_per_second": 1.765, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_xsum-pairs_loss": 3.6776349544525146, + "eval_xsum-pairs_runtime": 3.0162, + "eval_xsum-pairs_samples_per_second": 42.437, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_sciq_pairs_loss": 0.3696608543395996, + "eval_sciq_pairs_runtime": 3.4228, + "eval_sciq_pairs_samples_per_second": 37.396, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_qasc_pairs_loss": 1.7308318614959717, + "eval_qasc_pairs_runtime": 0.5951, + "eval_qasc_pairs_samples_per_second": 215.086, + "eval_qasc_pairs_steps_per_second": 1.68, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_openbookqa_pairs_loss": 2.8745713233947754, + "eval_openbookqa_pairs_runtime": 0.5756, + "eval_openbookqa_pairs_samples_per_second": 222.393, + "eval_openbookqa_pairs_steps_per_second": 1.737, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_msmarco_pairs_loss": 4.048874378204346, + "eval_msmarco_pairs_runtime": 1.5159, + "eval_msmarco_pairs_samples_per_second": 84.439, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_nq_pairs_loss": 4.402989387512207, + "eval_nq_pairs_runtime": 2.8983, + "eval_nq_pairs_samples_per_second": 44.163, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_trivia_pairs_loss": 4.454685688018799, + "eval_trivia_pairs_runtime": 3.4474, + "eval_trivia_pairs_samples_per_second": 37.129, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_gooaq_pairs_loss": 3.200054168701172, + "eval_gooaq_pairs_runtime": 0.956, + "eval_gooaq_pairs_samples_per_second": 133.894, + "eval_gooaq_pairs_steps_per_second": 1.046, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_paws-pos_loss": 0.17940819263458252, + "eval_paws-pos_runtime": 0.6752, + "eval_paws-pos_samples_per_second": 189.577, + "eval_paws-pos_steps_per_second": 1.481, + "step": 200 + }, + { + "epoch": 0.205761316872428, + "eval_global_dataset_loss": 2.0389692783355713, + "eval_global_dataset_runtime": 13.3595, + "eval_global_dataset_samples_per_second": 31.139, + "eval_global_dataset_steps_per_second": 0.299, + "step": 200 + }, + { + "epoch": 0.20679012345679013, + "grad_norm": 20.6440372467041, + "learning_rate": 7.19626168224299e-06, + "loss": 4.1212, + "step": 201 + }, + { + "epoch": 0.20781893004115226, + "grad_norm": 18.542131423950195, + "learning_rate": 7.232606438213915e-06, + "loss": 3.712, + "step": 202 + }, + { + "epoch": 0.2088477366255144, + "grad_norm": 13.649810791015625, + "learning_rate": 7.268951194184839e-06, + "loss": 2.4475, + "step": 203 + }, + { + "epoch": 0.20987654320987653, + "grad_norm": 14.254504203796387, + "learning_rate": 7.305295950155763e-06, + "loss": 2.0375, + "step": 204 + }, + { + "epoch": 0.2109053497942387, + "grad_norm": 16.941804885864258, + "learning_rate": 7.341640706126687e-06, + "loss": 3.6423, + "step": 205 + }, + { + "epoch": 0.21193415637860083, + "grad_norm": 24.76467514038086, + "learning_rate": 7.3779854620976116e-06, + "loss": 5.0227, + "step": 206 + }, + { + "epoch": 0.21296296296296297, + "grad_norm": 27.70640754699707, + "learning_rate": 7.414330218068535e-06, + "loss": 4.743, + "step": 207 + }, + { + "epoch": 0.2139917695473251, + "grad_norm": 19.96710777282715, + "learning_rate": 7.450674974039459e-06, + "loss": 4.502, + "step": 208 + }, + { + "epoch": 0.21502057613168724, + "grad_norm": 13.25556468963623, + "learning_rate": 7.487019730010384e-06, + "loss": 2.2948, + "step": 209 + }, + { + "epoch": 0.21604938271604937, + "grad_norm": 14.281882286071777, + "learning_rate": 7.523364485981308e-06, + "loss": 3.3056, + "step": 210 + }, + { + "epoch": 0.21707818930041153, + "grad_norm": 12.938163757324219, + "learning_rate": 7.559709241952232e-06, + "loss": 2.1324, + "step": 211 + }, + { + "epoch": 0.21810699588477367, + "grad_norm": 13.252862930297852, + "learning_rate": 7.5960539979231565e-06, + "loss": 2.2595, + "step": 212 + }, + { + "epoch": 0.2191358024691358, + "grad_norm": 13.162984848022461, + "learning_rate": 7.63239875389408e-06, + "loss": 2.3108, + "step": 213 + }, + { + "epoch": 0.22016460905349794, + "grad_norm": 12.221834182739258, + "learning_rate": 7.668743509865004e-06, + "loss": 2.7378, + "step": 214 + }, + { + "epoch": 0.22119341563786007, + "grad_norm": 12.927008628845215, + "learning_rate": 7.70508826583593e-06, + "loss": 2.3095, + "step": 215 + }, + { + "epoch": 0.2222222222222222, + "grad_norm": 18.692045211791992, + "learning_rate": 7.741433021806853e-06, + "loss": 3.4556, + "step": 216 + }, + { + "epoch": 0.22325102880658437, + "grad_norm": 15.472359657287598, + "learning_rate": 7.777777777777777e-06, + "loss": 2.9786, + "step": 217 + }, + { + "epoch": 0.2242798353909465, + "grad_norm": 12.372124671936035, + "learning_rate": 7.814122533748701e-06, + "loss": 1.9408, + "step": 218 + }, + { + "epoch": 0.22530864197530864, + "grad_norm": 13.370574951171875, + "learning_rate": 7.850467289719626e-06, + "loss": 2.9416, + "step": 219 + }, + { + "epoch": 0.22633744855967078, + "grad_norm": 12.76834487915039, + "learning_rate": 7.88681204569055e-06, + "loss": 2.0087, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_Qnli-dev_cosine_accuracy": 0.666015625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8367502689361572, + "eval_Qnli-dev_cosine_ap": 0.657497384714659, + "eval_Qnli-dev_cosine_f1": 0.6850828729281769, + "eval_Qnli-dev_cosine_f1_threshold": 0.8328432440757751, + "eval_Qnli-dev_cosine_precision": 0.6058631921824105, + "eval_Qnli-dev_cosine_recall": 0.788135593220339, + "eval_Qnli-dev_dot_accuracy": 0.62109375, + "eval_Qnli-dev_dot_accuracy_threshold": 443.7711181640625, + "eval_Qnli-dev_dot_ap": 0.561375704126675, + "eval_Qnli-dev_dot_f1": 0.6607407407407409, + "eval_Qnli-dev_dot_f1_threshold": 383.77728271484375, + "eval_Qnli-dev_dot_precision": 0.5079726651480638, + "eval_Qnli-dev_dot_recall": 0.9449152542372882, + "eval_Qnli-dev_euclidean_accuracy": 0.666015625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.143888473510742, + "eval_Qnli-dev_euclidean_ap": 0.669642308468768, + "eval_Qnli-dev_euclidean_f1": 0.6845637583892616, + "eval_Qnli-dev_euclidean_f1_threshold": 14.205205917358398, + "eval_Qnli-dev_euclidean_precision": 0.5666666666666667, + "eval_Qnli-dev_euclidean_recall": 0.864406779661017, + "eval_Qnli-dev_manhattan_accuracy": 0.658203125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 212.78713989257812, + "eval_Qnli-dev_manhattan_ap": 0.6716707737018695, + "eval_Qnli-dev_manhattan_f1": 0.6907894736842105, + "eval_Qnli-dev_manhattan_f1_threshold": 283.0830383300781, + "eval_Qnli-dev_manhattan_precision": 0.5645161290322581, + "eval_Qnli-dev_manhattan_recall": 0.8898305084745762, + "eval_Qnli-dev_max_accuracy": 0.666015625, + "eval_Qnli-dev_max_accuracy_threshold": 443.7711181640625, + "eval_Qnli-dev_max_ap": 0.6716707737018695, + "eval_Qnli-dev_max_f1": 0.6907894736842105, + "eval_Qnli-dev_max_f1_threshold": 383.77728271484375, + "eval_Qnli-dev_max_precision": 0.6058631921824105, + "eval_Qnli-dev_max_recall": 0.9449152542372882, + "eval_allNLI-dev_cosine_accuracy": 0.677734375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9583283066749573, + "eval_allNLI-dev_cosine_ap": 0.4885246068143708, + "eval_allNLI-dev_cosine_f1": 0.5432937181663838, + "eval_allNLI-dev_cosine_f1_threshold": 0.8263977766036987, + "eval_allNLI-dev_cosine_precision": 0.38461538461538464, + "eval_allNLI-dev_cosine_recall": 0.9248554913294798, + "eval_allNLI-dev_dot_accuracy": 0.66796875, + "eval_allNLI-dev_dot_accuracy_threshold": 568.2660522460938, + "eval_allNLI-dev_dot_ap": 0.4122244378970726, + "eval_allNLI-dev_dot_f1": 0.5243328100470958, + "eval_allNLI-dev_dot_f1_threshold": 404.83544921875, + "eval_allNLI-dev_dot_precision": 0.3599137931034483, + "eval_allNLI-dev_dot_recall": 0.9653179190751445, + "eval_allNLI-dev_euclidean_accuracy": 0.673828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 5.9007158279418945, + "eval_allNLI-dev_euclidean_ap": 0.4870900115925687, + "eval_allNLI-dev_euclidean_f1": 0.5494880546075086, + "eval_allNLI-dev_euclidean_f1_threshold": 13.606775283813477, + "eval_allNLI-dev_euclidean_precision": 0.3898305084745763, + "eval_allNLI-dev_euclidean_recall": 0.930635838150289, + "eval_allNLI-dev_manhattan_accuracy": 0.6796875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 149.96470642089844, + "eval_allNLI-dev_manhattan_ap": 0.48775989657604024, + "eval_allNLI-dev_manhattan_f1": 0.5509433962264151, + "eval_allNLI-dev_manhattan_f1_threshold": 245.671875, + "eval_allNLI-dev_manhattan_precision": 0.40896358543417366, + "eval_allNLI-dev_manhattan_recall": 0.8439306358381503, + "eval_allNLI-dev_max_accuracy": 0.6796875, + "eval_allNLI-dev_max_accuracy_threshold": 568.2660522460938, + "eval_allNLI-dev_max_ap": 0.4885246068143708, + "eval_allNLI-dev_max_f1": 0.5509433962264151, + "eval_allNLI-dev_max_f1_threshold": 404.83544921875, + "eval_allNLI-dev_max_precision": 0.40896358543417366, + "eval_allNLI-dev_max_recall": 0.9653179190751445, + "eval_sequential_score": 0.6716707737018695, + "eval_sts-test_pearson_cosine": 0.47370132819582667, + "eval_sts-test_pearson_dot": 0.4090770475954118, + "eval_sts-test_pearson_euclidean": 0.47821395607635725, + "eval_sts-test_pearson_manhattan": 0.4805462866477066, + "eval_sts-test_pearson_max": 0.4805462866477066, + "eval_sts-test_spearman_cosine": 0.5169709124658022, + "eval_sts-test_spearman_dot": 0.4142341886542473, + "eval_sts-test_spearman_euclidean": 0.4963594659966741, + "eval_sts-test_spearman_manhattan": 0.49648047340747653, + "eval_sts-test_spearman_max": 0.5169709124658022, + "eval_vitaminc-pairs_loss": 2.7133967876434326, + "eval_vitaminc-pairs_runtime": 3.1964, + "eval_vitaminc-pairs_samples_per_second": 40.045, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_negation-triplets_loss": 2.680220603942871, + "eval_negation-triplets_runtime": 0.7348, + "eval_negation-triplets_samples_per_second": 174.208, + "eval_negation-triplets_steps_per_second": 1.361, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_scitail-pairs-pos_loss": 0.5108461380004883, + "eval_scitail-pairs-pos_runtime": 0.8205, + "eval_scitail-pairs-pos_samples_per_second": 155.993, + "eval_scitail-pairs-pos_steps_per_second": 1.219, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_scitail-pairs-qa_loss": 0.27794376015663147, + "eval_scitail-pairs-qa_runtime": 0.5655, + "eval_scitail-pairs-qa_samples_per_second": 226.329, + "eval_scitail-pairs-qa_steps_per_second": 1.768, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_xsum-pairs_loss": 2.8043901920318604, + "eval_xsum-pairs_runtime": 3.0149, + "eval_xsum-pairs_samples_per_second": 42.456, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_sciq_pairs_loss": 0.32811373472213745, + "eval_sciq_pairs_runtime": 3.3848, + "eval_sciq_pairs_samples_per_second": 37.816, + "eval_sciq_pairs_steps_per_second": 0.295, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_qasc_pairs_loss": 1.6458420753479004, + "eval_qasc_pairs_runtime": 0.5968, + "eval_qasc_pairs_samples_per_second": 214.461, + "eval_qasc_pairs_steps_per_second": 1.675, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_openbookqa_pairs_loss": 2.5592660903930664, + "eval_openbookqa_pairs_runtime": 0.5683, + "eval_openbookqa_pairs_samples_per_second": 225.246, + "eval_openbookqa_pairs_steps_per_second": 1.76, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_msmarco_pairs_loss": 3.5379371643066406, + "eval_msmarco_pairs_runtime": 1.5102, + "eval_msmarco_pairs_samples_per_second": 84.756, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_nq_pairs_loss": 3.847370147705078, + "eval_nq_pairs_runtime": 2.8908, + "eval_nq_pairs_samples_per_second": 44.278, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_trivia_pairs_loss": 3.6181681156158447, + "eval_trivia_pairs_runtime": 3.4242, + "eval_trivia_pairs_samples_per_second": 37.381, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_gooaq_pairs_loss": 2.7982828617095947, + "eval_gooaq_pairs_runtime": 0.9365, + "eval_gooaq_pairs_samples_per_second": 136.676, + "eval_gooaq_pairs_steps_per_second": 1.068, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_paws-pos_loss": 0.1660136878490448, + "eval_paws-pos_runtime": 0.6838, + "eval_paws-pos_samples_per_second": 187.193, + "eval_paws-pos_steps_per_second": 1.462, + "step": 220 + }, + { + "epoch": 0.22633744855967078, + "eval_global_dataset_loss": 1.749915361404419, + "eval_global_dataset_runtime": 13.3423, + "eval_global_dataset_samples_per_second": 31.179, + "eval_global_dataset_steps_per_second": 0.3, + "step": 220 + }, + { + "epoch": 0.2273662551440329, + "grad_norm": 20.361539840698242, + "learning_rate": 7.923156801661474e-06, + "loss": 3.9528, + "step": 221 + }, + { + "epoch": 0.22839506172839505, + "grad_norm": 15.792684555053711, + "learning_rate": 7.959501557632398e-06, + "loss": 1.1222, + "step": 222 + }, + { + "epoch": 0.2294238683127572, + "grad_norm": 15.900016784667969, + "learning_rate": 7.995846313603322e-06, + "loss": 1.2289, + "step": 223 + }, + { + "epoch": 0.23045267489711935, + "grad_norm": 14.649103164672852, + "learning_rate": 8.032191069574247e-06, + "loss": 1.193, + "step": 224 + }, + { + "epoch": 0.23148148148148148, + "grad_norm": 24.6876277923584, + "learning_rate": 8.068535825545171e-06, + "loss": 3.8686, + "step": 225 + }, + { + "epoch": 0.23251028806584362, + "grad_norm": 7.946255683898926, + "learning_rate": 8.104880581516094e-06, + "loss": 0.3667, + "step": 226 + }, + { + "epoch": 0.23353909465020575, + "grad_norm": 14.419116020202637, + "learning_rate": 8.14122533748702e-06, + "loss": 2.1571, + "step": 227 + }, + { + "epoch": 0.2345679012345679, + "grad_norm": 13.824968338012695, + "learning_rate": 8.177570093457943e-06, + "loss": 2.1233, + "step": 228 + }, + { + "epoch": 0.23559670781893005, + "grad_norm": 15.247499465942383, + "learning_rate": 8.213914849428867e-06, + "loss": 3.0183, + "step": 229 + }, + { + "epoch": 0.2366255144032922, + "grad_norm": 7.50793981552124, + "learning_rate": 8.250259605399791e-06, + "loss": 0.4344, + "step": 230 + }, + { + "epoch": 0.23765432098765432, + "grad_norm": 13.505939483642578, + "learning_rate": 8.286604361370715e-06, + "loss": 2.6828, + "step": 231 + }, + { + "epoch": 0.23868312757201646, + "grad_norm": 26.35114288330078, + "learning_rate": 8.32294911734164e-06, + "loss": 4.1836, + "step": 232 + }, + { + "epoch": 0.2397119341563786, + "grad_norm": 15.033428192138672, + "learning_rate": 8.359293873312565e-06, + "loss": 1.8063, + "step": 233 + }, + { + "epoch": 0.24074074074074073, + "grad_norm": 15.326811790466309, + "learning_rate": 8.395638629283488e-06, + "loss": 3.4165, + "step": 234 + }, + { + "epoch": 0.2417695473251029, + "grad_norm": 17.31609344482422, + "learning_rate": 8.431983385254412e-06, + "loss": 3.1603, + "step": 235 + }, + { + "epoch": 0.24279835390946503, + "grad_norm": 23.062973022460938, + "learning_rate": 8.468328141225337e-06, + "loss": 2.9251, + "step": 236 + }, + { + "epoch": 0.24382716049382716, + "grad_norm": 15.594389915466309, + "learning_rate": 8.504672897196261e-06, + "loss": 1.726, + "step": 237 + }, + { + "epoch": 0.2448559670781893, + "grad_norm": 10.86409854888916, + "learning_rate": 8.541017653167185e-06, + "loss": 0.7677, + "step": 238 + }, + { + "epoch": 0.24588477366255143, + "grad_norm": 24.200529098510742, + "learning_rate": 8.57736240913811e-06, + "loss": 3.7962, + "step": 239 + }, + { + "epoch": 0.24691358024691357, + "grad_norm": 23.083824157714844, + "learning_rate": 8.613707165109033e-06, + "loss": 3.6562, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_Qnli-dev_cosine_accuracy": 0.677734375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8187533020973206, + "eval_Qnli-dev_cosine_ap": 0.6835720202375161, + "eval_Qnli-dev_cosine_f1": 0.6900958466453675, + "eval_Qnli-dev_cosine_f1_threshold": 0.7529827356338501, + "eval_Qnli-dev_cosine_precision": 0.5538461538461539, + "eval_Qnli-dev_cosine_recall": 0.9152542372881356, + "eval_Qnli-dev_dot_accuracy": 0.62890625, + "eval_Qnli-dev_dot_accuracy_threshold": 405.27545166015625, + "eval_Qnli-dev_dot_ap": 0.5877863096211339, + "eval_Qnli-dev_dot_f1": 0.6722408026755853, + "eval_Qnli-dev_dot_f1_threshold": 367.1414794921875, + "eval_Qnli-dev_dot_precision": 0.5552486187845304, + "eval_Qnli-dev_dot_recall": 0.8516949152542372, + "eval_Qnli-dev_euclidean_accuracy": 0.6796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.904159545898438, + "eval_Qnli-dev_euclidean_ap": 0.6913218676999153, + "eval_Qnli-dev_euclidean_f1": 0.6865148861646235, + "eval_Qnli-dev_euclidean_f1_threshold": 14.621212005615234, + "eval_Qnli-dev_euclidean_precision": 0.5850746268656717, + "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, + "eval_Qnli-dev_manhattan_accuracy": 0.677734375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 254.7897491455078, + "eval_Qnli-dev_manhattan_ap": 0.698230594032758, + "eval_Qnli-dev_manhattan_f1": 0.6965517241379311, + "eval_Qnli-dev_manhattan_f1_threshold": 293.552734375, + "eval_Qnli-dev_manhattan_precision": 0.5872093023255814, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.6796875, + "eval_Qnli-dev_max_accuracy_threshold": 405.27545166015625, + "eval_Qnli-dev_max_ap": 0.698230594032758, + "eval_Qnli-dev_max_f1": 0.6965517241379311, + "eval_Qnli-dev_max_f1_threshold": 367.1414794921875, + "eval_Qnli-dev_max_precision": 0.5872093023255814, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.6796875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9396699666976929, + "eval_allNLI-dev_cosine_ap": 0.49305382035680395, + "eval_allNLI-dev_cosine_f1": 0.5477477477477477, + "eval_allNLI-dev_cosine_f1_threshold": 0.8165856003761292, + "eval_allNLI-dev_cosine_precision": 0.39790575916230364, + "eval_allNLI-dev_cosine_recall": 0.8786127167630058, + "eval_allNLI-dev_dot_accuracy": 0.66796875, + "eval_allNLI-dev_dot_accuracy_threshold": 519.2574462890625, + "eval_allNLI-dev_dot_ap": 0.41169754525200597, + "eval_allNLI-dev_dot_f1": 0.5198098256735342, + "eval_allNLI-dev_dot_f1_threshold": 369.37896728515625, + "eval_allNLI-dev_dot_precision": 0.35807860262008734, + "eval_allNLI-dev_dot_recall": 0.9479768786127167, + "eval_allNLI-dev_euclidean_accuracy": 0.681640625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 8.951443672180176, + "eval_allNLI-dev_euclidean_ap": 0.4972712047553571, + "eval_allNLI-dev_euclidean_f1": 0.5478424015009381, + "eval_allNLI-dev_euclidean_f1_threshold": 13.30331802368164, + "eval_allNLI-dev_euclidean_precision": 0.40555555555555556, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.681640625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 168.29837036132812, + "eval_allNLI-dev_manhattan_ap": 0.49849435311963386, + "eval_allNLI-dev_manhattan_f1": 0.555765595463138, + "eval_allNLI-dev_manhattan_f1_threshold": 261.11334228515625, + "eval_allNLI-dev_manhattan_precision": 0.41292134831460675, + "eval_allNLI-dev_manhattan_recall": 0.8497109826589595, + "eval_allNLI-dev_max_accuracy": 0.681640625, + "eval_allNLI-dev_max_accuracy_threshold": 519.2574462890625, + "eval_allNLI-dev_max_ap": 0.49849435311963386, + "eval_allNLI-dev_max_f1": 0.555765595463138, + "eval_allNLI-dev_max_f1_threshold": 369.37896728515625, + "eval_allNLI-dev_max_precision": 0.41292134831460675, + "eval_allNLI-dev_max_recall": 0.9479768786127167, + "eval_sequential_score": 0.698230594032758, + "eval_sts-test_pearson_cosine": 0.5422399822302852, + "eval_sts-test_pearson_dot": 0.47800101935982187, + "eval_sts-test_pearson_euclidean": 0.5410879554786593, + "eval_sts-test_pearson_manhattan": 0.5428179293731825, + "eval_sts-test_pearson_max": 0.5428179293731825, + "eval_sts-test_spearman_cosine": 0.5703833329868931, + "eval_sts-test_spearman_dot": 0.48296505545213714, + "eval_sts-test_spearman_euclidean": 0.5529472570210532, + "eval_sts-test_spearman_manhattan": 0.5558088764307263, + "eval_sts-test_spearman_max": 0.5703833329868931, + "eval_vitaminc-pairs_loss": 2.7294111251831055, + "eval_vitaminc-pairs_runtime": 3.173, + "eval_vitaminc-pairs_samples_per_second": 40.34, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_negation-triplets_loss": 2.5460636615753174, + "eval_negation-triplets_runtime": 0.7253, + "eval_negation-triplets_samples_per_second": 176.49, + "eval_negation-triplets_steps_per_second": 1.379, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_scitail-pairs-pos_loss": 0.45181718468666077, + "eval_scitail-pairs-pos_runtime": 0.7815, + "eval_scitail-pairs-pos_samples_per_second": 163.781, + "eval_scitail-pairs-pos_steps_per_second": 1.28, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_scitail-pairs-qa_loss": 0.11772796511650085, + "eval_scitail-pairs-qa_runtime": 0.5646, + "eval_scitail-pairs-qa_samples_per_second": 226.703, + "eval_scitail-pairs-qa_steps_per_second": 1.771, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_xsum-pairs_loss": 2.541783571243286, + "eval_xsum-pairs_runtime": 3.0187, + "eval_xsum-pairs_samples_per_second": 42.402, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_sciq_pairs_loss": 0.2976074516773224, + "eval_sciq_pairs_runtime": 3.4151, + "eval_sciq_pairs_samples_per_second": 37.48, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_qasc_pairs_loss": 1.538482427597046, + "eval_qasc_pairs_runtime": 0.5897, + "eval_qasc_pairs_samples_per_second": 217.058, + "eval_qasc_pairs_steps_per_second": 1.696, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_openbookqa_pairs_loss": 2.2948145866394043, + "eval_openbookqa_pairs_runtime": 0.5656, + "eval_openbookqa_pairs_samples_per_second": 226.301, + "eval_openbookqa_pairs_steps_per_second": 1.768, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_msmarco_pairs_loss": 3.3644864559173584, + "eval_msmarco_pairs_runtime": 1.5156, + "eval_msmarco_pairs_samples_per_second": 84.457, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_nq_pairs_loss": 3.7770235538482666, + "eval_nq_pairs_runtime": 2.9074, + "eval_nq_pairs_samples_per_second": 44.025, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_trivia_pairs_loss": 3.4960672855377197, + "eval_trivia_pairs_runtime": 3.4254, + "eval_trivia_pairs_samples_per_second": 37.368, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_gooaq_pairs_loss": 2.5963170528411865, + "eval_gooaq_pairs_runtime": 0.9352, + "eval_gooaq_pairs_samples_per_second": 136.874, + "eval_gooaq_pairs_steps_per_second": 1.069, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_paws-pos_loss": 0.09364856779575348, + "eval_paws-pos_runtime": 0.6826, + "eval_paws-pos_samples_per_second": 187.531, + "eval_paws-pos_steps_per_second": 1.465, + "step": 240 + }, + { + "epoch": 0.24691358024691357, + "eval_global_dataset_loss": 1.6046679019927979, + "eval_global_dataset_runtime": 13.3573, + "eval_global_dataset_samples_per_second": 31.144, + "eval_global_dataset_steps_per_second": 0.299, + "step": 240 + }, + { + "epoch": 0.24794238683127573, + "grad_norm": 18.60301971435547, + "learning_rate": 8.650051921079957e-06, + "loss": 2.8804, + "step": 241 + }, + { + "epoch": 0.24897119341563786, + "grad_norm": 15.278526306152344, + "learning_rate": 8.686396677050883e-06, + "loss": 1.7607, + "step": 242 + }, + { + "epoch": 0.25, + "grad_norm": 23.43361473083496, + "learning_rate": 8.722741433021805e-06, + "loss": 3.877, + "step": 243 + }, + { + "epoch": 0.25102880658436216, + "grad_norm": 13.10021686553955, + "learning_rate": 8.75908618899273e-06, + "loss": 1.6768, + "step": 244 + }, + { + "epoch": 0.25205761316872427, + "grad_norm": 11.166913032531738, + "learning_rate": 8.795430944963655e-06, + "loss": 0.8368, + "step": 245 + }, + { + "epoch": 0.25308641975308643, + "grad_norm": 12.863570213317871, + "learning_rate": 8.831775700934577e-06, + "loss": 1.6864, + "step": 246 + }, + { + "epoch": 0.25411522633744854, + "grad_norm": 12.436037063598633, + "learning_rate": 8.868120456905503e-06, + "loss": 1.3478, + "step": 247 + }, + { + "epoch": 0.2551440329218107, + "grad_norm": 13.372901916503906, + "learning_rate": 8.904465212876427e-06, + "loss": 1.7764, + "step": 248 + }, + { + "epoch": 0.25617283950617287, + "grad_norm": 14.600456237792969, + "learning_rate": 8.94080996884735e-06, + "loss": 1.8495, + "step": 249 + }, + { + "epoch": 0.257201646090535, + "grad_norm": 5.363873481750488, + "learning_rate": 8.977154724818275e-06, + "loss": 0.285, + "step": 250 + }, + { + "epoch": 0.25823045267489714, + "grad_norm": 12.498610496520996, + "learning_rate": 9.0134994807892e-06, + "loss": 1.6516, + "step": 251 + }, + { + "epoch": 0.25925925925925924, + "grad_norm": 13.058953285217285, + "learning_rate": 9.049844236760123e-06, + "loss": 1.5781, + "step": 252 + }, + { + "epoch": 0.2602880658436214, + "grad_norm": 27.41628074645996, + "learning_rate": 9.086188992731047e-06, + "loss": 4.791, + "step": 253 + }, + { + "epoch": 0.2613168724279835, + "grad_norm": 21.457780838012695, + "learning_rate": 9.122533748701973e-06, + "loss": 3.7502, + "step": 254 + }, + { + "epoch": 0.2623456790123457, + "grad_norm": 13.806361198425293, + "learning_rate": 9.158878504672895e-06, + "loss": 2.6088, + "step": 255 + }, + { + "epoch": 0.26337448559670784, + "grad_norm": 20.073028564453125, + "learning_rate": 9.195223260643821e-06, + "loss": 3.2271, + "step": 256 + }, + { + "epoch": 0.26440329218106995, + "grad_norm": 10.656987190246582, + "learning_rate": 9.231568016614745e-06, + "loss": 1.3969, + "step": 257 + }, + { + "epoch": 0.2654320987654321, + "grad_norm": 15.233261108398438, + "learning_rate": 9.267912772585667e-06, + "loss": 2.5281, + "step": 258 + }, + { + "epoch": 0.2664609053497942, + "grad_norm": 17.79701805114746, + "learning_rate": 9.304257528556593e-06, + "loss": 2.8561, + "step": 259 + }, + { + "epoch": 0.2674897119341564, + "grad_norm": 21.97925567626953, + "learning_rate": 9.340602284527517e-06, + "loss": 3.9495, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_Qnli-dev_cosine_accuracy": 0.689453125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8466764092445374, + "eval_Qnli-dev_cosine_ap": 0.6871140391818324, + "eval_Qnli-dev_cosine_f1": 0.6923076923076923, + "eval_Qnli-dev_cosine_f1_threshold": 0.7720080018043518, + "eval_Qnli-dev_cosine_precision": 0.5567010309278351, + "eval_Qnli-dev_cosine_recall": 0.9152542372881356, + "eval_Qnli-dev_dot_accuracy": 0.62890625, + "eval_Qnli-dev_dot_accuracy_threshold": 437.21807861328125, + "eval_Qnli-dev_dot_ap": 0.5853275156115014, + "eval_Qnli-dev_dot_f1": 0.6721581548599671, + "eval_Qnli-dev_dot_f1_threshold": 389.718017578125, + "eval_Qnli-dev_dot_precision": 0.5498652291105122, + "eval_Qnli-dev_dot_recall": 0.864406779661017, + "eval_Qnli-dev_euclidean_accuracy": 0.681640625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.65598201751709, + "eval_Qnli-dev_euclidean_ap": 0.6939343189181921, + "eval_Qnli-dev_euclidean_f1": 0.692436974789916, + "eval_Qnli-dev_euclidean_f1_threshold": 14.59019660949707, + "eval_Qnli-dev_euclidean_precision": 0.5738161559888579, + "eval_Qnli-dev_euclidean_recall": 0.8728813559322034, + "eval_Qnli-dev_manhattan_accuracy": 0.6796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 252.4490966796875, + "eval_Qnli-dev_manhattan_ap": 0.6988409881438541, + "eval_Qnli-dev_manhattan_f1": 0.6989966555183945, + "eval_Qnli-dev_manhattan_f1_threshold": 291.64801025390625, + "eval_Qnli-dev_manhattan_precision": 0.5773480662983426, + "eval_Qnli-dev_manhattan_recall": 0.885593220338983, + "eval_Qnli-dev_max_accuracy": 0.689453125, + "eval_Qnli-dev_max_accuracy_threshold": 437.21807861328125, + "eval_Qnli-dev_max_ap": 0.6988409881438541, + "eval_Qnli-dev_max_f1": 0.6989966555183945, + "eval_Qnli-dev_max_f1_threshold": 389.718017578125, + "eval_Qnli-dev_max_precision": 0.5773480662983426, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.6875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9255372285842896, + "eval_allNLI-dev_cosine_ap": 0.5019030389232606, + "eval_allNLI-dev_cosine_f1": 0.5451263537906137, + "eval_allNLI-dev_cosine_f1_threshold": 0.8344321250915527, + "eval_allNLI-dev_cosine_precision": 0.3963254593175853, + "eval_allNLI-dev_cosine_recall": 0.8728323699421965, + "eval_allNLI-dev_dot_accuracy": 0.666015625, + "eval_allNLI-dev_dot_accuracy_threshold": 537.950439453125, + "eval_allNLI-dev_dot_ap": 0.4104683717008714, + "eval_allNLI-dev_dot_f1": 0.5224111282843895, + "eval_allNLI-dev_dot_f1_threshold": 388.04473876953125, + "eval_allNLI-dev_dot_precision": 0.35654008438818563, + "eval_allNLI-dev_dot_recall": 0.976878612716763, + "eval_allNLI-dev_euclidean_accuracy": 0.68359375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 8.841395378112793, + "eval_allNLI-dev_euclidean_ap": 0.5047465503781696, + "eval_allNLI-dev_euclidean_f1": 0.5511811023622047, + "eval_allNLI-dev_euclidean_f1_threshold": 12.596972465515137, + "eval_allNLI-dev_euclidean_precision": 0.417910447761194, + "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, + "eval_allNLI-dev_manhattan_accuracy": 0.689453125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 179.2438201904297, + "eval_allNLI-dev_manhattan_ap": 0.5057091077225248, + "eval_allNLI-dev_manhattan_f1": 0.5523012552301255, + "eval_allNLI-dev_manhattan_f1_threshold": 238.46624755859375, + "eval_allNLI-dev_manhattan_precision": 0.43278688524590164, + "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, + "eval_allNLI-dev_max_accuracy": 0.689453125, + "eval_allNLI-dev_max_accuracy_threshold": 537.950439453125, + "eval_allNLI-dev_max_ap": 0.5057091077225248, + "eval_allNLI-dev_max_f1": 0.5523012552301255, + "eval_allNLI-dev_max_f1_threshold": 388.04473876953125, + "eval_allNLI-dev_max_precision": 0.43278688524590164, + "eval_allNLI-dev_max_recall": 0.976878612716763, + "eval_sequential_score": 0.6988409881438541, + "eval_sts-test_pearson_cosine": 0.6101007482373539, + "eval_sts-test_pearson_dot": 0.5171078610309542, + "eval_sts-test_pearson_euclidean": 0.6100203378514052, + "eval_sts-test_pearson_manhattan": 0.6071628090659706, + "eval_sts-test_pearson_max": 0.6101007482373539, + "eval_sts-test_spearman_cosine": 0.628839936686977, + "eval_sts-test_spearman_dot": 0.5099761113052808, + "eval_sts-test_spearman_euclidean": 0.6134216055947527, + "eval_sts-test_spearman_manhattan": 0.6119040008184474, + "eval_sts-test_spearman_max": 0.628839936686977, + "eval_vitaminc-pairs_loss": 2.755634069442749, + "eval_vitaminc-pairs_runtime": 3.1645, + "eval_vitaminc-pairs_samples_per_second": 40.448, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_negation-triplets_loss": 2.3755366802215576, + "eval_negation-triplets_runtime": 0.7383, + "eval_negation-triplets_samples_per_second": 173.38, + "eval_negation-triplets_steps_per_second": 1.355, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_scitail-pairs-pos_loss": 0.38322263956069946, + "eval_scitail-pairs-pos_runtime": 0.7914, + "eval_scitail-pairs-pos_samples_per_second": 161.747, + "eval_scitail-pairs-pos_steps_per_second": 1.264, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_scitail-pairs-qa_loss": 0.10294085741043091, + "eval_scitail-pairs-qa_runtime": 0.5772, + "eval_scitail-pairs-qa_samples_per_second": 221.746, + "eval_scitail-pairs-qa_steps_per_second": 1.732, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_xsum-pairs_loss": 2.2755026817321777, + "eval_xsum-pairs_runtime": 3.0186, + "eval_xsum-pairs_samples_per_second": 42.403, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_sciq_pairs_loss": 0.24845057725906372, + "eval_sciq_pairs_runtime": 3.4052, + "eval_sciq_pairs_samples_per_second": 37.59, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_qasc_pairs_loss": 1.4006080627441406, + "eval_qasc_pairs_runtime": 0.5915, + "eval_qasc_pairs_samples_per_second": 216.401, + "eval_qasc_pairs_steps_per_second": 1.691, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_openbookqa_pairs_loss": 2.0268588066101074, + "eval_openbookqa_pairs_runtime": 0.5688, + "eval_openbookqa_pairs_samples_per_second": 225.017, + "eval_openbookqa_pairs_steps_per_second": 1.758, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_msmarco_pairs_loss": 2.9229013919830322, + "eval_msmarco_pairs_runtime": 1.5117, + "eval_msmarco_pairs_samples_per_second": 84.673, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_nq_pairs_loss": 3.4232370853424072, + "eval_nq_pairs_runtime": 2.8888, + "eval_nq_pairs_samples_per_second": 44.309, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_trivia_pairs_loss": 3.135023832321167, + "eval_trivia_pairs_runtime": 3.4305, + "eval_trivia_pairs_samples_per_second": 37.312, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_gooaq_pairs_loss": 2.3922266960144043, + "eval_gooaq_pairs_runtime": 0.9351, + "eval_gooaq_pairs_samples_per_second": 136.888, + "eval_gooaq_pairs_steps_per_second": 1.069, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_paws-pos_loss": 0.08843281120061874, + "eval_paws-pos_runtime": 0.6715, + "eval_paws-pos_samples_per_second": 190.608, + "eval_paws-pos_steps_per_second": 1.489, + "step": 260 + }, + { + "epoch": 0.2674897119341564, + "eval_global_dataset_loss": 1.4206469058990479, + "eval_global_dataset_runtime": 13.3286, + "eval_global_dataset_samples_per_second": 31.211, + "eval_global_dataset_steps_per_second": 0.3, + "step": 260 + }, + { + "epoch": 0.26851851851851855, + "grad_norm": 12.555780410766602, + "learning_rate": 9.376947040498441e-06, + "loss": 1.7349, + "step": 261 + }, + { + "epoch": 0.26954732510288065, + "grad_norm": 13.038395881652832, + "learning_rate": 9.413291796469365e-06, + "loss": 1.7189, + "step": 262 + }, + { + "epoch": 0.2705761316872428, + "grad_norm": 13.202376365661621, + "learning_rate": 9.44963655244029e-06, + "loss": 1.7282, + "step": 263 + }, + { + "epoch": 0.2716049382716049, + "grad_norm": 17.815078735351562, + "learning_rate": 9.485981308411213e-06, + "loss": 2.9146, + "step": 264 + }, + { + "epoch": 0.2726337448559671, + "grad_norm": 13.262603759765625, + "learning_rate": 9.522326064382139e-06, + "loss": 1.7603, + "step": 265 + }, + { + "epoch": 0.2736625514403292, + "grad_norm": 12.508451461791992, + "learning_rate": 9.558670820353063e-06, + "loss": 1.823, + "step": 266 + }, + { + "epoch": 0.27469135802469136, + "grad_norm": 12.313492774963379, + "learning_rate": 9.595015576323985e-06, + "loss": 1.5984, + "step": 267 + }, + { + "epoch": 0.2757201646090535, + "grad_norm": 12.14000415802002, + "learning_rate": 9.631360332294911e-06, + "loss": 1.651, + "step": 268 + }, + { + "epoch": 0.2767489711934156, + "grad_norm": 14.698229789733887, + "learning_rate": 9.667705088265835e-06, + "loss": 2.615, + "step": 269 + }, + { + "epoch": 0.2777777777777778, + "grad_norm": 12.209722518920898, + "learning_rate": 9.704049844236759e-06, + "loss": 2.6608, + "step": 270 + }, + { + "epoch": 0.2788065843621399, + "grad_norm": 13.545384407043457, + "learning_rate": 9.740394600207683e-06, + "loss": 2.2558, + "step": 271 + }, + { + "epoch": 0.27983539094650206, + "grad_norm": 11.335700988769531, + "learning_rate": 9.776739356178609e-06, + "loss": 1.2155, + "step": 272 + }, + { + "epoch": 0.2808641975308642, + "grad_norm": 11.750332832336426, + "learning_rate": 9.813084112149531e-06, + "loss": 1.4727, + "step": 273 + }, + { + "epoch": 0.28189300411522633, + "grad_norm": 20.200624465942383, + "learning_rate": 9.849428868120455e-06, + "loss": 3.6394, + "step": 274 + }, + { + "epoch": 0.2829218106995885, + "grad_norm": 11.674026489257812, + "learning_rate": 9.88577362409138e-06, + "loss": 2.1385, + "step": 275 + }, + { + "epoch": 0.2839506172839506, + "grad_norm": 16.86899185180664, + "learning_rate": 9.922118380062303e-06, + "loss": 2.3953, + "step": 276 + }, + { + "epoch": 0.28497942386831276, + "grad_norm": 11.407617568969727, + "learning_rate": 9.958463136033229e-06, + "loss": 1.488, + "step": 277 + }, + { + "epoch": 0.28600823045267487, + "grad_norm": 11.607297897338867, + "learning_rate": 9.994807892004153e-06, + "loss": 1.2893, + "step": 278 + }, + { + "epoch": 0.28703703703703703, + "grad_norm": 11.757554054260254, + "learning_rate": 1.0031152647975077e-05, + "loss": 0.7678, + "step": 279 + }, + { + "epoch": 0.2880658436213992, + "grad_norm": 12.077320098876953, + "learning_rate": 1.0067497403946001e-05, + "loss": 2.6801, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_Qnli-dev_cosine_accuracy": 0.67578125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8415871262550354, + "eval_Qnli-dev_cosine_ap": 0.6912671058237406, + "eval_Qnli-dev_cosine_f1": 0.6869983948635633, + "eval_Qnli-dev_cosine_f1_threshold": 0.7735732197761536, + "eval_Qnli-dev_cosine_precision": 0.5529715762273901, + "eval_Qnli-dev_cosine_recall": 0.9067796610169492, + "eval_Qnli-dev_dot_accuracy": 0.62109375, + "eval_Qnli-dev_dot_accuracy_threshold": 432.6428527832031, + "eval_Qnli-dev_dot_ap": 0.5797796012757845, + "eval_Qnli-dev_dot_f1": 0.6719492868462758, + "eval_Qnli-dev_dot_f1_threshold": 393.2371826171875, + "eval_Qnli-dev_dot_precision": 0.5367088607594936, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.67578125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 11.884414672851562, + "eval_Qnli-dev_euclidean_ap": 0.6992665036179804, + "eval_Qnli-dev_euclidean_f1": 0.6915584415584415, + "eval_Qnli-dev_euclidean_f1_threshold": 15.279256820678711, + "eval_Qnli-dev_euclidean_precision": 0.5605263157894737, + "eval_Qnli-dev_euclidean_recall": 0.902542372881356, + "eval_Qnli-dev_manhattan_accuracy": 0.67578125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 254.77352905273438, + "eval_Qnli-dev_manhattan_ap": 0.7020052347678023, + "eval_Qnli-dev_manhattan_f1": 0.6893039049235994, + "eval_Qnli-dev_manhattan_f1_threshold": 293.3916931152344, + "eval_Qnli-dev_manhattan_precision": 0.5750708215297451, + "eval_Qnli-dev_manhattan_recall": 0.8601694915254238, + "eval_Qnli-dev_max_accuracy": 0.67578125, + "eval_Qnli-dev_max_accuracy_threshold": 432.6428527832031, + "eval_Qnli-dev_max_ap": 0.7020052347678023, + "eval_Qnli-dev_max_f1": 0.6915584415584415, + "eval_Qnli-dev_max_f1_threshold": 393.2371826171875, + "eval_Qnli-dev_max_precision": 0.5750708215297451, + "eval_Qnli-dev_max_recall": 0.9067796610169492, + "eval_allNLI-dev_cosine_accuracy": 0.703125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.923446536064148, + "eval_allNLI-dev_cosine_ap": 0.5118589589722005, + "eval_allNLI-dev_cosine_f1": 0.5532786885245901, + "eval_allNLI-dev_cosine_f1_threshold": 0.849584698677063, + "eval_allNLI-dev_cosine_precision": 0.42857142857142855, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.6640625, + "eval_allNLI-dev_dot_accuracy_threshold": 588.6370239257812, + "eval_allNLI-dev_dot_ap": 0.4159723261021614, + "eval_allNLI-dev_dot_f1": 0.5263157894736842, + "eval_allNLI-dev_dot_f1_threshold": 431.5047607421875, + "eval_allNLI-dev_dot_precision": 0.3835978835978836, + "eval_allNLI-dev_dot_recall": 0.838150289017341, + "eval_allNLI-dev_euclidean_accuracy": 0.705078125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.087307929992676, + "eval_allNLI-dev_euclidean_ap": 0.5172051717681727, + "eval_allNLI-dev_euclidean_f1": 0.5603112840466926, + "eval_allNLI-dev_euclidean_f1_threshold": 13.293811798095703, + "eval_allNLI-dev_euclidean_precision": 0.4222873900293255, + "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, + "eval_allNLI-dev_manhattan_accuracy": 0.701171875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 185.59756469726562, + "eval_allNLI-dev_manhattan_ap": 0.5124717600689392, + "eval_allNLI-dev_manhattan_f1": 0.5559999999999999, + "eval_allNLI-dev_manhattan_f1_threshold": 259.2859802246094, + "eval_allNLI-dev_manhattan_precision": 0.42507645259938837, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.705078125, + "eval_allNLI-dev_max_accuracy_threshold": 588.6370239257812, + "eval_allNLI-dev_max_ap": 0.5172051717681727, + "eval_allNLI-dev_max_f1": 0.5603112840466926, + "eval_allNLI-dev_max_f1_threshold": 431.5047607421875, + "eval_allNLI-dev_max_precision": 0.42857142857142855, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7020052347678023, + "eval_sts-test_pearson_cosine": 0.6687245612921815, + "eval_sts-test_pearson_dot": 0.5734739396427984, + "eval_sts-test_pearson_euclidean": 0.6718719757622864, + "eval_sts-test_pearson_manhattan": 0.6670343168111552, + "eval_sts-test_pearson_max": 0.6718719757622864, + "eval_sts-test_spearman_cosine": 0.6819400829060788, + "eval_sts-test_spearman_dot": 0.556021400171074, + "eval_sts-test_spearman_euclidean": 0.6684485621243225, + "eval_sts-test_spearman_manhattan": 0.665065859027008, + "eval_sts-test_spearman_max": 0.6819400829060788, + "eval_vitaminc-pairs_loss": 2.8099753856658936, + "eval_vitaminc-pairs_runtime": 3.1872, + "eval_vitaminc-pairs_samples_per_second": 40.16, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_negation-triplets_loss": 2.1310224533081055, + "eval_negation-triplets_runtime": 0.735, + "eval_negation-triplets_samples_per_second": 174.146, + "eval_negation-triplets_steps_per_second": 1.361, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_scitail-pairs-pos_loss": 0.33672308921813965, + "eval_scitail-pairs-pos_runtime": 0.7788, + "eval_scitail-pairs-pos_samples_per_second": 164.351, + "eval_scitail-pairs-pos_steps_per_second": 1.284, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_scitail-pairs-qa_loss": 0.10085483640432358, + "eval_scitail-pairs-qa_runtime": 0.5632, + "eval_scitail-pairs-qa_samples_per_second": 227.27, + "eval_scitail-pairs-qa_steps_per_second": 1.776, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_xsum-pairs_loss": 1.8792424201965332, + "eval_xsum-pairs_runtime": 3.0158, + "eval_xsum-pairs_samples_per_second": 42.443, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_sciq_pairs_loss": 0.21897011995315552, + "eval_sciq_pairs_runtime": 3.4085, + "eval_sciq_pairs_samples_per_second": 37.553, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_qasc_pairs_loss": 1.3423388004302979, + "eval_qasc_pairs_runtime": 0.5957, + "eval_qasc_pairs_samples_per_second": 214.885, + "eval_qasc_pairs_steps_per_second": 1.679, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_openbookqa_pairs_loss": 1.8888919353485107, + "eval_openbookqa_pairs_runtime": 0.5744, + "eval_openbookqa_pairs_samples_per_second": 222.832, + "eval_openbookqa_pairs_steps_per_second": 1.741, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_msmarco_pairs_loss": 2.50892972946167, + "eval_msmarco_pairs_runtime": 1.5144, + "eval_msmarco_pairs_samples_per_second": 84.524, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_nq_pairs_loss": 3.0089173316955566, + "eval_nq_pairs_runtime": 2.9076, + "eval_nq_pairs_samples_per_second": 44.022, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_trivia_pairs_loss": 2.672011137008667, + "eval_trivia_pairs_runtime": 3.4362, + "eval_trivia_pairs_samples_per_second": 37.25, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_gooaq_pairs_loss": 2.007201671600342, + "eval_gooaq_pairs_runtime": 0.9479, + "eval_gooaq_pairs_samples_per_second": 135.03, + "eval_gooaq_pairs_steps_per_second": 1.055, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_paws-pos_loss": 0.0831906795501709, + "eval_paws-pos_runtime": 0.6861, + "eval_paws-pos_samples_per_second": 186.568, + "eval_paws-pos_steps_per_second": 1.458, + "step": 280 + }, + { + "epoch": 0.2880658436213992, + "eval_global_dataset_loss": 1.250847578048706, + "eval_global_dataset_runtime": 13.3637, + "eval_global_dataset_samples_per_second": 31.129, + "eval_global_dataset_steps_per_second": 0.299, + "step": 280 + }, + { + "epoch": 0.2890946502057613, + "grad_norm": 11.982275009155273, + "learning_rate": 1.0103842159916927e-05, + "loss": 1.4147, + "step": 281 + }, + { + "epoch": 0.29012345679012347, + "grad_norm": 4.2981367111206055, + "learning_rate": 1.0140186915887849e-05, + "loss": 0.2152, + "step": 282 + }, + { + "epoch": 0.2911522633744856, + "grad_norm": 11.808545112609863, + "learning_rate": 1.0176531671858773e-05, + "loss": 1.3908, + "step": 283 + }, + { + "epoch": 0.29218106995884774, + "grad_norm": 17.394630432128906, + "learning_rate": 1.0212876427829699e-05, + "loss": 2.6877, + "step": 284 + }, + { + "epoch": 0.2932098765432099, + "grad_norm": 12.021160125732422, + "learning_rate": 1.0249221183800621e-05, + "loss": 1.1107, + "step": 285 + }, + { + "epoch": 0.294238683127572, + "grad_norm": 21.536834716796875, + "learning_rate": 1.0285565939771547e-05, + "loss": 3.3415, + "step": 286 + }, + { + "epoch": 0.2952674897119342, + "grad_norm": 13.677733421325684, + "learning_rate": 1.032191069574247e-05, + "loss": 1.6332, + "step": 287 + }, + { + "epoch": 0.2962962962962963, + "grad_norm": 12.351741790771484, + "learning_rate": 1.0358255451713395e-05, + "loss": 2.5232, + "step": 288 + }, + { + "epoch": 0.29732510288065844, + "grad_norm": 23.78185272216797, + "learning_rate": 1.0394600207684319e-05, + "loss": 4.1329, + "step": 289 + }, + { + "epoch": 0.29835390946502055, + "grad_norm": 13.199095726013184, + "learning_rate": 1.0430944963655244e-05, + "loss": 1.3592, + "step": 290 + }, + { + "epoch": 0.2993827160493827, + "grad_norm": 3.1243560314178467, + "learning_rate": 1.0467289719626167e-05, + "loss": 0.1327, + "step": 291 + }, + { + "epoch": 0.3004115226337449, + "grad_norm": 14.93930721282959, + "learning_rate": 1.050363447559709e-05, + "loss": 2.2552, + "step": 292 + }, + { + "epoch": 0.301440329218107, + "grad_norm": 13.743828773498535, + "learning_rate": 1.0539979231568017e-05, + "loss": 1.5245, + "step": 293 + }, + { + "epoch": 0.30246913580246915, + "grad_norm": 14.517548561096191, + "learning_rate": 1.0576323987538939e-05, + "loss": 2.9299, + "step": 294 + }, + { + "epoch": 0.30349794238683125, + "grad_norm": 12.055514335632324, + "learning_rate": 1.0612668743509865e-05, + "loss": 0.6583, + "step": 295 + }, + { + "epoch": 0.3045267489711934, + "grad_norm": 15.698110580444336, + "learning_rate": 1.0649013499480789e-05, + "loss": 2.0637, + "step": 296 + }, + { + "epoch": 0.3055555555555556, + "grad_norm": 19.17127227783203, + "learning_rate": 1.0685358255451711e-05, + "loss": 2.6771, + "step": 297 + }, + { + "epoch": 0.3065843621399177, + "grad_norm": 13.811963081359863, + "learning_rate": 1.0721703011422637e-05, + "loss": 2.755, + "step": 298 + }, + { + "epoch": 0.30761316872427985, + "grad_norm": 24.679113388061523, + "learning_rate": 1.075804776739356e-05, + "loss": 3.3638, + "step": 299 + }, + { + "epoch": 0.30864197530864196, + "grad_norm": 10.830105781555176, + "learning_rate": 1.0794392523364485e-05, + "loss": 0.7053, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_Qnli-dev_cosine_accuracy": 0.66796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8199188113212585, + "eval_Qnli-dev_cosine_ap": 0.6995325123375109, + "eval_Qnli-dev_cosine_f1": 0.6897689768976898, + "eval_Qnli-dev_cosine_f1_threshold": 0.7571749091148376, + "eval_Qnli-dev_cosine_precision": 0.5648648648648649, + "eval_Qnli-dev_cosine_recall": 0.885593220338983, + "eval_Qnli-dev_dot_accuracy": 0.62890625, + "eval_Qnli-dev_dot_accuracy_threshold": 423.8924255371094, + "eval_Qnli-dev_dot_ap": 0.5832842248431986, + "eval_Qnli-dev_dot_f1": 0.6645865834633385, + "eval_Qnli-dev_dot_f1_threshold": 364.68280029296875, + "eval_Qnli-dev_dot_precision": 0.5259259259259259, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.689453125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.101522445678711, + "eval_Qnli-dev_euclidean_ap": 0.7073708901870908, + "eval_Qnli-dev_euclidean_f1": 0.6832061068702291, + "eval_Qnli-dev_euclidean_f1_threshold": 14.215328216552734, + "eval_Qnli-dev_euclidean_precision": 0.6215277777777778, + "eval_Qnli-dev_euclidean_recall": 0.7584745762711864, + "eval_Qnli-dev_manhattan_accuracy": 0.689453125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 253.76065063476562, + "eval_Qnli-dev_manhattan_ap": 0.709921595260574, + "eval_Qnli-dev_manhattan_f1": 0.6918032786885246, + "eval_Qnli-dev_manhattan_f1_threshold": 316.2789306640625, + "eval_Qnli-dev_manhattan_precision": 0.5641711229946524, + "eval_Qnli-dev_manhattan_recall": 0.8940677966101694, + "eval_Qnli-dev_max_accuracy": 0.689453125, + "eval_Qnli-dev_max_accuracy_threshold": 423.8924255371094, + "eval_Qnli-dev_max_ap": 0.709921595260574, + "eval_Qnli-dev_max_f1": 0.6918032786885246, + "eval_Qnli-dev_max_f1_threshold": 364.68280029296875, + "eval_Qnli-dev_max_precision": 0.6215277777777778, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.701171875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9037463068962097, + "eval_allNLI-dev_cosine_ap": 0.5128913377360289, + "eval_allNLI-dev_cosine_f1": 0.5613305613305613, + "eval_allNLI-dev_cosine_f1_threshold": 0.8335003852844238, + "eval_allNLI-dev_cosine_precision": 0.4383116883116883, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.6640625, + "eval_allNLI-dev_dot_accuracy_threshold": 571.2269897460938, + "eval_allNLI-dev_dot_ap": 0.41426010345468745, + "eval_allNLI-dev_dot_f1": 0.5328330206378986, + "eval_allNLI-dev_dot_f1_threshold": 416.7803955078125, + "eval_allNLI-dev_dot_precision": 0.39444444444444443, + "eval_allNLI-dev_dot_recall": 0.8208092485549133, + "eval_allNLI-dev_euclidean_accuracy": 0.70703125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.502985954284668, + "eval_allNLI-dev_euclidean_ap": 0.5173963035988031, + "eval_allNLI-dev_euclidean_f1": 0.563600782778865, + "eval_allNLI-dev_euclidean_f1_threshold": 13.729564666748047, + "eval_allNLI-dev_euclidean_precision": 0.4260355029585799, + "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, + "eval_allNLI-dev_manhattan_accuracy": 0.703125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 195.28097534179688, + "eval_allNLI-dev_manhattan_ap": 0.5158767394143106, + "eval_allNLI-dev_manhattan_f1": 0.5584158415841585, + "eval_allNLI-dev_manhattan_f1_threshold": 274.1793212890625, + "eval_allNLI-dev_manhattan_precision": 0.4246987951807229, + "eval_allNLI-dev_manhattan_recall": 0.815028901734104, + "eval_allNLI-dev_max_accuracy": 0.70703125, + "eval_allNLI-dev_max_accuracy_threshold": 571.2269897460938, + "eval_allNLI-dev_max_ap": 0.5173963035988031, + "eval_allNLI-dev_max_f1": 0.563600782778865, + "eval_allNLI-dev_max_f1_threshold": 416.7803955078125, + "eval_allNLI-dev_max_precision": 0.4383116883116883, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.709921595260574, + "eval_sts-test_pearson_cosine": 0.6984052089046398, + "eval_sts-test_pearson_dot": 0.6135443185288071, + "eval_sts-test_pearson_euclidean": 0.7020042331376551, + "eval_sts-test_pearson_manhattan": 0.6988284227220897, + "eval_sts-test_pearson_max": 0.7020042331376551, + "eval_sts-test_spearman_cosine": 0.7071684275104347, + "eval_sts-test_spearman_dot": 0.5928461838910447, + "eval_sts-test_spearman_euclidean": 0.6956956808813058, + "eval_sts-test_spearman_manhattan": 0.6929552031362194, + "eval_sts-test_spearman_max": 0.7071684275104347, + "eval_vitaminc-pairs_loss": 2.8123340606689453, + "eval_vitaminc-pairs_runtime": 3.2467, + "eval_vitaminc-pairs_samples_per_second": 39.424, + "eval_vitaminc-pairs_steps_per_second": 0.308, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_negation-triplets_loss": 1.9351751804351807, + "eval_negation-triplets_runtime": 0.7607, + "eval_negation-triplets_samples_per_second": 168.261, + "eval_negation-triplets_steps_per_second": 1.315, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_scitail-pairs-pos_loss": 0.314525842666626, + "eval_scitail-pairs-pos_runtime": 0.834, + "eval_scitail-pairs-pos_samples_per_second": 153.478, + "eval_scitail-pairs-pos_steps_per_second": 1.199, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_scitail-pairs-qa_loss": 0.07327353954315186, + "eval_scitail-pairs-qa_runtime": 0.645, + "eval_scitail-pairs-qa_samples_per_second": 198.438, + "eval_scitail-pairs-qa_steps_per_second": 1.55, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_xsum-pairs_loss": 1.707858920097351, + "eval_xsum-pairs_runtime": 3.0454, + "eval_xsum-pairs_samples_per_second": 42.031, + "eval_xsum-pairs_steps_per_second": 0.328, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_sciq_pairs_loss": 0.19993656873703003, + "eval_sciq_pairs_runtime": 3.4559, + "eval_sciq_pairs_samples_per_second": 37.038, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_qasc_pairs_loss": 1.3199025392532349, + "eval_qasc_pairs_runtime": 0.5957, + "eval_qasc_pairs_samples_per_second": 214.886, + "eval_qasc_pairs_steps_per_second": 1.679, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_openbookqa_pairs_loss": 1.777836799621582, + "eval_openbookqa_pairs_runtime": 0.5685, + "eval_openbookqa_pairs_samples_per_second": 225.137, + "eval_openbookqa_pairs_steps_per_second": 1.759, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_msmarco_pairs_loss": 2.3514328002929688, + "eval_msmarco_pairs_runtime": 1.5104, + "eval_msmarco_pairs_samples_per_second": 84.746, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_nq_pairs_loss": 2.9204494953155518, + "eval_nq_pairs_runtime": 2.8927, + "eval_nq_pairs_samples_per_second": 44.249, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_trivia_pairs_loss": 2.5231525897979736, + "eval_trivia_pairs_runtime": 3.4302, + "eval_trivia_pairs_samples_per_second": 37.315, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_gooaq_pairs_loss": 1.7379323244094849, + "eval_gooaq_pairs_runtime": 0.956, + "eval_gooaq_pairs_samples_per_second": 133.895, + "eval_gooaq_pairs_steps_per_second": 1.046, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_paws-pos_loss": 0.06269291788339615, + "eval_paws-pos_runtime": 0.6964, + "eval_paws-pos_samples_per_second": 183.815, + "eval_paws-pos_steps_per_second": 1.436, + "step": 300 + }, + { + "epoch": 0.30864197530864196, + "eval_global_dataset_loss": 1.1462713479995728, + "eval_global_dataset_runtime": 13.4482, + "eval_global_dataset_samples_per_second": 30.933, + "eval_global_dataset_steps_per_second": 0.297, + "step": 300 + }, + { + "epoch": 0.3096707818930041, + "grad_norm": 22.287065505981445, + "learning_rate": 1.0830737279335409e-05, + "loss": 3.1007, + "step": 301 + }, + { + "epoch": 0.31069958847736623, + "grad_norm": 11.660481452941895, + "learning_rate": 1.0867082035306334e-05, + "loss": 1.0158, + "step": 302 + }, + { + "epoch": 0.3117283950617284, + "grad_norm": 21.95073699951172, + "learning_rate": 1.0903426791277257e-05, + "loss": 2.8023, + "step": 303 + }, + { + "epoch": 0.31275720164609055, + "grad_norm": 22.914106369018555, + "learning_rate": 1.0939771547248182e-05, + "loss": 3.0464, + "step": 304 + }, + { + "epoch": 0.31378600823045266, + "grad_norm": 11.899238586425781, + "learning_rate": 1.0976116303219106e-05, + "loss": 1.1718, + "step": 305 + }, + { + "epoch": 0.3148148148148148, + "grad_norm": 11.318571090698242, + "learning_rate": 1.1012461059190029e-05, + "loss": 1.0429, + "step": 306 + }, + { + "epoch": 0.31584362139917693, + "grad_norm": 13.055448532104492, + "learning_rate": 1.1048805815160954e-05, + "loss": 1.3403, + "step": 307 + }, + { + "epoch": 0.3168724279835391, + "grad_norm": 17.78560447692871, + "learning_rate": 1.1085150571131879e-05, + "loss": 2.5188, + "step": 308 + }, + { + "epoch": 0.31790123456790126, + "grad_norm": 13.747932434082031, + "learning_rate": 1.1121495327102803e-05, + "loss": 1.3855, + "step": 309 + }, + { + "epoch": 0.31893004115226337, + "grad_norm": 12.256226539611816, + "learning_rate": 1.1157840083073727e-05, + "loss": 1.1986, + "step": 310 + }, + { + "epoch": 0.31995884773662553, + "grad_norm": 16.054819107055664, + "learning_rate": 1.1194184839044652e-05, + "loss": 2.2651, + "step": 311 + }, + { + "epoch": 0.32098765432098764, + "grad_norm": 16.100807189941406, + "learning_rate": 1.1230529595015575e-05, + "loss": 2.5339, + "step": 312 + }, + { + "epoch": 0.3220164609053498, + "grad_norm": 11.739322662353516, + "learning_rate": 1.12668743509865e-05, + "loss": 1.3687, + "step": 313 + }, + { + "epoch": 0.3230452674897119, + "grad_norm": 13.98962116241455, + "learning_rate": 1.1303219106957424e-05, + "loss": 2.1529, + "step": 314 + }, + { + "epoch": 0.32407407407407407, + "grad_norm": 16.564695358276367, + "learning_rate": 1.1339563862928347e-05, + "loss": 2.3677, + "step": 315 + }, + { + "epoch": 0.32510288065843623, + "grad_norm": 18.29457664489746, + "learning_rate": 1.1375908618899272e-05, + "loss": 2.6956, + "step": 316 + }, + { + "epoch": 0.32613168724279834, + "grad_norm": 17.147747039794922, + "learning_rate": 1.1412253374870196e-05, + "loss": 2.4821, + "step": 317 + }, + { + "epoch": 0.3271604938271605, + "grad_norm": 11.078995704650879, + "learning_rate": 1.144859813084112e-05, + "loss": 1.2667, + "step": 318 + }, + { + "epoch": 0.3281893004115226, + "grad_norm": 16.17939567565918, + "learning_rate": 1.1484942886812044e-05, + "loss": 2.4697, + "step": 319 + }, + { + "epoch": 0.3292181069958848, + "grad_norm": 17.299062728881836, + "learning_rate": 1.152128764278297e-05, + "loss": 2.488, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_Qnli-dev_cosine_accuracy": 0.671875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8567001223564148, + "eval_Qnli-dev_cosine_ap": 0.7064956495603609, + "eval_Qnli-dev_cosine_f1": 0.6941580756013745, + "eval_Qnli-dev_cosine_f1_threshold": 0.8039928674697876, + "eval_Qnli-dev_cosine_precision": 0.5838150289017341, + "eval_Qnli-dev_cosine_recall": 0.8559322033898306, + "eval_Qnli-dev_dot_accuracy": 0.638671875, + "eval_Qnli-dev_dot_accuracy_threshold": 439.119384765625, + "eval_Qnli-dev_dot_ap": 0.5828187078602152, + "eval_Qnli-dev_dot_f1": 0.6656534954407294, + "eval_Qnli-dev_dot_f1_threshold": 394.38739013671875, + "eval_Qnli-dev_dot_precision": 0.518957345971564, + "eval_Qnli-dev_dot_recall": 0.9279661016949152, + "eval_Qnli-dev_euclidean_accuracy": 0.689453125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.089064598083496, + "eval_Qnli-dev_euclidean_ap": 0.7175927858077272, + "eval_Qnli-dev_euclidean_f1": 0.6872964169381107, + "eval_Qnli-dev_euclidean_f1_threshold": 15.139961242675781, + "eval_Qnli-dev_euclidean_precision": 0.5582010582010583, + "eval_Qnli-dev_euclidean_recall": 0.8940677966101694, + "eval_Qnli-dev_manhattan_accuracy": 0.685546875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 247.93148803710938, + "eval_Qnli-dev_manhattan_ap": 0.7165821170066472, + "eval_Qnli-dev_manhattan_f1": 0.6962457337883959, + "eval_Qnli-dev_manhattan_f1_threshold": 293.9665222167969, + "eval_Qnli-dev_manhattan_precision": 0.5828571428571429, + "eval_Qnli-dev_manhattan_recall": 0.864406779661017, + "eval_Qnli-dev_max_accuracy": 0.689453125, + "eval_Qnli-dev_max_accuracy_threshold": 439.119384765625, + "eval_Qnli-dev_max_ap": 0.7175927858077272, + "eval_Qnli-dev_max_f1": 0.6962457337883959, + "eval_Qnli-dev_max_f1_threshold": 394.38739013671875, + "eval_Qnli-dev_max_precision": 0.5838150289017341, + "eval_Qnli-dev_max_recall": 0.9279661016949152, + "eval_allNLI-dev_cosine_accuracy": 0.705078125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9175689816474915, + "eval_allNLI-dev_cosine_ap": 0.5186646269211405, + "eval_allNLI-dev_cosine_f1": 0.5641025641025641, + "eval_allNLI-dev_cosine_f1_threshold": 0.8398832082748413, + "eval_allNLI-dev_cosine_precision": 0.4281437125748503, + "eval_allNLI-dev_cosine_recall": 0.8265895953757225, + "eval_allNLI-dev_dot_accuracy": 0.662109375, + "eval_allNLI-dev_dot_accuracy_threshold": 579.238037109375, + "eval_allNLI-dev_dot_ap": 0.4102009424801914, + "eval_allNLI-dev_dot_f1": 0.5353159851301116, + "eval_allNLI-dev_dot_f1_threshold": 434.8841552734375, + "eval_allNLI-dev_dot_precision": 0.39452054794520547, + "eval_allNLI-dev_dot_recall": 0.8323699421965318, + "eval_allNLI-dev_euclidean_accuracy": 0.705078125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 8.86550521850586, + "eval_allNLI-dev_euclidean_ap": 0.521605930352391, + "eval_allNLI-dev_euclidean_f1": 0.5725971370143149, + "eval_allNLI-dev_euclidean_f1_threshold": 12.825733184814453, + "eval_allNLI-dev_euclidean_precision": 0.4430379746835443, + "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, + "eval_allNLI-dev_manhattan_accuracy": 0.697265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 182.48983764648438, + "eval_allNLI-dev_manhattan_ap": 0.5205552000244451, + "eval_allNLI-dev_manhattan_f1": 0.5609756097560975, + "eval_allNLI-dev_manhattan_f1_threshold": 258.8172302246094, + "eval_allNLI-dev_manhattan_precision": 0.43260188087774293, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.705078125, + "eval_allNLI-dev_max_accuracy_threshold": 579.238037109375, + "eval_allNLI-dev_max_ap": 0.521605930352391, + "eval_allNLI-dev_max_f1": 0.5725971370143149, + "eval_allNLI-dev_max_f1_threshold": 434.8841552734375, + "eval_allNLI-dev_max_precision": 0.4430379746835443, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7175927858077272, + "eval_sts-test_pearson_cosine": 0.7136972276507711, + "eval_sts-test_pearson_dot": 0.6211895317774989, + "eval_sts-test_pearson_euclidean": 0.7206975023993004, + "eval_sts-test_pearson_manhattan": 0.7173500334495002, + "eval_sts-test_pearson_max": 0.7206975023993004, + "eval_sts-test_spearman_cosine": 0.7248006482549532, + "eval_sts-test_spearman_dot": 0.6004732756237651, + "eval_sts-test_spearman_euclidean": 0.7142591753753792, + "eval_sts-test_spearman_manhattan": 0.709346784661184, + "eval_sts-test_spearman_max": 0.7248006482549532, + "eval_vitaminc-pairs_loss": 2.848787546157837, + "eval_vitaminc-pairs_runtime": 3.1955, + "eval_vitaminc-pairs_samples_per_second": 40.057, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_negation-triplets_loss": 1.9014121294021606, + "eval_negation-triplets_runtime": 0.7402, + "eval_negation-triplets_samples_per_second": 172.919, + "eval_negation-triplets_steps_per_second": 1.351, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_scitail-pairs-pos_loss": 0.2929946184158325, + "eval_scitail-pairs-pos_runtime": 0.8038, + "eval_scitail-pairs-pos_samples_per_second": 159.243, + "eval_scitail-pairs-pos_steps_per_second": 1.244, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_scitail-pairs-qa_loss": 0.05852370336651802, + "eval_scitail-pairs-qa_runtime": 0.5794, + "eval_scitail-pairs-qa_samples_per_second": 220.912, + "eval_scitail-pairs-qa_steps_per_second": 1.726, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_xsum-pairs_loss": 1.6594665050506592, + "eval_xsum-pairs_runtime": 3.0252, + "eval_xsum-pairs_samples_per_second": 42.312, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_sciq_pairs_loss": 0.18286681175231934, + "eval_sciq_pairs_runtime": 3.4208, + "eval_sciq_pairs_samples_per_second": 37.419, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_qasc_pairs_loss": 1.2028858661651611, + "eval_qasc_pairs_runtime": 0.5972, + "eval_qasc_pairs_samples_per_second": 214.327, + "eval_qasc_pairs_steps_per_second": 1.674, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_openbookqa_pairs_loss": 1.7855560779571533, + "eval_openbookqa_pairs_runtime": 0.584, + "eval_openbookqa_pairs_samples_per_second": 219.164, + "eval_openbookqa_pairs_steps_per_second": 1.712, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_msmarco_pairs_loss": 2.167186975479126, + "eval_msmarco_pairs_runtime": 1.5188, + "eval_msmarco_pairs_samples_per_second": 84.278, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_nq_pairs_loss": 2.60081148147583, + "eval_nq_pairs_runtime": 2.8945, + "eval_nq_pairs_samples_per_second": 44.222, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_trivia_pairs_loss": 2.2225193977355957, + "eval_trivia_pairs_runtime": 3.4352, + "eval_trivia_pairs_samples_per_second": 37.262, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_gooaq_pairs_loss": 1.6083563566207886, + "eval_gooaq_pairs_runtime": 0.9378, + "eval_gooaq_pairs_samples_per_second": 136.489, + "eval_gooaq_pairs_steps_per_second": 1.066, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_paws-pos_loss": 0.06881627440452576, + "eval_paws-pos_runtime": 0.6797, + "eval_paws-pos_samples_per_second": 188.32, + "eval_paws-pos_steps_per_second": 1.471, + "step": 320 + }, + { + "epoch": 0.3292181069958848, + "eval_global_dataset_loss": 1.0821770429611206, + "eval_global_dataset_runtime": 13.336, + "eval_global_dataset_samples_per_second": 31.194, + "eval_global_dataset_steps_per_second": 0.3, + "step": 320 + }, + { + "epoch": 0.33024691358024694, + "grad_norm": 11.460700035095215, + "learning_rate": 1.1557632398753892e-05, + "loss": 1.1381, + "step": 321 + }, + { + "epoch": 0.33127572016460904, + "grad_norm": 17.357484817504883, + "learning_rate": 1.1593977154724816e-05, + "loss": 1.74, + "step": 322 + }, + { + "epoch": 0.3323045267489712, + "grad_norm": 11.96126937866211, + "learning_rate": 1.1630321910695742e-05, + "loss": 1.2228, + "step": 323 + }, + { + "epoch": 0.3333333333333333, + "grad_norm": 11.488797187805176, + "learning_rate": 1.1666666666666665e-05, + "loss": 1.1824, + "step": 324 + }, + { + "epoch": 0.3343621399176955, + "grad_norm": 9.767298698425293, + "learning_rate": 1.170301142263759e-05, + "loss": 0.9803, + "step": 325 + }, + { + "epoch": 0.33539094650205764, + "grad_norm": 3.6967291831970215, + "learning_rate": 1.1739356178608514e-05, + "loss": 0.2092, + "step": 326 + }, + { + "epoch": 0.33641975308641975, + "grad_norm": 15.058168411254883, + "learning_rate": 1.1775700934579438e-05, + "loss": 1.9639, + "step": 327 + }, + { + "epoch": 0.3374485596707819, + "grad_norm": 13.334756851196289, + "learning_rate": 1.1812045690550362e-05, + "loss": 2.6183, + "step": 328 + }, + { + "epoch": 0.338477366255144, + "grad_norm": 11.534461975097656, + "learning_rate": 1.1848390446521288e-05, + "loss": 1.2629, + "step": 329 + }, + { + "epoch": 0.3395061728395062, + "grad_norm": 17.404090881347656, + "learning_rate": 1.188473520249221e-05, + "loss": 2.3154, + "step": 330 + }, + { + "epoch": 0.3405349794238683, + "grad_norm": 21.002731323242188, + "learning_rate": 1.1921079958463134e-05, + "loss": 2.9437, + "step": 331 + }, + { + "epoch": 0.34156378600823045, + "grad_norm": 3.4560208320617676, + "learning_rate": 1.195742471443406e-05, + "loss": 0.1481, + "step": 332 + }, + { + "epoch": 0.3425925925925926, + "grad_norm": 16.013757705688477, + "learning_rate": 1.1993769470404982e-05, + "loss": 2.0438, + "step": 333 + }, + { + "epoch": 0.3436213991769547, + "grad_norm": 12.2532377243042, + "learning_rate": 1.2030114226375908e-05, + "loss": 1.1147, + "step": 334 + }, + { + "epoch": 0.3446502057613169, + "grad_norm": 18.853534698486328, + "learning_rate": 1.2066458982346832e-05, + "loss": 2.4102, + "step": 335 + }, + { + "epoch": 0.345679012345679, + "grad_norm": 22.490856170654297, + "learning_rate": 1.2102803738317756e-05, + "loss": 3.1869, + "step": 336 + }, + { + "epoch": 0.34670781893004116, + "grad_norm": 8.61929702758789, + "learning_rate": 1.213914849428868e-05, + "loss": 0.464, + "step": 337 + }, + { + "epoch": 0.3477366255144033, + "grad_norm": 18.195655822753906, + "learning_rate": 1.2175493250259606e-05, + "loss": 2.3863, + "step": 338 + }, + { + "epoch": 0.3487654320987654, + "grad_norm": 17.072919845581055, + "learning_rate": 1.2211838006230528e-05, + "loss": 2.1807, + "step": 339 + }, + { + "epoch": 0.3497942386831276, + "grad_norm": 24.929763793945312, + "learning_rate": 1.2248182762201452e-05, + "loss": 2.6664, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_Qnli-dev_cosine_accuracy": 0.677734375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8400751352310181, + "eval_Qnli-dev_cosine_ap": 0.7066658175282814, + "eval_Qnli-dev_cosine_f1": 0.6932409012131717, + "eval_Qnli-dev_cosine_f1_threshold": 0.7699183225631714, + "eval_Qnli-dev_cosine_precision": 0.5865102639296188, + "eval_Qnli-dev_cosine_recall": 0.847457627118644, + "eval_Qnli-dev_dot_accuracy": 0.650390625, + "eval_Qnli-dev_dot_accuracy_threshold": 403.3339538574219, + "eval_Qnli-dev_dot_ap": 0.6108280098987123, + "eval_Qnli-dev_dot_f1": 0.6724738675958188, + "eval_Qnli-dev_dot_f1_threshold": 391.1240234375, + "eval_Qnli-dev_dot_precision": 0.5710059171597633, + "eval_Qnli-dev_dot_recall": 0.8177966101694916, + "eval_Qnli-dev_euclidean_accuracy": 0.69140625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.067573547363281, + "eval_Qnli-dev_euclidean_ap": 0.7152312759485101, + "eval_Qnli-dev_euclidean_f1": 0.6883561643835616, + "eval_Qnli-dev_euclidean_f1_threshold": 15.466879844665527, + "eval_Qnli-dev_euclidean_precision": 0.5775862068965517, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.689453125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 263.4106140136719, + "eval_Qnli-dev_manhattan_ap": 0.7125157018628507, + "eval_Qnli-dev_manhattan_f1": 0.6950596252129473, + "eval_Qnli-dev_manhattan_f1_threshold": 310.44476318359375, + "eval_Qnli-dev_manhattan_precision": 0.5811965811965812, + "eval_Qnli-dev_manhattan_recall": 0.864406779661017, + "eval_Qnli-dev_max_accuracy": 0.69140625, + "eval_Qnli-dev_max_accuracy_threshold": 403.3339538574219, + "eval_Qnli-dev_max_ap": 0.7152312759485101, + "eval_Qnli-dev_max_f1": 0.6950596252129473, + "eval_Qnli-dev_max_f1_threshold": 391.1240234375, + "eval_Qnli-dev_max_precision": 0.5865102639296188, + "eval_Qnli-dev_max_recall": 0.864406779661017, + "eval_allNLI-dev_cosine_accuracy": 0.697265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9136956930160522, + "eval_allNLI-dev_cosine_ap": 0.5195967953345066, + "eval_allNLI-dev_cosine_f1": 0.5657370517928287, + "eval_allNLI-dev_cosine_f1_threshold": 0.8129154443740845, + "eval_allNLI-dev_cosine_precision": 0.4316109422492401, + "eval_allNLI-dev_cosine_recall": 0.8208092485549133, + "eval_allNLI-dev_dot_accuracy": 0.673828125, + "eval_allNLI-dev_dot_accuracy_threshold": 493.11651611328125, + "eval_allNLI-dev_dot_ap": 0.43280102596891956, + "eval_allNLI-dev_dot_f1": 0.5358361774744028, + "eval_allNLI-dev_dot_f1_threshold": 382.6061096191406, + "eval_allNLI-dev_dot_precision": 0.3801452784503632, + "eval_allNLI-dev_dot_recall": 0.9075144508670521, + "eval_allNLI-dev_euclidean_accuracy": 0.701171875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.347522735595703, + "eval_allNLI-dev_euclidean_ap": 0.5242211625716244, + "eval_allNLI-dev_euclidean_f1": 0.5731958762886599, + "eval_allNLI-dev_euclidean_f1_threshold": 13.492986679077148, + "eval_allNLI-dev_euclidean_precision": 0.44551282051282054, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.6953125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 197.63035583496094, + "eval_allNLI-dev_manhattan_ap": 0.5226190870456224, + "eval_allNLI-dev_manhattan_f1": 0.570281124497992, + "eval_allNLI-dev_manhattan_f1_threshold": 274.56903076171875, + "eval_allNLI-dev_manhattan_precision": 0.4369230769230769, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.701171875, + "eval_allNLI-dev_max_accuracy_threshold": 493.11651611328125, + "eval_allNLI-dev_max_ap": 0.5242211625716244, + "eval_allNLI-dev_max_f1": 0.5731958762886599, + "eval_allNLI-dev_max_f1_threshold": 382.6061096191406, + "eval_allNLI-dev_max_precision": 0.44551282051282054, + "eval_allNLI-dev_max_recall": 0.9075144508670521, + "eval_sequential_score": 0.7152312759485101, + "eval_sts-test_pearson_cosine": 0.7271689674461207, + "eval_sts-test_pearson_dot": 0.6547412928047769, + "eval_sts-test_pearson_euclidean": 0.7347019450426402, + "eval_sts-test_pearson_manhattan": 0.7265236751697546, + "eval_sts-test_pearson_max": 0.7347019450426402, + "eval_sts-test_spearman_cosine": 0.7360126740352158, + "eval_sts-test_spearman_dot": 0.6379495482070717, + "eval_sts-test_spearman_euclidean": 0.7249498937116934, + "eval_sts-test_spearman_manhattan": 0.7182097719797506, + "eval_sts-test_spearman_max": 0.7360126740352158, + "eval_vitaminc-pairs_loss": 2.921300172805786, + "eval_vitaminc-pairs_runtime": 3.1577, + "eval_vitaminc-pairs_samples_per_second": 40.536, + "eval_vitaminc-pairs_steps_per_second": 0.317, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_negation-triplets_loss": 1.800155758857727, + "eval_negation-triplets_runtime": 0.73, + "eval_negation-triplets_samples_per_second": 175.338, + "eval_negation-triplets_steps_per_second": 1.37, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_scitail-pairs-pos_loss": 0.24910371005535126, + "eval_scitail-pairs-pos_runtime": 0.7849, + "eval_scitail-pairs-pos_samples_per_second": 163.07, + "eval_scitail-pairs-pos_steps_per_second": 1.274, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_scitail-pairs-qa_loss": 0.03286855295300484, + "eval_scitail-pairs-qa_runtime": 0.5749, + "eval_scitail-pairs-qa_samples_per_second": 222.634, + "eval_scitail-pairs-qa_steps_per_second": 1.739, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_xsum-pairs_loss": 1.4957503080368042, + "eval_xsum-pairs_runtime": 3.0086, + "eval_xsum-pairs_samples_per_second": 42.545, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_sciq_pairs_loss": 0.17354349792003632, + "eval_sciq_pairs_runtime": 3.4138, + "eval_sciq_pairs_samples_per_second": 37.494, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_qasc_pairs_loss": 1.0957040786743164, + "eval_qasc_pairs_runtime": 0.5968, + "eval_qasc_pairs_samples_per_second": 214.493, + "eval_qasc_pairs_steps_per_second": 1.676, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_openbookqa_pairs_loss": 1.6939177513122559, + "eval_openbookqa_pairs_runtime": 0.5747, + "eval_openbookqa_pairs_samples_per_second": 222.741, + "eval_openbookqa_pairs_steps_per_second": 1.74, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_msmarco_pairs_loss": 2.079050302505493, + "eval_msmarco_pairs_runtime": 1.5095, + "eval_msmarco_pairs_samples_per_second": 84.798, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_nq_pairs_loss": 2.6143176555633545, + "eval_nq_pairs_runtime": 2.8884, + "eval_nq_pairs_samples_per_second": 44.314, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_trivia_pairs_loss": 2.1714344024658203, + "eval_trivia_pairs_runtime": 3.4339, + "eval_trivia_pairs_samples_per_second": 37.275, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_gooaq_pairs_loss": 1.5302671194076538, + "eval_gooaq_pairs_runtime": 0.9419, + "eval_gooaq_pairs_samples_per_second": 135.901, + "eval_gooaq_pairs_steps_per_second": 1.062, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_paws-pos_loss": 0.05603673681616783, + "eval_paws-pos_runtime": 0.6803, + "eval_paws-pos_samples_per_second": 188.158, + "eval_paws-pos_steps_per_second": 1.47, + "step": 340 + }, + { + "epoch": 0.3497942386831276, + "eval_global_dataset_loss": 1.029583215713501, + "eval_global_dataset_runtime": 13.3497, + "eval_global_dataset_samples_per_second": 31.162, + "eval_global_dataset_steps_per_second": 0.3, + "step": 340 + }, + { + "epoch": 0.3508230452674897, + "grad_norm": 13.51473331451416, + "learning_rate": 1.2284527518172378e-05, + "loss": 2.0315, + "step": 341 + }, + { + "epoch": 0.35185185185185186, + "grad_norm": 13.707486152648926, + "learning_rate": 1.23208722741433e-05, + "loss": 2.025, + "step": 342 + }, + { + "epoch": 0.35288065843621397, + "grad_norm": 14.628217697143555, + "learning_rate": 1.2357217030114226e-05, + "loss": 2.7561, + "step": 343 + }, + { + "epoch": 0.35390946502057613, + "grad_norm": 16.766042709350586, + "learning_rate": 1.239356178608515e-05, + "loss": 2.2758, + "step": 344 + }, + { + "epoch": 0.3549382716049383, + "grad_norm": 12.628337860107422, + "learning_rate": 1.2429906542056072e-05, + "loss": 1.2086, + "step": 345 + }, + { + "epoch": 0.3559670781893004, + "grad_norm": 12.519665718078613, + "learning_rate": 1.2466251298026998e-05, + "loss": 1.1789, + "step": 346 + }, + { + "epoch": 0.35699588477366256, + "grad_norm": 15.279417037963867, + "learning_rate": 1.2502596053997922e-05, + "loss": 2.0743, + "step": 347 + }, + { + "epoch": 0.35802469135802467, + "grad_norm": 2.5973308086395264, + "learning_rate": 1.2538940809968846e-05, + "loss": 0.1388, + "step": 348 + }, + { + "epoch": 0.35905349794238683, + "grad_norm": 11.533571243286133, + "learning_rate": 1.257528556593977e-05, + "loss": 1.007, + "step": 349 + }, + { + "epoch": 0.360082304526749, + "grad_norm": 12.749200820922852, + "learning_rate": 1.2611630321910696e-05, + "loss": 1.0298, + "step": 350 + }, + { + "epoch": 0.3611111111111111, + "grad_norm": 19.385435104370117, + "learning_rate": 1.2647975077881618e-05, + "loss": 2.3117, + "step": 351 + }, + { + "epoch": 0.36213991769547327, + "grad_norm": 13.895768165588379, + "learning_rate": 1.2684319833852544e-05, + "loss": 1.1729, + "step": 352 + }, + { + "epoch": 0.3631687242798354, + "grad_norm": 14.044111251831055, + "learning_rate": 1.2720664589823468e-05, + "loss": 1.8233, + "step": 353 + }, + { + "epoch": 0.36419753086419754, + "grad_norm": 13.1631441116333, + "learning_rate": 1.275700934579439e-05, + "loss": 1.2639, + "step": 354 + }, + { + "epoch": 0.36522633744855965, + "grad_norm": 12.106592178344727, + "learning_rate": 1.2793354101765316e-05, + "loss": 1.1652, + "step": 355 + }, + { + "epoch": 0.3662551440329218, + "grad_norm": 12.02451229095459, + "learning_rate": 1.282969885773624e-05, + "loss": 1.1368, + "step": 356 + }, + { + "epoch": 0.36728395061728397, + "grad_norm": 11.719282150268555, + "learning_rate": 1.2866043613707164e-05, + "loss": 1.122, + "step": 357 + }, + { + "epoch": 0.3683127572016461, + "grad_norm": 12.295735359191895, + "learning_rate": 1.2902388369678088e-05, + "loss": 1.2503, + "step": 358 + }, + { + "epoch": 0.36934156378600824, + "grad_norm": 12.906529426574707, + "learning_rate": 1.2938733125649014e-05, + "loss": 2.2025, + "step": 359 + }, + { + "epoch": 0.37037037037037035, + "grad_norm": 11.613821983337402, + "learning_rate": 1.2975077881619936e-05, + "loss": 0.9197, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_Qnli-dev_cosine_accuracy": 0.67578125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8274970650672913, + "eval_Qnli-dev_cosine_ap": 0.7107856399181414, + "eval_Qnli-dev_cosine_f1": 0.6886446886446885, + "eval_Qnli-dev_cosine_f1_threshold": 0.7976377010345459, + "eval_Qnli-dev_cosine_precision": 0.6064516129032258, + "eval_Qnli-dev_cosine_recall": 0.7966101694915254, + "eval_Qnli-dev_dot_accuracy": 0.638671875, + "eval_Qnli-dev_dot_accuracy_threshold": 421.1475830078125, + "eval_Qnli-dev_dot_ap": 0.6026157605644281, + "eval_Qnli-dev_dot_f1": 0.6688, + "eval_Qnli-dev_dot_f1_threshold": 383.72686767578125, + "eval_Qnli-dev_dot_precision": 0.5372750642673522, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.69140625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.509709358215332, + "eval_Qnli-dev_euclidean_ap": 0.7205377940978139, + "eval_Qnli-dev_euclidean_f1": 0.6884681583476765, + "eval_Qnli-dev_euclidean_f1_threshold": 15.236268997192383, + "eval_Qnli-dev_euclidean_precision": 0.5797101449275363, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.6875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 255.33294677734375, + "eval_Qnli-dev_manhattan_ap": 0.7196447998198872, + "eval_Qnli-dev_manhattan_f1": 0.6936026936026936, + "eval_Qnli-dev_manhattan_f1_threshold": 311.228271484375, + "eval_Qnli-dev_manhattan_precision": 0.5754189944134078, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.69140625, + "eval_Qnli-dev_max_accuracy_threshold": 421.1475830078125, + "eval_Qnli-dev_max_ap": 0.7205377940978139, + "eval_Qnli-dev_max_f1": 0.6936026936026936, + "eval_Qnli-dev_max_f1_threshold": 383.72686767578125, + "eval_Qnli-dev_max_precision": 0.6064516129032258, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.71484375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9123337864875793, + "eval_allNLI-dev_cosine_ap": 0.5454586491658389, + "eval_allNLI-dev_cosine_f1": 0.5625, + "eval_allNLI-dev_cosine_f1_threshold": 0.8062101006507874, + "eval_allNLI-dev_cosine_precision": 0.4247787610619469, + "eval_allNLI-dev_cosine_recall": 0.8323699421965318, + "eval_allNLI-dev_dot_accuracy": 0.673828125, + "eval_allNLI-dev_dot_accuracy_threshold": 486.6114807128906, + "eval_allNLI-dev_dot_ap": 0.45389873900128597, + "eval_allNLI-dev_dot_f1": 0.5345132743362832, + "eval_allNLI-dev_dot_f1_threshold": 395.1532897949219, + "eval_allNLI-dev_dot_precision": 0.3852040816326531, + "eval_allNLI-dev_dot_recall": 0.8728323699421965, + "eval_allNLI-dev_euclidean_accuracy": 0.71484375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.256704330444336, + "eval_allNLI-dev_euclidean_ap": 0.5468473795344413, + "eval_allNLI-dev_euclidean_f1": 0.5690721649484537, + "eval_allNLI-dev_euclidean_f1_threshold": 13.748187065124512, + "eval_allNLI-dev_euclidean_precision": 0.4423076923076923, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.7109375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 186.86061096191406, + "eval_allNLI-dev_manhattan_ap": 0.5447298734564391, + "eval_allNLI-dev_manhattan_f1": 0.5643153526970954, + "eval_allNLI-dev_manhattan_f1_threshold": 275.2322082519531, + "eval_allNLI-dev_manhattan_precision": 0.4401294498381877, + "eval_allNLI-dev_manhattan_recall": 0.7861271676300579, + "eval_allNLI-dev_max_accuracy": 0.71484375, + "eval_allNLI-dev_max_accuracy_threshold": 486.6114807128906, + "eval_allNLI-dev_max_ap": 0.5468473795344413, + "eval_allNLI-dev_max_f1": 0.5690721649484537, + "eval_allNLI-dev_max_f1_threshold": 395.1532897949219, + "eval_allNLI-dev_max_precision": 0.4423076923076923, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7205377940978139, + "eval_sts-test_pearson_cosine": 0.7499734901337567, + "eval_sts-test_pearson_dot": 0.6944181989389289, + "eval_sts-test_pearson_euclidean": 0.7581460883701887, + "eval_sts-test_pearson_manhattan": 0.7494485428076179, + "eval_sts-test_pearson_max": 0.7581460883701887, + "eval_sts-test_spearman_cosine": 0.7613686939883212, + "eval_sts-test_spearman_dot": 0.6777020937010519, + "eval_sts-test_spearman_euclidean": 0.748221668416756, + "eval_sts-test_spearman_manhattan": 0.7396471477291182, + "eval_sts-test_spearman_max": 0.7613686939883212, + "eval_vitaminc-pairs_loss": 2.936203718185425, + "eval_vitaminc-pairs_runtime": 3.1741, + "eval_vitaminc-pairs_samples_per_second": 40.326, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_negation-triplets_loss": 1.7166328430175781, + "eval_negation-triplets_runtime": 0.7398, + "eval_negation-triplets_samples_per_second": 173.027, + "eval_negation-triplets_steps_per_second": 1.352, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_scitail-pairs-pos_loss": 0.24884574115276337, + "eval_scitail-pairs-pos_runtime": 0.7997, + "eval_scitail-pairs-pos_samples_per_second": 160.064, + "eval_scitail-pairs-pos_steps_per_second": 1.25, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_scitail-pairs-qa_loss": 0.03187813237309456, + "eval_scitail-pairs-qa_runtime": 0.5642, + "eval_scitail-pairs-qa_samples_per_second": 226.873, + "eval_scitail-pairs-qa_steps_per_second": 1.772, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_xsum-pairs_loss": 1.3931405544281006, + "eval_xsum-pairs_runtime": 3.0204, + "eval_xsum-pairs_samples_per_second": 42.378, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_sciq_pairs_loss": 0.16648775339126587, + "eval_sciq_pairs_runtime": 3.3956, + "eval_sciq_pairs_samples_per_second": 37.695, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_qasc_pairs_loss": 0.9755259156227112, + "eval_qasc_pairs_runtime": 0.5919, + "eval_qasc_pairs_samples_per_second": 216.262, + "eval_qasc_pairs_steps_per_second": 1.69, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_openbookqa_pairs_loss": 1.6076797246932983, + "eval_openbookqa_pairs_runtime": 0.5876, + "eval_openbookqa_pairs_samples_per_second": 217.828, + "eval_openbookqa_pairs_steps_per_second": 1.702, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_msmarco_pairs_loss": 2.0155866146087646, + "eval_msmarco_pairs_runtime": 1.5287, + "eval_msmarco_pairs_samples_per_second": 83.731, + "eval_msmarco_pairs_steps_per_second": 0.654, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_nq_pairs_loss": 2.5515902042388916, + "eval_nq_pairs_runtime": 2.8881, + "eval_nq_pairs_samples_per_second": 44.319, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_trivia_pairs_loss": 1.8931869268417358, + "eval_trivia_pairs_runtime": 3.4335, + "eval_trivia_pairs_samples_per_second": 37.28, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_gooaq_pairs_loss": 1.4498214721679688, + "eval_gooaq_pairs_runtime": 0.9445, + "eval_gooaq_pairs_samples_per_second": 135.517, + "eval_gooaq_pairs_steps_per_second": 1.059, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_paws-pos_loss": 0.05163538083434105, + "eval_paws-pos_runtime": 0.6805, + "eval_paws-pos_samples_per_second": 188.1, + "eval_paws-pos_steps_per_second": 1.47, + "step": 360 + }, + { + "epoch": 0.37037037037037035, + "eval_global_dataset_loss": 0.9714978933334351, + "eval_global_dataset_runtime": 13.3457, + "eval_global_dataset_samples_per_second": 31.171, + "eval_global_dataset_steps_per_second": 0.3, + "step": 360 + }, + { + "epoch": 0.3713991769547325, + "grad_norm": 8.986026763916016, + "learning_rate": 1.3011422637590862e-05, + "loss": 0.4565, + "step": 361 + }, + { + "epoch": 0.3724279835390947, + "grad_norm": 10.55431079864502, + "learning_rate": 1.3047767393561786e-05, + "loss": 0.9519, + "step": 362 + }, + { + "epoch": 0.3734567901234568, + "grad_norm": 22.92361831665039, + "learning_rate": 1.3084112149532708e-05, + "loss": 2.876, + "step": 363 + }, + { + "epoch": 0.37448559670781895, + "grad_norm": 13.740486145019531, + "learning_rate": 1.3120456905503634e-05, + "loss": 1.5415, + "step": 364 + }, + { + "epoch": 0.37551440329218105, + "grad_norm": 12.367791175842285, + "learning_rate": 1.3156801661474558e-05, + "loss": 1.017, + "step": 365 + }, + { + "epoch": 0.3765432098765432, + "grad_norm": 13.127511978149414, + "learning_rate": 1.3193146417445482e-05, + "loss": 1.4156, + "step": 366 + }, + { + "epoch": 0.3775720164609053, + "grad_norm": 24.860748291015625, + "learning_rate": 1.3229491173416406e-05, + "loss": 3.272, + "step": 367 + }, + { + "epoch": 0.3786008230452675, + "grad_norm": 10.243440628051758, + "learning_rate": 1.3265835929387332e-05, + "loss": 0.8205, + "step": 368 + }, + { + "epoch": 0.37962962962962965, + "grad_norm": 19.33579444885254, + "learning_rate": 1.3302180685358254e-05, + "loss": 2.7472, + "step": 369 + }, + { + "epoch": 0.38065843621399176, + "grad_norm": 2.6139795780181885, + "learning_rate": 1.3338525441329178e-05, + "loss": 0.1251, + "step": 370 + }, + { + "epoch": 0.3816872427983539, + "grad_norm": 13.30015754699707, + "learning_rate": 1.3374870197300104e-05, + "loss": 1.7165, + "step": 371 + }, + { + "epoch": 0.38271604938271603, + "grad_norm": 15.629612922668457, + "learning_rate": 1.3411214953271026e-05, + "loss": 2.0682, + "step": 372 + }, + { + "epoch": 0.3837448559670782, + "grad_norm": 2.9140241146087646, + "learning_rate": 1.3447559709241952e-05, + "loss": 0.1312, + "step": 373 + }, + { + "epoch": 0.38477366255144035, + "grad_norm": 14.68766975402832, + "learning_rate": 1.3483904465212876e-05, + "loss": 1.473, + "step": 374 + }, + { + "epoch": 0.38580246913580246, + "grad_norm": 11.001675605773926, + "learning_rate": 1.35202492211838e-05, + "loss": 0.9278, + "step": 375 + }, + { + "epoch": 0.3868312757201646, + "grad_norm": 18.111684799194336, + "learning_rate": 1.3556593977154724e-05, + "loss": 1.9585, + "step": 376 + }, + { + "epoch": 0.38786008230452673, + "grad_norm": 19.245637893676758, + "learning_rate": 1.359293873312565e-05, + "loss": 2.3601, + "step": 377 + }, + { + "epoch": 0.3888888888888889, + "grad_norm": 11.811524391174316, + "learning_rate": 1.3629283489096572e-05, + "loss": 1.0816, + "step": 378 + }, + { + "epoch": 0.389917695473251, + "grad_norm": 15.023236274719238, + "learning_rate": 1.3665628245067496e-05, + "loss": 2.3182, + "step": 379 + }, + { + "epoch": 0.39094650205761317, + "grad_norm": 16.875747680664062, + "learning_rate": 1.3701973001038421e-05, + "loss": 2.0826, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_Qnli-dev_cosine_accuracy": 0.669921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8552969098091125, + "eval_Qnli-dev_cosine_ap": 0.7113127062240453, + "eval_Qnli-dev_cosine_f1": 0.6916221033868093, + "eval_Qnli-dev_cosine_f1_threshold": 0.781796932220459, + "eval_Qnli-dev_cosine_precision": 0.5969230769230769, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.638671875, + "eval_Qnli-dev_dot_accuracy_threshold": 427.3189697265625, + "eval_Qnli-dev_dot_ap": 0.5994490036489998, + "eval_Qnli-dev_dot_f1": 0.6635367762128326, + "eval_Qnli-dev_dot_f1_threshold": 378.4907531738281, + "eval_Qnli-dev_dot_precision": 0.5260545905707196, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.677734375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.223026275634766, + "eval_Qnli-dev_euclidean_ap": 0.7220014624049731, + "eval_Qnli-dev_euclidean_f1": 0.6837606837606838, + "eval_Qnli-dev_euclidean_f1_threshold": 15.54850959777832, + "eval_Qnli-dev_euclidean_precision": 0.5730659025787965, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.685546875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 273.4452209472656, + "eval_Qnli-dev_manhattan_ap": 0.7213585266821472, + "eval_Qnli-dev_manhattan_f1": 0.6920415224913494, + "eval_Qnli-dev_manhattan_f1_threshold": 312.36419677734375, + "eval_Qnli-dev_manhattan_precision": 0.5847953216374269, + "eval_Qnli-dev_manhattan_recall": 0.847457627118644, + "eval_Qnli-dev_max_accuracy": 0.685546875, + "eval_Qnli-dev_max_accuracy_threshold": 427.3189697265625, + "eval_Qnli-dev_max_ap": 0.7220014624049731, + "eval_Qnli-dev_max_f1": 0.6920415224913494, + "eval_Qnli-dev_max_f1_threshold": 378.4907531738281, + "eval_Qnli-dev_max_precision": 0.5969230769230769, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.712890625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9154093861579895, + "eval_allNLI-dev_cosine_ap": 0.53447469371308, + "eval_allNLI-dev_cosine_f1": 0.5582329317269077, + "eval_allNLI-dev_cosine_f1_threshold": 0.8176555633544922, + "eval_allNLI-dev_cosine_precision": 0.4276923076923077, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.6640625, + "eval_allNLI-dev_dot_accuracy_threshold": 565.86181640625, + "eval_allNLI-dev_dot_ap": 0.4315444199034263, + "eval_allNLI-dev_dot_f1": 0.5361552028218695, + "eval_allNLI-dev_dot_f1_threshold": 401.46759033203125, + "eval_allNLI-dev_dot_precision": 0.38578680203045684, + "eval_allNLI-dev_dot_recall": 0.8786127167630058, + "eval_allNLI-dev_euclidean_accuracy": 0.708984375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.178808212280273, + "eval_allNLI-dev_euclidean_ap": 0.5387071754068331, + "eval_allNLI-dev_euclidean_f1": 0.5644171779141105, + "eval_allNLI-dev_euclidean_f1_threshold": 13.749273300170898, + "eval_allNLI-dev_euclidean_precision": 0.43670886075949367, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.708984375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 188.05450439453125, + "eval_allNLI-dev_manhattan_ap": 0.536761796295294, + "eval_allNLI-dev_manhattan_f1": 0.5641025641025641, + "eval_allNLI-dev_manhattan_f1_threshold": 269.14678955078125, + "eval_allNLI-dev_manhattan_precision": 0.44745762711864406, + "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, + "eval_allNLI-dev_max_accuracy": 0.712890625, + "eval_allNLI-dev_max_accuracy_threshold": 565.86181640625, + "eval_allNLI-dev_max_ap": 0.5387071754068331, + "eval_allNLI-dev_max_f1": 0.5644171779141105, + "eval_allNLI-dev_max_f1_threshold": 401.46759033203125, + "eval_allNLI-dev_max_precision": 0.44745762711864406, + "eval_allNLI-dev_max_recall": 0.8786127167630058, + "eval_sequential_score": 0.7220014624049731, + "eval_sts-test_pearson_cosine": 0.751982795791559, + "eval_sts-test_pearson_dot": 0.6746803146097003, + "eval_sts-test_pearson_euclidean": 0.7662220636606787, + "eval_sts-test_pearson_manhattan": 0.7605555708771703, + "eval_sts-test_pearson_max": 0.7662220636606787, + "eval_sts-test_spearman_cosine": 0.7683159693092451, + "eval_sts-test_spearman_dot": 0.6569558116813516, + "eval_sts-test_spearman_euclidean": 0.7569410489375177, + "eval_sts-test_spearman_manhattan": 0.7506770806267793, + "eval_sts-test_spearman_max": 0.7683159693092451, + "eval_vitaminc-pairs_loss": 3.076357841491699, + "eval_vitaminc-pairs_runtime": 3.1717, + "eval_vitaminc-pairs_samples_per_second": 40.357, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_negation-triplets_loss": 1.6979268789291382, + "eval_negation-triplets_runtime": 0.7352, + "eval_negation-triplets_samples_per_second": 174.1, + "eval_negation-triplets_steps_per_second": 1.36, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_scitail-pairs-pos_loss": 0.24082684516906738, + "eval_scitail-pairs-pos_runtime": 0.7875, + "eval_scitail-pairs-pos_samples_per_second": 162.532, + "eval_scitail-pairs-pos_steps_per_second": 1.27, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_scitail-pairs-qa_loss": 0.027928592637181282, + "eval_scitail-pairs-qa_runtime": 0.5645, + "eval_scitail-pairs-qa_samples_per_second": 226.73, + "eval_scitail-pairs-qa_steps_per_second": 1.771, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_xsum-pairs_loss": 1.2622292041778564, + "eval_xsum-pairs_runtime": 3.0159, + "eval_xsum-pairs_samples_per_second": 42.441, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_sciq_pairs_loss": 0.15188440680503845, + "eval_sciq_pairs_runtime": 3.4598, + "eval_sciq_pairs_samples_per_second": 36.996, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_qasc_pairs_loss": 1.0029363632202148, + "eval_qasc_pairs_runtime": 0.6147, + "eval_qasc_pairs_samples_per_second": 208.233, + "eval_qasc_pairs_steps_per_second": 1.627, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_openbookqa_pairs_loss": 1.6912751197814941, + "eval_openbookqa_pairs_runtime": 0.574, + "eval_openbookqa_pairs_samples_per_second": 222.989, + "eval_openbookqa_pairs_steps_per_second": 1.742, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_msmarco_pairs_loss": 1.9394227266311646, + "eval_msmarco_pairs_runtime": 1.5149, + "eval_msmarco_pairs_samples_per_second": 84.493, + "eval_msmarco_pairs_steps_per_second": 0.66, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_nq_pairs_loss": 2.323462963104248, + "eval_nq_pairs_runtime": 2.8969, + "eval_nq_pairs_samples_per_second": 44.185, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_trivia_pairs_loss": 1.7298884391784668, + "eval_trivia_pairs_runtime": 3.4558, + "eval_trivia_pairs_samples_per_second": 37.039, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_gooaq_pairs_loss": 1.3238028287887573, + "eval_gooaq_pairs_runtime": 0.9462, + "eval_gooaq_pairs_samples_per_second": 135.282, + "eval_gooaq_pairs_steps_per_second": 1.057, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_paws-pos_loss": 0.04966222867369652, + "eval_paws-pos_runtime": 0.68, + "eval_paws-pos_samples_per_second": 188.239, + "eval_paws-pos_steps_per_second": 1.471, + "step": 380 + }, + { + "epoch": 0.39094650205761317, + "eval_global_dataset_loss": 0.941063642501831, + "eval_global_dataset_runtime": 13.3724, + "eval_global_dataset_samples_per_second": 31.109, + "eval_global_dataset_steps_per_second": 0.299, + "step": 380 + }, + { + "epoch": 0.39197530864197533, + "grad_norm": 7.882116317749023, + "learning_rate": 1.3738317757009344e-05, + "loss": 0.4021, + "step": 381 + }, + { + "epoch": 0.39300411522633744, + "grad_norm": 11.462610244750977, + "learning_rate": 1.377466251298027e-05, + "loss": 1.0139, + "step": 382 + }, + { + "epoch": 0.3940329218106996, + "grad_norm": 14.762428283691406, + "learning_rate": 1.3811007268951194e-05, + "loss": 1.4995, + "step": 383 + }, + { + "epoch": 0.3950617283950617, + "grad_norm": 22.418067932128906, + "learning_rate": 1.3847352024922118e-05, + "loss": 2.4814, + "step": 384 + }, + { + "epoch": 0.39609053497942387, + "grad_norm": 22.518835067749023, + "learning_rate": 1.3883696780893042e-05, + "loss": 2.715, + "step": 385 + }, + { + "epoch": 0.39711934156378603, + "grad_norm": 16.43521499633789, + "learning_rate": 1.3920041536863967e-05, + "loss": 2.1024, + "step": 386 + }, + { + "epoch": 0.39814814814814814, + "grad_norm": 11.84013557434082, + "learning_rate": 1.395638629283489e-05, + "loss": 0.9607, + "step": 387 + }, + { + "epoch": 0.3991769547325103, + "grad_norm": 11.819180488586426, + "learning_rate": 1.3992731048805814e-05, + "loss": 0.9005, + "step": 388 + }, + { + "epoch": 0.4002057613168724, + "grad_norm": 2.0303947925567627, + "learning_rate": 1.402907580477674e-05, + "loss": 0.0972, + "step": 389 + }, + { + "epoch": 0.4012345679012346, + "grad_norm": 14.67570686340332, + "learning_rate": 1.4065420560747662e-05, + "loss": 1.7057, + "step": 390 + }, + { + "epoch": 0.4022633744855967, + "grad_norm": 13.796504974365234, + "learning_rate": 1.4101765316718587e-05, + "loss": 2.1665, + "step": 391 + }, + { + "epoch": 0.40329218106995884, + "grad_norm": 11.881211280822754, + "learning_rate": 1.4138110072689511e-05, + "loss": 1.2397, + "step": 392 + }, + { + "epoch": 0.404320987654321, + "grad_norm": 14.59404182434082, + "learning_rate": 1.4174454828660434e-05, + "loss": 2.2046, + "step": 393 + }, + { + "epoch": 0.4053497942386831, + "grad_norm": 2.643915891647339, + "learning_rate": 1.421079958463136e-05, + "loss": 0.1259, + "step": 394 + }, + { + "epoch": 0.4063786008230453, + "grad_norm": 14.334051132202148, + "learning_rate": 1.4247144340602283e-05, + "loss": 1.5374, + "step": 395 + }, + { + "epoch": 0.4074074074074074, + "grad_norm": 13.22938060760498, + "learning_rate": 1.4283489096573207e-05, + "loss": 1.0678, + "step": 396 + }, + { + "epoch": 0.40843621399176955, + "grad_norm": 14.21168041229248, + "learning_rate": 1.4319833852544132e-05, + "loss": 1.6494, + "step": 397 + }, + { + "epoch": 0.4094650205761317, + "grad_norm": 12.661002159118652, + "learning_rate": 1.4356178608515057e-05, + "loss": 1.8241, + "step": 398 + }, + { + "epoch": 0.4104938271604938, + "grad_norm": 20.38874053955078, + "learning_rate": 1.439252336448598e-05, + "loss": 2.5338, + "step": 399 + }, + { + "epoch": 0.411522633744856, + "grad_norm": 10.472373962402344, + "learning_rate": 1.4428868120456905e-05, + "loss": 0.728, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_Qnli-dev_cosine_accuracy": 0.6640625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8612580299377441, + "eval_Qnli-dev_cosine_ap": 0.7008833240002428, + "eval_Qnli-dev_cosine_f1": 0.690391459074733, + "eval_Qnli-dev_cosine_f1_threshold": 0.7775630950927734, + "eval_Qnli-dev_cosine_precision": 0.5950920245398773, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.65625, + "eval_Qnli-dev_dot_accuracy_threshold": 427.90985107421875, + "eval_Qnli-dev_dot_ap": 0.5927778156562893, + "eval_Qnli-dev_dot_f1": 0.662379421221865, + "eval_Qnli-dev_dot_f1_threshold": 380.0942077636719, + "eval_Qnli-dev_dot_precision": 0.533678756476684, + "eval_Qnli-dev_dot_recall": 0.8728813559322034, + "eval_Qnli-dev_euclidean_accuracy": 0.673828125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.01819133758545, + "eval_Qnli-dev_euclidean_ap": 0.7113131862333142, + "eval_Qnli-dev_euclidean_f1": 0.6801470588235294, + "eval_Qnli-dev_euclidean_f1_threshold": 14.90008544921875, + "eval_Qnli-dev_euclidean_precision": 0.6006493506493507, + "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, + "eval_Qnli-dev_manhattan_accuracy": 0.673828125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 239.12254333496094, + "eval_Qnli-dev_manhattan_ap": 0.7095040002502047, + "eval_Qnli-dev_manhattan_f1": 0.6803418803418803, + "eval_Qnli-dev_manhattan_f1_threshold": 318.11749267578125, + "eval_Qnli-dev_manhattan_precision": 0.5702005730659025, + "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, + "eval_Qnli-dev_max_accuracy": 0.673828125, + "eval_Qnli-dev_max_accuracy_threshold": 427.90985107421875, + "eval_Qnli-dev_max_ap": 0.7113131862333142, + "eval_Qnli-dev_max_f1": 0.690391459074733, + "eval_Qnli-dev_max_f1_threshold": 380.0942077636719, + "eval_Qnli-dev_max_precision": 0.6006493506493507, + "eval_Qnli-dev_max_recall": 0.8728813559322034, + "eval_allNLI-dev_cosine_accuracy": 0.720703125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9071913957595825, + "eval_allNLI-dev_cosine_ap": 0.5455260839546557, + "eval_allNLI-dev_cosine_f1": 0.5627376425855514, + "eval_allNLI-dev_cosine_f1_threshold": 0.7851958274841309, + "eval_allNLI-dev_cosine_precision": 0.4192634560906516, + "eval_allNLI-dev_cosine_recall": 0.8554913294797688, + "eval_allNLI-dev_dot_accuracy": 0.673828125, + "eval_allNLI-dev_dot_accuracy_threshold": 495.9515380859375, + "eval_allNLI-dev_dot_ap": 0.4440619711184598, + "eval_allNLI-dev_dot_f1": 0.5368248772504091, + "eval_allNLI-dev_dot_f1_threshold": 365.49859619140625, + "eval_allNLI-dev_dot_precision": 0.3744292237442922, + "eval_allNLI-dev_dot_recall": 0.9479768786127167, + "eval_allNLI-dev_euclidean_accuracy": 0.716796875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.831633567810059, + "eval_allNLI-dev_euclidean_ap": 0.5498277088824723, + "eval_allNLI-dev_euclidean_f1": 0.5628997867803838, + "eval_allNLI-dev_euclidean_f1_threshold": 13.911539077758789, + "eval_allNLI-dev_euclidean_precision": 0.44594594594594594, + "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, + "eval_allNLI-dev_manhattan_accuracy": 0.716796875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 193.09060668945312, + "eval_allNLI-dev_manhattan_ap": 0.5499837592661042, + "eval_allNLI-dev_manhattan_f1": 0.5671641791044776, + "eval_allNLI-dev_manhattan_f1_threshold": 302.8914489746094, + "eval_allNLI-dev_manhattan_precision": 0.418732782369146, + "eval_allNLI-dev_manhattan_recall": 0.8786127167630058, + "eval_allNLI-dev_max_accuracy": 0.720703125, + "eval_allNLI-dev_max_accuracy_threshold": 495.9515380859375, + "eval_allNLI-dev_max_ap": 0.5499837592661042, + "eval_allNLI-dev_max_f1": 0.5671641791044776, + "eval_allNLI-dev_max_f1_threshold": 365.49859619140625, + "eval_allNLI-dev_max_precision": 0.44594594594594594, + "eval_allNLI-dev_max_recall": 0.9479768786127167, + "eval_sequential_score": 0.7113131862333142, + "eval_sts-test_pearson_cosine": 0.7693717840654692, + "eval_sts-test_pearson_dot": 0.7198031376100279, + "eval_sts-test_pearson_euclidean": 0.783845420495406, + "eval_sts-test_pearson_manhattan": 0.7789636540660673, + "eval_sts-test_pearson_max": 0.783845420495406, + "eval_sts-test_spearman_cosine": 0.7856816515409163, + "eval_sts-test_spearman_dot": 0.6995584755108273, + "eval_sts-test_spearman_euclidean": 0.7742835947670496, + "eval_sts-test_spearman_manhattan": 0.766819666133362, + "eval_sts-test_spearman_max": 0.7856816515409163, + "eval_vitaminc-pairs_loss": 3.1967031955718994, + "eval_vitaminc-pairs_runtime": 3.1694, + "eval_vitaminc-pairs_samples_per_second": 40.387, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_negation-triplets_loss": 1.6347670555114746, + "eval_negation-triplets_runtime": 0.7366, + "eval_negation-triplets_samples_per_second": 173.76, + "eval_negation-triplets_steps_per_second": 1.358, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_scitail-pairs-pos_loss": 0.2562161684036255, + "eval_scitail-pairs-pos_runtime": 0.7943, + "eval_scitail-pairs-pos_samples_per_second": 161.157, + "eval_scitail-pairs-pos_steps_per_second": 1.259, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_scitail-pairs-qa_loss": 0.026608988642692566, + "eval_scitail-pairs-qa_runtime": 0.5671, + "eval_scitail-pairs-qa_samples_per_second": 225.717, + "eval_scitail-pairs-qa_steps_per_second": 1.763, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_xsum-pairs_loss": 1.1463801860809326, + "eval_xsum-pairs_runtime": 3.0137, + "eval_xsum-pairs_samples_per_second": 42.472, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_sciq_pairs_loss": 0.14410454034805298, + "eval_sciq_pairs_runtime": 3.4298, + "eval_sciq_pairs_samples_per_second": 37.319, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_qasc_pairs_loss": 0.9308192729949951, + "eval_qasc_pairs_runtime": 0.6033, + "eval_qasc_pairs_samples_per_second": 212.183, + "eval_qasc_pairs_steps_per_second": 1.658, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_openbookqa_pairs_loss": 1.5551338195800781, + "eval_openbookqa_pairs_runtime": 0.5715, + "eval_openbookqa_pairs_samples_per_second": 223.986, + "eval_openbookqa_pairs_steps_per_second": 1.75, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_msmarco_pairs_loss": 1.754774570465088, + "eval_msmarco_pairs_runtime": 1.5121, + "eval_msmarco_pairs_samples_per_second": 84.651, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_nq_pairs_loss": 2.163573980331421, + "eval_nq_pairs_runtime": 2.8927, + "eval_nq_pairs_samples_per_second": 44.249, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_trivia_pairs_loss": 1.6104655265808105, + "eval_trivia_pairs_runtime": 3.4302, + "eval_trivia_pairs_samples_per_second": 37.316, + "eval_trivia_pairs_steps_per_second": 0.292, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_gooaq_pairs_loss": 1.2485657930374146, + "eval_gooaq_pairs_runtime": 0.9371, + "eval_gooaq_pairs_samples_per_second": 136.585, + "eval_gooaq_pairs_steps_per_second": 1.067, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_paws-pos_loss": 0.04668813571333885, + "eval_paws-pos_runtime": 0.6799, + "eval_paws-pos_samples_per_second": 188.263, + "eval_paws-pos_steps_per_second": 1.471, + "step": 400 + }, + { + "epoch": 0.411522633744856, + "eval_global_dataset_loss": 0.9041730165481567, + "eval_global_dataset_runtime": 13.3404, + "eval_global_dataset_samples_per_second": 31.183, + "eval_global_dataset_steps_per_second": 0.3, + "step": 400 + }, + { + "epoch": 0.4125514403292181, + "grad_norm": 12.87179183959961, + "learning_rate": 1.446521287642783e-05, + "loss": 1.6954, + "step": 401 + }, + { + "epoch": 0.41358024691358025, + "grad_norm": 11.029813766479492, + "learning_rate": 1.4501557632398752e-05, + "loss": 0.8749, + "step": 402 + }, + { + "epoch": 0.41460905349794236, + "grad_norm": 12.643345832824707, + "learning_rate": 1.4537902388369677e-05, + "loss": 1.6856, + "step": 403 + }, + { + "epoch": 0.4156378600823045, + "grad_norm": 3.3279924392700195, + "learning_rate": 1.4574247144340601e-05, + "loss": 0.1254, + "step": 404 + }, + { + "epoch": 0.4166666666666667, + "grad_norm": 10.091893196105957, + "learning_rate": 1.4610591900311525e-05, + "loss": 0.6986, + "step": 405 + }, + { + "epoch": 0.4176954732510288, + "grad_norm": 18.337682723999023, + "learning_rate": 1.464693665628245e-05, + "loss": 2.179, + "step": 406 + }, + { + "epoch": 0.41872427983539096, + "grad_norm": 19.924663543701172, + "learning_rate": 1.4683281412253373e-05, + "loss": 2.7119, + "step": 407 + }, + { + "epoch": 0.41975308641975306, + "grad_norm": 11.52767562866211, + "learning_rate": 1.4719626168224297e-05, + "loss": 1.0071, + "step": 408 + }, + { + "epoch": 0.4207818930041152, + "grad_norm": 13.5806884765625, + "learning_rate": 1.4755970924195223e-05, + "loss": 1.1775, + "step": 409 + }, + { + "epoch": 0.4218106995884774, + "grad_norm": 12.70963191986084, + "learning_rate": 1.4792315680166145e-05, + "loss": 1.1221, + "step": 410 + }, + { + "epoch": 0.4228395061728395, + "grad_norm": 15.391998291015625, + "learning_rate": 1.482866043613707e-05, + "loss": 2.146, + "step": 411 + }, + { + "epoch": 0.42386831275720166, + "grad_norm": 13.149142265319824, + "learning_rate": 1.4865005192107995e-05, + "loss": 1.1117, + "step": 412 + }, + { + "epoch": 0.42489711934156377, + "grad_norm": 12.455025672912598, + "learning_rate": 1.4901349948078918e-05, + "loss": 1.1356, + "step": 413 + }, + { + "epoch": 0.42592592592592593, + "grad_norm": 11.366116523742676, + "learning_rate": 1.4937694704049843e-05, + "loss": 1.0309, + "step": 414 + }, + { + "epoch": 0.4269547325102881, + "grad_norm": 18.122159957885742, + "learning_rate": 1.4974039460020767e-05, + "loss": 2.0714, + "step": 415 + }, + { + "epoch": 0.4279835390946502, + "grad_norm": 15.322531700134277, + "learning_rate": 1.501038421599169e-05, + "loss": 2.4046, + "step": 416 + }, + { + "epoch": 0.42901234567901236, + "grad_norm": 8.748584747314453, + "learning_rate": 1.5046728971962615e-05, + "loss": 0.3751, + "step": 417 + }, + { + "epoch": 0.43004115226337447, + "grad_norm": 11.135249137878418, + "learning_rate": 1.508307372793354e-05, + "loss": 0.9194, + "step": 418 + }, + { + "epoch": 0.43106995884773663, + "grad_norm": 2.534362554550171, + "learning_rate": 1.5119418483904463e-05, + "loss": 0.0876, + "step": 419 + }, + { + "epoch": 0.43209876543209874, + "grad_norm": 11.441560745239258, + "learning_rate": 1.5155763239875387e-05, + "loss": 1.0256, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_Qnli-dev_cosine_accuracy": 0.669921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.82877516746521, + "eval_Qnli-dev_cosine_ap": 0.7039769311253984, + "eval_Qnli-dev_cosine_f1": 0.6916524701873935, + "eval_Qnli-dev_cosine_f1_threshold": 0.760260820388794, + "eval_Qnli-dev_cosine_precision": 0.5783475783475783, + "eval_Qnli-dev_cosine_recall": 0.8601694915254238, + "eval_Qnli-dev_dot_accuracy": 0.65234375, + "eval_Qnli-dev_dot_accuracy_threshold": 410.722412109375, + "eval_Qnli-dev_dot_ap": 0.5998569073160949, + "eval_Qnli-dev_dot_f1": 0.6757679180887372, + "eval_Qnli-dev_dot_f1_threshold": 395.8144226074219, + "eval_Qnli-dev_dot_precision": 0.5657142857142857, + "eval_Qnli-dev_dot_recall": 0.8389830508474576, + "eval_Qnli-dev_euclidean_accuracy": 0.67578125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.777875900268555, + "eval_Qnli-dev_euclidean_ap": 0.7165015785234907, + "eval_Qnli-dev_euclidean_f1": 0.6878306878306878, + "eval_Qnli-dev_euclidean_f1_threshold": 15.445184707641602, + "eval_Qnli-dev_euclidean_precision": 0.5891238670694864, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.6796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 282.639892578125, + "eval_Qnli-dev_manhattan_ap": 0.7163039161971565, + "eval_Qnli-dev_manhattan_f1": 0.6854304635761589, + "eval_Qnli-dev_manhattan_f1_threshold": 328.2414855957031, + "eval_Qnli-dev_manhattan_precision": 0.5625, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.6796875, + "eval_Qnli-dev_max_accuracy_threshold": 410.722412109375, + "eval_Qnli-dev_max_ap": 0.7165015785234907, + "eval_Qnli-dev_max_f1": 0.6916524701873935, + "eval_Qnli-dev_max_f1_threshold": 395.8144226074219, + "eval_Qnli-dev_max_precision": 0.5891238670694864, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.712890625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9146419167518616, + "eval_allNLI-dev_cosine_ap": 0.5483284911998406, + "eval_allNLI-dev_cosine_f1": 0.5627376425855514, + "eval_allNLI-dev_cosine_f1_threshold": 0.8004182577133179, + "eval_allNLI-dev_cosine_precision": 0.4192634560906516, + "eval_allNLI-dev_cosine_recall": 0.8554913294797688, + "eval_allNLI-dev_dot_accuracy": 0.669921875, + "eval_allNLI-dev_dot_accuracy_threshold": 488.09686279296875, + "eval_allNLI-dev_dot_ap": 0.4365952351888237, + "eval_allNLI-dev_dot_f1": 0.5369774919614149, + "eval_allNLI-dev_dot_f1_threshold": 376.2415466308594, + "eval_allNLI-dev_dot_precision": 0.37193763919821826, + "eval_allNLI-dev_dot_recall": 0.9653179190751445, + "eval_allNLI-dev_euclidean_accuracy": 0.716796875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.584402084350586, + "eval_allNLI-dev_euclidean_ap": 0.552898886980593, + "eval_allNLI-dev_euclidean_f1": 0.5714285714285714, + "eval_allNLI-dev_euclidean_f1_threshold": 12.487444877624512, + "eval_allNLI-dev_euclidean_precision": 0.4978540772532189, + "eval_allNLI-dev_euclidean_recall": 0.6705202312138728, + "eval_allNLI-dev_manhattan_accuracy": 0.71484375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 189.19187927246094, + "eval_allNLI-dev_manhattan_ap": 0.5494702966109603, + "eval_allNLI-dev_manhattan_f1": 0.5689655172413792, + "eval_allNLI-dev_manhattan_f1_threshold": 273.392578125, + "eval_allNLI-dev_manhattan_precision": 0.4536082474226804, + "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, + "eval_allNLI-dev_max_accuracy": 0.716796875, + "eval_allNLI-dev_max_accuracy_threshold": 488.09686279296875, + "eval_allNLI-dev_max_ap": 0.552898886980593, + "eval_allNLI-dev_max_f1": 0.5714285714285714, + "eval_allNLI-dev_max_f1_threshold": 376.2415466308594, + "eval_allNLI-dev_max_precision": 0.4978540772532189, + "eval_allNLI-dev_max_recall": 0.9653179190751445, + "eval_sequential_score": 0.7165015785234907, + "eval_sts-test_pearson_cosine": 0.7669433117508329, + "eval_sts-test_pearson_dot": 0.7202911200663573, + "eval_sts-test_pearson_euclidean": 0.7808069960273615, + "eval_sts-test_pearson_manhattan": 0.7749037661158105, + "eval_sts-test_pearson_max": 0.7808069960273615, + "eval_sts-test_spearman_cosine": 0.7837364484620162, + "eval_sts-test_spearman_dot": 0.6980170630634619, + "eval_sts-test_spearman_euclidean": 0.7726895995875491, + "eval_sts-test_spearman_manhattan": 0.7648159284045623, + "eval_sts-test_spearman_max": 0.7837364484620162, + "eval_vitaminc-pairs_loss": 3.1363868713378906, + "eval_vitaminc-pairs_runtime": 3.1813, + "eval_vitaminc-pairs_samples_per_second": 40.235, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_negation-triplets_loss": 1.596004843711853, + "eval_negation-triplets_runtime": 0.7373, + "eval_negation-triplets_samples_per_second": 173.6, + "eval_negation-triplets_steps_per_second": 1.356, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_scitail-pairs-pos_loss": 0.25697287917137146, + "eval_scitail-pairs-pos_runtime": 0.8101, + "eval_scitail-pairs-pos_samples_per_second": 158.012, + "eval_scitail-pairs-pos_steps_per_second": 1.234, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_scitail-pairs-qa_loss": 0.025879494845867157, + "eval_scitail-pairs-qa_runtime": 0.5751, + "eval_scitail-pairs-qa_samples_per_second": 222.558, + "eval_scitail-pairs-qa_steps_per_second": 1.739, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_xsum-pairs_loss": 1.168808102607727, + "eval_xsum-pairs_runtime": 3.0187, + "eval_xsum-pairs_samples_per_second": 42.402, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_sciq_pairs_loss": 0.1468716710805893, + "eval_sciq_pairs_runtime": 3.3982, + "eval_sciq_pairs_samples_per_second": 37.667, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_qasc_pairs_loss": 0.9004628658294678, + "eval_qasc_pairs_runtime": 0.6001, + "eval_qasc_pairs_samples_per_second": 213.312, + "eval_qasc_pairs_steps_per_second": 1.666, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_openbookqa_pairs_loss": 1.666839361190796, + "eval_openbookqa_pairs_runtime": 0.572, + "eval_openbookqa_pairs_samples_per_second": 223.786, + "eval_openbookqa_pairs_steps_per_second": 1.748, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_msmarco_pairs_loss": 1.7236199378967285, + "eval_msmarco_pairs_runtime": 1.524, + "eval_msmarco_pairs_samples_per_second": 83.991, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_nq_pairs_loss": 2.0736727714538574, + "eval_nq_pairs_runtime": 2.901, + "eval_nq_pairs_samples_per_second": 44.123, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_trivia_pairs_loss": 1.4644691944122314, + "eval_trivia_pairs_runtime": 3.4467, + "eval_trivia_pairs_samples_per_second": 37.137, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_gooaq_pairs_loss": 1.197310209274292, + "eval_gooaq_pairs_runtime": 0.9409, + "eval_gooaq_pairs_samples_per_second": 136.041, + "eval_gooaq_pairs_steps_per_second": 1.063, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_paws-pos_loss": 0.04723614454269409, + "eval_paws-pos_runtime": 0.6946, + "eval_paws-pos_samples_per_second": 184.277, + "eval_paws-pos_steps_per_second": 1.44, + "step": 420 + }, + { + "epoch": 0.43209876543209874, + "eval_global_dataset_loss": 0.8761696219444275, + "eval_global_dataset_runtime": 13.3489, + "eval_global_dataset_samples_per_second": 31.164, + "eval_global_dataset_steps_per_second": 0.3, + "step": 420 + }, + { + "epoch": 0.4331275720164609, + "grad_norm": 11.209942817687988, + "learning_rate": 1.5192107995846313e-05, + "loss": 0.75, + "step": 421 + }, + { + "epoch": 0.43415637860082307, + "grad_norm": 12.36668872833252, + "learning_rate": 1.5228452751817235e-05, + "loss": 1.0858, + "step": 422 + }, + { + "epoch": 0.4351851851851852, + "grad_norm": 20.955825805664062, + "learning_rate": 1.526479750778816e-05, + "loss": 2.2561, + "step": 423 + }, + { + "epoch": 0.43621399176954734, + "grad_norm": 6.835966110229492, + "learning_rate": 1.5301142263759087e-05, + "loss": 0.2822, + "step": 424 + }, + { + "epoch": 0.43724279835390945, + "grad_norm": 14.905786514282227, + "learning_rate": 1.5337487019730007e-05, + "loss": 1.286, + "step": 425 + }, + { + "epoch": 0.4382716049382716, + "grad_norm": 16.917980194091797, + "learning_rate": 1.537383177570093e-05, + "loss": 1.6777, + "step": 426 + }, + { + "epoch": 0.43930041152263377, + "grad_norm": 16.51511573791504, + "learning_rate": 1.541017653167186e-05, + "loss": 1.609, + "step": 427 + }, + { + "epoch": 0.4403292181069959, + "grad_norm": 12.164166450500488, + "learning_rate": 1.544652128764278e-05, + "loss": 0.8033, + "step": 428 + }, + { + "epoch": 0.44135802469135804, + "grad_norm": 23.55919647216797, + "learning_rate": 1.5482866043613707e-05, + "loss": 3.845, + "step": 429 + }, + { + "epoch": 0.44238683127572015, + "grad_norm": 12.458250999450684, + "learning_rate": 1.551921079958463e-05, + "loss": 1.0592, + "step": 430 + }, + { + "epoch": 0.4434156378600823, + "grad_norm": 11.092578887939453, + "learning_rate": 1.5555555555555555e-05, + "loss": 0.7032, + "step": 431 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 1.7349342107772827, + "learning_rate": 1.559190031152648e-05, + "loss": 0.0603, + "step": 432 + }, + { + "epoch": 0.4454732510288066, + "grad_norm": 18.856273651123047, + "learning_rate": 1.5628245067497403e-05, + "loss": 2.1237, + "step": 433 + }, + { + "epoch": 0.44650205761316875, + "grad_norm": 14.356558799743652, + "learning_rate": 1.5664589823468327e-05, + "loss": 1.3715, + "step": 434 + }, + { + "epoch": 0.44753086419753085, + "grad_norm": 17.297388076782227, + "learning_rate": 1.570093457943925e-05, + "loss": 2.1316, + "step": 435 + }, + { + "epoch": 0.448559670781893, + "grad_norm": 14.070610046386719, + "learning_rate": 1.5737279335410175e-05, + "loss": 1.2195, + "step": 436 + }, + { + "epoch": 0.4495884773662551, + "grad_norm": 19.30897331237793, + "learning_rate": 1.57736240913811e-05, + "loss": 3.4846, + "step": 437 + }, + { + "epoch": 0.4506172839506173, + "grad_norm": 14.064212799072266, + "learning_rate": 1.5809968847352023e-05, + "loss": 2.1621, + "step": 438 + }, + { + "epoch": 0.45164609053497945, + "grad_norm": 21.56043815612793, + "learning_rate": 1.5846313603322947e-05, + "loss": 2.6488, + "step": 439 + }, + { + "epoch": 0.45267489711934156, + "grad_norm": 13.354293823242188, + "learning_rate": 1.588265835929387e-05, + "loss": 1.1381, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_Qnli-dev_cosine_accuracy": 0.6796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8328311443328857, + "eval_Qnli-dev_cosine_ap": 0.7086015466946073, + "eval_Qnli-dev_cosine_f1": 0.684297520661157, + "eval_Qnli-dev_cosine_f1_threshold": 0.7668030858039856, + "eval_Qnli-dev_cosine_precision": 0.5609756097560976, + "eval_Qnli-dev_cosine_recall": 0.8771186440677966, + "eval_Qnli-dev_dot_accuracy": 0.638671875, + "eval_Qnli-dev_dot_accuracy_threshold": 458.50982666015625, + "eval_Qnli-dev_dot_ap": 0.5955959473763655, + "eval_Qnli-dev_dot_f1": 0.6643598615916955, + "eval_Qnli-dev_dot_f1_threshold": 421.7713623046875, + "eval_Qnli-dev_dot_precision": 0.5614035087719298, + "eval_Qnli-dev_dot_recall": 0.8135593220338984, + "eval_Qnli-dev_euclidean_accuracy": 0.681640625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.273839950561523, + "eval_Qnli-dev_euclidean_ap": 0.7198905948186887, + "eval_Qnli-dev_euclidean_f1": 0.686106346483705, + "eval_Qnli-dev_euclidean_f1_threshold": 15.542667388916016, + "eval_Qnli-dev_euclidean_precision": 0.5763688760806917, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.67578125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 271.1224060058594, + "eval_Qnli-dev_manhattan_ap": 0.7163103084602304, + "eval_Qnli-dev_manhattan_f1": 0.689655172413793, + "eval_Qnli-dev_manhattan_f1_threshold": 314.0755310058594, + "eval_Qnli-dev_manhattan_precision": 0.5813953488372093, + "eval_Qnli-dev_manhattan_recall": 0.847457627118644, + "eval_Qnli-dev_max_accuracy": 0.681640625, + "eval_Qnli-dev_max_accuracy_threshold": 458.50982666015625, + "eval_Qnli-dev_max_ap": 0.7198905948186887, + "eval_Qnli-dev_max_f1": 0.689655172413793, + "eval_Qnli-dev_max_f1_threshold": 421.7713623046875, + "eval_Qnli-dev_max_precision": 0.5813953488372093, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.712890625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9271770715713501, + "eval_allNLI-dev_cosine_ap": 0.5491311356331465, + "eval_allNLI-dev_cosine_f1": 0.5708245243128964, + "eval_allNLI-dev_cosine_f1_threshold": 0.8302508592605591, + "eval_allNLI-dev_cosine_precision": 0.45, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.669921875, + "eval_allNLI-dev_dot_accuracy_threshold": 526.2755126953125, + "eval_allNLI-dev_dot_ap": 0.4493470756775462, + "eval_allNLI-dev_dot_f1": 0.5302782324058919, + "eval_allNLI-dev_dot_f1_threshold": 409.7859802246094, + "eval_allNLI-dev_dot_precision": 0.3698630136986301, + "eval_allNLI-dev_dot_recall": 0.9364161849710982, + "eval_allNLI-dev_euclidean_accuracy": 0.712890625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 8.942924499511719, + "eval_allNLI-dev_euclidean_ap": 0.5529455148329905, + "eval_allNLI-dev_euclidean_f1": 0.5751633986928105, + "eval_allNLI-dev_euclidean_f1_threshold": 13.387319564819336, + "eval_allNLI-dev_euclidean_precision": 0.46153846153846156, + "eval_allNLI-dev_euclidean_recall": 0.7630057803468208, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 201.3372039794922, + "eval_allNLI-dev_manhattan_ap": 0.5535429305922366, + "eval_allNLI-dev_manhattan_f1": 0.5675057208237987, + "eval_allNLI-dev_manhattan_f1_threshold": 263.4776611328125, + "eval_allNLI-dev_manhattan_precision": 0.4696969696969697, + "eval_allNLI-dev_manhattan_recall": 0.7167630057803468, + "eval_allNLI-dev_max_accuracy": 0.71875, + "eval_allNLI-dev_max_accuracy_threshold": 526.2755126953125, + "eval_allNLI-dev_max_ap": 0.5535429305922366, + "eval_allNLI-dev_max_f1": 0.5751633986928105, + "eval_allNLI-dev_max_f1_threshold": 409.7859802246094, + "eval_allNLI-dev_max_precision": 0.4696969696969697, + "eval_allNLI-dev_max_recall": 0.9364161849710982, + "eval_sequential_score": 0.7198905948186887, + "eval_sts-test_pearson_cosine": 0.7695750855183039, + "eval_sts-test_pearson_dot": 0.7238414788807679, + "eval_sts-test_pearson_euclidean": 0.7903775285225014, + "eval_sts-test_pearson_manhattan": 0.7850145963227658, + "eval_sts-test_pearson_max": 0.7903775285225014, + "eval_sts-test_spearman_cosine": 0.7907903212875741, + "eval_sts-test_spearman_dot": 0.7040900777418432, + "eval_sts-test_spearman_euclidean": 0.7829523168599161, + "eval_sts-test_spearman_manhattan": 0.7755189085864977, + "eval_sts-test_spearman_max": 0.7907903212875741, + "eval_vitaminc-pairs_loss": 3.1597630977630615, + "eval_vitaminc-pairs_runtime": 3.1844, + "eval_vitaminc-pairs_samples_per_second": 40.196, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_negation-triplets_loss": 1.5735217332839966, + "eval_negation-triplets_runtime": 0.7333, + "eval_negation-triplets_samples_per_second": 174.547, + "eval_negation-triplets_steps_per_second": 1.364, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_scitail-pairs-pos_loss": 0.23437997698783875, + "eval_scitail-pairs-pos_runtime": 0.8055, + "eval_scitail-pairs-pos_samples_per_second": 158.909, + "eval_scitail-pairs-pos_steps_per_second": 1.241, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_scitail-pairs-qa_loss": 0.026873519644141197, + "eval_scitail-pairs-qa_runtime": 0.5709, + "eval_scitail-pairs-qa_samples_per_second": 224.215, + "eval_scitail-pairs-qa_steps_per_second": 1.752, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_xsum-pairs_loss": 1.0596333742141724, + "eval_xsum-pairs_runtime": 3.0101, + "eval_xsum-pairs_samples_per_second": 42.523, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_sciq_pairs_loss": 0.14231224358081818, + "eval_sciq_pairs_runtime": 3.4147, + "eval_sciq_pairs_samples_per_second": 37.485, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_qasc_pairs_loss": 0.8660905361175537, + "eval_qasc_pairs_runtime": 0.5984, + "eval_qasc_pairs_samples_per_second": 213.886, + "eval_qasc_pairs_steps_per_second": 1.671, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_openbookqa_pairs_loss": 1.5507510900497437, + "eval_openbookqa_pairs_runtime": 0.576, + "eval_openbookqa_pairs_samples_per_second": 222.233, + "eval_openbookqa_pairs_steps_per_second": 1.736, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_msmarco_pairs_loss": 1.6068974733352661, + "eval_msmarco_pairs_runtime": 1.5129, + "eval_msmarco_pairs_samples_per_second": 84.608, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_nq_pairs_loss": 2.067472457885742, + "eval_nq_pairs_runtime": 2.8922, + "eval_nq_pairs_samples_per_second": 44.258, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_trivia_pairs_loss": 1.4165655374526978, + "eval_trivia_pairs_runtime": 3.4314, + "eval_trivia_pairs_samples_per_second": 37.303, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_gooaq_pairs_loss": 1.204696536064148, + "eval_gooaq_pairs_runtime": 0.9383, + "eval_gooaq_pairs_samples_per_second": 136.423, + "eval_gooaq_pairs_steps_per_second": 1.066, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_paws-pos_loss": 0.04588289558887482, + "eval_paws-pos_runtime": 0.6831, + "eval_paws-pos_samples_per_second": 187.395, + "eval_paws-pos_steps_per_second": 1.464, + "step": 440 + }, + { + "epoch": 0.45267489711934156, + "eval_global_dataset_loss": 0.8645310997962952, + "eval_global_dataset_runtime": 13.3758, + "eval_global_dataset_samples_per_second": 31.101, + "eval_global_dataset_steps_per_second": 0.299, + "step": 440 + }, + { + "epoch": 0.4537037037037037, + "grad_norm": 14.464020729064941, + "learning_rate": 1.5919003115264795e-05, + "loss": 1.7843, + "step": 441 + }, + { + "epoch": 0.4547325102880658, + "grad_norm": 15.444217681884766, + "learning_rate": 1.595534787123572e-05, + "loss": 1.6954, + "step": 442 + }, + { + "epoch": 0.455761316872428, + "grad_norm": 10.515376091003418, + "learning_rate": 1.5991692627206643e-05, + "loss": 0.8673, + "step": 443 + }, + { + "epoch": 0.4567901234567901, + "grad_norm": 10.527128219604492, + "learning_rate": 1.6028037383177567e-05, + "loss": 0.8696, + "step": 444 + }, + { + "epoch": 0.45781893004115226, + "grad_norm": 13.480452537536621, + "learning_rate": 1.6064382139148495e-05, + "loss": 1.5461, + "step": 445 + }, + { + "epoch": 0.4588477366255144, + "grad_norm": 11.253717422485352, + "learning_rate": 1.6100726895119415e-05, + "loss": 0.9683, + "step": 446 + }, + { + "epoch": 0.45987654320987653, + "grad_norm": 12.138679504394531, + "learning_rate": 1.6137071651090343e-05, + "loss": 1.2983, + "step": 447 + }, + { + "epoch": 0.4609053497942387, + "grad_norm": 2.5345211029052734, + "learning_rate": 1.6173416407061267e-05, + "loss": 0.0942, + "step": 448 + }, + { + "epoch": 0.4619341563786008, + "grad_norm": 10.980514526367188, + "learning_rate": 1.6209761163032187e-05, + "loss": 0.8264, + "step": 449 + }, + { + "epoch": 0.46296296296296297, + "grad_norm": 16.59669303894043, + "learning_rate": 1.6246105919003115e-05, + "loss": 2.1522, + "step": 450 + }, + { + "epoch": 0.46399176954732513, + "grad_norm": 21.501604080200195, + "learning_rate": 1.628245067497404e-05, + "loss": 2.6668, + "step": 451 + }, + { + "epoch": 0.46502057613168724, + "grad_norm": 11.803515434265137, + "learning_rate": 1.6318795430944963e-05, + "loss": 0.9999, + "step": 452 + }, + { + "epoch": 0.4660493827160494, + "grad_norm": 13.230558395385742, + "learning_rate": 1.6355140186915887e-05, + "loss": 0.9551, + "step": 453 + }, + { + "epoch": 0.4670781893004115, + "grad_norm": 11.019618034362793, + "learning_rate": 1.639148494288681e-05, + "loss": 0.8174, + "step": 454 + }, + { + "epoch": 0.46810699588477367, + "grad_norm": 14.335307121276855, + "learning_rate": 1.6427829698857735e-05, + "loss": 1.6169, + "step": 455 + }, + { + "epoch": 0.4691358024691358, + "grad_norm": 5.958987236022949, + "learning_rate": 1.646417445482866e-05, + "loss": 0.2584, + "step": 456 + }, + { + "epoch": 0.47016460905349794, + "grad_norm": 14.919219970703125, + "learning_rate": 1.6500519210799583e-05, + "loss": 1.2947, + "step": 457 + }, + { + "epoch": 0.4711934156378601, + "grad_norm": 12.892438888549805, + "learning_rate": 1.6536863966770507e-05, + "loss": 1.0283, + "step": 458 + }, + { + "epoch": 0.4722222222222222, + "grad_norm": 12.579314231872559, + "learning_rate": 1.657320872274143e-05, + "loss": 1.0379, + "step": 459 + }, + { + "epoch": 0.4732510288065844, + "grad_norm": 17.32071876525879, + "learning_rate": 1.6609553478712355e-05, + "loss": 2.4063, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_Qnli-dev_cosine_accuracy": 0.66015625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8584603071212769, + "eval_Qnli-dev_cosine_ap": 0.6987965309483122, + "eval_Qnli-dev_cosine_f1": 0.6860068259385665, + "eval_Qnli-dev_cosine_f1_threshold": 0.767835259437561, + "eval_Qnli-dev_cosine_precision": 0.5742857142857143, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.64453125, + "eval_Qnli-dev_dot_accuracy_threshold": 446.875, + "eval_Qnli-dev_dot_ap": 0.5901482043145834, + "eval_Qnli-dev_dot_f1": 0.6643109540636043, + "eval_Qnli-dev_dot_f1_threshold": 406.9656982421875, + "eval_Qnli-dev_dot_precision": 0.5696969696969697, + "eval_Qnli-dev_dot_recall": 0.7966101694915254, + "eval_Qnli-dev_euclidean_accuracy": 0.66796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 11.938894271850586, + "eval_Qnli-dev_euclidean_ap": 0.7074711630770054, + "eval_Qnli-dev_euclidean_f1": 0.6917808219178082, + "eval_Qnli-dev_euclidean_f1_threshold": 15.646432876586914, + "eval_Qnli-dev_euclidean_precision": 0.5804597701149425, + "eval_Qnli-dev_euclidean_recall": 0.8559322033898306, + "eval_Qnli-dev_manhattan_accuracy": 0.6640625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 258.43310546875, + "eval_Qnli-dev_manhattan_ap": 0.7074831376971712, + "eval_Qnli-dev_manhattan_f1": 0.6837606837606838, + "eval_Qnli-dev_manhattan_f1_threshold": 317.3417053222656, + "eval_Qnli-dev_manhattan_precision": 0.5730659025787965, + "eval_Qnli-dev_manhattan_recall": 0.847457627118644, + "eval_Qnli-dev_max_accuracy": 0.66796875, + "eval_Qnli-dev_max_accuracy_threshold": 446.875, + "eval_Qnli-dev_max_ap": 0.7074831376971712, + "eval_Qnli-dev_max_f1": 0.6917808219178082, + "eval_Qnli-dev_max_f1_threshold": 406.9656982421875, + "eval_Qnli-dev_max_precision": 0.5804597701149425, + "eval_Qnli-dev_max_recall": 0.8559322033898306, + "eval_allNLI-dev_cosine_accuracy": 0.71484375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9075762033462524, + "eval_allNLI-dev_cosine_ap": 0.5556209722685957, + "eval_allNLI-dev_cosine_f1": 0.5688487584650113, + "eval_allNLI-dev_cosine_f1_threshold": 0.822675347328186, + "eval_allNLI-dev_cosine_precision": 0.4666666666666667, + "eval_allNLI-dev_cosine_recall": 0.7283236994219653, + "eval_allNLI-dev_dot_accuracy": 0.666015625, + "eval_allNLI-dev_dot_accuracy_threshold": 479.59765625, + "eval_allNLI-dev_dot_ap": 0.4395722804668881, + "eval_allNLI-dev_dot_f1": 0.5365853658536586, + "eval_allNLI-dev_dot_f1_threshold": 378.3732604980469, + "eval_allNLI-dev_dot_precision": 0.38403990024937656, + "eval_allNLI-dev_dot_recall": 0.8901734104046243, + "eval_allNLI-dev_euclidean_accuracy": 0.7265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 9.884578704833984, + "eval_allNLI-dev_euclidean_ap": 0.5607678101966321, + "eval_allNLI-dev_euclidean_f1": 0.5841121495327103, + "eval_allNLI-dev_euclidean_f1_threshold": 13.222391128540039, + "eval_allNLI-dev_euclidean_precision": 0.49019607843137253, + "eval_allNLI-dev_euclidean_recall": 0.7225433526011561, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 207.45559692382812, + "eval_allNLI-dev_manhattan_ap": 0.5594530995989421, + "eval_allNLI-dev_manhattan_f1": 0.5664062499999999, + "eval_allNLI-dev_manhattan_f1_threshold": 298.21136474609375, + "eval_allNLI-dev_manhattan_precision": 0.4277286135693215, + "eval_allNLI-dev_manhattan_recall": 0.838150289017341, + "eval_allNLI-dev_max_accuracy": 0.7265625, + "eval_allNLI-dev_max_accuracy_threshold": 479.59765625, + "eval_allNLI-dev_max_ap": 0.5607678101966321, + "eval_allNLI-dev_max_f1": 0.5841121495327103, + "eval_allNLI-dev_max_f1_threshold": 378.3732604980469, + "eval_allNLI-dev_max_precision": 0.49019607843137253, + "eval_allNLI-dev_max_recall": 0.8901734104046243, + "eval_sequential_score": 0.7074831376971712, + "eval_sts-test_pearson_cosine": 0.7825801667596759, + "eval_sts-test_pearson_dot": 0.7405682776064579, + "eval_sts-test_pearson_euclidean": 0.8045260928771718, + "eval_sts-test_pearson_manhattan": 0.8000832846763656, + "eval_sts-test_pearson_max": 0.8045260928771718, + "eval_sts-test_spearman_cosine": 0.8065376636535482, + "eval_sts-test_spearman_dot": 0.7210651262128288, + "eval_sts-test_spearman_euclidean": 0.7970397901896217, + "eval_sts-test_spearman_manhattan": 0.790139056180545, + "eval_sts-test_spearman_max": 0.8065376636535482, + "eval_vitaminc-pairs_loss": 3.132262945175171, + "eval_vitaminc-pairs_runtime": 3.1567, + "eval_vitaminc-pairs_samples_per_second": 40.548, + "eval_vitaminc-pairs_steps_per_second": 0.317, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_negation-triplets_loss": 1.4925687313079834, + "eval_negation-triplets_runtime": 0.7314, + "eval_negation-triplets_samples_per_second": 175.004, + "eval_negation-triplets_steps_per_second": 1.367, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_scitail-pairs-pos_loss": 0.20003551244735718, + "eval_scitail-pairs-pos_runtime": 0.7903, + "eval_scitail-pairs-pos_samples_per_second": 161.967, + "eval_scitail-pairs-pos_steps_per_second": 1.265, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_scitail-pairs-qa_loss": 0.019925443455576897, + "eval_scitail-pairs-qa_runtime": 0.5973, + "eval_scitail-pairs-qa_samples_per_second": 214.291, + "eval_scitail-pairs-qa_steps_per_second": 1.674, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_xsum-pairs_loss": 1.011654019355774, + "eval_xsum-pairs_runtime": 3.0219, + "eval_xsum-pairs_samples_per_second": 42.358, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_sciq_pairs_loss": 0.1401093304157257, + "eval_sciq_pairs_runtime": 3.4024, + "eval_sciq_pairs_samples_per_second": 37.621, + "eval_sciq_pairs_steps_per_second": 0.294, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_qasc_pairs_loss": 0.8895432949066162, + "eval_qasc_pairs_runtime": 0.5956, + "eval_qasc_pairs_samples_per_second": 214.909, + "eval_qasc_pairs_steps_per_second": 1.679, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_openbookqa_pairs_loss": 1.5750139951705933, + "eval_openbookqa_pairs_runtime": 0.5749, + "eval_openbookqa_pairs_samples_per_second": 222.663, + "eval_openbookqa_pairs_steps_per_second": 1.74, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_msmarco_pairs_loss": 1.5957564115524292, + "eval_msmarco_pairs_runtime": 1.5166, + "eval_msmarco_pairs_samples_per_second": 84.401, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_nq_pairs_loss": 1.8501969575881958, + "eval_nq_pairs_runtime": 2.9017, + "eval_nq_pairs_samples_per_second": 44.113, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_trivia_pairs_loss": 1.3718889951705933, + "eval_trivia_pairs_runtime": 3.4316, + "eval_trivia_pairs_samples_per_second": 37.3, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_gooaq_pairs_loss": 1.0226097106933594, + "eval_gooaq_pairs_runtime": 0.9449, + "eval_gooaq_pairs_samples_per_second": 135.461, + "eval_gooaq_pairs_steps_per_second": 1.058, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_paws-pos_loss": 0.04436105117201805, + "eval_paws-pos_runtime": 0.6816, + "eval_paws-pos_samples_per_second": 187.781, + "eval_paws-pos_steps_per_second": 1.467, + "step": 460 + }, + { + "epoch": 0.4732510288065844, + "eval_global_dataset_loss": 0.8342341184616089, + "eval_global_dataset_runtime": 13.3662, + "eval_global_dataset_samples_per_second": 31.123, + "eval_global_dataset_steps_per_second": 0.299, + "step": 460 + }, + { + "epoch": 0.4742798353909465, + "grad_norm": 20.145654678344727, + "learning_rate": 1.664589823468328e-05, + "loss": 3.1972, + "step": 461 + }, + { + "epoch": 0.47530864197530864, + "grad_norm": 10.836761474609375, + "learning_rate": 1.6682242990654203e-05, + "loss": 0.6914, + "step": 462 + }, + { + "epoch": 0.4763374485596708, + "grad_norm": 13.806187629699707, + "learning_rate": 1.671858774662513e-05, + "loss": 2.1495, + "step": 463 + }, + { + "epoch": 0.4773662551440329, + "grad_norm": 15.314764022827148, + "learning_rate": 1.675493250259605e-05, + "loss": 1.9195, + "step": 464 + }, + { + "epoch": 0.4783950617283951, + "grad_norm": 2.3654873371124268, + "learning_rate": 1.6791277258566975e-05, + "loss": 0.0819, + "step": 465 + }, + { + "epoch": 0.4794238683127572, + "grad_norm": 6.35114860534668, + "learning_rate": 1.6827622014537902e-05, + "loss": 0.2882, + "step": 466 + }, + { + "epoch": 0.48045267489711935, + "grad_norm": 13.575540542602539, + "learning_rate": 1.6863966770508823e-05, + "loss": 1.3187, + "step": 467 + }, + { + "epoch": 0.48148148148148145, + "grad_norm": 13.726608276367188, + "learning_rate": 1.690031152647975e-05, + "loss": 2.0175, + "step": 468 + }, + { + "epoch": 0.4825102880658436, + "grad_norm": 12.422574996948242, + "learning_rate": 1.6936656282450674e-05, + "loss": 1.1298, + "step": 469 + }, + { + "epoch": 0.4835390946502058, + "grad_norm": 10.693941116333008, + "learning_rate": 1.69730010384216e-05, + "loss": 0.751, + "step": 470 + }, + { + "epoch": 0.4845679012345679, + "grad_norm": 17.281755447387695, + "learning_rate": 1.7009345794392523e-05, + "loss": 1.7641, + "step": 471 + }, + { + "epoch": 0.48559670781893005, + "grad_norm": 13.825311660766602, + "learning_rate": 1.7045690550363447e-05, + "loss": 1.2676, + "step": 472 + }, + { + "epoch": 0.48662551440329216, + "grad_norm": 13.023504257202148, + "learning_rate": 1.708203530633437e-05, + "loss": 1.2802, + "step": 473 + }, + { + "epoch": 0.4876543209876543, + "grad_norm": 6.976680755615234, + "learning_rate": 1.7118380062305295e-05, + "loss": 0.2798, + "step": 474 + }, + { + "epoch": 0.4886831275720165, + "grad_norm": 12.474639892578125, + "learning_rate": 1.715472481827622e-05, + "loss": 1.786, + "step": 475 + }, + { + "epoch": 0.4897119341563786, + "grad_norm": 11.611064910888672, + "learning_rate": 1.7191069574247143e-05, + "loss": 0.9421, + "step": 476 + }, + { + "epoch": 0.49074074074074076, + "grad_norm": 17.27467155456543, + "learning_rate": 1.7227414330218067e-05, + "loss": 1.8988, + "step": 477 + }, + { + "epoch": 0.49176954732510286, + "grad_norm": 11.986361503601074, + "learning_rate": 1.726375908618899e-05, + "loss": 1.0397, + "step": 478 + }, + { + "epoch": 0.492798353909465, + "grad_norm": 19.697477340698242, + "learning_rate": 1.7300103842159915e-05, + "loss": 2.2289, + "step": 479 + }, + { + "epoch": 0.49382716049382713, + "grad_norm": 11.965368270874023, + "learning_rate": 1.733644859813084e-05, + "loss": 0.8923, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_Qnli-dev_cosine_accuracy": 0.677734375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8423784971237183, + "eval_Qnli-dev_cosine_ap": 0.7145917918948612, + "eval_Qnli-dev_cosine_f1": 0.6890459363957597, + "eval_Qnli-dev_cosine_f1_threshold": 0.7728449106216431, + "eval_Qnli-dev_cosine_precision": 0.5909090909090909, + "eval_Qnli-dev_cosine_recall": 0.826271186440678, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 412.736083984375, + "eval_Qnli-dev_dot_ap": 0.6183902376998758, + "eval_Qnli-dev_dot_f1": 0.673040152963671, + "eval_Qnli-dev_dot_f1_threshold": 410.0682373046875, + "eval_Qnli-dev_dot_precision": 0.6132404181184669, + "eval_Qnli-dev_dot_recall": 0.7457627118644068, + "eval_Qnli-dev_euclidean_accuracy": 0.6796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.48289966583252, + "eval_Qnli-dev_euclidean_ap": 0.7218061519598871, + "eval_Qnli-dev_euclidean_f1": 0.6889279437609841, + "eval_Qnli-dev_euclidean_f1_threshold": 15.510814666748047, + "eval_Qnli-dev_euclidean_precision": 0.5885885885885885, + "eval_Qnli-dev_euclidean_recall": 0.8305084745762712, + "eval_Qnli-dev_manhattan_accuracy": 0.6796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 277.7557678222656, + "eval_Qnli-dev_manhattan_ap": 0.7243976667802744, + "eval_Qnli-dev_manhattan_f1": 0.6917808219178082, + "eval_Qnli-dev_manhattan_f1_threshold": 320.653564453125, + "eval_Qnli-dev_manhattan_precision": 0.5804597701149425, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.6796875, + "eval_Qnli-dev_max_accuracy_threshold": 412.736083984375, + "eval_Qnli-dev_max_ap": 0.7243976667802744, + "eval_Qnli-dev_max_f1": 0.6917808219178082, + "eval_Qnli-dev_max_f1_threshold": 410.0682373046875, + "eval_Qnli-dev_max_precision": 0.6132404181184669, + "eval_Qnli-dev_max_recall": 0.8559322033898306, + "eval_allNLI-dev_cosine_accuracy": 0.712890625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9155895113945007, + "eval_allNLI-dev_cosine_ap": 0.5587323061807457, + "eval_allNLI-dev_cosine_f1": 0.569377990430622, + "eval_allNLI-dev_cosine_f1_threshold": 0.8306180238723755, + "eval_allNLI-dev_cosine_precision": 0.4857142857142857, + "eval_allNLI-dev_cosine_recall": 0.6878612716763006, + "eval_allNLI-dev_dot_accuracy": 0.669921875, + "eval_allNLI-dev_dot_accuracy_threshold": 486.188232421875, + "eval_allNLI-dev_dot_ap": 0.44895440516126245, + "eval_allNLI-dev_dot_f1": 0.5326633165829145, + "eval_allNLI-dev_dot_f1_threshold": 373.0961608886719, + "eval_allNLI-dev_dot_precision": 0.375, + "eval_allNLI-dev_dot_recall": 0.9190751445086706, + "eval_allNLI-dev_euclidean_accuracy": 0.720703125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.15213680267334, + "eval_allNLI-dev_euclidean_ap": 0.5670459225360986, + "eval_allNLI-dev_euclidean_f1": 0.5797101449275361, + "eval_allNLI-dev_euclidean_f1_threshold": 13.239068984985352, + "eval_allNLI-dev_euclidean_precision": 0.4979253112033195, + "eval_allNLI-dev_euclidean_recall": 0.6936416184971098, + "eval_allNLI-dev_manhattan_accuracy": 0.720703125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 198.6392822265625, + "eval_allNLI-dev_manhattan_ap": 0.5637178226555747, + "eval_allNLI-dev_manhattan_f1": 0.569620253164557, + "eval_allNLI-dev_manhattan_f1_threshold": 287.952392578125, + "eval_allNLI-dev_manhattan_precision": 0.4485049833887043, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.720703125, + "eval_allNLI-dev_max_accuracy_threshold": 486.188232421875, + "eval_allNLI-dev_max_ap": 0.5670459225360986, + "eval_allNLI-dev_max_f1": 0.5797101449275361, + "eval_allNLI-dev_max_f1_threshold": 373.0961608886719, + "eval_allNLI-dev_max_precision": 0.4979253112033195, + "eval_allNLI-dev_max_recall": 0.9190751445086706, + "eval_sequential_score": 0.7243976667802744, + "eval_sts-test_pearson_cosine": 0.7891034120839744, + "eval_sts-test_pearson_dot": 0.7520122002590104, + "eval_sts-test_pearson_euclidean": 0.8084749326758871, + "eval_sts-test_pearson_manhattan": 0.8035797835971765, + "eval_sts-test_pearson_max": 0.8084749326758871, + "eval_sts-test_spearman_cosine": 0.8092891054576755, + "eval_sts-test_spearman_dot": 0.729727493626578, + "eval_sts-test_spearman_euclidean": 0.7991726353075358, + "eval_sts-test_spearman_manhattan": 0.7930649384015762, + "eval_sts-test_spearman_max": 0.8092891054576755, + "eval_vitaminc-pairs_loss": 2.9393234252929688, + "eval_vitaminc-pairs_runtime": 3.181, + "eval_vitaminc-pairs_samples_per_second": 40.239, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_negation-triplets_loss": 1.4414068460464478, + "eval_negation-triplets_runtime": 0.752, + "eval_negation-triplets_samples_per_second": 170.205, + "eval_negation-triplets_steps_per_second": 1.33, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_scitail-pairs-pos_loss": 0.19124868512153625, + "eval_scitail-pairs-pos_runtime": 0.801, + "eval_scitail-pairs-pos_samples_per_second": 159.801, + "eval_scitail-pairs-pos_steps_per_second": 1.248, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_scitail-pairs-qa_loss": 0.015640273690223694, + "eval_scitail-pairs-qa_runtime": 0.5674, + "eval_scitail-pairs-qa_samples_per_second": 225.595, + "eval_scitail-pairs-qa_steps_per_second": 1.762, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_xsum-pairs_loss": 0.9755306839942932, + "eval_xsum-pairs_runtime": 3.0208, + "eval_xsum-pairs_samples_per_second": 42.373, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_sciq_pairs_loss": 0.14197379350662231, + "eval_sciq_pairs_runtime": 3.4128, + "eval_sciq_pairs_samples_per_second": 37.506, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_qasc_pairs_loss": 0.8245877623558044, + "eval_qasc_pairs_runtime": 0.6116, + "eval_qasc_pairs_samples_per_second": 209.289, + "eval_qasc_pairs_steps_per_second": 1.635, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_openbookqa_pairs_loss": 1.38233482837677, + "eval_openbookqa_pairs_runtime": 0.5798, + "eval_openbookqa_pairs_samples_per_second": 220.762, + "eval_openbookqa_pairs_steps_per_second": 1.725, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_msmarco_pairs_loss": 1.583013653755188, + "eval_msmarco_pairs_runtime": 1.5116, + "eval_msmarco_pairs_samples_per_second": 84.681, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_nq_pairs_loss": 1.8946471214294434, + "eval_nq_pairs_runtime": 2.889, + "eval_nq_pairs_samples_per_second": 44.307, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_trivia_pairs_loss": 1.2537095546722412, + "eval_trivia_pairs_runtime": 3.4426, + "eval_trivia_pairs_samples_per_second": 37.181, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_gooaq_pairs_loss": 0.978269636631012, + "eval_gooaq_pairs_runtime": 0.946, + "eval_gooaq_pairs_samples_per_second": 135.311, + "eval_gooaq_pairs_steps_per_second": 1.057, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_paws-pos_loss": 0.04379463195800781, + "eval_paws-pos_runtime": 0.6992, + "eval_paws-pos_samples_per_second": 183.062, + "eval_paws-pos_steps_per_second": 1.43, + "step": 480 + }, + { + "epoch": 0.49382716049382713, + "eval_global_dataset_loss": 0.784004807472229, + "eval_global_dataset_runtime": 13.3627, + "eval_global_dataset_samples_per_second": 31.131, + "eval_global_dataset_steps_per_second": 0.299, + "step": 480 + }, + { + "epoch": 0.4948559670781893, + "grad_norm": 15.596723556518555, + "learning_rate": 1.7372793354101766e-05, + "loss": 1.5281, + "step": 481 + }, + { + "epoch": 0.49588477366255146, + "grad_norm": 11.775968551635742, + "learning_rate": 1.7409138110072687e-05, + "loss": 1.4874, + "step": 482 + }, + { + "epoch": 0.49691358024691357, + "grad_norm": 10.956304550170898, + "learning_rate": 1.744548286604361e-05, + "loss": 1.3973, + "step": 483 + }, + { + "epoch": 0.49794238683127573, + "grad_norm": 9.66591739654541, + "learning_rate": 1.7481827622014538e-05, + "loss": 0.6967, + "step": 484 + }, + { + "epoch": 0.49897119341563784, + "grad_norm": 15.71474838256836, + "learning_rate": 1.751817237798546e-05, + "loss": 1.8954, + "step": 485 + }, + { + "epoch": 0.5, + "grad_norm": 16.29734992980957, + "learning_rate": 1.7554517133956383e-05, + "loss": 2.1666, + "step": 486 + }, + { + "epoch": 0.5010288065843621, + "grad_norm": 11.766134262084961, + "learning_rate": 1.759086188992731e-05, + "loss": 0.9414, + "step": 487 + }, + { + "epoch": 0.5020576131687243, + "grad_norm": 19.231468200683594, + "learning_rate": 1.762720664589823e-05, + "loss": 2.1697, + "step": 488 + }, + { + "epoch": 0.5030864197530864, + "grad_norm": 14.636868476867676, + "learning_rate": 1.7663551401869155e-05, + "loss": 2.2224, + "step": 489 + }, + { + "epoch": 0.5041152263374485, + "grad_norm": 9.892867088317871, + "learning_rate": 1.7699896157840082e-05, + "loss": 0.7158, + "step": 490 + }, + { + "epoch": 0.5051440329218106, + "grad_norm": 10.343125343322754, + "learning_rate": 1.7736240913811006e-05, + "loss": 0.8864, + "step": 491 + }, + { + "epoch": 0.5061728395061729, + "grad_norm": 11.846784591674805, + "learning_rate": 1.7772585669781927e-05, + "loss": 1.7706, + "step": 492 + }, + { + "epoch": 0.507201646090535, + "grad_norm": 11.437203407287598, + "learning_rate": 1.7808930425752854e-05, + "loss": 1.0602, + "step": 493 + }, + { + "epoch": 0.5082304526748971, + "grad_norm": 12.174988746643066, + "learning_rate": 1.784527518172378e-05, + "loss": 1.6377, + "step": 494 + }, + { + "epoch": 0.5092592592592593, + "grad_norm": 2.9324963092803955, + "learning_rate": 1.78816199376947e-05, + "loss": 0.1079, + "step": 495 + }, + { + "epoch": 0.5102880658436214, + "grad_norm": 9.480378150939941, + "learning_rate": 1.7917964693665626e-05, + "loss": 0.7662, + "step": 496 + }, + { + "epoch": 0.5113168724279835, + "grad_norm": 11.27574348449707, + "learning_rate": 1.795430944963655e-05, + "loss": 1.662, + "step": 497 + }, + { + "epoch": 0.5123456790123457, + "grad_norm": 11.860407829284668, + "learning_rate": 1.7990654205607474e-05, + "loss": 2.0872, + "step": 498 + }, + { + "epoch": 0.5133744855967078, + "grad_norm": 9.084991455078125, + "learning_rate": 1.80269989615784e-05, + "loss": 0.6517, + "step": 499 + }, + { + "epoch": 0.51440329218107, + "grad_norm": 10.730901718139648, + "learning_rate": 1.8063343717549322e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_Qnli-dev_cosine_accuracy": 0.69140625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8430161476135254, + "eval_Qnli-dev_cosine_ap": 0.7175942339872103, + "eval_Qnli-dev_cosine_f1": 0.6925795053003534, + "eval_Qnli-dev_cosine_f1_threshold": 0.7885958552360535, + "eval_Qnli-dev_cosine_precision": 0.593939393939394, + "eval_Qnli-dev_cosine_recall": 0.8305084745762712, + "eval_Qnli-dev_dot_accuracy": 0.654296875, + "eval_Qnli-dev_dot_accuracy_threshold": 444.99591064453125, + "eval_Qnli-dev_dot_ap": 0.6038099250184231, + "eval_Qnli-dev_dot_f1": 0.6687797147385103, + "eval_Qnli-dev_dot_f1_threshold": 398.5889892578125, + "eval_Qnli-dev_dot_precision": 0.5341772151898734, + "eval_Qnli-dev_dot_recall": 0.8940677966101694, + "eval_Qnli-dev_euclidean_accuracy": 0.6953125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.040979385375977, + "eval_Qnli-dev_euclidean_ap": 0.7271286610454261, + "eval_Qnli-dev_euclidean_f1": 0.6927175843694494, + "eval_Qnli-dev_euclidean_f1_threshold": 15.024581909179688, + "eval_Qnli-dev_euclidean_precision": 0.5963302752293578, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.697265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 281.1022033691406, + "eval_Qnli-dev_manhattan_ap": 0.728019969713725, + "eval_Qnli-dev_manhattan_f1": 0.6934306569343065, + "eval_Qnli-dev_manhattan_f1_threshold": 299.29119873046875, + "eval_Qnli-dev_manhattan_precision": 0.6089743589743589, + "eval_Qnli-dev_manhattan_recall": 0.8050847457627118, + "eval_Qnli-dev_max_accuracy": 0.697265625, + "eval_Qnli-dev_max_accuracy_threshold": 444.99591064453125, + "eval_Qnli-dev_max_ap": 0.728019969713725, + "eval_Qnli-dev_max_f1": 0.6934306569343065, + "eval_Qnli-dev_max_f1_threshold": 398.5889892578125, + "eval_Qnli-dev_max_precision": 0.6089743589743589, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8899899125099182, + "eval_allNLI-dev_cosine_ap": 0.5657808168208326, + "eval_allNLI-dev_cosine_f1": 0.5817409766454352, + "eval_allNLI-dev_cosine_f1_threshold": 0.8096699714660645, + "eval_allNLI-dev_cosine_precision": 0.4597315436241611, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.67578125, + "eval_allNLI-dev_dot_accuracy_threshold": 478.03387451171875, + "eval_allNLI-dev_dot_ap": 0.4579389209157686, + "eval_allNLI-dev_dot_f1": 0.5588822355289421, + "eval_allNLI-dev_dot_f1_threshold": 411.92333984375, + "eval_allNLI-dev_dot_precision": 0.4268292682926829, + "eval_allNLI-dev_dot_recall": 0.8092485549132948, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.441316604614258, + "eval_allNLI-dev_euclidean_ap": 0.5691945577806491, + "eval_allNLI-dev_euclidean_f1": 0.5872340425531914, + "eval_allNLI-dev_euclidean_f1_threshold": 14.106014251708984, + "eval_allNLI-dev_euclidean_precision": 0.46464646464646464, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 219.55010986328125, + "eval_allNLI-dev_manhattan_ap": 0.5696159330415428, + "eval_allNLI-dev_manhattan_f1": 0.5752808988764044, + "eval_allNLI-dev_manhattan_f1_threshold": 278.9423828125, + "eval_allNLI-dev_manhattan_precision": 0.47058823529411764, + "eval_allNLI-dev_manhattan_recall": 0.7398843930635838, + "eval_allNLI-dev_max_accuracy": 0.73046875, + "eval_allNLI-dev_max_accuracy_threshold": 478.03387451171875, + "eval_allNLI-dev_max_ap": 0.5696159330415428, + "eval_allNLI-dev_max_f1": 0.5872340425531914, + "eval_allNLI-dev_max_f1_threshold": 411.92333984375, + "eval_allNLI-dev_max_precision": 0.47058823529411764, + "eval_allNLI-dev_max_recall": 0.8092485549132948, + "eval_sequential_score": 0.728019969713725, + "eval_sts-test_pearson_cosine": 0.7967354149956867, + "eval_sts-test_pearson_dot": 0.7587343105275375, + "eval_sts-test_pearson_euclidean": 0.8180154478758743, + "eval_sts-test_pearson_manhattan": 0.8161849279054585, + "eval_sts-test_pearson_max": 0.8180154478758743, + "eval_sts-test_spearman_cosine": 0.8158280702696641, + "eval_sts-test_spearman_dot": 0.7368859501500076, + "eval_sts-test_spearman_euclidean": 0.8091461699287915, + "eval_sts-test_spearman_manhattan": 0.8057763999460191, + "eval_sts-test_spearman_max": 0.8158280702696641, + "eval_vitaminc-pairs_loss": 2.9438083171844482, + "eval_vitaminc-pairs_runtime": 3.189, + "eval_vitaminc-pairs_samples_per_second": 40.138, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_negation-triplets_loss": 1.3407632112503052, + "eval_negation-triplets_runtime": 0.7388, + "eval_negation-triplets_samples_per_second": 173.246, + "eval_negation-triplets_steps_per_second": 1.353, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_scitail-pairs-pos_loss": 0.2115849405527115, + "eval_scitail-pairs-pos_runtime": 0.7971, + "eval_scitail-pairs-pos_samples_per_second": 160.573, + "eval_scitail-pairs-pos_steps_per_second": 1.254, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_scitail-pairs-qa_loss": 0.018660105764865875, + "eval_scitail-pairs-qa_runtime": 0.5693, + "eval_scitail-pairs-qa_samples_per_second": 224.852, + "eval_scitail-pairs-qa_steps_per_second": 1.757, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_xsum-pairs_loss": 0.9552733898162842, + "eval_xsum-pairs_runtime": 3.0194, + "eval_xsum-pairs_samples_per_second": 42.392, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_sciq_pairs_loss": 0.13849374651908875, + "eval_sciq_pairs_runtime": 3.458, + "eval_sciq_pairs_samples_per_second": 37.015, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_qasc_pairs_loss": 0.8119698166847229, + "eval_qasc_pairs_runtime": 0.5998, + "eval_qasc_pairs_samples_per_second": 213.389, + "eval_qasc_pairs_steps_per_second": 1.667, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_openbookqa_pairs_loss": 1.512932538986206, + "eval_openbookqa_pairs_runtime": 0.5734, + "eval_openbookqa_pairs_samples_per_second": 223.22, + "eval_openbookqa_pairs_steps_per_second": 1.744, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_msmarco_pairs_loss": 1.4880919456481934, + "eval_msmarco_pairs_runtime": 1.5132, + "eval_msmarco_pairs_samples_per_second": 84.588, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_nq_pairs_loss": 1.750890851020813, + "eval_nq_pairs_runtime": 2.8955, + "eval_nq_pairs_samples_per_second": 44.206, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_trivia_pairs_loss": 1.3733922243118286, + "eval_trivia_pairs_runtime": 3.4378, + "eval_trivia_pairs_samples_per_second": 37.233, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_gooaq_pairs_loss": 0.938717782497406, + "eval_gooaq_pairs_runtime": 0.95, + "eval_gooaq_pairs_samples_per_second": 134.741, + "eval_gooaq_pairs_steps_per_second": 1.053, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_paws-pos_loss": 0.04237303510308266, + "eval_paws-pos_runtime": 0.6799, + "eval_paws-pos_samples_per_second": 188.26, + "eval_paws-pos_steps_per_second": 1.471, + "step": 500 + }, + { + "epoch": 0.51440329218107, + "eval_global_dataset_loss": 0.7602720856666565, + "eval_global_dataset_runtime": 13.3515, + "eval_global_dataset_samples_per_second": 31.157, + "eval_global_dataset_steps_per_second": 0.3, + "step": 500 + }, + { + "epoch": 0.5154320987654321, + "grad_norm": 20.222156524658203, + "learning_rate": 1.8099688473520246e-05, + "loss": 3.6159, + "step": 501 + }, + { + "epoch": 0.5164609053497943, + "grad_norm": 6.827728271484375, + "learning_rate": 1.8136033229491174e-05, + "loss": 0.2539, + "step": 502 + }, + { + "epoch": 0.5174897119341564, + "grad_norm": 11.333172798156738, + "learning_rate": 1.8172377985462095e-05, + "loss": 0.8589, + "step": 503 + }, + { + "epoch": 0.5185185185185185, + "grad_norm": 12.576927185058594, + "learning_rate": 1.820872274143302e-05, + "loss": 1.7416, + "step": 504 + }, + { + "epoch": 0.5195473251028807, + "grad_norm": 15.945344924926758, + "learning_rate": 1.8245067497403946e-05, + "loss": 1.7693, + "step": 505 + }, + { + "epoch": 0.5205761316872428, + "grad_norm": 17.440074920654297, + "learning_rate": 1.8281412253374867e-05, + "loss": 1.5639, + "step": 506 + }, + { + "epoch": 0.5216049382716049, + "grad_norm": 11.141048431396484, + "learning_rate": 1.831775700934579e-05, + "loss": 0.8746, + "step": 507 + }, + { + "epoch": 0.522633744855967, + "grad_norm": 15.599634170532227, + "learning_rate": 1.8354101765316718e-05, + "loss": 1.5769, + "step": 508 + }, + { + "epoch": 0.5236625514403292, + "grad_norm": 10.608887672424316, + "learning_rate": 1.8390446521287642e-05, + "loss": 0.6175, + "step": 509 + }, + { + "epoch": 0.5246913580246914, + "grad_norm": 11.312731742858887, + "learning_rate": 1.8426791277258563e-05, + "loss": 0.8312, + "step": 510 + }, + { + "epoch": 0.5257201646090535, + "grad_norm": 9.91249942779541, + "learning_rate": 1.846313603322949e-05, + "loss": 0.867, + "step": 511 + }, + { + "epoch": 0.5267489711934157, + "grad_norm": 11.7357816696167, + "learning_rate": 1.8499480789200414e-05, + "loss": 1.2859, + "step": 512 + }, + { + "epoch": 0.5277777777777778, + "grad_norm": 21.4658203125, + "learning_rate": 1.8535825545171335e-05, + "loss": 2.2659, + "step": 513 + }, + { + "epoch": 0.5288065843621399, + "grad_norm": 18.00661849975586, + "learning_rate": 1.8572170301142262e-05, + "loss": 1.7138, + "step": 514 + }, + { + "epoch": 0.529835390946502, + "grad_norm": 7.337871074676514, + "learning_rate": 1.8608515057113186e-05, + "loss": 0.3393, + "step": 515 + }, + { + "epoch": 0.5308641975308642, + "grad_norm": 12.568946838378906, + "learning_rate": 1.864485981308411e-05, + "loss": 0.9776, + "step": 516 + }, + { + "epoch": 0.5318930041152263, + "grad_norm": 10.954802513122559, + "learning_rate": 1.8681204569055034e-05, + "loss": 0.6971, + "step": 517 + }, + { + "epoch": 0.5329218106995884, + "grad_norm": 10.687813758850098, + "learning_rate": 1.8717549325025958e-05, + "loss": 0.6725, + "step": 518 + }, + { + "epoch": 0.5339506172839507, + "grad_norm": 11.719423294067383, + "learning_rate": 1.8753894080996882e-05, + "loss": 0.6854, + "step": 519 + }, + { + "epoch": 0.5349794238683128, + "grad_norm": 16.232799530029297, + "learning_rate": 1.879023883696781e-05, + "loss": 1.7726, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_Qnli-dev_cosine_accuracy": 0.6875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8248189687728882, + "eval_Qnli-dev_cosine_ap": 0.7196731202506679, + "eval_Qnli-dev_cosine_f1": 0.6947368421052632, + "eval_Qnli-dev_cosine_f1_threshold": 0.7689546346664429, + "eval_Qnli-dev_cosine_precision": 0.592814371257485, + "eval_Qnli-dev_cosine_recall": 0.8389830508474576, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 419.0325927734375, + "eval_Qnli-dev_dot_ap": 0.616348530166337, + "eval_Qnli-dev_dot_f1": 0.6724436741767765, + "eval_Qnli-dev_dot_f1_threshold": 393.9245300292969, + "eval_Qnli-dev_dot_precision": 0.5689149560117303, + "eval_Qnli-dev_dot_recall": 0.8220338983050848, + "eval_Qnli-dev_euclidean_accuracy": 0.693359375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.748929977416992, + "eval_Qnli-dev_euclidean_ap": 0.7309618868427656, + "eval_Qnli-dev_euclidean_f1": 0.6943942133815552, + "eval_Qnli-dev_euclidean_f1_threshold": 15.1475830078125, + "eval_Qnli-dev_euclidean_precision": 0.6056782334384858, + "eval_Qnli-dev_euclidean_recall": 0.8135593220338984, + "eval_Qnli-dev_manhattan_accuracy": 0.685546875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 265.4633483886719, + "eval_Qnli-dev_manhattan_ap": 0.730577397962383, + "eval_Qnli-dev_manhattan_f1": 0.6940298507462687, + "eval_Qnli-dev_manhattan_f1_threshold": 303.4216613769531, + "eval_Qnli-dev_manhattan_precision": 0.62, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.693359375, + "eval_Qnli-dev_max_accuracy_threshold": 419.0325927734375, + "eval_Qnli-dev_max_ap": 0.7309618868427656, + "eval_Qnli-dev_max_f1": 0.6947368421052632, + "eval_Qnli-dev_max_f1_threshold": 393.9245300292969, + "eval_Qnli-dev_max_precision": 0.62, + "eval_Qnli-dev_max_recall": 0.8389830508474576, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8910384178161621, + "eval_allNLI-dev_cosine_ap": 0.5627746050790838, + "eval_allNLI-dev_cosine_f1": 0.5838509316770187, + "eval_allNLI-dev_cosine_f1_threshold": 0.8071809411048889, + "eval_allNLI-dev_cosine_precision": 0.45483870967741935, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.67578125, + "eval_allNLI-dev_dot_accuracy_threshold": 508.07659912109375, + "eval_allNLI-dev_dot_ap": 0.4588019812939956, + "eval_allNLI-dev_dot_f1": 0.5421686746987953, + "eval_allNLI-dev_dot_f1_threshold": 413.8941650390625, + "eval_allNLI-dev_dot_precision": 0.4153846153846154, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.73046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.289400100708008, + "eval_allNLI-dev_euclidean_ap": 0.5694426258019529, + "eval_allNLI-dev_euclidean_f1": 0.5922746781115881, + "eval_allNLI-dev_euclidean_f1_threshold": 14.015277862548828, + "eval_allNLI-dev_euclidean_precision": 0.4709897610921502, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 220.96963500976562, + "eval_allNLI-dev_manhattan_ap": 0.5723061411584658, + "eval_allNLI-dev_manhattan_f1": 0.5785876993166287, + "eval_allNLI-dev_manhattan_f1_threshold": 278.1605224609375, + "eval_allNLI-dev_manhattan_precision": 0.4774436090225564, + "eval_allNLI-dev_manhattan_recall": 0.7341040462427746, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 508.07659912109375, + "eval_allNLI-dev_max_ap": 0.5723061411584658, + "eval_allNLI-dev_max_f1": 0.5922746781115881, + "eval_allNLI-dev_max_f1_threshold": 413.8941650390625, + "eval_allNLI-dev_max_precision": 0.4774436090225564, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7309618868427656, + "eval_sts-test_pearson_cosine": 0.7944016690558295, + "eval_sts-test_pearson_dot": 0.7340676184460866, + "eval_sts-test_pearson_euclidean": 0.8206810004337891, + "eval_sts-test_pearson_manhattan": 0.8198751359187904, + "eval_sts-test_pearson_max": 0.8206810004337891, + "eval_sts-test_spearman_cosine": 0.8158374232832949, + "eval_sts-test_spearman_dot": 0.712276783998263, + "eval_sts-test_spearman_euclidean": 0.8117007509340581, + "eval_sts-test_spearman_manhattan": 0.8093512202084868, + "eval_sts-test_spearman_max": 0.8158374232832949, + "eval_vitaminc-pairs_loss": 2.9273321628570557, + "eval_vitaminc-pairs_runtime": 3.1718, + "eval_vitaminc-pairs_samples_per_second": 40.356, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_negation-triplets_loss": 1.3328778743743896, + "eval_negation-triplets_runtime": 0.7347, + "eval_negation-triplets_samples_per_second": 174.211, + "eval_negation-triplets_steps_per_second": 1.361, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_scitail-pairs-pos_loss": 0.18687528371810913, + "eval_scitail-pairs-pos_runtime": 0.8151, + "eval_scitail-pairs-pos_samples_per_second": 157.044, + "eval_scitail-pairs-pos_steps_per_second": 1.227, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_scitail-pairs-qa_loss": 0.014874367974698544, + "eval_scitail-pairs-qa_runtime": 0.5765, + "eval_scitail-pairs-qa_samples_per_second": 222.025, + "eval_scitail-pairs-qa_steps_per_second": 1.735, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_xsum-pairs_loss": 0.86911940574646, + "eval_xsum-pairs_runtime": 3.017, + "eval_xsum-pairs_samples_per_second": 42.427, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_sciq_pairs_loss": 0.14434820413589478, + "eval_sciq_pairs_runtime": 3.4284, + "eval_sciq_pairs_samples_per_second": 37.335, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_qasc_pairs_loss": 0.7873150110244751, + "eval_qasc_pairs_runtime": 0.6008, + "eval_qasc_pairs_samples_per_second": 213.056, + "eval_qasc_pairs_steps_per_second": 1.665, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_openbookqa_pairs_loss": 1.5795769691467285, + "eval_openbookqa_pairs_runtime": 0.5771, + "eval_openbookqa_pairs_samples_per_second": 221.803, + "eval_openbookqa_pairs_steps_per_second": 1.733, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_msmarco_pairs_loss": 1.4810850620269775, + "eval_msmarco_pairs_runtime": 1.525, + "eval_msmarco_pairs_samples_per_second": 83.934, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_nq_pairs_loss": 1.7317595481872559, + "eval_nq_pairs_runtime": 2.8997, + "eval_nq_pairs_samples_per_second": 44.143, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_trivia_pairs_loss": 1.2999101877212524, + "eval_trivia_pairs_runtime": 3.4365, + "eval_trivia_pairs_samples_per_second": 37.247, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_gooaq_pairs_loss": 0.903529167175293, + "eval_gooaq_pairs_runtime": 0.9492, + "eval_gooaq_pairs_samples_per_second": 134.844, + "eval_gooaq_pairs_steps_per_second": 1.053, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_paws-pos_loss": 0.04194509983062744, + "eval_paws-pos_runtime": 0.705, + "eval_paws-pos_samples_per_second": 181.572, + "eval_paws-pos_steps_per_second": 1.419, + "step": 520 + }, + { + "epoch": 0.5349794238683128, + "eval_global_dataset_loss": 0.7329986095428467, + "eval_global_dataset_runtime": 13.3667, + "eval_global_dataset_samples_per_second": 31.122, + "eval_global_dataset_steps_per_second": 0.299, + "step": 520 + }, + { + "epoch": 0.5360082304526749, + "grad_norm": 9.531018257141113, + "learning_rate": 1.882658359293873e-05, + "loss": 0.6841, + "step": 521 + }, + { + "epoch": 0.5370370370370371, + "grad_norm": 14.136958122253418, + "learning_rate": 1.8862928348909654e-05, + "loss": 1.4999, + "step": 522 + }, + { + "epoch": 0.5380658436213992, + "grad_norm": 16.56440544128418, + "learning_rate": 1.889927310488058e-05, + "loss": 1.8423, + "step": 523 + }, + { + "epoch": 0.5390946502057613, + "grad_norm": 18.816726684570312, + "learning_rate": 1.8935617860851502e-05, + "loss": 3.2063, + "step": 524 + }, + { + "epoch": 0.5401234567901234, + "grad_norm": 9.336271286010742, + "learning_rate": 1.8971962616822426e-05, + "loss": 0.7876, + "step": 525 + }, + { + "epoch": 0.5411522633744856, + "grad_norm": 9.695099830627441, + "learning_rate": 1.9008307372793354e-05, + "loss": 0.7463, + "step": 526 + }, + { + "epoch": 0.5421810699588477, + "grad_norm": 16.809635162353516, + "learning_rate": 1.9044652128764278e-05, + "loss": 1.317, + "step": 527 + }, + { + "epoch": 0.5432098765432098, + "grad_norm": 11.21884536743164, + "learning_rate": 1.90809968847352e-05, + "loss": 1.533, + "step": 528 + }, + { + "epoch": 0.5442386831275721, + "grad_norm": 11.746585845947266, + "learning_rate": 1.9117341640706126e-05, + "loss": 0.9414, + "step": 529 + }, + { + "epoch": 0.5452674897119342, + "grad_norm": 11.7705078125, + "learning_rate": 1.915368639667705e-05, + "loss": 0.8405, + "step": 530 + }, + { + "epoch": 0.5462962962962963, + "grad_norm": 11.811210632324219, + "learning_rate": 1.919003115264797e-05, + "loss": 1.1217, + "step": 531 + }, + { + "epoch": 0.5473251028806584, + "grad_norm": 8.906420707702637, + "learning_rate": 1.9226375908618898e-05, + "loss": 0.6404, + "step": 532 + }, + { + "epoch": 0.5483539094650206, + "grad_norm": 8.888873100280762, + "learning_rate": 1.9262720664589822e-05, + "loss": 0.6283, + "step": 533 + }, + { + "epoch": 0.5493827160493827, + "grad_norm": 2.18764591217041, + "learning_rate": 1.9299065420560746e-05, + "loss": 0.0678, + "step": 534 + }, + { + "epoch": 0.5504115226337448, + "grad_norm": 8.759835243225098, + "learning_rate": 1.933541017653167e-05, + "loss": 0.5242, + "step": 535 + }, + { + "epoch": 0.551440329218107, + "grad_norm": 18.4666748046875, + "learning_rate": 1.9371754932502594e-05, + "loss": 1.9928, + "step": 536 + }, + { + "epoch": 0.5524691358024691, + "grad_norm": 11.737098693847656, + "learning_rate": 1.9408099688473518e-05, + "loss": 0.8622, + "step": 537 + }, + { + "epoch": 0.5534979423868313, + "grad_norm": 14.750716209411621, + "learning_rate": 1.9444444444444442e-05, + "loss": 1.2746, + "step": 538 + }, + { + "epoch": 0.5545267489711934, + "grad_norm": 11.672311782836914, + "learning_rate": 1.9480789200415366e-05, + "loss": 0.7844, + "step": 539 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 12.69827651977539, + "learning_rate": 1.951713395638629e-05, + "loss": 1.041, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_Qnli-dev_cosine_accuracy": 0.677734375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7994829416275024, + "eval_Qnli-dev_cosine_ap": 0.717657619195893, + "eval_Qnli-dev_cosine_f1": 0.6919275123558485, + "eval_Qnli-dev_cosine_f1_threshold": 0.7339121103286743, + "eval_Qnli-dev_cosine_precision": 0.5660377358490566, + "eval_Qnli-dev_cosine_recall": 0.8898305084745762, + "eval_Qnli-dev_dot_accuracy": 0.68359375, + "eval_Qnli-dev_dot_accuracy_threshold": 409.82696533203125, + "eval_Qnli-dev_dot_ap": 0.6260001258234368, + "eval_Qnli-dev_dot_f1": 0.6723549488054607, + "eval_Qnli-dev_dot_f1_threshold": 380.0247802734375, + "eval_Qnli-dev_dot_precision": 0.5628571428571428, + "eval_Qnli-dev_dot_recall": 0.8347457627118644, + "eval_Qnli-dev_euclidean_accuracy": 0.6796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.7105712890625, + "eval_Qnli-dev_euclidean_ap": 0.7249308269630148, + "eval_Qnli-dev_euclidean_f1": 0.6906710310965629, + "eval_Qnli-dev_euclidean_f1_threshold": 16.837154388427734, + "eval_Qnli-dev_euclidean_precision": 0.5626666666666666, + "eval_Qnli-dev_euclidean_recall": 0.8940677966101694, + "eval_Qnli-dev_manhattan_accuracy": 0.685546875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 283.3619384765625, + "eval_Qnli-dev_manhattan_ap": 0.7235444857124764, + "eval_Qnli-dev_manhattan_f1": 0.6901172529313233, + "eval_Qnli-dev_manhattan_f1_threshold": 334.96246337890625, + "eval_Qnli-dev_manhattan_precision": 0.5706371191135734, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.685546875, + "eval_Qnli-dev_max_accuracy_threshold": 409.82696533203125, + "eval_Qnli-dev_max_ap": 0.7249308269630148, + "eval_Qnli-dev_max_f1": 0.6919275123558485, + "eval_Qnli-dev_max_f1_threshold": 380.0247802734375, + "eval_Qnli-dev_max_precision": 0.5706371191135734, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.720703125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.897883415222168, + "eval_allNLI-dev_cosine_ap": 0.559606374648369, + "eval_allNLI-dev_cosine_f1": 0.5806451612903226, + "eval_allNLI-dev_cosine_f1_threshold": 0.7978842854499817, + "eval_allNLI-dev_cosine_precision": 0.4623287671232877, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.6796875, + "eval_allNLI-dev_dot_accuracy_threshold": 470.4619140625, + "eval_allNLI-dev_dot_ap": 0.45733111663306314, + "eval_allNLI-dev_dot_f1": 0.5478841870824054, + "eval_allNLI-dev_dot_f1_threshold": 410.201171875, + "eval_allNLI-dev_dot_precision": 0.44565217391304346, + "eval_allNLI-dev_dot_recall": 0.7109826589595376, + "eval_allNLI-dev_euclidean_accuracy": 0.720703125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.147970199584961, + "eval_allNLI-dev_euclidean_ap": 0.5660004356159096, + "eval_allNLI-dev_euclidean_f1": 0.591792656587473, + "eval_allNLI-dev_euclidean_f1_threshold": 14.38115119934082, + "eval_allNLI-dev_euclidean_precision": 0.4724137931034483, + "eval_allNLI-dev_euclidean_recall": 0.791907514450867, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 207.6907958984375, + "eval_allNLI-dev_manhattan_ap": 0.56719407577034, + "eval_allNLI-dev_manhattan_f1": 0.587737843551797, + "eval_allNLI-dev_manhattan_f1_threshold": 296.9386901855469, + "eval_allNLI-dev_manhattan_precision": 0.4633333333333333, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.720703125, + "eval_allNLI-dev_max_accuracy_threshold": 470.4619140625, + "eval_allNLI-dev_max_ap": 0.56719407577034, + "eval_allNLI-dev_max_f1": 0.591792656587473, + "eval_allNLI-dev_max_f1_threshold": 410.201171875, + "eval_allNLI-dev_max_precision": 0.4724137931034483, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.7249308269630148, + "eval_sts-test_pearson_cosine": 0.7981570472860724, + "eval_sts-test_pearson_dot": 0.7528095037431898, + "eval_sts-test_pearson_euclidean": 0.8221585052591076, + "eval_sts-test_pearson_manhattan": 0.8186301303511336, + "eval_sts-test_pearson_max": 0.8221585052591076, + "eval_sts-test_spearman_cosine": 0.820562977481181, + "eval_sts-test_spearman_dot": 0.7361068754404446, + "eval_sts-test_spearman_euclidean": 0.8129253244507724, + "eval_sts-test_spearman_manhattan": 0.8097035916406826, + "eval_sts-test_spearman_max": 0.820562977481181, + "eval_vitaminc-pairs_loss": 2.9952337741851807, + "eval_vitaminc-pairs_runtime": 3.166, + "eval_vitaminc-pairs_samples_per_second": 40.43, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_negation-triplets_loss": 1.2927732467651367, + "eval_negation-triplets_runtime": 0.7377, + "eval_negation-triplets_samples_per_second": 173.504, + "eval_negation-triplets_steps_per_second": 1.355, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_scitail-pairs-pos_loss": 0.1593194603919983, + "eval_scitail-pairs-pos_runtime": 0.8171, + "eval_scitail-pairs-pos_samples_per_second": 156.657, + "eval_scitail-pairs-pos_steps_per_second": 1.224, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_scitail-pairs-qa_loss": 0.016190586611628532, + "eval_scitail-pairs-qa_runtime": 0.5737, + "eval_scitail-pairs-qa_samples_per_second": 223.102, + "eval_scitail-pairs-qa_steps_per_second": 1.743, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_xsum-pairs_loss": 0.7690907120704651, + "eval_xsum-pairs_runtime": 3.0195, + "eval_xsum-pairs_samples_per_second": 42.392, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_sciq_pairs_loss": 0.14176045358181, + "eval_sciq_pairs_runtime": 3.4232, + "eval_sciq_pairs_samples_per_second": 37.392, + "eval_sciq_pairs_steps_per_second": 0.292, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_qasc_pairs_loss": 0.754072904586792, + "eval_qasc_pairs_runtime": 0.599, + "eval_qasc_pairs_samples_per_second": 213.697, + "eval_qasc_pairs_steps_per_second": 1.67, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_openbookqa_pairs_loss": 1.468189001083374, + "eval_openbookqa_pairs_runtime": 0.5764, + "eval_openbookqa_pairs_samples_per_second": 222.08, + "eval_openbookqa_pairs_steps_per_second": 1.735, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_msmarco_pairs_loss": 1.443937063217163, + "eval_msmarco_pairs_runtime": 1.5215, + "eval_msmarco_pairs_samples_per_second": 84.128, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_nq_pairs_loss": 1.7499854564666748, + "eval_nq_pairs_runtime": 2.9123, + "eval_nq_pairs_samples_per_second": 43.951, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_trivia_pairs_loss": 1.284538984298706, + "eval_trivia_pairs_runtime": 3.4581, + "eval_trivia_pairs_samples_per_second": 37.015, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_gooaq_pairs_loss": 0.8851069808006287, + "eval_gooaq_pairs_runtime": 0.9412, + "eval_gooaq_pairs_samples_per_second": 135.997, + "eval_gooaq_pairs_steps_per_second": 1.062, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_paws-pos_loss": 0.04284976050257683, + "eval_paws-pos_runtime": 0.678, + "eval_paws-pos_samples_per_second": 188.793, + "eval_paws-pos_steps_per_second": 1.475, + "step": 540 + }, + { + "epoch": 0.5555555555555556, + "eval_global_dataset_loss": 0.7442251443862915, + "eval_global_dataset_runtime": 13.3627, + "eval_global_dataset_samples_per_second": 31.132, + "eval_global_dataset_steps_per_second": 0.299, + "step": 540 + }, + { + "epoch": 0.5565843621399177, + "grad_norm": 12.537612915039062, + "learning_rate": 1.9553478712357217e-05, + "loss": 0.9339, + "step": 541 + }, + { + "epoch": 0.5576131687242798, + "grad_norm": 1.0051987171173096, + "learning_rate": 1.9589823468328138e-05, + "loss": 0.0237, + "step": 542 + }, + { + "epoch": 0.558641975308642, + "grad_norm": 9.488045692443848, + "learning_rate": 1.9626168224299062e-05, + "loss": 0.4569, + "step": 543 + }, + { + "epoch": 0.5596707818930041, + "grad_norm": 11.0010986328125, + "learning_rate": 1.966251298026999e-05, + "loss": 0.6537, + "step": 544 + }, + { + "epoch": 0.5606995884773662, + "grad_norm": 16.367504119873047, + "learning_rate": 1.969885773624091e-05, + "loss": 1.5957, + "step": 545 + }, + { + "epoch": 0.5617283950617284, + "grad_norm": 0.978878378868103, + "learning_rate": 1.9735202492211834e-05, + "loss": 0.0269, + "step": 546 + }, + { + "epoch": 0.5627572016460906, + "grad_norm": 12.36868667602539, + "learning_rate": 1.977154724818276e-05, + "loss": 0.7591, + "step": 547 + }, + { + "epoch": 0.5637860082304527, + "grad_norm": 11.471710205078125, + "learning_rate": 1.9807892004153686e-05, + "loss": 0.7064, + "step": 548 + }, + { + "epoch": 0.5648148148148148, + "grad_norm": 15.039127349853516, + "learning_rate": 1.9844236760124606e-05, + "loss": 1.201, + "step": 549 + }, + { + "epoch": 0.565843621399177, + "grad_norm": 11.709723472595215, + "learning_rate": 1.9880581516095534e-05, + "loss": 0.7516, + "step": 550 + }, + { + "epoch": 0.5668724279835391, + "grad_norm": 2.1083853244781494, + "learning_rate": 1.9916926272066458e-05, + "loss": 0.0917, + "step": 551 + }, + { + "epoch": 0.5679012345679012, + "grad_norm": 12.638484954833984, + "learning_rate": 1.9953271028037378e-05, + "loss": 0.9826, + "step": 552 + }, + { + "epoch": 0.5689300411522634, + "grad_norm": 11.251784324645996, + "learning_rate": 1.9989615784008306e-05, + "loss": 0.8362, + "step": 553 + }, + { + "epoch": 0.5699588477366255, + "grad_norm": 13.69099235534668, + "learning_rate": 2.002596053997923e-05, + "loss": 1.5957, + "step": 554 + }, + { + "epoch": 0.5709876543209876, + "grad_norm": 15.196340560913086, + "learning_rate": 2.0062305295950154e-05, + "loss": 1.2807, + "step": 555 + }, + { + "epoch": 0.5720164609053497, + "grad_norm": 14.767230987548828, + "learning_rate": 2.0098650051921078e-05, + "loss": 1.6863, + "step": 556 + }, + { + "epoch": 0.573045267489712, + "grad_norm": 11.55445671081543, + "learning_rate": 2.0134994807892002e-05, + "loss": 1.5643, + "step": 557 + }, + { + "epoch": 0.5740740740740741, + "grad_norm": 13.466323852539062, + "learning_rate": 2.0171339563862926e-05, + "loss": 1.2279, + "step": 558 + }, + { + "epoch": 0.5751028806584362, + "grad_norm": 10.434534072875977, + "learning_rate": 2.0207684319833853e-05, + "loss": 0.7398, + "step": 559 + }, + { + "epoch": 0.5761316872427984, + "grad_norm": 16.75852394104004, + "learning_rate": 2.0244029075804774e-05, + "loss": 1.7229, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_Qnli-dev_cosine_accuracy": 0.6875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7987607717514038, + "eval_Qnli-dev_cosine_ap": 0.7245772032010487, + "eval_Qnli-dev_cosine_f1": 0.7073608617594255, + "eval_Qnli-dev_cosine_f1_threshold": 0.7755422592163086, + "eval_Qnli-dev_cosine_precision": 0.6137071651090342, + "eval_Qnli-dev_cosine_recall": 0.8347457627118644, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 429.08099365234375, + "eval_Qnli-dev_dot_ap": 0.618896987535733, + "eval_Qnli-dev_dot_f1": 0.6784565916398714, + "eval_Qnli-dev_dot_f1_threshold": 389.2666015625, + "eval_Qnli-dev_dot_precision": 0.5466321243523317, + "eval_Qnli-dev_dot_recall": 0.8940677966101694, + "eval_Qnli-dev_euclidean_accuracy": 0.6953125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.390548706054688, + "eval_Qnli-dev_euclidean_ap": 0.7347399680383467, + "eval_Qnli-dev_euclidean_f1": 0.6974169741697418, + "eval_Qnli-dev_euclidean_f1_threshold": 15.172780990600586, + "eval_Qnli-dev_euclidean_precision": 0.6176470588235294, + "eval_Qnli-dev_euclidean_recall": 0.8008474576271186, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 299.5706787109375, + "eval_Qnli-dev_manhattan_ap": 0.7368729396225034, + "eval_Qnli-dev_manhattan_f1": 0.7120622568093385, + "eval_Qnli-dev_manhattan_f1_threshold": 299.5706787109375, + "eval_Qnli-dev_manhattan_precision": 0.658273381294964, + "eval_Qnli-dev_manhattan_recall": 0.7754237288135594, + "eval_Qnli-dev_max_accuracy": 0.7109375, + "eval_Qnli-dev_max_accuracy_threshold": 429.08099365234375, + "eval_Qnli-dev_max_ap": 0.7368729396225034, + "eval_Qnli-dev_max_f1": 0.7120622568093385, + "eval_Qnli-dev_max_f1_threshold": 389.2666015625, + "eval_Qnli-dev_max_precision": 0.658273381294964, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.71875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8877571821212769, + "eval_allNLI-dev_cosine_ap": 0.5702315132181276, + "eval_allNLI-dev_cosine_f1": 0.5930735930735931, + "eval_allNLI-dev_cosine_f1_threshold": 0.8116433620452881, + "eval_allNLI-dev_cosine_precision": 0.4740484429065744, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.67578125, + "eval_allNLI-dev_dot_accuracy_threshold": 478.5546875, + "eval_allNLI-dev_dot_ap": 0.4739609661272707, + "eval_allNLI-dev_dot_f1": 0.5494949494949496, + "eval_allNLI-dev_dot_f1_threshold": 413.8797912597656, + "eval_allNLI-dev_dot_precision": 0.422360248447205, + "eval_allNLI-dev_dot_recall": 0.7861271676300579, + "eval_allNLI-dev_euclidean_accuracy": 0.72265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.04772663116455, + "eval_allNLI-dev_euclidean_ap": 0.57668991696855, + "eval_allNLI-dev_euclidean_f1": 0.5995525727069352, + "eval_allNLI-dev_euclidean_f1_threshold": 13.817825317382812, + "eval_allNLI-dev_euclidean_precision": 0.48905109489051096, + "eval_allNLI-dev_euclidean_recall": 0.7745664739884393, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 211.58740234375, + "eval_allNLI-dev_manhattan_ap": 0.578607497519579, + "eval_allNLI-dev_manhattan_f1": 0.5882352941176471, + "eval_allNLI-dev_manhattan_f1_threshold": 281.086181640625, + "eval_allNLI-dev_manhattan_precision": 0.483271375464684, + "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, + "eval_allNLI-dev_max_accuracy": 0.72265625, + "eval_allNLI-dev_max_accuracy_threshold": 478.5546875, + "eval_allNLI-dev_max_ap": 0.578607497519579, + "eval_allNLI-dev_max_f1": 0.5995525727069352, + "eval_allNLI-dev_max_f1_threshold": 413.8797912597656, + "eval_allNLI-dev_max_precision": 0.48905109489051096, + "eval_allNLI-dev_max_recall": 0.791907514450867, + "eval_sequential_score": 0.7368729396225034, + "eval_sts-test_pearson_cosine": 0.7951957837142611, + "eval_sts-test_pearson_dot": 0.7487270214140551, + "eval_sts-test_pearson_euclidean": 0.8178215451497555, + "eval_sts-test_pearson_manhattan": 0.8154922571151692, + "eval_sts-test_pearson_max": 0.8178215451497555, + "eval_sts-test_spearman_cosine": 0.8174810476116783, + "eval_sts-test_spearman_dot": 0.7310933468755048, + "eval_sts-test_spearman_euclidean": 0.8105849677337864, + "eval_sts-test_spearman_manhattan": 0.8080193779182173, + "eval_sts-test_spearman_max": 0.8174810476116783, + "eval_vitaminc-pairs_loss": 2.8546268939971924, + "eval_vitaminc-pairs_runtime": 3.164, + "eval_vitaminc-pairs_samples_per_second": 40.455, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_negation-triplets_loss": 1.2844172716140747, + "eval_negation-triplets_runtime": 0.7354, + "eval_negation-triplets_samples_per_second": 174.063, + "eval_negation-triplets_steps_per_second": 1.36, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_scitail-pairs-pos_loss": 0.17617923021316528, + "eval_scitail-pairs-pos_runtime": 0.804, + "eval_scitail-pairs-pos_samples_per_second": 159.198, + "eval_scitail-pairs-pos_steps_per_second": 1.244, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_scitail-pairs-qa_loss": 0.013183332979679108, + "eval_scitail-pairs-qa_runtime": 0.5639, + "eval_scitail-pairs-qa_samples_per_second": 226.973, + "eval_scitail-pairs-qa_steps_per_second": 1.773, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_xsum-pairs_loss": 0.8270187973976135, + "eval_xsum-pairs_runtime": 3.0144, + "eval_xsum-pairs_samples_per_second": 42.463, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_sciq_pairs_loss": 0.1439501792192459, + "eval_sciq_pairs_runtime": 3.4768, + "eval_sciq_pairs_samples_per_second": 36.816, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_qasc_pairs_loss": 0.6848240494728088, + "eval_qasc_pairs_runtime": 0.6196, + "eval_qasc_pairs_samples_per_second": 206.597, + "eval_qasc_pairs_steps_per_second": 1.614, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_openbookqa_pairs_loss": 1.4732991456985474, + "eval_openbookqa_pairs_runtime": 0.5734, + "eval_openbookqa_pairs_samples_per_second": 223.235, + "eval_openbookqa_pairs_steps_per_second": 1.744, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_msmarco_pairs_loss": 1.4930459260940552, + "eval_msmarco_pairs_runtime": 1.5133, + "eval_msmarco_pairs_samples_per_second": 84.581, + "eval_msmarco_pairs_steps_per_second": 0.661, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_nq_pairs_loss": 1.7120836973190308, + "eval_nq_pairs_runtime": 2.8949, + "eval_nq_pairs_samples_per_second": 44.216, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_trivia_pairs_loss": 1.3425896167755127, + "eval_trivia_pairs_runtime": 3.4363, + "eval_trivia_pairs_samples_per_second": 37.249, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_gooaq_pairs_loss": 0.828025758266449, + "eval_gooaq_pairs_runtime": 0.9422, + "eval_gooaq_pairs_samples_per_second": 135.847, + "eval_gooaq_pairs_steps_per_second": 1.061, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_paws-pos_loss": 0.039411623030900955, + "eval_paws-pos_runtime": 0.6819, + "eval_paws-pos_samples_per_second": 187.706, + "eval_paws-pos_steps_per_second": 1.466, + "step": 560 + }, + { + "epoch": 0.5761316872427984, + "eval_global_dataset_loss": 0.7242797613143921, + "eval_global_dataset_runtime": 13.3545, + "eval_global_dataset_samples_per_second": 31.151, + "eval_global_dataset_steps_per_second": 0.3, + "step": 560 + }, + { + "epoch": 0.5771604938271605, + "grad_norm": 8.372831344604492, + "learning_rate": 2.0280373831775698e-05, + "loss": 0.593, + "step": 561 + }, + { + "epoch": 0.5781893004115226, + "grad_norm": 19.26259422302246, + "learning_rate": 2.0316718587746625e-05, + "loss": 1.8963, + "step": 562 + }, + { + "epoch": 0.5792181069958847, + "grad_norm": 11.283585548400879, + "learning_rate": 2.0353063343717546e-05, + "loss": 0.743, + "step": 563 + }, + { + "epoch": 0.5802469135802469, + "grad_norm": 8.997882843017578, + "learning_rate": 2.038940809968847e-05, + "loss": 0.5824, + "step": 564 + }, + { + "epoch": 0.581275720164609, + "grad_norm": 13.550999641418457, + "learning_rate": 2.0425752855659397e-05, + "loss": 1.7532, + "step": 565 + }, + { + "epoch": 0.5823045267489712, + "grad_norm": 8.910313606262207, + "learning_rate": 2.046209761163032e-05, + "loss": 0.6509, + "step": 566 + }, + { + "epoch": 0.5833333333333334, + "grad_norm": 10.5217866897583, + "learning_rate": 2.0498442367601242e-05, + "loss": 0.7318, + "step": 567 + }, + { + "epoch": 0.5843621399176955, + "grad_norm": 13.271885871887207, + "learning_rate": 2.053478712357217e-05, + "loss": 1.3168, + "step": 568 + }, + { + "epoch": 0.5853909465020576, + "grad_norm": 9.908731460571289, + "learning_rate": 2.0571131879543093e-05, + "loss": 0.599, + "step": 569 + }, + { + "epoch": 0.5864197530864198, + "grad_norm": 14.152383804321289, + "learning_rate": 2.0607476635514014e-05, + "loss": 1.672, + "step": 570 + }, + { + "epoch": 0.5874485596707819, + "grad_norm": 9.812310218811035, + "learning_rate": 2.064382139148494e-05, + "loss": 0.7583, + "step": 571 + }, + { + "epoch": 0.588477366255144, + "grad_norm": 5.6503825187683105, + "learning_rate": 2.0680166147455865e-05, + "loss": 0.1891, + "step": 572 + }, + { + "epoch": 0.5895061728395061, + "grad_norm": 10.130154609680176, + "learning_rate": 2.071651090342679e-05, + "loss": 0.6344, + "step": 573 + }, + { + "epoch": 0.5905349794238683, + "grad_norm": 15.343293190002441, + "learning_rate": 2.0752855659397713e-05, + "loss": 1.303, + "step": 574 + }, + { + "epoch": 0.5915637860082305, + "grad_norm": 21.49701499938965, + "learning_rate": 2.0789200415368637e-05, + "loss": 2.2275, + "step": 575 + }, + { + "epoch": 0.5925925925925926, + "grad_norm": 14.206128120422363, + "learning_rate": 2.082554517133956e-05, + "loss": 1.3081, + "step": 576 + }, + { + "epoch": 0.5936213991769548, + "grad_norm": 9.159503936767578, + "learning_rate": 2.086188992731049e-05, + "loss": 0.5681, + "step": 577 + }, + { + "epoch": 0.5946502057613169, + "grad_norm": 10.146199226379395, + "learning_rate": 2.089823468328141e-05, + "loss": 0.6258, + "step": 578 + }, + { + "epoch": 0.595679012345679, + "grad_norm": 12.96678638458252, + "learning_rate": 2.0934579439252334e-05, + "loss": 1.1454, + "step": 579 + }, + { + "epoch": 0.5967078189300411, + "grad_norm": 14.751097679138184, + "learning_rate": 2.097092419522326e-05, + "loss": 1.3416, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_Qnli-dev_cosine_accuracy": 0.66796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8469637632369995, + "eval_Qnli-dev_cosine_ap": 0.7090284432654561, + "eval_Qnli-dev_cosine_f1": 0.6897689768976898, + "eval_Qnli-dev_cosine_f1_threshold": 0.7387524843215942, + "eval_Qnli-dev_cosine_precision": 0.5648648648648649, + "eval_Qnli-dev_cosine_recall": 0.885593220338983, + "eval_Qnli-dev_dot_accuracy": 0.6640625, + "eval_Qnli-dev_dot_accuracy_threshold": 408.5235595703125, + "eval_Qnli-dev_dot_ap": 0.6097543105824177, + "eval_Qnli-dev_dot_f1": 0.6701754385964912, + "eval_Qnli-dev_dot_f1_threshold": 390.4075012207031, + "eval_Qnli-dev_dot_precision": 0.5718562874251497, + "eval_Qnli-dev_dot_recall": 0.809322033898305, + "eval_Qnli-dev_euclidean_accuracy": 0.677734375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.064620971679688, + "eval_Qnli-dev_euclidean_ap": 0.7199645621423693, + "eval_Qnli-dev_euclidean_f1": 0.6836734693877551, + "eval_Qnli-dev_euclidean_f1_threshold": 16.033926010131836, + "eval_Qnli-dev_euclidean_precision": 0.5710227272727273, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.681640625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 294.104248046875, + "eval_Qnli-dev_manhattan_ap": 0.721231392124396, + "eval_Qnli-dev_manhattan_f1": 0.6897810218978102, + "eval_Qnli-dev_manhattan_f1_threshold": 310.521728515625, + "eval_Qnli-dev_manhattan_precision": 0.6057692307692307, + "eval_Qnli-dev_manhattan_recall": 0.8008474576271186, + "eval_Qnli-dev_max_accuracy": 0.681640625, + "eval_Qnli-dev_max_accuracy_threshold": 408.5235595703125, + "eval_Qnli-dev_max_ap": 0.721231392124396, + "eval_Qnli-dev_max_f1": 0.6897810218978102, + "eval_Qnli-dev_max_f1_threshold": 390.4075012207031, + "eval_Qnli-dev_max_precision": 0.6057692307692307, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8930063247680664, + "eval_allNLI-dev_cosine_ap": 0.580516831193243, + "eval_allNLI-dev_cosine_f1": 0.5932203389830509, + "eval_allNLI-dev_cosine_f1_threshold": 0.792042076587677, + "eval_allNLI-dev_cosine_precision": 0.4682274247491639, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.681640625, + "eval_allNLI-dev_dot_accuracy_threshold": 479.3341064453125, + "eval_allNLI-dev_dot_ap": 0.48669798557045457, + "eval_allNLI-dev_dot_f1": 0.560919540229885, + "eval_allNLI-dev_dot_f1_threshold": 413.0164794921875, + "eval_allNLI-dev_dot_precision": 0.46564885496183206, + "eval_allNLI-dev_dot_recall": 0.7052023121387283, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.375900268554688, + "eval_allNLI-dev_euclidean_ap": 0.586159821151409, + "eval_allNLI-dev_euclidean_f1": 0.5925925925925926, + "eval_allNLI-dev_euclidean_f1_threshold": 13.825302124023438, + "eval_allNLI-dev_euclidean_precision": 0.4942084942084942, + "eval_allNLI-dev_euclidean_recall": 0.7398843930635838, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 213.74179077148438, + "eval_allNLI-dev_manhattan_ap": 0.5867922982953583, + "eval_allNLI-dev_manhattan_f1": 0.5903890160183066, + "eval_allNLI-dev_manhattan_f1_threshold": 286.81524658203125, + "eval_allNLI-dev_manhattan_precision": 0.48863636363636365, + "eval_allNLI-dev_manhattan_recall": 0.7456647398843931, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 479.3341064453125, + "eval_allNLI-dev_max_ap": 0.5867922982953583, + "eval_allNLI-dev_max_f1": 0.5932203389830509, + "eval_allNLI-dev_max_f1_threshold": 413.0164794921875, + "eval_allNLI-dev_max_precision": 0.4942084942084942, + "eval_allNLI-dev_max_recall": 0.8092485549132948, + "eval_sequential_score": 0.721231392124396, + "eval_sts-test_pearson_cosine": 0.8031708345006614, + "eval_sts-test_pearson_dot": 0.7716469990772233, + "eval_sts-test_pearson_euclidean": 0.8293403363982195, + "eval_sts-test_pearson_manhattan": 0.8269704942343952, + "eval_sts-test_pearson_max": 0.8293403363982195, + "eval_sts-test_spearman_cosine": 0.8293793339853779, + "eval_sts-test_spearman_dot": 0.7565175229997094, + "eval_sts-test_spearman_euclidean": 0.8224314768980562, + "eval_sts-test_spearman_manhattan": 0.81979553809958, + "eval_sts-test_spearman_max": 0.8293793339853779, + "eval_vitaminc-pairs_loss": 2.9443347454071045, + "eval_vitaminc-pairs_runtime": 3.1898, + "eval_vitaminc-pairs_samples_per_second": 40.127, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_negation-triplets_loss": 1.221449851989746, + "eval_negation-triplets_runtime": 0.7486, + "eval_negation-triplets_samples_per_second": 170.975, + "eval_negation-triplets_steps_per_second": 1.336, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_scitail-pairs-pos_loss": 0.1803685873746872, + "eval_scitail-pairs-pos_runtime": 0.829, + "eval_scitail-pairs-pos_samples_per_second": 154.409, + "eval_scitail-pairs-pos_steps_per_second": 1.206, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_scitail-pairs-qa_loss": 0.015901347622275352, + "eval_scitail-pairs-qa_runtime": 0.5704, + "eval_scitail-pairs-qa_samples_per_second": 224.404, + "eval_scitail-pairs-qa_steps_per_second": 1.753, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_xsum-pairs_loss": 0.7095991969108582, + "eval_xsum-pairs_runtime": 3.0163, + "eval_xsum-pairs_samples_per_second": 42.436, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_sciq_pairs_loss": 0.13398276269435883, + "eval_sciq_pairs_runtime": 3.4459, + "eval_sciq_pairs_samples_per_second": 37.145, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_qasc_pairs_loss": 0.681054413318634, + "eval_qasc_pairs_runtime": 0.6052, + "eval_qasc_pairs_samples_per_second": 211.516, + "eval_qasc_pairs_steps_per_second": 1.652, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_openbookqa_pairs_loss": 1.32936692237854, + "eval_openbookqa_pairs_runtime": 0.578, + "eval_openbookqa_pairs_samples_per_second": 221.445, + "eval_openbookqa_pairs_steps_per_second": 1.73, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_msmarco_pairs_loss": 1.3513559103012085, + "eval_msmarco_pairs_runtime": 1.5095, + "eval_msmarco_pairs_samples_per_second": 84.796, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_nq_pairs_loss": 1.6727423667907715, + "eval_nq_pairs_runtime": 2.8997, + "eval_nq_pairs_samples_per_second": 44.143, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_trivia_pairs_loss": 1.1192874908447266, + "eval_trivia_pairs_runtime": 3.4386, + "eval_trivia_pairs_samples_per_second": 37.225, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_gooaq_pairs_loss": 0.8172786235809326, + "eval_gooaq_pairs_runtime": 0.9533, + "eval_gooaq_pairs_samples_per_second": 134.272, + "eval_gooaq_pairs_steps_per_second": 1.049, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_paws-pos_loss": 0.03949186950922012, + "eval_paws-pos_runtime": 0.6806, + "eval_paws-pos_samples_per_second": 188.056, + "eval_paws-pos_steps_per_second": 1.469, + "step": 580 + }, + { + "epoch": 0.5967078189300411, + "eval_global_dataset_loss": 0.6970628499984741, + "eval_global_dataset_runtime": 13.3615, + "eval_global_dataset_samples_per_second": 31.134, + "eval_global_dataset_steps_per_second": 0.299, + "step": 580 + }, + { + "epoch": 0.5977366255144033, + "grad_norm": 14.620248794555664, + "learning_rate": 2.100726895119418e-05, + "loss": 1.6136, + "step": 581 + }, + { + "epoch": 0.5987654320987654, + "grad_norm": 9.990836143493652, + "learning_rate": 2.1043613707165106e-05, + "loss": 0.5856, + "step": 582 + }, + { + "epoch": 0.5997942386831275, + "grad_norm": 11.57479190826416, + "learning_rate": 2.1079958463136033e-05, + "loss": 0.7762, + "step": 583 + }, + { + "epoch": 0.6008230452674898, + "grad_norm": 16.514976501464844, + "learning_rate": 2.1116303219106954e-05, + "loss": 2.0577, + "step": 584 + }, + { + "epoch": 0.6018518518518519, + "grad_norm": 19.117877960205078, + "learning_rate": 2.1152647975077878e-05, + "loss": 1.8893, + "step": 585 + }, + { + "epoch": 0.602880658436214, + "grad_norm": 1.2878212928771973, + "learning_rate": 2.1188992731048805e-05, + "loss": 0.0455, + "step": 586 + }, + { + "epoch": 0.6039094650205762, + "grad_norm": 15.874303817749023, + "learning_rate": 2.122533748701973e-05, + "loss": 2.5615, + "step": 587 + }, + { + "epoch": 0.6049382716049383, + "grad_norm": 9.337711334228516, + "learning_rate": 2.126168224299065e-05, + "loss": 0.593, + "step": 588 + }, + { + "epoch": 0.6059670781893004, + "grad_norm": 10.22465991973877, + "learning_rate": 2.1298026998961577e-05, + "loss": 0.8033, + "step": 589 + }, + { + "epoch": 0.6069958847736625, + "grad_norm": 9.863337516784668, + "learning_rate": 2.13343717549325e-05, + "loss": 0.694, + "step": 590 + }, + { + "epoch": 0.6080246913580247, + "grad_norm": 12.331180572509766, + "learning_rate": 2.1370716510903422e-05, + "loss": 1.0183, + "step": 591 + }, + { + "epoch": 0.6090534979423868, + "grad_norm": 9.044501304626465, + "learning_rate": 2.140706126687435e-05, + "loss": 0.6388, + "step": 592 + }, + { + "epoch": 0.6100823045267489, + "grad_norm": 9.711915969848633, + "learning_rate": 2.1443406022845273e-05, + "loss": 0.7858, + "step": 593 + }, + { + "epoch": 0.6111111111111112, + "grad_norm": 5.571502208709717, + "learning_rate": 2.1479750778816197e-05, + "loss": 0.1627, + "step": 594 + }, + { + "epoch": 0.6121399176954733, + "grad_norm": 10.834738731384277, + "learning_rate": 2.151609553478712e-05, + "loss": 1.2084, + "step": 595 + }, + { + "epoch": 0.6131687242798354, + "grad_norm": 11.250519752502441, + "learning_rate": 2.1552440290758045e-05, + "loss": 0.8371, + "step": 596 + }, + { + "epoch": 0.6141975308641975, + "grad_norm": 12.769804000854492, + "learning_rate": 2.158878504672897e-05, + "loss": 1.0759, + "step": 597 + }, + { + "epoch": 0.6152263374485597, + "grad_norm": 9.822973251342773, + "learning_rate": 2.1625129802699897e-05, + "loss": 0.6237, + "step": 598 + }, + { + "epoch": 0.6162551440329218, + "grad_norm": 12.792522430419922, + "learning_rate": 2.1661474558670817e-05, + "loss": 0.9396, + "step": 599 + }, + { + "epoch": 0.6172839506172839, + "grad_norm": 11.624062538146973, + "learning_rate": 2.169781931464174e-05, + "loss": 0.7352, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_Qnli-dev_cosine_accuracy": 0.685546875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7914708852767944, + "eval_Qnli-dev_cosine_ap": 0.7227066968429299, + "eval_Qnli-dev_cosine_f1": 0.6948529411764706, + "eval_Qnli-dev_cosine_f1_threshold": 0.766169548034668, + "eval_Qnli-dev_cosine_precision": 0.6136363636363636, + "eval_Qnli-dev_cosine_recall": 0.8008474576271186, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 405.1741943359375, + "eval_Qnli-dev_dot_ap": 0.6291761267009413, + "eval_Qnli-dev_dot_f1": 0.6897810218978102, + "eval_Qnli-dev_dot_f1_threshold": 382.8020935058594, + "eval_Qnli-dev_dot_precision": 0.6057692307692307, + "eval_Qnli-dev_dot_recall": 0.8008474576271186, + "eval_Qnli-dev_euclidean_accuracy": 0.69140625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.424887657165527, + "eval_Qnli-dev_euclidean_ap": 0.7307017217323966, + "eval_Qnli-dev_euclidean_f1": 0.6906710310965629, + "eval_Qnli-dev_euclidean_f1_threshold": 17.00006675720215, + "eval_Qnli-dev_euclidean_precision": 0.5626666666666666, + "eval_Qnli-dev_euclidean_recall": 0.8940677966101694, + "eval_Qnli-dev_manhattan_accuracy": 0.689453125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 309.230712890625, + "eval_Qnli-dev_manhattan_ap": 0.7325013115093475, + "eval_Qnli-dev_manhattan_f1": 0.6953528399311533, + "eval_Qnli-dev_manhattan_f1_threshold": 332.23504638671875, + "eval_Qnli-dev_manhattan_precision": 0.5855072463768116, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.69140625, + "eval_Qnli-dev_max_accuracy_threshold": 405.1741943359375, + "eval_Qnli-dev_max_ap": 0.7325013115093475, + "eval_Qnli-dev_max_f1": 0.6953528399311533, + "eval_Qnli-dev_max_f1_threshold": 382.8020935058594, + "eval_Qnli-dev_max_precision": 0.6136363636363636, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8877395391464233, + "eval_allNLI-dev_cosine_ap": 0.5814109945041677, + "eval_allNLI-dev_cosine_f1": 0.5892116182572614, + "eval_allNLI-dev_cosine_f1_threshold": 0.7833628058433533, + "eval_allNLI-dev_cosine_precision": 0.459546925566343, + "eval_allNLI-dev_cosine_recall": 0.8208092485549133, + "eval_allNLI-dev_dot_accuracy": 0.68359375, + "eval_allNLI-dev_dot_accuracy_threshold": 498.7593994140625, + "eval_allNLI-dev_dot_ap": 0.49817236088425526, + "eval_allNLI-dev_dot_f1": 0.5469728601252609, + "eval_allNLI-dev_dot_f1_threshold": 396.20513916015625, + "eval_allNLI-dev_dot_precision": 0.42810457516339867, + "eval_allNLI-dev_dot_recall": 0.7572254335260116, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.987224578857422, + "eval_allNLI-dev_euclidean_ap": 0.5868737853118521, + "eval_allNLI-dev_euclidean_f1": 0.5991735537190083, + "eval_allNLI-dev_euclidean_f1_threshold": 14.847602844238281, + "eval_allNLI-dev_euclidean_precision": 0.4662379421221865, + "eval_allNLI-dev_euclidean_recall": 0.838150289017341, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 217.56982421875, + "eval_allNLI-dev_manhattan_ap": 0.5854235635053637, + "eval_allNLI-dev_manhattan_f1": 0.5908096280087528, + "eval_allNLI-dev_manhattan_f1_threshold": 296.2995300292969, + "eval_allNLI-dev_manhattan_precision": 0.4753521126760563, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 498.7593994140625, + "eval_allNLI-dev_max_ap": 0.5868737853118521, + "eval_allNLI-dev_max_f1": 0.5991735537190083, + "eval_allNLI-dev_max_f1_threshold": 396.20513916015625, + "eval_allNLI-dev_max_precision": 0.4753521126760563, + "eval_allNLI-dev_max_recall": 0.838150289017341, + "eval_sequential_score": 0.7325013115093475, + "eval_sts-test_pearson_cosine": 0.8070716873912918, + "eval_sts-test_pearson_dot": 0.7619397369954762, + "eval_sts-test_pearson_euclidean": 0.8282272675602773, + "eval_sts-test_pearson_manhattan": 0.8241390313463588, + "eval_sts-test_pearson_max": 0.8282272675602773, + "eval_sts-test_spearman_cosine": 0.8247862882724717, + "eval_sts-test_spearman_dot": 0.7450420017923742, + "eval_sts-test_spearman_euclidean": 0.819151701701942, + "eval_sts-test_spearman_manhattan": 0.8149713968728485, + "eval_sts-test_spearman_max": 0.8247862882724717, + "eval_vitaminc-pairs_loss": 2.7805817127227783, + "eval_vitaminc-pairs_runtime": 3.1769, + "eval_vitaminc-pairs_samples_per_second": 40.291, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_negation-triplets_loss": 1.2412256002426147, + "eval_negation-triplets_runtime": 0.7403, + "eval_negation-triplets_samples_per_second": 172.908, + "eval_negation-triplets_steps_per_second": 1.351, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_scitail-pairs-pos_loss": 0.19108502566814423, + "eval_scitail-pairs-pos_runtime": 0.8102, + "eval_scitail-pairs-pos_samples_per_second": 157.986, + "eval_scitail-pairs-pos_steps_per_second": 1.234, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_scitail-pairs-qa_loss": 0.011316634714603424, + "eval_scitail-pairs-qa_runtime": 0.5692, + "eval_scitail-pairs-qa_samples_per_second": 224.889, + "eval_scitail-pairs-qa_steps_per_second": 1.757, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_xsum-pairs_loss": 0.6977664232254028, + "eval_xsum-pairs_runtime": 3.0198, + "eval_xsum-pairs_samples_per_second": 42.387, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_sciq_pairs_loss": 0.13763564825057983, + "eval_sciq_pairs_runtime": 3.413, + "eval_sciq_pairs_samples_per_second": 37.503, + "eval_sciq_pairs_steps_per_second": 0.293, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_qasc_pairs_loss": 0.6264404058456421, + "eval_qasc_pairs_runtime": 0.5999, + "eval_qasc_pairs_samples_per_second": 213.376, + "eval_qasc_pairs_steps_per_second": 1.667, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_openbookqa_pairs_loss": 1.2759621143341064, + "eval_openbookqa_pairs_runtime": 0.5867, + "eval_openbookqa_pairs_samples_per_second": 218.169, + "eval_openbookqa_pairs_steps_per_second": 1.704, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_msmarco_pairs_loss": 1.4110215902328491, + "eval_msmarco_pairs_runtime": 1.5228, + "eval_msmarco_pairs_samples_per_second": 84.054, + "eval_msmarco_pairs_steps_per_second": 0.657, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_nq_pairs_loss": 1.654952883720398, + "eval_nq_pairs_runtime": 2.9213, + "eval_nq_pairs_samples_per_second": 43.816, + "eval_nq_pairs_steps_per_second": 0.342, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_trivia_pairs_loss": 1.11814284324646, + "eval_trivia_pairs_runtime": 3.4571, + "eval_trivia_pairs_samples_per_second": 37.025, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_gooaq_pairs_loss": 0.8059184551239014, + "eval_gooaq_pairs_runtime": 0.9451, + "eval_gooaq_pairs_samples_per_second": 135.437, + "eval_gooaq_pairs_steps_per_second": 1.058, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_paws-pos_loss": 0.038612980395555496, + "eval_paws-pos_runtime": 0.6792, + "eval_paws-pos_samples_per_second": 188.462, + "eval_paws-pos_steps_per_second": 1.472, + "step": 600 + }, + { + "epoch": 0.6172839506172839, + "eval_global_dataset_loss": 0.6731630563735962, + "eval_global_dataset_runtime": 13.3755, + "eval_global_dataset_samples_per_second": 31.102, + "eval_global_dataset_steps_per_second": 0.299, + "step": 600 + }, + { + "epoch": 0.6183127572016461, + "grad_norm": 9.29102611541748, + "learning_rate": 2.173416407061267e-05, + "loss": 0.5273, + "step": 601 + }, + { + "epoch": 0.6193415637860082, + "grad_norm": 18.072662353515625, + "learning_rate": 2.177050882658359e-05, + "loss": 1.925, + "step": 602 + }, + { + "epoch": 0.6203703703703703, + "grad_norm": 11.696316719055176, + "learning_rate": 2.1806853582554513e-05, + "loss": 0.8177, + "step": 603 + }, + { + "epoch": 0.6213991769547325, + "grad_norm": 8.543580055236816, + "learning_rate": 2.184319833852544e-05, + "loss": 0.4747, + "step": 604 + }, + { + "epoch": 0.6224279835390947, + "grad_norm": 11.905756950378418, + "learning_rate": 2.1879543094496365e-05, + "loss": 0.9485, + "step": 605 + }, + { + "epoch": 0.6234567901234568, + "grad_norm": 13.481616020202637, + "learning_rate": 2.1915887850467285e-05, + "loss": 1.7983, + "step": 606 + }, + { + "epoch": 0.6244855967078189, + "grad_norm": 4.5081787109375, + "learning_rate": 2.1952232606438213e-05, + "loss": 0.1446, + "step": 607 + }, + { + "epoch": 0.6255144032921811, + "grad_norm": 10.28495979309082, + "learning_rate": 2.1988577362409137e-05, + "loss": 0.6929, + "step": 608 + }, + { + "epoch": 0.6265432098765432, + "grad_norm": 0.8422635197639465, + "learning_rate": 2.2024922118380058e-05, + "loss": 0.056, + "step": 609 + }, + { + "epoch": 0.6275720164609053, + "grad_norm": 10.7501220703125, + "learning_rate": 2.2061266874350985e-05, + "loss": 0.6738, + "step": 610 + }, + { + "epoch": 0.6286008230452675, + "grad_norm": 13.118562698364258, + "learning_rate": 2.209761163032191e-05, + "loss": 1.4398, + "step": 611 + }, + { + "epoch": 0.6296296296296297, + "grad_norm": 19.016132354736328, + "learning_rate": 2.2133956386292833e-05, + "loss": 3.152, + "step": 612 + }, + { + "epoch": 0.6306584362139918, + "grad_norm": 16.179283142089844, + "learning_rate": 2.2170301142263757e-05, + "loss": 1.8703, + "step": 613 + }, + { + "epoch": 0.6316872427983539, + "grad_norm": 1.413341999053955, + "learning_rate": 2.220664589823468e-05, + "loss": 0.0766, + "step": 614 + }, + { + "epoch": 0.6327160493827161, + "grad_norm": 19.418697357177734, + "learning_rate": 2.2242990654205605e-05, + "loss": 2.4434, + "step": 615 + }, + { + "epoch": 0.6337448559670782, + "grad_norm": 13.95297622680664, + "learning_rate": 2.2279335410176532e-05, + "loss": 1.4074, + "step": 616 + }, + { + "epoch": 0.6347736625514403, + "grad_norm": 9.78261947631836, + "learning_rate": 2.2315680166147453e-05, + "loss": 0.7425, + "step": 617 + }, + { + "epoch": 0.6358024691358025, + "grad_norm": 7.618975639343262, + "learning_rate": 2.2352024922118377e-05, + "loss": 0.466, + "step": 618 + }, + { + "epoch": 0.6368312757201646, + "grad_norm": 11.607491493225098, + "learning_rate": 2.2388369678089305e-05, + "loss": 1.6586, + "step": 619 + }, + { + "epoch": 0.6378600823045267, + "grad_norm": 7.107526779174805, + "learning_rate": 2.2424714434060225e-05, + "loss": 0.3817, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_Qnli-dev_cosine_accuracy": 0.693359375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8104921579360962, + "eval_Qnli-dev_cosine_ap": 0.7372700421671432, + "eval_Qnli-dev_cosine_f1": 0.7011070110701106, + "eval_Qnli-dev_cosine_f1_threshold": 0.7957046627998352, + "eval_Qnli-dev_cosine_precision": 0.6209150326797386, + "eval_Qnli-dev_cosine_recall": 0.8050847457627118, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 438.48602294921875, + "eval_Qnli-dev_dot_ap": 0.6254364606240859, + "eval_Qnli-dev_dot_f1": 0.6798561151079136, + "eval_Qnli-dev_dot_f1_threshold": 417.19720458984375, + "eval_Qnli-dev_dot_precision": 0.590625, + "eval_Qnli-dev_dot_recall": 0.8008474576271186, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 12.722761154174805, + "eval_Qnli-dev_euclidean_ap": 0.7476820851309197, + "eval_Qnli-dev_euclidean_f1": 0.6962457337883959, + "eval_Qnli-dev_euclidean_f1_threshold": 15.658858299255371, + "eval_Qnli-dev_euclidean_precision": 0.5828571428571429, + "eval_Qnli-dev_euclidean_recall": 0.864406779661017, + "eval_Qnli-dev_manhattan_accuracy": 0.693359375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 277.64154052734375, + "eval_Qnli-dev_manhattan_ap": 0.747576429030092, + "eval_Qnli-dev_manhattan_f1": 0.6969147005444646, + "eval_Qnli-dev_manhattan_f1_threshold": 306.7862548828125, + "eval_Qnli-dev_manhattan_precision": 0.6095238095238096, + "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, + "eval_Qnli-dev_max_accuracy": 0.701171875, + "eval_Qnli-dev_max_accuracy_threshold": 438.48602294921875, + "eval_Qnli-dev_max_ap": 0.7476820851309197, + "eval_Qnli-dev_max_f1": 0.7011070110701106, + "eval_Qnli-dev_max_f1_threshold": 417.19720458984375, + "eval_Qnli-dev_max_precision": 0.6209150326797386, + "eval_Qnli-dev_max_recall": 0.864406779661017, + "eval_allNLI-dev_cosine_accuracy": 0.734375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.9023975133895874, + "eval_allNLI-dev_cosine_ap": 0.5865878816400992, + "eval_allNLI-dev_cosine_f1": 0.5961123110151189, + "eval_allNLI-dev_cosine_f1_threshold": 0.815485954284668, + "eval_allNLI-dev_cosine_precision": 0.47586206896551725, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.6796875, + "eval_allNLI-dev_dot_accuracy_threshold": 520.5687255859375, + "eval_allNLI-dev_dot_ap": 0.50417908457673, + "eval_allNLI-dev_dot_f1": 0.5764705882352941, + "eval_allNLI-dev_dot_f1_threshold": 419.378662109375, + "eval_allNLI-dev_dot_precision": 0.4362017804154303, + "eval_allNLI-dev_dot_recall": 0.8497109826589595, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.461543083190918, + "eval_allNLI-dev_euclidean_ap": 0.5891739143142342, + "eval_allNLI-dev_euclidean_f1": 0.6018099547511312, + "eval_allNLI-dev_euclidean_f1_threshold": 13.633740425109863, + "eval_allNLI-dev_euclidean_precision": 0.4944237918215613, + "eval_allNLI-dev_euclidean_recall": 0.7687861271676301, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 205.95645141601562, + "eval_allNLI-dev_manhattan_ap": 0.5909121718301882, + "eval_allNLI-dev_manhattan_f1": 0.5978947368421053, + "eval_allNLI-dev_manhattan_f1_threshold": 292.0635681152344, + "eval_allNLI-dev_manhattan_precision": 0.47019867549668876, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 520.5687255859375, + "eval_allNLI-dev_max_ap": 0.5909121718301882, + "eval_allNLI-dev_max_f1": 0.6018099547511312, + "eval_allNLI-dev_max_f1_threshold": 419.378662109375, + "eval_allNLI-dev_max_precision": 0.4944237918215613, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7476820851309197, + "eval_sts-test_pearson_cosine": 0.811803599688079, + "eval_sts-test_pearson_dot": 0.7763025780752795, + "eval_sts-test_pearson_euclidean": 0.834182762862252, + "eval_sts-test_pearson_manhattan": 0.8306831599881925, + "eval_sts-test_pearson_max": 0.834182762862252, + "eval_sts-test_spearman_cosine": 0.8279280953297161, + "eval_sts-test_spearman_dot": 0.7618572435089312, + "eval_sts-test_spearman_euclidean": 0.8235176795145484, + "eval_sts-test_spearman_manhattan": 0.8203718448437786, + "eval_sts-test_spearman_max": 0.8279280953297161, + "eval_vitaminc-pairs_loss": 2.7285807132720947, + "eval_vitaminc-pairs_runtime": 3.1675, + "eval_vitaminc-pairs_samples_per_second": 40.41, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_negation-triplets_loss": 1.2768163681030273, + "eval_negation-triplets_runtime": 0.7451, + "eval_negation-triplets_samples_per_second": 171.791, + "eval_negation-triplets_steps_per_second": 1.342, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_scitail-pairs-pos_loss": 0.221151664853096, + "eval_scitail-pairs-pos_runtime": 0.8023, + "eval_scitail-pairs-pos_samples_per_second": 159.546, + "eval_scitail-pairs-pos_steps_per_second": 1.246, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_scitail-pairs-qa_loss": 0.011276349425315857, + "eval_scitail-pairs-qa_runtime": 0.5728, + "eval_scitail-pairs-qa_samples_per_second": 223.455, + "eval_scitail-pairs-qa_steps_per_second": 1.746, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_xsum-pairs_loss": 0.6888625025749207, + "eval_xsum-pairs_runtime": 3.022, + "eval_xsum-pairs_samples_per_second": 42.356, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_sciq_pairs_loss": 0.12679386138916016, + "eval_sciq_pairs_runtime": 3.4396, + "eval_sciq_pairs_samples_per_second": 37.213, + "eval_sciq_pairs_steps_per_second": 0.291, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_qasc_pairs_loss": 0.6138037443161011, + "eval_qasc_pairs_runtime": 0.6116, + "eval_qasc_pairs_samples_per_second": 209.28, + "eval_qasc_pairs_steps_per_second": 1.635, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_openbookqa_pairs_loss": 1.2520498037338257, + "eval_openbookqa_pairs_runtime": 0.575, + "eval_openbookqa_pairs_samples_per_second": 222.626, + "eval_openbookqa_pairs_steps_per_second": 1.739, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_msmarco_pairs_loss": 1.2622545957565308, + "eval_msmarco_pairs_runtime": 1.5106, + "eval_msmarco_pairs_samples_per_second": 84.736, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_nq_pairs_loss": 1.5863006114959717, + "eval_nq_pairs_runtime": 2.9147, + "eval_nq_pairs_samples_per_second": 43.915, + "eval_nq_pairs_steps_per_second": 0.343, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_trivia_pairs_loss": 1.1821491718292236, + "eval_trivia_pairs_runtime": 3.4369, + "eval_trivia_pairs_samples_per_second": 37.243, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_gooaq_pairs_loss": 0.7643461227416992, + "eval_gooaq_pairs_runtime": 0.9406, + "eval_gooaq_pairs_samples_per_second": 136.089, + "eval_gooaq_pairs_steps_per_second": 1.063, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_paws-pos_loss": 0.03508833795785904, + "eval_paws-pos_runtime": 0.6812, + "eval_paws-pos_samples_per_second": 187.907, + "eval_paws-pos_steps_per_second": 1.468, + "step": 620 + }, + { + "epoch": 0.6378600823045267, + "eval_global_dataset_loss": 0.6339895725250244, + "eval_global_dataset_runtime": 13.3641, + "eval_global_dataset_samples_per_second": 31.128, + "eval_global_dataset_steps_per_second": 0.299, + "step": 620 + }, + { + "epoch": 0.6388888888888888, + "grad_norm": 13.532258033752441, + "learning_rate": 2.246105919003115e-05, + "loss": 1.4414, + "step": 621 + }, + { + "epoch": 0.6399176954732511, + "grad_norm": 9.563913345336914, + "learning_rate": 2.2497403946002077e-05, + "loss": 0.7481, + "step": 622 + }, + { + "epoch": 0.6409465020576132, + "grad_norm": 10.86938762664795, + "learning_rate": 2.2533748701973e-05, + "loss": 0.8256, + "step": 623 + }, + { + "epoch": 0.6419753086419753, + "grad_norm": 4.665733814239502, + "learning_rate": 2.257009345794392e-05, + "loss": 0.1559, + "step": 624 + }, + { + "epoch": 0.6430041152263375, + "grad_norm": 10.261479377746582, + "learning_rate": 2.260643821391485e-05, + "loss": 0.8878, + "step": 625 + }, + { + "epoch": 0.6440329218106996, + "grad_norm": 9.72616958618164, + "learning_rate": 2.2642782969885773e-05, + "loss": 0.5888, + "step": 626 + }, + { + "epoch": 0.6450617283950617, + "grad_norm": 11.944307327270508, + "learning_rate": 2.2679127725856693e-05, + "loss": 1.0332, + "step": 627 + }, + { + "epoch": 0.6460905349794238, + "grad_norm": 10.020615577697754, + "learning_rate": 2.271547248182762e-05, + "loss": 1.0121, + "step": 628 + }, + { + "epoch": 0.647119341563786, + "grad_norm": 8.791054725646973, + "learning_rate": 2.2751817237798545e-05, + "loss": 0.6393, + "step": 629 + }, + { + "epoch": 0.6481481481481481, + "grad_norm": 12.706099510192871, + "learning_rate": 2.278816199376947e-05, + "loss": 0.7494, + "step": 630 + }, + { + "epoch": 0.6491769547325102, + "grad_norm": 3.587538480758667, + "learning_rate": 2.2824506749740393e-05, + "loss": 0.1088, + "step": 631 + }, + { + "epoch": 0.6502057613168725, + "grad_norm": 16.609806060791016, + "learning_rate": 2.2860851505711317e-05, + "loss": 1.3588, + "step": 632 + }, + { + "epoch": 0.6512345679012346, + "grad_norm": 1.4342639446258545, + "learning_rate": 2.289719626168224e-05, + "loss": 0.0403, + "step": 633 + }, + { + "epoch": 0.6522633744855967, + "grad_norm": 25.457242965698242, + "learning_rate": 2.2933541017653165e-05, + "loss": 3.6884, + "step": 634 + }, + { + "epoch": 0.6532921810699589, + "grad_norm": 19.651193618774414, + "learning_rate": 2.296988577362409e-05, + "loss": 1.6915, + "step": 635 + }, + { + "epoch": 0.654320987654321, + "grad_norm": 10.904431343078613, + "learning_rate": 2.3006230529595013e-05, + "loss": 0.5166, + "step": 636 + }, + { + "epoch": 0.6553497942386831, + "grad_norm": 20.06137466430664, + "learning_rate": 2.304257528556594e-05, + "loss": 1.8266, + "step": 637 + }, + { + "epoch": 0.6563786008230452, + "grad_norm": 17.062715530395508, + "learning_rate": 2.307892004153686e-05, + "loss": 1.3875, + "step": 638 + }, + { + "epoch": 0.6574074074074074, + "grad_norm": 21.51274299621582, + "learning_rate": 2.3115264797507785e-05, + "loss": 1.8874, + "step": 639 + }, + { + "epoch": 0.6584362139917695, + "grad_norm": 1.2121050357818604, + "learning_rate": 2.3151609553478712e-05, + "loss": 0.0379, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7842894792556763, + "eval_Qnli-dev_cosine_ap": 0.7349992877103873, + "eval_Qnli-dev_cosine_f1": 0.7003610108303249, + "eval_Qnli-dev_cosine_f1_threshold": 0.7629624605178833, + "eval_Qnli-dev_cosine_precision": 0.610062893081761, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.662109375, + "eval_Qnli-dev_dot_accuracy_threshold": 385.76885986328125, + "eval_Qnli-dev_dot_ap": 0.6359639073801129, + "eval_Qnli-dev_dot_f1": 0.6838709677419356, + "eval_Qnli-dev_dot_f1_threshold": 354.2484436035156, + "eval_Qnli-dev_dot_precision": 0.5520833333333334, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.231593132019043, + "eval_Qnli-dev_euclidean_ap": 0.7462767063117786, + "eval_Qnli-dev_euclidean_f1": 0.7047970479704797, + "eval_Qnli-dev_euclidean_f1_threshold": 15.258886337280273, + "eval_Qnli-dev_euclidean_precision": 0.6241830065359477, + "eval_Qnli-dev_euclidean_recall": 0.809322033898305, + "eval_Qnli-dev_manhattan_accuracy": 0.69921875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 265.3671875, + "eval_Qnli-dev_manhattan_ap": 0.744418854148787, + "eval_Qnli-dev_manhattan_f1": 0.708029197080292, + "eval_Qnli-dev_manhattan_f1_threshold": 314.21258544921875, + "eval_Qnli-dev_manhattan_precision": 0.6217948717948718, + "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, + "eval_Qnli-dev_max_accuracy": 0.701171875, + "eval_Qnli-dev_max_accuracy_threshold": 385.76885986328125, + "eval_Qnli-dev_max_ap": 0.7462767063117786, + "eval_Qnli-dev_max_f1": 0.708029197080292, + "eval_Qnli-dev_max_f1_threshold": 354.2484436035156, + "eval_Qnli-dev_max_precision": 0.6241830065359477, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8901729583740234, + "eval_allNLI-dev_cosine_ap": 0.5909042367020411, + "eval_allNLI-dev_cosine_f1": 0.6091127098321343, + "eval_allNLI-dev_cosine_f1_threshold": 0.8110285401344299, + "eval_allNLI-dev_cosine_precision": 0.5204918032786885, + "eval_allNLI-dev_cosine_recall": 0.7341040462427746, + "eval_allNLI-dev_dot_accuracy": 0.6796875, + "eval_allNLI-dev_dot_accuracy_threshold": 481.55474853515625, + "eval_allNLI-dev_dot_ap": 0.4948903950504878, + "eval_allNLI-dev_dot_f1": 0.569672131147541, + "eval_allNLI-dev_dot_f1_threshold": 379.9951171875, + "eval_allNLI-dev_dot_precision": 0.44126984126984126, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.585214614868164, + "eval_allNLI-dev_euclidean_ap": 0.5947416283923239, + "eval_allNLI-dev_euclidean_f1": 0.6009852216748768, + "eval_allNLI-dev_euclidean_f1_threshold": 13.550745010375977, + "eval_allNLI-dev_euclidean_precision": 0.5236051502145923, + "eval_allNLI-dev_euclidean_recall": 0.7052023121387283, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 228.28366088867188, + "eval_allNLI-dev_manhattan_ap": 0.5918176918420521, + "eval_allNLI-dev_manhattan_f1": 0.5991379310344827, + "eval_allNLI-dev_manhattan_f1_threshold": 301.08868408203125, + "eval_allNLI-dev_manhattan_precision": 0.47766323024054985, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 481.55474853515625, + "eval_allNLI-dev_max_ap": 0.5947416283923239, + "eval_allNLI-dev_max_f1": 0.6091127098321343, + "eval_allNLI-dev_max_f1_threshold": 379.9951171875, + "eval_allNLI-dev_max_precision": 0.5236051502145923, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.7462767063117786, + "eval_sts-test_pearson_cosine": 0.8118059789516554, + "eval_sts-test_pearson_dot": 0.7734818279888613, + "eval_sts-test_pearson_euclidean": 0.8386920311953987, + "eval_sts-test_pearson_manhattan": 0.8356441135209492, + "eval_sts-test_pearson_max": 0.8386920311953987, + "eval_sts-test_spearman_cosine": 0.8328721251857153, + "eval_sts-test_spearman_dot": 0.7551982558138911, + "eval_sts-test_spearman_euclidean": 0.8285452152243036, + "eval_sts-test_spearman_manhattan": 0.8259300410111131, + "eval_sts-test_spearman_max": 0.8328721251857153, + "eval_vitaminc-pairs_loss": 2.8136911392211914, + "eval_vitaminc-pairs_runtime": 3.1765, + "eval_vitaminc-pairs_samples_per_second": 40.296, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_negation-triplets_loss": 1.244739055633545, + "eval_negation-triplets_runtime": 0.7519, + "eval_negation-triplets_samples_per_second": 170.238, + "eval_negation-triplets_steps_per_second": 1.33, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_scitail-pairs-pos_loss": 0.22537671029567719, + "eval_scitail-pairs-pos_runtime": 0.8268, + "eval_scitail-pairs-pos_samples_per_second": 154.805, + "eval_scitail-pairs-pos_steps_per_second": 1.209, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_scitail-pairs-qa_loss": 0.014203112572431564, + "eval_scitail-pairs-qa_runtime": 0.5719, + "eval_scitail-pairs-qa_samples_per_second": 223.816, + "eval_scitail-pairs-qa_steps_per_second": 1.749, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_xsum-pairs_loss": 0.6345345377922058, + "eval_xsum-pairs_runtime": 3.0155, + "eval_xsum-pairs_samples_per_second": 42.447, + "eval_xsum-pairs_steps_per_second": 0.332, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_sciq_pairs_loss": 0.1278018057346344, + "eval_sciq_pairs_runtime": 3.4439, + "eval_sciq_pairs_samples_per_second": 37.167, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_qasc_pairs_loss": 0.5951372385025024, + "eval_qasc_pairs_runtime": 0.6218, + "eval_qasc_pairs_samples_per_second": 205.857, + "eval_qasc_pairs_steps_per_second": 1.608, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_openbookqa_pairs_loss": 1.232675552368164, + "eval_openbookqa_pairs_runtime": 0.582, + "eval_openbookqa_pairs_samples_per_second": 219.948, + "eval_openbookqa_pairs_steps_per_second": 1.718, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_msmarco_pairs_loss": 1.3142263889312744, + "eval_msmarco_pairs_runtime": 1.5099, + "eval_msmarco_pairs_samples_per_second": 84.773, + "eval_msmarco_pairs_steps_per_second": 0.662, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_nq_pairs_loss": 1.6414275169372559, + "eval_nq_pairs_runtime": 2.9022, + "eval_nq_pairs_samples_per_second": 44.104, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_trivia_pairs_loss": 1.201471209526062, + "eval_trivia_pairs_runtime": 3.4361, + "eval_trivia_pairs_samples_per_second": 37.252, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_gooaq_pairs_loss": 0.7273324131965637, + "eval_gooaq_pairs_runtime": 0.9436, + "eval_gooaq_pairs_samples_per_second": 135.656, + "eval_gooaq_pairs_steps_per_second": 1.06, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_paws-pos_loss": 0.033227745443582535, + "eval_paws-pos_runtime": 0.6799, + "eval_paws-pos_samples_per_second": 188.253, + "eval_paws-pos_steps_per_second": 1.471, + "step": 640 + }, + { + "epoch": 0.6584362139917695, + "eval_global_dataset_loss": 0.644037663936615, + "eval_global_dataset_runtime": 13.3785, + "eval_global_dataset_samples_per_second": 31.095, + "eval_global_dataset_steps_per_second": 0.299, + "step": 640 + }, + { + "epoch": 0.6594650205761317, + "grad_norm": 2.2254586219787598, + "learning_rate": 2.3187954309449633e-05, + "loss": 0.2144, + "step": 641 + }, + { + "epoch": 0.6604938271604939, + "grad_norm": 8.457268714904785, + "learning_rate": 2.3224299065420557e-05, + "loss": 0.5899, + "step": 642 + }, + { + "epoch": 0.661522633744856, + "grad_norm": 16.62227439880371, + "learning_rate": 2.3260643821391484e-05, + "loss": 1.7055, + "step": 643 + }, + { + "epoch": 0.6625514403292181, + "grad_norm": 9.388711929321289, + "learning_rate": 2.329698857736241e-05, + "loss": 0.5673, + "step": 644 + }, + { + "epoch": 0.6635802469135802, + "grad_norm": 3.408893346786499, + "learning_rate": 2.333333333333333e-05, + "loss": 0.0845, + "step": 645 + }, + { + "epoch": 0.6646090534979424, + "grad_norm": 11.298724174499512, + "learning_rate": 2.3369678089304256e-05, + "loss": 0.7168, + "step": 646 + }, + { + "epoch": 0.6656378600823045, + "grad_norm": 16.72682762145996, + "learning_rate": 2.340602284527518e-05, + "loss": 2.6358, + "step": 647 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 7.872361660003662, + "learning_rate": 2.34423676012461e-05, + "loss": 0.3951, + "step": 648 + }, + { + "epoch": 0.6676954732510288, + "grad_norm": 9.12248420715332, + "learning_rate": 2.347871235721703e-05, + "loss": 0.584, + "step": 649 + }, + { + "epoch": 0.668724279835391, + "grad_norm": 11.847990036010742, + "learning_rate": 2.3515057113187953e-05, + "loss": 0.9239, + "step": 650 + }, + { + "epoch": 0.6697530864197531, + "grad_norm": 8.815132141113281, + "learning_rate": 2.3551401869158877e-05, + "loss": 0.576, + "step": 651 + }, + { + "epoch": 0.6707818930041153, + "grad_norm": 13.088105201721191, + "learning_rate": 2.35877466251298e-05, + "loss": 1.2842, + "step": 652 + }, + { + "epoch": 0.6718106995884774, + "grad_norm": 9.663747787475586, + "learning_rate": 2.3624091381100725e-05, + "loss": 0.7108, + "step": 653 + }, + { + "epoch": 0.6728395061728395, + "grad_norm": 10.207884788513184, + "learning_rate": 2.366043613707165e-05, + "loss": 0.6935, + "step": 654 + }, + { + "epoch": 0.6738683127572016, + "grad_norm": 10.963897705078125, + "learning_rate": 2.3696780893042576e-05, + "loss": 0.8278, + "step": 655 + }, + { + "epoch": 0.6748971193415638, + "grad_norm": 9.319234848022461, + "learning_rate": 2.3733125649013497e-05, + "loss": 0.6456, + "step": 656 + }, + { + "epoch": 0.6759259259259259, + "grad_norm": 14.43174934387207, + "learning_rate": 2.376947040498442e-05, + "loss": 1.8842, + "step": 657 + }, + { + "epoch": 0.676954732510288, + "grad_norm": 13.448914527893066, + "learning_rate": 2.3805815160955348e-05, + "loss": 1.2572, + "step": 658 + }, + { + "epoch": 0.6779835390946503, + "grad_norm": 8.692782402038574, + "learning_rate": 2.384215991692627e-05, + "loss": 0.6718, + "step": 659 + }, + { + "epoch": 0.6790123456790124, + "grad_norm": 4.224426746368408, + "learning_rate": 2.3878504672897193e-05, + "loss": 0.1434, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.819502055644989, + "eval_Qnli-dev_cosine_ap": 0.7429310249805731, + "eval_Qnli-dev_cosine_f1": 0.7023411371237458, + "eval_Qnli-dev_cosine_f1_threshold": 0.7529304623603821, + "eval_Qnli-dev_cosine_precision": 0.580110497237569, + "eval_Qnli-dev_cosine_recall": 0.8898305084745762, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 392.81878662109375, + "eval_Qnli-dev_dot_ap": 0.6658795733353435, + "eval_Qnli-dev_dot_f1": 0.684981684981685, + "eval_Qnli-dev_dot_f1_threshold": 388.4842529296875, + "eval_Qnli-dev_dot_precision": 0.603225806451613, + "eval_Qnli-dev_dot_recall": 0.7923728813559322, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.631841659545898, + "eval_Qnli-dev_euclidean_ap": 0.7487350788106928, + "eval_Qnli-dev_euclidean_f1": 0.7015706806282722, + "eval_Qnli-dev_euclidean_f1_threshold": 15.337552070617676, + "eval_Qnli-dev_euclidean_precision": 0.5964391691394659, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.701171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 295.4067077636719, + "eval_Qnli-dev_manhattan_ap": 0.7482376569453756, + "eval_Qnli-dev_manhattan_f1": 0.7113594040968343, + "eval_Qnli-dev_manhattan_f1_threshold": 299.4460754394531, + "eval_Qnli-dev_manhattan_precision": 0.6345514950166113, + "eval_Qnli-dev_manhattan_recall": 0.809322033898305, + "eval_Qnli-dev_max_accuracy": 0.70703125, + "eval_Qnli-dev_max_accuracy_threshold": 392.81878662109375, + "eval_Qnli-dev_max_ap": 0.7487350788106928, + "eval_Qnli-dev_max_f1": 0.7113594040968343, + "eval_Qnli-dev_max_f1_threshold": 388.4842529296875, + "eval_Qnli-dev_max_precision": 0.6345514950166113, + "eval_Qnli-dev_max_recall": 0.8898305084745762, + "eval_allNLI-dev_cosine_accuracy": 0.734375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8959517478942871, + "eval_allNLI-dev_cosine_ap": 0.5960858156370117, + "eval_allNLI-dev_cosine_f1": 0.5995717344753748, + "eval_allNLI-dev_cosine_f1_threshold": 0.7982358932495117, + "eval_allNLI-dev_cosine_precision": 0.47619047619047616, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.689453125, + "eval_allNLI-dev_dot_accuracy_threshold": 450.1946716308594, + "eval_allNLI-dev_dot_ap": 0.5096208353059024, + "eval_allNLI-dev_dot_f1": 0.5690021231422505, + "eval_allNLI-dev_dot_f1_threshold": 398.77850341796875, + "eval_allNLI-dev_dot_precision": 0.44966442953020136, + "eval_allNLI-dev_dot_recall": 0.7745664739884393, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.649042129516602, + "eval_allNLI-dev_euclidean_ap": 0.5979924892509634, + "eval_allNLI-dev_euclidean_f1": 0.6090534979423868, + "eval_allNLI-dev_euclidean_f1_threshold": 14.710177421569824, + "eval_allNLI-dev_euclidean_precision": 0.4728434504792332, + "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 218.83389282226562, + "eval_allNLI-dev_manhattan_ap": 0.5954291033762709, + "eval_allNLI-dev_manhattan_f1": 0.5973451327433628, + "eval_allNLI-dev_manhattan_f1_threshold": 288.9541015625, + "eval_allNLI-dev_manhattan_precision": 0.4838709677419355, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 450.1946716308594, + "eval_allNLI-dev_max_ap": 0.5979924892509634, + "eval_allNLI-dev_max_f1": 0.6090534979423868, + "eval_allNLI-dev_max_f1_threshold": 398.77850341796875, + "eval_allNLI-dev_max_precision": 0.4838709677419355, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7487350788106928, + "eval_sts-test_pearson_cosine": 0.809748141776852, + "eval_sts-test_pearson_dot": 0.7852622986479767, + "eval_sts-test_pearson_euclidean": 0.8383482677548499, + "eval_sts-test_pearson_manhattan": 0.8356178836101067, + "eval_sts-test_pearson_max": 0.8383482677548499, + "eval_sts-test_spearman_cosine": 0.8342041017297689, + "eval_sts-test_spearman_dot": 0.7727315762707344, + "eval_sts-test_spearman_euclidean": 0.8310839542830377, + "eval_sts-test_spearman_manhattan": 0.8265729823835233, + "eval_sts-test_spearman_max": 0.8342041017297689, + "eval_vitaminc-pairs_loss": 2.8169939517974854, + "eval_vitaminc-pairs_runtime": 3.1955, + "eval_vitaminc-pairs_samples_per_second": 40.056, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_negation-triplets_loss": 1.216970443725586, + "eval_negation-triplets_runtime": 0.7501, + "eval_negation-triplets_samples_per_second": 170.642, + "eval_negation-triplets_steps_per_second": 1.333, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_scitail-pairs-pos_loss": 0.2154267579317093, + "eval_scitail-pairs-pos_runtime": 0.8251, + "eval_scitail-pairs-pos_samples_per_second": 155.127, + "eval_scitail-pairs-pos_steps_per_second": 1.212, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_scitail-pairs-qa_loss": 0.008771178312599659, + "eval_scitail-pairs-qa_runtime": 0.5793, + "eval_scitail-pairs-qa_samples_per_second": 220.954, + "eval_scitail-pairs-qa_steps_per_second": 1.726, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_xsum-pairs_loss": 0.6624985933303833, + "eval_xsum-pairs_runtime": 3.0194, + "eval_xsum-pairs_samples_per_second": 42.393, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_sciq_pairs_loss": 0.12456458061933517, + "eval_sciq_pairs_runtime": 3.4544, + "eval_sciq_pairs_samples_per_second": 37.055, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_qasc_pairs_loss": 0.5933777093887329, + "eval_qasc_pairs_runtime": 0.6095, + "eval_qasc_pairs_samples_per_second": 209.991, + "eval_qasc_pairs_steps_per_second": 1.641, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_openbookqa_pairs_loss": 1.2264533042907715, + "eval_openbookqa_pairs_runtime": 0.5907, + "eval_openbookqa_pairs_samples_per_second": 216.708, + "eval_openbookqa_pairs_steps_per_second": 1.693, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_msmarco_pairs_loss": 1.2734606266021729, + "eval_msmarco_pairs_runtime": 1.5181, + "eval_msmarco_pairs_samples_per_second": 84.315, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_nq_pairs_loss": 1.6421589851379395, + "eval_nq_pairs_runtime": 2.8912, + "eval_nq_pairs_samples_per_second": 44.272, + "eval_nq_pairs_steps_per_second": 0.346, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_trivia_pairs_loss": 1.1045206785202026, + "eval_trivia_pairs_runtime": 3.4335, + "eval_trivia_pairs_samples_per_second": 37.28, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_gooaq_pairs_loss": 0.7241554856300354, + "eval_gooaq_pairs_runtime": 0.9492, + "eval_gooaq_pairs_samples_per_second": 134.856, + "eval_gooaq_pairs_steps_per_second": 1.054, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_paws-pos_loss": 0.03431744873523712, + "eval_paws-pos_runtime": 0.6884, + "eval_paws-pos_samples_per_second": 185.934, + "eval_paws-pos_steps_per_second": 1.453, + "step": 660 + }, + { + "epoch": 0.6790123456790124, + "eval_global_dataset_loss": 0.6402216553688049, + "eval_global_dataset_runtime": 13.4067, + "eval_global_dataset_samples_per_second": 31.029, + "eval_global_dataset_steps_per_second": 0.298, + "step": 660 + }, + { + "epoch": 0.6800411522633745, + "grad_norm": 20.211734771728516, + "learning_rate": 2.391484942886812e-05, + "loss": 2.1395, + "step": 661 + }, + { + "epoch": 0.6810699588477366, + "grad_norm": 7.7893218994140625, + "learning_rate": 2.3951194184839044e-05, + "loss": 0.6218, + "step": 662 + }, + { + "epoch": 0.6820987654320988, + "grad_norm": 16.382932662963867, + "learning_rate": 2.3987538940809965e-05, + "loss": 1.691, + "step": 663 + }, + { + "epoch": 0.6831275720164609, + "grad_norm": 13.506409645080566, + "learning_rate": 2.4023883696780892e-05, + "loss": 1.3362, + "step": 664 + }, + { + "epoch": 0.684156378600823, + "grad_norm": 13.324780464172363, + "learning_rate": 2.4060228452751816e-05, + "loss": 1.1382, + "step": 665 + }, + { + "epoch": 0.6851851851851852, + "grad_norm": 10.345579147338867, + "learning_rate": 2.4096573208722737e-05, + "loss": 1.0932, + "step": 666 + }, + { + "epoch": 0.6862139917695473, + "grad_norm": 10.737591743469238, + "learning_rate": 2.4132917964693664e-05, + "loss": 0.9572, + "step": 667 + }, + { + "epoch": 0.6872427983539094, + "grad_norm": 17.071697235107422, + "learning_rate": 2.4169262720664588e-05, + "loss": 1.9663, + "step": 668 + }, + { + "epoch": 0.6882716049382716, + "grad_norm": 11.74267292022705, + "learning_rate": 2.4205607476635512e-05, + "loss": 0.8968, + "step": 669 + }, + { + "epoch": 0.6893004115226338, + "grad_norm": 11.056696891784668, + "learning_rate": 2.4241952232606436e-05, + "loss": 0.7906, + "step": 670 + }, + { + "epoch": 0.6903292181069959, + "grad_norm": 10.595043182373047, + "learning_rate": 2.427829698857736e-05, + "loss": 0.7443, + "step": 671 + }, + { + "epoch": 0.691358024691358, + "grad_norm": 9.793761253356934, + "learning_rate": 2.4314641744548284e-05, + "loss": 0.6939, + "step": 672 + }, + { + "epoch": 0.6923868312757202, + "grad_norm": 10.305285453796387, + "learning_rate": 2.4350986500519212e-05, + "loss": 1.202, + "step": 673 + }, + { + "epoch": 0.6934156378600823, + "grad_norm": 1.1254714727401733, + "learning_rate": 2.4387331256490132e-05, + "loss": 0.0276, + "step": 674 + }, + { + "epoch": 0.6944444444444444, + "grad_norm": 10.750346183776855, + "learning_rate": 2.4423676012461056e-05, + "loss": 1.121, + "step": 675 + }, + { + "epoch": 0.6954732510288066, + "grad_norm": 9.77961254119873, + "learning_rate": 2.4460020768431984e-05, + "loss": 0.721, + "step": 676 + }, + { + "epoch": 0.6965020576131687, + "grad_norm": 10.97049331665039, + "learning_rate": 2.4496365524402904e-05, + "loss": 1.0949, + "step": 677 + }, + { + "epoch": 0.6975308641975309, + "grad_norm": 13.591765403747559, + "learning_rate": 2.453271028037383e-05, + "loss": 1.3044, + "step": 678 + }, + { + "epoch": 0.698559670781893, + "grad_norm": 10.30559253692627, + "learning_rate": 2.4569055036344756e-05, + "loss": 0.6867, + "step": 679 + }, + { + "epoch": 0.6995884773662552, + "grad_norm": 9.589376449584961, + "learning_rate": 2.4605399792315676e-05, + "loss": 0.6253, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_Qnli-dev_cosine_accuracy": 0.689453125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8195374011993408, + "eval_Qnli-dev_cosine_ap": 0.7393646924153436, + "eval_Qnli-dev_cosine_f1": 0.702054794520548, + "eval_Qnli-dev_cosine_f1_threshold": 0.7384560704231262, + "eval_Qnli-dev_cosine_precision": 0.5890804597701149, + "eval_Qnli-dev_cosine_recall": 0.8686440677966102, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 395.6339111328125, + "eval_Qnli-dev_dot_ap": 0.6696734110834349, + "eval_Qnli-dev_dot_f1": 0.6894308943089431, + "eval_Qnli-dev_dot_f1_threshold": 355.788330078125, + "eval_Qnli-dev_dot_precision": 0.5593667546174143, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.701171875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.906505584716797, + "eval_Qnli-dev_euclidean_ap": 0.7444803762790224, + "eval_Qnli-dev_euclidean_f1": 0.7016949152542373, + "eval_Qnli-dev_euclidean_f1_threshold": 16.160581588745117, + "eval_Qnli-dev_euclidean_precision": 0.5847457627118644, + "eval_Qnli-dev_euclidean_recall": 0.8771186440677966, + "eval_Qnli-dev_manhattan_accuracy": 0.703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 292.0366516113281, + "eval_Qnli-dev_manhattan_ap": 0.7455488536354595, + "eval_Qnli-dev_manhattan_f1": 0.7037037037037036, + "eval_Qnli-dev_manhattan_f1_threshold": 331.2184753417969, + "eval_Qnli-dev_manhattan_precision": 0.5837988826815642, + "eval_Qnli-dev_manhattan_recall": 0.885593220338983, + "eval_Qnli-dev_max_accuracy": 0.703125, + "eval_Qnli-dev_max_accuracy_threshold": 395.6339111328125, + "eval_Qnli-dev_max_ap": 0.7455488536354595, + "eval_Qnli-dev_max_f1": 0.7037037037037036, + "eval_Qnli-dev_max_f1_threshold": 355.788330078125, + "eval_Qnli-dev_max_precision": 0.5890804597701149, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8807967901229858, + "eval_allNLI-dev_cosine_ap": 0.5923755189013276, + "eval_allNLI-dev_cosine_f1": 0.5885286783042394, + "eval_allNLI-dev_cosine_f1_threshold": 0.8102627992630005, + "eval_allNLI-dev_cosine_precision": 0.5175438596491229, + "eval_allNLI-dev_cosine_recall": 0.6820809248554913, + "eval_allNLI-dev_dot_accuracy": 0.689453125, + "eval_allNLI-dev_dot_accuracy_threshold": 468.3880615234375, + "eval_allNLI-dev_dot_ap": 0.5099487314518958, + "eval_allNLI-dev_dot_f1": 0.5726872246696035, + "eval_allNLI-dev_dot_f1_threshold": 388.5802001953125, + "eval_allNLI-dev_dot_precision": 0.4626334519572954, + "eval_allNLI-dev_dot_recall": 0.7514450867052023, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.58234691619873, + "eval_allNLI-dev_euclidean_ap": 0.5960351196666029, + "eval_allNLI-dev_euclidean_f1": 0.5934065934065934, + "eval_allNLI-dev_euclidean_f1_threshold": 14.820318222045898, + "eval_allNLI-dev_euclidean_precision": 0.4787234042553192, + "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, + "eval_allNLI-dev_manhattan_accuracy": 0.734375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 219.4961700439453, + "eval_allNLI-dev_manhattan_ap": 0.5953606180151316, + "eval_allNLI-dev_manhattan_f1": 0.5929411764705882, + "eval_allNLI-dev_manhattan_f1_threshold": 293.4901428222656, + "eval_allNLI-dev_manhattan_precision": 0.5, + "eval_allNLI-dev_manhattan_recall": 0.7283236994219653, + "eval_allNLI-dev_max_accuracy": 0.734375, + "eval_allNLI-dev_max_accuracy_threshold": 468.3880615234375, + "eval_allNLI-dev_max_ap": 0.5960351196666029, + "eval_allNLI-dev_max_f1": 0.5934065934065934, + "eval_allNLI-dev_max_f1_threshold": 388.5802001953125, + "eval_allNLI-dev_max_precision": 0.5175438596491229, + "eval_allNLI-dev_max_recall": 0.7803468208092486, + "eval_sequential_score": 0.7455488536354595, + "eval_sts-test_pearson_cosine": 0.8159881240293081, + "eval_sts-test_pearson_dot": 0.7825955488055716, + "eval_sts-test_pearson_euclidean": 0.8454112920840406, + "eval_sts-test_pearson_manhattan": 0.8444832657606673, + "eval_sts-test_pearson_max": 0.8454112920840406, + "eval_sts-test_spearman_cosine": 0.8368029417325517, + "eval_sts-test_spearman_dot": 0.7614820041821643, + "eval_sts-test_spearman_euclidean": 0.8350227813056632, + "eval_sts-test_spearman_manhattan": 0.8336858821228565, + "eval_sts-test_spearman_max": 0.8368029417325517, + "eval_vitaminc-pairs_loss": 2.8485310077667236, + "eval_vitaminc-pairs_runtime": 3.1999, + "eval_vitaminc-pairs_samples_per_second": 40.001, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_negation-triplets_loss": 1.1648355722427368, + "eval_negation-triplets_runtime": 0.7448, + "eval_negation-triplets_samples_per_second": 171.851, + "eval_negation-triplets_steps_per_second": 1.343, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_scitail-pairs-pos_loss": 0.21600204706192017, + "eval_scitail-pairs-pos_runtime": 0.8346, + "eval_scitail-pairs-pos_samples_per_second": 153.362, + "eval_scitail-pairs-pos_steps_per_second": 1.198, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_scitail-pairs-qa_loss": 0.00846769753843546, + "eval_scitail-pairs-qa_runtime": 0.5928, + "eval_scitail-pairs-qa_samples_per_second": 215.932, + "eval_scitail-pairs-qa_steps_per_second": 1.687, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_xsum-pairs_loss": 0.6605619192123413, + "eval_xsum-pairs_runtime": 3.025, + "eval_xsum-pairs_samples_per_second": 42.314, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_sciq_pairs_loss": 0.12335162609815598, + "eval_sciq_pairs_runtime": 3.4321, + "eval_sciq_pairs_samples_per_second": 37.295, + "eval_sciq_pairs_steps_per_second": 0.291, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_qasc_pairs_loss": 0.5843737721443176, + "eval_qasc_pairs_runtime": 0.6047, + "eval_qasc_pairs_samples_per_second": 211.678, + "eval_qasc_pairs_steps_per_second": 1.654, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_openbookqa_pairs_loss": 1.2838267087936401, + "eval_openbookqa_pairs_runtime": 0.5755, + "eval_openbookqa_pairs_samples_per_second": 222.41, + "eval_openbookqa_pairs_steps_per_second": 1.738, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_msmarco_pairs_loss": 1.3720968961715698, + "eval_msmarco_pairs_runtime": 1.518, + "eval_msmarco_pairs_samples_per_second": 84.323, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_nq_pairs_loss": 1.5162333250045776, + "eval_nq_pairs_runtime": 2.9004, + "eval_nq_pairs_samples_per_second": 44.131, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_trivia_pairs_loss": 1.12861168384552, + "eval_trivia_pairs_runtime": 3.4369, + "eval_trivia_pairs_samples_per_second": 37.243, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_gooaq_pairs_loss": 0.6519899368286133, + "eval_gooaq_pairs_runtime": 0.9806, + "eval_gooaq_pairs_samples_per_second": 130.53, + "eval_gooaq_pairs_steps_per_second": 1.02, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_paws-pos_loss": 0.03412044420838356, + "eval_paws-pos_runtime": 0.6854, + "eval_paws-pos_samples_per_second": 186.764, + "eval_paws-pos_steps_per_second": 1.459, + "step": 680 + }, + { + "epoch": 0.6995884773662552, + "eval_global_dataset_loss": 0.6541453003883362, + "eval_global_dataset_runtime": 13.4093, + "eval_global_dataset_samples_per_second": 31.023, + "eval_global_dataset_steps_per_second": 0.298, + "step": 680 + }, + { + "epoch": 0.7006172839506173, + "grad_norm": 1.0433952808380127, + "learning_rate": 2.46417445482866e-05, + "loss": 0.063, + "step": 681 + }, + { + "epoch": 0.7016460905349794, + "grad_norm": 16.610177993774414, + "learning_rate": 2.4678089304257528e-05, + "loss": 1.4254, + "step": 682 + }, + { + "epoch": 0.7026748971193416, + "grad_norm": 19.412683486938477, + "learning_rate": 2.4714434060228452e-05, + "loss": 3.1631, + "step": 683 + }, + { + "epoch": 0.7037037037037037, + "grad_norm": 13.261174201965332, + "learning_rate": 2.4750778816199373e-05, + "loss": 1.2375, + "step": 684 + }, + { + "epoch": 0.7047325102880658, + "grad_norm": 9.231230735778809, + "learning_rate": 2.47871235721703e-05, + "loss": 0.5716, + "step": 685 + }, + { + "epoch": 0.7057613168724279, + "grad_norm": 16.746212005615234, + "learning_rate": 2.4823468328141224e-05, + "loss": 2.939, + "step": 686 + }, + { + "epoch": 0.7067901234567902, + "grad_norm": 16.490650177001953, + "learning_rate": 2.4859813084112145e-05, + "loss": 1.7054, + "step": 687 + }, + { + "epoch": 0.7078189300411523, + "grad_norm": 8.707398414611816, + "learning_rate": 2.4896157840083072e-05, + "loss": 0.4784, + "step": 688 + }, + { + "epoch": 0.7088477366255144, + "grad_norm": 9.790912628173828, + "learning_rate": 2.4932502596053996e-05, + "loss": 0.7157, + "step": 689 + }, + { + "epoch": 0.7098765432098766, + "grad_norm": 8.632383346557617, + "learning_rate": 2.496884735202492e-05, + "loss": 0.6421, + "step": 690 + }, + { + "epoch": 0.7109053497942387, + "grad_norm": 8.732678413391113, + "learning_rate": 2.5005192107995844e-05, + "loss": 0.6502, + "step": 691 + }, + { + "epoch": 0.7119341563786008, + "grad_norm": 16.7855281829834, + "learning_rate": 2.5041536863966768e-05, + "loss": 3.4679, + "step": 692 + }, + { + "epoch": 0.7129629629629629, + "grad_norm": 8.66584587097168, + "learning_rate": 2.5077881619937692e-05, + "loss": 0.5872, + "step": 693 + }, + { + "epoch": 0.7139917695473251, + "grad_norm": 14.179039001464844, + "learning_rate": 2.511422637590862e-05, + "loss": 1.5769, + "step": 694 + }, + { + "epoch": 0.7150205761316872, + "grad_norm": 8.276007652282715, + "learning_rate": 2.515057113187954e-05, + "loss": 0.5454, + "step": 695 + }, + { + "epoch": 0.7160493827160493, + "grad_norm": 12.96976375579834, + "learning_rate": 2.5186915887850464e-05, + "loss": 1.4251, + "step": 696 + }, + { + "epoch": 0.7170781893004116, + "grad_norm": 8.970144271850586, + "learning_rate": 2.522326064382139e-05, + "loss": 0.6667, + "step": 697 + }, + { + "epoch": 0.7181069958847737, + "grad_norm": 1.4171106815338135, + "learning_rate": 2.5259605399792312e-05, + "loss": 0.0382, + "step": 698 + }, + { + "epoch": 0.7191358024691358, + "grad_norm": 4.66494607925415, + "learning_rate": 2.5295950155763236e-05, + "loss": 0.1808, + "step": 699 + }, + { + "epoch": 0.720164609053498, + "grad_norm": 9.647722244262695, + "learning_rate": 2.5332294911734164e-05, + "loss": 0.8819, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_Qnli-dev_cosine_accuracy": 0.6875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8055030107498169, + "eval_Qnli-dev_cosine_ap": 0.7346523188251083, + "eval_Qnli-dev_cosine_f1": 0.7008849557522123, + "eval_Qnli-dev_cosine_f1_threshold": 0.7691887021064758, + "eval_Qnli-dev_cosine_precision": 0.601823708206687, + "eval_Qnli-dev_cosine_recall": 0.8389830508474576, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 403.0814208984375, + "eval_Qnli-dev_dot_ap": 0.6423809971933063, + "eval_Qnli-dev_dot_f1": 0.6771929824561403, + "eval_Qnli-dev_dot_f1_threshold": 380.7566833496094, + "eval_Qnli-dev_dot_precision": 0.5778443113772455, + "eval_Qnli-dev_dot_recall": 0.8177966101694916, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.891797065734863, + "eval_Qnli-dev_euclidean_ap": 0.7419509834416282, + "eval_Qnli-dev_euclidean_f1": 0.7024221453287196, + "eval_Qnli-dev_euclidean_f1_threshold": 15.521956443786621, + "eval_Qnli-dev_euclidean_precision": 0.5935672514619883, + "eval_Qnli-dev_euclidean_recall": 0.8601694915254238, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 284.7353515625, + "eval_Qnli-dev_manhattan_ap": 0.7404308497091309, + "eval_Qnli-dev_manhattan_f1": 0.6989619377162629, + "eval_Qnli-dev_manhattan_f1_threshold": 318.97943115234375, + "eval_Qnli-dev_manhattan_precision": 0.5906432748538012, + "eval_Qnli-dev_manhattan_recall": 0.8559322033898306, + "eval_Qnli-dev_max_accuracy": 0.70703125, + "eval_Qnli-dev_max_accuracy_threshold": 403.0814208984375, + "eval_Qnli-dev_max_ap": 0.7419509834416282, + "eval_Qnli-dev_max_f1": 0.7024221453287196, + "eval_Qnli-dev_max_f1_threshold": 380.7566833496094, + "eval_Qnli-dev_max_precision": 0.601823708206687, + "eval_Qnli-dev_max_recall": 0.8601694915254238, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8836915493011475, + "eval_allNLI-dev_cosine_ap": 0.5879168509874412, + "eval_allNLI-dev_cosine_f1": 0.5914893617021276, + "eval_allNLI-dev_cosine_f1_threshold": 0.7854909896850586, + "eval_allNLI-dev_cosine_precision": 0.468013468013468, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.689453125, + "eval_allNLI-dev_dot_accuracy_threshold": 468.509765625, + "eval_allNLI-dev_dot_ap": 0.5290950515284383, + "eval_allNLI-dev_dot_f1": 0.576923076923077, + "eval_allNLI-dev_dot_f1_threshold": 394.4248352050781, + "eval_allNLI-dev_dot_precision": 0.4576271186440678, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.736328125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.812106132507324, + "eval_allNLI-dev_euclidean_ap": 0.5902832492425357, + "eval_allNLI-dev_euclidean_f1": 0.5927505330490405, + "eval_allNLI-dev_euclidean_f1_threshold": 14.791348457336426, + "eval_allNLI-dev_euclidean_precision": 0.46959459459459457, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 228.82778930664062, + "eval_allNLI-dev_manhattan_ap": 0.5867522032562185, + "eval_allNLI-dev_manhattan_f1": 0.592255125284738, + "eval_allNLI-dev_manhattan_f1_threshold": 288.2390441894531, + "eval_allNLI-dev_manhattan_precision": 0.48872180451127817, + "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 468.509765625, + "eval_allNLI-dev_max_ap": 0.5902832492425357, + "eval_allNLI-dev_max_f1": 0.5927505330490405, + "eval_allNLI-dev_max_f1_threshold": 394.4248352050781, + "eval_allNLI-dev_max_precision": 0.48872180451127817, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.7419509834416282, + "eval_sts-test_pearson_cosine": 0.814122807787653, + "eval_sts-test_pearson_dot": 0.7849759177486642, + "eval_sts-test_pearson_euclidean": 0.8421714998904108, + "eval_sts-test_pearson_manhattan": 0.8394866389200708, + "eval_sts-test_pearson_max": 0.8421714998904108, + "eval_sts-test_spearman_cosine": 0.837628602505223, + "eval_sts-test_spearman_dot": 0.7737345862922999, + "eval_sts-test_spearman_euclidean": 0.8339600731014016, + "eval_sts-test_spearman_manhattan": 0.831537105555887, + "eval_sts-test_spearman_max": 0.837628602505223, + "eval_vitaminc-pairs_loss": 2.8523178100585938, + "eval_vitaminc-pairs_runtime": 3.1968, + "eval_vitaminc-pairs_samples_per_second": 40.039, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_negation-triplets_loss": 1.1572741270065308, + "eval_negation-triplets_runtime": 0.7545, + "eval_negation-triplets_samples_per_second": 169.646, + "eval_negation-triplets_steps_per_second": 1.325, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_scitail-pairs-pos_loss": 0.21792583167552948, + "eval_scitail-pairs-pos_runtime": 0.8512, + "eval_scitail-pairs-pos_samples_per_second": 150.374, + "eval_scitail-pairs-pos_steps_per_second": 1.175, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_scitail-pairs-qa_loss": 0.014304843731224537, + "eval_scitail-pairs-qa_runtime": 0.5821, + "eval_scitail-pairs-qa_samples_per_second": 219.895, + "eval_scitail-pairs-qa_steps_per_second": 1.718, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_xsum-pairs_loss": 0.688365638256073, + "eval_xsum-pairs_runtime": 3.0302, + "eval_xsum-pairs_samples_per_second": 42.242, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_sciq_pairs_loss": 0.12412170320749283, + "eval_sciq_pairs_runtime": 3.4839, + "eval_sciq_pairs_samples_per_second": 36.741, + "eval_sciq_pairs_steps_per_second": 0.287, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_qasc_pairs_loss": 0.5808658003807068, + "eval_qasc_pairs_runtime": 0.6151, + "eval_qasc_pairs_samples_per_second": 208.103, + "eval_qasc_pairs_steps_per_second": 1.626, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_openbookqa_pairs_loss": 1.136744499206543, + "eval_openbookqa_pairs_runtime": 0.5932, + "eval_openbookqa_pairs_samples_per_second": 215.777, + "eval_openbookqa_pairs_steps_per_second": 1.686, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_msmarco_pairs_loss": 1.2205469608306885, + "eval_msmarco_pairs_runtime": 1.5248, + "eval_msmarco_pairs_samples_per_second": 83.947, + "eval_msmarco_pairs_steps_per_second": 0.656, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_nq_pairs_loss": 1.6330437660217285, + "eval_nq_pairs_runtime": 2.9004, + "eval_nq_pairs_samples_per_second": 44.131, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_trivia_pairs_loss": 1.195753812789917, + "eval_trivia_pairs_runtime": 3.4466, + "eval_trivia_pairs_samples_per_second": 37.138, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_gooaq_pairs_loss": 0.7037076950073242, + "eval_gooaq_pairs_runtime": 0.9565, + "eval_gooaq_pairs_samples_per_second": 133.823, + "eval_gooaq_pairs_steps_per_second": 1.045, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_paws-pos_loss": 0.03305948153138161, + "eval_paws-pos_runtime": 0.6963, + "eval_paws-pos_samples_per_second": 183.824, + "eval_paws-pos_steps_per_second": 1.436, + "step": 700 + }, + { + "epoch": 0.720164609053498, + "eval_global_dataset_loss": 0.6367413401603699, + "eval_global_dataset_runtime": 13.4188, + "eval_global_dataset_samples_per_second": 31.001, + "eval_global_dataset_steps_per_second": 0.298, + "step": 700 + }, + { + "epoch": 0.7211934156378601, + "grad_norm": 9.350117683410645, + "learning_rate": 2.5368639667705088e-05, + "loss": 0.6013, + "step": 701 + }, + { + "epoch": 0.7222222222222222, + "grad_norm": 11.008674621582031, + "learning_rate": 2.540498442367601e-05, + "loss": 1.181, + "step": 702 + }, + { + "epoch": 0.7232510288065843, + "grad_norm": 12.882591247558594, + "learning_rate": 2.5441329179646936e-05, + "loss": 1.1574, + "step": 703 + }, + { + "epoch": 0.7242798353909465, + "grad_norm": 10.404853820800781, + "learning_rate": 2.547767393561786e-05, + "loss": 0.6094, + "step": 704 + }, + { + "epoch": 0.7253086419753086, + "grad_norm": 10.375190734863281, + "learning_rate": 2.551401869158878e-05, + "loss": 0.6303, + "step": 705 + }, + { + "epoch": 0.7263374485596708, + "grad_norm": 11.026881217956543, + "learning_rate": 2.5550363447559708e-05, + "loss": 0.626, + "step": 706 + }, + { + "epoch": 0.727366255144033, + "grad_norm": 9.781618118286133, + "learning_rate": 2.5586708203530632e-05, + "loss": 0.5284, + "step": 707 + }, + { + "epoch": 0.7283950617283951, + "grad_norm": 2.4945054054260254, + "learning_rate": 2.5623052959501556e-05, + "loss": 0.0619, + "step": 708 + }, + { + "epoch": 0.7294238683127572, + "grad_norm": 14.84467887878418, + "learning_rate": 2.565939771547248e-05, + "loss": 1.3394, + "step": 709 + }, + { + "epoch": 0.7304526748971193, + "grad_norm": 3.6432929039001465, + "learning_rate": 2.5695742471443404e-05, + "loss": 0.0922, + "step": 710 + }, + { + "epoch": 0.7314814814814815, + "grad_norm": 3.2191617488861084, + "learning_rate": 2.5732087227414328e-05, + "loss": 0.068, + "step": 711 + }, + { + "epoch": 0.7325102880658436, + "grad_norm": 10.091761589050293, + "learning_rate": 2.5768431983385255e-05, + "loss": 0.5414, + "step": 712 + }, + { + "epoch": 0.7335390946502057, + "grad_norm": 9.839192390441895, + "learning_rate": 2.5804776739356176e-05, + "loss": 0.5332, + "step": 713 + }, + { + "epoch": 0.7345679012345679, + "grad_norm": 9.548250198364258, + "learning_rate": 2.58411214953271e-05, + "loss": 0.5112, + "step": 714 + }, + { + "epoch": 0.73559670781893, + "grad_norm": 23.554458618164062, + "learning_rate": 2.5877466251298027e-05, + "loss": 3.5468, + "step": 715 + }, + { + "epoch": 0.7366255144032922, + "grad_norm": 1.0547456741333008, + "learning_rate": 2.5913811007268948e-05, + "loss": 0.0244, + "step": 716 + }, + { + "epoch": 0.7376543209876543, + "grad_norm": 10.332133293151855, + "learning_rate": 2.5950155763239872e-05, + "loss": 0.528, + "step": 717 + }, + { + "epoch": 0.7386831275720165, + "grad_norm": 16.862545013427734, + "learning_rate": 2.59865005192108e-05, + "loss": 1.7134, + "step": 718 + }, + { + "epoch": 0.7397119341563786, + "grad_norm": 9.824862480163574, + "learning_rate": 2.6022845275181723e-05, + "loss": 0.6181, + "step": 719 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 17.680917739868164, + "learning_rate": 2.6059190031152644e-05, + "loss": 1.7897, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_Qnli-dev_cosine_accuracy": 0.712890625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8089187741279602, + "eval_Qnli-dev_cosine_ap": 0.7445288564241996, + "eval_Qnli-dev_cosine_f1": 0.7088607594936709, + "eval_Qnli-dev_cosine_f1_threshold": 0.7645823955535889, + "eval_Qnli-dev_cosine_precision": 0.6182965299684543, + "eval_Qnli-dev_cosine_recall": 0.8305084745762712, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 374.24700927734375, + "eval_Qnli-dev_dot_ap": 0.6643934387620949, + "eval_Qnli-dev_dot_f1": 0.6875, + "eval_Qnli-dev_dot_f1_threshold": 374.24700927734375, + "eval_Qnli-dev_dot_precision": 0.6071428571428571, + "eval_Qnli-dev_dot_recall": 0.7923728813559322, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.01733684539795, + "eval_Qnli-dev_euclidean_ap": 0.7511376116503252, + "eval_Qnli-dev_euclidean_f1": 0.7107750472589792, + "eval_Qnli-dev_euclidean_f1_threshold": 14.925470352172852, + "eval_Qnli-dev_euclidean_precision": 0.6416382252559727, + "eval_Qnli-dev_euclidean_recall": 0.7966101694915254, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 279.2970886230469, + "eval_Qnli-dev_manhattan_ap": 0.7531882826368892, + "eval_Qnli-dev_manhattan_f1": 0.7052810902896082, + "eval_Qnli-dev_manhattan_f1_threshold": 327.6318359375, + "eval_Qnli-dev_manhattan_precision": 0.5897435897435898, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 374.24700927734375, + "eval_Qnli-dev_max_ap": 0.7531882826368892, + "eval_Qnli-dev_max_f1": 0.7107750472589792, + "eval_Qnli-dev_max_f1_threshold": 374.24700927734375, + "eval_Qnli-dev_max_precision": 0.6416382252559727, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.740234375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8845050930976868, + "eval_allNLI-dev_cosine_ap": 0.6029211833635529, + "eval_allNLI-dev_cosine_f1": 0.6096033402922756, + "eval_allNLI-dev_cosine_f1_threshold": 0.7970777750015259, + "eval_allNLI-dev_cosine_precision": 0.477124183006536, + "eval_allNLI-dev_cosine_recall": 0.8439306358381503, + "eval_allNLI-dev_dot_accuracy": 0.693359375, + "eval_allNLI-dev_dot_accuracy_threshold": 465.4620361328125, + "eval_allNLI-dev_dot_ap": 0.512993085572406, + "eval_allNLI-dev_dot_f1": 0.5753968253968255, + "eval_allNLI-dev_dot_f1_threshold": 391.34271240234375, + "eval_allNLI-dev_dot_precision": 0.4380664652567976, + "eval_allNLI-dev_dot_recall": 0.838150289017341, + "eval_allNLI-dev_euclidean_accuracy": 0.7421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.847936630249023, + "eval_allNLI-dev_euclidean_ap": 0.6067823005817112, + "eval_allNLI-dev_euclidean_f1": 0.60813704496788, + "eval_allNLI-dev_euclidean_f1_threshold": 14.172441482543945, + "eval_allNLI-dev_euclidean_precision": 0.48299319727891155, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.73828125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 217.6175537109375, + "eval_allNLI-dev_manhattan_ap": 0.5978323891873064, + "eval_allNLI-dev_manhattan_f1": 0.5991902834008097, + "eval_allNLI-dev_manhattan_f1_threshold": 298.9595031738281, + "eval_allNLI-dev_manhattan_precision": 0.46105919003115264, + "eval_allNLI-dev_manhattan_recall": 0.8554913294797688, + "eval_allNLI-dev_max_accuracy": 0.7421875, + "eval_allNLI-dev_max_accuracy_threshold": 465.4620361328125, + "eval_allNLI-dev_max_ap": 0.6067823005817112, + "eval_allNLI-dev_max_f1": 0.6096033402922756, + "eval_allNLI-dev_max_f1_threshold": 391.34271240234375, + "eval_allNLI-dev_max_precision": 0.48299319727891155, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7531882826368892, + "eval_sts-test_pearson_cosine": 0.7943928851510986, + "eval_sts-test_pearson_dot": 0.7406480169219867, + "eval_sts-test_pearson_euclidean": 0.8200699159277771, + "eval_sts-test_pearson_manhattan": 0.8153052752015822, + "eval_sts-test_pearson_max": 0.8200699159277771, + "eval_sts-test_spearman_cosine": 0.814777534408501, + "eval_sts-test_spearman_dot": 0.7252969844950452, + "eval_sts-test_spearman_euclidean": 0.8124804521612804, + "eval_sts-test_spearman_manhattan": 0.8084946543855285, + "eval_sts-test_spearman_max": 0.814777534408501, + "eval_vitaminc-pairs_loss": 2.5636518001556396, + "eval_vitaminc-pairs_runtime": 3.2076, + "eval_vitaminc-pairs_samples_per_second": 39.905, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_negation-triplets_loss": 1.1352839469909668, + "eval_negation-triplets_runtime": 0.749, + "eval_negation-triplets_samples_per_second": 170.903, + "eval_negation-triplets_steps_per_second": 1.335, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_scitail-pairs-pos_loss": 0.2415001094341278, + "eval_scitail-pairs-pos_runtime": 0.8417, + "eval_scitail-pairs-pos_samples_per_second": 152.073, + "eval_scitail-pairs-pos_steps_per_second": 1.188, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_scitail-pairs-qa_loss": 0.0037513382267206907, + "eval_scitail-pairs-qa_runtime": 0.5837, + "eval_scitail-pairs-qa_samples_per_second": 219.305, + "eval_scitail-pairs-qa_steps_per_second": 1.713, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_xsum-pairs_loss": 0.7015084624290466, + "eval_xsum-pairs_runtime": 3.0329, + "eval_xsum-pairs_samples_per_second": 42.204, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_sciq_pairs_loss": 0.13029436767101288, + "eval_sciq_pairs_runtime": 3.454, + "eval_sciq_pairs_samples_per_second": 37.059, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_qasc_pairs_loss": 0.5081034302711487, + "eval_qasc_pairs_runtime": 0.6041, + "eval_qasc_pairs_samples_per_second": 211.882, + "eval_qasc_pairs_steps_per_second": 1.655, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_openbookqa_pairs_loss": 1.2555147409439087, + "eval_openbookqa_pairs_runtime": 0.5953, + "eval_openbookqa_pairs_samples_per_second": 215.03, + "eval_openbookqa_pairs_steps_per_second": 1.68, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_msmarco_pairs_loss": 1.305182695388794, + "eval_msmarco_pairs_runtime": 1.5199, + "eval_msmarco_pairs_samples_per_second": 84.214, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_nq_pairs_loss": 1.5818196535110474, + "eval_nq_pairs_runtime": 2.8983, + "eval_nq_pairs_samples_per_second": 44.163, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_trivia_pairs_loss": 1.2283203601837158, + "eval_trivia_pairs_runtime": 3.4398, + "eval_trivia_pairs_samples_per_second": 37.212, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_gooaq_pairs_loss": 0.7275317907333374, + "eval_gooaq_pairs_runtime": 0.948, + "eval_gooaq_pairs_samples_per_second": 135.023, + "eval_gooaq_pairs_steps_per_second": 1.055, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_paws-pos_loss": 0.03339088708162308, + "eval_paws-pos_runtime": 0.6932, + "eval_paws-pos_samples_per_second": 184.64, + "eval_paws-pos_steps_per_second": 1.442, + "step": 720 + }, + { + "epoch": 0.7407407407407407, + "eval_global_dataset_loss": 0.5928239226341248, + "eval_global_dataset_runtime": 13.3878, + "eval_global_dataset_samples_per_second": 31.073, + "eval_global_dataset_steps_per_second": 0.299, + "step": 720 + }, + { + "epoch": 0.7417695473251029, + "grad_norm": 10.516244888305664, + "learning_rate": 2.609553478712357e-05, + "loss": 0.7104, + "step": 721 + }, + { + "epoch": 0.742798353909465, + "grad_norm": 0.8260862827301025, + "learning_rate": 2.6131879543094495e-05, + "loss": 0.0219, + "step": 722 + }, + { + "epoch": 0.7438271604938271, + "grad_norm": 14.152036666870117, + "learning_rate": 2.6168224299065416e-05, + "loss": 1.3516, + "step": 723 + }, + { + "epoch": 0.7448559670781894, + "grad_norm": 8.1348237991333, + "learning_rate": 2.6204569055036344e-05, + "loss": 0.5472, + "step": 724 + }, + { + "epoch": 0.7458847736625515, + "grad_norm": 8.534761428833008, + "learning_rate": 2.6240913811007268e-05, + "loss": 0.5357, + "step": 725 + }, + { + "epoch": 0.7469135802469136, + "grad_norm": 11.620552062988281, + "learning_rate": 2.627725856697819e-05, + "loss": 1.0346, + "step": 726 + }, + { + "epoch": 0.7479423868312757, + "grad_norm": 10.823874473571777, + "learning_rate": 2.6313603322949116e-05, + "loss": 0.8461, + "step": 727 + }, + { + "epoch": 0.7489711934156379, + "grad_norm": 14.860071182250977, + "learning_rate": 2.634994807892004e-05, + "loss": 1.7762, + "step": 728 + }, + { + "epoch": 0.75, + "grad_norm": 9.170268058776855, + "learning_rate": 2.6386292834890964e-05, + "loss": 0.6121, + "step": 729 + }, + { + "epoch": 0.7510288065843621, + "grad_norm": 3.6571240425109863, + "learning_rate": 2.6422637590861888e-05, + "loss": 0.1051, + "step": 730 + }, + { + "epoch": 0.7520576131687243, + "grad_norm": 7.615705966949463, + "learning_rate": 2.645898234683281e-05, + "loss": 0.5804, + "step": 731 + }, + { + "epoch": 0.7530864197530864, + "grad_norm": 11.42629337310791, + "learning_rate": 2.6495327102803736e-05, + "loss": 1.0625, + "step": 732 + }, + { + "epoch": 0.7541152263374485, + "grad_norm": 1.1732608079910278, + "learning_rate": 2.6531671858774663e-05, + "loss": 0.0471, + "step": 733 + }, + { + "epoch": 0.7551440329218106, + "grad_norm": 10.805855751037598, + "learning_rate": 2.6568016614745584e-05, + "loss": 0.767, + "step": 734 + }, + { + "epoch": 0.7561728395061729, + "grad_norm": 7.8192009925842285, + "learning_rate": 2.6604361370716508e-05, + "loss": 0.4262, + "step": 735 + }, + { + "epoch": 0.757201646090535, + "grad_norm": 14.414314270019531, + "learning_rate": 2.6640706126687435e-05, + "loss": 1.4077, + "step": 736 + }, + { + "epoch": 0.7582304526748971, + "grad_norm": 18.263036727905273, + "learning_rate": 2.6677050882658356e-05, + "loss": 1.5963, + "step": 737 + }, + { + "epoch": 0.7592592592592593, + "grad_norm": 11.086414337158203, + "learning_rate": 2.671339563862928e-05, + "loss": 1.2141, + "step": 738 + }, + { + "epoch": 0.7602880658436214, + "grad_norm": 13.789649963378906, + "learning_rate": 2.6749740394600207e-05, + "loss": 1.454, + "step": 739 + }, + { + "epoch": 0.7613168724279835, + "grad_norm": 9.959060668945312, + "learning_rate": 2.678608515057113e-05, + "loss": 0.696, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7968876361846924, + "eval_Qnli-dev_cosine_ap": 0.727602546794372, + "eval_Qnli-dev_cosine_f1": 0.6979166666666667, + "eval_Qnli-dev_cosine_f1_threshold": 0.7464833855628967, + "eval_Qnli-dev_cosine_precision": 0.5911764705882353, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 399.69769287109375, + "eval_Qnli-dev_dot_ap": 0.645670123752458, + "eval_Qnli-dev_dot_f1": 0.6833631484794276, + "eval_Qnli-dev_dot_f1_threshold": 367.0381774902344, + "eval_Qnli-dev_dot_precision": 0.5913312693498453, + "eval_Qnli-dev_dot_recall": 0.809322033898305, + "eval_Qnli-dev_euclidean_accuracy": 0.69921875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.117036819458008, + "eval_Qnli-dev_euclidean_ap": 0.7337639674568743, + "eval_Qnli-dev_euclidean_f1": 0.7001675041876045, + "eval_Qnli-dev_euclidean_f1_threshold": 16.333152770996094, + "eval_Qnli-dev_euclidean_precision": 0.5789473684210527, + "eval_Qnli-dev_euclidean_recall": 0.885593220338983, + "eval_Qnli-dev_manhattan_accuracy": 0.703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 294.5778503417969, + "eval_Qnli-dev_manhattan_ap": 0.7366181110833769, + "eval_Qnli-dev_manhattan_f1": 0.6973180076628352, + "eval_Qnli-dev_manhattan_f1_threshold": 302.7152099609375, + "eval_Qnli-dev_manhattan_precision": 0.6363636363636364, + "eval_Qnli-dev_manhattan_recall": 0.7711864406779662, + "eval_Qnli-dev_max_accuracy": 0.703125, + "eval_Qnli-dev_max_accuracy_threshold": 399.69769287109375, + "eval_Qnli-dev_max_ap": 0.7366181110833769, + "eval_Qnli-dev_max_f1": 0.7001675041876045, + "eval_Qnli-dev_max_f1_threshold": 367.0381774902344, + "eval_Qnli-dev_max_precision": 0.6363636363636364, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.728515625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8710236549377441, + "eval_allNLI-dev_cosine_ap": 0.5945373648797287, + "eval_allNLI-dev_cosine_f1": 0.5975609756097561, + "eval_allNLI-dev_cosine_f1_threshold": 0.7772917747497559, + "eval_allNLI-dev_cosine_precision": 0.4608150470219436, + "eval_allNLI-dev_cosine_recall": 0.8497109826589595, + "eval_allNLI-dev_dot_accuracy": 0.6953125, + "eval_allNLI-dev_dot_accuracy_threshold": 456.685546875, + "eval_allNLI-dev_dot_ap": 0.516668964052817, + "eval_allNLI-dev_dot_f1": 0.5790554414784395, + "eval_allNLI-dev_dot_f1_threshold": 387.36737060546875, + "eval_allNLI-dev_dot_precision": 0.44904458598726116, + "eval_allNLI-dev_dot_recall": 0.815028901734104, + "eval_allNLI-dev_euclidean_accuracy": 0.73828125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.068269729614258, + "eval_allNLI-dev_euclidean_ap": 0.5966992226114267, + "eval_allNLI-dev_euclidean_f1": 0.5970772442588727, + "eval_allNLI-dev_euclidean_f1_threshold": 14.66142463684082, + "eval_allNLI-dev_euclidean_precision": 0.4673202614379085, + "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 237.41098022460938, + "eval_allNLI-dev_manhattan_ap": 0.5880205832464749, + "eval_allNLI-dev_manhattan_f1": 0.5914893617021276, + "eval_allNLI-dev_manhattan_f1_threshold": 297.3165283203125, + "eval_allNLI-dev_manhattan_precision": 0.468013468013468, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.73828125, + "eval_allNLI-dev_max_accuracy_threshold": 456.685546875, + "eval_allNLI-dev_max_ap": 0.5966992226114267, + "eval_allNLI-dev_max_f1": 0.5975609756097561, + "eval_allNLI-dev_max_f1_threshold": 387.36737060546875, + "eval_allNLI-dev_max_precision": 0.468013468013468, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7366181110833769, + "eval_sts-test_pearson_cosine": 0.8088324955753331, + "eval_sts-test_pearson_dot": 0.7775578039423507, + "eval_sts-test_pearson_euclidean": 0.8322159624410153, + "eval_sts-test_pearson_manhattan": 0.8289905701496498, + "eval_sts-test_pearson_max": 0.8322159624410153, + "eval_sts-test_spearman_cosine": 0.829002036100587, + "eval_sts-test_spearman_dot": 0.7651558142348298, + "eval_sts-test_spearman_euclidean": 0.8238636515163652, + "eval_sts-test_spearman_manhattan": 0.8193701326087933, + "eval_sts-test_spearman_max": 0.829002036100587, + "eval_vitaminc-pairs_loss": 2.652156114578247, + "eval_vitaminc-pairs_runtime": 3.195, + "eval_vitaminc-pairs_samples_per_second": 40.062, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_negation-triplets_loss": 1.1374459266662598, + "eval_negation-triplets_runtime": 0.7568, + "eval_negation-triplets_samples_per_second": 169.13, + "eval_negation-triplets_steps_per_second": 1.321, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_scitail-pairs-pos_loss": 0.18683280050754547, + "eval_scitail-pairs-pos_runtime": 0.8273, + "eval_scitail-pairs-pos_samples_per_second": 154.717, + "eval_scitail-pairs-pos_steps_per_second": 1.209, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_scitail-pairs-qa_loss": 0.004593902267515659, + "eval_scitail-pairs-qa_runtime": 0.5777, + "eval_scitail-pairs-qa_samples_per_second": 221.553, + "eval_scitail-pairs-qa_steps_per_second": 1.731, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_xsum-pairs_loss": 0.7033074498176575, + "eval_xsum-pairs_runtime": 3.0213, + "eval_xsum-pairs_samples_per_second": 42.366, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_sciq_pairs_loss": 0.12240559607744217, + "eval_sciq_pairs_runtime": 3.4526, + "eval_sciq_pairs_samples_per_second": 37.074, + "eval_sciq_pairs_steps_per_second": 0.29, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_qasc_pairs_loss": 0.5442161560058594, + "eval_qasc_pairs_runtime": 0.609, + "eval_qasc_pairs_samples_per_second": 210.184, + "eval_qasc_pairs_steps_per_second": 1.642, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_openbookqa_pairs_loss": 1.1632599830627441, + "eval_openbookqa_pairs_runtime": 0.5933, + "eval_openbookqa_pairs_samples_per_second": 215.749, + "eval_openbookqa_pairs_steps_per_second": 1.686, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_msmarco_pairs_loss": 1.1908891201019287, + "eval_msmarco_pairs_runtime": 1.5422, + "eval_msmarco_pairs_samples_per_second": 83.0, + "eval_msmarco_pairs_steps_per_second": 0.648, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_nq_pairs_loss": 1.4470250606536865, + "eval_nq_pairs_runtime": 2.8983, + "eval_nq_pairs_samples_per_second": 44.165, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_trivia_pairs_loss": 1.1257771253585815, + "eval_trivia_pairs_runtime": 3.4458, + "eval_trivia_pairs_samples_per_second": 37.147, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_gooaq_pairs_loss": 0.6294673085212708, + "eval_gooaq_pairs_runtime": 0.9529, + "eval_gooaq_pairs_samples_per_second": 134.331, + "eval_gooaq_pairs_steps_per_second": 1.049, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_paws-pos_loss": 0.029657872393727303, + "eval_paws-pos_runtime": 0.6916, + "eval_paws-pos_samples_per_second": 185.091, + "eval_paws-pos_steps_per_second": 1.446, + "step": 740 + }, + { + "epoch": 0.7613168724279835, + "eval_global_dataset_loss": 0.574967622756958, + "eval_global_dataset_runtime": 13.3853, + "eval_global_dataset_samples_per_second": 31.079, + "eval_global_dataset_steps_per_second": 0.299, + "step": 740 + }, + { + "epoch": 0.7623456790123457, + "grad_norm": 8.358269691467285, + "learning_rate": 2.6822429906542052e-05, + "loss": 0.5052, + "step": 741 + }, + { + "epoch": 0.7633744855967078, + "grad_norm": 3.5208804607391357, + "learning_rate": 2.685877466251298e-05, + "loss": 0.101, + "step": 742 + }, + { + "epoch": 0.76440329218107, + "grad_norm": 14.886555671691895, + "learning_rate": 2.6895119418483903e-05, + "loss": 1.6467, + "step": 743 + }, + { + "epoch": 0.7654320987654321, + "grad_norm": 10.37888240814209, + "learning_rate": 2.6931464174454824e-05, + "loss": 0.7924, + "step": 744 + }, + { + "epoch": 0.7664609053497943, + "grad_norm": 14.076517105102539, + "learning_rate": 2.696780893042575e-05, + "loss": 1.6842, + "step": 745 + }, + { + "epoch": 0.7674897119341564, + "grad_norm": 16.620922088623047, + "learning_rate": 2.7004153686396675e-05, + "loss": 2.809, + "step": 746 + }, + { + "epoch": 0.7685185185185185, + "grad_norm": 22.974336624145508, + "learning_rate": 2.70404984423676e-05, + "loss": 1.9317, + "step": 747 + }, + { + "epoch": 0.7695473251028807, + "grad_norm": 7.3669657707214355, + "learning_rate": 2.7076843198338523e-05, + "loss": 0.4177, + "step": 748 + }, + { + "epoch": 0.7705761316872428, + "grad_norm": 10.947649002075195, + "learning_rate": 2.7113187954309447e-05, + "loss": 0.9269, + "step": 749 + }, + { + "epoch": 0.7716049382716049, + "grad_norm": 9.538216590881348, + "learning_rate": 2.714953271028037e-05, + "loss": 0.9832, + "step": 750 + }, + { + "epoch": 0.772633744855967, + "grad_norm": 7.307182312011719, + "learning_rate": 2.71858774662513e-05, + "loss": 0.4875, + "step": 751 + }, + { + "epoch": 0.7736625514403292, + "grad_norm": 3.3512260913848877, + "learning_rate": 2.722222222222222e-05, + "loss": 0.1066, + "step": 752 + }, + { + "epoch": 0.7746913580246914, + "grad_norm": 8.798376083374023, + "learning_rate": 2.7258566978193143e-05, + "loss": 0.4801, + "step": 753 + }, + { + "epoch": 0.7757201646090535, + "grad_norm": 9.195924758911133, + "learning_rate": 2.729491173416407e-05, + "loss": 0.4494, + "step": 754 + }, + { + "epoch": 0.7767489711934157, + "grad_norm": 6.361667156219482, + "learning_rate": 2.733125649013499e-05, + "loss": 0.254, + "step": 755 + }, + { + "epoch": 0.7777777777777778, + "grad_norm": 11.094511985778809, + "learning_rate": 2.7367601246105916e-05, + "loss": 0.5735, + "step": 756 + }, + { + "epoch": 0.7788065843621399, + "grad_norm": 13.668522834777832, + "learning_rate": 2.7403946002076843e-05, + "loss": 1.109, + "step": 757 + }, + { + "epoch": 0.779835390946502, + "grad_norm": 9.678313255310059, + "learning_rate": 2.7440290758047767e-05, + "loss": 0.5538, + "step": 758 + }, + { + "epoch": 0.7808641975308642, + "grad_norm": 18.492931365966797, + "learning_rate": 2.7476635514018688e-05, + "loss": 1.6073, + "step": 759 + }, + { + "epoch": 0.7818930041152263, + "grad_norm": 20.688257217407227, + "learning_rate": 2.7512980269989615e-05, + "loss": 3.0436, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_Qnli-dev_cosine_accuracy": 0.703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7550844550132751, + "eval_Qnli-dev_cosine_ap": 0.7364566550443425, + "eval_Qnli-dev_cosine_f1": 0.7132075471698114, + "eval_Qnli-dev_cosine_f1_threshold": 0.7550844550132751, + "eval_Qnli-dev_cosine_precision": 0.6428571428571429, + "eval_Qnli-dev_cosine_recall": 0.8008474576271186, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 369.6612854003906, + "eval_Qnli-dev_dot_ap": 0.6549287118943474, + "eval_Qnli-dev_dot_f1": 0.6805555555555555, + "eval_Qnli-dev_dot_f1_threshold": 328.00164794921875, + "eval_Qnli-dev_dot_precision": 0.5764705882352941, + "eval_Qnli-dev_dot_recall": 0.8305084745762712, + "eval_Qnli-dev_euclidean_accuracy": 0.705078125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.8548583984375, + "eval_Qnli-dev_euclidean_ap": 0.7442510510869947, + "eval_Qnli-dev_euclidean_f1": 0.7076923076923077, + "eval_Qnli-dev_euclidean_f1_threshold": 16.119770050048828, + "eval_Qnli-dev_euclidean_precision": 0.5931232091690545, + "eval_Qnli-dev_euclidean_recall": 0.8771186440677966, + "eval_Qnli-dev_manhattan_accuracy": 0.701171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 297.2847595214844, + "eval_Qnli-dev_manhattan_ap": 0.7470615407792083, + "eval_Qnli-dev_manhattan_f1": 0.7087198515769945, + "eval_Qnli-dev_manhattan_f1_threshold": 312.7979431152344, + "eval_Qnli-dev_manhattan_precision": 0.6303630363036303, + "eval_Qnli-dev_manhattan_recall": 0.809322033898305, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 369.6612854003906, + "eval_Qnli-dev_max_ap": 0.7470615407792083, + "eval_Qnli-dev_max_f1": 0.7132075471698114, + "eval_Qnli-dev_max_f1_threshold": 328.00164794921875, + "eval_Qnli-dev_max_precision": 0.6428571428571429, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.72265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8610843420028687, + "eval_allNLI-dev_cosine_ap": 0.5881774055043343, + "eval_allNLI-dev_cosine_f1": 0.5910064239828695, + "eval_allNLI-dev_cosine_f1_threshold": 0.7733876705169678, + "eval_allNLI-dev_cosine_precision": 0.46938775510204084, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.681640625, + "eval_allNLI-dev_dot_accuracy_threshold": 442.53680419921875, + "eval_allNLI-dev_dot_ap": 0.5120414811620706, + "eval_allNLI-dev_dot_f1": 0.5700934579439252, + "eval_allNLI-dev_dot_f1_threshold": 351.6019592285156, + "eval_allNLI-dev_dot_precision": 0.47843137254901963, + "eval_allNLI-dev_dot_recall": 0.7052023121387283, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.514341354370117, + "eval_allNLI-dev_euclidean_ap": 0.5926528852791054, + "eval_allNLI-dev_euclidean_f1": 0.596, + "eval_allNLI-dev_euclidean_f1_threshold": 14.976218223571777, + "eval_allNLI-dev_euclidean_precision": 0.45565749235474007, + "eval_allNLI-dev_euclidean_recall": 0.861271676300578, + "eval_allNLI-dev_manhattan_accuracy": 0.71875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 222.68905639648438, + "eval_allNLI-dev_manhattan_ap": 0.5863936149481368, + "eval_allNLI-dev_manhattan_f1": 0.5978947368421053, + "eval_allNLI-dev_manhattan_f1_threshold": 297.7838134765625, + "eval_allNLI-dev_manhattan_precision": 0.47019867549668876, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 442.53680419921875, + "eval_allNLI-dev_max_ap": 0.5926528852791054, + "eval_allNLI-dev_max_f1": 0.5978947368421053, + "eval_allNLI-dev_max_f1_threshold": 351.6019592285156, + "eval_allNLI-dev_max_precision": 0.47843137254901963, + "eval_allNLI-dev_max_recall": 0.861271676300578, + "eval_sequential_score": 0.7470615407792083, + "eval_sts-test_pearson_cosine": 0.8068314455509153, + "eval_sts-test_pearson_dot": 0.772425389013349, + "eval_sts-test_pearson_euclidean": 0.8289777456195899, + "eval_sts-test_pearson_manhattan": 0.8263159059644403, + "eval_sts-test_pearson_max": 0.8289777456195899, + "eval_sts-test_spearman_cosine": 0.8266359474083009, + "eval_sts-test_spearman_dot": 0.7547315896601016, + "eval_sts-test_spearman_euclidean": 0.8200646274343266, + "eval_sts-test_spearman_manhattan": 0.8175935970340776, + "eval_sts-test_spearman_max": 0.8266359474083009, + "eval_vitaminc-pairs_loss": 2.7475264072418213, + "eval_vitaminc-pairs_runtime": 3.1935, + "eval_vitaminc-pairs_samples_per_second": 40.081, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_negation-triplets_loss": 1.0812993049621582, + "eval_negation-triplets_runtime": 0.744, + "eval_negation-triplets_samples_per_second": 172.045, + "eval_negation-triplets_steps_per_second": 1.344, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_scitail-pairs-pos_loss": 0.1583121418952942, + "eval_scitail-pairs-pos_runtime": 0.8387, + "eval_scitail-pairs-pos_samples_per_second": 152.624, + "eval_scitail-pairs-pos_steps_per_second": 1.192, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_scitail-pairs-qa_loss": 0.0030275785829871893, + "eval_scitail-pairs-qa_runtime": 0.5811, + "eval_scitail-pairs-qa_samples_per_second": 220.288, + "eval_scitail-pairs-qa_steps_per_second": 1.721, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_xsum-pairs_loss": 0.6426714658737183, + "eval_xsum-pairs_runtime": 3.0216, + "eval_xsum-pairs_samples_per_second": 42.361, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_sciq_pairs_loss": 0.12087687849998474, + "eval_sciq_pairs_runtime": 3.4733, + "eval_sciq_pairs_samples_per_second": 36.852, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_qasc_pairs_loss": 0.5539246201515198, + "eval_qasc_pairs_runtime": 0.6065, + "eval_qasc_pairs_samples_per_second": 211.043, + "eval_qasc_pairs_steps_per_second": 1.649, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_openbookqa_pairs_loss": 1.1023366451263428, + "eval_openbookqa_pairs_runtime": 0.5847, + "eval_openbookqa_pairs_samples_per_second": 218.917, + "eval_openbookqa_pairs_steps_per_second": 1.71, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_msmarco_pairs_loss": 1.2618669271469116, + "eval_msmarco_pairs_runtime": 1.5194, + "eval_msmarco_pairs_samples_per_second": 84.242, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_nq_pairs_loss": 1.4234434366226196, + "eval_nq_pairs_runtime": 2.9033, + "eval_nq_pairs_samples_per_second": 44.088, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_trivia_pairs_loss": 1.1620062589645386, + "eval_trivia_pairs_runtime": 3.4422, + "eval_trivia_pairs_samples_per_second": 37.185, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_gooaq_pairs_loss": 0.622553825378418, + "eval_gooaq_pairs_runtime": 0.9454, + "eval_gooaq_pairs_samples_per_second": 135.393, + "eval_gooaq_pairs_steps_per_second": 1.058, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_paws-pos_loss": 0.029666246846318245, + "eval_paws-pos_runtime": 0.6886, + "eval_paws-pos_samples_per_second": 185.877, + "eval_paws-pos_steps_per_second": 1.452, + "step": 760 + }, + { + "epoch": 0.7818930041152263, + "eval_global_dataset_loss": 0.599385678768158, + "eval_global_dataset_runtime": 13.3915, + "eval_global_dataset_samples_per_second": 31.064, + "eval_global_dataset_steps_per_second": 0.299, + "step": 760 + }, + { + "epoch": 0.7829218106995884, + "grad_norm": 13.270237922668457, + "learning_rate": 2.754932502596054e-05, + "loss": 0.9703, + "step": 761 + }, + { + "epoch": 0.7839506172839507, + "grad_norm": 10.215360641479492, + "learning_rate": 2.758566978193146e-05, + "loss": 1.0167, + "step": 762 + }, + { + "epoch": 0.7849794238683128, + "grad_norm": 14.256012916564941, + "learning_rate": 2.7622014537902387e-05, + "loss": 1.8575, + "step": 763 + }, + { + "epoch": 0.7860082304526749, + "grad_norm": 14.234450340270996, + "learning_rate": 2.765835929387331e-05, + "loss": 1.48, + "step": 764 + }, + { + "epoch": 0.7870370370370371, + "grad_norm": 15.287798881530762, + "learning_rate": 2.7694704049844235e-05, + "loss": 1.5257, + "step": 765 + }, + { + "epoch": 0.7880658436213992, + "grad_norm": 12.686257362365723, + "learning_rate": 2.773104880581516e-05, + "loss": 1.2119, + "step": 766 + }, + { + "epoch": 0.7890946502057613, + "grad_norm": 11.21288013458252, + "learning_rate": 2.7767393561786083e-05, + "loss": 1.0656, + "step": 767 + }, + { + "epoch": 0.7901234567901234, + "grad_norm": 9.147239685058594, + "learning_rate": 2.7803738317757007e-05, + "loss": 0.5485, + "step": 768 + }, + { + "epoch": 0.7911522633744856, + "grad_norm": 8.927838325500488, + "learning_rate": 2.7840083073727935e-05, + "loss": 0.6264, + "step": 769 + }, + { + "epoch": 0.7921810699588477, + "grad_norm": 12.626420974731445, + "learning_rate": 2.7876427829698855e-05, + "loss": 1.0876, + "step": 770 + }, + { + "epoch": 0.7932098765432098, + "grad_norm": 8.545890808105469, + "learning_rate": 2.791277258566978e-05, + "loss": 0.5902, + "step": 771 + }, + { + "epoch": 0.7942386831275721, + "grad_norm": 12.124262809753418, + "learning_rate": 2.7949117341640707e-05, + "loss": 0.9689, + "step": 772 + }, + { + "epoch": 0.7952674897119342, + "grad_norm": 8.3804292678833, + "learning_rate": 2.7985462097611627e-05, + "loss": 0.5276, + "step": 773 + }, + { + "epoch": 0.7962962962962963, + "grad_norm": 12.29673957824707, + "learning_rate": 2.802180685358255e-05, + "loss": 1.2571, + "step": 774 + }, + { + "epoch": 0.7973251028806584, + "grad_norm": 6.740438938140869, + "learning_rate": 2.805815160955348e-05, + "loss": 0.3492, + "step": 775 + }, + { + "epoch": 0.7983539094650206, + "grad_norm": 13.983535766601562, + "learning_rate": 2.80944963655244e-05, + "loss": 1.4877, + "step": 776 + }, + { + "epoch": 0.7993827160493827, + "grad_norm": 10.374014854431152, + "learning_rate": 2.8130841121495323e-05, + "loss": 1.2044, + "step": 777 + }, + { + "epoch": 0.8004115226337448, + "grad_norm": 14.681657791137695, + "learning_rate": 2.816718587746625e-05, + "loss": 1.2838, + "step": 778 + }, + { + "epoch": 0.801440329218107, + "grad_norm": 8.073484420776367, + "learning_rate": 2.8203530633437175e-05, + "loss": 0.4491, + "step": 779 + }, + { + "epoch": 0.8024691358024691, + "grad_norm": 14.766283988952637, + "learning_rate": 2.8239875389408095e-05, + "loss": 1.5724, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_Qnli-dev_cosine_accuracy": 0.708984375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8231204152107239, + "eval_Qnli-dev_cosine_ap": 0.7391395822952389, + "eval_Qnli-dev_cosine_f1": 0.7054545454545453, + "eval_Qnli-dev_cosine_f1_threshold": 0.7827090620994568, + "eval_Qnli-dev_cosine_precision": 0.6178343949044586, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.6640625, + "eval_Qnli-dev_dot_accuracy_threshold": 410.32037353515625, + "eval_Qnli-dev_dot_ap": 0.6504819630539224, + "eval_Qnli-dev_dot_f1": 0.6780238500851788, + "eval_Qnli-dev_dot_f1_threshold": 381.0080871582031, + "eval_Qnli-dev_dot_precision": 0.5669515669515669, + "eval_Qnli-dev_dot_recall": 0.8432203389830508, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.04183578491211, + "eval_Qnli-dev_euclidean_ap": 0.7438731947506383, + "eval_Qnli-dev_euclidean_f1": 0.7050847457627119, + "eval_Qnli-dev_euclidean_f1_threshold": 15.714797019958496, + "eval_Qnli-dev_euclidean_precision": 0.5875706214689266, + "eval_Qnli-dev_euclidean_recall": 0.8813559322033898, + "eval_Qnli-dev_manhattan_accuracy": 0.701171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 290.35009765625, + "eval_Qnli-dev_manhattan_ap": 0.7446632934882194, + "eval_Qnli-dev_manhattan_f1": 0.7015503875968992, + "eval_Qnli-dev_manhattan_f1_threshold": 293.35986328125, + "eval_Qnli-dev_manhattan_precision": 0.6464285714285715, + "eval_Qnli-dev_manhattan_recall": 0.7669491525423728, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 410.32037353515625, + "eval_Qnli-dev_max_ap": 0.7446632934882194, + "eval_Qnli-dev_max_f1": 0.7054545454545453, + "eval_Qnli-dev_max_f1_threshold": 381.0080871582031, + "eval_Qnli-dev_max_precision": 0.6464285714285715, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.7265625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8960142135620117, + "eval_allNLI-dev_cosine_ap": 0.5873660686997946, + "eval_allNLI-dev_cosine_f1": 0.6017316017316017, + "eval_allNLI-dev_cosine_f1_threshold": 0.8052390813827515, + "eval_allNLI-dev_cosine_precision": 0.4809688581314879, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.703125, + "eval_allNLI-dev_dot_accuracy_threshold": 469.1497497558594, + "eval_allNLI-dev_dot_ap": 0.5278322808998677, + "eval_allNLI-dev_dot_f1": 0.5864978902953586, + "eval_allNLI-dev_dot_f1_threshold": 398.7422790527344, + "eval_allNLI-dev_dot_precision": 0.46179401993355484, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.724609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.34986400604248, + "eval_allNLI-dev_euclidean_ap": 0.5891503709712753, + "eval_allNLI-dev_euclidean_f1": 0.6061855670103092, + "eval_allNLI-dev_euclidean_f1_threshold": 14.381561279296875, + "eval_allNLI-dev_euclidean_precision": 0.47115384615384615, + "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, + "eval_allNLI-dev_manhattan_accuracy": 0.72265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 202.1298828125, + "eval_allNLI-dev_manhattan_ap": 0.5845629157897831, + "eval_allNLI-dev_manhattan_f1": 0.6017316017316017, + "eval_allNLI-dev_manhattan_f1_threshold": 290.66619873046875, + "eval_allNLI-dev_manhattan_precision": 0.4809688581314879, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.7265625, + "eval_allNLI-dev_max_accuracy_threshold": 469.1497497558594, + "eval_allNLI-dev_max_ap": 0.5891503709712753, + "eval_allNLI-dev_max_f1": 0.6061855670103092, + "eval_allNLI-dev_max_f1_threshold": 398.7422790527344, + "eval_allNLI-dev_max_precision": 0.4809688581314879, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7446632934882194, + "eval_sts-test_pearson_cosine": 0.8117461676559997, + "eval_sts-test_pearson_dot": 0.7886477825036372, + "eval_sts-test_pearson_euclidean": 0.8403273274655056, + "eval_sts-test_pearson_manhattan": 0.8379940892338228, + "eval_sts-test_pearson_max": 0.8403273274655056, + "eval_sts-test_spearman_cosine": 0.8390338816154358, + "eval_sts-test_spearman_dot": 0.7805059891559553, + "eval_sts-test_spearman_euclidean": 0.8335163643447059, + "eval_sts-test_spearman_manhattan": 0.8315699082304869, + "eval_sts-test_spearman_max": 0.8390338816154358, + "eval_vitaminc-pairs_loss": 2.659418821334839, + "eval_vitaminc-pairs_runtime": 3.2064, + "eval_vitaminc-pairs_samples_per_second": 39.92, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_negation-triplets_loss": 1.0590914487838745, + "eval_negation-triplets_runtime": 0.7504, + "eval_negation-triplets_samples_per_second": 170.58, + "eval_negation-triplets_steps_per_second": 1.333, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_scitail-pairs-pos_loss": 0.1532289683818817, + "eval_scitail-pairs-pos_runtime": 0.8417, + "eval_scitail-pairs-pos_samples_per_second": 152.081, + "eval_scitail-pairs-pos_steps_per_second": 1.188, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_scitail-pairs-qa_loss": 0.0047495742328464985, + "eval_scitail-pairs-qa_runtime": 0.6048, + "eval_scitail-pairs-qa_samples_per_second": 211.629, + "eval_scitail-pairs-qa_steps_per_second": 1.653, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_xsum-pairs_loss": 0.6486428380012512, + "eval_xsum-pairs_runtime": 3.0654, + "eval_xsum-pairs_samples_per_second": 41.757, + "eval_xsum-pairs_steps_per_second": 0.326, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_sciq_pairs_loss": 0.11410364508628845, + "eval_sciq_pairs_runtime": 3.4977, + "eval_sciq_pairs_samples_per_second": 36.596, + "eval_sciq_pairs_steps_per_second": 0.286, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_qasc_pairs_loss": 0.5198172330856323, + "eval_qasc_pairs_runtime": 0.6108, + "eval_qasc_pairs_samples_per_second": 209.558, + "eval_qasc_pairs_steps_per_second": 1.637, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_openbookqa_pairs_loss": 1.029971957206726, + "eval_openbookqa_pairs_runtime": 0.5904, + "eval_openbookqa_pairs_samples_per_second": 216.82, + "eval_openbookqa_pairs_steps_per_second": 1.694, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_msmarco_pairs_loss": 1.185034990310669, + "eval_msmarco_pairs_runtime": 1.5164, + "eval_msmarco_pairs_samples_per_second": 84.41, + "eval_msmarco_pairs_steps_per_second": 0.659, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_nq_pairs_loss": 1.3883589506149292, + "eval_nq_pairs_runtime": 2.9107, + "eval_nq_pairs_samples_per_second": 43.976, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_trivia_pairs_loss": 1.0724446773529053, + "eval_trivia_pairs_runtime": 3.4605, + "eval_trivia_pairs_samples_per_second": 36.989, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_gooaq_pairs_loss": 0.5710882544517517, + "eval_gooaq_pairs_runtime": 0.9518, + "eval_gooaq_pairs_samples_per_second": 134.481, + "eval_gooaq_pairs_steps_per_second": 1.051, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_paws-pos_loss": 0.031913165003061295, + "eval_paws-pos_runtime": 0.6967, + "eval_paws-pos_samples_per_second": 183.728, + "eval_paws-pos_steps_per_second": 1.435, + "step": 780 + }, + { + "epoch": 0.8024691358024691, + "eval_global_dataset_loss": 0.5888291597366333, + "eval_global_dataset_runtime": 13.3816, + "eval_global_dataset_samples_per_second": 31.088, + "eval_global_dataset_steps_per_second": 0.299, + "step": 780 + }, + { + "epoch": 0.8034979423868313, + "grad_norm": 12.456541061401367, + "learning_rate": 2.8276220145379023e-05, + "loss": 1.4439, + "step": 781 + }, + { + "epoch": 0.8045267489711934, + "grad_norm": 1.0020017623901367, + "learning_rate": 2.8312564901349947e-05, + "loss": 0.0386, + "step": 782 + }, + { + "epoch": 0.8055555555555556, + "grad_norm": 9.883146286010742, + "learning_rate": 2.8348909657320867e-05, + "loss": 0.5761, + "step": 783 + }, + { + "epoch": 0.8065843621399177, + "grad_norm": 6.427492141723633, + "learning_rate": 2.8385254413291795e-05, + "loss": 0.2763, + "step": 784 + }, + { + "epoch": 0.8076131687242798, + "grad_norm": 18.895719528198242, + "learning_rate": 2.842159916926272e-05, + "loss": 1.7799, + "step": 785 + }, + { + "epoch": 0.808641975308642, + "grad_norm": 13.417122840881348, + "learning_rate": 2.8457943925233643e-05, + "loss": 1.6238, + "step": 786 + }, + { + "epoch": 0.8096707818930041, + "grad_norm": 14.21626091003418, + "learning_rate": 2.8494288681204567e-05, + "loss": 1.2406, + "step": 787 + }, + { + "epoch": 0.8106995884773662, + "grad_norm": 10.437925338745117, + "learning_rate": 2.853063343717549e-05, + "loss": 1.2021, + "step": 788 + }, + { + "epoch": 0.8117283950617284, + "grad_norm": 9.257889747619629, + "learning_rate": 2.8566978193146415e-05, + "loss": 0.5357, + "step": 789 + }, + { + "epoch": 0.8127572016460906, + "grad_norm": 7.7459940910339355, + "learning_rate": 2.8603322949117342e-05, + "loss": 0.3974, + "step": 790 + }, + { + "epoch": 0.8137860082304527, + "grad_norm": 9.664865493774414, + "learning_rate": 2.8639667705088263e-05, + "loss": 0.681, + "step": 791 + }, + { + "epoch": 0.8148148148148148, + "grad_norm": 1.9434237480163574, + "learning_rate": 2.8676012461059187e-05, + "loss": 0.0454, + "step": 792 + }, + { + "epoch": 0.815843621399177, + "grad_norm": 11.688820838928223, + "learning_rate": 2.8712357217030114e-05, + "loss": 0.8601, + "step": 793 + }, + { + "epoch": 0.8168724279835391, + "grad_norm": 8.6393461227417, + "learning_rate": 2.8748701973001035e-05, + "loss": 0.5149, + "step": 794 + }, + { + "epoch": 0.8179012345679012, + "grad_norm": 3.408317804336548, + "learning_rate": 2.878504672897196e-05, + "loss": 0.1049, + "step": 795 + }, + { + "epoch": 0.8189300411522634, + "grad_norm": 2.3510513305664062, + "learning_rate": 2.8821391484942886e-05, + "loss": 0.0591, + "step": 796 + }, + { + "epoch": 0.8199588477366255, + "grad_norm": 19.143835067749023, + "learning_rate": 2.885773624091381e-05, + "loss": 1.7556, + "step": 797 + }, + { + "epoch": 0.8209876543209876, + "grad_norm": 2.234999895095825, + "learning_rate": 2.889408099688473e-05, + "loss": 0.0651, + "step": 798 + }, + { + "epoch": 0.8220164609053497, + "grad_norm": 7.49348783493042, + "learning_rate": 2.893042575285566e-05, + "loss": 0.3813, + "step": 799 + }, + { + "epoch": 0.823045267489712, + "grad_norm": 8.669596672058105, + "learning_rate": 2.8966770508826583e-05, + "loss": 0.4154, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7969362139701843, + "eval_Qnli-dev_cosine_ap": 0.7404462233066612, + "eval_Qnli-dev_cosine_f1": 0.6980802792321116, + "eval_Qnli-dev_cosine_f1_threshold": 0.7222884893417358, + "eval_Qnli-dev_cosine_precision": 0.5934718100890207, + "eval_Qnli-dev_cosine_recall": 0.847457627118644, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 375.7672119140625, + "eval_Qnli-dev_dot_ap": 0.6479436940211677, + "eval_Qnli-dev_dot_f1": 0.6717325227963526, + "eval_Qnli-dev_dot_f1_threshold": 306.73577880859375, + "eval_Qnli-dev_dot_precision": 0.523696682464455, + "eval_Qnli-dev_dot_recall": 0.9364406779661016, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.082931518554688, + "eval_Qnli-dev_euclidean_ap": 0.7512343790385024, + "eval_Qnli-dev_euclidean_f1": 0.7015706806282722, + "eval_Qnli-dev_euclidean_f1_threshold": 16.305587768554688, + "eval_Qnli-dev_euclidean_precision": 0.5964391691394659, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.70703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 294.2919006347656, + "eval_Qnli-dev_manhattan_ap": 0.7497633403333601, + "eval_Qnli-dev_manhattan_f1": 0.7025089605734768, + "eval_Qnli-dev_manhattan_f1_threshold": 333.9628601074219, + "eval_Qnli-dev_manhattan_precision": 0.6086956521739131, + "eval_Qnli-dev_manhattan_recall": 0.8305084745762712, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 375.7672119140625, + "eval_Qnli-dev_max_ap": 0.7512343790385024, + "eval_Qnli-dev_max_f1": 0.7025089605734768, + "eval_Qnli-dev_max_f1_threshold": 333.9628601074219, + "eval_Qnli-dev_max_precision": 0.6086956521739131, + "eval_Qnli-dev_max_recall": 0.9364406779661016, + "eval_allNLI-dev_cosine_accuracy": 0.732421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8626278638839722, + "eval_allNLI-dev_cosine_ap": 0.5970787093998088, + "eval_allNLI-dev_cosine_f1": 0.6073752711496746, + "eval_allNLI-dev_cosine_f1_threshold": 0.7643657326698303, + "eval_allNLI-dev_cosine_precision": 0.4861111111111111, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.69921875, + "eval_allNLI-dev_dot_accuracy_threshold": 427.77294921875, + "eval_allNLI-dev_dot_ap": 0.5307372759630803, + "eval_allNLI-dev_dot_f1": 0.5900900900900902, + "eval_allNLI-dev_dot_f1_threshold": 365.810302734375, + "eval_allNLI-dev_dot_precision": 0.4833948339483395, + "eval_allNLI-dev_dot_recall": 0.7572254335260116, + "eval_allNLI-dev_euclidean_accuracy": 0.732421875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 11.048674583435059, + "eval_allNLI-dev_euclidean_ap": 0.6004011776083211, + "eval_allNLI-dev_euclidean_f1": 0.6140724946695097, + "eval_allNLI-dev_euclidean_f1_threshold": 15.054520606994629, + "eval_allNLI-dev_euclidean_precision": 0.4864864864864865, + "eval_allNLI-dev_euclidean_recall": 0.8323699421965318, + "eval_allNLI-dev_manhattan_accuracy": 0.7265625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 219.22201538085938, + "eval_allNLI-dev_manhattan_ap": 0.5983608065576813, + "eval_allNLI-dev_manhattan_f1": 0.6170678336980306, + "eval_allNLI-dev_manhattan_f1_threshold": 307.1187744140625, + "eval_allNLI-dev_manhattan_precision": 0.4964788732394366, + "eval_allNLI-dev_manhattan_recall": 0.815028901734104, + "eval_allNLI-dev_max_accuracy": 0.732421875, + "eval_allNLI-dev_max_accuracy_threshold": 427.77294921875, + "eval_allNLI-dev_max_ap": 0.6004011776083211, + "eval_allNLI-dev_max_f1": 0.6170678336980306, + "eval_allNLI-dev_max_f1_threshold": 365.810302734375, + "eval_allNLI-dev_max_precision": 0.4964788732394366, + "eval_allNLI-dev_max_recall": 0.8323699421965318, + "eval_sequential_score": 0.7512343790385024, + "eval_sts-test_pearson_cosine": 0.8198131802310901, + "eval_sts-test_pearson_dot": 0.8060682717309235, + "eval_sts-test_pearson_euclidean": 0.8431733382027362, + "eval_sts-test_pearson_manhattan": 0.8415106486610984, + "eval_sts-test_pearson_max": 0.8431733382027362, + "eval_sts-test_spearman_cosine": 0.8405863476243647, + "eval_sts-test_spearman_dot": 0.7927496657650738, + "eval_sts-test_spearman_euclidean": 0.8335884138755459, + "eval_sts-test_spearman_manhattan": 0.8322680279081929, + "eval_sts-test_spearman_max": 0.8405863476243647, + "eval_vitaminc-pairs_loss": 2.8224048614501953, + "eval_vitaminc-pairs_runtime": 3.191, + "eval_vitaminc-pairs_samples_per_second": 40.112, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_negation-triplets_loss": 1.0728732347488403, + "eval_negation-triplets_runtime": 0.7461, + "eval_negation-triplets_samples_per_second": 171.567, + "eval_negation-triplets_steps_per_second": 1.34, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_scitail-pairs-pos_loss": 0.16487395763397217, + "eval_scitail-pairs-pos_runtime": 0.8425, + "eval_scitail-pairs-pos_samples_per_second": 151.933, + "eval_scitail-pairs-pos_steps_per_second": 1.187, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_scitail-pairs-qa_loss": 0.002279088133946061, + "eval_scitail-pairs-qa_runtime": 0.5921, + "eval_scitail-pairs-qa_samples_per_second": 216.187, + "eval_scitail-pairs-qa_steps_per_second": 1.689, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_xsum-pairs_loss": 0.6205843687057495, + "eval_xsum-pairs_runtime": 3.0236, + "eval_xsum-pairs_samples_per_second": 42.333, + "eval_xsum-pairs_steps_per_second": 0.331, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_sciq_pairs_loss": 0.13013440370559692, + "eval_sciq_pairs_runtime": 3.4722, + "eval_sciq_pairs_samples_per_second": 36.864, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_qasc_pairs_loss": 0.45994842052459717, + "eval_qasc_pairs_runtime": 0.6086, + "eval_qasc_pairs_samples_per_second": 210.312, + "eval_qasc_pairs_steps_per_second": 1.643, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_openbookqa_pairs_loss": 1.1286119222640991, + "eval_openbookqa_pairs_runtime": 0.5867, + "eval_openbookqa_pairs_samples_per_second": 218.188, + "eval_openbookqa_pairs_steps_per_second": 1.705, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_msmarco_pairs_loss": 1.2176563739776611, + "eval_msmarco_pairs_runtime": 1.5198, + "eval_msmarco_pairs_samples_per_second": 84.221, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_nq_pairs_loss": 1.3381102085113525, + "eval_nq_pairs_runtime": 2.9052, + "eval_nq_pairs_samples_per_second": 44.059, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_trivia_pairs_loss": 1.1009663343429565, + "eval_trivia_pairs_runtime": 3.4646, + "eval_trivia_pairs_samples_per_second": 36.946, + "eval_trivia_pairs_steps_per_second": 0.289, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_gooaq_pairs_loss": 0.6627429723739624, + "eval_gooaq_pairs_runtime": 0.9514, + "eval_gooaq_pairs_samples_per_second": 134.542, + "eval_gooaq_pairs_steps_per_second": 1.051, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_paws-pos_loss": 0.03013201802968979, + "eval_paws-pos_runtime": 0.6964, + "eval_paws-pos_samples_per_second": 183.798, + "eval_paws-pos_steps_per_second": 1.436, + "step": 800 + }, + { + "epoch": 0.823045267489712, + "eval_global_dataset_loss": 0.5941927433013916, + "eval_global_dataset_runtime": 13.3686, + "eval_global_dataset_samples_per_second": 31.118, + "eval_global_dataset_steps_per_second": 0.299, + "step": 800 + }, + { + "epoch": 0.8240740740740741, + "grad_norm": 14.879446029663086, + "learning_rate": 2.9003115264797503e-05, + "loss": 1.0372, + "step": 801 + }, + { + "epoch": 0.8251028806584362, + "grad_norm": 9.06614875793457, + "learning_rate": 2.903946002076843e-05, + "loss": 0.4029, + "step": 802 + }, + { + "epoch": 0.8261316872427984, + "grad_norm": 8.139265060424805, + "learning_rate": 2.9075804776739355e-05, + "loss": 0.376, + "step": 803 + }, + { + "epoch": 0.8271604938271605, + "grad_norm": 13.07675838470459, + "learning_rate": 2.911214953271028e-05, + "loss": 1.4204, + "step": 804 + }, + { + "epoch": 0.8281893004115226, + "grad_norm": 13.634737968444824, + "learning_rate": 2.9148494288681203e-05, + "loss": 0.1015, + "step": 805 + }, + { + "epoch": 0.8292181069958847, + "grad_norm": 9.257582664489746, + "learning_rate": 2.9184839044652127e-05, + "loss": 0.7088, + "step": 806 + }, + { + "epoch": 0.8302469135802469, + "grad_norm": 11.305009841918945, + "learning_rate": 2.922118380062305e-05, + "loss": 0.8444, + "step": 807 + }, + { + "epoch": 0.831275720164609, + "grad_norm": 17.285337448120117, + "learning_rate": 2.925752855659397e-05, + "loss": 1.4104, + "step": 808 + }, + { + "epoch": 0.8323045267489712, + "grad_norm": 21.70269012451172, + "learning_rate": 2.92938733125649e-05, + "loss": 3.4062, + "step": 809 + }, + { + "epoch": 0.8333333333333334, + "grad_norm": 10.347410202026367, + "learning_rate": 2.9330218068535823e-05, + "loss": 0.632, + "step": 810 + }, + { + "epoch": 0.8343621399176955, + "grad_norm": 12.212241172790527, + "learning_rate": 2.9366562824506747e-05, + "loss": 0.9835, + "step": 811 + }, + { + "epoch": 0.8353909465020576, + "grad_norm": 12.607038497924805, + "learning_rate": 2.940290758047767e-05, + "loss": 1.6676, + "step": 812 + }, + { + "epoch": 0.8364197530864198, + "grad_norm": 13.61136531829834, + "learning_rate": 2.9439252336448595e-05, + "loss": 1.1451, + "step": 813 + }, + { + "epoch": 0.8374485596707819, + "grad_norm": 9.604448318481445, + "learning_rate": 2.947559709241952e-05, + "loss": 0.6491, + "step": 814 + }, + { + "epoch": 0.838477366255144, + "grad_norm": 8.173309326171875, + "learning_rate": 2.9511941848390446e-05, + "loss": 0.4395, + "step": 815 + }, + { + "epoch": 0.8395061728395061, + "grad_norm": 16.461246490478516, + "learning_rate": 2.9548286604361367e-05, + "loss": 1.5228, + "step": 816 + }, + { + "epoch": 0.8405349794238683, + "grad_norm": 12.087141036987305, + "learning_rate": 2.958463136033229e-05, + "loss": 1.1335, + "step": 817 + }, + { + "epoch": 0.8415637860082305, + "grad_norm": 10.887614250183105, + "learning_rate": 2.962097611630322e-05, + "loss": 1.034, + "step": 818 + }, + { + "epoch": 0.8425925925925926, + "grad_norm": 9.939887046813965, + "learning_rate": 2.965732087227414e-05, + "loss": 0.8548, + "step": 819 + }, + { + "epoch": 0.8436213991769548, + "grad_norm": 11.749360084533691, + "learning_rate": 2.9693665628245063e-05, + "loss": 1.0941, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.8176131248474121, + "eval_Qnli-dev_cosine_ap": 0.7388220297886944, + "eval_Qnli-dev_cosine_f1": 0.7024029574861368, + "eval_Qnli-dev_cosine_f1_threshold": 0.7822612524032593, + "eval_Qnli-dev_cosine_precision": 0.6229508196721312, + "eval_Qnli-dev_cosine_recall": 0.8050847457627118, + "eval_Qnli-dev_dot_accuracy": 0.658203125, + "eval_Qnli-dev_dot_accuracy_threshold": 418.55694580078125, + "eval_Qnli-dev_dot_ap": 0.6532207993640291, + "eval_Qnli-dev_dot_f1": 0.6723842195540308, + "eval_Qnli-dev_dot_f1_threshold": 375.86895751953125, + "eval_Qnli-dev_dot_precision": 0.5648414985590778, + "eval_Qnli-dev_dot_recall": 0.8305084745762712, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 13.418533325195312, + "eval_Qnli-dev_euclidean_ap": 0.7439997782812441, + "eval_Qnli-dev_euclidean_f1": 0.7044673539518899, + "eval_Qnli-dev_euclidean_f1_threshold": 15.451794624328613, + "eval_Qnli-dev_euclidean_precision": 0.5924855491329479, + "eval_Qnli-dev_euclidean_recall": 0.8686440677966102, + "eval_Qnli-dev_manhattan_accuracy": 0.69921875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 281.7642517089844, + "eval_Qnli-dev_manhattan_ap": 0.745379018581688, + "eval_Qnli-dev_manhattan_f1": 0.70223752151463, + "eval_Qnli-dev_manhattan_f1_threshold": 318.57647705078125, + "eval_Qnli-dev_manhattan_precision": 0.591304347826087, + "eval_Qnli-dev_manhattan_recall": 0.864406779661017, + "eval_Qnli-dev_max_accuracy": 0.70703125, + "eval_Qnli-dev_max_accuracy_threshold": 418.55694580078125, + "eval_Qnli-dev_max_ap": 0.745379018581688, + "eval_Qnli-dev_max_f1": 0.7044673539518899, + "eval_Qnli-dev_max_f1_threshold": 375.86895751953125, + "eval_Qnli-dev_max_precision": 0.6229508196721312, + "eval_Qnli-dev_max_recall": 0.8686440677966102, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8881274461746216, + "eval_allNLI-dev_cosine_ap": 0.6029662836602825, + "eval_allNLI-dev_cosine_f1": 0.6047619047619047, + "eval_allNLI-dev_cosine_f1_threshold": 0.8256221413612366, + "eval_allNLI-dev_cosine_precision": 0.5141700404858299, + "eval_allNLI-dev_cosine_recall": 0.7341040462427746, + "eval_allNLI-dev_dot_accuracy": 0.697265625, + "eval_allNLI-dev_dot_accuracy_threshold": 443.00537109375, + "eval_allNLI-dev_dot_ap": 0.5307385620455893, + "eval_allNLI-dev_dot_f1": 0.5751633986928105, + "eval_allNLI-dev_dot_f1_threshold": 395.2938232421875, + "eval_allNLI-dev_dot_precision": 0.46153846153846156, + "eval_allNLI-dev_dot_recall": 0.7630057803468208, + "eval_allNLI-dev_euclidean_accuracy": 0.7265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.200397491455078, + "eval_allNLI-dev_euclidean_ap": 0.6054028078040996, + "eval_allNLI-dev_euclidean_f1": 0.6042154566744731, + "eval_allNLI-dev_euclidean_f1_threshold": 13.174400329589844, + "eval_allNLI-dev_euclidean_precision": 0.5078740157480315, + "eval_allNLI-dev_euclidean_recall": 0.7456647398843931, + "eval_allNLI-dev_manhattan_accuracy": 0.73046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 204.298828125, + "eval_allNLI-dev_manhattan_ap": 0.6039169640925666, + "eval_allNLI-dev_manhattan_f1": 0.5994694960212201, + "eval_allNLI-dev_manhattan_f1_threshold": 254.79234313964844, + "eval_allNLI-dev_manhattan_precision": 0.553921568627451, + "eval_allNLI-dev_manhattan_recall": 0.653179190751445, + "eval_allNLI-dev_max_accuracy": 0.73046875, + "eval_allNLI-dev_max_accuracy_threshold": 443.00537109375, + "eval_allNLI-dev_max_ap": 0.6054028078040996, + "eval_allNLI-dev_max_f1": 0.6047619047619047, + "eval_allNLI-dev_max_f1_threshold": 395.2938232421875, + "eval_allNLI-dev_max_precision": 0.553921568627451, + "eval_allNLI-dev_max_recall": 0.7630057803468208, + "eval_sequential_score": 0.745379018581688, + "eval_sts-test_pearson_cosine": 0.8219060292244447, + "eval_sts-test_pearson_dot": 0.7914174536034212, + "eval_sts-test_pearson_euclidean": 0.8498595001040936, + "eval_sts-test_pearson_manhattan": 0.8479607961602269, + "eval_sts-test_pearson_max": 0.8498595001040936, + "eval_sts-test_spearman_cosine": 0.8433385949511971, + "eval_sts-test_spearman_dot": 0.7770873060444821, + "eval_sts-test_spearman_euclidean": 0.8418306116960912, + "eval_sts-test_spearman_manhattan": 0.8404455601560273, + "eval_sts-test_spearman_max": 0.8433385949511971, + "eval_vitaminc-pairs_loss": 2.736114501953125, + "eval_vitaminc-pairs_runtime": 3.2033, + "eval_vitaminc-pairs_samples_per_second": 39.958, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_negation-triplets_loss": 1.0854538679122925, + "eval_negation-triplets_runtime": 0.7435, + "eval_negation-triplets_samples_per_second": 172.17, + "eval_negation-triplets_steps_per_second": 1.345, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_scitail-pairs-pos_loss": 0.1739039570093155, + "eval_scitail-pairs-pos_runtime": 0.8433, + "eval_scitail-pairs-pos_samples_per_second": 151.783, + "eval_scitail-pairs-pos_steps_per_second": 1.186, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_scitail-pairs-qa_loss": 0.003931767772883177, + "eval_scitail-pairs-qa_runtime": 0.5977, + "eval_scitail-pairs-qa_samples_per_second": 214.159, + "eval_scitail-pairs-qa_steps_per_second": 1.673, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_xsum-pairs_loss": 0.6559375524520874, + "eval_xsum-pairs_runtime": 3.0363, + "eval_xsum-pairs_samples_per_second": 42.157, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_sciq_pairs_loss": 0.11851135641336441, + "eval_sciq_pairs_runtime": 3.4675, + "eval_sciq_pairs_samples_per_second": 36.914, + "eval_sciq_pairs_steps_per_second": 0.288, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_qasc_pairs_loss": 0.3914608359336853, + "eval_qasc_pairs_runtime": 0.6158, + "eval_qasc_pairs_samples_per_second": 207.872, + "eval_qasc_pairs_steps_per_second": 1.624, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_openbookqa_pairs_loss": 1.059507131576538, + "eval_openbookqa_pairs_runtime": 0.5868, + "eval_openbookqa_pairs_samples_per_second": 218.116, + "eval_openbookqa_pairs_steps_per_second": 1.704, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_msmarco_pairs_loss": 1.0388420820236206, + "eval_msmarco_pairs_runtime": 1.5195, + "eval_msmarco_pairs_samples_per_second": 84.238, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_nq_pairs_loss": 1.4129403829574585, + "eval_nq_pairs_runtime": 2.904, + "eval_nq_pairs_samples_per_second": 44.077, + "eval_nq_pairs_steps_per_second": 0.344, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_trivia_pairs_loss": 1.0265684127807617, + "eval_trivia_pairs_runtime": 3.45, + "eval_trivia_pairs_samples_per_second": 37.101, + "eval_trivia_pairs_steps_per_second": 0.29, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_gooaq_pairs_loss": 0.6007567048072815, + "eval_gooaq_pairs_runtime": 0.9464, + "eval_gooaq_pairs_samples_per_second": 135.252, + "eval_gooaq_pairs_steps_per_second": 1.057, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_paws-pos_loss": 0.03185836598277092, + "eval_paws-pos_runtime": 0.6912, + "eval_paws-pos_samples_per_second": 185.193, + "eval_paws-pos_steps_per_second": 1.447, + "step": 820 + }, + { + "epoch": 0.8436213991769548, + "eval_global_dataset_loss": 0.5914937257766724, + "eval_global_dataset_runtime": 13.4037, + "eval_global_dataset_samples_per_second": 31.036, + "eval_global_dataset_steps_per_second": 0.298, + "step": 820 + }, + { + "epoch": 0.8446502057613169, + "grad_norm": 10.527436256408691, + "learning_rate": 2.973001038421599e-05, + "loss": 1.3147, + "step": 821 + }, + { + "epoch": 0.845679012345679, + "grad_norm": 13.003664016723633, + "learning_rate": 2.9766355140186914e-05, + "loss": 1.3354, + "step": 822 + }, + { + "epoch": 0.8467078189300411, + "grad_norm": 2.857788324356079, + "learning_rate": 2.9802699896157835e-05, + "loss": 0.0733, + "step": 823 + }, + { + "epoch": 0.8477366255144033, + "grad_norm": 11.483878135681152, + "learning_rate": 2.9839044652128762e-05, + "loss": 0.935, + "step": 824 + }, + { + "epoch": 0.8487654320987654, + "grad_norm": 2.5351336002349854, + "learning_rate": 2.9875389408099686e-05, + "loss": 0.0684, + "step": 825 + }, + { + "epoch": 0.8497942386831275, + "grad_norm": 8.322936058044434, + "learning_rate": 2.9911734164070607e-05, + "loss": 0.6753, + "step": 826 + }, + { + "epoch": 0.8508230452674898, + "grad_norm": 1.9907835721969604, + "learning_rate": 2.9948078920041534e-05, + "loss": 0.0438, + "step": 827 + }, + { + "epoch": 0.8518518518518519, + "grad_norm": 14.835284233093262, + "learning_rate": 2.998442367601246e-05, + "loss": 1.1541, + "step": 828 + }, + { + "epoch": 0.852880658436214, + "grad_norm": 13.292768478393555, + "learning_rate": 3.002076843198338e-05, + "loss": 0.7087, + "step": 829 + }, + { + "epoch": 0.8539094650205762, + "grad_norm": 9.636879920959473, + "learning_rate": 3.0057113187954307e-05, + "loss": 0.5391, + "step": 830 + }, + { + "epoch": 0.8549382716049383, + "grad_norm": 8.648504257202148, + "learning_rate": 3.009345794392523e-05, + "loss": 0.3831, + "step": 831 + }, + { + "epoch": 0.8559670781893004, + "grad_norm": 9.20128345489502, + "learning_rate": 3.0129802699896155e-05, + "loss": 0.4752, + "step": 832 + }, + { + "epoch": 0.8569958847736625, + "grad_norm": 7.908294677734375, + "learning_rate": 3.016614745586708e-05, + "loss": 0.3662, + "step": 833 + }, + { + "epoch": 0.8580246913580247, + "grad_norm": 18.368688583374023, + "learning_rate": 3.0202492211838003e-05, + "loss": 1.6192, + "step": 834 + }, + { + "epoch": 0.8590534979423868, + "grad_norm": 2.4204726219177246, + "learning_rate": 3.0238836967808927e-05, + "loss": 0.0369, + "step": 835 + }, + { + "epoch": 0.8600823045267489, + "grad_norm": 15.491935729980469, + "learning_rate": 3.0275181723779854e-05, + "loss": 1.3151, + "step": 836 + }, + { + "epoch": 0.8611111111111112, + "grad_norm": 7.942100524902344, + "learning_rate": 3.0311526479750775e-05, + "loss": 0.4427, + "step": 837 + }, + { + "epoch": 0.8621399176954733, + "grad_norm": 12.48727035522461, + "learning_rate": 3.03478712357217e-05, + "loss": 0.8185, + "step": 838 + }, + { + "epoch": 0.8631687242798354, + "grad_norm": 9.763201713562012, + "learning_rate": 3.0384215991692626e-05, + "loss": 0.8389, + "step": 839 + }, + { + "epoch": 0.8641975308641975, + "grad_norm": 1.3098586797714233, + "learning_rate": 3.0420560747663547e-05, + "loss": 0.0542, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7717105150222778, + "eval_Qnli-dev_cosine_ap": 0.7440826784027825, + "eval_Qnli-dev_cosine_f1": 0.7112676056338029, + "eval_Qnli-dev_cosine_f1_threshold": 0.7245498895645142, + "eval_Qnli-dev_cosine_precision": 0.608433734939759, + "eval_Qnli-dev_cosine_recall": 0.8559322033898306, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 360.73333740234375, + "eval_Qnli-dev_dot_ap": 0.6721991504226604, + "eval_Qnli-dev_dot_f1": 0.6755852842809364, + "eval_Qnli-dev_dot_f1_threshold": 324.03253173828125, + "eval_Qnli-dev_dot_precision": 0.5580110497237569, + "eval_Qnli-dev_dot_recall": 0.8559322033898306, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.42113208770752, + "eval_Qnli-dev_euclidean_ap": 0.7499644370026034, + "eval_Qnli-dev_euclidean_f1": 0.717391304347826, + "eval_Qnli-dev_euclidean_f1_threshold": 15.633472442626953, + "eval_Qnli-dev_euclidean_precision": 0.6265822784810127, + "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 304.3686828613281, + "eval_Qnli-dev_manhattan_ap": 0.7552844807907888, + "eval_Qnli-dev_manhattan_f1": 0.7099236641221374, + "eval_Qnli-dev_manhattan_f1_threshold": 313.06787109375, + "eval_Qnli-dev_manhattan_precision": 0.6458333333333334, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 360.73333740234375, + "eval_Qnli-dev_max_ap": 0.7552844807907888, + "eval_Qnli-dev_max_f1": 0.717391304347826, + "eval_Qnli-dev_max_f1_threshold": 324.03253173828125, + "eval_Qnli-dev_max_precision": 0.6458333333333334, + "eval_Qnli-dev_max_recall": 0.8559322033898306, + "eval_allNLI-dev_cosine_accuracy": 0.73046875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8676639795303345, + "eval_allNLI-dev_cosine_ap": 0.5974870153364504, + "eval_allNLI-dev_cosine_f1": 0.5968819599109131, + "eval_allNLI-dev_cosine_f1_threshold": 0.7826240062713623, + "eval_allNLI-dev_cosine_precision": 0.4855072463768116, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.69140625, + "eval_allNLI-dev_dot_accuracy_threshold": 390.4407653808594, + "eval_allNLI-dev_dot_ap": 0.5132556641569763, + "eval_allNLI-dev_dot_f1": 0.5889830508474576, + "eval_allNLI-dev_dot_f1_threshold": 349.6282958984375, + "eval_allNLI-dev_dot_precision": 0.46488294314381273, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.7265625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.85896110534668, + "eval_allNLI-dev_euclidean_ap": 0.600945196021151, + "eval_allNLI-dev_euclidean_f1": 0.6117136659436009, + "eval_allNLI-dev_euclidean_f1_threshold": 14.226009368896484, + "eval_allNLI-dev_euclidean_precision": 0.4895833333333333, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.724609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 236.98345947265625, + "eval_allNLI-dev_manhattan_ap": 0.595652875609926, + "eval_allNLI-dev_manhattan_f1": 0.6052631578947368, + "eval_allNLI-dev_manhattan_f1_threshold": 292.81805419921875, + "eval_allNLI-dev_manhattan_precision": 0.4876325088339223, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.73046875, + "eval_allNLI-dev_max_accuracy_threshold": 390.4407653808594, + "eval_allNLI-dev_max_ap": 0.600945196021151, + "eval_allNLI-dev_max_f1": 0.6117136659436009, + "eval_allNLI-dev_max_f1_threshold": 349.6282958984375, + "eval_allNLI-dev_max_precision": 0.4895833333333333, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7552844807907888, + "eval_sts-test_pearson_cosine": 0.8197263747311968, + "eval_sts-test_pearson_dot": 0.7930399784089159, + "eval_sts-test_pearson_euclidean": 0.8451878041621638, + "eval_sts-test_pearson_manhattan": 0.8428233677391169, + "eval_sts-test_pearson_max": 0.8451878041621638, + "eval_sts-test_spearman_cosine": 0.8404950102998648, + "eval_sts-test_spearman_dot": 0.7717234646053703, + "eval_sts-test_spearman_euclidean": 0.8380116285514719, + "eval_sts-test_spearman_manhattan": 0.8359618417747002, + "eval_sts-test_spearman_max": 0.8404950102998648, + "eval_vitaminc-pairs_loss": 2.756269693374634, + "eval_vitaminc-pairs_runtime": 3.1914, + "eval_vitaminc-pairs_samples_per_second": 40.108, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_negation-triplets_loss": 1.07953941822052, + "eval_negation-triplets_runtime": 0.7561, + "eval_negation-triplets_samples_per_second": 169.298, + "eval_negation-triplets_steps_per_second": 1.323, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_scitail-pairs-pos_loss": 0.15982350707054138, + "eval_scitail-pairs-pos_runtime": 0.8403, + "eval_scitail-pairs-pos_samples_per_second": 152.323, + "eval_scitail-pairs-pos_steps_per_second": 1.19, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_scitail-pairs-qa_loss": 0.000591381685808301, + "eval_scitail-pairs-qa_runtime": 0.59, + "eval_scitail-pairs-qa_samples_per_second": 216.958, + "eval_scitail-pairs-qa_steps_per_second": 1.695, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_xsum-pairs_loss": 0.620231568813324, + "eval_xsum-pairs_runtime": 3.0356, + "eval_xsum-pairs_samples_per_second": 42.166, + "eval_xsum-pairs_steps_per_second": 0.329, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_sciq_pairs_loss": 0.1389157921075821, + "eval_sciq_pairs_runtime": 3.4358, + "eval_sciq_pairs_samples_per_second": 37.255, + "eval_sciq_pairs_steps_per_second": 0.291, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_qasc_pairs_loss": 0.3473445475101471, + "eval_qasc_pairs_runtime": 0.6187, + "eval_qasc_pairs_samples_per_second": 206.89, + "eval_qasc_pairs_steps_per_second": 1.616, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_openbookqa_pairs_loss": 1.0230737924575806, + "eval_openbookqa_pairs_runtime": 0.5845, + "eval_openbookqa_pairs_samples_per_second": 218.995, + "eval_openbookqa_pairs_steps_per_second": 1.711, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_msmarco_pairs_loss": 1.2953341007232666, + "eval_msmarco_pairs_runtime": 1.5198, + "eval_msmarco_pairs_samples_per_second": 84.223, + "eval_msmarco_pairs_steps_per_second": 0.658, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_nq_pairs_loss": 1.5245081186294556, + "eval_nq_pairs_runtime": 2.9025, + "eval_nq_pairs_samples_per_second": 44.1, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_trivia_pairs_loss": 1.1853358745574951, + "eval_trivia_pairs_runtime": 3.4357, + "eval_trivia_pairs_samples_per_second": 37.256, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_gooaq_pairs_loss": 0.6523827910423279, + "eval_gooaq_pairs_runtime": 0.954, + "eval_gooaq_pairs_samples_per_second": 134.174, + "eval_gooaq_pairs_steps_per_second": 1.048, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_paws-pos_loss": 0.029700685292482376, + "eval_paws-pos_runtime": 0.692, + "eval_paws-pos_samples_per_second": 184.977, + "eval_paws-pos_steps_per_second": 1.445, + "step": 840 + }, + { + "epoch": 0.8641975308641975, + "eval_global_dataset_loss": 0.5667285919189453, + "eval_global_dataset_runtime": 13.3955, + "eval_global_dataset_samples_per_second": 31.055, + "eval_global_dataset_steps_per_second": 0.299, + "step": 840 + }, + { + "epoch": 0.8652263374485597, + "grad_norm": 11.04948902130127, + "learning_rate": 3.045690550363447e-05, + "loss": 0.6135, + "step": 841 + }, + { + "epoch": 0.8662551440329218, + "grad_norm": 13.294988632202148, + "learning_rate": 3.0493250259605398e-05, + "loss": 1.4091, + "step": 842 + }, + { + "epoch": 0.8672839506172839, + "grad_norm": 11.463438987731934, + "learning_rate": 3.052959501557632e-05, + "loss": 0.6724, + "step": 843 + }, + { + "epoch": 0.8683127572016461, + "grad_norm": 2.22076678276062, + "learning_rate": 3.0565939771547246e-05, + "loss": 0.0353, + "step": 844 + }, + { + "epoch": 0.8693415637860082, + "grad_norm": 9.272378921508789, + "learning_rate": 3.0602284527518174e-05, + "loss": 0.5297, + "step": 845 + }, + { + "epoch": 0.8703703703703703, + "grad_norm": 10.213794708251953, + "learning_rate": 3.0638629283489094e-05, + "loss": 0.5211, + "step": 846 + }, + { + "epoch": 0.8713991769547325, + "grad_norm": 12.306347846984863, + "learning_rate": 3.0674974039460015e-05, + "loss": 0.8431, + "step": 847 + }, + { + "epoch": 0.8724279835390947, + "grad_norm": 10.407583236694336, + "learning_rate": 3.071131879543094e-05, + "loss": 0.7195, + "step": 848 + }, + { + "epoch": 0.8734567901234568, + "grad_norm": 8.843184471130371, + "learning_rate": 3.074766355140186e-05, + "loss": 0.4965, + "step": 849 + }, + { + "epoch": 0.8744855967078189, + "grad_norm": 10.975191116333008, + "learning_rate": 3.078400830737279e-05, + "loss": 0.7786, + "step": 850 + }, + { + "epoch": 0.8755144032921811, + "grad_norm": 16.885013580322266, + "learning_rate": 3.082035306334372e-05, + "loss": 1.7078, + "step": 851 + }, + { + "epoch": 0.8765432098765432, + "grad_norm": 10.905181884765625, + "learning_rate": 3.085669781931464e-05, + "loss": 0.6685, + "step": 852 + }, + { + "epoch": 0.8775720164609053, + "grad_norm": 12.853326797485352, + "learning_rate": 3.089304257528556e-05, + "loss": 1.2114, + "step": 853 + }, + { + "epoch": 0.8786008230452675, + "grad_norm": 9.456357955932617, + "learning_rate": 3.0929387331256486e-05, + "loss": 0.6199, + "step": 854 + }, + { + "epoch": 0.8796296296296297, + "grad_norm": 15.603614807128906, + "learning_rate": 3.0965732087227414e-05, + "loss": 1.3311, + "step": 855 + }, + { + "epoch": 0.8806584362139918, + "grad_norm": 10.03974437713623, + "learning_rate": 3.1002076843198334e-05, + "loss": 0.5718, + "step": 856 + }, + { + "epoch": 0.8816872427983539, + "grad_norm": 8.548869132995605, + "learning_rate": 3.103842159916926e-05, + "loss": 0.4969, + "step": 857 + }, + { + "epoch": 0.8827160493827161, + "grad_norm": 13.353643417358398, + "learning_rate": 3.107476635514018e-05, + "loss": 1.1514, + "step": 858 + }, + { + "epoch": 0.8837448559670782, + "grad_norm": 11.166017532348633, + "learning_rate": 3.111111111111111e-05, + "loss": 1.361, + "step": 859 + }, + { + "epoch": 0.8847736625514403, + "grad_norm": 10.166590690612793, + "learning_rate": 3.114745586708203e-05, + "loss": 0.801, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_Qnli-dev_cosine_accuracy": 0.720703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7913862466812134, + "eval_Qnli-dev_cosine_ap": 0.7484120022003069, + "eval_Qnli-dev_cosine_f1": 0.7155635062611807, + "eval_Qnli-dev_cosine_f1_threshold": 0.7564002275466919, + "eval_Qnli-dev_cosine_precision": 0.6191950464396285, + "eval_Qnli-dev_cosine_recall": 0.847457627118644, + "eval_Qnli-dev_dot_accuracy": 0.669921875, + "eval_Qnli-dev_dot_accuracy_threshold": 381.15460205078125, + "eval_Qnli-dev_dot_ap": 0.6554039139593089, + "eval_Qnli-dev_dot_f1": 0.6929982046678635, + "eval_Qnli-dev_dot_f1_threshold": 375.46405029296875, + "eval_Qnli-dev_dot_precision": 0.6012461059190031, + "eval_Qnli-dev_dot_recall": 0.8177966101694916, + "eval_Qnli-dev_euclidean_accuracy": 0.724609375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 14.634359359741211, + "eval_Qnli-dev_euclidean_ap": 0.7543039429243505, + "eval_Qnli-dev_euclidean_f1": 0.7129798903107861, + "eval_Qnli-dev_euclidean_f1_threshold": 15.349479675292969, + "eval_Qnli-dev_euclidean_precision": 0.6270096463022508, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 285.07244873046875, + "eval_Qnli-dev_manhattan_ap": 0.7579405691223697, + "eval_Qnli-dev_manhattan_f1": 0.7142857142857143, + "eval_Qnli-dev_manhattan_f1_threshold": 311.107421875, + "eval_Qnli-dev_manhattan_precision": 0.6560283687943262, + "eval_Qnli-dev_manhattan_recall": 0.7838983050847458, + "eval_Qnli-dev_max_accuracy": 0.724609375, + "eval_Qnli-dev_max_accuracy_threshold": 381.15460205078125, + "eval_Qnli-dev_max_ap": 0.7579405691223697, + "eval_Qnli-dev_max_f1": 0.7155635062611807, + "eval_Qnli-dev_max_f1_threshold": 375.46405029296875, + "eval_Qnli-dev_max_precision": 0.6560283687943262, + "eval_Qnli-dev_max_recall": 0.847457627118644, + "eval_allNLI-dev_cosine_accuracy": 0.736328125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8940784931182861, + "eval_allNLI-dev_cosine_ap": 0.5955386793059732, + "eval_allNLI-dev_cosine_f1": 0.6008064516129031, + "eval_allNLI-dev_cosine_f1_threshold": 0.7910170555114746, + "eval_allNLI-dev_cosine_precision": 0.4613003095975232, + "eval_allNLI-dev_cosine_recall": 0.861271676300578, + "eval_allNLI-dev_dot_accuracy": 0.69921875, + "eval_allNLI-dev_dot_accuracy_threshold": 451.13623046875, + "eval_allNLI-dev_dot_ap": 0.5115980330253447, + "eval_allNLI-dev_dot_f1": 0.5823927765237021, + "eval_allNLI-dev_dot_f1_threshold": 410.14447021484375, + "eval_allNLI-dev_dot_precision": 0.4777777777777778, + "eval_allNLI-dev_dot_recall": 0.7456647398843931, + "eval_allNLI-dev_euclidean_accuracy": 0.734375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 10.620901107788086, + "eval_allNLI-dev_euclidean_ap": 0.5964863175832775, + "eval_allNLI-dev_euclidean_f1": 0.6052104208416834, + "eval_allNLI-dev_euclidean_f1_threshold": 14.621801376342773, + "eval_allNLI-dev_euclidean_precision": 0.46319018404907975, + "eval_allNLI-dev_euclidean_recall": 0.8728323699421965, + "eval_allNLI-dev_manhattan_accuracy": 0.732421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 226.17271423339844, + "eval_allNLI-dev_manhattan_ap": 0.594460510313827, + "eval_allNLI-dev_manhattan_f1": 0.6003976143141153, + "eval_allNLI-dev_manhattan_f1_threshold": 303.3892517089844, + "eval_allNLI-dev_manhattan_precision": 0.4575757575757576, + "eval_allNLI-dev_manhattan_recall": 0.8728323699421965, + "eval_allNLI-dev_max_accuracy": 0.736328125, + "eval_allNLI-dev_max_accuracy_threshold": 451.13623046875, + "eval_allNLI-dev_max_ap": 0.5964863175832775, + "eval_allNLI-dev_max_f1": 0.6052104208416834, + "eval_allNLI-dev_max_f1_threshold": 410.14447021484375, + "eval_allNLI-dev_max_precision": 0.4777777777777778, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.7579405691223697, + "eval_sts-test_pearson_cosine": 0.8116140949252031, + "eval_sts-test_pearson_dot": 0.7799016605392657, + "eval_sts-test_pearson_euclidean": 0.8407077538986545, + "eval_sts-test_pearson_manhattan": 0.8400909131579789, + "eval_sts-test_pearson_max": 0.8407077538986545, + "eval_sts-test_spearman_cosine": 0.8376718769749885, + "eval_sts-test_spearman_dot": 0.7616373358104539, + "eval_sts-test_spearman_euclidean": 0.8340390590777574, + "eval_sts-test_spearman_manhattan": 0.8319958059851489, + "eval_sts-test_spearman_max": 0.8376718769749885, + "eval_vitaminc-pairs_loss": 2.8492391109466553, + "eval_vitaminc-pairs_runtime": 3.2108, + "eval_vitaminc-pairs_samples_per_second": 39.866, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_negation-triplets_loss": 1.0788973569869995, + "eval_negation-triplets_runtime": 0.7524, + "eval_negation-triplets_samples_per_second": 170.133, + "eval_negation-triplets_steps_per_second": 1.329, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_scitail-pairs-pos_loss": 0.19070731103420258, + "eval_scitail-pairs-pos_runtime": 0.8535, + "eval_scitail-pairs-pos_samples_per_second": 149.971, + "eval_scitail-pairs-pos_steps_per_second": 1.172, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_scitail-pairs-qa_loss": 0.0008353625307790935, + "eval_scitail-pairs-qa_runtime": 0.594, + "eval_scitail-pairs-qa_samples_per_second": 215.481, + "eval_scitail-pairs-qa_steps_per_second": 1.683, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_xsum-pairs_loss": 0.6224209666252136, + "eval_xsum-pairs_runtime": 3.0265, + "eval_xsum-pairs_samples_per_second": 42.293, + "eval_xsum-pairs_steps_per_second": 0.33, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_sciq_pairs_loss": 0.12949666380882263, + "eval_sciq_pairs_runtime": 3.4626, + "eval_sciq_pairs_samples_per_second": 36.966, + "eval_sciq_pairs_steps_per_second": 0.289, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_qasc_pairs_loss": 0.3247033953666687, + "eval_qasc_pairs_runtime": 0.6062, + "eval_qasc_pairs_samples_per_second": 211.165, + "eval_qasc_pairs_steps_per_second": 1.65, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_openbookqa_pairs_loss": 1.1479803323745728, + "eval_openbookqa_pairs_runtime": 0.6115, + "eval_openbookqa_pairs_samples_per_second": 209.313, + "eval_openbookqa_pairs_steps_per_second": 1.635, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_msmarco_pairs_loss": 1.1639130115509033, + "eval_msmarco_pairs_runtime": 1.5482, + "eval_msmarco_pairs_samples_per_second": 82.677, + "eval_msmarco_pairs_steps_per_second": 0.646, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_nq_pairs_loss": 1.2727266550064087, + "eval_nq_pairs_runtime": 2.8951, + "eval_nq_pairs_samples_per_second": 44.212, + "eval_nq_pairs_steps_per_second": 0.345, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_trivia_pairs_loss": 1.1261823177337646, + "eval_trivia_pairs_runtime": 3.4344, + "eval_trivia_pairs_samples_per_second": 37.27, + "eval_trivia_pairs_steps_per_second": 0.291, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_gooaq_pairs_loss": 0.6438990831375122, + "eval_gooaq_pairs_runtime": 0.949, + "eval_gooaq_pairs_samples_per_second": 134.878, + "eval_gooaq_pairs_steps_per_second": 1.054, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_paws-pos_loss": 0.02884558029472828, + "eval_paws-pos_runtime": 0.6952, + "eval_paws-pos_samples_per_second": 184.107, + "eval_paws-pos_steps_per_second": 1.438, + "step": 860 + }, + { + "epoch": 0.8847736625514403, + "eval_global_dataset_loss": 0.5700183510780334, + "eval_global_dataset_runtime": 13.3817, + "eval_global_dataset_samples_per_second": 31.087, + "eval_global_dataset_steps_per_second": 0.299, + "step": 860 + }, + { + "epoch": 0.8858024691358025, + "grad_norm": 14.423850059509277, + "learning_rate": 3.118380062305296e-05, + "loss": 1.5436, + "step": 861 + }, + { + "epoch": 0.8868312757201646, + "grad_norm": 6.131687164306641, + "learning_rate": 3.122014537902388e-05, + "loss": 0.2666, + "step": 862 + }, + { + "epoch": 0.8878600823045267, + "grad_norm": 8.292266845703125, + "learning_rate": 3.1256490134994806e-05, + "loss": 0.5436, + "step": 863 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 15.915453910827637, + "learning_rate": 3.1292834890965727e-05, + "loss": 1.8489, + "step": 864 + }, + { + "epoch": 0.8899176954732511, + "grad_norm": 15.952044486999512, + "learning_rate": 3.1329179646936654e-05, + "loss": 1.3624, + "step": 865 + }, + { + "epoch": 0.8909465020576132, + "grad_norm": 0.0, + "learning_rate": 3.136552440290758e-05, + "loss": 0.0, + "step": 866 + }, + { + "epoch": 0.8919753086419753, + "grad_norm": 8.352066993713379, + "learning_rate": 3.14018691588785e-05, + "loss": 0.5817, + "step": 867 + }, + { + "epoch": 0.8930041152263375, + "grad_norm": 0.0, + "learning_rate": 3.143821391484942e-05, + "loss": 0.0, + "step": 868 + }, + { + "epoch": 0.8940329218106996, + "grad_norm": 7.5998640060424805, + "learning_rate": 3.147455867082035e-05, + "loss": 0.4523, + "step": 869 + }, + { + "epoch": 0.8950617283950617, + "grad_norm": 9.014819145202637, + "learning_rate": 3.151090342679128e-05, + "loss": 0.8566, + "step": 870 + }, + { + "epoch": 0.8960905349794238, + "grad_norm": 9.435276985168457, + "learning_rate": 3.15472481827622e-05, + "loss": 0.5609, + "step": 871 + }, + { + "epoch": 0.897119341563786, + "grad_norm": 6.9305219650268555, + "learning_rate": 3.1583592938733126e-05, + "loss": 0.4103, + "step": 872 + }, + { + "epoch": 0.8981481481481481, + "grad_norm": 7.279191493988037, + "learning_rate": 3.1619937694704046e-05, + "loss": 0.4226, + "step": 873 + }, + { + "epoch": 0.8991769547325102, + "grad_norm": 11.90969181060791, + "learning_rate": 3.165628245067497e-05, + "loss": 1.3344, + "step": 874 + }, + { + "epoch": 0.9002057613168725, + "grad_norm": 1.5162785053253174, + "learning_rate": 3.1692627206645894e-05, + "loss": 0.0354, + "step": 875 + }, + { + "epoch": 0.9012345679012346, + "grad_norm": 1.3723441362380981, + "learning_rate": 3.172897196261682e-05, + "loss": 0.0377, + "step": 876 + } + ], + "logging_steps": 1, + "max_steps": 2916, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 292, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": null +}