diff --git "a/checkpoint-14685/trainer_state.json" "b/checkpoint-14685/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-14685/trainer_state.json" @@ -0,0 +1,3196 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5, + "eval_steps": 735, + "global_step": 14685, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02502553626149132, + "grad_norm": 65.55949401855469, + "learning_rate": 4.834865509022812e-07, + "loss": 16.851, + "step": 147 + }, + { + "epoch": 0.05005107252298264, + "grad_norm": 23.207971572875977, + "learning_rate": 9.805924412665985e-07, + "loss": 11.2787, + "step": 294 + }, + { + "epoch": 0.07507660878447395, + "grad_norm": 176.1532440185547, + "learning_rate": 1.481103166496425e-06, + "loss": 8.9166, + "step": 441 + }, + { + "epoch": 0.10010214504596528, + "grad_norm": 22.1564998626709, + "learning_rate": 1.981613891726251e-06, + "loss": 7.9463, + "step": 588 + }, + { + "epoch": 0.12512768130745658, + "grad_norm": 20.11876106262207, + "learning_rate": 2.4821246169560777e-06, + "loss": 7.2108, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_nli-pairs_loss": 6.905651569366455, + "eval_nli-pairs_runtime": 4.0844, + "eval_nli-pairs_samples_per_second": 36.725, + "eval_nli-pairs_steps_per_second": 1.224, + "eval_sts-test_pearson_cosine": 0.3740256550072784, + "eval_sts-test_pearson_dot": 0.13384893803205677, + "eval_sts-test_pearson_euclidean": 0.3912387619869807, + "eval_sts-test_pearson_manhattan": 0.4202605137823524, + "eval_sts-test_pearson_max": 0.4202605137823524, + "eval_sts-test_spearman_cosine": 0.37210107338950205, + "eval_sts-test_spearman_dot": 0.12092409843417483, + "eval_sts-test_spearman_euclidean": 0.39172287978780546, + "eval_sts-test_spearman_manhattan": 0.4169664738563951, + "eval_sts-test_spearman_max": 0.4169664738563951, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_vitaminc-pairs_loss": 5.720878601074219, + "eval_vitaminc-pairs_runtime": 2.1703, + "eval_vitaminc-pairs_samples_per_second": 69.115, + "eval_vitaminc-pairs_steps_per_second": 2.304, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_qnli-contrastive_loss": 8.1649751663208, + "eval_qnli-contrastive_runtime": 0.4937, + "eval_qnli-contrastive_samples_per_second": 303.841, + "eval_qnli-contrastive_steps_per_second": 10.128, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_scitail-pairs-qa_loss": 3.7859296798706055, + "eval_scitail-pairs-qa_runtime": 1.1509, + "eval_scitail-pairs-qa_samples_per_second": 130.329, + "eval_scitail-pairs-qa_steps_per_second": 4.344, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_scitail-pairs-pos_loss": 3.9919917583465576, + "eval_scitail-pairs-pos_runtime": 2.1442, + "eval_scitail-pairs-pos_samples_per_second": 69.956, + "eval_scitail-pairs-pos_steps_per_second": 2.332, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_xsum-pairs_loss": 4.600368976593018, + "eval_xsum-pairs_runtime": 2.26, + "eval_xsum-pairs_samples_per_second": 66.371, + "eval_xsum-pairs_steps_per_second": 2.212, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_compression-pairs_loss": 3.3037569522857666, + "eval_compression-pairs_runtime": 0.449, + "eval_compression-pairs_samples_per_second": 334.078, + "eval_compression-pairs_steps_per_second": 11.136, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_sciq_pairs_loss": 10.214456558227539, + "eval_sciq_pairs_runtime": 7.1179, + "eval_sciq_pairs_samples_per_second": 21.074, + "eval_sciq_pairs_steps_per_second": 0.702, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_qasc_pairs_loss": 10.58031940460205, + "eval_qasc_pairs_runtime": 2.0175, + "eval_qasc_pairs_samples_per_second": 74.348, + "eval_qasc_pairs_steps_per_second": 2.478, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_openbookqa_pairs_loss": 7.862658977508545, + "eval_openbookqa_pairs_runtime": 0.8571, + "eval_openbookqa_pairs_samples_per_second": 120.168, + "eval_openbookqa_pairs_steps_per_second": 4.667, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_msmarco_pairs_loss": 8.754273414611816, + "eval_msmarco_pairs_runtime": 2.7533, + "eval_msmarco_pairs_samples_per_second": 54.481, + "eval_msmarco_pairs_steps_per_second": 1.816, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_nq_pairs_loss": 8.415486335754395, + "eval_nq_pairs_runtime": 5.0894, + "eval_nq_pairs_samples_per_second": 29.473, + "eval_nq_pairs_steps_per_second": 0.982, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_trivia_pairs_loss": 9.051105499267578, + "eval_trivia_pairs_runtime": 9.5498, + "eval_trivia_pairs_samples_per_second": 15.707, + "eval_trivia_pairs_steps_per_second": 0.524, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_quora_pairs_loss": 4.5232110023498535, + "eval_quora_pairs_runtime": 1.1469, + "eval_quora_pairs_samples_per_second": 130.785, + "eval_quora_pairs_steps_per_second": 4.36, + "step": 735 + }, + { + "epoch": 0.12512768130745658, + "eval_gooaq_pairs_loss": 7.579105854034424, + "eval_gooaq_pairs_runtime": 2.0491, + "eval_gooaq_pairs_samples_per_second": 73.203, + "eval_gooaq_pairs_steps_per_second": 2.44, + "step": 735 + }, + { + "epoch": 0.1501532175689479, + "grad_norm": 31.7736759185791, + "learning_rate": 2.982635342185904e-06, + "loss": 6.7709, + "step": 882 + }, + { + "epoch": 0.1751787538304392, + "grad_norm": 31.57339096069336, + "learning_rate": 3.4831460674157306e-06, + "loss": 6.1746, + "step": 1029 + }, + { + "epoch": 0.20020429009193055, + "grad_norm": 25.392702102661133, + "learning_rate": 3.9836567926455565e-06, + "loss": 5.7706, + "step": 1176 + }, + { + "epoch": 0.22522982635342187, + "grad_norm": 32.390472412109375, + "learning_rate": 4.484167517875383e-06, + "loss": 5.7283, + "step": 1323 + }, + { + "epoch": 0.25025536261491316, + "grad_norm": 18.85039520263672, + "learning_rate": 4.98467824310521e-06, + "loss": 5.1856, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_nli-pairs_loss": 4.352054119110107, + "eval_nli-pairs_runtime": 4.1476, + "eval_nli-pairs_samples_per_second": 36.165, + "eval_nli-pairs_steps_per_second": 1.206, + "eval_sts-test_pearson_cosine": 0.6694155778571752, + "eval_sts-test_pearson_dot": 0.5201102118957572, + "eval_sts-test_pearson_euclidean": 0.6613028243200022, + "eval_sts-test_pearson_manhattan": 0.6670710500315469, + "eval_sts-test_pearson_max": 0.6694155778571752, + "eval_sts-test_spearman_cosine": 0.6367853204388882, + "eval_sts-test_spearman_dot": 0.4940207180607985, + "eval_sts-test_spearman_euclidean": 0.6391132775161348, + "eval_sts-test_spearman_manhattan": 0.6446159957787251, + "eval_sts-test_spearman_max": 0.6446159957787251, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_vitaminc-pairs_loss": 3.4987735748291016, + "eval_vitaminc-pairs_runtime": 2.1678, + "eval_vitaminc-pairs_samples_per_second": 69.194, + "eval_vitaminc-pairs_steps_per_second": 2.306, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_qnli-contrastive_loss": 12.915559768676758, + "eval_qnli-contrastive_runtime": 0.4918, + "eval_qnli-contrastive_samples_per_second": 304.99, + "eval_qnli-contrastive_steps_per_second": 10.166, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_scitail-pairs-qa_loss": 1.3250077962875366, + "eval_scitail-pairs-qa_runtime": 1.154, + "eval_scitail-pairs-qa_samples_per_second": 129.984, + "eval_scitail-pairs-qa_steps_per_second": 4.333, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_scitail-pairs-pos_loss": 2.457335948944092, + "eval_scitail-pairs-pos_runtime": 2.1475, + "eval_scitail-pairs-pos_samples_per_second": 69.85, + "eval_scitail-pairs-pos_steps_per_second": 2.328, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_xsum-pairs_loss": 3.071201801300049, + "eval_xsum-pairs_runtime": 2.2634, + "eval_xsum-pairs_samples_per_second": 66.271, + "eval_xsum-pairs_steps_per_second": 2.209, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_compression-pairs_loss": 2.0629916191101074, + "eval_compression-pairs_runtime": 0.4529, + "eval_compression-pairs_samples_per_second": 331.23, + "eval_compression-pairs_steps_per_second": 11.041, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_sciq_pairs_loss": 9.06814193725586, + "eval_sciq_pairs_runtime": 7.1445, + "eval_sciq_pairs_samples_per_second": 20.995, + "eval_sciq_pairs_steps_per_second": 0.7, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_qasc_pairs_loss": 9.245658874511719, + "eval_qasc_pairs_runtime": 2.0471, + "eval_qasc_pairs_samples_per_second": 73.274, + "eval_qasc_pairs_steps_per_second": 2.442, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_openbookqa_pairs_loss": 5.652446746826172, + "eval_openbookqa_pairs_runtime": 0.8946, + "eval_openbookqa_pairs_samples_per_second": 115.14, + "eval_openbookqa_pairs_steps_per_second": 4.471, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_msmarco_pairs_loss": 4.844855785369873, + "eval_msmarco_pairs_runtime": 2.7887, + "eval_msmarco_pairs_samples_per_second": 53.788, + "eval_msmarco_pairs_steps_per_second": 1.793, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_nq_pairs_loss": 5.023958206176758, + "eval_nq_pairs_runtime": 5.0823, + "eval_nq_pairs_samples_per_second": 29.514, + "eval_nq_pairs_steps_per_second": 0.984, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_trivia_pairs_loss": 5.2907304763793945, + "eval_trivia_pairs_runtime": 9.6673, + "eval_trivia_pairs_samples_per_second": 15.516, + "eval_trivia_pairs_steps_per_second": 0.517, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_quora_pairs_loss": 1.5572240352630615, + "eval_quora_pairs_runtime": 1.1979, + "eval_quora_pairs_samples_per_second": 125.218, + "eval_quora_pairs_steps_per_second": 4.174, + "step": 1470 + }, + { + "epoch": 0.25025536261491316, + "eval_gooaq_pairs_loss": 3.970768928527832, + "eval_gooaq_pairs_runtime": 2.117, + "eval_gooaq_pairs_samples_per_second": 70.855, + "eval_gooaq_pairs_steps_per_second": 2.362, + "step": 1470 + }, + { + "epoch": 0.2752808988764045, + "grad_norm": 40.67585754394531, + "learning_rate": 5.4851889683350365e-06, + "loss": 4.185, + "step": 1617 + }, + { + "epoch": 0.3003064351378958, + "grad_norm": 45.92570495605469, + "learning_rate": 5.985699693564862e-06, + "loss": 4.6367, + "step": 1764 + }, + { + "epoch": 0.32533197139938713, + "grad_norm": 13.566838264465332, + "learning_rate": 6.486210418794688e-06, + "loss": 4.3615, + "step": 1911 + }, + { + "epoch": 0.3503575076608784, + "grad_norm": 9.495999336242676, + "learning_rate": 6.986721144024515e-06, + "loss": 4.1791, + "step": 2058 + }, + { + "epoch": 0.37538304392236976, + "grad_norm": 32.735416412353516, + "learning_rate": 7.487231869254341e-06, + "loss": 4.1051, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_nli-pairs_loss": 3.2717113494873047, + "eval_nli-pairs_runtime": 4.0124, + "eval_nli-pairs_samples_per_second": 37.384, + "eval_nli-pairs_steps_per_second": 1.246, + "eval_sts-test_pearson_cosine": 0.6958570089637609, + "eval_sts-test_pearson_dot": 0.5824298957890577, + "eval_sts-test_pearson_euclidean": 0.6893962819387462, + "eval_sts-test_pearson_manhattan": 0.6993681181979946, + "eval_sts-test_pearson_max": 0.6993681181979946, + "eval_sts-test_spearman_cosine": 0.6652712160836801, + "eval_sts-test_spearman_dot": 0.5536505624407877, + "eval_sts-test_spearman_euclidean": 0.6659844314307678, + "eval_sts-test_spearman_manhattan": 0.675740852112121, + "eval_sts-test_spearman_max": 0.675740852112121, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_vitaminc-pairs_loss": 2.7197911739349365, + "eval_vitaminc-pairs_runtime": 2.1625, + "eval_vitaminc-pairs_samples_per_second": 69.365, + "eval_vitaminc-pairs_steps_per_second": 2.312, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_qnli-contrastive_loss": 9.638714790344238, + "eval_qnli-contrastive_runtime": 0.4877, + "eval_qnli-contrastive_samples_per_second": 307.567, + "eval_qnli-contrastive_steps_per_second": 10.252, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_scitail-pairs-qa_loss": 0.8106752634048462, + "eval_scitail-pairs-qa_runtime": 1.1588, + "eval_scitail-pairs-qa_samples_per_second": 129.449, + "eval_scitail-pairs-qa_steps_per_second": 4.315, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_scitail-pairs-pos_loss": 1.8894625902175903, + "eval_scitail-pairs-pos_runtime": 2.1181, + "eval_scitail-pairs-pos_samples_per_second": 70.817, + "eval_scitail-pairs-pos_steps_per_second": 2.361, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_xsum-pairs_loss": 2.262718439102173, + "eval_xsum-pairs_runtime": 2.2585, + "eval_xsum-pairs_samples_per_second": 66.416, + "eval_xsum-pairs_steps_per_second": 2.214, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_compression-pairs_loss": 1.4910633563995361, + "eval_compression-pairs_runtime": 0.4462, + "eval_compression-pairs_samples_per_second": 336.204, + "eval_compression-pairs_steps_per_second": 11.207, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_sciq_pairs_loss": 8.59740161895752, + "eval_sciq_pairs_runtime": 7.1845, + "eval_sciq_pairs_samples_per_second": 20.878, + "eval_sciq_pairs_steps_per_second": 0.696, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_qasc_pairs_loss": 8.103879928588867, + "eval_qasc_pairs_runtime": 2.0762, + "eval_qasc_pairs_samples_per_second": 72.246, + "eval_qasc_pairs_steps_per_second": 2.408, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_openbookqa_pairs_loss": 5.090969562530518, + "eval_openbookqa_pairs_runtime": 0.89, + "eval_openbookqa_pairs_samples_per_second": 115.726, + "eval_openbookqa_pairs_steps_per_second": 4.494, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_msmarco_pairs_loss": 3.9566943645477295, + "eval_msmarco_pairs_runtime": 2.8183, + "eval_msmarco_pairs_samples_per_second": 53.223, + "eval_msmarco_pairs_steps_per_second": 1.774, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_nq_pairs_loss": 4.009054183959961, + "eval_nq_pairs_runtime": 5.0219, + "eval_nq_pairs_samples_per_second": 29.869, + "eval_nq_pairs_steps_per_second": 0.996, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_trivia_pairs_loss": 4.286431312561035, + "eval_trivia_pairs_runtime": 9.4975, + "eval_trivia_pairs_samples_per_second": 15.794, + "eval_trivia_pairs_steps_per_second": 0.526, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_quora_pairs_loss": 1.123273491859436, + "eval_quora_pairs_runtime": 1.1487, + "eval_quora_pairs_samples_per_second": 130.586, + "eval_quora_pairs_steps_per_second": 4.353, + "step": 2205 + }, + { + "epoch": 0.37538304392236976, + "eval_gooaq_pairs_loss": 3.222414255142212, + "eval_gooaq_pairs_runtime": 2.0173, + "eval_gooaq_pairs_samples_per_second": 74.357, + "eval_gooaq_pairs_steps_per_second": 2.479, + "step": 2205 + }, + { + "epoch": 0.4004085801838611, + "grad_norm": 218.56105041503906, + "learning_rate": 7.987742594484168e-06, + "loss": 3.7674, + "step": 2352 + }, + { + "epoch": 0.4254341164453524, + "grad_norm": 27.877609252929688, + "learning_rate": 8.488253319713993e-06, + "loss": 3.8729, + "step": 2499 + }, + { + "epoch": 0.45045965270684374, + "grad_norm": 33.50013732910156, + "learning_rate": 8.988764044943822e-06, + "loss": 3.4527, + "step": 2646 + }, + { + "epoch": 0.475485188968335, + "grad_norm": 14.015911102294922, + "learning_rate": 9.489274770173647e-06, + "loss": 3.3545, + "step": 2793 + }, + { + "epoch": 0.5005107252298263, + "grad_norm": 33.59694290161133, + "learning_rate": 9.989785495403473e-06, + "loss": 3.3247, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_nli-pairs_loss": 2.7121565341949463, + "eval_nli-pairs_runtime": 4.1564, + "eval_nli-pairs_samples_per_second": 36.089, + "eval_nli-pairs_steps_per_second": 1.203, + "eval_sts-test_pearson_cosine": 0.716623047702725, + "eval_sts-test_pearson_dot": 0.6128451070598809, + "eval_sts-test_pearson_euclidean": 0.7138791236031807, + "eval_sts-test_pearson_manhattan": 0.7213151818687454, + "eval_sts-test_pearson_max": 0.7213151818687454, + "eval_sts-test_spearman_cosine": 0.6919792400941177, + "eval_sts-test_spearman_dot": 0.5867158357121192, + "eval_sts-test_spearman_euclidean": 0.6925037259567834, + "eval_sts-test_spearman_manhattan": 0.7008895667910079, + "eval_sts-test_spearman_max": 0.7008895667910079, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_vitaminc-pairs_loss": 2.225992441177368, + "eval_vitaminc-pairs_runtime": 2.253, + "eval_vitaminc-pairs_samples_per_second": 66.577, + "eval_vitaminc-pairs_steps_per_second": 2.219, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_qnli-contrastive_loss": 4.92629861831665, + "eval_qnli-contrastive_runtime": 0.5005, + "eval_qnli-contrastive_samples_per_second": 299.691, + "eval_qnli-contrastive_steps_per_second": 9.99, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_scitail-pairs-qa_loss": 0.5898066163063049, + "eval_scitail-pairs-qa_runtime": 1.2227, + "eval_scitail-pairs-qa_samples_per_second": 122.682, + "eval_scitail-pairs-qa_steps_per_second": 4.089, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_scitail-pairs-pos_loss": 1.4237287044525146, + "eval_scitail-pairs-pos_runtime": 2.4409, + "eval_scitail-pairs-pos_samples_per_second": 61.452, + "eval_scitail-pairs-pos_steps_per_second": 2.048, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_xsum-pairs_loss": 1.8388895988464355, + "eval_xsum-pairs_runtime": 2.2831, + "eval_xsum-pairs_samples_per_second": 65.7, + "eval_xsum-pairs_steps_per_second": 2.19, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_compression-pairs_loss": 1.1590967178344727, + "eval_compression-pairs_runtime": 0.5152, + "eval_compression-pairs_samples_per_second": 291.165, + "eval_compression-pairs_steps_per_second": 9.706, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_sciq_pairs_loss": 8.282496452331543, + "eval_sciq_pairs_runtime": 7.2871, + "eval_sciq_pairs_samples_per_second": 20.584, + "eval_sciq_pairs_steps_per_second": 0.686, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_qasc_pairs_loss": 7.817965507507324, + "eval_qasc_pairs_runtime": 2.0211, + "eval_qasc_pairs_samples_per_second": 74.218, + "eval_qasc_pairs_steps_per_second": 2.474, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_openbookqa_pairs_loss": 4.619383811950684, + "eval_openbookqa_pairs_runtime": 0.8531, + "eval_openbookqa_pairs_samples_per_second": 120.731, + "eval_openbookqa_pairs_steps_per_second": 4.689, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_msmarco_pairs_loss": 3.478559970855713, + "eval_msmarco_pairs_runtime": 2.7512, + "eval_msmarco_pairs_samples_per_second": 54.522, + "eval_msmarco_pairs_steps_per_second": 1.817, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_nq_pairs_loss": 3.3449866771698, + "eval_nq_pairs_runtime": 5.0591, + "eval_nq_pairs_samples_per_second": 29.649, + "eval_nq_pairs_steps_per_second": 0.988, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_trivia_pairs_loss": 3.524484872817993, + "eval_trivia_pairs_runtime": 9.662, + "eval_trivia_pairs_samples_per_second": 15.525, + "eval_trivia_pairs_steps_per_second": 0.517, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_quora_pairs_loss": 0.9095575213432312, + "eval_quora_pairs_runtime": 1.2482, + "eval_quora_pairs_samples_per_second": 120.175, + "eval_quora_pairs_steps_per_second": 4.006, + "step": 2940 + }, + { + "epoch": 0.5005107252298263, + "eval_gooaq_pairs_loss": 2.6586034297943115, + "eval_gooaq_pairs_runtime": 2.1091, + "eval_gooaq_pairs_samples_per_second": 71.12, + "eval_gooaq_pairs_steps_per_second": 2.371, + "step": 2940 + }, + { + "epoch": 0.5255362614913177, + "grad_norm": 35.33409118652344, + "learning_rate": 1.04902962206333e-05, + "loss": 3.116, + "step": 3087 + }, + { + "epoch": 0.550561797752809, + "grad_norm": 22.29003143310547, + "learning_rate": 1.0990806945863125e-05, + "loss": 3.2418, + "step": 3234 + }, + { + "epoch": 0.5755873340143003, + "grad_norm": 31.277965545654297, + "learning_rate": 1.1491317671092953e-05, + "loss": 3.0757, + "step": 3381 + }, + { + "epoch": 0.6006128702757916, + "grad_norm": 24.612506866455078, + "learning_rate": 1.1991828396322778e-05, + "loss": 2.8524, + "step": 3528 + }, + { + "epoch": 0.625638406537283, + "grad_norm": 25.11741065979004, + "learning_rate": 1.2492339121552605e-05, + "loss": 2.6875, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_nli-pairs_loss": 2.479051113128662, + "eval_nli-pairs_runtime": 3.9943, + "eval_nli-pairs_samples_per_second": 37.553, + "eval_nli-pairs_steps_per_second": 1.252, + "eval_sts-test_pearson_cosine": 0.7278742453545186, + "eval_sts-test_pearson_dot": 0.6217650825208566, + "eval_sts-test_pearson_euclidean": 0.7243228472931561, + "eval_sts-test_pearson_manhattan": 0.7333297580184588, + "eval_sts-test_pearson_max": 0.7333297580184588, + "eval_sts-test_spearman_cosine": 0.7013110457844404, + "eval_sts-test_spearman_dot": 0.5970993074902947, + "eval_sts-test_spearman_euclidean": 0.701564129266252, + "eval_sts-test_spearman_manhattan": 0.7116482009924582, + "eval_sts-test_spearman_max": 0.7116482009924582, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_vitaminc-pairs_loss": 1.974273681640625, + "eval_vitaminc-pairs_runtime": 2.1754, + "eval_vitaminc-pairs_samples_per_second": 68.953, + "eval_vitaminc-pairs_steps_per_second": 2.298, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_qnli-contrastive_loss": 1.7706010341644287, + "eval_qnli-contrastive_runtime": 0.4866, + "eval_qnli-contrastive_samples_per_second": 308.244, + "eval_qnli-contrastive_steps_per_second": 10.275, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_scitail-pairs-qa_loss": 0.4400452673435211, + "eval_scitail-pairs-qa_runtime": 1.1519, + "eval_scitail-pairs-qa_samples_per_second": 130.222, + "eval_scitail-pairs-qa_steps_per_second": 4.341, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_scitail-pairs-pos_loss": 1.1909903287887573, + "eval_scitail-pairs-pos_runtime": 2.1319, + "eval_scitail-pairs-pos_samples_per_second": 70.36, + "eval_scitail-pairs-pos_steps_per_second": 2.345, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_xsum-pairs_loss": 1.4811985492706299, + "eval_xsum-pairs_runtime": 2.254, + "eval_xsum-pairs_samples_per_second": 66.548, + "eval_xsum-pairs_steps_per_second": 2.218, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_compression-pairs_loss": 0.8453781008720398, + "eval_compression-pairs_runtime": 0.4401, + "eval_compression-pairs_samples_per_second": 340.826, + "eval_compression-pairs_steps_per_second": 11.361, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_sciq_pairs_loss": 8.014656066894531, + "eval_sciq_pairs_runtime": 7.0707, + "eval_sciq_pairs_samples_per_second": 21.214, + "eval_sciq_pairs_steps_per_second": 0.707, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_qasc_pairs_loss": 6.9316277503967285, + "eval_qasc_pairs_runtime": 2.0338, + "eval_qasc_pairs_samples_per_second": 73.752, + "eval_qasc_pairs_steps_per_second": 2.458, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_openbookqa_pairs_loss": 4.21690034866333, + "eval_openbookqa_pairs_runtime": 0.918, + "eval_openbookqa_pairs_samples_per_second": 112.202, + "eval_openbookqa_pairs_steps_per_second": 4.357, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_msmarco_pairs_loss": 3.0209598541259766, + "eval_msmarco_pairs_runtime": 2.7749, + "eval_msmarco_pairs_samples_per_second": 54.056, + "eval_msmarco_pairs_steps_per_second": 1.802, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_nq_pairs_loss": 2.956088066101074, + "eval_nq_pairs_runtime": 5.0024, + "eval_nq_pairs_samples_per_second": 29.986, + "eval_nq_pairs_steps_per_second": 1.0, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_trivia_pairs_loss": 3.17364501953125, + "eval_trivia_pairs_runtime": 9.4856, + "eval_trivia_pairs_samples_per_second": 15.813, + "eval_trivia_pairs_steps_per_second": 0.527, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_quora_pairs_loss": 0.763593852519989, + "eval_quora_pairs_runtime": 1.1441, + "eval_quora_pairs_samples_per_second": 131.104, + "eval_quora_pairs_steps_per_second": 4.37, + "step": 3675 + }, + { + "epoch": 0.625638406537283, + "eval_gooaq_pairs_loss": 2.3524909019470215, + "eval_gooaq_pairs_runtime": 2.0161, + "eval_gooaq_pairs_samples_per_second": 74.4, + "eval_gooaq_pairs_steps_per_second": 2.48, + "step": 3675 + }, + { + "epoch": 0.6506639427987743, + "grad_norm": 31.163997650146484, + "learning_rate": 1.2992849846782432e-05, + "loss": 2.7808, + "step": 3822 + }, + { + "epoch": 0.6756894790602656, + "grad_norm": 14.883658409118652, + "learning_rate": 1.3493360572012258e-05, + "loss": 2.5687, + "step": 3969 + }, + { + "epoch": 0.7007150153217568, + "grad_norm": 5.874042987823486, + "learning_rate": 1.3993871297242083e-05, + "loss": 2.3034, + "step": 4116 + }, + { + "epoch": 0.7257405515832482, + "grad_norm": 31.464054107666016, + "learning_rate": 1.4494382022471912e-05, + "loss": 2.4412, + "step": 4263 + }, + { + "epoch": 0.7507660878447395, + "grad_norm": 16.43915367126465, + "learning_rate": 1.4994892747701737e-05, + "loss": 2.3293, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_nli-pairs_loss": 2.3226094245910645, + "eval_nli-pairs_runtime": 4.113, + "eval_nli-pairs_samples_per_second": 36.47, + "eval_nli-pairs_steps_per_second": 1.216, + "eval_sts-test_pearson_cosine": 0.7356971966139032, + "eval_sts-test_pearson_dot": 0.6150809513049869, + "eval_sts-test_pearson_euclidean": 0.7330733579988641, + "eval_sts-test_pearson_manhattan": 0.7423412248131348, + "eval_sts-test_pearson_max": 0.7423412248131348, + "eval_sts-test_spearman_cosine": 0.7121899723082045, + "eval_sts-test_spearman_dot": 0.5926505936679538, + "eval_sts-test_spearman_euclidean": 0.7130179905407037, + "eval_sts-test_spearman_manhattan": 0.7227257562995023, + "eval_sts-test_spearman_max": 0.7227257562995023, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_vitaminc-pairs_loss": 1.7956713438034058, + "eval_vitaminc-pairs_runtime": 2.174, + "eval_vitaminc-pairs_samples_per_second": 68.996, + "eval_vitaminc-pairs_steps_per_second": 2.3, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_qnli-contrastive_loss": 1.0078614950180054, + "eval_qnli-contrastive_runtime": 0.4874, + "eval_qnli-contrastive_samples_per_second": 307.763, + "eval_qnli-contrastive_steps_per_second": 10.259, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_scitail-pairs-qa_loss": 0.36971578001976013, + "eval_scitail-pairs-qa_runtime": 1.164, + "eval_scitail-pairs-qa_samples_per_second": 128.863, + "eval_scitail-pairs-qa_steps_per_second": 4.295, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_scitail-pairs-pos_loss": 1.0497769117355347, + "eval_scitail-pairs-pos_runtime": 2.1205, + "eval_scitail-pairs-pos_samples_per_second": 70.74, + "eval_scitail-pairs-pos_steps_per_second": 2.358, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_xsum-pairs_loss": 1.1691261529922485, + "eval_xsum-pairs_runtime": 2.259, + "eval_xsum-pairs_samples_per_second": 66.401, + "eval_xsum-pairs_steps_per_second": 2.213, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_compression-pairs_loss": 0.5027483105659485, + "eval_compression-pairs_runtime": 0.4403, + "eval_compression-pairs_samples_per_second": 340.682, + "eval_compression-pairs_steps_per_second": 11.356, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_sciq_pairs_loss": 7.823739528656006, + "eval_sciq_pairs_runtime": 7.0738, + "eval_sciq_pairs_samples_per_second": 21.205, + "eval_sciq_pairs_steps_per_second": 0.707, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_qasc_pairs_loss": 6.404655933380127, + "eval_qasc_pairs_runtime": 2.0346, + "eval_qasc_pairs_samples_per_second": 73.723, + "eval_qasc_pairs_steps_per_second": 2.457, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_openbookqa_pairs_loss": 3.857389211654663, + "eval_openbookqa_pairs_runtime": 0.8544, + "eval_openbookqa_pairs_samples_per_second": 120.547, + "eval_openbookqa_pairs_steps_per_second": 4.681, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_msmarco_pairs_loss": 2.7028510570526123, + "eval_msmarco_pairs_runtime": 2.7448, + "eval_msmarco_pairs_samples_per_second": 54.649, + "eval_msmarco_pairs_steps_per_second": 1.822, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_nq_pairs_loss": 2.679351329803467, + "eval_nq_pairs_runtime": 5.067, + "eval_nq_pairs_samples_per_second": 29.603, + "eval_nq_pairs_steps_per_second": 0.987, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_trivia_pairs_loss": 2.8798065185546875, + "eval_trivia_pairs_runtime": 9.5449, + "eval_trivia_pairs_samples_per_second": 15.715, + "eval_trivia_pairs_steps_per_second": 0.524, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_quora_pairs_loss": 0.6825175285339355, + "eval_quora_pairs_runtime": 1.1431, + "eval_quora_pairs_samples_per_second": 131.221, + "eval_quora_pairs_steps_per_second": 4.374, + "step": 4410 + }, + { + "epoch": 0.7507660878447395, + "eval_gooaq_pairs_loss": 2.0472166538238525, + "eval_gooaq_pairs_runtime": 2.0218, + "eval_gooaq_pairs_samples_per_second": 74.191, + "eval_gooaq_pairs_steps_per_second": 2.473, + "step": 4410 + }, + { + "epoch": 0.7757916241062308, + "grad_norm": 4.2425055503845215, + "learning_rate": 1.5495403472931565e-05, + "loss": 2.3651, + "step": 4557 + }, + { + "epoch": 0.8008171603677222, + "grad_norm": 22.42776107788086, + "learning_rate": 1.5995914198161388e-05, + "loss": 2.6296, + "step": 4704 + }, + { + "epoch": 0.8258426966292135, + "grad_norm": 21.169517517089844, + "learning_rate": 1.6496424923391215e-05, + "loss": 2.2108, + "step": 4851 + }, + { + "epoch": 0.8508682328907048, + "grad_norm": 23.326181411743164, + "learning_rate": 1.699693564862104e-05, + "loss": 2.1852, + "step": 4998 + }, + { + "epoch": 0.8758937691521961, + "grad_norm": 24.574176788330078, + "learning_rate": 1.7497446373850868e-05, + "loss": 2.2944, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_nli-pairs_loss": 2.0634915828704834, + "eval_nli-pairs_runtime": 4.0019, + "eval_nli-pairs_samples_per_second": 37.482, + "eval_nli-pairs_steps_per_second": 1.249, + "eval_sts-test_pearson_cosine": 0.7466390532977636, + "eval_sts-test_pearson_dot": 0.612259458274589, + "eval_sts-test_pearson_euclidean": 0.7432536346376271, + "eval_sts-test_pearson_manhattan": 0.7500490179501229, + "eval_sts-test_pearson_max": 0.7500490179501229, + "eval_sts-test_spearman_cosine": 0.728273260456201, + "eval_sts-test_spearman_dot": 0.5960115087190596, + "eval_sts-test_spearman_euclidean": 0.7272394395622148, + "eval_sts-test_spearman_manhattan": 0.7334149564445704, + "eval_sts-test_spearman_max": 0.7334149564445704, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_vitaminc-pairs_loss": 1.638654112815857, + "eval_vitaminc-pairs_runtime": 2.1637, + "eval_vitaminc-pairs_samples_per_second": 69.327, + "eval_vitaminc-pairs_steps_per_second": 2.311, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_qnli-contrastive_loss": 0.9639705419540405, + "eval_qnli-contrastive_runtime": 0.4889, + "eval_qnli-contrastive_samples_per_second": 306.825, + "eval_qnli-contrastive_steps_per_second": 10.228, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_scitail-pairs-qa_loss": 0.31595128774642944, + "eval_scitail-pairs-qa_runtime": 1.1467, + "eval_scitail-pairs-qa_samples_per_second": 130.806, + "eval_scitail-pairs-qa_steps_per_second": 4.36, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_scitail-pairs-pos_loss": 0.9187478423118591, + "eval_scitail-pairs-pos_runtime": 2.1273, + "eval_scitail-pairs-pos_samples_per_second": 70.512, + "eval_scitail-pairs-pos_steps_per_second": 2.35, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_xsum-pairs_loss": 1.060194492340088, + "eval_xsum-pairs_runtime": 2.2836, + "eval_xsum-pairs_samples_per_second": 65.686, + "eval_xsum-pairs_steps_per_second": 2.19, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_compression-pairs_loss": 0.41078585386276245, + "eval_compression-pairs_runtime": 0.4434, + "eval_compression-pairs_samples_per_second": 338.276, + "eval_compression-pairs_steps_per_second": 11.276, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_sciq_pairs_loss": 7.577760696411133, + "eval_sciq_pairs_runtime": 7.1025, + "eval_sciq_pairs_samples_per_second": 21.119, + "eval_sciq_pairs_steps_per_second": 0.704, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_qasc_pairs_loss": 6.353766918182373, + "eval_qasc_pairs_runtime": 2.0113, + "eval_qasc_pairs_samples_per_second": 74.58, + "eval_qasc_pairs_steps_per_second": 2.486, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_openbookqa_pairs_loss": 3.7140932083129883, + "eval_openbookqa_pairs_runtime": 0.8529, + "eval_openbookqa_pairs_samples_per_second": 120.762, + "eval_openbookqa_pairs_steps_per_second": 4.69, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_msmarco_pairs_loss": 2.3862576484680176, + "eval_msmarco_pairs_runtime": 2.8953, + "eval_msmarco_pairs_samples_per_second": 51.808, + "eval_msmarco_pairs_steps_per_second": 1.727, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_nq_pairs_loss": 2.3543190956115723, + "eval_nq_pairs_runtime": 5.0048, + "eval_nq_pairs_samples_per_second": 29.971, + "eval_nq_pairs_steps_per_second": 0.999, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_trivia_pairs_loss": 2.494807481765747, + "eval_trivia_pairs_runtime": 9.5513, + "eval_trivia_pairs_samples_per_second": 15.705, + "eval_trivia_pairs_steps_per_second": 0.523, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_quora_pairs_loss": 0.6137441992759705, + "eval_quora_pairs_runtime": 1.1541, + "eval_quora_pairs_samples_per_second": 129.967, + "eval_quora_pairs_steps_per_second": 4.332, + "step": 5145 + }, + { + "epoch": 0.8758937691521961, + "eval_gooaq_pairs_loss": 1.8279658555984497, + "eval_gooaq_pairs_runtime": 2.0951, + "eval_gooaq_pairs_samples_per_second": 71.595, + "eval_gooaq_pairs_steps_per_second": 2.387, + "step": 5145 + }, + { + "epoch": 0.9009193054136875, + "grad_norm": 10.590804100036621, + "learning_rate": 1.7997957099080695e-05, + "loss": 2.2133, + "step": 5292 + }, + { + "epoch": 0.9259448416751788, + "grad_norm": 18.527711868286133, + "learning_rate": 1.849846782431052e-05, + "loss": 2.2255, + "step": 5439 + }, + { + "epoch": 0.95097037793667, + "grad_norm": 2.617710828781128, + "learning_rate": 1.8995573714674838e-05, + "loss": 2.3502, + "step": 5586 + }, + { + "epoch": 0.9759959141981613, + "grad_norm": 19.551551818847656, + "learning_rate": 1.9496084439904668e-05, + "loss": 1.8964, + "step": 5733 + }, + { + "epoch": 1.0010214504596526, + "grad_norm": 11.783225059509277, + "learning_rate": 1.999319033026898e-05, + "loss": 1.913, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_nli-pairs_loss": 1.9677053689956665, + "eval_nli-pairs_runtime": 4.3863, + "eval_nli-pairs_samples_per_second": 34.198, + "eval_nli-pairs_steps_per_second": 1.14, + "eval_sts-test_pearson_cosine": 0.7531824359441671, + "eval_sts-test_pearson_dot": 0.602579906515822, + "eval_sts-test_pearson_euclidean": 0.7486763477944213, + "eval_sts-test_pearson_manhattan": 0.7566220287347274, + "eval_sts-test_pearson_max": 0.7566220287347274, + "eval_sts-test_spearman_cosine": 0.7387792578665129, + "eval_sts-test_spearman_dot": 0.5926594656319394, + "eval_sts-test_spearman_euclidean": 0.733653805383597, + "eval_sts-test_spearman_manhattan": 0.7420657558603486, + "eval_sts-test_spearman_max": 0.7420657558603486, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_vitaminc-pairs_loss": 1.4394291639328003, + "eval_vitaminc-pairs_runtime": 2.2575, + "eval_vitaminc-pairs_samples_per_second": 66.446, + "eval_vitaminc-pairs_steps_per_second": 2.215, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_qnli-contrastive_loss": 0.45715218782424927, + "eval_qnli-contrastive_runtime": 0.501, + "eval_qnli-contrastive_samples_per_second": 299.385, + "eval_qnli-contrastive_steps_per_second": 9.979, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_scitail-pairs-qa_loss": 0.26679515838623047, + "eval_scitail-pairs-qa_runtime": 1.4342, + "eval_scitail-pairs-qa_samples_per_second": 104.587, + "eval_scitail-pairs-qa_steps_per_second": 3.486, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_scitail-pairs-pos_loss": 0.8628473281860352, + "eval_scitail-pairs-pos_runtime": 2.3485, + "eval_scitail-pairs-pos_samples_per_second": 63.871, + "eval_scitail-pairs-pos_steps_per_second": 2.129, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_xsum-pairs_loss": 0.9014443755149841, + "eval_xsum-pairs_runtime": 2.2896, + "eval_xsum-pairs_samples_per_second": 65.513, + "eval_xsum-pairs_steps_per_second": 2.184, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_compression-pairs_loss": 0.3047434389591217, + "eval_compression-pairs_runtime": 0.4852, + "eval_compression-pairs_samples_per_second": 309.163, + "eval_compression-pairs_steps_per_second": 10.305, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_sciq_pairs_loss": 1.091601848602295, + "eval_sciq_pairs_runtime": 7.3046, + "eval_sciq_pairs_samples_per_second": 20.535, + "eval_sciq_pairs_steps_per_second": 0.684, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_qasc_pairs_loss": 5.947833061218262, + "eval_qasc_pairs_runtime": 2.1787, + "eval_qasc_pairs_samples_per_second": 68.849, + "eval_qasc_pairs_steps_per_second": 2.295, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_openbookqa_pairs_loss": 3.4724366664886475, + "eval_openbookqa_pairs_runtime": 0.9106, + "eval_openbookqa_pairs_samples_per_second": 113.111, + "eval_openbookqa_pairs_steps_per_second": 4.393, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_msmarco_pairs_loss": 2.1638240814208984, + "eval_msmarco_pairs_runtime": 2.82, + "eval_msmarco_pairs_samples_per_second": 53.191, + "eval_msmarco_pairs_steps_per_second": 1.773, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_nq_pairs_loss": 2.110903739929199, + "eval_nq_pairs_runtime": 5.2303, + "eval_nq_pairs_samples_per_second": 28.679, + "eval_nq_pairs_steps_per_second": 0.956, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_trivia_pairs_loss": 2.3711097240448, + "eval_trivia_pairs_runtime": 9.6247, + "eval_trivia_pairs_samples_per_second": 15.585, + "eval_trivia_pairs_steps_per_second": 0.519, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_quora_pairs_loss": 0.5216041803359985, + "eval_quora_pairs_runtime": 1.3072, + "eval_quora_pairs_samples_per_second": 114.749, + "eval_quora_pairs_steps_per_second": 3.825, + "step": 5880 + }, + { + "epoch": 1.0010214504596526, + "eval_gooaq_pairs_loss": 1.7041363716125488, + "eval_gooaq_pairs_runtime": 2.0973, + "eval_gooaq_pairs_samples_per_second": 71.521, + "eval_gooaq_pairs_steps_per_second": 2.384, + "step": 5880 + }, + { + "epoch": 1.026046986721144, + "grad_norm": 17.308378219604492, + "learning_rate": 2.0493701055498808e-05, + "loss": 1.7772, + "step": 6027 + }, + { + "epoch": 1.0510725229826354, + "grad_norm": 20.248981475830078, + "learning_rate": 2.0994211780728634e-05, + "loss": 1.9079, + "step": 6174 + }, + { + "epoch": 1.0760980592441267, + "grad_norm": 6.012618064880371, + "learning_rate": 2.1494722505958464e-05, + "loss": 1.8657, + "step": 6321 + }, + { + "epoch": 1.101123595505618, + "grad_norm": 1.1185024976730347, + "learning_rate": 2.1995233231188288e-05, + "loss": 1.7144, + "step": 6468 + }, + { + "epoch": 1.1261491317671093, + "grad_norm": 1.2436251640319824, + "learning_rate": 2.2495743956418114e-05, + "loss": 1.7661, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_nli-pairs_loss": 1.7907973527908325, + "eval_nli-pairs_runtime": 4.0147, + "eval_nli-pairs_samples_per_second": 37.363, + "eval_nli-pairs_steps_per_second": 1.245, + "eval_sts-test_pearson_cosine": 0.755444461779583, + "eval_sts-test_pearson_dot": 0.5833168145328357, + "eval_sts-test_pearson_euclidean": 0.7437155007996056, + "eval_sts-test_pearson_manhattan": 0.7524938984567344, + "eval_sts-test_pearson_max": 0.755444461779583, + "eval_sts-test_spearman_cosine": 0.7446166596886566, + "eval_sts-test_spearman_dot": 0.5792340720766105, + "eval_sts-test_spearman_euclidean": 0.7317285388028532, + "eval_sts-test_spearman_manhattan": 0.7401637904976945, + "eval_sts-test_spearman_max": 0.7446166596886566, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_vitaminc-pairs_loss": 1.3403607606887817, + "eval_vitaminc-pairs_runtime": 2.168, + "eval_vitaminc-pairs_samples_per_second": 69.189, + "eval_vitaminc-pairs_steps_per_second": 2.306, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_qnli-contrastive_loss": 0.2736852467060089, + "eval_qnli-contrastive_runtime": 0.4913, + "eval_qnli-contrastive_samples_per_second": 305.336, + "eval_qnli-contrastive_steps_per_second": 10.178, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_scitail-pairs-qa_loss": 0.22441554069519043, + "eval_scitail-pairs-qa_runtime": 1.1614, + "eval_scitail-pairs-qa_samples_per_second": 129.152, + "eval_scitail-pairs-qa_steps_per_second": 4.305, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_scitail-pairs-pos_loss": 0.7723743915557861, + "eval_scitail-pairs-pos_runtime": 2.1567, + "eval_scitail-pairs-pos_samples_per_second": 69.55, + "eval_scitail-pairs-pos_steps_per_second": 2.318, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_xsum-pairs_loss": 0.8370540142059326, + "eval_xsum-pairs_runtime": 2.2569, + "eval_xsum-pairs_samples_per_second": 66.463, + "eval_xsum-pairs_steps_per_second": 2.215, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_compression-pairs_loss": 0.265947163105011, + "eval_compression-pairs_runtime": 0.4431, + "eval_compression-pairs_samples_per_second": 338.529, + "eval_compression-pairs_steps_per_second": 11.284, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_sciq_pairs_loss": 0.9383512735366821, + "eval_sciq_pairs_runtime": 7.1464, + "eval_sciq_pairs_samples_per_second": 20.99, + "eval_sciq_pairs_steps_per_second": 0.7, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_qasc_pairs_loss": 5.753899097442627, + "eval_qasc_pairs_runtime": 2.0099, + "eval_qasc_pairs_samples_per_second": 74.63, + "eval_qasc_pairs_steps_per_second": 2.488, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_openbookqa_pairs_loss": 3.3517918586730957, + "eval_openbookqa_pairs_runtime": 0.8594, + "eval_openbookqa_pairs_samples_per_second": 119.858, + "eval_openbookqa_pairs_steps_per_second": 4.655, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_msmarco_pairs_loss": 2.044360399246216, + "eval_msmarco_pairs_runtime": 2.7431, + "eval_msmarco_pairs_samples_per_second": 54.682, + "eval_msmarco_pairs_steps_per_second": 1.823, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_nq_pairs_loss": 1.9409464597702026, + "eval_nq_pairs_runtime": 5.028, + "eval_nq_pairs_samples_per_second": 29.833, + "eval_nq_pairs_steps_per_second": 0.994, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_trivia_pairs_loss": 2.369060754776001, + "eval_trivia_pairs_runtime": 9.5137, + "eval_trivia_pairs_samples_per_second": 15.767, + "eval_trivia_pairs_steps_per_second": 0.526, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_quora_pairs_loss": 0.47849634289741516, + "eval_quora_pairs_runtime": 1.1413, + "eval_quora_pairs_samples_per_second": 131.424, + "eval_quora_pairs_steps_per_second": 4.381, + "step": 6615 + }, + { + "epoch": 1.1261491317671093, + "eval_gooaq_pairs_loss": 1.5795674324035645, + "eval_gooaq_pairs_runtime": 2.0155, + "eval_gooaq_pairs_samples_per_second": 74.422, + "eval_gooaq_pairs_steps_per_second": 2.481, + "step": 6615 + }, + { + "epoch": 1.1511746680286006, + "grad_norm": 20.95261001586914, + "learning_rate": 2.299625468164794e-05, + "loss": 1.8066, + "step": 6762 + }, + { + "epoch": 1.1762002042900919, + "grad_norm": 20.31597900390625, + "learning_rate": 2.3496765406877764e-05, + "loss": 1.7438, + "step": 6909 + }, + { + "epoch": 1.2012257405515832, + "grad_norm": 28.363882064819336, + "learning_rate": 2.399727613210759e-05, + "loss": 2.0231, + "step": 7056 + }, + { + "epoch": 1.2262512768130747, + "grad_norm": 14.403656959533691, + "learning_rate": 2.449778685733742e-05, + "loss": 1.8966, + "step": 7203 + }, + { + "epoch": 1.251276813074566, + "grad_norm": 17.73562240600586, + "learning_rate": 2.4998297582567248e-05, + "loss": 1.7958, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_nli-pairs_loss": 1.5906368494033813, + "eval_nli-pairs_runtime": 4.0261, + "eval_nli-pairs_samples_per_second": 37.257, + "eval_nli-pairs_steps_per_second": 1.242, + "eval_sts-test_pearson_cosine": 0.7626661521495873, + "eval_sts-test_pearson_dot": 0.5632604768989181, + "eval_sts-test_pearson_euclidean": 0.7370060575260952, + "eval_sts-test_pearson_manhattan": 0.7472706980613159, + "eval_sts-test_pearson_max": 0.7626661521495873, + "eval_sts-test_spearman_cosine": 0.7535266725567149, + "eval_sts-test_spearman_dot": 0.5848997224802808, + "eval_sts-test_spearman_euclidean": 0.7290608032903477, + "eval_sts-test_spearman_manhattan": 0.739032087078249, + "eval_sts-test_spearman_max": 0.7535266725567149, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_vitaminc-pairs_loss": 1.222551941871643, + "eval_vitaminc-pairs_runtime": 2.1784, + "eval_vitaminc-pairs_samples_per_second": 68.857, + "eval_vitaminc-pairs_steps_per_second": 2.295, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_qnli-contrastive_loss": 0.3951484262943268, + "eval_qnli-contrastive_runtime": 0.4916, + "eval_qnli-contrastive_samples_per_second": 305.11, + "eval_qnli-contrastive_steps_per_second": 10.17, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_scitail-pairs-qa_loss": 0.17783091962337494, + "eval_scitail-pairs-qa_runtime": 1.1549, + "eval_scitail-pairs-qa_samples_per_second": 129.88, + "eval_scitail-pairs-qa_steps_per_second": 4.329, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_scitail-pairs-pos_loss": 0.7214661836624146, + "eval_scitail-pairs-pos_runtime": 2.132, + "eval_scitail-pairs-pos_samples_per_second": 70.357, + "eval_scitail-pairs-pos_steps_per_second": 2.345, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_xsum-pairs_loss": 0.7919928431510925, + "eval_xsum-pairs_runtime": 2.2579, + "eval_xsum-pairs_samples_per_second": 66.432, + "eval_xsum-pairs_steps_per_second": 2.214, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_compression-pairs_loss": 0.24975377321243286, + "eval_compression-pairs_runtime": 0.447, + "eval_compression-pairs_samples_per_second": 335.534, + "eval_compression-pairs_steps_per_second": 11.184, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_sciq_pairs_loss": 0.8343773484230042, + "eval_sciq_pairs_runtime": 7.1288, + "eval_sciq_pairs_samples_per_second": 21.042, + "eval_sciq_pairs_steps_per_second": 0.701, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_qasc_pairs_loss": 5.4840240478515625, + "eval_qasc_pairs_runtime": 2.025, + "eval_qasc_pairs_samples_per_second": 74.074, + "eval_qasc_pairs_steps_per_second": 2.469, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_openbookqa_pairs_loss": 3.1631176471710205, + "eval_openbookqa_pairs_runtime": 0.8612, + "eval_openbookqa_pairs_samples_per_second": 119.598, + "eval_openbookqa_pairs_steps_per_second": 4.645, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_msmarco_pairs_loss": 1.8952231407165527, + "eval_msmarco_pairs_runtime": 2.7585, + "eval_msmarco_pairs_samples_per_second": 54.378, + "eval_msmarco_pairs_steps_per_second": 1.813, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_nq_pairs_loss": 1.6934970617294312, + "eval_nq_pairs_runtime": 5.0253, + "eval_nq_pairs_samples_per_second": 29.849, + "eval_nq_pairs_steps_per_second": 0.995, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_trivia_pairs_loss": 1.9966663122177124, + "eval_trivia_pairs_runtime": 9.5675, + "eval_trivia_pairs_samples_per_second": 15.678, + "eval_trivia_pairs_steps_per_second": 0.523, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_quora_pairs_loss": 0.405385285615921, + "eval_quora_pairs_runtime": 1.1432, + "eval_quora_pairs_samples_per_second": 131.209, + "eval_quora_pairs_steps_per_second": 4.374, + "step": 7350 + }, + { + "epoch": 1.251276813074566, + "eval_gooaq_pairs_loss": 1.3951071500778198, + "eval_gooaq_pairs_runtime": 2.038, + "eval_gooaq_pairs_samples_per_second": 73.601, + "eval_gooaq_pairs_steps_per_second": 2.453, + "step": 7350 + }, + { + "epoch": 1.2763023493360572, + "grad_norm": 21.254159927368164, + "learning_rate": 2.549880830779707e-05, + "loss": 1.5109, + "step": 7497 + }, + { + "epoch": 1.3013278855975485, + "grad_norm": 20.08012580871582, + "learning_rate": 2.5999319033026898e-05, + "loss": 1.8119, + "step": 7644 + }, + { + "epoch": 1.3263534218590398, + "grad_norm": 0.6448306441307068, + "learning_rate": 2.6499829758256724e-05, + "loss": 1.6833, + "step": 7791 + }, + { + "epoch": 1.351378958120531, + "grad_norm": 16.65821647644043, + "learning_rate": 2.7000340483486554e-05, + "loss": 1.5917, + "step": 7938 + }, + { + "epoch": 1.3764044943820224, + "grad_norm": 14.949362754821777, + "learning_rate": 2.7500851208716378e-05, + "loss": 1.809, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_nli-pairs_loss": 1.5967836380004883, + "eval_nli-pairs_runtime": 4.0496, + "eval_nli-pairs_samples_per_second": 37.041, + "eval_nli-pairs_steps_per_second": 1.235, + "eval_sts-test_pearson_cosine": 0.7653416933913197, + "eval_sts-test_pearson_dot": 0.5401711611334493, + "eval_sts-test_pearson_euclidean": 0.7529907774019836, + "eval_sts-test_pearson_manhattan": 0.7605105025260754, + "eval_sts-test_pearson_max": 0.7653416933913197, + "eval_sts-test_spearman_cosine": 0.7593865234485873, + "eval_sts-test_spearman_dot": 0.5559615063301898, + "eval_sts-test_spearman_euclidean": 0.7436431053840061, + "eval_sts-test_spearman_manhattan": 0.7515978828464567, + "eval_sts-test_spearman_max": 0.7593865234485873, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_vitaminc-pairs_loss": 1.1434590816497803, + "eval_vitaminc-pairs_runtime": 2.2066, + "eval_vitaminc-pairs_samples_per_second": 67.977, + "eval_vitaminc-pairs_steps_per_second": 2.266, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_qnli-contrastive_loss": 0.3819103538990021, + "eval_qnli-contrastive_runtime": 0.4972, + "eval_qnli-contrastive_samples_per_second": 301.706, + "eval_qnli-contrastive_steps_per_second": 10.057, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_scitail-pairs-qa_loss": 0.15774373710155487, + "eval_scitail-pairs-qa_runtime": 1.1704, + "eval_scitail-pairs-qa_samples_per_second": 128.161, + "eval_scitail-pairs-qa_steps_per_second": 4.272, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_scitail-pairs-pos_loss": 0.6571963429450989, + "eval_scitail-pairs-pos_runtime": 2.1634, + "eval_scitail-pairs-pos_samples_per_second": 69.335, + "eval_scitail-pairs-pos_steps_per_second": 2.311, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_xsum-pairs_loss": 0.7028753757476807, + "eval_xsum-pairs_runtime": 2.2608, + "eval_xsum-pairs_samples_per_second": 66.347, + "eval_xsum-pairs_steps_per_second": 2.212, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_compression-pairs_loss": 0.23010987043380737, + "eval_compression-pairs_runtime": 0.4514, + "eval_compression-pairs_samples_per_second": 332.284, + "eval_compression-pairs_steps_per_second": 11.076, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_sciq_pairs_loss": 0.799666702747345, + "eval_sciq_pairs_runtime": 7.1816, + "eval_sciq_pairs_samples_per_second": 20.887, + "eval_sciq_pairs_steps_per_second": 0.696, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_qasc_pairs_loss": 5.433376789093018, + "eval_qasc_pairs_runtime": 2.0592, + "eval_qasc_pairs_samples_per_second": 72.843, + "eval_qasc_pairs_steps_per_second": 2.428, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_openbookqa_pairs_loss": 2.9010672569274902, + "eval_openbookqa_pairs_runtime": 0.865, + "eval_openbookqa_pairs_samples_per_second": 119.074, + "eval_openbookqa_pairs_steps_per_second": 4.624, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_msmarco_pairs_loss": 1.7567836046218872, + "eval_msmarco_pairs_runtime": 2.7812, + "eval_msmarco_pairs_samples_per_second": 53.933, + "eval_msmarco_pairs_steps_per_second": 1.798, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_nq_pairs_loss": 1.5407707691192627, + "eval_nq_pairs_runtime": 5.0607, + "eval_nq_pairs_samples_per_second": 29.64, + "eval_nq_pairs_steps_per_second": 0.988, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_trivia_pairs_loss": 1.8419283628463745, + "eval_trivia_pairs_runtime": 9.5535, + "eval_trivia_pairs_samples_per_second": 15.701, + "eval_trivia_pairs_steps_per_second": 0.523, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_quora_pairs_loss": 0.3561370372772217, + "eval_quora_pairs_runtime": 1.2005, + "eval_quora_pairs_samples_per_second": 124.946, + "eval_quora_pairs_steps_per_second": 4.165, + "step": 8085 + }, + { + "epoch": 1.3764044943820224, + "eval_gooaq_pairs_loss": 1.1745914220809937, + "eval_gooaq_pairs_runtime": 2.0463, + "eval_gooaq_pairs_samples_per_second": 73.305, + "eval_gooaq_pairs_steps_per_second": 2.443, + "step": 8085 + }, + { + "epoch": 1.401430030643514, + "grad_norm": 14.31106185913086, + "learning_rate": 2.8001361933946204e-05, + "loss": 1.5561, + "step": 8232 + }, + { + "epoch": 1.4264555669050052, + "grad_norm": 11.82392692565918, + "learning_rate": 2.850187265917603e-05, + "loss": 1.5325, + "step": 8379 + }, + { + "epoch": 1.4514811031664965, + "grad_norm": 21.716449737548828, + "learning_rate": 2.9002383384405858e-05, + "loss": 1.5085, + "step": 8526 + }, + { + "epoch": 1.4765066394279878, + "grad_norm": 6.5607147216796875, + "learning_rate": 2.950289410963568e-05, + "loss": 1.5634, + "step": 8673 + }, + { + "epoch": 1.501532175689479, + "grad_norm": 8.737595558166504, + "learning_rate": 2.9999998423842776e-05, + "loss": 1.3857, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_nli-pairs_loss": 1.454946517944336, + "eval_nli-pairs_runtime": 4.3786, + "eval_nli-pairs_samples_per_second": 34.257, + "eval_nli-pairs_steps_per_second": 1.142, + "eval_sts-test_pearson_cosine": 0.758856517299588, + "eval_sts-test_pearson_dot": 0.5254244903711445, + "eval_sts-test_pearson_euclidean": 0.7467439510002647, + "eval_sts-test_pearson_manhattan": 0.7525779346304055, + "eval_sts-test_pearson_max": 0.758856517299588, + "eval_sts-test_spearman_cosine": 0.7596605816446022, + "eval_sts-test_spearman_dot": 0.5600186533991508, + "eval_sts-test_spearman_euclidean": 0.7367598380547504, + "eval_sts-test_spearman_manhattan": 0.7440123650923844, + "eval_sts-test_spearman_max": 0.7596605816446022, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_vitaminc-pairs_loss": 1.1383781433105469, + "eval_vitaminc-pairs_runtime": 2.3314, + "eval_vitaminc-pairs_samples_per_second": 64.34, + "eval_vitaminc-pairs_steps_per_second": 2.145, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_qnli-contrastive_loss": 0.32092100381851196, + "eval_qnli-contrastive_runtime": 0.5002, + "eval_qnli-contrastive_samples_per_second": 299.881, + "eval_qnli-contrastive_steps_per_second": 9.996, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_scitail-pairs-qa_loss": 0.14513270556926727, + "eval_scitail-pairs-qa_runtime": 1.5154, + "eval_scitail-pairs-qa_samples_per_second": 98.985, + "eval_scitail-pairs-qa_steps_per_second": 3.3, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_scitail-pairs-pos_loss": 0.6857669353485107, + "eval_scitail-pairs-pos_runtime": 2.4178, + "eval_scitail-pairs-pos_samples_per_second": 62.041, + "eval_scitail-pairs-pos_steps_per_second": 2.068, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_xsum-pairs_loss": 0.683724045753479, + "eval_xsum-pairs_runtime": 2.2766, + "eval_xsum-pairs_samples_per_second": 65.887, + "eval_xsum-pairs_steps_per_second": 2.196, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_compression-pairs_loss": 0.20896266400814056, + "eval_compression-pairs_runtime": 0.4683, + "eval_compression-pairs_samples_per_second": 320.274, + "eval_compression-pairs_steps_per_second": 10.676, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_sciq_pairs_loss": 0.7911179661750793, + "eval_sciq_pairs_runtime": 7.3506, + "eval_sciq_pairs_samples_per_second": 20.407, + "eval_sciq_pairs_steps_per_second": 0.68, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_qasc_pairs_loss": 5.3092241287231445, + "eval_qasc_pairs_runtime": 2.1926, + "eval_qasc_pairs_samples_per_second": 68.411, + "eval_qasc_pairs_steps_per_second": 2.28, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_openbookqa_pairs_loss": 2.923464298248291, + "eval_openbookqa_pairs_runtime": 0.963, + "eval_openbookqa_pairs_samples_per_second": 106.961, + "eval_openbookqa_pairs_steps_per_second": 4.154, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_msmarco_pairs_loss": 1.674107313156128, + "eval_msmarco_pairs_runtime": 2.8516, + "eval_msmarco_pairs_samples_per_second": 52.602, + "eval_msmarco_pairs_steps_per_second": 1.753, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_nq_pairs_loss": 1.4419037103652954, + "eval_nq_pairs_runtime": 5.1485, + "eval_nq_pairs_samples_per_second": 29.135, + "eval_nq_pairs_steps_per_second": 0.971, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_trivia_pairs_loss": 1.7546964883804321, + "eval_trivia_pairs_runtime": 9.6901, + "eval_trivia_pairs_samples_per_second": 15.48, + "eval_trivia_pairs_steps_per_second": 0.516, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_quora_pairs_loss": 0.31785744428634644, + "eval_quora_pairs_runtime": 1.2699, + "eval_quora_pairs_samples_per_second": 118.117, + "eval_quora_pairs_steps_per_second": 3.937, + "step": 8820 + }, + { + "epoch": 1.501532175689479, + "eval_gooaq_pairs_loss": 1.1328644752502441, + "eval_gooaq_pairs_runtime": 2.1292, + "eval_gooaq_pairs_samples_per_second": 70.448, + "eval_gooaq_pairs_steps_per_second": 2.348, + "step": 8820 + }, + { + "epoch": 1.5265577119509703, + "grad_norm": 15.168123245239258, + "learning_rate": 2.9965489092992677e-05, + "loss": 1.6167, + "step": 8967 + }, + { + "epoch": 1.5515832482124616, + "grad_norm": 29.32268524169922, + "learning_rate": 2.9863043834895476e-05, + "loss": 1.6664, + "step": 9114 + }, + { + "epoch": 1.5766087844739531, + "grad_norm": 2.5159287452697754, + "learning_rate": 2.9693127700413034e-05, + "loss": 1.4785, + "step": 9261 + }, + { + "epoch": 1.6016343207354442, + "grad_norm": 17.4219970703125, + "learning_rate": 2.9456512024854113e-05, + "loss": 1.5881, + "step": 9408 + }, + { + "epoch": 1.6266598569969357, + "grad_norm": 15.60139274597168, + "learning_rate": 2.915427092649312e-05, + "loss": 1.3379, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_nli-pairs_loss": 1.3879741430282593, + "eval_nli-pairs_runtime": 4.1363, + "eval_nli-pairs_samples_per_second": 36.264, + "eval_nli-pairs_steps_per_second": 1.209, + "eval_sts-test_pearson_cosine": 0.7733483283639441, + "eval_sts-test_pearson_dot": 0.5424296843493538, + "eval_sts-test_pearson_euclidean": 0.7555770040784449, + "eval_sts-test_pearson_manhattan": 0.7604742759594404, + "eval_sts-test_pearson_max": 0.7733483283639441, + "eval_sts-test_spearman_cosine": 0.779671933510953, + "eval_sts-test_spearman_dot": 0.5784449139725663, + "eval_sts-test_spearman_euclidean": 0.7515003599642571, + "eval_sts-test_spearman_manhattan": 0.7568440288585417, + "eval_sts-test_spearman_max": 0.779671933510953, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_vitaminc-pairs_loss": 0.9942379593849182, + "eval_vitaminc-pairs_runtime": 2.2185, + "eval_vitaminc-pairs_samples_per_second": 67.613, + "eval_vitaminc-pairs_steps_per_second": 2.254, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_qnli-contrastive_loss": 0.2178214192390442, + "eval_qnli-contrastive_runtime": 0.4991, + "eval_qnli-contrastive_samples_per_second": 300.549, + "eval_qnli-contrastive_steps_per_second": 10.018, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_scitail-pairs-qa_loss": 0.13629749417304993, + "eval_scitail-pairs-qa_runtime": 1.1751, + "eval_scitail-pairs-qa_samples_per_second": 127.653, + "eval_scitail-pairs-qa_steps_per_second": 4.255, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_scitail-pairs-pos_loss": 0.5964671969413757, + "eval_scitail-pairs-pos_runtime": 2.1841, + "eval_scitail-pairs-pos_samples_per_second": 68.677, + "eval_scitail-pairs-pos_steps_per_second": 2.289, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_xsum-pairs_loss": 0.6746851205825806, + "eval_xsum-pairs_runtime": 2.2628, + "eval_xsum-pairs_samples_per_second": 66.291, + "eval_xsum-pairs_steps_per_second": 2.21, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_compression-pairs_loss": 0.17857055366039276, + "eval_compression-pairs_runtime": 0.4506, + "eval_compression-pairs_samples_per_second": 332.902, + "eval_compression-pairs_steps_per_second": 11.097, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_sciq_pairs_loss": 0.7349148988723755, + "eval_sciq_pairs_runtime": 7.116, + "eval_sciq_pairs_samples_per_second": 21.079, + "eval_sciq_pairs_steps_per_second": 0.703, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_qasc_pairs_loss": 5.115650177001953, + "eval_qasc_pairs_runtime": 2.0271, + "eval_qasc_pairs_samples_per_second": 73.997, + "eval_qasc_pairs_steps_per_second": 2.467, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_openbookqa_pairs_loss": 2.694535255432129, + "eval_openbookqa_pairs_runtime": 0.8634, + "eval_openbookqa_pairs_samples_per_second": 119.302, + "eval_openbookqa_pairs_steps_per_second": 4.633, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_msmarco_pairs_loss": 1.5184054374694824, + "eval_msmarco_pairs_runtime": 2.7561, + "eval_msmarco_pairs_samples_per_second": 54.424, + "eval_msmarco_pairs_steps_per_second": 1.814, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_nq_pairs_loss": 1.293426752090454, + "eval_nq_pairs_runtime": 5.0107, + "eval_nq_pairs_samples_per_second": 29.936, + "eval_nq_pairs_steps_per_second": 0.998, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_trivia_pairs_loss": 1.5939557552337646, + "eval_trivia_pairs_runtime": 9.5368, + "eval_trivia_pairs_samples_per_second": 15.728, + "eval_trivia_pairs_steps_per_second": 0.524, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_quora_pairs_loss": 0.31308451294898987, + "eval_quora_pairs_runtime": 1.1456, + "eval_quora_pairs_samples_per_second": 130.932, + "eval_quora_pairs_steps_per_second": 4.364, + "step": 9555 + }, + { + "epoch": 1.6266598569969357, + "eval_gooaq_pairs_loss": 1.0807112455368042, + "eval_gooaq_pairs_runtime": 2.0197, + "eval_gooaq_pairs_samples_per_second": 74.269, + "eval_gooaq_pairs_steps_per_second": 2.476, + "step": 9555 + }, + { + "epoch": 1.651685393258427, + "grad_norm": 0.7546759843826294, + "learning_rate": 2.878777643060379e-05, + "loss": 1.4469, + "step": 9702 + }, + { + "epoch": 1.6767109295199183, + "grad_norm": 0.8483991026878357, + "learning_rate": 2.835869224114224e-05, + "loss": 1.3878, + "step": 9849 + }, + { + "epoch": 1.7017364657814096, + "grad_norm": 20.814105987548828, + "learning_rate": 2.7868966188352908e-05, + "loss": 1.2764, + "step": 9996 + }, + { + "epoch": 1.7267620020429009, + "grad_norm": 3.1025094985961914, + "learning_rate": 2.73208213865815e-05, + "loss": 1.3884, + "step": 10143 + }, + { + "epoch": 1.7517875383043924, + "grad_norm": 14.80810260772705, + "learning_rate": 2.671674614243416e-05, + "loss": 1.2977, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_nli-pairs_loss": 1.3081562519073486, + "eval_nli-pairs_runtime": 4.0165, + "eval_nli-pairs_samples_per_second": 37.346, + "eval_nli-pairs_steps_per_second": 1.245, + "eval_sts-test_pearson_cosine": 0.7681143802843627, + "eval_sts-test_pearson_dot": 0.5287526695750702, + "eval_sts-test_pearson_euclidean": 0.7538805205317111, + "eval_sts-test_pearson_manhattan": 0.7596894203751682, + "eval_sts-test_pearson_max": 0.7681143802843627, + "eval_sts-test_spearman_cosine": 0.770908506196058, + "eval_sts-test_spearman_dot": 0.5670572774538138, + "eval_sts-test_spearman_euclidean": 0.7452730842318486, + "eval_sts-test_spearman_manhattan": 0.7517699916174685, + "eval_sts-test_spearman_max": 0.770908506196058, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_vitaminc-pairs_loss": 0.9676446318626404, + "eval_vitaminc-pairs_runtime": 2.1787, + "eval_vitaminc-pairs_samples_per_second": 68.85, + "eval_vitaminc-pairs_steps_per_second": 2.295, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_qnli-contrastive_loss": 0.244391530752182, + "eval_qnli-contrastive_runtime": 0.4884, + "eval_qnli-contrastive_samples_per_second": 307.113, + "eval_qnli-contrastive_steps_per_second": 10.237, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_scitail-pairs-qa_loss": 0.1264333575963974, + "eval_scitail-pairs-qa_runtime": 1.1536, + "eval_scitail-pairs-qa_samples_per_second": 130.03, + "eval_scitail-pairs-qa_steps_per_second": 4.334, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_scitail-pairs-pos_loss": 0.5472012162208557, + "eval_scitail-pairs-pos_runtime": 2.1213, + "eval_scitail-pairs-pos_samples_per_second": 70.711, + "eval_scitail-pairs-pos_steps_per_second": 2.357, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_xsum-pairs_loss": 0.5869634747505188, + "eval_xsum-pairs_runtime": 2.2876, + "eval_xsum-pairs_samples_per_second": 65.571, + "eval_xsum-pairs_steps_per_second": 2.186, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_compression-pairs_loss": 0.16663199663162231, + "eval_compression-pairs_runtime": 0.4431, + "eval_compression-pairs_samples_per_second": 338.526, + "eval_compression-pairs_steps_per_second": 11.284, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_sciq_pairs_loss": 0.6884138584136963, + "eval_sciq_pairs_runtime": 7.0451, + "eval_sciq_pairs_samples_per_second": 21.291, + "eval_sciq_pairs_steps_per_second": 0.71, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_qasc_pairs_loss": 5.099090099334717, + "eval_qasc_pairs_runtime": 2.0309, + "eval_qasc_pairs_samples_per_second": 73.86, + "eval_qasc_pairs_steps_per_second": 2.462, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_openbookqa_pairs_loss": 2.6562159061431885, + "eval_openbookqa_pairs_runtime": 0.8531, + "eval_openbookqa_pairs_samples_per_second": 120.74, + "eval_openbookqa_pairs_steps_per_second": 4.689, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_msmarco_pairs_loss": 1.3729219436645508, + "eval_msmarco_pairs_runtime": 2.7346, + "eval_msmarco_pairs_samples_per_second": 54.853, + "eval_msmarco_pairs_steps_per_second": 1.828, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_nq_pairs_loss": 1.2174726724624634, + "eval_nq_pairs_runtime": 4.9981, + "eval_nq_pairs_samples_per_second": 30.012, + "eval_nq_pairs_steps_per_second": 1.0, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_trivia_pairs_loss": 1.5839861631393433, + "eval_trivia_pairs_runtime": 9.4611, + "eval_trivia_pairs_samples_per_second": 15.854, + "eval_trivia_pairs_steps_per_second": 0.528, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_quora_pairs_loss": 0.2804078757762909, + "eval_quora_pairs_runtime": 1.1799, + "eval_quora_pairs_samples_per_second": 127.13, + "eval_quora_pairs_steps_per_second": 4.238, + "step": 10290 + }, + { + "epoch": 1.7517875383043924, + "eval_gooaq_pairs_loss": 0.9541385769844055, + "eval_gooaq_pairs_runtime": 2.1014, + "eval_gooaq_pairs_samples_per_second": 71.38, + "eval_gooaq_pairs_steps_per_second": 2.379, + "step": 10290 + }, + { + "epoch": 1.7768130745658834, + "grad_norm": 51.763004302978516, + "learning_rate": 2.6059482659094694e-05, + "loss": 1.4422, + "step": 10437 + }, + { + "epoch": 1.801838610827375, + "grad_norm": 3.4887988567352295, + "learning_rate": 2.5352014588076858e-05, + "loss": 1.4997, + "step": 10584 + }, + { + "epoch": 1.8268641470888662, + "grad_norm": 6.360722064971924, + "learning_rate": 2.4597553484920438e-05, + "loss": 1.2797, + "step": 10731 + }, + { + "epoch": 1.8518896833503575, + "grad_norm": 16.216428756713867, + "learning_rate": 2.3799524230315696e-05, + "loss": 1.2362, + "step": 10878 + }, + { + "epoch": 1.8769152196118488, + "grad_norm": 19.113628387451172, + "learning_rate": 2.2961549482836967e-05, + "loss": 1.2799, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_nli-pairs_loss": 1.2669230699539185, + "eval_nli-pairs_runtime": 4.0111, + "eval_nli-pairs_samples_per_second": 37.396, + "eval_nli-pairs_steps_per_second": 1.247, + "eval_sts-test_pearson_cosine": 0.774489523257569, + "eval_sts-test_pearson_dot": 0.5150859135257536, + "eval_sts-test_pearson_euclidean": 0.7570251269629877, + "eval_sts-test_pearson_manhattan": 0.7623769541465137, + "eval_sts-test_pearson_max": 0.774489523257569, + "eval_sts-test_spearman_cosine": 0.7816800005074528, + "eval_sts-test_spearman_dot": 0.565603897190929, + "eval_sts-test_spearman_euclidean": 0.7507848233553155, + "eval_sts-test_spearman_manhattan": 0.756029656784038, + "eval_sts-test_spearman_max": 0.7816800005074528, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_vitaminc-pairs_loss": 0.875577986240387, + "eval_vitaminc-pairs_runtime": 2.2185, + "eval_vitaminc-pairs_samples_per_second": 67.614, + "eval_vitaminc-pairs_steps_per_second": 2.254, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_qnli-contrastive_loss": 0.23095794022083282, + "eval_qnli-contrastive_runtime": 0.4906, + "eval_qnli-contrastive_samples_per_second": 305.756, + "eval_qnli-contrastive_steps_per_second": 10.192, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_scitail-pairs-qa_loss": 0.11762743443250656, + "eval_scitail-pairs-qa_runtime": 1.1505, + "eval_scitail-pairs-qa_samples_per_second": 130.379, + "eval_scitail-pairs-qa_steps_per_second": 4.346, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_scitail-pairs-pos_loss": 0.5055103898048401, + "eval_scitail-pairs-pos_runtime": 2.1912, + "eval_scitail-pairs-pos_samples_per_second": 68.456, + "eval_scitail-pairs-pos_steps_per_second": 2.282, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_xsum-pairs_loss": 0.5941822528839111, + "eval_xsum-pairs_runtime": 2.26, + "eval_xsum-pairs_samples_per_second": 66.371, + "eval_xsum-pairs_steps_per_second": 2.212, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_compression-pairs_loss": 0.16561630368232727, + "eval_compression-pairs_runtime": 0.4447, + "eval_compression-pairs_samples_per_second": 337.281, + "eval_compression-pairs_steps_per_second": 11.243, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_sciq_pairs_loss": 0.6859617233276367, + "eval_sciq_pairs_runtime": 7.2855, + "eval_sciq_pairs_samples_per_second": 20.589, + "eval_sciq_pairs_steps_per_second": 0.686, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_qasc_pairs_loss": 4.979205131530762, + "eval_qasc_pairs_runtime": 2.0332, + "eval_qasc_pairs_samples_per_second": 73.775, + "eval_qasc_pairs_steps_per_second": 2.459, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_openbookqa_pairs_loss": 2.5103061199188232, + "eval_openbookqa_pairs_runtime": 0.8673, + "eval_openbookqa_pairs_samples_per_second": 118.755, + "eval_openbookqa_pairs_steps_per_second": 4.612, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_msmarco_pairs_loss": 1.2753304243087769, + "eval_msmarco_pairs_runtime": 2.7942, + "eval_msmarco_pairs_samples_per_second": 53.683, + "eval_msmarco_pairs_steps_per_second": 1.789, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_nq_pairs_loss": 1.057248592376709, + "eval_nq_pairs_runtime": 5.0749, + "eval_nq_pairs_samples_per_second": 29.557, + "eval_nq_pairs_steps_per_second": 0.985, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_trivia_pairs_loss": 1.4893617630004883, + "eval_trivia_pairs_runtime": 9.5535, + "eval_trivia_pairs_samples_per_second": 15.701, + "eval_trivia_pairs_steps_per_second": 0.523, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_quora_pairs_loss": 0.27783504128456116, + "eval_quora_pairs_runtime": 1.1843, + "eval_quora_pairs_samples_per_second": 126.653, + "eval_quora_pairs_steps_per_second": 4.222, + "step": 11025 + }, + { + "epoch": 1.8769152196118488, + "eval_gooaq_pairs_loss": 0.8971360325813293, + "eval_gooaq_pairs_runtime": 2.0278, + "eval_gooaq_pairs_samples_per_second": 73.97, + "eval_gooaq_pairs_steps_per_second": 2.466, + "step": 11025 + }, + { + "epoch": 1.90194075587334, + "grad_norm": 14.915979385375977, + "learning_rate": 2.2087433233862403e-05, + "loss": 1.2292, + "step": 11172 + }, + { + "epoch": 1.9269662921348316, + "grad_norm": 13.753366470336914, + "learning_rate": 2.118740830659258e-05, + "loss": 1.0362, + "step": 11319 + }, + { + "epoch": 1.9519918283963227, + "grad_norm": 8.33267593383789, + "learning_rate": 2.0259676306932596e-05, + "loss": 1.1851, + "step": 11466 + }, + { + "epoch": 1.9770173646578142, + "grad_norm": 0.6671110987663269, + "learning_rate": 1.9301804508269106e-05, + "loss": 1.0248, + "step": 11613 + }, + { + "epoch": 2.0020429009193053, + "grad_norm": 71.1603775024414, + "learning_rate": 1.8331109675851356e-05, + "loss": 1.1305, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_nli-pairs_loss": 1.1981595754623413, + "eval_nli-pairs_runtime": 4.4194, + "eval_nli-pairs_samples_per_second": 33.941, + "eval_nli-pairs_steps_per_second": 1.131, + "eval_sts-test_pearson_cosine": 0.7691168917727959, + "eval_sts-test_pearson_dot": 0.5009080741883037, + "eval_sts-test_pearson_euclidean": 0.7565965590806436, + "eval_sts-test_pearson_manhattan": 0.7607578912460005, + "eval_sts-test_pearson_max": 0.7691168917727959, + "eval_sts-test_spearman_cosine": 0.7788020160239207, + "eval_sts-test_spearman_dot": 0.5543439729717182, + "eval_sts-test_spearman_euclidean": 0.7507099854871488, + "eval_sts-test_spearman_manhattan": 0.7550850801051086, + "eval_sts-test_spearman_max": 0.7788020160239207, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_vitaminc-pairs_loss": 0.8724684715270996, + "eval_vitaminc-pairs_runtime": 2.2855, + "eval_vitaminc-pairs_samples_per_second": 65.632, + "eval_vitaminc-pairs_steps_per_second": 2.188, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_qnli-contrastive_loss": 0.1063760370016098, + "eval_qnli-contrastive_runtime": 0.5211, + "eval_qnli-contrastive_samples_per_second": 287.861, + "eval_qnli-contrastive_steps_per_second": 9.595, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_scitail-pairs-qa_loss": 0.11115950345993042, + "eval_scitail-pairs-qa_runtime": 1.2652, + "eval_scitail-pairs-qa_samples_per_second": 118.561, + "eval_scitail-pairs-qa_steps_per_second": 3.952, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_scitail-pairs-pos_loss": 0.5056447386741638, + "eval_scitail-pairs-pos_runtime": 2.3265, + "eval_scitail-pairs-pos_samples_per_second": 64.475, + "eval_scitail-pairs-pos_steps_per_second": 2.149, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_xsum-pairs_loss": 0.5417940020561218, + "eval_xsum-pairs_runtime": 2.2651, + "eval_xsum-pairs_samples_per_second": 66.224, + "eval_xsum-pairs_steps_per_second": 2.207, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_compression-pairs_loss": 0.13307414948940277, + "eval_compression-pairs_runtime": 0.4568, + "eval_compression-pairs_samples_per_second": 328.338, + "eval_compression-pairs_steps_per_second": 10.945, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_sciq_pairs_loss": 0.5748575329780579, + "eval_sciq_pairs_runtime": 7.352, + "eval_sciq_pairs_samples_per_second": 20.403, + "eval_sciq_pairs_steps_per_second": 0.68, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_qasc_pairs_loss": 4.8620710372924805, + "eval_qasc_pairs_runtime": 2.2185, + "eval_qasc_pairs_samples_per_second": 67.614, + "eval_qasc_pairs_steps_per_second": 2.254, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_openbookqa_pairs_loss": 2.3957626819610596, + "eval_openbookqa_pairs_runtime": 0.9894, + "eval_openbookqa_pairs_samples_per_second": 104.099, + "eval_openbookqa_pairs_steps_per_second": 4.043, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_msmarco_pairs_loss": 1.2221691608428955, + "eval_msmarco_pairs_runtime": 2.8364, + "eval_msmarco_pairs_samples_per_second": 52.883, + "eval_msmarco_pairs_steps_per_second": 1.763, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_nq_pairs_loss": 1.056867241859436, + "eval_nq_pairs_runtime": 5.123, + "eval_nq_pairs_samples_per_second": 29.28, + "eval_nq_pairs_steps_per_second": 0.976, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_trivia_pairs_loss": 1.5130479335784912, + "eval_trivia_pairs_runtime": 9.628, + "eval_trivia_pairs_samples_per_second": 15.58, + "eval_trivia_pairs_steps_per_second": 0.519, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_quora_pairs_loss": 0.2696760296821594, + "eval_quora_pairs_runtime": 1.2258, + "eval_quora_pairs_samples_per_second": 122.368, + "eval_quora_pairs_steps_per_second": 4.079, + "step": 11760 + }, + { + "epoch": 2.0020429009193053, + "eval_gooaq_pairs_loss": 0.8714584112167358, + "eval_gooaq_pairs_runtime": 2.1309, + "eval_gooaq_pairs_samples_per_second": 70.391, + "eval_gooaq_pairs_steps_per_second": 2.346, + "step": 11760 + }, + { + "epoch": 2.0270684371807968, + "grad_norm": 0.7177102565765381, + "learning_rate": 1.7338706161920983e-05, + "loss": 0.9284, + "step": 11907 + }, + { + "epoch": 2.052093973442288, + "grad_norm": 11.534607887268066, + "learning_rate": 1.633568607738064e-05, + "loss": 1.0998, + "step": 12054 + }, + { + "epoch": 2.0771195097037793, + "grad_norm": 13.443835258483887, + "learning_rate": 1.5326602637903215e-05, + "loss": 1.1181, + "step": 12201 + }, + { + "epoch": 2.102145045965271, + "grad_norm": 1.4795461893081665, + "learning_rate": 1.431603658379759e-05, + "loss": 0.9978, + "step": 12348 + }, + { + "epoch": 2.127170582226762, + "grad_norm": 13.516138076782227, + "learning_rate": 1.3308575385710644e-05, + "loss": 1.0565, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_nli-pairs_loss": 1.210241675376892, + "eval_nli-pairs_runtime": 4.0103, + "eval_nli-pairs_samples_per_second": 37.404, + "eval_nli-pairs_steps_per_second": 1.247, + "eval_sts-test_pearson_cosine": 0.7762452815355265, + "eval_sts-test_pearson_dot": 0.48818071088823645, + "eval_sts-test_pearson_euclidean": 0.7572653656278441, + "eval_sts-test_pearson_manhattan": 0.7608519923908275, + "eval_sts-test_pearson_max": 0.7762452815355265, + "eval_sts-test_spearman_cosine": 0.7839057066535283, + "eval_sts-test_spearman_dot": 0.5570503640965535, + "eval_sts-test_spearman_euclidean": 0.7527973687121541, + "eval_sts-test_spearman_manhattan": 0.755831239077737, + "eval_sts-test_spearman_max": 0.7839057066535283, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_vitaminc-pairs_loss": 0.7919407486915588, + "eval_vitaminc-pairs_runtime": 2.1778, + "eval_vitaminc-pairs_samples_per_second": 68.877, + "eval_vitaminc-pairs_steps_per_second": 2.296, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_qnli-contrastive_loss": 0.10323584824800491, + "eval_qnli-contrastive_runtime": 0.4911, + "eval_qnli-contrastive_samples_per_second": 305.443, + "eval_qnli-contrastive_steps_per_second": 10.181, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_scitail-pairs-qa_loss": 0.1031724140048027, + "eval_scitail-pairs-qa_runtime": 1.1871, + "eval_scitail-pairs-qa_samples_per_second": 126.362, + "eval_scitail-pairs-qa_steps_per_second": 4.212, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_scitail-pairs-pos_loss": 0.49804234504699707, + "eval_scitail-pairs-pos_runtime": 2.1491, + "eval_scitail-pairs-pos_samples_per_second": 69.797, + "eval_scitail-pairs-pos_steps_per_second": 2.327, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_xsum-pairs_loss": 0.5050535202026367, + "eval_xsum-pairs_runtime": 2.2665, + "eval_xsum-pairs_samples_per_second": 66.182, + "eval_xsum-pairs_steps_per_second": 2.206, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_compression-pairs_loss": 0.12162226438522339, + "eval_compression-pairs_runtime": 0.4514, + "eval_compression-pairs_samples_per_second": 332.334, + "eval_compression-pairs_steps_per_second": 11.078, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_sciq_pairs_loss": 0.5630886554718018, + "eval_sciq_pairs_runtime": 7.0948, + "eval_sciq_pairs_samples_per_second": 21.142, + "eval_sciq_pairs_steps_per_second": 0.705, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_qasc_pairs_loss": 4.82968282699585, + "eval_qasc_pairs_runtime": 2.0203, + "eval_qasc_pairs_samples_per_second": 74.248, + "eval_qasc_pairs_steps_per_second": 2.475, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_openbookqa_pairs_loss": 2.291588544845581, + "eval_openbookqa_pairs_runtime": 0.8653, + "eval_openbookqa_pairs_samples_per_second": 119.034, + "eval_openbookqa_pairs_steps_per_second": 4.623, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_msmarco_pairs_loss": 1.179781198501587, + "eval_msmarco_pairs_runtime": 2.7463, + "eval_msmarco_pairs_samples_per_second": 54.619, + "eval_msmarco_pairs_steps_per_second": 1.821, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_nq_pairs_loss": 0.9966514110565186, + "eval_nq_pairs_runtime": 5.0621, + "eval_nq_pairs_samples_per_second": 29.632, + "eval_nq_pairs_steps_per_second": 0.988, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_trivia_pairs_loss": 1.4555574655532837, + "eval_trivia_pairs_runtime": 9.5288, + "eval_trivia_pairs_samples_per_second": 15.742, + "eval_trivia_pairs_steps_per_second": 0.525, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_quora_pairs_loss": 0.2517216205596924, + "eval_quora_pairs_runtime": 1.154, + "eval_quora_pairs_samples_per_second": 129.984, + "eval_quora_pairs_steps_per_second": 4.333, + "step": 12495 + }, + { + "epoch": 2.127170582226762, + "eval_gooaq_pairs_loss": 0.8206157684326172, + "eval_gooaq_pairs_runtime": 2.0213, + "eval_gooaq_pairs_samples_per_second": 74.209, + "eval_gooaq_pairs_steps_per_second": 2.474, + "step": 12495 + }, + { + "epoch": 2.1521961184882534, + "grad_norm": 10.220344543457031, + "learning_rate": 1.2308792419776779e-05, + "loss": 1.1317, + "step": 12642 + }, + { + "epoch": 2.1772216547497445, + "grad_norm": 6.893187046051025, + "learning_rate": 1.13212262067496e-05, + "loss": 1.0682, + "step": 12789 + }, + { + "epoch": 2.202247191011236, + "grad_norm": 14.389963150024414, + "learning_rate": 1.0350359809359845e-05, + "loss": 1.2708, + "step": 12936 + }, + { + "epoch": 2.227272727272727, + "grad_norm": 0.3779028058052063, + "learning_rate": 9.400600481425268e-06, + "loss": 1.2129, + "step": 13083 + }, + { + "epoch": 2.2522982635342186, + "grad_norm": 3.433882713317871, + "learning_rate": 8.476259661095597e-06, + "loss": 1.053, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_nli-pairs_loss": 1.1418253183364868, + "eval_nli-pairs_runtime": 4.1287, + "eval_nli-pairs_samples_per_second": 36.331, + "eval_nli-pairs_steps_per_second": 1.211, + "eval_sts-test_pearson_cosine": 0.7786789365004515, + "eval_sts-test_pearson_dot": 0.4855185680416273, + "eval_sts-test_pearson_euclidean": 0.7514151357124674, + "eval_sts-test_pearson_manhattan": 0.7548721969767885, + "eval_sts-test_pearson_max": 0.7786789365004515, + "eval_sts-test_spearman_cosine": 0.7870432894305359, + "eval_sts-test_spearman_dot": 0.5630314308020745, + "eval_sts-test_spearman_euclidean": 0.7495100025349075, + "eval_sts-test_spearman_manhattan": 0.7525107811391334, + "eval_sts-test_spearman_max": 0.7870432894305359, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_vitaminc-pairs_loss": 0.7908185720443726, + "eval_vitaminc-pairs_runtime": 2.1735, + "eval_vitaminc-pairs_samples_per_second": 69.012, + "eval_vitaminc-pairs_steps_per_second": 2.3, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_qnli-contrastive_loss": 0.17070643603801727, + "eval_qnli-contrastive_runtime": 0.4906, + "eval_qnli-contrastive_samples_per_second": 305.758, + "eval_qnli-contrastive_steps_per_second": 10.192, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_scitail-pairs-qa_loss": 0.09536581486463547, + "eval_scitail-pairs-qa_runtime": 1.1537, + "eval_scitail-pairs-qa_samples_per_second": 130.014, + "eval_scitail-pairs-qa_steps_per_second": 4.334, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_scitail-pairs-pos_loss": 0.4803718328475952, + "eval_scitail-pairs-pos_runtime": 2.1338, + "eval_scitail-pairs-pos_samples_per_second": 70.297, + "eval_scitail-pairs-pos_steps_per_second": 2.343, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_xsum-pairs_loss": 0.4886069595813751, + "eval_xsum-pairs_runtime": 2.2577, + "eval_xsum-pairs_samples_per_second": 66.438, + "eval_xsum-pairs_steps_per_second": 2.215, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_compression-pairs_loss": 0.12639394402503967, + "eval_compression-pairs_runtime": 0.4532, + "eval_compression-pairs_samples_per_second": 330.97, + "eval_compression-pairs_steps_per_second": 11.032, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_sciq_pairs_loss": 0.5328854918479919, + "eval_sciq_pairs_runtime": 7.1317, + "eval_sciq_pairs_samples_per_second": 21.033, + "eval_sciq_pairs_steps_per_second": 0.701, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_qasc_pairs_loss": 4.704314231872559, + "eval_qasc_pairs_runtime": 2.0312, + "eval_qasc_pairs_samples_per_second": 73.848, + "eval_qasc_pairs_steps_per_second": 2.462, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_openbookqa_pairs_loss": 2.2545013427734375, + "eval_openbookqa_pairs_runtime": 0.8657, + "eval_openbookqa_pairs_samples_per_second": 118.98, + "eval_openbookqa_pairs_steps_per_second": 4.621, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_msmarco_pairs_loss": 1.1227293014526367, + "eval_msmarco_pairs_runtime": 2.7855, + "eval_msmarco_pairs_samples_per_second": 53.851, + "eval_msmarco_pairs_steps_per_second": 1.795, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_nq_pairs_loss": 0.9163884520530701, + "eval_nq_pairs_runtime": 5.0251, + "eval_nq_pairs_samples_per_second": 29.85, + "eval_nq_pairs_steps_per_second": 0.995, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_trivia_pairs_loss": 1.2854268550872803, + "eval_trivia_pairs_runtime": 9.6199, + "eval_trivia_pairs_samples_per_second": 15.593, + "eval_trivia_pairs_steps_per_second": 0.52, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_quora_pairs_loss": 0.24444325268268585, + "eval_quora_pairs_runtime": 1.1606, + "eval_quora_pairs_samples_per_second": 129.238, + "eval_quora_pairs_steps_per_second": 4.308, + "step": 13230 + }, + { + "epoch": 2.2522982635342186, + "eval_gooaq_pairs_loss": 0.8153015971183777, + "eval_gooaq_pairs_runtime": 2.0348, + "eval_gooaq_pairs_samples_per_second": 73.718, + "eval_gooaq_pairs_steps_per_second": 2.457, + "step": 13230 + }, + { + "epoch": 2.27732379979571, + "grad_norm": 1.6442259550094604, + "learning_rate": 7.58153339905326e-06, + "loss": 0.8897, + "step": 13377 + }, + { + "epoch": 2.302349336057201, + "grad_norm": 3.068699598312378, + "learning_rate": 6.720483310516198e-06, + "loss": 1.181, + "step": 13524 + }, + { + "epoch": 2.3273748723186927, + "grad_norm": 15.221121788024902, + "learning_rate": 5.897018137511326e-06, + "loss": 1.0895, + "step": 13671 + }, + { + "epoch": 2.3524004085801837, + "grad_norm": 8.72175121307373, + "learning_rate": 5.114876005116682e-06, + "loss": 1.0347, + "step": 13818 + }, + { + "epoch": 2.3774259448416752, + "grad_norm": 15.892960548400879, + "learning_rate": 4.377607452220317e-06, + "loss": 1.1473, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_nli-pairs_loss": 1.1473166942596436, + "eval_nli-pairs_runtime": 4.0846, + "eval_nli-pairs_samples_per_second": 36.723, + "eval_nli-pairs_steps_per_second": 1.224, + "eval_sts-test_pearson_cosine": 0.7794293138100197, + "eval_sts-test_pearson_dot": 0.47438029525552705, + "eval_sts-test_pearson_euclidean": 0.751105924306521, + "eval_sts-test_pearson_manhattan": 0.755281014746346, + "eval_sts-test_pearson_max": 0.7794293138100197, + "eval_sts-test_spearman_cosine": 0.7872791214894774, + "eval_sts-test_spearman_dot": 0.5580180518636964, + "eval_sts-test_spearman_euclidean": 0.7478338358714589, + "eval_sts-test_spearman_manhattan": 0.7517708620916009, + "eval_sts-test_spearman_max": 0.7872791214894774, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_vitaminc-pairs_loss": 0.7656364440917969, + "eval_vitaminc-pairs_runtime": 2.1781, + "eval_vitaminc-pairs_samples_per_second": 68.869, + "eval_vitaminc-pairs_steps_per_second": 2.296, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_qnli-contrastive_loss": 0.18101921677589417, + "eval_qnli-contrastive_runtime": 0.4943, + "eval_qnli-contrastive_samples_per_second": 303.474, + "eval_qnli-contrastive_steps_per_second": 10.116, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_scitail-pairs-qa_loss": 0.09049389511346817, + "eval_scitail-pairs-qa_runtime": 1.1619, + "eval_scitail-pairs-qa_samples_per_second": 129.104, + "eval_scitail-pairs-qa_steps_per_second": 4.303, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_scitail-pairs-pos_loss": 0.47021567821502686, + "eval_scitail-pairs-pos_runtime": 2.1593, + "eval_scitail-pairs-pos_samples_per_second": 69.466, + "eval_scitail-pairs-pos_steps_per_second": 2.316, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_xsum-pairs_loss": 0.4638828933238983, + "eval_xsum-pairs_runtime": 2.2613, + "eval_xsum-pairs_samples_per_second": 66.334, + "eval_xsum-pairs_steps_per_second": 2.211, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_compression-pairs_loss": 0.12560921907424927, + "eval_compression-pairs_runtime": 0.4496, + "eval_compression-pairs_samples_per_second": 333.638, + "eval_compression-pairs_steps_per_second": 11.121, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_sciq_pairs_loss": 0.5231578946113586, + "eval_sciq_pairs_runtime": 7.1367, + "eval_sciq_pairs_samples_per_second": 21.018, + "eval_sciq_pairs_steps_per_second": 0.701, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_qasc_pairs_loss": 4.6708855628967285, + "eval_qasc_pairs_runtime": 2.0351, + "eval_qasc_pairs_samples_per_second": 73.705, + "eval_qasc_pairs_steps_per_second": 2.457, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_openbookqa_pairs_loss": 2.246180772781372, + "eval_openbookqa_pairs_runtime": 0.8632, + "eval_openbookqa_pairs_samples_per_second": 119.324, + "eval_openbookqa_pairs_steps_per_second": 4.634, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_msmarco_pairs_loss": 1.114973545074463, + "eval_msmarco_pairs_runtime": 2.7619, + "eval_msmarco_pairs_samples_per_second": 54.309, + "eval_msmarco_pairs_steps_per_second": 1.81, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_nq_pairs_loss": 0.8807224631309509, + "eval_nq_pairs_runtime": 5.0622, + "eval_nq_pairs_samples_per_second": 29.632, + "eval_nq_pairs_steps_per_second": 0.988, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_trivia_pairs_loss": 1.2553032636642456, + "eval_trivia_pairs_runtime": 9.5755, + "eval_trivia_pairs_samples_per_second": 15.665, + "eval_trivia_pairs_steps_per_second": 0.522, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_quora_pairs_loss": 0.2363266944885254, + "eval_quora_pairs_runtime": 1.1671, + "eval_quora_pairs_samples_per_second": 128.525, + "eval_quora_pairs_steps_per_second": 4.284, + "step": 13965 + }, + { + "epoch": 2.3774259448416752, + "eval_gooaq_pairs_loss": 0.7755452990531921, + "eval_gooaq_pairs_runtime": 2.0356, + "eval_gooaq_pairs_samples_per_second": 73.69, + "eval_gooaq_pairs_steps_per_second": 2.456, + "step": 13965 + }, + { + "epoch": 2.4024514811031663, + "grad_norm": 0.3262540102005005, + "learning_rate": 3.688559313827753e-06, + "loss": 1.0026, + "step": 14112 + }, + { + "epoch": 2.427477017364658, + "grad_norm": 10.04266357421875, + "learning_rate": 3.050859528084451e-06, + "loss": 1.0728, + "step": 14259 + }, + { + "epoch": 2.4525025536261493, + "grad_norm": 0.9428766369819641, + "learning_rate": 2.46740293698192e-06, + "loss": 0.8232, + "step": 14406 + }, + { + "epoch": 2.4775280898876404, + "grad_norm": 0.5977104902267456, + "learning_rate": 1.9408381452051525e-06, + "loss": 1.0261, + "step": 14553 + } + ], + "logging_steps": 147, + "max_steps": 29370, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 2937, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}