{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 740, "global_step": 2957, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.025025363544132567, "grad_norm": 24.194847106933594, "learning_rate": 8.048698004734527e-07, "loss": 12.9253, "step": 74 }, { "epoch": 0.050050727088265135, "grad_norm": 16.451122283935547, "learning_rate": 1.6807575245180925e-06, "loss": 9.5557, "step": 148 }, { "epoch": 0.0750760906323977, "grad_norm": 166.15907287597656, "learning_rate": 2.5566452485627323e-06, "loss": 9.2788, "step": 222 }, { "epoch": 0.10010145417653027, "grad_norm": 29.49967384338379, "learning_rate": 3.432532972607372e-06, "loss": 8.6849, "step": 296 }, { "epoch": 0.12512681772066284, "grad_norm": 8.201894760131836, "learning_rate": 4.308420696652012e-06, "loss": 7.7612, "step": 370 }, { "epoch": 0.1501521812647954, "grad_norm": 35.00122833251953, "learning_rate": 5.184308420696652e-06, "loss": 7.1482, "step": 444 }, { "epoch": 0.17517754480892797, "grad_norm": 15.764969825744629, "learning_rate": 6.0601961447412915e-06, "loss": 6.5551, "step": 518 }, { "epoch": 0.20020290835306054, "grad_norm": 33.325008392333984, "learning_rate": 6.936083868785931e-06, "loss": 6.2869, "step": 592 }, { "epoch": 0.2252282718971931, "grad_norm": 30.581892013549805, "learning_rate": 7.81197159283057e-06, "loss": 5.092, "step": 666 }, { "epoch": 0.2502536354413257, "grad_norm": 39.986324310302734, "learning_rate": 8.68785931687521e-06, "loss": 4.7866, "step": 740 }, { "epoch": 0.2502536354413257, "eval_Vitaminc-test_cosine_accuracy": 0.55, "eval_Vitaminc-test_cosine_accuracy_threshold": 0.9010212421417236, "eval_Vitaminc-test_cosine_ap": 0.5414572634918511, "eval_Vitaminc-test_cosine_f1": 0.654627539503386, "eval_Vitaminc-test_cosine_f1_threshold": 0.5345202088356018, "eval_Vitaminc-test_cosine_precision": 0.4865771812080537, "eval_Vitaminc-test_cosine_recall": 1.0, "eval_Vitaminc-test_dot_accuracy": 0.56, "eval_Vitaminc-test_dot_accuracy_threshold": 251.8475341796875, "eval_Vitaminc-test_dot_ap": 0.5368797297298245, "eval_Vitaminc-test_dot_f1": 0.654627539503386, "eval_Vitaminc-test_dot_f1_threshold": 151.05996704101562, "eval_Vitaminc-test_dot_precision": 0.4865771812080537, "eval_Vitaminc-test_dot_recall": 1.0, "eval_Vitaminc-test_euclidean_accuracy": 0.55, "eval_Vitaminc-test_euclidean_accuracy_threshold": 7.4731292724609375, "eval_Vitaminc-test_euclidean_ap": 0.5349241278127328, "eval_Vitaminc-test_euclidean_f1": 0.654627539503386, "eval_Vitaminc-test_euclidean_f1_threshold": 16.456798553466797, "eval_Vitaminc-test_euclidean_precision": 0.4865771812080537, "eval_Vitaminc-test_euclidean_recall": 1.0, "eval_Vitaminc-test_manhattan_accuracy": 0.55, "eval_Vitaminc-test_manhattan_accuracy_threshold": 153.8025360107422, "eval_Vitaminc-test_manhattan_ap": 0.5359223565961893, "eval_Vitaminc-test_manhattan_f1": 0.6560364464692483, "eval_Vitaminc-test_manhattan_f1_threshold": 310.77142333984375, "eval_Vitaminc-test_manhattan_precision": 0.4897959183673469, "eval_Vitaminc-test_manhattan_recall": 0.993103448275862, "eval_Vitaminc-test_max_accuracy": 0.56, "eval_Vitaminc-test_max_accuracy_threshold": 251.8475341796875, "eval_Vitaminc-test_max_ap": 0.5414572634918511, "eval_Vitaminc-test_max_f1": 0.6560364464692483, "eval_Vitaminc-test_max_f1_threshold": 310.77142333984375, "eval_Vitaminc-test_max_precision": 0.4897959183673469, "eval_Vitaminc-test_max_recall": 1.0, "eval_mrpc-test_cosine_accuracy": 0.7133333333333334, "eval_mrpc-test_cosine_accuracy_threshold": 0.8482479453086853, "eval_mrpc-test_cosine_ap": 0.8126327786702257, "eval_mrpc-test_cosine_f1": 0.8080357142857143, "eval_mrpc-test_cosine_f1_threshold": 0.8482479453086853, "eval_mrpc-test_cosine_precision": 0.7182539682539683, "eval_mrpc-test_cosine_recall": 0.923469387755102, "eval_mrpc-test_dot_accuracy": 0.6733333333333333, "eval_mrpc-test_dot_accuracy_threshold": 192.46002197265625, "eval_mrpc-test_dot_ap": 0.7048128672008315, "eval_mrpc-test_dot_f1": 0.7975460122699387, "eval_mrpc-test_dot_f1_threshold": 186.87075805664062, "eval_mrpc-test_dot_precision": 0.6655290102389079, "eval_mrpc-test_dot_recall": 0.9948979591836735, "eval_mrpc-test_euclidean_accuracy": 0.7133333333333334, "eval_mrpc-test_euclidean_accuracy_threshold": 9.194692611694336, "eval_mrpc-test_euclidean_ap": 0.8190781511691683, "eval_mrpc-test_euclidean_f1": 0.8080357142857143, "eval_mrpc-test_euclidean_f1_threshold": 9.194692611694336, "eval_mrpc-test_euclidean_precision": 0.7182539682539683, "eval_mrpc-test_euclidean_recall": 0.923469387755102, "eval_mrpc-test_manhattan_accuracy": 0.71, "eval_mrpc-test_manhattan_accuracy_threshold": 167.8056640625, "eval_mrpc-test_manhattan_ap": 0.8350377066418277, "eval_mrpc-test_manhattan_f1": 0.8034934497816593, "eval_mrpc-test_manhattan_f1_threshold": 187.3101806640625, "eval_mrpc-test_manhattan_precision": 0.7022900763358778, "eval_mrpc-test_manhattan_recall": 0.9387755102040817, "eval_mrpc-test_max_accuracy": 0.7133333333333334, "eval_mrpc-test_max_accuracy_threshold": 192.46002197265625, "eval_mrpc-test_max_ap": 0.8350377066418277, "eval_mrpc-test_max_f1": 0.8080357142857143, "eval_mrpc-test_max_f1_threshold": 187.3101806640625, "eval_mrpc-test_max_precision": 0.7182539682539683, "eval_mrpc-test_max_recall": 0.9948979591836735, "eval_negationNLI-test_cosine_accuracy": 1.0, "eval_negationNLI-test_dot_accuracy": 0.0, "eval_negationNLI-test_euclidean_accuracy": 1.0, "eval_negationNLI-test_manhattan_accuracy": 1.0, "eval_negationNLI-test_max_accuracy": 1.0, "eval_nli-pairs_loss": 7.208528518676758, "eval_nli-pairs_runtime": 2.8235, "eval_nli-pairs_samples_per_second": 354.167, "eval_nli-pairs_steps_per_second": 7.083, "eval_sequential_score": 0.5414572634918511, "eval_sts-test_pearson_cosine": 0.5924382437556898, "eval_sts-test_pearson_dot": 0.5712096252698496, "eval_sts-test_pearson_euclidean": 0.567118545585895, "eval_sts-test_pearson_manhattan": 0.5851061637430899, "eval_sts-test_pearson_max": 0.5924382437556898, "eval_sts-test_spearman_cosine": 0.6094124531020365, "eval_sts-test_spearman_dot": 0.5838193528505712, "eval_sts-test_spearman_euclidean": 0.5744646395850245, "eval_sts-test_spearman_manhattan": 0.5976596658090335, "eval_sts-test_spearman_max": 0.6094124531020365, "step": 740 }, { "epoch": 0.2502536354413257, "eval_vitaminc-pairs_loss": 6.389328479766846, "eval_vitaminc-pairs_runtime": 0.3763, "eval_vitaminc-pairs_samples_per_second": 265.723, "eval_vitaminc-pairs_steps_per_second": 5.314, "step": 740 }, { "epoch": 0.2502536354413257, "eval_negation-triplets_loss": 5.878116607666016, "eval_negation-triplets_runtime": 0.123, "eval_negation-triplets_samples_per_second": 544.706, "eval_negation-triplets_steps_per_second": 16.26, "step": 740 }, { "epoch": 0.2502536354413257, "eval_qnli-contrastive_loss": 4.718408584594727, "eval_qnli-contrastive_runtime": 0.1805, "eval_qnli-contrastive_samples_per_second": 553.913, "eval_qnli-contrastive_steps_per_second": 11.078, "step": 740 }, { "epoch": 0.2502536354413257, "eval_scitail-pairs-qa_loss": 0.7982025146484375, "eval_scitail-pairs-qa_runtime": 0.2817, "eval_scitail-pairs-qa_samples_per_second": 354.927, "eval_scitail-pairs-qa_steps_per_second": 7.099, "step": 740 }, { "epoch": 0.2502536354413257, "eval_scitail-pairs-pos_loss": 1.6438982486724854, "eval_scitail-pairs-pos_runtime": 0.556, "eval_scitail-pairs-pos_samples_per_second": 179.843, "eval_scitail-pairs-pos_steps_per_second": 3.597, "step": 740 }, { "epoch": 0.2502536354413257, "eval_xsum-pairs_loss": 2.896432399749756, "eval_xsum-pairs_runtime": 0.6662, "eval_xsum-pairs_samples_per_second": 150.113, "eval_xsum-pairs_steps_per_second": 3.002, "step": 740 }, { "epoch": 0.2502536354413257, "eval_compression-pairs_loss": 2.1321794986724854, "eval_compression-pairs_runtime": 0.0988, "eval_compression-pairs_samples_per_second": 1012.518, "eval_compression-pairs_steps_per_second": 20.25, "step": 740 }, { "epoch": 0.2502536354413257, "eval_sciq_pairs_loss": 2.1849400997161865, "eval_sciq_pairs_runtime": 2.003, "eval_sciq_pairs_samples_per_second": 49.926, "eval_sciq_pairs_steps_per_second": 0.999, "step": 740 }, { "epoch": 0.2502536354413257, "eval_qasc_pairs_loss": 3.1045215129852295, "eval_qasc_pairs_runtime": 0.2356, "eval_qasc_pairs_samples_per_second": 424.456, "eval_qasc_pairs_steps_per_second": 8.489, "step": 740 }, { "epoch": 0.2502536354413257, "eval_qasc_facts_sym_loss": 2.4955689907073975, "eval_qasc_facts_sym_runtime": 0.0956, "eval_qasc_facts_sym_samples_per_second": 1045.497, "eval_qasc_facts_sym_steps_per_second": 20.91, "step": 740 }, { "epoch": 0.2502536354413257, "eval_openbookqa_pairs_loss": 4.331984519958496, "eval_openbookqa_pairs_runtime": 0.2874, "eval_openbookqa_pairs_samples_per_second": 347.959, "eval_openbookqa_pairs_steps_per_second": 6.959, "step": 740 }, { "epoch": 0.2502536354413257, "eval_msmarco_pairs_loss": 6.1474928855896, "eval_msmarco_pairs_runtime": 0.5405, "eval_msmarco_pairs_samples_per_second": 185.006, "eval_msmarco_pairs_steps_per_second": 3.7, "step": 740 }, { "epoch": 0.2502536354413257, "eval_nq_pairs_loss": 6.490893363952637, "eval_nq_pairs_runtime": 1.5599, "eval_nq_pairs_samples_per_second": 64.106, "eval_nq_pairs_steps_per_second": 1.282, "step": 740 }, { "epoch": 0.2502536354413257, "eval_trivia_pairs_loss": 6.1789960861206055, "eval_trivia_pairs_runtime": 2.1019, "eval_trivia_pairs_samples_per_second": 47.576, "eval_trivia_pairs_steps_per_second": 0.952, "step": 740 }, { "epoch": 0.2502536354413257, "eval_quora_pairs_loss": 1.2507822513580322, "eval_quora_pairs_runtime": 7.7343, "eval_quora_pairs_samples_per_second": 217.213, "eval_quora_pairs_steps_per_second": 4.396, "step": 740 }, { "epoch": 0.2502536354413257, "eval_gooaq_pairs_loss": 4.71970272064209, "eval_gooaq_pairs_runtime": 0.4295, "eval_gooaq_pairs_samples_per_second": 232.84, "eval_gooaq_pairs_steps_per_second": 4.657, "step": 740 }, { "epoch": 0.2502536354413257, "eval_mrpc_pairs_loss": 1.2289129495620728, "eval_mrpc_pairs_runtime": 0.0981, "eval_mrpc_pairs_samples_per_second": 1019.237, "eval_mrpc_pairs_steps_per_second": 20.385, "step": 740 }, { "epoch": 0.2752789989854582, "grad_norm": 13.371368408203125, "learning_rate": 9.56374704091985e-06, "loss": 4.9352, "step": 814 }, { "epoch": 0.3003043625295908, "grad_norm": 11.514687538146973, "learning_rate": 1.0439634764964491e-05, "loss": 4.6466, "step": 888 }, { "epoch": 0.32532972607372335, "grad_norm": 7.828142166137695, "learning_rate": 1.131552248900913e-05, "loss": 4.5722, "step": 962 }, { "epoch": 0.35035508961785594, "grad_norm": 51.203739166259766, "learning_rate": 1.2191410213053771e-05, "loss": 4.3531, "step": 1036 }, { "epoch": 0.3753804531619885, "grad_norm": 22.266889572143555, "learning_rate": 1.3067297937098409e-05, "loss": 4.2219, "step": 1110 }, { "epoch": 0.4004058167061211, "grad_norm": 49.611026763916016, "learning_rate": 1.394318566114305e-05, "loss": 4.7228, "step": 1184 }, { "epoch": 0.4254311802502536, "grad_norm": 16.54360580444336, "learning_rate": 1.4819073385187689e-05, "loss": 4.1036, "step": 1258 }, { "epoch": 0.4504565437943862, "grad_norm": 11.04261302947998, "learning_rate": 1.569496110923233e-05, "loss": 3.0328, "step": 1332 }, { "epoch": 0.47548190733851875, "grad_norm": 12.153929710388184, "learning_rate": 1.6570848833276968e-05, "loss": 3.6321, "step": 1406 }, { "epoch": 0.5005072708826513, "grad_norm": 43.6102180480957, "learning_rate": 1.7446736557321606e-05, "loss": 3.6522, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_Vitaminc-test_cosine_accuracy": 0.56, "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7371894121170044, "eval_Vitaminc-test_cosine_ap": 0.5260625326082631, "eval_Vitaminc-test_cosine_f1": 0.6586538461538463, "eval_Vitaminc-test_cosine_f1_threshold": 0.543196976184845, "eval_Vitaminc-test_cosine_precision": 0.5055350553505535, "eval_Vitaminc-test_cosine_recall": 0.9448275862068966, "eval_Vitaminc-test_dot_accuracy": 0.5533333333333333, "eval_Vitaminc-test_dot_accuracy_threshold": 120.95848846435547, "eval_Vitaminc-test_dot_ap": 0.5280492506160784, "eval_Vitaminc-test_dot_f1": 0.6545454545454545, "eval_Vitaminc-test_dot_f1_threshold": 64.36978149414062, "eval_Vitaminc-test_dot_precision": 0.488135593220339, "eval_Vitaminc-test_dot_recall": 0.993103448275862, "eval_Vitaminc-test_euclidean_accuracy": 0.5533333333333333, "eval_Vitaminc-test_euclidean_accuracy_threshold": 9.099786758422852, "eval_Vitaminc-test_euclidean_ap": 0.5285735728609713, "eval_Vitaminc-test_euclidean_f1": 0.6531531531531533, "eval_Vitaminc-test_euclidean_f1_threshold": 16.646543502807617, "eval_Vitaminc-test_euclidean_precision": 0.48494983277591974, "eval_Vitaminc-test_euclidean_recall": 1.0, "eval_Vitaminc-test_manhattan_accuracy": 0.5566666666666666, "eval_Vitaminc-test_manhattan_accuracy_threshold": 183.7972412109375, "eval_Vitaminc-test_manhattan_ap": 0.5272973961720947, "eval_Vitaminc-test_manhattan_f1": 0.6540284360189572, "eval_Vitaminc-test_manhattan_f1_threshold": 248.92129516601562, "eval_Vitaminc-test_manhattan_precision": 0.4981949458483754, "eval_Vitaminc-test_manhattan_recall": 0.9517241379310345, "eval_Vitaminc-test_max_accuracy": 0.56, "eval_Vitaminc-test_max_accuracy_threshold": 183.7972412109375, "eval_Vitaminc-test_max_ap": 0.5285735728609713, "eval_Vitaminc-test_max_f1": 0.6586538461538463, "eval_Vitaminc-test_max_f1_threshold": 248.92129516601562, "eval_Vitaminc-test_max_precision": 0.5055350553505535, "eval_Vitaminc-test_max_recall": 1.0, "eval_mrpc-test_cosine_accuracy": 0.7166666666666667, "eval_mrpc-test_cosine_accuracy_threshold": 0.7689210176467896, "eval_mrpc-test_cosine_ap": 0.8209283332512223, "eval_mrpc-test_cosine_f1": 0.8067226890756303, "eval_mrpc-test_cosine_f1_threshold": 0.6529129147529602, "eval_mrpc-test_cosine_precision": 0.6857142857142857, "eval_mrpc-test_cosine_recall": 0.9795918367346939, "eval_mrpc-test_dot_accuracy": 0.6766666666666666, "eval_mrpc-test_dot_accuracy_threshold": 77.22791290283203, "eval_mrpc-test_dot_ap": 0.6966742608454493, "eval_mrpc-test_dot_f1": 0.7957894736842105, "eval_mrpc-test_dot_f1_threshold": 77.22791290283203, "eval_mrpc-test_dot_precision": 0.6774193548387096, "eval_mrpc-test_dot_recall": 0.9642857142857143, "eval_mrpc-test_euclidean_accuracy": 0.7, "eval_mrpc-test_euclidean_accuracy_threshold": 7.456927299499512, "eval_mrpc-test_euclidean_ap": 0.830942263798408, "eval_mrpc-test_euclidean_f1": 0.8067940552016986, "eval_mrpc-test_euclidean_f1_threshold": 9.238859176635742, "eval_mrpc-test_euclidean_precision": 0.6909090909090909, "eval_mrpc-test_euclidean_recall": 0.9693877551020408, "eval_mrpc-test_manhattan_accuracy": 0.7033333333333334, "eval_mrpc-test_manhattan_accuracy_threshold": 152.87489318847656, "eval_mrpc-test_manhattan_ap": 0.8386748577280486, "eval_mrpc-test_manhattan_f1": 0.8077753779697625, "eval_mrpc-test_manhattan_f1_threshold": 174.57843017578125, "eval_mrpc-test_manhattan_precision": 0.700374531835206, "eval_mrpc-test_manhattan_recall": 0.9540816326530612, "eval_mrpc-test_max_accuracy": 0.7166666666666667, "eval_mrpc-test_max_accuracy_threshold": 152.87489318847656, "eval_mrpc-test_max_ap": 0.8386748577280486, "eval_mrpc-test_max_f1": 0.8077753779697625, "eval_mrpc-test_max_f1_threshold": 174.57843017578125, "eval_mrpc-test_max_precision": 0.700374531835206, "eval_mrpc-test_max_recall": 0.9795918367346939, "eval_negationNLI-test_cosine_accuracy": 1.0, "eval_negationNLI-test_dot_accuracy": 0.0, "eval_negationNLI-test_euclidean_accuracy": 1.0, "eval_negationNLI-test_manhattan_accuracy": 1.0, "eval_negationNLI-test_max_accuracy": 1.0, "eval_nli-pairs_loss": 4.24396276473999, "eval_nli-pairs_runtime": 2.8135, "eval_nli-pairs_samples_per_second": 355.432, "eval_nli-pairs_steps_per_second": 7.109, "eval_sequential_score": 0.5285735728609713, "eval_sts-test_pearson_cosine": 0.7766910180979095, "eval_sts-test_pearson_dot": 0.7319420450752285, "eval_sts-test_pearson_euclidean": 0.7642908181972558, "eval_sts-test_pearson_manhattan": 0.7791004599919719, "eval_sts-test_pearson_max": 0.7791004599919719, "eval_sts-test_spearman_cosine": 0.7711480866239301, "eval_sts-test_spearman_dot": 0.7296255905576166, "eval_sts-test_spearman_euclidean": 0.7593187745196615, "eval_sts-test_spearman_manhattan": 0.768980476853658, "eval_sts-test_spearman_max": 0.7711480866239301, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_vitaminc-pairs_loss": 6.655053615570068, "eval_vitaminc-pairs_runtime": 0.3746, "eval_vitaminc-pairs_samples_per_second": 266.927, "eval_vitaminc-pairs_steps_per_second": 5.339, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_negation-triplets_loss": 4.761081695556641, "eval_negation-triplets_runtime": 0.1309, "eval_negation-triplets_samples_per_second": 511.863, "eval_negation-triplets_steps_per_second": 15.279, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_qnli-contrastive_loss": 3.232550859451294, "eval_qnli-contrastive_runtime": 0.177, "eval_qnli-contrastive_samples_per_second": 564.83, "eval_qnli-contrastive_steps_per_second": 11.297, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_scitail-pairs-qa_loss": 0.2729453146457672, "eval_scitail-pairs-qa_runtime": 0.2728, "eval_scitail-pairs-qa_samples_per_second": 366.588, "eval_scitail-pairs-qa_steps_per_second": 7.332, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_scitail-pairs-pos_loss": 0.8931738138198853, "eval_scitail-pairs-pos_runtime": 0.5321, "eval_scitail-pairs-pos_samples_per_second": 187.936, "eval_scitail-pairs-pos_steps_per_second": 3.759, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_xsum-pairs_loss": 1.9395147562026978, "eval_xsum-pairs_runtime": 0.6642, "eval_xsum-pairs_samples_per_second": 150.564, "eval_xsum-pairs_steps_per_second": 3.011, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_compression-pairs_loss": 1.3951506614685059, "eval_compression-pairs_runtime": 0.0986, "eval_compression-pairs_samples_per_second": 1013.751, "eval_compression-pairs_steps_per_second": 20.275, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_sciq_pairs_loss": 1.6004968881607056, "eval_sciq_pairs_runtime": 1.9806, "eval_sciq_pairs_samples_per_second": 50.489, "eval_sciq_pairs_steps_per_second": 1.01, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_qasc_pairs_loss": 1.3576843738555908, "eval_qasc_pairs_runtime": 0.2305, "eval_qasc_pairs_samples_per_second": 433.83, "eval_qasc_pairs_steps_per_second": 8.677, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_qasc_facts_sym_loss": 1.5506917238235474, "eval_qasc_facts_sym_runtime": 0.0975, "eval_qasc_facts_sym_samples_per_second": 1025.259, "eval_qasc_facts_sym_steps_per_second": 20.505, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_openbookqa_pairs_loss": 2.7663590908050537, "eval_openbookqa_pairs_runtime": 0.2815, "eval_openbookqa_pairs_samples_per_second": 355.224, "eval_openbookqa_pairs_steps_per_second": 7.104, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_msmarco_pairs_loss": 3.48696231842041, "eval_msmarco_pairs_runtime": 0.5347, "eval_msmarco_pairs_samples_per_second": 187.005, "eval_msmarco_pairs_steps_per_second": 3.74, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_nq_pairs_loss": 4.686245441436768, "eval_nq_pairs_runtime": 1.5466, "eval_nq_pairs_samples_per_second": 64.657, "eval_nq_pairs_steps_per_second": 1.293, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_trivia_pairs_loss": 4.968179225921631, "eval_trivia_pairs_runtime": 2.0808, "eval_trivia_pairs_samples_per_second": 48.058, "eval_trivia_pairs_steps_per_second": 0.961, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_quora_pairs_loss": 0.9853857159614563, "eval_quora_pairs_runtime": 7.7167, "eval_quora_pairs_samples_per_second": 217.708, "eval_quora_pairs_steps_per_second": 4.406, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_gooaq_pairs_loss": 2.873445987701416, "eval_gooaq_pairs_runtime": 0.4311, "eval_gooaq_pairs_samples_per_second": 231.973, "eval_gooaq_pairs_steps_per_second": 4.639, "step": 1480 }, { "epoch": 0.5005072708826513, "eval_mrpc_pairs_loss": 0.6949604749679565, "eval_mrpc_pairs_runtime": 0.0976, "eval_mrpc_pairs_samples_per_second": 1024.63, "eval_mrpc_pairs_steps_per_second": 20.493, "step": 1480 }, { "epoch": 0.5255326344267839, "grad_norm": 26.758914947509766, "learning_rate": 1.8322624281366248e-05, "loss": 3.6262, "step": 1554 }, { "epoch": 0.5505579979709164, "grad_norm": 31.105188369750977, "learning_rate": 1.919851200541089e-05, "loss": 3.314, "step": 1628 }, { "epoch": 0.5755833615150491, "grad_norm": 22.02496910095215, "learning_rate": 2.0074399729455527e-05, "loss": 3.0849, "step": 1702 }, { "epoch": 0.6006087250591816, "grad_norm": 13.833592414855957, "learning_rate": 2.0950287453500165e-05, "loss": 2.9041, "step": 1776 }, { "epoch": 0.6256340886033142, "grad_norm": 33.3592643737793, "learning_rate": 2.1826175177544804e-05, "loss": 3.1351, "step": 1850 }, { "epoch": 0.6506594521474467, "grad_norm": 37.34846496582031, "learning_rate": 2.270206290158945e-05, "loss": 3.1039, "step": 1924 }, { "epoch": 0.6756848156915793, "grad_norm": 36.27501678466797, "learning_rate": 2.3577950625634087e-05, "loss": 3.1698, "step": 1998 }, { "epoch": 0.7007101792357119, "grad_norm": 17.501544952392578, "learning_rate": 2.4453838349678725e-05, "loss": 2.5172, "step": 2072 }, { "epoch": 0.7257355427798444, "grad_norm": 6.748822212219238, "learning_rate": 2.5329726073723363e-05, "loss": 2.4682, "step": 2146 }, { "epoch": 0.750760906323977, "grad_norm": 7.4816694259643555, "learning_rate": 2.6205613797768008e-05, "loss": 2.6695, "step": 2220 }, { "epoch": 0.750760906323977, "eval_Vitaminc-test_cosine_accuracy": 0.5566666666666666, "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7603035569190979, "eval_Vitaminc-test_cosine_ap": 0.5291811141478275, "eval_Vitaminc-test_cosine_f1": 0.6635071090047393, "eval_Vitaminc-test_cosine_f1_threshold": 0.5236827731132507, "eval_Vitaminc-test_cosine_precision": 0.5054151624548736, "eval_Vitaminc-test_cosine_recall": 0.9655172413793104, "eval_Vitaminc-test_dot_accuracy": 0.5633333333333334, "eval_Vitaminc-test_dot_accuracy_threshold": 92.43148803710938, "eval_Vitaminc-test_dot_ap": 0.5319629878299388, "eval_Vitaminc-test_dot_f1": 0.6575342465753424, "eval_Vitaminc-test_dot_f1_threshold": 56.82046127319336, "eval_Vitaminc-test_dot_precision": 0.49146757679180886, "eval_Vitaminc-test_dot_recall": 0.993103448275862, "eval_Vitaminc-test_euclidean_accuracy": 0.5466666666666666, "eval_Vitaminc-test_euclidean_accuracy_threshold": 8.31692123413086, "eval_Vitaminc-test_euclidean_ap": 0.522357181501639, "eval_Vitaminc-test_euclidean_f1": 0.662037037037037, "eval_Vitaminc-test_euclidean_f1_threshold": 12.49872875213623, "eval_Vitaminc-test_euclidean_precision": 0.49825783972125437, "eval_Vitaminc-test_euclidean_recall": 0.9862068965517241, "eval_Vitaminc-test_manhattan_accuracy": 0.5566666666666666, "eval_Vitaminc-test_manhattan_accuracy_threshold": 171.0796661376953, "eval_Vitaminc-test_manhattan_ap": 0.5296616128404239, "eval_Vitaminc-test_manhattan_f1": 0.6605922551252847, "eval_Vitaminc-test_manhattan_f1_threshold": 271.766357421875, "eval_Vitaminc-test_manhattan_precision": 0.4931972789115646, "eval_Vitaminc-test_manhattan_recall": 1.0, "eval_Vitaminc-test_max_accuracy": 0.5633333333333334, "eval_Vitaminc-test_max_accuracy_threshold": 171.0796661376953, "eval_Vitaminc-test_max_ap": 0.5319629878299388, "eval_Vitaminc-test_max_f1": 0.6635071090047393, "eval_Vitaminc-test_max_f1_threshold": 271.766357421875, "eval_Vitaminc-test_max_precision": 0.5054151624548736, "eval_Vitaminc-test_max_recall": 1.0, "eval_mrpc-test_cosine_accuracy": 0.7333333333333333, "eval_mrpc-test_cosine_accuracy_threshold": 0.7723015546798706, "eval_mrpc-test_cosine_ap": 0.847047462638431, "eval_mrpc-test_cosine_f1": 0.8137931034482758, "eval_mrpc-test_cosine_f1_threshold": 0.7615677118301392, "eval_mrpc-test_cosine_precision": 0.7405857740585774, "eval_mrpc-test_cosine_recall": 0.9030612244897959, "eval_mrpc-test_dot_accuracy": 0.6766666666666666, "eval_mrpc-test_dot_accuracy_threshold": 71.28694915771484, "eval_mrpc-test_dot_ap": 0.7597503410099737, "eval_mrpc-test_dot_f1": 0.7967479674796748, "eval_mrpc-test_dot_f1_threshold": 60.0638427734375, "eval_mrpc-test_dot_precision": 0.6621621621621622, "eval_mrpc-test_dot_recall": 1.0, "eval_mrpc-test_euclidean_accuracy": 0.72, "eval_mrpc-test_euclidean_accuracy_threshold": 6.903799057006836, "eval_mrpc-test_euclidean_ap": 0.832934238772353, "eval_mrpc-test_euclidean_f1": 0.8025751072961373, "eval_mrpc-test_euclidean_f1_threshold": 8.285726547241211, "eval_mrpc-test_euclidean_precision": 0.6925925925925925, "eval_mrpc-test_euclidean_recall": 0.9540816326530612, "eval_mrpc-test_manhattan_accuracy": 0.7166666666666667, "eval_mrpc-test_manhattan_accuracy_threshold": 144.24057006835938, "eval_mrpc-test_manhattan_ap": 0.8379421798530551, "eval_mrpc-test_manhattan_f1": 0.8079470198675496, "eval_mrpc-test_manhattan_f1_threshold": 158.62255859375, "eval_mrpc-test_manhattan_precision": 0.7120622568093385, "eval_mrpc-test_manhattan_recall": 0.9336734693877551, "eval_mrpc-test_max_accuracy": 0.7333333333333333, "eval_mrpc-test_max_accuracy_threshold": 144.24057006835938, "eval_mrpc-test_max_ap": 0.847047462638431, "eval_mrpc-test_max_f1": 0.8137931034482758, "eval_mrpc-test_max_f1_threshold": 158.62255859375, "eval_mrpc-test_max_precision": 0.7405857740585774, "eval_mrpc-test_max_recall": 1.0, "eval_negationNLI-test_cosine_accuracy": 1.0, "eval_negationNLI-test_dot_accuracy": 0.0, "eval_negationNLI-test_euclidean_accuracy": 1.0, "eval_negationNLI-test_manhattan_accuracy": 1.0, "eval_negationNLI-test_max_accuracy": 1.0, "eval_nli-pairs_loss": 3.2204086780548096, "eval_nli-pairs_runtime": 2.7643, "eval_nli-pairs_samples_per_second": 361.751, "eval_nli-pairs_steps_per_second": 7.235, "eval_sequential_score": 0.5319629878299388, "eval_sts-test_pearson_cosine": 0.816293607681843, "eval_sts-test_pearson_dot": 0.7700870243703964, "eval_sts-test_pearson_euclidean": 0.8022637024623361, "eval_sts-test_pearson_manhattan": 0.8129923580109858, "eval_sts-test_pearson_max": 0.816293607681843, "eval_sts-test_spearman_cosine": 0.8161010743022479, "eval_sts-test_spearman_dot": 0.7831264441454899, "eval_sts-test_spearman_euclidean": 0.8001752377809467, "eval_sts-test_spearman_manhattan": 0.8099359329667263, "eval_sts-test_spearman_max": 0.8161010743022479, "step": 2220 }, { "epoch": 0.750760906323977, "eval_vitaminc-pairs_loss": 5.918190002441406, "eval_vitaminc-pairs_runtime": 0.3701, "eval_vitaminc-pairs_samples_per_second": 270.161, "eval_vitaminc-pairs_steps_per_second": 5.403, "step": 2220 }, { "epoch": 0.750760906323977, "eval_negation-triplets_loss": 4.330984115600586, "eval_negation-triplets_runtime": 0.1178, "eval_negation-triplets_samples_per_second": 568.728, "eval_negation-triplets_steps_per_second": 16.977, "step": 2220 }, { "epoch": 0.750760906323977, "eval_qnli-contrastive_loss": 2.517563581466675, "eval_qnli-contrastive_runtime": 0.1727, "eval_qnli-contrastive_samples_per_second": 579.006, "eval_qnli-contrastive_steps_per_second": 11.58, "step": 2220 }, { "epoch": 0.750760906323977, "eval_scitail-pairs-qa_loss": 0.16213805973529816, "eval_scitail-pairs-qa_runtime": 0.2729, "eval_scitail-pairs-qa_samples_per_second": 366.427, "eval_scitail-pairs-qa_steps_per_second": 7.329, "step": 2220 }, { "epoch": 0.750760906323977, "eval_scitail-pairs-pos_loss": 0.8153313994407654, "eval_scitail-pairs-pos_runtime": 0.5332, "eval_scitail-pairs-pos_samples_per_second": 187.536, "eval_scitail-pairs-pos_steps_per_second": 3.751, "step": 2220 }, { "epoch": 0.750760906323977, "eval_xsum-pairs_loss": 1.5002162456512451, "eval_xsum-pairs_runtime": 0.6643, "eval_xsum-pairs_samples_per_second": 150.54, "eval_xsum-pairs_steps_per_second": 3.011, "step": 2220 }, { "epoch": 0.750760906323977, "eval_compression-pairs_loss": 1.024855613708496, "eval_compression-pairs_runtime": 0.099, "eval_compression-pairs_samples_per_second": 1009.822, "eval_compression-pairs_steps_per_second": 20.196, "step": 2220 }, { "epoch": 0.750760906323977, "eval_sciq_pairs_loss": 1.4846413135528564, "eval_sciq_pairs_runtime": 1.9965, "eval_sciq_pairs_samples_per_second": 50.088, "eval_sciq_pairs_steps_per_second": 1.002, "step": 2220 }, { "epoch": 0.750760906323977, "eval_qasc_pairs_loss": 1.003045916557312, "eval_qasc_pairs_runtime": 0.233, "eval_qasc_pairs_samples_per_second": 429.119, "eval_qasc_pairs_steps_per_second": 8.582, "step": 2220 }, { "epoch": 0.750760906323977, "eval_qasc_facts_sym_loss": 0.9863900542259216, "eval_qasc_facts_sym_runtime": 0.0963, "eval_qasc_facts_sym_samples_per_second": 1038.618, "eval_qasc_facts_sym_steps_per_second": 20.772, "step": 2220 }, { "epoch": 0.750760906323977, "eval_openbookqa_pairs_loss": 2.242171049118042, "eval_openbookqa_pairs_runtime": 0.2893, "eval_openbookqa_pairs_samples_per_second": 345.65, "eval_openbookqa_pairs_steps_per_second": 6.913, "step": 2220 }, { "epoch": 0.750760906323977, "eval_msmarco_pairs_loss": 3.1295664310455322, "eval_msmarco_pairs_runtime": 0.5364, "eval_msmarco_pairs_samples_per_second": 186.435, "eval_msmarco_pairs_steps_per_second": 3.729, "step": 2220 }, { "epoch": 0.750760906323977, "eval_nq_pairs_loss": 4.279793739318848, "eval_nq_pairs_runtime": 1.5629, "eval_nq_pairs_samples_per_second": 63.982, "eval_nq_pairs_steps_per_second": 1.28, "step": 2220 }, { "epoch": 0.750760906323977, "eval_trivia_pairs_loss": 4.566234588623047, "eval_trivia_pairs_runtime": 2.08, "eval_trivia_pairs_samples_per_second": 48.076, "eval_trivia_pairs_steps_per_second": 0.962, "step": 2220 }, { "epoch": 0.750760906323977, "eval_quora_pairs_loss": 0.7613513469696045, "eval_quora_pairs_runtime": 7.6052, "eval_quora_pairs_samples_per_second": 220.903, "eval_quora_pairs_steps_per_second": 4.471, "step": 2220 }, { "epoch": 0.750760906323977, "eval_gooaq_pairs_loss": 2.5801427364349365, "eval_gooaq_pairs_runtime": 0.4287, "eval_gooaq_pairs_samples_per_second": 233.285, "eval_gooaq_pairs_steps_per_second": 4.666, "step": 2220 }, { "epoch": 0.750760906323977, "eval_mrpc_pairs_loss": 0.4525637924671173, "eval_mrpc_pairs_runtime": 0.1029, "eval_mrpc_pairs_samples_per_second": 971.921, "eval_mrpc_pairs_steps_per_second": 19.438, "step": 2220 }, { "epoch": 0.7757862698681096, "grad_norm": 6.072179794311523, "learning_rate": 2.7081501521812646e-05, "loss": 2.8803, "step": 2294 }, { "epoch": 0.8008116334122422, "grad_norm": 26.244081497192383, "learning_rate": 2.7957389245857284e-05, "loss": 3.1122, "step": 2368 }, { "epoch": 0.8258369969563747, "grad_norm": 20.519241333007812, "learning_rate": 2.8833276969901922e-05, "loss": 2.7361, "step": 2442 }, { "epoch": 0.8508623605005072, "grad_norm": 22.986614227294922, "learning_rate": 2.9709164693946567e-05, "loss": 2.4439, "step": 2516 }, { "epoch": 0.8758877240446399, "grad_norm": 32.69511032104492, "learning_rate": 3.0585052417991205e-05, "loss": 2.5529, "step": 2590 }, { "epoch": 0.9009130875887724, "grad_norm": 25.88425636291504, "learning_rate": 3.146094014203584e-05, "loss": 2.4578, "step": 2664 }, { "epoch": 0.925938451132905, "grad_norm": 13.460915565490723, "learning_rate": 3.233682786608048e-05, "loss": 2.6346, "step": 2738 }, { "epoch": 0.9509638146770375, "grad_norm": 14.984380722045898, "learning_rate": 3.3212715590125126e-05, "loss": 2.4693, "step": 2812 }, { "epoch": 0.9759891782211702, "grad_norm": 8.878656387329102, "learning_rate": 3.4088603314169764e-05, "loss": 2.4323, "step": 2886 } ], "logging_steps": 74, "max_steps": 29570, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 2957, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 50, "trial_name": null, "trial_params": null }