diff --git "a/checkpoint-623/trainer_state.json" "b/checkpoint-623/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-623/trainer_state.json" @@ -0,0 +1,6344 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4, + "eval_steps": 45, + "global_step": 623, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0022471910112359553, + "grad_norm": 3.9492883682250977, + "learning_rate": 1.9662921348314604e-07, + "loss": 0.8103, + "step": 1 + }, + { + "epoch": 0.0044943820224719105, + "grad_norm": 4.117438793182373, + "learning_rate": 3.932584269662921e-07, + "loss": 0.8803, + "step": 2 + }, + { + "epoch": 0.006741573033707865, + "grad_norm": 3.809002161026001, + "learning_rate": 5.898876404494381e-07, + "loss": 0.8219, + "step": 3 + }, + { + "epoch": 0.008988764044943821, + "grad_norm": 0.7417504787445068, + "learning_rate": 7.865168539325842e-07, + "loss": 0.0574, + "step": 4 + }, + { + "epoch": 0.011235955056179775, + "grad_norm": 2.707460403442383, + "learning_rate": 9.831460674157302e-07, + "loss": 0.3044, + "step": 5 + }, + { + "epoch": 0.01348314606741573, + "grad_norm": 3.082705497741699, + "learning_rate": 1.1797752808988763e-06, + "loss": 0.3306, + "step": 6 + }, + { + "epoch": 0.015730337078651686, + "grad_norm": 3.102416753768921, + "learning_rate": 1.3764044943820223e-06, + "loss": 0.759, + "step": 7 + }, + { + "epoch": 0.017977528089887642, + "grad_norm": 0.6271047592163086, + "learning_rate": 1.5730337078651683e-06, + "loss": 0.0472, + "step": 8 + }, + { + "epoch": 0.020224719101123594, + "grad_norm": 3.1362593173980713, + "learning_rate": 1.7696629213483144e-06, + "loss": 0.7782, + "step": 9 + }, + { + "epoch": 0.02247191011235955, + "grad_norm": 1.124997615814209, + "learning_rate": 1.9662921348314604e-06, + "loss": 0.0757, + "step": 10 + }, + { + "epoch": 0.024719101123595506, + "grad_norm": 3.194413185119629, + "learning_rate": 2.1629213483146067e-06, + "loss": 0.7778, + "step": 11 + }, + { + "epoch": 0.02696629213483146, + "grad_norm": 3.966202974319458, + "learning_rate": 2.3595505617977525e-06, + "loss": 0.7111, + "step": 12 + }, + { + "epoch": 0.029213483146067417, + "grad_norm": 3.63393235206604, + "learning_rate": 2.5561797752808988e-06, + "loss": 0.6598, + "step": 13 + }, + { + "epoch": 0.03146067415730337, + "grad_norm": 4.087065696716309, + "learning_rate": 2.7528089887640446e-06, + "loss": 0.8901, + "step": 14 + }, + { + "epoch": 0.033707865168539325, + "grad_norm": 2.769573211669922, + "learning_rate": 2.949438202247191e-06, + "loss": 0.3206, + "step": 15 + }, + { + "epoch": 0.035955056179775284, + "grad_norm": 2.630620002746582, + "learning_rate": 3.1460674157303367e-06, + "loss": 0.3408, + "step": 16 + }, + { + "epoch": 0.038202247191011236, + "grad_norm": 2.9570937156677246, + "learning_rate": 3.342696629213483e-06, + "loss": 0.5623, + "step": 17 + }, + { + "epoch": 0.04044943820224719, + "grad_norm": 1.0999970436096191, + "learning_rate": 3.5393258426966288e-06, + "loss": 0.0758, + "step": 18 + }, + { + "epoch": 0.04269662921348315, + "grad_norm": 5.516472816467285, + "learning_rate": 3.735955056179775e-06, + "loss": 0.994, + "step": 19 + }, + { + "epoch": 0.0449438202247191, + "grad_norm": 6.245299816131592, + "learning_rate": 3.932584269662921e-06, + "loss": 2.4196, + "step": 20 + }, + { + "epoch": 0.04719101123595506, + "grad_norm": 0.546605110168457, + "learning_rate": 4.129213483146067e-06, + "loss": 0.0561, + "step": 21 + }, + { + 
"epoch": 0.04943820224719101, + "grad_norm": 0.7049635648727417, + "learning_rate": 4.325842696629213e-06, + "loss": 0.0827, + "step": 22 + }, + { + "epoch": 0.051685393258426963, + "grad_norm": 3.1022439002990723, + "learning_rate": 4.522471910112359e-06, + "loss": 0.7405, + "step": 23 + }, + { + "epoch": 0.05393258426966292, + "grad_norm": 4.534759044647217, + "learning_rate": 4.719101123595505e-06, + "loss": 0.9656, + "step": 24 + }, + { + "epoch": 0.056179775280898875, + "grad_norm": 3.0486032962799072, + "learning_rate": 4.915730337078652e-06, + "loss": 0.7855, + "step": 25 + }, + { + "epoch": 0.058426966292134834, + "grad_norm": 3.7457478046417236, + "learning_rate": 5.1123595505617975e-06, + "loss": 0.6349, + "step": 26 + }, + { + "epoch": 0.060674157303370786, + "grad_norm": 3.2051479816436768, + "learning_rate": 5.308988764044943e-06, + "loss": 0.8087, + "step": 27 + }, + { + "epoch": 0.06292134831460675, + "grad_norm": 4.389094829559326, + "learning_rate": 5.505617977528089e-06, + "loss": 0.9282, + "step": 28 + }, + { + "epoch": 0.0651685393258427, + "grad_norm": 2.920410394668579, + "learning_rate": 5.702247191011236e-06, + "loss": 0.3377, + "step": 29 + }, + { + "epoch": 0.06741573033707865, + "grad_norm": 2.7193148136138916, + "learning_rate": 5.898876404494382e-06, + "loss": 0.3289, + "step": 30 + }, + { + "epoch": 0.0696629213483146, + "grad_norm": 4.0008225440979, + "learning_rate": 6.0955056179775275e-06, + "loss": 0.6314, + "step": 31 + }, + { + "epoch": 0.07191011235955057, + "grad_norm": 0.5842159390449524, + "learning_rate": 6.292134831460673e-06, + "loss": 0.0611, + "step": 32 + }, + { + "epoch": 0.07415730337078652, + "grad_norm": 3.1256043910980225, + "learning_rate": 6.48876404494382e-06, + "loss": 0.8942, + "step": 33 + }, + { + "epoch": 0.07640449438202247, + "grad_norm": 0.9526051878929138, + "learning_rate": 6.685393258426966e-06, + "loss": 0.0701, + "step": 34 + }, + { + "epoch": 0.07865168539325842, + "grad_norm": 4.061926364898682, + "learning_rate": 6.882022471910112e-06, + "loss": 0.8506, + "step": 35 + }, + { + "epoch": 0.08089887640449438, + "grad_norm": 2.8898491859436035, + "learning_rate": 7.0786516853932575e-06, + "loss": 0.3386, + "step": 36 + }, + { + "epoch": 0.08314606741573034, + "grad_norm": 0.9806709289550781, + "learning_rate": 7.275280898876404e-06, + "loss": 0.0701, + "step": 37 + }, + { + "epoch": 0.0853932584269663, + "grad_norm": 3.8004391193389893, + "learning_rate": 7.47191011235955e-06, + "loss": 0.8042, + "step": 38 + }, + { + "epoch": 0.08764044943820225, + "grad_norm": 4.089083194732666, + "learning_rate": 7.668539325842697e-06, + "loss": 0.8744, + "step": 39 + }, + { + "epoch": 0.0898876404494382, + "grad_norm": 3.419440984725952, + "learning_rate": 7.865168539325842e-06, + "loss": 0.8644, + "step": 40 + }, + { + "epoch": 0.09213483146067415, + "grad_norm": 4.094921588897705, + "learning_rate": 8.061797752808988e-06, + "loss": 0.8647, + "step": 41 + }, + { + "epoch": 0.09438202247191012, + "grad_norm": 3.9199764728546143, + "learning_rate": 8.258426966292133e-06, + "loss": 0.7916, + "step": 42 + }, + { + "epoch": 0.09662921348314607, + "grad_norm": 4.082360744476318, + "learning_rate": 8.45505617977528e-06, + "loss": 0.8599, + "step": 43 + }, + { + "epoch": 0.09887640449438202, + "grad_norm": 0.6443855166435242, + "learning_rate": 8.651685393258427e-06, + "loss": 0.0523, + "step": 44 + }, + { + "epoch": 0.10112359550561797, + "grad_norm": 4.051048278808594, + "learning_rate": 8.848314606741572e-06, + "loss": 0.6968, + "step": 45 + 
}, + { + "epoch": 0.10112359550561797, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.8300318121910095, + "eval_VitaminC_cosine_ap": 0.5514483751609435, + "eval_VitaminC_cosine_f1": 0.6657718120805369, + "eval_VitaminC_cosine_f1_threshold": 0.37456807494163513, + "eval_VitaminC_cosine_precision": 0.5020242914979757, + "eval_VitaminC_cosine_recall": 0.9880478087649402, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 311.380615234375, + "eval_VitaminC_dot_ap": 0.5333497363350208, + "eval_VitaminC_dot_f1": 0.6684709066305818, + "eval_VitaminC_dot_f1_threshold": 144.8927001953125, + "eval_VitaminC_dot_precision": 0.5061475409836066, + "eval_VitaminC_dot_recall": 0.9840637450199203, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 11.028482437133789, + "eval_VitaminC_euclidean_ap": 0.5544340410314673, + "eval_VitaminC_euclidean_f1": 0.6649006622516557, + "eval_VitaminC_euclidean_f1_threshold": 23.38451385498047, + "eval_VitaminC_euclidean_precision": 0.498015873015873, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.556640625, + "eval_VitaminC_manhattan_accuracy_threshold": 232.38790893554688, + "eval_VitaminC_manhattan_ap": 0.5515569514532939, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 498.126220703125, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 311.380615234375, + "eval_VitaminC_max_ap": 0.5544340410314673, + "eval_VitaminC_max_f1": 0.6684709066305818, + "eval_VitaminC_max_f1_threshold": 498.126220703125, + "eval_VitaminC_max_precision": 0.5061475409836066, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5544340410314673, + "eval_sts-test_pearson_cosine": 0.8803067271464453, + "eval_sts-test_pearson_dot": 0.8698285291814508, + "eval_sts-test_pearson_euclidean": 0.9023937835918766, + "eval_sts-test_pearson_manhattan": 0.9020751259156048, + "eval_sts-test_pearson_max": 0.9023937835918766, + "eval_sts-test_spearman_cosine": 0.9038005474254912, + "eval_sts-test_spearman_dot": 0.8707897794601254, + "eval_sts-test_spearman_euclidean": 0.8989733631129851, + "eval_sts-test_spearman_manhattan": 0.8980189529612906, + "eval_sts-test_spearman_max": 0.9038005474254912, + "eval_vitaminc-pairs_loss": 1.7273772954940796, + "eval_vitaminc-pairs_runtime": 1.8924, + "eval_vitaminc-pairs_samples_per_second": 57.071, + "eval_vitaminc-pairs_steps_per_second": 1.057, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_negation-triplets_loss": 0.9174526929855347, + "eval_negation-triplets_runtime": 0.2972, + "eval_negation-triplets_samples_per_second": 215.314, + "eval_negation-triplets_steps_per_second": 3.364, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_scitail-pairs-pos_loss": 0.07368183881044388, + "eval_scitail-pairs-pos_runtime": 0.379, + "eval_scitail-pairs-pos_samples_per_second": 142.492, + "eval_scitail-pairs-pos_steps_per_second": 2.639, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_scitail-pairs-qa_loss": 0.001584450714290142, + "eval_scitail-pairs-qa_runtime": 0.5178, + "eval_scitail-pairs-qa_samples_per_second": 247.198, + "eval_scitail-pairs-qa_steps_per_second": 3.862, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_xsum-pairs_loss": 0.038235221058130264, + 
"eval_xsum-pairs_runtime": 2.7268, + "eval_xsum-pairs_samples_per_second": 46.941, + "eval_xsum-pairs_steps_per_second": 0.733, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_sciq_pairs_loss": 0.01538097020238638, + "eval_sciq_pairs_runtime": 2.7808, + "eval_sciq_pairs_samples_per_second": 46.029, + "eval_sciq_pairs_steps_per_second": 0.719, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_qasc_pairs_loss": 0.09078988432884216, + "eval_qasc_pairs_runtime": 0.6473, + "eval_qasc_pairs_samples_per_second": 197.758, + "eval_qasc_pairs_steps_per_second": 3.09, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_openbookqa_pairs_loss": 0.6754768490791321, + "eval_openbookqa_pairs_runtime": 0.573, + "eval_openbookqa_pairs_samples_per_second": 223.397, + "eval_openbookqa_pairs_steps_per_second": 3.491, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_msmarco_pairs_loss": 0.15991328656673431, + "eval_msmarco_pairs_runtime": 1.487, + "eval_msmarco_pairs_samples_per_second": 86.078, + "eval_msmarco_pairs_steps_per_second": 1.345, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_nq_pairs_loss": 0.09591890126466751, + "eval_nq_pairs_runtime": 2.3943, + "eval_nq_pairs_samples_per_second": 53.459, + "eval_nq_pairs_steps_per_second": 0.835, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_trivia_pairs_loss": 0.5305934548377991, + "eval_trivia_pairs_runtime": 3.5752, + "eval_trivia_pairs_samples_per_second": 35.802, + "eval_trivia_pairs_steps_per_second": 0.559, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_gooaq_pairs_loss": 0.29681000113487244, + "eval_gooaq_pairs_runtime": 0.9087, + "eval_gooaq_pairs_samples_per_second": 140.861, + "eval_gooaq_pairs_steps_per_second": 2.201, + "step": 45 + }, + { + "epoch": 0.10112359550561797, + "eval_paws-pos_loss": 0.024501051753759384, + "eval_paws-pos_runtime": 0.6773, + "eval_paws-pos_samples_per_second": 188.996, + "eval_paws-pos_steps_per_second": 2.953, + "step": 45 + }, + { + "epoch": 0.10337078651685393, + "grad_norm": 2.9021923542022705, + "learning_rate": 9.044943820224718e-06, + "loss": 0.3376, + "step": 46 + }, + { + "epoch": 0.10561797752808989, + "grad_norm": 3.179288625717163, + "learning_rate": 9.241573033707863e-06, + "loss": 0.5174, + "step": 47 + }, + { + "epoch": 0.10786516853932585, + "grad_norm": 3.1919493675231934, + "learning_rate": 9.43820224719101e-06, + "loss": 0.8162, + "step": 48 + }, + { + "epoch": 0.1101123595505618, + "grad_norm": 2.8602521419525146, + "learning_rate": 9.634831460674157e-06, + "loss": 0.3545, + "step": 49 + }, + { + "epoch": 0.11235955056179775, + "grad_norm": 2.7570478916168213, + "learning_rate": 9.831460674157303e-06, + "loss": 0.315, + "step": 50 + }, + { + "epoch": 0.1146067415730337, + "grad_norm": 0.8641514778137207, + "learning_rate": 1.0028089887640448e-05, + "loss": 0.0627, + "step": 51 + }, + { + "epoch": 0.11685393258426967, + "grad_norm": 3.9437484741210938, + "learning_rate": 1.0224719101123595e-05, + "loss": 0.8851, + "step": 52 + }, + { + "epoch": 0.11910112359550562, + "grad_norm": 4.144773006439209, + "learning_rate": 1.042134831460674e-05, + "loss": 0.8382, + "step": 53 + }, + { + "epoch": 0.12134831460674157, + "grad_norm": 4.277736186981201, + "learning_rate": 1.0617977528089887e-05, + "loss": 0.733, + "step": 54 + }, + { + "epoch": 0.12359550561797752, + "grad_norm": 4.025904178619385, + "learning_rate": 1.0814606741573032e-05, + "loss": 0.7173, + "step": 55 + }, + { + "epoch": 0.1258426966292135, + 
"grad_norm": 3.923046827316284, + "learning_rate": 1.1011235955056178e-05, + "loss": 0.7659, + "step": 56 + }, + { + "epoch": 0.12808988764044943, + "grad_norm": 3.2707138061523438, + "learning_rate": 1.1207865168539325e-05, + "loss": 0.793, + "step": 57 + }, + { + "epoch": 0.1303370786516854, + "grad_norm": 3.1660959720611572, + "learning_rate": 1.1404494382022472e-05, + "loss": 0.5426, + "step": 58 + }, + { + "epoch": 0.13258426966292136, + "grad_norm": 4.5236663818359375, + "learning_rate": 1.1601123595505617e-05, + "loss": 0.7641, + "step": 59 + }, + { + "epoch": 0.1348314606741573, + "grad_norm": 0.5771021246910095, + "learning_rate": 1.1797752808988763e-05, + "loss": 0.0657, + "step": 60 + }, + { + "epoch": 0.13707865168539327, + "grad_norm": 3.8541343212127686, + "learning_rate": 1.1994382022471908e-05, + "loss": 0.7836, + "step": 61 + }, + { + "epoch": 0.1393258426966292, + "grad_norm": 4.284148693084717, + "learning_rate": 1.2191011235955055e-05, + "loss": 0.9306, + "step": 62 + }, + { + "epoch": 0.14157303370786517, + "grad_norm": 4.175032615661621, + "learning_rate": 1.23876404494382e-05, + "loss": 0.8673, + "step": 63 + }, + { + "epoch": 0.14382022471910114, + "grad_norm": 5.025452136993408, + "learning_rate": 1.2584269662921347e-05, + "loss": 0.9296, + "step": 64 + }, + { + "epoch": 0.14606741573033707, + "grad_norm": 3.970745086669922, + "learning_rate": 1.2780898876404493e-05, + "loss": 0.8211, + "step": 65 + }, + { + "epoch": 0.14831460674157304, + "grad_norm": 3.150197744369507, + "learning_rate": 1.297752808988764e-05, + "loss": 0.7685, + "step": 66 + }, + { + "epoch": 0.15056179775280898, + "grad_norm": 4.280994415283203, + "learning_rate": 1.3174157303370785e-05, + "loss": 0.7139, + "step": 67 + }, + { + "epoch": 0.15280898876404495, + "grad_norm": 4.288730621337891, + "learning_rate": 1.3370786516853932e-05, + "loss": 0.8241, + "step": 68 + }, + { + "epoch": 0.1550561797752809, + "grad_norm": 3.7402424812316895, + "learning_rate": 1.3567415730337077e-05, + "loss": 0.6256, + "step": 69 + }, + { + "epoch": 0.15730337078651685, + "grad_norm": 4.478890895843506, + "learning_rate": 1.3764044943820223e-05, + "loss": 0.8842, + "step": 70 + }, + { + "epoch": 0.15955056179775282, + "grad_norm": 3.8147876262664795, + "learning_rate": 1.3960674157303368e-05, + "loss": 0.804, + "step": 71 + }, + { + "epoch": 0.16179775280898875, + "grad_norm": 0.7314035296440125, + "learning_rate": 1.4157303370786515e-05, + "loss": 0.0989, + "step": 72 + }, + { + "epoch": 0.16404494382022472, + "grad_norm": 3.074303150177002, + "learning_rate": 1.4353932584269662e-05, + "loss": 0.332, + "step": 73 + }, + { + "epoch": 0.1662921348314607, + "grad_norm": 3.414987325668335, + "learning_rate": 1.4550561797752808e-05, + "loss": 0.5736, + "step": 74 + }, + { + "epoch": 0.16853932584269662, + "grad_norm": 3.7946674823760986, + "learning_rate": 1.4747191011235953e-05, + "loss": 0.8285, + "step": 75 + }, + { + "epoch": 0.1707865168539326, + "grad_norm": 4.310474395751953, + "learning_rate": 1.49438202247191e-05, + "loss": 0.9561, + "step": 76 + }, + { + "epoch": 0.17303370786516853, + "grad_norm": 0.9791378974914551, + "learning_rate": 1.5140449438202245e-05, + "loss": 0.0633, + "step": 77 + }, + { + "epoch": 0.1752808988764045, + "grad_norm": 0.6351795196533203, + "learning_rate": 1.5337078651685393e-05, + "loss": 0.0848, + "step": 78 + }, + { + "epoch": 0.17752808988764046, + "grad_norm": 3.4832303524017334, + "learning_rate": 1.553370786516854e-05, + "loss": 0.8325, + "step": 79 + }, + { + "epoch": 
0.1797752808988764, + "grad_norm": 5.115800380706787, + "learning_rate": 1.5730337078651683e-05, + "loss": 1.0011, + "step": 80 + }, + { + "epoch": 0.18202247191011237, + "grad_norm": 3.552396297454834, + "learning_rate": 1.592696629213483e-05, + "loss": 0.8697, + "step": 81 + }, + { + "epoch": 0.1842696629213483, + "grad_norm": 4.491541862487793, + "learning_rate": 1.6123595505617977e-05, + "loss": 0.8344, + "step": 82 + }, + { + "epoch": 0.18651685393258427, + "grad_norm": 4.73278284072876, + "learning_rate": 1.6320224719101122e-05, + "loss": 0.9967, + "step": 83 + }, + { + "epoch": 0.18876404494382024, + "grad_norm": 2.994192123413086, + "learning_rate": 1.6516853932584267e-05, + "loss": 0.4638, + "step": 84 + }, + { + "epoch": 0.19101123595505617, + "grad_norm": 4.142394542694092, + "learning_rate": 1.6713483146067415e-05, + "loss": 0.8994, + "step": 85 + }, + { + "epoch": 0.19325842696629214, + "grad_norm": 4.149839401245117, + "learning_rate": 1.691011235955056e-05, + "loss": 0.7789, + "step": 86 + }, + { + "epoch": 0.19550561797752808, + "grad_norm": 0.45795938372612, + "learning_rate": 1.7106741573033705e-05, + "loss": 0.0555, + "step": 87 + }, + { + "epoch": 0.19775280898876405, + "grad_norm": 3.4293618202209473, + "learning_rate": 1.7303370786516853e-05, + "loss": 0.3778, + "step": 88 + }, + { + "epoch": 0.2, + "grad_norm": 4.041529655456543, + "learning_rate": 1.75e-05, + "loss": 0.708, + "step": 89 + }, + { + "epoch": 0.20224719101123595, + "grad_norm": 0.6160458922386169, + "learning_rate": 1.7696629213483143e-05, + "loss": 0.0689, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.74173903465271, + "eval_VitaminC_cosine_ap": 0.5513770735348443, + "eval_VitaminC_cosine_f1": 0.6675531914893617, + "eval_VitaminC_cosine_f1_threshold": 0.32480987906455994, + "eval_VitaminC_cosine_precision": 0.500998003992016, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.560546875, + "eval_VitaminC_dot_accuracy_threshold": 297.664794921875, + "eval_VitaminC_dot_ap": 0.5340088824099496, + "eval_VitaminC_dot_f1": 0.6666666666666667, + "eval_VitaminC_dot_f1_threshold": 126.67618560791016, + "eval_VitaminC_dot_precision": 0.501002004008016, + "eval_VitaminC_dot_recall": 0.9960159362549801, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 14.345688819885254, + "eval_VitaminC_euclidean_ap": 0.5542145004976253, + "eval_VitaminC_euclidean_f1": 0.6675531914893617, + "eval_VitaminC_euclidean_f1_threshold": 23.381019592285156, + "eval_VitaminC_euclidean_precision": 0.500998003992016, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 232.7296142578125, + "eval_VitaminC_manhattan_ap": 0.5523953693907266, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 496.4290466308594, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 297.664794921875, + "eval_VitaminC_max_ap": 0.5542145004976253, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 496.4290466308594, + "eval_VitaminC_max_precision": 0.501002004008016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5542145004976253, + "eval_sts-test_pearson_cosine": 0.8800782580988616, + 
"eval_sts-test_pearson_dot": 0.8687642290872662, + "eval_sts-test_pearson_euclidean": 0.9034088230546415, + "eval_sts-test_pearson_manhattan": 0.9030146212284895, + "eval_sts-test_pearson_max": 0.9034088230546415, + "eval_sts-test_spearman_cosine": 0.904560289590133, + "eval_sts-test_spearman_dot": 0.8705944849554133, + "eval_sts-test_spearman_euclidean": 0.8998959103665689, + "eval_sts-test_spearman_manhattan": 0.8995891404697307, + "eval_sts-test_spearman_max": 0.904560289590133, + "eval_vitaminc-pairs_loss": 1.6141985654830933, + "eval_vitaminc-pairs_runtime": 1.864, + "eval_vitaminc-pairs_samples_per_second": 57.94, + "eval_vitaminc-pairs_steps_per_second": 1.073, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_negation-triplets_loss": 0.9220322370529175, + "eval_negation-triplets_runtime": 0.3199, + "eval_negation-triplets_samples_per_second": 200.043, + "eval_negation-triplets_steps_per_second": 3.126, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_scitail-pairs-pos_loss": 0.0654294565320015, + "eval_scitail-pairs-pos_runtime": 0.4625, + "eval_scitail-pairs-pos_samples_per_second": 116.76, + "eval_scitail-pairs-pos_steps_per_second": 2.162, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_scitail-pairs-qa_loss": 0.0015887805493548512, + "eval_scitail-pairs-qa_runtime": 0.5768, + "eval_scitail-pairs-qa_samples_per_second": 221.899, + "eval_scitail-pairs-qa_steps_per_second": 3.467, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_xsum-pairs_loss": 0.03991687670350075, + "eval_xsum-pairs_runtime": 2.7403, + "eval_xsum-pairs_samples_per_second": 46.71, + "eval_xsum-pairs_steps_per_second": 0.73, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_sciq_pairs_loss": 0.01584962010383606, + "eval_sciq_pairs_runtime": 2.8429, + "eval_sciq_pairs_samples_per_second": 45.024, + "eval_sciq_pairs_steps_per_second": 0.703, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_qasc_pairs_loss": 0.09112343192100525, + "eval_qasc_pairs_runtime": 0.6492, + "eval_qasc_pairs_samples_per_second": 197.154, + "eval_qasc_pairs_steps_per_second": 3.081, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_openbookqa_pairs_loss": 0.7132729887962341, + "eval_openbookqa_pairs_runtime": 0.5847, + "eval_openbookqa_pairs_samples_per_second": 218.922, + "eval_openbookqa_pairs_steps_per_second": 3.421, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_msmarco_pairs_loss": 0.15173853933811188, + "eval_msmarco_pairs_runtime": 1.4966, + "eval_msmarco_pairs_samples_per_second": 85.527, + "eval_msmarco_pairs_steps_per_second": 1.336, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_nq_pairs_loss": 0.09653442353010178, + "eval_nq_pairs_runtime": 2.3749, + "eval_nq_pairs_samples_per_second": 53.897, + "eval_nq_pairs_steps_per_second": 0.842, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_trivia_pairs_loss": 0.5191965699195862, + "eval_trivia_pairs_runtime": 3.6006, + "eval_trivia_pairs_samples_per_second": 35.55, + "eval_trivia_pairs_steps_per_second": 0.555, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_gooaq_pairs_loss": 0.30713126063346863, + "eval_gooaq_pairs_runtime": 0.9131, + "eval_gooaq_pairs_samples_per_second": 140.178, + "eval_gooaq_pairs_steps_per_second": 2.19, + "step": 90 + }, + { + "epoch": 0.20224719101123595, + "eval_paws-pos_loss": 0.024471310898661613, + "eval_paws-pos_runtime": 0.6872, + "eval_paws-pos_samples_per_second": 186.254, + "eval_paws-pos_steps_per_second": 
2.91, + "step": 90 + }, + { + "epoch": 0.20449438202247192, + "grad_norm": 6.209661483764648, + "learning_rate": 1.7893258426966292e-05, + "loss": 2.3489, + "step": 91 + }, + { + "epoch": 0.20674157303370785, + "grad_norm": 3.1821141242980957, + "learning_rate": 1.8089887640449437e-05, + "loss": 0.741, + "step": 92 + }, + { + "epoch": 0.20898876404494382, + "grad_norm": 3.871994972229004, + "learning_rate": 1.8286516853932585e-05, + "loss": 0.7729, + "step": 93 + }, + { + "epoch": 0.21123595505617979, + "grad_norm": 0.5280765891075134, + "learning_rate": 1.8483146067415727e-05, + "loss": 0.0631, + "step": 94 + }, + { + "epoch": 0.21348314606741572, + "grad_norm": 4.475915431976318, + "learning_rate": 1.8679775280898875e-05, + "loss": 0.9342, + "step": 95 + }, + { + "epoch": 0.2157303370786517, + "grad_norm": 3.949381113052368, + "learning_rate": 1.887640449438202e-05, + "loss": 0.8581, + "step": 96 + }, + { + "epoch": 0.21797752808988763, + "grad_norm": 2.910426616668701, + "learning_rate": 1.907303370786517e-05, + "loss": 0.5198, + "step": 97 + }, + { + "epoch": 0.2202247191011236, + "grad_norm": 4.028941631317139, + "learning_rate": 1.9269662921348313e-05, + "loss": 0.846, + "step": 98 + }, + { + "epoch": 0.22247191011235956, + "grad_norm": 4.183433532714844, + "learning_rate": 1.946629213483146e-05, + "loss": 0.6581, + "step": 99 + }, + { + "epoch": 0.2247191011235955, + "grad_norm": 3.348114252090454, + "learning_rate": 1.9662921348314607e-05, + "loss": 0.3579, + "step": 100 + }, + { + "epoch": 0.22696629213483147, + "grad_norm": 4.055211544036865, + "learning_rate": 1.9859550561797752e-05, + "loss": 0.908, + "step": 101 + }, + { + "epoch": 0.2292134831460674, + "grad_norm": 1.0024710893630981, + "learning_rate": 2.0056179775280897e-05, + "loss": 0.0664, + "step": 102 + }, + { + "epoch": 0.23146067415730337, + "grad_norm": 3.582249641418457, + "learning_rate": 2.0252808988764042e-05, + "loss": 0.5411, + "step": 103 + }, + { + "epoch": 0.23370786516853934, + "grad_norm": 4.226349830627441, + "learning_rate": 2.044943820224719e-05, + "loss": 0.9163, + "step": 104 + }, + { + "epoch": 0.23595505617977527, + "grad_norm": 3.002727508544922, + "learning_rate": 2.0646067415730335e-05, + "loss": 0.7975, + "step": 105 + }, + { + "epoch": 0.23820224719101124, + "grad_norm": 3.5497515201568604, + "learning_rate": 2.084269662921348e-05, + "loss": 0.37, + "step": 106 + }, + { + "epoch": 0.24044943820224718, + "grad_norm": 4.381045341491699, + "learning_rate": 2.103932584269663e-05, + "loss": 0.8495, + "step": 107 + }, + { + "epoch": 0.24269662921348314, + "grad_norm": 3.926840305328369, + "learning_rate": 2.1235955056179773e-05, + "loss": 0.8073, + "step": 108 + }, + { + "epoch": 0.2449438202247191, + "grad_norm": 3.0835390090942383, + "learning_rate": 2.1432584269662922e-05, + "loss": 0.7563, + "step": 109 + }, + { + "epoch": 0.24719101123595505, + "grad_norm": 4.230669975280762, + "learning_rate": 2.1629213483146063e-05, + "loss": 0.6585, + "step": 110 + }, + { + "epoch": 0.24943820224719102, + "grad_norm": 2.8849070072174072, + "learning_rate": 2.1825842696629212e-05, + "loss": 0.3246, + "step": 111 + }, + { + "epoch": 0.251685393258427, + "grad_norm": 4.796951770782471, + "learning_rate": 2.2022471910112357e-05, + "loss": 0.9718, + "step": 112 + }, + { + "epoch": 0.2539325842696629, + "grad_norm": 4.60318660736084, + "learning_rate": 2.2219101123595505e-05, + "loss": 0.8584, + "step": 113 + }, + { + "epoch": 0.25617977528089886, + "grad_norm": 3.098703384399414, + "learning_rate": 
2.241573033707865e-05, + "loss": 0.3385, + "step": 114 + }, + { + "epoch": 0.25842696629213485, + "grad_norm": 2.9519224166870117, + "learning_rate": 2.2612359550561795e-05, + "loss": 0.323, + "step": 115 + }, + { + "epoch": 0.2606741573033708, + "grad_norm": 2.913742780685425, + "learning_rate": 2.2808988764044944e-05, + "loss": 0.3359, + "step": 116 + }, + { + "epoch": 0.26292134831460673, + "grad_norm": 4.148440837860107, + "learning_rate": 2.300561797752809e-05, + "loss": 0.6955, + "step": 117 + }, + { + "epoch": 0.2651685393258427, + "grad_norm": 0.8463248610496521, + "learning_rate": 2.3202247191011234e-05, + "loss": 0.0539, + "step": 118 + }, + { + "epoch": 0.26741573033707866, + "grad_norm": 0.7284589409828186, + "learning_rate": 2.339887640449438e-05, + "loss": 0.0507, + "step": 119 + }, + { + "epoch": 0.2696629213483146, + "grad_norm": 3.615086317062378, + "learning_rate": 2.3595505617977527e-05, + "loss": 0.314, + "step": 120 + }, + { + "epoch": 0.27191011235955054, + "grad_norm": 5.229820728302002, + "learning_rate": 2.3792134831460672e-05, + "loss": 1.0339, + "step": 121 + }, + { + "epoch": 0.27415730337078653, + "grad_norm": 3.6847782135009766, + "learning_rate": 2.3988764044943817e-05, + "loss": 0.3158, + "step": 122 + }, + { + "epoch": 0.27640449438202247, + "grad_norm": 4.280517578125, + "learning_rate": 2.4185393258426965e-05, + "loss": 0.7809, + "step": 123 + }, + { + "epoch": 0.2786516853932584, + "grad_norm": 4.476150035858154, + "learning_rate": 2.438202247191011e-05, + "loss": 0.9516, + "step": 124 + }, + { + "epoch": 0.2808988764044944, + "grad_norm": 2.7380239963531494, + "learning_rate": 2.457865168539326e-05, + "loss": 0.3117, + "step": 125 + }, + { + "epoch": 0.28314606741573034, + "grad_norm": 3.9667162895202637, + "learning_rate": 2.47752808988764e-05, + "loss": 0.8366, + "step": 126 + }, + { + "epoch": 0.2853932584269663, + "grad_norm": 4.552999019622803, + "learning_rate": 2.497191011235955e-05, + "loss": 0.8033, + "step": 127 + }, + { + "epoch": 0.2876404494382023, + "grad_norm": 3.4238576889038086, + "learning_rate": 2.5168539325842694e-05, + "loss": 0.7253, + "step": 128 + }, + { + "epoch": 0.2898876404494382, + "grad_norm": 4.677807331085205, + "learning_rate": 2.5365168539325842e-05, + "loss": 0.8345, + "step": 129 + }, + { + "epoch": 0.29213483146067415, + "grad_norm": 4.282113075256348, + "learning_rate": 2.5561797752808987e-05, + "loss": 0.7532, + "step": 130 + }, + { + "epoch": 0.2943820224719101, + "grad_norm": 4.375221252441406, + "learning_rate": 2.5758426966292132e-05, + "loss": 0.8247, + "step": 131 + }, + { + "epoch": 0.2966292134831461, + "grad_norm": 3.2591633796691895, + "learning_rate": 2.595505617977528e-05, + "loss": 0.5175, + "step": 132 + }, + { + "epoch": 0.298876404494382, + "grad_norm": 4.146636962890625, + "learning_rate": 2.6151685393258425e-05, + "loss": 0.7813, + "step": 133 + }, + { + "epoch": 0.30112359550561796, + "grad_norm": 4.2413249015808105, + "learning_rate": 2.634831460674157e-05, + "loss": 0.6582, + "step": 134 + }, + { + "epoch": 0.30337078651685395, + "grad_norm": 4.541455268859863, + "learning_rate": 2.6544943820224715e-05, + "loss": 0.3484, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_VitaminC_cosine_accuracy": 0.560546875, + "eval_VitaminC_cosine_accuracy_threshold": 0.7956135272979736, + "eval_VitaminC_cosine_ap": 0.5505565383154402, + "eval_VitaminC_cosine_f1": 0.6684709066305818, + "eval_VitaminC_cosine_f1_threshold": 0.40466147661209106, + "eval_VitaminC_cosine_precision": 
0.5061475409836066, + "eval_VitaminC_cosine_recall": 0.9840637450199203, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 312.2774658203125, + "eval_VitaminC_dot_ap": 0.5365135091766033, + "eval_VitaminC_dot_f1": 0.6684856753069577, + "eval_VitaminC_dot_f1_threshold": 157.33203125, + "eval_VitaminC_dot_precision": 0.508298755186722, + "eval_VitaminC_dot_recall": 0.9760956175298805, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 12.184114456176758, + "eval_VitaminC_euclidean_ap": 0.5517706579195627, + "eval_VitaminC_euclidean_f1": 0.6649006622516557, + "eval_VitaminC_euclidean_f1_threshold": 23.68879508972168, + "eval_VitaminC_euclidean_precision": 0.498015873015873, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 241.22061157226562, + "eval_VitaminC_manhattan_ap": 0.5494156168773414, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 510.2530212402344, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 312.2774658203125, + "eval_VitaminC_max_ap": 0.5517706579195627, + "eval_VitaminC_max_f1": 0.6684856753069577, + "eval_VitaminC_max_f1_threshold": 510.2530212402344, + "eval_VitaminC_max_precision": 0.508298755186722, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5517706579195627, + "eval_sts-test_pearson_cosine": 0.8812438499723412, + "eval_sts-test_pearson_dot": 0.8695651753004092, + "eval_sts-test_pearson_euclidean": 0.9036940037118162, + "eval_sts-test_pearson_manhattan": 0.9035516699922166, + "eval_sts-test_pearson_max": 0.9036940037118162, + "eval_sts-test_spearman_cosine": 0.9049742835092648, + "eval_sts-test_spearman_dot": 0.8707925987895928, + "eval_sts-test_spearman_euclidean": 0.9003956924537878, + "eval_sts-test_spearman_manhattan": 0.9002747745455083, + "eval_sts-test_spearman_max": 0.9049742835092648, + "eval_vitaminc-pairs_loss": 1.5520410537719727, + "eval_vitaminc-pairs_runtime": 1.8323, + "eval_vitaminc-pairs_samples_per_second": 58.943, + "eval_vitaminc-pairs_steps_per_second": 1.092, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_negation-triplets_loss": 0.9211694002151489, + "eval_negation-triplets_runtime": 0.2923, + "eval_negation-triplets_samples_per_second": 218.93, + "eval_negation-triplets_steps_per_second": 3.421, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_scitail-pairs-pos_loss": 0.07377135753631592, + "eval_scitail-pairs-pos_runtime": 0.3681, + "eval_scitail-pairs-pos_samples_per_second": 146.691, + "eval_scitail-pairs-pos_steps_per_second": 2.716, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_scitail-pairs-qa_loss": 0.00150959100574255, + "eval_scitail-pairs-qa_runtime": 0.5123, + "eval_scitail-pairs-qa_samples_per_second": 249.842, + "eval_scitail-pairs-qa_steps_per_second": 3.904, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_xsum-pairs_loss": 0.036599572747945786, + "eval_xsum-pairs_runtime": 2.7238, + "eval_xsum-pairs_samples_per_second": 46.994, + "eval_xsum-pairs_steps_per_second": 0.734, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_sciq_pairs_loss": 0.01615014858543873, + "eval_sciq_pairs_runtime": 2.8064, + "eval_sciq_pairs_samples_per_second": 45.61, + "eval_sciq_pairs_steps_per_second": 0.713, + 
"step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_qasc_pairs_loss": 0.09235507994890213, + "eval_qasc_pairs_runtime": 0.6488, + "eval_qasc_pairs_samples_per_second": 197.276, + "eval_qasc_pairs_steps_per_second": 3.082, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_openbookqa_pairs_loss": 0.6891775727272034, + "eval_openbookqa_pairs_runtime": 0.5698, + "eval_openbookqa_pairs_samples_per_second": 224.641, + "eval_openbookqa_pairs_steps_per_second": 3.51, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_msmarco_pairs_loss": 0.16766037046909332, + "eval_msmarco_pairs_runtime": 1.4798, + "eval_msmarco_pairs_samples_per_second": 86.499, + "eval_msmarco_pairs_steps_per_second": 1.352, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_nq_pairs_loss": 0.09737721085548401, + "eval_nq_pairs_runtime": 2.3409, + "eval_nq_pairs_samples_per_second": 54.68, + "eval_nq_pairs_steps_per_second": 0.854, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_trivia_pairs_loss": 0.5458433032035828, + "eval_trivia_pairs_runtime": 3.5771, + "eval_trivia_pairs_samples_per_second": 35.783, + "eval_trivia_pairs_steps_per_second": 0.559, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_gooaq_pairs_loss": 0.3082329332828522, + "eval_gooaq_pairs_runtime": 0.9181, + "eval_gooaq_pairs_samples_per_second": 139.413, + "eval_gooaq_pairs_steps_per_second": 2.178, + "step": 135 + }, + { + "epoch": 0.30337078651685395, + "eval_paws-pos_loss": 0.02423396334052086, + "eval_paws-pos_runtime": 0.6827, + "eval_paws-pos_samples_per_second": 187.501, + "eval_paws-pos_steps_per_second": 2.93, + "step": 135 + }, + { + "epoch": 0.3056179775280899, + "grad_norm": 4.549901485443115, + "learning_rate": 2.6741573033707864e-05, + "loss": 0.7648, + "step": 136 + }, + { + "epoch": 0.30786516853932583, + "grad_norm": 3.225851535797119, + "learning_rate": 2.693820224719101e-05, + "loss": 0.7554, + "step": 137 + }, + { + "epoch": 0.3101123595505618, + "grad_norm": 0.6228423118591309, + "learning_rate": 2.7134831460674154e-05, + "loss": 0.0753, + "step": 138 + }, + { + "epoch": 0.31235955056179776, + "grad_norm": 3.12802791595459, + "learning_rate": 2.7331460674157302e-05, + "loss": 0.4987, + "step": 139 + }, + { + "epoch": 0.3146067415730337, + "grad_norm": 4.1997880935668945, + "learning_rate": 2.7528089887640447e-05, + "loss": 0.8543, + "step": 140 + }, + { + "epoch": 0.31685393258426964, + "grad_norm": 4.3362860679626465, + "learning_rate": 2.7724719101123595e-05, + "loss": 0.9425, + "step": 141 + }, + { + "epoch": 0.31910112359550563, + "grad_norm": 0.5599316954612732, + "learning_rate": 2.7921348314606737e-05, + "loss": 0.0472, + "step": 142 + }, + { + "epoch": 0.32134831460674157, + "grad_norm": 3.503603458404541, + "learning_rate": 2.8117977528089885e-05, + "loss": 0.848, + "step": 143 + }, + { + "epoch": 0.3235955056179775, + "grad_norm": 4.712310314178467, + "learning_rate": 2.831460674157303e-05, + "loss": 0.8946, + "step": 144 + }, + { + "epoch": 0.3258426966292135, + "grad_norm": 3.1823527812957764, + "learning_rate": 2.851123595505618e-05, + "loss": 0.7841, + "step": 145 + }, + { + "epoch": 0.32808988764044944, + "grad_norm": 4.423196315765381, + "learning_rate": 2.8707865168539324e-05, + "loss": 0.6653, + "step": 146 + }, + { + "epoch": 0.3303370786516854, + "grad_norm": 4.137822151184082, + "learning_rate": 2.890449438202247e-05, + "loss": 0.3522, + "step": 147 + }, + { + "epoch": 0.3325842696629214, + "grad_norm": 2.997777223587036, + "learning_rate": 
2.9101123595505617e-05, + "loss": 0.4853, + "step": 148 + }, + { + "epoch": 0.3348314606741573, + "grad_norm": 2.89650559425354, + "learning_rate": 2.9297752808988762e-05, + "loss": 0.4726, + "step": 149 + }, + { + "epoch": 0.33707865168539325, + "grad_norm": 5.486624717712402, + "learning_rate": 2.9494382022471907e-05, + "loss": 0.8693, + "step": 150 + }, + { + "epoch": 0.3393258426966292, + "grad_norm": 4.800889015197754, + "learning_rate": 2.9691011235955052e-05, + "loss": 0.8124, + "step": 151 + }, + { + "epoch": 0.3415730337078652, + "grad_norm": 4.188066005706787, + "learning_rate": 2.98876404494382e-05, + "loss": 0.8206, + "step": 152 + }, + { + "epoch": 0.3438202247191011, + "grad_norm": 4.340461254119873, + "learning_rate": 3.0084269662921345e-05, + "loss": 0.9406, + "step": 153 + }, + { + "epoch": 0.34606741573033706, + "grad_norm": 4.658304214477539, + "learning_rate": 3.028089887640449e-05, + "loss": 0.7944, + "step": 154 + }, + { + "epoch": 0.34831460674157305, + "grad_norm": 0.6266987919807434, + "learning_rate": 3.047752808988764e-05, + "loss": 0.0766, + "step": 155 + }, + { + "epoch": 0.350561797752809, + "grad_norm": 4.252346515655518, + "learning_rate": 3.067415730337079e-05, + "loss": 0.8609, + "step": 156 + }, + { + "epoch": 0.35280898876404493, + "grad_norm": 4.9649658203125, + "learning_rate": 3.087078651685393e-05, + "loss": 1.0533, + "step": 157 + }, + { + "epoch": 0.3550561797752809, + "grad_norm": 4.485607624053955, + "learning_rate": 3.106741573033708e-05, + "loss": 0.8396, + "step": 158 + }, + { + "epoch": 0.35730337078651686, + "grad_norm": 3.241231918334961, + "learning_rate": 3.126404494382022e-05, + "loss": 0.7865, + "step": 159 + }, + { + "epoch": 0.3595505617977528, + "grad_norm": 6.846582889556885, + "learning_rate": 3.146067415730337e-05, + "loss": 2.4616, + "step": 160 + }, + { + "epoch": 0.36179775280898874, + "grad_norm": 0.5514687895774841, + "learning_rate": 3.165730337078651e-05, + "loss": 0.0556, + "step": 161 + }, + { + "epoch": 0.36404494382022473, + "grad_norm": 3.7877562046051025, + "learning_rate": 3.185393258426966e-05, + "loss": 0.3758, + "step": 162 + }, + { + "epoch": 0.36629213483146067, + "grad_norm": 5.397939682006836, + "learning_rate": 3.205056179775281e-05, + "loss": 0.9312, + "step": 163 + }, + { + "epoch": 0.3685393258426966, + "grad_norm": 4.301459312438965, + "learning_rate": 3.2247191011235954e-05, + "loss": 0.7993, + "step": 164 + }, + { + "epoch": 0.3707865168539326, + "grad_norm": 4.49428129196167, + "learning_rate": 3.24438202247191e-05, + "loss": 0.8104, + "step": 165 + }, + { + "epoch": 0.37303370786516854, + "grad_norm": 3.2210912704467773, + "learning_rate": 3.2640449438202244e-05, + "loss": 0.8199, + "step": 166 + }, + { + "epoch": 0.3752808988764045, + "grad_norm": 5.359859466552734, + "learning_rate": 3.283707865168539e-05, + "loss": 1.0724, + "step": 167 + }, + { + "epoch": 0.3775280898876405, + "grad_norm": 4.00059700012207, + "learning_rate": 3.3033707865168534e-05, + "loss": 0.3521, + "step": 168 + }, + { + "epoch": 0.3797752808988764, + "grad_norm": 4.418768882751465, + "learning_rate": 3.3230337078651685e-05, + "loss": 0.8536, + "step": 169 + }, + { + "epoch": 0.38202247191011235, + "grad_norm": 4.15454626083374, + "learning_rate": 3.342696629213483e-05, + "loss": 0.872, + "step": 170 + }, + { + "epoch": 0.3842696629213483, + "grad_norm": 3.8060054779052734, + "learning_rate": 3.3623595505617975e-05, + "loss": 0.8009, + "step": 171 + }, + { + "epoch": 0.3865168539325843, + "grad_norm": 3.584745407104492, + 
"learning_rate": 3.382022471910112e-05, + "loss": 0.7798, + "step": 172 + }, + { + "epoch": 0.3887640449438202, + "grad_norm": 4.861410140991211, + "learning_rate": 3.4016853932584265e-05, + "loss": 0.5953, + "step": 173 + }, + { + "epoch": 0.39101123595505616, + "grad_norm": 3.983793020248413, + "learning_rate": 3.421348314606741e-05, + "loss": 0.7562, + "step": 174 + }, + { + "epoch": 0.39325842696629215, + "grad_norm": 4.841738224029541, + "learning_rate": 3.4410112359550555e-05, + "loss": 0.7227, + "step": 175 + }, + { + "epoch": 0.3955056179775281, + "grad_norm": 4.787370204925537, + "learning_rate": 3.460674157303371e-05, + "loss": 0.8953, + "step": 176 + }, + { + "epoch": 0.39775280898876403, + "grad_norm": 4.337812900543213, + "learning_rate": 3.480337078651685e-05, + "loss": 0.7102, + "step": 177 + }, + { + "epoch": 0.4, + "grad_norm": 0.9599294662475586, + "learning_rate": 3.5e-05, + "loss": 0.0667, + "step": 178 + }, + { + "epoch": 0.40224719101123596, + "grad_norm": 0.6864398717880249, + "learning_rate": 3.4999863718440846e-05, + "loss": 0.0528, + "step": 179 + }, + { + "epoch": 0.4044943820224719, + "grad_norm": 4.738316059112549, + "learning_rate": 3.499945487641664e-05, + "loss": 0.7312, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8256886005401611, + "eval_VitaminC_cosine_ap": 0.5557251062538118, + "eval_VitaminC_cosine_f1": 0.6666666666666667, + "eval_VitaminC_cosine_f1_threshold": 0.4391498863697052, + "eval_VitaminC_cosine_precision": 0.5051334702258727, + "eval_VitaminC_cosine_recall": 0.9800796812749004, + "eval_VitaminC_dot_accuracy": 0.556640625, + "eval_VitaminC_dot_accuracy_threshold": 314.2790832519531, + "eval_VitaminC_dot_ap": 0.5397120960874565, + "eval_VitaminC_dot_f1": 0.6684636118598383, + "eval_VitaminC_dot_f1_threshold": 144.02464294433594, + "eval_VitaminC_dot_precision": 0.505091649694501, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.560546875, + "eval_VitaminC_euclidean_accuracy_threshold": 13.859346389770508, + "eval_VitaminC_euclidean_ap": 0.5582755831276058, + "eval_VitaminC_euclidean_f1": 0.667605633802817, + "eval_VitaminC_euclidean_f1_threshold": 18.874879837036133, + "eval_VitaminC_euclidean_precision": 0.5163398692810458, + "eval_VitaminC_euclidean_recall": 0.9442231075697212, + "eval_VitaminC_manhattan_accuracy": 0.560546875, + "eval_VitaminC_manhattan_accuracy_threshold": 239.6153564453125, + "eval_VitaminC_manhattan_ap": 0.5569115785564898, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 501.158447265625, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.560546875, + "eval_VitaminC_max_accuracy_threshold": 314.2790832519531, + "eval_VitaminC_max_ap": 0.5582755831276058, + "eval_VitaminC_max_f1": 0.6684636118598383, + "eval_VitaminC_max_f1_threshold": 501.158447265625, + "eval_VitaminC_max_precision": 0.5163398692810458, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5582755831276058, + "eval_sts-test_pearson_cosine": 0.8825432226222443, + "eval_sts-test_pearson_dot": 0.8720125241659442, + "eval_sts-test_pearson_euclidean": 0.9053801707227738, + "eval_sts-test_pearson_manhattan": 0.9060044572091359, + "eval_sts-test_pearson_max": 0.9060044572091359, + "eval_sts-test_spearman_cosine": 0.9055030196626042, + "eval_sts-test_spearman_dot": 0.8729395405548455, 
+ "eval_sts-test_spearman_euclidean": 0.9013990604854444, + "eval_sts-test_spearman_manhattan": 0.9021052353902007, + "eval_sts-test_spearman_max": 0.9055030196626042, + "eval_vitaminc-pairs_loss": 1.5215541124343872, + "eval_vitaminc-pairs_runtime": 1.8745, + "eval_vitaminc-pairs_samples_per_second": 57.614, + "eval_vitaminc-pairs_steps_per_second": 1.067, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_negation-triplets_loss": 0.9813100099563599, + "eval_negation-triplets_runtime": 0.3009, + "eval_negation-triplets_samples_per_second": 212.73, + "eval_negation-triplets_steps_per_second": 3.324, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_scitail-pairs-pos_loss": 0.09161412715911865, + "eval_scitail-pairs-pos_runtime": 0.3936, + "eval_scitail-pairs-pos_samples_per_second": 137.188, + "eval_scitail-pairs-pos_steps_per_second": 2.541, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_scitail-pairs-qa_loss": 0.0013133077882230282, + "eval_scitail-pairs-qa_runtime": 0.5286, + "eval_scitail-pairs-qa_samples_per_second": 242.147, + "eval_scitail-pairs-qa_steps_per_second": 3.784, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_xsum-pairs_loss": 0.049595557153224945, + "eval_xsum-pairs_runtime": 2.7447, + "eval_xsum-pairs_samples_per_second": 46.636, + "eval_xsum-pairs_steps_per_second": 0.729, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_sciq_pairs_loss": 0.017273178324103355, + "eval_sciq_pairs_runtime": 2.8401, + "eval_sciq_pairs_samples_per_second": 45.069, + "eval_sciq_pairs_steps_per_second": 0.704, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_qasc_pairs_loss": 0.09485691040754318, + "eval_qasc_pairs_runtime": 0.6594, + "eval_qasc_pairs_samples_per_second": 194.113, + "eval_qasc_pairs_steps_per_second": 3.033, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_openbookqa_pairs_loss": 0.7253161072731018, + "eval_openbookqa_pairs_runtime": 0.5801, + "eval_openbookqa_pairs_samples_per_second": 220.633, + "eval_openbookqa_pairs_steps_per_second": 3.447, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_msmarco_pairs_loss": 0.17383378744125366, + "eval_msmarco_pairs_runtime": 1.4824, + "eval_msmarco_pairs_samples_per_second": 86.346, + "eval_msmarco_pairs_steps_per_second": 1.349, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_nq_pairs_loss": 0.10324681550264359, + "eval_nq_pairs_runtime": 2.3542, + "eval_nq_pairs_samples_per_second": 54.372, + "eval_nq_pairs_steps_per_second": 0.85, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_trivia_pairs_loss": 0.5358972549438477, + "eval_trivia_pairs_runtime": 3.5881, + "eval_trivia_pairs_samples_per_second": 35.673, + "eval_trivia_pairs_steps_per_second": 0.557, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_gooaq_pairs_loss": 0.3070329427719116, + "eval_gooaq_pairs_runtime": 0.9009, + "eval_gooaq_pairs_samples_per_second": 142.079, + "eval_gooaq_pairs_steps_per_second": 2.22, + "step": 180 + }, + { + "epoch": 0.4044943820224719, + "eval_paws-pos_loss": 0.024055125191807747, + "eval_paws-pos_runtime": 0.6792, + "eval_paws-pos_samples_per_second": 188.469, + "eval_paws-pos_steps_per_second": 2.945, + "step": 180 + }, + { + "epoch": 0.4067415730337079, + "grad_norm": 5.063413143157959, + "learning_rate": 3.4998773481887046e-05, + "loss": 0.7809, + "step": 181 + }, + { + "epoch": 0.40898876404494383, + "grad_norm": 4.108719825744629, + "learning_rate": 3.499781954811798e-05, + "loss": 0.8333, + 
"step": 182 + }, + { + "epoch": 0.41123595505617977, + "grad_norm": 4.6362104415893555, + "learning_rate": 3.499659309368139e-05, + "loss": 0.9283, + "step": 183 + }, + { + "epoch": 0.4134831460674157, + "grad_norm": 4.432968616485596, + "learning_rate": 3.499509414245486e-05, + "loss": 0.7011, + "step": 184 + }, + { + "epoch": 0.4157303370786517, + "grad_norm": 4.040768623352051, + "learning_rate": 3.4993322723621164e-05, + "loss": 0.8413, + "step": 185 + }, + { + "epoch": 0.41797752808988764, + "grad_norm": 5.797406196594238, + "learning_rate": 3.499127887166769e-05, + "loss": 1.1679, + "step": 186 + }, + { + "epoch": 0.4202247191011236, + "grad_norm": 4.275143623352051, + "learning_rate": 3.498896262638578e-05, + "loss": 0.8701, + "step": 187 + }, + { + "epoch": 0.42247191011235957, + "grad_norm": 3.920672655105591, + "learning_rate": 3.498637403286993e-05, + "loss": 0.8139, + "step": 188 + }, + { + "epoch": 0.4247191011235955, + "grad_norm": 4.049210071563721, + "learning_rate": 3.498351314151693e-05, + "loss": 0.664, + "step": 189 + }, + { + "epoch": 0.42696629213483145, + "grad_norm": 4.007586479187012, + "learning_rate": 3.498038000802489e-05, + "loss": 0.3835, + "step": 190 + }, + { + "epoch": 0.42921348314606744, + "grad_norm": 3.7303507328033447, + "learning_rate": 3.497697469339215e-05, + "loss": 0.8516, + "step": 191 + }, + { + "epoch": 0.4314606741573034, + "grad_norm": 2.96820330619812, + "learning_rate": 3.497329726391606e-05, + "loss": 0.5479, + "step": 192 + }, + { + "epoch": 0.4337078651685393, + "grad_norm": 5.242271423339844, + "learning_rate": 3.496934779119175e-05, + "loss": 0.8642, + "step": 193 + }, + { + "epoch": 0.43595505617977526, + "grad_norm": 2.740006685256958, + "learning_rate": 3.496512635211069e-05, + "loss": 0.3121, + "step": 194 + }, + { + "epoch": 0.43820224719101125, + "grad_norm": 4.162242889404297, + "learning_rate": 3.496063302885921e-05, + "loss": 0.6932, + "step": 195 + }, + { + "epoch": 0.4404494382022472, + "grad_norm": 0.632938027381897, + "learning_rate": 3.495586790891689e-05, + "loss": 0.0647, + "step": 196 + }, + { + "epoch": 0.44269662921348313, + "grad_norm": 4.595058917999268, + "learning_rate": 3.495083108505487e-05, + "loss": 0.8173, + "step": 197 + }, + { + "epoch": 0.4449438202247191, + "grad_norm": 3.102372646331787, + "learning_rate": 3.494552265533404e-05, + "loss": 0.3122, + "step": 198 + }, + { + "epoch": 0.44719101123595506, + "grad_norm": 4.9895830154418945, + "learning_rate": 3.493994272310313e-05, + "loss": 0.7852, + "step": 199 + }, + { + "epoch": 0.449438202247191, + "grad_norm": 4.032258987426758, + "learning_rate": 3.493409139699669e-05, + "loss": 0.811, + "step": 200 + }, + { + "epoch": 0.451685393258427, + "grad_norm": 4.17324161529541, + "learning_rate": 3.4927968790932973e-05, + "loss": 0.7564, + "step": 201 + }, + { + "epoch": 0.45393258426966293, + "grad_norm": 0.49707159399986267, + "learning_rate": 3.492157502411174e-05, + "loss": 0.0541, + "step": 202 + }, + { + "epoch": 0.45617977528089887, + "grad_norm": 3.847059965133667, + "learning_rate": 3.491491022101194e-05, + "loss": 0.9085, + "step": 203 + }, + { + "epoch": 0.4584269662921348, + "grad_norm": 4.565647602081299, + "learning_rate": 3.4907974511389224e-05, + "loss": 0.8416, + "step": 204 + }, + { + "epoch": 0.4606741573033708, + "grad_norm": 0.8872150778770447, + "learning_rate": 3.4900768030273515e-05, + "loss": 0.0569, + "step": 205 + }, + { + "epoch": 0.46292134831460674, + "grad_norm": 3.2797999382019043, + "learning_rate": 3.4893290917966305e-05, + 
"loss": 0.7998, + "step": 206 + }, + { + "epoch": 0.4651685393258427, + "grad_norm": 5.683195114135742, + "learning_rate": 3.4885543320037956e-05, + "loss": 0.7218, + "step": 207 + }, + { + "epoch": 0.46741573033707867, + "grad_norm": 5.348382949829102, + "learning_rate": 3.4877525387324844e-05, + "loss": 0.9292, + "step": 208 + }, + { + "epoch": 0.4696629213483146, + "grad_norm": 4.3047099113464355, + "learning_rate": 3.486923727592647e-05, + "loss": 0.8279, + "step": 209 + }, + { + "epoch": 0.47191011235955055, + "grad_norm": 4.425166130065918, + "learning_rate": 3.486067914720236e-05, + "loss": 0.8452, + "step": 210 + }, + { + "epoch": 0.47415730337078654, + "grad_norm": 5.7947916984558105, + "learning_rate": 3.485185116776896e-05, + "loss": 1.1099, + "step": 211 + }, + { + "epoch": 0.4764044943820225, + "grad_norm": 4.257087230682373, + "learning_rate": 3.4842753509496385e-05, + "loss": 0.9436, + "step": 212 + }, + { + "epoch": 0.4786516853932584, + "grad_norm": 4.357375144958496, + "learning_rate": 3.483338634950507e-05, + "loss": 0.8389, + "step": 213 + }, + { + "epoch": 0.48089887640449436, + "grad_norm": 3.666268825531006, + "learning_rate": 3.482374987016233e-05, + "loss": 0.3297, + "step": 214 + }, + { + "epoch": 0.48314606741573035, + "grad_norm": 3.0593607425689697, + "learning_rate": 3.481384425907879e-05, + "loss": 0.8098, + "step": 215 + }, + { + "epoch": 0.4853932584269663, + "grad_norm": 0.4539957344532013, + "learning_rate": 3.480366970910476e-05, + "loss": 0.0386, + "step": 216 + }, + { + "epoch": 0.48764044943820223, + "grad_norm": 3.3102784156799316, + "learning_rate": 3.479322641832646e-05, + "loss": 0.7752, + "step": 217 + }, + { + "epoch": 0.4898876404494382, + "grad_norm": 3.8798298835754395, + "learning_rate": 3.4782514590062165e-05, + "loss": 0.8071, + "step": 218 + }, + { + "epoch": 0.49213483146067416, + "grad_norm": 6.300197124481201, + "learning_rate": 3.4771534432858255e-05, + "loss": 2.571, + "step": 219 + }, + { + "epoch": 0.4943820224719101, + "grad_norm": 4.163381099700928, + "learning_rate": 3.4760286160485145e-05, + "loss": 0.5912, + "step": 220 + }, + { + "epoch": 0.4966292134831461, + "grad_norm": 3.5834686756134033, + "learning_rate": 3.474876999193314e-05, + "loss": 0.3792, + "step": 221 + }, + { + "epoch": 0.49887640449438203, + "grad_norm": 4.494593143463135, + "learning_rate": 3.473698615140816e-05, + "loss": 0.7456, + "step": 222 + }, + { + "epoch": 0.501123595505618, + "grad_norm": 3.909142017364502, + "learning_rate": 3.4724934868327366e-05, + "loss": 0.7207, + "step": 223 + }, + { + "epoch": 0.503370786516854, + "grad_norm": 3.0387282371520996, + "learning_rate": 3.47126163773147e-05, + "loss": 0.3254, + "step": 224 + }, + { + "epoch": 0.5056179775280899, + "grad_norm": 0.6529088616371155, + "learning_rate": 3.4700030918196344e-05, + "loss": 0.0461, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8303268551826477, + "eval_VitaminC_cosine_ap": 0.5509523400010791, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.2634955048561096, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 281.011474609375, + "eval_VitaminC_dot_ap": 0.5281394234221073, + "eval_VitaminC_dot_f1": 0.6711772665764546, + "eval_VitaminC_dot_f1_threshold": 141.11529541015625, + "eval_VitaminC_dot_precision": 
0.5081967213114754, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 11.405111312866211, + "eval_VitaminC_euclidean_ap": 0.5573376843815556, + "eval_VitaminC_euclidean_f1": 0.6640211640211641, + "eval_VitaminC_euclidean_f1_threshold": 24.63976287841797, + "eval_VitaminC_euclidean_precision": 0.497029702970297, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.556640625, + "eval_VitaminC_manhattan_accuracy_threshold": 349.33441162109375, + "eval_VitaminC_manhattan_ap": 0.5561637270496671, + "eval_VitaminC_manhattan_f1": 0.6657824933687002, + "eval_VitaminC_manhattan_f1_threshold": 505.0340270996094, + "eval_VitaminC_manhattan_precision": 0.4990059642147117, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 349.33441162109375, + "eval_VitaminC_max_ap": 0.5573376843815556, + "eval_VitaminC_max_f1": 0.6711772665764546, + "eval_VitaminC_max_f1_threshold": 505.0340270996094, + "eval_VitaminC_max_precision": 0.5081967213114754, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5573376843815556, + "eval_sts-test_pearson_cosine": 0.8848200869109313, + "eval_sts-test_pearson_dot": 0.8723563516714744, + "eval_sts-test_pearson_euclidean": 0.9070688973489409, + "eval_sts-test_pearson_manhattan": 0.9073961699007848, + "eval_sts-test_pearson_max": 0.9073961699007848, + "eval_sts-test_spearman_cosine": 0.9050875937031079, + "eval_sts-test_spearman_dot": 0.8699468894518183, + "eval_sts-test_spearman_euclidean": 0.9020747597811932, + "eval_sts-test_spearman_manhattan": 0.9019608230696907, + "eval_sts-test_spearman_max": 0.9050875937031079, + "eval_vitaminc-pairs_loss": 1.4897230863571167, + "eval_vitaminc-pairs_runtime": 1.8927, + "eval_vitaminc-pairs_samples_per_second": 57.062, + "eval_vitaminc-pairs_steps_per_second": 1.057, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_negation-triplets_loss": 0.9457363486289978, + "eval_negation-triplets_runtime": 0.3019, + "eval_negation-triplets_samples_per_second": 212.002, + "eval_negation-triplets_steps_per_second": 3.313, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_scitail-pairs-pos_loss": 0.07606112211942673, + "eval_scitail-pairs-pos_runtime": 0.3972, + "eval_scitail-pairs-pos_samples_per_second": 135.938, + "eval_scitail-pairs-pos_steps_per_second": 2.517, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_scitail-pairs-qa_loss": 0.001212431932799518, + "eval_scitail-pairs-qa_runtime": 0.5348, + "eval_scitail-pairs-qa_samples_per_second": 239.347, + "eval_scitail-pairs-qa_steps_per_second": 3.74, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_xsum-pairs_loss": 0.02758924476802349, + "eval_xsum-pairs_runtime": 2.767, + "eval_xsum-pairs_samples_per_second": 46.26, + "eval_xsum-pairs_steps_per_second": 0.723, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_sciq_pairs_loss": 0.016450434923171997, + "eval_sciq_pairs_runtime": 2.8812, + "eval_sciq_pairs_samples_per_second": 44.426, + "eval_sciq_pairs_steps_per_second": 0.694, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_qasc_pairs_loss": 0.09214109182357788, + "eval_qasc_pairs_runtime": 0.6597, + "eval_qasc_pairs_samples_per_second": 194.029, + "eval_qasc_pairs_steps_per_second": 3.032, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_openbookqa_pairs_loss": 0.7429620623588562, + 
"eval_openbookqa_pairs_runtime": 0.5947, + "eval_openbookqa_pairs_samples_per_second": 215.22, + "eval_openbookqa_pairs_steps_per_second": 3.363, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_msmarco_pairs_loss": 0.17871831357479095, + "eval_msmarco_pairs_runtime": 1.5003, + "eval_msmarco_pairs_samples_per_second": 85.314, + "eval_msmarco_pairs_steps_per_second": 1.333, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_nq_pairs_loss": 0.09803248196840286, + "eval_nq_pairs_runtime": 2.3587, + "eval_nq_pairs_samples_per_second": 54.267, + "eval_nq_pairs_steps_per_second": 0.848, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_trivia_pairs_loss": 0.5323590636253357, + "eval_trivia_pairs_runtime": 3.6206, + "eval_trivia_pairs_samples_per_second": 35.354, + "eval_trivia_pairs_steps_per_second": 0.552, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_gooaq_pairs_loss": 0.2667708098888397, + "eval_gooaq_pairs_runtime": 0.9171, + "eval_gooaq_pairs_samples_per_second": 139.573, + "eval_gooaq_pairs_steps_per_second": 2.181, + "step": 225 + }, + { + "epoch": 0.5056179775280899, + "eval_paws-pos_loss": 0.0236118845641613, + "eval_paws-pos_runtime": 0.6973, + "eval_paws-pos_samples_per_second": 183.563, + "eval_paws-pos_steps_per_second": 2.868, + "step": 225 + }, + { + "epoch": 0.5078651685393258, + "grad_norm": 3.5867371559143066, + "learning_rate": 3.4687178735995997e-05, + "loss": 0.347, + "step": 226 + }, + { + "epoch": 0.5101123595505618, + "grad_norm": 0.37994861602783203, + "learning_rate": 3.467406008093016e-05, + "loss": 0.0417, + "step": 227 + }, + { + "epoch": 0.5123595505617977, + "grad_norm": 4.081336975097656, + "learning_rate": 3.466067520840322e-05, + "loss": 0.7783, + "step": 228 + }, + { + "epoch": 0.5146067415730337, + "grad_norm": 4.306976795196533, + "learning_rate": 3.46470243790025e-05, + "loss": 0.9027, + "step": 229 + }, + { + "epoch": 0.5168539325842697, + "grad_norm": 4.0280022621154785, + "learning_rate": 3.4633107858493206e-05, + "loss": 0.7166, + "step": 230 + }, + { + "epoch": 0.5191011235955056, + "grad_norm": 3.4807679653167725, + "learning_rate": 3.461892591781319e-05, + "loss": 0.705, + "step": 231 + }, + { + "epoch": 0.5213483146067416, + "grad_norm": 4.166563510894775, + "learning_rate": 3.4604478833067756e-05, + "loss": 0.8425, + "step": 232 + }, + { + "epoch": 0.5235955056179775, + "grad_norm": 3.828537940979004, + "learning_rate": 3.4589766885524204e-05, + "loss": 0.5362, + "step": 233 + }, + { + "epoch": 0.5258426966292135, + "grad_norm": 4.316190242767334, + "learning_rate": 3.4574790361606435e-05, + "loss": 0.7869, + "step": 234 + }, + { + "epoch": 0.5280898876404494, + "grad_norm": 4.244805335998535, + "learning_rate": 3.4559549552889285e-05, + "loss": 0.88, + "step": 235 + }, + { + "epoch": 0.5303370786516854, + "grad_norm": 4.208700656890869, + "learning_rate": 3.454404475609294e-05, + "loss": 0.8077, + "step": 236 + }, + { + "epoch": 0.5325842696629214, + "grad_norm": 3.1473183631896973, + "learning_rate": 3.4528276273077094e-05, + "loss": 0.8145, + "step": 237 + }, + { + "epoch": 0.5348314606741573, + "grad_norm": 3.798297166824341, + "learning_rate": 3.4512244410835094e-05, + "loss": 0.78, + "step": 238 + }, + { + "epoch": 0.5370786516853933, + "grad_norm": 0.535529375076294, + "learning_rate": 3.449594948148796e-05, + "loss": 0.0536, + "step": 239 + }, + { + "epoch": 0.5393258426966292, + "grad_norm": 3.2119970321655273, + "learning_rate": 3.447939180227833e-05, + "loss": 0.7975, + "step": 240 + }, 
+ { + "epoch": 0.5415730337078651, + "grad_norm": 4.725860118865967, + "learning_rate": 3.446257169556425e-05, + "loss": 0.8932, + "step": 241 + }, + { + "epoch": 0.5438202247191011, + "grad_norm": 3.867676258087158, + "learning_rate": 3.4445489488812906e-05, + "loss": 0.3386, + "step": 242 + }, + { + "epoch": 0.5460674157303371, + "grad_norm": 3.981114387512207, + "learning_rate": 3.4428145514594274e-05, + "loss": 0.7741, + "step": 243 + }, + { + "epoch": 0.5483146067415731, + "grad_norm": 4.034990310668945, + "learning_rate": 3.4410540110574616e-05, + "loss": 0.7439, + "step": 244 + }, + { + "epoch": 0.550561797752809, + "grad_norm": 4.209812641143799, + "learning_rate": 3.4392673619509916e-05, + "loss": 0.7999, + "step": 245 + }, + { + "epoch": 0.5528089887640449, + "grad_norm": 3.942631244659424, + "learning_rate": 3.437454638923921e-05, + "loss": 0.8542, + "step": 246 + }, + { + "epoch": 0.5550561797752809, + "grad_norm": 4.087955951690674, + "learning_rate": 3.435615877267783e-05, + "loss": 0.6992, + "step": 247 + }, + { + "epoch": 0.5573033707865168, + "grad_norm": 3.885822057723999, + "learning_rate": 3.4337511127810466e-05, + "loss": 0.8579, + "step": 248 + }, + { + "epoch": 0.5595505617977528, + "grad_norm": 5.198770523071289, + "learning_rate": 3.431860381768431e-05, + "loss": 1.0221, + "step": 249 + }, + { + "epoch": 0.5617977528089888, + "grad_norm": 4.321418285369873, + "learning_rate": 3.4299437210401866e-05, + "loss": 0.699, + "step": 250 + }, + { + "epoch": 0.5640449438202247, + "grad_norm": 3.1992154121398926, + "learning_rate": 3.4280011679113884e-05, + "loss": 0.8523, + "step": 251 + }, + { + "epoch": 0.5662921348314607, + "grad_norm": 4.94226598739624, + "learning_rate": 3.4260327602012027e-05, + "loss": 1.0307, + "step": 252 + }, + { + "epoch": 0.5685393258426966, + "grad_norm": 3.958935499191284, + "learning_rate": 3.424038536232154e-05, + "loss": 0.846, + "step": 253 + }, + { + "epoch": 0.5707865168539326, + "grad_norm": 4.023487091064453, + "learning_rate": 3.4220185348293775e-05, + "loss": 0.8361, + "step": 254 + }, + { + "epoch": 0.5730337078651685, + "grad_norm": 3.275102138519287, + "learning_rate": 3.4199727953198665e-05, + "loss": 0.8224, + "step": 255 + }, + { + "epoch": 0.5752808988764045, + "grad_norm": 3.6130261421203613, + "learning_rate": 3.417901357531701e-05, + "loss": 0.5301, + "step": 256 + }, + { + "epoch": 0.5775280898876405, + "grad_norm": 4.571770668029785, + "learning_rate": 3.415804261793277e-05, + "loss": 0.3795, + "step": 257 + }, + { + "epoch": 0.5797752808988764, + "grad_norm": 3.1884663105010986, + "learning_rate": 3.413681548932521e-05, + "loss": 0.5434, + "step": 258 + }, + { + "epoch": 0.5820224719101124, + "grad_norm": 4.795211315155029, + "learning_rate": 3.411533260276091e-05, + "loss": 0.847, + "step": 259 + }, + { + "epoch": 0.5842696629213483, + "grad_norm": 4.761318206787109, + "learning_rate": 3.409359437648579e-05, + "loss": 0.7323, + "step": 260 + }, + { + "epoch": 0.5865168539325842, + "grad_norm": 4.4683098793029785, + "learning_rate": 3.407160123371687e-05, + "loss": 0.6606, + "step": 261 + }, + { + "epoch": 0.5887640449438202, + "grad_norm": 0.7677178382873535, + "learning_rate": 3.404935360263415e-05, + "loss": 0.0543, + "step": 262 + }, + { + "epoch": 0.5910112359550562, + "grad_norm": 4.110381126403809, + "learning_rate": 3.4026851916372166e-05, + "loss": 0.6709, + "step": 263 + }, + { + "epoch": 0.5932584269662922, + "grad_norm": 4.766375541687012, + "learning_rate": 3.400409661301162e-05, + "loss": 0.809, + "step": 
264 + }, + { + "epoch": 0.5955056179775281, + "grad_norm": 5.389264106750488, + "learning_rate": 3.398108813557082e-05, + "loss": 1.0391, + "step": 265 + }, + { + "epoch": 0.597752808988764, + "grad_norm": 3.8780810832977295, + "learning_rate": 3.3957826931997094e-05, + "loss": 0.7396, + "step": 266 + }, + { + "epoch": 0.6, + "grad_norm": 4.399974822998047, + "learning_rate": 3.393431345515801e-05, + "loss": 0.7839, + "step": 267 + }, + { + "epoch": 0.6022471910112359, + "grad_norm": 3.2098612785339355, + "learning_rate": 3.391054816283262e-05, + "loss": 0.3054, + "step": 268 + }, + { + "epoch": 0.604494382022472, + "grad_norm": 3.606182098388672, + "learning_rate": 3.3886531517702505e-05, + "loss": 0.5258, + "step": 269 + }, + { + "epoch": 0.6067415730337079, + "grad_norm": 4.3564934730529785, + "learning_rate": 3.3862263987342784e-05, + "loss": 0.7367, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_VitaminC_cosine_accuracy": 0.552734375, + "eval_VitaminC_cosine_accuracy_threshold": 0.814909815788269, + "eval_VitaminC_cosine_ap": 0.5506214433093293, + "eval_VitaminC_cosine_f1": 0.664886515353805, + "eval_VitaminC_cosine_f1_threshold": 0.3506072461605072, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 0.9920318725099602, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 316.90899658203125, + "eval_VitaminC_dot_ap": 0.5353657977329522, + "eval_VitaminC_dot_f1": 0.6666666666666667, + "eval_VitaminC_dot_f1_threshold": 155.67796325683594, + "eval_VitaminC_dot_precision": 0.506198347107438, + "eval_VitaminC_dot_recall": 0.9760956175298805, + "eval_VitaminC_euclidean_accuracy": 0.55078125, + "eval_VitaminC_euclidean_accuracy_threshold": 10.77621841430664, + "eval_VitaminC_euclidean_ap": 0.550546292530568, + "eval_VitaminC_euclidean_f1": 0.6666666666666666, + "eval_VitaminC_euclidean_f1_threshold": 24.22284698486328, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 335.6986389160156, + "eval_VitaminC_manhattan_ap": 0.5497325043939846, + "eval_VitaminC_manhattan_f1": 0.6640211640211641, + "eval_VitaminC_manhattan_f1_threshold": 513.494873046875, + "eval_VitaminC_manhattan_precision": 0.497029702970297, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.5546875, + "eval_VitaminC_max_accuracy_threshold": 335.6986389160156, + "eval_VitaminC_max_ap": 0.5506214433093293, + "eval_VitaminC_max_f1": 0.6666666666666667, + "eval_VitaminC_max_f1_threshold": 513.494873046875, + "eval_VitaminC_max_precision": 0.506198347107438, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5506214433093293, + "eval_sts-test_pearson_cosine": 0.8848372816940555, + "eval_sts-test_pearson_dot": 0.8774995772730847, + "eval_sts-test_pearson_euclidean": 0.9058906663416005, + "eval_sts-test_pearson_manhattan": 0.9066316554236529, + "eval_sts-test_pearson_max": 0.9066316554236529, + "eval_sts-test_spearman_cosine": 0.9085018016884417, + "eval_sts-test_spearman_dot": 0.8776881864036095, + "eval_sts-test_spearman_euclidean": 0.903223569412372, + "eval_sts-test_spearman_manhattan": 0.9037578547221237, + "eval_sts-test_spearman_max": 0.9085018016884417, + "eval_vitaminc-pairs_loss": 1.4935871362686157, + "eval_vitaminc-pairs_runtime": 1.8963, + "eval_vitaminc-pairs_samples_per_second": 56.952, + "eval_vitaminc-pairs_steps_per_second": 1.055, + "step": 270 + }, + { + "epoch": 
0.6067415730337079, + "eval_negation-triplets_loss": 0.9505463242530823, + "eval_negation-triplets_runtime": 0.3041, + "eval_negation-triplets_samples_per_second": 210.485, + "eval_negation-triplets_steps_per_second": 3.289, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_scitail-pairs-pos_loss": 0.09635873883962631, + "eval_scitail-pairs-pos_runtime": 0.4048, + "eval_scitail-pairs-pos_samples_per_second": 133.396, + "eval_scitail-pairs-pos_steps_per_second": 2.47, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_scitail-pairs-qa_loss": 0.0009468490607105196, + "eval_scitail-pairs-qa_runtime": 0.5341, + "eval_scitail-pairs-qa_samples_per_second": 239.65, + "eval_scitail-pairs-qa_steps_per_second": 3.745, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_xsum-pairs_loss": 0.026903513818979263, + "eval_xsum-pairs_runtime": 2.7518, + "eval_xsum-pairs_samples_per_second": 46.514, + "eval_xsum-pairs_steps_per_second": 0.727, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_sciq_pairs_loss": 0.01619444414973259, + "eval_sciq_pairs_runtime": 2.8856, + "eval_sciq_pairs_samples_per_second": 44.358, + "eval_sciq_pairs_steps_per_second": 0.693, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_qasc_pairs_loss": 0.09130185097455978, + "eval_qasc_pairs_runtime": 0.6645, + "eval_qasc_pairs_samples_per_second": 192.631, + "eval_qasc_pairs_steps_per_second": 3.01, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_openbookqa_pairs_loss": 0.7336423397064209, + "eval_openbookqa_pairs_runtime": 0.5935, + "eval_openbookqa_pairs_samples_per_second": 215.687, + "eval_openbookqa_pairs_steps_per_second": 3.37, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_msmarco_pairs_loss": 0.15868164598941803, + "eval_msmarco_pairs_runtime": 1.5086, + "eval_msmarco_pairs_samples_per_second": 84.844, + "eval_msmarco_pairs_steps_per_second": 1.326, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_nq_pairs_loss": 0.10780799388885498, + "eval_nq_pairs_runtime": 2.3746, + "eval_nq_pairs_samples_per_second": 53.905, + "eval_nq_pairs_steps_per_second": 0.842, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_trivia_pairs_loss": 0.49691149592399597, + "eval_trivia_pairs_runtime": 3.5992, + "eval_trivia_pairs_samples_per_second": 35.563, + "eval_trivia_pairs_steps_per_second": 0.556, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_gooaq_pairs_loss": 0.3025541603565216, + "eval_gooaq_pairs_runtime": 0.9181, + "eval_gooaq_pairs_samples_per_second": 139.423, + "eval_gooaq_pairs_steps_per_second": 2.178, + "step": 270 + }, + { + "epoch": 0.6067415730337079, + "eval_paws-pos_loss": 0.024440350010991096, + "eval_paws-pos_runtime": 0.7046, + "eval_paws-pos_samples_per_second": 181.67, + "eval_paws-pos_steps_per_second": 2.839, + "step": 270 + }, + { + "epoch": 0.6089887640449438, + "grad_norm": 3.21183705329895, + "learning_rate": 3.383774604421301e-05, + "loss": 0.747, + "step": 271 + }, + { + "epoch": 0.6112359550561798, + "grad_norm": 4.403411865234375, + "learning_rate": 3.3812978165647975e-05, + "loss": 0.7855, + "step": 272 + }, + { + "epoch": 0.6134831460674157, + "grad_norm": 0.46612274646759033, + "learning_rate": 3.3787960833848405e-05, + "loss": 0.0473, + "step": 273 + }, + { + "epoch": 0.6157303370786517, + "grad_norm": 3.30610990524292, + "learning_rate": 3.3762694535871584e-05, + "loss": 0.4378, + "step": 274 + }, + { + "epoch": 0.6179775280898876, + "grad_norm": 3.7408640384674072, + "learning_rate": 
3.373717976362187e-05, + "loss": 0.8767, + "step": 275 + }, + { + "epoch": 0.6202247191011236, + "grad_norm": 5.345012187957764, + "learning_rate": 3.3711417013841105e-05, + "loss": 1.0345, + "step": 276 + }, + { + "epoch": 0.6224719101123596, + "grad_norm": 3.518765449523926, + "learning_rate": 3.368540678809897e-05, + "loss": 0.5182, + "step": 277 + }, + { + "epoch": 0.6247191011235955, + "grad_norm": 6.666887283325195, + "learning_rate": 3.3659149592783186e-05, + "loss": 2.5949, + "step": 278 + }, + { + "epoch": 0.6269662921348315, + "grad_norm": 3.197411298751831, + "learning_rate": 3.363264593908969e-05, + "loss": 0.833, + "step": 279 + }, + { + "epoch": 0.6292134831460674, + "grad_norm": 0.6012090444564819, + "learning_rate": 3.360589634301267e-05, + "loss": 0.0778, + "step": 280 + }, + { + "epoch": 0.6314606741573033, + "grad_norm": 4.5016188621521, + "learning_rate": 3.357890132533449e-05, + "loss": 0.8048, + "step": 281 + }, + { + "epoch": 0.6337078651685393, + "grad_norm": 3.865889072418213, + "learning_rate": 3.35516614116156e-05, + "loss": 0.7524, + "step": 282 + }, + { + "epoch": 0.6359550561797753, + "grad_norm": 3.2998361587524414, + "learning_rate": 3.3524177132184266e-05, + "loss": 0.3246, + "step": 283 + }, + { + "epoch": 0.6382022471910113, + "grad_norm": 0.6418587565422058, + "learning_rate": 3.349644902212628e-05, + "loss": 0.0728, + "step": 284 + }, + { + "epoch": 0.6404494382022472, + "grad_norm": 5.772351264953613, + "learning_rate": 3.34684776212745e-05, + "loss": 2.3619, + "step": 285 + }, + { + "epoch": 0.6426966292134831, + "grad_norm": 3.769488573074341, + "learning_rate": 3.3440263474198376e-05, + "loss": 0.7464, + "step": 286 + }, + { + "epoch": 0.6449438202247191, + "grad_norm": 4.559601783752441, + "learning_rate": 3.3411807130193325e-05, + "loss": 0.6691, + "step": 287 + }, + { + "epoch": 0.647191011235955, + "grad_norm": 0.45337462425231934, + "learning_rate": 3.338310914327005e-05, + "loss": 0.059, + "step": 288 + }, + { + "epoch": 0.6494382022471911, + "grad_norm": 4.7184553146362305, + "learning_rate": 3.3354170072143766e-05, + "loss": 0.7841, + "step": 289 + }, + { + "epoch": 0.651685393258427, + "grad_norm": 3.886216640472412, + "learning_rate": 3.332499048022328e-05, + "loss": 0.647, + "step": 290 + }, + { + "epoch": 0.6539325842696629, + "grad_norm": 4.497567176818848, + "learning_rate": 3.329557093560006e-05, + "loss": 0.8814, + "step": 291 + }, + { + "epoch": 0.6561797752808989, + "grad_norm": 3.995391368865967, + "learning_rate": 3.326591201103716e-05, + "loss": 0.7247, + "step": 292 + }, + { + "epoch": 0.6584269662921348, + "grad_norm": 0.4348815083503723, + "learning_rate": 3.323601428395809e-05, + "loss": 0.059, + "step": 293 + }, + { + "epoch": 0.6606741573033708, + "grad_norm": 3.6197896003723145, + "learning_rate": 3.320587833643554e-05, + "loss": 0.8317, + "step": 294 + }, + { + "epoch": 0.6629213483146067, + "grad_norm": 4.4088215827941895, + "learning_rate": 3.317550475518006e-05, + "loss": 0.8548, + "step": 295 + }, + { + "epoch": 0.6651685393258427, + "grad_norm": 4.541014194488525, + "learning_rate": 3.314489413152867e-05, + "loss": 0.9213, + "step": 296 + }, + { + "epoch": 0.6674157303370787, + "grad_norm": 3.067857265472412, + "learning_rate": 3.311404706143329e-05, + "loss": 0.6923, + "step": 297 + }, + { + "epoch": 0.6696629213483146, + "grad_norm": 4.037753582000732, + "learning_rate": 3.3082964145449174e-05, + "loss": 0.7777, + "step": 298 + }, + { + "epoch": 0.6719101123595506, + "grad_norm": 4.280182838439941, + 
"learning_rate": 3.305164598872322e-05, + "loss": 0.7496, + "step": 299 + }, + { + "epoch": 0.6741573033707865, + "grad_norm": 4.357325077056885, + "learning_rate": 3.302009320098218e-05, + "loss": 0.7636, + "step": 300 + }, + { + "epoch": 0.6764044943820224, + "grad_norm": 4.007940292358398, + "learning_rate": 3.2988306396520775e-05, + "loss": 0.6867, + "step": 301 + }, + { + "epoch": 0.6786516853932584, + "grad_norm": 0.8544747233390808, + "learning_rate": 3.295628619418977e-05, + "loss": 0.0506, + "step": 302 + }, + { + "epoch": 0.6808988764044944, + "grad_norm": 3.34498929977417, + "learning_rate": 3.292403321738387e-05, + "loss": 0.3346, + "step": 303 + }, + { + "epoch": 0.6831460674157304, + "grad_norm": 2.441420316696167, + "learning_rate": 3.289154809402967e-05, + "loss": 0.2485, + "step": 304 + }, + { + "epoch": 0.6853932584269663, + "grad_norm": 4.533839702606201, + "learning_rate": 3.285883145657334e-05, + "loss": 0.8508, + "step": 305 + }, + { + "epoch": 0.6876404494382022, + "grad_norm": 3.2033944129943848, + "learning_rate": 3.2825883941968346e-05, + "loss": 0.8464, + "step": 306 + }, + { + "epoch": 0.6898876404494382, + "grad_norm": 3.6305220127105713, + "learning_rate": 3.279270619166309e-05, + "loss": 0.3385, + "step": 307 + }, + { + "epoch": 0.6921348314606741, + "grad_norm": 4.438405990600586, + "learning_rate": 3.2759298851588336e-05, + "loss": 0.8837, + "step": 308 + }, + { + "epoch": 0.6943820224719102, + "grad_norm": 4.252586841583252, + "learning_rate": 3.272566257214474e-05, + "loss": 0.9019, + "step": 309 + }, + { + "epoch": 0.6966292134831461, + "grad_norm": 4.231752872467041, + "learning_rate": 3.2691798008190096e-05, + "loss": 0.6922, + "step": 310 + }, + { + "epoch": 0.698876404494382, + "grad_norm": 3.862682342529297, + "learning_rate": 3.265770581902662e-05, + "loss": 0.6348, + "step": 311 + }, + { + "epoch": 0.701123595505618, + "grad_norm": 3.783026933670044, + "learning_rate": 3.262338666838813e-05, + "loss": 0.7522, + "step": 312 + }, + { + "epoch": 0.7033707865168539, + "grad_norm": 4.141933917999268, + "learning_rate": 3.25888412244271e-05, + "loss": 0.7843, + "step": 313 + }, + { + "epoch": 0.7056179775280899, + "grad_norm": 0.7638006210327148, + "learning_rate": 3.2554070159701684e-05, + "loss": 0.0493, + "step": 314 + }, + { + "epoch": 0.7078651685393258, + "grad_norm": 3.7285079956054688, + "learning_rate": 3.2519074151162564e-05, + "loss": 0.357, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8318675756454468, + "eval_VitaminC_cosine_ap": 0.553255462027648, + "eval_VitaminC_cosine_f1": 0.6666666666666666, + "eval_VitaminC_cosine_f1_threshold": 0.3080925941467285, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.5546875, + "eval_VitaminC_dot_accuracy_threshold": 284.4936218261719, + "eval_VitaminC_dot_ap": 0.5335304755231123, + "eval_VitaminC_dot_f1": 0.6675531914893617, + "eval_VitaminC_dot_f1_threshold": 117.11366271972656, + "eval_VitaminC_dot_precision": 0.500998003992016, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 14.916669845581055, + "eval_VitaminC_euclidean_ap": 0.5560392780320775, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 23.758323669433594, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 
1.0, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 303.769775390625, + "eval_VitaminC_manhattan_ap": 0.5575735035337728, + "eval_VitaminC_manhattan_f1": 0.6666666666666666, + "eval_VitaminC_manhattan_f1_threshold": 500.6726989746094, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 303.769775390625, + "eval_VitaminC_max_ap": 0.5575735035337728, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 500.6726989746094, + "eval_VitaminC_max_precision": 0.500998003992016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5575735035337728, + "eval_sts-test_pearson_cosine": 0.884017793393225, + "eval_sts-test_pearson_dot": 0.8725802033594147, + "eval_sts-test_pearson_euclidean": 0.9065592531799239, + "eval_sts-test_pearson_manhattan": 0.9070236641674441, + "eval_sts-test_pearson_max": 0.9070236641674441, + "eval_sts-test_spearman_cosine": 0.9067846957888538, + "eval_sts-test_spearman_dot": 0.8716365180769119, + "eval_sts-test_spearman_euclidean": 0.9026938039800204, + "eval_sts-test_spearman_manhattan": 0.903306941012344, + "eval_sts-test_spearman_max": 0.9067846957888538, + "eval_vitaminc-pairs_loss": 1.4885247945785522, + "eval_vitaminc-pairs_runtime": 1.9137, + "eval_vitaminc-pairs_samples_per_second": 56.436, + "eval_vitaminc-pairs_steps_per_second": 1.045, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_negation-triplets_loss": 0.9597576856613159, + "eval_negation-triplets_runtime": 0.3023, + "eval_negation-triplets_samples_per_second": 211.742, + "eval_negation-triplets_steps_per_second": 3.308, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_scitail-pairs-pos_loss": 0.09951130300760269, + "eval_scitail-pairs-pos_runtime": 0.3896, + "eval_scitail-pairs-pos_samples_per_second": 138.608, + "eval_scitail-pairs-pos_steps_per_second": 2.567, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_scitail-pairs-qa_loss": 0.0010157548822462559, + "eval_scitail-pairs-qa_runtime": 0.5373, + "eval_scitail-pairs-qa_samples_per_second": 238.245, + "eval_scitail-pairs-qa_steps_per_second": 3.723, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_xsum-pairs_loss": 0.027823584154248238, + "eval_xsum-pairs_runtime": 2.7408, + "eval_xsum-pairs_samples_per_second": 46.701, + "eval_xsum-pairs_steps_per_second": 0.73, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_sciq_pairs_loss": 0.015241424553096294, + "eval_sciq_pairs_runtime": 2.8458, + "eval_sciq_pairs_samples_per_second": 44.978, + "eval_sciq_pairs_steps_per_second": 0.703, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_qasc_pairs_loss": 0.09173130989074707, + "eval_qasc_pairs_runtime": 0.6608, + "eval_qasc_pairs_samples_per_second": 193.694, + "eval_qasc_pairs_steps_per_second": 3.026, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_openbookqa_pairs_loss": 0.6921954154968262, + "eval_openbookqa_pairs_runtime": 0.5893, + "eval_openbookqa_pairs_samples_per_second": 217.196, + "eval_openbookqa_pairs_steps_per_second": 3.394, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_msmarco_pairs_loss": 0.15177518129348755, + "eval_msmarco_pairs_runtime": 1.494, + "eval_msmarco_pairs_samples_per_second": 85.673, + "eval_msmarco_pairs_steps_per_second": 1.339, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_nq_pairs_loss": 
0.10136909037828445, + "eval_nq_pairs_runtime": 2.3524, + "eval_nq_pairs_samples_per_second": 54.413, + "eval_nq_pairs_steps_per_second": 0.85, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_trivia_pairs_loss": 0.5301617980003357, + "eval_trivia_pairs_runtime": 3.5809, + "eval_trivia_pairs_samples_per_second": 35.745, + "eval_trivia_pairs_steps_per_second": 0.559, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_gooaq_pairs_loss": 0.28424739837646484, + "eval_gooaq_pairs_runtime": 0.9167, + "eval_gooaq_pairs_samples_per_second": 139.635, + "eval_gooaq_pairs_steps_per_second": 2.182, + "step": 315 + }, + { + "epoch": 0.7078651685393258, + "eval_paws-pos_loss": 0.023981213569641113, + "eval_paws-pos_runtime": 0.6966, + "eval_paws-pos_samples_per_second": 183.744, + "eval_paws-pos_steps_per_second": 2.871, + "step": 315 + }, + { + "epoch": 0.7101123595505618, + "grad_norm": 3.6374969482421875, + "learning_rate": 3.248385388013984e-05, + "loss": 0.841, + "step": 316 + }, + { + "epoch": 0.7123595505617978, + "grad_norm": 4.251607418060303, + "learning_rate": 3.2448410032329716e-05, + "loss": 0.5849, + "step": 317 + }, + { + "epoch": 0.7146067415730337, + "grad_norm": 4.323038101196289, + "learning_rate": 3.241274329778117e-05, + "loss": 0.6818, + "step": 318 + }, + { + "epoch": 0.7168539325842697, + "grad_norm": 4.027289867401123, + "learning_rate": 3.237685437088251e-05, + "loss": 0.8269, + "step": 319 + }, + { + "epoch": 0.7191011235955056, + "grad_norm": 3.014479875564575, + "learning_rate": 3.234074395034787e-05, + "loss": 0.6979, + "step": 320 + }, + { + "epoch": 0.7213483146067415, + "grad_norm": 3.5980277061462402, + "learning_rate": 3.2304412739203595e-05, + "loss": 0.3218, + "step": 321 + }, + { + "epoch": 0.7235955056179775, + "grad_norm": 3.2924134731292725, + "learning_rate": 3.226786144477456e-05, + "loss": 0.8206, + "step": 322 + }, + { + "epoch": 0.7258426966292135, + "grad_norm": 2.524231195449829, + "learning_rate": 3.2231090778670385e-05, + "loss": 0.2106, + "step": 323 + }, + { + "epoch": 0.7280898876404495, + "grad_norm": 5.464061260223389, + "learning_rate": 3.2194101456771604e-05, + "loss": 1.0524, + "step": 324 + }, + { + "epoch": 0.7303370786516854, + "grad_norm": 3.4692578315734863, + "learning_rate": 3.215689419921572e-05, + "loss": 0.3774, + "step": 325 + }, + { + "epoch": 0.7325842696629213, + "grad_norm": 4.947183132171631, + "learning_rate": 3.211946973038315e-05, + "loss": 0.9098, + "step": 326 + }, + { + "epoch": 0.7348314606741573, + "grad_norm": 4.432866096496582, + "learning_rate": 3.208182877888319e-05, + "loss": 0.7988, + "step": 327 + }, + { + "epoch": 0.7370786516853932, + "grad_norm": 4.585951328277588, + "learning_rate": 3.204397207753978e-05, + "loss": 0.7916, + "step": 328 + }, + { + "epoch": 0.7393258426966293, + "grad_norm": 3.7288637161254883, + "learning_rate": 3.200590036337724e-05, + "loss": 0.6314, + "step": 329 + }, + { + "epoch": 0.7415730337078652, + "grad_norm": 3.840074300765991, + "learning_rate": 3.196761437760593e-05, + "loss": 0.8628, + "step": 330 + }, + { + "epoch": 0.7438202247191011, + "grad_norm": 0.6423048377037048, + "learning_rate": 3.192911486560784e-05, + "loss": 0.0688, + "step": 331 + }, + { + "epoch": 0.7460674157303371, + "grad_norm": 4.148509502410889, + "learning_rate": 3.1890402576922036e-05, + "loss": 0.7386, + "step": 332 + }, + { + "epoch": 0.748314606741573, + "grad_norm": 4.7345147132873535, + "learning_rate": 3.1851478265230103e-05, + "loss": 0.8458, + "step": 333 + }, + { + "epoch": 
0.750561797752809, + "grad_norm": 0.695708155632019, + "learning_rate": 3.181234268834144e-05, + "loss": 0.0442, + "step": 334 + }, + { + "epoch": 0.7528089887640449, + "grad_norm": 3.434741735458374, + "learning_rate": 3.177299660817856e-05, + "loss": 0.317, + "step": 335 + }, + { + "epoch": 0.755056179775281, + "grad_norm": 3.306964874267578, + "learning_rate": 3.1733440790762176e-05, + "loss": 0.8087, + "step": 336 + }, + { + "epoch": 0.7573033707865169, + "grad_norm": 3.010828733444214, + "learning_rate": 3.169367600619637e-05, + "loss": 0.3398, + "step": 337 + }, + { + "epoch": 0.7595505617977528, + "grad_norm": 4.152151584625244, + "learning_rate": 3.1653703028653545e-05, + "loss": 0.699, + "step": 338 + }, + { + "epoch": 0.7617977528089888, + "grad_norm": 4.073326110839844, + "learning_rate": 3.161352263635937e-05, + "loss": 0.7901, + "step": 339 + }, + { + "epoch": 0.7640449438202247, + "grad_norm": 4.365633487701416, + "learning_rate": 3.157313561157764e-05, + "loss": 0.8072, + "step": 340 + }, + { + "epoch": 0.7662921348314606, + "grad_norm": 3.506556272506714, + "learning_rate": 3.153254274059501e-05, + "loss": 0.5939, + "step": 341 + }, + { + "epoch": 0.7685393258426966, + "grad_norm": 4.319092273712158, + "learning_rate": 3.149174481370575e-05, + "loss": 0.6933, + "step": 342 + }, + { + "epoch": 0.7707865168539326, + "grad_norm": 0.6184964179992676, + "learning_rate": 3.145074262519629e-05, + "loss": 0.0437, + "step": 343 + }, + { + "epoch": 0.7730337078651686, + "grad_norm": 4.866581916809082, + "learning_rate": 3.140953697332979e-05, + "loss": 0.9882, + "step": 344 + }, + { + "epoch": 0.7752808988764045, + "grad_norm": 3.9585559368133545, + "learning_rate": 3.136812866033063e-05, + "loss": 0.3707, + "step": 345 + }, + { + "epoch": 0.7775280898876404, + "grad_norm": 4.253391265869141, + "learning_rate": 3.132651849236871e-05, + "loss": 0.7103, + "step": 346 + }, + { + "epoch": 0.7797752808988764, + "grad_norm": 0.5847011208534241, + "learning_rate": 3.128470727954383e-05, + "loss": 0.0372, + "step": 347 + }, + { + "epoch": 0.7820224719101123, + "grad_norm": 0.5127836465835571, + "learning_rate": 3.124269583586989e-05, + "loss": 0.028, + "step": 348 + }, + { + "epoch": 0.7842696629213484, + "grad_norm": 4.145182132720947, + "learning_rate": 3.120048497925904e-05, + "loss": 0.7676, + "step": 349 + }, + { + "epoch": 0.7865168539325843, + "grad_norm": 4.833105087280273, + "learning_rate": 3.1158075531505755e-05, + "loss": 0.6754, + "step": 350 + }, + { + "epoch": 0.7887640449438202, + "grad_norm": 0.49345946311950684, + "learning_rate": 3.1115468318270844e-05, + "loss": 0.0439, + "step": 351 + }, + { + "epoch": 0.7910112359550562, + "grad_norm": 3.357720375061035, + "learning_rate": 3.107266416906538e-05, + "loss": 0.8039, + "step": 352 + }, + { + "epoch": 0.7932584269662921, + "grad_norm": 0.2371903359889984, + "learning_rate": 3.1029663917234514e-05, + "loss": 0.0104, + "step": 353 + }, + { + "epoch": 0.7955056179775281, + "grad_norm": 0.48881796002388, + "learning_rate": 3.098646839994132e-05, + "loss": 0.0555, + "step": 354 + }, + { + "epoch": 0.797752808988764, + "grad_norm": 3.3021090030670166, + "learning_rate": 3.094307845815042e-05, + "loss": 0.8646, + "step": 355 + }, + { + "epoch": 0.8, + "grad_norm": 3.0412533283233643, + "learning_rate": 3.0899494936611663e-05, + "loss": 0.7781, + "step": 356 + }, + { + "epoch": 0.802247191011236, + "grad_norm": 0.30917835235595703, + "learning_rate": 3.085571868384366e-05, + "loss": 0.011, + "step": 357 + }, + { + "epoch": 
0.8044943820224719, + "grad_norm": 3.6957950592041016, + "learning_rate": 3.081175055211726e-05, + "loss": 0.3267, + "step": 358 + }, + { + "epoch": 0.8067415730337079, + "grad_norm": 7.202300071716309, + "learning_rate": 3.0767591397438974e-05, + "loss": 2.5281, + "step": 359 + }, + { + "epoch": 0.8089887640449438, + "grad_norm": 2.9833834171295166, + "learning_rate": 3.072324207953429e-05, + "loss": 0.301, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.6793336868286133, + "eval_VitaminC_cosine_ap": 0.5555632752592039, + "eval_VitaminC_cosine_f1": 0.6657824933687002, + "eval_VitaminC_cosine_f1_threshold": 0.28029173612594604, + "eval_VitaminC_cosine_precision": 0.4990059642147117, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 265.5102844238281, + "eval_VitaminC_dot_ap": 0.5326105108889087, + "eval_VitaminC_dot_f1": 0.6675531914893617, + "eval_VitaminC_dot_f1_threshold": 106.37774658203125, + "eval_VitaminC_dot_precision": 0.500998003992016, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 15.296594619750977, + "eval_VitaminC_euclidean_ap": 0.5592294311948881, + "eval_VitaminC_euclidean_f1": 0.6657824933687002, + "eval_VitaminC_euclidean_f1_threshold": 23.58568572998047, + "eval_VitaminC_euclidean_precision": 0.4990059642147117, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.556640625, + "eval_VitaminC_manhattan_accuracy_threshold": 306.79913330078125, + "eval_VitaminC_manhattan_ap": 0.5598941655081213, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 512.0101318359375, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 306.79913330078125, + "eval_VitaminC_max_ap": 0.5598941655081213, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 512.0101318359375, + "eval_VitaminC_max_precision": 0.500998003992016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5598941655081213, + "eval_sts-test_pearson_cosine": 0.8832151520369376, + "eval_sts-test_pearson_dot": 0.8763916954110884, + "eval_sts-test_pearson_euclidean": 0.9046869354209082, + "eval_sts-test_pearson_manhattan": 0.9047119917370259, + "eval_sts-test_pearson_max": 0.9047119917370259, + "eval_sts-test_spearman_cosine": 0.9054341922225841, + "eval_sts-test_spearman_dot": 0.8786041104705073, + "eval_sts-test_spearman_euclidean": 0.9002407635868509, + "eval_sts-test_spearman_manhattan": 0.9006719867416183, + "eval_sts-test_spearman_max": 0.9054341922225841, + "eval_vitaminc-pairs_loss": 1.4290639162063599, + "eval_vitaminc-pairs_runtime": 1.8905, + "eval_vitaminc-pairs_samples_per_second": 57.128, + "eval_vitaminc-pairs_steps_per_second": 1.058, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_negation-triplets_loss": 0.9030703902244568, + "eval_negation-triplets_runtime": 0.2986, + "eval_negation-triplets_samples_per_second": 214.299, + "eval_negation-triplets_steps_per_second": 3.348, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_scitail-pairs-pos_loss": 0.10728535801172256, + "eval_scitail-pairs-pos_runtime": 0.3831, + "eval_scitail-pairs-pos_samples_per_second": 140.965, + 
"eval_scitail-pairs-pos_steps_per_second": 2.61, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_scitail-pairs-qa_loss": 0.0005650219391100109, + "eval_scitail-pairs-qa_runtime": 0.5259, + "eval_scitail-pairs-qa_samples_per_second": 243.397, + "eval_scitail-pairs-qa_steps_per_second": 3.803, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_xsum-pairs_loss": 0.025990577414631844, + "eval_xsum-pairs_runtime": 2.734, + "eval_xsum-pairs_samples_per_second": 46.818, + "eval_xsum-pairs_steps_per_second": 0.732, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_sciq_pairs_loss": 0.016017427667975426, + "eval_sciq_pairs_runtime": 2.8252, + "eval_sciq_pairs_samples_per_second": 45.307, + "eval_sciq_pairs_steps_per_second": 0.708, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_qasc_pairs_loss": 0.10250324755907059, + "eval_qasc_pairs_runtime": 0.6511, + "eval_qasc_pairs_samples_per_second": 196.585, + "eval_qasc_pairs_steps_per_second": 3.072, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_openbookqa_pairs_loss": 0.6710968613624573, + "eval_openbookqa_pairs_runtime": 0.5776, + "eval_openbookqa_pairs_samples_per_second": 221.625, + "eval_openbookqa_pairs_steps_per_second": 3.463, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_msmarco_pairs_loss": 0.14522777497768402, + "eval_msmarco_pairs_runtime": 1.4981, + "eval_msmarco_pairs_samples_per_second": 85.441, + "eval_msmarco_pairs_steps_per_second": 1.335, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_nq_pairs_loss": 0.10225611180067062, + "eval_nq_pairs_runtime": 2.3595, + "eval_nq_pairs_samples_per_second": 54.248, + "eval_nq_pairs_steps_per_second": 0.848, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_trivia_pairs_loss": 0.5312957167625427, + "eval_trivia_pairs_runtime": 3.5813, + "eval_trivia_pairs_samples_per_second": 35.741, + "eval_trivia_pairs_steps_per_second": 0.558, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_gooaq_pairs_loss": 0.27713337540626526, + "eval_gooaq_pairs_runtime": 0.9166, + "eval_gooaq_pairs_samples_per_second": 139.645, + "eval_gooaq_pairs_steps_per_second": 2.182, + "step": 360 + }, + { + "epoch": 0.8089887640449438, + "eval_paws-pos_loss": 0.024326296523213387, + "eval_paws-pos_runtime": 0.6893, + "eval_paws-pos_samples_per_second": 185.682, + "eval_paws-pos_steps_per_second": 2.901, + "step": 360 + }, + { + "epoch": 0.8112359550561797, + "grad_norm": 4.372533798217773, + "learning_rate": 3.067870346183096e-05, + "loss": 0.7533, + "step": 361 + }, + { + "epoch": 0.8134831460674158, + "grad_norm": 2.6585452556610107, + "learning_rate": 3.063397641144216e-05, + "loss": 0.2958, + "step": 362 + }, + { + "epoch": 0.8157303370786517, + "grad_norm": 4.378647327423096, + "learning_rate": 3.058906179914962e-05, + "loss": 0.8296, + "step": 363 + }, + { + "epoch": 0.8179775280898877, + "grad_norm": 3.1601309776306152, + "learning_rate": 3.0543960499386694e-05, + "loss": 0.3191, + "step": 364 + }, + { + "epoch": 0.8202247191011236, + "grad_norm": 3.446498394012451, + "learning_rate": 3.049867339022129e-05, + "loss": 0.7866, + "step": 365 + }, + { + "epoch": 0.8224719101123595, + "grad_norm": 3.0058486461639404, + "learning_rate": 3.0453201353338826e-05, + "loss": 0.3157, + "step": 366 + }, + { + "epoch": 0.8247191011235955, + "grad_norm": 4.380611419677734, + "learning_rate": 3.040754527402502e-05, + "loss": 0.7402, + "step": 367 + }, + { + "epoch": 0.8269662921348314, + "grad_norm": 3.8081209659576416, + 
"learning_rate": 3.036170604114869e-05, + "loss": 0.4957, + "step": 368 + }, + { + "epoch": 0.8292134831460675, + "grad_norm": 4.2056989669799805, + "learning_rate": 3.031568454714442e-05, + "loss": 0.8505, + "step": 369 + }, + { + "epoch": 0.8314606741573034, + "grad_norm": 3.101804733276367, + "learning_rate": 3.0269481687995207e-05, + "loss": 0.7702, + "step": 370 + }, + { + "epoch": 0.8337078651685393, + "grad_norm": 4.0704345703125, + "learning_rate": 3.0223098363215002e-05, + "loss": 0.7591, + "step": 371 + }, + { + "epoch": 0.8359550561797753, + "grad_norm": 2.9631364345550537, + "learning_rate": 3.0176535475831208e-05, + "loss": 0.727, + "step": 372 + }, + { + "epoch": 0.8382022471910112, + "grad_norm": 3.3760929107666016, + "learning_rate": 3.01297939323671e-05, + "loss": 0.3233, + "step": 373 + }, + { + "epoch": 0.8404494382022472, + "grad_norm": 4.116260051727295, + "learning_rate": 3.0082874642824164e-05, + "loss": 0.8738, + "step": 374 + }, + { + "epoch": 0.8426966292134831, + "grad_norm": 0.40298929810523987, + "learning_rate": 3.0035778520664388e-05, + "loss": 0.0393, + "step": 375 + }, + { + "epoch": 0.8449438202247191, + "grad_norm": 3.0647614002227783, + "learning_rate": 2.9988506482792485e-05, + "loss": 0.7454, + "step": 376 + }, + { + "epoch": 0.8471910112359551, + "grad_norm": 2.951953649520874, + "learning_rate": 2.994105944953803e-05, + "loss": 0.8297, + "step": 377 + }, + { + "epoch": 0.849438202247191, + "grad_norm": 4.049951553344727, + "learning_rate": 2.9893438344637538e-05, + "loss": 0.7802, + "step": 378 + }, + { + "epoch": 0.851685393258427, + "grad_norm": 3.7383949756622314, + "learning_rate": 2.984564409521651e-05, + "loss": 0.6229, + "step": 379 + }, + { + "epoch": 0.8539325842696629, + "grad_norm": 0.0, + "learning_rate": 2.979767763177134e-05, + "loss": 0.0, + "step": 380 + }, + { + "epoch": 0.8561797752808988, + "grad_norm": 3.399641513824463, + "learning_rate": 2.9749539888151244e-05, + "loss": 0.3506, + "step": 381 + }, + { + "epoch": 0.8584269662921349, + "grad_norm": 0.48723292350769043, + "learning_rate": 2.9701231801540032e-05, + "loss": 0.041, + "step": 382 + }, + { + "epoch": 0.8606741573033708, + "grad_norm": 3.1171765327453613, + "learning_rate": 2.9652754312437897e-05, + "loss": 0.725, + "step": 383 + }, + { + "epoch": 0.8629213483146068, + "grad_norm": 2.6491808891296387, + "learning_rate": 2.9604108364643112e-05, + "loss": 0.257, + "step": 384 + }, + { + "epoch": 0.8651685393258427, + "grad_norm": 4.025605201721191, + "learning_rate": 2.9555294905233606e-05, + "loss": 0.7912, + "step": 385 + }, + { + "epoch": 0.8674157303370786, + "grad_norm": 4.142299652099609, + "learning_rate": 2.9506314884548583e-05, + "loss": 0.8915, + "step": 386 + }, + { + "epoch": 0.8696629213483146, + "grad_norm": 2.943582534790039, + "learning_rate": 2.945716925616998e-05, + "loss": 0.779, + "step": 387 + }, + { + "epoch": 0.8719101123595505, + "grad_norm": 4.478114604949951, + "learning_rate": 2.9407858976903913e-05, + "loss": 0.7828, + "step": 388 + }, + { + "epoch": 0.8741573033707866, + "grad_norm": 3.9878995418548584, + "learning_rate": 2.935838500676207e-05, + "loss": 0.7462, + "step": 389 + }, + { + "epoch": 0.8764044943820225, + "grad_norm": 3.7733311653137207, + "learning_rate": 2.9308748308942983e-05, + "loss": 0.7913, + "step": 390 + }, + { + "epoch": 0.8786516853932584, + "grad_norm": 3.179732322692871, + "learning_rate": 2.9258949849813315e-05, + "loss": 0.3209, + "step": 391 + }, + { + "epoch": 0.8808988764044944, + "grad_norm": 3.6665351390838623, + 
"learning_rate": 2.9208990598889008e-05, + "loss": 0.5932, + "step": 392 + }, + { + "epoch": 0.8831460674157303, + "grad_norm": 0.545093834400177, + "learning_rate": 2.9158871528816442e-05, + "loss": 0.0613, + "step": 393 + }, + { + "epoch": 0.8853932584269663, + "grad_norm": 5.226474285125732, + "learning_rate": 2.9108593615353467e-05, + "loss": 0.8802, + "step": 394 + }, + { + "epoch": 0.8876404494382022, + "grad_norm": 3.691817283630371, + "learning_rate": 2.9058157837350437e-05, + "loss": 0.6116, + "step": 395 + }, + { + "epoch": 0.8898876404494382, + "grad_norm": 0.4754512906074524, + "learning_rate": 2.900756517673113e-05, + "loss": 0.0537, + "step": 396 + }, + { + "epoch": 0.8921348314606742, + "grad_norm": 2.874117374420166, + "learning_rate": 2.8956816618473647e-05, + "loss": 0.3006, + "step": 397 + }, + { + "epoch": 0.8943820224719101, + "grad_norm": 3.8957912921905518, + "learning_rate": 2.890591315059121e-05, + "loss": 0.7636, + "step": 398 + }, + { + "epoch": 0.8966292134831461, + "grad_norm": 3.7385432720184326, + "learning_rate": 2.8854855764112973e-05, + "loss": 0.612, + "step": 399 + }, + { + "epoch": 0.898876404494382, + "grad_norm": 3.7403082847595215, + "learning_rate": 2.880364545306468e-05, + "loss": 0.54, + "step": 400 + }, + { + "epoch": 0.9011235955056179, + "grad_norm": 2.7360849380493164, + "learning_rate": 2.8752283214449328e-05, + "loss": 0.2761, + "step": 401 + }, + { + "epoch": 0.903370786516854, + "grad_norm": 8.988025665283203, + "learning_rate": 2.8700770048227775e-05, + "loss": 1.2668, + "step": 402 + }, + { + "epoch": 0.9056179775280899, + "grad_norm": 3.411295175552368, + "learning_rate": 2.864910695729925e-05, + "loss": 0.8066, + "step": 403 + }, + { + "epoch": 0.9078651685393259, + "grad_norm": 0.3018481135368347, + "learning_rate": 2.8597294947481834e-05, + "loss": 0.0094, + "step": 404 + }, + { + "epoch": 0.9101123595505618, + "grad_norm": 4.116438388824463, + "learning_rate": 2.8545335027492885e-05, + "loss": 0.673, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.7188639044761658, + "eval_VitaminC_cosine_ap": 0.5516905675485202, + "eval_VitaminC_cosine_f1": 0.6675712347354138, + "eval_VitaminC_cosine_f1_threshold": 0.42514583468437195, + "eval_VitaminC_cosine_precision": 0.5061728395061729, + "eval_VitaminC_cosine_recall": 0.9800796812749004, + "eval_VitaminC_dot_accuracy": 0.548828125, + "eval_VitaminC_dot_accuracy_threshold": 320.3775329589844, + "eval_VitaminC_dot_ap": 0.5343066680873013, + "eval_VitaminC_dot_f1": 0.6720867208672087, + "eval_VitaminC_dot_f1_threshold": 152.709716796875, + "eval_VitaminC_dot_precision": 0.5092402464065708, + "eval_VitaminC_dot_recall": 0.9880478087649402, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 15.12228775024414, + "eval_VitaminC_euclidean_ap": 0.5542894540784595, + "eval_VitaminC_euclidean_f1": 0.6640211640211641, + "eval_VitaminC_euclidean_f1_threshold": 24.3716983795166, + "eval_VitaminC_euclidean_precision": 0.497029702970297, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.55859375, + "eval_VitaminC_manhattan_accuracy_threshold": 305.93597412109375, + "eval_VitaminC_manhattan_ap": 0.5533328154567183, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 509.4247741699219, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + 
"eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 320.3775329589844, + "eval_VitaminC_max_ap": 0.5542894540784595, + "eval_VitaminC_max_f1": 0.6720867208672087, + "eval_VitaminC_max_f1_threshold": 509.4247741699219, + "eval_VitaminC_max_precision": 0.5092402464065708, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5542894540784595, + "eval_sts-test_pearson_cosine": 0.8820726638294588, + "eval_sts-test_pearson_dot": 0.8723940521896922, + "eval_sts-test_pearson_euclidean": 0.9038814103150634, + "eval_sts-test_pearson_manhattan": 0.904449390563823, + "eval_sts-test_pearson_max": 0.904449390563823, + "eval_sts-test_spearman_cosine": 0.9051641183600871, + "eval_sts-test_spearman_dot": 0.8721959088443044, + "eval_sts-test_spearman_euclidean": 0.8999642007914521, + "eval_sts-test_spearman_manhattan": 0.9005904051921018, + "eval_sts-test_spearman_max": 0.9051641183600871, + "eval_vitaminc-pairs_loss": 1.48486328125, + "eval_vitaminc-pairs_runtime": 1.8874, + "eval_vitaminc-pairs_samples_per_second": 57.222, + "eval_vitaminc-pairs_steps_per_second": 1.06, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_negation-triplets_loss": 0.9023827314376831, + "eval_negation-triplets_runtime": 0.302, + "eval_negation-triplets_samples_per_second": 211.927, + "eval_negation-triplets_steps_per_second": 3.311, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_scitail-pairs-pos_loss": 0.10495099425315857, + "eval_scitail-pairs-pos_runtime": 0.3856, + "eval_scitail-pairs-pos_samples_per_second": 140.031, + "eval_scitail-pairs-pos_steps_per_second": 2.593, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_scitail-pairs-qa_loss": 0.0008332311408594251, + "eval_scitail-pairs-qa_runtime": 0.5224, + "eval_scitail-pairs-qa_samples_per_second": 245.005, + "eval_scitail-pairs-qa_steps_per_second": 3.828, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_xsum-pairs_loss": 0.028531953692436218, + "eval_xsum-pairs_runtime": 2.7425, + "eval_xsum-pairs_samples_per_second": 46.672, + "eval_xsum-pairs_steps_per_second": 0.729, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_sciq_pairs_loss": 0.015175853855907917, + "eval_sciq_pairs_runtime": 2.8294, + "eval_sciq_pairs_samples_per_second": 45.239, + "eval_sciq_pairs_steps_per_second": 0.707, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_qasc_pairs_loss": 0.09416583180427551, + "eval_qasc_pairs_runtime": 0.6538, + "eval_qasc_pairs_samples_per_second": 195.781, + "eval_qasc_pairs_steps_per_second": 3.059, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_openbookqa_pairs_loss": 0.715216875076294, + "eval_openbookqa_pairs_runtime": 0.578, + "eval_openbookqa_pairs_samples_per_second": 221.449, + "eval_openbookqa_pairs_steps_per_second": 3.46, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_msmarco_pairs_loss": 0.1417744755744934, + "eval_msmarco_pairs_runtime": 1.4882, + "eval_msmarco_pairs_samples_per_second": 86.012, + "eval_msmarco_pairs_steps_per_second": 1.344, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_nq_pairs_loss": 0.10870223492383957, + "eval_nq_pairs_runtime": 2.3451, + "eval_nq_pairs_samples_per_second": 54.583, + "eval_nq_pairs_steps_per_second": 0.853, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_trivia_pairs_loss": 0.49194595217704773, + "eval_trivia_pairs_runtime": 3.5796, + "eval_trivia_pairs_samples_per_second": 35.759, + "eval_trivia_pairs_steps_per_second": 0.559, + "step": 
405 + }, + { + "epoch": 0.9101123595505618, + "eval_gooaq_pairs_loss": 0.2616226375102997, + "eval_gooaq_pairs_runtime": 0.9137, + "eval_gooaq_pairs_samples_per_second": 140.093, + "eval_gooaq_pairs_steps_per_second": 2.189, + "step": 405 + }, + { + "epoch": 0.9101123595505618, + "eval_paws-pos_loss": 0.02422034554183483, + "eval_paws-pos_runtime": 0.6895, + "eval_paws-pos_samples_per_second": 185.641, + "eval_paws-pos_steps_per_second": 2.901, + "step": 405 + }, + { + "epoch": 0.9123595505617977, + "grad_norm": 3.427104949951172, + "learning_rate": 2.8493228208929387e-05, + "loss": 0.5189, + "step": 406 + }, + { + "epoch": 0.9146067415730337, + "grad_norm": 4.941195487976074, + "learning_rate": 2.8440975506248268e-05, + "loss": 0.649, + "step": 407 + }, + { + "epoch": 0.9168539325842696, + "grad_norm": 2.7992403507232666, + "learning_rate": 2.8388577936746633e-05, + "loss": 0.2982, + "step": 408 + }, + { + "epoch": 0.9191011235955057, + "grad_norm": 3.8877484798431396, + "learning_rate": 2.833603652054199e-05, + "loss": 0.7511, + "step": 409 + }, + { + "epoch": 0.9213483146067416, + "grad_norm": 3.2458090782165527, + "learning_rate": 2.8283352280552348e-05, + "loss": 0.5164, + "step": 410 + }, + { + "epoch": 0.9235955056179775, + "grad_norm": 3.7385945320129395, + "learning_rate": 2.8230526242476332e-05, + "loss": 0.5924, + "step": 411 + }, + { + "epoch": 0.9258426966292135, + "grad_norm": 4.369627952575684, + "learning_rate": 2.8177559434773203e-05, + "loss": 0.8191, + "step": 412 + }, + { + "epoch": 0.9280898876404494, + "grad_norm": 2.95206356048584, + "learning_rate": 2.8124452888642838e-05, + "loss": 0.2311, + "step": 413 + }, + { + "epoch": 0.9303370786516854, + "grad_norm": 3.984375238418579, + "learning_rate": 2.8071207638005662e-05, + "loss": 0.7421, + "step": 414 + }, + { + "epoch": 0.9325842696629213, + "grad_norm": 3.0188541412353516, + "learning_rate": 2.801782471948248e-05, + "loss": 0.2936, + "step": 415 + }, + { + "epoch": 0.9348314606741573, + "grad_norm": 4.104308605194092, + "learning_rate": 2.7964305172374362e-05, + "loss": 0.737, + "step": 416 + }, + { + "epoch": 0.9370786516853933, + "grad_norm": 3.686523675918579, + "learning_rate": 2.791065003864235e-05, + "loss": 0.6539, + "step": 417 + }, + { + "epoch": 0.9393258426966292, + "grad_norm": 3.839590311050415, + "learning_rate": 2.785686036288719e-05, + "loss": 0.6855, + "step": 418 + }, + { + "epoch": 0.9415730337078652, + "grad_norm": 4.174718856811523, + "learning_rate": 2.780293719232902e-05, + "loss": 0.8134, + "step": 419 + }, + { + "epoch": 0.9438202247191011, + "grad_norm": 4.046380043029785, + "learning_rate": 2.7748881576786946e-05, + "loss": 0.6885, + "step": 420 + }, + { + "epoch": 0.946067415730337, + "grad_norm": 3.4202940464019775, + "learning_rate": 2.7694694568658613e-05, + "loss": 0.5581, + "step": 421 + }, + { + "epoch": 0.9483146067415731, + "grad_norm": 3.787081718444824, + "learning_rate": 2.764037722289973e-05, + "loss": 0.8029, + "step": 422 + }, + { + "epoch": 0.950561797752809, + "grad_norm": 3.870718240737915, + "learning_rate": 2.7585930597003524e-05, + "loss": 0.8126, + "step": 423 + }, + { + "epoch": 0.952808988764045, + "grad_norm": 3.1959424018859863, + "learning_rate": 2.753135575098015e-05, + "loss": 0.8425, + "step": 424 + }, + { + "epoch": 0.9550561797752809, + "grad_norm": 0.4186573922634125, + "learning_rate": 2.7476653747336047e-05, + "loss": 0.049, + "step": 425 + }, + { + "epoch": 0.9573033707865168, + "grad_norm": 4.299917697906494, + "learning_rate": 2.7421825651053265e-05, 
+ "loss": 0.7849, + "step": 426 + }, + { + "epoch": 0.9595505617977528, + "grad_norm": 2.6435227394104004, + "learning_rate": 2.736687252956873e-05, + "loss": 0.068, + "step": 427 + }, + { + "epoch": 0.9617977528089887, + "grad_norm": 2.717653274536133, + "learning_rate": 2.7311795452753443e-05, + "loss": 0.2925, + "step": 428 + }, + { + "epoch": 0.9640449438202248, + "grad_norm": 3.6929807662963867, + "learning_rate": 2.7256595492891683e-05, + "loss": 0.777, + "step": 429 + }, + { + "epoch": 0.9662921348314607, + "grad_norm": 2.8760790824890137, + "learning_rate": 2.720127372466011e-05, + "loss": 0.7397, + "step": 430 + }, + { + "epoch": 0.9685393258426966, + "grad_norm": 0.03685740381479263, + "learning_rate": 2.714583122510683e-05, + "loss": 0.0007, + "step": 431 + }, + { + "epoch": 0.9707865168539326, + "grad_norm": 4.058692455291748, + "learning_rate": 2.709026907363047e-05, + "loss": 0.8535, + "step": 432 + }, + { + "epoch": 0.9730337078651685, + "grad_norm": 4.2914276123046875, + "learning_rate": 2.703458835195911e-05, + "loss": 0.7026, + "step": 433 + }, + { + "epoch": 0.9752808988764045, + "grad_norm": 3.735518217086792, + "learning_rate": 2.6978790144129262e-05, + "loss": 0.7557, + "step": 434 + }, + { + "epoch": 0.9775280898876404, + "grad_norm": 4.058504104614258, + "learning_rate": 2.6922875536464747e-05, + "loss": 0.7225, + "step": 435 + }, + { + "epoch": 0.9797752808988764, + "grad_norm": 0.0, + "learning_rate": 2.6866845617555555e-05, + "loss": 0.0, + "step": 436 + }, + { + "epoch": 0.9820224719101124, + "grad_norm": 5.648872375488281, + "learning_rate": 2.6810701478236642e-05, + "loss": 0.4131, + "step": 437 + }, + { + "epoch": 0.9842696629213483, + "grad_norm": 2.7032744884490967, + "learning_rate": 2.6754444211566702e-05, + "loss": 0.2824, + "step": 438 + }, + { + "epoch": 0.9865168539325843, + "grad_norm": 3.150801420211792, + "learning_rate": 2.6698074912806882e-05, + "loss": 0.3144, + "step": 439 + }, + { + "epoch": 0.9887640449438202, + "grad_norm": 2.3572490215301514, + "learning_rate": 2.6641594679399448e-05, + "loss": 0.0509, + "step": 440 + }, + { + "epoch": 0.9910112359550561, + "grad_norm": 3.2544448375701904, + "learning_rate": 2.6585004610946452e-05, + "loss": 0.7645, + "step": 441 + }, + { + "epoch": 0.9932584269662922, + "grad_norm": 4.310440540313721, + "learning_rate": 2.6528305809188273e-05, + "loss": 0.2787, + "step": 442 + }, + { + "epoch": 0.9955056179775281, + "grad_norm": 3.863487482070923, + "learning_rate": 2.6471499377982225e-05, + "loss": 0.64, + "step": 443 + }, + { + "epoch": 0.9977528089887641, + "grad_norm": 6.1020612716674805, + "learning_rate": 2.6414586423281017e-05, + "loss": 0.4045, + "step": 444 + }, + { + "epoch": 1.0, + "grad_norm": 3.1245224475860596, + "learning_rate": 2.6357568053111255e-05, + "loss": 0.7661, + "step": 445 + }, + { + "epoch": 1.002247191011236, + "grad_norm": 3.7888576984405518, + "learning_rate": 2.6300445377551847e-05, + "loss": 0.7335, + "step": 446 + }, + { + "epoch": 1.0044943820224719, + "grad_norm": 3.935758590698242, + "learning_rate": 2.62432195087124e-05, + "loss": 0.7835, + "step": 447 + }, + { + "epoch": 1.006741573033708, + "grad_norm": 3.7737417221069336, + "learning_rate": 2.6185891560711587e-05, + "loss": 0.7674, + "step": 448 + }, + { + "epoch": 1.0089887640449438, + "grad_norm": 0.457439124584198, + "learning_rate": 2.612846264965542e-05, + "loss": 0.0489, + "step": 449 + }, + { + "epoch": 1.0112359550561798, + "grad_norm": 3.515545606613159, + "learning_rate": 2.607093389361555e-05, + "loss": 
0.3104, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_VitaminC_cosine_accuracy": 0.5625, + "eval_VitaminC_cosine_accuracy_threshold": 0.7603898048400879, + "eval_VitaminC_cosine_ap": 0.5525005100698708, + "eval_VitaminC_cosine_f1": 0.6685006877579092, + "eval_VitaminC_cosine_f1_threshold": 0.4857867360115051, + "eval_VitaminC_cosine_precision": 0.5105042016806722, + "eval_VitaminC_cosine_recall": 0.9681274900398407, + "eval_VitaminC_dot_accuracy": 0.548828125, + "eval_VitaminC_dot_accuracy_threshold": 325.483154296875, + "eval_VitaminC_dot_ap": 0.5344057014880635, + "eval_VitaminC_dot_f1": 0.6675749318801091, + "eval_VitaminC_dot_f1_threshold": 159.2823028564453, + "eval_VitaminC_dot_precision": 0.5072463768115942, + "eval_VitaminC_dot_recall": 0.9760956175298805, + "eval_VitaminC_euclidean_accuracy": 0.55859375, + "eval_VitaminC_euclidean_accuracy_threshold": 15.577638626098633, + "eval_VitaminC_euclidean_ap": 0.5540831040718627, + "eval_VitaminC_euclidean_f1": 0.6666666666666667, + "eval_VitaminC_euclidean_f1_threshold": 21.39883804321289, + "eval_VitaminC_euclidean_precision": 0.5030425963488844, + "eval_VitaminC_euclidean_recall": 0.9880478087649402, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 241.35984802246094, + "eval_VitaminC_manhattan_ap": 0.5536965508228381, + "eval_VitaminC_manhattan_f1": 0.6657754010695187, + "eval_VitaminC_manhattan_f1_threshold": 463.26080322265625, + "eval_VitaminC_manhattan_precision": 0.5010060362173038, + "eval_VitaminC_manhattan_recall": 0.9920318725099602, + "eval_VitaminC_max_accuracy": 0.5625, + "eval_VitaminC_max_accuracy_threshold": 325.483154296875, + "eval_VitaminC_max_ap": 0.5540831040718627, + "eval_VitaminC_max_f1": 0.6685006877579092, + "eval_VitaminC_max_f1_threshold": 463.26080322265625, + "eval_VitaminC_max_precision": 0.5105042016806722, + "eval_VitaminC_max_recall": 0.9920318725099602, + "eval_sequential_score": 0.5540831040718627, + "eval_sts-test_pearson_cosine": 0.8812335915964673, + "eval_sts-test_pearson_dot": 0.8749461926810898, + "eval_sts-test_pearson_euclidean": 0.9037170548962163, + "eval_sts-test_pearson_manhattan": 0.9046002457312785, + "eval_sts-test_pearson_max": 0.9046002457312785, + "eval_sts-test_spearman_cosine": 0.9043227946459288, + "eval_sts-test_spearman_dot": 0.8763633253101171, + "eval_sts-test_spearman_euclidean": 0.8995340964182194, + "eval_sts-test_spearman_manhattan": 0.9002530254324721, + "eval_sts-test_spearman_max": 0.9043227946459288, + "eval_vitaminc-pairs_loss": 1.4284634590148926, + "eval_vitaminc-pairs_runtime": 1.9114, + "eval_vitaminc-pairs_samples_per_second": 56.503, + "eval_vitaminc-pairs_steps_per_second": 1.046, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_negation-triplets_loss": 0.8765377402305603, + "eval_negation-triplets_runtime": 0.3186, + "eval_negation-triplets_samples_per_second": 200.858, + "eval_negation-triplets_steps_per_second": 3.138, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_scitail-pairs-pos_loss": 0.06986676901578903, + "eval_scitail-pairs-pos_runtime": 0.4519, + "eval_scitail-pairs-pos_samples_per_second": 119.485, + "eval_scitail-pairs-pos_steps_per_second": 2.213, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_scitail-pairs-qa_loss": 0.0008211968233808875, + "eval_scitail-pairs-qa_runtime": 0.5761, + "eval_scitail-pairs-qa_samples_per_second": 222.193, + "eval_scitail-pairs-qa_steps_per_second": 3.472, + "step": 450 + }, + { + "epoch": 1.0112359550561798, 
+ "eval_xsum-pairs_loss": 0.028749318793416023, + "eval_xsum-pairs_runtime": 2.7556, + "eval_xsum-pairs_samples_per_second": 46.45, + "eval_xsum-pairs_steps_per_second": 0.726, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_sciq_pairs_loss": 0.01784924976527691, + "eval_sciq_pairs_runtime": 2.8996, + "eval_sciq_pairs_samples_per_second": 44.143, + "eval_sciq_pairs_steps_per_second": 0.69, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_qasc_pairs_loss": 0.09589868038892746, + "eval_qasc_pairs_runtime": 0.6801, + "eval_qasc_pairs_samples_per_second": 188.207, + "eval_qasc_pairs_steps_per_second": 2.941, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_openbookqa_pairs_loss": 0.7216827273368835, + "eval_openbookqa_pairs_runtime": 0.5958, + "eval_openbookqa_pairs_samples_per_second": 214.846, + "eval_openbookqa_pairs_steps_per_second": 3.357, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_msmarco_pairs_loss": 0.15124906599521637, + "eval_msmarco_pairs_runtime": 1.5017, + "eval_msmarco_pairs_samples_per_second": 85.239, + "eval_msmarco_pairs_steps_per_second": 1.332, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_nq_pairs_loss": 0.10319234430789948, + "eval_nq_pairs_runtime": 2.3696, + "eval_nq_pairs_samples_per_second": 54.018, + "eval_nq_pairs_steps_per_second": 0.844, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_trivia_pairs_loss": 0.48776012659072876, + "eval_trivia_pairs_runtime": 3.5941, + "eval_trivia_pairs_samples_per_second": 35.614, + "eval_trivia_pairs_steps_per_second": 0.556, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_gooaq_pairs_loss": 0.26884058117866516, + "eval_gooaq_pairs_runtime": 0.9231, + "eval_gooaq_pairs_samples_per_second": 138.665, + "eval_gooaq_pairs_steps_per_second": 2.167, + "step": 450 + }, + { + "epoch": 1.0112359550561798, + "eval_paws-pos_loss": 0.02506968565285206, + "eval_paws-pos_runtime": 0.7124, + "eval_paws-pos_samples_per_second": 179.665, + "eval_paws-pos_steps_per_second": 2.807, + "step": 450 + }, + { + "epoch": 1.0134831460674156, + "grad_norm": 3.3714258670806885, + "learning_rate": 2.6013306412607486e-05, + "loss": 0.2977, + "step": 451 + }, + { + "epoch": 1.0157303370786517, + "grad_norm": 3.0152552127838135, + "learning_rate": 2.59555813285688e-05, + "loss": 0.7256, + "step": 452 + }, + { + "epoch": 1.0179775280898877, + "grad_norm": 0.36391642689704895, + "learning_rate": 2.589775976533726e-05, + "loss": 0.0327, + "step": 453 + }, + { + "epoch": 1.0202247191011236, + "grad_norm": 2.959923505783081, + "learning_rate": 2.5839842848628985e-05, + "loss": 0.7372, + "step": 454 + }, + { + "epoch": 1.0224719101123596, + "grad_norm": 0.8520237803459167, + "learning_rate": 2.5781831706016506e-05, + "loss": 0.0518, + "step": 455 + }, + { + "epoch": 1.0247191011235954, + "grad_norm": 3.161862850189209, + "learning_rate": 2.5723727466906813e-05, + "loss": 0.7668, + "step": 456 + }, + { + "epoch": 1.0269662921348315, + "grad_norm": 4.314478397369385, + "learning_rate": 2.5665531262519385e-05, + "loss": 0.6634, + "step": 457 + }, + { + "epoch": 1.0292134831460673, + "grad_norm": 4.253237247467041, + "learning_rate": 2.5607244225864135e-05, + "loss": 0.6022, + "step": 458 + }, + { + "epoch": 1.0314606741573034, + "grad_norm": 3.9452831745147705, + "learning_rate": 2.5548867491719395e-05, + "loss": 0.7255, + "step": 459 + }, + { + "epoch": 1.0337078651685394, + "grad_norm": 2.962646007537842, + "learning_rate": 2.549040219660978e-05, + "loss": 0.2823, + 
"step": 460 + }, + { + "epoch": 1.0359550561797752, + "grad_norm": 2.687352418899536, + "learning_rate": 2.543184947878408e-05, + "loss": 0.2614, + "step": 461 + }, + { + "epoch": 1.0382022471910113, + "grad_norm": 2.9925472736358643, + "learning_rate": 2.5373210478193118e-05, + "loss": 0.5231, + "step": 462 + }, + { + "epoch": 1.0404494382022471, + "grad_norm": 0.694948136806488, + "learning_rate": 2.5314486336467516e-05, + "loss": 0.0424, + "step": 463 + }, + { + "epoch": 1.0426966292134832, + "grad_norm": 5.543944835662842, + "learning_rate": 2.525567819689551e-05, + "loss": 0.9838, + "step": 464 + }, + { + "epoch": 1.0449438202247192, + "grad_norm": 6.339949607849121, + "learning_rate": 2.5196787204400655e-05, + "loss": 2.4683, + "step": 465 + }, + { + "epoch": 1.047191011235955, + "grad_norm": 0.48354002833366394, + "learning_rate": 2.5137814505519576e-05, + "loss": 0.0497, + "step": 466 + }, + { + "epoch": 1.049438202247191, + "grad_norm": 0.5280358791351318, + "learning_rate": 2.5078761248379596e-05, + "loss": 0.0766, + "step": 467 + }, + { + "epoch": 1.051685393258427, + "grad_norm": 3.0178725719451904, + "learning_rate": 2.5019628582676428e-05, + "loss": 0.7333, + "step": 468 + }, + { + "epoch": 1.053932584269663, + "grad_norm": 3.958301305770874, + "learning_rate": 2.4960417659651765e-05, + "loss": 0.7881, + "step": 469 + }, + { + "epoch": 1.0561797752808988, + "grad_norm": 3.143247127532959, + "learning_rate": 2.4901129632070887e-05, + "loss": 0.7611, + "step": 470 + }, + { + "epoch": 1.0584269662921348, + "grad_norm": 3.9136295318603516, + "learning_rate": 2.48417656542002e-05, + "loss": 0.6023, + "step": 471 + }, + { + "epoch": 1.060674157303371, + "grad_norm": 3.149609327316284, + "learning_rate": 2.4782326881784757e-05, + "loss": 0.7884, + "step": 472 + }, + { + "epoch": 1.0629213483146067, + "grad_norm": 4.164999961853027, + "learning_rate": 2.4722814472025798e-05, + "loss": 0.8465, + "step": 473 + }, + { + "epoch": 1.0651685393258428, + "grad_norm": 2.767392158508301, + "learning_rate": 2.466322958355817e-05, + "loss": 0.2752, + "step": 474 + }, + { + "epoch": 1.0674157303370786, + "grad_norm": 2.7601747512817383, + "learning_rate": 2.4603573376427804e-05, + "loss": 0.2648, + "step": 475 + }, + { + "epoch": 1.0696629213483146, + "grad_norm": 3.3139634132385254, + "learning_rate": 2.4543847012069114e-05, + "loss": 0.5548, + "step": 476 + }, + { + "epoch": 1.0719101123595505, + "grad_norm": 0.46029964089393616, + "learning_rate": 2.4484051653282405e-05, + "loss": 0.0554, + "step": 477 + }, + { + "epoch": 1.0741573033707865, + "grad_norm": 3.19266939163208, + "learning_rate": 2.44241884642112e-05, + "loss": 0.8244, + "step": 478 + }, + { + "epoch": 1.0764044943820226, + "grad_norm": 0.49586713314056396, + "learning_rate": 2.4364258610319604e-05, + "loss": 0.0369, + "step": 479 + }, + { + "epoch": 1.0786516853932584, + "grad_norm": 3.8522789478302, + "learning_rate": 2.4304263258369612e-05, + "loss": 0.747, + "step": 480 + }, + { + "epoch": 1.0808988764044944, + "grad_norm": 2.929213285446167, + "learning_rate": 2.4244203576398378e-05, + "loss": 0.2507, + "step": 481 + }, + { + "epoch": 1.0831460674157303, + "grad_norm": 0.4208325147628784, + "learning_rate": 2.418408073369549e-05, + "loss": 0.0304, + "step": 482 + }, + { + "epoch": 1.0853932584269663, + "grad_norm": 3.958451271057129, + "learning_rate": 2.4123895900780194e-05, + "loss": 0.7735, + "step": 483 + }, + { + "epoch": 1.0876404494382022, + "grad_norm": 3.8647449016571045, + "learning_rate": 2.4063650249378617e-05, + 
"loss": 0.7526, + "step": 484 + }, + { + "epoch": 1.0898876404494382, + "grad_norm": 3.1796231269836426, + "learning_rate": 2.4003344952400947e-05, + "loss": 0.7959, + "step": 485 + }, + { + "epoch": 1.0921348314606742, + "grad_norm": 3.6608550548553467, + "learning_rate": 2.3942981183918597e-05, + "loss": 0.7405, + "step": 486 + }, + { + "epoch": 1.09438202247191, + "grad_norm": 3.589618682861328, + "learning_rate": 2.388256011914134e-05, + "loss": 0.7041, + "step": 487 + }, + { + "epoch": 1.0966292134831461, + "grad_norm": 3.798146963119507, + "learning_rate": 2.382208293439447e-05, + "loss": 0.6991, + "step": 488 + }, + { + "epoch": 1.098876404494382, + "grad_norm": 0.4198363423347473, + "learning_rate": 2.3761550807095828e-05, + "loss": 0.0462, + "step": 489 + }, + { + "epoch": 1.101123595505618, + "grad_norm": 3.7368414402008057, + "learning_rate": 2.3700964915732954e-05, + "loss": 0.5835, + "step": 490 + }, + { + "epoch": 1.1033707865168538, + "grad_norm": 2.6319525241851807, + "learning_rate": 2.364032643984009e-05, + "loss": 0.2632, + "step": 491 + }, + { + "epoch": 1.1056179775280899, + "grad_norm": 3.6233298778533936, + "learning_rate": 2.3579636559975242e-05, + "loss": 0.4681, + "step": 492 + }, + { + "epoch": 1.107865168539326, + "grad_norm": 3.0172781944274902, + "learning_rate": 2.351889645769719e-05, + "loss": 0.7271, + "step": 493 + }, + { + "epoch": 1.1101123595505618, + "grad_norm": 2.4773848056793213, + "learning_rate": 2.3458107315542488e-05, + "loss": 0.2582, + "step": 494 + }, + { + "epoch": 1.1123595505617978, + "grad_norm": 2.33353853225708, + "learning_rate": 2.3397270317002424e-05, + "loss": 0.2251, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_VitaminC_cosine_accuracy": 0.552734375, + "eval_VitaminC_cosine_accuracy_threshold": 0.7386432886123657, + "eval_VitaminC_cosine_ap": 0.5475541126516937, + "eval_VitaminC_cosine_f1": 0.6675639300134589, + "eval_VitaminC_cosine_f1_threshold": 0.36797067523002625, + "eval_VitaminC_cosine_precision": 0.5040650406504065, + "eval_VitaminC_cosine_recall": 0.9880478087649402, + "eval_VitaminC_dot_accuracy": 0.546875, + "eval_VitaminC_dot_accuracy_threshold": 329.7876281738281, + "eval_VitaminC_dot_ap": 0.5321466642848512, + "eval_VitaminC_dot_f1": 0.6684931506849316, + "eval_VitaminC_dot_f1_threshold": 163.3940887451172, + "eval_VitaminC_dot_precision": 0.5093945720250522, + "eval_VitaminC_dot_recall": 0.9721115537848606, + "eval_VitaminC_euclidean_accuracy": 0.5546875, + "eval_VitaminC_euclidean_accuracy_threshold": 16.11573028564453, + "eval_VitaminC_euclidean_ap": 0.5516659121082983, + "eval_VitaminC_euclidean_f1": 0.6657789613848203, + "eval_VitaminC_euclidean_f1_threshold": 23.179344177246094, + "eval_VitaminC_euclidean_precision": 0.5, + "eval_VitaminC_euclidean_recall": 0.9960159362549801, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 311.29736328125, + "eval_VitaminC_manhattan_ap": 0.5507056801905115, + "eval_VitaminC_manhattan_f1": 0.6657789613848203, + "eval_VitaminC_manhattan_f1_threshold": 492.5957946777344, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 0.9960159362549801, + "eval_VitaminC_max_accuracy": 0.5546875, + "eval_VitaminC_max_accuracy_threshold": 329.7876281738281, + "eval_VitaminC_max_ap": 0.5516659121082983, + "eval_VitaminC_max_f1": 0.6684931506849316, + "eval_VitaminC_max_f1_threshold": 492.5957946777344, + "eval_VitaminC_max_precision": 0.5093945720250522, + "eval_VitaminC_max_recall": 
0.9960159362549801, + "eval_sequential_score": 0.5516659121082983, + "eval_sts-test_pearson_cosine": 0.8841762545397394, + "eval_sts-test_pearson_dot": 0.8767370504598664, + "eval_sts-test_pearson_euclidean": 0.9052591981779188, + "eval_sts-test_pearson_manhattan": 0.9058933866613134, + "eval_sts-test_pearson_max": 0.9058933866613134, + "eval_sts-test_spearman_cosine": 0.9052463711785245, + "eval_sts-test_spearman_dot": 0.8788541357679011, + "eval_sts-test_spearman_euclidean": 0.8998455204462421, + "eval_sts-test_spearman_manhattan": 0.9007272097981753, + "eval_sts-test_spearman_max": 0.9052463711785245, + "eval_vitaminc-pairs_loss": 1.385530948638916, + "eval_vitaminc-pairs_runtime": 1.9019, + "eval_vitaminc-pairs_samples_per_second": 56.786, + "eval_vitaminc-pairs_steps_per_second": 1.052, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_negation-triplets_loss": 0.8651055097579956, + "eval_negation-triplets_runtime": 0.3083, + "eval_negation-triplets_samples_per_second": 207.568, + "eval_negation-triplets_steps_per_second": 3.243, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_scitail-pairs-pos_loss": 0.0760912150144577, + "eval_scitail-pairs-pos_runtime": 0.4061, + "eval_scitail-pairs-pos_samples_per_second": 132.981, + "eval_scitail-pairs-pos_steps_per_second": 2.463, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_scitail-pairs-qa_loss": 0.0005116994143463671, + "eval_scitail-pairs-qa_runtime": 0.5389, + "eval_scitail-pairs-qa_samples_per_second": 237.512, + "eval_scitail-pairs-qa_steps_per_second": 3.711, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_xsum-pairs_loss": 0.02057916484773159, + "eval_xsum-pairs_runtime": 2.7698, + "eval_xsum-pairs_samples_per_second": 46.213, + "eval_xsum-pairs_steps_per_second": 0.722, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_sciq_pairs_loss": 0.015313890762627125, + "eval_sciq_pairs_runtime": 2.8967, + "eval_sciq_pairs_samples_per_second": 44.189, + "eval_sciq_pairs_steps_per_second": 0.69, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_qasc_pairs_loss": 0.10294634103775024, + "eval_qasc_pairs_runtime": 0.6624, + "eval_qasc_pairs_samples_per_second": 193.223, + "eval_qasc_pairs_steps_per_second": 3.019, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_openbookqa_pairs_loss": 0.679233968257904, + "eval_openbookqa_pairs_runtime": 0.5955, + "eval_openbookqa_pairs_samples_per_second": 214.948, + "eval_openbookqa_pairs_steps_per_second": 3.359, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_msmarco_pairs_loss": 0.15479065477848053, + "eval_msmarco_pairs_runtime": 1.498, + "eval_msmarco_pairs_samples_per_second": 85.447, + "eval_msmarco_pairs_steps_per_second": 1.335, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_nq_pairs_loss": 0.0932854488492012, + "eval_nq_pairs_runtime": 2.3621, + "eval_nq_pairs_samples_per_second": 54.188, + "eval_nq_pairs_steps_per_second": 0.847, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_trivia_pairs_loss": 0.5306271910667419, + "eval_trivia_pairs_runtime": 3.5969, + "eval_trivia_pairs_samples_per_second": 35.586, + "eval_trivia_pairs_steps_per_second": 0.556, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_gooaq_pairs_loss": 0.2823023796081543, + "eval_gooaq_pairs_runtime": 0.9247, + "eval_gooaq_pairs_samples_per_second": 138.43, + "eval_gooaq_pairs_steps_per_second": 2.163, + "step": 495 + }, + { + "epoch": 1.1123595505617978, + "eval_paws-pos_loss": 
0.02393445000052452, + "eval_paws-pos_runtime": 0.7045, + "eval_paws-pos_samples_per_second": 181.692, + "eval_paws-pos_steps_per_second": 2.839, + "step": 495 + }, + { + "epoch": 1.1146067415730336, + "grad_norm": 0.5595234632492065, + "learning_rate": 2.3336386646500005e-05, + "loss": 0.0385, + "step": 496 + }, + { + "epoch": 1.1168539325842697, + "grad_norm": 3.837158679962158, + "learning_rate": 2.327545748936687e-05, + "loss": 0.7277, + "step": 497 + }, + { + "epoch": 1.1191011235955055, + "grad_norm": 3.817466974258423, + "learning_rate": 2.3214484031820252e-05, + "loss": 0.705, + "step": 498 + }, + { + "epoch": 1.1213483146067416, + "grad_norm": 3.6540205478668213, + "learning_rate": 2.315346746093984e-05, + "loss": 0.6059, + "step": 499 + }, + { + "epoch": 1.1235955056179776, + "grad_norm": 3.6074440479278564, + "learning_rate": 2.309240896464469e-05, + "loss": 0.6156, + "step": 500 + }, + { + "epoch": 1.1258426966292134, + "grad_norm": 3.6017813682556152, + "learning_rate": 2.30313097316701e-05, + "loss": 0.6809, + "step": 501 + }, + { + "epoch": 1.1280898876404495, + "grad_norm": 2.9375529289245605, + "learning_rate": 2.2970170951544467e-05, + "loss": 0.7104, + "step": 502 + }, + { + "epoch": 1.1303370786516853, + "grad_norm": 2.9063873291015625, + "learning_rate": 2.2908993814566104e-05, + "loss": 0.4397, + "step": 503 + }, + { + "epoch": 1.1325842696629214, + "grad_norm": 4.570181846618652, + "learning_rate": 2.284777951178011e-05, + "loss": 0.6952, + "step": 504 + }, + { + "epoch": 1.1348314606741572, + "grad_norm": 0.4327554702758789, + "learning_rate": 2.2786529234955137e-05, + "loss": 0.0557, + "step": 505 + }, + { + "epoch": 1.1370786516853932, + "grad_norm": 3.457714796066284, + "learning_rate": 2.2725244176560217e-05, + "loss": 0.6711, + "step": 506 + }, + { + "epoch": 1.1393258426966293, + "grad_norm": 3.4728267192840576, + "learning_rate": 2.2663925529741547e-05, + "loss": 0.7173, + "step": 507 + }, + { + "epoch": 1.1415730337078651, + "grad_norm": 3.583503246307373, + "learning_rate": 2.2602574488299232e-05, + "loss": 0.7037, + "step": 508 + }, + { + "epoch": 1.1438202247191012, + "grad_norm": 4.4957661628723145, + "learning_rate": 2.2541192246664077e-05, + "loss": 0.8578, + "step": 509 + }, + { + "epoch": 1.146067415730337, + "grad_norm": 3.447329044342041, + "learning_rate": 2.2479779999874303e-05, + "loss": 0.6712, + "step": 510 + }, + { + "epoch": 1.148314606741573, + "grad_norm": 3.2203116416931152, + "learning_rate": 2.2418338943552296e-05, + "loss": 0.7472, + "step": 511 + }, + { + "epoch": 1.1505617977528089, + "grad_norm": 3.551426410675049, + "learning_rate": 2.235687027388135e-05, + "loss": 0.5911, + "step": 512 + }, + { + "epoch": 1.152808988764045, + "grad_norm": 3.8964977264404297, + "learning_rate": 2.229537518758233e-05, + "loss": 0.6827, + "step": 513 + }, + { + "epoch": 1.155056179775281, + "grad_norm": 3.483255624771118, + "learning_rate": 2.2233854881890425e-05, + "loss": 0.5034, + "step": 514 + }, + { + "epoch": 1.1573033707865168, + "grad_norm": 4.6676740646362305, + "learning_rate": 2.2172310554531788e-05, + "loss": 0.8367, + "step": 515 + }, + { + "epoch": 1.1595505617977528, + "grad_norm": 3.4448702335357666, + "learning_rate": 2.2110743403700276e-05, + "loss": 0.6596, + "step": 516 + }, + { + "epoch": 1.1617977528089887, + "grad_norm": 0.5997368097305298, + "learning_rate": 2.2049154628034062e-05, + "loss": 0.0859, + "step": 517 + }, + { + "epoch": 1.1640449438202247, + "grad_norm": 2.822986602783203, + "learning_rate": 
2.1987545426592347e-05, + "loss": 0.2797, + "step": 518 + }, + { + "epoch": 1.1662921348314608, + "grad_norm": 3.156625270843506, + "learning_rate": 2.1925916998832005e-05, + "loss": 0.5181, + "step": 519 + }, + { + "epoch": 1.1685393258426966, + "grad_norm": 3.550964832305908, + "learning_rate": 2.1864270544584192e-05, + "loss": 0.6837, + "step": 520 + }, + { + "epoch": 1.1707865168539326, + "grad_norm": 3.6208648681640625, + "learning_rate": 2.1802607264031045e-05, + "loss": 0.7238, + "step": 521 + }, + { + "epoch": 1.1730337078651685, + "grad_norm": 0.5348507165908813, + "learning_rate": 2.174092835768228e-05, + "loss": 0.0318, + "step": 522 + }, + { + "epoch": 1.1752808988764045, + "grad_norm": 0.4848617911338806, + "learning_rate": 2.167923502635183e-05, + "loss": 0.0694, + "step": 523 + }, + { + "epoch": 1.1775280898876406, + "grad_norm": 3.322484016418457, + "learning_rate": 2.161752847113446e-05, + "loss": 0.7472, + "step": 524 + }, + { + "epoch": 1.1797752808988764, + "grad_norm": 5.0792999267578125, + "learning_rate": 2.1555809893382403e-05, + "loss": 0.8912, + "step": 525 + }, + { + "epoch": 1.1820224719101124, + "grad_norm": 3.029616117477417, + "learning_rate": 2.1494080494681936e-05, + "loss": 0.7744, + "step": 526 + }, + { + "epoch": 1.1842696629213483, + "grad_norm": 3.989457845687866, + "learning_rate": 2.1432341476830015e-05, + "loss": 0.6869, + "step": 527 + }, + { + "epoch": 1.1865168539325843, + "grad_norm": 4.336198806762695, + "learning_rate": 2.137059404181087e-05, + "loss": 0.8497, + "step": 528 + }, + { + "epoch": 1.1887640449438202, + "grad_norm": 2.7506906986236572, + "learning_rate": 2.1308839391772608e-05, + "loss": 0.4281, + "step": 529 + }, + { + "epoch": 1.1910112359550562, + "grad_norm": 3.7945544719696045, + "learning_rate": 2.12470787290038e-05, + "loss": 0.7605, + "step": 530 + }, + { + "epoch": 1.1932584269662923, + "grad_norm": 3.5343854427337646, + "learning_rate": 2.1185313255910074e-05, + "loss": 0.6354, + "step": 531 + }, + { + "epoch": 1.195505617977528, + "grad_norm": 0.35124847292900085, + "learning_rate": 2.1123544174990714e-05, + "loss": 0.0518, + "step": 532 + }, + { + "epoch": 1.1977528089887641, + "grad_norm": 2.397141456604004, + "learning_rate": 2.106177268881524e-05, + "loss": 0.2602, + "step": 533 + }, + { + "epoch": 1.2, + "grad_norm": 3.2060976028442383, + "learning_rate": 2.1e-05, + "loss": 0.5082, + "step": 534 + }, + { + "epoch": 1.202247191011236, + "grad_norm": 0.4915749132633209, + "learning_rate": 2.093822731118476e-05, + "loss": 0.0603, + "step": 535 + }, + { + "epoch": 1.2044943820224718, + "grad_norm": 6.44587516784668, + "learning_rate": 2.087645582500929e-05, + "loss": 2.3371, + "step": 536 + }, + { + "epoch": 1.2067415730337079, + "grad_norm": 2.940648078918457, + "learning_rate": 2.0814686744089924e-05, + "loss": 0.6513, + "step": 537 + }, + { + "epoch": 1.208988764044944, + "grad_norm": 3.5047895908355713, + "learning_rate": 2.0752921270996197e-05, + "loss": 0.6053, + "step": 538 + }, + { + "epoch": 1.2112359550561798, + "grad_norm": 0.4148883819580078, + "learning_rate": 2.069116060822739e-05, + "loss": 0.0544, + "step": 539 + }, + { + "epoch": 1.2134831460674158, + "grad_norm": 3.633577823638916, + "learning_rate": 2.062940595818913e-05, + "loss": 0.7219, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_VitaminC_cosine_accuracy": 0.55859375, + "eval_VitaminC_cosine_accuracy_threshold": 0.750012993812561, + "eval_VitaminC_cosine_ap": 0.5498478359791117, + "eval_VitaminC_cosine_f1": 0.6666666666666666, + 
"eval_VitaminC_cosine_f1_threshold": 0.2890807092189789, + "eval_VitaminC_cosine_precision": 0.5, + "eval_VitaminC_cosine_recall": 1.0, + "eval_VitaminC_dot_accuracy": 0.544921875, + "eval_VitaminC_dot_accuracy_threshold": 325.9202880859375, + "eval_VitaminC_dot_ap": 0.5307476674257613, + "eval_VitaminC_dot_f1": 0.6657824933687002, + "eval_VitaminC_dot_f1_threshold": 100.63825988769531, + "eval_VitaminC_dot_precision": 0.4990059642147117, + "eval_VitaminC_dot_recall": 1.0, + "eval_VitaminC_euclidean_accuracy": 0.556640625, + "eval_VitaminC_euclidean_accuracy_threshold": 12.168689727783203, + "eval_VitaminC_euclidean_ap": 0.5532762404675531, + "eval_VitaminC_euclidean_f1": 0.6675531914893617, + "eval_VitaminC_euclidean_f1_threshold": 23.49704360961914, + "eval_VitaminC_euclidean_precision": 0.500998003992016, + "eval_VitaminC_euclidean_recall": 1.0, + "eval_VitaminC_manhattan_accuracy": 0.552734375, + "eval_VitaminC_manhattan_accuracy_threshold": 251.46685791015625, + "eval_VitaminC_manhattan_ap": 0.5528651894260193, + "eval_VitaminC_manhattan_f1": 0.6649006622516557, + "eval_VitaminC_manhattan_f1_threshold": 511.0567321777344, + "eval_VitaminC_manhattan_precision": 0.498015873015873, + "eval_VitaminC_manhattan_recall": 1.0, + "eval_VitaminC_max_accuracy": 0.55859375, + "eval_VitaminC_max_accuracy_threshold": 325.9202880859375, + "eval_VitaminC_max_ap": 0.5532762404675531, + "eval_VitaminC_max_f1": 0.6675531914893617, + "eval_VitaminC_max_f1_threshold": 511.0567321777344, + "eval_VitaminC_max_precision": 0.500998003992016, + "eval_VitaminC_max_recall": 1.0, + "eval_sequential_score": 0.5532762404675531, + "eval_sts-test_pearson_cosine": 0.8836542354588774, + "eval_sts-test_pearson_dot": 0.8766446823118297, + "eval_sts-test_pearson_euclidean": 0.9062930503225336, + "eval_sts-test_pearson_manhattan": 0.9067495755923205, + "eval_sts-test_pearson_max": 0.9067495755923205, + "eval_sts-test_spearman_cosine": 0.9065594179390095, + "eval_sts-test_spearman_dot": 0.8763055514316607, + "eval_sts-test_spearman_euclidean": 0.9012305719863057, + "eval_sts-test_spearman_manhattan": 0.901725878947386, + "eval_sts-test_spearman_max": 0.9065594179390095, + "eval_vitaminc-pairs_loss": 1.377655029296875, + "eval_vitaminc-pairs_runtime": 1.9029, + "eval_vitaminc-pairs_samples_per_second": 56.754, + "eval_vitaminc-pairs_steps_per_second": 1.051, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_negation-triplets_loss": 0.8754605650901794, + "eval_negation-triplets_runtime": 0.3041, + "eval_negation-triplets_samples_per_second": 210.479, + "eval_negation-triplets_steps_per_second": 3.289, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_scitail-pairs-pos_loss": 0.08722448348999023, + "eval_scitail-pairs-pos_runtime": 0.4119, + "eval_scitail-pairs-pos_samples_per_second": 131.115, + "eval_scitail-pairs-pos_steps_per_second": 2.428, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_scitail-pairs-qa_loss": 0.0006766854785382748, + "eval_scitail-pairs-qa_runtime": 0.5432, + "eval_scitail-pairs-qa_samples_per_second": 235.655, + "eval_scitail-pairs-qa_steps_per_second": 3.682, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_xsum-pairs_loss": 0.02525785192847252, + "eval_xsum-pairs_runtime": 2.7625, + "eval_xsum-pairs_samples_per_second": 46.335, + "eval_xsum-pairs_steps_per_second": 0.724, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_sciq_pairs_loss": 0.016477206721901894, + "eval_sciq_pairs_runtime": 2.9071, + "eval_sciq_pairs_samples_per_second": 
44.031, + "eval_sciq_pairs_steps_per_second": 0.688, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_qasc_pairs_loss": 0.09754681587219238, + "eval_qasc_pairs_runtime": 0.6665, + "eval_qasc_pairs_samples_per_second": 192.059, + "eval_qasc_pairs_steps_per_second": 3.001, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_openbookqa_pairs_loss": 0.6885332465171814, + "eval_openbookqa_pairs_runtime": 0.5914, + "eval_openbookqa_pairs_samples_per_second": 216.427, + "eval_openbookqa_pairs_steps_per_second": 3.382, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_msmarco_pairs_loss": 0.13402031362056732, + "eval_msmarco_pairs_runtime": 1.5047, + "eval_msmarco_pairs_samples_per_second": 85.067, + "eval_msmarco_pairs_steps_per_second": 1.329, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_nq_pairs_loss": 0.09435093402862549, + "eval_nq_pairs_runtime": 2.366, + "eval_nq_pairs_samples_per_second": 54.101, + "eval_nq_pairs_steps_per_second": 0.845, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_trivia_pairs_loss": 0.5325451493263245, + "eval_trivia_pairs_runtime": 3.6088, + "eval_trivia_pairs_samples_per_second": 35.468, + "eval_trivia_pairs_steps_per_second": 0.554, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_gooaq_pairs_loss": 0.2803599536418915, + "eval_gooaq_pairs_runtime": 0.9204, + "eval_gooaq_pairs_samples_per_second": 139.063, + "eval_gooaq_pairs_steps_per_second": 2.173, + "step": 540 + }, + { + "epoch": 1.2134831460674158, + "eval_paws-pos_loss": 0.024230225011706352, + "eval_paws-pos_runtime": 0.7045, + "eval_paws-pos_samples_per_second": 181.679, + "eval_paws-pos_steps_per_second": 2.839, + "step": 540 + }, + { + "epoch": 1.2157303370786516, + "grad_norm": 3.6125593185424805, + "learning_rate": 2.056765852316999e-05, + "loss": 0.6862, + "step": 541 + }, + { + "epoch": 1.2179775280898877, + "grad_norm": 2.6327385902404785, + "learning_rate": 2.0505919505318062e-05, + "loss": 0.4639, + "step": 542 + }, + { + "epoch": 1.2202247191011235, + "grad_norm": 3.4139833450317383, + "learning_rate": 2.0444190106617598e-05, + "loss": 0.6663, + "step": 543 + }, + { + "epoch": 1.2224719101123596, + "grad_norm": 3.4155499935150146, + "learning_rate": 2.0382471528865537e-05, + "loss": 0.5047, + "step": 544 + }, + { + "epoch": 1.2247191011235956, + "grad_norm": 2.4931771755218506, + "learning_rate": 2.0320764973648166e-05, + "loss": 0.2306, + "step": 545 + }, + { + "epoch": 1.2269662921348314, + "grad_norm": 3.664468288421631, + "learning_rate": 2.0259071642317713e-05, + "loss": 0.7147, + "step": 546 + }, + { + "epoch": 1.2292134831460675, + "grad_norm": 0.5503119826316833, + "learning_rate": 2.0197392735968953e-05, + "loss": 0.0344, + "step": 547 + }, + { + "epoch": 1.2314606741573033, + "grad_norm": 2.96278977394104, + "learning_rate": 2.013572945541581e-05, + "loss": 0.4429, + "step": 548 + }, + { + "epoch": 1.2337078651685394, + "grad_norm": 3.6760940551757812, + "learning_rate": 2.0074083001167992e-05, + "loss": 0.6966, + "step": 549 + }, + { + "epoch": 1.2359550561797752, + "grad_norm": 2.8022167682647705, + "learning_rate": 2.0012454573407644e-05, + "loss": 0.6926, + "step": 550 + }, + { + "epoch": 1.2382022471910112, + "grad_norm": 2.4972310066223145, + "learning_rate": 1.995084537196594e-05, + "loss": 0.261, + "step": 551 + }, + { + "epoch": 1.2404494382022473, + "grad_norm": 3.387359142303467, + "learning_rate": 1.9889256596299725e-05, + "loss": 0.6558, + "step": 552 + }, + { + "epoch": 1.2426966292134831, + 
"grad_norm": 3.303635358810425, + "learning_rate": 1.9827689445468206e-05, + "loss": 0.6285, + "step": 553 + }, + { + "epoch": 1.2449438202247192, + "grad_norm": 2.8494338989257812, + "learning_rate": 1.9766145118109576e-05, + "loss": 0.6471, + "step": 554 + }, + { + "epoch": 1.247191011235955, + "grad_norm": 3.350094795227051, + "learning_rate": 1.9704624812417665e-05, + "loss": 0.4989, + "step": 555 + }, + { + "epoch": 1.249438202247191, + "grad_norm": 2.0344486236572266, + "learning_rate": 1.9643129726118646e-05, + "loss": 0.195, + "step": 556 + }, + { + "epoch": 1.2516853932584269, + "grad_norm": 4.317070007324219, + "learning_rate": 1.9581661056447698e-05, + "loss": 0.8431, + "step": 557 + }, + { + "epoch": 1.253932584269663, + "grad_norm": 3.8960471153259277, + "learning_rate": 1.95202200001257e-05, + "loss": 0.642, + "step": 558 + }, + { + "epoch": 1.256179775280899, + "grad_norm": 2.2174787521362305, + "learning_rate": 1.9458807753335924e-05, + "loss": 0.2251, + "step": 559 + }, + { + "epoch": 1.2584269662921348, + "grad_norm": 2.0047852993011475, + "learning_rate": 1.9397425511700762e-05, + "loss": 0.2057, + "step": 560 + }, + { + "epoch": 1.2606741573033708, + "grad_norm": 2.035231828689575, + "learning_rate": 1.933607447025845e-05, + "loss": 0.2198, + "step": 561 + }, + { + "epoch": 1.2629213483146067, + "grad_norm": 3.2453677654266357, + "learning_rate": 1.9274755823439777e-05, + "loss": 0.4856, + "step": 562 + }, + { + "epoch": 1.2651685393258427, + "grad_norm": 0.46472156047821045, + "learning_rate": 1.9213470765044864e-05, + "loss": 0.0273, + "step": 563 + }, + { + "epoch": 1.2674157303370785, + "grad_norm": 0.5257102251052856, + "learning_rate": 1.915222048821989e-05, + "loss": 0.0302, + "step": 564 + }, + { + "epoch": 1.2696629213483146, + "grad_norm": 2.3320302963256836, + "learning_rate": 1.9091006185433897e-05, + "loss": 0.1863, + "step": 565 + }, + { + "epoch": 1.2719101123595506, + "grad_norm": 4.208177089691162, + "learning_rate": 1.9029829048455534e-05, + "loss": 0.8053, + "step": 566 + }, + { + "epoch": 1.2741573033707865, + "grad_norm": 2.2916760444641113, + "learning_rate": 1.8968690268329893e-05, + "loss": 0.1935, + "step": 567 + }, + { + "epoch": 1.2764044943820225, + "grad_norm": 3.5239036083221436, + "learning_rate": 1.8907591035355305e-05, + "loss": 0.5837, + "step": 568 + }, + { + "epoch": 1.2786516853932584, + "grad_norm": 3.938844680786133, + "learning_rate": 1.884653253906016e-05, + "loss": 0.7606, + "step": 569 + }, + { + "epoch": 1.2808988764044944, + "grad_norm": 1.9056942462921143, + "learning_rate": 1.8785515968179746e-05, + "loss": 0.1904, + "step": 570 + }, + { + "epoch": 1.2831460674157302, + "grad_norm": 3.481647253036499, + "learning_rate": 1.8724542510633123e-05, + "loss": 0.6585, + "step": 571 + }, + { + "epoch": 1.2853932584269663, + "grad_norm": 3.712449550628662, + "learning_rate": 1.8663613353499996e-05, + "loss": 0.7043, + "step": 572 + }, + { + "epoch": 1.2876404494382023, + "grad_norm": 2.702223539352417, + "learning_rate": 1.8602729682997573e-05, + "loss": 0.6083, + "step": 573 + }, + { + "epoch": 1.2898876404494382, + "grad_norm": 3.8883962631225586, + "learning_rate": 1.854189268445751e-05, + "loss": 0.6523, + "step": 574 + }, + { + "epoch": 1.2921348314606742, + "grad_norm": 3.663496494293213, + "learning_rate": 1.8481103542302805e-05, + "loss": 0.553, + "step": 575 + }, + { + "epoch": 1.29438202247191, + "grad_norm": 3.476609945297241, + "learning_rate": 1.8420363440024752e-05, + "loss": 0.6234, + "step": 576 + }, + { + "epoch": 
1.296629213483146, + "grad_norm": 2.72796368598938, + "learning_rate": 1.8359673560159906e-05, + "loss": 0.4428, + "step": 577 + }, + { + "epoch": 1.298876404494382, + "grad_norm": 3.249882698059082, + "learning_rate": 1.829903508426704e-05, + "loss": 0.5433, + "step": 578 + }, + { + "epoch": 1.301123595505618, + "grad_norm": 3.393094539642334, + "learning_rate": 1.823844919290417e-05, + "loss": 0.4937, + "step": 579 + }, + { + "epoch": 1.303370786516854, + "grad_norm": 2.7802557945251465, + "learning_rate": 1.817791706560553e-05, + "loss": 0.2222, + "step": 580 + }, + { + "epoch": 1.3056179775280898, + "grad_norm": 3.4187793731689453, + "learning_rate": 1.8117439880858653e-05, + "loss": 0.5672, + "step": 581 + }, + { + "epoch": 1.3078651685393259, + "grad_norm": 2.9662680625915527, + "learning_rate": 1.8057018816081404e-05, + "loss": 0.6562, + "step": 582 + }, + { + "epoch": 1.310112359550562, + "grad_norm": 0.4572099447250366, + "learning_rate": 1.7996655047599054e-05, + "loss": 0.056, + "step": 583 + }, + { + "epoch": 1.3123595505617978, + "grad_norm": 2.625009775161743, + "learning_rate": 1.7936349750621377e-05, + "loss": 0.4015, + "step": 584 + }, + { + "epoch": 1.3146067415730336, + "grad_norm": 3.479508399963379, + "learning_rate": 1.7876104099219804e-05, + "loss": 0.6675, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_VitaminC_cosine_accuracy": 0.556640625, + "eval_VitaminC_cosine_accuracy_threshold": 0.8350518941879272, + "eval_VitaminC_cosine_ap": 0.5499225365506326, + "eval_VitaminC_cosine_f1": 0.6721311475409836, + "eval_VitaminC_cosine_f1_threshold": 0.4279438257217407, + "eval_VitaminC_cosine_precision": 0.5114345114345115, + "eval_VitaminC_cosine_recall": 0.9800796812749004, + "eval_VitaminC_dot_accuracy": 0.55078125, + "eval_VitaminC_dot_accuracy_threshold": 320.0419616699219, + "eval_VitaminC_dot_ap": 0.532421197041184, + "eval_VitaminC_dot_f1": 0.6693989071038251, + "eval_VitaminC_dot_f1_threshold": 162.15530395507812, + "eval_VitaminC_dot_precision": 0.5093555093555093, + "eval_VitaminC_dot_recall": 0.9760956175298805, + "eval_VitaminC_euclidean_accuracy": 0.552734375, + "eval_VitaminC_euclidean_accuracy_threshold": 11.53189468383789, + "eval_VitaminC_euclidean_ap": 0.5507235346667002, + "eval_VitaminC_euclidean_f1": 0.665742024965326, + "eval_VitaminC_euclidean_f1_threshold": 20.513931274414062, + "eval_VitaminC_euclidean_precision": 0.5106382978723404, + "eval_VitaminC_euclidean_recall": 0.9561752988047809, + "eval_VitaminC_manhattan_accuracy": 0.5546875, + "eval_VitaminC_manhattan_accuracy_threshold": 236.3373260498047, + "eval_VitaminC_manhattan_ap": 0.5500919145575321, + "eval_VitaminC_manhattan_f1": 0.664886515353805, + "eval_VitaminC_manhattan_f1_threshold": 483.1922607421875, + "eval_VitaminC_manhattan_precision": 0.5, + "eval_VitaminC_manhattan_recall": 0.9920318725099602, + "eval_VitaminC_max_accuracy": 0.556640625, + "eval_VitaminC_max_accuracy_threshold": 320.0419616699219, + "eval_VitaminC_max_ap": 0.5507235346667002, + "eval_VitaminC_max_f1": 0.6721311475409836, + "eval_VitaminC_max_f1_threshold": 483.1922607421875, + "eval_VitaminC_max_precision": 0.5114345114345115, + "eval_VitaminC_max_recall": 0.9920318725099602, + "eval_sequential_score": 0.5507235346667002, + "eval_sts-test_pearson_cosine": 0.8854424628564648, + "eval_sts-test_pearson_dot": 0.8781775369503937, + "eval_sts-test_pearson_euclidean": 0.9073948686610891, + "eval_sts-test_pearson_manhattan": 0.9077723844704348, + "eval_sts-test_pearson_max": 0.9077723844704348, + 
"eval_sts-test_spearman_cosine": 0.9079970522112082, + "eval_sts-test_spearman_dot": 0.8790970008634722, + "eval_sts-test_spearman_euclidean": 0.9029484386573375, + "eval_sts-test_spearman_manhattan": 0.9037780375410113, + "eval_sts-test_spearman_max": 0.9079970522112082, + "eval_vitaminc-pairs_loss": 1.3895310163497925, + "eval_vitaminc-pairs_runtime": 1.8903, + "eval_vitaminc-pairs_samples_per_second": 57.134, + "eval_vitaminc-pairs_steps_per_second": 1.058, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_negation-triplets_loss": 0.9072961807250977, + "eval_negation-triplets_runtime": 0.3009, + "eval_negation-triplets_samples_per_second": 212.728, + "eval_negation-triplets_steps_per_second": 3.324, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_scitail-pairs-pos_loss": 0.09969545155763626, + "eval_scitail-pairs-pos_runtime": 0.3958, + "eval_scitail-pairs-pos_samples_per_second": 136.434, + "eval_scitail-pairs-pos_steps_per_second": 2.527, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_scitail-pairs-qa_loss": 0.0006847005570307374, + "eval_scitail-pairs-qa_runtime": 0.5306, + "eval_scitail-pairs-qa_samples_per_second": 241.242, + "eval_scitail-pairs-qa_steps_per_second": 3.769, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_xsum-pairs_loss": 0.02075883559882641, + "eval_xsum-pairs_runtime": 2.7478, + "eval_xsum-pairs_samples_per_second": 46.583, + "eval_xsum-pairs_steps_per_second": 0.728, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_sciq_pairs_loss": 0.01607341691851616, + "eval_sciq_pairs_runtime": 2.8547, + "eval_sciq_pairs_samples_per_second": 44.838, + "eval_sciq_pairs_steps_per_second": 0.701, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_qasc_pairs_loss": 0.09826790541410446, + "eval_qasc_pairs_runtime": 0.6743, + "eval_qasc_pairs_samples_per_second": 189.831, + "eval_qasc_pairs_steps_per_second": 2.966, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_openbookqa_pairs_loss": 0.7106958031654358, + "eval_openbookqa_pairs_runtime": 0.6028, + "eval_openbookqa_pairs_samples_per_second": 212.347, + "eval_openbookqa_pairs_steps_per_second": 3.318, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_msmarco_pairs_loss": 0.14438961446285248, + "eval_msmarco_pairs_runtime": 1.4968, + "eval_msmarco_pairs_samples_per_second": 85.516, + "eval_msmarco_pairs_steps_per_second": 1.336, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_nq_pairs_loss": 0.08692270517349243, + "eval_nq_pairs_runtime": 2.3567, + "eval_nq_pairs_samples_per_second": 54.312, + "eval_nq_pairs_steps_per_second": 0.849, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_trivia_pairs_loss": 0.4988090991973877, + "eval_trivia_pairs_runtime": 3.5886, + "eval_trivia_pairs_samples_per_second": 35.669, + "eval_trivia_pairs_steps_per_second": 0.557, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_gooaq_pairs_loss": 0.2779709994792938, + "eval_gooaq_pairs_runtime": 0.9125, + "eval_gooaq_pairs_samples_per_second": 140.269, + "eval_gooaq_pairs_steps_per_second": 2.192, + "step": 585 + }, + { + "epoch": 1.3146067415730336, + "eval_paws-pos_loss": 0.024158792570233345, + "eval_paws-pos_runtime": 0.7009, + "eval_paws-pos_samples_per_second": 182.63, + "eval_paws-pos_steps_per_second": 2.854, + "step": 585 + }, + { + "epoch": 1.3168539325842696, + "grad_norm": 3.7084991931915283, + "learning_rate": 1.781591926630451e-05, + "loss": 0.7298, + "step": 586 + }, + { + "epoch": 
1.3191011235955057, + "grad_norm": 0.33646777272224426, + "learning_rate": 1.7755796423601624e-05, + "loss": 0.0372, + "step": 587 + }, + { + "epoch": 1.3213483146067415, + "grad_norm": 3.1976261138916016, + "learning_rate": 1.7695736741630386e-05, + "loss": 0.7247, + "step": 588 + }, + { + "epoch": 1.3235955056179776, + "grad_norm": 3.706315040588379, + "learning_rate": 1.7635741389680394e-05, + "loss": 0.6839, + "step": 589 + }, + { + "epoch": 1.3258426966292136, + "grad_norm": 3.0135083198547363, + "learning_rate": 1.75758115357888e-05, + "loss": 0.6848, + "step": 590 + }, + { + "epoch": 1.3280898876404494, + "grad_norm": 3.324838876724243, + "learning_rate": 1.751594834671759e-05, + "loss": 0.4449, + "step": 591 + }, + { + "epoch": 1.3303370786516853, + "grad_norm": 2.1507813930511475, + "learning_rate": 1.7456152987930877e-05, + "loss": 0.2104, + "step": 592 + }, + { + "epoch": 1.3325842696629213, + "grad_norm": 2.267099380493164, + "learning_rate": 1.73964266235722e-05, + "loss": 0.391, + "step": 593 + }, + { + "epoch": 1.3348314606741574, + "grad_norm": 2.256056308746338, + "learning_rate": 1.7336770416441832e-05, + "loss": 0.3641, + "step": 594 + }, + { + "epoch": 1.3370786516853932, + "grad_norm": 3.728010416030884, + "learning_rate": 1.7277185527974203e-05, + "loss": 0.6953, + "step": 595 + }, + { + "epoch": 1.3393258426966292, + "grad_norm": 3.9698784351348877, + "learning_rate": 1.7217673118215237e-05, + "loss": 0.6382, + "step": 596 + }, + { + "epoch": 1.3415730337078653, + "grad_norm": 3.2924447059631348, + "learning_rate": 1.7158234345799802e-05, + "loss": 0.6245, + "step": 597 + }, + { + "epoch": 1.3438202247191011, + "grad_norm": 3.6384663581848145, + "learning_rate": 1.7098870367929108e-05, + "loss": 0.6775, + "step": 598 + }, + { + "epoch": 1.346067415730337, + "grad_norm": 3.9194507598876953, + "learning_rate": 1.703958234034823e-05, + "loss": 0.5727, + "step": 599 + }, + { + "epoch": 1.348314606741573, + "grad_norm": 0.4283960163593292, + "learning_rate": 1.698037141732357e-05, + "loss": 0.0567, + "step": 600 + }, + { + "epoch": 1.350561797752809, + "grad_norm": 3.536198377609253, + "learning_rate": 1.6921238751620402e-05, + "loss": 0.6258, + "step": 601 + }, + { + "epoch": 1.3528089887640449, + "grad_norm": 4.333379745483398, + "learning_rate": 1.6862185494480425e-05, + "loss": 0.8138, + "step": 602 + }, + { + "epoch": 1.355056179775281, + "grad_norm": 3.6202950477600098, + "learning_rate": 1.680321279559934e-05, + "loss": 0.6099, + "step": 603 + }, + { + "epoch": 1.357303370786517, + "grad_norm": 3.049006938934326, + "learning_rate": 1.6744321803104493e-05, + "loss": 0.6801, + "step": 604 + }, + { + "epoch": 1.3595505617977528, + "grad_norm": 6.1929850578308105, + "learning_rate": 1.668551366353248e-05, + "loss": 2.2003, + "step": 605 + }, + { + "epoch": 1.3617977528089886, + "grad_norm": 0.40746456384658813, + "learning_rate": 1.662678952180688e-05, + "loss": 0.052, + "step": 606 + }, + { + "epoch": 1.3640449438202247, + "grad_norm": 2.2396955490112305, + "learning_rate": 1.656815052121592e-05, + "loss": 0.2175, + "step": 607 + }, + { + "epoch": 1.3662921348314607, + "grad_norm": 4.395736217498779, + "learning_rate": 1.6509597803390222e-05, + "loss": 0.7671, + "step": 608 + }, + { + "epoch": 1.3685393258426966, + "grad_norm": 3.2075412273406982, + "learning_rate": 1.6451132508280602e-05, + "loss": 0.5524, + "step": 609 + }, + { + "epoch": 1.3707865168539326, + "grad_norm": 3.6282618045806885, + "learning_rate": 1.639275577413586e-05, + "loss": 0.5868, + "step": 610 
+ }, + { + "epoch": 1.3730337078651687, + "grad_norm": 2.8120224475860596, + "learning_rate": 1.6334468737480616e-05, + "loss": 0.6628, + "step": 611 + }, + { + "epoch": 1.3752808988764045, + "grad_norm": 4.477191925048828, + "learning_rate": 1.6276272533093184e-05, + "loss": 0.8106, + "step": 612 + }, + { + "epoch": 1.3775280898876405, + "grad_norm": 2.078286647796631, + "learning_rate": 1.621816829398349e-05, + "loss": 0.2, + "step": 613 + }, + { + "epoch": 1.3797752808988764, + "grad_norm": 3.698269844055176, + "learning_rate": 1.616015715137101e-05, + "loss": 0.57, + "step": 614 + }, + { + "epoch": 1.3820224719101124, + "grad_norm": 3.7442100048065186, + "learning_rate": 1.6102240234662735e-05, + "loss": 0.6329, + "step": 615 + }, + { + "epoch": 1.3842696629213482, + "grad_norm": 3.1588127613067627, + "learning_rate": 1.6044418671431197e-05, + "loss": 0.5616, + "step": 616 + }, + { + "epoch": 1.3865168539325843, + "grad_norm": 2.8839151859283447, + "learning_rate": 1.5986693587392505e-05, + "loss": 0.6678, + "step": 617 + }, + { + "epoch": 1.3887640449438203, + "grad_norm": 3.6140055656433105, + "learning_rate": 1.5929066106384448e-05, + "loss": 0.454, + "step": 618 + }, + { + "epoch": 1.3910112359550562, + "grad_norm": 3.0111019611358643, + "learning_rate": 1.5871537350344574e-05, + "loss": 0.5198, + "step": 619 + }, + { + "epoch": 1.3932584269662922, + "grad_norm": 3.6177947521209717, + "learning_rate": 1.581410843928841e-05, + "loss": 0.5259, + "step": 620 + }, + { + "epoch": 1.395505617977528, + "grad_norm": 4.257228374481201, + "learning_rate": 1.5756780491287593e-05, + "loss": 0.714, + "step": 621 + }, + { + "epoch": 1.397752808988764, + "grad_norm": 3.3783249855041504, + "learning_rate": 1.5699554622448154e-05, + "loss": 0.4943, + "step": 622 + }, + { + "epoch": 1.4, + "grad_norm": 0.527916431427002, + "learning_rate": 1.5642431946888743e-05, + "loss": 0.0324, + "step": 623 + } + ], + "logging_steps": 1, + "max_steps": 890, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 89, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 640, + "trial_name": null, + "trial_params": null +}
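Note (not part of the checkpoint file itself): the patch above adds what appears to be a Hugging Face Trainer state file, whose log_history mixes per-step training entries (keyed by "loss" and "step") with periodic evaluation entries (keyed by "eval_*" metrics at the same "step"). As a minimal, hedged sketch of how that structure can be inspected, the Python below loads the file with the standard json module and separates the two kinds of entries; the file path and the choice of "eval_sts-test_spearman_cosine" as the example metric are assumptions taken from the entries visible in this diff.

    # Sketch only: summarize the log_history of the trainer_state.json added above.
    # Assumes the checkpoint directory name used in the diff ("checkpoint-623").
    import json

    with open("checkpoint-623/trainer_state.json") as f:
        state = json.load(f)

    history = state["log_history"]

    # Training entries carry a "loss" value logged at each "step".
    train_losses = [(entry["step"], entry["loss"]) for entry in history if "loss" in entry]

    # Evaluation entries carry "eval_*" keys; here we pick one STS metric as an example.
    eval_metric = "eval_sts-test_spearman_cosine"
    eval_points = [(entry["step"], entry[eval_metric]) for entry in history if eval_metric in entry]

    print(f"training-loss entries logged: {len(train_losses)} of max_steps={state['max_steps']}")
    print(f"last logged training loss: step {train_losses[-1][0]}, loss {train_losses[-1][1]}")
    print(f"{eval_metric} over evaluation steps: {eval_points}")

Run from the repository root after applying this patch, the script only reads the JSON and prints a short summary; it does not modify the checkpoint.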